~hc/RK356X_SDK_RELEASE.git

..	..	@@ -187,7 +187,7 @@
187	187	if (p->signal_mapped_size &&
188	188	p->signal_event_count == p->signal_mapped_size / 8) {
189	189	if (!p->signal_event_limit_reached) {
190		- pr_warn("Signal event wasn't created because limit was reached\n");
	190	+ pr_debug("Signal event wasn't created because limit was reached\n");
191	191	p->signal_event_limit_reached = true;
192	192	}
193	193	return -ENOSPC;
..	..	@@ -346,7 +346,6 @@
346	346	ret = create_signal_event(devkfd, p, ev);
347	347	if (!ret) {
348	348	*event_page_offset = KFD_MMAP_TYPE_EVENTS;
349		- *event_page_offset <<= PAGE_SHIFT;
350	349	*event_slot_index = ev->event_id;
351	350	}
352	351	break;
..	..	@@ -461,7 +460,7 @@
461	460	}
462	461	}
463	462
464		-void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
	463	+void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
465	464	uint32_t valid_id_bits)
466	465	{
467	466	struct kfd_event *ev = NULL;
..	..	@@ -532,6 +531,8 @@
532	531	event_waiters = kmalloc_array(num_events,
533	532	sizeof(struct kfd_event_waiter),
534	533	GFP_KERNEL);
	534	+ if (!event_waiters)
	535	+ return NULL;
535	536
536	537	for (i = 0; (event_waiters) && (i < num_events) ; i++) {
537	538	init_wait(&event_waiters[i].wait);
..	..	@@ -852,8 +853,8 @@
852	853
853	854	if (type == KFD_EVENT_TYPE_MEMORY) {
854	855	dev_warn(kfd_device,
855		- "Sending SIGSEGV to HSA Process with PID %d ",
856		- p->lead_thread->pid);
	856	+ "Sending SIGSEGV to process %d (pasid 0x%x)",
	857	+ p->lead_thread->pid, p->pasid);
857	858	send_sig(SIGSEGV, p->lead_thread, 0);
858	859	}
859	860
..	..	@@ -861,19 +862,19 @@
861	862	if (send_signal) {
862	863	if (send_sigterm) {
863	864	dev_warn(kfd_device,
864		- "Sending SIGTERM to HSA Process with PID %d ",
865		- p->lead_thread->pid);
	865	+ "Sending SIGTERM to process %d (pasid 0x%x)",
	866	+ p->lead_thread->pid, p->pasid);
866	867	send_sig(SIGTERM, p->lead_thread, 0);
867	868	} else {
868	869	dev_err(kfd_device,
869		- "HSA Process (PID %d) got unhandled exception",
870		- p->lead_thread->pid);
	870	+ "Process %d (pasid 0x%x) got unhandled exception",
	871	+ p->lead_thread->pid, p->pasid);
871	872	}
872	873	}
873	874	}
874	875
875	876	#ifdef KFD_SUPPORT_IOMMU_V2
876		-void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
	877	+void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid,
877	878	unsigned long address, bool is_write_requested,
878	879	bool is_execute_requested)
879	880	{
..	..	@@ -902,7 +903,7 @@
902	903
903	904	memset(&memory_exception_data, 0, sizeof(memory_exception_data));
904	905
905		- down_read(&mm->mmap_sem);
	906	+ mmap_read_lock(mm);
906	907	vma = find_vma(mm, address);
907	908
908	909	memory_exception_data.gpu_id = dev->id;
..	..	@@ -925,7 +926,7 @@
925	926	memory_exception_data.failure.NoExecute = 0;
926	927	}
927	928
928		- up_read(&mm->mmap_sem);
	929	+ mmap_read_unlock(mm);
929	930	mmput(mm);
930	931
931	932	pr_debug("notpresent %d, noexecute %d, readonly %d\n",
..	..	@@ -936,7 +937,8 @@
936	937	/* Workaround on Raven to not kill the process when memory is freed
937	938	* before IOMMU is able to finish processing all the excessive PPRs
938	939	*/
939		- if (dev->device_info->asic_family != CHIP_RAVEN) {
	940	+ if (dev->device_info->asic_family != CHIP_RAVEN &&
	941	+ dev->device_info->asic_family != CHIP_RENOIR) {
940	942	mutex_lock(&p->event_mutex);
941	943
942	944	/* Lookup events by type and signal them */
..	..	@@ -950,7 +952,7 @@
950	952	}
951	953	#endif /* KFD_SUPPORT_IOMMU_V2 */
952	954
953		-void kfd_signal_hw_exception_event(unsigned int pasid)
	955	+void kfd_signal_hw_exception_event(u32 pasid)
954	956	{
955	957	/*
956	958	* Because we are called from arbitrary context (workqueue) as opposed
..	..	@@ -971,7 +973,7 @@
971	973	kfd_unref_process(p);
972	974	}
973	975
974		-void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
	976	+void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
975	977	struct kfd_vm_fault_info *info)
976	978	{
977	979	struct kfd_event *ev;
..	..	@@ -983,7 +985,7 @@
983	985	return; /* Presumably process exited. */
984	986	memset(&memory_exception_data, 0, sizeof(memory_exception_data));
985	987	memory_exception_data.gpu_id = dev->id;
986		- memory_exception_data.failure.imprecise = 1;
	988	+ memory_exception_data.failure.imprecise = true;
987	989	/* Set failure reason */
988	990	if (info) {
989	991	memory_exception_data.va = (info->page_addr) << PAGE_SHIFT;
..	..	@@ -1011,25 +1013,41 @@
1011	1013	void kfd_signal_reset_event(struct kfd_dev *dev)
1012	1014	{
1013	1015	struct kfd_hsa_hw_exception_data hw_exception_data;
	1016	+ struct kfd_hsa_memory_exception_data memory_exception_data;
1014	1017	struct kfd_process *p;
1015	1018	struct kfd_event *ev;
1016	1019	unsigned int temp;
1017	1020	uint32_t id, idx;
	1021	+ int reset_cause = atomic_read(&dev->sram_ecc_flag) ?
	1022	+ KFD_HW_EXCEPTION_ECC :
	1023	+ KFD_HW_EXCEPTION_GPU_HANG;
1018	1024
1019	1025	/* Whole gpu reset caused by GPU hang and memory is lost */
1020	1026	memset(&hw_exception_data, 0, sizeof(hw_exception_data));
1021	1027	hw_exception_data.gpu_id = dev->id;
1022	1028	hw_exception_data.memory_lost = 1;
	1029	+ hw_exception_data.reset_cause = reset_cause;
	1030	+
	1031	+ memset(&memory_exception_data, 0, sizeof(memory_exception_data));
	1032	+ memory_exception_data.ErrorType = KFD_MEM_ERR_SRAM_ECC;
	1033	+ memory_exception_data.gpu_id = dev->id;
	1034	+ memory_exception_data.failure.imprecise = true;
1023	1035
1024	1036	idx = srcu_read_lock(&kfd_processes_srcu);
1025	1037	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1026	1038	mutex_lock(&p->event_mutex);
1027	1039	id = KFD_FIRST_NONSIGNAL_EVENT_ID;
1028		- idr_for_each_entry_continue(&p->event_idr, ev, id)
	1040	+ idr_for_each_entry_continue(&p->event_idr, ev, id) {
1029	1041	if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
1030	1042	ev->hw_exception_data = hw_exception_data;
1031	1043	set_event(ev);
1032	1044	}
	1045	+ if (ev->type == KFD_EVENT_TYPE_MEMORY &&
	1046	+ reset_cause == KFD_HW_EXCEPTION_ECC) {
	1047	+ ev->memory_exception_data = memory_exception_data;
	1048	+ set_event(ev);
	1049	+ }
	1050	+ }
1033	1051	mutex_unlock(&p->event_mutex);
1034	1052	}
1035	1053	srcu_read_unlock(&kfd_processes_srcu, idx);