~hc/RK356X_SDK_RELEASE.git

..	..	@@ -187,7 +187,7 @@
187	187	if (p->signal_mapped_size &&
188	188	p->signal_event_count == p->signal_mapped_size / 8) {
189	189	if (!p->signal_event_limit_reached) {
190		- pr_warn("Signal event wasn't created because limit was reached\n");
	190	+ pr_debug("Signal event wasn't created because limit was reached\n");
191	191	p->signal_event_limit_reached = true;
192	192	}
193	193	return -ENOSPC;
..	..	@@ -346,7 +346,6 @@
346	346	ret = create_signal_event(devkfd, p, ev);
347	347	if (!ret) {
348	348	*event_page_offset = KFD_MMAP_TYPE_EVENTS;
349		- *event_page_offset <<= PAGE_SHIFT;
350	349	*event_slot_index = ev->event_id;
351	350	}
352	351	break;
..	..	@@ -461,7 +460,7 @@
461	460	}
462	461	}
463	462
464		-void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
	463	+void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
465	464	uint32_t valid_id_bits)
466	465	{
467	466	struct kfd_event *ev = NULL;
..	..	@@ -529,14 +528,13 @@
529	528	struct kfd_event_waiter *event_waiters;
530	529	uint32_t i;
531	530
532		- event_waiters = kmalloc_array(num_events,
533		- sizeof(struct kfd_event_waiter),
534		- GFP_KERNEL);
	531	+ event_waiters = kcalloc(num_events, sizeof(struct kfd_event_waiter),
	532	+ GFP_KERNEL);
	533	+ if (!event_waiters)
	534	+ return NULL;
535	535
536		- for (i = 0; (event_waiters) && (i < num_events) ; i++) {
	536	+ for (i = 0; i < num_events; i++)
537	537	init_wait(&event_waiters[i].wait);
538		- event_waiters[i].activated = false;
539		- }
540	538
541	539	return event_waiters;
542	540	}
..	..	@@ -852,8 +850,8 @@
852	850
853	851	if (type == KFD_EVENT_TYPE_MEMORY) {
854	852	dev_warn(kfd_device,
855		- "Sending SIGSEGV to HSA Process with PID %d ",
856		- p->lead_thread->pid);
	853	+ "Sending SIGSEGV to process %d (pasid 0x%x)",
	854	+ p->lead_thread->pid, p->pasid);
857	855	send_sig(SIGSEGV, p->lead_thread, 0);
858	856	}
859	857
..	..	@@ -861,19 +859,19 @@
861	859	if (send_signal) {
862	860	if (send_sigterm) {
863	861	dev_warn(kfd_device,
864		- "Sending SIGTERM to HSA Process with PID %d ",
865		- p->lead_thread->pid);
	862	+ "Sending SIGTERM to process %d (pasid 0x%x)",
	863	+ p->lead_thread->pid, p->pasid);
866	864	send_sig(SIGTERM, p->lead_thread, 0);
867	865	} else {
868	866	dev_err(kfd_device,
869		- "HSA Process (PID %d) got unhandled exception",
870		- p->lead_thread->pid);
	867	+ "Process %d (pasid 0x%x) got unhandled exception",
	868	+ p->lead_thread->pid, p->pasid);
871	869	}
872	870	}
873	871	}
874	872
875	873	#ifdef KFD_SUPPORT_IOMMU_V2
876		-void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
	874	+void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid,
877	875	unsigned long address, bool is_write_requested,
878	876	bool is_execute_requested)
879	877	{
..	..	@@ -902,7 +900,7 @@
902	900
903	901	memset(&memory_exception_data, 0, sizeof(memory_exception_data));
904	902
905		- down_read(&mm->mmap_sem);
	903	+ mmap_read_lock(mm);
906	904	vma = find_vma(mm, address);
907	905
908	906	memory_exception_data.gpu_id = dev->id;
..	..	@@ -925,7 +923,7 @@
925	923	memory_exception_data.failure.NoExecute = 0;
926	924	}
927	925
928		- up_read(&mm->mmap_sem);
	926	+ mmap_read_unlock(mm);
929	927	mmput(mm);
930	928
931	929	pr_debug("notpresent %d, noexecute %d, readonly %d\n",
..	..	@@ -936,7 +934,8 @@
936	934	/* Workaround on Raven to not kill the process when memory is freed
937	935	* before IOMMU is able to finish processing all the excessive PPRs
938	936	*/
939		- if (dev->device_info->asic_family != CHIP_RAVEN) {
	937	+ if (dev->device_info->asic_family != CHIP_RAVEN &&
	938	+ dev->device_info->asic_family != CHIP_RENOIR) {
940	939	mutex_lock(&p->event_mutex);
941	940
942	941	/* Lookup events by type and signal them */
..	..	@@ -950,7 +949,7 @@
950	949	}
951	950	#endif /* KFD_SUPPORT_IOMMU_V2 */
952	951
953		-void kfd_signal_hw_exception_event(unsigned int pasid)
	952	+void kfd_signal_hw_exception_event(u32 pasid)
954	953	{
955	954	/*
956	955	* Because we are called from arbitrary context (workqueue) as opposed
..	..	@@ -971,7 +970,7 @@
971	970	kfd_unref_process(p);
972	971	}
973	972
974		-void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
	973	+void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
975	974	struct kfd_vm_fault_info *info)
976	975	{
977	976	struct kfd_event *ev;
..	..	@@ -983,7 +982,7 @@
983	982	return; /* Presumably process exited. */
984	983	memset(&memory_exception_data, 0, sizeof(memory_exception_data));
985	984	memory_exception_data.gpu_id = dev->id;
986		- memory_exception_data.failure.imprecise = 1;
	985	+ memory_exception_data.failure.imprecise = true;
987	986	/* Set failure reason */
988	987	if (info) {
989	988	memory_exception_data.va = (info->page_addr) << PAGE_SHIFT;
..	..	@@ -1011,25 +1010,41 @@
1011	1010	void kfd_signal_reset_event(struct kfd_dev *dev)
1012	1011	{
1013	1012	struct kfd_hsa_hw_exception_data hw_exception_data;
	1013	+ struct kfd_hsa_memory_exception_data memory_exception_data;
1014	1014	struct kfd_process *p;
1015	1015	struct kfd_event *ev;
1016	1016	unsigned int temp;
1017	1017	uint32_t id, idx;
	1018	+ int reset_cause = atomic_read(&dev->sram_ecc_flag) ?
	1019	+ KFD_HW_EXCEPTION_ECC :
	1020	+ KFD_HW_EXCEPTION_GPU_HANG;
1018	1021
1019	1022	/* Whole gpu reset caused by GPU hang and memory is lost */
1020	1023	memset(&hw_exception_data, 0, sizeof(hw_exception_data));
1021	1024	hw_exception_data.gpu_id = dev->id;
1022	1025	hw_exception_data.memory_lost = 1;
	1026	+ hw_exception_data.reset_cause = reset_cause;
	1027	+
	1028	+ memset(&memory_exception_data, 0, sizeof(memory_exception_data));
	1029	+ memory_exception_data.ErrorType = KFD_MEM_ERR_SRAM_ECC;
	1030	+ memory_exception_data.gpu_id = dev->id;
	1031	+ memory_exception_data.failure.imprecise = true;
1023	1032
1024	1033	idx = srcu_read_lock(&kfd_processes_srcu);
1025	1034	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1026	1035	mutex_lock(&p->event_mutex);
1027	1036	id = KFD_FIRST_NONSIGNAL_EVENT_ID;
1028		- idr_for_each_entry_continue(&p->event_idr, ev, id)
	1037	+ idr_for_each_entry_continue(&p->event_idr, ev, id) {
1029	1038	if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
1030	1039	ev->hw_exception_data = hw_exception_data;
1031	1040	set_event(ev);
1032	1041	}
	1042	+ if (ev->type == KFD_EVENT_TYPE_MEMORY &&
	1043	+ reset_cause == KFD_HW_EXCEPTION_ECC) {
	1044	+ ev->memory_exception_data = memory_exception_data;
	1045	+ set_event(ev);
	1046	+ }
	1047	+ }
1033	1048	mutex_unlock(&p->event_mutex);
1034	1049	}
1035	1050	srcu_read_unlock(&kfd_processes_srcu, idx);