~hc/RK356X_SDK_RELEASE.git

..	..	@@ -26,6 +26,7 @@
26	26	#include "nbio/nbio_6_1_sh_mask.h"
27	27	#include "gc/gc_9_0_offset.h"
28	28	#include "gc/gc_9_0_sh_mask.h"
	29	+#include "mp/mp_9_0_offset.h"
29	30	#include "soc15.h"
30	31	#include "vega10_ih.h"
31	32	#include "soc15_common.h"
..	..	@@ -237,19 +238,15 @@
237	238	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
238	239	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
239	240	int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
240		- int locked;
241	241
242	242	/* block amdgpu_gpu_recover till msg FLR COMPLETE received,
243	243	* otherwise the mailbox msg will be ruined/reset by
244	244	* the VF FLR.
245		- *
246		- * we can unlock the lock_reset to allow "amdgpu_job_timedout"
247		- * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
248		- * which means host side had finished this VF's FLR.
249	245	*/
250		- locked = mutex_trylock(&adev->lock_reset);
251		- if (locked)
252		- adev->in_gpu_reset = 1;
	246	+ if (!down_read_trylock(&adev->reset_sem))
	247	+ return;
	248	+
	249	+ atomic_set(&adev->in_gpu_reset, 1);
253	250
254	251	do {
255	252	if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
..	..	@@ -260,14 +257,14 @@
260	257	} while (timeout > 1);
261	258
262	259	flr_done:
263		- if (locked) {
264		- adev->in_gpu_reset = 0;
265		- mutex_unlock(&adev->lock_reset);
266		- }
	260	+ atomic_set(&adev->in_gpu_reset, 0);
	261	+ up_read(&adev->reset_sem);
267	262
268	263	/* Trigger recovery for world switch failure if no TDR */
269		- if (amdgpu_lockup_timeout == 0)
270		- amdgpu_device_gpu_recover(adev, NULL, true);
	264	+ if (amdgpu_device_should_recover_gpu(adev)
	265	+ && (!amdgpu_device_has_job_running(adev) \|\|
	266	+ adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT))
	267	+ amdgpu_device_gpu_recover(adev, NULL);
271	268	}
272	269
273	270	static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
..	..	@@ -295,6 +292,9 @@
295	292	if (amdgpu_sriov_runtime(adev))
296	293	schedule_work(&adev->virt.flr_work);
297	294	break;
	295	+ case IDH_QUERY_ALIVE:
	296	+ xgpu_ai_mailbox_send_ack(adev);
	297	+ break;
298	298	/* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
299	299	* it for now since that polling thread will handle it,
300	300	* other msg like flr complete is not handled here.