2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -24,20 +24,26 @@
  * Authors:
  *    Jerome Glisse <glisse@freedesktop.org>
  */
+
+#include <linux/file.h>
 #include <linux/pagemap.h>
 #include <linux/sync_file.h>
-#include <drm/drmP.h>
+#include <linux/dma-buf.h>
+
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_syncobj.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_gmc.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_ras.h"
 
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
				      struct drm_amdgpu_cs_chunk_fence *data,
				      uint32_t *offset)
 {
	struct drm_gem_object *gobj;
+	struct amdgpu_bo *bo;
	unsigned long size;
	int r;
 
@@ -45,21 +51,21 @@
	if (gobj == NULL)
		return -EINVAL;
 
-	p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	p->uf_entry.priority = 0;
-	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
-	p->uf_entry.tv.shared = true;
-	p->uf_entry.user_pages = NULL;
+	p->uf_entry.tv.bo = &bo->tbo;
+	/* One for TTM and one for the CS job */
+	p->uf_entry.tv.num_shared = 2;
 
-	drm_gem_object_put_unlocked(gobj);
+	drm_gem_object_put(gobj);
 
-	size = amdgpu_bo_size(p->uf_entry.robj);
+	size = amdgpu_bo_size(bo);
	if (size != PAGE_SIZE || (data->offset + 8) > size) {
		r = -EINVAL;
		goto error_unref;
	}
 
-	if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		r = -EINVAL;
		goto error_unref;
	}
@@ -69,7 +75,7 @@
	return 0;
 
 error_unref:
-	amdgpu_bo_unref(&p->uf_entry.robj);
+	amdgpu_bo_unref(&bo);
	return r;
 }
 
@@ -110,7 +116,7 @@
	int ret;
 
	if (cs->in.num_chunks == 0)
-		return 0;
+		return -EINVAL;
 
	chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (!chunk_array)
@@ -211,6 +217,9 @@
		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
+		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
			break;
 
		default:
@@ -228,7 +237,7 @@
		goto free_all_kdata;
	}
 
-	if (p->uf_entry.robj)
+	if (p->uf_entry.tv.bo)
		p->job->uf_addr = uf_offset;
	kfree(chunk_array);
 
@@ -290,7 +299,7 @@
 {
	s64 time_us, increment_us;
	u64 free_vram, total_vram, used_vram;
-
+	struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
	 * throttling.
	 *
@@ -307,7 +316,7 @@
	}
 
	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
-	used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
+	used_vram = amdgpu_vram_mgr_usage(vram_man);
	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 
	spin_lock(&adev->mm_stats.lock);
@@ -354,7 +363,7 @@
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
		u64 total_vis_vram = adev->gmc.visible_vram_size;
		u64 used_vis_vram =
-			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
+			amdgpu_vram_mgr_vis_usage(vram_man);
 
		if (used_vis_vram < total_vis_vram) {
			u64 free_vis_vram = total_vis_vram - used_vis_vram;
@@ -395,7 +404,7 @@
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
-		.resv = bo->tbo.resv,
+		.resv = bo->tbo.base.resv,
		.flags = 0
	};
	uint32_t domain;
@@ -407,7 +416,9 @@
	/* Don't move this buffer if we have depleted our allowance
	 * to move it. Don't move anything if the threshold is zero.
	 */
-	if (p->bytes_moved < p->bytes_moved_threshold) {
+	if (p->bytes_moved < p->bytes_moved_threshold &&
+	    (!bo->tbo.base.dma_buf ||
+	     list_empty(&bo->tbo.base.dma_buf->attachments))) {
		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
@@ -442,75 +453,12 @@
	return r;
 }
 
-/* Last resort, try to evict something from the current working set */
-static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
-				struct amdgpu_bo *validated)
-{
-	uint32_t domain = validated->allowed_domains;
-	struct ttm_operation_ctx ctx = { true, false };
-	int r;
-
-	if (!p->evictable)
-		return false;
-
-	for (;&p->evictable->tv.head != &p->validated;
-	     p->evictable = list_prev_entry(p->evictable, tv.head)) {
-
-		struct amdgpu_bo_list_entry *candidate = p->evictable;
-		struct amdgpu_bo *bo = candidate->robj;
-		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-		bool update_bytes_moved_vis;
-		uint32_t other;
-
-		/* If we reached our current BO we can forget it */
-		if (candidate->robj == validated)
-			break;
-
-		/* We can't move pinned BOs here */
-		if (bo->pin_count)
-			continue;
-
-		other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
-
-		/* Check if this BO is in one of the domains we need space for */
-		if (!(other & domain))
-			continue;
-
-		/* Check if we can move this BO somewhere else */
-		other = bo->allowed_domains & ~domain;
-		if (!other)
-			continue;
-
-		/* Good we can try to move this BO somewhere else */
-		update_bytes_moved_vis =
-			!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
-			amdgpu_bo_in_cpu_visible_vram(bo);
-		amdgpu_bo_placement_from_domain(bo, other);
-		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-		p->bytes_moved += ctx.bytes_moved;
-		if (update_bytes_moved_vis)
-			p->bytes_moved_vis += ctx.bytes_moved;
-
-		if (unlikely(r))
-			break;
-
-		p->evictable = list_prev_entry(p->evictable, tv.head);
-		list_move(&candidate->tv.head, &p->validated);
-
-		return true;
-	}
-
-	return false;
-}
-
 static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
 {
	struct amdgpu_cs_parser *p = param;
	int r;
 
-	do {
-		r = amdgpu_cs_bo_validate(p, bo);
-	} while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
+	r = amdgpu_cs_bo_validate(p, bo);
	if (r)
		return r;
 
@@ -528,38 +476,31 @@
	int r;
 
	list_for_each_entry(lobj, validated, tv.head) {
-		struct amdgpu_bo *bo = lobj->robj;
-		bool binding_userptr = false;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
		struct mm_struct *usermm;
 
		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
		if (usermm && usermm != current->mm)
			return -EPERM;
 
-		/* Check if we have user pages and nobody bound the BO already */
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-		    lobj->user_pages) {
+		if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
+		    lobj->user_invalidated && lobj->user_pages) {
			amdgpu_bo_placement_from_domain(bo,
							AMDGPU_GEM_DOMAIN_CPU);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			if (r)
				return r;
+
			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
						     lobj->user_pages);
-			binding_userptr = true;
		}
-
-		if (p->evictable == lobj)
-			p->evictable = NULL;
 
		r = amdgpu_cs_validate(p, bo);
		if (r)
			return r;
 
-		if (binding_userptr) {
-			kvfree(lobj->user_pages);
-			lobj->user_pages = NULL;
-		}
+		kvfree(lobj->user_pages);
+		lobj->user_pages = NULL;
	}
	return 0;
 }
@@ -574,7 +515,6 @@
	struct amdgpu_bo *gds;
	struct amdgpu_bo *gws;
	struct amdgpu_bo *oa;
-	unsigned tries = 10;
	int r;
 
	INIT_LIST_HEAD(&p->validated);
@@ -596,98 +536,63 @@
		return r;
	}
 
+	/* One for TTM and one for the CS job */
+	amdgpu_bo_list_for_each_entry(e, p->bo_list)
+		e->tv.num_shared = 2;
+
	amdgpu_bo_list_get_list(p->bo_list, &p->validated);
-	if (p->bo_list->first_userptr != p->bo_list->num_entries)
-		p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
 
	INIT_LIST_HEAD(&duplicates);
	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 
-	if (p->uf_entry.robj && !p->uf_entry.robj->parent)
+	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
		list_add(&p->uf_entry.tv.head, &p->validated);
 
-	while (1) {
-		struct list_head need_pages;
+	/* Get userptr backing pages. If pages are updated after registered
+	 * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
+	 * amdgpu_ttm_backend_bind() to flush and invalidate new pages
+	 */
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		bool userpage_invalidated = false;
+		int i;
 
-		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-					   &duplicates);
-		if (unlikely(r != 0)) {
-			if (r != -ERESTARTSYS)
-				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
-			goto error_free_pages;
+		e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+					       sizeof(struct page *),
+					       GFP_KERNEL | __GFP_ZERO);
+		if (!e->user_pages) {
+			DRM_ERROR("calloc failure\n");
+			return -ENOMEM;
		}
 
-		INIT_LIST_HEAD(&need_pages);
-		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-			struct amdgpu_bo *bo = e->robj;
-
-			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
-				 &e->user_invalidated) && e->user_pages) {
-
-				/* We acquired a page array, but somebody
-				 * invalidated it. Free it and try again
-				 */
-				release_pages(e->user_pages,
-					      bo->tbo.ttm->num_pages);
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
-			}
-
-			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-			    !e->user_pages) {
-				list_del(&e->tv.head);
-				list_add(&e->tv.head, &need_pages);
-
-				amdgpu_bo_unreserve(e->robj);
-			}
+		r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
+		if (r) {
+			kvfree(e->user_pages);
+			e->user_pages = NULL;
+			return r;
		}
 
-		if (list_empty(&need_pages))
-			break;
-
-		/* Unreserve everything again. */
-		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-		/* We tried too many times, just abort */
-		if (!--tries) {
-			r = -EDEADLK;
-			DRM_ERROR("deadlock in %s\n", __func__);
-			goto error_free_pages;
-		}
-
-		/* Fill the page arrays for all userptrs. */
-		list_for_each_entry(e, &need_pages, tv.head) {
-			struct ttm_tt *ttm = e->robj->tbo.ttm;
-
-			e->user_pages = kvmalloc_array(ttm->num_pages,
-						       sizeof(struct page*),
-						       GFP_KERNEL | __GFP_ZERO);
-			if (!e->user_pages) {
-				r = -ENOMEM;
-				DRM_ERROR("calloc failure in %s\n", __func__);
-				goto error_free_pages;
-			}
-
-			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
-			if (r) {
-				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
-				goto error_free_pages;
+		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
+			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
+				userpage_invalidated = true;
+				break;
			}
		}
+		e->user_invalidated = userpage_invalidated;
+	}
 
-		/* And try again. */
-		list_splice(&need_pages, &p->validated);
+	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+				   &duplicates);
+	if (unlikely(r != 0)) {
+		if (r != -ERESTARTSYS)
+			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+		goto out;
	}
 
	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
					  &p->bytes_moved_vis_threshold);
	p->bytes_moved = 0;
	p->bytes_moved_vis = 0;
-	p->evictable = list_last_entry(&p->validated,
-				       struct amdgpu_bo_list_entry,
-				       tv.head);
 
	r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
				      amdgpu_cs_validate, p);
@@ -697,16 +602,12 @@
	}
 
	r = amdgpu_cs_list_validate(p, &duplicates);
-	if (r) {
-		DRM_ERROR("amdgpu_cs_list_validate(duplicates) failed.\n");
+	if (r)
		goto error_validate;
-	}
 
	r = amdgpu_cs_list_validate(p, &p->validated);
-	if (r) {
-		DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n");
+	if (r)
		goto error_validate;
-	}
 
	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
				     p->bytes_moved_vis);
@@ -715,24 +616,30 @@
	gws = p->bo_list->gws_obj;
	oa = p->bo_list->oa_obj;
 
-	amdgpu_bo_list_for_each_entry(e, p->bo_list)
-		e->bo_va = amdgpu_vm_bo_find(vm, e->robj);
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+		/* Make sure we use the exclusive slot for shared BOs */
+		if (bo->prime_shared_count)
+			e->tv.num_shared = 0;
+		e->bo_va = amdgpu_vm_bo_find(vm, bo);
+	}
 
	if (gds) {
-		p->job->gds_base = amdgpu_bo_gpu_offset(gds);
-		p->job->gds_size = amdgpu_bo_size(gds);
+		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
+		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
	}
	if (gws) {
-		p->job->gws_base = amdgpu_bo_gpu_offset(gws);
-		p->job->gws_size = amdgpu_bo_size(gws);
+		p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
+		p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
	}
	if (oa) {
-		p->job->oa_base = amdgpu_bo_gpu_offset(oa);
-		p->job->oa_size = amdgpu_bo_size(oa);
+		p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
+		p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
	}
 
-	if (!r && p->uf_entry.robj) {
-		struct amdgpu_bo *uf = p->uf_entry.robj;
+	if (!r && p->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
 
		r = amdgpu_ttm_alloc_gart(&uf->tbo);
		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
@@ -741,31 +648,25 @@
 error_validate:
	if (r)
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-error_free_pages:
-
-	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		if (!e->user_pages)
-			continue;
-
-		release_pages(e->user_pages,
-			      e->robj->tbo.ttm->num_pages);
-		kvfree(e->user_pages);
-	}
-
+out:
	return r;
 }
 
 static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 {
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_bo_list_entry *e;
	int r;
 
	list_for_each_entry(e, &p->validated, tv.head) {
-		struct reservation_object *resv = e->robj->tbo.resv;
-		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
-				     amdgpu_bo_explicit_sync(e->robj));
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		struct dma_resv *resv = bo->tbo.base.resv;
+		enum amdgpu_sync_mode sync_mode;
 
+		sync_mode = amdgpu_bo_explicit_sync(bo) ?
+			AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
+		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
+				     &fpriv->vm);
		if (r)
			return r;
	}
@@ -789,9 +690,11 @@
	ttm_eu_backoff_reservation(&parser->ticket,
				   &parser->validated);
 
-	for (i = 0; i < parser->num_post_dep_syncobjs; i++)
-		drm_syncobj_put(parser->post_dep_syncobjs[i]);
-	kfree(parser->post_dep_syncobjs);
+	for (i = 0; i < parser->num_post_deps; i++) {
+		drm_syncobj_put(parser->post_deps[i].syncobj);
+		kfree(parser->post_deps[i].chain);
+	}
+	kfree(parser->post_deps);
 
	dma_fence_put(parser->fence);
 
@@ -807,11 +710,16 @@
	kfree(parser->chunks);
	if (parser->job)
		amdgpu_job_free(parser->job);
-	amdgpu_bo_unref(&parser->uf_entry.robj);
+	if (parser->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
+
+		amdgpu_bo_unref(&uf);
+	}
 }
 
-static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
+static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 {
+	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_vm *vm = &fpriv->vm;
@@ -820,92 +728,8 @@
	struct amdgpu_bo *bo;
	int r;
 
-	r = amdgpu_vm_clear_freed(adev, vm, NULL);
-	if (r)
-		return r;
-
-	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
-	if (r)
-		return r;
-
-	r = amdgpu_sync_fence(adev, &p->job->sync,
-			      fpriv->prt_va->last_pt_update, false);
-	if (r)
-		return r;
-
-	if (amdgpu_sriov_vf(adev)) {
-		struct dma_fence *f;
-
-		bo_va = fpriv->csa_va;
-		BUG_ON(!bo_va);
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
-		if (r)
-			return r;
-
-		f = bo_va->last_pt_update;
-		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
-		if (r)
-			return r;
-	}
-
-	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-		struct dma_fence *f;
-
-		/* ignore duplicates */
-		bo = e->robj;
-		if (!bo)
-			continue;
-
-		bo_va = e->bo_va;
-		if (bo_va == NULL)
-			continue;
-
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
-		if (r)
-			return r;
-
-		f = bo_va->last_pt_update;
-		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
-		if (r)
-			return r;
-	}
-
-	r = amdgpu_vm_handle_moved(adev, vm);
-	if (r)
-		return r;
-
-	r = amdgpu_vm_update_directories(adev, vm);
-	if (r)
-		return r;
-
-	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
-	if (r)
-		return r;
-
-	if (amdgpu_vm_debug) {
-		/* Invalidate all BOs to test for userspace bugs */
-		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-			/* ignore duplicates */
-			if (!e->robj)
-				continue;
-
-			amdgpu_vm_bo_invalidate(adev, e->robj, false);
-		}
-	}
-
-	return r;
-}
-
-static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
-				 struct amdgpu_cs_parser *p)
-{
-	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_vm *vm = &fpriv->vm;
-	struct amdgpu_ring *ring = p->ring;
-	int r;
-
	/* Only for UVD/VCE VM emulation */
-	if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) {
+	if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
		unsigned i, j;
 
		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
@@ -924,7 +748,7 @@
			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
				continue;
 
-			va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK;
+			va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
			if (r) {
				DRM_ERROR("IB va_start is invalid\n");
@@ -946,7 +770,7 @@
			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
			kptr += va_start - offset;
 
-			if (p->ring->funcs->parse_cs) {
+			if (ring->funcs->parse_cs) {
				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
				amdgpu_bo_kunmap(aobj);
 
@@ -965,16 +789,78 @@
		}
	}
 
-	if (p->job->vm) {
-		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
+	if (!p->job->vm)
+		return amdgpu_cs_sync_rings(p);
 
-		r = amdgpu_bo_vm_update_pte(p);
+
+	r = amdgpu_vm_clear_freed(adev, vm, NULL);
+	if (r)
+		return r;
+
+	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
+	if (r)
+		return r;
+
+	r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
+	if (r)
+		return r;
+
+	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
+		bo_va = fpriv->csa_va;
+		BUG_ON(!bo_va);
+		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;
 
-		r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
+		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
		if (r)
			return r;
+	}
+
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		/* ignore duplicates */
+		bo = ttm_to_amdgpu_bo(e->tv.bo);
+		if (!bo)
+			continue;
+
+		bo_va = e->bo_va;
+		if (bo_va == NULL)
+			continue;
+
+		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		if (r)
+			return r;
+
+		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
+		if (r)
+			return r;
+	}
+
+	r = amdgpu_vm_handle_moved(adev, vm);
+	if (r)
+		return r;
+
+	r = amdgpu_vm_update_pdes(adev, vm, false);
+	if (r)
+		return r;
+
+	r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
+	if (r)
+		return r;
+
+	p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
+
+	if (amdgpu_vm_debug) {
+		/* Invalidate all BOs to test for userspace bugs */
+		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+			/* ignore duplicates */
+			if (!bo)
+				continue;
+
+			amdgpu_vm_bo_invalidate(adev, bo, false);
+		}
	}
 
	return amdgpu_cs_sync_rings(p);
@@ -985,14 +871,15 @@
 {
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
-	int i, j;
	int r, ce_preempt = 0, de_preempt = 0;
+	struct amdgpu_ring *ring;
+	int i, j;
 
	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
		struct amdgpu_cs_chunk *chunk;
		struct amdgpu_ib *ib;
		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
-		struct amdgpu_ring *ring;
+		struct drm_sched_entity *entity;
 
		chunk = &parser->chunks[i];
		ib = &parser->job->ibs[j];
@@ -1001,7 +888,8 @@
		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
			continue;
 
-		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && amdgpu_sriov_vf(adev)) {
+		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
+		    (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
			if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
				if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
					ce_preempt++;
@@ -1014,8 +902,9 @@
				return -EINVAL;
		}
 
-		r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
-					 chunk_ib->ip_instance, chunk_ib->ring, &ring);
+		r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
+					  chunk_ib->ip_instance, chunk_ib->ring,
+					  &entity);
		if (r)
			return r;
 
@@ -1023,14 +912,20 @@
			parser->job->preamble_status |=
				AMDGPU_PREAMBLE_IB_PRESENT;
 
-		if (parser->ring && parser->ring != ring)
+		if (parser->entity && parser->entity != entity)
			return -EINVAL;
 
-		parser->ring = ring;
+		/* Return if there is no run queue associated with this entity.
+		 * Possibly because of disabled HW IP*/
+		if (entity->rq == NULL)
+			return -EINVAL;
 
-		r = amdgpu_ib_get(adev, vm,
-				  ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
-				  ib);
+		parser->entity = entity;
+
+		ring = to_amdgpu_ring(entity->rq->sched);
+		r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
+				  chunk_ib->ib_bytes : 0,
+				  AMDGPU_IB_POOL_DELAYED, ib);
		if (r) {
			DRM_ERROR("Failed to get ib !\n");
			return r;
@@ -1043,13 +938,12 @@
		j++;
	}
 
-	/* UVD & VCE fw doesn't support user fences */
-	if (parser->job->uf_addr && (
-	    parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
-	    parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+	/* MM engine doesn't support user fences */
+	ring = to_amdgpu_ring(parser->entity->rq->sched);
+	if (parser->job->uf_addr && ring->funcs->no_user_fence)
		return -EINVAL;
 
-	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx);
+	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1065,51 +959,62 @@
		sizeof(struct drm_amdgpu_cs_chunk_dep);
 
	for (i = 0; i < num_deps; ++i) {
-		struct amdgpu_ring *ring;
		struct amdgpu_ctx *ctx;
+		struct drm_sched_entity *entity;
		struct dma_fence *fence;
 
		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
		if (ctx == NULL)
			return -EINVAL;
 
-		r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
-					 deps[i].ip_type,
-					 deps[i].ip_instance,
-					 deps[i].ring, &ring);
+		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
+					  deps[i].ip_instance,
+					  deps[i].ring, &entity);
		if (r) {
			amdgpu_ctx_put(ctx);
			return r;
		}
 
-		fence = amdgpu_ctx_get_fence(ctx, ring,
-					     deps[i].handle);
-		if (IS_ERR(fence)) {
-			r = PTR_ERR(fence);
-			amdgpu_ctx_put(ctx);
-			return r;
-		} else if (fence) {
-			r = amdgpu_sync_fence(p->adev, &p->job->sync, fence,
-					      true);
-			dma_fence_put(fence);
-			amdgpu_ctx_put(ctx);
-			if (r)
-				return r;
+		fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
+		amdgpu_ctx_put(ctx);
+
+		if (IS_ERR(fence))
+			return PTR_ERR(fence);
+		else if (!fence)
+			continue;
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
+			struct drm_sched_fence *s_fence;
+			struct dma_fence *old = fence;
+
+			s_fence = to_drm_sched_fence(fence);
+			fence = dma_fence_get(&s_fence->scheduled);
+			dma_fence_put(old);
		}
+
+		r = amdgpu_sync_fence(&p->job->sync, fence);
+		dma_fence_put(fence);
+		if (r)
+			return r;
	}
	return 0;
 }
 
 static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
-						 uint32_t handle)
+						 uint32_t handle, u64 point,
+						 u64 flags)
 {
-	int r;
	struct dma_fence *fence;
-	r = drm_syncobj_find_fence(p->filp, handle, &fence);
-	if (r)
-		return r;
+	int r;
 
-	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
+	r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
+	if (r) {
+		DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
+			  handle, point, r);
+		return r;
+	}
+
+	r = amdgpu_sync_fence(&p->job->sync, fence);
	dma_fence_put(fence);
 
	return r;
@@ -1118,46 +1023,123 @@
 static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
					    struct amdgpu_cs_chunk *chunk)
 {
+	struct drm_amdgpu_cs_chunk_sem *deps;
	unsigned num_deps;
	int i, r;
-	struct drm_amdgpu_cs_chunk_sem *deps;
 
	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);
-
	for (i = 0; i < num_deps; ++i) {
-		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
+		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
+							  0, 0);
		if (r)
			return r;
	}
+
+	return 0;
+}
+
+
+static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
+						     struct amdgpu_cs_chunk *chunk)
+{
+	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+	unsigned num_deps;
+	int i, r;
+
+	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+	for (i = 0; i < num_deps; ++i) {
+		r = amdgpu_syncobj_lookup_and_add_to_sync(p,
+							  syncobj_deps[i].handle,
+							  syncobj_deps[i].point,
+							  syncobj_deps[i].flags);
+		if (r)
+			return r;
+	}
+
	return 0;
 }
 
 static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
					     struct amdgpu_cs_chunk *chunk)
 {
+	struct drm_amdgpu_cs_chunk_sem *deps;
	unsigned num_deps;
	int i;
-	struct drm_amdgpu_cs_chunk_sem *deps;
+
	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);
 
-	p->post_dep_syncobjs = kmalloc_array(num_deps,
-					     sizeof(struct drm_syncobj *),
-					     GFP_KERNEL);
-	p->num_post_dep_syncobjs = 0;
+	if (p->post_deps)
+		return -EINVAL;
 
-	if (!p->post_dep_syncobjs)
+	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+				     GFP_KERNEL);
+	p->num_post_deps = 0;
+
+	if (!p->post_deps)
+		return -ENOMEM;
+
+
+	for (i = 0; i < num_deps; ++i) {
+		p->post_deps[i].syncobj =
+			drm_syncobj_find(p->filp, deps[i].handle);
+		if (!p->post_deps[i].syncobj)
+			return -EINVAL;
+		p->post_deps[i].chain = NULL;
+		p->post_deps[i].point = 0;
+		p->num_post_deps++;
+	}
+
+	return 0;
+}
+
+
+static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
+						      struct amdgpu_cs_chunk *chunk)
+{
+	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+	unsigned num_deps;
+	int i;
+
+	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+
+	if (p->post_deps)
+		return -EINVAL;
+
+	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+				     GFP_KERNEL);
+	p->num_post_deps = 0;
+
+	if (!p->post_deps)
		return -ENOMEM;
 
	for (i = 0; i < num_deps; ++i) {
-		p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
-		if (!p->post_dep_syncobjs[i])
+		struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
+
+		dep->chain = NULL;
+		if (syncobj_deps[i].point) {
+			dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
+			if (!dep->chain)
+				return -ENOMEM;
+		}
+
+		dep->syncobj = drm_syncobj_find(p->filp,
						syncobj_deps[i].handle);
+		if (!dep->syncobj) {
+			kfree(dep->chain);
			return -EINVAL;
-		p->num_post_dep_syncobjs++;
+		}
+		dep->point = syncobj_deps[i].point;
+		p->num_post_deps++;
	}
+
	return 0;
 }
 
@@ -1171,18 +1153,33 @@
 
		chunk = &p->chunks[i];
 
-		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+		switch (chunk->chunk_id) {
+		case AMDGPU_CHUNK_ID_DEPENDENCIES:
+		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
			r = amdgpu_cs_process_fence_dep(p, chunk);
			if (r)
				return r;
-		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
			r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
			if (r)
				return r;
-		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
			r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
			if (r)
				return r;
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+			r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
+			if (r)
+				return r;
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+			r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
+			if (r)
+				return r;
+			break;
		}
	}
 
@@ -1193,53 +1190,58 @@
 {
	int i;
 
-	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
-		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
+	for (i = 0; i < p->num_post_deps; ++i) {
+		if (p->post_deps[i].chain && p->post_deps[i].point) {
+			drm_syncobj_add_point(p->post_deps[i].syncobj,
+					      p->post_deps[i].chain,
+					      p->fence, p->post_deps[i].point);
+			p->post_deps[i].chain = NULL;
+		} else {
+			drm_syncobj_replace_fence(p->post_deps[i].syncobj,
+						  p->fence);
+		}
+	}
 }
 
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
 {
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_ring *ring = p->ring;
-	struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
-	enum drm_sched_priority priority;
+	struct drm_sched_entity *entity = p->entity;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_job *job;
	uint64_t seq;
-
	int r;
 
	job = p->job;
	p->job = NULL;
 
-	r = drm_sched_job_init(&job->base, entity, p->filp);
+	r = drm_sched_job_init(&job->base, entity, &fpriv->vm);
	if (r)
		goto error_unlock;
 
-	/* No memory allocation is allowed while holding the mn lock */
-	amdgpu_mn_lock(p->mn);
-	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		struct amdgpu_bo *bo = e->robj;
+	/* No memory allocation is allowed while holding the notifier lock.
+	 * The lock is held until amdgpu_cs_submit is finished and fence is
+	 * added to BOs.
+	 */
+	mutex_lock(&p->adev->notifier_lock);
 
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
-			r = -ERESTARTSYS;
-			goto error_abort;
-		}
+	/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
+	 * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
+	 */
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+		r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+	}
+	if (r) {
+		r = -EAGAIN;
+		goto error_abort;
	}
 
-	job->owner = p->filp;
	p->fence = dma_fence_get(&job->base.s_fence->finished);
 
-	r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
-	if (r) {
-		dma_fence_put(p->fence);
-		dma_fence_put(&job->base.s_fence->finished);
-		amdgpu_job_free(job);
-		amdgpu_mn_unlock(p->mn);
-		return r;
-	}
-
+	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
	amdgpu_cs_post_dependencies(p);
 
	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
@@ -1255,34 +1257,45 @@
 
	trace_amdgpu_cs_ioctl(job);
	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
-	priority = job->base.s_priority;
	drm_sched_entity_push_job(&job->base, entity);
 
-	ring = to_amdgpu_ring(entity->rq->sched);
-	amdgpu_ring_priority_get(ring, priority);
+	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
 
	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
-	amdgpu_mn_unlock(p->mn);
+	mutex_unlock(&p->adev->notifier_lock);
 
	return 0;
 
 error_abort:
-	dma_fence_put(&job->base.s_fence->finished);
-	job->base.s_fence = NULL;
-	amdgpu_mn_unlock(p->mn);
+	drm_sched_job_cleanup(&job->base);
+	mutex_unlock(&p->adev->notifier_lock);
 
 error_unlock:
	amdgpu_job_free(job);
	return r;
 }
 
+static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
+{
+	int i;
+
+	if (!trace_amdgpu_cs_enabled())
+		return;
+
+	for (i = 0; i < parser->job->num_ibs; i++)
+		trace_amdgpu_cs(parser, i);
+}
+
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
-	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_device *adev = drm_to_adev(dev);
	union drm_amdgpu_cs *cs = data;
	struct amdgpu_cs_parser parser = {};
	bool reserved_buffers = false;
-	int i, r;
+	int r;
+
+	if (amdgpu_ras_intr_triggered())
+		return -EHWPOISON;
 
	if (!adev->accel_working)
		return -EBUSY;
@@ -1292,7 +1305,8 @@
 
	r = amdgpu_cs_parser_init(&parser, data);
	if (r) {
-		DRM_ERROR("Failed to initialize parser !\n");
+		if (printk_ratelimit())
+			DRM_ERROR("Failed to initialize parser %d!\n", r);
		goto out;
	}
 
@@ -1300,27 +1314,26 @@
	if (r)
		goto out;
 
-	r = amdgpu_cs_parser_bos(&parser, data);
-	if (r) {
-		if (r == -ENOMEM)
-			DRM_ERROR("Not enough memory for command submission!\n");
-		else if (r != -ERESTARTSYS)
-			DRM_ERROR("Failed to process the buffer list %d!\n", r);
-		goto out;
-	}
-
-	reserved_buffers = true;
-
	r = amdgpu_cs_dependencies(adev, &parser);
	if (r) {
		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
		goto out;
	}
 
-	for (i = 0; i < parser.job->num_ibs; i++)
-		trace_amdgpu_cs(&parser, i);
+	r = amdgpu_cs_parser_bos(&parser, data);
+	if (r) {
+		if (r == -ENOMEM)
+			DRM_ERROR("Not enough memory for command submission!\n");
+		else if (r != -ERESTARTSYS && r != -EAGAIN)
+			DRM_ERROR("Failed to process the buffer list %d!\n", r);
+		goto out;
+	}
 
-	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
+	reserved_buffers = true;
+
+	trace_amdgpu_cs_ibs(&parser);
+
+	r = amdgpu_cs_vm_handling(&parser);
	if (r)
		goto out;
 
@@ -1328,6 +1341,7 @@
 
 out:
	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+
	return r;
 }
 
@@ -1344,9 +1358,8 @@
			 struct drm_file *filp)
 {
	union drm_amdgpu_wait_cs *wait = data;
-	struct amdgpu_device *adev = dev->dev_private;
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
-	struct amdgpu_ring *ring = NULL;
+	struct drm_sched_entity *entity;
	struct amdgpu_ctx *ctx;
	struct dma_fence *fence;
	long r;
@@ -1355,15 +1368,14 @@
	if (ctx == NULL)
		return -EINVAL;
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
-				 wait->in.ip_type, wait->in.ip_instance,
-				 wait->in.ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
+				  wait->in.ring, &entity);
	if (r) {
		amdgpu_ctx_put(ctx);
		return r;
	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
+	fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
	if (IS_ERR(fence))
		r = PTR_ERR(fence);
	else if (fence) {
@@ -1395,7 +1407,7 @@
					    struct drm_file *filp,
					    struct drm_amdgpu_fence *user)
 {
-	struct amdgpu_ring *ring;
+	struct drm_sched_entity *entity;
	struct amdgpu_ctx *ctx;
	struct dma_fence *fence;
	int r;
@@ -1404,14 +1416,14 @@
	if (ctx == NULL)
		return ERR_PTR(-EINVAL);
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
-				 user->ip_instance, user->ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
+				  user->ring, &entity);
	if (r) {
		amdgpu_ctx_put(ctx);
		return ERR_PTR(r);
	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
+	fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
	amdgpu_ctx_put(ctx);
 
	return fence;
@@ -1420,7 +1432,7 @@
 int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *filp)
 {
-	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_device *adev = drm_to_adev(dev);
	union drm_amdgpu_fence_to_handle *info = data;
	struct dma_fence *fence;
	struct drm_syncobj *syncobj;
@@ -1430,6 +1442,9 @@
	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
	if (IS_ERR(fence))
		return PTR_ERR(fence);
+
+	if (!fence)
+		fence = dma_fence_get_stub();
 
	switch (info->in.what) {
	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
@@ -1469,6 +1484,7 @@
		return 0;
 
	default:
+		dma_fence_put(fence);
		return -EINVAL;
	}
 }
@@ -1593,7 +1609,7 @@
 int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
				struct drm_file *filp)
 {
-	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_device *adev = drm_to_adev(dev);
	union drm_amdgpu_wait_fences *wait = data;
	uint32_t fence_count = wait->in.fence_count;
	struct drm_amdgpu_fence *fences_user;
@@ -1655,7 +1671,7 @@
	*map = mapping;
 
	/* Double check that the BO is reserved by this CS */
-	if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket)
+	if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)
		return -EINVAL;
 
	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
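
Note on the new timeline-syncobj chunks handled above (AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT/SIGNAL): each entry is a drm_amdgpu_cs_chunk_syncobj carrying a syncobj handle, flags and a timeline point, parsed out of chunk->kdata by amdgpu_cs_process_syncobj_timeline_in_dep()/_out_dep(). The sketch below shows how a userspace submitter might describe one timeline-signal chunk; it is an illustrative assumption rather than part of this patch, and the chunk_data/header details come from the amdgpu UAPI headers, not from the diff itself.

/* Illustrative userspace sketch (assumption, not part of the patch):
 * building one AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL chunk for the CS
 * ioctl. The syncobj entry fields (handle/flags/point) match the kernel
 * parser above; chunk_data being a user pointer cast to u64 follows the
 * amdgpu_drm.h UAPI and should be checked against the headers in use.
 * Error handling and the IB chunk itself are omitted.
 */
#include <stdint.h>
#include <string.h>
#include <drm/amdgpu_drm.h>

static void fill_timeline_signal_chunk(struct drm_amdgpu_cs_chunk *chunk,
				       struct drm_amdgpu_cs_chunk_syncobj *sig,
				       uint32_t syncobj_handle, uint64_t point)
{
	memset(sig, 0, sizeof(*sig));
	sig->handle = syncobj_handle;	/* DRM syncobj to signal at submission */
	sig->point = point;		/* point == 0 falls back to the binary replace_fence path */

	chunk->chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL;
	chunk->length_dw = sizeof(*sig) / 4;		/* kernel derives the entry count from length_dw * 4 */
	chunk->chunk_data = (uint64_t)(uintptr_t)sig;	/* user pointer handed to the kernel */
}

On the kernel side, amdgpu_cs_process_syncobj_timeline_out_dep() above pre-allocates the dma_fence_chain for every entry with a non-zero point, so amdgpu_cs_post_dependencies() can add the timeline point at the end of submission without a failure path.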