2024-05-10 cde9070d9970eef1f7ec2360586c802a16230ad8
kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -24,53 +24,49 @@
  * Authors:
  *    Jerome Glisse <glisse@freedesktop.org>
  */
+
+#include <linux/file.h>
 #include <linux/pagemap.h>
 #include <linux/sync_file.h>
-#include <drm/drmP.h>
+#include <linux/dma-buf.h>
+
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_syncobj.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_gmc.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_ras.h"
 
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
				      struct drm_amdgpu_cs_chunk_fence *data,
				      uint32_t *offset)
 {
 	struct drm_gem_object *gobj;
+	struct amdgpu_bo *bo;
 	unsigned long size;
-	int r;
 
 	gobj = drm_gem_object_lookup(p->filp, data->handle);
 	if (gobj == NULL)
 		return -EINVAL;
 
-	p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
 	p->uf_entry.priority = 0;
-	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
-	p->uf_entry.tv.shared = true;
-	p->uf_entry.user_pages = NULL;
+	p->uf_entry.tv.bo = &bo->tbo;
+	/* One for TTM and one for the CS job */
+	p->uf_entry.tv.num_shared = 2;
 
-	drm_gem_object_put_unlocked(gobj);
+	drm_gem_object_put(gobj);
 
-	size = amdgpu_bo_size(p->uf_entry.robj);
-	if (size != PAGE_SIZE || (data->offset + 8) > size) {
-		r = -EINVAL;
-		goto error_unref;
-	}
+	size = amdgpu_bo_size(bo);
+	if (size != PAGE_SIZE || data->offset > (size - 8))
+		return -EINVAL;
 
-	if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
-		r = -EINVAL;
-		goto error_unref;
-	}
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
+		return -EINVAL;
 
 	*offset = data->offset;
-
 	return 0;
-
-error_unref:
-	amdgpu_bo_unref(&p->uf_entry.robj);
-	return r;
 }
 
 static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
@@ -110,7 +106,7 @@
 	int ret;
 
 	if (cs->in.num_chunks == 0)
-		return 0;
+		return -EINVAL;
 
 	chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
 	if (!chunk_array)
@@ -211,6 +207,9 @@
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
+		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
 			break;
 
 		default:
@@ -228,7 +227,7 @@
 		goto free_all_kdata;
 	}
 
-	if (p->uf_entry.robj)
+	if (p->uf_entry.tv.bo)
 		p->job->uf_addr = uf_offset;
 	kfree(chunk_array);
 
@@ -290,7 +289,7 @@
 {
 	s64 time_us, increment_us;
 	u64 free_vram, total_vram, used_vram;
-
+	struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
 	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
 	 * throttling.
 	 *
@@ -307,7 +306,7 @@
 	}
 
 	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
-	used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
+	used_vram = amdgpu_vram_mgr_usage(vram_man);
 	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 
 	spin_lock(&adev->mm_stats.lock);
@@ -354,7 +353,7 @@
 	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
 		u64 total_vis_vram = adev->gmc.visible_vram_size;
 		u64 used_vis_vram =
-			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
+			amdgpu_vram_mgr_vis_usage(vram_man);
 
 		if (used_vis_vram < total_vis_vram) {
 			u64 free_vis_vram = total_vis_vram - used_vis_vram;
@@ -395,7 +394,7 @@
 	struct ttm_operation_ctx ctx = {
 		.interruptible = true,
 		.no_wait_gpu = false,
-		.resv = bo->tbo.resv,
+		.resv = bo->tbo.base.resv,
 		.flags = 0
 	};
 	uint32_t domain;
@@ -407,7 +406,9 @@
 	/* Don't move this buffer if we have depleted our allowance
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
-	if (p->bytes_moved < p->bytes_moved_threshold) {
+	if (p->bytes_moved < p->bytes_moved_threshold &&
+	    (!bo->tbo.base.dma_buf ||
+	     list_empty(&bo->tbo.base.dma_buf->attachments))) {
 		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
 			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
@@ -442,75 +443,12 @@
 	return r;
 }
 
-/* Last resort, try to evict something from the current working set */
-static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
-				struct amdgpu_bo *validated)
-{
-	uint32_t domain = validated->allowed_domains;
-	struct ttm_operation_ctx ctx = { true, false };
-	int r;
-
-	if (!p->evictable)
-		return false;
-
-	for (;&p->evictable->tv.head != &p->validated;
-	     p->evictable = list_prev_entry(p->evictable, tv.head)) {
-
-		struct amdgpu_bo_list_entry *candidate = p->evictable;
-		struct amdgpu_bo *bo = candidate->robj;
-		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-		bool update_bytes_moved_vis;
-		uint32_t other;
-
-		/* If we reached our current BO we can forget it */
-		if (candidate->robj == validated)
-			break;
-
-		/* We can't move pinned BOs here */
-		if (bo->pin_count)
-			continue;
-
-		other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
-
-		/* Check if this BO is in one of the domains we need space for */
-		if (!(other & domain))
-			continue;
-
-		/* Check if we can move this BO somewhere else */
-		other = bo->allowed_domains & ~domain;
-		if (!other)
-			continue;
-
-		/* Good we can try to move this BO somewhere else */
-		update_bytes_moved_vis =
-			!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
-			amdgpu_bo_in_cpu_visible_vram(bo);
-		amdgpu_bo_placement_from_domain(bo, other);
-		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-		p->bytes_moved += ctx.bytes_moved;
-		if (update_bytes_moved_vis)
-			p->bytes_moved_vis += ctx.bytes_moved;
-
-		if (unlikely(r))
-			break;
-
-		p->evictable = list_prev_entry(p->evictable, tv.head);
-		list_move(&candidate->tv.head, &p->validated);
-
-		return true;
-	}
-
-	return false;
-}
-
 static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
 {
 	struct amdgpu_cs_parser *p = param;
 	int r;
 
-	do {
-		r = amdgpu_cs_bo_validate(p, bo);
-	} while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
+	r = amdgpu_cs_bo_validate(p, bo);
 	if (r)
 		return r;
 
@@ -528,38 +466,31 @@
 	int r;
 
 	list_for_each_entry(lobj, validated, tv.head) {
-		struct amdgpu_bo *bo = lobj->robj;
-		bool binding_userptr = false;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
 		struct mm_struct *usermm;
 
 		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
 		if (usermm && usermm != current->mm)
 			return -EPERM;
 
-		/* Check if we have user pages and nobody bound the BO already */
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-		    lobj->user_pages) {
+		if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
+		    lobj->user_invalidated && lobj->user_pages) {
 			amdgpu_bo_placement_from_domain(bo,
							AMDGPU_GEM_DOMAIN_CPU);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 			if (r)
 				return r;
+
 			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
						     lobj->user_pages);
-			binding_userptr = true;
 		}
-
-		if (p->evictable == lobj)
-			p->evictable = NULL;
 
 		r = amdgpu_cs_validate(p, bo);
 		if (r)
 			return r;
 
-		if (binding_userptr) {
-			kvfree(lobj->user_pages);
-			lobj->user_pages = NULL;
-		}
+		kvfree(lobj->user_pages);
+		lobj->user_pages = NULL;
 	}
 	return 0;
 }
@@ -574,7 +505,6 @@
 	struct amdgpu_bo *gds;
 	struct amdgpu_bo *gws;
 	struct amdgpu_bo *oa;
-	unsigned tries = 10;
 	int r;
 
 	INIT_LIST_HEAD(&p->validated);
@@ -596,98 +526,63 @@
 		return r;
 	}
 
+	/* One for TTM and one for the CS job */
+	amdgpu_bo_list_for_each_entry(e, p->bo_list)
+		e->tv.num_shared = 2;
+
 	amdgpu_bo_list_get_list(p->bo_list, &p->validated);
-	if (p->bo_list->first_userptr != p->bo_list->num_entries)
-		p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
 
 	INIT_LIST_HEAD(&duplicates);
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 
-	if (p->uf_entry.robj && !p->uf_entry.robj->parent)
+	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
 		list_add(&p->uf_entry.tv.head, &p->validated);
 
-	while (1) {
-		struct list_head need_pages;
+	/* Get userptr backing pages. If pages are updated after registered
+	 * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
+	 * amdgpu_ttm_backend_bind() to flush and invalidate new pages
+	 */
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		bool userpage_invalidated = false;
+		int i;
 
-		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-					   &duplicates);
-		if (unlikely(r != 0)) {
-			if (r != -ERESTARTSYS)
-				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
-			goto error_free_pages;
+		e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
					       sizeof(struct page *),
					       GFP_KERNEL | __GFP_ZERO);
+		if (!e->user_pages) {
+			DRM_ERROR("calloc failure\n");
+			return -ENOMEM;
 		}
 
-		INIT_LIST_HEAD(&need_pages);
-		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-			struct amdgpu_bo *bo = e->robj;
-
-			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
-				 &e->user_invalidated) && e->user_pages) {
-
-				/* We acquired a page array, but somebody
-				 * invalidated it. Free it and try again
-				 */
-				release_pages(e->user_pages,
					      bo->tbo.ttm->num_pages);
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
-			}
-
-			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-			    !e->user_pages) {
-				list_del(&e->tv.head);
-				list_add(&e->tv.head, &need_pages);
-
-				amdgpu_bo_unreserve(e->robj);
-			}
+		r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
+		if (r) {
+			kvfree(e->user_pages);
+			e->user_pages = NULL;
+			return r;
 		}
 
-		if (list_empty(&need_pages))
-			break;
-
-		/* Unreserve everything again. */
-		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-		/* We tried too many times, just abort */
-		if (!--tries) {
-			r = -EDEADLK;
-			DRM_ERROR("deadlock in %s\n", __func__);
-			goto error_free_pages;
-		}
-
-		/* Fill the page arrays for all userptrs. */
-		list_for_each_entry(e, &need_pages, tv.head) {
-			struct ttm_tt *ttm = e->robj->tbo.ttm;
-
-			e->user_pages = kvmalloc_array(ttm->num_pages,
						       sizeof(struct page*),
						       GFP_KERNEL | __GFP_ZERO);
-			if (!e->user_pages) {
-				r = -ENOMEM;
-				DRM_ERROR("calloc failure in %s\n", __func__);
-				goto error_free_pages;
-			}
-
-			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
-			if (r) {
-				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
-				goto error_free_pages;
+		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
+			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
+				userpage_invalidated = true;
+				break;
 			}
 		}
+		e->user_invalidated = userpage_invalidated;
+	}
 
-		/* And try again. */
-		list_splice(&need_pages, &p->validated);
+	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
				   &duplicates);
+	if (unlikely(r != 0)) {
+		if (r != -ERESTARTSYS)
+			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+		goto out;
 	}
 
 	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
					  &p->bytes_moved_vis_threshold);
 	p->bytes_moved = 0;
 	p->bytes_moved_vis = 0;
-	p->evictable = list_last_entry(&p->validated,
				       struct amdgpu_bo_list_entry,
				       tv.head);
 
 	r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
				      amdgpu_cs_validate, p);
@@ -697,16 +592,12 @@
 	}
 
 	r = amdgpu_cs_list_validate(p, &duplicates);
-	if (r) {
-		DRM_ERROR("amdgpu_cs_list_validate(duplicates) failed.\n");
+	if (r)
 		goto error_validate;
-	}
 
 	r = amdgpu_cs_list_validate(p, &p->validated);
-	if (r) {
-		DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n");
+	if (r)
 		goto error_validate;
-	}
 
 	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
				     p->bytes_moved_vis);
@@ -715,24 +606,30 @@
 	gws = p->bo_list->gws_obj;
 	oa = p->bo_list->oa_obj;
 
-	amdgpu_bo_list_for_each_entry(e, p->bo_list)
-		e->bo_va = amdgpu_vm_bo_find(vm, e->robj);
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+		/* Make sure we use the exclusive slot for shared BOs */
+		if (bo->prime_shared_count)
+			e->tv.num_shared = 0;
+		e->bo_va = amdgpu_vm_bo_find(vm, bo);
+	}
 
 	if (gds) {
-		p->job->gds_base = amdgpu_bo_gpu_offset(gds);
-		p->job->gds_size = amdgpu_bo_size(gds);
+		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
+		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
 	}
 	if (gws) {
-		p->job->gws_base = amdgpu_bo_gpu_offset(gws);
-		p->job->gws_size = amdgpu_bo_size(gws);
+		p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
+		p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
 	}
 	if (oa) {
-		p->job->oa_base = amdgpu_bo_gpu_offset(oa);
-		p->job->oa_size = amdgpu_bo_size(oa);
+		p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
+		p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
 	}
 
-	if (!r && p->uf_entry.robj) {
-		struct amdgpu_bo *uf = p->uf_entry.robj;
+	if (!r && p->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
 
 		r = amdgpu_ttm_alloc_gart(&uf->tbo);
 		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
@@ -741,31 +638,25 @@
 error_validate:
 	if (r)
 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-error_free_pages:
-
-	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		if (!e->user_pages)
-			continue;
-
-		release_pages(e->user_pages,
			      e->robj->tbo.ttm->num_pages);
-		kvfree(e->user_pages);
-	}
-
+out:
 	return r;
 }
 
 static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 {
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_bo_list_entry *e;
 	int r;
 
 	list_for_each_entry(e, &p->validated, tv.head) {
-		struct reservation_object *resv = e->robj->tbo.resv;
-		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
				     amdgpu_bo_explicit_sync(e->robj));
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		struct dma_resv *resv = bo->tbo.base.resv;
+		enum amdgpu_sync_mode sync_mode;
 
+		sync_mode = amdgpu_bo_explicit_sync(bo) ?
+			AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
+		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
				     &fpriv->vm);
 		if (r)
 			return r;
 	}
@@ -789,9 +680,11 @@
 		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
 
-	for (i = 0; i < parser->num_post_dep_syncobjs; i++)
-		drm_syncobj_put(parser->post_dep_syncobjs[i]);
-	kfree(parser->post_dep_syncobjs);
+	for (i = 0; i < parser->num_post_deps; i++) {
+		drm_syncobj_put(parser->post_deps[i].syncobj);
+		kfree(parser->post_deps[i].chain);
+	}
+	kfree(parser->post_deps);
 
 	dma_fence_put(parser->fence);
 
@@ -807,11 +700,16 @@
 	kfree(parser->chunks);
 	if (parser->job)
 		amdgpu_job_free(parser->job);
-	amdgpu_bo_unref(&parser->uf_entry.robj);
+	if (parser->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
+
+		amdgpu_bo_unref(&uf);
+	}
 }
 
-static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
+static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 {
+	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_device *adev = p->adev;
 	struct amdgpu_vm *vm = &fpriv->vm;
@@ -820,92 +718,8 @@
 	struct amdgpu_bo *bo;
 	int r;
 
-	r = amdgpu_vm_clear_freed(adev, vm, NULL);
-	if (r)
-		return r;
-
-	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
-	if (r)
-		return r;
-
-	r = amdgpu_sync_fence(adev, &p->job->sync,
			      fpriv->prt_va->last_pt_update, false);
-	if (r)
-		return r;
-
-	if (amdgpu_sriov_vf(adev)) {
-		struct dma_fence *f;
-
-		bo_va = fpriv->csa_va;
-		BUG_ON(!bo_va);
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
-		if (r)
-			return r;
-
-		f = bo_va->last_pt_update;
-		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
-		if (r)
-			return r;
-	}
-
-	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-		struct dma_fence *f;
-
-		/* ignore duplicates */
-		bo = e->robj;
-		if (!bo)
-			continue;
-
-		bo_va = e->bo_va;
-		if (bo_va == NULL)
-			continue;
-
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
-		if (r)
-			return r;
-
-		f = bo_va->last_pt_update;
-		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
-		if (r)
-			return r;
-	}
-
-	r = amdgpu_vm_handle_moved(adev, vm);
-	if (r)
-		return r;
-
-	r = amdgpu_vm_update_directories(adev, vm);
-	if (r)
-		return r;
-
-	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
-	if (r)
-		return r;
-
-	if (amdgpu_vm_debug) {
-		/* Invalidate all BOs to test for userspace bugs */
-		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-			/* ignore duplicates */
-			if (!e->robj)
-				continue;
-
-			amdgpu_vm_bo_invalidate(adev, e->robj, false);
-		}
-	}
-
-	return r;
-}
-
-static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
				 struct amdgpu_cs_parser *p)
-{
-	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_vm *vm = &fpriv->vm;
-	struct amdgpu_ring *ring = p->ring;
-	int r;
-
 	/* Only for UVD/VCE VM emulation */
-	if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) {
+	if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
 		unsigned i, j;
 
 		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
@@ -924,7 +738,7 @@
 			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
				continue;
 
-			va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK;
+			va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
 			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
 			if (r) {
				DRM_ERROR("IB va_start is invalid\n");
@@ -946,7 +760,7 @@
 			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
 			kptr += va_start - offset;
 
-			if (p->ring->funcs->parse_cs) {
+			if (ring->funcs->parse_cs) {
				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
				amdgpu_bo_kunmap(aobj);
 
@@ -965,16 +779,78 @@
 		}
 	}
 
-	if (p->job->vm) {
-		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
+	if (!p->job->vm)
+		return amdgpu_cs_sync_rings(p);
 
-		r = amdgpu_bo_vm_update_pte(p);
+
+	r = amdgpu_vm_clear_freed(adev, vm, NULL);
+	if (r)
+		return r;
+
+	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
+	if (r)
+		return r;
+
+	r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
+	if (r)
+		return r;
+
+	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
+		bo_va = fpriv->csa_va;
+		BUG_ON(!bo_va);
+		r = amdgpu_vm_bo_update(adev, bo_va, false);
 		if (r)
 			return r;
 
-		r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
+		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
 		if (r)
 			return r;
+	}
+
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		/* ignore duplicates */
+		bo = ttm_to_amdgpu_bo(e->tv.bo);
+		if (!bo)
+			continue;
+
+		bo_va = e->bo_va;
+		if (bo_va == NULL)
+			continue;
+
+		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		if (r)
+			return r;
+
+		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
+		if (r)
+			return r;
+	}
+
+	r = amdgpu_vm_handle_moved(adev, vm);
+	if (r)
+		return r;
+
+	r = amdgpu_vm_update_pdes(adev, vm, false);
+	if (r)
+		return r;
+
+	r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
+	if (r)
+		return r;
+
+	p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
+
+	if (amdgpu_vm_debug) {
+		/* Invalidate all BOs to test for userspace bugs */
+		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+			/* ignore duplicates */
+			if (!bo)
+				continue;
+
+			amdgpu_vm_bo_invalidate(adev, bo, false);
+		}
 	}
 
 	return amdgpu_cs_sync_rings(p);
@@ -985,14 +861,15 @@
 {
 	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	int i, j;
 	int r, ce_preempt = 0, de_preempt = 0;
+	struct amdgpu_ring *ring;
+	int i, j;
 
 	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
 		struct amdgpu_cs_chunk *chunk;
 		struct amdgpu_ib *ib;
 		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
-		struct amdgpu_ring *ring;
+		struct drm_sched_entity *entity;
 
 		chunk = &parser->chunks[i];
 		ib = &parser->job->ibs[j];
@@ -1001,7 +878,8 @@
 		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
 			continue;
 
-		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && amdgpu_sriov_vf(adev)) {
+		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
+		    (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
 			if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
				if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
					ce_preempt++;
@@ -1014,8 +892,9 @@
				return -EINVAL;
 		}
 
-		r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
					 chunk_ib->ip_instance, chunk_ib->ring, &ring);
+		r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
					  chunk_ib->ip_instance, chunk_ib->ring,
					  &entity);
 		if (r)
 			return r;
 
@@ -1023,14 +902,20 @@
 			parser->job->preamble_status |=
				AMDGPU_PREAMBLE_IB_PRESENT;
 
-		if (parser->ring && parser->ring != ring)
+		if (parser->entity && parser->entity != entity)
 			return -EINVAL;
 
-		parser->ring = ring;
+		/* Return if there is no run queue associated with this entity.
+		 * Possibly because of disabled HW IP*/
+		if (entity->rq == NULL)
+			return -EINVAL;
 
-		r = amdgpu_ib_get(adev, vm,
				  ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
				  ib);
+		parser->entity = entity;
+
+		ring = to_amdgpu_ring(entity->rq->sched);
+		r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
				  chunk_ib->ib_bytes : 0,
				  AMDGPU_IB_POOL_DELAYED, ib);
 		if (r) {
 			DRM_ERROR("Failed to get ib !\n");
 			return r;
@@ -1043,13 +928,12 @@
 		j++;
 	}
 
-	/* UVD & VCE fw doesn't support user fences */
-	if (parser->job->uf_addr && (
	    parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
	    parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+	/* MM engine doesn't support user fences */
+	ring = to_amdgpu_ring(parser->entity->rq->sched);
+	if (parser->job->uf_addr && ring->funcs->no_user_fence)
 		return -EINVAL;
 
-	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx);
+	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1065,51 +949,62 @@
 		sizeof(struct drm_amdgpu_cs_chunk_dep);
 
 	for (i = 0; i < num_deps; ++i) {
-		struct amdgpu_ring *ring;
 		struct amdgpu_ctx *ctx;
+		struct drm_sched_entity *entity;
 		struct dma_fence *fence;
 
 		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
 		if (ctx == NULL)
 			return -EINVAL;
 
-		r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
					 deps[i].ip_type,
					 deps[i].ip_instance,
					 deps[i].ring, &ring);
+		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
					  deps[i].ip_instance,
					  deps[i].ring, &entity);
 		if (r) {
 			amdgpu_ctx_put(ctx);
 			return r;
 		}
 
-		fence = amdgpu_ctx_get_fence(ctx, ring,
					     deps[i].handle);
-		if (IS_ERR(fence)) {
-			r = PTR_ERR(fence);
-			amdgpu_ctx_put(ctx);
-			return r;
-		} else if (fence) {
-			r = amdgpu_sync_fence(p->adev, &p->job->sync, fence,
					      true);
-			dma_fence_put(fence);
-			amdgpu_ctx_put(ctx);
-			if (r)
-				return r;
+		fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
+		amdgpu_ctx_put(ctx);
+
+		if (IS_ERR(fence))
+			return PTR_ERR(fence);
+		else if (!fence)
+			continue;
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
+			struct drm_sched_fence *s_fence;
+			struct dma_fence *old = fence;
+
+			s_fence = to_drm_sched_fence(fence);
+			fence = dma_fence_get(&s_fence->scheduled);
+			dma_fence_put(old);
 		}
+
+		r = amdgpu_sync_fence(&p->job->sync, fence);
+		dma_fence_put(fence);
+		if (r)
+			return r;
 	}
 	return 0;
 }
 
 static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
-						 uint32_t handle)
+						 uint32_t handle, u64 point,
+						 u64 flags)
 {
-	int r;
 	struct dma_fence *fence;
-	r = drm_syncobj_find_fence(p->filp, handle, &fence);
-	if (r)
-		return r;
+	int r;
 
-	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
+	r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
+	if (r) {
+		DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
			  handle, point, r);
+		return r;
+	}
+
+	r = amdgpu_sync_fence(&p->job->sync, fence);
 	dma_fence_put(fence);
 
 	return r;
@@ -1118,46 +1013,123 @@
 static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
					    struct amdgpu_cs_chunk *chunk)
 {
+	struct drm_amdgpu_cs_chunk_sem *deps;
 	unsigned num_deps;
 	int i, r;
-	struct drm_amdgpu_cs_chunk_sem *deps;
 
 	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
 	num_deps = chunk->length_dw * 4 /
 		sizeof(struct drm_amdgpu_cs_chunk_sem);
-
 	for (i = 0; i < num_deps; ++i) {
-		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
+		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
							  0, 0);
 		if (r)
 			return r;
 	}
+
+	return 0;
+}
+
+
+static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
						     struct amdgpu_cs_chunk *chunk)
+{
+	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+	unsigned num_deps;
+	int i, r;
+
+	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+	for (i = 0; i < num_deps; ++i) {
+		r = amdgpu_syncobj_lookup_and_add_to_sync(p,
							  syncobj_deps[i].handle,
							  syncobj_deps[i].point,
							  syncobj_deps[i].flags);
+		if (r)
+			return r;
+	}
+
 	return 0;
 }
 
 static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
					     struct amdgpu_cs_chunk *chunk)
 {
+	struct drm_amdgpu_cs_chunk_sem *deps;
 	unsigned num_deps;
 	int i;
-	struct drm_amdgpu_cs_chunk_sem *deps;
+
 	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
 	num_deps = chunk->length_dw * 4 /
 		sizeof(struct drm_amdgpu_cs_chunk_sem);
 
-	p->post_dep_syncobjs = kmalloc_array(num_deps,
					     sizeof(struct drm_syncobj *),
					     GFP_KERNEL);
-	p->num_post_dep_syncobjs = 0;
+	if (p->post_deps)
+		return -EINVAL;
 
-	if (!p->post_dep_syncobjs)
+	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
+	p->num_post_deps = 0;
+
+	if (!p->post_deps)
+		return -ENOMEM;
+
+
+	for (i = 0; i < num_deps; ++i) {
+		p->post_deps[i].syncobj =
+			drm_syncobj_find(p->filp, deps[i].handle);
+		if (!p->post_deps[i].syncobj)
+			return -EINVAL;
+		p->post_deps[i].chain = NULL;
+		p->post_deps[i].point = 0;
+		p->num_post_deps++;
+	}
+
+	return 0;
+}
+
+
+static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
						      struct amdgpu_cs_chunk *chunk)
+{
+	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+	unsigned num_deps;
+	int i;
+
+	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+
+	if (p->post_deps)
+		return -EINVAL;
+
+	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
+	p->num_post_deps = 0;
+
+	if (!p->post_deps)
 		return -ENOMEM;
 
 	for (i = 0; i < num_deps; ++i) {
-		p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
-		if (!p->post_dep_syncobjs[i])
+		struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
+
+		dep->chain = NULL;
+		if (syncobj_deps[i].point) {
+			dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
+			if (!dep->chain)
+				return -ENOMEM;
+		}
+
+		dep->syncobj = drm_syncobj_find(p->filp,
						syncobj_deps[i].handle);
+		if (!dep->syncobj) {
+			kfree(dep->chain);
 			return -EINVAL;
-		p->num_post_dep_syncobjs++;
+		}
+		dep->point = syncobj_deps[i].point;
+		p->num_post_deps++;
 	}
+
 	return 0;
 }
 
@@ -1171,18 +1143,33 @@
 
 		chunk = &p->chunks[i];
 
-		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+		switch (chunk->chunk_id) {
+		case AMDGPU_CHUNK_ID_DEPENDENCIES:
+		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
 			r = amdgpu_cs_process_fence_dep(p, chunk);
 			if (r)
				return r;
-		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 			r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
 			if (r)
				return r;
-		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
 			r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
 			if (r)
				return r;
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+			r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
+			if (r)
+				return r;
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+			r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
+			if (r)
+				return r;
+			break;
 		}
 	}
 
@@ -1193,53 +1180,58 @@
 {
 	int i;
 
-	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
-		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
+	for (i = 0; i < p->num_post_deps; ++i) {
+		if (p->post_deps[i].chain && p->post_deps[i].point) {
+			drm_syncobj_add_point(p->post_deps[i].syncobj,
					      p->post_deps[i].chain,
					      p->fence, p->post_deps[i].point);
+			p->post_deps[i].chain = NULL;
+		} else {
+			drm_syncobj_replace_fence(p->post_deps[i].syncobj,
						  p->fence);
+		}
+	}
 }
 
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_ring *ring = p->ring;
-	struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
-	enum drm_sched_priority priority;
+	struct drm_sched_entity *entity = p->entity;
 	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_job *job;
 	uint64_t seq;
-
 	int r;
 
 	job = p->job;
 	p->job = NULL;
 
-	r = drm_sched_job_init(&job->base, entity, p->filp);
+	r = drm_sched_job_init(&job->base, entity, &fpriv->vm);
 	if (r)
 		goto error_unlock;
 
-	/* No memory allocation is allowed while holding the mn lock */
-	amdgpu_mn_lock(p->mn);
-	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		struct amdgpu_bo *bo = e->robj;
+	/* No memory allocation is allowed while holding the notifier lock.
	 * The lock is held until amdgpu_cs_submit is finished and fence is
	 * added to BOs.
	 */
+	mutex_lock(&p->adev->notifier_lock);
 
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
-			r = -ERESTARTSYS;
-			goto error_abort;
-		}
+	/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
	 * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
	 */
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+		r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+	}
+	if (r) {
+		r = -EAGAIN;
+		goto error_abort;
 	}
 
-	job->owner = p->filp;
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
 
-	r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
-	if (r) {
-		dma_fence_put(p->fence);
-		dma_fence_put(&job->base.s_fence->finished);
-		amdgpu_job_free(job);
-		amdgpu_mn_unlock(p->mn);
-		return r;
-	}
-
+	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
 	amdgpu_cs_post_dependencies(p);
 
 	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
@@ -1255,34 +1247,45 @@
 
 	trace_amdgpu_cs_ioctl(job);
 	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
-	priority = job->base.s_priority;
 	drm_sched_entity_push_job(&job->base, entity);
 
-	ring = to_amdgpu_ring(entity->rq->sched);
-	amdgpu_ring_priority_get(ring, priority);
+	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
 
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
-	amdgpu_mn_unlock(p->mn);
+	mutex_unlock(&p->adev->notifier_lock);
 
 	return 0;
 
 error_abort:
-	dma_fence_put(&job->base.s_fence->finished);
-	job->base.s_fence = NULL;
-	amdgpu_mn_unlock(p->mn);
+	drm_sched_job_cleanup(&job->base);
+	mutex_unlock(&p->adev->notifier_lock);
 
 error_unlock:
 	amdgpu_job_free(job);
 	return r;
 }
 
+static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
+{
+	int i;
+
+	if (!trace_amdgpu_cs_enabled())
+		return;
+
+	for (i = 0; i < parser->job->num_ibs; i++)
+		trace_amdgpu_cs(parser, i);
+}
+
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
-	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_device *adev = drm_to_adev(dev);
 	union drm_amdgpu_cs *cs = data;
 	struct amdgpu_cs_parser parser = {};
 	bool reserved_buffers = false;
-	int i, r;
+	int r;
+
+	if (amdgpu_ras_intr_triggered())
+		return -EHWPOISON;
 
 	if (!adev->accel_working)
 		return -EBUSY;
@@ -1292,7 +1295,8 @@
 
 	r = amdgpu_cs_parser_init(&parser, data);
 	if (r) {
-		DRM_ERROR("Failed to initialize parser !\n");
+		if (printk_ratelimit())
+			DRM_ERROR("Failed to initialize parser %d!\n", r);
 		goto out;
 	}
 
@@ -1300,27 +1304,26 @@
 	if (r)
 		goto out;
 
-	r = amdgpu_cs_parser_bos(&parser, data);
-	if (r) {
-		if (r == -ENOMEM)
-			DRM_ERROR("Not enough memory for command submission!\n");
-		else if (r != -ERESTARTSYS)
-			DRM_ERROR("Failed to process the buffer list %d!\n", r);
-		goto out;
-	}
-
-	reserved_buffers = true;
-
 	r = amdgpu_cs_dependencies(adev, &parser);
 	if (r) {
 		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
 		goto out;
 	}
 
-	for (i = 0; i < parser.job->num_ibs; i++)
-		trace_amdgpu_cs(&parser, i);
+	r = amdgpu_cs_parser_bos(&parser, data);
+	if (r) {
+		if (r == -ENOMEM)
+			DRM_ERROR("Not enough memory for command submission!\n");
+		else if (r != -ERESTARTSYS && r != -EAGAIN)
+			DRM_ERROR("Failed to process the buffer list %d!\n", r);
+		goto out;
+	}
 
-	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
+	reserved_buffers = true;
+
+	trace_amdgpu_cs_ibs(&parser);
+
+	r = amdgpu_cs_vm_handling(&parser);
 	if (r)
 		goto out;
 
@@ -1328,6 +1331,7 @@
 
 out:
 	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+
 	return r;
 }
 
@@ -1344,9 +1348,8 @@
			 struct drm_file *filp)
 {
 	union drm_amdgpu_wait_cs *wait = data;
-	struct amdgpu_device *adev = dev->dev_private;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
-	struct amdgpu_ring *ring = NULL;
+	struct drm_sched_entity *entity;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	long r;
@@ -1355,15 +1358,14 @@
 	if (ctx == NULL)
 		return -EINVAL;
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
				 wait->in.ip_type, wait->in.ip_instance,
				 wait->in.ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
				  wait->in.ring, &entity);
 	if (r) {
 		amdgpu_ctx_put(ctx);
 		return r;
 	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
+	fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
 	if (IS_ERR(fence))
 		r = PTR_ERR(fence);
 	else if (fence) {
@@ -1395,7 +1397,7 @@
				    struct drm_file *filp,
				    struct drm_amdgpu_fence *user)
 {
-	struct amdgpu_ring *ring;
+	struct drm_sched_entity *entity;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	int r;
@@ -1404,14 +1406,14 @@
 	if (ctx == NULL)
 		return ERR_PTR(-EINVAL);
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
				 user->ip_instance, user->ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
				  user->ring, &entity);
 	if (r) {
 		amdgpu_ctx_put(ctx);
 		return ERR_PTR(r);
 	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
+	fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
 	amdgpu_ctx_put(ctx);
 
 	return fence;
@@ -1420,7 +1422,7 @@
 int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *filp)
 {
-	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_device *adev = drm_to_adev(dev);
 	union drm_amdgpu_fence_to_handle *info = data;
 	struct dma_fence *fence;
 	struct drm_syncobj *syncobj;
@@ -1430,6 +1432,9 @@
 	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
 	if (IS_ERR(fence))
 		return PTR_ERR(fence);
+
+	if (!fence)
+		fence = dma_fence_get_stub();
 
 	switch (info->in.what) {
 	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
@@ -1469,6 +1474,7 @@
 		return 0;
 
 	default:
+		dma_fence_put(fence);
 		return -EINVAL;
 	}
 }
@@ -1501,15 +1507,15 @@
 			continue;
 
 		r = dma_fence_wait_timeout(fence, true, timeout);
+		if (r > 0 && fence->error)
+			r = fence->error;
+
 		dma_fence_put(fence);
 		if (r < 0)
 			return r;
 
 		if (r == 0)
 			break;
-
-		if (fence->error)
-			return fence->error;
 	}
 
 	memset(wait, 0, sizeof(*wait));
@@ -1593,7 +1599,7 @@
 int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
				struct drm_file *filp)
 {
-	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_device *adev = drm_to_adev(dev);
 	union drm_amdgpu_wait_fences *wait = data;
 	uint32_t fence_count = wait->in.fence_count;
 	struct drm_amdgpu_fence *fences_user;
@@ -1655,7 +1661,7 @@
 	*map = mapping;
 
 	/* Double check that the BO is reserved by this CS */
-	if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket)
+	if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)
 		return -EINVAL;
 
 	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {