2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
--- a/kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -22,14 +22,33 @@
  * Authors: monk liu <monk.liu@amd.com>
  */
 
-#include <drm/drmP.h>
 #include <drm/drm_auth.h>
 #include "amdgpu.h"
 #include "amdgpu_sched.h"
+#include "amdgpu_ras.h"
+#include <linux/nospec.h>
+
+#define to_amdgpu_ctx_entity(e) \
+        container_of((e), struct amdgpu_ctx_entity, entity)
+
+const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
+        [AMDGPU_HW_IP_GFX] = 1,
+        [AMDGPU_HW_IP_COMPUTE] = 4,
+        [AMDGPU_HW_IP_DMA] = 2,
+        [AMDGPU_HW_IP_UVD] = 1,
+        [AMDGPU_HW_IP_VCE] = 1,
+        [AMDGPU_HW_IP_UVD_ENC] = 1,
+        [AMDGPU_HW_IP_VCN_DEC] = 1,
+        [AMDGPU_HW_IP_VCN_ENC] = 1,
+        [AMDGPU_HW_IP_VCN_JPEG] = 1,
+};
 
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
                                       enum drm_sched_priority priority)
 {
+        if (priority < 0 || priority >= DRM_SCHED_PRIORITY_COUNT)
+                return -EINVAL;
+
         /* NORMAL and below are accessible by everyone */
         if (priority <= DRM_SCHED_PRIORITY_NORMAL)
                 return 0;
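
Note on the to_amdgpu_ctx_entity() macro added above: it uses the kernel's container_of() to map a pointer to the embedded drm_sched_entity back to its enclosing amdgpu_ctx_entity. Below is a minimal userspace sketch of that pattern, with made-up struct names standing in for the real amdgpu/drm types, just to illustrate the pointer arithmetic:

#include <stddef.h>
#include <stdio.h>

/* Simplified stand-ins; not the real amdgpu/drm types. */
struct sched_entity { int dummy; };

struct ctx_entity {
        unsigned long sequence;
        struct sched_entity entity;   /* embedded member, as in amdgpu_ctx_entity */
};

/* Same idea as the kernel macro: subtract the member offset from the member
 * pointer to recover the address of the enclosing structure. */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

#define to_ctx_entity(e) container_of((e), struct ctx_entity, entity)

int main(void)
{
        struct ctx_entity ce = { .sequence = 1 };
        struct sched_entity *inner = &ce.entity;

        /* Recovers &ce from the pointer to its embedded member. */
        printf("outer recovered correctly: %d\n", to_ctx_entity(inner) == &ce);
        return 0;
}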
@@ -43,36 +62,100 @@
         return -EACCES;
 }
 
+static enum gfx_pipe_priority amdgpu_ctx_sched_prio_to_compute_prio(enum drm_sched_priority prio)
+{
+        switch (prio) {
+        case DRM_SCHED_PRIORITY_HIGH:
+        case DRM_SCHED_PRIORITY_KERNEL:
+                return AMDGPU_GFX_PIPE_PRIO_HIGH;
+        default:
+                return AMDGPU_GFX_PIPE_PRIO_NORMAL;
+        }
+}
+
+static unsigned int amdgpu_ctx_prio_sched_to_hw(struct amdgpu_device *adev,
+                                                enum drm_sched_priority prio,
+                                                u32 hw_ip)
+{
+        unsigned int hw_prio;
+
+        hw_prio = (hw_ip == AMDGPU_HW_IP_COMPUTE) ?
+                        amdgpu_ctx_sched_prio_to_compute_prio(prio) :
+                        AMDGPU_RING_PRIO_DEFAULT;
+        hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
+        if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
+                hw_prio = AMDGPU_RING_PRIO_DEFAULT;
+
+        return hw_prio;
+}
+
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
+                                  const u32 ring)
+{
+        struct amdgpu_device *adev = ctx->adev;
+        struct amdgpu_ctx_entity *entity;
+        struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
+        unsigned num_scheds = 0;
+        unsigned int hw_prio;
+        enum drm_sched_priority priority;
+        int r;
+
+        entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]),
+                         GFP_KERNEL);
+        if (!entity)
+                return -ENOMEM;
+
+        entity->sequence = 1;
+        priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+                        ctx->init_priority : ctx->override_priority;
+        hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority, hw_ip);
+
+        hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
+        scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+        num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+
+        /* disable load balance if the hw engine retains context among dependent jobs */
+        if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
+            hw_ip == AMDGPU_HW_IP_VCN_DEC ||
+            hw_ip == AMDGPU_HW_IP_UVD_ENC ||
+            hw_ip == AMDGPU_HW_IP_UVD) {
+                sched = drm_sched_pick_best(scheds, num_scheds);
+                scheds = &sched;
+                num_scheds = 1;
+        }
+
+        r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
+                                  &ctx->guilty);
+        if (r)
+                goto error_free_entity;
+
+        ctx->entities[hw_ip][ring] = entity;
+        return 0;
+
+error_free_entity:
+        kfree(entity);
+
+        return r;
+}
+
 static int amdgpu_ctx_init(struct amdgpu_device *adev,
                            enum drm_sched_priority priority,
                            struct drm_file *filp,
                            struct amdgpu_ctx *ctx)
 {
-        unsigned i, j;
         int r;
-
-        if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
-                return -EINVAL;
 
         r = amdgpu_ctx_priority_permit(filp, priority);
         if (r)
                 return r;
 
         memset(ctx, 0, sizeof(*ctx));
+
         ctx->adev = adev;
+
         kref_init(&ctx->refcount);
         spin_lock_init(&ctx->ring_lock);
-        ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
-                              sizeof(struct dma_fence*), GFP_KERNEL);
-        if (!ctx->fences)
-                return -ENOMEM;
-
         mutex_init(&ctx->lock);
-
-        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-                ctx->rings[i].sequence = 1;
-                ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
-        }
 
         ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
         ctx->reset_counter_query = ctx->reset_counter;
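
Note on the array_index_nospec() calls in amdgpu_ctx_prio_sched_to_hw() and amdgpu_ctx_init_entity() above: the user-controlled hw_ip is clamped before it indexes adev->gpu_sched[], so even a mispredicted bounds check cannot speculatively read out of range (Spectre v1 hardening). The sketch below is a rough userspace model of the branchless clamp, loosely based on the kernel's generic array_index_mask_nospec(); the real helper additionally has to keep the compiler from optimizing the mask away:

#include <stdio.h>
#include <limits.h>

#define BITS_PER_LONG (sizeof(long) * CHAR_BIT)

/* All-ones when index < size, zero otherwise, computed without a branch.
 * Assumes size is non-zero and no larger than LONG_MAX; relies on the usual
 * arithmetic right shift of negative values. */
static unsigned long index_mask_nospec(unsigned long index, unsigned long size)
{
        return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1);
}

/* Clamp an untrusted index: valid indices pass through, anything else becomes 0. */
static unsigned long index_nospec(unsigned long index, unsigned long size)
{
        return index & index_mask_nospec(index, size);
}

int main(void)
{
        unsigned long table_size = 9;   /* e.g. the number of HW IP types */

        printf("%lu -> %lu\n", 3UL, index_nospec(3, table_size));    /* stays 3 */
        printf("%lu -> %lu\n", 42UL, index_nospec(42, table_size));  /* forced to 0 */
        return 0;
}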
@@ -80,34 +163,21 @@
         ctx->init_priority = priority;
         ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
 
-        /* create context entity for each ring */
-        for (i = 0; i < adev->num_rings; i++) {
-                struct amdgpu_ring *ring = adev->rings[i];
-                struct drm_sched_rq *rq;
-
-                rq = &ring->sched.sched_rq[priority];
-
-                if (ring == &adev->gfx.kiq.ring)
-                        continue;
-
-                r = drm_sched_entity_init(&ctx->rings[i].entity,
-                                          &rq, 1, &ctx->guilty);
-                if (r)
-                        goto failed;
-        }
-
-        r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
-        if (r)
-                goto failed;
-
         return 0;
+}
 
-failed:
-        for (j = 0; j < i; j++)
-                drm_sched_entity_destroy(&ctx->rings[j].entity);
-        kfree(ctx->fences);
-        ctx->fences = NULL;
-        return r;
+static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+{
+
+        int i;
+
+        if (!entity)
+                return;
+
+        for (i = 0; i < amdgpu_sched_jobs; ++i)
+                dma_fence_put(entity->fences[i]);
+
+        kfree(entity);
 }
 
 static void amdgpu_ctx_fini(struct kref *ref)
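
Note on the entity allocation freed by amdgpu_ctx_fini_entity() above: amdgpu_ctx_init_entity() sizes it with offsetof(typeof(*entity), fences[amdgpu_sched_jobs]), i.e. a single allocation covering the struct plus its trailing per-job fence array, and teardown walks exactly that many slots before freeing. A small userspace sketch of this sizing idiom, with hypothetical names:

#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>

enum { NUM_JOBS = 32 };   /* stand-in for amdgpu_sched_jobs */

struct fence;             /* opaque here */

struct ctx_entity {
        unsigned long sequence;
        struct fence *fences[];   /* flexible array member, sized at allocation */
};

int main(void)
{
        /* One zeroed allocation covers the header plus NUM_JOBS fence slots,
         * mirroring kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]), ...). */
        size_t size = offsetof(struct ctx_entity, fences[NUM_JOBS]);
        struct ctx_entity *entity = calloc(1, size);

        if (!entity)
                return 1;

        entity->sequence = 1;
        printf("entity + %d fence slots = %zu bytes\n", NUM_JOBS, size);

        /* Teardown would drop each fences[i] reference first, then free the
         * single allocation, as amdgpu_ctx_fini_entity() does. */
        free(entity);
        return 0;
}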
@@ -119,17 +189,46 @@
         if (!adev)
                 return;
 
-        for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
-                for (j = 0; j < amdgpu_sched_jobs; ++j)
-                        dma_fence_put(ctx->rings[i].fences[j]);
-        kfree(ctx->fences);
-        ctx->fences = NULL;
-
-        amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
+        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+                for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
+                        amdgpu_ctx_fini_entity(ctx->entities[i][j]);
+                        ctx->entities[i][j] = NULL;
+                }
+        }
 
         mutex_destroy(&ctx->lock);
-
         kfree(ctx);
+}
+
+int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
+                          u32 ring, struct drm_sched_entity **entity)
+{
+        int r;
+
+        if (hw_ip >= AMDGPU_HW_IP_NUM) {
+                DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
+                return -EINVAL;
+        }
+
+        /* Right now all IPs have only one instance - multiple rings. */
+        if (instance != 0) {
+                DRM_DEBUG("invalid ip instance: %d\n", instance);
+                return -EINVAL;
+        }
+
+        if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
+                DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
+                return -EINVAL;
+        }
+
+        if (ctx->entities[hw_ip][ring] == NULL) {
+                r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+                if (r)
+                        return r;
+        }
+
+        *entity = &ctx->entities[hw_ip][ring]->entity;
+        return 0;
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
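
Note on amdgpu_ctx_get_entity() above: it validates the user-supplied hw_ip/instance/ring triple and then creates the backing entity lazily on first use, instead of pre-allocating one per ring at context creation as the old code did. A compact userspace sketch of that validate-then-lazily-allocate lookup, with hypothetical bounds and names:

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

enum { HW_IP_NUM = 9, MAX_ENTITY_NUM = 4 };   /* hypothetical bounds */

struct entity { int initialized; };

static struct entity *entities[HW_IP_NUM][MAX_ENTITY_NUM];

/* Per-IP ring counts, in the spirit of amdgpu_ctx_num_entities[]. */
static const unsigned int num_entities[HW_IP_NUM] = { 1, 4, 2, 1, 1, 1, 1, 1, 1 };

static int get_entity(unsigned int hw_ip, unsigned int ring, struct entity **out)
{
        if (hw_ip >= HW_IP_NUM)
                return -EINVAL;             /* unknown IP type */
        if (ring >= num_entities[hw_ip])
                return -EINVAL;             /* ring out of range for this IP */

        if (!entities[hw_ip][ring]) {       /* first use: allocate on demand */
                struct entity *e = calloc(1, sizeof(*e));

                if (!e)
                        return -ENOMEM;
                e->initialized = 1;
                entities[hw_ip][ring] = e;
        }

        *out = entities[hw_ip][ring];
        return 0;
}

int main(void)
{
        struct entity *e;

        printf("valid lookup: %d\n", get_entity(1, 3, &e));   /* 0, allocates */
        printf("bad ring:     %d\n", get_entity(0, 3, &e));   /* -EINVAL */
        return 0;
}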
@@ -147,7 +246,7 @@
                 return -ENOMEM;
 
         mutex_lock(&mgr->lock);
-        r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
+        r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
         if (r < 0) {
                 mutex_unlock(&mgr->lock);
                 kfree(ctx);
@@ -168,16 +267,16 @@
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
         struct amdgpu_ctx *ctx;
-        u32 i;
+        u32 i, j;
 
         ctx = container_of(ref, struct amdgpu_ctx, refcount);
+        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+                for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+                        if (!ctx->entities[i][j])
+                                continue;
 
-        for (i = 0; i < ctx->adev->num_rings; i++) {
-
-                if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
-                        continue;
-
-                drm_sched_entity_destroy(&ctx->rings[i].entity);
+                        drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+                }
         }
 
         amdgpu_ctx_fini(ref);
@@ -274,16 +373,15 @@
         enum drm_sched_priority priority;
 
         union drm_amdgpu_ctx *args = data;
-        struct amdgpu_device *adev = dev->dev_private;
+        struct amdgpu_device *adev = drm_to_adev(dev);
         struct amdgpu_fpriv *fpriv = filp->driver_priv;
 
-        r = 0;
         id = args->in.ctx_id;
-        priority = amdgpu_to_sched_priority(args->in.priority);
+        r = amdgpu_to_sched_priority(args->in.priority, &priority);
 
         /* For backwards compatibility reasons, we need to accept
          * ioctls with garbage in the priority field */
-        if (priority == DRM_SCHED_PRIORITY_INVALID)
+        if (r == -EINVAL)
                 priority = DRM_SCHED_PRIORITY_NORMAL;
 
         switch (args->in.op) {
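
Note on the priority handling above: amdgpu_to_sched_priority() now reports bad input through its return value rather than a DRM_SCHED_PRIORITY_INVALID sentinel, and the ioctl deliberately maps -EINVAL to NORMAL so that old userspace passing garbage priorities keeps working. A tiny sketch of this parse-then-fall-back pattern, with made-up ABI values:

#include <stdio.h>
#include <errno.h>

enum sched_priority { PRIO_LOW, PRIO_NORMAL, PRIO_HIGH };

/* Translate an untrusted userspace value; reject anything unknown.
 * The case values are hypothetical, not the real AMDGPU ABI. */
static int to_sched_priority(int user_prio, enum sched_priority *out)
{
        switch (user_prio) {
        case -512: *out = PRIO_LOW;    return 0;
        case 0:    *out = PRIO_NORMAL; return 0;
        case 512:  *out = PRIO_HIGH;   return 0;
        default:   return -EINVAL;
        }
}

int main(void)
{
        enum sched_priority prio;
        int garbage = 12345;
        int r = to_sched_priority(garbage, &prio);

        /* Backwards compatibility: garbage input degrades to NORMAL
         * instead of failing the whole ioctl. */
        if (r == -EINVAL)
                prio = PRIO_NORMAL;

        printf("r=%d prio=%d\n", r, prio);
        return 0;
}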
@@ -334,106 +432,129 @@
         return 0;
 }
 
-int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
-                         struct dma_fence *fence, uint64_t* handler)
+void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+                          struct drm_sched_entity *entity,
+                          struct dma_fence *fence, uint64_t* handle)
 {
-        struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
-        uint64_t seq = cring->sequence;
-        unsigned idx = 0;
+        struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+        uint64_t seq = centity->sequence;
         struct dma_fence *other = NULL;
+        unsigned idx = 0;
 
         idx = seq & (amdgpu_sched_jobs - 1);
-        other = cring->fences[idx];
+        other = centity->fences[idx];
         if (other)
                 BUG_ON(!dma_fence_is_signaled(other));
 
         dma_fence_get(fence);
 
         spin_lock(&ctx->ring_lock);
-        cring->fences[idx] = fence;
-        cring->sequence++;
+        centity->fences[idx] = fence;
+        centity->sequence++;
         spin_unlock(&ctx->ring_lock);
 
         dma_fence_put(other);
-        if (handler)
-                *handler = seq;
-
-        return 0;
+        if (handle)
+                *handle = seq;
 }
 
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
-                                       struct amdgpu_ring *ring, uint64_t seq)
+                                       struct drm_sched_entity *entity,
+                                       uint64_t seq)
 {
-        struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
+        struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
         struct dma_fence *fence;
 
         spin_lock(&ctx->ring_lock);
 
         if (seq == ~0ull)
-                seq = ctx->rings[ring->idx].sequence - 1;
+                seq = centity->sequence - 1;
 
-        if (seq >= cring->sequence) {
+        if (seq >= centity->sequence) {
                 spin_unlock(&ctx->ring_lock);
                 return ERR_PTR(-EINVAL);
         }
 
 
-        if (seq + amdgpu_sched_jobs < cring->sequence) {
+        if (seq + amdgpu_sched_jobs < centity->sequence) {
                 spin_unlock(&ctx->ring_lock);
                 return NULL;
         }
 
-        fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
+        fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
         spin_unlock(&ctx->ring_lock);
 
         return fence;
 }
 
+static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
+                                           struct amdgpu_ctx_entity *aentity,
+                                           int hw_ip,
+                                           enum drm_sched_priority priority)
+{
+        struct amdgpu_device *adev = ctx->adev;
+        unsigned int hw_prio;
+        struct drm_gpu_scheduler **scheds = NULL;
+        unsigned num_scheds;
+
+        /* set sw priority */
+        drm_sched_entity_set_priority(&aentity->entity, priority);
+
+        /* set hw priority */
+        if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
+                hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority,
+                                                      AMDGPU_HW_IP_COMPUTE);
+                hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
+                scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+                num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+                drm_sched_entity_modify_sched(&aentity->entity, scheds,
+                                              num_scheds);
+        }
+}
+
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
                                   enum drm_sched_priority priority)
 {
-        int i;
-        struct amdgpu_device *adev = ctx->adev;
-        struct drm_sched_rq *rq;
-        struct drm_sched_entity *entity;
-        struct amdgpu_ring *ring;
         enum drm_sched_priority ctx_prio;
+        unsigned i, j;
 
         ctx->override_priority = priority;
 
         ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
                         ctx->init_priority : ctx->override_priority;
+        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+                for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+                        if (!ctx->entities[i][j])
+                                continue;
 
-        for (i = 0; i < adev->num_rings; i++) {
-                ring = adev->rings[i];
-                entity = &ctx->rings[i].entity;
-                rq = &ring->sched.sched_rq[ctx_prio];
-
-                if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
-                        continue;
-
-                drm_sched_entity_set_rq(entity, rq);
+                        amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
+                                                       i, ctx_prio);
+                }
         }
 }
 
-int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
+                               struct drm_sched_entity *entity)
 {
-        struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
-        unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
-        struct dma_fence *other = cring->fences[idx];
+        struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+        struct dma_fence *other;
+        unsigned idx;
+        long r;
 
-        if (other) {
-                signed long r;
-                r = dma_fence_wait(other, true);
-                if (r < 0) {
-                        if (r != -ERESTARTSYS)
-                                DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+        spin_lock(&ctx->ring_lock);
+        idx = centity->sequence & (amdgpu_sched_jobs - 1);
+        other = dma_fence_get(centity->fences[idx]);
+        spin_unlock(&ctx->ring_lock);
 
-                        return r;
-                }
-        }
+        if (!other)
+                return 0;
 
-        return 0;
+        r = dma_fence_wait(other, true);
+        if (r < 0 && r != -ERESTARTSYS)
+                DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+
+        dma_fence_put(other);
+        return r;
 }
 
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
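
Note on the fence bookkeeping above: amdgpu_ctx_add_fence() and amdgpu_ctx_get_fence() keep the last amdgpu_sched_jobs fences of an entity in a ring indexed by seq & (amdgpu_sched_jobs - 1), which relies on amdgpu_sched_jobs being a power of two; queries that fell out of that window return NULL, and sequence numbers that were never issued are rejected. A small userspace sketch of the same scheme, with integer ids standing in for dma_fence pointers:

#include <stdio.h>
#include <stdint.h>

enum { RING_SLOTS = 8 };   /* must be a power of two, like amdgpu_sched_jobs */

struct fence_ring {
        uint64_t sequence;              /* next sequence number to hand out */
        uint64_t fences[RING_SLOTS];    /* stand-ins for struct dma_fence *; 0 = empty */
};

static uint64_t add_fence(struct fence_ring *r, uint64_t fence_id)
{
        uint64_t seq = r->sequence++;

        r->fences[seq & (RING_SLOTS - 1)] = fence_id;   /* overwrite the oldest slot */
        return seq;
}

/* Returns the stored id, 0 if it already fell out of the window,
 * or (uint64_t)-1 for a sequence number that was never handed out. */
static uint64_t get_fence(const struct fence_ring *r, uint64_t seq)
{
        if (seq >= r->sequence)
                return (uint64_t)-1;                    /* like ERR_PTR(-EINVAL) */
        if (seq + RING_SLOTS < r->sequence)
                return 0;                               /* too old, like NULL */
        return r->fences[seq & (RING_SLOTS - 1)];
}

int main(void)
{
        struct fence_ring ring = { .sequence = 1 };     /* entity->sequence starts at 1 */

        for (uint64_t i = 1; i <= 20; i++)
                add_fence(&ring, 1000 + i);             /* fence id 1000+i gets seq i */

        printf("seq 19 -> %llu\n", (unsigned long long)get_fence(&ring, 19)); /* 1019 */
        printf("seq  2 -> %llu\n", (unsigned long long)get_fence(&ring, 2));  /* 0, recycled */
        printf("seq 99 -> %llu\n", (unsigned long long)get_fence(&ring, 99)); /* -1, future */
        return 0;
}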
@@ -442,57 +563,56 @@
         idr_init(&mgr->ctx_handles);
 }
 
-void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
+long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 {
         struct amdgpu_ctx *ctx;
         struct idr *idp;
-        uint32_t id, i;
-        long max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY;
+        uint32_t id, i, j;
 
         idp = &mgr->ctx_handles;
 
         mutex_lock(&mgr->lock);
         idr_for_each_entry(idp, ctx, id) {
+                for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+                        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+                                struct drm_sched_entity *entity;
 
-                if (!ctx->adev) {
-                        mutex_unlock(&mgr->lock);
-                        return;
-                }
+                                if (!ctx->entities[i][j])
+                                        continue;
 
-                for (i = 0; i < ctx->adev->num_rings; i++) {
-
-                        if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
-                                continue;
-
-                        max_wait = drm_sched_entity_flush(&ctx->rings[i].entity,
-                                                          max_wait);
+                                entity = &ctx->entities[i][j]->entity;
+                                timeout = drm_sched_entity_flush(entity, timeout);
+                        }
                 }
         }
         mutex_unlock(&mgr->lock);
+        return timeout;
 }
 
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 {
         struct amdgpu_ctx *ctx;
         struct idr *idp;
-        uint32_t id, i;
+        uint32_t id, i, j;
 
         idp = &mgr->ctx_handles;
 
         idr_for_each_entry(idp, ctx, id) {
+                if (kref_read(&ctx->refcount) != 1) {
+                        DRM_ERROR("ctx %p is still alive\n", ctx);
+                        continue;
+                }
 
-                if (!ctx->adev)
-                        return;
+                for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+                        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+                                struct drm_sched_entity *entity;
 
-                for (i = 0; i < ctx->adev->num_rings; i++) {
+                                if (!ctx->entities[i][j])
+                                        continue;
 
-                        if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
-                                continue;
-
-                        if (kref_read(&ctx->refcount) == 1)
-                                drm_sched_entity_fini(&ctx->rings[i].entity);
-                        else
-                                DRM_ERROR("ctx %p is still alive\n", ctx);
+                                entity = &ctx->entities[i][j]->entity;
+                                drm_sched_entity_fini(entity);
+                        }
                 }
         }
 }
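
Note on amdgpu_ctx_mgr_entity_flush() above: instead of a fixed MAX_WAIT_SCHED_ENTITY_Q_EMPTY per entity, the caller now supplies one overall timeout, and whatever drm_sched_entity_flush() leaves of it is passed on to the next entity, so the whole flush is bounded by a single budget. A toy sketch of that shrinking-budget pattern, with a simulated per-entity wait cost:

#include <stdio.h>

/* Simulated flush: "waits" up to `cost` out of the remaining budget and
 * returns what is left, never going negative. */
static long flush_one(long timeout, long cost)
{
        if (timeout <= 0)
                return 0;               /* budget exhausted: stop waiting */
        return (cost >= timeout) ? 0 : timeout - cost;
}

static long flush_all(const long *costs, int n, long timeout)
{
        for (int i = 0; i < n; i++)
                timeout = flush_one(timeout, costs[i]);   /* pass the remainder on */
        return timeout;
}

int main(void)
{
        const long costs[] = { 30, 50, 40, 10 };          /* per-entity wait costs */

        /* With a 100-unit budget the first three entities consume it all and
         * the last one is not waited on at all. */
        printf("remaining = %ld\n", flush_all(costs, 4, 100));
        return 0;
}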