2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
--- a/kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -22,14 +22,33 @@
  * Authors: monk liu <monk.liu@amd.com>
  */
 
-#include <drm/drmP.h>
 #include <drm/drm_auth.h>
 #include "amdgpu.h"
 #include "amdgpu_sched.h"
+#include "amdgpu_ras.h"
+#include <linux/nospec.h>
+
+#define to_amdgpu_ctx_entity(e) \
+        container_of((e), struct amdgpu_ctx_entity, entity)
+
+const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
+        [AMDGPU_HW_IP_GFX] = 1,
+        [AMDGPU_HW_IP_COMPUTE] = 4,
+        [AMDGPU_HW_IP_DMA] = 2,
+        [AMDGPU_HW_IP_UVD] = 1,
+        [AMDGPU_HW_IP_VCE] = 1,
+        [AMDGPU_HW_IP_UVD_ENC] = 1,
+        [AMDGPU_HW_IP_VCN_DEC] = 1,
+        [AMDGPU_HW_IP_VCN_ENC] = 1,
+        [AMDGPU_HW_IP_VCN_JPEG] = 1,
+};
 
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
                                       enum drm_sched_priority priority)
 {
+        if (priority < 0 || priority >= DRM_SCHED_PRIORITY_COUNT)
+                return -EINVAL;
+
         /* NORMAL and below are accessible by everyone */
         if (priority <= DRM_SCHED_PRIORITY_NORMAL)
                 return 0;
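
Note on the to_amdgpu_ctx_entity() macro added above: it uses the kernel's container_of() to map a pointer to the embedded drm_sched_entity back to its enclosing amdgpu_ctx_entity. Below is a minimal userspace sketch of that pattern, with made-up struct names standing in for the real amdgpu/drm types, just to illustrate the pointer arithmetic:

#include <stddef.h>
#include <stdio.h>

/* Simplified stand-ins; not the real amdgpu/drm types. */
struct sched_entity { int dummy; };

struct ctx_entity {
        unsigned long sequence;
        struct sched_entity entity;   /* embedded member, as in amdgpu_ctx_entity */
};

/* Same idea as the kernel macro: subtract the member offset from the member
 * pointer to recover the address of the enclosing structure. */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

#define to_ctx_entity(e) container_of((e), struct ctx_entity, entity)

int main(void)
{
        struct ctx_entity ce = { .sequence = 1 };
        struct sched_entity *inner = &ce.entity;

        /* Recovers &ce from the pointer to its embedded member. */
        printf("outer recovered correctly: %d\n", to_ctx_entity(inner) == &ce);
        return 0;
}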
@@ -43,36 +62,100 @@
         return -EACCES;
 }
 
+static enum gfx_pipe_priority amdgpu_ctx_sched_prio_to_compute_prio(enum drm_sched_priority prio)
+{
+        switch (prio) {
+        case DRM_SCHED_PRIORITY_HIGH:
+        case DRM_SCHED_PRIORITY_KERNEL:
+                return AMDGPU_GFX_PIPE_PRIO_HIGH;
+        default:
+                return AMDGPU_GFX_PIPE_PRIO_NORMAL;
+        }
+}
+
+static unsigned int amdgpu_ctx_prio_sched_to_hw(struct amdgpu_device *adev,
+                                                enum drm_sched_priority prio,
+                                                u32 hw_ip)
+{
+        unsigned int hw_prio;
+
+        hw_prio = (hw_ip == AMDGPU_HW_IP_COMPUTE) ?
+                        amdgpu_ctx_sched_prio_to_compute_prio(prio) :
+                        AMDGPU_RING_PRIO_DEFAULT;
+        hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
+        if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
+                hw_prio = AMDGPU_RING_PRIO_DEFAULT;
+
+        return hw_prio;
+}
+
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
+                                  const u32 ring)
+{
+        struct amdgpu_device *adev = ctx->adev;
+        struct amdgpu_ctx_entity *entity;
+        struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
+        unsigned num_scheds = 0;
+        unsigned int hw_prio;
+        enum drm_sched_priority priority;
+        int r;
+
+        entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]),
+                         GFP_KERNEL);
+        if (!entity)
+                return -ENOMEM;
+
+        entity->sequence = 1;
+        priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+                        ctx->init_priority : ctx->override_priority;
+        hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority, hw_ip);
+
+        hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
+        scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+        num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+
+        /* disable load balance if the hw engine retains context among dependent jobs */
+        if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
+            hw_ip == AMDGPU_HW_IP_VCN_DEC ||
+            hw_ip == AMDGPU_HW_IP_UVD_ENC ||
+            hw_ip == AMDGPU_HW_IP_UVD) {
+                sched = drm_sched_pick_best(scheds, num_scheds);
+                scheds = &sched;
+                num_scheds = 1;
+        }
+
+        r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
+                                  &ctx->guilty);
+        if (r)
+                goto error_free_entity;
+
+        ctx->entities[hw_ip][ring] = entity;
+        return 0;
+
+error_free_entity:
+        kfree(entity);
+
+        return r;
+}
+
 static int amdgpu_ctx_init(struct amdgpu_device *adev,
                            enum drm_sched_priority priority,
                            struct drm_file *filp,
                            struct amdgpu_ctx *ctx)
 {
-        unsigned i, j;
         int r;
-
-        if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
-                return -EINVAL;
 
         r = amdgpu_ctx_priority_permit(filp, priority);
         if (r)
                 return r;
 
         memset(ctx, 0, sizeof(*ctx));
+
         ctx->adev = adev;
+
         kref_init(&ctx->refcount);
         spin_lock_init(&ctx->ring_lock);
-        ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
-                              sizeof(struct dma_fence*), GFP_KERNEL);
-        if (!ctx->fences)
-                return -ENOMEM;
-
         mutex_init(&ctx->lock);
-
-        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-                ctx->rings[i].sequence = 1;
-                ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
-        }
 
         ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
         ctx->reset_counter_query = ctx->reset_counter;
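
Note on the array_index_nospec() calls in amdgpu_ctx_prio_sched_to_hw() and amdgpu_ctx_init_entity() above: the user-controlled hw_ip is clamped before it indexes adev->gpu_sched[], so even a mispredicted bounds check cannot speculatively read out of range (Spectre v1 hardening). The sketch below is a rough userspace model of the branchless clamp, loosely based on the kernel's generic array_index_mask_nospec(); the real helper additionally has to keep the compiler from optimizing the mask away:

#include <stdio.h>
#include <limits.h>

#define BITS_PER_LONG (sizeof(long) * CHAR_BIT)

/* All-ones when index < size, zero otherwise, computed without a branch.
 * Assumes size is non-zero and no larger than LONG_MAX; relies on the usual
 * arithmetic right shift of negative values. */
static unsigned long index_mask_nospec(unsigned long index, unsigned long size)
{
        return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1);
}

/* Clamp an untrusted index: valid indices pass through, anything else becomes 0. */
static unsigned long index_nospec(unsigned long index, unsigned long size)
{
        return index & index_mask_nospec(index, size);
}

int main(void)
{
        unsigned long table_size = 9;   /* e.g. the number of HW IP types */

        printf("%lu -> %lu\n", 3UL, index_nospec(3, table_size));    /* stays 3 */
        printf("%lu -> %lu\n", 42UL, index_nospec(42, table_size));  /* forced to 0 */
        return 0;
}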
@@ -80,34 +163,21 @@
         ctx->init_priority = priority;
         ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
 
-        /* create context entity for each ring */
-        for (i = 0; i < adev->num_rings; i++) {
-                struct amdgpu_ring *ring = adev->rings[i];
-                struct drm_sched_rq *rq;
-
-                rq = &ring->sched.sched_rq[priority];
-
-                if (ring == &adev->gfx.kiq.ring)
-                        continue;
-
-                r = drm_sched_entity_init(&ctx->rings[i].entity,
-                                          &rq, 1, &ctx->guilty);
-                if (r)
-                        goto failed;
-        }
-
-        r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
-        if (r)
-                goto failed;
-
         return 0;
+}
 
-failed:
-        for (j = 0; j < i; j++)
-                drm_sched_entity_destroy(&ctx->rings[j].entity);
-        kfree(ctx->fences);
-        ctx->fences = NULL;
-        return r;
+static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+{
+
+        int i;
+
+        if (!entity)
+                return;
+
+        for (i = 0; i < amdgpu_sched_jobs; ++i)
+                dma_fence_put(entity->fences[i]);
+
+        kfree(entity);
 }
 
 static void amdgpu_ctx_fini(struct kref *ref)
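
Note on the entity allocation freed by amdgpu_ctx_fini_entity() above: amdgpu_ctx_init_entity() sizes it with offsetof(typeof(*entity), fences[amdgpu_sched_jobs]), i.e. a single allocation covering the struct plus its trailing per-job fence array, and teardown walks exactly that many slots before freeing. A small userspace sketch of this sizing idiom, with hypothetical names:

#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>

enum { NUM_JOBS = 32 };   /* stand-in for amdgpu_sched_jobs */

struct fence;             /* opaque here */

struct ctx_entity {
        unsigned long sequence;
        struct fence *fences[];   /* flexible array member, sized at allocation */
};

int main(void)
{
        /* One zeroed allocation covers the header plus NUM_JOBS fence slots,
         * mirroring kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]), ...). */
        size_t size = offsetof(struct ctx_entity, fences[NUM_JOBS]);
        struct ctx_entity *entity = calloc(1, size);

        if (!entity)
                return 1;

        entity->sequence = 1;
        printf("entity + %d fence slots = %zu bytes\n", NUM_JOBS, size);

        /* Teardown would drop each fences[i] reference first, then free the
         * single allocation, as amdgpu_ctx_fini_entity() does. */
        free(entity);
        return 0;
}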
@@ -119,17 +189,46 @@
         if (!adev)
                 return;
 
-        for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
-                for (j = 0; j < amdgpu_sched_jobs; ++j)
-                        dma_fence_put(ctx->rings[i].fences[j]);
-        kfree(ctx->fences);
-        ctx->fences = NULL;
-
-        amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
+        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+                for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
+                        amdgpu_ctx_fini_entity(ctx->entities[i][j]);
+                        ctx->entities[i][j] = NULL;
+                }
+        }
 
         mutex_destroy(&ctx->lock);
-
         kfree(ctx);
+}
+
+int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
+                          u32 ring, struct drm_sched_entity **entity)
+{
+        int r;
+
+        if (hw_ip >= AMDGPU_HW_IP_NUM) {
+                DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
+                return -EINVAL;
+        }
+
+        /* Right now all IPs have only one instance - multiple rings. */
+        if (instance != 0) {
+                DRM_DEBUG("invalid ip instance: %d\n", instance);
+                return -EINVAL;
+        }
+
+        if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
+                DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
+                return -EINVAL;
+        }
+
+        if (ctx->entities[hw_ip][ring] == NULL) {
+                r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+                if (r)
+                        return r;
+        }
+
+        *entity = &ctx->entities[hw_ip][ring]->entity;
+        return 0;
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
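
Note on amdgpu_ctx_get_entity() above: it validates the user-supplied hw_ip/instance/ring triple and then creates the backing entity lazily on first use, instead of pre-allocating one per ring at context creation as the old code did. A compact userspace sketch of that validate-then-lazily-allocate lookup, with hypothetical bounds and names:

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

enum { HW_IP_NUM = 9, MAX_ENTITY_NUM = 4 };   /* hypothetical bounds */

struct entity { int initialized; };

static struct entity *entities[HW_IP_NUM][MAX_ENTITY_NUM];

/* Per-IP ring counts, in the spirit of amdgpu_ctx_num_entities[]. */
static const unsigned int num_entities[HW_IP_NUM] = { 1, 4, 2, 1, 1, 1, 1, 1, 1 };

static int get_entity(unsigned int hw_ip, unsigned int ring, struct entity **out)
{
        if (hw_ip >= HW_IP_NUM)
                return -EINVAL;             /* unknown IP type */
        if (ring >= num_entities[hw_ip])
                return -EINVAL;             /* ring out of range for this IP */

        if (!entities[hw_ip][ring]) {       /* first use: allocate on demand */
                struct entity *e = calloc(1, sizeof(*e));

                if (!e)
                        return -ENOMEM;
                e->initialized = 1;
                entities[hw_ip][ring] = e;
        }

        *out = entities[hw_ip][ring];
        return 0;
}

int main(void)
{
        struct entity *e;

        printf("valid lookup: %d\n", get_entity(1, 3, &e));   /* 0, allocates */
        printf("bad ring:     %d\n", get_entity(0, 3, &e));   /* -EINVAL */
        return 0;
}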
@@ -147,7 +246,7 @@
                 return -ENOMEM;
 
         mutex_lock(&mgr->lock);
-        r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
+        r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
         if (r < 0) {
                 mutex_unlock(&mgr->lock);
                 kfree(ctx);
@@ -168,16 +267,16 @@
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
         struct amdgpu_ctx *ctx;
-        u32 i;
+        u32 i, j;
 
         ctx = container_of(ref, struct amdgpu_ctx, refcount);
+        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+                for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+                        if (!ctx->entities[i][j])
+                                continue;
 
-        for (i = 0; i < ctx->adev->num_rings; i++) {
-
-                if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
-                        continue;
-
-                drm_sched_entity_destroy(&ctx->rings[i].entity);
+                        drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+                }
         }
 
         amdgpu_ctx_fini(ref);
@@ -274,16 +373,15 @@
         enum drm_sched_priority priority;
 
         union drm_amdgpu_ctx *args = data;
-        struct amdgpu_device *adev = dev->dev_private;
+        struct amdgpu_device *adev = drm_to_adev(dev);
         struct amdgpu_fpriv *fpriv = filp->driver_priv;
 
-        r = 0;
         id = args->in.ctx_id;
-        priority = amdgpu_to_sched_priority(args->in.priority);
+        r = amdgpu_to_sched_priority(args->in.priority, &priority);
 
         /* For backwards compatibility reasons, we need to accept
          * ioctls with garbage in the priority field */
-        if (priority == DRM_SCHED_PRIORITY_INVALID)
+        if (r == -EINVAL)
                 priority = DRM_SCHED_PRIORITY_NORMAL;
 
         switch (args->in.op) {
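
Note on the priority handling above: amdgpu_to_sched_priority() now reports bad input through its return value rather than a DRM_SCHED_PRIORITY_INVALID sentinel, and the ioctl deliberately maps -EINVAL to NORMAL so that old userspace passing garbage priorities keeps working. A tiny sketch of this parse-then-fall-back pattern, with made-up ABI values:

#include <stdio.h>
#include <errno.h>

enum sched_priority { PRIO_LOW, PRIO_NORMAL, PRIO_HIGH };

/* Translate an untrusted userspace value; reject anything unknown.
 * The case values are hypothetical, not the real AMDGPU ABI. */
static int to_sched_priority(int user_prio, enum sched_priority *out)
{
        switch (user_prio) {
        case -512: *out = PRIO_LOW;    return 0;
        case 0:    *out = PRIO_NORMAL; return 0;
        case 512:  *out = PRIO_HIGH;   return 0;
        default:   return -EINVAL;
        }
}

int main(void)
{
        enum sched_priority prio;
        int garbage = 12345;
        int r = to_sched_priority(garbage, &prio);

        /* Backwards compatibility: garbage input degrades to NORMAL
         * instead of failing the whole ioctl. */
        if (r == -EINVAL)
                prio = PRIO_NORMAL;

        printf("r=%d prio=%d\n", r, prio);
        return 0;
}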
@@ -334,106 +432,129 @@
         return 0;
 }
 
-int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
-                         struct dma_fence *fence, uint64_t* handler)
+void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+                          struct drm_sched_entity *entity,
+                          struct dma_fence *fence, uint64_t* handle)
 {
-        struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
-        uint64_t seq = cring->sequence;
-        unsigned idx = 0;
+        struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+        uint64_t seq = centity->sequence;
         struct dma_fence *other = NULL;
+        unsigned idx = 0;
 
         idx = seq & (amdgpu_sched_jobs - 1);
-        other = cring->fences[idx];
+        other = centity->fences[idx];
         if (other)
                 BUG_ON(!dma_fence_is_signaled(other));
 
         dma_fence_get(fence);
 
         spin_lock(&ctx->ring_lock);
-        cring->fences[idx] = fence;
-        cring->sequence++;
+        centity->fences[idx] = fence;
+        centity->sequence++;
         spin_unlock(&ctx->ring_lock);
 
         dma_fence_put(other);
-        if (handler)
-                *handler = seq;
-
-        return 0;
+        if (handle)
+                *handle = seq;
 }
 
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
-                                       struct amdgpu_ring *ring, uint64_t seq)
+                                       struct drm_sched_entity *entity,
+                                       uint64_t seq)
 {
-        struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
+        struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
         struct dma_fence *fence;
 
         spin_lock(&ctx->ring_lock);
 
         if (seq == ~0ull)
-                seq = ctx->rings[ring->idx].sequence - 1;
+                seq = centity->sequence - 1;
 
-        if (seq >= cring->sequence) {
+        if (seq >= centity->sequence) {
                 spin_unlock(&ctx->ring_lock);
                 return ERR_PTR(-EINVAL);
         }
 
 
-        if (seq + amdgpu_sched_jobs < cring->sequence) {
+        if (seq + amdgpu_sched_jobs < centity->sequence) {
                 spin_unlock(&ctx->ring_lock);
                 return NULL;
         }
 
-        fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
+        fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
         spin_unlock(&ctx->ring_lock);
 
         return fence;
 }
 
+static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
+                                           struct amdgpu_ctx_entity *aentity,
+                                           int hw_ip,
+                                           enum drm_sched_priority priority)
+{
+        struct amdgpu_device *adev = ctx->adev;
+        unsigned int hw_prio;
+        struct drm_gpu_scheduler **scheds = NULL;
+        unsigned num_scheds;
+
+        /* set sw priority */
+        drm_sched_entity_set_priority(&aentity->entity, priority);
+
+        /* set hw priority */
+        if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
+                hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority,
+                                                      AMDGPU_HW_IP_COMPUTE);
+                hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
+                scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+                num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+                drm_sched_entity_modify_sched(&aentity->entity, scheds,
+                                              num_scheds);
+        }
+}
+
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
                                   enum drm_sched_priority priority)
 {
-        int i;
-        struct amdgpu_device *adev = ctx->adev;
-        struct drm_sched_rq *rq;
-        struct drm_sched_entity *entity;
-        struct amdgpu_ring *ring;
         enum drm_sched_priority ctx_prio;
+        unsigned i, j;
 
         ctx->override_priority = priority;
 
         ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
                         ctx->init_priority : ctx->override_priority;
+        for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+                for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+                        if (!ctx->entities[i][j])
+                                continue;
 
-        for (i = 0; i < adev->num_rings; i++) {
-                ring = adev->rings[i];
-                entity = &ctx->rings[i].entity;
-                rq = &ring->sched.sched_rq[ctx_prio];
-
-                if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
-                        continue;
-
-                drm_sched_entity_set_rq(entity, rq);
+                        amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
+                                                       i, ctx_prio);
+                }
         }
 }
 
-int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
+                               struct drm_sched_entity *entity)
 {
-        struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
-        unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
-        struct dma_fence *other = cring->fences[idx];
+        struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+        struct dma_fence *other;
+        unsigned idx;
+        long r;
 
-        if (other) {
-                signed long r;
-                r = dma_fence_wait(other, true);
-                if (r < 0) {
-                        if (r != -ERESTARTSYS)
-                                DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+        spin_lock(&ctx->ring_lock);
+        idx = centity->sequence & (amdgpu_sched_jobs - 1);
+        other = dma_fence_get(centity->fences[idx]);
+        spin_unlock(&ctx->ring_lock);
 
-                        return r;
-                }
-        }
+        if (!other)
+                return 0;
 
-        return 0;
+        r = dma_fence_wait(other, true);
+        if (r < 0 && r != -ERESTARTSYS)
+                DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+
+        dma_fence_put(other);
+        return r;
 }
 
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
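
Note on the fence bookkeeping above: amdgpu_ctx_add_fence() and amdgpu_ctx_get_fence() keep the last amdgpu_sched_jobs fences of an entity in a ring indexed by seq & (amdgpu_sched_jobs - 1), which relies on amdgpu_sched_jobs being a power of two; queries that fell out of that window return NULL, and sequence numbers that were never issued are rejected. A small userspace sketch of the same scheme, with integer ids standing in for dma_fence pointers:

#include <stdio.h>
#include <stdint.h>

enum { RING_SLOTS = 8 };   /* must be a power of two, like amdgpu_sched_jobs */

struct fence_ring {
        uint64_t sequence;              /* next sequence number to hand out */
        uint64_t fences[RING_SLOTS];    /* stand-ins for struct dma_fence *; 0 = empty */
};

static uint64_t add_fence(struct fence_ring *r, uint64_t fence_id)
{
        uint64_t seq = r->sequence++;

        r->fences[seq & (RING_SLOTS - 1)] = fence_id;   /* overwrite the oldest slot */
        return seq;
}

/* Returns the stored id, 0 if it already fell out of the window,
 * or (uint64_t)-1 for a sequence number that was never handed out. */
static uint64_t get_fence(const struct fence_ring *r, uint64_t seq)
{
        if (seq >= r->sequence)
                return (uint64_t)-1;                    /* like ERR_PTR(-EINVAL) */
        if (seq + RING_SLOTS < r->sequence)
                return 0;                               /* too old, like NULL */
        return r->fences[seq & (RING_SLOTS - 1)];
}

int main(void)
{
        struct fence_ring ring = { .sequence = 1 };     /* entity->sequence starts at 1 */

        for (uint64_t i = 1; i <= 20; i++)
                add_fence(&ring, 1000 + i);             /* fence id 1000+i gets seq i */

        printf("seq 19 -> %llu\n", (unsigned long long)get_fence(&ring, 19)); /* 1019 */
        printf("seq  2 -> %llu\n", (unsigned long long)get_fence(&ring, 2));  /* 0, recycled */
        printf("seq 99 -> %llu\n", (unsigned long long)get_fence(&ring, 99)); /* -1, future */
        return 0;
}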
@@ -442,57 +563,56 @@
         idr_init(&mgr->ctx_handles);
 }
 
-void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
+long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 {
         struct amdgpu_ctx *ctx;
         struct idr *idp;
-        uint32_t id, i;
-        long max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY;
+        uint32_t id, i, j;
 
         idp = &mgr->ctx_handles;
 
         mutex_lock(&mgr->lock);
         idr_for_each_entry(idp, ctx, id) {
+                for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+                        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+                                struct drm_sched_entity *entity;
 
-                if (!ctx->adev) {
-                        mutex_unlock(&mgr->lock);
-                        return;
-                }
+                                if (!ctx->entities[i][j])
+                                        continue;
 
-                for (i = 0; i < ctx->adev->num_rings; i++) {
-
-                        if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
-                                continue;
-
-                        max_wait = drm_sched_entity_flush(&ctx->rings[i].entity,
-                                                          max_wait);
+                                entity = &ctx->entities[i][j]->entity;
+                                timeout = drm_sched_entity_flush(entity, timeout);
+                        }
                 }
         }
         mutex_unlock(&mgr->lock);
+        return timeout;
 }
 
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 {
         struct amdgpu_ctx *ctx;
         struct idr *idp;
-        uint32_t id, i;
+        uint32_t id, i, j;
 
         idp = &mgr->ctx_handles;
 
         idr_for_each_entry(idp, ctx, id) {
+                if (kref_read(&ctx->refcount) != 1) {
+                        DRM_ERROR("ctx %p is still alive\n", ctx);
+                        continue;
+                }
 
-                if (!ctx->adev)
-                        return;
+                for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+                        for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+                                struct drm_sched_entity *entity;
 
-                for (i = 0; i < ctx->adev->num_rings; i++) {
+                                if (!ctx->entities[i][j])
+                                        continue;
 
-                        if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
-                                continue;
-
-                        if (kref_read(&ctx->refcount) == 1)
-                                drm_sched_entity_fini(&ctx->rings[i].entity);
-                        else
-                                DRM_ERROR("ctx %p is still alive\n", ctx);
+                                entity = &ctx->entities[i][j]->entity;
+                                drm_sched_entity_fini(entity);
+                        }
                 }
         }
 }
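
Note on amdgpu_ctx_mgr_entity_flush() above: instead of a fixed MAX_WAIT_SCHED_ENTITY_Q_EMPTY per entity, the caller now supplies one overall timeout, and whatever drm_sched_entity_flush() leaves of it is passed on to the next entity, so the whole flush is bounded by a single budget. A toy sketch of that shrinking-budget pattern, with a simulated per-entity wait cost:

#include <stdio.h>

/* Simulated flush: "waits" up to `cost` out of the remaining budget and
 * returns what is left, never going negative. */
static long flush_one(long timeout, long cost)
{
        if (timeout <= 0)
                return 0;               /* budget exhausted: stop waiting */
        return (cost >= timeout) ? 0 : timeout - cost;
}

static long flush_all(const long *costs, int n, long timeout)
{
        for (int i = 0; i < n; i++)
                timeout = flush_one(timeout, costs[i]);   /* pass the remainder on */
        return timeout;
}

int main(void)
{
        const long costs[] = { 30, 50, 40, 10 };          /* per-entity wait costs */

        /* With a 100-unit budget the first three entities consume it all and
         * the last one is not waited on at all. */
        printf("remaining = %ld\n", flush_all(costs, 4, 100));
        return 0;
}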