[...]
  * Authors: monk liu <monk.liu@amd.com>
  */

-#include <drm/drmP.h>
 #include <drm/drm_auth.h>
 #include "amdgpu.h"
 #include "amdgpu_sched.h"
+#include "amdgpu_ras.h"
+#include <linux/nospec.h>
+
+#define to_amdgpu_ctx_entity(e) \
+	container_of((e), struct amdgpu_ctx_entity, entity)
+
+const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
+	[AMDGPU_HW_IP_GFX]	= 1,
+	[AMDGPU_HW_IP_COMPUTE]	= 4,
+	[AMDGPU_HW_IP_DMA]	= 2,
+	[AMDGPU_HW_IP_UVD]	= 1,
+	[AMDGPU_HW_IP_VCE]	= 1,
+	[AMDGPU_HW_IP_UVD_ENC]	= 1,
+	[AMDGPU_HW_IP_VCN_DEC]	= 1,
+	[AMDGPU_HW_IP_VCN_ENC]	= 1,
+	[AMDGPU_HW_IP_VCN_JPEG]	= 1,
+};

 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 				      enum drm_sched_priority priority)
 {
+	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_COUNT)
+		return -EINVAL;
+
 	/* NORMAL and below are accessible by everyone */
 	if (priority <= DRM_SCHED_PRIORITY_NORMAL)
 		return 0;
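The `to_amdgpu_ctx_entity()` macro added above recovers the per-context wrapper from the `struct drm_sched_entity` pointer that the scheduler API passes around. A minimal sketch of how that works, assuming an `amdgpu_ctx_entity` layout (declared in the header, not part of this diff) with an embedded entity and a flexible `fences[]` array:

```c
/*
 * Illustrative only -- the real declaration lives in amdgpu_ctx.h.
 * The assumed layout matches how this file uses it: a sequence
 * counter, the embedded scheduler entity, and a flexible array of
 * fence pointers sized by amdgpu_sched_jobs.
 */
struct amdgpu_ctx_entity {
	uint64_t		sequence;
	struct drm_sched_entity	entity;
	struct dma_fence	*fences[];
};

/* container_of() walks back from the embedded member to its wrapper. */
static inline struct amdgpu_ctx_entity *
example_to_ctx_entity(struct drm_sched_entity *e)
{
	return container_of(e, struct amdgpu_ctx_entity, entity);
}
```

The `amdgpu_ctx_num_entities[]` table caps how many such wrappers a context may hold per IP type; `amdgpu_ctx_get_entity()` further below bounds-checks the ring index against it.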
[...]
 	return -EACCES;
 }

+static enum gfx_pipe_priority amdgpu_ctx_sched_prio_to_compute_prio(enum drm_sched_priority prio)
+{
+	switch (prio) {
+	case DRM_SCHED_PRIORITY_HIGH:
+	case DRM_SCHED_PRIORITY_KERNEL:
+		return AMDGPU_GFX_PIPE_PRIO_HIGH;
+	default:
+		return AMDGPU_GFX_PIPE_PRIO_NORMAL;
+	}
+}
+
+static unsigned int amdgpu_ctx_prio_sched_to_hw(struct amdgpu_device *adev,
+						enum drm_sched_priority prio,
+						u32 hw_ip)
+{
+	unsigned int hw_prio;
+
+	hw_prio = (hw_ip == AMDGPU_HW_IP_COMPUTE) ?
+			amdgpu_ctx_sched_prio_to_compute_prio(prio) :
+			AMDGPU_RING_PRIO_DEFAULT;
+	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
+	if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
+		hw_prio = AMDGPU_RING_PRIO_DEFAULT;
+
+	return hw_prio;
+}
+
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
+				  const u32 ring)
+{
+	struct amdgpu_device *adev = ctx->adev;
+	struct amdgpu_ctx_entity *entity;
+	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
+	unsigned num_scheds = 0;
+	unsigned int hw_prio;
+	enum drm_sched_priority priority;
+	int r;
+
+	entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]),
+			 GFP_KERNEL);
+	if (!entity)
+		return -ENOMEM;
+
+	entity->sequence = 1;
+	priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+				ctx->init_priority : ctx->override_priority;
+	hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority, hw_ip);
+
+	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
+	scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+	num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+
+	/* disable load balance if the hw engine retains context among dependent jobs */
+	if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
+	    hw_ip == AMDGPU_HW_IP_VCN_DEC ||
+	    hw_ip == AMDGPU_HW_IP_UVD_ENC ||
+	    hw_ip == AMDGPU_HW_IP_UVD) {
+		sched = drm_sched_pick_best(scheds, num_scheds);
+		scheds = &sched;
+		num_scheds = 1;
+	}
+
+	r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
+				  &ctx->guilty);
+	if (r)
+		goto error_free_entity;
+
+	ctx->entities[hw_ip][ring] = entity;
+	return 0;
+
+error_free_entity:
+	kfree(entity);
+
+	return r;
+}
+
 static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			   enum drm_sched_priority priority,
 			   struct drm_file *filp,
 			   struct amdgpu_ctx *ctx)
 {
-	unsigned i, j;
 	int r;
-
-	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
-		return -EINVAL;

 	r = amdgpu_ctx_priority_permit(filp, priority);
 	if (r)
 		return r;

 	memset(ctx, 0, sizeof(*ctx));
+
 	ctx->adev = adev;
+
 	kref_init(&ctx->refcount);
 	spin_lock_init(&ctx->ring_lock);
-	ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
-			      sizeof(struct dma_fence*), GFP_KERNEL);
-	if (!ctx->fences)
-		return -ENOMEM;
-
 	mutex_init(&ctx->lock);
-
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		ctx->rings[i].sequence = 1;
-		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
-	}

 	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
 	ctx->reset_counter_query = ctx->reset_counter;
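`amdgpu_ctx_init_entity()` sizes the wrapper and its fence ring in one zeroed allocation by taking the offset just past the last `fences[]` slot. A small sketch of the sizing idiom, not driver code, assuming the entity layout sketched earlier:

```c
/*
 * offsetof(..., fences[njobs]) is everything before the flexible
 * array plus njobs fence pointers, so a single kcalloc() covers the
 * amdgpu_ctx_entity header and its fence slots together.
 */
static size_t example_entity_size(unsigned int njobs)
{
	return offsetof(struct amdgpu_ctx_entity, fences[njobs]);
	/* i.e. offsetof(struct amdgpu_ctx_entity, fences)
	 *      + njobs * sizeof(struct dma_fence *) */
}
```

Note also that `amdgpu_ctx_prio_sched_to_hw()` falls back to `AMDGPU_RING_PRIO_DEFAULT` whenever no scheduler is configured for the mapped hardware priority, so a high-priority request degrades to the default run queue instead of failing.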
[...]
 	ctx->init_priority = priority;
 	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;

-	/* create context entity for each ring */
-	for (i = 0; i < adev->num_rings; i++) {
-		struct amdgpu_ring *ring = adev->rings[i];
-		struct drm_sched_rq *rq;
-
-		rq = &ring->sched.sched_rq[priority];
-
-		if (ring == &adev->gfx.kiq.ring)
-			continue;
-
-		r = drm_sched_entity_init(&ctx->rings[i].entity,
-					  &rq, 1, &ctx->guilty);
-		if (r)
-			goto failed;
-	}
-
-	r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
-	if (r)
-		goto failed;
-
 	return 0;
+}

-failed:
-	for (j = 0; j < i; j++)
-		drm_sched_entity_destroy(&ctx->rings[j].entity);
-	kfree(ctx->fences);
-	ctx->fences = NULL;
-	return r;
+static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+{
+
+	int i;
+
+	if (!entity)
+		return;
+
+	for (i = 0; i < amdgpu_sched_jobs; ++i)
+		dma_fence_put(entity->fences[i]);
+
+	kfree(entity);
 }

 static void amdgpu_ctx_fini(struct kref *ref)
[...]
 	if (!adev)
 		return;

-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
-		for (j = 0; j < amdgpu_sched_jobs; ++j)
-			dma_fence_put(ctx->rings[i].fences[j]);
-	kfree(ctx->fences);
-	ctx->fences = NULL;
-
-	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
+			amdgpu_ctx_fini_entity(ctx->entities[i][j]);
+			ctx->entities[i][j] = NULL;
+		}
+	}

 	mutex_destroy(&ctx->lock);
-
 	kfree(ctx);
+}
+
+int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
+			  u32 ring, struct drm_sched_entity **entity)
+{
+	int r;
+
+	if (hw_ip >= AMDGPU_HW_IP_NUM) {
+		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
+		return -EINVAL;
+	}
+
+	/* Right now all IPs have only one instance - multiple rings. */
+	if (instance != 0) {
+		DRM_DEBUG("invalid ip instance: %d\n", instance);
+		return -EINVAL;
+	}
+
+	if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
+		DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
+		return -EINVAL;
+	}
+
+	if (ctx->entities[hw_ip][ring] == NULL) {
+		r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+		if (r)
+			return r;
+	}
+
+	*entity = &ctx->entities[hw_ip][ring]->entity;
+	return 0;
 }

 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
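`amdgpu_ctx_get_entity()` is the lookup the submission paths are expected to use: it validates the (hw_ip, instance, ring) triple and lazily creates the entity on first use. A hypothetical caller sketch follows; the function name here is illustrative, not the actual amdgpu_cs code:

```c
/* Hypothetical example: resolve a scheduler entity for a submission. */
static int example_resolve_entity(struct amdgpu_ctx *ctx,
				  u32 hw_ip, u32 instance, u32 ring,
				  struct drm_sched_entity **entity)
{
	int r;

	/* First use of this (hw_ip, ring) slot triggers amdgpu_ctx_init_entity(). */
	r = amdgpu_ctx_get_entity(ctx, hw_ip, instance, ring, entity);
	if (r)
		return r;	/* -EINVAL for a bad triple, -ENOMEM or an
				 * entity-init error on first use */

	/* *entity can now be handed to job init/submission and fence bookkeeping. */
	return 0;
}
```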
[...]
 		return -ENOMEM;

 	mutex_lock(&mgr->lock);
-	r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
+	r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
 	if (r < 0) {
 		mutex_unlock(&mgr->lock);
 		kfree(ctx);
[...]
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx;
-	u32 i;
+	u32 i, j;

 	ctx = container_of(ref, struct amdgpu_ctx, refcount);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			if (!ctx->entities[i][j])
+				continue;

-	for (i = 0; i < ctx->adev->num_rings; i++) {
-
-		if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
-			continue;
-
-		drm_sched_entity_destroy(&ctx->rings[i].entity);
+			drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+		}
 	}

 	amdgpu_ctx_fini(ref);
[...]
 	enum drm_sched_priority priority;

 	union drm_amdgpu_ctx *args = data;
-	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_device *adev = drm_to_adev(dev);
 	struct amdgpu_fpriv *fpriv = filp->driver_priv;

-	r = 0;
 	id = args->in.ctx_id;
-	priority = amdgpu_to_sched_priority(args->in.priority);
+	r = amdgpu_to_sched_priority(args->in.priority, &priority);

 	/* For backwards compatibility reasons, we need to accept
 	 * ioctls with garbage in the priority field */
-	if (priority == DRM_SCHED_PRIORITY_INVALID)
+	if (r == -EINVAL)
 		priority = DRM_SCHED_PRIORITY_NORMAL;

 	switch (args->in.op) {
[...]
 	return 0;
 }

-int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
-			 struct dma_fence *fence, uint64_t* handler)
+void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+			  struct drm_sched_entity *entity,
+			  struct dma_fence *fence, uint64_t* handle)
 {
-	struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
-	uint64_t seq = cring->sequence;
-	unsigned idx = 0;
+	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+	uint64_t seq = centity->sequence;
 	struct dma_fence *other = NULL;
+	unsigned idx = 0;

 	idx = seq & (amdgpu_sched_jobs - 1);
-	other = cring->fences[idx];
+	other = centity->fences[idx];
 	if (other)
 		BUG_ON(!dma_fence_is_signaled(other));

 	dma_fence_get(fence);

 	spin_lock(&ctx->ring_lock);
-	cring->fences[idx] = fence;
-	cring->sequence++;
+	centity->fences[idx] = fence;
+	centity->sequence++;
 	spin_unlock(&ctx->ring_lock);

 	dma_fence_put(other);
-	if (handler)
-		*handler = seq;
-
-	return 0;
+	if (handle)
+		*handle = seq;
 }

 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
-				       struct amdgpu_ring *ring, uint64_t seq)
+				       struct drm_sched_entity *entity,
+				       uint64_t seq)
 {
-	struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
+	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
 	struct dma_fence *fence;

 	spin_lock(&ctx->ring_lock);

 	if (seq == ~0ull)
-		seq = ctx->rings[ring->idx].sequence - 1;
+		seq = centity->sequence - 1;

-	if (seq >= cring->sequence) {
+	if (seq >= centity->sequence) {
 		spin_unlock(&ctx->ring_lock);
 		return ERR_PTR(-EINVAL);
 	}


-	if (seq + amdgpu_sched_jobs < cring->sequence) {
+	if (seq + amdgpu_sched_jobs < centity->sequence) {
 		spin_unlock(&ctx->ring_lock);
 		return NULL;
 	}

-	fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
+	fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
 	spin_unlock(&ctx->ring_lock);

 	return fence;
 }

+static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
+					    struct amdgpu_ctx_entity *aentity,
+					    int hw_ip,
+					    enum drm_sched_priority priority)
+{
+	struct amdgpu_device *adev = ctx->adev;
+	unsigned int hw_prio;
+	struct drm_gpu_scheduler **scheds = NULL;
+	unsigned num_scheds;
+
+	/* set sw priority */
+	drm_sched_entity_set_priority(&aentity->entity, priority);
+
+	/* set hw priority */
+	if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
+		hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority,
+						      AMDGPU_HW_IP_COMPUTE);
+		hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
+		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+		num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+		drm_sched_entity_modify_sched(&aentity->entity, scheds,
+					      num_scheds);
+	}
+}
+
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 				  enum drm_sched_priority priority)
 {
-	int i;
-	struct amdgpu_device *adev = ctx->adev;
-	struct drm_sched_rq *rq;
-	struct drm_sched_entity *entity;
-	struct amdgpu_ring *ring;
 	enum drm_sched_priority ctx_prio;
+	unsigned i, j;

 	ctx->override_priority = priority;

 	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
 			ctx->init_priority : ctx->override_priority;
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			if (!ctx->entities[i][j])
+				continue;

-	for (i = 0; i < adev->num_rings; i++) {
-		ring = adev->rings[i];
-		entity = &ctx->rings[i].entity;
-		rq = &ring->sched.sched_rq[ctx_prio];
-
-		if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
-			continue;
-
-		drm_sched_entity_set_rq(entity, rq);
+			amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
+						       i, ctx_prio);
+		}
 	}
 }

-int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
+			       struct drm_sched_entity *entity)
 {
-	struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
-	unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
-	struct dma_fence *other = cring->fences[idx];
+	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+	struct dma_fence *other;
+	unsigned idx;
+	long r;

-	if (other) {
-		signed long r;
-		r = dma_fence_wait(other, true);
-		if (r < 0) {
-			if (r != -ERESTARTSYS)
-				DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+	spin_lock(&ctx->ring_lock);
+	idx = centity->sequence & (amdgpu_sched_jobs - 1);
+	other = dma_fence_get(centity->fences[idx]);
+	spin_unlock(&ctx->ring_lock);

-			return r;
-		}
-	}
+	if (!other)
+		return 0;

-	return 0;
+	r = dma_fence_wait(other, true);
+	if (r < 0 && r != -ERESTARTSYS)
+		DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+
+	dma_fence_put(other);
+	return r;
 }

 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
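`amdgpu_ctx_add_fence()` and `amdgpu_ctx_get_fence()` treat the per-entity `fences[]` array as a ring buffer indexed by the 64-bit sequence number. A minimal sketch of the indexing, assuming `amdgpu_sched_jobs` is a power of two (the driver clamps and rounds the module parameter elsewhere, which is what makes the mask safe):

```c
/*
 * Minimal sketch, not driver code: with a power-of-two ring size the
 * bitmask is equivalent to seq % size, so consecutive sequence numbers
 * walk the fence slots and eventually recycle the oldest one (which
 * amdgpu_ctx_add_fence() asserts is already signaled).
 */
static inline unsigned int example_fence_slot(uint64_t seq, unsigned int size)
{
	return seq & (size - 1);	/* e.g. seq 35 with size 32 lands in slot 3 */
}
```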
[...]
 	idr_init(&mgr->ctx_handles);
 }

-void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
+long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 {
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
-	long max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY;
+	uint32_t id, i, j;

 	idp = &mgr->ctx_handles;

 	mutex_lock(&mgr->lock);
 	idr_for_each_entry(idp, ctx, id) {
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;

-		if (!ctx->adev) {
-			mutex_unlock(&mgr->lock);
-			return;
-		}
+				if (!ctx->entities[i][j])
+					continue;

-		for (i = 0; i < ctx->adev->num_rings; i++) {
-
-			if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
-				continue;
-
-			max_wait = drm_sched_entity_flush(&ctx->rings[i].entity,
-							  max_wait);
+				entity = &ctx->entities[i][j]->entity;
+				timeout = drm_sched_entity_flush(entity, timeout);
+			}
 		}
 	}
 	mutex_unlock(&mgr->lock);
+	return timeout;
 }

 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 {
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;

 	idp = &mgr->ctx_handles;

 	idr_for_each_entry(idp, ctx, id) {
+		if (kref_read(&ctx->refcount) != 1) {
+			DRM_ERROR("ctx %p is still alive\n", ctx);
+			continue;
+		}

-		if (!ctx->adev)
-			return;
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;

-		for (i = 0; i < ctx->adev->num_rings; i++) {
+				if (!ctx->entities[i][j])
+					continue;

-			if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
-				continue;
-
-			if (kref_read(&ctx->refcount) == 1)
-				drm_sched_entity_fini(&ctx->rings[i].entity);
-			else
-				DRM_ERROR("ctx %p is still alive\n", ctx);
+				entity = &ctx->entities[i][j]->entity;
+				drm_sched_entity_fini(entity);
+			}
 		}
 	}
 }
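`amdgpu_ctx_mgr_entity_flush()` now takes and returns a timeout, so one wait budget can be threaded through every context instead of each flush starting from `MAX_WAIT_SCHED_ENTITY_Q_EMPTY` on its own. A hypothetical caller sketch; the real call site is elsewhere in the driver and not part of this diff:

```c
/* Hypothetical example: flush all contexts of a file with one shared budget. */
static void example_flush_fpriv(struct amdgpu_fpriv *fpriv)
{
	long timeout = MAX_WAIT_SCHED_ENTITY_Q_EMPTY;

	/* Whatever the entity flush does not consume remains available
	 * for any further teardown waits the caller wants to perform. */
	timeout = amdgpu_ctx_mgr_entity_flush(&fpriv->ctx_mgr, timeout);
}
```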