| .. | .. |
|---|
| 22 | 22 | * OTHER DEALINGS IN THE SOFTWARE. |
|---|
| 23 | 23 | * |
|---|
| 24 | 24 | */ |
|---|
| 25 | | -#include <drm/drmP.h> |
|---|
| 25 | + |
|---|
| 26 | 26 | #include "amdgpu.h" |
|---|
| 27 | 27 | #include "amdgpu_gfx.h" |
|---|
| 28 | +#include "amdgpu_rlc.h" |
|---|
| 29 | +#include "amdgpu_ras.h" |
|---|
| 30 | + |
|---|
| 31 | +/* delay 0.1 second to enable gfx off feature */ |
|---|
| 32 | +#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) |
|---|
| 28 | 33 | |
|---|
| 29 | 34 | /* |
|---|
| 30 | | - * GPU scratch registers helpers function. |
|---|
| 35 | + * GPU GFX IP block helper functions. |
|---|
| 31 | 36 | */ |
|---|
| 37 | + |
|---|
| 38 | +int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec, |
|---|
| 39 | + int pipe, int queue) |
|---|
| 40 | +{ |
|---|
| 41 | + int bit = 0; |
|---|
| 42 | + |
|---|
| 43 | + bit += mec * adev->gfx.mec.num_pipe_per_mec |
|---|
| 44 | + * adev->gfx.mec.num_queue_per_pipe; |
|---|
| 45 | + bit += pipe * adev->gfx.mec.num_queue_per_pipe; |
|---|
| 46 | + bit += queue; |
|---|
| 47 | + |
|---|
| 48 | + return bit; |
|---|
| 49 | +} |
|---|
| 50 | + |
|---|
| 51 | +void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit, |
|---|
| 52 | + int *mec, int *pipe, int *queue) |
|---|
| 53 | +{ |
|---|
| 54 | + *queue = bit % adev->gfx.mec.num_queue_per_pipe; |
|---|
| 55 | + *pipe = (bit / adev->gfx.mec.num_queue_per_pipe) |
|---|
| 56 | + % adev->gfx.mec.num_pipe_per_mec; |
|---|
| 57 | + *mec = (bit / adev->gfx.mec.num_queue_per_pipe) |
|---|
| 58 | + / adev->gfx.mec.num_pipe_per_mec; |
|---|
| 59 | + |
|---|
| 60 | +} |
|---|
| 61 | + |
|---|
| 62 | +bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, |
|---|
| 63 | + int mec, int pipe, int queue) |
|---|
| 64 | +{ |
|---|
| 65 | + return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue), |
|---|
| 66 | + adev->gfx.mec.queue_bitmap); |
|---|
| 67 | +} |
|---|
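For illustration only (not part of the patch): the three MEC helpers above linearize a (mec, pipe, queue) triple into a flat bit index, with the queue index varying fastest, and invert that mapping again. A minimal standalone sketch of the same arithmetic, with assumed topology values standing in for the real adev->gfx.mec fields:

```c
/* Sketch only -- mirrors amdgpu_gfx_mec_queue_to_bit() and
 * amdgpu_queue_mask_bit_to_mec_queue(); the topology constants are assumptions,
 * the driver reads them from adev->gfx.mec at runtime.
 */
#include <stdio.h>

#define NUM_PIPE_PER_MEC	4	/* assumed value */
#define NUM_QUEUE_PER_PIPE	8	/* assumed value */

static int mec_queue_to_bit(int mec, int pipe, int queue)
{
	return (mec * NUM_PIPE_PER_MEC + pipe) * NUM_QUEUE_PER_PIPE + queue;
}

static void bit_to_mec_queue(int bit, int *mec, int *pipe, int *queue)
{
	*queue = bit % NUM_QUEUE_PER_PIPE;
	*pipe = (bit / NUM_QUEUE_PER_PIPE) % NUM_PIPE_PER_MEC;
	*mec = (bit / NUM_QUEUE_PER_PIPE) / NUM_PIPE_PER_MEC;
}

int main(void)
{
	int mec, pipe, queue;
	int bit = mec_queue_to_bit(1, 2, 3);	/* (1 * 4 + 2) * 8 + 3 = 51 */

	bit_to_mec_queue(bit, &mec, &pipe, &queue);
	printf("bit %d -> mec %d pipe %d queue %d\n", bit, mec, pipe, queue);
	return 0;
}
```

The ME (graphics) helpers that follow use the identical scheme with the adev->gfx.me fields.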
| 68 | + |
|---|
| 69 | +int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, |
|---|
| 70 | + int me, int pipe, int queue) |
|---|
| 71 | +{ |
|---|
| 72 | + int bit = 0; |
|---|
| 73 | + |
|---|
| 74 | + bit += me * adev->gfx.me.num_pipe_per_me |
|---|
| 75 | + * adev->gfx.me.num_queue_per_pipe; |
|---|
| 76 | + bit += pipe * adev->gfx.me.num_queue_per_pipe; |
|---|
| 77 | + bit += queue; |
|---|
| 78 | + |
|---|
| 79 | + return bit; |
|---|
| 80 | +} |
|---|
| 81 | + |
|---|
| 82 | +void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, |
|---|
| 83 | + int *me, int *pipe, int *queue) |
|---|
| 84 | +{ |
|---|
| 85 | + *queue = bit % adev->gfx.me.num_queue_per_pipe; |
|---|
| 86 | + *pipe = (bit / adev->gfx.me.num_queue_per_pipe) |
|---|
| 87 | + % adev->gfx.me.num_pipe_per_me; |
|---|
| 88 | + *me = (bit / adev->gfx.me.num_queue_per_pipe) |
|---|
| 89 | + / adev->gfx.me.num_pipe_per_me; |
|---|
| 90 | +} |
|---|
| 91 | + |
|---|
| 92 | +bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, |
|---|
| 93 | + int me, int pipe, int queue) |
|---|
| 94 | +{ |
|---|
| 95 | + return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue), |
|---|
| 96 | + adev->gfx.me.queue_bitmap); |
|---|
| 97 | +} |
|---|
| 98 | + |
|---|
| 32 | 99 | /** |
|---|
| 33 | 100 | * amdgpu_gfx_scratch_get - Allocate a scratch register |
|---|
| 34 | 101 | * |
|---|
| .. | .. |
|---|
| 125 | 192 | return adev->gfx.mec.num_mec > 1; |
|---|
| 126 | 193 | } |
|---|
| 127 | 194 | |
|---|
| 195 | +bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, |
|---|
| 196 | + int pipe, int queue) |
|---|
| 197 | +{ |
|---|
| 198 | + bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev); |
|---|
| 199 | + int cond; |
|---|
| 200 | + /* Policy: alternate between normal and high priority */ |
|---|
| 201 | + cond = multipipe_policy ? pipe : queue; |
|---|
| 202 | + |
|---|
| 203 | + return ((cond % 2) != 0); |
|---|
| 204 | + |
|---|
| 205 | +} |
|---|
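In other words, the priority policy added above keys off parity: when amdgpu_gfx_is_multipipe_capable() returns true (more than one MEC, per the context line above), odd-numbered pipes are treated as high priority; otherwise odd-numbered queues are. For example, pipe 0 / queue 1 is a high-priority compute queue only in the single-MEC case.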
| 206 | + |
|---|
| 128 | 207 | void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) |
|---|
| 129 | 208 | { |
|---|
| 130 | | - int i, queue, pipe, mec; |
|---|
| 209 | + int i, queue, pipe; |
|---|
| 131 | 210 | bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev); |
|---|
| 211 | + int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec * |
|---|
| 212 | + adev->gfx.mec.num_queue_per_pipe, |
|---|
| 213 | + adev->gfx.num_compute_rings); |
|---|
| 132 | 214 | |
|---|
| 133 | | - /* policy for amdgpu compute queue ownership */ |
|---|
| 134 | | - for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { |
|---|
| 135 | | - queue = i % adev->gfx.mec.num_queue_per_pipe; |
|---|
| 136 | | - pipe = (i / adev->gfx.mec.num_queue_per_pipe) |
|---|
| 137 | | - % adev->gfx.mec.num_pipe_per_mec; |
|---|
| 138 | | - mec = (i / adev->gfx.mec.num_queue_per_pipe) |
|---|
| 139 | | - / adev->gfx.mec.num_pipe_per_mec; |
|---|
| 215 | + if (multipipe_policy) { |
|---|
| 216 | + /* policy: spread queues evenly across all pipes on MEC1 only */ |
|---|
| 217 | + for (i = 0; i < max_queues_per_mec; i++) { |
|---|
| 218 | + pipe = i % adev->gfx.mec.num_pipe_per_mec; |
|---|
| 219 | + queue = (i / adev->gfx.mec.num_pipe_per_mec) % |
|---|
| 220 | + adev->gfx.mec.num_queue_per_pipe; |
|---|
| 140 | 221 | |
|---|
| 141 | | - /* we've run out of HW */ |
|---|
| 142 | | - if (mec >= adev->gfx.mec.num_mec) |
|---|
| 143 | | - break; |
|---|
| 144 | | - |
|---|
| 145 | | - if (multipipe_policy) { |
|---|
| 146 | | - /* policy: amdgpu owns the first two queues of the first MEC */ |
|---|
| 147 | | - if (mec == 0 && queue < 2) |
|---|
| 148 | | - set_bit(i, adev->gfx.mec.queue_bitmap); |
|---|
| 149 | | - } else { |
|---|
| 150 | | - /* policy: amdgpu owns all queues in the first pipe */ |
|---|
| 151 | | - if (mec == 0 && pipe == 0) |
|---|
| 152 | | - set_bit(i, adev->gfx.mec.queue_bitmap); |
|---|
| 222 | + set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue, |
|---|
| 223 | + adev->gfx.mec.queue_bitmap); |
|---|
| 153 | 224 | } |
|---|
| 225 | + } else { |
|---|
| 226 | + /* policy: amdgpu owns all queues in the given pipe */ |
|---|
| 227 | + for (i = 0; i < max_queues_per_mec; ++i) |
|---|
| 228 | + set_bit(i, adev->gfx.mec.queue_bitmap); |
|---|
| 154 | 229 | } |
|---|
| 155 | 230 | |
|---|
| 156 | | - /* update the number of active compute rings */ |
|---|
| 157 | | - adev->gfx.num_compute_rings = |
|---|
| 158 | | - bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); |
|---|
| 231 | + dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)); |
|---|
| 232 | +} |
|---|
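Worked example (values assumed for illustration, not taken from the patch): with 4 pipes per MEC, 8 queues per pipe and num_compute_rings = 8, max_queues_per_mec is 8. The multipipe branch then sets bits 0, 8, 16, 24, 1, 9, 17, 25 — one queue on each pipe of MEC1 before doubling up — while the else branch sets bits 0 through 7, i.e. every queue of pipe 0.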
| 159 | 233 | |
|---|
| 160 | | - /* If you hit this case and edited the policy, you probably just |
|---|
| 161 | | - * need to increase AMDGPU_MAX_COMPUTE_RINGS */ |
|---|
| 162 | | - if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS)) |
|---|
| 163 | | - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; |
|---|
| 234 | +void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) |
|---|
| 235 | +{ |
|---|
| 236 | + int i, queue, me; |
|---|
| 237 | + |
|---|
| 238 | + for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) { |
|---|
| 239 | + queue = i % adev->gfx.me.num_queue_per_pipe; |
|---|
| 240 | + me = (i / adev->gfx.me.num_queue_per_pipe) |
|---|
| 241 | + / adev->gfx.me.num_pipe_per_me; |
|---|
| 242 | + |
|---|
| 243 | + if (me >= adev->gfx.me.num_me) |
|---|
| 244 | + break; |
|---|
| 245 | + /* policy: amdgpu owns the first queue per pipe at this stage |
|---|
| 246 | + * will extend to multiple queues per pipe later */ |
|---|
| 247 | + if (me == 0 && queue < 1) |
|---|
| 248 | + set_bit(i, adev->gfx.me.queue_bitmap); |
|---|
| 249 | + } |
|---|
| 250 | + |
|---|
| 251 | + /* update the number of active graphics rings */ |
|---|
| 252 | + adev->gfx.num_gfx_rings = |
|---|
| 253 | + bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); |
|---|
| 164 | 254 | } |
|---|
| 165 | 255 | |
|---|
| 166 | 256 | static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, |
|---|
| .. | .. |
|---|
| 173 | 263 | * adev->gfx.mec.num_pipe_per_mec |
|---|
| 174 | 264 | * adev->gfx.mec.num_queue_per_pipe; |
|---|
| 175 | 265 | |
|---|
| 176 | | - while (queue_bit-- >= 0) { |
|---|
| 266 | + while (--queue_bit >= 0) { |
|---|
| 177 | 267 | if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) |
|---|
| 178 | 268 | continue; |
|---|
| 179 | 269 | |
|---|
| 180 | | - amdgpu_gfx_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue); |
|---|
| 270 | + amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue); |
|---|
| 181 | 271 | |
|---|
| 182 | 272 | /* |
|---|
| 183 | 273 | * 1. Using pipes 2/3 from MEC 2 seems cause problems. |
|---|
| .. | .. |
|---|
| 207 | 297 | |
|---|
| 208 | 298 | spin_lock_init(&kiq->ring_lock); |
|---|
| 209 | 299 | |
|---|
| 210 | | - r = amdgpu_device_wb_get(adev, &adev->virt.reg_val_offs); |
|---|
| 211 | | - if (r) |
|---|
| 212 | | - return r; |
|---|
| 213 | | - |
|---|
| 214 | 300 | ring->adev = NULL; |
|---|
| 215 | 301 | ring->ring_obj = NULL; |
|---|
| 216 | 302 | ring->use_doorbell = true; |
|---|
| 217 | | - ring->doorbell_index = AMDGPU_DOORBELL_KIQ; |
|---|
| 303 | + ring->doorbell_index = adev->doorbell_index.kiq; |
|---|
| 218 | 304 | |
|---|
| 219 | 305 | r = amdgpu_gfx_kiq_acquire(adev, ring); |
|---|
| 220 | 306 | if (r) |
|---|
| 221 | 307 | return r; |
|---|
| 222 | 308 | |
|---|
| 223 | 309 | ring->eop_gpu_addr = kiq->eop_gpu_addr; |
|---|
| 310 | + ring->no_scheduler = true; |
|---|
| 224 | 311 | sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue); |
|---|
| 225 | 312 | r = amdgpu_ring_init(adev, ring, 1024, |
|---|
| 226 | | - irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); |
|---|
| 313 | + irq, AMDGPU_CP_KIQ_IRQ_DRIVER0, |
|---|
| 314 | + AMDGPU_RING_PRIO_DEFAULT); |
|---|
| 227 | 315 | if (r) |
|---|
| 228 | 316 | dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); |
|---|
| 229 | 317 | |
|---|
| 230 | 318 | return r; |
|---|
| 231 | 319 | } |
|---|
| 232 | 320 | |
|---|
| 233 | | -void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, |
|---|
| 234 | | - struct amdgpu_irq_src *irq) |
|---|
| 321 | +void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) |
|---|
| 235 | 322 | { |
|---|
| 236 | | - amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs); |
|---|
| 237 | 323 | amdgpu_ring_fini(ring); |
|---|
| 238 | 324 | } |
|---|
| 239 | 325 | |
|---|
| .. | .. |
|---|
| 270 | 356 | return 0; |
|---|
| 271 | 357 | } |
|---|
| 272 | 358 | |
|---|
| 273 | | -/* create MQD for each compute queue */ |
|---|
| 274 | | -int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, |
|---|
| 275 | | - unsigned mqd_size) |
|---|
| 359 | +/* create MQD for each compute/gfx queue */ |
|---|
| 360 | +int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, |
|---|
| 361 | + unsigned mqd_size) |
|---|
| 276 | 362 | { |
|---|
| 277 | 363 | struct amdgpu_ring *ring = NULL; |
|---|
| 278 | 364 | int r, i; |
|---|
| .. | .. |
|---|
| 299 | 385 | dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); |
|---|
| 300 | 386 | } |
|---|
| 301 | 387 | |
|---|
| 388 | + if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { |
|---|
| 389 | + /* create MQD for each KGQ */ |
|---|
| 390 | + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { |
|---|
| 391 | + ring = &adev->gfx.gfx_ring[i]; |
|---|
| 392 | + if (!ring->mqd_obj) { |
|---|
| 393 | + r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, |
|---|
| 394 | + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, |
|---|
| 395 | + &ring->mqd_gpu_addr, &ring->mqd_ptr); |
|---|
| 396 | + if (r) { |
|---|
| 397 | + dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); |
|---|
| 398 | + return r; |
|---|
| 399 | + } |
|---|
| 400 | + |
|---|
| 401 | + /* prepare MQD backup */ |
|---|
| 402 | + adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL); |
|---|
| 403 | + if (!adev->gfx.me.mqd_backup[i]) |
|---|
| 404 | + dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); |
|---|
| 405 | + } |
|---|
| 406 | + } |
|---|
| 407 | + } |
|---|
| 408 | + |
|---|
| 302 | 409 | /* create MQD for each KCQ */ |
|---|
| 303 | 410 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
|---|
| 304 | 411 | ring = &adev->gfx.compute_ring[i]; |
|---|
| .. | .. |
|---|
| 307 | 414 | AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, |
|---|
| 308 | 415 | &ring->mqd_gpu_addr, &ring->mqd_ptr); |
|---|
| 309 | 416 | if (r) { |
|---|
| 310 | | - dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); |
|---|
| 417 | + dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); |
|---|
| 311 | 418 | return r; |
|---|
| 312 | 419 | } |
|---|
| 313 | 420 | |
|---|
| .. | .. |
|---|
| 321 | 428 | return 0; |
|---|
| 322 | 429 | } |
|---|
| 323 | 430 | |
|---|
| 324 | | -void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev) |
|---|
| 431 | +void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev) |
|---|
| 325 | 432 | { |
|---|
| 326 | 433 | struct amdgpu_ring *ring = NULL; |
|---|
| 327 | 434 | int i; |
|---|
| 435 | + |
|---|
| 436 | + if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { |
|---|
| 437 | + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { |
|---|
| 438 | + ring = &adev->gfx.gfx_ring[i]; |
|---|
| 439 | + kfree(adev->gfx.me.mqd_backup[i]); |
|---|
| 440 | + amdgpu_bo_free_kernel(&ring->mqd_obj, |
|---|
| 441 | + &ring->mqd_gpu_addr, |
|---|
| 442 | + &ring->mqd_ptr); |
|---|
| 443 | + } |
|---|
| 444 | + } |
|---|
| 328 | 445 | |
|---|
| 329 | 446 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
|---|
| 330 | 447 | ring = &adev->gfx.compute_ring[i]; |
|---|
| .. | .. |
|---|
| 340 | 457 | &ring->mqd_gpu_addr, |
|---|
| 341 | 458 | &ring->mqd_ptr); |
|---|
| 342 | 459 | } |
|---|
| 460 | + |
|---|
| 461 | +int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev) |
|---|
| 462 | +{ |
|---|
| 463 | + struct amdgpu_kiq *kiq = &adev->gfx.kiq; |
|---|
| 464 | + struct amdgpu_ring *kiq_ring = &kiq->ring; |
|---|
| 465 | + int i; |
|---|
| 466 | + |
|---|
| 467 | + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) |
|---|
| 468 | + return -EINVAL; |
|---|
| 469 | + |
|---|
| 470 | + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * |
|---|
| 471 | + adev->gfx.num_compute_rings)) |
|---|
| 472 | + return -ENOMEM; |
|---|
| 473 | + |
|---|
| 474 | + for (i = 0; i < adev->gfx.num_compute_rings; i++) |
|---|
| 475 | + kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i], |
|---|
| 476 | + RESET_QUEUES, 0, 0); |
|---|
| 477 | + |
|---|
| 478 | + return amdgpu_ring_test_helper(kiq_ring); |
|---|
| 479 | +} |
|---|
| 480 | + |
|---|
| 481 | +int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, |
|---|
| 482 | + int queue_bit) |
|---|
| 483 | +{ |
|---|
| 484 | + int mec, pipe, queue; |
|---|
| 485 | + int set_resource_bit = 0; |
|---|
| 486 | + |
|---|
| 487 | + amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue); |
|---|
| 488 | + |
|---|
| 489 | + set_resource_bit = mec * 4 * 8 + pipe * 8 + queue; |
|---|
| 490 | + |
|---|
| 491 | + return set_resource_bit; |
|---|
| 492 | +} |
|---|
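The hard-coded 4 and 8 above appear to correspond to a fixed 4-pipes-per-MEC by 8-queues-per-pipe layout expected by the KIQ SET_RESOURCES queue mask, independent of the values discovered in adev->gfx.mec (an inference from the code, not stated in the patch). As a worked example, the queue at mec 1, pipe 2, queue 3 maps to mask bit 1 * 32 + 2 * 8 + 3 = 51.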
| 493 | + |
|---|
| 494 | +int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev) |
|---|
| 495 | +{ |
|---|
| 496 | + struct amdgpu_kiq *kiq = &adev->gfx.kiq; |
|---|
| 497 | + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; |
|---|
| 498 | + uint64_t queue_mask = 0; |
|---|
| 499 | + int r, i; |
|---|
| 500 | + |
|---|
| 501 | + if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources) |
|---|
| 502 | + return -EINVAL; |
|---|
| 503 | + |
|---|
| 504 | + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { |
|---|
| 505 | + if (!test_bit(i, adev->gfx.mec.queue_bitmap)) |
|---|
| 506 | + continue; |
|---|
| 507 | + |
|---|
| 508 | + /* This situation may be hit in the future if a new HW |
|---|
| 509 | + * generation exposes more than 64 queues. If so, the |
|---|
| 510 | + * definition of queue_mask needs updating */ |
|---|
| 511 | + if (WARN_ON(i > (sizeof(queue_mask)*8))) { |
|---|
| 512 | + DRM_ERROR("Invalid KCQ enabled: %d\n", i); |
|---|
| 513 | + break; |
|---|
| 514 | + } |
|---|
| 515 | + |
|---|
| 516 | + queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i)); |
|---|
| 517 | + } |
|---|
| 518 | + |
|---|
| 519 | + DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe, |
|---|
| 520 | + kiq_ring->queue); |
|---|
| 521 | + |
|---|
| 522 | + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * |
|---|
| 523 | + adev->gfx.num_compute_rings + |
|---|
| 524 | + kiq->pmf->set_resources_size); |
|---|
| 525 | + if (r) { |
|---|
| 526 | + DRM_ERROR("Failed to lock KIQ (%d).\n", r); |
|---|
| 527 | + return r; |
|---|
| 528 | + } |
|---|
| 529 | + |
|---|
| 530 | + kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); |
|---|
| 531 | + for (i = 0; i < adev->gfx.num_compute_rings; i++) |
|---|
| 532 | + kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]); |
|---|
| 533 | + |
|---|
| 534 | + r = amdgpu_ring_test_helper(kiq_ring); |
|---|
| 535 | + if (r) |
|---|
| 536 | + DRM_ERROR("KCQ enable failed\n"); |
|---|
| 537 | + |
|---|
| 538 | + return r; |
|---|
| 539 | +} |
|---|
| 540 | + |
|---|
| 541 | +/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable |
|---|
| 542 | + * |
|---|
| 543 | + * @adev: amdgpu_device pointer |
|---|
| 544 | + * @enable: true to enable the gfx off feature, false to disable it |
|---|
| 545 | + * |
|---|
| 546 | + * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled. |
|---|
| 547 | + * 2. other clients can send a request to disable the gfx off feature; the request should be honored. |
|---|
| 548 | + * 3. other clients can cancel their request to disable the gfx off feature |
|---|
| 549 | + * 4. other clients should not send a request to enable the gfx off feature before they have disabled it. |
|---|
| 550 | + */ |
|---|
| 551 | + |
|---|
| 552 | +void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) |
|---|
| 553 | +{ |
|---|
| 554 | + if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) |
|---|
| 555 | + return; |
|---|
| 556 | + |
|---|
| 557 | + mutex_lock(&adev->gfx.gfx_off_mutex); |
|---|
| 558 | + |
|---|
| 559 | + if (enable) { |
|---|
| 560 | + /* If the count is already 0, it means there's an imbalance bug somewhere. |
|---|
| 561 | + * Note that the bug may be in a different caller than the one which triggers the |
|---|
| 562 | + * WARN_ON_ONCE. |
|---|
| 563 | + */ |
|---|
| 564 | + if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0)) |
|---|
| 565 | + goto unlock; |
|---|
| 566 | + |
|---|
| 567 | + adev->gfx.gfx_off_req_count--; |
|---|
| 568 | + |
|---|
| 569 | + if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state) |
|---|
| 570 | + schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE); |
|---|
| 571 | + } else { |
|---|
| 572 | + if (adev->gfx.gfx_off_req_count == 0) { |
|---|
| 573 | + cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); |
|---|
| 574 | + |
|---|
| 575 | + if (adev->gfx.gfx_off_state && |
|---|
| 576 | + !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) { |
|---|
| 577 | + adev->gfx.gfx_off_state = false; |
|---|
| 578 | + |
|---|
| 579 | + if (adev->gfx.funcs->init_spm_golden) { |
|---|
| 580 | + dev_dbg(adev->dev, |
|---|
| 581 | + "GFXOFF is disabled, re-init SPM golden settings\n"); |
|---|
| 582 | + amdgpu_gfx_init_spm_golden(adev); |
|---|
| 583 | + } |
|---|
| 584 | + } |
|---|
| 585 | + } |
|---|
| 586 | + |
|---|
| 587 | + adev->gfx.gfx_off_req_count++; |
|---|
| 588 | + } |
|---|
| 589 | + |
|---|
| 590 | +unlock: |
|---|
| 591 | + mutex_unlock(&adev->gfx.gfx_off_mutex); |
|---|
| 592 | +} |
|---|
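A hypothetical caller pattern for the request counting implemented above (a sketch, not code from this patch): a client that must keep the GFX block powered takes a "disable" reference, does its work, then drops the reference, after which the delayed work may re-arm GFXOFF once the count reaches zero.

```c
/* Sketch only: keep GFXOFF off while touching GFX registers directly. */
amdgpu_gfx_off_ctrl(adev, false);	/* gfx_off_req_count++; the first request also
					 * powers the GFX block back up if needed */

/* ... GFX register access is safe here ... */

amdgpu_gfx_off_ctrl(adev, true);	/* gfx_off_req_count--; when it reaches zero the
					 * delayed work re-enables GFXOFF after
					 * GFX_OFF_DELAY_ENABLE (100 ms) */
```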
| 593 | + |
|---|
| 594 | +int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) |
|---|
| 595 | +{ |
|---|
| 596 | + |
|---|
| 597 | + int r = 0; |
|---|
| 598 | + |
|---|
| 599 | + mutex_lock(&adev->gfx.gfx_off_mutex); |
|---|
| 600 | + |
|---|
| 601 | + r = smu_get_status_gfxoff(adev, value); |
|---|
| 602 | + |
|---|
| 603 | + mutex_unlock(&adev->gfx.gfx_off_mutex); |
|---|
| 604 | + |
|---|
| 605 | + return r; |
|---|
| 606 | +} |
|---|
| 607 | + |
|---|
| 608 | +int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev) |
|---|
| 609 | +{ |
|---|
| 610 | + int r; |
|---|
| 611 | + struct ras_fs_if fs_info = { |
|---|
| 612 | + .sysfs_name = "gfx_err_count", |
|---|
| 613 | + }; |
|---|
| 614 | + struct ras_ih_if ih_info = { |
|---|
| 615 | + .cb = amdgpu_gfx_process_ras_data_cb, |
|---|
| 616 | + }; |
|---|
| 617 | + |
|---|
| 618 | + if (!adev->gfx.ras_if) { |
|---|
| 619 | + adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); |
|---|
| 620 | + if (!adev->gfx.ras_if) |
|---|
| 621 | + return -ENOMEM; |
|---|
| 622 | + adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX; |
|---|
| 623 | + adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; |
|---|
| 624 | + adev->gfx.ras_if->sub_block_index = 0; |
|---|
| 625 | + strcpy(adev->gfx.ras_if->name, "gfx"); |
|---|
| 626 | + } |
|---|
| 627 | + fs_info.head = ih_info.head = *adev->gfx.ras_if; |
|---|
| 628 | + |
|---|
| 629 | + r = amdgpu_ras_late_init(adev, adev->gfx.ras_if, |
|---|
| 630 | + &fs_info, &ih_info); |
|---|
| 631 | + if (r) |
|---|
| 632 | + goto free; |
|---|
| 633 | + |
|---|
| 634 | + if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) { |
|---|
| 635 | + r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); |
|---|
| 636 | + if (r) |
|---|
| 637 | + goto late_fini; |
|---|
| 638 | + } else { |
|---|
| 639 | + /* free gfx ras_if if ras is not supported */ |
|---|
| 640 | + r = 0; |
|---|
| 641 | + goto free; |
|---|
| 642 | + } |
|---|
| 643 | + |
|---|
| 644 | + return 0; |
|---|
| 645 | +late_fini: |
|---|
| 646 | + amdgpu_ras_late_fini(adev, adev->gfx.ras_if, &ih_info); |
|---|
| 647 | +free: |
|---|
| 648 | + kfree(adev->gfx.ras_if); |
|---|
| 649 | + adev->gfx.ras_if = NULL; |
|---|
| 650 | + return r; |
|---|
| 651 | +} |
|---|
| 652 | + |
|---|
| 653 | +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev) |
|---|
| 654 | +{ |
|---|
| 655 | + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && |
|---|
| 656 | + adev->gfx.ras_if) { |
|---|
| 657 | + struct ras_common_if *ras_if = adev->gfx.ras_if; |
|---|
| 658 | + struct ras_ih_if ih_info = { |
|---|
| 659 | + .head = *ras_if, |
|---|
| 660 | + .cb = amdgpu_gfx_process_ras_data_cb, |
|---|
| 661 | + }; |
|---|
| 662 | + |
|---|
| 663 | + amdgpu_ras_late_fini(adev, ras_if, &ih_info); |
|---|
| 664 | + kfree(ras_if); |
|---|
| 665 | + } |
|---|
| 666 | +} |
|---|
| 667 | + |
|---|
| 668 | +int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, |
|---|
| 669 | + void *err_data, |
|---|
| 670 | + struct amdgpu_iv_entry *entry) |
|---|
| 671 | +{ |
|---|
| 672 | + /* TODO: a UE (uncorrectable error) will trigger an interrupt. |
|---|
| 673 | + * |
|---|
| 674 | + * When “Full RAS” is enabled, the per-IP interrupt sources should |
|---|
| 675 | + * be disabled and the driver should only look for the aggregated |
|---|
| 676 | + * interrupt via sync flood |
|---|
| 677 | + */ |
|---|
| 678 | + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { |
|---|
| 679 | + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); |
|---|
| 680 | + if (adev->gfx.funcs->query_ras_error_count) |
|---|
| 681 | + adev->gfx.funcs->query_ras_error_count(adev, err_data); |
|---|
| 682 | + amdgpu_ras_reset_gpu(adev); |
|---|
| 683 | + } |
|---|
| 684 | + return AMDGPU_RAS_SUCCESS; |
|---|
| 685 | +} |
|---|
| 686 | + |
|---|
| 687 | +int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, |
|---|
| 688 | + struct amdgpu_irq_src *source, |
|---|
| 689 | + struct amdgpu_iv_entry *entry) |
|---|
| 690 | +{ |
|---|
| 691 | + struct ras_common_if *ras_if = adev->gfx.ras_if; |
|---|
| 692 | + struct ras_dispatch_if ih_data = { |
|---|
| 693 | + .entry = entry, |
|---|
| 694 | + }; |
|---|
| 695 | + |
|---|
| 696 | + if (!ras_if) |
|---|
| 697 | + return 0; |
|---|
| 698 | + |
|---|
| 699 | + ih_data.head = *ras_if; |
|---|
| 700 | + |
|---|
| 701 | + DRM_ERROR("CP ECC ERROR IRQ\n"); |
|---|
| 702 | + amdgpu_ras_interrupt_dispatch(adev, &ih_data); |
|---|
| 703 | + return 0; |
|---|
| 704 | +} |
|---|
| 705 | + |
|---|
| 706 | +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) |
|---|
| 707 | +{ |
|---|
| 708 | + signed long r, cnt = 0; |
|---|
| 709 | + unsigned long flags; |
|---|
| 710 | + uint32_t seq, reg_val_offs = 0, value = 0; |
|---|
| 711 | + struct amdgpu_kiq *kiq = &adev->gfx.kiq; |
|---|
| 712 | + struct amdgpu_ring *ring = &kiq->ring; |
|---|
| 713 | + |
|---|
| 714 | + if (adev->in_pci_err_recovery) |
|---|
| 715 | + return 0; |
|---|
| 716 | + |
|---|
| 717 | + BUG_ON(!ring->funcs->emit_rreg); |
|---|
| 718 | + |
|---|
| 719 | + spin_lock_irqsave(&kiq->ring_lock, flags); |
|---|
| 720 | + if (amdgpu_device_wb_get(adev, ®_val_offs)) { |
|---|
| 721 | + pr_err("critical bug! too many kiq readers\n"); |
|---|
| 722 | + goto failed_unlock; |
|---|
| 723 | + } |
|---|
| 724 | + amdgpu_ring_alloc(ring, 32); |
|---|
| 725 | + amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); |
|---|
| 726 | + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); |
|---|
| 727 | + if (r) |
|---|
| 728 | + goto failed_undo; |
|---|
| 729 | + |
|---|
| 730 | + amdgpu_ring_commit(ring); |
|---|
| 731 | + spin_unlock_irqrestore(&kiq->ring_lock, flags); |
|---|
| 732 | + |
|---|
| 733 | + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); |
|---|
| 734 | + |
|---|
| 735 | + /* don't wait anymore for gpu reset case because this way may |
|---|
| 736 | + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg |
|---|
| 737 | + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will |
|---|
| 738 | + * never return if we keep waiting in virt_kiq_rreg, which causes |
|---|
| 739 | + * gpu_recover() to hang there. |
|---|
| 740 | + * |
|---|
| 741 | + * also don't wait anymore for IRQ context |
|---|
| 742 | + * */ |
|---|
| 743 | + if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt())) |
|---|
| 744 | + goto failed_kiq_read; |
|---|
| 745 | + |
|---|
| 746 | + might_sleep(); |
|---|
| 747 | + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { |
|---|
| 748 | + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); |
|---|
| 749 | + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); |
|---|
| 750 | + } |
|---|
| 751 | + |
|---|
| 752 | + if (cnt > MAX_KIQ_REG_TRY) |
|---|
| 753 | + goto failed_kiq_read; |
|---|
| 754 | + |
|---|
| 755 | + mb(); |
|---|
| 756 | + value = adev->wb.wb[reg_val_offs]; |
|---|
| 757 | + amdgpu_device_wb_free(adev, reg_val_offs); |
|---|
| 758 | + return value; |
|---|
| 759 | + |
|---|
| 760 | +failed_undo: |
|---|
| 761 | + amdgpu_ring_undo(ring); |
|---|
| 762 | +failed_unlock: |
|---|
| 763 | + spin_unlock_irqrestore(&kiq->ring_lock, flags); |
|---|
| 764 | +failed_kiq_read: |
|---|
| 765 | + if (reg_val_offs) |
|---|
| 766 | + amdgpu_device_wb_free(adev, reg_val_offs); |
|---|
| 767 | + dev_err(adev->dev, "failed to read reg:%x\n", reg); |
|---|
| 768 | + return ~0; |
|---|
| 769 | +} |
|---|
| 770 | + |
|---|
| 771 | +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) |
|---|
| 772 | +{ |
|---|
| 773 | + signed long r, cnt = 0; |
|---|
| 774 | + unsigned long flags; |
|---|
| 775 | + uint32_t seq; |
|---|
| 776 | + struct amdgpu_kiq *kiq = &adev->gfx.kiq; |
|---|
| 777 | + struct amdgpu_ring *ring = &kiq->ring; |
|---|
| 778 | + |
|---|
| 779 | + BUG_ON(!ring->funcs->emit_wreg); |
|---|
| 780 | + |
|---|
| 781 | + if (adev->in_pci_err_recovery) |
|---|
| 782 | + return; |
|---|
| 783 | + |
|---|
| 784 | + spin_lock_irqsave(&kiq->ring_lock, flags); |
|---|
| 785 | + amdgpu_ring_alloc(ring, 32); |
|---|
| 786 | + amdgpu_ring_emit_wreg(ring, reg, v); |
|---|
| 787 | + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); |
|---|
| 788 | + if (r) |
|---|
| 789 | + goto failed_undo; |
|---|
| 790 | + |
|---|
| 791 | + amdgpu_ring_commit(ring); |
|---|
| 792 | + spin_unlock_irqrestore(&kiq->ring_lock, flags); |
|---|
| 793 | + |
|---|
| 794 | + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); |
|---|
| 795 | + |
|---|
| 796 | + /* don't wait anymore for gpu reset case because this way may |
|---|
| 797 | + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg |
|---|
| 798 | + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will |
|---|
| 799 | + * never return if we keep waiting in virt_kiq_rreg, which causes |
|---|
| 800 | + * gpu_recover() to hang there. |
|---|
| 801 | + * |
|---|
| 802 | + * also don't wait anymore for IRQ context |
|---|
| 803 | + * */ |
|---|
| 804 | + if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt())) |
|---|
| 805 | + goto failed_kiq_write; |
|---|
| 806 | + |
|---|
| 807 | + might_sleep(); |
|---|
| 808 | + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { |
|---|
| 809 | + |
|---|
| 810 | + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); |
|---|
| 811 | + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); |
|---|
| 812 | + } |
|---|
| 813 | + |
|---|
| 814 | + if (cnt > MAX_KIQ_REG_TRY) |
|---|
| 815 | + goto failed_kiq_write; |
|---|
| 816 | + |
|---|
| 817 | + return; |
|---|
| 818 | + |
|---|
| 819 | +failed_undo: |
|---|
| 820 | + amdgpu_ring_undo(ring); |
|---|
| 821 | + spin_unlock_irqrestore(&kiq->ring_lock, flags); |
|---|
| 822 | +failed_kiq_write: |
|---|
| 823 | + dev_err(adev->dev, "failed to write reg:%x\n", reg); |
|---|
| 824 | +} |
|---|
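Finally, a minimal usage sketch for the two KIQ register accessors above (illustrative only; the register offset and bit are placeholders, and the real callers are the driver's register-access paths, e.g. under SR-IOV):

```c
/* Sketch only: read-modify-write a register through the KIQ ring. */
uint32_t val = amdgpu_kiq_rreg(adev, reg);	/* returns ~0 and logs an error on failure */

amdgpu_kiq_wreg(adev, reg, val | BIT(0));	/* placeholder bit, for illustration */
```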