.. | .. |
22 | 22 | * OTHER DEALINGS IN THE SOFTWARE. |
23 | 23 | * |
24 | 24 | */ |
25 | | -#include <drm/drmP.h> |
| 25 | + |
26 | 26 | #include "amdgpu.h" |
27 | 27 | #include "amdgpu_gfx.h" |
| 28 | +#include "amdgpu_rlc.h" |
| 29 | +#include "amdgpu_ras.h" |
| 30 | + |
| 31 | +/* delay 0.1 second to enable gfx off feature */ |
| 32 | +#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) |
28 | 33 | |
29 | 34 | /* |
30 | | - * GPU scratch registers helpers function. |
| 35 | + * GPU GFX IP block helpers function. |
31 | 36 | */ |
| 37 | + |
| 38 | +int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec, |
| 39 | + int pipe, int queue) |
| 40 | +{ |
| 41 | + int bit = 0; |
| 42 | + |
| 43 | + bit += mec * adev->gfx.mec.num_pipe_per_mec |
| 44 | + * adev->gfx.mec.num_queue_per_pipe; |
| 45 | + bit += pipe * adev->gfx.mec.num_queue_per_pipe; |
| 46 | + bit += queue; |
| 47 | + |
| 48 | + return bit; |
| 49 | +} |
| 50 | + |
| 51 | +void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit, |
| 52 | + int *mec, int *pipe, int *queue) |
| 53 | +{ |
| 54 | + *queue = bit % adev->gfx.mec.num_queue_per_pipe; |
| 55 | + *pipe = (bit / adev->gfx.mec.num_queue_per_pipe) |
| 56 | + % adev->gfx.mec.num_pipe_per_mec; |
| 57 | + *mec = (bit / adev->gfx.mec.num_queue_per_pipe) |
| 58 | + / adev->gfx.mec.num_pipe_per_mec; |
| 59 | + |
| 60 | +} |
| 61 | + |
| 62 | +bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, |
| 63 | + int mec, int pipe, int queue) |
| 64 | +{ |
| 65 | + return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue), |
| 66 | + adev->gfx.mec.queue_bitmap); |
| 67 | +} |
| 68 | + |
| 69 | +int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, |
| 70 | + int me, int pipe, int queue) |
| 71 | +{ |
| 72 | + int bit = 0; |
| 73 | + |
| 74 | + bit += me * adev->gfx.me.num_pipe_per_me |
| 75 | + * adev->gfx.me.num_queue_per_pipe; |
| 76 | + bit += pipe * adev->gfx.me.num_queue_per_pipe; |
| 77 | + bit += queue; |
| 78 | + |
| 79 | + return bit; |
| 80 | +} |
| 81 | + |
| 82 | +void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, |
| 83 | + int *me, int *pipe, int *queue) |
| 84 | +{ |
| 85 | + *queue = bit % adev->gfx.me.num_queue_per_pipe; |
| 86 | + *pipe = (bit / adev->gfx.me.num_queue_per_pipe) |
| 87 | + % adev->gfx.me.num_pipe_per_me; |
| 88 | + *me = (bit / adev->gfx.me.num_queue_per_pipe) |
| 89 | + / adev->gfx.me.num_pipe_per_me; |
| 90 | +} |
| 91 | + |
| 92 | +bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, |
| 93 | + int me, int pipe, int queue) |
| 94 | +{ |
| 95 | + return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue), |
| 96 | + adev->gfx.me.queue_bitmap); |
| 97 | +} |
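
The new *_queue_to_bit / bit_to_*_queue helpers linearize a (mec, pipe, queue) triple into a flat index for the queue bitmaps, with queue varying fastest, then pipe, then mec: bit = (mec * num_pipe_per_mec + pipe) * num_queue_per_pipe + queue. A minimal standalone sketch of the same arithmetic, using hypothetical 4-pipe / 8-queue constants instead of the adev fields (the me variants below use the identical layout with the me counts):

```c
#include <assert.h>
#include <stdio.h>

/* Hypothetical topology; the driver reads these from adev->gfx.mec. */
#define NUM_PIPE_PER_MEC   4
#define NUM_QUEUE_PER_PIPE 8

static int mec_queue_to_bit(int mec, int pipe, int queue)
{
	/* queue varies fastest, then pipe, then mec */
	return (mec * NUM_PIPE_PER_MEC + pipe) * NUM_QUEUE_PER_PIPE + queue;
}

static void bit_to_mec_queue(int bit, int *mec, int *pipe, int *queue)
{
	*queue = bit % NUM_QUEUE_PER_PIPE;
	*pipe = (bit / NUM_QUEUE_PER_PIPE) % NUM_PIPE_PER_MEC;
	*mec = (bit / NUM_QUEUE_PER_PIPE) / NUM_PIPE_PER_MEC;
}

int main(void)
{
	int mec, pipe, queue;
	int bit = mec_queue_to_bit(1, 2, 3);	/* MEC1, pipe 2, queue 3 */

	bit_to_mec_queue(bit, &mec, &pipe, &queue);
	assert(mec == 1 && pipe == 2 && queue == 3);
	printf("(1,2,3) <-> bit %d\n", bit);	/* 51 with these constants */
	return 0;
}
```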
| 98 | + |
32 | 99 | /** |
33 | 100 | * amdgpu_gfx_scratch_get - Allocate a scratch register |
34 | 101 | * |
.. | .. |
125 | 192 | return adev->gfx.mec.num_mec > 1; |
126 | 193 | } |
127 | 194 | |
| 195 | +bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, |
| 196 | + int pipe, int queue) |
| 197 | +{ |
| 198 | + bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev); |
| 199 | + int cond; |
| 200 | + /* Policy: alternate between normal and high priority */ |
| 201 | + cond = multipipe_policy ? pipe : queue; |
| 202 | + |
| 203 | + return ((cond % 2) != 0); |
| 204 | + |
| 205 | +} |
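
The priority predicate alternates on the pipe index when the multipipe policy is in effect and on the queue index otherwise, so odd pipes (or odd queues) are treated as high priority. A tiny illustration of which compute queues that selects, again assuming a 4-pipe x 8-queue topology:

```c
#include <stdbool.h>
#include <stdio.h>

/* Assumed 4x8 topology; illustrative only, not the driver code. */
static bool is_high_priority(bool multipipe_policy, int pipe, int queue)
{
	int cond = multipipe_policy ? pipe : queue;

	return (cond % 2) != 0;	/* odd pipe (or odd queue) -> high priority */
}

int main(void)
{
	int pipe, queue;

	for (pipe = 0; pipe < 4; pipe++)
		for (queue = 0; queue < 8; queue++)
			if (is_high_priority(true, pipe, queue))
				printf("pipe %d queue %d: high priority\n", pipe, queue);
	return 0;
}
```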
| 206 | + |
128 | 207 | void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) |
129 | 208 | { |
130 | | - int i, queue, pipe, mec; |
| 209 | + int i, queue, pipe; |
131 | 210 | bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev); |
| 211 | + int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec * |
| 212 | + adev->gfx.mec.num_queue_per_pipe, |
| 213 | + adev->gfx.num_compute_rings); |
132 | 214 | |
133 | | - /* policy for amdgpu compute queue ownership */ |
134 | | - for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { |
135 | | - queue = i % adev->gfx.mec.num_queue_per_pipe; |
136 | | - pipe = (i / adev->gfx.mec.num_queue_per_pipe) |
137 | | - % adev->gfx.mec.num_pipe_per_mec; |
138 | | - mec = (i / adev->gfx.mec.num_queue_per_pipe) |
139 | | - / adev->gfx.mec.num_pipe_per_mec; |
| 215 | + if (multipipe_policy) { |
| 216 | + /* policy: make queues evenly cross all pipes on MEC1 only */ |
| 217 | + for (i = 0; i < max_queues_per_mec; i++) { |
| 218 | + pipe = i % adev->gfx.mec.num_pipe_per_mec; |
| 219 | + queue = (i / adev->gfx.mec.num_pipe_per_mec) % |
| 220 | + adev->gfx.mec.num_queue_per_pipe; |
140 | 221 | |
141 | | - /* we've run out of HW */ |
142 | | - if (mec >= adev->gfx.mec.num_mec) |
143 | | - break; |
144 | | - |
145 | | - if (multipipe_policy) { |
146 | | - /* policy: amdgpu owns the first two queues of the first MEC */ |
147 | | - if (mec == 0 && queue < 2) |
148 | | - set_bit(i, adev->gfx.mec.queue_bitmap); |
149 | | - } else { |
150 | | - /* policy: amdgpu owns all queues in the first pipe */ |
151 | | - if (mec == 0 && pipe == 0) |
152 | | - set_bit(i, adev->gfx.mec.queue_bitmap); |
| 222 | + set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue, |
| 223 | + adev->gfx.mec.queue_bitmap); |
153 | 224 | } |
| 225 | + } else { |
| 226 | + /* policy: amdgpu owns all queues in the given pipe */ |
| 227 | + for (i = 0; i < max_queues_per_mec; ++i) |
| 228 | + set_bit(i, adev->gfx.mec.queue_bitmap); |
154 | 229 | } |
155 | 230 | |
156 | | - /* update the number of active compute rings */ |
157 | | - adev->gfx.num_compute_rings = |
158 | | - bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); |
| 231 | + dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)); |
| 232 | +} |
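
The acquisition policy changes here: with the multipipe policy the driver now spreads its compute rings round-robin across the pipes of MEC1 (the queue index only advances after every pipe has received a queue), instead of claiming the first two queues of every pipe; without it, the else branch simply takes the first max_queues_per_mec bit positions, which fills pipe 0 first. A standalone sketch of the bitmap the multipipe branch produces, assuming 4 pipes x 8 queues and 8 compute rings:

```c
#include <stdio.h>

/* Assumed topology and ring count; the driver reads these from adev->gfx. */
#define NUM_PIPE_PER_MEC   4
#define NUM_QUEUE_PER_PIPE 8
#define NUM_COMPUTE_RINGS  8

int main(void)
{
	unsigned long bitmap = 0;
	int i, pipe, queue;
	int max_queues_per_mec = NUM_PIPE_PER_MEC * NUM_QUEUE_PER_PIPE;

	if (max_queues_per_mec > NUM_COMPUTE_RINGS)
		max_queues_per_mec = NUM_COMPUTE_RINGS;

	/* multipipe branch: walk pipes first so rings land on different pipes */
	for (i = 0; i < max_queues_per_mec; i++) {
		pipe = i % NUM_PIPE_PER_MEC;
		queue = (i / NUM_PIPE_PER_MEC) % NUM_QUEUE_PER_PIPE;
		bitmap |= 1UL << (pipe * NUM_QUEUE_PER_PIPE + queue);
	}

	/* With 8 rings this claims queues 0 and 1 on each of the four pipes:
	 * bits 0,1,8,9,16,17,24,25 -> 0x03030303.
	 */
	printf("queue_bitmap = 0x%08lx\n", bitmap);
	return 0;
}
```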
159 | 233 | |
160 | | - /* If you hit this case and edited the policy, you probably just |
161 | | - * need to increase AMDGPU_MAX_COMPUTE_RINGS */ |
162 | | - if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS)) |
163 | | - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; |
| 234 | +void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) |
| 235 | +{ |
| 236 | + int i, queue, me; |
| 237 | + |
| 238 | + for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) { |
| 239 | + queue = i % adev->gfx.me.num_queue_per_pipe; |
| 240 | + me = (i / adev->gfx.me.num_queue_per_pipe) |
| 241 | + / adev->gfx.me.num_pipe_per_me; |
| 242 | + |
| 243 | + if (me >= adev->gfx.me.num_me) |
| 244 | + break; |
| 245 | + /* policy: amdgpu owns the first queue per pipe at this stage |
| 246 | + * will extend to mulitple queues per pipe later */ |
| 247 | + if (me == 0 && queue < 1) |
| 248 | + set_bit(i, adev->gfx.me.queue_bitmap); |
| 249 | + } |
| 250 | + |
| 251 | + /* update the number of active graphics rings */ |
| 252 | + adev->gfx.num_gfx_rings = |
| 253 | + bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); |
164 | 254 | } |
165 | 255 | |
166 | 256 | static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, |
.. | .. |
173 | 263 | * adev->gfx.mec.num_pipe_per_mec |
174 | 264 | * adev->gfx.mec.num_queue_per_pipe; |
175 | 265 | |
176 | | - while (queue_bit-- >= 0) { |
| 266 | + while (--queue_bit >= 0) { |
177 | 267 | if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) |
178 | 268 | continue; |
179 | 269 | |
180 | | - amdgpu_gfx_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue); |
| 270 | + amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue); |
181 | 271 | |
182 | 272 | /* |
183 | 273 | * 1. Using pipes 2/3 from MEC 2 seems cause problems. |
.. | .. |
207 | 297 | |
208 | 298 | spin_lock_init(&kiq->ring_lock); |
209 | 299 | |
210 | | - r = amdgpu_device_wb_get(adev, &adev->virt.reg_val_offs); |
211 | | - if (r) |
212 | | - return r; |
213 | | - |
214 | 300 | ring->adev = NULL; |
215 | 301 | ring->ring_obj = NULL; |
216 | 302 | ring->use_doorbell = true; |
217 | | - ring->doorbell_index = AMDGPU_DOORBELL_KIQ; |
| 303 | + ring->doorbell_index = adev->doorbell_index.kiq; |
218 | 304 | |
219 | 305 | r = amdgpu_gfx_kiq_acquire(adev, ring); |
220 | 306 | if (r) |
221 | 307 | return r; |
222 | 308 | |
223 | 309 | ring->eop_gpu_addr = kiq->eop_gpu_addr; |
| 310 | + ring->no_scheduler = true; |
224 | 311 | sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue); |
225 | 312 | r = amdgpu_ring_init(adev, ring, 1024, |
226 | | - irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); |
| 313 | + irq, AMDGPU_CP_KIQ_IRQ_DRIVER0, |
| 314 | + AMDGPU_RING_PRIO_DEFAULT); |
227 | 315 | if (r) |
228 | 316 | dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); |
229 | 317 | |
230 | 318 | return r; |
231 | 319 | } |
232 | 320 | |
233 | | -void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, |
234 | | - struct amdgpu_irq_src *irq) |
| 321 | +void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) |
235 | 322 | { |
236 | | - amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs); |
237 | 323 | amdgpu_ring_fini(ring); |
238 | 324 | } |
239 | 325 | |
.. | .. |
270 | 356 | return 0; |
271 | 357 | } |
272 | 358 | |
273 | | -/* create MQD for each compute queue */ |
274 | | -int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, |
275 | | - unsigned mqd_size) |
| 359 | +/* create MQD for each compute/gfx queue */ |
| 360 | +int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, |
| 361 | + unsigned mqd_size) |
276 | 362 | { |
277 | 363 | struct amdgpu_ring *ring = NULL; |
278 | 364 | int r, i; |
.. | .. |
299 | 385 | dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); |
300 | 386 | } |
301 | 387 | |
| 388 | + if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { |
| 389 | + /* create MQD for each KGQ */ |
| 390 | + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { |
| 391 | + ring = &adev->gfx.gfx_ring[i]; |
| 392 | + if (!ring->mqd_obj) { |
| 393 | + r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, |
| 394 | + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, |
| 395 | + &ring->mqd_gpu_addr, &ring->mqd_ptr); |
| 396 | + if (r) { |
| 397 | + dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); |
| 398 | + return r; |
| 399 | + } |
| 400 | + |
| 401 | + /* prepare MQD backup */ |
| 402 | + adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL); |
| 403 | + if (!adev->gfx.me.mqd_backup[i]) |
| 404 | + dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); |
| 405 | + } |
| 406 | + } |
| 407 | + } |
| 408 | + |
302 | 409 | /* create MQD for each KCQ */ |
303 | 410 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
304 | 411 | ring = &adev->gfx.compute_ring[i]; |
.. | .. |
307 | 414 | AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, |
308 | 415 | &ring->mqd_gpu_addr, &ring->mqd_ptr); |
309 | 416 | if (r) { |
310 | | - dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); |
| 417 | + dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); |
311 | 418 | return r; |
312 | 419 | } |
313 | 420 | |
.. | .. |
321 | 428 | return 0; |
322 | 429 | } |
323 | 430 | |
324 | | -void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev) |
| 431 | +void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev) |
325 | 432 | { |
326 | 433 | struct amdgpu_ring *ring = NULL; |
327 | 434 | int i; |
| 435 | + |
| 436 | + if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { |
| 437 | + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { |
| 438 | + ring = &adev->gfx.gfx_ring[i]; |
| 439 | + kfree(adev->gfx.me.mqd_backup[i]); |
| 440 | + amdgpu_bo_free_kernel(&ring->mqd_obj, |
| 441 | + &ring->mqd_gpu_addr, |
| 442 | + &ring->mqd_ptr); |
| 443 | + } |
| 444 | + } |
328 | 445 | |
329 | 446 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
330 | 447 | ring = &adev->gfx.compute_ring[i]; |
.. | .. |
340 | 457 | &ring->mqd_gpu_addr, |
341 | 458 | &ring->mqd_ptr); |
342 | 459 | } |
| 460 | + |
| 461 | +int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev) |
| 462 | +{ |
| 463 | + struct amdgpu_kiq *kiq = &adev->gfx.kiq; |
| 464 | + struct amdgpu_ring *kiq_ring = &kiq->ring; |
| 465 | + int i; |
| 466 | + |
| 467 | + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) |
| 468 | + return -EINVAL; |
| 469 | + |
| 470 | + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * |
| 471 | + adev->gfx.num_compute_rings)) |
| 472 | + return -ENOMEM; |
| 473 | + |
| 474 | + for (i = 0; i < adev->gfx.num_compute_rings; i++) |
| 475 | + kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i], |
| 476 | + RESET_QUEUES, 0, 0); |
| 477 | + |
| 478 | + return amdgpu_ring_test_helper(kiq_ring); |
| 479 | +} |
| 480 | + |
| 481 | +int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, |
| 482 | + int queue_bit) |
| 483 | +{ |
| 484 | + int mec, pipe, queue; |
| 485 | + int set_resource_bit = 0; |
| 486 | + |
| 487 | + amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue); |
| 488 | + |
| 489 | + set_resource_bit = mec * 4 * 8 + pipe * 8 + queue; |
| 490 | + |
| 491 | + return set_resource_bit; |
| 492 | +} |
| 493 | + |
| 494 | +int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev) |
| 495 | +{ |
| 496 | + struct amdgpu_kiq *kiq = &adev->gfx.kiq; |
| 497 | + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; |
| 498 | + uint64_t queue_mask = 0; |
| 499 | + int r, i; |
| 500 | + |
| 501 | + if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources) |
| 502 | + return -EINVAL; |
| 503 | + |
| 504 | + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { |
| 505 | + if (!test_bit(i, adev->gfx.mec.queue_bitmap)) |
| 506 | + continue; |
| 507 | + |
| 508 | + /* This situation may be hit in the future if a new HW |
| 509 | + * generation exposes more than 64 queues. If so, the |
| 510 | + * definition of queue_mask needs updating */ |
| 511 | + if (WARN_ON(i > (sizeof(queue_mask)*8))) { |
| 512 | + DRM_ERROR("Invalid KCQ enabled: %d\n", i); |
| 513 | + break; |
| 514 | + } |
| 515 | + |
| 516 | + queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i)); |
| 517 | + } |
| 518 | + |
| 519 | + DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe, |
| 520 | + kiq_ring->queue); |
| 521 | + |
| 522 | + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * |
| 523 | + adev->gfx.num_compute_rings + |
| 524 | + kiq->pmf->set_resources_size); |
| 525 | + if (r) { |
| 526 | + DRM_ERROR("Failed to lock KIQ (%d).\n", r); |
| 527 | + return r; |
| 528 | + } |
| 529 | + |
| 530 | + kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); |
| 531 | + for (i = 0; i < adev->gfx.num_compute_rings; i++) |
| 532 | + kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]); |
| 533 | + |
| 534 | + r = amdgpu_ring_test_helper(kiq_ring); |
| 535 | + if (r) |
| 536 | + DRM_ERROR("KCQ enable failed\n"); |
| 537 | + |
| 538 | + return r; |
| 539 | +} |
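
amdgpu_gfx_enable_kcq turns the driver-owned bits of queue_bitmap into the 64-bit mask handed to the KIQ SET_RESOURCES packet. The remapping in amdgpu_queue_mask_bit_to_set_resource_bit hard-codes the packet's 4-pipe x 8-queue-per-MEC numbering (mec * 32 + pipe * 8 + queue), independent of the driver-side topology. A standalone sketch of that translation, reusing the hypothetical 4x8 constants and the bitmap from the earlier sketch (with a 4x8 driver topology the mapping is the identity; it only differs when the driver topology does):

```c
#include <stdint.h>
#include <stdio.h>

/* Driver-side topology (assumed); the SET_RESOURCES layout is fixed at 4x8. */
#define NUM_PIPE_PER_MEC   4
#define NUM_QUEUE_PER_PIPE 8

static int bit_to_set_resource_bit(int queue_bit)
{
	int queue = queue_bit % NUM_QUEUE_PER_PIPE;
	int pipe = (queue_bit / NUM_QUEUE_PER_PIPE) % NUM_PIPE_PER_MEC;
	int mec = (queue_bit / NUM_QUEUE_PER_PIPE) / NUM_PIPE_PER_MEC;

	return mec * 4 * 8 + pipe * 8 + queue;	/* fixed firmware numbering */
}

int main(void)
{
	/* Pretend the driver owns queues 0 and 1 on every pipe of the first MEC. */
	uint64_t queue_bitmap = 0x03030303ull;
	uint64_t queue_mask = 0;
	int i;

	for (i = 0; i < 64; i++)
		if (queue_bitmap & (1ull << i))
			queue_mask |= 1ull << bit_to_set_resource_bit(i);

	printf("queue_mask = 0x%016llx\n", (unsigned long long)queue_mask);
	return 0;
}
```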
| 540 | + |
| 541 | +/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable |
| 542 | + * |
| 543 | + * @adev: amdgpu_device pointer |
| 544 | + * @bool enable true: enable gfx off feature, false: disable gfx off feature |
| 545 | + * |
| 546 | + * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled. |
| 547 | + * 2. other client can send request to disable gfx off feature, the request should be honored. |
| 548 | + * 3. other client can cancel their request of disable gfx off feature |
| 549 | + * 4. other client should not send request to enable gfx off feature before disable gfx off feature. |
| 550 | + */ |
| 551 | + |
| 552 | +void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) |
| 553 | +{ |
| 554 | + if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) |
| 555 | + return; |
| 556 | + |
| 557 | + mutex_lock(&adev->gfx.gfx_off_mutex); |
| 558 | + |
| 559 | + if (enable) { |
| 560 | + /* If the count is already 0, it means there's an imbalance bug somewhere. |
| 561 | + * Note that the bug may be in a different caller than the one which triggers the |
| 562 | + * WARN_ON_ONCE. |
| 563 | + */ |
| 564 | + if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0)) |
| 565 | + goto unlock; |
| 566 | + |
| 567 | + adev->gfx.gfx_off_req_count--; |
| 568 | + |
| 569 | + if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state) |
| 570 | + schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE); |
| 571 | + } else { |
| 572 | + if (adev->gfx.gfx_off_req_count == 0) { |
| 573 | + cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); |
| 574 | + |
| 575 | + if (adev->gfx.gfx_off_state && |
| 576 | + !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) { |
| 577 | + adev->gfx.gfx_off_state = false; |
| 578 | + |
| 579 | + if (adev->gfx.funcs->init_spm_golden) { |
| 580 | + dev_dbg(adev->dev, |
| 581 | + "GFXOFF is disabled, re-init SPM golden settings\n"); |
| 582 | + amdgpu_gfx_init_spm_golden(adev); |
| 583 | + } |
| 584 | + } |
| 585 | + } |
| 586 | + |
| 587 | + adev->gfx.gfx_off_req_count++; |
| 588 | + } |
| 589 | + |
| 590 | +unlock: |
| 591 | + mutex_unlock(&adev->gfx.gfx_off_mutex); |
| 592 | +} |
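
The GFXOFF control is a request counter protected by gfx_off_mutex: a disable request (enable == false) cancels any pending "allow GFXOFF" work, asks the SMU to leave GFXOFF if it is currently allowed (re-initializing SPM golden settings where the ASIC needs it), and increments gfx_off_req_count; an enable request decrements the counter and, only when it drops back to zero, arms the delayed work that re-allows GFXOFF after GFX_OFF_DELAY_ENABLE (100 ms). A minimal single-threaded model of that counting protocol, not the driver code (the locking, delayed work, and SMU call are stubbed out):

```c
#include <stdbool.h>
#include <stdio.h>

/* Toy model of the gfx_off_req_count protocol. */
struct gfxoff_model {
	int req_count;		/* outstanding "keep GFX on" requests */
	bool gfx_off_state;	/* true while GFXOFF is allowed */
};

static void gfx_off_ctrl(struct gfxoff_model *m, bool enable)
{
	if (enable) {
		if (m->req_count == 0) {
			printf("imbalance: enable without matching disable\n");
			return;
		}
		if (--m->req_count == 0 && !m->gfx_off_state) {
			/* driver: schedule_delayed_work(..., GFX_OFF_DELAY_ENABLE) */
			m->gfx_off_state = true;
			printf("GFXOFF re-armed (after ~100 ms in the driver)\n");
		}
	} else {
		if (m->req_count == 0 && m->gfx_off_state) {
			/* driver: cancel the work and ask the SMU to leave GFXOFF */
			m->gfx_off_state = false;
			printf("GFXOFF disabled while a client needs the GFX block\n");
		}
		m->req_count++;
	}
}

int main(void)
{
	struct gfxoff_model m = { .req_count = 0, .gfx_off_state = true };

	gfx_off_ctrl(&m, false);	/* client needs register access: block GFXOFF */
	gfx_off_ctrl(&m, false);	/* nested request from another client */
	gfx_off_ctrl(&m, true);		/* first client done */
	gfx_off_ctrl(&m, true);		/* last request dropped: GFXOFF allowed again */
	return 0;
}
```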
| 593 | + |
| 594 | +int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) |
| 595 | +{ |
| 596 | + |
| 597 | + int r = 0; |
| 598 | + |
| 599 | + mutex_lock(&adev->gfx.gfx_off_mutex); |
| 600 | + |
| 601 | + r = smu_get_status_gfxoff(adev, value); |
| 602 | + |
| 603 | + mutex_unlock(&adev->gfx.gfx_off_mutex); |
| 604 | + |
| 605 | + return r; |
| 606 | +} |
| 607 | + |
| 608 | +int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev) |
| 609 | +{ |
| 610 | + int r; |
| 611 | + struct ras_fs_if fs_info = { |
| 612 | + .sysfs_name = "gfx_err_count", |
| 613 | + }; |
| 614 | + struct ras_ih_if ih_info = { |
| 615 | + .cb = amdgpu_gfx_process_ras_data_cb, |
| 616 | + }; |
| 617 | + |
| 618 | + if (!adev->gfx.ras_if) { |
| 619 | + adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); |
| 620 | + if (!adev->gfx.ras_if) |
| 621 | + return -ENOMEM; |
| 622 | + adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX; |
| 623 | + adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; |
| 624 | + adev->gfx.ras_if->sub_block_index = 0; |
| 625 | + strcpy(adev->gfx.ras_if->name, "gfx"); |
| 626 | + } |
| 627 | + fs_info.head = ih_info.head = *adev->gfx.ras_if; |
| 628 | + |
| 629 | + r = amdgpu_ras_late_init(adev, adev->gfx.ras_if, |
| 630 | + &fs_info, &ih_info); |
| 631 | + if (r) |
| 632 | + goto free; |
| 633 | + |
| 634 | + if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) { |
| 635 | + r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); |
| 636 | + if (r) |
| 637 | + goto late_fini; |
| 638 | + } else { |
| 639 | + /* free gfx ras_if if ras is not supported */ |
| 640 | + r = 0; |
| 641 | + goto free; |
| 642 | + } |
| 643 | + |
| 644 | + return 0; |
| 645 | +late_fini: |
| 646 | + amdgpu_ras_late_fini(adev, adev->gfx.ras_if, &ih_info); |
| 647 | +free: |
| 648 | + kfree(adev->gfx.ras_if); |
| 649 | + adev->gfx.ras_if = NULL; |
| 650 | + return r; |
| 651 | +} |
| 652 | + |
| 653 | +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev) |
| 654 | +{ |
| 655 | + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && |
| 656 | + adev->gfx.ras_if) { |
| 657 | + struct ras_common_if *ras_if = adev->gfx.ras_if; |
| 658 | + struct ras_ih_if ih_info = { |
| 659 | + .head = *ras_if, |
| 660 | + .cb = amdgpu_gfx_process_ras_data_cb, |
| 661 | + }; |
| 662 | + |
| 663 | + amdgpu_ras_late_fini(adev, ras_if, &ih_info); |
| 664 | + kfree(ras_if); |
| 665 | + } |
| 666 | +} |
| 667 | + |
| 668 | +int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, |
| 669 | + void *err_data, |
| 670 | + struct amdgpu_iv_entry *entry) |
| 671 | +{ |
| 672 | + /* TODO ue will trigger an interrupt. |
| 673 | + * |
| 674 | + * When “Full RAS” is enabled, the per-IP interrupt sources should |
| 675 | + * be disabled and the driver should only look for the aggregated |
| 676 | + * interrupt via sync flood |
| 677 | + */ |
| 678 | + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { |
| 679 | + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); |
| 680 | + if (adev->gfx.funcs->query_ras_error_count) |
| 681 | + adev->gfx.funcs->query_ras_error_count(adev, err_data); |
| 682 | + amdgpu_ras_reset_gpu(adev); |
| 683 | + } |
| 684 | + return AMDGPU_RAS_SUCCESS; |
| 685 | +} |
| 686 | + |
| 687 | +int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, |
| 688 | + struct amdgpu_irq_src *source, |
| 689 | + struct amdgpu_iv_entry *entry) |
| 690 | +{ |
| 691 | + struct ras_common_if *ras_if = adev->gfx.ras_if; |
| 692 | + struct ras_dispatch_if ih_data = { |
| 693 | + .entry = entry, |
| 694 | + }; |
| 695 | + |
| 696 | + if (!ras_if) |
| 697 | + return 0; |
| 698 | + |
| 699 | + ih_data.head = *ras_if; |
| 700 | + |
| 701 | + DRM_ERROR("CP ECC ERROR IRQ\n"); |
| 702 | + amdgpu_ras_interrupt_dispatch(adev, &ih_data); |
| 703 | + return 0; |
| 704 | +} |
| 705 | + |
| 706 | +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) |
| 707 | +{ |
| 708 | + signed long r, cnt = 0; |
| 709 | + unsigned long flags; |
| 710 | + uint32_t seq, reg_val_offs = 0, value = 0; |
| 711 | + struct amdgpu_kiq *kiq = &adev->gfx.kiq; |
| 712 | + struct amdgpu_ring *ring = &kiq->ring; |
| 713 | + |
| 714 | + if (adev->in_pci_err_recovery) |
| 715 | + return 0; |
| 716 | + |
| 717 | + BUG_ON(!ring->funcs->emit_rreg); |
| 718 | + |
| 719 | + spin_lock_irqsave(&kiq->ring_lock, flags); |
| 720 | + if (amdgpu_device_wb_get(adev, &reg_val_offs)) { |
| 721 | + pr_err("critical bug! too many kiq readers\n"); |
| 722 | + goto failed_unlock; |
| 723 | + } |
| 724 | + amdgpu_ring_alloc(ring, 32); |
| 725 | + amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); |
| 726 | + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); |
| 727 | + if (r) |
| 728 | + goto failed_undo; |
| 729 | + |
| 730 | + amdgpu_ring_commit(ring); |
| 731 | + spin_unlock_irqrestore(&kiq->ring_lock, flags); |
| 732 | + |
| 733 | + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); |
| 734 | + |
| 735 | + /* don't wait anymore for gpu reset case because this way may |
| 736 | + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg |
| 737 | + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will |
| 738 | + * never return if we keep waiting in virt_kiq_rreg, which cause |
| 739 | + * gpu_recover() hang there. |
| 740 | + * |
| 741 | + * also don't wait anymore for IRQ context |
| 742 | + * */ |
| 743 | + if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt())) |
| 744 | + goto failed_kiq_read; |
| 745 | + |
| 746 | + might_sleep(); |
| 747 | + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { |
| 748 | + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); |
| 749 | + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); |
| 750 | + } |
| 751 | + |
| 752 | + if (cnt > MAX_KIQ_REG_TRY) |
| 753 | + goto failed_kiq_read; |
| 754 | + |
| 755 | + mb(); |
| 756 | + value = adev->wb.wb[reg_val_offs]; |
| 757 | + amdgpu_device_wb_free(adev, reg_val_offs); |
| 758 | + return value; |
| 759 | + |
| 760 | +failed_undo: |
| 761 | + amdgpu_ring_undo(ring); |
| 762 | +failed_unlock: |
| 763 | + spin_unlock_irqrestore(&kiq->ring_lock, flags); |
| 764 | +failed_kiq_read: |
| 765 | + if (reg_val_offs) |
| 766 | + amdgpu_device_wb_free(adev, reg_val_offs); |
| 767 | + dev_err(adev->dev, "failed to read reg:%x\n", reg); |
| 768 | + return ~0; |
| 769 | +} |
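
amdgpu_kiq_rreg reads a register indirectly: under the ring lock it grabs a writeback slot, emits an RREG packet plus a polling fence on the KIQ ring, then waits on the fence — one poll of MAX_KIQ_REG_WAIT, followed by a bounded msleep/retry loop, except during GPU reset or in IRQ context where it bails out immediately rather than sleep. The control flow reduces to a bounded poll-then-sleep-retry pattern; a standalone sketch of just that skeleton, with a stubbed fence check and made-up timeouts (the driver's real bounds are the MAX_KIQ_REG_* macros):

```c
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

/* Illustrative bounds only. */
#define POLL_TIMEOUT_MS   5
#define RETRY_INTERVAL_MS 5
#define MAX_RETRIES       3

/* Stub standing in for amdgpu_fence_wait_polling(): pretend the fence
 * signals on the third poll.  Returns >0 on success, 0 on timeout.
 */
static long fence_wait_polling(long timeout_ms)
{
	static int polls;

	(void)timeout_ms;
	return ++polls >= 3 ? 1 : 0;
}

int main(void)
{
	long r = fence_wait_polling(POLL_TIMEOUT_MS);
	long cnt = 0;
	bool in_reset = false, in_irq = false;

	/* Bail out fast instead of sleeping when blocking is unsafe,
	 * mirroring the reset/IRQ-context checks in amdgpu_kiq_rreg().
	 */
	if (r < 1 && (in_reset || in_irq)) {
		fprintf(stderr, "kiq read failed fast\n");
		return 1;
	}

	while (r < 1 && cnt++ < MAX_RETRIES) {
		usleep(RETRY_INTERVAL_MS * 1000);
		r = fence_wait_polling(POLL_TIMEOUT_MS);
	}

	if (cnt > MAX_RETRIES) {
		fprintf(stderr, "kiq read timed out\n");
		return 1;
	}

	printf("fence signalled; the writeback slot now holds the value\n");
	return 0;
}
```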
| 770 | + |
| 771 | +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) |
| 772 | +{ |
| 773 | + signed long r, cnt = 0; |
| 774 | + unsigned long flags; |
| 775 | + uint32_t seq; |
| 776 | + struct amdgpu_kiq *kiq = &adev->gfx.kiq; |
| 777 | + struct amdgpu_ring *ring = &kiq->ring; |
| 778 | + |
| 779 | + BUG_ON(!ring->funcs->emit_wreg); |
| 780 | + |
| 781 | + if (adev->in_pci_err_recovery) |
| 782 | + return; |
| 783 | + |
| 784 | + spin_lock_irqsave(&kiq->ring_lock, flags); |
| 785 | + amdgpu_ring_alloc(ring, 32); |
| 786 | + amdgpu_ring_emit_wreg(ring, reg, v); |
| 787 | + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); |
| 788 | + if (r) |
| 789 | + goto failed_undo; |
| 790 | + |
| 791 | + amdgpu_ring_commit(ring); |
| 792 | + spin_unlock_irqrestore(&kiq->ring_lock, flags); |
| 793 | + |
| 794 | + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); |
| 795 | + |
| 796 | + /* don't wait anymore for gpu reset case because this way may |
| 797 | + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg |
| 798 | + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will |
| 799 | + * never return if we keep waiting in virt_kiq_rreg, which cause |
| 800 | + * gpu_recover() hang there. |
| 801 | + * |
| 802 | + * also don't wait anymore for IRQ context |
| 803 | + * */ |
| 804 | + if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt())) |
| 805 | + goto failed_kiq_write; |
| 806 | + |
| 807 | + might_sleep(); |
| 808 | + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { |
| 809 | + |
| 810 | + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); |
| 811 | + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); |
| 812 | + } |
| 813 | + |
| 814 | + if (cnt > MAX_KIQ_REG_TRY) |
| 815 | + goto failed_kiq_write; |
| 816 | + |
| 817 | + return; |
| 818 | + |
| 819 | +failed_undo: |
| 820 | + amdgpu_ring_undo(ring); |
| 821 | + spin_unlock_irqrestore(&kiq->ring_lock, flags); |
| 822 | +failed_kiq_write: |
| 823 | + dev_err(adev->dev, "failed to write reg:%x\n", reg); |
| 824 | +} |