.. | ..
28 | 28 | */
29 | 29 | #include <linux/seq_file.h>
30 | 30 | #include <linux/slab.h>
31 | | -#include <drm/drmP.h>
| 31 | +
32 | 32 | #include <drm/amdgpu_drm.h>
| 33 | +#include <drm/drm_debugfs.h>
| 34 | +
33 | 35 | #include "amdgpu.h"
34 | 36 | #include "atom.h"
| 37 | +#include "amdgpu_trace.h"
35 | 38 |
36 | 39 | #define AMDGPU_IB_TEST_TIMEOUT msecs_to_jiffies(1000)
| 40 | +#define AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT msecs_to_jiffies(2000)
37 | 41 |
38 | 42 | /*
39 | 43 | * IB
.. | ..
44 | 48 | * produce command buffers which are send to the kernel and
45 | 49 | * put in IBs for execution by the requested ring.
46 | 50 | */
47 | | -static int amdgpu_debugfs_sa_init(struct amdgpu_device *adev);
48 | 51 |
49 | 52 | /**
50 | 53 | * amdgpu_ib_get - request an IB (Indirect Buffer)
.. | ..
58 | 61 | * Returns 0 on success, error on failure.
59 | 62 | */
60 | 63 | int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
61 | | - unsigned size, struct amdgpu_ib *ib)
| 64 | + unsigned size, enum amdgpu_ib_pool_type pool_type,
| 65 | + struct amdgpu_ib *ib)
62 | 66 | {
63 | 67 | int r;
64 | 68 |
65 | 69 | if (size) {
66 | | - r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
| 70 | + r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type],
67 | 71 | &ib->sa_bo, size, 256);
68 | 72 | if (r) {
69 | 73 | dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
.. | ..
71 | 75 | }
72 | 76 |
73 | 77 | ib->ptr = amdgpu_sa_bo_cpu_addr(ib->sa_bo);
| 78 | + /* flush the cache before commit the IB */
| 79 | + ib->flags = AMDGPU_IB_FLAG_EMIT_MEM_SYNC;
74 | 80 |
75 | 81 | if (!vm)
76 | 82 | ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
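Note: the hunk above changes the amdgpu_ib_get() signature so callers must name an IB pool. A minimal caller-side sketch of the new call (illustrative only, not part of the patch; it assumes adev and vm are in scope and uses the AMDGPU_IB_POOL_DELAYED enumerator shown later in this patch):

	struct amdgpu_ib ib;
	int r;

	/* request a 256-byte IB from the deferred ("delayed") pool */
	r = amdgpu_ib_get(adev, vm, 256, AMDGPU_IB_POOL_DELAYED, &ib);
	if (r)
		return r;
	/* ib.ptr now points at CPU-visible space for command packets, and
	 * ib.flags already carries AMDGPU_IB_FLAG_EMIT_MEM_SYNC so caches
	 * are flushed before the IB executes */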
.. | ..
122 | 128 | struct amdgpu_device *adev = ring->adev;
123 | 129 | struct amdgpu_ib *ib = &ibs[0];
124 | 130 | struct dma_fence *tmp = NULL;
125 | | - bool skip_preamble, need_ctx_switch;
| 131 | + bool need_ctx_switch;
126 | 132 | unsigned patch_offset = ~0;
127 | 133 | struct amdgpu_vm *vm;
128 | 134 | uint64_t fence_ctx;
129 | 135 | uint32_t status = 0, alloc_size;
130 | 136 | unsigned fence_flags = 0;
| 137 | + bool secure;
131 | 138 |
132 | 139 | unsigned i;
133 | 140 | int r = 0;
.. | ..
146 | 153 | fence_ctx = 0;
147 | 154 | }
148 | 155 |
149 | | - if (!ring->ready) {
| 156 | + if (!ring->sched.ready) {
150 | 157 | dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
151 | 158 | return -EINVAL;
152 | 159 | }
153 | 160 |
154 | 161 | if (vm && !job->vmid) {
155 | 162 | dev_err(adev->dev, "VM IB without ID\n");
| 163 | + return -EINVAL;
| 164 | + }
| 165 | +
| 166 | + if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) &&
| 167 | + (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)) {
| 168 | + dev_err(adev->dev, "secure submissions not supported on compute rings\n");
156 | 169 | return -EINVAL;
157 | 170 | }
158 | 171 |
.. | ..
167 | 180 |
168 | 181 | need_ctx_switch = ring->current_ctx != fence_ctx;
169 | 182 | if (ring->funcs->emit_pipeline_sync && job &&
170 | | - ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) ||
| 183 | + ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
171 | 184 | (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
172 | 185 | amdgpu_vm_need_pipeline_sync(ring, job))) {
173 | 186 | need_pipe_sync = true;
| 187 | +
| 188 | + if (tmp)
| 189 | + trace_amdgpu_ib_pipe_sync(job, tmp);
| 190 | +
174 | 191 | dma_fence_put(tmp);
175 | 192 | }
| 193 | +
| 194 | + if ((ib->flags & AMDGPU_IB_FLAG_EMIT_MEM_SYNC) && ring->funcs->emit_mem_sync)
| 195 | + ring->funcs->emit_mem_sync(ring);
176 | 196 |
177 | 197 | if (ring->funcs->insert_start)
178 | 198 | ring->funcs->insert_start(ring);
.. | ..
198 | 218 | amdgpu_asic_flush_hdp(adev, ring);
199 | 219 | }
200 | 220 |
201 | | - skip_preamble = ring->current_ctx == fence_ctx;
202 | | - if (job && ring->funcs->emit_cntxcntl) {
203 | | - if (need_ctx_switch)
204 | | - status |= AMDGPU_HAVE_CTX_SWITCH;
205 | | - status |= job->preamble_status;
| 221 | + if (need_ctx_switch)
| 222 | + status |= AMDGPU_HAVE_CTX_SWITCH;
206 | 223 |
| 224 | + if (job && ring->funcs->emit_cntxcntl) {
| 225 | + status |= job->preamble_status;
| 226 | + status |= job->preemption_status;
207 | 227 | amdgpu_ring_emit_cntxcntl(ring, status);
| 228 | + }
| 229 | +
| 230 | + /* Setup initial TMZiness and send it off.
| 231 | + */
| 232 | + secure = false;
| 233 | + if (job && ring->funcs->emit_frame_cntl) {
| 234 | + secure = ib->flags & AMDGPU_IB_FLAGS_SECURE;
| 235 | + amdgpu_ring_emit_frame_cntl(ring, true, secure);
208 | 236 | }
209 | 237 |
210 | 238 | for (i = 0; i < num_ibs; ++i) {
211 | 239 | ib = &ibs[i];
212 | 240 |
213 | | - /* drop preamble IBs if we don't have a context switch */
214 | | - if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
215 | | - skip_preamble &&
216 | | - !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
217 | | - !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
218 | | - continue;
| 241 | + if (job && ring->funcs->emit_frame_cntl) {
| 242 | + if (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
| 243 | + amdgpu_ring_emit_frame_cntl(ring, false, secure);
| 244 | + secure = !secure;
| 245 | + amdgpu_ring_emit_frame_cntl(ring, true, secure);
| 246 | + }
| 247 | + }
219 | 248 |
220 | | - amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0,
221 | | - need_ctx_switch);
222 | | - need_ctx_switch = false;
| 249 | + amdgpu_ring_emit_ib(ring, job, ib, status);
| 250 | + status &= ~AMDGPU_HAVE_CTX_SWITCH;
223 | 251 | }
224 | 252 |
225 | | - if (ring->funcs->emit_tmz)
226 | | - amdgpu_ring_emit_tmz(ring, false);
| 253 | + if (job && ring->funcs->emit_frame_cntl)
| 254 | + amdgpu_ring_emit_frame_cntl(ring, false, secure);
227 | 255 |
228 | 256 | #ifdef CONFIG_X86_64
229 | 257 | if (!(adev->flags & AMD_IS_APU))
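Note: the rewritten loop above drops preamble-IB skipping and instead brackets every run of same-TMZ IBs with frame-control packets. A condensed sketch of that bracketing logic (illustrative only, not part of the patch; it assumes ring, ibs[] and num_ibs are in scope as in amdgpu_ib_schedule() and that the ring's funcs table provides emit_frame_cntl):

	bool secure = ibs[0].flags & AMDGPU_IB_FLAGS_SECURE;

	amdgpu_ring_emit_frame_cntl(ring, true, secure);	/* open first frame */
	for (i = 0; i < num_ibs; ++i) {
		if (secure != !!(ibs[i].flags & AMDGPU_IB_FLAGS_SECURE)) {
			/* close the current frame and reopen in the other mode */
			amdgpu_ring_emit_frame_cntl(ring, false, secure);
			secure = !secure;
			amdgpu_ring_emit_frame_cntl(ring, true, secure);
		}
		/* emit ibs[i] here */
	}
	amdgpu_ring_emit_frame_cntl(ring, false, secure);	/* close last frame */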
.. | ..
272 | 300 | */
273 | 301 | int amdgpu_ib_pool_init(struct amdgpu_device *adev)
274 | 302 | {
275 | | - int r;
| 303 | + unsigned size;
| 304 | + int r, i;
276 | 305 |
277 | | - if (adev->ib_pool_ready) {
| 306 | + if (adev->ib_pool_ready)
278 | 307 | return 0;
279 | | - }
280 | | - r = amdgpu_sa_bo_manager_init(adev, &adev->ring_tmp_bo,
281 | | - AMDGPU_IB_POOL_SIZE*64*1024,
282 | | - AMDGPU_GPU_PAGE_SIZE,
283 | | - AMDGPU_GEM_DOMAIN_GTT);
284 | | - if (r) {
285 | | - return r;
286 | | - }
287 | 308 |
288 | | - adev->ib_pool_ready = true;
289 | | - if (amdgpu_debugfs_sa_init(adev)) {
290 | | - dev_err(adev->dev, "failed to register debugfs file for SA\n");
| 309 | + for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
| 310 | + if (i == AMDGPU_IB_POOL_DIRECT)
| 311 | + size = PAGE_SIZE * 2;
| 312 | + else
| 313 | + size = AMDGPU_IB_POOL_SIZE;
| 314 | +
| 315 | + r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
| 316 | + size, AMDGPU_GPU_PAGE_SIZE,
| 317 | + AMDGPU_GEM_DOMAIN_GTT);
| 318 | + if (r)
| 319 | + goto error;
291 | 320 | }
| 321 | + adev->ib_pool_ready = true;
| 322 | +
292 | 323 | return 0;
| 324 | +
| 325 | +error:
| 326 | + while (i--)
| 327 | + amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]);
| 328 | + return r;
293 | 329 | }
294 | 330 |
295 | 331 | /**
.. | ..
302 | 338 | */
303 | 339 | void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
304 | 340 | {
305 | | - if (adev->ib_pool_ready) {
306 | | - amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo);
307 | | - adev->ib_pool_ready = false;
308 | | - }
| 341 | + int i;
| 342 | +
| 343 | + if (!adev->ib_pool_ready)
| 344 | + return;
| 345 | +
| 346 | + for (i = 0; i < AMDGPU_IB_POOL_MAX; i++)
| 347 | + amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]);
| 348 | + adev->ib_pool_ready = false;
309 | 349 | }
310 | 350 |
311 | 351 | /**
.. | ..
320 | 360 | */
321 | 361 | int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
322 | 362 | {
323 | | - unsigned i;
324 | | - int r, ret = 0;
325 | 363 | long tmo_gfx, tmo_mm;
| 364 | + int r, ret = 0;
| 365 | + unsigned i;
326 | 366 |
327 | 367 | tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT;
328 | 368 | if (amdgpu_sriov_vf(adev)) {
.. | ..
341 | 381 | * cost waiting for it coming back under RUNTIME only
342 | 382 | */
343 | 383 | tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
| 384 | + } else if (adev->gmc.xgmi.hive_id) {
| 385 | + tmo_gfx = AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT;
344 | 386 | }
345 | 387 |
346 | | - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
| 388 | + for (i = 0; i < adev->num_rings; ++i) {
347 | 389 | struct amdgpu_ring *ring = adev->rings[i];
348 | 390 | long tmo;
349 | 391 |
350 | | - if (!ring || !ring->ready)
| 392 | + /* KIQ rings don't have an IB test because we never submit IBs
| 393 | + * to them and they have no interrupt support.
| 394 | + */
| 395 | + if (!ring->sched.ready || !ring->funcs->test_ib)
351 | 396 | continue;
352 | 397 |
353 | 398 | /* MM engine need more time */
.. | ..
362 | 407 | tmo = tmo_gfx;
363 | 408 |
364 | 409 | r = amdgpu_ring_test_ib(ring, tmo);
365 | | - if (r) {
366 | | - ring->ready = false;
| 410 | + if (!r) {
| 411 | + DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n",
| 412 | + ring->name);
| 413 | + continue;
| 414 | + }
367 | 415 |
368 | | - if (ring == &adev->gfx.gfx_ring[0]) {
369 | | - /* oh, oh, that's really bad */
370 | | - DRM_ERROR("amdgpu: failed testing IB on GFX ring (%d).\n", r);
371 | | - adev->accel_working = false;
372 | | - return r;
| 416 | + ring->sched.ready = false;
| 417 | + DRM_DEV_ERROR(adev->dev, "IB test failed on %s (%d).\n",
| 418 | + ring->name, r);
373 | 419 |
374 | | - } else {
375 | | - /* still not good, but we can live with it */
376 | | - DRM_ERROR("amdgpu: failed testing IB on ring %d (%d).\n", i, r);
377 | | - ret = r;
378 | | - }
| 420 | + if (ring == &adev->gfx.gfx_ring[0]) {
| 421 | + /* oh, oh, that's really bad */
| 422 | + adev->accel_working = false;
| 423 | + return r;
| 424 | +
| 425 | + } else {
| 426 | + ret = r;
379 | 427 | }
380 | 428 | }
381 | 429 | return ret;
.. | ..
390 | 438 | {
391 | 439 | struct drm_info_node *node = (struct drm_info_node *) m->private;
392 | 440 | struct drm_device *dev = node->minor->dev;
393 | | - struct amdgpu_device *adev = dev->dev_private;
| 441 | + struct amdgpu_device *adev = drm_to_adev(dev);
394 | 442 |
395 | | - amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo, m);
| 443 | + seq_printf(m, "--------------------- DELAYED --------------------- \n");
| 444 | + amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED],
| 445 | + m);
| 446 | + seq_printf(m, "-------------------- IMMEDIATE -------------------- \n");
| 447 | + amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_IMMEDIATE],
| 448 | + m);
| 449 | + seq_printf(m, "--------------------- DIRECT ---------------------- \n");
| 450 | + amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DIRECT], m);
396 | 451 |
397 | 452 | return 0;
398 | | -
399 | 453 | }
400 | 454 |
401 | 455 | static const struct drm_info_list amdgpu_debugfs_sa_list[] = {
.. | ..
404 | 458 |
405 | 459 | #endif
406 | 460 |
407 | | -static int amdgpu_debugfs_sa_init(struct amdgpu_device *adev)
| 461 | +int amdgpu_debugfs_sa_init(struct amdgpu_device *adev)
408 | 462 | {
409 | 463 | #if defined(CONFIG_DEBUG_FS)
410 | | - return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_sa_list, 1);
| 464 | + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_sa_list,
| 465 | + ARRAY_SIZE(amdgpu_debugfs_sa_list));
411 | 466 | #else
412 | 467 | return 0;
413 | 468 | #endif