| .. | .. |
|---|
| 28 | 28 | */ |
|---|
| 29 | 29 | #include <linux/seq_file.h> |
|---|
| 30 | 30 | #include <linux/slab.h> |
|---|
| 31 | | -#include <drm/drmP.h> |
|---|
| 31 | + |
|---|
| 32 | 32 | #include <drm/amdgpu_drm.h> |
|---|
| 33 | +#include <drm/drm_debugfs.h> |
|---|
| 34 | + |
|---|
| 33 | 35 | #include "amdgpu.h" |
|---|
| 34 | 36 | #include "atom.h" |
|---|
| 37 | +#include "amdgpu_trace.h" |
|---|
| 35 | 38 | |
|---|
| 36 | 39 | #define AMDGPU_IB_TEST_TIMEOUT msecs_to_jiffies(1000) |
|---|
| 40 | +#define AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT msecs_to_jiffies(2000) |
|---|
| 37 | 41 | |
|---|
| 38 | 42 | /* |
|---|
| 39 | 43 | * IB |
|---|
| .. | .. |
|---|
| 44 | 48 | * produce command buffers which are sent to the kernel and |
|---|
| 45 | 49 | * put in IBs for execution by the requested ring. |
|---|
| 46 | 50 | */ |
|---|
| 47 | | -static int amdgpu_debugfs_sa_init(struct amdgpu_device *adev); |
|---|
| 48 | 51 | |
|---|
| 49 | 52 | /** |
|---|
| 50 | 53 | * amdgpu_ib_get - request an IB (Indirect Buffer) |
|---|
| .. | .. |
|---|
| 58 | 61 | * Returns 0 on success, error on failure. |
|---|
| 59 | 62 | */ |
|---|
| 60 | 63 | int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, |
|---|
| 61 | | - unsigned size, struct amdgpu_ib *ib) |
|---|
| 64 | + unsigned size, enum amdgpu_ib_pool_type pool_type, |
|---|
| 65 | + struct amdgpu_ib *ib) |
|---|
| 62 | 66 | { |
|---|
| 63 | 67 | int r; |
|---|
| 64 | 68 | |
|---|
| 65 | 69 | if (size) { |
|---|
| 66 | | - r = amdgpu_sa_bo_new(&adev->ring_tmp_bo, |
|---|
| 70 | + r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type], |
|---|
| 67 | 71 | &ib->sa_bo, size, 256); |
|---|
| 68 | 72 | if (r) { |
|---|
| 69 | 73 | dev_err(adev->dev, "failed to get a new IB (%d)\n", r); |
|---|
| .. | .. |
|---|
| 71 | 75 | } |
|---|
| 72 | 76 | |
|---|
| 73 | 77 | ib->ptr = amdgpu_sa_bo_cpu_addr(ib->sa_bo); |
|---|
| 78 | + /* flush the cache before committing the IB */ |
|---|
| 79 | + ib->flags = AMDGPU_IB_FLAG_EMIT_MEM_SYNC; |
|---|
| 74 | 80 | |
|---|
| 75 | 81 | if (!vm) |
|---|
| 76 | 82 | ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); |
|---|
| .. | .. |
|---|
| 122 | 128 | struct amdgpu_device *adev = ring->adev; |
|---|
| 123 | 129 | struct amdgpu_ib *ib = &ibs[0]; |
|---|
| 124 | 130 | struct dma_fence *tmp = NULL; |
|---|
| 125 | | - bool skip_preamble, need_ctx_switch; |
|---|
| 131 | + bool need_ctx_switch; |
|---|
| 126 | 132 | unsigned patch_offset = ~0; |
|---|
| 127 | 133 | struct amdgpu_vm *vm; |
|---|
| 128 | 134 | uint64_t fence_ctx; |
|---|
| 129 | 135 | uint32_t status = 0, alloc_size; |
|---|
| 130 | 136 | unsigned fence_flags = 0; |
|---|
| 137 | + bool secure; |
|---|
| 131 | 138 | |
|---|
| 132 | 139 | unsigned i; |
|---|
| 133 | 140 | int r = 0; |
|---|
| .. | .. |
|---|
| 146 | 153 | fence_ctx = 0; |
|---|
| 147 | 154 | } |
|---|
| 148 | 155 | |
|---|
| 149 | | - if (!ring->ready) { |
|---|
| 156 | + if (!ring->sched.ready) { |
|---|
| 150 | 157 | dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name); |
|---|
| 151 | 158 | return -EINVAL; |
|---|
| 152 | 159 | } |
|---|
| 153 | 160 | |
|---|
| 154 | 161 | if (vm && !job->vmid) { |
|---|
| 155 | 162 | dev_err(adev->dev, "VM IB without ID\n"); |
|---|
| 163 | + return -EINVAL; |
|---|
| 164 | + } |
|---|
| 165 | + |
|---|
| 166 | + if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) && |
|---|
| 167 | + (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)) { |
|---|
| 168 | + dev_err(adev->dev, "secure submissions not supported on compute rings\n"); |
|---|
| 156 | 169 | return -EINVAL; |
|---|
| 157 | 170 | } |
|---|
| 158 | 171 | |
|---|
| .. | .. |
|---|
| 167 | 180 | |
|---|
| 168 | 181 | need_ctx_switch = ring->current_ctx != fence_ctx; |
|---|
| 169 | 182 | if (ring->funcs->emit_pipeline_sync && job && |
|---|
| 170 | | - ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) || |
|---|
| 183 | + ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || |
|---|
| 171 | 184 | (amdgpu_sriov_vf(adev) && need_ctx_switch) || |
|---|
| 172 | 185 | amdgpu_vm_need_pipeline_sync(ring, job))) { |
|---|
| 173 | 186 | need_pipe_sync = true; |
|---|
| 187 | + |
|---|
| 188 | + if (tmp) |
|---|
| 189 | + trace_amdgpu_ib_pipe_sync(job, tmp); |
|---|
| 190 | + |
|---|
| 174 | 191 | dma_fence_put(tmp); |
|---|
| 175 | 192 | } |
|---|
| 193 | + |
|---|
| 194 | + if ((ib->flags & AMDGPU_IB_FLAG_EMIT_MEM_SYNC) && ring->funcs->emit_mem_sync) |
|---|
| 195 | + ring->funcs->emit_mem_sync(ring); |
|---|
| 176 | 196 | |
|---|
| 177 | 197 | if (ring->funcs->insert_start) |
|---|
| 178 | 198 | ring->funcs->insert_start(ring); |
|---|
| .. | .. |
|---|
| 198 | 218 | amdgpu_asic_flush_hdp(adev, ring); |
|---|
| 199 | 219 | } |
|---|
| 200 | 220 | |
|---|
| 201 | | - skip_preamble = ring->current_ctx == fence_ctx; |
|---|
| 202 | | - if (job && ring->funcs->emit_cntxcntl) { |
|---|
| 203 | | - if (need_ctx_switch) |
|---|
| 204 | | - status |= AMDGPU_HAVE_CTX_SWITCH; |
|---|
| 205 | | - status |= job->preamble_status; |
|---|
| 221 | + if (need_ctx_switch) |
|---|
| 222 | + status |= AMDGPU_HAVE_CTX_SWITCH; |
|---|
| 206 | 223 | |
|---|
| 224 | + if (job && ring->funcs->emit_cntxcntl) { |
|---|
| 225 | + status |= job->preamble_status; |
|---|
| 226 | + status |= job->preemption_status; |
|---|
| 207 | 227 | amdgpu_ring_emit_cntxcntl(ring, status); |
|---|
| 228 | + } |
|---|
| 229 | + |
|---|
| 230 | + /* Setup initial TMZiness and send it off. |
|---|
| 231 | + */ |
|---|
| 232 | + secure = false; |
|---|
| 233 | + if (job && ring->funcs->emit_frame_cntl) { |
|---|
| 234 | + secure = ib->flags & AMDGPU_IB_FLAGS_SECURE; |
|---|
| 235 | + amdgpu_ring_emit_frame_cntl(ring, true, secure); |
|---|
| 208 | 236 | } |
|---|
| 209 | 237 | |
|---|
| 210 | 238 | for (i = 0; i < num_ibs; ++i) { |
|---|
| 211 | 239 | ib = &ibs[i]; |
|---|
| 212 | 240 | |
|---|
| 213 | | - /* drop preamble IBs if we don't have a context switch */ |
|---|
| 214 | | - if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && |
|---|
| 215 | | - skip_preamble && |
|---|
| 216 | | - !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) && |
|---|
| 217 | | - !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ |
|---|
| 218 | | - continue; |
|---|
| 241 | + if (job && ring->funcs->emit_frame_cntl) { |
|---|
| 242 | + if (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) { |
|---|
| 243 | + amdgpu_ring_emit_frame_cntl(ring, false, secure); |
|---|
| 244 | + secure = !secure; |
|---|
| 245 | + amdgpu_ring_emit_frame_cntl(ring, true, secure); |
|---|
| 246 | + } |
|---|
| 247 | + } |
|---|
| 219 | 248 | |
|---|
| 220 | | - amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0, |
|---|
| 221 | | - need_ctx_switch); |
|---|
| 222 | | - need_ctx_switch = false; |
|---|
| 249 | + amdgpu_ring_emit_ib(ring, job, ib, status); |
|---|
| 250 | + status &= ~AMDGPU_HAVE_CTX_SWITCH; |
|---|
| 223 | 251 | } |
|---|
| 224 | 252 | |
|---|
| 225 | | - if (ring->funcs->emit_tmz) |
|---|
| 226 | | - amdgpu_ring_emit_tmz(ring, false); |
|---|
| 253 | + if (job && ring->funcs->emit_frame_cntl) |
|---|
| 254 | + amdgpu_ring_emit_frame_cntl(ring, false, secure); |
|---|
| 227 | 255 | |
|---|
| 228 | 256 | #ifdef CONFIG_X86_64 |
|---|
| 229 | 257 | if (!(adev->flags & AMD_IS_APU)) |
|---|
| .. | .. |
|---|
| 272 | 300 | */ |
|---|
| 273 | 301 | int amdgpu_ib_pool_init(struct amdgpu_device *adev) |
|---|
| 274 | 302 | { |
|---|
| 275 | | - int r; |
|---|
| 303 | + unsigned size; |
|---|
| 304 | + int r, i; |
|---|
| 276 | 305 | |
|---|
| 277 | | - if (adev->ib_pool_ready) { |
|---|
| 306 | + if (adev->ib_pool_ready) |
|---|
| 278 | 307 | return 0; |
|---|
| 279 | | - } |
|---|
| 280 | | - r = amdgpu_sa_bo_manager_init(adev, &adev->ring_tmp_bo, |
|---|
| 281 | | - AMDGPU_IB_POOL_SIZE*64*1024, |
|---|
| 282 | | - AMDGPU_GPU_PAGE_SIZE, |
|---|
| 283 | | - AMDGPU_GEM_DOMAIN_GTT); |
|---|
| 284 | | - if (r) { |
|---|
| 285 | | - return r; |
|---|
| 286 | | - } |
|---|
| 287 | 308 | |
|---|
| 288 | | - adev->ib_pool_ready = true; |
|---|
| 289 | | - if (amdgpu_debugfs_sa_init(adev)) { |
|---|
| 290 | | - dev_err(adev->dev, "failed to register debugfs file for SA\n"); |
|---|
| 309 | + for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) { |
|---|
| 310 | + if (i == AMDGPU_IB_POOL_DIRECT) |
|---|
| 311 | + size = PAGE_SIZE * 2; |
|---|
| 312 | + else |
|---|
| 313 | + size = AMDGPU_IB_POOL_SIZE; |
|---|
| 314 | + |
|---|
| 315 | + r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i], |
|---|
| 316 | + size, AMDGPU_GPU_PAGE_SIZE, |
|---|
| 317 | + AMDGPU_GEM_DOMAIN_GTT); |
|---|
| 318 | + if (r) |
|---|
| 319 | + goto error; |
|---|
| 291 | 320 | } |
|---|
| 321 | + adev->ib_pool_ready = true; |
|---|
| 322 | + |
|---|
| 292 | 323 | return 0; |
|---|
| 324 | + |
|---|
| 325 | +error: |
|---|
| 326 | + while (i--) |
|---|
| 327 | + amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]); |
|---|
| 328 | + return r; |
|---|
| 293 | 329 | } |
|---|
| 294 | 330 | |
|---|
| 295 | 331 | /** |
|---|
| .. | .. |
|---|
| 302 | 338 | */ |
|---|
| 303 | 339 | void amdgpu_ib_pool_fini(struct amdgpu_device *adev) |
|---|
| 304 | 340 | { |
|---|
| 305 | | - if (adev->ib_pool_ready) { |
|---|
| 306 | | - amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo); |
|---|
| 307 | | - adev->ib_pool_ready = false; |
|---|
| 308 | | - } |
|---|
| 341 | + int i; |
|---|
| 342 | + |
|---|
| 343 | + if (!adev->ib_pool_ready) |
|---|
| 344 | + return; |
|---|
| 345 | + |
|---|
| 346 | + for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) |
|---|
| 347 | + amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]); |
|---|
| 348 | + adev->ib_pool_ready = false; |
|---|
| 309 | 349 | } |
|---|
| 310 | 350 | |
|---|
| 311 | 351 | /** |
|---|
| .. | .. |
|---|
| 320 | 360 | */ |
|---|
| 321 | 361 | int amdgpu_ib_ring_tests(struct amdgpu_device *adev) |
|---|
| 322 | 362 | { |
|---|
| 323 | | - unsigned i; |
|---|
| 324 | | - int r, ret = 0; |
|---|
| 325 | 363 | long tmo_gfx, tmo_mm; |
|---|
| 364 | + int r, ret = 0; |
|---|
| 365 | + unsigned i; |
|---|
| 326 | 366 | |
|---|
| 327 | 367 | tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT; |
|---|
| 328 | 368 | if (amdgpu_sriov_vf(adev)) { |
|---|
| .. | .. |
|---|
| 341 | 381 | * cost waiting for it coming back under RUNTIME only |
|---|
| 342 | 382 | */ |
|---|
| 343 | 383 | tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT; |
|---|
| 384 | + } else if (adev->gmc.xgmi.hive_id) { |
|---|
| 385 | + tmo_gfx = AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT; |
|---|
| 344 | 386 | } |
|---|
| 345 | 387 | |
|---|
| 346 | | - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
|---|
| 388 | + for (i = 0; i < adev->num_rings; ++i) { |
|---|
| 347 | 389 | struct amdgpu_ring *ring = adev->rings[i]; |
|---|
| 348 | 390 | long tmo; |
|---|
| 349 | 391 | |
|---|
| 350 | | - if (!ring || !ring->ready) |
|---|
| 392 | + /* KIQ rings don't have an IB test because we never submit IBs |
|---|
| 393 | + * to them and they have no interrupt support. |
|---|
| 394 | + */ |
|---|
| 395 | + if (!ring->sched.ready || !ring->funcs->test_ib) |
|---|
| 351 | 396 | continue; |
|---|
| 352 | 397 | |
|---|
| 353 | 398 | /* MM engines need more time */ |
|---|
| .. | .. |
|---|
| 362 | 407 | tmo = tmo_gfx; |
|---|
| 363 | 408 | |
|---|
| 364 | 409 | r = amdgpu_ring_test_ib(ring, tmo); |
|---|
| 365 | | - if (r) { |
|---|
| 366 | | - ring->ready = false; |
|---|
| 410 | + if (!r) { |
|---|
| 411 | + DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n", |
|---|
| 412 | + ring->name); |
|---|
| 413 | + continue; |
|---|
| 414 | + } |
|---|
| 367 | 415 | |
|---|
| 368 | | - if (ring == &adev->gfx.gfx_ring[0]) { |
|---|
| 369 | | - /* oh, oh, that's really bad */ |
|---|
| 370 | | - DRM_ERROR("amdgpu: failed testing IB on GFX ring (%d).\n", r); |
|---|
| 371 | | - adev->accel_working = false; |
|---|
| 372 | | - return r; |
|---|
| 416 | + ring->sched.ready = false; |
|---|
| 417 | + DRM_DEV_ERROR(adev->dev, "IB test failed on %s (%d).\n", |
|---|
| 418 | + ring->name, r); |
|---|
| 373 | 419 | |
|---|
| 374 | | - } else { |
|---|
| 375 | | - /* still not good, but we can live with it */ |
|---|
| 376 | | - DRM_ERROR("amdgpu: failed testing IB on ring %d (%d).\n", i, r); |
|---|
| 377 | | - ret = r; |
|---|
| 378 | | - } |
|---|
| 420 | + if (ring == &adev->gfx.gfx_ring[0]) { |
|---|
| 421 | + /* oh, oh, that's really bad */ |
|---|
| 422 | + adev->accel_working = false; |
|---|
| 423 | + return r; |
|---|
| 424 | + |
|---|
| 425 | + } else { |
|---|
| 426 | + ret = r; |
|---|
| 379 | 427 | } |
|---|
| 380 | 428 | } |
|---|
| 381 | 429 | return ret; |
|---|
| .. | .. |
|---|
| 390 | 438 | { |
|---|
| 391 | 439 | struct drm_info_node *node = (struct drm_info_node *) m->private; |
|---|
| 392 | 440 | struct drm_device *dev = node->minor->dev; |
|---|
| 393 | | - struct amdgpu_device *adev = dev->dev_private; |
|---|
| 441 | + struct amdgpu_device *adev = drm_to_adev(dev); |
|---|
| 394 | 442 | |
|---|
| 395 | | - amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo, m); |
|---|
| 443 | + seq_printf(m, "--------------------- DELAYED --------------------- \n"); |
|---|
| 444 | + amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED], |
|---|
| 445 | + m); |
|---|
| 446 | + seq_printf(m, "-------------------- IMMEDIATE -------------------- \n"); |
|---|
| 447 | + amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_IMMEDIATE], |
|---|
| 448 | + m); |
|---|
| 449 | + seq_printf(m, "--------------------- DIRECT ---------------------- \n"); |
|---|
| 450 | + amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DIRECT], m); |
|---|
| 396 | 451 | |
|---|
| 397 | 452 | return 0; |
|---|
| 398 | | - |
|---|
| 399 | 453 | } |
|---|
| 400 | 454 | |
|---|
| 401 | 455 | static const struct drm_info_list amdgpu_debugfs_sa_list[] = { |
|---|
| .. | .. |
|---|
| 404 | 458 | |
|---|
| 405 | 459 | #endif |
|---|
| 406 | 460 | |
|---|
| 407 | | -static int amdgpu_debugfs_sa_init(struct amdgpu_device *adev) |
|---|
| 461 | +int amdgpu_debugfs_sa_init(struct amdgpu_device *adev) |
|---|
| 408 | 462 | { |
|---|
| 409 | 463 | #if defined(CONFIG_DEBUG_FS) |
|---|
| 410 | | - return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_sa_list, 1); |
|---|
| 464 | + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_sa_list, |
|---|
| 465 | + ARRAY_SIZE(amdgpu_debugfs_sa_list)); |
|---|
| 411 | 466 | #else |
|---|
| 412 | 467 | return 0; |
|---|
| 413 | 468 | #endif |
|---|