@@ -21,8 +21,10 @@
  *
  * Authors: Alex Deucher
  */
+
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_trace.h"
@@ -198,7 +200,7 @@
 
 static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 {
-	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
+	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
 	int i;
 
 	for (i = 0; i < count; i++)
@@ -218,13 +220,15 @@
  * Schedule an IB in the DMA ring (CIK).
  */
 static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
+				  struct amdgpu_job *job,
 				  struct amdgpu_ib *ib,
-				  unsigned vmid, bool ctx_switch)
+				  uint32_t flags)
 {
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 	u32 extra_bits = vmid & 0xf;
 
 	/* IB packet must end on a 8 DW boundary */
-	cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8);
+	cik_sdma_ring_insert_nop(ring, (4 - lower_32_bits(ring->wptr)) & 7);
 
 	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
 	amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
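
Note on the padding change above: the two expressions are equivalent. The INDIRECT_BUFFER packet is 4 DW, so padding the write pointer to 4 (mod 8) makes the packet end on an 8-DW boundary, and (12 - (w & 7)) % 8 == (4 - w) & 7 for all w because 12 ≡ 4 (mod 8). A minimal standalone check (hypothetical helper names, not part of the driver):

    #include <assert.h>
    #include <stdint.h>

    /* Old and new pad computations from cik_sdma_ring_emit_ib(). */
    static uint32_t pad_old(uint32_t w) { return (12 - (w & 7)) % 8; }
    static uint32_t pad_new(uint32_t w) { return (4 - w) & 7; }

    int main(void)
    {
        for (uint32_t w = 0; w < 64; w++) {
            assert(pad_old(w) == pad_new(w));
            /* After padding, wptr % 8 == 4, so the 4-DW IB
             * packet ends exactly on an 8-DW boundary. */
            assert(((w + pad_new(w)) & 7) == 4);
        }
        return 0;
    }
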
@@ -316,8 +320,6 @@
 		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], 0);
 	}
-	sdma0->ready = false;
-	sdma1->ready = false;
 }
 
 /**
@@ -494,18 +496,16 @@
 		/* enable DMA IBs */
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 
-		ring->ready = true;
+		ring->sched.ready = true;
 	}
 
 	cik_sdma_enable(adev, true);
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		ring = &adev->sdma.instance[i].ring;
-		r = amdgpu_ring_test_ring(ring);
-		if (r) {
-			ring->ready = false;
+		r = amdgpu_ring_test_helper(ring);
+		if (r)
 			return r;
-		}
 
 		if (adev->mman.buffer_funcs_ring == ring)
 			amdgpu_ttm_set_buffer_funcs_status(adev, true);
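
The switch to amdgpu_ring_test_helper() above is what lets the open-coded ring->ready bookkeeping disappear: the helper runs the ring test and records the outcome on the scheduler. A simplified sketch of its contract (see amdgpu_ring.c for the real implementation; the error message here is illustrative):

    int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
    {
        int r;

        r = amdgpu_ring_test_ring(ring);
        if (r)
            DRM_DEV_ERROR(ring->adev->dev,
                          "ring %s test failed (%d)\n", ring->name, r);

        /* Readiness now lives on the scheduler, not the ring. */
        ring->sched.ready = !r;

        return r;
    }
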
@@ -618,21 +618,17 @@
 	u64 gpu_addr;
 
 	r = amdgpu_device_wb_get(adev, &index);
-	if (r) {
-		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+	if (r)
 		return r;
-	}
 
 	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	tmp = 0xCAFEDEAD;
 	adev->wb.wb[index] = cpu_to_le32(tmp);
 
 	r = amdgpu_ring_alloc(ring, 5);
-	if (r) {
-		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
-		amdgpu_device_wb_free(adev, index);
-		return r;
-	}
+	if (r)
+		goto error_free_wb;
+
 	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
 	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
 	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
@@ -644,18 +640,14 @@
 		tmp = le32_to_cpu(adev->wb.wb[index]);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
-	if (i < adev->usec_timeout) {
-		DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
-	} else {
-		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
-			  ring->idx, tmp);
-		r = -EINVAL;
-	}
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+
+error_free_wb:
 	amdgpu_device_wb_free(adev, index);
-
 	return r;
 }
 
@@ -678,20 +670,17 @@
 	long r;
 
 	r = amdgpu_device_wb_get(adev, &index);
-	if (r) {
-		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+	if (r)
 		return r;
-	}
 
 	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	tmp = 0xCAFEDEAD;
 	adev->wb.wb[index] = cpu_to_le32(tmp);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(adev, NULL, 256, &ib);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+	r = amdgpu_ib_get(adev, NULL, 256,
+			  AMDGPU_IB_POOL_DIRECT, &ib);
+	if (r)
 		goto err0;
-	}
 
 	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE,
 				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
@@ -706,21 +695,16 @@
 
 	r = dma_fence_wait_timeout(f, false, timeout);
 	if (r == 0) {
-		DRM_ERROR("amdgpu: IB test timed out\n");
 		r = -ETIMEDOUT;
 		goto err1;
 	} else if (r < 0) {
-		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err1;
 	}
 	tmp = le32_to_cpu(adev->wb.wb[index]);
-	if (tmp == 0xDEADBEEF) {
-		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+	if (tmp == 0xDEADBEEF)
 		r = 0;
-	} else {
-		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
+	else
 		r = -EINVAL;
-	}
 
 err1:
 	amdgpu_ib_free(adev, &ib, NULL);
@@ -822,11 +806,11 @@
  */
 static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
 {
-	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
+	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
 	u32 pad_count;
 	int i;
 
-	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+	pad_count = (-ib->length_dw) & 7;
 	for (i = 0; i < pad_count; i++)
 		if (sdma && sdma->burst_nop && (i == 0))
 			ib->ptr[ib->length_dw++] =
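
The pad_ib change above is the same algebraic simplification as in the ring path: (8 - (x & 7)) % 8 and (-x) & 7 both give the number of dwords needed to round x up to a multiple of 8. For example, length_dw = 13 yields (8 - 5) % 8 = 3 under the old form and (-13) & 7 = 3 under the new one.
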
@@ -970,19 +954,19 @@
 	}
 
 	/* SDMA trap event */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224,
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
 			      &adev->sdma.trap_irq);
 	if (r)
 		return r;
 
 	/* SDMA Privileged inst */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241,
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241,
 			      &adev->sdma.illegal_inst_irq);
 	if (r)
 		return r;
 
 	/* SDMA Privileged inst */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 247,
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 247,
 			      &adev->sdma.illegal_inst_irq);
 	if (r)
 		return r;
@@ -994,8 +978,9 @@
 		r = amdgpu_ring_init(adev, ring, 1024,
 				     &adev->sdma.trap_irq,
 				     (i == 0) ?
-				     AMDGPU_SDMA_IRQ_TRAP0 :
-				     AMDGPU_SDMA_IRQ_TRAP1);
+				     AMDGPU_SDMA_IRQ_INSTANCE0 :
+				     AMDGPU_SDMA_IRQ_INSTANCE1,
+				     AMDGPU_RING_PRIO_DEFAULT);
 		if (r)
 			return r;
 	}
@@ -1128,7 +1113,7 @@
 	u32 sdma_cntl;
 
 	switch (type) {
-	case AMDGPU_SDMA_IRQ_TRAP0:
+	case AMDGPU_SDMA_IRQ_INSTANCE0:
 		switch (state) {
 		case AMDGPU_IRQ_STATE_DISABLE:
 			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
@@ -1144,7 +1129,7 @@
 			break;
 		}
 		break;
-	case AMDGPU_SDMA_IRQ_TRAP1:
+	case AMDGPU_SDMA_IRQ_INSTANCE1:
 		switch (state) {
 		case AMDGPU_IRQ_STATE_DISABLE:
 			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
@@ -1211,8 +1196,11 @@
 			       struct amdgpu_irq_src *source,
 			       struct amdgpu_iv_entry *entry)
 {
+	u8 instance_id;
+
 	DRM_ERROR("Illegal instruction in SDMA command stream\n");
-	schedule_work(&adev->reset_work);
+	instance_id = (entry->ring_id & 0x3) >> 0;
+	drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
 	return 0;
 }
 
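
The illegal-instruction handler above no longer schedules a device-wide reset work item; it reports the fault to the DRM scheduler of the offending instance, so the scheduler's timeout and recovery machinery drives the reset. The (entry->ring_id & 0x3) >> 0 decode is a shift-by-zero, presumably kept for symmetry with IH decodings that extract fields at other bit positions; it reduces to:

    /* ring_id bits [1:0] select the SDMA instance on CIK. */
    u8 instance_id = entry->ring_id & 0x3;

    drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
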
@@ -1322,7 +1310,8 @@
 static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
 				      uint64_t src_offset,
 				      uint64_t dst_offset,
-				      uint32_t byte_count)
+				      uint32_t byte_count,
+				      bool tmz)
 {
 	ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
 	ib->ptr[ib->length_dw++] = byte_count;
@@ -1367,10 +1356,8 @@
 
 static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev)
 {
-	if (adev->mman.buffer_funcs == NULL) {
-		adev->mman.buffer_funcs = &cik_sdma_buffer_funcs;
-		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
-	}
+	adev->mman.buffer_funcs = &cik_sdma_buffer_funcs;
+	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
 }
 
 static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
@@ -1385,14 +1372,12 @@
 {
 	unsigned i;
 
-	if (adev->vm_manager.vm_pte_funcs == NULL) {
-		adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
-		for (i = 0; i < adev->sdma.num_instances; i++)
-			adev->vm_manager.vm_pte_rings[i] =
-				&adev->sdma.instance[i].ring;
-
-		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
+	adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		adev->vm_manager.vm_pte_scheds[i] =
+			&adev->sdma.instance[i].ring.sched;
 	}
+	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
 }
 
 const struct amdgpu_ip_block_version cik_sdma_ip_block =
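
With the last hunk, VM page-table updates are described by scheduler pointers (vm_pte_scheds) rather than rings, so the VM code can attach a drm_sched entity to the whole set and let the scheduler load-balance across SDMA instances. A hedged sketch of how a consumer binds to this list (amdgpu_vm.c does something along these lines; the entity name and call site here are assumptions and vary by kernel version):

    /* Attach an entity to every SDMA scheduler that can service
     * page-table updates; drm_sched picks among them at job submit. */
    r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
                              adev->vm_manager.vm_pte_scheds,
                              adev->vm_manager.vm_pte_num_scheds, NULL);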