@@ -21,8 +21,11 @@
  *
  * Authors: Alex Deucher
  */
+
+#include <linux/delay.h>
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_trace.h"
@@ -318,14 +321,13 @@
 		if (adev->sdma.instance[i].feature_version >= 20)
 			adev->sdma.instance[i].burst_nop = true;
 
-		if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
-			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
-			info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
-			info->fw = adev->sdma.instance[i].fw;
-			header = (const struct common_firmware_header *)info->fw->data;
-			adev->firmware.fw_size +=
-				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-		}
+		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
+		info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
+		info->fw = adev->sdma.instance[i].fw;
+		header = (const struct common_firmware_header *)info->fw->data;
+		adev->firmware.fw_size +=
+			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
 	}
 out:
 	if (err) {
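A note on the fw_size bookkeeping above: each SDMA ucode image is rounded up to a whole page before being added to the total. A stand-alone sketch of the same arithmetic (the two byte counts are made-up values; ALIGN mirrors the kernel macro for power-of-two alignment):

    #include <assert.h>
    #include <stdint.h>

    /* mirrors the kernel's ALIGN() for power-of-two alignment */
    #define ALIGN(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))
    #define PAGE_SIZE 4096

    int main(void)
    {
        uint64_t fw_size = 0;

        /* hypothetical ucode sizes for two SDMA instances */
        fw_size += ALIGN(17408, PAGE_SIZE); /* 4.25 pages -> 20480 */
        fw_size += ALIGN(16384, PAGE_SIZE); /* exactly 4 pages -> 16384 */
        assert(fw_size == 36864);
        return 0;
    }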
@@ -400,7 +402,7 @@
 
 static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 {
-	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
+	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
 	int i;
 
 	for (i = 0; i < count; i++)
@@ -420,11 +422,14 @@
  * Schedule an IB in the DMA ring (VI).
  */
 static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
+				   struct amdgpu_job *job,
 				   struct amdgpu_ib *ib,
-				   unsigned vmid, bool ctx_switch)
+				   uint32_t flags)
 {
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
 	/* IB packet must end on a 8 DW boundary */
-	sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+	sdma_v3_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
 			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
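The padding expression in sdma_v3_0_ring_emit_ib() changes form but not value: both (10 - (wptr & 7)) % 8 and (2 - wptr) & 7 compute the NOP count that brings wptr to 2 (mod 8), so that the INDIRECT packet ends on an 8-DW boundary (assuming the 6-DW packet size this ring emits). A quick exhaustive check of the equivalence:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        for (uint32_t wptr = 0; wptr < 64; wptr++) {
            uint32_t old_pad = (10 - (wptr & 7)) % 8; /* previous expression */
            uint32_t new_pad = (2 - wptr) & 7;        /* replacement */

            assert(old_pad == new_pad);
            /* packet starts at 2 (mod 8) ... */
            assert(((wptr + new_pad) & 7) == 2);
            /* ... so a 6-DW packet ends on an 8-DW boundary */
            assert(((wptr + new_pad + 6) & 7) == 0);
        }
        return 0;
    }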
@@ -524,8 +529,6 @@
 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 	}
-	sdma0->ready = false;
-	sdma1->ready = false;
 }
 
 /**
@@ -740,7 +743,7 @@
 		/* enable DMA IBs */
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 
-		ring->ready = true;
+		ring->sched.ready = true;
 	}
 
 	/* unhalt the MEs */
@@ -750,11 +753,9 @@
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		ring = &adev->sdma.instance[i].ring;
-		r = amdgpu_ring_test_ring(ring);
-		if (r) {
-			ring->ready = false;
+		r = amdgpu_ring_test_helper(ring);
+		if (r)
 			return r;
-		}
 
 		if (adev->mman.buffer_funcs_ring == ring)
 			amdgpu_ttm_set_buffer_funcs_status(adev, true);
@@ -778,42 +779,6 @@
 }
 
 /**
- * sdma_v3_0_load_microcode - load the sDMA ME ucode
- *
- * @adev: amdgpu_device pointer
- *
- * Loads the sDMA0/1 ucode.
- * Returns 0 for success, -EINVAL if the ucode is not available.
- */
-static int sdma_v3_0_load_microcode(struct amdgpu_device *adev)
-{
-	const struct sdma_firmware_header_v1_0 *hdr;
-	const __le32 *fw_data;
-	u32 fw_size;
-	int i, j;
-
-	/* halt the MEs */
-	sdma_v3_0_enable(adev, false);
-
-	for (i = 0; i < adev->sdma.num_instances; i++) {
-		if (!adev->sdma.instance[i].fw)
-			return -EINVAL;
-		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
-		amdgpu_ucode_print_sdma_hdr(&hdr->header);
-		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
-		fw_data = (const __le32 *)
-			(adev->sdma.instance[i].fw->data +
-			 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
-		WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
-		for (j = 0; j < fw_size; j++)
-			WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++));
-		WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma.instance[i].fw_version);
-	}
-
-	return 0;
-}
-
-/**
  * sdma_v3_0_start - setup and start the async dma engines
  *
  * @adev: amdgpu_device pointer
@@ -824,12 +789,6 @@
 static int sdma_v3_0_start(struct amdgpu_device *adev)
 {
 	int r;
-
-	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
-		r = sdma_v3_0_load_microcode(adev);
-		if (r)
-			return r;
-	}
 
 	/* disable sdma engine before programing it */
 	sdma_v3_0_ctx_switch_enable(adev, false);
@@ -865,21 +824,16 @@
 	u64 gpu_addr;
 
 	r = amdgpu_device_wb_get(adev, &index);
-	if (r) {
-		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+	if (r)
 		return r;
-	}
 
 	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	tmp = 0xCAFEDEAD;
 	adev->wb.wb[index] = cpu_to_le32(tmp);
 
 	r = amdgpu_ring_alloc(ring, 5);
-	if (r) {
-		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
-		amdgpu_device_wb_free(adev, index);
-		return r;
-	}
+	if (r)
+		goto error_free_wb;
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
 			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
@@ -893,18 +847,14 @@
 		tmp = le32_to_cpu(adev->wb.wb[index]);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
-	if (i < adev->usec_timeout) {
-		DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
-	} else {
-		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
-			  ring->idx, tmp);
-		r = -EINVAL;
-	}
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+
+error_free_wb:
 	amdgpu_device_wb_free(adev, index);
-
 	return r;
 }
 
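The ring-test rework above replaces per-branch cleanup and logging with a single error_free_wb exit (the common logging now lives in amdgpu_ring_test_helper()), and a timeout now reports -ETIMEDOUT instead of -EINVAL. The resulting control-flow shape, reduced to a hypothetical stand-alone skeleton (ring_alloc/read_wb/free_wb are stand-ins, not amdgpu APIs):

    #include <errno.h>
    #include <stdint.h>

    /* stand-ins for the device-side pieces; control flow only */
    extern int ring_alloc(void);
    extern uint32_t read_wb(void);
    extern void free_wb(void);

    static int ring_test_shape(int usec_timeout)
    {
        int i, r;

        r = ring_alloc();
        if (r)
            goto error_free_wb; /* single cleanup path, as in the patch */

        for (i = 0; i < usec_timeout; i++) {
            if (read_wb() == 0xDEADBEEF)
                break;
            /* udelay(1) in the kernel */
        }

        if (i >= usec_timeout)
            r = -ETIMEDOUT; /* was -EINVAL before the rework */

    error_free_wb:
        free_wb();
        return r;
    }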
@@ -927,20 +877,17 @@
 	long r;
 
 	r = amdgpu_device_wb_get(adev, &index);
-	if (r) {
-		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+	if (r)
 		return r;
-	}
 
 	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	tmp = 0xCAFEDEAD;
 	adev->wb.wb[index] = cpu_to_le32(tmp);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(adev, NULL, 256, &ib);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+	r = amdgpu_ib_get(adev, NULL, 256,
+			  AMDGPU_IB_POOL_DIRECT, &ib);
+	if (r)
 		goto err0;
-	}
 
 	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
 		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
@@ -959,21 +906,16 @@
 
 	r = dma_fence_wait_timeout(f, false, timeout);
 	if (r == 0) {
-		DRM_ERROR("amdgpu: IB test timed out\n");
 		r = -ETIMEDOUT;
 		goto err1;
 	} else if (r < 0) {
-		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err1;
 	}
 	tmp = le32_to_cpu(adev->wb.wb[index]);
-	if (tmp == 0xDEADBEEF) {
-		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+	if (tmp == 0xDEADBEEF)
 		r = 0;
-	} else {
-		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
+	else
 		r = -EINVAL;
-	}
 err1:
 	amdgpu_ib_free(adev, &ib, NULL);
 	dma_fence_put(f);
@@ -1074,11 +1016,11 @@
  */
 static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
 {
-	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
+	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
 	u32 pad_count;
 	int i;
 
-	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+	pad_count = (-ib->length_dw) & 7;
 	for (i = 0; i < pad_count; i++)
 		if (sdma && sdma->burst_nop && (i == 0))
 			ib->ptr[ib->length_dw++] =
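Same simplification as the emit_ib padding: (8 - (len & 0x7)) % 8 and (-len) & 7 agree for every length, both yielding the NOP count that rounds length_dw up to a multiple of 8. An exhaustive check:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* NOP DWs needed to round length_dw up to a multiple of 8 */
        for (uint32_t len = 0; len < 256; len++) {
            uint32_t old_pad = (8 - (len & 0x7)) % 8; /* previous expression */
            uint32_t new_pad = (-len) & 7;            /* replacement */

            assert(old_pad == new_pad);
            assert(((len + new_pad) & 7) == 0);
        }
        return 0;
    }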
@@ -1177,19 +1119,19 @@
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	/* SDMA trap event */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
 			      &adev->sdma.trap_irq);
 	if (r)
 		return r;
 
 	/* SDMA Privileged inst */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241,
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241,
 			      &adev->sdma.illegal_inst_irq);
 	if (r)
 		return r;
 
 	/* SDMA Privileged inst */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,
 			      &adev->sdma.illegal_inst_irq);
 	if (r)
 		return r;
@@ -1205,8 +1147,7 @@
 		ring->ring_obj = NULL;
 		if (!amdgpu_sriov_vf(adev)) {
 			ring->use_doorbell = true;
-			ring->doorbell_index = (i == 0) ?
-				AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1;
+			ring->doorbell_index = adev->doorbell_index.sdma_engine[i];
 		} else {
 			ring->use_pollmem = true;
 		}
@@ -1215,8 +1156,9 @@
 		r = amdgpu_ring_init(adev, ring, 1024,
 				     &adev->sdma.trap_irq,
 				     (i == 0) ?
-				     AMDGPU_SDMA_IRQ_TRAP0 :
-				     AMDGPU_SDMA_IRQ_TRAP1);
+				     AMDGPU_SDMA_IRQ_INSTANCE0 :
+				     AMDGPU_SDMA_IRQ_INSTANCE1,
+				     AMDGPU_RING_PRIO_DEFAULT);
 		if (r)
 			return r;
 	}
@@ -1401,7 +1343,7 @@
 	u32 sdma_cntl;
 
 	switch (type) {
-	case AMDGPU_SDMA_IRQ_TRAP0:
+	case AMDGPU_SDMA_IRQ_INSTANCE0:
 		switch (state) {
 		case AMDGPU_IRQ_STATE_DISABLE:
 			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
@@ -1417,7 +1359,7 @@
 			break;
 		}
 		break;
-	case AMDGPU_SDMA_IRQ_TRAP1:
+	case AMDGPU_SDMA_IRQ_INSTANCE1:
 		switch (state) {
 		case AMDGPU_IRQ_STATE_DISABLE:
 			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
@@ -1483,8 +1425,14 @@
 				      struct amdgpu_irq_src *source,
 				      struct amdgpu_iv_entry *entry)
 {
+	u8 instance_id, queue_id;
+
 	DRM_ERROR("Illegal instruction in SDMA command stream\n");
-	schedule_work(&adev->reset_work);
+	instance_id = (entry->ring_id & 0x3) >> 0;
+	queue_id = (entry->ring_id & 0xc) >> 2;
+
+	if (instance_id <= 1 && queue_id == 0)
+		drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
 	return 0;
 }
 
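The new illegal-instruction handler decodes entry->ring_id instead of scheduling a full GPU reset: bits [1:0] give the SDMA instance, bits [3:2] the queue, and only queue 0 of instance 0/1 has a scheduler that drm_sched_fault() can kick. A small sketch of that decode (the ring_id value is made up):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint8_t ring_id = 0x5; /* hypothetical: 0b0101 */
        uint8_t instance_id = (ring_id & 0x3) >> 0;
        uint8_t queue_id = (ring_id & 0xc) >> 2;

        assert(instance_id == 1); /* SDMA instance 1 */
        assert(queue_id == 1);    /* queue 1, so no sched fault here */
        return 0;
    }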
@@ -1690,7 +1638,8 @@
 static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
 				       uint64_t src_offset,
 				       uint64_t dst_offset,
-				       uint32_t byte_count)
+				       uint32_t byte_count,
+				       bool tmz)
 {
 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
 		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
@@ -1736,10 +1685,8 @@
 
 static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev)
 {
-	if (adev->mman.buffer_funcs == NULL) {
-		adev->mman.buffer_funcs = &sdma_v3_0_buffer_funcs;
-		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
-	}
+	adev->mman.buffer_funcs = &sdma_v3_0_buffer_funcs;
+	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
 }
 
 static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
@@ -1754,14 +1701,12 @@
 {
 	unsigned i;
 
-	if (adev->vm_manager.vm_pte_funcs == NULL) {
-		adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
-		for (i = 0; i < adev->sdma.num_instances; i++)
-			adev->vm_manager.vm_pte_rings[i] =
-				&adev->sdma.instance[i].ring;
-
-		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
+	adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		adev->vm_manager.vm_pte_scheds[i] =
+			&adev->sdma.instance[i].ring.sched;
 	}
+	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
 }
 
 const struct amdgpu_ip_block_version sdma_v3_0_ip_block =
---|