2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
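Summary of the changes below: drop the drmP.h umbrella include in favor of direct linux/ includes; register SDMA ucode with the firmware list unconditionally; move the per-ring ready flag into the GPU scheduler (ring->sched.ready) and switch to amdgpu_ring_test_helper(); delete the register-write microcode loader, leaving SMU loading as the only path; quiet the ring/IB tests down to plain error codes with proper unwind labels; simplify the NOP-padding arithmetic; follow the AMDGPU_IRQ_CLIENTID_LEGACY / AMDGPU_SDMA_IRQ_INSTANCEn / doorbell_index renames and the new amdgpu_ring_init() / amdgpu_ib_get() / emit_copy_buffer() signatures; report illegal instructions to the DRM scheduler via drm_sched_fault() instead of scheduling a device reset; and hook up the buffer and VM-PTE functions unconditionally, pointing VM updates at scheduler instances.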
@@ -21,8 +21,11 @@
  *
  * Authors: Alex Deucher
  */
+
+#include <linux/delay.h>
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_trace.h"
@@ -318,14 +321,13 @@
 		if (adev->sdma.instance[i].feature_version >= 20)
 			adev->sdma.instance[i].burst_nop = true;
 
-		if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
-			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
-			info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
-			info->fw = adev->sdma.instance[i].fw;
-			header = (const struct common_firmware_header *)info->fw->data;
-			adev->firmware.fw_size +=
-				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-		}
+		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
+		info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
+		info->fw = adev->sdma.instance[i].fw;
+		header = (const struct common_firmware_header *)info->fw->data;
+		adev->firmware.fw_size +=
+			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
 	}
 out:
 	if (err) {
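Note: the ucode-table registration that was previously gated on adev->firmware.load_type == AMDGPU_FW_LOAD_SMU now runs for every SDMA instance, so the firmware is always accounted for in adev->firmware.fw_size; this pairs with the removal of the direct-load path further down.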
@@ -400,7 +402,7 @@
 
 static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 {
-	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
+	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
 	int i;
 
 	for (i = 0; i < count; i++)
@@ -420,11 +422,14 @@
  * Schedule an IB in the DMA ring (VI).
  */
 static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
+				   struct amdgpu_job *job,
 				   struct amdgpu_ib *ib,
-				   unsigned vmid, bool ctx_switch)
+				   uint32_t flags)
 {
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
 	/* IB packet must end on a 8 DW boundary */
-	sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+	sdma_v3_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
 			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
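The old and new NOP counts are arithmetically identical (10 ≡ 2 mod 8), so this is a simplification rather than a behavior change: both expressions compute the distance from wptr to the next position that is 2 mod 8, which leaves the 6-DW INDIRECT packet ending on an 8-DW boundary. A minimal standalone check, ordinary userspace C and not part of the patch:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t wptr;

	/* old formula vs. new formula, for every possible wptr alignment */
	for (wptr = 0; wptr < 64; wptr++)
		assert(((10 - (wptr & 7)) % 8) == ((2 - wptr) & 7));
	return 0;
}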
@@ -524,8 +529,6 @@
 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 	}
-	sdma0->ready = false;
-	sdma1->ready = false;
 }
 
 /**
@@ -740,7 +743,7 @@
 		/* enable DMA IBs */
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 
-		ring->ready = true;
+		ring->sched.ready = true;
 	}
 
 	/* unhalt the MEs */
@@ -750,11 +753,9 @@
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		ring = &adev->sdma.instance[i].ring;
-		r = amdgpu_ring_test_ring(ring);
-		if (r) {
-			ring->ready = false;
+		r = amdgpu_ring_test_helper(ring);
+		if (r)
 			return r;
-		}
 
 		if (adev->mman.buffer_funcs_ring == ring)
 			amdgpu_ttm_set_buffer_funcs_status(adev, true);
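amdgpu_ring_test_helper() folds the ready-flag bookkeeping and error reporting into one place, which is why the open-coded ring->ready = false can go. A sketch of what the helper provides, paraphrased from the contemporary amdgpu_ring.c (not part of this patch):

int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
{
	int r;

	/* run the ring test and mark the scheduler ready on success */
	r = amdgpu_ring_test_ring(ring);
	if (r)
		DRM_DEV_ERROR(ring->adev->dev, "ring %s test failed (%d)\n",
			      ring->name, r);

	ring->sched.ready = !r;
	return r;
}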
@@ -778,42 +779,6 @@
 }
 
 /**
- * sdma_v3_0_load_microcode - load the sDMA ME ucode
- *
- * @adev: amdgpu_device pointer
- *
- * Loads the sDMA0/1 ucode.
- * Returns 0 for success, -EINVAL if the ucode is not available.
- */
-static int sdma_v3_0_load_microcode(struct amdgpu_device *adev)
-{
-	const struct sdma_firmware_header_v1_0 *hdr;
-	const __le32 *fw_data;
-	u32 fw_size;
-	int i, j;
-
-	/* halt the MEs */
-	sdma_v3_0_enable(adev, false);
-
-	for (i = 0; i < adev->sdma.num_instances; i++) {
-		if (!adev->sdma.instance[i].fw)
-			return -EINVAL;
-		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
-		amdgpu_ucode_print_sdma_hdr(&hdr->header);
-		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
-		fw_data = (const __le32 *)
-			(adev->sdma.instance[i].fw->data +
-			 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
-		WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
-		for (j = 0; j < fw_size; j++)
-			WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++));
-		WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma.instance[i].fw_version);
-	}
-
-	return 0;
-}
-
-/**
  * sdma_v3_0_start - setup and start the async dma engines
  *
  * @adev: amdgpu_device pointer
@@ -824,12 +789,6 @@
 static int sdma_v3_0_start(struct amdgpu_device *adev)
 {
 	int r;
-
-	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
-		r = sdma_v3_0_load_microcode(adev);
-		if (r)
-			return r;
-	}
 
 	/* disable sdma engine before programing it */
 	sdma_v3_0_ctx_switch_enable(adev, false);
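With sdma_v3_0_load_microcode() deleted, nothing here handles AMDGPU_FW_LOAD_DIRECT any more: the MMIO-write loader is gone and VI SDMA firmware is expected to come in through the SMU path whose ucode entries are registered unconditionally in the init hunk above.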
@@ -865,21 +824,16 @@
 	u64 gpu_addr;
 
 	r = amdgpu_device_wb_get(adev, &index);
-	if (r) {
-		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+	if (r)
 		return r;
-	}
 
 	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	tmp = 0xCAFEDEAD;
 	adev->wb.wb[index] = cpu_to_le32(tmp);
 
 	r = amdgpu_ring_alloc(ring, 5);
-	if (r) {
-		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
-		amdgpu_device_wb_free(adev, index);
-		return r;
-	}
+	if (r)
+		goto error_free_wb;
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
 			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
@@ -893,18 +847,14 @@
 		tmp = le32_to_cpu(adev->wb.wb[index]);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
-	if (i < adev->usec_timeout) {
-		DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
-	} else {
-		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
-			  ring->idx, tmp);
-		r = -EINVAL;
-	}
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+
+error_free_wb:
 	amdgpu_device_wb_free(adev, index);
-
 	return r;
 }
 
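Both test failure paths now unwind through the single error_free_wb label, so the writeback slot is freed exactly once on every exit, and a timeout is reported as -ETIMEDOUT instead of the former generic -EINVAL.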
@@ -927,20 +877,17 @@
 	long r;
 
 	r = amdgpu_device_wb_get(adev, &index);
-	if (r) {
-		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+	if (r)
 		return r;
-	}
 
 	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	tmp = 0xCAFEDEAD;
 	adev->wb.wb[index] = cpu_to_le32(tmp);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(adev, NULL, 256, &ib);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+	r = amdgpu_ib_get(adev, NULL, 256,
+			  AMDGPU_IB_POOL_DIRECT, &ib);
+	if (r)
 		goto err0;
-	}
 
 	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
 		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
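amdgpu_ib_get() grew an IB-pool parameter; the test allocates from AMDGPU_IB_POOL_DIRECT, the pool intended for submissions that bypass the scheduler, as this self-test does.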
@@ -959,21 +906,16 @@
 
 	r = dma_fence_wait_timeout(f, false, timeout);
 	if (r == 0) {
-		DRM_ERROR("amdgpu: IB test timed out\n");
 		r = -ETIMEDOUT;
 		goto err1;
 	} else if (r < 0) {
-		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err1;
 	}
 	tmp = le32_to_cpu(adev->wb.wb[index]);
-	if (tmp == 0xDEADBEEF) {
-		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+	if (tmp == 0xDEADBEEF)
 		r = 0;
-	} else {
-		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
+	else
 		r = -EINVAL;
-	}
 err1:
 	amdgpu_ib_free(adev, &ib, NULL);
 	dma_fence_put(f);
@@ -1074,11 +1016,11 @@
  */
 static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
 {
-	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
+	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
 	u32 pad_count;
 	int i;
 
-	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+	pad_count = (-ib->length_dw) & 7;
 	for (i = 0; i < pad_count; i++)
 		if (sdma && sdma->burst_nop && (i == 0))
 			ib->ptr[ib->length_dw++] =
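Same arithmetic simplification as in emit_ib above: for any unsigned length, (-ib->length_dw) & 7 equals (8 - (ib->length_dw & 0x7)) % 8, i.e. the number of DWs needed to pad the IB up to the next multiple of 8, so pad_count is unchanged.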
@@ -1177,19 +1119,19 @@
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	/* SDMA trap event */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
 			      &adev->sdma.trap_irq);
 	if (r)
 		return r;
 
 	/* SDMA Privileged inst */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241,
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241,
 			      &adev->sdma.illegal_inst_irq);
 	if (r)
 		return r;
 
 	/* SDMA Privileged inst */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,
 			      &adev->sdma.illegal_inst_irq);
 	if (r)
 		return r;
@@ -1205,8 +1147,7 @@
 		ring->ring_obj = NULL;
 		if (!amdgpu_sriov_vf(adev)) {
 			ring->use_doorbell = true;
-			ring->doorbell_index = (i == 0) ?
-				AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1;
+			ring->doorbell_index = adev->doorbell_index.sdma_engine[i];
 		} else {
 			ring->use_pollmem = true;
 		}
@@ -1215,8 +1156,9 @@
 		r = amdgpu_ring_init(adev, ring, 1024,
 				     &adev->sdma.trap_irq,
 				     (i == 0) ?
-				     AMDGPU_SDMA_IRQ_TRAP0 :
-				     AMDGPU_SDMA_IRQ_TRAP1);
+				     AMDGPU_SDMA_IRQ_INSTANCE0 :
+				     AMDGPU_SDMA_IRQ_INSTANCE1,
+				     AMDGPU_RING_PRIO_DEFAULT);
 		if (r)
 			return r;
 	}
@@ -1401,7 +1343,7 @@
 	u32 sdma_cntl;
 
 	switch (type) {
-	case AMDGPU_SDMA_IRQ_TRAP0:
+	case AMDGPU_SDMA_IRQ_INSTANCE0:
 		switch (state) {
 		case AMDGPU_IRQ_STATE_DISABLE:
 			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
@@ -1417,7 +1359,7 @@
 			break;
 		}
 		break;
-	case AMDGPU_SDMA_IRQ_TRAP1:
+	case AMDGPU_SDMA_IRQ_INSTANCE1:
 		switch (state) {
 		case AMDGPU_IRQ_STATE_DISABLE:
 			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
@@ -1483,8 +1425,14 @@
 					      struct amdgpu_irq_src *source,
 					      struct amdgpu_iv_entry *entry)
 {
+	u8 instance_id, queue_id;
+
 	DRM_ERROR("Illegal instruction in SDMA command stream\n");
-	schedule_work(&adev->reset_work);
+	instance_id = (entry->ring_id & 0x3) >> 0;
+	queue_id = (entry->ring_id & 0xc) >> 2;
+
+	if (instance_id <= 1 && queue_id == 0)
+		drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
 	return 0;
 }
 
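Rather than kicking the global reset_work, the handler now decodes which SDMA instance and queue raised the interrupt from entry->ring_id and calls drm_sched_fault(), which triggers the DRM scheduler's timeout/recovery handling for just the affected ring; only the gfx queue (queue_id 0) of the two instances is backed by a scheduler here.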
@@ -1690,7 +1638,8 @@
 static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
 				       uint64_t src_offset,
 				       uint64_t dst_offset,
-				       uint32_t byte_count)
+				       uint32_t byte_count,
+				       bool tmz)
 {
 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
 		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
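The new tmz flag follows a signature change to the emit_copy_buffer hook in amdgpu_buffer_funcs; SDMA v3.0 has no TMZ (secure copy) support, and as the unchanged packet body shows, the argument is accepted but ignored on this generation.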
@@ -1736,10 +1685,8 @@
 
 static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev)
 {
-	if (adev->mman.buffer_funcs == NULL) {
-		adev->mman.buffer_funcs = &sdma_v3_0_buffer_funcs;
-		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
-	}
+	adev->mman.buffer_funcs = &sdma_v3_0_buffer_funcs;
+	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
 }
 
 static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
@@ -1754,14 +1701,12 @@
 {
 	unsigned i;
 
-	if (adev->vm_manager.vm_pte_funcs == NULL) {
-		adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
-		for (i = 0; i < adev->sdma.num_instances; i++)
-			adev->vm_manager.vm_pte_rings[i] =
-				&adev->sdma.instance[i].ring;
-
-		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
+	adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		adev->vm_manager.vm_pte_scheds[i] =
+			&adev->sdma.instance[i].ring.sched;
 	}
+	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
 }
 
 const struct amdgpu_ip_block_version sdma_v3_0_ip_block =
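VM page-table updates are now dispatched to drm_gpu_scheduler instances (vm_pte_scheds / vm_pte_num_scheds) instead of raw rings, and the NULL guard is dropped since these hooks are set exactly once per device during IP initialization.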