2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -21,8 +21,11 @@
  *
  * Authors: Alex Deucher
  */
+
+#include <linux/delay.h>
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_trace.h"
@@ -225,7 +228,7 @@

 static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 {
-	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
+	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
 	int i;

 	for (i = 0; i < count; i++)
@@ -245,11 +248,14 @@
  * Schedule an IB in the DMA ring (VI).
  */
 static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
+				   struct amdgpu_job *job,
 				   struct amdgpu_ib *ib,
-				   unsigned vmid, bool ctx_switch)
+				   uint32_t flags)
 {
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
 	/* IB packet must end on a 8 DW boundary */
-	sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+	sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);

 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
 			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
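
Note on the padding change: both the old modulo form and the new mask form park wptr at 2 (mod 8), so the 6-DW SDMA_OP_INDIRECT packet emitted just below ends exactly on an 8-DW boundary (2 + 6 = 8). A standalone userspace check of the equivalence (illustrative only, not part of the patch; wptr stands in for lower_32_bits(ring->wptr)):

    /* Verify (10 - (wptr & 7)) % 8 == (2 - wptr) & 7 for all wptr mod 8,
     * and that the padding leaves wptr + count + 6 aligned to 8 DW.
     */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            for (uint32_t wptr = 0; wptr < 64; wptr++) {
                    uint32_t old_count = (10 - (wptr & 7)) % 8;
                    uint32_t new_count = (2 - wptr) & 7;  /* unsigned wraparound does the mod */

                    assert(old_count == new_count);
                    assert(((wptr + new_count + 6) & 7) == 0);
            }
            printf("old and new padding formulas agree\n");
            return 0;
    }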
@@ -349,8 +355,6 @@
 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 	}
-	sdma0->ready = false;
-	sdma1->ready = false;
 }

 /**
@@ -471,17 +475,15 @@
 		/* enable DMA IBs */
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);

-		ring->ready = true;
+		ring->sched.ready = true;
 	}

 	sdma_v2_4_enable(adev, true);
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		ring = &adev->sdma.instance[i].ring;
-		r = amdgpu_ring_test_ring(ring);
-		if (r) {
-			ring->ready = false;
+		r = amdgpu_ring_test_helper(ring);
+		if (r)
 			return r;
-		}

 		if (adev->mman.buffer_funcs_ring == ring)
 			amdgpu_ttm_set_buffer_funcs_status(adev, true);
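
For reference, amdgpu_ring_test_helper() folds the open-coded test-and-mark-ready bookkeeping above into one place. Roughly (a sketch of the helper's behavior at this point in the tree, not the verbatim kernel source):

    int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
    {
            int r = amdgpu_ring_test_ring(ring);    /* runs the IP's test_ring hook */

            if (r)
                    DRM_ERROR("ring %s test failed (%d)\n", ring->name, r);

            ring->sched.ready = !r;    /* scheduler readiness now follows the test result */
            return r;
    }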
@@ -504,41 +506,6 @@
 	return 0;
 }

-/**
- * sdma_v2_4_load_microcode - load the sDMA ME ucode
- *
- * @adev: amdgpu_device pointer
- *
- * Loads the sDMA0/1 ucode.
- * Returns 0 for success, -EINVAL if the ucode is not available.
- */
-static int sdma_v2_4_load_microcode(struct amdgpu_device *adev)
-{
-	const struct sdma_firmware_header_v1_0 *hdr;
-	const __le32 *fw_data;
-	u32 fw_size;
-	int i, j;
-
-	/* halt the MEs */
-	sdma_v2_4_enable(adev, false);
-
-	for (i = 0; i < adev->sdma.num_instances; i++) {
-		if (!adev->sdma.instance[i].fw)
-			return -EINVAL;
-		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
-		amdgpu_ucode_print_sdma_hdr(&hdr->header);
-		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
-		fw_data = (const __le32 *)
-			(adev->sdma.instance[i].fw->data +
-			 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
-		WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
-		for (j = 0; j < fw_size; j++)
-			WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++));
-		WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma.instance[i].fw_version);
-	}
-
-	return 0;
-}

 /**
  * sdma_v2_4_start - setup and start the async dma engines
@@ -551,13 +518,6 @@
 static int sdma_v2_4_start(struct amdgpu_device *adev)
 {
 	int r;
-
-
-	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
-		r = sdma_v2_4_load_microcode(adev);
-		if (r)
-			return r;
-	}

 	/* halt the engine before programing */
 	sdma_v2_4_enable(adev, false);
@@ -592,21 +552,16 @@
 	u64 gpu_addr;

 	r = amdgpu_device_wb_get(adev, &index);
-	if (r) {
-		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+	if (r)
 		return r;
-	}

 	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	tmp = 0xCAFEDEAD;
 	adev->wb.wb[index] = cpu_to_le32(tmp);

 	r = amdgpu_ring_alloc(ring, 5);
-	if (r) {
-		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
-		amdgpu_device_wb_free(adev, index);
-		return r;
-	}
+	if (r)
+		goto error_free_wb;

 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
 			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
@@ -620,18 +575,14 @@
 		tmp = le32_to_cpu(adev->wb.wb[index]);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}

-	if (i < adev->usec_timeout) {
-		DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
-	} else {
-		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
-			  ring->idx, tmp);
-		r = -EINVAL;
-	}
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+
+error_free_wb:
 	amdgpu_device_wb_free(adev, index);
-
 	return r;
 }

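The rewritten ring test uses the kernel's usual goto-unwind idiom: the error_free_wb label sits just above the cleanup, and the success path falls through it, so the writeback slot is freed on every exit. A runnable userspace sketch of the pattern (function names are illustrative stand-ins, not the driver's API):

    #include <errno.h>
    #include <stdio.h>

    static int acquire_slot(void)  { return 0; }            /* stands in for amdgpu_device_wb_get() */
    static void release_slot(void) { puts("slot freed"); }  /* stands in for amdgpu_device_wb_free() */
    static int prepare(void)       { return 0; }            /* stands in for amdgpu_ring_alloc() */

    static int run_test(void)
    {
            int r;

            r = acquire_slot();
            if (r)
                    return r;           /* nothing acquired yet: plain return */

            r = prepare();
            if (r)
                    goto error_free;    /* slot is held: must unwind */

            r = 0;                      /* ... the actual test would set r here ... */

    error_free:
            release_slot();             /* shared by success and failure paths */
            return r;
    }

    int main(void)
    {
            return run_test();
    }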
@@ -654,20 +605,17 @@
 	long r;

 	r = amdgpu_device_wb_get(adev, &index);
-	if (r) {
-		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+	if (r)
 		return r;
-	}

 	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	tmp = 0xCAFEDEAD;
 	adev->wb.wb[index] = cpu_to_le32(tmp);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(adev, NULL, 256, &ib);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+	r = amdgpu_ib_get(adev, NULL, 256,
+			  AMDGPU_IB_POOL_DIRECT, &ib);
+	if (r)
 		goto err0;
-	}

 	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
 		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
@@ -686,21 +634,16 @@

 	r = dma_fence_wait_timeout(f, false, timeout);
 	if (r == 0) {
-		DRM_ERROR("amdgpu: IB test timed out\n");
 		r = -ETIMEDOUT;
 		goto err1;
 	} else if (r < 0) {
-		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err1;
 	}
 	tmp = le32_to_cpu(adev->wb.wb[index]);
-	if (tmp == 0xDEADBEEF) {
-		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+	if (tmp == 0xDEADBEEF)
 		r = 0;
-	} else {
-		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
+	else
 		r = -EINVAL;
-	}

 err1:
 	amdgpu_ib_free(adev, &ib, NULL);
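
The error handling above leans on dma_fence_wait_timeout()'s three-way return: 0 means the timeout expired, a negative value is a wait error, and a positive value is the remaining timeout on success. A userspace mock of the caller-side convention (wait_mock is a stand-in, not a kernel call):

    #include <errno.h>
    #include <stdio.h>

    static long wait_mock(long outcome) { return outcome; }

    static long map_result(long r)
    {
            if (r == 0)
                    return -ETIMEDOUT;  /* timed out: the API returns no errno itself */
            if (r < 0)
                    return r;           /* the wait itself failed */
            return 0;                   /* signalled in time; check the payload next */
    }

    int main(void)
    {
            printf("%ld %ld %ld\n", map_result(wait_mock(0)),
                   map_result(wait_mock(-EINTR)), map_result(wait_mock(25)));
            return 0;
    }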
@@ -802,11 +745,11 @@
  */
 static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
 {
-	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
+	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
 	u32 pad_count;
 	int i;

-	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+	pad_count = (-ib->length_dw) & 7;
 	for (i = 0; i < pad_count; i++)
 		if (sdma && sdma->burst_nop && (i == 0))
 			ib->ptr[ib->length_dw++] =
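
Same arithmetic trick as the ring-padding change above: (8 - (len & 7)) % 8 and (-len) & 7 both give the distance from len up to the next multiple of 8, and 0 when len is already aligned. A one-loop userspace check (illustrative only):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            /* unsigned negation is well-defined, so (-len) & 7 == (8 - len % 8) % 8 */
            for (uint32_t len = 0; len < 256; len++)
                    assert(((8 - (len & 7)) % 8) == ((-len) & 7));
            return 0;
    }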
@@ -898,19 +841,19 @@
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

 	/* SDMA trap event */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
 			      &adev->sdma.trap_irq);
 	if (r)
 		return r;

 	/* SDMA Privileged inst */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241,
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241,
 			      &adev->sdma.illegal_inst_irq);
 	if (r)
 		return r;

 	/* SDMA Privileged inst */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,
 			      &adev->sdma.illegal_inst_irq);
 	if (r)
 		return r;
@@ -929,8 +872,9 @@
 		r = amdgpu_ring_init(adev, ring, 1024,
 				     &adev->sdma.trap_irq,
 				     (i == 0) ?
-				     AMDGPU_SDMA_IRQ_TRAP0 :
-				     AMDGPU_SDMA_IRQ_TRAP1);
+				     AMDGPU_SDMA_IRQ_INSTANCE0 :
+				     AMDGPU_SDMA_IRQ_INSTANCE1,
+				     AMDGPU_RING_PRIO_DEFAULT);
 		if (r)
 			return r;
 	}
@@ -1065,7 +1009,7 @@
 	u32 sdma_cntl;

 	switch (type) {
-	case AMDGPU_SDMA_IRQ_TRAP0:
+	case AMDGPU_SDMA_IRQ_INSTANCE0:
 		switch (state) {
 		case AMDGPU_IRQ_STATE_DISABLE:
 			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
@@ -1081,7 +1025,7 @@
 			break;
 		}
 		break;
-	case AMDGPU_SDMA_IRQ_TRAP1:
+	case AMDGPU_SDMA_IRQ_INSTANCE1:
 		switch (state) {
 		case AMDGPU_IRQ_STATE_DISABLE:
 			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
@@ -1147,8 +1091,14 @@
 					      struct amdgpu_irq_src *source,
 					      struct amdgpu_iv_entry *entry)
 {
+	u8 instance_id, queue_id;
+
 	DRM_ERROR("Illegal instruction in SDMA command stream\n");
-	schedule_work(&adev->reset_work);
+	instance_id = (entry->ring_id & 0x3) >> 0;
+	queue_id = (entry->ring_id & 0xc) >> 2;
+
+	if (instance_id <= 1 && queue_id == 0)
+		drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
 	return 0;
 }

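The new handler decodes entry->ring_id instead of scheduling a full GPU reset: the low two bits select the SDMA instance, the next two the queue, and only instance 0/1 queue 0 forward a fault to the DRM scheduler. A userspace walk of the decode (same bit layout as used in this handler):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            for (uint8_t ring_id = 0; ring_id < 16; ring_id++) {
                    uint8_t instance_id = (ring_id & 0x3) >> 0;  /* bits [1:0] */
                    uint8_t queue_id = (ring_id & 0xc) >> 2;     /* bits [3:2] */

                    printf("ring_id %2u -> instance %u, queue %u%s\n",
                           ring_id, instance_id, queue_id,
                           (instance_id <= 1 && queue_id == 0) ?
                           " (faults the scheduler)" : " (ignored)");
            }
            return 0;
    }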
@@ -1250,7 +1200,8 @@
 static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
 				       uint64_t src_offset,
 				       uint64_t dst_offset,
-				       uint32_t byte_count)
+				       uint32_t byte_count,
+				       bool tmz)
 {
 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
 		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
@@ -1296,10 +1247,8 @@

 static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev)
 {
-	if (adev->mman.buffer_funcs == NULL) {
-		adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs;
-		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
-	}
+	adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs;
+	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
 }

 static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
@@ -1314,14 +1263,12 @@
 {
 	unsigned i;

-	if (adev->vm_manager.vm_pte_funcs == NULL) {
-		adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
-		for (i = 0; i < adev->sdma.num_instances; i++)
-			adev->vm_manager.vm_pte_rings[i] =
-				&adev->sdma.instance[i].ring;
-
-		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
+	adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		adev->vm_manager.vm_pte_scheds[i] =
+			&adev->sdma.instance[i].ring.sched;
 	}
+	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
 }

 const struct amdgpu_ip_block_version sdma_v2_4_ip_block =