@@ -21,8 +21,11 @@
  *
  * Authors: Alex Deucher
  */
+
+#include <linux/delay.h>
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_trace.h"
---|
@@ -225,7 +228,7 @@
 
 static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 {
-        struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
+        struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
         int i;
 
         for (i = 0; i < count; i++)
---|
@@ -245,11 +248,14 @@
  * Schedule an IB in the DMA ring (VI).
  */
 static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
+                                   struct amdgpu_job *job,
                                    struct amdgpu_ib *ib,
-                                   unsigned vmid, bool ctx_switch)
+                                   uint32_t flags)
 {
+        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
         /* IB packet must end on a 8 DW boundary */
-        sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+        sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
 
         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
                           SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
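
Note: the new NOP-count expression is not a behaviour change. For unsigned
wptr, (2 - lower_32_bits(ring->wptr)) & 7 and the old
(10 - (lower_32_bits(ring->wptr) & 7)) % 8 are equivalent; both pick the
count that leaves (wptr + count) % 8 == 2, which lines things up if the
INDIRECT packet that follows is 6 DWs (2 + 6 = 8, matching the "must end on
a 8 DW boundary" comment). A standalone userspace check, illustrative only
and not part of the patch:

    /* Verify the old and new NOP-count expressions agree for any wptr. */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t wptr;

            for (wptr = 0; wptr < 64; wptr++) {
                    uint32_t old_count = (10 - (wptr & 7)) % 8;
                    uint32_t new_count = (2 - wptr) & 7;

                    assert(old_count == new_count);
                    /* the padded write pointer always sits at 2 mod 8 */
                    assert(((wptr + new_count) & 7) == 2);
            }
            printf("expressions agree for all wptr values\n");
            return 0;
    }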
---|
@@ -349,8 +355,6 @@
                 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
                 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
         }
-        sdma0->ready = false;
-        sdma1->ready = false;
 }
 
 /**
---|
@@ -471,17 +475,15 @@
                 /* enable DMA IBs */
                 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 
-                ring->ready = true;
+                ring->sched.ready = true;
         }
 
         sdma_v2_4_enable(adev, true);
         for (i = 0; i < adev->sdma.num_instances; i++) {
                 ring = &adev->sdma.instance[i].ring;
-                r = amdgpu_ring_test_ring(ring);
-                if (r) {
-                        ring->ready = false;
+                r = amdgpu_ring_test_helper(ring);
+                if (r)
                         return r;
-                }
 
                 if (adev->mman.buffer_funcs_ring == ring)
                         amdgpu_ttm_set_buffer_funcs_status(adev, true);
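
Note: amdgpu_ring_test_helper() folds the open-coded ready-flag handling
above into common code. Roughly, as a sketch inferred from this diff rather
than copied from amdgpu_ring.c, it runs the ring test and records the
outcome where the GPU scheduler looks for it:

    int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
    {
            int r;

            /* run the ring test; a failed ring must not be picked up
             * by the scheduler, so fold the result into sched.ready */
            r = amdgpu_ring_test_ring(ring);
            ring->sched.ready = !r;
            return r;
    }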
---|
@@ -504,41 +506,6 @@
         return 0;
 }
 
-/**
- * sdma_v2_4_load_microcode - load the sDMA ME ucode
- *
- * @adev: amdgpu_device pointer
- *
- * Loads the sDMA0/1 ucode.
- * Returns 0 for success, -EINVAL if the ucode is not available.
- */
-static int sdma_v2_4_load_microcode(struct amdgpu_device *adev)
-{
-        const struct sdma_firmware_header_v1_0 *hdr;
-        const __le32 *fw_data;
-        u32 fw_size;
-        int i, j;
-
-        /* halt the MEs */
-        sdma_v2_4_enable(adev, false);
-
-        for (i = 0; i < adev->sdma.num_instances; i++) {
-                if (!adev->sdma.instance[i].fw)
-                        return -EINVAL;
-                hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
-                amdgpu_ucode_print_sdma_hdr(&hdr->header);
-                fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
-                fw_data = (const __le32 *)
-                        (adev->sdma.instance[i].fw->data +
-                         le32_to_cpu(hdr->header.ucode_array_offset_bytes));
-                WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
-                for (j = 0; j < fw_size; j++)
-                        WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++));
-                WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma.instance[i].fw_version);
-        }
-
-        return 0;
-}
 
 /**
  * sdma_v2_4_start - setup and start the async dma engines
---|
@@ -551,13 +518,6 @@
 static int sdma_v2_4_start(struct amdgpu_device *adev)
 {
         int r;
-
-
-        if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
-                r = sdma_v2_4_load_microcode(adev);
-                if (r)
-                        return r;
-        }
 
         /* halt the engine before programing */
         sdma_v2_4_enable(adev, false);
---|
@@ -592,21 +552,16 @@
         u64 gpu_addr;
 
         r = amdgpu_device_wb_get(adev, &index);
-        if (r) {
-                dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+        if (r)
                 return r;
-        }
 
         gpu_addr = adev->wb.gpu_addr + (index * 4);
         tmp = 0xCAFEDEAD;
         adev->wb.wb[index] = cpu_to_le32(tmp);
 
         r = amdgpu_ring_alloc(ring, 5);
-        if (r) {
-                DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
-                amdgpu_device_wb_free(adev, index);
-                return r;
-        }
+        if (r)
+                goto error_free_wb;
 
         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
                           SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
---|
@@ -620,18 +575,14 @@
                 tmp = le32_to_cpu(adev->wb.wb[index]);
                 if (tmp == 0xDEADBEEF)
                         break;
-                DRM_UDELAY(1);
+                udelay(1);
         }
 
-        if (i < adev->usec_timeout) {
-                DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
-        } else {
-                DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
-                          ring->idx, tmp);
-                r = -EINVAL;
-        }
+        if (i >= adev->usec_timeout)
+                r = -ETIMEDOUT;
+
+error_free_wb:
         amdgpu_device_wb_free(adev, index);
-
         return r;
 }
 
---|
@@ -654,20 +605,17 @@
         long r;
 
         r = amdgpu_device_wb_get(adev, &index);
-        if (r) {
-                dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+        if (r)
                 return r;
-        }
 
         gpu_addr = adev->wb.gpu_addr + (index * 4);
         tmp = 0xCAFEDEAD;
         adev->wb.wb[index] = cpu_to_le32(tmp);
         memset(&ib, 0, sizeof(ib));
-        r = amdgpu_ib_get(adev, NULL, 256, &ib);
-        if (r) {
-                DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+        r = amdgpu_ib_get(adev, NULL, 256,
+                          AMDGPU_IB_POOL_DIRECT, &ib);
+        if (r)
                 goto err0;
-        }
 
         ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
                 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
---|
@@ -686,21 +634,16 @@
 
         r = dma_fence_wait_timeout(f, false, timeout);
         if (r == 0) {
-                DRM_ERROR("amdgpu: IB test timed out\n");
                 r = -ETIMEDOUT;
                 goto err1;
         } else if (r < 0) {
-                DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
                 goto err1;
         }
         tmp = le32_to_cpu(adev->wb.wb[index]);
-        if (tmp == 0xDEADBEEF) {
-                DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+        if (tmp == 0xDEADBEEF)
                 r = 0;
-        } else {
-                DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
+        else
                 r = -EINVAL;
-        }
 
 err1:
         amdgpu_ib_free(adev, &ib, NULL);
---|
@@ -802,11 +745,11 @@
  */
 static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
 {
-        struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
+        struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
         u32 pad_count;
         int i;
 
-        pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+        pad_count = (-ib->length_dw) & 7;
         for (i = 0; i < pad_count; i++)
                 if (sdma && sdma->burst_nop && (i == 0))
                         ib->ptr[ib->length_dw++] =
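
Note: the same identity as in sdma_v2_4_ring_emit_ib() above: for unsigned
n, (-n) & 7 == (8 - (n & 0x7)) % 8, i.e. the number of NOP DWs that rounds
length_dw up to the next multiple of 8. A quick userspace check,
illustrative only:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t n;

            for (n = 0; n < 64; n++) {
                    /* old and new pad-count expressions agree */
                    assert(((-n) & 7) == (8 - (n & 0x7)) % 8);
                    /* and the padded length is a multiple of 8 */
                    assert(((n + ((-n) & 7)) & 7) == 0);
            }
            return 0;
    }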
---|
@@ -898,19 +841,19 @@
         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
         /* SDMA trap event */
-        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
+        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
                               &adev->sdma.trap_irq);
         if (r)
                 return r;
 
         /* SDMA Privileged inst */
-        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241,
+        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241,
                               &adev->sdma.illegal_inst_irq);
         if (r)
                 return r;
 
         /* SDMA Privileged inst */
-        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,
+        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,
                               &adev->sdma.illegal_inst_irq);
         if (r)
                 return r;
---|
@@ -929,8 +872,9 @@
                 r = amdgpu_ring_init(adev, ring, 1024,
                                      &adev->sdma.trap_irq,
                                      (i == 0) ?
-                                     AMDGPU_SDMA_IRQ_TRAP0 :
-                                     AMDGPU_SDMA_IRQ_TRAP1);
+                                     AMDGPU_SDMA_IRQ_INSTANCE0 :
+                                     AMDGPU_SDMA_IRQ_INSTANCE1,
+                                     AMDGPU_RING_PRIO_DEFAULT);
                 if (r)
                         return r;
         }
---|
@@ -1065,7 +1009,7 @@
         u32 sdma_cntl;
 
         switch (type) {
-        case AMDGPU_SDMA_IRQ_TRAP0:
+        case AMDGPU_SDMA_IRQ_INSTANCE0:
                 switch (state) {
                 case AMDGPU_IRQ_STATE_DISABLE:
                         sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
---|
@@ -1081,7 +1025,7 @@
                         break;
                 }
                 break;
-        case AMDGPU_SDMA_IRQ_TRAP1:
+        case AMDGPU_SDMA_IRQ_INSTANCE1:
                 switch (state) {
                 case AMDGPU_IRQ_STATE_DISABLE:
                         sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
---|
@@ -1147,8 +1091,14 @@
                                               struct amdgpu_irq_src *source,
                                               struct amdgpu_iv_entry *entry)
 {
+        u8 instance_id, queue_id;
+
         DRM_ERROR("Illegal instruction in SDMA command stream\n");
-        schedule_work(&adev->reset_work);
+        instance_id = (entry->ring_id & 0x3) >> 0;
+        queue_id = (entry->ring_id & 0xc) >> 2;
+
+        if (instance_id <= 1 && queue_id == 0)
+                drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
         return 0;
 }
 
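
Note: instead of queueing a full GPU reset, the handler now reports the
fault only to the scheduler of the ring that raised it. Per the masks in
the patch, bits [1:0] of ring_id select the SDMA instance and bits [3:2]
the queue within it. A worked decode, userspace and illustrative only:

    #include <stdint.h>
    #include <stdio.h>

    static void decode(uint8_t ring_id)
    {
            uint8_t instance_id = (ring_id & 0x3) >> 0;
            uint8_t queue_id = (ring_id & 0xc) >> 2;

            printf("ring_id 0x%x -> instance %u queue %u -> %s\n",
                   ring_id, instance_id, queue_id,
                   (instance_id <= 1 && queue_id == 0) ?
                   "drm_sched_fault()" : "ignored");
    }

    int main(void)
    {
            decode(0x0); /* SDMA0, queue 0: fault its scheduler */
            decode(0x1); /* SDMA1, queue 0: fault its scheduler */
            decode(0x5); /* SDMA1, queue 1: not handled here */
            return 0;
    }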
---|
@@ -1250,7 +1200,8 @@
 static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
                                        uint64_t src_offset,
                                        uint64_t dst_offset,
-                                       uint32_t byte_count)
+                                       uint32_t byte_count,
+                                       bool tmz)
 {
         ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
                 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
---|
@@ -1296,10 +1247,8 @@
 
 static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev)
 {
-        if (adev->mman.buffer_funcs == NULL) {
-                adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs;
-                adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
-        }
+        adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs;
+        adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
 }
 
 static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
---|
@@ -1314,14 +1263,12 @@
 {
         unsigned i;
 
-        if (adev->vm_manager.vm_pte_funcs == NULL) {
-                adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
-                for (i = 0; i < adev->sdma.num_instances; i++)
-                        adev->vm_manager.vm_pte_rings[i] =
-                                &adev->sdma.instance[i].ring;
-
-                adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
+        adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
+        for (i = 0; i < adev->sdma.num_instances; i++) {
+                adev->vm_manager.vm_pte_scheds[i] =
+                        &adev->sdma.instance[i].ring.sched;
         }
+        adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
 }
 
 const struct amdgpu_ip_block_version sdma_v2_4_ip_block =
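
Note: storing drm_gpu_scheduler pointers rather than rings matches what the
scheduler-entity API consumes. A sketch of the consuming side, assuming the
drm_sched_entity_init() signature of this kernel generation; the entity
field name here is hypothetical:

    /* somewhere in VM init: attach a page-table-update entity to the
     * SDMA schedulers collected above */
    r = drm_sched_entity_init(&vm->sched_entity, DRM_SCHED_PRIORITY_NORMAL,
                              adev->vm_manager.vm_pte_scheds,
                              adev->vm_manager.vm_pte_num_scheds, NULL);
    if (r)
            return r;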
---|