| .. | .. |
|---|
| 21 | 21 | * |
|---|
| 22 | 22 | * Authors: Alex Deucher |
|---|
| 23 | 23 | */ |
|---|
| 24 | + |
|---|
| 25 | +#include <linux/delay.h> |
|---|
| 24 | 26 | #include <linux/firmware.h> |
|---|
| 25 | | -#include <drm/drmP.h> |
|---|
| 27 | +#include <linux/module.h> |
|---|
| 28 | + |
|---|
| 26 | 29 | #include "amdgpu.h" |
|---|
| 27 | 30 | #include "amdgpu_ucode.h" |
|---|
| 28 | 31 | #include "amdgpu_trace.h" |
|---|
| .. | .. |
|---|
| 225 | 228 | |
|---|
| 226 | 229 | static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) |
|---|
| 227 | 230 | { |
|---|
| 228 | | - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); |
|---|
| 231 | + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); |
|---|
| 229 | 232 | int i; |
|---|
| 230 | 233 | |
|---|
| 231 | 234 | for (i = 0; i < count; i++) |
|---|
| .. | .. |
|---|
| 245 | 248 | * Schedule an IB in the DMA ring (VI). |
|---|
| 246 | 249 | */ |
|---|
| 247 | 250 | static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, |
|---|
| 251 | + struct amdgpu_job *job, |
|---|
| 248 | 252 | struct amdgpu_ib *ib, |
|---|
| 249 | | - unsigned vmid, bool ctx_switch) |
|---|
| 253 | + uint32_t flags) |
|---|
| 250 | 254 | { |
|---|
| 255 | + unsigned vmid = AMDGPU_JOB_GET_VMID(job); |
|---|
| 256 | + |
|---|
| 251 | 257 | /* IB packet must end on an 8 DW boundary */ |
|---|
| 252 | | - sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); |
|---|
| 258 | + sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7); |
|---|
| 253 | 259 | |
|---|
| 254 | 260 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | |
|---|
| 255 | 261 | SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); |
|---|
| .. | .. |
|---|
| 349 | 355 | ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); |
|---|
| 350 | 356 | WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); |
|---|
| 351 | 357 | } |
|---|
| 352 | | - sdma0->ready = false; |
|---|
| 353 | | - sdma1->ready = false; |
|---|
| 354 | 358 | } |
|---|
| 355 | 359 | |
|---|
| 356 | 360 | /** |
|---|
| .. | .. |
|---|
| 471 | 475 | /* enable DMA IBs */ |
|---|
| 472 | 476 | WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); |
|---|
| 473 | 477 | |
|---|
| 474 | | - ring->ready = true; |
|---|
| 478 | + ring->sched.ready = true; |
|---|
| 475 | 479 | } |
|---|
| 476 | 480 | |
|---|
| 477 | 481 | sdma_v2_4_enable(adev, true); |
|---|
| 478 | 482 | for (i = 0; i < adev->sdma.num_instances; i++) { |
|---|
| 479 | 483 | ring = &adev->sdma.instance[i].ring; |
|---|
| 480 | | - r = amdgpu_ring_test_ring(ring); |
|---|
| 481 | | - if (r) { |
|---|
| 482 | | - ring->ready = false; |
|---|
| 484 | + r = amdgpu_ring_test_helper(ring); |
|---|
| 485 | + if (r) |
|---|
| 483 | 486 | return r; |
|---|
| 484 | | - } |
|---|
| 485 | 487 | |
|---|
| 486 | 488 | if (adev->mman.buffer_funcs_ring == ring) |
|---|
| 487 | 489 | amdgpu_ttm_set_buffer_funcs_status(adev, true); |
|---|
| .. | .. |
|---|
| 504 | 506 | return 0; |
|---|
| 505 | 507 | } |
|---|
| 506 | 508 | |
|---|
| 507 | | -/** |
|---|
| 508 | | - * sdma_v2_4_load_microcode - load the sDMA ME ucode |
|---|
| 509 | | - * |
|---|
| 510 | | - * @adev: amdgpu_device pointer |
|---|
| 511 | | - * |
|---|
| 512 | | - * Loads the sDMA0/1 ucode. |
|---|
| 513 | | - * Returns 0 for success, -EINVAL if the ucode is not available. |
|---|
| 514 | | - */ |
|---|
| 515 | | -static int sdma_v2_4_load_microcode(struct amdgpu_device *adev) |
|---|
| 516 | | -{ |
|---|
| 517 | | - const struct sdma_firmware_header_v1_0 *hdr; |
|---|
| 518 | | - const __le32 *fw_data; |
|---|
| 519 | | - u32 fw_size; |
|---|
| 520 | | - int i, j; |
|---|
| 521 | | - |
|---|
| 522 | | - /* halt the MEs */ |
|---|
| 523 | | - sdma_v2_4_enable(adev, false); |
|---|
| 524 | | - |
|---|
| 525 | | - for (i = 0; i < adev->sdma.num_instances; i++) { |
|---|
| 526 | | - if (!adev->sdma.instance[i].fw) |
|---|
| 527 | | - return -EINVAL; |
|---|
| 528 | | - hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; |
|---|
| 529 | | - amdgpu_ucode_print_sdma_hdr(&hdr->header); |
|---|
| 530 | | - fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; |
|---|
| 531 | | - fw_data = (const __le32 *) |
|---|
| 532 | | - (adev->sdma.instance[i].fw->data + |
|---|
| 533 | | - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); |
|---|
| 534 | | - WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0); |
|---|
| 535 | | - for (j = 0; j < fw_size; j++) |
|---|
| 536 | | - WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++)); |
|---|
| 537 | | - WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma.instance[i].fw_version); |
|---|
| 538 | | - } |
|---|
| 539 | | - |
|---|
| 540 | | - return 0; |
|---|
| 541 | | -} |
|---|
| 542 | 509 | |
|---|
| 543 | 510 | /** |
|---|
| 544 | 511 | * sdma_v2_4_start - setup and start the async dma engines |
|---|
| .. | .. |
|---|
| 551 | 518 | static int sdma_v2_4_start(struct amdgpu_device *adev) |
|---|
| 552 | 519 | { |
|---|
| 553 | 520 | int r; |
|---|
| 554 | | - |
|---|
| 555 | | - |
|---|
| 556 | | - if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { |
|---|
| 557 | | - r = sdma_v2_4_load_microcode(adev); |
|---|
| 558 | | - if (r) |
|---|
| 559 | | - return r; |
|---|
| 560 | | - } |
|---|
| 561 | 521 | |
|---|
| 562 | 522 | /* halt the engine before programming */ |
|---|
| 563 | 523 | sdma_v2_4_enable(adev, false); |
|---|
| .. | .. |
|---|
| 592 | 552 | u64 gpu_addr; |
|---|
| 593 | 553 | |
|---|
| 594 | 554 | r = amdgpu_device_wb_get(adev, &index); |
|---|
| 595 | | - if (r) { |
|---|
| 596 | | - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); |
|---|
| 555 | + if (r) |
|---|
| 597 | 556 | return r; |
|---|
| 598 | | - } |
|---|
| 599 | 557 | |
|---|
| 600 | 558 | gpu_addr = adev->wb.gpu_addr + (index * 4); |
|---|
| 601 | 559 | tmp = 0xCAFEDEAD; |
|---|
| 602 | 560 | adev->wb.wb[index] = cpu_to_le32(tmp); |
|---|
| 603 | 561 | |
|---|
| 604 | 562 | r = amdgpu_ring_alloc(ring, 5); |
|---|
| 605 | | - if (r) { |
|---|
| 606 | | - DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); |
|---|
| 607 | | - amdgpu_device_wb_free(adev, index); |
|---|
| 608 | | - return r; |
|---|
| 609 | | - } |
|---|
| 563 | + if (r) |
|---|
| 564 | + goto error_free_wb; |
|---|
| 610 | 565 | |
|---|
| 611 | 566 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | |
|---|
| 612 | 567 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); |
|---|
| .. | .. |
|---|
| 620 | 575 | tmp = le32_to_cpu(adev->wb.wb[index]); |
|---|
| 621 | 576 | if (tmp == 0xDEADBEEF) |
|---|
| 622 | 577 | break; |
|---|
| 623 | | - DRM_UDELAY(1); |
|---|
| 578 | + udelay(1); |
|---|
| 624 | 579 | } |
|---|
| 625 | 580 | |
|---|
| 626 | | - if (i < adev->usec_timeout) { |
|---|
| 627 | | - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); |
|---|
| 628 | | - } else { |
|---|
| 629 | | - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", |
|---|
| 630 | | - ring->idx, tmp); |
|---|
| 631 | | - r = -EINVAL; |
|---|
| 632 | | - } |
|---|
| 581 | + if (i >= adev->usec_timeout) |
|---|
| 582 | + r = -ETIMEDOUT; |
|---|
| 583 | + |
|---|
| 584 | +error_free_wb: |
|---|
| 633 | 585 | amdgpu_device_wb_free(adev, index); |
|---|
| 634 | | - |
|---|
| 635 | 586 | return r; |
|---|
| 636 | 587 | } |
|---|
| 637 | 588 | |
|---|
| .. | .. |
|---|
| 654 | 605 | long r; |
|---|
| 655 | 606 | |
|---|
| 656 | 607 | r = amdgpu_device_wb_get(adev, &index); |
|---|
| 657 | | - if (r) { |
|---|
| 658 | | - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); |
|---|
| 608 | + if (r) |
|---|
| 659 | 609 | return r; |
|---|
| 660 | | - } |
|---|
| 661 | 610 | |
|---|
| 662 | 611 | gpu_addr = adev->wb.gpu_addr + (index * 4); |
|---|
| 663 | 612 | tmp = 0xCAFEDEAD; |
|---|
| 664 | 613 | adev->wb.wb[index] = cpu_to_le32(tmp); |
|---|
| 665 | 614 | memset(&ib, 0, sizeof(ib)); |
|---|
| 666 | | - r = amdgpu_ib_get(adev, NULL, 256, &ib); |
|---|
| 667 | | - if (r) { |
|---|
| 668 | | - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); |
|---|
| 615 | + r = amdgpu_ib_get(adev, NULL, 256, |
|---|
| 616 | + AMDGPU_IB_POOL_DIRECT, &ib); |
|---|
| 617 | + if (r) |
|---|
| 669 | 618 | goto err0; |
|---|
| 670 | | - } |
|---|
| 671 | 619 | |
|---|
| 672 | 620 | ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | |
|---|
| 673 | 621 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); |
|---|
| .. | .. |
|---|
| 686 | 634 | |
|---|
| 687 | 635 | r = dma_fence_wait_timeout(f, false, timeout); |
|---|
| 688 | 636 | if (r == 0) { |
|---|
| 689 | | - DRM_ERROR("amdgpu: IB test timed out\n"); |
|---|
| 690 | 637 | r = -ETIMEDOUT; |
|---|
| 691 | 638 | goto err1; |
|---|
| 692 | 639 | } else if (r < 0) { |
|---|
| 693 | | - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); |
|---|
| 694 | 640 | goto err1; |
|---|
| 695 | 641 | } |
|---|
| 696 | 642 | tmp = le32_to_cpu(adev->wb.wb[index]); |
|---|
| 697 | | - if (tmp == 0xDEADBEEF) { |
|---|
| 698 | | - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); |
|---|
| 643 | + if (tmp == 0xDEADBEEF) |
|---|
| 699 | 644 | r = 0; |
|---|
| 700 | | - } else { |
|---|
| 701 | | - DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); |
|---|
| 645 | + else |
|---|
| 702 | 646 | r = -EINVAL; |
|---|
| 703 | | - } |
|---|
| 704 | 647 | |
|---|
| 705 | 648 | err1: |
|---|
| 706 | 649 | amdgpu_ib_free(adev, &ib, NULL); |
|---|
| .. | .. |
|---|
| 802 | 745 | */ |
|---|
| 803 | 746 | static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) |
|---|
| 804 | 747 | { |
|---|
| 805 | | - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); |
|---|
| 748 | + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); |
|---|
| 806 | 749 | u32 pad_count; |
|---|
| 807 | 750 | int i; |
|---|
| 808 | 751 | |
|---|
| 809 | | - pad_count = (8 - (ib->length_dw & 0x7)) % 8; |
|---|
| 752 | + pad_count = (-ib->length_dw) & 7; |
|---|
| 810 | 753 | for (i = 0; i < pad_count; i++) |
|---|
| 811 | 754 | if (sdma && sdma->burst_nop && (i == 0)) |
|---|
| 812 | 755 | ib->ptr[ib->length_dw++] = |
|---|
| .. | .. |
|---|
| 898 | 841 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
|---|
| 899 | 842 | |
|---|
| 900 | 843 | /* SDMA trap event */ |
|---|
| 901 | | - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP, |
|---|
| 844 | + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP, |
|---|
| 902 | 845 | &adev->sdma.trap_irq); |
|---|
| 903 | 846 | if (r) |
|---|
| 904 | 847 | return r; |
|---|
| 905 | 848 | |
|---|
| 906 | 849 | /* SDMA Privileged inst */ |
|---|
| 907 | | - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241, |
|---|
| 850 | + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241, |
|---|
| 908 | 851 | &adev->sdma.illegal_inst_irq); |
|---|
| 909 | 852 | if (r) |
|---|
| 910 | 853 | return r; |
|---|
| 911 | 854 | |
|---|
| 912 | 855 | /* SDMA SRBM write (registered on the illegal-instruction IRQ source) */ |
|---|
| 913 | | - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE, |
|---|
| 856 | + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE, |
|---|
| 914 | 857 | &adev->sdma.illegal_inst_irq); |
|---|
| 915 | 858 | if (r) |
|---|
| 916 | 859 | return r; |
|---|
| .. | .. |
|---|
| 929 | 872 | r = amdgpu_ring_init(adev, ring, 1024, |
|---|
| 930 | 873 | &adev->sdma.trap_irq, |
|---|
| 931 | 874 | (i == 0) ? |
|---|
| 932 | | - AMDGPU_SDMA_IRQ_TRAP0 : |
|---|
| 933 | | - AMDGPU_SDMA_IRQ_TRAP1); |
|---|
| 875 | + AMDGPU_SDMA_IRQ_INSTANCE0 : |
|---|
| 876 | + AMDGPU_SDMA_IRQ_INSTANCE1, |
|---|
| 877 | + AMDGPU_RING_PRIO_DEFAULT); |
|---|
| 934 | 878 | if (r) |
|---|
| 935 | 879 | return r; |
|---|
| 936 | 880 | } |
|---|
| .. | .. |
|---|
| 1065 | 1009 | u32 sdma_cntl; |
|---|
| 1066 | 1010 | |
|---|
| 1067 | 1011 | switch (type) { |
|---|
| 1068 | | - case AMDGPU_SDMA_IRQ_TRAP0: |
|---|
| 1012 | + case AMDGPU_SDMA_IRQ_INSTANCE0: |
|---|
| 1069 | 1013 | switch (state) { |
|---|
| 1070 | 1014 | case AMDGPU_IRQ_STATE_DISABLE: |
|---|
| 1071 | 1015 | sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET); |
|---|
| .. | .. |
|---|
| 1081 | 1025 | break; |
|---|
| 1082 | 1026 | } |
|---|
| 1083 | 1027 | break; |
|---|
| 1084 | | - case AMDGPU_SDMA_IRQ_TRAP1: |
|---|
| 1028 | + case AMDGPU_SDMA_IRQ_INSTANCE1: |
|---|
| 1085 | 1029 | switch (state) { |
|---|
| 1086 | 1030 | case AMDGPU_IRQ_STATE_DISABLE: |
|---|
| 1087 | 1031 | sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET); |
|---|
| .. | .. |
|---|
| 1147 | 1091 | struct amdgpu_irq_src *source, |
|---|
| 1148 | 1092 | struct amdgpu_iv_entry *entry) |
|---|
| 1149 | 1093 | { |
|---|
| 1094 | + u8 instance_id, queue_id; |
|---|
| 1095 | + |
|---|
| 1150 | 1096 | DRM_ERROR("Illegal instruction in SDMA command stream\n"); |
|---|
| 1151 | | - schedule_work(&adev->reset_work); |
|---|
| 1097 | + instance_id = (entry->ring_id & 0x3) >> 0; |
|---|
| 1098 | + queue_id = (entry->ring_id & 0xc) >> 2; |
|---|
| 1099 | + |
|---|
| 1100 | + if (instance_id <= 1 && queue_id == 0) |
|---|
| 1101 | + drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched); |
|---|
| 1152 | 1102 | return 0; |
|---|
| 1153 | 1103 | } |
|---|
| 1154 | 1104 | |
|---|
| .. | .. |
|---|
| 1250 | 1200 | static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib, |
|---|
| 1251 | 1201 | uint64_t src_offset, |
|---|
| 1252 | 1202 | uint64_t dst_offset, |
|---|
| 1253 | | - uint32_t byte_count) |
|---|
| 1203 | + uint32_t byte_count, |
|---|
| 1204 | + bool tmz) |
|---|
| 1254 | 1205 | { |
|---|
| 1255 | 1206 | ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | |
|---|
| 1256 | 1207 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); |
|---|
| .. | .. |
|---|
| 1296 | 1247 | |
|---|
| 1297 | 1248 | static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev) |
|---|
| 1298 | 1249 | { |
|---|
| 1299 | | - if (adev->mman.buffer_funcs == NULL) { |
|---|
| 1300 | | - adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs; |
|---|
| 1301 | | - adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; |
|---|
| 1302 | | - } |
|---|
| 1250 | + adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs; |
|---|
| 1251 | + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; |
|---|
| 1303 | 1252 | } |
|---|
| 1304 | 1253 | |
|---|
| 1305 | 1254 | static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = { |
|---|
| .. | .. |
|---|
| 1314 | 1263 | { |
|---|
| 1315 | 1264 | unsigned i; |
|---|
| 1316 | 1265 | |
|---|
| 1317 | | - if (adev->vm_manager.vm_pte_funcs == NULL) { |
|---|
| 1318 | | - adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; |
|---|
| 1319 | | - for (i = 0; i < adev->sdma.num_instances; i++) |
|---|
| 1320 | | - adev->vm_manager.vm_pte_rings[i] = |
|---|
| 1321 | | - &adev->sdma.instance[i].ring; |
|---|
| 1322 | | - |
|---|
| 1323 | | - adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; |
|---|
| 1266 | + adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; |
|---|
| 1267 | + for (i = 0; i < adev->sdma.num_instances; i++) { |
|---|
| 1268 | + adev->vm_manager.vm_pte_scheds[i] = |
|---|
| 1269 | + &adev->sdma.instance[i].ring.sched; |
|---|
| 1324 | 1270 | } |
|---|
| 1271 | + adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; |
|---|
| 1325 | 1272 | } |
|---|
| 1326 | 1273 | |
|---|
| 1327 | 1274 | const struct amdgpu_ip_block_version sdma_v2_4_ip_block = |
|---|