  .. |   .. |
  21 |   21 |   *
  22 |   22 |   * Authors: Alex Deucher
  23 |   23 |   */
  24 |      | -#include <drm/drmP.h>
     |   24 | +
  25 |   25 |  #include "amdgpu.h"
  26 |   26 |  #include "amdgpu_trace.h"
  27 |   27 |  #include "si.h"
  .. |   .. |
  61 |   61 |  }
  62 |   62 |
  63 |   63 |  static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
     |   64 | +                                struct amdgpu_job *job,
  64 |   65 |                                  struct amdgpu_ib *ib,
  65 |      | -                                unsigned vmid, bool ctx_switch)
     |   66 | +                                uint32_t flags)
  66 |   67 |  {
     |   68 | +        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
  67 |   69 |          /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
  68 |   70 |           * Pad as necessary with NOPs.
  69 |   71 |           */
  .. |   .. |
 122 |  124 |
 123 |  125 |                  if (adev->mman.buffer_funcs_ring == ring)
 124 |  126 |                          amdgpu_ttm_set_buffer_funcs_status(adev, false);
 125 |      | -                ring->ready = false;
 126 |  127 |          }
 127 |  128 |  }
 128 |  129 |
  .. |   .. |
 175 |  176 |                  WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
 176 |  177 |                  WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
 177 |  178 |
 178 |      | -                ring->ready = true;
     |  179 | +                ring->sched.ready = true;
 179 |  180 |
 180 |      | -                r = amdgpu_ring_test_ring(ring);
 181 |      | -                if (r) {
 182 |      | -                        ring->ready = false;
     |  181 | +                r = amdgpu_ring_test_helper(ring);
     |  182 | +                if (r)
 183 |  183 |                          return r;
 184 |      | -                }
 185 |  184 |
 186 |  185 |                  if (adev->mman.buffer_funcs_ring == ring)
 187 |  186 |                          amdgpu_ttm_set_buffer_funcs_status(adev, true);
  .. |   .. |
 209 |  208 |          u64 gpu_addr;
 210 |  209 |
 211 |  210 |          r = amdgpu_device_wb_get(adev, &index);
 212 |      | -        if (r) {
 213 |      | -                dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
     |  211 | +        if (r)
 214 |  212 |                  return r;
 215 |      | -        }
 216 |  213 |
 217 |  214 |          gpu_addr = adev->wb.gpu_addr + (index * 4);
 218 |  215 |          tmp = 0xCAFEDEAD;
 219 |  216 |          adev->wb.wb[index] = cpu_to_le32(tmp);
 220 |  217 |
 221 |  218 |          r = amdgpu_ring_alloc(ring, 4);
 222 |      | -        if (r) {
 223 |      | -                DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
 224 |      | -                amdgpu_device_wb_free(adev, index);
 225 |      | -                return r;
 226 |      | -        }
     |  219 | +        if (r)
     |  220 | +                goto error_free_wb;
 227 |  221 |
 228 |  222 |          amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1));
 229 |  223 |          amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
  .. |   .. |
 235 |  229 |                  tmp = le32_to_cpu(adev->wb.wb[index]);
 236 |  230 |                  if (tmp == 0xDEADBEEF)
 237 |  231 |                          break;
 238 |      | -                DRM_UDELAY(1);
     |  232 | +                udelay(1);
 239 |  233 |          }
 240 |  234 |
 241 |      | -        if (i < adev->usec_timeout) {
 242 |      | -                DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
 243 |      | -        } else {
 244 |      | -                DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
 245 |      | -                          ring->idx, tmp);
 246 |      | -                r = -EINVAL;
 247 |      | -        }
     |  235 | +        if (i >= adev->usec_timeout)
     |  236 | +                r = -ETIMEDOUT;
     |  237 | +
     |  238 | +error_free_wb:
 248 |  239 |          amdgpu_device_wb_free(adev, index);
 249 |      | -
 250 |  240 |          return r;
 251 |  241 |  }
  .. |   .. |
 269 |  259 |          long r;
 270 |  260 |
 271 |  261 |          r = amdgpu_device_wb_get(adev, &index);
 272 |      | -        if (r) {
 273 |      | -                dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
     |  262 | +        if (r)
 274 |  263 |                  return r;
 275 |      | -        }
 276 |  264 |
 277 |  265 |          gpu_addr = adev->wb.gpu_addr + (index * 4);
 278 |  266 |          tmp = 0xCAFEDEAD;
 279 |  267 |          adev->wb.wb[index] = cpu_to_le32(tmp);
 280 |  268 |          memset(&ib, 0, sizeof(ib));
 281 |      | -        r = amdgpu_ib_get(adev, NULL, 256, &ib);
 282 |      | -        if (r) {
 283 |      | -                DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
     |  269 | +        r = amdgpu_ib_get(adev, NULL, 256,
     |  270 | +                          AMDGPU_IB_POOL_DIRECT, &ib);
     |  271 | +        if (r)
 284 |  272 |                  goto err0;
 285 |      | -        }
 286 |  273 |
 287 |  274 |          ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1);
 288 |  275 |          ib.ptr[1] = lower_32_bits(gpu_addr);
  .. |   .. |
 295 |  282 |
 296 |  283 |          r = dma_fence_wait_timeout(f, false, timeout);
 297 |  284 |          if (r == 0) {
 298 |      | -                DRM_ERROR("amdgpu: IB test timed out\n");
 299 |  285 |                  r = -ETIMEDOUT;
 300 |  286 |                  goto err1;
 301 |  287 |          } else if (r < 0) {
 302 |      | -                DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 303 |  288 |                  goto err1;
 304 |  289 |          }
 305 |  290 |          tmp = le32_to_cpu(adev->wb.wb[index]);
 306 |      | -        if (tmp == 0xDEADBEEF) {
 307 |      | -                DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
     |  291 | +        if (tmp == 0xDEADBEEF)
 308 |  292 |                  r = 0;
 309 |      | -        } else {
 310 |      | -                DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
     |  293 | +        else
 311 |  294 |                  r = -EINVAL;
 312 |      | -        }
 313 |  295 |
 314 |  296 |  err1:
 315 |  297 |          amdgpu_ib_free(adev, &ib, NULL);
  .. |   .. |
 502 |  484 |          struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 503 |  485 |
 504 |  486 |          /* DMA0 trap event */
 505 |      | -        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224, &adev->sdma.trap_irq);
     |  487 | +        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
     |  488 | +                              &adev->sdma.trap_irq);
 506 |  489 |          if (r)
 507 |  490 |                  return r;
 508 |  491 |
 509 |  492 |          /* DMA1 trap event */
 510 |      | -        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 244, &adev->sdma.trap_irq_1);
     |  493 | +        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 244,
     |  494 | +                              &adev->sdma.trap_irq);
 511 |  495 |          if (r)
 512 |  496 |                  return r;
 513 |  497 |
  .. |   .. |
 519 |  503 |                  r = amdgpu_ring_init(adev, ring, 1024,
 520 |  504 |                                       &adev->sdma.trap_irq,
 521 |  505 |                                       (i == 0) ?
 522 |      | -                                     AMDGPU_SDMA_IRQ_TRAP0 :
 523 |      | -                                     AMDGPU_SDMA_IRQ_TRAP1);
     |  506 | +                                     AMDGPU_SDMA_IRQ_INSTANCE0 :
     |  507 | +                                     AMDGPU_SDMA_IRQ_INSTANCE1,
     |  508 | +                                     AMDGPU_RING_PRIO_DEFAULT);
 524 |  509 |                  if (r)
 525 |  510 |                          return r;
 526 |  511 |          }
  .. |   .. |
 607 |  592 |          u32 sdma_cntl;
 608 |  593 |
 609 |  594 |          switch (type) {
 610 |      | -        case AMDGPU_SDMA_IRQ_TRAP0:
     |  595 | +        case AMDGPU_SDMA_IRQ_INSTANCE0:
 611 |  596 |                  switch (state) {
 612 |  597 |                  case AMDGPU_IRQ_STATE_DISABLE:
 613 |  598 |                          sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
  .. |   .. |
 623 |  608 |                          break;
 624 |  609 |                  }
 625 |  610 |                  break;
 626 |      | -        case AMDGPU_SDMA_IRQ_TRAP1:
     |  611 | +        case AMDGPU_SDMA_IRQ_INSTANCE1:
 627 |  612 |                  switch (state) {
 628 |  613 |                  case AMDGPU_IRQ_STATE_DISABLE:
 629 |  614 |                          sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
  .. |   .. |
 649 |  634 |                                     struct amdgpu_irq_src *source,
 650 |  635 |                                     struct amdgpu_iv_entry *entry)
 651 |  636 |  {
 652 |      | -        amdgpu_fence_process(&adev->sdma.instance[0].ring);
 653 |      | -
 654 |      | -        return 0;
 655 |      | -}
 656 |      | -
 657 |      | -static int si_dma_process_trap_irq_1(struct amdgpu_device *adev,
 658 |      | -                                     struct amdgpu_irq_src *source,
 659 |      | -                                     struct amdgpu_iv_entry *entry)
 660 |      | -{
 661 |      | -        amdgpu_fence_process(&adev->sdma.instance[1].ring);
 662 |      | -
 663 |      | -        return 0;
 664 |      | -}
 665 |      | -
 666 |      | -static int si_dma_process_illegal_inst_irq(struct amdgpu_device *adev,
 667 |      | -                                           struct amdgpu_irq_src *source,
 668 |      | -                                           struct amdgpu_iv_entry *entry)
 669 |      | -{
 670 |      | -        DRM_ERROR("Illegal instruction in SDMA command stream\n");
 671 |      | -        schedule_work(&adev->reset_work);
     |  637 | +        if (entry->src_id == 224)
     |  638 | +                amdgpu_fence_process(&adev->sdma.instance[0].ring);
     |  639 | +        else
     |  640 | +                amdgpu_fence_process(&adev->sdma.instance[1].ring);
 672 |  641 |          return 0;
 673 |  642 |  }
 674 |  643 |
  .. |   .. |
 680 |  649 |          bool enable;
 681 |  650 |          struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 682 |  651 |
 683 |      | -        enable = (state == AMD_CG_STATE_GATE) ? true : false;
     |  652 | +        enable = (state == AMD_CG_STATE_GATE);
 684 |  653 |
 685 |  654 |          if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
 686 |  655 |                  for (i = 0; i < adev->sdma.num_instances; i++) {
  .. |   .. |
 786 |  755 |          .process = si_dma_process_trap_irq,
 787 |  756 |  };
 788 |  757 |
 789 |      | -static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs_1 = {
 790 |      | -        .set = si_dma_set_trap_irq_state,
 791 |      | -        .process = si_dma_process_trap_irq_1,
 792 |      | -};
 793 |      | -
 794 |      | -static const struct amdgpu_irq_src_funcs si_dma_illegal_inst_irq_funcs = {
 795 |      | -        .process = si_dma_process_illegal_inst_irq,
 796 |      | -};
 797 |      | -
 798 |  758 |  static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
 799 |  759 |  {
 800 |  760 |          adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
 801 |  761 |          adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs;
 802 |      | -        adev->sdma.trap_irq_1.funcs = &si_dma_trap_irq_funcs_1;
 803 |      | -        adev->sdma.illegal_inst_irq.funcs = &si_dma_illegal_inst_irq_funcs;
 804 |  762 |  }
 805 |  763 |
 806 |  764 |  /**
  .. |   .. |
 818 |  776 |  static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib,
 819 |  777 |                                      uint64_t src_offset,
 820 |  778 |                                      uint64_t dst_offset,
 821 |      | -                                    uint32_t byte_count)
     |  779 | +                                    uint32_t byte_count,
     |  780 | +                                    bool tmz)
 822 |  781 |  {
 823 |  782 |          ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
 824 |  783 |                                                1, 0, 0, byte_count);
  .. |   .. |
 863 |  822 |
 864 |  823 |  static void si_dma_set_buffer_funcs(struct amdgpu_device *adev)
 865 |  824 |  {
 866 |      | -        if (adev->mman.buffer_funcs == NULL) {
 867 |      | -                adev->mman.buffer_funcs = &si_dma_buffer_funcs;
 868 |      | -                adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
 869 |      | -        }
     |  825 | +        adev->mman.buffer_funcs = &si_dma_buffer_funcs;
     |  826 | +        adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
 870 |  827 |  }
 871 |  828 |
 872 |  829 |  static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
  .. |   .. |
 881 |  838 |  {
 882 |  839 |          unsigned i;
 883 |  840 |
 884 |      | -        if (adev->vm_manager.vm_pte_funcs == NULL) {
 885 |      | -                adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs;
 886 |      | -                for (i = 0; i < adev->sdma.num_instances; i++)
 887 |      | -                        adev->vm_manager.vm_pte_rings[i] =
 888 |      | -                                &adev->sdma.instance[i].ring;
 889 |      | -
 890 |      | -                adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
     |  841 | +        adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs;
     |  842 | +        for (i = 0; i < adev->sdma.num_instances; i++) {
     |  843 | +                adev->vm_manager.vm_pte_scheds[i] =
     |  844 | +                        &adev->sdma.instance[i].ring.sched;
 891 |  845 |          }
     |  846 | +        adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
 892 |  847 |  }
 893 |  848 |
 894 |  849 |  const struct amdgpu_ip_block_version si_dma_ip_block =
---|