| .. | .. |
|---|
| 25 | 25 | */ |
|---|
| 26 | 26 | |
|---|
| 27 | 27 | #include <linux/firmware.h> |
|---|
| 28 | | -#include <drm/drmP.h> |
|---|
| 28 | + |
|---|
| 29 | 29 | #include "amdgpu.h" |
|---|
| 30 | 30 | #include "amdgpu_vce.h" |
|---|
| 31 | 31 | #include "soc15.h" |
|---|
| .. | .. |
|---|
| 244 | 244 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0); |
|---|
| 245 | 245 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); |
|---|
| 246 | 246 | |
|---|
| 247 | + offset = AMDGPU_VCE_FIRMWARE_OFFSET; |
|---|
| 247 | 248 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { |
|---|
| 249 | + uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo; |
|---|
| 250 | + uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi; |
|---|
| 251 | + uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low; |
|---|
| 252 | + |
|---|
| 248 | 253 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, |
|---|
| 249 | | - mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), |
|---|
| 250 | | - adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8); |
|---|
| 254 | + mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8); |
|---|
| 251 | 255 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, |
|---|
| 252 | 256 | mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), |
|---|
| 253 | | - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff); |
|---|
| 257 | + (tmr_mc_addr >> 40) & 0xff); |
|---|
| 258 | + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0); |
|---|
| 254 | 259 | } else { |
|---|
| 255 | 260 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, |
|---|
| 256 | 261 | mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), |
|---|
| .. | .. |
|---|
| 258 | 263 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, |
|---|
| 259 | 264 | mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), |
|---|
| 260 | 265 | (adev->vce.gpu_addr >> 40) & 0xff); |
|---|
| 266 | + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), |
|---|
| 267 | + offset & ~0x0f000000); |
|---|
| 268 | + |
|---|
| 261 | 269 | } |
|---|
| 262 | 270 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, |
|---|
| 263 | 271 | mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), |
|---|
| .. | .. |
|---|
| 272 | 280 | mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), |
|---|
| 273 | 281 | (adev->vce.gpu_addr >> 40) & 0xff); |
|---|
| 274 | 282 | |
|---|
| 275 | | - offset = AMDGPU_VCE_FIRMWARE_OFFSET; |
|---|
| 276 | 283 | size = VCE_V4_0_FW_SIZE; |
|---|
| 277 | | - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), |
|---|
| 278 | | - offset & ~0x0f000000); |
|---|
| 279 | 284 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size); |
|---|
| 280 | 285 | |
|---|
| 281 | 286 | offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0; |
|---|
| .. | .. |
|---|
| 382 | 387 | static int vce_v4_0_stop(struct amdgpu_device *adev) |
|---|
| 383 | 388 | { |
|---|
| 384 | 389 | |
|---|
| 390 | + /* Disable VCPU */ |
|---|
| 385 | 391 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001); |
|---|
| 386 | 392 | |
|---|
| 387 | 393 | /* hold on ECPU */ |
|---|
| .. | .. |
|---|
| 389 | 395 | VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, |
|---|
| 390 | 396 | ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); |
|---|
| 391 | 397 | |
|---|
| 392 | | - /* clear BUSY flag */ |
|---|
| 393 | | - WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK); |
|---|
| 398 | + /* clear VCE_STATUS */ |
|---|
| 399 | + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0); |
|---|
| 394 | 400 | |
|---|
| 395 | 401 | /* Set Clock-Gating off */ |
|---|
| 396 | 402 | /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG) |
|---|
| .. | .. |
|---|
| 466 | 472 | * so set unused location for other unused rings. |
|---|
| 467 | 473 | */ |
|---|
| 468 | 474 | if (i == 0) |
|---|
| 469 | | - ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2; |
|---|
| 475 | + ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2; |
|---|
| 470 | 476 | else |
|---|
| 471 | | - ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1; |
|---|
| 477 | + ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1; |
|---|
| 472 | 478 | } |
|---|
| 473 | | - r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0); |
|---|
| 479 | + r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0, |
|---|
| 480 | + AMDGPU_RING_PRIO_DEFAULT); |
|---|
| 474 | 481 | if (r) |
|---|
| 475 | 482 | return r; |
|---|
| 476 | 483 | } |
|---|
| .. | .. |
|---|
| 519 | 526 | if (r) |
|---|
| 520 | 527 | return r; |
|---|
| 521 | 528 | |
|---|
| 522 | | - for (i = 0; i < adev->vce.num_rings; i++) |
|---|
| 523 | | - adev->vce.ring[i].ready = false; |
|---|
| 524 | | - |
|---|
| 525 | 529 | for (i = 0; i < adev->vce.num_rings; i++) { |
|---|
| 526 | | - r = amdgpu_ring_test_ring(&adev->vce.ring[i]); |
|---|
| 530 | + r = amdgpu_ring_test_helper(&adev->vce.ring[i]); |
|---|
| 527 | 531 | if (r) |
|---|
| 528 | 532 | return r; |
|---|
| 529 | | - else |
|---|
| 530 | | - adev->vce.ring[i].ready = true; |
|---|
| 531 | 533 | } |
|---|
| 532 | 534 | |
|---|
| 533 | 535 | DRM_INFO("VCE initialized successfully.\n"); |
|---|
| .. | .. |
|---|
| 538 | 540 | static int vce_v4_0_hw_fini(void *handle) |
|---|
| 539 | 541 | { |
|---|
| 540 | 542 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
|---|
| 541 | | - int i; |
|---|
| 542 | 543 | |
|---|
| 543 | 544 | if (!amdgpu_sriov_vf(adev)) { |
|---|
| 544 | 545 | /* vce_v4_0_wait_for_idle(handle); */ |
|---|
| .. | .. |
|---|
| 547 | 548 | /* full access mode, so don't touch any VCE register */ |
|---|
| 548 | 549 | DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); |
|---|
| 549 | 550 | } |
|---|
| 550 | | - |
|---|
| 551 | | - for (i = 0; i < adev->vce.num_rings; i++) |
|---|
| 552 | | - adev->vce.ring[i].ready = false; |
|---|
| 553 | 551 | |
|---|
| 554 | 552 | return 0; |
|---|
| 555 | 553 | } |
|---|
| .. | .. |
|---|
| 601 | 599 | static void vce_v4_0_mc_resume(struct amdgpu_device *adev) |
|---|
| 602 | 600 | { |
|---|
| 603 | 601 | uint32_t offset, size; |
|---|
| 602 | + uint64_t tmr_mc_addr; |
|---|
| 604 | 603 | |
|---|
| 605 | 604 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16)); |
|---|
| 606 | 605 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000); |
|---|
| .. | .. |
|---|
| 613 | 612 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0); |
|---|
| 614 | 613 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); |
|---|
| 615 | 614 | |
|---|
| 615 | + offset = AMDGPU_VCE_FIRMWARE_OFFSET; |
|---|
| 616 | + |
|---|
| 616 | 617 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { |
|---|
| 618 | + tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 | |
|---|
| 619 | + adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo; |
|---|
| 617 | 620 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), |
|---|
| 618 | | - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8)); |
|---|
| 621 | + (tmr_mc_addr >> 8)); |
|---|
| 619 | 622 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), |
|---|
| 620 | | - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff); |
|---|
| 623 | + (tmr_mc_addr >> 40) & 0xff); |
|---|
| 624 | + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0); |
|---|
| 621 | 625 | } else { |
|---|
| 622 | 626 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), |
|---|
| 623 | 627 | (adev->vce.gpu_addr >> 8)); |
|---|
| 624 | 628 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), |
|---|
| 625 | 629 | (adev->vce.gpu_addr >> 40) & 0xff); |
|---|
| 630 | + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000); |
|---|
| 626 | 631 | } |
|---|
| 627 | 632 | |
|---|
| 628 | | - offset = AMDGPU_VCE_FIRMWARE_OFFSET; |
|---|
| 629 | 633 | size = VCE_V4_0_FW_SIZE; |
|---|
| 630 | | - WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000); |
|---|
| 631 | 634 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size); |
|---|
| 632 | 635 | |
|---|
| 633 | 636 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8)); |
|---|
| .. | .. |
|---|
| 881 | 884 | enum amd_clockgating_state state) |
|---|
| 882 | 885 | { |
|---|
| 883 | 886 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
|---|
| 884 | | - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; |
|---|
| 887 | + bool enable = (state == AMD_CG_STATE_GATE); |
|---|
| 885 | 888 | int i; |
|---|
| 886 | 889 | |
|---|
| 887 | 890 | if ((adev->asic_type == CHIP_POLARIS10) || |
|---|
| .. | .. |
|---|
| 922 | 925 | |
|---|
| 923 | 926 | return 0; |
|---|
| 924 | 927 | } |
|---|
| 928 | +#endif |
|---|
| 925 | 929 | |
|---|
| 926 | 930 | static int vce_v4_0_set_powergating_state(void *handle, |
|---|
| 927 | 931 | enum amd_powergating_state state) |
|---|
| .. | .. |
|---|
| 935 | 939 | */ |
|---|
| 936 | 940 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
|---|
| 937 | 941 | |
|---|
| 938 | | - if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE)) |
|---|
| 939 | | - return 0; |
|---|
| 940 | | - |
|---|
| 941 | 942 | if (state == AMD_PG_STATE_GATE) |
|---|
| 942 | | - /* XXX do we need a vce_v4_0_stop()? */ |
|---|
| 943 | | - return 0; |
|---|
| 943 | + return vce_v4_0_stop(adev); |
|---|
| 944 | 944 | else |
|---|
| 945 | 945 | return vce_v4_0_start(adev); |
|---|
| 946 | 946 | } |
|---|
| 947 | | -#endif |
|---|
| 948 | 947 | |
|---|
| 949 | | -static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, |
|---|
| 950 | | - struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) |
|---|
| 948 | +static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, |
|---|
| 949 | + struct amdgpu_ib *ib, uint32_t flags) |
|---|
| 951 | 950 | { |
|---|
| 951 | + unsigned vmid = AMDGPU_JOB_GET_VMID(job); |
|---|
| 952 | + |
|---|
| 952 | 953 | amdgpu_ring_write(ring, VCE_CMD_IB_VM); |
|---|
| 953 | 954 | amdgpu_ring_write(ring, vmid); |
|---|
| 954 | 955 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); |
|---|
| .. | .. |
|---|
| 990 | 991 | pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); |
|---|
| 991 | 992 | |
|---|
| 992 | 993 | /* wait for reg writes */ |
|---|
| 993 | | - vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, |
|---|
| 994 | + vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + |
|---|
| 995 | + vmid * hub->ctx_addr_distance, |
|---|
| 994 | 996 | lower_32_bits(pd_addr), 0xffffffff); |
|---|
| 995 | 997 | } |
|---|
| 996 | 998 | |
|---|
| .. | .. |
|---|
| 1057 | 1059 | .soft_reset = NULL /* vce_v4_0_soft_reset */, |
|---|
| 1058 | 1060 | .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */, |
|---|
| 1059 | 1061 | .set_clockgating_state = vce_v4_0_set_clockgating_state, |
|---|
| 1060 | | - .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */, |
|---|
| 1062 | + .set_powergating_state = vce_v4_0_set_powergating_state, |
|---|
| 1061 | 1063 | }; |
|---|
| 1062 | 1064 | |
|---|
| 1063 | 1065 | static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { |
|---|
| .. | .. |
|---|
| 1065 | 1067 | .align_mask = 0x3f, |
|---|
| 1066 | 1068 | .nop = VCE_CMD_NO_OP, |
|---|
| 1067 | 1069 | .support_64bit_ptrs = false, |
|---|
| 1068 | | - .vmhub = AMDGPU_MMHUB, |
|---|
| 1070 | + .no_user_fence = true, |
|---|
| 1071 | + .vmhub = AMDGPU_MMHUB_0, |
|---|
| 1069 | 1072 | .get_rptr = vce_v4_0_ring_get_rptr, |
|---|
| 1070 | 1073 | .get_wptr = vce_v4_0_ring_get_wptr, |
|---|
| 1071 | 1074 | .set_wptr = vce_v4_0_ring_set_wptr, |
|---|