From d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Mon, 11 Dec 2023 02:45:28 +0000 Subject: [PATCH] add boot partition size --- kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 192 +++++++++++++++++++++++++++++++++++++++++++++--- 1 files changed, 180 insertions(+), 12 deletions(-) diff --git a/kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 0b0b863..1769115 100644 --- a/kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -24,12 +24,19 @@ #ifndef __AMDGPU_VCN_H__ #define __AMDGPU_VCN_H__ -#define AMDGPU_VCN_STACK_SIZE (200*1024) -#define AMDGPU_VCN_HEAP_SIZE (256*1024) -#define AMDGPU_VCN_SESSION_SIZE (50*1024) +#define AMDGPU_VCN_STACK_SIZE (128*1024) +#define AMDGPU_VCN_CONTEXT_SIZE (512*1024) + #define AMDGPU_VCN_FIRMWARE_OFFSET 256 #define AMDGPU_VCN_MAX_ENC_RINGS 3 +#define AMDGPU_MAX_VCN_INSTANCES 2 +#define AMDGPU_MAX_VCN_ENC_RINGS AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES + +#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0) +#define AMDGPU_VCN_HARVEST_VCN1 (1 << 1) + +#define VCN_DEC_KMD_CMD 0x80000000 #define VCN_DEC_CMD_FENCE 0x00000000 #define VCN_DEC_CMD_TRAP 0x00000001 #define VCN_DEC_CMD_WRITE_REG 0x00000004 @@ -45,8 +52,109 @@ #define VCN_ENC_CMD_REG_WRITE 0x0000000b #define VCN_ENC_CMD_REG_WAIT 0x0000000c +#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 +#define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN_AON_SOC_ADDRESS_2_0 0x1f800 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 +#define VCN_VID_IP_ADDRESS_2_0 0x0 +#define VCN_AON_IP_ADDRESS_2_0 0x30000 + +#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b +#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 +#define mmUVD_REG_XX_MASK 0x026c +#define mmUVD_REG_XX_MASK_BASE_IDX 1 + +/* 1 second timeout */ +#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000) + +#define RREG32_SOC15_DPG_MODE_1_0(ip, inst_idx, reg, mask, sram_sel) \ + ({ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \ + UVD_DPG_LMA_CTL__MASK_EN_MASK | \ + ((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg) \ + << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ + (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + RREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA); \ + }) + +#define WREG32_SOC15_DPG_MODE_1_0(ip, inst_idx, reg, value, mask, sram_sel) \ + do { \ + WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \ + UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ + ((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg) \ + << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ + (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + } while (0) + +#define SOC15_DPG_MODE_OFFSET(ip, inst_idx, reg) \ + ({ \ + uint32_t internal_reg_offset, addr; \ + bool video_range, video1_range, aon_range, aon1_range; \ + \ + addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \ + addr <<= 2; \ + video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS_2_0)) && \ + ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS_2_0 + 0x2600))))); \ + video1_range = ((((0xFFFFF & addr) >= (VCN1_VID_SOC_ADDRESS_3_0)) && \ + ((0xFFFFF & addr) < ((VCN1_VID_SOC_ADDRESS_3_0 + 0x2600))))); \ + aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS_2_0)) && \ + ((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS_2_0 + 0x600))))); \ + aon1_range = ((((0xFFFFF & addr) >= (VCN1_AON_SOC_ADDRESS_3_0)) && \ + ((0xFFFFF & addr) < ((VCN1_AON_SOC_ADDRESS_3_0 + 0x600))))); \ + if (video_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS_2_0) + \ + (VCN_VID_IP_ADDRESS_2_0)); \ + else if (aon_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS_2_0) + \ + (VCN_AON_IP_ADDRESS_2_0)); \ + else if (video1_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN1_VID_SOC_ADDRESS_3_0) + \ + (VCN_VID_IP_ADDRESS_2_0)); \ + else if (aon1_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN1_AON_SOC_ADDRESS_3_0) + \ + (VCN_AON_IP_ADDRESS_2_0)); \ + else \ + internal_reg_offset = (0xFFFFF & addr); \ + \ + internal_reg_offset >>= 2; \ + }) + +#define RREG32_SOC15_DPG_MODE(inst_idx, offset, mask_en) \ + ({ \ + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \ + (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \ + }) + +#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \ + do { \ + if (!indirect) { \ + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \ + (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + } else { \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = offset; \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value; \ + } \ + } while (0) + +#define AMDGPU_VCN_MULTI_QUEUE_FLAG (1 << 8) + +enum fw_queue_mode { + FW_QUEUE_RING_RESET = 1, + FW_QUEUE_DPG_HOLD_OFF = 2, +}; + enum engine_status_constants { UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0, + UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0 = 0xAAAA0, + UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0 = 0x2A2A8AA0, UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002, UVD_STATUS__UVD_BUSY = 0x00000004, GB_ADDR_CONFIG_DEFAULT = 0x26010011, @@ -54,22 +162,85 @@ UVD_STATUS__BUSY = 0x5, UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF = 0x1, UVD_STATUS__RBC_BUSY = 0x1, + UVD_PGFSM_STATUS_UVDJ_PWR_ON = 0, }; -struct amdgpu_vcn { +enum internal_dpg_state { + VCN_DPG_STATE__UNPAUSE = 0, + VCN_DPG_STATE__PAUSE, +}; + +struct dpg_pause_state { + enum internal_dpg_state fw_based; + enum internal_dpg_state jpeg; +}; + +struct amdgpu_vcn_reg{ + unsigned data0; + unsigned data1; + unsigned cmd; + unsigned nop; + unsigned context_id; + unsigned ib_vmid; + unsigned ib_bar_low; + unsigned ib_bar_high; + unsigned ib_size; + unsigned gp_scratch8; + unsigned scratch9; +}; + +struct amdgpu_vcn_inst { struct amdgpu_bo *vcpu_bo; void *cpu_addr; uint64_t gpu_addr; - unsigned fw_version; void *saved_bo; - struct delayed_work idle_work; - const struct firmware *fw; /* VCN firmware */ struct amdgpu_ring ring_dec; struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; - struct amdgpu_ring ring_jpeg; struct amdgpu_irq_src irq; - unsigned num_enc_rings; + struct amdgpu_vcn_reg external; + struct amdgpu_bo *dpg_sram_bo; + struct dpg_pause_state pause_state; + void *dpg_sram_cpu_addr; + uint64_t dpg_sram_gpu_addr; + uint32_t *dpg_sram_curr_addr; + atomic_t dpg_enc_submission_cnt; + void *fw_shared_cpu_addr; + uint64_t fw_shared_gpu_addr; }; + +struct amdgpu_vcn { + unsigned fw_version; + struct delayed_work idle_work; + const struct firmware *fw; /* VCN firmware */ + unsigned num_enc_rings; + enum amd_powergating_state cur_state; + bool indirect_sram; + + uint8_t num_vcn_inst; + struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES]; + struct amdgpu_vcn_reg internal; + struct mutex vcn_pg_lock; + struct mutex vcn1_jpeg1_workaround; + atomic_t total_submission_cnt; + + unsigned harvest_config; + int (*pause_dpg_mode)(struct amdgpu_device *adev, + int inst_idx, struct dpg_pause_state *new_state); +}; + +struct amdgpu_fw_shared_multi_queue { + uint8_t decode_queue_mode; + uint8_t encode_generalpurpose_queue_mode; + uint8_t encode_lowlatency_queue_mode; + uint8_t encode_realtime_queue_mode; + uint8_t padding[4]; +}; + +struct amdgpu_fw_shared { + uint32_t present_flag_0; + uint8_t pad[53]; + struct amdgpu_fw_shared_multi_queue multi_queue; +} __attribute__((__packed__)); int amdgpu_vcn_sw_init(struct amdgpu_device *adev); int amdgpu_vcn_sw_fini(struct amdgpu_device *adev); @@ -83,8 +254,5 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring); int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout); - -int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring); -int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout); #endif -- Gitblit v1.6.2