hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
....@@ -24,12 +24,19 @@
2424 #ifndef __AMDGPU_VCN_H__
2525 #define __AMDGPU_VCN_H__
2626
27
-#define AMDGPU_VCN_STACK_SIZE (200*1024)
28
-#define AMDGPU_VCN_HEAP_SIZE (256*1024)
29
-#define AMDGPU_VCN_SESSION_SIZE (50*1024)
27
+#define AMDGPU_VCN_STACK_SIZE (128*1024) /* 131072; unit not stated here, presumably bytes -- confirm */
28
+#define AMDGPU_VCN_CONTEXT_SIZE (512*1024) /* 524288; same unit assumption */
29
+
3030 #define AMDGPU_VCN_FIRMWARE_OFFSET 256
3131 #define AMDGPU_VCN_MAX_ENC_RINGS 3
3232
33
+#define AMDGPU_MAX_VCN_INSTANCES 2 /* also the length of inst[] in struct amdgpu_vcn */
34
+#define AMDGPU_MAX_VCN_ENC_RINGS (AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES) /* parenthesized so the product stays intact next to /, % or unary operators at the use site */
35
+
36
+#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0) /* bit 0; presumably tested against amdgpu_vcn.harvest_config -- confirm */
37
+#define AMDGPU_VCN_HARVEST_VCN1 (1 << 1) /* bit 1; same assumption */
38
+
39
+#define VCN_DEC_KMD_CMD 0x80000000 /* only bit 31 set; NOTE(review): presumably OR-ed with the VCN_DEC_CMD_* codes below -- confirm */
3340 #define VCN_DEC_CMD_FENCE 0x00000000
3441 #define VCN_DEC_CMD_TRAP 0x00000001
3542 #define VCN_DEC_CMD_WRITE_REG 0x00000004
....@@ -45,8 +52,109 @@
4552 #define VCN_ENC_CMD_REG_WRITE 0x0000000b
4653 #define VCN_ENC_CMD_REG_WAIT 0x0000000c
4754
55
+#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 /* start of the 0x2600-long "video" window tested by SOC15_DPG_MODE_OFFSET */
56
+#define VCN1_VID_SOC_ADDRESS_3_0 0x48200 /* start of the "video1" window (also 0x2600 long) */
57
+#define VCN_AON_SOC_ADDRESS_2_0 0x1f800 /* start of the 0x600-long "aon" window */
58
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 /* start of the "aon1" window (also 0x600 long) */
59
+#define VCN_VID_IP_ADDRESS_2_0 0x0 /* base that both video windows are rebased onto */
60
+#define VCN_AON_IP_ADDRESS_2_0 0x30000 /* base that both aon windows are rebased onto */
61
+
62
+#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b
63
+#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 /* named so that reg##_BASE_IDX in the DPG macros of this header resolves to it */
64
+#define mmUVD_REG_XX_MASK 0x026c
65
+#define mmUVD_REG_XX_MASK_BASE_IDX 1 /* same reg##_BASE_IDX naming convention */
66
+
67
+/* 1 second timeout */
68
+#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000)
69
+
70
+#define RREG32_SOC15_DPG_MODE_1_0(ip, inst_idx, reg, mask, sram_sel) /* programs LMA_MASK and LMA_CTL, then reads LMA_DATA; needs `adev` in the caller's scope */ \
71
+ ({ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \
72
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \
73
+ UVD_DPG_LMA_CTL__MASK_EN_MASK | \
74
+ ((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg) \
75
+ << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
76
+ ((sram_sel) << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); /* argument parenthesized so a compound expression is shifted as a whole */ \
77
+ RREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA); /* last expression: this read is the value of the ({ ... }) */ \
78
+ })
79
+
80
+#define WREG32_SOC15_DPG_MODE_1_0(ip, inst_idx, reg, value, mask, sram_sel) /* writes LMA_DATA, LMA_MASK, then LMA_CTL with the register address; needs `adev` in the caller's scope */ \
81
+ do { \
82
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA, value); \
83
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \
84
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \
85
+ UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
86
+ ((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg) \
87
+ << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
88
+ ((sram_sel) << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); /* argument parenthesized so a compound expression is shifted as a whole */ \
89
+ } while (0)
90
+
91
+#define SOC15_DPG_MODE_OFFSET(ip, inst_idx, reg) /* statement expression yielding internal_reg_offset; needs `adev` in the caller's scope */ \
92
+ ({ \
93
+ uint32_t internal_reg_offset, addr; \
94
+ bool video_range, video1_range, aon_range, aon1_range; \
95
+ \
96
+ addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
97
+ addr <<= 2; /* multiply by 4 -- presumably dword index to byte address, confirm */ \
98
+ video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS_2_0)) && \
99
+ ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS_2_0 + 0x2600))))); \
100
+ video1_range = ((((0xFFFFF & addr) >= (VCN1_VID_SOC_ADDRESS_3_0)) && \
101
+ ((0xFFFFF & addr) < ((VCN1_VID_SOC_ADDRESS_3_0 + 0x2600))))); \
102
+ aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS_2_0)) && \
103
+ ((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS_2_0 + 0x600))))); \
104
+ aon1_range = ((((0xFFFFF & addr) >= (VCN1_AON_SOC_ADDRESS_3_0)) && \
105
+ ((0xFFFFF & addr) < ((VCN1_AON_SOC_ADDRESS_3_0 + 0x600))))); \
106
+ if (video_range) /* windows are tried in the order video, aon, video1, aon1 */ \
107
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS_2_0) + \
108
+ (VCN_VID_IP_ADDRESS_2_0)); \
109
+ else if (aon_range) \
110
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS_2_0) + \
111
+ (VCN_AON_IP_ADDRESS_2_0)); \
112
+ else if (video1_range) /* rebased onto the same VID base as the first video window */ \
113
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_VID_SOC_ADDRESS_3_0) + \
114
+ (VCN_VID_IP_ADDRESS_2_0)); \
115
+ else if (aon1_range) /* rebased onto the same AON base as the first aon window */ \
116
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_AON_SOC_ADDRESS_3_0) + \
117
+ (VCN_AON_IP_ADDRESS_2_0)); \
118
+ else /* outside all four windows: keep the low 20 bits unchanged */ \
119
+ internal_reg_offset = (0xFFFFF & addr); \
120
+ \
121
+ internal_reg_offset >>= 2; /* undo the x4 above; as the last expression this is the macro's value */ \
122
+ })
123
+
124
+#define RREG32_SOC15_DPG_MODE(inst_idx, offset, mask_en) /* writes LMA_CTL with READ_WRITE = 0, then reads LMA_DATA as the macro's value */ \
125
+ ({ \
126
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \
127
+ (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
128
+ (mask_en) << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | /* arguments parenthesized before shifting */ \
129
+ (offset) << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
130
+ RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \
131
+ })
132
+
133
+#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) /* direct register write, or append an (offset, value) pair to the instance's dpg_sram_curr_addr cursor; needs `adev` in scope */ \
134
+ do { \
135
+ if (!(indirect)) { /* parenthesized: `!` must negate the whole argument expression */ \
136
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \
137
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \
138
+ (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
139
+ (mask_en) << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
140
+ (offset) << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
141
+ } else { \
142
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = (offset); /* offset first ... */ \
143
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = (value); /* ... then value; cursor advances by two words */ \
144
+ } \
145
+ } while (0)
146
+
147
+#define AMDGPU_VCN_MULTI_QUEUE_FLAG (1 << 8) /* bit 8; presumably a bit of amdgpu_fw_shared.present_flag_0 -- confirm */
148
+
149
+enum fw_queue_mode { /* NOTE(review): presumably the values stored in the uint8_t *_queue_mode fields of struct amdgpu_fw_shared_multi_queue -- confirm */
150
+ FW_QUEUE_RING_RESET = 1,
151
+ FW_QUEUE_DPG_HOLD_OFF = 2, /* 1 and 2 are distinct bits, so the two could be OR-ed; whether they are is not visible here */
152
+};
153
+
48154 enum engine_status_constants {
49155 UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
156
+ UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0 = 0xAAAA0,
157
+ UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0 = 0x2A2A8AA0,
50158 UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
51159 UVD_STATUS__UVD_BUSY = 0x00000004,
52160 GB_ADDR_CONFIG_DEFAULT = 0x26010011,
....@@ -54,22 +162,85 @@
54162 UVD_STATUS__BUSY = 0x5,
55163 UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF = 0x1,
56164 UVD_STATUS__RBC_BUSY = 0x1,
165
+ UVD_PGFSM_STATUS_UVDJ_PWR_ON = 0,
57166 };
58167
59
-struct amdgpu_vcn {
168
+enum internal_dpg_state { /* type of both members of struct dpg_pause_state */
169
+ VCN_DPG_STATE__UNPAUSE = 0,
170
+ VCN_DPG_STATE__PAUSE, /* implicitly 1 */
171
+};
172
+
173
+struct dpg_pause_state { /* stored per instance as amdgpu_vcn_inst.pause_state and passed to amdgpu_vcn.pause_dpg_mode() as new_state */
174
+ enum internal_dpg_state fw_based;
175
+ enum internal_dpg_state jpeg;
176
+};
177
+
178
+struct amdgpu_vcn_reg{ /* used twice: amdgpu_vcn.internal (shared) and amdgpu_vcn_inst.external (per instance) */
179
+ unsigned data0; /* NOTE(review): fields presumably hold register offsets rather than register contents -- confirm against the users */
180
+ unsigned data1;
181
+ unsigned cmd;
182
+ unsigned nop;
183
+ unsigned context_id;
184
+ unsigned ib_vmid;
185
+ unsigned ib_bar_low;
186
+ unsigned ib_bar_high;
187
+ unsigned ib_size;
188
+ unsigned gp_scratch8;
189
+ unsigned scratch9;
190
+};
191
+
192
+struct amdgpu_vcn_inst { /* per-instance state; struct amdgpu_vcn holds AMDGPU_MAX_VCN_INSTANCES of these in inst[] */
60193 struct amdgpu_bo *vcpu_bo;
61194 void *cpu_addr;
62195 uint64_t gpu_addr;
63
- unsigned fw_version;
64196 void *saved_bo;
65
- struct delayed_work idle_work;
66
- const struct firmware *fw; /* VCN firmware */
67197 struct amdgpu_ring ring_dec;
68198 struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
69
- struct amdgpu_ring ring_jpeg;
70199 struct amdgpu_irq_src irq;
71
- unsigned num_enc_rings;
200
+ struct amdgpu_vcn_reg external;
201
+ struct amdgpu_bo *dpg_sram_bo; /* presumably backs the three dpg_sram_* fields below -- confirm */
202
+ struct dpg_pause_state pause_state;
203
+ void *dpg_sram_cpu_addr;
204
+ uint64_t dpg_sram_gpu_addr;
205
+ uint32_t *dpg_sram_curr_addr; /* write cursor: WREG32_SOC15_DPG_MODE stores offset then value through it when `indirect` is set */
206
+ atomic_t dpg_enc_submission_cnt;
207
+ void *fw_shared_cpu_addr; /* presumably points at a struct amdgpu_fw_shared -- confirm */
208
+ uint64_t fw_shared_gpu_addr;
72209 };
210
+
211
+struct amdgpu_vcn { /* state shared by all instances; reached as adev->vcn by the DPG macros in this header */
212
+ unsigned fw_version;
213
+ struct delayed_work idle_work;
214
+ const struct firmware *fw; /* VCN firmware */
215
+ unsigned num_enc_rings;
216
+ enum amd_powergating_state cur_state;
217
+ bool indirect_sram; /* presumably selects the `indirect` path of WREG32_SOC15_DPG_MODE -- confirm */
218
+
219
+ uint8_t num_vcn_inst; /* presumably the number of inst[] slots in use -- confirm */
220
+ struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES];
221
+ struct amdgpu_vcn_reg internal;
222
+ struct mutex vcn_pg_lock;
223
+ struct mutex vcn1_jpeg1_workaround;
224
+ atomic_t total_submission_cnt;
225
+
226
+ unsigned harvest_config; /* presumably a mask of AMDGPU_VCN_HARVEST_VCN* bits -- confirm */
227
+ int (*pause_dpg_mode)(struct amdgpu_device *adev, /* callback; the code that installs it is not visible in this header */
228
+ int inst_idx, struct dpg_pause_state *new_state);
229
+};
230
+
231
+struct amdgpu_fw_shared_multi_queue { /* four one-byte mode fields plus four padding bytes: 8 bytes in total */
232
+ uint8_t decode_queue_mode;
233
+ uint8_t encode_generalpurpose_queue_mode;
234
+ uint8_t encode_lowlatency_queue_mode;
235
+ uint8_t encode_realtime_queue_mode;
236
+ uint8_t padding[4];
237
+};
238
+
239
+struct amdgpu_fw_shared { /* packed layout; NOTE(review): presumably dictated by the firmware interface -- confirm before changing any field size */
240
+ uint32_t present_flag_0;
241
+ uint8_t pad[53]; /* with packing, puts multi_queue at byte offset 57 (4 + 53) */
242
+ struct amdgpu_fw_shared_multi_queue multi_queue;
243
+} __attribute__((__packed__)); /* no implicit padding: total size 65 bytes (4 + 53 + 8) */
73244
74245 int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
75246 int amdgpu_vcn_sw_fini(struct amdgpu_device *adev);
....@@ -83,8 +254,5 @@
83254
84255 int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
85256 int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);
86
-
87
-int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring);
88
-int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout);
89257
90258 #endif