hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
....@@ -21,16 +21,31 @@
2121 *
2222 */
2323
24
+#include <linux/delay.h>
2425 #include <linux/firmware.h>
25
-#include <drm/drmP.h>
26
+#include <linux/module.h>
27
+#include <linux/pci.h>
28
+
2629 #include "amdgpu.h"
2730 #include "amdgpu_ucode.h"
2831 #include "amdgpu_trace.h"
2932
30
-#include "sdma0/sdma0_4_0_offset.h"
31
-#include "sdma0/sdma0_4_0_sh_mask.h"
32
-#include "sdma1/sdma1_4_0_offset.h"
33
-#include "sdma1/sdma1_4_0_sh_mask.h"
33
+#include "sdma0/sdma0_4_2_offset.h"
34
+#include "sdma0/sdma0_4_2_sh_mask.h"
35
+#include "sdma1/sdma1_4_2_offset.h"
36
+#include "sdma1/sdma1_4_2_sh_mask.h"
37
+#include "sdma2/sdma2_4_2_2_offset.h"
38
+#include "sdma2/sdma2_4_2_2_sh_mask.h"
39
+#include "sdma3/sdma3_4_2_2_offset.h"
40
+#include "sdma3/sdma3_4_2_2_sh_mask.h"
41
+#include "sdma4/sdma4_4_2_2_offset.h"
42
+#include "sdma4/sdma4_4_2_2_sh_mask.h"
43
+#include "sdma5/sdma5_4_2_2_offset.h"
44
+#include "sdma5/sdma5_4_2_2_sh_mask.h"
45
+#include "sdma6/sdma6_4_2_2_offset.h"
46
+#include "sdma6/sdma6_4_2_2_sh_mask.h"
47
+#include "sdma7/sdma7_4_2_2_offset.h"
48
+#include "sdma7/sdma7_4_2_2_sh_mask.h"
3449 #include "hdp/hdp_4_0_offset.h"
3550 #include "sdma0/sdma0_4_1_default.h"
3651
....@@ -41,6 +56,8 @@
4156 #include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h"
4257 #include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h"
4358
59
+#include "amdgpu_ras.h"
60
+
4461 MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
4562 MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
4663 MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
....@@ -48,14 +65,25 @@
4865 MODULE_FIRMWARE("amdgpu/vega20_sdma.bin");
4966 MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");
5067 MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
68
+MODULE_FIRMWARE("amdgpu/picasso_sdma.bin");
69
+MODULE_FIRMWARE("amdgpu/raven2_sdma.bin");
70
+MODULE_FIRMWARE("amdgpu/arcturus_sdma.bin");
71
+MODULE_FIRMWARE("amdgpu/renoir_sdma.bin");
72
+MODULE_FIRMWARE("amdgpu/green_sardine_sdma.bin");
5173
5274 #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
5375 #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L
76
+
77
+#define WREG32_SDMA(instance, offset, value) \
78
+ WREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)), value)
79
+#define RREG32_SDMA(instance, offset) \
80
+ RREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)))
5481
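The two macros above exist purely to shorten the very frequent per-instance register accesses in this file; they expand to plain RREG32()/WREG32() calls on the offset returned by sdma_v4_0_get_reg_offset() and implicitly rely on a local variable named adev being in scope. A minimal usage sketch, mirroring the read-modify-write pattern used later in this patch (illustrative only, not a new call site):

    /* sketch: requires a struct amdgpu_device *adev and an instance index i in scope */
    u32 cntl;

    cntl = RREG32_SDMA(i, mmSDMA0_CNTL);                       /* read instance i's SDMA0_CNTL */
    cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 1);  /* modify one bit field          */
    WREG32_SDMA(i, mmSDMA0_CNTL, cntl);                        /* write the value back          */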
5582 static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev);
5683 static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
5784 static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
5885 static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev);
86
+static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev);
5987
6088 static const struct soc15_reg_golden golden_settings_sdma_4[] = {
6189 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
....@@ -88,21 +116,24 @@
88116 static const struct soc15_reg_golden golden_settings_sdma_vg10[] = {
89117 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
90118 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
119
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
91120 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
92121 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
93
- SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002)
122
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
123
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
94124 };
95125
96126 static const struct soc15_reg_golden golden_settings_sdma_vg12[] = {
97127 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
98128 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001),
129
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
99130 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
100131 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
101
- SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001)
132
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001),
133
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
102134 };
103135
104
-static const struct soc15_reg_golden golden_settings_sdma_4_1[] =
105
-{
136
+static const struct soc15_reg_golden golden_settings_sdma_4_1[] = {
106137 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
107138 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
108139 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100),
....@@ -112,30 +143,73 @@
112143 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
113144 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100),
114145 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
115
- SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
146
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003e0),
116147 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000)
117148 };
118149
119
-static const struct soc15_reg_golden golden_settings_sdma_4_2[] =
150
+static const struct soc15_reg_golden golden_settings_sdma0_4_2_init[] = {
151
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
152
+};
153
+
154
+static const struct soc15_reg_golden golden_settings_sdma0_4_2[] =
120155 {
121
- SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
156
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
122157 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
123158 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
124159 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
160
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
125161 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
162
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
126163 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
164
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RD_BURST_CNTL, 0x0000000f, 0x00000003),
165
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
127166 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
167
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
128168 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
169
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC2_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
170
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
171
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC3_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
172
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
173
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC4_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
174
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
175
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC5_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
176
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
177
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC6_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
178
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
179
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
180
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
129181 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
130
- SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
182
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
183
+};
184
+
185
+static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = {
186
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
131187 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
132188 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
133189 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
190
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
134191 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
192
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
135193 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
136
- SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
194
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RD_BURST_CNTL, 0x0000000f, 0x00000003),
195
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
196
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
197
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
137198 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
138
- SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0)
199
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC2_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
200
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
201
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC3_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
202
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
203
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC4_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
204
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
205
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC5_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
206
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
207
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC6_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
208
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
209
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
210
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
211
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0),
212
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
139213 };
140214
141215 static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
....@@ -144,11 +218,234 @@
144218 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002)
145219 };
146220
221
+static const struct soc15_reg_golden golden_settings_sdma_rv2[] =
222
+{
223
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00003001),
224
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00003001)
225
+};
226
+
227
+static const struct soc15_reg_golden golden_settings_sdma_arct[] =
228
+{
229
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
230
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
231
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
232
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
233
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
234
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
235
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
236
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
237
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
238
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
239
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
240
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
241
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
242
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
243
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
244
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
245
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
246
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
247
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
248
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
249
+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
250
+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
251
+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
252
+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
253
+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
254
+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
255
+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
256
+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
257
+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
258
+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
259
+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
260
+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_UTCL1_TIMEOUT, 0xffffffff, 0x00010001)
261
+};
262
+
263
+static const struct soc15_reg_golden golden_settings_sdma_4_3[] = {
264
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
265
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
266
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002),
267
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002),
268
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
269
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003fff07, 0x40000051),
270
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
271
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
272
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003e0),
273
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe)
274
+};
275
+
276
+static const struct soc15_ras_field_entry sdma_v4_0_ras_fields[] = {
277
+ { "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
278
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UCODE_BUF_SED),
279
+ 0, 0,
280
+ },
281
+ { "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
282
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_RB_CMD_BUF_SED),
283
+ 0, 0,
284
+ },
285
+ { "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
286
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_IB_CMD_BUF_SED),
287
+ 0, 0,
288
+ },
289
+ { "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
290
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RD_FIFO_SED),
291
+ 0, 0,
292
+ },
293
+ { "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
294
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RDBST_FIFO_SED),
295
+ 0, 0,
296
+ },
297
+ { "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
298
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_DATA_LUT_FIFO_SED),
299
+ 0, 0,
300
+ },
301
+ { "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
302
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED),
303
+ 0, 0,
304
+ },
305
+ { "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
306
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED),
307
+ 0, 0,
308
+ },
309
+ { "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
310
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED),
311
+ 0, 0,
312
+ },
313
+ { "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
314
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED),
315
+ 0, 0,
316
+ },
317
+ { "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
318
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED),
319
+ 0, 0,
320
+ },
321
+ { "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
322
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED),
323
+ 0, 0,
324
+ },
325
+ { "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
326
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED),
327
+ 0, 0,
328
+ },
329
+ { "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
330
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED),
331
+ 0, 0,
332
+ },
333
+ { "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
334
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED),
335
+ 0, 0,
336
+ },
337
+ { "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
338
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED),
339
+ 0, 0,
340
+ },
341
+ { "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
342
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED),
343
+ 0, 0,
344
+ },
345
+ { "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
346
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED),
347
+ 0, 0,
348
+ },
349
+ { "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
350
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED),
351
+ 0, 0,
352
+ },
353
+ { "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
354
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED),
355
+ 0, 0,
356
+ },
357
+ { "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
358
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED),
359
+ 0, 0,
360
+ },
361
+ { "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
362
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED),
363
+ 0, 0,
364
+ },
365
+ { "SDMA_SPLIT_DAT_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
366
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_SPLIT_DAT_BUF_SED),
367
+ 0, 0,
368
+ },
369
+ { "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
370
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MC_WR_ADDR_FIFO_SED),
371
+ 0, 0,
372
+ },
373
+};
374
+
147375 static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
148376 u32 instance, u32 offset)
149377 {
150
- return ( 0 == instance ? (adev->reg_offset[SDMA0_HWIP][0][0] + offset) :
151
- (adev->reg_offset[SDMA1_HWIP][0][0] + offset));
378
+ switch (instance) {
379
+ case 0:
380
+ return (adev->reg_offset[SDMA0_HWIP][0][0] + offset);
381
+ case 1:
382
+ return (adev->reg_offset[SDMA1_HWIP][0][0] + offset);
383
+ case 2:
384
+ return (adev->reg_offset[SDMA2_HWIP][0][1] + offset);
385
+ case 3:
386
+ return (adev->reg_offset[SDMA3_HWIP][0][1] + offset);
387
+ case 4:
388
+ return (adev->reg_offset[SDMA4_HWIP][0][1] + offset);
389
+ case 5:
390
+ return (adev->reg_offset[SDMA5_HWIP][0][1] + offset);
391
+ case 6:
392
+ return (adev->reg_offset[SDMA6_HWIP][0][1] + offset);
393
+ case 7:
394
+ return (adev->reg_offset[SDMA7_HWIP][0][1] + offset);
395
+ default:
396
+ break;
397
+ }
398
+ return 0;
399
+}
400
+
401
+static unsigned sdma_v4_0_seq_to_irq_id(int seq_num)
402
+{
403
+ switch (seq_num) {
404
+ case 0:
405
+ return SOC15_IH_CLIENTID_SDMA0;
406
+ case 1:
407
+ return SOC15_IH_CLIENTID_SDMA1;
408
+ case 2:
409
+ return SOC15_IH_CLIENTID_SDMA2;
410
+ case 3:
411
+ return SOC15_IH_CLIENTID_SDMA3;
412
+ case 4:
413
+ return SOC15_IH_CLIENTID_SDMA4;
414
+ case 5:
415
+ return SOC15_IH_CLIENTID_SDMA5;
416
+ case 6:
417
+ return SOC15_IH_CLIENTID_SDMA6;
418
+ case 7:
419
+ return SOC15_IH_CLIENTID_SDMA7;
420
+ default:
421
+ break;
422
+ }
423
+ return -EINVAL;
424
+}
425
+
426
+static int sdma_v4_0_irq_id_to_seq(unsigned client_id)
427
+{
428
+ switch (client_id) {
429
+ case SOC15_IH_CLIENTID_SDMA0:
430
+ return 0;
431
+ case SOC15_IH_CLIENTID_SDMA1:
432
+ return 1;
433
+ case SOC15_IH_CLIENTID_SDMA2:
434
+ return 2;
435
+ case SOC15_IH_CLIENTID_SDMA3:
436
+ return 3;
437
+ case SOC15_IH_CLIENTID_SDMA4:
438
+ return 4;
439
+ case SOC15_IH_CLIENTID_SDMA5:
440
+ return 5;
441
+ case SOC15_IH_CLIENTID_SDMA6:
442
+ return 6;
443
+ case SOC15_IH_CLIENTID_SDMA7:
444
+ return 7;
445
+ default:
446
+ break;
447
+ }
448
+ return -EINVAL;
152449 }
153450
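sdma_v4_0_seq_to_irq_id() and sdma_v4_0_irq_id_to_seq() are inverses of each other: the first maps a zero-based SDMA engine index to its SOC15 interrupt-handler client ID, the second maps a client ID found in an IV ring entry back to the engine index. A hedged sketch of the consumer side (the field entry->client_id comes from struct amdgpu_iv_entry; amdgpu_fence_process() is the usual fence-signalling helper, and the handler shown is a simplification rather than the exact code of this patch):

    /* sketch: route an SDMA trap interrupt back to the ring that raised it */
    int instance = sdma_v4_0_irq_id_to_seq(entry->client_id);

    if (instance >= 0 && instance < adev->sdma.num_instances)
            amdgpu_fence_process(&adev->sdma.instance[instance].ring);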
154451 static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
....@@ -156,11 +453,11 @@
156453 switch (adev->asic_type) {
157454 case CHIP_VEGA10:
158455 soc15_program_register_sequence(adev,
159
- golden_settings_sdma_4,
160
- ARRAY_SIZE(golden_settings_sdma_4));
456
+ golden_settings_sdma_4,
457
+ ARRAY_SIZE(golden_settings_sdma_4));
161458 soc15_program_register_sequence(adev,
162
- golden_settings_sdma_vg10,
163
- ARRAY_SIZE(golden_settings_sdma_vg10));
459
+ golden_settings_sdma_vg10,
460
+ ARRAY_SIZE(golden_settings_sdma_vg10));
164461 break;
165462 case CHIP_VEGA12:
166463 soc15_program_register_sequence(adev,
....@@ -172,20 +469,108 @@
172469 break;
173470 case CHIP_VEGA20:
174471 soc15_program_register_sequence(adev,
175
- golden_settings_sdma_4_2,
176
- ARRAY_SIZE(golden_settings_sdma_4_2));
472
+ golden_settings_sdma0_4_2_init,
473
+ ARRAY_SIZE(golden_settings_sdma0_4_2_init));
474
+ soc15_program_register_sequence(adev,
475
+ golden_settings_sdma0_4_2,
476
+ ARRAY_SIZE(golden_settings_sdma0_4_2));
477
+ soc15_program_register_sequence(adev,
478
+ golden_settings_sdma1_4_2,
479
+ ARRAY_SIZE(golden_settings_sdma1_4_2));
480
+ break;
481
+ case CHIP_ARCTURUS:
482
+ soc15_program_register_sequence(adev,
483
+ golden_settings_sdma_arct,
484
+ ARRAY_SIZE(golden_settings_sdma_arct));
177485 break;
178486 case CHIP_RAVEN:
179487 soc15_program_register_sequence(adev,
180
- golden_settings_sdma_4_1,
181
- ARRAY_SIZE(golden_settings_sdma_4_1));
488
+ golden_settings_sdma_4_1,
489
+ ARRAY_SIZE(golden_settings_sdma_4_1));
490
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
491
+ soc15_program_register_sequence(adev,
492
+ golden_settings_sdma_rv2,
493
+ ARRAY_SIZE(golden_settings_sdma_rv2));
494
+ else
495
+ soc15_program_register_sequence(adev,
496
+ golden_settings_sdma_rv1,
497
+ ARRAY_SIZE(golden_settings_sdma_rv1));
498
+ break;
499
+ case CHIP_RENOIR:
182500 soc15_program_register_sequence(adev,
183
- golden_settings_sdma_rv1,
184
- ARRAY_SIZE(golden_settings_sdma_rv1));
501
+ golden_settings_sdma_4_3,
502
+ ARRAY_SIZE(golden_settings_sdma_4_3));
185503 break;
186504 default:
187505 break;
188506 }
507
+}
508
+
509
+static void sdma_v4_0_setup_ulv(struct amdgpu_device *adev)
510
+{
511
+ int i;
512
+
513
+ /*
514
+ * The only chips with SDMAv4 and ULV are VG10 and VG20.
515
+ * Server SKUs take a different hysteresis setting from other SKUs.
516
+ */
517
+ switch (adev->asic_type) {
518
+ case CHIP_VEGA10:
519
+ if (adev->pdev->device == 0x6860)
520
+ break;
521
+ return;
522
+ case CHIP_VEGA20:
523
+ if (adev->pdev->device == 0x66a1)
524
+ break;
525
+ return;
526
+ default:
527
+ return;
528
+ }
529
+
530
+ for (i = 0; i < adev->sdma.num_instances; i++) {
531
+ uint32_t temp;
532
+
533
+ temp = RREG32_SDMA(i, mmSDMA0_ULV_CNTL);
534
+ temp = REG_SET_FIELD(temp, SDMA0_ULV_CNTL, HYSTERESIS, 0x0);
535
+ WREG32_SDMA(i, mmSDMA0_ULV_CNTL, temp);
536
+ }
537
+}
538
+
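The ULV hysteresis write above is a typical SOC15 read-modify-write; REG_SET_FIELD() hides the mask-and-shift handling. A rough open-coded equivalent, using the naming convention of the generated sh_mask headers (the exact macro names for this field are an assumption, shown only to illustrate what the helper does):

    /* roughly what REG_SET_FIELD(temp, SDMA0_ULV_CNTL, HYSTERESIS, 0x0) does */
    temp &= ~SDMA0_ULV_CNTL__HYSTERESIS_MASK;
    temp |= (0x0 << SDMA0_ULV_CNTL__HYSTERESIS__SHIFT) &
            SDMA0_ULV_CNTL__HYSTERESIS_MASK;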
539
+static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
540
+{
541
+ int err = 0;
542
+ const struct sdma_firmware_header_v1_0 *hdr;
543
+
544
+ err = amdgpu_ucode_validate(sdma_inst->fw);
545
+ if (err)
546
+ return err;
547
+
548
+ hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
549
+ sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
550
+ sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
551
+
552
+ if (sdma_inst->feature_version >= 20)
553
+ sdma_inst->burst_nop = true;
554
+
555
+ return 0;
556
+}
557
+
558
+static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev)
559
+{
560
+ int i;
561
+
562
+ for (i = 0; i < adev->sdma.num_instances; i++) {
563
+ release_firmware(adev->sdma.instance[i].fw);
564
+ adev->sdma.instance[i].fw = NULL;
565
+
566
+ /* arcturus shares the same FW memory across
567
+ all SDMA instances */
568
+ if (adev->asic_type == CHIP_ARCTURUS)
569
+ break;
570
+ }
571
+
572
+ memset((void*)adev->sdma.instance, 0,
573
+ sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
189574 }
190575
191576 /**
....@@ -207,7 +592,9 @@
207592 int err = 0, i;
208593 struct amdgpu_firmware_info *info = NULL;
209594 const struct common_firmware_header *header = NULL;
210
- const struct sdma_firmware_header_v1_0 *hdr;
595
+
596
+ if (amdgpu_sriov_vf(adev))
597
+ return 0;
211598
212599 DRM_DEBUG("\n");
213600
....@@ -222,32 +609,62 @@
222609 chip_name = "vega20";
223610 break;
224611 case CHIP_RAVEN:
225
- chip_name = "raven";
612
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
613
+ chip_name = "raven2";
614
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
615
+ chip_name = "picasso";
616
+ else
617
+ chip_name = "raven";
618
+ break;
619
+ case CHIP_ARCTURUS:
620
+ chip_name = "arcturus";
621
+ break;
622
+ case CHIP_RENOIR:
623
+ if (adev->apu_flags & AMD_APU_IS_RENOIR)
624
+ chip_name = "renoir";
625
+ else
626
+ chip_name = "green_sardine";
226627 break;
227628 default:
228629 BUG();
229630 }
230631
231
- for (i = 0; i < adev->sdma.num_instances; i++) {
232
- if (i == 0)
233
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
234
- else
235
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
236
- err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
237
- if (err)
238
- goto out;
239
- err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
240
- if (err)
241
- goto out;
242
- hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
243
- adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
244
- adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
245
- if (adev->sdma.instance[i].feature_version >= 20)
246
- adev->sdma.instance[i].burst_nop = true;
247
- DRM_DEBUG("psp_load == '%s'\n",
248
- adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
632
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
249633
250
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
634
+ err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev);
635
+ if (err)
636
+ goto out;
637
+
638
+ err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[0]);
639
+ if (err)
640
+ goto out;
641
+
642
+ for (i = 1; i < adev->sdma.num_instances; i++) {
643
+ if (adev->asic_type == CHIP_ARCTURUS) {
644
+ /* Acturus will leverage the same FW memory
645
+ for every SDMA instance */
646
+ memcpy((void*)&adev->sdma.instance[i],
647
+ (void*)&adev->sdma.instance[0],
648
+ sizeof(struct amdgpu_sdma_instance));
649
+ }
650
+ else {
651
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i);
652
+
653
+ err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
654
+ if (err)
655
+ goto out;
656
+
657
+ err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[i]);
658
+ if (err)
659
+ goto out;
660
+ }
661
+ }
662
+
663
+ DRM_DEBUG("psp_load == '%s'\n",
664
+ adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
665
+
666
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
667
+ for (i = 0; i < adev->sdma.num_instances; i++) {
251668 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
252669 info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
253670 info->fw = adev->sdma.instance[i].fw;
....@@ -256,13 +673,11 @@
256673 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
257674 }
258675 }
676
+
259677 out:
260678 if (err) {
261679 DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name);
262
- for (i = 0; i < adev->sdma.num_instances; i++) {
263
- release_firmware(adev->sdma.instance[i].fw);
264
- adev->sdma.instance[i].fw = NULL;
265
- }
680
+ sdma_v4_0_destroy_inst_ctx(adev);
266681 }
267682 return err;
268683 }
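The restructured loader requests "<chip>_sdma.bin" for instance 0 and "<chip>_sdma<i>.bin" for every further instance, except on Arcturus where instance 0's firmware context is simply copied. As an illustration of the names that result (consistent with the MODULE_FIRMWARE lines earlier in the patch; the instance counts are the usual ones for these parts):

    vega20   (2 instances): amdgpu/vega20_sdma.bin, amdgpu/vega20_sdma1.bin
    arcturus (8 instances): amdgpu/arcturus_sdma.bin, shared by all eight engines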
....@@ -302,23 +717,18 @@
302717 wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
303718 DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
304719 } else {
305
- u32 lowbit, highbit;
306
-
307
- lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2;
308
- highbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
309
-
310
- DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
311
- ring->me, highbit, lowbit);
312
- wptr = highbit;
720
+ wptr = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI);
313721 wptr = wptr << 32;
314
- wptr |= lowbit;
722
+ wptr |= RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR);
723
+ DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n",
724
+ ring->me, wptr);
315725 }
316726
317727 return wptr >> 2;
318728 }
319729
320730 /**
321
- * sdma_v4_0_ring_set_wptr - commit the write pointer
731
+ * sdma_v4_0_page_ring_set_wptr - commit the write pointer
322732 *
323733 * @ring: amdgpu ring pointer
324734 *
....@@ -352,14 +762,67 @@
352762 lower_32_bits(ring->wptr << 2),
353763 ring->me,
354764 upper_32_bits(ring->wptr << 2));
355
- WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
356
- WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
765
+ WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR,
766
+ lower_32_bits(ring->wptr << 2));
767
+ WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI,
768
+ upper_32_bits(ring->wptr << 2));
769
+ }
770
+}
771
+
772
+/**
773
+ * sdma_v4_0_page_ring_get_wptr - get the current write pointer
774
+ *
775
+ * @ring: amdgpu ring pointer
776
+ *
777
+ * Get the current wptr from the hardware (VEGA10+).
778
+ */
779
+static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring)
780
+{
781
+ struct amdgpu_device *adev = ring->adev;
782
+ u64 wptr;
783
+
784
+ if (ring->use_doorbell) {
785
+ /* XXX check if swapping is necessary on BE */
786
+ wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
787
+ } else {
788
+ wptr = RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI);
789
+ wptr = wptr << 32;
790
+ wptr |= RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR);
791
+ }
792
+
793
+ return wptr >> 2;
794
+}
795
+
796
+/**
797
+ * sdma_v4_0_page_ring_set_wptr - commit the write pointer
798
+ *
799
+ * @ring: amdgpu ring pointer
800
+ *
801
+ * Write the wptr back to the hardware (VEGA10+).
802
+ */
803
+static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring)
804
+{
805
+ struct amdgpu_device *adev = ring->adev;
806
+
807
+ if (ring->use_doorbell) {
808
+ u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
809
+
810
+ /* XXX check if swapping is necessary on BE */
811
+ WRITE_ONCE(*wb, (ring->wptr << 2));
812
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
813
+ } else {
814
+ uint64_t wptr = ring->wptr << 2;
815
+
816
+ WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR,
817
+ lower_32_bits(wptr));
818
+ WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI,
819
+ upper_32_bits(wptr));
357820 }
358821 }
359822
360823 static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
361824 {
362
- struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
825
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
363826 int i;
364827
365828 for (i = 0; i < count; i++)
....@@ -379,11 +842,14 @@
379842 * Schedule an IB in the DMA ring (VEGA10).
380843 */
381844 static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
382
- struct amdgpu_ib *ib,
383
- unsigned vmid, bool ctx_switch)
845
+ struct amdgpu_job *job,
846
+ struct amdgpu_ib *ib,
847
+ uint32_t flags)
384848 {
849
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
850
+
385851 /* IB packet must end on a 8 DW boundary */
386
- sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
852
+ sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
387853
388854 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
389855 SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
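The padding formula changes from (10 - (wptr & 7)) % 8 to the equivalent but simpler (2 - wptr) & 7: both leave the write pointer at 2 modulo 8, so that the six-dword INDIRECT packet this function goes on to emit (header plus five operand dwords) finishes exactly on an 8-dword boundary. Worked example: with wptr % 8 == 5, the ring gets (2 - 5) & 7 = 5 NOPs (the old formula gives (10 - 5) % 8 = 5 as well), after which the packet occupies dwords 2..7 of the next 8-dword block.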
....@@ -432,16 +898,13 @@
432898 {
433899 struct amdgpu_device *adev = ring->adev;
434900 u32 ref_and_mask = 0;
435
- const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
901
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
436902
437
- if (ring->me == 0)
438
- ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0;
439
- else
440
- ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1;
903
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
441904
442905 sdma_v4_0_wait_reg_mem(ring, 0, 1,
443
- adev->nbio_funcs->get_hdp_flush_done_offset(adev),
444
- adev->nbio_funcs->get_hdp_flush_req_offset(adev),
906
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
907
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
445908 ref_and_mask, ref_and_mask, 10);
446909 }
447910
....@@ -493,26 +956,25 @@
493956 */
494957 static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
495958 {
496
- struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
497
- struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
959
+ struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
498960 u32 rb_cntl, ib_cntl;
499
- int i;
500
-
501
- if ((adev->mman.buffer_funcs_ring == sdma0) ||
502
- (adev->mman.buffer_funcs_ring == sdma1))
503
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
961
+ int i, unset = 0;
504962
505963 for (i = 0; i < adev->sdma.num_instances; i++) {
506
- rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
507
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
508
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
509
- ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
510
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
511
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
512
- }
964
+ sdma[i] = &adev->sdma.instance[i].ring;
513965
514
- sdma0->ready = false;
515
- sdma1->ready = false;
966
+ if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
967
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
968
+ unset = 1;
969
+ }
970
+
971
+ rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
972
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
973
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
974
+ ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
975
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
976
+ WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
977
+ }
516978 }
517979
518980 /**
....@@ -528,7 +990,41 @@
528990 }
529991
530992 /**
531
- * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch
993
+ * sdma_v4_0_page_stop - stop the page async dma engines
994
+ *
995
+ * @adev: amdgpu_device pointer
996
+ *
997
+ * Stop the page async dma ring buffers (VEGA10).
998
+ */
999
+static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
1000
+{
1001
+ struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
1002
+ u32 rb_cntl, ib_cntl;
1003
+ int i;
1004
+ bool unset = false;
1005
+
1006
+ for (i = 0; i < adev->sdma.num_instances; i++) {
1007
+ sdma[i] = &adev->sdma.instance[i].page;
1008
+
1009
+ if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
1010
+ (!unset)) {
1011
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
1012
+ unset = true;
1013
+ }
1014
+
1015
+ rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
1016
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
1017
+ RB_ENABLE, 0);
1018
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
1019
+ ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
1020
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL,
1021
+ IB_ENABLE, 0);
1022
+ WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
1023
+ }
1024
+}
1025
+
1026
+/**
1027
+ * sdma_v4_0_ctx_switch_enable - stop the async dma engines context switch
5321028 *
5331029 * @adev: amdgpu_device pointer
5341030 * @enable: enable/disable the DMA MEs context switch.
....@@ -565,18 +1061,24 @@
5651061 }
5661062
5671063 for (i = 0; i < adev->sdma.num_instances; i++) {
568
- f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
1064
+ f32_cntl = RREG32_SDMA(i, mmSDMA0_CNTL);
5691065 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
5701066 AUTO_CTXSW_ENABLE, enable ? 1 : 0);
5711067 if (enable && amdgpu_sdma_phase_quantum) {
572
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
573
- phase_quantum);
574
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
575
- phase_quantum);
576
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
577
- phase_quantum);
1068
+ WREG32_SDMA(i, mmSDMA0_PHASE0_QUANTUM, phase_quantum);
1069
+ WREG32_SDMA(i, mmSDMA0_PHASE1_QUANTUM, phase_quantum);
1070
+ WREG32_SDMA(i, mmSDMA0_PHASE2_QUANTUM, phase_quantum);
5781071 }
579
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
1072
+ WREG32_SDMA(i, mmSDMA0_CNTL, f32_cntl);
1073
+
1074
+ /*
1075
+ * Enable SDMA utilization. Its only supported on
1076
+ * Arcturus for the moment and firmware version 14
1077
+ * and above.
1078
+ */
1079
+ if (adev->asic_type == CHIP_ARCTURUS &&
1080
+ adev->sdma.instance[i].fw_version >= 14)
1081
+ WREG32_SDMA(i, mmSDMA0_PUB_DUMMY_REG2, enable);
5801082 }
5811083
5821084 }
....@@ -594,159 +1096,216 @@
5941096 u32 f32_cntl;
5951097 int i;
5961098
597
- if (enable == false) {
1099
+ if (!enable) {
5981100 sdma_v4_0_gfx_stop(adev);
5991101 sdma_v4_0_rlc_stop(adev);
1102
+ if (adev->sdma.has_page_queue)
1103
+ sdma_v4_0_page_stop(adev);
6001104 }
6011105
6021106 for (i = 0; i < adev->sdma.num_instances; i++) {
603
- f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
1107
+ f32_cntl = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
6041108 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
605
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
1109
+ WREG32_SDMA(i, mmSDMA0_F32_CNTL, f32_cntl);
6061110 }
1111
+}
1112
+
1113
+/**
1114
+ * sdma_v4_0_rb_cntl - get parameters for rb_cntl
1115
+ */
1116
+static uint32_t sdma_v4_0_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
1117
+{
1118
+ /* Set ring buffer size in dwords */
1119
+ uint32_t rb_bufsz = order_base_2(ring->ring_size / 4);
1120
+
1121
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
1122
+#ifdef __BIG_ENDIAN
1123
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
1124
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
1125
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
1126
+#endif
1127
+ return rb_cntl;
6071128 }
6081129
6091130 /**
6101131 * sdma_v4_0_gfx_resume - setup and start the async dma engines
6111132 *
6121133 * @adev: amdgpu_device pointer
1134
+ * @i: instance to resume
6131135 *
6141136 * Set up the gfx DMA ring buffers and enable them (VEGA10).
6151137 * Returns 0 for success, error for failure.
6161138 */
617
-static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
1139
+static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
6181140 {
619
- struct amdgpu_ring *ring;
1141
+ struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
6201142 u32 rb_cntl, ib_cntl, wptr_poll_cntl;
621
- u32 rb_bufsz;
6221143 u32 wb_offset;
6231144 u32 doorbell;
6241145 u32 doorbell_offset;
625
- u32 temp;
6261146 u64 wptr_gpu_addr;
627
- int i, r;
6281147
629
- for (i = 0; i < adev->sdma.num_instances; i++) {
630
- ring = &adev->sdma.instance[i].ring;
631
- wb_offset = (ring->rptr_offs * 4);
1148
+ wb_offset = (ring->rptr_offs * 4);
6321149
633
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
1150
+ rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
1151
+ rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
1152
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
6341153
635
- /* Set ring buffer size in dwords */
636
- rb_bufsz = order_base_2(ring->ring_size / 4);
637
- rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
638
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
639
-#ifdef __BIG_ENDIAN
640
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
641
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
642
- RPTR_WRITEBACK_SWAP_ENABLE, 1);
643
-#endif
644
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
1154
+ /* Initialize the ring buffer's read and write pointers */
1155
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR, 0);
1156
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_HI, 0);
1157
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR, 0);
1158
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_HI, 0);
6451159
646
- /* Initialize the ring buffer's read and write pointers */
647
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
648
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
649
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
650
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
1160
+ /* set the wb address whether it's enabled or not */
1161
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI,
1162
+ upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
1163
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO,
1164
+ lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
6511165
652
- /* set the wb address whether it's enabled or not */
653
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
654
- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
655
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
656
- lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
1166
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
1167
+ RPTR_WRITEBACK_ENABLE, 1);
6571168
658
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
1169
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE, ring->gpu_addr >> 8);
1170
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
6591171
660
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
661
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
1172
+ ring->wptr = 0;
6621173
663
- ring->wptr = 0;
1174
+ /* before programing wptr to a less value, need set minor_ptr_update first */
1175
+ WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 1);
6641176
665
- /* before programing wptr to a less value, need set minor_ptr_update first */
666
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
1177
+ doorbell = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL);
1178
+ doorbell_offset = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET);
6671179
668
- if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
669
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
670
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
671
- }
672
-
673
- doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
674
- doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
675
-
676
- if (ring->use_doorbell) {
677
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
678
- doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
1180
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE,
1181
+ ring->use_doorbell);
1182
+ doorbell_offset = REG_SET_FIELD(doorbell_offset,
1183
+ SDMA0_GFX_DOORBELL_OFFSET,
6791184 OFFSET, ring->doorbell_index);
680
- } else {
681
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
682
- }
683
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
684
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
685
- adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
686
- ring->doorbell_index);
1185
+ WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell);
1186
+ WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset);
6871187
688
- if (amdgpu_sriov_vf(adev))
689
- sdma_v4_0_ring_set_wptr(ring);
1188
+ sdma_v4_0_ring_set_wptr(ring);
6901189
691
- /* set minor_ptr_update to 0 after wptr programed */
692
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
1190
+ /* set minor_ptr_update to 0 after wptr programed */
1191
+ WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 0);
6931192
694
- /* set utc l1 enable flag always to 1 */
695
- temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
696
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
697
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
1193
+ /* setup the wptr shadow polling */
1194
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
1195
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO,
1196
+ lower_32_bits(wptr_gpu_addr));
1197
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI,
1198
+ upper_32_bits(wptr_gpu_addr));
1199
+ wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL);
1200
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
1201
+ SDMA0_GFX_RB_WPTR_POLL_CNTL,
1202
+ F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0);
1203
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
6981204
699
- if (!amdgpu_sriov_vf(adev)) {
700
- /* unhalt engine */
701
- temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
702
- temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
703
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
704
- }
1205
+ /* enable DMA RB */
1206
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
1207
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
7051208
706
- /* setup the wptr shadow polling */
707
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
708
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
709
- lower_32_bits(wptr_gpu_addr));
710
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
711
- upper_32_bits(wptr_gpu_addr));
712
- wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
713
- if (amdgpu_sriov_vf(adev))
714
- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
715
- else
716
- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
717
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
718
-
719
- /* enable DMA RB */
720
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
721
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
722
-
723
- ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
724
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
1209
+ ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
1210
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
7251211 #ifdef __BIG_ENDIAN
726
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
1212
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
7271213 #endif
728
- /* enable DMA IBs */
729
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
1214
+ /* enable DMA IBs */
1215
+ WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
7301216
731
- ring->ready = true;
1217
+ ring->sched.ready = true;
1218
+}
7321219
733
- if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
734
- sdma_v4_0_ctx_switch_enable(adev, true);
735
- sdma_v4_0_enable(adev, true);
736
- }
1220
+/**
1221
+ * sdma_v4_0_page_resume - setup and start the async dma engines
1222
+ *
1223
+ * @adev: amdgpu_device pointer
1224
+ * @i: instance to resume
1225
+ *
1226
+ * Set up the page DMA ring buffers and enable them (VEGA10).
1227
+ * Returns 0 for success, error for failure.
1228
+ */
1229
+static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
1230
+{
1231
+ struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
1232
+ u32 rb_cntl, ib_cntl, wptr_poll_cntl;
1233
+ u32 wb_offset;
1234
+ u32 doorbell;
1235
+ u32 doorbell_offset;
1236
+ u64 wptr_gpu_addr;
7371237
738
- r = amdgpu_ring_test_ring(ring);
739
- if (r) {
740
- ring->ready = false;
741
- return r;
742
- }
1238
+ wb_offset = (ring->rptr_offs * 4);
7431239
744
- if (adev->mman.buffer_funcs_ring == ring)
745
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
1240
+ rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
1241
+ rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
1242
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
7461243
747
- }
1244
+ /* Initialize the ring buffer's read and write pointers */
1245
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR, 0);
1246
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_HI, 0);
1247
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR, 0);
1248
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_HI, 0);
7481249
749
- return 0;
1250
+ /* set the wb address whether it's enabled or not */
1251
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_HI,
1252
+ upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
1253
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_LO,
1254
+ lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
1255
+
1256
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
1257
+ RPTR_WRITEBACK_ENABLE, 1);
1258
+
1259
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE, ring->gpu_addr >> 8);
1260
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);
1261
+
1262
+ ring->wptr = 0;
1263
+
1264
+ /* before programing wptr to a less value, need set minor_ptr_update first */
1265
+ WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 1);
1266
+
1267
+ doorbell = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL);
1268
+ doorbell_offset = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET);
1269
+
1270
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_PAGE_DOORBELL, ENABLE,
1271
+ ring->use_doorbell);
1272
+ doorbell_offset = REG_SET_FIELD(doorbell_offset,
1273
+ SDMA0_PAGE_DOORBELL_OFFSET,
1274
+ OFFSET, ring->doorbell_index);
1275
+ WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL, doorbell);
1276
+ WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET, doorbell_offset);
1277
+
1278
+ /* paging queue doorbell range is setup at sdma_v4_0_gfx_resume */
1279
+ sdma_v4_0_page_ring_set_wptr(ring);
1280
+
1281
+ /* set minor_ptr_update to 0 after wptr programed */
1282
+ WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 0);
1283
+
1284
+ /* setup the wptr shadow polling */
1285
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
1286
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_LO,
1287
+ lower_32_bits(wptr_gpu_addr));
1288
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_HI,
1289
+ upper_32_bits(wptr_gpu_addr));
1290
+ wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL);
1291
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
1292
+ SDMA0_PAGE_RB_WPTR_POLL_CNTL,
1293
+ F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0);
1294
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
1295
+
1296
+ /* enable DMA RB */
1297
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 1);
1298
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
1299
+
1300
+ ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
1301
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_ENABLE, 1);
1302
+#ifdef __BIG_ENDIAN
1303
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1);
1304
+#endif
1305
+ /* enable DMA IBs */
1306
+ WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
1307
+
1308
+ ring->sched.ready = true;
7501309 }
7511310
7521311 static void
....@@ -755,7 +1314,7 @@
7551314 uint32_t def, data;
7561315
7571316 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_SDMA)) {
758
- /* disable idle interrupt */
1317
+ /* enable idle interrupt */
7591318 def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
7601319 data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
7611320
....@@ -804,6 +1363,7 @@
8041363
8051364 switch (adev->asic_type) {
8061365 case CHIP_RAVEN:
1366
+ case CHIP_RENOIR:
8071367 sdma_v4_1_init_power_gating(adev);
8081368 sdma_v4_1_update_power_gating(adev, true);
8091369 break;
....@@ -857,12 +1417,14 @@
8571417 (adev->sdma.instance[i].fw->data +
8581418 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
8591419
860
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0);
1420
+ WREG32_SDMA(i, mmSDMA0_UCODE_ADDR, 0);
8611421
8621422 for (j = 0; j < fw_size; j++)
863
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
1423
+ WREG32_SDMA(i, mmSDMA0_UCODE_DATA,
1424
+ le32_to_cpup(fw_data++));
8641425
865
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
1426
+ WREG32_SDMA(i, mmSDMA0_UCODE_ADDR,
1427
+ adev->sdma.instance[i].fw_version);
8661428 }
8671429
8681430 return 0;
....@@ -878,33 +1440,78 @@
8781440 */
8791441 static int sdma_v4_0_start(struct amdgpu_device *adev)
8801442 {
881
- int r = 0;
1443
+ struct amdgpu_ring *ring;
1444
+ int i, r = 0;
8821445
8831446 if (amdgpu_sriov_vf(adev)) {
8841447 sdma_v4_0_ctx_switch_enable(adev, false);
8851448 sdma_v4_0_enable(adev, false);
1449
+ } else {
8861450
887
- /* set RB registers */
888
- r = sdma_v4_0_gfx_resume(adev);
889
- return r;
1451
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
1452
+ r = sdma_v4_0_load_microcode(adev);
1453
+ if (r)
1454
+ return r;
1455
+ }
1456
+
1457
+ /* unhalt the MEs */
1458
+ sdma_v4_0_enable(adev, true);
1459
+ /* enable sdma ring preemption */
1460
+ sdma_v4_0_ctx_switch_enable(adev, true);
8901461 }
8911462
892
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
893
- r = sdma_v4_0_load_microcode(adev);
1463
+ /* start the gfx rings and rlc compute queues */
1464
+ for (i = 0; i < adev->sdma.num_instances; i++) {
1465
+ uint32_t temp;
1466
+
1467
+ WREG32_SDMA(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL, 0);
1468
+ sdma_v4_0_gfx_resume(adev, i);
1469
+ if (adev->sdma.has_page_queue)
1470
+ sdma_v4_0_page_resume(adev, i);
1471
+
1472
+ /* set utc l1 enable flag always to 1 */
1473
+ temp = RREG32_SDMA(i, mmSDMA0_CNTL);
1474
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
1475
+ WREG32_SDMA(i, mmSDMA0_CNTL, temp);
1476
+
1477
+ if (!amdgpu_sriov_vf(adev)) {
1478
+ /* unhalt engine */
1479
+ temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
1480
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
1481
+ WREG32_SDMA(i, mmSDMA0_F32_CNTL, temp);
1482
+ }
1483
+ }
1484
+
1485
+ if (amdgpu_sriov_vf(adev)) {
1486
+ sdma_v4_0_ctx_switch_enable(adev, true);
1487
+ sdma_v4_0_enable(adev, true);
1488
+ } else {
1489
+ r = sdma_v4_0_rlc_resume(adev);
8941490 if (r)
8951491 return r;
8961492 }
8971493
898
- /* unhalt the MEs */
899
- sdma_v4_0_enable(adev, true);
900
- /* enable sdma ring preemption */
901
- sdma_v4_0_ctx_switch_enable(adev, true);
1494
+ for (i = 0; i < adev->sdma.num_instances; i++) {
1495
+ ring = &adev->sdma.instance[i].ring;
9021496
903
- /* start the gfx rings and rlc compute queues */
904
- r = sdma_v4_0_gfx_resume(adev);
905
- if (r)
906
- return r;
907
- r = sdma_v4_0_rlc_resume(adev);
1497
+ r = amdgpu_ring_test_helper(ring);
1498
+ if (r)
1499
+ return r;
1500
+
1501
+ if (adev->sdma.has_page_queue) {
1502
+ struct amdgpu_ring *page = &adev->sdma.instance[i].page;
1503
+
1504
+ r = amdgpu_ring_test_helper(page);
1505
+ if (r)
1506
+ return r;
1507
+
1508
+ if (adev->mman.buffer_funcs_ring == page)
1509
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
1510
+ }
1511
+
1512
+ if (adev->mman.buffer_funcs_ring == ring)
1513
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
1514
+ }
9081515
9091516 return r;
9101517 }
....@@ -928,21 +1535,16 @@
9281535 u64 gpu_addr;
9291536
9301537 r = amdgpu_device_wb_get(adev, &index);
931
- if (r) {
932
- dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
1538
+ if (r)
9331539 return r;
934
- }
9351540
9361541 gpu_addr = adev->wb.gpu_addr + (index * 4);
9371542 tmp = 0xCAFEDEAD;
9381543 adev->wb.wb[index] = cpu_to_le32(tmp);
9391544
9401545 r = amdgpu_ring_alloc(ring, 5);
941
- if (r) {
942
- DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
943
- amdgpu_device_wb_free(adev, index);
944
- return r;
945
- }
1546
+ if (r)
1547
+ goto error_free_wb;
9461548
9471549 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
9481550 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
....@@ -956,18 +1558,14 @@
9561558 tmp = le32_to_cpu(adev->wb.wb[index]);
9571559 if (tmp == 0xDEADBEEF)
9581560 break;
959
- DRM_UDELAY(1);
1561
+ udelay(1);
9601562 }
9611563
962
- if (i < adev->usec_timeout) {
963
- DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
964
- } else {
965
- DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
966
- ring->idx, tmp);
967
- r = -EINVAL;
968
- }
1564
+ if (i >= adev->usec_timeout)
1565
+ r = -ETIMEDOUT;
1566
+
1567
+error_free_wb:
9691568 amdgpu_device_wb_free(adev, index);
970
-
9711569 return r;
9721570 }
9731571
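The simplified ring test keeps the original write-back handshake: the CPU seeds a write-back slot with 0xCAFEDEAD, submits an SDMA write packet, then polls with udelay() until the slot reads 0xDEADBEEF or usec_timeout expires, now returning -ETIMEDOUT instead of logging. A minimal user-space sketch of that poll pattern (standalone illustration only; the names below are not driver API):

#include <stdint.h>
#include <stdio.h>

/* stand-in for the write-back slot; in the driver this is adev->wb.wb[index] */
static volatile uint32_t wb_slot = 0xCAFEDEAD;

/* stand-in for the hardware completing the queued write */
static void fake_sdma_write(void) { wb_slot = 0xDEADBEEF; }

int main(void)
{
	const unsigned int usec_timeout = 1000000; /* illustrative timeout */
	unsigned int i;
	int r = -1; /* -ETIMEDOUT in the driver */

	fake_sdma_write(); /* in the driver: ring packet + doorbell commit */

	for (i = 0; i < usec_timeout; i++) {
		if (wb_slot == 0xDEADBEEF) {
			r = 0;
			break;
		}
		/* udelay(1) in the kernel; omitted here */
	}
	printf("ring test %s after %u polls\n", r ? "timed out" : "passed", i);
	return 0;
}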
....@@ -990,20 +1588,17 @@
9901588 u64 gpu_addr;
9911589
9921590 r = amdgpu_device_wb_get(adev, &index);
993
- if (r) {
994
- dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
1591
+ if (r)
9951592 return r;
996
- }
9971593
9981594 gpu_addr = adev->wb.gpu_addr + (index * 4);
9991595 tmp = 0xCAFEDEAD;
10001596 adev->wb.wb[index] = cpu_to_le32(tmp);
10011597 memset(&ib, 0, sizeof(ib));
1002
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
1003
- if (r) {
1004
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
1598
+ r = amdgpu_ib_get(adev, NULL, 256,
1599
+ AMDGPU_IB_POOL_DIRECT, &ib);
1600
+ if (r)
10051601 goto err0;
1006
- }
10071602
10081603 ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
10091604 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
....@@ -1022,21 +1617,17 @@
10221617
10231618 r = dma_fence_wait_timeout(f, false, timeout);
10241619 if (r == 0) {
1025
- DRM_ERROR("amdgpu: IB test timed out\n");
10261620 r = -ETIMEDOUT;
10271621 goto err1;
10281622 } else if (r < 0) {
1029
- DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
10301623 goto err1;
10311624 }
10321625 tmp = le32_to_cpu(adev->wb.wb[index]);
1033
- if (tmp == 0xDEADBEEF) {
1034
- DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
1626
+ if (tmp == 0xDEADBEEF)
10351627 r = 0;
1036
- } else {
1037
- DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
1628
+ else
10381629 r = -EINVAL;
1039
- }
1630
+
10401631 err1:
10411632 amdgpu_ib_free(adev, &ib, NULL);
10421633 dma_fence_put(f);
....@@ -1141,11 +1732,11 @@
11411732 */
11421733 static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
11431734 {
1144
- struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
1735
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
11451736 u32 pad_count;
11461737 int i;
11471738
1148
- pad_count = (8 - (ib->length_dw & 0x7)) % 8;
1739
+ pad_count = (-ib->length_dw) & 7;
11491740 for (i = 0; i < pad_count; i++)
11501741 if (sdma && sdma->burst_nop && (i == 0))
11511742 ib->ptr[ib->length_dw++] =
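The new pad_count expression computes the same value as the old modulo form: both give the number of NOP dwords needed to round length_dw up to a multiple of 8. A minimal standalone check of the identity (illustration only, not driver code):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t len;

	/* (-len) & 7 equals (8 - (len & 7)) % 8 for every length */
	for (len = 0; len < 64; len++)
		assert(((-len) & 7) == ((8 - (len & 7)) % 8));
	return 0;
}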
....@@ -1207,23 +1798,77 @@
12071798 sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10);
12081799 }
12091800
1801
+static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev)
1802
+{
1803
+ uint fw_version = adev->sdma.instance[0].fw_version;
1804
+
1805
+ switch (adev->asic_type) {
1806
+ case CHIP_VEGA10:
1807
+ return fw_version >= 430;
1808
+ case CHIP_VEGA12:
1809
+ /*return fw_version >= 31;*/
1810
+ return false;
1811
+ case CHIP_VEGA20:
1812
+ return fw_version >= 123;
1813
+ default:
1814
+ return false;
1815
+ }
1816
+}
1817
+
12101818 static int sdma_v4_0_early_init(void *handle)
12111819 {
12121820 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1821
+ int r;
12131822
1214
- if (adev->asic_type == CHIP_RAVEN)
1823
+ if (adev->flags & AMD_IS_APU)
12151824 adev->sdma.num_instances = 1;
1825
+ else if (adev->asic_type == CHIP_ARCTURUS)
1826
+ adev->sdma.num_instances = 8;
12161827 else
12171828 adev->sdma.num_instances = 2;
1829
+
1830
+ r = sdma_v4_0_init_microcode(adev);
1831
+ if (r) {
1832
+ DRM_ERROR("Failed to load sdma firmware!\n");
1833
+ return r;
1834
+ }
1835
+
1836
+ /* TODO: Page queue breaks driver reload under SRIOV */
1837
+ if ((adev->asic_type == CHIP_VEGA10) && amdgpu_sriov_vf((adev)))
1838
+ adev->sdma.has_page_queue = false;
1839
+ else if (sdma_v4_0_fw_support_paging_queue(adev))
1840
+ adev->sdma.has_page_queue = true;
12181841
12191842 sdma_v4_0_set_ring_funcs(adev);
12201843 sdma_v4_0_set_buffer_funcs(adev);
12211844 sdma_v4_0_set_vm_pte_funcs(adev);
12221845 sdma_v4_0_set_irq_funcs(adev);
1846
+ sdma_v4_0_set_ras_funcs(adev);
12231847
12241848 return 0;
12251849 }
12261850
1851
+static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
1852
+ void *err_data,
1853
+ struct amdgpu_iv_entry *entry);
1854
+
1855
+static int sdma_v4_0_late_init(void *handle)
1856
+{
1857
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1858
+ struct ras_ih_if ih_info = {
1859
+ .cb = sdma_v4_0_process_ras_data_cb,
1860
+ };
1861
+
1862
+ sdma_v4_0_setup_ulv(adev);
1863
+
1864
+ if (adev->sdma.funcs && adev->sdma.funcs->reset_ras_error_count)
1865
+ adev->sdma.funcs->reset_ras_error_count(adev);
1866
+
1867
+ if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
1868
+ return adev->sdma.funcs->ras_late_init(adev, &ih_info);
1869
+ else
1870
+ return 0;
1871
+}
12271872
12281873 static int sdma_v4_0_sw_init(void *handle)
12291874 {
....@@ -1232,21 +1877,21 @@
12321877 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
12331878
12341879 /* SDMA trap event */
1235
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_TRAP,
1236
- &adev->sdma.trap_irq);
1237
- if (r)
1238
- return r;
1880
+ for (i = 0; i < adev->sdma.num_instances; i++) {
1881
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
1882
+ SDMA0_4_0__SRCID__SDMA_TRAP,
1883
+ &adev->sdma.trap_irq);
1884
+ if (r)
1885
+ return r;
1886
+ }
12391887
1240
- /* SDMA trap event */
1241
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_TRAP,
1242
- &adev->sdma.trap_irq);
1243
- if (r)
1244
- return r;
1245
-
1246
- r = sdma_v4_0_init_microcode(adev);
1247
- if (r) {
1248
- DRM_ERROR("Failed to load sdma firmware!\n");
1249
- return r;
1888
+ /* SDMA SRAM ECC event */
1889
+ for (i = 0; i < adev->sdma.num_instances; i++) {
1890
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
1891
+ SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
1892
+ &adev->sdma.ecc_irq);
1893
+ if (r)
1894
+ return r;
12501895 }
12511896
12521897 for (i = 0; i < adev->sdma.num_instances; i++) {
....@@ -1254,21 +1899,38 @@
12541899 ring->ring_obj = NULL;
12551900 ring->use_doorbell = true;
12561901
1257
- DRM_INFO("use_doorbell being set to: [%s]\n",
1902
+ DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
12581903 ring->use_doorbell?"true":"false");
12591904
1260
- ring->doorbell_index = (i == 0) ?
1261
- (AMDGPU_DOORBELL64_sDMA_ENGINE0 << 1) //get DWORD offset
1262
- : (AMDGPU_DOORBELL64_sDMA_ENGINE1 << 1); // get DWORD offset
1905
+ /* doorbell size is 2 dwords, get DWORD offset */
1906
+ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
12631907
12641908 sprintf(ring->name, "sdma%d", i);
1265
- r = amdgpu_ring_init(adev, ring, 1024,
1266
- &adev->sdma.trap_irq,
1267
- (i == 0) ?
1268
- AMDGPU_SDMA_IRQ_TRAP0 :
1269
- AMDGPU_SDMA_IRQ_TRAP1);
1909
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
1910
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
1911
+ AMDGPU_RING_PRIO_DEFAULT);
12701912 if (r)
12711913 return r;
1914
+
1915
+ if (adev->sdma.has_page_queue) {
1916
+ ring = &adev->sdma.instance[i].page;
1917
+ ring->ring_obj = NULL;
1918
+ ring->use_doorbell = true;
1919
+
1920
+ /* the paging queue uses the same doorbell index/routing as the gfx queue,
1921
+ * with a 0x400 dword (4 KB) offset, i.e. on the second doorbell page

1922
+ */
1923
+ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
1924
+ ring->doorbell_index += 0x400;
1925
+
1926
+ sprintf(ring->name, "page%d", i);
1927
+ r = amdgpu_ring_init(adev, ring, 1024,
1928
+ &adev->sdma.trap_irq,
1929
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
1930
+ AMDGPU_RING_PRIO_DEFAULT);
1931
+ if (r)
1932
+ return r;
1933
+ }
12721934 }
12731935
12741936 return r;
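Doorbell indices are dword offsets and each SDMA doorbell entry is 2 dwords wide, hence the << 1 above; the paging queue reuses the same routing but adds 0x400 dword indices, which lands it on the second 4 KB doorbell page. A small sketch of the arithmetic, using a hypothetical doorbell_index.sdma_engine[i] value purely for illustration:

#include <stdio.h>

int main(void)
{
	/* hypothetical doorbell slot assigned to SDMA engine i (illustrative only) */
	unsigned int sdma_engine_index = 0xE0;

	unsigned int gfx_db  = sdma_engine_index << 1; /* doorbells are 2 dwords wide */
	unsigned int page_db = gfx_db + 0x400;         /* 0x400 dwords = 4 KB = next doorbell page */

	printf("gfx ring doorbell dword index:  0x%x (byte offset 0x%x)\n", gfx_db, gfx_db * 4);
	printf("page ring doorbell dword index: 0x%x (byte offset 0x%x)\n", page_db, page_db * 4);
	return 0;
}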
....@@ -1279,13 +1941,16 @@
12791941 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
12801942 int i;
12811943
1282
- for (i = 0; i < adev->sdma.num_instances; i++)
1283
- amdgpu_ring_fini(&adev->sdma.instance[i].ring);
1944
+ if (adev->sdma.funcs && adev->sdma.funcs->ras_fini)
1945
+ adev->sdma.funcs->ras_fini(adev);
12841946
12851947 for (i = 0; i < adev->sdma.num_instances; i++) {
1286
- release_firmware(adev->sdma.instance[i].fw);
1287
- adev->sdma.instance[i].fw = NULL;
1948
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
1949
+ if (adev->sdma.has_page_queue)
1950
+ amdgpu_ring_fini(&adev->sdma.instance[i].page);
12881951 }
1952
+
1953
+ sdma_v4_0_destroy_inst_ctx(adev);
12891954
12901955 return 0;
12911956 }
....@@ -1295,7 +1960,11 @@
12951960 int r;
12961961 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
12971962
1298
- sdma_v4_0_init_golden_registers(adev);
1963
+ if (adev->flags & AMD_IS_APU)
1964
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
1965
+
1966
+ if (!amdgpu_sriov_vf(adev))
1967
+ sdma_v4_0_init_golden_registers(adev);
12991968
13001969 r = sdma_v4_0_start(adev);
13011970
....@@ -1305,12 +1974,23 @@
13051974 static int sdma_v4_0_hw_fini(void *handle)
13061975 {
13071976 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1977
+ int i;
13081978
13091979 if (amdgpu_sriov_vf(adev))
13101980 return 0;
13111981
1982
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
1983
+ for (i = 0; i < adev->sdma.num_instances; i++) {
1984
+ amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
1985
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
1986
+ }
1987
+ }
1988
+
13121989 sdma_v4_0_ctx_switch_enable(adev, false);
13131990 sdma_v4_0_enable(adev, false);
1991
+
1992
+ if (adev->flags & AMD_IS_APU)
1993
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
13141994
13151995 return 0;
13161996 }
....@@ -1335,7 +2015,7 @@
13352015 u32 i;
13362016
13372017 for (i = 0; i < adev->sdma.num_instances; i++) {
1338
- u32 tmp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG));
2018
+ u32 tmp = RREG32_SDMA(i, mmSDMA0_STATUS_REG);
13392019
13402020 if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
13412021 return false;
....@@ -1346,15 +2026,17 @@
13462026
13472027 static int sdma_v4_0_wait_for_idle(void *handle)
13482028 {
1349
- unsigned i;
1350
- u32 sdma0, sdma1;
2029
+ unsigned i, j;
2030
+ u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
13512031 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
13522032
13532033 for (i = 0; i < adev->usec_timeout; i++) {
1354
- sdma0 = RREG32(sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
1355
- sdma1 = RREG32(sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG));
1356
-
1357
- if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
2034
+ for (j = 0; j < adev->sdma.num_instances; j++) {
2035
+ sdma[j] = RREG32_SDMA(j, mmSDMA0_STATUS_REG);
2036
+ if (!(sdma[j] & SDMA0_STATUS_REG__IDLE_MASK))
2037
+ break;
2038
+ }
2039
+ if (j == adev->sdma.num_instances)
13582040 return 0;
13592041 udelay(1);
13602042 }
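The reworked wait loop checks every SDMA instance rather than just the first two: the inner loop breaks on the first busy engine, and j == num_instances afterwards means the scan completed with all engines idle. A minimal standalone sketch of that loop-completed idiom (hypothetical status array, not driver state):

#include <stdbool.h>
#include <stdio.h>

#define IDLE_MASK 0x1u

static bool all_idle(const unsigned int *status, unsigned int count)
{
	unsigned int j;

	for (j = 0; j < count; j++)
		if (!(status[j] & IDLE_MASK))
			break;     /* first busy instance ends the scan */

	return j == count;         /* ran to completion -> everything idle */
}

int main(void)
{
	unsigned int status[4] = { 0x1, 0x1, 0x0, 0x1 }; /* instance 2 still busy */

	printf("all idle: %s\n", all_idle(status, 4) ? "yes" : "no");
	status[2] |= IDLE_MASK;
	printf("all idle: %s\n", all_idle(status, 4) ? "yes" : "no");
	return 0;
}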
....@@ -1375,14 +2057,10 @@
13752057 {
13762058 u32 sdma_cntl;
13772059
1378
- u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ?
1379
- sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) :
1380
- sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_CNTL);
1381
-
1382
- sdma_cntl = RREG32(reg_offset);
2060
+ sdma_cntl = RREG32_SDMA(type, mmSDMA0_CNTL);
13832061 sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
13842062 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
1385
- WREG32(reg_offset, sdma_cntl);
2063
+ WREG32_SDMA(type, mmSDMA0_CNTL, sdma_cntl);
13862064
13872065 return 0;
13882066 }
....@@ -1391,114 +2069,121 @@
13912069 struct amdgpu_irq_src *source,
13922070 struct amdgpu_iv_entry *entry)
13932071 {
2072
+ uint32_t instance;
2073
+
13942074 DRM_DEBUG("IH: SDMA trap\n");
1395
- switch (entry->client_id) {
1396
- case SOC15_IH_CLIENTID_SDMA0:
1397
- switch (entry->ring_id) {
1398
- case 0:
1399
- amdgpu_fence_process(&adev->sdma.instance[0].ring);
1400
- break;
1401
- case 1:
1402
- /* XXX compute */
1403
- break;
1404
- case 2:
1405
- /* XXX compute */
1406
- break;
1407
- case 3:
1408
- /* XXX page queue*/
1409
- break;
1410
- }
2075
+ instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
2076
+ switch (entry->ring_id) {
2077
+ case 0:
2078
+ amdgpu_fence_process(&adev->sdma.instance[instance].ring);
14112079 break;
1412
- case SOC15_IH_CLIENTID_SDMA1:
1413
- switch (entry->ring_id) {
1414
- case 0:
1415
- amdgpu_fence_process(&adev->sdma.instance[1].ring);
1416
- break;
1417
- case 1:
1418
- /* XXX compute */
1419
- break;
1420
- case 2:
1421
- /* XXX compute */
1422
- break;
1423
- case 3:
1424
- /* XXX page queue*/
1425
- break;
1426
- }
2080
+ case 1:
2081
+ if (adev->asic_type == CHIP_VEGA20)
2082
+ amdgpu_fence_process(&adev->sdma.instance[instance].page);
2083
+ break;
2084
+ case 2:
2085
+ /* XXX compute */
2086
+ break;
2087
+ case 3:
2088
+ if (adev->asic_type != CHIP_VEGA20)
2089
+ amdgpu_fence_process(&adev->sdma.instance[instance].page);
14272090 break;
14282091 }
14292092 return 0;
2093
+}
2094
+
2095
+static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
2096
+ void *err_data,
2097
+ struct amdgpu_iv_entry *entry)
2098
+{
2099
+ int instance;
2100
+
2101
+ /* When “Full RAS” is enabled, the per-IP interrupt sources should
2102
+ * be disabled and the driver should only look for the aggregated
2103
+ * interrupt via sync flood
2104
+ */
2105
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
2106
+ goto out;
2107
+
2108
+ instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
2109
+ if (instance < 0)
2110
+ goto out;
2111
+
2112
+ amdgpu_sdma_process_ras_data_cb(adev, err_data, entry);
2113
+
2114
+out:
2115
+ return AMDGPU_RAS_SUCCESS;
14302116 }
14312117
14322118 static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
14332119 struct amdgpu_irq_src *source,
14342120 struct amdgpu_iv_entry *entry)
14352121 {
2122
+ int instance;
2123
+
14362124 DRM_ERROR("Illegal instruction in SDMA command stream\n");
1437
- schedule_work(&adev->reset_work);
2125
+
2126
+ instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
2127
+ if (instance < 0)
2128
+ return 0;
2129
+
2130
+ switch (entry->ring_id) {
2131
+ case 0:
2132
+ drm_sched_fault(&adev->sdma.instance[instance].ring.sched);
2133
+ break;
2134
+ }
14382135 return 0;
14392136 }
14402137
2138
+static int sdma_v4_0_set_ecc_irq_state(struct amdgpu_device *adev,
2139
+ struct amdgpu_irq_src *source,
2140
+ unsigned type,
2141
+ enum amdgpu_interrupt_state state)
2142
+{
2143
+ u32 sdma_edc_config;
2144
+
2145
+ sdma_edc_config = RREG32_SDMA(type, mmSDMA0_EDC_CONFIG);
2146
+ sdma_edc_config = REG_SET_FIELD(sdma_edc_config, SDMA0_EDC_CONFIG, ECC_INT_ENABLE,
2147
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
2148
+ WREG32_SDMA(type, mmSDMA0_EDC_CONFIG, sdma_edc_config);
2149
+
2150
+ return 0;
2151
+}
14412152
14422153 static void sdma_v4_0_update_medium_grain_clock_gating(
14432154 struct amdgpu_device *adev,
14442155 bool enable)
14452156 {
14462157 uint32_t data, def;
2158
+ int i;
14472159
14482160 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
1449
- /* enable sdma0 clock gating */
1450
- def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL));
1451
- data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
1452
- SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
1453
- SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
1454
- SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
1455
- SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
1456
- SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
1457
- SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
1458
- SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
1459
- if (def != data)
1460
- WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data);
1461
-
1462
- if (adev->sdma.num_instances > 1) {
1463
- def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL));
1464
- data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
1465
- SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
1466
- SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK |
1467
- SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK |
1468
- SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK |
1469
- SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
1470
- SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
1471
- SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK);
2161
+ for (i = 0; i < adev->sdma.num_instances; i++) {
2162
+ def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL);
2163
+ data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
2164
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
2165
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
2166
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
2167
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
2168
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
2169
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
2170
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
14722171 if (def != data)
1473
- WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data);
2172
+ WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data);
14742173 }
14752174 } else {
1476
- /* disable sdma0 clock gating */
1477
- def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL));
1478
- data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
1479
- SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
1480
- SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
1481
- SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
1482
- SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
1483
- SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
1484
- SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
1485
- SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
1486
-
1487
- if (def != data)
1488
- WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data);
1489
-
1490
- if (adev->sdma.num_instances > 1) {
1491
- def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL));
1492
- data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
1493
- SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
1494
- SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK |
1495
- SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK |
1496
- SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK |
1497
- SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK |
1498
- SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK |
1499
- SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK);
2175
+ for (i = 0; i < adev->sdma.num_instances; i++) {
2176
+ def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL);
2177
+ data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
2178
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
2179
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
2180
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
2181
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
2182
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
2183
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
2184
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
15002185 if (def != data)
1501
- WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data);
2186
+ WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data);
15022187 }
15032188 }
15042189 }
....@@ -1509,34 +2194,23 @@
15092194 bool enable)
15102195 {
15112196 uint32_t data, def;
2197
+ int i;
15122198
15132199 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
1514
- /* 1-not override: enable sdma0 mem light sleep */
1515
- def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
1516
- data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
1517
- if (def != data)
1518
- WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
1519
-
1520
- /* 1-not override: enable sdma1 mem light sleep */
1521
- if (adev->sdma.num_instances > 1) {
1522
- def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
1523
- data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
2200
+ for (i = 0; i < adev->sdma.num_instances; i++) {
2201
+ /* 1-not override: enable sdma mem light sleep */
2202
+ def = data = RREG32_SDMA(i, mmSDMA0_POWER_CNTL);
2203
+ data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
15242204 if (def != data)
1525
- WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data);
2205
+ WREG32_SDMA(i, mmSDMA0_POWER_CNTL, data);
15262206 }
15272207 } else {
1528
- /* 0-override:disable sdma0 mem light sleep */
1529
- def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
1530
- data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
1531
- if (def != data)
1532
- WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
1533
-
1534
- /* 0-override:disable sdma1 mem light sleep */
1535
- if (adev->sdma.num_instances > 1) {
1536
- def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
1537
- data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
2208
+ for (i = 0; i < adev->sdma.num_instances; i++) {
2209
+ /* 0-override:disable sdma mem light sleep */
2210
+ def = data = RREG32_SDMA(i, mmSDMA0_POWER_CNTL);
2211
+ data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
15382212 if (def != data)
1539
- WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data);
2213
+ WREG32_SDMA(i, mmSDMA0_POWER_CNTL, data);
15402214 }
15412215 }
15422216 }
....@@ -1554,10 +2228,12 @@
15542228 case CHIP_VEGA12:
15552229 case CHIP_VEGA20:
15562230 case CHIP_RAVEN:
2231
+ case CHIP_ARCTURUS:
2232
+ case CHIP_RENOIR:
15572233 sdma_v4_0_update_medium_grain_clock_gating(adev,
1558
- state == AMD_CG_STATE_GATE ? true : false);
2234
+ state == AMD_CG_STATE_GATE);
15592235 sdma_v4_0_update_medium_grain_light_sleep(adev,
1560
- state == AMD_CG_STATE_GATE ? true : false);
2236
+ state == AMD_CG_STATE_GATE);
15612237 break;
15622238 default:
15632239 break;
....@@ -1572,6 +2248,7 @@
15722248
15732249 switch (adev->asic_type) {
15742250 case CHIP_RAVEN:
2251
+ case CHIP_RENOIR:
15752252 sdma_v4_1_update_power_gating(adev,
15762253 state == AMD_PG_STATE_GATE ? true : false);
15772254 break;
....@@ -1604,7 +2281,7 @@
16042281 const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
16052282 .name = "sdma_v4_0",
16062283 .early_init = sdma_v4_0_early_init,
1607
- .late_init = NULL,
2284
+ .late_init = sdma_v4_0_late_init,
16082285 .sw_init = sdma_v4_0_sw_init,
16092286 .sw_fini = sdma_v4_0_sw_fini,
16102287 .hw_init = sdma_v4_0_hw_init,
....@@ -1624,10 +2301,110 @@
16242301 .align_mask = 0xf,
16252302 .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
16262303 .support_64bit_ptrs = true,
1627
- .vmhub = AMDGPU_MMHUB,
2304
+ .vmhub = AMDGPU_MMHUB_0,
16282305 .get_rptr = sdma_v4_0_ring_get_rptr,
16292306 .get_wptr = sdma_v4_0_ring_get_wptr,
16302307 .set_wptr = sdma_v4_0_ring_set_wptr,
2308
+ .emit_frame_size =
2309
+ 6 + /* sdma_v4_0_ring_emit_hdp_flush */
2310
+ 3 + /* hdp invalidate */
2311
+ 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
2312
+ /* sdma_v4_0_ring_emit_vm_flush */
2313
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
2314
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
2315
+ 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
2316
+ .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
2317
+ .emit_ib = sdma_v4_0_ring_emit_ib,
2318
+ .emit_fence = sdma_v4_0_ring_emit_fence,
2319
+ .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
2320
+ .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
2321
+ .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
2322
+ .test_ring = sdma_v4_0_ring_test_ring,
2323
+ .test_ib = sdma_v4_0_ring_test_ib,
2324
+ .insert_nop = sdma_v4_0_ring_insert_nop,
2325
+ .pad_ib = sdma_v4_0_ring_pad_ib,
2326
+ .emit_wreg = sdma_v4_0_ring_emit_wreg,
2327
+ .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
2328
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
2329
+};
2330
+
2331
+/*
2332
+ * On Arcturus, SDMA instances 5~7 use a different vmhub type (AMDGPU_MMHUB_1),
2333
+ * so create an individual constant ring_funcs for each of those instances.
2334
+ */
2335
+static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = {
2336
+ .type = AMDGPU_RING_TYPE_SDMA,
2337
+ .align_mask = 0xf,
2338
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
2339
+ .support_64bit_ptrs = true,
2340
+ .vmhub = AMDGPU_MMHUB_1,
2341
+ .get_rptr = sdma_v4_0_ring_get_rptr,
2342
+ .get_wptr = sdma_v4_0_ring_get_wptr,
2343
+ .set_wptr = sdma_v4_0_ring_set_wptr,
2344
+ .emit_frame_size =
2345
+ 6 + /* sdma_v4_0_ring_emit_hdp_flush */
2346
+ 3 + /* hdp invalidate */
2347
+ 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
2348
+ /* sdma_v4_0_ring_emit_vm_flush */
2349
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
2350
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
2351
+ 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
2352
+ .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
2353
+ .emit_ib = sdma_v4_0_ring_emit_ib,
2354
+ .emit_fence = sdma_v4_0_ring_emit_fence,
2355
+ .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
2356
+ .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
2357
+ .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
2358
+ .test_ring = sdma_v4_0_ring_test_ring,
2359
+ .test_ib = sdma_v4_0_ring_test_ib,
2360
+ .insert_nop = sdma_v4_0_ring_insert_nop,
2361
+ .pad_ib = sdma_v4_0_ring_pad_ib,
2362
+ .emit_wreg = sdma_v4_0_ring_emit_wreg,
2363
+ .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
2364
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
2365
+};
2366
+
2367
+static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {
2368
+ .type = AMDGPU_RING_TYPE_SDMA,
2369
+ .align_mask = 0xf,
2370
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
2371
+ .support_64bit_ptrs = true,
2372
+ .vmhub = AMDGPU_MMHUB_0,
2373
+ .get_rptr = sdma_v4_0_ring_get_rptr,
2374
+ .get_wptr = sdma_v4_0_page_ring_get_wptr,
2375
+ .set_wptr = sdma_v4_0_page_ring_set_wptr,
2376
+ .emit_frame_size =
2377
+ 6 + /* sdma_v4_0_ring_emit_hdp_flush */
2378
+ 3 + /* hdp invalidate */
2379
+ 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
2380
+ /* sdma_v4_0_ring_emit_vm_flush */
2381
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
2382
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
2383
+ 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
2384
+ .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
2385
+ .emit_ib = sdma_v4_0_ring_emit_ib,
2386
+ .emit_fence = sdma_v4_0_ring_emit_fence,
2387
+ .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
2388
+ .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
2389
+ .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
2390
+ .test_ring = sdma_v4_0_ring_test_ring,
2391
+ .test_ib = sdma_v4_0_ring_test_ib,
2392
+ .insert_nop = sdma_v4_0_ring_insert_nop,
2393
+ .pad_ib = sdma_v4_0_ring_pad_ib,
2394
+ .emit_wreg = sdma_v4_0_ring_emit_wreg,
2395
+ .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
2396
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
2397
+};
2398
+
2399
+static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = {
2400
+ .type = AMDGPU_RING_TYPE_SDMA,
2401
+ .align_mask = 0xf,
2402
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
2403
+ .support_64bit_ptrs = true,
2404
+ .vmhub = AMDGPU_MMHUB_1,
2405
+ .get_rptr = sdma_v4_0_ring_get_rptr,
2406
+ .get_wptr = sdma_v4_0_page_ring_get_wptr,
2407
+ .set_wptr = sdma_v4_0_page_ring_set_wptr,
16312408 .emit_frame_size =
16322409 6 + /* sdma_v4_0_ring_emit_hdp_flush */
16332410 3 + /* hdp invalidate */
....@@ -1656,8 +2433,22 @@
16562433 int i;
16572434
16582435 for (i = 0; i < adev->sdma.num_instances; i++) {
1659
- adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs;
2436
+ if (adev->asic_type == CHIP_ARCTURUS && i >= 5)
2437
+ adev->sdma.instance[i].ring.funcs =
2438
+ &sdma_v4_0_ring_funcs_2nd_mmhub;
2439
+ else
2440
+ adev->sdma.instance[i].ring.funcs =
2441
+ &sdma_v4_0_ring_funcs;
16602442 adev->sdma.instance[i].ring.me = i;
2443
+ if (adev->sdma.has_page_queue) {
2444
+ if (adev->asic_type == CHIP_ARCTURUS && i >= 5)
2445
+ adev->sdma.instance[i].page.funcs =
2446
+ &sdma_v4_0_page_ring_funcs_2nd_mmhub;
2447
+ else
2448
+ adev->sdma.instance[i].page.funcs =
2449
+ &sdma_v4_0_page_ring_funcs;
2450
+ adev->sdma.instance[i].page.me = i;
2451
+ }
16612452 }
16622453 }
16632454
....@@ -1670,11 +2461,33 @@
16702461 .process = sdma_v4_0_process_illegal_inst_irq,
16712462 };
16722463
2464
+static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = {
2465
+ .set = sdma_v4_0_set_ecc_irq_state,
2466
+ .process = amdgpu_sdma_process_ecc_irq,
2467
+};
2468
+
2469
+
2470
+
16732471 static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
16742472 {
1675
- adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
2473
+ switch (adev->sdma.num_instances) {
2474
+ case 1:
2475
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
2476
+ adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
2477
+ break;
2478
+ case 8:
2479
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
2480
+ adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
2481
+ break;
2482
+ case 2:
2483
+ default:
2484
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
2485
+ adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
2486
+ break;
2487
+ }
16762488 adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;
16772489 adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs;
2490
+ adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs;
16782491 }
16792492
16802493 /**
....@@ -1692,10 +2505,12 @@
16922505 static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib,
16932506 uint64_t src_offset,
16942507 uint64_t dst_offset,
1695
- uint32_t byte_count)
2508
+ uint32_t byte_count,
2509
+ bool tmz)
16962510 {
16972511 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
1698
- SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
2512
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
2513
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
16992514 ib->ptr[ib->length_dw++] = byte_count - 1;
17002515 ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
17012516 ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
....@@ -1738,10 +2553,11 @@
17382553
17392554 static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
17402555 {
1741
- if (adev->mman.buffer_funcs == NULL) {
1742
- adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
2556
+ adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
2557
+ if (adev->sdma.has_page_queue)
2558
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;
2559
+ else
17432560 adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
1744
- }
17452561 }
17462562
17472563 static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
....@@ -1754,15 +2570,93 @@
17542570
17552571 static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
17562572 {
2573
+ struct drm_gpu_scheduler *sched;
17572574 unsigned i;
17582575
1759
- if (adev->vm_manager.vm_pte_funcs == NULL) {
1760
- adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
1761
- for (i = 0; i < adev->sdma.num_instances; i++)
1762
- adev->vm_manager.vm_pte_rings[i] =
1763
- &adev->sdma.instance[i].ring;
2576
+ adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
2577
+ for (i = 0; i < adev->sdma.num_instances; i++) {
2578
+ if (adev->sdma.has_page_queue)
2579
+ sched = &adev->sdma.instance[i].page.sched;
2580
+ else
2581
+ sched = &adev->sdma.instance[i].ring.sched;
2582
+ adev->vm_manager.vm_pte_scheds[i] = sched;
2583
+ }
2584
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
2585
+}
17642586
1765
- adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
2587
+static void sdma_v4_0_get_ras_error_count(uint32_t value,
2588
+ uint32_t instance,
2589
+ uint32_t *sec_count)
2590
+{
2591
+ uint32_t i;
2592
+ uint32_t sec_cnt;
2593
+
2594
+ /* double bit (multiple bits) error detection is not supported */
2595
+ for (i = 0; i < ARRAY_SIZE(sdma_v4_0_ras_fields); i++) {
2596
+ /* the SDMA_EDC_COUNTER register layout (SED field masks/shifts)
2597
+ * is the same in every SDMA instance
2598
+ */
2599
+ sec_cnt = (value &
2600
+ sdma_v4_0_ras_fields[i].sec_count_mask) >>
2601
+ sdma_v4_0_ras_fields[i].sec_count_shift;
2602
+ if (sec_cnt) {
2603
+ DRM_INFO("Detected %s in SDMA%d, SED %d\n",
2604
+ sdma_v4_0_ras_fields[i].name,
2605
+ instance, sec_cnt);
2606
+ *sec_count += sec_cnt;
2607
+ }
2608
+ }
2609
+}
2610
+
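Each named field in sdma_v4_0_ras_fields is pulled out of the single SDMA0_EDC_COUNTER readback with its mask and shift, and the per-field SEC counts are summed into one correctable-error total. A standalone sketch of that mask/shift extraction over a made-up field table (register value, masks and field names are illustrative only):

#include <stdio.h>

struct edc_field {
	const char   *name;
	unsigned int  mask;
	unsigned int  shift;
};

/* hypothetical two-bit SED counters packed into one 32-bit EDC register */
static const struct edc_field fields[] = {
	{ "UCODE_BUF_SED",       0x00000003, 0 },
	{ "RB_CMD_BUF_SED",      0x0000000c, 2 },
	{ "MBANK_DATA_BUF0_SED", 0x00000030, 4 },
};

int main(void)
{
	unsigned int reg_value = 0x00000026; /* example readback */
	unsigned int sec_count = 0;
	unsigned int i;

	for (i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
		unsigned int cnt = (reg_value & fields[i].mask) >> fields[i].shift;

		if (cnt) {
			printf("%s: %u single-bit errors\n", fields[i].name, cnt);
			sec_count += cnt;
		}
	}
	printf("total correctable errors: %u\n", sec_count);
	return 0;
}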
2611
+static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
2612
+ uint32_t instance, void *ras_error_status)
2613
+{
2614
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
2615
+ uint32_t sec_count = 0;
2616
+ uint32_t reg_value = 0;
2617
+
2618
+ reg_value = RREG32_SDMA(instance, mmSDMA0_EDC_COUNTER);
2619
+ /* double bit error is not supported */
2620
+ if (reg_value)
2621
+ sdma_v4_0_get_ras_error_count(reg_value,
2622
+ instance, &sec_count);
2623
+ /* err_data->ce_count should be initialized to 0
2624
+ * before calling into this function */
2625
+ err_data->ce_count += sec_count;
2626
+ /* double bit error is not supported
2627
+ * set ue count to 0 */
2628
+ err_data->ue_count = 0;
2629
+
2630
+ return 0;
2631
+}
2632
+
2633
+static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
2634
+{
2635
+ int i;
2636
+
2637
+ /* read back edc counter registers to clear the counters */
2638
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
2639
+ for (i = 0; i < adev->sdma.num_instances; i++)
2640
+ RREG32_SDMA(i, mmSDMA0_EDC_COUNTER);
2641
+ }
2642
+}
2643
+
2644
+static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = {
2645
+ .ras_late_init = amdgpu_sdma_ras_late_init,
2646
+ .ras_fini = amdgpu_sdma_ras_fini,
2647
+ .query_ras_error_count = sdma_v4_0_query_ras_error_count,
2648
+ .reset_ras_error_count = sdma_v4_0_reset_ras_error_count,
2649
+};
2650
+
2651
+static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
2652
+{
2653
+ switch (adev->asic_type) {
2654
+ case CHIP_VEGA20:
2655
+ case CHIP_ARCTURUS:
2656
+ adev->sdma.funcs = &sdma_v4_0_ras_funcs;
2657
+ break;
2658
+ default:
2659
+ break;
17662660 }
17672661 }
17682662