2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
--- a/kernel/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/kernel/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
....@@ -20,9 +20,13 @@
2020 * OTHER DEALINGS IN THE SOFTWARE.
2121 *
2222 */
23
+
24
+#include <linux/delay.h>
2325 #include <linux/kernel.h>
2426 #include <linux/firmware.h>
25
-#include <drm/drmP.h>
27
+#include <linux/module.h>
28
+#include <linux/pci.h>
29
+
2630 #include "amdgpu.h"
2731 #include "amdgpu_gfx.h"
2832 #include "vi.h"
....@@ -44,7 +48,6 @@
4448 #include "gca/gfx_8_0_d.h"
4549 #include "gca/gfx_8_0_enum.h"
4650 #include "gca/gfx_8_0_sh_mask.h"
47
-#include "gca/gfx_8_0_enum.h"
4851
4952 #include "dce/dce_10_0_d.h"
5053 #include "dce/dce_10_0_sh_mask.h"
....@@ -54,7 +57,7 @@
5457 #include "ivsrcid/ivsrcid_vislands30.h"
5558
5659 #define GFX8_NUM_GFX_RINGS 1
57
-#define GFX8_MEC_HPD_SIZE 2048
60
+#define GFX8_MEC_HPD_SIZE 4096
5861
5962 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
6063 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
....@@ -839,18 +842,14 @@
839842 int r;
840843
841844 r = amdgpu_gfx_scratch_get(adev, &scratch);
842
- if (r) {
843
- DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
845
+ if (r)
844846 return r;
845
- }
847
+
846848 WREG32(scratch, 0xCAFEDEAD);
847849 r = amdgpu_ring_alloc(ring, 3);
848
- if (r) {
849
- DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
850
- ring->idx, r);
851
- amdgpu_gfx_scratch_free(adev, scratch);
852
- return r;
853
- }
850
+ if (r)
851
+ goto error_free_scratch;
852
+
854853 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
855854 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
856855 amdgpu_ring_write(ring, 0xDEADBEEF);
....@@ -860,16 +859,13 @@
860859 tmp = RREG32(scratch);
861860 if (tmp == 0xDEADBEEF)
862861 break;
863
- DRM_UDELAY(1);
862
+ udelay(1);
864863 }
865
- if (i < adev->usec_timeout) {
866
- DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
867
- ring->idx, i);
868
- } else {
869
- DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
870
- ring->idx, scratch, tmp);
871
- r = -EINVAL;
872
- }
864
+
865
+ if (i >= adev->usec_timeout)
866
+ r = -ETIMEDOUT;
867
+
868
+error_free_scratch:
873869 amdgpu_gfx_scratch_free(adev, scratch);
874870 return r;
875871 }
....@@ -886,19 +882,17 @@
886882 long r;
887883
888884 r = amdgpu_device_wb_get(adev, &index);
889
- if (r) {
890
- dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
885
+ if (r)
891886 return r;
892
- }
893887
894888 gpu_addr = adev->wb.gpu_addr + (index * 4);
895889 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
896890 memset(&ib, 0, sizeof(ib));
897
- r = amdgpu_ib_get(adev, NULL, 16, &ib);
898
- if (r) {
899
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
891
+ r = amdgpu_ib_get(adev, NULL, 16,
892
+ AMDGPU_IB_POOL_DIRECT, &ib);
893
+ if (r)
900894 goto err1;
901
- }
895
+
902896 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
903897 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
904898 ib.ptr[2] = lower_32_bits(gpu_addr);
....@@ -912,22 +906,17 @@
912906
913907 r = dma_fence_wait_timeout(f, false, timeout);
914908 if (r == 0) {
915
- DRM_ERROR("amdgpu: IB test timed out.\n");
916909 r = -ETIMEDOUT;
917910 goto err2;
918911 } else if (r < 0) {
919
- DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
920912 goto err2;
921913 }
922914
923915 tmp = adev->wb.wb[index];
924
- if (tmp == 0xDEADBEEF) {
925
- DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
916
+ if (tmp == 0xDEADBEEF)
926917 r = 0;
927
- } else {
928
- DRM_ERROR("ib test on ring %d failed\n", ring->idx);
918
+ else
929919 r = -EINVAL;
930
- }
931920
932921 err2:
933922 amdgpu_ib_free(adev, &ib, NULL);
....@@ -1114,14 +1103,14 @@
11141103
11151104 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
11161105 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1117
- for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1106
+ for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
11181107 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
11191108
11201109 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
11211110
11221111 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
11231112 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1124
- for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1113
+ for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
11251114 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
11261115
11271116 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
....@@ -1173,64 +1162,61 @@
11731162 }
11741163 }
11751164
1176
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1177
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1178
- info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1179
- info->fw = adev->gfx.pfp_fw;
1180
- header = (const struct common_firmware_header *)info->fw->data;
1181
- adev->firmware.fw_size +=
1182
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1165
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1166
+ info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1167
+ info->fw = adev->gfx.pfp_fw;
1168
+ header = (const struct common_firmware_header *)info->fw->data;
1169
+ adev->firmware.fw_size +=
1170
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
11831171
1184
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1185
- info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1186
- info->fw = adev->gfx.me_fw;
1187
- header = (const struct common_firmware_header *)info->fw->data;
1188
- adev->firmware.fw_size +=
1189
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1172
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1173
+ info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1174
+ info->fw = adev->gfx.me_fw;
1175
+ header = (const struct common_firmware_header *)info->fw->data;
1176
+ adev->firmware.fw_size +=
1177
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
11901178
1191
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1192
- info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1193
- info->fw = adev->gfx.ce_fw;
1194
- header = (const struct common_firmware_header *)info->fw->data;
1195
- adev->firmware.fw_size +=
1196
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1179
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1180
+ info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1181
+ info->fw = adev->gfx.ce_fw;
1182
+ header = (const struct common_firmware_header *)info->fw->data;
1183
+ adev->firmware.fw_size +=
1184
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
11971185
1198
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1199
- info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1200
- info->fw = adev->gfx.rlc_fw;
1201
- header = (const struct common_firmware_header *)info->fw->data;
1202
- adev->firmware.fw_size +=
1203
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1186
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1187
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1188
+ info->fw = adev->gfx.rlc_fw;
1189
+ header = (const struct common_firmware_header *)info->fw->data;
1190
+ adev->firmware.fw_size +=
1191
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
12041192
1205
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1206
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1193
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1194
+ info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1195
+ info->fw = adev->gfx.mec_fw;
1196
+ header = (const struct common_firmware_header *)info->fw->data;
1197
+ adev->firmware.fw_size +=
1198
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1199
+
1200
+ /* we need account JT in */
1201
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1202
+ adev->firmware.fw_size +=
1203
+ ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1204
+
1205
+ if (amdgpu_sriov_vf(adev)) {
1206
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1207
+ info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
12071208 info->fw = adev->gfx.mec_fw;
1209
+ adev->firmware.fw_size +=
1210
+ ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1211
+ }
1212
+
1213
+ if (adev->gfx.mec2_fw) {
1214
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1215
+ info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1216
+ info->fw = adev->gfx.mec2_fw;
12081217 header = (const struct common_firmware_header *)info->fw->data;
12091218 adev->firmware.fw_size +=
12101219 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1211
-
1212
- /* we need account JT in */
1213
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1214
- adev->firmware.fw_size +=
1215
- ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1216
-
1217
- if (amdgpu_sriov_vf(adev)) {
1218
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1219
- info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1220
- info->fw = adev->gfx.mec_fw;
1221
- adev->firmware.fw_size +=
1222
- ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1223
- }
1224
-
1225
- if (adev->gfx.mec2_fw) {
1226
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1227
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1228
- info->fw = adev->gfx.mec2_fw;
1229
- header = (const struct common_firmware_header *)info->fw->data;
1230
- adev->firmware.fw_size +=
1231
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1232
- }
1233
-
12341220 }
12351221
12361222 out:
....@@ -1301,81 +1287,16 @@
13011287 buffer[count++] = cpu_to_le32(0);
13021288 }
13031289
1304
-static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1290
+static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
13051291 {
1306
- const __le32 *fw_data;
1307
- volatile u32 *dst_ptr;
1308
- int me, i, max_me = 4;
1309
- u32 bo_offset = 0;
1310
- u32 table_offset, table_size;
1311
-
13121292 if (adev->asic_type == CHIP_CARRIZO)
1313
- max_me = 5;
1314
-
1315
- /* write the cp table buffer */
1316
- dst_ptr = adev->gfx.rlc.cp_table_ptr;
1317
- for (me = 0; me < max_me; me++) {
1318
- if (me == 0) {
1319
- const struct gfx_firmware_header_v1_0 *hdr =
1320
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1321
- fw_data = (const __le32 *)
1322
- (adev->gfx.ce_fw->data +
1323
- le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1324
- table_offset = le32_to_cpu(hdr->jt_offset);
1325
- table_size = le32_to_cpu(hdr->jt_size);
1326
- } else if (me == 1) {
1327
- const struct gfx_firmware_header_v1_0 *hdr =
1328
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1329
- fw_data = (const __le32 *)
1330
- (adev->gfx.pfp_fw->data +
1331
- le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1332
- table_offset = le32_to_cpu(hdr->jt_offset);
1333
- table_size = le32_to_cpu(hdr->jt_size);
1334
- } else if (me == 2) {
1335
- const struct gfx_firmware_header_v1_0 *hdr =
1336
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1337
- fw_data = (const __le32 *)
1338
- (adev->gfx.me_fw->data +
1339
- le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1340
- table_offset = le32_to_cpu(hdr->jt_offset);
1341
- table_size = le32_to_cpu(hdr->jt_size);
1342
- } else if (me == 3) {
1343
- const struct gfx_firmware_header_v1_0 *hdr =
1344
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1345
- fw_data = (const __le32 *)
1346
- (adev->gfx.mec_fw->data +
1347
- le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1348
- table_offset = le32_to_cpu(hdr->jt_offset);
1349
- table_size = le32_to_cpu(hdr->jt_size);
1350
- } else if (me == 4) {
1351
- const struct gfx_firmware_header_v1_0 *hdr =
1352
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1353
- fw_data = (const __le32 *)
1354
- (adev->gfx.mec2_fw->data +
1355
- le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1356
- table_offset = le32_to_cpu(hdr->jt_offset);
1357
- table_size = le32_to_cpu(hdr->jt_size);
1358
- }
1359
-
1360
- for (i = 0; i < table_size; i ++) {
1361
- dst_ptr[bo_offset + i] =
1362
- cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1363
- }
1364
-
1365
- bo_offset += table_size;
1366
- }
1367
-}
1368
-
1369
-static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1370
-{
1371
- amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
1372
- amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
1293
+ return 5;
1294
+ else
1295
+ return 4;
13731296 }
13741297
13751298 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
13761299 {
1377
- volatile u32 *dst_ptr;
1378
- u32 dws;
13791300 const struct cs_section_def *cs_data;
13801301 int r;
13811302
....@@ -1384,45 +1305,23 @@
13841305 cs_data = adev->gfx.rlc.cs_data;
13851306
13861307 if (cs_data) {
1387
- /* clear state block */
1388
- adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1389
-
1390
- r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1391
- AMDGPU_GEM_DOMAIN_VRAM,
1392
- &adev->gfx.rlc.clear_state_obj,
1393
- &adev->gfx.rlc.clear_state_gpu_addr,
1394
- (void **)&adev->gfx.rlc.cs_ptr);
1395
- if (r) {
1396
- dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1397
- gfx_v8_0_rlc_fini(adev);
1308
+ /* init clear state block */
1309
+ r = amdgpu_gfx_rlc_init_csb(adev);
1310
+ if (r)
13981311 return r;
1399
- }
1400
-
1401
- /* set up the cs buffer */
1402
- dst_ptr = adev->gfx.rlc.cs_ptr;
1403
- gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1404
- amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1405
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
14061312 }
14071313
14081314 if ((adev->asic_type == CHIP_CARRIZO) ||
14091315 (adev->asic_type == CHIP_STONEY)) {
14101316 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1411
- r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1412
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1413
- &adev->gfx.rlc.cp_table_obj,
1414
- &adev->gfx.rlc.cp_table_gpu_addr,
1415
- (void **)&adev->gfx.rlc.cp_table_ptr);
1416
- if (r) {
1417
- dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1317
+ r = amdgpu_gfx_rlc_init_cpt(adev);
1318
+ if (r)
14181319 return r;
1419
- }
1420
-
1421
- cz_init_cp_jump_table(adev);
1422
-
1423
- amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1424
- amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
14251320 }
1321
+
1322
+ /* init spm vmid with 0xf */
1323
+ if (adev->gfx.rlc.funcs->update_spm_vmid)
1324
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
14261325
14271326 return 0;
14281327 }
....@@ -1444,21 +1343,22 @@
14441343 amdgpu_gfx_compute_queue_acquire(adev);
14451344
14461345 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1346
+ if (mec_hpd_size) {
1347
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1348
+ AMDGPU_GEM_DOMAIN_VRAM,
1349
+ &adev->gfx.mec.hpd_eop_obj,
1350
+ &adev->gfx.mec.hpd_eop_gpu_addr,
1351
+ (void **)&hpd);
1352
+ if (r) {
1353
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1354
+ return r;
1355
+ }
14471356
1448
- r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1449
- AMDGPU_GEM_DOMAIN_GTT,
1450
- &adev->gfx.mec.hpd_eop_obj,
1451
- &adev->gfx.mec.hpd_eop_gpu_addr,
1452
- (void **)&hpd);
1453
- if (r) {
1454
- dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1455
- return r;
1357
+ memset(hpd, 0, mec_hpd_size);
1358
+
1359
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1360
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
14561361 }
1457
-
1458
- memset(hpd, 0, mec_hpd_size);
1459
-
1460
- amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1461
- amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
14621362
14631363 return 0;
14641364 }
....@@ -1632,7 +1532,7 @@
16321532 return 0;
16331533
16341534 /* bail if the compute ring is not ready */
1635
- if (!ring->ready)
1535
+ if (!ring->sched.ready)
16361536 return 0;
16371537
16381538 tmp = RREG32(mmGB_EDC_MODE);
....@@ -1652,7 +1552,8 @@
16521552
16531553 /* allocate an indirect buffer to put the commands in */
16541554 memset(&ib, 0, sizeof(ib));
1655
- r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1555
+ r = amdgpu_ib_get(adev, NULL, total_size,
1556
+ AMDGPU_IB_POOL_DIRECT, &ib);
16561557 if (r) {
16571558 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
16581559 return r;
....@@ -1783,7 +1684,7 @@
17831684 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
17841685 {
17851686 u32 gb_addr_config;
1786
- u32 mc_shared_chmap, mc_arb_ramcfg;
1687
+ u32 mc_arb_ramcfg;
17871688 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
17881689 u32 tmp;
17891690 int ret;
....@@ -1923,9 +1824,13 @@
19231824 break;
19241825 }
19251826
1926
- mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
19271827 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
19281828 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1829
+
1830
+ adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1831
+ MC_ARB_RAMCFG, NOOFBANK);
1832
+ adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1833
+ MC_ARB_RAMCFG, NOOFRANKS);
19291834
19301835 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
19311836 adev->gfx.config.mem_max_burst_length_bytes = 256;
....@@ -1990,6 +1895,7 @@
19901895 int r;
19911896 unsigned irq_type;
19921897 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1898
+ unsigned int hw_prio;
19931899
19941900 ring = &adev->gfx.compute_ring[ring_id];
19951901
....@@ -2000,7 +1906,7 @@
20001906
20011907 ring->ring_obj = NULL;
20021908 ring->use_doorbell = true;
2003
- ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1909
+ ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
20041910 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
20051911 + (ring_id * GFX8_MEC_HPD_SIZE);
20061912 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
....@@ -2009,9 +1915,12 @@
20091915 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
20101916 + ring->pipe;
20111917
1918
+ hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe,
1919
+ ring->queue) ?
1920
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT;
20121921 /* type-2 packets are deprecated on MEC, use type-3 instead */
20131922 r = amdgpu_ring_init(adev, ring, 1024,
2014
- &adev->gfx.eop_irq, irq_type);
1923
+ &adev->gfx.eop_irq, irq_type, hw_prio);
20151924 if (r)
20161925 return r;
20171926
....@@ -2048,36 +1957,31 @@
20481957 adev->gfx.mec.num_pipe_per_mec = 4;
20491958 adev->gfx.mec.num_queue_per_pipe = 8;
20501959
2051
- /* KIQ event */
2052
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_INT_IB2, &adev->gfx.kiq.irq);
2053
- if (r)
2054
- return r;
2055
-
20561960 /* EOP Event */
2057
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1961
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
20581962 if (r)
20591963 return r;
20601964
20611965 /* Privileged reg */
2062
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1966
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
20631967 &adev->gfx.priv_reg_irq);
20641968 if (r)
20651969 return r;
20661970
20671971 /* Privileged inst */
2068
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1972
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
20691973 &adev->gfx.priv_inst_irq);
20701974 if (r)
20711975 return r;
20721976
20731977 /* Add CP EDC/ECC irq */
2074
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1978
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
20751979 &adev->gfx.cp_ecc_error_irq);
20761980 if (r)
20771981 return r;
20781982
20791983 /* SQ interrupts. */
2080
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1984
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
20811985 &adev->gfx.sq_irq);
20821986 if (r) {
20831987 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
....@@ -2096,7 +2000,7 @@
20962000 return r;
20972001 }
20982002
2099
- r = gfx_v8_0_rlc_init(adev);
2003
+ r = adev->gfx.rlc.funcs->init(adev);
21002004 if (r) {
21012005 DRM_ERROR("Failed to init rlc BOs!\n");
21022006 return r;
....@@ -2116,11 +2020,12 @@
21162020 /* no gfx doorbells on iceland */
21172021 if (adev->asic_type != CHIP_TOPAZ) {
21182022 ring->use_doorbell = true;
2119
- ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2023
+ ring->doorbell_index = adev->doorbell_index.gfx_ring0;
21202024 }
21212025
21222026 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2123
- AMDGPU_CP_IRQ_GFX_EOP);
2027
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2028
+ AMDGPU_RING_PRIO_DEFAULT);
21242029 if (r)
21252030 return r;
21262031 }
....@@ -2157,26 +2062,7 @@
21572062 return r;
21582063
21592064 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2160
- r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2161
- if (r)
2162
- return r;
2163
-
2164
- /* reserve GDS, GWS and OA resource for gfx */
2165
- r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2166
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2167
- &adev->gds.gds_gfx_bo, NULL, NULL);
2168
- if (r)
2169
- return r;
2170
-
2171
- r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2172
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2173
- &adev->gds.gws_gfx_bo, NULL, NULL);
2174
- if (r)
2175
- return r;
2176
-
2177
- r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2178
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2179
- &adev->gds.oa_gfx_bo, NULL, NULL);
2065
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
21802066 if (r)
21812067 return r;
21822068
....@@ -2191,24 +2077,20 @@
21912077
21922078 static int gfx_v8_0_sw_fini(void *handle)
21932079 {
2194
- int i;
21952080 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2196
-
2197
- amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2198
- amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2199
- amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2081
+ int i;
22002082
22012083 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
22022084 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
22032085 for (i = 0; i < adev->gfx.num_compute_rings; i++)
22042086 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
22052087
2206
- amdgpu_gfx_compute_mqd_sw_fini(adev);
2207
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2088
+ amdgpu_gfx_mqd_sw_fini(adev);
2089
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
22082090 amdgpu_gfx_kiq_fini(adev);
22092091
22102092 gfx_v8_0_mec_fini(adev);
2211
- gfx_v8_0_rlc_fini(adev);
2093
+ amdgpu_gfx_rlc_fini(adev);
22122094 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
22132095 &adev->gfx.rlc.clear_state_gpu_addr,
22142096 (void **)&adev->gfx.rlc.cs_ptr);
....@@ -3370,6 +3252,7 @@
33703252 dev_warn(adev->dev,
33713253 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
33723254 adev->asic_type);
3255
+ fallthrough;
33733256
33743257 case CHIP_CARRIZO:
33753258 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
....@@ -3569,9 +3452,9 @@
35693452 }
35703453
35713454 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3572
- u32 me, u32 pipe, u32 q)
3455
+ u32 me, u32 pipe, u32 q, u32 vm)
35733456 {
3574
- vi_srbm_select(adev, me, pipe, q, 0);
3457
+ vi_srbm_select(adev, me, pipe, q, vm);
35753458 }
35763459
35773460 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
....@@ -3805,8 +3688,6 @@
38053688 *
38063689 */
38073690 #define DEFAULT_SH_MEM_BASES (0x6000)
3808
-#define FIRST_COMPUTE_VMID (8)
3809
-#define LAST_COMPUTE_VMID (16)
38103691 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
38113692 {
38123693 int i;
....@@ -3829,7 +3710,7 @@
38293710 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
38303711
38313712 mutex_lock(&adev->srbm_mutex);
3832
- for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3713
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
38333714 vi_srbm_select(adev, 0, 0, 0, i);
38343715 /* CP and shaders */
38353716 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
....@@ -3839,6 +3720,33 @@
38393720 }
38403721 vi_srbm_select(adev, 0, 0, 0, 0);
38413722 mutex_unlock(&adev->srbm_mutex);
3723
+
3724
+ /* Initialize all compute VMIDs to have no GDS, GWS, or OA
3725
+ acccess. These should be enabled by FW for target VMIDs. */
3726
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3727
+ WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3728
+ WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3729
+ WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3730
+ WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3731
+ }
3732
+}
3733
+
3734
+static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3735
+{
3736
+ int vmid;
3737
+
3738
+ /*
3739
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3740
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
3741
+ * the driver can enable them for graphics. VMID0 should maintain
3742
+ * access so that HWS firmware can save/restore entries.
3743
+ */
3744
+ for (vmid = 1; vmid < 16; vmid++) {
3745
+ WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3746
+ WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3747
+ WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3748
+ WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3749
+ }
38423750 }
38433751
38443752 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
....@@ -3854,7 +3762,7 @@
38543762 }
38553763 }
38563764
3857
-static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3765
+static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
38583766 {
38593767 u32 tmp, sh_static_mem_cfg;
38603768 int i;
....@@ -3907,6 +3815,7 @@
39073815 mutex_unlock(&adev->srbm_mutex);
39083816
39093817 gfx_v8_0_init_compute_vmid(adev);
3818
+ gfx_v8_0_init_gds_vmid(adev);
39103819
39113820 mutex_lock(&adev->grbm_idx_mutex);
39123821 /*
....@@ -3989,6 +3898,7 @@
39893898
39903899 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
39913900 {
3901
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
39923902 /* csib */
39933903 WREG32(mmRLC_CSIB_ADDR_HI,
39943904 adev->gfx.rlc.clear_state_gpu_addr >> 32);
....@@ -4058,11 +3968,10 @@
40583968
40593969 int list_size;
40603970 unsigned int *register_list_format =
4061
- kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3971
+ kmemdup(adev->gfx.rlc.register_list_format,
3972
+ adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
40623973 if (!register_list_format)
40633974 return -ENOMEM;
4064
- memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4065
- adev->gfx.rlc.reg_list_format_size_bytes);
40663975
40673976 gfx_v8_0_parse_ind_reg_list(register_list_format,
40683977 RLC_FormatDirectRegListLength,
....@@ -4200,73 +4109,23 @@
42004109 udelay(50);
42014110 }
42024111
4203
-static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4204
-{
4205
- const struct rlc_firmware_header_v2_0 *hdr;
4206
- const __le32 *fw_data;
4207
- unsigned i, fw_size;
4208
-
4209
- if (!adev->gfx.rlc_fw)
4210
- return -EINVAL;
4211
-
4212
- hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4213
- amdgpu_ucode_print_rlc_hdr(&hdr->header);
4214
-
4215
- fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4216
- le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4217
- fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4218
-
4219
- WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4220
- for (i = 0; i < fw_size; i++)
4221
- WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4222
- WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4223
-
4224
- return 0;
4225
-}
4226
-
42274112 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
42284113 {
4229
- int r;
4230
- u32 tmp;
4231
-
4232
- gfx_v8_0_rlc_stop(adev);
4233
-
4234
- /* disable CG */
4235
- tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4236
- tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4237
- RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4238
- WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4239
- if (adev->asic_type == CHIP_POLARIS11 ||
4240
- adev->asic_type == CHIP_POLARIS10 ||
4241
- adev->asic_type == CHIP_POLARIS12 ||
4242
- adev->asic_type == CHIP_VEGAM) {
4243
- tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4244
- tmp &= ~0x3;
4245
- WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4114
+ if (amdgpu_sriov_vf(adev)) {
4115
+ gfx_v8_0_init_csb(adev);
4116
+ return 0;
42464117 }
42474118
4248
- /* disable PG */
4249
- WREG32(mmRLC_PG_CNTL, 0);
4250
-
4251
- gfx_v8_0_rlc_reset(adev);
4119
+ adev->gfx.rlc.funcs->stop(adev);
4120
+ adev->gfx.rlc.funcs->reset(adev);
42524121 gfx_v8_0_init_pg(adev);
4253
-
4254
-
4255
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4256
- /* legacy rlc firmware loading */
4257
- r = gfx_v8_0_rlc_load_microcode(adev);
4258
- if (r)
4259
- return r;
4260
- }
4261
-
4262
- gfx_v8_0_rlc_start(adev);
4122
+ adev->gfx.rlc.funcs->start(adev);
42634123
42644124 return 0;
42654125 }
42664126
42674127 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
42684128 {
4269
- int i;
42704129 u32 tmp = RREG32(mmCP_ME_CNTL);
42714130
42724131 if (enable) {
....@@ -4277,68 +4136,9 @@
42774136 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
42784137 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
42794138 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4280
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4281
- adev->gfx.gfx_ring[i].ready = false;
42824139 }
42834140 WREG32(mmCP_ME_CNTL, tmp);
42844141 udelay(50);
4285
-}
4286
-
4287
-static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4288
-{
4289
- const struct gfx_firmware_header_v1_0 *pfp_hdr;
4290
- const struct gfx_firmware_header_v1_0 *ce_hdr;
4291
- const struct gfx_firmware_header_v1_0 *me_hdr;
4292
- const __le32 *fw_data;
4293
- unsigned i, fw_size;
4294
-
4295
- if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4296
- return -EINVAL;
4297
-
4298
- pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4299
- adev->gfx.pfp_fw->data;
4300
- ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4301
- adev->gfx.ce_fw->data;
4302
- me_hdr = (const struct gfx_firmware_header_v1_0 *)
4303
- adev->gfx.me_fw->data;
4304
-
4305
- amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4306
- amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4307
- amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4308
-
4309
- gfx_v8_0_cp_gfx_enable(adev, false);
4310
-
4311
- /* PFP */
4312
- fw_data = (const __le32 *)
4313
- (adev->gfx.pfp_fw->data +
4314
- le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4315
- fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4316
- WREG32(mmCP_PFP_UCODE_ADDR, 0);
4317
- for (i = 0; i < fw_size; i++)
4318
- WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4319
- WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4320
-
4321
- /* CE */
4322
- fw_data = (const __le32 *)
4323
- (adev->gfx.ce_fw->data +
4324
- le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4325
- fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4326
- WREG32(mmCP_CE_UCODE_ADDR, 0);
4327
- for (i = 0; i < fw_size; i++)
4328
- WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4329
- WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4330
-
4331
- /* ME */
4332
- fw_data = (const __le32 *)
4333
- (adev->gfx.me_fw->data +
4334
- le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4335
- fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4336
- WREG32(mmCP_ME_RAM_WADDR, 0);
4337
- for (i = 0; i < fw_size; i++)
4338
- WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4339
- WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4340
-
4341
- return 0;
43424142 }
43434143
43444144 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
....@@ -4460,7 +4260,7 @@
44604260
44614261 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
44624262 DOORBELL_RANGE_LOWER,
4463
- AMDGPU_DOORBELL_GFX_RING0);
4263
+ adev->doorbell_index.gfx_ring0);
44644264 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
44654265
44664266 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
....@@ -4473,7 +4273,6 @@
44734273 u32 tmp;
44744274 u32 rb_bufsz;
44754275 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4476
- int r;
44774276
44784277 /* Set the write pointer delay */
44794278 WREG32(mmCP_RB_WPTR_DELAY, 0);
....@@ -4517,73 +4316,20 @@
45174316 /* start the ring */
45184317 amdgpu_ring_clear_ring(ring);
45194318 gfx_v8_0_cp_gfx_start(adev);
4520
- ring->ready = true;
4521
- r = amdgpu_ring_test_ring(ring);
4522
- if (r)
4523
- ring->ready = false;
4319
+ ring->sched.ready = true;
45244320
4525
- return r;
4321
+ return 0;
45264322 }
45274323
45284324 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
45294325 {
4530
- int i;
4531
-
45324326 if (enable) {
45334327 WREG32(mmCP_MEC_CNTL, 0);
45344328 } else {
45354329 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4536
- for (i = 0; i < adev->gfx.num_compute_rings; i++)
4537
- adev->gfx.compute_ring[i].ready = false;
4538
- adev->gfx.kiq.ring.ready = false;
4330
+ adev->gfx.kiq.ring.sched.ready = false;
45394331 }
45404332 udelay(50);
4541
-}
4542
-
4543
-static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4544
-{
4545
- const struct gfx_firmware_header_v1_0 *mec_hdr;
4546
- const __le32 *fw_data;
4547
- unsigned i, fw_size;
4548
-
4549
- if (!adev->gfx.mec_fw)
4550
- return -EINVAL;
4551
-
4552
- gfx_v8_0_cp_compute_enable(adev, false);
4553
-
4554
- mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4555
- amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4556
-
4557
- fw_data = (const __le32 *)
4558
- (adev->gfx.mec_fw->data +
4559
- le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4560
- fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4561
-
4562
- /* MEC1 */
4563
- WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4564
- for (i = 0; i < fw_size; i++)
4565
- WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4566
- WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4567
-
4568
- /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4569
- if (adev->gfx.mec2_fw) {
4570
- const struct gfx_firmware_header_v1_0 *mec2_hdr;
4571
-
4572
- mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4573
- amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4574
-
4575
- fw_data = (const __le32 *)
4576
- (adev->gfx.mec2_fw->data +
4577
- le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4578
- fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4579
-
4580
- WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4581
- for (i = 0; i < fw_size; i++)
4582
- WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4583
- WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4584
- }
4585
-
4586
- return 0;
45874333 }
45884334
45894335 /* KIQ functions */
....@@ -4604,7 +4350,6 @@
46044350 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
46054351 {
46064352 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4607
- uint32_t scratch, tmp = 0;
46084353 uint64_t queue_mask = 0;
46094354 int r, i;
46104355
....@@ -4623,17 +4368,9 @@
46234368 queue_mask |= (1ull << i);
46244369 }
46254370
4626
- r = amdgpu_gfx_scratch_get(adev, &scratch);
4627
- if (r) {
4628
- DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4629
- return r;
4630
- }
4631
- WREG32(scratch, 0xCAFEDEAD);
4632
-
4633
- r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4371
+ r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
46344372 if (r) {
46354373 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4636
- amdgpu_gfx_scratch_free(adev, scratch);
46374374 return r;
46384375 }
46394376 /* set resources */
....@@ -4665,26 +4402,10 @@
46654402 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
46664403 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
46674404 }
4668
- /* write to scratch for completion */
4669
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4670
- amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4671
- amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4405
+
46724406 amdgpu_ring_commit(kiq_ring);
46734407
4674
- for (i = 0; i < adev->usec_timeout; i++) {
4675
- tmp = RREG32(scratch);
4676
- if (tmp == 0xDEADBEEF)
4677
- break;
4678
- DRM_UDELAY(1);
4679
- }
4680
- if (i >= adev->usec_timeout) {
4681
- DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4682
- scratch, tmp);
4683
- r = -EINVAL;
4684
- }
4685
- amdgpu_gfx_scratch_free(adev, scratch);
4686
-
4687
- return r;
4408
+ return 0;
46884409 }
46894410
46904411 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
....@@ -4706,6 +4427,20 @@
47064427 WREG32(mmCP_HQD_PQ_WPTR, 0);
47074428
47084429 return r;
4430
+}
4431
+
4432
+static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4433
+{
4434
+ struct amdgpu_device *adev = ring->adev;
4435
+
4436
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4437
+ if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe,
4438
+ ring->queue)) {
4439
+ mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4440
+ mqd->cp_hqd_queue_priority =
4441
+ AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4442
+ }
4443
+ }
47094444 }
47104445
47114446 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
....@@ -4831,9 +4566,6 @@
48314566 /* defaults */
48324567 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
48334568 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4834
- mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4835
- mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4836
- mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
48374569 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
48384570 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
48394571 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
....@@ -4845,13 +4577,20 @@
48454577 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
48464578 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
48474579
4848
- /* activate the queue */
4849
- mqd->cp_hqd_active = 1;
4580
+ /* set static priority for a queue/ring */
4581
+ gfx_v8_0_mqd_set_priority(ring, mqd);
4582
+ mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4583
+
4584
+ /* map_queues packet doesn't need activate the queue,
4585
+ * so only kiq need set this field.
4586
+ */
4587
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4588
+ mqd->cp_hqd_active = 1;
48504589
48514590 return 0;
48524591 }
48534592
4854
-int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4593
+static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
48554594 struct vi_mqd *mqd)
48564595 {
48574596 uint32_t mqd_reg;
....@@ -4896,7 +4635,7 @@
48964635
48974636 gfx_v8_0_kiq_setting(ring);
48984637
4899
- if (adev->in_gpu_reset) { /* for GPU_RESET case */
4638
+ if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
49004639 /* reset MQD to a clean status */
49014640 if (adev->gfx.mec.mqd_backup[mqd_idx])
49024641 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
....@@ -4933,7 +4672,7 @@
49334672 struct vi_mqd *mqd = ring->mqd_ptr;
49344673 int mqd_idx = ring - &adev->gfx.compute_ring[0];
49354674
4936
- if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
4675
+ if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
49374676 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
49384677 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
49394678 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
....@@ -4945,7 +4684,7 @@
49454684
49464685 if (adev->gfx.mec.mqd_backup[mqd_idx])
49474686 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4948
- } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4687
+ } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
49494688 /* reset MQD to a clean status */
49504689 if (adev->gfx.mec.mqd_backup[mqd_idx])
49514690 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
....@@ -4961,8 +4700,8 @@
49614700 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
49624701 {
49634702 if (adev->asic_type > CHIP_TONGA) {
4964
- WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4965
- WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4703
+ WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4704
+ WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
49664705 }
49674706 /* enable doorbells */
49684707 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
....@@ -4970,26 +4709,33 @@
49704709
49714710 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
49724711 {
4973
- struct amdgpu_ring *ring = NULL;
4974
- int r = 0, i;
4975
-
4976
- gfx_v8_0_cp_compute_enable(adev, true);
4712
+ struct amdgpu_ring *ring;
4713
+ int r;
49774714
49784715 ring = &adev->gfx.kiq.ring;
49794716
49804717 r = amdgpu_bo_reserve(ring->mqd_obj, false);
49814718 if (unlikely(r != 0))
4982
- goto done;
4719
+ return r;
49834720
49844721 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4985
- if (!r) {
4986
- r = gfx_v8_0_kiq_init_queue(ring);
4987
- amdgpu_bo_kunmap(ring->mqd_obj);
4988
- ring->mqd_ptr = NULL;
4989
- }
4722
+ if (unlikely(r != 0))
4723
+ return r;
4724
+
4725
+ gfx_v8_0_kiq_init_queue(ring);
4726
+ amdgpu_bo_kunmap(ring->mqd_obj);
4727
+ ring->mqd_ptr = NULL;
49904728 amdgpu_bo_unreserve(ring->mqd_obj);
4991
- if (r)
4992
- goto done;
4729
+ ring->sched.ready = true;
4730
+ return 0;
4731
+}
4732
+
4733
+static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4734
+{
4735
+ struct amdgpu_ring *ring = NULL;
4736
+ int r = 0, i;
4737
+
4738
+ gfx_v8_0_cp_compute_enable(adev, true);
49934739
49944740 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
49954741 ring = &adev->gfx.compute_ring[i];
....@@ -5014,26 +4760,32 @@
50144760 if (r)
50154761 goto done;
50164762
5017
- /* Test KIQ */
5018
- ring = &adev->gfx.kiq.ring;
5019
- ring->ready = true;
5020
- r = amdgpu_ring_test_ring(ring);
5021
- if (r) {
5022
- ring->ready = false;
5023
- goto done;
5024
- }
5025
-
5026
- /* Test KCQs */
5027
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5028
- ring = &adev->gfx.compute_ring[i];
5029
- ring->ready = true;
5030
- r = amdgpu_ring_test_ring(ring);
5031
- if (r)
5032
- ring->ready = false;
5033
- }
5034
-
50354763 done:
50364764 return r;
4765
+}
4766
+
4767
+static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4768
+{
4769
+ int r, i;
4770
+ struct amdgpu_ring *ring;
4771
+
4772
+ /* collect all the ring_tests here, gfx, kiq, compute */
4773
+ ring = &adev->gfx.gfx_ring[0];
4774
+ r = amdgpu_ring_test_helper(ring);
4775
+ if (r)
4776
+ return r;
4777
+
4778
+ ring = &adev->gfx.kiq.ring;
4779
+ r = amdgpu_ring_test_helper(ring);
4780
+ if (r)
4781
+ return r;
4782
+
4783
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4784
+ ring = &adev->gfx.compute_ring[i];
4785
+ amdgpu_ring_test_helper(ring);
4786
+ }
4787
+
4788
+ return 0;
50374789 }
50384790
50394791 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
....@@ -5043,22 +4795,19 @@
50434795 if (!(adev->flags & AMD_IS_APU))
50444796 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
50454797
5046
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
5047
- /* legacy firmware loading */
5048
- r = gfx_v8_0_cp_gfx_load_microcode(adev);
5049
- if (r)
5050
- return r;
5051
-
5052
- r = gfx_v8_0_cp_compute_load_microcode(adev);
5053
- if (r)
5054
- return r;
5055
- }
4798
+ r = gfx_v8_0_kiq_resume(adev);
4799
+ if (r)
4800
+ return r;
50564801
50574802 r = gfx_v8_0_cp_gfx_resume(adev);
50584803 if (r)
50594804 return r;
50604805
5061
- r = gfx_v8_0_kiq_resume(adev);
4806
+ r = gfx_v8_0_kcq_resume(adev);
4807
+ if (r)
4808
+ return r;
4809
+
4810
+ r = gfx_v8_0_cp_test_all_rings(adev);
50624811 if (r)
50634812 return r;
50644813
....@@ -5079,9 +4828,9 @@
50794828 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
50804829
50814830 gfx_v8_0_init_golden_registers(adev);
5082
- gfx_v8_0_gpu_init(adev);
4831
+ gfx_v8_0_constants_init(adev);
50834832
5084
- r = gfx_v8_0_rlc_resume(adev);
4833
+ r = adev->gfx.rlc.funcs->resume(adev);
50854834 if (r)
50864835 return r;
50874836
....@@ -5090,101 +4839,33 @@
50904839 return r;
50914840 }
50924841
5093
-static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
4842
+static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
50944843 {
5095
- struct amdgpu_device *adev = kiq_ring->adev;
5096
- uint32_t scratch, tmp = 0;
50974844 int r, i;
4845
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
50984846
5099
- r = amdgpu_gfx_scratch_get(adev, &scratch);
5100
- if (r) {
5101
- DRM_ERROR("Failed to get scratch reg (%d).\n", r);
5102
- return r;
5103
- }
5104
- WREG32(scratch, 0xCAFEDEAD);
5105
-
5106
- r = amdgpu_ring_alloc(kiq_ring, 10);
5107
- if (r) {
4847
+ r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4848
+ if (r)
51084849 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
5109
- amdgpu_gfx_scratch_free(adev, scratch);
5110
- return r;
5111
- }
51124850
5113
- /* unmap queues */
5114
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
5115
- amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4851
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4852
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4853
+
4854
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4855
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
51164856 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
51174857 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
51184858 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
51194859 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
5120
- amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
5121
- amdgpu_ring_write(kiq_ring, 0);
5122
- amdgpu_ring_write(kiq_ring, 0);
5123
- amdgpu_ring_write(kiq_ring, 0);
5124
- /* write to scratch for completion */
5125
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
5126
- amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
5127
- amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
5128
- amdgpu_ring_commit(kiq_ring);
5129
-
5130
- for (i = 0; i < adev->usec_timeout; i++) {
5131
- tmp = RREG32(scratch);
5132
- if (tmp == 0xDEADBEEF)
5133
- break;
5134
- DRM_UDELAY(1);
4860
+ amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4861
+ amdgpu_ring_write(kiq_ring, 0);
4862
+ amdgpu_ring_write(kiq_ring, 0);
4863
+ amdgpu_ring_write(kiq_ring, 0);
51354864 }
5136
- if (i >= adev->usec_timeout) {
5137
- DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
5138
- r = -EINVAL;
5139
- }
5140
- amdgpu_gfx_scratch_free(adev, scratch);
5141
- return r;
5142
-}
4865
+ r = amdgpu_ring_test_helper(kiq_ring);
4866
+ if (r)
4867
+ DRM_ERROR("KCQ disable failed\n");
51434868
5144
-static int gfx_v8_0_hw_fini(void *handle)
5145
-{
5146
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5147
- int i;
5148
-
5149
- amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5150
- amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5151
-
5152
- amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
5153
-
5154
- amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
5155
-
5156
- /* disable KCQ to avoid CPC touch memory not valid anymore */
5157
- for (i = 0; i < adev->gfx.num_compute_rings; i++)
5158
- gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
5159
-
5160
- if (amdgpu_sriov_vf(adev)) {
5161
- pr_debug("For SRIOV client, shouldn't do anything.\n");
5162
- return 0;
5163
- }
5164
- gfx_v8_0_cp_enable(adev, false);
5165
- gfx_v8_0_rlc_stop(adev);
5166
-
5167
- amdgpu_device_ip_set_powergating_state(adev,
5168
- AMD_IP_BLOCK_TYPE_GFX,
5169
- AMD_PG_STATE_UNGATE);
5170
-
5171
- return 0;
5172
-}
5173
-
5174
-static int gfx_v8_0_suspend(void *handle)
5175
-{
5176
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5177
- adev->gfx.in_suspend = true;
5178
- return gfx_v8_0_hw_fini(adev);
5179
-}
5180
-
5181
-static int gfx_v8_0_resume(void *handle)
5182
-{
5183
- int r;
5184
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5185
-
5186
- r = gfx_v8_0_hw_init(adev);
5187
- adev->gfx.in_suspend = false;
51884869 return r;
51894870 }
51904871
....@@ -5192,15 +4873,40 @@
51924873 {
51934874 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
51944875
5195
- if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4876
+ if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4877
+ || RREG32(mmGRBM_STATUS2) != 0x8)
51964878 return false;
51974879 else
51984880 return true;
51994881 }
52004882
4883
+static bool gfx_v8_0_rlc_is_idle(void *handle)
4884
+{
4885
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4886
+
4887
+ if (RREG32(mmGRBM_STATUS2) != 0x8)
4888
+ return false;
4889
+ else
4890
+ return true;
4891
+}
4892
+
4893
+static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4894
+{
4895
+ unsigned int i;
4896
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4897
+
4898
+ for (i = 0; i < adev->usec_timeout; i++) {
4899
+ if (gfx_v8_0_rlc_is_idle(handle))
4900
+ return 0;
4901
+
4902
+ udelay(1);
4903
+ }
4904
+ return -ETIMEDOUT;
4905
+}
4906
+
52014907 static int gfx_v8_0_wait_for_idle(void *handle)
52024908 {
5203
- unsigned i;
4909
+ unsigned int i;
52044910 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
52054911
52064912 for (i = 0; i < adev->usec_timeout; i++) {
....@@ -5210,6 +4916,48 @@
52104916 udelay(1);
52114917 }
52124918 return -ETIMEDOUT;
4919
+}
4920
+
4921
+static int gfx_v8_0_hw_fini(void *handle)
4922
+{
4923
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4924
+
4925
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4926
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4927
+
4928
+ amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4929
+
4930
+ amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4931
+
4932
+ /* disable KCQ to avoid CPC touch memory not valid anymore */
4933
+ gfx_v8_0_kcq_disable(adev);
4934
+
4935
+ if (amdgpu_sriov_vf(adev)) {
4936
+ pr_debug("For SRIOV client, shouldn't do anything.\n");
4937
+ return 0;
4938
+ }
4939
+ amdgpu_gfx_rlc_enter_safe_mode(adev);
4940
+ if (!gfx_v8_0_wait_for_idle(adev))
4941
+ gfx_v8_0_cp_enable(adev, false);
4942
+ else
4943
+ pr_err("cp is busy, skip halt cp\n");
4944
+ if (!gfx_v8_0_wait_for_rlc_idle(adev))
4945
+ adev->gfx.rlc.funcs->stop(adev);
4946
+ else
4947
+ pr_err("rlc is busy, skip halt rlc\n");
4948
+ amdgpu_gfx_rlc_exit_safe_mode(adev);
4949
+
4950
+ return 0;
4951
+}
4952
+
4953
+static int gfx_v8_0_suspend(void *handle)
4954
+{
4955
+ return gfx_v8_0_hw_fini(handle);
4956
+}
4957
+
4958
+static int gfx_v8_0_resume(void *handle)
4959
+{
4960
+ return gfx_v8_0_hw_init(handle);
52134961 }
52144962
52154963 static bool gfx_v8_0_check_soft_reset(void *handle)
....@@ -5277,17 +5025,16 @@
52775025 static int gfx_v8_0_pre_soft_reset(void *handle)
52785026 {
52795027 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5280
- u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5028
+ u32 grbm_soft_reset = 0;
52815029
52825030 if ((!adev->gfx.grbm_soft_reset) &&
52835031 (!adev->gfx.srbm_soft_reset))
52845032 return 0;
52855033
52865034 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5287
- srbm_soft_reset = adev->gfx.srbm_soft_reset;
52885035
52895036 /* stop the rlc */
5290
- gfx_v8_0_rlc_stop(adev);
5037
+ adev->gfx.rlc.funcs->stop(adev);
52915038
52925039 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
52935040 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
....@@ -5381,18 +5128,13 @@
53815128 static int gfx_v8_0_post_soft_reset(void *handle)
53825129 {
53835130 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5384
- u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5131
+ u32 grbm_soft_reset = 0;
53855132
53865133 if ((!adev->gfx.grbm_soft_reset) &&
53875134 (!adev->gfx.srbm_soft_reset))
53885135 return 0;
53895136
53905137 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5391
- srbm_soft_reset = adev->gfx.srbm_soft_reset;
5392
-
5393
- if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5394
- REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5395
- gfx_v8_0_cp_gfx_resume(adev);
53965138
53975139 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
53985140 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
....@@ -5410,8 +5152,16 @@
54105152 mutex_unlock(&adev->srbm_mutex);
54115153 }
54125154 gfx_v8_0_kiq_resume(adev);
5155
+ gfx_v8_0_kcq_resume(adev);
54135156 }
5414
- gfx_v8_0_rlc_start(adev);
5157
+
5158
+ if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5159
+ REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5160
+ gfx_v8_0_cp_gfx_resume(adev);
5161
+
5162
+ gfx_v8_0_cp_test_all_rings(adev);
5163
+
5164
+ adev->gfx.rlc.funcs->start(adev);
54155165
54165166 return 0;
54175167 }
....@@ -5442,15 +5192,6 @@
54425192 uint32_t gws_base, uint32_t gws_size,
54435193 uint32_t oa_base, uint32_t oa_size)
54445194 {
5445
- gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5446
- gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5447
-
5448
- gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5449
- gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5450
-
5451
- oa_base = oa_base >> AMDGPU_OA_SHIFT;
5452
- oa_size = oa_size >> AMDGPU_OA_SHIFT;
5453
-
54545195 /* GDS Base */
54555196 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
54565197 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
....@@ -5556,7 +5297,7 @@
55565297 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
55575298
55585299 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5559
- adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5300
+ adev->gfx.num_compute_rings = amdgpu_num_kcq;
55605301 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
55615302 gfx_v8_0_set_ring_funcs(adev);
55625303 gfx_v8_0_set_irq_funcs(adev);
....@@ -5604,10 +5345,9 @@
56045345 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
56055346 bool enable)
56065347 {
5607
- if (((adev->asic_type == CHIP_POLARIS11) ||
5348
+ if ((adev->asic_type == CHIP_POLARIS11) ||
56085349 (adev->asic_type == CHIP_POLARIS12) ||
5609
- (adev->asic_type == CHIP_VEGAM)) &&
5610
- adev->powerplay.pp_funcs->set_powergating_by_smu)
5350
+ (adev->asic_type == CHIP_VEGAM))
56115351 /* Send msg to SMU via Powerplay */
56125352 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
56135353
....@@ -5668,7 +5408,7 @@
56685408 AMD_PG_SUPPORT_RLC_SMU_HS |
56695409 AMD_PG_SUPPORT_CP |
56705410 AMD_PG_SUPPORT_GFX_DMG))
5671
- adev->gfx.rlc.funcs->enter_safe_mode(adev);
5411
+ amdgpu_gfx_rlc_enter_safe_mode(adev);
56725412 switch (adev->asic_type) {
56735413 case CHIP_CARRIZO:
56745414 case CHIP_STONEY:
....@@ -5722,7 +5462,7 @@
57225462 AMD_PG_SUPPORT_RLC_SMU_HS |
57235463 AMD_PG_SUPPORT_CP |
57245464 AMD_PG_SUPPORT_GFX_DMG))
5725
- adev->gfx.rlc.funcs->exit_safe_mode(adev);
5465
+ amdgpu_gfx_rlc_exit_safe_mode(adev);
57265466 return 0;
57275467 }
57285468
....@@ -5816,57 +5556,53 @@
58165556 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
58175557 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
58185558
5819
-static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5559
+static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
58205560 {
5821
- u32 data;
5561
+ uint32_t rlc_setting;
5562
+
5563
+ rlc_setting = RREG32(mmRLC_CNTL);
5564
+ if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5565
+ return false;
5566
+
5567
+ return true;
5568
+}
5569
+
5570
+static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5571
+{
5572
+ uint32_t data;
58225573 unsigned i;
5823
-
58245574 data = RREG32(mmRLC_CNTL);
5825
- if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5826
- return;
5575
+ data |= RLC_SAFE_MODE__CMD_MASK;
5576
+ data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5577
+ data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5578
+ WREG32(mmRLC_SAFE_MODE, data);
58275579
5828
- if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5829
- data |= RLC_SAFE_MODE__CMD_MASK;
5830
- data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5831
- data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5832
- WREG32(mmRLC_SAFE_MODE, data);
5833
-
5834
- for (i = 0; i < adev->usec_timeout; i++) {
5835
- if ((RREG32(mmRLC_GPM_STAT) &
5836
- (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5837
- RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5838
- (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5839
- RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5840
- break;
5841
- udelay(1);
5842
- }
5843
-
5844
- for (i = 0; i < adev->usec_timeout; i++) {
5845
- if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5846
- break;
5847
- udelay(1);
5848
- }
5849
- adev->gfx.rlc.in_safe_mode = true;
5580
+ /* wait for RLC_SAFE_MODE */
5581
+ for (i = 0; i < adev->usec_timeout; i++) {
5582
+ if ((RREG32(mmRLC_GPM_STAT) &
5583
+ (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5584
+ RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5585
+ (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5586
+ RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5587
+ break;
5588
+ udelay(1);
5589
+ }
5590
+ for (i = 0; i < adev->usec_timeout; i++) {
5591
+ if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5592
+ break;
5593
+ udelay(1);
58505594 }
58515595 }
58525596
5853
-static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5597
+static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
58545598 {
5855
- u32 data = 0;
5599
+ uint32_t data;
58565600 unsigned i;
58575601
58585602 data = RREG32(mmRLC_CNTL);
5859
- if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5860
- return;
5861
-
5862
- if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5863
- if (adev->gfx.rlc.in_safe_mode) {
5864
- data |= RLC_SAFE_MODE__CMD_MASK;
5865
- data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5866
- WREG32(mmRLC_SAFE_MODE, data);
5867
- adev->gfx.rlc.in_safe_mode = false;
5868
- }
5869
- }
5603
+ data |= RLC_SAFE_MODE__CMD_MASK;
5604
+ data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5605
+ WREG32(mmRLC_SAFE_MODE, data);
58705606
58715607 for (i = 0; i < adev->usec_timeout; i++) {
58725608 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
....@@ -5875,9 +5611,37 @@
58755611 }
58765612 }
58775613
5614
+static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5615
+{
5616
+ u32 data;
5617
+
5618
+ if (amdgpu_sriov_is_pp_one_vf(adev))
5619
+ data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5620
+ else
5621
+ data = RREG32(mmRLC_SPM_VMID);
5622
+
5623
+ data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5624
+ data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5625
+
5626
+ if (amdgpu_sriov_is_pp_one_vf(adev))
5627
+ WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5628
+ else
5629
+ WREG32(mmRLC_SPM_VMID, data);
5630
+}
5631
+
58785632 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5879
- .enter_safe_mode = iceland_enter_rlc_safe_mode,
5880
- .exit_safe_mode = iceland_exit_rlc_safe_mode
5633
+ .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5634
+ .set_safe_mode = gfx_v8_0_set_safe_mode,
5635
+ .unset_safe_mode = gfx_v8_0_unset_safe_mode,
5636
+ .init = gfx_v8_0_rlc_init,
5637
+ .get_csb_size = gfx_v8_0_get_csb_size,
5638
+ .get_csb_buffer = gfx_v8_0_get_csb_buffer,
5639
+ .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5640
+ .resume = gfx_v8_0_rlc_resume,
5641
+ .stop = gfx_v8_0_rlc_stop,
5642
+ .reset = gfx_v8_0_rlc_reset,
5643
+ .start = gfx_v8_0_rlc_start,
5644
+ .update_spm_vmid = gfx_v8_0_update_spm_vmid
58815645 };
58825646
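With this table the per-ASIC safe-mode entry/exit helpers go away: callers switch to the shared amdgpu_gfx_rlc_enter_safe_mode()/amdgpu_gfx_rlc_exit_safe_mode() wrappers, which layer the in_safe_mode bookkeeping and the clockgating-flag check on top of the new callbacks. Roughly (a sketch of the common enter wrapper, not this file's code):

	/* Sketch: the shared enter helper built on the callbacks above. */
	void enter_safe_mode_sketch(struct amdgpu_device *adev)
	{
		if (adev->gfx.rlc.in_safe_mode)
			return;

		/* nothing to do if the RLC firmware is not running */
		if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
			return;

		if (adev->cg_flags &
		    (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
			adev->gfx.rlc.funcs->set_safe_mode(adev);
			adev->gfx.rlc.in_safe_mode = true;
		}
	}
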
58835647 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
....@@ -5885,7 +5649,7 @@
58855649 {
58865650 uint32_t temp, data;
58875651
5888
- adev->gfx.rlc.funcs->enter_safe_mode(adev);
5652
+ amdgpu_gfx_rlc_enter_safe_mode(adev);
58895653
58905654 /* It is disabled by HW by default */
58915655 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
....@@ -5981,7 +5745,7 @@
59815745 gfx_v8_0_wait_for_rlc_serdes(adev);
59825746 }
59835747
5984
- adev->gfx.rlc.funcs->exit_safe_mode(adev);
5748
+ amdgpu_gfx_rlc_exit_safe_mode(adev);
59855749 }
59865750
59875751 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
....@@ -5991,7 +5755,7 @@
59915755
59925756 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
59935757
5994
- adev->gfx.rlc.funcs->enter_safe_mode(adev);
5758
+ amdgpu_gfx_rlc_enter_safe_mode(adev);
59955759
59965760 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
59975761 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
....@@ -6074,7 +5838,7 @@
60745838
60755839 gfx_v8_0_wait_for_rlc_serdes(adev);
60765840
6077
- adev->gfx.rlc.funcs->exit_safe_mode(adev);
5841
+ amdgpu_gfx_rlc_exit_safe_mode(adev);
60785842 }
60795843 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
60805844 bool enable)
....@@ -6117,8 +5881,7 @@
61175881 PP_BLOCK_GFX_CG,
61185882 pp_support_state,
61195883 pp_state);
6120
- if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6121
- amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5884
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
61225885 }
61235886
61245887 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
....@@ -6139,8 +5902,7 @@
61395902 PP_BLOCK_GFX_MG,
61405903 pp_support_state,
61415904 pp_state);
6142
- if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6143
- amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5905
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
61445906 }
61455907
61465908 return 0;
....@@ -6169,8 +5931,7 @@
61695931 PP_BLOCK_GFX_CG,
61705932 pp_support_state,
61715933 pp_state);
6172
- if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6173
- amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5934
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
61745935 }
61755936
61765937 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
....@@ -6189,8 +5950,7 @@
61895950 PP_BLOCK_GFX_3D,
61905951 pp_support_state,
61915952 pp_state);
6192
- if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6193
- amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5953
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
61945954 }
61955955
61965956 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
....@@ -6211,8 +5971,7 @@
62115971 PP_BLOCK_GFX_MG,
62125972 pp_support_state,
62135973 pp_state);
6214
- if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6215
- amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5974
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
62165975 }
62175976
62185977 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
....@@ -6227,8 +5986,7 @@
62275986 PP_BLOCK_GFX_RLC,
62285987 pp_support_state,
62295988 pp_state);
6230
- if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6231
- amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5989
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
62325990 }
62335991
62345992 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
....@@ -6242,8 +6000,7 @@
62426000 PP_BLOCK_GFX_CP,
62436001 pp_support_state,
62446002 pp_state);
6245
- if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6246
- amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6003
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
62476004 }
62486005
62496006 return 0;
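
The open-coded pp_funcs NULL checks vanish throughout these hunks because the amdgpu_dpm_set_clockgating_by_smu() wrapper now guards the callback itself. Its shape is roughly (a sketch; the error value is an assumed convention):

	/* Sketch: the dpm wrapper owns the NULL check now. */
	int set_clockgating_by_smu_sketch(struct amdgpu_device *adev, u32 msg_id)
	{
		const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;

		if (!pp_funcs || !pp_funcs->set_clockgating_by_smu)
			return -EOPNOTSUPP;	/* assumed convention */

		return pp_funcs->set_clockgating_by_smu(adev->powerplay.pp_handle,
							msg_id);
	}
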
....@@ -6354,9 +6111,11 @@
63546111 }
63556112
63566113 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6357
- struct amdgpu_ib *ib,
6358
- unsigned vmid, bool ctx_switch)
6114
+ struct amdgpu_job *job,
6115
+ struct amdgpu_ib *ib,
6116
+ uint32_t flags)
63596117 {
6118
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
63606119 u32 header, control = 0;
63616120
63626121 if (ib->flags & AMDGPU_IB_FLAG_CE)
....@@ -6369,7 +6128,7 @@
63696128 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
63706129 control |= INDIRECT_BUFFER_PRE_ENB(1);
63716130
6372
- if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6131
+ if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
63736132 gfx_v8_0_ring_emit_de_meta(ring);
63746133 }
63756134
....@@ -6384,10 +6143,28 @@
63846143 }
63856144
63866145 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6146
+ struct amdgpu_job *job,
63876147 struct amdgpu_ib *ib,
6388
- unsigned vmid, bool ctx_switch)
6148
+ uint32_t flags)
63896149 {
6150
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
63906151 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6152
+
6153
+ /* Currently, there is a high likelihood of a wave ID mismatch
6154
+ * between ME and GDS, leading to a hw deadlock, because ME generates
6155
+ * different wave IDs than the GDS expects. This situation happens
6156
+ * randomly when at least 5 compute pipes use GDS ordered append.
6157
+ * The wave IDs generated by ME are also wrong after suspend/resume.
6158
+ * Those are probably bugs somewhere else in the kernel driver.
6159
+ *
6160
+ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6161
+ * GDS to 0 for this ring (me/pipe).
6162
+ */
6163
+ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6164
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6165
+ amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6166
+ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6167
+ }
63916168
63926169 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
63936170 amdgpu_ring_write(ring,
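
The workaround above costs one SET_CONFIG_REG packet per IB (PACKET3 header, register offset, value: 3 extra dwords), which is why emit_ib_size for the compute and KIQ ring funcs grows from 4 to 7 further down. On the submission side the reset would be requested roughly like this (illustrative; uses_gds_ordered_append is a hypothetical predicate, not a driver symbol):

	/* Illustrative: ask the CP to reset the wave-ID counters for
	 * an IB that is known to use GDS ordered append. */
	if (uses_gds_ordered_append)
		ib->flags |= AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID;
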
....@@ -6491,104 +6268,6 @@
64916268 /* XXX check if swapping is necessary on BE */
64926269 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
64936270 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6494
-}
6495
-
6496
-static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6497
- bool acquire)
6498
-{
6499
- struct amdgpu_device *adev = ring->adev;
6500
- int pipe_num, tmp, reg;
6501
- int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6502
-
6503
- pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6504
-
6505
- /* first me only has 2 entries, GFX and HP3D */
6506
- if (ring->me > 0)
6507
- pipe_num -= 2;
6508
-
6509
- reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6510
- tmp = RREG32(reg);
6511
- tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6512
- WREG32(reg, tmp);
6513
-}
6514
-
6515
-static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6516
- struct amdgpu_ring *ring,
6517
- bool acquire)
6518
-{
6519
- int i, pipe;
6520
- bool reserve;
6521
- struct amdgpu_ring *iring;
6522
-
6523
- mutex_lock(&adev->gfx.pipe_reserve_mutex);
6524
- pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6525
- if (acquire)
6526
- set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6527
- else
6528
- clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6529
-
6530
- if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6531
- /* Clear all reservations - everyone reacquires all resources */
6532
- for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6533
- gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6534
- true);
6535
-
6536
- for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6537
- gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6538
- true);
6539
- } else {
6540
- /* Lower all pipes without a current reservation */
6541
- for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6542
- iring = &adev->gfx.gfx_ring[i];
6543
- pipe = amdgpu_gfx_queue_to_bit(adev,
6544
- iring->me,
6545
- iring->pipe,
6546
- 0);
6547
- reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6548
- gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6549
- }
6550
-
6551
- for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6552
- iring = &adev->gfx.compute_ring[i];
6553
- pipe = amdgpu_gfx_queue_to_bit(adev,
6554
- iring->me,
6555
- iring->pipe,
6556
- 0);
6557
- reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6558
- gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6559
- }
6560
- }
6561
-
6562
- mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6563
-}
6564
-
6565
-static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6566
- struct amdgpu_ring *ring,
6567
- bool acquire)
6568
-{
6569
- uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6570
- uint32_t queue_priority = acquire ? 0xf : 0x0;
6571
-
6572
- mutex_lock(&adev->srbm_mutex);
6573
- vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6574
-
6575
- WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6576
- WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6577
-
6578
- vi_srbm_select(adev, 0, 0, 0, 0);
6579
- mutex_unlock(&adev->srbm_mutex);
6580
-}
6581
-static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6582
- enum drm_sched_priority priority)
6583
-{
6584
- struct amdgpu_device *adev = ring->adev;
6585
- bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6586
-
6587
- if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6588
- return;
6589
-
6590
- gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6591
- gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
65926271 }
65936272
65946273 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
....@@ -6703,7 +6382,8 @@
67036382 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
67046383 }
67056384
6706
-static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6385
+static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6386
+ uint32_t reg_val_offs)
67076387 {
67086388 struct amdgpu_device *adev = ring->adev;
67096389
....@@ -6714,9 +6394,9 @@
67146394 amdgpu_ring_write(ring, reg);
67156395 amdgpu_ring_write(ring, 0);
67166396 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6717
- adev->virt.reg_val_offs * 4));
6397
+ reg_val_offs * 4));
67186398 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6719
- adev->virt.reg_val_offs * 4));
6399
+ reg_val_offs * 4));
67206400 }
67216401
67226402 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
....@@ -6741,6 +6421,18 @@
67416421 amdgpu_ring_write(ring, reg);
67426422 amdgpu_ring_write(ring, 0);
67436423 amdgpu_ring_write(ring, val);
6424
+}
6425
+
6426
+static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6427
+{
6428
+ struct amdgpu_device *adev = ring->adev;
6429
+ uint32_t value = 0;
6430
+
6431
+ value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6432
+ value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6433
+ value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6434
+ value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6435
+ WREG32(mmSQ_CMD, value);
67446436 }
67456437
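The new .soft_recovery hook gives the job-timeout handler a cheaper option than a full GPU reset: ask the SQ to kill the waves of the offending VMID and check whether the hung fence then signals. The generic caller looks roughly like this (a sketch of the common helper, from memory; details may differ):

	/* Sketch: hammer the per-ring soft_recovery hook for up to 10 ms
	 * and report whether the stuck fence finally signaled. */
	bool soft_recovery_sketch(struct amdgpu_ring *ring, unsigned vmid,
				  struct dma_fence *fence)
	{
		ktime_t deadline = ktime_add_us(ktime_get(), 10000);

		if (!ring->funcs->soft_recovery || !fence)
			return false;

		while (!dma_fence_is_signaled(fence) &&
		       ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
			ring->funcs->soft_recovery(ring, vmid);

		return dma_fence_is_signaled(fence);
	}
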
67466438 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
....@@ -6829,7 +6521,7 @@
68296521 enum amdgpu_interrupt_state state)
68306522 {
68316523 switch (type) {
6832
- case AMDGPU_CP_IRQ_GFX_EOP:
6524
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
68336525 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
68346526 break;
68356527 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
....@@ -6965,12 +6657,39 @@
69656657 return 0;
69666658 }
69676659
6660
+static void gfx_v8_0_fault(struct amdgpu_device *adev,
6661
+ struct amdgpu_iv_entry *entry)
6662
+{
6663
+ u8 me_id, pipe_id, queue_id;
6664
+ struct amdgpu_ring *ring;
6665
+ int i;
6666
+
6667
+ me_id = (entry->ring_id & 0x0c) >> 2;
6668
+ pipe_id = (entry->ring_id & 0x03) >> 0;
6669
+ queue_id = (entry->ring_id & 0x70) >> 4;
6670
+
6671
+ switch (me_id) {
6672
+ case 0:
6673
+ drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6674
+ break;
6675
+ case 1:
6676
+ case 2:
6677
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6678
+ ring = &adev->gfx.compute_ring[i];
6679
+ if (ring->me == me_id && ring->pipe == pipe_id &&
6680
+ ring->queue == queue_id)
6681
+ drm_sched_fault(&ring->sched);
6682
+ }
6683
+ break;
6684
+ }
6685
+}
6686
+
69686687 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
69696688 struct amdgpu_irq_src *source,
69706689 struct amdgpu_iv_entry *entry)
69716690 {
69726691 DRM_ERROR("Illegal register access in command stream\n");
6973
- schedule_work(&adev->reset_work);
6692
+ gfx_v8_0_fault(adev, entry);
69746693 return 0;
69756694 }
69766695
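Routing faults through drm_sched_fault() replaces the device-global reset worker with the per-ring scheduler's own timeout path: the helper just arms the scheduler's timeout work to fire immediately, so recovery (guilty-job detection, soft recovery, then reset) happens in one place. Roughly (a sketch, assuming the timeout work item is named work_tdr as in the drm scheduler of this era):

	/* Sketch: make the scheduler's timeout handler run right away. */
	void drm_sched_fault_sketch(struct drm_gpu_scheduler *sched)
	{
		mod_delayed_work(system_wq, &sched->work_tdr, 0);
	}
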
....@@ -6979,7 +6698,7 @@
69796698 struct amdgpu_iv_entry *entry)
69806699 {
69816700 DRM_ERROR("Illegal instruction in command stream\n");
6982
- schedule_work(&adev->reset_work);
6701
+ gfx_v8_0_fault(adev, entry);
69836702 return 0;
69846703 }
69856704
....@@ -7091,50 +6810,32 @@
70916810 return 0;
70926811 }
70936812
7094
-static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
7095
- struct amdgpu_irq_src *src,
7096
- unsigned int type,
7097
- enum amdgpu_interrupt_state state)
6813
+static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
70986814 {
7099
- struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7100
-
7101
- switch (type) {
7102
- case AMDGPU_CP_KIQ_IRQ_DRIVER0:
7103
- WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
7104
- state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7105
- if (ring->me == 1)
7106
- WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
7107
- ring->pipe,
7108
- GENERIC2_INT_ENABLE,
7109
- state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7110
- else
7111
- WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
7112
- ring->pipe,
7113
- GENERIC2_INT_ENABLE,
7114
- state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7115
- break;
7116
- default:
7117
- BUG(); /* kiq only support GENERIC2_INT now */
7118
- break;
7119
- }
7120
- return 0;
6815
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
6816
+ amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6817
+ PACKET3_TC_ACTION_ENA |
6818
+ PACKET3_SH_KCACHE_ACTION_ENA |
6819
+ PACKET3_SH_ICACHE_ACTION_ENA |
6820
+ PACKET3_TC_WB_ACTION_ENA); /* CP_COHER_CNTL */
6821
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
6822
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6823
+ amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
71216824 }
71226825
7123
-static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
7124
- struct amdgpu_irq_src *source,
7125
- struct amdgpu_iv_entry *entry)
6826
+static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
71266827 {
7127
- u8 me_id, pipe_id, queue_id;
7128
- struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7129
-
7130
- me_id = (entry->ring_id & 0x0c) >> 2;
7131
- pipe_id = (entry->ring_id & 0x03) >> 0;
7132
- queue_id = (entry->ring_id & 0x70) >> 4;
7133
- DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
7134
- me_id, pipe_id, queue_id);
7135
-
7136
- amdgpu_fence_process(ring);
7137
- return 0;
6828
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6829
+ amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6830
+ PACKET3_TC_ACTION_ENA |
6831
+ PACKET3_SH_KCACHE_ACTION_ENA |
6832
+ PACKET3_SH_ICACHE_ACTION_ENA |
6833
+ PACKET3_TC_WB_ACTION_ENA); /* CP_COHER_CNTL */
6834
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
6835
+ amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */
6836
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6837
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
6838
+ amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
71386839 }
71396840
71406841 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
....@@ -7183,7 +6884,8 @@
71836884 3 + /* CNTX_CTRL */
71846885 5 + /* HDP_INVL */
71856886 12 + 12 + /* FENCE x2 */
7186
- 2, /* SWITCH_BUFFER */
6887
+ 2 + /* SWITCH_BUFFER */
6888
+ 5, /* SURFACE_SYNC */
71876889 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
71886890 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
71896891 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
....@@ -7200,6 +6902,8 @@
72006902 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
72016903 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
72026904 .emit_wreg = gfx_v8_0_ring_emit_wreg,
6905
+ .soft_recovery = gfx_v8_0_ring_soft_recovery,
6906
+ .emit_mem_sync = gfx_v8_0_emit_mem_sync,
72036907 };
72046908
72056909 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
....@@ -7216,8 +6920,9 @@
72166920 5 + /* hdp_invalidate */
72176921 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
72186922 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
7219
- 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
7220
- .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6923
+ 7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6924
+ 7, /* gfx_v8_0_emit_mem_sync_compute */
6925
+ .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
72216926 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
72226927 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
72236928 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
....@@ -7228,8 +6933,8 @@
72286933 .test_ib = gfx_v8_0_ring_test_ib,
72296934 .insert_nop = amdgpu_ring_insert_nop,
72306935 .pad_ib = amdgpu_ring_generic_pad_ib,
7231
- .set_priority = gfx_v8_0_ring_set_priority_compute,
72326936 .emit_wreg = gfx_v8_0_ring_emit_wreg,
6937
+ .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
72336938 };
72346939
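The ring-size bookkeeping above follows directly from the packets: a PACKET3 header's count field is the number of payload dwords minus one, so PACKET3(SURFACE_SYNC, 3) costs 5 dwords (the "+ 5" added to the gfx frame size) and PACKET3(ACQUIRE_MEM, 5) costs 7 (the "+ 7" on compute); likewise the GDS wave-ID workaround's 3 extra dwords take emit_ib_size from 4 to 7. As a compile-time sanity check (illustrative only):

	/* count field = payload dwords - 1, so total = count + 2. */
	#define PACKET3_TOTAL_DWORDS(count)	((count) + 2)

	_Static_assert(PACKET3_TOTAL_DWORDS(3) == 5, "gfx SURFACE_SYNC");
	_Static_assert(PACKET3_TOTAL_DWORDS(5) == 7, "compute ACQUIRE_MEM");
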
72356940 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
....@@ -7247,11 +6952,9 @@
72476952 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
72486953 17 + /* gfx_v8_0_ring_emit_vm_flush */
72496954 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7250
- .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
7251
- .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6955
+ .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
72526956 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
72536957 .test_ring = gfx_v8_0_ring_test_ring,
7254
- .test_ib = gfx_v8_0_ring_test_ib,
72556958 .insert_nop = amdgpu_ring_insert_nop,
72566959 .pad_ib = amdgpu_ring_generic_pad_ib,
72576960 .emit_rreg = gfx_v8_0_ring_emit_rreg,
....@@ -7286,11 +6989,6 @@
72866989 .process = gfx_v8_0_priv_inst_irq,
72876990 };
72886991
7289
-static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7290
- .set = gfx_v8_0_kiq_set_interrupt_state,
7291
- .process = gfx_v8_0_kiq_irq,
7292
-};
7293
-
72946992 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
72956993 .set = gfx_v8_0_set_cp_ecc_int_state,
72966994 .process = gfx_v8_0_cp_ecc_error_irq,
....@@ -7312,9 +7010,6 @@
73127010 adev->gfx.priv_inst_irq.num_types = 1;
73137011 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
73147012
7315
- adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7316
- adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
7317
-
73187013 adev->gfx.cp_ecc_error_irq.num_types = 1;
73197014 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
73207015
....@@ -7330,29 +7025,10 @@
73307025 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
73317026 {
73327027 /* init asic gds info */
7333
- adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7334
- adev->gds.gws.total_size = 64;
7335
- adev->gds.oa.total_size = 16;
7336
-
7337
- if (adev->gds.mem.total_size == 64 * 1024) {
7338
- adev->gds.mem.gfx_partition_size = 4096;
7339
- adev->gds.mem.cs_partition_size = 4096;
7340
-
7341
- adev->gds.gws.gfx_partition_size = 4;
7342
- adev->gds.gws.cs_partition_size = 4;
7343
-
7344
- adev->gds.oa.gfx_partition_size = 4;
7345
- adev->gds.oa.cs_partition_size = 1;
7346
- } else {
7347
- adev->gds.mem.gfx_partition_size = 1024;
7348
- adev->gds.mem.cs_partition_size = 1024;
7349
-
7350
- adev->gds.gws.gfx_partition_size = 16;
7351
- adev->gds.gws.cs_partition_size = 16;
7352
-
7353
- adev->gds.oa.gfx_partition_size = 4;
7354
- adev->gds.oa.cs_partition_size = 4;
7355
- }
7028
+ adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7029
+ adev->gds.gws_size = 64;
7030
+ adev->gds.oa_size = 16;
7031
+ adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
73567032 }
73577033
73587034 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,