From 1543e317f1da31b75942316931e8f491a8920811 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Thu, 04 Jan 2024 10:08:02 +0000
Subject: [PATCH] disable FB
---
kernel/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 528 +++++++++++++++++++++++++++-------------------------------
1 files changed, 247 insertions(+), 281 deletions(-)
diff --git a/kernel/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/kernel/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 95452c5..04eaf3a 100644
--- a/kernel/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/kernel/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -20,8 +20,10 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
+
#include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+
#include "amdgpu.h"
#include "amdgpu_ih.h"
#include "amdgpu_gfx.h"
@@ -882,7 +884,6 @@
static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
-static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev);
static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
@@ -1849,8 +1850,6 @@
*
*/
#define DEFAULT_SH_MEM_BASES (0x6000)
-#define FIRST_COMPUTE_VMID (8)
-#define LAST_COMPUTE_VMID (16)
static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
{
int i;
@@ -1868,7 +1867,7 @@
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT;
mutex_lock(&adev->srbm_mutex);
- for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
cik_srbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
WREG32(mmSH_MEM_CONFIG, sh_mem_config);
@@ -1878,6 +1877,33 @@
}
cik_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
+
+ /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+ acccess. These should be enabled by FW for target VMIDs. */
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
+ WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
+ WREG32(amdgpu_gds_reg_offset[i].gws, 0);
+ WREG32(amdgpu_gds_reg_offset[i].oa, 0);
+ }
+}
+
+static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev)
+{
+ int vmid;
+
+ /*
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ * the driver can enable them for graphics. VMID0 should maintain
+ * access so that HWS firmware can save/restore entries.
+ */
+ for (vmid = 1; vmid < 16; vmid++) {
+ WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
+ WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
+ WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
+ WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
+ }
}
static void gfx_v7_0_config_init(struct amdgpu_device *adev)
@@ -1886,14 +1912,14 @@
}
/**
- * gfx_v7_0_gpu_init - setup the 3D engine
+ * gfx_v7_0_constants_init - setup the 3D engine
*
* @adev: amdgpu_device pointer
*
- * Configures the 3D engine and tiling configuration
- * registers so that the 3D engine is usable.
+ * init the gfx constants such as the 3D engine, tiling configuration
+ * registers, maximum number of quad pipes, render backends...
*/
-static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
+static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
{
u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base;
u32 tmp;
@@ -1958,6 +1984,7 @@
mutex_unlock(&adev->srbm_mutex);
gfx_v7_0_init_compute_vmid(adev);
+ gfx_v7_0_init_gds_vmid(adev);
WREG32(mmSX_DEBUG_1, 0x20);
@@ -2064,17 +2091,14 @@
int r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r) {
- DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
+ if (r)
return r;
- }
+
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
- if (r) {
- DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r);
- amdgpu_gfx_scratch_free(adev, scratch);
- return r;
- }
+ if (r)
+ goto error_free_scratch;
+
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
amdgpu_ring_write(ring, 0xDEADBEEF);
@@ -2084,15 +2108,12 @@
tmp = RREG32(scratch);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
- if (i < adev->usec_timeout) {
- DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
- } else {
- DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
- ring->idx, scratch, tmp);
- r = -EINVAL;
- }
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+error_free_scratch:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@@ -2233,13 +2254,15 @@
* on the gfx ring for execution by the GPU.
*/
static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
- struct amdgpu_ib *ib,
- unsigned vmid, bool ctx_switch)
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
/* insert SWITCH_BUFFER packet before first IB in the ring frame */
- if (ctx_switch) {
+ if (flags & AMDGPU_HAVE_CTX_SWITCH) {
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
amdgpu_ring_write(ring, 0);
}
@@ -2262,10 +2285,28 @@
}
static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
struct amdgpu_ib *ib,
- unsigned vmid, bool ctx_switch)
+ uint32_t flags)
{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+ /* Currently, there is a high possibility to get wave ID mismatch
+ * between ME and GDS, leading to a hw deadlock, because ME generates
+ * different wave IDs than the GDS expects. This situation happens
+ * randomly when at least 5 compute pipes use GDS ordered append.
+ * The wave IDs generated by ME are also wrong after suspend/resume.
+ * Those are probably bugs somewhere else in the kernel driver.
+ *
+ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+ * GDS to 0 for this ring (me/pipe).
+ */
+ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
+ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+ }
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
amdgpu_ring_write(ring,
@@ -2316,17 +2357,16 @@
long r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r) {
- DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
+ if (r)
return r;
- }
+
WREG32(scratch, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
- if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r)
goto err1;
- }
+
ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
ib.ptr[2] = 0xDEADBEEF;
@@ -2338,22 +2378,16 @@
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
- DRM_ERROR("amdgpu: IB test timed out\n");
r = -ETIMEDOUT;
goto err2;
} else if (r < 0) {
- DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err2;
}
tmp = RREG32(scratch);
- if (tmp == 0xDEADBEEF) {
- DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+ if (tmp == 0xDEADBEEF)
r = 0;
- } else {
- DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
- scratch, tmp);
+ else
r = -EINVAL;
- }
err2:
amdgpu_ib_free(adev, &ib, NULL);
@@ -2396,15 +2430,12 @@
*/
static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
- int i;
-
- if (enable) {
+ if (enable)
WREG32(mmCP_ME_CNTL, 0);
- } else {
- WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK));
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- adev->gfx.gfx_ring[i].ready = false;
- }
+ else
+ WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK |
+ CP_ME_CNTL__PFP_HALT_MASK |
+ CP_ME_CNTL__CE_HALT_MASK));
udelay(50);
}
@@ -2613,12 +2644,9 @@
/* start the ring */
gfx_v7_0_cp_gfx_start(adev);
- ring->ready = true;
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->ready = false;
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
return r;
- }
return 0;
}
@@ -2668,15 +2696,11 @@
*/
static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
- int i;
-
- if (enable) {
+ if (enable)
WREG32(mmCP_MEC_CNTL, 0);
- } else {
- WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
- for (i = 0; i < adev->gfx.num_compute_rings; i++)
- adev->gfx.compute_ring[i].ready = false;
- }
+ else
+ WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ CP_MEC_CNTL__MEC_ME2_HALT_MASK));
udelay(50);
}
@@ -2781,7 +2805,7 @@
* GFX7_MEC_HPD_SIZE * 2;
r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.mec.hpd_eop_obj,
&adev->gfx.mec.hpd_eop_gpu_addr,
(void **)&hpd);
@@ -3013,7 +3037,7 @@
mqd->cp_hqd_active = 1;
}
-int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
+static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
{
uint32_t tmp;
uint32_t mqd_reg;
@@ -3106,10 +3130,7 @@
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
- ring->ready = true;
- r = amdgpu_ring_test_ring(ring);
- if (r)
- ring->ready = false;
+ amdgpu_ring_test_helper(ring);
}
return 0;
@@ -3268,18 +3289,10 @@
* The RLC is a multi-purpose microengine that handles a
* variety of functions.
*/
-static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev)
-{
- amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
-}
-
static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
{
const u32 *src_ptr;
- volatile u32 *dst_ptr;
- u32 dws, i;
+ u32 dws;
const struct cs_section_def *cs_data;
int r;
@@ -3306,67 +3319,28 @@
cs_data = adev->gfx.rlc.cs_data;
if (src_ptr) {
- /* save restore block */
- r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->gfx.rlc.save_restore_obj,
- &adev->gfx.rlc.save_restore_gpu_addr,
- (void **)&adev->gfx.rlc.sr_ptr);
- if (r) {
- dev_warn(adev->dev, "(%d) create, pin or map of RLC sr bo failed\n", r);
- gfx_v7_0_rlc_fini(adev);
+ /* init save restore block */
+ r = amdgpu_gfx_rlc_init_sr(adev, dws);
+ if (r)
return r;
- }
-
- /* write the sr buffer */
- dst_ptr = adev->gfx.rlc.sr_ptr;
- for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
- dst_ptr[i] = cpu_to_le32(src_ptr[i]);
- amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
- amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
}
if (cs_data) {
- /* clear state block */
- adev->gfx.rlc.clear_state_size = dws = gfx_v7_0_get_csb_size(adev);
-
- r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->gfx.rlc.clear_state_obj,
- &adev->gfx.rlc.clear_state_gpu_addr,
- (void **)&adev->gfx.rlc.cs_ptr);
- if (r) {
- dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
- gfx_v7_0_rlc_fini(adev);
+ /* init clear state block */
+ r = amdgpu_gfx_rlc_init_csb(adev);
+ if (r)
return r;
- }
-
- /* set up the cs buffer */
- dst_ptr = adev->gfx.rlc.cs_ptr;
- gfx_v7_0_get_csb_buffer(adev, dst_ptr);
- amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
}
if (adev->gfx.rlc.cp_table_size) {
-
- r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
- &adev->gfx.rlc.cp_table_obj,
- &adev->gfx.rlc.cp_table_gpu_addr,
- (void **)&adev->gfx.rlc.cp_table_ptr);
- if (r) {
- dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
- gfx_v7_0_rlc_fini(adev);
+ r = amdgpu_gfx_rlc_init_cpt(adev);
+ if (r)
return r;
- }
-
- gfx_v7_0_init_cp_pg_table(adev);
-
- amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
- amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
-
}
+
+ /* init spm vmid with 0xf */
+ if (adev->gfx.rlc.funcs->update_spm_vmid)
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
return 0;
}
@@ -3446,7 +3420,12 @@
return orig;
}
-static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
+static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev)
+{
+ return true;
+}
+
+static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev)
{
u32 tmp, i, mask;
@@ -3468,7 +3447,7 @@
}
}
-static void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
+static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev)
{
u32 tmp;
@@ -3545,13 +3524,13 @@
adev->gfx.rlc_feature_version = le32_to_cpu(
hdr->ucode_feature_version);
- gfx_v7_0_rlc_stop(adev);
+ adev->gfx.rlc.funcs->stop(adev);
/* disable CG */
tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc;
WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
- gfx_v7_0_rlc_reset(adev);
+ adev->gfx.rlc.funcs->reset(adev);
gfx_v7_0_init_pg(adev);
@@ -3582,9 +3561,21 @@
if (adev->asic_type == CHIP_BONAIRE)
WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0);
- gfx_v7_0_rlc_start(adev);
+ adev->gfx.rlc.funcs->start(adev);
return 0;
+}
+
+static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+{
+ u32 data;
+
+ data = RREG32(mmRLC_SPM_VMID);
+
+ data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
+ data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
+
+ WREG32(mmRLC_SPM_VMID, data);
}
static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
@@ -3784,72 +3775,12 @@
WREG32(mmRLC_PG_CNTL, data);
}
-static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev)
+static int gfx_v7_0_cp_pg_table_num(struct amdgpu_device *adev)
{
- const __le32 *fw_data;
- volatile u32 *dst_ptr;
- int me, i, max_me = 4;
- u32 bo_offset = 0;
- u32 table_offset, table_size;
-
if (adev->asic_type == CHIP_KAVERI)
- max_me = 5;
-
- if (adev->gfx.rlc.cp_table_ptr == NULL)
- return;
-
- /* write the cp table buffer */
- dst_ptr = adev->gfx.rlc.cp_table_ptr;
- for (me = 0; me < max_me; me++) {
- if (me == 0) {
- const struct gfx_firmware_header_v1_0 *hdr =
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
- fw_data = (const __le32 *)
- (adev->gfx.ce_fw->data +
- le32_to_cpu(hdr->header.ucode_array_offset_bytes));
- table_offset = le32_to_cpu(hdr->jt_offset);
- table_size = le32_to_cpu(hdr->jt_size);
- } else if (me == 1) {
- const struct gfx_firmware_header_v1_0 *hdr =
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
- fw_data = (const __le32 *)
- (adev->gfx.pfp_fw->data +
- le32_to_cpu(hdr->header.ucode_array_offset_bytes));
- table_offset = le32_to_cpu(hdr->jt_offset);
- table_size = le32_to_cpu(hdr->jt_size);
- } else if (me == 2) {
- const struct gfx_firmware_header_v1_0 *hdr =
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
- fw_data = (const __le32 *)
- (adev->gfx.me_fw->data +
- le32_to_cpu(hdr->header.ucode_array_offset_bytes));
- table_offset = le32_to_cpu(hdr->jt_offset);
- table_size = le32_to_cpu(hdr->jt_size);
- } else if (me == 3) {
- const struct gfx_firmware_header_v1_0 *hdr =
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
- fw_data = (const __le32 *)
- (adev->gfx.mec_fw->data +
- le32_to_cpu(hdr->header.ucode_array_offset_bytes));
- table_offset = le32_to_cpu(hdr->jt_offset);
- table_size = le32_to_cpu(hdr->jt_size);
- } else {
- const struct gfx_firmware_header_v1_0 *hdr =
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
- fw_data = (const __le32 *)
- (adev->gfx.mec2_fw->data +
- le32_to_cpu(hdr->header.ucode_array_offset_bytes));
- table_offset = le32_to_cpu(hdr->jt_offset);
- table_size = le32_to_cpu(hdr->jt_size);
- }
-
- for (i = 0; i < table_size; i ++) {
- dst_ptr[bo_offset + i] =
- cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
- }
-
- bo_offset += table_size;
- }
+ return 5;
+ else
+ return 4;
}
static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
@@ -4170,15 +4101,6 @@
uint32_t gws_base, uint32_t gws_size,
uint32_t oa_base, uint32_t oa_size)
{
- gds_base = gds_base >> AMDGPU_GDS_SHIFT;
- gds_size = gds_size >> AMDGPU_GDS_SHIFT;
-
- gws_base = gws_base >> AMDGPU_GWS_SHIFT;
- gws_size = gws_size >> AMDGPU_GWS_SHIFT;
-
- oa_base = oa_base >> AMDGPU_OA_SHIFT;
- oa_size = oa_size >> AMDGPU_OA_SHIFT;
-
/* GDS Base */
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
@@ -4210,6 +4132,18 @@
amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
+}
+
+static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t value = 0;
+
+ value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
+ value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
+ value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
+ value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+ WREG32(mmSQ_CMD, value);
}
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
@@ -4271,9 +4205,9 @@
}
static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev,
- u32 me, u32 pipe, u32 q)
+ u32 me, u32 pipe, u32 q, u32 vm)
{
- cik_srbm_select(adev, me, pipe, q, 0);
+ cik_srbm_select(adev, me, pipe, q, vm);
}
static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
@@ -4285,8 +4219,18 @@
};
static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
- .enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode,
- .exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode
+ .is_rlc_enabled = gfx_v7_0_is_rlc_enabled,
+ .set_safe_mode = gfx_v7_0_set_safe_mode,
+ .unset_safe_mode = gfx_v7_0_unset_safe_mode,
+ .init = gfx_v7_0_rlc_init,
+ .get_csb_size = gfx_v7_0_get_csb_size,
+ .get_csb_buffer = gfx_v7_0_get_csb_buffer,
+ .get_cp_table_num = gfx_v7_0_cp_pg_table_num,
+ .resume = gfx_v7_0_rlc_resume,
+ .stop = gfx_v7_0_rlc_stop,
+ .reset = gfx_v7_0_rlc_reset,
+ .start = gfx_v7_0_rlc_start,
+ .update_spm_vmid = gfx_v7_0_update_spm_vmid
};
static int gfx_v7_0_early_init(void *handle)
@@ -4323,7 +4267,7 @@
static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
{
u32 gb_addr_config;
- u32 mc_shared_chmap, mc_arb_ramcfg;
+ u32 mc_arb_ramcfg;
u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
u32 tmp;
@@ -4400,9 +4344,13 @@
break;
}
- mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
+
+ adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
+ MC_ARB_RAMCFG, NOOFBANK);
+ adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
+ MC_ARB_RAMCFG, NOOFRANKS);
adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
adev->gfx.config.mem_max_burst_length_bytes = 256;
@@ -4474,7 +4422,7 @@
ring->ring_obj = NULL;
ring->use_doorbell = true;
- ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+ ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
@@ -4483,7 +4431,8 @@
/* type-2 packets are deprecated on MEC, use type-3 instead */
r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, irq_type);
+ &adev->gfx.eop_irq, irq_type,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
@@ -4513,18 +4462,18 @@
adev->gfx.mec.num_queue_per_pipe = 8;
/* EOP Event */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
if (r)
return r;
/* Privileged reg */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184,
&adev->gfx.priv_reg_irq);
if (r)
return r;
/* Privileged inst */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185,
&adev->gfx.priv_inst_irq);
if (r)
return r;
@@ -4537,7 +4486,7 @@
return r;
}
- r = gfx_v7_0_rlc_init(adev);
+ r = adev->gfx.rlc.funcs->init(adev);
if (r) {
DRM_ERROR("Failed to init rlc BOs!\n");
return r;
@@ -4555,7 +4504,9 @@
ring->ring_obj = NULL;
sprintf(ring->name, "gfx");
r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
+ &adev->gfx.eop_irq,
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
+ AMDGPU_RING_PRIO_DEFAULT);
if (r)
return r;
}
@@ -4579,25 +4530,6 @@
}
}
- /* reserve GDS, GWS and OA resource for gfx */
- r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
- &adev->gds.gds_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
- r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
- &adev->gds.gws_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
- r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
- &adev->gds.oa_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
adev->gfx.ce_ram_size = 0x8000;
gfx_v7_0_gpu_early_init(adev);
@@ -4607,12 +4539,8 @@
static int gfx_v7_0_sw_fini(void *handle)
{
- int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
+ int i;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -4620,7 +4548,7 @@
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
gfx_v7_0_cp_compute_fini(adev);
- gfx_v7_0_rlc_fini(adev);
+ amdgpu_gfx_rlc_fini(adev);
gfx_v7_0_mec_fini(adev);
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
@@ -4640,10 +4568,12 @@
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- gfx_v7_0_gpu_init(adev);
+ gfx_v7_0_constants_init(adev);
+ /* init CSB */
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* init rlc */
- r = gfx_v7_0_rlc_resume(adev);
+ r = adev->gfx.rlc.funcs->resume(adev);
if (r)
return r;
@@ -4661,7 +4591,7 @@
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
gfx_v7_0_cp_enable(adev, false);
- gfx_v7_0_rlc_stop(adev);
+ adev->gfx.rlc.funcs->stop(adev);
gfx_v7_0_fini_pg(adev);
return 0;
@@ -4746,7 +4676,7 @@
gfx_v7_0_update_cg(adev, false);
/* stop the rlc */
- gfx_v7_0_rlc_stop(adev);
+ adev->gfx.rlc.funcs->stop(adev);
/* Disable GFX parsing/prefetching */
WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
@@ -4915,7 +4845,7 @@
enum amdgpu_interrupt_state state)
{
switch (type) {
- case AMDGPU_CP_IRQ_GFX_EOP:
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
gfx_v7_0_set_gfx_eop_interrupt_state(adev, state);
break;
case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
@@ -4975,12 +4905,36 @@
return 0;
}
+static void gfx_v7_0_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ struct amdgpu_ring *ring;
+ u8 me_id, pipe_id;
+ int i;
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ switch (me_id) {
+ case 0:
+ drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if ((ring->me == me_id) && (ring->pipe == pipe_id))
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ }
+}
+
static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
DRM_ERROR("Illegal register access in command stream\n");
- schedule_work(&adev->reset_work);
+ gfx_v7_0_fault(adev, entry);
return 0;
}
@@ -4990,7 +4944,7 @@
{
DRM_ERROR("Illegal instruction in command stream\n");
// XXX soft reset the gfx block only
- schedule_work(&adev->reset_work);
+ gfx_v7_0_fault(adev, entry);
return 0;
}
@@ -5042,6 +4996,32 @@
return 0;
}
+static void gfx_v7_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+ amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+ PACKET3_TC_ACTION_ENA |
+ PACKET3_SH_KCACHE_ACTION_ENA |
+ PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
+}
+
+static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+ amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+ PACKET3_TC_ACTION_ENA |
+ PACKET3_SH_KCACHE_ACTION_ENA |
+ PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
+}
+
static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
.name = "gfx_v7_0",
.early_init = gfx_v7_0_early_init,
@@ -5074,7 +5054,8 @@
12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
- 3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
+ 3 + 4 + /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
+ 5, /* SURFACE_SYNC */
.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
.emit_fence = gfx_v7_0_ring_emit_fence_gfx,
@@ -5088,6 +5069,8 @@
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
.emit_wreg = gfx_v7_0_ring_emit_wreg,
+ .soft_recovery = gfx_v7_0_ring_soft_recovery,
+ .emit_mem_sync = gfx_v7_0_emit_mem_sync,
};
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
@@ -5104,8 +5087,9 @@
5 + /* hdp invalidate */
7 + /* gfx_v7_0_ring_emit_pipeline_sync */
CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
- 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
- .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */
+ 7 + 7 + 7 + /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
+ 7, /* gfx_v7_0_emit_mem_sync_compute */
+ .emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */
.emit_ib = gfx_v7_0_ring_emit_ib_compute,
.emit_fence = gfx_v7_0_ring_emit_fence_compute,
.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
@@ -5117,6 +5101,7 @@
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v7_0_ring_emit_wreg,
+ .emit_mem_sync = gfx_v7_0_emit_mem_sync_compute,
};
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -5159,29 +5144,10 @@
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
{
/* init asci gds info */
- adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
- adev->gds.gws.total_size = 64;
- adev->gds.oa.total_size = 16;
-
- if (adev->gds.mem.total_size == 64 * 1024) {
- adev->gds.mem.gfx_partition_size = 4096;
- adev->gds.mem.cs_partition_size = 4096;
-
- adev->gds.gws.gfx_partition_size = 4;
- adev->gds.gws.cs_partition_size = 4;
-
- adev->gds.oa.gfx_partition_size = 4;
- adev->gds.oa.cs_partition_size = 1;
- } else {
- adev->gds.mem.gfx_partition_size = 1024;
- adev->gds.mem.cs_partition_size = 1024;
-
- adev->gds.gws.gfx_partition_size = 16;
- adev->gds.gws.cs_partition_size = 16;
-
- adev->gds.oa.gfx_partition_size = 4;
- adev->gds.oa.cs_partition_size = 4;
- }
+ adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
+ adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
}
@@ -5241,7 +5207,7 @@
cu_info->lds_size = 64;
}
-const struct amdgpu_ip_block_version gfx_v7_0_ip_block =
+static const struct amdgpu_ip_block_version gfx_v7_0_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_GFX,
.major = 7,
--
Gitblit v1.6.2