From d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Mon, 11 Dec 2023 02:45:28 +0000 Subject: [PATCH] add boot partition size --- kernel/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 528 +++++++++++++++++++++++++++------------------------------- 1 files changed, 247 insertions(+), 281 deletions(-) diff --git a/kernel/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/kernel/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 95452c5..04eaf3a 100644 --- a/kernel/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/kernel/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -20,8 +20,10 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ + #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> + #include "amdgpu.h" #include "amdgpu_ih.h" #include "amdgpu_gfx.h" @@ -882,7 +884,6 @@ static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev); static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer); -static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev); static void gfx_v7_0_init_pg(struct amdgpu_device *adev); static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev); @@ -1849,8 +1850,6 @@ * */ #define DEFAULT_SH_MEM_BASES (0x6000) -#define FIRST_COMPUTE_VMID (8) -#define LAST_COMPUTE_VMID (16) static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) { int i; @@ -1868,7 +1867,7 @@ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT; mutex_lock(&adev->srbm_mutex); - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { cik_srbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32(mmSH_MEM_CONFIG, sh_mem_config); @@ -1878,6 +1877,33 @@ } cik_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); + + /* Initialize all compute VMIDs to have no GDS, GWS, or OA + acccess. These should be enabled by FW for target VMIDs. */ + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { + WREG32(amdgpu_gds_reg_offset[i].mem_base, 0); + WREG32(amdgpu_gds_reg_offset[i].mem_size, 0); + WREG32(amdgpu_gds_reg_offset[i].gws, 0); + WREG32(amdgpu_gds_reg_offset[i].oa, 0); + } +} + +static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. + */ + for (vmid = 1; vmid < 16; vmid++) { + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); + WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); + WREG32(amdgpu_gds_reg_offset[vmid].oa, 0); + } } static void gfx_v7_0_config_init(struct amdgpu_device *adev) @@ -1886,14 +1912,14 @@ } /** - * gfx_v7_0_gpu_init - setup the 3D engine + * gfx_v7_0_constants_init - setup the 3D engine * * @adev: amdgpu_device pointer * - * Configures the 3D engine and tiling configuration - * registers so that the 3D engine is usable. + * init the gfx constants such as the 3D engine, tiling configuration + * registers, maximum number of quad pipes, render backends... */ -static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) +static void gfx_v7_0_constants_init(struct amdgpu_device *adev) { u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base; u32 tmp; @@ -1958,6 +1984,7 @@ mutex_unlock(&adev->srbm_mutex); gfx_v7_0_init_compute_vmid(adev); + gfx_v7_0_init_gds_vmid(adev); WREG32(mmSX_DEBUG_1, 0x20); @@ -2064,17 +2091,14 @@ int r; r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); + if (r) return r; - } + WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_gfx_scratch_free(adev, scratch); - return r; - } + if (r) + goto error_free_scratch; + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); amdgpu_ring_write(ring, 0xDEADBEEF); @@ -2084,15 +2108,12 @@ tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", - ring->idx, scratch, tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + +error_free_scratch: amdgpu_gfx_scratch_free(adev, scratch); return r; } @@ -2233,13 +2254,15 @@ * on the gfx ring for execution by the GPU. */ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; /* insert SWITCH_BUFFER packet before first IB in the ring frame */ - if (ctx_switch) { + if (flags & AMDGPU_HAVE_CTX_SWITCH) { amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); amdgpu_ring_write(ring, 0); } @@ -2262,10 +2285,28 @@ } static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + uint32_t flags) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + + /* Currently, there is a high possibility to get wave ID mismatch + * between ME and GDS, leading to a hw deadlock, because ME generates + * different wave IDs than the GDS expects. This situation happens + * randomly when at least 5 compute pipes use GDS ordered append. + * The wave IDs generated by ME are also wrong after suspend/resume. + * Those are probably bugs somewhere else in the kernel driver. + * + * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and + * GDS to 0 for this ring (me/pipe). + */ + if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START); + amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); + } amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); amdgpu_ring_write(ring, @@ -2316,17 +2357,16 @@ long r; r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); + if (r) return r; - } + WREG32(scratch, 0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + r = amdgpu_ib_get(adev, NULL, 256, + AMDGPU_IB_POOL_DIRECT, &ib); + if (r) goto err1; - } + ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); ib.ptr[2] = 0xDEADBEEF; @@ -2338,22 +2378,16 @@ r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err2; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err2; } tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", - scratch, tmp); + else r = -EINVAL; - } err2: amdgpu_ib_free(adev, &ib, NULL); @@ -2396,15 +2430,12 @@ */ static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) { - int i; - - if (enable) { + if (enable) WREG32(mmCP_ME_CNTL, 0); - } else { - WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK)); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - adev->gfx.gfx_ring[i].ready = false; - } + else + WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | + CP_ME_CNTL__PFP_HALT_MASK | + CP_ME_CNTL__CE_HALT_MASK)); udelay(50); } @@ -2613,12 +2644,9 @@ /* start the ring */ gfx_v7_0_cp_gfx_start(adev); - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } return 0; } @@ -2668,15 +2696,11 @@ */ static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) { - int i; - - if (enable) { + if (enable) WREG32(mmCP_MEC_CNTL, 0); - } else { - WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); - for (i = 0; i < adev->gfx.num_compute_rings; i++) - adev->gfx.compute_ring[i].ready = false; - } + else + WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | + CP_MEC_CNTL__MEC_ME2_HALT_MASK)); udelay(50); } @@ -2781,7 +2805,7 @@ * GFX7_MEC_HPD_SIZE * 2; r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_DOMAIN_VRAM, &adev->gfx.mec.hpd_eop_obj, &adev->gfx.mec.hpd_eop_gpu_addr, (void **)&hpd); @@ -3013,7 +3037,7 @@ mqd->cp_hqd_active = 1; } -int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd) +static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd) { uint32_t tmp; uint32_t mqd_reg; @@ -3106,10 +3130,7 @@ for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) - ring->ready = false; + amdgpu_ring_test_helper(ring); } return 0; @@ -3268,18 +3289,10 @@ * The RLC is a multi-purpose microengine that handles a * variety of functions. */ -static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev) -{ - amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); -} - static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) { const u32 *src_ptr; - volatile u32 *dst_ptr; - u32 dws, i; + u32 dws; const struct cs_section_def *cs_data; int r; @@ -3306,67 +3319,28 @@ cs_data = adev->gfx.rlc.cs_data; if (src_ptr) { - /* save restore block */ - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.save_restore_obj, - &adev->gfx.rlc.save_restore_gpu_addr, - (void **)&adev->gfx.rlc.sr_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create, pin or map of RLC sr bo failed\n", r); - gfx_v7_0_rlc_fini(adev); + /* init save restore block */ + r = amdgpu_gfx_rlc_init_sr(adev, dws); + if (r) return r; - } - - /* write the sr buffer */ - dst_ptr = adev->gfx.rlc.sr_ptr; - for (i = 0; i < adev->gfx.rlc.reg_list_size; i++) - dst_ptr[i] = cpu_to_le32(src_ptr[i]); - amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); } if (cs_data) { - /* clear state block */ - adev->gfx.rlc.clear_state_size = dws = gfx_v7_0_get_csb_size(adev); - - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.clear_state_obj, - &adev->gfx.rlc.clear_state_gpu_addr, - (void **)&adev->gfx.rlc.cs_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); - gfx_v7_0_rlc_fini(adev); + /* init clear state block */ + r = amdgpu_gfx_rlc_init_csb(adev); + if (r) return r; - } - - /* set up the cs buffer */ - dst_ptr = adev->gfx.rlc.cs_ptr; - gfx_v7_0_get_csb_buffer(adev, dst_ptr); - amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); } if (adev->gfx.rlc.cp_table_size) { - - r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.cp_table_obj, - &adev->gfx.rlc.cp_table_gpu_addr, - (void **)&adev->gfx.rlc.cp_table_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); - gfx_v7_0_rlc_fini(adev); + r = amdgpu_gfx_rlc_init_cpt(adev); + if (r) return r; - } - - gfx_v7_0_init_cp_pg_table(adev); - - amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); - } + + /* init spm vmid with 0xf */ + if (adev->gfx.rlc.funcs->update_spm_vmid) + adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); return 0; } @@ -3446,7 +3420,12 @@ return orig; } -static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev) +static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev) +{ + return true; +} + +static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev) { u32 tmp, i, mask; @@ -3468,7 +3447,7 @@ } } -static void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev) +static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev) { u32 tmp; @@ -3545,13 +3524,13 @@ adev->gfx.rlc_feature_version = le32_to_cpu( hdr->ucode_feature_version); - gfx_v7_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); /* disable CG */ tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc; WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); - gfx_v7_0_rlc_reset(adev); + adev->gfx.rlc.funcs->reset(adev); gfx_v7_0_init_pg(adev); @@ -3582,9 +3561,21 @@ if (adev->asic_type == CHIP_BONAIRE) WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0); - gfx_v7_0_rlc_start(adev); + adev->gfx.rlc.funcs->start(adev); return 0; +} + +static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +{ + u32 data; + + data = RREG32(mmRLC_SPM_VMID); + + data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK; + data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT; + + WREG32(mmRLC_SPM_VMID, data); } static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable) @@ -3784,72 +3775,12 @@ WREG32(mmRLC_PG_CNTL, data); } -static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev) +static int gfx_v7_0_cp_pg_table_num(struct amdgpu_device *adev) { - const __le32 *fw_data; - volatile u32 *dst_ptr; - int me, i, max_me = 4; - u32 bo_offset = 0; - u32 table_offset, table_size; - if (adev->asic_type == CHIP_KAVERI) - max_me = 5; - - if (adev->gfx.rlc.cp_table_ptr == NULL) - return; - - /* write the cp table buffer */ - dst_ptr = adev->gfx.rlc.cp_table_ptr; - for (me = 0; me < max_me; me++) { - if (me == 0) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; - fw_data = (const __le32 *) - (adev->gfx.ce_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 1) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; - fw_data = (const __le32 *) - (adev->gfx.pfp_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 2) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; - fw_data = (const __le32 *) - (adev->gfx.me_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 3) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - fw_data = (const __le32 *) - (adev->gfx.mec_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; - fw_data = (const __le32 *) - (adev->gfx.mec2_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } - - for (i = 0; i < table_size; i ++) { - dst_ptr[bo_offset + i] = - cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); - } - - bo_offset += table_size; - } + return 5; + else + return 4; } static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev, @@ -4170,15 +4101,6 @@ uint32_t gws_base, uint32_t gws_size, uint32_t oa_base, uint32_t oa_size) { - gds_base = gds_base >> AMDGPU_GDS_SHIFT; - gds_size = gds_size >> AMDGPU_GDS_SHIFT; - - gws_base = gws_base >> AMDGPU_GWS_SHIFT; - gws_size = gws_size >> AMDGPU_GWS_SHIFT; - - oa_base = oa_base >> AMDGPU_OA_SHIFT; - oa_size = oa_size >> AMDGPU_OA_SHIFT; - /* GDS Base */ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | @@ -4210,6 +4132,18 @@ amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); +} + +static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t value = 0; + + value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); + value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); + value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); + value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + WREG32(mmSQ_CMD, value); } static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) @@ -4271,9 +4205,9 @@ } static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q) + u32 me, u32 pipe, u32 q, u32 vm) { - cik_srbm_select(adev, me, pipe, q, 0); + cik_srbm_select(adev, me, pipe, q, vm); } static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = { @@ -4285,8 +4219,18 @@ }; static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = { - .enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode, - .exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode + .is_rlc_enabled = gfx_v7_0_is_rlc_enabled, + .set_safe_mode = gfx_v7_0_set_safe_mode, + .unset_safe_mode = gfx_v7_0_unset_safe_mode, + .init = gfx_v7_0_rlc_init, + .get_csb_size = gfx_v7_0_get_csb_size, + .get_csb_buffer = gfx_v7_0_get_csb_buffer, + .get_cp_table_num = gfx_v7_0_cp_pg_table_num, + .resume = gfx_v7_0_rlc_resume, + .stop = gfx_v7_0_rlc_stop, + .reset = gfx_v7_0_rlc_reset, + .start = gfx_v7_0_rlc_start, + .update_spm_vmid = gfx_v7_0_update_spm_vmid }; static int gfx_v7_0_early_init(void *handle) @@ -4323,7 +4267,7 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; - u32 mc_shared_chmap, mc_arb_ramcfg; + u32 mc_arb_ramcfg; u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; u32 tmp; @@ -4400,9 +4344,13 @@ break; } - mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; + + adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFBANK); + adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFRANKS); adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; adev->gfx.config.mem_max_burst_length_bytes = 256; @@ -4474,7 +4422,7 @@ ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; + ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id; sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -4483,7 +4431,8 @@ /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, irq_type); + &adev->gfx.eop_irq, irq_type, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; @@ -4513,18 +4462,18 @@ adev->gfx.mec.num_queue_per_pipe = 8; /* EOP Event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); if (r) return r; /* Privileged reg */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184, &adev->gfx.priv_reg_irq); if (r) return r; /* Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185, &adev->gfx.priv_inst_irq); if (r) return r; @@ -4537,7 +4486,7 @@ return r; } - r = gfx_v7_0_rlc_init(adev); + r = adev->gfx.rlc.funcs->init(adev); if (r) { DRM_ERROR("Failed to init rlc BOs!\n"); return r; @@ -4555,7 +4504,9 @@ ring->ring_obj = NULL; sprintf(ring->name, "gfx"); r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); + &adev->gfx.eop_irq, + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, + AMDGPU_RING_PRIO_DEFAULT); if (r) return r; } @@ -4579,25 +4530,6 @@ } } - /* reserve GDS, GWS and OA resource for gfx */ - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, - &adev->gds.gds_gfx_bo, NULL, NULL); - if (r) - return r; - - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, - &adev->gds.gws_gfx_bo, NULL, NULL); - if (r) - return r; - - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, - &adev->gds.oa_gfx_bo, NULL, NULL); - if (r) - return r; - adev->gfx.ce_ram_size = 0x8000; gfx_v7_0_gpu_early_init(adev); @@ -4607,12 +4539,8 @@ static int gfx_v7_0_sw_fini(void *handle) { - int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); + int i; for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); @@ -4620,7 +4548,7 @@ amdgpu_ring_fini(&adev->gfx.compute_ring[i]); gfx_v7_0_cp_compute_fini(adev); - gfx_v7_0_rlc_fini(adev); + amdgpu_gfx_rlc_fini(adev); gfx_v7_0_mec_fini(adev); amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, &adev->gfx.rlc.clear_state_gpu_addr, @@ -4640,10 +4568,12 @@ int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gfx_v7_0_gpu_init(adev); + gfx_v7_0_constants_init(adev); + /* init CSB */ + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* init rlc */ - r = gfx_v7_0_rlc_resume(adev); + r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -4661,7 +4591,7 @@ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); gfx_v7_0_cp_enable(adev, false); - gfx_v7_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); gfx_v7_0_fini_pg(adev); return 0; @@ -4746,7 +4676,7 @@ gfx_v7_0_update_cg(adev, false); /* stop the rlc */ - gfx_v7_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); /* Disable GFX parsing/prefetching */ WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK); @@ -4915,7 +4845,7 @@ enum amdgpu_interrupt_state state) { switch (type) { - case AMDGPU_CP_IRQ_GFX_EOP: + case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: gfx_v7_0_set_gfx_eop_interrupt_state(adev, state); break; case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: @@ -4975,12 +4905,36 @@ return 0; } +static void gfx_v7_0_fault(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + struct amdgpu_ring *ring; + u8 me_id, pipe_id; + int i; + + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + switch (me_id) { + case 0: + drm_sched_fault(&adev->gfx.gfx_ring[0].sched); + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if ((ring->me == me_id) && (ring->pipe == pipe_id)) + drm_sched_fault(&ring->sched); + } + break; + } +} + static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal register access in command stream\n"); - schedule_work(&adev->reset_work); + gfx_v7_0_fault(adev, entry); return 0; } @@ -4990,7 +4944,7 @@ { DRM_ERROR("Illegal instruction in command stream\n"); // XXX soft reset the gfx block only - schedule_work(&adev->reset_work); + gfx_v7_0_fault(adev, entry); return 0; } @@ -5042,6 +4996,32 @@ return 0; } +static void gfx_v7_0_emit_mem_sync(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); + amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA | + PACKET3_TC_ACTION_ENA | + PACKET3_SH_KCACHE_ACTION_ENA | + PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */ + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ + amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ +} + +static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); + amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA | + PACKET3_TC_ACTION_ENA | + PACKET3_SH_KCACHE_ACTION_ENA | + PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */ + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ + amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ + amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ +} + static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { .name = "gfx_v7_0", .early_init = gfx_v7_0_early_init, @@ -5074,7 +5054,8 @@ 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */ 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */ CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */ - 3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/ + 3 + 4 + /* gfx_v7_ring_emit_cntxcntl including vgt flush*/ + 5, /* SURFACE_SYNC */ .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */ .emit_ib = gfx_v7_0_ring_emit_ib_gfx, .emit_fence = gfx_v7_0_ring_emit_fence_gfx, @@ -5088,6 +5069,8 @@ .pad_ib = amdgpu_ring_generic_pad_ib, .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl, .emit_wreg = gfx_v7_0_ring_emit_wreg, + .soft_recovery = gfx_v7_0_ring_soft_recovery, + .emit_mem_sync = gfx_v7_0_emit_mem_sync, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { @@ -5104,8 +5087,9 @@ 5 + /* hdp invalidate */ 7 + /* gfx_v7_0_ring_emit_pipeline_sync */ CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */ - 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ - .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */ + 7 + 7 + 7 + /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ + 7, /* gfx_v7_0_emit_mem_sync_compute */ + .emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */ .emit_ib = gfx_v7_0_ring_emit_ib_compute, .emit_fence = gfx_v7_0_ring_emit_fence_compute, .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync, @@ -5117,6 +5101,7 @@ .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v7_0_ring_emit_wreg, + .emit_mem_sync = gfx_v7_0_emit_mem_sync_compute, }; static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev) @@ -5159,29 +5144,10 @@ static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ - adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); - adev->gds.gws.total_size = 64; - adev->gds.oa.total_size = 16; - - if (adev->gds.mem.total_size == 64 * 1024) { - adev->gds.mem.gfx_partition_size = 4096; - adev->gds.mem.cs_partition_size = 4096; - - adev->gds.gws.gfx_partition_size = 4; - adev->gds.gws.cs_partition_size = 4; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 1; - } else { - adev->gds.mem.gfx_partition_size = 1024; - adev->gds.mem.cs_partition_size = 1024; - - adev->gds.gws.gfx_partition_size = 16; - adev->gds.gws.cs_partition_size = 16; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 4; - } + adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE); + adev->gds.gws_size = 64; + adev->gds.oa_size = 16; + adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); } @@ -5241,7 +5207,7 @@ cu_info->lds_size = 64; } -const struct amdgpu_ip_block_version gfx_v7_0_ip_block = +static const struct amdgpu_ip_block_version gfx_v7_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, .major = 7, -- Gitblit v1.6.2