From 04dd17822334871b23ea2862f7798fb0e0007777 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Sat, 11 May 2024 08:53:19 +0000
Subject: [PATCH] change otg to host mode
---
kernel/drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 489 ++++++++++++++++++++++++++++++-----------------------
1 files changed, 277 insertions(+), 212 deletions(-)
diff --git a/kernel/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/kernel/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index ba51301..9ae0e60 100644
--- a/kernel/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/kernel/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1,26 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/cpumask.h>
#include <linux/qcom_scm.h>
-#include <linux/dma-mapping.h>
-#include <linux/of_address.h>
-#include <linux/soc/qcom/mdt_loader.h>
#include <linux/pm_opp.h>
#include <linux/nvmem-consumer.h>
-#include <linux/iopoll.h>
#include <linux/slab.h>
#include "msm_gem.h"
#include "msm_mmu.h"
@@ -31,104 +18,25 @@
#define GPU_PAS_ID 13
-static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
-{
- struct device *dev = &gpu->pdev->dev;
- const struct firmware *fw;
- struct device_node *np, *mem_np;
- struct resource r;
- phys_addr_t mem_phys;
- ssize_t mem_size;
- void *mem_region = NULL;
- int ret;
-
- if (!IS_ENABLED(CONFIG_ARCH_QCOM))
- return -EINVAL;
-
- np = of_get_child_by_name(dev->of_node, "zap-shader");
- if (!np)
- return -ENODEV;
-
- mem_np = of_parse_phandle(np, "memory-region", 0);
- of_node_put(np);
- if (!mem_np)
- return -EINVAL;
-
- ret = of_address_to_resource(mem_np, 0, &r);
- of_node_put(mem_np);
- if (ret)
- return ret;
-
- mem_phys = r.start;
- mem_size = resource_size(&r);
-
- /* Request the MDT file for the firmware */
- fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
- if (IS_ERR(fw)) {
- DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
- return PTR_ERR(fw);
- }
-
- /* Figure out how much memory we need */
- mem_size = qcom_mdt_get_size(fw);
- if (mem_size < 0) {
- ret = mem_size;
- goto out;
- }
-
- /* Allocate memory for the firmware image */
- mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
- if (!mem_region) {
- ret = -ENOMEM;
- goto out;
- }
-
- /*
- * Load the rest of the MDT
- *
- * Note that we could be dealing with two different paths, since
- * with upstream linux-firmware it would be in a qcom/ subdir..
- * adreno_request_fw() handles this, but qcom_mdt_load() does
- * not. But since we've already gotten thru adreno_request_fw()
- * we know which of the two cases it is:
- */
- if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
- ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
- mem_region, mem_phys, mem_size, NULL);
- } else {
- char *newname;
-
- newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname);
-
- ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
- mem_region, mem_phys, mem_size, NULL);
- kfree(newname);
- }
- if (ret)
- goto out;
-
- /* Send the image to the secure world */
- ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
- if (ret)
- DRM_DEV_ERROR(dev, "Unable to authorize the image\n");
-
-out:
- if (mem_region)
- memunmap(mem_region);
-
- release_firmware(fw);
-
- return ret;
-}
-
-static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
+ bool sync)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
uint32_t wptr;
unsigned long flags;
- spin_lock_irqsave(&ring->lock, flags);
+ /*
+ * Most flush operations need to issue a WHERE_AM_I opcode to sync up
+ * the rptr shadow
+ */
+ if (a5xx_gpu->has_whereami && sync) {
+ OUT_PKT7(ring, CP_WHERE_AM_I, 2);
+ OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
+ OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
+ }
+
+ spin_lock_irqsave(&ring->preempt_lock, flags);
/* Copy the shadow to the actual register */
ring->cur = ring->next;
@@ -136,7 +44,7 @@
/* Make sure to wrap wptr if we need to */
wptr = get_wptr(ring);
- spin_unlock_irqrestore(&ring->lock, flags);
+ spin_unlock_irqrestore(&ring->preempt_lock, flags);
/* Make sure everything is posted before making a decision */
mb();
@@ -146,8 +54,7 @@
gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
}
-static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
- struct msm_file_private *ctx)
+static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
struct msm_drm_private *priv = gpu->dev->dev_private;
struct msm_ringbuffer *ring = submit->ring;
@@ -160,8 +67,9 @@
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
break;
case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
- if (priv->lastctx == ctx)
+ if (priv->lastctx == submit->queue->ctx)
break;
+ fallthrough;
case MSM_SUBMIT_CMD_BUF:
/* copy commands into RB: */
obj = submit->bos[submit->cmd[i].idx].obj;
@@ -173,7 +81,7 @@
* since we've already mapped it once in
* submit_reloc()
*/
- if (WARN_ON(!ptr))
+ if (WARN_ON(IS_ERR_OR_NULL(ptr)))
return;
for (i = 0; i < dwords; i++) {
@@ -193,7 +101,7 @@
}
}
- a5xx_flush(gpu, ring);
+ a5xx_flush(gpu, ring, true);
a5xx_preempt_trigger(gpu);
/* we might not necessarily have a cmd from userspace to
@@ -205,8 +113,7 @@
msm_gpu_retire(gpu);
}
-static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
- struct msm_file_private *ctx)
+static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
@@ -216,7 +123,7 @@
if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
priv->lastctx = NULL;
- a5xx_submit_in_rb(gpu, submit, ctx);
+ a5xx_submit_in_rb(gpu, submit);
return;
}
@@ -237,8 +144,8 @@
OUT_RING(ring, 1);
/* Enable local preemption for finegrain preemption */
- OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
- OUT_RING(ring, 0x02);
+ OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
+ OUT_RING(ring, 0x1);
/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
@@ -250,8 +157,9 @@
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
break;
case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
- if (priv->lastctx == ctx)
+ if (priv->lastctx == submit->queue->ctx)
break;
+ fallthrough;
case MSM_SUBMIT_CMD_BUF:
OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
@@ -287,7 +195,8 @@
* timestamp is written to the memory and then triggers the interrupt
*/
OUT_PKT7(ring, CP_EVENT_WRITE, 4);
- OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
+ OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
+ CP_EVENT_WRITE_0_IRQ);
OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
OUT_RING(ring, submit->seqno);
@@ -306,7 +215,8 @@
/* Set bit 0 to trigger an interrupt on preempt complete */
OUT_RING(ring, 0x01);
- a5xx_flush(gpu, ring);
+ /* A WHERE_AM_I packet is not needed after a YIELD */
+ a5xx_flush(gpu, ring, false);
/* Check to see if we need to start preemption */
a5xx_preempt_trigger(gpu);
@@ -412,11 +322,17 @@
void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
unsigned int i;
for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
gpu_write(gpu, a5xx_hwcg[i].offset,
state ? a5xx_hwcg[i].value : 0);
+
+ if (adreno_is_a540(adreno_gpu)) {
+ gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
+ gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
+ }
gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
@@ -448,6 +364,9 @@
* 2D mode 3 draw
*/
OUT_RING(ring, 0x0000000B);
+ } else if (adreno_is_a510(adreno_gpu)) {
+ /* Workaround for token and syncs */
+ OUT_RING(ring, 0x00000001);
} else {
/* No workarounds enabled */
OUT_RING(ring, 0x00000000);
@@ -456,7 +375,7 @@
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
- gpu->funcs->flush(gpu, ring);
+ a5xx_flush(gpu, ring, true);
return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}
@@ -498,9 +417,29 @@
OUT_RING(ring, 0x01);
OUT_RING(ring, 0x01);
- gpu->funcs->flush(gpu, ring);
+ /* The WHERE_AM_I packet is not needed after a YIELD is issued */
+ a5xx_flush(gpu, ring, false);
return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
+}
+
+static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
+ struct drm_gem_object *obj)
+{
+ u32 *buf = msm_gem_get_vaddr_active(obj);
+
+ if (IS_ERR(buf))
+ return;
+
+ /*
+ * If the lowest nibble of buf[0] is 0xa that is an indication that this
+ * microcode has been patched. We only care about the patchlevel, which is
+ * the lowest nibble of the version dword ("dword [3]", read as buf[2])
+ */
+ if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
+ a5xx_gpu->has_whereami = true;
+
+ msm_gem_put_vaddr(obj);
}
static int a5xx_ucode_init(struct msm_gpu *gpu)
@@ -513,13 +452,16 @@
a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
+
if (IS_ERR(a5xx_gpu->pm4_bo)) {
ret = PTR_ERR(a5xx_gpu->pm4_bo);
a5xx_gpu->pm4_bo = NULL;
- dev_err(gpu->dev->dev, "could not allocate PM4: %d\n",
+ DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
ret);
return ret;
}
+
+ msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
}
if (!a5xx_gpu->pfp_bo) {
@@ -529,10 +471,13 @@
if (IS_ERR(a5xx_gpu->pfp_bo)) {
ret = PTR_ERR(a5xx_gpu->pfp_bo);
a5xx_gpu->pfp_bo = NULL;
- dev_err(gpu->dev->dev, "could not allocate PFP: %d\n",
+ DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
ret);
return ret;
}
+
+ msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
+ a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
}
gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
@@ -561,8 +506,6 @@
static int a5xx_zap_shader_init(struct msm_gpu *gpu)
{
static bool loaded;
- struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
- struct platform_device *pdev = gpu->pdev;
int ret;
/*
@@ -572,23 +515,9 @@
if (loaded)
return a5xx_zap_shader_resume(gpu);
- /* We need SCM to be able to load the firmware */
- if (!qcom_scm_is_available()) {
- DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
- return -EPROBE_DEFER;
- }
-
- /* Each GPU has a target specific zap shader firmware name to use */
- if (!adreno_gpu->info->zapfw) {
- DRM_DEV_ERROR(&pdev->dev,
- "Zap shader firmware file not specified for this target\n");
- return -ENODEV;
- }
-
- ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);
+ ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
loaded = !ret;
-
return ret;
}
@@ -608,9 +537,13 @@
static int a5xx_hw_init(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
int ret;
gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
+
+ if (adreno_is_a540(adreno_gpu))
+ gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
/* Make all blocks contribute to the GPU BUSY perf counter */
gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
@@ -671,12 +604,24 @@
0x00100000 + adreno_gpu->gmem - 1);
gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
- gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
- gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
- gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
- gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
-
- gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
+ if (adreno_is_a510(adreno_gpu)) {
+ gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
+ gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
+ gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
+ gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
+ gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
+ (0x200 << 11 | 0x200 << 22));
+ } else {
+ gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
+ if (adreno_is_a530(adreno_gpu))
+ gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
+ if (adreno_is_a540(adreno_gpu))
+ gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
+ gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
+ gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
+ gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
+ (0x400 << 11 | 0x300 << 22));
+ }
if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
@@ -687,6 +632,19 @@
/* Enable ME/PFP split notification */
gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
+ /*
+ * In A5x, CCU can send context_done event of a particular context to
+ * UCHE which ultimately reaches CP even when there is a valid
+ * transaction of that context inside CCU. This can let CP program
+ * config registers, which will make the "valid transaction" inside
+ * CCU be interpreted differently. This can cause a gpu fault. This
+ * bug is fixed in the latest A510 revision. To enable this bug fix,
+ * bit[11] of RB_DBG_ECO_CNTL needs to be set to 0; the default is 1
+ * (disable). For older A510 versions this bit is unused.
+ */
+ if (adreno_is_a510(adreno_gpu))
+ gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
+
/* Enable HWCG */
a5xx_set_hwcg(gpu, true);
@@ -695,6 +653,8 @@
/* Set the highest bank bit */
gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
+ if (adreno_is_a540(adreno_gpu))
+ gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
/* Protect registers from the CP */
gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
@@ -731,7 +691,7 @@
/* UCHE */
gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
- if (adreno_is_a530(adreno_gpu))
+ if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
ADRENO_PROTECT_RW(0x10000, 0x8000));
@@ -745,17 +705,76 @@
REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
+ /* Put the GPU into 64 bit by default */
+ gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
+
+ /*
+ * VPC corner case with local memory load kill leads to corrupt
+ * internal state. Normal Disable does not work for all a5x chips.
+ * So do the following setting to disable it.
+ */
+ if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
+ gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
+ gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
+ }
+
ret = adreno_hw_init(gpu);
if (ret)
return ret;
- a5xx_preempt_hw_init(gpu);
-
- a5xx_gpmu_ucode_init(gpu);
+ if (!adreno_is_a510(adreno_gpu))
+ a5xx_gpmu_ucode_init(gpu);
ret = a5xx_ucode_init(gpu);
if (ret)
return ret;
+
+ /* Set the ringbuffer address */
+ gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
+ gpu->rb[0]->iova);
+
+ /*
+ * If the microcode supports the WHERE_AM_I opcode then we can use that
+ * in lieu of the RPTR shadow and enable preemption. Otherwise, we
+ * can't safely use the RPTR shadow or preemption. In either case, the
+ * RPTR shadow should be disabled in hardware.
+ */
+ gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
+ MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
+
+ /* Create a privileged buffer for the RPTR shadow */
+ if (a5xx_gpu->has_whereami) {
+ if (!a5xx_gpu->shadow_bo) {
+ a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
+ sizeof(u32) * gpu->nr_rings,
+ MSM_BO_UNCACHED | MSM_BO_MAP_PRIV,
+ gpu->aspace, &a5xx_gpu->shadow_bo,
+ &a5xx_gpu->shadow_iova);
+
+ if (IS_ERR(a5xx_gpu->shadow))
+ return PTR_ERR(a5xx_gpu->shadow);
+ }
+
+ gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
+ REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
+ } else if (gpu->nr_rings > 1) {
+ /* Disable preemption if WHERE_AM_I isn't available */
+ a5xx_preempt_fini(gpu);
+ gpu->nr_rings = 1;
+ }
+
+ a5xx_preempt_hw_init(gpu);
/* Disable the interrupts through the initial bringup stage */
gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
@@ -776,15 +795,16 @@
*/
if (adreno_is_a530(adreno_gpu)) {
OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
- OUT_RING(gpu->rb[0], 0x0F);
+ OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
- gpu->funcs->flush(gpu, gpu->rb[0]);
+ a5xx_flush(gpu, gpu->rb[0], true);
if (!a5xx_idle(gpu, gpu->rb[0]))
return -EINVAL;
}
/*
- * Try to load a zap shader into the secure world. If successful
+ * If the chip that we are using does support loading one, then
+ * try to load a zap shader into the secure world. If successful
* we can use the CP to switch out of secure mode. If not then we
* have no resource but to try to switch ourselves out manually. If we
* guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
@@ -795,14 +815,21 @@
OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
OUT_RING(gpu->rb[0], 0x00000000);
- gpu->funcs->flush(gpu, gpu->rb[0]);
+ a5xx_flush(gpu, gpu->rb[0], true);
if (!a5xx_idle(gpu, gpu->rb[0]))
return -EINVAL;
- } else {
- /* Print a warning so if we die, we know why */
+ } else if (ret == -ENODEV) {
+ /*
+ * This device does not use zap shader (but print a warning
+ * just in case someone got their dt wrong.. hopefully they
+ * have a debug UART to realize the error of their ways...
+ * if you mess this up you are about to crash horribly)
+ */
dev_warn_once(gpu->dev->dev,
"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
+ } else {
+ return ret;
}
/* Last step - yield the ringbuffer */
@@ -841,21 +868,23 @@
a5xx_preempt_fini(gpu);
if (a5xx_gpu->pm4_bo) {
- if (a5xx_gpu->pm4_iova)
- msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
- drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
+ msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
+ drm_gem_object_put(a5xx_gpu->pm4_bo);
}
if (a5xx_gpu->pfp_bo) {
- if (a5xx_gpu->pfp_iova)
- msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
- drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
+ msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
+ drm_gem_object_put(a5xx_gpu->pfp_bo);
}
if (a5xx_gpu->gpmu_bo) {
- if (a5xx_gpu->gpmu_iova)
- msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
- drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
+ msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
+ drm_gem_object_put(a5xx_gpu->gpmu_bo);
+ }
+
+ if (a5xx_gpu->shadow_bo) {
+ msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
+ drm_gem_object_put(a5xx_gpu->shadow_bo);
}
adreno_gpu_cleanup(adreno_gpu);
@@ -1028,7 +1057,7 @@
struct msm_drm_private *priv = dev->dev_private;
struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
- dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
+ DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
ring ? ring->id : -1, ring ? ring->seqno : 0,
gpu_read(gpu, REG_A5XX_RBBM_STATUS),
gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
@@ -1090,17 +1119,6 @@
return IRQ_HANDLED;
}
-static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
- REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
- REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
- REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
- REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
- REG_A5XX_CP_RB_RPTR_ADDR_HI),
- REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
- REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
- REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
-};
-
static const u32 a5xx_registers[] = {
0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
@@ -1134,19 +1152,29 @@
static void a5xx_dump(struct msm_gpu *gpu)
{
- dev_info(gpu->dev->dev, "status: %08x\n",
+ DRM_DEV_INFO(gpu->dev->dev, "status: %08x\n",
gpu_read(gpu, REG_A5XX_RBBM_STATUS));
adreno_dump(gpu);
}
static int a5xx_pm_resume(struct msm_gpu *gpu)
{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
int ret;
/* Turn on the core power */
ret = msm_gpu_pm_resume(gpu);
if (ret)
return ret;
+
+ if (adreno_is_a510(adreno_gpu)) {
+ /* Halt the sp_input_clk at HM level */
+ gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
+ a5xx_set_hwcg(gpu, true);
+ /* Turn on sp_input_clk at HM level */
+ gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
+ return 0;
+ }
/* Turn the RBCCU domain first to limit the chances of voltage droop */
gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
@@ -1176,9 +1204,19 @@
static int a5xx_pm_suspend(struct msm_gpu *gpu)
{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ u32 mask = 0xf;
+ int i, ret;
+
+ /* A510 has 3 XIN ports in VBIF */
+ if (adreno_is_a510(adreno_gpu))
+ mask = 0x7;
+
/* Clear the VBIF pipe before shutting down */
- gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
- spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
+ gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
+ spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
+ mask) == mask);
gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
@@ -1189,7 +1227,15 @@
gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
- return msm_gpu_pm_suspend(gpu);
+ ret = msm_gpu_pm_suspend(gpu);
+ if (ret)
+ return ret;
+
+ if (a5xx_gpu->has_whereami)
+ for (i = 0; i < gpu->nr_rings; i++)
+ a5xx_gpu->shadow[i] = 0;
+
+ return 0;
}
static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
@@ -1211,10 +1257,6 @@
u32 *hlsqregs;
};
-#define gpu_poll_timeout(gpu, addr, val, cond, interval, timeout) \
- readl_poll_timeout((gpu)->mmio + ((addr) << 2), val, cond, \
- interval, timeout)
-
static int a5xx_crashdumper_init(struct msm_gpu *gpu,
struct a5xx_crashdumper *dumper)
{
@@ -1222,19 +1264,10 @@
SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
&dumper->bo, &dumper->iova);
- if (IS_ERR(dumper->ptr))
- return PTR_ERR(dumper->ptr);
+ if (!IS_ERR(dumper->ptr))
+ msm_gem_object_set_name(dumper->bo, "crashdump");
- return 0;
-}
-
-static void a5xx_crashdumper_free(struct msm_gpu *gpu,
- struct a5xx_crashdumper *dumper)
-{
- msm_gem_put_iova(dumper->bo, gpu->aspace);
- msm_gem_put_vaddr(dumper->bo);
-
- drm_gem_object_unreference(dumper->bo);
+ return PTR_ERR_OR_ZERO(dumper->ptr);
}
static int a5xx_crashdumper_run(struct msm_gpu *gpu,
@@ -1329,7 +1362,7 @@
if (a5xx_crashdumper_run(gpu, &dumper)) {
kfree(a5xx_state->hlsqregs);
- a5xx_crashdumper_free(gpu, &dumper);
+ msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
return;
}
@@ -1337,7 +1370,7 @@
memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
count * sizeof(u32));
- a5xx_crashdumper_free(gpu, &dumper);
+ msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
}
static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
@@ -1377,7 +1410,7 @@
kfree(a5xx_state);
}
-int a5xx_gpu_state_put(struct msm_gpu_state *state)
+static int a5xx_gpu_state_put(struct msm_gpu_state *state)
{
if (IS_ERR_OR_NULL(state))
return 1;
@@ -1387,8 +1420,8 @@
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
-void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
- struct drm_printer *p)
+static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
+ struct drm_printer *p)
{
int i, j;
u32 pos = 0;
@@ -1436,12 +1469,39 @@
return a5xx_gpu->cur_ring;
}
-static int a5xx_gpu_busy(struct msm_gpu *gpu, uint64_t *value)
+static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
{
- *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
- REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
+ u64 busy_cycles, busy_time;
- return 0;
+ /* Only read the gpu busy if the hardware is already active */
+ if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
+ return 0;
+
+ busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
+ REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
+
+ busy_time = busy_cycles - gpu->devfreq.busy_cycles;
+ do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
+
+ gpu->devfreq.busy_cycles = busy_cycles;
+
+ pm_runtime_put(&gpu->pdev->dev);
+
+ if (WARN_ON(busy_time > ~0LU))
+ return ~0LU;
+
+ return (unsigned long)busy_time;
+}
+
+static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+
+ if (a5xx_gpu->has_whereami)
+ return a5xx_gpu->shadow[ring->id];
+
+ return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
}
static const struct adreno_gpu_funcs funcs = {
@@ -1452,7 +1512,6 @@
.pm_resume = a5xx_pm_resume,
.recover = a5xx_recover,
.submit = a5xx_submit,
- .flush = a5xx_flush,
.active_ring = a5xx_active_ring,
.irq = a5xx_irq,
.destroy = a5xx_destroy,
@@ -1465,6 +1524,8 @@
.gpu_busy = a5xx_gpu_busy,
.gpu_state_get = a5xx_gpu_state_get,
.gpu_state_put = a5xx_gpu_state_put,
+ .create_address_space = adreno_iommu_create_address_space,
+ .get_rptr = a5xx_get_rptr,
},
.get_timestamp = a5xx_get_timestamp,
};
@@ -1508,10 +1569,11 @@
struct a5xx_gpu *a5xx_gpu = NULL;
struct adreno_gpu *adreno_gpu;
struct msm_gpu *gpu;
+ unsigned int nr_rings;
int ret;
if (!pdev) {
- dev_err(dev->dev, "No A5XX device is defined\n");
+ DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
return ERR_PTR(-ENXIO);
}
@@ -1523,14 +1585,17 @@
gpu = &adreno_gpu->base;
adreno_gpu->registers = a5xx_registers;
- adreno_gpu->reg_offsets = a5xx_register_offsets;
a5xx_gpu->lm_leakage = 0x4E001A;
check_speed_bin(&pdev->dev);
- /* Restricting nr_rings to 1 to temporarily disable preemption */
- ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
+ nr_rings = 4;
+
+ if (adreno_is_a510(adreno_gpu))
+ nr_rings = 1;
+
+ ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, nr_rings);
if (ret) {
a5xx_destroy(&(a5xx_gpu->base.base));
return ERR_PTR(ret);
--
Gitblit v1.6.2