forked from ~ljy/RK356X_SDK_RELEASE

hc · 2024-05-13 · commit 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e
kernel/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1,26 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
  */

 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/cpumask.h>
 #include <linux/qcom_scm.h>
-#include <linux/dma-mapping.h>
-#include <linux/of_address.h>
-#include <linux/soc/qcom/mdt_loader.h>
 #include <linux/pm_opp.h>
 #include <linux/nvmem-consumer.h>
-#include <linux/iopoll.h>
 #include <linux/slab.h>
 #include "msm_gem.h"
 #include "msm_mmu.h"
@@ -31,104 +18,25 @@

 #define GPU_PAS_ID 13

-static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
-{
-	struct device *dev = &gpu->pdev->dev;
-	const struct firmware *fw;
-	struct device_node *np, *mem_np;
-	struct resource r;
-	phys_addr_t mem_phys;
-	ssize_t mem_size;
-	void *mem_region = NULL;
-	int ret;
-
-	if (!IS_ENABLED(CONFIG_ARCH_QCOM))
-		return -EINVAL;
-
-	np = of_get_child_by_name(dev->of_node, "zap-shader");
-	if (!np)
-		return -ENODEV;
-
-	mem_np = of_parse_phandle(np, "memory-region", 0);
-	of_node_put(np);
-	if (!mem_np)
-		return -EINVAL;
-
-	ret = of_address_to_resource(mem_np, 0, &r);
-	of_node_put(mem_np);
-	if (ret)
-		return ret;
-
-	mem_phys = r.start;
-	mem_size = resource_size(&r);
-
-	/* Request the MDT file for the firmware */
-	fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
-	if (IS_ERR(fw)) {
-		DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
-		return PTR_ERR(fw);
-	}
-
-	/* Figure out how much memory we need */
-	mem_size = qcom_mdt_get_size(fw);
-	if (mem_size < 0) {
-		ret = mem_size;
-		goto out;
-	}
-
-	/* Allocate memory for the firmware image */
-	mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
-	if (!mem_region) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	/*
-	 * Load the rest of the MDT
-	 *
-	 * Note that we could be dealing with two different paths, since
-	 * with upstream linux-firmware it would be in a qcom/ subdir..
-	 * adreno_request_fw() handles this, but qcom_mdt_load() does
-	 * not. But since we've already gotten thru adreno_request_fw()
-	 * we know which of the two cases it is:
-	 */
-	if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
-		ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
-				mem_region, mem_phys, mem_size, NULL);
-	} else {
-		char *newname;
-
-		newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname);
-
-		ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
-				mem_region, mem_phys, mem_size, NULL);
-		kfree(newname);
-	}
-	if (ret)
-		goto out;
-
-	/* Send the image to the secure world */
-	ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
-	if (ret)
-		DRM_DEV_ERROR(dev, "Unable to authorize the image\n");
-
-out:
-	if (mem_region)
-		memunmap(mem_region);
-
-	release_firmware(fw);
-
-	return ret;
-}
-
-static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
+		bool sync)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 	uint32_t wptr;
 	unsigned long flags;

-	spin_lock_irqsave(&ring->lock, flags);
+	/*
+	 * Most flush operations need to issue a WHERE_AM_I opcode to sync up
+	 * the rptr shadow
+	 */
+	if (a5xx_gpu->has_whereami && sync) {
+		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
+		OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
+		OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
+	}
+
+	spin_lock_irqsave(&ring->preempt_lock, flags);

 	/* Copy the shadow to the actual register */
 	ring->cur = ring->next;
@@ -136,7 +44,7 @@
 	/* Make sure to wrap wptr if we need to */
 	wptr = get_wptr(ring);

-	spin_unlock_irqrestore(&ring->lock, flags);
+	spin_unlock_irqrestore(&ring->preempt_lock, flags);

 	/* Make sure everything is posted before making a decision */
 	mb();
@@ -146,8 +54,7 @@
 		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
 }

-static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
-		struct msm_file_private *ctx)
+static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 {
 	struct msm_drm_private *priv = gpu->dev->dev_private;
 	struct msm_ringbuffer *ring = submit->ring;
@@ -160,8 +67,9 @@
 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
 			break;
 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
-			if (priv->lastctx == ctx)
+			if (priv->lastctx == submit->queue->ctx)
 				break;
+			fallthrough;
 		case MSM_SUBMIT_CMD_BUF:
 			/* copy commands into RB: */
 			obj = submit->bos[submit->cmd[i].idx].obj;
@@ -173,7 +81,7 @@
 			 * since we've already mapped it once in
 			 * submit_reloc()
 			 */
-			if (WARN_ON(!ptr))
+			if (WARN_ON(IS_ERR_OR_NULL(ptr)))
 				return;

 			for (i = 0; i < dwords; i++) {
@@ -193,7 +101,7 @@
 		}
 	}

-	a5xx_flush(gpu, ring);
+	a5xx_flush(gpu, ring, true);
 	a5xx_preempt_trigger(gpu);

 	/* we might not necessarily have a cmd from userspace to
@@ -205,8 +113,7 @@
 	msm_gpu_retire(gpu);
 }

-static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
-		struct msm_file_private *ctx)
+static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
@@ -216,7 +123,7 @@

 	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
 		priv->lastctx = NULL;
-		a5xx_submit_in_rb(gpu, submit, ctx);
+		a5xx_submit_in_rb(gpu, submit);
 		return;
 	}

@@ -237,8 +144,8 @@
 	OUT_RING(ring, 1);

 	/* Enable local preemption for finegrain preemption */
-	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
-	OUT_RING(ring, 0x02);
+	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
+	OUT_RING(ring, 0x1);

 	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
@@ -250,8 +157,9 @@
 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
 			break;
 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
-			if (priv->lastctx == ctx)
+			if (priv->lastctx == submit->queue->ctx)
 				break;
+			fallthrough;
 		case MSM_SUBMIT_CMD_BUF:
 			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
@@ -287,7 +195,8 @@
 	 * timestamp is written to the memory and then triggers the interrupt
 	 */
 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
-	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
+	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
+		CP_EVENT_WRITE_0_IRQ);
 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
 	OUT_RING(ring, submit->seqno);
@@ -306,7 +215,8 @@
 	/* Set bit 0 to trigger an interrupt on preempt complete */
 	OUT_RING(ring, 0x01);

-	a5xx_flush(gpu, ring);
+	/* A WHERE_AM_I packet is not needed after a YIELD */
+	a5xx_flush(gpu, ring, false);

 	/* Check to see if we need to start preemption */
 	a5xx_preempt_trigger(gpu);
@@ -412,11 +322,17 @@

 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
 {
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	unsigned int i;

 	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
 		gpu_write(gpu, a5xx_hwcg[i].offset,
 			state ? a5xx_hwcg[i].value : 0);
+
+	if (adreno_is_a540(adreno_gpu)) {
+		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
+		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
+	}

 	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
 	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
@@ -448,6 +364,9 @@
 		 * 2D mode 3 draw
 		 */
 		OUT_RING(ring, 0x0000000B);
+	} else if (adreno_is_a510(adreno_gpu)) {
+		/* Workaround for token and syncs */
+		OUT_RING(ring, 0x00000001);
 	} else {
 		/* No workarounds enabled */
 		OUT_RING(ring, 0x00000000);
@@ -456,7 +375,7 @@
 	OUT_RING(ring, 0x00000000);
 	OUT_RING(ring, 0x00000000);

-	gpu->funcs->flush(gpu, ring);
+	a5xx_flush(gpu, ring, true);
 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
 }

@@ -498,9 +417,29 @@
 	OUT_RING(ring, 0x01);
 	OUT_RING(ring, 0x01);

-	gpu->funcs->flush(gpu, ring);
+	/* The WHERE_AMI_I packet is not needed after a YIELD is issued */
+	a5xx_flush(gpu, ring, false);

 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
+}
+
+static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
+		struct drm_gem_object *obj)
+{
+	u32 *buf = msm_gem_get_vaddr_active(obj);
+
+	if (IS_ERR(buf))
+		return;
+
+	/*
+	 * If the lowest nibble is 0xa that is an indication that this microcode
+	 * has been patched. The actual version is in dword [3] but we only care
+	 * about the patchlevel which is the lowest nibble of dword [3]
+	 */
+	if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
+		a5xx_gpu->has_whereami = true;
+
+	msm_gem_put_vaddr(obj);
 }

 static int a5xx_ucode_init(struct msm_gpu *gpu)
@@ -513,13 +452,16 @@
 		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
 			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);

+
 		if (IS_ERR(a5xx_gpu->pm4_bo)) {
 			ret = PTR_ERR(a5xx_gpu->pm4_bo);
 			a5xx_gpu->pm4_bo = NULL;
-			dev_err(gpu->dev->dev, "could not allocate PM4: %d\n",
+			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
 				ret);
 			return ret;
 		}
+
+		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
 	}

 	if (!a5xx_gpu->pfp_bo) {
@@ -529,10 +471,13 @@
 		if (IS_ERR(a5xx_gpu->pfp_bo)) {
 			ret = PTR_ERR(a5xx_gpu->pfp_bo);
 			a5xx_gpu->pfp_bo = NULL;
-			dev_err(gpu->dev->dev, "could not allocate PFP: %d\n",
+			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
 				ret);
 			return ret;
 		}
+
+		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
+		a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
 	}

 	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
@@ -561,8 +506,6 @@
 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
 {
 	static bool loaded;
-	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-	struct platform_device *pdev = gpu->pdev;
 	int ret;

 	/*
@@ -572,23 +515,9 @@
 	if (loaded)
 		return a5xx_zap_shader_resume(gpu);

-	/* We need SCM to be able to load the firmware */
-	if (!qcom_scm_is_available()) {
-		DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
-		return -EPROBE_DEFER;
-	}
-
-	/* Each GPU has a target specific zap shader firmware name to use */
-	if (!adreno_gpu->info->zapfw) {
-		DRM_DEV_ERROR(&pdev->dev,
-			"Zap shader firmware file not specified for this target\n");
-		return -ENODEV;
-	}
-
-	ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);
+	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);

 	loaded = !ret;
-
 	return ret;
 }

@@ -608,9 +537,13 @@
 static int a5xx_hw_init(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 	int ret;

 	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
+
+	if (adreno_is_a540(adreno_gpu))
+		gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);

 	/* Make all blocks contribute to the GPU BUSY perf counter */
 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
@@ -671,12 +604,24 @@
 		0x00100000 + adreno_gpu->gmem - 1);
 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);

-	gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
-	gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
-	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
-	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
-
-	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
+	if (adreno_is_a510(adreno_gpu)) {
+		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
+		gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
+		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
+		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
+		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
+			(0x200 << 11 | 0x200 << 22));
+	} else {
+		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
+		if (adreno_is_a530(adreno_gpu))
+			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
+		if (adreno_is_a540(adreno_gpu))
+			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
+		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
+		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
+		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
+			(0x400 << 11 | 0x300 << 22));
+	}

 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
 		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
@@ -687,6 +632,19 @@
 	/* Enable ME/PFP split notification */
 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);

+	/*
+	 * In A5x, CCU can send context_done event of a particular context to
+	 * UCHE which ultimately reaches CP even when there is valid
+	 * transaction of that context inside CCU. This can let CP to program
+	 * config registers, which will make the "valid transaction" inside
+	 * CCU to be interpreted differently. This can cause gpu fault. This
+	 * bug is fixed in latest A510 revision. To enable this bug fix -
+	 * bit[11] of RB_DBG_ECO_CNTL need to be set to 0, default is 1
+	 * (disable). For older A510 version this bit is unused.
+	 */
+	if (adreno_is_a510(adreno_gpu))
+		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
+
 	/* Enable HWCG */
 	a5xx_set_hwcg(gpu, true);

@@ -695,6 +653,8 @@
 	/* Set the highest bank bit */
 	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
 	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
+	if (adreno_is_a540(adreno_gpu))
+		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);

 	/* Protect registers from the CP */
 	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
@@ -731,7 +691,7 @@
 	/* UCHE */
 	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));

-	if (adreno_is_a530(adreno_gpu))
+	if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
 		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
 			ADRENO_PROTECT_RW(0x10000, 0x8000));

@@ -745,17 +705,76 @@
 		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

+	/* Put the GPU into 64 bit by default */
+	gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
+
+	/*
+	 * VPC corner case with local memory load kill leads to corrupt
+	 * internal state. Normal Disable does not work for all a5x chips.
+	 * So do the following setting to disable it.
+	 */
+	if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
+		gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
+		gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
+	}
+
 	ret = adreno_hw_init(gpu);
 	if (ret)
 		return ret;

-	a5xx_preempt_hw_init(gpu);
-
-	a5xx_gpmu_ucode_init(gpu);
+	if (!adreno_is_a510(adreno_gpu))
+		a5xx_gpmu_ucode_init(gpu);

 	ret = a5xx_ucode_init(gpu);
 	if (ret)
 		return ret;
+
+	/* Set the ringbuffer address */
+	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
+		gpu->rb[0]->iova);
+
+	/*
+	 * If the microcode supports the WHERE_AM_I opcode then we can use that
+	 * in lieu of the RPTR shadow and enable preemption. Otherwise, we
+	 * can't safely use the RPTR shadow or preemption. In either case, the
+	 * RPTR shadow should be disabled in hardware.
+	 */
+	gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
+		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
+
+	/* Create a privileged buffer for the RPTR shadow */
+	if (a5xx_gpu->has_whereami) {
+		if (!a5xx_gpu->shadow_bo) {
+			a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
+				sizeof(u32) * gpu->nr_rings,
+				MSM_BO_UNCACHED | MSM_BO_MAP_PRIV,
+				gpu->aspace, &a5xx_gpu->shadow_bo,
+				&a5xx_gpu->shadow_iova);
+
+			if (IS_ERR(a5xx_gpu->shadow))
+				return PTR_ERR(a5xx_gpu->shadow);
+		}
+
+		gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
+			REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
+	} else if (gpu->nr_rings > 1) {
+		/* Disable preemption if WHERE_AM_I isn't available */
+		a5xx_preempt_fini(gpu);
+		gpu->nr_rings = 1;
+	}
+
+	a5xx_preempt_hw_init(gpu);

 	/* Disable the interrupts through the initial bringup stage */
 	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
@@ -776,15 +795,16 @@
 	 */
 	if (adreno_is_a530(adreno_gpu)) {
 		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
-		OUT_RING(gpu->rb[0], 0x0F);
+		OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));

-		gpu->funcs->flush(gpu, gpu->rb[0]);
+		a5xx_flush(gpu, gpu->rb[0], true);
 		if (!a5xx_idle(gpu, gpu->rb[0]))
 			return -EINVAL;
 	}

 	/*
-	 * Try to load a zap shader into the secure world. If successful
+	 * If the chip that we are using does support loading one, then
+	 * try to load a zap shader into the secure world. If successful
 	 * we can use the CP to switch out of secure mode. If not then we
 	 * have no resource but to try to switch ourselves out manually. If we
 	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
@@ -795,14 +815,21 @@
 		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
 		OUT_RING(gpu->rb[0], 0x00000000);

-		gpu->funcs->flush(gpu, gpu->rb[0]);
+		a5xx_flush(gpu, gpu->rb[0], true);
 		if (!a5xx_idle(gpu, gpu->rb[0]))
 			return -EINVAL;
-	} else {
-		/* Print a warning so if we die, we know why */
+	} else if (ret == -ENODEV) {
+		/*
+		 * This device does not use zap shader (but print a warning
+		 * just in case someone got their dt wrong.. hopefully they
+		 * have a debug UART to realize the error of their ways...
+		 * if you mess this up you are about to crash horribly)
+		 */
 		dev_warn_once(gpu->dev->dev,
 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
 		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
+	} else {
+		return ret;
 	}

 	/* Last step - yield the ringbuffer */
@@ -841,21 +868,23 @@
 	a5xx_preempt_fini(gpu);

 	if (a5xx_gpu->pm4_bo) {
-		if (a5xx_gpu->pm4_iova)
-			msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
-		drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
+		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
+		drm_gem_object_put(a5xx_gpu->pm4_bo);
 	}

 	if (a5xx_gpu->pfp_bo) {
-		if (a5xx_gpu->pfp_iova)
-			msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
-		drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
+		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
+		drm_gem_object_put(a5xx_gpu->pfp_bo);
 	}

 	if (a5xx_gpu->gpmu_bo) {
-		if (a5xx_gpu->gpmu_iova)
-			msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
-		drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
+		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
+		drm_gem_object_put(a5xx_gpu->gpmu_bo);
+	}
+
+	if (a5xx_gpu->shadow_bo) {
+		msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
+		drm_gem_object_put(a5xx_gpu->shadow_bo);
 	}

 	adreno_gpu_cleanup(adreno_gpu);
@@ -1028,7 +1057,7 @@
 	struct msm_drm_private *priv = dev->dev_private;
 	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);

-	dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
+	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
 		ring ? ring->id : -1, ring ? ring->seqno : 0,
 		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
 		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
@@ -1090,17 +1119,6 @@
 	return IRQ_HANDLED;
 }

-static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
-		REG_A5XX_CP_RB_RPTR_ADDR_HI),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
-};
-
 static const u32 a5xx_registers[] = {
 	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
 	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
@@ -1134,19 +1152,29 @@

 static void a5xx_dump(struct msm_gpu *gpu)
 {
-	dev_info(gpu->dev->dev, "status: %08x\n",
+	DRM_DEV_INFO(gpu->dev->dev, "status: %08x\n",
 		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
 	adreno_dump(gpu);
 }

 static int a5xx_pm_resume(struct msm_gpu *gpu)
 {
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	int ret;

 	/* Turn on the core power */
 	ret = msm_gpu_pm_resume(gpu);
 	if (ret)
 		return ret;
+
+	if (adreno_is_a510(adreno_gpu)) {
+		/* Halt the sp_input_clk at HM level */
+		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
+		a5xx_set_hwcg(gpu, true);
+		/* Turn on sp_input_clk at HM level */
+		gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
+		return 0;
+	}

 	/* Turn the RBCCU domain first to limit the chances of voltage droop */
 	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
@@ -1176,9 +1204,19 @@

 static int a5xx_pm_suspend(struct msm_gpu *gpu)
 {
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+	u32 mask = 0xf;
+	int i, ret;
+
+	/* A510 has 3 XIN ports in VBIF */
+	if (adreno_is_a510(adreno_gpu))
+		mask = 0x7;
+
 	/* Clear the VBIF pipe before shutting down */
-	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
-	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
+	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
+	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
+				mask) == mask);

 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);

@@ -1189,7 +1227,15 @@
 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);

-	return msm_gpu_pm_suspend(gpu);
+	ret = msm_gpu_pm_suspend(gpu);
+	if (ret)
+		return ret;
+
+	if (a5xx_gpu->has_whereami)
+		for (i = 0; i < gpu->nr_rings; i++)
+			a5xx_gpu->shadow[i] = 0;
+
+	return 0;
 }

 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
@@ -1211,10 +1257,6 @@
 	u32 *hlsqregs;
 };

-#define gpu_poll_timeout(gpu, addr, val, cond, interval, timeout) \
-	readl_poll_timeout((gpu)->mmio + ((addr) << 2), val, cond, \
-		interval, timeout)
-
 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
 		struct a5xx_crashdumper *dumper)
 {
@@ -1222,19 +1264,10 @@
 		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
 		&dumper->bo, &dumper->iova);

-	if (IS_ERR(dumper->ptr))
-		return PTR_ERR(dumper->ptr);
+	if (!IS_ERR(dumper->ptr))
+		msm_gem_object_set_name(dumper->bo, "crashdump");

-	return 0;
-}
-
-static void a5xx_crashdumper_free(struct msm_gpu *gpu,
-	struct a5xx_crashdumper *dumper)
-{
-	msm_gem_put_iova(dumper->bo, gpu->aspace);
-	msm_gem_put_vaddr(dumper->bo);
-
-	drm_gem_object_unreference(dumper->bo);
+	return PTR_ERR_OR_ZERO(dumper->ptr);
 }

 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
@@ -1329,7 +1362,7 @@

 	if (a5xx_crashdumper_run(gpu, &dumper)) {
 		kfree(a5xx_state->hlsqregs);
-		a5xx_crashdumper_free(gpu, &dumper);
+		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
 		return;
 	}

@@ -1337,7 +1370,7 @@
 	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
 		count * sizeof(u32));

-	a5xx_crashdumper_free(gpu, &dumper);
+	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
 }

 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
@@ -1377,7 +1410,7 @@
 	kfree(a5xx_state);
 }

-int a5xx_gpu_state_put(struct msm_gpu_state *state)
+static int a5xx_gpu_state_put(struct msm_gpu_state *state)
 {
 	if (IS_ERR_OR_NULL(state))
 		return 1;
@@ -1387,8 +1420,8 @@


 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
-void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
-	struct drm_printer *p)
+static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
+		struct drm_printer *p)
 {
 	int i, j;
 	u32 pos = 0;
@@ -1436,12 +1469,39 @@
 	return a5xx_gpu->cur_ring;
 }

-static int a5xx_gpu_busy(struct msm_gpu *gpu, uint64_t *value)
+static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
 {
-	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
-		REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
+	u64 busy_cycles, busy_time;

-	return 0;
+	/* Only read the gpu busy if the hardware is already active */
+	if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
+		return 0;
+
+	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
+			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
+
+	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
+	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
+
+	gpu->devfreq.busy_cycles = busy_cycles;
+
+	pm_runtime_put(&gpu->pdev->dev);
+
+	if (WARN_ON(busy_time > ~0LU))
+		return ~0LU;
+
+	return (unsigned long)busy_time;
+}
+
+static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+
+	if (a5xx_gpu->has_whereami)
+		return a5xx_gpu->shadow[ring->id];
+
+	return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
 }

 static const struct adreno_gpu_funcs funcs = {
@@ -1452,7 +1512,6 @@
 		.pm_resume = a5xx_pm_resume,
 		.recover = a5xx_recover,
 		.submit = a5xx_submit,
-		.flush = a5xx_flush,
 		.active_ring = a5xx_active_ring,
 		.irq = a5xx_irq,
 		.destroy = a5xx_destroy,
@@ -1465,6 +1524,8 @@
 		.gpu_busy = a5xx_gpu_busy,
 		.gpu_state_get = a5xx_gpu_state_get,
 		.gpu_state_put = a5xx_gpu_state_put,
+		.create_address_space = adreno_iommu_create_address_space,
+		.get_rptr = a5xx_get_rptr,
 	},
 	.get_timestamp = a5xx_get_timestamp,
 };
@@ -1508,10 +1569,11 @@
 	struct a5xx_gpu *a5xx_gpu = NULL;
 	struct adreno_gpu *adreno_gpu;
 	struct msm_gpu *gpu;
+	unsigned int nr_rings;
 	int ret;

 	if (!pdev) {
-		dev_err(dev->dev, "No A5XX device is defined\n");
+		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
 		return ERR_PTR(-ENXIO);
 	}

@@ -1523,14 +1585,17 @@
 	gpu = &adreno_gpu->base;

 	adreno_gpu->registers = a5xx_registers;
-	adreno_gpu->reg_offsets = a5xx_register_offsets;

 	a5xx_gpu->lm_leakage = 0x4E001A;

 	check_speed_bin(&pdev->dev);

-	/* Restricting nr_rings to 1 to temporarily disable preemption */
-	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
+	nr_rings = 4;
+
+	if (adreno_is_a510(adreno_gpu))
+		nr_rings = 1;
+
+	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, nr_rings);
 	if (ret) {
 		a5xx_destroy(&(a5xx_gpu->base.base));
 		return ERR_PTR(ret);