forked from ~ljy/RK356X_SDK_RELEASE

hc · 2024-05-13 · commit 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e
kernel/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1,26 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
  */

 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/cpumask.h>
 #include <linux/qcom_scm.h>
-#include <linux/dma-mapping.h>
-#include <linux/of_address.h>
-#include <linux/soc/qcom/mdt_loader.h>
 #include <linux/pm_opp.h>
 #include <linux/nvmem-consumer.h>
-#include <linux/iopoll.h>
 #include <linux/slab.h>
 #include "msm_gem.h"
 #include "msm_mmu.h"
@@ -31,104 +18,25 @@

 #define GPU_PAS_ID 13

-static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
-{
-	struct device *dev = &gpu->pdev->dev;
-	const struct firmware *fw;
-	struct device_node *np, *mem_np;
-	struct resource r;
-	phys_addr_t mem_phys;
-	ssize_t mem_size;
-	void *mem_region = NULL;
-	int ret;
-
-	if (!IS_ENABLED(CONFIG_ARCH_QCOM))
-		return -EINVAL;
-
-	np = of_get_child_by_name(dev->of_node, "zap-shader");
-	if (!np)
-		return -ENODEV;
-
-	mem_np = of_parse_phandle(np, "memory-region", 0);
-	of_node_put(np);
-	if (!mem_np)
-		return -EINVAL;
-
-	ret = of_address_to_resource(mem_np, 0, &r);
-	of_node_put(mem_np);
-	if (ret)
-		return ret;
-
-	mem_phys = r.start;
-	mem_size = resource_size(&r);
-
-	/* Request the MDT file for the firmware */
-	fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
-	if (IS_ERR(fw)) {
-		DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
-		return PTR_ERR(fw);
-	}
-
-	/* Figure out how much memory we need */
-	mem_size = qcom_mdt_get_size(fw);
-	if (mem_size < 0) {
-		ret = mem_size;
-		goto out;
-	}
-
-	/* Allocate memory for the firmware image */
-	mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
-	if (!mem_region) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	/*
-	 * Load the rest of the MDT
-	 *
-	 * Note that we could be dealing with two different paths, since
-	 * with upstream linux-firmware it would be in a qcom/ subdir..
-	 * adreno_request_fw() handles this, but qcom_mdt_load() does
-	 * not. But since we've already gotten thru adreno_request_fw()
-	 * we know which of the two cases it is:
-	 */
-	if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
-		ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
-				mem_region, mem_phys, mem_size, NULL);
-	} else {
-		char *newname;
-
-		newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname);
-
-		ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
-				mem_region, mem_phys, mem_size, NULL);
-		kfree(newname);
-	}
-	if (ret)
-		goto out;
-
-	/* Send the image to the secure world */
-	ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
-	if (ret)
-		DRM_DEV_ERROR(dev, "Unable to authorize the image\n");
-
-out:
-	if (mem_region)
-		memunmap(mem_region);
-
-	release_firmware(fw);
-
-	return ret;
-}
-
-static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
+		bool sync)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 	uint32_t wptr;
 	unsigned long flags;

-	spin_lock_irqsave(&ring->lock, flags);
+	/*
+	 * Most flush operations need to issue a WHERE_AM_I opcode to sync up
+	 * the rptr shadow
+	 */
+	if (a5xx_gpu->has_whereami && sync) {
+		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
+		OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
+		OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
+	}
+
+	spin_lock_irqsave(&ring->preempt_lock, flags);

 	/* Copy the shadow to the actual register */
 	ring->cur = ring->next;
@@ -136,7 +44,7 @@
 	/* Make sure to wrap wptr if we need to */
 	wptr = get_wptr(ring);

-	spin_unlock_irqrestore(&ring->lock, flags);
+	spin_unlock_irqrestore(&ring->preempt_lock, flags);

 	/* Make sure everything is posted before making a decision */
 	mb();
@@ -146,8 +54,7 @@
 		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
 }

-static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
-		struct msm_file_private *ctx)
+static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 {
 	struct msm_drm_private *priv = gpu->dev->dev_private;
 	struct msm_ringbuffer *ring = submit->ring;
@@ -160,8 +67,9 @@
 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
 			break;
 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
-			if (priv->lastctx == ctx)
+			if (priv->lastctx == submit->queue->ctx)
 				break;
+			fallthrough;
 		case MSM_SUBMIT_CMD_BUF:
 			/* copy commands into RB: */
 			obj = submit->bos[submit->cmd[i].idx].obj;
@@ -173,7 +81,7 @@
 			 * since we've already mapped it once in
 			 * submit_reloc()
 			 */
-			if (WARN_ON(!ptr))
+			if (WARN_ON(IS_ERR_OR_NULL(ptr)))
 				return;

 			for (i = 0; i < dwords; i++) {
@@ -193,7 +101,7 @@
 		}
 	}

-	a5xx_flush(gpu, ring);
+	a5xx_flush(gpu, ring, true);
 	a5xx_preempt_trigger(gpu);

 	/* we might not necessarily have a cmd from userspace to
@@ -205,8 +113,7 @@
 	msm_gpu_retire(gpu);
 }

-static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
-		struct msm_file_private *ctx)
+static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
@@ -216,7 +123,7 @@

 	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
 		priv->lastctx = NULL;
-		a5xx_submit_in_rb(gpu, submit, ctx);
+		a5xx_submit_in_rb(gpu, submit);
 		return;
 	}

@@ -237,8 +144,8 @@
 	OUT_RING(ring, 1);

 	/* Enable local preemption for finegrain preemption */
-	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
-	OUT_RING(ring, 0x02);
+	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
+	OUT_RING(ring, 0x1);

 	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
@@ -250,8 +157,9 @@
 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
 			break;
 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
-			if (priv->lastctx == ctx)
+			if (priv->lastctx == submit->queue->ctx)
 				break;
+			fallthrough;
 		case MSM_SUBMIT_CMD_BUF:
 			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
@@ -287,7 +195,8 @@
 	 * timestamp is written to the memory and then triggers the interrupt
 	 */
 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
-	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
+	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
+		CP_EVENT_WRITE_0_IRQ);
 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
 	OUT_RING(ring, submit->seqno);
@@ -306,7 +215,8 @@
 	/* Set bit 0 to trigger an interrupt on preempt complete */
 	OUT_RING(ring, 0x01);

-	a5xx_flush(gpu, ring);
+	/* A WHERE_AM_I packet is not needed after a YIELD */
+	a5xx_flush(gpu, ring, false);

 	/* Check to see if we need to start preemption */
 	a5xx_preempt_trigger(gpu);
@@ -412,11 +322,17 @@

 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
 {
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	unsigned int i;

 	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
 		gpu_write(gpu, a5xx_hwcg[i].offset,
 			state ? a5xx_hwcg[i].value : 0);
+
+	if (adreno_is_a540(adreno_gpu)) {
+		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
+		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
+	}

 	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
 	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
@@ -448,6 +364,9 @@
 		 * 2D mode 3 draw
 		 */
 		OUT_RING(ring, 0x0000000B);
+	} else if (adreno_is_a510(adreno_gpu)) {
+		/* Workaround for token and syncs */
+		OUT_RING(ring, 0x00000001);
 	} else {
 		/* No workarounds enabled */
 		OUT_RING(ring, 0x00000000);
@@ -456,7 +375,7 @@
 	OUT_RING(ring, 0x00000000);
 	OUT_RING(ring, 0x00000000);

-	gpu->funcs->flush(gpu, ring);
+	a5xx_flush(gpu, ring, true);
 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
 }

@@ -498,9 +417,29 @@
 	OUT_RING(ring, 0x01);
 	OUT_RING(ring, 0x01);

-	gpu->funcs->flush(gpu, ring);
+	/* The WHERE_AMI_I packet is not needed after a YIELD is issued */
+	a5xx_flush(gpu, ring, false);

 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
+}
+
+static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
+		struct drm_gem_object *obj)
+{
+	u32 *buf = msm_gem_get_vaddr_active(obj);
+
+	if (IS_ERR(buf))
+		return;
+
+	/*
+	 * If the lowest nibble is 0xa that is an indication that this microcode
+	 * has been patched. The actual version is in dword [3] but we only care
+	 * about the patchlevel which is the lowest nibble of dword [3]
+	 */
+	if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
+		a5xx_gpu->has_whereami = true;
+
+	msm_gem_put_vaddr(obj);
 }

 static int a5xx_ucode_init(struct msm_gpu *gpu)
@@ -513,13 +452,16 @@
 		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
 			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);

+
 		if (IS_ERR(a5xx_gpu->pm4_bo)) {
 			ret = PTR_ERR(a5xx_gpu->pm4_bo);
 			a5xx_gpu->pm4_bo = NULL;
-			dev_err(gpu->dev->dev, "could not allocate PM4: %d\n",
+			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
 				ret);
 			return ret;
 		}
+
+		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
 	}

 	if (!a5xx_gpu->pfp_bo) {
@@ -529,10 +471,13 @@
 		if (IS_ERR(a5xx_gpu->pfp_bo)) {
 			ret = PTR_ERR(a5xx_gpu->pfp_bo);
 			a5xx_gpu->pfp_bo = NULL;
-			dev_err(gpu->dev->dev, "could not allocate PFP: %d\n",
+			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
 				ret);
 			return ret;
 		}
+
+		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
+		a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
 	}

 	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
@@ -561,8 +506,6 @@
 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
 {
 	static bool loaded;
-	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-	struct platform_device *pdev = gpu->pdev;
 	int ret;

 	/*
@@ -572,23 +515,9 @@
 	if (loaded)
 		return a5xx_zap_shader_resume(gpu);

-	/* We need SCM to be able to load the firmware */
-	if (!qcom_scm_is_available()) {
-		DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
-		return -EPROBE_DEFER;
-	}
-
-	/* Each GPU has a target specific zap shader firmware name to use */
-	if (!adreno_gpu->info->zapfw) {
-		DRM_DEV_ERROR(&pdev->dev,
-			"Zap shader firmware file not specified for this target\n");
-		return -ENODEV;
-	}
-
-	ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);
+	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);

 	loaded = !ret;
-
 	return ret;
 }

@@ -608,9 +537,13 @@
 static int a5xx_hw_init(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 	int ret;

 	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
+
+	if (adreno_is_a540(adreno_gpu))
+		gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);

 	/* Make all blocks contribute to the GPU BUSY perf counter */
 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
@@ -671,12 +604,24 @@
 		0x00100000 + adreno_gpu->gmem - 1);
 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);

-	gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
-	gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
-	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
-	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
-
-	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
+	if (adreno_is_a510(adreno_gpu)) {
+		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
+		gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
+		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
+		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
+		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
+			(0x200 << 11 | 0x200 << 22));
+	} else {
+		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
+		if (adreno_is_a530(adreno_gpu))
+			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
+		if (adreno_is_a540(adreno_gpu))
+			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
+		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
+		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
+		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
+			(0x400 << 11 | 0x300 << 22));
+	}

 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
 		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
@@ -687,6 +632,19 @@
 	/* Enable ME/PFP split notification */
 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);

+	/*
+	 * In A5x, CCU can send context_done event of a particular context to
+	 * UCHE which ultimately reaches CP even when there is valid
+	 * transaction of that context inside CCU. This can let CP to program
+	 * config registers, which will make the "valid transaction" inside
+	 * CCU to be interpreted differently. This can cause gpu fault. This
+	 * bug is fixed in latest A510 revision. To enable this bug fix -
+	 * bit[11] of RB_DBG_ECO_CNTL need to be set to 0, default is 1
+	 * (disable). For older A510 version this bit is unused.
+	 */
+	if (adreno_is_a510(adreno_gpu))
+		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
+
 	/* Enable HWCG */
 	a5xx_set_hwcg(gpu, true);

@@ -695,6 +653,8 @@
 	/* Set the highest bank bit */
 	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
 	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
+	if (adreno_is_a540(adreno_gpu))
+		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);

 	/* Protect registers from the CP */
 	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
@@ -731,7 +691,7 @@
 	/* UCHE */
 	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));

-	if (adreno_is_a530(adreno_gpu))
+	if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
 		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
 			ADRENO_PROTECT_RW(0x10000, 0x8000));

@@ -745,17 +705,76 @@
 		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

+	/* Put the GPU into 64 bit by default */
+	gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
+
+	/*
+	 * VPC corner case with local memory load kill leads to corrupt
+	 * internal state. Normal Disable does not work for all a5x chips.
+	 * So do the following setting to disable it.
+	 */
+	if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
+		gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
+		gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
+	}
+
 	ret = adreno_hw_init(gpu);
 	if (ret)
 		return ret;

-	a5xx_preempt_hw_init(gpu);
-
-	a5xx_gpmu_ucode_init(gpu);
+	if (!adreno_is_a510(adreno_gpu))
+		a5xx_gpmu_ucode_init(gpu);

 	ret = a5xx_ucode_init(gpu);
 	if (ret)
 		return ret;
+
+	/* Set the ringbuffer address */
+	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
+		gpu->rb[0]->iova);
+
+	/*
+	 * If the microcode supports the WHERE_AM_I opcode then we can use that
+	 * in lieu of the RPTR shadow and enable preemption. Otherwise, we
+	 * can't safely use the RPTR shadow or preemption. In either case, the
+	 * RPTR shadow should be disabled in hardware.
+	 */
+	gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
+		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
+
+	/* Create a privileged buffer for the RPTR shadow */
+	if (a5xx_gpu->has_whereami) {
+		if (!a5xx_gpu->shadow_bo) {
+			a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
+				sizeof(u32) * gpu->nr_rings,
+				MSM_BO_UNCACHED | MSM_BO_MAP_PRIV,
+				gpu->aspace, &a5xx_gpu->shadow_bo,
+				&a5xx_gpu->shadow_iova);
+
+			if (IS_ERR(a5xx_gpu->shadow))
+				return PTR_ERR(a5xx_gpu->shadow);
+		}
+
+		gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
+			REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
+	} else if (gpu->nr_rings > 1) {
+		/* Disable preemption if WHERE_AM_I isn't available */
+		a5xx_preempt_fini(gpu);
+		gpu->nr_rings = 1;
+	}
+
+	a5xx_preempt_hw_init(gpu);

 	/* Disable the interrupts through the initial bringup stage */
 	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
@@ -776,15 +795,16 @@
 	 */
 	if (adreno_is_a530(adreno_gpu)) {
 		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
-		OUT_RING(gpu->rb[0], 0x0F);
+		OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));

-		gpu->funcs->flush(gpu, gpu->rb[0]);
+		a5xx_flush(gpu, gpu->rb[0], true);
 		if (!a5xx_idle(gpu, gpu->rb[0]))
 			return -EINVAL;
 	}

 	/*
-	 * Try to load a zap shader into the secure world. If successful
+	 * If the chip that we are using does support loading one, then
+	 * try to load a zap shader into the secure world. If successful
 	 * we can use the CP to switch out of secure mode. If not then we
 	 * have no resource but to try to switch ourselves out manually. If we
 	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
@@ -795,14 +815,21 @@
 		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
 		OUT_RING(gpu->rb[0], 0x00000000);

-		gpu->funcs->flush(gpu, gpu->rb[0]);
+		a5xx_flush(gpu, gpu->rb[0], true);
 		if (!a5xx_idle(gpu, gpu->rb[0]))
 			return -EINVAL;
-	} else {
-		/* Print a warning so if we die, we know why */
+	} else if (ret == -ENODEV) {
+		/*
+		 * This device does not use zap shader (but print a warning
+		 * just in case someone got their dt wrong.. hopefully they
+		 * have a debug UART to realize the error of their ways...
+		 * if you mess this up you are about to crash horribly)
+		 */
 		dev_warn_once(gpu->dev->dev,
 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
 		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
+	} else {
+		return ret;
 	}

 	/* Last step - yield the ringbuffer */
@@ -841,21 +868,23 @@
 	a5xx_preempt_fini(gpu);

 	if (a5xx_gpu->pm4_bo) {
-		if (a5xx_gpu->pm4_iova)
-			msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
-		drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
+		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
+		drm_gem_object_put(a5xx_gpu->pm4_bo);
 	}

 	if (a5xx_gpu->pfp_bo) {
-		if (a5xx_gpu->pfp_iova)
-			msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
-		drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
+		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
+		drm_gem_object_put(a5xx_gpu->pfp_bo);
 	}

 	if (a5xx_gpu->gpmu_bo) {
-		if (a5xx_gpu->gpmu_iova)
-			msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
-		drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
+		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
+		drm_gem_object_put(a5xx_gpu->gpmu_bo);
+	}
+
+	if (a5xx_gpu->shadow_bo) {
+		msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
+		drm_gem_object_put(a5xx_gpu->shadow_bo);
 	}

 	adreno_gpu_cleanup(adreno_gpu);
@@ -1028,7 +1057,7 @@
 	struct msm_drm_private *priv = dev->dev_private;
 	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);

-	dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
+	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
 		ring ? ring->id : -1, ring ? ring->seqno : 0,
 		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
 		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
@@ -1090,17 +1119,6 @@
 	return IRQ_HANDLED;
 }

-static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
-		REG_A5XX_CP_RB_RPTR_ADDR_HI),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
-};
-
 static const u32 a5xx_registers[] = {
 	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
 	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
@@ -1134,19 +1152,29 @@

 static void a5xx_dump(struct msm_gpu *gpu)
 {
-	dev_info(gpu->dev->dev, "status: %08x\n",
+	DRM_DEV_INFO(gpu->dev->dev, "status: %08x\n",
 		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
 	adreno_dump(gpu);
 }

 static int a5xx_pm_resume(struct msm_gpu *gpu)
 {
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	int ret;

 	/* Turn on the core power */
 	ret = msm_gpu_pm_resume(gpu);
 	if (ret)
 		return ret;
+
+	if (adreno_is_a510(adreno_gpu)) {
+		/* Halt the sp_input_clk at HM level */
+		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
+		a5xx_set_hwcg(gpu, true);
+		/* Turn on sp_input_clk at HM level */
+		gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
+		return 0;
+	}

 	/* Turn the RBCCU domain first to limit the chances of voltage droop */
 	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
@@ -1176,9 +1204,19 @@

 static int a5xx_pm_suspend(struct msm_gpu *gpu)
 {
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+	u32 mask = 0xf;
+	int i, ret;
+
+	/* A510 has 3 XIN ports in VBIF */
+	if (adreno_is_a510(adreno_gpu))
+		mask = 0x7;
+
 	/* Clear the VBIF pipe before shutting down */
-	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
-	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
+	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
+	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
+				mask) == mask);

 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);

@@ -1189,7 +1227,15 @@
 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);

-	return msm_gpu_pm_suspend(gpu);
+	ret = msm_gpu_pm_suspend(gpu);
+	if (ret)
+		return ret;
+
+	if (a5xx_gpu->has_whereami)
+		for (i = 0; i < gpu->nr_rings; i++)
+			a5xx_gpu->shadow[i] = 0;
+
+	return 0;
 }

 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
@@ -1211,10 +1257,6 @@
 	u32 *hlsqregs;
 };

-#define gpu_poll_timeout(gpu, addr, val, cond, interval, timeout) \
-	readl_poll_timeout((gpu)->mmio + ((addr) << 2), val, cond, \
-		interval, timeout)
-
 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
 		struct a5xx_crashdumper *dumper)
 {
@@ -1222,19 +1264,10 @@
 		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
 		&dumper->bo, &dumper->iova);

-	if (IS_ERR(dumper->ptr))
-		return PTR_ERR(dumper->ptr);
+	if (!IS_ERR(dumper->ptr))
+		msm_gem_object_set_name(dumper->bo, "crashdump");

-	return 0;
-}
-
-static void a5xx_crashdumper_free(struct msm_gpu *gpu,
-	struct a5xx_crashdumper *dumper)
-{
-	msm_gem_put_iova(dumper->bo, gpu->aspace);
-	msm_gem_put_vaddr(dumper->bo);
-
-	drm_gem_object_unreference(dumper->bo);
+	return PTR_ERR_OR_ZERO(dumper->ptr);
 }

 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
@@ -1329,7 +1362,7 @@

 	if (a5xx_crashdumper_run(gpu, &dumper)) {
 		kfree(a5xx_state->hlsqregs);
-		a5xx_crashdumper_free(gpu, &dumper);
+		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
 		return;
 	}

@@ -1337,7 +1370,7 @@
 	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
 		count * sizeof(u32));

-	a5xx_crashdumper_free(gpu, &dumper);
+	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
 }

 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
@@ -1377,7 +1410,7 @@
 	kfree(a5xx_state);
 }

-int a5xx_gpu_state_put(struct msm_gpu_state *state)
+static int a5xx_gpu_state_put(struct msm_gpu_state *state)
 {
 	if (IS_ERR_OR_NULL(state))
 		return 1;
@@ -1387,8 +1420,8 @@


 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
-void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
-	struct drm_printer *p)
+static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
+		struct drm_printer *p)
 {
 	int i, j;
 	u32 pos = 0;
@@ -1436,12 +1469,39 @@
 	return a5xx_gpu->cur_ring;
 }

-static int a5xx_gpu_busy(struct msm_gpu *gpu, uint64_t *value)
+static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
 {
-	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
-		REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
+	u64 busy_cycles, busy_time;

-	return 0;
+	/* Only read the gpu busy if the hardware is already active */
+	if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
+		return 0;
+
+	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
+			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
+
+	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
+	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
+
+	gpu->devfreq.busy_cycles = busy_cycles;
+
+	pm_runtime_put(&gpu->pdev->dev);
+
+	if (WARN_ON(busy_time > ~0LU))
+		return ~0LU;
+
+	return (unsigned long)busy_time;
+}
+
+static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+
+	if (a5xx_gpu->has_whereami)
+		return a5xx_gpu->shadow[ring->id];
+
+	return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
 }

 static const struct adreno_gpu_funcs funcs = {
@@ -1452,7 +1512,6 @@
 		.pm_resume = a5xx_pm_resume,
 		.recover = a5xx_recover,
 		.submit = a5xx_submit,
-		.flush = a5xx_flush,
 		.active_ring = a5xx_active_ring,
 		.irq = a5xx_irq,
 		.destroy = a5xx_destroy,
@@ -1465,6 +1524,8 @@
 		.gpu_busy = a5xx_gpu_busy,
 		.gpu_state_get = a5xx_gpu_state_get,
 		.gpu_state_put = a5xx_gpu_state_put,
+		.create_address_space = adreno_iommu_create_address_space,
+		.get_rptr = a5xx_get_rptr,
 	},
 	.get_timestamp = a5xx_get_timestamp,
 };
@@ -1508,10 +1569,11 @@
 	struct a5xx_gpu *a5xx_gpu = NULL;
 	struct adreno_gpu *adreno_gpu;
 	struct msm_gpu *gpu;
+	unsigned int nr_rings;
 	int ret;

 	if (!pdev) {
-		dev_err(dev->dev, "No A5XX device is defined\n");
+		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
 		return ERR_PTR(-ENXIO);
 	}

@@ -1523,14 +1585,17 @@
 	gpu = &adreno_gpu->base;

 	adreno_gpu->registers = a5xx_registers;
-	adreno_gpu->reg_offsets = a5xx_register_offsets;

 	a5xx_gpu->lm_leakage = 0x4E001A;

 	check_speed_bin(&pdev->dev);

-	/* Restricting nr_rings to 1 to temporarily disable preemption */
-	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
+	nr_rings = 4;
+
+	if (adreno_is_a510(adreno_gpu))
+		nr_rings = 1;
+
+	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, nr_rings);
 	if (ret) {
 		a5xx_destroy(&(a5xx_gpu->base.base));
 		return ERR_PTR(ret);