2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2014 The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
 */
 #include "a4xx_gpu.h"
-#ifdef CONFIG_MSM_OCMEM
-# include <soc/qcom/ocmem.h>
-#endif
 
 #define A4XX_INT0_MASK \
 	(A4XX_INT0_RBBM_AHB_ERROR | \
@@ -33,6 +21,54 @@
 extern bool hang_debug;
 static void a4xx_dump(struct msm_gpu *gpu);
 static bool a4xx_idle(struct msm_gpu *gpu);
+
+static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
+{
+	struct msm_drm_private *priv = gpu->dev->dev_private;
+	struct msm_ringbuffer *ring = submit->ring;
+	unsigned int i;
+
+	for (i = 0; i < submit->nr_cmds; i++) {
+		switch (submit->cmd[i].type) {
+		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
+			/* ignore IB-targets */
+			break;
+		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
+			/* ignore if there has not been a ctx switch: */
+			if (priv->lastctx == submit->queue->ctx)
+				break;
+			fallthrough;
+		case MSM_SUBMIT_CMD_BUF:
+			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2);
+			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
+			OUT_RING(ring, submit->cmd[i].size);
+			OUT_PKT2(ring);
+			break;
+		}
+	}
+
+	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
+	OUT_RING(ring, submit->seqno);
+
+	/* Flush HLSQ lazy updates to make sure there is nothing
+	 * pending for indirect loads after the timestamp has
+	 * passed:
+	 */
+	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+	OUT_RING(ring, HLSQ_FLUSH);
+
+	/* wait for idle before cache flush/interrupt */
+	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
+	OUT_RING(ring, 0x00000000);
+
+	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
+	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
+	OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
+	OUT_RING(ring, rbmemptr(ring, fence));
+	OUT_RING(ring, submit->seqno);
+
+	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
+}
 
 /*
  * a4xx_enable_hwcg() - Program the clock control registers
@@ -78,19 +114,22 @@
 		}
 	}
 
-	for (i = 0; i < 4; i++) {
-		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
-				0x00000922);
-	}
+	/* No CCU for A405 */
+	if (!adreno_is_a405(adreno_gpu)) {
+		for (i = 0; i < 4; i++) {
+			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
+					0x00000922);
+		}
 
-	for (i = 0; i < 4; i++) {
-		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
-				0x00000000);
-	}
+		for (i = 0; i < 4; i++) {
+			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
+					0x00000000);
+		}
 
-	for (i = 0; i < 4; i++) {
-		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
-				0x00000001);
+		for (i = 0; i < 4; i++) {
+			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
+					0x00000001);
+		}
 	}
 
 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
@@ -138,7 +177,7 @@
 	OUT_RING(ring, 0x00000000);
 	OUT_RING(ring, 0x00000000);
 
-	gpu->funcs->flush(gpu, ring);
+	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
 	return a4xx_idle(gpu);
 }
 
@@ -149,7 +188,9 @@
 	uint32_t *ptr, len;
 	int i, ret;
 
-	if (adreno_is_a420(adreno_gpu)) {
+	if (adreno_is_a405(adreno_gpu)) {
+		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
+	} else if (adreno_is_a420(adreno_gpu)) {
 		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
 		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
 		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
@@ -197,7 +238,7 @@
 			(1 << 30) | 0xFFFF);
 
 	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
-			(unsigned int)(a4xx_gpu->ocmem_base >> 14));
+			(unsigned int)(a4xx_gpu->ocmem.base >> 14));
 
 	/* Turn on performance counters: */
 	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);
@@ -274,6 +315,16 @@
 	if (ret)
 		return ret;
 
+	/*
+	 * Use the default ringbuffer size and block size but disable the RPTR
+	 * shadow
+	 */
+	gpu_write(gpu, REG_A4XX_CP_RB_CNTL,
+		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
+
+	/* Set the ringbuffer address */
+	gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));
+
 	/* Load PM4: */
 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
 	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
@@ -327,10 +378,7 @@
 
 	adreno_gpu_cleanup(adreno_gpu);
 
-#ifdef CONFIG_MSM_OCMEM
-	if (a4xx_gpu->ocmem_base)
-		ocmem_free(OCMEM_GRAPHICS, a4xx_gpu->ocmem_hdl);
-#endif
+	adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);
 
 	kfree(a4xx_gpu);
 }
@@ -455,6 +503,52 @@
 	~0 /* sentinel */
 };
 
+static const unsigned int a405_registers[] = {
+	/* RBBM */
+	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
+	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
+	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
+	/* CP */
+	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
+	0x0578, 0x058F,
+	/* VSC */
+	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
+	/* GRAS */
+	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
+	/* RB */
+	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
+	/* PC */
+	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
+	/* VFD */
+	0x0E40, 0x0E4A,
+	/* VPC */
+	0x0E60, 0x0E61, 0x0E63, 0x0E68,
+	/* UCHE */
+	0x0E80, 0x0E84, 0x0E88, 0x0E95,
+	/* GRAS CTX 0 */
+	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
+	/* PC CTX 0 */
+	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
+	/* VFD CTX 0 */
+	0x2200, 0x2204, 0x2208, 0x22A9,
+	/* GRAS CTX 1 */
+	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
+	/* PC CTX 1 */
+	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
+	/* VFD CTX 1 */
+	0x2600, 0x2604, 0x2608, 0x26A9,
+	/* VBIF version 0x20050000*/
+	0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036,
+	0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049,
+	0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D,
+	0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098,
+	0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0,
+	0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108,
+	0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125,
+	0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410,
+	~0 /* sentinel */
+};
+
 static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
 {
 	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
@@ -468,17 +562,6 @@
 
 	return state;
 }
-
-/* Register offset defines for A4XX, in order of enum adreno_regs */
-static const unsigned int a4xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A4XX_CP_RB_BASE),
-	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A4XX_CP_RB_RPTR_ADDR),
-	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A4XX_CP_RB_RPTR),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A4XX_CP_RB_WPTR),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A4XX_CP_RB_CNTL),
-};
 
 static void a4xx_dump(struct msm_gpu *gpu)
 {
@@ -530,6 +613,12 @@
 	return 0;
 }
 
+static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+	ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR);
+	return ring->memptrs->rptr;
+}
+
 static const struct adreno_gpu_funcs funcs = {
 	.base = {
 		.get_param = adreno_get_param,
@@ -537,8 +626,7 @@
 		.pm_suspend = a4xx_pm_suspend,
 		.pm_resume = a4xx_pm_resume,
 		.recover = a4xx_recover,
-		.submit = adreno_submit,
-		.flush = adreno_flush,
+		.submit = a4xx_submit,
 		.active_ring = adreno_active_ring,
 		.irq = a4xx_irq,
 		.destroy = a4xx_destroy,
@@ -547,6 +635,8 @@
 #endif
 		.gpu_state_get = a4xx_gpu_state_get,
 		.gpu_state_put = adreno_gpu_state_put,
+		.create_address_space = adreno_iommu_create_address_space,
+		.get_rptr = a4xx_get_rptr,
 	},
 	.get_timestamp = a4xx_get_timestamp,
 };
@@ -561,7 +651,7 @@
 	int ret;
 
 	if (!pdev) {
-		dev_err(dev->dev, "no a4xx device\n");
+		DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
 		ret = -ENXIO;
 		goto fail;
 	}
@@ -578,27 +668,18 @@
 	gpu->perfcntrs = NULL;
 	gpu->num_perfcntrs = 0;
 
-	adreno_gpu->registers = a4xx_registers;
-	adreno_gpu->reg_offsets = a4xx_register_offsets;
-
 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
 	if (ret)
 		goto fail;
 
-	/* if needed, allocate gmem: */
-	if (adreno_is_a4xx(adreno_gpu)) {
-#ifdef CONFIG_MSM_OCMEM
-		/* TODO this is different/missing upstream: */
-		struct ocmem_buf *ocmem_hdl =
-				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);
+	adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers :
+							     a4xx_registers;
 
-		a4xx_gpu->ocmem_hdl = ocmem_hdl;
-		a4xx_gpu->ocmem_base = ocmem_hdl->addr;
-		adreno_gpu->gmem = ocmem_hdl->len;
-		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
-				a4xx_gpu->ocmem_base);
-#endif
-	}
+	/* if needed, allocate gmem: */
+	ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
+				    &a4xx_gpu->ocmem);
+	if (ret)
+		goto fail;
 
 	if (!gpu->aspace) {
 		/* TODO we think it is possible to configure the GPU to
@@ -608,11 +689,19 @@
 		 * to not be possible to restrict access, then we must
 		 * implement a cmdstream validator.
 		 */
-		dev_err(dev->dev, "No memory protection without IOMMU\n");
+		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
 		ret = -ENXIO;
 		goto fail;
 	}
 
+	/*
+	 * Set the ICC path to maximum speed for now by multiplying the fastest
+	 * frequency by the bus width (8). We'll want to scale this later on to
+	 * improve battery life.
+	 */
+	icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
+	icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
+
 	return gpu;
 
 fail:
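
Note on the RPTR-shadow change above: a4xx_hw_init() now sets AXXX_CP_RB_CNTL_NO_UPDATE so the CP stops DMA-ing its read pointer into shadow memory, and the new get_rptr callback (a4xx_get_rptr) instead reads REG_A4XX_CP_RB_RPTR from the hardware on demand, so ring state no longer depends on GPU-writable memory. As a rough illustration of the rptr/wptr accounting this scheme relies on — a minimal, hypothetical userspace model, not the driver's code; RING_DWORDS and ring_free() are invented for this sketch — free space on a power-of-two ring can be computed like so:

    /* Hypothetical model of rptr/wptr ring accounting (not kernel code). */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define RING_DWORDS 1024u  /* assume a power-of-two ring size, in dwords */

    /* Dwords free for the CPU to write; one slot stays open so that
     * rptr == wptr unambiguously means "ring empty". */
    static uint32_t ring_free(uint32_t rptr, uint32_t wptr)
    {
            return (rptr - wptr - 1) & (RING_DWORDS - 1);
    }

    int main(void)
    {
            uint32_t rptr = 0, wptr = 0;

            assert(ring_free(rptr, wptr) == RING_DWORDS - 1);  /* empty */

            wptr = (wptr + 100) & (RING_DWORDS - 1);  /* CPU emits 100 dwords */
            printf("free after emit:    %u\n", ring_free(rptr, wptr));

            rptr = (rptr + 100) & (RING_DWORDS - 1);  /* GPU catches up */
            printf("free after consume: %u\n", ring_free(rptr, wptr));
            return 0;
    }

In this model, reading rptr is the only input that must come from the consumer; fetching it with an MMIO register read each time (as a4xx_get_rptr does) is slower than a memory shadow but removes the shadow entirely.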