forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-13 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e
kernel/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1,11 +1,16 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2017-2018 The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */


 #include "msm_gem.h"
 #include "msm_mmu.h"
+#include "msm_gpu_trace.h"
 #include "a6xx_gpu.h"
 #include "a6xx_gmu.xml.h"
+
+#include <linux/devfreq.h>
+
+#define GPU_PAS_ID 13

 static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
 {
@@ -46,10 +51,21 @@

 static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 {
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 	uint32_t wptr;
 	unsigned long flags;

-	spin_lock_irqsave(&ring->lock, flags);
+	/* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
+	if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
+		struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+
+		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
+		OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
+		OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
+	}
+
+	spin_lock_irqsave(&ring->preempt_lock, flags);

 	/* Copy the shadow to the actual register */
 	ring->cur = ring->next;
@@ -57,7 +73,7 @@
 	/* Make sure to wrap wptr if we need to */
 	wptr = get_wptr(ring);

-	spin_unlock_irqrestore(&ring->lock, flags);
+	spin_unlock_irqrestore(&ring->preempt_lock, flags);

 	/* Make sure everything is posted before making a decision */
 	mb();
@@ -65,19 +81,88 @@
 	gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
 }

-static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
-		struct msm_file_private *ctx)
+static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
+		u64 iova)
 {
+	OUT_PKT7(ring, CP_REG_TO_MEM, 3);
+	OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
+		CP_REG_TO_MEM_0_CNT(2) |
+		CP_REG_TO_MEM_0_64B);
+	OUT_RING(ring, lower_32_bits(iova));
+	OUT_RING(ring, upper_32_bits(iova));
+}
+
+static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
+		struct msm_ringbuffer *ring, struct msm_file_private *ctx)
+{
+	phys_addr_t ttbr;
+	u32 asid;
+	u64 memptr = rbmemptr(ring, ttbr0);
+
+	if (ctx->seqno == a6xx_gpu->cur_ctx_seqno)
+		return;
+
+	if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid))
+		return;
+
+	/* Execute the table update */
+	OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
+	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));
+
+	OUT_RING(ring,
+		CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
+		CP_SMMU_TABLE_UPDATE_1_ASID(asid));
+	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
+	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));
+
+	/*
+	 * Write the new TTBR0 to the memstore. This is good for debugging.
+	 */
+	OUT_PKT7(ring, CP_MEM_WRITE, 4);
+	OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr)));
+	OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr)));
+	OUT_RING(ring, lower_32_bits(ttbr));
+	OUT_RING(ring, (asid << 16) | upper_32_bits(ttbr));
+
+	/*
+	 * And finally, trigger a uche flush to be sure there isn't anything
+	 * lingering in that part of the GPU
+	 */
+
+	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+	OUT_RING(ring, 0x31);
+
+	a6xx_gpu->cur_ctx_seqno = ctx->seqno;
+}
+
+static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
+{
+	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
 	struct msm_drm_private *priv = gpu->dev->dev_private;
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 	struct msm_ringbuffer *ring = submit->ring;
 	unsigned int i;

+	a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx);
+
+	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO,
+		rbmemptr_stats(ring, index, cpcycles_start));
+
+	/*
+	 * For PM4 the GMU register offsets are calculated from the base of the
+	 * GPU registers so we need to add 0x1a800 to the register value on A630
+	 * to get the right value from PM4.
+	 */
+	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
+		rbmemptr_stats(ring, index, alwayson_start));
+
 	/* Invalidate CCU depth and color */
 	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
-	OUT_RING(ring, PC_CCU_INVALIDATE_DEPTH);
+	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));

 	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
-	OUT_RING(ring, PC_CCU_INVALIDATE_COLOR);
+	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR));

 	/* Submit the commands */
 	for (i = 0; i < submit->nr_cmds; i++) {
@@ -85,8 +170,9 @@
 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
 			break;
 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
-			if (priv->lastctx == ctx)
+			if (priv->lastctx == submit->queue->ctx)
 				break;
+			fallthrough;
 		case MSM_SUBMIT_CMD_BUF:
 			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
@@ -95,6 +181,11 @@
 			break;
 		}
 	}
+
+	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO,
+		rbmemptr_stats(ring, index, cpcycles_end));
+	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
+		rbmemptr_stats(ring, index, alwayson_end));

 	/* Write the fence to the scratch register */
 	OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
@@ -105,18 +196,20 @@
 	 * timestamp is written to the memory and then triggers the interrupt
 	 */
 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
-	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
+	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
+		CP_EVENT_WRITE_0_IRQ);
 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
 	OUT_RING(ring, submit->seqno);

+	trace_msm_gpu_submit_flush(submit,
+		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
+			REG_A6XX_CP_ALWAYS_ON_COUNTER_HI));
+
 	a6xx_flush(gpu, ring);
 }

-static const struct {
-	u32 offset;
-	u32 value;
-} a6xx_hwcg[] = {
+const struct adreno_reglist a630_hwcg[] = {
 	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222},
 	{REG_A6XX_RBBM_CLOCK_CNTL_SP1, 0x22222222},
 	{REG_A6XX_RBBM_CLOCK_CNTL_SP2, 0x22222222},
@@ -221,7 +314,114 @@
 	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
 	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
 	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
-	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}
+	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
+	{},
+};
+
+const struct adreno_reglist a640_hwcg[] = {
+	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
+	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
+	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
+	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
+	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
+	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
+	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
+	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
+	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
+	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
+	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05222022},
+	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
+	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
+	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
+	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
+	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
+	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
+	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
+	{REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
+	{REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
+	{REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000},
+	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
+	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
+	{REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
+	{REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
+	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
+	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
+	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
+	{},
+};
+
+const struct adreno_reglist a650_hwcg[] = {
+	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
+	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
+	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
+	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
+	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
+	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
+	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
+	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
+	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
+	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
+	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
+	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
+	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
+	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
+	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
+	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
+	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
+	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
+	{REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
+	{REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
+	{REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000777},
+	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
+	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
+	{REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
+	{REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
+	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
+	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
+	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
+	{},
 };

 static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
@@ -229,26 +429,172 @@
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+	const struct adreno_reglist *reg;
 	unsigned int i;
-	u32 val;
+	u32 val, clock_cntl_on;
+
+	if (!adreno_gpu->info->hwcg)
+		return;
+
+	if (adreno_is_a630(adreno_gpu))
+		clock_cntl_on = 0x8aa8aa02;
+	else
+		clock_cntl_on = 0x8aa8aa82;

 	val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);

 	/* Don't re-program the registers if they are already correct */
-	if ((!state && !val) || (state && (val == 0x8aa8aa02)))
+	if ((!state && !val) || (state && (val == clock_cntl_on)))
 		return;

 	/* Disable SP clock before programming HWCG registers */
 	gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);

-	for (i = 0; i < ARRAY_SIZE(a6xx_hwcg); i++)
-		gpu_write(gpu, a6xx_hwcg[i].offset,
-			state ? a6xx_hwcg[i].value : 0);
+	for (i = 0; (reg = &adreno_gpu->info->hwcg[i], reg->offset); i++)
+		gpu_write(gpu, reg->offset, state ? reg->value : 0);

 	/* Enable SP clock */
 	gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);

-	gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? 0x8aa8aa02 : 0);
+	gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
+}
+
+/* For a615, a616, a618, A619, a630, a640 and a680 */
+static const u32 a6xx_protect[] = {
+	A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
+	A6XX_PROTECT_RDONLY(0x00501, 0x0005),
+	A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
+	A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
+	A6XX_PROTECT_NORDWR(0x00510, 0x0000),
+	A6XX_PROTECT_NORDWR(0x00534, 0x0000),
+	A6XX_PROTECT_NORDWR(0x00800, 0x0082),
+	A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
+	A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
+	A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
+	A6XX_PROTECT_NORDWR(0x00900, 0x004d),
+	A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
+	A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
+	A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
+	A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
+	A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
+	A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
+	A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
+	A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
+	A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
+	A6XX_PROTECT_NORDWR(0x09624, 0x01db),
+	A6XX_PROTECT_NORDWR(0x09e70, 0x0001),
+	A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
+	A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
+	A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
+	A6XX_PROTECT_NORDWR(0x0ae50, 0x032f),
+	A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
+	A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
+	A6XX_PROTECT_NORDWR(0x0be20, 0x17df),
+	A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
+	A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
+	A6XX_PROTECT_NORDWR(0x11c00, 0x0000), /* note: infinite range */
+};
+
+/* These are for a620 and a650 */
+static const u32 a650_protect[] = {
+	A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
+	A6XX_PROTECT_RDONLY(0x00501, 0x0005),
+	A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
+	A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
+	A6XX_PROTECT_NORDWR(0x00510, 0x0000),
+	A6XX_PROTECT_NORDWR(0x00534, 0x0000),
+	A6XX_PROTECT_NORDWR(0x00800, 0x0082),
+	A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
+	A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
+	A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
+	A6XX_PROTECT_NORDWR(0x00900, 0x004d),
+	A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
+	A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
+	A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
+	A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
+	A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
+	A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
+	A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
+	A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
+	A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
+	A6XX_PROTECT_NORDWR(0x08e80, 0x027f),
+	A6XX_PROTECT_NORDWR(0x09624, 0x01db),
+	A6XX_PROTECT_NORDWR(0x09e60, 0x0011),
+	A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
+	A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
+	A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
+	A6XX_PROTECT_NORDWR(0x0ae50, 0x032f),
+	A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
+	A6XX_PROTECT_NORDWR(0x0b608, 0x0007),
+	A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
+	A6XX_PROTECT_NORDWR(0x0be20, 0x17df),
+	A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
+	A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
+	A6XX_PROTECT_NORDWR(0x18400, 0x1fff),
+	A6XX_PROTECT_NORDWR(0x1a800, 0x1fff),
+	A6XX_PROTECT_NORDWR(0x1f400, 0x0443),
+	A6XX_PROTECT_RDONLY(0x1f844, 0x007b),
+	A6XX_PROTECT_NORDWR(0x1f887, 0x001b),
+	A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */
+};
+
+static void a6xx_set_cp_protect(struct msm_gpu *gpu)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	const u32 *regs = a6xx_protect;
+	unsigned i, count = ARRAY_SIZE(a6xx_protect), count_max = 32;
+
+	BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32);
+	BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48);
+
+	if (adreno_is_a650(adreno_gpu)) {
+		regs = a650_protect;
+		count = ARRAY_SIZE(a650_protect);
+		count_max = 48;
+	}
+
+	/*
+	 * Enable access protection to privileged registers, fault on an access
+	 * protect violation and select the last span to protect from the start
+	 * address all the way to the end of the register address space
+	 */
+	gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL, BIT(0) | BIT(1) | BIT(3));
+
+	for (i = 0; i < count - 1; i++)
+		gpu_write(gpu, REG_A6XX_CP_PROTECT(i), regs[i]);
+	/* last CP_PROTECT to have "infinite" length on the last entry */
+	gpu_write(gpu, REG_A6XX_CP_PROTECT(count_max - 1), regs[i]);
+}
+
+static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	u32 lower_bit = 2;
+	u32 amsbc = 0;
+	u32 rgb565_predicator = 0;
+	u32 uavflagprd_inv = 0;
+
+	/* a618 is using the hw default values */
+	if (adreno_is_a618(adreno_gpu))
+		return;
+
+	if (adreno_is_a640(adreno_gpu))
+		amsbc = 1;
+
+	if (adreno_is_a650(adreno_gpu)) {
+		/* TODO: get ddr type from bootloader and use 2 for LPDDR4 */
+		lower_bit = 3;
+		amsbc = 1;
+		rgb565_predicator = 1;
+		uavflagprd_inv = 2;
+	}
+
+	gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
+		rgb565_predicator << 11 | amsbc << 4 | lower_bit << 1);
+	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, lower_bit << 1);
+	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
+		uavflagprd_inv << 4 | lower_bit << 1);
+	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, lower_bit << 21);
 }

 static int a6xx_cp_init(struct msm_gpu *gpu)
@@ -280,6 +626,30 @@
 	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
 }

+static void a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
+		struct drm_gem_object *obj)
+{
+	u32 *buf = msm_gem_get_vaddr_active(obj);
+
+	if (IS_ERR(buf))
+		return;
+
+	/*
+	 * If the lowest nibble is 0xa that is an indication that this microcode
+	 * has been patched. The actual version is in dword [3] but we only care
+	 * about the patchlevel which is the lowest nibble of dword [3]
+	 *
+	 * Otherwise check that the firmware is greater than or equal to 1.90
+	 * which was the first version that had this fix built in
+	 */
+	if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
+		a6xx_gpu->has_whereami = true;
+	else if ((buf[0] & 0xfff) > 0x190)
+		a6xx_gpu->has_whereami = true;
+
+	msm_gem_put_vaddr(obj);
+}
+
 static int a6xx_ucode_init(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -298,12 +668,29 @@

 			return ret;
 		}
+
+		msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw");
+		a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo);
 	}

 	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE_LO,
 		REG_A6XX_CP_SQE_INSTR_BASE_HI, a6xx_gpu->sqe_iova);

 	return 0;
+}
+
+static int a6xx_zap_shader_init(struct msm_gpu *gpu)
+{
+	static bool loaded;
+	int ret;
+
+	if (loaded)
+		return 0;
+
+	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
+
+	loaded = !ret;
+	return ret;
 }

 #define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
@@ -338,12 +725,37 @@
 		REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
 	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

+	/* Turn on 64 bit addressing for all blocks */
+	gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
+	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
+
 	/* enable hardware clockgating */
 	a6xx_set_hwcg(gpu, true);

-	/* VBIF start */
-	gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
-	gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
+	/* VBIF/GBIF start*/
+	if (adreno_is_a640(adreno_gpu) || adreno_is_a650(adreno_gpu)) {
+		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620);
+		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620);
+		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620);
+		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
+		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
+		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3);
+	} else {
+		gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
+	}
+
+	if (adreno_is_a630(adreno_gpu))
+		gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);

 	/* Make all blocks contribute to the GPU BUSY perf counter */
 	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);
@@ -356,25 +768,35 @@
 	gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_LO, 0xfffff000);
 	gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff);

-	/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
-	gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO,
-		REG_A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x00100000);
+	if (!adreno_is_a650(adreno_gpu)) {
+		/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
+		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO,
+			REG_A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x00100000);

-	gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO,
-		REG_A6XX_UCHE_GMEM_RANGE_MAX_HI,
-		0x00100000 + adreno_gpu->gmem - 1);
+		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO,
+			REG_A6XX_UCHE_GMEM_RANGE_MAX_HI,
+			0x00100000 + adreno_gpu->gmem - 1);
+	}

 	gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
 	gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);

-	gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
+	if (adreno_is_a640(adreno_gpu) || adreno_is_a650(adreno_gpu))
+		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
+	else
+		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
 	gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);

 	/* Setting the mem pool size */
 	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);

 	/* Setting the primFifo thresholds default values */
-	gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, (0x300 << 11));
+	if (adreno_is_a650(adreno_gpu))
+		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300000);
+	else if (adreno_is_a640(adreno_gpu))
+		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00200000);
+	else
+		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, (0x300 << 11));

 	/* Set the AHB default slave response to "ERROR" */
 	gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);
@@ -385,18 +807,7 @@
 	/* Select CP0 to always count cycles */
 	gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);

-	/* FIXME: not sure if this should live here or in a6xx_gmu.c */
-	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK,
-		0xff000000);
-	gmu_rmw(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0,
-		0xff, 0x20);
-	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE,
-		0x01);
-
-	gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, 2 << 1);
-	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, 2 << 1);
-	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, 2 << 1);
-	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, 2 << 21);
+	a6xx_set_ubwc_config(gpu);

 	/* Enable fault detection */
 	gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL,
@@ -404,44 +815,27 @@

 	gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, 1);

-	/* Protect registers from the CP */
-	gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL, 0x00000003);
+	/* Set weights for bicubic filtering */
+	if (adreno_is_a650(adreno_gpu)) {
+		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0);
+		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1,
+			0x3fe05ff4);
+		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2,
+			0x3fa0ebee);
+		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3,
+			0x3f5193ed);
+		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4,
+			0x3f0243f0);
+	}

-	gpu_write(gpu, REG_A6XX_CP_PROTECT(0),
-		A6XX_PROTECT_RDONLY(0x600, 0x51));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(1), A6XX_PROTECT_RW(0xae50, 0x2));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(2), A6XX_PROTECT_RW(0x9624, 0x13));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(3), A6XX_PROTECT_RW(0x8630, 0x8));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(4), A6XX_PROTECT_RW(0x9e70, 0x1));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(5), A6XX_PROTECT_RW(0x9e78, 0x187));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(6), A6XX_PROTECT_RW(0xf000, 0x810));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(7),
-		A6XX_PROTECT_RDONLY(0xfc00, 0x3));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(8), A6XX_PROTECT_RW(0x50e, 0x0));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(9), A6XX_PROTECT_RDONLY(0x50f, 0x0));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(10), A6XX_PROTECT_RW(0x510, 0x0));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(11),
-		A6XX_PROTECT_RDONLY(0x0, 0x4f9));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(12),
-		A6XX_PROTECT_RDONLY(0x501, 0xa));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(13),
-		A6XX_PROTECT_RDONLY(0x511, 0x44));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(14), A6XX_PROTECT_RW(0xe00, 0xe));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(15), A6XX_PROTECT_RW(0x8e00, 0x0));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(16), A6XX_PROTECT_RW(0x8e50, 0xf));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(17), A6XX_PROTECT_RW(0xbe02, 0x0));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(18),
-		A6XX_PROTECT_RW(0xbe20, 0x11f3));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(19), A6XX_PROTECT_RW(0x800, 0x82));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(20), A6XX_PROTECT_RW(0x8a0, 0x8));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(21), A6XX_PROTECT_RW(0x8ab, 0x19));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(22), A6XX_PROTECT_RW(0x900, 0x4d));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(23), A6XX_PROTECT_RW(0x98d, 0x76));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(24),
-		A6XX_PROTECT_RDONLY(0x8d0, 0x23));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(25),
-		A6XX_PROTECT_RDONLY(0x980, 0x4));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(26), A6XX_PROTECT_RW(0xa630, 0x0));
+	/* Protect registers from the CP */
+	a6xx_set_cp_protect(gpu);
+
+	/* Enable expanded apriv for targets that support it */
+	if (gpu->hw_apriv) {
+		gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
+			(1 << 6) | (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1));
+	}

 	/* Enable interrupts */
 	gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, A6XX_INT_MASK);
@@ -454,8 +848,46 @@
 	if (ret)
 		goto out;

+	/* Set the ringbuffer address */
+	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, REG_A6XX_CP_RB_BASE_HI,
+		gpu->rb[0]->iova);
+
+	/* Targets that support extended APRIV can use the RPTR shadow from
+	 * hardware but all the other ones need to disable the feature. Targets
+	 * that support the WHERE_AM_I opcode can use that instead
+	 */
+	if (adreno_gpu->base.hw_apriv)
+		gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
+	else
+		gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
+			MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
+
+	/*
+	 * Expanded APRIV and targets that support WHERE_AM_I both need a
+	 * privileged buffer to store the RPTR shadow
+	 */
+
+	if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) {
+		if (!a6xx_gpu->shadow_bo) {
+			a6xx_gpu->shadow = msm_gem_kernel_new_locked(gpu->dev,
+				sizeof(u32) * gpu->nr_rings,
+				MSM_BO_UNCACHED | MSM_BO_MAP_PRIV,
+				gpu->aspace, &a6xx_gpu->shadow_bo,
+				&a6xx_gpu->shadow_iova);
+
+			if (IS_ERR(a6xx_gpu->shadow))
+				return PTR_ERR(a6xx_gpu->shadow);
+		}
+
+		gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR_LO,
+			REG_A6XX_CP_RB_RPTR_ADDR_HI,
+			shadowptr(a6xx_gpu, gpu->rb[0]));
+	}
+
 	/* Always come up on rb 0 */
 	a6xx_gpu->cur_ring = gpu->rb[0];
+
+	a6xx_gpu->cur_ctx_seqno = 0;

 	/* Enable the SQE_to start the CP engine */
 	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);
@@ -464,7 +896,35 @@

 	if (ret)
 		goto out;

-	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
+	/*
+	 * Try to load a zap shader into the secure world. If successful
+	 * we can use the CP to switch out of secure mode. If not then we
+	 * have no resource but to try to switch ourselves out manually. If we
+	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
+	 * be blocked and a permissions violation will soon follow.
+	 */
+	ret = a6xx_zap_shader_init(gpu);
+	if (!ret) {
+		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
+		OUT_RING(gpu->rb[0], 0x00000000);
+
+		a6xx_flush(gpu, gpu->rb[0]);
+		if (!a6xx_idle(gpu, gpu->rb[0]))
+			return -EINVAL;
+	} else if (ret == -ENODEV) {
+		/*
+		 * This device does not use zap shader (but print a warning
+		 * just in case someone got their dt wrong.. hopefully they
+		 * have a debug UART to realize the error of their ways...
+		 * if you mess this up you are about to crash horribly)
+		 */
+		dev_warn_once(gpu->dev->dev,
+			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
+		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
+		ret = 0;
+	} else {
+		return ret;
+	}

 out:
 	/*
@@ -473,15 +933,17 @@
 	 */
 	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);

-	/* Take the GMU out of its special boot mode */
-	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);
+	if (a6xx_gpu->gmu.legacy) {
+		/* Take the GMU out of its special boot mode */
+		a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);
+	}

 	return ret;
 }

 static void a6xx_dump(struct msm_gpu *gpu)
 {
-	dev_info(&gpu->pdev->dev, "status:   %08x\n",
+	DRM_DEV_INFO(&gpu->pdev->dev, "status:   %08x\n",
 		gpu_read(gpu, REG_A6XX_RBBM_STATUS));
 	adreno_dump(gpu);
 }
@@ -498,7 +960,7 @@
 	adreno_dump_info(gpu);

 	for (i = 0; i < 8; i++)
-		dev_info(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i,
+		DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i,
 			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i)));

 	if (hang_debug)
@@ -633,100 +1095,64 @@
 	return IRQ_HANDLED;
 }

-static const u32 a6xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A6XX_CP_RB_BASE),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A6XX_CP_RB_BASE_HI),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR,
-		REG_A6XX_CP_RB_RPTR_ADDR_LO),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
-		REG_A6XX_CP_RB_RPTR_ADDR_HI),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A6XX_CP_RB_RPTR),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A6XX_CP_RB_WPTR),
-	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A6XX_CP_RB_CNTL),
-};
-
-static const u32 a6xx_registers[] = {
-	0x0000, 0x0002, 0x0010, 0x0010, 0x0012, 0x0012, 0x0018, 0x001b,
-	0x001e, 0x0032, 0x0038, 0x003c, 0x0042, 0x0042, 0x0044, 0x0044,
-	0x0047, 0x0047, 0x0056, 0x0056, 0x00ad, 0x00ae, 0x00b0, 0x00fb,
-	0x0100, 0x011d, 0x0200, 0x020d, 0x0210, 0x0213, 0x0218, 0x023d,
-	0x0400, 0x04f9, 0x0500, 0x0500, 0x0505, 0x050b, 0x050e, 0x0511,
-	0x0533, 0x0533, 0x0540, 0x0555, 0x0800, 0x0808, 0x0810, 0x0813,
-	0x0820, 0x0821, 0x0823, 0x0827, 0x0830, 0x0833, 0x0840, 0x0843,
-	0x084f, 0x086f, 0x0880, 0x088a, 0x08a0, 0x08ab, 0x08c0, 0x08c4,
-	0x08d0, 0x08dd, 0x08f0, 0x08f3, 0x0900, 0x0903, 0x0908, 0x0911,
-	0x0928, 0x093e, 0x0942, 0x094d, 0x0980, 0x0984, 0x098d, 0x0996,
-	0x0998, 0x099e, 0x09a0, 0x09a6, 0x09a8, 0x09ae, 0x09b0, 0x09b1,
-	0x09c2, 0x09c8, 0x0a00, 0x0a03, 0x0c00, 0x0c04, 0x0c06, 0x0c06,
-	0x0c10, 0x0cd9, 0x0e00, 0x0e0e, 0x0e10, 0x0e13, 0x0e17, 0x0e19,
-	0x0e1c, 0x0e2b, 0x0e30, 0x0e32, 0x0e38, 0x0e39, 0x8600, 0x8601,
-	0x8610, 0x861b, 0x8620, 0x8620, 0x8628, 0x862b, 0x8630, 0x8637,
-	0x8e01, 0x8e01, 0x8e04, 0x8e05, 0x8e07, 0x8e08, 0x8e0c, 0x8e0c,
-	0x8e10, 0x8e1c, 0x8e20, 0x8e25, 0x8e28, 0x8e28, 0x8e2c, 0x8e2f,
-	0x8e3b, 0x8e3e, 0x8e40, 0x8e43, 0x8e50, 0x8e5e, 0x8e70, 0x8e77,
-	0x9600, 0x9604, 0x9624, 0x9637, 0x9e00, 0x9e01, 0x9e03, 0x9e0e,
-	0x9e11, 0x9e16, 0x9e19, 0x9e19, 0x9e1c, 0x9e1c, 0x9e20, 0x9e23,
-	0x9e30, 0x9e31, 0x9e34, 0x9e34, 0x9e70, 0x9e72, 0x9e78, 0x9e79,
-	0x9e80, 0x9fff, 0xa600, 0xa601, 0xa603, 0xa603, 0xa60a, 0xa60a,
-	0xa610, 0xa617, 0xa630, 0xa630,
-	~0
-};
-
 static int a6xx_pm_resume(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 	int ret;

-	ret = a6xx_gmu_resume(a6xx_gpu);
-
 	gpu->needs_hw_init = true;

-	return ret;
+	trace_msm_gpu_resume(0);
+
+	ret = a6xx_gmu_resume(a6xx_gpu);
+	if (ret)
+		return ret;
+
+	msm_gpu_resume_devfreq(gpu);
+
+	return 0;
 }

 static int a6xx_pm_suspend(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+	int i, ret;

-	/*
-	 * Make sure the GMU is idle before continuing (because some transitions
-	 * may use VBIF
-	 */
-	a6xx_gmu_wait_for_idle(a6xx_gpu);
+	trace_msm_gpu_suspend(0);

-	/* Clear the VBIF pipe before shutting down */
-	/* FIXME: This accesses the GPU - do we need to make sure it is on? */
-	gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf);
-	spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) & 0xf) == 0xf);
-	gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
+	devfreq_suspend_device(gpu->devfreq.devfreq);

-	return a6xx_gmu_stop(a6xx_gpu);
+	ret = a6xx_gmu_stop(a6xx_gpu);
+	if (ret)
+		return ret;
+
+	if (a6xx_gpu->shadow_bo)
+		for (i = 0; i < gpu->nr_rings; i++)
+			a6xx_gpu->shadow[i] = 0;
+
+	return 0;
 }

 static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+	static DEFINE_MUTEX(perfcounter_oob);
+
+	mutex_lock(&perfcounter_oob);

 	/* Force the GPU power on so we can read this register */
-	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
+	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);

 	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
 		REG_A6XX_CP_ALWAYS_ON_COUNTER_HI);

-	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
+	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
+	mutex_unlock(&perfcounter_oob);
 	return 0;
 }
-
-#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
-static void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
-		struct drm_printer *p)
-{
-	adreno_show(gpu, state, p);
-}
-#endif

 static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu)
 {
@@ -742,15 +1168,72 @@
 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

 	if (a6xx_gpu->sqe_bo) {
-		if (a6xx_gpu->sqe_iova)
-			msm_gem_put_iova(a6xx_gpu->sqe_bo, gpu->aspace);
-		drm_gem_object_unreference_unlocked(a6xx_gpu->sqe_bo);
+		msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
+		drm_gem_object_put(a6xx_gpu->sqe_bo);
+	}
+
+	if (a6xx_gpu->shadow_bo) {
+		msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->aspace);
+		drm_gem_object_put(a6xx_gpu->shadow_bo);
 	}

 	a6xx_gmu_remove(a6xx_gpu);

 	adreno_gpu_cleanup(adreno_gpu);
 	kfree(a6xx_gpu);
+}
+
+static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+	u64 busy_cycles, busy_time;
+
+
+	/* Only read the gpu busy if the hardware is already active */
+	if (pm_runtime_get_if_in_use(a6xx_gpu->gmu.dev) == 0)
+		return 0;
+
+	busy_cycles = gmu_read64(&a6xx_gpu->gmu,
+			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
+			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
+
+	busy_time = (busy_cycles - gpu->devfreq.busy_cycles) * 10;
+	do_div(busy_time, 192);
+
+	gpu->devfreq.busy_cycles = busy_cycles;
+
+	pm_runtime_put(a6xx_gpu->gmu.dev);
+
+	if (WARN_ON(busy_time > ~0LU))
+		return ~0LU;
+
+	return (unsigned long)busy_time;
+}
+
+static struct msm_gem_address_space *
+a6xx_create_private_address_space(struct msm_gpu *gpu)
+{
+	struct msm_mmu *mmu;
+
+	mmu = msm_iommu_pagetable_create(gpu->aspace->mmu);
+
+	if (IS_ERR(mmu))
+		return ERR_CAST(mmu);
+
+	return msm_gem_address_space_create(mmu,
+		"gpu", 0x100000000ULL, SZ_4G);
+}
+
+static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+
+	if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami)
+		return a6xx_gpu->shadow[ring->id];
+
+	return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
 }

 static const struct adreno_gpu_funcs funcs = {
@@ -761,13 +1244,22 @@
 		.pm_resume = a6xx_pm_resume,
 		.recover = a6xx_recover,
 		.submit = a6xx_submit,
-		.flush = a6xx_flush,
 		.active_ring = a6xx_active_ring,
 		.irq = a6xx_irq,
 		.destroy = a6xx_destroy,
-#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
+#if defined(CONFIG_DRM_MSM_GPU_STATE)
 		.show = a6xx_show,
 #endif
+		.gpu_busy = a6xx_gpu_busy,
+		.gpu_get_freq = a6xx_gmu_get_freq,
+		.gpu_set_freq = a6xx_gmu_set_freq,
+#if defined(CONFIG_DRM_MSM_GPU_STATE)
+		.gpu_state_get = a6xx_gpu_state_get,
+		.gpu_state_put = a6xx_gpu_state_put,
+#endif
+		.create_address_space = adreno_iommu_create_address_space,
+		.create_private_address_space = a6xx_create_private_address_space,
+		.get_rptr = a6xx_get_rptr,
 	},
 	.get_timestamp = a6xx_get_timestamp,
 };
@@ -776,6 +1268,8 @@
 {
 	struct msm_drm_private *priv = dev->dev_private;
 	struct platform_device *pdev = priv->gpu_pdev;
+	struct adreno_platform_config *config = pdev->dev.platform_data;
+	const struct adreno_info *info;
 	struct device_node *node;
 	struct a6xx_gpu *a6xx_gpu;
 	struct adreno_gpu *adreno_gpu;
@@ -789,8 +1283,17 @@
 	adreno_gpu = &a6xx_gpu->base;
 	gpu = &adreno_gpu->base;

-	adreno_gpu->registers = a6xx_registers;
-	adreno_gpu->reg_offsets = a6xx_register_offsets;
+	adreno_gpu->registers = NULL;
+
+	/*
+	 * We need to know the platform type before calling into adreno_gpu_init
+	 * so that the hw_apriv flag can be correctly set. Snoop into the info
+	 * and grab the revision number
+	 */
+	info = adreno_info(config->rev);
+
+	if (info && info->revn == 650)
+		adreno_gpu->base.hw_apriv = true;

 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
 	if (ret) {
@@ -799,12 +1302,13 @@
 	}

 	/* Check if there is a GMU phandle and set it up */
-	node = of_parse_phandle(pdev->dev.of_node, "gmu", 0);
+	node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);

 	/* FIXME: How do we gracefully handle this? */
 	BUG_ON(!node);

-	ret = a6xx_gmu_probe(a6xx_gpu, node);
+	ret = a6xx_gmu_init(a6xx_gpu, node);
+	of_node_put(node);
 	if (ret) {
 		a6xx_destroy(&(a6xx_gpu->base.base));
 		return ERR_PTR(ret);