.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* Copyright (c) 2014 The Linux Foundation. All rights reserved. |
---|
2 | | - * |
---|
3 | | - * This program is free software; you can redistribute it and/or modify |
---|
4 | | - * it under the terms of the GNU General Public License version 2 and |
---|
5 | | - * only version 2 as published by the Free Software Foundation. |
---|
6 | | - * |
---|
7 | | - * This program is distributed in the hope that it will be useful, |
---|
8 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
9 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
10 | | - * GNU General Public License for more details. |
---|
11 | | - * |
---|
12 | 3 | */ |
---|
13 | 4 | #include "a4xx_gpu.h" |
---|
14 | | -#ifdef CONFIG_MSM_OCMEM |
---|
15 | | -# include <soc/qcom/ocmem.h> |
---|
16 | | -#endif |
---|
17 | 5 | |
---|
18 | 6 | #define A4XX_INT0_MASK \ |
---|
19 | 7 | (A4XX_INT0_RBBM_AHB_ERROR | \ |
---|
.. | .. |
---|
33 | 21 | extern bool hang_debug; |
---|
34 | 22 | static void a4xx_dump(struct msm_gpu *gpu); |
---|
35 | 23 | static bool a4xx_idle(struct msm_gpu *gpu); |
---|
| 24 | + |
---|
| 25 | +static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) |
---|
| 26 | +{ |
---|
| 27 | + struct msm_drm_private *priv = gpu->dev->dev_private; |
---|
| 28 | + struct msm_ringbuffer *ring = submit->ring; |
---|
| 29 | + unsigned int i; |
---|
| 30 | + |
---|
| 31 | + for (i = 0; i < submit->nr_cmds; i++) { |
---|
| 32 | + switch (submit->cmd[i].type) { |
---|
| 33 | + case MSM_SUBMIT_CMD_IB_TARGET_BUF: |
---|
| 34 | + /* ignore IB-targets */ |
---|
| 35 | + break; |
---|
| 36 | + case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: |
---|
| 37 | + /* ignore if there has not been a ctx switch: */ |
---|
| 38 | + if (priv->lastctx == submit->queue->ctx) |
---|
| 39 | + break; |
---|
| 40 | + fallthrough; |
---|
| 41 | + case MSM_SUBMIT_CMD_BUF: |
---|
| 42 | + OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2); |
---|
| 43 | + OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); |
---|
| 44 | + OUT_RING(ring, submit->cmd[i].size); |
---|
| 45 | + OUT_PKT2(ring); |
---|
| 46 | + break; |
---|
| 47 | + } |
---|
| 48 | + } |
---|
| 49 | + |
---|
| 50 | + OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); |
---|
| 51 | + OUT_RING(ring, submit->seqno); |
---|
| 52 | + |
---|
| 53 | + /* Flush HLSQ lazy updates to make sure there is nothing |
---|
| 54 | + * pending for indirect loads after the timestamp has |
---|
| 55 | + * passed: |
---|
| 56 | + */ |
---|
| 57 | + OUT_PKT3(ring, CP_EVENT_WRITE, 1); |
---|
| 58 | + OUT_RING(ring, HLSQ_FLUSH); |
---|
| 59 | + |
---|
| 60 | + /* wait for idle before cache flush/interrupt */ |
---|
| 61 | + OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); |
---|
| 62 | + OUT_RING(ring, 0x00000000); |
---|
| 63 | + |
---|
| 64 | + /* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */ |
---|
| 65 | + OUT_PKT3(ring, CP_EVENT_WRITE, 3); |
---|
| 66 | + OUT_RING(ring, CACHE_FLUSH_TS | BIT(31)); |
---|
| 67 | + OUT_RING(ring, rbmemptr(ring, fence)); |
---|
| 68 | + OUT_RING(ring, submit->seqno); |
---|
| 69 | + |
---|
| 70 | + adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR); |
---|
| 71 | +} |
---|
36 | 72 | |
---|
37 | 73 | /* |
---|
38 | 74 | * a4xx_enable_hwcg() - Program the clock control registers |
---|
.. | .. |
---|
78 | 114 | } |
---|
79 | 115 | } |
---|
80 | 116 | |
---|
81 | | - for (i = 0; i < 4; i++) { |
---|
82 | | - gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i), |
---|
83 | | - 0x00000922); |
---|
84 | | - } |
---|
| 117 | + /* No CCU for A405 */ |
---|
| 118 | + if (!adreno_is_a405(adreno_gpu)) { |
---|
| 119 | + for (i = 0; i < 4; i++) { |
---|
| 120 | + gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i), |
---|
| 121 | + 0x00000922); |
---|
| 122 | + } |
---|
85 | 123 | |
---|
86 | | - for (i = 0; i < 4; i++) { |
---|
87 | | - gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i), |
---|
88 | | - 0x00000000); |
---|
89 | | - } |
---|
| 124 | + for (i = 0; i < 4; i++) { |
---|
| 125 | + gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i), |
---|
| 126 | + 0x00000000); |
---|
| 127 | + } |
---|
90 | 128 | |
---|
91 | | - for (i = 0; i < 4; i++) { |
---|
92 | | - gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i), |
---|
93 | | - 0x00000001); |
---|
| 129 | + for (i = 0; i < 4; i++) { |
---|
| 130 | + gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i), |
---|
| 131 | + 0x00000001); |
---|
| 132 | + } |
---|
94 | 133 | } |
---|
95 | 134 | |
---|
96 | 135 | gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222); |
---|
.. | .. |
---|
138 | 177 | OUT_RING(ring, 0x00000000); |
---|
139 | 178 | OUT_RING(ring, 0x00000000); |
---|
140 | 179 | |
---|
141 | | - gpu->funcs->flush(gpu, ring); |
---|
| 180 | + adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR); |
---|
142 | 181 | return a4xx_idle(gpu); |
---|
143 | 182 | } |
---|
144 | 183 | |
---|
.. | .. |
---|
149 | 188 | uint32_t *ptr, len; |
---|
150 | 189 | int i, ret; |
---|
151 | 190 | |
---|
152 | | - if (adreno_is_a420(adreno_gpu)) { |
---|
| 191 | + if (adreno_is_a405(adreno_gpu)) { |
---|
| 192 | + gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); |
---|
| 193 | + } else if (adreno_is_a420(adreno_gpu)) { |
---|
153 | 194 | gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F); |
---|
154 | 195 | gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4); |
---|
155 | 196 | gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001); |
---|
.. | .. |
---|
197 | 238 | (1 << 30) | 0xFFFF); |
---|
198 | 239 | |
---|
199 | 240 | gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR, |
---|
200 | | - (unsigned int)(a4xx_gpu->ocmem_base >> 14)); |
---|
| 241 | + (unsigned int)(a4xx_gpu->ocmem.base >> 14)); |
---|
201 | 242 | |
---|
202 | 243 | /* Turn on performance counters: */ |
---|
203 | 244 | gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01); |
---|
.. | .. |
---|
274 | 315 | if (ret) |
---|
275 | 316 | return ret; |
---|
276 | 317 | |
---|
| 318 | + /* |
---|
| 319 | + * Use the default ringbuffer size and block size but disable the RPTR |
---|
| 320 | + * shadow |
---|
| 321 | + */ |
---|
| 322 | + gpu_write(gpu, REG_A4XX_CP_RB_CNTL, |
---|
| 323 | + MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); |
---|
| 324 | + |
---|
| 325 | + /* Set the ringbuffer address */ |
---|
| 326 | + gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova)); |
---|
| 327 | + |
---|
277 | 328 | /* Load PM4: */ |
---|
278 | 329 | ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data); |
---|
279 | 330 | len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4; |
---|
.. | .. |
---|
327 | 378 | |
---|
328 | 379 | adreno_gpu_cleanup(adreno_gpu); |
---|
329 | 380 | |
---|
330 | | -#ifdef CONFIG_MSM_OCMEM |
---|
331 | | - if (a4xx_gpu->ocmem_base) |
---|
332 | | - ocmem_free(OCMEM_GRAPHICS, a4xx_gpu->ocmem_hdl); |
---|
333 | | -#endif |
---|
| 381 | + adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem); |
---|
334 | 382 | |
---|
335 | 383 | kfree(a4xx_gpu); |
---|
336 | 384 | } |
---|
.. | .. |
---|
455 | 503 | ~0 /* sentinel */ |
---|
456 | 504 | }; |
---|
457 | 505 | |
---|
| 506 | +static const unsigned int a405_registers[] = { |
---|
| 507 | + /* RBBM */ |
---|
| 508 | + 0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026, |
---|
| 509 | + 0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066, |
---|
| 510 | + 0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF, |
---|
| 511 | + /* CP */ |
---|
| 512 | + 0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B, |
---|
| 513 | + 0x0578, 0x058F, |
---|
| 514 | + /* VSC */ |
---|
| 515 | + 0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51, |
---|
| 516 | + /* GRAS */ |
---|
| 517 | + 0x0C80, 0x0C81, 0x0C88, 0x0C8F, |
---|
| 518 | + /* RB */ |
---|
| 519 | + 0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2, |
---|
| 520 | + /* PC */ |
---|
| 521 | + 0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23, |
---|
| 522 | + /* VFD */ |
---|
| 523 | + 0x0E40, 0x0E4A, |
---|
| 524 | + /* VPC */ |
---|
| 525 | + 0x0E60, 0x0E61, 0x0E63, 0x0E68, |
---|
| 526 | + /* UCHE */ |
---|
| 527 | + 0x0E80, 0x0E84, 0x0E88, 0x0E95, |
---|
| 528 | + /* GRAS CTX 0 */ |
---|
| 529 | + 0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E, |
---|
| 530 | + /* PC CTX 0 */ |
---|
| 531 | + 0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7, |
---|
| 532 | + /* VFD CTX 0 */ |
---|
| 533 | + 0x2200, 0x2204, 0x2208, 0x22A9, |
---|
| 534 | + /* GRAS CTX 1 */ |
---|
| 535 | + 0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E, |
---|
| 536 | + /* PC CTX 1 */ |
---|
| 537 | + 0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7, |
---|
| 538 | + /* VFD CTX 1 */ |
---|
| 539 | + 0x2600, 0x2604, 0x2608, 0x26A9, |
---|
| 540 | + /* VBIF version 0x20050000*/ |
---|
| 541 | + 0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036, |
---|
| 542 | + 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049, |
---|
| 543 | + 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D, |
---|
| 544 | + 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098, |
---|
| 545 | + 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0, |
---|
| 546 | + 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108, |
---|
| 547 | + 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125, |
---|
| 548 | + 0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410, |
---|
| 549 | + ~0 /* sentinel */ |
---|
| 550 | +}; |
---|
| 551 | + |
---|
458 | 552 | static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu) |
---|
459 | 553 | { |
---|
460 | 554 | struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL); |
---|
.. | .. |
---|
468 | 562 | |
---|
469 | 563 | return state; |
---|
470 | 564 | } |
---|
471 | | - |
---|
472 | | -/* Register offset defines for A4XX, in order of enum adreno_regs */ |
---|
473 | | -static const unsigned int a4xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { |
---|
474 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A4XX_CP_RB_BASE), |
---|
475 | | - REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI), |
---|
476 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A4XX_CP_RB_RPTR_ADDR), |
---|
477 | | - REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI), |
---|
478 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A4XX_CP_RB_RPTR), |
---|
479 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A4XX_CP_RB_WPTR), |
---|
480 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A4XX_CP_RB_CNTL), |
---|
481 | | -}; |
---|
482 | 565 | |
---|
483 | 566 | static void a4xx_dump(struct msm_gpu *gpu) |
---|
484 | 567 | { |
---|
.. | .. |
---|
530 | 613 | return 0; |
---|
531 | 614 | } |
---|
532 | 615 | |
---|
| 616 | +static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) |
---|
| 617 | +{ |
---|
| 618 | + ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR); |
---|
| 619 | + return ring->memptrs->rptr; |
---|
| 620 | +} |
---|
| 621 | + |
---|
533 | 622 | static const struct adreno_gpu_funcs funcs = { |
---|
534 | 623 | .base = { |
---|
535 | 624 | .get_param = adreno_get_param, |
---|
.. | .. |
---|
537 | 626 | .pm_suspend = a4xx_pm_suspend, |
---|
538 | 627 | .pm_resume = a4xx_pm_resume, |
---|
539 | 628 | .recover = a4xx_recover, |
---|
540 | | - .submit = adreno_submit, |
---|
541 | | - .flush = adreno_flush, |
---|
| 629 | + .submit = a4xx_submit, |
---|
542 | 630 | .active_ring = adreno_active_ring, |
---|
543 | 631 | .irq = a4xx_irq, |
---|
544 | 632 | .destroy = a4xx_destroy, |
---|
.. | .. |
---|
547 | 635 | #endif |
---|
548 | 636 | .gpu_state_get = a4xx_gpu_state_get, |
---|
549 | 637 | .gpu_state_put = adreno_gpu_state_put, |
---|
| 638 | + .create_address_space = adreno_iommu_create_address_space, |
---|
| 639 | + .get_rptr = a4xx_get_rptr, |
---|
550 | 640 | }, |
---|
551 | 641 | .get_timestamp = a4xx_get_timestamp, |
---|
552 | 642 | }; |
---|
.. | .. |
---|
561 | 651 | int ret; |
---|
562 | 652 | |
---|
563 | 653 | if (!pdev) { |
---|
564 | | - dev_err(dev->dev, "no a4xx device\n"); |
---|
| 654 | + DRM_DEV_ERROR(dev->dev, "no a4xx device\n"); |
---|
565 | 655 | ret = -ENXIO; |
---|
566 | 656 | goto fail; |
---|
567 | 657 | } |
---|
.. | .. |
---|
578 | 668 | gpu->perfcntrs = NULL; |
---|
579 | 669 | gpu->num_perfcntrs = 0; |
---|
580 | 670 | |
---|
581 | | - adreno_gpu->registers = a4xx_registers; |
---|
582 | | - adreno_gpu->reg_offsets = a4xx_register_offsets; |
---|
583 | | - |
---|
584 | 671 | ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); |
---|
585 | 672 | if (ret) |
---|
586 | 673 | goto fail; |
---|
587 | 674 | |
---|
588 | | - /* if needed, allocate gmem: */ |
---|
589 | | - if (adreno_is_a4xx(adreno_gpu)) { |
---|
590 | | -#ifdef CONFIG_MSM_OCMEM |
---|
591 | | - /* TODO this is different/missing upstream: */ |
---|
592 | | - struct ocmem_buf *ocmem_hdl = |
---|
593 | | - ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem); |
---|
| 675 | + adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers : |
---|
| 676 | + a4xx_registers; |
---|
594 | 677 | |
---|
595 | | - a4xx_gpu->ocmem_hdl = ocmem_hdl; |
---|
596 | | - a4xx_gpu->ocmem_base = ocmem_hdl->addr; |
---|
597 | | - adreno_gpu->gmem = ocmem_hdl->len; |
---|
598 | | - DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024, |
---|
599 | | - a4xx_gpu->ocmem_base); |
---|
600 | | -#endif |
---|
601 | | - } |
---|
| 678 | + /* if needed, allocate gmem: */ |
---|
| 679 | + ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu, |
---|
| 680 | + &a4xx_gpu->ocmem); |
---|
| 681 | + if (ret) |
---|
| 682 | + goto fail; |
---|
602 | 683 | |
---|
603 | 684 | if (!gpu->aspace) { |
---|
604 | 685 | /* TODO we think it is possible to configure the GPU to |
---|
.. | .. |
---|
608 | 689 | * to not be possible to restrict access, then we must |
---|
609 | 690 | * implement a cmdstream validator. |
---|
610 | 691 | */ |
---|
611 | | - dev_err(dev->dev, "No memory protection without IOMMU\n"); |
---|
| 692 | + DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n"); |
---|
612 | 693 | ret = -ENXIO; |
---|
613 | 694 | goto fail; |
---|
614 | 695 | } |
---|
615 | 696 | |
---|
| 697 | + /* |
---|
| 698 | + * Set the ICC path to maximum speed for now by multiplying the fastest |
---|
| 699 | + * frequency by the bus width (8). We'll want to scale this later on to |
---|
| 700 | + * improve battery life. |
---|
| 701 | + */ |
---|
| 702 | + icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8); |
---|
| 703 | + icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8); |
---|
| 704 | + |
---|
616 | 705 | return gpu; |
---|
617 | 706 | |
---|
618 | 707 | fail: |
---|