| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* Copyright (c) 2014 The Linux Foundation. All rights reserved. |
|---|
| 2 | | - * |
|---|
| 3 | | - * This program is free software; you can redistribute it and/or modify |
|---|
| 4 | | - * it under the terms of the GNU General Public License version 2 and |
|---|
| 5 | | - * only version 2 as published by the Free Software Foundation. |
|---|
| 6 | | - * |
|---|
| 7 | | - * This program is distributed in the hope that it will be useful, |
|---|
| 8 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 9 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 10 | | - * GNU General Public License for more details. |
|---|
| 11 | | - * |
|---|
| 12 | 3 | */ |
|---|
| 13 | 4 | #include "a4xx_gpu.h" |
|---|
| 14 | | -#ifdef CONFIG_MSM_OCMEM |
|---|
| 15 | | -# include <soc/qcom/ocmem.h> |
|---|
| 16 | | -#endif |
|---|
| 17 | 5 | |
|---|
| 18 | 6 | #define A4XX_INT0_MASK \ |
|---|
| 19 | 7 | (A4XX_INT0_RBBM_AHB_ERROR | \ |
|---|
| .. | .. |
|---|
| 33 | 21 | extern bool hang_debug; |
|---|
| 34 | 22 | static void a4xx_dump(struct msm_gpu *gpu); |
|---|
| 35 | 23 | static bool a4xx_idle(struct msm_gpu *gpu); |
|---|
| 24 | + |
|---|
| 25 | +static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) |
|---|
| 26 | +{ |
|---|
| 27 | + struct msm_drm_private *priv = gpu->dev->dev_private; |
|---|
| 28 | + struct msm_ringbuffer *ring = submit->ring; |
|---|
| 29 | + unsigned int i; |
|---|
| 30 | + |
|---|
| 31 | + for (i = 0; i < submit->nr_cmds; i++) { |
|---|
| 32 | + switch (submit->cmd[i].type) { |
|---|
| 33 | + case MSM_SUBMIT_CMD_IB_TARGET_BUF: |
|---|
| 34 | + /* ignore IB-targets */ |
|---|
| 35 | + break; |
|---|
| 36 | + case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: |
|---|
| 37 | + /* ignore if there has not been a ctx switch: */ |
|---|
| 38 | + if (priv->lastctx == submit->queue->ctx) |
|---|
| 39 | + break; |
|---|
| 40 | + fallthrough; |
|---|
| 41 | + case MSM_SUBMIT_CMD_BUF: |
|---|
| 42 | + OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2); |
|---|
| 43 | + OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); |
|---|
| 44 | + OUT_RING(ring, submit->cmd[i].size); |
|---|
| 45 | + OUT_PKT2(ring); |
|---|
| 46 | + break; |
|---|
| 47 | + } |
|---|
| 48 | + } |
|---|
| 49 | + |
|---|
| 50 | + OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); |
|---|
| 51 | + OUT_RING(ring, submit->seqno); |
|---|
| 52 | + |
|---|
| 53 | + /* Flush HLSQ lazy updates to make sure there is nothing |
|---|
| 54 | + * pending for indirect loads after the timestamp has |
|---|
| 55 | + * passed: |
|---|
| 56 | + */ |
|---|
| 57 | + OUT_PKT3(ring, CP_EVENT_WRITE, 1); |
|---|
| 58 | + OUT_RING(ring, HLSQ_FLUSH); |
|---|
| 59 | + |
|---|
| 60 | + /* wait for idle before cache flush/interrupt */ |
|---|
| 61 | + OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); |
|---|
| 62 | + OUT_RING(ring, 0x00000000); |
|---|
| 63 | + |
|---|
| 64 | + /* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */ |
|---|
| 65 | + OUT_PKT3(ring, CP_EVENT_WRITE, 3); |
|---|
| 66 | + OUT_RING(ring, CACHE_FLUSH_TS | BIT(31)); |
|---|
| 67 | + OUT_RING(ring, rbmemptr(ring, fence)); |
|---|
| 68 | + OUT_RING(ring, submit->seqno); |
|---|
| 69 | + |
|---|
| 70 | + adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR); |
|---|
| 71 | +} |
|---|
| 36 | 72 | |
|---|
| 37 | 73 | /* |
|---|
| 38 | 74 | * a4xx_enable_hwcg() - Program the clock control registers |
|---|
| .. | .. |
|---|
| 78 | 114 | } |
|---|
| 79 | 115 | } |
|---|
| 80 | 116 | |
|---|
| 81 | | - for (i = 0; i < 4; i++) { |
|---|
| 82 | | - gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i), |
|---|
| 83 | | - 0x00000922); |
|---|
| 84 | | - } |
|---|
| 117 | + /* No CCU for A405 */ |
|---|
| 118 | + if (!adreno_is_a405(adreno_gpu)) { |
|---|
| 119 | + for (i = 0; i < 4; i++) { |
|---|
| 120 | + gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i), |
|---|
| 121 | + 0x00000922); |
|---|
| 122 | + } |
|---|
| 85 | 123 | |
|---|
| 86 | | - for (i = 0; i < 4; i++) { |
|---|
| 87 | | - gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i), |
|---|
| 88 | | - 0x00000000); |
|---|
| 89 | | - } |
|---|
| 124 | + for (i = 0; i < 4; i++) { |
|---|
| 125 | + gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i), |
|---|
| 126 | + 0x00000000); |
|---|
| 127 | + } |
|---|
| 90 | 128 | |
|---|
| 91 | | - for (i = 0; i < 4; i++) { |
|---|
| 92 | | - gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i), |
|---|
| 93 | | - 0x00000001); |
|---|
| 129 | + for (i = 0; i < 4; i++) { |
|---|
| 130 | + gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i), |
|---|
| 131 | + 0x00000001); |
|---|
| 132 | + } |
|---|
| 94 | 133 | } |
|---|
| 95 | 134 | |
|---|
| 96 | 135 | gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222); |
|---|
| .. | .. |
|---|
| 138 | 177 | OUT_RING(ring, 0x00000000); |
|---|
| 139 | 178 | OUT_RING(ring, 0x00000000); |
|---|
| 140 | 179 | |
|---|
| 141 | | - gpu->funcs->flush(gpu, ring); |
|---|
| 180 | + adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR); |
|---|
| 142 | 181 | return a4xx_idle(gpu); |
|---|
| 143 | 182 | } |
|---|
| 144 | 183 | |
|---|
| .. | .. |
|---|
| 149 | 188 | uint32_t *ptr, len; |
|---|
| 150 | 189 | int i, ret; |
|---|
| 151 | 190 | |
|---|
| 152 | | - if (adreno_is_a420(adreno_gpu)) { |
|---|
| 191 | + if (adreno_is_a405(adreno_gpu)) { |
|---|
| 192 | + gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); |
|---|
| 193 | + } else if (adreno_is_a420(adreno_gpu)) { |
|---|
| 153 | 194 | gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F); |
|---|
| 154 | 195 | gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4); |
|---|
| 155 | 196 | gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001); |
|---|
| .. | .. |
|---|
| 197 | 238 | (1 << 30) | 0xFFFF); |
|---|
| 198 | 239 | |
|---|
| 199 | 240 | gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR, |
|---|
| 200 | | - (unsigned int)(a4xx_gpu->ocmem_base >> 14)); |
|---|
| 241 | + (unsigned int)(a4xx_gpu->ocmem.base >> 14)); |
|---|
| 201 | 242 | |
|---|
| 202 | 243 | /* Turn on performance counters: */ |
|---|
| 203 | 244 | gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01); |
|---|
| .. | .. |
|---|
| 274 | 315 | if (ret) |
|---|
| 275 | 316 | return ret; |
|---|
| 276 | 317 | |
|---|
| 318 | + /* |
|---|
| 319 | + * Use the default ringbuffer size and block size but disable the RPTR |
|---|
| 320 | + * shadow |
|---|
| 321 | + */ |
|---|
| 322 | + gpu_write(gpu, REG_A4XX_CP_RB_CNTL, |
|---|
| 323 | + MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); |
|---|
| 324 | + |
|---|
| 325 | + /* Set the ringbuffer address */ |
|---|
| 326 | + gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova)); |
|---|
| 327 | + |
|---|
| 277 | 328 | /* Load PM4: */ |
|---|
| 278 | 329 | ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data); |
|---|
| 279 | 330 | len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4; |
|---|
| .. | .. |
|---|
| 327 | 378 | |
|---|
| 328 | 379 | adreno_gpu_cleanup(adreno_gpu); |
|---|
| 329 | 380 | |
|---|
| 330 | | -#ifdef CONFIG_MSM_OCMEM |
|---|
| 331 | | - if (a4xx_gpu->ocmem_base) |
|---|
| 332 | | - ocmem_free(OCMEM_GRAPHICS, a4xx_gpu->ocmem_hdl); |
|---|
| 333 | | -#endif |
|---|
| 381 | + adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem); |
|---|
| 334 | 382 | |
|---|
| 335 | 383 | kfree(a4xx_gpu); |
|---|
| 336 | 384 | } |
|---|
| .. | .. |
|---|
| 455 | 503 | ~0 /* sentinel */ |
|---|
| 456 | 504 | }; |
|---|
| 457 | 505 | |
|---|
/*
 * Register ranges (inclusive start/end pairs) dumped for debugging on
 * A405; terminated by a ~0 sentinel. A405 has a reduced register map
 * compared to the other A4xx parts (e.g. no CCU), hence its own table.
 */
static const unsigned int a405_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* VBIF version 0x20050000 */
	0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036,
	0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049,
	0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D,
	0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098,
	0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0,
	0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108,
	0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125,
	0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410,
	~0 /* sentinel */
};
|---|
| 551 | + |
|---|
| 458 | 552 | static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu) |
|---|
| 459 | 553 | { |
|---|
| 460 | 554 | struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL); |
|---|
| .. | .. |
|---|
| 468 | 562 | |
|---|
| 469 | 563 | return state; |
|---|
| 470 | 564 | } |
|---|
| 471 | | - |
|---|
| 472 | | -/* Register offset defines for A4XX, in order of enum adreno_regs */ |
|---|
| 473 | | -static const unsigned int a4xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { |
|---|
| 474 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A4XX_CP_RB_BASE), |
|---|
| 475 | | - REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI), |
|---|
| 476 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A4XX_CP_RB_RPTR_ADDR), |
|---|
| 477 | | - REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI), |
|---|
| 478 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A4XX_CP_RB_RPTR), |
|---|
| 479 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A4XX_CP_RB_WPTR), |
|---|
| 480 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A4XX_CP_RB_CNTL), |
|---|
| 481 | | -}; |
|---|
| 482 | 565 | |
|---|
| 483 | 566 | static void a4xx_dump(struct msm_gpu *gpu) |
|---|
| 484 | 567 | { |
|---|
| .. | .. |
|---|
| 530 | 613 | return 0; |
|---|
| 531 | 614 | } |
|---|
| 532 | 615 | |
|---|
| 616 | +static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) |
|---|
| 617 | +{ |
|---|
| 618 | + ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR); |
|---|
| 619 | + return ring->memptrs->rptr; |
|---|
| 620 | +} |
|---|
| 621 | + |
|---|
| 533 | 622 | static const struct adreno_gpu_funcs funcs = { |
|---|
| 534 | 623 | .base = { |
|---|
| 535 | 624 | .get_param = adreno_get_param, |
|---|
| .. | .. |
|---|
| 537 | 626 | .pm_suspend = a4xx_pm_suspend, |
|---|
| 538 | 627 | .pm_resume = a4xx_pm_resume, |
|---|
| 539 | 628 | .recover = a4xx_recover, |
|---|
| 540 | | - .submit = adreno_submit, |
|---|
| 541 | | - .flush = adreno_flush, |
|---|
| 629 | + .submit = a4xx_submit, |
|---|
| 542 | 630 | .active_ring = adreno_active_ring, |
|---|
| 543 | 631 | .irq = a4xx_irq, |
|---|
| 544 | 632 | .destroy = a4xx_destroy, |
|---|
| .. | .. |
|---|
| 547 | 635 | #endif |
|---|
| 548 | 636 | .gpu_state_get = a4xx_gpu_state_get, |
|---|
| 549 | 637 | .gpu_state_put = adreno_gpu_state_put, |
|---|
| 638 | + .create_address_space = adreno_iommu_create_address_space, |
|---|
| 639 | + .get_rptr = a4xx_get_rptr, |
|---|
| 550 | 640 | }, |
|---|
| 551 | 641 | .get_timestamp = a4xx_get_timestamp, |
|---|
| 552 | 642 | }; |
|---|
| .. | .. |
|---|
| 561 | 651 | int ret; |
|---|
| 562 | 652 | |
|---|
| 563 | 653 | if (!pdev) { |
|---|
| 564 | | - dev_err(dev->dev, "no a4xx device\n"); |
|---|
| 654 | + DRM_DEV_ERROR(dev->dev, "no a4xx device\n"); |
|---|
| 565 | 655 | ret = -ENXIO; |
|---|
| 566 | 656 | goto fail; |
|---|
| 567 | 657 | } |
|---|
| .. | .. |
|---|
| 578 | 668 | gpu->perfcntrs = NULL; |
|---|
| 579 | 669 | gpu->num_perfcntrs = 0; |
|---|
| 580 | 670 | |
|---|
| 581 | | - adreno_gpu->registers = a4xx_registers; |
|---|
| 582 | | - adreno_gpu->reg_offsets = a4xx_register_offsets; |
|---|
| 583 | | - |
|---|
| 584 | 671 | ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); |
|---|
| 585 | 672 | if (ret) |
|---|
| 586 | 673 | goto fail; |
|---|
| 587 | 674 | |
|---|
| 588 | | - /* if needed, allocate gmem: */ |
|---|
| 589 | | - if (adreno_is_a4xx(adreno_gpu)) { |
|---|
| 590 | | -#ifdef CONFIG_MSM_OCMEM |
|---|
| 591 | | - /* TODO this is different/missing upstream: */ |
|---|
| 592 | | - struct ocmem_buf *ocmem_hdl = |
|---|
| 593 | | - ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem); |
|---|
| 675 | + adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers : |
|---|
| 676 | + a4xx_registers; |
|---|
| 594 | 677 | |
|---|
| 595 | | - a4xx_gpu->ocmem_hdl = ocmem_hdl; |
|---|
| 596 | | - a4xx_gpu->ocmem_base = ocmem_hdl->addr; |
|---|
| 597 | | - adreno_gpu->gmem = ocmem_hdl->len; |
|---|
| 598 | | - DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024, |
|---|
| 599 | | - a4xx_gpu->ocmem_base); |
|---|
| 600 | | -#endif |
|---|
| 601 | | - } |
|---|
| 678 | + /* if needed, allocate gmem: */ |
|---|
| 679 | + ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu, |
|---|
| 680 | + &a4xx_gpu->ocmem); |
|---|
| 681 | + if (ret) |
|---|
| 682 | + goto fail; |
|---|
| 602 | 683 | |
|---|
| 603 | 684 | if (!gpu->aspace) { |
|---|
| 604 | 685 | /* TODO we think it is possible to configure the GPU to |
|---|
| .. | .. |
|---|
| 608 | 689 | * to not be possible to restrict access, then we must |
|---|
| 609 | 690 | * implement a cmdstream validator. |
|---|
| 610 | 691 | */ |
|---|
| 611 | | - dev_err(dev->dev, "No memory protection without IOMMU\n"); |
|---|
| 692 | + DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n"); |
|---|
| 612 | 693 | ret = -ENXIO; |
|---|
| 613 | 694 | goto fail; |
|---|
| 614 | 695 | } |
|---|
| 615 | 696 | |
|---|
| 697 | + /* |
|---|
| 698 | + * Set the ICC path to maximum speed for now by multiplying the fastest |
|---|
| 699 | + * frequency by the bus width (8). We'll want to scale this later on to |
|---|
| 700 | + * improve battery life. |
|---|
| 701 | + */ |
|---|
| 702 | + icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8); |
|---|
| 703 | + icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8); |
|---|
| 704 | + |
|---|
| 616 | 705 | return gpu; |
|---|
| 617 | 706 | |
|---|
| 618 | 707 | fail: |
|---|