| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Copyright (C) 2013 Red Hat |
|---|
| 3 | 4 | * Author: Rob Clark <robdclark@gmail.com> |
|---|
| 4 | 5 | * |
|---|
| 5 | 6 | * Copyright (c) 2014 The Linux Foundation. All rights reserved. |
|---|
| 6 | | - * |
|---|
| 7 | | - * This program is free software; you can redistribute it and/or modify it |
|---|
| 8 | | - * under the terms of the GNU General Public License version 2 as published by |
|---|
| 9 | | - * the Free Software Foundation. |
|---|
| 10 | | - * |
|---|
| 11 | | - * This program is distributed in the hope that it will be useful, but WITHOUT |
|---|
| 12 | | - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|---|
| 13 | | - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|---|
| 14 | | - * more details. |
|---|
| 15 | | - * |
|---|
| 16 | | - * You should have received a copy of the GNU General Public License along with |
|---|
| 17 | | - * this program. If not, see <http://www.gnu.org/licenses/>. |
|---|
| 18 | 7 | */ |
|---|
| 19 | | - |
|---|
| 20 | | -#ifdef CONFIG_MSM_OCMEM |
|---|
| 21 | | -# include <mach/ocmem.h> |
|---|
| 22 | | -#endif |
|---|
| 23 | 8 | |
|---|
| 24 | 9 | #include "a3xx_gpu.h" |
|---|
| 25 | 10 | |
|---|
| .. | .. |
|---|
| 42 | 27 | |
|---|
| 43 | 28 | static void a3xx_dump(struct msm_gpu *gpu); |
|---|
| 44 | 29 | static bool a3xx_idle(struct msm_gpu *gpu); |
|---|
| 30 | +/* Queue a submit's command buffers plus its seqno/fence writes on submit->ring, then flush that ring. */ |
|---|
| 31 | +static void a3xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) |
|---|
| 32 | +{ |
|---|
| 33 | + struct msm_drm_private *priv = gpu->dev->dev_private; |
|---|
| 34 | + struct msm_ringbuffer *ring = submit->ring; |
|---|
| 35 | + unsigned int i; |
|---|
| 36 | + |
|---|
| 37 | + for (i = 0; i < submit->nr_cmds; i++) { |
|---|
| 38 | + switch (submit->cmd[i].type) { |
|---|
| 39 | + case MSM_SUBMIT_CMD_IB_TARGET_BUF: |
|---|
| 40 | + /* ignore IB-targets: nothing is emitted to the ring for them */ |
|---|
| 41 | + break; |
|---|
| 42 | + case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: |
|---|
| 43 | + /* ignore if there has not been a ctx switch (lastctx is still this submit's queue ctx): */ |
|---|
| 44 | + if (priv->lastctx == submit->queue->ctx) |
|---|
| 45 | + break; |
|---|
| 46 | + fallthrough; /* ctx changed: emit the restore buffer exactly like a normal cmd buffer */ |
|---|
| 47 | + case MSM_SUBMIT_CMD_BUF: |
|---|
| 48 | + OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2); |
|---|
| 49 | + OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); /* only the low 32 bits of the buffer's iova are emitted */ |
|---|
| 50 | + OUT_RING(ring, submit->cmd[i].size); |
|---|
| 51 | + OUT_PKT2(ring); |
|---|
| 52 | + break; |
|---|
| 53 | + } |
|---|
| 54 | + } |
|---|
| 55 | + |
|---|
| 56 | + OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); /* store the submit's seqno in CP scratch register 2 */ |
|---|
| 57 | + OUT_RING(ring, submit->seqno); |
|---|
| 58 | + |
|---|
| 59 | + /* Flush HLSQ lazy updates to make sure there is nothing |
|---|
| 60 | + * pending for indirect loads after the timestamp has |
|---|
| 61 | + * passed: |
|---|
| 62 | + */ |
|---|
| 63 | + OUT_PKT3(ring, CP_EVENT_WRITE, 1); |
|---|
| 64 | + OUT_RING(ring, HLSQ_FLUSH); |
|---|
| 65 | + |
|---|
| 66 | + /* wait for idle before cache flush/interrupt */ |
|---|
| 67 | + OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); |
|---|
| 68 | + OUT_RING(ring, 0x00000000); |
|---|
| 69 | + |
|---|
| 70 | + /* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */ |
|---|
| 71 | + OUT_PKT3(ring, CP_EVENT_WRITE, 3); |
|---|
| 72 | + OUT_RING(ring, CACHE_FLUSH_TS | BIT(31)); |
|---|
| 73 | + OUT_RING(ring, rbmemptr(ring, fence)); /* NOTE(review): presumably the address the seqno below gets written to - confirm */ |
|---|
| 74 | + OUT_RING(ring, submit->seqno); |
|---|
| 75 | + |
|---|
| 76 | +#if 0 |
|---|
| 77 | + /* Compiled out by the surrounding #if 0: dummy set-constant to trigger context rollover */ |
|---|
| 78 | + OUT_PKT3(ring, CP_SET_CONSTANT, 2); |
|---|
| 79 | + OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG)); |
|---|
| 80 | + OUT_RING(ring, 0x00000000); |
|---|
| 81 | +#endif |
|---|
| 82 | + |
|---|
| 83 | + adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR); /* flush the ring, passing CP_RB_WPTR as the register for adreno_flush() to use */ |
|---|
| 84 | +} |
|---|
| 45 | 85 | |
|---|
| 46 | 86 | static bool a3xx_me_init(struct msm_gpu *gpu) |
|---|
| 47 | 87 | { |
|---|
| .. | .. |
|---|
| 66 | 106 | OUT_RING(ring, 0x00000000); |
|---|
| 67 | 107 | OUT_RING(ring, 0x00000000); |
|---|
| 68 | 108 | |
|---|
| 69 | | - gpu->funcs->flush(gpu, ring); |
|---|
| 109 | + adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR); |
|---|
| 70 | 110 | return a3xx_idle(gpu); |
|---|
| 71 | 111 | } |
|---|
| 72 | 112 | |
|---|
| .. | .. |
|---|
| 206 | 246 | gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000); |
|---|
| 207 | 247 | |
|---|
| 208 | 248 | /* Set the OCMEM base address for A330, etc */ |
|---|
| 209 | | - if (a3xx_gpu->ocmem_hdl) { |
|---|
| 249 | + if (a3xx_gpu->ocmem.hdl) { |
|---|
| 210 | 250 | gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR, |
|---|
| 211 | | - (unsigned int)(a3xx_gpu->ocmem_base >> 14)); |
|---|
| 251 | + (unsigned int)(a3xx_gpu->ocmem.base >> 14)); |
|---|
| 212 | 252 | } |
|---|
| 213 | 253 | |
|---|
| 214 | 254 | /* Turn on performance counters: */ |
|---|
| .. | .. |
|---|
| 225 | 265 | ret = adreno_hw_init(gpu); |
|---|
| 226 | 266 | if (ret) |
|---|
| 227 | 267 | return ret; |
|---|
| 268 | + |
|---|
| 269 | + /* |
|---|
| 270 | + * Use the default ringbuffer size and block size but disable the RPTR |
|---|
| 271 | + * shadow |
|---|
| 272 | + */ |
|---|
| 273 | + gpu_write(gpu, REG_AXXX_CP_RB_CNTL, |
|---|
| 274 | + MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); |
|---|
| 275 | + |
|---|
| 276 | + /* Set the ringbuffer address */ |
|---|
| 277 | + gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova)); |
|---|
| 228 | 278 | |
|---|
| 229 | 279 | /* setup access protection: */ |
|---|
| 230 | 280 | gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007); |
|---|
| .. | .. |
|---|
| 329 | 379 | |
|---|
| 330 | 380 | adreno_gpu_cleanup(adreno_gpu); |
|---|
| 331 | 381 | |
|---|
| 332 | | -#ifdef CONFIG_MSM_OCMEM |
|---|
| 333 | | - if (a3xx_gpu->ocmem_base) |
|---|
| 334 | | - ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl); |
|---|
| 335 | | -#endif |
|---|
| 382 | + adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem); |
|---|
| 336 | 383 | |
|---|
| 337 | 384 | kfree(a3xx_gpu); |
|---|
| 338 | 385 | } |
|---|
| .. | .. |
|---|
| 431 | 478 | return state; |
|---|
| 432 | 479 | } |
|---|
| 433 | 480 | |
|---|
| 434 | | -/* Register offset defines for A3XX */ |
|---|
| 435 | | -static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { |
|---|
| 436 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE), |
|---|
| 437 | | - REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI), |
|---|
| 438 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR), |
|---|
| 439 | | - REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI), |
|---|
| 440 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR), |
|---|
| 441 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR), |
|---|
| 442 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL), |
|---|
| 443 | | -}; |
|---|
| 481 | +static u32 a3xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) |
|---|
| 482 | +{ |
|---|
| 483 | + ring->memptrs->rptr = gpu_read(gpu, REG_AXXX_CP_RB_RPTR); /* read the CP_RB_RPTR register and cache it in the ring's memptrs */ |
|---|
| 484 | + return ring->memptrs->rptr; /* hand back the value that was just stored */ |
|---|
| 485 | +} |
|---|
| 444 | 486 | |
|---|
| 445 | 487 | static const struct adreno_gpu_funcs funcs = { |
|---|
| 446 | 488 | .base = { |
|---|
| .. | .. |
|---|
| 449 | 491 | .pm_suspend = msm_gpu_pm_suspend, |
|---|
| 450 | 492 | .pm_resume = msm_gpu_pm_resume, |
|---|
| 451 | 493 | .recover = a3xx_recover, |
|---|
| 452 | | - .submit = adreno_submit, |
|---|
| 453 | | - .flush = adreno_flush, |
|---|
| 494 | + .submit = a3xx_submit, |
|---|
| 454 | 495 | .active_ring = adreno_active_ring, |
|---|
| 455 | 496 | .irq = a3xx_irq, |
|---|
| 456 | 497 | .destroy = a3xx_destroy, |
|---|
| .. | .. |
|---|
| 459 | 500 | #endif |
|---|
| 460 | 501 | .gpu_state_get = a3xx_gpu_state_get, |
|---|
| 461 | 502 | .gpu_state_put = adreno_gpu_state_put, |
|---|
| 503 | + .create_address_space = adreno_iommu_create_address_space, |
|---|
| 504 | + .get_rptr = a3xx_get_rptr, |
|---|
| 462 | 505 | }, |
|---|
| 463 | 506 | }; |
|---|
| 464 | 507 | |
|---|
| .. | .. |
|---|
| 479 | 522 | int ret; |
|---|
| 480 | 523 | |
|---|
| 481 | 524 | if (!pdev) { |
|---|
| 482 | | - dev_err(dev->dev, "no a3xx device\n"); |
|---|
| 525 | + DRM_DEV_ERROR(dev->dev, "no a3xx device\n"); |
|---|
| 483 | 526 | ret = -ENXIO; |
|---|
| 484 | 527 | goto fail; |
|---|
| 485 | 528 | } |
|---|
| .. | .. |
|---|
| 497 | 540 | gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs); |
|---|
| 498 | 541 | |
|---|
| 499 | 542 | adreno_gpu->registers = a3xx_registers; |
|---|
| 500 | | - adreno_gpu->reg_offsets = a3xx_register_offsets; |
|---|
| 501 | 543 | |
|---|
| 502 | 544 | ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); |
|---|
| 503 | 545 | if (ret) |
|---|
| .. | .. |
|---|
| 505 | 547 | |
|---|
| 506 | 548 | /* if needed, allocate gmem: */ |
|---|
| 507 | 549 | if (adreno_is_a330(adreno_gpu)) { |
|---|
| 508 | | -#ifdef CONFIG_MSM_OCMEM |
|---|
| 509 | | - /* TODO this is different/missing upstream: */ |
|---|
| 510 | | - struct ocmem_buf *ocmem_hdl = |
|---|
| 511 | | - ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem); |
|---|
| 512 | | - |
|---|
| 513 | | - a3xx_gpu->ocmem_hdl = ocmem_hdl; |
|---|
| 514 | | - a3xx_gpu->ocmem_base = ocmem_hdl->addr; |
|---|
| 515 | | - adreno_gpu->gmem = ocmem_hdl->len; |
|---|
| 516 | | - DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024, |
|---|
| 517 | | - a3xx_gpu->ocmem_base); |
|---|
| 518 | | -#endif |
|---|
| 550 | + ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev, |
|---|
| 551 | + adreno_gpu, &a3xx_gpu->ocmem); |
|---|
| 552 | + if (ret) |
|---|
| 553 | + goto fail; |
|---|
| 519 | 554 | } |
|---|
| 520 | 555 | |
|---|
| 521 | 556 | if (!gpu->aspace) { |
|---|
| .. | .. |
|---|
| 526 | 561 | * to not be possible to restrict access, then we must |
|---|
| 527 | 562 | * implement a cmdstream validator. |
|---|
| 528 | 563 | */ |
|---|
| 529 | | - dev_err(dev->dev, "No memory protection without IOMMU\n"); |
|---|
| 564 | + DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n"); |
|---|
| 530 | 565 | ret = -ENXIO; |
|---|
| 531 | 566 | goto fail; |
|---|
| 532 | 567 | } |
|---|
| 533 | 568 | |
|---|
| 569 | + /* |
|---|
| 570 | + * Set the ICC path to maximum speed for now by multiplying the fastest |
|---|
| 571 | + * frequency by the bus width (8). We'll want to scale this later on to |
|---|
| 572 | + * improve battery life. |
|---|
| 573 | + */ |
|---|
| 574 | + icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8); |
|---|
| 575 | + icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8); |
|---|
| 576 | + |
|---|
| 534 | 577 | return gpu; |
|---|
| 535 | 578 | |
|---|
| 536 | 579 | fail: |
|---|