.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * Copyright (C) 2013 Red Hat |
---|
3 | 4 | * Author: Rob Clark <robdclark@gmail.com> |
---|
4 | 5 | * |
---|
5 | 6 | * Copyright (c) 2014 The Linux Foundation. All rights reserved. |
---|
6 | | - * |
---|
7 | | - * This program is free software; you can redistribute it and/or modify it |
---|
8 | | - * under the terms of the GNU General Public License version 2 as published by |
---|
9 | | - * the Free Software Foundation. |
---|
10 | | - * |
---|
11 | | - * This program is distributed in the hope that it will be useful, but WITHOUT |
---|
12 | | - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
---|
13 | | - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
---|
14 | | - * more details. |
---|
15 | | - * |
---|
16 | | - * You should have received a copy of the GNU General Public License along with |
---|
17 | | - * this program. If not, see <http://www.gnu.org/licenses/>. |
---|
18 | 7 | */ |
---|
19 | | - |
---|
20 | | -#ifdef CONFIG_MSM_OCMEM |
---|
21 | | -# include <mach/ocmem.h> |
---|
22 | | -#endif |
---|
23 | 8 | |
---|
24 | 9 | #include "a3xx_gpu.h" |
---|
25 | 10 | |
---|
.. | .. |
---|
42 | 27 | |
---|
43 | 28 | static void a3xx_dump(struct msm_gpu *gpu); |
---|
44 | 29 | static bool a3xx_idle(struct msm_gpu *gpu); |
---|
| 30 | + |
---|
| 31 | +static void a3xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) |
---|
| 32 | +{ |
---|
| 33 | + struct msm_drm_private *priv = gpu->dev->dev_private; |
---|
| 34 | + struct msm_ringbuffer *ring = submit->ring; |
---|
| 35 | + unsigned int i; |
---|
| 36 | + |
---|
| 37 | + for (i = 0; i < submit->nr_cmds; i++) { |
---|
| 38 | + switch (submit->cmd[i].type) { |
---|
| 39 | + case MSM_SUBMIT_CMD_IB_TARGET_BUF: |
---|
| 40 | + /* ignore IB-targets */ |
---|
| 41 | + break; |
---|
| 42 | + case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: |
---|
| 43 | + /* ignore if there has not been a ctx switch: */ |
---|
| 44 | + if (priv->lastctx == submit->queue->ctx) |
---|
| 45 | + break; |
---|
| 46 | + fallthrough; |
---|
| 47 | + case MSM_SUBMIT_CMD_BUF: |
---|
| 48 | + OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2); |
---|
| 49 | + OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); |
---|
| 50 | + OUT_RING(ring, submit->cmd[i].size); |
---|
| 51 | + OUT_PKT2(ring); |
---|
| 52 | + break; |
---|
| 53 | + } |
---|
| 54 | + } |
---|
| 55 | + |
---|
| 56 | + OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); |
---|
| 57 | + OUT_RING(ring, submit->seqno); |
---|
| 58 | + |
---|
| 59 | + /* Flush HLSQ lazy updates to make sure there is nothing |
---|
| 60 | + * pending for indirect loads after the timestamp has |
---|
| 61 | + * passed: |
---|
| 62 | + */ |
---|
| 63 | + OUT_PKT3(ring, CP_EVENT_WRITE, 1); |
---|
| 64 | + OUT_RING(ring, HLSQ_FLUSH); |
---|
| 65 | + |
---|
| 66 | + /* wait for idle before cache flush/interrupt */ |
---|
| 67 | + OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); |
---|
| 68 | + OUT_RING(ring, 0x00000000); |
---|
| 69 | + |
---|
| 70 | + /* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */ |
---|
| 71 | + OUT_PKT3(ring, CP_EVENT_WRITE, 3); |
---|
| 72 | + OUT_RING(ring, CACHE_FLUSH_TS | BIT(31)); |
---|
| 73 | + OUT_RING(ring, rbmemptr(ring, fence)); |
---|
| 74 | + OUT_RING(ring, submit->seqno); |
---|
| 75 | + |
---|
| 76 | +#if 0 |
---|
| 77 | + /* Dummy set-constant to trigger context rollover */ |
---|
| 78 | + OUT_PKT3(ring, CP_SET_CONSTANT, 2); |
---|
| 79 | + OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG)); |
---|
| 80 | + OUT_RING(ring, 0x00000000); |
---|
| 81 | +#endif |
---|
| 82 | + |
---|
| 83 | + adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR); |
---|
| 84 | +} |
---|
45 | 85 | |
---|
46 | 86 | static bool a3xx_me_init(struct msm_gpu *gpu) |
---|
47 | 87 | { |
---|
.. | .. |
---|
66 | 106 | OUT_RING(ring, 0x00000000); |
---|
67 | 107 | OUT_RING(ring, 0x00000000); |
---|
68 | 108 | |
---|
69 | | - gpu->funcs->flush(gpu, ring); |
---|
| 109 | + adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR); |
---|
70 | 110 | return a3xx_idle(gpu); |
---|
71 | 111 | } |
---|
72 | 112 | |
---|
.. | .. |
---|
206 | 246 | gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000); |
---|
207 | 247 | |
---|
208 | 248 | /* Set the OCMEM base address for A330, etc */ |
---|
209 | | - if (a3xx_gpu->ocmem_hdl) { |
---|
| 249 | + if (a3xx_gpu->ocmem.hdl) { |
---|
210 | 250 | gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR, |
---|
211 | | - (unsigned int)(a3xx_gpu->ocmem_base >> 14)); |
---|
| 251 | + (unsigned int)(a3xx_gpu->ocmem.base >> 14)); |
---|
212 | 252 | } |
---|
213 | 253 | |
---|
214 | 254 | /* Turn on performance counters: */ |
---|
.. | .. |
---|
225 | 265 | ret = adreno_hw_init(gpu); |
---|
226 | 266 | if (ret) |
---|
227 | 267 | return ret; |
---|
| 268 | + |
---|
| 269 | + /* |
---|
| 270 | + * Use the default ringbuffer size and block size but disable the RPTR |
---|
| 271 | + * shadow |
---|
| 272 | + */ |
---|
| 273 | + gpu_write(gpu, REG_AXXX_CP_RB_CNTL, |
---|
| 274 | + MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); |
---|
| 275 | + |
---|
| 276 | + /* Set the ringbuffer address */ |
---|
| 277 | + gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova)); |
---|
228 | 278 | |
---|
229 | 279 | /* setup access protection: */ |
---|
230 | 280 | gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007); |
---|
.. | .. |
---|
329 | 379 | |
---|
330 | 380 | adreno_gpu_cleanup(adreno_gpu); |
---|
331 | 381 | |
---|
332 | | -#ifdef CONFIG_MSM_OCMEM |
---|
333 | | - if (a3xx_gpu->ocmem_base) |
---|
334 | | - ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl); |
---|
335 | | -#endif |
---|
| 382 | + adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem); |
---|
336 | 383 | |
---|
337 | 384 | kfree(a3xx_gpu); |
---|
338 | 385 | } |
---|
.. | .. |
---|
431 | 478 | return state; |
---|
432 | 479 | } |
---|
433 | 480 | |
---|
434 | | -/* Register offset defines for A3XX */ |
---|
435 | | -static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { |
---|
436 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE), |
---|
437 | | - REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI), |
---|
438 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR), |
---|
439 | | - REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI), |
---|
440 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR), |
---|
441 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR), |
---|
442 | | - REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL), |
---|
443 | | -}; |
---|
| 481 | +static u32 a3xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) |
---|
| 482 | +{ |
---|
| 483 | + ring->memptrs->rptr = gpu_read(gpu, REG_AXXX_CP_RB_RPTR); |
---|
| 484 | + return ring->memptrs->rptr; |
---|
| 485 | +} |
---|
444 | 486 | |
---|
445 | 487 | static const struct adreno_gpu_funcs funcs = { |
---|
446 | 488 | .base = { |
---|
.. | .. |
---|
449 | 491 | .pm_suspend = msm_gpu_pm_suspend, |
---|
450 | 492 | .pm_resume = msm_gpu_pm_resume, |
---|
451 | 493 | .recover = a3xx_recover, |
---|
452 | | - .submit = adreno_submit, |
---|
453 | | - .flush = adreno_flush, |
---|
| 494 | + .submit = a3xx_submit, |
---|
454 | 495 | .active_ring = adreno_active_ring, |
---|
455 | 496 | .irq = a3xx_irq, |
---|
456 | 497 | .destroy = a3xx_destroy, |
---|
.. | .. |
---|
459 | 500 | #endif |
---|
460 | 501 | .gpu_state_get = a3xx_gpu_state_get, |
---|
461 | 502 | .gpu_state_put = adreno_gpu_state_put, |
---|
| 503 | + .create_address_space = adreno_iommu_create_address_space, |
---|
| 504 | + .get_rptr = a3xx_get_rptr, |
---|
462 | 505 | }, |
---|
463 | 506 | }; |
---|
464 | 507 | |
---|
.. | .. |
---|
479 | 522 | int ret; |
---|
480 | 523 | |
---|
481 | 524 | if (!pdev) { |
---|
482 | | - dev_err(dev->dev, "no a3xx device\n"); |
---|
| 525 | + DRM_DEV_ERROR(dev->dev, "no a3xx device\n"); |
---|
483 | 526 | ret = -ENXIO; |
---|
484 | 527 | goto fail; |
---|
485 | 528 | } |
---|
.. | .. |
---|
497 | 540 | gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs); |
---|
498 | 541 | |
---|
499 | 542 | adreno_gpu->registers = a3xx_registers; |
---|
500 | | - adreno_gpu->reg_offsets = a3xx_register_offsets; |
---|
501 | 543 | |
---|
502 | 544 | ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); |
---|
503 | 545 | if (ret) |
---|
.. | .. |
---|
505 | 547 | |
---|
506 | 548 | /* if needed, allocate gmem: */ |
---|
507 | 549 | if (adreno_is_a330(adreno_gpu)) { |
---|
508 | | -#ifdef CONFIG_MSM_OCMEM |
---|
509 | | - /* TODO this is different/missing upstream: */ |
---|
510 | | - struct ocmem_buf *ocmem_hdl = |
---|
511 | | - ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem); |
---|
512 | | - |
---|
513 | | - a3xx_gpu->ocmem_hdl = ocmem_hdl; |
---|
514 | | - a3xx_gpu->ocmem_base = ocmem_hdl->addr; |
---|
515 | | - adreno_gpu->gmem = ocmem_hdl->len; |
---|
516 | | - DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024, |
---|
517 | | - a3xx_gpu->ocmem_base); |
---|
518 | | -#endif |
---|
| 550 | + ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev, |
---|
| 551 | + adreno_gpu, &a3xx_gpu->ocmem); |
---|
| 552 | + if (ret) |
---|
| 553 | + goto fail; |
---|
519 | 554 | } |
---|
520 | 555 | |
---|
521 | 556 | if (!gpu->aspace) { |
---|
.. | .. |
---|
526 | 561 | * to not be possible to restrict access, then we must |
---|
527 | 562 | * implement a cmdstream validator. |
---|
528 | 563 | */ |
---|
529 | | - dev_err(dev->dev, "No memory protection without IOMMU\n"); |
---|
| 564 | + DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n"); |
---|
530 | 565 | ret = -ENXIO; |
---|
531 | 566 | goto fail; |
---|
532 | 567 | } |
---|
533 | 568 | |
---|
| 569 | + /* |
---|
| 570 | + * Set the ICC path to maximum speed for now by multiplying the fastest |
---|
| 571 | + * frequency by the bus width (8). We'll want to scale this later on to |
---|
| 572 | + * improve battery life. |
---|
| 573 | + */ |
---|
| 574 | + icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8); |
---|
| 575 | + icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8); |
---|
| 576 | + |
---|
534 | 577 | return gpu; |
---|
535 | 578 | |
---|
536 | 579 | fail: |
---|