..
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2013 Red Hat
  * Author: Rob Clark <robdclark@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

 #include "msm_gpu.h"
 #include "msm_gem.h"
 #include "msm_mmu.h"
 #include "msm_fence.h"
+#include "msm_gpu_trace.h"
+#include "adreno/adreno_gpu.h"

 #include <generated/utsrelease.h>
 #include <linux/string_helpers.h>
-#include <linux/pm_opp.h>
 #include <linux/devfreq.h>
 #include <linux/devcoredump.h>
 #include <linux/sched/task.h>
..
 static int msm_devfreq_target(struct device *dev, unsigned long *freq,
 		u32 flags)
 {
-	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
+	struct msm_gpu *gpu = dev_to_gpu(dev);
 	struct dev_pm_opp *opp;

 	opp = devfreq_recommended_opp(dev, freq, flags);
..
 	if (IS_ERR(opp))
 		return PTR_ERR(opp);

-	clk_set_rate(gpu->core_clk, *freq);
+	trace_msm_gpu_freq_change(dev_pm_opp_get_freq(opp));
+
+	if (gpu->funcs->gpu_set_freq)
+		gpu->funcs->gpu_set_freq(gpu, opp);
+	else
+		clk_set_rate(gpu->core_clk, *freq);
+
 	dev_pm_opp_put(opp);

 	return 0;
..
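
A note on the repeated `dev_to_gpu(dev)` conversions: together with the `platform_set_drvdata(pdev, &gpu->adreno_smmu)` hunk further down, the drvdata no longer points at the msm_gpu itself but at its embedded adreno_smmu_priv. The helper is not shown in this diff; presumably (sketched here under that assumption, likely in msm_gpu.h) it recovers the GPU with container_of():

```c
/* Assumed shape of the helper this diff relies on: */
static inline struct msm_gpu *dev_to_gpu(struct device *dev)
{
	struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(dev);

	return container_of(adreno_smmu, struct msm_gpu, adreno_smmu);
}
```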
 static int msm_devfreq_get_dev_status(struct device *dev,
 		struct devfreq_dev_status *status)
 {
-	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
-	u64 cycles;
-	u32 freq = ((u32) status->current_frequency) / 1000000;
+	struct msm_gpu *gpu = dev_to_gpu(dev);
 	ktime_t time;

-	status->current_frequency = (unsigned long) clk_get_rate(gpu->core_clk);
-	gpu->funcs->gpu_busy(gpu, &cycles);
+	if (gpu->funcs->gpu_get_freq)
+		status->current_frequency = gpu->funcs->gpu_get_freq(gpu);
+	else
+		status->current_frequency = clk_get_rate(gpu->core_clk);

-	status->busy_time = ((u32) (cycles - gpu->devfreq.busy_cycles)) / freq;
-
-	gpu->devfreq.busy_cycles = cycles;
+	status->busy_time = gpu->funcs->gpu_busy(gpu);

 	time = ktime_get();
 	status->total_time = ktime_us_delta(time, gpu->devfreq.time);
..
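
The busy-time bookkeeping also moves out of the core: `gpu_busy()` now returns accumulated busy time itself instead of handing back a raw cycle count for this function to scale by frequency. A hypothetical per-target implementation, mirroring the arithmetic the removed lines used to do here (the counter-read helper is assumed, not a real API):

```c
/* Hypothetical gpu_busy() callback: read a hardware busy-cycle counter
 * and convert the delta to microseconds at the current core clock. */
static unsigned long example_gpu_busy(struct msm_gpu *gpu)
{
	u64 cycles = example_read_busy_counter(gpu);	/* assumed helper */
	u32 freq_mhz = clk_get_rate(gpu->core_clk) / 1000000;
	unsigned long busy_us = (cycles - gpu->devfreq.busy_cycles) / freq_mhz;

	gpu->devfreq.busy_cycles = cycles;
	return busy_us;
}
```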

 static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
 {
-	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
+	struct msm_gpu *gpu = dev_to_gpu(dev);

-	*freq = (unsigned long) clk_get_rate(gpu->core_clk);
+	if (gpu->funcs->gpu_get_freq)
+		*freq = gpu->funcs->gpu_get_freq(gpu);
+	else
+		*freq = clk_get_rate(gpu->core_clk);

 	return 0;
 }
..
 static void msm_devfreq_init(struct msm_gpu *gpu)
 {
 	/* We need target support to do devfreq */
-	if (!gpu->funcs->gpu_busy || !gpu->core_clk)
+	if (!gpu->funcs->gpu_busy)
 		return;

 	msm_devfreq_profile.initial_freq = gpu->fast_rate;
..
 	/*
 	 * Don't set the freq_table or max_state and let devfreq build the table
 	 * from OPP
+	 * After a deferred probe, these may have been left at non-zero values,
+	 * so set them back to zero before creating the devfreq device
 	 */
+	msm_devfreq_profile.freq_table = NULL;
+	msm_devfreq_profile.max_state = 0;

 	gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev,
-			&msm_devfreq_profile, "simple_ondemand", NULL);
+			&msm_devfreq_profile, DEVFREQ_GOV_SIMPLE_ONDEMAND,
+			NULL);

 	if (IS_ERR(gpu->devfreq.devfreq)) {
-		dev_err(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n");
+		DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n");
 		gpu->devfreq.devfreq = NULL;
 	}
+
+	devfreq_suspend_device(gpu->devfreq.devfreq);
 }
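
The freq_table/max_state reset guards against deferred probe: the profile is a file-scope static, so anything the devfreq core wrote into it during a probe that later deferred would survive into the retry. The struct being reset looks roughly like this (field values assumed, not shown in this hunk):

```c
static struct devfreq_dev_profile msm_devfreq_profile = {
	.polling_ms = 10,	/* polling interval; value assumed */
	.target = msm_devfreq_target,
	.get_dev_status = msm_devfreq_get_dev_status,
	.get_cur_freq = msm_devfreq_get_cur_freq,
};
```

The trailing `devfreq_suspend_device()` leaves devfreq parked until the first `msm_gpu_resume_devfreq()`. It tolerates a NULL handle (the devfreq core checks its argument), which is also why the pm_suspend hunk below can drop its NULL guard.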

 static int enable_pwrrail(struct msm_gpu *gpu)
..
 	if (gpu->gpu_reg) {
 		ret = regulator_enable(gpu->gpu_reg);
 		if (ret) {
-			dev_err(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
+			DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
 			return ret;
 		}
 	}
..
 	if (gpu->gpu_cx) {
 		ret = regulator_enable(gpu->gpu_cx);
 		if (ret) {
-			dev_err(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
+			DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
 			return ret;
 		}
 	}
..
 	return 0;
 }

+void msm_gpu_resume_devfreq(struct msm_gpu *gpu)
+{
+	gpu->devfreq.busy_cycles = 0;
+	gpu->devfreq.time = ktime_get();
+
+	devfreq_resume_device(gpu->devfreq.devfreq);
+}
+
 int msm_gpu_pm_resume(struct msm_gpu *gpu)
 {
 	int ret;

 	DBG("%s", gpu->name);
+	trace_msm_gpu_resume(0);

 	ret = enable_pwrrail(gpu);
 	if (ret)
..
 	if (ret)
 		return ret;

-	if (gpu->devfreq.devfreq) {
-		gpu->devfreq.busy_cycles = 0;
-		gpu->devfreq.time = ktime_get();
-
-		devfreq_resume_device(gpu->devfreq.devfreq);
-	}
+	msm_gpu_resume_devfreq(gpu);

 	gpu->needs_hw_init = true;

..
 	int ret;

 	DBG("%s", gpu->name);
+	trace_msm_gpu_suspend(0);

-	if (gpu->devfreq.devfreq)
-		devfreq_suspend_device(gpu->devfreq.devfreq);
+	devfreq_suspend_device(gpu->devfreq.devfreq);

 	ret = disable_axi(gpu);
 	if (ret)
..
 	struct msm_gpu_state_bo *state_bo = &state->bos[state->nr_bos];

 	/* Don't record write only objects */
-
 	state_bo->size = obj->base.size;
 	state_bo->iova = iova;

-	/* Only store the data for buffer objects marked for read */
-	if ((flags & MSM_SUBMIT_BO_READ)) {
+	/* Only store data for non-imported buffer objects marked for read */
+	if ((flags & MSM_SUBMIT_BO_READ) && !obj->base.import_attach) {
 		void *ptr;

 		state_bo->data = kvmalloc(obj->base.size, GFP_KERNEL);
 		if (!state_bo->data)
-			return;
+			goto out;

 		ptr = msm_gem_get_vaddr_active(&obj->base);
 		if (IS_ERR(ptr)) {
 			kvfree(state_bo->data);
-			return;
+			state_bo->data = NULL;
+			goto out;
 		}

 		memcpy(state_bo->data, ptr, obj->base.size);
 		msm_gem_put_vaddr(&obj->base);
 	}
-
+out:
 	state->nr_bos++;
 }

..
 		struct msm_gem_submit *submit, char *comm, char *cmd)
 {
 	struct msm_gpu_state *state;
+
+	/* Check if the target supports capturing crash state */
+	if (!gpu->funcs->gpu_state_get)
+		return;

 	/* Only save one crash state at a time */
 	if (gpu->crashstate)
..
 	state->cmd = kstrdup(cmd, GFP_KERNEL);

 	if (submit) {
-		int i;
+		int i, nr = 0;

-		state->bos = kcalloc(submit->nr_bos,
+		/* count # of buffers to dump: */
+		for (i = 0; i < submit->nr_bos; i++)
+			if (should_dump(submit, i))
+				nr++;
+		/* always dump cmd bo's, but don't double count them: */
+		for (i = 0; i < submit->nr_cmds; i++)
+			if (!should_dump(submit, submit->cmd[i].idx))
+				nr++;
+
+		state->bos = kcalloc(nr,
 				sizeof(struct msm_gpu_state_bo), GFP_KERNEL);

-		for (i = 0; state->bos && i < submit->nr_bos; i++)
-			msm_gpu_crashstate_get_bo(state, submit->bos[i].obj,
-				submit->bos[i].iova, submit->bos[i].flags);
+		for (i = 0; state->bos && i < submit->nr_bos; i++) {
+			if (should_dump(submit, i)) {
+				msm_gpu_crashstate_get_bo(state, submit->bos[i].obj,
+					submit->bos[i].iova, submit->bos[i].flags);
+			}
+		}
+
+		for (i = 0; state->bos && i < submit->nr_cmds; i++) {
+			int idx = submit->cmd[i].idx;
+
+			if (!should_dump(submit, submit->cmd[i].idx)) {
+				msm_gpu_crashstate_get_bo(state, submit->bos[idx].obj,
+					submit->bos[idx].iova, submit->bos[idx].flags);
+			}
+		}
 	}

 	/* Set the active crash state to be dumped on failure */
..
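
`should_dump()` itself is outside this hunk; presumably it just tests the per-BO capture flag, along the lines of:

```c
/* Assumed helper: a BO is dumped when userspace flagged it for capture. */
static bool should_dump(struct msm_gem_submit *submit, int idx)
{
	return submit->bos[idx].flags & MSM_SUBMIT_BO_DUMP;
}
```

The two counting loops size the kcalloc() exactly: every BO flagged for dump, plus every cmdstream BO that was not flagged, so cmd BOs are always captured but never counted twice. The copy loops below then apply the same two predicates.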

 	mutex_lock(&dev->struct_mutex);

-	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
+	DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name);

 	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
 	if (submit) {
 		struct task_struct *task;

+		/* Increment the fault counts */
+		gpu->global_faults++;
+		submit->queue->faults++;
+
 		task = get_pid_task(submit->pid, PIDTYPE_PID);
 		if (task) {
 			comm = kstrdup(task->comm, GFP_KERNEL);
-
-			/*
-			 * So slightly annoying, in other paths like
-			 * mmap'ing gem buffers, mmap_sem is acquired
-			 * before struct_mutex, which means we can't
-			 * hold struct_mutex across the call to
-			 * get_cmdline().  But submits are retired
-			 * from the same in-order workqueue, so we can
-			 * safely drop the lock here without worrying
-			 * about the submit going away.
-			 */
-			mutex_unlock(&dev->struct_mutex);
 			cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
 			put_task_struct(task);
-			mutex_lock(&dev->struct_mutex);
 		}

 		if (comm && cmd) {
-			dev_err(dev->dev, "%s: offending task: %s (%s)\n",
+			DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
 				gpu->name, comm, cmd);

 			msm_rd_dump_submit(priv->hangrd, submit,
..
 		struct msm_ringbuffer *ring = gpu->rb[i];

 		list_for_each_entry(submit, &ring->submits, node)
-			gpu->funcs->submit(gpu, submit, NULL);
+			gpu->funcs->submit(gpu, submit);
 	}
 }

..
 	} else if (fence < ring->seqno) {
 		/* no progress and not done.. hung! */
 		ring->hangcheck_fence = fence;
-		dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
+		DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
 			gpu->name, ring->id);
-		dev_err(dev->dev, "%s: completed fence: %u\n",
+		DRM_DEV_ERROR(dev->dev, "%s: completed fence: %u\n",
 			gpu->name, fence);
-		dev_err(dev->dev, "%s: submitted fence: %u\n",
+		DRM_DEV_ERROR(dev->dev, "%s: submitted fence: %u\n",
 			gpu->name, ring->seqno);

 		queue_work(priv->wq, &gpu->recover_work);
..
  * Cmdstream submission/retirement:
  */

-static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
+static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
+		struct msm_gem_submit *submit)
 {
+	int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
+	volatile struct msm_gpu_submit_stats *stats;
+	u64 elapsed, clock = 0;
 	int i;
+
+	stats = &ring->memptrs->stats[index];
+	/* Convert 19.2MHz alwayson ticks to nanoseconds for elapsed time */
+	elapsed = (stats->alwayson_end - stats->alwayson_start) * 10000;
+	do_div(elapsed, 192);
+
+	/* Calculate the clock frequency from the number of CP cycles */
+	if (elapsed) {
+		clock = (stats->cpcycles_end - stats->cpcycles_start) * 1000;
+		do_div(clock, elapsed);
+	}
+
+	trace_msm_gpu_submit_retired(submit, elapsed, clock,
+		stats->alwayson_start, stats->alwayson_end);

 	for (i = 0; i < submit->nr_bos; i++) {
 		struct msm_gem_object *msm_obj = submit->bos[i].obj;
-		/* move to inactive: */
-		msm_gem_move_to_inactive(&msm_obj->base);
-		msm_gem_put_iova(&msm_obj->base, gpu->aspace);
-		drm_gem_object_put(&msm_obj->base);
+
+		msm_gem_active_put(&msm_obj->base);
+		msm_gem_unpin_iova(&msm_obj->base, submit->aspace);
+		drm_gem_object_put_locked(&msm_obj->base);
 	}

 	pm_runtime_mark_last_busy(&gpu->pdev->dev);
..
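
The tick-to-time conversion is easy to sanity-check: the always-on counter ticks at 19.2 MHz, so one tick is 1e9 / 19.2e6 = 10000/192 ns, and dividing CP cycles by nanoseconds (times 1000) yields cycles per microsecond, i.e. MHz. A worked example with illustrative numbers:

```c
/* 100 ms worth of 19.2 MHz always-on ticks: */
u64 ticks = 1920000;
u64 elapsed = ticks * 10000;	/* 19,200,000,000 */
do_div(elapsed, 192);		/* 100,000,000 ns == 100 ms */

/* 48,000,000 CP cycles retired over those 100 ms: */
u64 clock = 48000000ULL * 1000;
do_div(clock, elapsed);		/* 480 -> the core was running at 480 MHz */
```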

 		list_for_each_entry_safe(submit, tmp, &ring->submits, node) {
 			if (dma_fence_is_signaled(submit->fence))
-				retire_submit(gpu, submit);
+				retire_submit(gpu, ring, submit);
 		}
 	}
 }
..
 }

 /* add bo's to gpu's ring, and kick gpu: */
-void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
-		struct msm_file_private *ctx)
+void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 {
 	struct drm_device *dev = gpu->dev;
 	struct msm_drm_private *priv = dev->dev_private;
..

 	for (i = 0; i < submit->nr_bos; i++) {
 		struct msm_gem_object *msm_obj = submit->bos[i].obj;
+		struct drm_gem_object *drm_obj = &msm_obj->base;
 		uint64_t iova;

 		/* can't happen yet.. but when we add 2d support we'll have
..

 		/* submit takes a reference to the bo and iova until retired: */
 		drm_gem_object_get(&msm_obj->base);
-		msm_gem_get_iova(&msm_obj->base,
-				submit->gpu->aspace, &iova);
+		msm_gem_get_and_pin_iova(&msm_obj->base, submit->aspace, &iova);

 		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
-			msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
+			dma_resv_add_excl_fence(drm_obj->resv, submit->fence);
 		else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
-			msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);
+			dma_resv_add_shared_fence(drm_obj->resv, submit->fence);
+
+		msm_gem_active_get(drm_obj, gpu);
 	}

-	gpu->funcs->submit(gpu, submit, ctx);
-	priv->lastctx = ctx;
+	gpu->funcs->submit(gpu, submit);
+	priv->lastctx = submit->queue->ctx;

 	hangcheck_timer_reset(gpu);
 }
..
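
The open-coded reservation-object updates replace `msm_gem_move_to_active()`: a write attaches the submit fence as the exclusive fence (readers and writers must wait for it), a read attaches it as a shared fence (only writers must wait), and `msm_gem_active_get()` marks the object busy. Each reference taken here is dropped by `retire_submit()` earlier in this diff:

```c
/*
 * Per-BO pairing between msm_gpu_submit() and retire_submit():
 *
 *   submit side                       retire side
 *   drm_gem_object_get()          ->  drm_gem_object_put_locked()
 *   msm_gem_get_and_pin_iova()    ->  msm_gem_unpin_iova()
 *   msm_gem_active_get()          ->  msm_gem_active_put()
 */
```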

 static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
 {
-	int ret = msm_clk_bulk_get(&pdev->dev, &gpu->grp_clks);
+	int ret = devm_clk_bulk_get_all(&pdev->dev, &gpu->grp_clks);

 	if (ret < 1) {
 		gpu->nr_clocks = 0;
..
 	return 0;
 }

-static struct msm_gem_address_space *
-msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev,
-		uint64_t va_start, uint64_t va_end)
+/* Return a new address space for a msm_drm_private instance */
+struct msm_gem_address_space *
+msm_gpu_create_private_address_space(struct msm_gpu *gpu, struct task_struct *task)
 {
-	struct iommu_domain *iommu;
-	struct msm_gem_address_space *aspace;
-	int ret;
-
-	/*
-	 * Setup IOMMU.. eventually we will (I think) do this once per context
-	 * and have separate page tables per context.  For now, to keep things
-	 * simple and to get something working, just use a single address space:
-	 */
-	iommu = iommu_domain_alloc(&platform_bus_type);
-	if (!iommu)
+	struct msm_gem_address_space *aspace = NULL;
+	if (!gpu)
 		return NULL;

-	iommu->geometry.aperture_start = va_start;
-	iommu->geometry.aperture_end = va_end;
-
-	dev_info(gpu->dev->dev, "%s: using IOMMU\n", gpu->name);
-
-	aspace = msm_gem_address_space_create(&pdev->dev, iommu, "gpu");
-	if (IS_ERR(aspace)) {
-		dev_err(gpu->dev->dev, "failed to init iommu: %ld\n",
-			PTR_ERR(aspace));
-		iommu_domain_free(iommu);
-		return ERR_CAST(aspace);
+	/*
+	 * If the target doesn't support private address spaces then return
+	 * the global one
+	 */
+	if (gpu->funcs->create_private_address_space) {
+		aspace = gpu->funcs->create_private_address_space(gpu);
+		if (!IS_ERR(aspace))
+			aspace->pid = get_pid(task_pid(task));
 	}

-	ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
-	if (ret) {
-		msm_gem_address_space_put(aspace);
-		return ERR_PTR(ret);
-	}
+	if (IS_ERR_OR_NULL(aspace))
+		aspace = msm_gem_address_space_get(gpu->aspace);

 	return aspace;
 }
..
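
With this change the single GPU-wide address space becomes a fallback: targets that implement `create_private_address_space` get per-process pagetables, with the owning task's pid recorded on the aspace (presumably for debugging/accounting). A hypothetical caller at context-creation time (names assumed, not from this diff):

```c
/* e.g. in the drm file open / context init path: */
ctx->aspace = msm_gpu_create_private_address_space(priv->gpu, current);
```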
 	}

 	/* Get Interrupt: */
-	gpu->irq = platform_get_irq_byname(pdev, config->irqname);
+	gpu->irq = platform_get_irq(pdev, 0);
 	if (gpu->irq < 0) {
 		ret = gpu->irq;
-		dev_err(drm->dev, "failed to get irq: %d\n", ret);
+		DRM_DEV_ERROR(drm->dev, "failed to get irq: %d\n", ret);
 		goto fail;
 	}

 	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
 			IRQF_TRIGGER_HIGH, gpu->name, gpu);
 	if (ret) {
-		dev_err(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
+		DRM_DEV_ERROR(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
 		goto fail;
 	}

..
 	gpu->gpu_cx = NULL;

 	gpu->pdev = pdev;
-	platform_set_drvdata(pdev, gpu);
+	platform_set_drvdata(pdev, &gpu->adreno_smmu);

 	msm_devfreq_init(gpu);

-	gpu->aspace = msm_gpu_create_address_space(gpu, pdev,
-		config->va_start, config->va_end);
+
+	gpu->aspace = gpu->funcs->create_address_space(gpu, pdev);

 	if (gpu->aspace == NULL)
-		dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
+		DRM_DEV_INFO(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
 	else if (IS_ERR(gpu->aspace)) {
 		ret = PTR_ERR(gpu->aspace);
 		goto fail;
 	}

-	memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo),
-			MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo,
+	memptrs = msm_gem_kernel_new(drm,
+			sizeof(struct msm_rbmemptrs) * nr_rings,
+			check_apriv(gpu, MSM_BO_UNCACHED), gpu->aspace, &gpu->memptrs_bo,
 			&memptrs_iova);

 	if (IS_ERR(memptrs)) {
 		ret = PTR_ERR(memptrs);
-		dev_err(drm->dev, "could not allocate memptrs: %d\n", ret);
+		DRM_DEV_ERROR(drm->dev, "could not allocate memptrs: %d\n", ret);
 		goto fail;
 	}
+
+	msm_gem_object_set_name(gpu->memptrs_bo, "memptrs");

 	if (nr_rings > ARRAY_SIZE(gpu->rb)) {
 		DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n",
..
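
The memptrs BO is now sized for one `struct msm_rbmemptrs` per ring instead of a single shared block, which is what lets `retire_submit()` above index per-ring submit stats. The elided ring-creation loop presumably strides through it, roughly:

```c
/* Sketch of the elided loop, under the per-ring layout assumption:
 * CPU pointer and GPU iova advance in lockstep, one slice per ring. */
for (i = 0; i < nr_rings; i++) {
	gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova);
	/* IS_ERR() handling elided */
	memptrs += sizeof(struct msm_rbmemptrs);
	memptrs_iova += sizeof(struct msm_rbmemptrs);
}
```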

 		if (IS_ERR(gpu->rb[i])) {
 			ret = PTR_ERR(gpu->rb[i]);
-			dev_err(drm->dev,
+			DRM_DEV_ERROR(drm->dev,
 				"could not create ringbuffer %d: %d\n", i, ret);
 			goto fail;
 		}
..
 		gpu->rb[i] = NULL;
 	}

-	if (gpu->memptrs_bo) {
-		msm_gem_put_vaddr(gpu->memptrs_bo);
-		msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace);
-		drm_gem_object_put_unlocked(gpu->memptrs_bo);
-	}
+	msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false);

 	platform_set_drvdata(pdev, NULL);
 	return ret;
..
 		gpu->rb[i] = NULL;
 	}

-	if (gpu->memptrs_bo) {
-		msm_gem_put_vaddr(gpu->memptrs_bo);
-		msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace);
-		drm_gem_object_put_unlocked(gpu->memptrs_bo);
-	}
+	msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false);

 	if (!IS_ERR_OR_NULL(gpu->aspace)) {
-		gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu,
-				NULL, 0);
+		gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu);
 		msm_gem_address_space_put(gpu->aspace);
 	}
 }
|---|