.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * Copyright (C) 2013 Red Hat |
---|
3 | 4 | * Author: Rob Clark <robdclark@gmail.com> |
---|
4 | | - * |
---|
5 | | - * This program is free software; you can redistribute it and/or modify it |
---|
6 | | - * under the terms of the GNU General Public License version 2 as published by |
---|
7 | | - * the Free Software Foundation. |
---|
8 | | - * |
---|
9 | | - * This program is distributed in the hope that it will be useful, but WITHOUT |
---|
10 | | - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
---|
11 | | - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
---|
12 | | - * more details. |
---|
13 | | - * |
---|
14 | | - * You should have received a copy of the GNU General Public License along with |
---|
15 | | - * this program. If not, see <http://www.gnu.org/licenses/>. |
---|
16 | 5 | */ |
---|
17 | 6 | |
---|
18 | 7 | #include "msm_gpu.h" |
---|
19 | 8 | #include "msm_gem.h" |
---|
20 | 9 | #include "msm_mmu.h" |
---|
21 | 10 | #include "msm_fence.h" |
---|
| 11 | +#include "msm_gpu_trace.h" |
---|
| 12 | +#include "adreno/adreno_gpu.h" |
---|
22 | 13 | |
---|
23 | 14 | #include <generated/utsrelease.h> |
---|
24 | 15 | #include <linux/string_helpers.h> |
---|
25 | | -#include <linux/pm_opp.h> |
---|
26 | 16 | #include <linux/devfreq.h> |
---|
27 | 17 | #include <linux/devcoredump.h> |
---|
28 | 18 | #include <linux/sched/task.h> |
---|
.. | .. |
---|
34 | 24 | static int msm_devfreq_target(struct device *dev, unsigned long *freq, |
---|
35 | 25 | u32 flags) |
---|
36 | 26 | { |
---|
37 | | - struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev)); |
---|
| 27 | + struct msm_gpu *gpu = dev_to_gpu(dev); |
---|
38 | 28 | struct dev_pm_opp *opp; |
---|
39 | 29 | |
---|
40 | 30 | opp = devfreq_recommended_opp(dev, freq, flags); |
---|
.. | .. |
---|
42 | 32 | if (IS_ERR(opp)) |
---|
43 | 33 | return PTR_ERR(opp); |
---|
44 | 34 | |
---|
45 | | - clk_set_rate(gpu->core_clk, *freq); |
---|
| 35 | + trace_msm_gpu_freq_change(dev_pm_opp_get_freq(opp)); |
---|
| 36 | + |
---|
| 37 | + if (gpu->funcs->gpu_set_freq) |
---|
| 38 | + gpu->funcs->gpu_set_freq(gpu, opp); |
---|
| 39 | + else |
---|
| 40 | + clk_set_rate(gpu->core_clk, *freq); |
---|
| 41 | + |
---|
46 | 42 | dev_pm_opp_put(opp); |
---|
47 | 43 | |
---|
48 | 44 | return 0; |
---|
.. | .. |
---|
51 | 47 | static int msm_devfreq_get_dev_status(struct device *dev, |
---|
52 | 48 | struct devfreq_dev_status *status) |
---|
53 | 49 | { |
---|
54 | | - struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev)); |
---|
55 | | - u64 cycles; |
---|
56 | | - u32 freq = ((u32) status->current_frequency) / 1000000; |
---|
| 50 | + struct msm_gpu *gpu = dev_to_gpu(dev); |
---|
57 | 51 | ktime_t time; |
---|
58 | 52 | |
---|
59 | | - status->current_frequency = (unsigned long) clk_get_rate(gpu->core_clk); |
---|
60 | | - gpu->funcs->gpu_busy(gpu, &cycles); |
---|
| 53 | + if (gpu->funcs->gpu_get_freq) |
---|
| 54 | + status->current_frequency = gpu->funcs->gpu_get_freq(gpu); |
---|
| 55 | + else |
---|
| 56 | + status->current_frequency = clk_get_rate(gpu->core_clk); |
---|
61 | 57 | |
---|
62 | | - status->busy_time = ((u32) (cycles - gpu->devfreq.busy_cycles)) / freq; |
---|
63 | | - |
---|
64 | | - gpu->devfreq.busy_cycles = cycles; |
---|
| 58 | + status->busy_time = gpu->funcs->gpu_busy(gpu); |
---|
65 | 59 | |
---|
66 | 60 | time = ktime_get(); |
---|
67 | 61 | status->total_time = ktime_us_delta(time, gpu->devfreq.time); |
---|
.. | .. |
---|
72 | 66 | |
---|
73 | 67 | static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq) |
---|
74 | 68 | { |
---|
75 | | - struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev)); |
---|
| 69 | + struct msm_gpu *gpu = dev_to_gpu(dev); |
---|
76 | 70 | |
---|
77 | | - *freq = (unsigned long) clk_get_rate(gpu->core_clk); |
---|
| 71 | + if (gpu->funcs->gpu_get_freq) |
---|
| 72 | + *freq = gpu->funcs->gpu_get_freq(gpu); |
---|
| 73 | + else |
---|
| 74 | + *freq = clk_get_rate(gpu->core_clk); |
---|
78 | 75 | |
---|
79 | 76 | return 0; |
---|
80 | 77 | } |
---|
.. | .. |
---|
89 | 86 | static void msm_devfreq_init(struct msm_gpu *gpu) |
---|
90 | 87 | { |
---|
91 | 88 | /* We need target support to do devfreq */ |
---|
92 | | - if (!gpu->funcs->gpu_busy || !gpu->core_clk) |
---|
| 89 | + if (!gpu->funcs->gpu_busy) |
---|
93 | 90 | return; |
---|
94 | 91 | |
---|
95 | 92 | msm_devfreq_profile.initial_freq = gpu->fast_rate; |
---|
.. | .. |
---|
97 | 94 | /* |
---|
98 | 95 | * Don't set the freq_table or max_state and let devfreq build the table |
---|
99 | 96 | * from OPP |
---|
 | 97 | + * After a deferred probe, these may have been left at non-zero values, |
---|
| 98 | + * so set them back to zero before creating the devfreq device |
---|
100 | 99 | */ |
---|
| 100 | + msm_devfreq_profile.freq_table = NULL; |
---|
| 101 | + msm_devfreq_profile.max_state = 0; |
---|
101 | 102 | |
---|
102 | 103 | gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev, |
---|
103 | | - &msm_devfreq_profile, "simple_ondemand", NULL); |
---|
| 104 | + &msm_devfreq_profile, DEVFREQ_GOV_SIMPLE_ONDEMAND, |
---|
| 105 | + NULL); |
---|
104 | 106 | |
---|
105 | 107 | if (IS_ERR(gpu->devfreq.devfreq)) { |
---|
106 | | - dev_err(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n"); |
---|
| 108 | + DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n"); |
---|
107 | 109 | gpu->devfreq.devfreq = NULL; |
---|
108 | 110 | } |
---|
| 111 | + |
---|
| 112 | + devfreq_suspend_device(gpu->devfreq.devfreq); |
---|
109 | 113 | } |
---|
110 | 114 | |
---|
111 | 115 | static int enable_pwrrail(struct msm_gpu *gpu) |
---|
.. | .. |
---|
116 | 120 | if (gpu->gpu_reg) { |
---|
117 | 121 | ret = regulator_enable(gpu->gpu_reg); |
---|
118 | 122 | if (ret) { |
---|
119 | | - dev_err(dev->dev, "failed to enable 'gpu_reg': %d\n", ret); |
---|
| 123 | + DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_reg': %d\n", ret); |
---|
120 | 124 | return ret; |
---|
121 | 125 | } |
---|
122 | 126 | } |
---|
.. | .. |
---|
124 | 128 | if (gpu->gpu_cx) { |
---|
125 | 129 | ret = regulator_enable(gpu->gpu_cx); |
---|
126 | 130 | if (ret) { |
---|
127 | | - dev_err(dev->dev, "failed to enable 'gpu_cx': %d\n", ret); |
---|
| 131 | + DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_cx': %d\n", ret); |
---|
128 | 132 | return ret; |
---|
129 | 133 | } |
---|
130 | 134 | } |
---|
.. | .. |
---|
185 | 189 | return 0; |
---|
186 | 190 | } |
---|
187 | 191 | |
---|
| 192 | +void msm_gpu_resume_devfreq(struct msm_gpu *gpu) |
---|
| 193 | +{ |
---|
| 194 | + gpu->devfreq.busy_cycles = 0; |
---|
| 195 | + gpu->devfreq.time = ktime_get(); |
---|
| 196 | + |
---|
| 197 | + devfreq_resume_device(gpu->devfreq.devfreq); |
---|
| 198 | +} |
---|
| 199 | + |
---|
188 | 200 | int msm_gpu_pm_resume(struct msm_gpu *gpu) |
---|
189 | 201 | { |
---|
190 | 202 | int ret; |
---|
191 | 203 | |
---|
192 | 204 | DBG("%s", gpu->name); |
---|
| 205 | + trace_msm_gpu_resume(0); |
---|
193 | 206 | |
---|
194 | 207 | ret = enable_pwrrail(gpu); |
---|
195 | 208 | if (ret) |
---|
.. | .. |
---|
203 | 216 | if (ret) |
---|
204 | 217 | return ret; |
---|
205 | 218 | |
---|
206 | | - if (gpu->devfreq.devfreq) { |
---|
207 | | - gpu->devfreq.busy_cycles = 0; |
---|
208 | | - gpu->devfreq.time = ktime_get(); |
---|
209 | | - |
---|
210 | | - devfreq_resume_device(gpu->devfreq.devfreq); |
---|
211 | | - } |
---|
| 219 | + msm_gpu_resume_devfreq(gpu); |
---|
212 | 220 | |
---|
213 | 221 | gpu->needs_hw_init = true; |
---|
214 | 222 | |
---|
.. | .. |
---|
220 | 228 | int ret; |
---|
221 | 229 | |
---|
222 | 230 | DBG("%s", gpu->name); |
---|
| 231 | + trace_msm_gpu_suspend(0); |
---|
223 | 232 | |
---|
224 | | - if (gpu->devfreq.devfreq) |
---|
225 | | - devfreq_suspend_device(gpu->devfreq.devfreq); |
---|
| 233 | + devfreq_suspend_device(gpu->devfreq.devfreq); |
---|
226 | 234 | |
---|
227 | 235 | ret = disable_axi(gpu); |
---|
228 | 236 | if (ret) |
---|
.. | .. |
---|
307 | 315 | struct msm_gpu_state_bo *state_bo = &state->bos[state->nr_bos]; |
---|
308 | 316 | |
---|
309 | 317 | /* Don't record write-only objects */ |
---|
310 | | - |
---|
311 | 318 | state_bo->size = obj->base.size; |
---|
312 | 319 | state_bo->iova = iova; |
---|
313 | 320 | |
---|
314 | | - /* Only store the data for buffer objects marked for read */ |
---|
315 | | - if ((flags & MSM_SUBMIT_BO_READ)) { |
---|
 | 321 | + /* Only store data for non-imported buffer objects marked for read */ |
---|
| 322 | + if ((flags & MSM_SUBMIT_BO_READ) && !obj->base.import_attach) { |
---|
316 | 323 | void *ptr; |
---|
317 | 324 | |
---|
318 | 325 | state_bo->data = kvmalloc(obj->base.size, GFP_KERNEL); |
---|
319 | 326 | if (!state_bo->data) |
---|
320 | | - return; |
---|
| 327 | + goto out; |
---|
321 | 328 | |
---|
322 | 329 | ptr = msm_gem_get_vaddr_active(&obj->base); |
---|
323 | 330 | if (IS_ERR(ptr)) { |
---|
324 | 331 | kvfree(state_bo->data); |
---|
325 | | - return; |
---|
| 332 | + state_bo->data = NULL; |
---|
| 333 | + goto out; |
---|
326 | 334 | } |
---|
327 | 335 | |
---|
328 | 336 | memcpy(state_bo->data, ptr, obj->base.size); |
---|
329 | 337 | msm_gem_put_vaddr(&obj->base); |
---|
330 | 338 | } |
---|
331 | | - |
---|
| 339 | +out: |
---|
332 | 340 | state->nr_bos++; |
---|
333 | 341 | } |
---|
334 | 342 | |
---|
.. | .. |
---|
336 | 344 | struct msm_gem_submit *submit, char *comm, char *cmd) |
---|
337 | 345 | { |
---|
338 | 346 | struct msm_gpu_state *state; |
---|
| 347 | + |
---|
| 348 | + /* Check if the target supports capturing crash state */ |
---|
| 349 | + if (!gpu->funcs->gpu_state_get) |
---|
| 350 | + return; |
---|
339 | 351 | |
---|
340 | 352 | /* Only save one crash state at a time */ |
---|
341 | 353 | if (gpu->crashstate) |
---|
.. | .. |
---|
350 | 362 | state->cmd = kstrdup(cmd, GFP_KERNEL); |
---|
351 | 363 | |
---|
352 | 364 | if (submit) { |
---|
353 | | - int i; |
---|
| 365 | + int i, nr = 0; |
---|
354 | 366 | |
---|
355 | | - state->bos = kcalloc(submit->nr_bos, |
---|
| 367 | + /* count # of buffers to dump: */ |
---|
| 368 | + for (i = 0; i < submit->nr_bos; i++) |
---|
| 369 | + if (should_dump(submit, i)) |
---|
| 370 | + nr++; |
---|
| 371 | + /* always dump cmd bo's, but don't double count them: */ |
---|
| 372 | + for (i = 0; i < submit->nr_cmds; i++) |
---|
| 373 | + if (!should_dump(submit, submit->cmd[i].idx)) |
---|
| 374 | + nr++; |
---|
| 375 | + |
---|
| 376 | + state->bos = kcalloc(nr, |
---|
356 | 377 | sizeof(struct msm_gpu_state_bo), GFP_KERNEL); |
---|
357 | 378 | |
---|
358 | | - for (i = 0; state->bos && i < submit->nr_bos; i++) |
---|
359 | | - msm_gpu_crashstate_get_bo(state, submit->bos[i].obj, |
---|
360 | | - submit->bos[i].iova, submit->bos[i].flags); |
---|
| 379 | + for (i = 0; state->bos && i < submit->nr_bos; i++) { |
---|
| 380 | + if (should_dump(submit, i)) { |
---|
| 381 | + msm_gpu_crashstate_get_bo(state, submit->bos[i].obj, |
---|
| 382 | + submit->bos[i].iova, submit->bos[i].flags); |
---|
| 383 | + } |
---|
| 384 | + } |
---|
| 385 | + |
---|
| 386 | + for (i = 0; state->bos && i < submit->nr_cmds; i++) { |
---|
| 387 | + int idx = submit->cmd[i].idx; |
---|
| 388 | + |
---|
| 389 | + if (!should_dump(submit, submit->cmd[i].idx)) { |
---|
| 390 | + msm_gpu_crashstate_get_bo(state, submit->bos[idx].obj, |
---|
| 391 | + submit->bos[idx].iova, submit->bos[idx].flags); |
---|
| 392 | + } |
---|
| 393 | + } |
---|
361 | 394 | } |
---|
362 | 395 | |
---|
363 | 396 | /* Set the active crash state to be dumped on failure */ |
---|
.. | .. |
---|
420 | 453 | |
---|
421 | 454 | mutex_lock(&dev->struct_mutex); |
---|
422 | 455 | |
---|
423 | | - dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name); |
---|
| 456 | + DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name); |
---|
424 | 457 | |
---|
425 | 458 | submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1); |
---|
426 | 459 | if (submit) { |
---|
427 | 460 | struct task_struct *task; |
---|
428 | 461 | |
---|
| 462 | + /* Increment the fault counts */ |
---|
| 463 | + gpu->global_faults++; |
---|
| 464 | + submit->queue->faults++; |
---|
| 465 | + |
---|
429 | 466 | task = get_pid_task(submit->pid, PIDTYPE_PID); |
---|
430 | 467 | if (task) { |
---|
431 | 468 | comm = kstrdup(task->comm, GFP_KERNEL); |
---|
432 | | - |
---|
433 | | - /* |
---|
434 | | - * So slightly annoying, in other paths like |
---|
435 | | - * mmap'ing gem buffers, mmap_sem is acquired |
---|
436 | | - * before struct_mutex, which means we can't |
---|
437 | | - * hold struct_mutex across the call to |
---|
438 | | - * get_cmdline(). But submits are retired |
---|
439 | | - * from the same in-order workqueue, so we can |
---|
440 | | - * safely drop the lock here without worrying |
---|
441 | | - * about the submit going away. |
---|
442 | | - */ |
---|
443 | | - mutex_unlock(&dev->struct_mutex); |
---|
444 | 469 | cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL); |
---|
445 | 470 | put_task_struct(task); |
---|
446 | | - mutex_lock(&dev->struct_mutex); |
---|
447 | 471 | } |
---|
448 | 472 | |
---|
449 | 473 | if (comm && cmd) { |
---|
450 | | - dev_err(dev->dev, "%s: offending task: %s (%s)\n", |
---|
| 474 | + DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n", |
---|
451 | 475 | gpu->name, comm, cmd); |
---|
452 | 476 | |
---|
453 | 477 | msm_rd_dump_submit(priv->hangrd, submit, |
---|
.. | .. |
---|
500 | 524 | struct msm_ringbuffer *ring = gpu->rb[i]; |
---|
501 | 525 | |
---|
502 | 526 | list_for_each_entry(submit, &ring->submits, node) |
---|
503 | | - gpu->funcs->submit(gpu, submit, NULL); |
---|
| 527 | + gpu->funcs->submit(gpu, submit); |
---|
504 | 528 | } |
---|
505 | 529 | } |
---|
506 | 530 | |
---|
.. | .. |
---|
530 | 554 | } else if (fence < ring->seqno) { |
---|
531 | 555 | /* no progress and not done.. hung! */ |
---|
532 | 556 | ring->hangcheck_fence = fence; |
---|
533 | | - dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n", |
---|
| 557 | + DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n", |
---|
534 | 558 | gpu->name, ring->id); |
---|
535 | | - dev_err(dev->dev, "%s: completed fence: %u\n", |
---|
| 559 | + DRM_DEV_ERROR(dev->dev, "%s: completed fence: %u\n", |
---|
536 | 560 | gpu->name, fence); |
---|
537 | | - dev_err(dev->dev, "%s: submitted fence: %u\n", |
---|
| 561 | + DRM_DEV_ERROR(dev->dev, "%s: submitted fence: %u\n", |
---|
538 | 562 | gpu->name, ring->seqno); |
---|
539 | 563 | |
---|
540 | 564 | queue_work(priv->wq, &gpu->recover_work); |
---|
.. | .. |
---|
650 | 674 | * Cmdstream submission/retirement: |
---|
651 | 675 | */ |
---|
652 | 676 | |
---|
653 | | -static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) |
---|
| 677 | +static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, |
---|
| 678 | + struct msm_gem_submit *submit) |
---|
654 | 679 | { |
---|
| 680 | + int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT; |
---|
| 681 | + volatile struct msm_gpu_submit_stats *stats; |
---|
| 682 | + u64 elapsed, clock = 0; |
---|
655 | 683 | int i; |
---|
| 684 | + |
---|
| 685 | + stats = &ring->memptrs->stats[index]; |
---|
 | 686 | + /* Convert 19.2MHz alwayson ticks to nanoseconds for elapsed time */ |
---|
| 687 | + elapsed = (stats->alwayson_end - stats->alwayson_start) * 10000; |
---|
| 688 | + do_div(elapsed, 192); |
---|
| 689 | + |
---|
| 690 | + /* Calculate the clock frequency from the number of CP cycles */ |
---|
| 691 | + if (elapsed) { |
---|
| 692 | + clock = (stats->cpcycles_end - stats->cpcycles_start) * 1000; |
---|
| 693 | + do_div(clock, elapsed); |
---|
| 694 | + } |
---|
| 695 | + |
---|
| 696 | + trace_msm_gpu_submit_retired(submit, elapsed, clock, |
---|
| 697 | + stats->alwayson_start, stats->alwayson_end); |
---|
656 | 698 | |
---|
657 | 699 | for (i = 0; i < submit->nr_bos; i++) { |
---|
658 | 700 | struct msm_gem_object *msm_obj = submit->bos[i].obj; |
---|
659 | | - /* move to inactive: */ |
---|
660 | | - msm_gem_move_to_inactive(&msm_obj->base); |
---|
661 | | - msm_gem_put_iova(&msm_obj->base, gpu->aspace); |
---|
662 | | - drm_gem_object_put(&msm_obj->base); |
---|
| 701 | + |
---|
| 702 | + msm_gem_active_put(&msm_obj->base); |
---|
| 703 | + msm_gem_unpin_iova(&msm_obj->base, submit->aspace); |
---|
| 704 | + drm_gem_object_put_locked(&msm_obj->base); |
---|
663 | 705 | } |
---|
664 | 706 | |
---|
665 | 707 | pm_runtime_mark_last_busy(&gpu->pdev->dev); |
---|
.. | .. |
---|
681 | 723 | |
---|
682 | 724 | list_for_each_entry_safe(submit, tmp, &ring->submits, node) { |
---|
683 | 725 | if (dma_fence_is_signaled(submit->fence)) |
---|
684 | | - retire_submit(gpu, submit); |
---|
| 726 | + retire_submit(gpu, ring, submit); |
---|
685 | 727 | } |
---|
686 | 728 | } |
---|
687 | 729 | } |
---|
.. | .. |
---|
709 | 751 | } |
---|
710 | 752 | |
---|
711 | 753 | /* add bo's to gpu's ring, and kick gpu: */ |
---|
712 | | -void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, |
---|
713 | | - struct msm_file_private *ctx) |
---|
| 754 | +void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) |
---|
714 | 755 | { |
---|
715 | 756 | struct drm_device *dev = gpu->dev; |
---|
716 | 757 | struct msm_drm_private *priv = dev->dev_private; |
---|
.. | .. |
---|
733 | 774 | |
---|
734 | 775 | for (i = 0; i < submit->nr_bos; i++) { |
---|
735 | 776 | struct msm_gem_object *msm_obj = submit->bos[i].obj; |
---|
| 777 | + struct drm_gem_object *drm_obj = &msm_obj->base; |
---|
736 | 778 | uint64_t iova; |
---|
737 | 779 | |
---|
738 | 780 | /* can't happen yet.. but when we add 2d support we'll have |
---|
.. | .. |
---|
742 | 784 | |
---|
743 | 785 | /* submit takes a reference to the bo and iova until retired: */ |
---|
744 | 786 | drm_gem_object_get(&msm_obj->base); |
---|
745 | | - msm_gem_get_iova(&msm_obj->base, |
---|
746 | | - submit->gpu->aspace, &iova); |
---|
| 787 | + msm_gem_get_and_pin_iova(&msm_obj->base, submit->aspace, &iova); |
---|
747 | 788 | |
---|
748 | 789 | if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) |
---|
749 | | - msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence); |
---|
| 790 | + dma_resv_add_excl_fence(drm_obj->resv, submit->fence); |
---|
750 | 791 | else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ) |
---|
751 | | - msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence); |
---|
| 792 | + dma_resv_add_shared_fence(drm_obj->resv, submit->fence); |
---|
| 793 | + |
---|
| 794 | + msm_gem_active_get(drm_obj, gpu); |
---|
752 | 795 | } |
---|
753 | 796 | |
---|
754 | | - gpu->funcs->submit(gpu, submit, ctx); |
---|
755 | | - priv->lastctx = ctx; |
---|
| 797 | + gpu->funcs->submit(gpu, submit); |
---|
| 798 | + priv->lastctx = submit->queue->ctx; |
---|
756 | 799 | |
---|
757 | 800 | hangcheck_timer_reset(gpu); |
---|
758 | 801 | } |
---|
.. | .. |
---|
769 | 812 | |
---|
770 | 813 | static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu) |
---|
771 | 814 | { |
---|
772 | | - int ret = msm_clk_bulk_get(&pdev->dev, &gpu->grp_clks); |
---|
| 815 | + int ret = devm_clk_bulk_get_all(&pdev->dev, &gpu->grp_clks); |
---|
773 | 816 | |
---|
774 | 817 | if (ret < 1) { |
---|
775 | 818 | gpu->nr_clocks = 0; |
---|
.. | .. |
---|
787 | 830 | return 0; |
---|
788 | 831 | } |
---|
789 | 832 | |
---|
790 | | -static struct msm_gem_address_space * |
---|
791 | | -msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev, |
---|
792 | | - uint64_t va_start, uint64_t va_end) |
---|
| 833 | +/* Return a new address space for a msm_drm_private instance */ |
---|
| 834 | +struct msm_gem_address_space * |
---|
| 835 | +msm_gpu_create_private_address_space(struct msm_gpu *gpu, struct task_struct *task) |
---|
793 | 836 | { |
---|
794 | | - struct iommu_domain *iommu; |
---|
795 | | - struct msm_gem_address_space *aspace; |
---|
796 | | - int ret; |
---|
797 | | - |
---|
798 | | - /* |
---|
799 | | - * Setup IOMMU.. eventually we will (I think) do this once per context |
---|
800 | | - * and have separate page tables per context. For now, to keep things |
---|
801 | | - * simple and to get something working, just use a single address space: |
---|
802 | | - */ |
---|
803 | | - iommu = iommu_domain_alloc(&platform_bus_type); |
---|
804 | | - if (!iommu) |
---|
| 837 | + struct msm_gem_address_space *aspace = NULL; |
---|
| 838 | + if (!gpu) |
---|
805 | 839 | return NULL; |
---|
806 | 840 | |
---|
807 | | - iommu->geometry.aperture_start = va_start; |
---|
808 | | - iommu->geometry.aperture_end = va_end; |
---|
809 | | - |
---|
810 | | - dev_info(gpu->dev->dev, "%s: using IOMMU\n", gpu->name); |
---|
811 | | - |
---|
812 | | - aspace = msm_gem_address_space_create(&pdev->dev, iommu, "gpu"); |
---|
813 | | - if (IS_ERR(aspace)) { |
---|
814 | | - dev_err(gpu->dev->dev, "failed to init iommu: %ld\n", |
---|
815 | | - PTR_ERR(aspace)); |
---|
816 | | - iommu_domain_free(iommu); |
---|
817 | | - return ERR_CAST(aspace); |
---|
| 841 | + /* |
---|
| 842 | + * If the target doesn't support private address spaces then return |
---|
| 843 | + * the global one |
---|
| 844 | + */ |
---|
| 845 | + if (gpu->funcs->create_private_address_space) { |
---|
| 846 | + aspace = gpu->funcs->create_private_address_space(gpu); |
---|
| 847 | + if (!IS_ERR(aspace)) |
---|
| 848 | + aspace->pid = get_pid(task_pid(task)); |
---|
818 | 849 | } |
---|
819 | 850 | |
---|
820 | | - ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0); |
---|
821 | | - if (ret) { |
---|
822 | | - msm_gem_address_space_put(aspace); |
---|
823 | | - return ERR_PTR(ret); |
---|
824 | | - } |
---|
| 851 | + if (IS_ERR_OR_NULL(aspace)) |
---|
| 852 | + aspace = msm_gem_address_space_get(gpu->aspace); |
---|
825 | 853 | |
---|
826 | 854 | return aspace; |
---|
827 | 855 | } |
---|
.. | .. |
---|
859 | 887 | } |
---|
860 | 888 | |
---|
861 | 889 | /* Get Interrupt: */ |
---|
862 | | - gpu->irq = platform_get_irq_byname(pdev, config->irqname); |
---|
| 890 | + gpu->irq = platform_get_irq(pdev, 0); |
---|
863 | 891 | if (gpu->irq < 0) { |
---|
864 | 892 | ret = gpu->irq; |
---|
865 | | - dev_err(drm->dev, "failed to get irq: %d\n", ret); |
---|
| 893 | + DRM_DEV_ERROR(drm->dev, "failed to get irq: %d\n", ret); |
---|
866 | 894 | goto fail; |
---|
867 | 895 | } |
---|
868 | 896 | |
---|
869 | 897 | ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler, |
---|
870 | 898 | IRQF_TRIGGER_HIGH, gpu->name, gpu); |
---|
871 | 899 | if (ret) { |
---|
872 | | - dev_err(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret); |
---|
| 900 | + DRM_DEV_ERROR(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret); |
---|
873 | 901 | goto fail; |
---|
874 | 902 | } |
---|
875 | 903 | |
---|
.. | .. |
---|
894 | 922 | gpu->gpu_cx = NULL; |
---|
895 | 923 | |
---|
896 | 924 | gpu->pdev = pdev; |
---|
897 | | - platform_set_drvdata(pdev, gpu); |
---|
| 925 | + platform_set_drvdata(pdev, &gpu->adreno_smmu); |
---|
898 | 926 | |
---|
899 | 927 | msm_devfreq_init(gpu); |
---|
900 | 928 | |
---|
901 | | - gpu->aspace = msm_gpu_create_address_space(gpu, pdev, |
---|
902 | | - config->va_start, config->va_end); |
---|
| 929 | + |
---|
| 930 | + gpu->aspace = gpu->funcs->create_address_space(gpu, pdev); |
---|
903 | 931 | |
---|
904 | 932 | if (gpu->aspace == NULL) |
---|
905 | | - dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name); |
---|
| 933 | + DRM_DEV_INFO(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name); |
---|
906 | 934 | else if (IS_ERR(gpu->aspace)) { |
---|
907 | 935 | ret = PTR_ERR(gpu->aspace); |
---|
908 | 936 | goto fail; |
---|
909 | 937 | } |
---|
910 | 938 | |
---|
911 | | - memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo), |
---|
912 | | - MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo, |
---|
| 939 | + memptrs = msm_gem_kernel_new(drm, |
---|
| 940 | + sizeof(struct msm_rbmemptrs) * nr_rings, |
---|
| 941 | + check_apriv(gpu, MSM_BO_UNCACHED), gpu->aspace, &gpu->memptrs_bo, |
---|
913 | 942 | &memptrs_iova); |
---|
914 | 943 | |
---|
915 | 944 | if (IS_ERR(memptrs)) { |
---|
916 | 945 | ret = PTR_ERR(memptrs); |
---|
917 | | - dev_err(drm->dev, "could not allocate memptrs: %d\n", ret); |
---|
| 946 | + DRM_DEV_ERROR(drm->dev, "could not allocate memptrs: %d\n", ret); |
---|
918 | 947 | goto fail; |
---|
919 | 948 | } |
---|
| 949 | + |
---|
| 950 | + msm_gem_object_set_name(gpu->memptrs_bo, "memptrs"); |
---|
920 | 951 | |
---|
921 | 952 | if (nr_rings > ARRAY_SIZE(gpu->rb)) { |
---|
922 | 953 | DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n", |
---|
.. | .. |
---|
930 | 961 | |
---|
931 | 962 | if (IS_ERR(gpu->rb[i])) { |
---|
932 | 963 | ret = PTR_ERR(gpu->rb[i]); |
---|
933 | | - dev_err(drm->dev, |
---|
| 964 | + DRM_DEV_ERROR(drm->dev, |
---|
934 | 965 | "could not create ringbuffer %d: %d\n", i, ret); |
---|
935 | 966 | goto fail; |
---|
936 | 967 | } |
---|
.. | .. |
---|
949 | 980 | gpu->rb[i] = NULL; |
---|
950 | 981 | } |
---|
951 | 982 | |
---|
952 | | - if (gpu->memptrs_bo) { |
---|
953 | | - msm_gem_put_vaddr(gpu->memptrs_bo); |
---|
954 | | - msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace); |
---|
955 | | - drm_gem_object_put_unlocked(gpu->memptrs_bo); |
---|
956 | | - } |
---|
| 983 | + msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false); |
---|
957 | 984 | |
---|
958 | 985 | platform_set_drvdata(pdev, NULL); |
---|
959 | 986 | return ret; |
---|
.. | .. |
---|
972 | 999 | gpu->rb[i] = NULL; |
---|
973 | 1000 | } |
---|
974 | 1001 | |
---|
975 | | - if (gpu->memptrs_bo) { |
---|
976 | | - msm_gem_put_vaddr(gpu->memptrs_bo); |
---|
977 | | - msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace); |
---|
978 | | - drm_gem_object_put_unlocked(gpu->memptrs_bo); |
---|
979 | | - } |
---|
| 1002 | + msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false); |
---|
980 | 1003 | |
---|
981 | 1004 | if (!IS_ERR_OR_NULL(gpu->aspace)) { |
---|
982 | | - gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu, |
---|
983 | | - NULL, 0); |
---|
| 1005 | + gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu); |
---|
984 | 1006 | msm_gem_address_space_put(gpu->aspace); |
---|
985 | 1007 | } |
---|
986 | 1008 | } |
---|