.. | .. |
---|
1 | 1 | // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note |
---|
2 | 2 | /* |
---|
3 | 3 | * |
---|
4 | | - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. |
---|
| 4 | + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. |
---|
5 | 5 | * |
---|
6 | 6 | * This program is free software and is provided to you under the terms of the |
---|
7 | 7 | * GNU General Public License version 2 as published by the Free Software |
---|
.. | .. |
---|
29 | 29 | #include <mali_kbase_jm.h> |
---|
30 | 30 | #include <mali_kbase_js.h> |
---|
31 | 31 | #include <tl/mali_kbase_tracepoints.h> |
---|
32 | | -#include <mali_kbase_hwcnt_context.h> |
---|
| 32 | +#include <hwcnt/mali_kbase_hwcnt_context.h> |
---|
33 | 33 | #include <mali_kbase_reset_gpu.h> |
---|
34 | 34 | #include <mali_kbase_kinstr_jm.h> |
---|
35 | 35 | #include <backend/gpu/mali_kbase_cache_policy_backend.h> |
---|
.. | .. |
---|
37 | 37 | #include <backend/gpu/mali_kbase_jm_internal.h> |
---|
38 | 38 | #include <backend/gpu/mali_kbase_pm_internal.h> |
---|
39 | 39 | |
---|
40 | | -/* Return whether the specified ringbuffer is empty. HW access lock must be |
---|
41 | | - * held |
---|
| 40 | +/** |
---|
| 41 | + * SLOT_RB_EMPTY - Return whether the specified ringbuffer is empty. |
---|
| 42 | + * |
---|
| 43 | + * @rb: ring buffer |
---|
| 44 | + * |
---|
| 45 | + * Note: HW access lock must be held |
---|
42 | 46 | */ |
---|
43 | 47 | #define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) |
---|
44 | | -/* Return number of atoms currently in the specified ringbuffer. HW access lock |
---|
45 | | - * must be held |
---|
| 48 | + |
---|
| 49 | +/** |
---|
| 50 | + * SLOT_RB_ENTRIES - Return number of atoms currently in the specified ringbuffer. |
---|
| 51 | + * |
---|
| 52 | + * @rb: ring buffer |
---|
| 53 | + * |
---|
| 54 | + * Note: HW access lock must be held |
---|
46 | 55 | */ |
---|
47 | | -#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx) |
---|
| 56 | +#define SLOT_RB_ENTRIES(rb) ((int)(s8)(rb->write_idx - rb->read_idx)) |
---|
48 | 57 | |
---|
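A brief aside on the index arithmetic documented above: because the read/write indices are small unsigned counters, SLOT_RB_ENTRIES() stays correct even after they wrap, since the subtraction is truncated to 8 bits and reinterpreted as a signed count. Below is a minimal standalone model of that behaviour; the struct layout and type widths are assumptions for illustration, not kbase's actual slot_rb definition.

```c
#include <assert.h>
#include <stdint.h>

/* Toy model of a slot ringbuffer's indices (assumed layout, not kbase's). */
struct toy_rb {
	uint8_t write_idx;
	uint8_t read_idx;
};

#define TOY_RB_EMPTY(rb)   ((rb)->write_idx == (rb)->read_idx)
#define TOY_RB_ENTRIES(rb) ((int)(int8_t)((rb)->write_idx - (rb)->read_idx))

int main(void)
{
	/* Indices that have already wrapped past 255 still give the right count:
	 * 2 - 254 == -252, which truncated to 8 bits is 4.
	 */
	struct toy_rb rb = { .write_idx = 2, .read_idx = 254 };

	assert(!TOY_RB_EMPTY(&rb));
	assert(TOY_RB_ENTRIES(&rb) == 4);
	return 0;
}
```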
49 | 58 | static void kbase_gpu_release_atom(struct kbase_device *kbdev, |
---|
50 | 59 | struct kbase_jd_atom *katom, |
---|
.. | .. |
---|
84 | 93 | * |
---|
85 | 94 | * Return: Atom removed from ringbuffer |
---|
86 | 95 | */ |
---|
87 | | -static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, |
---|
88 | | - int js, |
---|
89 | | - ktime_t *end_timestamp) |
---|
| 96 | +static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, unsigned int js, |
---|
| 97 | + ktime_t *end_timestamp) |
---|
90 | 98 | { |
---|
91 | 99 | struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; |
---|
92 | 100 | struct kbase_jd_atom *katom; |
---|
.. | .. |
---|
109 | 117 | return katom; |
---|
110 | 118 | } |
---|
111 | 119 | |
---|
112 | | -struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, |
---|
113 | | - int idx) |
---|
| 120 | +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx) |
---|
114 | 121 | { |
---|
115 | 122 | struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; |
---|
116 | 123 | |
---|
.. | .. |
---|
122 | 129 | return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom; |
---|
123 | 130 | } |
---|
124 | 131 | |
---|
125 | | -struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, |
---|
126 | | - int js) |
---|
| 132 | +struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js) |
---|
127 | 133 | { |
---|
128 | 134 | struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; |
---|
129 | 135 | |
---|
.. | .. |
---|
135 | 141 | |
---|
136 | 142 | bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) |
---|
137 | 143 | { |
---|
138 | | - int js; |
---|
139 | | - int i; |
---|
| 144 | + unsigned int js; |
---|
140 | 145 | |
---|
141 | 146 | lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
142 | 147 | |
---|
143 | 148 | for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { |
---|
| 149 | + int i; |
---|
| 150 | + |
---|
144 | 151 | for (i = 0; i < SLOT_RB_SIZE; i++) { |
---|
145 | 152 | struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); |
---|
146 | 153 | |
---|
.. | .. |
---|
151 | 158 | return false; |
---|
152 | 159 | } |
---|
153 | 160 | |
---|
154 | | -int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) |
---|
| 161 | +int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js) |
---|
155 | 162 | { |
---|
156 | 163 | int nr = 0; |
---|
157 | 164 | int i; |
---|
.. | .. |
---|
169 | 176 | return nr; |
---|
170 | 177 | } |
---|
171 | 178 | |
---|
172 | | -int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) |
---|
| 179 | +int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js) |
---|
173 | 180 | { |
---|
174 | 181 | int nr = 0; |
---|
175 | 182 | int i; |
---|
.. | .. |
---|
184 | 191 | return nr; |
---|
185 | 192 | } |
---|
186 | 193 | |
---|
187 | | -static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, |
---|
188 | | - enum kbase_atom_gpu_rb_state min_rb_state) |
---|
| 194 | +static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, unsigned int js, |
---|
| 195 | + enum kbase_atom_gpu_rb_state min_rb_state) |
---|
189 | 196 | { |
---|
190 | 197 | int nr = 0; |
---|
191 | 198 | int i; |
---|
.. | .. |
---|
235 | 242 | static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, |
---|
236 | 243 | bool secure) |
---|
237 | 244 | { |
---|
238 | | - int js, i; |
---|
| 245 | + unsigned int js; |
---|
239 | 246 | |
---|
240 | 247 | for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { |
---|
| 248 | + int i; |
---|
| 249 | + |
---|
241 | 250 | for (i = 0; i < SLOT_RB_SIZE; i++) { |
---|
242 | 251 | struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, |
---|
243 | 252 | js, i); |
---|
.. | .. |
---|
252 | 261 | return false; |
---|
253 | 262 | } |
---|
254 | 263 | |
---|
255 | | -int kbase_backend_slot_free(struct kbase_device *kbdev, int js) |
---|
| 264 | +int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js) |
---|
256 | 265 | { |
---|
257 | 266 | lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
258 | 267 | |
---|
.. | .. |
---|
304 | 313 | [katom->slot_nr]); |
---|
305 | 314 | |
---|
306 | 315 | /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ |
---|
307 | | - |
---|
| 316 | + fallthrough; |
---|
308 | 317 | case KBASE_ATOM_GPU_RB_READY: |
---|
309 | 318 | /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ |
---|
310 | | - |
---|
| 319 | + fallthrough; |
---|
311 | 320 | case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: |
---|
312 | 321 | break; |
---|
313 | 322 | |
---|
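Throughout this patch the bare `/* fallthrough */` comments are replaced with the kernel's `fallthrough;` pseudo-keyword (defined in <linux/compiler_attributes.h>, expanding to __attribute__((__fallthrough__)) where the compiler supports it), which keeps -Wimplicit-fallthrough quiet while documenting intent. A standalone sketch of the pattern, with the macro re-defined locally so it compiles outside the kernel tree:

```c
/* Local stand-in for the kernel's definition, for this out-of-tree sketch only. */
#ifndef fallthrough
#define fallthrough __attribute__((__fallthrough__))
#endif

static int accumulate(int state, int value)
{
	switch (state) {
	case 2:
		value += 20;
		/* Intentional transition into the lower state's handling. */
		fallthrough;
	case 1:
		value += 1;
		break;
	default:
		break;
	}
	return value;
}
```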
.. | .. |
---|
338 | 347 | katom->protected_state.exit != |
---|
339 | 348 | KBASE_ATOM_EXIT_PROTECTED_CHECK) |
---|
340 | 349 | kbdev->protected_mode_transition = false; |
---|
| 350 | + |
---|
| 351 | + /* If the atom is at KBASE_ATOM_ENTER_PROTECTED_HWCNT state, it means |
---|
| 352 | + * one of two events prevented it from progressing to the next state and |
---|
| 353 | + * ultimately reaching protected mode: |
---|
| 354 | + * - hwcnts were enabled, and the atom had to schedule a worker to |
---|
| 355 | + * disable them. |
---|
| 356 | + * - the hwcnts were already disabled, but some other error occurred. |
---|
| 357 | + * In the first case, if the worker has not yet completed |
---|
| 358 | + * (kbdev->protected_mode_hwcnt_disabled == false), we need to re-enable |
---|
| 359 | + * them and signal to the worker they have already been enabled |
---|
| 360 | + */ |
---|
| 361 | + if (kbase_jd_katom_is_protected(katom) && |
---|
| 362 | + (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_HWCNT)) { |
---|
| 363 | + kbdev->protected_mode_hwcnt_desired = true; |
---|
| 364 | + if (kbdev->protected_mode_hwcnt_disabled) { |
---|
| 365 | + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); |
---|
| 366 | + kbdev->protected_mode_hwcnt_disabled = false; |
---|
| 367 | + } |
---|
| 368 | + } |
---|
| 369 | + |
---|
341 | 370 | /* If the atom has suspended hwcnt but has not yet entered |
---|
342 | 371 | * protected mode, then resume hwcnt now. If the GPU is now in |
---|
343 | 372 | * protected mode then hwcnt will be resumed by GPU reset so |
---|
344 | 373 | * don't resume it here. |
---|
345 | 374 | */ |
---|
346 | 375 | if (kbase_jd_katom_is_protected(katom) && |
---|
347 | | - ((katom->protected_state.enter == |
---|
348 | | - KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || |
---|
349 | | - (katom->protected_state.enter == |
---|
350 | | - KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY))) { |
---|
| 376 | + ((katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || |
---|
| 377 | + (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY) || |
---|
| 378 | + (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_FINISHED))) { |
---|
351 | 379 | WARN_ON(!kbdev->protected_mode_hwcnt_disabled); |
---|
352 | 380 | kbdev->protected_mode_hwcnt_desired = true; |
---|
353 | 381 | if (kbdev->protected_mode_hwcnt_disabled) { |
---|
.. | .. |
---|
367 | 395 | } |
---|
368 | 396 | |
---|
369 | 397 | /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ |
---|
370 | | - |
---|
| 398 | + fallthrough; |
---|
371 | 399 | case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: |
---|
372 | 400 | /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ |
---|
373 | | - |
---|
| 401 | + fallthrough; |
---|
374 | 402 | case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: |
---|
375 | 403 | /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ |
---|
376 | | - |
---|
| 404 | + fallthrough; |
---|
377 | 405 | case KBASE_ATOM_GPU_RB_RETURN_TO_JS: |
---|
378 | 406 | break; |
---|
379 | 407 | } |
---|
.. | .. |
---|
387 | 415 | { |
---|
388 | 416 | lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
389 | 417 | |
---|
| 418 | + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_MARK_FOR_RETURN_TO_JS, |
---|
| 419 | + katom->kctx, katom, katom->jc, |
---|
| 420 | + katom->slot_nr, katom->event_code); |
---|
390 | 421 | kbase_gpu_release_atom(kbdev, katom, NULL); |
---|
391 | 422 | katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; |
---|
392 | 423 | } |
---|
.. | .. |
---|
399 | 430 | * |
---|
400 | 431 | * Return: true if any slots other than @js are busy, false otherwise |
---|
401 | 432 | */ |
---|
402 | | -static inline bool other_slots_busy(struct kbase_device *kbdev, int js) |
---|
| 433 | +static inline bool other_slots_busy(struct kbase_device *kbdev, unsigned int js) |
---|
403 | 434 | { |
---|
404 | | - int slot; |
---|
| 435 | + unsigned int slot; |
---|
405 | 436 | |
---|
406 | 437 | for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) { |
---|
407 | 438 | if (slot == js) |
---|
.. | .. |
---|
495 | 526 | KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev); |
---|
496 | 527 | if (err) { |
---|
497 | 528 | /* |
---|
498 | | - * Failed to switch into protected mode, resume |
---|
499 | | - * GPU hwcnt and fail atom. |
---|
| 529 | + * Failed to switch into protected mode. |
---|
| 530 | + * |
---|
| 531 | + * At this point we expect: |
---|
| 532 | + * katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && |
---|
| 533 | + * katom->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_FINISHED |
---|
| 534 | + * ==> |
---|
| 535 | + * kbdev->protected_mode_hwcnt_disabled = false |
---|
500 | 536 | */ |
---|
501 | | - WARN_ON(!kbdev->protected_mode_hwcnt_disabled); |
---|
502 | | - kbdev->protected_mode_hwcnt_desired = true; |
---|
503 | | - if (kbdev->protected_mode_hwcnt_disabled) { |
---|
504 | | - kbase_hwcnt_context_enable( |
---|
505 | | - kbdev->hwcnt_gpu_ctx); |
---|
506 | | - kbdev->protected_mode_hwcnt_disabled = false; |
---|
507 | | - } |
---|
508 | | - |
---|
509 | 537 | katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; |
---|
510 | 538 | kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); |
---|
511 | 539 | /* |
---|
.. | .. |
---|
525 | 553 | /* |
---|
526 | 554 | * Protected mode sanity checks. |
---|
527 | 555 | */ |
---|
528 | | - KBASE_DEBUG_ASSERT_MSG( |
---|
529 | | - kbase_jd_katom_is_protected(katom[idx]) == |
---|
530 | | - kbase_gpu_in_protected_mode(kbdev), |
---|
531 | | - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", |
---|
532 | | - kbase_jd_katom_is_protected(katom[idx]), |
---|
533 | | - kbase_gpu_in_protected_mode(kbdev)); |
---|
| 556 | + WARN(kbase_jd_katom_is_protected(katom[idx]) != kbase_gpu_in_protected_mode(kbdev), |
---|
| 557 | + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", |
---|
| 558 | + kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); |
---|
534 | 559 | katom[idx]->gpu_rb_state = |
---|
535 | 560 | KBASE_ATOM_GPU_RB_READY; |
---|
536 | 561 | |
---|
.. | .. |
---|
564 | 589 | kbdev->protected_mode_transition = true; |
---|
565 | 590 | |
---|
566 | 591 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
567 | | - /* fallthrough */ |
---|
| 592 | + fallthrough; |
---|
568 | 593 | case KBASE_ATOM_ENTER_PROTECTED_HWCNT: |
---|
569 | 594 | /* See if we can get away with disabling hwcnt atomically */ |
---|
570 | 595 | kbdev->protected_mode_hwcnt_desired = false; |
---|
.. | .. |
---|
607 | 632 | kbase_pm_update_cores_state_nolock(kbdev); |
---|
608 | 633 | |
---|
609 | 634 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
610 | | - /* fallthrough */ |
---|
| 635 | + fallthrough; |
---|
611 | 636 | case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: |
---|
612 | 637 | /* Avoid unnecessary waiting on non-ACE platforms. */ |
---|
613 | 638 | if (kbdev->system_coherency == COHERENCY_ACE) { |
---|
.. | .. |
---|
638 | 663 | KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY; |
---|
639 | 664 | |
---|
640 | 665 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
641 | | - /* fallthrough */ |
---|
| 666 | + fallthrough; |
---|
642 | 667 | case KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: |
---|
643 | 668 | /* |
---|
644 | 669 | * When entering into protected mode, we must ensure that the |
---|
.. | .. |
---|
671 | 696 | return -EAGAIN; |
---|
672 | 697 | |
---|
673 | 698 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
674 | | - /* fallthrough */ |
---|
| 699 | + fallthrough; |
---|
675 | 700 | case KBASE_ATOM_ENTER_PROTECTED_FINISHED: |
---|
676 | 701 | if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { |
---|
677 | 702 | /* |
---|
.. | .. |
---|
742 | 767 | kbase_pm_update_cores_state_nolock(kbdev); |
---|
743 | 768 | |
---|
744 | 769 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
745 | | - /* fallthrough */ |
---|
| 770 | + fallthrough; |
---|
746 | 771 | case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: |
---|
747 | 772 | if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) { |
---|
748 | 773 | /* |
---|
.. | .. |
---|
755 | 780 | KBASE_ATOM_EXIT_PROTECTED_RESET; |
---|
756 | 781 | |
---|
757 | 782 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
758 | | - /* fallthrough */ |
---|
| 783 | + fallthrough; |
---|
759 | 784 | case KBASE_ATOM_EXIT_PROTECTED_RESET: |
---|
| 785 | + /* L2 cache has been turned off (which is needed prior to resetting the GPU |
---|
| 786 | + * to exit protected mode), so the override flag can be safely cleared. |
---|
| 787 | + * Even if L2 cache is powered up again before the actual reset, it should |
---|
| 788 | + * not be an issue (there are no jobs running on the GPU). |
---|
| 789 | + */ |
---|
| 790 | + kbase_pm_protected_override_disable(kbdev); |
---|
| 791 | + |
---|
760 | 792 | /* Issue the reset to the GPU */ |
---|
761 | 793 | err = kbase_gpu_protected_mode_reset(kbdev); |
---|
762 | 794 | |
---|
.. | .. |
---|
765 | 797 | |
---|
766 | 798 | if (err) { |
---|
767 | 799 | kbdev->protected_mode_transition = false; |
---|
768 | | - kbase_pm_protected_override_disable(kbdev); |
---|
769 | 800 | |
---|
770 | 801 | /* Failed to exit protected mode, fail atom */ |
---|
771 | 802 | katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; |
---|
.. | .. |
---|
797 | 828 | KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; |
---|
798 | 829 | |
---|
799 | 830 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
800 | | - /* fallthrough */ |
---|
| 831 | + fallthrough; |
---|
801 | 832 | case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: |
---|
802 | 833 | /* A GPU reset is issued when exiting protected mode. Once the |
---|
803 | 834 | * reset is done all atoms' state will also be reset. For this |
---|
.. | .. |
---|
813 | 844 | |
---|
814 | 845 | void kbase_backend_slot_update(struct kbase_device *kbdev) |
---|
815 | 846 | { |
---|
816 | | - int js; |
---|
| 847 | + unsigned int js; |
---|
817 | 848 | |
---|
818 | 849 | lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
819 | 850 | |
---|
.. | .. |
---|
854 | 885 | KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; |
---|
855 | 886 | |
---|
856 | 887 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
857 | | - /* fallthrough */ |
---|
| 888 | + fallthrough; |
---|
858 | 889 | case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: |
---|
859 | 890 | if (kbase_gpu_check_secure_atoms(kbdev, |
---|
860 | 891 | !kbase_jd_katom_is_protected( |
---|
.. | .. |
---|
874 | 905 | KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION; |
---|
875 | 906 | |
---|
876 | 907 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
877 | | - /* fallthrough */ |
---|
| 908 | + fallthrough; |
---|
878 | 909 | case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: |
---|
879 | 910 | |
---|
880 | 911 | /* |
---|
.. | .. |
---|
909 | 940 | KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; |
---|
910 | 941 | |
---|
911 | 942 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
912 | | - /* fallthrough */ |
---|
| 943 | + fallthrough; |
---|
913 | 944 | case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: |
---|
914 | 945 | if (katom[idx]->will_fail_event_code) { |
---|
915 | 946 | kbase_gpu_mark_atom_for_return(kbdev, |
---|
.. | .. |
---|
934 | 965 | cores_ready = kbase_pm_cores_requested(kbdev, |
---|
935 | 966 | true); |
---|
936 | 967 | |
---|
937 | | - if (katom[idx]->event_code == |
---|
938 | | - BASE_JD_EVENT_PM_EVENT) { |
---|
939 | | - katom[idx]->gpu_rb_state = |
---|
940 | | - KBASE_ATOM_GPU_RB_RETURN_TO_JS; |
---|
941 | | - break; |
---|
942 | | - } |
---|
943 | | - |
---|
944 | 968 | if (!cores_ready) |
---|
945 | 969 | break; |
---|
946 | 970 | |
---|
.. | .. |
---|
948 | 972 | KBASE_ATOM_GPU_RB_READY; |
---|
949 | 973 | |
---|
950 | 974 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
951 | | - /* fallthrough */ |
---|
| 975 | + fallthrough; |
---|
952 | 976 | case KBASE_ATOM_GPU_RB_READY: |
---|
953 | 977 | |
---|
954 | 978 | if (idx == 1) { |
---|
.. | .. |
---|
977 | 1001 | other_slots_busy(kbdev, js)) |
---|
978 | 1002 | break; |
---|
979 | 1003 | |
---|
980 | | -#ifdef CONFIG_MALI_GEM5_BUILD |
---|
981 | | - if (!kbasep_jm_is_js_free(kbdev, js, |
---|
982 | | - katom[idx]->kctx)) |
---|
983 | | - break; |
---|
984 | | -#endif |
---|
985 | 1004 | /* Check if this job needs the cycle counter |
---|
986 | 1005 | * enabled before submission |
---|
987 | 1006 | */ |
---|
988 | 1007 | if (katom[idx]->core_req & BASE_JD_REQ_PERMON) |
---|
989 | | - kbase_pm_request_gpu_cycle_counter_l2_is_on( |
---|
990 | | - kbdev); |
---|
| 1008 | + kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev); |
---|
991 | 1009 | |
---|
992 | | - kbase_job_hw_submit(kbdev, katom[idx], js); |
---|
993 | | - katom[idx]->gpu_rb_state = |
---|
994 | | - KBASE_ATOM_GPU_RB_SUBMITTED; |
---|
| 1010 | + if (!kbase_job_hw_submit(kbdev, katom[idx], js)) { |
---|
| 1011 | + katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED; |
---|
| 1012 | + |
---|
| 1013 | + /* Inform power management at start/finish of |
---|
| 1014 | + * atom so it can update its GPU utilisation |
---|
| 1015 | + * metrics. |
---|
| 1016 | + */ |
---|
| 1017 | + kbase_pm_metrics_update(kbdev, |
---|
| 1018 | + &katom[idx]->start_timestamp); |
---|
| 1019 | + |
---|
| 1020 | + /* Inform platform at start/finish of atom */ |
---|
| 1021 | + kbasep_platform_event_atom_submit(katom[idx]); |
---|
| 1022 | + } else { |
---|
| 1023 | + if (katom[idx]->core_req & BASE_JD_REQ_PERMON) |
---|
| 1024 | + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); |
---|
| 1025 | + |
---|
| 1026 | + break; |
---|
| 1027 | + } |
---|
995 | 1028 | |
---|
996 | 1029 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
997 | | - /* fallthrough */ |
---|
| 1030 | + fallthrough; |
---|
998 | 1031 | case KBASE_ATOM_GPU_RB_SUBMITTED: |
---|
999 | | - |
---|
1000 | | - /* Inform power management at start/finish of |
---|
1001 | | - * atom so it can update its GPU utilisation |
---|
1002 | | - * metrics. |
---|
1003 | | - */ |
---|
1004 | | - kbase_pm_metrics_update(kbdev, |
---|
1005 | | - &katom[idx]->start_timestamp); |
---|
1006 | | - |
---|
1007 | | - /* Inform platform at start/finish of atom */ |
---|
1008 | | - kbasep_platform_event_atom_submit(katom[idx]); |
---|
1009 | | - |
---|
1010 | 1032 | break; |
---|
1011 | 1033 | |
---|
1012 | 1034 | case KBASE_ATOM_GPU_RB_RETURN_TO_JS: |
---|
.. | .. |
---|
1037 | 1059 | kbase_backend_slot_update(kbdev); |
---|
1038 | 1060 | } |
---|
1039 | 1061 | |
---|
1040 | | -#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \ |
---|
1041 | | - (KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER)) |
---|
| 1062 | +/** |
---|
| 1063 | + * kbase_rb_atom_might_depend - determine if one atom in the slot ringbuffer |
---|
| 1064 | + * might depend on another from the same kctx |
---|
| 1065 | + * @katom_a: dependee atom |
---|
| 1066 | + * @katom_b: atom to query |
---|
| 1067 | + * |
---|
| 1068 | + * This can be used on atoms that belong to different slot ringbuffers |
---|
| 1069 | + * |
---|
| 1070 | + * Return: true if @katom_b might depend on @katom_a, false if it cannot depend. |
---|
| 1071 | + */ |
---|
| 1072 | +static inline bool |
---|
| 1073 | +kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, |
---|
| 1074 | + const struct kbase_jd_atom *katom_b) |
---|
| 1075 | +{ |
---|
| 1076 | + if (katom_a->kctx != katom_b->kctx) |
---|
| 1077 | + return false; |
---|
| 1078 | + return (katom_b->pre_dep || |
---|
| 1079 | + (katom_b->atom_flags & (KBASE_KATOM_FLAG_X_DEP_BLOCKED | |
---|
| 1080 | + KBASE_KATOM_FLAG_FAIL_BLOCKER))); |
---|
| 1081 | +} |
---|
1042 | 1082 | |
---|
1043 | | -bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, |
---|
1044 | | - u32 completion_code) |
---|
| 1083 | +/** |
---|
| 1084 | + * kbase_gpu_irq_evict - evict a slot's JSn_HEAD_NEXT atom from the HW if it is |
---|
| 1085 | + * related to a failed JSn_HEAD atom |
---|
| 1086 | + * @kbdev: kbase device |
---|
| 1087 | + * @js: job slot to check |
---|
| 1088 | + * @completion_code: completion code of the failed atom |
---|
| 1089 | + * |
---|
| 1090 | + * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but |
---|
| 1091 | + * unlike other failure codes we _can_ re-run them. |
---|
| 1092 | + * |
---|
| 1093 | + * This forms step 1 in a 2-step process of removing any related atoms from a |
---|
| 1094 | + * slot's JSn_HEAD_NEXT (ringbuffer index 1), should there have |
---|
| 1095 | + * been a 'failure' on an atom in JSn_HEAD (ringbuffer index 0). |
---|
| 1096 | + * |
---|
| 1097 | + * This step only removes the atoms from the HW, and marks them as |
---|
| 1098 | + * (potentially) ready to run again. |
---|
| 1099 | + * |
---|
| 1100 | + * Step 2 is on marking the JSn_HEAD atom as complete |
---|
| 1101 | + * (kbase_gpu_complete_hw()), to dequeue said atoms and return them to the JS |
---|
| 1102 | + * as appropriate, or re-submit them. |
---|
| 1103 | + * |
---|
| 1104 | + * Hence, this function must evict at a minimum the atoms related to the atom |
---|
| 1105 | + * in JSn_HEAD that kbase_gpu_complete_hw() will also dequeue. It is acceptable |
---|
| 1106 | + * if this function evicts more atoms than kbase_gpu_complete_hw() dequeues, as |
---|
| 1107 | + * the next kbase_backend_slot_update() will resubmit any remaining. |
---|
| 1108 | + * |
---|
| 1109 | + * Return: true if an atom was evicted, false otherwise. |
---|
| 1110 | + */ |
---|
| 1111 | +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code) |
---|
1045 | 1112 | { |
---|
1046 | 1113 | struct kbase_jd_atom *katom; |
---|
1047 | 1114 | struct kbase_jd_atom *next_katom; |
---|
.. | .. |
---|
1049 | 1116 | lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
1050 | 1117 | |
---|
1051 | 1118 | katom = kbase_gpu_inspect(kbdev, js, 0); |
---|
| 1119 | + if (!katom) { |
---|
| 1120 | + dev_err(kbdev->dev, "Can't get a katom from js(%u)\n", js); |
---|
| 1121 | + return false; |
---|
| 1122 | + } |
---|
1052 | 1123 | next_katom = kbase_gpu_inspect(kbdev, js, 1); |
---|
1053 | 1124 | |
---|
1054 | | - if (next_katom && katom->kctx == next_katom->kctx && |
---|
1055 | | - next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && |
---|
1056 | | - (HAS_DEP(next_katom) || next_katom->sched_priority == |
---|
1057 | | - katom->sched_priority) && |
---|
1058 | | - (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) |
---|
1059 | | - != 0 || |
---|
1060 | | - kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) |
---|
1061 | | - != 0)) { |
---|
| 1125 | + if (next_katom && |
---|
| 1126 | + next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && |
---|
| 1127 | + (kbase_rb_atom_might_depend(katom, next_katom) || |
---|
| 1128 | + kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) && |
---|
| 1129 | + (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) != 0 || |
---|
| 1130 | + kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) != 0)) { |
---|
1062 | 1131 | kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), |
---|
1063 | 1132 | JS_COMMAND_NOP); |
---|
1064 | 1133 | next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; |
---|
.. | .. |
---|
1077 | 1146 | if (next_katom->core_req & BASE_JD_REQ_PERMON) |
---|
1078 | 1147 | kbase_pm_release_gpu_cycle_counter_nolock(kbdev); |
---|
1079 | 1148 | |
---|
| 1149 | + /* On evicting the next_katom, the last submission kctx on the |
---|
| 1150 | + * given job slot then reverts to the one that owns katom. This |
---|
| 1151 | + * allows the next submission to determine whether the read-only |
---|
| 1152 | + * shader core L1 cache should be invalidated. |
---|
| 1153 | + */ |
---|
| 1154 | + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = |
---|
| 1155 | + SLOT_RB_TAG_KCTX(katom->kctx); |
---|
| 1156 | + |
---|
1080 | 1157 | return true; |
---|
1081 | 1158 | } |
---|
1082 | 1159 | |
---|
1083 | 1160 | return false; |
---|
1084 | 1161 | } |
---|
1085 | 1162 | |
---|
1086 | | -void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, |
---|
1087 | | - u32 completion_code, |
---|
1088 | | - u64 job_tail, |
---|
1089 | | - ktime_t *end_timestamp) |
---|
| 1163 | +/** |
---|
| 1164 | + * kbase_gpu_complete_hw - complete the atom in a slot's JSn_HEAD |
---|
| 1165 | + * @kbdev: kbase device |
---|
| 1166 | + * @js: job slot to check |
---|
| 1167 | + * @completion_code: completion code of the completed atom |
---|
| 1168 | + * @job_tail: value read from JSn_TAIL, for STOPPED atoms |
---|
| 1169 | + * @end_timestamp: pointer to approximate ktime value when the katom completed |
---|
| 1170 | + * |
---|
| 1171 | + * Among other operations, this also executes step 2 of a 2-step process of |
---|
| 1172 | + * removing any related atoms from a slot's JSn_HEAD_NEXT (ringbuffer index 1), |
---|
| 1173 | + * should there have been a 'failure' on an atom in JSn_HEAD (ringbuffer index |
---|
| 1174 | + * 0). The first step is done in kbase_gpu_irq_evict(). |
---|
| 1175 | + * |
---|
| 1176 | + * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but |
---|
| 1177 | + * unlike other failure codes we _can_ re-run them. |
---|
| 1178 | + * |
---|
| 1179 | + * When the JSn_HEAD atom is considered to be 'failed', then this will dequeue |
---|
| 1180 | + * and return to the JS some (usually all) of the atoms evicted from the HW |
---|
| 1181 | + * during the kbase_gpu_irq_evict() for that JSn_HEAD atom. If it dequeues an |
---|
| 1182 | + * atom, that atom must not have been running or must already be evicted, as |
---|
| 1183 | + * otherwise we would be in the incorrect state of having an atom both running |
---|
| 1184 | + * on the HW and returned to the JS. |
---|
| 1185 | + */ |
---|
| 1186 | + |
---|
| 1187 | +void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code, |
---|
| 1188 | + u64 job_tail, ktime_t *end_timestamp) |
---|
1090 | 1189 | { |
---|
1091 | 1190 | struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); |
---|
1092 | | - struct kbase_context *kctx = katom->kctx; |
---|
| 1191 | + struct kbase_context *kctx = NULL; |
---|
| 1192 | + |
---|
| 1193 | + if (unlikely(!katom)) { |
---|
| 1194 | + dev_err(kbdev->dev, "Can't get a katom from js(%u)\n", js); |
---|
| 1195 | + return; |
---|
| 1196 | + } |
---|
| 1197 | + |
---|
| 1198 | + kctx = katom->kctx; |
---|
1093 | 1199 | |
---|
1094 | 1200 | dev_dbg(kbdev->dev, |
---|
1095 | 1201 | "Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", |
---|
.. | .. |
---|
1133 | 1239 | * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that |
---|
1134 | 1240 | * the atoms on this slot are returned in the correct order. |
---|
1135 | 1241 | */ |
---|
1136 | | - if (next_katom && katom->kctx == next_katom->kctx && |
---|
1137 | | - next_katom->sched_priority == |
---|
1138 | | - katom->sched_priority) { |
---|
| 1242 | + if (next_katom && |
---|
| 1243 | + kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) { |
---|
1139 | 1244 | WARN_ON(next_katom->gpu_rb_state == |
---|
1140 | 1245 | KBASE_ATOM_GPU_RB_SUBMITTED); |
---|
1141 | 1246 | kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); |
---|
.. | .. |
---|
1143 | 1248 | } |
---|
1144 | 1249 | } else if (completion_code != BASE_JD_EVENT_DONE) { |
---|
1145 | 1250 | struct kbasep_js_device_data *js_devdata = &kbdev->js_data; |
---|
1146 | | - int i; |
---|
| 1251 | + unsigned int i; |
---|
1147 | 1252 | |
---|
1148 | | - if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) |
---|
| 1253 | + if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) { |
---|
1149 | 1254 | dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", |
---|
1150 | 1255 | js, completion_code, |
---|
1151 | 1256 | kbase_gpu_exception_name( |
---|
1152 | 1257 | completion_code)); |
---|
| 1258 | + |
---|
| 1259 | + } |
---|
1153 | 1260 | |
---|
1154 | 1261 | #if KBASE_KTRACE_DUMP_ON_JOB_SLOT_ERROR != 0 |
---|
1155 | 1262 | KBASE_KTRACE_DUMP(kbdev); |
---|
.. | .. |
---|
1168 | 1275 | struct kbase_jd_atom *katom_idx1 = |
---|
1169 | 1276 | kbase_gpu_inspect(kbdev, i, 1); |
---|
1170 | 1277 | |
---|
1171 | | - if (katom_idx0 && katom_idx0->kctx == katom->kctx && |
---|
1172 | | - HAS_DEP(katom_idx0) && |
---|
1173 | | - katom_idx0->gpu_rb_state != |
---|
1174 | | - KBASE_ATOM_GPU_RB_SUBMITTED) { |
---|
| 1278 | + if (katom_idx0 && |
---|
| 1279 | + kbase_rb_atom_might_depend(katom, katom_idx0) && |
---|
| 1280 | + katom_idx0->gpu_rb_state != |
---|
| 1281 | + KBASE_ATOM_GPU_RB_SUBMITTED) { |
---|
1175 | 1282 | /* Dequeue katom_idx0 from ringbuffer */ |
---|
1176 | 1283 | kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); |
---|
1177 | 1284 | |
---|
1178 | | - if (katom_idx1 && |
---|
1179 | | - katom_idx1->kctx == katom->kctx |
---|
1180 | | - && HAS_DEP(katom_idx1) && |
---|
1181 | | - katom_idx0->gpu_rb_state != |
---|
1182 | | - KBASE_ATOM_GPU_RB_SUBMITTED) { |
---|
| 1285 | + if (katom_idx1 && kbase_rb_atom_might_depend( |
---|
| 1286 | + katom, katom_idx1) && |
---|
| 1287 | + katom_idx0->gpu_rb_state != |
---|
| 1288 | + KBASE_ATOM_GPU_RB_SUBMITTED) { |
---|
1183 | 1289 | /* Dequeue katom_idx1 from ringbuffer */ |
---|
1184 | 1290 | kbase_gpu_dequeue_atom(kbdev, i, |
---|
1185 | 1291 | end_timestamp); |
---|
.. | .. |
---|
1192 | 1298 | katom_idx0->event_code = BASE_JD_EVENT_STOPPED; |
---|
1193 | 1299 | kbase_jm_return_atom_to_js(kbdev, katom_idx0); |
---|
1194 | 1300 | |
---|
1195 | | - } else if (katom_idx1 && |
---|
1196 | | - katom_idx1->kctx == katom->kctx && |
---|
1197 | | - HAS_DEP(katom_idx1) && |
---|
1198 | | - katom_idx1->gpu_rb_state != |
---|
1199 | | - KBASE_ATOM_GPU_RB_SUBMITTED) { |
---|
| 1301 | + } else if (katom_idx1 && kbase_rb_atom_might_depend( |
---|
| 1302 | + katom, katom_idx1) && |
---|
| 1303 | + katom_idx1->gpu_rb_state != |
---|
| 1304 | + KBASE_ATOM_GPU_RB_SUBMITTED) { |
---|
1200 | 1305 | /* Can not dequeue this atom yet - will be |
---|
1201 | 1306 | * dequeued when atom at idx0 completes |
---|
1202 | 1307 | */ |
---|
.. | .. |
---|
1248 | 1353 | ktime_to_ns(*end_timestamp), |
---|
1249 | 1354 | (u32)next_katom->kctx->id, 0, |
---|
1250 | 1355 | next_katom->work_id); |
---|
1251 | | - kbdev->hwaccess.backend.slot_rb[js].last_context = |
---|
1252 | | - next_katom->kctx; |
---|
1253 | 1356 | } else { |
---|
1254 | 1357 | char js_string[16]; |
---|
1255 | 1358 | |
---|
1256 | | - trace_gpu_sched_switch(kbasep_make_job_slot_string(js, |
---|
1257 | | - js_string, |
---|
1258 | | - sizeof(js_string)), |
---|
1259 | | - ktime_to_ns(ktime_get()), 0, 0, |
---|
1260 | | - 0); |
---|
1261 | | - kbdev->hwaccess.backend.slot_rb[js].last_context = 0; |
---|
| 1359 | + trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string, |
---|
| 1360 | + sizeof(js_string)), |
---|
| 1361 | + ktime_to_ns(ktime_get_raw()), 0, 0, 0); |
---|
1262 | 1362 | } |
---|
1263 | 1363 | } |
---|
1264 | 1364 | #endif |
---|
.. | .. |
---|
1293 | 1393 | |
---|
1294 | 1394 | void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) |
---|
1295 | 1395 | { |
---|
1296 | | - int js; |
---|
| 1396 | + unsigned int js; |
---|
1297 | 1397 | |
---|
1298 | 1398 | lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
1299 | 1399 | |
---|
.. | .. |
---|
1314 | 1414 | if (katom->protected_state.exit == |
---|
1315 | 1415 | KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) { |
---|
1316 | 1416 | /* protected mode sanity checks */ |
---|
1317 | | - KBASE_DEBUG_ASSERT_MSG( |
---|
1318 | | - kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev), |
---|
1319 | | - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", |
---|
1320 | | - kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev)); |
---|
1321 | | - KBASE_DEBUG_ASSERT_MSG( |
---|
1322 | | - (kbase_jd_katom_is_protected(katom) && js == 0) || |
---|
1323 | | - !kbase_jd_katom_is_protected(katom), |
---|
1324 | | - "Protected atom on JS%d not supported", js); |
---|
| 1417 | + WARN(kbase_jd_katom_is_protected(katom) != |
---|
| 1418 | + kbase_gpu_in_protected_mode(kbdev), |
---|
| 1419 | + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", |
---|
| 1420 | + kbase_jd_katom_is_protected(katom), |
---|
| 1421 | + kbase_gpu_in_protected_mode(kbdev)); |
---|
| 1422 | + WARN(!(kbase_jd_katom_is_protected(katom) && js == 0) && |
---|
| 1423 | + kbase_jd_katom_is_protected(katom), |
---|
| 1424 | + "Protected atom on JS%u not supported", js); |
---|
1325 | 1425 | } |
---|
1326 | 1426 | if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) && |
---|
1327 | 1427 | !kbase_ctx_flag(katom->kctx, KCTX_DYING)) |
---|
.. | .. |
---|
1352 | 1452 | katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; |
---|
1353 | 1453 | kbase_jm_complete(kbdev, katom, end_timestamp); |
---|
1354 | 1454 | } |
---|
| 1455 | + |
---|
| 1456 | + /* Clear the slot's last katom submission kctx on reset */ |
---|
| 1457 | + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_NULL_TAG_VAL; |
---|
1355 | 1458 | } |
---|
1356 | 1459 | |
---|
1357 | 1460 | /* Re-enable GPU hardware counters if we're resetting from protected |
---|
.. | .. |
---|
1369 | 1472 | kbase_pm_protected_override_disable(kbdev); |
---|
1370 | 1473 | } |
---|
1371 | 1474 | |
---|
1372 | | -static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, |
---|
1373 | | - int js, |
---|
1374 | | - struct kbase_jd_atom *katom, |
---|
1375 | | - u32 action) |
---|
| 1475 | +/** |
---|
| 1476 | + * should_stop_next_atom - given a soft/hard stop action, determine if the next |
---|
| 1477 | + * atom on a slot should be stopped |
---|
| 1478 | + * @kbdev: kbase device |
---|
| 1479 | + * @head_katom: atom currently in the JSn_HEAD |
---|
| 1480 | + * @next_katom: atom currently in the JSn_HEAD_NEXT |
---|
| 1481 | + * @action: JS_COMMAND_<...> action for soft/hard-stop |
---|
| 1482 | + * |
---|
| 1483 | + * This is used in cases where @head_katom is the target of the soft/hard-stop. |
---|
| 1484 | + * It only makes sense to call this when @head_katom and @next_katom are from |
---|
| 1485 | + * the same slot. |
---|
| 1486 | + * |
---|
| 1487 | + * Return: true if @next_katom should also be stopped with the given action, |
---|
| 1488 | + * false otherwise |
---|
| 1489 | + */ |
---|
| 1490 | +static bool should_stop_next_atom(struct kbase_device *kbdev, |
---|
| 1491 | + const struct kbase_jd_atom *head_katom, |
---|
| 1492 | + const struct kbase_jd_atom *next_katom, |
---|
| 1493 | + u32 action) |
---|
1376 | 1494 | { |
---|
| 1495 | + bool ret = false; |
---|
| 1496 | + u32 hw_action = action & JS_COMMAND_MASK; |
---|
| 1497 | + |
---|
| 1498 | + switch (hw_action) { |
---|
| 1499 | + case JS_COMMAND_SOFT_STOP: |
---|
| 1500 | + ret = kbase_js_atom_runs_before(kbdev, head_katom, next_katom, |
---|
| 1501 | + 0u); |
---|
| 1502 | + break; |
---|
| 1503 | + case JS_COMMAND_HARD_STOP: |
---|
| 1504 | + /* Unlike soft-stop, a hard-stop targeting a particular atom |
---|
| 1505 | + * should not cause atoms from unrelated contexts to be |
---|
| 1506 | + * removed |
---|
| 1507 | + */ |
---|
| 1508 | + ret = (head_katom->kctx == next_katom->kctx); |
---|
| 1509 | + break; |
---|
| 1510 | + default: |
---|
| 1511 | + /* Other stop actions are possible, but the driver should not |
---|
| 1512 | + * be generating them at this point in the call chain |
---|
| 1513 | + */ |
---|
| 1514 | + WARN(1, "Unexpected stop action: 0x%.8x", hw_action); |
---|
| 1515 | + break; |
---|
| 1516 | + } |
---|
| 1517 | + return ret; |
---|
| 1518 | +} |
---|
| 1519 | + |
---|
| 1520 | +static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, unsigned int js, |
---|
| 1521 | + struct kbase_jd_atom *katom, u32 action) |
---|
| 1522 | +{ |
---|
| 1523 | + struct kbase_context *kctx = katom->kctx; |
---|
1377 | 1524 | u32 hw_action = action & JS_COMMAND_MASK; |
---|
1378 | 1525 | |
---|
1379 | 1526 | kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom); |
---|
1380 | 1527 | kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action, |
---|
1381 | 1528 | katom->core_req, katom); |
---|
1382 | | - katom->kctx->blocked_js[js][katom->sched_priority] = true; |
---|
| 1529 | + kbase_jsctx_slot_prio_blocked_set(kctx, js, katom->sched_priority); |
---|
1383 | 1530 | } |
---|
1384 | 1531 | |
---|
1385 | 1532 | static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, |
---|
.. | .. |
---|
1387 | 1534 | u32 action, |
---|
1388 | 1535 | bool disjoint) |
---|
1389 | 1536 | { |
---|
| 1537 | + struct kbase_context *kctx = katom->kctx; |
---|
| 1538 | + |
---|
1390 | 1539 | lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
1391 | 1540 | |
---|
1392 | 1541 | katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; |
---|
1393 | 1542 | kbase_gpu_mark_atom_for_return(kbdev, katom); |
---|
1394 | | - katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true; |
---|
| 1543 | + kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr, |
---|
| 1544 | + katom->sched_priority); |
---|
1395 | 1545 | |
---|
1396 | 1546 | if (disjoint) |
---|
1397 | 1547 | kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, |
---|
.. | .. |
---|
1412 | 1562 | return -1; |
---|
1413 | 1563 | } |
---|
1414 | 1564 | |
---|
1415 | | -bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, |
---|
1416 | | - struct kbase_context *kctx, |
---|
1417 | | - int js, |
---|
1418 | | - struct kbase_jd_atom *katom, |
---|
1419 | | - u32 action) |
---|
| 1565 | +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx, |
---|
| 1566 | + unsigned int js, struct kbase_jd_atom *katom, u32 action) |
---|
1420 | 1567 | { |
---|
1421 | 1568 | struct kbase_jd_atom *katom_idx0; |
---|
| 1569 | + struct kbase_context *kctx_idx0 = NULL; |
---|
1422 | 1570 | struct kbase_jd_atom *katom_idx1; |
---|
| 1571 | + struct kbase_context *kctx_idx1 = NULL; |
---|
1423 | 1572 | |
---|
1424 | 1573 | bool katom_idx0_valid, katom_idx1_valid; |
---|
1425 | 1574 | |
---|
.. | .. |
---|
1433 | 1582 | katom_idx0 = kbase_gpu_inspect(kbdev, js, 0); |
---|
1434 | 1583 | katom_idx1 = kbase_gpu_inspect(kbdev, js, 1); |
---|
1435 | 1584 | |
---|
1436 | | - if (katom_idx0) |
---|
| 1585 | + if (katom_idx0) { |
---|
| 1586 | + kctx_idx0 = katom_idx0->kctx; |
---|
1437 | 1587 | prio_idx0 = katom_idx0->sched_priority; |
---|
1438 | | - if (katom_idx1) |
---|
| 1588 | + } |
---|
| 1589 | + if (katom_idx1) { |
---|
| 1590 | + kctx_idx1 = katom_idx1->kctx; |
---|
1439 | 1591 | prio_idx1 = katom_idx1->sched_priority; |
---|
| 1592 | + } |
---|
1440 | 1593 | |
---|
1441 | 1594 | if (katom) { |
---|
1442 | 1595 | katom_idx0_valid = (katom_idx0 == katom); |
---|
1443 | | - /* If idx0 is to be removed and idx1 is on the same context, |
---|
1444 | | - * then idx1 must also be removed otherwise the atoms might be |
---|
1445 | | - * returned out of order |
---|
1446 | | - */ |
---|
1447 | 1596 | if (katom_idx1) |
---|
1448 | | - katom_idx1_valid = (katom_idx1 == katom) || |
---|
1449 | | - (katom_idx0_valid && |
---|
1450 | | - (katom_idx0->kctx == |
---|
1451 | | - katom_idx1->kctx)); |
---|
| 1597 | + katom_idx1_valid = (katom_idx1 == katom); |
---|
1452 | 1598 | else |
---|
1453 | 1599 | katom_idx1_valid = false; |
---|
1454 | 1600 | } else { |
---|
1455 | | - katom_idx0_valid = (katom_idx0 && |
---|
1456 | | - (!kctx || katom_idx0->kctx == kctx)); |
---|
1457 | | - katom_idx1_valid = (katom_idx1 && |
---|
1458 | | - (!kctx || katom_idx1->kctx == kctx) && |
---|
1459 | | - prio_idx0 == prio_idx1); |
---|
| 1601 | + katom_idx0_valid = (katom_idx0 && (!kctx || kctx_idx0 == kctx)); |
---|
| 1602 | + katom_idx1_valid = (katom_idx1 && (!kctx || kctx_idx1 == kctx)); |
---|
1460 | 1603 | } |
---|
| 1604 | + /* If there's an atom in JSn_HEAD_NEXT that we haven't already decided |
---|
| 1605 | + * to stop, but we're stopping the JSn_HEAD atom, see if they are |
---|
| 1606 | + * related/ordered in some way that would require the same stop action |
---|
| 1607 | + */ |
---|
| 1608 | + if (!katom_idx1_valid && katom_idx0_valid && katom_idx1) |
---|
| 1609 | + katom_idx1_valid = should_stop_next_atom(kbdev, katom_idx0, |
---|
| 1610 | + katom_idx1, action); |
---|
1461 | 1611 | |
---|
1462 | 1612 | if (katom_idx0_valid) |
---|
1463 | 1613 | stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0); |
---|
.. | .. |
---|
1473 | 1623 | katom_idx1->event_code = |
---|
1474 | 1624 | BASE_JD_EVENT_REMOVED_FROM_NEXT; |
---|
1475 | 1625 | kbase_jm_return_atom_to_js(kbdev, katom_idx1); |
---|
1476 | | - katom_idx1->kctx->blocked_js[js][prio_idx1] = |
---|
1477 | | - true; |
---|
| 1626 | + kbase_jsctx_slot_prio_blocked_set(kctx_idx1, js, |
---|
| 1627 | + prio_idx1); |
---|
1478 | 1628 | } |
---|
1479 | 1629 | |
---|
1480 | 1630 | katom_idx0->event_code = |
---|
1481 | 1631 | BASE_JD_EVENT_REMOVED_FROM_NEXT; |
---|
1482 | 1632 | kbase_jm_return_atom_to_js(kbdev, katom_idx0); |
---|
1483 | | - katom_idx0->kctx->blocked_js[js][prio_idx0] = true; |
---|
| 1633 | + kbase_jsctx_slot_prio_blocked_set(kctx_idx0, js, |
---|
| 1634 | + prio_idx0); |
---|
1484 | 1635 | } else { |
---|
1485 | 1636 | /* katom_idx0 is on GPU */ |
---|
1486 | 1637 | if (katom_idx1_valid && katom_idx1->gpu_rb_state == |
---|
.. | .. |
---|
1521 | 1672 | kbase_gpu_remove_atom(kbdev, |
---|
1522 | 1673 | katom_idx1, |
---|
1523 | 1674 | action, true); |
---|
| 1675 | + /* Revert the last_context. */ |
---|
| 1676 | + kbdev->hwaccess.backend.slot_rb[js] |
---|
| 1677 | + .last_kctx_tagged = |
---|
| 1678 | + SLOT_RB_TAG_KCTX(katom_idx0->kctx); |
---|
| 1679 | + |
---|
1524 | 1680 | stop_x_dep_idx1 = |
---|
1525 | 1681 | should_stop_x_dep_slot(katom_idx1); |
---|
1526 | 1682 | |
---|
.. | .. |
---|
1596 | 1752 | kbase_gpu_remove_atom(kbdev, katom_idx1, |
---|
1597 | 1753 | action, |
---|
1598 | 1754 | false); |
---|
| 1755 | + /* Revert the last_context, or mark as purged */ |
---|
| 1756 | + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = |
---|
| 1757 | + kctx_idx0 ? SLOT_RB_TAG_KCTX(katom_idx0->kctx) : |
---|
| 1758 | + SLOT_RB_TAG_PURGED; |
---|
1599 | 1759 | } else { |
---|
1600 | 1760 | /* idx0 has already completed - stop |
---|
1601 | 1761 | * idx1 |
---|
.. | .. |
---|
1625 | 1785 | struct kbase_jd_atom *katom) |
---|
1626 | 1786 | { |
---|
1627 | 1787 | if (katom->need_cache_flush_cores_retained) { |
---|
1628 | | - kbase_gpu_start_cache_clean(kbdev); |
---|
| 1788 | + kbase_gpu_start_cache_clean(kbdev, |
---|
| 1789 | + GPU_COMMAND_CACHE_CLN_INV_FULL); |
---|
1629 | 1790 | kbase_gpu_wait_cache_clean(kbdev); |
---|
1630 | 1791 | |
---|
1631 | 1792 | katom->need_cache_flush_cores_retained = false; |
---|
.. | .. |
---|
1646 | 1807 | base_jd_core_req core_req) |
---|
1647 | 1808 | { |
---|
1648 | 1809 | if (!kbdev->pm.active_count) { |
---|
1649 | | - mutex_lock(&kbdev->js_data.runpool_mutex); |
---|
1650 | | - mutex_lock(&kbdev->pm.lock); |
---|
| 1810 | + kbase_pm_lock(kbdev); |
---|
1651 | 1811 | kbase_pm_update_active(kbdev); |
---|
1652 | | - mutex_unlock(&kbdev->pm.lock); |
---|
1653 | | - mutex_unlock(&kbdev->js_data.runpool_mutex); |
---|
| 1812 | + kbase_pm_unlock(kbdev); |
---|
1654 | 1813 | } |
---|
1655 | 1814 | } |
---|
1656 | 1815 | |
---|
1657 | 1816 | void kbase_gpu_dump_slots(struct kbase_device *kbdev) |
---|
1658 | 1817 | { |
---|
1659 | 1818 | unsigned long flags; |
---|
1660 | | - int js; |
---|
| 1819 | + unsigned int js; |
---|
1661 | 1820 | |
---|
1662 | 1821 | spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
---|
1663 | 1822 | |
---|
1664 | | - dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n"); |
---|
| 1823 | + dev_info(kbdev->dev, "%s:\n", __func__); |
---|
1665 | 1824 | |
---|
1666 | 1825 | for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { |
---|
1667 | 1826 | int idx; |
---|
.. | .. |
---|
1672 | 1831 | idx); |
---|
1673 | 1832 | |
---|
1674 | 1833 | if (katom) |
---|
1675 | | - dev_info(kbdev->dev, |
---|
1676 | | - " js%d idx%d : katom=%pK gpu_rb_state=%d\n", |
---|
1677 | | - js, idx, katom, katom->gpu_rb_state); |
---|
| 1834 | + dev_info(kbdev->dev, " js%u idx%d : katom=%pK gpu_rb_state=%d\n", |
---|
| 1835 | + js, idx, katom, katom->gpu_rb_state); |
---|
1678 | 1836 | else |
---|
1679 | | - dev_info(kbdev->dev, " js%d idx%d : empty\n", |
---|
1680 | | - js, idx); |
---|
| 1837 | + dev_info(kbdev->dev, " js%u idx%d : empty\n", js, idx); |
---|
1681 | 1838 | } |
---|
1682 | 1839 | } |
---|
1683 | 1840 | |
---|
1684 | 1841 | spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
---|
1685 | 1842 | } |
---|
| 1843 | + |
---|
| 1844 | +void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx) |
---|
| 1845 | +{ |
---|
| 1846 | + unsigned int js; |
---|
| 1847 | + bool tracked = false; |
---|
| 1848 | + |
---|
| 1849 | + lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
| 1850 | + |
---|
| 1851 | + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { |
---|
| 1852 | + u64 tagged_kctx = kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged; |
---|
| 1853 | + |
---|
| 1854 | + if (tagged_kctx == SLOT_RB_TAG_KCTX(kctx)) { |
---|
| 1855 | + /* Mark the slot's kctx tracking field as purged */ |
---|
| 1856 | + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_TAG_PURGED; |
---|
| 1857 | + tracked = true; |
---|
| 1858 | + } |
---|
| 1859 | + } |
---|
| 1860 | + |
---|
| 1861 | + if (tracked) { |
---|
| 1862 | + /* The context had run some jobs before the purge, so other slots |
---|
| 1863 | + * still in the SLOT_RB_NULL_TAG_VAL state need to be marked as |
---|
| 1864 | + * purged as well. |
---|
| 1865 | + */ |
---|
| 1866 | + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { |
---|
| 1867 | + if (kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged == |
---|
| 1868 | + SLOT_RB_NULL_TAG_VAL) |
---|
| 1869 | + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = |
---|
| 1870 | + SLOT_RB_TAG_PURGED; |
---|
| 1871 | + } |
---|
| 1872 | + } |
---|
| 1873 | +} |
---|