.. | .. |
---|
1 | 1 | // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note |
---|
2 | 2 | /* |
---|
3 | 3 | * |
---|
4 | | - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. |
---|
| 4 | + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. |
---|
5 | 5 | * |
---|
6 | 6 | * This program is free software and is provided to you under the terms of the |
---|
7 | 7 | * GNU General Public License version 2 as published by the Free Software |
---|
.. | .. |
---|
29 | 29 | #include <mali_kbase_jm.h> |
---|
30 | 30 | #include <mali_kbase_js.h> |
---|
31 | 31 | #include <tl/mali_kbase_tracepoints.h> |
---|
32 | | -#include <mali_kbase_hwcnt_context.h> |
---|
| 32 | +#include <hwcnt/mali_kbase_hwcnt_context.h> |
---|
33 | 33 | #include <mali_kbase_reset_gpu.h> |
---|
34 | 34 | #include <mali_kbase_kinstr_jm.h> |
---|
35 | 35 | #include <backend/gpu/mali_kbase_cache_policy_backend.h> |
---|
.. | .. |
---|
37 | 37 | #include <backend/gpu/mali_kbase_jm_internal.h> |
---|
38 | 38 | #include <backend/gpu/mali_kbase_pm_internal.h> |
---|
39 | 39 | |
---|
40 | | -/* Return whether the specified ringbuffer is empty. HW access lock must be |
---|
41 | | - * held |
---|
| 40 | +/** |
---|
| 41 | + * SLOT_RB_EMPTY - Return whether the specified ringbuffer is empty. |
---|
| 42 | + * |
---|
| 43 | + * @rb: ring buffer |
---|
| 44 | + * |
---|
| 45 | + * Note: HW access lock must be held |
---|
42 | 46 | */ |
---|
43 | 47 | #define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) |
---|
44 | | -/* Return number of atoms currently in the specified ringbuffer. HW access lock |
---|
45 | | - * must be held |
---|
| 48 | + |
---|
| 49 | +/** |
---|
| 50 | + * SLOT_RB_ENTRIES - Return number of atoms currently in the specified ringbuffer. |
---|
| 51 | + * |
---|
| 52 | + * @rb: ring buffer |
---|
| 53 | + * |
---|
| 54 | + * Note: HW access lock must be held |
---|
46 | 55 | */ |
---|
47 | | -#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx) |
---|
| 56 | +#define SLOT_RB_ENTRIES(rb) ((int)(s8)(rb->write_idx - rb->read_idx)) |
---|
48 | 57 | |
---|
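A brief aside on the index arithmetic documented above: because the read/write indices are small unsigned counters, SLOT_RB_ENTRIES() stays correct even after they wrap, since the subtraction is truncated to 8 bits and reinterpreted as a signed count. Below is a minimal standalone model of that behaviour; the struct layout and type widths are assumptions for illustration, not kbase's actual slot_rb definition.

```c
#include <assert.h>
#include <stdint.h>

/* Toy model of a slot ringbuffer's indices (assumed layout, not kbase's). */
struct toy_rb {
	uint8_t write_idx;
	uint8_t read_idx;
};

#define TOY_RB_EMPTY(rb)   ((rb)->write_idx == (rb)->read_idx)
#define TOY_RB_ENTRIES(rb) ((int)(int8_t)((rb)->write_idx - (rb)->read_idx))

int main(void)
{
	/* Indices that have already wrapped past 255 still give the right count:
	 * 2 - 254 == -252, which truncated to 8 bits is 4.
	 */
	struct toy_rb rb = { .write_idx = 2, .read_idx = 254 };

	assert(!TOY_RB_EMPTY(&rb));
	assert(TOY_RB_ENTRIES(&rb) == 4);
	return 0;
}
```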
49 | 58 | static void kbase_gpu_release_atom(struct kbase_device *kbdev, |
---|
50 | 59 | struct kbase_jd_atom *katom, |
---|
.. | .. |
---|
84 | 93 | * |
---|
85 | 94 | * Return: Atom removed from ringbuffer |
---|
86 | 95 | */ |
---|
87 | | -static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, |
---|
88 | | - int js, |
---|
89 | | - ktime_t *end_timestamp) |
---|
| 96 | +static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, unsigned int js, |
---|
| 97 | + ktime_t *end_timestamp) |
---|
90 | 98 | { |
---|
91 | 99 | struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; |
---|
92 | 100 | struct kbase_jd_atom *katom; |
---|
.. | .. |
---|
109 | 117 | return katom; |
---|
110 | 118 | } |
---|
111 | 119 | |
---|
112 | | -struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, |
---|
113 | | - int idx) |
---|
| 120 | +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx) |
---|
114 | 121 | { |
---|
115 | 122 | struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; |
---|
116 | 123 | |
---|
.. | .. |
---|
122 | 129 | return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom; |
---|
123 | 130 | } |
---|
124 | 131 | |
---|
125 | | -struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, |
---|
126 | | - int js) |
---|
| 132 | +struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js) |
---|
127 | 133 | { |
---|
128 | 134 | struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; |
---|
129 | 135 | |
---|
.. | .. |
---|
135 | 141 | |
---|
136 | 142 | bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) |
---|
137 | 143 | { |
---|
138 | | - int js; |
---|
139 | | - int i; |
---|
| 144 | + unsigned int js; |
---|
140 | 145 | |
---|
141 | 146 | lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
142 | 147 | |
---|
143 | 148 | for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { |
---|
| 149 | + int i; |
---|
| 150 | + |
---|
144 | 151 | for (i = 0; i < SLOT_RB_SIZE; i++) { |
---|
145 | 152 | struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); |
---|
146 | 153 | |
---|
.. | .. |
---|
151 | 158 | return false; |
---|
152 | 159 | } |
---|
153 | 160 | |
---|
154 | | -int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) |
---|
| 161 | +int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js) |
---|
155 | 162 | { |
---|
156 | 163 | int nr = 0; |
---|
157 | 164 | int i; |
---|
.. | .. |
---|
169 | 176 | return nr; |
---|
170 | 177 | } |
---|
171 | 178 | |
---|
172 | | -int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) |
---|
| 179 | +int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js) |
---|
173 | 180 | { |
---|
174 | 181 | int nr = 0; |
---|
175 | 182 | int i; |
---|
.. | .. |
---|
184 | 191 | return nr; |
---|
185 | 192 | } |
---|
186 | 193 | |
---|
187 | | -static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, |
---|
188 | | - enum kbase_atom_gpu_rb_state min_rb_state) |
---|
| 194 | +static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, unsigned int js, |
---|
| 195 | + enum kbase_atom_gpu_rb_state min_rb_state) |
---|
189 | 196 | { |
---|
190 | 197 | int nr = 0; |
---|
191 | 198 | int i; |
---|
.. | .. |
---|
235 | 242 | static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, |
---|
236 | 243 | bool secure) |
---|
237 | 244 | { |
---|
238 | | - int js, i; |
---|
| 245 | + unsigned int js; |
---|
239 | 246 | |
---|
240 | 247 | for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { |
---|
| 248 | + int i; |
---|
| 249 | + |
---|
241 | 250 | for (i = 0; i < SLOT_RB_SIZE; i++) { |
---|
242 | 251 | struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, |
---|
243 | 252 | js, i); |
---|
.. | .. |
---|
252 | 261 | return false; |
---|
253 | 262 | } |
---|
254 | 263 | |
---|
255 | | -int kbase_backend_slot_free(struct kbase_device *kbdev, int js) |
---|
| 264 | +int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js) |
---|
256 | 265 | { |
---|
257 | 266 | lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
258 | 267 | |
---|
.. | .. |
---|
304 | 313 | [katom->slot_nr]); |
---|
305 | 314 | |
---|
306 | 315 | /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ |
---|
307 | | - |
---|
| 316 | + fallthrough; |
---|
308 | 317 | case KBASE_ATOM_GPU_RB_READY: |
---|
309 | 318 | /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ |
---|
310 | | - |
---|
| 319 | + fallthrough; |
---|
311 | 320 | case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: |
---|
312 | 321 | break; |
---|
313 | 322 | |
---|
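Throughout this patch the bare `/* fallthrough */` comments are replaced with the kernel's `fallthrough;` pseudo-keyword (defined in <linux/compiler_attributes.h>, expanding to __attribute__((__fallthrough__)) where the compiler supports it), which keeps -Wimplicit-fallthrough quiet while documenting intent. A standalone sketch of the pattern, with the macro re-defined locally so it compiles outside the kernel tree:

```c
/* Local stand-in for the kernel's definition, for this out-of-tree sketch only. */
#ifndef fallthrough
#define fallthrough __attribute__((__fallthrough__))
#endif

static int accumulate(int state, int value)
{
	switch (state) {
	case 2:
		value += 20;
		/* Intentional transition into the lower state's handling. */
		fallthrough;
	case 1:
		value += 1;
		break;
	default:
		break;
	}
	return value;
}
```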
.. | .. |
---|
338 | 347 | katom->protected_state.exit != |
---|
339 | 348 | KBASE_ATOM_EXIT_PROTECTED_CHECK) |
---|
340 | 349 | kbdev->protected_mode_transition = false; |
---|
| 350 | + |
---|
| 351 | + /* If the atom is at KBASE_ATOM_ENTER_PROTECTED_HWCNT state, it means |
---|
| 352 | + * one of two events prevented it from progressing to the next state and |
---|
| 353 | + * ultimately reaching protected mode: |
---|
| 354 | + * - hwcnts were enabled, and the atom had to schedule a worker to |
---|
| 355 | + * disable them. |
---|
| 356 | + * - the hwcnts were already disabled, but some other error occurred. |
---|
| 357 | + * In the first case, if the worker has not yet completed |
---|
| 358 | + * (kbdev->protected_mode_hwcnt_disabled == false), we need to re-enable |
---|
| 359 | + * them and signal to the worker they have already been enabled |
---|
| 360 | + */ |
---|
| 361 | + if (kbase_jd_katom_is_protected(katom) && |
---|
| 362 | + (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_HWCNT)) { |
---|
| 363 | + kbdev->protected_mode_hwcnt_desired = true; |
---|
| 364 | + if (kbdev->protected_mode_hwcnt_disabled) { |
---|
| 365 | + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); |
---|
| 366 | + kbdev->protected_mode_hwcnt_disabled = false; |
---|
| 367 | + } |
---|
| 368 | + } |
---|
| 369 | + |
---|
341 | 370 | /* If the atom has suspended hwcnt but has not yet entered |
---|
342 | 371 | * protected mode, then resume hwcnt now. If the GPU is now in |
---|
343 | 372 | * protected mode then hwcnt will be resumed by GPU reset so |
---|
344 | 373 | * don't resume it here. |
---|
345 | 374 | */ |
---|
346 | 375 | if (kbase_jd_katom_is_protected(katom) && |
---|
347 | | - ((katom->protected_state.enter == |
---|
348 | | - KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || |
---|
349 | | - (katom->protected_state.enter == |
---|
350 | | - KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY))) { |
---|
| 376 | + ((katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || |
---|
| 377 | + (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY) || |
---|
| 378 | + (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_FINISHED))) { |
---|
351 | 379 | WARN_ON(!kbdev->protected_mode_hwcnt_disabled); |
---|
352 | 380 | kbdev->protected_mode_hwcnt_desired = true; |
---|
353 | 381 | if (kbdev->protected_mode_hwcnt_disabled) { |
---|
.. | .. |
---|
367 | 395 | } |
---|
368 | 396 | |
---|
369 | 397 | /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ |
---|
370 | | - |
---|
| 398 | + fallthrough; |
---|
371 | 399 | case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: |
---|
372 | 400 | /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ |
---|
373 | | - |
---|
| 401 | + fallthrough; |
---|
374 | 402 | case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: |
---|
375 | 403 | /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ |
---|
376 | | - |
---|
| 404 | + fallthrough; |
---|
377 | 405 | case KBASE_ATOM_GPU_RB_RETURN_TO_JS: |
---|
378 | 406 | break; |
---|
379 | 407 | } |
---|
.. | .. |
---|
387 | 415 | { |
---|
388 | 416 | lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
389 | 417 | |
---|
| 418 | + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_MARK_FOR_RETURN_TO_JS, |
---|
| 419 | + katom->kctx, katom, katom->jc, |
---|
| 420 | + katom->slot_nr, katom->event_code); |
---|
390 | 421 | kbase_gpu_release_atom(kbdev, katom, NULL); |
---|
391 | 422 | katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; |
---|
392 | 423 | } |
---|
.. | .. |
---|
399 | 430 | * |
---|
400 | 431 | * Return: true if any slots other than @js are busy, false otherwise |
---|
401 | 432 | */ |
---|
402 | | -static inline bool other_slots_busy(struct kbase_device *kbdev, int js) |
---|
| 433 | +static inline bool other_slots_busy(struct kbase_device *kbdev, unsigned int js) |
---|
403 | 434 | { |
---|
404 | | - int slot; |
---|
| 435 | + unsigned int slot; |
---|
405 | 436 | |
---|
406 | 437 | for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) { |
---|
407 | 438 | if (slot == js) |
---|
.. | .. |
---|
495 | 526 | KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev); |
---|
496 | 527 | if (err) { |
---|
497 | 528 | /* |
---|
498 | | - * Failed to switch into protected mode, resume |
---|
499 | | - * GPU hwcnt and fail atom. |
---|
| 529 | + * Failed to switch into protected mode. |
---|
| 530 | + * |
---|
| 531 | + * At this point we expect: |
---|
| 532 | + * katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && |
---|
| 533 | + * katom->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_FINISHED |
---|
| 534 | + * ==> |
---|
| 535 | + * kbdev->protected_mode_hwcnt_disabled = false |
---|
500 | 536 | */ |
---|
501 | | - WARN_ON(!kbdev->protected_mode_hwcnt_disabled); |
---|
502 | | - kbdev->protected_mode_hwcnt_desired = true; |
---|
503 | | - if (kbdev->protected_mode_hwcnt_disabled) { |
---|
504 | | - kbase_hwcnt_context_enable( |
---|
505 | | - kbdev->hwcnt_gpu_ctx); |
---|
506 | | - kbdev->protected_mode_hwcnt_disabled = false; |
---|
507 | | - } |
---|
508 | | - |
---|
509 | 537 | katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; |
---|
510 | 538 | kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); |
---|
511 | 539 | /* |
---|
.. | .. |
---|
525 | 553 | /* |
---|
526 | 554 | * Protected mode sanity checks. |
---|
527 | 555 | */ |
---|
528 | | - KBASE_DEBUG_ASSERT_MSG( |
---|
529 | | - kbase_jd_katom_is_protected(katom[idx]) == |
---|
530 | | - kbase_gpu_in_protected_mode(kbdev), |
---|
531 | | - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", |
---|
532 | | - kbase_jd_katom_is_protected(katom[idx]), |
---|
533 | | - kbase_gpu_in_protected_mode(kbdev)); |
---|
| 556 | + WARN(kbase_jd_katom_is_protected(katom[idx]) != kbase_gpu_in_protected_mode(kbdev), |
---|
| 557 | + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", |
---|
| 558 | + kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); |
---|
534 | 559 | katom[idx]->gpu_rb_state = |
---|
535 | 560 | KBASE_ATOM_GPU_RB_READY; |
---|
536 | 561 | |
---|
.. | .. |
---|
564 | 589 | kbdev->protected_mode_transition = true; |
---|
565 | 590 | |
---|
566 | 591 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
567 | | - /* fallthrough */ |
---|
| 592 | + fallthrough; |
---|
568 | 593 | case KBASE_ATOM_ENTER_PROTECTED_HWCNT: |
---|
569 | 594 | /* See if we can get away with disabling hwcnt atomically */ |
---|
570 | 595 | kbdev->protected_mode_hwcnt_desired = false; |
---|
.. | .. |
---|
607 | 632 | kbase_pm_update_cores_state_nolock(kbdev); |
---|
608 | 633 | |
---|
609 | 634 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
610 | | - /* fallthrough */ |
---|
| 635 | + fallthrough; |
---|
611 | 636 | case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: |
---|
612 | 637 | /* Avoid unnecessary waiting on non-ACE platforms. */ |
---|
613 | 638 | if (kbdev->system_coherency == COHERENCY_ACE) { |
---|
.. | .. |
---|
638 | 663 | KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY; |
---|
639 | 664 | |
---|
640 | 665 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
641 | | - /* fallthrough */ |
---|
| 666 | + fallthrough; |
---|
642 | 667 | case KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: |
---|
643 | 668 | /* |
---|
644 | 669 | * When entering into protected mode, we must ensure that the |
---|
.. | .. |
---|
671 | 696 | return -EAGAIN; |
---|
672 | 697 | |
---|
673 | 698 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
674 | | - /* fallthrough */ |
---|
| 699 | + fallthrough; |
---|
675 | 700 | case KBASE_ATOM_ENTER_PROTECTED_FINISHED: |
---|
676 | 701 | if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { |
---|
677 | 702 | /* |
---|
.. | .. |
---|
742 | 767 | kbase_pm_update_cores_state_nolock(kbdev); |
---|
743 | 768 | |
---|
744 | 769 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
745 | | - /* fallthrough */ |
---|
| 770 | + fallthrough; |
---|
746 | 771 | case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: |
---|
747 | 772 | if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) { |
---|
748 | 773 | /* |
---|
.. | .. |
---|
755 | 780 | KBASE_ATOM_EXIT_PROTECTED_RESET; |
---|
756 | 781 | |
---|
757 | 782 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
758 | | - /* fallthrough */ |
---|
| 783 | + fallthrough; |
---|
759 | 784 | case KBASE_ATOM_EXIT_PROTECTED_RESET: |
---|
| 785 | + /* L2 cache has been turned off (which is needed prior to resetting the GPU |
---|
| 786 | + * to exit protected mode), so the override flag can be safely cleared. |
---|
| 787 | + * Even if L2 cache is powered up again before the actual reset, it should |
---|
| 788 | + * not be an issue (there are no jobs running on the GPU). |
---|
| 789 | + */ |
---|
| 790 | + kbase_pm_protected_override_disable(kbdev); |
---|
| 791 | + |
---|
760 | 792 | /* Issue the reset to the GPU */ |
---|
761 | 793 | err = kbase_gpu_protected_mode_reset(kbdev); |
---|
762 | 794 | |
---|
.. | .. |
---|
765 | 797 | |
---|
766 | 798 | if (err) { |
---|
767 | 799 | kbdev->protected_mode_transition = false; |
---|
768 | | - kbase_pm_protected_override_disable(kbdev); |
---|
769 | 800 | |
---|
770 | 801 | /* Failed to exit protected mode, fail atom */ |
---|
771 | 802 | katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; |
---|
.. | .. |
---|
797 | 828 | KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; |
---|
798 | 829 | |
---|
799 | 830 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
800 | | - /* fallthrough */ |
---|
| 831 | + fallthrough; |
---|
801 | 832 | case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: |
---|
802 | 833 | /* A GPU reset is issued when exiting protected mode. Once the |
---|
803 | 834 | * reset is done all atoms' state will also be reset. For this |
---|
.. | .. |
---|
813 | 844 | |
---|
814 | 845 | void kbase_backend_slot_update(struct kbase_device *kbdev) |
---|
815 | 846 | { |
---|
816 | | - int js; |
---|
| 847 | + unsigned int js; |
---|
817 | 848 | |
---|
818 | 849 | lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
819 | 850 | |
---|
.. | .. |
---|
854 | 885 | KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; |
---|
855 | 886 | |
---|
856 | 887 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
857 | | - /* fallthrough */ |
---|
| 888 | + fallthrough; |
---|
858 | 889 | case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: |
---|
859 | 890 | if (kbase_gpu_check_secure_atoms(kbdev, |
---|
860 | 891 | !kbase_jd_katom_is_protected( |
---|
.. | .. |
---|
874 | 905 | KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION; |
---|
875 | 906 | |
---|
876 | 907 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
877 | | - /* fallthrough */ |
---|
| 908 | + fallthrough; |
---|
878 | 909 | case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: |
---|
879 | 910 | |
---|
880 | 911 | /* |
---|
.. | .. |
---|
909 | 940 | KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; |
---|
910 | 941 | |
---|
911 | 942 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
912 | | - /* fallthrough */ |
---|
| 943 | + fallthrough; |
---|
913 | 944 | case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: |
---|
914 | 945 | if (katom[idx]->will_fail_event_code) { |
---|
915 | 946 | kbase_gpu_mark_atom_for_return(kbdev, |
---|
.. | .. |
---|
934 | 965 | cores_ready = kbase_pm_cores_requested(kbdev, |
---|
935 | 966 | true); |
---|
936 | 967 | |
---|
937 | | - if (katom[idx]->event_code == |
---|
938 | | - BASE_JD_EVENT_PM_EVENT) { |
---|
939 | | - katom[idx]->gpu_rb_state = |
---|
940 | | - KBASE_ATOM_GPU_RB_RETURN_TO_JS; |
---|
941 | | - break; |
---|
942 | | - } |
---|
943 | | - |
---|
944 | 968 | if (!cores_ready) |
---|
945 | 969 | break; |
---|
946 | 970 | |
---|
.. | .. |
---|
948 | 972 | KBASE_ATOM_GPU_RB_READY; |
---|
949 | 973 | |
---|
950 | 974 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
951 | | - /* fallthrough */ |
---|
| 975 | + fallthrough; |
---|
952 | 976 | case KBASE_ATOM_GPU_RB_READY: |
---|
953 | 977 | |
---|
954 | 978 | if (idx == 1) { |
---|
.. | .. |
---|
977 | 1001 | other_slots_busy(kbdev, js)) |
---|
978 | 1002 | break; |
---|
979 | 1003 | |
---|
980 | | -#ifdef CONFIG_MALI_GEM5_BUILD |
---|
981 | | - if (!kbasep_jm_is_js_free(kbdev, js, |
---|
982 | | - katom[idx]->kctx)) |
---|
983 | | - break; |
---|
984 | | -#endif |
---|
985 | 1004 | /* Check if this job needs the cycle counter |
---|
986 | 1005 | * enabled before submission |
---|
987 | 1006 | */ |
---|
988 | 1007 | if (katom[idx]->core_req & BASE_JD_REQ_PERMON) |
---|
989 | | - kbase_pm_request_gpu_cycle_counter_l2_is_on( |
---|
990 | | - kbdev); |
---|
| 1008 | + kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev); |
---|
991 | 1009 | |
---|
992 | | - kbase_job_hw_submit(kbdev, katom[idx], js); |
---|
993 | | - katom[idx]->gpu_rb_state = |
---|
994 | | - KBASE_ATOM_GPU_RB_SUBMITTED; |
---|
| 1010 | + if (!kbase_job_hw_submit(kbdev, katom[idx], js)) { |
---|
| 1011 | + katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED; |
---|
| 1012 | + |
---|
| 1013 | + /* Inform power management at start/finish of |
---|
| 1014 | + * atom so it can update its GPU utilisation |
---|
| 1015 | + * metrics. |
---|
| 1016 | + */ |
---|
| 1017 | + kbase_pm_metrics_update(kbdev, |
---|
| 1018 | + &katom[idx]->start_timestamp); |
---|
| 1019 | + |
---|
| 1020 | + /* Inform platform at start/finish of atom */ |
---|
| 1021 | + kbasep_platform_event_atom_submit(katom[idx]); |
---|
| 1022 | + } else { |
---|
| 1023 | + if (katom[idx]->core_req & BASE_JD_REQ_PERMON) |
---|
| 1024 | + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); |
---|
| 1025 | + |
---|
| 1026 | + break; |
---|
| 1027 | + } |
---|
995 | 1028 | |
---|
996 | 1029 | /* ***TRANSITION TO HIGHER STATE*** */ |
---|
997 | | - /* fallthrough */ |
---|
| 1030 | + fallthrough; |
---|
998 | 1031 | case KBASE_ATOM_GPU_RB_SUBMITTED: |
---|
999 | | - |
---|
1000 | | - /* Inform power management at start/finish of |
---|
1001 | | - * atom so it can update its GPU utilisation |
---|
1002 | | - * metrics. |
---|
1003 | | - */ |
---|
1004 | | - kbase_pm_metrics_update(kbdev, |
---|
1005 | | - &katom[idx]->start_timestamp); |
---|
1006 | | - |
---|
1007 | | - /* Inform platform at start/finish of atom */ |
---|
1008 | | - kbasep_platform_event_atom_submit(katom[idx]); |
---|
1009 | | - |
---|
1010 | 1032 | break; |
---|
1011 | 1033 | |
---|
1012 | 1034 | case KBASE_ATOM_GPU_RB_RETURN_TO_JS: |
---|
.. | .. |
---|
1037 | 1059 | kbase_backend_slot_update(kbdev); |
---|
1038 | 1060 | } |
---|
1039 | 1061 | |
---|
1040 | | -#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \ |
---|
1041 | | - (KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER)) |
---|
| 1062 | +/** |
---|
| 1063 | + * kbase_rb_atom_might_depend - determine if one atom in the slot ringbuffer |
---|
| 1064 | + * might depend on another from the same kctx |
---|
| 1065 | + * @katom_a: dependee atom |
---|
| 1066 | + * @katom_b: atom to query |
---|
| 1067 | + * |
---|
| 1068 | + * This can be used on atoms that belong to different slot ringbuffers |
---|
| 1069 | + * |
---|
| 1070 | + * Return: true if @katom_b might depend on @katom_a, false if it cannot depend. |
---|
| 1071 | + */ |
---|
| 1072 | +static inline bool |
---|
| 1073 | +kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, |
---|
| 1074 | + const struct kbase_jd_atom *katom_b) |
---|
| 1075 | +{ |
---|
| 1076 | + if (katom_a->kctx != katom_b->kctx) |
---|
| 1077 | + return false; |
---|
| 1078 | + return (katom_b->pre_dep || |
---|
| 1079 | + (katom_b->atom_flags & (KBASE_KATOM_FLAG_X_DEP_BLOCKED | |
---|
| 1080 | + KBASE_KATOM_FLAG_FAIL_BLOCKER))); |
---|
| 1081 | +} |
---|
1042 | 1082 | |
---|
1043 | | -bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, |
---|
1044 | | - u32 completion_code) |
---|
| 1083 | +/** |
---|
| 1084 | + * kbase_gpu_irq_evict - evict a slot's JSn_HEAD_NEXT atom from the HW if it is |
---|
| 1085 | + * related to a failed JSn_HEAD atom |
---|
| 1086 | + * @kbdev: kbase device |
---|
| 1087 | + * @js: job slot to check |
---|
| 1088 | + * @completion_code: completion code of the failed atom |
---|
| 1089 | + * |
---|
| 1090 | + * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but |
---|
| 1091 | + * unlike other failure codes we _can_ re-run them. |
---|
| 1092 | + * |
---|
| 1093 | + * This forms step 1 in a 2-step process of removing any related atoms from a |
---|
| 1094 | + * slot's JSn_HEAD_NEXT (ringbuffer index 1), should there have |
---|
| 1095 | + * been a 'failure' on an atom in JSn_HEAD (ringbuffer index 0). |
---|
| 1096 | + * |
---|
| 1097 | + * This step only removes the atoms from the HW, and marks them as |
---|
| 1098 | + * (potentially) ready to run again. |
---|
| 1099 | + * |
---|
| 1100 | + * Step 2 is on marking the JSn_HEAD atom as complete |
---|
| 1101 | + * (kbase_gpu_complete_hw()), to dequeue said atoms and return them to the JS |
---|
| 1102 | + * as appropriate, or re-submit them. |
---|
| 1103 | + * |
---|
| 1104 | + * Hence, this function must evict at a minimum the atoms related to the atom |
---|
| 1105 | + * in JSn_HEAD that kbase_gpu_complete_hw() will also dequeue. It is acceptable |
---|
| 1106 | + * if this function evicts more atoms than kbase_gpu_complete_hw() dequeues, as |
---|
| 1107 | + * the next kbase_backend_slot_update() will resubmit any remaining. |
---|
| 1108 | + * |
---|
| 1109 | + * Return: true if an atom was evicted, false otherwise. |
---|
| 1110 | + */ |
---|
| 1111 | +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code) |
---|
1045 | 1112 | { |
---|
1046 | 1113 | struct kbase_jd_atom *katom; |
---|
1047 | 1114 | struct kbase_jd_atom *next_katom; |
---|
.. | .. |
---|
1049 | 1116 | lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
1050 | 1117 | |
---|
1051 | 1118 | katom = kbase_gpu_inspect(kbdev, js, 0); |
---|
| 1119 | + if (!katom) { |
---|
| 1120 | + dev_err(kbdev->dev, "Can't get a katom from js(%u)\n", js); |
---|
| 1121 | + return false; |
---|
| 1122 | + } |
---|
1052 | 1123 | next_katom = kbase_gpu_inspect(kbdev, js, 1); |
---|
1053 | 1124 | |
---|
1054 | | - if (next_katom && katom->kctx == next_katom->kctx && |
---|
1055 | | - next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && |
---|
1056 | | - (HAS_DEP(next_katom) || next_katom->sched_priority == |
---|
1057 | | - katom->sched_priority) && |
---|
1058 | | - (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) |
---|
1059 | | - != 0 || |
---|
1060 | | - kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) |
---|
1061 | | - != 0)) { |
---|
| 1125 | + if (next_katom && |
---|
| 1126 | + next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && |
---|
| 1127 | + (kbase_rb_atom_might_depend(katom, next_katom) || |
---|
| 1128 | + kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) && |
---|
| 1129 | + (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) != 0 || |
---|
| 1130 | + kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) != 0)) { |
---|
1062 | 1131 | kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), |
---|
1063 | 1132 | JS_COMMAND_NOP); |
---|
1064 | 1133 | next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; |
---|
.. | .. |
---|
1077 | 1146 | if (next_katom->core_req & BASE_JD_REQ_PERMON) |
---|
1078 | 1147 | kbase_pm_release_gpu_cycle_counter_nolock(kbdev); |
---|
1079 | 1148 | |
---|
| 1149 | + /* On evicting the next_katom, the last submission kctx on the |
---|
| 1150 | + * given job slot then reverts to the one that owns katom. This |
---|
| 1151 | + * allows the next submission to determine whether the read-only |
---|
| 1152 | + * shader core L1 cache should be invalidated. |
---|
| 1153 | + */ |
---|
| 1154 | + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = |
---|
| 1155 | + SLOT_RB_TAG_KCTX(katom->kctx); |
---|
| 1156 | + |
---|
1080 | 1157 | return true; |
---|
1081 | 1158 | } |
---|
1082 | 1159 | |
---|
1083 | 1160 | return false; |
---|
1084 | 1161 | } |
---|
1085 | 1162 | |
---|
1086 | | -void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, |
---|
1087 | | - u32 completion_code, |
---|
1088 | | - u64 job_tail, |
---|
1089 | | - ktime_t *end_timestamp) |
---|
| 1163 | +/** |
---|
| 1164 | + * kbase_gpu_complete_hw - complete the atom in a slot's JSn_HEAD |
---|
| 1165 | + * @kbdev: kbase device |
---|
| 1166 | + * @js: job slot to check |
---|
| 1167 | + * @completion_code: completion code of the completed atom |
---|
| 1168 | + * @job_tail: value read from JSn_TAIL, for STOPPED atoms |
---|
| 1169 | + * @end_timestamp: pointer to approximate ktime value when the katom completed |
---|
| 1170 | + * |
---|
| 1171 | + * Among other operations, this also executes step 2 of a 2-step process of |
---|
| 1172 | + * removing any related atoms from a slot's JSn_HEAD_NEXT (ringbuffer index 1), |
---|
| 1173 | + * should there have been a 'failure' on an atom in JSn_HEAD (ringbuffer index |
---|
| 1174 | + * 0). The first step is done in kbase_gpu_irq_evict(). |
---|
| 1175 | + * |
---|
| 1176 | + * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but |
---|
| 1177 | + * unlike other failure codes we _can_ re-run them. |
---|
| 1178 | + * |
---|
| 1179 | + * When the JSn_HEAD atom is considered to be 'failed', then this will dequeue |
---|
| 1180 | + * and return to the JS some (usually all) of the atoms evicted from the HW |
---|
| 1181 | + * during the kbase_gpu_irq_evict() for that JSn_HEAD atom. If it dequeues an |
---|
| 1182 | + * atom, that atom must not have been running or must already be evicted, as |
---|
| 1183 | + * otherwise we would be in the incorrect state of having an atom both running |
---|
| 1184 | + * on the HW and returned to the JS. |
---|
| 1185 | + */ |
---|
| 1186 | + |
---|
| 1187 | +void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code, |
---|
| 1188 | + u64 job_tail, ktime_t *end_timestamp) |
---|
1090 | 1189 | { |
---|
1091 | 1190 | struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); |
---|
1092 | | - struct kbase_context *kctx = katom->kctx; |
---|
| 1191 | + struct kbase_context *kctx = NULL; |
---|
| 1192 | + |
---|
| 1193 | + if (unlikely(!katom)) { |
---|
| 1194 | + dev_err(kbdev->dev, "Can't get a katom from js(%u)\n", js); |
---|
| 1195 | + return; |
---|
| 1196 | + } |
---|
| 1197 | + |
---|
| 1198 | + kctx = katom->kctx; |
---|
1093 | 1199 | |
---|
1094 | 1200 | dev_dbg(kbdev->dev, |
---|
1095 | 1201 | "Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", |
---|
.. | .. |
---|
1133 | 1239 | * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that |
---|
1134 | 1240 | * the atoms on this slot are returned in the correct order. |
---|
1135 | 1241 | */ |
---|
1136 | | - if (next_katom && katom->kctx == next_katom->kctx && |
---|
1137 | | - next_katom->sched_priority == |
---|
1138 | | - katom->sched_priority) { |
---|
| 1242 | + if (next_katom && |
---|
| 1243 | + kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) { |
---|
1139 | 1244 | WARN_ON(next_katom->gpu_rb_state == |
---|
1140 | 1245 | KBASE_ATOM_GPU_RB_SUBMITTED); |
---|
1141 | 1246 | kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); |
---|
.. | .. |
---|
1143 | 1248 | } |
---|
1144 | 1249 | } else if (completion_code != BASE_JD_EVENT_DONE) { |
---|
1145 | 1250 | struct kbasep_js_device_data *js_devdata = &kbdev->js_data; |
---|
1146 | | - int i; |
---|
| 1251 | + unsigned int i; |
---|
1147 | 1252 | |
---|
1148 | | - if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) |
---|
| 1253 | + if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) { |
---|
1149 | 1254 | dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", |
---|
1150 | 1255 | js, completion_code, |
---|
1151 | 1256 | kbase_gpu_exception_name( |
---|
1152 | 1257 | completion_code)); |
---|
| 1258 | + |
---|
| 1259 | + } |
---|
1153 | 1260 | |
---|
1154 | 1261 | #if KBASE_KTRACE_DUMP_ON_JOB_SLOT_ERROR != 0 |
---|
1155 | 1262 | KBASE_KTRACE_DUMP(kbdev); |
---|
.. | .. |
---|
1168 | 1275 | struct kbase_jd_atom *katom_idx1 = |
---|
1169 | 1276 | kbase_gpu_inspect(kbdev, i, 1); |
---|
1170 | 1277 | |
---|
1171 | | - if (katom_idx0 && katom_idx0->kctx == katom->kctx && |
---|
1172 | | - HAS_DEP(katom_idx0) && |
---|
1173 | | - katom_idx0->gpu_rb_state != |
---|
1174 | | - KBASE_ATOM_GPU_RB_SUBMITTED) { |
---|
| 1278 | + if (katom_idx0 && |
---|
| 1279 | + kbase_rb_atom_might_depend(katom, katom_idx0) && |
---|
| 1280 | + katom_idx0->gpu_rb_state != |
---|
| 1281 | + KBASE_ATOM_GPU_RB_SUBMITTED) { |
---|
1175 | 1282 | /* Dequeue katom_idx0 from ringbuffer */ |
---|
1176 | 1283 | kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); |
---|
1177 | 1284 | |
---|
1178 | | - if (katom_idx1 && |
---|
1179 | | - katom_idx1->kctx == katom->kctx |
---|
1180 | | - && HAS_DEP(katom_idx1) && |
---|
1181 | | - katom_idx0->gpu_rb_state != |
---|
1182 | | - KBASE_ATOM_GPU_RB_SUBMITTED) { |
---|
| 1285 | + if (katom_idx1 && kbase_rb_atom_might_depend( |
---|
| 1286 | + katom, katom_idx1) && |
---|
| 1287 | + katom_idx0->gpu_rb_state != |
---|
| 1288 | + KBASE_ATOM_GPU_RB_SUBMITTED) { |
---|
1183 | 1289 | /* Dequeue katom_idx1 from ringbuffer */ |
---|
1184 | 1290 | kbase_gpu_dequeue_atom(kbdev, i, |
---|
1185 | 1291 | end_timestamp); |
---|
.. | .. |
---|
1192 | 1298 | katom_idx0->event_code = BASE_JD_EVENT_STOPPED; |
---|
1193 | 1299 | kbase_jm_return_atom_to_js(kbdev, katom_idx0); |
---|
1194 | 1300 | |
---|
1195 | | - } else if (katom_idx1 && |
---|
1196 | | - katom_idx1->kctx == katom->kctx && |
---|
1197 | | - HAS_DEP(katom_idx1) && |
---|
1198 | | - katom_idx1->gpu_rb_state != |
---|
1199 | | - KBASE_ATOM_GPU_RB_SUBMITTED) { |
---|
| 1301 | + } else if (katom_idx1 && kbase_rb_atom_might_depend( |
---|
| 1302 | + katom, katom_idx1) && |
---|
| 1303 | + katom_idx1->gpu_rb_state != |
---|
| 1304 | + KBASE_ATOM_GPU_RB_SUBMITTED) { |
---|
1200 | 1305 | /* Can not dequeue this atom yet - will be |
---|
1201 | 1306 | * dequeued when atom at idx0 completes |
---|
1202 | 1307 | */ |
---|
.. | .. |
---|
1248 | 1353 | ktime_to_ns(*end_timestamp), |
---|
1249 | 1354 | (u32)next_katom->kctx->id, 0, |
---|
1250 | 1355 | next_katom->work_id); |
---|
1251 | | - kbdev->hwaccess.backend.slot_rb[js].last_context = |
---|
1252 | | - next_katom->kctx; |
---|
1253 | 1356 | } else { |
---|
1254 | 1357 | char js_string[16]; |
---|
1255 | 1358 | |
---|
1256 | | - trace_gpu_sched_switch(kbasep_make_job_slot_string(js, |
---|
1257 | | - js_string, |
---|
1258 | | - sizeof(js_string)), |
---|
1259 | | - ktime_to_ns(ktime_get()), 0, 0, |
---|
1260 | | - 0); |
---|
1261 | | - kbdev->hwaccess.backend.slot_rb[js].last_context = 0; |
---|
| 1359 | + trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string, |
---|
| 1360 | + sizeof(js_string)), |
---|
| 1361 | + ktime_to_ns(ktime_get_raw()), 0, 0, 0); |
---|
1262 | 1362 | } |
---|
1263 | 1363 | } |
---|
1264 | 1364 | #endif |
---|
.. | .. |
---|
1293 | 1393 | |
---|
1294 | 1394 | void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) |
---|
1295 | 1395 | { |
---|
1296 | | - int js; |
---|
| 1396 | + unsigned int js; |
---|
1297 | 1397 | |
---|
1298 | 1398 | lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
1299 | 1399 | |
---|
.. | .. |
---|
1314 | 1414 | if (katom->protected_state.exit == |
---|
1315 | 1415 | KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) { |
---|
1316 | 1416 | /* protected mode sanity checks */ |
---|
1317 | | - KBASE_DEBUG_ASSERT_MSG( |
---|
1318 | | - kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev), |
---|
1319 | | - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", |
---|
1320 | | - kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev)); |
---|
1321 | | - KBASE_DEBUG_ASSERT_MSG( |
---|
1322 | | - (kbase_jd_katom_is_protected(katom) && js == 0) || |
---|
1323 | | - !kbase_jd_katom_is_protected(katom), |
---|
1324 | | - "Protected atom on JS%d not supported", js); |
---|
| 1417 | + WARN(kbase_jd_katom_is_protected(katom) != |
---|
| 1418 | + kbase_gpu_in_protected_mode(kbdev), |
---|
| 1419 | + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", |
---|
| 1420 | + kbase_jd_katom_is_protected(katom), |
---|
| 1421 | + kbase_gpu_in_protected_mode(kbdev)); |
---|
| 1422 | + WARN(!(kbase_jd_katom_is_protected(katom) && js == 0) && |
---|
| 1423 | + kbase_jd_katom_is_protected(katom), |
---|
| 1424 | + "Protected atom on JS%u not supported", js); |
---|
1325 | 1425 | } |
---|
1326 | 1426 | if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) && |
---|
1327 | 1427 | !kbase_ctx_flag(katom->kctx, KCTX_DYING)) |
---|
.. | .. |
---|
1352 | 1452 | katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; |
---|
1353 | 1453 | kbase_jm_complete(kbdev, katom, end_timestamp); |
---|
1354 | 1454 | } |
---|
| 1455 | + |
---|
| 1456 | + /* Clear the slot's last katom submission kctx on reset */ |
---|
| 1457 | + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_NULL_TAG_VAL; |
---|
1355 | 1458 | } |
---|
1356 | 1459 | |
---|
1357 | 1460 | /* Re-enable GPU hardware counters if we're resetting from protected |
---|
.. | .. |
---|
1369 | 1472 | kbase_pm_protected_override_disable(kbdev); |
---|
1370 | 1473 | } |
---|
1371 | 1474 | |
---|
1372 | | -static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, |
---|
1373 | | - int js, |
---|
1374 | | - struct kbase_jd_atom *katom, |
---|
1375 | | - u32 action) |
---|
| 1475 | +/** |
---|
| 1476 | + * should_stop_next_atom - given a soft/hard stop action, determine if the next |
---|
| 1477 | + * atom on a slot should be stopped |
---|
| 1478 | + * @kbdev: kbase device |
---|
| 1479 | + * @head_katom: atom currently in the JSn_HEAD |
---|
| 1480 | + * @next_katom: atom currently in the JSn_HEAD_NEXT |
---|
| 1481 | + * @action: JS_COMMAND_<...> action for soft/hard-stop |
---|
| 1482 | + * |
---|
| 1483 | + * This is used in cases where @head_katom is the target of the soft/hard-stop. |
---|
| 1484 | + * It only makes sense to call this when @head_katom and @next_katom are from |
---|
| 1485 | + * the same slot. |
---|
| 1486 | + * |
---|
| 1487 | + * Return: true if @next_katom should also be stopped with the given action, |
---|
| 1488 | + * false otherwise |
---|
| 1489 | + */ |
---|
| 1490 | +static bool should_stop_next_atom(struct kbase_device *kbdev, |
---|
| 1491 | + const struct kbase_jd_atom *head_katom, |
---|
| 1492 | + const struct kbase_jd_atom *next_katom, |
---|
| 1493 | + u32 action) |
---|
1376 | 1494 | { |
---|
| 1495 | + bool ret = false; |
---|
| 1496 | + u32 hw_action = action & JS_COMMAND_MASK; |
---|
| 1497 | + |
---|
| 1498 | + switch (hw_action) { |
---|
| 1499 | + case JS_COMMAND_SOFT_STOP: |
---|
| 1500 | + ret = kbase_js_atom_runs_before(kbdev, head_katom, next_katom, |
---|
| 1501 | + 0u); |
---|
| 1502 | + break; |
---|
| 1503 | + case JS_COMMAND_HARD_STOP: |
---|
| 1504 | + /* Unlike soft-stop, a hard-stop targeting a particular atom |
---|
| 1505 | + * should not cause atoms from unrelated contexts to be |
---|
| 1506 | + * removed |
---|
| 1507 | + */ |
---|
| 1508 | + ret = (head_katom->kctx == next_katom->kctx); |
---|
| 1509 | + break; |
---|
| 1510 | + default: |
---|
| 1511 | + /* Other stop actions are possible, but the driver should not |
---|
| 1512 | + * be generating them at this point in the call chain |
---|
| 1513 | + */ |
---|
| 1514 | + WARN(1, "Unexpected stop action: 0x%.8x", hw_action); |
---|
| 1515 | + break; |
---|
| 1516 | + } |
---|
| 1517 | + return ret; |
---|
| 1518 | +} |
---|
| 1519 | + |
---|
| 1520 | +static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, unsigned int js, |
---|
| 1521 | + struct kbase_jd_atom *katom, u32 action) |
---|
| 1522 | +{ |
---|
| 1523 | + struct kbase_context *kctx = katom->kctx; |
---|
1377 | 1524 | u32 hw_action = action & JS_COMMAND_MASK; |
---|
1378 | 1525 | |
---|
1379 | 1526 | kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom); |
---|
1380 | 1527 | kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action, |
---|
1381 | 1528 | katom->core_req, katom); |
---|
1382 | | - katom->kctx->blocked_js[js][katom->sched_priority] = true; |
---|
| 1529 | + kbase_jsctx_slot_prio_blocked_set(kctx, js, katom->sched_priority); |
---|
1383 | 1530 | } |
---|
1384 | 1531 | |
---|
1385 | 1532 | static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, |
---|
.. | .. |
---|
1387 | 1534 | u32 action, |
---|
1388 | 1535 | bool disjoint) |
---|
1389 | 1536 | { |
---|
| 1537 | + struct kbase_context *kctx = katom->kctx; |
---|
| 1538 | + |
---|
1390 | 1539 | lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
1391 | 1540 | |
---|
1392 | 1541 | katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; |
---|
1393 | 1542 | kbase_gpu_mark_atom_for_return(kbdev, katom); |
---|
1394 | | - katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true; |
---|
| 1543 | + kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr, |
---|
| 1544 | + katom->sched_priority); |
---|
1395 | 1545 | |
---|
1396 | 1546 | if (disjoint) |
---|
1397 | 1547 | kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, |
---|
.. | .. |
---|
1412 | 1562 | return -1; |
---|
1413 | 1563 | } |
---|
1414 | 1564 | |
---|
1415 | | -bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, |
---|
1416 | | - struct kbase_context *kctx, |
---|
1417 | | - int js, |
---|
1418 | | - struct kbase_jd_atom *katom, |
---|
1419 | | - u32 action) |
---|
| 1565 | +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx, |
---|
| 1566 | + unsigned int js, struct kbase_jd_atom *katom, u32 action) |
---|
1420 | 1567 | { |
---|
1421 | 1568 | struct kbase_jd_atom *katom_idx0; |
---|
| 1569 | + struct kbase_context *kctx_idx0 = NULL; |
---|
1422 | 1570 | struct kbase_jd_atom *katom_idx1; |
---|
| 1571 | + struct kbase_context *kctx_idx1 = NULL; |
---|
1423 | 1572 | |
---|
1424 | 1573 | bool katom_idx0_valid, katom_idx1_valid; |
---|
1425 | 1574 | |
---|
.. | .. |
---|
1433 | 1582 | katom_idx0 = kbase_gpu_inspect(kbdev, js, 0); |
---|
1434 | 1583 | katom_idx1 = kbase_gpu_inspect(kbdev, js, 1); |
---|
1435 | 1584 | |
---|
1436 | | - if (katom_idx0) |
---|
| 1585 | + if (katom_idx0) { |
---|
| 1586 | + kctx_idx0 = katom_idx0->kctx; |
---|
1437 | 1587 | prio_idx0 = katom_idx0->sched_priority; |
---|
1438 | | - if (katom_idx1) |
---|
| 1588 | + } |
---|
| 1589 | + if (katom_idx1) { |
---|
| 1590 | + kctx_idx1 = katom_idx1->kctx; |
---|
1439 | 1591 | prio_idx1 = katom_idx1->sched_priority; |
---|
| 1592 | + } |
---|
1440 | 1593 | |
---|
1441 | 1594 | if (katom) { |
---|
1442 | 1595 | katom_idx0_valid = (katom_idx0 == katom); |
---|
1443 | | - /* If idx0 is to be removed and idx1 is on the same context, |
---|
1444 | | - * then idx1 must also be removed otherwise the atoms might be |
---|
1445 | | - * returned out of order |
---|
1446 | | - */ |
---|
1447 | 1596 | if (katom_idx1) |
---|
1448 | | - katom_idx1_valid = (katom_idx1 == katom) || |
---|
1449 | | - (katom_idx0_valid && |
---|
1450 | | - (katom_idx0->kctx == |
---|
1451 | | - katom_idx1->kctx)); |
---|
| 1597 | + katom_idx1_valid = (katom_idx1 == katom); |
---|
1452 | 1598 | else |
---|
1453 | 1599 | katom_idx1_valid = false; |
---|
1454 | 1600 | } else { |
---|
1455 | | - katom_idx0_valid = (katom_idx0 && |
---|
1456 | | - (!kctx || katom_idx0->kctx == kctx)); |
---|
1457 | | - katom_idx1_valid = (katom_idx1 && |
---|
1458 | | - (!kctx || katom_idx1->kctx == kctx) && |
---|
1459 | | - prio_idx0 == prio_idx1); |
---|
| 1601 | + katom_idx0_valid = (katom_idx0 && (!kctx || kctx_idx0 == kctx)); |
---|
| 1602 | + katom_idx1_valid = (katom_idx1 && (!kctx || kctx_idx1 == kctx)); |
---|
1460 | 1603 | } |
---|
| 1604 | + /* If there's an atom in JSn_HEAD_NEXT that we haven't already decided |
---|
| 1605 | + * to stop, but we're stopping the JSn_HEAD atom, see if they are |
---|
| 1606 | + * related/ordered in some way that would require the same stop action |
---|
| 1607 | + */ |
---|
| 1608 | + if (!katom_idx1_valid && katom_idx0_valid && katom_idx1) |
---|
| 1609 | + katom_idx1_valid = should_stop_next_atom(kbdev, katom_idx0, |
---|
| 1610 | + katom_idx1, action); |
---|
1461 | 1611 | |
---|
1462 | 1612 | if (katom_idx0_valid) |
---|
1463 | 1613 | stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0); |
---|
.. | .. |
---|
1473 | 1623 | katom_idx1->event_code = |
---|
1474 | 1624 | BASE_JD_EVENT_REMOVED_FROM_NEXT; |
---|
1475 | 1625 | kbase_jm_return_atom_to_js(kbdev, katom_idx1); |
---|
1476 | | - katom_idx1->kctx->blocked_js[js][prio_idx1] = |
---|
1477 | | - true; |
---|
| 1626 | + kbase_jsctx_slot_prio_blocked_set(kctx_idx1, js, |
---|
| 1627 | + prio_idx1); |
---|
1478 | 1628 | } |
---|
1479 | 1629 | |
---|
1480 | 1630 | katom_idx0->event_code = |
---|
1481 | 1631 | BASE_JD_EVENT_REMOVED_FROM_NEXT; |
---|
1482 | 1632 | kbase_jm_return_atom_to_js(kbdev, katom_idx0); |
---|
1483 | | - katom_idx0->kctx->blocked_js[js][prio_idx0] = true; |
---|
| 1633 | + kbase_jsctx_slot_prio_blocked_set(kctx_idx0, js, |
---|
| 1634 | + prio_idx0); |
---|
1484 | 1635 | } else { |
---|
1485 | 1636 | /* katom_idx0 is on GPU */ |
---|
1486 | 1637 | if (katom_idx1_valid && katom_idx1->gpu_rb_state == |
---|
.. | .. |
---|
1521 | 1672 | kbase_gpu_remove_atom(kbdev, |
---|
1522 | 1673 | katom_idx1, |
---|
1523 | 1674 | action, true); |
---|
| 1675 | + /* Revert the last_context. */ |
---|
| 1676 | + kbdev->hwaccess.backend.slot_rb[js] |
---|
| 1677 | + .last_kctx_tagged = |
---|
| 1678 | + SLOT_RB_TAG_KCTX(katom_idx0->kctx); |
---|
| 1679 | + |
---|
1524 | 1680 | stop_x_dep_idx1 = |
---|
1525 | 1681 | should_stop_x_dep_slot(katom_idx1); |
---|
1526 | 1682 | |
---|
.. | .. |
---|
1596 | 1752 | kbase_gpu_remove_atom(kbdev, katom_idx1, |
---|
1597 | 1753 | action, |
---|
1598 | 1754 | false); |
---|
| 1755 | + /* Revert the last_context, or mark as purged */ |
---|
| 1756 | + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = |
---|
| 1757 | + kctx_idx0 ? SLOT_RB_TAG_KCTX(katom_idx0->kctx) : |
---|
| 1758 | + SLOT_RB_TAG_PURGED; |
---|
1599 | 1759 | } else { |
---|
1600 | 1760 | /* idx0 has already completed - stop |
---|
1601 | 1761 | * idx1 |
---|
.. | .. |
---|
1625 | 1785 | struct kbase_jd_atom *katom) |
---|
1626 | 1786 | { |
---|
1627 | 1787 | if (katom->need_cache_flush_cores_retained) { |
---|
1628 | | - kbase_gpu_start_cache_clean(kbdev); |
---|
| 1788 | + kbase_gpu_start_cache_clean(kbdev, |
---|
| 1789 | + GPU_COMMAND_CACHE_CLN_INV_FULL); |
---|
1629 | 1790 | kbase_gpu_wait_cache_clean(kbdev); |
---|
1630 | 1791 | |
---|
1631 | 1792 | katom->need_cache_flush_cores_retained = false; |
---|
.. | .. |
---|
1646 | 1807 | base_jd_core_req core_req) |
---|
1647 | 1808 | { |
---|
1648 | 1809 | if (!kbdev->pm.active_count) { |
---|
1649 | | - mutex_lock(&kbdev->js_data.runpool_mutex); |
---|
1650 | | - mutex_lock(&kbdev->pm.lock); |
---|
| 1810 | + kbase_pm_lock(kbdev); |
---|
1651 | 1811 | kbase_pm_update_active(kbdev); |
---|
1652 | | - mutex_unlock(&kbdev->pm.lock); |
---|
1653 | | - mutex_unlock(&kbdev->js_data.runpool_mutex); |
---|
| 1812 | + kbase_pm_unlock(kbdev); |
---|
1654 | 1813 | } |
---|
1655 | 1814 | } |
---|
1656 | 1815 | |
---|
1657 | 1816 | void kbase_gpu_dump_slots(struct kbase_device *kbdev) |
---|
1658 | 1817 | { |
---|
1659 | 1818 | unsigned long flags; |
---|
1660 | | - int js; |
---|
| 1819 | + unsigned int js; |
---|
1661 | 1820 | |
---|
1662 | 1821 | spin_lock_irqsave(&kbdev->hwaccess_lock, flags); |
---|
1663 | 1822 | |
---|
1664 | | - dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n"); |
---|
| 1823 | + dev_info(kbdev->dev, "%s:\n", __func__); |
---|
1665 | 1824 | |
---|
1666 | 1825 | for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { |
---|
1667 | 1826 | int idx; |
---|
.. | .. |
---|
1672 | 1831 | idx); |
---|
1673 | 1832 | |
---|
1674 | 1833 | if (katom) |
---|
1675 | | - dev_info(kbdev->dev, |
---|
1676 | | - " js%d idx%d : katom=%pK gpu_rb_state=%d\n", |
---|
1677 | | - js, idx, katom, katom->gpu_rb_state); |
---|
| 1834 | + dev_info(kbdev->dev, " js%u idx%d : katom=%pK gpu_rb_state=%d\n", |
---|
| 1835 | + js, idx, katom, katom->gpu_rb_state); |
---|
1678 | 1836 | else |
---|
1679 | | - dev_info(kbdev->dev, " js%d idx%d : empty\n", |
---|
1680 | | - js, idx); |
---|
| 1837 | + dev_info(kbdev->dev, " js%u idx%d : empty\n", js, idx); |
---|
1681 | 1838 | } |
---|
1682 | 1839 | } |
---|
1683 | 1840 | |
---|
1684 | 1841 | spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); |
---|
1685 | 1842 | } |
---|
| 1843 | + |
---|
| 1844 | +void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx) |
---|
| 1845 | +{ |
---|
| 1846 | + unsigned int js; |
---|
| 1847 | + bool tracked = false; |
---|
| 1848 | + |
---|
| 1849 | + lockdep_assert_held(&kbdev->hwaccess_lock); |
---|
| 1850 | + |
---|
| 1851 | + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { |
---|
| 1852 | + u64 tagged_kctx = kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged; |
---|
| 1853 | + |
---|
| 1854 | + if (tagged_kctx == SLOT_RB_TAG_KCTX(kctx)) { |
---|
| 1855 | + /* Mark the slot's kctx tracking field as purged */ |
---|
| 1856 | + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_TAG_PURGED; |
---|
| 1857 | + tracked = true; |
---|
| 1858 | + } |
---|
| 1859 | + } |
---|
| 1860 | + |
---|
| 1861 | + if (tracked) { |
---|
| 1862 | + /* The context had run some jobs before the purge, so other slots |
---|
| 1863 | + * still in the SLOT_RB_NULL_TAG_VAL state need to be marked as |
---|
| 1864 | + * purged as well. |
---|
| 1865 | + */ |
---|
| 1866 | + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { |
---|
| 1867 | + if (kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged == |
---|
| 1868 | + SLOT_RB_NULL_TAG_VAL) |
---|
| 1869 | + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = |
---|
| 1870 | + SLOT_RB_TAG_PURGED; |
---|
| 1871 | + } |
---|
| 1872 | + } |
---|
| 1873 | +} |
---|