From 6778948f9de86c3cfaf36725a7c87dcff9ba247f Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Mon, 11 Dec 2023 08:20:59 +0000 Subject: [PATCH] kernel_5.10 no rt --- kernel/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c | 558 +++++++++++++++++++++++++++++++++++++------------------ 1 files changed, 373 insertions(+), 185 deletions(-) diff --git a/kernel/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c b/kernel/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c index a3cb547..7db2b35 100644 --- a/kernel/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c +++ b/kernel/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,7 +29,7 @@ #include <mali_kbase_jm.h> #include <mali_kbase_js.h> #include <tl/mali_kbase_tracepoints.h> -#include <mali_kbase_hwcnt_context.h> +#include <hwcnt/mali_kbase_hwcnt_context.h> #include <mali_kbase_reset_gpu.h> #include <mali_kbase_kinstr_jm.h> #include <backend/gpu/mali_kbase_cache_policy_backend.h> @@ -37,14 +37,23 @@ #include <backend/gpu/mali_kbase_jm_internal.h> #include <backend/gpu/mali_kbase_pm_internal.h> -/* Return whether the specified ringbuffer is empty. HW access lock must be - * held +/** + * SLOT_RB_EMPTY - Return whether the specified ringbuffer is empty. + * + * @rb: ring buffer + * + * Note: HW access lock must be held */ #define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) -/* Return number of atoms currently in the specified ringbuffer. HW access lock - * must be held + +/** + * SLOT_RB_ENTRIES - Return number of atoms currently in the specified ringbuffer. 
+ * + * @rb: ring buffer + * + * Note: HW access lock must be held */ -#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx) +#define SLOT_RB_ENTRIES(rb) ((int)(s8)(rb->write_idx - rb->read_idx)) static void kbase_gpu_release_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom, @@ -84,9 +93,8 @@ * * Return: Atom removed from ringbuffer */ -static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, - int js, - ktime_t *end_timestamp) +static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, unsigned int js, + ktime_t *end_timestamp) { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; struct kbase_jd_atom *katom; @@ -109,8 +117,7 @@ return katom; } -struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, - int idx) +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx) { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; @@ -122,8 +129,7 @@ return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom; } -struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, - int js) +struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js) { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; @@ -135,12 +141,13 @@ bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) { - int js; - int i; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int i; + for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); @@ -151,7 +158,7 @@ return false; } -int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) +int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js) { int nr = 0; int i; @@ -169,7 +176,7 @@ return nr; } -int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) +int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js) { int nr = 0; int i; @@ -184,8 +191,8 @@ return nr; } -static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, - enum kbase_atom_gpu_rb_state min_rb_state) +static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, unsigned int js, + enum kbase_atom_gpu_rb_state min_rb_state) { int nr = 0; int i; @@ -235,9 +242,11 @@ static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, bool secure) { - int js, i; + unsigned int js; for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int i; + for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); @@ -252,7 +261,7 @@ return false; } -int kbase_backend_slot_free(struct kbase_device *kbdev, int js) +int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js) { lockdep_assert_held(&kbdev->hwaccess_lock); @@ -304,10 +313,10 @@ [katom->slot_nr]); /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - + fallthrough; case KBASE_ATOM_GPU_RB_READY: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - + fallthrough; case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: break; @@ -338,16 +347,35 @@ katom->protected_state.exit != KBASE_ATOM_EXIT_PROTECTED_CHECK) kbdev->protected_mode_transition = false; + + /* If the atom is at KBASE_ATOM_ENTER_PROTECTED_HWCNT state, it means + * one of two events prevented it from progressing to the next state and + * ultimately reach protected mode: + * - hwcnts were enabled, and the atom had to schedule a worker to + * 
disable them. + * - the hwcnts were already disabled, but some other error occurred. + * In the first case, if the worker has not yet completed + * (kbdev->protected_mode_hwcnt_disabled == false), we need to re-enable + * them and signal to the worker they have already been enabled + */ + if (kbase_jd_katom_is_protected(katom) && + (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_HWCNT)) { + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } + } + /* If the atom has suspended hwcnt but has not yet entered * protected mode, then resume hwcnt now. If the GPU is now in * protected mode then hwcnt will be resumed by GPU reset so * don't resume it here. */ if (kbase_jd_katom_is_protected(katom) && - ((katom->protected_state.enter == - KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || - (katom->protected_state.enter == - KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY))) { + ((katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || + (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY) || + (katom->protected_state.enter == KBASE_ATOM_ENTER_PROTECTED_FINISHED))) { WARN_ON(!kbdev->protected_mode_hwcnt_disabled); kbdev->protected_mode_hwcnt_desired = true; if (kbdev->protected_mode_hwcnt_disabled) { @@ -367,13 +395,13 @@ } /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - + fallthrough; case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - + fallthrough; case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - + fallthrough; case KBASE_ATOM_GPU_RB_RETURN_TO_JS: break; } @@ -387,6 +415,9 @@ { lockdep_assert_held(&kbdev->hwaccess_lock); + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_MARK_FOR_RETURN_TO_JS, + katom->kctx, katom, katom->jc, + katom->slot_nr, katom->event_code); kbase_gpu_release_atom(kbdev, katom, NULL); katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; } @@ -399,9 +430,9 @@ * * Return: true if any slots other than @js are busy, false otherwise */ -static inline bool other_slots_busy(struct kbase_device *kbdev, int js) +static inline bool other_slots_busy(struct kbase_device *kbdev, unsigned int js) { - int slot; + unsigned int slot; for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) { if (slot == js) @@ -495,17 +526,14 @@ KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev); if (err) { /* - * Failed to switch into protected mode, resume - * GPU hwcnt and fail atom. + * Failed to switch into protected mode. + * + * At this point we expect: + * katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && + * katom->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_FINISHED + * ==> + * kbdev->protected_mode_hwcnt_disabled = false */ - WARN_ON(!kbdev->protected_mode_hwcnt_disabled); - kbdev->protected_mode_hwcnt_desired = true; - if (kbdev->protected_mode_hwcnt_disabled) { - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); - kbdev->protected_mode_hwcnt_disabled = false; - } - katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); /* @@ -525,12 +553,9 @@ /* * Protected mode sanity checks. 
*/ - KBASE_DEBUG_ASSERT_MSG( - kbase_jd_katom_is_protected(katom[idx]) == - kbase_gpu_in_protected_mode(kbdev), - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", - kbase_jd_katom_is_protected(katom[idx]), - kbase_gpu_in_protected_mode(kbdev)); + WARN(kbase_jd_katom_is_protected(katom[idx]) != kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; @@ -564,7 +589,7 @@ kbdev->protected_mode_transition = true; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_ENTER_PROTECTED_HWCNT: /* See if we can get away with disabling hwcnt atomically */ kbdev->protected_mode_hwcnt_desired = false; @@ -607,7 +632,7 @@ kbase_pm_update_cores_state_nolock(kbdev); /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: /* Avoid unnecessary waiting on non-ACE platforms. */ if (kbdev->system_coherency == COHERENCY_ACE) { @@ -638,7 +663,7 @@ KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: /* * When entering into protected mode, we must ensure that the @@ -671,7 +696,7 @@ return -EAGAIN; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_ENTER_PROTECTED_FINISHED: if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { /* @@ -742,7 +767,7 @@ kbase_pm_update_cores_state_nolock(kbdev); /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) { /* @@ -755,8 +780,15 @@ KBASE_ATOM_EXIT_PROTECTED_RESET; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_EXIT_PROTECTED_RESET: + /* L2 cache has been turned off (which is needed prior to the reset of GPU + * to exit the protected mode), so the override flag can be safely cleared. + * Even if L2 cache is powered up again before the actual reset, it should + * not be an issue (there are no jobs running on the GPU). + */ + kbase_pm_protected_override_disable(kbdev); + /* Issue the reset to the GPU */ err = kbase_gpu_protected_mode_reset(kbdev); @@ -765,7 +797,6 @@ if (err) { kbdev->protected_mode_transition = false; - kbase_pm_protected_override_disable(kbdev); /* Failed to exit protected mode, fail atom */ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; @@ -797,7 +828,7 @@ KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: /* A GPU reset is issued when exiting protected mode. Once the * reset is done all atoms' state will also be reset. 
For this @@ -813,7 +844,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) { - int js; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -854,7 +885,7 @@ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: if (kbase_gpu_check_secure_atoms(kbdev, !kbase_jd_katom_is_protected( @@ -874,7 +905,7 @@ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: /* @@ -909,7 +940,7 @@ KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: if (katom[idx]->will_fail_event_code) { kbase_gpu_mark_atom_for_return(kbdev, @@ -934,13 +965,6 @@ cores_ready = kbase_pm_cores_requested(kbdev, true); - if (katom[idx]->event_code == - BASE_JD_EVENT_PM_EVENT) { - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_RETURN_TO_JS; - break; - } - if (!cores_ready) break; @@ -948,7 +972,7 @@ KBASE_ATOM_GPU_RB_READY; /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_GPU_RB_READY: if (idx == 1) { @@ -977,36 +1001,34 @@ other_slots_busy(kbdev, js)) break; -#ifdef CONFIG_MALI_GEM5_BUILD - if (!kbasep_jm_is_js_free(kbdev, js, - katom[idx]->kctx)) - break; -#endif /* Check if this job needs the cycle counter * enabled before submission */ if (katom[idx]->core_req & BASE_JD_REQ_PERMON) - kbase_pm_request_gpu_cycle_counter_l2_is_on( - kbdev); + kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev); - kbase_job_hw_submit(kbdev, katom[idx], js); - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_SUBMITTED; + if (!kbase_job_hw_submit(kbdev, katom[idx], js)) { + katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED; + + /* Inform power management at start/finish of + * atom so it can update its GPU utilisation + * metrics. + */ + kbase_pm_metrics_update(kbdev, + &katom[idx]->start_timestamp); + + /* Inform platform at start/finish of atom */ + kbasep_platform_event_atom_submit(katom[idx]); + } else { + if (katom[idx]->core_req & BASE_JD_REQ_PERMON) + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + + break; + } /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ + fallthrough; case KBASE_ATOM_GPU_RB_SUBMITTED: - - /* Inform power management at start/finish of - * atom so it can update its GPU utilisation - * metrics. - */ - kbase_pm_metrics_update(kbdev, - &katom[idx]->start_timestamp); - - /* Inform platform at start/finish of atom */ - kbasep_platform_event_atom_submit(katom[idx]); - break; case KBASE_ATOM_GPU_RB_RETURN_TO_JS: @@ -1037,11 +1059,56 @@ kbase_backend_slot_update(kbdev); } -#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \ - (KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER)) +/** + * kbase_rb_atom_might_depend - determine if one atom in the slot ringbuffer + * might depend on another from the same kctx + * @katom_a: dependee atom + * @katom_b: atom to query + * + * This can be used on atoms that belong to different slot ringbuffers + * + * Return: true if @katom_b might depend on @katom_a, false if it cannot depend. 
+ */ +static inline bool +kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, + const struct kbase_jd_atom *katom_b) +{ + if (katom_a->kctx != katom_b->kctx) + return false; + return (katom_b->pre_dep || + (katom_b->atom_flags & (KBASE_KATOM_FLAG_X_DEP_BLOCKED | + KBASE_KATOM_FLAG_FAIL_BLOCKER))); +} -bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, - u32 completion_code) +/** + * kbase_gpu_irq_evict - evict a slot's JSn_HEAD_NEXT atom from the HW if it is + * related to a failed JSn_HEAD atom + * @kbdev: kbase device + * @js: job slot to check + * @completion_code: completion code of the failed atom + * + * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but + * unlike other failure codes we _can_ re-run them. + * + * This forms step 1 in a 2-step process of removing any related atoms from a + * slot's JSn_HEAD_NEXT (ringbuffer index 1), should there have + * been a 'failure' on an atom in JSn_HEAD (ringbuffer index 0). + * + * This step only removes the atoms from the HW, and marks them as + * (potentially) ready to run again. + * + * Step 2 is on marking the JSn_HEAD atom as complete + * (kbase_gpu_complete_hw()), to dequeue said atoms and return them to the JS + * as appropriate, or re-submit them. + * + * Hence, this function must evict at a minimum the atoms related to the atom + * in JSn_HEAD that kbase_gpu_complete_hw() will also dequeue. It is acceptable + * if this function evicts more atoms than kbase_gpu_complete_hw() dequeues, as + * the next kbase_backend_slot_update() will resubmit any remaining. + * + * Return: true if an atom was evicted, false otherwise. + */ +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code) { struct kbase_jd_atom *katom; struct kbase_jd_atom *next_katom; @@ -1049,16 +1116,18 @@ lockdep_assert_held(&kbdev->hwaccess_lock); katom = kbase_gpu_inspect(kbdev, js, 0); + if (!katom) { + dev_err(kbdev->dev, "Can't get a katom from js(%u)\n", js); + return false; + } next_katom = kbase_gpu_inspect(kbdev, js, 1); - if (next_katom && katom->kctx == next_katom->kctx && - next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && - (HAS_DEP(next_katom) || next_katom->sched_priority == - katom->sched_priority) && - (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) - != 0 || - kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) - != 0)) { + if (next_katom && + next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && + (kbase_rb_atom_might_depend(katom, next_katom) || + kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) && + (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) != 0 || + kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) != 0)) { kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), JS_COMMAND_NOP); next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; @@ -1077,19 +1146,56 @@ if (next_katom->core_req & BASE_JD_REQ_PERMON) kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + /* On evicting the next_katom, the last submission kctx on the + * given job slot then reverts back to the one that owns katom. + * The aim is to enable the next submission that can determine + * if the read only shader core L1 cache should be invalidated. 
+ */ + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = + SLOT_RB_TAG_KCTX(katom->kctx); + return true; } return false; } -void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, - u32 completion_code, - u64 job_tail, - ktime_t *end_timestamp) +/** + * kbase_gpu_complete_hw - complete the atom in a slot's JSn_HEAD + * @kbdev: kbase device + * @js: job slot to check + * @completion_code: completion code of the completed atom + * @job_tail: value read from JSn_TAIL, for STOPPED atoms + * @end_timestamp: pointer to approximate ktime value when the katom completed + * + * Among other operations, this also executes step 2 of a 2-step process of + * removing any related atoms from a slot's JSn_HEAD_NEXT (ringbuffer index 1), + * should there have been a 'failure' on an atom in JSn_HEAD (ringbuffer index + * 0). The first step is done in kbase_gpu_irq_evict(). + * + * Note: 'STOPPED' atoms are considered 'failed', as they are in the HW, but + * unlike other failure codes we _can_ re-run them. + * + * When the JSn_HEAD atom is considered to be 'failed', then this will dequeue + * and return to the JS some (usually all) of the atoms evicted from the HW + * during the kbase_gpu_irq_evict() for that JSn_HEAD atom. If it dequeues an + * atom, that atom must not have been running or must already be evicted, as + * otherwise we would be in the incorrect state of having an atom both running + * on the HW and returned to the JS. + */ + +void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code, + u64 job_tail, ktime_t *end_timestamp) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); - struct kbase_context *kctx = katom->kctx; + struct kbase_context *kctx = NULL; + + if (unlikely(!katom)) { + dev_err(kbdev->dev, "Can't get a katom from js(%d)\n", js); + return; + } + + kctx = katom->kctx; dev_dbg(kbdev->dev, "Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", @@ -1133,9 +1239,8 @@ * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that * the atoms on this slot are returned in the correct order. 
*/ - if (next_katom && katom->kctx == next_katom->kctx && - next_katom->sched_priority == - katom->sched_priority) { + if (next_katom && + kbase_js_atom_runs_before(kbdev, katom, next_katom, 0u)) { WARN_ON(next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED); kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); @@ -1143,13 +1248,15 @@ } } else if (completion_code != BASE_JD_EVENT_DONE) { struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - int i; + unsigned int i; - if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) + if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) { dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", js, completion_code, kbase_gpu_exception_name( completion_code)); + + } #if KBASE_KTRACE_DUMP_ON_JOB_SLOT_ERROR != 0 KBASE_KTRACE_DUMP(kbdev); @@ -1168,18 +1275,17 @@ struct kbase_jd_atom *katom_idx1 = kbase_gpu_inspect(kbdev, i, 1); - if (katom_idx0 && katom_idx0->kctx == katom->kctx && - HAS_DEP(katom_idx0) && - katom_idx0->gpu_rb_state != - KBASE_ATOM_GPU_RB_SUBMITTED) { + if (katom_idx0 && + kbase_rb_atom_might_depend(katom, katom_idx0) && + katom_idx0->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { /* Dequeue katom_idx0 from ringbuffer */ kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); - if (katom_idx1 && - katom_idx1->kctx == katom->kctx - && HAS_DEP(katom_idx1) && - katom_idx0->gpu_rb_state != - KBASE_ATOM_GPU_RB_SUBMITTED) { + if (katom_idx1 && kbase_rb_atom_might_depend( + katom, katom_idx1) && + katom_idx0->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { /* Dequeue katom_idx1 from ringbuffer */ kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); @@ -1192,11 +1298,10 @@ katom_idx0->event_code = BASE_JD_EVENT_STOPPED; kbase_jm_return_atom_to_js(kbdev, katom_idx0); - } else if (katom_idx1 && - katom_idx1->kctx == katom->kctx && - HAS_DEP(katom_idx1) && - katom_idx1->gpu_rb_state != - KBASE_ATOM_GPU_RB_SUBMITTED) { + } else if (katom_idx1 && kbase_rb_atom_might_depend( + katom, katom_idx1) && + katom_idx1->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { /* Can not dequeue this atom yet - will be * dequeued when atom at idx0 completes */ @@ -1248,17 +1353,12 @@ ktime_to_ns(*end_timestamp), (u32)next_katom->kctx->id, 0, next_katom->work_id); - kbdev->hwaccess.backend.slot_rb[js].last_context = - next_katom->kctx; } else { char js_string[16]; - trace_gpu_sched_switch(kbasep_make_job_slot_string(js, - js_string, - sizeof(js_string)), - ktime_to_ns(ktime_get()), 0, 0, - 0); - kbdev->hwaccess.backend.slot_rb[js].last_context = 0; + trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string, + sizeof(js_string)), + ktime_to_ns(ktime_get_raw()), 0, 0, 0); } } #endif @@ -1293,7 +1393,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) { - int js; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1314,14 +1414,14 @@ if (katom->protected_state.exit == KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) { /* protected mode sanity checks */ - KBASE_DEBUG_ASSERT_MSG( - kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev), - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", - kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev)); - KBASE_DEBUG_ASSERT_MSG( - (kbase_jd_katom_is_protected(katom) && js == 0) || - !kbase_jd_katom_is_protected(katom), - "Protected atom on JS%d not supported", js); + WARN(kbase_jd_katom_is_protected(katom) != + kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + 
kbase_jd_katom_is_protected(katom), + kbase_gpu_in_protected_mode(kbdev)); + WARN(!(kbase_jd_katom_is_protected(katom) && js == 0) && + kbase_jd_katom_is_protected(katom), + "Protected atom on JS%u not supported", js); } if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) && !kbase_ctx_flag(katom->kctx, KCTX_DYING)) @@ -1352,6 +1452,9 @@ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; kbase_jm_complete(kbdev, katom, end_timestamp); } + + /* Clear the slot's last katom submission kctx on reset */ + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_NULL_TAG_VAL; } /* Re-enable GPU hardware counters if we're resetting from protected @@ -1369,17 +1472,61 @@ kbase_pm_protected_override_disable(kbdev); } -static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, - int js, - struct kbase_jd_atom *katom, - u32 action) +/** + * should_stop_next_atom - given a soft/hard stop action, determine if the next + * atom on a slot should be stopped + * @kbdev: kbase devices + * @head_katom: atom currently in the JSn_HEAD + * @next_katom: atom currently in the JSn_HEAD_NEXT + * @action: JS_COMMAND_<...> action for soft/hard-stop + * + * This is used in cases where @head_katom is the target of the soft/hard-stop. + * It only makes sense to call this when @head_katom and @next_katom are from + * the same slot. + * + * Return: true if @next_katom should also be stopped with the given action, + * false otherwise + */ +static bool should_stop_next_atom(struct kbase_device *kbdev, + const struct kbase_jd_atom *head_katom, + const struct kbase_jd_atom *next_katom, + u32 action) { + bool ret = false; + u32 hw_action = action & JS_COMMAND_MASK; + + switch (hw_action) { + case JS_COMMAND_SOFT_STOP: + ret = kbase_js_atom_runs_before(kbdev, head_katom, next_katom, + 0u); + break; + case JS_COMMAND_HARD_STOP: + /* Unlike soft-stop, a hard-stop targeting a particular atom + * should not cause atoms from unrelated contexts to be + * removed + */ + ret = (head_katom->kctx == next_katom->kctx); + break; + default: + /* Other stop actions are possible, but the driver should not + * be generating them at this point in the call chain + */ + WARN(1, "Unexpected stop action: 0x%.8x", hw_action); + break; + } + return ret; +} + +static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, unsigned int js, + struct kbase_jd_atom *katom, u32 action) +{ + struct kbase_context *kctx = katom->kctx; u32 hw_action = action & JS_COMMAND_MASK; kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom); kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action, katom->core_req, katom); - katom->kctx->blocked_js[js][katom->sched_priority] = true; + kbase_jsctx_slot_prio_blocked_set(kctx, js, katom->sched_priority); } static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, @@ -1387,11 +1534,14 @@ u32 action, bool disjoint) { + struct kbase_context *kctx = katom->kctx; + lockdep_assert_held(&kbdev->hwaccess_lock); katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; kbase_gpu_mark_atom_for_return(kbdev, katom); - katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true; + kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr, + katom->sched_priority); if (disjoint) kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, @@ -1412,14 +1562,13 @@ return -1; } -bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js, - struct kbase_jd_atom *katom, - u32 action) +bool kbase_backend_soft_hard_stop_slot(struct 
kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js, struct kbase_jd_atom *katom, u32 action) { struct kbase_jd_atom *katom_idx0; + struct kbase_context *kctx_idx0 = NULL; struct kbase_jd_atom *katom_idx1; + struct kbase_context *kctx_idx1 = NULL; bool katom_idx0_valid, katom_idx1_valid; @@ -1433,31 +1582,32 @@ katom_idx0 = kbase_gpu_inspect(kbdev, js, 0); katom_idx1 = kbase_gpu_inspect(kbdev, js, 1); - if (katom_idx0) + if (katom_idx0) { + kctx_idx0 = katom_idx0->kctx; prio_idx0 = katom_idx0->sched_priority; - if (katom_idx1) + } + if (katom_idx1) { + kctx_idx1 = katom_idx1->kctx; prio_idx1 = katom_idx1->sched_priority; + } if (katom) { katom_idx0_valid = (katom_idx0 == katom); - /* If idx0 is to be removed and idx1 is on the same context, - * then idx1 must also be removed otherwise the atoms might be - * returned out of order - */ if (katom_idx1) - katom_idx1_valid = (katom_idx1 == katom) || - (katom_idx0_valid && - (katom_idx0->kctx == - katom_idx1->kctx)); + katom_idx1_valid = (katom_idx1 == katom); else katom_idx1_valid = false; } else { - katom_idx0_valid = (katom_idx0 && - (!kctx || katom_idx0->kctx == kctx)); - katom_idx1_valid = (katom_idx1 && - (!kctx || katom_idx1->kctx == kctx) && - prio_idx0 == prio_idx1); + katom_idx0_valid = (katom_idx0 && (!kctx || kctx_idx0 == kctx)); + katom_idx1_valid = (katom_idx1 && (!kctx || kctx_idx1 == kctx)); } + /* If there's an atom in JSn_HEAD_NEXT that we haven't already decided + * to stop, but we're stopping the JSn_HEAD atom, see if they are + * related/ordered in some way that would require the same stop action + */ + if (!katom_idx1_valid && katom_idx0_valid && katom_idx1) + katom_idx1_valid = should_stop_next_atom(kbdev, katom_idx0, + katom_idx1, action); if (katom_idx0_valid) stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0); @@ -1473,14 +1623,15 @@ katom_idx1->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; kbase_jm_return_atom_to_js(kbdev, katom_idx1); - katom_idx1->kctx->blocked_js[js][prio_idx1] = - true; + kbase_jsctx_slot_prio_blocked_set(kctx_idx1, js, + prio_idx1); } katom_idx0->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; kbase_jm_return_atom_to_js(kbdev, katom_idx0); - katom_idx0->kctx->blocked_js[js][prio_idx0] = true; + kbase_jsctx_slot_prio_blocked_set(kctx_idx0, js, + prio_idx0); } else { /* katom_idx0 is on GPU */ if (katom_idx1_valid && katom_idx1->gpu_rb_state == @@ -1521,6 +1672,11 @@ kbase_gpu_remove_atom(kbdev, katom_idx1, action, true); + /* Revert the last_context. */ + kbdev->hwaccess.backend.slot_rb[js] + .last_kctx_tagged = + SLOT_RB_TAG_KCTX(katom_idx0->kctx); + stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1); @@ -1596,6 +1752,10 @@ kbase_gpu_remove_atom(kbdev, katom_idx1, action, false); + /* Revert the last_context, or mark as purged */ + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = + kctx_idx0 ? 
SLOT_RB_TAG_KCTX(katom_idx0->kctx) : + SLOT_RB_TAG_PURGED; } else { /* idx0 has already completed - stop * idx1 @@ -1625,7 +1785,8 @@ struct kbase_jd_atom *katom) { if (katom->need_cache_flush_cores_retained) { - kbase_gpu_start_cache_clean(kbdev); + kbase_gpu_start_cache_clean(kbdev, + GPU_COMMAND_CACHE_CLN_INV_FULL); kbase_gpu_wait_cache_clean(kbdev); katom->need_cache_flush_cores_retained = false; @@ -1646,22 +1807,20 @@ base_jd_core_req core_req) { if (!kbdev->pm.active_count) { - mutex_lock(&kbdev->js_data.runpool_mutex); - mutex_lock(&kbdev->pm.lock); + kbase_pm_lock(kbdev); kbase_pm_update_active(kbdev); - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&kbdev->js_data.runpool_mutex); + kbase_pm_unlock(kbdev); } } void kbase_gpu_dump_slots(struct kbase_device *kbdev) { unsigned long flags; - int js; + unsigned int js; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n"); + dev_info(kbdev->dev, "%s:\n", __func__); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { int idx; @@ -1672,14 +1831,43 @@ idx); if (katom) - dev_info(kbdev->dev, - " js%d idx%d : katom=%pK gpu_rb_state=%d\n", - js, idx, katom, katom->gpu_rb_state); + dev_info(kbdev->dev, " js%u idx%d : katom=%pK gpu_rb_state=%d\n", + js, idx, katom, katom->gpu_rb_state); else - dev_info(kbdev->dev, " js%d idx%d : empty\n", - js, idx); + dev_info(kbdev->dev, " js%u idx%d : empty\n", js, idx); } } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } + +void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx) +{ + unsigned int js; + bool tracked = false; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + u64 tagged_kctx = kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged; + + if (tagged_kctx == SLOT_RB_TAG_KCTX(kctx)) { + /* Marking the slot kctx tracking field is purged */ + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = SLOT_RB_TAG_PURGED; + tracked = true; + } + } + + if (tracked) { + /* The context had run some jobs before the purge, other slots + * in SLOT_RB_NULL_TAG_VAL condition needs to be marked as + * purged as well. + */ + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + if (kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged == + SLOT_RB_NULL_TAG_VAL) + kbdev->hwaccess.backend.slot_rb[js].last_kctx_tagged = + SLOT_RB_TAG_PURGED; + } + } +} -- Gitblit v1.6.2
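
Illustrative notes (hedged sketches, not part of the applied diff):

The SLOT_RB_EMPTY()/SLOT_RB_ENTRIES() macros near the top of the patch rely on u8 index
arithmetic plus an s8 cast to stay correct across wrap-around. Below is a minimal standalone
sketch of that arithmetic; the struct, names and test harness are hypothetical, and only the
index widths, the signed cast and a two-entry SLOT_RB_SIZE mirror the driver.

/*
 * Standalone model of the slot ring-buffer occupancy arithmetic. The indices
 * increase monotonically and wrap at 256; occupancy stays correct because the
 * driver never lets write_idx run more than SLOT_RB_SIZE entries ahead of
 * read_idx, so the signed 8-bit difference is always 0..SLOT_RB_SIZE.
 */
#include <stdint.h>
#include <stdio.h>

#define SLOT_RB_SIZE 2                       /* two atoms per job slot */
#define SLOT_RB_MASK (SLOT_RB_SIZE - 1)

struct demo_slot_rb {
	uint8_t write_idx;                   /* next entry to be written */
	uint8_t read_idx;                    /* oldest entry still queued */
};

static int demo_entries(const struct demo_slot_rb *rb)
{
	/* signed 8-bit difference gives the occupancy even after u8 wrap */
	return (int)(int8_t)(rb->write_idx - rb->read_idx);
}

int main(void)
{
	struct demo_slot_rb rb = { .write_idx = 255, .read_idx = 254 };

	printf("entries=%d empty=%d\n", demo_entries(&rb),
	       rb.write_idx == rb.read_idx);       /* entries=1 empty=0 */

	rb.write_idx++;                             /* wraps 255 -> 0 */
	printf("entries=%d slot=%d\n", demo_entries(&rb),
	       rb.read_idx & SLOT_RB_MASK);         /* entries=2, array slot 0 */
	return 0;
}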
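
The patch also introduces per-slot tracking of the last submitted context
(last_kctx_tagged with SLOT_RB_TAG_KCTX()/SLOT_RB_NULL_TAG_VAL/SLOT_RB_TAG_PURGED) and a purge
walk in kbase_backend_slot_kctx_purge_locked(). The sketch below models that bookkeeping in
isolation; the concrete tag values and helper names are assumptions inferred from how the patch
uses them, not the driver's own definitions.

/*
 * Standalone model of the last-submitted-kctx tags. A context pointer is
 * stored as an opaque 64-bit tag; a slot that never ran anything holds the
 * null tag; a purged slot holds a sentinel that cannot alias a real pointer
 * (an odd value in this model).
 */
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define NUM_JOB_SLOTS      3
#define RB_NULL_TAG_VAL    0ULL               /* no submission recorded yet */
#define RB_TAG_PURGED      1ULL               /* sentinel, never a valid pointer */
#define RB_TAG_KCTX(kctx)  ((uint64_t)(uintptr_t)(kctx))

struct demo_kctx { int id; };

static uint64_t last_kctx_tagged[NUM_JOB_SLOTS];

/* Mirror of the purge walk: drop the tag for @kctx on every slot and, if the
 * context had run anywhere, also mark still-null slots as purged so the next
 * submission cannot wrongly skip its cache-maintenance decision.
 */
static void demo_slot_kctx_purge(struct demo_kctx *kctx)
{
	bool tracked = false;
	int js;

	for (js = 0; js < NUM_JOB_SLOTS; js++) {
		if (last_kctx_tagged[js] == RB_TAG_KCTX(kctx)) {
			last_kctx_tagged[js] = RB_TAG_PURGED;
			tracked = true;
		}
	}

	if (!tracked)
		return;

	for (js = 0; js < NUM_JOB_SLOTS; js++)
		if (last_kctx_tagged[js] == RB_NULL_TAG_VAL)
			last_kctx_tagged[js] = RB_TAG_PURGED;
}

int main(void)
{
	struct demo_kctx a = { .id = 1 }, b = { .id = 2 };
	int js;

	last_kctx_tagged[0] = RB_TAG_KCTX(&a);    /* kctx a last ran on slot 0 */
	last_kctx_tagged[1] = RB_TAG_KCTX(&b);    /* kctx b last ran on slot 1 */
	/* slot 2 never used: stays RB_NULL_TAG_VAL */

	demo_slot_kctx_purge(&a);

	for (js = 0; js < NUM_JOB_SLOTS; js++)
		printf("slot %d: %s\n", js,
		       last_kctx_tagged[js] == RB_TAG_PURGED ? "purged" :
		       last_kctx_tagged[js] == RB_NULL_TAG_VAL ? "null" : "kctx");
	/* Expected: slot 0 purged, slot 1 still tagged with b, slot 2 purged */
	return 0;
}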
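
Finally, should_stop_next_atom() decides whether a soft/hard-stop aimed at the JSn_HEAD atom
must also stop the atom in JSn_HEAD_NEXT. The sketch below is a simplified model of that rule;
demo_runs_before() only approximates kbase_js_atom_runs_before(), which in the real driver also
accounts for cross-slot dependencies and priority protection, and the enum stands in for the
JS_COMMAND_* action codes.

#include <stdbool.h>
#include <stdio.h>

enum demo_stop_action { DEMO_SOFT_STOP, DEMO_HARD_STOP };

struct demo_atom {
	int kctx_id;            /* stand-in for katom->kctx */
	int sched_priority;     /* smaller value runs earlier in this model */
};

/* stand-in ordering: same-context atoms are ordered, otherwise order by priority */
static bool demo_runs_before(const struct demo_atom *a, const struct demo_atom *b)
{
	return a->kctx_id == b->kctx_id || a->sched_priority <= b->sched_priority;
}

static bool demo_should_stop_next(const struct demo_atom *head,
				  const struct demo_atom *next,
				  enum demo_stop_action action)
{
	switch (action) {
	case DEMO_SOFT_STOP:
		/* soft-stop also stops the next atom if it would run after head */
		return demo_runs_before(head, next);
	case DEMO_HARD_STOP:
		/* hard-stop must not disturb atoms from unrelated contexts */
		return head->kctx_id == next->kctx_id;
	}
	return false;
}

int main(void)
{
	struct demo_atom head  = { .kctx_id = 1, .sched_priority = 2 };
	struct demo_atom other = { .kctx_id = 2, .sched_priority = 2 };

	printf("soft-stop propagates: %d\n",
	       demo_should_stop_next(&head, &other, DEMO_SOFT_STOP));  /* 1 */
	printf("hard-stop propagates: %d\n",
	       demo_should_stop_next(&head, &other, DEMO_HARD_STOP));  /* 0 */
	return 0;
}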