From 1543e317f1da31b75942316931e8f491a8920811 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Thu, 04 Jan 2024 10:08:02 +0000
Subject: [PATCH] disable FB
---
kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c | 3150 ++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 2488 insertions(+), 662 deletions(-)
diff --git a/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c b/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c
index c165c0e..edaa6d1 100644
--- a/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c
+++ b/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,16 +28,20 @@
#include <tl/mali_kbase_tracepoints.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <linux/export.h>
-#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
+#include <csf/mali_kbase_csf_registers.h>
#include <uapi/gpu/arm/bifrost/mali_base_kernel.h>
+#include <mali_kbase_hwaccess_time.h>
+#include "mali_kbase_csf_tiler_heap_reclaim.h"
+#include "mali_kbase_csf_mcu_shared_reg.h"
/* Value to indicate that a queue group is not groups_to_schedule list */
#define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX)
-/* Waiting timeout for scheduler state change for descheduling a CSG */
-#define CSG_SCHED_STOP_TIMEOUT_MS (50)
-
-#define CSG_SUSPEND_ON_RESET_WAIT_TIMEOUT_MS DEFAULT_RESET_TIMEOUT_MS
+/* This sets the upper limit on the time the Scheduler waits to exit
+ * the sleep state. Usually the value of autosuspend_delay is expected
+ * to be around 100 milliseconds.
+ */
+#define MAX_AUTO_SUSPEND_DELAY_MS (5000)
/* Maximum number of endpoints which may run tiler jobs. */
#define CSG_TILER_MAX ((u8)1)
@@ -48,37 +52,14 @@
/* CSF scheduler time slice value */
#define CSF_SCHEDULER_TIME_TICK_MS (100) /* 100 milliseconds */
-/*
- * CSF scheduler time threshold for converting "tock" requests into "tick" if
- * they come too close to the end of a tick interval. This avoids scheduling
- * twice in a row.
- */
-#define CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS \
- CSF_SCHEDULER_TIME_TICK_MS
+/* A GPU address space slot is reserved for MCU. */
+#define NUM_RESERVED_AS_SLOTS (1)
-#define CSF_SCHEDULER_TIME_TICK_THRESHOLD_JIFFIES \
- msecs_to_jiffies(CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS)
+/* Time to wait for completion of PING req before considering MCU as hung */
+#define FW_PING_AFTER_ERROR_TIMEOUT_MS (10)
-/* Nanoseconds per millisecond */
-#define NS_PER_MS ((u64)1000 * 1000)
-
-/*
- * CSF minimum time to reschedule for a new "tock" request. Bursts of "tock"
- * requests are not serviced immediately, but shall wait for a minimum time in
- * order to reduce load on the CSF scheduler thread.
- */
-#define CSF_SCHEDULER_TIME_TOCK_JIFFIES 1 /* 1 jiffies-time */
-
-/* CS suspended and is idle (empty ring buffer) */
-#define CS_IDLE_FLAG (1 << 0)
-
-/* CS suspended and is wait for a CQS condition */
-#define CS_WAIT_SYNC_FLAG (1 << 1)
-
-/* 2 GPU address space slots are reserved for MCU and privileged context for HW
- * counter dumping. TODO remove the slot reserved for latter in GPUCORE-26293.
- */
-#define NUM_RESERVED_AS_SLOTS (2)
+/* Explicitly defining this blocked_reason code as SB_WAIT for clarity */
+#define CS_STATUS_BLOCKED_ON_SB_WAIT CS_STATUS_BLOCKED_REASON_REASON_WAIT
static int scheduler_group_schedule(struct kbase_queue_group *group);
static void remove_group_from_idle_wait(struct kbase_queue_group *const group);
@@ -94,14 +75,234 @@
static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev);
static int suspend_active_queue_groups(struct kbase_device *kbdev,
unsigned long *slot_mask);
+static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
+ bool system_suspend);
static void schedule_in_cycle(struct kbase_queue_group *group, bool force);
+static bool queue_group_scheduled_locked(struct kbase_queue_group *group);
#define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT))
/**
+ * wait_for_dump_complete_on_group_deschedule() - Wait for dump on fault and
+ * scheduling tick/tock to complete before the group deschedule.
+ *
+ * @group: Pointer to the group that is being descheduled.
+ *
+ * This function blocks the descheduling of the group until the dump on fault is
+ * completed and the scheduling tick/tock has completed.
+ * To deschedule an on-slot group, a CSG termination request would be sent, which
+ * might time out if a fault has occurred and could also affect the state being
+ * dumped. Moreover, the scheduler lock would be held, so access to the debugfs
+ * files would get blocked.
+ * The Scheduler lock and 'kctx->csf.lock' are released before this function starts
+ * to wait. When a request sent by the Scheduler to the FW times out, the Scheduler
+ * would also wait for the dumping to complete, releasing the Scheduler lock before
+ * the wait. Meanwhile Userspace can try to delete the group; this function ensures
+ * that the group doesn't exit the Scheduler until the scheduling tick/tock has
+ * completed. Though very unlikely, group deschedule can be triggered from multiple
+ * threads around the same time, and after the wait a Userspace thread can win the
+ * race, get the group descheduled and free the memory for the group pointer before
+ * the other threads wake up and notice that the group has already been descheduled.
+ * To avoid the freeing in such a case, a sort of refcount is used for the group,
+ * which is incremented & decremented across the wait.
+ */
+static
+void wait_for_dump_complete_on_group_deschedule(struct kbase_queue_group *group)
+{
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ struct kbase_device *kbdev = group->kctx->kbdev;
+ struct kbase_context *kctx = group->kctx;
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+
+ lockdep_assert_held(&kctx->csf.lock);
+ lockdep_assert_held(&scheduler->lock);
+
+ if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev)))
+ return;
+
+ while ((!kbase_debug_csf_fault_dump_complete(kbdev) ||
+ (scheduler->state == SCHED_BUSY)) &&
+ queue_group_scheduled_locked(group)) {
+ group->deschedule_deferred_cnt++;
+ mutex_unlock(&scheduler->lock);
+ mutex_unlock(&kctx->csf.lock);
+ kbase_debug_csf_fault_wait_completion(kbdev);
+ mutex_lock(&kctx->csf.lock);
+ mutex_lock(&scheduler->lock);
+ group->deschedule_deferred_cnt--;
+ }
+#endif
+}
+
+/**
+ * schedule_actions_trigger_df() - Notify the client about the fault and
+ * wait for the dumping to complete.
+ *
+ * @kbdev: Pointer to the device
+ * @kctx: Pointer to the context associated with the CSG slot for which
+ * the timeout was seen.
+ * @error: Error code indicating the type of timeout that occurred.
+ *
+ * This function notifies the Userspace client waiting for the faults and waits
+ * for the Client to complete the dumping.
+ * The function is called only from the scheduling tick/tock when a request sent
+ * by the Scheduler to the FW times out, or from the protm event work item of the
+ * group when the protected mode entry request times out.
+ * In the latter case no wait is done as the scheduler lock would be released
+ * immediately. In the former case the function waits, releasing the scheduler
+ * lock before the wait. It has been ensured that the Scheduler's view of the groups
+ * won't change meanwhile, so no group can enter/exit the Scheduler, become
+ * runnable or go off slot.
+ */
+static void schedule_actions_trigger_df(struct kbase_device *kbdev,
+ struct kbase_context *kctx, enum dumpfault_error_type error)
+{
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ if (!kbase_debug_csf_fault_notify(kbdev, kctx, error))
+ return;
+
+ if (unlikely(scheduler->state != SCHED_BUSY)) {
+ WARN_ON(error != DF_PROTECTED_MODE_ENTRY_FAILURE);
+ return;
+ }
+
+ mutex_unlock(&scheduler->lock);
+ kbase_debug_csf_fault_wait_completion(kbdev);
+ mutex_lock(&scheduler->lock);
+ WARN_ON(scheduler->state != SCHED_BUSY);
+#endif
+}
+
+#ifdef KBASE_PM_RUNTIME
+/**
+ * wait_for_scheduler_to_exit_sleep() - Wait for Scheduler to exit the
+ * sleeping state.
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function waits until the Scheduler has exited the sleep state and
+ * it is called when an on-slot group is terminated or when the suspend
+ * buffer of an on-slot group needs to be captured.
+ *
+ * Return: 0 when the wait is successful, otherwise an error code.
+ */
+static int wait_for_scheduler_to_exit_sleep(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ int autosuspend_delay = kbdev->dev->power.autosuspend_delay;
+ unsigned int sleep_exit_wait_time;
+ long remaining;
+ int ret = 0;
+
+ lockdep_assert_held(&scheduler->lock);
+ WARN_ON(scheduler->state != SCHED_SLEEPING);
+
+ /* No point in waiting if the autosuspend_delay value is negative.
+ * For a negative autosuspend_delay the Driver would directly suspend
+ * the Scheduler, but the autosuspend_delay value could have been
+ * changed after the sleep was initiated.
+ */
+ if (autosuspend_delay < 0)
+ return -EINVAL;
+
+ if (autosuspend_delay > MAX_AUTO_SUSPEND_DELAY_MS)
+ autosuspend_delay = MAX_AUTO_SUSPEND_DELAY_MS;
+
+ /* Usually Scheduler would remain in sleeping state until the
+ * auto-suspend timer expires and all active CSGs are suspended.
+ */
+ sleep_exit_wait_time = autosuspend_delay + kbdev->reset_timeout_ms;
+
+ remaining = kbase_csf_timeout_in_jiffies(sleep_exit_wait_time);
+
+ while ((scheduler->state == SCHED_SLEEPING) && !ret) {
+ mutex_unlock(&scheduler->lock);
+ remaining = wait_event_timeout(
+ kbdev->csf.event_wait,
+ (scheduler->state != SCHED_SLEEPING),
+ remaining);
+ mutex_lock(&scheduler->lock);
+ if (!remaining && (scheduler->state == SCHED_SLEEPING))
+ ret = -ETIMEDOUT;
+ }
+
+ return ret;
+}
+
+/**
+ * force_scheduler_to_exit_sleep() - Force scheduler to exit sleep state
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function will force the Scheduler to exit the sleep state by waking up
+ * the MCU and suspending the on-slot groups. It is called at the time of
+ * system suspend.
+ *
+ * Return: 0 on success.
+ */
+static int force_scheduler_to_exit_sleep(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ unsigned long flags;
+ int ret = 0;
+
+ lockdep_assert_held(&scheduler->lock);
+ WARN_ON(scheduler->state != SCHED_SLEEPING);
+ WARN_ON(!kbdev->pm.backend.gpu_sleep_mode_active);
+
+ kbase_pm_lock(kbdev);
+ ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev);
+ kbase_pm_unlock(kbdev);
+ if (ret) {
+ dev_warn(kbdev->dev,
+ "[%llu] Wait for MCU wake up failed on forced scheduler suspend",
+ kbase_backend_get_cycle_cnt(kbdev));
+ goto out;
+ }
+
+ ret = suspend_active_groups_on_powerdown(kbdev, true);
+ if (ret)
+ goto out;
+
+ kbase_pm_lock(kbdev);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbdev->pm.backend.gpu_sleep_mode_active = false;
+ kbdev->pm.backend.gpu_wakeup_override = false;
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ ret = kbase_pm_wait_for_desired_state(kbdev);
+ kbase_pm_unlock(kbdev);
+ if (ret) {
+ dev_warn(kbdev->dev,
+ "[%llu] Wait for pm state change failed on forced scheduler suspend",
+ kbase_backend_get_cycle_cnt(kbdev));
+ goto out;
+ }
+
+ scheduler->state = SCHED_SUSPENDED;
+ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
+
+ return 0;
+
+out:
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbdev->pm.backend.exit_gpu_sleep_mode = true;
+ kbdev->pm.backend.gpu_wakeup_override = false;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ kbase_csf_scheduler_invoke_tick(kbdev);
+
+ return ret;
+}
+#endif
+
+/**
* tick_timer_callback() - Callback function for the scheduling tick hrtimer
*
- * @timer: Pointer to the device
+ * @timer: Pointer to the scheduling tick hrtimer
*
* This function will enqueue the scheduling tick work item for immediate
* execution, if it has not been queued already.
@@ -113,7 +314,7 @@
struct kbase_device *kbdev = container_of(timer, struct kbase_device,
csf.scheduler.tick_timer);
- kbase_csf_scheduler_advance_tick(kbdev);
+ kbase_csf_scheduler_tick_advance(kbdev);
return HRTIMER_NORESTART;
}
@@ -124,7 +325,7 @@
*
* This function will start the scheduling tick hrtimer and is supposed to
* be called only from the tick work item function. The tick hrtimer should
- * should not be active already.
+ * not be active already.
*/
static void start_tick_timer(struct kbase_device *kbdev)
{
@@ -173,14 +374,10 @@
static void enqueue_tick_work(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
- unsigned long flags;
lockdep_assert_held(&scheduler->lock);
- spin_lock_irqsave(&scheduler->interrupt_lock, flags);
- WARN_ON(scheduler->tick_timer_active);
- queue_work(scheduler->wq, &scheduler->tick_work);
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+ kbase_csf_scheduler_invoke_tick(kbdev);
}
static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr)
@@ -254,7 +451,7 @@
mutex_lock(&kbdev->csf.reg_lock);
/* If bind operation for the queue hasn't completed yet, then the
- * the CSI can't be programmed for the queue
+ * CSI can't be programmed for the queue
* (even in stopped state) and so the doorbell also can't be assigned
* to it.
*/
@@ -288,11 +485,110 @@
WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR);
}
-static u32 get_nr_active_csgs(struct kbase_device *kbdev)
+/**
+ * update_on_slot_queues_offsets - Update active queues' INSERT & EXTRACT ofs
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function updates the EXTRACT offset for all queues whose groups have
+ * been assigned a physical slot. These values could be used to detect a
+ * queue's true idleness status. This is intended to be an additional check
+ * on top of the GPU idle notification to account for race conditions.
+ * This function is supposed to be called only when the GPU idle notification
+ * interrupt is received.
+ */
+static void update_on_slot_queues_offsets(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ /* All CSGs have the same number of CSs */
+ size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num;
+ size_t i;
+
+ lockdep_assert_held(&scheduler->interrupt_lock);
+
+ /* csg_slots_idle_mask is not used here for the looping, as it could get
+ * updated concurrently when Scheduler re-evaluates the idle status of
+ * the CSGs for which idle notification was received previously.
+ */
+ for_each_set_bit(i, scheduler->csg_inuse_bitmap, kbdev->csf.global_iface.group_num) {
+ struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group;
+ size_t j;
+
+ if (WARN_ON(!group))
+ continue;
+
+ for (j = 0; j < max_streams; ++j) {
+ struct kbase_queue *const queue = group->bound_queues[j];
+
+ if (queue) {
+ if (queue->user_io_addr) {
+ u64 const *const output_addr =
+ (u64 const *)(queue->user_io_addr + PAGE_SIZE);
+
+ queue->extract_ofs =
+ output_addr[CS_EXTRACT_LO / sizeof(u64)];
+ } else {
+ dev_warn(kbdev->dev,
+ "%s(): queue->user_io_addr is NULL, queue: %p",
+ __func__,
+ queue);
+ }
+ }
+ }
+ }
+}
+
+static void enqueue_gpu_idle_work(struct kbase_csf_scheduler *const scheduler)
+{
+ atomic_set(&scheduler->gpu_no_longer_idle, false);
+ queue_work(scheduler->idle_wq, &scheduler->gpu_idle_work);
+}
+
+void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ int non_idle_offslot_grps;
+ bool can_suspend_on_idle;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+ lockdep_assert_held(&scheduler->interrupt_lock);
+
+ non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
+ can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND, NULL,
+ ((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
+
+ if (!non_idle_offslot_grps) {
+ if (can_suspend_on_idle) {
+ /* fast_gpu_idle_handling is protected by the
+ * interrupt_lock, which would prevent this from being
+ * updated whilst gpu_idle_worker() is executing.
+ */
+ scheduler->fast_gpu_idle_handling =
+ (kbdev->csf.gpu_idle_hysteresis_us == 0) ||
+ !kbase_csf_scheduler_all_csgs_idle(kbdev);
+
+ /* The GPU idle worker relies on update_on_slot_queues_offsets() to have
+ * finished. It's queued beforehand to reduce the time until execution,
+ * but it will eventually be blocked by the scheduler->interrupt_lock.
+ */
+ enqueue_gpu_idle_work(scheduler);
+
+ /* The extract offsets are unused in fast GPU idle handling */
+ if (!scheduler->fast_gpu_idle_handling)
+ update_on_slot_queues_offsets(kbdev);
+ }
+ } else {
+ /* Advance the scheduling tick to get the non-idle suspended groups loaded soon */
+ kbase_csf_scheduler_tick_advance_nolock(kbdev);
+ }
+}
+
+u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev)
{
u32 nr_active_csgs;
- lockdep_assert_held(&kbdev->csf.scheduler.lock);
+ lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
nr_active_csgs = bitmap_weight(kbdev->csf.scheduler.csg_inuse_bitmap,
kbdev->csf.global_iface.group_num);
@@ -300,27 +596,16 @@
return nr_active_csgs;
}
-/**
- * csgs_active - returns true if any of CSG slots are in use
- *
- * @kbdev: Instance of a GPU platform device that implements a CSF interface.
- *
- * Return: the interface is actively engaged flag.
- */
-static bool csgs_active(struct kbase_device *kbdev)
+u32 kbase_csf_scheduler_get_nr_active_csgs(struct kbase_device *kbdev)
{
u32 nr_active_csgs;
+ unsigned long flags;
- mutex_lock(&kbdev->csf.scheduler.lock);
- nr_active_csgs = get_nr_active_csgs(kbdev);
- mutex_unlock(&kbdev->csf.scheduler.lock);
+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
+ nr_active_csgs = kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev);
+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
- /* Right now if any of the CSG interfaces are in use
- * then we need to assume that there is some work pending.
- * In future when we have IDLE notifications from firmware implemented
- * then we would have a better idea of the pending work.
- */
- return (nr_active_csgs != 0);
+ return nr_active_csgs;
}
/**
@@ -358,6 +643,19 @@
group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE);
}
+static bool on_slot_group_idle_locked(struct kbase_queue_group *group)
+{
+ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
+
+ return (group->run_state == KBASE_CSF_GROUP_IDLE);
+}
+
+static bool can_schedule_idle_group(struct kbase_queue_group *group)
+{
+ return (on_slot_group_idle_locked(group) ||
+ (group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME));
+}
+
static bool queue_group_scheduled(struct kbase_queue_group *group)
{
return (group->run_state != KBASE_CSF_GROUP_INACTIVE &&
@@ -373,32 +671,43 @@
}
/**
- * scheduler_wait_protm_quit() - Wait for GPU to exit protected mode.
+ * scheduler_protm_wait_quit() - Wait for GPU to exit protected mode.
*
* @kbdev: Pointer to the GPU device
*
* This function waits for the GPU to exit protected mode which is confirmed
* when active_protm_grp is set to NULL.
+ *
+ * Return: true on success, false otherwise.
*/
-static void scheduler_wait_protm_quit(struct kbase_device *kbdev)
+static bool scheduler_protm_wait_quit(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
long remaining;
+ bool success = true;
lockdep_assert_held(&scheduler->lock);
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT, NULL,
- jiffies_to_msecs(wt));
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_START, NULL, jiffies_to_msecs(wt));
remaining = wait_event_timeout(kbdev->csf.event_wait,
!kbase_csf_scheduler_protected_mode_in_use(kbdev), wt);
- if (!remaining)
- dev_warn(kbdev->dev, "Timeout, protm_quit wait skipped");
+ if (unlikely(!remaining)) {
+ struct kbase_queue_group *group = kbdev->csf.scheduler.active_protm_grp;
+ struct kbase_context *kctx = group ? group->kctx : NULL;
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT_DONE, NULL,
- jiffies_to_msecs(remaining));
+ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms), protm_quit wait skipped",
+ kbase_backend_get_cycle_cnt(kbdev),
+ kbdev->csf.fw_timeout_ms);
+ schedule_actions_trigger_df(kbdev, kctx, DF_PROTECTED_MODE_EXIT_TIMEOUT);
+ success = false;
+ }
+
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_END, NULL, jiffies_to_msecs(remaining));
+
+ return success;
}
/**
@@ -408,13 +717,39 @@
*
* This function sends a ping request to the firmware and waits for the GPU
* to exit protected mode.
+ *
+ * If the GPU does not exit protected mode, it is considered as hang.
+ * A GPU reset would then be triggered.
*/
static void scheduler_force_protm_exit(struct kbase_device *kbdev)
{
+ unsigned long flags;
+
lockdep_assert_held(&kbdev->csf.scheduler.lock);
kbase_csf_firmware_ping(kbdev);
- scheduler_wait_protm_quit(kbdev);
+
+ if (scheduler_protm_wait_quit(kbdev))
+ return;
+
+ dev_err(kbdev->dev, "Possible GPU hang in Protected mode");
+
+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
+ if (kbdev->csf.scheduler.active_protm_grp) {
+ dev_err(kbdev->dev,
+ "Group-%d of context %d_%d ran in protected mode for too long on slot %d",
+ kbdev->csf.scheduler.active_protm_grp->handle,
+ kbdev->csf.scheduler.active_protm_grp->kctx->tgid,
+ kbdev->csf.scheduler.active_protm_grp->kctx->id,
+ kbdev->csf.scheduler.active_protm_grp->csg_nr);
+ }
+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
+
+ /* The GPU could be stuck in Protected mode. To prevent a hang,
+ * a GPU reset is performed.
+ */
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
+ kbase_reset_gpu(kbdev);
}
/**
@@ -435,68 +770,221 @@
return kbdev->csf.scheduler.timer_enabled;
}
-static void enable_gpu_idle_fw_timer(struct kbase_device *kbdev)
+/**
+ * scheduler_pm_active_handle_suspend() - Acquire the PM reference count for
+ * Scheduler
+ *
+ * @kbdev: Pointer to the device
+ * @suspend_handler: Handler code for how to handle a suspend that might occur.
+ *
+ * This function is usually called when the Scheduler needs to be activated.
+ * The PM reference count is acquired for the Scheduler and the power-on
+ * of the GPU is initiated.
+ *
+ * Return: 0 if successful or a negative error code on failure.
+ */
+static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
+ enum kbase_pm_suspend_handler suspend_handler)
{
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
unsigned long flags;
+ u32 prev_count;
+ int ret = 0;
- lockdep_assert_held(&scheduler->lock);
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
- if (scheduler->gpu_idle_fw_timer_enabled)
- return;
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ prev_count = kbdev->csf.scheduler.pm_active_count;
+ if (!WARN_ON(prev_count == U32_MAX))
+ kbdev->csf.scheduler.pm_active_count++;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+ /* On 0 => 1, make a pm_ctx_active request */
+ if (!prev_count) {
+ ret = kbase_pm_context_active_handle_suspend(kbdev,
+ suspend_handler);
+ /* Invoke the PM state machines again as the change in MCU
+ * desired status, due to the update of scheduler.pm_active_count,
+ * may be missed by the thread that called pm_wait_for_desired_state()
+ */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ if (ret)
+ kbdev->csf.scheduler.pm_active_count--;
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
- /* Update the timer_enabled flag requires holding interrupt_lock */
- scheduler->gpu_idle_fw_timer_enabled = true;
- kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
-
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+ return ret;
}
-static void disable_gpu_idle_fw_timer_locked(struct kbase_device *kbdev)
+#ifdef KBASE_PM_RUNTIME
+/**
+ * scheduler_pm_active_after_sleep() - Acquire the PM reference count for
+ * Scheduler
+ *
+ * @kbdev: Pointer to the device
+ * @flags: Pointer to the flags variable containing the interrupt state
+ * when hwaccess lock was acquired.
+ *
+ * This function is called when Scheduler needs to be activated from the
+ * sleeping state.
+ * The PM reference count is acquired for the Scheduler and the wake up of
+ * MCU is initiated. It resets the flag that indicates to the MCU state
+ * machine that MCU needs to be put in sleep state.
+ *
+ * Note: This function shall be called with hwaccess lock held and it may
+ * release that lock and reacquire it.
+ *
+ * Return: zero when the PM reference was taken and non-zero when the
+ * system is suspending or already suspended.
+ */
+static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev,
+ unsigned long *flags)
{
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ u32 prev_count;
+ int ret = 0;
- lockdep_assert_held(&scheduler->lock);
- lockdep_assert_held(&scheduler->interrupt_lock);
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+ lockdep_assert_held(&kbdev->hwaccess_lock);
- /* Update of the timer_enabled flag requires holding interrupt_lock */
- if (scheduler->gpu_idle_fw_timer_enabled) {
- scheduler->gpu_idle_fw_timer_enabled = false;
- kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
+ prev_count = kbdev->csf.scheduler.pm_active_count;
+ if (!WARN_ON(prev_count == U32_MAX))
+ kbdev->csf.scheduler.pm_active_count++;
+
+ /* On 0 => 1, make a pm_ctx_active request */
+ if (!prev_count) {
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, *flags);
+
+ ret = kbase_pm_context_active_handle_suspend(kbdev,
+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, *flags);
+ if (ret)
+ kbdev->csf.scheduler.pm_active_count--;
+ else
+ kbdev->pm.backend.gpu_sleep_mode_active = false;
+ kbase_pm_update_state(kbdev);
+ }
+
+ return ret;
+}
+#endif
+
+/**
+ * scheduler_pm_idle() - Release the PM reference count held by Scheduler
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function is usually called after Scheduler is suspended.
+ * The PM reference count held by the Scheduler is released to trigger the
+ * power down of GPU.
+ */
+static void scheduler_pm_idle(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ u32 prev_count;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ prev_count = kbdev->csf.scheduler.pm_active_count;
+ if (!WARN_ON(prev_count == 0))
+ kbdev->csf.scheduler.pm_active_count--;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ if (prev_count == 1) {
+ kbase_pm_context_idle(kbdev);
+ /* Invoke the PM state machines again as the change in MCU
+ * desired status, due to the update of scheduler.pm_active_count,
+ * may be missed by the thread that called pm_wait_for_desired_state()
+ */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
}
-static void disable_gpu_idle_fw_timer(struct kbase_device *kbdev)
+#ifdef KBASE_PM_RUNTIME
+/**
+ * scheduler_pm_idle_before_sleep() - Release the PM reference count and
+ * trigger the transition to sleep state.
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function is called on the GPU idle notification. It releases the
+ * Scheduler's PM reference count and sets the flag to indicate to the
+ * MCU state machine that MCU needs to be put in sleep state.
+ */
+static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev)
{
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
unsigned long flags;
+ u32 prev_count;
- lockdep_assert_held(&scheduler->lock);
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
- if (!scheduler->gpu_idle_fw_timer_enabled)
- return;
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ prev_count = kbdev->csf.scheduler.pm_active_count;
+ if (!WARN_ON(prev_count == 0))
+ kbdev->csf.scheduler.pm_active_count--;
+ kbdev->pm.backend.gpu_sleep_mode_active = true;
+ kbdev->pm.backend.exit_gpu_sleep_mode = false;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- spin_lock_irqsave(&scheduler->interrupt_lock, flags);
- disable_gpu_idle_fw_timer_locked(kbdev);
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+ if (prev_count == 1) {
+ kbase_pm_context_idle(kbdev);
+ /* Invoke the PM state machines again as the change in MCU
+ * desired status, due to the update of scheduler.pm_active_count,
+ * may be missed by the thread that called pm_wait_for_desired_state()
+ */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
}
+#endif
static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ int ret;
lockdep_assert_held(&scheduler->lock);
- if (scheduler->state == SCHED_SUSPENDED) {
- dev_dbg(kbdev->dev, "Re-activating the Scheduler");
- kbase_csf_scheduler_pm_active(kbdev);
- scheduler->state = SCHED_INACTIVE;
+ if ((scheduler->state != SCHED_SUSPENDED) &&
+ (scheduler->state != SCHED_SLEEPING))
+ return;
- if (kick)
- scheduler_enable_tick_timer_nolock(kbdev);
+ if (scheduler->state == SCHED_SUSPENDED) {
+ dev_dbg(kbdev->dev,
+ "Re-activating the Scheduler after suspend");
+ ret = scheduler_pm_active_handle_suspend(kbdev,
+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+ } else {
+#ifdef KBASE_PM_RUNTIME
+ unsigned long flags;
+
+ dev_dbg(kbdev->dev,
+ "Re-activating the Scheduler out of sleep");
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ ret = scheduler_pm_active_after_sleep(kbdev, &flags);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#endif
}
+
+ if (ret) {
+ /* GPUCORE-29850 would add the handling for the case where
+ * Scheduler could not be activated due to system suspend.
+ */
+ dev_info(kbdev->dev,
+ "Couldn't wakeup Scheduler due to system suspend");
+ return;
+ }
+
+ scheduler->state = SCHED_INACTIVE;
+ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
+
+ if (kick)
+ scheduler_enable_tick_timer_nolock(kbdev);
}
static void scheduler_suspend(struct kbase_device *kbdev)
@@ -507,8 +995,9 @@
if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) {
dev_dbg(kbdev->dev, "Suspending the Scheduler");
- kbase_csf_scheduler_pm_idle(kbdev);
+ scheduler_pm_idle(kbdev);
scheduler->state = SCHED_SUSPENDED;
+ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
}
}
@@ -539,20 +1028,41 @@
KBASE_CSF_GROUP_SUSPENDED);
} else if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) {
group->run_state = KBASE_CSF_GROUP_SUSPENDED;
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED, group,
+ group->run_state);
/* If scheduler is not suspended and the given group's
* static priority (reflected by the scan_seq_num) is inside
- * the current tick slot-range, schedules an async tock.
+ * the current tick slot-range, or there are some on_slot
+ * idle groups, schedule an async tock.
*/
- if (scheduler->state != SCHED_SUSPENDED &&
- group->scan_seq_num < scheduler->num_csg_slots_for_tick)
- schedule_in_cycle(group, true);
+ if (scheduler->state != SCHED_SUSPENDED) {
+ unsigned long flags;
+ int n_idle;
+ int n_used;
+ int n_slots =
+ group->kctx->kbdev->csf.global_iface.group_num;
+
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+ n_idle = bitmap_weight(scheduler->csg_slots_idle_mask,
+ n_slots);
+ n_used = bitmap_weight(scheduler->csg_inuse_bitmap,
+ n_slots);
+ spin_unlock_irqrestore(&scheduler->interrupt_lock,
+ flags);
+
+ if (n_idle ||
+ n_used < scheduler->num_csg_slots_for_tick ||
+ group->scan_seq_num <
+ scheduler->num_csg_slots_for_tick)
+ schedule_in_cycle(group, true);
+ }
} else
return;
new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
- group, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group,
+ new_val);
}
int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group)
@@ -586,6 +1096,14 @@
return slot_num;
}
+/* kbasep_csf_scheduler_group_is_on_slot_locked() - Check if CSG is on slot.
+ *
+ * @group: GPU queue group to be checked
+ *
+ * This function needs to be called with the scheduler's lock held.
+ *
+ * Return: true if @group is on slot.
+ */
static bool kbasep_csf_scheduler_group_is_on_slot_locked(
struct kbase_queue_group *group)
{
@@ -636,6 +1154,7 @@
struct kbase_csf_cmd_stream_info *stream;
int csi_index = queue->csi_index;
long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
+ unsigned long flags;
if (WARN_ON(!group) ||
WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
@@ -653,10 +1172,12 @@
== CS_ACK_STATE_START), remaining);
if (!remaining) {
- dev_warn(kbdev->dev, "Timed out waiting for queue to start on csi %d bound to group %d on slot %d",
+ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to start on csi %d bound to group %d on slot %d",
+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
csi_index, group->handle, group->csg_nr);
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
+
return -ETIMEDOUT;
}
@@ -665,12 +1186,15 @@
kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
}
+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
/* Set state to STOP */
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP,
CS_REQ_STATE_MASK);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQUESTED, group, queue, 0u);
kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true);
+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
+
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQ, group, queue, 0u);
/* Timed wait */
remaining = wait_event_timeout(kbdev->csf.event_wait,
@@ -678,7 +1202,8 @@
== CS_ACK_STATE_STOP), remaining);
if (!remaining) {
- dev_warn(kbdev->dev, "Timed out waiting for queue to stop on csi %d bound to group %d on slot %d",
+ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to stop on csi %d bound to group %d on slot %d",
+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
queue->csi_index, group->handle, group->csg_nr);
/* TODO GPUCORE-25328: The CSG can't be terminated, the GPU
@@ -686,6 +1211,8 @@
*/
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
+
+
}
return (remaining) ? 0 : -ETIMEDOUT;
}
@@ -739,6 +1266,7 @@
long remaining;
int slot;
int err = 0;
+ const u32 group_schedule_timeout = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT);
if (WARN_ON(!group))
return -EINVAL;
@@ -782,8 +1310,7 @@
*/
remaining = wait_event_timeout(
kbdev->csf.event_wait, can_halt_stream(kbdev, group),
- kbase_csf_timeout_in_jiffies(
- 20 * kbdev->csf.scheduler.csg_scheduling_period_ms));
+ kbase_csf_timeout_in_jiffies(group_schedule_timeout));
mutex_lock(&scheduler->lock);
@@ -845,24 +1372,60 @@
kbase_csf_firmware_cs_output(
stream, CS_ACK)) ==
CS_ACK_STATE_STOP),
- kbdev->csf.fw_timeout_ms);
+ kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms));
if (!remaining) {
dev_warn(kbdev->dev,
- "Timed out waiting for queue stop ack on csi %d bound to group %d on slot %d",
+ "[%llu] Timeout (%d ms) waiting for queue stop ack on csi %d bound to group %d on slot %d",
+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
queue->csi_index,
group->handle, group->csg_nr);
+
+
err = -ETIMEDOUT;
}
}
}
} else if (!remaining) {
- dev_warn(kbdev->dev, "Group-%d failed to get a slot for stopping the queue on csi %d",
- group->handle, queue->csi_index);
+ dev_warn(kbdev->dev, "[%llu] Group-%d failed to get a slot for stopping the queue on csi %d (timeout %d ms)",
+ kbase_backend_get_cycle_cnt(kbdev),
+ group->handle, queue->csi_index,
+ group_schedule_timeout);
+
+
err = -ETIMEDOUT;
}
return err;
+}
+
+/**
+ * scheduler_activate_on_queue_stop() - Activate the Scheduler when the GPU
+ * queue needs to be stopped.
+ *
+ * @queue: Pointer to the GPU command queue
+ *
+ * This function is called when the CSI to which the GPU queue is bound needs to
+ * be stopped. For that, the corresponding queue group needs to be resident on
+ * a CSG slot and the MCU firmware should be running. So this function makes the
+ * Scheduler exit the sleeping or suspended state.
+ */
+static void scheduler_activate_on_queue_stop(struct kbase_queue *queue)
+{
+ struct kbase_device *kbdev = queue->kctx->kbdev;
+
+ scheduler_wakeup(kbdev, true);
+
+ /* Wait for MCU firmware to start running */
+ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
+ dev_warn(
+ kbdev->dev,
+ "[%llu] Wait for MCU active failed for stopping queue on csi %d bound to group %d of context %d_%d on slot %d",
+ kbase_backend_get_cycle_cnt(kbdev),
+ queue->csi_index, queue->group->handle,
+ queue->kctx->tgid, queue->kctx->id,
+ queue->group->csg_nr);
+ }
}
int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue)
@@ -890,7 +1453,7 @@
/* Since the group needs to be resumed in order to stop the queue,
* check if GPU needs to be powered up.
*/
- scheduler_wakeup(kbdev, true);
+ scheduler_activate_on_queue_stop(queue);
if ((slot >= 0) &&
(atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING))
@@ -899,16 +1462,26 @@
err = sched_halt_stream(queue);
unassign_user_doorbell_from_queue(kbdev, queue);
+ kbase_csf_mcu_shared_drop_stopped_queue(kbdev, queue);
}
mutex_unlock(&kbdev->csf.scheduler.lock);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_STOP, group, queue, group->run_state);
return err;
}
static void update_hw_active(struct kbase_queue *queue, bool active)
{
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+ if (queue && queue->enabled) {
+ u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
+
+ output_addr[CS_ACTIVE / sizeof(u32)] = active;
+ }
+#else
CSTD_UNUSED(queue);
CSTD_UNUSED(active);
+#endif
}
static void program_cs_extract_init(struct kbase_queue *queue)
@@ -971,6 +1544,7 @@
struct kbase_csf_cmd_stream_group_info *ginfo;
struct kbase_csf_cmd_stream_info *stream;
int csi_index = queue->csi_index;
+ unsigned long flags;
u64 user_input;
u64 user_output;
@@ -988,11 +1562,13 @@
WARN_ON(csi_index >= ginfo->stream_num))
return;
- assign_user_doorbell_to_queue(kbdev, queue);
- if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
- return;
+ if (queue->enabled) {
+ assign_user_doorbell_to_queue(kbdev, queue);
+ if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
+ return;
- WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
+ WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
+ }
if (queue->enabled && queue_group_suspended_locked(group))
program_cs_extract_init(queue);
@@ -1006,17 +1582,15 @@
kbase_csf_firmware_cs_input(stream, CS_SIZE,
queue->size);
- user_input = (queue->reg->start_pfn << PAGE_SHIFT);
- kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO,
- user_input & 0xFFFFFFFF);
- kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI,
- user_input >> 32);
+ user_input = queue->user_io_gpu_va;
+ WARN_ONCE(!user_input && queue->enabled, "Enabled queue should have a valid gpu_va");
- user_output = ((queue->reg->start_pfn + 1) << PAGE_SHIFT);
- kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO,
- user_output & 0xFFFFFFFF);
- kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI,
- user_output >> 32);
+ kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, user_input & 0xFFFFFFFF);
+ kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, user_input >> 32);
+
+ user_output = user_input + PAGE_SIZE;
+ kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, user_output & 0xFFFFFFFF);
+ kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, user_output >> 32);
kbase_csf_firmware_cs_input(stream, CS_CONFIG,
(queue->doorbell_nr << 8) | (queue->priority & 0xF));
@@ -1027,25 +1601,56 @@
/* Enable all interrupts for now */
kbase_csf_firmware_cs_input(stream, CS_ACK_IRQ_MASK, ~((u32)0));
+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
+
+ /* The fault bit could be misaligned between CS_REQ and CS_ACK if the
+ * acknowledgment was deferred due to dump on fault and the group was
+ * removed from the CSG slot before the fault could be acknowledged.
+ */
+ if (queue->enabled) {
+ u32 const cs_ack =
+ kbase_csf_firmware_cs_output(stream, CS_ACK);
+
+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
+ CS_REQ_FAULT_MASK);
+ }
+
/*
* Enable the CSG idle notification once the CS's ringbuffer
* becomes empty or the CS becomes sync_idle, waiting sync update
* or protected mode switch.
*/
kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
- CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK,
- CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK);
+ CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK |
+ CS_REQ_IDLE_SHARED_SB_DEC_MASK,
+ CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK |
+ CS_REQ_IDLE_SHARED_SB_DEC_MASK);
/* Set state to START/STOP */
kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
queue->enabled ? CS_REQ_STATE_START : CS_REQ_STATE_STOP,
CS_REQ_STATE_MASK);
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr,
+ ring_csg_doorbell);
+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_START, group, queue, queue->enabled);
- kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr,
- ring_csg_doorbell);
update_hw_active(queue, true);
+}
+
+static int onslot_csg_add_new_queue(struct kbase_queue *queue)
+{
+ struct kbase_device *kbdev = queue->kctx->kbdev;
+ int err;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ err = kbase_csf_mcu_shared_add_queue(kbdev, queue);
+ if (!err)
+ program_cs(kbdev, queue, true);
+
+ return err;
}
int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
@@ -1064,10 +1669,17 @@
mutex_lock(&kbdev->csf.scheduler.lock);
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ if (unlikely(kbdev->csf.scheduler.state == SCHED_BUSY)) {
+ mutex_unlock(&kbdev->csf.scheduler.lock);
+ return -EBUSY;
+ }
+#endif
+
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue,
group->run_state);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT, queue->group,
- queue, queue->status_wait);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue,
+ queue->status_wait);
if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) {
err = -EIO;
@@ -1096,12 +1708,32 @@
* user door-bell on such a case.
*/
kbase_csf_ring_cs_user_doorbell(kbdev, queue);
- } else
- program_cs(kbdev, queue, true);
+ } else {
+ err = onslot_csg_add_new_queue(queue);
+ /* For an on slot CSG, the only error in adding a new
+ * queue to run is that the scheduler could not map
+ * the required userio pages due to likely some resource
+ * issues. In such a case, and if the group is yet
+ * to enter its fatal error state, we return a -EBUSY
+ * to the submitter for another kick. The queue itself
+ * has yet to be programmed hence needs to remain its
+ * previous (disabled) state. If the error persists,
+ * the group will eventually reports a fatal error by
+ * the group's error reporting mechanism, when the MCU
+ * shared region map retry limit of the group is
+ * exceeded. For such a case, the expected error value
+ * is -EIO.
+ */
+ if (unlikely(err)) {
+ queue->enabled = cs_enabled;
+ mutex_unlock(&kbdev->csf.scheduler.lock);
+ return (err != -EIO) ? -EBUSY : err;
+ }
+ }
}
- queue_delayed_work(system_long_wq,
- &kbdev->csf.scheduler.ping_work,
- msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS));
+ queue_delayed_work(system_long_wq, &kbdev->csf.scheduler.ping_work,
+ msecs_to_jiffies(kbase_get_timeout_ms(
+ kbdev, CSF_FIRMWARE_PING_TIMEOUT)));
}
}
@@ -1136,7 +1768,8 @@
slot_state = CSG_SLOT_RUNNING;
atomic_set(&csg_slot->state, slot_state);
csg_slot->trigger_jiffies = jiffies;
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STARTED, csg_slot->resident_group, state);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_RUNNING, csg_slot->resident_group,
+ state);
dev_dbg(kbdev->dev, "Group %u running on slot %d\n",
csg_slot->resident_group->handle, slot);
}
@@ -1228,13 +1861,16 @@
csg_slot_running(kbdev, slot), remaining);
if (!remaining)
dev_warn(kbdev->dev,
- "slot %d timed out on up-running\n", slot);
+ "[%llu] slot %d timeout (%d ms) on up-running\n",
+ kbase_backend_get_cycle_cnt(kbdev),
+ slot, kbdev->csf.fw_timeout_ms);
}
if (csg_slot_running(kbdev, slot)) {
unsigned long flags;
struct kbase_csf_cmd_stream_group_info *ginfo =
&global_iface->groups[slot];
+
u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND :
CSG_REQ_STATE_TERMINATE;
@@ -1245,13 +1881,15 @@
/* Set state to SUSPEND/TERMINATE */
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, halt_cmd,
CSG_REQ_STATE_MASK);
+ kbase_csf_ring_csg_doorbell(kbdev, slot);
spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock,
flags);
atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP);
csg_slot[slot].trigger_jiffies = jiffies;
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP, group, halt_cmd);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd);
- kbase_csf_ring_csg_doorbell(kbdev, slot);
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot, suspend);
}
}
@@ -1265,6 +1903,31 @@
halt_csg_slot(group, true);
}
+static bool csf_wait_ge_condition_supported(struct kbase_device *kbdev)
+{
+ const uint32_t glb_major = GLB_VERSION_MAJOR_GET(kbdev->csf.global_iface.version);
+ const uint32_t glb_minor = GLB_VERSION_MINOR_GET(kbdev->csf.global_iface.version);
+
+ switch (glb_major) {
+ case 0:
+ break;
+ case 1:
+ if (glb_minor >= 4)
+ return true;
+ break;
+ case 2:
+ if (glb_minor >= 6)
+ return true;
+ break;
+ case 3:
+ if (glb_minor >= 6)
+ return true;
+ break;
+ default:
+ return true;
+ }
+ return false;
+}
/**
* evaluate_sync_update() - Evaluate the sync wait condition the GPU command
* queue has been blocked on.
@@ -1278,23 +1941,38 @@
struct kbase_vmap_struct *mapping;
bool updated = false;
u32 *sync_ptr;
+ u32 sync_wait_size;
+ u32 sync_wait_align_mask;
u32 sync_wait_cond;
u32 sync_current_val;
struct kbase_device *kbdev;
+ bool sync_wait_align_valid = false;
+ bool sync_wait_cond_valid = false;
if (WARN_ON(!queue))
return false;
kbdev = queue->kctx->kbdev;
+
lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ sync_wait_size = CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(queue->status_wait);
+ sync_wait_align_mask =
+ (sync_wait_size == 0 ? BASEP_EVENT32_ALIGN_BYTES : BASEP_EVENT64_ALIGN_BYTES) - 1;
+ sync_wait_align_valid = ((uintptr_t)queue->sync_ptr & sync_wait_align_mask) == 0;
+ if (!sync_wait_align_valid) {
+ dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX is misaligned",
+ queue->sync_ptr);
+ goto out;
+ }
sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr,
&mapping);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE, queue->group,
- queue, queue->sync_ptr);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_BLOCKED_REASON,
- queue->group, queue, queue->blocked_reason);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_START, queue->group, queue,
+ queue->sync_ptr);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_BLOCKED_REASON, queue->group, queue,
+ queue->blocked_reason);
if (!sync_ptr) {
dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX already freed",
@@ -1304,19 +1982,24 @@
sync_wait_cond =
CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(queue->status_wait);
+ sync_wait_cond_valid = (sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) ||
+ (sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) ||
+ ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE) &&
+ csf_wait_ge_condition_supported(kbdev));
- WARN_ON((sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) &&
- (sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE));
+ WARN_ON(!sync_wait_cond_valid);
sync_current_val = READ_ONCE(*sync_ptr);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_CURRENT_VAL, queue->group,
- queue, sync_current_val);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_CUR_VAL, queue->group, queue,
+ sync_current_val);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_TEST_VAL, queue->group,
- queue, queue->sync_value);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_TEST_VAL, queue->group, queue,
+ queue->sync_value);
if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) &&
(sync_current_val > queue->sync_value)) ||
+ ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE) &&
+ (sync_current_val >= queue->sync_value) && csf_wait_ge_condition_supported(kbdev)) ||
((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) &&
(sync_current_val <= queue->sync_value))) {
/* The sync wait condition is satisfied so the group to which
@@ -1330,8 +2013,7 @@
kbase_phy_alloc_mapping_put(queue->kctx, mapping);
out:
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVALUATED,
- queue->group, queue, updated);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_END, queue->group, queue, updated);
return updated;
}
@@ -1358,10 +2040,17 @@
u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT);
bool is_waiting = false;
- KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_STATUS_WAIT,
- queue->group, queue, status);
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ u64 cmd_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_LO);
- if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) {
+ cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_HI) << 32;
+ queue->saved_cmd_ptr = cmd_ptr;
+#endif
+
+ KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group,
+ queue, status);
+
+ if (CS_STATUS_WAIT_SYNC_WAIT_GET(status) || CS_STATUS_WAIT_SB_MASK_GET(status)) {
queue->status_wait = status;
queue->sync_ptr = kbase_csf_firmware_cs_output(stream,
CS_STATUS_WAIT_SYNC_POINTER_LO);
@@ -1377,7 +2066,8 @@
kbase_csf_firmware_cs_output(stream,
CS_STATUS_BLOCKED_REASON));
- if (!evaluate_sync_update(queue)) {
+ if ((queue->blocked_reason == CS_STATUS_BLOCKED_ON_SB_WAIT) ||
+ !evaluate_sync_update(queue)) {
is_waiting = true;
} else {
/* Sync object already got updated & met the condition
@@ -1399,37 +2089,6 @@
return is_waiting;
}
-/**
- * Calculate how far in the future an event should be scheduled.
- *
- * The objective of this function is making sure that a minimum period of
- * time is guaranteed between handling two consecutive events.
- *
- * This function guarantees a minimum period of time between two consecutive
- * events: given the minimum period and the distance between the current time
- * and the last event, the function returns the difference between the two.
- * However, if more time than the minimum period has already elapsed
- * since the last event, the function will return 0 to schedule work to handle
- * the event with the lowest latency possible.
- *
- * @last_event: Timestamp of the last event, in jiffies.
- * @time_now: Timestamp of the new event to handle, in jiffies.
- * Must be successive to last_event.
- * @period: Minimum period between two events, in jiffies.
- *
- * Return: Time to delay work to handle the current event, in jiffies
- */
-static unsigned long get_schedule_delay(unsigned long last_event,
- unsigned long time_now,
- unsigned long period)
-{
- const unsigned long t_distance = time_now - last_event;
- const unsigned long delay_t = (t_distance < period) ?
- (period - t_distance) : 0;
-
- return delay_t;
-}
-
static void schedule_in_cycle(struct kbase_queue_group *group, bool force)
{
struct kbase_context *kctx = group->kctx;
@@ -1444,15 +2103,48 @@
* of work needs to be enforced in situation such as entering into
* protected mode).
*/
- if ((likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) &&
- !scheduler->tock_pending_request) {
- const unsigned long delay =
- get_schedule_delay(scheduler->last_schedule, jiffies,
- CSF_SCHEDULER_TIME_TOCK_JIFFIES);
- scheduler->tock_pending_request = true;
+ if (likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) {
dev_dbg(kbdev->dev, "Kicking async for group %d\n",
group->handle);
- mod_delayed_work(scheduler->wq, &scheduler->tock_work, delay);
+ kbase_csf_scheduler_invoke_tock(kbdev);
+ }
+}
+
+static void ktrace_log_group_state(struct kbase_queue_group *const group)
+{
+ switch (group->run_state) {
+ case KBASE_CSF_GROUP_INACTIVE:
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group,
+ group->run_state);
+ break;
+ case KBASE_CSF_GROUP_RUNNABLE:
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_RUNNABLE, group,
+ group->run_state);
+ break;
+ case KBASE_CSF_GROUP_IDLE:
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_IDLE, group,
+ group->run_state);
+ break;
+ case KBASE_CSF_GROUP_SUSPENDED:
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED, group,
+ group->run_state);
+ break;
+ case KBASE_CSF_GROUP_SUSPENDED_ON_IDLE:
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED_ON_IDLE, group,
+ group->run_state);
+ break;
+ case KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC:
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED_ON_WAIT_SYNC,
+ group, group->run_state);
+ break;
+ case KBASE_CSF_GROUP_FAULT_EVICTED:
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_FAULT_EVICTED, group,
+ group->run_state);
+ break;
+ case KBASE_CSF_GROUP_TERMINATED:
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group,
+ group->run_state);
+ break;
}
}
@@ -1473,13 +2165,15 @@
group->run_state = run_state;
+ ktrace_log_group_state(group);
+
if (run_state == KBASE_CSF_GROUP_RUNNABLE)
group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID;
list_add_tail(&group->link,
&kctx->csf.sched.runnable_groups[group->priority]);
kctx->csf.sched.num_runnable_grps++;
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_INSERT_RUNNABLE, group,
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_INSERT, group,
kctx->csf.sched.num_runnable_grps);
/* Add the kctx if not yet in runnable kctxs */
@@ -1487,14 +2181,15 @@
/* First runnable csg, adds to the runnable_kctxs */
INIT_LIST_HEAD(&kctx->csf.link);
list_add_tail(&kctx->csf.link, &scheduler->runnable_kctxs);
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_INSERT_RUNNABLE, kctx, 0u);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_INSERT, kctx, 0u);
}
scheduler->total_runnable_grps++;
if (likely(scheduler_timer_is_enabled_nolock(kbdev)) &&
(scheduler->total_runnable_grps == 1 ||
- scheduler->state == SCHED_SUSPENDED)) {
+ scheduler->state == SCHED_SUSPENDED ||
+ scheduler->state == SCHED_SLEEPING)) {
dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n");
/* Fire a scheduling to start the time-slice */
enqueue_tick_work(kbdev);
@@ -1516,13 +2211,41 @@
struct kbase_queue_group *new_head_grp;
struct list_head *list =
&kctx->csf.sched.runnable_groups[group->priority];
+ unsigned long flags;
lockdep_assert_held(&scheduler->lock);
WARN_ON(!queue_group_scheduled_locked(group));
group->run_state = run_state;
+
+ ktrace_log_group_state(group);
+
list_del_init(&group->link);
+
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+ /* The below condition will be true when the group running in protected
+ * mode is being terminated but the protected mode exit interrupt wasn't
+ * received. This can happen if the FW got stuck during protected mode
+ * for some reason (like a GPU page fault or some internal error).
+ * In normal cases the FW is expected to send the protected mode exit
+ * interrupt before it handles the CSG termination request.
+ */
+ if (unlikely(scheduler->active_protm_grp == group)) {
+ /* CSG slot cleanup should have happened for the pmode group */
+ WARN_ON(kbasep_csf_scheduler_group_is_on_slot_locked(group));
+ WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
+ /* Initiate a GPU reset, in case it wasn't initiated yet,
+ * in order to rectify the anomaly.
+ */
+ if (kbase_prepare_to_reset_gpu(kctx->kbdev, RESET_FLAGS_NONE))
+ kbase_reset_gpu(kctx->kbdev);
+
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_PROTM_EXIT,
+ scheduler->active_protm_grp, 0u);
+ scheduler->active_protm_grp = NULL;
+ }
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
if (scheduler->top_grp == group) {
/*
@@ -1548,13 +2271,12 @@
}
kctx->csf.sched.num_runnable_grps--;
- KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_RUNNABLE, group,
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_REMOVE, group,
kctx->csf.sched.num_runnable_grps);
new_head_grp = (!list_empty(list)) ?
list_first_entry(list, struct kbase_queue_group, link) :
NULL;
- KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_RUNNABLE, new_head_grp,
- 0u);
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u);
if (kctx->csf.sched.num_runnable_grps == 0) {
struct kbase_context *new_head_kctx;
@@ -1563,13 +2285,11 @@
list_del_init(&kctx->csf.link);
if (scheduler->top_ctx == kctx)
scheduler->top_ctx = NULL;
- KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_REMOVE_RUNNABLE, kctx,
- 0u);
+ KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_REMOVE, kctx, 0u);
new_head_kctx = (!list_empty(kctx_list)) ?
list_first_entry(kctx_list, struct kbase_context, csf.link) :
NULL;
- KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_HEAD_RUNNABLE,
- new_head_kctx, 0u);
+ KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx, 0u);
}
WARN_ON(scheduler->total_runnable_grps == 0);
@@ -1579,7 +2299,7 @@
cancel_tick_timer(kctx->kbdev);
WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps));
if (scheduler->state != SCHED_SUSPENDED)
- queue_work(system_wq, &scheduler->gpu_idle_work);
+ enqueue_gpu_idle_work(scheduler);
}
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
scheduler->num_active_address_spaces |
@@ -1596,9 +2316,11 @@
list_add_tail(&group->link, &kctx->csf.sched.idle_wait_groups);
kctx->csf.sched.num_idle_wait_grps++;
- KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_INSERT_IDLE_WAIT, group,
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_INSERT, group,
kctx->csf.sched.num_idle_wait_grps);
group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC;
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, CSF_GROUP_SUSPENDED_ON_WAIT_SYNC, group,
+ group->run_state);
dev_dbg(kctx->kbdev->dev,
"Group-%d suspended on sync_wait, total wait_groups: %u\n",
group->handle, kctx->csf.sched.num_idle_wait_grps);
@@ -1617,14 +2339,14 @@
list_del_init(&group->link);
WARN_ON(kctx->csf.sched.num_idle_wait_grps == 0);
kctx->csf.sched.num_idle_wait_grps--;
- KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_IDLE_WAIT, group,
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_REMOVE, group,
kctx->csf.sched.num_idle_wait_grps);
new_head_grp = (!list_empty(list)) ?
list_first_entry(list, struct kbase_queue_group, link) :
NULL;
- KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_IDLE_WAIT,
- new_head_grp, 0u);
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_HEAD, new_head_grp, 0u);
group->run_state = KBASE_CSF_GROUP_INACTIVE;
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, CSF_GROUP_INACTIVE, group, group->run_state);
}
static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler,
@@ -1639,7 +2361,7 @@
insert_group_to_idle_wait(group);
}
-static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group)
+static void update_offslot_non_idle_cnt(struct kbase_queue_group *group)
{
struct kbase_device *kbdev = group->kctx->kbdev;
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
@@ -1649,8 +2371,7 @@
if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) {
int new_val =
atomic_dec_return(&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
- group, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val);
}
}
@@ -1666,8 +2387,7 @@
if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) {
int new_val =
atomic_dec_return(&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
- group, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val);
}
}
@@ -1687,15 +2407,15 @@
if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) {
int new_val = atomic_inc_return(
&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
- group, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC,
+ group, new_val);
}
} else {
if (group->run_state != KBASE_CSF_GROUP_SUSPENDED) {
int new_val = atomic_dec_return(
&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
- group, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC,
+ group, new_val);
}
}
} else {
@@ -1703,13 +2423,13 @@
if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) {
int new_val = atomic_inc_return(
&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
- group, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group,
+ new_val);
}
}
}
-static bool confirm_cmd_buf_empty(struct kbase_queue *queue)
+static bool confirm_cmd_buf_empty(struct kbase_queue const *queue)
{
bool cs_empty;
bool cs_idle;
@@ -1721,8 +2441,8 @@
u32 glb_version = iface->version;
- u64 *input_addr = (u64 *)queue->user_io_addr;
- u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE);
+ u64 const *input_addr = (u64 const *)queue->user_io_addr;
+ u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
if (glb_version >= kbase_csf_interface_version(1, 0, 0)) {
/* CS_STATUS_SCOREBOARD supported from CSF 1.0 */
@@ -1767,6 +2487,10 @@
bool sync_wait = false;
bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
CSG_STATUS_STATE_IDLE_MASK;
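+ /* In the NO_MALI build there is no real hardware activity to track, so
+ * all bound queues of the group are marked as not hardware-active here.
+ */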
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
+ for (i = 0; i < max_streams; i++)
+ update_hw_active(group->bound_queues[i], false);
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */
for (i = 0; idle && i < max_streams; i++) {
struct kbase_queue *const queue =
group->bound_queues[i];
@@ -1774,9 +2498,14 @@
if (!queue || !queue->enabled)
continue;
- if (save_slot_cs(ginfo, queue))
- sync_wait = true;
- else {
+ if (save_slot_cs(ginfo, queue)) {
+ /* sync_wait is only true if the queue is blocked on
+ * a CQS and not a scoreboard.
+ */
+ if (queue->blocked_reason !=
+ CS_STATUS_BLOCKED_ON_SB_WAIT)
+ sync_wait = true;
+ } else {
/* Need to confirm if ringbuffer of the GPU
* queue is empty or not. A race can arise
* between the flush of GPU queue and suspend
@@ -1801,14 +2530,19 @@
else {
group->run_state =
KBASE_CSF_GROUP_SUSPENDED_ON_IDLE;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED_ON_IDLE, group,
+ group->run_state);
dev_dbg(kbdev->dev, "Group-%d suspended: idle",
group->handle);
}
} else {
group->run_state = KBASE_CSF_GROUP_SUSPENDED;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED, group,
+ group->run_state);
}
update_offslot_non_idle_cnt_on_grp_suspend(group);
+ kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(group);
}
}
@@ -1885,6 +2619,11 @@
KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev,
kbdev->gpu_props.props.raw_props.gpu_id, slot);
+ /* Notify that the group is off-slot and its csg_reg might be available for
+ * reuse by other groups, in a 'lazy unbinding' style.
+ */
+ kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
+
return as_fault;
}
@@ -1931,6 +2670,7 @@
csg_req ^= CSG_REQ_EP_CFG_MASK;
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
CSG_REQ_EP_CFG_MASK);
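+ /* Kick the CSG doorbell for the endpoint re-configuration request while
+ * the interrupt lock is still held.
+ */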
+ kbase_csf_ring_csg_doorbell(kbdev, slot);
spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
csg_slot->priority = prio;
@@ -1939,9 +2679,8 @@
group->handle, group->kctx->tgid, group->kctx->id, slot,
prev_prio, prio);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PRIO_UPDATE, group, prev_prio);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_PRIO_UPDATE, group, prev_prio);
- kbase_csf_ring_csg_doorbell(kbdev, slot);
set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update);
}
@@ -1968,8 +2707,8 @@
u32 state;
int i;
unsigned long flags;
- const u64 normal_suspend_buf =
- group->normal_suspend_buf.reg->start_pfn << PAGE_SHIFT;
+ u64 normal_suspend_buf;
+ u64 protm_suspend_buf;
struct kbase_csf_csg_slot *csg_slot =
&kbdev->csf.scheduler.csg_slots[slot];
@@ -1981,6 +2720,19 @@
WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY);
+ if (unlikely(kbase_csf_mcu_shared_group_bind_csg_reg(kbdev, group))) {
+ dev_warn(kbdev->dev,
+ "Couldn't bind MCU shared csg_reg for group %d of context %d_%d, slot=%u",
+ group->handle, group->kctx->tgid, kctx->id, slot);
+ kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
+ return;
+ }
+
+ /* The suspend buf has already been mapped through binding to csg_reg */
+ normal_suspend_buf = group->normal_suspend_buf.gpu_va;
+ protm_suspend_buf = group->protected_suspend_buf.gpu_va;
+ WARN_ONCE(!normal_suspend_buf, "Normal suspend buffer not mapped");
+
ginfo = &global_iface->groups[slot];
/* Pick an available address space for this context */
@@ -1991,8 +2743,9 @@
mutex_unlock(&kbdev->mmu_hw_mutex);
if (kctx->as_nr == KBASEP_AS_NR_INVALID) {
- dev_warn(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n",
+ dev_dbg(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n",
group->handle, kctx->tgid, kctx->id, slot);
+ kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
return;
}
@@ -2025,6 +2778,9 @@
kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER,
tiler_mask & U32_MAX);
+ /* Register group UID with firmware */
+ kbase_csf_firmware_csg_input(ginfo, CSG_ITER_TRACE_CONFIG,
+ group->group_uid);
ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max);
ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max);
@@ -2040,14 +2796,21 @@
kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI,
normal_suspend_buf >> 32);
- if (group->protected_suspend_buf.reg) {
- const u64 protm_suspend_buf =
- group->protected_suspend_buf.reg->start_pfn <<
- PAGE_SHIFT;
- kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO,
- protm_suspend_buf & U32_MAX);
- kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI,
- protm_suspend_buf >> 32);
+ /* Note: the P-mode suspend buffer pointer is programmed here, but actual
+ * entry into P-mode execution additionally requires that the P-mode
+ * physical pages have been allocated and mapped for the bound csg_reg,
+ * which carries a dedicated flag indicating this P-mode runnable condition
+ * before the group is granted entry to its P-mode section. Without such an
+ * entry, the buffer pointed to here is never accessed.
+ */
+ kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, protm_suspend_buf & U32_MAX);
+ kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, protm_suspend_buf >> 32);
+
+ if (group->dvs_buf) {
+ kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_LO,
+ group->dvs_buf & U32_MAX);
+ kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_HI,
+ group->dvs_buf >> 32);
}
/* Enable all interrupts for now */
@@ -2069,6 +2832,7 @@
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
state, CSG_REQ_STATE_MASK);
+ kbase_csf_ring_csg_doorbell(kbdev, slot);
spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
/* Update status before rings the door-bell, marking ready => run */
@@ -2077,21 +2841,25 @@
csg_slot->priority = prio;
/* Trace the programming of the CSG on the slot */
- KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(kbdev,
- kbdev->gpu_props.props.raw_props.gpu_id, group->handle, slot);
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, group->kctx->id,
+ group->handle, slot, (state == CSG_REQ_STATE_RESUME) ? 1 : 0);
dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n",
group->handle, kctx->tgid, kctx->id, slot, prio);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START, group,
- (((u64)ep_cfg) << 32) |
- ((((u32)kctx->as_nr) & 0xF) << 16) |
- (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT)));
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START_REQ, group,
+ (((u64)ep_cfg) << 32) | ((((u32)kctx->as_nr) & 0xF) << 16) |
+ (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT)));
- kbase_csf_ring_csg_doorbell(kbdev, slot);
+ /* Update the heap reclaim manager */
+ kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(group);
/* Programming a slot consumes a group from scanout */
update_offslot_non_idle_cnt_for_onslot_grp(group);
+
+ /* Notify that the group's bound csg_reg is now in active use */
+ kbase_csf_mcu_shared_set_group_csg_reg_active(kbdev, group);
}
static void remove_scheduled_group(struct kbase_device *kbdev,
@@ -2112,7 +2880,7 @@
}
static void sched_evict_group(struct kbase_queue_group *group, bool fault,
- bool update_non_idle_offslot_grps_cnt)
+ bool update_non_idle_offslot_grps_cnt_from_run_state)
{
struct kbase_context *kctx = group->kctx;
struct kbase_device *kbdev = kctx->kbdev;
@@ -2123,13 +2891,13 @@
if (queue_group_scheduled_locked(group)) {
u32 i;
- if (update_non_idle_offslot_grps_cnt &&
+ if (update_non_idle_offslot_grps_cnt_from_run_state &&
(group->run_state == KBASE_CSF_GROUP_SUSPENDED ||
group->run_state == KBASE_CSF_GROUP_RUNNABLE)) {
int new_val = atomic_dec_return(
&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
- group, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group,
+ new_val);
}
for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
@@ -2138,8 +2906,11 @@
}
if (group->prepared_seq_num !=
- KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID)
+ KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) {
+ if (!update_non_idle_offslot_grps_cnt_from_run_state)
+ update_offslot_non_idle_cnt(group);
remove_scheduled_group(kbdev, group);
+ }
if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
remove_group_from_idle_wait(group);
@@ -2150,17 +2921,25 @@
WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
- if (fault)
+ if (fault) {
group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_FAULT_EVICTED, group,
+ scheduler->total_runnable_grps);
+ }
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT_SCHED, group,
- (((u64)scheduler->total_runnable_grps) << 32) |
- ((u32)group->run_state));
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT, group,
+ (((u64)scheduler->total_runnable_grps) << 32) |
+ ((u32)group->run_state));
dev_dbg(kbdev->dev, "group %d exited scheduler, num_runnable_grps %d\n",
group->handle, scheduler->total_runnable_grps);
/* Notify a group has been evicted */
wake_up_all(&kbdev->csf.event_wait);
}
+
+ kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(group);
+
+ /* Clear all the bound shared regions and unmap any in-place MMU maps */
+ kbase_csf_mcu_shared_clear_evicted_group_csg_reg(kbdev, group);
}
static int term_group_sync(struct kbase_queue_group *group)
@@ -2172,14 +2951,23 @@
term_csg_slot(group);
remaining = wait_event_timeout(kbdev->csf.event_wait,
- csg_slot_stopped_locked(kbdev, group->csg_nr), remaining);
+ group->cs_unrecoverable || csg_slot_stopped_locked(kbdev, group->csg_nr),
+ remaining);
- if (!remaining) {
- dev_warn(kbdev->dev, "term request timed out for group %d of context %d_%d on slot %d",
+ if (unlikely(!remaining)) {
+ enum dumpfault_error_type error_type = DF_CSG_TERMINATE_TIMEOUT;
+
+ dev_warn(kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d",
+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
group->handle, group->kctx->tgid,
group->kctx->id, group->csg_nr);
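+ /* A follow-up ping helps distinguish a stuck CSG from an unresponsive
+ * MCU; if even the ping times out, report a ping failure instead.
+ */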
+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
+ error_type = DF_PING_REQUEST_TIMEOUT;
+ kbase_debug_csf_fault_notify(kbdev, group->kctx, error_type);
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
+
err = -ETIMEDOUT;
}
@@ -2190,46 +2978,65 @@
{
struct kbase_device *kbdev = group->kctx->kbdev;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
- long remaining =
- kbase_csf_timeout_in_jiffies(CSG_SCHED_STOP_TIMEOUT_MS);
- bool force = false;
+ bool wait_for_termination = true;
+ bool on_slot;
kbase_reset_gpu_assert_failed_or_prevented(kbdev);
lockdep_assert_held(&group->kctx->csf.lock);
mutex_lock(&scheduler->lock);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_DESCHEDULE, group, group->run_state);
- while (queue_group_scheduled_locked(group)) {
- u32 saved_state = scheduler->state;
+ wait_for_dump_complete_on_group_deschedule(group);
+ if (!queue_group_scheduled_locked(group))
+ goto unlock;
- if (!kbasep_csf_scheduler_group_is_on_slot_locked(group)) {
- sched_evict_group(group, false, true);
- } else if (saved_state == SCHED_INACTIVE || force) {
- bool as_faulty;
+ on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
- term_group_sync(group);
- /* Treat the csg been terminated */
- as_faulty = cleanup_csg_slot(group);
- /* remove from the scheduler list */
- sched_evict_group(group, as_faulty, false);
- }
+#ifdef KBASE_PM_RUNTIME
+ /* If the queue group is on slot and Scheduler is in SLEEPING state,
+ * then we need to wake up the Scheduler to exit the sleep state rather
+ * than waiting for the runtime suspend or power down of GPU.
+ * Group termination is usually triggered in the context of an application
+ * thread, and it has been observed that certain applications can destroy
+ * groups at random points, not necessarily when the application is exiting.
+ */
+ if (on_slot && (scheduler->state == SCHED_SLEEPING)) {
+ scheduler_wakeup(kbdev, true);
- /* waiting scheduler state to change */
- if (queue_group_scheduled_locked(group)) {
- mutex_unlock(&scheduler->lock);
- remaining = wait_event_timeout(
- kbdev->csf.event_wait,
- saved_state != scheduler->state,
- remaining);
- if (!remaining) {
- dev_warn(kbdev->dev, "Scheduler state change wait timed out for group %d on slot %d",
- group->handle, group->csg_nr);
- force = true;
- }
- mutex_lock(&scheduler->lock);
+ /* Wait for MCU firmware to start running */
+ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
+ dev_warn(
+ kbdev->dev,
+ "[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d",
+ kbase_backend_get_cycle_cnt(kbdev),
+ group->handle, group->kctx->tgid,
+ group->kctx->id, group->csg_nr);
+ /* No point in waiting for CSG termination if MCU didn't
+ * become active.
+ */
+ wait_for_termination = false;
}
}
+#endif
+ if (!on_slot) {
+ sched_evict_group(group, false, true);
+ } else {
+ bool as_faulty;
+ if (likely(wait_for_termination))
+ term_group_sync(group);
+ else
+ term_csg_slot(group);
+
+ /* Treat the CSG as having been terminated */
+ as_faulty = cleanup_csg_slot(group);
+ /* remove from the scheduler list */
+ sched_evict_group(group, as_faulty, false);
+ }
+
+ WARN_ON(queue_group_scheduled_locked(group));
+
+unlock:
mutex_unlock(&scheduler->lock);
}
@@ -2269,6 +3076,8 @@
group));
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
+ group->run_state);
/* A normal mode CSG could be idle onslot during
* protected mode. In this case clear the
@@ -2279,6 +3088,8 @@
if (protm_grp && protm_grp != group) {
clear_bit((unsigned int)group->csg_nr,
scheduler->csg_slots_idle_mask);
+ /* Request the update to confirm the condition inferred. */
+ group->reevaluate_idle_status = true;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
scheduler->csg_slots_idle_mask[0]);
}
@@ -2299,13 +3110,13 @@
}
} else if (!queue_group_scheduled_locked(group)) {
int new_val;
+
insert_group_to_runnable(&kbdev->csf.scheduler, group,
KBASE_CSF_GROUP_RUNNABLE);
/* A new group into the scheduler */
new_val = atomic_inc_return(
&kbdev->csf.scheduler.non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
- group, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val);
}
/* Since a group has become active now, check if GPU needs to be
@@ -2508,8 +3319,7 @@
scheduler->remaining_tick_slots--;
}
} else {
- update_offslot_non_idle_cnt_for_faulty_grp(
- group);
+ update_offslot_non_idle_cnt(group);
remove_scheduled_group(kbdev, group);
}
}
@@ -2621,18 +3431,21 @@
csg_slot_stopped_raw),
remaining);
- if (remaining) {
+ if (likely(remaining)) {
u32 i;
for_each_set_bit(i, changed, num_groups) {
struct kbase_queue_group *group =
scheduler->csg_slots[i].resident_group;
- if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) {
+ if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i)))
continue;
- }
+
/* The on slot csg is now stopped */
clear_bit(i, slot_mask);
+
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
if (likely(group)) {
bool as_fault;
@@ -2661,6 +3474,7 @@
for_each_set_bit(i, slot_mask, num_groups) {
struct kbase_queue_group *const group =
scheduler->csg_slots[i].resident_group;
+ enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
struct base_gpu_queue_group_error const
err_payload = { .error_type =
@@ -2674,27 +3488,30 @@
if (unlikely(group == NULL))
continue;
- kbase_csf_add_group_fatal_error(group,
- &err_payload);
- kbase_event_wakeup(group->kctx);
-
/* TODO GPUCORE-25328: The CSG can't be
* terminated, the GPU will be reset as a
* work-around.
*/
dev_warn(
kbdev->dev,
- "Group %d of context %d_%d on slot %u failed to suspend",
+ "[%llu] Group %d of context %d_%d on slot %u failed to suspend (timeout %d ms)",
+ kbase_backend_get_cycle_cnt(kbdev),
group->handle, group->kctx->tgid,
- group->kctx->id, i);
+ group->kctx->id, i,
+ kbdev->csf.fw_timeout_ms);
+ if (kbase_csf_firmware_ping_wait(kbdev,
+ FW_PING_AFTER_ERROR_TIMEOUT_MS))
+ error_type = DF_PING_REQUEST_TIMEOUT;
+ schedule_actions_trigger_df(kbdev, group->kctx, error_type);
+
+ kbase_csf_add_group_fatal_error(group, &err_payload);
+ kbase_event_wakeup(group->kctx);
/* The group has failed suspension, stop
* further examination.
*/
clear_bit(i, slot_mask);
set_bit(i, scheduler->csgs_events_enable_mask);
- update_offslot_non_idle_cnt_for_onslot_grp(
- group);
}
suspend_wait_failed = true;
@@ -2774,7 +3591,7 @@
slots_state_changed(kbdev, changed, csg_slot_running),
remaining);
- if (remaining) {
+ if (likely(remaining)) {
for_each_set_bit(i, changed, num_groups) {
struct kbase_queue_group *group =
scheduler->csg_slots[i].resident_group;
@@ -2782,10 +3599,22 @@
/* The on slot csg is now running */
clear_bit(i, slot_mask);
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
+ group->run_state);
}
} else {
- dev_warn(kbdev->dev, "Timed out waiting for CSG slots to start, slots: 0x%*pb\n",
- num_groups, slot_mask);
+ const int csg_nr = ffs(slot_mask[0]) - 1;
+ struct kbase_queue_group *group =
+ scheduler->csg_slots[csg_nr].resident_group;
+ enum dumpfault_error_type error_type = DF_CSG_START_TIMEOUT;
+
+ dev_err(kbdev->dev,
+ "[%llu] Timeout (%d ms) waiting for CSG slots to start, slots: 0x%*pb\n",
+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
+ num_groups, slot_mask);
+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
+ error_type = DF_PING_REQUEST_TIMEOUT;
+ schedule_actions_trigger_df(kbdev, group->kctx, error_type);
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
@@ -2799,14 +3628,14 @@
* flagged after the completion of a CSG status
* update command
*
+ * @kbdev: Pointer to the GPU device.
+ * @slot: The given slot for checking an occupying resident group's idle
+ * state.
+ *
* This function is called at the start of scheduling tick to check the
* idle status of a queue group resident on a CSG slot.
* The caller must make sure the corresponding status update command has
* been called and completed before checking this status.
- *
- * @kbdev: Pointer to the GPU device.
- * @slot: The given slot for checking an occupying resident group's idle
- * state.
*
* Return: true if the group resident on slot is idle, otherwise false.
*/
@@ -2827,15 +3656,15 @@
* slots_update_state_changed() - Check the handshake state of a subset of
* command group slots.
*
- * Checks the state of a subset of slots selected through the slots_mask
- * bit_map. Records which slots' handshake completed and send it back in the
- * slots_done bit_map.
- *
* @kbdev: The GPU device.
* @field_mask: The field mask for checking the state in the csg_req/ack.
* @slots_mask: A bit_map specifying the slots to check.
* @slots_done: A cleared bit_map for returning the slots that
* have finished update.
+ *
+ * Checks the state of a subset of slots selected through the slots_mask
+ * bit_map. Records which slots' handshake completed and sends it back in the
+ * slots_done bit_map.
*
* Return: true if the slots_done is set for at least one slot.
* Otherwise false.
@@ -2870,16 +3699,16 @@
* wait_csg_slots_handshake_ack - Wait the req/ack handshakes to complete on
* the specified groups.
*
- * This function waits for the acknowledgement of the request that have
- * already been placed for the CSG slots by the caller. Currently used for
- * the CSG priority update and status update requests.
- *
* @kbdev: Pointer to the GPU device.
* @field_mask: The field mask for checking the state in the csg_req/ack.
* @slot_mask: Bitmap reflecting the slots, the function will modify
* the acknowledged slots by clearing their corresponding
* bits.
* @wait_in_jiffies: Wait duration in jiffies, controlling the time-out.
+ *
+ * This function waits for the acknowledgment of the requests that have
+ * already been placed for the CSG slots by the caller. Currently used for
+ * the CSG priority update and status update requests.
*
* Return: 0 on all specified slots acknowledged; otherwise -ETIMEDOUT. For
* timed out condition with unacknowledged slots, their bits remain
@@ -2902,11 +3731,13 @@
slot_mask, dones),
remaining);
- if (remaining)
+ if (likely(remaining))
bitmap_andnot(slot_mask, slot_mask, dones, num_groups);
- else
+ else {
/* Timed-out on the wait */
return -ETIMEDOUT;
+ }
}
return 0;
@@ -2922,16 +3753,37 @@
lockdep_assert_held(&kbdev->csf.scheduler.lock);
- if (ret != 0) {
- /* The update timeout is not regarded as a serious
- * issue, no major consequences are expected as a
- * result, so just warn the case.
- */
+ if (unlikely(ret != 0)) {
+ const int csg_nr = ffs(slot_mask[0]) - 1;
+ struct kbase_queue_group *group =
+ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
+ enum dumpfault_error_type error_type = DF_CSG_EP_CFG_TIMEOUT;
+
dev_warn(
kbdev->dev,
- "Timeout on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx",
+ "[%llu] Timeout (%d ms) on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx",
+ kbase_backend_get_cycle_cnt(kbdev),
+ kbdev->csf.fw_timeout_ms,
slot_mask[0]);
+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
+ error_type = DF_PING_REQUEST_TIMEOUT;
+ schedule_actions_trigger_df(kbdev, group->kctx, error_type);
+
+ /* Timeout could indicate firmware is unresponsive so trigger a GPU reset. */
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu(kbdev);
}
+}
+
+static void report_csg_termination(struct kbase_queue_group *const group)
+{
+ struct base_gpu_queue_group_error
+ err = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
+ .payload = { .fatal_group = {
+ .status = GPU_EXCEPTION_TYPE_SW_FAULT_2,
+ } } };
+
+ kbase_csf_add_group_fatal_error(group, &err);
}
void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
@@ -2951,16 +3803,21 @@
*/
WARN_ON(!kbase_reset_gpu_is_active(kbdev));
- KBASE_KTRACE_ADD(kbdev, EVICT_CTX_SLOTS, kctx, 0u);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_EVICT_CTX_SLOTS_START, kctx, 0u);
for (slot = 0; slot < num_groups; slot++) {
group = kbdev->csf.scheduler.csg_slots[slot].resident_group;
if (group && group->kctx == kctx) {
bool as_fault;
+ dev_dbg(kbdev->dev, "Evicting group [%d] running on slot [%d] due to reset",
+ group->handle, group->csg_nr);
+
term_csg_slot(group);
as_fault = cleanup_csg_slot(group);
/* remove the group from the scheduler list */
sched_evict_group(group, as_fault, false);
+ /* signal Userspace that CSG is being terminated */
+ report_csg_termination(group);
/* return the evicted group to the caller */
list_add_tail(&group->link, evicted_groups);
set_bit(slot, slot_mask);
@@ -2970,20 +3827,30 @@
dev_info(kbdev->dev, "Evicting context %d_%d slots: 0x%*pb\n",
kctx->tgid, kctx->id, num_groups, slot_mask);
+ /* Fatal errors may have been the cause of the GPU reset
+ * taking place, in which case we want to make sure that
+ * we wake up the fatal event queue to notify userspace
+ * only once. Otherwise, we may have duplicate event
+ * notifications between the time the first notification
+ * occurs and the time the GPU is reset.
+ */
+ kbase_event_wakeup(kctx);
+
mutex_unlock(&scheduler->lock);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_EVICT_CTX_SLOTS_END, kctx, num_groups);
}
/**
* scheduler_slot_protm_ack - Acknowledging the protected region requests
* from the resident group on a given slot.
*
- * The function assumes that the given slot is in stable running state and
- * has already been judged by the caller on that any pending protected region
- * requests of the resident group should be acknowledged.
- *
* @kbdev: Pointer to the GPU device.
* @group: Pointer to the resident group on the given slot.
* @slot: The slot that the given group is actively operating on.
+ *
+ * The function assumes that the given slot is in a stable running state and
+ * that the caller has already decided that any pending protected region
+ * requests of the resident group should be acknowledged.
*
* Return: true if the group has pending protm request(s) and is acknowledged.
* The caller should arrange to enter the protected mode for servicing
@@ -3014,8 +3881,8 @@
struct kbase_queue *queue = group->bound_queues[i];
clear_bit(i, group->protm_pending_bitmap);
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_CLEAR, group,
- queue, group->protm_pending_bitmap[0]);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_CLEAR, group, queue,
+ group->protm_pending_bitmap[0]);
if (!WARN_ON(!queue) && queue->enabled) {
struct kbase_csf_cmd_stream_info *stream =
@@ -3051,8 +3918,47 @@
}
/**
+ * protm_enter_set_next_pending_seq - Update the scheduler's field of
+ * tick_protm_pending_seq to that from the next available on-slot protm
+ * pending CSG.
+ *
+ * @kbdev: Pointer to the GPU device.
+ *
+ * If applicable, the function updates the scheduler's tick_protm_pending_seq
+ * field from the next available on-slot protm pending CSG. If not, the field
+ * is set to KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID.
+ */
+static void protm_enter_set_next_pending_seq(struct kbase_device *const kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ u32 num_groups = kbdev->csf.global_iface.group_num;
+ u32 num_csis = kbdev->csf.global_iface.groups[0].stream_num;
+ DECLARE_BITMAP(active_csgs, MAX_SUPPORTED_CSGS) = { 0 };
+ u32 i;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
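+ /* The idle mask is a subset of the in-use mask, so XOR-ing the two
+ * leaves only the in-use slots that are not idle, i.e. the CSGs treated
+ * as active here.
+ */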
+ bitmap_xor(active_csgs, scheduler->csg_slots_idle_mask, scheduler->csg_inuse_bitmap,
+ num_groups);
+ /* Reset the tick's pending protm seq number to invalid initially */
+ scheduler->tick_protm_pending_seq = KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
+ for_each_set_bit(i, active_csgs, num_groups) {
+ struct kbase_queue_group *group = scheduler->csg_slots[i].resident_group;
+
+ /* Set to the next pending protm group's scan_seq_number */
+ if ((group != scheduler->active_protm_grp) &&
+ (!bitmap_empty(group->protm_pending_bitmap, num_csis)) &&
+ (group->scan_seq_num < scheduler->tick_protm_pending_seq))
+ scheduler->tick_protm_pending_seq = group->scan_seq_num;
+ }
+}
+
+/**
* scheduler_group_check_protm_enter - Request the given group to be evaluated
* for triggering the protected mode.
+ *
+ * @kbdev: Pointer to the GPU device.
+ * @input_grp: Pointer to the GPU queue group.
*
* The function assumes the given group is either an active running group or
* the scheduler internally maintained field scheduler->top_grp.
@@ -3060,24 +3966,35 @@
* If the GPU is not already running in protected mode and the input group
* has protected region requests from its bound queues, the requests are
* acknowledged and the GPU is instructed to enter the protected mode.
- *
- * @kbdev: Pointer to the GPU device.
- * @input_grp: Pointer to the GPU queue group.
*/
static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
struct kbase_queue_group *const input_grp)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ struct kbase_protected_suspend_buffer *sbuf = &input_grp->protected_suspend_buf;
unsigned long flags;
bool protm_in_use;
lockdep_assert_held(&scheduler->lock);
+ /* Return early if the physical pages have not been allocated yet */
+ if (unlikely(!sbuf->pma))
+ return;
+
+ /* This lock is taken to prevent the issuing of MMU command during the
+ * transition to protected mode. This helps avoid the scenario where the
+ * entry to protected mode happens with a memory region being locked and
+ * the same region is then accessed by the GPU in protected mode.
+ */
+ mutex_lock(&kbdev->mmu_hw_mutex);
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
- protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_CHECK_PROTM_ENTER, input_grp,
- protm_in_use);
+ /* Check if the previous transition to enter & exit the protected
+ * mode has completed or not.
+ */
+ protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev) ||
+ kbdev->protected_mode;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER_CHECK, input_grp, protm_in_use);
/* Firmware samples the PROTM_PEND ACK bit for CSs when
* Host sends PROTM_ENTER global request. So if PROTM_PEND ACK bit
@@ -3108,6 +4025,8 @@
CSG_SLOT_RUNNING) {
if (kctx_as_enabled(input_grp->kctx) &&
scheduler_slot_protm_ack(kbdev, input_grp, slot)) {
+ int err;
+
/* Option of acknowledging to multiple
* CSGs from the same kctx is dropped,
* after consulting with the
@@ -3115,22 +4034,77 @@
* GPUCORE-21394.
*/
- /* Disable the idle timer */
- disable_gpu_idle_fw_timer_locked(kbdev);
-
/* Switch to protected mode */
scheduler->active_protm_grp = input_grp;
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_ENTER_PROTM,
- input_grp, 0u);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER, input_grp,
+ 0u);
+
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+
+ /* Coresight must be disabled before entering protected mode. */
+ kbase_debug_coresight_csf_disable_pmode_enter(kbdev);
+
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+
+ kbase_csf_enter_protected_mode(kbdev);
+ /* Set the pending protm seq number to the next one */
+ protm_enter_set_next_pending_seq(kbdev);
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
- kbase_csf_enter_protected_mode(kbdev);
+
+ err = kbase_csf_wait_protected_mode_enter(kbdev);
+ mutex_unlock(&kbdev->mmu_hw_mutex);
+
+ if (err)
+ schedule_actions_trigger_df(kbdev, input_grp->kctx,
+ DF_PROTECTED_MODE_ENTRY_FAILURE);
+
+ scheduler->protm_enter_time = ktime_get_raw();
+
return;
}
}
}
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+ mutex_unlock(&kbdev->mmu_hw_mutex);
+}
+
+/**
+ * scheduler_check_pmode_progress - Check if protected mode execution is progressing
+ *
+ * @kbdev: Pointer to the GPU device.
+ *
+ * This function is called when the GPU is in protected mode.
+ *
+ * It will check if the time spent in protected mode is less
+ * than CSF_SCHED_PROTM_PROGRESS_TIMEOUT. If not, a PROTM_EXIT
+ * request is sent to the FW.
+ */
+static void scheduler_check_pmode_progress(struct kbase_device *kbdev)
+{
+ u64 protm_spent_time_ms;
+ u64 protm_progress_timeout =
+ kbase_get_timeout_ms(kbdev, CSF_SCHED_PROTM_PROGRESS_TIMEOUT);
+ s64 diff_ms_signed =
+ ktime_ms_delta(ktime_get_raw(), kbdev->csf.scheduler.protm_enter_time);
+
+ if (diff_ms_signed < 0)
+ return;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+ protm_spent_time_ms = (u64)diff_ms_signed;
+ if (protm_spent_time_ms < protm_progress_timeout)
+ return;
+
+ dev_dbg(kbdev->dev, "Protected mode progress timeout: %llu >= %llu",
+ protm_spent_time_ms, protm_progress_timeout);
+
+ /* Prompt the FW to exit protected mode */
+ scheduler_force_protm_exit(kbdev);
}
static void scheduler_apply(struct kbase_device *kbdev)
@@ -3160,7 +4134,7 @@
}
}
- /* Initialize the remaining avialable csg slots for the tick/tock */
+ /* Initialize the remaining available csg slots for the tick/tock */
scheduler->remaining_tick_slots = available_csg_slots;
/* If there are spare slots, apply heads in the list */
@@ -3186,8 +4160,7 @@
if (!kctx_as_enabled(group->kctx) || group->faulted) {
/* Drop the head group and continue */
- update_offslot_non_idle_cnt_for_faulty_grp(
- group);
+ update_offslot_non_idle_cnt(group);
remove_scheduled_group(kbdev, group);
continue;
}
@@ -3217,6 +4190,7 @@
struct kbase_queue_group *group;
lockdep_assert_held(&scheduler->lock);
+ lockdep_assert_held(&scheduler->interrupt_lock);
if (WARN_ON(priority < 0) ||
WARN_ON(priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
return;
@@ -3236,9 +4210,18 @@
/* Set the scanout sequence number, starting from 0 */
group->scan_seq_num = scheduler->csg_scan_count_for_tick++;
+ if (scheduler->tick_protm_pending_seq ==
+ KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) {
+ if (!bitmap_empty(group->protm_pending_bitmap,
+ kbdev->csf.global_iface.groups[0].stream_num))
+ scheduler->tick_protm_pending_seq =
+ group->scan_seq_num;
+ }
+
if (queue_group_idle_locked(group)) {
- list_add_tail(&group->link_to_schedule,
- &scheduler->idle_groups_to_schedule);
+ if (can_schedule_idle_group(group))
+ list_add_tail(&group->link_to_schedule,
+ &scheduler->idle_groups_to_schedule);
continue;
}
@@ -3261,6 +4244,8 @@
* scheduler_rotate_groups() - Rotate the runnable queue groups to provide
* fairness of scheduling within a single
* kbase_context.
+ *
+ * @kbdev: Pointer to the GPU device.
*
* Since only kbase_csf_scheduler's top_grp (i.e. the queue group assigned
* the highest slot priority) is guaranteed to get the resources that it
@@ -3300,8 +4285,6 @@
* the kbase_csf_scheduler's groups_to_schedule list. In this example, it will
* be for a group in the next lowest priority level or in absence of those the
* next kbase_context's queue groups.
- *
- * @kbdev: Pointer to the GPU device.
*/
static void scheduler_rotate_groups(struct kbase_device *kbdev)
{
@@ -3317,14 +4300,14 @@
WARN_ON(top_grp->kctx != top_ctx);
if (!WARN_ON(list_empty(list))) {
struct kbase_queue_group *new_head_grp;
+
list_move_tail(&top_grp->link, list);
new_head_grp = (!list_empty(list)) ?
list_first_entry(list, struct kbase_queue_group, link) :
NULL;
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_ROTATE_RUNNABLE,
- top_grp, top_ctx->csf.sched.num_runnable_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_HEAD_RUNNABLE,
- new_head_grp, 0u);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_ROTATE, top_grp,
+ top_ctx->csf.sched.num_runnable_grps);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u);
dev_dbg(kbdev->dev,
"groups rotated for a context, num_runnable_groups: %u\n",
scheduler->top_ctx->csf.sched.num_runnable_grps);
@@ -3353,14 +4336,14 @@
if (!WARN_ON(!found)) {
struct kbase_context *new_head_kctx;
+
list_move_tail(&pos->csf.link, list);
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_ROTATE_RUNNABLE, pos,
- 0u);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_ROTATE, pos, 0u);
new_head_kctx = (!list_empty(list)) ?
list_first_entry(list, struct kbase_context, csf.link) :
NULL;
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_HEAD_RUNNABLE,
- new_head_kctx, 0u);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx,
+ 0u);
dev_dbg(kbdev->dev, "contexts rotated\n");
}
}
@@ -3372,17 +4355,22 @@
* slots for which the IDLE notification was received
* previously.
*
- * This function sends a CSG status update request for all the CSG slots
- * present in the bitmap scheduler->csg_slots_idle_mask and wait for the
- * request to complete.
- * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by
- * this function.
- *
* @kbdev: Pointer to the GPU device.
* @csg_bitmap: Bitmap of the CSG slots for which
* the status update request completed successfully.
- * @failed_csg_bitmap: Bitmap of the CSG slots for which
+ * @failed_csg_bitmap: Bitmap of the idle CSG slots for which
* the status update request timedout.
+ *
+ * This function sends a CSG status update request for all the CSG slots
+ * present in the bitmap scheduler->csg_slots_idle_mask. Additionally, slots
+ * whose group has the 'reevaluate_idle_status' field set (nominally
+ * non-idle) are included in the status update so their status can be
+ * confirmed. The function waits for the status update requests to complete
+ * and returns the bitmap of slots whose update completed, plus the bitmap
+ * of idle-flagged slots that timed out.
+ *
+ * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by
+ * this function.
*/
static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
unsigned long *csg_bitmap, unsigned long *failed_csg_bitmap)
@@ -3392,34 +4380,70 @@
struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
unsigned long flags, i;
+ u32 active_chk = 0;
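+ /* active_chk tracks the nominally non-idle slots that get included in
+ * the status update because their idle status needs re-evaluation.
+ */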
lockdep_assert_held(&scheduler->lock);
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
- for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) {
+
+ for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) {
struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
struct kbase_queue_group *group = csg_slot->resident_group;
struct kbase_csf_cmd_stream_group_info *const ginfo =
&global_iface->groups[i];
u32 csg_req;
+ bool idle_flag;
- clear_bit(i, scheduler->csg_slots_idle_mask);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
- scheduler->csg_slots_idle_mask[0]);
- if (WARN_ON(!group))
+ if (WARN_ON(!group)) {
+ clear_bit(i, scheduler->csg_inuse_bitmap);
+ clear_bit(i, scheduler->csg_slots_idle_mask);
continue;
+ }
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STATUS_UPDATE, group,
- i);
+ idle_flag = test_bit(i, scheduler->csg_slots_idle_mask);
+ if (idle_flag || group->reevaluate_idle_status) {
+ if (idle_flag) {
+#ifdef CONFIG_MALI_BIFROST_DEBUG
+ if (!bitmap_empty(group->protm_pending_bitmap,
+ ginfo->stream_num)) {
+ dev_warn(kbdev->dev,
+ "Idle bit set for group %d of ctx %d_%d on slot %d with pending protm execution",
+ group->handle, group->kctx->tgid,
+ group->kctx->id, (int)i);
+ }
+#endif
+ clear_bit(i, scheduler->csg_slots_idle_mask);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
+ scheduler->csg_slots_idle_mask[0]);
+ } else {
+ /* The update also covers slots needing reevaluation; such
+ * extra slots are tracked in active_chk. Slots with pending
+ * protm requests are known to be active, so no update needs
+ * to be requested for them.
+ */
+ active_chk |= BIT(i);
+ group->reevaluate_idle_status = false;
+ }
- csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
- csg_req ^= CSG_REQ_STATUS_UPDATE_MASK;
- kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
- CSG_REQ_STATUS_UPDATE_MASK);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_UPDATE_IDLE_SLOT_REQ, group, i);
+ csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
+ csg_req ^= CSG_REQ_STATUS_UPDATE_MASK;
+ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
+ CSG_REQ_STATUS_UPDATE_MASK);
- set_bit(i, csg_bitmap);
+ /* Track the slot update requests in csg_bitmap.
+ * Note, if the scheduler requested an extended update, the resulting
+ * csg_bitmap would be the idle_flags + active_chk. Otherwise it's
+ * identical to the idle_flags.
+ */
+ set_bit(i, csg_bitmap);
+ } else {
+ group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
+ group->run_state);
+ }
}
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+
/* The groups are aggregated into a single kernel doorbell request */
if (!bitmap_empty(csg_bitmap, num_groups)) {
@@ -3428,22 +4452,43 @@
u32 db_slots = (u32)csg_bitmap[0];
kbase_csf_ring_csg_slots_doorbell(kbdev, db_slots);
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
if (wait_csg_slots_handshake_ack(kbdev,
CSG_REQ_STATUS_UPDATE_MASK, csg_bitmap, wt)) {
+ const int csg_nr = ffs(csg_bitmap[0]) - 1;
+ struct kbase_queue_group *group =
+ scheduler->csg_slots[csg_nr].resident_group;
+
dev_warn(
kbdev->dev,
- "Timeout on CSG_REQ:STATUS_UPDATE, treat groups as not idle: slot mask=0x%lx",
+ "[%llu] Timeout (%d ms) on CSG_REQ:STATUS_UPDATE, treat groups as not idle: slot mask=0x%lx",
+ kbase_backend_get_cycle_cnt(kbdev),
+ kbdev->csf.fw_timeout_ms,
csg_bitmap[0]);
+ schedule_actions_trigger_df(kbdev, group->kctx,
+ DF_CSG_STATUS_UPDATE_TIMEOUT);
/* Store the bitmap of timed out slots */
bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups);
csg_bitmap[0] = ~csg_bitmap[0] & db_slots;
+
+ /* Mask off any failed bits contributed by the active slots, so that
+ * the failed bitmap reported back to the caller contains only bits
+ * originating from the idle flags. This way, an idle flag whose
+ * update failed is kept as 'idle' (an informed guess, as the update
+ * was inconclusive), while failed active slots are still treated as
+ * 'non-idle'. This gives graceful handling of the unexpected timeout
+ * condition.
+ */
+ failed_csg_bitmap[0] &= ~active_chk;
+
} else {
- KBASE_KTRACE_ADD(kbdev, SLOTS_STATUS_UPDATE_ACK, NULL,
- db_slots);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_UPDATE_IDLE_SLOTS_ACK, NULL, db_slots);
csg_bitmap[0] = db_slots;
}
+ } else {
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
}
}
@@ -3451,6 +4496,8 @@
* scheduler_handle_idle_slots() - Update the idle status of queue groups
* resident on CSG slots for which the
* IDLE notification was received previously.
+ *
+ * @kbdev: Pointer to the GPU device.
*
* This function is called at the start of scheduling tick/tock to reconfirm
* the idle status of queue groups resident on CSG slots for
@@ -3465,8 +4512,6 @@
* updated accordingly.
* The bits corresponding to slots for which the status update request timedout
* remain set in scheduler->csg_slots_idle_mask.
- *
- * @kbdev: Pointer to the GPU device.
*/
static void scheduler_handle_idle_slots(struct kbase_device *kbdev)
{
@@ -3498,17 +4543,21 @@
if (group_on_slot_is_idle(kbdev, i)) {
group->run_state = KBASE_CSF_GROUP_IDLE;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_IDLE, group, group->run_state);
set_bit(i, scheduler->csg_slots_idle_mask);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET,
group, scheduler->csg_slots_idle_mask[0]);
- } else
+ } else {
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
+ group->run_state);
+ }
}
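/* Slots whose status update timed out keep their idle flag set, i.e. they
 * remain treated as idle by default.
 */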
bitmap_or(scheduler->csg_slots_idle_mask,
scheduler->csg_slots_idle_mask,
failed_csg_bitmap, num_groups);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, NULL,
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_HANDLE_IDLE_SLOTS, NULL,
scheduler->csg_slots_idle_mask[0]);
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
}
@@ -3520,8 +4569,7 @@
list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule,
link_to_schedule) {
-
- WARN_ON(!queue_group_idle_locked(group));
+ WARN_ON(!can_schedule_idle_group(group));
if (!scheduler->ngrp_to_schedule) {
/* keep the top csg's origin */
@@ -3575,39 +4623,109 @@
return NULL;
}
+/**
+ * suspend_active_groups_on_powerdown() - Suspend active CSG groups upon
+ * suspend or GPU IDLE.
+ *
+ * @kbdev: Pointer to the device
+ * @system_suspend: Flag to indicate it's for system suspend.
+ *
+ * This function will suspend all active CSG groups upon either
+ * system suspend, runtime suspend or GPU IDLE.
+ *
+ * Return: 0 on success, -1 otherwise.
+ */
static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
- bool is_suspend)
+ bool system_suspend)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 };
int ret = suspend_active_queue_groups(kbdev, slot_mask);
- if (ret) {
- /* The suspend of CSGs failed, trigger the GPU reset and wait
- * for it to complete to be in a deterministic state.
+ if (unlikely(ret)) {
+ const int csg_nr = ffs(slot_mask[0]) - 1;
+ struct kbase_queue_group *group =
+ scheduler->csg_slots[csg_nr].resident_group;
+ enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
+
+ /* The suspend of CSGs failed; trigger a GPU reset to get back to a
+ * deterministic state.
*/
- dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n",
+ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n",
+ kbase_backend_get_cycle_cnt(kbdev),
+ kbdev->csf.fw_timeout_ms,
kbdev->csf.global_iface.group_num, slot_mask);
+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
+ error_type = DF_PING_REQUEST_TIMEOUT;
+ schedule_actions_trigger_df(kbdev, group->kctx, error_type);
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
- if (is_suspend) {
- mutex_unlock(&scheduler->lock);
- kbase_reset_gpu_wait(kbdev);
- mutex_lock(&scheduler->lock);
- }
return -1;
}
/* Check if the groups became active whilst the suspend was ongoing,
* but only for the case where the system suspend is not in progress
*/
- if (!is_suspend && atomic_read(&scheduler->non_idle_offslot_grps))
+ if (!system_suspend && atomic_read(&scheduler->non_idle_offslot_grps))
return -1;
return 0;
+}
+
+/**
+ * all_on_slot_groups_remained_idle - Live check for all groups' idleness
+ *
+ * @kbdev: Pointer to the device.
+ *
+ * Returns false if any of the queues inside any of the groups that have been
+ * assigned a physical CSG slot have work to execute, or have executed work
+ * since having received a GPU idle notification. This function is used to
+ * handle a race condition between firmware reporting GPU idle and userspace
+ * submitting more work by directly ringing a doorbell.
+ *
+ * Return: false if any queue inside any resident group has work to be processed
+ * or has processed work since GPU idle event, true otherwise.
+ */
+static bool all_on_slot_groups_remained_idle(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ /* All CSGs have the same number of CSs */
+ size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num;
+ size_t i;
+
+ lockdep_assert_held(&scheduler->lock);
+ lockdep_assert_held(&scheduler->interrupt_lock);
+
+ for_each_set_bit(i, scheduler->csg_slots_idle_mask,
+ kbdev->csf.global_iface.group_num) {
+ struct kbase_queue_group *const group =
+ scheduler->csg_slots[i].resident_group;
+ size_t j;
+
+ for (j = 0; j < max_streams; ++j) {
+ struct kbase_queue const *const queue =
+ group->bound_queues[j];
+ u64 const *output_addr;
+ u64 cur_extract_ofs;
+
+ if (!queue || !queue->user_io_addr)
+ continue;
+
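+ /* The queue's output page sits one page above its input page in the
+ * user I/O mapping; CS_EXTRACT_LO there reflects how far the GPU has
+ * consumed the ring buffer.
+ */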
+ output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
+ cur_extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)];
+ if (cur_extract_ofs != queue->extract_ofs) {
+ /* More work has been executed since the idle
+ * notification.
+ */
+ return false;
+ }
+ }
+ }
+
+ return true;
}
static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
@@ -3618,25 +4736,106 @@
lockdep_assert_held(&scheduler->lock);
- if (scheduler->state == SCHED_SUSPENDED)
+ if ((scheduler->state == SCHED_SUSPENDED) ||
+ (scheduler->state == SCHED_SLEEPING))
return false;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ spin_lock(&scheduler->interrupt_lock);
+
+ if (scheduler->fast_gpu_idle_handling) {
+ scheduler->fast_gpu_idle_handling = false;
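+ /* Fast idle handling path: only the off-slot non-idle group count
+ * (or the absence of runnable groups) is checked here, without
+ * re-confirming the idle state of the on-slot CSGs.
+ */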
+
+ if (scheduler->total_runnable_grps) {
+ suspend = !atomic_read(&scheduler->non_idle_offslot_grps) &&
+ kbase_pm_idle_groups_sched_suspendable(kbdev);
+ } else
+ suspend = kbase_pm_no_runnables_sched_suspendable(kbdev);
+ spin_unlock(&scheduler->interrupt_lock);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return suspend;
+ }
+
if (scheduler->total_runnable_grps) {
- spin_lock(&scheduler->interrupt_lock);
/* Check both on-slots and off-slots groups idle status */
suspend = kbase_csf_scheduler_all_csgs_idle(kbdev) &&
!atomic_read(&scheduler->non_idle_offslot_grps) &&
kbase_pm_idle_groups_sched_suspendable(kbdev);
-
- spin_unlock(&scheduler->interrupt_lock);
} else
suspend = kbase_pm_no_runnables_sched_suspendable(kbdev);
+ /* Confirm that all groups are actually idle before proceeding with
+ * suspension as groups might potentially become active again without
+ * informing the scheduler in case userspace rings a doorbell directly.
+ */
+ if (suspend && (unlikely(atomic_read(&scheduler->gpu_no_longer_idle)) ||
+ unlikely(!all_on_slot_groups_remained_idle(kbdev))))
+ suspend = false;
+
+ spin_unlock(&scheduler->interrupt_lock);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return suspend;
+}
+
+#ifdef KBASE_PM_RUNTIME
+/**
+ * scheduler_sleep_on_idle - Put the Scheduler in sleeping state on GPU
+ * becoming idle.
+ *
+ * @kbdev: Pointer to the device.
+ *
+ * This function is called on GPU idle notification to trigger the transition of
+ * GPU to sleep state, where MCU firmware pauses execution and L2 cache is
+ * turned off. Scheduler's state is changed to sleeping and all the active queue
+ * groups remain on the CSG slots.
+ */
+static void scheduler_sleep_on_idle(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ dev_dbg(kbdev->dev,
+ "Scheduler to be put to sleep on GPU becoming idle");
+ cancel_tick_timer(kbdev);
+ scheduler_pm_idle_before_sleep(kbdev);
+ scheduler->state = SCHED_SLEEPING;
+ KBASE_KTRACE_ADD(kbdev, SCHED_SLEEPING, NULL, scheduler->state);
+}
+#endif
+
+/**
+ * scheduler_suspend_on_idle - Put the Scheduler in suspended state on GPU
+ * becoming idle.
+ *
+ * @kbdev: Pointer to the device.
+ *
+ * This function is called on GPU idle notification to trigger the power down of
+ * GPU. Scheduler's state is changed to suspended and all the active queue
+ * groups are suspended before halting the MCU firmware.
+ *
+ * Return: true if scheduler will be suspended or false if suspend is aborted.
+ */
+static bool scheduler_suspend_on_idle(struct kbase_device *kbdev)
+{
+ int ret = suspend_active_groups_on_powerdown(kbdev, false);
+
+ if (ret) {
+ dev_dbg(kbdev->dev, "Aborting suspend scheduler (grps: %d)",
+ atomic_read(
+ &kbdev->csf.scheduler.non_idle_offslot_grps));
+ /* Bring forward the next tick */
+ kbase_csf_scheduler_tick_advance(kbdev);
+ return false;
+ }
+
+ dev_dbg(kbdev->dev, "Scheduler to be suspended on GPU becoming idle");
+ scheduler_suspend(kbdev);
+ cancel_tick_timer(kbdev);
+ return true;
}
static void gpu_idle_worker(struct work_struct *work)
@@ -3644,53 +4843,58 @@
struct kbase_device *kbdev = container_of(
work, struct kbase_device, csf.scheduler.gpu_idle_work);
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
- bool reset_active = false;
bool scheduler_is_idle_suspendable = false;
bool all_groups_suspended = false;
- KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_BEGIN, NULL, 0u);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_START, NULL, 0u);
#define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \
(((u32)reset) | (((u32)idle) << 4) | (((u32)all_suspend) << 8))
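/* Packs the worker's exit status into one ktrace value: bits 0-3 carry the
 * reset flag, bits 4-7 the idle-suspendable flag and bits 8-11 whether all
 * groups were suspended, e.g. __ENCODE_KTRACE_INFO(true, false, false) is
 * 0x001 and __ENCODE_KTRACE_INFO(false, true, true) is 0x110.
 */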
if (kbase_reset_gpu_try_prevent(kbdev)) {
dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n");
- KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL,
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL,
__ENCODE_KTRACE_INFO(true, false, false));
return;
}
+ kbase_debug_csf_fault_wait_completion(kbdev);
mutex_lock(&scheduler->lock);
- /* Cycle completed, disable the firmware idle timer */
- disable_gpu_idle_fw_timer(kbdev);
- scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev);
- reset_active = kbase_reset_gpu_is_active(kbdev);
- if (scheduler_is_idle_suspendable && !reset_active) {
- all_groups_suspended =
- !suspend_active_groups_on_powerdown(kbdev, false);
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ if (unlikely(scheduler->state == SCHED_BUSY)) {
+ mutex_unlock(&scheduler->lock);
+ kbase_reset_gpu_allow(kbdev);
+ return;
+ }
+#endif
- if (all_groups_suspended) {
- dev_dbg(kbdev->dev, "Scheduler becomes idle suspended now");
- scheduler_suspend(kbdev);
- cancel_tick_timer(kbdev);
- } else {
- dev_dbg(kbdev->dev, "Aborting suspend scheduler (grps: %d)",
- atomic_read(&scheduler->non_idle_offslot_grps));
- /* Bring forward the next tick */
- kbase_csf_scheduler_advance_tick(kbdev);
- }
+ scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev);
+ if (scheduler_is_idle_suspendable) {
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_HANDLING_START, NULL,
+ kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
+#ifdef KBASE_PM_RUNTIME
+ if (kbase_pm_gpu_sleep_allowed(kbdev) &&
+ kbase_csf_scheduler_get_nr_active_csgs(kbdev))
+ scheduler_sleep_on_idle(kbdev);
+ else
+#endif
+ all_groups_suspended = scheduler_suspend_on_idle(kbdev);
+
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_HANDLING_END, NULL, 0u);
}
mutex_unlock(&scheduler->lock);
kbase_reset_gpu_allow(kbdev);
- KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL,
- __ENCODE_KTRACE_INFO(reset_active, scheduler_is_idle_suspendable, all_groups_suspended));
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL,
+ __ENCODE_KTRACE_INFO(false, scheduler_is_idle_suspendable,
+ all_groups_suspended));
#undef __ENCODE_KTRACE_INFO
}
static int scheduler_prepare(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ unsigned long flags;
int i;
lockdep_assert_held(&scheduler->lock);
@@ -3716,6 +4920,9 @@
scheduler->num_csg_slots_for_tick = 0;
bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS);
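+ /* Reset the pending protected-mode scan sequence number before scanning
+ * out the runnable groups; the scan below updates it if a group waiting
+ * to enter protected mode is found.
+ */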
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+ scheduler->tick_protm_pending_seq =
+ KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
/* Scan out to run groups */
for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) {
struct kbase_context *kctx;
@@ -3723,6 +4930,7 @@
list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link)
scheduler_ctx_scan_groups(kbdev, kctx, i);
}
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
/* Update this tick's non-idle groups */
scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule;
@@ -3734,14 +4942,13 @@
*/
atomic_set(&scheduler->non_idle_offslot_grps,
scheduler->non_idle_scanout_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, NULL,
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, NULL,
scheduler->non_idle_scanout_grps);
/* Adds those idle but runnable groups to the scanout list */
scheduler_scan_idle_groups(kbdev);
- /* After adding the idle CSGs, the two counts should be the same */
- WARN_ON(scheduler->csg_scan_count_for_tick != scheduler->ngrp_to_schedule);
+ WARN_ON(scheduler->csg_scan_count_for_tick < scheduler->ngrp_to_schedule);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
scheduler->num_active_address_spaces |
@@ -3752,58 +4959,318 @@
return 0;
}
-static void scheduler_handle_idle_timer_onoff(struct kbase_device *kbdev)
+/**
+ * keep_lru_on_slots() - Check whether the condition for keeping the LRU arrangement is met.
+ *
+ * @kbdev: Pointer to the device.
+ *
+ * This function checks whether the Last-Recently-Used (LRU) arrangement can
+ * be kept on slots, i.e. when the scheduler has no non-idle off-slot CSGs to
+ * consider as replacements. This effectively extends the previous scheduling
+ * result into the new one: the most recently used CSGs are retained on their
+ * slots for the new tick/tock action.
+ *
+ * Return: true to avoid changing the on-slot CSGs (i.e. keep the existing
+ *         LRU arrangement), otherwise false.
+ */
+static bool keep_lru_on_slots(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ bool keep_lru = false;
+ int on_slots = bitmap_weight(scheduler->csg_inuse_bitmap,
+ kbdev->csf.global_iface.group_num);
lockdep_assert_held(&scheduler->lock);
- /* After the scheduler apply operation, the internal variable
- * scheduler->non_idle_offslot_grps reflects the end-point view
- * of the count at the end of the active phase.
- *
- * Any changes that follow (after the scheduler has dropped the
- * scheduler->lock), reflects async operations to the scheduler,
- * such as a group gets killed (evicted) or a new group inserted,
- * cqs wait-sync triggered state transtion etc.
- *
- * The condition for enable the idle timer is that there is no
- * non-idle groups off-slots. If there is non-idle group off-slot,
- * the timer should be disabled.
- */
- if (atomic_read(&scheduler->non_idle_offslot_grps))
- disable_gpu_idle_fw_timer(kbdev);
- else
- enable_gpu_idle_fw_timer(kbdev);
+ if (on_slots && !atomic_read(&scheduler->non_idle_offslot_grps)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+ /* All on-slot CSGs are idle and there are no non-idle off-slot
+ * CSGs available to consider for a meaningful change. Set keep_lru.
+ */
+ keep_lru = kbase_csf_scheduler_all_csgs_idle(kbdev);
+
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+
+ dev_dbg(kbdev->dev, "Keep_LRU: %d, CSGs on-slots: %d\n",
+ keep_lru, on_slots);
+ }
+
+ return keep_lru;
}
-static void schedule_actions(struct kbase_device *kbdev)
+/**
+ * prepare_fast_local_tock() - Make preparations for exercising a fast local
+ *                             tock inside the scheduling actions.
+ *
+ * @kbdev: Pointer to the GPU device.
+ *
+ * The function assumes that a scheduling action of firing a fast local tock
+ * (i.e. an equivalent tock action without dropping the lock) is desired if
+ * there are idle on-slot CSGs. It updates the run-state of the affected CSGs
+ * as a preparation step. This should only be called from inside
+ * schedule_actions(), where the previously recorded idle flags are still
+ * considered valid, following the earlier idle confirmation call, plus any
+ * CSGs that newly became idle during the committing steps of the scheduling
+ * action.
+ *
+ * Return: number of on-slot CSGs that can be considered for replacement.
+ */
+static int prepare_fast_local_tock(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ u32 num_groups = kbdev->csf.global_iface.group_num;
+ unsigned long flags, i;
+ DECLARE_BITMAP(csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 };
+
+ lockdep_assert_held(&scheduler->lock);
+
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+ bitmap_copy(csg_bitmap, scheduler->csg_slots_idle_mask, num_groups);
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+
+ /* Mark the flagged idle CSGs' run state as IDLE, so that the
+ * intended fast local tock can replace them with off-slot
+ * non-idle CSGs.
+ */
+ for_each_set_bit(i, csg_bitmap, num_groups) {
+ struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
+ struct kbase_queue_group *group = csg_slot->resident_group;
+
+ if (!queue_group_idle_locked(group)) {
+ group->run_state = KBASE_CSF_GROUP_IDLE;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_IDLE, group, group->run_state);
+ }
+ }
+
+ /* Return the number of idle slots for potential replacement */
+ return bitmap_weight(csg_bitmap, num_groups);
+}
+
+static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slot_mask,
+ unsigned int timeout_ms)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ long remaining = kbase_csf_timeout_in_jiffies(timeout_ms);
+ u32 num_groups = kbdev->csf.global_iface.group_num;
+ int err = 0;
+ DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS);
+
+ lockdep_assert_held(&scheduler->lock);
+
+ bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS);
+
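+ /* Poll until every slot in the local mask has stopped or the overall
+ * timeout elapses; the remaining jiffies budget carries over between
+ * iterations so the total wait stays bounded by timeout_ms.
+ */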
+ while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS) && remaining) {
+ DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
+
+ bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS);
+
+ remaining = wait_event_timeout(
+ kbdev->csf.event_wait,
+ slots_state_changed(kbdev, changed, csg_slot_stopped_locked), remaining);
+
+ if (likely(remaining)) {
+ u32 i;
+
+ for_each_set_bit(i, changed, num_groups) {
+ struct kbase_queue_group *group;
+
+ if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i)))
+ continue;
+
+ /* The on slot csg is now stopped */
+ clear_bit(i, slot_mask_local);
+
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
+
+ group = scheduler->csg_slots[i].resident_group;
+ if (likely(group)) {
+ /* Only do save/cleanup if the
+ * group is not terminated during
+ * the sleep.
+ */
+ save_csg_slot(group);
+ if (cleanup_csg_slot(group))
+ sched_evict_group(group, true, true);
+ }
+ }
+ } else {
+ dev_warn(
+ kbdev->dev,
+ "[%llu] Suspend request sent on CSG slots 0x%lx timed out for slots 0x%lx",
+ kbase_backend_get_cycle_cnt(kbdev), slot_mask[0],
+ slot_mask_local[0]);
+ /* Return the bitmask of the timed out slots to the caller */
+ bitmap_copy(slot_mask, slot_mask_local, MAX_SUPPORTED_CSGS);
+
+ err = -ETIMEDOUT;
+ }
+ }
+
+ return err;
+}
+
+/**
+ * evict_lru_or_blocked_csg() - Evict the least-recently-used idle or blocked CSG
+ *
+ * @kbdev: Pointer to the device
+ *
+ * Used to allow for speedier starting/resumption of another CSG. The worst-case
+ * scenario of the evicted CSG being scheduled next is expected to be rare.
+ * Also, the eviction is not applied if the GPU is running in protected mode,
+ * as the eviction attempt would otherwise force the MCU to quit protected
+ * mode execution and likely re-request to enter it again.
+ */
+static void evict_lru_or_blocked_csg(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ size_t i;
+ struct kbase_queue_group *lru_idle_group = NULL;
+ const u32 total_csg_slots = kbdev->csf.global_iface.group_num;
+ const bool all_addr_spaces_used = (scheduler->num_active_address_spaces >=
+ (kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS));
+ u8 as_usage[BASE_MAX_NR_AS] = { 0 };
+
+ lockdep_assert_held(&scheduler->lock);
+ if (kbase_csf_scheduler_protected_mode_in_use(kbdev))
+ return;
+
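+ /* Quick check: the in-use bitmap fits in a single word (enforced by the
+ * BUILD_BUG_ON below), so if the highest CSG slot is not in use the
+ * slots cannot all be occupied and there is nothing to evict.
+ */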
+ BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(int) * BITS_PER_BYTE));
+ if (fls(scheduler->csg_inuse_bitmap[0]) != total_csg_slots)
+ return; /* Some CSG slots remain unused */
+
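+ /* When every address space is in use, count how many resident groups
+ * share each AS, so that only a group whose AS is not shared with other
+ * on-slot groups is considered for eviction further below.
+ */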
+ if (all_addr_spaces_used) {
+ for (i = 0; i != total_csg_slots; ++i) {
+ if (scheduler->csg_slots[i].resident_group != NULL) {
+ if (WARN_ON(scheduler->csg_slots[i].resident_group->kctx->as_nr <
+ 0))
+ continue;
+
+ as_usage[scheduler->csg_slots[i].resident_group->kctx->as_nr]++;
+ }
+ }
+ }
+
+ for (i = 0; i != total_csg_slots; ++i) {
+ struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group;
+
+ /* We expect that by this point all groups would normally be
+ * assigned a physical CSG slot, but if circumstances have
+ * changed then bail out of this optimisation.
+ */
+ if (group == NULL)
+ return;
+
+ /* Real-time priority CSGs must be kept on-slot even when
+ * idle.
+ */
+ if ((group->run_state == KBASE_CSF_GROUP_IDLE) &&
+ (group->priority != BASE_QUEUE_GROUP_PRIORITY_REALTIME) &&
+ ((lru_idle_group == NULL) ||
+ (lru_idle_group->prepared_seq_num < group->prepared_seq_num))) {
+ if (WARN_ON(group->kctx->as_nr < 0))
+ continue;
+
+ /* If all address spaces are used, we need to ensure the group does
+ * not share its AS with other active CSGs. Otherwise the CSG would be
+ * freed without an AS and this optimisation would not work.
+ */
+ if ((!all_addr_spaces_used) || (as_usage[group->kctx->as_nr] == 1))
+ lru_idle_group = group;
+ }
+ }
+
+ if (lru_idle_group != NULL) {
+ unsigned long slot_mask = 1 << lru_idle_group->csg_nr;
+
+ dev_dbg(kbdev->dev, "Suspending LRU idle group %d of context %d_%d on slot %d",
+ lru_idle_group->handle, lru_idle_group->kctx->tgid,
+ lru_idle_group->kctx->id, lru_idle_group->csg_nr);
+ suspend_queue_group(lru_idle_group);
+ if (wait_csg_slots_suspend(kbdev, &slot_mask, kbdev->csf.fw_timeout_ms)) {
+ enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
+
+ dev_warn(
+ kbdev->dev,
+ "[%llu] LRU idle group %d of context %d_%d failed to suspend on slot %d (timeout %d ms)",
+ kbase_backend_get_cycle_cnt(kbdev), lru_idle_group->handle,
+ lru_idle_group->kctx->tgid, lru_idle_group->kctx->id,
+ lru_idle_group->csg_nr, kbdev->csf.fw_timeout_ms);
+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
+ error_type = DF_PING_REQUEST_TIMEOUT;
+ schedule_actions_trigger_df(kbdev, lru_idle_group->kctx, error_type);
+ }
+ }
+}
+
+static void schedule_actions(struct kbase_device *kbdev, bool is_tick)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
unsigned long flags;
struct kbase_queue_group *protm_grp;
int ret;
+ bool skip_scheduling_actions;
bool skip_idle_slots_update;
bool new_protm_top_grp = false;
+ int local_tock_slots = 0;
kbase_reset_gpu_assert_prevented(kbdev);
lockdep_assert_held(&scheduler->lock);
- ret = kbase_pm_wait_for_desired_state(kbdev);
+ ret = kbase_csf_scheduler_wait_mcu_active(kbdev);
if (ret) {
- dev_err(kbdev->dev, "Wait for MCU power on failed");
+ dev_err(kbdev->dev,
+ "Wait for MCU power on failed on scheduling tick/tock");
return;
}
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
skip_idle_slots_update = kbase_csf_scheduler_protected_mode_in_use(kbdev);
+ skip_scheduling_actions =
+ !skip_idle_slots_update && kbdev->protected_mode;
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
- /* Skip updating on-slot idle CSGs if GPU is in protected mode. */
- if (!skip_idle_slots_update)
+ /* Skip scheduling actions as GPU reset hasn't been performed yet to
+ * rectify the anomaly that happened when pmode exit interrupt wasn't
+ * received before the termination of group running in pmode.
+ */
+ if (unlikely(skip_scheduling_actions)) {
+ dev_info(kbdev->dev,
+ "Scheduling actions skipped due to anomaly in pmode");
+ return;
+ }
+
+ if (!skip_idle_slots_update) {
+ /* Updating on-slot idle CSGs when not in protected mode. */
scheduler_handle_idle_slots(kbdev);
+ /* Determine whether the conditions are met for keeping the
+ * Last-Recently-Used arrangement. If true, skip the remaining
+ * action steps and thus extend the previous tick's arrangement;
+ * in particular, make no alterations to the on-slot CSGs.
+ */
+ if (keep_lru_on_slots(kbdev))
+ return;
+ }
+
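+ /* CSG priority rotation is only applied on a scheduling tick, not on
+ * tock or fast local tock iterations.
+ */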
+ if (is_tick)
+ scheduler_rotate(kbdev);
+
+redo_local_tock:
scheduler_prepare(kbdev);
+ /* Need to specifically enqueue the GPU idle work if there are no groups
+ * to schedule despite there being runnable groups. This scenario will
+ * happen if System suspend is done when all groups are idle and no work
+ * is submitted for the groups after the System resume.
+ */
+ if (unlikely(!scheduler->ngrp_to_schedule &&
+ scheduler->total_runnable_grps)) {
+ dev_dbg(kbdev->dev, "No groups to schedule in the tick");
+ enqueue_gpu_idle_work(scheduler);
+ return;
+ }
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
protm_grp = scheduler->active_protm_grp;
@@ -3818,12 +5285,13 @@
* queue jobs.
*/
if (protm_grp && scheduler->top_grp == protm_grp) {
- int new_val;
dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d",
protm_grp->handle);
- new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
- protm_grp, new_val);
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+
+ update_offslot_non_idle_cnt_for_onslot_grp(protm_grp);
+ remove_scheduled_group(kbdev, protm_grp);
+ scheduler_check_pmode_progress(kbdev);
} else if (scheduler->top_grp) {
if (protm_grp)
dev_dbg(kbdev->dev, "Scheduler drop protm exec: group-%d",
@@ -3848,11 +5316,6 @@
scheduler_apply(kbdev);
- /* Post-apply, all the committed groups in this tick are on
- * slots, time to arrange the idle timer on/off decision.
- */
- scheduler_handle_idle_timer_onoff(kbdev);
-
/* Scheduler is dropping the exec of the previous protm_grp,
* Until the protm quit completes, the GPU is effectively
* locked in the secure mode.
@@ -3866,46 +5329,119 @@
if (new_protm_top_grp) {
scheduler_group_check_protm_enter(kbdev,
scheduler->top_grp);
- }
+ } else if (!local_tock_slots &&
+ atomic_read(&scheduler->non_idle_offslot_grps)) {
+ /* If, during the scheduling action, we have off-slot
+ * non-idle CSGs waiting and some new idle slots have
+ * emerged during the committed action steps, trigger a
+ * one-off fast local tock.
+ */
+ local_tock_slots = prepare_fast_local_tock(kbdev);
- return;
+ if (local_tock_slots) {
+ dev_dbg(kbdev->dev,
+ "In-cycle %d idle slots available\n",
+ local_tock_slots);
+ goto redo_local_tock;
+ }
+ }
+ } else {
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
}
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
- return;
+ evict_lru_or_blocked_csg(kbdev);
+}
+
+/**
+ * can_skip_scheduling() - Check if the scheduling actions can be skipped.
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function is called on a scheduling tick or tock to determine if the
+ * scheduling actions can be skipped.
+ * If Scheduler is in sleeping state and exit from the sleep state is allowed
+ * then activation of MCU will be triggered. The tick or tock work item could
+ * have been in flight when the state of Scheduler was changed to sleeping.
+ *
+ * Return: true if the scheduling actions can be skipped.
+ */
+static bool can_skip_scheduling(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ if (unlikely(!kbase_reset_gpu_is_not_pending(kbdev)))
+ return true;
+
+ if (scheduler->state == SCHED_SUSPENDED)
+ return true;
+
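+ /* In the sleeping state, scheduling can resume only if an exit from GPU
+ * sleep has been requested and the MCU can be reactivated; otherwise the
+ * tick/tock work that was already in flight is skipped.
+ */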
+#ifdef KBASE_PM_RUNTIME
+ if (scheduler->state == SCHED_SLEEPING) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ if (kbdev->pm.backend.exit_gpu_sleep_mode) {
+ int ret = scheduler_pm_active_after_sleep(kbdev, &flags);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ if (!ret) {
+ scheduler->state = SCHED_INACTIVE;
+ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
+ return false;
+ }
+
+ dev_info(kbdev->dev,
+ "Skip scheduling due to system suspend");
+ return true;
+ }
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ return true;
+ }
+#endif
+
+ return false;
}
static void schedule_on_tock(struct work_struct *work)
{
- struct kbase_device *kbdev = container_of(work, struct kbase_device,
- csf.scheduler.tock_work.work);
+ struct kbase_device *kbdev =
+ container_of(work, struct kbase_device, csf.scheduler.tock_work.work);
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ int err;
- int err = kbase_reset_gpu_try_prevent(kbdev);
+ err = kbase_reset_gpu_try_prevent(kbdev);
/* Regardless of whether reset failed or is currently happening, exit
* early
*/
if (err)
return;
+ kbase_debug_csf_fault_wait_completion(kbdev);
mutex_lock(&scheduler->lock);
- if (scheduler->state == SCHED_SUSPENDED)
+ if (can_skip_scheduling(kbdev)) {
+ atomic_set(&scheduler->pending_tock_work, false);
goto exit_no_schedule_unlock;
+ }
WARN_ON(!(scheduler->state == SCHED_INACTIVE));
scheduler->state = SCHED_BUSY;
+ KBASE_KTRACE_ADD(kbdev, SCHED_BUSY, NULL, scheduler->state);
/* Undertaking schedule action steps */
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK, NULL, 0u);
- schedule_actions(kbdev);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_START, NULL, 0u);
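+ /* Keep performing tock actions while fresh tock requests arrive; the
+ * pending flag is cleared atomically before each pass, so a request
+ * raised mid-pass triggers one more iteration.
+ */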
+ while (atomic_cmpxchg(&scheduler->pending_tock_work, true, false) == true)
+ schedule_actions(kbdev, false);
- /* Record time information */
+ /* Record time information on a non-skipped tock */
scheduler->last_schedule = jiffies;
- /* Tock is serviced */
- scheduler->tock_pending_request = false;
-
scheduler->state = SCHED_INACTIVE;
+ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
+ if (!scheduler->total_runnable_grps)
+ enqueue_gpu_idle_work(scheduler);
mutex_unlock(&scheduler->lock);
kbase_reset_gpu_allow(kbdev);
@@ -3922,8 +5458,8 @@
static void schedule_on_tick(struct work_struct *work)
{
- struct kbase_device *kbdev = container_of(work, struct kbase_device,
- csf.scheduler.tick_work);
+ struct kbase_device *kbdev =
+ container_of(work, struct kbase_device, csf.scheduler.tick_work);
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
int err = kbase_reset_gpu_try_prevent(kbdev);
@@ -3933,20 +5469,19 @@
if (err)
return;
+ kbase_debug_csf_fault_wait_completion(kbdev);
mutex_lock(&scheduler->lock);
WARN_ON(scheduler->tick_timer_active);
- if (scheduler->state == SCHED_SUSPENDED)
+ if (can_skip_scheduling(kbdev))
goto exit_no_schedule_unlock;
scheduler->state = SCHED_BUSY;
- /* Do scheduling stuff */
- scheduler_rotate(kbdev);
+ KBASE_KTRACE_ADD(kbdev, SCHED_BUSY, NULL, scheduler->state);
/* Undertaking schedule action steps */
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK, NULL,
- scheduler->total_runnable_grps);
- schedule_actions(kbdev);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_START, NULL, scheduler->total_runnable_grps);
+ schedule_actions(kbdev, true);
/* Record time information */
scheduler->last_schedule = jiffies;
@@ -3958,10 +5493,13 @@
dev_dbg(kbdev->dev,
"scheduling for next tick, num_runnable_groups:%u\n",
scheduler->total_runnable_grps);
+ } else if (!scheduler->total_runnable_grps) {
+ enqueue_gpu_idle_work(scheduler);
}
scheduler->state = SCHED_INACTIVE;
mutex_unlock(&scheduler->lock);
+ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
kbase_reset_gpu_allow(kbdev);
dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes.");
@@ -3973,64 +5511,6 @@
exit_no_schedule_unlock:
mutex_unlock(&scheduler->lock);
kbase_reset_gpu_allow(kbdev);
-}
-
-static int wait_csg_slots_suspend(struct kbase_device *kbdev,
- const unsigned long *slot_mask,
- unsigned int timeout_ms)
-{
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
- long remaining = kbase_csf_timeout_in_jiffies(timeout_ms);
- u32 num_groups = kbdev->csf.global_iface.group_num;
- int err = 0;
- DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS);
-
- lockdep_assert_held(&scheduler->lock);
-
- bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS);
-
- while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS)
- && remaining) {
- DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
-
- bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS);
-
- remaining = wait_event_timeout(kbdev->csf.event_wait,
- slots_state_changed(kbdev, changed,
- csg_slot_stopped_locked),
- remaining);
-
- if (remaining) {
- u32 i;
-
- for_each_set_bit(i, changed, num_groups) {
- struct kbase_queue_group *group;
-
- if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i)))
- continue;
-
- /* The on slot csg is now stopped */
- clear_bit(i, slot_mask_local);
-
- group = scheduler->csg_slots[i].resident_group;
- if (likely(group)) {
- /* Only do save/cleanup if the
- * group is not terminated during
- * the sleep.
- */
- save_csg_slot(group);
- if (cleanup_csg_slot(group))
- sched_evict_group(group, true, true);
- }
- }
- } else {
- dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend, slot_mask: 0x%*pb\n",
- num_groups, slot_mask_local);
- err = -ETIMEDOUT;
- }
- }
-
- return err;
}
static int suspend_active_queue_groups(struct kbase_device *kbdev,
@@ -4069,7 +5549,7 @@
ret = suspend_active_queue_groups(kbdev, slot_mask);
if (ret) {
- dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n",
+ dev_warn(kbdev->dev, "Timeout waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n",
kbdev->csf.global_iface.group_num, slot_mask);
}
@@ -4083,12 +5563,16 @@
* due to the extra context ref-count, which prevents the
* L2 powering down cache clean operation in the non racing
* case.
+ * The LSC is flushed as well to cover the buslogging use case,
+ * where GPU reset is done regularly to avoid the log buffer
+ * overflow.
*/
- kbase_gpu_start_cache_clean(kbdev);
+ kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev,
kbdev->reset_timeout_ms);
if (ret2) {
- dev_warn(kbdev->dev, "Timed out waiting for cache clean to complete before reset");
+ dev_warn(kbdev->dev, "[%llu] Timeout waiting for cache clean to complete before reset",
+ kbase_backend_get_cycle_cnt(kbdev));
if (!ret)
ret = ret2;
}
@@ -4103,7 +5587,7 @@
* groups when reset is done during
* protected mode execution.
*
- * @group: Pointer to the device.
+ * @kbdev: Pointer to the device.
*
* This function is called at the time of GPU reset, before the suspension of
* queue groups, to handle the case when the reset is getting performed whilst
@@ -4125,7 +5609,8 @@
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
u32 const num_groups = kbdev->csf.global_iface.group_num;
struct kbase_queue_group *protm_grp;
- bool suspend_on_slot_groups;
+ bool suspend_on_slot_groups = true;
+ bool pmode_active;
unsigned long flags;
u32 csg_nr;
@@ -4133,20 +5618,51 @@
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
protm_grp = scheduler->active_protm_grp;
+ pmode_active = kbdev->protected_mode;
- /* If GPU wasn't in protected mode or had exited it before the GPU reset
- * then all the on-slot groups can be suspended in the regular way by
- * sending CSG SUSPEND requests to FW.
- * If there wasn't a fault for protected mode group, then it would
- * also need to be suspended in the regular way before the reset.
- */
- suspend_on_slot_groups = !(protm_grp && protm_grp->faulted);
+ if (likely(!protm_grp && !pmode_active)) {
+ /* Case 1: GPU is not in protected mode or it successfully
+ * exited protected mode. All on-slot groups can be suspended in
+ * the regular way before reset.
+ */
+ suspend_on_slot_groups = true;
+ } else if (protm_grp && pmode_active) {
+ /* Case 2: GPU went successfully into protected mode and hasn't
+ * exited from it yet and the protected mode group is still
+ * active. If there was no fault for the protected mode group
+ * then it can be suspended in the regular way before reset.
+ * The other normal mode on-slot groups were already implicitly
+ * suspended on entry to protected mode so they can be marked as
+ * suspended right away.
+ */
+ suspend_on_slot_groups = !protm_grp->faulted;
+ } else if (!protm_grp && pmode_active) {
+ /* Case 3: GPU went successfully into protected mode and hasn't
+ * exited from it yet but the protected mode group got deleted.
+ * This would have happened if the FW got stuck during protected
+ * mode for some reason (like GPU page fault or some internal
+ * error). In normal cases FW is expected to send the pmode exit
+ * interrupt before it handles the CSG termination request.
+ * The other normal mode on-slot groups would already have been
+ * implicitly suspended on entry to protected mode so they can be
+ * marked as suspended right away.
+ */
+ suspend_on_slot_groups = false;
+ } else if (protm_grp && !pmode_active) {
+ /* Case 4: GPU couldn't successfully enter protected mode, i.e.
+ * PROTM_ENTER request had timed out.
+ * All the on-slot groups need to be suspended in the regular
+ * way before reset.
+ */
+ suspend_on_slot_groups = true;
+ }
+
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
- if (!protm_grp)
+ if (likely(!pmode_active))
goto unlock;
- /* GPU is in protected mode, so all the on-slot groups barring the
+ /* GPU hasn't exited protected mode, so all the on-slot groups barring
* the protected mode group can be marked as suspended right away.
*/
for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
@@ -4159,19 +5675,30 @@
cleanup_csg_slot(group);
group->run_state = KBASE_CSF_GROUP_SUSPENDED;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED, group, group->run_state);
/* Simply treat the normal mode groups as non-idle. The tick
* scheduled after the reset will re-initialize the counter
* anyways.
*/
new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps);
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
- group, new_val);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val);
}
unlock:
mutex_unlock(&scheduler->lock);
return suspend_on_slot_groups;
+}
+
+static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler)
+{
+ cancel_work_sync(&scheduler->tick_work);
+}
+
+static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler)
+{
+ atomic_set(&scheduler->pending_tock_work, false);
+ cancel_delayed_work_sync(&scheduler->tock_work);
}
static void scheduler_inner_reset(struct kbase_device *kbdev)
@@ -4180,13 +5707,13 @@
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
unsigned long flags;
- WARN_ON(csgs_active(kbdev));
+ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
/* Cancel any potential queued delayed work(s) */
cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work);
cancel_tick_timer(kbdev);
- cancel_work_sync(&scheduler->tick_work);
- cancel_delayed_work_sync(&scheduler->tock_work);
+ cancel_tick_work(scheduler);
+ cancel_tock_work(scheduler);
cancel_delayed_work_sync(&scheduler->ping_work);
mutex_lock(&scheduler->lock);
@@ -4194,8 +5721,8 @@
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS);
if (scheduler->active_protm_grp)
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM,
- scheduler->active_protm_grp, 0u);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT, scheduler->active_protm_grp,
+ 0u);
scheduler->active_protm_grp = NULL;
memset(kbdev->csf.scheduler.csg_slots, 0,
num_groups * sizeof(struct kbase_csf_csg_slot));
@@ -4218,7 +5745,9 @@
WARN_ON(!kbase_reset_gpu_is_active(kbdev));
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET, NULL, 0u);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_START, NULL, 0u);
+
+ kbase_debug_csf_fault_wait_completion(kbdev);
if (scheduler_handle_reset_in_protected_mode(kbdev) &&
!suspend_active_queue_groups_on_reset(kbdev)) {
@@ -4256,6 +5785,8 @@
mutex_unlock(&kbdev->kctx_list_lock);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_END, NULL, 0u);
+
/* After queue groups reset, the scheduler data fields clear out */
scheduler_inner_reset(kbdev);
}
@@ -4292,10 +5823,11 @@
}
#endif
- if (kbdev->csf.scheduler.state == SCHED_SUSPENDED)
+ if (kbdev->csf.scheduler.state == SCHED_SUSPENDED ||
+ kbdev->csf.scheduler.state == SCHED_SLEEPING)
goto exit;
- if (get_nr_active_csgs(kbdev) != 1)
+ if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) != 1)
goto exit;
if (kbase_csf_scheduler_protected_mode_in_use(kbdev))
@@ -4307,9 +5839,9 @@
goto exit;
}
- kbase_pm_wait_for_desired_state(kbdev);
+ kbase_csf_scheduler_wait_mcu_active(kbdev);
- err = kbase_csf_firmware_ping_wait(kbdev);
+ err = kbase_csf_firmware_ping_wait(kbdev, kbdev->csf.fw_timeout_ms);
if (err) {
/* It is acceptable to enqueue a reset whilst we've prevented
@@ -4318,17 +5850,16 @@
if (kbase_prepare_to_reset_gpu(
kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu(kbdev);
- } else if (get_nr_active_csgs(kbdev) == 1) {
- queue_delayed_work(system_long_wq,
- &kbdev->csf.scheduler.ping_work,
- msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS));
+ } else if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) == 1) {
+ queue_delayed_work(
+ system_long_wq, &kbdev->csf.scheduler.ping_work,
+ msecs_to_jiffies(kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_PING_TIMEOUT)));
}
kbase_pm_context_idle(kbdev);
exit:
mutex_unlock(&kbdev->csf.scheduler.lock);
kbase_reset_gpu_allow(kbdev);
- return;
}
int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
@@ -4337,13 +5868,42 @@
struct kbase_context *const kctx = group->kctx;
struct kbase_device *const kbdev = kctx->kbdev;
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ bool on_slot;
int err = 0;
kbase_reset_gpu_assert_prevented(kbdev);
lockdep_assert_held(&kctx->csf.lock);
mutex_lock(&scheduler->lock);
- if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) {
+ on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
+
+#ifdef KBASE_PM_RUNTIME
+ if (on_slot && (scheduler->state == SCHED_SLEEPING)) {
+ if (wait_for_scheduler_to_exit_sleep(kbdev)) {
+ dev_warn(
+ kbdev->dev,
+ "Wait for scheduler to exit sleep state timedout when copying suspend buffer for group %d of ctx %d_%d on slot %d",
+ group->handle, group->kctx->tgid,
+ group->kctx->id, group->csg_nr);
+
+ scheduler_wakeup(kbdev, true);
+
+ /* Wait for MCU firmware to start running */
+ if (kbase_csf_scheduler_wait_mcu_active(kbdev))
+ dev_warn(
+ kbdev->dev,
+ "Wait for MCU active failed when copying suspend buffer for group %d of ctx %d_%d on slot %d",
+ group->handle, group->kctx->tgid,
+ group->kctx->id, group->csg_nr);
+ }
+
+ /* Check the group state again as scheduler lock would have been
+ * released when waiting for the exit from SLEEPING state.
+ */
+ on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
+ }
+#endif
+ if (on_slot) {
DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
set_bit(kbase_csf_scheduler_group_get_slot(group), slot_mask);
@@ -4353,8 +5913,9 @@
err = wait_csg_slots_suspend(kbdev, slot_mask,
kbdev->csf.fw_timeout_ms);
if (err) {
- dev_warn(kbdev->dev, "Timed out waiting for the group %d to suspend on slot %d",
- group->handle, group->csg_nr);
+ dev_warn(kbdev->dev, "[%llu] Timeout waiting for the group %d to suspend on slot %d",
+ kbase_backend_get_cycle_cnt(kbdev),
+ group->handle, group->csg_nr);
goto exit;
}
}
@@ -4363,13 +5924,18 @@
unsigned int target_page_nr = 0, i = 0;
u64 offset = sus_buf->offset;
size_t to_copy = sus_buf->size;
+ const u32 csg_suspend_buf_nr_pages =
+ PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
if (scheduler->state != SCHED_SUSPENDED) {
/* Similar to the case of HW counters, need to flush
- * the GPU cache before reading from the suspend buffer
+ * the GPU L2 cache before reading from the suspend buffer
* pages as they are mapped and cached on GPU side.
+ * Flushing LSC is not done here, since only the flush of
+ * CSG suspend buffer contents is needed from the L2 cache.
*/
- kbase_gpu_start_cache_clean(kbdev);
+ kbase_gpu_start_cache_clean(
+ kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
kbase_gpu_wait_cache_clean(kbdev);
} else {
/* Make sure power down transitions have completed,
@@ -4381,7 +5947,7 @@
kbase_pm_wait_for_desired_state(kbdev);
}
- for (i = 0; i < PFN_UP(sus_buf->size) &&
+ for (i = 0; i < csg_suspend_buf_nr_pages &&
target_page_nr < sus_buf->nr_pages; i++) {
struct page *pg =
as_page(group->normal_suspend_buf.phy[i]);
@@ -4538,6 +6104,11 @@
mutex_lock(&scheduler->lock);
+ if (group->run_state == KBASE_CSF_GROUP_IDLE) {
+ group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
+ group->run_state);
+ }
/* Check if the group is now eligible for execution in protected mode. */
if (scheduler_get_protm_enter_async_group(kbdev, group))
scheduler_group_check_protm_enter(kbdev, group);
@@ -4547,20 +6118,22 @@
}
/**
- * check_sync_update_for_idle_group_protm() - Check the sync wait condition
- * for all the queues bound to
- * the given group.
+ * check_sync_update_for_on_slot_group() - Check the sync wait condition
+ * for all the queues bound to
+ * the given on-slot group.
*
- * @group: Pointer to the group that requires evaluation.
+ * @group: Pointer to the on-slot group that requires evaluation.
*
* This function is called if the GPU is in protected mode and there are on
- * slot idle groups with higher priority than the active protected mode group.
+ * slot idle groups with higher priority than the active protected mode group
+ * or this function is called when CQS object is signaled whilst GPU is in
+ * sleep state.
* This function will evaluate the sync condition, if any, of all the queues
* bound to the given group.
*
- * Return true if the sync condition of at least one queue has been satisfied.
+ * Return: true if the sync condition of at least one queue has been satisfied.
*/
-static bool check_sync_update_for_idle_group_protm(
+static bool check_sync_update_for_on_slot_group(
struct kbase_queue_group *group)
{
struct kbase_device *const kbdev = group->kctx->kbdev;
@@ -4583,7 +6156,7 @@
stream, CS_STATUS_WAIT);
unsigned long flags;
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT,
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS,
queue->group, queue, status);
if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status))
@@ -4625,7 +6198,13 @@
scheduler->csg_slots_idle_mask[0]);
spin_unlock_irqrestore(
&scheduler->interrupt_lock, flags);
+ /* Request the scheduler to confirm the condition inferred
+ * here inside the protected mode.
+ */
+ group->reevaluate_idle_status = true;
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
+ group->run_state);
}
KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u);
@@ -4647,7 +6226,7 @@
* protected mode that has a higher priority than the active protected mode
* group.
*
- * Return true if the sync condition of at least one queue in a group has been
+ * Return: true if the sync condition of at least one queue in a group has been
* satisfied.
*/
static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev)
@@ -4680,12 +6259,34 @@
* has a higher priority than the protm group, then we
* need to exit protected mode.
*/
- if (check_sync_update_for_idle_group_protm(group))
+ if (check_sync_update_for_on_slot_group(group))
exit_protm = true;
}
}
return exit_protm;
+}
+
+static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ u32 const num_groups = kbdev->csf.global_iface.group_num;
+ u32 csg_nr;
+
+ lockdep_assert_held(&scheduler->lock);
+
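+ /* Walk all resident (on-slot) groups and wake the Scheduler up if the
+ * sync wait condition of any of their queues has been satisfied whilst
+ * the GPU was in sleep state.
+ */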
+ for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
+ struct kbase_queue_group *const group =
+ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
+
+ if (!group)
+ continue;
+
+ if (check_sync_update_for_on_slot_group(group)) {
+ scheduler_wakeup(kbdev, true);
+ return;
+ }
+ }
}
/**
@@ -4709,16 +6310,27 @@
struct kbase_context, csf.sched.sync_update_work);
struct kbase_device *const kbdev = kctx->kbdev;
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ bool sync_updated = false;
mutex_lock(&scheduler->lock);
- KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_BEGIN, kctx, 0u);
+#if IS_ENABLED(CONFIG_DEBUG_FS)
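+ /* The Scheduler is still busy with an in-flight scheduling action;
+ * requeue this work item so the sync update is processed once that
+ * action completes, instead of waiting for it here.
+ */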
+ if (unlikely(scheduler->state == SCHED_BUSY)) {
+ queue_work(kctx->csf.sched.sync_update_wq,
+ &kctx->csf.sched.sync_update_work);
+ mutex_unlock(&scheduler->lock);
+ return;
+ }
+#endif
+
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START, kctx, 0u);
if (kctx->csf.sched.num_idle_wait_grps != 0) {
struct kbase_queue_group *group, *temp;
list_for_each_entry_safe(group, temp,
&kctx->csf.sched.idle_wait_groups, link) {
if (group_sync_updated(group)) {
+ sync_updated = true;
/* Move this group back in to the runnable
* groups list of the context.
*/
@@ -4730,9 +6342,18 @@
WARN_ON(!list_empty(&kctx->csf.sched.idle_wait_groups));
}
- if (check_sync_update_for_idle_groups_protm(kbdev))
+ if (check_sync_update_for_idle_groups_protm(kbdev)) {
scheduler_force_protm_exit(kbdev);
- KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u);
+ sync_updated = true;
+ }
+
+ /* If the scheduler is in the sleep state, re-activate it to serve
+ * on-slot CSGs blocked on a CQS object which has now been signaled.
+ */
+ if (!sync_updated && (scheduler->state == SCHED_SLEEPING))
+ check_sync_update_in_sleep_mode(kbdev);
+
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u);
mutex_unlock(&scheduler->lock);
}
@@ -4742,7 +6363,8 @@
{
struct kbase_context *const kctx = param;
- KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT, kctx, 0u);
+ KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_GROUP_SYNC_UPDATE_EVENT, kctx, 0u);
+
queue_work(kctx->csf.sched.sync_update_wq,
&kctx->csf.sched.sync_update_work);
@@ -4753,6 +6375,8 @@
{
int priority;
int err;
+
+ kbase_ctx_sched_init_ctx(kctx);
for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
++priority) {
@@ -4770,20 +6394,29 @@
if (!kctx->csf.sched.sync_update_wq) {
dev_err(kctx->kbdev->dev,
"Failed to initialize scheduler context workqueue");
- return -ENOMEM;
+ err = -ENOMEM;
+ goto alloc_wq_failed;
}
INIT_WORK(&kctx->csf.sched.sync_update_work,
check_group_sync_update_worker);
+
+ kbase_csf_tiler_heap_reclaim_ctx_init(kctx);
err = kbase_csf_event_wait_add(kctx, check_group_sync_update_cb, kctx);
if (err) {
dev_err(kctx->kbdev->dev,
"Failed to register a sync update callback");
- destroy_workqueue(kctx->csf.sched.sync_update_wq);
+ goto event_wait_add_failed;
}
+ return err;
+
+event_wait_add_failed:
+ destroy_workqueue(kctx->csf.sched.sync_update_wq);
+alloc_wq_failed:
+ kbase_ctx_sched_remove_ctx(kctx);
return err;
}
@@ -4792,6 +6425,8 @@
kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx);
cancel_work_sync(&kctx->csf.sched.sync_update_work);
destroy_workqueue(kctx->csf.sched.sync_update_wq);
+
+ kbase_ctx_sched_remove_ctx(kctx);
}
int kbase_csf_scheduler_init(struct kbase_device *kbdev)
@@ -4810,7 +6445,7 @@
return -ENOMEM;
}
- return 0;
+ return kbase_csf_mcu_shared_regs_data_init(kbdev);
}
int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
@@ -4824,12 +6459,20 @@
dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n");
return -ENOMEM;
}
+ scheduler->idle_wq = alloc_ordered_workqueue(
+ "csf_scheduler_gpu_idle_wq", WQ_HIGHPRI);
+ if (!scheduler->idle_wq) {
+ dev_err(kbdev->dev,
+ "Failed to allocate GPU idle scheduler workqueue\n");
+ destroy_workqueue(kbdev->csf.scheduler.wq);
+ return -ENOMEM;
+ }
INIT_WORK(&scheduler->tick_work, schedule_on_tick);
INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock);
+ atomic_set(&scheduler->pending_tock_work, false);
INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor);
- BUILD_BUG_ON(CSF_FIRMWARE_TIMEOUT_MS >= FIRMWARE_PING_INTERVAL_MS);
mutex_init(&scheduler->lock);
spin_lock_init(&scheduler->interrupt_lock);
@@ -4843,24 +6486,27 @@
(sizeof(scheduler->csgs_events_enable_mask) * BITS_PER_BYTE));
bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS);
scheduler->state = SCHED_SUSPENDED;
+ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
scheduler->pm_active_count = 0;
scheduler->ngrp_to_schedule = 0;
scheduler->total_runnable_grps = 0;
scheduler->top_ctx = NULL;
scheduler->top_grp = NULL;
scheduler->last_schedule = 0;
- scheduler->tock_pending_request = false;
scheduler->active_protm_grp = NULL;
- scheduler->gpu_idle_fw_timer_enabled = false;
scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS;
scheduler_doorbell_init(kbdev);
INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker);
+ scheduler->fast_gpu_idle_handling = false;
+ atomic_set(&scheduler->gpu_no_longer_idle, false);
atomic_set(&scheduler->non_idle_offslot_grps, 0);
hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
scheduler->tick_timer.function = tick_timer_callback;
scheduler->tick_timer_active = false;
+
+ kbase_csf_tiler_heap_reclaim_mgr_init(kbdev);
return 0;
}
@@ -4869,26 +6515,49 @@
{
if (kbdev->csf.scheduler.csg_slots) {
WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps));
- WARN_ON(csgs_active(kbdev));
+ /* The unload of Driver can take place only when all contexts have
+ * been terminated. The groups that were not terminated by the User
+ * are terminated on context termination. So no CSGs are expected
+ * to be active at the time of Driver unload.
+ */
+ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
flush_work(&kbdev->csf.scheduler.gpu_idle_work);
mutex_lock(&kbdev->csf.scheduler.lock);
- if (WARN_ON(kbdev->csf.scheduler.state != SCHED_SUSPENDED))
+
+ if (kbdev->csf.scheduler.state != SCHED_SUSPENDED) {
+ unsigned long flags;
+ /* The power policy could prevent the Scheduler from
+ * getting suspended when GPU becomes idle.
+ */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ WARN_ON(kbase_pm_idle_groups_sched_suspendable(kbdev));
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
scheduler_suspend(kbdev);
+ }
+
mutex_unlock(&kbdev->csf.scheduler.lock);
cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work);
cancel_tick_timer(kbdev);
- cancel_work_sync(&kbdev->csf.scheduler.tick_work);
- cancel_delayed_work_sync(&kbdev->csf.scheduler.tock_work);
- mutex_destroy(&kbdev->csf.scheduler.lock);
+ cancel_tick_work(&kbdev->csf.scheduler);
+ cancel_tock_work(&kbdev->csf.scheduler);
kfree(kbdev->csf.scheduler.csg_slots);
kbdev->csf.scheduler.csg_slots = NULL;
}
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_TERMINATED, NULL,
+ kbase_csf_scheduler_get_nr_active_csgs(kbdev));
+ /* Terminating the MCU shared regions, following the release of slots */
+ kbase_csf_mcu_shared_regs_data_term(kbdev);
}
void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)
{
+ if (kbdev->csf.scheduler.idle_wq)
+ destroy_workqueue(kbdev->csf.scheduler.idle_wq);
if (kbdev->csf.scheduler.wq)
destroy_workqueue(kbdev->csf.scheduler.wq);
+
+ kbase_csf_tiler_heap_reclaim_mgr_term(kbdev);
+ mutex_destroy(&kbdev->csf.scheduler.lock);
}
/**
@@ -4911,13 +6580,14 @@
return;
WARN_ON((scheduler->state != SCHED_INACTIVE) &&
- (scheduler->state != SCHED_SUSPENDED));
+ (scheduler->state != SCHED_SUSPENDED) &&
+ (scheduler->state != SCHED_SLEEPING));
if (scheduler->total_runnable_grps > 0) {
enqueue_tick_work(kbdev);
dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n");
} else if (scheduler->state != SCHED_SUSPENDED) {
- queue_work(system_wq, &scheduler->gpu_idle_work);
+ enqueue_gpu_idle_work(scheduler);
}
}
@@ -4952,18 +6622,22 @@
if (currently_enabled && !enable) {
scheduler->timer_enabled = false;
cancel_tick_timer(kbdev);
- cancel_delayed_work(&scheduler->tock_work);
mutex_unlock(&scheduler->lock);
/* The non-sync version to cancel the normal work item is not
* available, so need to drop the lock before cancellation.
*/
- cancel_work_sync(&scheduler->tick_work);
- } else if (!currently_enabled && enable) {
+ cancel_tick_work(scheduler);
+ cancel_tock_work(scheduler);
+ return;
+ }
+
+ if (!currently_enabled && enable) {
scheduler->timer_enabled = true;
scheduler_enable_tick_timer_nolock(kbdev);
- mutex_unlock(&scheduler->lock);
}
+
+ mutex_unlock(&scheduler->lock);
}
void kbase_csf_scheduler_kick(struct kbase_device *kbdev)
@@ -4984,80 +6658,232 @@
mutex_unlock(&scheduler->lock);
}
-void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
+int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev)
{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ int result = 0;
+
+ lockdep_assert_held(&scheduler->lock);
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ if (unlikely(scheduler->state == SCHED_BUSY))
+ return -EBUSY;
+#endif
+
+#ifdef KBASE_PM_RUNTIME
+ /* If scheduler is in sleeping state, then MCU needs to be activated
+ * to suspend CSGs.
+ */
+ if (scheduler->state == SCHED_SLEEPING) {
+ dev_info(kbdev->dev, "Activating MCU out of sleep on system suspend");
+ result = force_scheduler_to_exit_sleep(kbdev);
+ if (result) {
+ dev_warn(kbdev->dev, "Scheduler failed to exit from sleep");
+ goto exit;
+ }
+ }
+#endif
+ if (scheduler->state != SCHED_SUSPENDED) {
+ result = suspend_active_groups_on_powerdown(kbdev, true);
+ if (result) {
+ dev_warn(kbdev->dev, "failed to suspend active groups");
+ goto exit;
+ } else {
+ dev_info(kbdev->dev, "Scheduler PM suspend");
+ scheduler_suspend(kbdev);
+ cancel_tick_timer(kbdev);
+ }
+ }
+
+exit:
+ return result;
+}
+
+int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
+{
+ int result = 0;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
/* Cancel any potential queued delayed work(s) */
- cancel_work_sync(&scheduler->tick_work);
- cancel_delayed_work_sync(&scheduler->tock_work);
+ cancel_tick_work(scheduler);
+ cancel_tock_work(scheduler);
- if (kbase_reset_gpu_prevent_and_wait(kbdev)) {
- dev_warn(kbdev->dev,
- "Stop PM suspending for failing to prevent gpu reset.\n");
- return;
+ result = kbase_reset_gpu_prevent_and_wait(kbdev);
+ if (result) {
+ dev_warn(kbdev->dev, "Stop PM suspending for failing to prevent gpu reset.\n");
+ return result;
}
mutex_lock(&scheduler->lock);
- disable_gpu_idle_fw_timer(kbdev);
-
- if (scheduler->state != SCHED_SUSPENDED) {
- suspend_active_groups_on_powerdown(kbdev, true);
- dev_info(kbdev->dev, "Scheduler PM suspend");
- scheduler_suspend(kbdev);
- cancel_tick_timer(kbdev);
- }
+ result = kbase_csf_scheduler_pm_suspend_no_lock(kbdev);
mutex_unlock(&scheduler->lock);
kbase_reset_gpu_allow(kbdev);
+
+ return result;
}
KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_suspend);
-void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev)
+void kbase_csf_scheduler_pm_resume_no_lock(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
- mutex_lock(&scheduler->lock);
-
- if (scheduler->total_runnable_grps > 0) {
- WARN_ON(scheduler->state != SCHED_SUSPENDED);
+ lockdep_assert_held(&scheduler->lock);
+ if ((scheduler->total_runnable_grps > 0) &&
+ (scheduler->state == SCHED_SUSPENDED)) {
dev_info(kbdev->dev, "Scheduler PM resume");
scheduler_wakeup(kbdev, true);
}
- mutex_unlock(&scheduler->lock);
+}
+
+void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev)
+{
+ mutex_lock(&kbdev->csf.scheduler.lock);
+
+ kbase_csf_scheduler_pm_resume_no_lock(kbdev);
+ mutex_unlock(&kbdev->csf.scheduler.lock);
}
KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_resume);
void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev)
{
- unsigned long flags;
- u32 prev_count;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- prev_count = kbdev->csf.scheduler.pm_active_count++;
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
- /* On 0 => 1, make a pm_ctx_active request */
- if (!prev_count)
- kbase_pm_context_active(kbdev);
- else
- WARN_ON(prev_count == U32_MAX);
+ /* Here the lock is taken to synchronize against the runtime suspend
+ * callback function, which may need to wake up the MCU for suspending
+ * the CSGs before powering down the GPU.
+ */
+ mutex_lock(&kbdev->csf.scheduler.lock);
+ scheduler_pm_active_handle_suspend(kbdev,
+ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
+ mutex_unlock(&kbdev->csf.scheduler.lock);
}
KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active);
void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev)
{
- unsigned long flags;
- u32 prev_count;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- prev_count = kbdev->csf.scheduler.pm_active_count--;
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
- if (prev_count == 1)
- kbase_pm_context_idle(kbdev);
- else
- WARN_ON(prev_count == 0);
+ /* Here the lock is taken just to maintain symmetry with
+ * kbase_csf_scheduler_pm_active().
+ */
+ mutex_lock(&kbdev->csf.scheduler.lock);
+ scheduler_pm_idle(kbdev);
+ mutex_unlock(&kbdev->csf.scheduler.lock);
}
KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle);
+
+int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ unsigned long flags;
+ int err;
+
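+ /* Sanity check that a PM reference is held before waiting, then wait
+ * for any pending power-off work to finish and for the MCU to reach
+ * the ON state.
+ */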
+ kbase_pm_lock(kbdev);
+ WARN_ON(!kbdev->pm.active_count);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ WARN_ON(!scheduler->pm_active_count);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ kbase_pm_unlock(kbdev);
+
+ kbase_pm_wait_for_poweroff_work_complete(kbdev);
+
+ err = kbase_pm_wait_for_desired_state(kbdev);
+ if (!err) {
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_ON);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
+
+ return err;
+}
+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_wait_mcu_active);
+
+#ifdef KBASE_PM_RUNTIME
+int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ unsigned long flags;
+ int ret;
+
+ dev_dbg(kbdev->dev, "Handling runtime suspend");
+
+ kbase_reset_gpu_assert_prevented(kbdev);
+ lockdep_assert_held(&scheduler->lock);
+ WARN_ON(scheduler->pm_active_count);
+
+ if (scheduler->state == SCHED_SUSPENDED) {
+ WARN_ON(kbdev->pm.backend.gpu_sleep_mode_active);
+ return 0;
+ }
+
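+ /* Try to suspend all active groups; if any non-idle off-slot group
+ * remains, abort the runtime suspend, request an exit from GPU sleep
+ * mode and invoke a scheduling tick.
+ */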
+ ret = suspend_active_groups_on_powerdown(kbdev, false);
+
+ if (ret) {
+ dev_dbg(kbdev->dev, "Aborting runtime suspend (grps: %d)",
+ atomic_read(&scheduler->non_idle_offslot_grps));
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbdev->pm.backend.exit_gpu_sleep_mode = true;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ kbase_csf_scheduler_invoke_tick(kbdev);
+ return ret;
+ }
+
+ scheduler->state = SCHED_SUSPENDED;
+ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbdev->pm.backend.gpu_sleep_mode_active = false;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ wake_up_all(&kbdev->csf.event_wait);
+ return 0;
+}
+
+void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev)
+{
+ u32 csg_nr;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP);
+
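+ /* Inspect the STATUS_STATE of each resident CSG; if any group is no
+ * longer idle, request an exit from GPU sleep and invoke a scheduling
+ * tick so that normal scheduling resumes.
+ */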
+ for (csg_nr = 0; csg_nr < kbdev->csf.global_iface.group_num; csg_nr++) {
+ struct kbase_csf_cmd_stream_group_info *ginfo =
+ &kbdev->csf.global_iface.groups[csg_nr];
+ bool csg_idle;
+
+ if (!kbdev->csf.scheduler.csg_slots[csg_nr].resident_group)
+ continue;
+
+ csg_idle =
+ kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
+ CSG_STATUS_STATE_IDLE_MASK;
+ if (!csg_idle) {
+ dev_dbg(kbdev->dev,
+ "Re-activate Scheduler after MCU sleep");
+ kbdev->pm.backend.exit_gpu_sleep_mode = true;
+ kbase_csf_scheduler_invoke_tick(kbdev);
+ break;
+ }
+ }
+}
+
+void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+ mutex_lock(&scheduler->lock);
+ if (kbase_pm_gpu_sleep_allowed(kbdev) &&
+ (scheduler->state == SCHED_INACTIVE))
+ scheduler_sleep_on_idle(kbdev);
+ mutex_unlock(&scheduler->lock);
+}
+#endif
+
+void kbase_csf_scheduler_force_wakeup(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+ mutex_lock(&scheduler->lock);
+ scheduler_wakeup(kbdev, true);
+ mutex_unlock(&scheduler->lock);
+}
--
Gitblit v1.6.2