2023-12-06 08f87f769b595151be1afeff53e144f543faa614
kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -28,16 +28,20 @@
 #include <tl/mali_kbase_tracepoints.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 #include <linux/export.h>
-#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
+#include <csf/mali_kbase_csf_registers.h>
 #include <uapi/gpu/arm/bifrost/mali_base_kernel.h>
+#include <mali_kbase_hwaccess_time.h>
+#include "mali_kbase_csf_tiler_heap_reclaim.h"
+#include "mali_kbase_csf_mcu_shared_reg.h"
 
 /* Value to indicate that a queue group is not groups_to_schedule list */
 #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX)
 
-/* Waiting timeout for scheduler state change for descheduling a CSG */
-#define CSG_SCHED_STOP_TIMEOUT_MS (50)
-
-#define CSG_SUSPEND_ON_RESET_WAIT_TIMEOUT_MS DEFAULT_RESET_TIMEOUT_MS
+/* This decides the upper limit on the waiting time for the Scheduler
+ * to exit the sleep state. Usually the value of autosuspend_delay is
+ * expected to be around 100 milliseconds.
+ */
+#define MAX_AUTO_SUSPEND_DELAY_MS (5000)
 
 /* Maximum number of endpoints which may run tiler jobs. */
 #define CSG_TILER_MAX ((u8)1)
@@ -48,37 +52,14 @@
 /* CSF scheduler time slice value */
 #define CSF_SCHEDULER_TIME_TICK_MS (100) /* 100 milliseconds */
 
-/*
- * CSF scheduler time threshold for converting "tock" requests into "tick" if
- * they come too close to the end of a tick interval. This avoids scheduling
- * twice in a row.
- */
-#define CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS \
-	CSF_SCHEDULER_TIME_TICK_MS
+/* A GPU address space slot is reserved for MCU. */
+#define NUM_RESERVED_AS_SLOTS (1)
 
-#define CSF_SCHEDULER_TIME_TICK_THRESHOLD_JIFFIES \
-	msecs_to_jiffies(CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS)
+/* Time to wait for completion of PING req before considering MCU as hung */
+#define FW_PING_AFTER_ERROR_TIMEOUT_MS (10)
 
-/* Nanoseconds per millisecond */
-#define NS_PER_MS ((u64)1000 * 1000)
-
-/*
- * CSF minimum time to reschedule for a new "tock" request. Bursts of "tock"
- * requests are not serviced immediately, but shall wait for a minimum time in
- * order to reduce load on the CSF scheduler thread.
- */
-#define CSF_SCHEDULER_TIME_TOCK_JIFFIES 1 /* 1 jiffies-time */
-
-/* CS suspended and is idle (empty ring buffer) */
-#define CS_IDLE_FLAG (1 << 0)
-
-/* CS suspended and is wait for a CQS condition */
-#define CS_WAIT_SYNC_FLAG (1 << 1)
-
-/* 2 GPU address space slots are reserved for MCU and privileged context for HW
- * counter dumping. TODO remove the slot reserved for latter in GPUCORE-26293.
- */
-#define NUM_RESERVED_AS_SLOTS (2)
+/* Explicitly defining this blocked_reason code as SB_WAIT for clarity */
+#define CS_STATUS_BLOCKED_ON_SB_WAIT CS_STATUS_BLOCKED_REASON_REASON_WAIT
 
 static int scheduler_group_schedule(struct kbase_queue_group *group);
 static void remove_group_from_idle_wait(struct kbase_queue_group *const group);
....@@ -94,14 +75,234 @@
9475 static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev);
9576 static int suspend_active_queue_groups(struct kbase_device *kbdev,
9677 unsigned long *slot_mask);
78
+static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
79
+ bool system_suspend);
9780 static void schedule_in_cycle(struct kbase_queue_group *group, bool force);
81
+static bool queue_group_scheduled_locked(struct kbase_queue_group *group);
9882
9983 #define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT))
10084
10185 /**
86
+ * wait_for_dump_complete_on_group_deschedule() - Wait for dump on fault and
87
+ * scheduling tick/tock to complete before the group deschedule.
88
+ *
89
+ * @group: Pointer to the group that is being descheduled.
90
+ *
91
+ * This function blocks the descheduling of the group until the dump on fault is
92
+ * completed and scheduling tick/tock has completed.
93
+ * To deschedule an on slot group CSG termination request would be sent and that
94
+ * might time out if the fault had occurred and also potentially affect the state
95
+ * being dumped. Moreover the scheduler lock would be held, so the access to debugfs
96
+ * files would get blocked.
97
+ * Scheduler lock and 'kctx->csf.lock' are released before this function starts
98
+ * to wait. When a request sent by the Scheduler to the FW times out, Scheduler
99
+ * would also wait for the dumping to complete and release the Scheduler lock
100
+ * before the wait. Meanwhile Userspace can try to delete the group, this function
101
+ * would ensure that the group doesn't exit the Scheduler until scheduling
102
+ * tick/tock has completed. Though very unlikely, group deschedule can be triggered
103
+ * from multiple threads around the same time and after the wait Userspace thread
104
+ * can win the race and get the group descheduled and free the memory for group
105
+ * pointer before the other threads wake up and notice that group has already been
106
+ * descheduled. To avoid the freeing in such a case, a sort of refcount is used
107
+ * for the group which is incremented & decremented across the wait.
108
+ */
109
+static
110
+void wait_for_dump_complete_on_group_deschedule(struct kbase_queue_group *group)
111
+{
112
+#if IS_ENABLED(CONFIG_DEBUG_FS)
113
+ struct kbase_device *kbdev = group->kctx->kbdev;
114
+ struct kbase_context *kctx = group->kctx;
115
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
116
+
117
+ lockdep_assert_held(&kctx->csf.lock);
118
+ lockdep_assert_held(&scheduler->lock);
119
+
120
+ if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev)))
121
+ return;
122
+
123
+ while ((!kbase_debug_csf_fault_dump_complete(kbdev) ||
124
+ (scheduler->state == SCHED_BUSY)) &&
125
+ queue_group_scheduled_locked(group)) {
126
+ group->deschedule_deferred_cnt++;
127
+ mutex_unlock(&scheduler->lock);
128
+ mutex_unlock(&kctx->csf.lock);
129
+ kbase_debug_csf_fault_wait_completion(kbdev);
130
+ mutex_lock(&kctx->csf.lock);
131
+ mutex_lock(&scheduler->lock);
132
+ group->deschedule_deferred_cnt--;
133
+ }
134
+#endif
135
+}
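The comment above describes a deferred-deschedule refcount. The sketch below is not part of the patch; the helper name is hypothetical, and the real check lives in the group termination path that is outside this hunk. It only illustrates how a free path would typically honour that count:

static bool example_group_can_be_freed(struct kbase_queue_group *group)
{
	lockdep_assert_held(&group->kctx->csf.lock);

	/* A non-zero count means another thread is still parked in
	 * wait_for_dump_complete_on_group_deschedule() with the locks
	 * dropped, so the group memory must outlive that wait.
	 */
	return group->deschedule_deferred_cnt == 0;
}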
136
+
137
+/**
138
+ * schedule_actions_trigger_df() - Notify the client about the fault and
139
+ * wait for the dumping to complete.
140
+ *
141
+ * @kbdev: Pointer to the device
142
+ * @kctx: Pointer to the context associated with the CSG slot for which
143
+ * the timeout was seen.
144
+ * @error: Error code indicating the type of timeout that occurred.
145
+ *
146
+ * This function notifies the Userspace client waiting for the faults and wait
147
+ * for the Client to complete the dumping.
148
+ * The function is called only from Scheduling tick/tock when a request sent by
149
+ * the Scheduler to FW times out or from the protm event work item of the group
150
+ * when the protected mode entry request times out.
151
+ * In the latter case there is no wait done as scheduler lock would be released
152
+ * immediately. In the former case the function waits and releases the scheduler
153
+ * lock before the wait. It has been ensured that the Scheduler view of the groups
154
+ * won't change meanwhile, so no group can enter/exit the Scheduler, become
155
+ * runnable or go off slot.
156
+ */
157
+static void schedule_actions_trigger_df(struct kbase_device *kbdev,
158
+ struct kbase_context *kctx, enum dumpfault_error_type error)
159
+{
160
+#if IS_ENABLED(CONFIG_DEBUG_FS)
161
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
162
+
163
+ lockdep_assert_held(&scheduler->lock);
164
+
165
+ if (!kbase_debug_csf_fault_notify(kbdev, kctx, error))
166
+ return;
167
+
168
+ if (unlikely(scheduler->state != SCHED_BUSY)) {
169
+ WARN_ON(error != DF_PROTECTED_MODE_ENTRY_FAILURE);
170
+ return;
171
+ }
172
+
173
+ mutex_unlock(&scheduler->lock);
174
+ kbase_debug_csf_fault_wait_completion(kbdev);
175
+ mutex_lock(&scheduler->lock);
176
+ WARN_ON(scheduler->state != SCHED_BUSY);
177
+#endif
178
+}
179
+
180
+#ifdef KBASE_PM_RUNTIME
181
+/**
182
+ * wait_for_scheduler_to_exit_sleep() - Wait for Scheduler to exit the
183
+ * sleeping state.
184
+ *
185
+ * @kbdev: Pointer to the device
186
+ *
187
+ * This function waits until the Scheduler has exited the sleep state and
188
+ * it is called when an on-slot group is terminated or when the suspend
189
+ * buffer of an on-slot group needs to be captured.
190
+ *
191
+ * Return: 0 when the wait is successful, otherwise an error code.
192
+ */
193
+static int wait_for_scheduler_to_exit_sleep(struct kbase_device *kbdev)
194
+{
195
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
196
+ int autosuspend_delay = kbdev->dev->power.autosuspend_delay;
197
+ unsigned int sleep_exit_wait_time;
198
+ long remaining;
199
+ int ret = 0;
200
+
201
+ lockdep_assert_held(&scheduler->lock);
202
+ WARN_ON(scheduler->state != SCHED_SLEEPING);
203
+
204
+ /* No point in waiting if autosuspend_delay value is negative.
205
+ * For the negative value of autosuspend_delay Driver will directly
206
+ * go for the suspend of Scheduler, but the autosuspend_delay value
207
+ * could have been changed after the sleep was initiated.
208
+ */
209
+ if (autosuspend_delay < 0)
210
+ return -EINVAL;
211
+
212
+ if (autosuspend_delay > MAX_AUTO_SUSPEND_DELAY_MS)
213
+ autosuspend_delay = MAX_AUTO_SUSPEND_DELAY_MS;
214
+
215
+ /* Usually Scheduler would remain in sleeping state until the
216
+ * auto-suspend timer expires and all active CSGs are suspended.
217
+ */
218
+ sleep_exit_wait_time = autosuspend_delay + kbdev->reset_timeout_ms;
219
+
220
+ remaining = kbase_csf_timeout_in_jiffies(sleep_exit_wait_time);
221
+
222
+ while ((scheduler->state == SCHED_SLEEPING) && !ret) {
223
+ mutex_unlock(&scheduler->lock);
224
+ remaining = wait_event_timeout(
225
+ kbdev->csf.event_wait,
226
+ (scheduler->state != SCHED_SLEEPING),
227
+ remaining);
228
+ mutex_lock(&scheduler->lock);
229
+ if (!remaining && (scheduler->state == SCHED_SLEEPING))
230
+ ret = -ETIMEDOUT;
231
+ }
232
+
233
+ return ret;
234
+}
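For reference, a minimal sketch (not part of the patch) of the wait budget derived above; the example millisecond values in the comment are assumptions, and a negative autosuspend_delay is assumed to have been rejected by the caller with -EINVAL as in the function above:

static unsigned int example_sleep_exit_wait_ms(int autosuspend_delay_ms,
					       unsigned int reset_timeout_ms)
{
	/* Mirrors the clamping done in wait_for_scheduler_to_exit_sleep() */
	if (autosuspend_delay_ms > MAX_AUTO_SUSPEND_DELAY_MS)
		autosuspend_delay_ms = MAX_AUTO_SUSPEND_DELAY_MS;

	/* e.g. a 100 ms autosuspend delay plus a 500 ms reset timeout gives
	 * a 600 ms budget; a 10000 ms delay would first be capped to 5000 ms.
	 */
	return (unsigned int)autosuspend_delay_ms + reset_timeout_ms;
}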
235
+
236
+/**
237
+ * force_scheduler_to_exit_sleep() - Force scheduler to exit sleep state
238
+ *
239
+ * @kbdev: Pointer to the device
240
+ *
241
+ * This function will force the Scheduler to exit the sleep state by doing the
242
+ * wake up of MCU and suspension of on-slot groups. It is called at the time of
243
+ * system suspend.
244
+ *
245
+ * Return: 0 on success.
246
+ */
247
+static int force_scheduler_to_exit_sleep(struct kbase_device *kbdev)
248
+{
249
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
250
+ unsigned long flags;
251
+ int ret = 0;
252
+
253
+ lockdep_assert_held(&scheduler->lock);
254
+ WARN_ON(scheduler->state != SCHED_SLEEPING);
255
+ WARN_ON(!kbdev->pm.backend.gpu_sleep_mode_active);
256
+
257
+ kbase_pm_lock(kbdev);
258
+ ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev);
259
+ kbase_pm_unlock(kbdev);
260
+ if (ret) {
261
+ dev_warn(kbdev->dev,
262
+ "[%llu] Wait for MCU wake up failed on forced scheduler suspend",
263
+ kbase_backend_get_cycle_cnt(kbdev));
264
+ goto out;
265
+ }
266
+
267
+ ret = suspend_active_groups_on_powerdown(kbdev, true);
268
+ if (ret)
269
+ goto out;
270
+
271
+ kbase_pm_lock(kbdev);
272
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
273
+ kbdev->pm.backend.gpu_sleep_mode_active = false;
274
+ kbdev->pm.backend.gpu_wakeup_override = false;
275
+ kbase_pm_update_state(kbdev);
276
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
277
+ ret = kbase_pm_wait_for_desired_state(kbdev);
278
+ kbase_pm_unlock(kbdev);
279
+ if (ret) {
280
+ dev_warn(kbdev->dev,
281
+ "[%llu] Wait for pm state change failed on forced scheduler suspend",
282
+ kbase_backend_get_cycle_cnt(kbdev));
283
+ goto out;
284
+ }
285
+
286
+ scheduler->state = SCHED_SUSPENDED;
287
+ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
288
+
289
+ return 0;
290
+
291
+out:
292
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
293
+ kbdev->pm.backend.exit_gpu_sleep_mode = true;
294
+ kbdev->pm.backend.gpu_wakeup_override = false;
295
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
296
+ kbase_csf_scheduler_invoke_tick(kbdev);
297
+
298
+ return ret;
299
+}
300
+#endif
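A hedged usage sketch of the two sleep-exit helpers above. The caller shown is hypothetical (it would also have to sit under KBASE_PM_RUNTIME); it only illustrates the intended split between the graceful wait and the forced exit:

static void example_exit_sleep(struct kbase_device *kbdev, bool system_suspend)
{
	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;

	lockdep_assert_held(&scheduler->lock);

	if (scheduler->state != SCHED_SLEEPING)
		return;

	if (system_suspend) {
		/* No time to let the autosuspend timer run: wake the MCU and
		 * suspend the on-slot groups immediately.
		 */
		if (force_scheduler_to_exit_sleep(kbdev))
			dev_dbg(kbdev->dev, "forced sleep exit failed, tick was rescheduled");
	} else {
		/* Give the autosuspend timer a chance to expire naturally */
		if (wait_for_scheduler_to_exit_sleep(kbdev) == -ETIMEDOUT)
			dev_dbg(kbdev->dev, "sleep exit wait timed out");
	}
}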
301
+
302
+/**
102303 * tick_timer_callback() - Callback function for the scheduling tick hrtimer
103304 *
104
- * @timer: Pointer to the device
305
+ * @timer: Pointer to the scheduling tick hrtimer
105306 *
106307 * This function will enqueue the scheduling tick work item for immediate
107308 * execution, if it has not been queued already.
....@@ -113,7 +314,7 @@
113314 struct kbase_device *kbdev = container_of(timer, struct kbase_device,
114315 csf.scheduler.tick_timer);
115316
116
- kbase_csf_scheduler_advance_tick(kbdev);
317
+ kbase_csf_scheduler_tick_advance(kbdev);
117318 return HRTIMER_NORESTART;
118319 }
119320
....@@ -124,7 +325,7 @@
124325 *
125326 * This function will start the scheduling tick hrtimer and is supposed to
126327 * be called only from the tick work item function. The tick hrtimer should
127
- * should not be active already.
328
+ * not be active already.
128329 */
129330 static void start_tick_timer(struct kbase_device *kbdev)
130331 {
....@@ -173,14 +374,10 @@
173374 static void enqueue_tick_work(struct kbase_device *kbdev)
174375 {
175376 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
176
- unsigned long flags;
177377
178378 lockdep_assert_held(&scheduler->lock);
179379
180
- spin_lock_irqsave(&scheduler->interrupt_lock, flags);
181
- WARN_ON(scheduler->tick_timer_active);
182
- queue_work(scheduler->wq, &scheduler->tick_work);
183
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
380
+ kbase_csf_scheduler_invoke_tick(kbdev);
184381 }
185382
186383 static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr)
....@@ -254,7 +451,7 @@
254451 mutex_lock(&kbdev->csf.reg_lock);
255452
256453 /* If bind operation for the queue hasn't completed yet, then the
257
- * the CSI can't be programmed for the queue
454
+ * CSI can't be programmed for the queue
258455 * (even in stopped state) and so the doorbell also can't be assigned
259456 * to it.
260457 */
....@@ -288,11 +485,110 @@
288485 WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR);
289486 }
290487
291
-static u32 get_nr_active_csgs(struct kbase_device *kbdev)
488
+/**
489
+ * update_on_slot_queues_offsets - Update active queues' INSERT & EXTRACT ofs
490
+ *
491
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
492
+ *
493
+ * This function updates the EXTRACT offset for all queues which groups have
494
+ * been assigned a physical slot. These values could be used to detect a
495
+ * queue's true idleness status. This is intended to be an additional check
496
+ * on top of the GPU idle notification to account for race conditions.
497
+ * This function is supposed to be called only when GPU idle notification
498
+ * interrupt is received.
499
+ */
500
+static void update_on_slot_queues_offsets(struct kbase_device *kbdev)
501
+{
502
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
503
+ /* All CSGs have the same number of CSs */
504
+ size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num;
505
+ size_t i;
506
+
507
+ lockdep_assert_held(&scheduler->interrupt_lock);
508
+
509
+ /* csg_slots_idle_mask is not used here for the looping, as it could get
510
+ * updated concurrently when Scheduler re-evaluates the idle status of
511
+ * the CSGs for which idle notification was received previously.
512
+ */
513
+ for_each_set_bit(i, scheduler->csg_inuse_bitmap, kbdev->csf.global_iface.group_num) {
514
+ struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group;
515
+ size_t j;
516
+
517
+ if (WARN_ON(!group))
518
+ continue;
519
+
520
+ for (j = 0; j < max_streams; ++j) {
521
+ struct kbase_queue *const queue = group->bound_queues[j];
522
+
523
+ if (queue) {
524
+ if (queue->user_io_addr) {
525
+ u64 const *const output_addr =
526
+ (u64 const *)(queue->user_io_addr + PAGE_SIZE);
527
+
528
+ queue->extract_ofs =
529
+ output_addr[CS_EXTRACT_LO / sizeof(u64)];
530
+ } else {
531
+ dev_warn(kbdev->dev,
532
+ "%s(): queue->user_io_addr is NULL, queue: %p",
533
+ __func__,
534
+ queue);
535
+ }
536
+ }
537
+ }
538
+ }
539
+}
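The saved EXTRACT offsets are meant for a later idleness cross-check. A sketch of that use follows (hypothetical helper, not part of the patch), using the same user_io output-page addressing as the function above:

static bool example_queue_made_progress(struct kbase_queue *queue)
{
	u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
	u64 cur_extract = output_addr[CS_EXTRACT_LO / sizeof(u64)];

	/* If EXTRACT moved since the snapshot above was taken, the CS has
	 * consumed more work in the meantime and is not really idle.
	 */
	return cur_extract != queue->extract_ofs;
}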
540
+
541
+static void enqueue_gpu_idle_work(struct kbase_csf_scheduler *const scheduler)
542
+{
543
+ atomic_set(&scheduler->gpu_no_longer_idle, false);
544
+ queue_work(scheduler->idle_wq, &scheduler->gpu_idle_work);
545
+}
546
+
547
+void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
548
+{
549
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
550
+ int non_idle_offslot_grps;
551
+ bool can_suspend_on_idle;
552
+
553
+ lockdep_assert_held(&kbdev->hwaccess_lock);
554
+ lockdep_assert_held(&scheduler->interrupt_lock);
555
+
556
+ non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
557
+ can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
558
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND, NULL,
559
+ ((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
560
+
561
+ if (!non_idle_offslot_grps) {
562
+ if (can_suspend_on_idle) {
563
+ /* fast_gpu_idle_handling is protected by the
564
+ * interrupt_lock, which would prevent this from being
565
+ * updated whilst gpu_idle_worker() is executing.
566
+ */
567
+ scheduler->fast_gpu_idle_handling =
568
+ (kbdev->csf.gpu_idle_hysteresis_us == 0) ||
569
+ !kbase_csf_scheduler_all_csgs_idle(kbdev);
570
+
571
+ /* The GPU idle worker relies on update_on_slot_queues_offsets() to have
572
+ * finished. It's queued before to reduce the time it takes till execution
573
+ * but it'll eventually be blocked by the scheduler->interrupt_lock.
574
+ */
575
+ enqueue_gpu_idle_work(scheduler);
576
+
577
+ /* The extract offsets are unused in fast GPU idle handling */
578
+ if (!scheduler->fast_gpu_idle_handling)
579
+ update_on_slot_queues_offsets(kbdev);
580
+ }
581
+ } else {
582
+ /* Advance the scheduling tick to get the non-idle suspended groups loaded soon */
583
+ kbase_csf_scheduler_tick_advance_nolock(kbdev);
584
+ }
585
+}
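The fast-path decision made above, restated as a small helper for clarity (illustrative only, not part of the patch):

static bool example_fast_gpu_idle_handling(struct kbase_device *kbdev)
{
	/* Fast handling skips the per-queue EXTRACT snapshot: it is chosen
	 * when the idle hysteresis is disabled, or when not every on-slot
	 * CSG has reported itself idle.
	 */
	return (kbdev->csf.gpu_idle_hysteresis_us == 0) ||
	       !kbase_csf_scheduler_all_csgs_idle(kbdev);
}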
586
+
587
+u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev)
292588 {
293589 u32 nr_active_csgs;
294590
295
- lockdep_assert_held(&kbdev->csf.scheduler.lock);
591
+ lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
296592
297593 nr_active_csgs = bitmap_weight(kbdev->csf.scheduler.csg_inuse_bitmap,
298594 kbdev->csf.global_iface.group_num);
....@@ -300,27 +596,16 @@
300596 return nr_active_csgs;
301597 }
302598
303
-/**
304
- * csgs_active - returns true if any of CSG slots are in use
305
- *
306
- * @kbdev: Instance of a GPU platform device that implements a CSF interface.
307
- *
308
- * Return: the interface is actively engaged flag.
309
- */
310
-static bool csgs_active(struct kbase_device *kbdev)
599
+u32 kbase_csf_scheduler_get_nr_active_csgs(struct kbase_device *kbdev)
311600 {
312601 u32 nr_active_csgs;
602
+ unsigned long flags;
313603
314
- mutex_lock(&kbdev->csf.scheduler.lock);
315
- nr_active_csgs = get_nr_active_csgs(kbdev);
316
- mutex_unlock(&kbdev->csf.scheduler.lock);
604
+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
605
+ nr_active_csgs = kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev);
606
+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
317607
318
- /* Right now if any of the CSG interfaces are in use
319
- * then we need to assume that there is some work pending.
320
- * In future when we have IDLE notifications from firmware implemented
321
- * then we would have a better idea of the pending work.
322
- */
323
- return (nr_active_csgs != 0);
608
+ return nr_active_csgs;
324609 }
325610
326611 /**
....@@ -358,6 +643,19 @@
358643 group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE);
359644 }
360645
646
+static bool on_slot_group_idle_locked(struct kbase_queue_group *group)
647
+{
648
+ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
649
+
650
+ return (group->run_state == KBASE_CSF_GROUP_IDLE);
651
+}
652
+
653
+static bool can_schedule_idle_group(struct kbase_queue_group *group)
654
+{
655
+ return (on_slot_group_idle_locked(group) ||
656
+ (group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME));
657
+}
658
+
361659 static bool queue_group_scheduled(struct kbase_queue_group *group)
362660 {
363661 return (group->run_state != KBASE_CSF_GROUP_INACTIVE &&
....@@ -373,32 +671,43 @@
373671 }
374672
375673 /**
376
- * scheduler_wait_protm_quit() - Wait for GPU to exit protected mode.
674
+ * scheduler_protm_wait_quit() - Wait for GPU to exit protected mode.
377675 *
378676 * @kbdev: Pointer to the GPU device
379677 *
380678 * This function waits for the GPU to exit protected mode which is confirmed
381679 * when active_protm_grp is set to NULL.
680
+ *
681
+ * Return: true on success, false otherwise.
382682 */
383
-static void scheduler_wait_protm_quit(struct kbase_device *kbdev)
683
+static bool scheduler_protm_wait_quit(struct kbase_device *kbdev)
384684 {
385685 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
386686 long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
387687 long remaining;
688
+ bool success = true;
388689
389690 lockdep_assert_held(&scheduler->lock);
390691
391
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT, NULL,
392
- jiffies_to_msecs(wt));
692
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_START, NULL, jiffies_to_msecs(wt));
393693
394694 remaining = wait_event_timeout(kbdev->csf.event_wait,
395695 !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt);
396696
397
- if (!remaining)
398
- dev_warn(kbdev->dev, "Timeout, protm_quit wait skipped");
697
+ if (unlikely(!remaining)) {
698
+ struct kbase_queue_group *group = kbdev->csf.scheduler.active_protm_grp;
699
+ struct kbase_context *kctx = group ? group->kctx : NULL;
399700
400
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT_DONE, NULL,
401
- jiffies_to_msecs(remaining));
701
+ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms), protm_quit wait skipped",
702
+ kbase_backend_get_cycle_cnt(kbdev),
703
+ kbdev->csf.fw_timeout_ms);
704
+ schedule_actions_trigger_df(kbdev, kctx, DF_PROTECTED_MODE_EXIT_TIMEOUT);
705
+ success = false;
706
+ }
707
+
708
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_END, NULL, jiffies_to_msecs(remaining));
709
+
710
+ return success;
402711 }
403712
404713 /**
....@@ -408,13 +717,39 @@
408717 *
409718 * This function sends a ping request to the firmware and waits for the GPU
410719 * to exit protected mode.
720
+ *
721
+ * If the GPU does not exit protected mode, it is considered to have hung.

722
+ * A GPU reset would then be triggered.
411723 */
412724 static void scheduler_force_protm_exit(struct kbase_device *kbdev)
413725 {
726
+ unsigned long flags;
727
+
414728 lockdep_assert_held(&kbdev->csf.scheduler.lock);
415729
416730 kbase_csf_firmware_ping(kbdev);
417
- scheduler_wait_protm_quit(kbdev);
731
+
732
+ if (scheduler_protm_wait_quit(kbdev))
733
+ return;
734
+
735
+ dev_err(kbdev->dev, "Possible GPU hang in Protected mode");
736
+
737
+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
738
+ if (kbdev->csf.scheduler.active_protm_grp) {
739
+ dev_err(kbdev->dev,
740
+ "Group-%d of context %d_%d ran in protected mode for too long on slot %d",
741
+ kbdev->csf.scheduler.active_protm_grp->handle,
742
+ kbdev->csf.scheduler.active_protm_grp->kctx->tgid,
743
+ kbdev->csf.scheduler.active_protm_grp->kctx->id,
744
+ kbdev->csf.scheduler.active_protm_grp->csg_nr);
745
+ }
746
+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
747
+
748
+ /* The GPU could be stuck in Protected mode. To prevent a hang,
749
+ * a GPU reset is performed.
750
+ */
751
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
752
+ kbase_reset_gpu(kbdev);
418753 }
419754
420755 /**
....@@ -435,68 +770,221 @@
435770 return kbdev->csf.scheduler.timer_enabled;
436771 }
437772
438
-static void enable_gpu_idle_fw_timer(struct kbase_device *kbdev)
773
+/**
774
+ * scheduler_pm_active_handle_suspend() - Acquire the PM reference count for
775
+ * Scheduler
776
+ *
777
+ * @kbdev: Pointer to the device
778
+ * @suspend_handler: Handler code for how to handle a suspend that might occur.
779
+ *
780
+ * This function is usually called when Scheduler needs to be activated.
781
+ * The PM reference count is acquired for the Scheduler and the power on
782
+ * of GPU is initiated.
783
+ *
784
+ * Return: 0 if successful or a negative error code on failure.
785
+ */
786
+static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
787
+ enum kbase_pm_suspend_handler suspend_handler)
439788 {
440
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
441789 unsigned long flags;
790
+ u32 prev_count;
791
+ int ret = 0;
442792
443
- lockdep_assert_held(&scheduler->lock);
793
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
444794
445
- if (scheduler->gpu_idle_fw_timer_enabled)
446
- return;
795
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
796
+ prev_count = kbdev->csf.scheduler.pm_active_count;
797
+ if (!WARN_ON(prev_count == U32_MAX))
798
+ kbdev->csf.scheduler.pm_active_count++;
799
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
447800
448
- spin_lock_irqsave(&scheduler->interrupt_lock, flags);
801
+ /* On 0 => 1, make a pm_ctx_active request */
802
+ if (!prev_count) {
803
+ ret = kbase_pm_context_active_handle_suspend(kbdev,
804
+ suspend_handler);
805
+ /* Invoke the PM state machines again as the change in MCU
806
+ * desired status, due to the update of scheduler.pm_active_count,
807
+ * may be missed by the thread that called pm_wait_for_desired_state()
808
+ */
809
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
810
+ if (ret)
811
+ kbdev->csf.scheduler.pm_active_count--;
812
+ kbase_pm_update_state(kbdev);
813
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
814
+ }
449815
450
- /* Update the timer_enabled flag requires holding interrupt_lock */
451
- scheduler->gpu_idle_fw_timer_enabled = true;
452
- kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
453
-
454
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
816
+ return ret;
455817 }
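A hedged sketch of how this helper pairs with scheduler_pm_idle() (added further down in this patch); the caller shown is hypothetical:

static int example_run_with_gpu_powered(struct kbase_device *kbdev)
{
	int ret;

	lockdep_assert_held(&kbdev->csf.scheduler.lock);

	/* 0 -> 1 transition: take a PM context reference and start power-up */
	ret = scheduler_pm_active_handle_suspend(kbdev,
			KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
	if (ret)
		return ret;

	/* ... program CSG slots while the MCU is guaranteed to stay up ... */

	/* 1 -> 0 transition: drop the reference so the GPU may power down */
	scheduler_pm_idle(kbdev);
	return 0;
}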
456818
457
-static void disable_gpu_idle_fw_timer_locked(struct kbase_device *kbdev)
819
+#ifdef KBASE_PM_RUNTIME
820
+/**
821
+ * scheduler_pm_active_after_sleep() - Acquire the PM reference count for
822
+ * Scheduler
823
+ *
824
+ * @kbdev: Pointer to the device
825
+ * @flags: Pointer to the flags variable containing the interrupt state
826
+ * when hwaccess lock was acquired.
827
+ *
828
+ * This function is called when Scheduler needs to be activated from the
829
+ * sleeping state.
830
+ * The PM reference count is acquired for the Scheduler and the wake up of
831
+ * MCU is initiated. It resets the flag that indicates to the MCU state
832
+ * machine that MCU needs to be put in sleep state.
833
+ *
834
+ * Note: This function shall be called with hwaccess lock held and it may
835
+ * release that lock and reacquire it.
836
+ *
837
+ * Return: zero when the PM reference was taken and non-zero when the
838
+ * system is being suspending/suspended.
839
+ */
840
+static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev,
841
+ unsigned long *flags)
458842 {
459
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
843
+ u32 prev_count;
844
+ int ret = 0;
460845
461
- lockdep_assert_held(&scheduler->lock);
462
- lockdep_assert_held(&scheduler->interrupt_lock);
846
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
847
+ lockdep_assert_held(&kbdev->hwaccess_lock);
463848
464
- /* Update of the timer_enabled flag requires holding interrupt_lock */
465
- if (scheduler->gpu_idle_fw_timer_enabled) {
466
- scheduler->gpu_idle_fw_timer_enabled = false;
467
- kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
849
+ prev_count = kbdev->csf.scheduler.pm_active_count;
850
+ if (!WARN_ON(prev_count == U32_MAX))
851
+ kbdev->csf.scheduler.pm_active_count++;
852
+
853
+ /* On 0 => 1, make a pm_ctx_active request */
854
+ if (!prev_count) {
855
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, *flags);
856
+
857
+ ret = kbase_pm_context_active_handle_suspend(kbdev,
858
+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
859
+
860
+ spin_lock_irqsave(&kbdev->hwaccess_lock, *flags);
861
+ if (ret)
862
+ kbdev->csf.scheduler.pm_active_count--;
863
+ else
864
+ kbdev->pm.backend.gpu_sleep_mode_active = false;
865
+ kbase_pm_update_state(kbdev);
866
+ }
867
+
868
+ return ret;
869
+}
870
+#endif
871
+
872
+/**
873
+ * scheduler_pm_idle() - Release the PM reference count held by Scheduler
874
+ *
875
+ * @kbdev: Pointer to the device
876
+ *
877
+ * This function is usually called after Scheduler is suspended.
878
+ * The PM reference count held by the Scheduler is released to trigger the
879
+ * power down of GPU.
880
+ */
881
+static void scheduler_pm_idle(struct kbase_device *kbdev)
882
+{
883
+ unsigned long flags;
884
+ u32 prev_count;
885
+
886
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
887
+
888
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
889
+ prev_count = kbdev->csf.scheduler.pm_active_count;
890
+ if (!WARN_ON(prev_count == 0))
891
+ kbdev->csf.scheduler.pm_active_count--;
892
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
893
+
894
+ if (prev_count == 1) {
895
+ kbase_pm_context_idle(kbdev);
896
+ /* Invoke the PM state machines again as the change in MCU
897
+ * desired status, due to the update of scheduler.pm_active_count,
898
+ * may be missed by the thread that called pm_wait_for_desired_state()
899
+ */
900
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
901
+ kbase_pm_update_state(kbdev);
902
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
468903 }
469904 }
470905
471
-static void disable_gpu_idle_fw_timer(struct kbase_device *kbdev)
906
+#ifdef KBASE_PM_RUNTIME
907
+/**
908
+ * scheduler_pm_idle_before_sleep() - Release the PM reference count and
909
+ * trigger the transition to the sleep state.
910
+ *
911
+ * @kbdev: Pointer to the device
912
+ *
913
+ * This function is called on the GPU idle notification. It releases the
914
+ * Scheduler's PM reference count and sets the flag to indicate to the
915
+ * MCU state machine that MCU needs to be put in sleep state.
916
+ */
917
+static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev)
472918 {
473
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
474919 unsigned long flags;
920
+ u32 prev_count;
475921
476
- lockdep_assert_held(&scheduler->lock);
922
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
477923
478
- if (!scheduler->gpu_idle_fw_timer_enabled)
479
- return;
924
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
925
+ prev_count = kbdev->csf.scheduler.pm_active_count;
926
+ if (!WARN_ON(prev_count == 0))
927
+ kbdev->csf.scheduler.pm_active_count--;
928
+ kbdev->pm.backend.gpu_sleep_mode_active = true;
929
+ kbdev->pm.backend.exit_gpu_sleep_mode = false;
930
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
480931
481
- spin_lock_irqsave(&scheduler->interrupt_lock, flags);
482
- disable_gpu_idle_fw_timer_locked(kbdev);
483
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
932
+ if (prev_count == 1) {
933
+ kbase_pm_context_idle(kbdev);
934
+ /* Invoke the PM state machines again as the change in MCU
935
+ * desired status, due to the update of scheduler.pm_active_count,
936
+ * may be missed by the thread that called pm_wait_for_desired_state()
937
+ */
938
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
939
+ kbase_pm_update_state(kbdev);
940
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
941
+ }
484942 }
943
+#endif
485944
486945 static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
487946 {
488947 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
948
+ int ret;
489949
490950 lockdep_assert_held(&scheduler->lock);
491951
492
- if (scheduler->state == SCHED_SUSPENDED) {
493
- dev_dbg(kbdev->dev, "Re-activating the Scheduler");
494
- kbase_csf_scheduler_pm_active(kbdev);
495
- scheduler->state = SCHED_INACTIVE;
952
+ if ((scheduler->state != SCHED_SUSPENDED) &&
953
+ (scheduler->state != SCHED_SLEEPING))
954
+ return;
496955
497
- if (kick)
498
- scheduler_enable_tick_timer_nolock(kbdev);
956
+ if (scheduler->state == SCHED_SUSPENDED) {
957
+ dev_dbg(kbdev->dev,
958
+ "Re-activating the Scheduler after suspend");
959
+ ret = scheduler_pm_active_handle_suspend(kbdev,
960
+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
961
+ } else {
962
+#ifdef KBASE_PM_RUNTIME
963
+ unsigned long flags;
964
+
965
+ dev_dbg(kbdev->dev,
966
+ "Re-activating the Scheduler out of sleep");
967
+
968
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
969
+ ret = scheduler_pm_active_after_sleep(kbdev, &flags);
970
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
971
+#endif
499972 }
973
+
974
+ if (ret) {
975
+ /* GPUCORE-29850 would add the handling for the case where
976
+ * Scheduler could not be activated due to system suspend.
977
+ */
978
+ dev_info(kbdev->dev,
979
+ "Couldn't wakeup Scheduler due to system suspend");
980
+ return;
981
+ }
982
+
983
+ scheduler->state = SCHED_INACTIVE;
984
+ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
985
+
986
+ if (kick)
987
+ scheduler_enable_tick_timer_nolock(kbdev);
500988 }
501989
502990 static void scheduler_suspend(struct kbase_device *kbdev)
....@@ -507,8 +995,9 @@
507995
508996 if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) {
509997 dev_dbg(kbdev->dev, "Suspending the Scheduler");
510
- kbase_csf_scheduler_pm_idle(kbdev);
998
+ scheduler_pm_idle(kbdev);
511999 scheduler->state = SCHED_SUSPENDED;
1000
+ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
5121001 }
5131002 }
5141003
....@@ -539,20 +1028,41 @@
5391028 KBASE_CSF_GROUP_SUSPENDED);
5401029 } else if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) {
5411030 group->run_state = KBASE_CSF_GROUP_SUSPENDED;
1031
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED, group,
1032
+ group->run_state);
5421033
5431034 /* If scheduler is not suspended and the given group's
5441035 * static priority (reflected by the scan_seq_num) is inside
545
- * the current tick slot-range, schedules an async tock.
1036
+ * the current tick slot-range, or there are some on_slot
1037
+ * idle groups, schedule an async tock.
5461038 */
547
- if (scheduler->state != SCHED_SUSPENDED &&
548
- group->scan_seq_num < scheduler->num_csg_slots_for_tick)
549
- schedule_in_cycle(group, true);
1039
+ if (scheduler->state != SCHED_SUSPENDED) {
1040
+ unsigned long flags;
1041
+ int n_idle;
1042
+ int n_used;
1043
+ int n_slots =
1044
+ group->kctx->kbdev->csf.global_iface.group_num;
1045
+
1046
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
1047
+ n_idle = bitmap_weight(scheduler->csg_slots_idle_mask,
1048
+ n_slots);
1049
+ n_used = bitmap_weight(scheduler->csg_inuse_bitmap,
1050
+ n_slots);
1051
+ spin_unlock_irqrestore(&scheduler->interrupt_lock,
1052
+ flags);
1053
+
1054
+ if (n_idle ||
1055
+ n_used < scheduler->num_csg_slots_for_tick ||
1056
+ group->scan_seq_num <
1057
+ scheduler->num_csg_slots_for_tick)
1058
+ schedule_in_cycle(group, true);
1059
+ }
5501060 } else
5511061 return;
5521062
5531063 new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps);
554
- KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
555
- group, new_val);
1064
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group,
1065
+ new_val);
5561066 }
5571067
5581068 int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group)
....@@ -586,6 +1096,14 @@
5861096 return slot_num;
5871097 }
5881098
1099
+/* kbasep_csf_scheduler_group_is_on_slot_locked() - Check if CSG is on slot.
1100
+ *
1101
+ * @group: GPU queue group to be checked
1102
+ *
1103
+ * This function needs to be called with scheduler's lock held
1104
+ *
1105
+ * Return: true if @group is on slot.
1106
+ */
5891107 static bool kbasep_csf_scheduler_group_is_on_slot_locked(
5901108 struct kbase_queue_group *group)
5911109 {
....@@ -636,6 +1154,7 @@
6361154 struct kbase_csf_cmd_stream_info *stream;
6371155 int csi_index = queue->csi_index;
6381156 long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
1157
+ unsigned long flags;
6391158
6401159 if (WARN_ON(!group) ||
6411160 WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
....@@ -653,10 +1172,12 @@
6531172 == CS_ACK_STATE_START), remaining);
6541173
6551174 if (!remaining) {
656
- dev_warn(kbdev->dev, "Timed out waiting for queue to start on csi %d bound to group %d on slot %d",
1175
+ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to start on csi %d bound to group %d on slot %d",
1176
+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
6571177 csi_index, group->handle, group->csg_nr);
6581178 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
6591179 kbase_reset_gpu(kbdev);
1180
+
6601181
6611182 return -ETIMEDOUT;
6621183 }
....@@ -665,12 +1186,15 @@
6651186 kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
6661187 }
6671188
1189
+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
6681190 /* Set state to STOP */
6691191 kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP,
6701192 CS_REQ_STATE_MASK);
6711193
672
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQUESTED, group, queue, 0u);
6731194 kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true);
1195
+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
1196
+
1197
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQ, group, queue, 0u);
6741198
6751199 /* Timed wait */
6761200 remaining = wait_event_timeout(kbdev->csf.event_wait,
....@@ -678,7 +1202,8 @@
6781202 == CS_ACK_STATE_STOP), remaining);
6791203
6801204 if (!remaining) {
681
- dev_warn(kbdev->dev, "Timed out waiting for queue to stop on csi %d bound to group %d on slot %d",
1205
+ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to stop on csi %d bound to group %d on slot %d",
1206
+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
6821207 queue->csi_index, group->handle, group->csg_nr);
6831208
6841209 /* TODO GPUCORE-25328: The CSG can't be terminated, the GPU
....@@ -686,6 +1211,8 @@
6861211 */
6871212 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
6881213 kbase_reset_gpu(kbdev);
1214
+
1215
+
6891216 }
6901217 return (remaining) ? 0 : -ETIMEDOUT;
6911218 }
....@@ -739,6 +1266,7 @@
7391266 long remaining;
7401267 int slot;
7411268 int err = 0;
1269
+ const u32 group_schedule_timeout = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT);
7421270
7431271 if (WARN_ON(!group))
7441272 return -EINVAL;
....@@ -782,8 +1310,7 @@
7821310 */
7831311 remaining = wait_event_timeout(
7841312 kbdev->csf.event_wait, can_halt_stream(kbdev, group),
785
- kbase_csf_timeout_in_jiffies(
786
- 20 * kbdev->csf.scheduler.csg_scheduling_period_ms));
1313
+ kbase_csf_timeout_in_jiffies(group_schedule_timeout));
7871314
7881315 mutex_lock(&scheduler->lock);
7891316
....@@ -845,24 +1372,60 @@
8451372 kbase_csf_firmware_cs_output(
8461373 stream, CS_ACK)) ==
8471374 CS_ACK_STATE_STOP),
848
- kbdev->csf.fw_timeout_ms);
1375
+ kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms));
8491376
8501377 if (!remaining) {
8511378 dev_warn(kbdev->dev,
852
- "Timed out waiting for queue stop ack on csi %d bound to group %d on slot %d",
1379
+ "[%llu] Timeout (%d ms) waiting for queue stop ack on csi %d bound to group %d on slot %d",
1380
+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
8531381 queue->csi_index,
8541382 group->handle, group->csg_nr);
1383
+
1384
+
8551385 err = -ETIMEDOUT;
8561386 }
8571387 }
8581388 }
8591389 } else if (!remaining) {
860
- dev_warn(kbdev->dev, "Group-%d failed to get a slot for stopping the queue on csi %d",
861
- group->handle, queue->csi_index);
1390
+ dev_warn(kbdev->dev, "[%llu] Group-%d failed to get a slot for stopping the queue on csi %d (timeout %d ms)",
1391
+ kbase_backend_get_cycle_cnt(kbdev),
1392
+ group->handle, queue->csi_index,
1393
+ group_schedule_timeout);
1394
+
1395
+
8621396 err = -ETIMEDOUT;
8631397 }
8641398
8651399 return err;
1400
+}
1401
+
1402
+/**
1403
+ * scheduler_activate_on_queue_stop() - Activate the Scheduler when the GPU
1404
+ * queue needs to be stopped.
1405
+ *
1406
+ * @queue: Pointer to the GPU command queue
1407
+ *
1408
+ * This function is called when the CSI to which GPU queue is bound needs to
1409
+ * be stopped. For that the corresponding queue group needs to be resident on
1410
+ * the CSG slot and MCU firmware should be running. So this function makes the
1411
+ * Scheduler exit the sleeping or suspended state.
1412
+ */
1413
+static void scheduler_activate_on_queue_stop(struct kbase_queue *queue)
1414
+{
1415
+ struct kbase_device *kbdev = queue->kctx->kbdev;
1416
+
1417
+ scheduler_wakeup(kbdev, true);
1418
+
1419
+ /* Wait for MCU firmware to start running */
1420
+ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
1421
+ dev_warn(
1422
+ kbdev->dev,
1423
+ "[%llu] Wait for MCU active failed for stopping queue on csi %d bound to group %d of context %d_%d on slot %d",
1424
+ kbase_backend_get_cycle_cnt(kbdev),
1425
+ queue->csi_index, queue->group->handle,
1426
+ queue->kctx->tgid, queue->kctx->id,
1427
+ queue->group->csg_nr);
1428
+ }
8661429 }
8671430
8681431 int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue)
....@@ -890,7 +1453,7 @@
8901453 /* Since the group needs to be resumed in order to stop the queue,
8911454 * check if GPU needs to be powered up.
8921455 */
893
- scheduler_wakeup(kbdev, true);
1456
+ scheduler_activate_on_queue_stop(queue);
8941457
8951458 if ((slot >= 0) &&
8961459 (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING))
....@@ -899,16 +1462,26 @@
8991462 err = sched_halt_stream(queue);
9001463
9011464 unassign_user_doorbell_from_queue(kbdev, queue);
1465
+ kbase_csf_mcu_shared_drop_stopped_queue(kbdev, queue);
9021466 }
9031467
9041468 mutex_unlock(&kbdev->csf.scheduler.lock);
1469
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_STOP, group, queue, group->run_state);
9051470 return err;
9061471 }
9071472
9081473 static void update_hw_active(struct kbase_queue *queue, bool active)
9091474 {
1475
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
1476
+ if (queue && queue->enabled) {
1477
+ u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
1478
+
1479
+ output_addr[CS_ACTIVE / sizeof(u32)] = active;
1480
+ }
1481
+#else
9101482 CSTD_UNUSED(queue);
9111483 CSTD_UNUSED(active);
1484
+#endif
9121485 }
9131486
9141487 static void program_cs_extract_init(struct kbase_queue *queue)
....@@ -971,6 +1544,7 @@
9711544 struct kbase_csf_cmd_stream_group_info *ginfo;
9721545 struct kbase_csf_cmd_stream_info *stream;
9731546 int csi_index = queue->csi_index;
1547
+ unsigned long flags;
9741548 u64 user_input;
9751549 u64 user_output;
9761550
....@@ -988,11 +1562,13 @@
9881562 WARN_ON(csi_index >= ginfo->stream_num))
9891563 return;
9901564
991
- assign_user_doorbell_to_queue(kbdev, queue);
992
- if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
993
- return;
1565
+ if (queue->enabled) {
1566
+ assign_user_doorbell_to_queue(kbdev, queue);
1567
+ if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
1568
+ return;
9941569
995
- WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
1570
+ WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
1571
+ }
9961572
9971573 if (queue->enabled && queue_group_suspended_locked(group))
9981574 program_cs_extract_init(queue);
....@@ -1006,17 +1582,15 @@
10061582 kbase_csf_firmware_cs_input(stream, CS_SIZE,
10071583 queue->size);
10081584
1009
- user_input = (queue->reg->start_pfn << PAGE_SHIFT);
1010
- kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO,
1011
- user_input & 0xFFFFFFFF);
1012
- kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI,
1013
- user_input >> 32);
1585
+ user_input = queue->user_io_gpu_va;
1586
+ WARN_ONCE(!user_input && queue->enabled, "Enabled queue should have a valid gpu_va");
10141587
1015
- user_output = ((queue->reg->start_pfn + 1) << PAGE_SHIFT);
1016
- kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO,
1017
- user_output & 0xFFFFFFFF);
1018
- kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI,
1019
- user_output >> 32);
1588
+ kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, user_input & 0xFFFFFFFF);
1589
+ kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, user_input >> 32);
1590
+
1591
+ user_output = user_input + PAGE_SIZE;
1592
+ kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, user_output & 0xFFFFFFFF);
1593
+ kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, user_output >> 32);
10201594
10211595 kbase_csf_firmware_cs_input(stream, CS_CONFIG,
10221596 (queue->doorbell_nr << 8) | (queue->priority & 0xF));
....@@ -1027,25 +1601,56 @@
10271601 /* Enable all interrupts for now */
10281602 kbase_csf_firmware_cs_input(stream, CS_ACK_IRQ_MASK, ~((u32)0));
10291603
1604
+ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
1605
+
1606
+ /* The fault bit could be misaligned between CS_REQ and CS_ACK if the
1607
+ * acknowledgment was deferred due to dump on fault and the group was
1608
+ * removed from the CSG slot before the fault could be acknowledged.
1609
+ */
1610
+ if (queue->enabled) {
1611
+ u32 const cs_ack =
1612
+ kbase_csf_firmware_cs_output(stream, CS_ACK);
1613
+
1614
+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
1615
+ CS_REQ_FAULT_MASK);
1616
+ }
1617
+
10301618 /*
10311619 * Enable the CSG idle notification once the CS's ringbuffer
10321620 * becomes empty or the CS becomes sync_idle, waiting sync update
10331621 * or protected mode switch.
10341622 */
10351623 kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
1036
- CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK,
1037
- CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK);
1624
+ CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK |
1625
+ CS_REQ_IDLE_SHARED_SB_DEC_MASK,
1626
+ CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK |
1627
+ CS_REQ_IDLE_SHARED_SB_DEC_MASK);
10381628
10391629 /* Set state to START/STOP */
10401630 kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
10411631 queue->enabled ? CS_REQ_STATE_START : CS_REQ_STATE_STOP,
10421632 CS_REQ_STATE_MASK);
1633
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr,
1634
+ ring_csg_doorbell);
1635
+ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
10431636
10441637 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_START, group, queue, queue->enabled);
10451638
1046
- kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr,
1047
- ring_csg_doorbell);
10481639 update_hw_active(queue, true);
1640
+}
1641
+
1642
+static int onslot_csg_add_new_queue(struct kbase_queue *queue)
1643
+{
1644
+ struct kbase_device *kbdev = queue->kctx->kbdev;
1645
+ int err;
1646
+
1647
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
1648
+
1649
+ err = kbase_csf_mcu_shared_add_queue(kbdev, queue);
1650
+ if (!err)
1651
+ program_cs(kbdev, queue, true);
1652
+
1653
+ return err;
10491654 }
10501655
10511656 int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
....@@ -1064,10 +1669,17 @@
10641669
10651670 mutex_lock(&kbdev->csf.scheduler.lock);
10661671
1672
+#if IS_ENABLED(CONFIG_DEBUG_FS)
1673
+ if (unlikely(kbdev->csf.scheduler.state == SCHED_BUSY)) {
1674
+ mutex_unlock(&kbdev->csf.scheduler.lock);
1675
+ return -EBUSY;
1676
+ }
1677
+#endif
1678
+
10671679 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue,
10681680 group->run_state);
1069
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT, queue->group,
1070
- queue, queue->status_wait);
1681
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue,
1682
+ queue->status_wait);
10711683
10721684 if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) {
10731685 err = -EIO;
....@@ -1096,12 +1708,32 @@
10961708 * user door-bell on such a case.
10971709 */
10981710 kbase_csf_ring_cs_user_doorbell(kbdev, queue);
1099
- } else
1100
- program_cs(kbdev, queue, true);
1711
+ } else {
1712
+ err = onslot_csg_add_new_queue(queue);
1713
+ /* For an on slot CSG, the only error in adding a new
1714
+ * queue to run is that the scheduler could not map
1715
+ * the required userio pages due to likely some resource
1716
+ * issues. In such a case, and if the group is yet
1717
+ * to enter its fatal error state, we return a -EBUSY
1718
+ * to the submitter for another kick. The queue itself
1719
+ * has yet to be programmed hence needs to remain its
1720
+ * previous (disabled) state. If the error persists,
1721
+ * the group will eventually reports a fatal error by
1722
+ * the group's error reporting mechanism, when the MCU
1723
+ * shared region map retry limit of the group is
1724
+ * exceeded. For such a case, the expected error value
1725
+ * is -EIO.
1726
+ */
1727
+ if (unlikely(err)) {
1728
+ queue->enabled = cs_enabled;
1729
+ mutex_unlock(&kbdev->csf.scheduler.lock);
1730
+ return (err != -EIO) ? -EBUSY : err;
1731
+ }
1732
+ }
11011733 }
1102
- queue_delayed_work(system_long_wq,
1103
- &kbdev->csf.scheduler.ping_work,
1104
- msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS));
1734
+ queue_delayed_work(system_long_wq, &kbdev->csf.scheduler.ping_work,
1735
+ msecs_to_jiffies(kbase_get_timeout_ms(
1736
+ kbdev, CSF_FIRMWARE_PING_TIMEOUT)));
11051737 }
11061738 }
11071739
....@@ -1136,7 +1768,8 @@
11361768 slot_state = CSG_SLOT_RUNNING;
11371769 atomic_set(&csg_slot->state, slot_state);
11381770 csg_slot->trigger_jiffies = jiffies;
1139
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STARTED, csg_slot->resident_group, state);
1771
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_RUNNING, csg_slot->resident_group,
1772
+ state);
11401773 dev_dbg(kbdev->dev, "Group %u running on slot %d\n",
11411774 csg_slot->resident_group->handle, slot);
11421775 }
....@@ -1228,13 +1861,16 @@
12281861 csg_slot_running(kbdev, slot), remaining);
12291862 if (!remaining)
12301863 dev_warn(kbdev->dev,
1231
- "slot %d timed out on up-running\n", slot);
1864
+ "[%llu] slot %d timeout (%d ms) on up-running\n",
1865
+ kbase_backend_get_cycle_cnt(kbdev),
1866
+ slot, kbdev->csf.fw_timeout_ms);
12321867 }
12331868
12341869 if (csg_slot_running(kbdev, slot)) {
12351870 unsigned long flags;
12361871 struct kbase_csf_cmd_stream_group_info *ginfo =
12371872 &global_iface->groups[slot];
1873
+
12381874 u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND :
12391875 CSG_REQ_STATE_TERMINATE;
12401876
....@@ -1245,13 +1881,15 @@
12451881 /* Set state to SUSPEND/TERMINATE */
12461882 kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, halt_cmd,
12471883 CSG_REQ_STATE_MASK);
1884
+ kbase_csf_ring_csg_doorbell(kbdev, slot);
12481885 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock,
12491886 flags);
12501887 atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP);
12511888 csg_slot[slot].trigger_jiffies = jiffies;
1252
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP, group, halt_cmd);
1889
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd);
12531890
1254
- kbase_csf_ring_csg_doorbell(kbdev, slot);
1891
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG(
1892
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot, suspend);
12551893 }
12561894 }
12571895
....@@ -1265,6 +1903,31 @@
12651903 halt_csg_slot(group, true);
12661904 }
12671905
1906
+static bool csf_wait_ge_condition_supported(struct kbase_device *kbdev)
1907
+{
1908
+ const uint32_t glb_major = GLB_VERSION_MAJOR_GET(kbdev->csf.global_iface.version);
1909
+ const uint32_t glb_minor = GLB_VERSION_MINOR_GET(kbdev->csf.global_iface.version);
1910
+
1911
+ switch (glb_major) {
1912
+ case 0:
1913
+ break;
1914
+ case 1:
1915
+ if (glb_minor >= 4)
1916
+ return true;
1917
+ break;
1918
+ case 2:
1919
+ if (glb_minor >= 6)
1920
+ return true;
1921
+ break;
1922
+ case 3:
1923
+ if (glb_minor >= 6)
1924
+ return true;
1925
+ break;
1926
+ default:
1927
+ return true;
1928
+ }
1929
+ return false;
1930
+}
12681931 /**
12691932 * evaluate_sync_update() - Evaluate the sync wait condition the GPU command
12701933 * queue has been blocked on.
....@@ -1278,23 +1941,38 @@
12781941 struct kbase_vmap_struct *mapping;
12791942 bool updated = false;
12801943 u32 *sync_ptr;
1944
+ u32 sync_wait_size;
1945
+ u32 sync_wait_align_mask;
12811946 u32 sync_wait_cond;
12821947 u32 sync_current_val;
12831948 struct kbase_device *kbdev;
1949
+ bool sync_wait_align_valid = false;
1950
+ bool sync_wait_cond_valid = false;
12841951
12851952 if (WARN_ON(!queue))
12861953 return false;
12871954
12881955 kbdev = queue->kctx->kbdev;
1956
+
12891957 lockdep_assert_held(&kbdev->csf.scheduler.lock);
1958
+
1959
+ sync_wait_size = CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(queue->status_wait);
1960
+ sync_wait_align_mask =
1961
+ (sync_wait_size == 0 ? BASEP_EVENT32_ALIGN_BYTES : BASEP_EVENT64_ALIGN_BYTES) - 1;
1962
+ sync_wait_align_valid = ((uintptr_t)queue->sync_ptr & sync_wait_align_mask) == 0;
1963
+ if (!sync_wait_align_valid) {
1964
+ dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX is misaligned",
1965
+ queue->sync_ptr);
1966
+ goto out;
1967
+ }
12901968
12911969 sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr,
12921970 &mapping);
12931971
1294
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE, queue->group,
1295
- queue, queue->sync_ptr);
1296
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_BLOCKED_REASON,
1297
- queue->group, queue, queue->blocked_reason);
1972
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_START, queue->group, queue,
1973
+ queue->sync_ptr);
1974
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_BLOCKED_REASON, queue->group, queue,
1975
+ queue->blocked_reason);
12981976
12991977 if (!sync_ptr) {
13001978 dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX already freed",
....@@ -1304,19 +1982,24 @@
13041982
13051983 sync_wait_cond =
13061984 CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(queue->status_wait);
1985
+ sync_wait_cond_valid = (sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) ||
1986
+ (sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) ||
1987
+ ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE) &&
1988
+ csf_wait_ge_condition_supported(kbdev));
13071989
1308
- WARN_ON((sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) &&
1309
- (sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE));
1990
+ WARN_ON(!sync_wait_cond_valid);
13101991
13111992 sync_current_val = READ_ONCE(*sync_ptr);
1312
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_CURRENT_VAL, queue->group,
1313
- queue, sync_current_val);
1993
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_CUR_VAL, queue->group, queue,
1994
+ sync_current_val);
13141995
1315
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_TEST_VAL, queue->group,
1316
- queue, queue->sync_value);
1996
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_TEST_VAL, queue->group, queue,
1997
+ queue->sync_value);
13171998
13181999 if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) &&
13192000 (sync_current_val > queue->sync_value)) ||
2001
+ ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE) &&
2002
+ (sync_current_val >= queue->sync_value) && csf_wait_ge_condition_supported(kbdev)) ||
13202003 ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) &&
13212004 (sync_current_val <= queue->sync_value))) {
13222005 /* The sync wait condition is satisfied so the group to which
....@@ -1330,8 +2013,7 @@
13302013
13312014 kbase_phy_alloc_mapping_put(queue->kctx, mapping);
13322015 out:
1333
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVALUATED,
1334
- queue->group, queue, updated);
2016
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_END, queue->group, queue, updated);
13352017 return updated;
13362018 }
13372019
....@@ -1358,10 +2040,17 @@
13582040 u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT);
13592041 bool is_waiting = false;
13602042
1361
- KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_STATUS_WAIT,
1362
- queue->group, queue, status);
2043
+#if IS_ENABLED(CONFIG_DEBUG_FS)
2044
+ u64 cmd_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_LO);
13632045
1364
- if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) {
2046
+ cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_HI) << 32;
2047
+ queue->saved_cmd_ptr = cmd_ptr;
2048
+#endif
2049
+
2050
+ KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group,
2051
+ queue, status);
2052
+
2053
+ if (CS_STATUS_WAIT_SYNC_WAIT_GET(status) || CS_STATUS_WAIT_SB_MASK_GET(status)) {
13652054 queue->status_wait = status;
13662055 queue->sync_ptr = kbase_csf_firmware_cs_output(stream,
13672056 CS_STATUS_WAIT_SYNC_POINTER_LO);
....@@ -1377,7 +2066,8 @@
13772066 kbase_csf_firmware_cs_output(stream,
13782067 CS_STATUS_BLOCKED_REASON));
13792068
1380
- if (!evaluate_sync_update(queue)) {
2069
+ if ((queue->blocked_reason == CS_STATUS_BLOCKED_ON_SB_WAIT) ||
2070
+ !evaluate_sync_update(queue)) {
13812071 is_waiting = true;
13822072 } else {
13832073 /* Sync object already got updated & met the condition
....@@ -1399,37 +2089,6 @@
13992089 return is_waiting;
14002090 }
14012091
1402
-/**
1403
- * Calculate how far in the future an event should be scheduled.
1404
- *
1405
- * The objective of this function is making sure that a minimum period of
1406
- * time is guaranteed between handling two consecutive events.
1407
- *
1408
- * This function guarantees a minimum period of time between two consecutive
1409
- * events: given the minimum period and the distance between the current time
1410
- * and the last event, the function returns the difference between the two.
1411
- * However, if more time than the minimum period has already elapsed
1412
- * since the last event, the function will return 0 to schedule work to handle
1413
- * the event with the lowest latency possible.
1414
- *
1415
- * @last_event: Timestamp of the last event, in jiffies.
1416
- * @time_now: Timestamp of the new event to handle, in jiffies.
1417
- * Must be successive to last_event.
1418
- * @period: Minimum period between two events, in jiffies.
1419
- *
1420
- * Return: Time to delay work to handle the current event, in jiffies
1421
- */
1422
-static unsigned long get_schedule_delay(unsigned long last_event,
1423
- unsigned long time_now,
1424
- unsigned long period)
1425
-{
1426
- const unsigned long t_distance = time_now - last_event;
1427
- const unsigned long delay_t = (t_distance < period) ?
1428
- (period - t_distance) : 0;
1429
-
1430
- return delay_t;
1431
-}
1432
-
14332092 static void schedule_in_cycle(struct kbase_queue_group *group, bool force)
14342093 {
14352094 struct kbase_context *kctx = group->kctx;
....@@ -1444,15 +2103,48 @@
14442103 * of work needs to be enforced in situation such as entering into
14452104 * protected mode).
14462105 */
1447
- if ((likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) &&
1448
- !scheduler->tock_pending_request) {
1449
- const unsigned long delay =
1450
- get_schedule_delay(scheduler->last_schedule, jiffies,
1451
- CSF_SCHEDULER_TIME_TOCK_JIFFIES);
1452
- scheduler->tock_pending_request = true;
2106
+ if (likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) {
14532107 dev_dbg(kbdev->dev, "Kicking async for group %d\n",
14542108 group->handle);
1455
- mod_delayed_work(scheduler->wq, &scheduler->tock_work, delay);
2109
+ kbase_csf_scheduler_invoke_tock(kbdev);
2110
+ }
2111
+}
2112
+
2113
+static void ktrace_log_group_state(struct kbase_queue_group *const group)
2114
+{
2115
+ switch (group->run_state) {
2116
+ case KBASE_CSF_GROUP_INACTIVE:
2117
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group,
2118
+ group->run_state);
2119
+ break;
2120
+ case KBASE_CSF_GROUP_RUNNABLE:
2121
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_RUNNABLE, group,
2122
+ group->run_state);
2123
+ break;
2124
+ case KBASE_CSF_GROUP_IDLE:
2125
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_IDLE, group,
2126
+ group->run_state);
2127
+ break;
2128
+ case KBASE_CSF_GROUP_SUSPENDED:
2129
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED, group,
2130
+ group->run_state);
2131
+ break;
2132
+ case KBASE_CSF_GROUP_SUSPENDED_ON_IDLE:
2133
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED_ON_IDLE, group,
2134
+ group->run_state);
2135
+ break;
2136
+ case KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC:
2137
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED_ON_WAIT_SYNC,
2138
+ group, group->run_state);
2139
+ break;
2140
+ case KBASE_CSF_GROUP_FAULT_EVICTED:
2141
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_FAULT_EVICTED, group,
2142
+ group->run_state);
2143
+ break;
2144
+ case KBASE_CSF_GROUP_TERMINATED:
2145
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group,
2146
+ group->run_state);
2147
+ break;
14562148 }
14572149 }
14582150
....@@ -1473,13 +2165,15 @@
14732165
14742166 group->run_state = run_state;
14752167
2168
+ ktrace_log_group_state(group);
2169
+
14762170 if (run_state == KBASE_CSF_GROUP_RUNNABLE)
14772171 group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID;
14782172
14792173 list_add_tail(&group->link,
14802174 &kctx->csf.sched.runnable_groups[group->priority]);
14812175 kctx->csf.sched.num_runnable_grps++;
1482
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_INSERT_RUNNABLE, group,
2176
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_INSERT, group,
14832177 kctx->csf.sched.num_runnable_grps);
14842178
14852179 /* Add the kctx if not yet in runnable kctxs */
....@@ -1487,14 +2181,15 @@
14872181 /* First runnable csg, adds to the runnable_kctxs */
14882182 INIT_LIST_HEAD(&kctx->csf.link);
14892183 list_add_tail(&kctx->csf.link, &scheduler->runnable_kctxs);
1490
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_INSERT_RUNNABLE, kctx, 0u);
2184
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_INSERT, kctx, 0u);
14912185 }
14922186
14932187 scheduler->total_runnable_grps++;
14942188
14952189 if (likely(scheduler_timer_is_enabled_nolock(kbdev)) &&
14962190 (scheduler->total_runnable_grps == 1 ||
1497
- scheduler->state == SCHED_SUSPENDED)) {
2191
+ scheduler->state == SCHED_SUSPENDED ||
2192
+ scheduler->state == SCHED_SLEEPING)) {
14982193 dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n");
14992194 /* Fire a scheduling to start the time-slice */
15002195 enqueue_tick_work(kbdev);
....@@ -1516,13 +2211,41 @@
15162211 struct kbase_queue_group *new_head_grp;
15172212 struct list_head *list =
15182213 &kctx->csf.sched.runnable_groups[group->priority];
2214
+ unsigned long flags;
15192215
15202216 lockdep_assert_held(&scheduler->lock);
15212217
15222218 WARN_ON(!queue_group_scheduled_locked(group));
15232219
15242220 group->run_state = run_state;
2221
+
2222
+ ktrace_log_group_state(group);
2223
+
15252224 list_del_init(&group->link);
2225
+
2226
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
2227
+ /* The below condition will be true when the group running in protected
2228
+ * mode is being terminated but the protected mode exit interrupt wasn't
2229
+ * received. This can happen if the FW got stuck during protected mode
2230
+ * for some reason (like GPU page fault or some internal error).
2231
+ * In normal cases FW is expected to send the protected mode exit
2232
+ * interrupt before it handles the CSG termination request.
2233
+ */
2234
+ if (unlikely(scheduler->active_protm_grp == group)) {
2235
+ /* CSG slot cleanup should have happened for the pmode group */
2236
+ WARN_ON(kbasep_csf_scheduler_group_is_on_slot_locked(group));
2237
+ WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
2238
+ /* Initiate a GPU reset, in case it wasn't initiated yet,
2239
+ * in order to rectify the anomaly.
2240
+ */
2241
+ if (kbase_prepare_to_reset_gpu(kctx->kbdev, RESET_FLAGS_NONE))
2242
+ kbase_reset_gpu(kctx->kbdev);
2243
+
2244
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_PROTM_EXIT,
2245
+ scheduler->active_protm_grp, 0u);
2246
+ scheduler->active_protm_grp = NULL;
2247
+ }
2248
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
15262249
15272250 if (scheduler->top_grp == group) {
15282251 /*
....@@ -1548,13 +2271,12 @@
15482271 }
15492272
15502273 kctx->csf.sched.num_runnable_grps--;
1551
- KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_RUNNABLE, group,
2274
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_REMOVE, group,
15522275 kctx->csf.sched.num_runnable_grps);
15532276 new_head_grp = (!list_empty(list)) ?
15542277 list_first_entry(list, struct kbase_queue_group, link) :
15552278 NULL;
1556
- KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_RUNNABLE, new_head_grp,
1557
- 0u);
2279
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u);
15582280
15592281 if (kctx->csf.sched.num_runnable_grps == 0) {
15602282 struct kbase_context *new_head_kctx;
....@@ -1563,13 +2285,11 @@
15632285 list_del_init(&kctx->csf.link);
15642286 if (scheduler->top_ctx == kctx)
15652287 scheduler->top_ctx = NULL;
1566
- KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_REMOVE_RUNNABLE, kctx,
1567
- 0u);
2288
+ KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_REMOVE, kctx, 0u);
15682289 new_head_kctx = (!list_empty(kctx_list)) ?
15692290 list_first_entry(kctx_list, struct kbase_context, csf.link) :
15702291 NULL;
1571
- KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_HEAD_RUNNABLE,
1572
- new_head_kctx, 0u);
2292
+ KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx, 0u);
15732293 }
15742294
15752295 WARN_ON(scheduler->total_runnable_grps == 0);
....@@ -1579,7 +2299,7 @@
15792299 cancel_tick_timer(kctx->kbdev);
15802300 WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps));
15812301 if (scheduler->state != SCHED_SUSPENDED)
1582
- queue_work(system_wq, &scheduler->gpu_idle_work);
2302
+ enqueue_gpu_idle_work(scheduler);
15832303 }
15842304 KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
15852305 scheduler->num_active_address_spaces |
....@@ -1596,9 +2316,11 @@
15962316
15972317 list_add_tail(&group->link, &kctx->csf.sched.idle_wait_groups);
15982318 kctx->csf.sched.num_idle_wait_grps++;
1599
- KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_INSERT_IDLE_WAIT, group,
2319
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_INSERT, group,
16002320 kctx->csf.sched.num_idle_wait_grps);
16012321 group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC;
2322
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, CSF_GROUP_SUSPENDED_ON_WAIT_SYNC, group,
2323
+ group->run_state);
16022324 dev_dbg(kctx->kbdev->dev,
16032325 "Group-%d suspended on sync_wait, total wait_groups: %u\n",
16042326 group->handle, kctx->csf.sched.num_idle_wait_grps);
....@@ -1617,14 +2339,14 @@
16172339 list_del_init(&group->link);
16182340 WARN_ON(kctx->csf.sched.num_idle_wait_grps == 0);
16192341 kctx->csf.sched.num_idle_wait_grps--;
1620
- KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_IDLE_WAIT, group,
2342
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_REMOVE, group,
16212343 kctx->csf.sched.num_idle_wait_grps);
16222344 new_head_grp = (!list_empty(list)) ?
16232345 list_first_entry(list, struct kbase_queue_group, link) :
16242346 NULL;
1625
- KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_IDLE_WAIT,
1626
- new_head_grp, 0u);
2347
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_HEAD, new_head_grp, 0u);
16272348 group->run_state = KBASE_CSF_GROUP_INACTIVE;
2349
+ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, CSF_GROUP_INACTIVE, group, group->run_state);
16282350 }
16292351
16302352 static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler,
....@@ -1639,7 +2361,7 @@
16392361 insert_group_to_idle_wait(group);
16402362 }
16412363
1642
-static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group)
2364
+static void update_offslot_non_idle_cnt(struct kbase_queue_group *group)
16432365 {
16442366 struct kbase_device *kbdev = group->kctx->kbdev;
16452367 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
....@@ -1649,8 +2371,7 @@
16492371 if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) {
16502372 int new_val =
16512373 atomic_dec_return(&scheduler->non_idle_offslot_grps);
1652
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
1653
- group, new_val);
2374
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val);
16542375 }
16552376 }
16562377
....@@ -1666,8 +2387,7 @@
16662387 if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) {
16672388 int new_val =
16682389 atomic_dec_return(&scheduler->non_idle_offslot_grps);
1669
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
1670
- group, new_val);
2390
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val);
16712391 }
16722392 }
16732393
....@@ -1687,15 +2407,15 @@
16872407 if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) {
16882408 int new_val = atomic_inc_return(
16892409 &scheduler->non_idle_offslot_grps);
1690
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
1691
- group, new_val);
2410
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC,
2411
+ group, new_val);
16922412 }
16932413 } else {
16942414 if (group->run_state != KBASE_CSF_GROUP_SUSPENDED) {
16952415 int new_val = atomic_dec_return(
16962416 &scheduler->non_idle_offslot_grps);
1697
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
1698
- group, new_val);
2417
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC,
2418
+ group, new_val);
16992419 }
17002420 }
17012421 } else {
....@@ -1703,13 +2423,13 @@
17032423 if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) {
17042424 int new_val = atomic_inc_return(
17052425 &scheduler->non_idle_offslot_grps);
1706
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
1707
- group, new_val);
2426
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group,
2427
+ new_val);
17082428 }
17092429 }
17102430 }
17112431
1712
-static bool confirm_cmd_buf_empty(struct kbase_queue *queue)
2432
+static bool confirm_cmd_buf_empty(struct kbase_queue const *queue)
17132433 {
17142434 bool cs_empty;
17152435 bool cs_idle;
....@@ -1721,8 +2441,8 @@
17212441
17222442 u32 glb_version = iface->version;
17232443
1724
- u64 *input_addr = (u64 *)queue->user_io_addr;
1725
- u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE);
2444
+ u64 const *input_addr = (u64 const *)queue->user_io_addr;
2445
+ u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
17262446
17272447 if (glb_version >= kbase_csf_interface_version(1, 0, 0)) {
17282448 /* CS_STATUS_SCOREBOARD supported from CSF 1.0 */
....@@ -1767,6 +2487,10 @@
17672487 bool sync_wait = false;
17682488 bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
17692489 CSG_STATUS_STATE_IDLE_MASK;
2490
+#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
2491
+ for (i = 0; i < max_streams; i++)
2492
+ update_hw_active(group->bound_queues[i], false);
2493
+#endif /* CONFIG_MALI_BIFROST_NO_MALI */
17702494 for (i = 0; idle && i < max_streams; i++) {
17712495 struct kbase_queue *const queue =
17722496 group->bound_queues[i];
....@@ -1774,9 +2498,14 @@
17742498 if (!queue || !queue->enabled)
17752499 continue;
17762500
1777
- if (save_slot_cs(ginfo, queue))
1778
- sync_wait = true;
1779
- else {
2501
+ if (save_slot_cs(ginfo, queue)) {
2502
+ /* sync_wait is only true if the queue is blocked on
2503
+ * a CQS and not a scoreboard.
2504
+ */
2505
+ if (queue->blocked_reason !=
2506
+ CS_STATUS_BLOCKED_ON_SB_WAIT)
2507
+ sync_wait = true;
2508
+ } else {
17802509 /* Need to confirm if ringbuffer of the GPU
17812510 * queue is empty or not. A race can arise
17822511 * between the flush of GPU queue and suspend
....@@ -1801,14 +2530,19 @@
18012530 else {
18022531 group->run_state =
18032532 KBASE_CSF_GROUP_SUSPENDED_ON_IDLE;
2533
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED_ON_IDLE, group,
2534
+ group->run_state);
18042535 dev_dbg(kbdev->dev, "Group-%d suspended: idle",
18052536 group->handle);
18062537 }
18072538 } else {
18082539 group->run_state = KBASE_CSF_GROUP_SUSPENDED;
2540
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED, group,
2541
+ group->run_state);
18092542 }
18102543
18112544 update_offslot_non_idle_cnt_on_grp_suspend(group);
2545
+ kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(group);
18122546 }
18132547 }
18142548
....@@ -1885,6 +2619,11 @@
18852619 KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev,
18862620 kbdev->gpu_props.props.raw_props.gpu_id, slot);
18872621
2622
+ /* Notify that the group is off-slot and the csg_reg might be available for
2623
+ * reuse with other groups in a 'lazy unbinding' style.
2624
+ */
2625
+ kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
2626
+
18882627 return as_fault;
18892628 }
18902629
....@@ -1931,6 +2670,7 @@
19312670 csg_req ^= CSG_REQ_EP_CFG_MASK;
19322671 kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
19332672 CSG_REQ_EP_CFG_MASK);
2673
+ kbase_csf_ring_csg_doorbell(kbdev, slot);
19342674 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
19352675
19362676 csg_slot->priority = prio;
....@@ -1939,9 +2679,8 @@
19392679 group->handle, group->kctx->tgid, group->kctx->id, slot,
19402680 prev_prio, prio);
19412681
1942
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PRIO_UPDATE, group, prev_prio);
2682
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_PRIO_UPDATE, group, prev_prio);
19432683
1944
- kbase_csf_ring_csg_doorbell(kbdev, slot);
19452684 set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update);
19462685 }
19472686
....@@ -1968,8 +2707,8 @@
19682707 u32 state;
19692708 int i;
19702709 unsigned long flags;
1971
- const u64 normal_suspend_buf =
1972
- group->normal_suspend_buf.reg->start_pfn << PAGE_SHIFT;
2710
+ u64 normal_suspend_buf;
2711
+ u64 protm_suspend_buf;
19732712 struct kbase_csf_csg_slot *csg_slot =
19742713 &kbdev->csf.scheduler.csg_slots[slot];
19752714
....@@ -1981,6 +2720,19 @@
19812720
19822721 WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY);
19832722
2723
+ if (unlikely(kbase_csf_mcu_shared_group_bind_csg_reg(kbdev, group))) {
2724
+ dev_warn(kbdev->dev,
2725
+ "Couldn't bind MCU shared csg_reg for group %d of context %d_%d, slot=%u",
2726
+ group->handle, group->kctx->tgid, kctx->id, slot);
2727
+ kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
2728
+ return;
2729
+ }
2730
+
2731
+ /* The suspend buf has already been mapped through binding to csg_reg */
2732
+ normal_suspend_buf = group->normal_suspend_buf.gpu_va;
2733
+ protm_suspend_buf = group->protected_suspend_buf.gpu_va;
2734
+ WARN_ONCE(!normal_suspend_buf, "Normal suspend buffer not mapped");
2735
+
19842736 ginfo = &global_iface->groups[slot];
19852737
19862738 /* Pick an available address space for this context */
....@@ -1991,8 +2743,9 @@
19912743 mutex_unlock(&kbdev->mmu_hw_mutex);
19922744
19932745 if (kctx->as_nr == KBASEP_AS_NR_INVALID) {
1994
- dev_warn(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n",
2746
+ dev_dbg(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n",
19952747 group->handle, kctx->tgid, kctx->id, slot);
2748
+ kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
19962749 return;
19972750 }
19982751
....@@ -2025,6 +2778,9 @@
20252778 kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER,
20262779 tiler_mask & U32_MAX);
20272780
2781
+ /* Register group UID with firmware */
2782
+ kbase_csf_firmware_csg_input(ginfo, CSG_ITER_TRACE_CONFIG,
2783
+ group->group_uid);
20282784
20292785 ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max);
20302786 ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max);
....@@ -2040,14 +2796,21 @@
20402796 kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI,
20412797 normal_suspend_buf >> 32);
20422798
2043
- if (group->protected_suspend_buf.reg) {
2044
- const u64 protm_suspend_buf =
2045
- group->protected_suspend_buf.reg->start_pfn <<
2046
- PAGE_SHIFT;
2047
- kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO,
2048
- protm_suspend_buf & U32_MAX);
2049
- kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI,
2050
- protm_suspend_buf >> 32);
2799
+ /* Note: the P-mode suspend buffer pointer is programmed here, but actual
2800
+ * entry into P-mode execution also requires that the P-mode physical pages
2801
+ * have been allocated and mapped with the bound csg_reg, which carries a
2802
+ * dedicated flag indicating this P-mode runnable condition before a group
2803
+ * is granted entry to its P-mode section. Without such an entry, the
2804
+ * buffer pointed to here is never accessed.
2805
+ */
2806
+ kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, protm_suspend_buf & U32_MAX);
2807
+ kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, protm_suspend_buf >> 32);
2808
+
2809
+ if (group->dvs_buf) {
2810
+ kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_LO,
2811
+ group->dvs_buf & U32_MAX);
2812
+ kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_HI,
2813
+ group->dvs_buf >> 32);
20512814 }
20522815
20532816 /* Enable all interrupts for now */
....@@ -2069,6 +2832,7 @@
20692832
20702833 kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
20712834 state, CSG_REQ_STATE_MASK);
2835
+ kbase_csf_ring_csg_doorbell(kbdev, slot);
20722836 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
20732837
20742838 /* Update status before rings the door-bell, marking ready => run */
....@@ -2077,21 +2841,25 @@
20772841 csg_slot->priority = prio;
20782842
20792843 /* Trace the programming of the CSG on the slot */
2080
- KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(kbdev,
2081
- kbdev->gpu_props.props.raw_props.gpu_id, group->handle, slot);
2844
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(
2845
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, group->kctx->id,
2846
+ group->handle, slot, (state == CSG_REQ_STATE_RESUME) ? 1 : 0);
20822847
20832848 dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n",
20842849 group->handle, kctx->tgid, kctx->id, slot, prio);
20852850
2086
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START, group,
2087
- (((u64)ep_cfg) << 32) |
2088
- ((((u32)kctx->as_nr) & 0xF) << 16) |
2089
- (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT)));
2851
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START_REQ, group,
2852
+ (((u64)ep_cfg) << 32) | ((((u32)kctx->as_nr) & 0xF) << 16) |
2853
+ (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT)));
20902854
2091
- kbase_csf_ring_csg_doorbell(kbdev, slot);
2855
+ /* Update the heap reclaim manager */
2856
+ kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(group);
20922857
20932858 /* Programming a slot consumes a group from scanout */
20942859 update_offslot_non_idle_cnt_for_onslot_grp(group);
2860
+
2861
+ /* Notify that the group's bound csg_reg is now in active use */
2862
+ kbase_csf_mcu_shared_set_group_csg_reg_active(kbdev, group);
20952863 }
20962864
20972865 static void remove_scheduled_group(struct kbase_device *kbdev,
....@@ -2112,7 +2880,7 @@
21122880 }
21132881
21142882 static void sched_evict_group(struct kbase_queue_group *group, bool fault,
2115
- bool update_non_idle_offslot_grps_cnt)
2883
+ bool update_non_idle_offslot_grps_cnt_from_run_state)
21162884 {
21172885 struct kbase_context *kctx = group->kctx;
21182886 struct kbase_device *kbdev = kctx->kbdev;
....@@ -2123,13 +2891,13 @@
21232891 if (queue_group_scheduled_locked(group)) {
21242892 u32 i;
21252893
2126
- if (update_non_idle_offslot_grps_cnt &&
2894
+ if (update_non_idle_offslot_grps_cnt_from_run_state &&
21272895 (group->run_state == KBASE_CSF_GROUP_SUSPENDED ||
21282896 group->run_state == KBASE_CSF_GROUP_RUNNABLE)) {
21292897 int new_val = atomic_dec_return(
21302898 &scheduler->non_idle_offslot_grps);
2131
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
2132
- group, new_val);
2899
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group,
2900
+ new_val);
21332901 }
21342902
21352903 for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
....@@ -2138,8 +2906,11 @@
21382906 }
21392907
21402908 if (group->prepared_seq_num !=
2141
- KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID)
2909
+ KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) {
2910
+ if (!update_non_idle_offslot_grps_cnt_from_run_state)
2911
+ update_offslot_non_idle_cnt(group);
21422912 remove_scheduled_group(kbdev, group);
2913
+ }
21432914
21442915 if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
21452916 remove_group_from_idle_wait(group);
....@@ -2150,17 +2921,25 @@
21502921
21512922 WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
21522923
2153
- if (fault)
2924
+ if (fault) {
21542925 group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED;
2926
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_FAULT_EVICTED, group,
2927
+ scheduler->total_runnable_grps);
2928
+ }
21552929
2156
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT_SCHED, group,
2157
- (((u64)scheduler->total_runnable_grps) << 32) |
2158
- ((u32)group->run_state));
2930
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT, group,
2931
+ (((u64)scheduler->total_runnable_grps) << 32) |
2932
+ ((u32)group->run_state));
21592933 dev_dbg(kbdev->dev, "group %d exited scheduler, num_runnable_grps %d\n",
21602934 group->handle, scheduler->total_runnable_grps);
21612935 /* Notify a group has been evicted */
21622936 wake_up_all(&kbdev->csf.event_wait);
21632937 }
2938
+
2939
+ kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(group);
2940
+
2941
+ /* Clear all the bound shared regions and unmap any in-place MMU maps */
2942
+ kbase_csf_mcu_shared_clear_evicted_group_csg_reg(kbdev, group);
21642943 }
21652944
21662945 static int term_group_sync(struct kbase_queue_group *group)
....@@ -2172,14 +2951,23 @@
21722951 term_csg_slot(group);
21732952
21742953 remaining = wait_event_timeout(kbdev->csf.event_wait,
2175
- csg_slot_stopped_locked(kbdev, group->csg_nr), remaining);
2954
+ group->cs_unrecoverable || csg_slot_stopped_locked(kbdev, group->csg_nr),
2955
+ remaining);
21762956
2177
- if (!remaining) {
2178
- dev_warn(kbdev->dev, "term request timed out for group %d of context %d_%d on slot %d",
2957
+ if (unlikely(!remaining)) {
2958
+ enum dumpfault_error_type error_type = DF_CSG_TERMINATE_TIMEOUT;
2959
+
2960
+ dev_warn(kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d",
2961
+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
21792962 group->handle, group->kctx->tgid,
21802963 group->kctx->id, group->csg_nr);
2964
+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
2965
+ error_type = DF_PING_REQUEST_TIMEOUT;
2966
+ kbase_debug_csf_fault_notify(kbdev, group->kctx, error_type);
21812967 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
21822968 kbase_reset_gpu(kbdev);
2969
+
2970
+
21832971 err = -ETIMEDOUT;
21842972 }
21852973
....@@ -2190,46 +2978,65 @@
21902978 {
21912979 struct kbase_device *kbdev = group->kctx->kbdev;
21922980 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2193
- long remaining =
2194
- kbase_csf_timeout_in_jiffies(CSG_SCHED_STOP_TIMEOUT_MS);
2195
- bool force = false;
2981
+ bool wait_for_termination = true;
2982
+ bool on_slot;
21962983
21972984 kbase_reset_gpu_assert_failed_or_prevented(kbdev);
21982985 lockdep_assert_held(&group->kctx->csf.lock);
21992986 mutex_lock(&scheduler->lock);
22002987
22012988 KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_DESCHEDULE, group, group->run_state);
2202
- while (queue_group_scheduled_locked(group)) {
2203
- u32 saved_state = scheduler->state;
2989
+ wait_for_dump_complete_on_group_deschedule(group);
2990
+ if (!queue_group_scheduled_locked(group))
2991
+ goto unlock;
22042992
2205
- if (!kbasep_csf_scheduler_group_is_on_slot_locked(group)) {
2206
- sched_evict_group(group, false, true);
2207
- } else if (saved_state == SCHED_INACTIVE || force) {
2208
- bool as_faulty;
2993
+ on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
22092994
2210
- term_group_sync(group);
2211
- /* Treat the csg been terminated */
2212
- as_faulty = cleanup_csg_slot(group);
2213
- /* remove from the scheduler list */
2214
- sched_evict_group(group, as_faulty, false);
2215
- }
2995
+#ifdef KBASE_PM_RUNTIME
2996
+ /* If the queue group is on slot and Scheduler is in SLEEPING state,
2997
+ * then we need to wake up the Scheduler to exit the sleep state rather
2998
+ * than waiting for the runtime suspend or power down of GPU.
2999
+ * The group termination is usually triggered in the context of the
3000
+ * application thread, and it has been seen that certain applications can
3001
+ * destroy groups at random points, not necessarily when the application exits.
3002
+ */
3003
+ if (on_slot && (scheduler->state == SCHED_SLEEPING)) {
3004
+ scheduler_wakeup(kbdev, true);
22163005
2217
- /* waiting scheduler state to change */
2218
- if (queue_group_scheduled_locked(group)) {
2219
- mutex_unlock(&scheduler->lock);
2220
- remaining = wait_event_timeout(
2221
- kbdev->csf.event_wait,
2222
- saved_state != scheduler->state,
2223
- remaining);
2224
- if (!remaining) {
2225
- dev_warn(kbdev->dev, "Scheduler state change wait timed out for group %d on slot %d",
2226
- group->handle, group->csg_nr);
2227
- force = true;
2228
- }
2229
- mutex_lock(&scheduler->lock);
3006
+ /* Wait for MCU firmware to start running */
3007
+ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
3008
+ dev_warn(
3009
+ kbdev->dev,
3010
+ "[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d",
3011
+ kbase_backend_get_cycle_cnt(kbdev),
3012
+ group->handle, group->kctx->tgid,
3013
+ group->kctx->id, group->csg_nr);
3014
+ /* No point in waiting for CSG termination if MCU didn't
3015
+ * become active.
3016
+ */
3017
+ wait_for_termination = false;
22303018 }
22313019 }
3020
+#endif
3021
+ if (!on_slot) {
3022
+ sched_evict_group(group, false, true);
3023
+ } else {
3024
+ bool as_faulty;
22323025
3026
+ if (likely(wait_for_termination))
3027
+ term_group_sync(group);
3028
+ else
3029
+ term_csg_slot(group);
3030
+
3031
+ /* Treat the csg as terminated */
3032
+ as_faulty = cleanup_csg_slot(group);
3033
+ /* remove from the scheduler list */
3034
+ sched_evict_group(group, as_faulty, false);
3035
+ }
3036
+
3037
+ WARN_ON(queue_group_scheduled_locked(group));
3038
+
3039
+unlock:
22333040 mutex_unlock(&scheduler->lock);
22343041 }
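The descheduling flow rewritten above can be summarised as a three-way decision; the sketch below only models that decision (the enum and helper are hypothetical, not driver code):

#include <stdbool.h>

enum deschedule_action {
	EVICT_OFFSLOT_GROUP,     /* group is not resident on a CSG slot */
	TERMINATE_AND_WAIT,      /* on-slot group, normal synchronous termination */
	TERMINATE_WITHOUT_WAIT,  /* on-slot group, but the MCU failed to become active */
};

/* An on-slot group found while the Scheduler is sleeping requires the
 * Scheduler to be woken first; if the MCU still does not become active,
 * waiting for the CSG termination to complete is pointless.
 */
enum deschedule_action choose_deschedule_action(bool on_slot, bool scheduler_sleeping,
						bool mcu_became_active)
{
	if (!on_slot)
		return EVICT_OFFSLOT_GROUP;
	if (scheduler_sleeping && !mcu_became_active)
		return TERMINATE_WITHOUT_WAIT;
	return TERMINATE_AND_WAIT;
}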
22353042
....@@ -2269,6 +3076,8 @@
22693076 group));
22703077
22713078 group->run_state = KBASE_CSF_GROUP_RUNNABLE;
3079
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
3080
+ group->run_state);
22723081
22733082 /* A normal mode CSG could be idle onslot during
22743083 * protected mode. In this case clear the
....@@ -2279,6 +3088,8 @@
22793088 if (protm_grp && protm_grp != group) {
22803089 clear_bit((unsigned int)group->csg_nr,
22813090 scheduler->csg_slots_idle_mask);
3091
+ /* Request the update to confirm the condition inferred. */
3092
+ group->reevaluate_idle_status = true;
22823093 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
22833094 scheduler->csg_slots_idle_mask[0]);
22843095 }
....@@ -2299,13 +3110,13 @@
22993110 }
23003111 } else if (!queue_group_scheduled_locked(group)) {
23013112 int new_val;
3113
+
23023114 insert_group_to_runnable(&kbdev->csf.scheduler, group,
23033115 KBASE_CSF_GROUP_RUNNABLE);
23043116 /* A new group into the scheduler */
23053117 new_val = atomic_inc_return(
23063118 &kbdev->csf.scheduler.non_idle_offslot_grps);
2307
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
2308
- group, new_val);
3119
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val);
23093120 }
23103121
23113122 /* Since a group has become active now, check if GPU needs to be
....@@ -2508,8 +3319,7 @@
25083319 scheduler->remaining_tick_slots--;
25093320 }
25103321 } else {
2511
- update_offslot_non_idle_cnt_for_faulty_grp(
2512
- group);
3322
+ update_offslot_non_idle_cnt(group);
25133323 remove_scheduled_group(kbdev, group);
25143324 }
25153325 }
....@@ -2621,18 +3431,21 @@
26213431 csg_slot_stopped_raw),
26223432 remaining);
26233433
2624
- if (remaining) {
3434
+ if (likely(remaining)) {
26253435 u32 i;
26263436
26273437 for_each_set_bit(i, changed, num_groups) {
26283438 struct kbase_queue_group *group =
26293439 scheduler->csg_slots[i].resident_group;
26303440
2631
- if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) {
3441
+ if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i)))
26323442 continue;
2633
- }
3443
+
26343444 /* The on slot csg is now stopped */
26353445 clear_bit(i, slot_mask);
3446
+
3447
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
3448
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
26363449
26373450 if (likely(group)) {
26383451 bool as_fault;
....@@ -2661,6 +3474,7 @@
26613474 for_each_set_bit(i, slot_mask, num_groups) {
26623475 struct kbase_queue_group *const group =
26633476 scheduler->csg_slots[i].resident_group;
3477
+ enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
26643478
26653479 struct base_gpu_queue_group_error const
26663480 err_payload = { .error_type =
....@@ -2674,27 +3488,30 @@
26743488 if (unlikely(group == NULL))
26753489 continue;
26763490
2677
- kbase_csf_add_group_fatal_error(group,
2678
- &err_payload);
2679
- kbase_event_wakeup(group->kctx);
2680
-
26813491 /* TODO GPUCORE-25328: The CSG can't be
26823492 * terminated, the GPU will be reset as a
26833493 * work-around.
26843494 */
26853495 dev_warn(
26863496 kbdev->dev,
2687
- "Group %d of context %d_%d on slot %u failed to suspend",
3497
+ "[%llu] Group %d of context %d_%d on slot %u failed to suspend (timeout %d ms)",
3498
+ kbase_backend_get_cycle_cnt(kbdev),
26883499 group->handle, group->kctx->tgid,
2689
- group->kctx->id, i);
3500
+ group->kctx->id, i,
3501
+ kbdev->csf.fw_timeout_ms);
3502
+ if (kbase_csf_firmware_ping_wait(kbdev,
3503
+ FW_PING_AFTER_ERROR_TIMEOUT_MS))
3504
+ error_type = DF_PING_REQUEST_TIMEOUT;
3505
+ schedule_actions_trigger_df(kbdev, group->kctx, error_type);
3506
+
3507
+ kbase_csf_add_group_fatal_error(group, &err_payload);
3508
+ kbase_event_wakeup(group->kctx);
26903509
26913510 /* The group has failed suspension, stop
26923511 * further examination.
26933512 */
26943513 clear_bit(i, slot_mask);
26953514 set_bit(i, scheduler->csgs_events_enable_mask);
2696
- update_offslot_non_idle_cnt_for_onslot_grp(
2697
- group);
26983515 }
26993516
27003517 suspend_wait_failed = true;
....@@ -2774,7 +3591,7 @@
27743591 slots_state_changed(kbdev, changed, csg_slot_running),
27753592 remaining);
27763593
2777
- if (remaining) {
3594
+ if (likely(remaining)) {
27783595 for_each_set_bit(i, changed, num_groups) {
27793596 struct kbase_queue_group *group =
27803597 scheduler->csg_slots[i].resident_group;
....@@ -2782,10 +3599,22 @@
27823599 /* The on slot csg is now running */
27833600 clear_bit(i, slot_mask);
27843601 group->run_state = KBASE_CSF_GROUP_RUNNABLE;
3602
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
3603
+ group->run_state);
27853604 }
27863605 } else {
2787
- dev_warn(kbdev->dev, "Timed out waiting for CSG slots to start, slots: 0x%*pb\n",
2788
- num_groups, slot_mask);
3606
+ const int csg_nr = ffs(slot_mask[0]) - 1;
3607
+ struct kbase_queue_group *group =
3608
+ scheduler->csg_slots[csg_nr].resident_group;
3609
+ enum dumpfault_error_type error_type = DF_CSG_START_TIMEOUT;
3610
+
3611
+ dev_err(kbdev->dev,
3612
+ "[%llu] Timeout (%d ms) waiting for CSG slots to start, slots: 0x%*pb\n",
3613
+ kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
3614
+ num_groups, slot_mask);
3615
+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
3616
+ error_type = DF_PING_REQUEST_TIMEOUT;
3617
+ schedule_actions_trigger_df(kbdev, group->kctx, error_type);
27893618
27903619 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
27913620 kbase_reset_gpu(kbdev);
....@@ -2799,14 +3628,14 @@
27993628 * flagged after the completion of a CSG status
28003629 * update command
28013630 *
3631
+ * @kbdev: Pointer to the GPU device.
3632
+ * @slot: The given slot for checking an occupying resident group's idle
3633
+ * state.
3634
+ *
28023635 * This function is called at the start of scheduling tick to check the
28033636 * idle status of a queue group resident on a CSG slot.
28043637 * The caller must make sure the corresponding status update command has
28053638 * been called and completed before checking this status.
2806
- *
2807
- * @kbdev: Pointer to the GPU device.
2808
- * @slot: The given slot for checking an occupying resident group's idle
2809
- * state.
28103639 *
28113640 * Return: true if the group resident on slot is idle, otherwise false.
28123641 */
....@@ -2827,15 +3656,15 @@
28273656 * slots_update_state_changed() - Check the handshake state of a subset of
28283657 * command group slots.
28293658 *
2830
- * Checks the state of a subset of slots selected through the slots_mask
2831
- * bit_map. Records which slots' handshake completed and send it back in the
2832
- * slots_done bit_map.
2833
- *
28343659 * @kbdev: The GPU device.
28353660 * @field_mask: The field mask for checking the state in the csg_req/ack.
28363661 * @slots_mask: A bit_map specifying the slots to check.
28373662 * @slots_done: A cleared bit_map for returning the slots that
28383663 * have finished update.
3664
+ *
3665
+ * Checks the state of a subset of slots selected through the slots_mask
3666
+ * bit_map. Records which slots' handshake completed and sends it back in the
3667
+ * slots_done bit_map.
28393668 *
28403669 * Return: true if the slots_done is set for at least one slot.
28413670 * Otherwise false.
....@@ -2870,16 +3699,16 @@
28703699 * wait_csg_slots_handshake_ack - Wait the req/ack handshakes to complete on
28713700 * the specified groups.
28723701 *
2873
- * This function waits for the acknowledgement of the request that have
2874
- * already been placed for the CSG slots by the caller. Currently used for
2875
- * the CSG priority update and status update requests.
2876
- *
28773702 * @kbdev: Pointer to the GPU device.
28783703 * @field_mask: The field mask for checking the state in the csg_req/ack.
28793704 * @slot_mask: Bitmap reflecting the slots, the function will modify
28803705 * the acknowledged slots by clearing their corresponding
28813706 * bits.
28823707 * @wait_in_jiffies: Wait duration in jiffies, controlling the time-out.
3708
+ *
3709
+ * This function waits for the acknowledgment of the request that have
3710
+ * already been placed for the CSG slots by the caller. Currently used for
3711
+ * the CSG priority update and status update requests.
28833712 *
28843713 * Return: 0 on all specified slots acknowledged; otherwise -ETIMEDOUT. For
28853714 * timed out condition with unacknowledged slots, their bits remain
....@@ -2902,11 +3731,13 @@
29023731 slot_mask, dones),
29033732 remaining);
29043733
2905
- if (remaining)
3734
+ if (likely(remaining))
29063735 bitmap_andnot(slot_mask, slot_mask, dones, num_groups);
2907
- else
3736
+ else {
3737
+
29083738 /* Timed-out on the wait */
29093739 return -ETIMEDOUT;
3740
+ }
29103741 }
29113742
29123743 return 0;
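The wait above relies on the toggle handshake used throughout this file for the CSG/CS request registers: the host flips the masked bit(s) in REQ so that they differ from ACK, and the firmware acknowledges by flipping the same bit(s) in ACK. A minimal, self-contained model of that pattern (names are illustrative, not driver API):

#include <stdbool.h>
#include <stdint.h>

/* Host side: request an operation by making REQ differ from ACK in the
 * masked bit(s). Returns the new REQ value to be written.
 */
uint32_t toggle_request(uint32_t req, uint32_t ack, uint32_t mask)
{
	return (req & ~mask) | ((ack ^ mask) & mask);
}

/* Completion test: the operation is acknowledged once REQ and ACK agree
 * again in the masked bit(s).
 */
bool request_acked(uint32_t req, uint32_t ack, uint32_t mask)
{
	return ((req ^ ack) & mask) == 0;
}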
....@@ -2922,16 +3753,37 @@
29223753
29233754 lockdep_assert_held(&kbdev->csf.scheduler.lock);
29243755
2925
- if (ret != 0) {
2926
- /* The update timeout is not regarded as a serious
2927
- * issue, no major consequences are expected as a
2928
- * result, so just warn the case.
2929
- */
3756
+ if (unlikely(ret != 0)) {
3757
+ const int csg_nr = ffs(slot_mask[0]) - 1;
3758
+ struct kbase_queue_group *group =
3759
+ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
3760
+ enum dumpfault_error_type error_type = DF_CSG_EP_CFG_TIMEOUT;
3761
+
29303762 dev_warn(
29313763 kbdev->dev,
2932
- "Timeout on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx",
3764
+ "[%llu] Timeout (%d ms) on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx",
3765
+ kbase_backend_get_cycle_cnt(kbdev),
3766
+ kbdev->csf.fw_timeout_ms,
29333767 slot_mask[0]);
3768
+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
3769
+ error_type = DF_PING_REQUEST_TIMEOUT;
3770
+ schedule_actions_trigger_df(kbdev, group->kctx, error_type);
3771
+
3772
+ /* Timeout could indicate firmware is unresponsive so trigger a GPU reset. */
3773
+ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
3774
+ kbase_reset_gpu(kbdev);
29343775 }
3776
+}
3777
+
3778
+static void report_csg_termination(struct kbase_queue_group *const group)
3779
+{
3780
+ struct base_gpu_queue_group_error
3781
+ err = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
3782
+ .payload = { .fatal_group = {
3783
+ .status = GPU_EXCEPTION_TYPE_SW_FAULT_2,
3784
+ } } };
3785
+
3786
+ kbase_csf_add_group_fatal_error(group, &err);
29353787 }
29363788
29373789 void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
....@@ -2951,16 +3803,21 @@
29513803 */
29523804 WARN_ON(!kbase_reset_gpu_is_active(kbdev));
29533805
2954
- KBASE_KTRACE_ADD(kbdev, EVICT_CTX_SLOTS, kctx, 0u);
3806
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_EVICT_CTX_SLOTS_START, kctx, 0u);
29553807 for (slot = 0; slot < num_groups; slot++) {
29563808 group = kbdev->csf.scheduler.csg_slots[slot].resident_group;
29573809 if (group && group->kctx == kctx) {
29583810 bool as_fault;
29593811
3812
+ dev_dbg(kbdev->dev, "Evicting group [%d] running on slot [%d] due to reset",
3813
+ group->handle, group->csg_nr);
3814
+
29603815 term_csg_slot(group);
29613816 as_fault = cleanup_csg_slot(group);
29623817 /* remove the group from the scheduler list */
29633818 sched_evict_group(group, as_fault, false);
3819
+ /* signal Userspace that CSG is being terminated */
3820
+ report_csg_termination(group);
29643821 /* return the evicted group to the caller */
29653822 list_add_tail(&group->link, evicted_groups);
29663823 set_bit(slot, slot_mask);
....@@ -2970,20 +3827,30 @@
29703827 dev_info(kbdev->dev, "Evicting context %d_%d slots: 0x%*pb\n",
29713828 kctx->tgid, kctx->id, num_groups, slot_mask);
29723829
3830
+ /* Fatal errors may have been the cause of the GPU reset
3831
+ * taking place, in which case we want to make sure that
3832
+ * we wake up the fatal event queue to notify userspace
3833
+ * only once. Otherwise, we may have duplicate event
3834
+ * notifications between the time the first notification
3835
+ * occurs and the time the GPU is reset.
3836
+ */
3837
+ kbase_event_wakeup(kctx);
3838
+
29733839 mutex_unlock(&scheduler->lock);
3840
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_EVICT_CTX_SLOTS_END, kctx, num_groups);
29743841 }
29753842
29763843 /**
29773844 * scheduler_slot_protm_ack - Acknowledging the protected region requests
29783845 * from the resident group on a given slot.
29793846 *
2980
- * The function assumes that the given slot is in stable running state and
2981
- * has already been judged by the caller on that any pending protected region
2982
- * requests of the resident group should be acknowledged.
2983
- *
29843847 * @kbdev: Pointer to the GPU device.
29853848 * @group: Pointer to the resident group on the given slot.
29863849 * @slot: The slot that the given group is actively operating on.
3850
+ *
3851
+ * The function assumes that the given slot is in stable running state and
3852
+ * has already been judged by the caller on that any pending protected region
3853
+ * requests of the resident group should be acknowledged.
29873854 *
29883855 * Return: true if the group has pending protm request(s) and is acknowledged.
29893856 * The caller should arrange to enter the protected mode for servicing
....@@ -3014,8 +3881,8 @@
30143881 struct kbase_queue *queue = group->bound_queues[i];
30153882
30163883 clear_bit(i, group->protm_pending_bitmap);
3017
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_CLEAR, group,
3018
- queue, group->protm_pending_bitmap[0]);
3884
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_CLEAR, group, queue,
3885
+ group->protm_pending_bitmap[0]);
30193886
30203887 if (!WARN_ON(!queue) && queue->enabled) {
30213888 struct kbase_csf_cmd_stream_info *stream =
....@@ -3051,8 +3918,47 @@
30513918 }
30523919
30533920 /**
3921
+ * protm_enter_set_next_pending_seq - Update the scheduler's field of
3922
+ * tick_protm_pending_seq to that from the next available on-slot protm
3923
+ * pending CSG.
3924
+ *
3925
+ * @kbdev: Pointer to the GPU device.
3926
+ *
3927
+ * If applicable, the function updates the scheduler's tick_protm_pending_seq
3928
+ * field from the next available on-slot protm pending CSG. If not, the field
3929
+ * is set to KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID.
3930
+ */
3931
+static void protm_enter_set_next_pending_seq(struct kbase_device *const kbdev)
3932
+{
3933
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3934
+ u32 num_groups = kbdev->csf.global_iface.group_num;
3935
+ u32 num_csis = kbdev->csf.global_iface.groups[0].stream_num;
3936
+ DECLARE_BITMAP(active_csgs, MAX_SUPPORTED_CSGS) = { 0 };
3937
+ u32 i;
3938
+
3939
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
3940
+
3941
+ bitmap_xor(active_csgs, scheduler->csg_slots_idle_mask, scheduler->csg_inuse_bitmap,
3942
+ num_groups);
3943
+ /* Reset the tick's pending protm seq number to invalid initially */
3944
+ scheduler->tick_protm_pending_seq = KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
3945
+ for_each_set_bit(i, active_csgs, num_groups) {
3946
+ struct kbase_queue_group *group = scheduler->csg_slots[i].resident_group;
3947
+
3948
+ /* Set to the next pending protm group's scan_seq_number */
3949
+ if ((group != scheduler->active_protm_grp) &&
3950
+ (!bitmap_empty(group->protm_pending_bitmap, num_csis)) &&
3951
+ (group->scan_seq_num < scheduler->tick_protm_pending_seq))
3952
+ scheduler->tick_protm_pending_seq = group->scan_seq_num;
3953
+ }
3954
+}
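The bitmap_xor() above is a small trick worth noting: because the idle mask is always a subset of the in-use mask, XOR-ing the two yields exactly the slots that are in use but not idle. A word-sized illustration (hypothetical helper, not driver code):

#include <stdint.h>

/* Assuming idle_mask is a subset of inuse_mask (as for the CSG slot bitmaps),
 * XOR leaves only the bits set in inuse_mask but clear in idle_mask, i.e. the
 * active slots.
 */
uint32_t active_slots(uint32_t inuse_mask, uint32_t idle_mask)
{
	return inuse_mask ^ idle_mask;
}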
3955
+
3956
+/**
30543957 * scheduler_group_check_protm_enter - Request the given group to be evaluated
30553958 * for triggering the protected mode.
3959
+ *
3960
+ * @kbdev: Pointer to the GPU device.
3961
+ * @input_grp: Pointer to the GPU queue group.
30563962 *
30573963 * The function assumes the given group is either an active running group or
30583964 * the scheduler internally maintained field scheduler->top_grp.
....@@ -3060,24 +3966,35 @@
30603966 * If the GPU is not already running in protected mode and the input group
30613967 * has protected region requests from its bound queues, the requests are
30623968 * acknowledged and the GPU is instructed to enter the protected mode.
3063
- *
3064
- * @kbdev: Pointer to the GPU device.
3065
- * @input_grp: Pointer to the GPU queue group.
30663969 */
30673970 static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
30683971 struct kbase_queue_group *const input_grp)
30693972 {
30703973 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3974
+ struct kbase_protected_suspend_buffer *sbuf = &input_grp->protected_suspend_buf;
30713975 unsigned long flags;
30723976 bool protm_in_use;
30733977
30743978 lockdep_assert_held(&scheduler->lock);
30753979
3980
+ /* Return early if the physical pages have not been allocated yet */
3981
+ if (unlikely(!sbuf->pma))
3982
+ return;
3983
+
3984
+ /* This lock is taken to prevent the issuing of MMU commands during the
3985
+ * transition to protected mode. This helps avoid the scenario where the
3986
+ * entry to protected mode happens with a memory region being locked and
3987
+ * the same region is then accessed by the GPU in protected mode.
3988
+ */
3989
+ mutex_lock(&kbdev->mmu_hw_mutex);
30763990 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
30773991
3078
- protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev);
3079
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_CHECK_PROTM_ENTER, input_grp,
3080
- protm_in_use);
3992
+ /* Check if the previous transition to enter & exit the protected
3993
+ * mode has completed or not.
3994
+ */
3995
+ protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev) ||
3996
+ kbdev->protected_mode;
3997
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER_CHECK, input_grp, protm_in_use);
30813998
30823999 /* Firmware samples the PROTM_PEND ACK bit for CSs when
30834000 * Host sends PROTM_ENTER global request. So if PROTM_PEND ACK bit
....@@ -3108,6 +4025,8 @@
31084025 CSG_SLOT_RUNNING) {
31094026 if (kctx_as_enabled(input_grp->kctx) &&
31104027 scheduler_slot_protm_ack(kbdev, input_grp, slot)) {
4028
+ int err;
4029
+
31114030 /* Option of acknowledging to multiple
31124031 * CSGs from the same kctx is dropped,
31134032 * after consulting with the
....@@ -3115,22 +4034,77 @@
31154034 * GPUCORE-21394.
31164035 */
31174036
3118
- /* Disable the idle timer */
3119
- disable_gpu_idle_fw_timer_locked(kbdev);
3120
-
31214037 /* Switch to protected mode */
31224038 scheduler->active_protm_grp = input_grp;
3123
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_ENTER_PROTM,
3124
- input_grp, 0u);
4039
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER, input_grp,
4040
+ 0u);
4041
+
4042
+#if IS_ENABLED(CONFIG_MALI_CORESIGHT)
4043
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4044
+
4045
+ /* Coresight must be disabled before entering protected mode. */
4046
+ kbase_debug_coresight_csf_disable_pmode_enter(kbdev);
4047
+
4048
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4049
+#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
4050
+
4051
+ kbase_csf_enter_protected_mode(kbdev);
4052
+ /* Set the pending protm seq number to the next one */
4053
+ protm_enter_set_next_pending_seq(kbdev);
31254054
31264055 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
3127
- kbase_csf_enter_protected_mode(kbdev);
4056
+
4057
+ err = kbase_csf_wait_protected_mode_enter(kbdev);
4058
+ mutex_unlock(&kbdev->mmu_hw_mutex);
4059
+
4060
+ if (err)
4061
+ schedule_actions_trigger_df(kbdev, input_grp->kctx,
4062
+ DF_PROTECTED_MODE_ENTRY_FAILURE);
4063
+
4064
+ scheduler->protm_enter_time = ktime_get_raw();
4065
+
31284066 return;
31294067 }
31304068 }
31314069 }
31324070
31334071 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4072
+ mutex_unlock(&kbdev->mmu_hw_mutex);
4073
+}
4074
+
4075
+/**
4076
+ * scheduler_check_pmode_progress - Check if protected mode execution is progressing
4077
+ *
4078
+ * @kbdev: Pointer to the GPU device.
4079
+ *
4080
+ * This function is called when the GPU is in protected mode.
4081
+ *
4082
+ * It will check if the time spent in protected mode is less
4083
+ * than CSF_SCHED_PROTM_PROGRESS_TIMEOUT. If not, a PROTM_EXIT
4084
+ * request is sent to the FW.
4085
+ */
4086
+static void scheduler_check_pmode_progress(struct kbase_device *kbdev)
4087
+{
4088
+ u64 protm_spent_time_ms;
4089
+ u64 protm_progress_timeout =
4090
+ kbase_get_timeout_ms(kbdev, CSF_SCHED_PROTM_PROGRESS_TIMEOUT);
4091
+ s64 diff_ms_signed =
4092
+ ktime_ms_delta(ktime_get_raw(), kbdev->csf.scheduler.protm_enter_time);
4093
+
4094
+ if (diff_ms_signed < 0)
4095
+ return;
4096
+
4097
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
4098
+
4099
+ protm_spent_time_ms = (u64)diff_ms_signed;
4100
+ if (protm_spent_time_ms < protm_progress_timeout)
4101
+ return;
4102
+
4103
+ dev_dbg(kbdev->dev, "Protected mode progress timeout: %llu >= %llu",
4104
+ protm_spent_time_ms, protm_progress_timeout);
4105
+
4106
+ /* Prompt the FW to exit protected mode */
4107
+ scheduler_force_protm_exit(kbdev);
31344108 }
31354109
31364110 static void scheduler_apply(struct kbase_device *kbdev)
....@@ -3160,7 +4134,7 @@
31604134 }
31614135 }
31624136
3163
- /* Initialize the remaining avialable csg slots for the tick/tock */
4137
+ /* Initialize the remaining available csg slots for the tick/tock */
31644138 scheduler->remaining_tick_slots = available_csg_slots;
31654139
31664140 /* If there are spare slots, apply heads in the list */
....@@ -3186,8 +4160,7 @@
31864160
31874161 if (!kctx_as_enabled(group->kctx) || group->faulted) {
31884162 /* Drop the head group and continue */
3189
- update_offslot_non_idle_cnt_for_faulty_grp(
3190
- group);
4163
+ update_offslot_non_idle_cnt(group);
31914164 remove_scheduled_group(kbdev, group);
31924165 continue;
31934166 }
....@@ -3217,6 +4190,7 @@
32174190 struct kbase_queue_group *group;
32184191
32194192 lockdep_assert_held(&scheduler->lock);
4193
+ lockdep_assert_held(&scheduler->interrupt_lock);
32204194 if (WARN_ON(priority < 0) ||
32214195 WARN_ON(priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
32224196 return;
....@@ -3236,9 +4210,18 @@
32364210 /* Set the scanout sequence number, starting from 0 */
32374211 group->scan_seq_num = scheduler->csg_scan_count_for_tick++;
32384212
4213
+ if (scheduler->tick_protm_pending_seq ==
4214
+ KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) {
4215
+ if (!bitmap_empty(group->protm_pending_bitmap,
4216
+ kbdev->csf.global_iface.groups[0].stream_num))
4217
+ scheduler->tick_protm_pending_seq =
4218
+ group->scan_seq_num;
4219
+ }
4220
+
32394221 if (queue_group_idle_locked(group)) {
3240
- list_add_tail(&group->link_to_schedule,
3241
- &scheduler->idle_groups_to_schedule);
4222
+ if (can_schedule_idle_group(group))
4223
+ list_add_tail(&group->link_to_schedule,
4224
+ &scheduler->idle_groups_to_schedule);
32424225 continue;
32434226 }
32444227
....@@ -3261,6 +4244,8 @@
32614244 * scheduler_rotate_groups() - Rotate the runnable queue groups to provide
32624245 * fairness of scheduling within a single
32634246 * kbase_context.
4247
+ *
4248
+ * @kbdev: Pointer to the GPU device.
32644249 *
32654250 * Since only kbase_csf_scheduler's top_grp (i.e. the queue group assigned
32664251 * the highest slot priority) is guaranteed to get the resources that it
....@@ -3300,8 +4285,6 @@
33004285 * the kbase_csf_scheduler's groups_to_schedule list. In this example, it will
33014286 * be for a group in the next lowest priority level or in absence of those the
33024287 * next kbase_context's queue groups.
3303
- *
3304
- * @kbdev: Pointer to the GPU device.
33054288 */
33064289 static void scheduler_rotate_groups(struct kbase_device *kbdev)
33074290 {
....@@ -3317,14 +4300,14 @@
33174300 WARN_ON(top_grp->kctx != top_ctx);
33184301 if (!WARN_ON(list_empty(list))) {
33194302 struct kbase_queue_group *new_head_grp;
4303
+
33204304 list_move_tail(&top_grp->link, list);
33214305 new_head_grp = (!list_empty(list)) ?
33224306 list_first_entry(list, struct kbase_queue_group, link) :
33234307 NULL;
3324
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_ROTATE_RUNNABLE,
3325
- top_grp, top_ctx->csf.sched.num_runnable_grps);
3326
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_HEAD_RUNNABLE,
3327
- new_head_grp, 0u);
4308
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_ROTATE, top_grp,
4309
+ top_ctx->csf.sched.num_runnable_grps);
4310
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u);
33284311 dev_dbg(kbdev->dev,
33294312 "groups rotated for a context, num_runnable_groups: %u\n",
33304313 scheduler->top_ctx->csf.sched.num_runnable_grps);
....@@ -3353,14 +4336,14 @@
33534336
33544337 if (!WARN_ON(!found)) {
33554338 struct kbase_context *new_head_kctx;
4339
+
33564340 list_move_tail(&pos->csf.link, list);
3357
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_ROTATE_RUNNABLE, pos,
3358
- 0u);
4341
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_ROTATE, pos, 0u);
33594342 new_head_kctx = (!list_empty(list)) ?
33604343 list_first_entry(list, struct kbase_context, csf.link) :
33614344 NULL;
3362
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_HEAD_RUNNABLE,
3363
- new_head_kctx, 0u);
4345
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx,
4346
+ 0u);
33644347 dev_dbg(kbdev->dev, "contexts rotated\n");
33654348 }
33664349 }
....@@ -3372,17 +4355,22 @@
33724355 * slots for which the IDLE notification was received
33734356 * previously.
33744357 *
3375
- * This function sends a CSG status update request for all the CSG slots
3376
- * present in the bitmap scheduler->csg_slots_idle_mask and wait for the
3377
- * request to complete.
3378
- * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by
3379
- * this function.
3380
- *
33814358 * @kbdev: Pointer to the GPU device.
33824359 * @csg_bitmap: Bitmap of the CSG slots for which
33834360 * the status update request completed successfully.
3384
- * @failed_csg_bitmap: Bitmap of the CSG slots for which
4361
+ * @failed_csg_bitmap: Bitmap of the idle CSG slots for which
33854362 * the status update request timedout.
4363
+ *
4364
+ * This function sends a CSG status update request for all the CSG slots
4365
+ * present in the bitmap scheduler->csg_slots_idle_mask. Additionally, if
4366
+ * the group's 'reevaluate_idle_status' field is set, the nominally non-idle
4367
+ * slots are also included in the status update for a confirmation of their
4368
+ * status. The function wait for the status update request to complete and
4369
+ * returns the update completed slots bitmap and any timed out idle-flagged
4370
+ * slots bitmap.
4371
+ *
4372
+ * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by
4373
+ * this function.
33864374 */
33874375 static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
33884376 unsigned long *csg_bitmap, unsigned long *failed_csg_bitmap)
....@@ -3392,34 +4380,70 @@
33924380 struct kbase_csf_global_iface *const global_iface =
33934381 &kbdev->csf.global_iface;
33944382 unsigned long flags, i;
4383
+ u32 active_chk = 0;
33954384
33964385 lockdep_assert_held(&scheduler->lock);
33974386
33984387 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
3399
- for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) {
4388
+
4389
+ for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) {
34004390 struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
34014391 struct kbase_queue_group *group = csg_slot->resident_group;
34024392 struct kbase_csf_cmd_stream_group_info *const ginfo =
34034393 &global_iface->groups[i];
34044394 u32 csg_req;
4395
+ bool idle_flag;
34054396
3406
- clear_bit(i, scheduler->csg_slots_idle_mask);
3407
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
3408
- scheduler->csg_slots_idle_mask[0]);
3409
- if (WARN_ON(!group))
4397
+ if (WARN_ON(!group)) {
4398
+ clear_bit(i, scheduler->csg_inuse_bitmap);
4399
+ clear_bit(i, scheduler->csg_slots_idle_mask);
34104400 continue;
4401
+ }
34114402
3412
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STATUS_UPDATE, group,
3413
- i);
4403
+ idle_flag = test_bit(i, scheduler->csg_slots_idle_mask);
4404
+ if (idle_flag || group->reevaluate_idle_status) {
4405
+ if (idle_flag) {
4406
+#ifdef CONFIG_MALI_BIFROST_DEBUG
4407
+ if (!bitmap_empty(group->protm_pending_bitmap,
4408
+ ginfo->stream_num)) {
4409
+ dev_warn(kbdev->dev,
4410
+ "Idle bit set for group %d of ctx %d_%d on slot %d with pending protm execution",
4411
+ group->handle, group->kctx->tgid,
4412
+ group->kctx->id, (int)i);
4413
+ }
4414
+#endif
4415
+ clear_bit(i, scheduler->csg_slots_idle_mask);
4416
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
4417
+ scheduler->csg_slots_idle_mask[0]);
4418
+ } else {
4419
+ /* Updates include slots for which reevaluation is needed.
4420
+ * Here one tracks the extra included slots in active_chk.
4421
+ * For protm pending slots, their status of activeness are
4422
+ * assured so no need to request an update.
4423
+ */
4424
+ active_chk |= BIT(i);
4425
+ group->reevaluate_idle_status = false;
4426
+ }
34144427
3415
- csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
3416
- csg_req ^= CSG_REQ_STATUS_UPDATE_MASK;
3417
- kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
3418
- CSG_REQ_STATUS_UPDATE_MASK);
4428
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_UPDATE_IDLE_SLOT_REQ, group, i);
4429
+ csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
4430
+ csg_req ^= CSG_REQ_STATUS_UPDATE_MASK;
4431
+ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
4432
+ CSG_REQ_STATUS_UPDATE_MASK);
34194433
3420
- set_bit(i, csg_bitmap);
4434
+ /* Track the slot update requests in csg_bitmap.
4435
+ * Note, if the scheduler requested extended update, the resulting
4436
+ * csg_bitmap would be the idle_flags + active_chk. Otherwise it's
4437
+ * identical to the idle_flags.
4438
+ */
4439
+ set_bit(i, csg_bitmap);
4440
+ } else {
4441
+ group->run_state = KBASE_CSF_GROUP_RUNNABLE;
4442
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
4443
+ group->run_state);
4444
+ }
34214445 }
3422
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4446
+
34234447
34244448 /* The groups are aggregated into a single kernel doorbell request */
34254449 if (!bitmap_empty(csg_bitmap, num_groups)) {
....@@ -3428,22 +4452,43 @@
34284452 u32 db_slots = (u32)csg_bitmap[0];
34294453
34304454 kbase_csf_ring_csg_slots_doorbell(kbdev, db_slots);
4455
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
34314456
34324457 if (wait_csg_slots_handshake_ack(kbdev,
34334458 CSG_REQ_STATUS_UPDATE_MASK, csg_bitmap, wt)) {
4459
+ const int csg_nr = ffs(csg_bitmap[0]) - 1;
4460
+ struct kbase_queue_group *group =
4461
+ scheduler->csg_slots[csg_nr].resident_group;
4462
+
34344463 dev_warn(
34354464 kbdev->dev,
3436
- "Timeout on CSG_REQ:STATUS_UPDATE, treat groups as not idle: slot mask=0x%lx",
4465
+ "[%llu] Timeout (%d ms) on CSG_REQ:STATUS_UPDATE, treat groups as not idle: slot mask=0x%lx",
4466
+ kbase_backend_get_cycle_cnt(kbdev),
4467
+ kbdev->csf.fw_timeout_ms,
34374468 csg_bitmap[0]);
4469
+ schedule_actions_trigger_df(kbdev, group->kctx,
4470
+ DF_CSG_STATUS_UPDATE_TIMEOUT);
34384471
34394472 /* Store the bitmap of timed out slots */
34404473 bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups);
34414474 csg_bitmap[0] = ~csg_bitmap[0] & db_slots;
4475
+
4476
+ /* Mask off any failed bit positions contributed by the active ones, so
4477
+ * that the failed bit pattern reported back to the caller contains only
4478
+ * bits from the idle flags. This way, any idle flag that failed to update
4479
+ * is kept as 'idle' (an informed guess, as the update did not reach a
4480
+ * conclusive result), while the failed active ones are still treated as
4481
+ * 'non-idle'. This provides graceful handling of the unexpected timeout
4482
+ * condition.
4483
+ */
4484
+ failed_csg_bitmap[0] &= ~active_chk;
4485
+
34424486 } else {
3443
- KBASE_KTRACE_ADD(kbdev, SLOTS_STATUS_UPDATE_ACK, NULL,
3444
- db_slots);
4487
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_UPDATE_IDLE_SLOTS_ACK, NULL, db_slots);
34454488 csg_bitmap[0] = db_slots;
34464489 }
4490
+ } else {
4491
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
34474492 }
34484493 }
34494494
....@@ -3451,6 +4496,8 @@
34514496 * scheduler_handle_idle_slots() - Update the idle status of queue groups
34524497 * resident on CSG slots for which the
34534498 * IDLE notification was received previously.
4499
+ *
4500
+ * @kbdev: Pointer to the GPU device.
34544501 *
34554502 * This function is called at the start of scheduling tick/tock to reconfirm
34564503 * the idle status of queue groups resident on CSG slots for
....@@ -3465,8 +4512,6 @@
34654512 * updated accordingly.
34674514 * The bits corresponding to slots for which the status update request timed out
34674514 * remain set in scheduler->csg_slots_idle_mask.
3468
- *
3469
- * @kbdev: Pointer to the GPU device.
34704515 */
34714516 static void scheduler_handle_idle_slots(struct kbase_device *kbdev)
34724517 {
....@@ -3498,17 +4543,21 @@
34984543
34994544 if (group_on_slot_is_idle(kbdev, i)) {
35004545 group->run_state = KBASE_CSF_GROUP_IDLE;
4546
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_IDLE, group, group->run_state);
35014547 set_bit(i, scheduler->csg_slots_idle_mask);
35024548 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET,
35034549 group, scheduler->csg_slots_idle_mask[0]);
3504
- } else
4550
+ } else {
35054551 group->run_state = KBASE_CSF_GROUP_RUNNABLE;
4552
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
4553
+ group->run_state);
4554
+ }
35064555 }
35074556
35084557 bitmap_or(scheduler->csg_slots_idle_mask,
35094558 scheduler->csg_slots_idle_mask,
35104559 failed_csg_bitmap, num_groups);
3511
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, NULL,
4560
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_HANDLE_IDLE_SLOTS, NULL,
35124561 scheduler->csg_slots_idle_mask[0]);
35134562 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
35144563 }
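/* Editorial illustration, not part of the diff: how scheduler_handle_idle_slots()
 * above rebuilds the idle mask for the slots whose status update completed,
 * modelled with plain unsigned long bitmaps in userspace. Slots whose update
 * timed out are OR-ed back in so they keep being treated as idle (the
 * informed guess noted earlier). All values are hypothetical.
 */
#include <stdio.h>

int main(void)
{
	unsigned long updated   = 0x0fUL; /* slots whose update completed */
	unsigned long confirmed = 0x05UL; /* of those, slots confirmed idle */
	unsigned long failed    = 0x30UL; /* slots whose update timed out */
	unsigned long idle_mask = 0;
	unsigned long slot;

	for (slot = 0; slot < 8; slot++) {
		if (!(updated & (1UL << slot)))
			continue;
		if (confirmed & (1UL << slot))
			idle_mask |= 1UL << slot;    /* confirmed idle */
		else
			idle_mask &= ~(1UL << slot); /* confirmed active */
	}
	idle_mask |= failed; /* timed-out slots remain flagged idle */

	printf("idle mask: 0x%lx\n", idle_mask); /* 0x35 */
	return 0;
}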
....@@ -3520,8 +4569,7 @@
35204569
35214570 list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule,
35224571 link_to_schedule) {
3523
-
3524
- WARN_ON(!queue_group_idle_locked(group));
4572
+ WARN_ON(!can_schedule_idle_group(group));
35254573
35264574 if (!scheduler->ngrp_to_schedule) {
35274575 /* keep the top csg's origin */
....@@ -3575,39 +4623,109 @@
35754623 return NULL;
35764624 }
35774625
4626
+/**
4627
+ * suspend_active_groups_on_powerdown() - Suspend active CSG groups upon
4628
+ * suspend or GPU IDLE.
4629
+ *
4630
+ * @kbdev: Pointer to the device
4631
+ * @system_suspend: Flag to indicate it's for system suspend.
4632
+ *
4633
+ * This function will suspend all active CSG groups upon either
4634
+ * system suspend, runtime suspend or GPU IDLE.
4635
+ *
4636
+ * Return: 0 on success, -1 otherwise.
4637
+ */
35784638 static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
3579
- bool is_suspend)
4639
+ bool system_suspend)
35804640 {
35814641 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
35824642 DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 };
35834643
35844644 int ret = suspend_active_queue_groups(kbdev, slot_mask);
35854645
3586
- if (ret) {
3587
- /* The suspend of CSGs failed, trigger the GPU reset and wait
3588
- * for it to complete to be in a deterministic state.
4646
+ if (unlikely(ret)) {
4647
+ const int csg_nr = ffs(slot_mask[0]) - 1;
4648
+ struct kbase_queue_group *group =
4649
+ scheduler->csg_slots[csg_nr].resident_group;
4650
+ enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
4651
+
4652
+ /* The suspend of CSGs failed,
4653
+ * trigger the GPU reset to be in a deterministic state.
35894654 */
3590
- dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n",
4655
+ dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n",
4656
+ kbase_backend_get_cycle_cnt(kbdev),
4657
+ kbdev->csf.fw_timeout_ms,
35914658 kbdev->csf.global_iface.group_num, slot_mask);
4659
+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
4660
+ error_type = DF_PING_REQUEST_TIMEOUT;
4661
+ schedule_actions_trigger_df(kbdev, group->kctx, error_type);
35924662
35934663 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
35944664 kbase_reset_gpu(kbdev);
35954665
3596
- if (is_suspend) {
3597
- mutex_unlock(&scheduler->lock);
3598
- kbase_reset_gpu_wait(kbdev);
3599
- mutex_lock(&scheduler->lock);
3600
- }
36014666 return -1;
36024667 }
36034668
36044669 /* Check if the groups became active whilst the suspend was ongoing,
36054670 * but only for the case where the system suspend is not in progress
36064671 */
3607
- if (!is_suspend && atomic_read(&scheduler->non_idle_offslot_grps))
4672
+ if (!system_suspend && atomic_read(&scheduler->non_idle_offslot_grps))
36084673 return -1;
36094674
36104675 return 0;
4676
+}
4677
+
4678
+/**
4679
+ * all_on_slot_groups_remained_idle() - Live check for all groups' idleness
4680
+ *
4681
+ * @kbdev: Pointer to the device.
4682
+ *
4683
+ * Returns false if any of the queues inside any of the groups that have been
4684
+ * assigned a physical CSG slot have work to execute, or have executed work
4685
+ * since having received a GPU idle notification. This function is used to
4686
+ * handle a race condition between firmware reporting GPU idle and userspace
4687
+ * submitting more work by directly ringing a doorbell.
4688
+ *
4689
+ * Return: false if any queue inside any resident group has work to be processed
4690
+ * or has processed work since GPU idle event, true otherwise.
4691
+ */
4692
+static bool all_on_slot_groups_remained_idle(struct kbase_device *kbdev)
4693
+{
4694
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4695
+ /* All CSGs have the same number of CSs */
4696
+ size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num;
4697
+ size_t i;
4698
+
4699
+ lockdep_assert_held(&scheduler->lock);
4700
+ lockdep_assert_held(&scheduler->interrupt_lock);
4701
+
4702
+ for_each_set_bit(i, scheduler->csg_slots_idle_mask,
4703
+ kbdev->csf.global_iface.group_num) {
4704
+ struct kbase_queue_group *const group =
4705
+ scheduler->csg_slots[i].resident_group;
4706
+ size_t j;
4707
+
4708
+ for (j = 0; j < max_streams; ++j) {
4709
+ struct kbase_queue const *const queue =
4710
+ group->bound_queues[j];
4711
+ u64 const *output_addr;
4712
+ u64 cur_extract_ofs;
4713
+
4714
+ if (!queue || !queue->user_io_addr)
4715
+ continue;
4716
+
4717
+ output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
4718
+ cur_extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)];
4719
+ if (cur_extract_ofs != queue->extract_ofs) {
4720
+ /* More work has been executed since the idle
4721
+ * notification.
4722
+ */
4723
+ return false;
4724
+ }
4725
+ }
4726
+ }
4727
+
4728
+ return true;
36114729 }
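/* Editorial illustration, not part of the diff: the race check above compares
 * the CS_EXTRACT offset snapshotted at idle-notification time against the
 * live value in the queue's output page. A userspace sketch of that
 * comparison, using a made-up queue structure; illustrative only.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct model_queue {
	uint64_t extract_ofs_at_idle; /* snapshot taken on the GPU idle event */
	uint64_t live_extract_ofs;    /* stand-in for the CS_EXTRACT output */
};

static bool queue_remained_idle(const struct model_queue *q)
{
	/* Any advance of the extract pointer means work ran since idle */
	return q->live_extract_ofs == q->extract_ofs_at_idle;
}

int main(void)
{
	struct model_queue q = { .extract_ofs_at_idle = 256, .live_extract_ofs = 256 };

	printf("remained idle: %d\n", queue_remained_idle(&q)); /* 1 */
	q.live_extract_ofs = 320; /* doorbell rung, GPU consumed more commands */
	printf("remained idle: %d\n", queue_remained_idle(&q)); /* 0 */
	return 0;
}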
36124730
36134731 static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
....@@ -3618,25 +4736,106 @@
36184736
36194737 lockdep_assert_held(&scheduler->lock);
36204738
3621
- if (scheduler->state == SCHED_SUSPENDED)
4739
+ if ((scheduler->state == SCHED_SUSPENDED) ||
4740
+ (scheduler->state == SCHED_SLEEPING))
36224741 return false;
36234742
36244743 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
4744
+ spin_lock(&scheduler->interrupt_lock);
4745
+
4746
+ if (scheduler->fast_gpu_idle_handling) {
4747
+ scheduler->fast_gpu_idle_handling = false;
4748
+
4749
+ if (scheduler->total_runnable_grps) {
4750
+ suspend = !atomic_read(&scheduler->non_idle_offslot_grps) &&
4751
+ kbase_pm_idle_groups_sched_suspendable(kbdev);
4752
+ } else
4753
+ suspend = kbase_pm_no_runnables_sched_suspendable(kbdev);
4754
+ spin_unlock(&scheduler->interrupt_lock);
4755
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
4756
+
4757
+ return suspend;
4758
+ }
4759
+
36254760 if (scheduler->total_runnable_grps) {
3626
- spin_lock(&scheduler->interrupt_lock);
36274761
36284762 /* Check both on-slots and off-slots groups idle status */
36294763 suspend = kbase_csf_scheduler_all_csgs_idle(kbdev) &&
36304764 !atomic_read(&scheduler->non_idle_offslot_grps) &&
36314765 kbase_pm_idle_groups_sched_suspendable(kbdev);
3632
-
3633
- spin_unlock(&scheduler->interrupt_lock);
36344766 } else
36354767 suspend = kbase_pm_no_runnables_sched_suspendable(kbdev);
36364768
4769
+ /* Confirm that all groups are actually idle before proceeding with
4770
+ * suspension as groups might potentially become active again without
4771
+ * informing the scheduler in case userspace rings a doorbell directly.
4772
+ */
4773
+ if (suspend && (unlikely(atomic_read(&scheduler->gpu_no_longer_idle)) ||
4774
+ unlikely(!all_on_slot_groups_remained_idle(kbdev))))
4775
+ suspend = false;
4776
+
4777
+ spin_unlock(&scheduler->interrupt_lock);
36374778 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
36384779
36394780 return suspend;
4781
+}
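/* Editorial illustration, not part of the diff: for the case where runnable
 * groups exist, the suspend decision above is a conjunction of independently
 * tracked conditions. A userspace predicate with hypothetical inputs,
 * mirroring the order of the checks; a sketch only, not the driver's API.
 */
#include <stdio.h>
#include <stdbool.h>

static bool can_suspend(bool all_csgs_idle, int non_idle_offslot_grps,
			bool pm_allows_suspend, bool gpu_no_longer_idle,
			bool groups_remained_idle)
{
	if (!all_csgs_idle || non_idle_offslot_grps || !pm_allows_suspend)
		return false;
	/* Late doorbell activity can invalidate the idle notification */
	if (gpu_no_longer_idle || !groups_remained_idle)
		return false;
	return true;
}

int main(void)
{
	printf("%d\n", can_suspend(true, 0, true, false, true)); /* 1 */
	printf("%d\n", can_suspend(true, 0, true, true, true));  /* 0 */
	return 0;
}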
4782
+
4783
+#ifdef KBASE_PM_RUNTIME
4784
+/**
4785
+ * scheduler_sleep_on_idle - Put the Scheduler in sleeping state on GPU
4786
+ * becoming idle.
4787
+ *
4788
+ * @kbdev: Pointer to the device.
4789
+ *
4790
+ * This function is called on GPU idle notification to trigger the transition of
4791
+ * GPU to sleep state, where MCU firmware pauses execution and L2 cache is
4792
+ * turned off. Scheduler's state is changed to sleeping and all the active queue
4793
+ * groups remain on the CSG slots.
4794
+ */
4795
+static void scheduler_sleep_on_idle(struct kbase_device *kbdev)
4796
+{
4797
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4798
+
4799
+ lockdep_assert_held(&scheduler->lock);
4800
+
4801
+ dev_dbg(kbdev->dev,
4802
+ "Scheduler to be put to sleep on GPU becoming idle");
4803
+ cancel_tick_timer(kbdev);
4804
+ scheduler_pm_idle_before_sleep(kbdev);
4805
+ scheduler->state = SCHED_SLEEPING;
4806
+ KBASE_KTRACE_ADD(kbdev, SCHED_SLEEPING, NULL, scheduler->state);
4807
+}
4808
+#endif
4809
+
4810
+/**
4811
+ * scheduler_suspend_on_idle - Put the Scheduler in suspended state on GPU
4812
+ * becoming idle.
4813
+ *
4814
+ * @kbdev: Pointer to the device.
4815
+ *
4816
+ * This function is called on GPU idle notification to trigger the power down of
4817
+ * GPU. Scheduler's state is changed to suspended and all the active queue
4818
+ * groups are suspended before halting the MCU firmware.
4819
+ *
4820
+ * Return: true if scheduler will be suspended or false if suspend is aborted.
4821
+ */
4822
+static bool scheduler_suspend_on_idle(struct kbase_device *kbdev)
4823
+{
4824
+ int ret = suspend_active_groups_on_powerdown(kbdev, false);
4825
+
4826
+ if (ret) {
4827
+ dev_dbg(kbdev->dev, "Aborting suspend scheduler (grps: %d)",
4828
+ atomic_read(
4829
+ &kbdev->csf.scheduler.non_idle_offslot_grps));
4830
+ /* Bring forward the next tick */
4831
+ kbase_csf_scheduler_tick_advance(kbdev);
4832
+ return false;
4833
+ }
4834
+
4835
+ dev_dbg(kbdev->dev, "Scheduler to be suspended on GPU becoming idle");
4836
+ scheduler_suspend(kbdev);
4837
+ cancel_tick_timer(kbdev);
4838
+ return true;
36404839 }
36414840
36424841 static void gpu_idle_worker(struct work_struct *work)
....@@ -3644,53 +4843,58 @@
36444843 struct kbase_device *kbdev = container_of(
36454844 work, struct kbase_device, csf.scheduler.gpu_idle_work);
36464845 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
3647
- bool reset_active = false;
36484846 bool scheduler_is_idle_suspendable = false;
36494847 bool all_groups_suspended = false;
36504848
3651
- KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_BEGIN, NULL, 0u);
4849
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_START, NULL, 0u);
36524850
36534851 #define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \
36544852 (((u32)reset) | (((u32)idle) << 4) | (((u32)all_suspend) << 8))
36554853
36564854 if (kbase_reset_gpu_try_prevent(kbdev)) {
36574855 dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n");
3658
- KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL,
4856
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL,
36594857 __ENCODE_KTRACE_INFO(true, false, false));
36604858 return;
36614859 }
4860
+ kbase_debug_csf_fault_wait_completion(kbdev);
36624861 mutex_lock(&scheduler->lock);
36634862
3664
- /* Cycle completed, disable the firmware idle timer */
3665
- disable_gpu_idle_fw_timer(kbdev);
3666
- scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev);
3667
- reset_active = kbase_reset_gpu_is_active(kbdev);
3668
- if (scheduler_is_idle_suspendable && !reset_active) {
3669
- all_groups_suspended =
3670
- !suspend_active_groups_on_powerdown(kbdev, false);
4863
+#if IS_ENABLED(CONFIG_DEBUG_FS)
4864
+ if (unlikely(scheduler->state == SCHED_BUSY)) {
4865
+ mutex_unlock(&scheduler->lock);
4866
+ kbase_reset_gpu_allow(kbdev);
4867
+ return;
4868
+ }
4869
+#endif
36714870
3672
- if (all_groups_suspended) {
3673
- dev_dbg(kbdev->dev, "Scheduler becomes idle suspended now");
3674
- scheduler_suspend(kbdev);
3675
- cancel_tick_timer(kbdev);
3676
- } else {
3677
- dev_dbg(kbdev->dev, "Aborting suspend scheduler (grps: %d)",
3678
- atomic_read(&scheduler->non_idle_offslot_grps));
3679
- /* Bring forward the next tick */
3680
- kbase_csf_scheduler_advance_tick(kbdev);
3681
- }
4871
+ scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev);
4872
+ if (scheduler_is_idle_suspendable) {
4873
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_HANDLING_START, NULL,
4874
+ kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
4875
+#ifdef KBASE_PM_RUNTIME
4876
+ if (kbase_pm_gpu_sleep_allowed(kbdev) &&
4877
+ kbase_csf_scheduler_get_nr_active_csgs(kbdev))
4878
+ scheduler_sleep_on_idle(kbdev);
4879
+ else
4880
+#endif
4881
+ all_groups_suspended = scheduler_suspend_on_idle(kbdev);
4882
+
4883
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_HANDLING_END, NULL, 0u);
36824884 }
36834885
36844886 mutex_unlock(&scheduler->lock);
36854887 kbase_reset_gpu_allow(kbdev);
3686
- KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL,
3687
- __ENCODE_KTRACE_INFO(reset_active, scheduler_is_idle_suspendable, all_groups_suspended));
4888
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL,
4889
+ __ENCODE_KTRACE_INFO(false, scheduler_is_idle_suspendable,
4890
+ all_groups_suspended));
36884891 #undef __ENCODE_KTRACE_INFO
36894892 }
36904893
36914894 static int scheduler_prepare(struct kbase_device *kbdev)
36924895 {
36934896 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4897
+ unsigned long flags;
36944898 int i;
36954899
36964900 lockdep_assert_held(&scheduler->lock);
....@@ -3716,6 +4920,9 @@
37164920 scheduler->num_csg_slots_for_tick = 0;
37174921 bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS);
37184922
4923
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4924
+ scheduler->tick_protm_pending_seq =
4925
+ KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
37194926 /* Scan out to run groups */
37204927 for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) {
37214928 struct kbase_context *kctx;
....@@ -3723,6 +4930,7 @@
37234930 list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link)
37244931 scheduler_ctx_scan_groups(kbdev, kctx, i);
37254932 }
4933
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
37264934
37274935 /* Update this tick's non-idle groups */
37284936 scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule;
....@@ -3734,14 +4942,13 @@
37344942 */
37354943 atomic_set(&scheduler->non_idle_offslot_grps,
37364944 scheduler->non_idle_scanout_grps);
3737
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, NULL,
4945
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, NULL,
37384946 scheduler->non_idle_scanout_grps);
37394947
37404948 /* Adds those idle but runnable groups to the scanout list */
37414949 scheduler_scan_idle_groups(kbdev);
37424950
3743
- /* After adding the idle CSGs, the two counts should be the same */
3744
- WARN_ON(scheduler->csg_scan_count_for_tick != scheduler->ngrp_to_schedule);
4951
+ WARN_ON(scheduler->csg_scan_count_for_tick < scheduler->ngrp_to_schedule);
37454952
37464953 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
37474954 scheduler->num_active_address_spaces |
....@@ -3752,58 +4959,318 @@
37524959 return 0;
37534960 }
37544961
3755
-static void scheduler_handle_idle_timer_onoff(struct kbase_device *kbdev)
4962
+/**
4963
+ * keep_lru_on_slots() - Check the condition for LRU is met.
4964
+ *
4965
+ * @kbdev: Pointer to the device.
4966
+ *
4967
+ * This function tries to maintain the Last-Recent-Use case on slots, when
4968
+ * the scheduler has no non-idle off-slot CSGs for a replacement
4969
+ * consideration. This effectively extends the previous scheduling results
4970
+ * for the new one. That is, the last recent used CSGs are retained on slots
4971
+ * for the new tick/tock action.
4972
+ *
4973
+ * Return: true for avoiding on-slot CSGs changes (i.e. keep existing LRU),
4974
+ * otherwise false.
4975
+ */
4976
+static bool keep_lru_on_slots(struct kbase_device *kbdev)
37564977 {
37574978 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4979
+ bool keep_lru = false;
4980
+ int on_slots = bitmap_weight(scheduler->csg_inuse_bitmap,
4981
+ kbdev->csf.global_iface.group_num);
37584982
37594983 lockdep_assert_held(&scheduler->lock);
37604984
3761
- /* After the scheduler apply operation, the internal variable
3762
- * scheduler->non_idle_offslot_grps reflects the end-point view
3763
- * of the count at the end of the active phase.
3764
- *
3765
- * Any changes that follow (after the scheduler has dropped the
3766
- * scheduler->lock), reflects async operations to the scheduler,
3767
- * such as a group gets killed (evicted) or a new group inserted,
3768
- * cqs wait-sync triggered state transtion etc.
3769
- *
3770
- * The condition for enable the idle timer is that there is no
3771
- * non-idle groups off-slots. If there is non-idle group off-slot,
3772
- * the timer should be disabled.
3773
- */
3774
- if (atomic_read(&scheduler->non_idle_offslot_grps))
3775
- disable_gpu_idle_fw_timer(kbdev);
3776
- else
3777
- enable_gpu_idle_fw_timer(kbdev);
4985
+ if (on_slots && !atomic_read(&scheduler->non_idle_offslot_grps)) {
4986
+ unsigned long flags;
4987
+
4988
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4989
+ /* All on-slots are idle, no non-idle off-slot CSGs available
4990
+ * for considering a meaningful change. Set keep_lru.
4991
+ */
4992
+ keep_lru = kbase_csf_scheduler_all_csgs_idle(kbdev);
4993
+
4994
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4995
+
4996
+ dev_dbg(kbdev->dev, "Keep_LRU: %d, CSGs on-slots: %d\n",
4997
+ keep_lru, on_slots);
4998
+ }
4999
+
5000
+ return keep_lru;
37785001 }
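/* Editorial illustration, not part of the diff: the keep-LRU condition above,
 * modelled in userspace with a single-word slot bitmap. The previous slot
 * arrangement is kept only when slots are occupied, every occupied slot is
 * flagged idle, and nothing non-idle is waiting off-slot. A sketch under
 * those assumptions; __builtin_popcountl stands in for bitmap_weight().
 */
#include <stdio.h>
#include <stdbool.h>

static bool keep_lru(unsigned long inuse_bitmap, unsigned long idle_bitmap,
		     int non_idle_offslot_grps)
{
	int on_slots = __builtin_popcountl(inuse_bitmap);

	if (!on_slots || non_idle_offslot_grps)
		return false;
	/* All on-slot CSGs must currently be flagged idle */
	return (inuse_bitmap & ~idle_bitmap) == 0;
}

int main(void)
{
	printf("%d\n", keep_lru(0x7UL, 0x7UL, 0)); /* 1: keep previous slots */
	printf("%d\n", keep_lru(0x7UL, 0x3UL, 0)); /* 0: slot 2 is not idle */
	printf("%d\n", keep_lru(0x7UL, 0x7UL, 2)); /* 0: off-slot work waiting */
	return 0;
}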
37795002
3780
-static void schedule_actions(struct kbase_device *kbdev)
5003
+/**
5004
+ * prepare_fast_local_tock() - Make preparations for exercising
5005
+ * a fast local tock inside scheduling-actions.
5006
+ *
5007
+ * @kbdev: Pointer to the GPU device.
5008
+ *
5009
+ * The function assumes that firing a fast local tock (i.e. an equivalent
5010
+ * tock action without dropping the scheduler lock) is desired when there
5011
+ * are idle on-slot CSGs. It updates the run-state of the affected CSGs as
5012
+ * a preparation step. This should only be called from inside
5013
+ * schedule_actions(), where the previously recorded idle flags are still
5014
+ * considered representative, as they follow the earlier idle confirmation
5015
+ * call plus any CSGs that newly became idle during the committing steps of
5016
+ * the scheduling action.
5017
+ *
5018
+ * Return: number of on-slots CSGs that can be considered for replacing.
5019
+ */
5020
+static int prepare_fast_local_tock(struct kbase_device *kbdev)
5021
+{
5022
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5023
+ u32 num_groups = kbdev->csf.global_iface.group_num;
5024
+ unsigned long flags, i;
5025
+ DECLARE_BITMAP(csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 };
5026
+
5027
+ lockdep_assert_held(&scheduler->lock);
5028
+
5029
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
5030
+ bitmap_copy(csg_bitmap, scheduler->csg_slots_idle_mask, num_groups);
5031
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5032
+
5033
+ /* Mark the flagged idle CSGs' run state as IDLE, so that
5034
+ * the intended fast local tock can replace them with off-slot
5035
+ * non-idle CSGs.
5036
+ */
5037
+ for_each_set_bit(i, csg_bitmap, num_groups) {
5038
+ struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
5039
+ struct kbase_queue_group *group = csg_slot->resident_group;
5040
+
5041
+ if (!queue_group_idle_locked(group)) {
5042
+ group->run_state = KBASE_CSF_GROUP_IDLE;
5043
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_IDLE, group, group->run_state);
5044
+ }
5045
+ }
5046
+
5047
+ /* Return the number of idle slots for potential replacement */
5048
+ return bitmap_weight(csg_bitmap, num_groups);
5049
+}
5050
+
5051
+static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slot_mask,
5052
+ unsigned int timeout_ms)
5053
+{
5054
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5055
+ long remaining = kbase_csf_timeout_in_jiffies(timeout_ms);
5056
+ u32 num_groups = kbdev->csf.global_iface.group_num;
5057
+ int err = 0;
5058
+ DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS);
5059
+
5060
+ lockdep_assert_held(&scheduler->lock);
5061
+
5062
+ bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS);
5063
+
5064
+ while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS) && remaining) {
5065
+ DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
5066
+
5067
+ bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS);
5068
+
5069
+ remaining = wait_event_timeout(
5070
+ kbdev->csf.event_wait,
5071
+ slots_state_changed(kbdev, changed, csg_slot_stopped_locked), remaining);
5072
+
5073
+ if (likely(remaining)) {
5074
+ u32 i;
5075
+
5076
+ for_each_set_bit(i, changed, num_groups) {
5077
+ struct kbase_queue_group *group;
5078
+
5079
+ if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i)))
5080
+ continue;
5081
+
5082
+ /* The on slot csg is now stopped */
5083
+ clear_bit(i, slot_mask_local);
5084
+
5085
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
5086
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
5087
+
5088
+ group = scheduler->csg_slots[i].resident_group;
5089
+ if (likely(group)) {
5090
+ /* Only do save/cleanup if the
5091
+ * group is not terminated during
5092
+ * the sleep.
5093
+ */
5094
+ save_csg_slot(group);
5095
+ if (cleanup_csg_slot(group))
5096
+ sched_evict_group(group, true, true);
5097
+ }
5098
+ }
5099
+ } else {
5100
+ dev_warn(
5101
+ kbdev->dev,
5102
+ "[%llu] Suspend request sent on CSG slots 0x%lx timed out for slots 0x%lx",
5103
+ kbase_backend_get_cycle_cnt(kbdev), slot_mask[0],
5104
+ slot_mask_local[0]);
5105
+ /* Return the bitmask of the timed out slots to the caller */
5106
+ bitmap_copy(slot_mask, slot_mask_local, MAX_SUPPORTED_CSGS);
5107
+
5108
+ err = -ETIMEDOUT;
5109
+ }
5110
+ }
5111
+
5112
+ return err;
5113
+}
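/* Editorial illustration, not part of the diff: the suspend-wait loop above
 * keeps waiting on a shrinking slot mask until it empties or the time budget
 * runs out. A userspace simulation, where each fake wait round reports one
 * slot as stopped and costs part of the budget; all names and values here
 * are hypothetical.
 */
#include <stdio.h>

/* Pretend one slot stops per round; return the slots that changed state */
static unsigned long fake_wait_round(unsigned long pending)
{
	return pending ? (pending & -pending) : 0; /* lowest pending bit */
}

int main(void)
{
	unsigned long pending = 0x0bUL; /* slots 0, 1 and 3 still running */
	int budget = 2;                 /* rounds available before timeout */
	int timed_out = 0;

	while (pending && budget) {
		unsigned long stopped = fake_wait_round(pending);

		pending &= ~stopped;
		budget--;
	}
	if (pending)
		timed_out = 1;

	printf("timed out: %d, still pending: 0x%lx\n", timed_out, pending);
	return 0;
}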
5114
+
5115
+/**
5116
+ * evict_lru_or_blocked_csg() - Evict the least-recently-used idle or blocked CSG
5117
+ *
5118
+ * @kbdev: Pointer to the device
5119
+ *
5120
+ * Used to allow for speedier starting/resumption of another CSG. The worst-case
5121
+ * scenario of the evicted CSG being scheduled next is expected to be rare.
5122
+ * Also, the eviction will not be applied if the GPU is running in protected mode.
5123
+ * Otherwise the eviction attempt would force the MCU to quit the execution of
5124
+ * the protected mode, and likely re-request to enter it again.
5125
+ */
5126
+static void evict_lru_or_blocked_csg(struct kbase_device *kbdev)
5127
+{
5128
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5129
+ size_t i;
5130
+ struct kbase_queue_group *lru_idle_group = NULL;
5131
+ const u32 total_csg_slots = kbdev->csf.global_iface.group_num;
5132
+ const bool all_addr_spaces_used = (scheduler->num_active_address_spaces >=
5133
+ (kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS));
5134
+ u8 as_usage[BASE_MAX_NR_AS] = { 0 };
5135
+
5136
+ lockdep_assert_held(&scheduler->lock);
5137
+ if (kbase_csf_scheduler_protected_mode_in_use(kbdev))
5138
+ return;
5139
+
5140
+ BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(int) * BITS_PER_BYTE));
5141
+ if (fls(scheduler->csg_inuse_bitmap[0]) != total_csg_slots)
5142
+ return; /* Some CSG slots remain unused */
5143
+
5144
+ if (all_addr_spaces_used) {
5145
+ for (i = 0; i != total_csg_slots; ++i) {
5146
+ if (scheduler->csg_slots[i].resident_group != NULL) {
5147
+ if (WARN_ON(scheduler->csg_slots[i].resident_group->kctx->as_nr <
5148
+ 0))
5149
+ continue;
5150
+
5151
+ as_usage[scheduler->csg_slots[i].resident_group->kctx->as_nr]++;
5152
+ }
5153
+ }
5154
+ }
5155
+
5156
+ for (i = 0; i != total_csg_slots; ++i) {
5157
+ struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group;
5158
+
5159
+ /* We expect that by this point all groups would normally be
5160
+ * assigned a physical CSG slot, but if circumstances have
5161
+ * changed then bail out of this optimisation.
5162
+ */
5163
+ if (group == NULL)
5164
+ return;
5165
+
5166
+ /* Real-time priority CSGs must be kept on-slot even when
5167
+ * idle.
5168
+ */
5169
+ if ((group->run_state == KBASE_CSF_GROUP_IDLE) &&
5170
+ (group->priority != BASE_QUEUE_GROUP_PRIORITY_REALTIME) &&
5171
+ ((lru_idle_group == NULL) ||
5172
+ (lru_idle_group->prepared_seq_num < group->prepared_seq_num))) {
5173
+ if (WARN_ON(group->kctx->as_nr < 0))
5174
+ continue;
5175
+
5176
+ /* If all address spaces are used, we need to ensure the group does not
5177
+ * share the AS with other active CSGs, otherwise the CSG would be freed
5178
+ * without an AS and this optimization would not work.
5179
+ */
5180
+ if ((!all_addr_spaces_used) || (as_usage[group->kctx->as_nr] == 1))
5181
+ lru_idle_group = group;
5182
+ }
5183
+ }
5184
+
5185
+ if (lru_idle_group != NULL) {
5186
+ unsigned long slot_mask = 1 << lru_idle_group->csg_nr;
5187
+
5188
+ dev_dbg(kbdev->dev, "Suspending LRU idle group %d of context %d_%d on slot %d",
5189
+ lru_idle_group->handle, lru_idle_group->kctx->tgid,
5190
+ lru_idle_group->kctx->id, lru_idle_group->csg_nr);
5191
+ suspend_queue_group(lru_idle_group);
5192
+ if (wait_csg_slots_suspend(kbdev, &slot_mask, kbdev->csf.fw_timeout_ms)) {
5193
+ enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
5194
+
5195
+ dev_warn(
5196
+ kbdev->dev,
5197
+ "[%llu] LRU idle group %d of context %d_%d failed to suspend on slot %d (timeout %d ms)",
5198
+ kbase_backend_get_cycle_cnt(kbdev), lru_idle_group->handle,
5199
+ lru_idle_group->kctx->tgid, lru_idle_group->kctx->id,
5200
+ lru_idle_group->csg_nr, kbdev->csf.fw_timeout_ms);
5201
+ if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
5202
+ error_type = DF_PING_REQUEST_TIMEOUT;
5203
+ schedule_actions_trigger_df(kbdev, lru_idle_group->kctx, error_type);
5204
+ }
5205
+ }
5206
+}
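/* Editorial illustration, not part of the diff: the candidate selection in
 * evict_lru_or_blocked_csg() above, reduced to a userspace scan over a
 * hypothetical slot array. Idle, non-realtime groups compete and the one
 * with the largest prepared sequence number is picked; the address-space
 * sharing constraint is omitted here for brevity.
 */
#include <stdio.h>
#include <stdbool.h>

struct model_group {
	int handle;
	bool idle;
	bool realtime;
	unsigned int prepared_seq_num;
};

static const struct model_group *pick_victim(const struct model_group *slots, int n)
{
	const struct model_group *victim = NULL;
	int i;

	for (i = 0; i < n; i++) {
		const struct model_group *g = &slots[i];

		if (!g->idle || g->realtime)
			continue;
		if (!victim || victim->prepared_seq_num < g->prepared_seq_num)
			victim = g;
	}
	return victim;
}

int main(void)
{
	struct model_group slots[] = {
		{ .handle = 1, .idle = true,  .realtime = false, .prepared_seq_num = 3 },
		{ .handle = 2, .idle = true,  .realtime = true,  .prepared_seq_num = 9 },
		{ .handle = 3, .idle = true,  .realtime = false, .prepared_seq_num = 7 },
		{ .handle = 4, .idle = false, .realtime = false, .prepared_seq_num = 8 },
	};
	const struct model_group *victim = pick_victim(slots, 4);

	printf("victim handle: %d\n", victim ? victim->handle : -1); /* 3 */
	return 0;
}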
5207
+
5208
+static void schedule_actions(struct kbase_device *kbdev, bool is_tick)
37815209 {
37825210 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
37835211 unsigned long flags;
37845212 struct kbase_queue_group *protm_grp;
37855213 int ret;
5214
+ bool skip_scheduling_actions;
37865215 bool skip_idle_slots_update;
37875216 bool new_protm_top_grp = false;
5217
+ int local_tock_slots = 0;
37885218
37895219 kbase_reset_gpu_assert_prevented(kbdev);
37905220 lockdep_assert_held(&scheduler->lock);
37915221
3792
- ret = kbase_pm_wait_for_desired_state(kbdev);
5222
+ ret = kbase_csf_scheduler_wait_mcu_active(kbdev);
37935223 if (ret) {
3794
- dev_err(kbdev->dev, "Wait for MCU power on failed");
5224
+ dev_err(kbdev->dev,
5225
+ "Wait for MCU power on failed on scheduling tick/tock");
37955226 return;
37965227 }
37975228
37985229 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
37995230 skip_idle_slots_update = kbase_csf_scheduler_protected_mode_in_use(kbdev);
5231
+ skip_scheduling_actions =
5232
+ !skip_idle_slots_update && kbdev->protected_mode;
38005233 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
38015234
3802
- /* Skip updating on-slot idle CSGs if GPU is in protected mode. */
3803
- if (!skip_idle_slots_update)
5235
+ /* Skip scheduling actions as GPU reset hasn't been performed yet to
5236
+ * rectify the anomaly that happened when pmode exit interrupt wasn't
5237
+ * received before the termination of group running in pmode.
5238
+ */
5239
+ if (unlikely(skip_scheduling_actions)) {
5240
+ dev_info(kbdev->dev,
5241
+ "Scheduling actions skipped due to anomaly in pmode");
5242
+ return;
5243
+ }
5244
+
5245
+ if (!skip_idle_slots_update) {
5246
+ /* Updating on-slot idle CSGs when not in protected mode. */
38045247 scheduler_handle_idle_slots(kbdev);
38055248
5249
+ /* Determine whether the condition is met for keeping the
5250
+ * Last-Recent-Use. If true, skip the remaining action
5251
+ * steps and thus extend the previous tick's arrangement;
5252
+ * in particular, no alterations to on-slot CSGs.
5253
+ */
5254
+ if (keep_lru_on_slots(kbdev))
5255
+ return;
5256
+ }
5257
+
5258
+ if (is_tick)
5259
+ scheduler_rotate(kbdev);
5260
+
5261
+redo_local_tock:
38065262 scheduler_prepare(kbdev);
5263
+ /* Need to specifically enqueue the GPU idle work if there are no groups
5264
+ * to schedule despite the runnable groups. This scenario will happen
5265
+ * if System suspend is done when all groups are idle and no work
5266
+ * is submitted for the groups after the System resume.
5267
+ */
5268
+ if (unlikely(!scheduler->ngrp_to_schedule &&
5269
+ scheduler->total_runnable_grps)) {
5270
+ dev_dbg(kbdev->dev, "No groups to schedule in the tick");
5271
+ enqueue_gpu_idle_work(scheduler);
5272
+ return;
5273
+ }
38075274 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
38085275 protm_grp = scheduler->active_protm_grp;
38095276
....@@ -3818,12 +5285,13 @@
38185285 * queue jobs.
38195286 */
38205287 if (protm_grp && scheduler->top_grp == protm_grp) {
3821
- int new_val;
38225288 dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d",
38235289 protm_grp->handle);
3824
- new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps);
3825
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
3826
- protm_grp, new_val);
5290
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5291
+
5292
+ update_offslot_non_idle_cnt_for_onslot_grp(protm_grp);
5293
+ remove_scheduled_group(kbdev, protm_grp);
5294
+ scheduler_check_pmode_progress(kbdev);
38275295 } else if (scheduler->top_grp) {
38285296 if (protm_grp)
38295297 dev_dbg(kbdev->dev, "Scheduler drop protm exec: group-%d",
....@@ -3848,11 +5316,6 @@
38485316
38495317 scheduler_apply(kbdev);
38505318
3851
- /* Post-apply, all the committed groups in this tick are on
3852
- * slots, time to arrange the idle timer on/off decision.
3853
- */
3854
- scheduler_handle_idle_timer_onoff(kbdev);
3855
-
38565319 /* Scheduler is dropping the exec of the previous protm_grp,
38575320 * Until the protm quit completes, the GPU is effectively
38585321 * locked in the secure mode.
....@@ -3866,46 +5329,119 @@
38665329 if (new_protm_top_grp) {
38675330 scheduler_group_check_protm_enter(kbdev,
38685331 scheduler->top_grp);
3869
- }
5332
+ } else if (!local_tock_slots &&
5333
+ atomic_read(&scheduler->non_idle_offslot_grps)) {
5334
+ /* If, during the scheduling action, there are off-slot
5335
+ * non-idle CSGs waiting and some new idle slots have
5336
+ * emerged during the committed action steps, trigger a
5337
+ * one-off fast local tock.
5338
+ */
5339
+ local_tock_slots = prepare_fast_local_tock(kbdev);
38705340
3871
- return;
5341
+ if (local_tock_slots) {
5342
+ dev_dbg(kbdev->dev,
5343
+ "In-cycle %d idle slots available\n",
5344
+ local_tock_slots);
5345
+ goto redo_local_tock;
5346
+ }
5347
+ }
5348
+ } else {
5349
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
38725350 }
38735351
3874
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
3875
- return;
5352
+ evict_lru_or_blocked_csg(kbdev);
5353
+}
5354
+
5355
+/**
5356
+ * can_skip_scheduling() - Check if the scheduling actions can be skipped.
5357
+ *
5358
+ * @kbdev: Pointer to the device
5359
+ *
5360
+ * This function is called on a scheduling tick or tock to determine if the
5361
+ * scheduling actions can be skipped.
5362
+ * If Scheduler is in sleeping state and exit from the sleep state is allowed
5363
+ * then activation of MCU will be triggered. The tick or tock work item could
5364
+ * have been in flight when the state of Scheduler was changed to sleeping.
5365
+ *
5366
+ * Return: true if the scheduling actions can be skipped.
5367
+ */
5368
+static bool can_skip_scheduling(struct kbase_device *kbdev)
5369
+{
5370
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5371
+
5372
+ lockdep_assert_held(&scheduler->lock);
5373
+
5374
+ if (unlikely(!kbase_reset_gpu_is_not_pending(kbdev)))
5375
+ return true;
5376
+
5377
+ if (scheduler->state == SCHED_SUSPENDED)
5378
+ return true;
5379
+
5380
+#ifdef KBASE_PM_RUNTIME
5381
+ if (scheduler->state == SCHED_SLEEPING) {
5382
+ unsigned long flags;
5383
+
5384
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
5385
+ if (kbdev->pm.backend.exit_gpu_sleep_mode) {
5386
+ int ret = scheduler_pm_active_after_sleep(kbdev, &flags);
5387
+
5388
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
5389
+ if (!ret) {
5390
+ scheduler->state = SCHED_INACTIVE;
5391
+ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
5392
+ return false;
5393
+ }
5394
+
5395
+ dev_info(kbdev->dev,
5396
+ "Skip scheduling due to system suspend");
5397
+ return true;
5398
+ }
5399
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
5400
+ return true;
5401
+ }
5402
+#endif
5403
+
5404
+ return false;
38765405 }
38775406
38785407 static void schedule_on_tock(struct work_struct *work)
38795408 {
3880
- struct kbase_device *kbdev = container_of(work, struct kbase_device,
3881
- csf.scheduler.tock_work.work);
5409
+ struct kbase_device *kbdev =
5410
+ container_of(work, struct kbase_device, csf.scheduler.tock_work.work);
38825411 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5412
+ int err;
38835413
3884
- int err = kbase_reset_gpu_try_prevent(kbdev);
5414
+ err = kbase_reset_gpu_try_prevent(kbdev);
38855415 /* Regardless of whether reset failed or is currently happening, exit
38865416 * early
38875417 */
38885418 if (err)
38895419 return;
38905420
5421
+ kbase_debug_csf_fault_wait_completion(kbdev);
38915422 mutex_lock(&scheduler->lock);
3892
- if (scheduler->state == SCHED_SUSPENDED)
5423
+ if (can_skip_scheduling(kbdev)) {
5425
+ atomic_set(&scheduler->pending_tock_work, false);
38935426 goto exit_no_schedule_unlock;
5427
+ }
38945428
38955429 WARN_ON(!(scheduler->state == SCHED_INACTIVE));
38965430 scheduler->state = SCHED_BUSY;
5431
+ KBASE_KTRACE_ADD(kbdev, SCHED_BUSY, NULL, scheduler->state);
38975432
38985433 /* Undertaking schedule action steps */
3899
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK, NULL, 0u);
3900
- schedule_actions(kbdev);
5434
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_START, NULL, 0u);
5435
+ while (atomic_cmpxchg(&scheduler->pending_tock_work, true, false) == true)
5436
+ schedule_actions(kbdev, false);
39015437
3902
- /* Record time information */
5438
+ /* Record time information on a non-skipped tock */
39035439 scheduler->last_schedule = jiffies;
39045440
3905
- /* Tock is serviced */
3906
- scheduler->tock_pending_request = false;
3907
-
39085441 scheduler->state = SCHED_INACTIVE;
5442
+ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
5443
+ if (!scheduler->total_runnable_grps)
5444
+ enqueue_gpu_idle_work(scheduler);
39095445 mutex_unlock(&scheduler->lock);
39105446 kbase_reset_gpu_allow(kbdev);
39115447
....@@ -3922,8 +5458,8 @@
39225458
39235459 static void schedule_on_tick(struct work_struct *work)
39245460 {
3925
- struct kbase_device *kbdev = container_of(work, struct kbase_device,
3926
- csf.scheduler.tick_work);
5461
+ struct kbase_device *kbdev =
5462
+ container_of(work, struct kbase_device, csf.scheduler.tick_work);
39275463 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
39285464
39295465 int err = kbase_reset_gpu_try_prevent(kbdev);
....@@ -3933,20 +5469,19 @@
39335469 if (err)
39345470 return;
39355471
5472
+ kbase_debug_csf_fault_wait_completion(kbdev);
39365473 mutex_lock(&scheduler->lock);
39375474
39385475 WARN_ON(scheduler->tick_timer_active);
3939
- if (scheduler->state == SCHED_SUSPENDED)
5476
+ if (can_skip_scheduling(kbdev))
39405477 goto exit_no_schedule_unlock;
39415478
39425479 scheduler->state = SCHED_BUSY;
3943
- /* Do scheduling stuff */
3944
- scheduler_rotate(kbdev);
5480
+ KBASE_KTRACE_ADD(kbdev, SCHED_BUSY, NULL, scheduler->state);
39455481
39465482 /* Undertaking schedule action steps */
3947
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK, NULL,
3948
- scheduler->total_runnable_grps);
3949
- schedule_actions(kbdev);
5483
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_START, NULL, scheduler->total_runnable_grps);
5484
+ schedule_actions(kbdev, true);
39505485
39515486 /* Record time information */
39525487 scheduler->last_schedule = jiffies;
....@@ -3958,10 +5493,13 @@
39585493 dev_dbg(kbdev->dev,
39595494 "scheduling for next tick, num_runnable_groups:%u\n",
39605495 scheduler->total_runnable_grps);
5496
+ } else if (!scheduler->total_runnable_grps) {
5497
+ enqueue_gpu_idle_work(scheduler);
39615498 }
39625499
39635500 scheduler->state = SCHED_INACTIVE;
39645501 mutex_unlock(&scheduler->lock);
5502
+ KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
39655503 kbase_reset_gpu_allow(kbdev);
39665504
39675505 dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes.");
....@@ -3973,64 +5511,6 @@
39735511 exit_no_schedule_unlock:
39745512 mutex_unlock(&scheduler->lock);
39755513 kbase_reset_gpu_allow(kbdev);
3976
-}
3977
-
3978
-static int wait_csg_slots_suspend(struct kbase_device *kbdev,
3979
- const unsigned long *slot_mask,
3980
- unsigned int timeout_ms)
3981
-{
3982
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
3983
- long remaining = kbase_csf_timeout_in_jiffies(timeout_ms);
3984
- u32 num_groups = kbdev->csf.global_iface.group_num;
3985
- int err = 0;
3986
- DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS);
3987
-
3988
- lockdep_assert_held(&scheduler->lock);
3989
-
3990
- bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS);
3991
-
3992
- while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS)
3993
- && remaining) {
3994
- DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
3995
-
3996
- bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS);
3997
-
3998
- remaining = wait_event_timeout(kbdev->csf.event_wait,
3999
- slots_state_changed(kbdev, changed,
4000
- csg_slot_stopped_locked),
4001
- remaining);
4002
-
4003
- if (remaining) {
4004
- u32 i;
4005
-
4006
- for_each_set_bit(i, changed, num_groups) {
4007
- struct kbase_queue_group *group;
4008
-
4009
- if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i)))
4010
- continue;
4011
-
4012
- /* The on slot csg is now stopped */
4013
- clear_bit(i, slot_mask_local);
4014
-
4015
- group = scheduler->csg_slots[i].resident_group;
4016
- if (likely(group)) {
4017
- /* Only do save/cleanup if the
4018
- * group is not terminated during
4019
- * the sleep.
4020
- */
4021
- save_csg_slot(group);
4022
- if (cleanup_csg_slot(group))
4023
- sched_evict_group(group, true, true);
4024
- }
4025
- }
4026
- } else {
4027
- dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend, slot_mask: 0x%*pb\n",
4028
- num_groups, slot_mask_local);
4029
- err = -ETIMEDOUT;
4030
- }
4031
- }
4032
-
4033
- return err;
40345514 }
40355515
40365516 static int suspend_active_queue_groups(struct kbase_device *kbdev,
....@@ -4069,7 +5549,7 @@
40695549 ret = suspend_active_queue_groups(kbdev, slot_mask);
40705550
40715551 if (ret) {
4072
- dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n",
5552
+ dev_warn(kbdev->dev, "Timeout waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n",
40735553 kbdev->csf.global_iface.group_num, slot_mask);
40745554 }
40755555
....@@ -4083,12 +5563,16 @@
40835563 * due to the extra context ref-count, which prevents the
40845564 * L2 powering down cache clean operation in the non racing
40855565 * case.
5566
+ * LSC is being flushed together to cover buslogging usecase,
5567
+ * where GPU reset is done regularly to avoid the log buffer
5568
+ * overflow.
40865569 */
4087
- kbase_gpu_start_cache_clean(kbdev);
5570
+ kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
40885571 ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev,
40895572 kbdev->reset_timeout_ms);
40905573 if (ret2) {
4091
- dev_warn(kbdev->dev, "Timed out waiting for cache clean to complete before reset");
5574
+ dev_warn(kbdev->dev, "[%llu] Timeout waiting for cache clean to complete before reset",
5575
+ kbase_backend_get_cycle_cnt(kbdev));
40925576 if (!ret)
40935577 ret = ret2;
40945578 }
....@@ -4103,7 +5587,7 @@
41035587 * groups when reset is done during
41045588 * protected mode execution.
41055589 *
4106
- * @group: Pointer to the device.
5590
+ * @kbdev: Pointer to the device.
41075591 *
41085592 * This function is called at the time of GPU reset, before the suspension of
41095593 * queue groups, to handle the case when the reset is getting performed whilst
....@@ -4125,7 +5609,8 @@
41255609 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
41265610 u32 const num_groups = kbdev->csf.global_iface.group_num;
41275611 struct kbase_queue_group *protm_grp;
4128
- bool suspend_on_slot_groups;
5612
+ bool suspend_on_slot_groups = true;
5613
+ bool pmode_active;
41295614 unsigned long flags;
41305615 u32 csg_nr;
41315616
....@@ -4133,20 +5618,51 @@
41335618
41345619 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
41355620 protm_grp = scheduler->active_protm_grp;
5621
+ pmode_active = kbdev->protected_mode;
41365622
4137
- /* If GPU wasn't in protected mode or had exited it before the GPU reset
4138
- * then all the on-slot groups can be suspended in the regular way by
4139
- * sending CSG SUSPEND requests to FW.
4140
- * If there wasn't a fault for protected mode group, then it would
4141
- * also need to be suspended in the regular way before the reset.
4142
- */
4143
- suspend_on_slot_groups = !(protm_grp && protm_grp->faulted);
5623
+ if (likely(!protm_grp && !pmode_active)) {
5624
+ /* Case 1: GPU is not in protected mode or it successfully
5625
+ * exited protected mode. All on-slot groups can be suspended in
5626
+ * the regular way before reset.
5627
+ */
5628
+ suspend_on_slot_groups = true;
5629
+ } else if (protm_grp && pmode_active) {
5630
+ /* Case 2: GPU went successfully into protected mode and hasn't
5631
+ * exited from it yet and the protected mode group is still
5632
+ * active. If there was no fault for the protected mode group
5633
+ * then it can be suspended in the regular way before reset.
5634
+ * The other normal mode on-slot groups were already implicitly
5635
+ * suspended on entry to protected mode so they can be marked as
5636
+ * suspended right away.
5637
+ */
5638
+ suspend_on_slot_groups = !protm_grp->faulted;
5639
+ } else if (!protm_grp && pmode_active) {
5640
+ /* Case 3: GPU went successfully into protected mode and hasn't
5641
+ * exited from it yet but the protected mode group got deleted.
5642
+ * This would have happened if the FW got stuck during protected
5643
+ * mode for some reason (like GPU page fault or some internal
5644
+ * error). In normal cases FW is expected to send the pmode exit
5645
+ * interrupt before it handles the CSG termination request.
5646
+ * The other normal mode on-slot groups would already have been
5647
+ * implicitly suspended on entry to protected mode so they can be
5648
+ * marked as suspended right away.
5649
+ */
5650
+ suspend_on_slot_groups = false;
5651
+ } else if (protm_grp && !pmode_active) {
5652
+ /* Case 4: GPU couldn't successfully enter protected mode, i.e.
5653
+ * PROTM_ENTER request had timed out.
5654
+ * All the on-slot groups need to be suspended in the regular
5655
+ * way before reset.
5656
+ */
5657
+ suspend_on_slot_groups = true;
5658
+ }
5659
+
41445660 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
41455661
4146
- if (!protm_grp)
5662
+ if (likely(!pmode_active))
41475663 goto unlock;
41485664
4149
- /* GPU is in protected mode, so all the on-slot groups barring the
5665
+ /* GPU hasn't exited protected mode, so all the on-slot groups barring
41505666 * the protected mode group can be marked as suspended right away.
41515667 */
41525668 for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
....@@ -4159,19 +5675,30 @@
41595675
41605676 cleanup_csg_slot(group);
41615677 group->run_state = KBASE_CSF_GROUP_SUSPENDED;
5678
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED, group, group->run_state);
41625679
41635680 /* Simply treat the normal mode groups as non-idle. The tick
41645681 * scheduled after the reset will re-initialize the counter
41655682 * anyways.
41665683 */
41675684 new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps);
4168
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
4169
- group, new_val);
5685
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val);
41705686 }
41715687
41725688 unlock:
41735689 mutex_unlock(&scheduler->lock);
41745690 return suspend_on_slot_groups;
5691
+}
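/* Editorial illustration, not part of the diff: the four cases above collapse
 * to a two-input decision - whether a protected mode group is still tracked
 * and whether the GPU is still flagged as being in protected mode. A
 * userspace truth-table helper for the "suspend in the regular way?" outcome;
 * a sketch only, with hypothetical parameter names.
 */
#include <stdio.h>
#include <stdbool.h>

static bool regular_suspend_needed(bool protm_grp_present, bool protm_grp_faulted,
				   bool pmode_active)
{
	if (!pmode_active)
		return true;               /* cases 1 and 4 */
	if (protm_grp_present)
		return !protm_grp_faulted; /* case 2 */
	return false;                      /* case 3: FW stuck in pmode */
}

int main(void)
{
	printf("%d\n", regular_suspend_needed(false, false, false)); /* case 1: 1 */
	printf("%d\n", regular_suspend_needed(true, false, true));   /* case 2: 1 */
	printf("%d\n", regular_suspend_needed(false, false, true));  /* case 3: 0 */
	printf("%d\n", regular_suspend_needed(true, false, false));  /* case 4: 1 */
	return 0;
}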
5692
+
5693
+static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler)
5694
+{
5695
+ cancel_work_sync(&scheduler->tick_work);
5696
+}
5697
+
5698
+static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler)
5699
+{
5700
+ atomic_set(&scheduler->pending_tock_work, false);
5701
+ cancel_delayed_work_sync(&scheduler->tock_work);
41755702 }
41765703
41775704 static void scheduler_inner_reset(struct kbase_device *kbdev)
....@@ -4180,13 +5707,13 @@
41805707 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
41815708 unsigned long flags;
41825709
4183
- WARN_ON(csgs_active(kbdev));
5710
+ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
41845711
41855712 /* Cancel any potential queued delayed work(s) */
41865713 cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work);
41875714 cancel_tick_timer(kbdev);
4188
- cancel_work_sync(&scheduler->tick_work);
4189
- cancel_delayed_work_sync(&scheduler->tock_work);
5715
+ cancel_tick_work(scheduler);
5716
+ cancel_tock_work(scheduler);
41905717 cancel_delayed_work_sync(&scheduler->ping_work);
41915718
41925719 mutex_lock(&scheduler->lock);
....@@ -4194,8 +5721,8 @@
41945721 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
41955722 bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS);
41965723 if (scheduler->active_protm_grp)
4197
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM,
4198
- scheduler->active_protm_grp, 0u);
5724
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT, scheduler->active_protm_grp,
5725
+ 0u);
41995726 scheduler->active_protm_grp = NULL;
42005727 memset(kbdev->csf.scheduler.csg_slots, 0,
42015728 num_groups * sizeof(struct kbase_csf_csg_slot));
....@@ -4218,7 +5745,9 @@
42185745
42195746 WARN_ON(!kbase_reset_gpu_is_active(kbdev));
42205747
4221
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET, NULL, 0u);
5748
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_START, NULL, 0u);
5749
+
5750
+ kbase_debug_csf_fault_wait_completion(kbdev);
42225751
42235752 if (scheduler_handle_reset_in_protected_mode(kbdev) &&
42245753 !suspend_active_queue_groups_on_reset(kbdev)) {
....@@ -4256,6 +5785,8 @@
42565785
42575786 mutex_unlock(&kbdev->kctx_list_lock);
42585787
5788
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_END, NULL, 0u);
5789
+
42595790 /* After queue groups reset, the scheduler data fields clear out */
42605791 scheduler_inner_reset(kbdev);
42615792 }
....@@ -4292,10 +5823,11 @@
42925823 }
42935824 #endif
42945825
4295
- if (kbdev->csf.scheduler.state == SCHED_SUSPENDED)
5826
+ if (kbdev->csf.scheduler.state == SCHED_SUSPENDED ||
5827
+ kbdev->csf.scheduler.state == SCHED_SLEEPING)
42965828 goto exit;
42975829
4298
- if (get_nr_active_csgs(kbdev) != 1)
5830
+ if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) != 1)
42995831 goto exit;
43005832
43015833 if (kbase_csf_scheduler_protected_mode_in_use(kbdev))
....@@ -4307,9 +5839,9 @@
43075839 goto exit;
43085840 }
43095841
4310
- kbase_pm_wait_for_desired_state(kbdev);
5842
+ kbase_csf_scheduler_wait_mcu_active(kbdev);
43115843
4312
- err = kbase_csf_firmware_ping_wait(kbdev);
5844
+ err = kbase_csf_firmware_ping_wait(kbdev, kbdev->csf.fw_timeout_ms);
43135845
43145846 if (err) {
43155847 /* It is acceptable to enqueue a reset whilst we've prevented
....@@ -4318,17 +5850,16 @@
43185850 if (kbase_prepare_to_reset_gpu(
43195851 kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
43205852 kbase_reset_gpu(kbdev);
4321
- } else if (get_nr_active_csgs(kbdev) == 1) {
4322
- queue_delayed_work(system_long_wq,
4323
- &kbdev->csf.scheduler.ping_work,
4324
- msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS));
5853
+ } else if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) == 1) {
5854
+ queue_delayed_work(
5855
+ system_long_wq, &kbdev->csf.scheduler.ping_work,
5856
+ msecs_to_jiffies(kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_PING_TIMEOUT)));
43255857 }
43265858
43275859 kbase_pm_context_idle(kbdev);
43285860 exit:
43295861 mutex_unlock(&kbdev->csf.scheduler.lock);
43305862 kbase_reset_gpu_allow(kbdev);
4331
- return;
43325863 }
43335864
43345865 int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
....@@ -4337,13 +5868,42 @@
43375868 struct kbase_context *const kctx = group->kctx;
43385869 struct kbase_device *const kbdev = kctx->kbdev;
43395870 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5871
+ bool on_slot;
43405872 int err = 0;
43415873
43425874 kbase_reset_gpu_assert_prevented(kbdev);
43435875 lockdep_assert_held(&kctx->csf.lock);
43445876 mutex_lock(&scheduler->lock);
43455877
4346
- if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) {
5878
+ on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
5879
+
5880
+#ifdef KBASE_PM_RUNTIME
5881
+ if (on_slot && (scheduler->state == SCHED_SLEEPING)) {
5882
+ if (wait_for_scheduler_to_exit_sleep(kbdev)) {
5883
+ dev_warn(
5884
+ kbdev->dev,
5885
+ "Wait for scheduler to exit sleep state timedout when copying suspend buffer for group %d of ctx %d_%d on slot %d",
5886
+ group->handle, group->kctx->tgid,
5887
+ group->kctx->id, group->csg_nr);
5888
+
5889
+ scheduler_wakeup(kbdev, true);
5890
+
5891
+ /* Wait for MCU firmware to start running */
5892
+ if (kbase_csf_scheduler_wait_mcu_active(kbdev))
5893
+ dev_warn(
5894
+ kbdev->dev,
5895
+ "Wait for MCU active failed when copying suspend buffer for group %d of ctx %d_%d on slot %d",
5896
+ group->handle, group->kctx->tgid,
5897
+ group->kctx->id, group->csg_nr);
5898
+ }
5899
+
5900
+ /* Check the group state again as scheduler lock would have been
5901
+ * released when waiting for the exit from SLEEPING state.
5902
+ */
5903
+ on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
5904
+ }
5905
+#endif
5906
+ if (on_slot) {
43475907 DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
43485908
43495909 set_bit(kbase_csf_scheduler_group_get_slot(group), slot_mask);
....@@ -4353,8 +5913,9 @@
43535913 err = wait_csg_slots_suspend(kbdev, slot_mask,
43545914 kbdev->csf.fw_timeout_ms);
43555915 if (err) {
4356
- dev_warn(kbdev->dev, "Timed out waiting for the group %d to suspend on slot %d",
4357
- group->handle, group->csg_nr);
5916
+ dev_warn(kbdev->dev, "[%llu] Timeout waiting for the group %d to suspend on slot %d",
5917
+ kbase_backend_get_cycle_cnt(kbdev),
5918
+ group->handle, group->csg_nr);
43585919 goto exit;
43595920 }
43605921 }
....@@ -4363,13 +5924,18 @@
43635924 unsigned int target_page_nr = 0, i = 0;
43645925 u64 offset = sus_buf->offset;
43655926 size_t to_copy = sus_buf->size;
5927
+ const u32 csg_suspend_buf_nr_pages =
5928
+ PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
43665929
43675930 if (scheduler->state != SCHED_SUSPENDED) {
43685931 /* Similar to the case of HW counters, need to flush
4369
- * the GPU cache before reading from the suspend buffer
5932
+ * the GPU L2 cache before reading from the suspend buffer
43705933 * pages as they are mapped and cached on GPU side.
5934
+ * Flushing LSC is not done here, since only the flush of
5935
+ * CSG suspend buffer contents is needed from the L2 cache.
43715936 */
4372
- kbase_gpu_start_cache_clean(kbdev);
5937
+ kbase_gpu_start_cache_clean(
5938
+ kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
43735939 kbase_gpu_wait_cache_clean(kbdev);
43745940 } else {
43755941 /* Make sure power down transitions have completed,
....@@ -4381,7 +5947,7 @@
43815947 kbase_pm_wait_for_desired_state(kbdev);
43825948 }
43835949
4384
- for (i = 0; i < PFN_UP(sus_buf->size) &&
5950
+ for (i = 0; i < csg_suspend_buf_nr_pages &&
43855951 target_page_nr < sus_buf->nr_pages; i++) {
43865952 struct page *pg =
43875953 as_page(group->normal_suspend_buf.phy[i]);
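/*
 * Editor's note: illustrative sketch only, not part of the patch. The copy
 * loop above walks the CSG suspend buffer page by page, now bounded by
 * PFN_UP(suspend_size) rather than by the size of the caller's buffer. A
 * stripped-down page-wise copy (ignoring the destination offset handling of
 * the real function; all names are invented) could look like this on recent
 * kernels that provide kmap_local_page():
 */
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/kernel.h>

static void copy_buf_pages(struct page **dst_pages, size_t dst_nr,
			   struct page **src_pages, size_t size)
{
	size_t to_copy = size;
	unsigned int i;

	for (i = 0; i < PFN_UP(size) && i < dst_nr; i++) {
		size_t chunk = min_t(size_t, to_copy, PAGE_SIZE);
		void *src = kmap_local_page(src_pages[i]);
		void *dst = kmap_local_page(dst_pages[i]);

		memcpy(dst, src, chunk);
		to_copy -= chunk;

		kunmap_local(dst);
		kunmap_local(src);
	}
}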
....@@ -4538,6 +6104,11 @@
45386104
45396105 mutex_lock(&scheduler->lock);
45406106
6107
+ if (group->run_state == KBASE_CSF_GROUP_IDLE) {
6108
+ group->run_state = KBASE_CSF_GROUP_RUNNABLE;
6109
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
6110
+ group->run_state);
6111
+ }
45416112 /* Check if the group is now eligible for execution in protected mode. */
45426113 if (scheduler_get_protm_enter_async_group(kbdev, group))
45436114 scheduler_group_check_protm_enter(kbdev, group);
....@@ -4547,20 +6118,22 @@
45476118 }
45486119
45496120 /**
4550
- * check_sync_update_for_idle_group_protm() - Check the sync wait condition
4551
- * for all the queues bound to
4552
- * the given group.
6121
+ * check_sync_update_for_on_slot_group() - Check the sync wait condition
6122
+ * for all the queues bound to
6123
+ * the given on-slot group.
45536124 *
4554
- * @group: Pointer to the group that requires evaluation.
6125
+ * @group: Pointer to the on-slot group that requires evaluation.
45556126 *
45566127 * This function is called if the GPU is in protected mode and there are on
4557
- * slot idle groups with higher priority than the active protected mode group.
6128
+ * slot idle groups with higher priority than the active protected mode group,
6129
+ * or when a CQS object is signaled whilst the GPU is in
6130
+ * sleep state.
45586131 * This function will evaluate the sync condition, if any, of all the queues
45596132 * bound to the given group.
45606133 *
4561
- * Return true if the sync condition of at least one queue has been satisfied.
6134
+ * Return: true if the sync condition of at least one queue has been satisfied.
45626135 */
4563
-static bool check_sync_update_for_idle_group_protm(
6136
+static bool check_sync_update_for_on_slot_group(
45646137 struct kbase_queue_group *group)
45656138 {
45666139 struct kbase_device *const kbdev = group->kctx->kbdev;
....@@ -4583,7 +6156,7 @@
45836156 stream, CS_STATUS_WAIT);
45846157 unsigned long flags;
45856158
4586
- KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT,
6159
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS,
45876160 queue->group, queue, status);
45886161
45896162 if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status))
....@@ -4625,7 +6198,13 @@
46256198 scheduler->csg_slots_idle_mask[0]);
46266199 spin_unlock_irqrestore(
46276200 &scheduler->interrupt_lock, flags);
6201
+ /* Request the scheduler to confirm the condition inferred
6202
+ * here inside the protected mode.
6203
+ */
6204
+ group->reevaluate_idle_status = true;
46286205 group->run_state = KBASE_CSF_GROUP_RUNNABLE;
6206
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
6207
+ group->run_state);
46296208 }
46306209
46316210 KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u);
....@@ -4647,7 +6226,7 @@
46476226 * protected mode that has a higher priority than the active protected mode
46486227 * group.
46496228 *
4650
- * Return true if the sync condition of at least one queue in a group has been
6229
+ * Return: true if the sync condition of at least one queue in a group has been
46516230 * satisfied.
46526231 */
46536232 static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev)
....@@ -4680,12 +6259,34 @@
46806259 * has a higher priority than the protm group, then we
46816260 * need to exit protected mode.
46826261 */
4683
- if (check_sync_update_for_idle_group_protm(group))
6262
+ if (check_sync_update_for_on_slot_group(group))
46846263 exit_protm = true;
46856264 }
46866265 }
46876266
46886267 return exit_protm;
6268
+}
6269
+
6270
+static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev)
6271
+{
6272
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6273
+ u32 const num_groups = kbdev->csf.global_iface.group_num;
6274
+ u32 csg_nr;
6275
+
6276
+ lockdep_assert_held(&scheduler->lock);
6277
+
6278
+ for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
6279
+ struct kbase_queue_group *const group =
6280
+ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
6281
+
6282
+ if (!group)
6283
+ continue;
6284
+
6285
+ if (check_sync_update_for_on_slot_group(group)) {
6286
+ scheduler_wakeup(kbdev, true);
6287
+ return;
6288
+ }
6289
+ }
46896290 }
46906291
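/*
 * Editor's note: illustrative sketch only, not part of the patch. The new
 * check_sync_update_in_sleep_mode() above walks every resident CSG and asks
 * check_sync_update_for_on_slot_group() whether a sync (CQS) wait has been
 * satisfied, waking the Scheduler on the first hit. At the heart of such a
 * check is a compare of the current sync object value against the wait
 * condition; the CSF wait conditions are "greater than" and "less than or
 * equal" style tests. A generic version of that compare (invented names,
 * not the driver's CS_STATUS_WAIT decoding):
 */
#include <linux/types.h>

enum demo_sync_cond {
	DEMO_SYNC_WAIT_GT, /* wait until current > compare */
	DEMO_SYNC_WAIT_LE, /* wait until current <= compare */
};

static bool demo_sync_wait_satisfied(enum demo_sync_cond cond,
				     u32 current_val, u32 compare_val)
{
	switch (cond) {
	case DEMO_SYNC_WAIT_GT:
		return current_val > compare_val;
	case DEMO_SYNC_WAIT_LE:
		return current_val <= compare_val;
	}
	return false;
}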
46916292 /**
....@@ -4709,16 +6310,27 @@
47096310 struct kbase_context, csf.sched.sync_update_work);
47106311 struct kbase_device *const kbdev = kctx->kbdev;
47116312 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
6313
+ bool sync_updated = false;
47126314
47136315 mutex_lock(&scheduler->lock);
47146316
4715
- KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_BEGIN, kctx, 0u);
6317
+#if IS_ENABLED(CONFIG_DEBUG_FS)
6318
+ if (unlikely(scheduler->state == SCHED_BUSY)) {
6319
+ queue_work(kctx->csf.sched.sync_update_wq,
6320
+ &kctx->csf.sched.sync_update_work);
6321
+ mutex_unlock(&scheduler->lock);
6322
+ return;
6323
+ }
6324
+#endif
6325
+
6326
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START, kctx, 0u);
47166327 if (kctx->csf.sched.num_idle_wait_grps != 0) {
47176328 struct kbase_queue_group *group, *temp;
47186329
47196330 list_for_each_entry_safe(group, temp,
47206331 &kctx->csf.sched.idle_wait_groups, link) {
47216332 if (group_sync_updated(group)) {
6333
+ sync_updated = true;
47226334 /* Move this group back in to the runnable
47236335 * groups list of the context.
47246336 */
....@@ -4730,9 +6342,18 @@
47306342 WARN_ON(!list_empty(&kctx->csf.sched.idle_wait_groups));
47316343 }
47326344
4733
- if (check_sync_update_for_idle_groups_protm(kbdev))
6345
+ if (check_sync_update_for_idle_groups_protm(kbdev)) {
47346346 scheduler_force_protm_exit(kbdev);
4735
- KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u);
6347
+ sync_updated = true;
6348
+ }
6349
+
6350
+ /* If scheduler is in sleep or suspended state, re-activate it
6351
+ * to serve on-slot CSGs blocked on a CQS object that has been signaled.
6352
+ */
6353
+ if (!sync_updated && (scheduler->state == SCHED_SLEEPING))
6354
+ check_sync_update_in_sleep_mode(kbdev);
6355
+
6356
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u);
47366357
47376358 mutex_unlock(&scheduler->lock);
47386359 }
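/*
 * Editor's note: illustrative sketch only, not part of the patch. With
 * CONFIG_DEBUG_FS the worker above now bails out and re-queues itself when it
 * finds the Scheduler in the SCHED_BUSY state, rather than blocking while a
 * debugfs dump holds that state. The re-queue-when-busy shape in isolation
 * (all names invented):
 */
#include <linux/workqueue.h>
#include <linux/mutex.h>

struct demo_sync_ctx {
	struct workqueue_struct *wq;
	struct work_struct work;
	struct mutex lock;
	bool busy; /* e.g. a dump is in progress */
};

static void demo_sync_update_fn(struct work_struct *work)
{
	struct demo_sync_ctx *ctx =
		container_of(work, struct demo_sync_ctx, work);

	mutex_lock(&ctx->lock);
	if (ctx->busy) {
		/* Try again once the busy owner has finished. */
		queue_work(ctx->wq, &ctx->work);
		mutex_unlock(&ctx->lock);
		return;
	}

	/* ... evaluate the sync-wait conditions of the groups here ... */

	mutex_unlock(&ctx->lock);
}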
....@@ -4742,7 +6363,8 @@
47426363 {
47436364 struct kbase_context *const kctx = param;
47446365
4745
- KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT, kctx, 0u);
6366
+ KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_GROUP_SYNC_UPDATE_EVENT, kctx, 0u);
6367
+
47466368 queue_work(kctx->csf.sched.sync_update_wq,
47476369 &kctx->csf.sched.sync_update_work);
47486370
....@@ -4753,6 +6375,8 @@
47536375 {
47546376 int priority;
47556377 int err;
6378
+
6379
+ kbase_ctx_sched_init_ctx(kctx);
47566380
47576381 for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
47586382 ++priority) {
....@@ -4770,20 +6394,29 @@
47706394 if (!kctx->csf.sched.sync_update_wq) {
47716395 dev_err(kctx->kbdev->dev,
47726396 "Failed to initialize scheduler context workqueue");
4773
- return -ENOMEM;
6397
+ err = -ENOMEM;
6398
+ goto alloc_wq_failed;
47746399 }
47756400
47766401 INIT_WORK(&kctx->csf.sched.sync_update_work,
47776402 check_group_sync_update_worker);
6403
+
6404
+ kbase_csf_tiler_heap_reclaim_ctx_init(kctx);
47786405
47796406 err = kbase_csf_event_wait_add(kctx, check_group_sync_update_cb, kctx);
47806407
47816408 if (err) {
47826409 dev_err(kctx->kbdev->dev,
47836410 "Failed to register a sync update callback");
4784
- destroy_workqueue(kctx->csf.sched.sync_update_wq);
6411
+ goto event_wait_add_failed;
47856412 }
47866413
6414
+ return err;
6415
+
6416
+event_wait_add_failed:
6417
+ destroy_workqueue(kctx->csf.sched.sync_update_wq);
6418
+alloc_wq_failed:
6419
+ kbase_ctx_sched_remove_ctx(kctx);
47876420 return err;
47886421 }
47896422
....@@ -4792,6 +6425,8 @@
47926425 kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx);
47936426 cancel_work_sync(&kctx->csf.sched.sync_update_work);
47946427 destroy_workqueue(kctx->csf.sched.sync_update_wq);
6428
+
6429
+ kbase_ctx_sched_remove_ctx(kctx);
47956430 }
47966431
47976432 int kbase_csf_scheduler_init(struct kbase_device *kbdev)
....@@ -4810,7 +6445,7 @@
48106445 return -ENOMEM;
48116446 }
48126447
4813
- return 0;
6448
+ return kbase_csf_mcu_shared_regs_data_init(kbdev);
48146449 }
48156450
48166451 int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
....@@ -4824,12 +6459,20 @@
48246459 dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n");
48256460 return -ENOMEM;
48266461 }
6462
+ scheduler->idle_wq = alloc_ordered_workqueue(
6463
+ "csf_scheduler_gpu_idle_wq", WQ_HIGHPRI);
6464
+ if (!scheduler->idle_wq) {
6465
+ dev_err(kbdev->dev,
6466
+ "Failed to allocate GPU idle scheduler workqueue\n");
6467
+ destroy_workqueue(kbdev->csf.scheduler.wq);
6468
+ return -ENOMEM;
6469
+ }
48276470
48286471 INIT_WORK(&scheduler->tick_work, schedule_on_tick);
48296472 INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock);
6473
+ atomic_set(&scheduler->pending_tock_work, false);
48306474
48316475 INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor);
4832
- BUILD_BUG_ON(CSF_FIRMWARE_TIMEOUT_MS >= FIRMWARE_PING_INTERVAL_MS);
48336476
48346477 mutex_init(&scheduler->lock);
48356478 spin_lock_init(&scheduler->interrupt_lock);
....@@ -4843,24 +6486,27 @@
48436486 (sizeof(scheduler->csgs_events_enable_mask) * BITS_PER_BYTE));
48446487 bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS);
48456488 scheduler->state = SCHED_SUSPENDED;
6489
+ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
48466490 scheduler->pm_active_count = 0;
48476491 scheduler->ngrp_to_schedule = 0;
48486492 scheduler->total_runnable_grps = 0;
48496493 scheduler->top_ctx = NULL;
48506494 scheduler->top_grp = NULL;
48516495 scheduler->last_schedule = 0;
4852
- scheduler->tock_pending_request = false;
48536496 scheduler->active_protm_grp = NULL;
4854
- scheduler->gpu_idle_fw_timer_enabled = false;
48556497 scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS;
48566498 scheduler_doorbell_init(kbdev);
48576499
48586500 INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker);
6501
+ scheduler->fast_gpu_idle_handling = false;
6502
+ atomic_set(&scheduler->gpu_no_longer_idle, false);
48596503 atomic_set(&scheduler->non_idle_offslot_grps, 0);
48606504
48616505 hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
48626506 scheduler->tick_timer.function = tick_timer_callback;
48636507 scheduler->tick_timer_active = false;
6508
+
6509
+ kbase_csf_tiler_heap_reclaim_mgr_init(kbdev);
48646510
48656511 return 0;
48666512 }
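/*
 * Editor's note: illustrative sketch only, not part of the patch. The init
 * above wires the scheduler tick to an hrtimer; hrtimer callbacks run in
 * atomic context, so the callback only queues work and the real tick runs
 * from the workqueue. A minimal timer-kicks-work arrangement (names and the
 * 100 ms period are invented, mirroring CSF_SCHEDULER_TIME_TICK_MS):
 */
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/workqueue.h>

struct demo_tick {
	struct hrtimer timer;
	struct work_struct tick_work;
};

static void demo_tick_work_fn(struct work_struct *work)
{
	/* ... run one scheduling tick in process context ... */
}

static enum hrtimer_restart demo_tick_timer_cb(struct hrtimer *timer)
{
	struct demo_tick *dt = container_of(timer, struct demo_tick, timer);

	/* Atomic context: just hand the tick off to process context. */
	queue_work(system_wq, &dt->tick_work);
	return HRTIMER_NORESTART;
}

static void demo_tick_start(struct demo_tick *dt)
{
	INIT_WORK(&dt->tick_work, demo_tick_work_fn);
	hrtimer_init(&dt->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	dt->timer.function = demo_tick_timer_cb;
	hrtimer_start(&dt->timer, ms_to_ktime(100), HRTIMER_MODE_REL);
}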
....@@ -4869,26 +6515,49 @@
48696515 {
48706516 if (kbdev->csf.scheduler.csg_slots) {
48716517 WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps));
4872
- WARN_ON(csgs_active(kbdev));
6518
+ /* The unload of Driver can take place only when all contexts have
6519
+ * been terminated. The groups that were not terminated by the User
6520
+ * are terminated on context termination. So no CSGs are expected
6521
+ * to be active at the time of Driver unload.
6522
+ */
6523
+ WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
48736524 flush_work(&kbdev->csf.scheduler.gpu_idle_work);
48746525 mutex_lock(&kbdev->csf.scheduler.lock);
4875
- if (WARN_ON(kbdev->csf.scheduler.state != SCHED_SUSPENDED))
6526
+
6527
+ if (kbdev->csf.scheduler.state != SCHED_SUSPENDED) {
6528
+ unsigned long flags;
6529
+ /* The power policy could prevent the Scheduler from
6530
+ * getting suspended when GPU becomes idle.
6531
+ */
6532
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
6533
+ WARN_ON(kbase_pm_idle_groups_sched_suspendable(kbdev));
6534
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
48766535 scheduler_suspend(kbdev);
6536
+ }
6537
+
48776538 mutex_unlock(&kbdev->csf.scheduler.lock);
48786539 cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work);
48796540 cancel_tick_timer(kbdev);
4880
- cancel_work_sync(&kbdev->csf.scheduler.tick_work);
4881
- cancel_delayed_work_sync(&kbdev->csf.scheduler.tock_work);
4882
- mutex_destroy(&kbdev->csf.scheduler.lock);
6541
+ cancel_tick_work(&kbdev->csf.scheduler);
6542
+ cancel_tock_work(&kbdev->csf.scheduler);
48836543 kfree(kbdev->csf.scheduler.csg_slots);
48846544 kbdev->csf.scheduler.csg_slots = NULL;
48856545 }
6546
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_TERMINATED, NULL,
6547
+ kbase_csf_scheduler_get_nr_active_csgs(kbdev));
6548
+ /* Terminating the MCU shared regions, following the release of slots */
6549
+ kbase_csf_mcu_shared_regs_data_term(kbdev);
48866550 }
48876551
48886552 void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)
48896553 {
6554
+ if (kbdev->csf.scheduler.idle_wq)
6555
+ destroy_workqueue(kbdev->csf.scheduler.idle_wq);
48906556 if (kbdev->csf.scheduler.wq)
48916557 destroy_workqueue(kbdev->csf.scheduler.wq);
6558
+
6559
+ kbase_csf_tiler_heap_reclaim_mgr_term(kbdev);
6560
+ mutex_destroy(&kbdev->csf.scheduler.lock);
48926561 }
48936562
48946563 /**
....@@ -4911,13 +6580,14 @@
49116580 return;
49126581
49136582 WARN_ON((scheduler->state != SCHED_INACTIVE) &&
4914
- (scheduler->state != SCHED_SUSPENDED));
6583
+ (scheduler->state != SCHED_SUSPENDED) &&
6584
+ (scheduler->state != SCHED_SLEEPING));
49156585
49166586 if (scheduler->total_runnable_grps > 0) {
49176587 enqueue_tick_work(kbdev);
49186588 dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n");
49196589 } else if (scheduler->state != SCHED_SUSPENDED) {
4920
- queue_work(system_wq, &scheduler->gpu_idle_work);
6590
+ enqueue_gpu_idle_work(scheduler);
49216591 }
49226592 }
49236593
....@@ -4952,18 +6622,22 @@
49526622 if (currently_enabled && !enable) {
49536623 scheduler->timer_enabled = false;
49546624 cancel_tick_timer(kbdev);
4955
- cancel_delayed_work(&scheduler->tock_work);
49566625 mutex_unlock(&scheduler->lock);
49576626 /* The non-sync version to cancel the normal work item is not
49586627 * available, so need to drop the lock before cancellation.
49596628 */
4960
- cancel_work_sync(&scheduler->tick_work);
4961
- } else if (!currently_enabled && enable) {
6629
+ cancel_tick_work(scheduler);
6630
+ cancel_tock_work(scheduler);
6631
+ return;
6632
+ }
6633
+
6634
+ if (!currently_enabled && enable) {
49626635 scheduler->timer_enabled = true;
49636636
49646637 scheduler_enable_tick_timer_nolock(kbdev);
4965
- mutex_unlock(&scheduler->lock);
49666638 }
6639
+
6640
+ mutex_unlock(&scheduler->lock);
49676641 }
49686642
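/*
 * Editor's note: illustrative sketch only, not part of the patch. The hunk
 * above drops scheduler->lock before calling the synchronous cancel helpers:
 * the tick/tock workers take that same lock, so cancelling them with a
 * cancel_*_sync() call while holding it could deadlock. The shape of that
 * rule in isolation (all names invented):
 */
#include <linux/workqueue.h>
#include <linux/mutex.h>

struct demo_timer_state {
	struct mutex lock;
	struct work_struct tick_work;
	bool timer_enabled;
};

static void demo_timer_disable(struct demo_timer_state *st)
{
	mutex_lock(&st->lock);
	if (!st->timer_enabled) {
		mutex_unlock(&st->lock);
		return;
	}
	st->timer_enabled = false;
	mutex_unlock(&st->lock);

	/* Only after the lock is dropped: the worker itself takes st->lock. */
	cancel_work_sync(&st->tick_work);
}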
49696643 void kbase_csf_scheduler_kick(struct kbase_device *kbdev)
....@@ -4984,80 +6658,232 @@
49846658 mutex_unlock(&scheduler->lock);
49856659 }
49866660
4987
-void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
6661
+int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev)
49886662 {
6663
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6664
+ int result = 0;
6665
+
6666
+ lockdep_assert_held(&scheduler->lock);
6667
+
6668
+#if IS_ENABLED(CONFIG_DEBUG_FS)
6669
+ if (unlikely(scheduler->state == SCHED_BUSY))
6670
+ return -EBUSY;
6671
+#endif
6672
+
6673
+#ifdef KBASE_PM_RUNTIME
6674
+ /* If scheduler is in sleeping state, then MCU needs to be activated
6675
+ * to suspend CSGs.
6676
+ */
6677
+ if (scheduler->state == SCHED_SLEEPING) {
6678
+ dev_info(kbdev->dev, "Activating MCU out of sleep on system suspend");
6679
+ result = force_scheduler_to_exit_sleep(kbdev);
6680
+ if (result) {
6681
+ dev_warn(kbdev->dev, "Scheduler failed to exit from sleep");
6682
+ goto exit;
6683
+ }
6684
+ }
6685
+#endif
6686
+ if (scheduler->state != SCHED_SUSPENDED) {
6687
+ result = suspend_active_groups_on_powerdown(kbdev, true);
6688
+ if (result) {
6689
+ dev_warn(kbdev->dev, "failed to suspend active groups");
6690
+ goto exit;
6691
+ } else {
6692
+ dev_info(kbdev->dev, "Scheduler PM suspend");
6693
+ scheduler_suspend(kbdev);
6694
+ cancel_tick_timer(kbdev);
6695
+ }
6696
+ }
6697
+
6698
+exit:
6699
+ return result;
6700
+}
6701
+
6702
+int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
6703
+{
6704
+ int result = 0;
49896705 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
49906706
49916707 /* Cancel any potential queued delayed work(s) */
4992
- cancel_work_sync(&scheduler->tick_work);
4993
- cancel_delayed_work_sync(&scheduler->tock_work);
6708
+ cancel_tick_work(scheduler);
6709
+ cancel_tock_work(scheduler);
49946710
4995
- if (kbase_reset_gpu_prevent_and_wait(kbdev)) {
4996
- dev_warn(kbdev->dev,
4997
- "Stop PM suspending for failing to prevent gpu reset.\n");
4998
- return;
6711
+ result = kbase_reset_gpu_prevent_and_wait(kbdev);
6712
+ if (result) {
6713
+ dev_warn(kbdev->dev, "Stop PM suspending for failing to prevent gpu reset.\n");
6714
+ return result;
49996715 }
50006716
50016717 mutex_lock(&scheduler->lock);
50026718
5003
- disable_gpu_idle_fw_timer(kbdev);
5004
-
5005
- if (scheduler->state != SCHED_SUSPENDED) {
5006
- suspend_active_groups_on_powerdown(kbdev, true);
5007
- dev_info(kbdev->dev, "Scheduler PM suspend");
5008
- scheduler_suspend(kbdev);
5009
- cancel_tick_timer(kbdev);
5010
- }
6719
+ result = kbase_csf_scheduler_pm_suspend_no_lock(kbdev);
50116720 mutex_unlock(&scheduler->lock);
50126721
50136722 kbase_reset_gpu_allow(kbdev);
6723
+
6724
+ return result;
50146725 }
50156726 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_suspend);
50166727
5017
-void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev)
6728
+void kbase_csf_scheduler_pm_resume_no_lock(struct kbase_device *kbdev)
50186729 {
50196730 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
50206731
5021
- mutex_lock(&scheduler->lock);
5022
-
5023
- if (scheduler->total_runnable_grps > 0) {
5024
- WARN_ON(scheduler->state != SCHED_SUSPENDED);
6732
+ lockdep_assert_held(&scheduler->lock);
6733
+ if ((scheduler->total_runnable_grps > 0) &&
6734
+ (scheduler->state == SCHED_SUSPENDED)) {
50256735 dev_info(kbdev->dev, "Scheduler PM resume");
50266736 scheduler_wakeup(kbdev, true);
50276737 }
5028
- mutex_unlock(&scheduler->lock);
6738
+}
6739
+
6740
+void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev)
6741
+{
6742
+ mutex_lock(&kbdev->csf.scheduler.lock);
6743
+
6744
+ kbase_csf_scheduler_pm_resume_no_lock(kbdev);
6745
+ mutex_unlock(&kbdev->csf.scheduler.lock);
50296746 }
50306747 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_resume);
50316748
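/*
 * Editor's note: illustrative sketch only, not part of the patch. The resume
 * path above is now split into a *_no_lock helper that asserts the scheduler
 * mutex via lockdep and a thin wrapper that takes the mutex, so callers that
 * already hold the lock (e.g. the runtime-PM paths) can reuse the helper.
 * Generic form of that split (all names invented):
 */
#include <linux/mutex.h>
#include <linux/lockdep.h>

struct demo_sched {
	struct mutex lock;
};

static void demo_resume_no_lock(struct demo_sched *s)
{
	lockdep_assert_held(&s->lock);
	/* ... wake the scheduler here ... */
}

static void demo_resume(struct demo_sched *s)
{
	mutex_lock(&s->lock);
	demo_resume_no_lock(s);
	mutex_unlock(&s->lock);
}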
50326749 void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev)
50336750 {
5034
- unsigned long flags;
5035
- u32 prev_count;
5036
-
5037
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
5038
- prev_count = kbdev->csf.scheduler.pm_active_count++;
5039
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
5040
-
5041
- /* On 0 => 1, make a pm_ctx_active request */
5042
- if (!prev_count)
5043
- kbase_pm_context_active(kbdev);
5044
- else
5045
- WARN_ON(prev_count == U32_MAX);
6751
+ /* Here the lock is taken to synchronize against the runtime suspend
6752
+ * callback function, which may need to wake up the MCU for suspending
6753
+ * the CSGs before powering down the GPU.
6754
+ */
6755
+ mutex_lock(&kbdev->csf.scheduler.lock);
6756
+ scheduler_pm_active_handle_suspend(kbdev,
6757
+ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
6758
+ mutex_unlock(&kbdev->csf.scheduler.lock);
50466759 }
50476760 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active);
50486761
50496762 void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev)
50506763 {
5051
- unsigned long flags;
5052
- u32 prev_count;
5053
-
5054
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
5055
- prev_count = kbdev->csf.scheduler.pm_active_count--;
5056
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
5057
-
5058
- if (prev_count == 1)
5059
- kbase_pm_context_idle(kbdev);
5060
- else
5061
- WARN_ON(prev_count == 0);
6764
+ /* Here the lock is taken just to maintain symmetry with
6765
+ * kbase_csf_scheduler_pm_active().
6766
+ */
6767
+ mutex_lock(&kbdev->csf.scheduler.lock);
6768
+ scheduler_pm_idle(kbdev);
6769
+ mutex_unlock(&kbdev->csf.scheduler.lock);
50626770 }
50636771 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle);
6772
+
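/*
 * Editor's note: illustrative sketch only, not part of the patch. The removed
 * bodies of kbase_csf_scheduler_pm_active()/_pm_idle() above used a classic
 * first/last-reference count: only the 0->1 transition takes the real PM
 * reference and only the 1->0 transition drops it. The rework keeps that idea
 * but moves it behind helpers called with the scheduler mutex held, so it can
 * be coordinated with runtime suspend. The bare pattern (invented names; the
 * spinlock must be initialised with spin_lock_init() before use):
 */
#include <linux/spinlock.h>
#include <linux/types.h>

struct demo_pm_ref {
	spinlock_t lock;
	u32 active_count;
};

static void demo_pm_active(struct demo_pm_ref *pm)
{
	unsigned long flags;
	u32 prev;

	spin_lock_irqsave(&pm->lock, flags);
	prev = pm->active_count++;
	spin_unlock_irqrestore(&pm->lock, flags);

	if (!prev) {
		/* First user: take the real PM context reference here. */
	}
}

static void demo_pm_idle(struct demo_pm_ref *pm)
{
	unsigned long flags;
	u32 prev;

	spin_lock_irqsave(&pm->lock, flags);
	prev = pm->active_count--;
	spin_unlock_irqrestore(&pm->lock, flags);

	if (prev == 1) {
		/* Last user: drop the real PM context reference here. */
	}
}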
6773
+int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev)
6774
+{
6775
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
6776
+ unsigned long flags;
6777
+ int err;
6778
+
6779
+ kbase_pm_lock(kbdev);
6780
+ WARN_ON(!kbdev->pm.active_count);
6781
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
6782
+ WARN_ON(!scheduler->pm_active_count);
6783
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
6784
+ kbase_pm_unlock(kbdev);
6785
+
6786
+ kbase_pm_wait_for_poweroff_work_complete(kbdev);
6787
+
6788
+ err = kbase_pm_wait_for_desired_state(kbdev);
6789
+ if (!err) {
6790
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
6791
+ WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_ON);
6792
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
6793
+ }
6794
+
6795
+ return err;
6796
+}
6797
+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_wait_mcu_active);
6798
+
6799
+#ifdef KBASE_PM_RUNTIME
6800
+int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev)
6801
+{
6802
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6803
+ unsigned long flags;
6804
+ int ret;
6805
+
6806
+ dev_dbg(kbdev->dev, "Handling runtime suspend");
6807
+
6808
+ kbase_reset_gpu_assert_prevented(kbdev);
6809
+ lockdep_assert_held(&scheduler->lock);
6810
+ WARN_ON(scheduler->pm_active_count);
6811
+
6812
+ if (scheduler->state == SCHED_SUSPENDED) {
6813
+ WARN_ON(kbdev->pm.backend.gpu_sleep_mode_active);
6814
+ return 0;
6815
+ }
6816
+
6817
+ ret = suspend_active_groups_on_powerdown(kbdev, false);
6818
+
6819
+ if (ret) {
6820
+ dev_dbg(kbdev->dev, "Aborting runtime suspend (grps: %d)",
6821
+ atomic_read(&scheduler->non_idle_offslot_grps));
6822
+
6823
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
6824
+ kbdev->pm.backend.exit_gpu_sleep_mode = true;
6825
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
6826
+
6827
+ kbase_csf_scheduler_invoke_tick(kbdev);
6828
+ return ret;
6829
+ }
6830
+
6831
+ scheduler->state = SCHED_SUSPENDED;
6832
+ KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
6833
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
6834
+ kbdev->pm.backend.gpu_sleep_mode_active = false;
6835
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
6836
+
6837
+ wake_up_all(&kbdev->csf.event_wait);
6838
+ return 0;
6839
+}
6840
+
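/*
 * Editor's note: illustrative sketch only, not part of the patch. The
 * runtime-suspend handler above returns an error when the active groups
 * cannot be suspended, so the power-down is aborted and a scheduling tick is
 * requested instead. In a generic driver the same idea is a runtime_suspend
 * callback that returns -EBUSY while work is still pending (all names
 * invented; this is not the kbase callback):
 */
#include <linux/pm_runtime.h>
#include <linux/device.h>
#include <linux/errno.h>

struct demo_rs_drvdata {
	bool work_pending;
};

static int demo_runtime_suspend(struct device *dev)
{
	struct demo_rs_drvdata *d = dev_get_drvdata(dev);

	if (d->work_pending) {
		/* Refuse to power down; the PM core keeps the device active. */
		return -EBUSY;
	}

	/* ... quiesce the hardware here ... */
	return 0;
}

static const struct dev_pm_ops demo_pm_ops __maybe_unused = {
	SET_RUNTIME_PM_OPS(demo_runtime_suspend, NULL, NULL)
};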
6841
+void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev)
6842
+{
6843
+ u32 csg_nr;
6844
+
6845
+ lockdep_assert_held(&kbdev->hwaccess_lock);
6846
+
6847
+ WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP);
6848
+
6849
+ for (csg_nr = 0; csg_nr < kbdev->csf.global_iface.group_num; csg_nr++) {
6850
+ struct kbase_csf_cmd_stream_group_info *ginfo =
6851
+ &kbdev->csf.global_iface.groups[csg_nr];
6852
+ bool csg_idle;
6853
+
6854
+ if (!kbdev->csf.scheduler.csg_slots[csg_nr].resident_group)
6855
+ continue;
6856
+
6857
+ csg_idle =
6858
+ kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
6859
+ CSG_STATUS_STATE_IDLE_MASK;
6860
+ if (!csg_idle) {
6861
+ dev_dbg(kbdev->dev,
6862
+ "Re-activate Scheduler after MCU sleep");
6863
+ kbdev->pm.backend.exit_gpu_sleep_mode = true;
6864
+ kbase_csf_scheduler_invoke_tick(kbdev);
6865
+ break;
6866
+ }
6867
+ }
6868
+}
6869
+
6870
+void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev)
6871
+{
6872
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
6873
+
6874
+ mutex_lock(&scheduler->lock);
6875
+ if (kbase_pm_gpu_sleep_allowed(kbdev) &&
6876
+ (scheduler->state == SCHED_INACTIVE))
6877
+ scheduler_sleep_on_idle(kbdev);
6878
+ mutex_unlock(&scheduler->lock);
6879
+}
6880
+#endif
6881
+
6882
+void kbase_csf_scheduler_force_wakeup(struct kbase_device *kbdev)
6883
+{
6884
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
6885
+
6886
+ mutex_lock(&scheduler->lock);
6887
+ scheduler_wakeup(kbdev, true);
6888
+ mutex_unlock(&scheduler->lock);
6889
+}