2024-05-14 bedbef8ad3e75a304af6361af235302bcc61d06b
kernel/drivers/cpuidle/cpuidle.c
@@ -22,13 +22,14 @@
 #include <linux/module.h>
 #include <linux/suspend.h>
 #include <linux/tick.h>
+#include <linux/mmu_context.h>
 #include <trace/events/power.h>
+#include <trace/hooks/cpuidle.h>
 
 #include "cpuidle.h"
 
 DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
 DEFINE_PER_CPU(struct cpuidle_device, cpuidle_dev);
-EXPORT_SYMBOL_GPL(cpuidle_dev);
 
 DEFINE_MUTEX(cpuidle_lock);
 LIST_HEAD(cpuidle_detected_devices);
@@ -76,44 +77,45 @@
 
 static int find_deepest_state(struct cpuidle_driver *drv,
                               struct cpuidle_device *dev,
-                              unsigned int max_latency,
+                              u64 max_latency_ns,
                               unsigned int forbidden_flags,
                               bool s2idle)
 {
-        unsigned int latency_req = 0;
+        u64 latency_req = 0;
         int i, ret = 0;
 
         for (i = 1; i < drv->state_count; i++) {
                 struct cpuidle_state *s = &drv->states[i];
-                struct cpuidle_state_usage *su = &dev->states_usage[i];
 
-                if (s->disabled || su->disable || s->exit_latency <= latency_req
-                    || s->exit_latency > max_latency
-                    || (s->flags & forbidden_flags)
-                    || (s2idle && !s->enter_s2idle))
+                if (dev->states_usage[i].disable ||
+                    s->exit_latency_ns <= latency_req ||
+                    s->exit_latency_ns > max_latency_ns ||
+                    (s->flags & forbidden_flags) ||
+                    (s2idle && !s->enter_s2idle))
                         continue;
 
-                latency_req = s->exit_latency;
+                latency_req = s->exit_latency_ns;
                 ret = i;
         }
         return ret;
 }
 
 /**
- * cpuidle_use_deepest_state - Set/clear governor override flag.
- * @enable: New value of the flag.
+ * cpuidle_use_deepest_state - Set/unset governor override mode.
+ * @latency_limit_ns: Idle state exit latency limit (or no override if 0).
  *
- * Set/unset the current CPU to use the deepest idle state (override governors
- * going forward if set).
+ * If @latency_limit_ns is nonzero, set the current CPU to use the deepest idle
+ * state with exit latency within @latency_limit_ns (override governors going
+ * forward), or do not override governors if it is zero.
  */
-void cpuidle_use_deepest_state(bool enable)
+void cpuidle_use_deepest_state(u64 latency_limit_ns)
 {
         struct cpuidle_device *dev;
 
         preempt_disable();
         dev = cpuidle_get_device();
         if (dev)
-                dev->use_deepest_state = enable;
+                dev->forced_idle_latency_limit_ns = latency_limit_ns;
         preempt_enable();
 }
 
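Note on the new interface: the override is no longer an on/off flag; callers now pass an exit-latency budget in nanoseconds, and 0 clears the override. Below is a minimal illustrative sketch (not part of this patch) of how a forced-idle user such as the play_idle() path might drive it; the function name and the 500 us budget are invented for the example.

    /* Hypothetical caller: force the deepest state whose exit latency fits
     * in a 500 us budget, then hand control back to the governors.
     */
    static void example_forced_idle_section(void)
    {
            cpuidle_use_deepest_state(500 * NSEC_PER_USEC);

            /* ... idle injection runs here ... */

            cpuidle_use_deepest_state(0);
    }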
@@ -121,11 +123,15 @@
  * cpuidle_find_deepest_state - Find the deepest available idle state.
  * @drv: cpuidle driver for the given CPU.
  * @dev: cpuidle device for the given CPU.
+ * @latency_limit_ns: Idle state exit latency limit
+ *
+ * Return: the index of the deepest available idle state.
  */
 int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
-                               struct cpuidle_device *dev)
+                               struct cpuidle_device *dev,
+                               u64 latency_limit_ns)
 {
-        return find_deepest_state(drv, dev, UINT_MAX, 0, false);
+        return find_deepest_state(drv, dev, latency_limit_ns, 0, false);
 }
 
 #ifdef CONFIG_SUSPEND
@@ -133,30 +139,25 @@
                                 struct cpuidle_device *dev, int index)
 {
         ktime_t time_start, time_end;
+        struct cpuidle_state *target_state = &drv->states[index];
 
         time_start = ns_to_ktime(local_clock());
 
-        /*
-         * trace_suspend_resume() called by tick_freeze() for the last CPU
-         * executing it contains RCU usage regarded as invalid in the idle
-         * context, so tell RCU about that.
-         */
-        RCU_NONIDLE(tick_freeze());
+        tick_freeze();
         /*
          * The state used here cannot be a "coupled" one, because the "coupled"
          * cpuidle mechanism enables interrupts and doing that with timekeeping
          * suspended is generally unsafe.
          */
         stop_critical_timings();
-        drv->states[index].enter_s2idle(dev, drv, index);
+        if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
+                rcu_idle_enter();
+        target_state->enter_s2idle(dev, drv, index);
         if (WARN_ON_ONCE(!irqs_disabled()))
                 local_irq_disable();
-        /*
-         * timekeeping_resume() that will be called by tick_unfreeze() for the
-         * first CPU executing it calls functions containing RCU read-side
-         * critical sections, so tell RCU about that.
-         */
-        RCU_NONIDLE(tick_unfreeze());
+        if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
+                rcu_idle_exit();
+        tick_unfreeze();
         start_critical_timings();
 
         time_end = ns_to_ktime(local_clock());
@@ -182,10 +183,11 @@
  * that interrupts won't be enabled when it exits and allows the tick to
  * be frozen safely.
  */
-        index = find_deepest_state(drv, dev, UINT_MAX, 0, true);
-        if (index > 0)
+        index = find_deepest_state(drv, dev, U64_MAX, 0, true);
+        if (index > 0) {
                 enter_s2idle_proper(drv, dev, index);
-
+                local_irq_enable();
+        }
         return index;
 }
 #endif /* CONFIG_SUSPEND */
@@ -201,10 +203,20 @@
 {
         int entered_state;
 
-        struct cpuidle_state *target_state = &drv->states[index];
-        bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
+        struct cpuidle_state *target_state;
+        bool broadcast;
         ktime_t time_start, time_end;
-        s64 diff;
+
+        /*
+         * The vendor hook may modify index, which means target_state and
+         * broadcast must be assigned after the vendor hook.
+         */
+        trace_android_vh_cpu_idle_enter(&index, dev);
+        if (index < 0)
+                return index;
+
+        target_state = &drv->states[index];
+        broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
 
         /*
          * Tell the time framework to switch to a broadcast timer because our
@@ -212,7 +224,7 @@
          * CPU as a broadcast timer, this call may fail if it is not available.
          */
         if (broadcast && tick_broadcast_enter()) {
-                index = find_deepest_state(drv, dev, target_state->exit_latency,
+                index = find_deepest_state(drv, dev, target_state->exit_latency_ns,
                                            CPUIDLE_FLAG_TIMER_STOP, false);
                 if (index < 0) {
                         default_idle_call();
@@ -222,22 +234,30 @@
                 broadcast = false;
         }
 
-        /* Take note of the planned idle state. */
-        sched_idle_set_state(target_state, index);
+        if (target_state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
+                leave_mm(dev->cpu);
 
-        trace_cpu_idle_rcuidle(index, dev->cpu);
+        /* Take note of the planned idle state. */
+        sched_idle_set_state(target_state);
+
+        trace_cpu_idle(index, dev->cpu);
         time_start = ns_to_ktime(local_clock());
 
         stop_critical_timings();
+        if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
+                rcu_idle_enter();
         entered_state = target_state->enter(dev, drv, index);
+        if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
+                rcu_idle_exit();
         start_critical_timings();
 
         sched_clock_idle_wakeup_event();
         time_end = ns_to_ktime(local_clock());
-        trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
+        trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu);
+        trace_android_vh_cpu_idle_exit(entered_state, dev);
 
         /* The cpu is no longer idle or about to enter idle. */
-        sched_idle_set_state(NULL, -1);
+        sched_idle_set_state(NULL);
 
         if (broadcast) {
                 if (WARN_ON_ONCE(!irqs_disabled()))
@@ -249,21 +269,48 @@
         if (!cpuidle_state_is_coupled(drv, index))
                 local_irq_enable();
 
-        diff = ktime_us_delta(time_end, time_start);
-        if (diff > INT_MAX)
-                diff = INT_MAX;
-
-        dev->last_residency = (int) diff;
-
         if (entered_state >= 0) {
-                /* Update cpuidle counters */
-                /* This can be moved to within driver enter routine
+                s64 diff, delay = drv->states[entered_state].exit_latency_ns;
+                int i;
+
+                /*
+                 * Update cpuidle counters
+                 * This can be moved to within driver enter routine,
                  * but that results in multiple copies of same code.
                  */
-                dev->states_usage[entered_state].time += dev->last_residency;
+                diff = ktime_sub(time_end, time_start);
+
+                dev->last_residency_ns = diff;
+                dev->states_usage[entered_state].time_ns += diff;
                 dev->states_usage[entered_state].usage++;
+
+                if (diff < drv->states[entered_state].target_residency_ns) {
+                        for (i = entered_state - 1; i >= 0; i--) {
+                                if (dev->states_usage[i].disable)
+                                        continue;
+
+                                /* Shallower states are enabled, so update. */
+                                dev->states_usage[entered_state].above++;
+                                break;
+                        }
+                } else if (diff > delay) {
+                        for (i = entered_state + 1; i < drv->state_count; i++) {
+                                if (dev->states_usage[i].disable)
+                                        continue;
+
+                                /*
+                                 * Update if a deeper state would have been a
+                                 * better match for the observed idle duration.
+                                 */
+                                if (diff - delay >= drv->states[i].target_residency_ns)
+                                        dev->states_usage[entered_state].below++;
+
+                                break;
+                        }
+                }
         } else {
-                dev->last_residency = 0;
+                dev->last_residency_ns = 0;
+                dev->states_usage[index].rejected++;
         }
 
         return entered_state;
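Note on the above/below accounting added here, read roughly: if the measured residency is shorter than the entered state's own target residency while some shallower state was enabled, the state was too deep and "above" is bumped; if the residency minus the exit latency would still have covered the next enabled deeper state's target residency, it was too shallow and "below" is bumped. A standalone sketch of that classification with invented example numbers (200 us target, 50 us exit latency, 1 ms for the next deeper state), assuming the usual kernel time macros and ignoring the enabled-state scans for brevity:

    /* Illustrative only: classify one measured residency the way the hunk
     * above does; the thresholds are hypothetical example values.
     */
    static const char *classify_residency(s64 diff_ns)
    {
            const s64 target_ns = 200 * NSEC_PER_USEC;         /* entered state */
            const s64 delay_ns = 50 * NSEC_PER_USEC;           /* its exit latency */
            const s64 deeper_target_ns = 1000 * NSEC_PER_USEC; /* next deeper state */

            if (diff_ns < target_ns)
                    return "above";   /* a shallower state would have done */
            if (diff_ns > delay_ns && diff_ns - delay_ns >= deeper_target_ns)
                    return "below";   /* a deeper state would have fit */
            return "ok";
    }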
@@ -301,9 +348,23 @@
 int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev,
                   int index)
 {
+        int ret = 0;
+
+        /*
+         * Store the next hrtimer, which becomes either next tick or the next
+         * timer event, whatever expires first. Additionally, to make this data
+         * useful for consumers outside cpuidle, we rely on that the governor's
+         * ->select() callback have decided, whether to stop the tick or not.
+         */
+        WRITE_ONCE(dev->next_hrtimer, tick_nohz_get_next_hrtimer());
+
         if (cpuidle_state_is_coupled(drv, index))
-                return cpuidle_enter_state_coupled(dev, drv, index);
-        return cpuidle_enter_state(dev, drv, index);
+                ret = cpuidle_enter_state_coupled(dev, drv, index);
+        else
+                ret = cpuidle_enter_state(dev, drv, index);
+
+        WRITE_ONCE(dev->next_hrtimer, 0);
+        return ret;
 }
 
 /**
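Note: dev->next_hrtimer is published with WRITE_ONCE() so observers outside cpuidle can sample the idle CPU's next expected wakeup, and a reader is expected to pair that with READ_ONCE(). A hypothetical consumer, assuming it can see the cpuidle_devices per-CPU pointer defined earlier in this file (the helper name is illustrative, not from this patch):

    /* Illustrative only: returns 0 when the CPU is not currently inside
     * the cpuidle_enter() window.
     */
    static ktime_t example_peek_next_wakeup(int cpu)
    {
            struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);

            return dev ? READ_ONCE(dev->next_hrtimer) : 0;
    }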
@@ -318,6 +379,37 @@
 {
         if (cpuidle_curr_governor->reflect && index >= 0)
                 cpuidle_curr_governor->reflect(dev, index);
+}
+
+/**
+ * cpuidle_poll_time - return amount of time to poll for,
+ * governors can override dev->poll_limit_ns if necessary
+ *
+ * @drv:   the cpuidle driver tied with the cpu
+ * @dev:   the cpuidle device
+ *
+ */
+u64 cpuidle_poll_time(struct cpuidle_driver *drv,
+                      struct cpuidle_device *dev)
+{
+        int i;
+        u64 limit_ns;
+
+        if (dev->poll_limit_ns)
+                return dev->poll_limit_ns;
+
+        limit_ns = TICK_NSEC;
+        for (i = 1; i < drv->state_count; i++) {
+                if (dev->states_usage[i].disable)
+                        continue;
+
+                limit_ns = drv->states[i].target_residency_ns;
+                break;
+        }
+
+        dev->poll_limit_ns = limit_ns;
+
+        return dev->poll_limit_ns;
 }
 
 /**
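Note: cpuidle_poll_time() caches its answer in dev->poll_limit_ns, so the scan over enabled states only runs when the cached value has been reset; the limit is TICK_NSEC unless the shallowest enabled state sets a tighter target residency. Roughly how a polling idle state is expected to bound its busy-wait on top of this, shown as a hedged sketch rather than the real drivers/cpuidle/poll_state.c code (which additionally manages the task polling flag):

    /* Illustrative only: a time-bounded polling loop in the spirit of poll_idle(). */
    static int example_poll_idle(struct cpuidle_device *dev,
                                 struct cpuidle_driver *drv, int index)
    {
            u64 time_start = local_clock();
            u64 limit = cpuidle_poll_time(drv, dev);

            local_irq_enable();
            while (!need_resched()) {
                    cpu_relax();
                    if (local_clock() - time_start > limit)
                            break;
            }
            return index;
    }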
@@ -339,7 +431,7 @@
 {
         if (enabled_devices) {
                 initialized = 0;
-                wake_up_all_idle_cpus();
+                wake_up_all_online_idle_cpus();
         }
 
         /*
@@ -483,7 +575,8 @@
 static void __cpuidle_device_init(struct cpuidle_device *dev)
 {
         memset(dev->states_usage, 0, sizeof(dev->states_usage));
-        dev->last_residency = 0;
+        dev->last_residency_ns = 0;
+        dev->next_hrtimer = 0;
 }
 
 /**
@@ -495,11 +588,19 @@
  */
 static int __cpuidle_register_device(struct cpuidle_device *dev)
 {
-        int ret;
         struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+        int i, ret;
 
         if (!try_module_get(drv->owner))
                 return -EINVAL;
+
+        for (i = 0; i < drv->state_count; i++) {
+                if (drv->states[i].flags & CPUIDLE_FLAG_UNUSABLE)
+                        dev->states_usage[i].disable |= CPUIDLE_STATE_DISABLED_BY_DRIVER;
+
+                if (drv->states[i].flags & CPUIDLE_FLAG_OFF)
+                        dev->states_usage[i].disable |= CPUIDLE_STATE_DISABLED_BY_USER;
+        }
 
         per_cpu(cpuidle_devices, dev->cpu) = dev;
         list_add(&dev->device_list, &cpuidle_detected_devices);
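Note: with this change states_usage[i].disable acts as a bitmask of reasons (CPUIDLE_STATE_DISABLED_BY_DRIVER, CPUIDLE_STATE_DISABLED_BY_USER) rather than a plain boolean, which is why the registration path ORs individual bits in. A hypothetical helper, not part of this patch, to show the intended pattern of clearing just one reason without touching the other:

    /* Illustrative only: re-enable a state that defaulted to off via
     * CPUIDLE_FLAG_OFF, leaving any driver-imposed disable bit alone.
     */
    static void example_allow_state(struct cpuidle_device *dev, int idx)
    {
            dev->states_usage[idx].disable &= ~CPUIDLE_STATE_DISABLED_BY_USER;
    }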
@@ -654,54 +755,17 @@
 }
 EXPORT_SYMBOL_GPL(cpuidle_register);
 
-#ifdef CONFIG_SMP
-
-/*
- * This function gets called when a part of the kernel has a new latency
- * requirement. This means we need to get all processors out of their C-state,
- * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
- * wakes them all right up.
- */
-static int cpuidle_latency_notify(struct notifier_block *b,
-                unsigned long l, void *v)
-{
-        wake_up_all_idle_cpus();
-        return NOTIFY_OK;
-}
-
-static struct notifier_block cpuidle_latency_notifier = {
-        .notifier_call = cpuidle_latency_notify,
-};
-
-static inline void latency_notifier_init(struct notifier_block *n)
-{
-        pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n);
-}
-
-#else /* CONFIG_SMP */
-
-#define latency_notifier_init(x) do { } while (0)
-
-#endif /* CONFIG_SMP */
-
 /**
  * cpuidle_init - core initializer
  */
 static int __init cpuidle_init(void)
 {
-        int ret;
-
         if (cpuidle_disabled())
                 return -ENODEV;
 
-        ret = cpuidle_add_interface(cpu_subsys.dev_root);
-        if (ret)
-                return ret;
-
-        latency_notifier_init(&cpuidle_latency_notifier);
-
-        return 0;
+        return cpuidle_add_interface(cpu_subsys.dev_root);
 }
 
 module_param(off, int, 0444);
+module_param_string(governor, param_governor, CPUIDLE_NAME_LEN, 0444);
 core_initcall(cpuidle_init);