.. | .. |
---|
22 | 22 | #include <linux/module.h> |
---|
23 | 23 | #include <linux/suspend.h> |
---|
24 | 24 | #include <linux/tick.h> |
---|
| 25 | +#include <linux/mmu_context.h> |
---|
25 | 26 | #include <trace/events/power.h> |
---|
| 27 | +#include <trace/hooks/cpuidle.h> |
---|
26 | 28 | |
---|
27 | 29 | #include "cpuidle.h" |
---|
28 | 30 | |
---|
29 | 31 | DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices); |
---|
30 | 32 | DEFINE_PER_CPU(struct cpuidle_device, cpuidle_dev); |
---|
31 | | -EXPORT_SYMBOL_GPL(cpuidle_dev); |
---|
32 | 33 | |
---|
33 | 34 | DEFINE_MUTEX(cpuidle_lock); |
---|
34 | 35 | LIST_HEAD(cpuidle_detected_devices); |
---|
.. | .. |
---|
76 | 77 | |
---|
77 | 78 | static int find_deepest_state(struct cpuidle_driver *drv, |
---|
78 | 79 | struct cpuidle_device *dev, |
---|
79 | | - unsigned int max_latency, |
---|
| 80 | + u64 max_latency_ns, |
---|
80 | 81 | unsigned int forbidden_flags, |
---|
81 | 82 | bool s2idle) |
---|
82 | 83 | { /* Return the index of the qualifying state with the largest exit latency not above the limit, or 0 if no state qualifies. */ |
---|
83 | | - unsigned int latency_req = 0; |
---|
| 84 | + u64 latency_req = 0; /* exit latency of the state picked so far */ |
---|
84 | 85 | int i, ret = 0; |
---|
85 | 86 | |
---|
86 | 87 | for (i = 1; i < drv->state_count; i++) { /* state 0 is never examined; it is the default result */ |
---|
87 | 88 | struct cpuidle_state *s = &drv->states[i]; |
---|
88 | | - struct cpuidle_state_usage *su = &dev->states_usage[i]; |
---|
89 | 89 | |
---|
90 | | - if (s->disabled || su->disable || s->exit_latency <= latency_req |
---|
91 | | - || s->exit_latency > max_latency |
---|
92 | | - || (s->flags & forbidden_flags) |
---|
93 | | - || (s2idle && !s->enter_s2idle)) |
---|
| 90 | + if (dev->states_usage[i].disable || /* state is disabled for this device */ |
---|
| 91 | + s->exit_latency_ns <= latency_req || /* no higher exit latency than the current pick */ |
---|
| 92 | + s->exit_latency_ns > max_latency_ns || /* exceeds the caller's latency limit */ |
---|
| 93 | + (s->flags & forbidden_flags) || /* carries a flag the caller ruled out */ |
---|
| 94 | + (s2idle && !s->enter_s2idle)) /* s2idle requested but no ->enter_s2idle callback */ |
---|
94 | 95 | continue; |
---|
95 | 96 | |
---|
96 | | - latency_req = s->exit_latency; |
---|
| 97 | + latency_req = s->exit_latency_ns; /* later candidates must exceed this latency */ |
---|
97 | 98 | ret = i; |
---|
98 | 99 | } |
---|
99 | 100 | return ret; |
---|
100 | 101 | } |
---|
101 | 102 | |
---|
102 | 103 | /** |
---|
103 | | - * cpuidle_use_deepest_state - Set/clear governor override flag. |
---|
104 | | - * @enable: New value of the flag. |
---|
| 104 | + * cpuidle_use_deepest_state - Set/unset governor override mode. |
---|
| 105 | + * @latency_limit_ns: Idle state exit latency limit (or no override if 0). |
---|
105 | 106 | * |
---|
106 | | - * Set/unset the current CPU to use the deepest idle state (override governors |
---|
107 | | - * going forward if set). |
---|
| 107 | + * If @latency_limit_ns is nonzero, set the current CPU to use the deepest idle |
---|
| 108 | + * state with exit latency within @latency_limit_ns (override governors going |
---|
| 109 | + * forward), or do not override governors if it is zero. |
---|
108 | 110 | */ |
---|
109 | | -void cpuidle_use_deepest_state(bool enable) |
---|
| 111 | +void cpuidle_use_deepest_state(u64 latency_limit_ns) |
---|
110 | 112 | { |
---|
111 | 113 | struct cpuidle_device *dev; |
---|
112 | 114 | |
---|
113 | 115 | preempt_disable(); /* NOTE(review): presumably keeps the lookup and the store on the same CPU - confirm */ |
---|
114 | 116 | dev = cpuidle_get_device(); |
---|
115 | 117 | if (dev) /* nothing is recorded when no device is returned */ |
---|
116 | | - dev->use_deepest_state = enable; |
---|
| 118 | + dev->forced_idle_latency_limit_ns = latency_limit_ns; |
---|
117 | 119 | preempt_enable(); |
---|
118 | 120 | } |
---|
119 | 121 | |
---|
.. | .. |
---|
121 | 123 | * cpuidle_find_deepest_state - Find the deepest available idle state. |
---|
122 | 124 | * @drv: cpuidle driver for the given CPU. |
---|
123 | 125 | * @dev: cpuidle device for the given CPU. |
---|
| 126 | + * @latency_limit_ns: Idle state exit latency limit |
---|
| 127 | + * |
---|
| 128 | + * Return: the index of the deepest available idle state. |
---|
124 | 129 | */ |
---|
125 | 130 | int cpuidle_find_deepest_state(struct cpuidle_driver *drv, |
---|
126 | | - struct cpuidle_device *dev) |
---|
| 131 | + struct cpuidle_device *dev, |
---|
| 132 | + u64 latency_limit_ns) |
---|
127 | 133 | { |
---|
128 | | - return find_deepest_state(drv, dev, UINT_MAX, 0, false); |
---|
| 134 | + return find_deepest_state(drv, dev, latency_limit_ns, 0, false); /* no forbidden flags, not a suspend-to-idle lookup */ |
---|
129 | 135 | } |
---|
130 | 136 | |
---|
131 | 137 | #ifdef CONFIG_SUSPEND |
---|
.. | .. |
---|
133 | 139 | struct cpuidle_device *dev, int index) |
---|
134 | 140 | { |
---|
135 | 141 | ktime_t time_start, time_end; |
---|
| 142 | + struct cpuidle_state *target_state = &drv->states[index]; |
---|
136 | 143 | |
---|
137 | 144 | time_start = ns_to_ktime(local_clock()); |
---|
138 | 145 | |
---|
139 | | - /* |
---|
140 | | - * trace_suspend_resume() called by tick_freeze() for the last CPU |
---|
141 | | - * executing it contains RCU usage regarded as invalid in the idle |
---|
142 | | - * context, so tell RCU about that. |
---|
143 | | - */ |
---|
144 | | - RCU_NONIDLE(tick_freeze()); |
---|
| 146 | + tick_freeze(); |
---|
145 | 147 | /* |
---|
146 | 148 | * The state used here cannot be a "coupled" one, because the "coupled" |
---|
147 | 149 | * cpuidle mechanism enables interrupts and doing that with timekeeping |
---|
148 | 150 | * suspended is generally unsafe. |
---|
149 | 151 | */ |
---|
150 | 152 | stop_critical_timings(); |
---|
151 | | - drv->states[index].enter_s2idle(dev, drv, index); |
---|
| 153 | + if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) |
---|
| 154 | + rcu_idle_enter(); |
---|
| 155 | + target_state->enter_s2idle(dev, drv, index); |
---|
152 | 156 | if (WARN_ON_ONCE(!irqs_disabled())) |
---|
153 | 157 | local_irq_disable(); |
---|
154 | | - /* |
---|
155 | | - * timekeeping_resume() that will be called by tick_unfreeze() for the |
---|
156 | | - * first CPU executing it calls functions containing RCU read-side |
---|
157 | | - * critical sections, so tell RCU about that. |
---|
158 | | - */ |
---|
159 | | - RCU_NONIDLE(tick_unfreeze()); |
---|
| 158 | + if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) |
---|
| 159 | + rcu_idle_exit(); |
---|
| 160 | + tick_unfreeze(); |
---|
160 | 161 | start_critical_timings(); |
---|
161 | 162 | |
---|
162 | 163 | time_end = ns_to_ktime(local_clock()); |
---|
.. | .. |
---|
182 | 183 | * that interrupts won't be enabled when it exits and allows the tick to |
---|
183 | 184 | * be frozen safely. |
---|
184 | 185 | */ |
---|
185 | | - index = find_deepest_state(drv, dev, UINT_MAX, 0, true); |
---|
186 | | - if (index > 0) |
---|
| 186 | + index = find_deepest_state(drv, dev, U64_MAX, 0, true); |
---|
| 187 | + if (index > 0) { |
---|
187 | 188 | enter_s2idle_proper(drv, dev, index); |
---|
188 | | - |
---|
| 189 | + local_irq_enable(); |
---|
| 190 | + } |
---|
189 | 191 | return index; |
---|
190 | 192 | } |
---|
191 | 193 | #endif /* CONFIG_SUSPEND */ |
---|
.. | .. |
---|
201 | 203 | { |
---|
202 | 204 | int entered_state; |
---|
203 | 205 | |
---|
204 | | - struct cpuidle_state *target_state = &drv->states[index]; |
---|
205 | | - bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP); |
---|
| 206 | + struct cpuidle_state *target_state; |
---|
| 207 | + bool broadcast; |
---|
206 | 208 | ktime_t time_start, time_end; |
---|
207 | | - s64 diff; |
---|
| 209 | + |
---|
| 210 | + /* |
---|
| 211 | + * The vendor hook may modify index, which means target_state and |
---|
| 212 | + * broadcast must be assigned after the vendor hook. |
---|
| 213 | + */ |
---|
| 214 | + trace_android_vh_cpu_idle_enter(&index, dev); |
---|
| 215 | + if (index < 0) |
---|
| 216 | + return index; |
---|
| 217 | + |
---|
| 218 | + target_state = &drv->states[index]; |
---|
| 219 | + broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP); |
---|
208 | 220 | |
---|
209 | 221 | /* |
---|
210 | 222 | * Tell the time framework to switch to a broadcast timer because our |
---|
.. | .. |
---|
212 | 224 | * CPU as a broadcast timer, this call may fail if it is not available. |
---|
213 | 225 | */ |
---|
214 | 226 | if (broadcast && tick_broadcast_enter()) { |
---|
215 | | - index = find_deepest_state(drv, dev, target_state->exit_latency, |
---|
| 227 | + index = find_deepest_state(drv, dev, target_state->exit_latency_ns, |
---|
216 | 228 | CPUIDLE_FLAG_TIMER_STOP, false); |
---|
217 | 229 | if (index < 0) { |
---|
218 | 230 | default_idle_call(); |
---|
.. | .. |
---|
222 | 234 | broadcast = false; |
---|
223 | 235 | } |
---|
224 | 236 | |
---|
225 | | - /* Take note of the planned idle state. */ |
---|
226 | | - sched_idle_set_state(target_state, index); |
---|
| 237 | + if (target_state->flags & CPUIDLE_FLAG_TLB_FLUSHED) |
---|
| 238 | + leave_mm(dev->cpu); |
---|
227 | 239 | |
---|
228 | | - trace_cpu_idle_rcuidle(index, dev->cpu); |
---|
| 240 | + /* Take note of the planned idle state. */ |
---|
| 241 | + sched_idle_set_state(target_state); |
---|
| 242 | + |
---|
| 243 | + trace_cpu_idle(index, dev->cpu); |
---|
229 | 244 | time_start = ns_to_ktime(local_clock()); |
---|
230 | 245 | |
---|
231 | 246 | stop_critical_timings(); |
---|
| 247 | + if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) |
---|
| 248 | + rcu_idle_enter(); |
---|
232 | 249 | entered_state = target_state->enter(dev, drv, index); |
---|
| 250 | + if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) |
---|
| 251 | + rcu_idle_exit(); |
---|
233 | 252 | start_critical_timings(); |
---|
234 | 253 | |
---|
235 | 254 | sched_clock_idle_wakeup_event(); |
---|
236 | 255 | time_end = ns_to_ktime(local_clock()); |
---|
237 | | - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); |
---|
| 256 | + trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu); |
---|
| 257 | + trace_android_vh_cpu_idle_exit(entered_state, dev); |
---|
238 | 258 | |
---|
239 | 259 | /* The cpu is no longer idle or about to enter idle. */ |
---|
240 | | - sched_idle_set_state(NULL, -1); |
---|
| 260 | + sched_idle_set_state(NULL); |
---|
241 | 261 | |
---|
242 | 262 | if (broadcast) { |
---|
243 | 263 | if (WARN_ON_ONCE(!irqs_disabled())) |
---|
.. | .. |
---|
249 | 269 | if (!cpuidle_state_is_coupled(drv, index)) |
---|
250 | 270 | local_irq_enable(); |
---|
251 | 271 | |
---|
252 | | - diff = ktime_us_delta(time_end, time_start); |
---|
253 | | - if (diff > INT_MAX) |
---|
254 | | - diff = INT_MAX; |
---|
255 | | - |
---|
256 | | - dev->last_residency = (int) diff; |
---|
257 | | - |
---|
258 | 272 | if (entered_state >= 0) { |
---|
259 | | - /* Update cpuidle counters */ |
---|
260 | | - /* This can be moved to within driver enter routine |
---|
| 273 | + s64 diff, delay = drv->states[entered_state].exit_latency_ns; |
---|
| 274 | + int i; |
---|
| 275 | + |
---|
| 276 | + /* |
---|
| 277 | + * Update cpuidle counters |
---|
| 278 | + * This can be moved to within driver enter routine, |
---|
261 | 279 | * but that results in multiple copies of same code. |
---|
262 | 280 | */ |
---|
263 | | - dev->states_usage[entered_state].time += dev->last_residency; |
---|
| 281 | + diff = ktime_sub(time_end, time_start); |
---|
| 282 | + |
---|
| 283 | + dev->last_residency_ns = diff; |
---|
| 284 | + dev->states_usage[entered_state].time_ns += diff; |
---|
264 | 285 | dev->states_usage[entered_state].usage++; |
---|
| 286 | + |
---|
| 287 | + if (diff < drv->states[entered_state].target_residency_ns) { |
---|
| 288 | + for (i = entered_state - 1; i >= 0; i--) { |
---|
| 289 | + if (dev->states_usage[i].disable) |
---|
| 290 | + continue; |
---|
| 291 | + |
---|
| 292 | + /* Shallower states are enabled, so update. */ |
---|
| 293 | + dev->states_usage[entered_state].above++; |
---|
| 294 | + break; |
---|
| 295 | + } |
---|
| 296 | + } else if (diff > delay) { |
---|
| 297 | + for (i = entered_state + 1; i < drv->state_count; i++) { |
---|
| 298 | + if (dev->states_usage[i].disable) |
---|
| 299 | + continue; |
---|
| 300 | + |
---|
| 301 | + /* |
---|
| 302 | + * Update if a deeper state would have been a |
---|
| 303 | + * better match for the observed idle duration. |
---|
| 304 | + */ |
---|
| 305 | + if (diff - delay >= drv->states[i].target_residency_ns) |
---|
| 306 | + dev->states_usage[entered_state].below++; |
---|
| 307 | + |
---|
| 308 | + break; |
---|
| 309 | + } |
---|
| 310 | + } |
---|
265 | 311 | } else { |
---|
266 | | - dev->last_residency = 0; |
---|
| 312 | + dev->last_residency_ns = 0; |
---|
| 313 | + dev->states_usage[index].rejected++; |
---|
267 | 314 | } |
---|
268 | 315 | |
---|
269 | 316 | return entered_state; |
---|
.. | .. |
---|
301 | 348 | int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, |
---|
302 | 349 | int index) |
---|
303 | 350 | { |
---|
| 351 | + int ret = 0; |
---|
| 352 | + |
---|
| 353 | + /* |
---|
| 354 | + * Store the next hrtimer, which is either the next tick or the next |
---|
| 355 | + * timer event, whichever expires first. For this data to be useful to |
---|
| 356 | + * consumers outside cpuidle, we rely on the governor's ->select() |
---|
| 357 | + * callback having already decided whether to stop the tick or not. |
---|
| 358 | + */ |
---|
| 359 | + WRITE_ONCE(dev->next_hrtimer, tick_nohz_get_next_hrtimer()); |
---|
| 360 | + |
---|
304 | 361 | if (cpuidle_state_is_coupled(drv, index)) |
---|
305 | | - return cpuidle_enter_state_coupled(dev, drv, index); |
---|
306 | | - return cpuidle_enter_state(dev, drv, index); |
---|
| 362 | + ret = cpuidle_enter_state_coupled(dev, drv, index); |
---|
| 363 | + else |
---|
| 364 | + ret = cpuidle_enter_state(dev, drv, index); |
---|
| 365 | + |
---|
| 366 | + WRITE_ONCE(dev->next_hrtimer, 0); /* reset the stored value once the enter call has returned */ |
---|
| 367 | + return ret; /* result of whichever enter routine ran */ |
---|
307 | 368 | } |
---|
308 | 369 | |
---|
309 | 370 | /** |
---|
.. | .. |
---|
318 | 379 | { |
---|
319 | 380 | if (cpuidle_curr_governor->reflect && index >= 0) |
---|
320 | 381 | cpuidle_curr_governor->reflect(dev, index); |
---|
| 382 | +} |
---|
| 383 | + |
---|
| 384 | +/** |
---|
| 385 | + * cpuidle_poll_time - return the amount of time to poll for; governors |
---|
| 386 | + * can override dev->poll_limit_ns if necessary. |
---|
| 387 | + * |
---|
| 388 | + * @drv: the cpuidle driver tied to the CPU |
---|
| 389 | + * @dev: the cpuidle device |
---|
| 390 | + * Return: dev->poll_limit_ns, computing and caching it first if it is zero. |
---|
| 391 | + */ |
---|
| 392 | +u64 cpuidle_poll_time(struct cpuidle_driver *drv, |
---|
| 393 | + struct cpuidle_device *dev) |
---|
| 394 | +{ |
---|
| 395 | + int i; |
---|
| 396 | + u64 limit_ns; |
---|
| 397 | + |
---|
| 398 | + if (dev->poll_limit_ns) /* a nonzero limit is already set: use it as is */ |
---|
| 399 | + return dev->poll_limit_ns; |
---|
| 400 | + |
---|
| 401 | + limit_ns = TICK_NSEC; /* fallback when every state from index 1 up is disabled */ |
---|
| 402 | + for (i = 1; i < drv->state_count; i++) { |
---|
| 403 | + if (dev->states_usage[i].disable) |
---|
| 404 | + continue; |
---|
| 405 | + |
---|
| 406 | + limit_ns = drv->states[i].target_residency_ns; /* first non-disabled state after state 0 */ |
---|
| 407 | + break; |
---|
| 408 | + } |
---|
| 409 | + |
---|
| 410 | + dev->poll_limit_ns = limit_ns; /* cached so later calls take the early return above */ |
---|
| 411 | + |
---|
| 412 | + return dev->poll_limit_ns; |
---|
321 | 413 | } |
---|
322 | 414 | |
---|
323 | 415 | /** |
---|
.. | .. |
---|
339 | 431 | { |
---|
340 | 432 | if (enabled_devices) { |
---|
341 | 433 | initialized = 0; |
---|
342 | | - wake_up_all_idle_cpus(); |
---|
| 434 | + wake_up_all_online_idle_cpus(); |
---|
343 | 435 | } |
---|
344 | 436 | |
---|
345 | 437 | /* |
---|
.. | .. |
---|
483 | 575 | static void __cpuidle_device_init(struct cpuidle_device *dev) |
---|
484 | 576 | { /* Zero the usage data and the residency/next-timer fields of @dev. */ |
---|
485 | 577 | memset(dev->states_usage, 0, sizeof(dev->states_usage)); |
---|
486 | | - dev->last_residency = 0; |
---|
| 578 | + dev->last_residency_ns = 0; |
---|
| 579 | + dev->next_hrtimer = 0; /* same value cpuidle_enter() writes back after leaving idle */ |
---|
487 | 580 | } |
---|
488 | 581 | |
---|
489 | 582 | /** |
---|
.. | .. |
---|
495 | 588 | */ |
---|
496 | 589 | static int __cpuidle_register_device(struct cpuidle_device *dev) |
---|
497 | 590 | { |
---|
498 | | - int ret; |
---|
499 | 591 | struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); |
---|
| 592 | + int i, ret; |
---|
500 | 593 | |
---|
501 | 594 | if (!try_module_get(drv->owner)) |
---|
502 | 595 | return -EINVAL; |
---|
| 596 | + |
---|
| 597 | + for (i = 0; i < drv->state_count; i++) { |
---|
| 598 | + if (drv->states[i].flags & CPUIDLE_FLAG_UNUSABLE) |
---|
| 599 | + dev->states_usage[i].disable |= CPUIDLE_STATE_DISABLED_BY_DRIVER; |
---|
| 600 | + |
---|
| 601 | + if (drv->states[i].flags & CPUIDLE_FLAG_OFF) |
---|
| 602 | + dev->states_usage[i].disable |= CPUIDLE_STATE_DISABLED_BY_USER; |
---|
| 603 | + } |
---|
503 | 604 | |
---|
504 | 605 | per_cpu(cpuidle_devices, dev->cpu) = dev; |
---|
505 | 606 | list_add(&dev->device_list, &cpuidle_detected_devices); |
---|
.. | .. |
---|
654 | 755 | } |
---|
655 | 756 | EXPORT_SYMBOL_GPL(cpuidle_register); |
---|
656 | 757 | |
---|
657 | | -#ifdef CONFIG_SMP |
---|
658 | | - |
---|
659 | | -/* |
---|
660 | | - * This function gets called when a part of the kernel has a new latency |
---|
661 | | - * requirement. This means we need to get all processors out of their C-state, |
---|
662 | | - * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that |
---|
663 | | - * wakes them all right up. |
---|
664 | | - */ |
---|
665 | | -static int cpuidle_latency_notify(struct notifier_block *b, |
---|
666 | | - unsigned long l, void *v) |
---|
667 | | -{ |
---|
668 | | - wake_up_all_idle_cpus(); |
---|
669 | | - return NOTIFY_OK; |
---|
670 | | -} |
---|
671 | | - |
---|
672 | | -static struct notifier_block cpuidle_latency_notifier = { |
---|
673 | | - .notifier_call = cpuidle_latency_notify, |
---|
674 | | -}; |
---|
675 | | - |
---|
676 | | -static inline void latency_notifier_init(struct notifier_block *n) |
---|
677 | | -{ |
---|
678 | | - pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n); |
---|
679 | | -} |
---|
680 | | - |
---|
681 | | -#else /* CONFIG_SMP */ |
---|
682 | | - |
---|
683 | | -#define latency_notifier_init(x) do { } while (0) |
---|
684 | | - |
---|
685 | | -#endif /* CONFIG_SMP */ |
---|
686 | | - |
---|
687 | 758 | /** |
---|
688 | 759 | * cpuidle_init - core initializer |
---|
689 | 760 | */ |
---|
690 | 761 | static int __init cpuidle_init(void) |
---|
691 | 762 | { |
---|
692 | | - int ret; |
---|
693 | | - |
---|
694 | 763 | if (cpuidle_disabled()) |
---|
695 | 764 | return -ENODEV; /* cpuidle is disabled: the interface is not added */ |
---|
696 | 765 | |
---|
697 | | - ret = cpuidle_add_interface(cpu_subsys.dev_root); |
---|
698 | | - if (ret) |
---|
699 | | - return ret; |
---|
700 | | - |
---|
701 | | - latency_notifier_init(&cpuidle_latency_notifier); |
---|
702 | | - |
---|
703 | | - return 0; |
---|
| 766 | + return cpuidle_add_interface(cpu_subsys.dev_root); /* the interface result is the initcall result */ |
---|
704 | 767 | } |
---|
705 | 768 | |
---|
706 | 769 | module_param(off, int, 0444); |
---|
| 770 | +module_param_string(governor, param_governor, CPUIDLE_NAME_LEN, 0444); |
---|
707 | 771 | core_initcall(cpuidle_init); |
---|