.. | .. |
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
1 | 2 | /* |
2 | 3 | * Generic entry points for the idle threads and |
3 | 4 | * implementation of the idle task scheduling class. |
.. | .. |
9 | 10 | |
10 | 11 | #include <trace/events/power.h> |
11 | 12 | |
| 13 | +#include <trace/hooks/sched.h> |
| 14 | + |
12 | 15 | /* Linker adds these: start and end of __cpuidle functions */ |
13 | 16 | extern char __cpuidle_text_start[], __cpuidle_text_end[]; |
14 | 17 | |
.. | .. |
16 | 19 | * sched_idle_set_state - Record idle state for the current CPU. |
17 | 20 | * @idle_state: State to record. |
18 | 21 | */ |
19 | | -void sched_idle_set_state(struct cpuidle_state *idle_state, int index) |
| 22 | +void sched_idle_set_state(struct cpuidle_state *idle_state) |
20 | 23 | { |
21 | 24 | idle_set_state(this_rq(), idle_state); |
22 | | - idle_set_state_idx(this_rq(), index); |
23 | 25 | } |
24 | 26 | |
25 | 27 | static int __read_mostly cpu_idle_force_poll; |
.. | .. |
54 | 56 | |
55 | 57 | static noinline int __cpuidle cpu_idle_poll(void) |
56 | 58 | { |
57 | | - rcu_idle_enter(); |
58 | | - trace_cpu_idle_rcuidle(0, smp_processor_id()); |
59 | | - local_irq_enable(); |
| 59 | + trace_cpu_idle(0, smp_processor_id()); |
60 | 60 | stop_critical_timings(); |
| 61 | + rcu_idle_enter(); |
| 62 | + local_irq_enable(); |
61 | 63 | |
62 | 64 | while (!tif_need_resched() && |
63 | | - (cpu_idle_force_poll || tick_check_broadcast_expired())) |
| 65 | + (cpu_idle_force_poll || tick_check_broadcast_expired())) |
64 | 66 | cpu_relax(); |
65 | | - start_critical_timings(); |
66 | | - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); |
| 67 | + |
67 | 68 | rcu_idle_exit(); |
| 69 | + start_critical_timings(); |
| 70 | + trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); |
68 | 71 | |
69 | 72 | return 1; |
70 | 73 | } |
.. | .. |
77 | 80 | void __weak arch_cpu_idle(void) |
78 | 81 | { |
79 | 82 | cpu_idle_force_poll = 1; |
80 | | - local_irq_enable(); |
| 83 | + raw_local_irq_enable(); |
81 | 84 | } |
82 | 85 | |
83 | 86 | /** |
.. | .. |
90 | 93 | if (current_clr_polling_and_test()) { |
91 | 94 | local_irq_enable(); |
92 | 95 | } else { |
| 96 | + |
| 97 | + trace_cpu_idle(1, smp_processor_id()); |
93 | 98 | stop_critical_timings(); |
| 99 | + |
| 100 | + /* |
| 101 | + * arch_cpu_idle() is supposed to enable IRQs, however |
| 102 | + * we can't do that because of RCU and tracing. |
| 103 | + * |
| 104 | + * Trace IRQs enable here, then switch off RCU, and have |
| 105 | + * arch_cpu_idle() use raw_local_irq_enable(). Note that |
| 106 | + * rcu_idle_enter() relies on lockdep IRQ state, so switch that |
| 107 | + * last -- this is very similar to the entry code. |
| 108 | + */ |
| 109 | + trace_hardirqs_on_prepare(); |
| 110 | + lockdep_hardirqs_on_prepare(_THIS_IP_); |
| 111 | + rcu_idle_enter(); |
| 112 | + lockdep_hardirqs_on(_THIS_IP_); |
| 113 | + |
94 | 114 | arch_cpu_idle(); |
| 115 | + |
| 116 | + /* |
| 117 | + * OK, so IRQs are enabled here, but RCU needs them disabled to |
| 118 | + * turn itself back on.. funny thing is that disabling IRQs |
| 119 | + * will cause tracing, which needs RCU. Jump through hoops to |
| 120 | + * make it 'work'. |
| 121 | + */ |
| 122 | + raw_local_irq_disable(); |
| 123 | + lockdep_hardirqs_off(_THIS_IP_); |
| 124 | + rcu_idle_exit(); |
| 125 | + lockdep_hardirqs_on(_THIS_IP_); |
| 126 | + raw_local_irq_enable(); |
| 127 | + |
95 | 128 | start_critical_timings(); |
| 129 | + trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); |
96 | 130 | } |
| 131 | +} |
| 132 | + |
| 133 | +static int call_cpuidle_s2idle(struct cpuidle_driver *drv, |
| 134 | + struct cpuidle_device *dev) |
| 135 | +{ |
| 136 | + if (current_clr_polling_and_test()) |
| 137 | + return -EBUSY; |
| 138 | + |
| 139 | + return cpuidle_enter_s2idle(drv, dev); |
97 | 140 | } |
98 | 141 | |
99 | 142 | static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, |
.. | .. |
104 | 147 | * update no idle residency and return. |
105 | 148 | */ |
106 | 149 | if (current_clr_polling_and_test()) { |
107 | | - dev->last_residency = 0; |
| 150 | + dev->last_residency_ns = 0; |
108 | 151 | local_irq_enable(); |
109 | 152 | return -EBUSY; |
110 | 153 | } |
.. | .. |
149 | 192 | |
150 | 193 | if (cpuidle_not_available(drv, dev)) { |
151 | 194 | tick_nohz_idle_stop_tick(); |
152 | | - rcu_idle_enter(); |
153 | 195 | |
154 | 196 | default_idle_call(); |
155 | 197 | goto exit_idle; |
.. | .. |
158 | 200 | /* |
159 | 201 | * Suspend-to-idle ("s2idle") is a system state in which all user space |
160 | 202 | * has been frozen, all I/O devices have been suspended and the only |
161 | | - * activity happens here and in iterrupts (if any). In that case bypass |
| 203 | + * activity happens here and in interrupts (if any). In that case bypass |
162 | 204 | * the cpuidle governor and go straight for the deepest idle state |
163 | 205 | * available. Possibly also suspend the local tick and the entire |
164 | 206 | * timekeeping to prevent timer interrupts from kicking us out of idle |
165 | 207 | * until a proper wakeup interrupt happens. |
166 | 208 | */ |
167 | 209 | |
168 | | - if (idle_should_enter_s2idle() || dev->use_deepest_state) { |
| 210 | + if (idle_should_enter_s2idle() || dev->forced_idle_latency_limit_ns) { |
| 211 | + u64 max_latency_ns; |
| 212 | + |
169 | 213 | if (idle_should_enter_s2idle()) { |
170 | | - rcu_idle_enter(); |
171 | 214 | |
172 | | - entered_state = cpuidle_enter_s2idle(drv, dev); |
173 | | - if (entered_state > 0) { |
174 | | - local_irq_enable(); |
| 215 | + entered_state = call_cpuidle_s2idle(drv, dev); |
| 216 | + if (entered_state > 0) |
175 | 217 | goto exit_idle; |
176 | | - } |
177 | 218 | |
178 | | - rcu_idle_exit(); |
| 219 | + max_latency_ns = U64_MAX; |
| 220 | + } else { |
| 221 | + max_latency_ns = dev->forced_idle_latency_limit_ns; |
179 | 222 | } |
180 | 223 | |
181 | 224 | tick_nohz_idle_stop_tick(); |
182 | | - rcu_idle_enter(); |
183 | 225 | |
184 | | - next_state = cpuidle_find_deepest_state(drv, dev); |
| 226 | + next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns); |
185 | 227 | call_cpuidle(drv, dev, next_state); |
186 | 228 | } else { |
187 | 229 | bool stop_tick = true; |
.. | .. |
195 | 237 | tick_nohz_idle_stop_tick(); |
196 | 238 | else |
197 | 239 | tick_nohz_idle_retain_tick(); |
198 | | - |
199 | | - rcu_idle_enter(); |
200 | 240 | |
201 | 241 | entered_state = call_cpuidle(drv, dev, next_state); |
202 | 242 | /* |
.. | .. |
213 | 253 | */ |
214 | 254 | if (WARN_ON_ONCE(irqs_disabled())) |
215 | 255 | local_irq_enable(); |
216 | | - |
217 | | - rcu_idle_exit(); |
218 | 256 | } |
219 | 257 | |
220 | 258 | /* |
.. | .. |
238 | 276 | tick_nohz_idle_enter(); |
239 | 277 | |
240 | 278 | while (!need_resched()) { |
241 | | - check_pgt_cache(); |
242 | 279 | rmb(); |
243 | 280 | |
244 | 281 | local_irq_disable(); |
.. | .. |
250 | 287 | } |
251 | 288 | |
252 | 289 | arch_cpu_idle_enter(); |
| 290 | + rcu_nocb_flush_deferred_wakeup(); |
253 | 291 | |
254 | 292 | /* |
255 | 293 | * In poll mode we reenable interrupts and spin. Also if we |
.. | .. |
284 | 322 | */ |
285 | 323 | smp_mb__after_atomic(); |
286 | 324 | |
287 | | - sched_ttwu_pending(); |
| 325 | + /* |
| 326 | + * RCU relies on this call to be done outside of an RCU read-side |
| 327 | + * critical section. |
| 328 | + */ |
| 329 | + flush_smp_call_function_from_idle(); |
288 | 330 | schedule_idle(); |
289 | 331 | |
290 | 332 | if (unlikely(klp_patch_pending(current))) |
.. | .. |
312 | 354 | return HRTIMER_NORESTART; |
313 | 355 | } |
314 | 356 | |
315 | | -void play_idle(unsigned long duration_ms) |
| 357 | +void play_idle_precise(u64 duration_ns, u64 latency_ns) |
316 | 358 | { |
317 | 359 | struct idle_timer it; |
318 | 360 | |
.. | .. |
324 | 366 | WARN_ON_ONCE(current->nr_cpus_allowed != 1); |
325 | 367 | WARN_ON_ONCE(!(current->flags & PF_KTHREAD)); |
326 | 368 | WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY)); |
327 | | - WARN_ON_ONCE(!duration_ms); |
| 369 | + WARN_ON_ONCE(!duration_ns); |
328 | 370 | |
329 | 371 | rcu_sleep_check(); |
330 | 372 | preempt_disable(); |
331 | 373 | current->flags |= PF_IDLE; |
332 | | - cpuidle_use_deepest_state(true); |
| 374 | + cpuidle_use_deepest_state(latency_ns); |
333 | 375 | |
334 | 376 | it.done = 0; |
335 | | - hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
| 377 | + hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); |
336 | 378 | it.timer.function = idle_inject_timer_fn; |
337 | | - hrtimer_start(&it.timer, ms_to_ktime(duration_ms), HRTIMER_MODE_REL_PINNED); |
| 379 | + hrtimer_start(&it.timer, ns_to_ktime(duration_ns), |
| 380 | + HRTIMER_MODE_REL_PINNED_HARD); |
338 | 381 | |
339 | 382 | while (!READ_ONCE(it.done)) |
340 | 383 | do_idle(); |
341 | 384 | |
342 | | - cpuidle_use_deepest_state(false); |
| 385 | + cpuidle_use_deepest_state(0); |
343 | 386 | current->flags &= ~PF_IDLE; |
344 | 387 | |
345 | 388 | preempt_fold_need_resched(); |
346 | 389 | preempt_enable(); |
347 | 390 | } |
348 | | -EXPORT_SYMBOL_GPL(play_idle); |
| 391 | +EXPORT_SYMBOL_GPL(play_idle_precise); |
349 | 392 | |
350 | 393 | void cpu_startup_entry(enum cpuhp_state state) |
351 | 394 | { |
352 | | - /* |
353 | | - * This #ifdef needs to die, but it's too late in the cycle to |
354 | | - * make this generic (ARM and SH have never invoked the canary |
355 | | - * init for the non boot CPUs!). Will be fixed in 3.11 |
356 | | - */ |
357 | | -#ifdef CONFIG_X86 |
358 | | - /* |
359 | | - * If we're the non-boot CPU, nothing set the stack canary up |
360 | | - * for us. The boot CPU already has it initialized but no harm |
361 | | - * in doing it again. This is a good place for updating it, as |
362 | | - * we wont ever return from this function (so the invalid |
363 | | - * canaries already on the stack wont ever trigger). |
364 | | - */ |
365 | | - boot_init_stack_canary(); |
366 | | -#endif |
367 | 395 | arch_cpu_idle_prepare(); |
368 | 396 | cpuhp_online_idle(state); |
369 | 397 | while (1) |
.. | .. |
376 | 404 | |
377 | 405 | #ifdef CONFIG_SMP |
378 | 406 | static int |
379 | | -select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags, |
380 | | - int sibling_count_hint) |
| 407 | +select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags) |
381 | 408 | { |
382 | 409 | return task_cpu(p); /* IDLE tasks as never migrated */ |
| 410 | +} |
| 411 | + |
| 412 | +static int |
| 413 | +balance_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) |
| 414 | +{ |
| 415 | + return WARN_ON_ONCE(1); |
383 | 416 | } |
384 | 417 | #endif |
385 | 418 | |
.. | .. |
391 | 424 | resched_curr(rq); |
392 | 425 | } |
393 | 426 | |
394 | | -static struct task_struct * |
395 | | -pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) |
| 427 | +static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) |
396 | 428 | { |
397 | | - put_prev_task(rq, prev); |
| 429 | +} |
| 430 | + |
| 431 | +static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first) |
| 432 | +{ |
398 | 433 | update_idle_core(rq); |
399 | 434 | schedstat_inc(rq->sched_goidle); |
| 435 | +} |
400 | 436 | |
401 | | - return rq->idle; |
| 437 | +struct task_struct *pick_next_task_idle(struct rq *rq) |
| 438 | +{ |
| 439 | + struct task_struct *next = rq->idle; |
| 440 | + |
| 441 | + set_next_task_idle(rq, next, true); |
| 442 | + |
| 443 | + return next; |
402 | 444 | } |
403 | 445 | |
404 | 446 | /* |
.. | .. |
410 | 452 | { |
411 | 453 | raw_spin_unlock_irq(&rq->lock); |
412 | 454 | printk(KERN_ERR "bad: scheduling from the idle thread!\n"); |
| 455 | + |
| 456 | + trace_android_rvh_dequeue_task_idle(p); |
413 | 457 | dump_stack(); |
414 | 458 | raw_spin_lock_irq(&rq->lock); |
415 | | -} |
416 | | - |
417 | | -static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) |
418 | | -{ |
419 | 459 | } |
420 | 460 | |
421 | 461 | /* |
.. | .. |
430 | 470 | { |
431 | 471 | } |
432 | 472 | |
433 | | -static void set_curr_task_idle(struct rq *rq) |
434 | | -{ |
435 | | -} |
436 | | - |
437 | 473 | static void switched_to_idle(struct rq *rq, struct task_struct *p) |
438 | 474 | { |
439 | 475 | BUG(); |
.. | .. |
445 | 481 | BUG(); |
446 | 482 | } |
447 | 483 | |
448 | | -static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task) |
449 | | -{ |
450 | | - return 0; |
451 | | -} |
452 | | - |
453 | 484 | static void update_curr_idle(struct rq *rq) |
454 | 485 | { |
455 | 486 | } |
.. | .. |
457 | 488 | /* |
458 | 489 | * Simple, special scheduling class for the per-CPU idle tasks: |
459 | 490 | */ |
460 | | -const struct sched_class idle_sched_class = { |
461 | | - /* .next is NULL */ |
| 491 | +const struct sched_class idle_sched_class |
| 492 | + __section("__idle_sched_class") = { |
462 | 493 | /* no enqueue/yield_task for idle tasks */ |
463 | 494 | |
464 | 495 | /* dequeue is not valid, we print a debug message there: */ |
.. | .. |
468 | 499 | |
469 | 500 | .pick_next_task = pick_next_task_idle, |
470 | 501 | .put_prev_task = put_prev_task_idle, |
| 502 | + .set_next_task = set_next_task_idle, |
471 | 503 | |
472 | 504 | #ifdef CONFIG_SMP |
| 505 | + .balance = balance_idle, |
473 | 506 | .select_task_rq = select_task_rq_idle, |
474 | 507 | .set_cpus_allowed = set_cpus_allowed_common, |
475 | 508 | #endif |
476 | 509 | |
477 | | - .set_curr_task = set_curr_task_idle, |
478 | 510 | .task_tick = task_tick_idle, |
479 | | - |
480 | | - .get_rr_interval = get_rr_interval_idle, |
481 | 511 | |
482 | 512 | .prio_changed = prio_changed_idle, |
483 | 513 | .switched_to = switched_to_idle, |