```diff
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ ... @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Generic entry points for the idle threads and
  * implementation of the idle task scheduling class.
@@ ... @@
 
 #include <trace/events/power.h>
 
+#include <trace/hooks/sched.h>
+
 /* Linker adds these: start and end of __cpuidle functions */
 extern char __cpuidle_text_start[], __cpuidle_text_end[];
 
@@ ... @@
  * sched_idle_set_state - Record idle state for the current CPU.
  * @idle_state: State to record.
  */
-void sched_idle_set_state(struct cpuidle_state *idle_state, int index)
+void sched_idle_set_state(struct cpuidle_state *idle_state)
 {
 	idle_set_state(this_rq(), idle_state);
-	idle_set_state_idx(this_rq(), index);
 }
 
 static int __read_mostly cpu_idle_force_poll;
@@ ... @@
 
 static noinline int __cpuidle cpu_idle_poll(void)
 {
-	rcu_idle_enter();
-	trace_cpu_idle_rcuidle(0, smp_processor_id());
-	local_irq_enable();
+	trace_cpu_idle(0, smp_processor_id());
 	stop_critical_timings();
+	rcu_idle_enter();
+	local_irq_enable();
 
 	while (!tif_need_resched() &&
-	      (cpu_idle_force_poll || tick_check_broadcast_expired()))
+	       (cpu_idle_force_poll || tick_check_broadcast_expired()))
 		cpu_relax();
-	start_critical_timings();
-	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
+
 	rcu_idle_exit();
+	start_critical_timings();
+	trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
 
 	return 1;
 }
@@ ... @@
 void __weak arch_cpu_idle(void)
 {
 	cpu_idle_force_poll = 1;
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /**
```
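The `arch_cpu_idle()` hunk above switches the weak default from `local_irq_enable()` to `raw_local_irq_enable()`: the idle loop (see the `default_idle_call()` hunk that follows) now performs the tracing, lockdep and RCU bookkeeping itself, so the architecture hook must re-enable interrupts without going back through the instrumented helpers. A minimal sketch of what an architecture override is expected to look like under this contract — the architecture and the `cpu_do_wait_for_interrupt()` helper are hypothetical, for illustration only:

```c
/* Hypothetical arch override, sketched only to show the new IRQ contract. */
void arch_cpu_idle(void)
{
	/*
	 * Wait for a wakeup event with interrupts masked; the actual
	 * instruction is architecture specific (WFI, hlt, ...).
	 */
	cpu_do_wait_for_interrupt();	/* hypothetical helper */

	/*
	 * Re-enable IRQs with the raw, non-instrumented variant: the
	 * caller has already issued the trace_hardirqs_on()/lockdep/RCU
	 * transitions on our behalf.
	 */
	raw_local_irq_enable();
}
```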
```diff
@@ ... @@
 	if (current_clr_polling_and_test()) {
 		local_irq_enable();
 	} else {
+
+		trace_cpu_idle(1, smp_processor_id());
 		stop_critical_timings();
+
+		/*
+		 * arch_cpu_idle() is supposed to enable IRQs, however
+		 * we can't do that because of RCU and tracing.
+		 *
+		 * Trace IRQs enable here, then switch off RCU, and have
+		 * arch_cpu_idle() use raw_local_irq_enable(). Note that
+		 * rcu_idle_enter() relies on lockdep IRQ state, so switch that
+		 * last -- this is very similar to the entry code.
+		 */
+		trace_hardirqs_on_prepare();
+		lockdep_hardirqs_on_prepare(_THIS_IP_);
+		rcu_idle_enter();
+		lockdep_hardirqs_on(_THIS_IP_);
+
 		arch_cpu_idle();
+
+		/*
+		 * OK, so IRQs are enabled here, but RCU needs them disabled to
+		 * turn itself back on.. funny thing is that disabling IRQs
+		 * will cause tracing, which needs RCU. Jump through hoops to
+		 * make it 'work'.
+		 */
+		raw_local_irq_disable();
+		lockdep_hardirqs_off(_THIS_IP_);
+		rcu_idle_exit();
+		lockdep_hardirqs_on(_THIS_IP_);
+		raw_local_irq_enable();
+
 		start_critical_timings();
+		trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
 	}
+}
+
+static int call_cpuidle_s2idle(struct cpuidle_driver *drv,
+			       struct cpuidle_device *dev)
+{
+	if (current_clr_polling_and_test())
+		return -EBUSY;
+
+	return cpuidle_enter_s2idle(drv, dev);
 }
 
 static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
@@ ... @@
 	 * update no idle residency and return.
 	 */
 	if (current_clr_polling_and_test()) {
-		dev->last_residency = 0;
+		dev->last_residency_ns = 0;
 		local_irq_enable();
 		return -EBUSY;
 	}
@@ ... @@
 
 	if (cpuidle_not_available(drv, dev)) {
 		tick_nohz_idle_stop_tick();
-		rcu_idle_enter();
 
 		default_idle_call();
 		goto exit_idle;
@@ ... @@
 	/*
 	 * Suspend-to-idle ("s2idle") is a system state in which all user space
 	 * has been frozen, all I/O devices have been suspended and the only
-	 * activity happens here and in iterrupts (if any). In that case bypass
+	 * activity happens here and in interrupts (if any). In that case bypass
 	 * the cpuidle governor and go stratight for the deepest idle state
 	 * available. Possibly also suspend the local tick and the entire
 	 * timekeeping to prevent timer interrupts from kicking us out of idle
 	 * until a proper wakeup interrupt happens.
 	 */
 
-	if (idle_should_enter_s2idle() || dev->use_deepest_state) {
+	if (idle_should_enter_s2idle() || dev->forced_idle_latency_limit_ns) {
+		u64 max_latency_ns;
+
 		if (idle_should_enter_s2idle()) {
-			rcu_idle_enter();
 
-			entered_state = cpuidle_enter_s2idle(drv, dev);
-			if (entered_state > 0) {
-				local_irq_enable();
+			entered_state = call_cpuidle_s2idle(drv, dev);
+			if (entered_state > 0)
 				goto exit_idle;
-			}
 
-			rcu_idle_exit();
+			max_latency_ns = U64_MAX;
+		} else {
+			max_latency_ns = dev->forced_idle_latency_limit_ns;
 		}
 
 		tick_nohz_idle_stop_tick();
-		rcu_idle_enter();
 
-		next_state = cpuidle_find_deepest_state(drv, dev);
+		next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns);
 		call_cpuidle(drv, dev, next_state);
 	} else {
 		bool stop_tick = true;
```
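In the hunk above, `dev->use_deepest_state` is replaced by `dev->forced_idle_latency_limit_ns`, and `cpuidle_find_deepest_state()` gains a latency cap, so forced idle injection can request "the deepest state whose exit latency does not exceed this limit" instead of a plain boolean. The limit itself is set on the cpuidle side when `play_idle_precise()` (further down) calls `cpuidle_use_deepest_state()`; roughly like the sketch below, which is reconstructed from memory of the cpuidle core and may differ in detail from the actual tree:

```c
/* Sketch of the cpuidle-core setter (drivers/cpuidle/cpuidle.c), for context. */
void cpuidle_use_deepest_state(u64 latency_limit_ns)
{
	struct cpuidle_device *dev;

	preempt_disable();
	/* Per-CPU cpuidle device of the CPU doing the injection. */
	dev = cpuidle_get_device();
	if (dev)
		dev->forced_idle_latency_limit_ns = latency_limit_ns;
	preempt_enable();
}
```

Passing 0 (as the restored `cpuidle_use_deepest_state(0)` call later in this patch does) clears the limit, so the condition above is no longer taken and state selection goes back to the cpuidle governor.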
```diff
@@ ... @@
 			tick_nohz_idle_stop_tick();
 		else
 			tick_nohz_idle_retain_tick();
-
-		rcu_idle_enter();
 
 		entered_state = call_cpuidle(drv, dev, next_state);
 		/*
@@ ... @@
 	 */
 	if (WARN_ON_ONCE(irqs_disabled()))
 		local_irq_enable();
-
-	rcu_idle_exit();
 }
 
 /*
@@ ... @@
 	tick_nohz_idle_enter();
 
 	while (!need_resched()) {
-		check_pgt_cache();
 		rmb();
 
 		local_irq_disable();
@@ ... @@
 		}
 
 		arch_cpu_idle_enter();
+		rcu_nocb_flush_deferred_wakeup();
 
 		/*
 		 * In poll mode we reenable interrupts and spin. Also if we
@@ ... @@
 	 */
 	smp_mb__after_atomic();
 
-	sched_ttwu_pending();
+	/*
+	 * RCU relies on this call to be done outside of an RCU read-side
+	 * critical section.
+	 */
+	flush_smp_call_function_from_idle();
 	schedule_idle();
 
 	if (unlikely(klp_patch_pending(current)))
@@ ... @@
 	return HRTIMER_NORESTART;
 }
 
-void play_idle(unsigned long duration_ms)
+void play_idle_precise(u64 duration_ns, u64 latency_ns)
 {
 	struct idle_timer it;
 
@@ ... @@
 	WARN_ON_ONCE(current->nr_cpus_allowed != 1);
 	WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
 	WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY));
-	WARN_ON_ONCE(!duration_ms);
+	WARN_ON_ONCE(!duration_ns);
 
 	rcu_sleep_check();
 	preempt_disable();
 	current->flags |= PF_IDLE;
-	cpuidle_use_deepest_state(true);
+	cpuidle_use_deepest_state(latency_ns);
 
 	it.done = 0;
-	hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	it.timer.function = idle_inject_timer_fn;
-	hrtimer_start(&it.timer, ms_to_ktime(duration_ms), HRTIMER_MODE_REL_PINNED);
+	hrtimer_start(&it.timer, ns_to_ktime(duration_ns),
+		      HRTIMER_MODE_REL_PINNED_HARD);
 
 	while (!READ_ONCE(it.done))
 		do_idle();
 
-	cpuidle_use_deepest_state(false);
+	cpuidle_use_deepest_state(0);
 	current->flags &= ~PF_IDLE;
 
 	preempt_fold_need_resched();
 	preempt_enable();
 }
-EXPORT_SYMBOL_GPL(play_idle);
+EXPORT_SYMBOL_GPL(play_idle_precise);
 
 void cpu_startup_entry(enum cpuhp_state state)
 {
-	/*
-	 * This #ifdef needs to die, but it's too late in the cycle to
-	 * make this generic (ARM and SH have never invoked the canary
-	 * init for the non boot CPUs!). Will be fixed in 3.11
-	 */
-#ifdef CONFIG_X86
-	/*
-	 * If we're the non-boot CPU, nothing set the stack canary up
-	 * for us. The boot CPU already has it initialized but no harm
-	 * in doing it again. This is a good place for updating it, as
-	 * we wont ever return from this function (so the invalid
-	 * canaries already on the stack wont ever trigger).
-	 */
-	boot_init_stack_canary();
-#endif
 	arch_cpu_idle_prepare();
 	cpuhp_online_idle(state);
 	while (1)
```
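The hunk above converts `play_idle(duration_ms)` into `play_idle_precise(duration_ns, latency_ns)`: the interface moves to nanoseconds, gains a per-injection exit-latency limit, and the injection timer switches to the `_HARD` hrtimer modes so the wakeup is delivered from hard interrupt context (relevant with forced-threaded interrupts / PREEMPT_RT). Callers are expected to keep a simple `play_idle()` entry point as a static inline wrapper in `include/linux/cpu.h`; the sketch below shows the expected shape of that wrapper (written from memory, the exact units and body in the tree may differ):

```c
/* Sketch of the header-side wrapper (include/linux/cpu.h), not part of this hunk. */
extern void play_idle_precise(u64 duration_ns, u64 latency_ns);

static inline void play_idle(unsigned long duration_us)
{
	/* No latency limit: pick the deepest state available. */
	play_idle_precise(duration_us * NSEC_PER_USEC, U64_MAX);
}
```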
```diff
@@ ... @@
 
 #ifdef CONFIG_SMP
 static int
-select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags,
-		    int sibling_count_hint)
+select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
 	return task_cpu(p); /* IDLE tasks as never migrated */
+}
+
+static int
+balance_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+{
+	return WARN_ON_ONCE(1);
 }
 #endif
 
@@ ... @@
 	resched_curr(rq);
 }
 
-static struct task_struct *
-pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
 {
-	put_prev_task(rq, prev);
+}
+
+static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first)
+{
 	update_idle_core(rq);
 	schedstat_inc(rq->sched_goidle);
+}
 
-	return rq->idle;
+struct task_struct *pick_next_task_idle(struct rq *rq)
+{
+	struct task_struct *next = rq->idle;
+
+	set_next_task_idle(rq, next, true);
+
+	return next;
 }
 
 /*
```
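The hunk above splits the old `pick_next_task_idle(rq, prev, rf)` into three callbacks: `put_prev_task_idle()` (now empty), `set_next_task_idle()` (which takes over `update_idle_core()` and the schedstat accounting), and a `prev`-free `pick_next_task_idle(rq)`. Putting the previous task back is now the core scheduler's responsibility, and a `balance_idle()` stub is added for the class's `->balance()` hook, which should never be reached for the idle class (hence the `WARN_ON_ONCE(1)`). A simplified sketch of how the core side consumes this split interface — the function name is hypothetical and the real slow path in `kernel/sched/core.c` handles more cases:

```c
/* Simplified, illustrative version of the scheduler core's pick slow path. */
static struct task_struct *
pick_next_task_slowpath(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
	const struct sched_class *class;
	struct task_struct *p;

	/* Put the previous task back first; classes no longer do this themselves. */
	put_prev_task(rq, prev);

	/* Highest-priority class wins; ->pick_next_task() sets the next task itself. */
	for_each_class(class) {
		p = class->pick_next_task(rq);
		if (p)
			return p;
	}

	/* Unreachable: the idle class always has a runnable task. */
	BUG();
}
```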
```diff
@@ ... @@
 {
 	raw_spin_unlock_irq(&rq->lock);
 	printk(KERN_ERR "bad: scheduling from the idle thread!\n");
+
+	trace_android_rvh_dequeue_task_idle(p);
 	dump_stack();
 	raw_spin_lock_irq(&rq->lock);
-}
-
-static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
-{
 }
 
 /*
@@ ... @@
 {
 }
 
-static void set_curr_task_idle(struct rq *rq)
-{
-}
-
 static void switched_to_idle(struct rq *rq, struct task_struct *p)
 {
 	BUG();
@@ ... @@
 	BUG();
 }
 
-static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
-{
-	return 0;
-}
-
 static void update_curr_idle(struct rq *rq)
 {
 }
@@ ... @@
 /*
  * Simple, special scheduling class for the per-CPU idle tasks:
  */
-const struct sched_class idle_sched_class = {
-	/* .next is NULL */
+const struct sched_class idle_sched_class
+	__section("__idle_sched_class") = {
 	/* no enqueue/yield_task for idle tasks */
 
 	/* dequeue is not valid, we print a debug message there: */
@@ ... @@
 
 	.pick_next_task = pick_next_task_idle,
 	.put_prev_task = put_prev_task_idle,
+	.set_next_task = set_next_task_idle,
 
 #ifdef CONFIG_SMP
+	.balance = balance_idle,
 	.select_task_rq = select_task_rq_idle,
 	.set_cpus_allowed = set_cpus_allowed_common,
 #endif
 
-	.set_curr_task = set_curr_task_idle,
 	.task_tick = task_tick_idle,
-
-	.get_rr_interval = get_rr_interval_idle,
 
 	.prio_changed = prio_changed_idle,
 	.switched_to = switched_to_idle,
```
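Finally, `idle_sched_class` loses its `/* .next is NULL */` chaining comment and is instead emitted into a dedicated `__idle_sched_class` linker section. With every scheduling class placed in its own section, the linker lays the class structs out contiguously in priority order, so the core can iterate over classes and compare their priority by address rather than by following a `->next` pointer. A sketch of the supporting pieces, which live in the linker script and `kernel/sched/sched.h` rather than in this file (reconstructed from memory; the exact macro names and alignment details may differ):

```c
/* include/asm-generic/vmlinux.lds.h (sketch): lowest-priority section first. */
#define SCHED_DATA				\
	__begin_sched_classes = .;		\
	*(__idle_sched_class)			\
	*(__fair_sched_class)			\
	*(__rt_sched_class)			\
	*(__dl_sched_class)			\
	*(__stop_sched_class)			\
	__end_sched_classes = .;

/* kernel/sched/sched.h (sketch): iterate classes by address, highest first. */
extern struct sched_class __begin_sched_classes[];
extern struct sched_class __end_sched_classes[];

#define sched_class_highest	(__end_sched_classes - 1)
#define sched_class_lowest	(__begin_sched_classes - 1)

#define for_class_range(class, _from, _to)	\
	for (class = (_from); class != (_to); class--)

#define for_each_class(class)			\
	for_class_range(class, sched_class_highest, sched_class_lowest)
```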
|---|