| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * kernel/sched/core.c |
|---|
| 3 | 4 | * |
|---|
| .. | .. |
|---|
| 5 | 6 | * |
|---|
| 6 | 7 | * Copyright (C) 1991-2002 Linus Torvalds |
|---|
| 7 | 8 | */ |
|---|
| 9 | +#define CREATE_TRACE_POINTS |
|---|
| 10 | +#include <trace/events/sched.h> |
|---|
| 11 | +#undef CREATE_TRACE_POINTS |
|---|
| 12 | + |
|---|
| 8 | 13 | #include "sched.h" |
|---|
| 9 | 14 | |
|---|
| 10 | 15 | #include <linux/nospec.h> |
|---|
| .. | .. |
|---|
| 16 | 21 | #include <asm/tlb.h> |
|---|
| 17 | 22 | |
|---|
| 18 | 23 | #include "../workqueue_internal.h" |
|---|
| 24 | +#include "../../io_uring/io-wq.h" |
|---|
| 19 | 25 | #include "../smpboot.h" |
|---|
| 20 | 26 | |
|---|
| 21 | 27 | #include "pelt.h" |
|---|
| 28 | +#include "smp.h" |
|---|
| 22 | 29 | |
|---|
| 23 | | -#define CREATE_TRACE_POINTS |
|---|
| 24 | | -#include <trace/events/sched.h> |
|---|
| 30 | +#include <trace/hooks/sched.h> |
|---|
| 31 | +#include <trace/hooks/dtask.h> |
|---|
| 32 | + |
|---|
| 33 | +/* |
|---|
| 34 | + * Export tracepoints that act as a bare tracehook (ie: have no trace event |
|---|
| 35 | + * associated with them) to allow external modules to probe them. |
|---|
| 36 | + */ |
|---|
| 37 | +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_cfs_tp); |
|---|
| 38 | +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp); |
|---|
| 39 | +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp); |
|---|
| 40 | +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); |
|---|
| 41 | +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp); |
|---|
| 42 | +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_thermal_tp); |
|---|
| 43 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_cpu_capacity_tp); |
|---|
| 44 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp); |
|---|
| 45 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_cfs_tp); |
|---|
| 46 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_se_tp); |
|---|
| 47 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp); |
|---|
| 48 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_switch); |
|---|
| 49 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_waking); |
|---|
| 50 | +#ifdef CONFIG_SCHEDSTATS |
|---|
| 51 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_sleep); |
|---|
| 52 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_wait); |
|---|
| 53 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_iowait); |
|---|
| 54 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_blocked); |
|---|
| 55 | +#endif |
|---|
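
The exported tracepoints above can be attached to from a loadable module. Below is a minimal sketch of such a probe, assuming the 5.10-era prototype of sched_switch (three arguments after the data pointer); the probe name and module boilerplate are illustrative, not part of this change:

```c
#include <linux/module.h>
#include <linux/sched.h>
#include <trace/events/sched.h>

/* First argument is the private data passed at registration time,
 * the rest mirrors the tracepoint's TP_PROTO(). */
static void probe_sched_switch(void *data, bool preempt,
			       struct task_struct *prev,
			       struct task_struct *next)
{
	pr_debug("switch %d -> %d\n", prev->pid, next->pid);
}

static int __init switch_probe_init(void)
{
	/* Only possible from a module because of the
	 * EXPORT_TRACEPOINT_SYMBOL_GPL(sched_switch) above. */
	return register_trace_sched_switch(probe_sched_switch, NULL);
}

static void __exit switch_probe_exit(void)
{
	unregister_trace_sched_switch(probe_sched_switch, NULL);
	tracepoint_synchronize_unregister();
}

module_init(switch_probe_init);
module_exit(switch_probe_exit);
MODULE_LICENSE("GPL");
```
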
| 25 | 56 | |
|---|
| 26 | 57 | DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
|---|
| 58 | +EXPORT_SYMBOL_GPL(runqueues); |
|---|
| 27 | 59 | |
|---|
| 28 | 60 | #ifdef CONFIG_SCHED_DEBUG |
|---|
| 29 | 61 | /* |
|---|
| .. | .. |
|---|
| 38 | 70 | const_debug unsigned int sysctl_sched_features = |
|---|
| 39 | 71 | #include "features.h" |
|---|
| 40 | 72 | 0; |
|---|
| 73 | +EXPORT_SYMBOL_GPL(sysctl_sched_features); |
|---|
| 41 | 74 | #undef SCHED_FEAT |
|---|
| 42 | 75 | #endif |
|---|
| 43 | 76 | |
|---|
| .. | .. |
|---|
| 45 | 78 | * Number of tasks to iterate in a single balance run. |
|---|
| 46 | 79 | * Limited because this is done with IRQs disabled. |
|---|
| 47 | 80 | */ |
|---|
| 48 | | -#ifdef CONFIG_PREEMPT_RT_FULL |
|---|
| 81 | +#ifdef CONFIG_PREEMPT_RT |
|---|
| 49 | 82 | const_debug unsigned int sysctl_sched_nr_migrate = 8; |
|---|
| 50 | 83 | #else |
|---|
| 51 | 84 | const_debug unsigned int sysctl_sched_nr_migrate = 32; |
|---|
| .. | .. |
|---|
| 64 | 97 | * default: 0.95s |
|---|
| 65 | 98 | */ |
|---|
| 66 | 99 | int sysctl_sched_rt_runtime = 950000; |
|---|
| 100 | + |
|---|
| 101 | + |
|---|
| 102 | +/* |
|---|
| 103 | + * Serialization rules: |
|---|
| 104 | + * |
|---|
| 105 | + * Lock order: |
|---|
| 106 | + * |
|---|
| 107 | + * p->pi_lock |
|---|
| 108 | + * rq->lock |
|---|
| 109 | + * hrtimer_cpu_base->lock (hrtimer_start() for bandwidth controls) |
|---|
| 110 | + * |
|---|
| 111 | + * rq1->lock |
|---|
| 112 | + * rq2->lock where: rq1 < rq2 |
|---|
| 113 | + * |
|---|
| 114 | + * Regular state: |
|---|
| 115 | + * |
|---|
| 116 | + * Normal scheduling state is serialized by rq->lock. __schedule() takes the |
|---|
| 117 | + * Normal scheduling state is serialized by rq->lock. __schedule() takes the |
|---|
| 118 | + * local CPU's rq->lock; it optionally removes the task from the runqueue and |
|---|
| 119 | + * always looks at the local rq data structures to find the most eligible task to run next. |
|---|
| 120 | + * |
|---|
| 121 | + * Task enqueue is also under rq->lock, possibly taken from another CPU. |
|---|
| 122 | + * Wakeups from another LLC domain might use an IPI to transfer the enqueue to |
|---|
| 123 | + * the local CPU to avoid bouncing the runqueue state around [ see |
|---|
| 124 | + * ttwu_queue_wakelist() ] |
|---|
| 125 | + * |
|---|
| 126 | + * Task wakeup, specifically wakeups that involve migration, are horribly |
|---|
| 127 | + * complicated to avoid having to take two rq->locks. |
|---|
| 128 | + * |
|---|
| 129 | + * Special state: |
|---|
| 130 | + * |
|---|
| 131 | + * System-calls and anything external will use task_rq_lock() which acquires |
|---|
| 132 | + * both p->pi_lock and rq->lock. As a consequence the state they change is |
|---|
| 133 | + * stable while holding either lock: |
|---|
| 134 | + * |
|---|
| 135 | + * - sched_setaffinity()/ |
|---|
| 136 | + * set_cpus_allowed_ptr(): p->cpus_ptr, p->nr_cpus_allowed |
|---|
| 137 | + * - set_user_nice(): p->se.load, p->*prio |
|---|
| 138 | + * - __sched_setscheduler(): p->sched_class, p->policy, p->*prio, |
|---|
| 139 | + * p->se.load, p->rt_priority, |
|---|
| 140 | + * p->dl.dl_{runtime, deadline, period, flags, bw, density} |
|---|
| 141 | + * - sched_setnuma(): p->numa_preferred_nid |
|---|
| 142 | + * - sched_move_task()/ |
|---|
| 143 | + * cpu_cgroup_fork(): p->sched_task_group |
|---|
| 144 | + * - uclamp_update_active() p->uclamp* |
|---|
| 145 | + * |
|---|
| 146 | + * p->state <- TASK_*: |
|---|
| 147 | + * |
|---|
| 148 | + * is changed locklessly using set_current_state(), __set_current_state() or |
|---|
| 149 | + * set_special_state(), see their respective comments, or by |
|---|
| 150 | + * try_to_wake_up(). This latter uses p->pi_lock to serialize against |
|---|
| 151 | + * concurrent self. |
|---|
| 152 | + * |
|---|
| 153 | + * p->on_rq <- { 0, 1 = TASK_ON_RQ_QUEUED, 2 = TASK_ON_RQ_MIGRATING }: |
|---|
| 154 | + * |
|---|
| 155 | + * is set by activate_task() and cleared by deactivate_task(), under |
|---|
| 156 | + * rq->lock. Non-zero indicates the task is runnable, the special |
|---|
| 157 | + * ON_RQ_MIGRATING state is used for migration without holding both |
|---|
| 158 | + * rq->locks. It indicates task_cpu() is not stable, see task_rq_lock(). |
|---|
| 159 | + * |
|---|
| 160 | + * p->on_cpu <- { 0, 1 }: |
|---|
| 161 | + * |
|---|
| 162 | + * is set by prepare_task() and cleared by finish_task() such that it will be |
|---|
| 163 | + * set before p is scheduled-in and cleared after p is scheduled-out, both |
|---|
| 164 | + * under rq->lock. Non-zero indicates the task is running on its CPU. |
|---|
| 165 | + * |
|---|
| 166 | + * [ The astute reader will observe that it is possible for two tasks on one |
|---|
| 167 | + * CPU to have ->on_cpu = 1 at the same time. ] |
|---|
| 168 | + * |
|---|
| 169 | + * task_cpu(p): is changed by set_task_cpu(), the rules are: |
|---|
| 170 | + * |
|---|
| 171 | + * - Don't call set_task_cpu() on a blocked task: |
|---|
| 172 | + * |
|---|
| 173 | + * We don't care what CPU we're not running on, this simplifies hotplug, |
|---|
| 174 | + * the CPU assignment of blocked tasks isn't required to be valid. |
|---|
| 175 | + * |
|---|
| 176 | + * - for try_to_wake_up(), called under p->pi_lock: |
|---|
| 177 | + * |
|---|
| 178 | + * This allows try_to_wake_up() to only take one rq->lock, see its comment. |
|---|
| 179 | + * |
|---|
| 180 | + * - for migration called under rq->lock: |
|---|
| 181 | + * [ see task_on_rq_migrating() in task_rq_lock() ] |
|---|
| 182 | + * |
|---|
| 183 | + * o move_queued_task() |
|---|
| 184 | + * o detach_task() |
|---|
| 185 | + * |
|---|
| 186 | + * - for migration called under double_rq_lock(): |
|---|
| 187 | + * |
|---|
| 188 | + * o __migrate_swap_task() |
|---|
| 189 | + * o push_rt_task() / pull_rt_task() |
|---|
| 190 | + * o push_dl_task() / pull_dl_task() |
|---|
| 191 | + * o dl_task_offline_migration() |
|---|
| 192 | + * |
|---|
| 193 | + */ |
|---|
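
As a concrete illustration of the "Special state" rule above, code outside the scheduler core that needs a stable view of these fields follows the task_rq_lock() pattern; this is a sketch of the convention the comment describes, not a new helper:

```c
/* Holding both p->pi_lock and rq->lock makes p->policy, p->*prio,
 * p->cpus_ptr, p->sched_task_group, p->uclamp*, ... stable. */
static void inspect_task_state(struct task_struct *p)
{
	struct rq_flags rf;
	struct rq *rq;

	rq = task_rq_lock(p, &rf);	/* takes p->pi_lock, then rq->lock */
	/* ... read or update the "special state" listed above ... */
	task_rq_unlock(rq, p, &rf);
}
```
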
| 67 | 194 | |
|---|
| 68 | 195 | /* |
|---|
| 69 | 196 | * __task_rq_lock - lock the rq @p resides on. |
|---|
| .. | .. |
|---|
| 88 | 215 | cpu_relax(); |
|---|
| 89 | 216 | } |
|---|
| 90 | 217 | } |
|---|
| 218 | +EXPORT_SYMBOL_GPL(__task_rq_lock); |
|---|
| 91 | 219 | |
|---|
| 92 | 220 | /* |
|---|
| 93 | 221 | * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. |
|---|
| .. | .. |
|---|
| 130 | 258 | cpu_relax(); |
|---|
| 131 | 259 | } |
|---|
| 132 | 260 | } |
|---|
| 261 | +EXPORT_SYMBOL_GPL(task_rq_lock); |
|---|
| 133 | 262 | |
|---|
| 134 | 263 | /* |
|---|
| 135 | 264 | * RQ-clock updating methods: |
|---|
| .. | .. |
|---|
| 210 | 339 | rq->clock += delta; |
|---|
| 211 | 340 | update_rq_clock_task(rq, delta); |
|---|
| 212 | 341 | } |
|---|
| 342 | +EXPORT_SYMBOL_GPL(update_rq_clock); |
|---|
| 213 | 343 | |
|---|
| 344 | +static inline void |
|---|
| 345 | +rq_csd_init(struct rq *rq, struct __call_single_data *csd, smp_call_func_t func) |
|---|
| 346 | +{ |
|---|
| 347 | + csd->flags = 0; |
|---|
| 348 | + csd->func = func; |
|---|
| 349 | + csd->info = rq; |
|---|
| 350 | +} |
|---|
| 214 | 351 | |
|---|
| 215 | 352 | #ifdef CONFIG_SCHED_HRTICK |
|---|
| 216 | 353 | /* |
|---|
| .. | .. |
|---|
| 247 | 384 | static void __hrtick_restart(struct rq *rq) |
|---|
| 248 | 385 | { |
|---|
| 249 | 386 | struct hrtimer *timer = &rq->hrtick_timer; |
|---|
| 387 | + ktime_t time = rq->hrtick_time; |
|---|
| 250 | 388 | |
|---|
| 251 | | - hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED); |
|---|
| 389 | + hrtimer_start(timer, time, HRTIMER_MODE_ABS_PINNED_HARD); |
|---|
| 252 | 390 | } |
|---|
| 253 | 391 | |
|---|
| 254 | 392 | /* |
|---|
| .. | .. |
|---|
| 261 | 399 | |
|---|
| 262 | 400 | rq_lock(rq, &rf); |
|---|
| 263 | 401 | __hrtick_restart(rq); |
|---|
| 264 | | - rq->hrtick_csd_pending = 0; |
|---|
| 265 | 402 | rq_unlock(rq, &rf); |
|---|
| 266 | 403 | } |
|---|
| 267 | 404 | |
|---|
| .. | .. |
|---|
| 273 | 410 | void hrtick_start(struct rq *rq, u64 delay) |
|---|
| 274 | 411 | { |
|---|
| 275 | 412 | struct hrtimer *timer = &rq->hrtick_timer; |
|---|
| 276 | | - ktime_t time; |
|---|
| 277 | 413 | s64 delta; |
|---|
| 278 | 414 | |
|---|
| 279 | 415 | /* |
|---|
| .. | .. |
|---|
| 281 | 417 | * doesn't make sense and can cause timer DoS. |
|---|
| 282 | 418 | */ |
|---|
| 283 | 419 | delta = max_t(s64, delay, 10000LL); |
|---|
| 284 | | - time = ktime_add_ns(timer->base->get_time(), delta); |
|---|
| 420 | + rq->hrtick_time = ktime_add_ns(timer->base->get_time(), delta); |
|---|
| 285 | 421 | |
|---|
| 286 | | - hrtimer_set_expires(timer, time); |
|---|
| 287 | | - |
|---|
| 288 | | - if (rq == this_rq()) { |
|---|
| 422 | + if (rq == this_rq()) |
|---|
| 289 | 423 | __hrtick_restart(rq); |
|---|
| 290 | | - } else if (!rq->hrtick_csd_pending) { |
|---|
| 424 | + else |
|---|
| 291 | 425 | smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); |
|---|
| 292 | | - rq->hrtick_csd_pending = 1; |
|---|
| 293 | | - } |
|---|
| 294 | 426 | } |
|---|
| 295 | 427 | |
|---|
| 296 | 428 | #else |
|---|
| .. | .. |
|---|
| 307 | 439 | */ |
|---|
| 308 | 440 | delay = max_t(u64, delay, 10000LL); |
|---|
| 309 | 441 | hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), |
|---|
| 310 | | - HRTIMER_MODE_REL_PINNED); |
|---|
| 442 | + HRTIMER_MODE_REL_PINNED_HARD); |
|---|
| 311 | 443 | } |
|---|
| 444 | + |
|---|
| 312 | 445 | #endif /* CONFIG_SMP */ |
|---|
| 313 | 446 | |
|---|
| 314 | 447 | static void hrtick_rq_init(struct rq *rq) |
|---|
| 315 | 448 | { |
|---|
| 316 | 449 | #ifdef CONFIG_SMP |
|---|
| 317 | | - rq->hrtick_csd_pending = 0; |
|---|
| 318 | | - |
|---|
| 319 | | - rq->hrtick_csd.flags = 0; |
|---|
| 320 | | - rq->hrtick_csd.func = __hrtick_start; |
|---|
| 321 | | - rq->hrtick_csd.info = rq; |
|---|
| 450 | + rq_csd_init(rq, &rq->hrtick_csd, __hrtick_start); |
|---|
| 322 | 451 | #endif |
|---|
| 323 | | - |
|---|
| 324 | 452 | hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); |
|---|
| 325 | 453 | rq->hrtick_timer.function = hrtick; |
|---|
| 326 | 454 | } |
|---|
| .. | .. |
|---|
| 403 | 531 | #endif |
|---|
| 404 | 532 | #endif |
|---|
| 405 | 533 | |
|---|
| 406 | | -void __wake_q_add(struct wake_q_head *head, struct task_struct *task, |
|---|
| 407 | | - bool sleeper) |
|---|
| 534 | +static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task, |
|---|
| 535 | + bool sleeper) |
|---|
| 408 | 536 | { |
|---|
| 409 | 537 | struct wake_q_node *node; |
|---|
| 410 | 538 | |
|---|
| .. | .. |
|---|
| 422 | 550 | * state, even in the failed case, an explicit smp_mb() must be used. |
|---|
| 423 | 551 | */ |
|---|
| 424 | 552 | smp_mb__before_atomic(); |
|---|
| 425 | | - if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)) |
|---|
| 426 | | - return; |
|---|
| 427 | | - |
|---|
| 428 | | - head->count++; |
|---|
| 429 | | - |
|---|
| 430 | | - get_task_struct(task); |
|---|
| 553 | + if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) |
|---|
| 554 | + return false; |
|---|
| 431 | 555 | |
|---|
| 432 | 556 | /* |
|---|
| 433 | 557 | * The head is context local, there can be no concurrency. |
|---|
| 434 | 558 | */ |
|---|
| 435 | 559 | *head->lastp = node; |
|---|
| 436 | 560 | head->lastp = &node->next; |
|---|
| 561 | + head->count++; |
|---|
| 562 | + return true; |
|---|
| 437 | 563 | } |
|---|
| 438 | 564 | |
|---|
| 439 | | -static int |
|---|
| 440 | | -try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags, |
|---|
| 441 | | - int sibling_count_hint); |
|---|
| 565 | +/** |
|---|
| 566 | + * wake_q_add() - queue a wakeup for 'later' waking. |
|---|
| 567 | + * @head: the wake_q_head to add @task to |
|---|
| 568 | + * @task: the task to queue for 'later' wakeup |
|---|
| 569 | + * |
|---|
| 570 | + * Queue a task for later wakeup, most likely by the wake_up_q() call in the |
|---|
| 571 | + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come |
|---|
| 572 | + * instantly. |
|---|
| 573 | + * |
|---|
| 574 | + * This function must be used as-if it were wake_up_process(); IOW the task |
|---|
| 575 | + * must be ready to be woken at this location. |
|---|
| 576 | + */ |
|---|
| 577 | +void wake_q_add(struct wake_q_head *head, struct task_struct *task) |
|---|
| 578 | +{ |
|---|
| 579 | + if (__wake_q_add(head, task, false)) |
|---|
| 580 | + get_task_struct(task); |
|---|
| 581 | +} |
|---|
| 582 | + |
|---|
| 583 | +void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task) |
|---|
| 584 | +{ |
|---|
| 585 | + if (__wake_q_add(head, task, true)) |
|---|
| 586 | + get_task_struct(task); |
|---|
| 587 | +} |
|---|
| 588 | + |
|---|
| 589 | +/** |
|---|
| 590 | + * wake_q_add_safe() - safely queue a wakeup for 'later' waking. |
|---|
| 591 | + * @head: the wake_q_head to add @task to |
|---|
| 592 | + * @task: the task to queue for 'later' wakeup |
|---|
| 593 | + * |
|---|
| 594 | + * Queue a task for later wakeup, most likely by the wake_up_q() call in the |
|---|
| 595 | + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come |
|---|
| 596 | + * instantly. |
|---|
| 597 | + * |
|---|
| 598 | + * This function must be used as-if it were wake_up_process(); IOW the task |
|---|
| 599 | + * must be ready to be woken at this location. |
|---|
| 600 | + * |
|---|
| 601 | + * This function is essentially a task-safe equivalent to wake_q_add(). Callers |
|---|
| 602 | + * that already hold a reference to @task can call the 'safe' version and trust |
|---|
| 603 | + * wake_q to do the right thing depending whether or not the @task is already |
|---|
| 604 | + * queued for wakeup. |
|---|
| 605 | + */ |
|---|
| 606 | +void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) |
|---|
| 607 | +{ |
|---|
| 608 | + if (!__wake_q_add(head, task, false)) |
|---|
| 609 | + put_task_struct(task); |
|---|
| 610 | +} |
|---|
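
The intended calling pattern for the wake_q helpers above is to collect wakeups while holding a lock and issue them after it is dropped. A minimal sketch, where the waiter structure and list are illustrative stand-ins for a caller's own bookkeeping:

```c
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/sched/wake_q.h>

struct my_waiter {			/* hypothetical waiter bookkeeping */
	struct list_head node;
	struct task_struct *task;
};

static void wake_all_waiters(spinlock_t *lock, struct list_head *waiters)
{
	DEFINE_WAKE_Q(wake_q);
	struct my_waiter *w;

	spin_lock(lock);
	list_for_each_entry(w, waiters, node)
		wake_q_add(&wake_q, w->task);	/* grabs a task reference */
	spin_unlock(lock);

	/* Wakeups happen outside the lock; wake_up_q() drops the references. */
	wake_up_q(&wake_q);
}
```
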
| 611 | + |
|---|
| 442 | 612 | void __wake_up_q(struct wake_q_head *head, bool sleeper) |
|---|
| 443 | 613 | { |
|---|
| 444 | 614 | struct wake_q_node *node = head->first; |
|---|
| .. | .. |
|---|
| 450 | 620 | task = container_of(node, struct task_struct, wake_q_sleeper); |
|---|
| 451 | 621 | else |
|---|
| 452 | 622 | task = container_of(node, struct task_struct, wake_q); |
|---|
| 623 | + |
|---|
| 453 | 624 | BUG_ON(!task); |
|---|
| 454 | 625 | /* Task can safely be re-inserted now: */ |
|---|
| 455 | 626 | node = node->next; |
|---|
| 627 | + task->wake_q_count = head->count; |
|---|
| 456 | 628 | if (sleeper) |
|---|
| 457 | 629 | task->wake_q_sleeper.next = NULL; |
|---|
| 458 | 630 | else |
|---|
| 459 | 631 | task->wake_q.next = NULL; |
|---|
| 632 | + |
|---|
| 460 | 633 | /* |
|---|
| 461 | 634 | * wake_up_process() executes a full barrier, which pairs with |
|---|
| 462 | 635 | * the queueing in wake_q_add() so as not to miss wakeups. |
|---|
| .. | .. |
|---|
| 466 | 639 | else |
|---|
| 467 | 640 | wake_up_process(task); |
|---|
| 468 | 641 | |
|---|
| 642 | + task->wake_q_count = 0; |
|---|
| 469 | 643 | put_task_struct(task); |
|---|
| 470 | 644 | } |
|---|
| 471 | 645 | } |
|---|
| .. | .. |
|---|
| 495 | 669 | return; |
|---|
| 496 | 670 | } |
|---|
| 497 | 671 | |
|---|
| 498 | | -#ifdef CONFIG_PREEMPT |
|---|
| 499 | 672 | if (set_nr_and_not_polling(curr)) |
|---|
| 500 | | -#else |
|---|
| 501 | | - if (set_nr_and_not_polling(curr) && (rq->curr == rq->idle)) |
|---|
| 502 | | -#endif |
|---|
| 503 | 673 | smp_send_reschedule(cpu); |
|---|
| 504 | 674 | else |
|---|
| 505 | 675 | trace_sched_wake_idle_without_ipi(cpu); |
|---|
| 506 | 676 | } |
|---|
| 677 | +EXPORT_SYMBOL_GPL(resched_curr); |
|---|
| 507 | 678 | |
|---|
| 508 | 679 | #ifdef CONFIG_PREEMPT_LAZY |
|---|
| 509 | 680 | |
|---|
| .. | .. |
|---|
| 570 | 741 | */ |
|---|
| 571 | 742 | int get_nohz_timer_target(void) |
|---|
| 572 | 743 | { |
|---|
| 573 | | - int i, cpu = smp_processor_id(); |
|---|
| 744 | + int i, cpu = smp_processor_id(), default_cpu = -1; |
|---|
| 574 | 745 | struct sched_domain *sd; |
|---|
| 575 | 746 | |
|---|
| 576 | | - if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER)) |
|---|
| 577 | | - return cpu; |
|---|
| 747 | + if (housekeeping_cpu(cpu, HK_FLAG_TIMER) && cpu_active(cpu)) { |
|---|
| 748 | + if (!idle_cpu(cpu)) |
|---|
| 749 | + return cpu; |
|---|
| 750 | + default_cpu = cpu; |
|---|
| 751 | + } |
|---|
| 578 | 752 | |
|---|
| 579 | 753 | rcu_read_lock(); |
|---|
| 580 | 754 | for_each_domain(cpu, sd) { |
|---|
| 581 | | - for_each_cpu(i, sched_domain_span(sd)) { |
|---|
| 755 | + for_each_cpu_and(i, sched_domain_span(sd), |
|---|
| 756 | + housekeeping_cpumask(HK_FLAG_TIMER)) { |
|---|
| 582 | 757 | if (cpu == i) |
|---|
| 583 | 758 | continue; |
|---|
| 584 | 759 | |
|---|
| 585 | | - if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) { |
|---|
| 760 | + if (!idle_cpu(i)) { |
|---|
| 586 | 761 | cpu = i; |
|---|
| 587 | 762 | goto unlock; |
|---|
| 588 | 763 | } |
|---|
| 589 | 764 | } |
|---|
| 590 | 765 | } |
|---|
| 591 | 766 | |
|---|
| 592 | | - if (!housekeeping_cpu(cpu, HK_FLAG_TIMER)) |
|---|
| 593 | | - cpu = housekeeping_any_cpu(HK_FLAG_TIMER); |
|---|
| 767 | + if (default_cpu == -1) { |
|---|
| 768 | + for_each_cpu_and(i, cpu_active_mask, |
|---|
| 769 | + housekeeping_cpumask(HK_FLAG_TIMER)) { |
|---|
| 770 | + if (cpu == i) |
|---|
| 771 | + continue; |
|---|
| 772 | + |
|---|
| 773 | + if (!idle_cpu(i)) { |
|---|
| 774 | + cpu = i; |
|---|
| 775 | + goto unlock; |
|---|
| 776 | + } |
|---|
| 777 | + } |
|---|
| 778 | + |
|---|
| 779 | + /* no active, not-idle, housekeeping CPU found. */ |
|---|
| 780 | + default_cpu = cpumask_any(cpu_active_mask); |
|---|
| 781 | + |
|---|
| 782 | + if (unlikely(default_cpu >= nr_cpu_ids)) |
|---|
| 783 | + goto unlock; |
|---|
| 784 | + } |
|---|
| 785 | + |
|---|
| 786 | + cpu = default_cpu; |
|---|
| 594 | 787 | unlock: |
|---|
| 595 | 788 | rcu_read_unlock(); |
|---|
| 596 | 789 | return cpu; |
|---|
| .. | .. |
|---|
| 650 | 843 | wake_up_idle_cpu(cpu); |
|---|
| 651 | 844 | } |
|---|
| 652 | 845 | |
|---|
| 653 | | -static inline bool got_nohz_idle_kick(void) |
|---|
| 846 | +static void nohz_csd_func(void *info) |
|---|
| 654 | 847 | { |
|---|
| 655 | | - int cpu = smp_processor_id(); |
|---|
| 656 | | - |
|---|
| 657 | | - if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK)) |
|---|
| 658 | | - return false; |
|---|
| 659 | | - |
|---|
| 660 | | - if (idle_cpu(cpu) && !need_resched()) |
|---|
| 661 | | - return true; |
|---|
| 848 | + struct rq *rq = info; |
|---|
| 849 | + int cpu = cpu_of(rq); |
|---|
| 850 | + unsigned int flags; |
|---|
| 662 | 851 | |
|---|
| 663 | 852 | /* |
|---|
| 664 | | - * We can't run Idle Load Balance on this CPU for this time so we |
|---|
| 665 | | - * cancel it and clear NOHZ_BALANCE_KICK |
|---|
| 853 | + * Release the rq::nohz_csd. |
|---|
| 666 | 854 | */ |
|---|
| 667 | | - atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); |
|---|
| 668 | | - return false; |
|---|
| 669 | | -} |
|---|
| 855 | + flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); |
|---|
| 856 | + WARN_ON(!(flags & NOHZ_KICK_MASK)); |
|---|
| 670 | 857 | |
|---|
| 671 | | -#else /* CONFIG_NO_HZ_COMMON */ |
|---|
| 672 | | - |
|---|
| 673 | | -static inline bool got_nohz_idle_kick(void) |
|---|
| 674 | | -{ |
|---|
| 675 | | - return false; |
|---|
| 858 | + rq->idle_balance = idle_cpu(cpu); |
|---|
| 859 | + if (rq->idle_balance && !need_resched()) { |
|---|
| 860 | + rq->nohz_idle_balance = flags; |
|---|
| 861 | + raise_softirq_irqoff(SCHED_SOFTIRQ); |
|---|
| 862 | + } |
|---|
| 676 | 863 | } |
|---|
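
nohz_csd_func() above is the receiving end of the NOHZ idle-balance kick. The sending side lives in kernel/sched/fair.c (kick_ilb() in this series) and is expected to pair with it roughly as sketched below; treat this as an assumption about the pairing, not a quote of that file:

```c
/* Sketch of the sender: publish the kick reason in nohz_flags(), then fire
 * the target rq's CSD so nohz_csd_func() runs on that CPU. */
static void kick_ilb_sketch(int ilb_cpu, unsigned int flags)
{
	flags = atomic_fetch_or(flags, nohz_flags(ilb_cpu));
	if (flags & NOHZ_KICK_MASK)
		return;		/* a kick is already pending */

	/* rq->nohz_csd is assumed to have been set up with
	 * rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func) at init time. */
	smp_call_function_single_async(ilb_cpu, &cpu_rq(ilb_cpu)->nohz_csd);
}
```
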
| 677 | 864 | |
|---|
| 678 | 865 | #endif /* CONFIG_NO_HZ_COMMON */ |
|---|
| .. | .. |
|---|
| 763 | 950 | } |
|---|
| 764 | 951 | #endif |
|---|
| 765 | 952 | |
|---|
| 766 | | -static void set_load_weight(struct task_struct *p, bool update_load) |
|---|
| 953 | +static void set_load_weight(struct task_struct *p) |
|---|
| 767 | 954 | { |
|---|
| 955 | + bool update_load = !(READ_ONCE(p->state) & TASK_NEW); |
|---|
| 768 | 956 | int prio = p->static_prio - MAX_RT_PRIO; |
|---|
| 769 | 957 | struct load_weight *load = &p->se.load; |
|---|
| 770 | 958 | |
|---|
| 771 | 959 | /* |
|---|
| 772 | 960 | * SCHED_IDLE tasks get minimal weight: |
|---|
| 773 | 961 | */ |
|---|
| 774 | | - if (idle_policy(p->policy)) { |
|---|
| 962 | + if (task_has_idle_policy(p)) { |
|---|
| 775 | 963 | load->weight = scale_load(WEIGHT_IDLEPRIO); |
|---|
| 776 | 964 | load->inv_weight = WMULT_IDLEPRIO; |
|---|
| 777 | | - p->se.runnable_weight = load->weight; |
|---|
| 778 | 965 | return; |
|---|
| 779 | 966 | } |
|---|
| 780 | 967 | |
|---|
| .. | .. |
|---|
| 787 | 974 | } else { |
|---|
| 788 | 975 | load->weight = scale_load(sched_prio_to_weight[prio]); |
|---|
| 789 | 976 | load->inv_weight = sched_prio_to_wmult[prio]; |
|---|
| 790 | | - p->se.runnable_weight = load->weight; |
|---|
| 791 | 977 | } |
|---|
| 792 | 978 | } |
|---|
| 793 | 979 | |
|---|
| .. | .. |
|---|
| 810 | 996 | /* Max allowed maximum utilization */ |
|---|
| 811 | 997 | unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE; |
|---|
| 812 | 998 | |
|---|
| 999 | +/* |
|---|
| 1000 | + * By default RT tasks run at the maximum performance point/capacity of the |
|---|
| 1001 | + * system. Uclamp enforces this by always setting UCLAMP_MIN of RT tasks to |
|---|
| 1002 | + * SCHED_CAPACITY_SCALE. |
|---|
| 1003 | + * |
|---|
| 1004 | + * This knob allows admins to change the default behavior when uclamp is being |
|---|
| 1005 | + * used. In battery powered devices, particularly, running at the maximum |
|---|
| 1006 | + * capacity and frequency will increase energy consumption and shorten the |
|---|
| 1007 | + * battery life. |
|---|
| 1008 | + * |
|---|
| 1009 | + * This knob only affects RT tasks whose uclamp_se->user_defined == false. |
|---|
| 1010 | + * |
|---|
| 1011 | + * This knob will not override the system default sched_util_clamp_min defined |
|---|
| 1012 | + * above. |
|---|
| 1013 | + */ |
|---|
| 1014 | +unsigned int sysctl_sched_uclamp_util_min_rt_default = SCHED_CAPACITY_SCALE; |
|---|
| 1015 | + |
|---|
| 813 | 1016 | /* All clamps are required to be less or equal than these values */ |
|---|
| 814 | 1017 | static struct uclamp_se uclamp_default[UCLAMP_CNT]; |
|---|
| 1018 | + |
|---|
| 1019 | +/* |
|---|
| 1020 | + * This static key is used to reduce the uclamp overhead in the fast path. It |
|---|
| 1021 | + * primarily disables the call to uclamp_rq_{inc, dec}() in |
|---|
| 1022 | + * enqueue/dequeue_task(). |
|---|
| 1023 | + * |
|---|
| 1024 | + * This allows users to continue to enable uclamp in their kernel config with |
|---|
| 1025 | + * minimum uclamp overhead in the fast path. |
|---|
| 1026 | + * |
|---|
| 1027 | + * As soon as userspace modifies any of the uclamp knobs, the static key is |
|---|
| 1028 | + * enabled, since we have actual users that make use of uclamp |
|---|
| 1029 | + * functionality. |
|---|
| 1030 | + * |
|---|
| 1031 | + * The knobs that would enable this static key are: |
|---|
| 1032 | + * |
|---|
| 1033 | + * * A task modifying its uclamp value with sched_setattr(). |
|---|
| 1034 | + * * An admin modifying the sysctl_sched_uclamp_{min, max} via procfs. |
|---|
| 1035 | + * * An admin modifying the cgroup cpu.uclamp.{min, max} |
|---|
| 1036 | + */ |
|---|
| 1037 | +DEFINE_STATIC_KEY_FALSE(sched_uclamp_used); |
|---|
| 1038 | +EXPORT_SYMBOL_GPL(sched_uclamp_used); |
|---|
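
For readers unfamiliar with static keys, the pattern the comment above relies on looks like the self-contained sketch below; the names are illustrative and not part of the scheduler:

```c
#include <linux/jump_label.h>

DEFINE_STATIC_KEY_FALSE(my_feature_used);

/* Hot path: compiles down to a NOP until the key is flipped on. */
static void hot_path(void)
{
	if (!static_branch_unlikely(&my_feature_used))
		return;
	/* ... feature-specific accounting ... */
}

/* Slow path (sysctl/syscall handler): static_branch_enable() may block,
 * so it must run without scheduler locks held - the same constraint
 * uclamp_validate() documents further down. */
static void enable_feature(void)
{
	static_branch_enable(&my_feature_used);
}
```
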
| 815 | 1039 | |
|---|
| 816 | 1040 | /* Integer rounded range for each bucket */ |
|---|
| 817 | 1041 | #define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS) |
|---|
| .. | .. |
|---|
| 822 | 1046 | static inline unsigned int uclamp_bucket_id(unsigned int clamp_value) |
|---|
| 823 | 1047 | { |
|---|
| 824 | 1048 | return min_t(unsigned int, clamp_value / UCLAMP_BUCKET_DELTA, UCLAMP_BUCKETS - 1); |
|---|
| 825 | | -} |
|---|
| 826 | | - |
|---|
| 827 | | -static inline unsigned int uclamp_bucket_base_value(unsigned int clamp_value) |
|---|
| 828 | | -{ |
|---|
| 829 | | - return UCLAMP_BUCKET_DELTA * uclamp_bucket_id(clamp_value); |
|---|
| 830 | 1049 | } |
|---|
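
A worked example of the bucket mapping above, assuming the default CONFIG_UCLAMP_BUCKETS_COUNT of 5 (so UCLAMP_BUCKETS = 5):

```c
/*
 * UCLAMP_BUCKET_DELTA = DIV_ROUND_CLOSEST(1024, 5) = 205
 *
 *   uclamp_bucket_id(0)    = min(0 / 205, 4)    = 0
 *   uclamp_bucket_id(204)  = min(204 / 205, 4)  = 0
 *   uclamp_bucket_id(205)  = min(205 / 205, 4)  = 1
 *   uclamp_bucket_id(512)  = min(512 / 205, 4)  = 2
 *   uclamp_bucket_id(1024) = min(1024 / 205, 4) = 4
 *
 * The min_t() guard keeps the result inside the bucket array whenever
 * 1024 / UCLAMP_BUCKET_DELTA would index one past the end, e.g. if there
 * were 4 buckets (DELTA = 256): min(1024 / 256, 3) = 3.
 */
```
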
| 831 | 1050 | |
|---|
| 832 | 1051 | static inline unsigned int uclamp_none(enum uclamp_id clamp_id) |
|---|
| .. | .. |
|---|
| 892 | 1111 | return uclamp_idle_value(rq, clamp_id, clamp_value); |
|---|
| 893 | 1112 | } |
|---|
| 894 | 1113 | |
|---|
| 1114 | +static void __uclamp_update_util_min_rt_default(struct task_struct *p) |
|---|
| 1115 | +{ |
|---|
| 1116 | + unsigned int default_util_min; |
|---|
| 1117 | + struct uclamp_se *uc_se; |
|---|
| 1118 | + |
|---|
| 1119 | + lockdep_assert_held(&p->pi_lock); |
|---|
| 1120 | + |
|---|
| 1121 | + uc_se = &p->uclamp_req[UCLAMP_MIN]; |
|---|
| 1122 | + |
|---|
| 1123 | + /* Only sync if user didn't override the default */ |
|---|
| 1124 | + if (uc_se->user_defined) |
|---|
| 1125 | + return; |
|---|
| 1126 | + |
|---|
| 1127 | + default_util_min = sysctl_sched_uclamp_util_min_rt_default; |
|---|
| 1128 | + uclamp_se_set(uc_se, default_util_min, false); |
|---|
| 1129 | +} |
|---|
| 1130 | + |
|---|
| 1131 | +static void uclamp_update_util_min_rt_default(struct task_struct *p) |
|---|
| 1132 | +{ |
|---|
| 1133 | + struct rq_flags rf; |
|---|
| 1134 | + struct rq *rq; |
|---|
| 1135 | + |
|---|
| 1136 | + if (!rt_task(p)) |
|---|
| 1137 | + return; |
|---|
| 1138 | + |
|---|
| 1139 | + /* Protect updates to p->uclamp_* */ |
|---|
| 1140 | + rq = task_rq_lock(p, &rf); |
|---|
| 1141 | + __uclamp_update_util_min_rt_default(p); |
|---|
| 1142 | + task_rq_unlock(rq, p, &rf); |
|---|
| 1143 | +} |
|---|
| 1144 | + |
|---|
| 1145 | +static void uclamp_sync_util_min_rt_default(void) |
|---|
| 1146 | +{ |
|---|
| 1147 | + struct task_struct *g, *p; |
|---|
| 1148 | + |
|---|
| 1149 | + /* |
|---|
| 1150 | + * copy_process() sysctl_uclamp |
|---|
| 1151 | + * uclamp_min_rt = X; |
|---|
| 1152 | + * write_lock(&tasklist_lock) read_lock(&tasklist_lock) |
|---|
| 1153 | + * // link thread smp_mb__after_spinlock() |
|---|
| 1154 | + * write_unlock(&tasklist_lock) read_unlock(&tasklist_lock); |
|---|
| 1155 | + * sched_post_fork() for_each_process_thread() |
|---|
| 1156 | + * __uclamp_sync_rt() __uclamp_sync_rt() |
|---|
| 1157 | + * |
|---|
| 1158 | + * Ensures that either sched_post_fork() will observe the new |
|---|
| 1159 | + * uclamp_min_rt or for_each_process_thread() will observe the new |
|---|
| 1160 | + * task. |
|---|
| 1161 | + */ |
|---|
| 1162 | + read_lock(&tasklist_lock); |
|---|
| 1163 | + smp_mb__after_spinlock(); |
|---|
| 1164 | + read_unlock(&tasklist_lock); |
|---|
| 1165 | + |
|---|
| 1166 | + rcu_read_lock(); |
|---|
| 1167 | + for_each_process_thread(g, p) |
|---|
| 1168 | + uclamp_update_util_min_rt_default(p); |
|---|
| 1169 | + rcu_read_unlock(); |
|---|
| 1170 | +} |
|---|
| 1171 | + |
|---|
| 1172 | +#if IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE) |
|---|
| 1173 | +void rockchip_perf_uclamp_sync_util_min_rt_default(void) |
|---|
| 1174 | +{ |
|---|
| 1175 | + uclamp_sync_util_min_rt_default(); |
|---|
| 1176 | +} |
|---|
| 1177 | +EXPORT_SYMBOL(rockchip_perf_uclamp_sync_util_min_rt_default); |
|---|
| 1178 | +#endif |
|---|
| 1179 | + |
|---|
| 895 | 1180 | static inline struct uclamp_se |
|---|
| 896 | 1181 | uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id) |
|---|
| 897 | 1182 | { |
|---|
| 1183 | + /* Copy by value as we could modify it */ |
|---|
| 898 | 1184 | struct uclamp_se uc_req = p->uclamp_req[clamp_id]; |
|---|
| 899 | 1185 | #ifdef CONFIG_UCLAMP_TASK_GROUP |
|---|
| 900 | | - struct uclamp_se uc_max; |
|---|
| 1186 | + unsigned int tg_min, tg_max, value; |
|---|
| 901 | 1187 | |
|---|
| 902 | 1188 | /* |
|---|
| 903 | 1189 | * Tasks in autogroups or root task group will be |
|---|
| .. | .. |
|---|
| 908 | 1194 | if (task_group(p) == &root_task_group) |
|---|
| 909 | 1195 | return uc_req; |
|---|
| 910 | 1196 | |
|---|
| 911 | | - uc_max = task_group(p)->uclamp[clamp_id]; |
|---|
| 912 | | - if (uc_req.value > uc_max.value || !uc_req.user_defined) |
|---|
| 913 | | - return uc_max; |
|---|
| 1197 | + tg_min = task_group(p)->uclamp[UCLAMP_MIN].value; |
|---|
| 1198 | + tg_max = task_group(p)->uclamp[UCLAMP_MAX].value; |
|---|
| 1199 | + value = uc_req.value; |
|---|
| 1200 | + value = clamp(value, tg_min, tg_max); |
|---|
| 1201 | + uclamp_se_set(&uc_req, value, false); |
|---|
| 914 | 1202 | #endif |
|---|
| 915 | 1203 | |
|---|
| 916 | 1204 | return uc_req; |
|---|
| .. | .. |
|---|
| 929 | 1217 | { |
|---|
| 930 | 1218 | struct uclamp_se uc_req = uclamp_tg_restrict(p, clamp_id); |
|---|
| 931 | 1219 | struct uclamp_se uc_max = uclamp_default[clamp_id]; |
|---|
| 1220 | + struct uclamp_se uc_eff; |
|---|
| 1221 | + int ret = 0; |
|---|
| 1222 | + |
|---|
| 1223 | + trace_android_rvh_uclamp_eff_get(p, clamp_id, &uc_max, &uc_eff, &ret); |
|---|
| 1224 | + if (ret) |
|---|
| 1225 | + return uc_eff; |
|---|
| 932 | 1226 | |
|---|
| 933 | 1227 | /* System default restrictions always apply */ |
|---|
| 934 | 1228 | if (unlikely(uc_req.value > uc_max.value)) |
|---|
| .. | .. |
|---|
| 949 | 1243 | |
|---|
| 950 | 1244 | return (unsigned long)uc_eff.value; |
|---|
| 951 | 1245 | } |
|---|
| 1246 | +EXPORT_SYMBOL_GPL(uclamp_eff_value); |
|---|
| 952 | 1247 | |
|---|
| 953 | 1248 | /* |
|---|
| 954 | 1249 | * When a task is enqueued on a rq, the clamp bucket currently defined by the |
|---|
| .. | .. |
|---|
| 1009 | 1304 | |
|---|
| 1010 | 1305 | lockdep_assert_held(&rq->lock); |
|---|
| 1011 | 1306 | |
|---|
| 1307 | + /* |
|---|
| 1308 | + * If sched_uclamp_used was enabled after task @p was enqueued, |
|---|
| 1309 | + * we could end up with unbalanced call to uclamp_rq_dec_id(). |
|---|
| 1310 | + * |
|---|
| 1311 | + * In this case the uc_se->active flag should be false since no uclamp |
|---|
| 1312 | + * accounting was performed at enqueue time and we can just return |
|---|
| 1313 | + * here. |
|---|
| 1314 | + * |
|---|
| 1315 | + * Need to be careful of the following enqueue/dequeue ordering |
|---|
| 1316 | + * problem too |
|---|
| 1317 | + * |
|---|
| 1318 | + * enqueue(taskA) |
|---|
| 1319 | + * // sched_uclamp_used gets enabled |
|---|
| 1320 | + * enqueue(taskB) |
|---|
| 1321 | + * dequeue(taskA) |
|---|
| 1322 | + * // Must not decrement bucket->tasks here |
|---|
| 1323 | + * dequeue(taskB) |
|---|
| 1324 | + * |
|---|
| 1325 | + * where we could end up with stale data in uc_se and |
|---|
| 1326 | + * bucket[uc_se->bucket_id]. |
|---|
| 1327 | + * |
|---|
| 1328 | + * The following check here eliminates the possibility of such race. |
|---|
| 1329 | + */ |
|---|
| 1330 | + if (unlikely(!uc_se->active)) |
|---|
| 1331 | + return; |
|---|
| 1332 | + |
|---|
| 1012 | 1333 | bucket = &uc_rq->bucket[uc_se->bucket_id]; |
|---|
| 1334 | + |
|---|
| 1013 | 1335 | SCHED_WARN_ON(!bucket->tasks); |
|---|
| 1014 | 1336 | if (likely(bucket->tasks)) |
|---|
| 1015 | 1337 | bucket->tasks--; |
|---|
| 1338 | + |
|---|
| 1016 | 1339 | uc_se->active = false; |
|---|
| 1017 | 1340 | |
|---|
| 1018 | 1341 | /* |
|---|
| .. | .. |
|---|
| 1040 | 1363 | { |
|---|
| 1041 | 1364 | enum uclamp_id clamp_id; |
|---|
| 1042 | 1365 | |
|---|
| 1366 | + /* |
|---|
| 1367 | + * Avoid any overhead until uclamp is actually used by the userspace. |
|---|
| 1368 | + * |
|---|
| 1369 | + * The condition is constructed such that a NOP is generated when |
|---|
| 1370 | + * sched_uclamp_used is disabled. |
|---|
| 1371 | + */ |
|---|
| 1372 | + if (!static_branch_unlikely(&sched_uclamp_used)) |
|---|
| 1373 | + return; |
|---|
| 1374 | + |
|---|
| 1043 | 1375 | if (unlikely(!p->sched_class->uclamp_enabled)) |
|---|
| 1044 | 1376 | return; |
|---|
| 1045 | 1377 | |
|---|
| .. | .. |
|---|
| 1055 | 1387 | { |
|---|
| 1056 | 1388 | enum uclamp_id clamp_id; |
|---|
| 1057 | 1389 | |
|---|
| 1390 | + /* |
|---|
| 1391 | + * Avoid any overhead until uclamp is actually used by the userspace. |
|---|
| 1392 | + * |
|---|
| 1393 | + * The condition is constructed such that a NOP is generated when |
|---|
| 1394 | + * sched_uclamp_used is disabled. |
|---|
| 1395 | + */ |
|---|
| 1396 | + if (!static_branch_unlikely(&sched_uclamp_used)) |
|---|
| 1397 | + return; |
|---|
| 1398 | + |
|---|
| 1058 | 1399 | if (unlikely(!p->sched_class->uclamp_enabled)) |
|---|
| 1059 | 1400 | return; |
|---|
| 1060 | 1401 | |
|---|
| .. | .. |
|---|
| 1062 | 1403 | uclamp_rq_dec_id(rq, p, clamp_id); |
|---|
| 1063 | 1404 | } |
|---|
| 1064 | 1405 | |
|---|
| 1065 | | -static inline void |
|---|
| 1066 | | -uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id) |
|---|
| 1406 | +static inline void uclamp_rq_reinc_id(struct rq *rq, struct task_struct *p, |
|---|
| 1407 | + enum uclamp_id clamp_id) |
|---|
| 1067 | 1408 | { |
|---|
| 1409 | + if (!p->uclamp[clamp_id].active) |
|---|
| 1410 | + return; |
|---|
| 1411 | + |
|---|
| 1412 | + uclamp_rq_dec_id(rq, p, clamp_id); |
|---|
| 1413 | + uclamp_rq_inc_id(rq, p, clamp_id); |
|---|
| 1414 | + |
|---|
| 1415 | + /* |
|---|
| 1416 | + * Make sure to clear the idle flag if we've transiently reached 0 |
|---|
| 1417 | + * active tasks on rq. |
|---|
| 1418 | + */ |
|---|
| 1419 | + if (clamp_id == UCLAMP_MAX && (rq->uclamp_flags & UCLAMP_FLAG_IDLE)) |
|---|
| 1420 | + rq->uclamp_flags &= ~UCLAMP_FLAG_IDLE; |
|---|
| 1421 | +} |
|---|
| 1422 | + |
|---|
| 1423 | +static inline void |
|---|
| 1424 | +uclamp_update_active(struct task_struct *p) |
|---|
| 1425 | +{ |
|---|
| 1426 | + enum uclamp_id clamp_id; |
|---|
| 1068 | 1427 | struct rq_flags rf; |
|---|
| 1069 | 1428 | struct rq *rq; |
|---|
| 1070 | 1429 | |
|---|
| .. | .. |
|---|
| 1084 | 1443 | * affecting a valid clamp bucket, the next time it's enqueued, |
|---|
| 1085 | 1444 | * it will already see the updated clamp bucket value. |
|---|
| 1086 | 1445 | */ |
|---|
| 1087 | | - if (p->uclamp[clamp_id].active) { |
|---|
| 1088 | | - uclamp_rq_dec_id(rq, p, clamp_id); |
|---|
| 1089 | | - uclamp_rq_inc_id(rq, p, clamp_id); |
|---|
| 1090 | | - } |
|---|
| 1446 | + for_each_clamp_id(clamp_id) |
|---|
| 1447 | + uclamp_rq_reinc_id(rq, p, clamp_id); |
|---|
| 1091 | 1448 | |
|---|
| 1092 | 1449 | task_rq_unlock(rq, p, &rf); |
|---|
| 1093 | 1450 | } |
|---|
| 1094 | 1451 | |
|---|
| 1095 | 1452 | #ifdef CONFIG_UCLAMP_TASK_GROUP |
|---|
| 1096 | 1453 | static inline void |
|---|
| 1097 | | -uclamp_update_active_tasks(struct cgroup_subsys_state *css, |
|---|
| 1098 | | - unsigned int clamps) |
|---|
| 1454 | +uclamp_update_active_tasks(struct cgroup_subsys_state *css) |
|---|
| 1099 | 1455 | { |
|---|
| 1100 | | - enum uclamp_id clamp_id; |
|---|
| 1101 | 1456 | struct css_task_iter it; |
|---|
| 1102 | 1457 | struct task_struct *p; |
|---|
| 1103 | 1458 | |
|---|
| 1104 | 1459 | css_task_iter_start(css, 0, &it); |
|---|
| 1105 | | - while ((p = css_task_iter_next(&it))) { |
|---|
| 1106 | | - for_each_clamp_id(clamp_id) { |
|---|
| 1107 | | - if ((0x1 << clamp_id) & clamps) |
|---|
| 1108 | | - uclamp_update_active(p, clamp_id); |
|---|
| 1109 | | - } |
|---|
| 1110 | | - } |
|---|
| 1460 | + while ((p = css_task_iter_next(&it))) |
|---|
| 1461 | + uclamp_update_active(p); |
|---|
| 1111 | 1462 | css_task_iter_end(&it); |
|---|
| 1112 | 1463 | } |
|---|
| 1113 | 1464 | |
|---|
| .. | .. |
|---|
| 1130 | 1481 | #endif |
|---|
| 1131 | 1482 | |
|---|
| 1132 | 1483 | int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, |
|---|
| 1133 | | - void __user *buffer, size_t *lenp, |
|---|
| 1134 | | - loff_t *ppos) |
|---|
| 1484 | + void *buffer, size_t *lenp, loff_t *ppos) |
|---|
| 1135 | 1485 | { |
|---|
| 1136 | 1486 | bool update_root_tg = false; |
|---|
| 1137 | | - int old_min, old_max; |
|---|
| 1487 | + int old_min, old_max, old_min_rt; |
|---|
| 1138 | 1488 | int result; |
|---|
| 1139 | 1489 | |
|---|
| 1140 | 1490 | mutex_lock(&uclamp_mutex); |
|---|
| 1141 | 1491 | old_min = sysctl_sched_uclamp_util_min; |
|---|
| 1142 | 1492 | old_max = sysctl_sched_uclamp_util_max; |
|---|
| 1493 | + old_min_rt = sysctl_sched_uclamp_util_min_rt_default; |
|---|
| 1143 | 1494 | |
|---|
| 1144 | 1495 | result = proc_dointvec(table, write, buffer, lenp, ppos); |
|---|
| 1145 | 1496 | if (result) |
|---|
| .. | .. |
|---|
| 1148 | 1499 | goto done; |
|---|
| 1149 | 1500 | |
|---|
| 1150 | 1501 | if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max || |
|---|
| 1151 | | - sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE) { |
|---|
| 1502 | + sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE || |
|---|
| 1503 | + sysctl_sched_uclamp_util_min_rt_default > SCHED_CAPACITY_SCALE) { |
|---|
| 1504 | + |
|---|
| 1152 | 1505 | result = -EINVAL; |
|---|
| 1153 | 1506 | goto undo; |
|---|
| 1154 | 1507 | } |
|---|
| .. | .. |
|---|
| 1164 | 1517 | update_root_tg = true; |
|---|
| 1165 | 1518 | } |
|---|
| 1166 | 1519 | |
|---|
| 1167 | | - if (update_root_tg) |
|---|
| 1520 | + if (update_root_tg) { |
|---|
| 1521 | + static_branch_enable(&sched_uclamp_used); |
|---|
| 1168 | 1522 | uclamp_update_root_tg(); |
|---|
| 1523 | + } |
|---|
| 1524 | + |
|---|
| 1525 | + if (old_min_rt != sysctl_sched_uclamp_util_min_rt_default) { |
|---|
| 1526 | + static_branch_enable(&sched_uclamp_used); |
|---|
| 1527 | + uclamp_sync_util_min_rt_default(); |
|---|
| 1528 | + } |
|---|
| 1169 | 1529 | |
|---|
| 1170 | 1530 | /* |
|---|
| 1171 | 1531 | * We update all RUNNABLE tasks only when task groups are in use. |
|---|
| .. | .. |
|---|
| 1178 | 1538 | undo: |
|---|
| 1179 | 1539 | sysctl_sched_uclamp_util_min = old_min; |
|---|
| 1180 | 1540 | sysctl_sched_uclamp_util_max = old_max; |
|---|
| 1541 | + sysctl_sched_uclamp_util_min_rt_default = old_min_rt; |
|---|
| 1181 | 1542 | done: |
|---|
| 1182 | 1543 | mutex_unlock(&uclamp_mutex); |
|---|
| 1183 | 1544 | |
|---|
| .. | .. |
|---|
| 1187 | 1548 | static int uclamp_validate(struct task_struct *p, |
|---|
| 1188 | 1549 | const struct sched_attr *attr) |
|---|
| 1189 | 1550 | { |
|---|
| 1190 | | - unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value; |
|---|
| 1191 | | - unsigned int upper_bound = p->uclamp_req[UCLAMP_MAX].value; |
|---|
| 1551 | + int util_min = p->uclamp_req[UCLAMP_MIN].value; |
|---|
| 1552 | + int util_max = p->uclamp_req[UCLAMP_MAX].value; |
|---|
| 1192 | 1553 | |
|---|
| 1193 | | - if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) |
|---|
| 1194 | | - lower_bound = attr->sched_util_min; |
|---|
| 1195 | | - if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) |
|---|
| 1196 | | - upper_bound = attr->sched_util_max; |
|---|
| 1554 | + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) { |
|---|
| 1555 | + util_min = attr->sched_util_min; |
|---|
| 1197 | 1556 | |
|---|
| 1198 | | - if (lower_bound > upper_bound) |
|---|
| 1557 | + if (util_min + 1 > SCHED_CAPACITY_SCALE + 1) |
|---|
| 1558 | + return -EINVAL; |
|---|
| 1559 | + } |
|---|
| 1560 | + |
|---|
| 1561 | + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) { |
|---|
| 1562 | + util_max = attr->sched_util_max; |
|---|
| 1563 | + |
|---|
| 1564 | + if (util_max + 1 > SCHED_CAPACITY_SCALE + 1) |
|---|
| 1565 | + return -EINVAL; |
|---|
| 1566 | + } |
|---|
| 1567 | + |
|---|
| 1568 | + if (util_min != -1 && util_max != -1 && util_min > util_max) |
|---|
| 1199 | 1569 | return -EINVAL; |
|---|
| 1200 | | - if (upper_bound > SCHED_CAPACITY_SCALE) |
|---|
| 1201 | | - return -EINVAL; |
|---|
| 1570 | + |
|---|
| 1571 | + /* |
|---|
| 1572 | + * We have valid uclamp attributes; make sure uclamp is enabled. |
|---|
| 1573 | + * |
|---|
| 1574 | + * We need to do that here, because enabling static branches is a |
|---|
| 1575 | + * blocking operation which obviously cannot be done while holding |
|---|
| 1576 | + * scheduler locks. |
|---|
| 1577 | + */ |
|---|
| 1578 | + static_branch_enable(&sched_uclamp_used); |
|---|
| 1202 | 1579 | |
|---|
| 1203 | 1580 | return 0; |
|---|
| 1581 | +} |
|---|
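
The open-coded range checks in uclamp_validate() above use signed arithmetic so that -1 (meaning "reset this clamp to its default") passes validation while anything above SCHED_CAPACITY_SCALE is rejected. A quick worked check, with SCHED_CAPACITY_SCALE = 1024:

```c
/*
 *   util_min == -1   : -1 + 1   == 0    ; 0    > 1025 is false -> accepted (reset)
 *   util_min == 1024 : 1024 + 1 == 1025 ; 1025 > 1025 is false -> accepted
 *   util_min == 1025 : 1025 + 1 == 1026 ; 1026 > 1025 is true  -> -EINVAL
 *
 * The final "util_min != -1 && util_max != -1" test skips the min <= max
 * ordering check whenever either side is being reset.
 */
```
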
| 1582 | + |
|---|
| 1583 | +static bool uclamp_reset(const struct sched_attr *attr, |
|---|
| 1584 | + enum uclamp_id clamp_id, |
|---|
| 1585 | + struct uclamp_se *uc_se) |
|---|
| 1586 | +{ |
|---|
| 1587 | + /* Reset on sched class change for a non user-defined clamp value. */ |
|---|
| 1588 | + if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)) && |
|---|
| 1589 | + !uc_se->user_defined) |
|---|
| 1590 | + return true; |
|---|
| 1591 | + |
|---|
| 1592 | + /* Reset on sched_util_{min,max} == -1. */ |
|---|
| 1593 | + if (clamp_id == UCLAMP_MIN && |
|---|
| 1594 | + attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN && |
|---|
| 1595 | + attr->sched_util_min == -1) { |
|---|
| 1596 | + return true; |
|---|
| 1597 | + } |
|---|
| 1598 | + |
|---|
| 1599 | + if (clamp_id == UCLAMP_MAX && |
|---|
| 1600 | + attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX && |
|---|
| 1601 | + attr->sched_util_max == -1) { |
|---|
| 1602 | + return true; |
|---|
| 1603 | + } |
|---|
| 1604 | + |
|---|
| 1605 | + return false; |
|---|
| 1204 | 1606 | } |
|---|
| 1205 | 1607 | |
|---|
| 1206 | 1608 | static void __setscheduler_uclamp(struct task_struct *p, |
|---|
| .. | .. |
|---|
| 1208 | 1610 | { |
|---|
| 1209 | 1611 | enum uclamp_id clamp_id; |
|---|
| 1210 | 1612 | |
|---|
| 1211 | | - /* |
|---|
| 1212 | | - * On scheduling class change, reset to default clamps for tasks |
|---|
| 1213 | | - * without a task-specific value. |
|---|
| 1214 | | - */ |
|---|
| 1215 | 1613 | for_each_clamp_id(clamp_id) { |
|---|
| 1216 | 1614 | struct uclamp_se *uc_se = &p->uclamp_req[clamp_id]; |
|---|
| 1217 | | - unsigned int clamp_value = uclamp_none(clamp_id); |
|---|
| 1615 | + unsigned int value; |
|---|
| 1218 | 1616 | |
|---|
| 1219 | | - /* Keep using defined clamps across class changes */ |
|---|
| 1220 | | - if (uc_se->user_defined) |
|---|
| 1617 | + if (!uclamp_reset(attr, clamp_id, uc_se)) |
|---|
| 1221 | 1618 | continue; |
|---|
| 1222 | 1619 | |
|---|
| 1223 | | - /* By default, RT tasks always get 100% boost */ |
|---|
| 1224 | | - if (sched_feat(SUGOV_RT_MAX_FREQ) && |
|---|
| 1225 | | - unlikely(rt_task(p) && |
|---|
| 1226 | | - clamp_id == UCLAMP_MIN)) { |
|---|
| 1620 | + /* |
|---|
| 1621 | + * RT tasks by default have a 100% boost value that could be modified |
|---|
| 1622 | + * at runtime. |
|---|
| 1623 | + */ |
|---|
| 1624 | + if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN)) |
|---|
| 1625 | + value = sysctl_sched_uclamp_util_min_rt_default; |
|---|
| 1626 | + else |
|---|
| 1627 | + value = uclamp_none(clamp_id); |
|---|
| 1227 | 1628 | |
|---|
| 1228 | | - clamp_value = uclamp_none(UCLAMP_MAX); |
|---|
| 1229 | | - } |
|---|
| 1629 | + uclamp_se_set(uc_se, value, false); |
|---|
| 1230 | 1630 | |
|---|
| 1231 | | - uclamp_se_set(uc_se, clamp_value, false); |
|---|
| 1232 | 1631 | } |
|---|
| 1233 | 1632 | |
|---|
| 1234 | 1633 | if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP))) |
|---|
| 1235 | 1634 | return; |
|---|
| 1236 | 1635 | |
|---|
| 1237 | | - if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) { |
|---|
| 1636 | + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN && |
|---|
| 1637 | + attr->sched_util_min != -1) { |
|---|
| 1238 | 1638 | uclamp_se_set(&p->uclamp_req[UCLAMP_MIN], |
|---|
| 1239 | 1639 | attr->sched_util_min, true); |
|---|
| 1640 | + trace_android_vh_setscheduler_uclamp(p, UCLAMP_MIN, attr->sched_util_min); |
|---|
| 1240 | 1641 | } |
|---|
| 1241 | 1642 | |
|---|
| 1242 | | - if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) { |
|---|
| 1643 | + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX && |
|---|
| 1644 | + attr->sched_util_max != -1) { |
|---|
| 1243 | 1645 | uclamp_se_set(&p->uclamp_req[UCLAMP_MAX], |
|---|
| 1244 | 1646 | attr->sched_util_max, true); |
|---|
| 1647 | + trace_android_vh_setscheduler_uclamp(p, UCLAMP_MAX, attr->sched_util_max); |
|---|
| 1245 | 1648 | } |
|---|
| 1246 | 1649 | } |
|---|
| 1247 | 1650 | |
|---|
| .. | .. |
|---|
| 1249 | 1652 | { |
|---|
| 1250 | 1653 | enum uclamp_id clamp_id; |
|---|
| 1251 | 1654 | |
|---|
| 1655 | + /* |
|---|
| 1656 | + * We don't need to hold task_rq_lock() when updating p->uclamp_* here |
|---|
| 1657 | + * as the task is still at its early fork stages. |
|---|
| 1658 | + */ |
|---|
| 1252 | 1659 | for_each_clamp_id(clamp_id) |
|---|
| 1253 | 1660 | p->uclamp[clamp_id].active = false; |
|---|
| 1254 | 1661 | |
|---|
| .. | .. |
|---|
| 1261 | 1668 | } |
|---|
| 1262 | 1669 | } |
|---|
| 1263 | 1670 | |
|---|
| 1264 | | -#ifdef CONFIG_SMP |
|---|
| 1265 | | -unsigned int uclamp_task(struct task_struct *p) |
|---|
| 1671 | +static void uclamp_post_fork(struct task_struct *p) |
|---|
| 1266 | 1672 | { |
|---|
| 1267 | | - unsigned long util; |
|---|
| 1268 | | - |
|---|
| 1269 | | - util = task_util_est(p); |
|---|
| 1270 | | - util = max(util, uclamp_eff_value(p, UCLAMP_MIN)); |
|---|
| 1271 | | - util = min(util, uclamp_eff_value(p, UCLAMP_MAX)); |
|---|
| 1272 | | - |
|---|
| 1273 | | - return util; |
|---|
| 1673 | + uclamp_update_util_min_rt_default(p); |
|---|
| 1274 | 1674 | } |
|---|
| 1275 | 1675 | |
|---|
| 1276 | | -bool uclamp_boosted(struct task_struct *p) |
|---|
| 1676 | +static void __init init_uclamp_rq(struct rq *rq) |
|---|
| 1277 | 1677 | { |
|---|
| 1278 | | - return uclamp_eff_value(p, UCLAMP_MIN) > 0; |
|---|
| 1678 | + enum uclamp_id clamp_id; |
|---|
| 1679 | + struct uclamp_rq *uc_rq = rq->uclamp; |
|---|
| 1680 | + |
|---|
| 1681 | + for_each_clamp_id(clamp_id) { |
|---|
| 1682 | + uc_rq[clamp_id] = (struct uclamp_rq) { |
|---|
| 1683 | + .value = uclamp_none(clamp_id) |
|---|
| 1684 | + }; |
|---|
| 1685 | + } |
|---|
| 1686 | + |
|---|
| 1687 | + rq->uclamp_flags = UCLAMP_FLAG_IDLE; |
|---|
| 1279 | 1688 | } |
|---|
| 1280 | | - |
|---|
| 1281 | | -bool uclamp_latency_sensitive(struct task_struct *p) |
|---|
| 1282 | | -{ |
|---|
| 1283 | | -#ifdef CONFIG_UCLAMP_TASK_GROUP |
|---|
| 1284 | | - struct cgroup_subsys_state *css = task_css(p, cpu_cgrp_id); |
|---|
| 1285 | | - struct task_group *tg; |
|---|
| 1286 | | - |
|---|
| 1287 | | - if (!css) |
|---|
| 1288 | | - return false; |
|---|
| 1289 | | - tg = container_of(css, struct task_group, css); |
|---|
| 1290 | | - |
|---|
| 1291 | | - return tg->latency_sensitive; |
|---|
| 1292 | | -#else |
|---|
| 1293 | | - return false; |
|---|
| 1294 | | -#endif |
|---|
| 1295 | | -} |
|---|
| 1296 | | -#endif /* CONFIG_SMP */ |
|---|
| 1297 | 1689 | |
|---|
| 1298 | 1690 | static void __init init_uclamp(void) |
|---|
| 1299 | 1691 | { |
|---|
| .. | .. |
|---|
| 1301 | 1693 | enum uclamp_id clamp_id; |
|---|
| 1302 | 1694 | int cpu; |
|---|
| 1303 | 1695 | |
|---|
| 1304 | | - mutex_init(&uclamp_mutex); |
|---|
| 1305 | | - |
|---|
| 1306 | | - for_each_possible_cpu(cpu) { |
|---|
| 1307 | | - memset(&cpu_rq(cpu)->uclamp, 0, |
|---|
| 1308 | | - sizeof(struct uclamp_rq)*UCLAMP_CNT); |
|---|
| 1309 | | - cpu_rq(cpu)->uclamp_flags = 0; |
|---|
| 1310 | | - } |
|---|
| 1696 | + for_each_possible_cpu(cpu) |
|---|
| 1697 | + init_uclamp_rq(cpu_rq(cpu)); |
|---|
| 1311 | 1698 | |
|---|
| 1312 | 1699 | for_each_clamp_id(clamp_id) { |
|---|
| 1313 | 1700 | uclamp_se_set(&init_task.uclamp_req[clamp_id], |
|---|
| .. | .. |
|---|
| 1336 | 1723 | static void __setscheduler_uclamp(struct task_struct *p, |
|---|
| 1337 | 1724 | const struct sched_attr *attr) { } |
|---|
| 1338 | 1725 | static inline void uclamp_fork(struct task_struct *p) { } |
|---|
| 1339 | | - |
|---|
| 1340 | | -long schedtune_task_margin(struct task_struct *task); |
|---|
| 1341 | | - |
|---|
| 1342 | | -#ifdef CONFIG_SMP |
|---|
| 1343 | | -unsigned int uclamp_task(struct task_struct *p) |
|---|
| 1344 | | -{ |
|---|
| 1345 | | - unsigned long util = task_util_est(p); |
|---|
| 1346 | | -#ifdef CONFIG_SCHED_TUNE |
|---|
| 1347 | | - long margin = schedtune_task_margin(p); |
|---|
| 1348 | | - |
|---|
| 1349 | | - trace_sched_boost_task(p, util, margin); |
|---|
| 1350 | | - |
|---|
| 1351 | | - util += margin; |
|---|
| 1352 | | -#endif |
|---|
| 1353 | | - |
|---|
| 1354 | | - return util; |
|---|
| 1355 | | -} |
|---|
| 1356 | | - |
|---|
| 1357 | | -bool uclamp_boosted(struct task_struct *p) |
|---|
| 1358 | | -{ |
|---|
| 1359 | | -#ifdef CONFIG_SCHED_TUNE |
|---|
| 1360 | | - return schedtune_task_boost(p) > 0; |
|---|
| 1361 | | -#endif |
|---|
| 1362 | | - return false; |
|---|
| 1363 | | -} |
|---|
| 1364 | | - |
|---|
| 1365 | | -bool uclamp_latency_sensitive(struct task_struct *p) |
|---|
| 1366 | | -{ |
|---|
| 1367 | | -#ifdef CONFIG_SCHED_TUNE |
|---|
| 1368 | | - return schedtune_prefer_idle(p) != 0; |
|---|
| 1369 | | -#endif |
|---|
| 1370 | | - return false; |
|---|
| 1371 | | -} |
|---|
| 1372 | | -#endif /* CONFIG_SMP */ |
|---|
| 1373 | | - |
|---|
| 1726 | +static inline void uclamp_post_fork(struct task_struct *p) { } |
|---|
| 1374 | 1727 | static inline void init_uclamp(void) { } |
|---|
| 1375 | 1728 | #endif /* CONFIG_UCLAMP_TASK */ |
|---|
| 1376 | 1729 | |
|---|
| .. | .. |
|---|
| 1385 | 1738 | } |
|---|
| 1386 | 1739 | |
|---|
| 1387 | 1740 | uclamp_rq_inc(rq, p); |
|---|
| 1741 | + trace_android_rvh_enqueue_task(rq, p, flags); |
|---|
| 1388 | 1742 | p->sched_class->enqueue_task(rq, p, flags); |
|---|
| 1743 | + trace_android_rvh_after_enqueue_task(rq, p); |
|---|
| 1389 | 1744 | } |
|---|
| 1390 | 1745 | |
|---|
| 1391 | 1746 | static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) |
|---|
| .. | .. |
|---|
| 1399 | 1754 | } |
|---|
| 1400 | 1755 | |
|---|
| 1401 | 1756 | uclamp_rq_dec(rq, p); |
|---|
| 1757 | + trace_android_rvh_dequeue_task(rq, p, flags); |
|---|
| 1402 | 1758 | p->sched_class->dequeue_task(rq, p, flags); |
|---|
| 1759 | + trace_android_rvh_after_dequeue_task(rq, p); |
|---|
| 1403 | 1760 | } |
|---|
| 1404 | 1761 | |
|---|
| 1405 | 1762 | void activate_task(struct rq *rq, struct task_struct *p, int flags) |
|---|
| 1406 | 1763 | { |
|---|
| 1407 | | - if (task_contributes_to_load(p)) |
|---|
| 1408 | | - rq->nr_uninterruptible--; |
|---|
| 1409 | | - |
|---|
| 1410 | 1764 | enqueue_task(rq, p, flags); |
|---|
| 1765 | + |
|---|
| 1766 | + p->on_rq = TASK_ON_RQ_QUEUED; |
|---|
| 1411 | 1767 | } |
|---|
| 1768 | +EXPORT_SYMBOL_GPL(activate_task); |
|---|
| 1412 | 1769 | |
|---|
| 1413 | 1770 | void deactivate_task(struct rq *rq, struct task_struct *p, int flags) |
|---|
| 1414 | 1771 | { |
|---|
| 1415 | | - if (task_contributes_to_load(p)) |
|---|
| 1416 | | - rq->nr_uninterruptible++; |
|---|
| 1772 | + p->on_rq = (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING; |
|---|
| 1417 | 1773 | |
|---|
| 1418 | 1774 | dequeue_task(rq, p, flags); |
|---|
| 1419 | 1775 | } |
|---|
| 1776 | +EXPORT_SYMBOL_GPL(deactivate_task); |
|---|
| 1420 | 1777 | |
|---|
| 1421 | | -/* |
|---|
| 1422 | | - * __normal_prio - return the priority that is based on the static prio |
|---|
| 1423 | | - */ |
|---|
| 1424 | | -static inline int __normal_prio(struct task_struct *p) |
|---|
| 1778 | +static inline int __normal_prio(int policy, int rt_prio, int nice) |
|---|
| 1425 | 1779 | { |
|---|
| 1426 | | - return p->static_prio; |
|---|
| 1780 | + int prio; |
|---|
| 1781 | + |
|---|
| 1782 | + if (dl_policy(policy)) |
|---|
| 1783 | + prio = MAX_DL_PRIO - 1; |
|---|
| 1784 | + else if (rt_policy(policy)) |
|---|
| 1785 | + prio = MAX_RT_PRIO - 1 - rt_prio; |
|---|
| 1786 | + else |
|---|
| 1787 | + prio = NICE_TO_PRIO(nice); |
|---|
| 1788 | + |
|---|
| 1789 | + return prio; |
|---|
| 1427 | 1790 | } |
|---|
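
A few worked values for the rewritten __normal_prio(), assuming the usual constants (MAX_DL_PRIO = 0, MAX_RT_PRIO = 100, NICE_TO_PRIO(n) = 120 + n); lower numbers mean higher priority:

```c
/*
 *   __normal_prio(SCHED_DEADLINE, 0,  0)  = MAX_DL_PRIO - 1      = -1
 *   __normal_prio(SCHED_FIFO,    50,  0)  = MAX_RT_PRIO - 1 - 50 = 49
 *   __normal_prio(SCHED_NORMAL,   0, -10) = NICE_TO_PRIO(-10)    = 110
 *   __normal_prio(SCHED_NORMAL,   0,   0) = NICE_TO_PRIO(0)      = 120
 *
 * normal_prio() below simply feeds the task's current policy, rt_priority
 * and static nice value into this helper.
 */
```
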
| 1428 | 1791 | |
|---|
| 1429 | 1792 | /* |
|---|
| .. | .. |
|---|
| 1435 | 1798 | */ |
|---|
| 1436 | 1799 | static inline int normal_prio(struct task_struct *p) |
|---|
| 1437 | 1800 | { |
|---|
| 1438 | | - int prio; |
|---|
| 1439 | | - |
|---|
| 1440 | | - if (task_has_dl_policy(p)) |
|---|
| 1441 | | - prio = MAX_DL_PRIO-1; |
|---|
| 1442 | | - else if (task_has_rt_policy(p)) |
|---|
| 1443 | | - prio = MAX_RT_PRIO-1 - p->rt_priority; |
|---|
| 1444 | | - else |
|---|
| 1445 | | - prio = __normal_prio(p); |
|---|
| 1446 | | - return prio; |
|---|
| 1801 | + return __normal_prio(p->policy, p->rt_priority, PRIO_TO_NICE(p->static_prio)); |
|---|
| 1447 | 1802 | } |
|---|
| 1448 | 1803 | |
|---|
| 1449 | 1804 | /* |
|---|
| .. | .. |
|---|
| 1499 | 1854 | |
|---|
| 1500 | 1855 | void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) |
|---|
| 1501 | 1856 | { |
|---|
| 1502 | | - const struct sched_class *class; |
|---|
| 1503 | | - |
|---|
| 1504 | | - if (p->sched_class == rq->curr->sched_class) { |
|---|
| 1857 | + if (p->sched_class == rq->curr->sched_class) |
|---|
| 1505 | 1858 | rq->curr->sched_class->check_preempt_curr(rq, p, flags); |
|---|
| 1506 | | - } else { |
|---|
| 1507 | | - for_each_class(class) { |
|---|
| 1508 | | - if (class == rq->curr->sched_class) |
|---|
| 1509 | | - break; |
|---|
| 1510 | | - if (class == p->sched_class) { |
|---|
| 1511 | | - resched_curr(rq); |
|---|
| 1512 | | - break; |
|---|
| 1513 | | - } |
|---|
| 1514 | | - } |
|---|
| 1515 | | - } |
|---|
| 1859 | + else if (p->sched_class > rq->curr->sched_class) |
|---|
| 1860 | + resched_curr(rq); |
|---|
| 1516 | 1861 | |
|---|
| 1517 | 1862 | /* |
|---|
| 1518 | 1863 | * A queue event has occurred, and we're going to schedule. In |
|---|
| .. | .. |
|---|
| 1521 | 1866 | if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr)) |
|---|
| 1522 | 1867 | rq_clock_skip_update(rq); |
|---|
| 1523 | 1868 | } |
|---|
| 1869 | +EXPORT_SYMBOL_GPL(check_preempt_curr); |
|---|
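
The new direct pointer comparison in check_preempt_curr() only works because this series lays the sched_class descriptors out in priority order at link time. The helper name below is illustrative (not introduced by this hunk); it just spells out the assumption:

```c
/*
 * Assumed link-time layout, ascending addresses == ascending priority:
 *
 *   idle_sched_class < fair_sched_class < rt_sched_class
 *                    < dl_sched_class   < stop_sched_class
 */
static inline bool sched_class_above(const struct sched_class *a,
				     const struct sched_class *b)
{
	return a > b;	/* only valid given the layout above */
}
```
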
| 1524 | 1870 | |
|---|
| 1525 | 1871 | #ifdef CONFIG_SMP |
|---|
| 1526 | 1872 | |
|---|
| 1527 | | -static inline bool is_per_cpu_kthread(struct task_struct *p) |
|---|
| 1873 | +static void |
|---|
| 1874 | +__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags); |
|---|
| 1875 | + |
|---|
| 1876 | +static int __set_cpus_allowed_ptr(struct task_struct *p, |
|---|
| 1877 | + const struct cpumask *new_mask, |
|---|
| 1878 | + u32 flags); |
|---|
| 1879 | + |
|---|
| 1880 | +static void migrate_disable_switch(struct rq *rq, struct task_struct *p) |
|---|
| 1528 | 1881 | { |
|---|
| 1529 | | - if (!(p->flags & PF_KTHREAD)) |
|---|
| 1530 | | - return false; |
|---|
| 1882 | + if (likely(!p->migration_disabled)) |
|---|
| 1883 | + return; |
|---|
| 1531 | 1884 | |
|---|
| 1532 | | - if (p->nr_cpus_allowed != 1) |
|---|
| 1533 | | - return false; |
|---|
| 1885 | + if (p->cpus_ptr != &p->cpus_mask) |
|---|
| 1886 | + return; |
|---|
| 1534 | 1887 | |
|---|
| 1535 | | - return true; |
|---|
| 1888 | + /* |
|---|
| 1889 | + * Violates locking rules! see comment in __do_set_cpus_allowed(). |
|---|
| 1890 | + */ |
|---|
| 1891 | + __do_set_cpus_allowed(p, cpumask_of(rq->cpu), SCA_MIGRATE_DISABLE); |
|---|
| 1892 | +} |
|---|
| 1893 | + |
|---|
| 1894 | +void migrate_disable(void) |
|---|
| 1895 | +{ |
|---|
| 1896 | + struct task_struct *p = current; |
|---|
| 1897 | + |
|---|
| 1898 | + if (p->migration_disabled) { |
|---|
| 1899 | + p->migration_disabled++; |
|---|
| 1900 | + return; |
|---|
| 1901 | + } |
|---|
| 1902 | + |
|---|
| 1903 | + trace_sched_migrate_disable_tp(p); |
|---|
| 1904 | + |
|---|
| 1905 | + preempt_disable(); |
|---|
| 1906 | + this_rq()->nr_pinned++; |
|---|
| 1907 | + p->migration_disabled = 1; |
|---|
| 1908 | + preempt_lazy_disable(); |
|---|
| 1909 | + preempt_enable(); |
|---|
| 1910 | +} |
|---|
| 1911 | +EXPORT_SYMBOL_GPL(migrate_disable); |
|---|
| 1912 | + |
|---|
| 1913 | +void migrate_enable(void) |
|---|
| 1914 | +{ |
|---|
| 1915 | + struct task_struct *p = current; |
|---|
| 1916 | + |
|---|
| 1917 | + if (p->migration_disabled > 1) { |
|---|
| 1918 | + p->migration_disabled--; |
|---|
| 1919 | + return; |
|---|
| 1920 | + } |
|---|
| 1921 | + |
|---|
| 1922 | + /* |
|---|
| 1923 | + * Ensure stop_task runs either before or after this, and that |
|---|
| 1924 | + * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). |
|---|
| 1925 | + */ |
|---|
| 1926 | + preempt_disable(); |
|---|
| 1927 | + if (p->cpus_ptr != &p->cpus_mask) |
|---|
| 1928 | + __set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); |
|---|
| 1929 | + /* |
|---|
| 1930 | + * Mustn't clear migration_disabled() until cpus_ptr points back at the |
|---|
| 1931 | + * regular cpus_mask, otherwise things that race (eg. |
|---|
| 1932 | + * select_fallback_rq) get confused. |
|---|
| 1933 | + */ |
|---|
| 1934 | + barrier(); |
|---|
| 1935 | + p->migration_disabled = 0; |
|---|
| 1936 | + this_rq()->nr_pinned--; |
|---|
| 1937 | + preempt_lazy_enable(); |
|---|
| 1938 | + preempt_enable(); |
|---|
| 1939 | + |
|---|
| 1940 | + trace_sched_migrate_enable_tp(p); |
|---|
| 1941 | +} |
|---|
| 1942 | +EXPORT_SYMBOL_GPL(migrate_enable); |
|---|
| 1943 | + |
|---|
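migrate_disable() above pins the current task to its CPU while leaving it preemptible, nesting via the migration_disabled counter; the outermost migrate_enable() restores cpus_mask through __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE). A minimal kernel-style usage sketch, assuming a kernel that carries these hooks; the per-CPU variable and demo_* names are illustrative only:

```c
#include <linux/percpu.h>
#include <linux/preempt.h>

static DEFINE_PER_CPU(u64, demo_counter);

/* Stay on this CPU while touching its per-CPU data, but stay preemptible. */
static void demo_touch_this_cpu(void)
{
	migrate_disable();
	/*
	 * We may be preempted here, but we cannot be migrated, so
	 * this_cpu_ptr() keeps pointing at the same CPU's slot.  Other
	 * tasks running on this CPU still need their own serialization
	 * (e.g. a local lock); migrate_disable() only pins the CPU.
	 */
	*this_cpu_ptr(&demo_counter) += 1;
	migrate_enable();
}
```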
| 1944 | +static inline bool rq_has_pinned_tasks(struct rq *rq) |
|---|
| 1945 | +{ |
|---|
| 1946 | + return rq->nr_pinned; |
|---|
| 1536 | 1947 | } |
|---|
| 1537 | 1948 | |
|---|
| 1538 | 1949 | /* |
|---|
| 1539 | | - * Per-CPU kthreads are allowed to run on !actie && online CPUs, see |
|---|
| 1950 | + * Per-CPU kthreads are allowed to run on !active && online CPUs, see |
|---|
| 1540 | 1951 | * __set_cpus_allowed_ptr() and select_fallback_rq(). |
|---|
| 1541 | 1952 | */ |
|---|
| 1542 | 1953 | static inline bool is_cpu_allowed(struct task_struct *p, int cpu) |
|---|
| .. | .. |
|---|
| 1544 | 1955 | if (!cpumask_test_cpu(cpu, p->cpus_ptr)) |
|---|
| 1545 | 1956 | return false; |
|---|
| 1546 | 1957 | |
|---|
| 1547 | | - if (is_per_cpu_kthread(p) || __migrate_disabled(p)) |
|---|
| 1958 | + if (is_per_cpu_kthread(p) || is_migration_disabled(p)) |
|---|
| 1548 | 1959 | return cpu_online(cpu); |
|---|
| 1549 | 1960 | |
|---|
| 1550 | | - return cpu_active(cpu); |
|---|
| 1961 | + if (!cpu_active(cpu)) |
|---|
| 1962 | + return false; |
|---|
| 1963 | + |
|---|
| 1964 | + return cpumask_test_cpu(cpu, task_cpu_possible_mask(p)); |
|---|
| 1551 | 1965 | } |
|---|
| 1552 | 1966 | |
|---|
| 1553 | 1967 | /* |
|---|
| .. | .. |
|---|
| 1572 | 1986 | static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf, |
|---|
| 1573 | 1987 | struct task_struct *p, int new_cpu) |
|---|
| 1574 | 1988 | { |
|---|
| 1989 | + int detached = 0; |
|---|
| 1990 | + |
|---|
| 1575 | 1991 | lockdep_assert_held(&rq->lock); |
|---|
| 1576 | 1992 | |
|---|
| 1577 | | - WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); |
|---|
| 1578 | | - dequeue_task(rq, p, DEQUEUE_NOCLOCK); |
|---|
| 1579 | | - set_task_cpu(p, new_cpu); |
|---|
| 1580 | | - rq_unlock(rq, rf); |
|---|
| 1993 | + /* |
|---|
| 1994 | + * The vendor hook may drop the lock temporarily, so |
|---|
| 1995 | + * pass the rq flags to unpin lock. We expect the |
|---|
| 1996 | + * rq lock to be held after return. |
|---|
| 1997 | + */ |
|---|
| 1998 | + trace_android_rvh_migrate_queued_task(rq, rf, p, new_cpu, &detached); |
|---|
| 1999 | + if (detached) |
|---|
| 2000 | + goto attach; |
|---|
| 1581 | 2001 | |
|---|
| 2002 | + deactivate_task(rq, p, DEQUEUE_NOCLOCK); |
|---|
| 2003 | + set_task_cpu(p, new_cpu); |
|---|
| 2004 | + |
|---|
| 2005 | +attach: |
|---|
| 2006 | + rq_unlock(rq, rf); |
|---|
| 1582 | 2007 | rq = cpu_rq(new_cpu); |
|---|
| 1583 | 2008 | |
|---|
| 1584 | 2009 | rq_lock(rq, rf); |
|---|
| 1585 | 2010 | BUG_ON(task_cpu(p) != new_cpu); |
|---|
| 1586 | | - enqueue_task(rq, p, 0); |
|---|
| 1587 | | - p->on_rq = TASK_ON_RQ_QUEUED; |
|---|
| 2011 | + activate_task(rq, p, 0); |
|---|
| 1588 | 2012 | check_preempt_curr(rq, p, 0); |
|---|
| 1589 | 2013 | |
|---|
| 1590 | 2014 | return rq; |
|---|
| 1591 | 2015 | } |
|---|
| 1592 | 2016 | |
|---|
| 1593 | 2017 | struct migration_arg { |
|---|
| 1594 | | - struct task_struct *task; |
|---|
| 1595 | | - int dest_cpu; |
|---|
| 1596 | | - bool done; |
|---|
| 2018 | + struct task_struct *task; |
|---|
| 2019 | + int dest_cpu; |
|---|
| 2020 | + struct set_affinity_pending *pending; |
|---|
| 2021 | +}; |
|---|
| 2022 | + |
|---|
| 2023 | +/* |
|---|
| 2024 | + * @refs: number of callers waiting in wait_for_completion() |
|---|
| 2025 | + * @stop_pending: is @stop_work in use |
|---|
| 2026 | + */ |
|---|
| 2027 | +struct set_affinity_pending { |
|---|
| 2028 | + refcount_t refs; |
|---|
| 2029 | + unsigned int stop_pending; |
|---|
| 2030 | + struct completion done; |
|---|
| 2031 | + struct cpu_stop_work stop_work; |
|---|
| 2032 | + struct migration_arg arg; |
|---|
| 1597 | 2033 | }; |
|---|

| 1598 | 2034 | |
|---|
| 1599 | 2035 | /* |
|---|
| .. | .. |
|---|
| 1626 | 2062 | static int migration_cpu_stop(void *data) |
|---|
| 1627 | 2063 | { |
|---|
| 1628 | 2064 | struct migration_arg *arg = data; |
|---|
| 2065 | + struct set_affinity_pending *pending = arg->pending; |
|---|
| 1629 | 2066 | struct task_struct *p = arg->task; |
|---|
| 1630 | 2067 | struct rq *rq = this_rq(); |
|---|
| 2068 | + bool complete = false; |
|---|
| 1631 | 2069 | struct rq_flags rf; |
|---|
| 1632 | | - int dest_cpu = arg->dest_cpu; |
|---|
| 1633 | | - |
|---|
| 1634 | | - /* We don't look at arg after this point. */ |
|---|
| 1635 | | - smp_mb(); |
|---|
| 1636 | | - arg->done = true; |
|---|
| 1637 | 2070 | |
|---|
| 1638 | 2071 | /* |
|---|
| 1639 | 2072 | * The original target CPU might have gone down and we might |
|---|
| 1640 | 2073 | * be on another CPU but it doesn't matter. |
|---|
| 1641 | 2074 | */ |
|---|
| 1642 | | - local_irq_disable(); |
|---|
| 2075 | + local_irq_save(rf.flags); |
|---|
| 1643 | 2076 | /* |
|---|
| 1644 | 2077 | * We need to explicitly wake pending tasks before running |
|---|
| 1645 | 2078 | * __migrate_task() such that we will not miss enforcing cpus_ptr |
|---|
| 1646 | 2079 | * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. |
|---|
| 1647 | 2080 | */ |
|---|
| 1648 | | - sched_ttwu_pending(); |
|---|
| 2081 | + flush_smp_call_function_from_idle(); |
|---|
| 1649 | 2082 | |
|---|
| 1650 | 2083 | raw_spin_lock(&p->pi_lock); |
|---|
| 1651 | 2084 | rq_lock(rq, &rf); |
|---|
| 2085 | + |
|---|
| 1652 | 2086 | /* |
|---|
| 1653 | 2087 | * If task_rq(p) != rq, it cannot be migrated here, because we're |
|---|
| 1654 | 2088 | * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because |
|---|
| 1655 | 2089 | * we're holding p->pi_lock. |
|---|
| 1656 | 2090 | */ |
|---|
| 1657 | 2091 | if (task_rq(p) == rq) { |
|---|
| 1658 | | - if (task_on_rq_queued(p)) |
|---|
| 1659 | | - rq = __migrate_task(rq, &rf, p, dest_cpu); |
|---|
| 1660 | | - else |
|---|
| 1661 | | - p->wake_cpu = dest_cpu; |
|---|
| 1662 | | - } |
|---|
| 1663 | | - rq_unlock(rq, &rf); |
|---|
| 1664 | | - raw_spin_unlock(&p->pi_lock); |
|---|
| 2092 | + if (is_migration_disabled(p)) |
|---|
| 2093 | + goto out; |
|---|
| 1665 | 2094 | |
|---|
| 1666 | | - local_irq_enable(); |
|---|
| 2095 | + if (pending) { |
|---|
| 2096 | + if (p->migration_pending == pending) |
|---|
| 2097 | + p->migration_pending = NULL; |
|---|
| 2098 | + complete = true; |
|---|
| 2099 | + |
|---|
| 2100 | + if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) |
|---|
| 2101 | + goto out; |
|---|
| 2102 | + } |
|---|
| 2103 | + |
|---|
| 2104 | + if (task_on_rq_queued(p)) |
|---|
| 2105 | + rq = __migrate_task(rq, &rf, p, arg->dest_cpu); |
|---|
| 2106 | + else |
|---|
| 2107 | + p->wake_cpu = arg->dest_cpu; |
|---|
| 2108 | + |
|---|
| 2109 | + /* |
|---|
| 2110 | + * XXX __migrate_task() can fail, at which point we might end |
|---|
| 2111 | + * up running on a dodgy CPU, AFAICT this can only happen |
|---|
| 2112 | + * during CPU hotplug, at which point we'll get pushed out |
|---|
| 2113 | + * anyway, so it's probably not a big deal. |
|---|
| 2114 | + */ |
|---|
| 2115 | + |
|---|
| 2116 | + } else if (pending) { |
|---|
| 2117 | + /* |
|---|
| 2118 | + * This happens when we get migrated between migrate_enable()'s |
|---|
| 2119 | + * preempt_enable() and scheduling the stopper task. At that |
|---|
| 2120 | + * point we're a regular task again and not current anymore. |
|---|
| 2121 | + * |
|---|
| 2122 | + * A !PREEMPT kernel has a giant hole here, which makes it far |
|---|
| 2123 | + * more likely. |
|---|
| 2124 | + */ |
|---|
| 2125 | + |
|---|
| 2126 | + /* |
|---|
| 2127 | + * The task moved before the stopper got to run. We're holding |
|---|
| 2128 | + * ->pi_lock, so the allowed mask is stable - if it got |
|---|
| 2129 | + * somewhere allowed, we're done. |
|---|
| 2130 | + */ |
|---|
| 2131 | + if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) { |
|---|
| 2132 | + if (p->migration_pending == pending) |
|---|
| 2133 | + p->migration_pending = NULL; |
|---|
| 2134 | + complete = true; |
|---|
| 2135 | + goto out; |
|---|
| 2136 | + } |
|---|
| 2137 | + |
|---|
| 2138 | + /* |
|---|
| 2139 | + * When migrate_enable() hits a rq mis-match we can't reliably |
|---|
| 2140 | + * determine is_migration_disabled() and so have to chase after |
|---|
| 2141 | + * it. |
|---|
| 2142 | + */ |
|---|
| 2143 | + WARN_ON_ONCE(!pending->stop_pending); |
|---|
| 2144 | + task_rq_unlock(rq, p, &rf); |
|---|
| 2145 | + stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop, |
|---|
| 2146 | + &pending->arg, &pending->stop_work); |
|---|
| 2147 | + return 0; |
|---|
| 2148 | + } |
|---|
| 2149 | +out: |
|---|
| 2150 | + if (pending) |
|---|
| 2151 | + pending->stop_pending = false; |
|---|
| 2152 | + task_rq_unlock(rq, p, &rf); |
|---|
| 2153 | + |
|---|
| 2154 | + if (complete) |
|---|
| 2155 | + complete_all(&pending->done); |
|---|
| 2156 | + |
|---|
| 2157 | + return 0; |
|---|
| 2158 | +} |
|---|
| 2159 | + |
|---|
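migration_cpu_stop() above runs from the per-CPU stopper thread ("migration/N"), queued either synchronously via stop_one_cpu() or, as in affine_move_task(), asynchronously via stop_one_cpu_nowait() with caller-provided work storage. A minimal sketch of both call styles, assuming <linux/stop_machine.h>; the demo_* names are made up:

```c
#include <linux/printk.h>
#include <linux/smp.h>
#include <linux/stop_machine.h>

/* Runs in the highest-priority stopper thread on the target CPU. */
static int demo_stop_fn(void *arg)
{
	pr_info("stopper callback on CPU%d\n", smp_processor_id());
	return 0;
}

/* Synchronous: queue the callback on @cpu and wait for it to finish. */
static void demo_sync(unsigned int cpu)
{
	stop_one_cpu(cpu, demo_stop_fn, NULL);
}

/*
 * Asynchronous: the caller supplies the cpu_stop_work storage (here static;
 * in the code above it lives inside struct set_affinity_pending) and does
 * not wait for completion.
 */
static struct cpu_stop_work demo_work;

static void demo_async(unsigned int cpu)
{
	stop_one_cpu_nowait(cpu, demo_stop_fn, NULL, &demo_work);
}
```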
| 2160 | +int push_cpu_stop(void *arg) |
|---|
| 2161 | +{ |
|---|
| 2162 | + struct rq *lowest_rq = NULL, *rq = this_rq(); |
|---|
| 2163 | + struct task_struct *p = arg; |
|---|
| 2164 | + |
|---|
| 2165 | + raw_spin_lock_irq(&p->pi_lock); |
|---|
| 2166 | + raw_spin_lock(&rq->lock); |
|---|
| 2167 | + |
|---|
| 2168 | + if (task_rq(p) != rq) |
|---|
| 2169 | + goto out_unlock; |
|---|
| 2170 | + |
|---|
| 2171 | + if (is_migration_disabled(p)) { |
|---|
| 2172 | + p->migration_flags |= MDF_PUSH; |
|---|
| 2173 | + goto out_unlock; |
|---|
| 2174 | + } |
|---|
| 2175 | + |
|---|
| 2176 | + p->migration_flags &= ~MDF_PUSH; |
|---|
| 2177 | + |
|---|
| 2178 | + if (p->sched_class->find_lock_rq) |
|---|
| 2179 | + lowest_rq = p->sched_class->find_lock_rq(p, rq); |
|---|
| 2180 | + |
|---|
| 2181 | + if (!lowest_rq) |
|---|
| 2182 | + goto out_unlock; |
|---|
| 2183 | + |
|---|
| 2184 | + // XXX validate p is still the highest prio task |
|---|
| 2185 | + if (task_rq(p) == rq) { |
|---|
| 2186 | + deactivate_task(rq, p, 0); |
|---|
| 2187 | + set_task_cpu(p, lowest_rq->cpu); |
|---|
| 2188 | + activate_task(lowest_rq, p, 0); |
|---|
| 2189 | + resched_curr(lowest_rq); |
|---|
| 2190 | + } |
|---|
| 2191 | + |
|---|
| 2192 | + double_unlock_balance(rq, lowest_rq); |
|---|
| 2193 | + |
|---|
| 2194 | +out_unlock: |
|---|
| 2195 | + rq->push_busy = false; |
|---|
| 2196 | + raw_spin_unlock(&rq->lock); |
|---|
| 2197 | + raw_spin_unlock_irq(&p->pi_lock); |
|---|
| 2198 | + |
|---|
| 2199 | + put_task_struct(p); |
|---|
| 1667 | 2200 | return 0; |
|---|
| 1668 | 2201 | } |
|---|
| 1669 | 2202 | |
|---|
| .. | .. |
|---|
| 1671 | 2204 | * sched_class::set_cpus_allowed must do the below, but is not required to |
|---|
| 1672 | 2205 | * actually call this function. |
|---|
| 1673 | 2206 | */ |
|---|
| 1674 | | -void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) |
|---|
| 2207 | +void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags) |
|---|
| 1675 | 2208 | { |
|---|
| 2209 | + if (flags & (SCA_MIGRATE_ENABLE | SCA_MIGRATE_DISABLE)) { |
|---|
| 2210 | + p->cpus_ptr = new_mask; |
|---|
| 2211 | + return; |
|---|
| 2212 | + } |
|---|
| 2213 | + |
|---|
| 1676 | 2214 | cpumask_copy(&p->cpus_mask, new_mask); |
|---|
| 1677 | | - if (p->cpus_ptr == &p->cpus_mask) |
|---|
| 1678 | | - p->nr_cpus_allowed = cpumask_weight(new_mask); |
|---|
| 2215 | + p->nr_cpus_allowed = cpumask_weight(new_mask); |
|---|
| 2216 | + trace_android_rvh_set_cpus_allowed_comm(p, new_mask); |
|---|
| 1679 | 2217 | } |
|---|
| 1680 | 2218 | |
|---|
| 1681 | | -#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) |
|---|
| 1682 | | -int __migrate_disabled(struct task_struct *p) |
|---|
| 1683 | | -{ |
|---|
| 1684 | | - return p->migrate_disable; |
|---|
| 1685 | | -} |
|---|
| 1686 | | -EXPORT_SYMBOL_GPL(__migrate_disabled); |
|---|
| 1687 | | -#endif |
|---|
| 1688 | | - |
|---|
| 1689 | | -void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) |
|---|
| 2219 | +static void |
|---|
| 2220 | +__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags) |
|---|
| 1690 | 2221 | { |
|---|
| 1691 | 2222 | struct rq *rq = task_rq(p); |
|---|
| 1692 | 2223 | bool queued, running; |
|---|
| 1693 | 2224 | |
|---|
| 1694 | | - lockdep_assert_held(&p->pi_lock); |
|---|
| 2225 | + /* |
|---|
| 2226 | + * This here violates the locking rules for affinity, since we're only |
|---|
| 2227 | + * supposed to change these variables while holding both rq->lock and |
|---|
| 2228 | + * p->pi_lock. |
|---|
| 2229 | + * |
|---|
| 2230 | + * HOWEVER, it magically works, because ttwu() is the only code that |
|---|
| 2231 | + * accesses these variables under p->pi_lock and only does so after |
|---|
| 2232 | + * smp_cond_load_acquire(&p->on_cpu, !VAL), and we're in __schedule() |
|---|
| 2233 | + * before finish_task(). |
|---|
| 2234 | + * |
|---|
| 2235 | + * XXX do further audits, this smells like something putrid. |
|---|
| 2236 | + */ |
|---|
| 2237 | + if (flags & SCA_MIGRATE_DISABLE) |
|---|
| 2238 | + SCHED_WARN_ON(!p->on_cpu); |
|---|
| 2239 | + else |
|---|
| 2240 | + lockdep_assert_held(&p->pi_lock); |
|---|
| 1695 | 2241 | |
|---|
| 1696 | 2242 | queued = task_on_rq_queued(p); |
|---|
| 1697 | 2243 | running = task_current(rq, p); |
|---|
| .. | .. |
|---|
| 1707 | 2253 | if (running) |
|---|
| 1708 | 2254 | put_prev_task(rq, p); |
|---|
| 1709 | 2255 | |
|---|
| 1710 | | - p->sched_class->set_cpus_allowed(p, new_mask); |
|---|
| 2256 | + p->sched_class->set_cpus_allowed(p, new_mask, flags); |
|---|
| 1711 | 2257 | |
|---|
| 1712 | 2258 | if (queued) |
|---|
| 1713 | 2259 | enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); |
|---|
| 1714 | 2260 | if (running) |
|---|
| 1715 | | - set_curr_task(rq, p); |
|---|
| 2261 | + set_next_task(rq, p); |
|---|
| 2262 | +} |
|---|
| 2263 | + |
|---|
| 2264 | +static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf, |
|---|
| 2265 | + int dest_cpu, unsigned int flags); |
|---|
| 2266 | +/* |
|---|
| 2267 | + * Called with both p->pi_lock and rq->lock held; drops both before returning. |
|---|
| 2268 | + */ |
|---|
| 2269 | +static int __set_cpus_allowed_ptr_locked(struct task_struct *p, |
|---|
| 2270 | + const struct cpumask *new_mask, |
|---|
| 2271 | + u32 flags, |
|---|
| 2272 | + struct rq *rq, |
|---|
| 2273 | + struct rq_flags *rf) |
|---|
| 2274 | +{ |
|---|
| 2275 | + const struct cpumask *cpu_valid_mask = cpu_active_mask; |
|---|
| 2276 | + const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p); |
|---|
| 2277 | + unsigned int dest_cpu; |
|---|
| 2278 | + int ret = 0; |
|---|
| 2279 | + |
|---|
| 2280 | + update_rq_clock(rq); |
|---|
| 2281 | + |
|---|
| 2282 | + if (p->flags & PF_KTHREAD || is_migration_disabled(p)) { |
|---|
| 2283 | + /* |
|---|
| 2284 | + * Kernel threads are allowed on online && !active CPUs. |
|---|
| 2285 | + * |
|---|
| 2286 | + * Specifically, migration_disabled() tasks must not fail the |
|---|
| 2287 | + * cpumask_any_and_distribute() pick below, esp. so on |
|---|
| 2288 | + * SCA_MIGRATE_ENABLE, otherwise we'll not call |
|---|
| 2289 | + * set_cpus_allowed_common() and actually reset p->cpus_ptr. |
|---|
| 2290 | + */ |
|---|
| 2291 | + cpu_valid_mask = cpu_online_mask; |
|---|
| 2292 | + } else if (!cpumask_subset(new_mask, cpu_allowed_mask)) { |
|---|
| 2293 | + ret = -EINVAL; |
|---|
| 2294 | + goto out; |
|---|
| 2295 | + } |
|---|
| 2296 | + |
|---|
| 2297 | + /* |
|---|
| 2298 | + * Must re-check here, to close a race against __kthread_bind(), |
|---|
| 2299 | + * sched_setaffinity() is not guaranteed to observe the flag. |
|---|
| 2300 | + */ |
|---|
| 2301 | + if ((flags & SCA_CHECK) && (p->flags & PF_NO_SETAFFINITY)) { |
|---|
| 2302 | + ret = -EINVAL; |
|---|
| 2303 | + goto out; |
|---|
| 2304 | + } |
|---|
| 2305 | + |
|---|
| 2306 | + if (!(flags & SCA_MIGRATE_ENABLE)) { |
|---|
| 2307 | + if (cpumask_equal(&p->cpus_mask, new_mask)) |
|---|
| 2308 | + goto out; |
|---|
| 2309 | + |
|---|
| 2310 | + if (WARN_ON_ONCE(p == current && |
|---|
| 2311 | + is_migration_disabled(p) && |
|---|
| 2312 | + !cpumask_test_cpu(task_cpu(p), new_mask))) { |
|---|
| 2313 | + ret = -EBUSY; |
|---|
| 2314 | + goto out; |
|---|
| 2315 | + } |
|---|
| 2316 | + } |
|---|
| 2317 | + |
|---|
| 2318 | + /* |
|---|
| 2319 | + * Picking a ~random cpu helps in cases where we are changing affinity |
|---|
| 2320 | + * for groups of tasks (ie. cpuset), so that load balancing is not |
|---|
| 2321 | + * immediately required to distribute the tasks within their new mask. |
|---|
| 2322 | + */ |
|---|
| 2323 | + dest_cpu = cpumask_any_and_distribute(cpu_valid_mask, new_mask); |
|---|
| 2324 | + if (dest_cpu >= nr_cpu_ids) { |
|---|
| 2325 | + ret = -EINVAL; |
|---|
| 2326 | + goto out; |
|---|
| 2327 | + } |
|---|
| 2328 | + |
|---|
| 2329 | + __do_set_cpus_allowed(p, new_mask, flags); |
|---|
| 2330 | + |
|---|
| 2331 | + if (p->flags & PF_KTHREAD) { |
|---|
| 2332 | + /* |
|---|
| 2333 | + * For kernel threads that do indeed end up on online && |
|---|
| 2334 | + * !active we want to ensure they are strict per-CPU threads. |
|---|
| 2335 | + */ |
|---|
| 2336 | + WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && |
|---|
| 2337 | + !cpumask_intersects(new_mask, cpu_active_mask) && |
|---|
| 2338 | + p->nr_cpus_allowed != 1); |
|---|
| 2339 | + } |
|---|
| 2340 | + |
|---|
| 2341 | + return affine_move_task(rq, p, rf, dest_cpu, flags); |
|---|
| 2342 | +out: |
|---|
| 2343 | + task_rq_unlock(rq, p, rf); |
|---|
| 2344 | + |
|---|
| 2345 | + return ret; |
|---|
| 2346 | +} |
|---|
| 2347 | + |
|---|
| 2348 | +void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) |
|---|
| 2349 | +{ |
|---|
| 2350 | + __do_set_cpus_allowed(p, new_mask, 0); |
|---|
| 2351 | +} |
|---|
| 2352 | + |
|---|
| 2353 | +/* |
|---|
| 2354 | + * This function is wildly self-concurrent; here be dragons. |
|---|
| 2355 | + * |
|---|
| 2356 | + * |
|---|
| 2357 | + * When given a valid mask, __set_cpus_allowed_ptr() must block until the |
|---|
| 2358 | + * designated task is enqueued on an allowed CPU. If that task is currently |
|---|
| 2359 | + * running, we have to kick it out using the CPU stopper. |
|---|
| 2360 | + * |
|---|
| 2361 | + * Migrate-Disable comes along and tramples all over our nice sandcastle. |
|---|
| 2362 | + * Consider: |
|---|
| 2363 | + * |
|---|
| 2364 | + * Initial conditions: P0->cpus_mask = [0, 1] |
|---|
| 2365 | + * |
|---|
| 2366 | + * P0@CPU0 P1 |
|---|
| 2367 | + * |
|---|
| 2368 | + * migrate_disable(); |
|---|
| 2369 | + * <preempted> |
|---|
| 2370 | + * set_cpus_allowed_ptr(P0, [1]); |
|---|
| 2371 | + * |
|---|
| 2372 | + * P1 *cannot* return from this set_cpus_allowed_ptr() call until P0 executes |
|---|
| 2373 | + * its outermost migrate_enable() (i.e. it exits its Migrate-Disable region). |
|---|
| 2374 | + * This means we need the following scheme: |
|---|
| 2375 | + * |
|---|
| 2376 | + * P0@CPU0 P1 |
|---|
| 2377 | + * |
|---|
| 2378 | + * migrate_disable(); |
|---|
| 2379 | + * <preempted> |
|---|
| 2380 | + * set_cpus_allowed_ptr(P0, [1]); |
|---|
| 2381 | + * <blocks> |
|---|
| 2382 | + * <resumes> |
|---|
| 2383 | + * migrate_enable(); |
|---|
| 2384 | + * __set_cpus_allowed_ptr(); |
|---|
| 2385 | + * <wakes local stopper> |
|---|
| 2386 | + * `--> <woken on migration completion> |
|---|
| 2387 | + * |
|---|
| 2388 | + * Now the fun stuff: there may be several P1-like tasks, i.e. multiple |
|---|
| 2389 | + * concurrent set_cpus_allowed_ptr(P0, [*]) calls. CPU affinity changes of any |
|---|
| 2390 | + * task p are serialized by p->pi_lock, which we can leverage: the one that |
|---|
| 2391 | + * should come into effect at the end of the Migrate-Disable region is the last |
|---|
| 2392 | + * one. This means we only need to track a single cpumask (i.e. p->cpus_mask), |
|---|
| 2393 | + * but we still need to properly signal those waiting tasks at the appropriate |
|---|
| 2394 | + * moment. |
|---|
| 2395 | + * |
|---|
| 2396 | + * This is implemented using struct set_affinity_pending. The first |
|---|
| 2397 | + * __set_cpus_allowed_ptr() caller within a given Migrate-Disable region will |
|---|
| 2398 | + * setup an instance of that struct and install it on the targeted task_struct. |
|---|
| 2399 | + * Any and all further callers will reuse that instance. Those then wait for |
|---|
| 2400 | + * a completion signaled at the tail of the CPU stopper callback (1), triggered |
|---|
| 2401 | + * on the end of the Migrate-Disable region (i.e. outermost migrate_enable()). |
|---|
| 2402 | + * |
|---|
| 2403 | + * |
|---|
| 2404 | + * (1) In the cases covered above. There is one more where the completion is |
|---|
| 2405 | + * signaled within affine_move_task() itself: when a subsequent affinity request |
|---|
| 2406 | + * cancels the need for an active migration. Consider: |
|---|
| 2407 | + * |
|---|
| 2408 | + * Initial conditions: P0->cpus_mask = [0, 1] |
|---|
| 2409 | + * |
|---|
| 2410 | + * P0@CPU0 P1 P2 |
|---|
| 2411 | + * |
|---|
| 2412 | + * migrate_disable(); |
|---|
| 2413 | + * <preempted> |
|---|
| 2414 | + * set_cpus_allowed_ptr(P0, [1]); |
|---|
| 2415 | + * <blocks> |
|---|
| 2416 | + * set_cpus_allowed_ptr(P0, [0, 1]); |
|---|
| 2417 | + * <signal completion> |
|---|
| 2418 | + * <awakes> |
|---|
| 2419 | + * |
|---|
| 2420 | + * Note that the above is safe vs a concurrent migrate_enable(), as any |
|---|
| 2421 | + * pending affinity completion is preceded by an uninstallation of |
|---|
| 2422 | + * p->migration_pending done with p->pi_lock held. |
|---|
| 2423 | + */ |
|---|
| 2424 | +static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf, |
|---|
| 2425 | + int dest_cpu, unsigned int flags) |
|---|
| 2426 | +{ |
|---|
| 2427 | + struct set_affinity_pending my_pending = { }, *pending = NULL; |
|---|
| 2428 | + bool stop_pending, complete = false; |
|---|
| 2429 | + |
|---|
| 2430 | + /* Can the task run on the task's current CPU? If so, we're done */ |
|---|
| 2431 | + if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) { |
|---|
| 2432 | + struct task_struct *push_task = NULL; |
|---|
| 2433 | + |
|---|
| 2434 | + if ((flags & SCA_MIGRATE_ENABLE) && |
|---|
| 2435 | + (p->migration_flags & MDF_PUSH) && !rq->push_busy) { |
|---|
| 2436 | + rq->push_busy = true; |
|---|
| 2437 | + push_task = get_task_struct(p); |
|---|
| 2438 | + } |
|---|
| 2439 | + |
|---|
| 2440 | + /* |
|---|
| 2441 | + * If there are pending waiters, but no pending stop_work, |
|---|
| 2442 | + * then complete now. |
|---|
| 2443 | + */ |
|---|
| 2444 | + pending = p->migration_pending; |
|---|
| 2445 | + if (pending && !pending->stop_pending) { |
|---|
| 2446 | + p->migration_pending = NULL; |
|---|
| 2447 | + complete = true; |
|---|
| 2448 | + } |
|---|
| 2449 | + |
|---|
| 2450 | + task_rq_unlock(rq, p, rf); |
|---|
| 2451 | + |
|---|
| 2452 | + if (push_task) { |
|---|
| 2453 | + stop_one_cpu_nowait(rq->cpu, push_cpu_stop, |
|---|
| 2454 | + p, &rq->push_work); |
|---|
| 2455 | + } |
|---|
| 2456 | + |
|---|
| 2457 | + if (complete) |
|---|
| 2458 | + complete_all(&pending->done); |
|---|
| 2459 | + |
|---|
| 2460 | + return 0; |
|---|
| 2461 | + } |
|---|
| 2462 | + |
|---|
| 2463 | + if (!(flags & SCA_MIGRATE_ENABLE)) { |
|---|
| 2464 | + /* serialized by p->pi_lock */ |
|---|
| 2465 | + if (!p->migration_pending) { |
|---|
| 2466 | + /* Install the request */ |
|---|
| 2467 | + refcount_set(&my_pending.refs, 1); |
|---|
| 2468 | + init_completion(&my_pending.done); |
|---|
| 2469 | + my_pending.arg = (struct migration_arg) { |
|---|
| 2470 | + .task = p, |
|---|
| 2471 | + .dest_cpu = dest_cpu, |
|---|
| 2472 | + .pending = &my_pending, |
|---|
| 2473 | + }; |
|---|
| 2474 | + |
|---|
| 2475 | + p->migration_pending = &my_pending; |
|---|
| 2476 | + } else { |
|---|
| 2477 | + pending = p->migration_pending; |
|---|
| 2478 | + refcount_inc(&pending->refs); |
|---|
| 2479 | + /* |
|---|
| 2480 | + * Affinity has changed, but we've already installed a |
|---|
| 2481 | + * pending. migration_cpu_stop() *must* see this, else |
|---|
| 2482 | + * we risk a completion of the pending despite having a |
|---|
| 2483 | + * task on a disallowed CPU. |
|---|
| 2484 | + * |
|---|
| 2485 | + * Serialized by p->pi_lock, so this is safe. |
|---|
| 2486 | + */ |
|---|
| 2487 | + pending->arg.dest_cpu = dest_cpu; |
|---|
| 2488 | + } |
|---|
| 2489 | + } |
|---|
| 2490 | + pending = p->migration_pending; |
|---|
| 2491 | + /* |
|---|
| 2492 | + * - !MIGRATE_ENABLE: |
|---|
| 2493 | + * we'll have installed a pending if there wasn't one already. |
|---|
| 2494 | + * |
|---|
| 2495 | + * - MIGRATE_ENABLE: |
|---|
| 2496 | + * we're here because the current CPU isn't matching anymore, |
|---|
| 2497 | + * the only way that can happen is because of a concurrent |
|---|
| 2498 | + * set_cpus_allowed_ptr() call, which should then still be |
|---|
| 2499 | + * pending completion. |
|---|
| 2500 | + * |
|---|
| 2501 | + * Either way, we really should have a @pending here. |
|---|
| 2502 | + */ |
|---|
| 2503 | + if (WARN_ON_ONCE(!pending)) { |
|---|
| 2504 | + task_rq_unlock(rq, p, rf); |
|---|
| 2505 | + return -EINVAL; |
|---|
| 2506 | + } |
|---|
| 2507 | + |
|---|
| 2508 | + if (task_running(rq, p) || p->state == TASK_WAKING) { |
|---|
| 2509 | + /* |
|---|
| 2510 | + * MIGRATE_ENABLE gets here because 'p == current', but for |
|---|
| 2511 | + * anything else we cannot do is_migration_disabled(), so punt |
|---|
| 2512 | + * and have the stopper function handle it all race-free. |
|---|
| 2513 | + */ |
|---|
| 2514 | + stop_pending = pending->stop_pending; |
|---|
| 2515 | + if (!stop_pending) |
|---|
| 2516 | + pending->stop_pending = true; |
|---|
| 2517 | + |
|---|
| 2518 | + if (flags & SCA_MIGRATE_ENABLE) |
|---|
| 2519 | + p->migration_flags &= ~MDF_PUSH; |
|---|
| 2520 | + |
|---|
| 2521 | + task_rq_unlock(rq, p, rf); |
|---|
| 2522 | + |
|---|
| 2523 | + if (!stop_pending) { |
|---|
| 2524 | + stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, |
|---|
| 2525 | + &pending->arg, &pending->stop_work); |
|---|
| 2526 | + } |
|---|
| 2527 | + |
|---|
| 2528 | + if (flags & SCA_MIGRATE_ENABLE) |
|---|
| 2529 | + return 0; |
|---|
| 2530 | + } else { |
|---|
| 2531 | + |
|---|
| 2532 | + if (!is_migration_disabled(p)) { |
|---|
| 2533 | + if (task_on_rq_queued(p)) |
|---|
| 2534 | + rq = move_queued_task(rq, rf, p, dest_cpu); |
|---|
| 2535 | + |
|---|
| 2536 | + if (!pending->stop_pending) { |
|---|
| 2537 | + p->migration_pending = NULL; |
|---|
| 2538 | + complete = true; |
|---|
| 2539 | + } |
|---|
| 2540 | + } |
|---|
| 2541 | + task_rq_unlock(rq, p, rf); |
|---|
| 2542 | + |
|---|
| 2543 | + if (complete) |
|---|
| 2544 | + complete_all(&pending->done); |
|---|
| 2545 | + } |
|---|
| 2546 | + |
|---|
| 2547 | + wait_for_completion(&pending->done); |
|---|
| 2548 | + |
|---|
| 2549 | + if (refcount_dec_and_test(&pending->refs)) |
|---|
| 2550 | + wake_up_var(&pending->refs); /* No UaF, just an address */ |
|---|
| 2551 | + |
|---|
| 2552 | + /* |
|---|
| 2553 | + * Block the original owner of &pending until all subsequent callers |
|---|
| 2554 | + * have seen the completion and decremented the refcount |
|---|
| 2555 | + */ |
|---|
| 2556 | + wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs)); |
|---|
| 2557 | + |
|---|
| 2558 | + /* ARGH */ |
|---|
| 2559 | + WARN_ON_ONCE(my_pending.stop_pending); |
|---|
| 2560 | + |
|---|
| 2561 | + return 0; |
|---|
| 1716 | 2562 | } |
|---|
| 1717 | 2563 | |
|---|
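The lifetime rules spelled out in the comment block and implemented above (the first caller installs a stack-allocated pending, later callers take a reference, complete_all() wakes every waiter, and the owner must wait for the refcount to drain before its stack frame may go away) can be modelled in user space. A rough pthread analogy, assuming nothing about the kernel primitives beyond what the comments say; a single mutex/condvar stands in for pi_lock, the completion and wait_var_event():

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

struct pending {
	int refs;	/* like refcount_t refs: number of waiters */
	bool done;	/* like struct completion done             */
};

static struct pending *installed;	/* like p->migration_pending */

/* Stand-in for the stopper running at the outermost migrate_enable(). */
static void *stopper(void *arg)
{
	pthread_mutex_lock(&lock);
	installed->done = true;		/* complete_all(&pending->done) */
	installed = NULL;		/* uninstall under "pi_lock"    */
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

/* Stand-in for an affinity-change caller: install or reuse, then wait. */
static void *requester(void *arg)
{
	struct pending my = { .refs = 1, .done = false };	/* my_pending */
	struct pending *pd;
	bool owner;

	pthread_mutex_lock(&lock);
	if (!installed)
		installed = &my;	/* first caller installs its own */
	else
		installed->refs++;	/* later callers just take a ref */
	pd = installed;
	owner = (pd == &my);

	while (!pd->done)		/* wait_for_completion()         */
		pthread_cond_wait(&cond, &lock);

	if (--pd->refs == 0)		/* refcount_dec_and_test()       */
		pthread_cond_broadcast(&cond);	/* wake_up_var()         */

	while (owner && pd->refs)	/* wait_var_event(): @my lives on */
		pthread_cond_wait(&cond, &lock);/* our stack, outlive waiters */
	pthread_mutex_unlock(&lock);

	printf("requester done (owner=%d)\n", owner);
	return NULL;
}

int main(void)
{
	pthread_t r1, r2, s;
	bool ready = false;

	pthread_create(&r1, NULL, requester, NULL);
	pthread_create(&r2, NULL, requester, NULL);

	while (!ready) {		/* let both attach to one pending */
		pthread_mutex_lock(&lock);
		ready = installed && installed->refs == 2;
		pthread_mutex_unlock(&lock);
		usleep(1000);
	}

	pthread_create(&s, NULL, stopper, NULL);
	pthread_join(r1, NULL);
	pthread_join(r2, NULL);
	pthread_join(s, NULL);
	return 0;
}
```

Build with cc -pthread; both requesters finish only after the stopper has completed the shared pending, and the owning requester additionally waits until the other waiter has dropped its reference, mirroring the wait_var_event() at the tail of affine_move_task().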
| 1718 | 2564 | /* |
|---|
| .. | .. |
|---|
| 1725 | 2571 | * call is not atomic; no spinlocks may be held. |
|---|
| 1726 | 2572 | */ |
|---|
| 1727 | 2573 | static int __set_cpus_allowed_ptr(struct task_struct *p, |
|---|
| 1728 | | - const struct cpumask *new_mask, bool check) |
|---|
| 2574 | + const struct cpumask *new_mask, |
|---|
| 2575 | + u32 flags) |
|---|
| 1729 | 2576 | { |
|---|
| 1730 | | - const struct cpumask *cpu_valid_mask = cpu_active_mask; |
|---|
| 1731 | | - unsigned int dest_cpu; |
|---|
| 1732 | 2577 | struct rq_flags rf; |
|---|
| 1733 | 2578 | struct rq *rq; |
|---|
| 1734 | | - int ret = 0; |
|---|
| 1735 | 2579 | |
|---|
| 1736 | 2580 | rq = task_rq_lock(p, &rf); |
|---|
| 1737 | | - update_rq_clock(rq); |
|---|
| 1738 | | - |
|---|
| 1739 | | - if (p->flags & PF_KTHREAD) { |
|---|
| 1740 | | - /* |
|---|
| 1741 | | - * Kernel threads are allowed on online && !active CPUs |
|---|
| 1742 | | - */ |
|---|
| 1743 | | - cpu_valid_mask = cpu_online_mask; |
|---|
| 1744 | | - } |
|---|
| 1745 | | - |
|---|
| 1746 | | - /* |
|---|
| 1747 | | - * Must re-check here, to close a race against __kthread_bind(), |
|---|
| 1748 | | - * sched_setaffinity() is not guaranteed to observe the flag. |
|---|
| 1749 | | - */ |
|---|
| 1750 | | - if (check && (p->flags & PF_NO_SETAFFINITY)) { |
|---|
| 1751 | | - ret = -EINVAL; |
|---|
| 1752 | | - goto out; |
|---|
| 1753 | | - } |
|---|
| 1754 | | - |
|---|
| 1755 | | - if (cpumask_equal(&p->cpus_mask, new_mask)) |
|---|
| 1756 | | - goto out; |
|---|
| 1757 | | - |
|---|
| 1758 | | - dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); |
|---|
| 1759 | | - if (dest_cpu >= nr_cpu_ids) { |
|---|
| 1760 | | - ret = -EINVAL; |
|---|
| 1761 | | - goto out; |
|---|
| 1762 | | - } |
|---|
| 1763 | | - |
|---|
| 1764 | | - do_set_cpus_allowed(p, new_mask); |
|---|
| 1765 | | - |
|---|
| 1766 | | - if (p->flags & PF_KTHREAD) { |
|---|
| 1767 | | - /* |
|---|
| 1768 | | - * For kernel threads that do indeed end up on online && |
|---|
| 1769 | | - * !active we want to ensure they are strict per-CPU threads. |
|---|
| 1770 | | - */ |
|---|
| 1771 | | - WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && |
|---|
| 1772 | | - !cpumask_intersects(new_mask, cpu_active_mask) && |
|---|
| 1773 | | - p->nr_cpus_allowed != 1); |
|---|
| 1774 | | - } |
|---|
| 1775 | | - |
|---|
| 1776 | | - /* Can the task run on the task's current CPU? If so, we're done */ |
|---|
| 1777 | | - if (cpumask_test_cpu(task_cpu(p), new_mask) || |
|---|
| 1778 | | - p->cpus_ptr != &p->cpus_mask) |
|---|
| 1779 | | - goto out; |
|---|
| 1780 | | - |
|---|
| 1781 | | - if (task_running(rq, p) || p->state == TASK_WAKING) { |
|---|
| 1782 | | - struct migration_arg arg = { p, dest_cpu }; |
|---|
| 1783 | | - /* Need help from migration thread: drop lock and wait. */ |
|---|
| 1784 | | - task_rq_unlock(rq, p, &rf); |
|---|
| 1785 | | - stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); |
|---|
| 1786 | | - tlb_migrate_finish(p->mm); |
|---|
| 1787 | | - return 0; |
|---|
| 1788 | | - } else if (task_on_rq_queued(p)) { |
|---|
| 1789 | | - /* |
|---|
| 1790 | | - * OK, since we're going to drop the lock immediately |
|---|
| 1791 | | - * afterwards anyway. |
|---|
| 1792 | | - */ |
|---|
| 1793 | | - rq = move_queued_task(rq, &rf, p, dest_cpu); |
|---|
| 1794 | | - } |
|---|
| 1795 | | -out: |
|---|
| 1796 | | - task_rq_unlock(rq, p, &rf); |
|---|
| 1797 | | - |
|---|
| 1798 | | - return ret; |
|---|
| 2581 | + return __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, &rf); |
|---|
| 1799 | 2582 | } |
|---|
| 1800 | 2583 | |
|---|
| 1801 | 2584 | int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) |
|---|
| 1802 | 2585 | { |
|---|
| 1803 | | - return __set_cpus_allowed_ptr(p, new_mask, false); |
|---|
| 2586 | + return __set_cpus_allowed_ptr(p, new_mask, 0); |
|---|
| 1804 | 2587 | } |
|---|
| 1805 | 2588 | EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); |
|---|
| 2589 | + |
|---|
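set_cpus_allowed_ptr() is the exported entry point for all of the above; with the affine_move_task() machinery the call may block until the target task is actually running inside the new mask. A minimal kernel-style usage sketch (the worker pointer, demo_* names and CPU number are illustrative):

```c
#include <linux/cpumask.h>
#include <linux/sched.h>

/* Pin an existing kthread (or any task we may affine) to CPU 2. */
static int demo_pin_to_cpu2(struct task_struct *worker)
{
	return set_cpus_allowed_ptr(worker, cpumask_of(2));
}

/*
 * Allow a wider mask instead; -EINVAL comes back if the mask contains no
 * usable CPU (see the checks in __set_cpus_allowed_ptr_locked() above).
 */
static int demo_spread(struct task_struct *worker, const struct cpumask *mask)
{
	return set_cpus_allowed_ptr(worker, mask);
}
```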
| 2590 | +/* |
|---|
| 2591 | + * Change a given task's CPU affinity to the intersection of its current |
|---|
| 2592 | + * affinity mask and @subset_mask, writing the resulting mask to @new_mask. |
|---|
| 2593 | + * If the resulting mask is empty, leave the affinity unchanged and return |
|---|
| 2594 | + * -EINVAL. |
|---|
| 2595 | + */ |
|---|
| 2596 | +static int restrict_cpus_allowed_ptr(struct task_struct *p, |
|---|
| 2597 | + struct cpumask *new_mask, |
|---|
| 2598 | + const struct cpumask *subset_mask) |
|---|
| 2599 | +{ |
|---|
| 2600 | + struct rq_flags rf; |
|---|
| 2601 | + struct rq *rq; |
|---|
| 2602 | + |
|---|
| 2603 | + rq = task_rq_lock(p, &rf); |
|---|
| 2604 | + if (!cpumask_and(new_mask, &p->cpus_mask, subset_mask)) { |
|---|
| 2605 | + task_rq_unlock(rq, p, &rf); |
|---|
| 2606 | + return -EINVAL; |
|---|
| 2607 | + } |
|---|
| 2608 | + |
|---|
| 2609 | + return __set_cpus_allowed_ptr_locked(p, new_mask, 0, rq, &rf); |
|---|
| 2610 | +} |
|---|
| 2611 | + |
|---|
| 2612 | +/* |
|---|
| 2613 | + * Restrict a given task's CPU affinity so that it is a subset of |
|---|
| 2614 | + * task_cpu_possible_mask(). If the resulting mask is empty, we warn and |
|---|
| 2615 | + * walk up the cpuset hierarchy until we find a suitable mask. |
|---|
| 2616 | + */ |
|---|
| 2617 | +void force_compatible_cpus_allowed_ptr(struct task_struct *p) |
|---|
| 2618 | +{ |
|---|
| 2619 | + cpumask_var_t new_mask; |
|---|
| 2620 | + const struct cpumask *override_mask = task_cpu_possible_mask(p); |
|---|
| 2621 | + |
|---|
| 2622 | + alloc_cpumask_var(&new_mask, GFP_KERNEL); |
|---|
| 2623 | + |
|---|
| 2624 | + /* |
|---|
| 2625 | + * __migrate_task() can fail silently in the face of concurrent |
|---|
| 2626 | + * offlining of the chosen destination CPU, so take the hotplug |
|---|
| 2627 | + * lock to ensure that the migration succeeds. |
|---|
| 2628 | + */ |
|---|
| 2629 | + trace_android_rvh_force_compatible_pre(NULL); |
|---|
| 2630 | + cpus_read_lock(); |
|---|
| 2631 | + if (!cpumask_available(new_mask)) |
|---|
| 2632 | + goto out_set_mask; |
|---|
| 2633 | + |
|---|
| 2634 | + if (!restrict_cpus_allowed_ptr(p, new_mask, override_mask)) |
|---|
| 2635 | + goto out_free_mask; |
|---|
| 2636 | + |
|---|
| 2637 | + /* |
|---|
| 2638 | + * We failed to find a valid subset of the affinity mask for the |
|---|
| 2639 | + * task, so override it based on its cpuset hierarchy. |
|---|
| 2640 | + */ |
|---|
| 2641 | + cpuset_cpus_allowed(p, new_mask); |
|---|
| 2642 | + override_mask = new_mask; |
|---|
| 2643 | + |
|---|
| 2644 | +out_set_mask: |
|---|
| 2645 | + if (printk_ratelimit()) { |
|---|
| 2646 | + printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n", |
|---|
| 2647 | + task_pid_nr(p), p->comm, |
|---|
| 2648 | + cpumask_pr_args(override_mask)); |
|---|
| 2649 | + } |
|---|
| 2650 | + |
|---|
| 2651 | + WARN_ON(set_cpus_allowed_ptr(p, override_mask)); |
|---|
| 2652 | +out_free_mask: |
|---|
| 2653 | + cpus_read_unlock(); |
|---|
| 2654 | + trace_android_rvh_force_compatible_post(NULL); |
|---|
| 2655 | + free_cpumask_var(new_mask); |
|---|
| 2656 | +} |
|---|
| 1806 | 2657 | |
|---|
| 1807 | 2658 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) |
|---|
| 1808 | 2659 | { |
|---|
| .. | .. |
|---|
| 1841 | 2692 | * Clearly, migrating tasks to offline CPUs is a fairly daft thing. |
|---|
| 1842 | 2693 | */ |
|---|
| 1843 | 2694 | WARN_ON_ONCE(!cpu_online(new_cpu)); |
|---|
| 2695 | + |
|---|
| 2696 | + WARN_ON_ONCE(is_migration_disabled(p)); |
|---|
| 1844 | 2697 | #endif |
|---|
| 1845 | 2698 | |
|---|
| 1846 | 2699 | trace_sched_migrate_task(p, new_cpu); |
|---|
| .. | .. |
|---|
| 1851 | 2704 | p->se.nr_migrations++; |
|---|
| 1852 | 2705 | rseq_migrate(p); |
|---|
| 1853 | 2706 | perf_event_task_migrate(p); |
|---|
| 2707 | + trace_android_rvh_set_task_cpu(p, new_cpu); |
|---|
| 1854 | 2708 | } |
|---|
| 1855 | 2709 | |
|---|
| 1856 | 2710 | __set_task_cpu(p, new_cpu); |
|---|
| 1857 | 2711 | } |
|---|
| 2712 | +EXPORT_SYMBOL_GPL(set_task_cpu); |
|---|
| 1858 | 2713 | |
|---|
| 1859 | | -#ifdef CONFIG_NUMA_BALANCING |
|---|
| 1860 | 2714 | static void __migrate_swap_task(struct task_struct *p, int cpu) |
|---|
| 1861 | 2715 | { |
|---|
| 1862 | 2716 | if (task_on_rq_queued(p)) { |
|---|
| .. | .. |
|---|
| 1869 | 2723 | rq_pin_lock(src_rq, &srf); |
|---|
| 1870 | 2724 | rq_pin_lock(dst_rq, &drf); |
|---|
| 1871 | 2725 | |
|---|
| 1872 | | - p->on_rq = TASK_ON_RQ_MIGRATING; |
|---|
| 1873 | 2726 | deactivate_task(src_rq, p, 0); |
|---|
| 1874 | 2727 | set_task_cpu(p, cpu); |
|---|
| 1875 | 2728 | activate_task(dst_rq, p, 0); |
|---|
| 1876 | | - p->on_rq = TASK_ON_RQ_QUEUED; |
|---|
| 1877 | 2729 | check_preempt_curr(dst_rq, p, 0); |
|---|
| 1878 | 2730 | |
|---|
| 1879 | 2731 | rq_unpin_lock(dst_rq, &drf); |
|---|
| .. | .. |
|---|
| 1973 | 2825 | out: |
|---|
| 1974 | 2826 | return ret; |
|---|
| 1975 | 2827 | } |
|---|
| 1976 | | -#endif /* CONFIG_NUMA_BALANCING */ |
|---|
| 2828 | +EXPORT_SYMBOL_GPL(migrate_swap); |
|---|
| 1977 | 2829 | |
|---|
| 1978 | 2830 | static bool check_task_state(struct task_struct *p, long match_state) |
|---|
| 1979 | 2831 | { |
|---|
| .. | .. |
|---|
| 2081 | 2933 | ktime_t to = NSEC_PER_SEC / HZ; |
|---|
| 2082 | 2934 | |
|---|
| 2083 | 2935 | set_current_state(TASK_UNINTERRUPTIBLE); |
|---|
| 2084 | | - schedule_hrtimeout(&to, HRTIMER_MODE_REL); |
|---|
| 2936 | + schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD); |
|---|
| 2085 | 2937 | continue; |
|---|
| 2086 | 2938 | } |
|---|
| 2087 | 2939 | |
|---|
| .. | .. |
|---|
| 2148 | 3000 | int nid = cpu_to_node(cpu); |
|---|
| 2149 | 3001 | const struct cpumask *nodemask = NULL; |
|---|
| 2150 | 3002 | enum { cpuset, possible, fail } state = cpuset; |
|---|
| 2151 | | - int dest_cpu; |
|---|
| 3003 | + int dest_cpu = -1; |
|---|
| 3004 | + |
|---|
| 3005 | + trace_android_rvh_select_fallback_rq(cpu, p, &dest_cpu); |
|---|
| 3006 | + if (dest_cpu >= 0) |
|---|
| 3007 | + return dest_cpu; |
|---|
| 2152 | 3008 | |
|---|
| 2153 | 3009 | /* |
|---|
| 2154 | 3010 | * If the node that the CPU is on has been offlined, cpu_to_node() |
|---|
| .. | .. |
|---|
| 2160 | 3016 | |
|---|
| 2161 | 3017 | /* Look for allowed, online CPU in same node. */ |
|---|
| 2162 | 3018 | for_each_cpu(dest_cpu, nodemask) { |
|---|
| 2163 | | - if (!cpu_active(dest_cpu)) |
|---|
| 2164 | | - continue; |
|---|
| 2165 | | - if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) |
|---|
| 3019 | + if (is_cpu_allowed(p, dest_cpu)) |
|---|
| 2166 | 3020 | return dest_cpu; |
|---|
| 2167 | 3021 | } |
|---|
| 2168 | 3022 | } |
|---|
| .. | .. |
|---|
| 2184 | 3038 | state = possible; |
|---|
| 2185 | 3039 | break; |
|---|
| 2186 | 3040 | } |
|---|
| 2187 | | - /* Fall-through */ |
|---|
| 3041 | + fallthrough; |
|---|
| 2188 | 3042 | case possible: |
|---|
| 2189 | | - do_set_cpus_allowed(p, cpu_possible_mask); |
|---|
| 3043 | + /* |
|---|
| 3044 | + * XXX When called from select_task_rq() we only |
|---|
| 3045 | + * hold p->pi_lock and again violate locking order. |
|---|
| 3046 | + * |
|---|
| 3047 | + * More yuck to audit. |
|---|
| 3048 | + */ |
|---|
| 3049 | + do_set_cpus_allowed(p, task_cpu_possible_mask(p)); |
|---|
| 2190 | 3050 | state = fail; |
|---|
| 2191 | 3051 | break; |
|---|
| 2192 | | - |
|---|
| 2193 | 3052 | case fail: |
|---|
| 2194 | 3053 | BUG(); |
|---|
| 2195 | 3054 | break; |
|---|
| .. | .. |
|---|
| 2216 | 3075 | * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable. |
|---|
| 2217 | 3076 | */ |
|---|
| 2218 | 3077 | static inline |
|---|
| 2219 | | -int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags, |
|---|
| 2220 | | - int sibling_count_hint) |
|---|
| 3078 | +int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) |
|---|
| 2221 | 3079 | { |
|---|
| 2222 | 3080 | lockdep_assert_held(&p->pi_lock); |
|---|
| 2223 | 3081 | |
|---|
| 2224 | | - if (p->nr_cpus_allowed > 1) |
|---|
| 2225 | | - cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags, |
|---|
| 2226 | | - sibling_count_hint); |
|---|
| 3082 | + if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p)) |
|---|
| 3083 | + cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); |
|---|
| 2227 | 3084 | else |
|---|
| 2228 | 3085 | cpu = cpumask_any(p->cpus_ptr); |
|---|
| 2229 | 3086 | |
|---|
| .. | .. |
|---|
| 2243 | 3100 | return cpu; |
|---|
| 2244 | 3101 | } |
|---|
| 2245 | 3102 | |
|---|
| 2246 | | -static void update_avg(u64 *avg, u64 sample) |
|---|
| 2247 | | -{ |
|---|
| 2248 | | - s64 diff = sample - *avg; |
|---|
| 2249 | | - *avg += diff >> 3; |
|---|
| 2250 | | -} |
|---|
| 2251 | | - |
|---|
| 2252 | 3103 | void sched_set_stop_task(int cpu, struct task_struct *stop) |
|---|
| 2253 | 3104 | { |
|---|
| 3105 | + static struct lock_class_key stop_pi_lock; |
|---|
| 2254 | 3106 | struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; |
|---|
| 2255 | 3107 | struct task_struct *old_stop = cpu_rq(cpu)->stop; |
|---|
| 2256 | 3108 | |
|---|
| .. | .. |
|---|
| 2266 | 3118 | sched_setscheduler_nocheck(stop, SCHED_FIFO, ¶m); |
|---|
| 2267 | 3119 | |
|---|
| 2268 | 3120 | stop->sched_class = &stop_sched_class; |
|---|
| 3121 | + |
|---|
| 3122 | + /* |
|---|
| 3123 | + * The PI code calls rt_mutex_setprio() with ->pi_lock held to |
|---|
| 3124 | + * adjust the effective priority of a task. As a result, |
|---|
| 3125 | + * rt_mutex_setprio() can trigger (RT) balancing operations, |
|---|
| 3126 | + * which can then trigger wakeups of the stop thread to push |
|---|
| 3127 | + * around the current task. |
|---|
| 3128 | + * |
|---|
| 3129 | + * The stop task itself will never be part of the PI-chain, it |
|---|
| 3130 | + * never blocks, therefore that ->pi_lock recursion is safe. |
|---|
| 3131 | + * Tell lockdep about this by placing the stop->pi_lock in its |
|---|
| 3132 | + * own class. |
|---|
| 3133 | + */ |
|---|
| 3134 | + lockdep_set_class(&stop->pi_lock, &stop_pi_lock); |
|---|
| 2269 | 3135 | } |
|---|
| 2270 | 3136 | |
|---|
| 2271 | 3137 | cpu_rq(cpu)->stop = stop; |
|---|
| .. | .. |
|---|
| 2279 | 3145 | } |
|---|
| 2280 | 3146 | } |
|---|
| 2281 | 3147 | |
|---|
| 2282 | | -#else |
|---|
| 3148 | +#else /* CONFIG_SMP */ |
|---|
| 2283 | 3149 | |
|---|
| 2284 | 3150 | static inline int __set_cpus_allowed_ptr(struct task_struct *p, |
|---|
| 2285 | | - const struct cpumask *new_mask, bool check) |
|---|
| 3151 | + const struct cpumask *new_mask, |
|---|
| 3152 | + u32 flags) |
|---|
| 2286 | 3153 | { |
|---|
| 2287 | 3154 | return set_cpus_allowed_ptr(p, new_mask); |
|---|
| 2288 | 3155 | } |
|---|
| 2289 | 3156 | |
|---|
| 2290 | | -#endif /* CONFIG_SMP */ |
|---|
| 3157 | +static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { } |
|---|
| 3158 | + |
|---|
| 3159 | +static inline bool rq_has_pinned_tasks(struct rq *rq) |
|---|
| 3160 | +{ |
|---|
| 3161 | + return false; |
|---|
| 3162 | +} |
|---|
| 3163 | + |
|---|
| 3164 | +#endif /* !CONFIG_SMP */ |
|---|
| 2291 | 3165 | |
|---|
| 2292 | 3166 | static void |
|---|
| 2293 | 3167 | ttwu_stat(struct task_struct *p, int cpu, int wake_flags) |
|---|
| .. | .. |
|---|
| 2326 | 3200 | |
|---|
| 2327 | 3201 | if (wake_flags & WF_SYNC) |
|---|
| 2328 | 3202 | __schedstat_inc(p->se.statistics.nr_wakeups_sync); |
|---|
| 2329 | | -} |
|---|
| 2330 | | - |
|---|
| 2331 | | -static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags) |
|---|
| 2332 | | -{ |
|---|
| 2333 | | - activate_task(rq, p, en_flags); |
|---|
| 2334 | | - p->on_rq = TASK_ON_RQ_QUEUED; |
|---|
| 2335 | 3203 | } |
|---|
| 2336 | 3204 | |
|---|
| 2337 | 3205 | /* |
|---|
| .. | .. |
|---|
| 2375 | 3243 | { |
|---|
| 2376 | 3244 | int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK; |
|---|
| 2377 | 3245 | |
|---|
| 3246 | + if (wake_flags & WF_SYNC) |
|---|
| 3247 | + en_flags |= ENQUEUE_WAKEUP_SYNC; |
|---|
| 3248 | + |
|---|
| 2378 | 3249 | lockdep_assert_held(&rq->lock); |
|---|
| 2379 | 3250 | |
|---|
| 2380 | | -#ifdef CONFIG_SMP |
|---|
| 2381 | 3251 | if (p->sched_contributes_to_load) |
|---|
| 2382 | 3252 | rq->nr_uninterruptible--; |
|---|
| 2383 | 3253 | |
|---|
| 3254 | +#ifdef CONFIG_SMP |
|---|
| 2384 | 3255 | if (wake_flags & WF_MIGRATED) |
|---|
| 2385 | 3256 | en_flags |= ENQUEUE_MIGRATED; |
|---|
| 3257 | + else |
|---|
| 2386 | 3258 | #endif |
|---|
| 3259 | + if (p->in_iowait) { |
|---|
| 3260 | + delayacct_blkio_end(p); |
|---|
| 3261 | + atomic_dec(&task_rq(p)->nr_iowait); |
|---|
| 3262 | + } |
|---|
| 2387 | 3263 | |
|---|
| 2388 | | - ttwu_activate(rq, p, en_flags); |
|---|
| 3264 | + activate_task(rq, p, en_flags); |
|---|
| 2389 | 3265 | ttwu_do_wakeup(rq, p, wake_flags, rf); |
|---|
| 2390 | 3266 | } |
|---|
| 2391 | 3267 | |
|---|
| 2392 | 3268 | /* |
|---|
| 2393 | | - * Called in case the task @p isn't fully descheduled from its runqueue, |
|---|
| 2394 | | - * in this case we must do a remote wakeup. Its a 'light' wakeup though, |
|---|
| 2395 | | - * since all we need to do is flip p->state to TASK_RUNNING, since |
|---|
| 2396 | | - * the task is still ->on_rq. |
|---|
| 3269 | + * Consider @p being inside a wait loop: |
|---|
| 3270 | + * |
|---|
| 3271 | + * for (;;) { |
|---|
| 3272 | + * set_current_state(TASK_UNINTERRUPTIBLE); |
|---|
| 3273 | + * |
|---|
| 3274 | + * if (CONDITION) |
|---|
| 3275 | + * break; |
|---|
| 3276 | + * |
|---|
| 3277 | + * schedule(); |
|---|
| 3278 | + * } |
|---|
| 3279 | + * __set_current_state(TASK_RUNNING); |
|---|
| 3280 | + * |
|---|
| 3281 | + * between set_current_state() and schedule(). In this case @p is still |
|---|
| 3282 | + * runnable, so all that needs doing is change p->state back to TASK_RUNNING in |
|---|
| 3283 | + * an atomic manner. |
|---|
| 3284 | + * |
|---|
| 3285 | + * By taking task_rq(p)->lock we serialize against schedule(), if @p->on_rq |
|---|
| 3286 | + * then schedule() must still happen and p->state can be changed to |
|---|
| 3287 | + * TASK_RUNNING. Otherwise we lost the race, schedule() has happened, and we |
|---|
| 3288 | + * need to do a full wakeup with enqueue. |
|---|
| 3289 | + * |
|---|
| 3290 | + * Returns: %true when the wakeup is done, |
|---|
| 3291 | + * %false otherwise. |
|---|
| 2397 | 3292 | */ |
|---|
| 2398 | | -static int ttwu_remote(struct task_struct *p, int wake_flags) |
|---|
| 3293 | +static int ttwu_runnable(struct task_struct *p, int wake_flags) |
|---|
| 2399 | 3294 | { |
|---|
| 2400 | 3295 | struct rq_flags rf; |
|---|
| 2401 | 3296 | struct rq *rq; |
|---|
| .. | .. |
|---|
| 2414 | 3309 | } |
|---|
| 2415 | 3310 | |
|---|
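The wait loop in the comment above ttwu_runnable() is exactly what the wait_event()/wake_up() helpers expand to, and wake_up() is what eventually lands in try_to_wake_up(). A minimal kernel-style sketch of the canonical "make the condition true, then wake" ordering those comments rely on (the demo_* names are illustrative):

```c
#include <linux/sched.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wq);
static bool demo_cond;

/* Waiter: wait_event() open-codes the set_current_state()/schedule() loop. */
static void demo_wait(void)
{
	wait_event(demo_wq, demo_cond);
}

/*
 * Waker: make the condition true first, then wake.  If the waiter is still
 * between set_current_state() and schedule(), the wakeup only has to flip
 * it back to TASK_RUNNING -- that is the ttwu_runnable() path above.
 */
static void demo_wake(void)
{
	demo_cond = true;
	wake_up(&demo_wq);
}
```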
| 2416 | 3311 | #ifdef CONFIG_SMP |
|---|
| 2417 | | -void sched_ttwu_pending(void) |
|---|
| 3312 | +void sched_ttwu_pending(void *arg) |
|---|
| 2418 | 3313 | { |
|---|
| 3314 | + struct llist_node *llist = arg; |
|---|
| 2419 | 3315 | struct rq *rq = this_rq(); |
|---|
| 2420 | | - struct llist_node *llist = llist_del_all(&rq->wake_list); |
|---|
| 2421 | 3316 | struct task_struct *p, *t; |
|---|
| 2422 | 3317 | struct rq_flags rf; |
|---|
| 2423 | 3318 | |
|---|
| 2424 | 3319 | if (!llist) |
|---|
| 2425 | 3320 | return; |
|---|
| 2426 | 3321 | |
|---|
| 3322 | + /* |
|---|
| 3323 | + * rq::ttwu_pending is a racy indication of outstanding wakeups. |
|---|
| 3324 | + * Races such that false-negatives are possible, since they |
|---|
| 3325 | + * are shorter lived than false-positives would be. |
|---|
| 3326 | + */ |
|---|
| 3327 | + WRITE_ONCE(rq->ttwu_pending, 0); |
|---|
| 3328 | + |
|---|
| 2427 | 3329 | rq_lock_irqsave(rq, &rf); |
|---|
| 2428 | 3330 | update_rq_clock(rq); |
|---|
| 2429 | 3331 | |
|---|
| 2430 | | - llist_for_each_entry_safe(p, t, llist, wake_entry) |
|---|
| 3332 | + llist_for_each_entry_safe(p, t, llist, wake_entry.llist) { |
|---|
| 3333 | + if (WARN_ON_ONCE(p->on_cpu)) |
|---|
| 3334 | + smp_cond_load_acquire(&p->on_cpu, !VAL); |
|---|
| 3335 | + |
|---|
| 3336 | + if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq))) |
|---|
| 3337 | + set_task_cpu(p, cpu_of(rq)); |
|---|
| 3338 | + |
|---|
| 2431 | 3339 | ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf); |
|---|
| 3340 | + } |
|---|
| 2432 | 3341 | |
|---|
| 2433 | 3342 | rq_unlock_irqrestore(rq, &rf); |
|---|
| 2434 | 3343 | } |
|---|
| 2435 | 3344 | |
|---|
| 2436 | | -void scheduler_ipi(void) |
|---|
| 3345 | +void send_call_function_single_ipi(int cpu) |
|---|
| 2437 | 3346 | { |
|---|
| 2438 | | - /* |
|---|
| 2439 | | - * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting |
|---|
| 2440 | | - * TIF_NEED_RESCHED remotely (for the first time) will also send |
|---|
| 2441 | | - * this IPI. |
|---|
| 2442 | | - */ |
|---|
| 2443 | | - preempt_fold_need_resched(); |
|---|
| 3347 | + struct rq *rq = cpu_rq(cpu); |
|---|
| 2444 | 3348 | |
|---|
| 2445 | | - if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) |
|---|
| 2446 | | - return; |
|---|
| 2447 | | - |
|---|
| 2448 | | - /* |
|---|
| 2449 | | - * Not all reschedule IPI handlers call irq_enter/irq_exit, since |
|---|
| 2450 | | - * traditionally all their work was done from the interrupt return |
|---|
| 2451 | | - * path. Now that we actually do some work, we need to make sure |
|---|
| 2452 | | - * we do call them. |
|---|
| 2453 | | - * |
|---|
| 2454 | | - * Some archs already do call them, luckily irq_enter/exit nest |
|---|
| 2455 | | - * properly. |
|---|
| 2456 | | - * |
|---|
| 2457 | | - * Arguably we should visit all archs and update all handlers, |
|---|
| 2458 | | - * however a fair share of IPIs are still resched only so this would |
|---|
| 2459 | | - * somewhat pessimize the simple resched case. |
|---|
| 2460 | | - */ |
|---|
| 2461 | | - irq_enter(); |
|---|
| 2462 | | - sched_ttwu_pending(); |
|---|
| 2463 | | - |
|---|
| 2464 | | - /* |
|---|
| 2465 | | - * Check if someone kicked us for doing the nohz idle load balance. |
|---|
| 2466 | | - */ |
|---|
| 2467 | | - if (unlikely(got_nohz_idle_kick())) { |
|---|
| 2468 | | - this_rq()->idle_balance = 1; |
|---|
| 2469 | | - raise_softirq_irqoff(SCHED_SOFTIRQ); |
|---|
| 2470 | | - } |
|---|
| 2471 | | - irq_exit(); |
|---|
| 3349 | + if (!set_nr_if_polling(rq->idle)) |
|---|
| 3350 | + arch_send_call_function_single_ipi(cpu); |
|---|
| 3351 | + else |
|---|
| 3352 | + trace_sched_wake_idle_without_ipi(cpu); |
|---|
| 2472 | 3353 | } |
|---|
| 2473 | 3354 | |
|---|
| 2474 | | -static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) |
|---|
| 3355 | +/* |
|---|
| 3356 | + * Queue a task on the target CPU's wake_list and wake the CPU via IPI if |
|---|
| 3357 | + * necessary. The wakee CPU on receipt of the IPI will queue the task |
|---|
| 3358 | + * via sched_ttwu_pending() for activation so the wakee incurs the cost |
|---|
| 3359 | + * of the wakeup instead of the waker. |
|---|
| 3360 | + */ |
|---|
| 3361 | +static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) |
|---|
| 2475 | 3362 | { |
|---|
| 2476 | 3363 | struct rq *rq = cpu_rq(cpu); |
|---|
| 2477 | 3364 | |
|---|
| 2478 | 3365 | p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); |
|---|
| 2479 | 3366 | |
|---|
| 2480 | | - if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) { |
|---|
| 2481 | | - if (!set_nr_if_polling(rq->idle)) |
|---|
| 2482 | | - smp_send_reschedule(cpu); |
|---|
| 2483 | | - else |
|---|
| 2484 | | - trace_sched_wake_idle_without_ipi(cpu); |
|---|
| 2485 | | - } |
|---|
| 3367 | + WRITE_ONCE(rq->ttwu_pending, 1); |
|---|
| 3368 | + __smp_call_single_queue(cpu, &p->wake_entry.llist); |
|---|
| 2486 | 3369 | } |
|---|
| 2487 | 3370 | |
|---|
| 2488 | 3371 | void wake_up_if_idle(int cpu) |
|---|
| .. | .. |
|---|
| 2508 | 3391 | out: |
|---|
| 2509 | 3392 | rcu_read_unlock(); |
|---|
| 2510 | 3393 | } |
|---|
| 3394 | +EXPORT_SYMBOL_GPL(wake_up_if_idle); |
|---|
| 2511 | 3395 | |
|---|
| 2512 | 3396 | bool cpus_share_cache(int this_cpu, int that_cpu) |
|---|
| 2513 | 3397 | { |
|---|
| .. | .. |
|---|
| 2516 | 3400 | |
|---|
| 2517 | 3401 | return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); |
|---|
| 2518 | 3402 | } |
|---|
| 3403 | + |
|---|
| 3404 | +static inline bool ttwu_queue_cond(int cpu, int wake_flags) |
|---|
| 3405 | +{ |
|---|
| 3406 | + /* |
|---|
| 3407 | + * If the CPU does not share cache, then queue the task on the |
|---|
| 3408 | + * remote rqs wakelist to avoid accessing remote data. |
|---|
| 3409 | + */ |
|---|
| 3410 | + if (!cpus_share_cache(smp_processor_id(), cpu)) |
|---|
| 3411 | + return true; |
|---|
| 3412 | + |
|---|
| 3413 | + /* |
|---|
| 3414 | + * If the task is descheduling and the only running task on the |
|---|
| 3415 | + * CPU then use the wakelist to offload the task activation to |
|---|
| 3416 | + * the soon-to-be-idle CPU as the current CPU is likely busy. |
|---|
| 3417 | + * nr_running is checked to avoid unnecessary task stacking. |
|---|
| 3418 | + * |
|---|
| 3419 | + * Note that we can only get here with (wakee) p->on_rq=0, |
|---|
| 3420 | + * p->on_cpu can be whatever, we've done the dequeue, so |
|---|
| 3421 | + * the wakee has been accounted out of ->nr_running. |
|---|
| 3422 | + */ |
|---|
| 3423 | + if ((wake_flags & WF_ON_CPU) && !cpu_rq(cpu)->nr_running) |
|---|
| 3424 | + return true; |
|---|
| 3425 | + |
|---|
| 3426 | + return false; |
|---|
| 3427 | +} |
|---|
| 3428 | + |
|---|
| 3429 | +static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) |
|---|
| 3430 | +{ |
|---|
| 3431 | + bool cond = false; |
|---|
| 3432 | + |
|---|
| 3433 | + trace_android_rvh_ttwu_cond(&cond); |
|---|
| 3434 | + |
|---|
| 3435 | + if ((sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) || |
|---|
| 3436 | + cond) { |
|---|
| 3437 | + if (WARN_ON_ONCE(cpu == smp_processor_id())) |
|---|
| 3438 | + return false; |
|---|
| 3439 | + |
|---|
| 3440 | + sched_clock_cpu(cpu); /* Sync clocks across CPUs */ |
|---|
| 3441 | + __ttwu_queue_wakelist(p, cpu, wake_flags); |
|---|
| 3442 | + return true; |
|---|
| 3443 | + } |
|---|
| 3444 | + |
|---|
| 3445 | + return false; |
|---|
| 3446 | +} |
|---|
| 3447 | + |
|---|
| 3448 | +#else /* !CONFIG_SMP */ |
|---|
| 3449 | + |
|---|
| 3450 | +static inline bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) |
|---|
| 3451 | +{ |
|---|
| 3452 | + return false; |
|---|
| 3453 | +} |
|---|
| 3454 | + |
|---|
| 2519 | 3455 | #endif /* CONFIG_SMP */ |
|---|
| 2520 | 3456 | |
|---|
| 2521 | 3457 | static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) |
|---|
| .. | .. |
|---|
| 2523 | 3459 | struct rq *rq = cpu_rq(cpu); |
|---|
| 2524 | 3460 | struct rq_flags rf; |
|---|
| 2525 | 3461 | |
|---|
| 2526 | | -#if defined(CONFIG_SMP) |
|---|
| 2527 | | - if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) { |
|---|
| 2528 | | - sched_clock_cpu(cpu); /* Sync clocks across CPUs */ |
|---|
| 2529 | | - ttwu_queue_remote(p, cpu, wake_flags); |
|---|
| 3462 | + if (ttwu_queue_wakelist(p, cpu, wake_flags)) |
|---|
| 2530 | 3463 | return; |
|---|
| 2531 | | - } |
|---|
| 2532 | | -#endif |
|---|
| 2533 | 3464 | |
|---|
| 2534 | 3465 | rq_lock(rq, &rf); |
|---|
| 2535 | 3466 | update_rq_clock(rq); |
|---|
| .. | .. |
|---|
| 2585 | 3516 | * migration. However the means are completely different as there is no lock |
|---|
| 2586 | 3517 | * chain to provide order. Instead we do: |
|---|
| 2587 | 3518 | * |
|---|
| 2588 | | - * 1) smp_store_release(X->on_cpu, 0) |
|---|
| 2589 | | - * 2) smp_cond_load_acquire(!X->on_cpu) |
|---|
| 3519 | + * 1) smp_store_release(X->on_cpu, 0) -- finish_task() |
|---|
| 3520 | + * 2) smp_cond_load_acquire(!X->on_cpu) -- try_to_wake_up() |
|---|
| 2590 | 3521 | * |
|---|
| 2591 | 3522 | * Example: |
|---|
| 2592 | 3523 | * |
|---|
| .. | .. |
|---|
| 2625 | 3556 | * @p: the thread to be awakened |
|---|
| 2626 | 3557 | * @state: the mask of task states that can be woken |
|---|
| 2627 | 3558 | * @wake_flags: wake modifier flags (WF_*) |
|---|
| 2628 | | - * @sibling_count_hint: A hint at the number of threads that are being woken up |
|---|
| 2629 | | - * in this event. |
|---|
| 2630 | 3559 | * |
|---|
| 2631 | | - * If (@state & @p->state) @p->state = TASK_RUNNING. |
|---|
| 3560 | + * Conceptually does: |
|---|
| 3561 | + * |
|---|
| 3562 | + * If (@state & @p->state) @p->state = TASK_RUNNING. |
|---|
| 2632 | 3563 | * |
|---|
| 2633 | 3564 | * If the task was not queued/runnable, also place it back on a runqueue. |
|---|
| 2634 | 3565 | * |
|---|
| 2635 | | - * Atomic against schedule() which would dequeue a task, also see |
|---|
| 2636 | | - * set_current_state(). |
|---|
| 3566 | + * This function is atomic against schedule() which would dequeue the task. |
|---|
| 2637 | 3567 | * |
|---|
| 2638 | | - * This function executes a full memory barrier before accessing the task |
|---|
| 2639 | | - * state; see set_current_state(). |
|---|
| 3568 | + * It issues a full memory barrier before accessing @p->state, see the comment |
|---|
| 3569 | + * with set_current_state(). |
|---|
| 3570 | + * |
|---|
| 3571 | + * Uses p->pi_lock to serialize against concurrent wake-ups. |
|---|
| 3572 | + * |
|---|
| 3573 | + * Relies on p->pi_lock stabilizing: |
|---|
| 3574 | + * - p->sched_class |
|---|
| 3575 | + * - p->cpus_ptr |
|---|
| 3576 | + * - p->sched_task_group |
|---|
| 3577 | + * in order to do migration, see its use of select_task_rq()/set_task_cpu(). |
|---|
| 3578 | + * |
|---|
| 3579 | + * Tries really hard to only take one task_rq(p)->lock for performance. |
|---|
| 3580 | + * Takes rq->lock in: |
|---|
| 3581 | + * - ttwu_runnable() -- old rq, unavoidable, see comment there; |
|---|
| 3582 | + * - ttwu_queue() -- new rq, for enqueue of the task; |
|---|
| 3583 | + * - psi_ttwu_dequeue() -- much sadness :-( accounting will kill us. |
|---|
| 3584 | + * |
|---|
| 3585 | + * As a consequence we race really badly with just about everything. See the |
|---|
| 3586 | + * many memory barriers and their comments for details. |
|---|
| 2640 | 3587 | * |
|---|
| 2641 | 3588 | * Return: %true if @p->state changes (an actual wakeup was done), |
|---|
| 2642 | 3589 | * %false otherwise. |
|---|
| 2643 | 3590 | */ |
|---|
| 2644 | 3591 | static int |
|---|
| 2645 | | -try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags, |
|---|
| 2646 | | - int sibling_count_hint) |
|---|
| 3592 | +try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) |
|---|
| 2647 | 3593 | { |
|---|
| 2648 | 3594 | unsigned long flags; |
|---|
| 2649 | 3595 | int cpu, success = 0; |
|---|
| 2650 | 3596 | |
|---|
| 3597 | + preempt_disable(); |
|---|
| 3598 | + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && p == current) { |
|---|
| 3599 | + /* |
|---|
| 3600 | + * We're waking current, this means 'p->on_rq' and 'task_cpu(p) |
|---|
| 3601 | + * == smp_processor_id()'. Together this means we can special |
|---|
| 3602 | + * case the whole 'p->on_rq && ttwu_runnable()' case below |
|---|
| 3603 | + * without taking any locks. |
|---|
| 3604 | + * |
|---|
| 3605 | + * In particular: |
|---|
| 3606 | + * - we rely on Program-Order guarantees for all the ordering, |
|---|
| 3607 | + * - we're serialized against set_special_state() by virtue of |
|---|
| 3608 | + * it disabling IRQs (this allows not taking ->pi_lock). |
|---|
| 3609 | + */ |
|---|
| 3610 | + if (!(p->state & state)) |
|---|
| 3611 | + goto out; |
|---|
| 3612 | + |
|---|
| 3613 | + success = 1; |
|---|
| 3614 | + trace_sched_waking(p); |
|---|
| 3615 | + p->state = TASK_RUNNING; |
|---|
| 3616 | + trace_sched_wakeup(p); |
|---|
| 3617 | + goto out; |
|---|
| 3618 | + } |
|---|
| 3619 | + |
|---|
| 2651 | 3620 | /* |
|---|
| 2652 | 3621 | * If we are going to wake up a thread waiting for CONDITION we |
|---|
| 2653 | 3622 | * need to ensure that CONDITION=1 done by the caller can not be |
|---|
| 2654 | | - * reordered with p->state check below. This pairs with mb() in |
|---|
| 2655 | | - * set_current_state() the waiting thread does. |
|---|
| 3623 | + * reordered with p->state check below. This pairs with smp_store_mb() |
|---|
| 3624 | + * in set_current_state() that the waiting thread does. |
|---|
| 2656 | 3625 | */ |
|---|
| 2657 | 3626 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
|---|
| 2658 | 3627 | smp_mb__after_spinlock(); |
|---|
| .. | .. |
|---|
| 2668 | 3637 | success = 1; |
|---|
| 2669 | 3638 | } |
|---|
| 2670 | 3639 | } |
|---|
| 2671 | | - goto out; |
|---|
| 3640 | + goto unlock; |
|---|
| 2672 | 3641 | } |
|---|
| 2673 | | - |
|---|
| 2674 | 3642 | /* |
|---|
| 2675 | 3643 | * If this is a regular wakeup, then we can unconditionally |
|---|
| 2676 | 3644 | * clear the saved state of a "lock sleeper". |
|---|
| .. | .. |
|---|
| 2678 | 3646 | if (!(wake_flags & WF_LOCK_SLEEPER)) |
|---|
| 2679 | 3647 | p->saved_state = TASK_RUNNING; |
|---|
| 2680 | 3648 | |
|---|
| 3649 | +#ifdef CONFIG_FREEZER |
|---|
| 3650 | + /* |
|---|
| 3651 | + * If we're going to wake up a thread which may be frozen, then |
|---|
| 3652 | + * we can only do so if we have an active CPU which is capable of |
|---|
| 3653 | + * running it. This may not be the case when resuming from suspend, |
|---|
| 3654 | + * as the secondary CPUs may not yet be back online. See __thaw_task() |
|---|
| 3655 | + * for the actual wakeup. |
|---|
| 3656 | + */ |
|---|
| 3657 | + if (unlikely(frozen_or_skipped(p)) && |
|---|
| 3658 | + !cpumask_intersects(cpu_active_mask, task_cpu_possible_mask(p))) |
|---|
| 3659 | + goto unlock; |
|---|
| 3660 | +#endif |
|---|
| 3661 | + |
|---|
| 2681 | 3662 | trace_sched_waking(p); |
|---|
| 2682 | 3663 | |
|---|
| 2683 | 3664 | /* We're going to change ->state: */ |
|---|
| 2684 | 3665 | success = 1; |
|---|
| 2685 | | - cpu = task_cpu(p); |
|---|
| 2686 | 3666 | |
|---|
| 2687 | 3667 | /* |
|---|
| 2688 | 3668 | * Ensure we load p->on_rq _after_ p->state, otherwise it would |
|---|
| .. | .. |
|---|
| 2703 | 3683 | * |
|---|
| 2704 | 3684 | * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in |
|---|
| 2705 | 3685 | * __schedule(). See the comment for smp_mb__after_spinlock(). |
|---|
| 3686 | + * |
|---|
| 3687 | + * A similar smp_rmb() lives in try_invoke_on_locked_down_task(). |
|---|
| 2706 | 3688 | */ |
|---|
| 2707 | 3689 | smp_rmb(); |
|---|
| 2708 | | - if (p->on_rq && ttwu_remote(p, wake_flags)) |
|---|
| 2709 | | - goto stat; |
|---|
| 3690 | + if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags)) |
|---|
| 3691 | + goto unlock; |
|---|
| 3692 | + |
|---|
| 3693 | + if (p->state & TASK_UNINTERRUPTIBLE) |
|---|
| 3694 | + trace_sched_blocked_reason(p); |
|---|
| 2710 | 3695 | |
|---|
| 2711 | 3696 | #ifdef CONFIG_SMP |
|---|
| 2712 | 3697 | /* |
|---|
| .. | .. |
|---|
| 2727 | 3712 | * |
|---|
| 2728 | 3713 | * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in |
|---|
| 2729 | 3714 | * __schedule(). See the comment for smp_mb__after_spinlock(). |
|---|
| 3715 | + * |
|---|
| 3716 | + * Form a control-dep-acquire with p->on_rq == 0 above, to ensure |
|---|
| 3717 | + * schedule()'s deactivate_task() has 'happened' and p will no longer |
|---|
| 3718 | + * care about its own p->state. See the comment in __schedule(). |
|---|
| 2730 | 3719 | */ |
|---|
| 2731 | | - smp_rmb(); |
|---|
| 3720 | + smp_acquire__after_ctrl_dep(); |
|---|
| 3721 | + |
|---|
| 3722 | + /* |
|---|
| 3723 | + * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq |
|---|
| 3724 | + * == 0), which means we need to do an enqueue, change p->state to |
|---|
| 3725 | + * TASK_WAKING such that we can unlock p->pi_lock before doing the |
|---|
| 3726 | + * enqueue, such as ttwu_queue_wakelist(). |
|---|
| 3727 | + */ |
|---|
| 3728 | + p->state = TASK_WAKING; |
|---|
| 3729 | + |
|---|
| 3730 | + /* |
|---|
| 3731 | + * If the owning (remote) CPU is still in the middle of schedule() with |
|---|
| 3732 | + * this task as prev, considering queueing p on the remote CPUs wake_list |
|---|
| 3733 | + * which potentially sends an IPI instead of spinning on p->on_cpu to |
|---|
| 3734 | + * let the waker make forward progress. This is safe because IRQs are |
|---|
| 3735 | + * disabled and the IPI will deliver after on_cpu is cleared. |
|---|
| 3736 | + * |
|---|
| 3737 | + * Ensure we load task_cpu(p) after p->on_cpu: |
|---|
| 3738 | + * |
|---|
| 3739 | + * set_task_cpu(p, cpu); |
|---|
| 3740 | + * STORE p->cpu = @cpu |
|---|
| 3741 | + * __schedule() (switch to task 'p') |
|---|
| 3742 | + * LOCK rq->lock |
|---|
| 3743 | + * smp_mb__after_spinlock() smp_cond_load_acquire(&p->on_cpu) |
|---|
| 3744 | + * STORE p->on_cpu = 1 LOAD p->cpu |
|---|
| 3745 | + * |
|---|
| 3746 | + * to ensure we observe the correct CPU on which the task is currently |
|---|
| 3747 | + * scheduling. |
|---|
| 3748 | + */ |
|---|
| 3749 | + if (smp_load_acquire(&p->on_cpu) && |
|---|
| 3750 | + ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) |
|---|
| 3751 | + goto unlock; |
|---|
| 2732 | 3752 | |
|---|
| 2733 | 3753 | /* |
|---|
| 2734 | 3754 | * If the owning (remote) CPU is still in the middle of schedule() with |
|---|
| .. | .. |
|---|
| 2741 | 3761 | */ |
|---|
| 2742 | 3762 | smp_cond_load_acquire(&p->on_cpu, !VAL); |
|---|
| 2743 | 3763 | |
|---|
| 2744 | | - p->sched_contributes_to_load = !!task_contributes_to_load(p); |
|---|
| 2745 | | - p->state = TASK_WAKING; |
|---|
| 3764 | + trace_android_rvh_try_to_wake_up(p); |
|---|
| 2746 | 3765 | |
|---|
| 2747 | | - if (p->in_iowait) { |
|---|
| 2748 | | - delayacct_blkio_end(p); |
|---|
| 2749 | | - atomic_dec(&task_rq(p)->nr_iowait); |
|---|
| 2750 | | - } |
|---|
| 2751 | | - |
|---|
| 2752 | | - cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags, |
|---|
| 2753 | | - sibling_count_hint); |
|---|
| 3766 | + cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags); |
|---|
| 2754 | 3767 | if (task_cpu(p) != cpu) { |
|---|
| 3768 | + if (p->in_iowait) { |
|---|
| 3769 | + delayacct_blkio_end(p); |
|---|
| 3770 | + atomic_dec(&task_rq(p)->nr_iowait); |
|---|
| 3771 | + } |
|---|
| 3772 | + |
|---|
| 2755 | 3773 | wake_flags |= WF_MIGRATED; |
|---|
| 2756 | 3774 | psi_ttwu_dequeue(p); |
|---|
| 2757 | 3775 | set_task_cpu(p, cpu); |
|---|
| 2758 | 3776 | } |
|---|
| 2759 | | - |
|---|
| 2760 | | -#else /* CONFIG_SMP */ |
|---|
| 2761 | | - |
|---|
| 2762 | | - if (p->in_iowait) { |
|---|
| 2763 | | - delayacct_blkio_end(p); |
|---|
| 2764 | | - atomic_dec(&task_rq(p)->nr_iowait); |
|---|
| 2765 | | - } |
|---|
| 2766 | | - |
|---|
| 3777 | +#else |
|---|
| 3778 | + cpu = task_cpu(p); |
|---|
| 2767 | 3779 | #endif /* CONFIG_SMP */ |
|---|
| 2768 | 3780 | |
|---|
| 2769 | 3781 | ttwu_queue(p, cpu, wake_flags); |
|---|
| 2770 | | -stat: |
|---|
| 2771 | | - ttwu_stat(p, cpu, wake_flags); |
|---|
| 2772 | | -out: |
|---|
| 3782 | +unlock: |
|---|
| 2773 | 3783 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
|---|
| 3784 | +out: |
|---|
| 3785 | + if (success) { |
|---|
| 3786 | + trace_android_rvh_try_to_wake_up_success(p); |
|---|
| 3787 | + ttwu_stat(p, task_cpu(p), wake_flags); |
|---|
| 3788 | + } |
|---|
| 3789 | + preempt_enable(); |
|---|
| 2774 | 3790 | |
|---|
| 2775 | 3791 | return success; |
|---|
| 3792 | +} |
|---|
| 3793 | + |
|---|
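The ->on_cpu handshake the comments above rely on, smp_store_release() in finish_task() paired with smp_load_acquire()/smp_cond_load_acquire() in try_to_wake_up(), is the standard release/acquire pattern. The standalone userspace analogue below uses C11 atomics instead of the kernel primitives and is purely illustrative:

```c
/* Userspace illustration of the release/acquire pairing, not kernel code. */
#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>

static atomic_int on_cpu = 1;	/* analogue of p->on_cpu           */
static int task_cpu = 0;	/* analogue of p->cpu / task_cpu() */

static void *schedule_out(void *arg)	/* the finish_task(prev) side */
{
	task_cpu = 3;				     /* last write while "on cpu" */
	atomic_store_explicit(&on_cpu, 0,
			      memory_order_release); /* smp_store_release()       */
	return NULL;
}

static void *waker(void *arg)		/* the try_to_wake_up() side */
{
	while (atomic_load_explicit(&on_cpu,
				    memory_order_acquire)) /* smp_cond_load_acquire() */
		;
	assert(task_cpu == 3);	/* guaranteed visible after the acquire */
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, schedule_out, NULL);
	pthread_create(&b, NULL, waker, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}
```

Once the waker observes on_cpu == 0 through the acquire load, every store the outgoing side made before its release store is guaranteed visible, which is exactly why try_to_wake_up() only loads task_cpu(p) after the smp_load_acquire(&p->on_cpu) above.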
| 3794 | +/** |
|---|
| 3795 | + * try_invoke_on_locked_down_task - Invoke a function on task in fixed state |
|---|
| 3796 | + * @p: Process for which the function is to be invoked, can be @current. |
|---|
| 3797 | + * @func: Function to invoke. |
|---|
| 3798 | + * @arg: Argument to function. |
|---|
| 3799 | + * |
|---|
| 3800 | + * If the specified task can be quickly locked into a definite state |
|---|
| 3801 | + * (either sleeping or on a given runqueue), arrange to keep it in that |
|---|
| 3802 | + * state while invoking @func(@arg). This function can use ->on_rq and |
|---|
| 3803 | + * task_curr() to work out what the state is, if required. Given that |
|---|
| 3804 | + * @func can be invoked with a runqueue lock held, it had better be quite |
|---|
| 3805 | + * lightweight. |
|---|
| 3806 | + * |
|---|
| 3807 | + * Returns: |
|---|
| 3808 | + * @false if the task slipped out from under the locks. |
|---|
| 3809 | + * @true if the task was locked onto a runqueue or is sleeping. |
|---|
| 3810 | + * However, @func can override this by returning @false. |
|---|
| 3811 | + */ |
|---|
| 3812 | +bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) |
|---|
| 3813 | +{ |
|---|
| 3814 | + struct rq_flags rf; |
|---|
| 3815 | + bool ret = false; |
|---|
| 3816 | + struct rq *rq; |
|---|
| 3817 | + |
|---|
| 3818 | + raw_spin_lock_irqsave(&p->pi_lock, rf.flags); |
|---|
| 3819 | + if (p->on_rq) { |
|---|
| 3820 | + rq = __task_rq_lock(p, &rf); |
|---|
| 3821 | + if (task_rq(p) == rq) |
|---|
| 3822 | + ret = func(p, arg); |
|---|
| 3823 | + rq_unlock(rq, &rf); |
|---|
| 3824 | + } else { |
|---|
| 3825 | + switch (p->state) { |
|---|
| 3826 | + case TASK_RUNNING: |
|---|
| 3827 | + case TASK_WAKING: |
|---|
| 3828 | + break; |
|---|
| 3829 | + default: |
|---|
| 3830 | + smp_rmb(); // See smp_rmb() comment in try_to_wake_up(). |
|---|
| 3831 | + if (!p->on_rq) |
|---|
| 3832 | + ret = func(p, arg); |
|---|
| 3833 | + } |
|---|
| 3834 | + } |
|---|
| 3835 | + raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags); |
|---|
| 3836 | + return ret; |
|---|
| 2776 | 3837 | } |
|---|
| 2777 | 3838 | |
|---|
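A minimal kernel-context sketch of how a caller might use this helper; the callback and its caller are hypothetical, but they follow the contract documented above: @func may run under a runqueue lock, so it only reads a couple of fields and returns.

```c
/* Hypothetical example: sample a task's state while it cannot change. */
static bool record_task_state(struct task_struct *t, void *arg)
{
	*(long *)arg = t->state;	/* cheap: only reads fields of @t */
	return true;			/* report back that we sampled it */
}

static void sample_task(struct task_struct *p)
{
	long state = -1;

	if (try_invoke_on_locked_down_task(p, record_task_state, &state))
		pr_info("%s/%d state=%ld\n", p->comm, p->pid, state);
	else
		pr_info("%s/%d slipped away, try again later\n", p->comm, p->pid);
}
```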
| 2778 | 3839 | /** |
|---|
| .. | .. |
|---|
| 2788 | 3849 | */ |
|---|
| 2789 | 3850 | int wake_up_process(struct task_struct *p) |
|---|
| 2790 | 3851 | { |
|---|
| 2791 | | - return try_to_wake_up(p, TASK_NORMAL, 0, 1); |
|---|
| 3852 | + return try_to_wake_up(p, TASK_NORMAL, 0); |
|---|
| 2792 | 3853 | } |
|---|
| 2793 | 3854 | EXPORT_SYMBOL(wake_up_process); |
|---|
| 2794 | 3855 | |
|---|
| .. | .. |
|---|
| 2801 | 3862 | */ |
|---|
| 2802 | 3863 | int wake_up_lock_sleeper(struct task_struct *p) |
|---|
| 2803 | 3864 | { |
|---|
| 2804 | | - return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER, 1); |
|---|
| 3865 | + return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER); |
|---|
| 2805 | 3866 | } |
|---|
| 2806 | 3867 | |
|---|
| 2807 | 3868 | int wake_up_state(struct task_struct *p, unsigned int state) |
|---|
| 2808 | 3869 | { |
|---|
| 2809 | | - return try_to_wake_up(p, state, 0, 1); |
|---|
| 3870 | + return try_to_wake_up(p, state, 0); |
|---|
| 2810 | 3871 | } |
|---|
| 2811 | 3872 | |
|---|
| 2812 | 3873 | /* |
|---|
| .. | .. |
|---|
| 2831 | 3892 | p->se.cfs_rq = NULL; |
|---|
| 2832 | 3893 | #endif |
|---|
| 2833 | 3894 | |
|---|
| 3895 | + trace_android_rvh_sched_fork_init(p); |
|---|
| 3896 | + |
|---|
| 2834 | 3897 | #ifdef CONFIG_SCHEDSTATS |
|---|
| 2835 | 3898 | /* Even if schedstat is disabled, there should not be garbage */ |
|---|
| 2836 | 3899 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); |
|---|
| .. | .. |
|---|
| 2851 | 3914 | INIT_HLIST_HEAD(&p->preempt_notifiers); |
|---|
| 2852 | 3915 | #endif |
|---|
| 2853 | 3916 | |
|---|
| 3917 | +#ifdef CONFIG_COMPACTION |
|---|
| 3918 | + p->capture_control = NULL; |
|---|
| 3919 | +#endif |
|---|
| 2854 | 3920 | init_numa_balancing(clone_flags, p); |
|---|
| 3921 | +#ifdef CONFIG_SMP |
|---|
| 3922 | + p->wake_entry.u_flags = CSD_TYPE_TTWU; |
|---|
| 3923 | + p->migration_pending = NULL; |
|---|
| 3924 | +#endif |
|---|
| 2855 | 3925 | } |
|---|
| 2856 | 3926 | |
|---|
| 2857 | 3927 | DEFINE_STATIC_KEY_FALSE(sched_numa_balancing); |
|---|
| .. | .. |
|---|
| 2868 | 3938 | |
|---|
| 2869 | 3939 | #ifdef CONFIG_PROC_SYSCTL |
|---|
| 2870 | 3940 | int sysctl_numa_balancing(struct ctl_table *table, int write, |
|---|
| 2871 | | - void __user *buffer, size_t *lenp, loff_t *ppos) |
|---|
| 3941 | + void *buffer, size_t *lenp, loff_t *ppos) |
|---|
| 2872 | 3942 | { |
|---|
| 2873 | 3943 | struct ctl_table t; |
|---|
| 2874 | 3944 | int err; |
|---|
| .. | .. |
|---|
| 2942 | 4012 | } |
|---|
| 2943 | 4013 | |
|---|
| 2944 | 4014 | #ifdef CONFIG_PROC_SYSCTL |
|---|
| 2945 | | -int sysctl_schedstats(struct ctl_table *table, int write, |
|---|
| 2946 | | - void __user *buffer, size_t *lenp, loff_t *ppos) |
|---|
| 4015 | +int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, |
|---|
| 4016 | + size_t *lenp, loff_t *ppos) |
|---|
| 2947 | 4017 | { |
|---|
| 2948 | 4018 | struct ctl_table t; |
|---|
| 2949 | 4019 | int err; |
|---|
| .. | .. |
|---|
| 2971 | 4041 | */ |
|---|
| 2972 | 4042 | int sched_fork(unsigned long clone_flags, struct task_struct *p) |
|---|
| 2973 | 4043 | { |
|---|
| 2974 | | - unsigned long flags; |
|---|
| 4044 | + trace_android_rvh_sched_fork(p); |
|---|
| 2975 | 4045 | |
|---|
| 2976 | 4046 | __sched_fork(clone_flags, p); |
|---|
| 2977 | 4047 | /* |
|---|
| .. | .. |
|---|
| 2985 | 4055 | * Make sure we do not leak PI boosting priority to the child. |
|---|
| 2986 | 4056 | */ |
|---|
| 2987 | 4057 | p->prio = current->normal_prio; |
|---|
| 4058 | + trace_android_rvh_prepare_prio_fork(p); |
|---|
| 2988 | 4059 | |
|---|
| 2989 | 4060 | uclamp_fork(p); |
|---|
| 2990 | 4061 | |
|---|
| .. | .. |
|---|
| 2999 | 4070 | } else if (PRIO_TO_NICE(p->static_prio) < 0) |
|---|
| 3000 | 4071 | p->static_prio = NICE_TO_PRIO(0); |
|---|
| 3001 | 4072 | |
|---|
| 3002 | | - p->prio = p->normal_prio = __normal_prio(p); |
|---|
| 3003 | | - set_load_weight(p, false); |
|---|
| 4073 | + p->prio = p->normal_prio = p->static_prio; |
|---|
| 4074 | + set_load_weight(p); |
|---|
| 3004 | 4075 | |
|---|
| 3005 | 4076 | /* |
|---|
| 3006 | 4077 | * We don't need the reset flag anymore after the fork. It has |
|---|
| .. | .. |
|---|
| 3017 | 4088 | p->sched_class = &fair_sched_class; |
|---|
| 3018 | 4089 | |
|---|
| 3019 | 4090 | init_entity_runnable_average(&p->se); |
|---|
| 4091 | + trace_android_rvh_finish_prio_fork(p); |
|---|
| 3020 | 4092 | |
|---|
| 3021 | | - /* |
|---|
| 3022 | | - * The child is not yet in the pid-hash so no cgroup attach races, |
|---|
| 3023 | | - * and the cgroup is pinned to this child due to cgroup_fork() |
|---|
| 3024 | | - * is ran before sched_fork(). |
|---|
| 3025 | | - * |
|---|
| 3026 | | - * Silence PROVE_RCU. |
|---|
| 3027 | | - */ |
|---|
| 3028 | | - raw_spin_lock_irqsave(&p->pi_lock, flags); |
|---|
| 3029 | | - rseq_migrate(p); |
|---|
| 3030 | | - /* |
|---|
| 3031 | | - * We're setting the CPU for the first time, we don't migrate, |
|---|
| 3032 | | - * so use __set_task_cpu(). |
|---|
| 3033 | | - */ |
|---|
| 3034 | | - __set_task_cpu(p, smp_processor_id()); |
|---|
| 3035 | | - if (p->sched_class->task_fork) |
|---|
| 3036 | | - p->sched_class->task_fork(p); |
|---|
| 3037 | | - raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
|---|
| 3038 | 4093 | |
|---|
| 3039 | 4094 | #ifdef CONFIG_SCHED_INFO |
|---|
| 3040 | 4095 | if (likely(sched_info_on())) |
|---|
| .. | .. |
|---|
| 3052 | 4107 | RB_CLEAR_NODE(&p->pushable_dl_tasks); |
|---|
| 3053 | 4108 | #endif |
|---|
| 3054 | 4109 | return 0; |
|---|
| 4110 | +} |
|---|
| 4111 | + |
|---|
| 4112 | +void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs) |
|---|
| 4113 | +{ |
|---|
| 4114 | + unsigned long flags; |
|---|
| 4115 | + |
|---|
| 4116 | + /* |
|---|
| 4117 | + * Because we're not yet on the pid-hash, p->pi_lock isn't strictly |
|---|
| 4118 | + * required yet, but lockdep gets upset if rules are violated. |
|---|
| 4119 | + */ |
|---|
| 4120 | + raw_spin_lock_irqsave(&p->pi_lock, flags); |
|---|
| 4121 | +#ifdef CONFIG_CGROUP_SCHED |
|---|
| 4122 | + if (1) { |
|---|
| 4123 | + struct task_group *tg; |
|---|
| 4124 | + |
|---|
| 4125 | + tg = container_of(kargs->cset->subsys[cpu_cgrp_id], |
|---|
| 4126 | + struct task_group, css); |
|---|
| 4127 | + tg = autogroup_task_group(p, tg); |
|---|
| 4128 | + p->sched_task_group = tg; |
|---|
| 4129 | + } |
|---|
| 4130 | +#endif |
|---|
| 4131 | + rseq_migrate(p); |
|---|
| 4132 | + /* |
|---|
| 4133 | + * We're setting the CPU for the first time, we don't migrate, |
|---|
| 4134 | + * so use __set_task_cpu(). |
|---|
| 4135 | + */ |
|---|
| 4136 | + __set_task_cpu(p, smp_processor_id()); |
|---|
| 4137 | + if (p->sched_class->task_fork) |
|---|
| 4138 | + p->sched_class->task_fork(p); |
|---|
| 4139 | + raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
|---|
| 4140 | +} |
|---|
| 4141 | + |
|---|
| 4142 | +void sched_post_fork(struct task_struct *p) |
|---|
| 4143 | +{ |
|---|
| 4144 | + uclamp_post_fork(p); |
|---|
| 3055 | 4145 | } |
|---|
| 3056 | 4146 | |
|---|
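With sched_fork() split up like this, the scheduler's fork-time work is spread across three hooks. The condensed sketch below shows the intended ordering as seen from the fork path; the real caller is copy_process() in kernel/fork.c, which is outside this hunk, so treat the exact call sites as an assumption rather than a quote.

```c
/* Condensed, assumed sketch of the fork-time ordering; error paths elided. */
static int sched_fork_path_sketch(unsigned long clone_flags,
				  struct task_struct *p,
				  struct kernel_clone_args *args)
{
	int ret;

	ret = sched_fork(clone_flags, p);	/* prio, class, weight           */
	if (ret)
		return ret;

	/* ... cgroup_can_fork() pins args->cset somewhere in between ... */

	sched_cgroup_fork(p, args);		/* task_group + first CPU, under */
						/* p->pi_lock                    */
	sched_post_fork(p);			/* uclamp_post_fork()            */
	return 0;
}
```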
| 3057 | 4147 | unsigned long to_ratio(u64 period, u64 runtime) |
|---|
| .. | .. |
|---|
| 3082 | 4172 | struct rq_flags rf; |
|---|
| 3083 | 4173 | struct rq *rq; |
|---|
| 3084 | 4174 | |
|---|
| 4175 | + trace_android_rvh_wake_up_new_task(p); |
|---|
| 4176 | + |
|---|
| 3085 | 4177 | raw_spin_lock_irqsave(&p->pi_lock, rf.flags); |
|---|
| 3086 | 4178 | p->state = TASK_RUNNING; |
|---|
| 3087 | 4179 | #ifdef CONFIG_SMP |
|---|
| .. | .. |
|---|
| 3095 | 4187 | */ |
|---|
| 3096 | 4188 | p->recent_used_cpu = task_cpu(p); |
|---|
| 3097 | 4189 | rseq_migrate(p); |
|---|
| 3098 | | - __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0, 1)); |
|---|
| 4190 | + __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); |
|---|
| 3099 | 4191 | #endif |
|---|
| 3100 | 4192 | rq = __task_rq_lock(p, &rf); |
|---|
| 3101 | 4193 | update_rq_clock(rq); |
|---|
| 3102 | | - post_init_entity_util_avg(&p->se); |
|---|
| 4194 | + post_init_entity_util_avg(p); |
|---|
| 4195 | + trace_android_rvh_new_task_stats(p); |
|---|
| 3103 | 4196 | |
|---|
| 3104 | 4197 | activate_task(rq, p, ENQUEUE_NOCLOCK); |
|---|
| 3105 | | - p->on_rq = TASK_ON_RQ_QUEUED; |
|---|
| 3106 | 4198 | trace_sched_wakeup_new(p); |
|---|
| 3107 | 4199 | check_preempt_curr(rq, p, WF_FORK); |
|---|
| 3108 | 4200 | #ifdef CONFIG_SMP |
|---|
| .. | .. |
|---|
| 3212 | 4304 | /* |
|---|
| 3213 | 4305 | * Claim the task as running, we do this before switching to it |
|---|
| 3214 | 4306 | * such that any running task will have this set. |
|---|
| 4307 | + * |
|---|
| 4308 | + * See the ttwu() WF_ON_CPU case and its ordering comment. |
|---|
| 3215 | 4309 | */ |
|---|
| 3216 | | - next->on_cpu = 1; |
|---|
| 4310 | + WRITE_ONCE(next->on_cpu, 1); |
|---|
| 3217 | 4311 | #endif |
|---|
| 3218 | 4312 | } |
|---|
| 3219 | 4313 | |
|---|
| .. | .. |
|---|
| 3221 | 4315 | { |
|---|
| 3222 | 4316 | #ifdef CONFIG_SMP |
|---|
| 3223 | 4317 | /* |
|---|
| 3224 | | - * After ->on_cpu is cleared, the task can be moved to a different CPU. |
|---|
| 3225 | | - * We must ensure this doesn't happen until the switch is completely |
|---|
| 4318 | + * This must be the very last reference to @prev from this CPU. After |
|---|
| 4319 | + * p->on_cpu is cleared, the task can be moved to a different CPU. We |
|---|
| 4320 | + * must ensure this doesn't happen until the switch is completely |
|---|
| 3226 | 4321 | * finished. |
|---|
| 3227 | 4322 | * |
|---|
| 3228 | 4323 | * In particular, the load of prev->state in finish_task_switch() must |
|---|
| .. | .. |
|---|
| 3234 | 4329 | #endif |
|---|
| 3235 | 4330 | } |
|---|
| 3236 | 4331 | |
|---|
| 4332 | +#ifdef CONFIG_SMP |
|---|
| 4333 | + |
|---|
| 4334 | +static void do_balance_callbacks(struct rq *rq, struct callback_head *head) |
|---|
| 4335 | +{ |
|---|
| 4336 | + void (*func)(struct rq *rq); |
|---|
| 4337 | + struct callback_head *next; |
|---|
| 4338 | + |
|---|
| 4339 | + lockdep_assert_held(&rq->lock); |
|---|
| 4340 | + |
|---|
| 4341 | + while (head) { |
|---|
| 4342 | + func = (void (*)(struct rq *))head->func; |
|---|
| 4343 | + next = head->next; |
|---|
| 4344 | + head->next = NULL; |
|---|
| 4345 | + head = next; |
|---|
| 4346 | + |
|---|
| 4347 | + func(rq); |
|---|
| 4348 | + } |
|---|
| 4349 | +} |
|---|
| 4350 | + |
|---|
| 4351 | +static inline struct callback_head *splice_balance_callbacks(struct rq *rq) |
|---|
| 4352 | +{ |
|---|
| 4353 | + struct callback_head *head = rq->balance_callback; |
|---|
| 4354 | + |
|---|
| 4355 | + lockdep_assert_held(&rq->lock); |
|---|
| 4356 | + if (head) { |
|---|
| 4357 | + rq->balance_callback = NULL; |
|---|
| 4358 | + rq->balance_flags &= ~BALANCE_WORK; |
|---|
| 4359 | + } |
|---|
| 4360 | + |
|---|
| 4361 | + return head; |
|---|
| 4362 | +} |
|---|
| 4363 | + |
|---|
| 4364 | +static void __balance_callbacks(struct rq *rq) |
|---|
| 4365 | +{ |
|---|
| 4366 | + do_balance_callbacks(rq, splice_balance_callbacks(rq)); |
|---|
| 4367 | +} |
|---|
| 4368 | + |
|---|
| 4369 | +static inline void balance_callbacks(struct rq *rq, struct callback_head *head) |
|---|
| 4370 | +{ |
|---|
| 4371 | + unsigned long flags; |
|---|
| 4372 | + |
|---|
| 4373 | + if (unlikely(head)) { |
|---|
| 4374 | + raw_spin_lock_irqsave(&rq->lock, flags); |
|---|
| 4375 | + do_balance_callbacks(rq, head); |
|---|
| 4376 | + raw_spin_unlock_irqrestore(&rq->lock, flags); |
|---|
| 4377 | + } |
|---|
| 4378 | +} |
|---|
| 4379 | + |
|---|
| 4380 | +static void balance_push(struct rq *rq); |
|---|
| 4381 | + |
|---|
| 4382 | +static inline void balance_switch(struct rq *rq) |
|---|
| 4383 | +{ |
|---|
| 4384 | + if (likely(!rq->balance_flags)) |
|---|
| 4385 | + return; |
|---|
| 4386 | + |
|---|
| 4387 | + if (rq->balance_flags & BALANCE_PUSH) { |
|---|
| 4388 | + balance_push(rq); |
|---|
| 4389 | + return; |
|---|
| 4390 | + } |
|---|
| 4391 | + |
|---|
| 4392 | + __balance_callbacks(rq); |
|---|
| 4393 | +} |
|---|
| 4394 | + |
|---|
| 4395 | +#else |
|---|
| 4396 | + |
|---|
| 4397 | +static inline void __balance_callbacks(struct rq *rq) |
|---|
| 4398 | +{ |
|---|
| 4399 | +} |
|---|
| 4400 | + |
|---|
| 4401 | +static inline struct callback_head *splice_balance_callbacks(struct rq *rq) |
|---|
| 4402 | +{ |
|---|
| 4403 | + return NULL; |
|---|
| 4404 | +} |
|---|
| 4405 | + |
|---|
| 4406 | +static inline void balance_callbacks(struct rq *rq, struct callback_head *head) |
|---|
| 4407 | +{ |
|---|
| 4408 | +} |
|---|
| 4409 | + |
|---|
| 4410 | +static inline void balance_switch(struct rq *rq) |
|---|
| 4411 | +{ |
|---|
| 4412 | +} |
|---|
| 4413 | + |
|---|
| 4414 | +#endif |
|---|
| 4415 | + |
|---|
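These drain-side helpers have a matching producer: while still holding rq->lock, a sched class links a callback_head embedded in its runqueue onto rq->balance_callback, and the callback then runs once the lock can safely be dropped. The real enqueue helper lives in sched.h as queue_balance_callback(); the sketch below only illustrates the pattern, and its details should be treated as assumptions rather than the exact code in this tree.

```c
/* Illustrative sketch of the producer side; see queue_balance_callback() in sched.h. */
static inline void queue_balance_callback_sketch(struct rq *rq,
						 struct callback_head *head,
						 void (*func)(struct rq *rq))
{
	lockdep_assert_held(&rq->lock);

	if (unlikely(head->next))		/* already queued */
		return;

	head->func = (void (*)(struct callback_head *))func;
	head->next = rq->balance_callback;
	rq->balance_callback = head;
	rq->balance_flags |= BALANCE_WORK;	/* cleared by splice_balance_callbacks() */
}
```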
| 3237 | 4416 | static inline void |
|---|
| 3238 | 4417 | prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf) |
|---|
| 3239 | 4418 | { |
|---|
| .. | .. |
|---|
| 3244 | 4423 | * do an early lockdep release here: |
|---|
| 3245 | 4424 | */ |
|---|
| 3246 | 4425 | rq_unpin_lock(rq, rf); |
|---|
| 3247 | | - spin_release(&rq->lock.dep_map, 1, _THIS_IP_); |
|---|
| 4426 | + spin_release(&rq->lock.dep_map, _THIS_IP_); |
|---|
| 3248 | 4427 | #ifdef CONFIG_DEBUG_SPINLOCK |
|---|
| 3249 | 4428 | /* this is a valid case when another task releases the spinlock */ |
|---|
| 3250 | 4429 | rq->lock.owner = next; |
|---|
| .. | .. |
|---|
| 3259 | 4438 | * prev into current: |
|---|
| 3260 | 4439 | */ |
|---|
| 3261 | 4440 | spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); |
|---|
| 4441 | + balance_switch(rq); |
|---|
| 3262 | 4442 | raw_spin_unlock_irq(&rq->lock); |
|---|
| 3263 | 4443 | } |
|---|
| 3264 | 4444 | |
|---|
| .. | .. |
|---|
| 3273 | 4453 | #ifndef finish_arch_post_lock_switch |
|---|
| 3274 | 4454 | # define finish_arch_post_lock_switch() do { } while (0) |
|---|
| 3275 | 4455 | #endif |
|---|
| 4456 | + |
|---|
| 4457 | +static inline void kmap_local_sched_out(void) |
|---|
| 4458 | +{ |
|---|
| 4459 | +#ifdef CONFIG_KMAP_LOCAL |
|---|
| 4460 | + if (unlikely(current->kmap_ctrl.idx)) |
|---|
| 4461 | + __kmap_local_sched_out(); |
|---|
| 4462 | +#endif |
|---|
| 4463 | +} |
|---|
| 4464 | + |
|---|
| 4465 | +static inline void kmap_local_sched_in(void) |
|---|
| 4466 | +{ |
|---|
| 4467 | +#ifdef CONFIG_KMAP_LOCAL |
|---|
| 4468 | + if (unlikely(current->kmap_ctrl.idx)) |
|---|
| 4469 | + __kmap_local_sched_in(); |
|---|
| 4470 | +#endif |
|---|
| 4471 | +} |
|---|
| 3276 | 4472 | |
|---|
| 3277 | 4473 | /** |
|---|
| 3278 | 4474 | * prepare_task_switch - prepare to switch tasks |
|---|
| .. | .. |
|---|
| 3296 | 4492 | perf_event_task_sched_out(prev, next); |
|---|
| 3297 | 4493 | rseq_preempt(prev); |
|---|
| 3298 | 4494 | fire_sched_out_preempt_notifiers(prev, next); |
|---|
| 4495 | + kmap_local_sched_out(); |
|---|
| 3299 | 4496 | prepare_task(next); |
|---|
| 3300 | 4497 | prepare_arch_switch(next); |
|---|
| 3301 | 4498 | } |
|---|
| .. | .. |
|---|
| 3362 | 4559 | finish_lock_switch(rq); |
|---|
| 3363 | 4560 | finish_arch_post_lock_switch(); |
|---|
| 3364 | 4561 | kcov_finish_switch(current); |
|---|
| 4562 | + kmap_local_sched_in(); |
|---|
| 3365 | 4563 | |
|---|
| 3366 | 4564 | fire_sched_in_preempt_notifiers(current); |
|---|
| 3367 | 4565 | /* |
|---|
| .. | .. |
|---|
| 3388 | 4586 | if (prev->sched_class->task_dead) |
|---|
| 3389 | 4587 | prev->sched_class->task_dead(prev); |
|---|
| 3390 | 4588 | |
|---|
| 3391 | | - put_task_struct(prev); |
|---|
| 4589 | + put_task_struct_rcu_user(prev); |
|---|
| 3392 | 4590 | } |
|---|
| 3393 | 4591 | |
|---|
| 3394 | 4592 | tick_nohz_task_switch(); |
|---|
| 3395 | 4593 | return rq; |
|---|
| 3396 | 4594 | } |
|---|
| 3397 | | - |
|---|
| 3398 | | -#ifdef CONFIG_SMP |
|---|
| 3399 | | - |
|---|
| 3400 | | -/* rq->lock is NOT held, but preemption is disabled */ |
|---|
| 3401 | | -static void __balance_callback(struct rq *rq) |
|---|
| 3402 | | -{ |
|---|
| 3403 | | - struct callback_head *head, *next; |
|---|
| 3404 | | - void (*func)(struct rq *rq); |
|---|
| 3405 | | - unsigned long flags; |
|---|
| 3406 | | - |
|---|
| 3407 | | - raw_spin_lock_irqsave(&rq->lock, flags); |
|---|
| 3408 | | - head = rq->balance_callback; |
|---|
| 3409 | | - rq->balance_callback = NULL; |
|---|
| 3410 | | - while (head) { |
|---|
| 3411 | | - func = (void (*)(struct rq *))head->func; |
|---|
| 3412 | | - next = head->next; |
|---|
| 3413 | | - head->next = NULL; |
|---|
| 3414 | | - head = next; |
|---|
| 3415 | | - |
|---|
| 3416 | | - func(rq); |
|---|
| 3417 | | - } |
|---|
| 3418 | | - raw_spin_unlock_irqrestore(&rq->lock, flags); |
|---|
| 3419 | | -} |
|---|
| 3420 | | - |
|---|
| 3421 | | -static inline void balance_callback(struct rq *rq) |
|---|
| 3422 | | -{ |
|---|
| 3423 | | - if (unlikely(rq->balance_callback)) |
|---|
| 3424 | | - __balance_callback(rq); |
|---|
| 3425 | | -} |
|---|
| 3426 | | - |
|---|
| 3427 | | -#else |
|---|
| 3428 | | - |
|---|
| 3429 | | -static inline void balance_callback(struct rq *rq) |
|---|
| 3430 | | -{ |
|---|
| 3431 | | -} |
|---|
| 3432 | | - |
|---|
| 3433 | | -#endif |
|---|
| 3434 | 4595 | |
|---|
| 3435 | 4596 | /** |
|---|
| 3436 | 4597 | * schedule_tail - first thing a freshly forked thread must call. |
|---|
| .. | .. |
|---|
| 3451 | 4612 | */ |
|---|
| 3452 | 4613 | |
|---|
| 3453 | 4614 | rq = finish_task_switch(prev); |
|---|
| 3454 | | - balance_callback(rq); |
|---|
| 3455 | 4615 | preempt_enable(); |
|---|
| 3456 | 4616 | |
|---|
| 3457 | 4617 | if (current->set_child_tid) |
|---|
| .. | .. |
|---|
| 3467 | 4627 | context_switch(struct rq *rq, struct task_struct *prev, |
|---|
| 3468 | 4628 | struct task_struct *next, struct rq_flags *rf) |
|---|
| 3469 | 4629 | { |
|---|
| 3470 | | - struct mm_struct *mm, *oldmm; |
|---|
| 3471 | | - |
|---|
| 3472 | 4630 | prepare_task_switch(rq, prev, next); |
|---|
| 3473 | 4631 | |
|---|
| 3474 | | - mm = next->mm; |
|---|
| 3475 | | - oldmm = prev->active_mm; |
|---|
| 3476 | 4632 | /* |
|---|
| 3477 | 4633 | * For paravirt, this is coupled with an exit in switch_to to |
|---|
| 3478 | 4634 | * combine the page table reload and the switch backend into |
|---|
| .. | .. |
|---|
| 3481 | 4637 | arch_start_context_switch(prev); |
|---|
| 3482 | 4638 | |
|---|
| 3483 | 4639 | /* |
|---|
| 3484 | | - * If mm is non-NULL, we pass through switch_mm(). If mm is |
|---|
| 3485 | | - * NULL, we will pass through mmdrop() in finish_task_switch(). |
|---|
| 3486 | | - * Both of these contain the full memory barrier required by |
|---|
| 3487 | | - * membarrier after storing to rq->curr, before returning to |
|---|
| 3488 | | - * user-space. |
|---|
| 4640 | + * kernel -> kernel lazy + transfer active |
|---|
| 4641 | + * user -> kernel lazy + mmgrab() active |
|---|
| 4642 | + * |
|---|
| 4643 | + * kernel -> user switch + mmdrop() active |
|---|
| 4644 | + * user -> user switch |
|---|
| 3489 | 4645 | */ |
|---|
| 3490 | | - if (!mm) { |
|---|
| 3491 | | - next->active_mm = oldmm; |
|---|
| 3492 | | - mmgrab(oldmm); |
|---|
| 3493 | | - enter_lazy_tlb(oldmm, next); |
|---|
| 3494 | | - } else |
|---|
| 3495 | | - switch_mm_irqs_off(oldmm, mm, next); |
|---|
| 4646 | + if (!next->mm) { // to kernel |
|---|
| 4647 | + enter_lazy_tlb(prev->active_mm, next); |
|---|
| 3496 | 4648 | |
|---|
| 3497 | | - if (!prev->mm) { |
|---|
| 3498 | | - prev->active_mm = NULL; |
|---|
| 3499 | | - rq->prev_mm = oldmm; |
|---|
| 4649 | + next->active_mm = prev->active_mm; |
|---|
| 4650 | + if (prev->mm) // from user |
|---|
| 4651 | + mmgrab(prev->active_mm); |
|---|
| 4652 | + else |
|---|
| 4653 | + prev->active_mm = NULL; |
|---|
| 4654 | + } else { // to user |
|---|
| 4655 | + membarrier_switch_mm(rq, prev->active_mm, next->mm); |
|---|
| 4656 | + /* |
|---|
| 4657 | + * sys_membarrier() requires an smp_mb() between setting |
|---|
| 4658 | + * rq->curr / membarrier_switch_mm() and returning to userspace. |
|---|
| 4659 | + * |
|---|
| 4660 | + * The below provides this either through switch_mm(), or in |
|---|
| 4661 | + * case 'prev->active_mm == next->mm' through |
|---|
| 4662 | + * finish_task_switch()'s mmdrop(). |
|---|
| 4663 | + */ |
|---|
| 4664 | + switch_mm_irqs_off(prev->active_mm, next->mm, next); |
|---|
| 4665 | + |
|---|
| 4666 | + if (!prev->mm) { // from kernel |
|---|
| 4667 | + /* will mmdrop() in finish_task_switch(). */ |
|---|
| 4668 | + rq->prev_mm = prev->active_mm; |
|---|
| 4669 | + prev->active_mm = NULL; |
|---|
| 4670 | + } |
|---|
| 3500 | 4671 | } |
|---|
| 3501 | 4672 | |
|---|
| 3502 | 4673 | rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); |
|---|
| .. | .. |
|---|
| 3533 | 4704 | * preemption, thus the result might have a time-of-check-to-time-of-use |
|---|
| 3534 | 4705 | * race. The caller is responsible to use it correctly, for example: |
|---|
| 3535 | 4706 | * |
|---|
| 3536 | | - * - from a non-preemptable section (of course) |
|---|
| 4707 | + * - from a non-preemptible section (of course) |
|---|
| 3537 | 4708 | * |
|---|
| 3538 | 4709 | * - from a thread that is bound to a single CPU |
|---|
| 3539 | 4710 | * |
|---|
| .. | .. |
|---|
| 3554 | 4725 | sum += cpu_rq(i)->nr_switches; |
|---|
| 3555 | 4726 | |
|---|
| 3556 | 4727 | return sum; |
|---|
| 4728 | +} |
|---|
| 4729 | + |
|---|
| 4730 | +/* |
|---|
| 4731 | + * Consumers of these two interfaces, like for example the cpuidle menu |
|---|
| 4732 | + * governor, are using nonsensical data: they prefer a shallow idle state for |
|---|
| 4733 | + * a CPU that has IO-wait pending, even though that CPU might not even end up |
|---|
| 4734 | + * running the task when it does become runnable. |
|---|
| 4735 | + */ |
|---|
| 4736 | + |
|---|
| 4737 | +unsigned long nr_iowait_cpu(int cpu) |
|---|
| 4738 | +{ |
|---|
| 4739 | + return atomic_read(&cpu_rq(cpu)->nr_iowait); |
|---|
| 3557 | 4740 | } |
|---|
| 3558 | 4741 | |
|---|
| 3559 | 4742 | /* |
|---|
| .. | .. |
|---|
| 3591 | 4774 | unsigned long i, sum = 0; |
|---|
| 3592 | 4775 | |
|---|
| 3593 | 4776 | for_each_possible_cpu(i) |
|---|
| 3594 | | - sum += atomic_read(&cpu_rq(i)->nr_iowait); |
|---|
| 4777 | + sum += nr_iowait_cpu(i); |
|---|
| 3595 | 4778 | |
|---|
| 3596 | 4779 | return sum; |
|---|
| 3597 | | -} |
|---|
| 3598 | | - |
|---|
| 3599 | | -/* |
|---|
| 3600 | | - * Consumers of these two interfaces, like for example the cpufreq menu |
|---|
| 3601 | | - * governor are using nonsensical data. Boosting frequency for a CPU that has |
|---|
| 3602 | | - * IO-wait which might not even end up running the task when it does become |
|---|
| 3603 | | - * runnable. |
|---|
| 3604 | | - */ |
|---|
| 3605 | | - |
|---|
| 3606 | | -unsigned long nr_iowait_cpu(int cpu) |
|---|
| 3607 | | -{ |
|---|
| 3608 | | - struct rq *this = cpu_rq(cpu); |
|---|
| 3609 | | - return atomic_read(&this->nr_iowait); |
|---|
| 3610 | | -} |
|---|
| 3611 | | - |
|---|
| 3612 | | -void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) |
|---|
| 3613 | | -{ |
|---|
| 3614 | | - struct rq *rq = this_rq(); |
|---|
| 3615 | | - *nr_waiters = atomic_read(&rq->nr_iowait); |
|---|
| 3616 | | - *load = rq->load.weight; |
|---|
| 3617 | 4780 | } |
|---|
| 3618 | 4781 | |
|---|
| 3619 | 4782 | #ifdef CONFIG_SMP |
|---|
| .. | .. |
|---|
| 3627 | 4790 | struct task_struct *p = current; |
|---|
| 3628 | 4791 | unsigned long flags; |
|---|
| 3629 | 4792 | int dest_cpu; |
|---|
| 4793 | + bool cond = false; |
|---|
| 4794 | + |
|---|
| 4795 | + trace_android_rvh_sched_exec(&cond); |
|---|
| 4796 | + if (cond) |
|---|
| 4797 | + return; |
|---|
| 3630 | 4798 | |
|---|
| 3631 | 4799 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
|---|
| 3632 | | - dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0, 1); |
|---|
| 4800 | + dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0); |
|---|
| 3633 | 4801 | if (dest_cpu == smp_processor_id()) |
|---|
| 3634 | 4802 | goto unlock; |
|---|
| 3635 | 4803 | |
|---|
| .. | .. |
|---|
| 3712 | 4880 | |
|---|
| 3713 | 4881 | return ns; |
|---|
| 3714 | 4882 | } |
|---|
| 4883 | +EXPORT_SYMBOL_GPL(task_sched_runtime); |
|---|
| 3715 | 4884 | |
|---|
| 3716 | 4885 | /* |
|---|
| 3717 | 4886 | * This function gets called by the timer code, with HZ frequency. |
|---|
| .. | .. |
|---|
| 3723 | 4892 | struct rq *rq = cpu_rq(cpu); |
|---|
| 3724 | 4893 | struct task_struct *curr = rq->curr; |
|---|
| 3725 | 4894 | struct rq_flags rf; |
|---|
| 4895 | + unsigned long thermal_pressure; |
|---|
| 3726 | 4896 | |
|---|
| 4897 | + arch_scale_freq_tick(); |
|---|
| 3727 | 4898 | sched_clock_tick(); |
|---|
| 3728 | 4899 | |
|---|
| 3729 | 4900 | rq_lock(rq, &rf); |
|---|
| 3730 | 4901 | |
|---|
| 4902 | + trace_android_rvh_tick_entry(rq); |
|---|
| 3731 | 4903 | update_rq_clock(rq); |
|---|
| 4904 | + thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq)); |
|---|
| 4905 | + update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure); |
|---|
| 3732 | 4906 | curr->sched_class->task_tick(rq, curr, 0); |
|---|
| 3733 | | - cpu_load_update_active(rq); |
|---|
| 3734 | 4907 | calc_global_load_tick(rq); |
|---|
| 3735 | 4908 | psi_task_tick(rq); |
|---|
| 3736 | 4909 | |
|---|
| .. | .. |
|---|
| 3742 | 4915 | rq->idle_balance = idle_cpu(cpu); |
|---|
| 3743 | 4916 | trigger_load_balance(rq); |
|---|
| 3744 | 4917 | #endif |
|---|
| 4918 | + |
|---|
| 4919 | + trace_android_vh_scheduler_tick(rq); |
|---|
| 3745 | 4920 | } |
|---|
| 3746 | 4921 | |
|---|
| 3747 | 4922 | #ifdef CONFIG_NO_HZ_FULL |
|---|
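For context on the new thermal hook: arch_scale_thermal_pressure() reports how much of the CPU's nominal capacity is currently lost to thermal capping, and update_thermal_load_avg() folds that value into a PELT signal on the rq_clock_thermal() clock sampled above. The producer-side sketch below is hypothetical; arch_scale_cpu_capacity() and arch_set_thermal_pressure() are the generic helpers of this kernel generation, everything else is made up for illustration (in mainline the cpufreq cooling device plays this role).

```c
/* Hypothetical sketch: a thermal driver reporting capacity lost to a frequency cap. */
static void report_thermal_cap(int cpu, unsigned long capped_khz,
			       unsigned long max_khz)
{
	unsigned long max_cap = arch_scale_cpu_capacity(cpu);
	unsigned long capped_cap = mult_frac(max_cap, capped_khz, max_khz);

	/* "pressure" is the capacity we cannot currently use. */
	arch_set_thermal_pressure(cpumask_of(cpu), max_cap - capped_cap);
}
```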
| .. | .. |
|---|
| 3799 | 4974 | * statistics and checks timeslices in a time-independent way, regardless |
|---|
| 3800 | 4975 | * of when exactly it is running. |
|---|
| 3801 | 4976 | */ |
|---|
| 3802 | | - if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu)) |
|---|
| 4977 | + if (!tick_nohz_tick_stopped_cpu(cpu)) |
|---|
| 3803 | 4978 | goto out_requeue; |
|---|
| 3804 | 4979 | |
|---|
| 3805 | 4980 | rq_lock_irq(rq, &rf); |
|---|
| 3806 | 4981 | curr = rq->curr; |
|---|
| 3807 | | - if (is_idle_task(curr) || cpu_is_offline(cpu)) |
|---|
| 4982 | + if (cpu_is_offline(cpu)) |
|---|
| 3808 | 4983 | goto out_unlock; |
|---|
| 3809 | 4984 | |
|---|
| 3810 | 4985 | update_rq_clock(rq); |
|---|
| 3811 | | - delta = rq_clock_task(rq) - curr->se.exec_start; |
|---|
| 3812 | 4986 | |
|---|
| 3813 | | - /* |
|---|
| 3814 | | - * Make sure the next tick runs within a reasonable |
|---|
| 3815 | | - * amount of time. |
|---|
| 3816 | | - */ |
|---|
| 3817 | | - WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); |
|---|
| 4987 | + if (!is_idle_task(curr)) { |
|---|
| 4988 | + /* |
|---|
| 4989 | + * Make sure the next tick runs within a reasonable |
|---|
| 4990 | + * amount of time. |
|---|
| 4991 | + */ |
|---|
| 4992 | + delta = rq_clock_task(rq) - curr->se.exec_start; |
|---|
| 4993 | + WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); |
|---|
| 4994 | + } |
|---|
| 3818 | 4995 | curr->sched_class->task_tick(rq, curr, 0); |
|---|
| 3819 | 4996 | |
|---|
| 4997 | + calc_load_nohz_remote(rq); |
|---|
| 3820 | 4998 | out_unlock: |
|---|
| 3821 | 4999 | rq_unlock_irq(rq, &rf); |
|---|
| 3822 | | - |
|---|
| 3823 | 5000 | out_requeue: |
|---|
| 5001 | + |
|---|
| 3824 | 5002 | /* |
|---|
| 3825 | 5003 | * Run the remote tick once per second (1Hz). This arbitrary |
|---|
| 3826 | 5004 | * frequency is large enough to avoid overload but short enough |
|---|
| .. | .. |
|---|
| 3884 | 5062 | static inline void sched_tick_stop(int cpu) { } |
|---|
| 3885 | 5063 | #endif |
|---|
| 3886 | 5064 | |
|---|
| 3887 | | -#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ |
|---|
| 5065 | +#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ |
|---|
| 3888 | 5066 | defined(CONFIG_TRACE_PREEMPT_TOGGLE)) |
|---|
| 3889 | 5067 | /* |
|---|
| 3890 | 5068 | * If the value passed in is equal to the current preempt count |
|---|
| .. | .. |
|---|
| 3990 | 5168 | if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) |
|---|
| 3991 | 5169 | && in_atomic_preempt_off()) { |
|---|
| 3992 | 5170 | pr_err("Preemption disabled at:"); |
|---|
| 3993 | | - print_ip_sym(preempt_disable_ip); |
|---|
| 3994 | | - pr_cont("\n"); |
|---|
| 5171 | + print_ip_sym(KERN_ERR, preempt_disable_ip); |
|---|
| 3995 | 5172 | } |
|---|
| 3996 | 5173 | if (panic_on_warn) |
|---|
| 3997 | 5174 | panic("scheduling while atomic\n"); |
|---|
| 5175 | + |
|---|
| 5176 | + trace_android_rvh_schedule_bug(prev); |
|---|
| 3998 | 5177 | |
|---|
| 3999 | 5178 | dump_stack(); |
|---|
| 4000 | 5179 | add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
|---|
| .. | .. |
|---|
| 4003 | 5182 | /* |
|---|
| 4004 | 5183 | * Various schedule()-time debugging checks and statistics: |
|---|
| 4005 | 5184 | */ |
|---|
| 4006 | | -static inline void schedule_debug(struct task_struct *prev) |
|---|
| 5185 | +static inline void schedule_debug(struct task_struct *prev, bool preempt) |
|---|
| 4007 | 5186 | { |
|---|
| 4008 | 5187 | #ifdef CONFIG_SCHED_STACK_END_CHECK |
|---|
| 4009 | 5188 | if (task_stack_end_corrupted(prev)) |
|---|
| 4010 | 5189 | panic("corrupted stack end detected inside scheduler\n"); |
|---|
| 5190 | + |
|---|
| 5191 | + if (task_scs_end_corrupted(prev)) |
|---|
| 5192 | + panic("corrupted shadow stack detected inside scheduler\n"); |
|---|
| 5193 | +#endif |
|---|
| 5194 | + |
|---|
| 5195 | +#ifdef CONFIG_DEBUG_ATOMIC_SLEEP |
|---|
| 5196 | + if (!preempt && prev->state && prev->non_block_count) { |
|---|
| 5197 | + printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", |
|---|
| 5198 | + prev->comm, prev->pid, prev->non_block_count); |
|---|
| 5199 | + dump_stack(); |
|---|
| 5200 | + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
|---|
| 5201 | + } |
|---|
| 4011 | 5202 | #endif |
|---|
| 4012 | 5203 | |
|---|
| 4013 | 5204 | if (unlikely(in_atomic_preempt_off())) { |
|---|
| .. | .. |
|---|
| 4019 | 5210 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); |
|---|
| 4020 | 5211 | |
|---|
| 4021 | 5212 | schedstat_inc(this_rq()->sched_count); |
|---|
| 5213 | +} |
|---|
| 5214 | + |
|---|
| 5215 | +static void put_prev_task_balance(struct rq *rq, struct task_struct *prev, |
|---|
| 5216 | + struct rq_flags *rf) |
|---|
| 5217 | +{ |
|---|
| 5218 | +#ifdef CONFIG_SMP |
|---|
| 5219 | + const struct sched_class *class; |
|---|
| 5220 | + /* |
|---|
| 5221 | + * We must do the balancing pass before put_prev_task(), such |
|---|
| 5222 | + * that when we release the rq->lock the task is in the same |
|---|
| 5223 | + * state as before we took rq->lock. |
|---|
| 5224 | + * |
|---|
| 5225 | + * We can terminate the balance pass as soon as we know there is |
|---|
| 5226 | + * a runnable task of @class priority or higher. |
|---|
| 5227 | + */ |
|---|
| 5228 | + for_class_range(class, prev->sched_class, &idle_sched_class) { |
|---|
| 5229 | + if (class->balance(rq, prev, rf)) |
|---|
| 5230 | + break; |
|---|
| 5231 | + } |
|---|
| 5232 | +#endif |
|---|
| 5233 | + |
|---|
| 5234 | + put_prev_task(rq, prev); |
|---|
| 4022 | 5235 | } |
|---|
| 4023 | 5236 | |
|---|
| 4024 | 5237 | /* |
|---|
| .. | .. |
|---|
| 4036 | 5249 | * higher scheduling class, because otherwise those lose the |
|---|
| 4037 | 5250 | * opportunity to pull in more work from other CPUs. |
|---|
| 4038 | 5251 | */ |
|---|
| 4039 | | - if (likely((prev->sched_class == &idle_sched_class || |
|---|
| 4040 | | - prev->sched_class == &fair_sched_class) && |
|---|
| 5252 | + if (likely(prev->sched_class <= &fair_sched_class && |
|---|
| 4041 | 5253 | rq->nr_running == rq->cfs.h_nr_running)) { |
|---|
| 4042 | 5254 | |
|---|
| 4043 | | - p = fair_sched_class.pick_next_task(rq, prev, rf); |
|---|
| 5255 | + p = pick_next_task_fair(rq, prev, rf); |
|---|
| 4044 | 5256 | if (unlikely(p == RETRY_TASK)) |
|---|
| 4045 | | - goto again; |
|---|
| 5257 | + goto restart; |
|---|
| 4046 | 5258 | |
|---|
| 4047 | 5259 | /* Assumes fair_sched_class->next == idle_sched_class */ |
|---|
| 4048 | | - if (unlikely(!p)) |
|---|
| 4049 | | - p = idle_sched_class.pick_next_task(rq, prev, rf); |
|---|
| 5260 | + if (!p) { |
|---|
| 5261 | + put_prev_task(rq, prev); |
|---|
| 5262 | + p = pick_next_task_idle(rq); |
|---|
| 5263 | + } |
|---|
| 4050 | 5264 | |
|---|
| 4051 | 5265 | return p; |
|---|
| 4052 | 5266 | } |
|---|
| 4053 | 5267 | |
|---|
| 4054 | | -again: |
|---|
| 5268 | +restart: |
|---|
| 5269 | + put_prev_task_balance(rq, prev, rf); |
|---|
| 5270 | + |
|---|
| 4055 | 5271 | for_each_class(class) { |
|---|
| 4056 | | - p = class->pick_next_task(rq, prev, rf); |
|---|
| 4057 | | - if (p) { |
|---|
| 4058 | | - if (unlikely(p == RETRY_TASK)) |
|---|
| 4059 | | - goto again; |
|---|
| 5272 | + p = class->pick_next_task(rq); |
|---|
| 5273 | + if (p) |
|---|
| 4060 | 5274 | return p; |
|---|
| 4061 | | - } |
|---|
| 4062 | 5275 | } |
|---|
| 4063 | 5276 | |
|---|
| 4064 | 5277 | /* The idle class should always have a runnable task: */ |
|---|
| 4065 | 5278 | BUG(); |
|---|
| 4066 | 5279 | } |
|---|
| 4067 | | - |
|---|
| 4068 | | -static void migrate_disabled_sched(struct task_struct *p); |
|---|
| 4069 | 5280 | |
|---|
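The rewritten fast-path test compares sched_class pointers directly: in this kernel generation the class descriptors are laid out back-to-back in priority order by the linker, so `prev->sched_class <= &fair_sched_class` is a compact way of asking whether prev is in the fair or idle class. A more explicit spelling of the same predicate, as a sketch:

```c
/* Equivalent, more explicit form of the fast-path class test (sketch only). */
static inline bool fair_or_idle(const struct sched_class *class)
{
	return class == &fair_sched_class || class == &idle_sched_class;
}
```

The pointer comparison saves a comparison on a hot path; the address ordering it relies on is established in sched.h and the linker script, outside this hunk.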
| 4070 | 5281 | /* |
|---|
| 4071 | 5282 | * __schedule() is the main scheduler function. |
|---|
| .. | .. |
|---|
| 4087 | 5298 | * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets |
|---|
| 4088 | 5299 | * called on the nearest possible occasion: |
|---|
| 4089 | 5300 | * |
|---|
| 4090 | | - * - If the kernel is preemptible (CONFIG_PREEMPT=y): |
|---|
| 5301 | + * - If the kernel is preemptible (CONFIG_PREEMPTION=y): |
|---|
| 4091 | 5302 | * |
|---|
| 4092 | 5303 | * - in syscall or exception context, at the next outmost |
|---|
| 4093 | 5304 | * preempt_enable(). (this might be as soon as the wake_up()'s |
|---|
| .. | .. |
|---|
| 4096 | 5307 | * - in IRQ context, return from interrupt-handler to |
|---|
| 4097 | 5308 | * preemptible context |
|---|
| 4098 | 5309 | * |
|---|
| 4099 | | - * - If the kernel is not preemptible (CONFIG_PREEMPT is not set) |
|---|
| 5310 | + * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set) |
|---|
| 4100 | 5311 | * then at the next: |
|---|
| 4101 | 5312 | * |
|---|
| 4102 | 5313 | * - cond_resched() call |
|---|
| .. | .. |
|---|
| 4106 | 5317 | * |
|---|
| 4107 | 5318 | * WARNING: must be called with preemption disabled! |
|---|
| 4108 | 5319 | */ |
|---|
| 4109 | | -static void __sched notrace __schedule(bool preempt) |
|---|
| 5320 | +static void __sched notrace __schedule(bool preempt, bool spinning_lock) |
|---|
| 4110 | 5321 | { |
|---|
| 4111 | 5322 | struct task_struct *prev, *next; |
|---|
| 4112 | 5323 | unsigned long *switch_count; |
|---|
| 5324 | + unsigned long prev_state; |
|---|
| 4113 | 5325 | struct rq_flags rf; |
|---|
| 4114 | 5326 | struct rq *rq; |
|---|
| 4115 | 5327 | int cpu; |
|---|
| .. | .. |
|---|
| 4118 | 5330 | rq = cpu_rq(cpu); |
|---|
| 4119 | 5331 | prev = rq->curr; |
|---|
| 4120 | 5332 | |
|---|
| 4121 | | - schedule_debug(prev); |
|---|
| 5333 | + schedule_debug(prev, preempt); |
|---|
| 4122 | 5334 | |
|---|
| 4123 | 5335 | if (sched_feat(HRTICK)) |
|---|
| 4124 | 5336 | hrtick_clear(rq); |
|---|
| .. | .. |
|---|
| 4129 | 5341 | /* |
|---|
| 4130 | 5342 | * Make sure that signal_pending_state()->signal_pending() below |
|---|
| 4131 | 5343 | * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) |
|---|
| 4132 | | - * done by the caller to avoid the race with signal_wake_up(). |
|---|
| 5344 | + * done by the caller to avoid the race with signal_wake_up(): |
|---|
| 4133 | 5345 | * |
|---|
| 4134 | | - * The membarrier system call requires a full memory barrier |
|---|
| 5346 | + * __set_current_state(@state) signal_wake_up() |
|---|
| 5347 | + * schedule() set_tsk_thread_flag(p, TIF_SIGPENDING) |
|---|
| 5348 | + * wake_up_state(p, state) |
|---|
| 5349 | + * LOCK rq->lock LOCK p->pi_lock |
|---|
| 5350 | + * smp_mb__after_spinlock() smp_mb__after_spinlock() |
|---|
| 5351 | + * if (signal_pending_state()) if (p->state & @state) |
|---|
| 5352 | + * |
|---|
| 5353 | + * Also, the membarrier system call requires a full memory barrier |
|---|
| 4135 | 5354 | * after coming from user-space, before storing to rq->curr. |
|---|
| 4136 | 5355 | */ |
|---|
| 4137 | 5356 | rq_lock(rq, &rf); |
|---|
| 4138 | 5357 | smp_mb__after_spinlock(); |
|---|
| 4139 | | - |
|---|
| 4140 | | - if (__migrate_disabled(prev)) |
|---|
| 4141 | | - migrate_disabled_sched(prev); |
|---|
| 4142 | 5358 | |
|---|
| 4143 | 5359 | /* Promote REQ to ACT */ |
|---|
| 4144 | 5360 | rq->clock_update_flags <<= 1; |
|---|
| 4145 | 5361 | update_rq_clock(rq); |
|---|
| 4146 | 5362 | |
|---|
| 4147 | 5363 | switch_count = &prev->nivcsw; |
|---|
| 4148 | | - if (!preempt && prev->state) { |
|---|
| 4149 | | - if (unlikely(signal_pending_state(prev->state, prev))) { |
|---|
| 5364 | + |
|---|
| 5365 | + /* |
|---|
| 5366 | + * We must load prev->state once (task_struct::state is volatile), such |
|---|
| 5367 | + * that: |
|---|
| 5368 | + * |
|---|
| 5369 | + * - we form a control dependency vs deactivate_task() below. |
|---|
| 5370 | + * - ptrace_{,un}freeze_traced() can change ->state underneath us. |
|---|
| 5371 | + */ |
|---|
| 5372 | + prev_state = prev->state; |
|---|
| 5373 | + if ((!preempt || spinning_lock) && prev_state) { |
|---|
| 5374 | + if (signal_pending_state(prev_state, prev)) { |
|---|
| 4150 | 5375 | prev->state = TASK_RUNNING; |
|---|
| 4151 | 5376 | } else { |
|---|
| 5377 | + prev->sched_contributes_to_load = |
|---|
| 5378 | + (prev_state & TASK_UNINTERRUPTIBLE) && |
|---|
| 5379 | + !(prev_state & TASK_NOLOAD) && |
|---|
| 5380 | + !(prev->flags & PF_FROZEN); |
|---|
| 5381 | + |
|---|
| 5382 | + if (prev->sched_contributes_to_load) |
|---|
| 5383 | + rq->nr_uninterruptible++; |
|---|
| 5384 | + |
|---|
| 5385 | + /* |
|---|
| 5386 | + * __schedule() ttwu() |
|---|
| 5387 | + * prev_state = prev->state; if (p->on_rq && ...) |
|---|
| 5388 | + * if (prev_state) goto out; |
|---|
| 5389 | + * p->on_rq = 0; smp_acquire__after_ctrl_dep(); |
|---|
| 5390 | + * p->state = TASK_WAKING |
|---|
| 5391 | + * |
|---|
| 5392 | + * Where __schedule() and ttwu() have matching control dependencies. |
|---|
| 5393 | + * |
|---|
| 5394 | + * After this, schedule() must not care about p->state any more. |
|---|
| 5395 | + */ |
|---|
| 4152 | 5396 | deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK); |
|---|
| 4153 | | - prev->on_rq = 0; |
|---|
| 4154 | 5397 | |
|---|
| 4155 | 5398 | if (prev->in_iowait) { |
|---|
| 4156 | 5399 | atomic_inc(&rq->nr_iowait); |
|---|
| .. | .. |
|---|
| 4165 | 5408 | clear_tsk_need_resched_lazy(prev); |
|---|
| 4166 | 5409 | clear_preempt_need_resched(); |
|---|
| 4167 | 5410 | |
|---|
| 5411 | + trace_android_rvh_schedule(prev, next, rq); |
|---|
| 4168 | 5412 | if (likely(prev != next)) { |
|---|
| 4169 | 5413 | rq->nr_switches++; |
|---|
| 4170 | | - rq->curr = next; |
|---|
| 5414 | + /* |
|---|
| 5415 | + * RCU users of rcu_dereference(rq->curr) may not see |
|---|
| 5416 | + * changes to task_struct made by pick_next_task(). |
|---|
| 5417 | + */ |
|---|
| 5418 | + RCU_INIT_POINTER(rq->curr, next); |
|---|
| 4171 | 5419 | /* |
|---|
| 4172 | 5420 | * The membarrier system call requires each architecture |
|---|
| 4173 | 5421 | * to have a full memory barrier after updating |
|---|
| .. | .. |
|---|
| 4184 | 5432 | */ |
|---|
| 4185 | 5433 | ++*switch_count; |
|---|
| 4186 | 5434 | |
|---|
| 5435 | + migrate_disable_switch(rq, prev); |
|---|
| 5436 | + psi_sched_switch(prev, next, !task_on_rq_queued(prev)); |
|---|
| 5437 | + |
|---|
| 4187 | 5438 | trace_sched_switch(preempt, prev, next); |
|---|
| 4188 | 5439 | |
|---|
| 4189 | 5440 | /* Also unlocks the rq: */ |
|---|
| 4190 | 5441 | rq = context_switch(rq, prev, next, &rf); |
|---|
| 4191 | 5442 | } else { |
|---|
| 4192 | 5443 | rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); |
|---|
| 4193 | | - rq_unlock_irq(rq, &rf); |
|---|
| 4194 | | - } |
|---|
| 4195 | 5444 | |
|---|
| 4196 | | - balance_callback(rq); |
|---|
| 5445 | + rq_unpin_lock(rq, &rf); |
|---|
| 5446 | + __balance_callbacks(rq); |
|---|
| 5447 | + raw_spin_unlock_irq(&rq->lock); |
|---|
| 5448 | + } |
|---|
| 4197 | 5449 | } |
|---|
| 4198 | 5450 | |
|---|
| 4199 | 5451 | void __noreturn do_task_dead(void) |
|---|
| .. | .. |
|---|
| 4204 | 5456 | /* Tell freezer to ignore us: */ |
|---|
| 4205 | 5457 | current->flags |= PF_NOFREEZE; |
|---|
| 4206 | 5458 | |
|---|
| 4207 | | - __schedule(false); |
|---|
| 5459 | + __schedule(false, false); |
|---|
| 4208 | 5460 | BUG(); |
|---|
| 4209 | 5461 | |
|---|
| 4210 | 5462 | /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ |
|---|
| .. | .. |
|---|
| 4214 | 5466 | |
|---|
| 4215 | 5467 | static inline void sched_submit_work(struct task_struct *tsk) |
|---|
| 4216 | 5468 | { |
|---|
| 5469 | + unsigned int task_flags; |
|---|
| 5470 | + |
|---|
| 4217 | 5471 | if (!tsk->state) |
|---|
| 4218 | 5472 | return; |
|---|
| 4219 | 5473 | |
|---|
| 5474 | + task_flags = tsk->flags; |
|---|
| 4220 | 5475 | /* |
|---|
| 4221 | 5476 | * If a worker went to sleep, notify and ask workqueue whether |
|---|
| 4222 | 5477 | * it wants to wake up a task to maintain concurrency. |
|---|
| 4223 | 5478 | * As this function is called inside the schedule() context, |
|---|
| 4224 | 5479 | * we disable preemption to avoid it calling schedule() again |
|---|
| 4225 | | - * in the possible wakeup of a kworker. |
|---|
| 5480 | + * in the possible wakeup of a kworker and because wq_worker_sleeping() |
|---|
| 5481 | + * requires it. |
|---|
| 4226 | 5482 | */ |
|---|
| 4227 | | - if (tsk->flags & PF_WQ_WORKER) { |
|---|
| 5483 | + if (task_flags & (PF_WQ_WORKER | PF_IO_WORKER)) { |
|---|
| 4228 | 5484 | preempt_disable(); |
|---|
| 4229 | | - wq_worker_sleeping(tsk); |
|---|
| 5485 | + if (task_flags & PF_WQ_WORKER) |
|---|
| 5486 | + wq_worker_sleeping(tsk); |
|---|
| 5487 | + else |
|---|
| 5488 | + io_wq_worker_sleeping(tsk); |
|---|
| 4230 | 5489 | preempt_enable_no_resched(); |
|---|
| 4231 | 5490 | } |
|---|
| 4232 | | - |
|---|
| 4233 | | - if (tsk_is_pi_blocked(tsk)) |
|---|
| 4234 | | - return; |
|---|
| 4235 | 5491 | |
|---|
| 4236 | 5492 | /* |
|---|
| 4237 | 5493 | * If we are going to sleep and we have plugged IO queued, |
|---|
| .. | .. |
|---|
| 4243 | 5499 | |
|---|
| 4244 | 5500 | static void sched_update_worker(struct task_struct *tsk) |
|---|
| 4245 | 5501 | { |
|---|
| 4246 | | - if (tsk->flags & PF_WQ_WORKER) |
|---|
| 4247 | | - wq_worker_running(tsk); |
|---|
| 5502 | + if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { |
|---|
| 5503 | + if (tsk->flags & PF_WQ_WORKER) |
|---|
| 5504 | + wq_worker_running(tsk); |
|---|
| 5505 | + else |
|---|
| 5506 | + io_wq_worker_running(tsk); |
|---|
| 5507 | + } |
|---|
| 4248 | 5508 | } |
|---|
| 4249 | 5509 | |
|---|
| 4250 | 5510 | asmlinkage __visible void __sched schedule(void) |
|---|
| .. | .. |
|---|
| 4254 | 5514 | sched_submit_work(tsk); |
|---|
| 4255 | 5515 | do { |
|---|
| 4256 | 5516 | preempt_disable(); |
|---|
| 4257 | | - __schedule(false); |
|---|
| 5517 | + __schedule(false, false); |
|---|
| 4258 | 5518 | sched_preempt_enable_no_resched(); |
|---|
| 4259 | 5519 | } while (need_resched()); |
|---|
| 4260 | 5520 | sched_update_worker(tsk); |
|---|
| .. | .. |
|---|
| 4282 | 5542 | */ |
|---|
| 4283 | 5543 | WARN_ON_ONCE(current->state); |
|---|
| 4284 | 5544 | do { |
|---|
| 4285 | | - __schedule(false); |
|---|
| 5545 | + __schedule(false, false); |
|---|
| 4286 | 5546 | } while (need_resched()); |
|---|
| 4287 | 5547 | } |
|---|
| 4288 | 5548 | |
|---|
| .. | .. |
|---|
| 4335 | 5595 | */ |
|---|
| 4336 | 5596 | preempt_disable_notrace(); |
|---|
| 4337 | 5597 | preempt_latency_start(1); |
|---|
| 4338 | | - __schedule(true); |
|---|
| 5598 | + __schedule(true, false); |
|---|
| 4339 | 5599 | preempt_latency_stop(1); |
|---|
| 4340 | 5600 | preempt_enable_no_resched_notrace(); |
|---|
| 4341 | 5601 | |
|---|
| .. | .. |
|---|
| 4370 | 5630 | |
|---|
| 4371 | 5631 | #endif |
|---|
| 4372 | 5632 | |
|---|
| 4373 | | -#ifdef CONFIG_PREEMPT |
|---|
| 5633 | +#ifdef CONFIG_PREEMPTION |
|---|
| 4374 | 5634 | /* |
|---|
| 4375 | | - * this is the entry point to schedule() from in-kernel preemption |
|---|
| 4376 | | - * off of preempt_enable. Kernel preemptions off return from interrupt |
|---|
| 4377 | | - * occur there and call schedule directly. |
|---|
| 5635 | + * This is the entry point to schedule() from in-kernel preemption |
|---|
| 5636 | + * off of preempt_enable. |
|---|
| 4378 | 5637 | */ |
|---|
| 4379 | 5638 | asmlinkage __visible void __sched notrace preempt_schedule(void) |
|---|
| 4380 | 5639 | { |
|---|
| .. | .. |
|---|
| 4390 | 5649 | } |
|---|
| 4391 | 5650 | NOKPROBE_SYMBOL(preempt_schedule); |
|---|
| 4392 | 5651 | EXPORT_SYMBOL(preempt_schedule); |
|---|
| 5652 | + |
|---|
| 5653 | +#ifdef CONFIG_PREEMPT_RT |
|---|
| 5654 | +void __sched notrace preempt_schedule_lock(void) |
|---|
| 5655 | +{ |
|---|
| 5656 | + do { |
|---|
| 5657 | + preempt_disable(); |
|---|
| 5658 | + __schedule(true, true); |
|---|
| 5659 | + sched_preempt_enable_no_resched(); |
|---|
| 5660 | + } while (need_resched()); |
|---|
| 5661 | +} |
|---|
| 5662 | +NOKPROBE_SYMBOL(preempt_schedule_lock); |
|---|
| 5663 | +EXPORT_SYMBOL(preempt_schedule_lock); |
|---|
| 5664 | +#endif |
|---|
| 4393 | 5665 | |
|---|
| 4394 | 5666 | /** |
|---|
| 4395 | 5667 | * preempt_schedule_notrace - preempt_schedule called by tracing |
|---|
| .. | .. |
|---|
| 4437 | 5709 | * an infinite recursion. |
|---|
| 4438 | 5710 | */ |
|---|
| 4439 | 5711 | prev_ctx = exception_enter(); |
|---|
| 4440 | | - __schedule(true); |
|---|
| 5712 | + __schedule(true, false); |
|---|
| 4441 | 5713 | exception_exit(prev_ctx); |
|---|
| 4442 | 5714 | |
|---|
| 4443 | 5715 | preempt_latency_stop(1); |
|---|
| .. | .. |
|---|
| 4446 | 5718 | } |
|---|
| 4447 | 5719 | EXPORT_SYMBOL_GPL(preempt_schedule_notrace); |
|---|
| 4448 | 5720 | |
|---|
| 4449 | | -#endif /* CONFIG_PREEMPT */ |
|---|
| 5721 | +#endif /* CONFIG_PREEMPTION */ |
|---|
| 4450 | 5722 | |
|---|
| 4451 | 5723 | /* |
|---|
| 4452 | | - * this is the entry point to schedule() from kernel preemption |
|---|
| 5724 | + * This is the entry point to schedule() from kernel preemption |
|---|
| 4453 | 5725 | * off of irq context. |
|---|
| 4454 | 5726 | * Note that this is called and returns with irqs disabled. This will |
|---|
| 4455 | 5727 | * protect us against recursive calling from irq. |
|---|
| .. | .. |
|---|
| 4466 | 5738 | do { |
|---|
| 4467 | 5739 | preempt_disable(); |
|---|
| 4468 | 5740 | local_irq_enable(); |
|---|
| 4469 | | - __schedule(true); |
|---|
| 5741 | + __schedule(true, false); |
|---|
| 4470 | 5742 | local_irq_disable(); |
|---|
| 4471 | 5743 | sched_preempt_enable_no_resched(); |
|---|
| 4472 | 5744 | } while (need_resched()); |
|---|
| .. | .. |
|---|
| 4477 | 5749 | int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, |
|---|
| 4478 | 5750 | void *key) |
|---|
| 4479 | 5751 | { |
|---|
| 4480 | | - return try_to_wake_up(curr->private, mode, wake_flags, 1); |
|---|
| 5752 | + WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~(WF_SYNC | WF_ANDROID_VENDOR)); |
|---|
| 5753 | + return try_to_wake_up(curr->private, mode, wake_flags); |
|---|
| 4481 | 5754 | } |
|---|
| 4482 | 5755 | EXPORT_SYMBOL(default_wake_function); |
|---|
| 5756 | + |
|---|
| 5757 | +static void __setscheduler_prio(struct task_struct *p, int prio) |
|---|
| 5758 | +{ |
|---|
| 5759 | + if (dl_prio(prio)) |
|---|
| 5760 | + p->sched_class = &dl_sched_class; |
|---|
| 5761 | + else if (rt_prio(prio)) |
|---|
| 5762 | + p->sched_class = &rt_sched_class; |
|---|
| 5763 | + else |
|---|
| 5764 | + p->sched_class = &fair_sched_class; |
|---|
| 5765 | + |
|---|
| 5766 | + p->prio = prio; |
|---|
| 5767 | +} |
|---|
| 4483 | 5768 | |
|---|
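
For readers of __setscheduler_prio() above, the priority ranges it dispatches on are summarized below; this is an editorial note (values as assumed from include/linux/sched/prio.h), not part of the diff:

```c
/*
 * dl_prio(prio) : prio < 0                          -> dl_sched_class
 * rt_prio(prio) : prio < MAX_RT_PRIO (assumed 100)  -> rt_sched_class
 *                 (checked only after dl_prio(), since rt_prio() is
 *                  also true for negative, i.e. deadline, priorities)
 * otherwise     : 100..139 == NICE_TO_PRIO(-20..19) -> fair_sched_class
 */
```
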
| 4484 | 5769 | #ifdef CONFIG_RT_MUTEXES |
|---|
| 4485 | 5770 | |
|---|
| .. | .. |
|---|
| 4517 | 5802 | struct rq_flags rf; |
|---|
| 4518 | 5803 | struct rq *rq; |
|---|
| 4519 | 5804 | |
|---|
| 5805 | + trace_android_rvh_rtmutex_prepare_setprio(p, pi_task); |
|---|
| 4520 | 5806 | /* XXX used to be waiter->prio, not waiter->task->prio */ |
|---|
| 4521 | 5807 | prio = __rt_effective_prio(pi_task, p->normal_prio); |
|---|
| 4522 | 5808 | |
|---|
| .. | .. |
|---|
| 4591 | 5877 | if (!dl_prio(p->normal_prio) || |
|---|
| 4592 | 5878 | (pi_task && dl_prio(pi_task->prio) && |
|---|
| 4593 | 5879 | dl_entity_preempt(&pi_task->dl, &p->dl))) { |
|---|
| 4594 | | - p->dl.dl_boosted = 1; |
|---|
| 5880 | + p->dl.pi_se = pi_task->dl.pi_se; |
|---|
| 4595 | 5881 | queue_flag |= ENQUEUE_REPLENISH; |
|---|
| 4596 | | - } else |
|---|
| 4597 | | - p->dl.dl_boosted = 0; |
|---|
| 4598 | | - p->sched_class = &dl_sched_class; |
|---|
| 5882 | + } else { |
|---|
| 5883 | + p->dl.pi_se = &p->dl; |
|---|
| 5884 | + } |
|---|
| 4599 | 5885 | } else if (rt_prio(prio)) { |
|---|
| 4600 | 5886 | if (dl_prio(oldprio)) |
|---|
| 4601 | | - p->dl.dl_boosted = 0; |
|---|
| 5887 | + p->dl.pi_se = &p->dl; |
|---|
| 4602 | 5888 | if (oldprio < prio) |
|---|
| 4603 | 5889 | queue_flag |= ENQUEUE_HEAD; |
|---|
| 4604 | | - p->sched_class = &rt_sched_class; |
|---|
| 4605 | 5890 | } else { |
|---|
| 4606 | 5891 | if (dl_prio(oldprio)) |
|---|
| 4607 | | - p->dl.dl_boosted = 0; |
|---|
| 5892 | + p->dl.pi_se = &p->dl; |
|---|
| 4608 | 5893 | if (rt_prio(oldprio)) |
|---|
| 4609 | 5894 | p->rt.timeout = 0; |
|---|
| 4610 | | - p->sched_class = &fair_sched_class; |
|---|
| 4611 | 5895 | } |
|---|
| 4612 | 5896 | |
|---|
| 4613 | | - p->prio = prio; |
|---|
| 5897 | + __setscheduler_prio(p, prio); |
|---|
| 4614 | 5898 | |
|---|
| 4615 | 5899 | if (queued) |
|---|
| 4616 | 5900 | enqueue_task(rq, p, queue_flag); |
|---|
| 4617 | 5901 | if (running) |
|---|
| 4618 | | - set_curr_task(rq, p); |
|---|
| 5902 | + set_next_task(rq, p); |
|---|
| 4619 | 5903 | |
|---|
| 4620 | 5904 | check_class_changed(rq, p, prev_class, oldprio); |
|---|
| 4621 | 5905 | out_unlock: |
|---|
| 4622 | 5906 | /* Avoid rq from going away on us: */ |
|---|
| 4623 | 5907 | preempt_disable(); |
|---|
| 4624 | | - __task_rq_unlock(rq, &rf); |
|---|
| 4625 | 5908 | |
|---|
| 4626 | | - balance_callback(rq); |
|---|
| 5909 | + rq_unpin_lock(rq, &rf); |
|---|
| 5910 | + __balance_callbacks(rq); |
|---|
| 5911 | + raw_spin_unlock(&rq->lock); |
|---|
| 5912 | + |
|---|
| 4627 | 5913 | preempt_enable(); |
|---|
| 4628 | 5914 | } |
|---|
| 4629 | 5915 | #else |
|---|
| .. | .. |
|---|
| 4635 | 5921 | |
|---|
| 4636 | 5922 | void set_user_nice(struct task_struct *p, long nice) |
|---|
| 4637 | 5923 | { |
|---|
| 4638 | | - bool queued, running; |
|---|
| 4639 | | - int old_prio, delta; |
|---|
| 5924 | + bool queued, running, allowed = false; |
|---|
| 5925 | + int old_prio; |
|---|
| 4640 | 5926 | struct rq_flags rf; |
|---|
| 4641 | 5927 | struct rq *rq; |
|---|
| 4642 | 5928 | |
|---|
| 4643 | | - if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) |
|---|
| 5929 | + trace_android_rvh_set_user_nice(p, &nice, &allowed); |
|---|
| 5930 | + if ((task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) && !allowed) |
|---|
| 4644 | 5931 | return; |
|---|
| 4645 | 5932 | /* |
|---|
| 4646 | 5933 | * We have to be careful, if called from sys_setpriority(), |
|---|
| .. | .. |
|---|
| 4667 | 5954 | put_prev_task(rq, p); |
|---|
| 4668 | 5955 | |
|---|
| 4669 | 5956 | p->static_prio = NICE_TO_PRIO(nice); |
|---|
| 4670 | | - set_load_weight(p, true); |
|---|
| 5957 | + set_load_weight(p); |
|---|
| 4671 | 5958 | old_prio = p->prio; |
|---|
| 4672 | 5959 | p->prio = effective_prio(p); |
|---|
| 4673 | | - delta = p->prio - old_prio; |
|---|
| 4674 | 5960 | |
|---|
| 4675 | | - if (queued) { |
|---|
| 5961 | + if (queued) |
|---|
| 4676 | 5962 | enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); |
|---|
| 4677 | | - /* |
|---|
| 4678 | | - * If the task increased its priority or is running and |
|---|
| 4679 | | - * lowered its priority, then reschedule its CPU: |
|---|
| 4680 | | - */ |
|---|
| 4681 | | - if (delta < 0 || (delta > 0 && task_running(rq, p))) |
|---|
| 4682 | | - resched_curr(rq); |
|---|
| 4683 | | - } |
|---|
| 4684 | 5963 | if (running) |
|---|
| 4685 | | - set_curr_task(rq, p); |
|---|
| 5964 | + set_next_task(rq, p); |
|---|
| 5965 | + |
|---|
| 5966 | + /* |
|---|
| 5967 | + * If the task increased its priority or is running and |
|---|
| 5968 | + * lowered its priority, then reschedule its CPU: |
|---|
| 5969 | + */ |
|---|
| 5970 | + p->sched_class->prio_changed(rq, p, old_prio); |
|---|
| 5971 | + |
|---|
| 4686 | 5972 | out_unlock: |
|---|
| 4687 | 5973 | task_rq_unlock(rq, p, &rf); |
|---|
| 4688 | 5974 | } |
|---|
| .. | .. |
|---|
| 4767 | 6053 | return 0; |
|---|
| 4768 | 6054 | |
|---|
| 4769 | 6055 | #ifdef CONFIG_SMP |
|---|
| 4770 | | - if (!llist_empty(&rq->wake_list)) |
|---|
| 6056 | + if (rq->ttwu_pending) |
|---|
| 4771 | 6057 | return 0; |
|---|
| 4772 | 6058 | #endif |
|---|
| 4773 | 6059 | |
|---|
| .. | .. |
|---|
| 4790 | 6076 | |
|---|
| 4791 | 6077 | return 1; |
|---|
| 4792 | 6078 | } |
|---|
| 6079 | +EXPORT_SYMBOL_GPL(available_idle_cpu); |
|---|
| 4793 | 6080 | |
|---|
| 4794 | 6081 | /** |
|---|
| 4795 | 6082 | * idle_task - return the idle task for a given CPU. |
|---|
| .. | .. |
|---|
| 4841 | 6128 | */ |
|---|
| 4842 | 6129 | p->rt_priority = attr->sched_priority; |
|---|
| 4843 | 6130 | p->normal_prio = normal_prio(p); |
|---|
| 4844 | | - set_load_weight(p, true); |
|---|
| 4845 | | -} |
|---|
| 4846 | | - |
|---|
| 4847 | | -/* Actually do priority change: must hold pi & rq lock. */ |
|---|
| 4848 | | -static void __setscheduler(struct rq *rq, struct task_struct *p, |
|---|
| 4849 | | - const struct sched_attr *attr, bool keep_boost) |
|---|
| 4850 | | -{ |
|---|
| 4851 | | - /* |
|---|
| 4852 | | - * If params can't change scheduling class changes aren't allowed |
|---|
| 4853 | | - * either. |
|---|
| 4854 | | - */ |
|---|
| 4855 | | - if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS) |
|---|
| 4856 | | - return; |
|---|
| 4857 | | - |
|---|
| 4858 | | - __setscheduler_params(p, attr); |
|---|
| 4859 | | - |
|---|
| 4860 | | - /* |
|---|
| 4861 | | - * Keep a potential priority boosting if called from |
|---|
| 4862 | | - * sched_setscheduler(). |
|---|
| 4863 | | - */ |
|---|
| 4864 | | - p->prio = normal_prio(p); |
|---|
| 4865 | | - if (keep_boost) |
|---|
| 4866 | | - p->prio = rt_effective_prio(p, p->prio); |
|---|
| 4867 | | - |
|---|
| 4868 | | - if (dl_prio(p->prio)) |
|---|
| 4869 | | - p->sched_class = &dl_sched_class; |
|---|
| 4870 | | - else if (rt_prio(p->prio)) |
|---|
| 4871 | | - p->sched_class = &rt_sched_class; |
|---|
| 4872 | | - else |
|---|
| 4873 | | - p->sched_class = &fair_sched_class; |
|---|
| 6131 | + set_load_weight(p); |
|---|
| 4874 | 6132 | } |
|---|
| 4875 | 6133 | |
|---|
| 4876 | 6134 | /* |
|---|
| .. | .. |
|---|
| 4893 | 6151 | const struct sched_attr *attr, |
|---|
| 4894 | 6152 | bool user, bool pi) |
|---|
| 4895 | 6153 | { |
|---|
| 4896 | | - int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : |
|---|
| 4897 | | - MAX_RT_PRIO - 1 - attr->sched_priority; |
|---|
| 4898 | | - int retval, oldprio, oldpolicy = -1, queued, running; |
|---|
| 4899 | | - int new_effective_prio, policy = attr->sched_policy; |
|---|
| 6154 | + int oldpolicy = -1, policy = attr->sched_policy; |
|---|
| 6155 | + int retval, oldprio, newprio, queued, running; |
|---|
| 4900 | 6156 | const struct sched_class *prev_class; |
|---|
| 6157 | + struct callback_head *head; |
|---|
| 4901 | 6158 | struct rq_flags rf; |
|---|
| 4902 | 6159 | int reset_on_fork; |
|---|
| 4903 | 6160 | int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; |
|---|
| .. | .. |
|---|
| 4969 | 6226 | * Treat SCHED_IDLE as nice 20. Only allow a switch to |
|---|
| 4970 | 6227 | * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. |
|---|
| 4971 | 6228 | */ |
|---|
| 4972 | | - if (idle_policy(p->policy) && !idle_policy(policy)) { |
|---|
| 6229 | + if (task_has_idle_policy(p) && !idle_policy(policy)) { |
|---|
| 4973 | 6230 | if (!can_nice(p, task_nice(p))) |
|---|
| 4974 | 6231 | return -EPERM; |
|---|
| 4975 | 6232 | } |
|---|
| .. | .. |
|---|
| 4980 | 6237 | |
|---|
| 4981 | 6238 | /* Normal users shall not reset the sched_reset_on_fork flag: */ |
|---|
| 4982 | 6239 | if (p->sched_reset_on_fork && !reset_on_fork) |
|---|
| 6240 | + return -EPERM; |
|---|
| 6241 | + |
|---|
| 6242 | + /* Can't change util-clamps */ |
|---|
| 6243 | + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) |
|---|
| 4983 | 6244 | return -EPERM; |
|---|
| 4984 | 6245 | } |
|---|
| 4985 | 6246 | |
|---|
| .. | .. |
|---|
| 5013 | 6274 | * Changing the policy of the stop threads is a very bad idea: |
|---|
| 5014 | 6275 | */ |
|---|
| 5015 | 6276 | if (p == rq->stop) { |
|---|
| 5016 | | - task_rq_unlock(rq, p, &rf); |
|---|
| 5017 | | - return -EINVAL; |
|---|
| 6277 | + retval = -EINVAL; |
|---|
| 6278 | + goto unlock; |
|---|
| 5018 | 6279 | } |
|---|
| 5019 | 6280 | |
|---|
| 5020 | 6281 | /* |
|---|
| .. | .. |
|---|
| 5032 | 6293 | goto change; |
|---|
| 5033 | 6294 | |
|---|
| 5034 | 6295 | p->sched_reset_on_fork = reset_on_fork; |
|---|
| 5035 | | - task_rq_unlock(rq, p, &rf); |
|---|
| 5036 | | - return 0; |
|---|
| 6296 | + retval = 0; |
|---|
| 6297 | + goto unlock; |
|---|
| 5037 | 6298 | } |
|---|
| 5038 | 6299 | change: |
|---|
| 5039 | 6300 | |
|---|
| .. | .. |
|---|
| 5046 | 6307 | if (rt_bandwidth_enabled() && rt_policy(policy) && |
|---|
| 5047 | 6308 | task_group(p)->rt_bandwidth.rt_runtime == 0 && |
|---|
| 5048 | 6309 | !task_group_is_autogroup(task_group(p))) { |
|---|
| 5049 | | - task_rq_unlock(rq, p, &rf); |
|---|
| 5050 | | - return -EPERM; |
|---|
| 6310 | + retval = -EPERM; |
|---|
| 6311 | + goto unlock; |
|---|
| 5051 | 6312 | } |
|---|
| 5052 | 6313 | #endif |
|---|
| 5053 | 6314 | #ifdef CONFIG_SMP |
|---|
| .. | .. |
|---|
| 5062 | 6323 | */ |
|---|
| 5063 | 6324 | if (!cpumask_subset(span, p->cpus_ptr) || |
|---|
| 5064 | 6325 | rq->rd->dl_bw.bw == 0) { |
|---|
| 5065 | | - task_rq_unlock(rq, p, &rf); |
|---|
| 5066 | | - return -EPERM; |
|---|
| 6326 | + retval = -EPERM; |
|---|
| 6327 | + goto unlock; |
|---|
| 5067 | 6328 | } |
|---|
| 5068 | 6329 | } |
|---|
| 5069 | 6330 | #endif |
|---|
| .. | .. |
|---|
| 5082 | 6343 | * is available. |
|---|
| 5083 | 6344 | */ |
|---|
| 5084 | 6345 | if ((dl_policy(policy) || dl_task(p)) && sched_dl_overflow(p, policy, attr)) { |
|---|
| 5085 | | - task_rq_unlock(rq, p, &rf); |
|---|
| 5086 | | - return -EBUSY; |
|---|
| 6346 | + retval = -EBUSY; |
|---|
| 6347 | + goto unlock; |
|---|
| 5087 | 6348 | } |
|---|
| 5088 | 6349 | |
|---|
| 5089 | 6350 | p->sched_reset_on_fork = reset_on_fork; |
|---|
| 5090 | 6351 | oldprio = p->prio; |
|---|
| 5091 | 6352 | |
|---|
| 6353 | + newprio = __normal_prio(policy, attr->sched_priority, attr->sched_nice); |
|---|
| 5092 | 6354 | if (pi) { |
|---|
| 5093 | 6355 | /* |
|---|
| 5094 | 6356 | * Take priority boosted tasks into account. If the new |
|---|
| .. | .. |
|---|
| 5097 | 6359 | * the runqueue. This will be done when the task deboost |
|---|
| 5098 | 6360 | * itself. |
|---|
| 5099 | 6361 | */ |
|---|
| 5100 | | - new_effective_prio = rt_effective_prio(p, newprio); |
|---|
| 5101 | | - if (new_effective_prio == oldprio) |
|---|
| 6362 | + newprio = rt_effective_prio(p, newprio); |
|---|
| 6363 | + if (newprio == oldprio) |
|---|
| 5102 | 6364 | queue_flags &= ~DEQUEUE_MOVE; |
|---|
| 5103 | 6365 | } |
|---|
| 5104 | 6366 | |
|---|
| .. | .. |
|---|
| 5111 | 6373 | |
|---|
| 5112 | 6374 | prev_class = p->sched_class; |
|---|
| 5113 | 6375 | |
|---|
| 5114 | | - __setscheduler(rq, p, attr, pi); |
|---|
| 6376 | + if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { |
|---|
| 6377 | + __setscheduler_params(p, attr); |
|---|
| 6378 | + __setscheduler_prio(p, newprio); |
|---|
| 6379 | + trace_android_rvh_setscheduler(p); |
|---|
| 6380 | + } |
|---|
| 5115 | 6381 | __setscheduler_uclamp(p, attr); |
|---|
| 5116 | 6382 | |
|---|
| 5117 | 6383 | if (queued) { |
|---|
| .. | .. |
|---|
| 5125 | 6391 | enqueue_task(rq, p, queue_flags); |
|---|
| 5126 | 6392 | } |
|---|
| 5127 | 6393 | if (running) |
|---|
| 5128 | | - set_curr_task(rq, p); |
|---|
| 6394 | + set_next_task(rq, p); |
|---|
| 5129 | 6395 | |
|---|
| 5130 | 6396 | check_class_changed(rq, p, prev_class, oldprio); |
|---|
| 5131 | 6397 | |
|---|
| 5132 | 6398 | /* Avoid rq from going away on us: */ |
|---|
| 5133 | 6399 | preempt_disable(); |
|---|
| 6400 | + head = splice_balance_callbacks(rq); |
|---|
| 5134 | 6401 | task_rq_unlock(rq, p, &rf); |
|---|
| 5135 | 6402 | |
|---|
| 5136 | 6403 | if (pi) |
|---|
| 5137 | 6404 | rt_mutex_adjust_pi(p); |
|---|
| 5138 | 6405 | |
|---|
| 5139 | 6406 | /* Run balance callbacks after we've adjusted the PI chain: */ |
|---|
| 5140 | | - balance_callback(rq); |
|---|
| 6407 | + balance_callbacks(rq, head); |
|---|
| 5141 | 6408 | preempt_enable(); |
|---|
| 5142 | 6409 | |
|---|
| 5143 | 6410 | return 0; |
|---|
| 6411 | + |
|---|
| 6412 | +unlock: |
|---|
| 6413 | + task_rq_unlock(rq, p, &rf); |
|---|
| 6414 | + return retval; |
|---|
| 5144 | 6415 | } |
|---|
| 5145 | 6416 | |
|---|
| 5146 | 6417 | static int _sched_setscheduler(struct task_struct *p, int policy, |
|---|
| .. | .. |
|---|
| 5152 | 6423 | .sched_nice = PRIO_TO_NICE(p->static_prio), |
|---|
| 5153 | 6424 | }; |
|---|
| 5154 | 6425 | |
|---|
| 6426 | + if (IS_ENABLED(CONFIG_ROCKCHIP_OPTIMIZE_RT_PRIO) && |
|---|
| 6427 | + ((policy == SCHED_FIFO) || (policy == SCHED_RR))) { |
|---|
| 6428 | + attr.sched_priority /= 2; |
|---|
| 6429 | + if (!check) |
|---|
| 6430 | + attr.sched_priority += MAX_RT_PRIO / 2; |
|---|
| 6431 | + if (!attr.sched_priority) |
|---|
| 6432 | + attr.sched_priority = 1; |
|---|
| 6433 | + } |
|---|
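
The vendor block above compresses requested FIFO/RR priorities: checked (user) requests are halved, while nocheck (kernel-internal) requests are additionally shifted into the upper half of the RT range. Worked numbers, assuming MAX_RT_PRIO == 100 so MAX_RT_PRIO / 2 == 50:

```c
/*
 * user   (check)   : sched_priority 90 -> 45,  1 -> 0 -> clamped to 1
 * kernel (nocheck) : sched_priority 90 -> 95, 10 -> 55
 *
 * i.e. user-requested RT tasks land in [1, 49] and kernel-internal
 * callers in [50, 99], keeping kernel threads above application RT.
 */
```
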
| 5155 | 6434 | /* Fixup the legacy SCHED_RESET_ON_FORK hack. */ |
|---|
| 5156 | 6435 | if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { |
|---|
| 5157 | 6436 | attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; |
|---|
| .. | .. |
|---|
| 5166 | 6445 | * @p: the task in question. |
|---|
| 5167 | 6446 | * @policy: new policy. |
|---|
| 5168 | 6447 | * @param: structure containing the new RT priority. |
|---|
| 6448 | + * |
|---|
| 6449 | + * Use sched_set_fifo(), read its comment. |
|---|
| 5169 | 6450 | * |
|---|
| 5170 | 6451 | * Return: 0 on success. An error code otherwise. |
|---|
| 5171 | 6452 | * |
|---|
| .. | .. |
|---|
| 5188 | 6469 | { |
|---|
| 5189 | 6470 | return __sched_setscheduler(p, attr, false, true); |
|---|
| 5190 | 6471 | } |
|---|
| 6472 | +EXPORT_SYMBOL_GPL(sched_setattr_nocheck); |
|---|
| 5191 | 6473 | |
|---|
| 5192 | 6474 | /** |
|---|
| 5193 | 6475 | * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. |
|---|
| .. | .. |
|---|
| 5208 | 6490 | return _sched_setscheduler(p, policy, param, false); |
|---|
| 5209 | 6491 | } |
|---|
| 5210 | 6492 | EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); |
|---|
| 6493 | + |
|---|
| 6494 | +/* |
|---|
| 6495 | + * SCHED_FIFO is a broken scheduler model; that is, it is fundamentally |
|---|
| 6496 | + * incapable of resource management, which is the one thing an OS really should |
|---|
| 6497 | + * be doing. |
|---|
| 6498 | + * |
|---|
| 6499 | + * This is of course the reason it is limited to privileged users only. |
|---|
| 6500 | + * |
|---|
| 6501 | + * Worse still; it is fundamentally impossible to compose static priority |
|---|
| 6502 | + * workloads. You cannot take two correctly working static prio workloads |
|---|
| 6503 | + * and smash them together and still expect them to work. |
|---|
| 6504 | + * |
|---|
| 6505 | + * For this reason 'all' FIFO tasks the kernel creates are basically at: |
|---|
| 6506 | + * |
|---|
| 6507 | + * MAX_RT_PRIO / 2 |
|---|
| 6508 | + * |
|---|
| 6509 | + * The administrator _MUST_ configure the system; the kernel simply doesn't |
|---|
| 6510 | + * know enough information to make a sensible choice. |
|---|
| 6511 | + */ |
|---|
| 6512 | +void sched_set_fifo(struct task_struct *p) |
|---|
| 6513 | +{ |
|---|
| 6514 | + struct sched_param sp = { .sched_priority = MAX_RT_PRIO / 2 }; |
|---|
| 6515 | + WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); |
|---|
| 6516 | +} |
|---|
| 6517 | +EXPORT_SYMBOL_GPL(sched_set_fifo); |
|---|
| 6518 | + |
|---|
| 6519 | +/* |
|---|
| 6520 | + * For when you don't much care about FIFO, but want to be above SCHED_NORMAL. |
|---|
| 6521 | + */ |
|---|
| 6522 | +void sched_set_fifo_low(struct task_struct *p) |
|---|
| 6523 | +{ |
|---|
| 6524 | + struct sched_param sp = { .sched_priority = 1 }; |
|---|
| 6525 | + WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); |
|---|
| 6526 | +} |
|---|
| 6527 | +EXPORT_SYMBOL_GPL(sched_set_fifo_low); |
|---|
| 6528 | + |
|---|
| 6529 | +void sched_set_normal(struct task_struct *p, int nice) |
|---|
| 6530 | +{ |
|---|
| 6531 | + struct sched_attr attr = { |
|---|
| 6532 | + .sched_policy = SCHED_NORMAL, |
|---|
| 6533 | + .sched_nice = nice, |
|---|
| 6534 | + }; |
|---|
| 6535 | + WARN_ON_ONCE(sched_setattr_nocheck(p, &attr) != 0); |
|---|
| 6536 | +} |
|---|
| 6537 | +EXPORT_SYMBOL_GPL(sched_set_normal); |
|---|
| 5211 | 6538 | |
|---|
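
sched_set_fifo(), sched_set_fifo_low() and sched_set_normal() above give kernel threads a sanctioned way to request an RT (or normal) priority without open-coding sched_setscheduler_nocheck(). A minimal out-of-tree module sketch — the module and thread names are made up, only the sched_set_fifo() call comes from this hunk:

```c
/* Hypothetical demo module: "fifo_demo*" names are invented. */
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/err.h>

static struct task_struct *worker;

static int worker_fn(void *unused)
{
	while (!kthread_should_stop())
		msleep(100);		/* placeholder "work" */
	return 0;
}

static int __init fifo_demo_init(void)
{
	worker = kthread_create(worker_fn, NULL, "fifo_demo");
	if (IS_ERR(worker))
		return PTR_ERR(worker);

	/* Let the core pick the RT priority (MAX_RT_PRIO / 2) for us. */
	sched_set_fifo(worker);
	wake_up_process(worker);
	return 0;
}

static void __exit fifo_demo_exit(void)
{
	kthread_stop(worker);
}

module_init(fifo_demo_init);
module_exit(fifo_demo_exit);
MODULE_LICENSE("GPL");
```
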
| 5212 | 6539 | static int |
|---|
| 5213 | 6540 | do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) |
|---|
| .. | .. |
|---|
| 5239 | 6566 | u32 size; |
|---|
| 5240 | 6567 | int ret; |
|---|
| 5241 | 6568 | |
|---|
| 5242 | | - if (!access_ok(VERIFY_WRITE, uattr, SCHED_ATTR_SIZE_VER0)) |
|---|
| 5243 | | - return -EFAULT; |
|---|
| 5244 | | - |
|---|
| 5245 | 6569 | /* Zero the full structure, so that a short copy will be nice: */ |
|---|
| 5246 | 6570 | memset(attr, 0, sizeof(*attr)); |
|---|
| 5247 | 6571 | |
|---|
| .. | .. |
|---|
| 5249 | 6573 | if (ret) |
|---|
| 5250 | 6574 | return ret; |
|---|
| 5251 | 6575 | |
|---|
| 5252 | | - /* Bail out on silly large: */ |
|---|
| 5253 | | - if (size > PAGE_SIZE) |
|---|
| 5254 | | - goto err_size; |
|---|
| 5255 | | - |
|---|
| 5256 | 6576 | /* ABI compatibility quirk: */ |
|---|
| 5257 | 6577 | if (!size) |
|---|
| 5258 | 6578 | size = SCHED_ATTR_SIZE_VER0; |
|---|
| 5259 | | - |
|---|
| 5260 | | - if (size < SCHED_ATTR_SIZE_VER0) |
|---|
| 6579 | + if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) |
|---|
| 5261 | 6580 | goto err_size; |
|---|
| 5262 | 6581 | |
|---|
| 5263 | | - /* |
|---|
| 5264 | | - * If we're handed a bigger struct than we know of, |
|---|
| 5265 | | - * ensure all the unknown bits are 0 - i.e. new |
|---|
| 5266 | | - * user-space does not rely on any kernel feature |
|---|
| 5267 | | - * extensions we dont know about yet. |
|---|
| 5268 | | - */ |
|---|
| 5269 | | - if (size > sizeof(*attr)) { |
|---|
| 5270 | | - unsigned char __user *addr; |
|---|
| 5271 | | - unsigned char __user *end; |
|---|
| 5272 | | - unsigned char val; |
|---|
| 5273 | | - |
|---|
| 5274 | | - addr = (void __user *)uattr + sizeof(*attr); |
|---|
| 5275 | | - end = (void __user *)uattr + size; |
|---|
| 5276 | | - |
|---|
| 5277 | | - for (; addr < end; addr++) { |
|---|
| 5278 | | - ret = get_user(val, addr); |
|---|
| 5279 | | - if (ret) |
|---|
| 5280 | | - return ret; |
|---|
| 5281 | | - if (val) |
|---|
| 5282 | | - goto err_size; |
|---|
| 5283 | | - } |
|---|
| 5284 | | - size = sizeof(*attr); |
|---|
| 6582 | + ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); |
|---|
| 6583 | + if (ret) { |
|---|
| 6584 | + if (ret == -E2BIG) |
|---|
| 6585 | + goto err_size; |
|---|
| 6586 | + return ret; |
|---|
| 5285 | 6587 | } |
|---|
| 5286 | | - |
|---|
| 5287 | | - ret = copy_from_user(attr, uattr, size); |
|---|
| 5288 | | - if (ret) |
|---|
| 5289 | | - return -EFAULT; |
|---|
| 5290 | 6588 | |
|---|
| 5291 | 6589 | if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) && |
|---|
| 5292 | 6590 | size < SCHED_ATTR_SIZE_VER1) |
|---|
| .. | .. |
|---|
| 5303 | 6601 | err_size: |
|---|
| 5304 | 6602 | put_user(sizeof(*attr), &uattr->size); |
|---|
| 5305 | 6603 | return -E2BIG; |
|---|
| 6604 | +} |
|---|
| 6605 | + |
|---|
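
sched_copy_attr() now leans on copy_struct_from_user() for the extensible-struct ABI: a user buffer larger than the kernel's struct sched_attr is accepted as long as the extra bytes are zero, otherwise the call fails with E2BIG. A user-space sketch of that ABI — the struct layout is copied from the sched_setattr(2) man page, and since glibc has no wrapper the raw syscall number from <sys/syscall.h> is used:

```c
#define _GNU_SOURCE
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;
	uint64_t sched_deadline;
	uint64_t sched_period;
	uint32_t sched_util_min;	/* SCHED_ATTR_SIZE_VER1 fields */
	uint32_t sched_util_max;
};

int main(void)
{
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));	/* zeroed tail keeps future kernels happy */
	attr.size = sizeof(attr);
	attr.sched_policy = 0;		/* SCHED_OTHER */
	attr.sched_nice = 5;

	if (syscall(SYS_sched_setattr, 0, &attr, 0))	/* pid 0 == self */
		perror("sched_setattr");
	else
		printf("nice set to %d via sched_setattr\n", attr.sched_nice);
	return 0;
}
```
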
| 6606 | +static void get_params(struct task_struct *p, struct sched_attr *attr) |
|---|
| 6607 | +{ |
|---|
| 6608 | + if (task_has_dl_policy(p)) |
|---|
| 6609 | + __getparam_dl(p, attr); |
|---|
| 6610 | + else if (task_has_rt_policy(p)) |
|---|
| 6611 | + attr->sched_priority = p->rt_priority; |
|---|
| 6612 | + else |
|---|
| 6613 | + attr->sched_nice = task_nice(p); |
|---|
| 5306 | 6614 | } |
|---|
| 5307 | 6615 | |
|---|
| 5308 | 6616 | /** |
|---|
| .. | .. |
|---|
| 5366 | 6674 | rcu_read_unlock(); |
|---|
| 5367 | 6675 | |
|---|
| 5368 | 6676 | if (likely(p)) { |
|---|
| 6677 | + if (attr.sched_flags & SCHED_FLAG_KEEP_PARAMS) |
|---|
| 6678 | + get_params(p, &attr); |
|---|
| 5369 | 6679 | retval = sched_setattr(p, &attr); |
|---|
| 5370 | 6680 | put_task_struct(p); |
|---|
| 5371 | 6681 | } |
|---|
| .. | .. |
|---|
| 5459 | 6769 | { |
|---|
| 5460 | 6770 | unsigned int ksize = sizeof(*kattr); |
|---|
| 5461 | 6771 | |
|---|
| 5462 | | - if (!access_ok(VERIFY_WRITE, uattr, usize)) |
|---|
| 6772 | + if (!access_ok(uattr, usize)) |
|---|
| 5463 | 6773 | return -EFAULT; |
|---|
| 5464 | 6774 | |
|---|
| 5465 | 6775 | /* |
|---|
| .. | .. |
|---|
| 5487 | 6797 | * sys_sched_getattr - similar to sched_getparam, but with sched_attr |
|---|
| 5488 | 6798 | * @pid: the pid in question. |
|---|
| 5489 | 6799 | * @uattr: structure containing the extended parameters. |
|---|
| 5490 | | - * @usize: sizeof(attr) that user-space knows about, for forwards and backwards compatibility. |
|---|
| 6800 | + * @usize: sizeof(attr) for fwd/bwd comp. |
|---|
| 5491 | 6801 | * @flags: for future extension. |
|---|
| 5492 | 6802 | */ |
|---|
| 5493 | 6803 | SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, |
|---|
| .. | .. |
|---|
| 5514 | 6824 | kattr.sched_policy = p->policy; |
|---|
| 5515 | 6825 | if (p->sched_reset_on_fork) |
|---|
| 5516 | 6826 | kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; |
|---|
| 5517 | | - if (task_has_dl_policy(p)) |
|---|
| 5518 | | - __getparam_dl(p, &kattr); |
|---|
| 5519 | | - else if (task_has_rt_policy(p)) |
|---|
| 5520 | | - kattr.sched_priority = p->rt_priority; |
|---|
| 5521 | | - else |
|---|
| 5522 | | - kattr.sched_nice = task_nice(p); |
|---|
| 6827 | + get_params(p, &kattr); |
|---|
| 6828 | + kattr.sched_flags &= SCHED_FLAG_ALL; |
|---|
| 5523 | 6829 | |
|---|
| 5524 | 6830 | #ifdef CONFIG_UCLAMP_TASK |
|---|
| 6831 | + /* |
|---|
| 6832 | + * This could race with another potential updater, but this is fine |
|---|
| 6833 | + * because it'll correctly read the old or the new value. We don't need |
|---|
| 6834 | + * to guarantee who wins the race as long as it doesn't return garbage. |
|---|
| 6835 | + */ |
|---|
| 5525 | 6836 | kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; |
|---|
| 5526 | 6837 | kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; |
|---|
| 5527 | 6838 | #endif |
|---|
| .. | .. |
|---|
| 5540 | 6851 | cpumask_var_t cpus_allowed, new_mask; |
|---|
| 5541 | 6852 | struct task_struct *p; |
|---|
| 5542 | 6853 | int retval; |
|---|
| 6854 | + int skip = 0; |
|---|
| 5543 | 6855 | |
|---|
| 5544 | 6856 | rcu_read_lock(); |
|---|
| 5545 | 6857 | |
|---|
| .. | .. |
|---|
| 5575 | 6887 | rcu_read_unlock(); |
|---|
| 5576 | 6888 | } |
|---|
| 5577 | 6889 | |
|---|
| 6890 | + trace_android_vh_sched_setaffinity_early(p, in_mask, &skip); |
|---|
| 6891 | + if (skip) |
|---|
| 6892 | + goto out_free_new_mask; |
|---|
| 5578 | 6893 | retval = security_task_setscheduler(p); |
|---|
| 5579 | 6894 | if (retval) |
|---|
| 5580 | 6895 | goto out_free_new_mask; |
|---|
| .. | .. |
|---|
| 5601 | 6916 | } |
|---|
| 5602 | 6917 | #endif |
|---|
| 5603 | 6918 | again: |
|---|
| 5604 | | - retval = __set_cpus_allowed_ptr(p, new_mask, true); |
|---|
| 6919 | + retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK); |
|---|
| 5605 | 6920 | |
|---|
| 5606 | 6921 | if (!retval) { |
|---|
| 5607 | 6922 | cpuset_cpus_allowed(p, cpus_allowed); |
|---|
| .. | .. |
|---|
| 5615 | 6930 | goto again; |
|---|
| 5616 | 6931 | } |
|---|
| 5617 | 6932 | } |
|---|
| 6933 | + |
|---|
| 6934 | + trace_android_rvh_sched_setaffinity(p, in_mask, &retval); |
|---|
| 6935 | + |
|---|
| 5618 | 6936 | out_free_new_mask: |
|---|
| 5619 | 6937 | free_cpumask_var(new_mask); |
|---|
| 5620 | 6938 | out_free_cpus_allowed: |
|---|
| .. | .. |
|---|
| 5623 | 6941 | put_task_struct(p); |
|---|
| 5624 | 6942 | return retval; |
|---|
| 5625 | 6943 | } |
|---|
| 5626 | | -EXPORT_SYMBOL_GPL(sched_setaffinity); |
|---|
| 5627 | 6944 | |
|---|
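
The kernel-side sched_setaffinity() above — now passing SCA_CHECK to __set_cpus_allowed_ptr() and no longer exported to modules — is where the regular syscall path ends up; a plain user-space counterpart for reference, pinning the calling thread to CPU 0:

```c
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);

	if (sched_setaffinity(0, sizeof(set), &set))	/* 0 == calling thread */
		perror("sched_setaffinity");
	return 0;
}
```
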
| 5628 | 6945 | static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, |
|---|
| 5629 | 6946 | struct cpumask *new_mask) |
|---|
| .. | .. |
|---|
| 5742 | 7059 | schedstat_inc(rq->yld_count); |
|---|
| 5743 | 7060 | current->sched_class->yield_task(rq); |
|---|
| 5744 | 7061 | |
|---|
| 7062 | + trace_android_rvh_do_sched_yield(rq); |
|---|
| 7063 | + |
|---|
| 5745 | 7064 | preempt_disable(); |
|---|
| 5746 | 7065 | rq_unlock_irq(rq, &rf); |
|---|
| 5747 | 7066 | sched_preempt_enable_no_resched(); |
|---|
| .. | .. |
|---|
| 5755 | 7074 | return 0; |
|---|
| 5756 | 7075 | } |
|---|
| 5757 | 7076 | |
|---|
| 5758 | | -#ifndef CONFIG_PREEMPT |
|---|
| 7077 | +#ifndef CONFIG_PREEMPTION |
|---|
| 5759 | 7078 | int __sched _cond_resched(void) |
|---|
| 5760 | 7079 | { |
|---|
| 5761 | 7080 | if (should_resched(0)) { |
|---|
| .. | .. |
|---|
| 5772 | 7091 | * __cond_resched_lock() - if a reschedule is pending, drop the given lock, |
|---|
| 5773 | 7092 | * call schedule, and on return reacquire the lock. |
|---|
| 5774 | 7093 | * |
|---|
| 5775 | | - * This works OK both with and without CONFIG_PREEMPT. We do strange low-level |
|---|
| 7094 | + * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level |
|---|
| 5776 | 7095 | * operations here to prevent schedule() from being called twice (once via |
|---|
| 5777 | 7096 | * spin_unlock(), once by hand). |
|---|
| 5778 | 7097 | */ |
|---|
| .. | .. |
|---|
| 5876 | 7195 | if (task_running(p_rq, p) || p->state) |
|---|
| 5877 | 7196 | goto out_unlock; |
|---|
| 5878 | 7197 | |
|---|
| 5879 | | - yielded = curr->sched_class->yield_to_task(rq, p, preempt); |
|---|
| 7198 | + yielded = curr->sched_class->yield_to_task(rq, p); |
|---|
| 5880 | 7199 | if (yielded) { |
|---|
| 5881 | 7200 | schedstat_inc(rq->yld_count); |
|---|
| 5882 | 7201 | /* |
|---|
| .. | .. |
|---|
| 6042 | 7361 | * an error code. |
|---|
| 6043 | 7362 | */ |
|---|
| 6044 | 7363 | SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, |
|---|
| 6045 | | - struct timespec __user *, interval) |
|---|
| 7364 | + struct __kernel_timespec __user *, interval) |
|---|
| 6046 | 7365 | { |
|---|
| 6047 | 7366 | struct timespec64 t; |
|---|
| 6048 | 7367 | int retval = sched_rr_get_interval(pid, &t); |
|---|
| .. | .. |
|---|
| 6053 | 7372 | return retval; |
|---|
| 6054 | 7373 | } |
|---|
| 6055 | 7374 | |
|---|
| 6056 | | -#ifdef CONFIG_COMPAT |
|---|
| 6057 | | -COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval, |
|---|
| 6058 | | - compat_pid_t, pid, |
|---|
| 6059 | | - struct compat_timespec __user *, interval) |
|---|
| 7375 | +#ifdef CONFIG_COMPAT_32BIT_TIME |
|---|
| 7376 | +SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, |
|---|
| 7377 | + struct old_timespec32 __user *, interval) |
|---|
| 6060 | 7378 | { |
|---|
| 6061 | 7379 | struct timespec64 t; |
|---|
| 6062 | 7380 | int retval = sched_rr_get_interval(pid, &t); |
|---|
| 6063 | 7381 | |
|---|
| 6064 | 7382 | if (retval == 0) |
|---|
| 6065 | | - retval = compat_put_timespec64(&t, interval); |
|---|
| 7383 | + retval = put_old_timespec32(&t, interval); |
|---|
| 6066 | 7384 | return retval; |
|---|
| 6067 | 7385 | } |
|---|
| 6068 | 7386 | #endif |
|---|
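
The pair of syscalls above (the __kernel_timespec variant and the old 32-bit time32 compat variant) back the POSIX interface; a small user-space check of the RR timeslice, independent of the timespec rework itself:

```c
#include <stdio.h>
#include <sched.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	if (sched_rr_get_interval(0, &ts) == 0)	/* 0 == calling thread */
		printf("RR timeslice: %ld.%09ld s\n", (long)ts.tv_sec, ts.tv_nsec);
	else
		perror("sched_rr_get_interval");
	return 0;
}
```
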
| .. | .. |
|---|
| 6075 | 7393 | if (!try_get_task_stack(p)) |
|---|
| 6076 | 7394 | return; |
|---|
| 6077 | 7395 | |
|---|
| 6078 | | - printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); |
|---|
| 7396 | + pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p)); |
|---|
| 6079 | 7397 | |
|---|
| 6080 | 7398 | if (p->state == TASK_RUNNING) |
|---|
| 6081 | | - printk(KERN_CONT " running task "); |
|---|
| 7399 | + pr_cont(" running task "); |
|---|
| 6082 | 7400 | #ifdef CONFIG_DEBUG_STACK_USAGE |
|---|
| 6083 | 7401 | free = stack_not_used(p); |
|---|
| 6084 | 7402 | #endif |
|---|
| .. | .. |
|---|
| 6087 | 7405 | if (pid_alive(p)) |
|---|
| 6088 | 7406 | ppid = task_pid_nr(rcu_dereference(p->real_parent)); |
|---|
| 6089 | 7407 | rcu_read_unlock(); |
|---|
| 6090 | | - printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, |
|---|
| 6091 | | - task_pid_nr(p), ppid, |
|---|
| 7408 | + pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n", |
|---|
| 7409 | + free, task_pid_nr(p), ppid, |
|---|
| 6092 | 7410 | (unsigned long)task_thread_info(p)->flags); |
|---|
| 6093 | 7411 | |
|---|
| 6094 | 7412 | print_worker_info(KERN_INFO, p); |
|---|
| 6095 | | - show_stack(p, NULL); |
|---|
| 7413 | + trace_android_vh_sched_show_task(p); |
|---|
| 7414 | + show_stack(p, NULL, KERN_INFO); |
|---|
| 6096 | 7415 | put_task_stack(p); |
|---|
| 6097 | 7416 | } |
|---|
| 6098 | 7417 | EXPORT_SYMBOL_GPL(sched_show_task); |
|---|
| .. | .. |
|---|
| 6123 | 7442 | { |
|---|
| 6124 | 7443 | struct task_struct *g, *p; |
|---|
| 6125 | 7444 | |
|---|
| 6126 | | -#if BITS_PER_LONG == 32 |
|---|
| 6127 | | - printk(KERN_INFO |
|---|
| 6128 | | - " task PC stack pid father\n"); |
|---|
| 6129 | | -#else |
|---|
| 6130 | | - printk(KERN_INFO |
|---|
| 6131 | | - " task PC stack pid father\n"); |
|---|
| 6132 | | -#endif |
|---|
| 6133 | 7445 | rcu_read_lock(); |
|---|
| 6134 | 7446 | for_each_process_thread(g, p) { |
|---|
| 6135 | 7447 | /* |
|---|
| .. | .. |
|---|
| 6165 | 7477 | * NOTE: this function does not set the idle thread's NEED_RESCHED |
|---|
| 6166 | 7478 | * flag, to make booting more robust. |
|---|
| 6167 | 7479 | */ |
|---|
| 6168 | | -void init_idle(struct task_struct *idle, int cpu) |
|---|
| 7480 | +void __init init_idle(struct task_struct *idle, int cpu) |
|---|
| 6169 | 7481 | { |
|---|
| 6170 | 7482 | struct rq *rq = cpu_rq(cpu); |
|---|
| 6171 | 7483 | unsigned long flags; |
|---|
| .. | .. |
|---|
| 6179 | 7491 | idle->se.exec_start = sched_clock(); |
|---|
| 6180 | 7492 | idle->flags |= PF_IDLE; |
|---|
| 6181 | 7493 | |
|---|
| 6182 | | - scs_task_reset(idle); |
|---|
| 6183 | | - kasan_unpoison_task_stack(idle); |
|---|
| 6184 | | - |
|---|
| 6185 | 7494 | #ifdef CONFIG_SMP |
|---|
| 6186 | 7495 | /* |
|---|
| 6187 | 7496 | * It's possible that init_idle() gets called multiple times on a task, |
|---|
| .. | .. |
|---|
| 6189 | 7498 | * |
|---|
| 6190 | 7499 | * And since this is boot we can forgo the serialization. |
|---|
| 6191 | 7500 | */ |
|---|
| 6192 | | - set_cpus_allowed_common(idle, cpumask_of(cpu)); |
|---|
| 7501 | + set_cpus_allowed_common(idle, cpumask_of(cpu), 0); |
|---|
| 6193 | 7502 | #endif |
|---|
| 6194 | 7503 | /* |
|---|
| 6195 | 7504 | * We're having a chicken and egg problem, even though we are |
|---|
| .. | .. |
|---|
| 6205 | 7514 | __set_task_cpu(idle, cpu); |
|---|
| 6206 | 7515 | rcu_read_unlock(); |
|---|
| 6207 | 7516 | |
|---|
| 6208 | | - rq->curr = rq->idle = idle; |
|---|
| 7517 | + rq->idle = idle; |
|---|
| 7518 | + rcu_assign_pointer(rq->curr, idle); |
|---|
| 6209 | 7519 | idle->on_rq = TASK_ON_RQ_QUEUED; |
|---|
| 6210 | 7520 | #ifdef CONFIG_SMP |
|---|
| 6211 | 7521 | idle->on_cpu = 1; |
|---|
| .. | .. |
|---|
| 6245 | 7555 | } |
|---|
| 6246 | 7556 | |
|---|
| 6247 | 7557 | int task_can_attach(struct task_struct *p, |
|---|
| 6248 | | - const struct cpumask *cs_cpus_allowed) |
|---|
| 7558 | + const struct cpumask *cs_effective_cpus) |
|---|
| 6249 | 7559 | { |
|---|
| 6250 | 7560 | int ret = 0; |
|---|
| 6251 | 7561 | |
|---|
| .. | .. |
|---|
| 6264 | 7574 | } |
|---|
| 6265 | 7575 | |
|---|
| 6266 | 7576 | if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span, |
|---|
| 6267 | | - cs_cpus_allowed)) |
|---|
| 6268 | | - ret = dl_task_can_attach(p, cs_cpus_allowed); |
|---|
| 7577 | + cs_effective_cpus)) { |
|---|
| 7578 | + int cpu = cpumask_any_and(cpu_active_mask, cs_effective_cpus); |
|---|
| 7579 | + |
|---|
| 7580 | + if (unlikely(cpu >= nr_cpu_ids)) |
|---|
| 7581 | + return -EINVAL; |
|---|
| 7582 | + ret = dl_cpu_busy(cpu, p); |
|---|
| 7583 | + } |
|---|
| 6269 | 7584 | |
|---|
| 6270 | 7585 | out: |
|---|
| 6271 | 7586 | return ret; |
|---|
| .. | .. |
|---|
| 6316 | 7631 | if (queued) |
|---|
| 6317 | 7632 | enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); |
|---|
| 6318 | 7633 | if (running) |
|---|
| 6319 | | - set_curr_task(rq, p); |
|---|
| 7634 | + set_next_task(rq, p); |
|---|
| 6320 | 7635 | task_rq_unlock(rq, p, &rf); |
|---|
| 6321 | 7636 | } |
|---|
| 6322 | 7637 | #endif /* CONFIG_NUMA_BALANCING */ |
|---|
| .. | .. |
|---|
| 6342 | 7657 | /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ |
|---|
| 6343 | 7658 | } |
|---|
| 6344 | 7659 | |
|---|
| 6345 | | -/* |
|---|
| 6346 | | - * Since this CPU is going 'away' for a while, fold any nr_active delta |
|---|
| 6347 | | - * we might have. Assumes we're called after migrate_tasks() so that the |
|---|
| 6348 | | - * nr_active count is stable. We need to take the teardown thread which |
|---|
| 6349 | | - * is calling this into account, so we hand in adjust = 1 to the load |
|---|
| 6350 | | - * calculation. |
|---|
| 6351 | | - * |
|---|
| 6352 | | - * Also see the comment "Global load-average calculations". |
|---|
| 6353 | | - */ |
|---|
| 6354 | | -static void calc_load_migrate(struct rq *rq) |
|---|
| 7660 | +static int __balance_push_cpu_stop(void *arg) |
|---|
| 6355 | 7661 | { |
|---|
| 6356 | | - long delta = calc_load_fold_active(rq, 1); |
|---|
| 6357 | | - if (delta) |
|---|
| 6358 | | - atomic_long_add(delta, &calc_load_tasks); |
|---|
| 6359 | | -} |
|---|
| 7662 | + struct task_struct *p = arg; |
|---|
| 7663 | + struct rq *rq = this_rq(); |
|---|
| 7664 | + struct rq_flags rf; |
|---|
| 7665 | + int cpu; |
|---|
| 6360 | 7666 | |
|---|
| 6361 | | -static void put_prev_task_fake(struct rq *rq, struct task_struct *prev) |
|---|
| 6362 | | -{ |
|---|
| 6363 | | -} |
|---|
| 7667 | + raw_spin_lock_irq(&p->pi_lock); |
|---|
| 7668 | + rq_lock(rq, &rf); |
|---|
| 6364 | 7669 | |
|---|
| 6365 | | -static const struct sched_class fake_sched_class = { |
|---|
| 6366 | | - .put_prev_task = put_prev_task_fake, |
|---|
| 6367 | | -}; |
|---|
| 6368 | | - |
|---|
| 6369 | | -static struct task_struct fake_task = { |
|---|
| 6370 | | - /* |
|---|
| 6371 | | - * Avoid pull_{rt,dl}_task() |
|---|
| 6372 | | - */ |
|---|
| 6373 | | - .prio = MAX_PRIO + 1, |
|---|
| 6374 | | - .sched_class = &fake_sched_class, |
|---|
| 6375 | | -}; |
|---|
| 6376 | | - |
|---|
| 6377 | | -/* |
|---|
| 6378 | | - * Migrate all tasks from the rq, sleeping tasks will be migrated by |
|---|
| 6379 | | - * try_to_wake_up()->select_task_rq(). |
|---|
| 6380 | | - * |
|---|
| 6381 | | - * Called with rq->lock held even though we'er in stop_machine() and |
|---|
| 6382 | | - * there's no concurrency possible, we hold the required locks anyway |
|---|
| 6383 | | - * because of lock validation efforts. |
|---|
| 6384 | | - */ |
|---|
| 6385 | | -static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf) |
|---|
| 6386 | | -{ |
|---|
| 6387 | | - struct rq *rq = dead_rq; |
|---|
| 6388 | | - struct task_struct *next, *stop = rq->stop; |
|---|
| 6389 | | - struct rq_flags orf = *rf; |
|---|
| 6390 | | - int dest_cpu; |
|---|
| 6391 | | - |
|---|
| 6392 | | - /* |
|---|
| 6393 | | - * Fudge the rq selection such that the below task selection loop |
|---|
| 6394 | | - * doesn't get stuck on the currently eligible stop task. |
|---|
| 6395 | | - * |
|---|
| 6396 | | - * We're currently inside stop_machine() and the rq is either stuck |
|---|
| 6397 | | - * in the stop_machine_cpu_stop() loop, or we're executing this code, |
|---|
| 6398 | | - * either way we should never end up calling schedule() until we're |
|---|
| 6399 | | - * done here. |
|---|
| 6400 | | - */ |
|---|
| 6401 | | - rq->stop = NULL; |
|---|
| 6402 | | - |
|---|
| 6403 | | - /* |
|---|
| 6404 | | - * put_prev_task() and pick_next_task() sched |
|---|
| 6405 | | - * class method both need to have an up-to-date |
|---|
| 6406 | | - * value of rq->clock[_task] |
|---|
| 6407 | | - */ |
|---|
| 6408 | 7670 | update_rq_clock(rq); |
|---|
| 6409 | 7671 | |
|---|
| 6410 | | - for (;;) { |
|---|
| 6411 | | - /* |
|---|
| 6412 | | - * There's this thread running, bail when that's the only |
|---|
| 6413 | | - * remaining thread: |
|---|
| 6414 | | - */ |
|---|
| 6415 | | - if (rq->nr_running == 1) |
|---|
| 6416 | | - break; |
|---|
| 6417 | | - |
|---|
| 6418 | | - /* |
|---|
| 6419 | | - * pick_next_task() assumes pinned rq->lock: |
|---|
| 6420 | | - */ |
|---|
| 6421 | | - next = pick_next_task(rq, &fake_task, rf); |
|---|
| 6422 | | - BUG_ON(!next); |
|---|
| 6423 | | - put_prev_task(rq, next); |
|---|
| 6424 | | - |
|---|
| 6425 | | - WARN_ON_ONCE(__migrate_disabled(next)); |
|---|
| 6426 | | - |
|---|
| 6427 | | - /* |
|---|
| 6428 | | - * Rules for changing task_struct::cpus_mask are holding |
|---|
| 6429 | | - * both pi_lock and rq->lock, such that holding either |
|---|
| 6430 | | - * stabilizes the mask. |
|---|
| 6431 | | - * |
|---|
| 6432 | | - * Drop rq->lock is not quite as disastrous as it usually is |
|---|
| 6433 | | - * because !cpu_active at this point, which means load-balance |
|---|
| 6434 | | - * will not interfere. Also, stop-machine. |
|---|
| 6435 | | - */ |
|---|
| 6436 | | - rq_unlock(rq, rf); |
|---|
| 6437 | | - raw_spin_lock(&next->pi_lock); |
|---|
| 6438 | | - rq_relock(rq, rf); |
|---|
| 6439 | | - |
|---|
| 6440 | | - /* |
|---|
| 6441 | | - * Since we're inside stop-machine, _nothing_ should have |
|---|
| 6442 | | - * changed the task, WARN if weird stuff happened, because in |
|---|
| 6443 | | - * that case the above rq->lock drop is a fail too. |
|---|
| 6444 | | - */ |
|---|
| 6445 | | - if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) { |
|---|
| 6446 | | - raw_spin_unlock(&next->pi_lock); |
|---|
| 6447 | | - continue; |
|---|
| 6448 | | - } |
|---|
| 6449 | | - |
|---|
| 6450 | | - /* Find suitable destination for @next, with force if needed. */ |
|---|
| 6451 | | - dest_cpu = select_fallback_rq(dead_rq->cpu, next); |
|---|
| 6452 | | - rq = __migrate_task(rq, rf, next, dest_cpu); |
|---|
| 6453 | | - if (rq != dead_rq) { |
|---|
| 6454 | | - rq_unlock(rq, rf); |
|---|
| 6455 | | - rq = dead_rq; |
|---|
| 6456 | | - *rf = orf; |
|---|
| 6457 | | - rq_relock(rq, rf); |
|---|
| 6458 | | - } |
|---|
| 6459 | | - raw_spin_unlock(&next->pi_lock); |
|---|
| 7672 | + if (task_rq(p) == rq && task_on_rq_queued(p)) { |
|---|
| 7673 | + cpu = select_fallback_rq(rq->cpu, p); |
|---|
| 7674 | + rq = __migrate_task(rq, &rf, p, cpu); |
|---|
| 6460 | 7675 | } |
|---|
| 6461 | 7676 | |
|---|
| 6462 | | - rq->stop = stop; |
|---|
| 7677 | + rq_unlock(rq, &rf); |
|---|
| 7678 | + raw_spin_unlock_irq(&p->pi_lock); |
|---|
| 7679 | + |
|---|
| 7680 | + put_task_struct(p); |
|---|
| 7681 | + |
|---|
| 7682 | + return 0; |
|---|
| 6463 | 7683 | } |
|---|
| 7684 | + |
|---|
| 7685 | +static DEFINE_PER_CPU(struct cpu_stop_work, push_work); |
|---|
| 7686 | + |
|---|
| 7687 | +/* |
|---|
| 7688 | + * Ensure we only run per-cpu kthreads once the CPU goes !active. |
|---|
| 7689 | + */ |
|---|
| 7690 | + |
|---|
| 7691 | + |
|---|
| 7692 | +static void balance_push(struct rq *rq) |
|---|
| 7693 | +{ |
|---|
| 7694 | + struct task_struct *push_task = rq->curr; |
|---|
| 7695 | + |
|---|
| 7696 | + lockdep_assert_held(&rq->lock); |
|---|
| 7697 | + SCHED_WARN_ON(rq->cpu != smp_processor_id()); |
|---|
| 7698 | + |
|---|
| 7699 | + /* |
|---|
| 7700 | + * Both the cpu-hotplug and stop task are in this case and are |
|---|
| 7701 | + * required to complete the hotplug process. |
|---|
| 7702 | + */ |
|---|
| 7703 | + if (is_per_cpu_kthread(push_task) || is_migration_disabled(push_task)) { |
|---|
| 7704 | + /* |
|---|
| 7705 | + * If this is the idle task on the outgoing CPU try to wake |
|---|
| 7706 | + * up the hotplug control thread which might wait for the |
|---|
| 7707 | + * last task to vanish. The rcuwait_active() check is |
|---|
| 7708 | + * accurate here because the waiter is pinned on this CPU |
|---|
| 7709 | + * and obviously can't be running in parallel. |
|---|
| 7710 | + * |
|---|
| 7711 | + * On RT kernels this also has to check whether there are |
|---|
| 7712 | + * pinned and scheduled out tasks on the runqueue. They |
|---|
| 7713 | + * need to leave the migrate disabled section first. |
|---|
| 7714 | + */ |
|---|
| 7715 | + if (!rq->nr_running && !rq_has_pinned_tasks(rq) && |
|---|
| 7716 | + rcuwait_active(&rq->hotplug_wait)) { |
|---|
| 7717 | + raw_spin_unlock(&rq->lock); |
|---|
| 7718 | + rcuwait_wake_up(&rq->hotplug_wait); |
|---|
| 7719 | + raw_spin_lock(&rq->lock); |
|---|
| 7720 | + } |
|---|
| 7721 | + return; |
|---|
| 7722 | + } |
|---|
| 7723 | + |
|---|
| 7724 | + get_task_struct(push_task); |
|---|
| 7725 | + /* |
|---|
| 7726 | + * Temporarily drop rq->lock such that we can wake-up the stop task. |
|---|
| 7727 | + * Both preemption and IRQs are still disabled. |
|---|
| 7728 | + */ |
|---|
| 7729 | + raw_spin_unlock(&rq->lock); |
|---|
| 7730 | + stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task, |
|---|
| 7731 | + this_cpu_ptr(&push_work)); |
|---|
| 7732 | + /* |
|---|
| 7733 | + * At this point need_resched() is true and we'll take the loop in |
|---|
| 7734 | + * schedule(). The next pick is obviously going to be the stop task |
|---|
| 7735 | + * which is_per_cpu_kthread() and will push this task away. |
|---|
| 7736 | + */ |
|---|
| 7737 | + raw_spin_lock(&rq->lock); |
|---|
| 7738 | +} |
|---|
| 7739 | + |
|---|
| 7740 | +static void balance_push_set(int cpu, bool on) |
|---|
| 7741 | +{ |
|---|
| 7742 | + struct rq *rq = cpu_rq(cpu); |
|---|
| 7743 | + struct rq_flags rf; |
|---|
| 7744 | + |
|---|
| 7745 | + rq_lock_irqsave(rq, &rf); |
|---|
| 7746 | + if (on) |
|---|
| 7747 | + rq->balance_flags |= BALANCE_PUSH; |
|---|
| 7748 | + else |
|---|
| 7749 | + rq->balance_flags &= ~BALANCE_PUSH; |
|---|
| 7750 | + rq_unlock_irqrestore(rq, &rf); |
|---|
| 7751 | +} |
|---|
| 7752 | + |
|---|
| 7753 | +/* |
|---|
| 7754 | + * Invoked from a CPUs hotplug control thread after the CPU has been marked |
|---|
| 7755 | + * inactive. All tasks which are not per CPU kernel threads are either |
|---|
| 7756 | + * pushed off this CPU now via balance_push() or placed on a different CPU |
|---|
| 7757 | + * during wakeup. Wait until the CPU is quiescent. |
|---|
| 7758 | + */ |
|---|
| 7759 | +static void balance_hotplug_wait(void) |
|---|
| 7760 | +{ |
|---|
| 7761 | + struct rq *rq = this_rq(); |
|---|
| 7762 | + |
|---|
| 7763 | + rcuwait_wait_event(&rq->hotplug_wait, |
|---|
| 7764 | + rq->nr_running == 1 && !rq_has_pinned_tasks(rq), |
|---|
| 7765 | + TASK_UNINTERRUPTIBLE); |
|---|
| 7766 | +} |
|---|
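
balance_hotplug_wait() above parks the hotplug control thread on rq->hotplug_wait until the runqueue has drained to a single task with nothing migrate-disabled pinned to it, while balance_push() on the outgoing CPU performs the matching rcuwait_wake_up(). A reduced sketch of that rcuwait pairing in isolation — 'drained' and both function names are made up; only the rcuwait_* API (<linux/rcuwait.h>) is real:

```c
#include <linux/rcuwait.h>
#include <linux/sched.h>
#include <linux/types.h>

static struct rcuwait drain_wait = __RCUWAIT_INITIALIZER(drain_wait);
static bool drained;

static void waiter(void)	/* hotplug-control-thread side */
{
	rcuwait_wait_event(&drain_wait, READ_ONCE(drained),
			   TASK_UNINTERRUPTIBLE);
}

static void waker(void)		/* outgoing-CPU side, cf. balance_push() */
{
	WRITE_ONCE(drained, true);
	if (rcuwait_active(&drain_wait))
		rcuwait_wake_up(&drain_wait);
}
```
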
| 7767 | + |
|---|
| 7768 | +static int drain_rq_cpu_stop(void *data) |
|---|
| 7769 | +{ |
|---|
| 7770 | +#ifndef CONFIG_PREEMPT_RT |
|---|
| 7771 | + struct rq *rq = this_rq(); |
|---|
| 7772 | + struct rq_flags rf; |
|---|
| 7773 | + |
|---|
| 7774 | + rq_lock_irqsave(rq, &rf); |
|---|
| 7775 | + migrate_tasks(rq, &rf, false); |
|---|
| 7776 | + rq_unlock_irqrestore(rq, &rf); |
|---|
| 7777 | +#endif |
|---|
| 7778 | + return 0; |
|---|
| 7779 | +} |
|---|
| 7780 | + |
|---|
| 7781 | +int sched_cpu_drain_rq(unsigned int cpu) |
|---|
| 7782 | +{ |
|---|
| 7783 | + struct cpu_stop_work *rq_drain = &(cpu_rq(cpu)->drain); |
|---|
| 7784 | + struct cpu_stop_done *rq_drain_done = &(cpu_rq(cpu)->drain_done); |
|---|
| 7785 | + |
|---|
| 7786 | + if (idle_cpu(cpu)) { |
|---|
| 7787 | + rq_drain->done = NULL; |
|---|
| 7788 | + return 0; |
|---|
| 7789 | + } |
|---|
| 7790 | + |
|---|
| 7791 | + return stop_one_cpu_async(cpu, drain_rq_cpu_stop, NULL, rq_drain, |
|---|
| 7792 | + rq_drain_done); |
|---|
| 7793 | +} |
|---|
| 7794 | + |
|---|
| 7795 | +void sched_cpu_drain_rq_wait(unsigned int cpu) |
|---|
| 7796 | +{ |
|---|
| 7797 | + struct cpu_stop_work *rq_drain = &(cpu_rq(cpu)->drain); |
|---|
| 7798 | + |
|---|
| 7799 | + if (rq_drain->done) |
|---|
| 7800 | + cpu_stop_work_wait(rq_drain); |
|---|
| 7801 | +} |
|---|
| 7802 | + |
|---|
| 7803 | +#else |
|---|
| 7804 | + |
|---|
| 7805 | +static inline void balance_push(struct rq *rq) |
|---|
| 7806 | +{ |
|---|
| 7807 | +} |
|---|
| 7808 | + |
|---|
| 7809 | +static inline void balance_push_set(int cpu, bool on) |
|---|
| 7810 | +{ |
|---|
| 7811 | +} |
|---|
| 7812 | + |
|---|
| 7813 | +static inline void balance_hotplug_wait(void) |
|---|
| 7814 | +{ |
|---|
| 7815 | +} |
|---|
| 7816 | + |
|---|
| 6464 | 7817 | #endif /* CONFIG_HOTPLUG_CPU */ |
|---|
| 6465 | 7818 | |
|---|
| 6466 | 7819 | void set_rq_online(struct rq *rq) |
|---|
| .. | .. |
|---|
| 6531 | 7884 | static int cpuset_cpu_inactive(unsigned int cpu) |
|---|
| 6532 | 7885 | { |
|---|
| 6533 | 7886 | if (!cpuhp_tasks_frozen) { |
|---|
| 6534 | | - if (dl_cpu_busy(cpu)) |
|---|
| 6535 | | - return -EBUSY; |
|---|
| 7887 | + int ret = dl_cpu_busy(cpu, NULL); |
|---|
| 7888 | + |
|---|
| 7889 | + if (ret) |
|---|
| 7890 | + return ret; |
|---|
| 6536 | 7891 | cpuset_update_active_cpus(); |
|---|
| 6537 | 7892 | } else { |
|---|
| 6538 | 7893 | num_cpus_frozen++; |
|---|
| .. | .. |
|---|
| 6545 | 7900 | { |
|---|
| 6546 | 7901 | struct rq *rq = cpu_rq(cpu); |
|---|
| 6547 | 7902 | struct rq_flags rf; |
|---|
| 7903 | + |
|---|
| 7904 | + balance_push_set(cpu, false); |
|---|
| 6548 | 7905 | |
|---|
| 6549 | 7906 | #ifdef CONFIG_SCHED_SMT |
|---|
| 6550 | 7907 | /* |
|---|
| .. | .. |
|---|
| 6581 | 7938 | return 0; |
|---|
| 6582 | 7939 | } |
|---|
| 6583 | 7940 | |
|---|
| 6584 | | -int sched_cpu_deactivate(unsigned int cpu) |
|---|
| 7941 | +int sched_cpus_activate(struct cpumask *cpus) |
|---|
| 6585 | 7942 | { |
|---|
| 7943 | + unsigned int cpu; |
|---|
| 7944 | + |
|---|
| 7945 | + for_each_cpu(cpu, cpus) { |
|---|
| 7946 | + if (sched_cpu_activate(cpu)) { |
|---|
| 7947 | + for_each_cpu_and(cpu, cpus, cpu_active_mask) |
|---|
| 7948 | + sched_cpu_deactivate(cpu); |
|---|
| 7949 | + |
|---|
| 7950 | + return -EBUSY; |
|---|
| 7951 | + } |
|---|
| 7952 | + } |
|---|
| 7953 | + |
|---|
| 7954 | + return 0; |
|---|
| 7955 | +} |
|---|
| 7956 | + |
|---|
| 7957 | +int _sched_cpu_deactivate(unsigned int cpu) |
|---|
| 7958 | +{ |
|---|
| 7959 | + struct rq *rq = cpu_rq(cpu); |
|---|
| 7960 | + struct rq_flags rf; |
|---|
| 6586 | 7961 | int ret; |
|---|
| 6587 | 7962 | |
|---|
| 6588 | 7963 | set_cpu_active(cpu, false); |
|---|
| 6589 | | - /* |
|---|
| 6590 | | - * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU |
|---|
| 6591 | | - * users of this state to go away such that all new such users will |
|---|
| 6592 | | - * observe it. |
|---|
| 6593 | | - * |
|---|
| 6594 | | - * Do sync before park smpboot threads to take care the rcu boost case. |
|---|
| 6595 | | - */ |
|---|
| 6596 | | - synchronize_rcu_mult(call_rcu, call_rcu_sched); |
|---|
| 7964 | + |
|---|
| 7965 | + balance_push_set(cpu, true); |
|---|
| 7966 | + |
|---|
| 7967 | + rq_lock_irqsave(rq, &rf); |
|---|
| 7968 | + if (rq->rd) { |
|---|
| 7969 | + update_rq_clock(rq); |
|---|
| 7970 | + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
|---|
| 7971 | + set_rq_offline(rq); |
|---|
| 7972 | + } |
|---|
| 7973 | + rq_unlock_irqrestore(rq, &rf); |
|---|
| 6597 | 7974 | |
|---|
| 6598 | 7975 | #ifdef CONFIG_SCHED_SMT |
|---|
| 6599 | 7976 | /* |
|---|
| .. | .. |
|---|
| 6608 | 7985 | |
|---|
| 6609 | 7986 | ret = cpuset_cpu_inactive(cpu); |
|---|
| 6610 | 7987 | if (ret) { |
|---|
| 7988 | + balance_push_set(cpu, false); |
|---|
| 6611 | 7989 | set_cpu_active(cpu, true); |
|---|
| 6612 | 7990 | return ret; |
|---|
| 6613 | 7991 | } |
|---|
| 6614 | 7992 | sched_domains_numa_masks_clear(cpu); |
|---|
| 7993 | + |
|---|
| 7994 | + update_max_interval(); |
|---|
| 7995 | + |
|---|
| 7996 | + return 0; |
|---|
| 7997 | +} |
|---|
| 7998 | + |
|---|
| 7999 | +int sched_cpu_deactivate(unsigned int cpu) |
|---|
| 8000 | +{ |
|---|
| 8001 | + int ret = _sched_cpu_deactivate(cpu); |
|---|
| 8002 | + |
|---|
| 8003 | + if (ret) |
|---|
| 8004 | + return ret; |
|---|
| 8005 | + |
|---|
| 8006 | + /* |
|---|
| 8007 | + * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU |
|---|
| 8008 | + * users of this state to go away such that all new such users will |
|---|
| 8009 | + * observe it. |
|---|
| 8010 | + * |
|---|
| 8011 | + * Do sync before park smpboot threads to take care the rcu boost case. |
|---|
| 8012 | + */ |
|---|
| 8013 | + synchronize_rcu(); |
|---|
| 8014 | + |
|---|
| 8015 | + return 0; |
|---|
| 8016 | +} |
|---|
| 8017 | + |
|---|
| 8018 | +int sched_cpus_deactivate_nosync(struct cpumask *cpus) |
|---|
| 8019 | +{ |
|---|
| 8020 | + unsigned int cpu; |
|---|
| 8021 | + |
|---|
| 8022 | + for_each_cpu(cpu, cpus) { |
|---|
| 8023 | + if (_sched_cpu_deactivate(cpu)) { |
|---|
| 8024 | + for_each_cpu(cpu, cpus) { |
|---|
| 8025 | + if (!cpu_active(cpu)) |
|---|
| 8026 | + sched_cpu_activate(cpu); |
|---|
| 8027 | + } |
|---|
| 8028 | + |
|---|
| 8029 | + return -EBUSY; |
|---|
| 8030 | + } |
|---|
| 8031 | + } |
|---|
| 8032 | + |
|---|
| 6615 | 8033 | return 0; |
|---|
| 6616 | 8034 | } |
|---|
| 6617 | 8035 | |
|---|
| .. | .. |
|---|
| 6620 | 8038 | struct rq *rq = cpu_rq(cpu); |
|---|
| 6621 | 8039 | |
|---|
| 6622 | 8040 | rq->calc_load_update = calc_load_update; |
|---|
| 6623 | | - update_max_interval(); |
|---|
| 6624 | 8041 | } |
|---|
| 6625 | 8042 | |
|---|
| 6626 | 8043 | int sched_cpu_starting(unsigned int cpu) |
|---|
| 6627 | 8044 | { |
|---|
| 6628 | 8045 | sched_rq_cpu_starting(cpu); |
|---|
| 6629 | 8046 | sched_tick_start(cpu); |
|---|
| 8047 | + trace_android_rvh_sched_cpu_starting(cpu); |
|---|
| 6630 | 8048 | return 0; |
|---|
| 6631 | 8049 | } |
|---|
| 6632 | 8050 | |
|---|
| 6633 | 8051 | #ifdef CONFIG_HOTPLUG_CPU |
|---|
| 8052 | + |
|---|
| 8053 | +/* |
|---|
| 8054 | + * Invoked immediately before the stopper thread is invoked to bring the |
|---|
| 8055 | + * CPU down completely. At this point all per CPU kthreads except the |
|---|
| 8056 | + * hotplug thread (current) and the stopper thread (inactive) have been |
|---|
| 8057 | + * either parked or have been unbound from the outgoing CPU. Ensure that |
|---|
| 8058 | + * any of those which might be on the way out are gone. |
|---|
| 8059 | + * |
|---|
| 8060 | + * If after this point a bound task is being woken on this CPU then the |
|---|
| 8061 | + * responsible hotplug callback has failed to do its job. |
|---|
| 8062 | + * sched_cpu_dying() will catch it with the appropriate fireworks. |
|---|
| 8063 | + */ |
|---|
| 8064 | +int sched_cpu_wait_empty(unsigned int cpu) |
|---|
| 8065 | +{ |
|---|
| 8066 | + balance_hotplug_wait(); |
|---|
| 8067 | + return 0; |
|---|
| 8068 | +} |
|---|
| 8069 | + |
|---|
| 8070 | +/* |
|---|
| 8071 | + * Since this CPU is going 'away' for a while, fold any nr_active delta we |
|---|
| 8072 | + * might have. Called from the CPU stopper task after ensuring that the |
|---|
| 8073 | + * stopper is the last running task on the CPU, so nr_active count is |
|---|
| 8074 | + * stable. We need to take the teardown thread which is calling this into |
|---|
| 8075 | + * account, so we hand in adjust = 1 to the load calculation. |
|---|
| 8076 | + * |
|---|
| 8077 | + * Also see the comment "Global load-average calculations". |
|---|
| 8078 | + */ |
|---|
| 8079 | +static void calc_load_migrate(struct rq *rq) |
|---|
| 8080 | +{ |
|---|
| 8081 | + long delta = calc_load_fold_active(rq, 1); |
|---|
| 8082 | + |
|---|
| 8083 | + if (delta) |
|---|
| 8084 | + atomic_long_add(delta, &calc_load_tasks); |
|---|
| 8085 | +} |
|---|
| 8086 | + |
|---|
| 6634 | 8087 | int sched_cpu_dying(unsigned int cpu) |
|---|
| 6635 | 8088 | { |
|---|
| 6636 | 8089 | struct rq *rq = cpu_rq(cpu); |
|---|
| 6637 | 8090 | struct rq_flags rf; |
|---|
| 6638 | 8091 | |
|---|
| 6639 | 8092 | /* Handle pending wakeups and then migrate everything off */ |
|---|
| 6640 | | - sched_ttwu_pending(); |
|---|
| 6641 | 8093 | sched_tick_stop(cpu); |
|---|
| 6642 | 8094 | |
|---|
| 6643 | 8095 | rq_lock_irqsave(rq, &rf); |
|---|
| 6644 | | - if (rq->rd) { |
|---|
| 6645 | | - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
|---|
| 6646 | | - set_rq_offline(rq); |
|---|
| 6647 | | - } |
|---|
| 6648 | | - migrate_tasks(rq, &rf); |
|---|
| 6649 | | - BUG_ON(rq->nr_running != 1); |
|---|
| 8096 | + BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq)); |
|---|
| 6650 | 8097 | rq_unlock_irqrestore(rq, &rf); |
|---|
| 6651 | 8098 | |
|---|
| 8099 | + trace_android_rvh_sched_cpu_dying(cpu); |
|---|
| 8100 | + |
|---|
| 6652 | 8101 | calc_load_migrate(rq); |
|---|
| 6653 | | - update_max_interval(); |
|---|
| 6654 | 8102 | nohz_balance_exit_idle(rq); |
|---|
| 6655 | 8103 | hrtick_clear(rq); |
|---|
| 6656 | 8104 | return 0; |
|---|
| .. | .. |
|---|
| 6664 | 8112 | /* |
|---|
| 6665 | 8113 | * There's no userspace yet to cause hotplug operations; hence all the |
|---|
| 6666 | 8114 | * CPU masks are stable and all blatant races in the below code cannot |
|---|
| 6667 | | - * happen. The hotplug lock is nevertheless taken to satisfy lockdep, |
|---|
| 6668 | | - * but there won't be any contention on it. |
|---|
| 8115 | + * happen. |
|---|
| 6669 | 8116 | */ |
|---|
| 6670 | | - cpus_read_lock(); |
|---|
| 6671 | 8117 | mutex_lock(&sched_domains_mutex); |
|---|
| 6672 | 8118 | sched_init_domains(cpu_active_mask); |
|---|
| 6673 | 8119 | mutex_unlock(&sched_domains_mutex); |
|---|
| 6674 | | - cpus_read_unlock(); |
|---|
| 6675 | 8120 | |
|---|
| 6676 | 8121 | /* Move init over to a non-isolated CPU */ |
|---|
| 6677 | 8122 | if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) |
|---|
| 6678 | 8123 | BUG(); |
|---|
| 8124 | + |
|---|
| 6679 | 8125 | sched_init_granularity(); |
|---|
| 6680 | 8126 | |
|---|
| 6681 | 8127 | init_sched_rt_class(); |
|---|
| .. | .. |
|---|
| 6686 | 8132 | |
|---|
| 6687 | 8133 | static int __init migration_init(void) |
|---|
| 6688 | 8134 | { |
|---|
| 6689 | | - sched_rq_cpu_starting(smp_processor_id()); |
|---|
| 8135 | + sched_cpu_starting(smp_processor_id()); |
|---|
| 6690 | 8136 | return 0; |
|---|
| 6691 | 8137 | } |
|---|
| 6692 | 8138 | early_initcall(migration_init); |
|---|
| .. | .. |
|---|
| 6711 | 8157 | * Every task in the system belongs to this group at bootup. |
|---|
| 6712 | 8158 | */ |
|---|
| 6713 | 8159 | struct task_group root_task_group; |
|---|
| 8160 | +EXPORT_SYMBOL_GPL(root_task_group); |
|---|
| 6714 | 8161 | LIST_HEAD(task_groups); |
|---|
| 8162 | +EXPORT_SYMBOL_GPL(task_groups); |
|---|
| 6715 | 8163 | |
|---|
| 6716 | 8164 | /* Cacheline aligned slab cache for task_group */ |
|---|
| 6717 | 8165 | static struct kmem_cache *task_group_cache __read_mostly; |
|---|
| .. | .. |
|---|
| 6722 | 8170 | |
|---|
| 6723 | 8171 | void __init sched_init(void) |
|---|
| 6724 | 8172 | { |
|---|
| 6725 | | - int i, j; |
|---|
| 6726 | | - unsigned long alloc_size = 0, ptr; |
|---|
| 8173 | + unsigned long ptr = 0; |
|---|
| 8174 | + int i; |
|---|
| 8175 | + |
|---|
| 8176 | + /* Make sure the linker didn't screw up */ |
|---|
| 8177 | + BUG_ON(&idle_sched_class + 1 != &fair_sched_class || |
|---|
| 8178 | + &fair_sched_class + 1 != &rt_sched_class || |
|---|
| 8179 | + &rt_sched_class + 1 != &dl_sched_class); |
|---|
| 8180 | +#ifdef CONFIG_SMP |
|---|
| 8181 | + BUG_ON(&dl_sched_class + 1 != &stop_sched_class); |
|---|
| 8182 | +#endif |
|---|
| 6727 | 8183 | |
|---|
| 6728 | 8184 | wait_bit_init(); |
|---|
| 6729 | 8185 | |
|---|
| 6730 | 8186 | #ifdef CONFIG_FAIR_GROUP_SCHED |
|---|
| 6731 | | - alloc_size += 2 * nr_cpu_ids * sizeof(void **); |
|---|
| 8187 | + ptr += 2 * nr_cpu_ids * sizeof(void **); |
|---|
| 6732 | 8188 | #endif |
|---|
| 6733 | 8189 | #ifdef CONFIG_RT_GROUP_SCHED |
|---|
| 6734 | | - alloc_size += 2 * nr_cpu_ids * sizeof(void **); |
|---|
| 8190 | + ptr += 2 * nr_cpu_ids * sizeof(void **); |
|---|
| 6735 | 8191 | #endif |
|---|
| 6736 | | - if (alloc_size) { |
|---|
| 6737 | | - ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); |
|---|
| 8192 | + if (ptr) { |
|---|
| 8193 | + ptr = (unsigned long)kzalloc(ptr, GFP_NOWAIT); |
|---|
| 6738 | 8194 | |
|---|
| 6739 | 8195 | #ifdef CONFIG_FAIR_GROUP_SCHED |
|---|
| 6740 | 8196 | root_task_group.se = (struct sched_entity **)ptr; |
|---|
| .. | .. |
|---|
| 6743 | 8199 | root_task_group.cfs_rq = (struct cfs_rq **)ptr; |
|---|
| 6744 | 8200 | ptr += nr_cpu_ids * sizeof(void **); |
|---|
| 6745 | 8201 | |
|---|
| 8202 | + root_task_group.shares = ROOT_TASK_GROUP_LOAD; |
|---|
| 8203 | + init_cfs_bandwidth(&root_task_group.cfs_bandwidth); |
|---|
| 6746 | 8204 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
|---|
| 6747 | 8205 | #ifdef CONFIG_RT_GROUP_SCHED |
|---|
| 6748 | 8206 | root_task_group.rt_se = (struct sched_rt_entity **)ptr; |
|---|
| .. | .. |
|---|
| 6795 | 8253 | init_rt_rq(&rq->rt); |
|---|
| 6796 | 8254 | init_dl_rq(&rq->dl); |
|---|
| 6797 | 8255 | #ifdef CONFIG_FAIR_GROUP_SCHED |
|---|
| 6798 | | - root_task_group.shares = ROOT_TASK_GROUP_LOAD; |
|---|
| 6799 | 8256 | INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); |
|---|
| 6800 | 8257 | rq->tmp_alone_branch = &rq->leaf_cfs_rq_list; |
|---|
| 6801 | 8258 | /* |
|---|
| .. | .. |
|---|
| 6817 | 8274 | * We achieve this by letting root_task_group's tasks sit |
|---|
| 6818 | 8275 | * directly in rq->cfs (i.e. root_task_group->se[] = NULL). |
|---|
| 6819 | 8276 | */ |
|---|
| 6820 | | - init_cfs_bandwidth(&root_task_group.cfs_bandwidth); |
|---|
| 6821 | 8277 | init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL); |
|---|
| 6822 | 8278 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
|---|
| 6823 | 8279 | |
|---|
| .. | .. |
|---|
| 6825 | 8281 | #ifdef CONFIG_RT_GROUP_SCHED |
|---|
| 6826 | 8282 | init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL); |
|---|
| 6827 | 8283 | #endif |
|---|
| 6828 | | - |
|---|
| 6829 | | - for (j = 0; j < CPU_LOAD_IDX_MAX; j++) |
|---|
| 6830 | | - rq->cpu_load[j] = 0; |
|---|
| 6831 | | - |
|---|
| 6832 | 8284 | #ifdef CONFIG_SMP |
|---|
| 6833 | 8285 | rq->sd = NULL; |
|---|
| 6834 | 8286 | rq->rd = NULL; |
|---|
| .. | .. |
|---|
| 6847 | 8299 | |
|---|
| 6848 | 8300 | rq_attach_root(rq, &def_root_domain); |
|---|
| 6849 | 8301 | #ifdef CONFIG_NO_HZ_COMMON |
|---|
| 6850 | | - rq->last_load_update_tick = jiffies; |
|---|
| 6851 | 8302 | rq->last_blocked_load_update_tick = jiffies; |
|---|
| 6852 | 8303 | atomic_set(&rq->nohz_flags, 0); |
|---|
| 8304 | + |
|---|
| 8305 | + rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); |
|---|
| 8306 | +#endif |
|---|
| 8307 | +#ifdef CONFIG_HOTPLUG_CPU |
|---|
| 8308 | + rcuwait_init(&rq->hotplug_wait); |
|---|
| 6853 | 8309 | #endif |
|---|
| 6854 | 8310 | #endif /* CONFIG_SMP */ |
|---|
| 6855 | 8311 | hrtick_rq_init(rq); |
|---|
| 6856 | 8312 | atomic_set(&rq->nr_iowait, 0); |
|---|
| 6857 | 8313 | } |
|---|
| 6858 | 8314 | |
|---|
| 6859 | | - set_load_weight(&init_task, false); |
|---|
| 8315 | + set_load_weight(&init_task); |
|---|
| 6860 | 8316 | |
|---|
| 6861 | 8317 | /* |
|---|
| 6862 | 8318 | * The boot idle thread does lazy MMU switching as well: |
|---|
| .. | .. |
|---|
| 6925 | 8381 | rcu_sleep_check(); |
|---|
| 6926 | 8382 | |
|---|
| 6927 | 8383 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && |
|---|
| 6928 | | - !is_idle_task(current)) || |
|---|
| 8384 | + !is_idle_task(current) && !current->non_block_count) || |
|---|
| 6929 | 8385 | system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || |
|---|
| 6930 | 8386 | oops_in_progress) |
|---|
| 6931 | 8387 | return; |
|---|
| .. | .. |
|---|
| 6941 | 8397 | "BUG: sleeping function called from invalid context at %s:%d\n", |
|---|
| 6942 | 8398 | file, line); |
|---|
| 6943 | 8399 | printk(KERN_ERR |
|---|
| 6944 | | - "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", |
|---|
| 6945 | | - in_atomic(), irqs_disabled(), |
|---|
| 8400 | + "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", |
|---|
| 8401 | + in_atomic(), irqs_disabled(), current->non_block_count, |
|---|
| 6946 | 8402 | current->pid, current->comm); |
|---|
| 6947 | 8403 | |
|---|
| 6948 | 8404 | if (task_stack_end_corrupted(current)) |
|---|
| .. | .. |
|---|
| 6954 | 8410 | if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) |
|---|
| 6955 | 8411 | && !preempt_count_equals(preempt_offset)) { |
|---|
| 6956 | 8412 | pr_err("Preemption disabled at:"); |
|---|
| 6957 | | - print_ip_sym(preempt_disable_ip); |
|---|
| 6958 | | - pr_cont("\n"); |
|---|
| 8413 | + print_ip_sym(KERN_ERR, preempt_disable_ip); |
|---|
| 6959 | 8414 | } |
|---|
| 8415 | + |
|---|
| 8416 | + trace_android_rvh_schedule_bug(NULL); |
|---|
| 8417 | + |
|---|
| 6960 | 8418 | dump_stack(); |
|---|
| 6961 | 8419 | add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
|---|
| 6962 | 8420 | } |
|---|
| 6963 | 8421 | EXPORT_SYMBOL(___might_sleep); |
|---|
| 8422 | + |
|---|
| 8423 | +void __cant_sleep(const char *file, int line, int preempt_offset) |
|---|
| 8424 | +{ |
|---|
| 8425 | + static unsigned long prev_jiffy; |
|---|
| 8426 | + |
|---|
| 8427 | + if (irqs_disabled()) |
|---|
| 8428 | + return; |
|---|
| 8429 | + |
|---|
| 8430 | + if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) |
|---|
| 8431 | + return; |
|---|
| 8432 | + |
|---|
| 8433 | + if (preempt_count() > preempt_offset) |
|---|
| 8434 | + return; |
|---|
| 8435 | + |
|---|
| 8436 | + if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) |
|---|
| 8437 | + return; |
|---|
| 8438 | + prev_jiffy = jiffies; |
|---|
| 8439 | + |
|---|
| 8440 | + printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); |
|---|
| 8441 | + printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", |
|---|
| 8442 | + in_atomic(), irqs_disabled(), |
|---|
| 8443 | + current->pid, current->comm); |
|---|
| 8444 | + |
|---|
| 8445 | + debug_show_held_locks(current); |
|---|
| 8446 | + dump_stack(); |
|---|
| 8447 | + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
|---|
| 8448 | +} |
|---|
| 8449 | +EXPORT_SYMBOL_GPL(__cant_sleep); |
|---|
| 8450 | + |
|---|
| 8451 | +#ifdef CONFIG_SMP |
|---|
| 8452 | +void __cant_migrate(const char *file, int line) |
|---|
| 8453 | +{ |
|---|
| 8454 | + static unsigned long prev_jiffy; |
|---|
| 8455 | + |
|---|
| 8456 | + if (irqs_disabled()) |
|---|
| 8457 | + return; |
|---|
| 8458 | + |
|---|
| 8459 | + if (is_migration_disabled(current)) |
|---|
| 8460 | + return; |
|---|
| 8461 | + |
|---|
| 8462 | + if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) |
|---|
| 8463 | + return; |
|---|
| 8464 | + |
|---|
| 8465 | + if (preempt_count() > 0) |
|---|
| 8466 | + return; |
|---|
| 8467 | + |
|---|
| 8468 | + if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) |
|---|
| 8469 | + return; |
|---|
| 8470 | + prev_jiffy = jiffies; |
|---|
| 8471 | + |
|---|
| 8472 | + pr_err("BUG: assuming non migratable context at %s:%d\n", file, line); |
|---|
| 8473 | + pr_err("in_atomic(): %d, irqs_disabled(): %d, migration_disabled() %u pid: %d, name: %s\n", |
|---|
| 8474 | + in_atomic(), irqs_disabled(), is_migration_disabled(current), |
|---|
| 8475 | + current->pid, current->comm); |
|---|
| 8476 | + |
|---|
| 8477 | + debug_show_held_locks(current); |
|---|
| 8478 | + dump_stack(); |
|---|
| 8479 | + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
|---|
| 8480 | +} |
|---|
| 8481 | +EXPORT_SYMBOL_GPL(__cant_migrate); |
|---|
| 8482 | +#endif |
|---|
| 6964 | 8483 | #endif |
|---|
| 6965 | 8484 | |
|---|
| 6966 | 8485 | #ifdef CONFIG_MAGIC_SYSRQ |
|---|
| .. | .. |
|---|
| 7029 | 8548 | |
|---|
| 7030 | 8549 | #ifdef CONFIG_IA64 |
|---|
| 7031 | 8550 | /** |
|---|
| 7032 | | - * set_curr_task - set the current task for a given CPU. |
|---|
| 8551 | + * ia64_set_curr_task - set the current task for a given CPU. |
|---|
| 7033 | 8552 | * @cpu: the processor in question. |
|---|
| 7034 | 8553 | * @p: the task pointer to set. |
|---|
| 7035 | 8554 | * |
|---|
| .. | .. |
|---|
| 7195 | 8714 | |
|---|
| 7196 | 8715 | if (queued) |
|---|
| 7197 | 8716 | enqueue_task(rq, tsk, queue_flags); |
|---|
| 7198 | | - if (running) |
|---|
| 7199 | | - set_curr_task(rq, tsk); |
|---|
| 8717 | + if (running) { |
|---|
| 8718 | + set_next_task(rq, tsk); |
|---|
| 8719 | + /* |
|---|
| 8720 | + * After changing group, the running task may have joined a |
|---|
| 8721 | + * throttled one but it's still the running task. Trigger a |
|---|
| 8722 | + * resched to make sure that task can still run. |
|---|
| 8723 | + */ |
|---|
| 8724 | + resched_curr(rq); |
|---|
| 8725 | + } |
|---|
| 7200 | 8726 | |
|---|
| 7201 | 8727 | task_rq_unlock(rq, tsk, &rf); |
|---|
| 7202 | 8728 | } |
|---|
| .. | .. |
|---|
| 7235 | 8761 | |
|---|
| 7236 | 8762 | #ifdef CONFIG_UCLAMP_TASK_GROUP |
|---|
| 7237 | 8763 | /* Propagate the effective uclamp value for the new group */ |
|---|
| 8764 | + mutex_lock(&uclamp_mutex); |
|---|
| 8765 | + rcu_read_lock(); |
|---|
| 7238 | 8766 | cpu_util_update_eff(css); |
|---|
| 8767 | + rcu_read_unlock(); |
|---|
| 8768 | + mutex_unlock(&uclamp_mutex); |
|---|
| 7239 | 8769 | #endif |
|---|
| 7240 | 8770 | |
|---|
| 8771 | + trace_android_rvh_cpu_cgroup_online(css); |
|---|
| 7241 | 8772 | return 0; |
|---|
| 7242 | 8773 | } |
|---|
| 7243 | 8774 | |
|---|
| .. | .. |
|---|
| 7303 | 8834 | if (ret) |
|---|
| 7304 | 8835 | break; |
|---|
| 7305 | 8836 | } |
|---|
| 8837 | + |
|---|
| 8838 | + trace_android_rvh_cpu_cgroup_can_attach(tset, &ret); |
|---|
| 8839 | + |
|---|
| 7306 | 8840 | return ret; |
|---|
| 7307 | 8841 | } |
|---|
| 7308 | 8842 | |
|---|
| .. | .. |
|---|
| 7313 | 8847 | |
|---|
| 7314 | 8848 | cgroup_taskset_for_each(task, css, tset) |
|---|
| 7315 | 8849 | sched_move_task(task); |
|---|
| 8850 | + |
|---|
| 8851 | + trace_android_rvh_cpu_cgroup_attach(tset); |
|---|
| 7316 | 8852 | } |
|---|
| 7317 | 8853 | |
|---|
| 7318 | 8854 | #ifdef CONFIG_UCLAMP_TASK_GROUP |
|---|
| .. | .. |
|---|
| 7324 | 8860 | unsigned int eff[UCLAMP_CNT]; |
|---|
| 7325 | 8861 | enum uclamp_id clamp_id; |
|---|
| 7326 | 8862 | unsigned int clamps; |
|---|
| 8863 | + |
|---|
| 8864 | + lockdep_assert_held(&uclamp_mutex); |
|---|
| 8865 | + SCHED_WARN_ON(!rcu_read_lock_held()); |
|---|
| 7327 | 8866 | |
|---|
| 7328 | 8867 | css_for_each_descendant_pre(css, top_css) { |
|---|
| 7329 | 8868 | uc_parent = css_tg(css)->parent |
|---|
| .. | .. |
|---|
| 7357 | 8896 | } |
|---|
| 7358 | 8897 | |
|---|
| 7359 | 8898 | /* Immediately update descendants RUNNABLE tasks */ |
|---|
| 7360 | | - uclamp_update_active_tasks(css, clamps); |
|---|
| 8899 | + uclamp_update_active_tasks(css); |
|---|
| 7361 | 8900 | } |
|---|
| 7362 | 8901 | } |
|---|
| 7363 | 8902 | |
|---|
| .. | .. |
|---|
| 7414 | 8953 | req = capacity_from_percent(buf); |
|---|
| 7415 | 8954 | if (req.ret) |
|---|
| 7416 | 8955 | return req.ret; |
|---|
| 8956 | + |
|---|
| 8957 | + static_branch_enable(&sched_uclamp_used); |
|---|
| 7417 | 8958 | |
|---|
| 7418 | 8959 | mutex_lock(&uclamp_mutex); |
|---|
| 7419 | 8960 | rcu_read_lock(); |
|---|
| .. | .. |
|---|
| 7529 | 9070 | static DEFINE_MUTEX(cfs_constraints_mutex); |
|---|
| 7530 | 9071 | |
|---|
| 7531 | 9072 | const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */ |
|---|
| 7532 | | -const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ |
|---|
| 9073 | +static const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ |
|---|
| 9074 | +/* More than 203 days if BW_SHIFT equals 20. */ |
|---|
| 9075 | +static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC; |
|---|
| 7533 | 9076 | |
|---|
| 7534 | 9077 | static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime); |
|---|
| 7535 | 9078 | |
|---|
| .. | .. |
|---|
| 7555 | 9098 | * feasibility. |
|---|
| 7556 | 9099 | */ |
|---|
| 7557 | 9100 | if (period > max_cfs_quota_period) |
|---|
| 9101 | + return -EINVAL; |
|---|
| 9102 | + |
|---|
| 9103 | + /* |
|---|
| 9104 | + * Bound quota to defend quota against overflow during bandwidth shift. |
|---|
| 9105 | + */ |
|---|
| 9106 | + if (quota != RUNTIME_INF && quota > max_cfs_runtime) |
|---|
| 7558 | 9107 | return -EINVAL; |
|---|
| 7559 | 9108 | |
|---|
| 7560 | 9109 | /* |
|---|
| .. | .. |
|---|
| 7609 | 9158 | return ret; |
|---|
| 7610 | 9159 | } |
|---|
| 7611 | 9160 | |
|---|
| 7612 | | -int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) |
|---|
| 9161 | +static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) |
|---|
| 7613 | 9162 | { |
|---|
| 7614 | 9163 | u64 quota, period; |
|---|
| 7615 | 9164 | |
|---|
| .. | .. |
|---|
| 7624 | 9173 | return tg_set_cfs_bandwidth(tg, period, quota); |
|---|
| 7625 | 9174 | } |
|---|
| 7626 | 9175 | |
|---|
| 7627 | | -long tg_get_cfs_quota(struct task_group *tg) |
|---|
| 9176 | +static long tg_get_cfs_quota(struct task_group *tg) |
|---|
| 7628 | 9177 | { |
|---|
| 7629 | 9178 | u64 quota_us; |
|---|
| 7630 | 9179 | |
|---|
| .. | .. |
|---|
| 7637 | 9186 | return quota_us; |
|---|
| 7638 | 9187 | } |
|---|
| 7639 | 9188 | |
|---|
| 7640 | | -int tg_set_cfs_period(struct task_group *tg, long cfs_period_us) |
|---|
| 9189 | +static int tg_set_cfs_period(struct task_group *tg, long cfs_period_us) |
|---|
| 7641 | 9190 | { |
|---|
| 7642 | 9191 | u64 quota, period; |
|---|
| 7643 | 9192 | |
|---|
| .. | .. |
|---|
| 7650 | 9199 | return tg_set_cfs_bandwidth(tg, period, quota); |
|---|
| 7651 | 9200 | } |
|---|
| 7652 | 9201 | |
|---|
| 7653 | | -long tg_get_cfs_period(struct task_group *tg) |
|---|
| 9202 | +static long tg_get_cfs_period(struct task_group *tg) |
|---|
| 7654 | 9203 | { |
|---|
| 7655 | 9204 | u64 cfs_period_us; |
|---|
| 7656 | 9205 | |
|---|
| .. | .. |
|---|
| 8127 | 9676 | /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, |
|---|
| 8128 | 9677 | }; |
|---|
| 8129 | 9678 | |
|---|
| 8130 | | -#undef CREATE_TRACE_POINTS |
|---|
| 8131 | | - |
|---|
| 8132 | | -#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) |
|---|
| 8133 | | - |
|---|
| 8134 | | -static inline void |
|---|
| 8135 | | -update_nr_migratory(struct task_struct *p, long delta) |
|---|
| 9679 | +void call_trace_sched_update_nr_running(struct rq *rq, int count) |
|---|
| 8136 | 9680 | { |
|---|
| 8137 | | - if (unlikely((p->sched_class == &rt_sched_class || |
|---|
| 8138 | | - p->sched_class == &dl_sched_class) && |
|---|
| 8139 | | - p->nr_cpus_allowed > 1)) { |
|---|
| 8140 | | - if (p->sched_class == &rt_sched_class) |
|---|
| 8141 | | - task_rq(p)->rt.rt_nr_migratory += delta; |
|---|
| 8142 | | - else |
|---|
| 8143 | | - task_rq(p)->dl.dl_nr_migratory += delta; |
|---|
| 8144 | | - } |
|---|
| 9681 | + trace_sched_update_nr_running_tp(rq, count); |
|---|
| 8145 | 9682 | } |
|---|
| 8146 | | - |
|---|
| 8147 | | -static inline void |
|---|
| 8148 | | -migrate_disable_update_cpus_allowed(struct task_struct *p) |
|---|
| 8149 | | -{ |
|---|
| 8150 | | - p->cpus_ptr = cpumask_of(smp_processor_id()); |
|---|
| 8151 | | - update_nr_migratory(p, -1); |
|---|
| 8152 | | - p->nr_cpus_allowed = 1; |
|---|
| 8153 | | -} |
|---|
| 8154 | | - |
|---|
| 8155 | | -static inline void |
|---|
| 8156 | | -migrate_enable_update_cpus_allowed(struct task_struct *p) |
|---|
| 8157 | | -{ |
|---|
| 8158 | | - struct rq *rq; |
|---|
| 8159 | | - struct rq_flags rf; |
|---|
| 8160 | | - |
|---|
| 8161 | | - rq = task_rq_lock(p, &rf); |
|---|
| 8162 | | - p->cpus_ptr = &p->cpus_mask; |
|---|
| 8163 | | - p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask); |
|---|
| 8164 | | - update_nr_migratory(p, 1); |
|---|
| 8165 | | - task_rq_unlock(rq, p, &rf); |
|---|
| 8166 | | -} |
|---|
| 8167 | | - |
|---|
| 8168 | | -void migrate_disable(void) |
|---|
| 8169 | | -{ |
|---|
| 8170 | | - preempt_disable(); |
|---|
| 8171 | | - |
|---|
| 8172 | | - if (++current->migrate_disable == 1) { |
|---|
| 8173 | | - this_rq()->nr_pinned++; |
|---|
| 8174 | | - preempt_lazy_disable(); |
|---|
| 8175 | | -#ifdef CONFIG_SCHED_DEBUG |
|---|
| 8176 | | - WARN_ON_ONCE(current->pinned_on_cpu >= 0); |
|---|
| 8177 | | - current->pinned_on_cpu = smp_processor_id(); |
|---|
| 8178 | | -#endif |
|---|
| 8179 | | - } |
|---|
| 8180 | | - |
|---|
| 8181 | | - preempt_enable(); |
|---|
| 8182 | | -} |
|---|
| 8183 | | -EXPORT_SYMBOL(migrate_disable); |
|---|
| 8184 | | - |
|---|
| 8185 | | -static void migrate_disabled_sched(struct task_struct *p) |
|---|
| 8186 | | -{ |
|---|
| 8187 | | - if (p->migrate_disable_scheduled) |
|---|
| 8188 | | - return; |
|---|
| 8189 | | - |
|---|
| 8190 | | - migrate_disable_update_cpus_allowed(p); |
|---|
| 8191 | | - p->migrate_disable_scheduled = 1; |
|---|
| 8192 | | -} |
|---|
| 8193 | | - |
|---|
| 8194 | | -static DEFINE_PER_CPU(struct cpu_stop_work, migrate_work); |
|---|
| 8195 | | -static DEFINE_PER_CPU(struct migration_arg, migrate_arg); |
|---|
| 8196 | | - |
|---|
| 8197 | | -void migrate_enable(void) |
|---|
| 8198 | | -{ |
|---|
| 8199 | | - struct task_struct *p = current; |
|---|
| 8200 | | - struct rq *rq = this_rq(); |
|---|
| 8201 | | - int cpu = task_cpu(p); |
|---|
| 8202 | | - |
|---|
| 8203 | | - WARN_ON_ONCE(p->migrate_disable <= 0); |
|---|
| 8204 | | - if (p->migrate_disable > 1) { |
|---|
| 8205 | | - p->migrate_disable--; |
|---|
| 8206 | | - return; |
|---|
| 8207 | | - } |
|---|
| 8208 | | - |
|---|
| 8209 | | - preempt_disable(); |
|---|
| 8210 | | - |
|---|
| 8211 | | -#ifdef CONFIG_SCHED_DEBUG |
|---|
| 8212 | | - WARN_ON_ONCE(current->pinned_on_cpu != cpu); |
|---|
| 8213 | | - current->pinned_on_cpu = -1; |
|---|
| 8214 | | -#endif |
|---|
| 8215 | | - |
|---|
| 8216 | | - WARN_ON_ONCE(rq->nr_pinned < 1); |
|---|
| 8217 | | - |
|---|
| 8218 | | - p->migrate_disable = 0; |
|---|
| 8219 | | - rq->nr_pinned--; |
|---|
| 8220 | | -#ifdef CONFIG_HOTPLUG_CPU |
|---|
| 8221 | | - if (rq->nr_pinned == 0 && unlikely(!cpu_active(cpu)) && |
|---|
| 8222 | | - takedown_cpu_task) |
|---|
| 8223 | | - wake_up_process(takedown_cpu_task); |
|---|
| 8224 | | -#endif |
|---|
| 8225 | | - |
|---|
| 8226 | | - if (!p->migrate_disable_scheduled) |
|---|
| 8227 | | - goto out; |
|---|
| 8228 | | - |
|---|
| 8229 | | - p->migrate_disable_scheduled = 0; |
|---|
| 8230 | | - |
|---|
| 8231 | | - migrate_enable_update_cpus_allowed(p); |
|---|
| 8232 | | - |
|---|
| 8233 | | - WARN_ON(smp_processor_id() != cpu); |
|---|
| 8234 | | - if (!is_cpu_allowed(p, cpu)) { |
|---|
| 8235 | | - struct migration_arg __percpu *arg; |
|---|
| 8236 | | - struct cpu_stop_work __percpu *work; |
|---|
| 8237 | | - struct rq_flags rf; |
|---|
| 8238 | | - |
|---|
| 8239 | | - work = this_cpu_ptr(&migrate_work); |
|---|
| 8240 | | - arg = this_cpu_ptr(&migrate_arg); |
|---|
| 8241 | | - WARN_ON_ONCE(!arg->done && !work->disabled && work->arg); |
|---|
| 8242 | | - |
|---|
| 8243 | | - arg->task = p; |
|---|
| 8244 | | - arg->done = false; |
|---|
| 8245 | | - |
|---|
| 8246 | | - rq = task_rq_lock(p, &rf); |
|---|
| 8247 | | - update_rq_clock(rq); |
|---|
| 8248 | | - arg->dest_cpu = select_fallback_rq(cpu, p); |
|---|
| 8249 | | - task_rq_unlock(rq, p, &rf); |
|---|
| 8250 | | - |
|---|
| 8251 | | - stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop, |
|---|
| 8252 | | - arg, work); |
|---|
| 8253 | | - tlb_migrate_finish(p->mm); |
|---|
| 8254 | | - } |
|---|
| 8255 | | - |
|---|
| 8256 | | -out: |
|---|
| 8257 | | - preempt_lazy_enable(); |
|---|
| 8258 | | - preempt_enable(); |
|---|
| 8259 | | -} |
|---|
| 8260 | | -EXPORT_SYMBOL(migrate_enable); |
|---|
| 8261 | | - |
|---|
| 8262 | | -int cpu_nr_pinned(int cpu) |
|---|
| 8263 | | -{ |
|---|
| 8264 | | - struct rq *rq = cpu_rq(cpu); |
|---|
| 8265 | | - |
|---|
| 8266 | | - return rq->nr_pinned; |
|---|
| 8267 | | -} |
|---|
| 8268 | | - |
|---|
| 8269 | | -#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) |
|---|
| 8270 | | -static void migrate_disabled_sched(struct task_struct *p) |
|---|
| 8271 | | -{ |
|---|
| 8272 | | -} |
|---|
| 8273 | | - |
|---|
| 8274 | | -void migrate_disable(void) |
|---|
| 8275 | | -{ |
|---|
| 8276 | | -#ifdef CONFIG_SCHED_DEBUG |
|---|
| 8277 | | - current->migrate_disable++; |
|---|
| 8278 | | -#endif |
|---|
| 8279 | | - barrier(); |
|---|
| 8280 | | -} |
|---|
| 8281 | | -EXPORT_SYMBOL(migrate_disable); |
|---|
| 8282 | | - |
|---|
| 8283 | | -void migrate_enable(void) |
|---|
| 8284 | | -{ |
|---|
| 8285 | | -#ifdef CONFIG_SCHED_DEBUG |
|---|
| 8286 | | - struct task_struct *p = current; |
|---|
| 8287 | | - |
|---|
| 8288 | | - WARN_ON_ONCE(p->migrate_disable <= 0); |
|---|
| 8289 | | - p->migrate_disable--; |
|---|
| 8290 | | -#endif |
|---|
| 8291 | | - barrier(); |
|---|
| 8292 | | -} |
|---|
| 8293 | | -EXPORT_SYMBOL(migrate_enable); |
|---|
| 8294 | | -#else |
|---|
| 8295 | | -static void migrate_disabled_sched(struct task_struct *p) |
|---|
| 8296 | | -{ |
|---|
| 8297 | | -} |
|---|
| 8298 | | -#endif |
|---|