.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * kernel/sched/core.c |
---|
3 | 4 | * |
---|
.. | .. |
---|
5 | 6 | * |
---|
6 | 7 | * Copyright (C) 1991-2002 Linus Torvalds |
---|
7 | 8 | */ |
---|
| 9 | +#define CREATE_TRACE_POINTS |
---|
| 10 | +#include <trace/events/sched.h> |
---|
| 11 | +#undef CREATE_TRACE_POINTS |
---|
| 12 | + |
---|
8 | 13 | #include "sched.h" |
---|
9 | 14 | |
---|
10 | 15 | #include <linux/nospec.h> |
---|
.. | .. |
---|
16 | 21 | #include <asm/tlb.h> |
---|
17 | 22 | |
---|
18 | 23 | #include "../workqueue_internal.h" |
---|
| 24 | +#include "../../io_uring/io-wq.h" |
---|
19 | 25 | #include "../smpboot.h" |
---|
20 | 26 | |
---|
21 | 27 | #include "pelt.h" |
---|
| 28 | +#include "smp.h" |
---|
22 | 29 | |
---|
23 | | -#define CREATE_TRACE_POINTS |
---|
24 | | -#include <trace/events/sched.h> |
---|
| 30 | +#include <trace/hooks/sched.h> |
---|
| 31 | +#include <trace/hooks/dtask.h> |
---|
| 32 | + |
---|
| 33 | +/* |
---|
| 34 | + * Export tracepoints that act as a bare tracehook (ie: have no trace event |
---|
| 35 | + * associated with them) to allow external modules to probe them. |
---|
| 36 | + */ |
---|
| 37 | +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_cfs_tp); |
---|
| 38 | +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp); |
---|
| 39 | +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp); |
---|
| 40 | +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); |
---|
| 41 | +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp); |
---|
| 42 | +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_thermal_tp); |
---|
| 43 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_cpu_capacity_tp); |
---|
| 44 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp); |
---|
| 45 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_cfs_tp); |
---|
| 46 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_se_tp); |
---|
| 47 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp); |
---|
| 48 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_switch); |
---|
| 49 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_waking); |
---|
| 50 | +#ifdef CONFIG_SCHEDSTATS |
---|
| 51 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_sleep); |
---|
| 52 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_wait); |
---|
| 53 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_iowait); |
---|
| 54 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_blocked); |
---|
| 55 | +#endif |
---|
25 | 56 | |
---|
26 | 57 | DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
---|
| 58 | +EXPORT_SYMBOL_GPL(runqueues); |
---|
27 | 59 | |
---|
28 | 60 | #ifdef CONFIG_SCHED_DEBUG |
---|
29 | 61 | /* |
---|
.. | .. |
---|
38 | 70 | const_debug unsigned int sysctl_sched_features = |
---|
39 | 71 | #include "features.h" |
---|
40 | 72 | 0; |
---|
| 73 | +EXPORT_SYMBOL_GPL(sysctl_sched_features); |
---|
41 | 74 | #undef SCHED_FEAT |
---|
42 | 75 | #endif |
---|
43 | 76 | |
---|
.. | .. |
---|
45 | 78 | * Number of tasks to iterate in a single balance run. |
---|
46 | 79 | * Limited because this is done with IRQs disabled. |
---|
47 | 80 | */ |
---|
| 81 | +#ifdef CONFIG_PREEMPT_RT |
---|
| 82 | +const_debug unsigned int sysctl_sched_nr_migrate = 8; |
---|
| 83 | +#else |
---|
48 | 84 | const_debug unsigned int sysctl_sched_nr_migrate = 32; |
---|
| 85 | +#endif |
---|
49 | 86 | |
---|
50 | 87 | /* |
---|
51 | 88 | * period over which we measure -rt task CPU usage in us. |
---|
.. | .. |
---|
60 | 97 | * default: 0.95s |
---|
61 | 98 | */ |
---|
62 | 99 | int sysctl_sched_rt_runtime = 950000; |
---|
| 100 | + |
---|
| 101 | + |
---|
| 102 | +/* |
---|
| 103 | + * Serialization rules: |
---|
| 104 | + * |
---|
| 105 | + * Lock order: |
---|
| 106 | + * |
---|
| 107 | + * p->pi_lock |
---|
| 108 | + * rq->lock |
---|
| 109 | + * hrtimer_cpu_base->lock (hrtimer_start() for bandwidth controls) |
---|
| 110 | + * |
---|
| 111 | + * rq1->lock |
---|
| 112 | + * rq2->lock where: rq1 < rq2 |
---|
| 113 | + * |
---|
| 114 | + * Regular state: |
---|
| 115 | + * |
---|
| 116 | + * Normal scheduling state is serialized by rq->lock. __schedule() takes the |
---|
| 117 | + * local CPU's rq->lock, it optionally removes the task from the runqueue and |
---|
| 118 | + * always looks at the local rq data structures to find the most elegible task |
---|
| 119 | + * to run next. |
---|
| 120 | + * |
---|
| 121 | + * Task enqueue is also under rq->lock, possibly taken from another CPU. |
---|
| 122 | + * Wakeups from another LLC domain might use an IPI to transfer the enqueue to |
---|
| 123 | + * the local CPU to avoid bouncing the runqueue state around [ see |
---|
| 124 | + * ttwu_queue_wakelist() ] |
---|
| 125 | + * |
---|
| 126 | + * Task wakeup, specifically wakeups that involve migration, are horribly |
---|
| 127 | + * complicated to avoid having to take two rq->locks. |
---|
| 128 | + * |
---|
| 129 | + * Special state: |
---|
| 130 | + * |
---|
| 131 | + * System-calls and anything external will use task_rq_lock() which acquires |
---|
| 132 | + * both p->pi_lock and rq->lock. As a consequence the state they change is |
---|
| 133 | + * stable while holding either lock: |
---|
| 134 | + * |
---|
| 135 | + * - sched_setaffinity()/ |
---|
| 136 | + * set_cpus_allowed_ptr(): p->cpus_ptr, p->nr_cpus_allowed |
---|
| 137 | + * - set_user_nice(): p->se.load, p->*prio |
---|
| 138 | + * - __sched_setscheduler(): p->sched_class, p->policy, p->*prio, |
---|
| 139 | + * p->se.load, p->rt_priority, |
---|
| 140 | + * p->dl.dl_{runtime, deadline, period, flags, bw, density} |
---|
| 141 | + * - sched_setnuma(): p->numa_preferred_nid |
---|
| 142 | + * - sched_move_task()/ |
---|
| 143 | + * cpu_cgroup_fork(): p->sched_task_group |
---|
| 144 | + * - uclamp_update_active() p->uclamp* |
---|
| 145 | + * |
---|
| 146 | + * p->state <- TASK_*: |
---|
| 147 | + * |
---|
| 148 | + * is changed locklessly using set_current_state(), __set_current_state() or |
---|
| 149 | + * set_special_state(), see their respective comments, or by |
---|
| 150 | + * try_to_wake_up(). This latter uses p->pi_lock to serialize against |
---|
| 151 | + * concurrent self. |
---|
| 152 | + * |
---|
| 153 | + * p->on_rq <- { 0, 1 = TASK_ON_RQ_QUEUED, 2 = TASK_ON_RQ_MIGRATING }: |
---|
| 154 | + * |
---|
| 155 | + * is set by activate_task() and cleared by deactivate_task(), under |
---|
| 156 | + * rq->lock. Non-zero indicates the task is runnable, the special |
---|
| 157 | + * ON_RQ_MIGRATING state is used for migration without holding both |
---|
| 158 | + * rq->locks. It indicates task_cpu() is not stable, see task_rq_lock(). |
---|
| 159 | + * |
---|
| 160 | + * p->on_cpu <- { 0, 1 }: |
---|
| 161 | + * |
---|
| 162 | + * is set by prepare_task() and cleared by finish_task() such that it will be |
---|
| 163 | + * set before p is scheduled-in and cleared after p is scheduled-out, both |
---|
| 164 | + * under rq->lock. Non-zero indicates the task is running on its CPU. |
---|
| 165 | + * |
---|
| 166 | + * [ The astute reader will observe that it is possible for two tasks on one |
---|
| 167 | + * CPU to have ->on_cpu = 1 at the same time. ] |
---|
| 168 | + * |
---|
| 169 | + * task_cpu(p): is changed by set_task_cpu(), the rules are: |
---|
| 170 | + * |
---|
| 171 | + * - Don't call set_task_cpu() on a blocked task: |
---|
| 172 | + * |
---|
| 173 | + * We don't care what CPU we're not running on, this simplifies hotplug, |
---|
| 174 | + * the CPU assignment of blocked tasks isn't required to be valid. |
---|
| 175 | + * |
---|
| 176 | + * - for try_to_wake_up(), called under p->pi_lock: |
---|
| 177 | + * |
---|
| 178 | + * This allows try_to_wake_up() to only take one rq->lock, see its comment. |
---|
| 179 | + * |
---|
| 180 | + * - for migration called under rq->lock: |
---|
| 181 | + * [ see task_on_rq_migrating() in task_rq_lock() ] |
---|
| 182 | + * |
---|
| 183 | + * o move_queued_task() |
---|
| 184 | + * o detach_task() |
---|
| 185 | + * |
---|
| 186 | + * - for migration called under double_rq_lock(): |
---|
| 187 | + * |
---|
| 188 | + * o __migrate_swap_task() |
---|
| 189 | + * o push_rt_task() / pull_rt_task() |
---|
| 190 | + * o push_dl_task() / pull_dl_task() |
---|
| 191 | + * o dl_task_offline_migration() |
---|
| 192 | + * |
---|
| 193 | + */ |
---|
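A minimal standalone sketch of the lock ordering described in the comment block above, with pthread mutexes standing in for p->pi_lock and rq->lock (illustration only, not kernel code; the two-runqueue ordering is modeled by always taking the lower-numbered lock first):

```c
/*
 * Standalone illustration of the lock-ordering rule documented above:
 * the task-side lock is always taken before the runqueue lock, and two
 * runqueue locks are always taken in a fixed order.
 * Build with: gcc -pthread lock_order.c
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t pi_lock = PTHREAD_MUTEX_INITIALIZER;	/* stand-in for p->pi_lock */
static pthread_mutex_t rq_lock[2] = {				/* stand-in for rq->lock */
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
};

/* Analogue of task_rq_lock(): outer pi_lock first, then the rq lock. */
static void task_rq_lock_demo(int cpu)
{
	pthread_mutex_lock(&pi_lock);
	pthread_mutex_lock(&rq_lock[cpu]);
}

static void task_rq_unlock_demo(int cpu)
{
	pthread_mutex_unlock(&rq_lock[cpu]);
	pthread_mutex_unlock(&pi_lock);
}

/* Analogue of double_rq_lock(): always lock the lower-numbered rq first. */
static void double_rq_lock_demo(int a, int b)
{
	if (a > b) { int t = a; a = b; b = t; }
	pthread_mutex_lock(&rq_lock[a]);
	if (a != b)
		pthread_mutex_lock(&rq_lock[b]);
}

static void double_rq_unlock_demo(int a, int b)
{
	pthread_mutex_unlock(&rq_lock[a]);
	if (a != b)
		pthread_mutex_unlock(&rq_lock[b]);
}

int main(void)
{
	task_rq_lock_demo(0);
	printf("holding pi_lock + rq0->lock: per-task state is stable\n");
	task_rq_unlock_demo(0);

	double_rq_lock_demo(1, 0);	/* still acquires rq0 before rq1 */
	printf("holding rq0->lock + rq1->lock for migration\n");
	double_rq_unlock_demo(1, 0);
	return 0;
}
```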
63 | 194 | |
---|
64 | 195 | /* |
---|
65 | 196 | * __task_rq_lock - lock the rq @p resides on. |
---|
.. | .. |
---|
84 | 215 | cpu_relax(); |
---|
85 | 216 | } |
---|
86 | 217 | } |
---|
| 218 | +EXPORT_SYMBOL_GPL(__task_rq_lock); |
---|
87 | 219 | |
---|
88 | 220 | /* |
---|
89 | 221 | * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. |
---|
.. | .. |
---|
126 | 258 | cpu_relax(); |
---|
127 | 259 | } |
---|
128 | 260 | } |
---|
| 261 | +EXPORT_SYMBOL_GPL(task_rq_lock); |
---|
129 | 262 | |
---|
130 | 263 | /* |
---|
131 | 264 | * RQ-clock updating methods: |
---|
.. | .. |
---|
206 | 339 | rq->clock += delta; |
---|
207 | 340 | update_rq_clock_task(rq, delta); |
---|
208 | 341 | } |
---|
| 342 | +EXPORT_SYMBOL_GPL(update_rq_clock); |
---|
209 | 343 | |
---|
| 344 | +static inline void |
---|
| 345 | +rq_csd_init(struct rq *rq, struct __call_single_data *csd, smp_call_func_t func) |
---|
| 346 | +{ |
---|
| 347 | + csd->flags = 0; |
---|
| 348 | + csd->func = func; |
---|
| 349 | + csd->info = rq; |
---|
| 350 | +} |
---|
210 | 351 | |
---|
211 | 352 | #ifdef CONFIG_SCHED_HRTICK |
---|
212 | 353 | /* |
---|
.. | .. |
---|
243 | 384 | static void __hrtick_restart(struct rq *rq) |
---|
244 | 385 | { |
---|
245 | 386 | struct hrtimer *timer = &rq->hrtick_timer; |
---|
| 387 | + ktime_t time = rq->hrtick_time; |
---|
246 | 388 | |
---|
247 | | - hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED); |
---|
| 389 | + hrtimer_start(timer, time, HRTIMER_MODE_ABS_PINNED_HARD); |
---|
248 | 390 | } |
---|
249 | 391 | |
---|
250 | 392 | /* |
---|
.. | .. |
---|
257 | 399 | |
---|
258 | 400 | rq_lock(rq, &rf); |
---|
259 | 401 | __hrtick_restart(rq); |
---|
260 | | - rq->hrtick_csd_pending = 0; |
---|
261 | 402 | rq_unlock(rq, &rf); |
---|
262 | 403 | } |
---|
263 | 404 | |
---|
.. | .. |
---|
269 | 410 | void hrtick_start(struct rq *rq, u64 delay) |
---|
270 | 411 | { |
---|
271 | 412 | struct hrtimer *timer = &rq->hrtick_timer; |
---|
272 | | - ktime_t time; |
---|
273 | 413 | s64 delta; |
---|
274 | 414 | |
---|
275 | 415 | /* |
---|
.. | .. |
---|
277 | 417 | * doesn't make sense and can cause timer DoS. |
---|
278 | 418 | */ |
---|
279 | 419 | delta = max_t(s64, delay, 10000LL); |
---|
280 | | - time = ktime_add_ns(timer->base->get_time(), delta); |
---|
| 420 | + rq->hrtick_time = ktime_add_ns(timer->base->get_time(), delta); |
---|
281 | 421 | |
---|
282 | | - hrtimer_set_expires(timer, time); |
---|
283 | | - |
---|
284 | | - if (rq == this_rq()) { |
---|
| 422 | + if (rq == this_rq()) |
---|
285 | 423 | __hrtick_restart(rq); |
---|
286 | | - } else if (!rq->hrtick_csd_pending) { |
---|
| 424 | + else |
---|
287 | 425 | smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); |
---|
288 | | - rq->hrtick_csd_pending = 1; |
---|
289 | | - } |
---|
290 | 426 | } |
---|
291 | 427 | |
---|
292 | 428 | #else |
---|
.. | .. |
---|
303 | 439 | */ |
---|
304 | 440 | delay = max_t(u64, delay, 10000LL); |
---|
305 | 441 | hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), |
---|
306 | | - HRTIMER_MODE_REL_PINNED); |
---|
| 442 | + HRTIMER_MODE_REL_PINNED_HARD); |
---|
307 | 443 | } |
---|
| 444 | + |
---|
308 | 445 | #endif /* CONFIG_SMP */ |
---|
309 | 446 | |
---|
310 | 447 | static void hrtick_rq_init(struct rq *rq) |
---|
311 | 448 | { |
---|
312 | 449 | #ifdef CONFIG_SMP |
---|
313 | | - rq->hrtick_csd_pending = 0; |
---|
314 | | - |
---|
315 | | - rq->hrtick_csd.flags = 0; |
---|
316 | | - rq->hrtick_csd.func = __hrtick_start; |
---|
317 | | - rq->hrtick_csd.info = rq; |
---|
| 450 | + rq_csd_init(rq, &rq->hrtick_csd, __hrtick_start); |
---|
318 | 451 | #endif |
---|
319 | | - |
---|
320 | | - hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
---|
| 452 | + hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); |
---|
321 | 453 | rq->hrtick_timer.function = hrtick; |
---|
322 | 454 | } |
---|
323 | 455 | #else /* CONFIG_SCHED_HRTICK */ |
---|
.. | .. |
---|
399 | 531 | #endif |
---|
400 | 532 | #endif |
---|
401 | 533 | |
---|
402 | | -void wake_q_add(struct wake_q_head *head, struct task_struct *task) |
---|
| 534 | +static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task, |
---|
| 535 | + bool sleeper) |
---|
403 | 536 | { |
---|
404 | | - struct wake_q_node *node = &task->wake_q; |
---|
| 537 | + struct wake_q_node *node; |
---|
| 538 | + |
---|
| 539 | + if (sleeper) |
---|
| 540 | + node = &task->wake_q_sleeper; |
---|
| 541 | + else |
---|
| 542 | + node = &task->wake_q; |
---|
405 | 543 | |
---|
406 | 544 | /* |
---|
407 | 545 | * Atomically grab the task, if ->wake_q is !nil already it means |
---|
.. | .. |
---|
412 | 550 | * state, even in the failed case, an explicit smp_mb() must be used. |
---|
413 | 551 | */ |
---|
414 | 552 | smp_mb__before_atomic(); |
---|
415 | | - if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)) |
---|
416 | | - return; |
---|
417 | | - |
---|
418 | | - head->count++; |
---|
419 | | - |
---|
420 | | - get_task_struct(task); |
---|
| 553 | + if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) |
---|
| 554 | + return false; |
---|
421 | 555 | |
---|
422 | 556 | /* |
---|
423 | 557 | * The head is context local, there can be no concurrency. |
---|
424 | 558 | */ |
---|
425 | 559 | *head->lastp = node; |
---|
426 | 560 | head->lastp = &node->next; |
---|
| 561 | + head->count++; |
---|
| 562 | + return true; |
---|
427 | 563 | } |
---|
428 | 564 | |
---|
429 | | -static int |
---|
430 | | -try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags, |
---|
431 | | - int sibling_count_hint); |
---|
| 565 | +/** |
---|
| 566 | + * wake_q_add() - queue a wakeup for 'later' waking. |
---|
| 567 | + * @head: the wake_q_head to add @task to |
---|
| 568 | + * @task: the task to queue for 'later' wakeup |
---|
| 569 | + * |
---|
| 570 | + * Queue a task for later wakeup, most likely by the wake_up_q() call in the |
---|
| 571 | + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come |
---|
| 572 | + * instantly. |
---|
| 573 | + * |
---|
| 574 | + * This function must be used as-if it were wake_up_process(); IOW the task |
---|
| 575 | + * must be ready to be woken at this location. |
---|
| 576 | + */ |
---|
| 577 | +void wake_q_add(struct wake_q_head *head, struct task_struct *task) |
---|
| 578 | +{ |
---|
| 579 | + if (__wake_q_add(head, task, false)) |
---|
| 580 | + get_task_struct(task); |
---|
| 581 | +} |
---|
432 | 582 | |
---|
433 | | -void wake_up_q(struct wake_q_head *head) |
---|
| 583 | +void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task) |
---|
| 584 | +{ |
---|
| 585 | + if (__wake_q_add(head, task, true)) |
---|
| 586 | + get_task_struct(task); |
---|
| 587 | +} |
---|
| 588 | + |
---|
| 589 | +/** |
---|
| 590 | + * wake_q_add_safe() - safely queue a wakeup for 'later' waking. |
---|
| 591 | + * @head: the wake_q_head to add @task to |
---|
| 592 | + * @task: the task to queue for 'later' wakeup |
---|
| 593 | + * |
---|
| 594 | + * Queue a task for later wakeup, most likely by the wake_up_q() call in the |
---|
| 595 | + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come |
---|
| 596 | + * instantly. |
---|
| 597 | + * |
---|
| 598 | + * This function must be used as-if it were wake_up_process(); IOW the task |
---|
| 599 | + * must be ready to be woken at this location. |
---|
| 600 | + * |
---|
| 601 | + * This function is essentially a task-safe equivalent to wake_q_add(). Callers |
---|
| 602 | + * that already hold reference to @task can call the 'safe' version and trust |
---|
| 603 | + * wake_q to do the right thing depending on whether or not the @task is already |
---|
| 604 | + * queued for wakeup. |
---|
| 605 | + */ |
---|
| 606 | +void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) |
---|
| 607 | +{ |
---|
| 608 | + if (!__wake_q_add(head, task, false)) |
---|
| 609 | + put_task_struct(task); |
---|
| 610 | +} |
---|
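The kernel-doc above describes when wake_q_add() takes a task reference and when wake_q_add_safe() drops the caller's one. A standalone sketch of that once-only queueing rule, using a GCC atomic builtin in place of cmpxchg_relaxed() and a plain counter in place of the task refcount (illustration only, not kernel code):

```c
/*
 * Sketch of the wake_q queueing rule: the compare-and-exchange on
 * node->next guarantees a task is queued at most once; wake_q_add()
 * takes a reference only when queueing succeeded, wake_q_add_safe()
 * instead drops the caller's reference when it did not.
 * Build with: gcc wake_q_sketch.c
 */
#include <stdio.h>
#include <stddef.h>

struct wake_q_node { struct wake_q_node *next; };

#define WAKE_Q_TAIL ((struct wake_q_node *)0x01)

struct task {
	const char *name;
	int refcount;
	struct wake_q_node wake_q;
};

struct wake_q_head {
	struct wake_q_node *first;
	struct wake_q_node **lastp;
};

static void wake_q_init(struct wake_q_head *h)
{
	h->first = WAKE_Q_TAIL;
	h->lastp = &h->first;
}

/* Returns 1 if the task was newly queued, 0 if it was already on a wake_q. */
static int __wake_q_add(struct wake_q_head *h, struct task *t)
{
	struct wake_q_node *expected = NULL;

	if (!__atomic_compare_exchange_n(&t->wake_q.next, &expected,
					 WAKE_Q_TAIL, 0,
					 __ATOMIC_RELAXED, __ATOMIC_RELAXED))
		return 0;

	*h->lastp = &t->wake_q;
	h->lastp = &t->wake_q.next;
	return 1;
}

static void wake_q_add(struct wake_q_head *h, struct task *t)
{
	if (__wake_q_add(h, t))
		t->refcount++;		/* get_task_struct() */
}

static void wake_q_add_safe(struct wake_q_head *h, struct task *t)
{
	if (!__wake_q_add(h, t))
		t->refcount--;		/* put_task_struct(): caller's ref not needed */
}

int main(void)
{
	struct wake_q_head head;
	struct task t = { .name = "worker", .refcount = 1 };

	wake_q_init(&head);
	wake_q_add(&head, &t);		/* newly queued: takes a reference */
	wake_q_add(&head, &t);		/* already queued: no-op */
	t.refcount++;			/* caller takes its own reference ... */
	wake_q_add_safe(&head, &t);	/* ... dropped again, task already queued */
	printf("%s queued=%d refcount=%d\n", t.name,
	       t.wake_q.next != NULL, t.refcount);
	return 0;
}
```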
| 611 | + |
---|
| 612 | +void __wake_up_q(struct wake_q_head *head, bool sleeper) |
---|
434 | 613 | { |
---|
435 | 614 | struct wake_q_node *node = head->first; |
---|
436 | 615 | |
---|
437 | 616 | while (node != WAKE_Q_TAIL) { |
---|
438 | 617 | struct task_struct *task; |
---|
439 | 618 | |
---|
440 | | - task = container_of(node, struct task_struct, wake_q); |
---|
| 619 | + if (sleeper) |
---|
| 620 | + task = container_of(node, struct task_struct, wake_q_sleeper); |
---|
| 621 | + else |
---|
| 622 | + task = container_of(node, struct task_struct, wake_q); |
---|
| 623 | + |
---|
441 | 624 | BUG_ON(!task); |
---|
442 | 625 | /* Task can safely be re-inserted now: */ |
---|
443 | 626 | node = node->next; |
---|
444 | | - task->wake_q.next = NULL; |
---|
| 627 | + task->wake_q_count = head->count; |
---|
| 628 | + if (sleeper) |
---|
| 629 | + task->wake_q_sleeper.next = NULL; |
---|
| 630 | + else |
---|
| 631 | + task->wake_q.next = NULL; |
---|
445 | 632 | |
---|
446 | 633 | /* |
---|
447 | | - * try_to_wake_up() executes a full barrier, which pairs with |
---|
| 634 | + * wake_up_process() executes a full barrier, which pairs with |
---|
448 | 635 | * the queueing in wake_q_add() so as not to miss wakeups. |
---|
449 | 636 | */ |
---|
450 | | - try_to_wake_up(task, TASK_NORMAL, 0, head->count); |
---|
| 637 | + if (sleeper) |
---|
| 638 | + wake_up_lock_sleeper(task); |
---|
| 639 | + else |
---|
| 640 | + wake_up_process(task); |
---|
| 641 | + |
---|
| 642 | + task->wake_q_count = 0; |
---|
451 | 643 | put_task_struct(task); |
---|
452 | 644 | } |
---|
453 | 645 | } |
---|
.. | .. |
---|
477 | 669 | return; |
---|
478 | 670 | } |
---|
479 | 671 | |
---|
480 | | -#ifdef CONFIG_PREEMPT |
---|
481 | 672 | if (set_nr_and_not_polling(curr)) |
---|
482 | | -#else |
---|
483 | | - if (set_nr_and_not_polling(curr) && (rq->curr == rq->idle)) |
---|
484 | | -#endif |
---|
485 | 673 | smp_send_reschedule(cpu); |
---|
486 | 674 | else |
---|
487 | 675 | trace_sched_wake_idle_without_ipi(cpu); |
---|
488 | 676 | } |
---|
| 677 | +EXPORT_SYMBOL_GPL(resched_curr); |
---|
| 678 | + |
---|
| 679 | +#ifdef CONFIG_PREEMPT_LAZY |
---|
| 680 | + |
---|
| 681 | +static int tsk_is_polling(struct task_struct *p) |
---|
| 682 | +{ |
---|
| 683 | +#ifdef TIF_POLLING_NRFLAG |
---|
| 684 | + return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); |
---|
| 685 | +#else |
---|
| 686 | + return 0; |
---|
| 687 | +#endif |
---|
| 688 | +} |
---|
| 689 | + |
---|
| 690 | +void resched_curr_lazy(struct rq *rq) |
---|
| 691 | +{ |
---|
| 692 | + struct task_struct *curr = rq->curr; |
---|
| 693 | + int cpu; |
---|
| 694 | + |
---|
| 695 | + if (!sched_feat(PREEMPT_LAZY)) { |
---|
| 696 | + resched_curr(rq); |
---|
| 697 | + return; |
---|
| 698 | + } |
---|
| 699 | + |
---|
| 700 | + lockdep_assert_held(&rq->lock); |
---|
| 701 | + |
---|
| 702 | + if (test_tsk_need_resched(curr)) |
---|
| 703 | + return; |
---|
| 704 | + |
---|
| 705 | + if (test_tsk_need_resched_lazy(curr)) |
---|
| 706 | + return; |
---|
| 707 | + |
---|
| 708 | + set_tsk_need_resched_lazy(curr); |
---|
| 709 | + |
---|
| 710 | + cpu = cpu_of(rq); |
---|
| 711 | + if (cpu == smp_processor_id()) |
---|
| 712 | + return; |
---|
| 713 | + |
---|
| 714 | + /* NEED_RESCHED_LAZY must be visible before we test polling */ |
---|
| 715 | + smp_mb(); |
---|
| 716 | + if (!tsk_is_polling(curr)) |
---|
| 717 | + smp_send_reschedule(cpu); |
---|
| 718 | +} |
---|
| 719 | +#endif |
---|
489 | 720 | |
---|
490 | 721 | void resched_cpu(int cpu) |
---|
491 | 722 | { |
---|
.. | .. |
---|
510 | 741 | */ |
---|
511 | 742 | int get_nohz_timer_target(void) |
---|
512 | 743 | { |
---|
513 | | - int i, cpu = smp_processor_id(); |
---|
| 744 | + int i, cpu = smp_processor_id(), default_cpu = -1; |
---|
514 | 745 | struct sched_domain *sd; |
---|
515 | 746 | |
---|
516 | | - if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER)) |
---|
517 | | - return cpu; |
---|
| 747 | + if (housekeeping_cpu(cpu, HK_FLAG_TIMER) && cpu_active(cpu)) { |
---|
| 748 | + if (!idle_cpu(cpu)) |
---|
| 749 | + return cpu; |
---|
| 750 | + default_cpu = cpu; |
---|
| 751 | + } |
---|
518 | 752 | |
---|
519 | 753 | rcu_read_lock(); |
---|
520 | 754 | for_each_domain(cpu, sd) { |
---|
521 | | - for_each_cpu(i, sched_domain_span(sd)) { |
---|
| 755 | + for_each_cpu_and(i, sched_domain_span(sd), |
---|
| 756 | + housekeeping_cpumask(HK_FLAG_TIMER)) { |
---|
522 | 757 | if (cpu == i) |
---|
523 | 758 | continue; |
---|
524 | 759 | |
---|
525 | | - if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) { |
---|
| 760 | + if (!idle_cpu(i)) { |
---|
526 | 761 | cpu = i; |
---|
527 | 762 | goto unlock; |
---|
528 | 763 | } |
---|
529 | 764 | } |
---|
530 | 765 | } |
---|
531 | 766 | |
---|
532 | | - if (!housekeeping_cpu(cpu, HK_FLAG_TIMER)) |
---|
533 | | - cpu = housekeeping_any_cpu(HK_FLAG_TIMER); |
---|
| 767 | + if (default_cpu == -1) { |
---|
| 768 | + for_each_cpu_and(i, cpu_active_mask, |
---|
| 769 | + housekeeping_cpumask(HK_FLAG_TIMER)) { |
---|
| 770 | + if (cpu == i) |
---|
| 771 | + continue; |
---|
| 772 | + |
---|
| 773 | + if (!idle_cpu(i)) { |
---|
| 774 | + cpu = i; |
---|
| 775 | + goto unlock; |
---|
| 776 | + } |
---|
| 777 | + } |
---|
| 778 | + |
---|
| 779 | + /* no active, not-idle, housekeeping CPU found. */ |
---|
| 780 | + default_cpu = cpumask_any(cpu_active_mask); |
---|
| 781 | + |
---|
| 782 | + if (unlikely(default_cpu >= nr_cpu_ids)) |
---|
| 783 | + goto unlock; |
---|
| 784 | + } |
---|
| 785 | + |
---|
| 786 | + cpu = default_cpu; |
---|
534 | 787 | unlock: |
---|
535 | 788 | rcu_read_unlock(); |
---|
536 | 789 | return cpu; |
---|
.. | .. |
---|
590 | 843 | wake_up_idle_cpu(cpu); |
---|
591 | 844 | } |
---|
592 | 845 | |
---|
593 | | -static inline bool got_nohz_idle_kick(void) |
---|
| 846 | +static void nohz_csd_func(void *info) |
---|
594 | 847 | { |
---|
595 | | - int cpu = smp_processor_id(); |
---|
596 | | - |
---|
597 | | - if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK)) |
---|
598 | | - return false; |
---|
599 | | - |
---|
600 | | - if (idle_cpu(cpu) && !need_resched()) |
---|
601 | | - return true; |
---|
| 848 | + struct rq *rq = info; |
---|
| 849 | + int cpu = cpu_of(rq); |
---|
| 850 | + unsigned int flags; |
---|
602 | 851 | |
---|
603 | 852 | /* |
---|
604 | | - * We can't run Idle Load Balance on this CPU for this time so we |
---|
605 | | - * cancel it and clear NOHZ_BALANCE_KICK |
---|
| 853 | + * Release the rq::nohz_csd. |
---|
606 | 854 | */ |
---|
607 | | - atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); |
---|
608 | | - return false; |
---|
609 | | -} |
---|
| 855 | + flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); |
---|
| 856 | + WARN_ON(!(flags & NOHZ_KICK_MASK)); |
---|
610 | 857 | |
---|
611 | | -#else /* CONFIG_NO_HZ_COMMON */ |
---|
612 | | - |
---|
613 | | -static inline bool got_nohz_idle_kick(void) |
---|
614 | | -{ |
---|
615 | | - return false; |
---|
| 858 | + rq->idle_balance = idle_cpu(cpu); |
---|
| 859 | + if (rq->idle_balance && !need_resched()) { |
---|
| 860 | + rq->nohz_idle_balance = flags; |
---|
| 861 | + raise_softirq_irqoff(SCHED_SOFTIRQ); |
---|
| 862 | + } |
---|
616 | 863 | } |
---|
617 | 864 | |
---|
618 | 865 | #endif /* CONFIG_NO_HZ_COMMON */ |
---|
.. | .. |
---|
703 | 950 | } |
---|
704 | 951 | #endif |
---|
705 | 952 | |
---|
706 | | -static void set_load_weight(struct task_struct *p, bool update_load) |
---|
| 953 | +static void set_load_weight(struct task_struct *p) |
---|
707 | 954 | { |
---|
| 955 | + bool update_load = !(READ_ONCE(p->state) & TASK_NEW); |
---|
708 | 956 | int prio = p->static_prio - MAX_RT_PRIO; |
---|
709 | 957 | struct load_weight *load = &p->se.load; |
---|
710 | 958 | |
---|
711 | 959 | /* |
---|
712 | 960 | * SCHED_IDLE tasks get minimal weight: |
---|
713 | 961 | */ |
---|
714 | | - if (idle_policy(p->policy)) { |
---|
| 962 | + if (task_has_idle_policy(p)) { |
---|
715 | 963 | load->weight = scale_load(WEIGHT_IDLEPRIO); |
---|
716 | 964 | load->inv_weight = WMULT_IDLEPRIO; |
---|
717 | | - p->se.runnable_weight = load->weight; |
---|
718 | 965 | return; |
---|
719 | 966 | } |
---|
720 | 967 | |
---|
.. | .. |
---|
727 | 974 | } else { |
---|
728 | 975 | load->weight = scale_load(sched_prio_to_weight[prio]); |
---|
729 | 976 | load->inv_weight = sched_prio_to_wmult[prio]; |
---|
730 | | - p->se.runnable_weight = load->weight; |
---|
731 | 977 | } |
---|
732 | 978 | } |
---|
733 | 979 | |
---|
.. | .. |
---|
750 | 996 | /* Max allowed maximum utilization */ |
---|
751 | 997 | unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE; |
---|
752 | 998 | |
---|
| 999 | +/* |
---|
| 1000 | + * By default RT tasks run at the maximum performance point/capacity of the |
---|
| 1001 | + * system. Uclamp enforces this by always setting UCLAMP_MIN of RT tasks to |
---|
| 1002 | + * SCHED_CAPACITY_SCALE. |
---|
| 1003 | + * |
---|
| 1004 | + * This knob allows admins to change the default behavior when uclamp is being |
---|
| 1005 | + * used. In battery powered devices, particularly, running at the maximum |
---|
| 1006 | + * capacity and frequency will increase energy consumption and shorten the |
---|
| 1007 | + * battery life. |
---|
| 1008 | + * |
---|
| 1009 | + * This knob only affects RT tasks whose uclamp_se->user_defined == false. |
---|
| 1010 | + * |
---|
| 1011 | + * This knob will not override the system default sched_util_clamp_min defined |
---|
| 1012 | + * above. |
---|
| 1013 | + */ |
---|
| 1014 | +unsigned int sysctl_sched_uclamp_util_min_rt_default = SCHED_CAPACITY_SCALE; |
---|
| 1015 | + |
---|
753 | 1016 | /* All clamps are required to be less or equal than these values */ |
---|
754 | 1017 | static struct uclamp_se uclamp_default[UCLAMP_CNT]; |
---|
| 1018 | + |
---|
| 1019 | +/* |
---|
| 1020 | + * This static key is used to reduce the uclamp overhead in the fast path. It |
---|
| 1021 | + * primarily disables the call to uclamp_rq_{inc, dec}() in |
---|
| 1022 | + * enqueue/dequeue_task(). |
---|
| 1023 | + * |
---|
| 1024 | + * This allows users to continue to enable uclamp in their kernel config with |
---|
| 1025 | + * minimum uclamp overhead in the fast path. |
---|
| 1026 | + * |
---|
| 1027 | + * As soon as userspace modifies any of the uclamp knobs, the static key is |
---|
| 1028 | + * enabled, since we have actual users that make use of uclamp |
---|
| 1029 | + * functionality. |
---|
| 1030 | + * |
---|
| 1031 | + * The knobs that would enable this static key are: |
---|
| 1032 | + * |
---|
| 1033 | + * * A task modifying its uclamp value with sched_setattr(). |
---|
| 1034 | + * * An admin modifying the sysctl_sched_uclamp_{min, max} via procfs. |
---|
| 1035 | + * * An admin modifying the cgroup cpu.uclamp.{min, max} |
---|
| 1036 | + */ |
---|
| 1037 | +DEFINE_STATIC_KEY_FALSE(sched_uclamp_used); |
---|
| 1038 | +EXPORT_SYMBOL_GPL(sched_uclamp_used); |
---|
755 | 1039 | |
---|
756 | 1040 | /* Integer rounded range for each bucket */ |
---|
757 | 1041 | #define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS) |
---|
.. | .. |
---|
762 | 1046 | static inline unsigned int uclamp_bucket_id(unsigned int clamp_value) |
---|
763 | 1047 | { |
---|
764 | 1048 | return min_t(unsigned int, clamp_value / UCLAMP_BUCKET_DELTA, UCLAMP_BUCKETS - 1); |
---|
765 | | -} |
---|
766 | | - |
---|
767 | | -static inline unsigned int uclamp_bucket_base_value(unsigned int clamp_value) |
---|
768 | | -{ |
---|
769 | | - return UCLAMP_BUCKET_DELTA * uclamp_bucket_id(clamp_value); |
---|
770 | 1049 | } |
---|
771 | 1050 | |
---|
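For reference, UCLAMP_BUCKET_DELTA divides the clamp range into UCLAMP_BUCKETS equal buckets and uclamp_bucket_id() saturates at the last one. A standalone sketch of that mapping, assuming the default SCHED_CAPACITY_SCALE of 1024 and a CONFIG_UCLAMP_BUCKETS_COUNT of 5 (both values are assumptions for illustration):

```c
/* Standalone sketch of the uclamp bucket mapping above. */
#include <stdio.h>

#define SCHED_CAPACITY_SCALE	1024	/* assumed */
#define UCLAMP_BUCKETS		5	/* assumed default CONFIG_UCLAMP_BUCKETS_COUNT */
#define DIV_ROUND_CLOSEST(x, d)	(((x) + (d) / 2) / (d))
#define UCLAMP_BUCKET_DELTA	DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS)

static unsigned int uclamp_bucket_id(unsigned int clamp_value)
{
	unsigned int id = clamp_value / UCLAMP_BUCKET_DELTA;

	/* Saturate so SCHED_CAPACITY_SCALE itself lands in the last bucket. */
	return id < UCLAMP_BUCKETS - 1 ? id : UCLAMP_BUCKETS - 1;
}

int main(void)
{
	unsigned int samples[] = { 0, 128, 205, 512, 1024 };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("clamp %4u -> bucket %u (delta %u)\n",
		       samples[i], uclamp_bucket_id(samples[i]),
		       (unsigned int)UCLAMP_BUCKET_DELTA);
	return 0;
}
```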
772 | 1051 | static inline unsigned int uclamp_none(enum uclamp_id clamp_id) |
---|
.. | .. |
---|
832 | 1111 | return uclamp_idle_value(rq, clamp_id, clamp_value); |
---|
833 | 1112 | } |
---|
834 | 1113 | |
---|
| 1114 | +static void __uclamp_update_util_min_rt_default(struct task_struct *p) |
---|
| 1115 | +{ |
---|
| 1116 | + unsigned int default_util_min; |
---|
| 1117 | + struct uclamp_se *uc_se; |
---|
| 1118 | + |
---|
| 1119 | + lockdep_assert_held(&p->pi_lock); |
---|
| 1120 | + |
---|
| 1121 | + uc_se = &p->uclamp_req[UCLAMP_MIN]; |
---|
| 1122 | + |
---|
| 1123 | + /* Only sync if user didn't override the default */ |
---|
| 1124 | + if (uc_se->user_defined) |
---|
| 1125 | + return; |
---|
| 1126 | + |
---|
| 1127 | + default_util_min = sysctl_sched_uclamp_util_min_rt_default; |
---|
| 1128 | + uclamp_se_set(uc_se, default_util_min, false); |
---|
| 1129 | +} |
---|
| 1130 | + |
---|
| 1131 | +static void uclamp_update_util_min_rt_default(struct task_struct *p) |
---|
| 1132 | +{ |
---|
| 1133 | + struct rq_flags rf; |
---|
| 1134 | + struct rq *rq; |
---|
| 1135 | + |
---|
| 1136 | + if (!rt_task(p)) |
---|
| 1137 | + return; |
---|
| 1138 | + |
---|
| 1139 | + /* Protect updates to p->uclamp_* */ |
---|
| 1140 | + rq = task_rq_lock(p, &rf); |
---|
| 1141 | + __uclamp_update_util_min_rt_default(p); |
---|
| 1142 | + task_rq_unlock(rq, p, &rf); |
---|
| 1143 | +} |
---|
| 1144 | + |
---|
| 1145 | +static void uclamp_sync_util_min_rt_default(void) |
---|
| 1146 | +{ |
---|
| 1147 | + struct task_struct *g, *p; |
---|
| 1148 | + |
---|
| 1149 | + /* |
---|
| 1150 | + * copy_process() sysctl_uclamp |
---|
| 1151 | + * uclamp_min_rt = X; |
---|
| 1152 | + * write_lock(&tasklist_lock) read_lock(&tasklist_lock) |
---|
| 1153 | + * // link thread smp_mb__after_spinlock() |
---|
| 1154 | + * write_unlock(&tasklist_lock) read_unlock(&tasklist_lock); |
---|
| 1155 | + * sched_post_fork() for_each_process_thread() |
---|
| 1156 | + * __uclamp_sync_rt() __uclamp_sync_rt() |
---|
| 1157 | + * |
---|
| 1158 | + * Ensures that either sched_post_fork() will observe the new |
---|
| 1159 | + * uclamp_min_rt or for_each_process_thread() will observe the new |
---|
| 1160 | + * task. |
---|
| 1161 | + */ |
---|
| 1162 | + read_lock(&tasklist_lock); |
---|
| 1163 | + smp_mb__after_spinlock(); |
---|
| 1164 | + read_unlock(&tasklist_lock); |
---|
| 1165 | + |
---|
| 1166 | + rcu_read_lock(); |
---|
| 1167 | + for_each_process_thread(g, p) |
---|
| 1168 | + uclamp_update_util_min_rt_default(p); |
---|
| 1169 | + rcu_read_unlock(); |
---|
| 1170 | +} |
---|
| 1171 | + |
---|
| 1172 | +#if IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE) |
---|
| 1173 | +void rockchip_perf_uclamp_sync_util_min_rt_default(void) |
---|
| 1174 | +{ |
---|
| 1175 | + uclamp_sync_util_min_rt_default(); |
---|
| 1176 | +} |
---|
| 1177 | +EXPORT_SYMBOL(rockchip_perf_uclamp_sync_util_min_rt_default); |
---|
| 1178 | +#endif |
---|
| 1179 | + |
---|
835 | 1180 | static inline struct uclamp_se |
---|
836 | 1181 | uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id) |
---|
837 | 1182 | { |
---|
| 1183 | + /* Copy by value as we could modify it */ |
---|
838 | 1184 | struct uclamp_se uc_req = p->uclamp_req[clamp_id]; |
---|
839 | 1185 | #ifdef CONFIG_UCLAMP_TASK_GROUP |
---|
840 | | - struct uclamp_se uc_max; |
---|
| 1186 | + unsigned int tg_min, tg_max, value; |
---|
841 | 1187 | |
---|
842 | 1188 | /* |
---|
843 | 1189 | * Tasks in autogroups or root task group will be |
---|
.. | .. |
---|
848 | 1194 | if (task_group(p) == &root_task_group) |
---|
849 | 1195 | return uc_req; |
---|
850 | 1196 | |
---|
851 | | - uc_max = task_group(p)->uclamp[clamp_id]; |
---|
852 | | - if (uc_req.value > uc_max.value || !uc_req.user_defined) |
---|
853 | | - return uc_max; |
---|
| 1197 | + tg_min = task_group(p)->uclamp[UCLAMP_MIN].value; |
---|
| 1198 | + tg_max = task_group(p)->uclamp[UCLAMP_MAX].value; |
---|
| 1199 | + value = uc_req.value; |
---|
| 1200 | + value = clamp(value, tg_min, tg_max); |
---|
| 1201 | + uclamp_se_set(&uc_req, value, false); |
---|
854 | 1202 | #endif |
---|
855 | 1203 | |
---|
856 | 1204 | return uc_req; |
---|
.. | .. |
---|
869 | 1217 | { |
---|
870 | 1218 | struct uclamp_se uc_req = uclamp_tg_restrict(p, clamp_id); |
---|
871 | 1219 | struct uclamp_se uc_max = uclamp_default[clamp_id]; |
---|
| 1220 | + struct uclamp_se uc_eff; |
---|
| 1221 | + int ret = 0; |
---|
| 1222 | + |
---|
| 1223 | + trace_android_rvh_uclamp_eff_get(p, clamp_id, &uc_max, &uc_eff, &ret); |
---|
| 1224 | + if (ret) |
---|
| 1225 | + return uc_eff; |
---|
872 | 1226 | |
---|
873 | 1227 | /* System default restrictions always apply */ |
---|
874 | 1228 | if (unlikely(uc_req.value > uc_max.value)) |
---|
.. | .. |
---|
889 | 1243 | |
---|
890 | 1244 | return (unsigned long)uc_eff.value; |
---|
891 | 1245 | } |
---|
| 1246 | +EXPORT_SYMBOL_GPL(uclamp_eff_value); |
---|
892 | 1247 | |
---|
893 | 1248 | /* |
---|
894 | 1249 | * When a task is enqueued on a rq, the clamp bucket currently defined by the |
---|
.. | .. |
---|
949 | 1304 | |
---|
950 | 1305 | lockdep_assert_held(&rq->lock); |
---|
951 | 1306 | |
---|
| 1307 | + /* |
---|
| 1308 | + * If sched_uclamp_used was enabled after task @p was enqueued, |
---|
| 1309 | + * we could end up with unbalanced call to uclamp_rq_dec_id(). |
---|
| 1310 | + * |
---|
| 1311 | + * In this case the uc_se->active flag should be false since no uclamp |
---|
| 1312 | + * accounting was performed at enqueue time and we can just return |
---|
| 1313 | + * here. |
---|
| 1314 | + * |
---|
| 1315 | + * Need to be careful of the following enqueue/dequeue ordering |
---|
| 1316 | + * problem too |
---|
| 1317 | + * |
---|
| 1318 | + * enqueue(taskA) |
---|
| 1319 | + * // sched_uclamp_used gets enabled |
---|
| 1320 | + * enqueue(taskB) |
---|
| 1321 | + * dequeue(taskA) |
---|
| 1322 | + * // Must not decrement bucket->tasks here |
---|
| 1323 | + * dequeue(taskB) |
---|
| 1324 | + * |
---|
| 1325 | + * where we could end up with stale data in uc_se and |
---|
| 1326 | + * bucket[uc_se->bucket_id]. |
---|
| 1327 | + * |
---|
| 1328 | + * The following check here eliminates the possibility of such race. |
---|
| 1329 | + */ |
---|
| 1330 | + if (unlikely(!uc_se->active)) |
---|
| 1331 | + return; |
---|
| 1332 | + |
---|
952 | 1333 | bucket = &uc_rq->bucket[uc_se->bucket_id]; |
---|
| 1334 | + |
---|
953 | 1335 | SCHED_WARN_ON(!bucket->tasks); |
---|
954 | 1336 | if (likely(bucket->tasks)) |
---|
955 | 1337 | bucket->tasks--; |
---|
| 1338 | + |
---|
956 | 1339 | uc_se->active = false; |
---|
957 | 1340 | |
---|
958 | 1341 | /* |
---|
.. | .. |
---|
980 | 1363 | { |
---|
981 | 1364 | enum uclamp_id clamp_id; |
---|
982 | 1365 | |
---|
| 1366 | + /* |
---|
| 1367 | + * Avoid any overhead until uclamp is actually used by the userspace. |
---|
| 1368 | + * |
---|
| 1369 | + * The condition is constructed such that a NOP is generated when |
---|
| 1370 | + * sched_uclamp_used is disabled. |
---|
| 1371 | + */ |
---|
| 1372 | + if (!static_branch_unlikely(&sched_uclamp_used)) |
---|
| 1373 | + return; |
---|
| 1374 | + |
---|
983 | 1375 | if (unlikely(!p->sched_class->uclamp_enabled)) |
---|
984 | 1376 | return; |
---|
985 | 1377 | |
---|
.. | .. |
---|
995 | 1387 | { |
---|
996 | 1388 | enum uclamp_id clamp_id; |
---|
997 | 1389 | |
---|
| 1390 | + /* |
---|
| 1391 | + * Avoid any overhead until uclamp is actually used by the userspace. |
---|
| 1392 | + * |
---|
| 1393 | + * The condition is constructed such that a NOP is generated when |
---|
| 1394 | + * sched_uclamp_used is disabled. |
---|
| 1395 | + */ |
---|
| 1396 | + if (!static_branch_unlikely(&sched_uclamp_used)) |
---|
| 1397 | + return; |
---|
| 1398 | + |
---|
998 | 1399 | if (unlikely(!p->sched_class->uclamp_enabled)) |
---|
999 | 1400 | return; |
---|
1000 | 1401 | |
---|
.. | .. |
---|
1002 | 1403 | uclamp_rq_dec_id(rq, p, clamp_id); |
---|
1003 | 1404 | } |
---|
1004 | 1405 | |
---|
1005 | | -static inline void |
---|
1006 | | -uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id) |
---|
| 1406 | +static inline void uclamp_rq_reinc_id(struct rq *rq, struct task_struct *p, |
---|
| 1407 | + enum uclamp_id clamp_id) |
---|
1007 | 1408 | { |
---|
| 1409 | + if (!p->uclamp[clamp_id].active) |
---|
| 1410 | + return; |
---|
| 1411 | + |
---|
| 1412 | + uclamp_rq_dec_id(rq, p, clamp_id); |
---|
| 1413 | + uclamp_rq_inc_id(rq, p, clamp_id); |
---|
| 1414 | + |
---|
| 1415 | + /* |
---|
| 1416 | + * Make sure to clear the idle flag if we've transiently reached 0 |
---|
| 1417 | + * active tasks on rq. |
---|
| 1418 | + */ |
---|
| 1419 | + if (clamp_id == UCLAMP_MAX && (rq->uclamp_flags & UCLAMP_FLAG_IDLE)) |
---|
| 1420 | + rq->uclamp_flags &= ~UCLAMP_FLAG_IDLE; |
---|
| 1421 | +} |
---|
| 1422 | + |
---|
| 1423 | +static inline void |
---|
| 1424 | +uclamp_update_active(struct task_struct *p) |
---|
| 1425 | +{ |
---|
| 1426 | + enum uclamp_id clamp_id; |
---|
1008 | 1427 | struct rq_flags rf; |
---|
1009 | 1428 | struct rq *rq; |
---|
1010 | 1429 | |
---|
.. | .. |
---|
1024 | 1443 | * affecting a valid clamp bucket, the next time it's enqueued, |
---|
1025 | 1444 | * it will already see the updated clamp bucket value. |
---|
1026 | 1445 | */ |
---|
1027 | | - if (p->uclamp[clamp_id].active) { |
---|
1028 | | - uclamp_rq_dec_id(rq, p, clamp_id); |
---|
1029 | | - uclamp_rq_inc_id(rq, p, clamp_id); |
---|
1030 | | - } |
---|
| 1446 | + for_each_clamp_id(clamp_id) |
---|
| 1447 | + uclamp_rq_reinc_id(rq, p, clamp_id); |
---|
1031 | 1448 | |
---|
1032 | 1449 | task_rq_unlock(rq, p, &rf); |
---|
1033 | 1450 | } |
---|
1034 | 1451 | |
---|
1035 | 1452 | #ifdef CONFIG_UCLAMP_TASK_GROUP |
---|
1036 | 1453 | static inline void |
---|
1037 | | -uclamp_update_active_tasks(struct cgroup_subsys_state *css, |
---|
1038 | | - unsigned int clamps) |
---|
| 1454 | +uclamp_update_active_tasks(struct cgroup_subsys_state *css) |
---|
1039 | 1455 | { |
---|
1040 | | - enum uclamp_id clamp_id; |
---|
1041 | 1456 | struct css_task_iter it; |
---|
1042 | 1457 | struct task_struct *p; |
---|
1043 | 1458 | |
---|
1044 | 1459 | css_task_iter_start(css, 0, &it); |
---|
1045 | | - while ((p = css_task_iter_next(&it))) { |
---|
1046 | | - for_each_clamp_id(clamp_id) { |
---|
1047 | | - if ((0x1 << clamp_id) & clamps) |
---|
1048 | | - uclamp_update_active(p, clamp_id); |
---|
1049 | | - } |
---|
1050 | | - } |
---|
| 1460 | + while ((p = css_task_iter_next(&it))) |
---|
| 1461 | + uclamp_update_active(p); |
---|
1051 | 1462 | css_task_iter_end(&it); |
---|
1052 | 1463 | } |
---|
1053 | 1464 | |
---|
.. | .. |
---|
1070 | 1481 | #endif |
---|
1071 | 1482 | |
---|
1072 | 1483 | int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, |
---|
1073 | | - void __user *buffer, size_t *lenp, |
---|
1074 | | - loff_t *ppos) |
---|
| 1484 | + void *buffer, size_t *lenp, loff_t *ppos) |
---|
1075 | 1485 | { |
---|
1076 | 1486 | bool update_root_tg = false; |
---|
1077 | | - int old_min, old_max; |
---|
| 1487 | + int old_min, old_max, old_min_rt; |
---|
1078 | 1488 | int result; |
---|
1079 | 1489 | |
---|
1080 | 1490 | mutex_lock(&uclamp_mutex); |
---|
1081 | 1491 | old_min = sysctl_sched_uclamp_util_min; |
---|
1082 | 1492 | old_max = sysctl_sched_uclamp_util_max; |
---|
| 1493 | + old_min_rt = sysctl_sched_uclamp_util_min_rt_default; |
---|
1083 | 1494 | |
---|
1084 | 1495 | result = proc_dointvec(table, write, buffer, lenp, ppos); |
---|
1085 | 1496 | if (result) |
---|
.. | .. |
---|
1088 | 1499 | goto done; |
---|
1089 | 1500 | |
---|
1090 | 1501 | if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max || |
---|
1091 | | - sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE) { |
---|
| 1502 | + sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE || |
---|
| 1503 | + sysctl_sched_uclamp_util_min_rt_default > SCHED_CAPACITY_SCALE) { |
---|
| 1504 | + |
---|
1092 | 1505 | result = -EINVAL; |
---|
1093 | 1506 | goto undo; |
---|
1094 | 1507 | } |
---|
.. | .. |
---|
1104 | 1517 | update_root_tg = true; |
---|
1105 | 1518 | } |
---|
1106 | 1519 | |
---|
1107 | | - if (update_root_tg) |
---|
| 1520 | + if (update_root_tg) { |
---|
| 1521 | + static_branch_enable(&sched_uclamp_used); |
---|
1108 | 1522 | uclamp_update_root_tg(); |
---|
| 1523 | + } |
---|
| 1524 | + |
---|
| 1525 | + if (old_min_rt != sysctl_sched_uclamp_util_min_rt_default) { |
---|
| 1526 | + static_branch_enable(&sched_uclamp_used); |
---|
| 1527 | + uclamp_sync_util_min_rt_default(); |
---|
| 1528 | + } |
---|
1109 | 1529 | |
---|
1110 | 1530 | /* |
---|
1111 | 1531 | * We update all RUNNABLE tasks only when task groups are in use. |
---|
.. | .. |
---|
1118 | 1538 | undo: |
---|
1119 | 1539 | sysctl_sched_uclamp_util_min = old_min; |
---|
1120 | 1540 | sysctl_sched_uclamp_util_max = old_max; |
---|
| 1541 | + sysctl_sched_uclamp_util_min_rt_default = old_min_rt; |
---|
1121 | 1542 | done: |
---|
1122 | 1543 | mutex_unlock(&uclamp_mutex); |
---|
1123 | 1544 | |
---|
.. | .. |
---|
1127 | 1548 | static int uclamp_validate(struct task_struct *p, |
---|
1128 | 1549 | const struct sched_attr *attr) |
---|
1129 | 1550 | { |
---|
1130 | | - unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value; |
---|
1131 | | - unsigned int upper_bound = p->uclamp_req[UCLAMP_MAX].value; |
---|
| 1551 | + int util_min = p->uclamp_req[UCLAMP_MIN].value; |
---|
| 1552 | + int util_max = p->uclamp_req[UCLAMP_MAX].value; |
---|
1132 | 1553 | |
---|
1133 | | - if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) |
---|
1134 | | - lower_bound = attr->sched_util_min; |
---|
1135 | | - if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) |
---|
1136 | | - upper_bound = attr->sched_util_max; |
---|
| 1554 | + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) { |
---|
| 1555 | + util_min = attr->sched_util_min; |
---|
1137 | 1556 | |
---|
1138 | | - if (lower_bound > upper_bound) |
---|
| 1557 | + if (util_min + 1 > SCHED_CAPACITY_SCALE + 1) |
---|
| 1558 | + return -EINVAL; |
---|
| 1559 | + } |
---|
| 1560 | + |
---|
| 1561 | + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) { |
---|
| 1562 | + util_max = attr->sched_util_max; |
---|
| 1563 | + |
---|
| 1564 | + if (util_max + 1 > SCHED_CAPACITY_SCALE + 1) |
---|
| 1565 | + return -EINVAL; |
---|
| 1566 | + } |
---|
| 1567 | + |
---|
| 1568 | + if (util_min != -1 && util_max != -1 && util_min > util_max) |
---|
1139 | 1569 | return -EINVAL; |
---|
1140 | | - if (upper_bound > SCHED_CAPACITY_SCALE) |
---|
1141 | | - return -EINVAL; |
---|
| 1570 | + |
---|
| 1571 | + /* |
---|
| 1572 | + * We have valid uclamp attributes; make sure uclamp is enabled. |
---|
| 1573 | + * |
---|
| 1574 | + * We need to do that here, because enabling static branches is a |
---|
| 1575 | + * blocking operation which obviously cannot be done while holding |
---|
| 1576 | + * scheduler locks. |
---|
| 1577 | + */ |
---|
| 1578 | + static_branch_enable(&sched_uclamp_used); |
---|
1142 | 1579 | |
---|
1143 | 1580 | return 0; |
---|
| 1581 | +} |
---|
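The reworked bounds check keeps the requested values in a signed int so that -1 can pass through as a "reset to default" request (consumed by uclamp_reset() below), while ordinary values above SCHED_CAPACITY_SCALE still fail. A standalone sketch of that check, with SCHED_CAPACITY_SCALE assumed to be 1024:

```c
/*
 * Sketch of the range check in uclamp_validate() above: -1 is accepted
 * as a reset sentinel, values above SCHED_CAPACITY_SCALE are rejected.
 */
#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024	/* assumed */

static int clamp_value_ok(int util)
{
	/* Same shape as the check in uclamp_validate(). */
	return !(util + 1 > SCHED_CAPACITY_SCALE + 1);
}

int main(void)
{
	int samples[] = { -1, 0, 512, 1024, 2000 };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("util %5d -> %s\n", samples[i],
		       clamp_value_ok(samples[i]) ? "accepted" : "-EINVAL");
	return 0;
}
```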
| 1582 | + |
---|
| 1583 | +static bool uclamp_reset(const struct sched_attr *attr, |
---|
| 1584 | + enum uclamp_id clamp_id, |
---|
| 1585 | + struct uclamp_se *uc_se) |
---|
| 1586 | +{ |
---|
| 1587 | + /* Reset on sched class change for a non user-defined clamp value. */ |
---|
| 1588 | + if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)) && |
---|
| 1589 | + !uc_se->user_defined) |
---|
| 1590 | + return true; |
---|
| 1591 | + |
---|
| 1592 | + /* Reset on sched_util_{min,max} == -1. */ |
---|
| 1593 | + if (clamp_id == UCLAMP_MIN && |
---|
| 1594 | + attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN && |
---|
| 1595 | + attr->sched_util_min == -1) { |
---|
| 1596 | + return true; |
---|
| 1597 | + } |
---|
| 1598 | + |
---|
| 1599 | + if (clamp_id == UCLAMP_MAX && |
---|
| 1600 | + attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX && |
---|
| 1601 | + attr->sched_util_max == -1) { |
---|
| 1602 | + return true; |
---|
| 1603 | + } |
---|
| 1604 | + |
---|
| 1605 | + return false; |
---|
1144 | 1606 | } |
---|
1145 | 1607 | |
---|
1146 | 1608 | static void __setscheduler_uclamp(struct task_struct *p, |
---|
.. | .. |
---|
1148 | 1610 | { |
---|
1149 | 1611 | enum uclamp_id clamp_id; |
---|
1150 | 1612 | |
---|
1151 | | - /* |
---|
1152 | | - * On scheduling class change, reset to default clamps for tasks |
---|
1153 | | - * without a task-specific value. |
---|
1154 | | - */ |
---|
1155 | 1613 | for_each_clamp_id(clamp_id) { |
---|
1156 | 1614 | struct uclamp_se *uc_se = &p->uclamp_req[clamp_id]; |
---|
1157 | | - unsigned int clamp_value = uclamp_none(clamp_id); |
---|
| 1615 | + unsigned int value; |
---|
1158 | 1616 | |
---|
1159 | | - /* Keep using defined clamps across class changes */ |
---|
1160 | | - if (uc_se->user_defined) |
---|
| 1617 | + if (!uclamp_reset(attr, clamp_id, uc_se)) |
---|
1161 | 1618 | continue; |
---|
1162 | 1619 | |
---|
1163 | | - /* By default, RT tasks always get 100% boost */ |
---|
1164 | | - if (sched_feat(SUGOV_RT_MAX_FREQ) && |
---|
1165 | | - unlikely(rt_task(p) && |
---|
1166 | | - clamp_id == UCLAMP_MIN)) { |
---|
| 1620 | + /* |
---|
| 1621 | + * RT by default have a 100% boost value that could be modified |
---|
| 1622 | + * at runtime. |
---|
| 1623 | + */ |
---|
| 1624 | + if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN)) |
---|
| 1625 | + value = sysctl_sched_uclamp_util_min_rt_default; |
---|
| 1626 | + else |
---|
| 1627 | + value = uclamp_none(clamp_id); |
---|
1167 | 1628 | |
---|
1168 | | - clamp_value = uclamp_none(UCLAMP_MAX); |
---|
1169 | | - } |
---|
| 1629 | + uclamp_se_set(uc_se, value, false); |
---|
1170 | 1630 | |
---|
1171 | | - uclamp_se_set(uc_se, clamp_value, false); |
---|
1172 | 1631 | } |
---|
1173 | 1632 | |
---|
1174 | 1633 | if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP))) |
---|
1175 | 1634 | return; |
---|
1176 | 1635 | |
---|
1177 | | - if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) { |
---|
| 1636 | + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN && |
---|
| 1637 | + attr->sched_util_min != -1) { |
---|
1178 | 1638 | uclamp_se_set(&p->uclamp_req[UCLAMP_MIN], |
---|
1179 | 1639 | attr->sched_util_min, true); |
---|
| 1640 | + trace_android_vh_setscheduler_uclamp(p, UCLAMP_MIN, attr->sched_util_min); |
---|
1180 | 1641 | } |
---|
1181 | 1642 | |
---|
1182 | | - if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) { |
---|
| 1643 | + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX && |
---|
| 1644 | + attr->sched_util_max != -1) { |
---|
1183 | 1645 | uclamp_se_set(&p->uclamp_req[UCLAMP_MAX], |
---|
1184 | 1646 | attr->sched_util_max, true); |
---|
| 1647 | + trace_android_vh_setscheduler_uclamp(p, UCLAMP_MAX, attr->sched_util_max); |
---|
1185 | 1648 | } |
---|
1186 | 1649 | } |
---|
1187 | 1650 | |
---|
.. | .. |
---|
1189 | 1652 | { |
---|
1190 | 1653 | enum uclamp_id clamp_id; |
---|
1191 | 1654 | |
---|
| 1655 | + /* |
---|
| 1656 | + * We don't need to hold task_rq_lock() when updating p->uclamp_* here |
---|
| 1657 | + * as the task is still at its early fork stages. |
---|
| 1658 | + */ |
---|
1192 | 1659 | for_each_clamp_id(clamp_id) |
---|
1193 | 1660 | p->uclamp[clamp_id].active = false; |
---|
1194 | 1661 | |
---|
.. | .. |
---|
1201 | 1668 | } |
---|
1202 | 1669 | } |
---|
1203 | 1670 | |
---|
1204 | | -#ifdef CONFIG_SMP |
---|
1205 | | -unsigned int uclamp_task(struct task_struct *p) |
---|
| 1671 | +static void uclamp_post_fork(struct task_struct *p) |
---|
1206 | 1672 | { |
---|
1207 | | - unsigned long util; |
---|
1208 | | - |
---|
1209 | | - util = task_util_est(p); |
---|
1210 | | - util = max(util, uclamp_eff_value(p, UCLAMP_MIN)); |
---|
1211 | | - util = min(util, uclamp_eff_value(p, UCLAMP_MAX)); |
---|
1212 | | - |
---|
1213 | | - return util; |
---|
| 1673 | + uclamp_update_util_min_rt_default(p); |
---|
1214 | 1674 | } |
---|
1215 | 1675 | |
---|
1216 | | -bool uclamp_boosted(struct task_struct *p) |
---|
| 1676 | +static void __init init_uclamp_rq(struct rq *rq) |
---|
1217 | 1677 | { |
---|
1218 | | - return uclamp_eff_value(p, UCLAMP_MIN) > 0; |
---|
| 1678 | + enum uclamp_id clamp_id; |
---|
| 1679 | + struct uclamp_rq *uc_rq = rq->uclamp; |
---|
| 1680 | + |
---|
| 1681 | + for_each_clamp_id(clamp_id) { |
---|
| 1682 | + uc_rq[clamp_id] = (struct uclamp_rq) { |
---|
| 1683 | + .value = uclamp_none(clamp_id) |
---|
| 1684 | + }; |
---|
| 1685 | + } |
---|
| 1686 | + |
---|
| 1687 | + rq->uclamp_flags = UCLAMP_FLAG_IDLE; |
---|
1219 | 1688 | } |
---|
1220 | | - |
---|
1221 | | -bool uclamp_latency_sensitive(struct task_struct *p) |
---|
1222 | | -{ |
---|
1223 | | -#ifdef CONFIG_UCLAMP_TASK_GROUP |
---|
1224 | | - struct cgroup_subsys_state *css = task_css(p, cpu_cgrp_id); |
---|
1225 | | - struct task_group *tg; |
---|
1226 | | - |
---|
1227 | | - if (!css) |
---|
1228 | | - return false; |
---|
1229 | | - tg = container_of(css, struct task_group, css); |
---|
1230 | | - |
---|
1231 | | - return tg->latency_sensitive; |
---|
1232 | | -#else |
---|
1233 | | - return false; |
---|
1234 | | -#endif |
---|
1235 | | -} |
---|
1236 | | -#endif /* CONFIG_SMP */ |
---|
1237 | 1689 | |
---|
1238 | 1690 | static void __init init_uclamp(void) |
---|
1239 | 1691 | { |
---|
.. | .. |
---|
1241 | 1693 | enum uclamp_id clamp_id; |
---|
1242 | 1694 | int cpu; |
---|
1243 | 1695 | |
---|
1244 | | - mutex_init(&uclamp_mutex); |
---|
1245 | | - |
---|
1246 | | - for_each_possible_cpu(cpu) { |
---|
1247 | | - memset(&cpu_rq(cpu)->uclamp, 0, |
---|
1248 | | - sizeof(struct uclamp_rq)*UCLAMP_CNT); |
---|
1249 | | - cpu_rq(cpu)->uclamp_flags = 0; |
---|
1250 | | - } |
---|
| 1696 | + for_each_possible_cpu(cpu) |
---|
| 1697 | + init_uclamp_rq(cpu_rq(cpu)); |
---|
1251 | 1698 | |
---|
1252 | 1699 | for_each_clamp_id(clamp_id) { |
---|
1253 | 1700 | uclamp_se_set(&init_task.uclamp_req[clamp_id], |
---|
.. | .. |
---|
1276 | 1723 | static void __setscheduler_uclamp(struct task_struct *p, |
---|
1277 | 1724 | const struct sched_attr *attr) { } |
---|
1278 | 1725 | static inline void uclamp_fork(struct task_struct *p) { } |
---|
1279 | | - |
---|
1280 | | -long schedtune_task_margin(struct task_struct *task); |
---|
1281 | | - |
---|
1282 | | -#ifdef CONFIG_SMP |
---|
1283 | | -unsigned int uclamp_task(struct task_struct *p) |
---|
1284 | | -{ |
---|
1285 | | - unsigned long util = task_util_est(p); |
---|
1286 | | -#ifdef CONFIG_SCHED_TUNE |
---|
1287 | | - long margin = schedtune_task_margin(p); |
---|
1288 | | - |
---|
1289 | | - trace_sched_boost_task(p, util, margin); |
---|
1290 | | - |
---|
1291 | | - util += margin; |
---|
1292 | | -#endif |
---|
1293 | | - |
---|
1294 | | - return util; |
---|
1295 | | -} |
---|
1296 | | - |
---|
1297 | | -bool uclamp_boosted(struct task_struct *p) |
---|
1298 | | -{ |
---|
1299 | | -#ifdef CONFIG_SCHED_TUNE |
---|
1300 | | - return schedtune_task_boost(p) > 0; |
---|
1301 | | -#endif |
---|
1302 | | - return false; |
---|
1303 | | -} |
---|
1304 | | - |
---|
1305 | | -bool uclamp_latency_sensitive(struct task_struct *p) |
---|
1306 | | -{ |
---|
1307 | | -#ifdef CONFIG_SCHED_TUNE |
---|
1308 | | - return schedtune_prefer_idle(p) != 0; |
---|
1309 | | -#endif |
---|
1310 | | - return false; |
---|
1311 | | -} |
---|
1312 | | -#endif /* CONFIG_SMP */ |
---|
1313 | | - |
---|
| 1726 | +static inline void uclamp_post_fork(struct task_struct *p) { } |
---|
1314 | 1727 | static inline void init_uclamp(void) { } |
---|
1315 | 1728 | #endif /* CONFIG_UCLAMP_TASK */ |
---|
1316 | 1729 | |
---|
.. | .. |
---|
1325 | 1738 | } |
---|
1326 | 1739 | |
---|
1327 | 1740 | uclamp_rq_inc(rq, p); |
---|
| 1741 | + trace_android_rvh_enqueue_task(rq, p, flags); |
---|
1328 | 1742 | p->sched_class->enqueue_task(rq, p, flags); |
---|
| 1743 | + trace_android_rvh_after_enqueue_task(rq, p); |
---|
1329 | 1744 | } |
---|
1330 | 1745 | |
---|
1331 | 1746 | static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) |
---|
.. | .. |
---|
1339 | 1754 | } |
---|
1340 | 1755 | |
---|
1341 | 1756 | uclamp_rq_dec(rq, p); |
---|
| 1757 | + trace_android_rvh_dequeue_task(rq, p, flags); |
---|
1342 | 1758 | p->sched_class->dequeue_task(rq, p, flags); |
---|
| 1759 | + trace_android_rvh_after_dequeue_task(rq, p); |
---|
1343 | 1760 | } |
---|
1344 | 1761 | |
---|
1345 | 1762 | void activate_task(struct rq *rq, struct task_struct *p, int flags) |
---|
1346 | 1763 | { |
---|
1347 | | - if (task_contributes_to_load(p)) |
---|
1348 | | - rq->nr_uninterruptible--; |
---|
1349 | | - |
---|
1350 | 1764 | enqueue_task(rq, p, flags); |
---|
| 1765 | + |
---|
| 1766 | + p->on_rq = TASK_ON_RQ_QUEUED; |
---|
1351 | 1767 | } |
---|
| 1768 | +EXPORT_SYMBOL_GPL(activate_task); |
---|
1352 | 1769 | |
---|
1353 | 1770 | void deactivate_task(struct rq *rq, struct task_struct *p, int flags) |
---|
1354 | 1771 | { |
---|
1355 | | - if (task_contributes_to_load(p)) |
---|
1356 | | - rq->nr_uninterruptible++; |
---|
| 1772 | + p->on_rq = (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING; |
---|
1357 | 1773 | |
---|
1358 | 1774 | dequeue_task(rq, p, flags); |
---|
1359 | 1775 | } |
---|
| 1776 | +EXPORT_SYMBOL_GPL(deactivate_task); |
---|
1360 | 1777 | |
---|
1361 | | -/* |
---|
1362 | | - * __normal_prio - return the priority that is based on the static prio |
---|
1363 | | - */ |
---|
1364 | | -static inline int __normal_prio(struct task_struct *p) |
---|
| 1778 | +static inline int __normal_prio(int policy, int rt_prio, int nice) |
---|
1365 | 1779 | { |
---|
1366 | | - return p->static_prio; |
---|
| 1780 | + int prio; |
---|
| 1781 | + |
---|
| 1782 | + if (dl_policy(policy)) |
---|
| 1783 | + prio = MAX_DL_PRIO - 1; |
---|
| 1784 | + else if (rt_policy(policy)) |
---|
| 1785 | + prio = MAX_RT_PRIO - 1 - rt_prio; |
---|
| 1786 | + else |
---|
| 1787 | + prio = NICE_TO_PRIO(nice); |
---|
| 1788 | + |
---|
| 1789 | + return prio; |
---|
1367 | 1790 | } |
---|
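The new __normal_prio() derives the effective priority purely from the policy, rt_priority and nice values passed in. A standalone sketch of the resulting mapping, using the usual values MAX_RT_PRIO = 100, MAX_DL_PRIO = 0 and NICE_TO_PRIO(n) = n + 120 as assumptions for illustration:

```c
/* Sketch of the priority mapping performed by __normal_prio() above. */
#include <stdio.h>

#define MAX_DL_PRIO		0	/* assumed kernel constant */
#define MAX_RT_PRIO		100	/* assumed kernel constant */
#define DEFAULT_PRIO		120	/* assumed kernel constant */
#define NICE_TO_PRIO(nice)	((nice) + DEFAULT_PRIO)

enum { POLICY_NORMAL, POLICY_FIFO, POLICY_DEADLINE };

static int __normal_prio(int policy, int rt_prio, int nice)
{
	if (policy == POLICY_DEADLINE)
		return MAX_DL_PRIO - 1;		  /* -1: above every RT priority */
	if (policy == POLICY_FIFO)
		return MAX_RT_PRIO - 1 - rt_prio; /* rt_prio 1..99 -> prio 98..0 */
	return NICE_TO_PRIO(nice);		  /* nice -20..19 -> prio 100..139 */
}

int main(void)
{
	printf("SCHED_DEADLINE            -> %d\n", __normal_prio(POLICY_DEADLINE, 0, 0));
	printf("SCHED_FIFO rt_priority=50 -> %d\n", __normal_prio(POLICY_FIFO, 50, 0));
	printf("SCHED_NORMAL nice=0       -> %d\n", __normal_prio(POLICY_NORMAL, 0, 0));
	printf("SCHED_NORMAL nice=-20     -> %d\n", __normal_prio(POLICY_NORMAL, 0, -20));
	return 0;
}
```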
1368 | 1791 | |
---|
1369 | 1792 | /* |
---|
.. | .. |
---|
1375 | 1798 | */ |
---|
1376 | 1799 | static inline int normal_prio(struct task_struct *p) |
---|
1377 | 1800 | { |
---|
1378 | | - int prio; |
---|
1379 | | - |
---|
1380 | | - if (task_has_dl_policy(p)) |
---|
1381 | | - prio = MAX_DL_PRIO-1; |
---|
1382 | | - else if (task_has_rt_policy(p)) |
---|
1383 | | - prio = MAX_RT_PRIO-1 - p->rt_priority; |
---|
1384 | | - else |
---|
1385 | | - prio = __normal_prio(p); |
---|
1386 | | - return prio; |
---|
| 1801 | + return __normal_prio(p->policy, p->rt_priority, PRIO_TO_NICE(p->static_prio)); |
---|
1387 | 1802 | } |
---|
1388 | 1803 | |
---|
1389 | 1804 | /* |
---|
.. | .. |
---|
1439 | 1854 | |
---|
1440 | 1855 | void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) |
---|
1441 | 1856 | { |
---|
1442 | | - const struct sched_class *class; |
---|
1443 | | - |
---|
1444 | | - if (p->sched_class == rq->curr->sched_class) { |
---|
| 1857 | + if (p->sched_class == rq->curr->sched_class) |
---|
1445 | 1858 | rq->curr->sched_class->check_preempt_curr(rq, p, flags); |
---|
1446 | | - } else { |
---|
1447 | | - for_each_class(class) { |
---|
1448 | | - if (class == rq->curr->sched_class) |
---|
1449 | | - break; |
---|
1450 | | - if (class == p->sched_class) { |
---|
1451 | | - resched_curr(rq); |
---|
1452 | | - break; |
---|
1453 | | - } |
---|
1454 | | - } |
---|
1455 | | - } |
---|
| 1859 | + else if (p->sched_class > rq->curr->sched_class) |
---|
| 1860 | + resched_curr(rq); |
---|
1456 | 1861 | |
---|
1457 | 1862 | /* |
---|
1458 | 1863 | * A queue event has occurred, and we're going to schedule. In |
---|
.. | .. |
---|
1461 | 1866 | if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr)) |
---|
1462 | 1867 | rq_clock_skip_update(rq); |
---|
1463 | 1868 | } |
---|
| 1869 | +EXPORT_SYMBOL_GPL(check_preempt_curr); |
---|
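The simplified check_preempt_curr() compares sched_class pointers directly, which only works if the class descriptors are laid out in priority order. A standalone analogy with an explicitly ordered array (illustration only, not the kernel's actual layout mechanism):

```c
/*
 * Analogy for the pointer comparison in check_preempt_curr() above:
 * if class descriptors live in one array ordered from lowest to highest
 * priority, "higher address" means "higher-priority class" and no loop
 * over the classes is needed.
 */
#include <stdio.h>

struct sched_class_demo { const char *name; };

/* Ordered from lowest to highest priority. */
static const struct sched_class_demo classes[] = {
	{ "idle" }, { "fair" }, { "rt" }, { "dl" }, { "stop" },
};

static void check_preempt_demo(const struct sched_class_demo *curr,
			       const struct sched_class_demo *waking)
{
	/* Both pointers index the same array, so '>' is well defined. */
	if (waking > curr)
		printf("%s preempts %s\n", waking->name, curr->name);
	else
		printf("%s does not preempt %s\n", waking->name, curr->name);
}

int main(void)
{
	check_preempt_demo(&classes[1] /* fair */, &classes[2] /* rt */);
	check_preempt_demo(&classes[2] /* rt */, &classes[1] /* fair */);
	return 0;
}
```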
1464 | 1870 | |
---|
1465 | 1871 | #ifdef CONFIG_SMP |
---|
1466 | 1872 | |
---|
1467 | | -static inline bool is_per_cpu_kthread(struct task_struct *p) |
---|
| 1873 | +static void |
---|
| 1874 | +__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags); |
---|
| 1875 | + |
---|
| 1876 | +static int __set_cpus_allowed_ptr(struct task_struct *p, |
---|
| 1877 | + const struct cpumask *new_mask, |
---|
| 1878 | + u32 flags); |
---|
| 1879 | + |
---|
| 1880 | +static void migrate_disable_switch(struct rq *rq, struct task_struct *p) |
---|
1468 | 1881 | { |
---|
1469 | | - if (!(p->flags & PF_KTHREAD)) |
---|
1470 | | - return false; |
---|
| 1882 | + if (likely(!p->migration_disabled)) |
---|
| 1883 | + return; |
---|
1471 | 1884 | |
---|
1472 | | - if (p->nr_cpus_allowed != 1) |
---|
1473 | | - return false; |
---|
| 1885 | + if (p->cpus_ptr != &p->cpus_mask) |
---|
| 1886 | + return; |
---|
1474 | 1887 | |
---|
1475 | | - return true; |
---|
| 1888 | + /* |
---|
| 1889 | + * Violates locking rules! see comment in __do_set_cpus_allowed(). |
---|
| 1890 | + */ |
---|
| 1891 | + __do_set_cpus_allowed(p, cpumask_of(rq->cpu), SCA_MIGRATE_DISABLE); |
---|
| 1892 | +} |
---|
| 1893 | + |
---|
| 1894 | +void migrate_disable(void) |
---|
| 1895 | +{ |
---|
| 1896 | + struct task_struct *p = current; |
---|
| 1897 | + |
---|
| 1898 | + if (p->migration_disabled) { |
---|
| 1899 | + p->migration_disabled++; |
---|
| 1900 | + return; |
---|
| 1901 | + } |
---|
| 1902 | + |
---|
| 1903 | + trace_sched_migrate_disable_tp(p); |
---|
| 1904 | + |
---|
| 1905 | + preempt_disable(); |
---|
| 1906 | + this_rq()->nr_pinned++; |
---|
| 1907 | + p->migration_disabled = 1; |
---|
| 1908 | + preempt_lazy_disable(); |
---|
| 1909 | + preempt_enable(); |
---|
| 1910 | +} |
---|
| 1911 | +EXPORT_SYMBOL_GPL(migrate_disable); |
---|
| 1912 | + |
---|
| 1913 | +void migrate_enable(void) |
---|
| 1914 | +{ |
---|
| 1915 | + struct task_struct *p = current; |
---|
| 1916 | + |
---|
| 1917 | + if (p->migration_disabled > 1) { |
---|
| 1918 | + p->migration_disabled--; |
---|
| 1919 | + return; |
---|
| 1920 | + } |
---|
| 1921 | + |
---|
| 1922 | + /* |
---|
| 1923 | + * Ensure stop_task runs either before or after this, and that |
---|
| 1924 | + * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). |
---|
| 1925 | + */ |
---|
| 1926 | + preempt_disable(); |
---|
| 1927 | + if (p->cpus_ptr != &p->cpus_mask) |
---|
| 1928 | + __set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); |
---|
| 1929 | + /* |
---|
| 1930 | + * Mustn't clear migration_disabled() until cpus_ptr points back at the |
---|
| 1931 | + * regular cpus_mask, otherwise things that race (eg. |
---|
| 1932 | + * select_fallback_rq) get confused. |
---|
| 1933 | + */ |
---|
| 1934 | + barrier(); |
---|
| 1935 | + p->migration_disabled = 0; |
---|
| 1936 | + this_rq()->nr_pinned--; |
---|
| 1937 | + preempt_lazy_enable(); |
---|
| 1938 | + preempt_enable(); |
---|
| 1939 | + |
---|
| 1940 | + trace_sched_migrate_enable_tp(p); |
---|
| 1941 | +} |
---|
| 1942 | +EXPORT_SYMBOL_GPL(migrate_enable); |
---|
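migrate_disable()/migrate_enable() nest: only the outermost pair touches rq->nr_pinned, and the task stays pinned in between. A rough userspace sketch of just the nesting rules; the preemption handling, tracing and cpus_ptr switching done by the real primitives are deliberately omitted.

```c
#include <stdio.h>

/* Userspace analogue of the nesting rules only. */
static __thread int migration_disabled;	/* like p->migration_disabled */
static int nr_pinned;			/* like rq->nr_pinned */

static void migrate_disable_sketch(void)
{
	if (migration_disabled++)	/* already disabled: just nest deeper */
		return;
	nr_pinned++;			/* outermost disable: account the pinned task */
}

static void migrate_enable_sketch(void)
{
	if (--migration_disabled)	/* still nested: nothing else to do */
		return;
	nr_pinned--;			/* outermost enable: drop the pin */
}

int main(void)
{
	migrate_disable_sketch();
	migrate_disable_sketch();	/* nested region */
	migrate_enable_sketch();
	migrate_enable_sketch();	/* outermost: nr_pinned returns to 0 */
	printf("nr_pinned=%d nesting=%d\n", nr_pinned, migration_disabled);
	return 0;
}
```

The real code performs the accounting with preemption disabled so the nr_pinned update lands on the task's own runqueue; the sketch ignores that detail.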
| 1943 | + |
---|
| 1944 | +static inline bool rq_has_pinned_tasks(struct rq *rq) |
---|
| 1945 | +{ |
---|
| 1946 | + return rq->nr_pinned; |
---|
1476 | 1947 | } |
---|
1477 | 1948 | |
---|
1478 | 1949 | /* |
---|
1479 | | - * Per-CPU kthreads are allowed to run on !actie && online CPUs, see |
---|
| 1950 | + * Per-CPU kthreads are allowed to run on !active && online CPUs, see |
---|
1480 | 1951 | * __set_cpus_allowed_ptr() and select_fallback_rq(). |
---|
1481 | 1952 | */ |
---|
1482 | 1953 | static inline bool is_cpu_allowed(struct task_struct *p, int cpu) |
---|
1483 | 1954 | { |
---|
1484 | | - if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) |
---|
| 1955 | + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) |
---|
1485 | 1956 | return false; |
---|
1486 | 1957 | |
---|
1487 | | - if (is_per_cpu_kthread(p)) |
---|
| 1958 | + if (is_per_cpu_kthread(p) || is_migration_disabled(p)) |
---|
1488 | 1959 | return cpu_online(cpu); |
---|
1489 | 1960 | |
---|
1490 | | - return cpu_active(cpu); |
---|
| 1961 | + if (!cpu_active(cpu)) |
---|
| 1962 | + return false; |
---|
| 1963 | + |
---|
| 1964 | + return cpumask_test_cpu(cpu, task_cpu_possible_mask(p)); |
---|
1491 | 1965 | } |
---|
1492 | 1966 | |
---|
1493 | 1967 | /* |
---|
.. | .. |
---|
1512 | 1986 | static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf, |
---|
1513 | 1987 | struct task_struct *p, int new_cpu) |
---|
1514 | 1988 | { |
---|
| 1989 | + int detached = 0; |
---|
| 1990 | + |
---|
1515 | 1991 | lockdep_assert_held(&rq->lock); |
---|
1516 | 1992 | |
---|
1517 | | - WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); |
---|
1518 | | - dequeue_task(rq, p, DEQUEUE_NOCLOCK); |
---|
1519 | | - set_task_cpu(p, new_cpu); |
---|
1520 | | - rq_unlock(rq, rf); |
---|
| 1993 | + /* |
---|
| 1994 | + * The vendor hook may drop the lock temporarily, so |
---|
| 1995 | + * pass the rq flags so it can unpin the lock. We expect the
---|
| 1996 | + * rq lock to be held after return. |
---|
| 1997 | + */ |
---|
| 1998 | + trace_android_rvh_migrate_queued_task(rq, rf, p, new_cpu, &detached); |
---|
| 1999 | + if (detached) |
---|
| 2000 | + goto attach; |
---|
1521 | 2001 | |
---|
| 2002 | + deactivate_task(rq, p, DEQUEUE_NOCLOCK); |
---|
| 2003 | + set_task_cpu(p, new_cpu); |
---|
| 2004 | + |
---|
| 2005 | +attach: |
---|
| 2006 | + rq_unlock(rq, rf); |
---|
1522 | 2007 | rq = cpu_rq(new_cpu); |
---|
1523 | 2008 | |
---|
1524 | 2009 | rq_lock(rq, rf); |
---|
1525 | 2010 | BUG_ON(task_cpu(p) != new_cpu); |
---|
1526 | | - enqueue_task(rq, p, 0); |
---|
1527 | | - p->on_rq = TASK_ON_RQ_QUEUED; |
---|
| 2011 | + activate_task(rq, p, 0); |
---|
1528 | 2012 | check_preempt_curr(rq, p, 0); |
---|
1529 | 2013 | |
---|
1530 | 2014 | return rq; |
---|
1531 | 2015 | } |
---|
1532 | 2016 | |
---|
1533 | 2017 | struct migration_arg { |
---|
1534 | | - struct task_struct *task; |
---|
1535 | | - int dest_cpu; |
---|
| 2018 | + struct task_struct *task; |
---|
| 2019 | + int dest_cpu; |
---|
| 2020 | + struct set_affinity_pending *pending; |
---|
| 2021 | +}; |
---|
| 2022 | + |
---|
| 2023 | +/* |
---|
| 2024 | + * @refs: number of wait_for_completion() waiters
---|
| 2025 | + * @stop_pending: is @stop_work in use |
---|
| 2026 | + */ |
---|
| 2027 | +struct set_affinity_pending { |
---|
| 2028 | + refcount_t refs; |
---|
| 2029 | + unsigned int stop_pending; |
---|
| 2030 | + struct completion done; |
---|
| 2031 | + struct cpu_stop_work stop_work; |
---|
| 2032 | + struct migration_arg arg; |
---|
1536 | 2033 | }; |
---|
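struct set_affinity_pending is shared by every set_cpus_allowed_ptr() caller that races with one Migrate-Disable region: the first caller installs it, later callers take a reference and overwrite the destination, and a single complete_all() releases them together. A loose userspace analogue of that lifecycle using pthreads (a condition variable stands in for struct completion; all names are illustrative):

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Loose analogue of struct set_affinity_pending: waiters share one object. */
struct pending_sketch {
	pthread_mutex_t lock;
	pthread_cond_t done;	/* stands in for struct completion */
	int refs;		/* stands in for refcount_t refs */
	bool completed;
	int dest_cpu;		/* the last requested destination wins */
};

static struct pending_sketch *installed;	/* like p->migration_pending */

/* First caller installs its own request; later callers reuse it. */
static struct pending_sketch *request(struct pending_sketch *mine, int dest_cpu)
{
	if (!installed) {
		pthread_mutex_init(&mine->lock, NULL);
		pthread_cond_init(&mine->done, NULL);
		mine->refs = 1;
		mine->completed = false;
		installed = mine;
	} else {
		installed->refs++;		/* refcount_inc(&pending->refs) */
	}
	installed->dest_cpu = dest_cpu;		/* pending->arg.dest_cpu = dest_cpu */
	return installed;
}

/* One signal releases every waiter, like complete_all(&pending->done). */
static void finish(struct pending_sketch *p)
{
	pthread_mutex_lock(&p->lock);
	p->completed = true;
	pthread_cond_broadcast(&p->done);
	pthread_mutex_unlock(&p->lock);
}

int main(void)
{
	struct pending_sketch mine, other;
	struct pending_sketch *p = request(&mine, 1);	/* installs &mine */

	request(&other, 3);	/* a second caller reuses the installed request */
	finish(p);		/* the "stopper" completes both at once */
	printf("dest_cpu=%d refs=%d\n", p->dest_cpu, p->refs);
	return 0;
}
```

The real code additionally hands the request to the CPU stopper and blocks the original owner of the structure until every reference has been dropped.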
1537 | 2034 | |
---|
1538 | 2035 | /* |
---|
.. | .. |
---|
1565 | 2062 | static int migration_cpu_stop(void *data) |
---|
1566 | 2063 | { |
---|
1567 | 2064 | struct migration_arg *arg = data; |
---|
| 2065 | + struct set_affinity_pending *pending = arg->pending; |
---|
1568 | 2066 | struct task_struct *p = arg->task; |
---|
1569 | 2067 | struct rq *rq = this_rq(); |
---|
| 2068 | + bool complete = false; |
---|
1570 | 2069 | struct rq_flags rf; |
---|
1571 | 2070 | |
---|
1572 | 2071 | /* |
---|
1573 | 2072 | * The original target CPU might have gone down and we might |
---|
1574 | 2073 | * be on another CPU but it doesn't matter. |
---|
1575 | 2074 | */ |
---|
1576 | | - local_irq_disable(); |
---|
| 2075 | + local_irq_save(rf.flags); |
---|
1577 | 2076 | /* |
---|
1578 | 2077 | * We need to explicitly wake pending tasks before running |
---|
1579 | | - * __migrate_task() such that we will not miss enforcing cpus_allowed |
---|
| 2078 | + * __migrate_task() such that we will not miss enforcing cpus_ptr |
---|
1580 | 2079 | * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. |
---|
1581 | 2080 | */ |
---|
1582 | | - sched_ttwu_pending(); |
---|
| 2081 | + flush_smp_call_function_from_idle(); |
---|
1583 | 2082 | |
---|
1584 | 2083 | raw_spin_lock(&p->pi_lock); |
---|
1585 | 2084 | rq_lock(rq, &rf); |
---|
| 2085 | + |
---|
1586 | 2086 | /* |
---|
1587 | 2087 | * If task_rq(p) != rq, it cannot be migrated here, because we're |
---|
1588 | 2088 | * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because |
---|
1589 | 2089 | * we're holding p->pi_lock. |
---|
1590 | 2090 | */ |
---|
1591 | 2091 | if (task_rq(p) == rq) { |
---|
| 2092 | + if (is_migration_disabled(p)) |
---|
| 2093 | + goto out; |
---|
| 2094 | + |
---|
| 2095 | + if (pending) { |
---|
| 2096 | + if (p->migration_pending == pending) |
---|
| 2097 | + p->migration_pending = NULL; |
---|
| 2098 | + complete = true; |
---|
| 2099 | + |
---|
| 2100 | + if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) |
---|
| 2101 | + goto out; |
---|
| 2102 | + } |
---|
| 2103 | + |
---|
1592 | 2104 | if (task_on_rq_queued(p)) |
---|
1593 | 2105 | rq = __migrate_task(rq, &rf, p, arg->dest_cpu); |
---|
1594 | 2106 | else |
---|
1595 | 2107 | p->wake_cpu = arg->dest_cpu; |
---|
1596 | | - } |
---|
1597 | | - rq_unlock(rq, &rf); |
---|
1598 | | - raw_spin_unlock(&p->pi_lock); |
---|
1599 | 2108 | |
---|
1600 | | - local_irq_enable(); |
---|
| 2109 | + /* |
---|
| 2110 | + * XXX __migrate_task() can fail, at which point we might end |
---|
| 2111 | + * up running on a dodgy CPU, AFAICT this can only happen |
---|
| 2112 | + * during CPU hotplug, at which point we'll get pushed out |
---|
| 2113 | + * anyway, so it's probably not a big deal. |
---|
| 2114 | + */ |
---|
| 2115 | + |
---|
| 2116 | + } else if (pending) { |
---|
| 2117 | + /* |
---|
| 2118 | + * This happens when we get migrated between migrate_enable()'s |
---|
| 2119 | + * preempt_enable() and scheduling the stopper task. At that |
---|
| 2120 | + * point we're a regular task again and not current anymore. |
---|
| 2121 | + * |
---|
| 2122 | + * A !PREEMPT kernel has a giant hole here, which makes it far |
---|
| 2123 | + * more likely. |
---|
| 2124 | + */ |
---|
| 2125 | + |
---|
| 2126 | + /* |
---|
| 2127 | + * The task moved before the stopper got to run. We're holding |
---|
| 2128 | + * ->pi_lock, so the allowed mask is stable - if it got |
---|
| 2129 | + * somewhere allowed, we're done. |
---|
| 2130 | + */ |
---|
| 2131 | + if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) { |
---|
| 2132 | + if (p->migration_pending == pending) |
---|
| 2133 | + p->migration_pending = NULL; |
---|
| 2134 | + complete = true; |
---|
| 2135 | + goto out; |
---|
| 2136 | + } |
---|
| 2137 | + |
---|
| 2138 | + /* |
---|
| 2139 | + * When migrate_enable() hits a rq mis-match we can't reliably |
---|
| 2140 | + * determine is_migration_disabled() and so have to chase after |
---|
| 2141 | + * it. |
---|
| 2142 | + */ |
---|
| 2143 | + WARN_ON_ONCE(!pending->stop_pending); |
---|
| 2144 | + task_rq_unlock(rq, p, &rf); |
---|
| 2145 | + stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop, |
---|
| 2146 | + &pending->arg, &pending->stop_work); |
---|
| 2147 | + return 0; |
---|
| 2148 | + } |
---|
| 2149 | +out: |
---|
| 2150 | + if (pending) |
---|
| 2151 | + pending->stop_pending = false; |
---|
| 2152 | + task_rq_unlock(rq, p, &rf); |
---|
| 2153 | + |
---|
| 2154 | + if (complete) |
---|
| 2155 | + complete_all(&pending->done); |
---|
| 2156 | + |
---|
| 2157 | + return 0; |
---|
| 2158 | +} |
---|
| 2159 | + |
---|
| 2160 | +int push_cpu_stop(void *arg) |
---|
| 2161 | +{ |
---|
| 2162 | + struct rq *lowest_rq = NULL, *rq = this_rq(); |
---|
| 2163 | + struct task_struct *p = arg; |
---|
| 2164 | + |
---|
| 2165 | + raw_spin_lock_irq(&p->pi_lock); |
---|
| 2166 | + raw_spin_lock(&rq->lock); |
---|
| 2167 | + |
---|
| 2168 | + if (task_rq(p) != rq) |
---|
| 2169 | + goto out_unlock; |
---|
| 2170 | + |
---|
| 2171 | + if (is_migration_disabled(p)) { |
---|
| 2172 | + p->migration_flags |= MDF_PUSH; |
---|
| 2173 | + goto out_unlock; |
---|
| 2174 | + } |
---|
| 2175 | + |
---|
| 2176 | + p->migration_flags &= ~MDF_PUSH; |
---|
| 2177 | + |
---|
| 2178 | + if (p->sched_class->find_lock_rq) |
---|
| 2179 | + lowest_rq = p->sched_class->find_lock_rq(p, rq); |
---|
| 2180 | + |
---|
| 2181 | + if (!lowest_rq) |
---|
| 2182 | + goto out_unlock; |
---|
| 2183 | + |
---|
| 2184 | + // XXX validate p is still the highest prio task |
---|
| 2185 | + if (task_rq(p) == rq) { |
---|
| 2186 | + deactivate_task(rq, p, 0); |
---|
| 2187 | + set_task_cpu(p, lowest_rq->cpu); |
---|
| 2188 | + activate_task(lowest_rq, p, 0); |
---|
| 2189 | + resched_curr(lowest_rq); |
---|
| 2190 | + } |
---|
| 2191 | + |
---|
| 2192 | + double_unlock_balance(rq, lowest_rq); |
---|
| 2193 | + |
---|
| 2194 | +out_unlock: |
---|
| 2195 | + rq->push_busy = false; |
---|
| 2196 | + raw_spin_unlock(&rq->lock); |
---|
| 2197 | + raw_spin_unlock_irq(&p->pi_lock); |
---|
| 2198 | + |
---|
| 2199 | + put_task_struct(p); |
---|
1601 | 2200 | return 0; |
---|
1602 | 2201 | } |
---|
1603 | 2202 | |
---|
.. | .. |
---|
1605 | 2204 | * sched_class::set_cpus_allowed must do the below, but is not required to |
---|
1606 | 2205 | * actually call this function. |
---|
1607 | 2206 | */ |
---|
1608 | | -void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) |
---|
| 2207 | +void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags) |
---|
1609 | 2208 | { |
---|
1610 | | - cpumask_copy(&p->cpus_allowed, new_mask); |
---|
| 2209 | + if (flags & (SCA_MIGRATE_ENABLE | SCA_MIGRATE_DISABLE)) { |
---|
| 2210 | + p->cpus_ptr = new_mask; |
---|
| 2211 | + return; |
---|
| 2212 | + } |
---|
| 2213 | + |
---|
| 2214 | + cpumask_copy(&p->cpus_mask, new_mask); |
---|
1611 | 2215 | p->nr_cpus_allowed = cpumask_weight(new_mask); |
---|
| 2216 | + trace_android_rvh_set_cpus_allowed_comm(p, new_mask); |
---|
1612 | 2217 | } |
---|
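For SCA_MIGRATE_DISABLE/ENABLE the function above only repoints p->cpus_ptr, leaving the user-visible p->cpus_mask intact, whereas the normal path copies the new mask in. A small sketch of that two-level affinity, with a byte array standing in for a cpumask:

```c
#include <stdio.h>
#include <string.h>

#define NR_CPUS_SKETCH 4

/* Toy task: cpus_ptr normally aims at cpus_mask; migrate-disable swings it
 * to a single-CPU mask without losing the user-requested affinity. */
struct task_sketch {
	unsigned char cpus_mask[NR_CPUS_SKETCH];	/* user-requested affinity */
	const unsigned char *cpus_ptr;			/* what the scheduler consults */
};

static const unsigned char only_cpu2[NR_CPUS_SKETCH] = { 0, 0, 1, 0 };

static void set_allowed(struct task_sketch *t, const unsigned char *new_mask, int pin)
{
	if (pin) {
		t->cpus_ptr = new_mask;		/* SCA_MIGRATE_DISABLE: repoint only */
		return;
	}
	memcpy(t->cpus_mask, new_mask, NR_CPUS_SKETCH);	/* normal path: copy */
}

int main(void)
{
	struct task_sketch t = { .cpus_mask = { 1, 1, 1, 1 } };

	t.cpus_ptr = t.cpus_mask;		/* normal state */
	set_allowed(&t, only_cpu2, 1);		/* pin to one CPU for the pinned section */
	printf("scheduler CPU0=%d, user CPU0=%d\n",
	       t.cpus_ptr[0], t.cpus_mask[0]);	/* 0 vs 1: user affinity preserved */
	t.cpus_ptr = t.cpus_mask;		/* migrate_enable(): point back */
	return 0;
}
```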
1613 | 2218 | |
---|
1614 | | -void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) |
---|
| 2219 | +static void |
---|
| 2220 | +__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags) |
---|
1615 | 2221 | { |
---|
1616 | 2222 | struct rq *rq = task_rq(p); |
---|
1617 | 2223 | bool queued, running; |
---|
1618 | 2224 | |
---|
1619 | | - lockdep_assert_held(&p->pi_lock); |
---|
| 2225 | + /* |
---|
| 2226 | + * This here violates the locking rules for affinity, since we're only |
---|
| 2227 | + * supposed to change these variables while holding both rq->lock and |
---|
| 2228 | + * p->pi_lock. |
---|
| 2229 | + * |
---|
| 2230 | + * HOWEVER, it magically works, because ttwu() is the only code that |
---|
| 2231 | + * accesses these variables under p->pi_lock and only does so after |
---|
| 2232 | + * smp_cond_load_acquire(&p->on_cpu, !VAL), and we're in __schedule() |
---|
| 2233 | + * before finish_task(). |
---|
| 2234 | + * |
---|
| 2235 | + * XXX do further audits, this smells like something putrid. |
---|
| 2236 | + */ |
---|
| 2237 | + if (flags & SCA_MIGRATE_DISABLE) |
---|
| 2238 | + SCHED_WARN_ON(!p->on_cpu); |
---|
| 2239 | + else |
---|
| 2240 | + lockdep_assert_held(&p->pi_lock); |
---|
1620 | 2241 | |
---|
1621 | 2242 | queued = task_on_rq_queued(p); |
---|
1622 | 2243 | running = task_current(rq, p); |
---|
.. | .. |
---|
1632 | 2253 | if (running) |
---|
1633 | 2254 | put_prev_task(rq, p); |
---|
1634 | 2255 | |
---|
1635 | | - p->sched_class->set_cpus_allowed(p, new_mask); |
---|
| 2256 | + p->sched_class->set_cpus_allowed(p, new_mask, flags); |
---|
1636 | 2257 | |
---|
1637 | 2258 | if (queued) |
---|
1638 | 2259 | enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); |
---|
1639 | 2260 | if (running) |
---|
1640 | | - set_curr_task(rq, p); |
---|
| 2261 | + set_next_task(rq, p); |
---|
| 2262 | +} |
---|
| 2263 | + |
---|
| 2264 | +static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf, |
---|
| 2265 | + int dest_cpu, unsigned int flags); |
---|
| 2266 | +/* |
---|
| 2267 | + * Called with both p->pi_lock and rq->lock held; drops both before returning. |
---|
| 2268 | + */ |
---|
| 2269 | +static int __set_cpus_allowed_ptr_locked(struct task_struct *p, |
---|
| 2270 | + const struct cpumask *new_mask, |
---|
| 2271 | + u32 flags, |
---|
| 2272 | + struct rq *rq, |
---|
| 2273 | + struct rq_flags *rf) |
---|
| 2274 | +{ |
---|
| 2275 | + const struct cpumask *cpu_valid_mask = cpu_active_mask; |
---|
| 2276 | + const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p); |
---|
| 2277 | + unsigned int dest_cpu; |
---|
| 2278 | + int ret = 0; |
---|
| 2279 | + |
---|
| 2280 | + update_rq_clock(rq); |
---|
| 2281 | + |
---|
| 2282 | + if (p->flags & PF_KTHREAD || is_migration_disabled(p)) { |
---|
| 2283 | + /* |
---|
| 2284 | + * Kernel threads are allowed on online && !active CPUs. |
---|
| 2285 | + * |
---|
| 2286 | + * Specifically, migration_disabled() tasks must not fail the |
---|
| 2287 | + * cpumask_any_and_distribute() pick below, esp. so on |
---|
| 2288 | + * SCA_MIGRATE_ENABLE, otherwise we'll not call |
---|
| 2289 | + * set_cpus_allowed_common() and actually reset p->cpus_ptr. |
---|
| 2290 | + */ |
---|
| 2291 | + cpu_valid_mask = cpu_online_mask; |
---|
| 2292 | + } else if (!cpumask_subset(new_mask, cpu_allowed_mask)) { |
---|
| 2293 | + ret = -EINVAL; |
---|
| 2294 | + goto out; |
---|
| 2295 | + } |
---|
| 2296 | + |
---|
| 2297 | + /* |
---|
| 2298 | + * Must re-check here, to close a race against __kthread_bind(), |
---|
| 2299 | + * sched_setaffinity() is not guaranteed to observe the flag. |
---|
| 2300 | + */ |
---|
| 2301 | + if ((flags & SCA_CHECK) && (p->flags & PF_NO_SETAFFINITY)) { |
---|
| 2302 | + ret = -EINVAL; |
---|
| 2303 | + goto out; |
---|
| 2304 | + } |
---|
| 2305 | + |
---|
| 2306 | + if (!(flags & SCA_MIGRATE_ENABLE)) { |
---|
| 2307 | + if (cpumask_equal(&p->cpus_mask, new_mask)) |
---|
| 2308 | + goto out; |
---|
| 2309 | + |
---|
| 2310 | + if (WARN_ON_ONCE(p == current && |
---|
| 2311 | + is_migration_disabled(p) && |
---|
| 2312 | + !cpumask_test_cpu(task_cpu(p), new_mask))) { |
---|
| 2313 | + ret = -EBUSY; |
---|
| 2314 | + goto out; |
---|
| 2315 | + } |
---|
| 2316 | + } |
---|
| 2317 | + |
---|
| 2318 | + /* |
---|
| 2319 | + * Picking a ~random cpu helps in cases where we are changing affinity |
---|
| 2320 | + * for groups of tasks (ie. cpuset), so that load balancing is not |
---|
| 2321 | + * immediately required to distribute the tasks within their new mask. |
---|
| 2322 | + */ |
---|
| 2323 | + dest_cpu = cpumask_any_and_distribute(cpu_valid_mask, new_mask); |
---|
| 2324 | + if (dest_cpu >= nr_cpu_ids) { |
---|
| 2325 | + ret = -EINVAL; |
---|
| 2326 | + goto out; |
---|
| 2327 | + } |
---|
| 2328 | + |
---|
| 2329 | + __do_set_cpus_allowed(p, new_mask, flags); |
---|
| 2330 | + |
---|
| 2331 | + if (p->flags & PF_KTHREAD) { |
---|
| 2332 | + /* |
---|
| 2333 | + * For kernel threads that do indeed end up on online && |
---|
| 2334 | + * !active we want to ensure they are strict per-CPU threads. |
---|
| 2335 | + */ |
---|
| 2336 | + WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && |
---|
| 2337 | + !cpumask_intersects(new_mask, cpu_active_mask) && |
---|
| 2338 | + p->nr_cpus_allowed != 1); |
---|
| 2339 | + } |
---|
| 2340 | + |
---|
| 2341 | + return affine_move_task(rq, p, rf, dest_cpu, flags); |
---|
| 2342 | +out: |
---|
| 2343 | + task_rq_unlock(rq, p, rf); |
---|
| 2344 | + |
---|
| 2345 | + return ret; |
---|
| 2346 | +} |
---|
| 2347 | + |
---|
| 2348 | +void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) |
---|
| 2349 | +{ |
---|
| 2350 | + __do_set_cpus_allowed(p, new_mask, 0); |
---|
| 2351 | +} |
---|
| 2352 | + |
---|
| 2353 | +/* |
---|
| 2354 | + * This function is wildly self-concurrent; here be dragons.
---|
| 2355 | + * |
---|
| 2356 | + * |
---|
| 2357 | + * When given a valid mask, __set_cpus_allowed_ptr() must block until the |
---|
| 2358 | + * designated task is enqueued on an allowed CPU. If that task is currently |
---|
| 2359 | + * running, we have to kick it out using the CPU stopper. |
---|
| 2360 | + * |
---|
| 2361 | + * Migrate-Disable comes along and tramples all over our nice sandcastle. |
---|
| 2362 | + * Consider: |
---|
| 2363 | + * |
---|
| 2364 | + * Initial conditions: P0->cpus_mask = [0, 1] |
---|
| 2365 | + * |
---|
| 2366 | + * P0@CPU0 P1 |
---|
| 2367 | + * |
---|
| 2368 | + * migrate_disable(); |
---|
| 2369 | + * <preempted> |
---|
| 2370 | + * set_cpus_allowed_ptr(P0, [1]); |
---|
| 2371 | + * |
---|
| 2372 | + * P1 *cannot* return from this set_cpus_allowed_ptr() call until P0 executes |
---|
| 2373 | + * its outermost migrate_enable() (i.e. it exits its Migrate-Disable region). |
---|
| 2374 | + * This means we need the following scheme: |
---|
| 2375 | + * |
---|
| 2376 | + * P0@CPU0 P1 |
---|
| 2377 | + * |
---|
| 2378 | + * migrate_disable(); |
---|
| 2379 | + * <preempted> |
---|
| 2380 | + * set_cpus_allowed_ptr(P0, [1]); |
---|
| 2381 | + * <blocks> |
---|
| 2382 | + * <resumes> |
---|
| 2383 | + * migrate_enable(); |
---|
| 2384 | + * __set_cpus_allowed_ptr(); |
---|
| 2385 | + * <wakes local stopper> |
---|
| 2386 | + * `--> <woken on migration completion> |
---|
| 2387 | + * |
---|
| 2388 | + * Now the fun stuff: there may be several P1-like tasks, i.e. multiple |
---|
| 2389 | + * concurrent set_cpus_allowed_ptr(P0, [*]) calls. CPU affinity changes of any |
---|
| 2390 | + * task p are serialized by p->pi_lock, which we can leverage: the one that |
---|
| 2391 | + * should come into effect at the end of the Migrate-Disable region is the last |
---|
| 2392 | + * one. This means we only need to track a single cpumask (i.e. p->cpus_mask), |
---|
| 2393 | + * but we still need to properly signal those waiting tasks at the appropriate |
---|
| 2394 | + * moment. |
---|
| 2395 | + * |
---|
| 2396 | + * This is implemented using struct set_affinity_pending. The first |
---|
| 2397 | + * __set_cpus_allowed_ptr() caller within a given Migrate-Disable region will |
---|
| 2398 | + * setup an instance of that struct and install it on the targeted task_struct. |
---|
| 2399 | + * Any and all further callers will reuse that instance. Those then wait for |
---|
| 2400 | + * a completion signaled at the tail of the CPU stopper callback (1), triggered |
---|
| 2401 | + * on the end of the Migrate-Disable region (i.e. outermost migrate_enable()). |
---|
| 2402 | + * |
---|
| 2403 | + * |
---|
| 2404 | + * (1) In the cases covered above. There is one more where the completion is |
---|
| 2405 | + * signaled within affine_move_task() itself: when a subsequent affinity request |
---|
| 2406 | + * cancels the need for an active migration. Consider: |
---|
| 2407 | + * |
---|
| 2408 | + * Initial conditions: P0->cpus_mask = [0, 1] |
---|
| 2409 | + * |
---|
| 2410 | + * P0@CPU0 P1 P2 |
---|
| 2411 | + * |
---|
| 2412 | + * migrate_disable(); |
---|
| 2413 | + * <preempted> |
---|
| 2414 | + * set_cpus_allowed_ptr(P0, [1]); |
---|
| 2415 | + * <blocks> |
---|
| 2416 | + * set_cpus_allowed_ptr(P0, [0, 1]); |
---|
| 2417 | + * <signal completion> |
---|
| 2418 | + * <awakes> |
---|
| 2419 | + * |
---|
| 2420 | + * Note that the above is safe vs a concurrent migrate_enable(), as any |
---|
| 2421 | + * pending affinity completion is preceded by an uninstallation of
---|
| 2422 | + * p->migration_pending done with p->pi_lock held. |
---|
| 2423 | + */ |
---|
| 2424 | +static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf, |
---|
| 2425 | + int dest_cpu, unsigned int flags) |
---|
| 2426 | +{ |
---|
| 2427 | + struct set_affinity_pending my_pending = { }, *pending = NULL; |
---|
| 2428 | + bool stop_pending, complete = false; |
---|
| 2429 | + |
---|
| 2430 | + /* Can the task run on the task's current CPU? If so, we're done */ |
---|
| 2431 | + if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) { |
---|
| 2432 | + struct task_struct *push_task = NULL; |
---|
| 2433 | + |
---|
| 2434 | + if ((flags & SCA_MIGRATE_ENABLE) && |
---|
| 2435 | + (p->migration_flags & MDF_PUSH) && !rq->push_busy) { |
---|
| 2436 | + rq->push_busy = true; |
---|
| 2437 | + push_task = get_task_struct(p); |
---|
| 2438 | + } |
---|
| 2439 | + |
---|
| 2440 | + /* |
---|
| 2441 | + * If there are pending waiters, but no pending stop_work, |
---|
| 2442 | + * then complete now. |
---|
| 2443 | + */ |
---|
| 2444 | + pending = p->migration_pending; |
---|
| 2445 | + if (pending && !pending->stop_pending) { |
---|
| 2446 | + p->migration_pending = NULL; |
---|
| 2447 | + complete = true; |
---|
| 2448 | + } |
---|
| 2449 | + |
---|
| 2450 | + task_rq_unlock(rq, p, rf); |
---|
| 2451 | + |
---|
| 2452 | + if (push_task) { |
---|
| 2453 | + stop_one_cpu_nowait(rq->cpu, push_cpu_stop, |
---|
| 2454 | + p, &rq->push_work); |
---|
| 2455 | + } |
---|
| 2456 | + |
---|
| 2457 | + if (complete) |
---|
| 2458 | + complete_all(&pending->done); |
---|
| 2459 | + |
---|
| 2460 | + return 0; |
---|
| 2461 | + } |
---|
| 2462 | + |
---|
| 2463 | + if (!(flags & SCA_MIGRATE_ENABLE)) { |
---|
| 2464 | + /* serialized by p->pi_lock */ |
---|
| 2465 | + if (!p->migration_pending) { |
---|
| 2466 | + /* Install the request */ |
---|
| 2467 | + refcount_set(&my_pending.refs, 1); |
---|
| 2468 | + init_completion(&my_pending.done); |
---|
| 2469 | + my_pending.arg = (struct migration_arg) { |
---|
| 2470 | + .task = p, |
---|
| 2471 | + .dest_cpu = dest_cpu, |
---|
| 2472 | + .pending = &my_pending, |
---|
| 2473 | + }; |
---|
| 2474 | + |
---|
| 2475 | + p->migration_pending = &my_pending; |
---|
| 2476 | + } else { |
---|
| 2477 | + pending = p->migration_pending; |
---|
| 2478 | + refcount_inc(&pending->refs); |
---|
| 2479 | + /* |
---|
| 2480 | + * Affinity has changed, but we've already installed a |
---|
| 2481 | + * pending. migration_cpu_stop() *must* see this, else |
---|
| 2482 | + * we risk a completion of the pending despite having a |
---|
| 2483 | + * task on a disallowed CPU. |
---|
| 2484 | + * |
---|
| 2485 | + * Serialized by p->pi_lock, so this is safe. |
---|
| 2486 | + */ |
---|
| 2487 | + pending->arg.dest_cpu = dest_cpu; |
---|
| 2488 | + } |
---|
| 2489 | + } |
---|
| 2490 | + pending = p->migration_pending; |
---|
| 2491 | + /* |
---|
| 2492 | + * - !MIGRATE_ENABLE: |
---|
| 2493 | + * we'll have installed a pending if there wasn't one already. |
---|
| 2494 | + * |
---|
| 2495 | + * - MIGRATE_ENABLE: |
---|
| 2496 | + * we're here because the current CPU isn't matching anymore, |
---|
| 2497 | + * the only way that can happen is because of a concurrent |
---|
| 2498 | + * set_cpus_allowed_ptr() call, which should then still be |
---|
| 2499 | + * pending completion. |
---|
| 2500 | + * |
---|
| 2501 | + * Either way, we really should have a @pending here. |
---|
| 2502 | + */ |
---|
| 2503 | + if (WARN_ON_ONCE(!pending)) { |
---|
| 2504 | + task_rq_unlock(rq, p, rf); |
---|
| 2505 | + return -EINVAL; |
---|
| 2506 | + } |
---|
| 2507 | + |
---|
| 2508 | + if (task_running(rq, p) || p->state == TASK_WAKING) { |
---|
| 2509 | + /* |
---|
| 2510 | + * MIGRATE_ENABLE gets here because 'p == current', but for |
---|
| 2511 | + * anything else we cannot do is_migration_disabled(), punt |
---|
| 2512 | + * and have the stopper function handle it all race-free. |
---|
| 2513 | + */ |
---|
| 2514 | + stop_pending = pending->stop_pending; |
---|
| 2515 | + if (!stop_pending) |
---|
| 2516 | + pending->stop_pending = true; |
---|
| 2517 | + |
---|
| 2518 | + if (flags & SCA_MIGRATE_ENABLE) |
---|
| 2519 | + p->migration_flags &= ~MDF_PUSH; |
---|
| 2520 | + |
---|
| 2521 | + task_rq_unlock(rq, p, rf); |
---|
| 2522 | + |
---|
| 2523 | + if (!stop_pending) { |
---|
| 2524 | + stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, |
---|
| 2525 | + &pending->arg, &pending->stop_work); |
---|
| 2526 | + } |
---|
| 2527 | + |
---|
| 2528 | + if (flags & SCA_MIGRATE_ENABLE) |
---|
| 2529 | + return 0; |
---|
| 2530 | + } else { |
---|
| 2531 | + |
---|
| 2532 | + if (!is_migration_disabled(p)) { |
---|
| 2533 | + if (task_on_rq_queued(p)) |
---|
| 2534 | + rq = move_queued_task(rq, rf, p, dest_cpu); |
---|
| 2535 | + |
---|
| 2536 | + if (!pending->stop_pending) { |
---|
| 2537 | + p->migration_pending = NULL; |
---|
| 2538 | + complete = true; |
---|
| 2539 | + } |
---|
| 2540 | + } |
---|
| 2541 | + task_rq_unlock(rq, p, rf); |
---|
| 2542 | + |
---|
| 2543 | + if (complete) |
---|
| 2544 | + complete_all(&pending->done); |
---|
| 2545 | + } |
---|
| 2546 | + |
---|
| 2547 | + wait_for_completion(&pending->done); |
---|
| 2548 | + |
---|
| 2549 | + if (refcount_dec_and_test(&pending->refs)) |
---|
| 2550 | + wake_up_var(&pending->refs); /* No UaF, just an address */ |
---|
| 2551 | + |
---|
| 2552 | + /* |
---|
| 2553 | + * Block the original owner of &pending until all subsequent callers |
---|
| 2554 | + * have seen the completion and decremented the refcount |
---|
| 2555 | + */ |
---|
| 2556 | + wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs)); |
---|
| 2557 | + |
---|
| 2558 | + /* ARGH */ |
---|
| 2559 | + WARN_ON_ONCE(my_pending.stop_pending); |
---|
| 2560 | + |
---|
| 2561 | + return 0; |
---|
1641 | 2562 | } |
---|
1642 | 2563 | |
---|
1643 | 2564 | /* |
---|
.. | .. |
---|
1650 | 2571 | * call is not atomic; no spinlocks may be held. |
---|
1651 | 2572 | */ |
---|
1652 | 2573 | static int __set_cpus_allowed_ptr(struct task_struct *p, |
---|
1653 | | - const struct cpumask *new_mask, bool check) |
---|
| 2574 | + const struct cpumask *new_mask, |
---|
| 2575 | + u32 flags) |
---|
1654 | 2576 | { |
---|
1655 | | - const struct cpumask *cpu_valid_mask = cpu_active_mask; |
---|
1656 | | - unsigned int dest_cpu; |
---|
1657 | 2577 | struct rq_flags rf; |
---|
1658 | 2578 | struct rq *rq; |
---|
1659 | | - int ret = 0; |
---|
1660 | 2579 | |
---|
1661 | 2580 | rq = task_rq_lock(p, &rf); |
---|
1662 | | - update_rq_clock(rq); |
---|
1663 | | - |
---|
1664 | | - if (p->flags & PF_KTHREAD) { |
---|
1665 | | - /* |
---|
1666 | | - * Kernel threads are allowed on online && !active CPUs |
---|
1667 | | - */ |
---|
1668 | | - cpu_valid_mask = cpu_online_mask; |
---|
1669 | | - } |
---|
1670 | | - |
---|
1671 | | - /* |
---|
1672 | | - * Must re-check here, to close a race against __kthread_bind(), |
---|
1673 | | - * sched_setaffinity() is not guaranteed to observe the flag. |
---|
1674 | | - */ |
---|
1675 | | - if (check && (p->flags & PF_NO_SETAFFINITY)) { |
---|
1676 | | - ret = -EINVAL; |
---|
1677 | | - goto out; |
---|
1678 | | - } |
---|
1679 | | - |
---|
1680 | | - if (cpumask_equal(&p->cpus_allowed, new_mask)) |
---|
1681 | | - goto out; |
---|
1682 | | - |
---|
1683 | | - dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); |
---|
1684 | | - if (dest_cpu >= nr_cpu_ids) { |
---|
1685 | | - ret = -EINVAL; |
---|
1686 | | - goto out; |
---|
1687 | | - } |
---|
1688 | | - |
---|
1689 | | - do_set_cpus_allowed(p, new_mask); |
---|
1690 | | - |
---|
1691 | | - if (p->flags & PF_KTHREAD) { |
---|
1692 | | - /* |
---|
1693 | | - * For kernel threads that do indeed end up on online && |
---|
1694 | | - * !active we want to ensure they are strict per-CPU threads. |
---|
1695 | | - */ |
---|
1696 | | - WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && |
---|
1697 | | - !cpumask_intersects(new_mask, cpu_active_mask) && |
---|
1698 | | - p->nr_cpus_allowed != 1); |
---|
1699 | | - } |
---|
1700 | | - |
---|
1701 | | - /* Can the task run on the task's current CPU? If so, we're done */ |
---|
1702 | | - if (cpumask_test_cpu(task_cpu(p), new_mask)) |
---|
1703 | | - goto out; |
---|
1704 | | - |
---|
1705 | | - if (task_running(rq, p) || p->state == TASK_WAKING) { |
---|
1706 | | - struct migration_arg arg = { p, dest_cpu }; |
---|
1707 | | - /* Need help from migration thread: drop lock and wait. */ |
---|
1708 | | - task_rq_unlock(rq, p, &rf); |
---|
1709 | | - stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); |
---|
1710 | | - tlb_migrate_finish(p->mm); |
---|
1711 | | - return 0; |
---|
1712 | | - } else if (task_on_rq_queued(p)) { |
---|
1713 | | - /* |
---|
1714 | | - * OK, since we're going to drop the lock immediately |
---|
1715 | | - * afterwards anyway. |
---|
1716 | | - */ |
---|
1717 | | - rq = move_queued_task(rq, &rf, p, dest_cpu); |
---|
1718 | | - } |
---|
1719 | | -out: |
---|
1720 | | - task_rq_unlock(rq, p, &rf); |
---|
1721 | | - |
---|
1722 | | - return ret; |
---|
| 2581 | + return __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, &rf); |
---|
1723 | 2582 | } |
---|
1724 | 2583 | |
---|
1725 | 2584 | int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) |
---|
1726 | 2585 | { |
---|
1727 | | - return __set_cpus_allowed_ptr(p, new_mask, false); |
---|
| 2586 | + return __set_cpus_allowed_ptr(p, new_mask, 0); |
---|
1728 | 2587 | } |
---|
1729 | 2588 | EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); |
---|
| 2589 | + |
---|
| 2590 | +/* |
---|
| 2591 | + * Change a given task's CPU affinity to the intersection of its current |
---|
| 2592 | + * affinity mask and @subset_mask, writing the resulting mask to @new_mask. |
---|
| 2593 | + * If the resulting mask is empty, leave the affinity unchanged and return |
---|
| 2594 | + * -EINVAL. |
---|
| 2595 | + */ |
---|
| 2596 | +static int restrict_cpus_allowed_ptr(struct task_struct *p, |
---|
| 2597 | + struct cpumask *new_mask, |
---|
| 2598 | + const struct cpumask *subset_mask) |
---|
| 2599 | +{ |
---|
| 2600 | + struct rq_flags rf; |
---|
| 2601 | + struct rq *rq; |
---|
| 2602 | + |
---|
| 2603 | + rq = task_rq_lock(p, &rf); |
---|
| 2604 | + if (!cpumask_and(new_mask, &p->cpus_mask, subset_mask)) { |
---|
| 2605 | + task_rq_unlock(rq, p, &rf); |
---|
| 2606 | + return -EINVAL; |
---|
| 2607 | + } |
---|
| 2608 | + |
---|
| 2609 | + return __set_cpus_allowed_ptr_locked(p, new_mask, 0, rq, &rf);
---|
| 2610 | +} |
---|
| 2611 | + |
---|
| 2612 | +/* |
---|
| 2613 | + * Restrict a given task's CPU affinity so that it is a subset of |
---|
| 2614 | + * task_cpu_possible_mask(). If the resulting mask is empty, we warn and |
---|
| 2615 | + * walk up the cpuset hierarchy until we find a suitable mask. |
---|
| 2616 | + */ |
---|
| 2617 | +void force_compatible_cpus_allowed_ptr(struct task_struct *p) |
---|
| 2618 | +{ |
---|
| 2619 | + cpumask_var_t new_mask; |
---|
| 2620 | + const struct cpumask *override_mask = task_cpu_possible_mask(p); |
---|
| 2621 | + |
---|
| 2622 | + alloc_cpumask_var(&new_mask, GFP_KERNEL); |
---|
| 2623 | + |
---|
| 2624 | + /* |
---|
| 2625 | + * __migrate_task() can fail silently in the face of concurrent |
---|
| 2626 | + * offlining of the chosen destination CPU, so take the hotplug |
---|
| 2627 | + * lock to ensure that the migration succeeds. |
---|
| 2628 | + */ |
---|
| 2629 | + trace_android_rvh_force_compatible_pre(NULL); |
---|
| 2630 | + cpus_read_lock(); |
---|
| 2631 | + if (!cpumask_available(new_mask)) |
---|
| 2632 | + goto out_set_mask; |
---|
| 2633 | + |
---|
| 2634 | + if (!restrict_cpus_allowed_ptr(p, new_mask, override_mask)) |
---|
| 2635 | + goto out_free_mask; |
---|
| 2636 | + |
---|
| 2637 | + /* |
---|
| 2638 | + * We failed to find a valid subset of the affinity mask for the |
---|
| 2639 | + * task, so override it based on its cpuset hierarchy. |
---|
| 2640 | + */ |
---|
| 2641 | + cpuset_cpus_allowed(p, new_mask); |
---|
| 2642 | + override_mask = new_mask; |
---|
| 2643 | + |
---|
| 2644 | +out_set_mask: |
---|
| 2645 | + if (printk_ratelimit()) { |
---|
| 2646 | + printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n", |
---|
| 2647 | + task_pid_nr(p), p->comm, |
---|
| 2648 | + cpumask_pr_args(override_mask)); |
---|
| 2649 | + } |
---|
| 2650 | + |
---|
| 2651 | + WARN_ON(set_cpus_allowed_ptr(p, override_mask)); |
---|
| 2652 | +out_free_mask: |
---|
| 2653 | + cpus_read_unlock(); |
---|
| 2654 | + trace_android_rvh_force_compatible_post(NULL); |
---|
| 2655 | + free_cpumask_var(new_mask); |
---|
| 2656 | +} |
---|
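restrict_cpus_allowed_ptr() intersects the current affinity with a subset mask and fails if the result is empty; force_compatible_cpus_allowed_ptr() then falls back to a wider (cpuset-derived) mask and warns. A compact sketch of that intersect-then-fallback flow, using plain bitmasks instead of cpumasks:

```c
#include <stdio.h>

/* Intersect the task's affinity with a restricting mask; if the result is
 * empty, fall back to a wider mask, roughly like restrict_cpus_allowed_ptr()
 * followed by the override in force_compatible_cpus_allowed_ptr(). */
static unsigned int restrict_or_fallback(unsigned int task_mask,
					 unsigned int subset_mask,
					 unsigned int fallback_mask)
{
	unsigned int new_mask = task_mask & subset_mask;

	if (new_mask)
		return new_mask;	/* restriction succeeded */

	printf("overriding affinity: no overlap, using fallback mask\n");
	return fallback_mask;		/* e.g. the cpuset/possible mask */
}

int main(void)
{
	/* Task allowed on CPUs 4-7, but the hardware only supports 0-3. */
	unsigned int result = restrict_or_fallback(0xf0, 0x0f, 0x0f);

	printf("resulting mask: 0x%02x\n", result);
	return 0;
}
```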
1730 | 2657 | |
---|
1731 | 2658 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) |
---|
1732 | 2659 | { |
---|
.. | .. |
---|
1765 | 2692 | * Clearly, migrating tasks to offline CPUs is a fairly daft thing. |
---|
1766 | 2693 | */ |
---|
1767 | 2694 | WARN_ON_ONCE(!cpu_online(new_cpu)); |
---|
| 2695 | + |
---|
| 2696 | + WARN_ON_ONCE(is_migration_disabled(p)); |
---|
1768 | 2697 | #endif |
---|
1769 | 2698 | |
---|
1770 | 2699 | trace_sched_migrate_task(p, new_cpu); |
---|
.. | .. |
---|
1775 | 2704 | p->se.nr_migrations++; |
---|
1776 | 2705 | rseq_migrate(p); |
---|
1777 | 2706 | perf_event_task_migrate(p); |
---|
| 2707 | + trace_android_rvh_set_task_cpu(p, new_cpu); |
---|
1778 | 2708 | } |
---|
1779 | 2709 | |
---|
1780 | 2710 | __set_task_cpu(p, new_cpu); |
---|
1781 | 2711 | } |
---|
| 2712 | +EXPORT_SYMBOL_GPL(set_task_cpu); |
---|
1782 | 2713 | |
---|
1783 | | -#ifdef CONFIG_NUMA_BALANCING |
---|
1784 | 2714 | static void __migrate_swap_task(struct task_struct *p, int cpu) |
---|
1785 | 2715 | { |
---|
1786 | 2716 | if (task_on_rq_queued(p)) { |
---|
.. | .. |
---|
1793 | 2723 | rq_pin_lock(src_rq, &srf); |
---|
1794 | 2724 | rq_pin_lock(dst_rq, &drf); |
---|
1795 | 2725 | |
---|
1796 | | - p->on_rq = TASK_ON_RQ_MIGRATING; |
---|
1797 | 2726 | deactivate_task(src_rq, p, 0); |
---|
1798 | 2727 | set_task_cpu(p, cpu); |
---|
1799 | 2728 | activate_task(dst_rq, p, 0); |
---|
1800 | | - p->on_rq = TASK_ON_RQ_QUEUED; |
---|
1801 | 2729 | check_preempt_curr(dst_rq, p, 0); |
---|
1802 | 2730 | |
---|
1803 | 2731 | rq_unpin_lock(dst_rq, &drf); |
---|
.. | .. |
---|
1840 | 2768 | if (task_cpu(arg->src_task) != arg->src_cpu) |
---|
1841 | 2769 | goto unlock; |
---|
1842 | 2770 | |
---|
1843 | | - if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed)) |
---|
| 2771 | + if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr)) |
---|
1844 | 2772 | goto unlock; |
---|
1845 | 2773 | |
---|
1846 | | - if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed)) |
---|
| 2774 | + if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr)) |
---|
1847 | 2775 | goto unlock; |
---|
1848 | 2776 | |
---|
1849 | 2777 | __migrate_swap_task(arg->src_task, arg->dst_cpu); |
---|
.. | .. |
---|
1885 | 2813 | if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu)) |
---|
1886 | 2814 | goto out; |
---|
1887 | 2815 | |
---|
1888 | | - if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed)) |
---|
| 2816 | + if (!cpumask_test_cpu(arg.dst_cpu, arg.src_task->cpus_ptr)) |
---|
1889 | 2817 | goto out; |
---|
1890 | 2818 | |
---|
1891 | | - if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed)) |
---|
| 2819 | + if (!cpumask_test_cpu(arg.src_cpu, arg.dst_task->cpus_ptr)) |
---|
1892 | 2820 | goto out; |
---|
1893 | 2821 | |
---|
1894 | 2822 | trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu); |
---|
.. | .. |
---|
1897 | 2825 | out: |
---|
1898 | 2826 | return ret; |
---|
1899 | 2827 | } |
---|
1900 | | -#endif /* CONFIG_NUMA_BALANCING */ |
---|
| 2828 | +EXPORT_SYMBOL_GPL(migrate_swap); |
---|
| 2829 | + |
---|
| 2830 | +static bool check_task_state(struct task_struct *p, long match_state) |
---|
| 2831 | +{ |
---|
| 2832 | + bool match = false; |
---|
| 2833 | + |
---|
| 2834 | + raw_spin_lock_irq(&p->pi_lock); |
---|
| 2835 | + if (p->state == match_state || p->saved_state == match_state) |
---|
| 2836 | + match = true; |
---|
| 2837 | + raw_spin_unlock_irq(&p->pi_lock); |
---|
| 2838 | + |
---|
| 2839 | + return match; |
---|
| 2840 | +} |
---|
1901 | 2841 | |
---|
1902 | 2842 | /* |
---|
1903 | 2843 | * wait_task_inactive - wait for a thread to unschedule. |
---|
.. | .. |
---|
1943 | 2883 | * is actually now running somewhere else! |
---|
1944 | 2884 | */ |
---|
1945 | 2885 | while (task_running(rq, p)) { |
---|
1946 | | - if (match_state && unlikely(p->state != match_state)) |
---|
| 2886 | + if (match_state && !check_task_state(p, match_state)) |
---|
1947 | 2887 | return 0; |
---|
1948 | 2888 | cpu_relax(); |
---|
1949 | 2889 | } |
---|
.. | .. |
---|
1958 | 2898 | running = task_running(rq, p); |
---|
1959 | 2899 | queued = task_on_rq_queued(p); |
---|
1960 | 2900 | ncsw = 0; |
---|
1961 | | - if (!match_state || p->state == match_state) |
---|
| 2901 | + if (!match_state || p->state == match_state || |
---|
| 2902 | + p->saved_state == match_state) |
---|
1962 | 2903 | ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ |
---|
1963 | 2904 | task_rq_unlock(rq, p, &rf); |
---|
1964 | 2905 | |
---|
.. | .. |
---|
1992 | 2933 | ktime_t to = NSEC_PER_SEC / HZ; |
---|
1993 | 2934 | |
---|
1994 | 2935 | set_current_state(TASK_UNINTERRUPTIBLE); |
---|
1995 | | - schedule_hrtimeout(&to, HRTIMER_MODE_REL); |
---|
| 2936 | + schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD); |
---|
1996 | 2937 | continue; |
---|
1997 | 2938 | } |
---|
1998 | 2939 | |
---|
.. | .. |
---|
2033 | 2974 | EXPORT_SYMBOL_GPL(kick_process); |
---|
2034 | 2975 | |
---|
2035 | 2976 | /* |
---|
2036 | | - * ->cpus_allowed is protected by both rq->lock and p->pi_lock |
---|
| 2977 | + * ->cpus_ptr is protected by both rq->lock and p->pi_lock |
---|
2037 | 2978 | * |
---|
2038 | 2979 | * A few notes on cpu_active vs cpu_online: |
---|
2039 | 2980 | * |
---|
.. | .. |
---|
2059 | 3000 | int nid = cpu_to_node(cpu); |
---|
2060 | 3001 | const struct cpumask *nodemask = NULL; |
---|
2061 | 3002 | enum { cpuset, possible, fail } state = cpuset; |
---|
2062 | | - int dest_cpu; |
---|
| 3003 | + int dest_cpu = -1; |
---|
| 3004 | + |
---|
| 3005 | + trace_android_rvh_select_fallback_rq(cpu, p, &dest_cpu); |
---|
| 3006 | + if (dest_cpu >= 0) |
---|
| 3007 | + return dest_cpu; |
---|
2063 | 3008 | |
---|
2064 | 3009 | /* |
---|
2065 | 3010 | * If the node that the CPU is on has been offlined, cpu_to_node() |
---|
.. | .. |
---|
2071 | 3016 | |
---|
2072 | 3017 | /* Look for allowed, online CPU in same node. */ |
---|
2073 | 3018 | for_each_cpu(dest_cpu, nodemask) { |
---|
2074 | | - if (!cpu_active(dest_cpu)) |
---|
2075 | | - continue; |
---|
2076 | | - if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) |
---|
| 3019 | + if (is_cpu_allowed(p, dest_cpu)) |
---|
2077 | 3020 | return dest_cpu; |
---|
2078 | 3021 | } |
---|
2079 | 3022 | } |
---|
2080 | 3023 | |
---|
2081 | 3024 | for (;;) { |
---|
2082 | 3025 | /* Any allowed, online CPU? */ |
---|
2083 | | - for_each_cpu(dest_cpu, &p->cpus_allowed) { |
---|
| 3026 | + for_each_cpu(dest_cpu, p->cpus_ptr) { |
---|
2084 | 3027 | if (!is_cpu_allowed(p, dest_cpu)) |
---|
2085 | 3028 | continue; |
---|
2086 | 3029 | |
---|
.. | .. |
---|
2095 | 3038 | state = possible; |
---|
2096 | 3039 | break; |
---|
2097 | 3040 | } |
---|
2098 | | - /* Fall-through */ |
---|
| 3041 | + fallthrough; |
---|
2099 | 3042 | case possible: |
---|
2100 | | - do_set_cpus_allowed(p, cpu_possible_mask); |
---|
| 3043 | + /* |
---|
| 3044 | + * XXX When called from select_task_rq() we only |
---|
| 3045 | + * hold p->pi_lock and again violate locking order. |
---|
| 3046 | + * |
---|
| 3047 | + * More yuck to audit. |
---|
| 3048 | + */ |
---|
| 3049 | + do_set_cpus_allowed(p, task_cpu_possible_mask(p)); |
---|
2101 | 3050 | state = fail; |
---|
2102 | 3051 | break; |
---|
2103 | | - |
---|
2104 | 3052 | case fail: |
---|
2105 | 3053 | BUG(); |
---|
2106 | 3054 | break; |
---|
.. | .. |
---|
2124 | 3072 | } |
---|
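select_fallback_rq() widens its search in stages: the cpuset-constrained mask first, then every possible CPU, and BUG() if even that yields nothing. A self-contained sketch of the same widening state machine over toy bitmaps; the "lowest usable bit" pick is purely illustrative, not the kernel's distribution policy:

```c
#include <stdio.h>

enum fb_state { FB_CPUSET, FB_POSSIBLE, FB_FAIL };

/* Widening search for a usable CPU in the spirit of select_fallback_rq(). */
static int pick_cpu(unsigned int allowed, unsigned int online,
		    unsigned int possible)
{
	enum fb_state state = FB_CPUSET;
	unsigned int mask = allowed;

	for (;;) {
		unsigned int usable = mask & online;

		if (usable)
			return __builtin_ctz(usable);	/* lowest usable CPU */

		switch (state) {
		case FB_CPUSET:
			state = FB_POSSIBLE;
			mask = possible;	/* widen to every possible CPU */
			break;
		case FB_POSSIBLE:
			state = FB_FAIL;
			break;
		case FB_FAIL:
			return -1;		/* the kernel BUG()s here */
		}
	}
}

int main(void)
{
	/* Allowed CPUs 4-5 are offline; the possible mask saves the day. */
	printf("fallback CPU: %d\n", pick_cpu(0x30, 0x0f, 0xff));
	return 0;
}
```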
2125 | 3073 | |
---|
2126 | 3074 | /* |
---|
2127 | | - * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable. |
---|
| 3075 | + * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable. |
---|
2128 | 3076 | */ |
---|
2129 | 3077 | static inline |
---|
2130 | | -int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags, |
---|
2131 | | - int sibling_count_hint) |
---|
| 3078 | +int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) |
---|
2132 | 3079 | { |
---|
2133 | 3080 | lockdep_assert_held(&p->pi_lock); |
---|
2134 | 3081 | |
---|
2135 | | - if (p->nr_cpus_allowed > 1) |
---|
2136 | | - cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags, |
---|
2137 | | - sibling_count_hint); |
---|
| 3082 | + if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p)) |
---|
| 3083 | + cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); |
---|
2138 | 3084 | else |
---|
2139 | | - cpu = cpumask_any(&p->cpus_allowed); |
---|
| 3085 | + cpu = cpumask_any(p->cpus_ptr); |
---|
2140 | 3086 | |
---|
2141 | 3087 | /* |
---|
2142 | 3088 | * In order not to call set_task_cpu() on a blocking task we need |
---|
2143 | | - * to rely on ttwu() to place the task on a valid ->cpus_allowed |
---|
| 3089 | + * to rely on ttwu() to place the task on a valid ->cpus_ptr |
---|
2144 | 3090 | * CPU. |
---|
2145 | 3091 | * |
---|
2146 | 3092 | * Since this is common to all placement strategies, this lives here. |
---|
.. | .. |
---|
2154 | 3100 | return cpu; |
---|
2155 | 3101 | } |
---|
2156 | 3102 | |
---|
2157 | | -static void update_avg(u64 *avg, u64 sample) |
---|
2158 | | -{ |
---|
2159 | | - s64 diff = sample - *avg; |
---|
2160 | | - *avg += diff >> 3; |
---|
2161 | | -} |
---|
2162 | | - |
---|
2163 | 3103 | void sched_set_stop_task(int cpu, struct task_struct *stop) |
---|
2164 | 3104 | { |
---|
| 3105 | + static struct lock_class_key stop_pi_lock; |
---|
2165 | 3106 | struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; |
---|
2166 | 3107 | struct task_struct *old_stop = cpu_rq(cpu)->stop; |
---|
2167 | 3108 | |
---|
.. | .. |
---|
2177 | 3118 | sched_setscheduler_nocheck(stop, SCHED_FIFO, ¶m); |
---|
2178 | 3119 | |
---|
2179 | 3120 | stop->sched_class = &stop_sched_class; |
---|
| 3121 | + |
---|
| 3122 | + /* |
---|
| 3123 | + * The PI code calls rt_mutex_setprio() with ->pi_lock held to |
---|
| 3124 | + * adjust the effective priority of a task. As a result, |
---|
| 3125 | + * rt_mutex_setprio() can trigger (RT) balancing operations, |
---|
| 3126 | + * which can then trigger wakeups of the stop thread to push |
---|
| 3127 | + * around the current task. |
---|
| 3128 | + * |
---|
| 3129 | + * The stop task itself will never be part of the PI-chain, it |
---|
| 3130 | + * never blocks, therefore that ->pi_lock recursion is safe. |
---|
| 3131 | + * Tell lockdep about this by placing the stop->pi_lock in its |
---|
| 3132 | + * own class. |
---|
| 3133 | + */ |
---|
| 3134 | + lockdep_set_class(&stop->pi_lock, &stop_pi_lock); |
---|
2180 | 3135 | } |
---|
2181 | 3136 | |
---|
2182 | 3137 | cpu_rq(cpu)->stop = stop; |
---|
.. | .. |
---|
2190 | 3145 | } |
---|
2191 | 3146 | } |
---|
2192 | 3147 | |
---|
2193 | | -#else |
---|
| 3148 | +#else /* CONFIG_SMP */ |
---|
2194 | 3149 | |
---|
2195 | 3150 | static inline int __set_cpus_allowed_ptr(struct task_struct *p, |
---|
2196 | | - const struct cpumask *new_mask, bool check) |
---|
| 3151 | + const struct cpumask *new_mask, |
---|
| 3152 | + u32 flags) |
---|
2197 | 3153 | { |
---|
2198 | 3154 | return set_cpus_allowed_ptr(p, new_mask); |
---|
2199 | 3155 | } |
---|
2200 | 3156 | |
---|
2201 | | -#endif /* CONFIG_SMP */ |
---|
| 3157 | +static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { } |
---|
| 3158 | + |
---|
| 3159 | +static inline bool rq_has_pinned_tasks(struct rq *rq) |
---|
| 3160 | +{ |
---|
| 3161 | + return false; |
---|
| 3162 | +} |
---|
| 3163 | + |
---|
| 3164 | +#endif /* !CONFIG_SMP */ |
---|
2202 | 3165 | |
---|
2203 | 3166 | static void |
---|
2204 | 3167 | ttwu_stat(struct task_struct *p, int cpu, int wake_flags) |
---|
.. | .. |
---|
2237 | 3200 | |
---|
2238 | 3201 | if (wake_flags & WF_SYNC) |
---|
2239 | 3202 | __schedstat_inc(p->se.statistics.nr_wakeups_sync); |
---|
2240 | | -} |
---|
2241 | | - |
---|
2242 | | -static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags) |
---|
2243 | | -{ |
---|
2244 | | - activate_task(rq, p, en_flags); |
---|
2245 | | - p->on_rq = TASK_ON_RQ_QUEUED; |
---|
2246 | | - |
---|
2247 | | - /* If a worker is waking up, notify the workqueue: */ |
---|
2248 | | - if (p->flags & PF_WQ_WORKER) |
---|
2249 | | - wq_worker_waking_up(p, cpu_of(rq)); |
---|
2250 | 3203 | } |
---|
2251 | 3204 | |
---|
2252 | 3205 | /* |
---|
.. | .. |
---|
2290 | 3243 | { |
---|
2291 | 3244 | int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK; |
---|
2292 | 3245 | |
---|
| 3246 | + if (wake_flags & WF_SYNC) |
---|
| 3247 | + en_flags |= ENQUEUE_WAKEUP_SYNC; |
---|
| 3248 | + |
---|
2293 | 3249 | lockdep_assert_held(&rq->lock); |
---|
2294 | 3250 | |
---|
2295 | | -#ifdef CONFIG_SMP |
---|
2296 | 3251 | if (p->sched_contributes_to_load) |
---|
2297 | 3252 | rq->nr_uninterruptible--; |
---|
2298 | 3253 | |
---|
| 3254 | +#ifdef CONFIG_SMP |
---|
2299 | 3255 | if (wake_flags & WF_MIGRATED) |
---|
2300 | 3256 | en_flags |= ENQUEUE_MIGRATED; |
---|
| 3257 | + else |
---|
2301 | 3258 | #endif |
---|
| 3259 | + if (p->in_iowait) { |
---|
| 3260 | + delayacct_blkio_end(p); |
---|
| 3261 | + atomic_dec(&task_rq(p)->nr_iowait); |
---|
| 3262 | + } |
---|
2302 | 3263 | |
---|
2303 | | - ttwu_activate(rq, p, en_flags); |
---|
| 3264 | + activate_task(rq, p, en_flags); |
---|
2304 | 3265 | ttwu_do_wakeup(rq, p, wake_flags, rf); |
---|
2305 | 3266 | } |
---|
2306 | 3267 | |
---|
2307 | 3268 | /* |
---|
2308 | | - * Called in case the task @p isn't fully descheduled from its runqueue, |
---|
2309 | | - * in this case we must do a remote wakeup. Its a 'light' wakeup though, |
---|
2310 | | - * since all we need to do is flip p->state to TASK_RUNNING, since |
---|
2311 | | - * the task is still ->on_rq. |
---|
| 3269 | + * Consider @p being inside a wait loop: |
---|
| 3270 | + * |
---|
| 3271 | + * for (;;) { |
---|
| 3272 | + * set_current_state(TASK_UNINTERRUPTIBLE); |
---|
| 3273 | + * |
---|
| 3274 | + * if (CONDITION) |
---|
| 3275 | + * break; |
---|
| 3276 | + * |
---|
| 3277 | + * schedule(); |
---|
| 3278 | + * } |
---|
| 3279 | + * __set_current_state(TASK_RUNNING); |
---|
| 3280 | + * |
---|
| 3281 | + * between set_current_state() and schedule(). In this case @p is still |
---|
| 3282 | + * runnable, so all that needs doing is change p->state back to TASK_RUNNING in |
---|
| 3283 | + * an atomic manner. |
---|
| 3284 | + * |
---|
| 3285 | + * By taking task_rq(p)->lock we serialize against schedule(), if @p->on_rq |
---|
| 3286 | + * then schedule() must still happen and p->state can be changed to |
---|
| 3287 | + * TASK_RUNNING. Otherwise we lost the race, schedule() has happened, and we |
---|
| 3288 | + * need to do a full wakeup with enqueue. |
---|
| 3289 | + * |
---|
| 3290 | + * Returns: %true when the wakeup is done, |
---|
| 3291 | + * %false otherwise. |
---|
2312 | 3292 | */ |
---|
2313 | | -static int ttwu_remote(struct task_struct *p, int wake_flags) |
---|
| 3293 | +static int ttwu_runnable(struct task_struct *p, int wake_flags) |
---|
2314 | 3294 | { |
---|
2315 | 3295 | struct rq_flags rf; |
---|
2316 | 3296 | struct rq *rq; |
---|
.. | .. |
---|
2329 | 3309 | } |
---|
2330 | 3310 | |
---|
2331 | 3311 | #ifdef CONFIG_SMP |
---|
2332 | | -void sched_ttwu_pending(void) |
---|
| 3312 | +void sched_ttwu_pending(void *arg) |
---|
2333 | 3313 | { |
---|
| 3314 | + struct llist_node *llist = arg; |
---|
2334 | 3315 | struct rq *rq = this_rq(); |
---|
2335 | | - struct llist_node *llist = llist_del_all(&rq->wake_list); |
---|
2336 | 3316 | struct task_struct *p, *t; |
---|
2337 | 3317 | struct rq_flags rf; |
---|
2338 | 3318 | |
---|
2339 | 3319 | if (!llist) |
---|
2340 | 3320 | return; |
---|
2341 | 3321 | |
---|
| 3322 | + /* |
---|
| 3323 | + * rq::ttwu_pending is a racy indication of outstanding wakeups.
---|
| 3324 | + * Races such that false-negatives are possible, since they
---|
| 3325 | + * are shorter lived than false-positives would be.
---|
| 3326 | + */ |
---|
| 3327 | + WRITE_ONCE(rq->ttwu_pending, 0); |
---|
| 3328 | + |
---|
2342 | 3329 | rq_lock_irqsave(rq, &rf); |
---|
2343 | 3330 | update_rq_clock(rq); |
---|
2344 | 3331 | |
---|
2345 | | - llist_for_each_entry_safe(p, t, llist, wake_entry) |
---|
| 3332 | + llist_for_each_entry_safe(p, t, llist, wake_entry.llist) { |
---|
| 3333 | + if (WARN_ON_ONCE(p->on_cpu)) |
---|
| 3334 | + smp_cond_load_acquire(&p->on_cpu, !VAL); |
---|
| 3335 | + |
---|
| 3336 | + if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq))) |
---|
| 3337 | + set_task_cpu(p, cpu_of(rq)); |
---|
| 3338 | + |
---|
2346 | 3339 | ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf); |
---|
| 3340 | + } |
---|
2347 | 3341 | |
---|
2348 | 3342 | rq_unlock_irqrestore(rq, &rf); |
---|
2349 | 3343 | } |
---|
2350 | 3344 | |
---|
2351 | | -void scheduler_ipi(void) |
---|
| 3345 | +void send_call_function_single_ipi(int cpu) |
---|
2352 | 3346 | { |
---|
2353 | | - /* |
---|
2354 | | - * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting |
---|
2355 | | - * TIF_NEED_RESCHED remotely (for the first time) will also send |
---|
2356 | | - * this IPI. |
---|
2357 | | - */ |
---|
2358 | | - preempt_fold_need_resched(); |
---|
| 3347 | + struct rq *rq = cpu_rq(cpu); |
---|
2359 | 3348 | |
---|
2360 | | - if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) |
---|
2361 | | - return; |
---|
2362 | | - |
---|
2363 | | - /* |
---|
2364 | | - * Not all reschedule IPI handlers call irq_enter/irq_exit, since |
---|
2365 | | - * traditionally all their work was done from the interrupt return |
---|
2366 | | - * path. Now that we actually do some work, we need to make sure |
---|
2367 | | - * we do call them. |
---|
2368 | | - * |
---|
2369 | | - * Some archs already do call them, luckily irq_enter/exit nest |
---|
2370 | | - * properly. |
---|
2371 | | - * |
---|
2372 | | - * Arguably we should visit all archs and update all handlers, |
---|
2373 | | - * however a fair share of IPIs are still resched only so this would |
---|
2374 | | - * somewhat pessimize the simple resched case. |
---|
2375 | | - */ |
---|
2376 | | - irq_enter(); |
---|
2377 | | - sched_ttwu_pending(); |
---|
2378 | | - |
---|
2379 | | - /* |
---|
2380 | | - * Check if someone kicked us for doing the nohz idle load balance. |
---|
2381 | | - */ |
---|
2382 | | - if (unlikely(got_nohz_idle_kick())) { |
---|
2383 | | - this_rq()->idle_balance = 1; |
---|
2384 | | - raise_softirq_irqoff(SCHED_SOFTIRQ); |
---|
2385 | | - } |
---|
2386 | | - irq_exit(); |
---|
| 3349 | + if (!set_nr_if_polling(rq->idle)) |
---|
| 3350 | + arch_send_call_function_single_ipi(cpu); |
---|
| 3351 | + else |
---|
| 3352 | + trace_sched_wake_idle_without_ipi(cpu); |
---|
2387 | 3353 | } |
---|
2388 | 3354 | |
---|
2389 | | -static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) |
---|
| 3355 | +/* |
---|
| 3356 | + * Queue a task on the target CPUs wake_list and wake the CPU via IPI if |
---|
| 3357 | + * necessary. The wakee CPU on receipt of the IPI will queue the task |
---|
| 3358 | + * via sched_ttwu_wakeup() for activation so the wakee incurs the cost |
---|
| 3359 | + * of the wakeup instead of the waker. |
---|
| 3360 | + */ |
---|
| 3361 | +static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) |
---|
2390 | 3362 | { |
---|
2391 | 3363 | struct rq *rq = cpu_rq(cpu); |
---|
2392 | 3364 | |
---|
2393 | 3365 | p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); |
---|
2394 | 3366 | |
---|
2395 | | - if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) { |
---|
2396 | | - if (!set_nr_if_polling(rq->idle)) |
---|
2397 | | - smp_send_reschedule(cpu); |
---|
2398 | | - else |
---|
2399 | | - trace_sched_wake_idle_without_ipi(cpu); |
---|
2400 | | - } |
---|
| 3367 | + WRITE_ONCE(rq->ttwu_pending, 1); |
---|
| 3368 | + __smp_call_single_queue(cpu, &p->wake_entry.llist); |
---|
2401 | 3369 | } |
---|
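The wakelist path has a producer side (__ttwu_queue_wakelist(): set the racy ttwu_pending hint, push the task onto a lock-free list, kick the CPU) and a consumer side (sched_ttwu_pending(): splice the whole list and activate the tasks locally). A userspace sketch of the list handoff with C11 atomics; the IPI and runqueue locking are only noted in comments:

```c
#include <stdatomic.h>
#include <stdio.h>

/* Sketch of the wakelist handoff: wakers push onto a per-CPU lock-free list,
 * the target CPU splices everything at once.  Single-process demo. */
struct wake_node {
	struct wake_node *next;
	int task_id;
};

static _Atomic(struct wake_node *) wake_list;
static atomic_int ttwu_pending;		/* racy "work outstanding" hint */

static void queue_wakeup(struct wake_node *n)	/* waker side */
{
	struct wake_node *head = atomic_load(&wake_list);

	atomic_store(&ttwu_pending, 1);
	do {
		n->next = head;			/* link before publishing */
	} while (!atomic_compare_exchange_weak(&wake_list, &head, n));
	/* the kernel sends an IPI when the list goes empty -> non-empty */
}

static void drain_wakeups(void)			/* wakee side, on IPI */
{
	struct wake_node *n = atomic_exchange(&wake_list, NULL);	/* splice all */

	atomic_store(&ttwu_pending, 0);
	for (; n; n = n->next)
		printf("activating task %d on the local runqueue\n", n->task_id);
}

int main(void)
{
	struct wake_node a = { .task_id = 1 }, b = { .task_id = 2 };

	queue_wakeup(&a);
	queue_wakeup(&b);
	drain_wakeups();
	return 0;
}
```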
2402 | 3370 | |
---|
2403 | 3371 | void wake_up_if_idle(int cpu) |
---|
.. | .. |
---|
2423 | 3391 | out: |
---|
2424 | 3392 | rcu_read_unlock(); |
---|
2425 | 3393 | } |
---|
| 3394 | +EXPORT_SYMBOL_GPL(wake_up_if_idle); |
---|
2426 | 3395 | |
---|
2427 | 3396 | bool cpus_share_cache(int this_cpu, int that_cpu) |
---|
2428 | 3397 | { |
---|
.. | .. |
---|
2431 | 3400 | |
---|
2432 | 3401 | return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); |
---|
2433 | 3402 | } |
---|
| 3403 | + |
---|
| 3404 | +static inline bool ttwu_queue_cond(int cpu, int wake_flags) |
---|
| 3405 | +{ |
---|
| 3406 | + /* |
---|
| 3407 | + * If the CPU does not share cache, then queue the task on the |
---|
| 3408 | + * remote rqs wakelist to avoid accessing remote data. |
---|
| 3409 | + */ |
---|
| 3410 | + if (!cpus_share_cache(smp_processor_id(), cpu)) |
---|
| 3411 | + return true; |
---|
| 3412 | + |
---|
| 3413 | + /* |
---|
| 3414 | + * If the task is descheduling and the only running task on the |
---|
| 3415 | + * CPU then use the wakelist to offload the task activation to |
---|
| 3416 | + * the soon-to-be-idle CPU as the current CPU is likely busy. |
---|
| 3417 | + * nr_running is checked to avoid unnecessary task stacking. |
---|
| 3418 | + * |
---|
| 3419 | + * Note that we can only get here with (wakee) p->on_rq=0, |
---|
| 3420 | + * p->on_cpu can be whatever, we've done the dequeue, so |
---|
| 3421 | + * the wakee has been accounted out of ->nr_running. |
---|
| 3422 | + */ |
---|
| 3423 | + if ((wake_flags & WF_ON_CPU) && !cpu_rq(cpu)->nr_running) |
---|
| 3424 | + return true; |
---|
| 3425 | + |
---|
| 3426 | + return false; |
---|
| 3427 | +} |
---|
| 3428 | + |
---|
| 3429 | +static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) |
---|
| 3430 | +{ |
---|
| 3431 | + bool cond = false; |
---|
| 3432 | + |
---|
| 3433 | + trace_android_rvh_ttwu_cond(&cond); |
---|
| 3434 | + |
---|
| 3435 | + if ((sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) || |
---|
| 3436 | + cond) { |
---|
| 3437 | + if (WARN_ON_ONCE(cpu == smp_processor_id())) |
---|
| 3438 | + return false; |
---|
| 3439 | + |
---|
| 3440 | + sched_clock_cpu(cpu); /* Sync clocks across CPUs */ |
---|
| 3441 | + __ttwu_queue_wakelist(p, cpu, wake_flags); |
---|
| 3442 | + return true; |
---|
| 3443 | + } |
---|
| 3444 | + |
---|
| 3445 | + return false; |
---|
| 3446 | +} |
---|
| 3447 | + |
---|
| 3448 | +#else /* !CONFIG_SMP */ |
---|
| 3449 | + |
---|
| 3450 | +static inline bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) |
---|
| 3451 | +{ |
---|
| 3452 | + return false; |
---|
| 3453 | +} |
---|
| 3454 | + |
---|
2434 | 3455 | #endif /* CONFIG_SMP */ |
---|
2435 | 3456 | |
---|
2436 | 3457 | static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) |
---|
.. | .. |
---|
2438 | 3459 | struct rq *rq = cpu_rq(cpu); |
---|
2439 | 3460 | struct rq_flags rf; |
---|
2440 | 3461 | |
---|
2441 | | -#if defined(CONFIG_SMP) |
---|
2442 | | - if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) { |
---|
2443 | | - sched_clock_cpu(cpu); /* Sync clocks across CPUs */ |
---|
2444 | | - ttwu_queue_remote(p, cpu, wake_flags); |
---|
| 3462 | + if (ttwu_queue_wakelist(p, cpu, wake_flags)) |
---|
2445 | 3463 | return; |
---|
2446 | | - } |
---|
2447 | | -#endif |
---|
2448 | 3464 | |
---|
2449 | 3465 | rq_lock(rq, &rf); |
---|
2450 | 3466 | update_rq_clock(rq); |
---|
.. | .. |
---|
2500 | 3516 | * migration. However the means are completely different as there is no lock |
---|
2501 | 3517 | * chain to provide order. Instead we do: |
---|
2502 | 3518 | * |
---|
2503 | | - * 1) smp_store_release(X->on_cpu, 0) |
---|
2504 | | - * 2) smp_cond_load_acquire(!X->on_cpu) |
---|
| 3519 | + * 1) smp_store_release(X->on_cpu, 0) -- finish_task() |
---|
| 3520 | + * 2) smp_cond_load_acquire(!X->on_cpu) -- try_to_wake_up() |
---|
2505 | 3521 | * |
---|
2506 | 3522 | * Example: |
---|
2507 | 3523 | * |
---|
.. | .. |
---|
2540 | 3556 | * @p: the thread to be awakened |
---|
2541 | 3557 | * @state: the mask of task states that can be woken |
---|
2542 | 3558 | * @wake_flags: wake modifier flags (WF_*) |
---|
2543 | | - * @sibling_count_hint: A hint at the number of threads that are being woken up |
---|
2544 | | - * in this event. |
---|
2545 | 3559 | * |
---|
2546 | | - * If (@state & @p->state) @p->state = TASK_RUNNING. |
---|
| 3560 | + * Conceptually does: |
---|
| 3561 | + * |
---|
| 3562 | + * If (@state & @p->state) @p->state = TASK_RUNNING. |
---|
2547 | 3563 | * |
---|
2548 | 3564 | * If the task was not queued/runnable, also place it back on a runqueue. |
---|
2549 | 3565 | * |
---|
2550 | | - * Atomic against schedule() which would dequeue a task, also see |
---|
2551 | | - * set_current_state(). |
---|
| 3566 | + * This function is atomic against schedule() which would dequeue the task. |
---|
2552 | 3567 | * |
---|
2553 | | - * This function executes a full memory barrier before accessing the task |
---|
2554 | | - * state; see set_current_state(). |
---|
| 3568 | + * It issues a full memory barrier before accessing @p->state, see the comment |
---|
| 3569 | + * with set_current_state(). |
---|
| 3570 | + * |
---|
| 3571 | + * Uses p->pi_lock to serialize against concurrent wake-ups. |
---|
| 3572 | + * |
---|
| 3573 | + * Relies on p->pi_lock stabilizing: |
---|
| 3574 | + * - p->sched_class |
---|
| 3575 | + * - p->cpus_ptr |
---|
| 3576 | + * - p->sched_task_group |
---|
| 3577 | + * in order to do migration, see its use of select_task_rq()/set_task_cpu(). |
---|
| 3578 | + * |
---|
| 3579 | + * Tries really hard to only take one task_rq(p)->lock for performance. |
---|
| 3580 | + * Takes rq->lock in: |
---|
| 3581 | + * - ttwu_runnable() -- old rq, unavoidable, see comment there; |
---|
| 3582 | + * - ttwu_queue() -- new rq, for enqueue of the task; |
---|
| 3583 | + * - psi_ttwu_dequeue() -- much sadness :-( accounting will kill us. |
---|
| 3584 | + * |
---|
| 3585 | + * As a consequence we race really badly with just about everything. See the |
---|
| 3586 | + * many memory barriers and their comments for details. |
---|
2555 | 3587 | * |
---|
2556 | 3588 | * Return: %true if @p->state changes (an actual wakeup was done), |
---|
2557 | 3589 | * %false otherwise. |
---|
2558 | 3590 | */ |
---|
2559 | 3591 | static int |
---|
2560 | | -try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags, |
---|
2561 | | - int sibling_count_hint) |
---|
| 3592 | +try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) |
---|
2562 | 3593 | { |
---|
2563 | 3594 | unsigned long flags; |
---|
2564 | 3595 | int cpu, success = 0; |
---|
2565 | 3596 | |
---|
| 3597 | + preempt_disable(); |
---|
| 3598 | + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && p == current) { |
---|
| 3599 | + /* |
---|
| 3600 | + * We're waking current, this means 'p->on_rq' and 'task_cpu(p) |
---|
| 3601 | + * == smp_processor_id()'. Together this means we can special |
---|
| 3602 | + * case the whole 'p->on_rq && ttwu_runnable()' case below |
---|
| 3603 | + * without taking any locks. |
---|
| 3604 | + * |
---|
| 3605 | + * In particular: |
---|
| 3606 | + * - we rely on Program-Order guarantees for all the ordering, |
---|
| 3607 | + * - we're serialized against set_special_state() by virtue of |
---|
| 3608 | + * it disabling IRQs (this allows not taking ->pi_lock). |
---|
| 3609 | + */ |
---|
| 3610 | + if (!(p->state & state)) |
---|
| 3611 | + goto out; |
---|
| 3612 | + |
---|
| 3613 | + success = 1; |
---|
| 3614 | + trace_sched_waking(p); |
---|
| 3615 | + p->state = TASK_RUNNING; |
---|
| 3616 | + trace_sched_wakeup(p); |
---|
| 3617 | + goto out; |
---|
| 3618 | + } |
---|
| 3619 | + |
---|
2566 | 3620 | /* |
---|
2567 | 3621 | * If we are going to wake up a thread waiting for CONDITION we |
---|
2568 | 3622 | * need to ensure that CONDITION=1 done by the caller can not be |
---|
2569 | | - * reordered with p->state check below. This pairs with mb() in |
---|
2570 | | - * set_current_state() the waiting thread does. |
---|
| 3623 | + * reordered with p->state check below. This pairs with smp_store_mb() |
---|
| 3624 | + * in set_current_state() that the waiting thread does. |
---|
2571 | 3625 | */ |
---|
2572 | 3626 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
---|
2573 | 3627 | smp_mb__after_spinlock(); |
---|
2574 | | - if (!(p->state & state)) |
---|
2575 | | - goto out; |
---|
| 3628 | + if (!(p->state & state)) { |
---|
| 3629 | + /* |
---|
| 3630 | + * The task might be running due to a spinlock sleeper |
---|
| 3631 | + * wakeup. Check the saved state and set it to running |
---|
| 3632 | + * if the wakeup condition is true. |
---|
| 3633 | + */ |
---|
| 3634 | + if (!(wake_flags & WF_LOCK_SLEEPER)) { |
---|
| 3635 | + if (p->saved_state & state) { |
---|
| 3636 | + p->saved_state = TASK_RUNNING; |
---|
| 3637 | + success = 1; |
---|
| 3638 | + } |
---|
| 3639 | + } |
---|
| 3640 | + goto unlock; |
---|
| 3641 | + } |
---|
| 3642 | + /* |
---|
| 3643 | + * If this is a regular wakeup, then we can unconditionally |
---|
| 3644 | + * clear the saved state of a "lock sleeper". |
---|
| 3645 | + */ |
---|
| 3646 | + if (!(wake_flags & WF_LOCK_SLEEPER)) |
---|
| 3647 | + p->saved_state = TASK_RUNNING; |
---|
| 3648 | + |
---|
| 3649 | +#ifdef CONFIG_FREEZER |
---|
| 3650 | + /* |
---|
| 3651 | + * If we're going to wake up a thread which may be frozen, then |
---|
| 3652 | + * we can only do so if we have an active CPU which is capable of |
---|
| 3653 | + * running it. This may not be the case when resuming from suspend, |
---|
| 3654 | + * as the secondary CPUs may not yet be back online. See __thaw_task() |
---|
| 3655 | + * for the actual wakeup. |
---|
| 3656 | + */ |
---|
| 3657 | + if (unlikely(frozen_or_skipped(p)) && |
---|
| 3658 | + !cpumask_intersects(cpu_active_mask, task_cpu_possible_mask(p))) |
---|
| 3659 | + goto unlock; |
---|
| 3660 | +#endif |
---|
2576 | 3661 | |
---|
2577 | 3662 | trace_sched_waking(p); |
---|
2578 | 3663 | |
---|
2579 | 3664 | /* We're going to change ->state: */ |
---|
2580 | 3665 | success = 1; |
---|
2581 | | - cpu = task_cpu(p); |
---|
2582 | 3666 | |
---|
2583 | 3667 | /* |
---|
2584 | 3668 | * Ensure we load p->on_rq _after_ p->state, otherwise it would |
---|
.. | .. |
---|
2599 | 3683 | * |
---|
2600 | 3684 | * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in |
---|
2601 | 3685 | * __schedule(). See the comment for smp_mb__after_spinlock(). |
---|
| 3686 | + * |
---|
| 3687 | + * A similar smp_rmb() lives in try_invoke_on_locked_down_task().
---|
2602 | 3688 | */ |
---|
2603 | 3689 | smp_rmb(); |
---|
2604 | | - if (p->on_rq && ttwu_remote(p, wake_flags)) |
---|
2605 | | - goto stat; |
---|
| 3690 | + if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags)) |
---|
| 3691 | + goto unlock; |
---|
| 3692 | + |
---|
| 3693 | + if (p->state & TASK_UNINTERRUPTIBLE) |
---|
| 3694 | + trace_sched_blocked_reason(p); |
---|
2606 | 3695 | |
---|
2607 | 3696 | #ifdef CONFIG_SMP |
---|
2608 | 3697 | /* |
---|
.. | .. |
---|
2623 | 3712 | * |
---|
2624 | 3713 | * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in |
---|
2625 | 3714 | * __schedule(). See the comment for smp_mb__after_spinlock(). |
---|
| 3715 | + * |
---|
| 3716 | + * Form a control-dep-acquire with p->on_rq == 0 above, to ensure |
---|
| 3717 | + * schedule()'s deactivate_task() has 'happened' and p will no longer |
---|
| 3718 | + * care about its own p->state. See the comment in __schedule().
---|
2626 | 3719 | */ |
---|
2627 | | - smp_rmb(); |
---|
| 3720 | + smp_acquire__after_ctrl_dep(); |
---|
| 3721 | + |
---|
| 3722 | + /* |
---|
| 3723 | + * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq |
---|
| 3724 | + * == 0), which means we need to do an enqueue, change p->state to |
---|
| 3725 | + * TASK_WAKING such that we can unlock p->pi_lock before doing the |
---|
| 3726 | + * enqueue, for example in ttwu_queue_wakelist().
---|
| 3727 | + */ |
---|
| 3728 | + p->state = TASK_WAKING; |
---|
| 3729 | + |
---|
| 3730 | + /* |
---|
| 3731 | + * If the owning (remote) CPU is still in the middle of schedule() with |
---|
| 3732 | + * this task as prev, consider queueing p on the remote CPU's wake_list
---|
| 3733 | + * which potentially sends an IPI instead of spinning on p->on_cpu to |
---|
| 3734 | + * let the waker make forward progress. This is safe because IRQs are |
---|
| 3735 | + * disabled and the IPI will deliver after on_cpu is cleared. |
---|
| 3736 | + * |
---|
| 3737 | + * Ensure we load task_cpu(p) after p->on_cpu: |
---|
| 3738 | + * |
---|
| 3739 | + * set_task_cpu(p, cpu); |
---|
| 3740 | + * STORE p->cpu = @cpu |
---|
| 3741 | + * __schedule() (switch to task 'p') |
---|
| 3742 | + * LOCK rq->lock |
---|
| 3743 | + * smp_mb__after_spinlock() smp_cond_load_acquire(&p->on_cpu)
---|
| 3744 | + * STORE p->on_cpu = 1 LOAD p->cpu |
---|
| 3745 | + * |
---|
| 3746 | + * to ensure we observe the correct CPU on which the task is currently |
---|
| 3747 | + * scheduling. |
---|
| 3748 | + */ |
---|
| 3749 | + if (smp_load_acquire(&p->on_cpu) && |
---|
| 3750 | + ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) |
---|
| 3751 | + goto unlock; |
---|
2628 | 3752 | |
---|
2629 | 3753 | /* |
---|
2630 | 3754 | * If the owning (remote) CPU is still in the middle of schedule() with |
---|
.. | .. |
---|
2637 | 3761 | */ |
---|
2638 | 3762 | smp_cond_load_acquire(&p->on_cpu, !VAL); |
---|
2639 | 3763 | |
---|
2640 | | - p->sched_contributes_to_load = !!task_contributes_to_load(p); |
---|
2641 | | - p->state = TASK_WAKING; |
---|
| 3764 | + trace_android_rvh_try_to_wake_up(p); |
---|
2642 | 3765 | |
---|
2643 | | - if (p->in_iowait) { |
---|
2644 | | - delayacct_blkio_end(p); |
---|
2645 | | - atomic_dec(&task_rq(p)->nr_iowait); |
---|
2646 | | - } |
---|
2647 | | - |
---|
2648 | | - cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags, |
---|
2649 | | - sibling_count_hint); |
---|
| 3766 | + cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags); |
---|
2650 | 3767 | if (task_cpu(p) != cpu) { |
---|
| 3768 | + if (p->in_iowait) { |
---|
| 3769 | + delayacct_blkio_end(p); |
---|
| 3770 | + atomic_dec(&task_rq(p)->nr_iowait); |
---|
| 3771 | + } |
---|
| 3772 | + |
---|
2651 | 3773 | wake_flags |= WF_MIGRATED; |
---|
2652 | 3774 | psi_ttwu_dequeue(p); |
---|
2653 | 3775 | set_task_cpu(p, cpu); |
---|
2654 | 3776 | } |
---|
2655 | | - |
---|
2656 | | -#else /* CONFIG_SMP */ |
---|
2657 | | - |
---|
2658 | | - if (p->in_iowait) { |
---|
2659 | | - delayacct_blkio_end(p); |
---|
2660 | | - atomic_dec(&task_rq(p)->nr_iowait); |
---|
2661 | | - } |
---|
2662 | | - |
---|
| 3777 | +#else |
---|
| 3778 | + cpu = task_cpu(p); |
---|
2663 | 3779 | #endif /* CONFIG_SMP */ |
---|
2664 | 3780 | |
---|
2665 | 3781 | ttwu_queue(p, cpu, wake_flags); |
---|
2666 | | -stat: |
---|
2667 | | - ttwu_stat(p, cpu, wake_flags); |
---|
2668 | | -out: |
---|
| 3782 | +unlock: |
---|
2669 | 3783 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
---|
| 3784 | +out: |
---|
| 3785 | + if (success) { |
---|
| 3786 | + trace_android_rvh_try_to_wake_up_success(p); |
---|
| 3787 | + ttwu_stat(p, task_cpu(p), wake_flags); |
---|
| 3788 | + } |
---|
| 3789 | + preempt_enable(); |
---|
2670 | 3790 | |
---|
2671 | 3791 | return success; |
---|
2672 | 3792 | } |
---|
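The smp_cond_load_acquire(&p->on_cpu, !VAL) spin above pairs with the smp_store_release() in finish_task(), as spelled out in the 1)/2) comment earlier in this hunk. The following is a rough userspace analogue of that release/acquire handoff using C11 atomics; the thread roles and the task_state payload are invented purely for illustration.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int on_cpu = 1;	/* task is still running on the old CPU */
static int task_state;		/* data the waker must not touch too early */

/* Old CPU: finish running the task, then release it (finish_task() role). */
static void *old_cpu(void *arg)
{
	(void)arg;
	task_state = 42;					/* last write while "on cpu" */
	atomic_store_explicit(&on_cpu, 0, memory_order_release);
	return NULL;
}

/* Waker: spin until on_cpu is clear; the acquire pairs with the release. */
static void *waker(void *arg)
{
	(void)arg;
	while (atomic_load_explicit(&on_cpu, memory_order_acquire))
		;						/* smp_cond_load_acquire() analogue */
	printf("task_state=%d\n", task_state);			/* guaranteed to observe 42 */
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, old_cpu, NULL);
	pthread_create(&b, NULL, waker, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}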
2673 | 3793 | |
---|
2674 | 3794 | /** |
---|
2675 | | - * try_to_wake_up_local - try to wake up a local task with rq lock held |
---|
2676 | | - * @p: the thread to be awakened |
---|
2677 | | - * @rf: request-queue flags for pinning |
---|
| 3795 | + * try_invoke_on_locked_down_task - Invoke a function on task in fixed state |
---|
| 3796 | + * @p: Process for which the function is to be invoked, can be @current. |
---|
| 3797 | + * @func: Function to invoke. |
---|
| 3798 | + * @arg: Argument to function. |
---|
2678 | 3799 | * |
---|
2679 | | - * Put @p on the run-queue if it's not already there. The caller must |
---|
2680 | | - * ensure that this_rq() is locked, @p is bound to this_rq() and not |
---|
2681 | | - * the current task. |
---|
| 3800 | + * If the specified task can be quickly locked into a definite state |
---|
| 3801 | + * (either sleeping or on a given runqueue), arrange to keep it in that |
---|
| 3802 | + * state while invoking @func(@arg). @func can use ->on_rq and
---|
| 3803 | + * task_curr() to work out what the state is, if required. Given that |
---|
| 3804 | + * @func can be invoked with a runqueue lock held, it had better be quite |
---|
| 3805 | + * lightweight. |
---|
| 3806 | + * |
---|
| 3807 | + * Returns: |
---|
| 3808 | + * @false if the task slipped out from under the locks. |
---|
| 3809 | + * @true if the task was locked onto a runqueue or is sleeping. |
---|
| 3810 | + * However, @func can override this by returning @false. |
---|
2682 | 3811 | */ |
---|
2683 | | -static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf) |
---|
| 3812 | +bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) |
---|
2684 | 3813 | { |
---|
2685 | | - struct rq *rq = task_rq(p); |
---|
| 3814 | + struct rq_flags rf; |
---|
| 3815 | + bool ret = false; |
---|
| 3816 | + struct rq *rq; |
---|
2686 | 3817 | |
---|
2687 | | - if (WARN_ON_ONCE(rq != this_rq()) || |
---|
2688 | | - WARN_ON_ONCE(p == current)) |
---|
2689 | | - return; |
---|
2690 | | - |
---|
2691 | | - lockdep_assert_held(&rq->lock); |
---|
2692 | | - |
---|
2693 | | - if (!raw_spin_trylock(&p->pi_lock)) { |
---|
2694 | | - /* |
---|
2695 | | - * This is OK, because current is on_cpu, which avoids it being |
---|
2696 | | - * picked for load-balance and preemption/IRQs are still |
---|
2697 | | - * disabled avoiding further scheduler activity on it and we've |
---|
2698 | | - * not yet picked a replacement task. |
---|
2699 | | - */ |
---|
2700 | | - rq_unlock(rq, rf); |
---|
2701 | | - raw_spin_lock(&p->pi_lock); |
---|
2702 | | - rq_relock(rq, rf); |
---|
2703 | | - } |
---|
2704 | | - |
---|
2705 | | - if (!(p->state & TASK_NORMAL)) |
---|
2706 | | - goto out; |
---|
2707 | | - |
---|
2708 | | - trace_sched_waking(p); |
---|
2709 | | - |
---|
2710 | | - if (!task_on_rq_queued(p)) { |
---|
2711 | | - if (p->in_iowait) { |
---|
2712 | | - delayacct_blkio_end(p); |
---|
2713 | | - atomic_dec(&rq->nr_iowait); |
---|
| 3818 | + raw_spin_lock_irqsave(&p->pi_lock, rf.flags); |
---|
| 3819 | + if (p->on_rq) { |
---|
| 3820 | + rq = __task_rq_lock(p, &rf); |
---|
| 3821 | + if (task_rq(p) == rq) |
---|
| 3822 | + ret = func(p, arg); |
---|
| 3823 | + rq_unlock(rq, &rf); |
---|
| 3824 | + } else { |
---|
| 3825 | + switch (p->state) { |
---|
| 3826 | + case TASK_RUNNING: |
---|
| 3827 | + case TASK_WAKING: |
---|
| 3828 | + break; |
---|
| 3829 | + default: |
---|
| 3830 | + smp_rmb(); // See smp_rmb() comment in try_to_wake_up(). |
---|
| 3831 | + if (!p->on_rq) |
---|
| 3832 | + ret = func(p, arg); |
---|
2714 | 3833 | } |
---|
2715 | | - ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK); |
---|
2716 | 3834 | } |
---|
2717 | | - |
---|
2718 | | - ttwu_do_wakeup(rq, p, 0, rf); |
---|
2719 | | - ttwu_stat(p, smp_processor_id(), 0); |
---|
2720 | | -out: |
---|
2721 | | - raw_spin_unlock(&p->pi_lock); |
---|
| 3835 | + raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags); |
---|
| 3836 | + return ret; |
---|
2722 | 3837 | } |
---|
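A hypothetical caller of try_invoke_on_locked_down_task(), only to illustrate the contract: the callback runs with @p pinned either on its runqueue or asleep, possibly under a runqueue lock, so it must stay lightweight and must not sleep, and it can veto the "locked down" result by returning false. report_task() and inspect_task() are made up for this sketch and are not existing kernel code.

/* Sample a task while it cannot change state; must be quick and must not sleep. */
static bool report_task(struct task_struct *t, void *arg)
{
	unsigned long *nvcsw = arg;

	*nvcsw = t->nvcsw;	/* stable while @t is locked down */
	return true;		/* keep the "locked down" result */
}

static void inspect_task(struct task_struct *p)
{
	unsigned long nvcsw;

	if (try_invoke_on_locked_down_task(p, report_task, &nvcsw))
		pr_info("%s/%d: nvcsw=%lu\n", p->comm, p->pid, nvcsw);
	else
		pr_info("%s/%d: state changed under us\n", p->comm, p->pid);
}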
2723 | 3838 | |
---|
2724 | 3839 | /** |
---|
.. | .. |
---|
2734 | 3849 | */ |
---|
2735 | 3850 | int wake_up_process(struct task_struct *p) |
---|
2736 | 3851 | { |
---|
2737 | | - return try_to_wake_up(p, TASK_NORMAL, 0, 1); |
---|
| 3852 | + return try_to_wake_up(p, TASK_NORMAL, 0); |
---|
2738 | 3853 | } |
---|
2739 | 3854 | EXPORT_SYMBOL(wake_up_process); |
---|
2740 | 3855 | |
---|
| 3856 | +/** |
---|
| 3857 | + * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock" |
---|
| 3858 | + * @p: The process to be woken up. |
---|
| 3859 | + * |
---|
| 3860 | + * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate |
---|
| 3861 | + * the nature of the wakeup. |
---|
| 3862 | + */ |
---|
| 3863 | +int wake_up_lock_sleeper(struct task_struct *p) |
---|
| 3864 | +{ |
---|
| 3865 | + return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER); |
---|
| 3866 | +} |
---|
| 3867 | + |
---|
2741 | 3868 | int wake_up_state(struct task_struct *p, unsigned int state) |
---|
2742 | 3869 | { |
---|
2743 | | - return try_to_wake_up(p, state, 0, 1); |
---|
| 3870 | + return try_to_wake_up(p, state, 0); |
---|
2744 | 3871 | } |
---|
2745 | 3872 | |
---|
2746 | 3873 | /* |
---|
.. | .. |
---|
2765 | 3892 | p->se.cfs_rq = NULL; |
---|
2766 | 3893 | #endif |
---|
2767 | 3894 | |
---|
| 3895 | + trace_android_rvh_sched_fork_init(p); |
---|
| 3896 | + |
---|
2768 | 3897 | #ifdef CONFIG_SCHEDSTATS |
---|
2769 | 3898 | /* Even if schedstat is disabled, there should not be garbage */ |
---|
2770 | 3899 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); |
---|
.. | .. |
---|
2785 | 3914 | INIT_HLIST_HEAD(&p->preempt_notifiers); |
---|
2786 | 3915 | #endif |
---|
2787 | 3916 | |
---|
| 3917 | +#ifdef CONFIG_COMPACTION |
---|
| 3918 | + p->capture_control = NULL; |
---|
| 3919 | +#endif |
---|
2788 | 3920 | init_numa_balancing(clone_flags, p); |
---|
| 3921 | +#ifdef CONFIG_SMP |
---|
| 3922 | + p->wake_entry.u_flags = CSD_TYPE_TTWU; |
---|
| 3923 | + p->migration_pending = NULL; |
---|
| 3924 | +#endif |
---|
2789 | 3925 | } |
---|
2790 | 3926 | |
---|
2791 | 3927 | DEFINE_STATIC_KEY_FALSE(sched_numa_balancing); |
---|
.. | .. |
---|
2802 | 3938 | |
---|
2803 | 3939 | #ifdef CONFIG_PROC_SYSCTL |
---|
2804 | 3940 | int sysctl_numa_balancing(struct ctl_table *table, int write, |
---|
2805 | | - void __user *buffer, size_t *lenp, loff_t *ppos) |
---|
| 3941 | + void *buffer, size_t *lenp, loff_t *ppos) |
---|
2806 | 3942 | { |
---|
2807 | 3943 | struct ctl_table t; |
---|
2808 | 3944 | int err; |
---|
.. | .. |
---|
2876 | 4012 | } |
---|
2877 | 4013 | |
---|
2878 | 4014 | #ifdef CONFIG_PROC_SYSCTL |
---|
2879 | | -int sysctl_schedstats(struct ctl_table *table, int write, |
---|
2880 | | - void __user *buffer, size_t *lenp, loff_t *ppos) |
---|
| 4015 | +int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, |
---|
| 4016 | + size_t *lenp, loff_t *ppos) |
---|
2881 | 4017 | { |
---|
2882 | 4018 | struct ctl_table t; |
---|
2883 | 4019 | int err; |
---|
.. | .. |
---|
2905 | 4041 | */ |
---|
2906 | 4042 | int sched_fork(unsigned long clone_flags, struct task_struct *p) |
---|
2907 | 4043 | { |
---|
2908 | | - unsigned long flags; |
---|
| 4044 | + trace_android_rvh_sched_fork(p); |
---|
2909 | 4045 | |
---|
2910 | 4046 | __sched_fork(clone_flags, p); |
---|
2911 | 4047 | /* |
---|
.. | .. |
---|
2919 | 4055 | * Make sure we do not leak PI boosting priority to the child. |
---|
2920 | 4056 | */ |
---|
2921 | 4057 | p->prio = current->normal_prio; |
---|
| 4058 | + trace_android_rvh_prepare_prio_fork(p); |
---|
2922 | 4059 | |
---|
2923 | 4060 | uclamp_fork(p); |
---|
2924 | 4061 | |
---|
.. | .. |
---|
2933 | 4070 | } else if (PRIO_TO_NICE(p->static_prio) < 0) |
---|
2934 | 4071 | p->static_prio = NICE_TO_PRIO(0); |
---|
2935 | 4072 | |
---|
2936 | | - p->prio = p->normal_prio = __normal_prio(p); |
---|
2937 | | - set_load_weight(p, false); |
---|
| 4073 | + p->prio = p->normal_prio = p->static_prio; |
---|
| 4074 | + set_load_weight(p); |
---|
2938 | 4075 | |
---|
2939 | 4076 | /* |
---|
2940 | 4077 | * We don't need the reset flag anymore after the fork. It has |
---|
.. | .. |
---|
2951 | 4088 | p->sched_class = &fair_sched_class; |
---|
2952 | 4089 | |
---|
2953 | 4090 | init_entity_runnable_average(&p->se); |
---|
| 4091 | + trace_android_rvh_finish_prio_fork(p); |
---|
2954 | 4092 | |
---|
2955 | | - /* |
---|
2956 | | - * The child is not yet in the pid-hash so no cgroup attach races, |
---|
2957 | | - * and the cgroup is pinned to this child due to cgroup_fork() |
---|
2958 | | - * is ran before sched_fork(). |
---|
2959 | | - * |
---|
2960 | | - * Silence PROVE_RCU. |
---|
2961 | | - */ |
---|
2962 | | - raw_spin_lock_irqsave(&p->pi_lock, flags); |
---|
2963 | | - rseq_migrate(p); |
---|
2964 | | - /* |
---|
2965 | | - * We're setting the CPU for the first time, we don't migrate, |
---|
2966 | | - * so use __set_task_cpu(). |
---|
2967 | | - */ |
---|
2968 | | - __set_task_cpu(p, smp_processor_id()); |
---|
2969 | | - if (p->sched_class->task_fork) |
---|
2970 | | - p->sched_class->task_fork(p); |
---|
2971 | | - raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
---|
2972 | 4093 | |
---|
2973 | 4094 | #ifdef CONFIG_SCHED_INFO |
---|
2974 | 4095 | if (likely(sched_info_on())) |
---|
.. | .. |
---|
2978 | 4099 | p->on_cpu = 0; |
---|
2979 | 4100 | #endif |
---|
2980 | 4101 | init_task_preempt_count(p); |
---|
| 4102 | +#ifdef CONFIG_HAVE_PREEMPT_LAZY |
---|
| 4103 | + task_thread_info(p)->preempt_lazy_count = 0; |
---|
| 4104 | +#endif |
---|
2981 | 4105 | #ifdef CONFIG_SMP |
---|
2982 | 4106 | plist_node_init(&p->pushable_tasks, MAX_PRIO); |
---|
2983 | 4107 | RB_CLEAR_NODE(&p->pushable_dl_tasks); |
---|
2984 | 4108 | #endif |
---|
2985 | 4109 | return 0; |
---|
| 4110 | +} |
---|
| 4111 | + |
---|
| 4112 | +void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs) |
---|
| 4113 | +{ |
---|
| 4114 | + unsigned long flags; |
---|
| 4115 | + |
---|
| 4116 | + /* |
---|
| 4117 | + * Because we're not yet on the pid-hash, p->pi_lock isn't strictly |
---|
| 4118 | + * required yet, but lockdep gets upset if rules are violated. |
---|
| 4119 | + */ |
---|
| 4120 | + raw_spin_lock_irqsave(&p->pi_lock, flags); |
---|
| 4121 | +#ifdef CONFIG_CGROUP_SCHED |
---|
| 4122 | + if (1) { |
---|
| 4123 | + struct task_group *tg; |
---|
| 4124 | + |
---|
| 4125 | + tg = container_of(kargs->cset->subsys[cpu_cgrp_id], |
---|
| 4126 | + struct task_group, css); |
---|
| 4127 | + tg = autogroup_task_group(p, tg); |
---|
| 4128 | + p->sched_task_group = tg; |
---|
| 4129 | + } |
---|
| 4130 | +#endif |
---|
| 4131 | + rseq_migrate(p); |
---|
| 4132 | + /* |
---|
| 4133 | + * We're setting the CPU for the first time, we don't migrate, |
---|
| 4134 | + * so use __set_task_cpu(). |
---|
| 4135 | + */ |
---|
| 4136 | + __set_task_cpu(p, smp_processor_id()); |
---|
| 4137 | + if (p->sched_class->task_fork) |
---|
| 4138 | + p->sched_class->task_fork(p); |
---|
| 4139 | + raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
---|
| 4140 | +} |
---|
| 4141 | + |
---|
| 4142 | +void sched_post_fork(struct task_struct *p) |
---|
| 4143 | +{ |
---|
| 4144 | + uclamp_post_fork(p); |
---|
2986 | 4145 | } |
---|
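The split into sched_fork(), sched_cgroup_fork() and sched_post_fork() only makes sense together with the order in which copy_process() is assumed to call them. Here is a hedged sketch of that assumed ordering; it is not the actual fork code, and error handling plus everything unrelated to the scheduler is omitted.

/*
 * Hedged sketch of the assumed copy_process() call order; the surrounding
 * steps are only indicated in comments.
 */
static int copy_process_sched_sketch(unsigned long clone_flags,
				     struct task_struct *p,
				     struct kernel_clone_args *kargs)
{
	int ret;

	ret = sched_fork(clone_flags, p);  /* prio/class set up, cgroup unknown yet */
	if (ret)
		return ret;

	/* ... cgroup_can_fork() is assumed to have pinned kargs->cset here ... */

	sched_cgroup_fork(p, kargs);	   /* task_group + first __set_task_cpu() */

	/* ... the child is made visible (pid hash, thread links) ... */

	sched_post_fork(p);		   /* uclamp_post_fork() once attach is final */
	return 0;
}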
2987 | 4146 | |
---|
2988 | 4147 | unsigned long to_ratio(u64 period, u64 runtime) |
---|
.. | .. |
---|
3013 | 4172 | struct rq_flags rf; |
---|
3014 | 4173 | struct rq *rq; |
---|
3015 | 4174 | |
---|
| 4175 | + trace_android_rvh_wake_up_new_task(p); |
---|
| 4176 | + |
---|
3016 | 4177 | raw_spin_lock_irqsave(&p->pi_lock, rf.flags); |
---|
3017 | 4178 | p->state = TASK_RUNNING; |
---|
3018 | 4179 | #ifdef CONFIG_SMP |
---|
3019 | 4180 | /* |
---|
3020 | 4181 | * Fork balancing, do it here and not earlier because: |
---|
3021 | | - * - cpus_allowed can change in the fork path |
---|
| 4182 | + * - cpus_ptr can change in the fork path |
---|
3022 | 4183 | * - any previously selected CPU might disappear through hotplug |
---|
3023 | 4184 | * |
---|
3024 | 4185 | * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, |
---|
.. | .. |
---|
3026 | 4187 | */ |
---|
3027 | 4188 | p->recent_used_cpu = task_cpu(p); |
---|
3028 | 4189 | rseq_migrate(p); |
---|
3029 | | - __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0, 1)); |
---|
| 4190 | + __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); |
---|
3030 | 4191 | #endif |
---|
3031 | 4192 | rq = __task_rq_lock(p, &rf); |
---|
3032 | 4193 | update_rq_clock(rq); |
---|
3033 | | - post_init_entity_util_avg(&p->se); |
---|
| 4194 | + post_init_entity_util_avg(p); |
---|
| 4195 | + trace_android_rvh_new_task_stats(p); |
---|
3034 | 4196 | |
---|
3035 | 4197 | activate_task(rq, p, ENQUEUE_NOCLOCK); |
---|
3036 | | - p->on_rq = TASK_ON_RQ_QUEUED; |
---|
3037 | 4198 | trace_sched_wakeup_new(p); |
---|
3038 | 4199 | check_preempt_curr(rq, p, WF_FORK); |
---|
3039 | 4200 | #ifdef CONFIG_SMP |
---|
.. | .. |
---|
3143 | 4304 | /* |
---|
3144 | 4305 | * Claim the task as running, we do this before switching to it |
---|
3145 | 4306 | * such that any running task will have this set. |
---|
| 4307 | + * |
---|
| 4308 | + * See the ttwu() WF_ON_CPU case and its ordering comment. |
---|
3146 | 4309 | */ |
---|
3147 | | - next->on_cpu = 1; |
---|
| 4310 | + WRITE_ONCE(next->on_cpu, 1); |
---|
3148 | 4311 | #endif |
---|
3149 | 4312 | } |
---|
3150 | 4313 | |
---|
.. | .. |
---|
3152 | 4315 | { |
---|
3153 | 4316 | #ifdef CONFIG_SMP |
---|
3154 | 4317 | /* |
---|
3155 | | - * After ->on_cpu is cleared, the task can be moved to a different CPU. |
---|
3156 | | - * We must ensure this doesn't happen until the switch is completely |
---|
| 4318 | + * This must be the very last reference to @prev from this CPU. After |
---|
| 4319 | + * p->on_cpu is cleared, the task can be moved to a different CPU. We |
---|
| 4320 | + * must ensure this doesn't happen until the switch is completely |
---|
3157 | 4321 | * finished. |
---|
3158 | 4322 | * |
---|
3159 | 4323 | * In particular, the load of prev->state in finish_task_switch() must |
---|
.. | .. |
---|
3165 | 4329 | #endif |
---|
3166 | 4330 | } |
---|
3167 | 4331 | |
---|
| 4332 | +#ifdef CONFIG_SMP |
---|
| 4333 | + |
---|
| 4334 | +static void do_balance_callbacks(struct rq *rq, struct callback_head *head) |
---|
| 4335 | +{ |
---|
| 4336 | + void (*func)(struct rq *rq); |
---|
| 4337 | + struct callback_head *next; |
---|
| 4338 | + |
---|
| 4339 | + lockdep_assert_held(&rq->lock); |
---|
| 4340 | + |
---|
| 4341 | + while (head) { |
---|
| 4342 | + func = (void (*)(struct rq *))head->func; |
---|
| 4343 | + next = head->next; |
---|
| 4344 | + head->next = NULL; |
---|
| 4345 | + head = next; |
---|
| 4346 | + |
---|
| 4347 | + func(rq); |
---|
| 4348 | + } |
---|
| 4349 | +} |
---|
| 4350 | + |
---|
| 4351 | +static inline struct callback_head *splice_balance_callbacks(struct rq *rq) |
---|
| 4352 | +{ |
---|
| 4353 | + struct callback_head *head = rq->balance_callback; |
---|
| 4354 | + |
---|
| 4355 | + lockdep_assert_held(&rq->lock); |
---|
| 4356 | + if (head) { |
---|
| 4357 | + rq->balance_callback = NULL; |
---|
| 4358 | + rq->balance_flags &= ~BALANCE_WORK; |
---|
| 4359 | + } |
---|
| 4360 | + |
---|
| 4361 | + return head; |
---|
| 4362 | +} |
---|
| 4363 | + |
---|
| 4364 | +static void __balance_callbacks(struct rq *rq) |
---|
| 4365 | +{ |
---|
| 4366 | + do_balance_callbacks(rq, splice_balance_callbacks(rq)); |
---|
| 4367 | +} |
---|
| 4368 | + |
---|
| 4369 | +static inline void balance_callbacks(struct rq *rq, struct callback_head *head) |
---|
| 4370 | +{ |
---|
| 4371 | + unsigned long flags; |
---|
| 4372 | + |
---|
| 4373 | + if (unlikely(head)) { |
---|
| 4374 | + raw_spin_lock_irqsave(&rq->lock, flags); |
---|
| 4375 | + do_balance_callbacks(rq, head); |
---|
| 4376 | + raw_spin_unlock_irqrestore(&rq->lock, flags); |
---|
| 4377 | + } |
---|
| 4378 | +} |
---|
| 4379 | + |
---|
| 4380 | +static void balance_push(struct rq *rq); |
---|
| 4381 | + |
---|
| 4382 | +static inline void balance_switch(struct rq *rq) |
---|
| 4383 | +{ |
---|
| 4384 | + if (likely(!rq->balance_flags)) |
---|
| 4385 | + return; |
---|
| 4386 | + |
---|
| 4387 | + if (rq->balance_flags & BALANCE_PUSH) { |
---|
| 4388 | + balance_push(rq); |
---|
| 4389 | + return; |
---|
| 4390 | + } |
---|
| 4391 | + |
---|
| 4392 | + __balance_callbacks(rq); |
---|
| 4393 | +} |
---|
| 4394 | + |
---|
| 4395 | +#else |
---|
| 4396 | + |
---|
| 4397 | +static inline void __balance_callbacks(struct rq *rq) |
---|
| 4398 | +{ |
---|
| 4399 | +} |
---|
| 4400 | + |
---|
| 4401 | +static inline struct callback_head *splice_balance_callbacks(struct rq *rq) |
---|
| 4402 | +{ |
---|
| 4403 | + return NULL; |
---|
| 4404 | +} |
---|
| 4405 | + |
---|
| 4406 | +static inline void balance_callbacks(struct rq *rq, struct callback_head *head) |
---|
| 4407 | +{ |
---|
| 4408 | +} |
---|
| 4409 | + |
---|
| 4410 | +static inline void balance_switch(struct rq *rq) |
---|
| 4411 | +{ |
---|
| 4412 | +} |
---|
| 4413 | + |
---|
| 4414 | +#endif |
---|
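The helpers above follow one pattern: splice_balance_callbacks() detaches the whole rq->balance_callback list while rq->lock is held, and do_balance_callbacks() then walks the now-private list, either still under the lock (__balance_callbacks()) or after re-taking it (balance_callbacks()). Below is a self-contained userspace sketch of that "splice under the lock, run afterwards" pattern; struct cb, queue_cb() and the pthread mutex are stand-ins, not kernel types.

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct cb {
	struct cb *next;
	void (*func)(void);
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct cb *pending;		/* rq->balance_callback stand-in */

static void queue_cb(struct cb *cb)	/* caller holds the "rq->lock" */
{
	cb->next = pending;
	pending = cb;
}

/* Detach the whole list while still holding the lock... */
static struct cb *splice_cbs(void)
{
	struct cb *head = pending;

	pending = NULL;
	return head;
}

/* ...and run it once the list is private to us. */
static void run_cbs(struct cb *head)
{
	while (head) {
		struct cb *next = head->next;

		head->next = NULL;
		head->func();
		head = next;
	}
}

static void hello(void) { puts("balance callback ran"); }

int main(void)
{
	struct cb cb = { .func = hello };
	struct cb *head;

	pthread_mutex_lock(&lock);
	queue_cb(&cb);
	head = splice_cbs();		/* list is now detached */
	pthread_mutex_unlock(&lock);

	run_cbs(head);			/* safe to run without the lock */
	return 0;
}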
| 4415 | + |
---|
3168 | 4416 | static inline void |
---|
3169 | 4417 | prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf) |
---|
3170 | 4418 | { |
---|
.. | .. |
---|
3175 | 4423 | * do an early lockdep release here: |
---|
3176 | 4424 | */ |
---|
3177 | 4425 | rq_unpin_lock(rq, rf); |
---|
3178 | | - spin_release(&rq->lock.dep_map, 1, _THIS_IP_); |
---|
| 4426 | + spin_release(&rq->lock.dep_map, _THIS_IP_); |
---|
3179 | 4427 | #ifdef CONFIG_DEBUG_SPINLOCK |
---|
3180 | 4428 | /* this is a valid case when another task releases the spinlock */ |
---|
3181 | 4429 | rq->lock.owner = next; |
---|
.. | .. |
---|
3190 | 4438 | * prev into current: |
---|
3191 | 4439 | */ |
---|
3192 | 4440 | spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); |
---|
| 4441 | + balance_switch(rq); |
---|
3193 | 4442 | raw_spin_unlock_irq(&rq->lock); |
---|
3194 | 4443 | } |
---|
3195 | 4444 | |
---|
.. | .. |
---|
3204 | 4453 | #ifndef finish_arch_post_lock_switch |
---|
3205 | 4454 | # define finish_arch_post_lock_switch() do { } while (0) |
---|
3206 | 4455 | #endif |
---|
| 4456 | + |
---|
| 4457 | +static inline void kmap_local_sched_out(void) |
---|
| 4458 | +{ |
---|
| 4459 | +#ifdef CONFIG_KMAP_LOCAL |
---|
| 4460 | + if (unlikely(current->kmap_ctrl.idx)) |
---|
| 4461 | + __kmap_local_sched_out(); |
---|
| 4462 | +#endif |
---|
| 4463 | +} |
---|
| 4464 | + |
---|
| 4465 | +static inline void kmap_local_sched_in(void) |
---|
| 4466 | +{ |
---|
| 4467 | +#ifdef CONFIG_KMAP_LOCAL |
---|
| 4468 | + if (unlikely(current->kmap_ctrl.idx)) |
---|
| 4469 | + __kmap_local_sched_in(); |
---|
| 4470 | +#endif |
---|
| 4471 | +} |
---|
3207 | 4472 | |
---|
3208 | 4473 | /** |
---|
3209 | 4474 | * prepare_task_switch - prepare to switch tasks |
---|
.. | .. |
---|
3227 | 4492 | perf_event_task_sched_out(prev, next); |
---|
3228 | 4493 | rseq_preempt(prev); |
---|
3229 | 4494 | fire_sched_out_preempt_notifiers(prev, next); |
---|
| 4495 | + kmap_local_sched_out(); |
---|
3230 | 4496 | prepare_task(next); |
---|
3231 | 4497 | prepare_arch_switch(next); |
---|
3232 | 4498 | } |
---|
.. | .. |
---|
3293 | 4559 | finish_lock_switch(rq); |
---|
3294 | 4560 | finish_arch_post_lock_switch(); |
---|
3295 | 4561 | kcov_finish_switch(current); |
---|
| 4562 | + kmap_local_sched_in(); |
---|
3296 | 4563 | |
---|
3297 | 4564 | fire_sched_in_preempt_notifiers(current); |
---|
3298 | 4565 | /* |
---|
.. | .. |
---|
3307 | 4574 | * provided by mmdrop(), |
---|
3308 | 4575 | * - a sync_core for SYNC_CORE. |
---|
3309 | 4576 | */ |
---|
| 4577 | + /* |
---|
| 4578 | + * We use mmdrop_delayed() here so that, when we are the last
---|
| 4579 | + * user, the full __mmdrop() is deferred instead of running here.
---|
| 4580 | + */ |
---|
3310 | 4581 | if (mm) { |
---|
3311 | 4582 | membarrier_mm_sync_core_before_usermode(mm); |
---|
3312 | | - mmdrop(mm); |
---|
| 4583 | + mmdrop_delayed(mm); |
---|
3313 | 4584 | } |
---|
3314 | 4585 | if (unlikely(prev_state == TASK_DEAD)) { |
---|
3315 | 4586 | if (prev->sched_class->task_dead) |
---|
3316 | 4587 | prev->sched_class->task_dead(prev); |
---|
3317 | 4588 | |
---|
3318 | | - /* |
---|
3319 | | - * Remove function-return probe instances associated with this |
---|
3320 | | - * task and put them back on the free list. |
---|
3321 | | - */ |
---|
3322 | | - kprobe_flush_task(prev); |
---|
3323 | | - |
---|
3324 | | - /* Task is done with its stack. */ |
---|
3325 | | - put_task_stack(prev); |
---|
3326 | | - |
---|
3327 | | - put_task_struct(prev); |
---|
| 4589 | + put_task_struct_rcu_user(prev); |
---|
3328 | 4590 | } |
---|
3329 | 4591 | |
---|
3330 | 4592 | tick_nohz_task_switch(); |
---|
3331 | 4593 | return rq; |
---|
3332 | 4594 | } |
---|
3333 | | - |
---|
3334 | | -#ifdef CONFIG_SMP |
---|
3335 | | - |
---|
3336 | | -/* rq->lock is NOT held, but preemption is disabled */ |
---|
3337 | | -static void __balance_callback(struct rq *rq) |
---|
3338 | | -{ |
---|
3339 | | - struct callback_head *head, *next; |
---|
3340 | | - void (*func)(struct rq *rq); |
---|
3341 | | - unsigned long flags; |
---|
3342 | | - |
---|
3343 | | - raw_spin_lock_irqsave(&rq->lock, flags); |
---|
3344 | | - head = rq->balance_callback; |
---|
3345 | | - rq->balance_callback = NULL; |
---|
3346 | | - while (head) { |
---|
3347 | | - func = (void (*)(struct rq *))head->func; |
---|
3348 | | - next = head->next; |
---|
3349 | | - head->next = NULL; |
---|
3350 | | - head = next; |
---|
3351 | | - |
---|
3352 | | - func(rq); |
---|
3353 | | - } |
---|
3354 | | - raw_spin_unlock_irqrestore(&rq->lock, flags); |
---|
3355 | | -} |
---|
3356 | | - |
---|
3357 | | -static inline void balance_callback(struct rq *rq) |
---|
3358 | | -{ |
---|
3359 | | - if (unlikely(rq->balance_callback)) |
---|
3360 | | - __balance_callback(rq); |
---|
3361 | | -} |
---|
3362 | | - |
---|
3363 | | -#else |
---|
3364 | | - |
---|
3365 | | -static inline void balance_callback(struct rq *rq) |
---|
3366 | | -{ |
---|
3367 | | -} |
---|
3368 | | - |
---|
3369 | | -#endif |
---|
3370 | 4595 | |
---|
3371 | 4596 | /** |
---|
3372 | 4597 | * schedule_tail - first thing a freshly forked thread must call. |
---|
.. | .. |
---|
3387 | 4612 | */ |
---|
3388 | 4613 | |
---|
3389 | 4614 | rq = finish_task_switch(prev); |
---|
3390 | | - balance_callback(rq); |
---|
3391 | 4615 | preempt_enable(); |
---|
3392 | 4616 | |
---|
3393 | 4617 | if (current->set_child_tid) |
---|
.. | .. |
---|
3403 | 4627 | context_switch(struct rq *rq, struct task_struct *prev, |
---|
3404 | 4628 | struct task_struct *next, struct rq_flags *rf) |
---|
3405 | 4629 | { |
---|
3406 | | - struct mm_struct *mm, *oldmm; |
---|
3407 | | - |
---|
3408 | 4630 | prepare_task_switch(rq, prev, next); |
---|
3409 | 4631 | |
---|
3410 | | - mm = next->mm; |
---|
3411 | | - oldmm = prev->active_mm; |
---|
3412 | 4632 | /* |
---|
3413 | 4633 | * For paravirt, this is coupled with an exit in switch_to to |
---|
3414 | 4634 | * combine the page table reload and the switch backend into |
---|
.. | .. |
---|
3417 | 4637 | arch_start_context_switch(prev); |
---|
3418 | 4638 | |
---|
3419 | 4639 | /* |
---|
3420 | | - * If mm is non-NULL, we pass through switch_mm(). If mm is |
---|
3421 | | - * NULL, we will pass through mmdrop() in finish_task_switch(). |
---|
3422 | | - * Both of these contain the full memory barrier required by |
---|
3423 | | - * membarrier after storing to rq->curr, before returning to |
---|
3424 | | - * user-space. |
---|
| 4640 | + * kernel -> kernel lazy + transfer active |
---|
| 4641 | + * user -> kernel lazy + mmgrab() active |
---|
| 4642 | + * |
---|
| 4643 | + * kernel -> user switch + mmdrop() active |
---|
| 4644 | + * user -> user switch |
---|
3425 | 4645 | */ |
---|
3426 | | - if (!mm) { |
---|
3427 | | - next->active_mm = oldmm; |
---|
3428 | | - mmgrab(oldmm); |
---|
3429 | | - enter_lazy_tlb(oldmm, next); |
---|
3430 | | - } else |
---|
3431 | | - switch_mm_irqs_off(oldmm, mm, next); |
---|
| 4646 | + if (!next->mm) { // to kernel |
---|
| 4647 | + enter_lazy_tlb(prev->active_mm, next); |
---|
3432 | 4648 | |
---|
3433 | | - if (!prev->mm) { |
---|
3434 | | - prev->active_mm = NULL; |
---|
3435 | | - rq->prev_mm = oldmm; |
---|
| 4649 | + next->active_mm = prev->active_mm; |
---|
| 4650 | + if (prev->mm) // from user |
---|
| 4651 | + mmgrab(prev->active_mm); |
---|
| 4652 | + else |
---|
| 4653 | + prev->active_mm = NULL; |
---|
| 4654 | + } else { // to user |
---|
| 4655 | + membarrier_switch_mm(rq, prev->active_mm, next->mm); |
---|
| 4656 | + /* |
---|
| 4657 | + * sys_membarrier() requires an smp_mb() between setting |
---|
| 4658 | + * rq->curr / membarrier_switch_mm() and returning to userspace. |
---|
| 4659 | + * |
---|
| 4660 | + * The below provides this either through switch_mm(), or in |
---|
| 4661 | + * case 'prev->active_mm == next->mm' through |
---|
| 4662 | + * finish_task_switch()'s mmdrop(). |
---|
| 4663 | + */ |
---|
| 4664 | + switch_mm_irqs_off(prev->active_mm, next->mm, next); |
---|
| 4665 | + |
---|
| 4666 | + if (!prev->mm) { // from kernel |
---|
| 4667 | + /* will mmdrop() in finish_task_switch(). */ |
---|
| 4668 | + rq->prev_mm = prev->active_mm; |
---|
| 4669 | + prev->active_mm = NULL; |
---|
| 4670 | + } |
---|
3436 | 4671 | } |
---|
3437 | 4672 | |
---|
3438 | 4673 | rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); |
---|
.. | .. |
---|
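The four transitions in the rewritten context_switch() comment reduce to one reference-counting rule: a kernel thread borrows the previous task's active_mm, the borrowed mm is pinned with mmgrab() on the way into kernel context, and it is handed to rq->prev_mm so finish_task_switch() can drop it on the way back out to user context. The following is a condensed standalone sketch of just that reference logic, using simplified stand-in types rather than the kernel's task_struct/mm_struct.

#include <stddef.h>
#include <stdio.h>

struct mm   { int refs; };
struct task { struct mm *mm, *active_mm; };	/* mm == NULL means kernel thread */

static void mmgrab(struct mm *mm) { mm->refs++; }
static void mmdrop(struct mm *mm) { mm->refs--; }

/* Returns the mm to drop after the switch (the rq->prev_mm role), if any. */
static struct mm *switch_mm_refs(struct task *prev, struct task *next)
{
	struct mm *drop = NULL;

	if (!next->mm) {			/* to kernel: borrow prev's active_mm */
		next->active_mm = prev->active_mm;
		if (prev->mm)			/* from user: pin the borrowed mm */
			mmgrab(prev->active_mm);
		else				/* from kernel: hand over the existing ref */
			prev->active_mm = NULL;
	} else {				/* to user: run on next->mm */
		if (!prev->mm) {		/* from kernel: give back the borrowed ref */
			drop = prev->active_mm;
			prev->active_mm = NULL;
		}
	}
	return drop;
}

int main(void)
{
	struct mm user_mm = { .refs = 1 };
	struct task user = { .mm = &user_mm, .active_mm = &user_mm };
	struct task kthread = { .mm = NULL, .active_mm = NULL };
	struct mm *drop;

	drop = switch_mm_refs(&user, &kthread);	/* user -> kernel: mmgrab, nothing to drop */
	drop = switch_mm_refs(&kthread, &user);	/* kernel -> user: borrowed ref comes back */
	if (drop)
		mmdrop(drop);
	printf("user_mm.refs = %d\n", user_mm.refs);	/* back to 1 */
	return 0;
}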
3469 | 4704 | * preemption, thus the result might have a time-of-check-to-time-of-use |
---|
3470 | 4705 | * race. The caller is responsible to use it correctly, for example: |
---|
3471 | 4706 | * |
---|
3472 | | - * - from a non-preemptable section (of course) |
---|
| 4707 | + * - from a non-preemptible section (of course) |
---|
3473 | 4708 | * |
---|
3474 | 4709 | * - from a thread that is bound to a single CPU |
---|
3475 | 4710 | * |
---|
.. | .. |
---|
3490 | 4725 | sum += cpu_rq(i)->nr_switches; |
---|
3491 | 4726 | |
---|
3492 | 4727 | return sum; |
---|
| 4728 | +} |
---|
| 4729 | + |
---|
| 4730 | +/* |
---|
| 4731 | + * Consumers of these two interfaces, like for example the cpuidle menu |
---|
| 4732 | + * governor, are using nonsensical data: they prefer a shallow idle state for
---|
| 4733 | + * a CPU that has tasks in IO-wait, even though those tasks might not end up
---|
| 4734 | + * running on that CPU when they do become runnable.
---|
| 4735 | + */ |
---|
| 4736 | + |
---|
| 4737 | +unsigned long nr_iowait_cpu(int cpu) |
---|
| 4738 | +{ |
---|
| 4739 | + return atomic_read(&cpu_rq(cpu)->nr_iowait); |
---|
3493 | 4740 | } |
---|
3494 | 4741 | |
---|
3495 | 4742 | /* |
---|
.. | .. |
---|
3527 | 4774 | unsigned long i, sum = 0; |
---|
3528 | 4775 | |
---|
3529 | 4776 | for_each_possible_cpu(i) |
---|
3530 | | - sum += atomic_read(&cpu_rq(i)->nr_iowait); |
---|
| 4777 | + sum += nr_iowait_cpu(i); |
---|
3531 | 4778 | |
---|
3532 | 4779 | return sum; |
---|
3533 | | -} |
---|
3534 | | - |
---|
3535 | | -/* |
---|
3536 | | - * Consumers of these two interfaces, like for example the cpufreq menu |
---|
3537 | | - * governor are using nonsensical data. Boosting frequency for a CPU that has |
---|
3538 | | - * IO-wait which might not even end up running the task when it does become |
---|
3539 | | - * runnable. |
---|
3540 | | - */ |
---|
3541 | | - |
---|
3542 | | -unsigned long nr_iowait_cpu(int cpu) |
---|
3543 | | -{ |
---|
3544 | | - struct rq *this = cpu_rq(cpu); |
---|
3545 | | - return atomic_read(&this->nr_iowait); |
---|
3546 | | -} |
---|
3547 | | - |
---|
3548 | | -void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) |
---|
3549 | | -{ |
---|
3550 | | - struct rq *rq = this_rq(); |
---|
3551 | | - *nr_waiters = atomic_read(&rq->nr_iowait); |
---|
3552 | | - *load = rq->load.weight; |
---|
3553 | 4780 | } |
---|
3554 | 4781 | |
---|
3555 | 4782 | #ifdef CONFIG_SMP |
---|
.. | .. |
---|
3563 | 4790 | struct task_struct *p = current; |
---|
3564 | 4791 | unsigned long flags; |
---|
3565 | 4792 | int dest_cpu; |
---|
| 4793 | + bool cond = false; |
---|
| 4794 | + |
---|
| 4795 | + trace_android_rvh_sched_exec(&cond); |
---|
| 4796 | + if (cond) |
---|
| 4797 | + return; |
---|
3566 | 4798 | |
---|
3567 | 4799 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
---|
3568 | | - dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0, 1); |
---|
| 4800 | + dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0); |
---|
3569 | 4801 | if (dest_cpu == smp_processor_id()) |
---|
3570 | 4802 | goto unlock; |
---|
3571 | 4803 | |
---|
.. | .. |
---|
3648 | 4880 | |
---|
3649 | 4881 | return ns; |
---|
3650 | 4882 | } |
---|
| 4883 | +EXPORT_SYMBOL_GPL(task_sched_runtime); |
---|
3651 | 4884 | |
---|
3652 | 4885 | /* |
---|
3653 | 4886 | * This function gets called by the timer code, with HZ frequency. |
---|
.. | .. |
---|
3659 | 4892 | struct rq *rq = cpu_rq(cpu); |
---|
3660 | 4893 | struct task_struct *curr = rq->curr; |
---|
3661 | 4894 | struct rq_flags rf; |
---|
| 4895 | + unsigned long thermal_pressure; |
---|
3662 | 4896 | |
---|
| 4897 | + arch_scale_freq_tick(); |
---|
3663 | 4898 | sched_clock_tick(); |
---|
3664 | 4899 | |
---|
3665 | 4900 | rq_lock(rq, &rf); |
---|
3666 | 4901 | |
---|
| 4902 | + trace_android_rvh_tick_entry(rq); |
---|
3667 | 4903 | update_rq_clock(rq); |
---|
| 4904 | + thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq)); |
---|
| 4905 | + update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure); |
---|
3668 | 4906 | curr->sched_class->task_tick(rq, curr, 0); |
---|
3669 | | - cpu_load_update_active(rq); |
---|
3670 | 4907 | calc_global_load_tick(rq); |
---|
3671 | 4908 | psi_task_tick(rq); |
---|
3672 | 4909 | |
---|
.. | .. |
---|
3678 | 4915 | rq->idle_balance = idle_cpu(cpu); |
---|
3679 | 4916 | trigger_load_balance(rq); |
---|
3680 | 4917 | #endif |
---|
| 4918 | + |
---|
| 4919 | + trace_android_vh_scheduler_tick(rq); |
---|
3681 | 4920 | } |
---|
3682 | 4921 | |
---|
3683 | 4922 | #ifdef CONFIG_NO_HZ_FULL |
---|
.. | .. |
---|
3735 | 4974 | * statistics and checks timeslices in a time-independent way, regardless |
---|
3736 | 4975 | * of when exactly it is running. |
---|
3737 | 4976 | */ |
---|
3738 | | - if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu)) |
---|
| 4977 | + if (!tick_nohz_tick_stopped_cpu(cpu)) |
---|
3739 | 4978 | goto out_requeue; |
---|
3740 | 4979 | |
---|
3741 | 4980 | rq_lock_irq(rq, &rf); |
---|
3742 | 4981 | curr = rq->curr; |
---|
3743 | | - if (is_idle_task(curr) || cpu_is_offline(cpu)) |
---|
| 4982 | + if (cpu_is_offline(cpu)) |
---|
3744 | 4983 | goto out_unlock; |
---|
3745 | 4984 | |
---|
3746 | 4985 | update_rq_clock(rq); |
---|
3747 | | - delta = rq_clock_task(rq) - curr->se.exec_start; |
---|
3748 | 4986 | |
---|
3749 | | - /* |
---|
3750 | | - * Make sure the next tick runs within a reasonable |
---|
3751 | | - * amount of time. |
---|
3752 | | - */ |
---|
3753 | | - WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); |
---|
| 4987 | + if (!is_idle_task(curr)) { |
---|
| 4988 | + /* |
---|
| 4989 | + * Make sure the next tick runs within a reasonable |
---|
| 4990 | + * amount of time. |
---|
| 4991 | + */ |
---|
| 4992 | + delta = rq_clock_task(rq) - curr->se.exec_start; |
---|
| 4993 | + WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); |
---|
| 4994 | + } |
---|
3754 | 4995 | curr->sched_class->task_tick(rq, curr, 0); |
---|
3755 | 4996 | |
---|
| 4997 | + calc_load_nohz_remote(rq); |
---|
3756 | 4998 | out_unlock: |
---|
3757 | 4999 | rq_unlock_irq(rq, &rf); |
---|
3758 | | - |
---|
3759 | 5000 | out_requeue: |
---|
| 5001 | + |
---|
3760 | 5002 | /* |
---|
3761 | 5003 | * Run the remote tick once per second (1Hz). This arbitrary |
---|
3762 | 5004 | * frequency is large enough to avoid overload but short enough |
---|
.. | .. |
---|
3820 | 5062 | static inline void sched_tick_stop(int cpu) { } |
---|
3821 | 5063 | #endif |
---|
3822 | 5064 | |
---|
3823 | | -#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ |
---|
| 5065 | +#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ |
---|
3824 | 5066 | defined(CONFIG_TRACE_PREEMPT_TOGGLE)) |
---|
3825 | 5067 | /* |
---|
3826 | 5068 | * If the value passed in is equal to the current preempt count |
---|
.. | .. |
---|
3926 | 5168 | if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) |
---|
3927 | 5169 | && in_atomic_preempt_off()) { |
---|
3928 | 5170 | pr_err("Preemption disabled at:"); |
---|
3929 | | - print_ip_sym(preempt_disable_ip); |
---|
3930 | | - pr_cont("\n"); |
---|
| 5171 | + print_ip_sym(KERN_ERR, preempt_disable_ip); |
---|
3931 | 5172 | } |
---|
3932 | 5173 | if (panic_on_warn) |
---|
3933 | 5174 | panic("scheduling while atomic\n"); |
---|
| 5175 | + |
---|
| 5176 | + trace_android_rvh_schedule_bug(prev); |
---|
3934 | 5177 | |
---|
3935 | 5178 | dump_stack(); |
---|
3936 | 5179 | add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
---|
.. | .. |
---|
3939 | 5182 | /* |
---|
3940 | 5183 | * Various schedule()-time debugging checks and statistics: |
---|
3941 | 5184 | */ |
---|
3942 | | -static inline void schedule_debug(struct task_struct *prev) |
---|
| 5185 | +static inline void schedule_debug(struct task_struct *prev, bool preempt) |
---|
3943 | 5186 | { |
---|
3944 | 5187 | #ifdef CONFIG_SCHED_STACK_END_CHECK |
---|
3945 | 5188 | if (task_stack_end_corrupted(prev)) |
---|
3946 | 5189 | panic("corrupted stack end detected inside scheduler\n"); |
---|
| 5190 | + |
---|
| 5191 | + if (task_scs_end_corrupted(prev)) |
---|
| 5192 | + panic("corrupted shadow stack detected inside scheduler\n"); |
---|
| 5193 | +#endif |
---|
| 5194 | + |
---|
| 5195 | +#ifdef CONFIG_DEBUG_ATOMIC_SLEEP |
---|
| 5196 | + if (!preempt && prev->state && prev->non_block_count) { |
---|
| 5197 | + printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", |
---|
| 5198 | + prev->comm, prev->pid, prev->non_block_count); |
---|
| 5199 | + dump_stack(); |
---|
| 5200 | + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
---|
| 5201 | + } |
---|
3947 | 5202 | #endif |
---|
3948 | 5203 | |
---|
3949 | 5204 | if (unlikely(in_atomic_preempt_off())) { |
---|
.. | .. |
---|
3955 | 5210 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); |
---|
3956 | 5211 | |
---|
3957 | 5212 | schedstat_inc(this_rq()->sched_count); |
---|
| 5213 | +} |
---|
| 5214 | + |
---|
| 5215 | +static void put_prev_task_balance(struct rq *rq, struct task_struct *prev, |
---|
| 5216 | + struct rq_flags *rf) |
---|
| 5217 | +{ |
---|
| 5218 | +#ifdef CONFIG_SMP |
---|
| 5219 | + const struct sched_class *class; |
---|
| 5220 | + /* |
---|
| 5221 | + * We must do the balancing pass before put_prev_task(), such |
---|
| 5222 | + * that when we release the rq->lock the task is in the same |
---|
| 5223 | + * state as before we took rq->lock. |
---|
| 5224 | + * |
---|
| 5225 | + * We can terminate the balance pass as soon as we know there is |
---|
| 5226 | + * a runnable task of @class priority or higher. |
---|
| 5227 | + */ |
---|
| 5228 | + for_class_range(class, prev->sched_class, &idle_sched_class) { |
---|
| 5229 | + if (class->balance(rq, prev, rf)) |
---|
| 5230 | + break; |
---|
| 5231 | + } |
---|
| 5232 | +#endif |
---|
| 5233 | + |
---|
| 5234 | + put_prev_task(rq, prev); |
---|
3958 | 5235 | } |
---|
3959 | 5236 | |
---|
3960 | 5237 | /* |
---|
.. | .. |
---|
3972 | 5249 | * higher scheduling class, because otherwise those lose the
---|
3973 | 5250 | * opportunity to pull in more work from other CPUs. |
---|
3974 | 5251 | */ |
---|
3975 | | - if (likely((prev->sched_class == &idle_sched_class || |
---|
3976 | | - prev->sched_class == &fair_sched_class) && |
---|
| 5252 | + if (likely(prev->sched_class <= &fair_sched_class && |
---|
3977 | 5253 | rq->nr_running == rq->cfs.h_nr_running)) { |
---|
3978 | 5254 | |
---|
3979 | | - p = fair_sched_class.pick_next_task(rq, prev, rf); |
---|
| 5255 | + p = pick_next_task_fair(rq, prev, rf); |
---|
3980 | 5256 | if (unlikely(p == RETRY_TASK)) |
---|
3981 | | - goto again; |
---|
| 5257 | + goto restart; |
---|
3982 | 5258 | |
---|
3983 | 5259 | /* Assumes fair_sched_class->next == idle_sched_class */ |
---|
3984 | | - if (unlikely(!p)) |
---|
3985 | | - p = idle_sched_class.pick_next_task(rq, prev, rf); |
---|
| 5260 | + if (!p) { |
---|
| 5261 | + put_prev_task(rq, prev); |
---|
| 5262 | + p = pick_next_task_idle(rq); |
---|
| 5263 | + } |
---|
3986 | 5264 | |
---|
3987 | 5265 | return p; |
---|
3988 | 5266 | } |
---|
3989 | 5267 | |
---|
3990 | | -again: |
---|
| 5268 | +restart: |
---|
| 5269 | + put_prev_task_balance(rq, prev, rf); |
---|
| 5270 | + |
---|
3991 | 5271 | for_each_class(class) { |
---|
3992 | | - p = class->pick_next_task(rq, prev, rf); |
---|
3993 | | - if (p) { |
---|
3994 | | - if (unlikely(p == RETRY_TASK)) |
---|
3995 | | - goto again; |
---|
| 5272 | + p = class->pick_next_task(rq); |
---|
| 5273 | + if (p) |
---|
3996 | 5274 | return p; |
---|
3997 | | - } |
---|
3998 | 5275 | } |
---|
3999 | 5276 | |
---|
4000 | 5277 | /* The idle class should always have a runnable task: */ |
---|
.. | .. |
---|
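The fast-path test prev->sched_class <= &fair_sched_class above works only because, in this kernel, the sched_class instances are assumed to be placed contiguously by the linker in priority order, so comparing their addresses compares their priorities. Below is a userspace illustration of that trick, where an ordinary array stands in for the linker section and the class names are purely illustrative.

#include <stdbool.h>
#include <stdio.h>

struct sched_class_like { const char *name; };

/* Lowest priority first, mirroring the assumed linker-section layout. */
static const struct sched_class_like classes[] = {
	{ "idle" }, { "fair" }, { "rt" }, { "dl" }, { "stop" },
};

static const struct sched_class_like *idle_class = &classes[0];
static const struct sched_class_like *fair_class = &classes[1];
static const struct sched_class_like *rt_class   = &classes[2];

/* Pointer order encodes priority order, so one comparison suffices. */
static bool is_fair_or_idle(const struct sched_class_like *class)
{
	return class <= fair_class;
}

int main(void)
{
	printf("idle: %d, fair: %d, rt: %d\n",
	       is_fair_or_idle(idle_class),
	       is_fair_or_idle(fair_class),
	       is_fair_or_idle(rt_class));
	return 0;
}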
4021 | 5298 | * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets |
---|
4022 | 5299 | * called on the nearest possible occasion: |
---|
4023 | 5300 | * |
---|
4024 | | - * - If the kernel is preemptible (CONFIG_PREEMPT=y): |
---|
| 5301 | + * - If the kernel is preemptible (CONFIG_PREEMPTION=y): |
---|
4025 | 5302 | * |
---|
4026 | 5303 | * - in syscall or exception context, at the next outmost |
---|
4027 | 5304 | * preempt_enable(). (this might be as soon as the wake_up()'s |
---|
.. | .. |
---|
4030 | 5307 | * - in IRQ context, return from interrupt-handler to |
---|
4031 | 5308 | * preemptible context |
---|
4032 | 5309 | * |
---|
4033 | | - * - If the kernel is not preemptible (CONFIG_PREEMPT is not set) |
---|
| 5310 | + * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set) |
---|
4034 | 5311 | * then at the next: |
---|
4035 | 5312 | * |
---|
4036 | 5313 | * - cond_resched() call |
---|
.. | .. |
---|
4040 | 5317 | * |
---|
4041 | 5318 | * WARNING: must be called with preemption disabled! |
---|
4042 | 5319 | */ |
---|
4043 | | -static void __sched notrace __schedule(bool preempt) |
---|
| 5320 | +static void __sched notrace __schedule(bool preempt, bool spinning_lock) |
---|
4044 | 5321 | { |
---|
4045 | 5322 | struct task_struct *prev, *next; |
---|
4046 | 5323 | unsigned long *switch_count; |
---|
| 5324 | + unsigned long prev_state; |
---|
4047 | 5325 | struct rq_flags rf; |
---|
4048 | 5326 | struct rq *rq; |
---|
4049 | 5327 | int cpu; |
---|
.. | .. |
---|
4052 | 5330 | rq = cpu_rq(cpu); |
---|
4053 | 5331 | prev = rq->curr; |
---|
4054 | 5332 | |
---|
4055 | | - schedule_debug(prev); |
---|
| 5333 | + schedule_debug(prev, preempt); |
---|
4056 | 5334 | |
---|
4057 | 5335 | if (sched_feat(HRTICK)) |
---|
4058 | 5336 | hrtick_clear(rq); |
---|
.. | .. |
---|
4063 | 5341 | /* |
---|
4064 | 5342 | * Make sure that signal_pending_state()->signal_pending() below |
---|
4065 | 5343 | * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) |
---|
4066 | | - * done by the caller to avoid the race with signal_wake_up(). |
---|
| 5344 | + * done by the caller to avoid the race with signal_wake_up(): |
---|
4067 | 5345 | * |
---|
4068 | | - * The membarrier system call requires a full memory barrier |
---|
| 5346 | + * __set_current_state(@state) signal_wake_up() |
---|
| 5347 | + * schedule() set_tsk_thread_flag(p, TIF_SIGPENDING) |
---|
| 5348 | + * wake_up_state(p, state) |
---|
| 5349 | + * LOCK rq->lock LOCK p->pi_lock
---|
| 5350 | + * smp_mb__after_spinlock() smp_mb__after_spinlock() |
---|
| 5351 | + * if (signal_pending_state()) if (p->state & @state) |
---|
| 5352 | + * |
---|
| 5353 | + * Also, the membarrier system call requires a full memory barrier |
---|
4069 | 5354 | * after coming from user-space, before storing to rq->curr. |
---|
4070 | 5355 | */ |
---|
4071 | 5356 | rq_lock(rq, &rf); |
---|
.. | .. |
---|
4076 | 5361 | update_rq_clock(rq); |
---|
4077 | 5362 | |
---|
4078 | 5363 | switch_count = &prev->nivcsw; |
---|
4079 | | - if (!preempt && prev->state) { |
---|
4080 | | - if (unlikely(signal_pending_state(prev->state, prev))) { |
---|
| 5364 | + |
---|
| 5365 | + /* |
---|
| 5366 | + * We must load prev->state once (task_struct::state is volatile), such |
---|
| 5367 | + * that: |
---|
| 5368 | + * |
---|
| 5369 | + * - we form a control dependency vs deactivate_task() below. |
---|
| 5370 | + * - ptrace_{,un}freeze_traced() can change ->state underneath us. |
---|
| 5371 | + */ |
---|
| 5372 | + prev_state = prev->state; |
---|
| 5373 | + if ((!preempt || spinning_lock) && prev_state) { |
---|
| 5374 | + if (signal_pending_state(prev_state, prev)) { |
---|
4081 | 5375 | prev->state = TASK_RUNNING; |
---|
4082 | 5376 | } else { |
---|
| 5377 | + prev->sched_contributes_to_load = |
---|
| 5378 | + (prev_state & TASK_UNINTERRUPTIBLE) && |
---|
| 5379 | + !(prev_state & TASK_NOLOAD) && |
---|
| 5380 | + !(prev->flags & PF_FROZEN); |
---|
| 5381 | + |
---|
| 5382 | + if (prev->sched_contributes_to_load) |
---|
| 5383 | + rq->nr_uninterruptible++; |
---|
| 5384 | + |
---|
| 5385 | + /* |
---|
| 5386 | + * __schedule() ttwu() |
---|
| 5387 | + * prev_state = prev->state; if (p->on_rq && ...) |
---|
| 5388 | + * if (prev_state) goto out; |
---|
| 5389 | + * p->on_rq = 0; smp_acquire__after_ctrl_dep(); |
---|
| 5390 | + * p->state = TASK_WAKING |
---|
| 5391 | + * |
---|
| 5392 | + * Where __schedule() and ttwu() have matching control dependencies. |
---|
| 5393 | + * |
---|
| 5394 | + * After this, schedule() must not care about p->state any more. |
---|
| 5395 | + */ |
---|
4083 | 5396 | deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK); |
---|
4084 | | - prev->on_rq = 0; |
---|
4085 | 5397 | |
---|
4086 | 5398 | if (prev->in_iowait) { |
---|
4087 | 5399 | atomic_inc(&rq->nr_iowait); |
---|
4088 | 5400 | delayacct_blkio_start(); |
---|
4089 | | - } |
---|
4090 | | - |
---|
4091 | | - /* |
---|
4092 | | - * If a worker went to sleep, notify and ask workqueue |
---|
4093 | | - * whether it wants to wake up a task to maintain |
---|
4094 | | - * concurrency. |
---|
4095 | | - */ |
---|
4096 | | - if (prev->flags & PF_WQ_WORKER) { |
---|
4097 | | - struct task_struct *to_wakeup; |
---|
4098 | | - |
---|
4099 | | - to_wakeup = wq_worker_sleeping(prev); |
---|
4100 | | - if (to_wakeup) |
---|
4101 | | - try_to_wake_up_local(to_wakeup, &rf); |
---|
4102 | 5401 | } |
---|
4103 | 5402 | } |
---|
4104 | 5403 | switch_count = &prev->nvcsw; |
---|
.. | .. |
---|
4106 | 5405 | |
---|
4107 | 5406 | next = pick_next_task(rq, prev, &rf); |
---|
4108 | 5407 | clear_tsk_need_resched(prev); |
---|
| 5408 | + clear_tsk_need_resched_lazy(prev); |
---|
4109 | 5409 | clear_preempt_need_resched(); |
---|
4110 | 5410 | |
---|
| 5411 | + trace_android_rvh_schedule(prev, next, rq); |
---|
4111 | 5412 | if (likely(prev != next)) { |
---|
4112 | 5413 | rq->nr_switches++; |
---|
4113 | | - rq->curr = next; |
---|
| 5414 | + /* |
---|
| 5415 | + * RCU users of rcu_dereference(rq->curr) may not see |
---|
| 5416 | + * changes to task_struct made by pick_next_task(). |
---|
| 5417 | + */ |
---|
| 5418 | + RCU_INIT_POINTER(rq->curr, next); |
---|
4114 | 5419 | /* |
---|
4115 | 5420 | * The membarrier system call requires each architecture |
---|
4116 | 5421 | * to have a full memory barrier after updating |
---|
.. | .. |
---|
4127 | 5432 | */ |
---|
4128 | 5433 | ++*switch_count; |
---|
4129 | 5434 | |
---|
| 5435 | + migrate_disable_switch(rq, prev); |
---|
| 5436 | + psi_sched_switch(prev, next, !task_on_rq_queued(prev)); |
---|
| 5437 | + |
---|
4130 | 5438 | trace_sched_switch(preempt, prev, next); |
---|
4131 | 5439 | |
---|
4132 | 5440 | /* Also unlocks the rq: */ |
---|
4133 | 5441 | rq = context_switch(rq, prev, next, &rf); |
---|
4134 | 5442 | } else { |
---|
4135 | 5443 | rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); |
---|
4136 | | - rq_unlock_irq(rq, &rf); |
---|
4137 | | - } |
---|
4138 | 5444 | |
---|
4139 | | - balance_callback(rq); |
---|
| 5445 | + rq_unpin_lock(rq, &rf); |
---|
| 5446 | + __balance_callbacks(rq); |
---|
| 5447 | + raw_spin_unlock_irq(&rq->lock); |
---|
| 5448 | + } |
---|
4140 | 5449 | } |
---|
4141 | 5450 | |
---|
4142 | 5451 | void __noreturn do_task_dead(void) |
---|
.. | .. |
---|
4147 | 5456 | /* Tell freezer to ignore us: */ |
---|
4148 | 5457 | current->flags |= PF_NOFREEZE; |
---|
4149 | 5458 | |
---|
4150 | | - __schedule(false); |
---|
| 5459 | + __schedule(false, false); |
---|
4151 | 5460 | BUG(); |
---|
4152 | 5461 | |
---|
4153 | 5462 | /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ |
---|
.. | .. |
---|
4157 | 5466 | |
---|
4158 | 5467 | static inline void sched_submit_work(struct task_struct *tsk) |
---|
4159 | 5468 | { |
---|
4160 | | - if (!tsk->state || tsk_is_pi_blocked(tsk)) |
---|
| 5469 | + unsigned int task_flags; |
---|
| 5470 | + |
---|
| 5471 | + if (!tsk->state) |
---|
4161 | 5472 | return; |
---|
| 5473 | + |
---|
| 5474 | + task_flags = tsk->flags; |
---|
| 5475 | + /* |
---|
| 5476 | + * If a worker went to sleep, notify and ask workqueue whether |
---|
| 5477 | + * it wants to wake up a task to maintain concurrency. |
---|
| 5478 | + * As this function is called inside the schedule() context, |
---|
| 5479 | + * we disable preemption to avoid it calling schedule() again |
---|
| 5480 | + * in the possible wakeup of a kworker and because wq_worker_sleeping() |
---|
| 5481 | + * requires it. |
---|
| 5482 | + */ |
---|
| 5483 | + if (task_flags & (PF_WQ_WORKER | PF_IO_WORKER)) { |
---|
| 5484 | + preempt_disable(); |
---|
| 5485 | + if (task_flags & PF_WQ_WORKER) |
---|
| 5486 | + wq_worker_sleeping(tsk); |
---|
| 5487 | + else |
---|
| 5488 | + io_wq_worker_sleeping(tsk); |
---|
| 5489 | + preempt_enable_no_resched(); |
---|
| 5490 | + } |
---|
| 5491 | + |
---|
4162 | 5492 | /* |
---|
4163 | 5493 | * If we are going to sleep and we have plugged IO queued, |
---|
4164 | 5494 | * make sure to submit it to avoid deadlocks. |
---|
4165 | 5495 | */ |
---|
4166 | 5496 | if (blk_needs_flush_plug(tsk)) |
---|
4167 | 5497 | blk_schedule_flush_plug(tsk); |
---|
| 5498 | +} |
---|
| 5499 | + |
---|
| 5500 | +static void sched_update_worker(struct task_struct *tsk) |
---|
| 5501 | +{ |
---|
| 5502 | + if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { |
---|
| 5503 | + if (tsk->flags & PF_WQ_WORKER) |
---|
| 5504 | + wq_worker_running(tsk); |
---|
| 5505 | + else |
---|
| 5506 | + io_wq_worker_running(tsk); |
---|
| 5507 | + } |
---|
4168 | 5508 | } |
---|
4169 | 5509 | |
---|
4170 | 5510 | asmlinkage __visible void __sched schedule(void) |
---|
.. | .. |
---|
4174 | 5514 | sched_submit_work(tsk); |
---|
4175 | 5515 | do { |
---|
4176 | 5516 | preempt_disable(); |
---|
4177 | | - __schedule(false); |
---|
| 5517 | + __schedule(false, false); |
---|
4178 | 5518 | sched_preempt_enable_no_resched(); |
---|
4179 | 5519 | } while (need_resched()); |
---|
| 5520 | + sched_update_worker(tsk); |
---|
4180 | 5521 | } |
---|
4181 | 5522 | EXPORT_SYMBOL(schedule); |
---|
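
Read together with the sched_submit_work()/sched_update_worker() hunk above, the workqueue and io_wq notifications now bracket the whole scheduling loop instead of living inside __schedule(); in outline:

/*
 * schedule()
 *   sched_submit_work(tsk)       // wq_worker_sleeping() / io_wq_worker_sleeping()
 *   do {
 *           __schedule(false, false);
 *   } while (need_resched());
 *   sched_update_worker(tsk)     // wq_worker_running() / io_wq_worker_running()
 */
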
4182 | 5523 | |
---|
.. | .. |
---|
4201 | 5542 | */ |
---|
4202 | 5543 | WARN_ON_ONCE(current->state); |
---|
4203 | 5544 | do { |
---|
4204 | | - __schedule(false); |
---|
| 5545 | + __schedule(false, false); |
---|
4205 | 5546 | } while (need_resched()); |
---|
4206 | 5547 | } |
---|
4207 | 5548 | |
---|
.. | .. |
---|
4254 | 5595 | */ |
---|
4255 | 5596 | preempt_disable_notrace(); |
---|
4256 | 5597 | preempt_latency_start(1); |
---|
4257 | | - __schedule(true); |
---|
| 5598 | + __schedule(true, false); |
---|
4258 | 5599 | preempt_latency_stop(1); |
---|
4259 | 5600 | preempt_enable_no_resched_notrace(); |
---|
4260 | 5601 | |
---|
.. | .. |
---|
4265 | 5606 | } while (need_resched()); |
---|
4266 | 5607 | } |
---|
4267 | 5608 | |
---|
4268 | | -#ifdef CONFIG_PREEMPT |
---|
| 5609 | +#ifdef CONFIG_PREEMPT_LAZY |
---|
4269 | 5610 | /* |
---|
4270 | | - * this is the entry point to schedule() from in-kernel preemption |
---|
4271 | | - * off of preempt_enable. Kernel preemptions off return from interrupt |
---|
4272 | | - * occur there and call schedule directly. |
---|
| 5611 | + * If TIF_NEED_RESCHED is set then we allow being scheduled away, since it is |
---|
| 5612 | + * set by an RT task. Otherwise we try to avoid being scheduled out as long as |
---|
| 5613 | + * the preempt_lazy_count counter is > 0. |
---|
| 5614 | + */ |
---|
| 5615 | +static __always_inline int preemptible_lazy(void) |
---|
| 5616 | +{ |
---|
| 5617 | + if (test_thread_flag(TIF_NEED_RESCHED)) |
---|
| 5618 | + return 1; |
---|
| 5619 | + if (current_thread_info()->preempt_lazy_count) |
---|
| 5620 | + return 0; |
---|
| 5621 | + return 1; |
---|
| 5622 | +} |
---|
| 5623 | + |
---|
| 5624 | +#else |
---|
| 5625 | + |
---|
| 5626 | +static inline int preemptible_lazy(void) |
---|
| 5627 | +{ |
---|
| 5628 | + return 1; |
---|
| 5629 | +} |
---|
| 5630 | + |
---|
| 5631 | +#endif |
---|
| 5632 | + |
---|
| 5633 | +#ifdef CONFIG_PREEMPTION |
---|
| 5634 | +/* |
---|
| 5635 | + * This is the entry point to schedule() from in-kernel preemption |
---|
| 5636 | + * off of preempt_enable. |
---|
4273 | 5637 | */ |
---|
4274 | 5638 | asmlinkage __visible void __sched notrace preempt_schedule(void) |
---|
4275 | 5639 | { |
---|
.. | .. |
---|
4279 | 5643 | */ |
---|
4280 | 5644 | if (likely(!preemptible())) |
---|
4281 | 5645 | return; |
---|
4282 | | - |
---|
| 5646 | + if (!preemptible_lazy()) |
---|
| 5647 | + return; |
---|
4283 | 5648 | preempt_schedule_common(); |
---|
4284 | 5649 | } |
---|
4285 | 5650 | NOKPROBE_SYMBOL(preempt_schedule); |
---|
4286 | 5651 | EXPORT_SYMBOL(preempt_schedule); |
---|
| 5652 | + |
---|
| 5653 | +#ifdef CONFIG_PREEMPT_RT |
---|
| 5654 | +void __sched notrace preempt_schedule_lock(void) |
---|
| 5655 | +{ |
---|
| 5656 | + do { |
---|
| 5657 | + preempt_disable(); |
---|
| 5658 | + __schedule(true, true); |
---|
| 5659 | + sched_preempt_enable_no_resched(); |
---|
| 5660 | + } while (need_resched()); |
---|
| 5661 | +} |
---|
| 5662 | +NOKPROBE_SYMBOL(preempt_schedule_lock); |
---|
| 5663 | +EXPORT_SYMBOL(preempt_schedule_lock); |
---|
| 5664 | +#endif |
---|
4287 | 5665 | |
---|
4288 | 5666 | /** |
---|
4289 | 5667 | * preempt_schedule_notrace - preempt_schedule called by tracing |
---|
.. | .. |
---|
4304 | 5682 | enum ctx_state prev_ctx; |
---|
4305 | 5683 | |
---|
4306 | 5684 | if (likely(!preemptible())) |
---|
| 5685 | + return; |
---|
| 5686 | + |
---|
| 5687 | + if (!preemptible_lazy()) |
---|
4307 | 5688 | return; |
---|
4308 | 5689 | |
---|
4309 | 5690 | do { |
---|
.. | .. |
---|
4328 | 5709 | * an infinite recursion. |
---|
4329 | 5710 | */ |
---|
4330 | 5711 | prev_ctx = exception_enter(); |
---|
4331 | | - __schedule(true); |
---|
| 5712 | + __schedule(true, false); |
---|
4332 | 5713 | exception_exit(prev_ctx); |
---|
4333 | 5714 | |
---|
4334 | 5715 | preempt_latency_stop(1); |
---|
.. | .. |
---|
4337 | 5718 | } |
---|
4338 | 5719 | EXPORT_SYMBOL_GPL(preempt_schedule_notrace); |
---|
4339 | 5720 | |
---|
4340 | | -#endif /* CONFIG_PREEMPT */ |
---|
| 5721 | +#endif /* CONFIG_PREEMPTION */ |
---|
4341 | 5722 | |
---|
4342 | 5723 | /* |
---|
4343 | | - * this is the entry point to schedule() from kernel preemption |
---|
| 5724 | + * This is the entry point to schedule() from kernel preemption |
---|
4344 | 5725 | * off of irq context. |
---|
4345 | 5726 | * Note, that this is called and return with irqs disabled. This will |
---|
4346 | 5727 | * protect us against recursive calling from irq. |
---|
.. | .. |
---|
4357 | 5738 | do { |
---|
4358 | 5739 | preempt_disable(); |
---|
4359 | 5740 | local_irq_enable(); |
---|
4360 | | - __schedule(true); |
---|
| 5741 | + __schedule(true, false); |
---|
4361 | 5742 | local_irq_disable(); |
---|
4362 | 5743 | sched_preempt_enable_no_resched(); |
---|
4363 | 5744 | } while (need_resched()); |
---|
.. | .. |
---|
4368 | 5749 | int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, |
---|
4369 | 5750 | void *key) |
---|
4370 | 5751 | { |
---|
4371 | | - return try_to_wake_up(curr->private, mode, wake_flags, 1); |
---|
| 5752 | + WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~(WF_SYNC | WF_ANDROID_VENDOR)); |
---|
| 5753 | + return try_to_wake_up(curr->private, mode, wake_flags); |
---|
4372 | 5754 | } |
---|
4373 | 5755 | EXPORT_SYMBOL(default_wake_function); |
---|
| 5756 | + |
---|
| 5757 | +static void __setscheduler_prio(struct task_struct *p, int prio) |
---|
| 5758 | +{ |
---|
| 5759 | + if (dl_prio(prio)) |
---|
| 5760 | + p->sched_class = &dl_sched_class; |
---|
| 5761 | + else if (rt_prio(prio)) |
---|
| 5762 | + p->sched_class = &rt_sched_class; |
---|
| 5763 | + else |
---|
| 5764 | + p->sched_class = &fair_sched_class; |
---|
| 5765 | + |
---|
| 5766 | + p->prio = prio; |
---|
| 5767 | +} |
---|
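
__setscheduler_prio() centralizes the priority-to-class mapping that used to be open-coded in rt_mutex_setprio() and the now-removed __setscheduler(). A stand-alone mirror of that mapping, for illustration only (the constants assume the usual layout of this kernel series: MAX_DL_PRIO == 0, MAX_RT_PRIO == 100, nice 0 == prio 120):

enum which_class { CLASS_DL, CLASS_RT, CLASS_FAIR };

/* Hypothetical helper mirroring __setscheduler_prio()'s tests. */
static inline enum which_class class_for_prio(int prio)
{
	if (prio < 0)		/* dl_prio(): deadline tasks sit below 0 */
		return CLASS_DL;
	if (prio < 100)		/* rt_prio(): SCHED_FIFO/SCHED_RR, 0..99 */
		return CLASS_RT;
	return CLASS_FAIR;	/* SCHED_NORMAL/BATCH/IDLE, 100..139 */
}

Note that dl_prio() has to be tested before rt_prio(): rt_prio() also returns true for the (negative) deadline priorities.
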
4374 | 5768 | |
---|
4375 | 5769 | #ifdef CONFIG_RT_MUTEXES |
---|
4376 | 5770 | |
---|
.. | .. |
---|
4408 | 5802 | struct rq_flags rf; |
---|
4409 | 5803 | struct rq *rq; |
---|
4410 | 5804 | |
---|
| 5805 | + trace_android_rvh_rtmutex_prepare_setprio(p, pi_task); |
---|
4411 | 5806 | /* XXX used to be waiter->prio, not waiter->task->prio */ |
---|
4412 | 5807 | prio = __rt_effective_prio(pi_task, p->normal_prio); |
---|
4413 | 5808 | |
---|
.. | .. |
---|
4482 | 5877 | if (!dl_prio(p->normal_prio) || |
---|
4483 | 5878 | (pi_task && dl_prio(pi_task->prio) && |
---|
4484 | 5879 | dl_entity_preempt(&pi_task->dl, &p->dl))) { |
---|
4485 | | - p->dl.dl_boosted = 1; |
---|
| 5880 | + p->dl.pi_se = pi_task->dl.pi_se; |
---|
4486 | 5881 | queue_flag |= ENQUEUE_REPLENISH; |
---|
4487 | | - } else |
---|
4488 | | - p->dl.dl_boosted = 0; |
---|
4489 | | - p->sched_class = &dl_sched_class; |
---|
| 5882 | + } else { |
---|
| 5883 | + p->dl.pi_se = &p->dl; |
---|
| 5884 | + } |
---|
4490 | 5885 | } else if (rt_prio(prio)) { |
---|
4491 | 5886 | if (dl_prio(oldprio)) |
---|
4492 | | - p->dl.dl_boosted = 0; |
---|
| 5887 | + p->dl.pi_se = &p->dl; |
---|
4493 | 5888 | if (oldprio < prio) |
---|
4494 | 5889 | queue_flag |= ENQUEUE_HEAD; |
---|
4495 | | - p->sched_class = &rt_sched_class; |
---|
4496 | 5890 | } else { |
---|
4497 | 5891 | if (dl_prio(oldprio)) |
---|
4498 | | - p->dl.dl_boosted = 0; |
---|
| 5892 | + p->dl.pi_se = &p->dl; |
---|
4499 | 5893 | if (rt_prio(oldprio)) |
---|
4500 | 5894 | p->rt.timeout = 0; |
---|
4501 | | - p->sched_class = &fair_sched_class; |
---|
4502 | 5895 | } |
---|
4503 | 5896 | |
---|
4504 | | - p->prio = prio; |
---|
| 5897 | + __setscheduler_prio(p, prio); |
---|
4505 | 5898 | |
---|
4506 | 5899 | if (queued) |
---|
4507 | 5900 | enqueue_task(rq, p, queue_flag); |
---|
4508 | 5901 | if (running) |
---|
4509 | | - set_curr_task(rq, p); |
---|
| 5902 | + set_next_task(rq, p); |
---|
4510 | 5903 | |
---|
4511 | 5904 | check_class_changed(rq, p, prev_class, oldprio); |
---|
4512 | 5905 | out_unlock: |
---|
4513 | 5906 | /* Avoid rq from going away on us: */ |
---|
4514 | 5907 | preempt_disable(); |
---|
4515 | | - __task_rq_unlock(rq, &rf); |
---|
4516 | 5908 | |
---|
4517 | | - balance_callback(rq); |
---|
| 5909 | + rq_unpin_lock(rq, &rf); |
---|
| 5910 | + __balance_callbacks(rq); |
---|
| 5911 | + raw_spin_unlock(&rq->lock); |
---|
| 5912 | + |
---|
4518 | 5913 | preempt_enable(); |
---|
4519 | 5914 | } |
---|
4520 | 5915 | #else |
---|
.. | .. |
---|
4526 | 5921 | |
---|
4527 | 5922 | void set_user_nice(struct task_struct *p, long nice) |
---|
4528 | 5923 | { |
---|
4529 | | - bool queued, running; |
---|
4530 | | - int old_prio, delta; |
---|
| 5924 | + bool queued, running, allowed = false; |
---|
| 5925 | + int old_prio; |
---|
4531 | 5926 | struct rq_flags rf; |
---|
4532 | 5927 | struct rq *rq; |
---|
4533 | 5928 | |
---|
4534 | | - if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) |
---|
| 5929 | + trace_android_rvh_set_user_nice(p, &nice, &allowed); |
---|
| 5930 | + if ((task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) && !allowed) |
---|
4535 | 5931 | return; |
---|
4536 | 5932 | /* |
---|
4537 | 5933 | * We have to be careful, if called from sys_setpriority(), |
---|
.. | .. |
---|
4558 | 5954 | put_prev_task(rq, p); |
---|
4559 | 5955 | |
---|
4560 | 5956 | p->static_prio = NICE_TO_PRIO(nice); |
---|
4561 | | - set_load_weight(p, true); |
---|
| 5957 | + set_load_weight(p); |
---|
4562 | 5958 | old_prio = p->prio; |
---|
4563 | 5959 | p->prio = effective_prio(p); |
---|
4564 | | - delta = p->prio - old_prio; |
---|
4565 | 5960 | |
---|
4566 | | - if (queued) { |
---|
| 5961 | + if (queued) |
---|
4567 | 5962 | enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); |
---|
4568 | | - /* |
---|
4569 | | - * If the task increased its priority or is running and |
---|
4570 | | - * lowered its priority, then reschedule its CPU: |
---|
4571 | | - */ |
---|
4572 | | - if (delta < 0 || (delta > 0 && task_running(rq, p))) |
---|
4573 | | - resched_curr(rq); |
---|
4574 | | - } |
---|
4575 | 5963 | if (running) |
---|
4576 | | - set_curr_task(rq, p); |
---|
| 5964 | + set_next_task(rq, p); |
---|
| 5965 | + |
---|
| 5966 | + /* |
---|
| 5967 | + * If the task increased its priority or is running and |
---|
| 5968 | + * lowered its priority, then reschedule its CPU: |
---|
| 5969 | + */ |
---|
| 5970 | + p->sched_class->prio_changed(rq, p, old_prio); |
---|
| 5971 | + |
---|
4577 | 5972 | out_unlock: |
---|
4578 | 5973 | task_rq_unlock(rq, p, &rf); |
---|
4579 | 5974 | } |
---|
.. | .. |
---|
4658 | 6053 | return 0; |
---|
4659 | 6054 | |
---|
4660 | 6055 | #ifdef CONFIG_SMP |
---|
4661 | | - if (!llist_empty(&rq->wake_list)) |
---|
| 6056 | + if (rq->ttwu_pending) |
---|
4662 | 6057 | return 0; |
---|
4663 | 6058 | #endif |
---|
4664 | 6059 | |
---|
.. | .. |
---|
4681 | 6076 | |
---|
4682 | 6077 | return 1; |
---|
4683 | 6078 | } |
---|
| 6079 | +EXPORT_SYMBOL_GPL(available_idle_cpu); |
---|
4684 | 6080 | |
---|
4685 | 6081 | /** |
---|
4686 | 6082 | * idle_task - return the idle task for a given CPU. |
---|
.. | .. |
---|
4732 | 6128 | */ |
---|
4733 | 6129 | p->rt_priority = attr->sched_priority; |
---|
4734 | 6130 | p->normal_prio = normal_prio(p); |
---|
4735 | | - set_load_weight(p, true); |
---|
4736 | | -} |
---|
4737 | | - |
---|
4738 | | -/* Actually do priority change: must hold pi & rq lock. */ |
---|
4739 | | -static void __setscheduler(struct rq *rq, struct task_struct *p, |
---|
4740 | | - const struct sched_attr *attr, bool keep_boost) |
---|
4741 | | -{ |
---|
4742 | | - /* |
---|
4743 | | - * If params can't change scheduling class changes aren't allowed |
---|
4744 | | - * either. |
---|
4745 | | - */ |
---|
4746 | | - if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS) |
---|
4747 | | - return; |
---|
4748 | | - |
---|
4749 | | - __setscheduler_params(p, attr); |
---|
4750 | | - |
---|
4751 | | - /* |
---|
4752 | | - * Keep a potential priority boosting if called from |
---|
4753 | | - * sched_setscheduler(). |
---|
4754 | | - */ |
---|
4755 | | - p->prio = normal_prio(p); |
---|
4756 | | - if (keep_boost) |
---|
4757 | | - p->prio = rt_effective_prio(p, p->prio); |
---|
4758 | | - |
---|
4759 | | - if (dl_prio(p->prio)) |
---|
4760 | | - p->sched_class = &dl_sched_class; |
---|
4761 | | - else if (rt_prio(p->prio)) |
---|
4762 | | - p->sched_class = &rt_sched_class; |
---|
4763 | | - else |
---|
4764 | | - p->sched_class = &fair_sched_class; |
---|
| 6131 | + set_load_weight(p); |
---|
4765 | 6132 | } |
---|
4766 | 6133 | |
---|
4767 | 6134 | /* |
---|
.. | .. |
---|
4784 | 6151 | const struct sched_attr *attr, |
---|
4785 | 6152 | bool user, bool pi) |
---|
4786 | 6153 | { |
---|
4787 | | - int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : |
---|
4788 | | - MAX_RT_PRIO - 1 - attr->sched_priority; |
---|
4789 | | - int retval, oldprio, oldpolicy = -1, queued, running; |
---|
4790 | | - int new_effective_prio, policy = attr->sched_policy; |
---|
| 6154 | + int oldpolicy = -1, policy = attr->sched_policy; |
---|
| 6155 | + int retval, oldprio, newprio, queued, running; |
---|
4791 | 6156 | const struct sched_class *prev_class; |
---|
| 6157 | + struct callback_head *head; |
---|
4792 | 6158 | struct rq_flags rf; |
---|
4793 | 6159 | int reset_on_fork; |
---|
4794 | 6160 | int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; |
---|
.. | .. |
---|
4860 | 6226 | * Treat SCHED_IDLE as nice 20. Only allow a switch to |
---|
4861 | 6227 | * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. |
---|
4862 | 6228 | */ |
---|
4863 | | - if (idle_policy(p->policy) && !idle_policy(policy)) { |
---|
| 6229 | + if (task_has_idle_policy(p) && !idle_policy(policy)) { |
---|
4864 | 6230 | if (!can_nice(p, task_nice(p))) |
---|
4865 | 6231 | return -EPERM; |
---|
4866 | 6232 | } |
---|
.. | .. |
---|
4871 | 6237 | |
---|
4872 | 6238 | /* Normal users shall not reset the sched_reset_on_fork flag: */ |
---|
4873 | 6239 | if (p->sched_reset_on_fork && !reset_on_fork) |
---|
| 6240 | + return -EPERM; |
---|
| 6241 | + |
---|
| 6242 | + /* Can't change util-clamps */ |
---|
| 6243 | + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) |
---|
4874 | 6244 | return -EPERM; |
---|
4875 | 6245 | } |
---|
4876 | 6246 | |
---|
.. | .. |
---|
4904 | 6274 | * Changing the policy of the stop threads is a very bad idea: |
---|
4905 | 6275 | */ |
---|
4906 | 6276 | if (p == rq->stop) { |
---|
4907 | | - task_rq_unlock(rq, p, &rf); |
---|
4908 | | - return -EINVAL; |
---|
| 6277 | + retval = -EINVAL; |
---|
| 6278 | + goto unlock; |
---|
4909 | 6279 | } |
---|
4910 | 6280 | |
---|
4911 | 6281 | /* |
---|
.. | .. |
---|
4923 | 6293 | goto change; |
---|
4924 | 6294 | |
---|
4925 | 6295 | p->sched_reset_on_fork = reset_on_fork; |
---|
4926 | | - task_rq_unlock(rq, p, &rf); |
---|
4927 | | - return 0; |
---|
| 6296 | + retval = 0; |
---|
| 6297 | + goto unlock; |
---|
4928 | 6298 | } |
---|
4929 | 6299 | change: |
---|
4930 | 6300 | |
---|
.. | .. |
---|
4937 | 6307 | if (rt_bandwidth_enabled() && rt_policy(policy) && |
---|
4938 | 6308 | task_group(p)->rt_bandwidth.rt_runtime == 0 && |
---|
4939 | 6309 | !task_group_is_autogroup(task_group(p))) { |
---|
4940 | | - task_rq_unlock(rq, p, &rf); |
---|
4941 | | - return -EPERM; |
---|
| 6310 | + retval = -EPERM; |
---|
| 6311 | + goto unlock; |
---|
4942 | 6312 | } |
---|
4943 | 6313 | #endif |
---|
4944 | 6314 | #ifdef CONFIG_SMP |
---|
.. | .. |
---|
4951 | 6321 | * the entire root_domain to become SCHED_DEADLINE. We |
---|
4952 | 6322 | * will also fail if there's no bandwidth available. |
---|
4953 | 6323 | */ |
---|
4954 | | - if (!cpumask_subset(span, &p->cpus_allowed) || |
---|
| 6324 | + if (!cpumask_subset(span, p->cpus_ptr) || |
---|
4955 | 6325 | rq->rd->dl_bw.bw == 0) { |
---|
4956 | | - task_rq_unlock(rq, p, &rf); |
---|
4957 | | - return -EPERM; |
---|
| 6326 | + retval = -EPERM; |
---|
| 6327 | + goto unlock; |
---|
4958 | 6328 | } |
---|
4959 | 6329 | } |
---|
4960 | 6330 | #endif |
---|
.. | .. |
---|
4973 | 6343 | * is available. |
---|
4974 | 6344 | */ |
---|
4975 | 6345 | if ((dl_policy(policy) || dl_task(p)) && sched_dl_overflow(p, policy, attr)) { |
---|
4976 | | - task_rq_unlock(rq, p, &rf); |
---|
4977 | | - return -EBUSY; |
---|
| 6346 | + retval = -EBUSY; |
---|
| 6347 | + goto unlock; |
---|
4978 | 6348 | } |
---|
4979 | 6349 | |
---|
4980 | 6350 | p->sched_reset_on_fork = reset_on_fork; |
---|
4981 | 6351 | oldprio = p->prio; |
---|
4982 | 6352 | |
---|
| 6353 | + newprio = __normal_prio(policy, attr->sched_priority, attr->sched_nice); |
---|
4983 | 6354 | if (pi) { |
---|
4984 | 6355 | /* |
---|
4985 | 6356 | * Take priority boosted tasks into account. If the new |
---|
.. | .. |
---|
4988 | 6359 | * the runqueue. This will be done when the task deboost |
---|
4989 | 6360 | * itself. |
---|
4990 | 6361 | */ |
---|
4991 | | - new_effective_prio = rt_effective_prio(p, newprio); |
---|
4992 | | - if (new_effective_prio == oldprio) |
---|
| 6362 | + newprio = rt_effective_prio(p, newprio); |
---|
| 6363 | + if (newprio == oldprio) |
---|
4993 | 6364 | queue_flags &= ~DEQUEUE_MOVE; |
---|
4994 | 6365 | } |
---|
4995 | 6366 | |
---|
.. | .. |
---|
5002 | 6373 | |
---|
5003 | 6374 | prev_class = p->sched_class; |
---|
5004 | 6375 | |
---|
5005 | | - __setscheduler(rq, p, attr, pi); |
---|
| 6376 | + if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { |
---|
| 6377 | + __setscheduler_params(p, attr); |
---|
| 6378 | + __setscheduler_prio(p, newprio); |
---|
| 6379 | + trace_android_rvh_setscheduler(p); |
---|
| 6380 | + } |
---|
5006 | 6381 | __setscheduler_uclamp(p, attr); |
---|
5007 | 6382 | |
---|
5008 | 6383 | if (queued) { |
---|
.. | .. |
---|
5016 | 6391 | enqueue_task(rq, p, queue_flags); |
---|
5017 | 6392 | } |
---|
5018 | 6393 | if (running) |
---|
5019 | | - set_curr_task(rq, p); |
---|
| 6394 | + set_next_task(rq, p); |
---|
5020 | 6395 | |
---|
5021 | 6396 | check_class_changed(rq, p, prev_class, oldprio); |
---|
5022 | 6397 | |
---|
5023 | 6398 | /* Avoid rq from going away on us: */ |
---|
5024 | 6399 | preempt_disable(); |
---|
| 6400 | + head = splice_balance_callbacks(rq); |
---|
5025 | 6401 | task_rq_unlock(rq, p, &rf); |
---|
5026 | 6402 | |
---|
5027 | 6403 | if (pi) |
---|
5028 | 6404 | rt_mutex_adjust_pi(p); |
---|
5029 | 6405 | |
---|
5030 | 6406 | /* Run balance callbacks after we've adjusted the PI chain: */ |
---|
5031 | | - balance_callback(rq); |
---|
| 6407 | + balance_callbacks(rq, head); |
---|
5032 | 6408 | preempt_enable(); |
---|
5033 | 6409 | |
---|
5034 | 6410 | return 0; |
---|
| 6411 | + |
---|
| 6412 | +unlock: |
---|
| 6413 | + task_rq_unlock(rq, p, &rf); |
---|
| 6414 | + return retval; |
---|
5035 | 6415 | } |
---|
5036 | 6416 | |
---|
5037 | 6417 | static int _sched_setscheduler(struct task_struct *p, int policy, |
---|
.. | .. |
---|
5043 | 6423 | .sched_nice = PRIO_TO_NICE(p->static_prio), |
---|
5044 | 6424 | }; |
---|
5045 | 6425 | |
---|
| 6426 | + if (IS_ENABLED(CONFIG_ROCKCHIP_OPTIMIZE_RT_PRIO) && |
---|
| 6427 | + ((policy == SCHED_FIFO) || (policy == SCHED_RR))) { |
---|
| 6428 | + attr.sched_priority /= 2; |
---|
| 6429 | + if (!check) |
---|
| 6430 | + attr.sched_priority += MAX_RT_PRIO / 2; |
---|
| 6431 | + if (!attr.sched_priority) |
---|
| 6432 | + attr.sched_priority = 1; |
---|
| 6433 | + } |
---|
5046 | 6434 | /* Fixup the legacy SCHED_RESET_ON_FORK hack. */ |
---|
5047 | 6435 | if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { |
---|
5048 | 6436 | attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; |
---|
.. | .. |
---|
5057 | 6445 | * @p: the task in question. |
---|
5058 | 6446 | * @policy: new policy. |
---|
5059 | 6447 | * @param: structure containing the new RT priority. |
---|
| 6448 | + * |
---|
| 6449 | + * Use sched_set_fifo(), read its comment. |
---|
5060 | 6450 | * |
---|
5061 | 6451 | * Return: 0 on success. An error code otherwise. |
---|
5062 | 6452 | * |
---|
.. | .. |
---|
5079 | 6469 | { |
---|
5080 | 6470 | return __sched_setscheduler(p, attr, false, true); |
---|
5081 | 6471 | } |
---|
| 6472 | +EXPORT_SYMBOL_GPL(sched_setattr_nocheck); |
---|
5082 | 6473 | |
---|
5083 | 6474 | /** |
---|
5084 | 6475 | * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. |
---|
.. | .. |
---|
5099 | 6490 | return _sched_setscheduler(p, policy, param, false); |
---|
5100 | 6491 | } |
---|
5101 | 6492 | EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); |
---|
| 6493 | + |
---|
| 6494 | +/* |
---|
| 6495 | + * SCHED_FIFO is a broken scheduler model; that is, it is fundamentally |
---|
| 6496 | + * incapable of resource management, which is the one thing an OS really should |
---|
| 6497 | + * be doing. |
---|
| 6498 | + * |
---|
| 6499 | + * This is of course the reason it is limited to privileged users only. |
---|
| 6500 | + * |
---|
| 6501 | + * Worse still; it is fundamentally impossible to compose static priority |
---|
| 6502 | + * workloads. You cannot take two correctly working static prio workloads |
---|
| 6503 | + * and smash them together and still expect them to work. |
---|
| 6504 | + * |
---|
| 6505 | + * For this reason 'all' FIFO tasks the kernel creates are basically at: |
---|
| 6506 | + * |
---|
| 6507 | + * MAX_RT_PRIO / 2 |
---|
| 6508 | + * |
---|
| 6509 | + * The administrator _MUST_ configure the system, the kernel simply doesn't |
---|
| 6510 | + * know enough information to make a sensible choice. |
---|
| 6511 | + */ |
---|
| 6512 | +void sched_set_fifo(struct task_struct *p) |
---|
| 6513 | +{ |
---|
| 6514 | + struct sched_param sp = { .sched_priority = MAX_RT_PRIO / 2 }; |
---|
| 6515 | + WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); |
---|
| 6516 | +} |
---|
| 6517 | +EXPORT_SYMBOL_GPL(sched_set_fifo); |
---|
| 6518 | + |
---|
| 6519 | +/* |
---|
| 6520 | + * For when you don't much care about FIFO, but want to be above SCHED_NORMAL. |
---|
| 6521 | + */ |
---|
| 6522 | +void sched_set_fifo_low(struct task_struct *p) |
---|
| 6523 | +{ |
---|
| 6524 | + struct sched_param sp = { .sched_priority = 1 }; |
---|
| 6525 | + WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); |
---|
| 6526 | +} |
---|
| 6527 | +EXPORT_SYMBOL_GPL(sched_set_fifo_low); |
---|
| 6528 | + |
---|
| 6529 | +void sched_set_normal(struct task_struct *p, int nice) |
---|
| 6530 | +{ |
---|
| 6531 | + struct sched_attr attr = { |
---|
| 6532 | + .sched_policy = SCHED_NORMAL, |
---|
| 6533 | + .sched_nice = nice, |
---|
| 6534 | + }; |
---|
| 6535 | + WARN_ON_ONCE(sched_setattr_nocheck(p, &attr) != 0); |
---|
| 6536 | +} |
---|
| 6537 | +EXPORT_SYMBOL_GPL(sched_set_normal); |
---|
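
These helpers exist so that in-kernel users ask for a policy ("some FIFO priority", "just above SCHED_NORMAL") instead of hard-coding a number the administrator then has to fight. A minimal sketch of how a hypothetical driver thread could use the new API (illustrative only; my_worker_fn and my_driver_start_thread are made-up names, not part of this patch):

#include <linux/err.h>
#include <linux/jiffies.h>
#include <linux/kthread.h>
#include <linux/sched.h>

static int my_worker_fn(void *data)
{
	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);
	return 0;
}

static int my_driver_start_thread(void)
{
	struct task_struct *tsk;

	tsk = kthread_run(my_worker_fn, NULL, "my-worker");
	if (IS_ERR(tsk))
		return PTR_ERR(tsk);

	/* Request "some" RT priority; the admin can still re-tune it. */
	sched_set_fifo(tsk);
	return 0;
}
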
5102 | 6538 | |
---|
5103 | 6539 | static int |
---|
5104 | 6540 | do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) |
---|
.. | .. |
---|
5130 | 6566 | u32 size; |
---|
5131 | 6567 | int ret; |
---|
5132 | 6568 | |
---|
5133 | | - if (!access_ok(VERIFY_WRITE, uattr, SCHED_ATTR_SIZE_VER0)) |
---|
5134 | | - return -EFAULT; |
---|
5135 | | - |
---|
5136 | 6569 | /* Zero the full structure, so that a short copy will be nice: */ |
---|
5137 | 6570 | memset(attr, 0, sizeof(*attr)); |
---|
5138 | 6571 | |
---|
.. | .. |
---|
5140 | 6573 | if (ret) |
---|
5141 | 6574 | return ret; |
---|
5142 | 6575 | |
---|
5143 | | - /* Bail out on silly large: */ |
---|
5144 | | - if (size > PAGE_SIZE) |
---|
5145 | | - goto err_size; |
---|
5146 | | - |
---|
5147 | 6576 | /* ABI compatibility quirk: */ |
---|
5148 | 6577 | if (!size) |
---|
5149 | 6578 | size = SCHED_ATTR_SIZE_VER0; |
---|
5150 | | - |
---|
5151 | | - if (size < SCHED_ATTR_SIZE_VER0) |
---|
| 6579 | + if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) |
---|
5152 | 6580 | goto err_size; |
---|
5153 | 6581 | |
---|
5154 | | - /* |
---|
5155 | | - * If we're handed a bigger struct than we know of, |
---|
5156 | | - * ensure all the unknown bits are 0 - i.e. new |
---|
5157 | | - * user-space does not rely on any kernel feature |
---|
5158 | | - * extensions we dont know about yet. |
---|
5159 | | - */ |
---|
5160 | | - if (size > sizeof(*attr)) { |
---|
5161 | | - unsigned char __user *addr; |
---|
5162 | | - unsigned char __user *end; |
---|
5163 | | - unsigned char val; |
---|
5164 | | - |
---|
5165 | | - addr = (void __user *)uattr + sizeof(*attr); |
---|
5166 | | - end = (void __user *)uattr + size; |
---|
5167 | | - |
---|
5168 | | - for (; addr < end; addr++) { |
---|
5169 | | - ret = get_user(val, addr); |
---|
5170 | | - if (ret) |
---|
5171 | | - return ret; |
---|
5172 | | - if (val) |
---|
5173 | | - goto err_size; |
---|
5174 | | - } |
---|
5175 | | - size = sizeof(*attr); |
---|
| 6582 | + ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); |
---|
| 6583 | + if (ret) { |
---|
| 6584 | + if (ret == -E2BIG) |
---|
| 6585 | + goto err_size; |
---|
| 6586 | + return ret; |
---|
5176 | 6587 | } |
---|
5177 | | - |
---|
5178 | | - ret = copy_from_user(attr, uattr, size); |
---|
5179 | | - if (ret) |
---|
5180 | | - return -EFAULT; |
---|
5181 | 6588 | |
---|
5182 | 6589 | if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) && |
---|
5183 | 6590 | size < SCHED_ATTR_SIZE_VER1) |
---|
.. | .. |
---|
5194 | 6601 | err_size: |
---|
5195 | 6602 | put_user(sizeof(*attr), &uattr->size); |
---|
5196 | 6603 | return -E2BIG; |
---|
| 6604 | +} |
---|
| 6605 | + |
---|
| 6606 | +static void get_params(struct task_struct *p, struct sched_attr *attr) |
---|
| 6607 | +{ |
---|
| 6608 | + if (task_has_dl_policy(p)) |
---|
| 6609 | + __getparam_dl(p, attr); |
---|
| 6610 | + else if (task_has_rt_policy(p)) |
---|
| 6611 | + attr->sched_priority = p->rt_priority; |
---|
| 6612 | + else |
---|
| 6613 | + attr->sched_nice = task_nice(p); |
---|
5197 | 6614 | } |
---|
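
copy_struct_from_user() replaces the open-coded loop but keeps the same extensible-ABI rule: a sched_attr larger than what the kernel knows about is accepted only if the extra trailing bytes are zero, otherwise the call fails with -E2BIG and the known size is reported back through uattr->size. From user space the contract looks like this minimal, hedged example (the struct is declared locally with the VER0 layout as documented in the sched_setattr(2) man page, and SYS_sched_setattr is assumed to be provided by <sys/syscall.h>):

#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

struct sched_attr {			/* SCHED_ATTR_SIZE_VER0 layout */
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;
	uint64_t sched_deadline;
	uint64_t sched_period;
};

int main(void)
{
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);	/* tell the kernel how much we pass */
	attr.sched_policy = 0;		/* SCHED_OTHER */
	attr.sched_nice = 5;

	if (syscall(SYS_sched_setattr, 0, &attr, 0))	/* pid 0 == caller */
		perror("sched_setattr");
	return 0;
}
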
5198 | 6615 | |
---|
5199 | 6616 | /** |
---|
.. | .. |
---|
5257 | 6674 | rcu_read_unlock(); |
---|
5258 | 6675 | |
---|
5259 | 6676 | if (likely(p)) { |
---|
| 6677 | + if (attr.sched_flags & SCHED_FLAG_KEEP_PARAMS) |
---|
| 6678 | + get_params(p, &attr); |
---|
5260 | 6679 | retval = sched_setattr(p, &attr); |
---|
5261 | 6680 | put_task_struct(p); |
---|
5262 | 6681 | } |
---|
.. | .. |
---|
5350 | 6769 | { |
---|
5351 | 6770 | unsigned int ksize = sizeof(*kattr); |
---|
5352 | 6771 | |
---|
5353 | | - if (!access_ok(VERIFY_WRITE, uattr, usize)) |
---|
| 6772 | + if (!access_ok(uattr, usize)) |
---|
5354 | 6773 | return -EFAULT; |
---|
5355 | 6774 | |
---|
5356 | 6775 | /* |
---|
.. | .. |
---|
5378 | 6797 | * sys_sched_getattr - similar to sched_getparam, but with sched_attr |
---|
5379 | 6798 | * @pid: the pid in question. |
---|
5380 | 6799 | * @uattr: structure containing the extended parameters. |
---|
5381 | | - * @usize: sizeof(attr) that user-space knows about, for forwards and backwards compatibility. |
---|
| 6800 | + * @usize: sizeof(attr) for fwd/bwd comp. |
---|
5382 | 6801 | * @flags: for future extension. |
---|
5383 | 6802 | */ |
---|
5384 | 6803 | SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, |
---|
.. | .. |
---|
5405 | 6824 | kattr.sched_policy = p->policy; |
---|
5406 | 6825 | if (p->sched_reset_on_fork) |
---|
5407 | 6826 | kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; |
---|
5408 | | - if (task_has_dl_policy(p)) |
---|
5409 | | - __getparam_dl(p, &kattr); |
---|
5410 | | - else if (task_has_rt_policy(p)) |
---|
5411 | | - kattr.sched_priority = p->rt_priority; |
---|
5412 | | - else |
---|
5413 | | - kattr.sched_nice = task_nice(p); |
---|
| 6827 | + get_params(p, &kattr); |
---|
| 6828 | + kattr.sched_flags &= SCHED_FLAG_ALL; |
---|
5414 | 6829 | |
---|
5415 | 6830 | #ifdef CONFIG_UCLAMP_TASK |
---|
| 6831 | + /* |
---|
| 6832 | + * This could race with another potential updater, but this is fine |
---|
| 6833 | + * because it'll correctly read the old or the new value. We don't need |
---|
| 6834 | + * to guarantee who wins the race as long as it doesn't return garbage. |
---|
| 6835 | + */ |
---|
5416 | 6836 | kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; |
---|
5417 | 6837 | kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; |
---|
5418 | 6838 | #endif |
---|
.. | .. |
---|
5431 | 6851 | cpumask_var_t cpus_allowed, new_mask; |
---|
5432 | 6852 | struct task_struct *p; |
---|
5433 | 6853 | int retval; |
---|
| 6854 | + int skip = 0; |
---|
5434 | 6855 | |
---|
5435 | 6856 | rcu_read_lock(); |
---|
5436 | 6857 | |
---|
.. | .. |
---|
5466 | 6887 | rcu_read_unlock(); |
---|
5467 | 6888 | } |
---|
5468 | 6889 | |
---|
| 6890 | + trace_android_vh_sched_setaffinity_early(p, in_mask, &skip); |
---|
| 6891 | + if (skip) |
---|
| 6892 | + goto out_free_new_mask; |
---|
5469 | 6893 | retval = security_task_setscheduler(p); |
---|
5470 | 6894 | if (retval) |
---|
5471 | 6895 | goto out_free_new_mask; |
---|
.. | .. |
---|
5492 | 6916 | } |
---|
5493 | 6917 | #endif |
---|
5494 | 6918 | again: |
---|
5495 | | - retval = __set_cpus_allowed_ptr(p, new_mask, true); |
---|
| 6919 | + retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK); |
---|
5496 | 6920 | |
---|
5497 | 6921 | if (!retval) { |
---|
5498 | 6922 | cpuset_cpus_allowed(p, cpus_allowed); |
---|
.. | .. |
---|
5506 | 6930 | goto again; |
---|
5507 | 6931 | } |
---|
5508 | 6932 | } |
---|
| 6933 | + |
---|
| 6934 | + trace_android_rvh_sched_setaffinity(p, in_mask, &retval); |
---|
| 6935 | + |
---|
5509 | 6936 | out_free_new_mask: |
---|
5510 | 6937 | free_cpumask_var(new_mask); |
---|
5511 | 6938 | out_free_cpus_allowed: |
---|
.. | .. |
---|
5514 | 6941 | put_task_struct(p); |
---|
5515 | 6942 | return retval; |
---|
5516 | 6943 | } |
---|
5517 | | -EXPORT_SYMBOL_GPL(sched_setaffinity); |
---|
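
The in-kernel export of sched_setaffinity() is dropped here; the function keeps serving the sched_setaffinity(2) syscall, whose user-space side looks like:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);	/* pin the calling thread to CPU 0 */

	if (sched_setaffinity(0, sizeof(set), &set))
		perror("sched_setaffinity");
	return 0;
}
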
5518 | 6944 | |
---|
5519 | 6945 | static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, |
---|
5520 | 6946 | struct cpumask *new_mask) |
---|
.. | .. |
---|
5569 | 6995 | goto out_unlock; |
---|
5570 | 6996 | |
---|
5571 | 6997 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
---|
5572 | | - cpumask_and(mask, &p->cpus_allowed, cpu_active_mask); |
---|
| 6998 | + cpumask_and(mask, &p->cpus_mask, cpu_active_mask); |
---|
5573 | 6999 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
---|
5574 | 7000 | |
---|
5575 | 7001 | out_unlock: |
---|
.. | .. |
---|
5633 | 7059 | schedstat_inc(rq->yld_count); |
---|
5634 | 7060 | current->sched_class->yield_task(rq); |
---|
5635 | 7061 | |
---|
| 7062 | + trace_android_rvh_do_sched_yield(rq); |
---|
| 7063 | + |
---|
5636 | 7064 | preempt_disable(); |
---|
5637 | 7065 | rq_unlock_irq(rq, &rf); |
---|
5638 | 7066 | sched_preempt_enable_no_resched(); |
---|
.. | .. |
---|
5646 | 7074 | return 0; |
---|
5647 | 7075 | } |
---|
5648 | 7076 | |
---|
5649 | | -#ifndef CONFIG_PREEMPT |
---|
| 7077 | +#ifndef CONFIG_PREEMPTION |
---|
5650 | 7078 | int __sched _cond_resched(void) |
---|
5651 | 7079 | { |
---|
5652 | 7080 | if (should_resched(0)) { |
---|
.. | .. |
---|
5663 | 7091 | * __cond_resched_lock() - if a reschedule is pending, drop the given lock, |
---|
5664 | 7092 | * call schedule, and on return reacquire the lock. |
---|
5665 | 7093 | * |
---|
5666 | | - * This works OK both with and without CONFIG_PREEMPT. We do strange low-level |
---|
| 7094 | + * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level |
---|
5667 | 7095 | * operations here to prevent schedule() from being called twice (once via |
---|
5668 | 7096 | * spin_unlock(), once by hand). |
---|
5669 | 7097 | */ |
---|
.. | .. |
---|
5767 | 7195 | if (task_running(p_rq, p) || p->state) |
---|
5768 | 7196 | goto out_unlock; |
---|
5769 | 7197 | |
---|
5770 | | - yielded = curr->sched_class->yield_to_task(rq, p, preempt); |
---|
| 7198 | + yielded = curr->sched_class->yield_to_task(rq, p); |
---|
5771 | 7199 | if (yielded) { |
---|
5772 | 7200 | schedstat_inc(rq->yld_count); |
---|
5773 | 7201 | /* |
---|
.. | .. |
---|
5933 | 7361 | * an error code. |
---|
5934 | 7362 | */ |
---|
5935 | 7363 | SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, |
---|
5936 | | - struct timespec __user *, interval) |
---|
| 7364 | + struct __kernel_timespec __user *, interval) |
---|
5937 | 7365 | { |
---|
5938 | 7366 | struct timespec64 t; |
---|
5939 | 7367 | int retval = sched_rr_get_interval(pid, &t); |
---|
.. | .. |
---|
5944 | 7372 | return retval; |
---|
5945 | 7373 | } |
---|
5946 | 7374 | |
---|
5947 | | -#ifdef CONFIG_COMPAT |
---|
5948 | | -COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval, |
---|
5949 | | - compat_pid_t, pid, |
---|
5950 | | - struct compat_timespec __user *, interval) |
---|
| 7375 | +#ifdef CONFIG_COMPAT_32BIT_TIME |
---|
| 7376 | +SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, |
---|
| 7377 | + struct old_timespec32 __user *, interval) |
---|
5951 | 7378 | { |
---|
5952 | 7379 | struct timespec64 t; |
---|
5953 | 7380 | int retval = sched_rr_get_interval(pid, &t); |
---|
5954 | 7381 | |
---|
5955 | 7382 | if (retval == 0) |
---|
5956 | | - retval = compat_put_timespec64(&t, interval); |
---|
| 7383 | + retval = put_old_timespec32(&t, interval); |
---|
5957 | 7384 | return retval; |
---|
5958 | 7385 | } |
---|
5959 | 7386 | #endif |
---|
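
The native syscall now takes a __kernel_timespec and the 32-bit compat entry uses the y2038-safe old_timespec32 helpers; the libc wrapper hides all of that, e.g.:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	/* Round-robin time quantum of the calling thread. */
	if (sched_rr_get_interval(0, &ts))
		perror("sched_rr_get_interval");
	else
		printf("quantum: %ld.%09ld s\n", (long)ts.tv_sec, (long)ts.tv_nsec);
	return 0;
}
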
.. | .. |
---|
5966 | 7393 | if (!try_get_task_stack(p)) |
---|
5967 | 7394 | return; |
---|
5968 | 7395 | |
---|
5969 | | - printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); |
---|
| 7396 | + pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p)); |
---|
5970 | 7397 | |
---|
5971 | 7398 | if (p->state == TASK_RUNNING) |
---|
5972 | | - printk(KERN_CONT " running task "); |
---|
| 7399 | + pr_cont(" running task "); |
---|
5973 | 7400 | #ifdef CONFIG_DEBUG_STACK_USAGE |
---|
5974 | 7401 | free = stack_not_used(p); |
---|
5975 | 7402 | #endif |
---|
.. | .. |
---|
5978 | 7405 | if (pid_alive(p)) |
---|
5979 | 7406 | ppid = task_pid_nr(rcu_dereference(p->real_parent)); |
---|
5980 | 7407 | rcu_read_unlock(); |
---|
5981 | | - printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, |
---|
5982 | | - task_pid_nr(p), ppid, |
---|
| 7408 | + pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n", |
---|
| 7409 | + free, task_pid_nr(p), ppid, |
---|
5983 | 7410 | (unsigned long)task_thread_info(p)->flags); |
---|
5984 | 7411 | |
---|
5985 | 7412 | print_worker_info(KERN_INFO, p); |
---|
5986 | | - show_stack(p, NULL); |
---|
| 7413 | + trace_android_vh_sched_show_task(p); |
---|
| 7414 | + show_stack(p, NULL, KERN_INFO); |
---|
5987 | 7415 | put_task_stack(p); |
---|
5988 | 7416 | } |
---|
5989 | 7417 | EXPORT_SYMBOL_GPL(sched_show_task); |
---|
.. | .. |
---|
6014 | 7442 | { |
---|
6015 | 7443 | struct task_struct *g, *p; |
---|
6016 | 7444 | |
---|
6017 | | -#if BITS_PER_LONG == 32 |
---|
6018 | | - printk(KERN_INFO |
---|
6019 | | - " task PC stack pid father\n"); |
---|
6020 | | -#else |
---|
6021 | | - printk(KERN_INFO |
---|
6022 | | - " task PC stack pid father\n"); |
---|
6023 | | -#endif |
---|
6024 | 7445 | rcu_read_lock(); |
---|
6025 | 7446 | for_each_process_thread(g, p) { |
---|
6026 | 7447 | /* |
---|
.. | .. |
---|
6056 | 7477 | * NOTE: this function does not set the idle thread's NEED_RESCHED |
---|
6057 | 7478 | * flag, to make booting more robust. |
---|
6058 | 7479 | */ |
---|
6059 | | -void init_idle(struct task_struct *idle, int cpu) |
---|
| 7480 | +void __init init_idle(struct task_struct *idle, int cpu) |
---|
6060 | 7481 | { |
---|
6061 | 7482 | struct rq *rq = cpu_rq(cpu); |
---|
6062 | 7483 | unsigned long flags; |
---|
.. | .. |
---|
6070 | 7491 | idle->se.exec_start = sched_clock(); |
---|
6071 | 7492 | idle->flags |= PF_IDLE; |
---|
6072 | 7493 | |
---|
6073 | | - scs_task_reset(idle); |
---|
6074 | | - kasan_unpoison_task_stack(idle); |
---|
6075 | | - |
---|
6076 | 7494 | #ifdef CONFIG_SMP |
---|
6077 | 7495 | /* |
---|
6078 | 7496 | * It's possible that init_idle() gets called multiple times on a task, |
---|
.. | .. |
---|
6080 | 7498 | * |
---|
6081 | 7499 | * And since this is boot we can forgo the serialization. |
---|
6082 | 7500 | */ |
---|
6083 | | - set_cpus_allowed_common(idle, cpumask_of(cpu)); |
---|
| 7501 | + set_cpus_allowed_common(idle, cpumask_of(cpu), 0); |
---|
6084 | 7502 | #endif |
---|
6085 | 7503 | /* |
---|
6086 | 7504 | * We're having a chicken and egg problem, even though we are |
---|
.. | .. |
---|
6096 | 7514 | __set_task_cpu(idle, cpu); |
---|
6097 | 7515 | rcu_read_unlock(); |
---|
6098 | 7516 | |
---|
6099 | | - rq->curr = rq->idle = idle; |
---|
| 7517 | + rq->idle = idle; |
---|
| 7518 | + rcu_assign_pointer(rq->curr, idle); |
---|
6100 | 7519 | idle->on_rq = TASK_ON_RQ_QUEUED; |
---|
6101 | 7520 | #ifdef CONFIG_SMP |
---|
6102 | 7521 | idle->on_cpu = 1; |
---|
.. | .. |
---|
6106 | 7525 | |
---|
6107 | 7526 | /* Set the preempt count _outside_ the spinlocks! */ |
---|
6108 | 7527 | init_idle_preempt_count(idle, cpu); |
---|
6109 | | - |
---|
| 7528 | +#ifdef CONFIG_HAVE_PREEMPT_LAZY |
---|
| 7529 | + task_thread_info(idle)->preempt_lazy_count = 0; |
---|
| 7530 | +#endif |
---|
6110 | 7531 | /* |
---|
6111 | 7532 | * The idle tasks have their own, simple scheduling class: |
---|
6112 | 7533 | */ |
---|
.. | .. |
---|
6134 | 7555 | } |
---|
6135 | 7556 | |
---|
6136 | 7557 | int task_can_attach(struct task_struct *p, |
---|
6137 | | - const struct cpumask *cs_cpus_allowed) |
---|
| 7558 | + const struct cpumask *cs_effective_cpus) |
---|
6138 | 7559 | { |
---|
6139 | 7560 | int ret = 0; |
---|
6140 | 7561 | |
---|
.. | .. |
---|
6145 | 7566 | * allowed nodes is unnecessary. Thus, cpusets are not |
---|
6146 | 7567 | * applicable for such threads. This prevents checking for |
---|
6147 | 7568 | * success of set_cpus_allowed_ptr() on all attached tasks |
---|
6148 | | - * before cpus_allowed may be changed. |
---|
| 7569 | + * before cpus_mask may be changed. |
---|
6149 | 7570 | */ |
---|
6150 | 7571 | if (p->flags & PF_NO_SETAFFINITY) { |
---|
6151 | 7572 | ret = -EINVAL; |
---|
.. | .. |
---|
6153 | 7574 | } |
---|
6154 | 7575 | |
---|
6155 | 7576 | if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span, |
---|
6156 | | - cs_cpus_allowed)) |
---|
6157 | | - ret = dl_task_can_attach(p, cs_cpus_allowed); |
---|
| 7577 | + cs_effective_cpus)) { |
---|
| 7578 | + int cpu = cpumask_any_and(cpu_active_mask, cs_effective_cpus); |
---|
| 7579 | + |
---|
| 7580 | + if (unlikely(cpu >= nr_cpu_ids)) |
---|
| 7581 | + return -EINVAL; |
---|
| 7582 | + ret = dl_cpu_busy(cpu, p); |
---|
| 7583 | + } |
---|
6158 | 7584 | |
---|
6159 | 7585 | out: |
---|
6160 | 7586 | return ret; |
---|
.. | .. |
---|
6172 | 7598 | if (curr_cpu == target_cpu) |
---|
6173 | 7599 | return 0; |
---|
6174 | 7600 | |
---|
6175 | | - if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed)) |
---|
| 7601 | + if (!cpumask_test_cpu(target_cpu, p->cpus_ptr)) |
---|
6176 | 7602 | return -EINVAL; |
---|
6177 | 7603 | |
---|
6178 | 7604 | /* TODO: This is not properly updating schedstats */ |
---|
.. | .. |
---|
6205 | 7631 | if (queued) |
---|
6206 | 7632 | enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); |
---|
6207 | 7633 | if (running) |
---|
6208 | | - set_curr_task(rq, p); |
---|
| 7634 | + set_next_task(rq, p); |
---|
6209 | 7635 | task_rq_unlock(rq, p, &rf); |
---|
6210 | 7636 | } |
---|
6211 | 7637 | #endif /* CONFIG_NUMA_BALANCING */ |
---|
6212 | 7638 | |
---|
6213 | 7639 | #ifdef CONFIG_HOTPLUG_CPU |
---|
| 7640 | + |
---|
6214 | 7641 | /* |
---|
6215 | 7642 | * Ensure that the idle task is using init_mm right before its CPU goes |
---|
6216 | 7643 | * offline. |
---|
.. | .. |
---|
6230 | 7657 | /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ |
---|
6231 | 7658 | } |
---|
6232 | 7659 | |
---|
6233 | | -/* |
---|
6234 | | - * Since this CPU is going 'away' for a while, fold any nr_active delta |
---|
6235 | | - * we might have. Assumes we're called after migrate_tasks() so that the |
---|
6236 | | - * nr_active count is stable. We need to take the teardown thread which |
---|
6237 | | - * is calling this into account, so we hand in adjust = 1 to the load |
---|
6238 | | - * calculation. |
---|
6239 | | - * |
---|
6240 | | - * Also see the comment "Global load-average calculations". |
---|
6241 | | - */ |
---|
6242 | | -static void calc_load_migrate(struct rq *rq) |
---|
| 7660 | +static int __balance_push_cpu_stop(void *arg) |
---|
6243 | 7661 | { |
---|
6244 | | - long delta = calc_load_fold_active(rq, 1); |
---|
6245 | | - if (delta) |
---|
6246 | | - atomic_long_add(delta, &calc_load_tasks); |
---|
6247 | | -} |
---|
| 7662 | + struct task_struct *p = arg; |
---|
| 7663 | + struct rq *rq = this_rq(); |
---|
| 7664 | + struct rq_flags rf; |
---|
| 7665 | + int cpu; |
---|
6248 | 7666 | |
---|
6249 | | -static void put_prev_task_fake(struct rq *rq, struct task_struct *prev) |
---|
6250 | | -{ |
---|
6251 | | -} |
---|
| 7667 | + raw_spin_lock_irq(&p->pi_lock); |
---|
| 7668 | + rq_lock(rq, &rf); |
---|
6252 | 7669 | |
---|
6253 | | -static const struct sched_class fake_sched_class = { |
---|
6254 | | - .put_prev_task = put_prev_task_fake, |
---|
6255 | | -}; |
---|
6256 | | - |
---|
6257 | | -static struct task_struct fake_task = { |
---|
6258 | | - /* |
---|
6259 | | - * Avoid pull_{rt,dl}_task() |
---|
6260 | | - */ |
---|
6261 | | - .prio = MAX_PRIO + 1, |
---|
6262 | | - .sched_class = &fake_sched_class, |
---|
6263 | | -}; |
---|
6264 | | - |
---|
6265 | | -/* |
---|
6266 | | - * Migrate all tasks from the rq, sleeping tasks will be migrated by |
---|
6267 | | - * try_to_wake_up()->select_task_rq(). |
---|
6268 | | - * |
---|
6269 | | - * Called with rq->lock held even though we'er in stop_machine() and |
---|
6270 | | - * there's no concurrency possible, we hold the required locks anyway |
---|
6271 | | - * because of lock validation efforts. |
---|
6272 | | - */ |
---|
6273 | | -static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf) |
---|
6274 | | -{ |
---|
6275 | | - struct rq *rq = dead_rq; |
---|
6276 | | - struct task_struct *next, *stop = rq->stop; |
---|
6277 | | - struct rq_flags orf = *rf; |
---|
6278 | | - int dest_cpu; |
---|
6279 | | - |
---|
6280 | | - /* |
---|
6281 | | - * Fudge the rq selection such that the below task selection loop |
---|
6282 | | - * doesn't get stuck on the currently eligible stop task. |
---|
6283 | | - * |
---|
6284 | | - * We're currently inside stop_machine() and the rq is either stuck |
---|
6285 | | - * in the stop_machine_cpu_stop() loop, or we're executing this code, |
---|
6286 | | - * either way we should never end up calling schedule() until we're |
---|
6287 | | - * done here. |
---|
6288 | | - */ |
---|
6289 | | - rq->stop = NULL; |
---|
6290 | | - |
---|
6291 | | - /* |
---|
6292 | | - * put_prev_task() and pick_next_task() sched |
---|
6293 | | - * class method both need to have an up-to-date |
---|
6294 | | - * value of rq->clock[_task] |
---|
6295 | | - */ |
---|
6296 | 7670 | update_rq_clock(rq); |
---|
6297 | 7671 | |
---|
6298 | | - for (;;) { |
---|
6299 | | - /* |
---|
6300 | | - * There's this thread running, bail when that's the only |
---|
6301 | | - * remaining thread: |
---|
6302 | | - */ |
---|
6303 | | - if (rq->nr_running == 1) |
---|
6304 | | - break; |
---|
6305 | | - |
---|
6306 | | - /* |
---|
6307 | | - * pick_next_task() assumes pinned rq->lock: |
---|
6308 | | - */ |
---|
6309 | | - next = pick_next_task(rq, &fake_task, rf); |
---|
6310 | | - BUG_ON(!next); |
---|
6311 | | - put_prev_task(rq, next); |
---|
6312 | | - |
---|
6313 | | - /* |
---|
6314 | | - * Rules for changing task_struct::cpus_allowed are holding |
---|
6315 | | - * both pi_lock and rq->lock, such that holding either |
---|
6316 | | - * stabilizes the mask. |
---|
6317 | | - * |
---|
6318 | | - * Drop rq->lock is not quite as disastrous as it usually is |
---|
6319 | | - * because !cpu_active at this point, which means load-balance |
---|
6320 | | - * will not interfere. Also, stop-machine. |
---|
6321 | | - */ |
---|
6322 | | - rq_unlock(rq, rf); |
---|
6323 | | - raw_spin_lock(&next->pi_lock); |
---|
6324 | | - rq_relock(rq, rf); |
---|
6325 | | - |
---|
6326 | | - /* |
---|
6327 | | - * Since we're inside stop-machine, _nothing_ should have |
---|
6328 | | - * changed the task, WARN if weird stuff happened, because in |
---|
6329 | | - * that case the above rq->lock drop is a fail too. |
---|
6330 | | - */ |
---|
6331 | | - if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) { |
---|
6332 | | - raw_spin_unlock(&next->pi_lock); |
---|
6333 | | - continue; |
---|
6334 | | - } |
---|
6335 | | - |
---|
6336 | | - /* Find suitable destination for @next, with force if needed. */ |
---|
6337 | | - dest_cpu = select_fallback_rq(dead_rq->cpu, next); |
---|
6338 | | - rq = __migrate_task(rq, rf, next, dest_cpu); |
---|
6339 | | - if (rq != dead_rq) { |
---|
6340 | | - rq_unlock(rq, rf); |
---|
6341 | | - rq = dead_rq; |
---|
6342 | | - *rf = orf; |
---|
6343 | | - rq_relock(rq, rf); |
---|
6344 | | - } |
---|
6345 | | - raw_spin_unlock(&next->pi_lock); |
---|
| 7672 | + if (task_rq(p) == rq && task_on_rq_queued(p)) { |
---|
| 7673 | + cpu = select_fallback_rq(rq->cpu, p); |
---|
| 7674 | + rq = __migrate_task(rq, &rf, p, cpu); |
---|
6346 | 7675 | } |
---|
6347 | 7676 | |
---|
6348 | | - rq->stop = stop; |
---|
| 7677 | + rq_unlock(rq, &rf); |
---|
| 7678 | + raw_spin_unlock_irq(&p->pi_lock); |
---|
| 7679 | + |
---|
| 7680 | + put_task_struct(p); |
---|
| 7681 | + |
---|
| 7682 | + return 0; |
---|
6349 | 7683 | } |
---|
| 7684 | + |
---|
| 7685 | +static DEFINE_PER_CPU(struct cpu_stop_work, push_work); |
---|
| 7686 | + |
---|
| 7687 | +/* |
---|
| 7688 | + * Ensure we only run per-cpu kthreads once the CPU goes !active. |
---|
| 7689 | + */ |
---|
| 7690 | + |
---|
| 7691 | + |
---|
| 7692 | +static void balance_push(struct rq *rq) |
---|
| 7693 | +{ |
---|
| 7694 | + struct task_struct *push_task = rq->curr; |
---|
| 7695 | + |
---|
| 7696 | + lockdep_assert_held(&rq->lock); |
---|
| 7697 | + SCHED_WARN_ON(rq->cpu != smp_processor_id()); |
---|
| 7698 | + |
---|
| 7699 | + /* |
---|
| 7700 | + * Both the cpu-hotplug and stop task are in this case and are |
---|
| 7701 | + * required to complete the hotplug process. |
---|
| 7702 | + */ |
---|
| 7703 | + if (is_per_cpu_kthread(push_task) || is_migration_disabled(push_task)) { |
---|
| 7704 | + /* |
---|
| 7705 | + * If this is the idle task on the outgoing CPU try to wake |
---|
| 7706 | + * up the hotplug control thread which might wait for the |
---|
| 7707 | + * last task to vanish. The rcuwait_active() check is |
---|
| 7708 | + * accurate here because the waiter is pinned on this CPU |
---|
| 7709 | + * and can't obviously be running in parallel. |
---|
| 7710 | + * |
---|
| 7711 | + * On RT kernels this also has to check whether there are |
---|
| 7712 | + * pinned and scheduled out tasks on the runqueue. They |
---|
| 7713 | + * need to leave the migrate disabled section first. |
---|
| 7714 | + */ |
---|
| 7715 | + if (!rq->nr_running && !rq_has_pinned_tasks(rq) && |
---|
| 7716 | + rcuwait_active(&rq->hotplug_wait)) { |
---|
| 7717 | + raw_spin_unlock(&rq->lock); |
---|
| 7718 | + rcuwait_wake_up(&rq->hotplug_wait); |
---|
| 7719 | + raw_spin_lock(&rq->lock); |
---|
| 7720 | + } |
---|
| 7721 | + return; |
---|
| 7722 | + } |
---|
| 7723 | + |
---|
| 7724 | + get_task_struct(push_task); |
---|
| 7725 | + /* |
---|
| 7726 | + * Temporarily drop rq->lock such that we can wake-up the stop task. |
---|
| 7727 | + * Both preemption and IRQs are still disabled. |
---|
| 7728 | + */ |
---|
| 7729 | + raw_spin_unlock(&rq->lock); |
---|
| 7730 | + stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task, |
---|
| 7731 | + this_cpu_ptr(&push_work)); |
---|
| 7732 | + /* |
---|
| 7733 | + * At this point need_resched() is true and we'll take the loop in |
---|
| 7734 | + * schedule(). The next pick is obviously going to be the stop task |
---|
| 7735 | + * which is_per_cpu_kthread() and will push this task away. |
---|
| 7736 | + */ |
---|
| 7737 | + raw_spin_lock(&rq->lock); |
---|
| 7738 | +} |
---|
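
balance_push() deliberately does not migrate the task itself under rq->lock; it defers to the per-CPU stopper via stop_one_cpu_nowait() and lets __balance_push_cpu_stop() pick the fallback CPU. A condensed sketch of that stopper pattern (demo_stop_fn and kick_stopper are hypothetical names, not part of this patch):

#include <linux/percpu.h>
#include <linux/stop_machine.h>

static DEFINE_PER_CPU(struct cpu_stop_work, demo_work);

static int demo_stop_fn(void *arg)
{
	/* Runs on the stopper thread of the target CPU, preemption disabled. */
	return 0;
}

static void kick_stopper(int cpu, void *arg)
{
	/* The work buffer must stay valid until the stopper has consumed it. */
	stop_one_cpu_nowait(cpu, demo_stop_fn, arg, per_cpu_ptr(&demo_work, cpu));
}
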
| 7739 | + |
---|
| 7740 | +static void balance_push_set(int cpu, bool on) |
---|
| 7741 | +{ |
---|
| 7742 | + struct rq *rq = cpu_rq(cpu); |
---|
| 7743 | + struct rq_flags rf; |
---|
| 7744 | + |
---|
| 7745 | + rq_lock_irqsave(rq, &rf); |
---|
| 7746 | + if (on) |
---|
| 7747 | + rq->balance_flags |= BALANCE_PUSH; |
---|
| 7748 | + else |
---|
| 7749 | + rq->balance_flags &= ~BALANCE_PUSH; |
---|
| 7750 | + rq_unlock_irqrestore(rq, &rf); |
---|
| 7751 | +} |
---|
| 7752 | + |
---|
| 7753 | +/* |
---|
| 7754 | + * Invoked from a CPUs hotplug control thread after the CPU has been marked |
---|
| 7755 | + * inactive. All tasks which are not per CPU kernel threads are either |
---|
| 7756 | + * pushed off this CPU now via balance_push() or placed on a different CPU |
---|
| 7757 | + * during wakeup. Wait until the CPU is quiescent. |
---|
| 7758 | + */ |
---|
| 7759 | +static void balance_hotplug_wait(void) |
---|
| 7760 | +{ |
---|
| 7761 | + struct rq *rq = this_rq(); |
---|
| 7762 | + |
---|
| 7763 | + rcuwait_wait_event(&rq->hotplug_wait, |
---|
| 7764 | + rq->nr_running == 1 && !rq_has_pinned_tasks(rq), |
---|
| 7765 | + TASK_UNINTERRUPTIBLE); |
---|
| 7766 | +} |
---|
| 7767 | + |
---|
| 7768 | +static int drain_rq_cpu_stop(void *data) |
---|
| 7769 | +{ |
---|
| 7770 | +#ifndef CONFIG_PREEMPT_RT |
---|
| 7771 | + struct rq *rq = this_rq(); |
---|
| 7772 | + struct rq_flags rf; |
---|
| 7773 | + |
---|
| 7774 | + rq_lock_irqsave(rq, &rf); |
---|
| 7775 | + migrate_tasks(rq, &rf, false); |
---|
| 7776 | + rq_unlock_irqrestore(rq, &rf); |
---|
| 7777 | +#endif |
---|
| 7778 | + return 0; |
---|
| 7779 | +} |
---|
| 7780 | + |
---|
| 7781 | +int sched_cpu_drain_rq(unsigned int cpu) |
---|
| 7782 | +{ |
---|
| 7783 | + struct cpu_stop_work *rq_drain = &(cpu_rq(cpu)->drain); |
---|
| 7784 | + struct cpu_stop_done *rq_drain_done = &(cpu_rq(cpu)->drain_done); |
---|
| 7785 | + |
---|
| 7786 | + if (idle_cpu(cpu)) { |
---|
| 7787 | + rq_drain->done = NULL; |
---|
| 7788 | + return 0; |
---|
| 7789 | + } |
---|
| 7790 | + |
---|
| 7791 | + return stop_one_cpu_async(cpu, drain_rq_cpu_stop, NULL, rq_drain, |
---|
| 7792 | + rq_drain_done); |
---|
| 7793 | +} |
---|
| 7794 | + |
---|
| 7795 | +void sched_cpu_drain_rq_wait(unsigned int cpu) |
---|
| 7796 | +{ |
---|
| 7797 | + struct cpu_stop_work *rq_drain = &(cpu_rq(cpu)->drain); |
---|
| 7798 | + |
---|
| 7799 | + if (rq_drain->done) |
---|
| 7800 | + cpu_stop_work_wait(rq_drain); |
---|
| 7801 | +} |
---|
| 7802 | + |
---|
| 7803 | +#else |
---|
| 7804 | + |
---|
| 7805 | +static inline void balance_push(struct rq *rq) |
---|
| 7806 | +{ |
---|
| 7807 | +} |
---|
| 7808 | + |
---|
| 7809 | +static inline void balance_push_set(int cpu, bool on) |
---|
| 7810 | +{ |
---|
| 7811 | +} |
---|
| 7812 | + |
---|
| 7813 | +static inline void balance_hotplug_wait(void) |
---|
| 7814 | +{ |
---|
| 7815 | +} |
---|
| 7816 | + |
---|
6350 | 7817 | #endif /* CONFIG_HOTPLUG_CPU */ |
---|
6351 | 7818 | |
---|
6352 | 7819 | void set_rq_online(struct rq *rq) |
---|
.. | .. |
---|
6417 | 7884 | static int cpuset_cpu_inactive(unsigned int cpu) |
---|
6418 | 7885 | { |
---|
6419 | 7886 | if (!cpuhp_tasks_frozen) { |
---|
6420 | | - if (dl_cpu_busy(cpu)) |
---|
6421 | | - return -EBUSY; |
---|
| 7887 | + int ret = dl_cpu_busy(cpu, NULL); |
---|
| 7888 | + |
---|
| 7889 | + if (ret) |
---|
| 7890 | + return ret; |
---|
6422 | 7891 | cpuset_update_active_cpus(); |
---|
6423 | 7892 | } else { |
---|
6424 | 7893 | num_cpus_frozen++; |
---|
.. | .. |
---|
6431 | 7900 | { |
---|
6432 | 7901 | struct rq *rq = cpu_rq(cpu); |
---|
6433 | 7902 | struct rq_flags rf; |
---|
| 7903 | + |
---|
| 7904 | + balance_push_set(cpu, false); |
---|
6434 | 7905 | |
---|
6435 | 7906 | #ifdef CONFIG_SCHED_SMT |
---|
6436 | 7907 | /* |
---|
.. | .. |
---|
6467 | 7938 | return 0; |
---|
6468 | 7939 | } |
---|
6469 | 7940 | |
---|
6470 | | -int sched_cpu_deactivate(unsigned int cpu) |
---|
| 7941 | +int sched_cpus_activate(struct cpumask *cpus) |
---|
6471 | 7942 | { |
---|
| 7943 | + unsigned int cpu; |
---|
| 7944 | + |
---|
| 7945 | + for_each_cpu(cpu, cpus) { |
---|
| 7946 | + if (sched_cpu_activate(cpu)) { |
---|
| 7947 | + for_each_cpu_and(cpu, cpus, cpu_active_mask) |
---|
| 7948 | + sched_cpu_deactivate(cpu); |
---|
| 7949 | + |
---|
| 7950 | + return -EBUSY; |
---|
| 7951 | + } |
---|
| 7952 | + } |
---|
| 7953 | + |
---|
| 7954 | + return 0; |
---|
| 7955 | +} |
---|
| 7956 | + |
---|
| 7957 | +int _sched_cpu_deactivate(unsigned int cpu) |
---|
| 7958 | +{ |
---|
| 7959 | + struct rq *rq = cpu_rq(cpu); |
---|
| 7960 | + struct rq_flags rf; |
---|
6472 | 7961 | int ret; |
---|
6473 | 7962 | |
---|
6474 | 7963 | set_cpu_active(cpu, false); |
---|
6475 | | - /* |
---|
6476 | | - * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU |
---|
6477 | | - * users of this state to go away such that all new such users will |
---|
6478 | | - * observe it. |
---|
6479 | | - * |
---|
6480 | | - * Do sync before park smpboot threads to take care the rcu boost case. |
---|
6481 | | - */ |
---|
6482 | | - synchronize_rcu_mult(call_rcu, call_rcu_sched); |
---|
| 7964 | + |
---|
| 7965 | + balance_push_set(cpu, true); |
---|
| 7966 | + |
---|
| 7967 | + rq_lock_irqsave(rq, &rf); |
---|
| 7968 | + if (rq->rd) { |
---|
| 7969 | + update_rq_clock(rq); |
---|
| 7970 | + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
---|
| 7971 | + set_rq_offline(rq); |
---|
| 7972 | + } |
---|
| 7973 | + rq_unlock_irqrestore(rq, &rf); |
---|
6483 | 7974 | |
---|
6484 | 7975 | #ifdef CONFIG_SCHED_SMT |
---|
6485 | 7976 | /* |
---|
.. | .. |
---|
6494 | 7985 | |
---|
6495 | 7986 | ret = cpuset_cpu_inactive(cpu); |
---|
6496 | 7987 | if (ret) { |
---|
| 7988 | + balance_push_set(cpu, false); |
---|
6497 | 7989 | set_cpu_active(cpu, true); |
---|
6498 | 7990 | return ret; |
---|
6499 | 7991 | } |
---|
6500 | 7992 | sched_domains_numa_masks_clear(cpu); |
---|
| 7993 | + |
---|
| 7994 | + update_max_interval(); |
---|
| 7995 | + |
---|
| 7996 | + return 0; |
---|
| 7997 | +} |
---|
| 7998 | + |
---|
| 7999 | +int sched_cpu_deactivate(unsigned int cpu) |
---|
| 8000 | +{ |
---|
| 8001 | + int ret = _sched_cpu_deactivate(cpu); |
---|
| 8002 | + |
---|
| 8003 | + if (ret) |
---|
| 8004 | + return ret; |
---|
| 8005 | + |
---|
| 8006 | + /* |
---|
| 8007 | + * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU |
---|
| 8008 | + * users of this state to go away such that all new such users will |
---|
| 8009 | + * observe it. |
---|
| 8010 | + * |
---|
| 8011 | + * Do the sync before parking smpboot threads to take care of the RCU boost case. |
---|
| 8012 | + */ |
---|
| 8013 | + synchronize_rcu(); |
---|
| 8014 | + |
---|
| 8015 | + return 0; |
---|
| 8016 | +} |
---|
| 8017 | + |
---|
| 8018 | +int sched_cpus_deactivate_nosync(struct cpumask *cpus) |
---|
| 8019 | +{ |
---|
| 8020 | + unsigned int cpu; |
---|
| 8021 | + |
---|
| 8022 | + for_each_cpu(cpu, cpus) { |
---|
| 8023 | + if (_sched_cpu_deactivate(cpu)) { |
---|
| 8024 | + for_each_cpu(cpu, cpus) { |
---|
| 8025 | + if (!cpu_active(cpu)) |
---|
| 8026 | + sched_cpu_activate(cpu); |
---|
| 8027 | + } |
---|
| 8028 | + |
---|
| 8029 | + return -EBUSY; |
---|
| 8030 | + } |
---|
| 8031 | + } |
---|
| 8032 | + |
---|
6501 | 8033 | return 0; |
---|
6502 | 8034 | } |
---|
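sched_cpus_deactivate_nosync() mirrors the batched activation above: each CPU in the mask goes through _sched_cpu_deactivate(), and a failure re-activates whatever had already been taken down. The "_nosync" suffix signals that the synchronize_rcu() done by sched_cpu_deactivate() is left to the caller, so one grace period can cover the whole batch. A hypothetical pause-CPUs sketch built on these helpers (pause_cpus_sketch() is an invented name):

```c
/* Sketch under the assumption that the caller supplies the RCU sync. */
static int pause_cpus_sketch(struct cpumask *cpus)
{
	int ret = sched_cpus_deactivate_nosync(cpus);

	if (ret)	/* rollback already happened inside the helper */
		return ret;

	/* Pay for a single grace period instead of one per CPU. */
	synchronize_rcu();
	return 0;
}
```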
6503 | 8035 | |
---|
.. | .. |
---|
6506 | 8038 | struct rq *rq = cpu_rq(cpu); |
---|
6507 | 8039 | |
---|
6508 | 8040 | rq->calc_load_update = calc_load_update; |
---|
6509 | | - update_max_interval(); |
---|
6510 | 8041 | } |
---|
6511 | 8042 | |
---|
6512 | 8043 | int sched_cpu_starting(unsigned int cpu) |
---|
6513 | 8044 | { |
---|
6514 | 8045 | sched_rq_cpu_starting(cpu); |
---|
6515 | 8046 | sched_tick_start(cpu); |
---|
| 8047 | + trace_android_rvh_sched_cpu_starting(cpu); |
---|
6516 | 8048 | return 0; |
---|
6517 | 8049 | } |
---|
6518 | 8050 | |
---|
6519 | 8051 | #ifdef CONFIG_HOTPLUG_CPU |
---|
| 8052 | + |
---|
| 8053 | +/* |
---|
| 8054 | + * Invoked immediately before the stopper thread is run to bring the |
---|
| 8055 | + * CPU down completely. At this point all per-CPU kthreads except the |
---|
| 8056 | + * hotplug thread (current) and the stopper thread (inactive) have |
---|
| 8057 | + * either been parked or been unbound from the outgoing CPU. Ensure that |
---|
| 8058 | + * any of those which might be on the way out are gone. |
---|
| 8059 | + * |
---|
| 8060 | + * If after this point a bound task is being woken on this CPU then the |
---|
| 8061 | + * responsible hotplug callback has failed to do its job. |
---|
| 8062 | + * sched_cpu_dying() will catch it with the appropriate fireworks. |
---|
| 8063 | + */ |
---|
| 8064 | +int sched_cpu_wait_empty(unsigned int cpu) |
---|
| 8065 | +{ |
---|
| 8066 | + balance_hotplug_wait(); |
---|
| 8067 | + return 0; |
---|
| 8068 | +} |
---|
| 8069 | + |
---|
| 8070 | +/* |
---|
| 8071 | + * Since this CPU is going 'away' for a while, fold any nr_active delta we |
---|
| 8072 | + * might have. Called from the CPU stopper task after ensuring that the |
---|
| 8073 | + * stopper is the last running task on the CPU, so nr_active count is |
---|
| 8074 | + * stable. We need to take the teardown thread, which is calling this, into |
---|
| 8075 | + * account, so we hand in adjust = 1 to the load calculation. |
---|
| 8076 | + * |
---|
| 8077 | + * Also see the comment "Global load-average calculations". |
---|
| 8078 | + */ |
---|
| 8079 | +static void calc_load_migrate(struct rq *rq) |
---|
| 8080 | +{ |
---|
| 8081 | + long delta = calc_load_fold_active(rq, 1); |
---|
| 8082 | + |
---|
| 8083 | + if (delta) |
---|
| 8084 | + atomic_long_add(delta, &calc_load_tasks); |
---|
| 8085 | +} |
---|
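To make the adjust = 1 handling concrete: if the outgoing runqueue still shows nr_running == 1 (only the stopper/teardown thread) while two uninterruptible sleepers are charged to it, the fold works out to roughly (1 - 1) + 2 = 2, and those two tasks are moved into the global calc_load_tasks counter instead of disappearing with the dead CPU. (The exact bookkeeping lives in kernel/sched/loadavg.c; the arithmetic here is only an illustration.)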
| 8086 | + |
---|
6520 | 8087 | int sched_cpu_dying(unsigned int cpu) |
---|
6521 | 8088 | { |
---|
6522 | 8089 | struct rq *rq = cpu_rq(cpu); |
---|
6523 | 8090 | struct rq_flags rf; |
---|
6524 | 8091 | |
---|
6525 | 8092 | /* Handle pending wakeups and then migrate everything off */ |
---|
6526 | | - sched_ttwu_pending(); |
---|
6527 | 8093 | sched_tick_stop(cpu); |
---|
6528 | 8094 | |
---|
6529 | 8095 | rq_lock_irqsave(rq, &rf); |
---|
6530 | | - if (rq->rd) { |
---|
6531 | | - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
---|
6532 | | - set_rq_offline(rq); |
---|
6533 | | - } |
---|
6534 | | - migrate_tasks(rq, &rf); |
---|
6535 | | - BUG_ON(rq->nr_running != 1); |
---|
| 8096 | + BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq)); |
---|
6536 | 8097 | rq_unlock_irqrestore(rq, &rf); |
---|
6537 | 8098 | |
---|
| 8099 | + trace_android_rvh_sched_cpu_dying(cpu); |
---|
| 8100 | + |
---|
6538 | 8101 | calc_load_migrate(rq); |
---|
6539 | | - update_max_interval(); |
---|
6540 | 8102 | nohz_balance_exit_idle(rq); |
---|
6541 | 8103 | hrtick_clear(rq); |
---|
6542 | 8104 | return 0; |
---|
.. | .. |
---|
6550 | 8112 | /* |
---|
6551 | 8113 | * There's no userspace yet to cause hotplug operations; hence all the |
---|
6552 | 8114 | * CPU masks are stable and all blatant races in the below code cannot |
---|
6553 | | - * happen. The hotplug lock is nevertheless taken to satisfy lockdep, |
---|
6554 | | - * but there won't be any contention on it. |
---|
| 8115 | + * happen. |
---|
6555 | 8116 | */ |
---|
6556 | | - cpus_read_lock(); |
---|
6557 | 8117 | mutex_lock(&sched_domains_mutex); |
---|
6558 | 8118 | sched_init_domains(cpu_active_mask); |
---|
6559 | 8119 | mutex_unlock(&sched_domains_mutex); |
---|
6560 | | - cpus_read_unlock(); |
---|
6561 | 8120 | |
---|
6562 | 8121 | /* Move init over to a non-isolated CPU */ |
---|
6563 | 8122 | if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) |
---|
6564 | 8123 | BUG(); |
---|
| 8124 | + |
---|
6565 | 8125 | sched_init_granularity(); |
---|
6566 | 8126 | |
---|
6567 | 8127 | init_sched_rt_class(); |
---|
.. | .. |
---|
6572 | 8132 | |
---|
6573 | 8133 | static int __init migration_init(void) |
---|
6574 | 8134 | { |
---|
6575 | | - sched_rq_cpu_starting(smp_processor_id()); |
---|
| 8135 | + sched_cpu_starting(smp_processor_id()); |
---|
6576 | 8136 | return 0; |
---|
6577 | 8137 | } |
---|
6578 | 8138 | early_initcall(migration_init); |
---|
.. | .. |
---|
6597 | 8157 | * Every task in system belongs to this group at bootup. |
---|
6598 | 8158 | */ |
---|
6599 | 8159 | struct task_group root_task_group; |
---|
| 8160 | +EXPORT_SYMBOL_GPL(root_task_group); |
---|
6600 | 8161 | LIST_HEAD(task_groups); |
---|
| 8162 | +EXPORT_SYMBOL_GPL(task_groups); |
---|
6601 | 8163 | |
---|
6602 | 8164 | /* Cacheline aligned slab cache for task_group */ |
---|
6603 | 8165 | static struct kmem_cache *task_group_cache __read_mostly; |
---|
.. | .. |
---|
6608 | 8170 | |
---|
6609 | 8171 | void __init sched_init(void) |
---|
6610 | 8172 | { |
---|
6611 | | - int i, j; |
---|
6612 | | - unsigned long alloc_size = 0, ptr; |
---|
| 8173 | + unsigned long ptr = 0; |
---|
| 8174 | + int i; |
---|
| 8175 | + |
---|
| 8176 | + /* Make sure the linker didn't screw up */ |
---|
| 8177 | + BUG_ON(&idle_sched_class + 1 != &fair_sched_class || |
---|
| 8178 | + &fair_sched_class + 1 != &rt_sched_class || |
---|
| 8179 | + &rt_sched_class + 1 != &dl_sched_class); |
---|
| 8180 | +#ifdef CONFIG_SMP |
---|
| 8181 | + BUG_ON(&dl_sched_class + 1 != &stop_sched_class); |
---|
| 8182 | +#endif |
---|
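These BUG_ON()s assert a layout invariant rather than a runtime condition: the scheduling classes are expected to sit back-to-back in memory, lowest priority first, which lets the core walk them with plain pointer arithmetic instead of a linked list. A minimal sketch of that idea, assuming the SMP layout checked above (stop_sched_class at the highest address; pick_class_sketch() is a made-up name):

```c
/* Sketch only: visit classes from highest to lowest priority by address. */
static const struct sched_class *pick_class_sketch(void)
{
	const struct sched_class *class;

	for (class = &stop_sched_class; class >= &idle_sched_class; class--) {
		/* real pick path: return the first class with a runnable task */
	}

	return &idle_sched_class;	/* the idle class always has work */
}
```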
6613 | 8183 | |
---|
6614 | 8184 | wait_bit_init(); |
---|
6615 | 8185 | |
---|
6616 | 8186 | #ifdef CONFIG_FAIR_GROUP_SCHED |
---|
6617 | | - alloc_size += 2 * nr_cpu_ids * sizeof(void **); |
---|
| 8187 | + ptr += 2 * nr_cpu_ids * sizeof(void **); |
---|
6618 | 8188 | #endif |
---|
6619 | 8189 | #ifdef CONFIG_RT_GROUP_SCHED |
---|
6620 | | - alloc_size += 2 * nr_cpu_ids * sizeof(void **); |
---|
| 8190 | + ptr += 2 * nr_cpu_ids * sizeof(void **); |
---|
6621 | 8191 | #endif |
---|
6622 | | - if (alloc_size) { |
---|
6623 | | - ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); |
---|
| 8192 | + if (ptr) { |
---|
| 8193 | + ptr = (unsigned long)kzalloc(ptr, GFP_NOWAIT); |
---|
6624 | 8194 | |
---|
6625 | 8195 | #ifdef CONFIG_FAIR_GROUP_SCHED |
---|
6626 | 8196 | root_task_group.se = (struct sched_entity **)ptr; |
---|
.. | .. |
---|
6629 | 8199 | root_task_group.cfs_rq = (struct cfs_rq **)ptr; |
---|
6630 | 8200 | ptr += nr_cpu_ids * sizeof(void **); |
---|
6631 | 8201 | |
---|
| 8202 | + root_task_group.shares = ROOT_TASK_GROUP_LOAD; |
---|
| 8203 | + init_cfs_bandwidth(&root_task_group.cfs_bandwidth); |
---|
6632 | 8204 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
---|
6633 | 8205 | #ifdef CONFIG_RT_GROUP_SCHED |
---|
6634 | 8206 | root_task_group.rt_se = (struct sched_rt_entity **)ptr; |
---|
.. | .. |
---|
6681 | 8253 | init_rt_rq(&rq->rt); |
---|
6682 | 8254 | init_dl_rq(&rq->dl); |
---|
6683 | 8255 | #ifdef CONFIG_FAIR_GROUP_SCHED |
---|
6684 | | - root_task_group.shares = ROOT_TASK_GROUP_LOAD; |
---|
6685 | 8256 | INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); |
---|
6686 | 8257 | rq->tmp_alone_branch = &rq->leaf_cfs_rq_list; |
---|
6687 | 8258 | /* |
---|
.. | .. |
---|
6703 | 8274 | * We achieve this by letting root_task_group's tasks sit |
---|
6704 | 8275 | * directly in rq->cfs (i.e root_task_group->se[] = NULL). |
---|
6705 | 8276 | */ |
---|
6706 | | - init_cfs_bandwidth(&root_task_group.cfs_bandwidth); |
---|
6707 | 8277 | init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL); |
---|
6708 | 8278 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
---|
6709 | 8279 | |
---|
.. | .. |
---|
6711 | 8281 | #ifdef CONFIG_RT_GROUP_SCHED |
---|
6712 | 8282 | init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL); |
---|
6713 | 8283 | #endif |
---|
6714 | | - |
---|
6715 | | - for (j = 0; j < CPU_LOAD_IDX_MAX; j++) |
---|
6716 | | - rq->cpu_load[j] = 0; |
---|
6717 | | - |
---|
6718 | 8284 | #ifdef CONFIG_SMP |
---|
6719 | 8285 | rq->sd = NULL; |
---|
6720 | 8286 | rq->rd = NULL; |
---|
.. | .. |
---|
6733 | 8299 | |
---|
6734 | 8300 | rq_attach_root(rq, &def_root_domain); |
---|
6735 | 8301 | #ifdef CONFIG_NO_HZ_COMMON |
---|
6736 | | - rq->last_load_update_tick = jiffies; |
---|
6737 | 8302 | rq->last_blocked_load_update_tick = jiffies; |
---|
6738 | 8303 | atomic_set(&rq->nohz_flags, 0); |
---|
| 8304 | + |
---|
| 8305 | + rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); |
---|
| 8306 | +#endif |
---|
| 8307 | +#ifdef CONFIG_HOTPLUG_CPU |
---|
| 8308 | + rcuwait_init(&rq->hotplug_wait); |
---|
6739 | 8309 | #endif |
---|
6740 | 8310 | #endif /* CONFIG_SMP */ |
---|
6741 | 8311 | hrtick_rq_init(rq); |
---|
6742 | 8312 | atomic_set(&rq->nr_iowait, 0); |
---|
6743 | 8313 | } |
---|
6744 | 8314 | |
---|
6745 | | - set_load_weight(&init_task, false); |
---|
| 8315 | + set_load_weight(&init_task); |
---|
6746 | 8316 | |
---|
6747 | 8317 | /* |
---|
6748 | 8318 | * The boot idle thread does lazy MMU switching as well: |
---|
.. | .. |
---|
6777 | 8347 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP |
---|
6778 | 8348 | static inline int preempt_count_equals(int preempt_offset) |
---|
6779 | 8349 | { |
---|
6780 | | - int nested = preempt_count() + rcu_preempt_depth(); |
---|
| 8350 | + int nested = preempt_count() + sched_rcu_preempt_depth(); |
---|
6781 | 8351 | |
---|
6782 | 8352 | return (nested == preempt_offset); |
---|
6783 | 8353 | } |
---|
.. | .. |
---|
6811 | 8381 | rcu_sleep_check(); |
---|
6812 | 8382 | |
---|
6813 | 8383 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && |
---|
6814 | | - !is_idle_task(current)) || |
---|
| 8384 | + !is_idle_task(current) && !current->non_block_count) || |
---|
6815 | 8385 | system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || |
---|
6816 | 8386 | oops_in_progress) |
---|
6817 | 8387 | return; |
---|
.. | .. |
---|
6827 | 8397 | "BUG: sleeping function called from invalid context at %s:%d\n", |
---|
6828 | 8398 | file, line); |
---|
6829 | 8399 | printk(KERN_ERR |
---|
6830 | | - "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", |
---|
6831 | | - in_atomic(), irqs_disabled(), |
---|
| 8400 | + "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", |
---|
| 8401 | + in_atomic(), irqs_disabled(), current->non_block_count, |
---|
6832 | 8402 | current->pid, current->comm); |
---|
6833 | 8403 | |
---|
6834 | 8404 | if (task_stack_end_corrupted(current)) |
---|
.. | .. |
---|
6840 | 8410 | if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) |
---|
6841 | 8411 | && !preempt_count_equals(preempt_offset)) { |
---|
6842 | 8412 | pr_err("Preemption disabled at:"); |
---|
6843 | | - print_ip_sym(preempt_disable_ip); |
---|
6844 | | - pr_cont("\n"); |
---|
| 8413 | + print_ip_sym(KERN_ERR, preempt_disable_ip); |
---|
6845 | 8414 | } |
---|
| 8415 | + |
---|
| 8416 | + trace_android_rvh_schedule_bug(NULL); |
---|
| 8417 | + |
---|
6846 | 8418 | dump_stack(); |
---|
6847 | 8419 | add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
---|
6848 | 8420 | } |
---|
6849 | 8421 | EXPORT_SYMBOL(___might_sleep); |
---|
| 8422 | + |
---|
| 8423 | +void __cant_sleep(const char *file, int line, int preempt_offset) |
---|
| 8424 | +{ |
---|
| 8425 | + static unsigned long prev_jiffy; |
---|
| 8426 | + |
---|
| 8427 | + if (irqs_disabled()) |
---|
| 8428 | + return; |
---|
| 8429 | + |
---|
| 8430 | + if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) |
---|
| 8431 | + return; |
---|
| 8432 | + |
---|
| 8433 | + if (preempt_count() > preempt_offset) |
---|
| 8434 | + return; |
---|
| 8435 | + |
---|
| 8436 | + if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) |
---|
| 8437 | + return; |
---|
| 8438 | + prev_jiffy = jiffies; |
---|
| 8439 | + |
---|
| 8440 | + printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); |
---|
| 8441 | + printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", |
---|
| 8442 | + in_atomic(), irqs_disabled(), |
---|
| 8443 | + current->pid, current->comm); |
---|
| 8444 | + |
---|
| 8445 | + debug_show_held_locks(current); |
---|
| 8446 | + dump_stack(); |
---|
| 8447 | + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
---|
| 8448 | +} |
---|
| 8449 | +EXPORT_SYMBOL_GPL(__cant_sleep); |
---|
| 8450 | + |
---|
| 8451 | +#ifdef CONFIG_SMP |
---|
| 8452 | +void __cant_migrate(const char *file, int line) |
---|
| 8453 | +{ |
---|
| 8454 | + static unsigned long prev_jiffy; |
---|
| 8455 | + |
---|
| 8456 | + if (irqs_disabled()) |
---|
| 8457 | + return; |
---|
| 8458 | + |
---|
| 8459 | + if (is_migration_disabled(current)) |
---|
| 8460 | + return; |
---|
| 8461 | + |
---|
| 8462 | + if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) |
---|
| 8463 | + return; |
---|
| 8464 | + |
---|
| 8465 | + if (preempt_count() > 0) |
---|
| 8466 | + return; |
---|
| 8467 | + |
---|
| 8468 | + if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) |
---|
| 8469 | + return; |
---|
| 8470 | + prev_jiffy = jiffies; |
---|
| 8471 | + |
---|
| 8472 | + pr_err("BUG: assuming non migratable context at %s:%d\n", file, line); |
---|
| 8473 | + pr_err("in_atomic(): %d, irqs_disabled(): %d, migration_disabled() %u pid: %d, name: %s\n", |
---|
| 8474 | + in_atomic(), irqs_disabled(), is_migration_disabled(current), |
---|
| 8475 | + current->pid, current->comm); |
---|
| 8476 | + |
---|
| 8477 | + debug_show_held_locks(current); |
---|
| 8478 | + dump_stack(); |
---|
| 8479 | + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
---|
| 8480 | +} |
---|
| 8481 | +EXPORT_SYMBOL_GPL(__cant_migrate); |
---|
| 8482 | +#endif |
---|
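Both helpers are the inverse of might_sleep(): they complain, rate-limited to once per second through prev_jiffy, when code that assumes an atomic, CPU-pinned context is in fact running preemptible or migratable. They are normally reached through wrapper macros; the cant_sleep()/cant_migrate() names used below are assumed to be those wrappers, and the helper itself is invented for illustration:

```c
/* Hypothetical helper that must only run with preemption and migration off. */
static void touch_this_cpu_state_sketch(unsigned int *counter)
{
	cant_sleep();		/* splat if we are unexpectedly preemptible */
	cant_migrate();		/* splat if we could wander off this CPU */

	(*counter)++;		/* only safe while pinned to this CPU */
}
```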
6850 | 8483 | #endif |
---|
6851 | 8484 | |
---|
6852 | 8485 | #ifdef CONFIG_MAGIC_SYSRQ |
---|
.. | .. |
---|
6915 | 8548 | |
---|
6916 | 8549 | #ifdef CONFIG_IA64 |
---|
6917 | 8550 | /** |
---|
6918 | | - * set_curr_task - set the current task for a given CPU. |
---|
| 8551 | + * ia64_set_curr_task - set the current task for a given CPU. |
---|
6919 | 8552 | * @cpu: the processor in question. |
---|
6920 | 8553 | * @p: the task pointer to set. |
---|
6921 | 8554 | * |
---|
.. | .. |
---|
7081 | 8714 | |
---|
7082 | 8715 | if (queued) |
---|
7083 | 8716 | enqueue_task(rq, tsk, queue_flags); |
---|
7084 | | - if (running) |
---|
7085 | | - set_curr_task(rq, tsk); |
---|
| 8717 | + if (running) { |
---|
| 8718 | + set_next_task(rq, tsk); |
---|
| 8719 | + /* |
---|
| 8720 | + * After changing group, the running task may have joined a |
---|
| 8721 | + * throttled one but it's still the running task. Trigger a |
---|
| 8722 | + * resched to make sure that task can still run. |
---|
| 8723 | + */ |
---|
| 8724 | + resched_curr(rq); |
---|
| 8725 | + } |
---|
7086 | 8726 | |
---|
7087 | 8727 | task_rq_unlock(rq, tsk, &rf); |
---|
7088 | 8728 | } |
---|
.. | .. |
---|
7121 | 8761 | |
---|
7122 | 8762 | #ifdef CONFIG_UCLAMP_TASK_GROUP |
---|
7123 | 8763 | /* Propagate the effective uclamp value for the new group */ |
---|
| 8764 | + mutex_lock(&uclamp_mutex); |
---|
| 8765 | + rcu_read_lock(); |
---|
7124 | 8766 | cpu_util_update_eff(css); |
---|
| 8767 | + rcu_read_unlock(); |
---|
| 8768 | + mutex_unlock(&uclamp_mutex); |
---|
7125 | 8769 | #endif |
---|
7126 | 8770 | |
---|
| 8771 | + trace_android_rvh_cpu_cgroup_online(css); |
---|
7127 | 8772 | return 0; |
---|
7128 | 8773 | } |
---|
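cpu_cgroup_css_online() now brackets cpu_util_update_eff() with uclamp_mutex and the RCU read lock; the lockdep_assert_held()/SCHED_WARN_ON() pair added to that function further down turns this into an explicit contract. A caller-side sketch of the expected pattern (propagate_uclamp_sketch() is a made-up name):

```c
/* Sketch of the locking contract asserted inside cpu_util_update_eff(). */
static void propagate_uclamp_sketch(struct cgroup_subsys_state *css)
{
	mutex_lock(&uclamp_mutex);	/* serializes effective-value updates */
	rcu_read_lock();		/* protects the css descendant walk */

	cpu_util_update_eff(css);

	rcu_read_unlock();
	mutex_unlock(&uclamp_mutex);
}
```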
7129 | 8774 | |
---|
.. | .. |
---|
7189 | 8834 | if (ret) |
---|
7190 | 8835 | break; |
---|
7191 | 8836 | } |
---|
| 8837 | + |
---|
| 8838 | + trace_android_rvh_cpu_cgroup_can_attach(tset, &ret); |
---|
| 8839 | + |
---|
7192 | 8840 | return ret; |
---|
7193 | 8841 | } |
---|
7194 | 8842 | |
---|
.. | .. |
---|
7199 | 8847 | |
---|
7200 | 8848 | cgroup_taskset_for_each(task, css, tset) |
---|
7201 | 8849 | sched_move_task(task); |
---|
| 8850 | + |
---|
| 8851 | + trace_android_rvh_cpu_cgroup_attach(tset); |
---|
7202 | 8852 | } |
---|
7203 | 8853 | |
---|
7204 | 8854 | #ifdef CONFIG_UCLAMP_TASK_GROUP |
---|
.. | .. |
---|
7210 | 8860 | unsigned int eff[UCLAMP_CNT]; |
---|
7211 | 8861 | enum uclamp_id clamp_id; |
---|
7212 | 8862 | unsigned int clamps; |
---|
| 8863 | + |
---|
| 8864 | + lockdep_assert_held(&uclamp_mutex); |
---|
| 8865 | + SCHED_WARN_ON(!rcu_read_lock_held()); |
---|
7213 | 8866 | |
---|
7214 | 8867 | css_for_each_descendant_pre(css, top_css) { |
---|
7215 | 8868 | uc_parent = css_tg(css)->parent |
---|
.. | .. |
---|
7243 | 8896 | } |
---|
7244 | 8897 | |
---|
7245 | 8898 | /* Immediately update descendants RUNNABLE tasks */ |
---|
7246 | | - uclamp_update_active_tasks(css, clamps); |
---|
| 8899 | + uclamp_update_active_tasks(css); |
---|
7247 | 8900 | } |
---|
7248 | 8901 | } |
---|
7249 | 8902 | |
---|
.. | .. |
---|
7300 | 8953 | req = capacity_from_percent(buf); |
---|
7301 | 8954 | if (req.ret) |
---|
7302 | 8955 | return req.ret; |
---|
| 8956 | + |
---|
| 8957 | + static_branch_enable(&sched_uclamp_used); |
---|
7303 | 8958 | |
---|
7304 | 8959 | mutex_lock(&uclamp_mutex); |
---|
7305 | 8960 | rcu_read_lock(); |
---|
.. | .. |
---|
7415 | 9070 | static DEFINE_MUTEX(cfs_constraints_mutex); |
---|
7416 | 9071 | |
---|
7417 | 9072 | const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */ |
---|
7418 | | -const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ |
---|
| 9073 | +static const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ |
---|
| 9074 | +/* More than 203 days if BW_SHIFT equals 20. */ |
---|
| 9075 | +static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC; |
---|
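The 203-day figure follows from the bandwidth constants: assuming MAX_BW is derived as (1ULL << (64 - BW_SHIFT)) - 1, then with BW_SHIFT = 20 it equals 2^44 - 1 microseconds, about 1.76 × 10^13 µs or 1.76 × 10^7 s, i.e. roughly 203.6 days; max_cfs_runtime is simply that limit expressed in nanoseconds.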
7419 | 9076 | |
---|
7420 | 9077 | static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime); |
---|
7421 | 9078 | |
---|
.. | .. |
---|
7441 | 9098 | * feasibility. |
---|
7442 | 9099 | */ |
---|
7443 | 9100 | if (period > max_cfs_quota_period) |
---|
| 9101 | + return -EINVAL; |
---|
| 9102 | + |
---|
| 9103 | + /* |
---|
| 9104 | + * Bound quota to defend against overflow during the bandwidth shift. |
---|
| 9105 | + */ |
---|
| 9106 | + if (quota != RUNTIME_INF && quota > max_cfs_runtime) |
---|
7444 | 9107 | return -EINVAL; |
---|
7445 | 9108 | |
---|
7446 | 9109 | /* |
---|
.. | .. |
---|
7495 | 9158 | return ret; |
---|
7496 | 9159 | } |
---|
7497 | 9160 | |
---|
7498 | | -int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) |
---|
| 9161 | +static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) |
---|
7499 | 9162 | { |
---|
7500 | 9163 | u64 quota, period; |
---|
7501 | 9164 | |
---|
.. | .. |
---|
7510 | 9173 | return tg_set_cfs_bandwidth(tg, period, quota); |
---|
7511 | 9174 | } |
---|
7512 | 9175 | |
---|
7513 | | -long tg_get_cfs_quota(struct task_group *tg) |
---|
| 9176 | +static long tg_get_cfs_quota(struct task_group *tg) |
---|
7514 | 9177 | { |
---|
7515 | 9178 | u64 quota_us; |
---|
7516 | 9179 | |
---|
.. | .. |
---|
7523 | 9186 | return quota_us; |
---|
7524 | 9187 | } |
---|
7525 | 9188 | |
---|
7526 | | -int tg_set_cfs_period(struct task_group *tg, long cfs_period_us) |
---|
| 9189 | +static int tg_set_cfs_period(struct task_group *tg, long cfs_period_us) |
---|
7527 | 9190 | { |
---|
7528 | 9191 | u64 quota, period; |
---|
7529 | 9192 | |
---|
.. | .. |
---|
7536 | 9199 | return tg_set_cfs_bandwidth(tg, period, quota); |
---|
7537 | 9200 | } |
---|
7538 | 9201 | |
---|
7539 | | -long tg_get_cfs_period(struct task_group *tg) |
---|
| 9202 | +static long tg_get_cfs_period(struct task_group *tg) |
---|
7540 | 9203 | { |
---|
7541 | 9204 | u64 cfs_period_us; |
---|
7542 | 9205 | |
---|
.. | .. |
---|
8013 | 9676 | /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, |
---|
8014 | 9677 | }; |
---|
8015 | 9678 | |
---|
8016 | | -#undef CREATE_TRACE_POINTS |
---|
| 9679 | +void call_trace_sched_update_nr_running(struct rq *rq, int count) |
---|
| 9680 | +{ |
---|
| 9681 | + trace_sched_update_nr_running_tp(rq, count); |
---|
| 9682 | +} |
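call_trace_sched_update_nr_running() lets other scheduler translation units fire the bare sched_update_nr_running_tp tracepoint without pulling in the trace header themselves, and the tracepoint export makes it reachable from modules. A hedged out-of-tree probe sketch follows; the probe prototype mirrors the wrapper's (rq, count) arguments plus the leading data pointer, and the exact TP_PROTO should be treated as an assumption:

```c
/* Sketch of a module attaching to the bare sched_update_nr_running_tp. */
#include <linux/module.h>
#include <trace/events/sched.h>

static void nr_running_probe(void *data, struct rq *rq, int change)
{
	/* e.g. feed a vendor load tracker; struct rq stays opaque here */
}

static int __init nr_probe_init(void)
{
	return register_trace_sched_update_nr_running_tp(nr_running_probe, NULL);
}

static void __exit nr_probe_exit(void)
{
	unregister_trace_sched_update_nr_running_tp(nr_running_probe, NULL);
}

module_init(nr_probe_init);
module_exit(nr_probe_exit);
MODULE_LICENSE("GPL");
```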
---|