.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * kernel/sched/core.c |
---|
3 | 4 | * |
---|
.. | .. |
---|
5 | 6 | * |
---|
6 | 7 | * Copyright (C) 1991-2002 Linus Torvalds |
---|
7 | 8 | */ |
---|
| 9 | +#define CREATE_TRACE_POINTS |
---|
| 10 | +#include <trace/events/sched.h> |
---|
| 11 | +#undef CREATE_TRACE_POINTS |
---|
| 12 | + |
---|
8 | 13 | #include "sched.h" |
---|
9 | 14 | |
---|
10 | 15 | #include <linux/nospec.h> |
---|
.. | .. |
---|
16 | 21 | #include <asm/tlb.h> |
---|
17 | 22 | |
---|
18 | 23 | #include "../workqueue_internal.h" |
---|
| 24 | +#include "../../io_uring/io-wq.h" |
---|
19 | 25 | #include "../smpboot.h" |
---|
20 | 26 | |
---|
21 | 27 | #include "pelt.h" |
---|
| 28 | +#include "smp.h" |
---|
22 | 29 | |
---|
23 | | -#define CREATE_TRACE_POINTS |
---|
24 | | -#include <trace/events/sched.h> |
---|
| 30 | +#include <trace/hooks/sched.h> |
---|
| 31 | +#include <trace/hooks/dtask.h> |
---|
| 32 | + |
---|
| 33 | +/* |
---|
| 34 | + * Export tracepoints that act as a bare tracehook (ie: have no trace event |
---|
| 35 | + * associated with them) to allow external modules to probe them. |
---|
| 36 | + */ |
---|
| 37 | +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_cfs_tp); |
---|
| 38 | +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp); |
---|
| 39 | +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp); |
---|
| 40 | +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); |
---|
| 41 | +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp); |
---|
| 42 | +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_thermal_tp); |
---|
| 43 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_cpu_capacity_tp); |
---|
| 44 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp); |
---|
| 45 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_cfs_tp); |
---|
| 46 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_se_tp); |
---|
| 47 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp); |
---|
| 48 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_switch); |
---|
| 49 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_waking); |
---|
| 50 | +#ifdef CONFIG_SCHEDSTATS |
---|
| 51 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_sleep); |
---|
| 52 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_wait); |
---|
| 53 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_iowait); |
---|
| 54 | +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_blocked); |
---|
| 55 | +#endif |
---|
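The exports above turn these scheduler tracepoints into hook points that out-of-tree modules can attach to, even though most of them carry no trace event of their own. A minimal sketch of such a consumer, using the well-known sched_switch prototype (the module scaffolding and probe body are illustrative, not part of this patch):

```c
#include <linux/module.h>
#include <trace/events/sched.h>

/* Probe arguments follow the tracepoint's TP_PROTO, preceded by the
 * private data pointer passed at registration time. */
static void probe_sched_switch(void *data, bool preempt,
			       struct task_struct *prev,
			       struct task_struct *next)
{
	/* e.g. sample per-task scheduling latency here */
}

static int __init sched_probe_init(void)
{
	return register_trace_sched_switch(probe_sched_switch, NULL);
}

static void __exit sched_probe_exit(void)
{
	unregister_trace_sched_switch(probe_sched_switch, NULL);
	tracepoint_synchronize_unregister();
}

module_init(sched_probe_init);
module_exit(sched_probe_exit);
MODULE_LICENSE("GPL");
```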
25 | 56 | |
---|
26 | 57 | DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
---|
| 58 | +EXPORT_SYMBOL_GPL(runqueues); |
---|
27 | 59 | |
---|
28 | 60 | #ifdef CONFIG_SCHED_DEBUG |
---|
29 | 61 | /* |
---|
.. | .. |
---|
38 | 70 | const_debug unsigned int sysctl_sched_features = |
---|
39 | 71 | #include "features.h" |
---|
40 | 72 | 0; |
---|
| 73 | +EXPORT_SYMBOL_GPL(sysctl_sched_features); |
---|
41 | 74 | #undef SCHED_FEAT |
---|
42 | 75 | #endif |
---|
43 | 76 | |
---|
.. | .. |
---|
45 | 78 | * Number of tasks to iterate in a single balance run. |
---|
46 | 79 | * Limited because this is done with IRQs disabled. |
---|
47 | 80 | */ |
---|
48 | | -#ifdef CONFIG_PREEMPT_RT_FULL |
---|
49 | | -const_debug unsigned int sysctl_sched_nr_migrate = 8; |
---|
50 | | -#else |
---|
51 | 81 | const_debug unsigned int sysctl_sched_nr_migrate = 32; |
---|
52 | | -#endif |
---|
53 | 82 | |
---|
54 | 83 | /* |
---|
55 | 84 | * period over which we measure -rt task CPU usage in us. |
---|
.. | .. |
---|
64 | 93 | * default: 0.95s |
---|
65 | 94 | */ |
---|
66 | 95 | int sysctl_sched_rt_runtime = 950000; |
---|
| 96 | + |
---|
| 97 | + |
---|
| 98 | +/* |
---|
| 99 | + * Serialization rules: |
---|
| 100 | + * |
---|
| 101 | + * Lock order: |
---|
| 102 | + * |
---|
| 103 | + * p->pi_lock |
---|
| 104 | + * rq->lock |
---|
| 105 | + * hrtimer_cpu_base->lock (hrtimer_start() for bandwidth controls) |
---|
| 106 | + * |
---|
| 107 | + * rq1->lock |
---|
| 108 | + * rq2->lock where: rq1 < rq2 |
---|
| 109 | + * |
---|
| 110 | + * Regular state: |
---|
| 111 | + * |
---|
| 112 | + * Normal scheduling state is serialized by rq->lock. __schedule() takes the |
---|
| 113 | + * local CPU's rq->lock, it optionally removes the task from the runqueue and |
---|
| 114 | + * always looks at the local rq data structures to find the most eligible task |
---|
| 115 | + * to run next. |
---|
| 116 | + * |
---|
| 117 | + * Task enqueue is also under rq->lock, possibly taken from another CPU. |
---|
| 118 | + * Wakeups from another LLC domain might use an IPI to transfer the enqueue to |
---|
| 119 | + * the local CPU to avoid bouncing the runqueue state around [ see |
---|
| 120 | + * ttwu_queue_wakelist() ] |
---|
| 121 | + * |
---|
| 122 | + * Task wakeup, specifically wakeups that involve migration, are horribly |
---|
| 123 | + * complicated to avoid having to take two rq->locks. |
---|
| 124 | + * |
---|
| 125 | + * Special state: |
---|
| 126 | + * |
---|
| 127 | + * System-calls and anything external will use task_rq_lock() which acquires |
---|
| 128 | + * both p->pi_lock and rq->lock. As a consequence the state they change is |
---|
| 129 | + * stable while holding either lock: |
---|
| 130 | + * |
---|
| 131 | + * - sched_setaffinity()/ |
---|
| 132 | + * set_cpus_allowed_ptr(): p->cpus_ptr, p->nr_cpus_allowed |
---|
| 133 | + * - set_user_nice(): p->se.load, p->*prio |
---|
| 134 | + * - __sched_setscheduler(): p->sched_class, p->policy, p->*prio, |
---|
| 135 | + * p->se.load, p->rt_priority, |
---|
| 136 | + * p->dl.dl_{runtime, deadline, period, flags, bw, density} |
---|
| 137 | + * - sched_setnuma(): p->numa_preferred_nid |
---|
| 138 | + * - sched_move_task()/ |
---|
| 139 | + * cpu_cgroup_fork(): p->sched_task_group |
---|
| 140 | + * - uclamp_update_active() p->uclamp* |
---|
| 141 | + * |
---|
| 142 | + * p->state <- TASK_*: |
---|
| 143 | + * |
---|
| 144 | + * is changed locklessly using set_current_state(), __set_current_state() or |
---|
| 145 | + * set_special_state(), see their respective comments, or by |
---|
| 146 | + * try_to_wake_up(). This latter uses p->pi_lock to serialize against |
---|
| 147 | + * concurrent self. |
---|
| 148 | + * |
---|
| 149 | + * p->on_rq <- { 0, 1 = TASK_ON_RQ_QUEUED, 2 = TASK_ON_RQ_MIGRATING }: |
---|
| 150 | + * |
---|
| 151 | + * is set by activate_task() and cleared by deactivate_task(), under |
---|
| 152 | + * rq->lock. Non-zero indicates the task is runnable, the special |
---|
| 153 | + * ON_RQ_MIGRATING state is used for migration without holding both |
---|
| 154 | + * rq->locks. It indicates task_cpu() is not stable, see task_rq_lock(). |
---|
| 155 | + * |
---|
| 156 | + * p->on_cpu <- { 0, 1 }: |
---|
| 157 | + * |
---|
| 158 | + * is set by prepare_task() and cleared by finish_task() such that it will be |
---|
| 159 | + * set before p is scheduled-in and cleared after p is scheduled-out, both |
---|
| 160 | + * under rq->lock. Non-zero indicates the task is running on its CPU. |
---|
| 161 | + * |
---|
| 162 | + * [ The astute reader will observe that it is possible for two tasks on one |
---|
| 163 | + * CPU to have ->on_cpu = 1 at the same time. ] |
---|
| 164 | + * |
---|
| 165 | + * task_cpu(p): is changed by set_task_cpu(), the rules are: |
---|
| 166 | + * |
---|
| 167 | + * - Don't call set_task_cpu() on a blocked task: |
---|
| 168 | + * |
---|
| 169 | + * We don't care what CPU we're not running on, this simplifies hotplug, |
---|
| 170 | + * the CPU assignment of blocked tasks isn't required to be valid. |
---|
| 171 | + * |
---|
| 172 | + * - for try_to_wake_up(), called under p->pi_lock: |
---|
| 173 | + * |
---|
| 174 | + * This allows try_to_wake_up() to only take one rq->lock, see its comment. |
---|
| 175 | + * |
---|
| 176 | + * - for migration called under rq->lock: |
---|
| 177 | + * [ see task_on_rq_migrating() in task_rq_lock() ] |
---|
| 178 | + * |
---|
| 179 | + * o move_queued_task() |
---|
| 180 | + * o detach_task() |
---|
| 181 | + * |
---|
| 182 | + * - for migration called under double_rq_lock(): |
---|
| 183 | + * |
---|
| 184 | + * o __migrate_swap_task() |
---|
| 185 | + * o push_rt_task() / pull_rt_task() |
---|
| 186 | + * o push_dl_task() / pull_dl_task() |
---|
| 187 | + * o dl_task_offline_migration() |
---|
| 188 | + * |
---|
| 189 | + */ |
---|
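The "Special state" rules above reduce to one canonical pattern for anything outside the scheduler fast path: take both p->pi_lock and rq->lock through task_rq_lock() before touching the listed fields. A minimal sketch of that pattern (the helper name is hypothetical; task_rq_lock()/task_rq_unlock() are the real primitives defined just below and in sched.h):

```c
static void example_touch_special_state(struct task_struct *p)
{
	struct rq_flags rf;
	struct rq *rq;

	/* Takes p->pi_lock first, then the rq->lock of the rq @p resides on,
	 * following the lock order documented above. */
	rq = task_rq_lock(p, &rf);

	/* Fields such as p->policy, p->*prio and p->sched_class are stable
	 * here, because every writer holds at least one of the two locks. */

	task_rq_unlock(rq, p, &rf);
}
```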
67 | 190 | |
---|
68 | 191 | /* |
---|
69 | 192 | * __task_rq_lock - lock the rq @p resides on. |
---|
.. | .. |
---|
88 | 211 | cpu_relax(); |
---|
89 | 212 | } |
---|
90 | 213 | } |
---|
| 214 | +EXPORT_SYMBOL_GPL(__task_rq_lock); |
---|
91 | 215 | |
---|
92 | 216 | /* |
---|
93 | 217 | * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. |
---|
.. | .. |
---|
130 | 254 | cpu_relax(); |
---|
131 | 255 | } |
---|
132 | 256 | } |
---|
| 257 | +EXPORT_SYMBOL_GPL(task_rq_lock); |
---|
133 | 258 | |
---|
134 | 259 | /* |
---|
135 | 260 | * RQ-clock updating methods: |
---|
.. | .. |
---|
210 | 335 | rq->clock += delta; |
---|
211 | 336 | update_rq_clock_task(rq, delta); |
---|
212 | 337 | } |
---|
| 338 | +EXPORT_SYMBOL_GPL(update_rq_clock); |
---|
213 | 339 | |
---|
| 340 | +static inline void |
---|
| 341 | +rq_csd_init(struct rq *rq, struct __call_single_data *csd, smp_call_func_t func) |
---|
| 342 | +{ |
---|
| 343 | + csd->flags = 0; |
---|
| 344 | + csd->func = func; |
---|
| 345 | + csd->info = rq; |
---|
| 346 | +} |
---|
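rq_csd_init() centralises the call_single_data setup that used to be open-coded in hrtick_rq_init(); the other per-rq csd introduced by this series is expected to be initialised the same way. A sketch of the intended call sites (the nohz_csd line assumes sched_init() wires up nohz_csd_func(), which appears further down):

```c
static void example_rq_csd_setup(struct rq *rq)
{
	/* hrtick IPI target, exactly as hrtick_rq_init() does below */
	rq_csd_init(rq, &rq->hrtick_csd, __hrtick_start);

	/* NOHZ idle-balance kick; assumed to be done in sched_init() so
	 * that nohz_csd_func() has a csd to be invoked from */
	rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func);
}
```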
214 | 347 | |
---|
215 | 348 | #ifdef CONFIG_SCHED_HRTICK |
---|
216 | 349 | /* |
---|
.. | .. |
---|
247 | 380 | static void __hrtick_restart(struct rq *rq) |
---|
248 | 381 | { |
---|
249 | 382 | struct hrtimer *timer = &rq->hrtick_timer; |
---|
| 383 | + ktime_t time = rq->hrtick_time; |
---|
250 | 384 | |
---|
251 | | - hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED); |
---|
| 385 | + hrtimer_start(timer, time, HRTIMER_MODE_ABS_PINNED_HARD); |
---|
252 | 386 | } |
---|
253 | 387 | |
---|
254 | 388 | /* |
---|
.. | .. |
---|
261 | 395 | |
---|
262 | 396 | rq_lock(rq, &rf); |
---|
263 | 397 | __hrtick_restart(rq); |
---|
264 | | - rq->hrtick_csd_pending = 0; |
---|
265 | 398 | rq_unlock(rq, &rf); |
---|
266 | 399 | } |
---|
267 | 400 | |
---|
.. | .. |
---|
273 | 406 | void hrtick_start(struct rq *rq, u64 delay) |
---|
274 | 407 | { |
---|
275 | 408 | struct hrtimer *timer = &rq->hrtick_timer; |
---|
276 | | - ktime_t time; |
---|
277 | 409 | s64 delta; |
---|
278 | 410 | |
---|
279 | 411 | /* |
---|
.. | .. |
---|
281 | 413 | * doesn't make sense and can cause timer DoS. |
---|
282 | 414 | */ |
---|
283 | 415 | delta = max_t(s64, delay, 10000LL); |
---|
284 | | - time = ktime_add_ns(timer->base->get_time(), delta); |
---|
| 416 | + rq->hrtick_time = ktime_add_ns(timer->base->get_time(), delta); |
---|
285 | 417 | |
---|
286 | | - hrtimer_set_expires(timer, time); |
---|
287 | | - |
---|
288 | | - if (rq == this_rq()) { |
---|
| 418 | + if (rq == this_rq()) |
---|
289 | 419 | __hrtick_restart(rq); |
---|
290 | | - } else if (!rq->hrtick_csd_pending) { |
---|
| 420 | + else |
---|
291 | 421 | smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); |
---|
292 | | - rq->hrtick_csd_pending = 1; |
---|
293 | | - } |
---|
294 | 422 | } |
---|
295 | 423 | |
---|
296 | 424 | #else |
---|
.. | .. |
---|
307 | 435 | */ |
---|
308 | 436 | delay = max_t(u64, delay, 10000LL); |
---|
309 | 437 | hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), |
---|
310 | | - HRTIMER_MODE_REL_PINNED); |
---|
| 438 | + HRTIMER_MODE_REL_PINNED_HARD); |
---|
311 | 439 | } |
---|
| 440 | + |
---|
312 | 441 | #endif /* CONFIG_SMP */ |
---|
313 | 442 | |
---|
314 | 443 | static void hrtick_rq_init(struct rq *rq) |
---|
315 | 444 | { |
---|
316 | 445 | #ifdef CONFIG_SMP |
---|
317 | | - rq->hrtick_csd_pending = 0; |
---|
318 | | - |
---|
319 | | - rq->hrtick_csd.flags = 0; |
---|
320 | | - rq->hrtick_csd.func = __hrtick_start; |
---|
321 | | - rq->hrtick_csd.info = rq; |
---|
| 446 | + rq_csd_init(rq, &rq->hrtick_csd, __hrtick_start); |
---|
322 | 447 | #endif |
---|
323 | | - |
---|
324 | 448 | hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); |
---|
325 | 449 | rq->hrtick_timer.function = hrtick; |
---|
326 | 450 | } |
---|
.. | .. |
---|
403 | 527 | #endif |
---|
404 | 528 | #endif |
---|
405 | 529 | |
---|
406 | | -void __wake_q_add(struct wake_q_head *head, struct task_struct *task, |
---|
407 | | - bool sleeper) |
---|
| 530 | +static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) |
---|
408 | 531 | { |
---|
409 | | - struct wake_q_node *node; |
---|
410 | | - |
---|
411 | | - if (sleeper) |
---|
412 | | - node = &task->wake_q_sleeper; |
---|
413 | | - else |
---|
414 | | - node = &task->wake_q; |
---|
| 532 | + struct wake_q_node *node = &task->wake_q; |
---|
415 | 533 | |
---|
416 | 534 | /* |
---|
417 | 535 | * Atomically grab the task, if ->wake_q is !nil already it means |
---|
.. | .. |
---|
422 | 540 | * state, even in the failed case, an explicit smp_mb() must be used. |
---|
423 | 541 | */ |
---|
424 | 542 | smp_mb__before_atomic(); |
---|
425 | | - if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)) |
---|
426 | | - return; |
---|
427 | | - |
---|
428 | | - head->count++; |
---|
429 | | - |
---|
430 | | - get_task_struct(task); |
---|
| 543 | + if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) |
---|
| 544 | + return false; |
---|
431 | 545 | |
---|
432 | 546 | /* |
---|
433 | 547 | * The head is context local, there can be no concurrency. |
---|
434 | 548 | */ |
---|
435 | 549 | *head->lastp = node; |
---|
436 | 550 | head->lastp = &node->next; |
---|
| 551 | + head->count++; |
---|
| 552 | + return true; |
---|
437 | 553 | } |
---|
438 | 554 | |
---|
439 | | -static int |
---|
440 | | -try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags, |
---|
441 | | - int sibling_count_hint); |
---|
442 | | -void __wake_up_q(struct wake_q_head *head, bool sleeper) |
---|
| 555 | +/** |
---|
| 556 | + * wake_q_add() - queue a wakeup for 'later' waking. |
---|
| 557 | + * @head: the wake_q_head to add @task to |
---|
| 558 | + * @task: the task to queue for 'later' wakeup |
---|
| 559 | + * |
---|
| 560 | + * Queue a task for later wakeup, most likely by the wake_up_q() call in the |
---|
| 561 | + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come |
---|
| 562 | + * instantly. |
---|
| 563 | + * |
---|
| 564 | + * This function must be used as-if it were wake_up_process(); IOW the task |
---|
| 565 | + * must be ready to be woken at this location. |
---|
| 566 | + */ |
---|
| 567 | +void wake_q_add(struct wake_q_head *head, struct task_struct *task) |
---|
| 568 | +{ |
---|
| 569 | + if (__wake_q_add(head, task)) |
---|
| 570 | + get_task_struct(task); |
---|
| 571 | +} |
---|
| 572 | + |
---|
| 573 | +/** |
---|
| 574 | + * wake_q_add_safe() - safely queue a wakeup for 'later' waking. |
---|
| 575 | + * @head: the wake_q_head to add @task to |
---|
| 576 | + * @task: the task to queue for 'later' wakeup |
---|
| 577 | + * |
---|
| 578 | + * Queue a task for later wakeup, most likely by the wake_up_q() call in the |
---|
| 579 | + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come |
---|
| 580 | + * instantly. |
---|
| 581 | + * |
---|
| 582 | + * This function must be used as-if it were wake_up_process(); IOW the task |
---|
| 583 | + * must be ready to be woken at this location. |
---|
| 584 | + * |
---|
| 585 | + * This function is essentially a task-safe equivalent to wake_q_add(). Callers |
---|
| 586 | + * that already hold a reference to @task can call the 'safe' version and trust |
---|
| 587 | + * wake_q to do the right thing depending on whether or not the @task is already |
---|
| 588 | + * queued for wakeup. |
---|
| 589 | + */ |
---|
| 590 | +void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) |
---|
| 591 | +{ |
---|
| 592 | + if (!__wake_q_add(head, task)) |
---|
| 593 | + put_task_struct(task); |
---|
| 594 | +} |
---|
| 595 | + |
---|
| 596 | +void wake_up_q(struct wake_q_head *head) |
---|
443 | 597 | { |
---|
444 | 598 | struct wake_q_node *node = head->first; |
---|
445 | 599 | |
---|
446 | 600 | while (node != WAKE_Q_TAIL) { |
---|
447 | 601 | struct task_struct *task; |
---|
448 | 602 | |
---|
449 | | - if (sleeper) |
---|
450 | | - task = container_of(node, struct task_struct, wake_q_sleeper); |
---|
451 | | - else |
---|
452 | | - task = container_of(node, struct task_struct, wake_q); |
---|
| 603 | + task = container_of(node, struct task_struct, wake_q); |
---|
453 | 604 | BUG_ON(!task); |
---|
454 | 605 | /* Task can safely be re-inserted now: */ |
---|
455 | 606 | node = node->next; |
---|
456 | | - if (sleeper) |
---|
457 | | - task->wake_q_sleeper.next = NULL; |
---|
458 | | - else |
---|
459 | | - task->wake_q.next = NULL; |
---|
| 607 | + task->wake_q.next = NULL; |
---|
| 608 | + task->wake_q_count = head->count; |
---|
| 609 | + |
---|
460 | 610 | /* |
---|
461 | 611 | * wake_up_process() executes a full barrier, which pairs with |
---|
462 | 612 | * the queueing in wake_q_add() so as not to miss wakeups. |
---|
463 | 613 | */ |
---|
464 | | - if (sleeper) |
---|
465 | | - wake_up_lock_sleeper(task); |
---|
466 | | - else |
---|
467 | | - wake_up_process(task); |
---|
468 | | - |
---|
| 614 | + wake_up_process(task); |
---|
| 615 | + task->wake_q_count = 0; |
---|
469 | 616 | put_task_struct(task); |
---|
470 | 617 | } |
---|
471 | 618 | } |
---|
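wake_q_add()/wake_q_add_safe() and wake_up_q() are meant to be used as a pair: wakeups are collected while a spinlock is held and issued only after it is dropped, so wake_up_process() never runs under the caller's lock. A minimal sketch of that pattern (the object and waiter types are hypothetical; the wake_q helpers and list primitives come from the usual sched/wake_q.h and list.h headers):

```c
struct example_waiter {
	struct list_head	node;
	struct task_struct	*task;
};

struct example_object {
	spinlock_t		lock;
	struct list_head	waiters;
};

static void example_wake_all_waiters(struct example_object *obj)
{
	DEFINE_WAKE_Q(wake_q);
	struct example_waiter *w;

	spin_lock(&obj->lock);
	list_for_each_entry(w, &obj->waiters, node) {
		/* Queues each task at most once and takes a reference only
		 * when the task was actually added. */
		wake_q_add(&wake_q, w->task);
	}
	spin_unlock(&obj->lock);

	/* Issue the deferred wakeups outside the lock. */
	wake_up_q(&wake_q);
}
```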
.. | .. |
---|
495 | 642 | return; |
---|
496 | 643 | } |
---|
497 | 644 | |
---|
498 | | -#ifdef CONFIG_PREEMPT |
---|
499 | 645 | if (set_nr_and_not_polling(curr)) |
---|
500 | | -#else |
---|
501 | | - if (set_nr_and_not_polling(curr) && (rq->curr == rq->idle)) |
---|
502 | | -#endif |
---|
503 | 646 | smp_send_reschedule(cpu); |
---|
504 | 647 | else |
---|
505 | 648 | trace_sched_wake_idle_without_ipi(cpu); |
---|
506 | 649 | } |
---|
507 | | - |
---|
508 | | -#ifdef CONFIG_PREEMPT_LAZY |
---|
509 | | - |
---|
510 | | -static int tsk_is_polling(struct task_struct *p) |
---|
511 | | -{ |
---|
512 | | -#ifdef TIF_POLLING_NRFLAG |
---|
513 | | - return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); |
---|
514 | | -#else |
---|
515 | | - return 0; |
---|
516 | | -#endif |
---|
517 | | -} |
---|
518 | | - |
---|
519 | | -void resched_curr_lazy(struct rq *rq) |
---|
520 | | -{ |
---|
521 | | - struct task_struct *curr = rq->curr; |
---|
522 | | - int cpu; |
---|
523 | | - |
---|
524 | | - if (!sched_feat(PREEMPT_LAZY)) { |
---|
525 | | - resched_curr(rq); |
---|
526 | | - return; |
---|
527 | | - } |
---|
528 | | - |
---|
529 | | - lockdep_assert_held(&rq->lock); |
---|
530 | | - |
---|
531 | | - if (test_tsk_need_resched(curr)) |
---|
532 | | - return; |
---|
533 | | - |
---|
534 | | - if (test_tsk_need_resched_lazy(curr)) |
---|
535 | | - return; |
---|
536 | | - |
---|
537 | | - set_tsk_need_resched_lazy(curr); |
---|
538 | | - |
---|
539 | | - cpu = cpu_of(rq); |
---|
540 | | - if (cpu == smp_processor_id()) |
---|
541 | | - return; |
---|
542 | | - |
---|
543 | | - /* NEED_RESCHED_LAZY must be visible before we test polling */ |
---|
544 | | - smp_mb(); |
---|
545 | | - if (!tsk_is_polling(curr)) |
---|
546 | | - smp_send_reschedule(cpu); |
---|
547 | | -} |
---|
548 | | -#endif |
---|
| 650 | +EXPORT_SYMBOL_GPL(resched_curr); |
---|
549 | 651 | |
---|
550 | 652 | void resched_cpu(int cpu) |
---|
551 | 653 | { |
---|
.. | .. |
---|
570 | 672 | */ |
---|
571 | 673 | int get_nohz_timer_target(void) |
---|
572 | 674 | { |
---|
573 | | - int i, cpu = smp_processor_id(); |
---|
| 675 | + int i, cpu = smp_processor_id(), default_cpu = -1; |
---|
574 | 676 | struct sched_domain *sd; |
---|
575 | 677 | |
---|
576 | | - if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER)) |
---|
577 | | - return cpu; |
---|
| 678 | + if (housekeeping_cpu(cpu, HK_FLAG_TIMER) && cpu_active(cpu)) { |
---|
| 679 | + if (!idle_cpu(cpu)) |
---|
| 680 | + return cpu; |
---|
| 681 | + default_cpu = cpu; |
---|
| 682 | + } |
---|
578 | 683 | |
---|
579 | 684 | rcu_read_lock(); |
---|
580 | 685 | for_each_domain(cpu, sd) { |
---|
581 | | - for_each_cpu(i, sched_domain_span(sd)) { |
---|
| 686 | + for_each_cpu_and(i, sched_domain_span(sd), |
---|
| 687 | + housekeeping_cpumask(HK_FLAG_TIMER)) { |
---|
582 | 688 | if (cpu == i) |
---|
583 | 689 | continue; |
---|
584 | 690 | |
---|
585 | | - if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) { |
---|
| 691 | + if (!idle_cpu(i)) { |
---|
586 | 692 | cpu = i; |
---|
587 | 693 | goto unlock; |
---|
588 | 694 | } |
---|
589 | 695 | } |
---|
590 | 696 | } |
---|
591 | 697 | |
---|
592 | | - if (!housekeeping_cpu(cpu, HK_FLAG_TIMER)) |
---|
593 | | - cpu = housekeeping_any_cpu(HK_FLAG_TIMER); |
---|
| 698 | + if (default_cpu == -1) { |
---|
| 699 | + for_each_cpu_and(i, cpu_active_mask, |
---|
| 700 | + housekeeping_cpumask(HK_FLAG_TIMER)) { |
---|
| 701 | + if (cpu == i) |
---|
| 702 | + continue; |
---|
| 703 | + |
---|
| 704 | + if (!idle_cpu(i)) { |
---|
| 705 | + cpu = i; |
---|
| 706 | + goto unlock; |
---|
| 707 | + } |
---|
| 708 | + } |
---|
| 709 | + |
---|
| 710 | + /* no active, not-idle, housekeeping CPU found. */ |
---|
| 711 | + default_cpu = cpumask_any(cpu_active_mask); |
---|
| 712 | + |
---|
| 713 | + if (unlikely(default_cpu >= nr_cpu_ids)) |
---|
| 714 | + goto unlock; |
---|
| 715 | + } |
---|
| 716 | + |
---|
| 717 | + cpu = default_cpu; |
---|
594 | 718 | unlock: |
---|
595 | 719 | rcu_read_unlock(); |
---|
596 | 720 | return cpu; |
---|
.. | .. |
---|
650 | 774 | wake_up_idle_cpu(cpu); |
---|
651 | 775 | } |
---|
652 | 776 | |
---|
653 | | -static inline bool got_nohz_idle_kick(void) |
---|
| 777 | +static void nohz_csd_func(void *info) |
---|
654 | 778 | { |
---|
655 | | - int cpu = smp_processor_id(); |
---|
656 | | - |
---|
657 | | - if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK)) |
---|
658 | | - return false; |
---|
659 | | - |
---|
660 | | - if (idle_cpu(cpu) && !need_resched()) |
---|
661 | | - return true; |
---|
| 779 | + struct rq *rq = info; |
---|
| 780 | + int cpu = cpu_of(rq); |
---|
| 781 | + unsigned int flags; |
---|
662 | 782 | |
---|
663 | 783 | /* |
---|
664 | | - * We can't run Idle Load Balance on this CPU for this time so we |
---|
665 | | - * cancel it and clear NOHZ_BALANCE_KICK |
---|
| 784 | + * Release the rq::nohz_csd. |
---|
666 | 785 | */ |
---|
667 | | - atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); |
---|
668 | | - return false; |
---|
669 | | -} |
---|
| 786 | + flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); |
---|
| 787 | + WARN_ON(!(flags & NOHZ_KICK_MASK)); |
---|
670 | 788 | |
---|
671 | | -#else /* CONFIG_NO_HZ_COMMON */ |
---|
672 | | - |
---|
673 | | -static inline bool got_nohz_idle_kick(void) |
---|
674 | | -{ |
---|
675 | | - return false; |
---|
| 789 | + rq->idle_balance = idle_cpu(cpu); |
---|
| 790 | + if (rq->idle_balance && !need_resched()) { |
---|
| 791 | + rq->nohz_idle_balance = flags; |
---|
| 792 | + raise_softirq_irqoff(SCHED_SOFTIRQ); |
---|
| 793 | + } |
---|
676 | 794 | } |
---|
677 | 795 | |
---|
678 | 796 | #endif /* CONFIG_NO_HZ_COMMON */ |
---|
.. | .. |
---|
763 | 881 | } |
---|
764 | 882 | #endif |
---|
765 | 883 | |
---|
766 | | -static void set_load_weight(struct task_struct *p, bool update_load) |
---|
| 884 | +static void set_load_weight(struct task_struct *p) |
---|
767 | 885 | { |
---|
| 886 | + bool update_load = !(READ_ONCE(p->state) & TASK_NEW); |
---|
768 | 887 | int prio = p->static_prio - MAX_RT_PRIO; |
---|
769 | 888 | struct load_weight *load = &p->se.load; |
---|
770 | 889 | |
---|
771 | 890 | /* |
---|
772 | 891 | * SCHED_IDLE tasks get minimal weight: |
---|
773 | 892 | */ |
---|
774 | | - if (idle_policy(p->policy)) { |
---|
| 893 | + if (task_has_idle_policy(p)) { |
---|
775 | 894 | load->weight = scale_load(WEIGHT_IDLEPRIO); |
---|
776 | 895 | load->inv_weight = WMULT_IDLEPRIO; |
---|
777 | | - p->se.runnable_weight = load->weight; |
---|
778 | 896 | return; |
---|
779 | 897 | } |
---|
780 | 898 | |
---|
.. | .. |
---|
787 | 905 | } else { |
---|
788 | 906 | load->weight = scale_load(sched_prio_to_weight[prio]); |
---|
789 | 907 | load->inv_weight = sched_prio_to_wmult[prio]; |
---|
790 | | - p->se.runnable_weight = load->weight; |
---|
791 | 908 | } |
---|
792 | 909 | } |
---|
793 | 910 | |
---|
.. | .. |
---|
810 | 927 | /* Max allowed maximum utilization */ |
---|
811 | 928 | unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE; |
---|
812 | 929 | |
---|
| 930 | +/* |
---|
| 931 | + * By default RT tasks run at the maximum performance point/capacity of the |
---|
| 932 | + * system. Uclamp enforces this by always setting UCLAMP_MIN of RT tasks to |
---|
| 933 | + * SCHED_CAPACITY_SCALE. |
---|
| 934 | + * |
---|
| 935 | + * This knob allows admins to change the default behavior when uclamp is being |
---|
| 936 | + * used. In battery powered devices, particularly, running at the maximum |
---|
| 937 | + * capacity and frequency will increase energy consumption and shorten the |
---|
| 938 | + * battery life. |
---|
| 939 | + * |
---|
| 940 | + * This knob only affects RT tasks whose uclamp_se->user_defined == false. |
---|
| 941 | + * |
---|
| 942 | + * This knob will not override the system default sched_util_clamp_min defined |
---|
| 943 | + * above. |
---|
| 944 | + */ |
---|
| 945 | +unsigned int sysctl_sched_uclamp_util_min_rt_default = SCHED_CAPACITY_SCALE; |
---|
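For reference, this default is exposed as a sysctl like the other uclamp knobs; a sketch of the ctl_table entry (the real entry lives in kernel/sysctl.c under the documented name sched_util_clamp_min_rt_default, so the placement below is illustrative):

```c
static struct ctl_table example_uclamp_ctl[] = {
	{
		.procname	= "sched_util_clamp_min_rt_default",
		.data		= &sysctl_sched_uclamp_util_min_rt_default,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		/* Shares the handler below so a new value is pushed out to
		 * every RT task that has not set its own clamp. */
		.proc_handler	= sysctl_sched_uclamp_handler,
	},
	{ }
};
```

An admin can then, for example, write 128 to /proc/sys/kernel/sched_util_clamp_min_rt_default to stop boosting RT tasks all the way to the maximum capacity by default.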
| 946 | + |
---|
813 | 947 | /* All clamps are required to be less or equal than these values */ |
---|
814 | 948 | static struct uclamp_se uclamp_default[UCLAMP_CNT]; |
---|
| 949 | + |
---|
| 950 | +/* |
---|
| 951 | + * This static key is used to reduce the uclamp overhead in the fast path. It |
---|
| 952 | + * primarily disables the call to uclamp_rq_{inc, dec}() in |
---|
| 953 | + * enqueue/dequeue_task(). |
---|
| 954 | + * |
---|
| 955 | + * This allows users to continue to enable uclamp in their kernel config with |
---|
| 956 | + * minimum uclamp overhead in the fast path. |
---|
| 957 | + * |
---|
| 958 | + * As soon as userspace modifies any of the uclamp knobs, the static key is |
---|
| 959 | + * enabled, since we have actual users that make use of uclamp |
---|
| 960 | + * functionality. |
---|
| 961 | + * |
---|
| 962 | + * The knobs that would enable this static key are: |
---|
| 963 | + * |
---|
| 964 | + * * A task modifying its uclamp value with sched_setattr(). |
---|
| 965 | + * * An admin modifying the sysctl_sched_uclamp_{min, max} via procfs. |
---|
| 966 | + * * An admin modifying the cgroup cpu.uclamp.{min, max} |
---|
| 967 | + */ |
---|
| 968 | +DEFINE_STATIC_KEY_FALSE(sched_uclamp_used); |
---|
| 969 | +EXPORT_SYMBOL_GPL(sched_uclamp_used); |
---|
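The consumer pattern for this key is a cheap early-out before any uclamp state is touched; uclamp_rq_inc()/uclamp_rq_dec() below do exactly that, and other fast-path readers follow the same shape. A hypothetical illustration, using the uclamp_rq_get() accessor introduced alongside this code:

```c
static inline unsigned long example_rq_uclamp_min(struct rq *rq)
{
	/* Effectively free as long as no uclamp knob has ever been touched. */
	if (!static_branch_unlikely(&sched_uclamp_used))
		return 0;

	return uclamp_rq_get(rq, UCLAMP_MIN);
}
```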
815 | 970 | |
---|
816 | 971 | /* Integer rounded range for each bucket */ |
---|
817 | 972 | #define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS) |
---|
.. | .. |
---|
822 | 977 | static inline unsigned int uclamp_bucket_id(unsigned int clamp_value) |
---|
823 | 978 | { |
---|
824 | 979 | return min_t(unsigned int, clamp_value / UCLAMP_BUCKET_DELTA, UCLAMP_BUCKETS - 1); |
---|
825 | | -} |
---|
826 | | - |
---|
827 | | -static inline unsigned int uclamp_bucket_base_value(unsigned int clamp_value) |
---|
828 | | -{ |
---|
829 | | - return UCLAMP_BUCKET_DELTA * uclamp_bucket_id(clamp_value); |
---|
830 | 980 | } |
---|
831 | 981 | |
---|
832 | 982 | static inline unsigned int uclamp_none(enum uclamp_id clamp_id) |
---|
.. | .. |
---|
868 | 1018 | if (!(rq->uclamp_flags & UCLAMP_FLAG_IDLE)) |
---|
869 | 1019 | return; |
---|
870 | 1020 | |
---|
871 | | - WRITE_ONCE(rq->uclamp[clamp_id].value, clamp_value); |
---|
| 1021 | + uclamp_rq_set(rq, clamp_id, clamp_value); |
---|
872 | 1022 | } |
---|
873 | 1023 | |
---|
874 | 1024 | static inline |
---|
.. | .. |
---|
892 | 1042 | return uclamp_idle_value(rq, clamp_id, clamp_value); |
---|
893 | 1043 | } |
---|
894 | 1044 | |
---|
| 1045 | +static void __uclamp_update_util_min_rt_default(struct task_struct *p) |
---|
| 1046 | +{ |
---|
| 1047 | + unsigned int default_util_min; |
---|
| 1048 | + struct uclamp_se *uc_se; |
---|
| 1049 | + |
---|
| 1050 | + lockdep_assert_held(&p->pi_lock); |
---|
| 1051 | + |
---|
| 1052 | + uc_se = &p->uclamp_req[UCLAMP_MIN]; |
---|
| 1053 | + |
---|
| 1054 | + /* Only sync if user didn't override the default */ |
---|
| 1055 | + if (uc_se->user_defined) |
---|
| 1056 | + return; |
---|
| 1057 | + |
---|
| 1058 | + default_util_min = sysctl_sched_uclamp_util_min_rt_default; |
---|
| 1059 | + uclamp_se_set(uc_se, default_util_min, false); |
---|
| 1060 | +} |
---|
| 1061 | + |
---|
| 1062 | +static void uclamp_update_util_min_rt_default(struct task_struct *p) |
---|
| 1063 | +{ |
---|
| 1064 | + struct rq_flags rf; |
---|
| 1065 | + struct rq *rq; |
---|
| 1066 | + |
---|
| 1067 | + if (!rt_task(p)) |
---|
| 1068 | + return; |
---|
| 1069 | + |
---|
| 1070 | + /* Protect updates to p->uclamp_* */ |
---|
| 1071 | + rq = task_rq_lock(p, &rf); |
---|
| 1072 | + __uclamp_update_util_min_rt_default(p); |
---|
| 1073 | + task_rq_unlock(rq, p, &rf); |
---|
| 1074 | +} |
---|
| 1075 | + |
---|
| 1076 | +static void uclamp_sync_util_min_rt_default(void) |
---|
| 1077 | +{ |
---|
| 1078 | + struct task_struct *g, *p; |
---|
| 1079 | + |
---|
| 1080 | + /* |
---|
| 1081 | + * copy_process() sysctl_uclamp |
---|
| 1082 | + * uclamp_min_rt = X; |
---|
| 1083 | + * write_lock(&tasklist_lock) read_lock(&tasklist_lock) |
---|
| 1084 | + * // link thread smp_mb__after_spinlock() |
---|
| 1085 | + * write_unlock(&tasklist_lock) read_unlock(&tasklist_lock); |
---|
| 1086 | + * sched_post_fork() for_each_process_thread() |
---|
| 1087 | + * __uclamp_sync_rt() __uclamp_sync_rt() |
---|
| 1088 | + * |
---|
| 1089 | + * Ensures that either sched_post_fork() will observe the new |
---|
| 1090 | + * uclamp_min_rt or for_each_process_thread() will observe the new |
---|
| 1091 | + * task. |
---|
| 1092 | + */ |
---|
| 1093 | + read_lock(&tasklist_lock); |
---|
| 1094 | + smp_mb__after_spinlock(); |
---|
| 1095 | + read_unlock(&tasklist_lock); |
---|
| 1096 | + |
---|
| 1097 | + rcu_read_lock(); |
---|
| 1098 | + for_each_process_thread(g, p) |
---|
| 1099 | + uclamp_update_util_min_rt_default(p); |
---|
| 1100 | + rcu_read_unlock(); |
---|
| 1101 | +} |
---|
| 1102 | + |
---|
| 1103 | +#if IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE) |
---|
| 1104 | +void rockchip_perf_uclamp_sync_util_min_rt_default(void) |
---|
| 1105 | +{ |
---|
| 1106 | + uclamp_sync_util_min_rt_default(); |
---|
| 1107 | +} |
---|
| 1108 | +EXPORT_SYMBOL(rockchip_perf_uclamp_sync_util_min_rt_default); |
---|
| 1109 | +#endif |
---|
| 1110 | + |
---|
895 | 1111 | static inline struct uclamp_se |
---|
896 | 1112 | uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id) |
---|
897 | 1113 | { |
---|
| 1114 | + /* Copy by value as we could modify it */ |
---|
898 | 1115 | struct uclamp_se uc_req = p->uclamp_req[clamp_id]; |
---|
899 | 1116 | #ifdef CONFIG_UCLAMP_TASK_GROUP |
---|
900 | | - struct uclamp_se uc_max; |
---|
| 1117 | + unsigned int tg_min, tg_max, value; |
---|
901 | 1118 | |
---|
902 | 1119 | /* |
---|
903 | 1120 | * Tasks in autogroups or root task group will be |
---|
.. | .. |
---|
908 | 1125 | if (task_group(p) == &root_task_group) |
---|
909 | 1126 | return uc_req; |
---|
910 | 1127 | |
---|
911 | | - uc_max = task_group(p)->uclamp[clamp_id]; |
---|
912 | | - if (uc_req.value > uc_max.value || !uc_req.user_defined) |
---|
913 | | - return uc_max; |
---|
| 1128 | + tg_min = task_group(p)->uclamp[UCLAMP_MIN].value; |
---|
| 1129 | + tg_max = task_group(p)->uclamp[UCLAMP_MAX].value; |
---|
| 1130 | + value = uc_req.value; |
---|
| 1131 | + value = clamp(value, tg_min, tg_max); |
---|
| 1132 | + uclamp_se_set(&uc_req, value, false); |
---|
914 | 1133 | #endif |
---|
915 | 1134 | |
---|
916 | 1135 | return uc_req; |
---|
.. | .. |
---|
929 | 1148 | { |
---|
930 | 1149 | struct uclamp_se uc_req = uclamp_tg_restrict(p, clamp_id); |
---|
931 | 1150 | struct uclamp_se uc_max = uclamp_default[clamp_id]; |
---|
| 1151 | + struct uclamp_se uc_eff; |
---|
| 1152 | + int ret = 0; |
---|
| 1153 | + |
---|
| 1154 | + trace_android_rvh_uclamp_eff_get(p, clamp_id, &uc_max, &uc_eff, &ret); |
---|
| 1155 | + if (ret) |
---|
| 1156 | + return uc_eff; |
---|
932 | 1157 | |
---|
933 | 1158 | /* System default restrictions always apply */ |
---|
934 | 1159 | if (unlikely(uc_req.value > uc_max.value)) |
---|
.. | .. |
---|
949 | 1174 | |
---|
950 | 1175 | return (unsigned long)uc_eff.value; |
---|
951 | 1176 | } |
---|
| 1177 | +EXPORT_SYMBOL_GPL(uclamp_eff_value); |
---|
952 | 1178 | |
---|
953 | 1179 | /* |
---|
954 | 1180 | * When a task is enqueued on a rq, the clamp bucket currently defined by the |
---|
.. | .. |
---|
985 | 1211 | if (bucket->tasks == 1 || uc_se->value > bucket->value) |
---|
986 | 1212 | bucket->value = uc_se->value; |
---|
987 | 1213 | |
---|
988 | | - if (uc_se->value > READ_ONCE(uc_rq->value)) |
---|
989 | | - WRITE_ONCE(uc_rq->value, uc_se->value); |
---|
| 1214 | + if (uc_se->value > uclamp_rq_get(rq, clamp_id)) |
---|
| 1215 | + uclamp_rq_set(rq, clamp_id, uc_se->value); |
---|
990 | 1216 | } |
---|
991 | 1217 | |
---|
992 | 1218 | /* |
---|
.. | .. |
---|
1009 | 1235 | |
---|
1010 | 1236 | lockdep_assert_held(&rq->lock); |
---|
1011 | 1237 | |
---|
| 1238 | + /* |
---|
| 1239 | + * If sched_uclamp_used was enabled after task @p was enqueued, |
---|
| 1240 | + * we could end up with an unbalanced call to uclamp_rq_dec_id(). |
---|
| 1241 | + * |
---|
| 1242 | + * In this case the uc_se->active flag should be false since no uclamp |
---|
| 1243 | + * accounting was performed at enqueue time and we can just return |
---|
| 1244 | + * here. |
---|
| 1245 | + * |
---|
| 1246 | + * Need to be careful of the following enqueue/dequeue ordering |
---|
| 1247 | + * problem too |
---|
| 1248 | + * |
---|
| 1249 | + * enqueue(taskA) |
---|
| 1250 | + * // sched_uclamp_used gets enabled |
---|
| 1251 | + * enqueue(taskB) |
---|
| 1252 | + * dequeue(taskA) |
---|
| 1253 | + * // Must not decrement bucket->tasks here |
---|
| 1254 | + * dequeue(taskB) |
---|
| 1255 | + * |
---|
| 1256 | + * where we could end up with stale data in uc_se and |
---|
| 1257 | + * bucket[uc_se->bucket_id]. |
---|
| 1258 | + * |
---|
| 1259 | + * The following check here eliminates the possibility of such race. |
---|
| 1260 | + */ |
---|
| 1261 | + if (unlikely(!uc_se->active)) |
---|
| 1262 | + return; |
---|
| 1263 | + |
---|
1012 | 1264 | bucket = &uc_rq->bucket[uc_se->bucket_id]; |
---|
| 1265 | + |
---|
1013 | 1266 | SCHED_WARN_ON(!bucket->tasks); |
---|
1014 | 1267 | if (likely(bucket->tasks)) |
---|
1015 | 1268 | bucket->tasks--; |
---|
| 1269 | + |
---|
1016 | 1270 | uc_se->active = false; |
---|
1017 | 1271 | |
---|
1018 | 1272 | /* |
---|
.. | .. |
---|
1024 | 1278 | if (likely(bucket->tasks)) |
---|
1025 | 1279 | return; |
---|
1026 | 1280 | |
---|
1027 | | - rq_clamp = READ_ONCE(uc_rq->value); |
---|
| 1281 | + rq_clamp = uclamp_rq_get(rq, clamp_id); |
---|
1028 | 1282 | /* |
---|
1029 | 1283 | * Defensive programming: this should never happen. If it happens, |
---|
1030 | 1284 | * e.g. due to future modification, warn and fixup the expected value. |
---|
.. | .. |
---|
1032 | 1286 | SCHED_WARN_ON(bucket->value > rq_clamp); |
---|
1033 | 1287 | if (bucket->value >= rq_clamp) { |
---|
1034 | 1288 | bkt_clamp = uclamp_rq_max_value(rq, clamp_id, uc_se->value); |
---|
1035 | | - WRITE_ONCE(uc_rq->value, bkt_clamp); |
---|
| 1289 | + uclamp_rq_set(rq, clamp_id, bkt_clamp); |
---|
1036 | 1290 | } |
---|
1037 | 1291 | } |
---|
1038 | 1292 | |
---|
1039 | 1293 | static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) |
---|
1040 | 1294 | { |
---|
1041 | 1295 | enum uclamp_id clamp_id; |
---|
| 1296 | + |
---|
| 1297 | + /* |
---|
| 1298 | + * Avoid any overhead until uclamp is actually used by the userspace. |
---|
| 1299 | + * |
---|
| 1300 | + * The condition is constructed such that a NOP is generated when |
---|
| 1301 | + * sched_uclamp_used is disabled. |
---|
| 1302 | + */ |
---|
| 1303 | + if (!static_branch_unlikely(&sched_uclamp_used)) |
---|
| 1304 | + return; |
---|
1042 | 1305 | |
---|
1043 | 1306 | if (unlikely(!p->sched_class->uclamp_enabled)) |
---|
1044 | 1307 | return; |
---|
.. | .. |
---|
1055 | 1318 | { |
---|
1056 | 1319 | enum uclamp_id clamp_id; |
---|
1057 | 1320 | |
---|
| 1321 | + /* |
---|
| 1322 | + * Avoid any overhead until uclamp is actually used by the userspace. |
---|
| 1323 | + * |
---|
| 1324 | + * The condition is constructed such that a NOP is generated when |
---|
| 1325 | + * sched_uclamp_used is disabled. |
---|
| 1326 | + */ |
---|
| 1327 | + if (!static_branch_unlikely(&sched_uclamp_used)) |
---|
| 1328 | + return; |
---|
| 1329 | + |
---|
1058 | 1330 | if (unlikely(!p->sched_class->uclamp_enabled)) |
---|
1059 | 1331 | return; |
---|
1060 | 1332 | |
---|
.. | .. |
---|
1062 | 1334 | uclamp_rq_dec_id(rq, p, clamp_id); |
---|
1063 | 1335 | } |
---|
1064 | 1336 | |
---|
1065 | | -static inline void |
---|
1066 | | -uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id) |
---|
| 1337 | +static inline void uclamp_rq_reinc_id(struct rq *rq, struct task_struct *p, |
---|
| 1338 | + enum uclamp_id clamp_id) |
---|
1067 | 1339 | { |
---|
| 1340 | + if (!p->uclamp[clamp_id].active) |
---|
| 1341 | + return; |
---|
| 1342 | + |
---|
| 1343 | + uclamp_rq_dec_id(rq, p, clamp_id); |
---|
| 1344 | + uclamp_rq_inc_id(rq, p, clamp_id); |
---|
| 1345 | + |
---|
| 1346 | + /* |
---|
| 1347 | + * Make sure to clear the idle flag if we've transiently reached 0 |
---|
| 1348 | + * active tasks on rq. |
---|
| 1349 | + */ |
---|
| 1350 | + if (clamp_id == UCLAMP_MAX && (rq->uclamp_flags & UCLAMP_FLAG_IDLE)) |
---|
| 1351 | + rq->uclamp_flags &= ~UCLAMP_FLAG_IDLE; |
---|
| 1352 | +} |
---|
| 1353 | + |
---|
| 1354 | +static inline void |
---|
| 1355 | +uclamp_update_active(struct task_struct *p) |
---|
| 1356 | +{ |
---|
| 1357 | + enum uclamp_id clamp_id; |
---|
1068 | 1358 | struct rq_flags rf; |
---|
1069 | 1359 | struct rq *rq; |
---|
1070 | 1360 | |
---|
.. | .. |
---|
1084 | 1374 | * affecting a valid clamp bucket, the next time it's enqueued, |
---|
1085 | 1375 | * it will already see the updated clamp bucket value. |
---|
1086 | 1376 | */ |
---|
1087 | | - if (p->uclamp[clamp_id].active) { |
---|
1088 | | - uclamp_rq_dec_id(rq, p, clamp_id); |
---|
1089 | | - uclamp_rq_inc_id(rq, p, clamp_id); |
---|
1090 | | - } |
---|
| 1377 | + for_each_clamp_id(clamp_id) |
---|
| 1378 | + uclamp_rq_reinc_id(rq, p, clamp_id); |
---|
1091 | 1379 | |
---|
1092 | 1380 | task_rq_unlock(rq, p, &rf); |
---|
1093 | 1381 | } |
---|
1094 | 1382 | |
---|
1095 | 1383 | #ifdef CONFIG_UCLAMP_TASK_GROUP |
---|
1096 | 1384 | static inline void |
---|
1097 | | -uclamp_update_active_tasks(struct cgroup_subsys_state *css, |
---|
1098 | | - unsigned int clamps) |
---|
| 1385 | +uclamp_update_active_tasks(struct cgroup_subsys_state *css) |
---|
1099 | 1386 | { |
---|
1100 | | - enum uclamp_id clamp_id; |
---|
1101 | 1387 | struct css_task_iter it; |
---|
1102 | 1388 | struct task_struct *p; |
---|
1103 | 1389 | |
---|
1104 | 1390 | css_task_iter_start(css, 0, &it); |
---|
1105 | | - while ((p = css_task_iter_next(&it))) { |
---|
1106 | | - for_each_clamp_id(clamp_id) { |
---|
1107 | | - if ((0x1 << clamp_id) & clamps) |
---|
1108 | | - uclamp_update_active(p, clamp_id); |
---|
1109 | | - } |
---|
1110 | | - } |
---|
| 1391 | + while ((p = css_task_iter_next(&it))) |
---|
| 1392 | + uclamp_update_active(p); |
---|
1111 | 1393 | css_task_iter_end(&it); |
---|
1112 | 1394 | } |
---|
1113 | 1395 | |
---|
.. | .. |
---|
1130 | 1412 | #endif |
---|
1131 | 1413 | |
---|
1132 | 1414 | int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, |
---|
1133 | | - void __user *buffer, size_t *lenp, |
---|
1134 | | - loff_t *ppos) |
---|
| 1415 | + void *buffer, size_t *lenp, loff_t *ppos) |
---|
1135 | 1416 | { |
---|
1136 | 1417 | bool update_root_tg = false; |
---|
1137 | | - int old_min, old_max; |
---|
| 1418 | + int old_min, old_max, old_min_rt; |
---|
1138 | 1419 | int result; |
---|
1139 | 1420 | |
---|
1140 | 1421 | mutex_lock(&uclamp_mutex); |
---|
1141 | 1422 | old_min = sysctl_sched_uclamp_util_min; |
---|
1142 | 1423 | old_max = sysctl_sched_uclamp_util_max; |
---|
| 1424 | + old_min_rt = sysctl_sched_uclamp_util_min_rt_default; |
---|
1143 | 1425 | |
---|
1144 | 1426 | result = proc_dointvec(table, write, buffer, lenp, ppos); |
---|
1145 | 1427 | if (result) |
---|
.. | .. |
---|
1148 | 1430 | goto done; |
---|
1149 | 1431 | |
---|
1150 | 1432 | if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max || |
---|
1151 | | - sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE) { |
---|
| 1433 | + sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE || |
---|
| 1434 | + sysctl_sched_uclamp_util_min_rt_default > SCHED_CAPACITY_SCALE) { |
---|
| 1435 | + |
---|
1152 | 1436 | result = -EINVAL; |
---|
1153 | 1437 | goto undo; |
---|
1154 | 1438 | } |
---|
.. | .. |
---|
1164 | 1448 | update_root_tg = true; |
---|
1165 | 1449 | } |
---|
1166 | 1450 | |
---|
1167 | | - if (update_root_tg) |
---|
| 1451 | + if (update_root_tg) { |
---|
| 1452 | + static_branch_enable(&sched_uclamp_used); |
---|
1168 | 1453 | uclamp_update_root_tg(); |
---|
| 1454 | + } |
---|
| 1455 | + |
---|
| 1456 | + if (old_min_rt != sysctl_sched_uclamp_util_min_rt_default) { |
---|
| 1457 | + static_branch_enable(&sched_uclamp_used); |
---|
| 1458 | + uclamp_sync_util_min_rt_default(); |
---|
| 1459 | + } |
---|
1169 | 1460 | |
---|
1170 | 1461 | /* |
---|
1171 | 1462 | * We update all RUNNABLE tasks only when task groups are in use. |
---|
.. | .. |
---|
1178 | 1469 | undo: |
---|
1179 | 1470 | sysctl_sched_uclamp_util_min = old_min; |
---|
1180 | 1471 | sysctl_sched_uclamp_util_max = old_max; |
---|
| 1472 | + sysctl_sched_uclamp_util_min_rt_default = old_min_rt; |
---|
1181 | 1473 | done: |
---|
1182 | 1474 | mutex_unlock(&uclamp_mutex); |
---|
1183 | 1475 | |
---|
.. | .. |
---|
1187 | 1479 | static int uclamp_validate(struct task_struct *p, |
---|
1188 | 1480 | const struct sched_attr *attr) |
---|
1189 | 1481 | { |
---|
1190 | | - unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value; |
---|
1191 | | - unsigned int upper_bound = p->uclamp_req[UCLAMP_MAX].value; |
---|
| 1482 | + int util_min = p->uclamp_req[UCLAMP_MIN].value; |
---|
| 1483 | + int util_max = p->uclamp_req[UCLAMP_MAX].value; |
---|
1192 | 1484 | |
---|
1193 | | - if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) |
---|
1194 | | - lower_bound = attr->sched_util_min; |
---|
1195 | | - if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) |
---|
1196 | | - upper_bound = attr->sched_util_max; |
---|
| 1485 | + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) { |
---|
| 1486 | + util_min = attr->sched_util_min; |
---|
1197 | 1487 | |
---|
1198 | | - if (lower_bound > upper_bound) |
---|
| 1488 | + if (util_min + 1 > SCHED_CAPACITY_SCALE + 1) |
---|
| 1489 | + return -EINVAL; |
---|
| 1490 | + } |
---|
| 1491 | + |
---|
| 1492 | + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) { |
---|
| 1493 | + util_max = attr->sched_util_max; |
---|
| 1494 | + |
---|
| 1495 | + if (util_max + 1 > SCHED_CAPACITY_SCALE + 1) |
---|
| 1496 | + return -EINVAL; |
---|
| 1497 | + } |
---|
| 1498 | + |
---|
| 1499 | + if (util_min != -1 && util_max != -1 && util_min > util_max) |
---|
1199 | 1500 | return -EINVAL; |
---|
1200 | | - if (upper_bound > SCHED_CAPACITY_SCALE) |
---|
1201 | | - return -EINVAL; |
---|
| 1501 | + |
---|
| 1502 | + /* |
---|
| 1503 | + * We have valid uclamp attributes; make sure uclamp is enabled. |
---|
| 1504 | + * |
---|
| 1505 | + * We need to do that here, because enabling static branches is a |
---|
| 1506 | + * blocking operation which obviously cannot be done while holding |
---|
| 1507 | + * scheduler locks. |
---|
| 1508 | + */ |
---|
| 1509 | + static_branch_enable(&sched_uclamp_used); |
---|
1202 | 1510 | |
---|
1203 | 1511 | return 0; |
---|
| 1512 | +} |
---|
| 1513 | + |
---|
| 1514 | +static bool uclamp_reset(const struct sched_attr *attr, |
---|
| 1515 | + enum uclamp_id clamp_id, |
---|
| 1516 | + struct uclamp_se *uc_se) |
---|
| 1517 | +{ |
---|
| 1518 | + /* Reset on sched class change for a non user-defined clamp value. */ |
---|
| 1519 | + if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)) && |
---|
| 1520 | + !uc_se->user_defined) |
---|
| 1521 | + return true; |
---|
| 1522 | + |
---|
| 1523 | + /* Reset on sched_util_{min,max} == -1. */ |
---|
| 1524 | + if (clamp_id == UCLAMP_MIN && |
---|
| 1525 | + attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN && |
---|
| 1526 | + attr->sched_util_min == -1) { |
---|
| 1527 | + return true; |
---|
| 1528 | + } |
---|
| 1529 | + |
---|
| 1530 | + if (clamp_id == UCLAMP_MAX && |
---|
| 1531 | + attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX && |
---|
| 1532 | + attr->sched_util_max == -1) { |
---|
| 1533 | + return true; |
---|
| 1534 | + } |
---|
| 1535 | + |
---|
| 1536 | + return false; |
---|
1204 | 1537 | } |
---|
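uclamp_reset() is what gives a sched_util_{min,max} value of -1 its meaning: drop the per-task request and fall back to the class/system default. The -1 sentinel also passes uclamp_validate() above, since -1 + 1 == 0 stays inside the shifted range check. A userspace sketch of triggering the reset (the helper is hypothetical; struct sched_attr and the SCHED_FLAG_* values come from the uapi headers):

```c
#define _GNU_SOURCE
#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/sched.h>		/* SCHED_FLAG_* */
#include <linux/sched/types.h>		/* struct sched_attr */

/* Clear a previously requested UCLAMP_MIN on @pid without touching its
 * policy or nice value. */
static int example_reset_uclamp_min(pid_t pid)
{
	struct sched_attr attr = {
		.size		= sizeof(attr),
		.sched_flags	= SCHED_FLAG_KEEP_ALL |
				  SCHED_FLAG_UTIL_CLAMP_MIN,
		.sched_util_min	= (uint32_t)-1,	/* reset sentinel */
	};

	return syscall(SYS_sched_setattr, pid, &attr, 0);
}
```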
1205 | 1538 | |
---|
1206 | 1539 | static void __setscheduler_uclamp(struct task_struct *p, |
---|
.. | .. |
---|
1208 | 1541 | { |
---|
1209 | 1542 | enum uclamp_id clamp_id; |
---|
1210 | 1543 | |
---|
1211 | | - /* |
---|
1212 | | - * On scheduling class change, reset to default clamps for tasks |
---|
1213 | | - * without a task-specific value. |
---|
1214 | | - */ |
---|
1215 | 1544 | for_each_clamp_id(clamp_id) { |
---|
1216 | 1545 | struct uclamp_se *uc_se = &p->uclamp_req[clamp_id]; |
---|
1217 | | - unsigned int clamp_value = uclamp_none(clamp_id); |
---|
| 1546 | + unsigned int value; |
---|
1218 | 1547 | |
---|
1219 | | - /* Keep using defined clamps across class changes */ |
---|
1220 | | - if (uc_se->user_defined) |
---|
| 1548 | + if (!uclamp_reset(attr, clamp_id, uc_se)) |
---|
1221 | 1549 | continue; |
---|
1222 | 1550 | |
---|
1223 | | - /* By default, RT tasks always get 100% boost */ |
---|
1224 | | - if (sched_feat(SUGOV_RT_MAX_FREQ) && |
---|
1225 | | - unlikely(rt_task(p) && |
---|
1226 | | - clamp_id == UCLAMP_MIN)) { |
---|
| 1551 | + /* |
---|
| 1552 | + * RT by default have a 100% boost value that could be modified |
---|
| 1553 | + * at runtime. |
---|
| 1554 | + */ |
---|
| 1555 | + if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN)) |
---|
| 1556 | + value = sysctl_sched_uclamp_util_min_rt_default; |
---|
| 1557 | + else |
---|
| 1558 | + value = uclamp_none(clamp_id); |
---|
1227 | 1559 | |
---|
1228 | | - clamp_value = uclamp_none(UCLAMP_MAX); |
---|
1229 | | - } |
---|
| 1560 | + uclamp_se_set(uc_se, value, false); |
---|
1230 | 1561 | |
---|
1231 | | - uclamp_se_set(uc_se, clamp_value, false); |
---|
1232 | 1562 | } |
---|
1233 | 1563 | |
---|
1234 | 1564 | if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP))) |
---|
1235 | 1565 | return; |
---|
1236 | 1566 | |
---|
1237 | | - if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) { |
---|
| 1567 | + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN && |
---|
| 1568 | + attr->sched_util_min != -1) { |
---|
1238 | 1569 | uclamp_se_set(&p->uclamp_req[UCLAMP_MIN], |
---|
1239 | 1570 | attr->sched_util_min, true); |
---|
| 1571 | + trace_android_vh_setscheduler_uclamp(p, UCLAMP_MIN, attr->sched_util_min); |
---|
1240 | 1572 | } |
---|
1241 | 1573 | |
---|
1242 | | - if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) { |
---|
| 1574 | + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX && |
---|
| 1575 | + attr->sched_util_max != -1) { |
---|
1243 | 1576 | uclamp_se_set(&p->uclamp_req[UCLAMP_MAX], |
---|
1244 | 1577 | attr->sched_util_max, true); |
---|
| 1578 | + trace_android_vh_setscheduler_uclamp(p, UCLAMP_MAX, attr->sched_util_max); |
---|
1245 | 1579 | } |
---|
1246 | 1580 | } |
---|
1247 | 1581 | |
---|
.. | .. |
---|
1249 | 1583 | { |
---|
1250 | 1584 | enum uclamp_id clamp_id; |
---|
1251 | 1585 | |
---|
| 1586 | + /* |
---|
| 1587 | + * We don't need to hold task_rq_lock() when updating p->uclamp_* here |
---|
| 1588 | + * as the task is still at its early fork stages. |
---|
| 1589 | + */ |
---|
1252 | 1590 | for_each_clamp_id(clamp_id) |
---|
1253 | 1591 | p->uclamp[clamp_id].active = false; |
---|
1254 | 1592 | |
---|
.. | .. |
---|
1261 | 1599 | } |
---|
1262 | 1600 | } |
---|
1263 | 1601 | |
---|
1264 | | -#ifdef CONFIG_SMP |
---|
1265 | | -unsigned int uclamp_task(struct task_struct *p) |
---|
| 1602 | +static void uclamp_post_fork(struct task_struct *p) |
---|
1266 | 1603 | { |
---|
1267 | | - unsigned long util; |
---|
1268 | | - |
---|
1269 | | - util = task_util_est(p); |
---|
1270 | | - util = max(util, uclamp_eff_value(p, UCLAMP_MIN)); |
---|
1271 | | - util = min(util, uclamp_eff_value(p, UCLAMP_MAX)); |
---|
1272 | | - |
---|
1273 | | - return util; |
---|
| 1604 | + uclamp_update_util_min_rt_default(p); |
---|
1274 | 1605 | } |
---|
1275 | 1606 | |
---|
1276 | | -bool uclamp_boosted(struct task_struct *p) |
---|
| 1607 | +static void __init init_uclamp_rq(struct rq *rq) |
---|
1277 | 1608 | { |
---|
1278 | | - return uclamp_eff_value(p, UCLAMP_MIN) > 0; |
---|
| 1609 | + enum uclamp_id clamp_id; |
---|
| 1610 | + struct uclamp_rq *uc_rq = rq->uclamp; |
---|
| 1611 | + |
---|
| 1612 | + for_each_clamp_id(clamp_id) { |
---|
| 1613 | + uc_rq[clamp_id] = (struct uclamp_rq) { |
---|
| 1614 | + .value = uclamp_none(clamp_id) |
---|
| 1615 | + }; |
---|
| 1616 | + } |
---|
| 1617 | + |
---|
| 1618 | + rq->uclamp_flags = UCLAMP_FLAG_IDLE; |
---|
1279 | 1619 | } |
---|
1280 | | - |
---|
1281 | | -bool uclamp_latency_sensitive(struct task_struct *p) |
---|
1282 | | -{ |
---|
1283 | | -#ifdef CONFIG_UCLAMP_TASK_GROUP |
---|
1284 | | - struct cgroup_subsys_state *css = task_css(p, cpu_cgrp_id); |
---|
1285 | | - struct task_group *tg; |
---|
1286 | | - |
---|
1287 | | - if (!css) |
---|
1288 | | - return false; |
---|
1289 | | - tg = container_of(css, struct task_group, css); |
---|
1290 | | - |
---|
1291 | | - return tg->latency_sensitive; |
---|
1292 | | -#else |
---|
1293 | | - return false; |
---|
1294 | | -#endif |
---|
1295 | | -} |
---|
1296 | | -#endif /* CONFIG_SMP */ |
---|
1297 | 1620 | |
---|
1298 | 1621 | static void __init init_uclamp(void) |
---|
1299 | 1622 | { |
---|
.. | .. |
---|
1301 | 1624 | enum uclamp_id clamp_id; |
---|
1302 | 1625 | int cpu; |
---|
1303 | 1626 | |
---|
1304 | | - mutex_init(&uclamp_mutex); |
---|
1305 | | - |
---|
1306 | | - for_each_possible_cpu(cpu) { |
---|
1307 | | - memset(&cpu_rq(cpu)->uclamp, 0, |
---|
1308 | | - sizeof(struct uclamp_rq)*UCLAMP_CNT); |
---|
1309 | | - cpu_rq(cpu)->uclamp_flags = 0; |
---|
1310 | | - } |
---|
| 1627 | + for_each_possible_cpu(cpu) |
---|
| 1628 | + init_uclamp_rq(cpu_rq(cpu)); |
---|
1311 | 1629 | |
---|
1312 | 1630 | for_each_clamp_id(clamp_id) { |
---|
1313 | 1631 | uclamp_se_set(&init_task.uclamp_req[clamp_id], |
---|
.. | .. |
---|
1336 | 1654 | static void __setscheduler_uclamp(struct task_struct *p, |
---|
1337 | 1655 | const struct sched_attr *attr) { } |
---|
1338 | 1656 | static inline void uclamp_fork(struct task_struct *p) { } |
---|
1339 | | - |
---|
1340 | | -long schedtune_task_margin(struct task_struct *task); |
---|
1341 | | - |
---|
1342 | | -#ifdef CONFIG_SMP |
---|
1343 | | -unsigned int uclamp_task(struct task_struct *p) |
---|
1344 | | -{ |
---|
1345 | | - unsigned long util = task_util_est(p); |
---|
1346 | | -#ifdef CONFIG_SCHED_TUNE |
---|
1347 | | - long margin = schedtune_task_margin(p); |
---|
1348 | | - |
---|
1349 | | - trace_sched_boost_task(p, util, margin); |
---|
1350 | | - |
---|
1351 | | - util += margin; |
---|
1352 | | -#endif |
---|
1353 | | - |
---|
1354 | | - return util; |
---|
1355 | | -} |
---|
1356 | | - |
---|
1357 | | -bool uclamp_boosted(struct task_struct *p) |
---|
1358 | | -{ |
---|
1359 | | -#ifdef CONFIG_SCHED_TUNE |
---|
1360 | | - return schedtune_task_boost(p) > 0; |
---|
1361 | | -#endif |
---|
1362 | | - return false; |
---|
1363 | | -} |
---|
1364 | | - |
---|
1365 | | -bool uclamp_latency_sensitive(struct task_struct *p) |
---|
1366 | | -{ |
---|
1367 | | -#ifdef CONFIG_SCHED_TUNE |
---|
1368 | | - return schedtune_prefer_idle(p) != 0; |
---|
1369 | | -#endif |
---|
1370 | | - return false; |
---|
1371 | | -} |
---|
1372 | | -#endif /* CONFIG_SMP */ |
---|
1373 | | - |
---|
| 1657 | +static inline void uclamp_post_fork(struct task_struct *p) { } |
---|
1374 | 1658 | static inline void init_uclamp(void) { } |
---|
1375 | 1659 | #endif /* CONFIG_UCLAMP_TASK */ |
---|
1376 | 1660 | |
---|
.. | .. |
---|
1385 | 1669 | } |
---|
1386 | 1670 | |
---|
1387 | 1671 | uclamp_rq_inc(rq, p); |
---|
| 1672 | + trace_android_rvh_enqueue_task(rq, p, flags); |
---|
1388 | 1673 | p->sched_class->enqueue_task(rq, p, flags); |
---|
| 1674 | + trace_android_rvh_after_enqueue_task(rq, p); |
---|
1389 | 1675 | } |
---|
1390 | 1676 | |
---|
1391 | 1677 | static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) |
---|
.. | .. |
---|
1399 | 1685 | } |
---|
1400 | 1686 | |
---|
1401 | 1687 | uclamp_rq_dec(rq, p); |
---|
| 1688 | + trace_android_rvh_dequeue_task(rq, p, flags); |
---|
1402 | 1689 | p->sched_class->dequeue_task(rq, p, flags); |
---|
| 1690 | + trace_android_rvh_after_dequeue_task(rq, p); |
---|
1403 | 1691 | } |
---|
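The trace_android_rvh_*() calls wrapped around enqueue/dequeue are ANDROID restricted vendor hooks: a GKI vendor module registers a probe once and is expected to stay loaded, since restricted hooks are not meant to be unregistered. A sketch of such a consumer (module scaffolding and probe body are illustrative; the probe prototype mirrors the arguments passed at the call sites above):

```c
#include <linux/module.h>
#include <trace/hooks/sched.h>

static void example_rvh_enqueue(void *unused, struct rq *rq,
				struct task_struct *p, int flags)
{
	/* e.g. vendor-specific accounting or task-placement hints */
}

static int __init example_vendor_sched_init(void)
{
	return register_trace_android_rvh_enqueue_task(example_rvh_enqueue,
						       NULL);
}
module_init(example_vendor_sched_init);
MODULE_LICENSE("GPL");
```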
1404 | 1692 | |
---|
1405 | 1693 | void activate_task(struct rq *rq, struct task_struct *p, int flags) |
---|
1406 | 1694 | { |
---|
1407 | | - if (task_contributes_to_load(p)) |
---|
1408 | | - rq->nr_uninterruptible--; |
---|
| 1695 | + if (task_on_rq_migrating(p)) |
---|
| 1696 | + flags |= ENQUEUE_MIGRATED; |
---|
1409 | 1697 | |
---|
1410 | 1698 | enqueue_task(rq, p, flags); |
---|
| 1699 | + |
---|
| 1700 | + p->on_rq = TASK_ON_RQ_QUEUED; |
---|
1411 | 1701 | } |
---|
| 1702 | +EXPORT_SYMBOL_GPL(activate_task); |
---|
1412 | 1703 | |
---|
1413 | 1704 | void deactivate_task(struct rq *rq, struct task_struct *p, int flags) |
---|
1414 | 1705 | { |
---|
1415 | | - if (task_contributes_to_load(p)) |
---|
1416 | | - rq->nr_uninterruptible++; |
---|
| 1706 | + p->on_rq = (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING; |
---|
1417 | 1707 | |
---|
1418 | 1708 | dequeue_task(rq, p, flags); |
---|
1419 | 1709 | } |
---|
| 1710 | +EXPORT_SYMBOL_GPL(deactivate_task); |
---|
1420 | 1711 | |
---|
1421 | | -/* |
---|
1422 | | - * __normal_prio - return the priority that is based on the static prio |
---|
1423 | | - */ |
---|
1424 | | -static inline int __normal_prio(struct task_struct *p) |
---|
| 1712 | +static inline int __normal_prio(int policy, int rt_prio, int nice) |
---|
1425 | 1713 | { |
---|
1426 | | - return p->static_prio; |
---|
| 1714 | + int prio; |
---|
| 1715 | + |
---|
| 1716 | + if (dl_policy(policy)) |
---|
| 1717 | + prio = MAX_DL_PRIO - 1; |
---|
| 1718 | + else if (rt_policy(policy)) |
---|
| 1719 | + prio = MAX_RT_PRIO - 1 - rt_prio; |
---|
| 1720 | + else |
---|
| 1721 | + prio = NICE_TO_PRIO(nice); |
---|
| 1722 | + |
---|
| 1723 | + return prio; |
---|
1427 | 1724 | } |
---|
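With this refactor, __normal_prio() computes the priority purely from its arguments, which makes the mapping easy to spot-check (MAX_DL_PRIO == 0, MAX_RT_PRIO == 100 and NICE_TO_PRIO(0) == 120 in this tree):

```c
/*
 *   __normal_prio(SCHED_DEADLINE, 0,  0) -> MAX_DL_PRIO - 1      == -1
 *   __normal_prio(SCHED_FIFO,     50, 0) -> MAX_RT_PRIO - 1 - 50 == 49
 *   __normal_prio(SCHED_NORMAL,   0,  0) -> NICE_TO_PRIO(0)      == 120
 */
```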
1428 | 1725 | |
---|
1429 | 1726 | /* |
---|
.. | .. |
---|
1435 | 1732 | */ |
---|
1436 | 1733 | static inline int normal_prio(struct task_struct *p) |
---|
1437 | 1734 | { |
---|
1438 | | - int prio; |
---|
1439 | | - |
---|
1440 | | - if (task_has_dl_policy(p)) |
---|
1441 | | - prio = MAX_DL_PRIO-1; |
---|
1442 | | - else if (task_has_rt_policy(p)) |
---|
1443 | | - prio = MAX_RT_PRIO-1 - p->rt_priority; |
---|
1444 | | - else |
---|
1445 | | - prio = __normal_prio(p); |
---|
1446 | | - return prio; |
---|
| 1735 | + return __normal_prio(p->policy, p->rt_priority, PRIO_TO_NICE(p->static_prio)); |
---|
1447 | 1736 | } |
---|
1448 | 1737 | |
---|
1449 | 1738 | /* |
---|
.. | .. |
---|
1499 | 1788 | |
---|
1500 | 1789 | void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) |
---|
1501 | 1790 | { |
---|
1502 | | - const struct sched_class *class; |
---|
1503 | | - |
---|
1504 | | - if (p->sched_class == rq->curr->sched_class) { |
---|
| 1791 | + if (p->sched_class == rq->curr->sched_class) |
---|
1505 | 1792 | rq->curr->sched_class->check_preempt_curr(rq, p, flags); |
---|
1506 | | - } else { |
---|
1507 | | - for_each_class(class) { |
---|
1508 | | - if (class == rq->curr->sched_class) |
---|
1509 | | - break; |
---|
1510 | | - if (class == p->sched_class) { |
---|
1511 | | - resched_curr(rq); |
---|
1512 | | - break; |
---|
1513 | | - } |
---|
1514 | | - } |
---|
1515 | | - } |
---|
| 1793 | + else if (p->sched_class > rq->curr->sched_class) |
---|
| 1794 | + resched_curr(rq); |
---|
1516 | 1795 | |
---|
1517 | 1796 | /* |
---|
1518 | 1797 | * A queue event has occurred, and we're going to schedule. In |
---|
.. | .. |
---|
1521 | 1800 | if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr)) |
---|
1522 | 1801 | rq_clock_skip_update(rq); |
---|
1523 | 1802 | } |
---|
| 1803 | +EXPORT_SYMBOL_GPL(check_preempt_curr); |
---|
1524 | 1804 | |
---|
1525 | 1805 | #ifdef CONFIG_SMP |
---|
1526 | 1806 | |
---|
1527 | | -static inline bool is_per_cpu_kthread(struct task_struct *p) |
---|
1528 | | -{ |
---|
1529 | | - if (!(p->flags & PF_KTHREAD)) |
---|
1530 | | - return false; |
---|
1531 | | - |
---|
1532 | | - if (p->nr_cpus_allowed != 1) |
---|
1533 | | - return false; |
---|
1534 | | - |
---|
1535 | | - return true; |
---|
1536 | | -} |
---|
1537 | | - |
---|
1538 | 1807 | /* |
---|
1539 | | - * Per-CPU kthreads are allowed to run on !actie && online CPUs, see |
---|
| 1808 | + * Per-CPU kthreads are allowed to run on !active && online CPUs, see |
---|
1540 | 1809 | * __set_cpus_allowed_ptr() and select_fallback_rq(). |
---|
1541 | 1810 | */ |
---|
1542 | 1811 | static inline bool is_cpu_allowed(struct task_struct *p, int cpu) |
---|
.. | .. |
---|
1544 | 1813 | if (!cpumask_test_cpu(cpu, p->cpus_ptr)) |
---|
1545 | 1814 | return false; |
---|
1546 | 1815 | |
---|
1547 | | - if (is_per_cpu_kthread(p) || __migrate_disabled(p)) |
---|
| 1816 | + if (is_per_cpu_kthread(p)) |
---|
1548 | 1817 | return cpu_online(cpu); |
---|
1549 | 1818 | |
---|
1550 | | - return cpu_active(cpu); |
---|
| 1819 | + if (!cpu_active(cpu)) |
---|
| 1820 | + return false; |
---|
| 1821 | + |
---|
| 1822 | + return cpumask_test_cpu(cpu, task_cpu_possible_mask(p)); |
---|
1551 | 1823 | } |
---|
1552 | 1824 | |
---|
1553 | 1825 | /* |
---|
.. | .. |
---|
1572 | 1844 | static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf, |
---|
1573 | 1845 | struct task_struct *p, int new_cpu) |
---|
1574 | 1846 | { |
---|
| 1847 | + int detached = 0; |
---|
| 1848 | + |
---|
1575 | 1849 | lockdep_assert_held(&rq->lock); |
---|
1576 | 1850 | |
---|
1577 | | - WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); |
---|
1578 | | - dequeue_task(rq, p, DEQUEUE_NOCLOCK); |
---|
1579 | | - set_task_cpu(p, new_cpu); |
---|
1580 | | - rq_unlock(rq, rf); |
---|
| 1851 | + /* |
---|
| 1852 | + * The vendor hook may drop the lock temporarily, so |
---|
| 1853 | + * pass the rq flags to unpin the lock. We expect the
---|
| 1854 | + * rq lock to be held after return. |
---|
| 1855 | + */ |
---|
| 1856 | + trace_android_rvh_migrate_queued_task(rq, rf, p, new_cpu, &detached); |
---|
| 1857 | + if (detached) |
---|
| 1858 | + goto attach; |
---|
1581 | 1859 | |
---|
| 1860 | + deactivate_task(rq, p, DEQUEUE_NOCLOCK); |
---|
| 1861 | + set_task_cpu(p, new_cpu); |
---|
| 1862 | + |
---|
| 1863 | +attach: |
---|
| 1864 | + rq_unlock(rq, rf); |
---|
1582 | 1865 | rq = cpu_rq(new_cpu); |
---|
1583 | 1866 | |
---|
1584 | 1867 | rq_lock(rq, rf); |
---|
1585 | 1868 | BUG_ON(task_cpu(p) != new_cpu); |
---|
1586 | | - enqueue_task(rq, p, 0); |
---|
1587 | | - p->on_rq = TASK_ON_RQ_QUEUED; |
---|
| 1869 | + activate_task(rq, p, 0); |
---|
1588 | 1870 | check_preempt_curr(rq, p, 0); |
---|
1589 | 1871 | |
---|
1590 | 1872 | return rq; |
---|
.. | .. |
---|
1593 | 1875 | struct migration_arg { |
---|
1594 | 1876 | struct task_struct *task; |
---|
1595 | 1877 | int dest_cpu; |
---|
1596 | | - bool done; |
---|
1597 | 1878 | }; |
---|
1598 | 1879 | |
---|
1599 | 1880 | /* |
---|
.. | .. |
---|
1629 | 1910 | struct task_struct *p = arg->task; |
---|
1630 | 1911 | struct rq *rq = this_rq(); |
---|
1631 | 1912 | struct rq_flags rf; |
---|
1632 | | - int dest_cpu = arg->dest_cpu; |
---|
1633 | | - |
---|
1634 | | - /* We don't look at arg after this point. */ |
---|
1635 | | - smp_mb(); |
---|
1636 | | - arg->done = true; |
---|
1637 | 1913 | |
---|
1638 | 1914 | /* |
---|
1639 | 1915 | * The original target CPU might have gone down and we might |
---|
.. | .. |
---|
1645 | 1921 | * __migrate_task() such that we will not miss enforcing cpus_ptr |
---|
1646 | 1922 | * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. |
---|
1647 | 1923 | */ |
---|
1648 | | - sched_ttwu_pending(); |
---|
| 1924 | + flush_smp_call_function_from_idle(); |
---|
1649 | 1925 | |
---|
1650 | 1926 | raw_spin_lock(&p->pi_lock); |
---|
1651 | 1927 | rq_lock(rq, &rf); |
---|
.. | .. |
---|
1656 | 1932 | */ |
---|
1657 | 1933 | if (task_rq(p) == rq) { |
---|
1658 | 1934 | if (task_on_rq_queued(p)) |
---|
1659 | | - rq = __migrate_task(rq, &rf, p, dest_cpu); |
---|
| 1935 | + rq = __migrate_task(rq, &rf, p, arg->dest_cpu); |
---|
1660 | 1936 | else |
---|
1661 | | - p->wake_cpu = dest_cpu; |
---|
| 1937 | + p->wake_cpu = arg->dest_cpu; |
---|
1662 | 1938 | } |
---|
1663 | 1939 | rq_unlock(rq, &rf); |
---|
1664 | 1940 | raw_spin_unlock(&p->pi_lock); |
---|
.. | .. |
---|
1674 | 1950 | void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) |
---|
1675 | 1951 | { |
---|
1676 | 1952 | cpumask_copy(&p->cpus_mask, new_mask); |
---|
1677 | | - if (p->cpus_ptr == &p->cpus_mask) |
---|
1678 | | - p->nr_cpus_allowed = cpumask_weight(new_mask); |
---|
| 1953 | + p->nr_cpus_allowed = cpumask_weight(new_mask); |
---|
| 1954 | + trace_android_rvh_set_cpus_allowed_comm(p, new_mask); |
---|
1679 | 1955 | } |
---|
1680 | | - |
---|
1681 | | -#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) |
---|
1682 | | -int __migrate_disabled(struct task_struct *p) |
---|
1683 | | -{ |
---|
1684 | | - return p->migrate_disable; |
---|
1685 | | -} |
---|
1686 | | -EXPORT_SYMBOL_GPL(__migrate_disabled); |
---|
1687 | | -#endif |
---|
1688 | 1956 | |
---|
1689 | 1957 | void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) |
---|
1690 | 1958 | { |
---|
.. | .. |
---|
1712 | 1980 | if (queued) |
---|
1713 | 1981 | enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); |
---|
1714 | 1982 | if (running) |
---|
1715 | | - set_curr_task(rq, p); |
---|
| 1983 | + set_next_task(rq, p); |
---|
1716 | 1984 | } |
---|
1717 | 1985 | |
---|
1718 | 1986 | /* |
---|
1719 | | - * Change a given task's CPU affinity. Migrate the thread to a |
---|
1720 | | - * proper CPU and schedule it away if the CPU it's executing on |
---|
1721 | | - * is removed from the allowed bitmask. |
---|
1722 | | - * |
---|
1723 | | - * NOTE: the caller must have a valid reference to the task, the |
---|
1724 | | - * task must not exit() & deallocate itself prematurely. The |
---|
1725 | | - * call is not atomic; no spinlocks may be held. |
---|
| 1987 | + * Called with both p->pi_lock and rq->lock held; drops both before returning. |
---|
1726 | 1988 | */ |
---|
1727 | | -static int __set_cpus_allowed_ptr(struct task_struct *p, |
---|
1728 | | - const struct cpumask *new_mask, bool check) |
---|
| 1989 | +static int __set_cpus_allowed_ptr_locked(struct task_struct *p, |
---|
| 1990 | + const struct cpumask *new_mask, |
---|
| 1991 | + bool check, |
---|
| 1992 | + struct rq *rq, |
---|
| 1993 | + struct rq_flags *rf) |
---|
1729 | 1994 | { |
---|
1730 | 1995 | const struct cpumask *cpu_valid_mask = cpu_active_mask; |
---|
| 1996 | + const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p); |
---|
1731 | 1997 | unsigned int dest_cpu; |
---|
1732 | | - struct rq_flags rf; |
---|
1733 | | - struct rq *rq; |
---|
1734 | 1998 | int ret = 0; |
---|
1735 | 1999 | |
---|
1736 | | - rq = task_rq_lock(p, &rf); |
---|
1737 | 2000 | update_rq_clock(rq); |
---|
1738 | 2001 | |
---|
1739 | 2002 | if (p->flags & PF_KTHREAD) { |
---|
.. | .. |
---|
1741 | 2004 | * Kernel threads are allowed on online && !active CPUs |
---|
1742 | 2005 | */ |
---|
1743 | 2006 | cpu_valid_mask = cpu_online_mask; |
---|
| 2007 | + } else if (!cpumask_subset(new_mask, cpu_allowed_mask)) { |
---|
| 2008 | + ret = -EINVAL; |
---|
| 2009 | + goto out; |
---|
1744 | 2010 | } |
---|
1745 | 2011 | |
---|
1746 | 2012 | /* |
---|
.. | .. |
---|
1755 | 2021 | if (cpumask_equal(&p->cpus_mask, new_mask)) |
---|
1756 | 2022 | goto out; |
---|
1757 | 2023 | |
---|
1758 | | - dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); |
---|
| 2024 | + /* |
---|
| 2025 | + * Picking a ~random cpu helps in cases where we are changing affinity |
---|
| 2026 | + * for groups of tasks (ie. cpuset), so that load balancing is not |
---|
| 2027 | + * immediately required to distribute the tasks within their new mask. |
---|
| 2028 | + */ |
---|
| 2029 | + dest_cpu = cpumask_any_and_distribute(cpu_valid_mask, new_mask); |
---|
1759 | 2030 | if (dest_cpu >= nr_cpu_ids) { |
---|
1760 | 2031 | ret = -EINVAL; |
---|
1761 | 2032 | goto out; |
---|
.. | .. |
---|
1774 | 2045 | } |
---|
1775 | 2046 | |
---|
1776 | 2047 | /* Can the task run on the task's current CPU? If so, we're done */ |
---|
1777 | | - if (cpumask_test_cpu(task_cpu(p), new_mask) || |
---|
1778 | | - p->cpus_ptr != &p->cpus_mask) |
---|
| 2048 | + if (cpumask_test_cpu(task_cpu(p), new_mask)) |
---|
1779 | 2049 | goto out; |
---|
1780 | 2050 | |
---|
1781 | 2051 | if (task_running(rq, p) || p->state == TASK_WAKING) { |
---|
1782 | 2052 | struct migration_arg arg = { p, dest_cpu }; |
---|
1783 | 2053 | /* Need help from migration thread: drop lock and wait. */ |
---|
1784 | | - task_rq_unlock(rq, p, &rf); |
---|
| 2054 | + task_rq_unlock(rq, p, rf); |
---|
1785 | 2055 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); |
---|
1786 | | - tlb_migrate_finish(p->mm); |
---|
1787 | 2056 | return 0; |
---|
1788 | 2057 | } else if (task_on_rq_queued(p)) { |
---|
1789 | 2058 | /* |
---|
1790 | 2059 | * OK, since we're going to drop the lock immediately |
---|
1791 | 2060 | * afterwards anyway. |
---|
1792 | 2061 | */ |
---|
1793 | | - rq = move_queued_task(rq, &rf, p, dest_cpu); |
---|
| 2062 | + rq = move_queued_task(rq, rf, p, dest_cpu); |
---|
1794 | 2063 | } |
---|
1795 | 2064 | out: |
---|
1796 | | - task_rq_unlock(rq, p, &rf); |
---|
| 2065 | + task_rq_unlock(rq, p, rf); |
---|
1797 | 2066 | |
---|
1798 | 2067 | return ret; |
---|
| 2068 | +} |
---|
| 2069 | + |
---|
| 2070 | +/* |
---|
| 2071 | + * Change a given task's CPU affinity. Migrate the thread to a |
---|
| 2072 | + * proper CPU and schedule it away if the CPU it's executing on |
---|
| 2073 | + * is removed from the allowed bitmask. |
---|
| 2074 | + * |
---|
| 2075 | + * NOTE: the caller must have a valid reference to the task, the |
---|
| 2076 | + * task must not exit() & deallocate itself prematurely. The |
---|
| 2077 | + * call is not atomic; no spinlocks may be held. |
---|
| 2078 | + */ |
---|
| 2079 | +static int __set_cpus_allowed_ptr(struct task_struct *p, |
---|
| 2080 | + const struct cpumask *new_mask, bool check) |
---|
| 2081 | +{ |
---|
| 2082 | + struct rq_flags rf; |
---|
| 2083 | + struct rq *rq; |
---|
| 2084 | + |
---|
| 2085 | + rq = task_rq_lock(p, &rf); |
---|
| 2086 | + return __set_cpus_allowed_ptr_locked(p, new_mask, check, rq, &rf); |
---|
1799 | 2087 | } |
---|
1800 | 2088 | |
---|
1801 | 2089 | int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) |
---|
.. | .. |
---|
1803 | 2091 | return __set_cpus_allowed_ptr(p, new_mask, false); |
---|
1804 | 2092 | } |
---|
1805 | 2093 | EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); |
---|
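
set_cpus_allowed_ptr() remains the module-facing entry point of the refactored path above. An illustrative kernel-context sketch of a caller (the worker thread and its loop are hypothetical, not part of this patch):

/* Illustrative only: bind a hypothetical worker kthread to CPU 2. */
static int my_worker_fn(void *unused)
{
	int ret = set_cpus_allowed_ptr(current, cpumask_of(2));

	if (ret)
		pr_warn("my_worker: could not pin to CPU 2: %d\n", ret);

	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);

	return 0;
}
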
| 2094 | + |
---|
| 2095 | +/* |
---|
| 2096 | + * Change a given task's CPU affinity to the intersection of its current |
---|
| 2097 | + * affinity mask and @subset_mask, writing the resulting mask to @new_mask. |
---|
| 2098 | + * If the resulting mask is empty, leave the affinity unchanged and return |
---|
| 2099 | + * -EINVAL. |
---|
| 2100 | + */ |
---|
| 2101 | +static int restrict_cpus_allowed_ptr(struct task_struct *p, |
---|
| 2102 | + struct cpumask *new_mask, |
---|
| 2103 | + const struct cpumask *subset_mask) |
---|
| 2104 | +{ |
---|
| 2105 | + struct rq_flags rf; |
---|
| 2106 | + struct rq *rq; |
---|
| 2107 | + |
---|
| 2108 | + rq = task_rq_lock(p, &rf); |
---|
| 2109 | + if (!cpumask_and(new_mask, &p->cpus_mask, subset_mask)) { |
---|
| 2110 | + task_rq_unlock(rq, p, &rf); |
---|
| 2111 | + return -EINVAL; |
---|
| 2112 | + } |
---|
| 2113 | + |
---|
| 2114 | + return __set_cpus_allowed_ptr_locked(p, new_mask, false, rq, &rf); |
---|
| 2115 | +} |
---|
| 2116 | + |
---|
| 2117 | +/* |
---|
| 2118 | + * Restrict a given task's CPU affinity so that it is a subset of |
---|
| 2119 | + * task_cpu_possible_mask(). If the resulting mask is empty, we warn and |
---|
| 2120 | + * walk up the cpuset hierarchy until we find a suitable mask. |
---|
| 2121 | + */ |
---|
| 2122 | +void force_compatible_cpus_allowed_ptr(struct task_struct *p) |
---|
| 2123 | +{ |
---|
| 2124 | + cpumask_var_t new_mask; |
---|
| 2125 | + const struct cpumask *override_mask = task_cpu_possible_mask(p); |
---|
| 2126 | + |
---|
| 2127 | + alloc_cpumask_var(&new_mask, GFP_KERNEL); |
---|
| 2128 | + |
---|
| 2129 | + /* |
---|
| 2130 | + * __migrate_task() can fail silently in the face of concurrent |
---|
| 2131 | + * offlining of the chosen destination CPU, so take the hotplug |
---|
| 2132 | + * lock to ensure that the migration succeeds. |
---|
| 2133 | + */ |
---|
| 2134 | + trace_android_rvh_force_compatible_pre(NULL); |
---|
| 2135 | + cpus_read_lock(); |
---|
| 2136 | + if (!cpumask_available(new_mask)) |
---|
| 2137 | + goto out_set_mask; |
---|
| 2138 | + |
---|
| 2139 | + if (!restrict_cpus_allowed_ptr(p, new_mask, override_mask)) |
---|
| 2140 | + goto out_free_mask; |
---|
| 2141 | + |
---|
| 2142 | + /* |
---|
| 2143 | + * We failed to find a valid subset of the affinity mask for the |
---|
| 2144 | + * task, so override it based on its cpuset hierarchy. |
---|
| 2145 | + */ |
---|
| 2146 | + cpuset_cpus_allowed(p, new_mask); |
---|
| 2147 | + override_mask = new_mask; |
---|
| 2148 | + |
---|
| 2149 | +out_set_mask: |
---|
| 2150 | + if (printk_ratelimit()) { |
---|
| 2151 | + printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n", |
---|
| 2152 | + task_pid_nr(p), p->comm, |
---|
| 2153 | + cpumask_pr_args(override_mask)); |
---|
| 2154 | + } |
---|
| 2155 | + |
---|
| 2156 | + WARN_ON(set_cpus_allowed_ptr(p, override_mask)); |
---|
| 2157 | +out_free_mask: |
---|
| 2158 | + cpus_read_unlock(); |
---|
| 2159 | + trace_android_rvh_force_compatible_post(NULL); |
---|
| 2160 | + free_cpumask_var(new_mask); |
---|
| 2161 | +} |
---|
1806 | 2162 | |
---|
1807 | 2163 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) |
---|
1808 | 2164 | { |
---|
.. | .. |
---|
1851 | 2207 | p->se.nr_migrations++; |
---|
1852 | 2208 | rseq_migrate(p); |
---|
1853 | 2209 | perf_event_task_migrate(p); |
---|
| 2210 | + trace_android_rvh_set_task_cpu(p, new_cpu); |
---|
1854 | 2211 | } |
---|
1855 | 2212 | |
---|
1856 | 2213 | __set_task_cpu(p, new_cpu); |
---|
1857 | 2214 | } |
---|
| 2215 | +EXPORT_SYMBOL_GPL(set_task_cpu); |
---|
1858 | 2216 | |
---|
1859 | | -#ifdef CONFIG_NUMA_BALANCING |
---|
1860 | 2217 | static void __migrate_swap_task(struct task_struct *p, int cpu) |
---|
1861 | 2218 | { |
---|
1862 | 2219 | if (task_on_rq_queued(p)) { |
---|
.. | .. |
---|
1869 | 2226 | rq_pin_lock(src_rq, &srf); |
---|
1870 | 2227 | rq_pin_lock(dst_rq, &drf); |
---|
1871 | 2228 | |
---|
1872 | | - p->on_rq = TASK_ON_RQ_MIGRATING; |
---|
1873 | 2229 | deactivate_task(src_rq, p, 0); |
---|
1874 | 2230 | set_task_cpu(p, cpu); |
---|
1875 | 2231 | activate_task(dst_rq, p, 0); |
---|
1876 | | - p->on_rq = TASK_ON_RQ_QUEUED; |
---|
1877 | 2232 | check_preempt_curr(dst_rq, p, 0); |
---|
1878 | 2233 | |
---|
1879 | 2234 | rq_unpin_lock(dst_rq, &drf); |
---|
.. | .. |
---|
1973 | 2328 | out: |
---|
1974 | 2329 | return ret; |
---|
1975 | 2330 | } |
---|
1976 | | -#endif /* CONFIG_NUMA_BALANCING */ |
---|
1977 | | - |
---|
1978 | | -static bool check_task_state(struct task_struct *p, long match_state) |
---|
1979 | | -{ |
---|
1980 | | - bool match = false; |
---|
1981 | | - |
---|
1982 | | - raw_spin_lock_irq(&p->pi_lock); |
---|
1983 | | - if (p->state == match_state || p->saved_state == match_state) |
---|
1984 | | - match = true; |
---|
1985 | | - raw_spin_unlock_irq(&p->pi_lock); |
---|
1986 | | - |
---|
1987 | | - return match; |
---|
1988 | | -} |
---|
| 2331 | +EXPORT_SYMBOL_GPL(migrate_swap); |
---|
1989 | 2332 | |
---|
1990 | 2333 | /* |
---|
1991 | 2334 | * wait_task_inactive - wait for a thread to unschedule. |
---|
.. | .. |
---|
2031 | 2374 | * is actually now running somewhere else! |
---|
2032 | 2375 | */ |
---|
2033 | 2376 | while (task_running(rq, p)) { |
---|
2034 | | - if (match_state && !check_task_state(p, match_state)) |
---|
| 2377 | + if (match_state && unlikely(p->state != match_state)) |
---|
2035 | 2378 | return 0; |
---|
2036 | 2379 | cpu_relax(); |
---|
2037 | 2380 | } |
---|
.. | .. |
---|
2046 | 2389 | running = task_running(rq, p); |
---|
2047 | 2390 | queued = task_on_rq_queued(p); |
---|
2048 | 2391 | ncsw = 0; |
---|
2049 | | - if (!match_state || p->state == match_state || |
---|
2050 | | - p->saved_state == match_state) |
---|
| 2392 | + if (!match_state || p->state == match_state) |
---|
2051 | 2393 | ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ |
---|
2052 | 2394 | task_rq_unlock(rq, p, &rf); |
---|
2053 | 2395 | |
---|
.. | .. |
---|
2148 | 2490 | int nid = cpu_to_node(cpu); |
---|
2149 | 2491 | const struct cpumask *nodemask = NULL; |
---|
2150 | 2492 | enum { cpuset, possible, fail } state = cpuset; |
---|
2151 | | - int dest_cpu; |
---|
| 2493 | + int dest_cpu = -1; |
---|
| 2494 | + |
---|
| 2495 | + trace_android_rvh_select_fallback_rq(cpu, p, &dest_cpu); |
---|
| 2496 | + if (dest_cpu >= 0) |
---|
| 2497 | + return dest_cpu; |
---|
2152 | 2498 | |
---|
2153 | 2499 | /* |
---|
2154 | 2500 | * If the node that the CPU is on has been offlined, cpu_to_node() |
---|
.. | .. |
---|
2160 | 2506 | |
---|
2161 | 2507 | /* Look for allowed, online CPU in same node. */ |
---|
2162 | 2508 | for_each_cpu(dest_cpu, nodemask) { |
---|
2163 | | - if (!cpu_active(dest_cpu)) |
---|
2164 | | - continue; |
---|
2165 | | - if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) |
---|
| 2509 | + if (is_cpu_allowed(p, dest_cpu)) |
---|
2166 | 2510 | return dest_cpu; |
---|
2167 | 2511 | } |
---|
2168 | 2512 | } |
---|
.. | .. |
---|
2184 | 2528 | state = possible; |
---|
2185 | 2529 | break; |
---|
2186 | 2530 | } |
---|
2187 | | - /* Fall-through */ |
---|
| 2531 | + fallthrough; |
---|
2188 | 2532 | case possible: |
---|
2189 | | - do_set_cpus_allowed(p, cpu_possible_mask); |
---|
| 2533 | + do_set_cpus_allowed(p, task_cpu_possible_mask(p)); |
---|
2190 | 2534 | state = fail; |
---|
2191 | 2535 | break; |
---|
2192 | | - |
---|
2193 | 2536 | case fail: |
---|
2194 | 2537 | BUG(); |
---|
2195 | 2538 | break; |
---|
.. | .. |
---|
2216 | 2559 | * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable. |
---|
2217 | 2560 | */ |
---|
2218 | 2561 | static inline |
---|
2219 | | -int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags, |
---|
2220 | | - int sibling_count_hint) |
---|
| 2562 | +int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) |
---|
2221 | 2563 | { |
---|
2222 | 2564 | lockdep_assert_held(&p->pi_lock); |
---|
2223 | 2565 | |
---|
2224 | 2566 | if (p->nr_cpus_allowed > 1) |
---|
2225 | | - cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags, |
---|
2226 | | - sibling_count_hint); |
---|
| 2567 | + cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); |
---|
2227 | 2568 | else |
---|
2228 | 2569 | cpu = cpumask_any(p->cpus_ptr); |
---|
2229 | 2570 | |
---|
.. | .. |
---|
2241 | 2582 | cpu = select_fallback_rq(task_cpu(p), p); |
---|
2242 | 2583 | |
---|
2243 | 2584 | return cpu; |
---|
2244 | | -} |
---|
2245 | | - |
---|
2246 | | -static void update_avg(u64 *avg, u64 sample) |
---|
2247 | | -{ |
---|
2248 | | - s64 diff = sample - *avg; |
---|
2249 | | - *avg += diff >> 3; |
---|
2250 | 2585 | } |
---|
2251 | 2586 | |
---|
2252 | 2587 | void sched_set_stop_task(int cpu, struct task_struct *stop) |
---|
.. | .. |
---|
2328 | 2663 | __schedstat_inc(p->se.statistics.nr_wakeups_sync); |
---|
2329 | 2664 | } |
---|
2330 | 2665 | |
---|
2331 | | -static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags) |
---|
2332 | | -{ |
---|
2333 | | - activate_task(rq, p, en_flags); |
---|
2334 | | - p->on_rq = TASK_ON_RQ_QUEUED; |
---|
2335 | | -} |
---|
2336 | | - |
---|
2337 | 2666 | /* |
---|
2338 | 2667 | * Mark the task runnable and perform wakeup-preemption. |
---|
2339 | 2668 | */ |
---|
.. | .. |
---|
2375 | 2704 | { |
---|
2376 | 2705 | int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK; |
---|
2377 | 2706 | |
---|
| 2707 | + if (wake_flags & WF_SYNC) |
---|
| 2708 | + en_flags |= ENQUEUE_WAKEUP_SYNC; |
---|
| 2709 | + |
---|
2378 | 2710 | lockdep_assert_held(&rq->lock); |
---|
2379 | 2711 | |
---|
2380 | | -#ifdef CONFIG_SMP |
---|
2381 | 2712 | if (p->sched_contributes_to_load) |
---|
2382 | 2713 | rq->nr_uninterruptible--; |
---|
2383 | 2714 | |
---|
| 2715 | +#ifdef CONFIG_SMP |
---|
2384 | 2716 | if (wake_flags & WF_MIGRATED) |
---|
2385 | 2717 | en_flags |= ENQUEUE_MIGRATED; |
---|
| 2718 | + else |
---|
2386 | 2719 | #endif |
---|
| 2720 | + if (p->in_iowait) { |
---|
| 2721 | + delayacct_blkio_end(p); |
---|
| 2722 | + atomic_dec(&task_rq(p)->nr_iowait); |
---|
| 2723 | + } |
---|
2387 | 2724 | |
---|
2388 | | - ttwu_activate(rq, p, en_flags); |
---|
| 2725 | + activate_task(rq, p, en_flags); |
---|
2389 | 2726 | ttwu_do_wakeup(rq, p, wake_flags, rf); |
---|
2390 | 2727 | } |
---|
2391 | 2728 | |
---|
2392 | 2729 | /* |
---|
2393 | | - * Called in case the task @p isn't fully descheduled from its runqueue, |
---|
2394 | | - * in this case we must do a remote wakeup. Its a 'light' wakeup though, |
---|
2395 | | - * since all we need to do is flip p->state to TASK_RUNNING, since |
---|
2396 | | - * the task is still ->on_rq. |
---|
| 2730 | + * Consider @p being inside a wait loop: |
---|
| 2731 | + * |
---|
| 2732 | + * for (;;) { |
---|
| 2733 | + * set_current_state(TASK_UNINTERRUPTIBLE); |
---|
| 2734 | + * |
---|
| 2735 | + * if (CONDITION) |
---|
| 2736 | + * break; |
---|
| 2737 | + * |
---|
| 2738 | + * schedule(); |
---|
| 2739 | + * } |
---|
| 2740 | + * __set_current_state(TASK_RUNNING); |
---|
| 2741 | + * |
---|
| 2742 | + * between set_current_state() and schedule(). In this case @p is still |
---|
| 2743 | + * runnable, so all that needs doing is change p->state back to TASK_RUNNING in |
---|
| 2744 | + * an atomic manner. |
---|
| 2745 | + * |
---|
| 2746 | + * By taking task_rq(p)->lock we serialize against schedule(), if @p->on_rq |
---|
| 2747 | + * then schedule() must still happen and p->state can be changed to |
---|
| 2748 | + * TASK_RUNNING. Otherwise we lost the race, schedule() has happened, and we |
---|
| 2749 | + * need to do a full wakeup with enqueue. |
---|
| 2750 | + * |
---|
| 2751 | + * Returns: %true when the wakeup is done, |
---|
| 2752 | + * %false otherwise. |
---|
2397 | 2753 | */ |
---|
2398 | | -static int ttwu_remote(struct task_struct *p, int wake_flags) |
---|
| 2754 | +static int ttwu_runnable(struct task_struct *p, int wake_flags) |
---|
2399 | 2755 | { |
---|
2400 | 2756 | struct rq_flags rf; |
---|
2401 | 2757 | struct rq *rq; |
---|
.. | .. |
---|
2414 | 2770 | } |
---|
2415 | 2771 | |
---|
2416 | 2772 | #ifdef CONFIG_SMP |
---|
2417 | | -void sched_ttwu_pending(void) |
---|
| 2773 | +void sched_ttwu_pending(void *arg) |
---|
2418 | 2774 | { |
---|
| 2775 | + struct llist_node *llist = arg; |
---|
2419 | 2776 | struct rq *rq = this_rq(); |
---|
2420 | | - struct llist_node *llist = llist_del_all(&rq->wake_list); |
---|
2421 | 2777 | struct task_struct *p, *t; |
---|
2422 | 2778 | struct rq_flags rf; |
---|
2423 | 2779 | |
---|
2424 | 2780 | if (!llist) |
---|
2425 | 2781 | return; |
---|
2426 | 2782 | |
---|
| 2783 | + /* |
---|
| 2784 | + * rq::ttwu_pending is a racy indication of outstanding wakeups.
---|
| 2785 | + * Races such that false-negatives are possible, since they
---|
| 2786 | + * are shorter lived than false-positives would be.
---|
| 2787 | + */ |
---|
| 2788 | + WRITE_ONCE(rq->ttwu_pending, 0); |
---|
| 2789 | + |
---|
2427 | 2790 | rq_lock_irqsave(rq, &rf); |
---|
2428 | 2791 | update_rq_clock(rq); |
---|
2429 | 2792 | |
---|
2430 | | - llist_for_each_entry_safe(p, t, llist, wake_entry) |
---|
| 2793 | + llist_for_each_entry_safe(p, t, llist, wake_entry.llist) { |
---|
| 2794 | + if (WARN_ON_ONCE(p->on_cpu)) |
---|
| 2795 | + smp_cond_load_acquire(&p->on_cpu, !VAL); |
---|
| 2796 | + |
---|
| 2797 | + if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq))) |
---|
| 2798 | + set_task_cpu(p, cpu_of(rq)); |
---|
| 2799 | + |
---|
2431 | 2800 | ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf); |
---|
| 2801 | + } |
---|
2432 | 2802 | |
---|
2433 | 2803 | rq_unlock_irqrestore(rq, &rf); |
---|
2434 | 2804 | } |
---|
2435 | 2805 | |
---|
2436 | | -void scheduler_ipi(void) |
---|
| 2806 | +void send_call_function_single_ipi(int cpu) |
---|
2437 | 2807 | { |
---|
2438 | | - /* |
---|
2439 | | - * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting |
---|
2440 | | - * TIF_NEED_RESCHED remotely (for the first time) will also send |
---|
2441 | | - * this IPI. |
---|
2442 | | - */ |
---|
2443 | | - preempt_fold_need_resched(); |
---|
| 2808 | + struct rq *rq = cpu_rq(cpu); |
---|
2444 | 2809 | |
---|
2445 | | - if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) |
---|
2446 | | - return; |
---|
2447 | | - |
---|
2448 | | - /* |
---|
2449 | | - * Not all reschedule IPI handlers call irq_enter/irq_exit, since |
---|
2450 | | - * traditionally all their work was done from the interrupt return |
---|
2451 | | - * path. Now that we actually do some work, we need to make sure |
---|
2452 | | - * we do call them. |
---|
2453 | | - * |
---|
2454 | | - * Some archs already do call them, luckily irq_enter/exit nest |
---|
2455 | | - * properly. |
---|
2456 | | - * |
---|
2457 | | - * Arguably we should visit all archs and update all handlers, |
---|
2458 | | - * however a fair share of IPIs are still resched only so this would |
---|
2459 | | - * somewhat pessimize the simple resched case. |
---|
2460 | | - */ |
---|
2461 | | - irq_enter(); |
---|
2462 | | - sched_ttwu_pending(); |
---|
2463 | | - |
---|
2464 | | - /* |
---|
2465 | | - * Check if someone kicked us for doing the nohz idle load balance. |
---|
2466 | | - */ |
---|
2467 | | - if (unlikely(got_nohz_idle_kick())) { |
---|
2468 | | - this_rq()->idle_balance = 1; |
---|
2469 | | - raise_softirq_irqoff(SCHED_SOFTIRQ); |
---|
2470 | | - } |
---|
2471 | | - irq_exit(); |
---|
| 2810 | + if (!set_nr_if_polling(rq->idle)) |
---|
| 2811 | + arch_send_call_function_single_ipi(cpu); |
---|
| 2812 | + else |
---|
| 2813 | + trace_sched_wake_idle_without_ipi(cpu); |
---|
2472 | 2814 | } |
---|
2473 | 2815 | |
---|
2474 | | -static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) |
---|
| 2816 | +/* |
---|
| 2817 | + * Queue a task on the target CPUs wake_list and wake the CPU via IPI if |
---|
| 2818 | + * necessary. The wakee CPU on receipt of the IPI will queue the task |
---|
| 2819 | + * via sched_ttwu_pending() for activation so the wakee incurs the cost
---|
| 2820 | + * of the wakeup instead of the waker. |
---|
| 2821 | + */ |
---|
| 2822 | +static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) |
---|
2475 | 2823 | { |
---|
2476 | 2824 | struct rq *rq = cpu_rq(cpu); |
---|
2477 | 2825 | |
---|
2478 | 2826 | p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); |
---|
2479 | 2827 | |
---|
2480 | | - if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) { |
---|
2481 | | - if (!set_nr_if_polling(rq->idle)) |
---|
2482 | | - smp_send_reschedule(cpu); |
---|
2483 | | - else |
---|
2484 | | - trace_sched_wake_idle_without_ipi(cpu); |
---|
2485 | | - } |
---|
| 2828 | + WRITE_ONCE(rq->ttwu_pending, 1); |
---|
| 2829 | + __smp_call_single_queue(cpu, &p->wake_entry.llist); |
---|
2486 | 2830 | } |
---|
2487 | 2831 | |
---|
2488 | 2832 | void wake_up_if_idle(int cpu) |
---|
.. | .. |
---|
2508 | 2852 | out: |
---|
2509 | 2853 | rcu_read_unlock(); |
---|
2510 | 2854 | } |
---|
| 2855 | +EXPORT_SYMBOL_GPL(wake_up_if_idle); |
---|
2511 | 2856 | |
---|
2512 | 2857 | bool cpus_share_cache(int this_cpu, int that_cpu) |
---|
2513 | 2858 | { |
---|
.. | .. |
---|
2516 | 2861 | |
---|
2517 | 2862 | return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); |
---|
2518 | 2863 | } |
---|
| 2864 | + |
---|
| 2865 | +static inline bool ttwu_queue_cond(int cpu, int wake_flags) |
---|
| 2866 | +{ |
---|
| 2867 | + /* |
---|
| 2868 | + * If the CPU does not share cache, then queue the task on the |
---|
| 2869 | + * remote rqs wakelist to avoid accessing remote data. |
---|
| 2870 | + */ |
---|
| 2871 | + if (!cpus_share_cache(smp_processor_id(), cpu)) |
---|
| 2872 | + return true; |
---|
| 2873 | + |
---|
| 2874 | + /* |
---|
| 2875 | + * If the task is descheduling and the only running task on the |
---|
| 2876 | + * CPU then use the wakelist to offload the task activation to |
---|
| 2877 | + * the soon-to-be-idle CPU as the current CPU is likely busy. |
---|
| 2878 | + * nr_running is checked to avoid unnecessary task stacking. |
---|
| 2879 | + * |
---|
| 2880 | + * Note that we can only get here with (wakee) p->on_rq=0, |
---|
| 2881 | + * p->on_cpu can be whatever, we've done the dequeue, so |
---|
| 2882 | + * the wakee has been accounted out of ->nr_running. |
---|
| 2883 | + */ |
---|
| 2884 | + if ((wake_flags & WF_ON_CPU) && !cpu_rq(cpu)->nr_running) |
---|
| 2885 | + return true; |
---|
| 2886 | + |
---|
| 2887 | + return false; |
---|
| 2888 | +} |
---|
| 2889 | + |
---|
| 2890 | +static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) |
---|
| 2891 | +{ |
---|
| 2892 | + bool cond = false; |
---|
| 2893 | + |
---|
| 2894 | + trace_android_rvh_ttwu_cond(&cond); |
---|
| 2895 | + |
---|
| 2896 | + if ((sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) || |
---|
| 2897 | + cond) { |
---|
| 2898 | + if (WARN_ON_ONCE(cpu == smp_processor_id())) |
---|
| 2899 | + return false; |
---|
| 2900 | + |
---|
| 2901 | + sched_clock_cpu(cpu); /* Sync clocks across CPUs */ |
---|
| 2902 | + __ttwu_queue_wakelist(p, cpu, wake_flags); |
---|
| 2903 | + return true; |
---|
| 2904 | + } |
---|
| 2905 | + |
---|
| 2906 | + return false; |
---|
| 2907 | +} |
---|
| 2908 | + |
---|
| 2909 | +#else /* !CONFIG_SMP */ |
---|
| 2910 | + |
---|
| 2911 | +static inline bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) |
---|
| 2912 | +{ |
---|
| 2913 | + return false; |
---|
| 2914 | +} |
---|
| 2915 | + |
---|
2519 | 2916 | #endif /* CONFIG_SMP */ |
---|
2520 | 2917 | |
---|
2521 | 2918 | static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) |
---|
.. | .. |
---|
2523 | 2920 | struct rq *rq = cpu_rq(cpu); |
---|
2524 | 2921 | struct rq_flags rf; |
---|
2525 | 2922 | |
---|
2526 | | -#if defined(CONFIG_SMP) |
---|
2527 | | - if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) { |
---|
2528 | | - sched_clock_cpu(cpu); /* Sync clocks across CPUs */ |
---|
2529 | | - ttwu_queue_remote(p, cpu, wake_flags); |
---|
| 2923 | + if (ttwu_queue_wakelist(p, cpu, wake_flags)) |
---|
2530 | 2924 | return; |
---|
2531 | | - } |
---|
2532 | | -#endif |
---|
2533 | 2925 | |
---|
2534 | 2926 | rq_lock(rq, &rf); |
---|
2535 | 2927 | update_rq_clock(rq); |
---|
.. | .. |
---|
2585 | 2977 | * migration. However the means are completely different as there is no lock |
---|
2586 | 2978 | * chain to provide order. Instead we do: |
---|
2587 | 2979 | * |
---|
2588 | | - * 1) smp_store_release(X->on_cpu, 0) |
---|
2589 | | - * 2) smp_cond_load_acquire(!X->on_cpu) |
---|
| 2980 | + * 1) smp_store_release(X->on_cpu, 0) -- finish_task() |
---|
| 2981 | + * 2) smp_cond_load_acquire(!X->on_cpu) -- try_to_wake_up() |
---|
2590 | 2982 | * |
---|
2591 | 2983 | * Example: |
---|
2592 | 2984 | * |
---|
.. | .. |
---|
2625 | 3017 | * @p: the thread to be awakened |
---|
2626 | 3018 | * @state: the mask of task states that can be woken |
---|
2627 | 3019 | * @wake_flags: wake modifier flags (WF_*) |
---|
2628 | | - * @sibling_count_hint: A hint at the number of threads that are being woken up |
---|
2629 | | - * in this event. |
---|
2630 | 3020 | * |
---|
2631 | | - * If (@state & @p->state) @p->state = TASK_RUNNING. |
---|
| 3021 | + * Conceptually does: |
---|
| 3022 | + * |
---|
| 3023 | + * If (@state & @p->state) @p->state = TASK_RUNNING. |
---|
2632 | 3024 | * |
---|
2633 | 3025 | * If the task was not queued/runnable, also place it back on a runqueue. |
---|
2634 | 3026 | * |
---|
2635 | | - * Atomic against schedule() which would dequeue a task, also see |
---|
2636 | | - * set_current_state(). |
---|
| 3027 | + * This function is atomic against schedule() which would dequeue the task. |
---|
2637 | 3028 | * |
---|
2638 | | - * This function executes a full memory barrier before accessing the task |
---|
2639 | | - * state; see set_current_state(). |
---|
| 3029 | + * It issues a full memory barrier before accessing @p->state, see the comment |
---|
| 3030 | + * with set_current_state(). |
---|
| 3031 | + * |
---|
| 3032 | + * Uses p->pi_lock to serialize against concurrent wake-ups. |
---|
| 3033 | + * |
---|
| 3034 | + * Relies on p->pi_lock stabilizing: |
---|
| 3035 | + * - p->sched_class |
---|
| 3036 | + * - p->cpus_ptr |
---|
| 3037 | + * - p->sched_task_group |
---|
| 3038 | + * in order to do migration, see its use of select_task_rq()/set_task_cpu(). |
---|
| 3039 | + * |
---|
| 3040 | + * Tries really hard to only take one task_rq(p)->lock for performance. |
---|
| 3041 | + * Takes rq->lock in: |
---|
| 3042 | + * - ttwu_runnable() -- old rq, unavoidable, see comment there; |
---|
| 3043 | + * - ttwu_queue() -- new rq, for enqueue of the task; |
---|
| 3044 | + * - psi_ttwu_dequeue() -- much sadness :-( accounting will kill us. |
---|
| 3045 | + * |
---|
| 3046 | + * As a consequence we race really badly with just about everything. See the |
---|
| 3047 | + * many memory barriers and their comments for details. |
---|
2640 | 3048 | * |
---|
2641 | 3049 | * Return: %true if @p->state changes (an actual wakeup was done), |
---|
2642 | 3050 | * %false otherwise. |
---|
2643 | 3051 | */ |
---|
2644 | 3052 | static int |
---|
2645 | | -try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags, |
---|
2646 | | - int sibling_count_hint) |
---|
| 3053 | +try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) |
---|
2647 | 3054 | { |
---|
2648 | 3055 | unsigned long flags; |
---|
2649 | 3056 | int cpu, success = 0; |
---|
2650 | 3057 | |
---|
2651 | | - /* |
---|
2652 | | - * If we are going to wake up a thread waiting for CONDITION we |
---|
2653 | | - * need to ensure that CONDITION=1 done by the caller can not be |
---|
2654 | | - * reordered with p->state check below. This pairs with mb() in |
---|
2655 | | - * set_current_state() the waiting thread does. |
---|
2656 | | - */ |
---|
2657 | | - raw_spin_lock_irqsave(&p->pi_lock, flags); |
---|
2658 | | - smp_mb__after_spinlock(); |
---|
2659 | | - if (!(p->state & state)) { |
---|
| 3058 | + preempt_disable(); |
---|
| 3059 | + if (p == current) { |
---|
2660 | 3060 | /* |
---|
2661 | | - * The task might be running due to a spinlock sleeper |
---|
2662 | | - * wakeup. Check the saved state and set it to running |
---|
2663 | | - * if the wakeup condition is true. |
---|
| 3061 | + * We're waking current, this means 'p->on_rq' and 'task_cpu(p) |
---|
| 3062 | + * == smp_processor_id()'. Together this means we can special |
---|
| 3063 | + * case the whole 'p->on_rq && ttwu_runnable()' case below |
---|
| 3064 | + * without taking any locks. |
---|
| 3065 | + * |
---|
| 3066 | + * In particular: |
---|
| 3067 | + * - we rely on Program-Order guarantees for all the ordering, |
---|
| 3068 | + * - we're serialized against set_special_state() by virtue of |
---|
| 3069 | + * it disabling IRQs (this allows not taking ->pi_lock). |
---|
2664 | 3070 | */ |
---|
2665 | | - if (!(wake_flags & WF_LOCK_SLEEPER)) { |
---|
2666 | | - if (p->saved_state & state) { |
---|
2667 | | - p->saved_state = TASK_RUNNING; |
---|
2668 | | - success = 1; |
---|
2669 | | - } |
---|
2670 | | - } |
---|
| 3071 | + if (!(p->state & state)) |
---|
| 3072 | + goto out; |
---|
| 3073 | + |
---|
| 3074 | + success = 1; |
---|
| 3075 | + trace_sched_waking(p); |
---|
| 3076 | + p->state = TASK_RUNNING; |
---|
| 3077 | + trace_sched_wakeup(p); |
---|
2671 | 3078 | goto out; |
---|
2672 | 3079 | } |
---|
2673 | 3080 | |
---|
2674 | 3081 | /* |
---|
2675 | | - * If this is a regular wakeup, then we can unconditionally |
---|
2676 | | - * clear the saved state of a "lock sleeper". |
---|
| 3082 | + * If we are going to wake up a thread waiting for CONDITION we |
---|
| 3083 | + * need to ensure that CONDITION=1 done by the caller can not be |
---|
| 3084 | + * reordered with p->state check below. This pairs with smp_store_mb() |
---|
| 3085 | + * in set_current_state() that the waiting thread does. |
---|
2677 | 3086 | */ |
---|
2678 | | - if (!(wake_flags & WF_LOCK_SLEEPER)) |
---|
2679 | | - p->saved_state = TASK_RUNNING; |
---|
| 3087 | + raw_spin_lock_irqsave(&p->pi_lock, flags); |
---|
| 3088 | + smp_mb__after_spinlock(); |
---|
| 3089 | + if (!(p->state & state)) |
---|
| 3090 | + goto unlock; |
---|
| 3091 | + |
---|
| 3092 | +#ifdef CONFIG_FREEZER |
---|
| 3093 | + /* |
---|
| 3094 | + * If we're going to wake up a thread which may be frozen, then |
---|
| 3095 | + * we can only do so if we have an active CPU which is capable of |
---|
| 3096 | + * running it. This may not be the case when resuming from suspend, |
---|
| 3097 | + * as the secondary CPUs may not yet be back online. See __thaw_task() |
---|
| 3098 | + * for the actual wakeup. |
---|
| 3099 | + */ |
---|
| 3100 | + if (unlikely(frozen_or_skipped(p)) && |
---|
| 3101 | + !cpumask_intersects(cpu_active_mask, task_cpu_possible_mask(p))) |
---|
| 3102 | + goto unlock; |
---|
| 3103 | +#endif |
---|
2680 | 3104 | |
---|
2681 | 3105 | trace_sched_waking(p); |
---|
2682 | 3106 | |
---|
2683 | 3107 | /* We're going to change ->state: */ |
---|
2684 | 3108 | success = 1; |
---|
2685 | | - cpu = task_cpu(p); |
---|
2686 | 3109 | |
---|
2687 | 3110 | /* |
---|
2688 | 3111 | * Ensure we load p->on_rq _after_ p->state, otherwise it would |
---|
.. | .. |
---|
2703 | 3126 | * |
---|
2704 | 3127 | * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in |
---|
2705 | 3128 | * __schedule(). See the comment for smp_mb__after_spinlock(). |
---|
| 3129 | + * |
---|
| 3130 | + * A similar smp_rmb() lives in try_invoke_on_locked_down_task().
---|
2706 | 3131 | */ |
---|
2707 | 3132 | smp_rmb(); |
---|
2708 | | - if (p->on_rq && ttwu_remote(p, wake_flags)) |
---|
2709 | | - goto stat; |
---|
| 3133 | + if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags)) |
---|
| 3134 | + goto unlock; |
---|
| 3135 | + |
---|
| 3136 | + if (p->state & TASK_UNINTERRUPTIBLE) |
---|
| 3137 | + trace_sched_blocked_reason(p); |
---|
2710 | 3138 | |
---|
2711 | 3139 | #ifdef CONFIG_SMP |
---|
2712 | 3140 | /* |
---|
.. | .. |
---|
2727 | 3155 | * |
---|
2728 | 3156 | * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in |
---|
2729 | 3157 | * __schedule(). See the comment for smp_mb__after_spinlock(). |
---|
| 3158 | + * |
---|
| 3159 | + * Form a control-dep-acquire with p->on_rq == 0 above, to ensure |
---|
| 3160 | + * schedule()'s deactivate_task() has 'happened' and p will no longer |
---|
| 3161 | + * care about its own p->state. See the comment in __schedule().
---|
2730 | 3162 | */ |
---|
2731 | | - smp_rmb(); |
---|
| 3163 | + smp_acquire__after_ctrl_dep(); |
---|
| 3164 | + |
---|
| 3165 | + /* |
---|
| 3166 | + * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq |
---|
| 3167 | + * == 0), which means we need to do an enqueue, change p->state to |
---|
| 3168 | + * TASK_WAKING such that we can unlock p->pi_lock before doing the |
---|
| 3169 | + * enqueue, such as ttwu_queue_wakelist(). |
---|
| 3170 | + */ |
---|
| 3171 | + p->state = TASK_WAKING; |
---|
| 3172 | + |
---|
| 3173 | + /* |
---|
| 3174 | + * If the owning (remote) CPU is still in the middle of schedule() with |
---|
| 3175 | + * this task as prev, consider queueing p on the remote CPUs wake_list
---|
| 3176 | + * which potentially sends an IPI instead of spinning on p->on_cpu to |
---|
| 3177 | + * let the waker make forward progress. This is safe because IRQs are |
---|
| 3178 | + * disabled and the IPI will deliver after on_cpu is cleared. |
---|
| 3179 | + * |
---|
| 3180 | + * Ensure we load task_cpu(p) after p->on_cpu: |
---|
| 3181 | + * |
---|
| 3182 | + * set_task_cpu(p, cpu); |
---|
| 3183 | + * STORE p->cpu = @cpu |
---|
| 3184 | + * __schedule() (switch to task 'p') |
---|
| 3185 | + * LOCK rq->lock |
---|
| 3186 | + * smp_mb__after_spin_lock() smp_cond_load_acquire(&p->on_cpu) |
---|
| 3187 | + * STORE p->on_cpu = 1 LOAD p->cpu |
---|
| 3188 | + * |
---|
| 3189 | + * to ensure we observe the correct CPU on which the task is currently |
---|
| 3190 | + * scheduling. |
---|
| 3191 | + */ |
---|
| 3192 | + if (smp_load_acquire(&p->on_cpu) && |
---|
| 3193 | + ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) |
---|
| 3194 | + goto unlock; |
---|
2732 | 3195 | |
---|
2733 | 3196 | /* |
---|
2734 | 3197 | * If the owning (remote) CPU is still in the middle of schedule() with |
---|
.. | .. |
---|
2741 | 3204 | */ |
---|
2742 | 3205 | smp_cond_load_acquire(&p->on_cpu, !VAL); |
---|
2743 | 3206 | |
---|
2744 | | - p->sched_contributes_to_load = !!task_contributes_to_load(p); |
---|
2745 | | - p->state = TASK_WAKING; |
---|
| 3207 | + trace_android_rvh_try_to_wake_up(p); |
---|
2746 | 3208 | |
---|
2747 | | - if (p->in_iowait) { |
---|
2748 | | - delayacct_blkio_end(p); |
---|
2749 | | - atomic_dec(&task_rq(p)->nr_iowait); |
---|
2750 | | - } |
---|
2751 | | - |
---|
2752 | | - cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags, |
---|
2753 | | - sibling_count_hint); |
---|
| 3209 | + cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags); |
---|
2754 | 3210 | if (task_cpu(p) != cpu) { |
---|
| 3211 | + if (p->in_iowait) { |
---|
| 3212 | + delayacct_blkio_end(p); |
---|
| 3213 | + atomic_dec(&task_rq(p)->nr_iowait); |
---|
| 3214 | + } |
---|
| 3215 | + |
---|
2755 | 3216 | wake_flags |= WF_MIGRATED; |
---|
2756 | 3217 | psi_ttwu_dequeue(p); |
---|
2757 | 3218 | set_task_cpu(p, cpu); |
---|
2758 | 3219 | } |
---|
2759 | | - |
---|
2760 | | -#else /* CONFIG_SMP */ |
---|
2761 | | - |
---|
2762 | | - if (p->in_iowait) { |
---|
2763 | | - delayacct_blkio_end(p); |
---|
2764 | | - atomic_dec(&task_rq(p)->nr_iowait); |
---|
2765 | | - } |
---|
2766 | | - |
---|
| 3220 | +#else |
---|
| 3221 | + cpu = task_cpu(p); |
---|
2767 | 3222 | #endif /* CONFIG_SMP */ |
---|
2768 | 3223 | |
---|
2769 | 3224 | ttwu_queue(p, cpu, wake_flags); |
---|
2770 | | -stat: |
---|
2771 | | - ttwu_stat(p, cpu, wake_flags); |
---|
2772 | | -out: |
---|
| 3225 | +unlock: |
---|
2773 | 3226 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
---|
| 3227 | +out: |
---|
| 3228 | + if (success) { |
---|
| 3229 | + trace_android_rvh_try_to_wake_up_success(p); |
---|
| 3230 | + ttwu_stat(p, task_cpu(p), wake_flags); |
---|
| 3231 | + } |
---|
| 3232 | + preempt_enable(); |
---|
2774 | 3233 | |
---|
2775 | 3234 | return success; |
---|
| 3235 | +} |
---|
| 3236 | + |
---|
| 3237 | +/** |
---|
| 3238 | + * try_invoke_on_locked_down_task - Invoke a function on task in fixed state |
---|
| 3239 | + * @p: Process for which the function is to be invoked, can be @current. |
---|
| 3240 | + * @func: Function to invoke. |
---|
| 3241 | + * @arg: Argument to function. |
---|
| 3242 | + * |
---|
| 3243 | + * If the specified task can be quickly locked into a definite state |
---|
| 3244 | + * (either sleeping or on a given runqueue), arrange to keep it in that |
---|
| 3245 | + * state while invoking @func(@arg). This function can use ->on_rq and |
---|
| 3246 | + * task_curr() to work out what the state is, if required. Given that |
---|
| 3247 | + * @func can be invoked with a runqueue lock held, it had better be quite |
---|
| 3248 | + * lightweight. |
---|
| 3249 | + * |
---|
| 3250 | + * Returns: |
---|
| 3251 | + * @false if the task slipped out from under the locks. |
---|
| 3252 | + * @true if the task was locked onto a runqueue or is sleeping. |
---|
| 3253 | + * However, @func can override this by returning @false. |
---|
| 3254 | + */ |
---|
| 3255 | +bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) |
---|
| 3256 | +{ |
---|
| 3257 | + struct rq_flags rf; |
---|
| 3258 | + bool ret = false; |
---|
| 3259 | + struct rq *rq; |
---|
| 3260 | + |
---|
| 3261 | + raw_spin_lock_irqsave(&p->pi_lock, rf.flags); |
---|
| 3262 | + if (p->on_rq) { |
---|
| 3263 | + rq = __task_rq_lock(p, &rf); |
---|
| 3264 | + if (task_rq(p) == rq) |
---|
| 3265 | + ret = func(p, arg); |
---|
| 3266 | + rq_unlock(rq, &rf); |
---|
| 3267 | + } else { |
---|
| 3268 | + switch (p->state) { |
---|
| 3269 | + case TASK_RUNNING: |
---|
| 3270 | + case TASK_WAKING: |
---|
| 3271 | + break; |
---|
| 3272 | + default: |
---|
| 3273 | + smp_rmb(); // See smp_rmb() comment in try_to_wake_up(). |
---|
| 3274 | + if (!p->on_rq) |
---|
| 3275 | + ret = func(p, arg); |
---|
| 3276 | + } |
---|
| 3277 | + } |
---|
| 3278 | + raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags); |
---|
| 3279 | + return ret; |
---|
2776 | 3280 | } |
---|
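
A usage sketch for the new helper (the callback and wrapper names are hypothetical); because @func may run under the task's runqueue lock, it has to stay lightweight:

/* Illustrative only: sample @p's queued state while it is locked down. */
static bool note_queued(struct task_struct *t, void *arg)
{
	*(bool *)arg = !!t->on_rq;
	return true;	/* report the task as successfully locked down */
}

static bool task_seen_queued(struct task_struct *p)
{
	bool queued = false;

	return try_invoke_on_locked_down_task(p, note_queued, &queued) && queued;
}
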
2777 | 3281 | |
---|
2778 | 3282 | /** |
---|
.. | .. |
---|
2788 | 3292 | */ |
---|
2789 | 3293 | int wake_up_process(struct task_struct *p) |
---|
2790 | 3294 | { |
---|
2791 | | - return try_to_wake_up(p, TASK_NORMAL, 0, 1); |
---|
| 3295 | + return try_to_wake_up(p, TASK_NORMAL, 0); |
---|
2792 | 3296 | } |
---|
2793 | 3297 | EXPORT_SYMBOL(wake_up_process); |
---|
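
For completeness, the waker side that pairs with the wait loop sketched above ttwu_runnable(); an illustrative fragment with a hypothetical structure, relying on the full memory barrier that try_to_wake_up() documents before it reads ->state:

/* Illustrative only: the waker half of the CONDITION/schedule() pattern. */
struct my_wait {
	struct task_struct	*waiter;
	bool			done;		/* the CONDITION */
};

static void my_complete(struct my_wait *w)
{
	w->done = true;
	/*
	 * try_to_wake_up() executes a full barrier before accessing
	 * w->waiter->state, so the store to w->done above cannot be
	 * reordered past the wakeup.
	 */
	wake_up_process(w->waiter);
}
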
2794 | 3298 | |
---|
2795 | | -/** |
---|
2796 | | - * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock" |
---|
2797 | | - * @p: The process to be woken up. |
---|
2798 | | - * |
---|
2799 | | - * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate |
---|
2800 | | - * the nature of the wakeup. |
---|
2801 | | - */ |
---|
2802 | | -int wake_up_lock_sleeper(struct task_struct *p) |
---|
2803 | | -{ |
---|
2804 | | - return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER, 1); |
---|
2805 | | -} |
---|
2806 | | - |
---|
2807 | 3299 | int wake_up_state(struct task_struct *p, unsigned int state) |
---|
2808 | 3300 | { |
---|
2809 | | - return try_to_wake_up(p, state, 0, 1); |
---|
| 3301 | + return try_to_wake_up(p, state, 0); |
---|
2810 | 3302 | } |
---|
2811 | 3303 | |
---|
2812 | 3304 | /* |
---|
.. | .. |
---|
2831 | 3323 | p->se.cfs_rq = NULL; |
---|
2832 | 3324 | #endif |
---|
2833 | 3325 | |
---|
| 3326 | + trace_android_rvh_sched_fork_init(p); |
---|
| 3327 | + |
---|
2834 | 3328 | #ifdef CONFIG_SCHEDSTATS |
---|
2835 | 3329 | /* Even if schedstat is disabled, there should not be garbage */ |
---|
2836 | 3330 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); |
---|
.. | .. |
---|
2851 | 3345 | INIT_HLIST_HEAD(&p->preempt_notifiers); |
---|
2852 | 3346 | #endif |
---|
2853 | 3347 | |
---|
| 3348 | +#ifdef CONFIG_COMPACTION |
---|
| 3349 | + p->capture_control = NULL; |
---|
| 3350 | +#endif |
---|
2854 | 3351 | init_numa_balancing(clone_flags, p); |
---|
| 3352 | +#ifdef CONFIG_SMP |
---|
| 3353 | + p->wake_entry.u_flags = CSD_TYPE_TTWU; |
---|
| 3354 | +#endif |
---|
2855 | 3355 | } |
---|
2856 | 3356 | |
---|
2857 | 3357 | DEFINE_STATIC_KEY_FALSE(sched_numa_balancing); |
---|
.. | .. |
---|
2868 | 3368 | |
---|
2869 | 3369 | #ifdef CONFIG_PROC_SYSCTL |
---|
2870 | 3370 | int sysctl_numa_balancing(struct ctl_table *table, int write, |
---|
2871 | | - void __user *buffer, size_t *lenp, loff_t *ppos) |
---|
| 3371 | + void *buffer, size_t *lenp, loff_t *ppos) |
---|
2872 | 3372 | { |
---|
2873 | 3373 | struct ctl_table t; |
---|
2874 | 3374 | int err; |
---|
.. | .. |
---|
2942 | 3442 | } |
---|
2943 | 3443 | |
---|
2944 | 3444 | #ifdef CONFIG_PROC_SYSCTL |
---|
2945 | | -int sysctl_schedstats(struct ctl_table *table, int write, |
---|
2946 | | - void __user *buffer, size_t *lenp, loff_t *ppos) |
---|
| 3445 | +int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, |
---|
| 3446 | + size_t *lenp, loff_t *ppos) |
---|
2947 | 3447 | { |
---|
2948 | 3448 | struct ctl_table t; |
---|
2949 | 3449 | int err; |
---|
.. | .. |
---|
2971 | 3471 | */ |
---|
2972 | 3472 | int sched_fork(unsigned long clone_flags, struct task_struct *p) |
---|
2973 | 3473 | { |
---|
2974 | | - unsigned long flags; |
---|
| 3474 | + trace_android_rvh_sched_fork(p); |
---|
2975 | 3475 | |
---|
2976 | 3476 | __sched_fork(clone_flags, p); |
---|
2977 | 3477 | /* |
---|
.. | .. |
---|
2985 | 3485 | * Make sure we do not leak PI boosting priority to the child. |
---|
2986 | 3486 | */ |
---|
2987 | 3487 | p->prio = current->normal_prio; |
---|
| 3488 | + trace_android_rvh_prepare_prio_fork(p); |
---|
2988 | 3489 | |
---|
2989 | 3490 | uclamp_fork(p); |
---|
2990 | 3491 | |
---|
.. | .. |
---|
2999 | 3500 | } else if (PRIO_TO_NICE(p->static_prio) < 0) |
---|
3000 | 3501 | p->static_prio = NICE_TO_PRIO(0); |
---|
3001 | 3502 | |
---|
3002 | | - p->prio = p->normal_prio = __normal_prio(p); |
---|
3003 | | - set_load_weight(p, false); |
---|
| 3503 | + p->prio = p->normal_prio = p->static_prio; |
---|
| 3504 | + set_load_weight(p); |
---|
3004 | 3505 | |
---|
3005 | 3506 | /* |
---|
3006 | 3507 | * We don't need the reset flag anymore after the fork. It has |
---|
.. | .. |
---|
3017 | 3518 | p->sched_class = &fair_sched_class; |
---|
3018 | 3519 | |
---|
3019 | 3520 | init_entity_runnable_average(&p->se); |
---|
| 3521 | + trace_android_rvh_finish_prio_fork(p); |
---|
3020 | 3522 | |
---|
3021 | | - /* |
---|
3022 | | - * The child is not yet in the pid-hash so no cgroup attach races, |
---|
3023 | | - * and the cgroup is pinned to this child due to cgroup_fork() |
---|
3024 | | - * is ran before sched_fork(). |
---|
3025 | | - * |
---|
3026 | | - * Silence PROVE_RCU. |
---|
3027 | | - */ |
---|
3028 | | - raw_spin_lock_irqsave(&p->pi_lock, flags); |
---|
3029 | | - rseq_migrate(p); |
---|
3030 | | - /* |
---|
3031 | | - * We're setting the CPU for the first time, we don't migrate, |
---|
3032 | | - * so use __set_task_cpu(). |
---|
3033 | | - */ |
---|
3034 | | - __set_task_cpu(p, smp_processor_id()); |
---|
3035 | | - if (p->sched_class->task_fork) |
---|
3036 | | - p->sched_class->task_fork(p); |
---|
3037 | | - raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
---|
3038 | 3523 | |
---|
3039 | 3524 | #ifdef CONFIG_SCHED_INFO |
---|
3040 | 3525 | if (likely(sched_info_on())) |
---|
.. | .. |
---|
3044 | 3529 | p->on_cpu = 0; |
---|
3045 | 3530 | #endif |
---|
3046 | 3531 | init_task_preempt_count(p); |
---|
3047 | | -#ifdef CONFIG_HAVE_PREEMPT_LAZY |
---|
3048 | | - task_thread_info(p)->preempt_lazy_count = 0; |
---|
3049 | | -#endif |
---|
3050 | 3532 | #ifdef CONFIG_SMP |
---|
3051 | 3533 | plist_node_init(&p->pushable_tasks, MAX_PRIO); |
---|
3052 | 3534 | RB_CLEAR_NODE(&p->pushable_dl_tasks); |
---|
3053 | 3535 | #endif |
---|
3054 | 3536 | return 0; |
---|
| 3537 | +} |
---|
| 3538 | + |
---|
| 3539 | +void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs) |
---|
| 3540 | +{ |
---|
| 3541 | + unsigned long flags; |
---|
| 3542 | + |
---|
| 3543 | + /* |
---|
| 3544 | + * Because we're not yet on the pid-hash, p->pi_lock isn't strictly |
---|
| 3545 | + * required yet, but lockdep gets upset if rules are violated. |
---|
| 3546 | + */ |
---|
| 3547 | + raw_spin_lock_irqsave(&p->pi_lock, flags); |
---|
| 3548 | +#ifdef CONFIG_CGROUP_SCHED |
---|
| 3549 | + if (1) { |
---|
| 3550 | + struct task_group *tg; |
---|
| 3551 | + |
---|
| 3552 | + tg = container_of(kargs->cset->subsys[cpu_cgrp_id], |
---|
| 3553 | + struct task_group, css); |
---|
| 3554 | + tg = autogroup_task_group(p, tg); |
---|
| 3555 | + p->sched_task_group = tg; |
---|
| 3556 | + } |
---|
| 3557 | +#endif |
---|
| 3558 | + rseq_migrate(p); |
---|
| 3559 | + /* |
---|
| 3560 | + * We're setting the CPU for the first time, we don't migrate, |
---|
| 3561 | + * so use __set_task_cpu(). |
---|
| 3562 | + */ |
---|
| 3563 | + __set_task_cpu(p, smp_processor_id()); |
---|
| 3564 | + if (p->sched_class->task_fork) |
---|
| 3565 | + p->sched_class->task_fork(p); |
---|
| 3566 | + raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
---|
| 3567 | +} |
---|
| 3568 | + |
---|
| 3569 | +void sched_post_fork(struct task_struct *p) |
---|
| 3570 | +{ |
---|
| 3571 | + uclamp_post_fork(p); |
---|
3055 | 3572 | } |
---|
3056 | 3573 | |
---|
3057 | 3574 | unsigned long to_ratio(u64 period, u64 runtime) |
---|
.. | .. |
---|
3082 | 3599 | struct rq_flags rf; |
---|
3083 | 3600 | struct rq *rq; |
---|
3084 | 3601 | |
---|
| 3602 | + trace_android_rvh_wake_up_new_task(p); |
---|
| 3603 | + |
---|
3085 | 3604 | raw_spin_lock_irqsave(&p->pi_lock, rf.flags); |
---|
3086 | 3605 | p->state = TASK_RUNNING; |
---|
3087 | 3606 | #ifdef CONFIG_SMP |
---|
.. | .. |
---|
3095 | 3614 | */ |
---|
3096 | 3615 | p->recent_used_cpu = task_cpu(p); |
---|
3097 | 3616 | rseq_migrate(p); |
---|
3098 | | - __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0, 1)); |
---|
| 3617 | + __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); |
---|
3099 | 3618 | #endif |
---|
3100 | 3619 | rq = __task_rq_lock(p, &rf); |
---|
3101 | 3620 | update_rq_clock(rq); |
---|
3102 | | - post_init_entity_util_avg(&p->se); |
---|
| 3621 | + post_init_entity_util_avg(p); |
---|
| 3622 | + trace_android_rvh_new_task_stats(p); |
---|
3103 | 3623 | |
---|
3104 | 3624 | activate_task(rq, p, ENQUEUE_NOCLOCK); |
---|
3105 | | - p->on_rq = TASK_ON_RQ_QUEUED; |
---|
3106 | 3625 | trace_sched_wakeup_new(p); |
---|
3107 | 3626 | check_preempt_curr(rq, p, WF_FORK); |
---|
3108 | 3627 | #ifdef CONFIG_SMP |
---|
.. | .. |
---|
3212 | 3731 | /* |
---|
3213 | 3732 | * Claim the task as running, we do this before switching to it |
---|
3214 | 3733 | * such that any running task will have this set. |
---|
| 3734 | + * |
---|
| 3735 | + * See the ttwu() WF_ON_CPU case and its ordering comment. |
---|
3215 | 3736 | */ |
---|
3216 | | - next->on_cpu = 1; |
---|
| 3737 | + WRITE_ONCE(next->on_cpu, 1); |
---|
3217 | 3738 | #endif |
---|
3218 | 3739 | } |
---|
3219 | 3740 | |
---|
.. | .. |
---|
3221 | 3742 | { |
---|
3222 | 3743 | #ifdef CONFIG_SMP |
---|
3223 | 3744 | /* |
---|
3224 | | - * After ->on_cpu is cleared, the task can be moved to a different CPU. |
---|
3225 | | - * We must ensure this doesn't happen until the switch is completely |
---|
| 3745 | + * This must be the very last reference to @prev from this CPU. After |
---|
| 3746 | + * p->on_cpu is cleared, the task can be moved to a different CPU. We |
---|
| 3747 | + * must ensure this doesn't happen until the switch is completely |
---|
3226 | 3748 | * finished. |
---|
3227 | 3749 | * |
---|
3228 | 3750 | * In particular, the load of prev->state in finish_task_switch() must |
---|
.. | .. |
---|
3244 | 3766 | * do an early lockdep release here: |
---|
3245 | 3767 | */ |
---|
3246 | 3768 | rq_unpin_lock(rq, rf); |
---|
3247 | | - spin_release(&rq->lock.dep_map, 1, _THIS_IP_); |
---|
| 3769 | + spin_release(&rq->lock.dep_map, _THIS_IP_); |
---|
3248 | 3770 | #ifdef CONFIG_DEBUG_SPINLOCK |
---|
3249 | 3771 | /* this is a valid case when another task releases the spinlock */ |
---|
3250 | 3772 | rq->lock.owner = next; |
---|
.. | .. |
---|
3376 | 3898 | * provided by mmdrop(), |
---|
3377 | 3899 | * - a sync_core for SYNC_CORE. |
---|
3378 | 3900 | */ |
---|
3379 | | - /* |
---|
3380 | | - * We use mmdrop_delayed() here so we don't have to do the |
---|
3381 | | - * full __mmdrop() when we are the last user. |
---|
3382 | | - */ |
---|
3383 | 3901 | if (mm) { |
---|
3384 | 3902 | membarrier_mm_sync_core_before_usermode(mm); |
---|
3385 | | - mmdrop_delayed(mm); |
---|
| 3903 | + mmdrop(mm); |
---|
3386 | 3904 | } |
---|
3387 | 3905 | if (unlikely(prev_state == TASK_DEAD)) { |
---|
3388 | 3906 | if (prev->sched_class->task_dead) |
---|
3389 | 3907 | prev->sched_class->task_dead(prev); |
---|
3390 | 3908 | |
---|
3391 | | - put_task_struct(prev); |
---|
| 3909 | + /* |
---|
| 3910 | + * Remove function-return probe instances associated with this |
---|
| 3911 | + * task and put them back on the free list. |
---|
| 3912 | + */ |
---|
| 3913 | + kprobe_flush_task(prev); |
---|
| 3914 | + trace_android_rvh_flush_task(prev); |
---|
| 3915 | + |
---|
| 3916 | + /* Task is done with its stack. */ |
---|
| 3917 | + put_task_stack(prev); |
---|
| 3918 | + |
---|
| 3919 | + put_task_struct_rcu_user(prev); |
---|
3392 | 3920 | } |
---|
3393 | 3921 | |
---|
3394 | 3922 | tick_nohz_task_switch(); |
---|
.. | .. |
---|
3467 | 3995 | context_switch(struct rq *rq, struct task_struct *prev, |
---|
3468 | 3996 | struct task_struct *next, struct rq_flags *rf) |
---|
3469 | 3997 | { |
---|
3470 | | - struct mm_struct *mm, *oldmm; |
---|
3471 | | - |
---|
3472 | 3998 | prepare_task_switch(rq, prev, next); |
---|
3473 | 3999 | |
---|
3474 | | - mm = next->mm; |
---|
3475 | | - oldmm = prev->active_mm; |
---|
3476 | 4000 | /* |
---|
3477 | 4001 | * For paravirt, this is coupled with an exit in switch_to to |
---|
3478 | 4002 | * combine the page table reload and the switch backend into |
---|
.. | .. |
---|
3481 | 4005 | arch_start_context_switch(prev); |
---|
3482 | 4006 | |
---|
3483 | 4007 | /* |
---|
3484 | | - * If mm is non-NULL, we pass through switch_mm(). If mm is |
---|
3485 | | - * NULL, we will pass through mmdrop() in finish_task_switch(). |
---|
3486 | | - * Both of these contain the full memory barrier required by |
---|
3487 | | - * membarrier after storing to rq->curr, before returning to |
---|
3488 | | - * user-space. |
---|
| 4008 | + * kernel -> kernel lazy + transfer active |
---|
| 4009 | + * user -> kernel lazy + mmgrab() active |
---|
| 4010 | + * |
---|
| 4011 | + * kernel -> user switch + mmdrop() active |
---|
| 4012 | + * user -> user switch |
---|
3489 | 4013 | */ |
---|
3490 | | - if (!mm) { |
---|
3491 | | - next->active_mm = oldmm; |
---|
3492 | | - mmgrab(oldmm); |
---|
3493 | | - enter_lazy_tlb(oldmm, next); |
---|
3494 | | - } else |
---|
3495 | | - switch_mm_irqs_off(oldmm, mm, next); |
---|
| 4014 | + if (!next->mm) { // to kernel |
---|
| 4015 | + enter_lazy_tlb(prev->active_mm, next); |
---|
3496 | 4016 | |
---|
3497 | | - if (!prev->mm) { |
---|
3498 | | - prev->active_mm = NULL; |
---|
3499 | | - rq->prev_mm = oldmm; |
---|
| 4017 | + next->active_mm = prev->active_mm; |
---|
| 4018 | + if (prev->mm) // from user |
---|
| 4019 | + mmgrab(prev->active_mm); |
---|
| 4020 | + else |
---|
| 4021 | + prev->active_mm = NULL; |
---|
| 4022 | + } else { // to user |
---|
| 4023 | + membarrier_switch_mm(rq, prev->active_mm, next->mm); |
---|
| 4024 | + /* |
---|
| 4025 | + * sys_membarrier() requires an smp_mb() between setting |
---|
| 4026 | + * rq->curr / membarrier_switch_mm() and returning to userspace. |
---|
| 4027 | + * |
---|
| 4028 | + * The below provides this either through switch_mm(), or in |
---|
| 4029 | + * case 'prev->active_mm == next->mm' through |
---|
| 4030 | + * finish_task_switch()'s mmdrop(). |
---|
| 4031 | + */ |
---|
| 4032 | + switch_mm_irqs_off(prev->active_mm, next->mm, next); |
---|
| 4033 | + |
---|
| 4034 | + if (!prev->mm) { // from kernel |
---|
| 4035 | + /* will mmdrop() in finish_task_switch(). */ |
---|
| 4036 | + rq->prev_mm = prev->active_mm; |
---|
| 4037 | + prev->active_mm = NULL; |
---|
| 4038 | + } |
---|
3500 | 4039 | } |
---|
3501 | 4040 | |
---|
3502 | 4041 | rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); |
---|
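The transition table in the new context_switch() comment boils down to a reference-counting rule on prev->active_mm: a task with no mm of its own ("to kernel") borrows the previous address space, taking an mmgrab() reference when that space came from a user task, and the "to user" path hands any such borrowed mm to rq->prev_mm so finish_task_switch() can mmdrop() it. As a reminder of what that pair of helpers actually pins, here is a hedged sketch; the helper names are illustrative and not part of this diff:

    #include <linux/sched/mm.h>

    /*
     * mmgrab()/mmdrop() manage mm_count, i.e. the lifetime of the mm_struct
     * object itself -- exactly the reference context_switch() takes on a
     * borrowed active_mm. They do NOT pin the address-space contents; that
     * is mm_users, managed with mmget()/mmput().
     */
    static struct mm_struct *example_pin_mm(struct mm_struct *mm)
    {
            mmgrab(mm);             /* keep the mm_struct around */
            return mm;
    }

    static void example_unpin_mm(struct mm_struct *mm)
    {
            mmdrop(mm);             /* may free the mm_struct on the last reference */
    }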
.. | .. |
---|
3533 | 4072 | * preemption, thus the result might have a time-of-check-to-time-of-use |
---|
3534 | 4073 | * race. The caller is responsible to use it correctly, for example: |
---|
3535 | 4074 | * |
---|
3536 | | - * - from a non-preemptable section (of course) |
---|
| 4075 | + * - from a non-preemptible section (of course) |
---|
3537 | 4076 | * |
---|
3538 | 4077 | * - from a thread that is bound to a single CPU |
---|
3539 | 4078 | * |
---|
.. | .. |
---|
3554 | 4093 | sum += cpu_rq(i)->nr_switches; |
---|
3555 | 4094 | |
---|
3556 | 4095 | return sum; |
---|
| 4096 | +} |
---|
| 4097 | + |
---|
| 4098 | +/* |
---|
| 4099 | + * Consumers of these two interfaces, like for example the cpuidle menu |
---|
| 4100 | + * governor, are using nonsensical data. Preferring shallow idle state selection |
---|
| 4101 | + * for a CPU that has IO-wait which might not even end up running the task when |
---|
| 4102 | + * it does become runnable. |
---|
| 4103 | + */ |
---|
| 4104 | + |
---|
| 4105 | +unsigned long nr_iowait_cpu(int cpu) |
---|
| 4106 | +{ |
---|
| 4107 | + return atomic_read(&cpu_rq(cpu)->nr_iowait); |
---|
3557 | 4108 | } |
---|
3558 | 4109 | |
---|
3559 | 4110 | /* |
---|
.. | .. |
---|
3591 | 4142 | unsigned long i, sum = 0; |
---|
3592 | 4143 | |
---|
3593 | 4144 | for_each_possible_cpu(i) |
---|
3594 | | - sum += atomic_read(&cpu_rq(i)->nr_iowait); |
---|
| 4145 | + sum += nr_iowait_cpu(i); |
---|
3595 | 4146 | |
---|
3596 | 4147 | return sum; |
---|
3597 | | -} |
---|
3598 | | - |
---|
3599 | | -/* |
---|
3600 | | - * Consumers of these two interfaces, like for example the cpufreq menu |
---|
3601 | | - * governor are using nonsensical data. Boosting frequency for a CPU that has |
---|
3602 | | - * IO-wait which might not even end up running the task when it does become |
---|
3603 | | - * runnable. |
---|
3604 | | - */ |
---|
3605 | | - |
---|
3606 | | -unsigned long nr_iowait_cpu(int cpu) |
---|
3607 | | -{ |
---|
3608 | | - struct rq *this = cpu_rq(cpu); |
---|
3609 | | - return atomic_read(&this->nr_iowait); |
---|
3610 | | -} |
---|
3611 | | - |
---|
3612 | | -void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) |
---|
3613 | | -{ |
---|
3614 | | - struct rq *rq = this_rq(); |
---|
3615 | | - *nr_waiters = atomic_read(&rq->nr_iowait); |
---|
3616 | | - *load = rq->load.weight; |
---|
3617 | 4148 | } |
---|
3618 | 4149 | |
---|
3619 | 4150 | #ifdef CONFIG_SMP |
---|
.. | .. |
---|
3627 | 4158 | struct task_struct *p = current; |
---|
3628 | 4159 | unsigned long flags; |
---|
3629 | 4160 | int dest_cpu; |
---|
| 4161 | + bool cond = false; |
---|
| 4162 | + |
---|
| 4163 | + trace_android_rvh_sched_exec(&cond); |
---|
| 4164 | + if (cond) |
---|
| 4165 | + return; |
---|
3630 | 4166 | |
---|
3631 | 4167 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
---|
3632 | | - dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0, 1); |
---|
| 4168 | + dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0); |
---|
3633 | 4169 | if (dest_cpu == smp_processor_id()) |
---|
3634 | 4170 | goto unlock; |
---|
3635 | 4171 | |
---|
.. | .. |
---|
3712 | 4248 | |
---|
3713 | 4249 | return ns; |
---|
3714 | 4250 | } |
---|
| 4251 | +EXPORT_SYMBOL_GPL(task_sched_runtime); |
---|
3715 | 4252 | |
---|
3716 | 4253 | /* |
---|
3717 | 4254 | * This function gets called by the timer code, with HZ frequency. |
---|
.. | .. |
---|
3723 | 4260 | struct rq *rq = cpu_rq(cpu); |
---|
3724 | 4261 | struct task_struct *curr = rq->curr; |
---|
3725 | 4262 | struct rq_flags rf; |
---|
| 4263 | + unsigned long thermal_pressure; |
---|
3726 | 4264 | |
---|
| 4265 | + arch_scale_freq_tick(); |
---|
3727 | 4266 | sched_clock_tick(); |
---|
3728 | 4267 | |
---|
3729 | 4268 | rq_lock(rq, &rf); |
---|
3730 | 4269 | |
---|
| 4270 | + trace_android_rvh_tick_entry(rq); |
---|
3731 | 4271 | update_rq_clock(rq); |
---|
| 4272 | + thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq)); |
---|
| 4273 | + update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure); |
---|
3732 | 4274 | curr->sched_class->task_tick(rq, curr, 0); |
---|
3733 | | - cpu_load_update_active(rq); |
---|
3734 | 4275 | calc_global_load_tick(rq); |
---|
3735 | 4276 | psi_task_tick(rq); |
---|
3736 | 4277 | |
---|
.. | .. |
---|
3742 | 4283 | rq->idle_balance = idle_cpu(cpu); |
---|
3743 | 4284 | trigger_load_balance(rq); |
---|
3744 | 4285 | #endif |
---|
| 4286 | + |
---|
| 4287 | + trace_android_vh_scheduler_tick(rq); |
---|
3745 | 4288 | } |
---|
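scheduler_tick() now samples arch_scale_thermal_pressure() on every tick and feeds it into a PELT-style average via update_thermal_load_avg(). By convention the pressure is expressed in capacity units, i.e. the capacity currently lost to thermal capping, so a consumer can derive the usable capacity as sketched below (hedged; example_effective_capacity() is an illustrative helper, not an API from this diff):

    #include <linux/sched/topology.h>

    /* Capacity still usable on @cpu once the current thermal cap is applied. */
    static unsigned long example_effective_capacity(int cpu)
    {
            unsigned long max_cap = arch_scale_cpu_capacity(cpu);

            /* arch_scale_thermal_pressure() reports capacity lost to capping */
            return max_cap - arch_scale_thermal_pressure(cpu);
    }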
3746 | 4289 | |
---|
3747 | 4290 | #ifdef CONFIG_NO_HZ_FULL |
---|
.. | .. |
---|
3799 | 4342 | * statistics and checks timeslices in a time-independent way, regardless |
---|
3800 | 4343 | * of when exactly it is running. |
---|
3801 | 4344 | */ |
---|
3802 | | - if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu)) |
---|
| 4345 | + if (!tick_nohz_tick_stopped_cpu(cpu)) |
---|
3803 | 4346 | goto out_requeue; |
---|
3804 | 4347 | |
---|
3805 | 4348 | rq_lock_irq(rq, &rf); |
---|
3806 | 4349 | curr = rq->curr; |
---|
3807 | | - if (is_idle_task(curr) || cpu_is_offline(cpu)) |
---|
| 4350 | + if (cpu_is_offline(cpu)) |
---|
3808 | 4351 | goto out_unlock; |
---|
3809 | 4352 | |
---|
3810 | 4353 | update_rq_clock(rq); |
---|
3811 | | - delta = rq_clock_task(rq) - curr->se.exec_start; |
---|
3812 | 4354 | |
---|
3813 | | - /* |
---|
3814 | | - * Make sure the next tick runs within a reasonable |
---|
3815 | | - * amount of time. |
---|
3816 | | - */ |
---|
3817 | | - WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); |
---|
| 4355 | + if (!is_idle_task(curr)) { |
---|
| 4356 | + /* |
---|
| 4357 | + * Make sure the next tick runs within a reasonable |
---|
| 4358 | + * amount of time. |
---|
| 4359 | + */ |
---|
| 4360 | + delta = rq_clock_task(rq) - curr->se.exec_start; |
---|
| 4361 | + WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); |
---|
| 4362 | + } |
---|
3818 | 4363 | curr->sched_class->task_tick(rq, curr, 0); |
---|
3819 | 4364 | |
---|
| 4365 | + calc_load_nohz_remote(rq); |
---|
3820 | 4366 | out_unlock: |
---|
3821 | 4367 | rq_unlock_irq(rq, &rf); |
---|
3822 | | - |
---|
3823 | 4368 | out_requeue: |
---|
| 4369 | + |
---|
3824 | 4370 | /* |
---|
3825 | 4371 | * Run the remote tick once per second (1Hz). This arbitrary |
---|
3826 | 4372 | * frequency is large enough to avoid overload but short enough |
---|
.. | .. |
---|
3884 | 4430 | static inline void sched_tick_stop(int cpu) { } |
---|
3885 | 4431 | #endif |
---|
3886 | 4432 | |
---|
3887 | | -#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ |
---|
| 4433 | +#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ |
---|
3888 | 4434 | defined(CONFIG_TRACE_PREEMPT_TOGGLE)) |
---|
3889 | 4435 | /* |
---|
3890 | 4436 | * If the value passed in is equal to the current preempt count |
---|
.. | .. |
---|
3990 | 4536 | if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) |
---|
3991 | 4537 | && in_atomic_preempt_off()) { |
---|
3992 | 4538 | pr_err("Preemption disabled at:"); |
---|
3993 | | - print_ip_sym(preempt_disable_ip); |
---|
3994 | | - pr_cont("\n"); |
---|
| 4539 | + print_ip_sym(KERN_ERR, preempt_disable_ip); |
---|
3995 | 4540 | } |
---|
3996 | | - if (panic_on_warn) |
---|
3997 | | - panic("scheduling while atomic\n"); |
---|
| 4541 | + check_panic_on_warn("scheduling while atomic"); |
---|
| 4542 | + |
---|
| 4543 | + trace_android_rvh_schedule_bug(prev); |
---|
3998 | 4544 | |
---|
3999 | 4545 | dump_stack(); |
---|
4000 | 4546 | add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
---|
.. | .. |
---|
4003 | 4549 | /* |
---|
4004 | 4550 | * Various schedule()-time debugging checks and statistics: |
---|
4005 | 4551 | */ |
---|
4006 | | -static inline void schedule_debug(struct task_struct *prev) |
---|
| 4552 | +static inline void schedule_debug(struct task_struct *prev, bool preempt) |
---|
4007 | 4553 | { |
---|
4008 | 4554 | #ifdef CONFIG_SCHED_STACK_END_CHECK |
---|
4009 | 4555 | if (task_stack_end_corrupted(prev)) |
---|
4010 | 4556 | panic("corrupted stack end detected inside scheduler\n"); |
---|
| 4557 | + |
---|
| 4558 | + if (task_scs_end_corrupted(prev)) |
---|
| 4559 | + panic("corrupted shadow stack detected inside scheduler\n"); |
---|
| 4560 | +#endif |
---|
| 4561 | + |
---|
| 4562 | +#ifdef CONFIG_DEBUG_ATOMIC_SLEEP |
---|
| 4563 | + if (!preempt && prev->state && prev->non_block_count) { |
---|
| 4564 | + printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", |
---|
| 4565 | + prev->comm, prev->pid, prev->non_block_count); |
---|
| 4566 | + dump_stack(); |
---|
| 4567 | + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
---|
| 4568 | + } |
---|
4011 | 4569 | #endif |
---|
4012 | 4570 | |
---|
4013 | 4571 | if (unlikely(in_atomic_preempt_off())) { |
---|
.. | .. |
---|
4019 | 4577 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); |
---|
4020 | 4578 | |
---|
4021 | 4579 | schedstat_inc(this_rq()->sched_count); |
---|
| 4580 | +} |
---|
| 4581 | + |
---|
| 4582 | +static void put_prev_task_balance(struct rq *rq, struct task_struct *prev, |
---|
| 4583 | + struct rq_flags *rf) |
---|
| 4584 | +{ |
---|
| 4585 | +#ifdef CONFIG_SMP |
---|
| 4586 | + const struct sched_class *class; |
---|
| 4587 | + /* |
---|
| 4588 | + * We must do the balancing pass before put_prev_task(), such |
---|
| 4589 | + * that when we release the rq->lock the task is in the same |
---|
| 4590 | + * state as before we took rq->lock. |
---|
| 4591 | + * |
---|
| 4592 | + * We can terminate the balance pass as soon as we know there is |
---|
| 4593 | + * a runnable task of @class priority or higher. |
---|
| 4594 | + */ |
---|
| 4595 | + for_class_range(class, prev->sched_class, &idle_sched_class) { |
---|
| 4596 | + if (class->balance(rq, prev, rf)) |
---|
| 4597 | + break; |
---|
| 4598 | + } |
---|
| 4599 | +#endif |
---|
| 4600 | + |
---|
| 4601 | + put_prev_task(rq, prev); |
---|
4022 | 4602 | } |
---|
4023 | 4603 | |
---|
4024 | 4604 | /* |
---|
.. | .. |
---|
4036 | 4616 | * higher scheduling class, because otherwise those lose the 
---|
4037 | 4617 | * opportunity to pull in more work from other CPUs. |
---|
4038 | 4618 | */ |
---|
4039 | | - if (likely((prev->sched_class == &idle_sched_class || |
---|
4040 | | - prev->sched_class == &fair_sched_class) && |
---|
| 4619 | + if (likely(prev->sched_class <= &fair_sched_class && |
---|
4041 | 4620 | rq->nr_running == rq->cfs.h_nr_running)) { |
---|
4042 | 4621 | |
---|
4043 | | - p = fair_sched_class.pick_next_task(rq, prev, rf); |
---|
| 4622 | + p = pick_next_task_fair(rq, prev, rf); |
---|
4044 | 4623 | if (unlikely(p == RETRY_TASK)) |
---|
4045 | | - goto again; |
---|
| 4624 | + goto restart; |
---|
4046 | 4625 | |
---|
4047 | 4626 | /* Assumes fair_sched_class->next == idle_sched_class */ |
---|
4048 | | - if (unlikely(!p)) |
---|
4049 | | - p = idle_sched_class.pick_next_task(rq, prev, rf); |
---|
| 4627 | + if (!p) { |
---|
| 4628 | + put_prev_task(rq, prev); |
---|
| 4629 | + p = pick_next_task_idle(rq); |
---|
| 4630 | + } |
---|
4050 | 4631 | |
---|
4051 | 4632 | return p; |
---|
4052 | 4633 | } |
---|
4053 | 4634 | |
---|
4054 | | -again: |
---|
| 4635 | +restart: |
---|
| 4636 | + put_prev_task_balance(rq, prev, rf); |
---|
| 4637 | + |
---|
4055 | 4638 | for_each_class(class) { |
---|
4056 | | - p = class->pick_next_task(rq, prev, rf); |
---|
4057 | | - if (p) { |
---|
4058 | | - if (unlikely(p == RETRY_TASK)) |
---|
4059 | | - goto again; |
---|
| 4639 | + p = class->pick_next_task(rq); |
---|
| 4640 | + if (p) |
---|
4060 | 4641 | return p; |
---|
4061 | | - } |
---|
4062 | 4642 | } |
---|
4063 | 4643 | |
---|
4064 | 4644 | /* The idle class should always have a runnable task: */ |
---|
4065 | 4645 | BUG(); |
---|
4066 | 4646 | } |
---|
4067 | | - |
---|
4068 | | -static void migrate_disabled_sched(struct task_struct *p); |
---|
4069 | 4647 | |
---|
4070 | 4648 | /* |
---|
4071 | 4649 | * __schedule() is the main scheduler function. |
---|
.. | .. |
---|
4087 | 4665 | * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets |
---|
4088 | 4666 | * called on the nearest possible occasion: |
---|
4089 | 4667 | * |
---|
4090 | | - * - If the kernel is preemptible (CONFIG_PREEMPT=y): |
---|
| 4668 | + * - If the kernel is preemptible (CONFIG_PREEMPTION=y): |
---|
4091 | 4669 | * |
---|
4092 | 4670 | * - in syscall or exception context, at the next outermost 
---|
4093 | 4671 | * preempt_enable(). (this might be as soon as the wake_up()'s |
---|
.. | .. |
---|
4096 | 4674 | * - in IRQ context, return from interrupt-handler to |
---|
4097 | 4675 | * preemptible context |
---|
4098 | 4676 | * |
---|
4099 | | - * - If the kernel is not preemptible (CONFIG_PREEMPT is not set) |
---|
| 4677 | + * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set) |
---|
4100 | 4678 | * then at the next: |
---|
4101 | 4679 | * |
---|
4102 | 4680 | * - cond_resched() call |
---|
.. | .. |
---|
4110 | 4688 | { |
---|
4111 | 4689 | struct task_struct *prev, *next; |
---|
4112 | 4690 | unsigned long *switch_count; |
---|
| 4691 | + unsigned long prev_state; |
---|
4113 | 4692 | struct rq_flags rf; |
---|
4114 | 4693 | struct rq *rq; |
---|
4115 | 4694 | int cpu; |
---|
.. | .. |
---|
4118 | 4697 | rq = cpu_rq(cpu); |
---|
4119 | 4698 | prev = rq->curr; |
---|
4120 | 4699 | |
---|
4121 | | - schedule_debug(prev); |
---|
| 4700 | + schedule_debug(prev, preempt); |
---|
4122 | 4701 | |
---|
4123 | 4702 | if (sched_feat(HRTICK)) |
---|
4124 | 4703 | hrtick_clear(rq); |
---|
.. | .. |
---|
4129 | 4708 | /* |
---|
4130 | 4709 | * Make sure that signal_pending_state()->signal_pending() below |
---|
4131 | 4710 | * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) |
---|
4132 | | - * done by the caller to avoid the race with signal_wake_up(). |
---|
| 4711 | + * done by the caller to avoid the race with signal_wake_up(): |
---|
4133 | 4712 | * |
---|
4134 | | - * The membarrier system call requires a full memory barrier |
---|
| 4713 | + * __set_current_state(@state) signal_wake_up() |
---|
| 4714 | + * schedule() set_tsk_thread_flag(p, TIF_SIGPENDING) |
---|
| 4715 | + * wake_up_state(p, state) |
---|
| 4716 | + * LOCK rq->lock LOCK p->pi_lock 
---|
| 4717 | + * smp_mb__after_spinlock() smp_mb__after_spinlock() |
---|
| 4718 | + * if (signal_pending_state()) if (p->state & @state) |
---|
| 4719 | + * |
---|
| 4720 | + * Also, the membarrier system call requires a full memory barrier |
---|
4135 | 4721 | * after coming from user-space, before storing to rq->curr. |
---|
4136 | 4722 | */ |
---|
4137 | 4723 | rq_lock(rq, &rf); |
---|
4138 | 4724 | smp_mb__after_spinlock(); |
---|
4139 | | - |
---|
4140 | | - if (__migrate_disabled(prev)) |
---|
4141 | | - migrate_disabled_sched(prev); |
---|
4142 | 4725 | |
---|
4143 | 4726 | /* Promote REQ to ACT */ |
---|
4144 | 4727 | rq->clock_update_flags <<= 1; |
---|
4145 | 4728 | update_rq_clock(rq); |
---|
4146 | 4729 | |
---|
4147 | 4730 | switch_count = &prev->nivcsw; |
---|
4148 | | - if (!preempt && prev->state) { |
---|
4149 | | - if (unlikely(signal_pending_state(prev->state, prev))) { |
---|
| 4731 | + |
---|
| 4732 | + /* |
---|
| 4733 | + * We must load prev->state once (task_struct::state is volatile), such |
---|
| 4734 | + * that: |
---|
| 4735 | + * |
---|
| 4736 | + * - we form a control dependency vs deactivate_task() below. |
---|
| 4737 | + * - ptrace_{,un}freeze_traced() can change ->state underneath us. |
---|
| 4738 | + */ |
---|
| 4739 | + prev_state = prev->state; |
---|
| 4740 | + if (!preempt && prev_state) { |
---|
| 4741 | + if (signal_pending_state(prev_state, prev)) { |
---|
4150 | 4742 | prev->state = TASK_RUNNING; |
---|
4151 | 4743 | } else { |
---|
| 4744 | + prev->sched_contributes_to_load = |
---|
| 4745 | + (prev_state & TASK_UNINTERRUPTIBLE) && |
---|
| 4746 | + !(prev_state & TASK_NOLOAD) && |
---|
| 4747 | + !(prev->flags & PF_FROZEN); |
---|
| 4748 | + |
---|
| 4749 | + if (prev->sched_contributes_to_load) |
---|
| 4750 | + rq->nr_uninterruptible++; |
---|
| 4751 | + |
---|
| 4752 | + /* |
---|
| 4753 | + * __schedule() ttwu() |
---|
| 4754 | + * prev_state = prev->state; if (p->on_rq && ...) |
---|
| 4755 | + * if (prev_state) goto out; |
---|
| 4756 | + * p->on_rq = 0; smp_acquire__after_ctrl_dep(); |
---|
| 4757 | + * p->state = TASK_WAKING |
---|
| 4758 | + * |
---|
| 4759 | + * Where __schedule() and ttwu() have matching control dependencies. |
---|
| 4760 | + * |
---|
| 4761 | + * After this, schedule() must not care about p->state any more. |
---|
| 4762 | + */ |
---|
4152 | 4763 | deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK); |
---|
4153 | | - prev->on_rq = 0; |
---|
4154 | 4764 | |
---|
4155 | 4765 | if (prev->in_iowait) { |
---|
4156 | 4766 | atomic_inc(&rq->nr_iowait); |
---|
.. | .. |
---|
4162 | 4772 | |
---|
4163 | 4773 | next = pick_next_task(rq, prev, &rf); |
---|
4164 | 4774 | clear_tsk_need_resched(prev); |
---|
4165 | | - clear_tsk_need_resched_lazy(prev); |
---|
4166 | 4775 | clear_preempt_need_resched(); |
---|
4167 | 4776 | |
---|
| 4777 | + trace_android_rvh_schedule(prev, next, rq); |
---|
4168 | 4778 | if (likely(prev != next)) { |
---|
4169 | 4779 | rq->nr_switches++; |
---|
4170 | | - rq->curr = next; |
---|
| 4780 | + /* |
---|
| 4781 | + * RCU users of rcu_dereference(rq->curr) may not see |
---|
| 4782 | + * changes to task_struct made by pick_next_task(). |
---|
| 4783 | + */ |
---|
| 4784 | + RCU_INIT_POINTER(rq->curr, next); |
---|
4171 | 4785 | /* |
---|
4172 | 4786 | * The membarrier system call requires each architecture |
---|
4173 | 4787 | * to have a full memory barrier after updating |
---|
.. | .. |
---|
4183 | 4797 | * is a RELEASE barrier), |
---|
4184 | 4798 | */ |
---|
4185 | 4799 | ++*switch_count; |
---|
| 4800 | + |
---|
| 4801 | + psi_sched_switch(prev, next, !task_on_rq_queued(prev)); |
---|
4186 | 4802 | |
---|
4187 | 4803 | trace_sched_switch(preempt, prev, next); |
---|
4188 | 4804 | |
---|
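The sched_contributes_to_load computation above spells out which sleeps count towards loadavg: uninterruptible sleeps that are neither TASK_NOLOAD nor done by a frozen task. This is why kernel code that wants a long uninterruptible wait without inflating the load average sleeps in TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD), typically through wait_event_idle(). A minimal hedged sketch, with illustrative names:

    #include <linux/sched.h>
    #include <linux/wait.h>

    static DECLARE_WAIT_QUEUE_HEAD(example_wq);
    static bool example_ready;          /* set elsewhere, e.g. from an irq handler */

    /* Sleep uninterruptibly without contributing to the load average. */
    static void example_idle_wait(void)
    {
            wait_event_idle(example_wq, READ_ONCE(example_ready));
    }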
.. | .. |
---|
4214 | 4830 | |
---|
4215 | 4831 | static inline void sched_submit_work(struct task_struct *tsk) |
---|
4216 | 4832 | { |
---|
| 4833 | + unsigned int task_flags; |
---|
| 4834 | + |
---|
4217 | 4835 | if (!tsk->state) |
---|
4218 | 4836 | return; |
---|
4219 | 4837 | |
---|
| 4838 | + task_flags = tsk->flags; |
---|
4220 | 4839 | /* |
---|
4221 | 4840 | * If a worker went to sleep, notify and ask workqueue whether |
---|
4222 | 4841 | * it wants to wake up a task to maintain concurrency. |
---|
4223 | 4842 | * As this function is called inside the schedule() context, |
---|
4224 | 4843 | * we disable preemption to avoid it calling schedule() again |
---|
4225 | | - * in the possible wakeup of a kworker. |
---|
| 4844 | + * in the possible wakeup of a kworker and because wq_worker_sleeping() |
---|
| 4845 | + * requires it. |
---|
4226 | 4846 | */ |
---|
4227 | | - if (tsk->flags & PF_WQ_WORKER) { |
---|
| 4847 | + if (task_flags & (PF_WQ_WORKER | PF_IO_WORKER)) { |
---|
4228 | 4848 | preempt_disable(); |
---|
4229 | | - wq_worker_sleeping(tsk); |
---|
| 4849 | + if (task_flags & PF_WQ_WORKER) |
---|
| 4850 | + wq_worker_sleeping(tsk); |
---|
| 4851 | + else |
---|
| 4852 | + io_wq_worker_sleeping(tsk); |
---|
4230 | 4853 | preempt_enable_no_resched(); |
---|
4231 | 4854 | } |
---|
4232 | 4855 | |
---|
.. | .. |
---|
4243 | 4866 | |
---|
4244 | 4867 | static void sched_update_worker(struct task_struct *tsk) |
---|
4245 | 4868 | { |
---|
4246 | | - if (tsk->flags & PF_WQ_WORKER) |
---|
4247 | | - wq_worker_running(tsk); |
---|
| 4869 | + if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { |
---|
| 4870 | + if (tsk->flags & PF_WQ_WORKER) |
---|
| 4871 | + wq_worker_running(tsk); |
---|
| 4872 | + else |
---|
| 4873 | + io_wq_worker_running(tsk); |
---|
| 4874 | + } |
---|
4248 | 4875 | } |
---|
4249 | 4876 | |
---|
4250 | 4877 | asmlinkage __visible void __sched schedule(void) |
---|
.. | .. |
---|
4346 | 4973 | } while (need_resched()); |
---|
4347 | 4974 | } |
---|
4348 | 4975 | |
---|
4349 | | -#ifdef CONFIG_PREEMPT_LAZY |
---|
| 4976 | +#ifdef CONFIG_PREEMPTION |
---|
4350 | 4977 | /* |
---|
4351 | | - * If TIF_NEED_RESCHED is then we allow to be scheduled away since this is |
---|
4352 | | - * set by a RT task. Oterwise we try to avoid beeing scheduled out as long as |
---|
4353 | | - * preempt_lazy_count counter >0. |
---|
4354 | | - */ |
---|
4355 | | -static __always_inline int preemptible_lazy(void) |
---|
4356 | | -{ |
---|
4357 | | - if (test_thread_flag(TIF_NEED_RESCHED)) |
---|
4358 | | - return 1; |
---|
4359 | | - if (current_thread_info()->preempt_lazy_count) |
---|
4360 | | - return 0; |
---|
4361 | | - return 1; |
---|
4362 | | -} |
---|
4363 | | - |
---|
4364 | | -#else |
---|
4365 | | - |
---|
4366 | | -static inline int preemptible_lazy(void) |
---|
4367 | | -{ |
---|
4368 | | - return 1; |
---|
4369 | | -} |
---|
4370 | | - |
---|
4371 | | -#endif |
---|
4372 | | - |
---|
4373 | | -#ifdef CONFIG_PREEMPT |
---|
4374 | | -/* |
---|
4375 | | - * this is the entry point to schedule() from in-kernel preemption |
---|
4376 | | - * off of preempt_enable. Kernel preemptions off return from interrupt |
---|
4377 | | - * occur there and call schedule directly. |
---|
| 4978 | + * This is the entry point to schedule() from in-kernel preemption |
---|
| 4979 | + * off of preempt_enable. |
---|
4378 | 4980 | */ |
---|
4379 | 4981 | asmlinkage __visible void __sched notrace preempt_schedule(void) |
---|
4380 | 4982 | { |
---|
.. | .. |
---|
4384 | 4986 | */ |
---|
4385 | 4987 | if (likely(!preemptible())) |
---|
4386 | 4988 | return; |
---|
4387 | | - if (!preemptible_lazy()) |
---|
4388 | | - return; |
---|
| 4989 | + |
---|
4389 | 4990 | preempt_schedule_common(); |
---|
4390 | 4991 | } |
---|
4391 | 4992 | NOKPROBE_SYMBOL(preempt_schedule); |
---|
.. | .. |
---|
4410 | 5011 | enum ctx_state prev_ctx; |
---|
4411 | 5012 | |
---|
4412 | 5013 | if (likely(!preemptible())) |
---|
4413 | | - return; |
---|
4414 | | - |
---|
4415 | | - if (!preemptible_lazy()) |
---|
4416 | 5014 | return; |
---|
4417 | 5015 | |
---|
4418 | 5016 | do { |
---|
.. | .. |
---|
4446 | 5044 | } |
---|
4447 | 5045 | EXPORT_SYMBOL_GPL(preempt_schedule_notrace); |
---|
4448 | 5046 | |
---|
4449 | | -#endif /* CONFIG_PREEMPT */ |
---|
| 5047 | +#endif /* CONFIG_PREEMPTION */ |
---|
4450 | 5048 | |
---|
4451 | 5049 | /* |
---|
4452 | | - * this is the entry point to schedule() from kernel preemption |
---|
| 5050 | + * This is the entry point to schedule() from kernel preemption |
---|
4453 | 5051 | * off of irq context. |
---|
4454 | 5052 | * Note, that this is called and return with irqs disabled. This will |
---|
4455 | 5053 | * protect us against recursive calling from irq. |
---|
.. | .. |
---|
4477 | 5075 | int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, |
---|
4478 | 5076 | void *key) |
---|
4479 | 5077 | { |
---|
4480 | | - return try_to_wake_up(curr->private, mode, wake_flags, 1); |
---|
| 5078 | + WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~(WF_SYNC | WF_ANDROID_VENDOR)); |
---|
| 5079 | + return try_to_wake_up(curr->private, mode, wake_flags); |
---|
4481 | 5080 | } |
---|
4482 | 5081 | EXPORT_SYMBOL(default_wake_function); |
---|
| 5082 | + |
---|
| 5083 | +static void __setscheduler_prio(struct task_struct *p, int prio) |
---|
| 5084 | +{ |
---|
| 5085 | + if (dl_prio(prio)) |
---|
| 5086 | + p->sched_class = &dl_sched_class; |
---|
| 5087 | + else if (rt_prio(prio)) |
---|
| 5088 | + p->sched_class = &rt_sched_class; |
---|
| 5089 | + else |
---|
| 5090 | + p->sched_class = &fair_sched_class; |
---|
| 5091 | + |
---|
| 5092 | + p->prio = prio; |
---|
| 5093 | +} |
---|
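__setscheduler_prio() consolidates the class selection previously open-coded in rt_mutex_setprio() and __setscheduler() (those removals appear further down in this diff); it dispatches purely on the effective priority. For reference, the priority layout it assumes in kernels of this vintage is MAX_DL_PRIO == 0 and MAX_RT_PRIO == 100; the helper below is illustrative only:

    #include <linux/sched/deadline.h>
    #include <linux/sched/rt.h>

    /* Which scheduling class an effective priority maps to. */
    static const char *example_class_name(int prio)
    {
            if (dl_prio(prio))      /* prio < MAX_DL_PRIO, i.e. negative */
                    return "deadline";
            if (rt_prio(prio))      /* 0 <= prio < MAX_RT_PRIO */
                    return "realtime";
            return "fair";          /* CFS: SCHED_NORMAL/BATCH/IDLE */
    }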
4483 | 5094 | |
---|
4484 | 5095 | #ifdef CONFIG_RT_MUTEXES |
---|
4485 | 5096 | |
---|
.. | .. |
---|
4517 | 5128 | struct rq_flags rf; |
---|
4518 | 5129 | struct rq *rq; |
---|
4519 | 5130 | |
---|
| 5131 | + trace_android_rvh_rtmutex_prepare_setprio(p, pi_task); |
---|
4520 | 5132 | /* XXX used to be waiter->prio, not waiter->task->prio */ |
---|
4521 | 5133 | prio = __rt_effective_prio(pi_task, p->normal_prio); |
---|
4522 | 5134 | |
---|
.. | .. |
---|
4591 | 5203 | if (!dl_prio(p->normal_prio) || |
---|
4592 | 5204 | (pi_task && dl_prio(pi_task->prio) && |
---|
4593 | 5205 | dl_entity_preempt(&pi_task->dl, &p->dl))) { |
---|
4594 | | - p->dl.dl_boosted = 1; |
---|
| 5206 | + p->dl.pi_se = pi_task->dl.pi_se; |
---|
4595 | 5207 | queue_flag |= ENQUEUE_REPLENISH; |
---|
4596 | | - } else |
---|
4597 | | - p->dl.dl_boosted = 0; |
---|
4598 | | - p->sched_class = &dl_sched_class; |
---|
| 5208 | + } else { |
---|
| 5209 | + p->dl.pi_se = &p->dl; |
---|
| 5210 | + } |
---|
4599 | 5211 | } else if (rt_prio(prio)) { |
---|
4600 | 5212 | if (dl_prio(oldprio)) |
---|
4601 | | - p->dl.dl_boosted = 0; |
---|
| 5213 | + p->dl.pi_se = &p->dl; |
---|
4602 | 5214 | if (oldprio < prio) |
---|
4603 | 5215 | queue_flag |= ENQUEUE_HEAD; |
---|
4604 | | - p->sched_class = &rt_sched_class; |
---|
4605 | 5216 | } else { |
---|
4606 | 5217 | if (dl_prio(oldprio)) |
---|
4607 | | - p->dl.dl_boosted = 0; |
---|
| 5218 | + p->dl.pi_se = &p->dl; |
---|
4608 | 5219 | if (rt_prio(oldprio)) |
---|
4609 | 5220 | p->rt.timeout = 0; |
---|
4610 | | - p->sched_class = &fair_sched_class; |
---|
4611 | 5221 | } |
---|
4612 | 5222 | |
---|
4613 | | - p->prio = prio; |
---|
| 5223 | + __setscheduler_prio(p, prio); |
---|
4614 | 5224 | |
---|
4615 | 5225 | if (queued) |
---|
4616 | 5226 | enqueue_task(rq, p, queue_flag); |
---|
4617 | 5227 | if (running) |
---|
4618 | | - set_curr_task(rq, p); |
---|
| 5228 | + set_next_task(rq, p); |
---|
4619 | 5229 | |
---|
4620 | 5230 | check_class_changed(rq, p, prev_class, oldprio); |
---|
4621 | 5231 | out_unlock: |
---|
.. | .. |
---|
4635 | 5245 | |
---|
4636 | 5246 | void set_user_nice(struct task_struct *p, long nice) |
---|
4637 | 5247 | { |
---|
4638 | | - bool queued, running; |
---|
4639 | | - int old_prio, delta; |
---|
| 5248 | + bool queued, running, allowed = false; |
---|
| 5249 | + int old_prio; |
---|
4640 | 5250 | struct rq_flags rf; |
---|
4641 | 5251 | struct rq *rq; |
---|
4642 | 5252 | |
---|
4643 | | - if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) |
---|
| 5253 | + trace_android_rvh_set_user_nice(p, &nice, &allowed); |
---|
| 5254 | + if ((task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) && !allowed) |
---|
4644 | 5255 | return; |
---|
4645 | 5256 | /* |
---|
4646 | 5257 | * We have to be careful, if called from sys_setpriority(), |
---|
.. | .. |
---|
4667 | 5278 | put_prev_task(rq, p); |
---|
4668 | 5279 | |
---|
4669 | 5280 | p->static_prio = NICE_TO_PRIO(nice); |
---|
4670 | | - set_load_weight(p, true); |
---|
| 5281 | + set_load_weight(p); |
---|
4671 | 5282 | old_prio = p->prio; |
---|
4672 | 5283 | p->prio = effective_prio(p); |
---|
4673 | | - delta = p->prio - old_prio; |
---|
4674 | 5284 | |
---|
4675 | | - if (queued) { |
---|
| 5285 | + if (queued) |
---|
4676 | 5286 | enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); |
---|
4677 | | - /* |
---|
4678 | | - * If the task increased its priority or is running and |
---|
4679 | | - * lowered its priority, then reschedule its CPU: |
---|
4680 | | - */ |
---|
4681 | | - if (delta < 0 || (delta > 0 && task_running(rq, p))) |
---|
4682 | | - resched_curr(rq); |
---|
4683 | | - } |
---|
4684 | 5287 | if (running) |
---|
4685 | | - set_curr_task(rq, p); |
---|
| 5288 | + set_next_task(rq, p); |
---|
| 5289 | + |
---|
| 5290 | + /* |
---|
| 5291 | + * If the task increased its priority or is running and |
---|
| 5292 | + * lowered its priority, then reschedule its CPU: |
---|
| 5293 | + */ |
---|
| 5294 | + p->sched_class->prio_changed(rq, p, old_prio); |
---|
| 5295 | + |
---|
4686 | 5296 | out_unlock: |
---|
4687 | 5297 | task_rq_unlock(rq, p, &rf); |
---|
4688 | 5298 | } |
---|
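In the rewrite above, the reschedule decision now goes through the class's prio_changed() callback rather than the open-coded priority-delta check. In-kernel callers keep using the same entry point; a minimal sketch (the helper name is illustrative):

    #include <linux/sched.h>

    /* A housekeeping kthread demoting itself to the weakest nice level. */
    static void example_make_background(void)
    {
            set_user_nice(current, MAX_NICE);       /* MAX_NICE == 19 */
    }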
.. | .. |
---|
4767 | 5377 | return 0; |
---|
4768 | 5378 | |
---|
4769 | 5379 | #ifdef CONFIG_SMP |
---|
4770 | | - if (!llist_empty(&rq->wake_list)) |
---|
| 5380 | + if (rq->ttwu_pending) |
---|
4771 | 5381 | return 0; |
---|
4772 | 5382 | #endif |
---|
4773 | 5383 | |
---|
.. | .. |
---|
4790 | 5400 | |
---|
4791 | 5401 | return 1; |
---|
4792 | 5402 | } |
---|
| 5403 | +EXPORT_SYMBOL_GPL(available_idle_cpu); |
---|
4793 | 5404 | |
---|
4794 | 5405 | /** |
---|
4795 | 5406 | * idle_task - return the idle task for a given CPU. |
---|
.. | .. |
---|
4841 | 5452 | */ |
---|
4842 | 5453 | p->rt_priority = attr->sched_priority; |
---|
4843 | 5454 | p->normal_prio = normal_prio(p); |
---|
4844 | | - set_load_weight(p, true); |
---|
4845 | | -} |
---|
4846 | | - |
---|
4847 | | -/* Actually do priority change: must hold pi & rq lock. */ |
---|
4848 | | -static void __setscheduler(struct rq *rq, struct task_struct *p, |
---|
4849 | | - const struct sched_attr *attr, bool keep_boost) |
---|
4850 | | -{ |
---|
4851 | | - /* |
---|
4852 | | - * If params can't change scheduling class changes aren't allowed |
---|
4853 | | - * either. |
---|
4854 | | - */ |
---|
4855 | | - if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS) |
---|
4856 | | - return; |
---|
4857 | | - |
---|
4858 | | - __setscheduler_params(p, attr); |
---|
4859 | | - |
---|
4860 | | - /* |
---|
4861 | | - * Keep a potential priority boosting if called from |
---|
4862 | | - * sched_setscheduler(). |
---|
4863 | | - */ |
---|
4864 | | - p->prio = normal_prio(p); |
---|
4865 | | - if (keep_boost) |
---|
4866 | | - p->prio = rt_effective_prio(p, p->prio); |
---|
4867 | | - |
---|
4868 | | - if (dl_prio(p->prio)) |
---|
4869 | | - p->sched_class = &dl_sched_class; |
---|
4870 | | - else if (rt_prio(p->prio)) |
---|
4871 | | - p->sched_class = &rt_sched_class; |
---|
4872 | | - else |
---|
4873 | | - p->sched_class = &fair_sched_class; |
---|
| 5455 | + set_load_weight(p); |
---|
4874 | 5456 | } |
---|
4875 | 5457 | |
---|
4876 | 5458 | /* |
---|
.. | .. |
---|
4893 | 5475 | const struct sched_attr *attr, |
---|
4894 | 5476 | bool user, bool pi) |
---|
4895 | 5477 | { |
---|
4896 | | - int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : |
---|
4897 | | - MAX_RT_PRIO - 1 - attr->sched_priority; |
---|
4898 | | - int retval, oldprio, oldpolicy = -1, queued, running; |
---|
4899 | | - int new_effective_prio, policy = attr->sched_policy; |
---|
| 5478 | + int oldpolicy = -1, policy = attr->sched_policy; |
---|
| 5479 | + int retval, oldprio, newprio, queued, running; |
---|
4900 | 5480 | const struct sched_class *prev_class; |
---|
4901 | 5481 | struct rq_flags rf; |
---|
4902 | 5482 | int reset_on_fork; |
---|
4903 | 5483 | int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; |
---|
4904 | 5484 | struct rq *rq; |
---|
| 5485 | + bool cpuset_locked = false; |
---|
4905 | 5486 | |
---|
4906 | 5487 | /* The pi code expects interrupts enabled */ |
---|
4907 | 5488 | BUG_ON(pi && in_interrupt()); |
---|
.. | .. |
---|
4969 | 5550 | * Treat SCHED_IDLE as nice 20. Only allow a switch to |
---|
4970 | 5551 | * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. |
---|
4971 | 5552 | */ |
---|
4972 | | - if (idle_policy(p->policy) && !idle_policy(policy)) { |
---|
| 5553 | + if (task_has_idle_policy(p) && !idle_policy(policy)) { |
---|
4973 | 5554 | if (!can_nice(p, task_nice(p))) |
---|
4974 | 5555 | return -EPERM; |
---|
4975 | 5556 | } |
---|
.. | .. |
---|
4980 | 5561 | |
---|
4981 | 5562 | /* Normal users shall not reset the sched_reset_on_fork flag: */ |
---|
4982 | 5563 | if (p->sched_reset_on_fork && !reset_on_fork) |
---|
| 5564 | + return -EPERM; |
---|
| 5565 | + |
---|
| 5566 | + /* Can't change util-clamps */ |
---|
| 5567 | + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) |
---|
4983 | 5568 | return -EPERM; |
---|
4984 | 5569 | } |
---|
4985 | 5570 | |
---|
.. | .. |
---|
5000 | 5585 | } |
---|
5001 | 5586 | |
---|
5002 | 5587 | /* |
---|
| 5588 | + * SCHED_DEADLINE bandwidth accounting relies on stable cpusets |
---|
| 5589 | + * information. |
---|
| 5590 | + */ |
---|
| 5591 | + if (dl_policy(policy) || dl_policy(p->policy)) { |
---|
| 5592 | + cpuset_locked = true; |
---|
| 5593 | + cpuset_lock(); |
---|
| 5594 | + } |
---|
| 5595 | + |
---|
| 5596 | + /* |
---|
5003 | 5597 | * Make sure no PI-waiters arrive (or leave) while we are |
---|
5004 | 5598 | * changing the priority of the task: |
---|
5005 | 5599 | * |
---|
.. | .. |
---|
5013 | 5607 | * Changing the policy of the stop threads is a very bad idea: 
---|
5014 | 5608 | */ |
---|
5015 | 5609 | if (p == rq->stop) { |
---|
5016 | | - task_rq_unlock(rq, p, &rf); |
---|
5017 | | - return -EINVAL; |
---|
| 5610 | + retval = -EINVAL; |
---|
| 5611 | + goto unlock; |
---|
5018 | 5612 | } |
---|
5019 | 5613 | |
---|
5020 | 5614 | /* |
---|
.. | .. |
---|
5032 | 5626 | goto change; |
---|
5033 | 5627 | |
---|
5034 | 5628 | p->sched_reset_on_fork = reset_on_fork; |
---|
5035 | | - task_rq_unlock(rq, p, &rf); |
---|
5036 | | - return 0; |
---|
| 5629 | + retval = 0; |
---|
| 5630 | + goto unlock; |
---|
5037 | 5631 | } |
---|
5038 | 5632 | change: |
---|
5039 | 5633 | |
---|
.. | .. |
---|
5046 | 5640 | if (rt_bandwidth_enabled() && rt_policy(policy) && |
---|
5047 | 5641 | task_group(p)->rt_bandwidth.rt_runtime == 0 && |
---|
5048 | 5642 | !task_group_is_autogroup(task_group(p))) { |
---|
5049 | | - task_rq_unlock(rq, p, &rf); |
---|
5050 | | - return -EPERM; |
---|
| 5643 | + retval = -EPERM; |
---|
| 5644 | + goto unlock; |
---|
5051 | 5645 | } |
---|
5052 | 5646 | #endif |
---|
5053 | 5647 | #ifdef CONFIG_SMP |
---|
.. | .. |
---|
5062 | 5656 | */ |
---|
5063 | 5657 | if (!cpumask_subset(span, p->cpus_ptr) || |
---|
5064 | 5658 | rq->rd->dl_bw.bw == 0) { |
---|
5065 | | - task_rq_unlock(rq, p, &rf); |
---|
5066 | | - return -EPERM; |
---|
| 5659 | + retval = -EPERM; |
---|
| 5660 | + goto unlock; |
---|
5067 | 5661 | } |
---|
5068 | 5662 | } |
---|
5069 | 5663 | #endif |
---|
.. | .. |
---|
5073 | 5667 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { |
---|
5074 | 5668 | policy = oldpolicy = -1; |
---|
5075 | 5669 | task_rq_unlock(rq, p, &rf); |
---|
| 5670 | + if (cpuset_locked) |
---|
| 5671 | + cpuset_unlock(); |
---|
5076 | 5672 | goto recheck; |
---|
5077 | 5673 | } |
---|
5078 | 5674 | |
---|
.. | .. |
---|
5082 | 5678 | * is available. |
---|
5083 | 5679 | */ |
---|
5084 | 5680 | if ((dl_policy(policy) || dl_task(p)) && sched_dl_overflow(p, policy, attr)) { |
---|
5085 | | - task_rq_unlock(rq, p, &rf); |
---|
5086 | | - return -EBUSY; |
---|
| 5681 | + retval = -EBUSY; |
---|
| 5682 | + goto unlock; |
---|
5087 | 5683 | } |
---|
5088 | 5684 | |
---|
5089 | 5685 | p->sched_reset_on_fork = reset_on_fork; |
---|
5090 | 5686 | oldprio = p->prio; |
---|
5091 | 5687 | |
---|
| 5688 | + newprio = __normal_prio(policy, attr->sched_priority, attr->sched_nice); |
---|
5092 | 5689 | if (pi) { |
---|
5093 | 5690 | /* |
---|
5094 | 5691 | * Take priority boosted tasks into account. If the new |
---|
.. | .. |
---|
5097 | 5694 | * the runqueue. This will be done when the task deboost |
---|
5098 | 5695 | * itself. |
---|
5099 | 5696 | */ |
---|
5100 | | - new_effective_prio = rt_effective_prio(p, newprio); |
---|
5101 | | - if (new_effective_prio == oldprio) |
---|
| 5697 | + newprio = rt_effective_prio(p, newprio); |
---|
| 5698 | + if (newprio == oldprio) |
---|
5102 | 5699 | queue_flags &= ~DEQUEUE_MOVE; |
---|
5103 | 5700 | } |
---|
5104 | 5701 | |
---|
.. | .. |
---|
5111 | 5708 | |
---|
5112 | 5709 | prev_class = p->sched_class; |
---|
5113 | 5710 | |
---|
5114 | | - __setscheduler(rq, p, attr, pi); |
---|
| 5711 | + if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { |
---|
| 5712 | + __setscheduler_params(p, attr); |
---|
| 5713 | + __setscheduler_prio(p, newprio); |
---|
| 5714 | + trace_android_rvh_setscheduler(p); |
---|
| 5715 | + } |
---|
5115 | 5716 | __setscheduler_uclamp(p, attr); |
---|
5116 | 5717 | |
---|
5117 | 5718 | if (queued) { |
---|
.. | .. |
---|
5125 | 5726 | enqueue_task(rq, p, queue_flags); |
---|
5126 | 5727 | } |
---|
5127 | 5728 | if (running) |
---|
5128 | | - set_curr_task(rq, p); |
---|
| 5729 | + set_next_task(rq, p); |
---|
5129 | 5730 | |
---|
5130 | 5731 | check_class_changed(rq, p, prev_class, oldprio); |
---|
5131 | 5732 | |
---|
.. | .. |
---|
5133 | 5734 | preempt_disable(); |
---|
5134 | 5735 | task_rq_unlock(rq, p, &rf); |
---|
5135 | 5736 | |
---|
5136 | | - if (pi) |
---|
| 5737 | + if (pi) { |
---|
| 5738 | + if (cpuset_locked) |
---|
| 5739 | + cpuset_unlock(); |
---|
5137 | 5740 | rt_mutex_adjust_pi(p); |
---|
| 5741 | + } |
---|
5138 | 5742 | |
---|
5139 | 5743 | /* Run balance callbacks after we've adjusted the PI chain: */ |
---|
5140 | 5744 | balance_callback(rq); |
---|
5141 | 5745 | preempt_enable(); |
---|
5142 | 5746 | |
---|
5143 | 5747 | return 0; |
---|
| 5748 | + |
---|
| 5749 | +unlock: |
---|
| 5750 | + task_rq_unlock(rq, p, &rf); |
---|
| 5751 | + if (cpuset_locked) |
---|
| 5752 | + cpuset_unlock(); |
---|
| 5753 | + return retval; |
---|
5144 | 5754 | } |
---|
5145 | 5755 | |
---|
5146 | 5756 | static int _sched_setscheduler(struct task_struct *p, int policy, |
---|
.. | .. |
---|
5152 | 5762 | .sched_nice = PRIO_TO_NICE(p->static_prio), |
---|
5153 | 5763 | }; |
---|
5154 | 5764 | |
---|
| 5765 | + if (IS_ENABLED(CONFIG_ROCKCHIP_OPTIMIZE_RT_PRIO) && |
---|
| 5766 | + ((policy == SCHED_FIFO) || (policy == SCHED_RR))) { |
---|
| 5767 | + attr.sched_priority /= 2; |
---|
| 5768 | + if (!check) |
---|
| 5769 | + attr.sched_priority += MAX_RT_PRIO / 2; |
---|
| 5770 | + if (!attr.sched_priority) |
---|
| 5771 | + attr.sched_priority = 1; |
---|
| 5772 | + } |
---|
5155 | 5773 | /* Fixup the legacy SCHED_RESET_ON_FORK hack. */ |
---|
5156 | 5774 | if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { |
---|
5157 | 5775 | attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; |
---|
.. | .. |
---|
5166 | 5784 | * @p: the task in question. |
---|
5167 | 5785 | * @policy: new policy. |
---|
5168 | 5786 | * @param: structure containing the new RT priority. |
---|
| 5787 | + * |
---|
| 5788 | + * Use sched_set_fifo(), read its comment. |
---|
5169 | 5789 | * |
---|
5170 | 5790 | * Return: 0 on success. An error code otherwise. |
---|
5171 | 5791 | * |
---|
.. | .. |
---|
5188 | 5808 | { |
---|
5189 | 5809 | return __sched_setscheduler(p, attr, false, true); |
---|
5190 | 5810 | } |
---|
| 5811 | +EXPORT_SYMBOL_GPL(sched_setattr_nocheck); |
---|
5191 | 5812 | |
---|
5192 | 5813 | /** |
---|
5193 | 5814 | * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. |
---|
.. | .. |
---|
5208 | 5829 | return _sched_setscheduler(p, policy, param, false); |
---|
5209 | 5830 | } |
---|
5210 | 5831 | EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); |
---|
| 5832 | + |
---|
| 5833 | +/* |
---|
| 5834 | + * SCHED_FIFO is a broken scheduler model; that is, it is fundamentally |
---|
| 5835 | + * incapable of resource management, which is the one thing an OS really should |
---|
| 5836 | + * be doing. |
---|
| 5837 | + * |
---|
| 5838 | + * This is of course the reason it is limited to privileged users only. |
---|
| 5839 | + * |
---|
| 5840 | + * Worse still, it is fundamentally impossible to compose static priority 
---|
| 5841 | + * workloads. You cannot take two correctly working static prio workloads |
---|
| 5842 | + * and smash them together and still expect them to work. |
---|
| 5843 | + * |
---|
| 5844 | + * For this reason 'all' FIFO tasks the kernel creates are basically at: |
---|
| 5845 | + * |
---|
| 5846 | + * MAX_RT_PRIO / 2 |
---|
| 5847 | + * |
---|
| 5848 | + * The administrator _MUST_ configure the system, the kernel simply doesn't |
---|
| 5849 | + * know enough information to make a sensible choice. |
---|
| 5850 | + */ |
---|
| 5851 | +void sched_set_fifo(struct task_struct *p) |
---|
| 5852 | +{ |
---|
| 5853 | + struct sched_param sp = { .sched_priority = MAX_RT_PRIO / 2 }; |
---|
| 5854 | + WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); |
---|
| 5855 | +} |
---|
| 5856 | +EXPORT_SYMBOL_GPL(sched_set_fifo); |
---|
| 5857 | + |
---|
| 5858 | +/* |
---|
| 5859 | + * For when you don't much care about FIFO, but want to be above SCHED_NORMAL. |
---|
| 5860 | + */ |
---|
| 5861 | +void sched_set_fifo_low(struct task_struct *p) |
---|
| 5862 | +{ |
---|
| 5863 | + struct sched_param sp = { .sched_priority = 1 }; |
---|
| 5864 | + WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); |
---|
| 5865 | +} |
---|
| 5866 | +EXPORT_SYMBOL_GPL(sched_set_fifo_low); |
---|
| 5867 | + |
---|
| 5868 | +void sched_set_normal(struct task_struct *p, int nice) |
---|
| 5869 | +{ |
---|
| 5870 | + struct sched_attr attr = { |
---|
| 5871 | + .sched_policy = SCHED_NORMAL, |
---|
| 5872 | + .sched_nice = nice, |
---|
| 5873 | + }; |
---|
| 5874 | + WARN_ON_ONCE(sched_setattr_nocheck(p, &attr) != 0); |
---|
| 5875 | +} |
---|
| 5876 | +EXPORT_SYMBOL_GPL(sched_set_normal); |
---|
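sched_set_fifo(), sched_set_fifo_low() and sched_set_normal() exist so in-kernel users stop inventing magic RT priorities of their own. A minimal usage sketch; the worker and its wait are illustrative placeholders, not code from this diff:

    #include <linux/kthread.h>
    #include <linux/sched.h>

    /* Hypothetical latency-sensitive kthread. */
    static int example_rt_worker(void *data)
    {
            sched_set_fifo(current);        /* kernel-chosen default: MAX_RT_PRIO / 2 */

            while (!kthread_should_stop()) {
                    /* ... service the hardware ... */
                    schedule_timeout_interruptible(HZ);
            }
            return 0;
    }

A caller that merely wants to stay ahead of SCHED_NORMAL without competing with other RT users would use sched_set_fifo_low() instead, and sched_set_normal(p, nice) replaces hand-rolled sched_setscheduler_nocheck(p, SCHED_NORMAL, ...) calls.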
5211 | 5877 | |
---|
5212 | 5878 | static int |
---|
5213 | 5879 | do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) |
---|
.. | .. |
---|
5239 | 5905 | u32 size; |
---|
5240 | 5906 | int ret; |
---|
5241 | 5907 | |
---|
5242 | | - if (!access_ok(VERIFY_WRITE, uattr, SCHED_ATTR_SIZE_VER0)) |
---|
5243 | | - return -EFAULT; |
---|
5244 | | - |
---|
5245 | 5908 | /* Zero the full structure, so that a short copy will be nice: */ |
---|
5246 | 5909 | memset(attr, 0, sizeof(*attr)); |
---|
5247 | 5910 | |
---|
.. | .. |
---|
5249 | 5912 | if (ret) |
---|
5250 | 5913 | return ret; |
---|
5251 | 5914 | |
---|
5252 | | - /* Bail out on silly large: */ |
---|
5253 | | - if (size > PAGE_SIZE) |
---|
5254 | | - goto err_size; |
---|
5255 | | - |
---|
5256 | 5915 | /* ABI compatibility quirk: */ |
---|
5257 | 5916 | if (!size) |
---|
5258 | 5917 | size = SCHED_ATTR_SIZE_VER0; |
---|
5259 | | - |
---|
5260 | | - if (size < SCHED_ATTR_SIZE_VER0) |
---|
| 5918 | + if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) |
---|
5261 | 5919 | goto err_size; |
---|
5262 | 5920 | |
---|
5263 | | - /* |
---|
5264 | | - * If we're handed a bigger struct than we know of, |
---|
5265 | | - * ensure all the unknown bits are 0 - i.e. new |
---|
5266 | | - * user-space does not rely on any kernel feature |
---|
5267 | | - * extensions we dont know about yet. |
---|
5268 | | - */ |
---|
5269 | | - if (size > sizeof(*attr)) { |
---|
5270 | | - unsigned char __user *addr; |
---|
5271 | | - unsigned char __user *end; |
---|
5272 | | - unsigned char val; |
---|
5273 | | - |
---|
5274 | | - addr = (void __user *)uattr + sizeof(*attr); |
---|
5275 | | - end = (void __user *)uattr + size; |
---|
5276 | | - |
---|
5277 | | - for (; addr < end; addr++) { |
---|
5278 | | - ret = get_user(val, addr); |
---|
5279 | | - if (ret) |
---|
5280 | | - return ret; |
---|
5281 | | - if (val) |
---|
5282 | | - goto err_size; |
---|
5283 | | - } |
---|
5284 | | - size = sizeof(*attr); |
---|
| 5921 | + ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); |
---|
| 5922 | + if (ret) { |
---|
| 5923 | + if (ret == -E2BIG) |
---|
| 5924 | + goto err_size; |
---|
| 5925 | + return ret; |
---|
5285 | 5926 | } |
---|
5286 | | - |
---|
5287 | | - ret = copy_from_user(attr, uattr, size); |
---|
5288 | | - if (ret) |
---|
5289 | | - return -EFAULT; |
---|
5290 | 5927 | |
---|
5291 | 5928 | if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) && |
---|
5292 | 5929 | size < SCHED_ATTR_SIZE_VER1) |
---|
.. | .. |
---|
5303 | 5940 | err_size: |
---|
5304 | 5941 | put_user(sizeof(*attr), &uattr->size); |
---|
5305 | 5942 | return -E2BIG; |
---|
| 5943 | +} |
---|
| 5944 | + |
---|
| 5945 | +static void get_params(struct task_struct *p, struct sched_attr *attr) |
---|
| 5946 | +{ |
---|
| 5947 | + if (task_has_dl_policy(p)) |
---|
| 5948 | + __getparam_dl(p, attr); |
---|
| 5949 | + else if (task_has_rt_policy(p)) |
---|
| 5950 | + attr->sched_priority = p->rt_priority; |
---|
| 5951 | + else |
---|
| 5952 | + attr->sched_nice = task_nice(p); |
---|
5306 | 5953 | } |
---|
5307 | 5954 | |
---|
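On the ABI side, the sched_attr copy-in above means a smaller struct from older userspace is zero-extended, while a larger struct is accepted only if the bytes the kernel does not know about are all zero (otherwise -E2BIG is returned and the supported size written back). A hedged userspace sketch of driving this interface; there is no glibc wrapper, and the local struct mirrors only the VER0 fields:

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    struct sched_attr {             /* VER0 layout, see include/uapi/linux/sched/types.h */
            uint32_t size;
            uint32_t sched_policy;
            uint64_t sched_flags;
            int32_t  sched_nice;
            uint32_t sched_priority;
            uint64_t sched_runtime;
            uint64_t sched_deadline;
            uint64_t sched_period;
    };

    int main(void)
    {
            struct sched_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);       /* the kernel copies/validates based on this */
            attr.sched_policy = SCHED_FIFO;
            attr.sched_priority = 10;

            return syscall(SYS_sched_setattr, 0, &attr, 0) ? 1 : 0;  /* 0 == current thread */
    }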
5308 | 5955 | /** |
---|
.. | .. |
---|
5366 | 6013 | rcu_read_unlock(); |
---|
5367 | 6014 | |
---|
5368 | 6015 | if (likely(p)) { |
---|
| 6016 | + if (attr.sched_flags & SCHED_FLAG_KEEP_PARAMS) |
---|
| 6017 | + get_params(p, &attr); |
---|
5369 | 6018 | retval = sched_setattr(p, &attr); |
---|
5370 | 6019 | put_task_struct(p); |
---|
5371 | 6020 | } |
---|
.. | .. |
---|
5459 | 6108 | { |
---|
5460 | 6109 | unsigned int ksize = sizeof(*kattr); |
---|
5461 | 6110 | |
---|
5462 | | - if (!access_ok(VERIFY_WRITE, uattr, usize)) |
---|
| 6111 | + if (!access_ok(uattr, usize)) |
---|
5463 | 6112 | return -EFAULT; |
---|
5464 | 6113 | |
---|
5465 | 6114 | /* |
---|
.. | .. |
---|
5487 | 6136 | * sys_sched_getattr - similar to sched_getparam, but with sched_attr |
---|
5488 | 6137 | * @pid: the pid in question. |
---|
5489 | 6138 | * @uattr: structure containing the extended parameters. |
---|
5490 | | - * @usize: sizeof(attr) that user-space knows about, for forwards and backwards compatibility. |
---|
| 6139 | + * @usize: sizeof(attr) for fwd/bwd comp. |
---|
5491 | 6140 | * @flags: for future extension. |
---|
5492 | 6141 | */ |
---|
5493 | 6142 | SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, |
---|
.. | .. |
---|
5514 | 6163 | kattr.sched_policy = p->policy; |
---|
5515 | 6164 | if (p->sched_reset_on_fork) |
---|
5516 | 6165 | kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; |
---|
5517 | | - if (task_has_dl_policy(p)) |
---|
5518 | | - __getparam_dl(p, &kattr); |
---|
5519 | | - else if (task_has_rt_policy(p)) |
---|
5520 | | - kattr.sched_priority = p->rt_priority; |
---|
5521 | | - else |
---|
5522 | | - kattr.sched_nice = task_nice(p); |
---|
| 6166 | + get_params(p, &kattr); |
---|
| 6167 | + kattr.sched_flags &= SCHED_FLAG_ALL; |
---|
5523 | 6168 | |
---|
5524 | 6169 | #ifdef CONFIG_UCLAMP_TASK |
---|
| 6170 | + /* |
---|
| 6171 | + * This could race with another potential updater, but this is fine |
---|
| 6172 | + * because it'll correctly read the old or the new value. We don't need |
---|
| 6173 | + * to guarantee who wins the race as long as it doesn't return garbage. |
---|
| 6174 | + */ |
---|
5525 | 6175 | kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; |
---|
5526 | 6176 | kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; |
---|
5527 | 6177 | #endif |
---|
.. | .. |
---|
5540 | 6190 | cpumask_var_t cpus_allowed, new_mask; |
---|
5541 | 6191 | struct task_struct *p; |
---|
5542 | 6192 | int retval; |
---|
| 6193 | + int skip = 0; |
---|
5543 | 6194 | |
---|
5544 | 6195 | rcu_read_lock(); |
---|
5545 | 6196 | |
---|
.. | .. |
---|
5575 | 6226 | rcu_read_unlock(); |
---|
5576 | 6227 | } |
---|
5577 | 6228 | |
---|
| 6229 | + trace_android_vh_sched_setaffinity_early(p, in_mask, &skip); |
---|
| 6230 | + if (skip) |
---|
| 6231 | + goto out_free_new_mask; |
---|
5578 | 6232 | retval = security_task_setscheduler(p); |
---|
5579 | 6233 | if (retval) |
---|
5580 | 6234 | goto out_free_new_mask; |
---|
.. | .. |
---|
5615 | 6269 | goto again; |
---|
5616 | 6270 | } |
---|
5617 | 6271 | } |
---|
| 6272 | + |
---|
| 6273 | + trace_android_rvh_sched_setaffinity(p, in_mask, &retval); |
---|
| 6274 | + |
---|
5618 | 6275 | out_free_new_mask: |
---|
5619 | 6276 | free_cpumask_var(new_mask); |
---|
5620 | 6277 | out_free_cpus_allowed: |
---|
.. | .. |
---|
5623 | 6280 | put_task_struct(p); |
---|
5624 | 6281 | return retval; |
---|
5625 | 6282 | } |
---|
5626 | | -EXPORT_SYMBOL_GPL(sched_setaffinity); |
---|
5627 | 6283 | |
---|
5628 | 6284 | static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, |
---|
5629 | 6285 | struct cpumask *new_mask) |
---|
.. | .. |
---|
5707 | 6363 | if (len & (sizeof(unsigned long)-1)) |
---|
5708 | 6364 | return -EINVAL; |
---|
5709 | 6365 | |
---|
5710 | | - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) |
---|
| 6366 | + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) |
---|
5711 | 6367 | return -ENOMEM; |
---|
5712 | 6368 | |
---|
5713 | 6369 | ret = sched_getaffinity(pid, mask); |
---|
5714 | 6370 | if (ret == 0) { |
---|
5715 | 6371 | unsigned int retlen = min(len, cpumask_size()); |
---|
5716 | 6372 | |
---|
5717 | | - if (copy_to_user(user_mask_ptr, mask, retlen)) |
---|
| 6373 | + if (copy_to_user(user_mask_ptr, cpumask_bits(mask), retlen)) |
---|
5718 | 6374 | ret = -EFAULT; |
---|
5719 | 6375 | else |
---|
5720 | 6376 | ret = retlen; |
---|
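Note that the syscall still returns the number of bytes copied out (retlen) rather than 0; glibc's wrapper normalizes that to 0 on success. From userspace the usual pattern is plain glibc usage, nothing specific to this diff:

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
            cpu_set_t set;

            CPU_ZERO(&set);
            if (sched_getaffinity(0, sizeof(set), &set))    /* 0 == calling thread */
                    return 1;

            printf("runnable on %d CPUs\n", CPU_COUNT(&set));
            return 0;
    }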
.. | .. |
---|
5742 | 6398 | schedstat_inc(rq->yld_count); |
---|
5743 | 6399 | current->sched_class->yield_task(rq); |
---|
5744 | 6400 | |
---|
| 6401 | + trace_android_rvh_do_sched_yield(rq); |
---|
| 6402 | + |
---|
5745 | 6403 | preempt_disable(); |
---|
5746 | 6404 | rq_unlock_irq(rq, &rf); |
---|
5747 | 6405 | sched_preempt_enable_no_resched(); |
---|
.. | .. |
---|
5755 | 6413 | return 0; |
---|
5756 | 6414 | } |
---|
5757 | 6415 | |
---|
5758 | | -#ifndef CONFIG_PREEMPT |
---|
| 6416 | +#ifndef CONFIG_PREEMPTION |
---|
5759 | 6417 | int __sched _cond_resched(void) |
---|
5760 | 6418 | { |
---|
5761 | 6419 | if (should_resched(0)) { |
---|
.. | .. |
---|
5772 | 6430 | * __cond_resched_lock() - if a reschedule is pending, drop the given lock, |
---|
5773 | 6431 | * call schedule, and on return reacquire the lock. |
---|
5774 | 6432 | * |
---|
5775 | | - * This works OK both with and without CONFIG_PREEMPT. We do strange low-level |
---|
| 6433 | + * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level |
---|
5776 | 6434 | * operations here to prevent schedule() from being called twice (once via |
---|
5777 | 6435 | * spin_unlock(), once by hand). |
---|
5778 | 6436 | */ |
---|
.. | .. |
---|
5876 | 6534 | if (task_running(p_rq, p) || p->state) |
---|
5877 | 6535 | goto out_unlock; |
---|
5878 | 6536 | |
---|
5879 | | - yielded = curr->sched_class->yield_to_task(rq, p, preempt); |
---|
| 6537 | + yielded = curr->sched_class->yield_to_task(rq, p); |
---|
5880 | 6538 | if (yielded) { |
---|
5881 | 6539 | schedstat_inc(rq->yld_count); |
---|
5882 | 6540 | /* |
---|
.. | .. |
---|
6042 | 6700 | * an error code. |
---|
6043 | 6701 | */ |
---|
6044 | 6702 | SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, |
---|
6045 | | - struct timespec __user *, interval) |
---|
| 6703 | + struct __kernel_timespec __user *, interval) |
---|
6046 | 6704 | { |
---|
6047 | 6705 | struct timespec64 t; |
---|
6048 | 6706 | int retval = sched_rr_get_interval(pid, &t); |
---|
.. | .. |
---|
6053 | 6711 | return retval; |
---|
6054 | 6712 | } |
---|
6055 | 6713 | |
---|
6056 | | -#ifdef CONFIG_COMPAT |
---|
6057 | | -COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval, |
---|
6058 | | - compat_pid_t, pid, |
---|
6059 | | - struct compat_timespec __user *, interval) |
---|
| 6714 | +#ifdef CONFIG_COMPAT_32BIT_TIME |
---|
| 6715 | +SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, |
---|
| 6716 | + struct old_timespec32 __user *, interval) |
---|
6060 | 6717 | { |
---|
6061 | 6718 | struct timespec64 t; |
---|
6062 | 6719 | int retval = sched_rr_get_interval(pid, &t); |
---|
6063 | 6720 | |
---|
6064 | 6721 | if (retval == 0) |
---|
6065 | | - retval = compat_put_timespec64(&t, interval); |
---|
| 6722 | + retval = put_old_timespec32(&t, interval); |
---|
6066 | 6723 | return retval; |
---|
6067 | 6724 | } |
---|
6068 | 6725 | #endif |
---|
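The 32-bit time handling above switches from the old compat-only entry point to sched_rr_get_interval_time32() under CONFIG_COMPAT_32BIT_TIME, part of the 64-bit time_t conversion; userspace keeps seeing the plain POSIX interface either way (generic usage example, not tied to this diff):

    #include <sched.h>
    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
            struct timespec ts;

            if (sched_rr_get_interval(0, &ts))      /* 0 == calling thread */
                    return 1;

            printf("SCHED_RR timeslice: %ld.%09ld s\n",
                   (long)ts.tv_sec, ts.tv_nsec);
            return 0;
    }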
.. | .. |
---|
6075 | 6732 | if (!try_get_task_stack(p)) |
---|
6076 | 6733 | return; |
---|
6077 | 6734 | |
---|
6078 | | - printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); |
---|
| 6735 | + pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p)); |
---|
6079 | 6736 | |
---|
6080 | 6737 | if (p->state == TASK_RUNNING) |
---|
6081 | | - printk(KERN_CONT " running task "); |
---|
| 6738 | + pr_cont(" running task "); |
---|
6082 | 6739 | #ifdef CONFIG_DEBUG_STACK_USAGE |
---|
6083 | 6740 | free = stack_not_used(p); |
---|
6084 | 6741 | #endif |
---|
.. | .. |
---|
6087 | 6744 | if (pid_alive(p)) |
---|
6088 | 6745 | ppid = task_pid_nr(rcu_dereference(p->real_parent)); |
---|
6089 | 6746 | rcu_read_unlock(); |
---|
6090 | | - printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, |
---|
6091 | | - task_pid_nr(p), ppid, |
---|
| 6747 | + pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n", |
---|
| 6748 | + free, task_pid_nr(p), ppid, |
---|
6092 | 6749 | (unsigned long)task_thread_info(p)->flags); |
---|
6093 | 6750 | |
---|
6094 | 6751 | print_worker_info(KERN_INFO, p); |
---|
6095 | | - show_stack(p, NULL); |
---|
| 6752 | + trace_android_vh_sched_show_task(p); |
---|
| 6753 | + show_stack(p, NULL, KERN_INFO); |
---|
6096 | 6754 | put_task_stack(p); |
---|
6097 | 6755 | } |
---|
6098 | 6756 | EXPORT_SYMBOL_GPL(sched_show_task); |
---|
.. | .. |
---|
6123 | 6781 | { |
---|
6124 | 6782 | struct task_struct *g, *p; |
---|
6125 | 6783 | |
---|
6126 | | -#if BITS_PER_LONG == 32 |
---|
6127 | | - printk(KERN_INFO |
---|
6128 | | - " task PC stack pid father\n"); |
---|
6129 | | -#else |
---|
6130 | | - printk(KERN_INFO |
---|
6131 | | - " task PC stack pid father\n"); |
---|
6132 | | -#endif |
---|
6133 | 6784 | rcu_read_lock(); |
---|
6134 | 6785 | for_each_process_thread(g, p) { |
---|
6135 | 6786 | /* |
---|
.. | .. |
---|
6165 | 6816 | * NOTE: this function does not set the idle thread's NEED_RESCHED |
---|
6166 | 6817 | * flag, to make booting more robust. |
---|
6167 | 6818 | */ |
---|
6168 | | -void init_idle(struct task_struct *idle, int cpu) |
---|
| 6819 | +void __init init_idle(struct task_struct *idle, int cpu) |
---|
6169 | 6820 | { |
---|
6170 | 6821 | struct rq *rq = cpu_rq(cpu); |
---|
6171 | 6822 | unsigned long flags; |
---|
.. | .. |
---|
6178 | 6829 | idle->state = TASK_RUNNING; |
---|
6179 | 6830 | idle->se.exec_start = sched_clock(); |
---|
6180 | 6831 | idle->flags |= PF_IDLE; |
---|
6181 | | - |
---|
6182 | | - scs_task_reset(idle); |
---|
6183 | | - kasan_unpoison_task_stack(idle); |
---|
6184 | 6832 | |
---|
6185 | 6833 | #ifdef CONFIG_SMP |
---|
6186 | 6834 | /* |
---|
.. | .. |
---|
6205 | 6853 | __set_task_cpu(idle, cpu); |
---|
6206 | 6854 | rcu_read_unlock(); |
---|
6207 | 6855 | |
---|
6208 | | - rq->curr = rq->idle = idle; |
---|
| 6856 | + rq->idle = idle; |
---|
| 6857 | + rcu_assign_pointer(rq->curr, idle); |
---|
6209 | 6858 | idle->on_rq = TASK_ON_RQ_QUEUED; |
---|
6210 | 6859 | #ifdef CONFIG_SMP |
---|
6211 | 6860 | idle->on_cpu = 1; |
---|
.. | .. |
---|
6215 | 6864 | |
---|
6216 | 6865 | /* Set the preempt count _outside_ the spinlocks! */ |
---|
6217 | 6866 | init_idle_preempt_count(idle, cpu); |
---|
6218 | | -#ifdef CONFIG_HAVE_PREEMPT_LAZY |
---|
6219 | | - task_thread_info(idle)->preempt_lazy_count = 0; |
---|
6220 | | -#endif |
---|
| 6867 | + |
---|
6221 | 6868 | /* |
---|
6222 | 6869 | * The idle tasks have their own, simple scheduling class: |
---|
6223 | 6870 | */ |
---|
.. | .. |
---|
6244 | 6891 | return ret; |
---|
6245 | 6892 | } |
---|
6246 | 6893 | |
---|
6247 | | -int task_can_attach(struct task_struct *p, |
---|
6248 | | - const struct cpumask *cs_cpus_allowed) |
---|
| 6894 | +int task_can_attach(struct task_struct *p) |
---|
6249 | 6895 | { |
---|
6250 | 6896 | int ret = 0; |
---|
6251 | 6897 | |
---|
.. | .. |
---|
6258 | 6904 | * success of set_cpus_allowed_ptr() on all attached tasks |
---|
6259 | 6905 | * before cpus_mask may be changed. |
---|
6260 | 6906 | */ |
---|
6261 | | - if (p->flags & PF_NO_SETAFFINITY) { |
---|
| 6907 | + if (p->flags & PF_NO_SETAFFINITY) |
---|
6262 | 6908 | ret = -EINVAL; |
---|
6263 | | - goto out; |
---|
6264 | | - } |
---|
6265 | 6909 | |
---|
6266 | | - if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span, |
---|
6267 | | - cs_cpus_allowed)) |
---|
6268 | | - ret = dl_task_can_attach(p, cs_cpus_allowed); |
---|
6269 | | - |
---|
6270 | | -out: |
---|
6271 | 6910 | return ret; |
---|
6272 | 6911 | } |
---|
6273 | 6912 | |
---|
.. | .. |
---|
6316 | 6955 | if (queued) |
---|
6317 | 6956 | enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); |
---|
6318 | 6957 | if (running) |
---|
6319 | | - set_curr_task(rq, p); |
---|
| 6958 | + set_next_task(rq, p); |
---|
6320 | 6959 | task_rq_unlock(rq, p, &rf); |
---|
6321 | 6960 | } |
---|
6322 | 6961 | #endif /* CONFIG_NUMA_BALANCING */ |
---|
6323 | 6962 | |
---|
6324 | 6963 | #ifdef CONFIG_HOTPLUG_CPU |
---|
6325 | | - |
---|
6326 | 6964 | /* |
---|
6327 | 6965 | * Ensure that the idle task is using init_mm right before its CPU goes |
---|
6328 | 6966 | * offline. |
---|
.. | .. |
---|
6358 | 6996 | atomic_long_add(delta, &calc_load_tasks); |
---|
6359 | 6997 | } |
---|
6360 | 6998 | |
---|
6361 | | -static void put_prev_task_fake(struct rq *rq, struct task_struct *prev) |
---|
| 6999 | +static struct task_struct *__pick_migrate_task(struct rq *rq) |
---|
6362 | 7000 | { |
---|
| 7001 | + const struct sched_class *class; |
---|
| 7002 | + struct task_struct *next; |
---|
| 7003 | + |
---|
| 7004 | + for_each_class(class) { |
---|
| 7005 | + next = class->pick_next_task(rq); |
---|
| 7006 | + if (next) { |
---|
| 7007 | + next->sched_class->put_prev_task(rq, next); |
---|
| 7008 | + return next; |
---|
| 7009 | + } |
---|
| 7010 | + } |
---|
| 7011 | + |
---|
| 7012 | + /* The idle class should always have a runnable task */ |
---|
| 7013 | + BUG(); |
---|
6363 | 7014 | } |
---|
6364 | | - |
---|
6365 | | -static const struct sched_class fake_sched_class = { |
---|
6366 | | - .put_prev_task = put_prev_task_fake, |
---|
6367 | | -}; |
---|
6368 | | - |
---|
6369 | | -static struct task_struct fake_task = { |
---|
6370 | | - /* |
---|
6371 | | - * Avoid pull_{rt,dl}_task() |
---|
6372 | | - */ |
---|
6373 | | - .prio = MAX_PRIO + 1, |
---|
6374 | | - .sched_class = &fake_sched_class, |
---|
6375 | | -}; |
---|
6376 | 7015 | |
---|
6377 | 7016 | /* |
---|
6378 | 7017 | * Migrate all tasks from the rq, sleeping tasks will be migrated by |
---|
.. | .. |
---|
6381 | 7020 | * Called with rq->lock held even though we're in stop_machine() and |
---|
6382 | 7021 | * there's no concurrency possible; we hold the required locks anyway |
---|
6383 | 7022 | * because of lock validation efforts. |
---|
| 7023 | + * |
---|
| 7024 | + * force: if false, the function will skip CPU-pinned kthreads. |
---|
6384 | 7025 | */ |
---|
6385 | | -static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf) |
---|
| 7026 | +static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf, bool force) |
---|
6386 | 7027 | { |
---|
6387 | 7028 | struct rq *rq = dead_rq; |
---|
6388 | | - struct task_struct *next, *stop = rq->stop; |
---|
| 7029 | + struct task_struct *next, *tmp, *stop = rq->stop; |
---|
| 7030 | + LIST_HEAD(percpu_kthreads); |
---|
6389 | 7031 | struct rq_flags orf = *rf; |
---|
6390 | 7032 | int dest_cpu; |
---|
6391 | 7033 | |
---|
.. | .. |
---|
6407 | 7049 | */ |
---|
6408 | 7050 | update_rq_clock(rq); |
---|
6409 | 7051 | |
---|
| 7052 | +#ifdef CONFIG_SCHED_DEBUG |
---|
| 7053 | + /* note the clock update in orf */ |
---|
| 7054 | + orf.clock_update_flags |= RQCF_UPDATED; |
---|
| 7055 | +#endif |
---|
| 7056 | + |
---|
6410 | 7057 | for (;;) { |
---|
6411 | 7058 | /* |
---|
6412 | 7059 | * There's this thread running, bail when that's the only |
---|
.. | .. |
---|
6415 | 7062 | if (rq->nr_running == 1) |
---|
6416 | 7063 | break; |
---|
6417 | 7064 | |
---|
6418 | | - /* |
---|
6419 | | - * pick_next_task() assumes pinned rq->lock: |
---|
6420 | | - */ |
---|
6421 | | - next = pick_next_task(rq, &fake_task, rf); |
---|
6422 | | - BUG_ON(!next); |
---|
6423 | | - put_prev_task(rq, next); |
---|
| 7065 | + next = __pick_migrate_task(rq); |
---|
6424 | 7066 | |
---|
6425 | | - WARN_ON_ONCE(__migrate_disabled(next)); |
---|
| 7067 | + /* |
---|
| 7068 | + * Argh ... no iterator for tasks; we need to remove the |
---|
| 7069 | + * kthread from the run-queue to continue. |
---|
| 7070 | + */ |
---|
| 7071 | + if (!force && is_per_cpu_kthread(next)) { |
---|
| 7072 | + INIT_LIST_HEAD(&next->percpu_kthread_node); |
---|
| 7073 | + list_add(&next->percpu_kthread_node, &percpu_kthreads); |
---|
| 7074 | + |
---|
| 7075 | + /* DEQUEUE_SAVE not used due to move_entity in rt */ |
---|
| 7076 | + deactivate_task(rq, next, |
---|
| 7077 | + DEQUEUE_NOCLOCK); |
---|
| 7078 | + continue; |
---|
| 7079 | + } |
---|
6426 | 7080 | |
---|
6427 | 7081 | /* |
---|
6428 | 7082 | * Rules for changing task_struct::cpus_mask are holding |
---|
.. | .. |
---|
6442 | 7096 | * changed the task, WARN if weird stuff happened, because in |
---|
6443 | 7097 | * that case the above rq->lock drop is a fail too. |
---|
6444 | 7098 | */ |
---|
6445 | | - if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) { |
---|
| 7099 | + if (task_rq(next) != rq || !task_on_rq_queued(next)) { |
---|
| 7100 | + /* |
---|
| 7101 | + * In the !force case, there is a hole between |
---|
| 7102 | + * rq_unlock() and rq_relock(), where another CPU might |
---|
| 7103 | + * not observe an up-to-date cpu_active_mask and try to |
---|
| 7104 | + * move tasks around. |
---|
| 7105 | + */ |
---|
| 7106 | + WARN_ON(force); |
---|
6446 | 7107 | raw_spin_unlock(&next->pi_lock); |
---|
6447 | 7108 | continue; |
---|
6448 | 7109 | } |
---|
.. | .. |
---|
6459 | 7120 | raw_spin_unlock(&next->pi_lock); |
---|
6460 | 7121 | } |
---|
6461 | 7122 | |
---|
| 7123 | + list_for_each_entry_safe(next, tmp, &percpu_kthreads, |
---|
| 7124 | + percpu_kthread_node) { |
---|
| 7125 | + |
---|
| 7126 | + /* ENQUEUE_RESTORE not used due to move_entity in rt */ |
---|
| 7127 | + activate_task(rq, next, ENQUEUE_NOCLOCK); |
---|
| 7128 | + list_del(&next->percpu_kthread_node); |
---|
| 7129 | + } |
---|
| 7130 | + |
---|
6462 | 7131 | rq->stop = stop; |
---|
| 7132 | +} |
---|
| 7133 | + |
---|
| 7134 | +static int drain_rq_cpu_stop(void *data) |
---|
| 7135 | +{ |
---|
| 7136 | + struct rq *rq = this_rq(); |
---|
| 7137 | + struct rq_flags rf; |
---|
| 7138 | + |
---|
| 7139 | + rq_lock_irqsave(rq, &rf); |
---|
| 7140 | + migrate_tasks(rq, &rf, false); |
---|
| 7141 | + rq_unlock_irqrestore(rq, &rf); |
---|
| 7142 | + |
---|
| 7143 | + return 0; |
---|
| 7144 | +} |
---|
| 7145 | + |
---|
| 7146 | +int sched_cpu_drain_rq(unsigned int cpu) |
---|
| 7147 | +{ |
---|
| 7148 | + struct cpu_stop_work *rq_drain = &(cpu_rq(cpu)->drain); |
---|
| 7149 | + struct cpu_stop_done *rq_drain_done = &(cpu_rq(cpu)->drain_done); |
---|
| 7150 | + |
---|
| 7151 | + if (idle_cpu(cpu)) { |
---|
| 7152 | + rq_drain->done = NULL; |
---|
| 7153 | + return 0; |
---|
| 7154 | + } |
---|
| 7155 | + |
---|
| 7156 | + return stop_one_cpu_async(cpu, drain_rq_cpu_stop, NULL, rq_drain, |
---|
| 7157 | + rq_drain_done); |
---|
| 7158 | +} |
---|
| 7159 | + |
---|
| 7160 | +void sched_cpu_drain_rq_wait(unsigned int cpu) |
---|
| 7161 | +{ |
---|
| 7162 | + struct cpu_stop_work *rq_drain = &(cpu_rq(cpu)->drain); |
---|
| 7163 | + |
---|
| 7164 | + if (rq_drain->done) |
---|
| 7165 | + cpu_stop_work_wait(rq_drain); |
---|
6463 | 7166 | } |
---|
6464 | 7167 | #endif /* CONFIG_HOTPLUG_CPU */ |
---|
6465 | 7168 | |
---|
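The drain/wait split above lets a caller queue the stopper work on several CPUs before blocking on any of them. A minimal sketch of such a caller, assuming a hypothetical pause path (example_drain_cpus() is not a real kernel function, and error semantics beyond "non-zero means failure" are assumed):

static int example_drain_cpus(const struct cpumask *cpus)
{
	unsigned int cpu;
	int err;

	/* Queue the drain work asynchronously on every CPU first ... */
	for_each_cpu(cpu, cpus) {
		err = sched_cpu_drain_rq(cpu);
		if (err)
			return err;
	}

	/* ... then wait, so the per-CPU stoppers can run in parallel. */
	for_each_cpu(cpu, cpus)
		sched_cpu_drain_rq_wait(cpu);

	return 0;
}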
.. | .. |
---|
6531 | 7234 | static int cpuset_cpu_inactive(unsigned int cpu) |
---|
6532 | 7235 | { |
---|
6533 | 7236 | if (!cpuhp_tasks_frozen) { |
---|
6534 | | - if (dl_cpu_busy(cpu)) |
---|
6535 | | - return -EBUSY; |
---|
| 7237 | + int ret = dl_bw_check_overflow(cpu); |
---|
| 7238 | + |
---|
| 7239 | + if (ret) |
---|
| 7240 | + return ret; |
---|
6536 | 7241 | cpuset_update_active_cpus(); |
---|
6537 | 7242 | } else { |
---|
6538 | 7243 | num_cpus_frozen++; |
---|
.. | .. |
---|
6581 | 7286 | return 0; |
---|
6582 | 7287 | } |
---|
6583 | 7288 | |
---|
6584 | | -int sched_cpu_deactivate(unsigned int cpu) |
---|
| 7289 | +int sched_cpus_activate(struct cpumask *cpus) |
---|
| 7290 | +{ |
---|
| 7291 | + unsigned int cpu; |
---|
| 7292 | + |
---|
| 7293 | + for_each_cpu(cpu, cpus) { |
---|
| 7294 | + if (sched_cpu_activate(cpu)) { |
---|
| 7295 | + for_each_cpu_and(cpu, cpus, cpu_active_mask) |
---|
| 7296 | + sched_cpu_deactivate(cpu); |
---|
| 7297 | + |
---|
| 7298 | + return -EBUSY; |
---|
| 7299 | + } |
---|
| 7300 | + } |
---|
| 7301 | + |
---|
| 7302 | + return 0; |
---|
| 7303 | +} |
---|
| 7304 | + |
---|
| 7305 | +int _sched_cpu_deactivate(unsigned int cpu) |
---|
6585 | 7306 | { |
---|
6586 | 7307 | int ret; |
---|
6587 | 7308 | |
---|
6588 | 7309 | set_cpu_active(cpu, false); |
---|
6589 | | - /* |
---|
6590 | | - * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU |
---|
6591 | | - * users of this state to go away such that all new such users will |
---|
6592 | | - * observe it. |
---|
6593 | | - * |
---|
6594 | | - * Do sync before park smpboot threads to take care the rcu boost case. |
---|
6595 | | - */ |
---|
6596 | | - synchronize_rcu_mult(call_rcu, call_rcu_sched); |
---|
6597 | 7310 | |
---|
6598 | 7311 | #ifdef CONFIG_SCHED_SMT |
---|
6599 | 7312 | /* |
---|
.. | .. |
---|
6612 | 7325 | return ret; |
---|
6613 | 7326 | } |
---|
6614 | 7327 | sched_domains_numa_masks_clear(cpu); |
---|
| 7328 | + |
---|
| 7329 | + update_max_interval(); |
---|
| 7330 | + |
---|
| 7331 | + return 0; |
---|
| 7332 | +} |
---|
| 7333 | + |
---|
| 7334 | +int sched_cpu_deactivate(unsigned int cpu) |
---|
| 7335 | +{ |
---|
| 7336 | + int ret = _sched_cpu_deactivate(cpu); |
---|
| 7337 | + |
---|
| 7338 | + if (ret) |
---|
| 7339 | + return ret; |
---|
| 7340 | + |
---|
| 7341 | + /* |
---|
| 7342 | + * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU |
---|
| 7343 | + * users of this state to go away such that all new such users will |
---|
| 7344 | + * observe it. |
---|
| 7345 | + * |
---|
| 7346 | + * Do sync before parking smpboot threads to take care of the rcu boost case. |
---|
| 7347 | + */ |
---|
| 7348 | + synchronize_rcu(); |
---|
| 7349 | + |
---|
| 7350 | + return 0; |
---|
| 7351 | +} |
---|
| 7352 | + |
---|
| 7353 | +int sched_cpus_deactivate_nosync(struct cpumask *cpus) |
---|
| 7354 | +{ |
---|
| 7355 | + unsigned int cpu; |
---|
| 7356 | + |
---|
| 7357 | + for_each_cpu(cpu, cpus) { |
---|
| 7358 | + if (_sched_cpu_deactivate(cpu)) { |
---|
| 7359 | + for_each_cpu(cpu, cpus) { |
---|
| 7360 | + if (!cpu_active(cpu)) |
---|
| 7361 | + sched_cpu_activate(cpu); |
---|
| 7362 | + } |
---|
| 7363 | + |
---|
| 7364 | + return -EBUSY; |
---|
| 7365 | + } |
---|
| 7366 | + } |
---|
| 7367 | + |
---|
6615 | 7368 | return 0; |
---|
6616 | 7369 | } |
---|
6617 | 7370 | |
---|
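A hedged sketch of how the batch variants above are meant to be paired: sched_cpus_deactivate_nosync() leaves the RCU synchronisation to its caller, so a hypothetical CPU-pause path (example_pause_cpus() is invented for illustration) pays for one grace period rather than one per CPU:

static int example_pause_cpus(struct cpumask *cpus)
{
	int err;

	err = sched_cpus_deactivate_nosync(cpus);
	if (err)
		return err;	/* the helper already re-activated the CPUs */

	/*
	 * A single synchronize_rcu() covers every CPU cleared from
	 * cpu_active_mask above, mirroring what sched_cpu_deactivate()
	 * does for one CPU.
	 */
	synchronize_rcu();

	return 0;
}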
.. | .. |
---|
6620 | 7373 | struct rq *rq = cpu_rq(cpu); |
---|
6621 | 7374 | |
---|
6622 | 7375 | rq->calc_load_update = calc_load_update; |
---|
6623 | | - update_max_interval(); |
---|
6624 | 7376 | } |
---|
6625 | 7377 | |
---|
6626 | 7378 | int sched_cpu_starting(unsigned int cpu) |
---|
6627 | 7379 | { |
---|
6628 | 7380 | sched_rq_cpu_starting(cpu); |
---|
6629 | 7381 | sched_tick_start(cpu); |
---|
| 7382 | + trace_android_rvh_sched_cpu_starting(cpu); |
---|
6630 | 7383 | return 0; |
---|
6631 | 7384 | } |
---|
6632 | 7385 | |
---|
.. | .. |
---|
6637 | 7390 | struct rq_flags rf; |
---|
6638 | 7391 | |
---|
6639 | 7392 | /* Handle pending wakeups and then migrate everything off */ |
---|
6640 | | - sched_ttwu_pending(); |
---|
6641 | 7393 | sched_tick_stop(cpu); |
---|
6642 | 7394 | |
---|
6643 | 7395 | rq_lock_irqsave(rq, &rf); |
---|
.. | .. |
---|
6645 | 7397 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
---|
6646 | 7398 | set_rq_offline(rq); |
---|
6647 | 7399 | } |
---|
6648 | | - migrate_tasks(rq, &rf); |
---|
| 7400 | + migrate_tasks(rq, &rf, true); |
---|
6649 | 7401 | BUG_ON(rq->nr_running != 1); |
---|
6650 | 7402 | rq_unlock_irqrestore(rq, &rf); |
---|
6651 | 7403 | |
---|
| 7404 | + trace_android_rvh_sched_cpu_dying(cpu); |
---|
| 7405 | + |
---|
6652 | 7406 | calc_load_migrate(rq); |
---|
6653 | | - update_max_interval(); |
---|
6654 | 7407 | nohz_balance_exit_idle(rq); |
---|
6655 | 7408 | hrtick_clear(rq); |
---|
6656 | 7409 | return 0; |
---|
.. | .. |
---|
6664 | 7417 | /* |
---|
6665 | 7418 | * There's no userspace yet to cause hotplug operations; hence all the |
---|
6666 | 7419 | * CPU masks are stable and all blatant races in the below code cannot |
---|
6667 | | - * happen. The hotplug lock is nevertheless taken to satisfy lockdep, |
---|
6668 | | - * but there won't be any contention on it. |
---|
| 7420 | + * happen. |
---|
6669 | 7421 | */ |
---|
6670 | | - cpus_read_lock(); |
---|
6671 | 7422 | mutex_lock(&sched_domains_mutex); |
---|
6672 | 7423 | sched_init_domains(cpu_active_mask); |
---|
6673 | 7424 | mutex_unlock(&sched_domains_mutex); |
---|
6674 | | - cpus_read_unlock(); |
---|
6675 | 7425 | |
---|
6676 | 7426 | /* Move init over to a non-isolated CPU */ |
---|
6677 | 7427 | if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) |
---|
6678 | 7428 | BUG(); |
---|
| 7429 | + |
---|
6679 | 7430 | sched_init_granularity(); |
---|
6680 | 7431 | |
---|
6681 | 7432 | init_sched_rt_class(); |
---|
.. | .. |
---|
6686 | 7437 | |
---|
6687 | 7438 | static int __init migration_init(void) |
---|
6688 | 7439 | { |
---|
6689 | | - sched_rq_cpu_starting(smp_processor_id()); |
---|
| 7440 | + sched_cpu_starting(smp_processor_id()); |
---|
6690 | 7441 | return 0; |
---|
6691 | 7442 | } |
---|
6692 | 7443 | early_initcall(migration_init); |
---|
.. | .. |
---|
6711 | 7462 | * Every task in system belongs to this group at bootup. |
---|
6712 | 7463 | */ |
---|
6713 | 7464 | struct task_group root_task_group; |
---|
| 7465 | +EXPORT_SYMBOL_GPL(root_task_group); |
---|
6714 | 7466 | LIST_HEAD(task_groups); |
---|
| 7467 | +EXPORT_SYMBOL_GPL(task_groups); |
---|
6715 | 7468 | |
---|
6716 | 7469 | /* Cacheline aligned slab cache for task_group */ |
---|
6717 | 7470 | static struct kmem_cache *task_group_cache __read_mostly; |
---|
.. | .. |
---|
6722 | 7475 | |
---|
6723 | 7476 | void __init sched_init(void) |
---|
6724 | 7477 | { |
---|
6725 | | - int i, j; |
---|
6726 | | - unsigned long alloc_size = 0, ptr; |
---|
| 7478 | + unsigned long ptr = 0; |
---|
| 7479 | + int i; |
---|
| 7480 | + |
---|
| 7481 | + /* Make sure the linker didn't screw up */ |
---|
| 7482 | + BUG_ON(&idle_sched_class + 1 != &fair_sched_class || |
---|
| 7483 | + &fair_sched_class + 1 != &rt_sched_class || |
---|
| 7484 | + &rt_sched_class + 1 != &dl_sched_class); |
---|
| 7485 | +#ifdef CONFIG_SMP |
---|
| 7486 | + BUG_ON(&dl_sched_class + 1 != &stop_sched_class); |
---|
| 7487 | +#endif |
---|
6727 | 7488 | |
---|
6728 | 7489 | wait_bit_init(); |
---|
6729 | 7490 | |
---|
6730 | 7491 | #ifdef CONFIG_FAIR_GROUP_SCHED |
---|
6731 | | - alloc_size += 2 * nr_cpu_ids * sizeof(void **); |
---|
| 7492 | + ptr += 2 * nr_cpu_ids * sizeof(void **); |
---|
6732 | 7493 | #endif |
---|
6733 | 7494 | #ifdef CONFIG_RT_GROUP_SCHED |
---|
6734 | | - alloc_size += 2 * nr_cpu_ids * sizeof(void **); |
---|
| 7495 | + ptr += 2 * nr_cpu_ids * sizeof(void **); |
---|
6735 | 7496 | #endif |
---|
6736 | | - if (alloc_size) { |
---|
6737 | | - ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); |
---|
| 7497 | + if (ptr) { |
---|
| 7498 | + ptr = (unsigned long)kzalloc(ptr, GFP_NOWAIT); |
---|
6738 | 7499 | |
---|
6739 | 7500 | #ifdef CONFIG_FAIR_GROUP_SCHED |
---|
6740 | 7501 | root_task_group.se = (struct sched_entity **)ptr; |
---|
.. | .. |
---|
6743 | 7504 | root_task_group.cfs_rq = (struct cfs_rq **)ptr; |
---|
6744 | 7505 | ptr += nr_cpu_ids * sizeof(void **); |
---|
6745 | 7506 | |
---|
| 7507 | + root_task_group.shares = ROOT_TASK_GROUP_LOAD; |
---|
| 7508 | + init_cfs_bandwidth(&root_task_group.cfs_bandwidth); |
---|
6746 | 7509 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
---|
6747 | 7510 | #ifdef CONFIG_RT_GROUP_SCHED |
---|
6748 | 7511 | root_task_group.rt_se = (struct sched_rt_entity **)ptr; |
---|
.. | .. |
---|
6795 | 7558 | init_rt_rq(&rq->rt); |
---|
6796 | 7559 | init_dl_rq(&rq->dl); |
---|
6797 | 7560 | #ifdef CONFIG_FAIR_GROUP_SCHED |
---|
6798 | | - root_task_group.shares = ROOT_TASK_GROUP_LOAD; |
---|
6799 | 7561 | INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); |
---|
6800 | 7562 | rq->tmp_alone_branch = &rq->leaf_cfs_rq_list; |
---|
6801 | 7563 | /* |
---|
.. | .. |
---|
6817 | 7579 | * We achieve this by letting root_task_group's tasks sit |
---|
6818 | 7580 | * directly in rq->cfs (i.e. root_task_group->se[] = NULL). |
---|
6819 | 7581 | */ |
---|
6820 | | - init_cfs_bandwidth(&root_task_group.cfs_bandwidth); |
---|
6821 | 7582 | init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL); |
---|
6822 | 7583 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
---|
6823 | 7584 | |
---|
.. | .. |
---|
6825 | 7586 | #ifdef CONFIG_RT_GROUP_SCHED |
---|
6826 | 7587 | init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL); |
---|
6827 | 7588 | #endif |
---|
6828 | | - |
---|
6829 | | - for (j = 0; j < CPU_LOAD_IDX_MAX; j++) |
---|
6830 | | - rq->cpu_load[j] = 0; |
---|
6831 | | - |
---|
6832 | 7589 | #ifdef CONFIG_SMP |
---|
6833 | 7590 | rq->sd = NULL; |
---|
6834 | 7591 | rq->rd = NULL; |
---|
.. | .. |
---|
6847 | 7604 | |
---|
6848 | 7605 | rq_attach_root(rq, &def_root_domain); |
---|
6849 | 7606 | #ifdef CONFIG_NO_HZ_COMMON |
---|
6850 | | - rq->last_load_update_tick = jiffies; |
---|
6851 | 7607 | rq->last_blocked_load_update_tick = jiffies; |
---|
6852 | 7608 | atomic_set(&rq->nohz_flags, 0); |
---|
| 7609 | + |
---|
| 7610 | + rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); |
---|
6853 | 7611 | #endif |
---|
6854 | 7612 | #endif /* CONFIG_SMP */ |
---|
6855 | 7613 | hrtick_rq_init(rq); |
---|
6856 | 7614 | atomic_set(&rq->nr_iowait, 0); |
---|
6857 | 7615 | } |
---|
6858 | 7616 | |
---|
6859 | | - set_load_weight(&init_task, false); |
---|
| 7617 | + set_load_weight(&init_task); |
---|
6860 | 7618 | |
---|
6861 | 7619 | /* |
---|
6862 | 7620 | * The boot idle thread does lazy MMU switching as well: |
---|
.. | .. |
---|
6891 | 7649 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP |
---|
6892 | 7650 | static inline int preempt_count_equals(int preempt_offset) |
---|
6893 | 7651 | { |
---|
6894 | | - int nested = preempt_count() + sched_rcu_preempt_depth(); |
---|
| 7652 | + int nested = preempt_count() + rcu_preempt_depth(); |
---|
6895 | 7653 | |
---|
6896 | 7654 | return (nested == preempt_offset); |
---|
6897 | 7655 | } |
---|
.. | .. |
---|
6925 | 7683 | rcu_sleep_check(); |
---|
6926 | 7684 | |
---|
6927 | 7685 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && |
---|
6928 | | - !is_idle_task(current)) || |
---|
| 7686 | + !is_idle_task(current) && !current->non_block_count) || |
---|
6929 | 7687 | system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || |
---|
6930 | 7688 | oops_in_progress) |
---|
6931 | 7689 | return; |
---|
.. | .. |
---|
6941 | 7699 | "BUG: sleeping function called from invalid context at %s:%d\n", |
---|
6942 | 7700 | file, line); |
---|
6943 | 7701 | printk(KERN_ERR |
---|
6944 | | - "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", |
---|
6945 | | - in_atomic(), irqs_disabled(), |
---|
| 7702 | + "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", |
---|
| 7703 | + in_atomic(), irqs_disabled(), current->non_block_count, |
---|
6946 | 7704 | current->pid, current->comm); |
---|
6947 | 7705 | |
---|
6948 | 7706 | if (task_stack_end_corrupted(current)) |
---|
.. | .. |
---|
6954 | 7712 | if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) |
---|
6955 | 7713 | && !preempt_count_equals(preempt_offset)) { |
---|
6956 | 7714 | pr_err("Preemption disabled at:"); |
---|
6957 | | - print_ip_sym(preempt_disable_ip); |
---|
6958 | | - pr_cont("\n"); |
---|
| 7715 | + print_ip_sym(KERN_ERR, preempt_disable_ip); |
---|
6959 | 7716 | } |
---|
| 7717 | + |
---|
| 7718 | + trace_android_rvh_schedule_bug(NULL); |
---|
| 7719 | + |
---|
6960 | 7720 | dump_stack(); |
---|
6961 | 7721 | add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
---|
6962 | 7722 | } |
---|
6963 | 7723 | EXPORT_SYMBOL(___might_sleep); |
---|
| 7724 | + |
---|
| 7725 | +void __cant_sleep(const char *file, int line, int preempt_offset) |
---|
| 7726 | +{ |
---|
| 7727 | + static unsigned long prev_jiffy; |
---|
| 7728 | + |
---|
| 7729 | + if (irqs_disabled()) |
---|
| 7730 | + return; |
---|
| 7731 | + |
---|
| 7732 | + if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) |
---|
| 7733 | + return; |
---|
| 7734 | + |
---|
| 7735 | + if (preempt_count() > preempt_offset) |
---|
| 7736 | + return; |
---|
| 7737 | + |
---|
| 7738 | + if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) |
---|
| 7739 | + return; |
---|
| 7740 | + prev_jiffy = jiffies; |
---|
| 7741 | + |
---|
| 7742 | + printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); |
---|
| 7743 | + printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", |
---|
| 7744 | + in_atomic(), irqs_disabled(), |
---|
| 7745 | + current->pid, current->comm); |
---|
| 7746 | + |
---|
| 7747 | + debug_show_held_locks(current); |
---|
| 7748 | + dump_stack(); |
---|
| 7749 | + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
---|
| 7750 | +} |
---|
| 7751 | +EXPORT_SYMBOL_GPL(__cant_sleep); |
---|
6964 | 7752 | #endif |
---|
6965 | 7753 | |
---|
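__cant_sleep() added above is the out-of-line backend of the cant_sleep() annotation in <linux/kernel.h>, the inverse of might_sleep(): it warns when code that relies on atomic context is entered while preemptible. A minimal usage sketch (example_counter and example_percpu_update() are hypothetical):

static DEFINE_PER_CPU(unsigned long, example_counter);

static void example_percpu_update(void)
{
	cant_sleep();	/* warn if this can be preempted or migrated */

	/* safe only because the caller guarantees atomic context */
	__this_cpu_inc(example_counter);
}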
6966 | 7754 | #ifdef CONFIG_MAGIC_SYSRQ |
---|
.. | .. |
---|
7029 | 7817 | |
---|
7030 | 7818 | #ifdef CONFIG_IA64 |
---|
7031 | 7819 | /** |
---|
7032 | | - * set_curr_task - set the current task for a given CPU. |
---|
| 7820 | + * ia64_set_curr_task - set the current task for a given CPU. |
---|
7033 | 7821 | * @cpu: the processor in question. |
---|
7034 | 7822 | * @p: the task pointer to set. |
---|
7035 | 7823 | * |
---|
.. | .. |
---|
7195 | 7983 | |
---|
7196 | 7984 | if (queued) |
---|
7197 | 7985 | enqueue_task(rq, tsk, queue_flags); |
---|
7198 | | - if (running) |
---|
7199 | | - set_curr_task(rq, tsk); |
---|
| 7986 | + if (running) { |
---|
| 7987 | + set_next_task(rq, tsk); |
---|
| 7988 | + /* |
---|
| 7989 | + * After changing group, the running task may have joined a |
---|
| 7990 | + * throttled one but it's still the running task. Trigger a |
---|
| 7991 | + * resched to make sure that task can still run. |
---|
| 7992 | + */ |
---|
| 7993 | + resched_curr(rq); |
---|
| 7994 | + } |
---|
7200 | 7995 | |
---|
7201 | 7996 | task_rq_unlock(rq, tsk, &rf); |
---|
7202 | 7997 | } |
---|
.. | .. |
---|
7235 | 8030 | |
---|
7236 | 8031 | #ifdef CONFIG_UCLAMP_TASK_GROUP |
---|
7237 | 8032 | /* Propagate the effective uclamp value for the new group */ |
---|
| 8033 | + mutex_lock(&uclamp_mutex); |
---|
| 8034 | + rcu_read_lock(); |
---|
7238 | 8035 | cpu_util_update_eff(css); |
---|
| 8036 | + rcu_read_unlock(); |
---|
| 8037 | + mutex_unlock(&uclamp_mutex); |
---|
7239 | 8038 | #endif |
---|
7240 | 8039 | |
---|
| 8040 | + trace_android_rvh_cpu_cgroup_online(css); |
---|
7241 | 8041 | return 0; |
---|
7242 | 8042 | } |
---|
7243 | 8043 | |
---|
.. | .. |
---|
7303 | 8103 | if (ret) |
---|
7304 | 8104 | break; |
---|
7305 | 8105 | } |
---|
| 8106 | + |
---|
| 8107 | + trace_android_rvh_cpu_cgroup_can_attach(tset, &ret); |
---|
| 8108 | + |
---|
7306 | 8109 | return ret; |
---|
7307 | 8110 | } |
---|
7308 | 8111 | |
---|
.. | .. |
---|
7313 | 8116 | |
---|
7314 | 8117 | cgroup_taskset_for_each(task, css, tset) |
---|
7315 | 8118 | sched_move_task(task); |
---|
| 8119 | + |
---|
| 8120 | + trace_android_rvh_cpu_cgroup_attach(tset); |
---|
7316 | 8121 | } |
---|
7317 | 8122 | |
---|
7318 | 8123 | #ifdef CONFIG_UCLAMP_TASK_GROUP |
---|
.. | .. |
---|
7324 | 8129 | unsigned int eff[UCLAMP_CNT]; |
---|
7325 | 8130 | enum uclamp_id clamp_id; |
---|
7326 | 8131 | unsigned int clamps; |
---|
| 8132 | + |
---|
| 8133 | + lockdep_assert_held(&uclamp_mutex); |
---|
| 8134 | + SCHED_WARN_ON(!rcu_read_lock_held()); |
---|
7327 | 8135 | |
---|
7328 | 8136 | css_for_each_descendant_pre(css, top_css) { |
---|
7329 | 8137 | uc_parent = css_tg(css)->parent |
---|
.. | .. |
---|
7357 | 8165 | } |
---|
7358 | 8166 | |
---|
7359 | 8167 | /* Immediately update descendants RUNNABLE tasks */ |
---|
7360 | | - uclamp_update_active_tasks(css, clamps); |
---|
| 8168 | + uclamp_update_active_tasks(css); |
---|
7361 | 8169 | } |
---|
7362 | 8170 | } |
---|
7363 | 8171 | |
---|
.. | .. |
---|
7414 | 8222 | req = capacity_from_percent(buf); |
---|
7415 | 8223 | if (req.ret) |
---|
7416 | 8224 | return req.ret; |
---|
| 8225 | + |
---|
| 8226 | + static_branch_enable(&sched_uclamp_used); |
---|
7417 | 8227 | |
---|
7418 | 8228 | mutex_lock(&uclamp_mutex); |
---|
7419 | 8229 | rcu_read_lock(); |
---|
.. | .. |
---|
7529 | 8339 | static DEFINE_MUTEX(cfs_constraints_mutex); |
---|
7530 | 8340 | |
---|
7531 | 8341 | const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */ |
---|
7532 | | -const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ |
---|
| 8342 | +static const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ |
---|
| 8343 | +/* More than 203 days if BW_SHIFT equals 20. */ |
---|
| 8344 | +static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC; |
---|
7533 | 8345 | |
---|
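For the curious, the "203 days" figure above works out as follows, assuming MAX_BW is defined in sched.h as ((1ULL << (64 - BW_SHIFT)) - 1); that definition is not visible in this hunk, so treat it as an assumption:

/*
 * With BW_SHIFT == 20:
 *
 *   MAX_BW          = 2^44 - 1                ~= 1.76e13 us
 *   max_cfs_runtime = MAX_BW * NSEC_PER_USEC  ~= 1.76e16 ns ~= 1.76e7 s
 *   1.76e7 s / 86400 s per day                ~= 203.6 days
 */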
7534 | 8346 | static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime); |
---|
7535 | 8347 | |
---|
.. | .. |
---|
7555 | 8367 | * feasibility. |
---|
7556 | 8368 | */ |
---|
7557 | 8369 | if (period > max_cfs_quota_period) |
---|
| 8370 | + return -EINVAL; |
---|
| 8371 | + |
---|
| 8372 | + /* |
---|
| 8373 | + * Bound the quota to defend against overflow during the bandwidth shift. |
---|
| 8374 | + */ |
---|
| 8375 | + if (quota != RUNTIME_INF && quota > max_cfs_runtime) |
---|
7558 | 8376 | return -EINVAL; |
---|
7559 | 8377 | |
---|
7560 | 8378 | /* |
---|
.. | .. |
---|
7609 | 8427 | return ret; |
---|
7610 | 8428 | } |
---|
7611 | 8429 | |
---|
7612 | | -int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) |
---|
| 8430 | +static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) |
---|
7613 | 8431 | { |
---|
7614 | 8432 | u64 quota, period; |
---|
7615 | 8433 | |
---|
.. | .. |
---|
7624 | 8442 | return tg_set_cfs_bandwidth(tg, period, quota); |
---|
7625 | 8443 | } |
---|
7626 | 8444 | |
---|
7627 | | -long tg_get_cfs_quota(struct task_group *tg) |
---|
| 8445 | +static long tg_get_cfs_quota(struct task_group *tg) |
---|
7628 | 8446 | { |
---|
7629 | 8447 | u64 quota_us; |
---|
7630 | 8448 | |
---|
.. | .. |
---|
7637 | 8455 | return quota_us; |
---|
7638 | 8456 | } |
---|
7639 | 8457 | |
---|
7640 | | -int tg_set_cfs_period(struct task_group *tg, long cfs_period_us) |
---|
| 8458 | +static int tg_set_cfs_period(struct task_group *tg, long cfs_period_us) |
---|
7641 | 8459 | { |
---|
7642 | 8460 | u64 quota, period; |
---|
7643 | 8461 | |
---|
.. | .. |
---|
7650 | 8468 | return tg_set_cfs_bandwidth(tg, period, quota); |
---|
7651 | 8469 | } |
---|
7652 | 8470 | |
---|
7653 | | -long tg_get_cfs_period(struct task_group *tg) |
---|
| 8471 | +static long tg_get_cfs_period(struct task_group *tg) |
---|
7654 | 8472 | { |
---|
7655 | 8473 | u64 cfs_period_us; |
---|
7656 | 8474 | |
---|
.. | .. |
---|
8127 | 8945 | /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, |
---|
8128 | 8946 | }; |
---|
8129 | 8947 | |
---|
8130 | | -#undef CREATE_TRACE_POINTS |
---|
8131 | | - |
---|
8132 | | -#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) |
---|
8133 | | - |
---|
8134 | | -static inline void |
---|
8135 | | -update_nr_migratory(struct task_struct *p, long delta) |
---|
| 8948 | +void call_trace_sched_update_nr_running(struct rq *rq, int count) |
---|
8136 | 8949 | { |
---|
8137 | | - if (unlikely((p->sched_class == &rt_sched_class || |
---|
8138 | | - p->sched_class == &dl_sched_class) && |
---|
8139 | | - p->nr_cpus_allowed > 1)) { |
---|
8140 | | - if (p->sched_class == &rt_sched_class) |
---|
8141 | | - task_rq(p)->rt.rt_nr_migratory += delta; |
---|
8142 | | - else |
---|
8143 | | - task_rq(p)->dl.dl_nr_migratory += delta; |
---|
8144 | | - } |
---|
| 8950 | + trace_sched_update_nr_running_tp(rq, count); |
---|
8145 | 8951 | } |
---|
8146 | | - |
---|
8147 | | -static inline void |
---|
8148 | | -migrate_disable_update_cpus_allowed(struct task_struct *p) |
---|
8149 | | -{ |
---|
8150 | | - p->cpus_ptr = cpumask_of(smp_processor_id()); |
---|
8151 | | - update_nr_migratory(p, -1); |
---|
8152 | | - p->nr_cpus_allowed = 1; |
---|
8153 | | -} |
---|
8154 | | - |
---|
8155 | | -static inline void |
---|
8156 | | -migrate_enable_update_cpus_allowed(struct task_struct *p) |
---|
8157 | | -{ |
---|
8158 | | - struct rq *rq; |
---|
8159 | | - struct rq_flags rf; |
---|
8160 | | - |
---|
8161 | | - rq = task_rq_lock(p, &rf); |
---|
8162 | | - p->cpus_ptr = &p->cpus_mask; |
---|
8163 | | - p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask); |
---|
8164 | | - update_nr_migratory(p, 1); |
---|
8165 | | - task_rq_unlock(rq, p, &rf); |
---|
8166 | | -} |
---|
8167 | | - |
---|
8168 | | -void migrate_disable(void) |
---|
8169 | | -{ |
---|
8170 | | - preempt_disable(); |
---|
8171 | | - |
---|
8172 | | - if (++current->migrate_disable == 1) { |
---|
8173 | | - this_rq()->nr_pinned++; |
---|
8174 | | - preempt_lazy_disable(); |
---|
8175 | | -#ifdef CONFIG_SCHED_DEBUG |
---|
8176 | | - WARN_ON_ONCE(current->pinned_on_cpu >= 0); |
---|
8177 | | - current->pinned_on_cpu = smp_processor_id(); |
---|
8178 | | -#endif |
---|
8179 | | - } |
---|
8180 | | - |
---|
8181 | | - preempt_enable(); |
---|
8182 | | -} |
---|
8183 | | -EXPORT_SYMBOL(migrate_disable); |
---|
8184 | | - |
---|
8185 | | -static void migrate_disabled_sched(struct task_struct *p) |
---|
8186 | | -{ |
---|
8187 | | - if (p->migrate_disable_scheduled) |
---|
8188 | | - return; |
---|
8189 | | - |
---|
8190 | | - migrate_disable_update_cpus_allowed(p); |
---|
8191 | | - p->migrate_disable_scheduled = 1; |
---|
8192 | | -} |
---|
8193 | | - |
---|
8194 | | -static DEFINE_PER_CPU(struct cpu_stop_work, migrate_work); |
---|
8195 | | -static DEFINE_PER_CPU(struct migration_arg, migrate_arg); |
---|
8196 | | - |
---|
8197 | | -void migrate_enable(void) |
---|
8198 | | -{ |
---|
8199 | | - struct task_struct *p = current; |
---|
8200 | | - struct rq *rq = this_rq(); |
---|
8201 | | - int cpu = task_cpu(p); |
---|
8202 | | - |
---|
8203 | | - WARN_ON_ONCE(p->migrate_disable <= 0); |
---|
8204 | | - if (p->migrate_disable > 1) { |
---|
8205 | | - p->migrate_disable--; |
---|
8206 | | - return; |
---|
8207 | | - } |
---|
8208 | | - |
---|
8209 | | - preempt_disable(); |
---|
8210 | | - |
---|
8211 | | -#ifdef CONFIG_SCHED_DEBUG |
---|
8212 | | - WARN_ON_ONCE(current->pinned_on_cpu != cpu); |
---|
8213 | | - current->pinned_on_cpu = -1; |
---|
8214 | | -#endif |
---|
8215 | | - |
---|
8216 | | - WARN_ON_ONCE(rq->nr_pinned < 1); |
---|
8217 | | - |
---|
8218 | | - p->migrate_disable = 0; |
---|
8219 | | - rq->nr_pinned--; |
---|
8220 | | -#ifdef CONFIG_HOTPLUG_CPU |
---|
8221 | | - if (rq->nr_pinned == 0 && unlikely(!cpu_active(cpu)) && |
---|
8222 | | - takedown_cpu_task) |
---|
8223 | | - wake_up_process(takedown_cpu_task); |
---|
8224 | | -#endif |
---|
8225 | | - |
---|
8226 | | - if (!p->migrate_disable_scheduled) |
---|
8227 | | - goto out; |
---|
8228 | | - |
---|
8229 | | - p->migrate_disable_scheduled = 0; |
---|
8230 | | - |
---|
8231 | | - migrate_enable_update_cpus_allowed(p); |
---|
8232 | | - |
---|
8233 | | - WARN_ON(smp_processor_id() != cpu); |
---|
8234 | | - if (!is_cpu_allowed(p, cpu)) { |
---|
8235 | | - struct migration_arg __percpu *arg; |
---|
8236 | | - struct cpu_stop_work __percpu *work; |
---|
8237 | | - struct rq_flags rf; |
---|
8238 | | - |
---|
8239 | | - work = this_cpu_ptr(&migrate_work); |
---|
8240 | | - arg = this_cpu_ptr(&migrate_arg); |
---|
8241 | | - WARN_ON_ONCE(!arg->done && !work->disabled && work->arg); |
---|
8242 | | - |
---|
8243 | | - arg->task = p; |
---|
8244 | | - arg->done = false; |
---|
8245 | | - |
---|
8246 | | - rq = task_rq_lock(p, &rf); |
---|
8247 | | - update_rq_clock(rq); |
---|
8248 | | - arg->dest_cpu = select_fallback_rq(cpu, p); |
---|
8249 | | - task_rq_unlock(rq, p, &rf); |
---|
8250 | | - |
---|
8251 | | - stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop, |
---|
8252 | | - arg, work); |
---|
8253 | | - tlb_migrate_finish(p->mm); |
---|
8254 | | - } |
---|
8255 | | - |
---|
8256 | | -out: |
---|
8257 | | - preempt_lazy_enable(); |
---|
8258 | | - preempt_enable(); |
---|
8259 | | -} |
---|
8260 | | -EXPORT_SYMBOL(migrate_enable); |
---|
8261 | | - |
---|
8262 | | -int cpu_nr_pinned(int cpu) |
---|
8263 | | -{ |
---|
8264 | | - struct rq *rq = cpu_rq(cpu); |
---|
8265 | | - |
---|
8266 | | - return rq->nr_pinned; |
---|
8267 | | -} |
---|
8268 | | - |
---|
8269 | | -#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) |
---|
8270 | | -static void migrate_disabled_sched(struct task_struct *p) |
---|
8271 | | -{ |
---|
8272 | | -} |
---|
8273 | | - |
---|
8274 | | -void migrate_disable(void) |
---|
8275 | | -{ |
---|
8276 | | -#ifdef CONFIG_SCHED_DEBUG |
---|
8277 | | - current->migrate_disable++; |
---|
8278 | | -#endif |
---|
8279 | | - barrier(); |
---|
8280 | | -} |
---|
8281 | | -EXPORT_SYMBOL(migrate_disable); |
---|
8282 | | - |
---|
8283 | | -void migrate_enable(void) |
---|
8284 | | -{ |
---|
8285 | | -#ifdef CONFIG_SCHED_DEBUG |
---|
8286 | | - struct task_struct *p = current; |
---|
8287 | | - |
---|
8288 | | - WARN_ON_ONCE(p->migrate_disable <= 0); |
---|
8289 | | - p->migrate_disable--; |
---|
8290 | | -#endif |
---|
8291 | | - barrier(); |
---|
8292 | | -} |
---|
8293 | | -EXPORT_SYMBOL(migrate_enable); |
---|
8294 | | -#else |
---|
8295 | | -static void migrate_disabled_sched(struct task_struct *p) |
---|
8296 | | -{ |
---|
8297 | | -} |
---|
8298 | | -#endif |
---|