From 6778948f9de86c3cfaf36725a7c87dcff9ba247f Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Mon, 11 Dec 2023 08:20:59 +0000
Subject: [PATCH] kernel_5.10 no rt

---
 kernel/kernel/sched/core.c | 1275 ++++++++++++---------------------------------
 1 files changed, 269 insertions(+), 1,006 deletions(-)

diff --git a/kernel/kernel/sched/core.c b/kernel/kernel/sched/core.c
index e00ae06..7359375 100644
--- a/kernel/kernel/sched/core.c
+++ b/kernel/kernel/sched/core.c
@@ -78,11 +78,7 @@
  * Number of tasks to iterate in a single balance run.
  * Limited because this is done with IRQs disabled.
  */
-#ifdef CONFIG_PREEMPT_RT
-const_debug unsigned int sysctl_sched_nr_migrate = 8;
-#else
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
-#endif
 
 /*
  * period over which we measure -rt task CPU usage in us.
@@ -531,15 +527,9 @@
 #endif
 #endif
 
-static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task,
-			 bool sleeper)
+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task)
 {
-	struct wake_q_node *node;
-
-	if (sleeper)
-		node = &task->wake_q_sleeper;
-	else
-		node = &task->wake_q;
+	struct wake_q_node *node = &task->wake_q;
 
 	/*
 	 * Atomically grab the task, if ->wake_q is !nil already it means
@@ -576,13 +566,7 @@
  */
 void wake_q_add(struct wake_q_head *head, struct task_struct *task)
 {
-	if (__wake_q_add(head, task, false))
-		get_task_struct(task);
-}
-
-void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task)
-{
-	if (__wake_q_add(head, task, true))
+	if (__wake_q_add(head, task))
 		get_task_struct(task);
 }
 
@@ -605,40 +589,29 @@
  */
 void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task)
 {
-	if (!__wake_q_add(head, task, false))
+	if (!__wake_q_add(head, task))
 		put_task_struct(task);
 }
 
-void __wake_up_q(struct wake_q_head *head, bool sleeper)
+void wake_up_q(struct wake_q_head *head)
 {
 	struct wake_q_node *node = head->first;
 
 	while (node != WAKE_Q_TAIL) {
 		struct task_struct *task;
 
-		if (sleeper)
-			task = container_of(node, struct task_struct, wake_q_sleeper);
-		else
-			task = container_of(node, struct task_struct, wake_q);
-
+		task = container_of(node, struct task_struct, wake_q);
 		BUG_ON(!task);
 		/* Task can safely be re-inserted now: */
 		node = node->next;
+		task->wake_q.next = NULL;
 		task->wake_q_count = head->count;
-		if (sleeper)
-			task->wake_q_sleeper.next = NULL;
-		else
-			task->wake_q.next = NULL;
 
 		/*
 		 * wake_up_process() executes a full barrier, which pairs with
 		 * the queueing in wake_q_add() so as not to miss wakeups.
*/ - if (sleeper) - wake_up_lock_sleeper(task); - else - wake_up_process(task); - + wake_up_process(task); task->wake_q_count = 0; put_task_struct(task); } @@ -675,48 +648,6 @@ trace_sched_wake_idle_without_ipi(cpu); } EXPORT_SYMBOL_GPL(resched_curr); - -#ifdef CONFIG_PREEMPT_LAZY - -static int tsk_is_polling(struct task_struct *p) -{ -#ifdef TIF_POLLING_NRFLAG - return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); -#else - return 0; -#endif -} - -void resched_curr_lazy(struct rq *rq) -{ - struct task_struct *curr = rq->curr; - int cpu; - - if (!sched_feat(PREEMPT_LAZY)) { - resched_curr(rq); - return; - } - - lockdep_assert_held(&rq->lock); - - if (test_tsk_need_resched(curr)) - return; - - if (test_tsk_need_resched_lazy(curr)) - return; - - set_tsk_need_resched_lazy(curr); - - cpu = cpu_of(rq); - if (cpu == smp_processor_id()) - return; - - /* NEED_RESCHED_LAZY must be visible before we test polling */ - smp_mb(); - if (!tsk_is_polling(curr)) - smp_send_reschedule(cpu); -} -#endif void resched_cpu(int cpu) { @@ -1870,82 +1801,6 @@ #ifdef CONFIG_SMP -static void -__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags); - -static int __set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask, - u32 flags); - -static void migrate_disable_switch(struct rq *rq, struct task_struct *p) -{ - if (likely(!p->migration_disabled)) - return; - - if (p->cpus_ptr != &p->cpus_mask) - return; - - /* - * Violates locking rules! see comment in __do_set_cpus_allowed(). - */ - __do_set_cpus_allowed(p, cpumask_of(rq->cpu), SCA_MIGRATE_DISABLE); -} - -void migrate_disable(void) -{ - struct task_struct *p = current; - - if (p->migration_disabled) { - p->migration_disabled++; - return; - } - - trace_sched_migrate_disable_tp(p); - - preempt_disable(); - this_rq()->nr_pinned++; - p->migration_disabled = 1; - preempt_lazy_disable(); - preempt_enable(); -} -EXPORT_SYMBOL_GPL(migrate_disable); - -void migrate_enable(void) -{ - struct task_struct *p = current; - - if (p->migration_disabled > 1) { - p->migration_disabled--; - return; - } - - /* - * Ensure stop_task runs either before or after this, and that - * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). - */ - preempt_disable(); - if (p->cpus_ptr != &p->cpus_mask) - __set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); - /* - * Mustn't clear migration_disabled() until cpus_ptr points back at the - * regular cpus_mask, otherwise things that race (eg. - * select_fallback_rq) get confused. - */ - barrier(); - p->migration_disabled = 0; - this_rq()->nr_pinned--; - preempt_lazy_enable(); - preempt_enable(); - - trace_sched_migrate_enable_tp(p); -} -EXPORT_SYMBOL_GPL(migrate_enable); - -static inline bool rq_has_pinned_tasks(struct rq *rq) -{ - return rq->nr_pinned; -} - /* * Per-CPU kthreads are allowed to run on !active && online CPUs, see * __set_cpus_allowed_ptr() and select_fallback_rq(). 
@@ -1955,7 +1810,7 @@ if (!cpumask_test_cpu(cpu, p->cpus_ptr)) return false; - if (is_per_cpu_kthread(p) || is_migration_disabled(p)) + if (is_per_cpu_kthread(p)) return cpu_online(cpu); if (!cpu_active(cpu)) @@ -2015,21 +1870,8 @@ } struct migration_arg { - struct task_struct *task; - int dest_cpu; - struct set_affinity_pending *pending; -}; - -/* - * @refs: number of wait_for_completion() - * @stop_pending: is @stop_work in use - */ -struct set_affinity_pending { - refcount_t refs; - unsigned int stop_pending; - struct completion done; - struct cpu_stop_work stop_work; - struct migration_arg arg; + struct task_struct *task; + int dest_cpu; }; /* @@ -2062,17 +1904,15 @@ static int migration_cpu_stop(void *data) { struct migration_arg *arg = data; - struct set_affinity_pending *pending = arg->pending; struct task_struct *p = arg->task; struct rq *rq = this_rq(); - bool complete = false; struct rq_flags rf; /* * The original target CPU might have gone down and we might * be on another CPU but it doesn't matter. */ - local_irq_save(rf.flags); + local_irq_disable(); /* * We need to explicitly wake pending tasks before running * __migrate_task() such that we will not miss enforcing cpus_ptr @@ -2082,121 +1922,21 @@ raw_spin_lock(&p->pi_lock); rq_lock(rq, &rf); - /* * If task_rq(p) != rq, it cannot be migrated here, because we're * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because * we're holding p->pi_lock. */ if (task_rq(p) == rq) { - if (is_migration_disabled(p)) - goto out; - - if (pending) { - if (p->migration_pending == pending) - p->migration_pending = NULL; - complete = true; - - if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) - goto out; - } - if (task_on_rq_queued(p)) rq = __migrate_task(rq, &rf, p, arg->dest_cpu); else p->wake_cpu = arg->dest_cpu; - - /* - * XXX __migrate_task() can fail, at which point we might end - * up running on a dodgy CPU, AFAICT this can only happen - * during CPU hotplug, at which point we'll get pushed out - * anyway, so it's probably not a big deal. - */ - - } else if (pending) { - /* - * This happens when we get migrated between migrate_enable()'s - * preempt_enable() and scheduling the stopper task. At that - * point we're a regular task again and not current anymore. - * - * A !PREEMPT kernel has a giant hole here, which makes it far - * more likely. - */ - - /* - * The task moved before the stopper got to run. We're holding - * ->pi_lock, so the allowed mask is stable - if it got - * somewhere allowed, we're done. - */ - if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) { - if (p->migration_pending == pending) - p->migration_pending = NULL; - complete = true; - goto out; - } - - /* - * When migrate_enable() hits a rq mis-match we can't reliably - * determine is_migration_disabled() and so have to chase after - * it. 
- */ - WARN_ON_ONCE(!pending->stop_pending); - task_rq_unlock(rq, p, &rf); - stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop, - &pending->arg, &pending->stop_work); - return 0; } -out: - if (pending) - pending->stop_pending = false; - task_rq_unlock(rq, p, &rf); + rq_unlock(rq, &rf); + raw_spin_unlock(&p->pi_lock); - if (complete) - complete_all(&pending->done); - - return 0; -} - -int push_cpu_stop(void *arg) -{ - struct rq *lowest_rq = NULL, *rq = this_rq(); - struct task_struct *p = arg; - - raw_spin_lock_irq(&p->pi_lock); - raw_spin_lock(&rq->lock); - - if (task_rq(p) != rq) - goto out_unlock; - - if (is_migration_disabled(p)) { - p->migration_flags |= MDF_PUSH; - goto out_unlock; - } - - p->migration_flags &= ~MDF_PUSH; - - if (p->sched_class->find_lock_rq) - lowest_rq = p->sched_class->find_lock_rq(p, rq); - - if (!lowest_rq) - goto out_unlock; - - // XXX validate p is still the highest prio task - if (task_rq(p) == rq) { - deactivate_task(rq, p, 0); - set_task_cpu(p, lowest_rq->cpu); - activate_task(lowest_rq, p, 0); - resched_curr(lowest_rq); - } - - double_unlock_balance(rq, lowest_rq); - -out_unlock: - rq->push_busy = false; - raw_spin_unlock(&rq->lock); - raw_spin_unlock_irq(&p->pi_lock); - - put_task_struct(p); + local_irq_enable(); return 0; } @@ -2204,40 +1944,19 @@ * sched_class::set_cpus_allowed must do the below, but is not required to * actually call this function. */ -void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags) +void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) { - if (flags & (SCA_MIGRATE_ENABLE | SCA_MIGRATE_DISABLE)) { - p->cpus_ptr = new_mask; - return; - } - cpumask_copy(&p->cpus_mask, new_mask); p->nr_cpus_allowed = cpumask_weight(new_mask); trace_android_rvh_set_cpus_allowed_comm(p, new_mask); } -static void -__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags) +void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { struct rq *rq = task_rq(p); bool queued, running; - /* - * This here violates the locking rules for affinity, since we're only - * supposed to change these variables while holding both rq->lock and - * p->pi_lock. - * - * HOWEVER, it magically works, because ttwu() is the only code that - * accesses these variables under p->pi_lock and only does so after - * smp_cond_load_acquire(&p->on_cpu, !VAL), and we're in __schedule() - * before finish_task(). - * - * XXX do further audits, this smells like something putrid. - */ - if (flags & SCA_MIGRATE_DISABLE) - SCHED_WARN_ON(!p->on_cpu); - else - lockdep_assert_held(&p->pi_lock); + lockdep_assert_held(&p->pi_lock); queued = task_on_rq_queued(p); running = task_current(rq, p); @@ -2253,7 +1972,7 @@ if (running) put_prev_task(rq, p); - p->sched_class->set_cpus_allowed(p, new_mask, flags); + p->sched_class->set_cpus_allowed(p, new_mask); if (queued) enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); @@ -2261,14 +1980,12 @@ set_next_task(rq, p); } -static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf, - int dest_cpu, unsigned int flags); /* * Called with both p->pi_lock and rq->lock held; drops both before returning. 
*/ static int __set_cpus_allowed_ptr_locked(struct task_struct *p, const struct cpumask *new_mask, - u32 flags, + bool check, struct rq *rq, struct rq_flags *rf) { @@ -2279,14 +1996,9 @@ update_rq_clock(rq); - if (p->flags & PF_KTHREAD || is_migration_disabled(p)) { + if (p->flags & PF_KTHREAD) { /* - * Kernel threads are allowed on online && !active CPUs. - * - * Specifically, migration_disabled() tasks must not fail the - * cpumask_any_and_distribute() pick below, esp. so on - * SCA_MIGRATE_ENABLE, otherwise we'll not call - * set_cpus_allowed_common() and actually reset p->cpus_ptr. + * Kernel threads are allowed on online && !active CPUs */ cpu_valid_mask = cpu_online_mask; } else if (!cpumask_subset(new_mask, cpu_allowed_mask)) { @@ -2298,22 +2010,13 @@ * Must re-check here, to close a race against __kthread_bind(), * sched_setaffinity() is not guaranteed to observe the flag. */ - if ((flags & SCA_CHECK) && (p->flags & PF_NO_SETAFFINITY)) { + if (check && (p->flags & PF_NO_SETAFFINITY)) { ret = -EINVAL; goto out; } - if (!(flags & SCA_MIGRATE_ENABLE)) { - if (cpumask_equal(&p->cpus_mask, new_mask)) - goto out; - - if (WARN_ON_ONCE(p == current && - is_migration_disabled(p) && - !cpumask_test_cpu(task_cpu(p), new_mask))) { - ret = -EBUSY; - goto out; - } - } + if (cpumask_equal(&p->cpus_mask, new_mask)) + goto out; /* * Picking a ~random cpu helps in cases where we are changing affinity @@ -2326,7 +2029,7 @@ goto out; } - __do_set_cpus_allowed(p, new_mask, flags); + do_set_cpus_allowed(p, new_mask); if (p->flags & PF_KTHREAD) { /* @@ -2338,227 +2041,27 @@ p->nr_cpus_allowed != 1); } - return affine_move_task(rq, p, rf, dest_cpu, flags); + /* Can the task run on the task's current CPU? If so, we're done */ + if (cpumask_test_cpu(task_cpu(p), new_mask)) + goto out; + + if (task_running(rq, p) || p->state == TASK_WAKING) { + struct migration_arg arg = { p, dest_cpu }; + /* Need help from migration thread: drop lock and wait. */ + task_rq_unlock(rq, p, rf); + stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); + return 0; + } else if (task_on_rq_queued(p)) { + /* + * OK, since we're going to drop the lock immediately + * afterwards anyway. + */ + rq = move_queued_task(rq, rf, p, dest_cpu); + } out: task_rq_unlock(rq, p, rf); return ret; -} - -void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -{ - __do_set_cpus_allowed(p, new_mask, 0); -} - -/* - * This function is wildly self concurrent; here be dragons. - * - * - * When given a valid mask, __set_cpus_allowed_ptr() must block until the - * designated task is enqueued on an allowed CPU. If that task is currently - * running, we have to kick it out using the CPU stopper. - * - * Migrate-Disable comes along and tramples all over our nice sandcastle. - * Consider: - * - * Initial conditions: P0->cpus_mask = [0, 1] - * - * P0@CPU0 P1 - * - * migrate_disable(); - * <preempted> - * set_cpus_allowed_ptr(P0, [1]); - * - * P1 *cannot* return from this set_cpus_allowed_ptr() call until P0 executes - * its outermost migrate_enable() (i.e. it exits its Migrate-Disable region). - * This means we need the following scheme: - * - * P0@CPU0 P1 - * - * migrate_disable(); - * <preempted> - * set_cpus_allowed_ptr(P0, [1]); - * <blocks> - * <resumes> - * migrate_enable(); - * __set_cpus_allowed_ptr(); - * <wakes local stopper> - * `--> <woken on migration completion> - * - * Now the fun stuff: there may be several P1-like tasks, i.e. multiple - * concurrent set_cpus_allowed_ptr(P0, [*]) calls. 
CPU affinity changes of any - * task p are serialized by p->pi_lock, which we can leverage: the one that - * should come into effect at the end of the Migrate-Disable region is the last - * one. This means we only need to track a single cpumask (i.e. p->cpus_mask), - * but we still need to properly signal those waiting tasks at the appropriate - * moment. - * - * This is implemented using struct set_affinity_pending. The first - * __set_cpus_allowed_ptr() caller within a given Migrate-Disable region will - * setup an instance of that struct and install it on the targeted task_struct. - * Any and all further callers will reuse that instance. Those then wait for - * a completion signaled at the tail of the CPU stopper callback (1), triggered - * on the end of the Migrate-Disable region (i.e. outermost migrate_enable()). - * - * - * (1) In the cases covered above. There is one more where the completion is - * signaled within affine_move_task() itself: when a subsequent affinity request - * cancels the need for an active migration. Consider: - * - * Initial conditions: P0->cpus_mask = [0, 1] - * - * P0@CPU0 P1 P2 - * - * migrate_disable(); - * <preempted> - * set_cpus_allowed_ptr(P0, [1]); - * <blocks> - * set_cpus_allowed_ptr(P0, [0, 1]); - * <signal completion> - * <awakes> - * - * Note that the above is safe vs a concurrent migrate_enable(), as any - * pending affinity completion is preceded an uninstallion of - * p->migration_pending done with p->pi_lock held. - */ -static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf, - int dest_cpu, unsigned int flags) -{ - struct set_affinity_pending my_pending = { }, *pending = NULL; - bool stop_pending, complete = false; - - /* Can the task run on the task's current CPU? If so, we're done */ - if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) { - struct task_struct *push_task = NULL; - - if ((flags & SCA_MIGRATE_ENABLE) && - (p->migration_flags & MDF_PUSH) && !rq->push_busy) { - rq->push_busy = true; - push_task = get_task_struct(p); - } - - /* - * If there are pending waiters, but no pending stop_work, - * then complete now. - */ - pending = p->migration_pending; - if (pending && !pending->stop_pending) { - p->migration_pending = NULL; - complete = true; - } - - task_rq_unlock(rq, p, rf); - - if (push_task) { - stop_one_cpu_nowait(rq->cpu, push_cpu_stop, - p, &rq->push_work); - } - - if (complete) - complete_all(&pending->done); - - return 0; - } - - if (!(flags & SCA_MIGRATE_ENABLE)) { - /* serialized by p->pi_lock */ - if (!p->migration_pending) { - /* Install the request */ - refcount_set(&my_pending.refs, 1); - init_completion(&my_pending.done); - my_pending.arg = (struct migration_arg) { - .task = p, - .dest_cpu = dest_cpu, - .pending = &my_pending, - }; - - p->migration_pending = &my_pending; - } else { - pending = p->migration_pending; - refcount_inc(&pending->refs); - /* - * Affinity has changed, but we've already installed a - * pending. migration_cpu_stop() *must* see this, else - * we risk a completion of the pending despite having a - * task on a disallowed CPU. - * - * Serialized by p->pi_lock, so this is safe. - */ - pending->arg.dest_cpu = dest_cpu; - } - } - pending = p->migration_pending; - /* - * - !MIGRATE_ENABLE: - * we'll have installed a pending if there wasn't one already. 
- * - * - MIGRATE_ENABLE: - * we're here because the current CPU isn't matching anymore, - * the only way that can happen is because of a concurrent - * set_cpus_allowed_ptr() call, which should then still be - * pending completion. - * - * Either way, we really should have a @pending here. - */ - if (WARN_ON_ONCE(!pending)) { - task_rq_unlock(rq, p, rf); - return -EINVAL; - } - - if (task_running(rq, p) || p->state == TASK_WAKING) { - /* - * MIGRATE_ENABLE gets here because 'p == current', but for - * anything else we cannot do is_migration_disabled(), punt - * and have the stopper function handle it all race-free. - */ - stop_pending = pending->stop_pending; - if (!stop_pending) - pending->stop_pending = true; - - if (flags & SCA_MIGRATE_ENABLE) - p->migration_flags &= ~MDF_PUSH; - - task_rq_unlock(rq, p, rf); - - if (!stop_pending) { - stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, - &pending->arg, &pending->stop_work); - } - - if (flags & SCA_MIGRATE_ENABLE) - return 0; - } else { - - if (!is_migration_disabled(p)) { - if (task_on_rq_queued(p)) - rq = move_queued_task(rq, rf, p, dest_cpu); - - if (!pending->stop_pending) { - p->migration_pending = NULL; - complete = true; - } - } - task_rq_unlock(rq, p, rf); - - if (complete) - complete_all(&pending->done); - } - - wait_for_completion(&pending->done); - - if (refcount_dec_and_test(&pending->refs)) - wake_up_var(&pending->refs); /* No UaF, just an address */ - - /* - * Block the original owner of &pending until all subsequent callers - * have seen the completion and decremented the refcount - */ - wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs)); - - /* ARGH */ - WARN_ON_ONCE(my_pending.stop_pending); - - return 0; } /* @@ -2571,19 +2074,18 @@ * call is not atomic; no spinlocks may be held. */ static int __set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask, - u32 flags) + const struct cpumask *new_mask, bool check) { struct rq_flags rf; struct rq *rq; rq = task_rq_lock(p, &rf); - return __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, &rf); + return __set_cpus_allowed_ptr_locked(p, new_mask, check, rq, &rf); } int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { - return __set_cpus_allowed_ptr(p, new_mask, 0); + return __set_cpus_allowed_ptr(p, new_mask, false); } EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); @@ -2692,8 +2194,6 @@ * Clearly, migrating tasks to offline CPUs is a fairly daft thing. */ WARN_ON_ONCE(!cpu_online(new_cpu)); - - WARN_ON_ONCE(is_migration_disabled(p)); #endif trace_sched_migrate_task(p, new_cpu); @@ -2827,18 +2327,6 @@ } EXPORT_SYMBOL_GPL(migrate_swap); -static bool check_task_state(struct task_struct *p, long match_state) -{ - bool match = false; - - raw_spin_lock_irq(&p->pi_lock); - if (p->state == match_state || p->saved_state == match_state) - match = true; - raw_spin_unlock_irq(&p->pi_lock); - - return match; -} - /* * wait_task_inactive - wait for a thread to unschedule. * @@ -2883,7 +2371,7 @@ * is actually now running somewhere else! 
*/ while (task_running(rq, p)) { - if (match_state && !check_task_state(p, match_state)) + if (match_state && unlikely(p->state != match_state)) return 0; cpu_relax(); } @@ -2898,8 +2386,7 @@ running = task_running(rq, p); queued = task_on_rq_queued(p); ncsw = 0; - if (!match_state || p->state == match_state || - p->saved_state == match_state) + if (!match_state || p->state == match_state) ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ task_rq_unlock(rq, p, &rf); @@ -2933,7 +2420,7 @@ ktime_t to = NSEC_PER_SEC / HZ; set_current_state(TASK_UNINTERRUPTIBLE); - schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD); + schedule_hrtimeout(&to, HRTIMER_MODE_REL); continue; } @@ -3040,12 +2527,6 @@ } fallthrough; case possible: - /* - * XXX When called from select_task_rq() we only - * hold p->pi_lock and again violate locking order. - * - * More yuck to audit. - */ do_set_cpus_allowed(p, task_cpu_possible_mask(p)); state = fail; break; @@ -3079,7 +2560,7 @@ { lockdep_assert_held(&p->pi_lock); - if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p)) + if (p->nr_cpus_allowed > 1) cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); else cpu = cpumask_any(p->cpus_ptr); @@ -3102,7 +2583,6 @@ void sched_set_stop_task(int cpu, struct task_struct *stop) { - static struct lock_class_key stop_pi_lock; struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; struct task_struct *old_stop = cpu_rq(cpu)->stop; @@ -3118,20 +2598,6 @@ sched_setscheduler_nocheck(stop, SCHED_FIFO, ¶m); stop->sched_class = &stop_sched_class; - - /* - * The PI code calls rt_mutex_setprio() with ->pi_lock held to - * adjust the effective priority of a task. As a result, - * rt_mutex_setprio() can trigger (RT) balancing operations, - * which can then trigger wakeups of the stop thread to push - * around the current task. - * - * The stop task itself will never be part of the PI-chain, it - * never blocks, therefore that ->pi_lock recursion is safe. - * Tell lockdep about this by placing the stop->pi_lock in its - * own class. - */ - lockdep_set_class(&stop->pi_lock, &stop_pi_lock); } cpu_rq(cpu)->stop = stop; @@ -3145,23 +2611,15 @@ } } -#else /* CONFIG_SMP */ +#else static inline int __set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask, - u32 flags) + const struct cpumask *new_mask, bool check) { return set_cpus_allowed_ptr(p, new_mask); } -static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { } - -static inline bool rq_has_pinned_tasks(struct rq *rq) -{ - return false; -} - -#endif /* !CONFIG_SMP */ +#endif /* CONFIG_SMP */ static void ttwu_stat(struct task_struct *p, int cpu, int wake_flags) @@ -3595,7 +3053,7 @@ int cpu, success = 0; preempt_disable(); - if (!IS_ENABLED(CONFIG_PREEMPT_RT) && p == current) { + if (p == current) { /* * We're waking current, this means 'p->on_rq' and 'task_cpu(p) * == smp_processor_id()'. Together this means we can special @@ -3625,26 +3083,8 @@ */ raw_spin_lock_irqsave(&p->pi_lock, flags); smp_mb__after_spinlock(); - if (!(p->state & state)) { - /* - * The task might be running due to a spinlock sleeper - * wakeup. Check the saved state and set it to running - * if the wakeup condition is true. - */ - if (!(wake_flags & WF_LOCK_SLEEPER)) { - if (p->saved_state & state) { - p->saved_state = TASK_RUNNING; - success = 1; - } - } + if (!(p->state & state)) goto unlock; - } - /* - * If this is a regular wakeup, then we can unconditionally - * clear the saved state of a "lock sleeper". 
- */ - if (!(wake_flags & WF_LOCK_SLEEPER)) - p->saved_state = TASK_RUNNING; #ifdef CONFIG_FREEZER /* @@ -3853,18 +3293,6 @@ } EXPORT_SYMBOL(wake_up_process); -/** - * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock" - * @p: The process to be woken up. - * - * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate - * the nature of the wakeup. - */ -int wake_up_lock_sleeper(struct task_struct *p) -{ - return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER); -} - int wake_up_state(struct task_struct *p, unsigned int state) { return try_to_wake_up(p, state, 0); @@ -3920,7 +3348,6 @@ init_numa_balancing(clone_flags, p); #ifdef CONFIG_SMP p->wake_entry.u_flags = CSD_TYPE_TTWU; - p->migration_pending = NULL; #endif } @@ -4099,9 +3526,6 @@ p->on_cpu = 0; #endif init_task_preempt_count(p); -#ifdef CONFIG_HAVE_PREEMPT_LAZY - task_thread_info(p)->preempt_lazy_count = 0; -#endif #ifdef CONFIG_SMP plist_node_init(&p->pushable_tasks, MAX_PRIO); RB_CLEAR_NODE(&p->pushable_dl_tasks); @@ -4329,90 +3753,6 @@ #endif } -#ifdef CONFIG_SMP - -static void do_balance_callbacks(struct rq *rq, struct callback_head *head) -{ - void (*func)(struct rq *rq); - struct callback_head *next; - - lockdep_assert_held(&rq->lock); - - while (head) { - func = (void (*)(struct rq *))head->func; - next = head->next; - head->next = NULL; - head = next; - - func(rq); - } -} - -static inline struct callback_head *splice_balance_callbacks(struct rq *rq) -{ - struct callback_head *head = rq->balance_callback; - - lockdep_assert_held(&rq->lock); - if (head) { - rq->balance_callback = NULL; - rq->balance_flags &= ~BALANCE_WORK; - } - - return head; -} - -static void __balance_callbacks(struct rq *rq) -{ - do_balance_callbacks(rq, splice_balance_callbacks(rq)); -} - -static inline void balance_callbacks(struct rq *rq, struct callback_head *head) -{ - unsigned long flags; - - if (unlikely(head)) { - raw_spin_lock_irqsave(&rq->lock, flags); - do_balance_callbacks(rq, head); - raw_spin_unlock_irqrestore(&rq->lock, flags); - } -} - -static void balance_push(struct rq *rq); - -static inline void balance_switch(struct rq *rq) -{ - if (likely(!rq->balance_flags)) - return; - - if (rq->balance_flags & BALANCE_PUSH) { - balance_push(rq); - return; - } - - __balance_callbacks(rq); -} - -#else - -static inline void __balance_callbacks(struct rq *rq) -{ -} - -static inline struct callback_head *splice_balance_callbacks(struct rq *rq) -{ - return NULL; -} - -static inline void balance_callbacks(struct rq *rq, struct callback_head *head) -{ -} - -static inline void balance_switch(struct rq *rq) -{ -} - -#endif - static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf) { @@ -4438,7 +3778,6 @@ * prev into current: */ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); - balance_switch(rq); raw_spin_unlock_irq(&rq->lock); } @@ -4453,22 +3792,6 @@ #ifndef finish_arch_post_lock_switch # define finish_arch_post_lock_switch() do { } while (0) #endif - -static inline void kmap_local_sched_out(void) -{ -#ifdef CONFIG_KMAP_LOCAL - if (unlikely(current->kmap_ctrl.idx)) - __kmap_local_sched_out(); -#endif -} - -static inline void kmap_local_sched_in(void) -{ -#ifdef CONFIG_KMAP_LOCAL - if (unlikely(current->kmap_ctrl.idx)) - __kmap_local_sched_in(); -#endif -} /** * prepare_task_switch - prepare to switch tasks @@ -4492,7 +3815,6 @@ perf_event_task_sched_out(prev, next); rseq_preempt(prev); fire_sched_out_preempt_notifiers(prev, next); - kmap_local_sched_out(); 
prepare_task(next); prepare_arch_switch(next); } @@ -4559,7 +3881,6 @@ finish_lock_switch(rq); finish_arch_post_lock_switch(); kcov_finish_switch(current); - kmap_local_sched_in(); fire_sched_in_preempt_notifiers(current); /* @@ -4574,17 +3895,23 @@ * provided by mmdrop(), * - a sync_core for SYNC_CORE. */ - /* - * We use mmdrop_delayed() here so we don't have to do the - * full __mmdrop() when we are the last user. - */ if (mm) { membarrier_mm_sync_core_before_usermode(mm); - mmdrop_delayed(mm); + mmdrop(mm); } if (unlikely(prev_state == TASK_DEAD)) { if (prev->sched_class->task_dead) prev->sched_class->task_dead(prev); + + /* + * Remove function-return probe instances associated with this + * task and put them back on the free list. + */ + kprobe_flush_task(prev); + trace_android_rvh_flush_task(prev); + + /* Task is done with its stack. */ + put_task_stack(prev); put_task_struct_rcu_user(prev); } @@ -4592,6 +3919,43 @@ tick_nohz_task_switch(); return rq; } + +#ifdef CONFIG_SMP + +/* rq->lock is NOT held, but preemption is disabled */ +static void __balance_callback(struct rq *rq) +{ + struct callback_head *head, *next; + void (*func)(struct rq *rq); + unsigned long flags; + + raw_spin_lock_irqsave(&rq->lock, flags); + head = rq->balance_callback; + rq->balance_callback = NULL; + while (head) { + func = (void (*)(struct rq *))head->func; + next = head->next; + head->next = NULL; + head = next; + + func(rq); + } + raw_spin_unlock_irqrestore(&rq->lock, flags); +} + +static inline void balance_callback(struct rq *rq) +{ + if (unlikely(rq->balance_callback)) + __balance_callback(rq); +} + +#else + +static inline void balance_callback(struct rq *rq) +{ +} + +#endif /** * schedule_tail - first thing a freshly forked thread must call. @@ -4612,6 +3976,7 @@ */ rq = finish_task_switch(prev); + balance_callback(rq); preempt_enable(); if (current->set_child_tid) @@ -5317,7 +4682,7 @@ * * WARNING: must be called with preemption disabled! */ -static void __sched notrace __schedule(bool preempt, bool spinning_lock) +static void __sched notrace __schedule(bool preempt) { struct task_struct *prev, *next; unsigned long *switch_count; @@ -5370,7 +4735,7 @@ * - ptrace_{,un}freeze_traced() can change ->state underneath us. */ prev_state = prev->state; - if ((!preempt || spinning_lock) && prev_state) { + if (!preempt && prev_state) { if (signal_pending_state(prev_state, prev)) { prev->state = TASK_RUNNING; } else { @@ -5405,7 +4770,6 @@ next = pick_next_task(rq, prev, &rf); clear_tsk_need_resched(prev); - clear_tsk_need_resched_lazy(prev); clear_preempt_need_resched(); trace_android_rvh_schedule(prev, next, rq); @@ -5432,7 +4796,6 @@ */ ++*switch_count; - migrate_disable_switch(rq, prev); psi_sched_switch(prev, next, !task_on_rq_queued(prev)); trace_sched_switch(preempt, prev, next); @@ -5441,11 +4804,10 @@ rq = context_switch(rq, prev, next, &rf); } else { rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); - - rq_unpin_lock(rq, &rf); - __balance_callbacks(rq); - raw_spin_unlock_irq(&rq->lock); + rq_unlock_irq(rq, &rf); } + + balance_callback(rq); } void __noreturn do_task_dead(void) @@ -5456,7 +4818,7 @@ /* Tell freezer to ignore us: */ current->flags |= PF_NOFREEZE; - __schedule(false, false); + __schedule(false); BUG(); /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ @@ -5489,6 +4851,9 @@ preempt_enable_no_resched(); } + if (tsk_is_pi_blocked(tsk)) + return; + /* * If we are going to sleep and we have plugged IO queued, * make sure to submit it to avoid deadlocks. 
@@ -5514,7 +4879,7 @@ sched_submit_work(tsk); do { preempt_disable(); - __schedule(false, false); + __schedule(false); sched_preempt_enable_no_resched(); } while (need_resched()); sched_update_worker(tsk); @@ -5542,7 +4907,7 @@ */ WARN_ON_ONCE(current->state); do { - __schedule(false, false); + __schedule(false); } while (need_resched()); } @@ -5595,7 +4960,7 @@ */ preempt_disable_notrace(); preempt_latency_start(1); - __schedule(true, false); + __schedule(true); preempt_latency_stop(1); preempt_enable_no_resched_notrace(); @@ -5605,30 +4970,6 @@ */ } while (need_resched()); } - -#ifdef CONFIG_PREEMPT_LAZY -/* - * If TIF_NEED_RESCHED is then we allow to be scheduled away since this is - * set by a RT task. Oterwise we try to avoid beeing scheduled out as long as - * preempt_lazy_count counter >0. - */ -static __always_inline int preemptible_lazy(void) -{ - if (test_thread_flag(TIF_NEED_RESCHED)) - return 1; - if (current_thread_info()->preempt_lazy_count) - return 0; - return 1; -} - -#else - -static inline int preemptible_lazy(void) -{ - return 1; -} - -#endif #ifdef CONFIG_PREEMPTION /* @@ -5643,25 +4984,11 @@ */ if (likely(!preemptible())) return; - if (!preemptible_lazy()) - return; + preempt_schedule_common(); } NOKPROBE_SYMBOL(preempt_schedule); EXPORT_SYMBOL(preempt_schedule); - -#ifdef CONFIG_PREEMPT_RT -void __sched notrace preempt_schedule_lock(void) -{ - do { - preempt_disable(); - __schedule(true, true); - sched_preempt_enable_no_resched(); - } while (need_resched()); -} -NOKPROBE_SYMBOL(preempt_schedule_lock); -EXPORT_SYMBOL(preempt_schedule_lock); -#endif /** * preempt_schedule_notrace - preempt_schedule called by tracing @@ -5682,9 +5009,6 @@ enum ctx_state prev_ctx; if (likely(!preemptible())) - return; - - if (!preemptible_lazy()) return; do { @@ -5709,7 +5033,7 @@ * an infinite recursion. */ prev_ctx = exception_enter(); - __schedule(true, false); + __schedule(true); exception_exit(prev_ctx); preempt_latency_stop(1); @@ -5738,7 +5062,7 @@ do { preempt_disable(); local_irq_enable(); - __schedule(true, false); + __schedule(true); local_irq_disable(); sched_preempt_enable_no_resched(); } while (need_resched()); @@ -5905,11 +5229,9 @@ out_unlock: /* Avoid rq from going away on us: */ preempt_disable(); + __task_rq_unlock(rq, &rf); - rq_unpin_lock(rq, &rf); - __balance_callbacks(rq); - raw_spin_unlock(&rq->lock); - + balance_callback(rq); preempt_enable(); } #else @@ -6154,7 +5476,6 @@ int oldpolicy = -1, policy = attr->sched_policy; int retval, oldprio, newprio, queued, running; const struct sched_class *prev_class; - struct callback_head *head; struct rq_flags rf; int reset_on_fork; int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; @@ -6397,14 +5718,13 @@ /* Avoid rq from going away on us: */ preempt_disable(); - head = splice_balance_callbacks(rq); task_rq_unlock(rq, p, &rf); if (pi) rt_mutex_adjust_pi(p); /* Run balance callbacks after we've adjusted the PI chain: */ - balance_callbacks(rq, head); + balance_callback(rq); preempt_enable(); return 0; @@ -6916,7 +6236,7 @@ } #endif again: - retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK); + retval = __set_cpus_allowed_ptr(p, new_mask, true); if (!retval) { cpuset_cpus_allowed(p, cpus_allowed); @@ -7498,7 +6818,7 @@ * * And since this is boot we can forgo the serialization. 
*/ - set_cpus_allowed_common(idle, cpumask_of(cpu), 0); + set_cpus_allowed_common(idle, cpumask_of(cpu)); #endif /* * We're having a chicken and egg problem, even though we are @@ -7525,9 +6845,7 @@ /* Set the preempt count _outside_ the spinlocks! */ init_idle_preempt_count(idle, cpu); -#ifdef CONFIG_HAVE_PREEMPT_LAZY - task_thread_info(idle)->preempt_lazy_count = 0; -#endif + /* * The idle tasks have their own, simple scheduling class: */ @@ -7637,7 +6955,6 @@ #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_HOTPLUG_CPU - /* * Ensure that the idle task is using init_mm right before its CPU goes * offline. @@ -7657,124 +6974,166 @@ /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ } -static int __balance_push_cpu_stop(void *arg) +/* + * Since this CPU is going 'away' for a while, fold any nr_active delta + * we might have. Assumes we're called after migrate_tasks() so that the + * nr_active count is stable. We need to take the teardown thread which + * is calling this into account, so we hand in adjust = 1 to the load + * calculation. + * + * Also see the comment "Global load-average calculations". + */ +static void calc_load_migrate(struct rq *rq) { - struct task_struct *p = arg; - struct rq *rq = this_rq(); - struct rq_flags rf; - int cpu; + long delta = calc_load_fold_active(rq, 1); + if (delta) + atomic_long_add(delta, &calc_load_tasks); +} - raw_spin_lock_irq(&p->pi_lock); - rq_lock(rq, &rf); +static struct task_struct *__pick_migrate_task(struct rq *rq) +{ + const struct sched_class *class; + struct task_struct *next; + for_each_class(class) { + next = class->pick_next_task(rq); + if (next) { + next->sched_class->put_prev_task(rq, next); + return next; + } + } + + /* The idle class should always have a runnable task */ + BUG(); +} + +/* + * Migrate all tasks from the rq, sleeping tasks will be migrated by + * try_to_wake_up()->select_task_rq(). + * + * Called with rq->lock held even though we'er in stop_machine() and + * there's no concurrency possible, we hold the required locks anyway + * because of lock validation efforts. + * + * force: if false, the function will skip CPU pinned kthreads. + */ +static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf, bool force) +{ + struct rq *rq = dead_rq; + struct task_struct *next, *tmp, *stop = rq->stop; + LIST_HEAD(percpu_kthreads); + struct rq_flags orf = *rf; + int dest_cpu; + + /* + * Fudge the rq selection such that the below task selection loop + * doesn't get stuck on the currently eligible stop task. + * + * We're currently inside stop_machine() and the rq is either stuck + * in the stop_machine_cpu_stop() loop, or we're executing this code, + * either way we should never end up calling schedule() until we're + * done here. + */ + rq->stop = NULL; + + /* + * put_prev_task() and pick_next_task() sched + * class method both need to have an up-to-date + * value of rq->clock[_task] + */ update_rq_clock(rq); - if (task_rq(p) == rq && task_on_rq_queued(p)) { - cpu = select_fallback_rq(rq->cpu, p); - rq = __migrate_task(rq, &rf, p, cpu); - } +#ifdef CONFIG_SCHED_DEBUG + /* note the clock update in orf */ + orf.clock_update_flags |= RQCF_UPDATED; +#endif - rq_unlock(rq, &rf); - raw_spin_unlock_irq(&p->pi_lock); - - put_task_struct(p); - - return 0; -} - -static DEFINE_PER_CPU(struct cpu_stop_work, push_work); - -/* - * Ensure we only run per-cpu kthreads once the CPU goes !active. 
- */ - - -static void balance_push(struct rq *rq) -{ - struct task_struct *push_task = rq->curr; - - lockdep_assert_held(&rq->lock); - SCHED_WARN_ON(rq->cpu != smp_processor_id()); - - /* - * Both the cpu-hotplug and stop task are in this case and are - * required to complete the hotplug process. - */ - if (is_per_cpu_kthread(push_task) || is_migration_disabled(push_task)) { + for (;;) { /* - * If this is the idle task on the outgoing CPU try to wake - * up the hotplug control thread which might wait for the - * last task to vanish. The rcuwait_active() check is - * accurate here because the waiter is pinned on this CPU - * and can't obviously be running in parallel. - * - * On RT kernels this also has to check whether there are - * pinned and scheduled out tasks on the runqueue. They - * need to leave the migrate disabled section first. + * There's this thread running, bail when that's the only + * remaining thread: */ - if (!rq->nr_running && !rq_has_pinned_tasks(rq) && - rcuwait_active(&rq->hotplug_wait)) { - raw_spin_unlock(&rq->lock); - rcuwait_wake_up(&rq->hotplug_wait); - raw_spin_lock(&rq->lock); + if (rq->nr_running == 1) + break; + + next = __pick_migrate_task(rq); + + /* + * Argh ... no iterator for tasks, we need to remove the + * kthread from the run-queue to continue. + */ + if (!force && is_per_cpu_kthread(next)) { + INIT_LIST_HEAD(&next->percpu_kthread_node); + list_add(&next->percpu_kthread_node, &percpu_kthreads); + + /* DEQUEUE_SAVE not used due to move_entity in rt */ + deactivate_task(rq, next, + DEQUEUE_NOCLOCK); + continue; } - return; + + /* + * Rules for changing task_struct::cpus_mask are holding + * both pi_lock and rq->lock, such that holding either + * stabilizes the mask. + * + * Drop rq->lock is not quite as disastrous as it usually is + * because !cpu_active at this point, which means load-balance + * will not interfere. Also, stop-machine. + */ + rq_unlock(rq, rf); + raw_spin_lock(&next->pi_lock); + rq_relock(rq, rf); + + /* + * Since we're inside stop-machine, _nothing_ should have + * changed the task, WARN if weird stuff happened, because in + * that case the above rq->lock drop is a fail too. + */ + if (task_rq(next) != rq || !task_on_rq_queued(next)) { + /* + * In the !force case, there is a hole between + * rq_unlock() and rq_relock(), where another CPU might + * not observe an up to date cpu_active_mask and try to + * move tasks around. + */ + WARN_ON(force); + raw_spin_unlock(&next->pi_lock); + continue; + } + + /* Find suitable destination for @next, with force if needed. */ + dest_cpu = select_fallback_rq(dead_rq->cpu, next); + rq = __migrate_task(rq, rf, next, dest_cpu); + if (rq != dead_rq) { + rq_unlock(rq, rf); + rq = dead_rq; + *rf = orf; + rq_relock(rq, rf); + } + raw_spin_unlock(&next->pi_lock); } - get_task_struct(push_task); - /* - * Temporarily drop rq->lock such that we can wake-up the stop task. - * Both preemption and IRQs are still disabled. - */ - raw_spin_unlock(&rq->lock); - stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task, - this_cpu_ptr(&push_work)); - /* - * At this point need_resched() is true and we'll take the loop in - * schedule(). The next pick is obviously going to be the stop task - * which is_per_cpu_kthread() and will push this task away. 
- */ - raw_spin_lock(&rq->lock); -} + list_for_each_entry_safe(next, tmp, &percpu_kthreads, + percpu_kthread_node) { -static void balance_push_set(int cpu, bool on) -{ - struct rq *rq = cpu_rq(cpu); - struct rq_flags rf; + /* ENQUEUE_RESTORE not used due to move_entity in rt */ + activate_task(rq, next, ENQUEUE_NOCLOCK); + list_del(&next->percpu_kthread_node); + } - rq_lock_irqsave(rq, &rf); - if (on) - rq->balance_flags |= BALANCE_PUSH; - else - rq->balance_flags &= ~BALANCE_PUSH; - rq_unlock_irqrestore(rq, &rf); -} - -/* - * Invoked from a CPUs hotplug control thread after the CPU has been marked - * inactive. All tasks which are not per CPU kernel threads are either - * pushed off this CPU now via balance_push() or placed on a different CPU - * during wakeup. Wait until the CPU is quiescent. - */ -static void balance_hotplug_wait(void) -{ - struct rq *rq = this_rq(); - - rcuwait_wait_event(&rq->hotplug_wait, - rq->nr_running == 1 && !rq_has_pinned_tasks(rq), - TASK_UNINTERRUPTIBLE); + rq->stop = stop; } static int drain_rq_cpu_stop(void *data) { -#ifndef CONFIG_PREEMPT_RT struct rq *rq = this_rq(); struct rq_flags rf; rq_lock_irqsave(rq, &rf); migrate_tasks(rq, &rf, false); rq_unlock_irqrestore(rq, &rf); -#endif + return 0; } @@ -7799,21 +7158,6 @@ if (rq_drain->done) cpu_stop_work_wait(rq_drain); } - -#else - -static inline void balance_push(struct rq *rq) -{ -} - -static inline void balance_push_set(int cpu, bool on) -{ -} - -static inline void balance_hotplug_wait(void) -{ -} - #endif /* CONFIG_HOTPLUG_CPU */ void set_rq_online(struct rq *rq) @@ -7901,8 +7245,6 @@ struct rq *rq = cpu_rq(cpu); struct rq_flags rf; - balance_push_set(cpu, false); - #ifdef CONFIG_SCHED_SMT /* * When going up, increment the number of cores with SMT present. @@ -7956,21 +7298,9 @@ int _sched_cpu_deactivate(unsigned int cpu) { - struct rq *rq = cpu_rq(cpu); - struct rq_flags rf; int ret; set_cpu_active(cpu, false); - - balance_push_set(cpu, true); - - rq_lock_irqsave(rq, &rf); - if (rq->rd) { - update_rq_clock(rq); - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); - set_rq_offline(rq); - } - rq_unlock_irqrestore(rq, &rf); #ifdef CONFIG_SCHED_SMT /* @@ -7985,7 +7315,6 @@ ret = cpuset_cpu_inactive(cpu); if (ret) { - balance_push_set(cpu, false); set_cpu_active(cpu, true); return ret; } @@ -8049,41 +7378,6 @@ } #ifdef CONFIG_HOTPLUG_CPU - -/* - * Invoked immediately before the stopper thread is invoked to bring the - * CPU down completely. At this point all per CPU kthreads except the - * hotplug thread (current) and the stopper thread (inactive) have been - * either parked or have been unbound from the outgoing CPU. Ensure that - * any of those which might be on the way out are gone. - * - * If after this point a bound task is being woken on this CPU then the - * responsible hotplug callback has failed to do it's job. - * sched_cpu_dying() will catch it with the appropriate fireworks. - */ -int sched_cpu_wait_empty(unsigned int cpu) -{ - balance_hotplug_wait(); - return 0; -} - -/* - * Since this CPU is going 'away' for a while, fold any nr_active delta we - * might have. Called from the CPU stopper task after ensuring that the - * stopper is the last running task on the CPU, so nr_active count is - * stable. We need to take the teardown thread which is calling this into - * account, so we hand in adjust = 1 to the load calculation. - * - * Also see the comment "Global load-average calculations". 
- */ -static void calc_load_migrate(struct rq *rq) -{ - long delta = calc_load_fold_active(rq, 1); - - if (delta) - atomic_long_add(delta, &calc_load_tasks); -} - int sched_cpu_dying(unsigned int cpu) { struct rq *rq = cpu_rq(cpu); @@ -8093,7 +7387,12 @@ sched_tick_stop(cpu); rq_lock_irqsave(rq, &rf); - BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq)); + if (rq->rd) { + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + set_rq_offline(rq); + } + migrate_tasks(rq, &rf, true); + BUG_ON(rq->nr_running != 1); rq_unlock_irqrestore(rq, &rf); trace_android_rvh_sched_cpu_dying(cpu); @@ -8304,9 +7603,6 @@ rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); #endif -#ifdef CONFIG_HOTPLUG_CPU - rcuwait_init(&rq->hotplug_wait); -#endif #endif /* CONFIG_SMP */ hrtick_rq_init(rq); atomic_set(&rq->nr_iowait, 0); @@ -8347,7 +7643,7 @@ #ifdef CONFIG_DEBUG_ATOMIC_SLEEP static inline int preempt_count_equals(int preempt_offset) { - int nested = preempt_count() + sched_rcu_preempt_depth(); + int nested = preempt_count() + rcu_preempt_depth(); return (nested == preempt_offset); } @@ -8447,39 +7743,6 @@ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); } EXPORT_SYMBOL_GPL(__cant_sleep); - -#ifdef CONFIG_SMP -void __cant_migrate(const char *file, int line) -{ - static unsigned long prev_jiffy; - - if (irqs_disabled()) - return; - - if (is_migration_disabled(current)) - return; - - if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) - return; - - if (preempt_count() > 0) - return; - - if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) - return; - prev_jiffy = jiffies; - - pr_err("BUG: assuming non migratable context at %s:%d\n", file, line); - pr_err("in_atomic(): %d, irqs_disabled(): %d, migration_disabled() %u pid: %d, name: %s\n", - in_atomic(), irqs_disabled(), is_migration_disabled(current), - current->pid, current->comm); - - debug_show_held_locks(current); - dump_stack(); - add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -} -EXPORT_SYMBOL_GPL(__cant_migrate); -#endif #endif #ifdef CONFIG_MAGIC_SYSRQ -- Gitblit v1.6.2