From 37f49e37ab4cb5d0bc4c60eb5c6d4dd57db767bb Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Fri, 10 May 2024 07:44:59 +0000
Subject: [PATCH] gmac: get MAC from EEPROM
---
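Notes:

The wake_q hunks below remove the sleeper variant (wake_q_add_sleeper() /
__wake_up_q()) and keep only the plain wake_q_add()/wake_up_q() pair. A
minimal usage sketch of the remaining API follows, assuming a made-up lock
structure and field names that are not part of this patch:

    #include <linux/sched/wake_q.h>
    #include <linux/spinlock.h>

    struct foo_lock {                       /* hypothetical structure */
            spinlock_t lock;
            struct task_struct *waiter;     /* task to wake, if any */
    };

    static void foo_wake_waiter(struct foo_lock *fl)
    {
            DEFINE_WAKE_Q(wake_q);

            spin_lock(&fl->lock);
            /* Queue the wakeup while holding the lock; wake_q_add() grabs
             * a task reference when it queues the task. */
            if (fl->waiter)
                    wake_q_add(&wake_q, fl->waiter);
            spin_unlock(&fl->lock);

            /* Perform the wakeups (and drop the references) after unlocking. */
            wake_up_q(&wake_q);
    }

This matches what the restored wake_up_q() below does: it walks head->first
up to WAKE_Q_TAIL, clears each node, calls wake_up_process() and drops the
task reference.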
kernel/kernel/sched/core.c | 1333 +++++++++++++--------------------------------------------
1 file changed, 301 insertions(+), 1032 deletions(-)
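The affinity hunks keep the exported interface stable: kernel users still
call set_cpus_allowed_ptr(task, mask); only the internal
__set_cpus_allowed_ptr() plumbing in core.c goes back to a plain "check"
boolean instead of the SCA_* flag word. A self-contained caller sketch, with
an invented thread function and names that are not taken from this patch:

    #include <linux/cpumask.h>
    #include <linux/err.h>
    #include <linux/kthread.h>
    #include <linux/sched.h>

    static int example_thread_fn(void *data)        /* hypothetical worker */
    {
            while (!kthread_should_stop())
                    schedule_timeout_interruptible(HZ);
            return 0;
    }

    static struct task_struct *example_spawn_on(int cpu)
    {
            struct task_struct *tsk;

            tsk = kthread_create(example_thread_fn, NULL, "example/%d", cpu);
            if (IS_ERR(tsk))
                    return tsk;

            /* Restrict the new thread to one CPU before letting it run. */
            set_cpus_allowed_ptr(tsk, cpumask_of(cpu));
            wake_up_process(tsk);
            return tsk;
    }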
diff --git a/kernel/kernel/sched/core.c b/kernel/kernel/sched/core.c
index e00ae06..30ab811 100644
--- a/kernel/kernel/sched/core.c
+++ b/kernel/kernel/sched/core.c
@@ -78,11 +78,7 @@
* Number of tasks to iterate in a single balance run.
* Limited because this is done with IRQs disabled.
*/
-#ifdef CONFIG_PREEMPT_RT
-const_debug unsigned int sysctl_sched_nr_migrate = 8;
-#else
const_debug unsigned int sysctl_sched_nr_migrate = 32;
-#endif
/*
* period over which we measure -rt task CPU usage in us.
@@ -531,15 +527,9 @@
#endif
#endif
-static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task,
- bool sleeper)
+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task)
{
- struct wake_q_node *node;
-
- if (sleeper)
- node = &task->wake_q_sleeper;
- else
- node = &task->wake_q;
+ struct wake_q_node *node = &task->wake_q;
/*
* Atomically grab the task, if ->wake_q is !nil already it means
@@ -576,13 +566,7 @@
*/
void wake_q_add(struct wake_q_head *head, struct task_struct *task)
{
- if (__wake_q_add(head, task, false))
- get_task_struct(task);
-}
-
-void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task)
-{
- if (__wake_q_add(head, task, true))
+ if (__wake_q_add(head, task))
get_task_struct(task);
}
@@ -605,40 +589,29 @@
*/
void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task)
{
- if (!__wake_q_add(head, task, false))
+ if (!__wake_q_add(head, task))
put_task_struct(task);
}
-void __wake_up_q(struct wake_q_head *head, bool sleeper)
+void wake_up_q(struct wake_q_head *head)
{
struct wake_q_node *node = head->first;
while (node != WAKE_Q_TAIL) {
struct task_struct *task;
- if (sleeper)
- task = container_of(node, struct task_struct, wake_q_sleeper);
- else
- task = container_of(node, struct task_struct, wake_q);
-
+ task = container_of(node, struct task_struct, wake_q);
BUG_ON(!task);
/* Task can safely be re-inserted now: */
node = node->next;
+ task->wake_q.next = NULL;
task->wake_q_count = head->count;
- if (sleeper)
- task->wake_q_sleeper.next = NULL;
- else
- task->wake_q.next = NULL;
/*
* wake_up_process() executes a full barrier, which pairs with
* the queueing in wake_q_add() so as not to miss wakeups.
*/
- if (sleeper)
- wake_up_lock_sleeper(task);
- else
- wake_up_process(task);
-
+ wake_up_process(task);
task->wake_q_count = 0;
put_task_struct(task);
}
@@ -675,48 +648,6 @@
trace_sched_wake_idle_without_ipi(cpu);
}
EXPORT_SYMBOL_GPL(resched_curr);
-
-#ifdef CONFIG_PREEMPT_LAZY
-
-static int tsk_is_polling(struct task_struct *p)
-{
-#ifdef TIF_POLLING_NRFLAG
- return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
-#else
- return 0;
-#endif
-}
-
-void resched_curr_lazy(struct rq *rq)
-{
- struct task_struct *curr = rq->curr;
- int cpu;
-
- if (!sched_feat(PREEMPT_LAZY)) {
- resched_curr(rq);
- return;
- }
-
- lockdep_assert_held(&rq->lock);
-
- if (test_tsk_need_resched(curr))
- return;
-
- if (test_tsk_need_resched_lazy(curr))
- return;
-
- set_tsk_need_resched_lazy(curr);
-
- cpu = cpu_of(rq);
- if (cpu == smp_processor_id())
- return;
-
- /* NEED_RESCHED_LAZY must be visible before we test polling */
- smp_mb();
- if (!tsk_is_polling(curr))
- smp_send_reschedule(cpu);
-}
-#endif
void resched_cpu(int cpu)
{
@@ -1087,7 +1018,7 @@
if (!(rq->uclamp_flags & UCLAMP_FLAG_IDLE))
return;
- WRITE_ONCE(rq->uclamp[clamp_id].value, clamp_value);
+ uclamp_rq_set(rq, clamp_id, clamp_value);
}
static inline
@@ -1280,8 +1211,8 @@
if (bucket->tasks == 1 || uc_se->value > bucket->value)
bucket->value = uc_se->value;
- if (uc_se->value > READ_ONCE(uc_rq->value))
- WRITE_ONCE(uc_rq->value, uc_se->value);
+ if (uc_se->value > uclamp_rq_get(rq, clamp_id))
+ uclamp_rq_set(rq, clamp_id, uc_se->value);
}
/*
@@ -1347,7 +1278,7 @@
if (likely(bucket->tasks))
return;
- rq_clamp = READ_ONCE(uc_rq->value);
+ rq_clamp = uclamp_rq_get(rq, clamp_id);
/*
* Defensive programming: this should never happen. If it happens,
* e.g. due to future modification, warn and fixup the expected value.
@@ -1355,7 +1286,7 @@
SCHED_WARN_ON(bucket->value > rq_clamp);
if (bucket->value >= rq_clamp) {
bkt_clamp = uclamp_rq_max_value(rq, clamp_id, uc_se->value);
- WRITE_ONCE(uc_rq->value, bkt_clamp);
+ uclamp_rq_set(rq, clamp_id, bkt_clamp);
}
}
@@ -1761,6 +1692,9 @@
void activate_task(struct rq *rq, struct task_struct *p, int flags)
{
+ if (task_on_rq_migrating(p))
+ flags |= ENQUEUE_MIGRATED;
+
enqueue_task(rq, p, flags);
p->on_rq = TASK_ON_RQ_QUEUED;
@@ -1870,82 +1804,6 @@
#ifdef CONFIG_SMP
-static void
-__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags);
-
-static int __set_cpus_allowed_ptr(struct task_struct *p,
- const struct cpumask *new_mask,
- u32 flags);
-
-static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
-{
- if (likely(!p->migration_disabled))
- return;
-
- if (p->cpus_ptr != &p->cpus_mask)
- return;
-
- /*
- * Violates locking rules! see comment in __do_set_cpus_allowed().
- */
- __do_set_cpus_allowed(p, cpumask_of(rq->cpu), SCA_MIGRATE_DISABLE);
-}
-
-void migrate_disable(void)
-{
- struct task_struct *p = current;
-
- if (p->migration_disabled) {
- p->migration_disabled++;
- return;
- }
-
- trace_sched_migrate_disable_tp(p);
-
- preempt_disable();
- this_rq()->nr_pinned++;
- p->migration_disabled = 1;
- preempt_lazy_disable();
- preempt_enable();
-}
-EXPORT_SYMBOL_GPL(migrate_disable);
-
-void migrate_enable(void)
-{
- struct task_struct *p = current;
-
- if (p->migration_disabled > 1) {
- p->migration_disabled--;
- return;
- }
-
- /*
- * Ensure stop_task runs either before or after this, and that
- * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
- */
- preempt_disable();
- if (p->cpus_ptr != &p->cpus_mask)
- __set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE);
- /*
- * Mustn't clear migration_disabled() until cpus_ptr points back at the
- * regular cpus_mask, otherwise things that race (eg.
- * select_fallback_rq) get confused.
- */
- barrier();
- p->migration_disabled = 0;
- this_rq()->nr_pinned--;
- preempt_lazy_enable();
- preempt_enable();
-
- trace_sched_migrate_enable_tp(p);
-}
-EXPORT_SYMBOL_GPL(migrate_enable);
-
-static inline bool rq_has_pinned_tasks(struct rq *rq)
-{
- return rq->nr_pinned;
-}
-
/*
* Per-CPU kthreads are allowed to run on !active && online CPUs, see
* __set_cpus_allowed_ptr() and select_fallback_rq().
@@ -1955,7 +1813,7 @@
if (!cpumask_test_cpu(cpu, p->cpus_ptr))
return false;
- if (is_per_cpu_kthread(p) || is_migration_disabled(p))
+ if (is_per_cpu_kthread(p))
return cpu_online(cpu);
if (!cpu_active(cpu))
@@ -2015,21 +1873,8 @@
}
struct migration_arg {
- struct task_struct *task;
- int dest_cpu;
- struct set_affinity_pending *pending;
-};
-
-/*
- * @refs: number of wait_for_completion()
- * @stop_pending: is @stop_work in use
- */
-struct set_affinity_pending {
- refcount_t refs;
- unsigned int stop_pending;
- struct completion done;
- struct cpu_stop_work stop_work;
- struct migration_arg arg;
+ struct task_struct *task;
+ int dest_cpu;
};
/*
@@ -2062,17 +1907,15 @@
static int migration_cpu_stop(void *data)
{
struct migration_arg *arg = data;
- struct set_affinity_pending *pending = arg->pending;
struct task_struct *p = arg->task;
struct rq *rq = this_rq();
- bool complete = false;
struct rq_flags rf;
/*
* The original target CPU might have gone down and we might
* be on another CPU but it doesn't matter.
*/
- local_irq_save(rf.flags);
+ local_irq_disable();
/*
* We need to explicitly wake pending tasks before running
* __migrate_task() such that we will not miss enforcing cpus_ptr
@@ -2082,121 +1925,21 @@
raw_spin_lock(&p->pi_lock);
rq_lock(rq, &rf);
-
/*
* If task_rq(p) != rq, it cannot be migrated here, because we're
* holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
* we're holding p->pi_lock.
*/
if (task_rq(p) == rq) {
- if (is_migration_disabled(p))
- goto out;
-
- if (pending) {
- if (p->migration_pending == pending)
- p->migration_pending = NULL;
- complete = true;
-
- if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask))
- goto out;
- }
-
if (task_on_rq_queued(p))
rq = __migrate_task(rq, &rf, p, arg->dest_cpu);
else
p->wake_cpu = arg->dest_cpu;
-
- /*
- * XXX __migrate_task() can fail, at which point we might end
- * up running on a dodgy CPU, AFAICT this can only happen
- * during CPU hotplug, at which point we'll get pushed out
- * anyway, so it's probably not a big deal.
- */
-
- } else if (pending) {
- /*
- * This happens when we get migrated between migrate_enable()'s
- * preempt_enable() and scheduling the stopper task. At that
- * point we're a regular task again and not current anymore.
- *
- * A !PREEMPT kernel has a giant hole here, which makes it far
- * more likely.
- */
-
- /*
- * The task moved before the stopper got to run. We're holding
- * ->pi_lock, so the allowed mask is stable - if it got
- * somewhere allowed, we're done.
- */
- if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
- if (p->migration_pending == pending)
- p->migration_pending = NULL;
- complete = true;
- goto out;
- }
-
- /*
- * When migrate_enable() hits a rq mis-match we can't reliably
- * determine is_migration_disabled() and so have to chase after
- * it.
- */
- WARN_ON_ONCE(!pending->stop_pending);
- task_rq_unlock(rq, p, &rf);
- stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
- &pending->arg, &pending->stop_work);
- return 0;
}
-out:
- if (pending)
- pending->stop_pending = false;
- task_rq_unlock(rq, p, &rf);
+ rq_unlock(rq, &rf);
+ raw_spin_unlock(&p->pi_lock);
- if (complete)
- complete_all(&pending->done);
-
- return 0;
-}
-
-int push_cpu_stop(void *arg)
-{
- struct rq *lowest_rq = NULL, *rq = this_rq();
- struct task_struct *p = arg;
-
- raw_spin_lock_irq(&p->pi_lock);
- raw_spin_lock(&rq->lock);
-
- if (task_rq(p) != rq)
- goto out_unlock;
-
- if (is_migration_disabled(p)) {
- p->migration_flags |= MDF_PUSH;
- goto out_unlock;
- }
-
- p->migration_flags &= ~MDF_PUSH;
-
- if (p->sched_class->find_lock_rq)
- lowest_rq = p->sched_class->find_lock_rq(p, rq);
-
- if (!lowest_rq)
- goto out_unlock;
-
- // XXX validate p is still the highest prio task
- if (task_rq(p) == rq) {
- deactivate_task(rq, p, 0);
- set_task_cpu(p, lowest_rq->cpu);
- activate_task(lowest_rq, p, 0);
- resched_curr(lowest_rq);
- }
-
- double_unlock_balance(rq, lowest_rq);
-
-out_unlock:
- rq->push_busy = false;
- raw_spin_unlock(&rq->lock);
- raw_spin_unlock_irq(&p->pi_lock);
-
- put_task_struct(p);
+ local_irq_enable();
return 0;
}
@@ -2204,40 +1947,19 @@
* sched_class::set_cpus_allowed must do the below, but is not required to
* actually call this function.
*/
-void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags)
+void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
{
- if (flags & (SCA_MIGRATE_ENABLE | SCA_MIGRATE_DISABLE)) {
- p->cpus_ptr = new_mask;
- return;
- }
-
cpumask_copy(&p->cpus_mask, new_mask);
p->nr_cpus_allowed = cpumask_weight(new_mask);
trace_android_rvh_set_cpus_allowed_comm(p, new_mask);
}
-static void
-__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags)
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
struct rq *rq = task_rq(p);
bool queued, running;
- /*
- * This here violates the locking rules for affinity, since we're only
- * supposed to change these variables while holding both rq->lock and
- * p->pi_lock.
- *
- * HOWEVER, it magically works, because ttwu() is the only code that
- * accesses these variables under p->pi_lock and only does so after
- * smp_cond_load_acquire(&p->on_cpu, !VAL), and we're in __schedule()
- * before finish_task().
- *
- * XXX do further audits, this smells like something putrid.
- */
- if (flags & SCA_MIGRATE_DISABLE)
- SCHED_WARN_ON(!p->on_cpu);
- else
- lockdep_assert_held(&p->pi_lock);
+ lockdep_assert_held(&p->pi_lock);
queued = task_on_rq_queued(p);
running = task_current(rq, p);
@@ -2253,7 +1975,7 @@
if (running)
put_prev_task(rq, p);
- p->sched_class->set_cpus_allowed(p, new_mask, flags);
+ p->sched_class->set_cpus_allowed(p, new_mask);
if (queued)
enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
@@ -2261,14 +1983,12 @@
set_next_task(rq, p);
}
-static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
- int dest_cpu, unsigned int flags);
/*
* Called with both p->pi_lock and rq->lock held; drops both before returning.
*/
static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
const struct cpumask *new_mask,
- u32 flags,
+ bool check,
struct rq *rq,
struct rq_flags *rf)
{
@@ -2279,14 +1999,9 @@
update_rq_clock(rq);
- if (p->flags & PF_KTHREAD || is_migration_disabled(p)) {
+ if (p->flags & PF_KTHREAD) {
/*
- * Kernel threads are allowed on online && !active CPUs.
- *
- * Specifically, migration_disabled() tasks must not fail the
- * cpumask_any_and_distribute() pick below, esp. so on
- * SCA_MIGRATE_ENABLE, otherwise we'll not call
- * set_cpus_allowed_common() and actually reset p->cpus_ptr.
+ * Kernel threads are allowed on online && !active CPUs
*/
cpu_valid_mask = cpu_online_mask;
} else if (!cpumask_subset(new_mask, cpu_allowed_mask)) {
@@ -2298,22 +2013,13 @@
* Must re-check here, to close a race against __kthread_bind(),
* sched_setaffinity() is not guaranteed to observe the flag.
*/
- if ((flags & SCA_CHECK) && (p->flags & PF_NO_SETAFFINITY)) {
+ if (check && (p->flags & PF_NO_SETAFFINITY)) {
ret = -EINVAL;
goto out;
}
- if (!(flags & SCA_MIGRATE_ENABLE)) {
- if (cpumask_equal(&p->cpus_mask, new_mask))
- goto out;
-
- if (WARN_ON_ONCE(p == current &&
- is_migration_disabled(p) &&
- !cpumask_test_cpu(task_cpu(p), new_mask))) {
- ret = -EBUSY;
- goto out;
- }
- }
+ if (cpumask_equal(&p->cpus_mask, new_mask))
+ goto out;
/*
* Picking a ~random cpu helps in cases where we are changing affinity
@@ -2326,7 +2032,7 @@
goto out;
}
- __do_set_cpus_allowed(p, new_mask, flags);
+ do_set_cpus_allowed(p, new_mask);
if (p->flags & PF_KTHREAD) {
/*
@@ -2338,227 +2044,27 @@
p->nr_cpus_allowed != 1);
}
- return affine_move_task(rq, p, rf, dest_cpu, flags);
+ /* Can the task run on the task's current CPU? If so, we're done */
+ if (cpumask_test_cpu(task_cpu(p), new_mask))
+ goto out;
+
+ if (task_running(rq, p) || p->state == TASK_WAKING) {
+ struct migration_arg arg = { p, dest_cpu };
+ /* Need help from migration thread: drop lock and wait. */
+ task_rq_unlock(rq, p, rf);
+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
+ return 0;
+ } else if (task_on_rq_queued(p)) {
+ /*
+ * OK, since we're going to drop the lock immediately
+ * afterwards anyway.
+ */
+ rq = move_queued_task(rq, rf, p, dest_cpu);
+ }
out:
task_rq_unlock(rq, p, rf);
return ret;
-}
-
-void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
-{
- __do_set_cpus_allowed(p, new_mask, 0);
-}
-
-/*
- * This function is wildly self concurrent; here be dragons.
- *
- *
- * When given a valid mask, __set_cpus_allowed_ptr() must block until the
- * designated task is enqueued on an allowed CPU. If that task is currently
- * running, we have to kick it out using the CPU stopper.
- *
- * Migrate-Disable comes along and tramples all over our nice sandcastle.
- * Consider:
- *
- * Initial conditions: P0->cpus_mask = [0, 1]
- *
- * P0@CPU0 P1
- *
- * migrate_disable();
- * <preempted>
- * set_cpus_allowed_ptr(P0, [1]);
- *
- * P1 *cannot* return from this set_cpus_allowed_ptr() call until P0 executes
- * its outermost migrate_enable() (i.e. it exits its Migrate-Disable region).
- * This means we need the following scheme:
- *
- * P0@CPU0 P1
- *
- * migrate_disable();
- * <preempted>
- * set_cpus_allowed_ptr(P0, [1]);
- * <blocks>
- * <resumes>
- * migrate_enable();
- * __set_cpus_allowed_ptr();
- * <wakes local stopper>
- * `--> <woken on migration completion>
- *
- * Now the fun stuff: there may be several P1-like tasks, i.e. multiple
- * concurrent set_cpus_allowed_ptr(P0, [*]) calls. CPU affinity changes of any
- * task p are serialized by p->pi_lock, which we can leverage: the one that
- * should come into effect at the end of the Migrate-Disable region is the last
- * one. This means we only need to track a single cpumask (i.e. p->cpus_mask),
- * but we still need to properly signal those waiting tasks at the appropriate
- * moment.
- *
- * This is implemented using struct set_affinity_pending. The first
- * __set_cpus_allowed_ptr() caller within a given Migrate-Disable region will
- * setup an instance of that struct and install it on the targeted task_struct.
- * Any and all further callers will reuse that instance. Those then wait for
- * a completion signaled at the tail of the CPU stopper callback (1), triggered
- * on the end of the Migrate-Disable region (i.e. outermost migrate_enable()).
- *
- *
- * (1) In the cases covered above. There is one more where the completion is
- * signaled within affine_move_task() itself: when a subsequent affinity request
- * cancels the need for an active migration. Consider:
- *
- * Initial conditions: P0->cpus_mask = [0, 1]
- *
- * P0@CPU0 P1 P2
- *
- * migrate_disable();
- * <preempted>
- * set_cpus_allowed_ptr(P0, [1]);
- * <blocks>
- * set_cpus_allowed_ptr(P0, [0, 1]);
- * <signal completion>
- * <awakes>
- *
- * Note that the above is safe vs a concurrent migrate_enable(), as any
- * pending affinity completion is preceded an uninstallion of
- * p->migration_pending done with p->pi_lock held.
- */
-static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
- int dest_cpu, unsigned int flags)
-{
- struct set_affinity_pending my_pending = { }, *pending = NULL;
- bool stop_pending, complete = false;
-
- /* Can the task run on the task's current CPU? If so, we're done */
- if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
- struct task_struct *push_task = NULL;
-
- if ((flags & SCA_MIGRATE_ENABLE) &&
- (p->migration_flags & MDF_PUSH) && !rq->push_busy) {
- rq->push_busy = true;
- push_task = get_task_struct(p);
- }
-
- /*
- * If there are pending waiters, but no pending stop_work,
- * then complete now.
- */
- pending = p->migration_pending;
- if (pending && !pending->stop_pending) {
- p->migration_pending = NULL;
- complete = true;
- }
-
- task_rq_unlock(rq, p, rf);
-
- if (push_task) {
- stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
- p, &rq->push_work);
- }
-
- if (complete)
- complete_all(&pending->done);
-
- return 0;
- }
-
- if (!(flags & SCA_MIGRATE_ENABLE)) {
- /* serialized by p->pi_lock */
- if (!p->migration_pending) {
- /* Install the request */
- refcount_set(&my_pending.refs, 1);
- init_completion(&my_pending.done);
- my_pending.arg = (struct migration_arg) {
- .task = p,
- .dest_cpu = dest_cpu,
- .pending = &my_pending,
- };
-
- p->migration_pending = &my_pending;
- } else {
- pending = p->migration_pending;
- refcount_inc(&pending->refs);
- /*
- * Affinity has changed, but we've already installed a
- * pending. migration_cpu_stop() *must* see this, else
- * we risk a completion of the pending despite having a
- * task on a disallowed CPU.
- *
- * Serialized by p->pi_lock, so this is safe.
- */
- pending->arg.dest_cpu = dest_cpu;
- }
- }
- pending = p->migration_pending;
- /*
- * - !MIGRATE_ENABLE:
- * we'll have installed a pending if there wasn't one already.
- *
- * - MIGRATE_ENABLE:
- * we're here because the current CPU isn't matching anymore,
- * the only way that can happen is because of a concurrent
- * set_cpus_allowed_ptr() call, which should then still be
- * pending completion.
- *
- * Either way, we really should have a @pending here.
- */
- if (WARN_ON_ONCE(!pending)) {
- task_rq_unlock(rq, p, rf);
- return -EINVAL;
- }
-
- if (task_running(rq, p) || p->state == TASK_WAKING) {
- /*
- * MIGRATE_ENABLE gets here because 'p == current', but for
- * anything else we cannot do is_migration_disabled(), punt
- * and have the stopper function handle it all race-free.
- */
- stop_pending = pending->stop_pending;
- if (!stop_pending)
- pending->stop_pending = true;
-
- if (flags & SCA_MIGRATE_ENABLE)
- p->migration_flags &= ~MDF_PUSH;
-
- task_rq_unlock(rq, p, rf);
-
- if (!stop_pending) {
- stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
- &pending->arg, &pending->stop_work);
- }
-
- if (flags & SCA_MIGRATE_ENABLE)
- return 0;
- } else {
-
- if (!is_migration_disabled(p)) {
- if (task_on_rq_queued(p))
- rq = move_queued_task(rq, rf, p, dest_cpu);
-
- if (!pending->stop_pending) {
- p->migration_pending = NULL;
- complete = true;
- }
- }
- task_rq_unlock(rq, p, rf);
-
- if (complete)
- complete_all(&pending->done);
- }
-
- wait_for_completion(&pending->done);
-
- if (refcount_dec_and_test(&pending->refs))
- wake_up_var(&pending->refs); /* No UaF, just an address */
-
- /*
- * Block the original owner of &pending until all subsequent callers
- * have seen the completion and decremented the refcount
- */
- wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs));
-
- /* ARGH */
- WARN_ON_ONCE(my_pending.stop_pending);
-
- return 0;
}
/*
@@ -2571,19 +2077,18 @@
* call is not atomic; no spinlocks may be held.
*/
static int __set_cpus_allowed_ptr(struct task_struct *p,
- const struct cpumask *new_mask,
- u32 flags)
+ const struct cpumask *new_mask, bool check)
{
struct rq_flags rf;
struct rq *rq;
rq = task_rq_lock(p, &rf);
- return __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, &rf);
+ return __set_cpus_allowed_ptr_locked(p, new_mask, check, rq, &rf);
}
int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
{
- return __set_cpus_allowed_ptr(p, new_mask, 0);
+ return __set_cpus_allowed_ptr(p, new_mask, false);
}
EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
@@ -2692,8 +2197,6 @@
* Clearly, migrating tasks to offline CPUs is a fairly daft thing.
*/
WARN_ON_ONCE(!cpu_online(new_cpu));
-
- WARN_ON_ONCE(is_migration_disabled(p));
#endif
trace_sched_migrate_task(p, new_cpu);
@@ -2827,18 +2330,6 @@
}
EXPORT_SYMBOL_GPL(migrate_swap);
-static bool check_task_state(struct task_struct *p, long match_state)
-{
- bool match = false;
-
- raw_spin_lock_irq(&p->pi_lock);
- if (p->state == match_state || p->saved_state == match_state)
- match = true;
- raw_spin_unlock_irq(&p->pi_lock);
-
- return match;
-}
-
/*
* wait_task_inactive - wait for a thread to unschedule.
*
@@ -2883,7 +2374,7 @@
* is actually now running somewhere else!
*/
while (task_running(rq, p)) {
- if (match_state && !check_task_state(p, match_state))
+ if (match_state && unlikely(p->state != match_state))
return 0;
cpu_relax();
}
@@ -2898,8 +2389,7 @@
running = task_running(rq, p);
queued = task_on_rq_queued(p);
ncsw = 0;
- if (!match_state || p->state == match_state ||
- p->saved_state == match_state)
+ if (!match_state || p->state == match_state)
ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
task_rq_unlock(rq, p, &rf);
@@ -2933,7 +2423,7 @@
ktime_t to = NSEC_PER_SEC / HZ;
set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
+ schedule_hrtimeout(&to, HRTIMER_MODE_REL);
continue;
}
@@ -3040,12 +2530,6 @@
}
fallthrough;
case possible:
- /*
- * XXX When called from select_task_rq() we only
- * hold p->pi_lock and again violate locking order.
- *
- * More yuck to audit.
- */
do_set_cpus_allowed(p, task_cpu_possible_mask(p));
state = fail;
break;
@@ -3079,7 +2563,7 @@
{
lockdep_assert_held(&p->pi_lock);
- if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p))
+ if (p->nr_cpus_allowed > 1)
cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
else
cpu = cpumask_any(p->cpus_ptr);
@@ -3102,7 +2586,6 @@
void sched_set_stop_task(int cpu, struct task_struct *stop)
{
- static struct lock_class_key stop_pi_lock;
struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
struct task_struct *old_stop = cpu_rq(cpu)->stop;
@@ -3118,20 +2601,6 @@
sched_setscheduler_nocheck(stop, SCHED_FIFO, ¶m);
stop->sched_class = &stop_sched_class;
-
- /*
- * The PI code calls rt_mutex_setprio() with ->pi_lock held to
- * adjust the effective priority of a task. As a result,
- * rt_mutex_setprio() can trigger (RT) balancing operations,
- * which can then trigger wakeups of the stop thread to push
- * around the current task.
- *
- * The stop task itself will never be part of the PI-chain, it
- * never blocks, therefore that ->pi_lock recursion is safe.
- * Tell lockdep about this by placing the stop->pi_lock in its
- * own class.
- */
- lockdep_set_class(&stop->pi_lock, &stop_pi_lock);
}
cpu_rq(cpu)->stop = stop;
@@ -3145,23 +2614,15 @@
}
}
-#else /* CONFIG_SMP */
+#else
static inline int __set_cpus_allowed_ptr(struct task_struct *p,
- const struct cpumask *new_mask,
- u32 flags)
+ const struct cpumask *new_mask, bool check)
{
return set_cpus_allowed_ptr(p, new_mask);
}
-static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { }
-
-static inline bool rq_has_pinned_tasks(struct rq *rq)
-{
- return false;
-}
-
-#endif /* !CONFIG_SMP */
+#endif /* CONFIG_SMP */
static void
ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
@@ -3595,7 +3056,7 @@
int cpu, success = 0;
preempt_disable();
- if (!IS_ENABLED(CONFIG_PREEMPT_RT) && p == current) {
+ if (p == current) {
/*
* We're waking current, this means 'p->on_rq' and 'task_cpu(p)
* == smp_processor_id()'. Together this means we can special
@@ -3625,26 +3086,8 @@
*/
raw_spin_lock_irqsave(&p->pi_lock, flags);
smp_mb__after_spinlock();
- if (!(p->state & state)) {
- /*
- * The task might be running due to a spinlock sleeper
- * wakeup. Check the saved state and set it to running
- * if the wakeup condition is true.
- */
- if (!(wake_flags & WF_LOCK_SLEEPER)) {
- if (p->saved_state & state) {
- p->saved_state = TASK_RUNNING;
- success = 1;
- }
- }
+ if (!(p->state & state))
goto unlock;
- }
- /*
- * If this is a regular wakeup, then we can unconditionally
- * clear the saved state of a "lock sleeper".
- */
- if (!(wake_flags & WF_LOCK_SLEEPER))
- p->saved_state = TASK_RUNNING;
#ifdef CONFIG_FREEZER
/*
@@ -3853,18 +3296,6 @@
}
EXPORT_SYMBOL(wake_up_process);
-/**
- * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
- * @p: The process to be woken up.
- *
- * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
- * the nature of the wakeup.
- */
-int wake_up_lock_sleeper(struct task_struct *p)
-{
- return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER);
-}
-
int wake_up_state(struct task_struct *p, unsigned int state)
{
return try_to_wake_up(p, state, 0);
@@ -3920,7 +3351,6 @@
init_numa_balancing(clone_flags, p);
#ifdef CONFIG_SMP
p->wake_entry.u_flags = CSD_TYPE_TTWU;
- p->migration_pending = NULL;
#endif
}
@@ -4099,9 +3529,6 @@
p->on_cpu = 0;
#endif
init_task_preempt_count(p);
-#ifdef CONFIG_HAVE_PREEMPT_LAZY
- task_thread_info(p)->preempt_lazy_count = 0;
-#endif
#ifdef CONFIG_SMP
plist_node_init(&p->pushable_tasks, MAX_PRIO);
RB_CLEAR_NODE(&p->pushable_dl_tasks);
@@ -4329,90 +3756,6 @@
#endif
}
-#ifdef CONFIG_SMP
-
-static void do_balance_callbacks(struct rq *rq, struct callback_head *head)
-{
- void (*func)(struct rq *rq);
- struct callback_head *next;
-
- lockdep_assert_held(&rq->lock);
-
- while (head) {
- func = (void (*)(struct rq *))head->func;
- next = head->next;
- head->next = NULL;
- head = next;
-
- func(rq);
- }
-}
-
-static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
-{
- struct callback_head *head = rq->balance_callback;
-
- lockdep_assert_held(&rq->lock);
- if (head) {
- rq->balance_callback = NULL;
- rq->balance_flags &= ~BALANCE_WORK;
- }
-
- return head;
-}
-
-static void __balance_callbacks(struct rq *rq)
-{
- do_balance_callbacks(rq, splice_balance_callbacks(rq));
-}
-
-static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
-{
- unsigned long flags;
-
- if (unlikely(head)) {
- raw_spin_lock_irqsave(&rq->lock, flags);
- do_balance_callbacks(rq, head);
- raw_spin_unlock_irqrestore(&rq->lock, flags);
- }
-}
-
-static void balance_push(struct rq *rq);
-
-static inline void balance_switch(struct rq *rq)
-{
- if (likely(!rq->balance_flags))
- return;
-
- if (rq->balance_flags & BALANCE_PUSH) {
- balance_push(rq);
- return;
- }
-
- __balance_callbacks(rq);
-}
-
-#else
-
-static inline void __balance_callbacks(struct rq *rq)
-{
-}
-
-static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
-{
- return NULL;
-}
-
-static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
-{
-}
-
-static inline void balance_switch(struct rq *rq)
-{
-}
-
-#endif
-
static inline void
prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
{
@@ -4438,7 +3781,6 @@
* prev into current:
*/
spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
- balance_switch(rq);
raw_spin_unlock_irq(&rq->lock);
}
@@ -4453,22 +3795,6 @@
#ifndef finish_arch_post_lock_switch
# define finish_arch_post_lock_switch() do { } while (0)
#endif
-
-static inline void kmap_local_sched_out(void)
-{
-#ifdef CONFIG_KMAP_LOCAL
- if (unlikely(current->kmap_ctrl.idx))
- __kmap_local_sched_out();
-#endif
-}
-
-static inline void kmap_local_sched_in(void)
-{
-#ifdef CONFIG_KMAP_LOCAL
- if (unlikely(current->kmap_ctrl.idx))
- __kmap_local_sched_in();
-#endif
-}
/**
* prepare_task_switch - prepare to switch tasks
@@ -4492,7 +3818,6 @@
perf_event_task_sched_out(prev, next);
rseq_preempt(prev);
fire_sched_out_preempt_notifiers(prev, next);
- kmap_local_sched_out();
prepare_task(next);
prepare_arch_switch(next);
}
@@ -4559,7 +3884,6 @@
finish_lock_switch(rq);
finish_arch_post_lock_switch();
kcov_finish_switch(current);
- kmap_local_sched_in();
fire_sched_in_preempt_notifiers(current);
/*
@@ -4574,17 +3898,23 @@
* provided by mmdrop(),
* - a sync_core for SYNC_CORE.
*/
- /*
- * We use mmdrop_delayed() here so we don't have to do the
- * full __mmdrop() when we are the last user.
- */
if (mm) {
membarrier_mm_sync_core_before_usermode(mm);
- mmdrop_delayed(mm);
+ mmdrop(mm);
}
if (unlikely(prev_state == TASK_DEAD)) {
if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev);
+
+ /*
+ * Remove function-return probe instances associated with this
+ * task and put them back on the free list.
+ */
+ kprobe_flush_task(prev);
+ trace_android_rvh_flush_task(prev);
+
+ /* Task is done with its stack. */
+ put_task_stack(prev);
put_task_struct_rcu_user(prev);
}
@@ -4592,6 +3922,43 @@
tick_nohz_task_switch();
return rq;
}
+
+#ifdef CONFIG_SMP
+
+/* rq->lock is NOT held, but preemption is disabled */
+static void __balance_callback(struct rq *rq)
+{
+ struct callback_head *head, *next;
+ void (*func)(struct rq *rq);
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&rq->lock, flags);
+ head = rq->balance_callback;
+ rq->balance_callback = NULL;
+ while (head) {
+ func = (void (*)(struct rq *))head->func;
+ next = head->next;
+ head->next = NULL;
+ head = next;
+
+ func(rq);
+ }
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static inline void balance_callback(struct rq *rq)
+{
+ if (unlikely(rq->balance_callback))
+ __balance_callback(rq);
+}
+
+#else
+
+static inline void balance_callback(struct rq *rq)
+{
+}
+
+#endif
/**
* schedule_tail - first thing a freshly forked thread must call.
@@ -4612,6 +3979,7 @@
*/
rq = finish_task_switch(prev);
+ balance_callback(rq);
preempt_enable();
if (current->set_child_tid)
@@ -5170,8 +4538,7 @@
pr_err("Preemption disabled at:");
print_ip_sym(KERN_ERR, preempt_disable_ip);
}
- if (panic_on_warn)
- panic("scheduling while atomic\n");
+ check_panic_on_warn("scheduling while atomic");
trace_android_rvh_schedule_bug(prev);
@@ -5317,7 +4684,7 @@
*
* WARNING: must be called with preemption disabled!
*/
-static void __sched notrace __schedule(bool preempt, bool spinning_lock)
+static void __sched notrace __schedule(bool preempt)
{
struct task_struct *prev, *next;
unsigned long *switch_count;
@@ -5370,7 +4737,7 @@
* - ptrace_{,un}freeze_traced() can change ->state underneath us.
*/
prev_state = prev->state;
- if ((!preempt || spinning_lock) && prev_state) {
+ if (!preempt && prev_state) {
if (signal_pending_state(prev_state, prev)) {
prev->state = TASK_RUNNING;
} else {
@@ -5405,7 +4772,6 @@
next = pick_next_task(rq, prev, &rf);
clear_tsk_need_resched(prev);
- clear_tsk_need_resched_lazy(prev);
clear_preempt_need_resched();
trace_android_rvh_schedule(prev, next, rq);
@@ -5432,7 +4798,6 @@
*/
++*switch_count;
- migrate_disable_switch(rq, prev);
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
trace_sched_switch(preempt, prev, next);
@@ -5441,11 +4806,10 @@
rq = context_switch(rq, prev, next, &rf);
} else {
rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
-
- rq_unpin_lock(rq, &rf);
- __balance_callbacks(rq);
- raw_spin_unlock_irq(&rq->lock);
+ rq_unlock_irq(rq, &rf);
}
+
+ balance_callback(rq);
}
void __noreturn do_task_dead(void)
@@ -5456,7 +4820,7 @@
/* Tell freezer to ignore us: */
current->flags |= PF_NOFREEZE;
- __schedule(false, false);
+ __schedule(false);
BUG();
/* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */
@@ -5489,6 +4853,9 @@
preempt_enable_no_resched();
}
+ if (tsk_is_pi_blocked(tsk))
+ return;
+
/*
* If we are going to sleep and we have plugged IO queued,
* make sure to submit it to avoid deadlocks.
@@ -5514,7 +4881,7 @@
sched_submit_work(tsk);
do {
preempt_disable();
- __schedule(false, false);
+ __schedule(false);
sched_preempt_enable_no_resched();
} while (need_resched());
sched_update_worker(tsk);
@@ -5542,7 +4909,7 @@
*/
WARN_ON_ONCE(current->state);
do {
- __schedule(false, false);
+ __schedule(false);
} while (need_resched());
}
@@ -5595,7 +4962,7 @@
*/
preempt_disable_notrace();
preempt_latency_start(1);
- __schedule(true, false);
+ __schedule(true);
preempt_latency_stop(1);
preempt_enable_no_resched_notrace();
@@ -5605,30 +4972,6 @@
*/
} while (need_resched());
}
-
-#ifdef CONFIG_PREEMPT_LAZY
-/*
- * If TIF_NEED_RESCHED is then we allow to be scheduled away since this is
- * set by a RT task. Oterwise we try to avoid beeing scheduled out as long as
- * preempt_lazy_count counter >0.
- */
-static __always_inline int preemptible_lazy(void)
-{
- if (test_thread_flag(TIF_NEED_RESCHED))
- return 1;
- if (current_thread_info()->preempt_lazy_count)
- return 0;
- return 1;
-}
-
-#else
-
-static inline int preemptible_lazy(void)
-{
- return 1;
-}
-
-#endif
#ifdef CONFIG_PREEMPTION
/*
@@ -5643,25 +4986,11 @@
*/
if (likely(!preemptible()))
return;
- if (!preemptible_lazy())
- return;
+
preempt_schedule_common();
}
NOKPROBE_SYMBOL(preempt_schedule);
EXPORT_SYMBOL(preempt_schedule);
-
-#ifdef CONFIG_PREEMPT_RT
-void __sched notrace preempt_schedule_lock(void)
-{
- do {
- preempt_disable();
- __schedule(true, true);
- sched_preempt_enable_no_resched();
- } while (need_resched());
-}
-NOKPROBE_SYMBOL(preempt_schedule_lock);
-EXPORT_SYMBOL(preempt_schedule_lock);
-#endif
/**
* preempt_schedule_notrace - preempt_schedule called by tracing
@@ -5682,9 +5011,6 @@
enum ctx_state prev_ctx;
if (likely(!preemptible()))
- return;
-
- if (!preemptible_lazy())
return;
do {
@@ -5709,7 +5035,7 @@
* an infinite recursion.
*/
prev_ctx = exception_enter();
- __schedule(true, false);
+ __schedule(true);
exception_exit(prev_ctx);
preempt_latency_stop(1);
@@ -5738,7 +5064,7 @@
do {
preempt_disable();
local_irq_enable();
- __schedule(true, false);
+ __schedule(true);
local_irq_disable();
sched_preempt_enable_no_resched();
} while (need_resched());
@@ -5905,11 +5231,9 @@
out_unlock:
/* Avoid rq from going away on us: */
preempt_disable();
+ __task_rq_unlock(rq, &rf);
- rq_unpin_lock(rq, &rf);
- __balance_callbacks(rq);
- raw_spin_unlock(&rq->lock);
-
+ balance_callback(rq);
preempt_enable();
}
#else
@@ -6154,11 +5478,11 @@
int oldpolicy = -1, policy = attr->sched_policy;
int retval, oldprio, newprio, queued, running;
const struct sched_class *prev_class;
- struct callback_head *head;
struct rq_flags rf;
int reset_on_fork;
int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
struct rq *rq;
+ bool cpuset_locked = false;
/* The pi code expects interrupts enabled */
BUG_ON(pi && in_interrupt());
@@ -6261,6 +5585,15 @@
}
/*
+ * SCHED_DEADLINE bandwidth accounting relies on stable cpusets
+ * information.
+ */
+ if (dl_policy(policy) || dl_policy(p->policy)) {
+ cpuset_locked = true;
+ cpuset_lock();
+ }
+
+ /*
* Make sure no PI-waiters arrive (or leave) while we are
* changing the priority of the task:
*
@@ -6334,6 +5667,8 @@
if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
policy = oldpolicy = -1;
task_rq_unlock(rq, p, &rf);
+ if (cpuset_locked)
+ cpuset_unlock();
goto recheck;
}
@@ -6397,20 +5732,24 @@
/* Avoid rq from going away on us: */
preempt_disable();
- head = splice_balance_callbacks(rq);
task_rq_unlock(rq, p, &rf);
- if (pi)
+ if (pi) {
+ if (cpuset_locked)
+ cpuset_unlock();
rt_mutex_adjust_pi(p);
+ }
/* Run balance callbacks after we've adjusted the PI chain: */
- balance_callbacks(rq, head);
+ balance_callback(rq);
preempt_enable();
return 0;
unlock:
task_rq_unlock(rq, p, &rf);
+ if (cpuset_locked)
+ cpuset_unlock();
return retval;
}
@@ -6916,7 +6255,7 @@
}
#endif
again:
- retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK);
+ retval = __set_cpus_allowed_ptr(p, new_mask, true);
if (!retval) {
cpuset_cpus_allowed(p, cpus_allowed);
@@ -7024,14 +6363,14 @@
if (len & (sizeof(unsigned long)-1))
return -EINVAL;
- if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
return -ENOMEM;
ret = sched_getaffinity(pid, mask);
if (ret == 0) {
unsigned int retlen = min(len, cpumask_size());
- if (copy_to_user(user_mask_ptr, mask, retlen))
+ if (copy_to_user(user_mask_ptr, cpumask_bits(mask), retlen))
ret = -EFAULT;
else
ret = retlen;
@@ -7498,7 +6837,7 @@
*
* And since this is boot we can forgo the serialization.
*/
- set_cpus_allowed_common(idle, cpumask_of(cpu), 0);
+ set_cpus_allowed_common(idle, cpumask_of(cpu));
#endif
/*
* We're having a chicken and egg problem, even though we are
@@ -7525,9 +6864,7 @@
/* Set the preempt count _outside_ the spinlocks! */
init_idle_preempt_count(idle, cpu);
-#ifdef CONFIG_HAVE_PREEMPT_LAZY
- task_thread_info(idle)->preempt_lazy_count = 0;
-#endif
+
/*
* The idle tasks have their own, simple scheduling class:
*/
@@ -7554,8 +6891,7 @@
return ret;
}
-int task_can_attach(struct task_struct *p,
- const struct cpumask *cs_effective_cpus)
+int task_can_attach(struct task_struct *p)
{
int ret = 0;
@@ -7568,21 +6904,9 @@
* success of set_cpus_allowed_ptr() on all attached tasks
* before cpus_mask may be changed.
*/
- if (p->flags & PF_NO_SETAFFINITY) {
+ if (p->flags & PF_NO_SETAFFINITY)
ret = -EINVAL;
- goto out;
- }
- if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
- cs_effective_cpus)) {
- int cpu = cpumask_any_and(cpu_active_mask, cs_effective_cpus);
-
- if (unlikely(cpu >= nr_cpu_ids))
- return -EINVAL;
- ret = dl_cpu_busy(cpu, p);
- }
-
-out:
return ret;
}
@@ -7637,7 +6961,6 @@
#endif /* CONFIG_NUMA_BALANCING */
#ifdef CONFIG_HOTPLUG_CPU
-
/*
* Ensure that the idle task is using init_mm right before its CPU goes
* offline.
@@ -7657,124 +6980,166 @@
/* finish_cpu(), as ran on the BP, will clean up the active_mm state */
}
-static int __balance_push_cpu_stop(void *arg)
+/*
+ * Since this CPU is going 'away' for a while, fold any nr_active delta
+ * we might have. Assumes we're called after migrate_tasks() so that the
+ * nr_active count is stable. We need to take the teardown thread which
+ * is calling this into account, so we hand in adjust = 1 to the load
+ * calculation.
+ *
+ * Also see the comment "Global load-average calculations".
+ */
+static void calc_load_migrate(struct rq *rq)
{
- struct task_struct *p = arg;
- struct rq *rq = this_rq();
- struct rq_flags rf;
- int cpu;
+ long delta = calc_load_fold_active(rq, 1);
+ if (delta)
+ atomic_long_add(delta, &calc_load_tasks);
+}
- raw_spin_lock_irq(&p->pi_lock);
- rq_lock(rq, &rf);
+static struct task_struct *__pick_migrate_task(struct rq *rq)
+{
+ const struct sched_class *class;
+ struct task_struct *next;
+ for_each_class(class) {
+ next = class->pick_next_task(rq);
+ if (next) {
+ next->sched_class->put_prev_task(rq, next);
+ return next;
+ }
+ }
+
+ /* The idle class should always have a runnable task */
+ BUG();
+}
+
+/*
+ * Migrate all tasks from the rq, sleeping tasks will be migrated by
+ * try_to_wake_up()->select_task_rq().
+ *
+ * Called with rq->lock held even though we're in stop_machine() and
+ * there's no concurrency possible, we hold the required locks anyway
+ * because of lock validation efforts.
+ *
+ * force: if false, the function will skip CPU pinned kthreads.
+ */
+static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf, bool force)
+{
+ struct rq *rq = dead_rq;
+ struct task_struct *next, *tmp, *stop = rq->stop;
+ LIST_HEAD(percpu_kthreads);
+ struct rq_flags orf = *rf;
+ int dest_cpu;
+
+ /*
+ * Fudge the rq selection such that the below task selection loop
+ * doesn't get stuck on the currently eligible stop task.
+ *
+ * We're currently inside stop_machine() and the rq is either stuck
+ * in the stop_machine_cpu_stop() loop, or we're executing this code,
+ * either way we should never end up calling schedule() until we're
+ * done here.
+ */
+ rq->stop = NULL;
+
+ /*
+ * put_prev_task() and pick_next_task() sched
+ * class method both need to have an up-to-date
+ * value of rq->clock[_task]
+ */
update_rq_clock(rq);
- if (task_rq(p) == rq && task_on_rq_queued(p)) {
- cpu = select_fallback_rq(rq->cpu, p);
- rq = __migrate_task(rq, &rf, p, cpu);
- }
+#ifdef CONFIG_SCHED_DEBUG
+ /* note the clock update in orf */
+ orf.clock_update_flags |= RQCF_UPDATED;
+#endif
- rq_unlock(rq, &rf);
- raw_spin_unlock_irq(&p->pi_lock);
-
- put_task_struct(p);
-
- return 0;
-}
-
-static DEFINE_PER_CPU(struct cpu_stop_work, push_work);
-
-/*
- * Ensure we only run per-cpu kthreads once the CPU goes !active.
- */
-
-
-static void balance_push(struct rq *rq)
-{
- struct task_struct *push_task = rq->curr;
-
- lockdep_assert_held(&rq->lock);
- SCHED_WARN_ON(rq->cpu != smp_processor_id());
-
- /*
- * Both the cpu-hotplug and stop task are in this case and are
- * required to complete the hotplug process.
- */
- if (is_per_cpu_kthread(push_task) || is_migration_disabled(push_task)) {
+ for (;;) {
/*
- * If this is the idle task on the outgoing CPU try to wake
- * up the hotplug control thread which might wait for the
- * last task to vanish. The rcuwait_active() check is
- * accurate here because the waiter is pinned on this CPU
- * and can't obviously be running in parallel.
- *
- * On RT kernels this also has to check whether there are
- * pinned and scheduled out tasks on the runqueue. They
- * need to leave the migrate disabled section first.
+ * There's this thread running, bail when that's the only
+ * remaining thread:
*/
- if (!rq->nr_running && !rq_has_pinned_tasks(rq) &&
- rcuwait_active(&rq->hotplug_wait)) {
- raw_spin_unlock(&rq->lock);
- rcuwait_wake_up(&rq->hotplug_wait);
- raw_spin_lock(&rq->lock);
+ if (rq->nr_running == 1)
+ break;
+
+ next = __pick_migrate_task(rq);
+
+ /*
+ * Argh ... no iterator for tasks, we need to remove the
+ * kthread from the run-queue to continue.
+ */
+ if (!force && is_per_cpu_kthread(next)) {
+ INIT_LIST_HEAD(&next->percpu_kthread_node);
+ list_add(&next->percpu_kthread_node, &percpu_kthreads);
+
+ /* DEQUEUE_SAVE not used due to move_entity in rt */
+ deactivate_task(rq, next,
+ DEQUEUE_NOCLOCK);
+ continue;
}
- return;
+
+ /*
+ * Rules for changing task_struct::cpus_mask are holding
+ * both pi_lock and rq->lock, such that holding either
+ * stabilizes the mask.
+ *
+ * Drop rq->lock is not quite as disastrous as it usually is
+ * because !cpu_active at this point, which means load-balance
+ * will not interfere. Also, stop-machine.
+ */
+ rq_unlock(rq, rf);
+ raw_spin_lock(&next->pi_lock);
+ rq_relock(rq, rf);
+
+ /*
+ * Since we're inside stop-machine, _nothing_ should have
+ * changed the task, WARN if weird stuff happened, because in
+ * that case the above rq->lock drop is a fail too.
+ */
+ if (task_rq(next) != rq || !task_on_rq_queued(next)) {
+ /*
+ * In the !force case, there is a hole between
+ * rq_unlock() and rq_relock(), where another CPU might
+ * not observe an up to date cpu_active_mask and try to
+ * move tasks around.
+ */
+ WARN_ON(force);
+ raw_spin_unlock(&next->pi_lock);
+ continue;
+ }
+
+ /* Find suitable destination for @next, with force if needed. */
+ dest_cpu = select_fallback_rq(dead_rq->cpu, next);
+ rq = __migrate_task(rq, rf, next, dest_cpu);
+ if (rq != dead_rq) {
+ rq_unlock(rq, rf);
+ rq = dead_rq;
+ *rf = orf;
+ rq_relock(rq, rf);
+ }
+ raw_spin_unlock(&next->pi_lock);
}
- get_task_struct(push_task);
- /*
- * Temporarily drop rq->lock such that we can wake-up the stop task.
- * Both preemption and IRQs are still disabled.
- */
- raw_spin_unlock(&rq->lock);
- stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task,
- this_cpu_ptr(&push_work));
- /*
- * At this point need_resched() is true and we'll take the loop in
- * schedule(). The next pick is obviously going to be the stop task
- * which is_per_cpu_kthread() and will push this task away.
- */
- raw_spin_lock(&rq->lock);
-}
+ list_for_each_entry_safe(next, tmp, &percpu_kthreads,
+ percpu_kthread_node) {
-static void balance_push_set(int cpu, bool on)
-{
- struct rq *rq = cpu_rq(cpu);
- struct rq_flags rf;
+ /* ENQUEUE_RESTORE not used due to move_entity in rt */
+ activate_task(rq, next, ENQUEUE_NOCLOCK);
+ list_del(&next->percpu_kthread_node);
+ }
- rq_lock_irqsave(rq, &rf);
- if (on)
- rq->balance_flags |= BALANCE_PUSH;
- else
- rq->balance_flags &= ~BALANCE_PUSH;
- rq_unlock_irqrestore(rq, &rf);
-}
-
-/*
- * Invoked from a CPUs hotplug control thread after the CPU has been marked
- * inactive. All tasks which are not per CPU kernel threads are either
- * pushed off this CPU now via balance_push() or placed on a different CPU
- * during wakeup. Wait until the CPU is quiescent.
- */
-static void balance_hotplug_wait(void)
-{
- struct rq *rq = this_rq();
-
- rcuwait_wait_event(&rq->hotplug_wait,
- rq->nr_running == 1 && !rq_has_pinned_tasks(rq),
- TASK_UNINTERRUPTIBLE);
+ rq->stop = stop;
}
static int drain_rq_cpu_stop(void *data)
{
-#ifndef CONFIG_PREEMPT_RT
struct rq *rq = this_rq();
struct rq_flags rf;
rq_lock_irqsave(rq, &rf);
migrate_tasks(rq, &rf, false);
rq_unlock_irqrestore(rq, &rf);
-#endif
+
return 0;
}
@@ -7799,21 +7164,6 @@
if (rq_drain->done)
cpu_stop_work_wait(rq_drain);
}
-
-#else
-
-static inline void balance_push(struct rq *rq)
-{
-}
-
-static inline void balance_push_set(int cpu, bool on)
-{
-}
-
-static inline void balance_hotplug_wait(void)
-{
-}
-
#endif /* CONFIG_HOTPLUG_CPU */
void set_rq_online(struct rq *rq)
@@ -7884,7 +7234,7 @@
static int cpuset_cpu_inactive(unsigned int cpu)
{
if (!cpuhp_tasks_frozen) {
- int ret = dl_cpu_busy(cpu, NULL);
+ int ret = dl_bw_check_overflow(cpu);
if (ret)
return ret;
@@ -7900,8 +7250,6 @@
{
struct rq *rq = cpu_rq(cpu);
struct rq_flags rf;
-
- balance_push_set(cpu, false);
#ifdef CONFIG_SCHED_SMT
/*
@@ -7956,21 +7304,9 @@
int _sched_cpu_deactivate(unsigned int cpu)
{
- struct rq *rq = cpu_rq(cpu);
- struct rq_flags rf;
int ret;
set_cpu_active(cpu, false);
-
- balance_push_set(cpu, true);
-
- rq_lock_irqsave(rq, &rf);
- if (rq->rd) {
- update_rq_clock(rq);
- BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
- set_rq_offline(rq);
- }
- rq_unlock_irqrestore(rq, &rf);
#ifdef CONFIG_SCHED_SMT
/*
@@ -7985,7 +7321,6 @@
ret = cpuset_cpu_inactive(cpu);
if (ret) {
- balance_push_set(cpu, false);
set_cpu_active(cpu, true);
return ret;
}
@@ -8049,41 +7384,6 @@
}
#ifdef CONFIG_HOTPLUG_CPU
-
-/*
- * Invoked immediately before the stopper thread is invoked to bring the
- * CPU down completely. At this point all per CPU kthreads except the
- * hotplug thread (current) and the stopper thread (inactive) have been
- * either parked or have been unbound from the outgoing CPU. Ensure that
- * any of those which might be on the way out are gone.
- *
- * If after this point a bound task is being woken on this CPU then the
- * responsible hotplug callback has failed to do it's job.
- * sched_cpu_dying() will catch it with the appropriate fireworks.
- */
-int sched_cpu_wait_empty(unsigned int cpu)
-{
- balance_hotplug_wait();
- return 0;
-}
-
-/*
- * Since this CPU is going 'away' for a while, fold any nr_active delta we
- * might have. Called from the CPU stopper task after ensuring that the
- * stopper is the last running task on the CPU, so nr_active count is
- * stable. We need to take the teardown thread which is calling this into
- * account, so we hand in adjust = 1 to the load calculation.
- *
- * Also see the comment "Global load-average calculations".
- */
-static void calc_load_migrate(struct rq *rq)
-{
- long delta = calc_load_fold_active(rq, 1);
-
- if (delta)
- atomic_long_add(delta, &calc_load_tasks);
-}
-
int sched_cpu_dying(unsigned int cpu)
{
struct rq *rq = cpu_rq(cpu);
@@ -8093,7 +7393,12 @@
sched_tick_stop(cpu);
rq_lock_irqsave(rq, &rf);
- BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq));
+ if (rq->rd) {
+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+ set_rq_offline(rq);
+ }
+ migrate_tasks(rq, &rf, true);
+ BUG_ON(rq->nr_running != 1);
rq_unlock_irqrestore(rq, &rf);
trace_android_rvh_sched_cpu_dying(cpu);
@@ -8304,9 +7609,6 @@
rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func);
#endif
-#ifdef CONFIG_HOTPLUG_CPU
- rcuwait_init(&rq->hotplug_wait);
-#endif
#endif /* CONFIG_SMP */
hrtick_rq_init(rq);
atomic_set(&rq->nr_iowait, 0);
@@ -8347,7 +7649,7 @@
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
static inline int preempt_count_equals(int preempt_offset)
{
- int nested = preempt_count() + sched_rcu_preempt_depth();
+ int nested = preempt_count() + rcu_preempt_depth();
return (nested == preempt_offset);
}
@@ -8447,39 +7749,6 @@
add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
}
EXPORT_SYMBOL_GPL(__cant_sleep);
-
-#ifdef CONFIG_SMP
-void __cant_migrate(const char *file, int line)
-{
- static unsigned long prev_jiffy;
-
- if (irqs_disabled())
- return;
-
- if (is_migration_disabled(current))
- return;
-
- if (!IS_ENABLED(CONFIG_PREEMPT_COUNT))
- return;
-
- if (preempt_count() > 0)
- return;
-
- if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
- return;
- prev_jiffy = jiffies;
-
- pr_err("BUG: assuming non migratable context at %s:%d\n", file, line);
- pr_err("in_atomic(): %d, irqs_disabled(): %d, migration_disabled() %u pid: %d, name: %s\n",
- in_atomic(), irqs_disabled(), is_migration_disabled(current),
- current->pid, current->comm);
-
- debug_show_held_locks(current);
- dump_stack();
- add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
-}
-EXPORT_SYMBOL_GPL(__cant_migrate);
-#endif
#endif
#ifdef CONFIG_MAGIC_SYSRQ
--
Gitblit v1.6.2