2024-05-10 commit 61598093bbdd283a7edc367d900f223070ead8d2
kernel/kernel/sched/core.c
....@@ -78,11 +78,7 @@
7878 * Number of tasks to iterate in a single balance run.
7979 * Limited because this is done with IRQs disabled.
8080 */
81
-#ifdef CONFIG_PREEMPT_RT
82
-const_debug unsigned int sysctl_sched_nr_migrate = 8;
83
-#else
8481 const_debug unsigned int sysctl_sched_nr_migrate = 32;
85
-#endif
8682
8783 /*
8884 * period over which we measure -rt task CPU usage in us.
....@@ -531,15 +527,9 @@
531527 #endif
532528 #endif
533529
534
-static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task,
535
- bool sleeper)
530
+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task)
536531 {
537
- struct wake_q_node *node;
538
-
539
- if (sleeper)
540
- node = &task->wake_q_sleeper;
541
- else
542
- node = &task->wake_q;
532
+ struct wake_q_node *node = &task->wake_q;
543533
544534 /*
545535 * Atomically grab the task, if ->wake_q is !nil already it means
....@@ -576,13 +566,7 @@
576566 */
577567 void wake_q_add(struct wake_q_head *head, struct task_struct *task)
578568 {
579
- if (__wake_q_add(head, task, false))
580
- get_task_struct(task);
581
-}
582
-
583
-void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task)
584
-{
585
- if (__wake_q_add(head, task, true))
569
+ if (__wake_q_add(head, task))
586570 get_task_struct(task);
587571 }
588572
....@@ -605,40 +589,29 @@
605589 */
606590 void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task)
607591 {
608
- if (!__wake_q_add(head, task, false))
592
+ if (!__wake_q_add(head, task))
609593 put_task_struct(task);
610594 }
611595
612
-void __wake_up_q(struct wake_q_head *head, bool sleeper)
596
+void wake_up_q(struct wake_q_head *head)
613597 {
614598 struct wake_q_node *node = head->first;
615599
616600 while (node != WAKE_Q_TAIL) {
617601 struct task_struct *task;
618602
619
- if (sleeper)
620
- task = container_of(node, struct task_struct, wake_q_sleeper);
621
- else
622
- task = container_of(node, struct task_struct, wake_q);
623
-
603
+ task = container_of(node, struct task_struct, wake_q);
624604 BUG_ON(!task);
625605 /* Task can safely be re-inserted now: */
626606 node = node->next;
607
+ task->wake_q.next = NULL;
627608 task->wake_q_count = head->count;
628
- if (sleeper)
629
- task->wake_q_sleeper.next = NULL;
630
- else
631
- task->wake_q.next = NULL;
632609
633610 /*
634611 * wake_up_process() executes a full barrier, which pairs with
635612 * the queueing in wake_q_add() so as not to miss wakeups.
636613 */
637
- if (sleeper)
638
- wake_up_lock_sleeper(task);
639
- else
640
- wake_up_process(task);
641
-
614
+ wake_up_process(task);
642615 task->wake_q_count = 0;
643616 put_task_struct(task);
644617 }
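
For context, the wake_q machinery patched above batches wakeups: callers collect tasks on a stack-local wake_q_head while holding a spinlock and issue the actual wakeups after dropping it, which is why wake_q_add() takes a task reference that wake_up_q() drops. A minimal usage sketch (the waiter structure and list are hypothetical; the helpers come from <linux/sched/wake_q.h>):

#include <linux/list.h>
#include <linux/sched/wake_q.h>
#include <linux/spinlock.h>

/* Hypothetical waiter record kept on a lock-protected list. */
struct waiter {
        struct list_head        node;
        struct task_struct      *task;
};

static void wake_all(spinlock_t *lock, struct list_head *waiters)
{
        DEFINE_WAKE_Q(wake_q);          /* empty head on the stack */
        struct waiter *w;

        spin_lock(lock);
        list_for_each_entry(w, waiters, node)
                wake_q_add(&wake_q, w->task);   /* grabs a task reference */
        spin_unlock(lock);

        /* Wakeups happen outside the lock; each task reference is dropped. */
        wake_up_q(&wake_q);
}
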
....@@ -675,48 +648,6 @@
675648 trace_sched_wake_idle_without_ipi(cpu);
676649 }
677650 EXPORT_SYMBOL_GPL(resched_curr);
678
-
679
-#ifdef CONFIG_PREEMPT_LAZY
680
-
681
-static int tsk_is_polling(struct task_struct *p)
682
-{
683
-#ifdef TIF_POLLING_NRFLAG
684
- return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
685
-#else
686
- return 0;
687
-#endif
688
-}
689
-
690
-void resched_curr_lazy(struct rq *rq)
691
-{
692
- struct task_struct *curr = rq->curr;
693
- int cpu;
694
-
695
- if (!sched_feat(PREEMPT_LAZY)) {
696
- resched_curr(rq);
697
- return;
698
- }
699
-
700
- lockdep_assert_held(&rq->lock);
701
-
702
- if (test_tsk_need_resched(curr))
703
- return;
704
-
705
- if (test_tsk_need_resched_lazy(curr))
706
- return;
707
-
708
- set_tsk_need_resched_lazy(curr);
709
-
710
- cpu = cpu_of(rq);
711
- if (cpu == smp_processor_id())
712
- return;
713
-
714
- /* NEED_RESCHED_LAZY must be visible before we test polling */
715
- smp_mb();
716
- if (!tsk_is_polling(curr))
717
- smp_send_reschedule(cpu);
718
-}
719
-#endif
720651
721652 void resched_cpu(int cpu)
722653 {
....@@ -1087,7 +1018,7 @@
10871018 if (!(rq->uclamp_flags & UCLAMP_FLAG_IDLE))
10881019 return;
10891020
1090
- WRITE_ONCE(rq->uclamp[clamp_id].value, clamp_value);
1021
+ uclamp_rq_set(rq, clamp_id, clamp_value);
10911022 }
10921023
10931024 static inline
....@@ -1280,8 +1211,8 @@
12801211 if (bucket->tasks == 1 || uc_se->value > bucket->value)
12811212 bucket->value = uc_se->value;
12821213
1283
- if (uc_se->value > READ_ONCE(uc_rq->value))
1284
- WRITE_ONCE(uc_rq->value, uc_se->value);
1214
+ if (uc_se->value > uclamp_rq_get(rq, clamp_id))
1215
+ uclamp_rq_set(rq, clamp_id, uc_se->value);
12851216 }
12861217
12871218 /*
....@@ -1347,7 +1278,7 @@
13471278 if (likely(bucket->tasks))
13481279 return;
13491280
1350
- rq_clamp = READ_ONCE(uc_rq->value);
1281
+ rq_clamp = uclamp_rq_get(rq, clamp_id);
13511282 /*
13521283 * Defensive programming: this should never happen. If it happens,
13531284 * e.g. due to future modification, warn and fixup the expected value.
....@@ -1355,7 +1286,7 @@
13551286 SCHED_WARN_ON(bucket->value > rq_clamp);
13561287 if (bucket->value >= rq_clamp) {
13571288 bkt_clamp = uclamp_rq_max_value(rq, clamp_id, uc_se->value);
1358
- WRITE_ONCE(uc_rq->value, bkt_clamp);
1289
+ uclamp_rq_set(rq, clamp_id, bkt_clamp);
13591290 }
13601291 }
13611292
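
The uclamp_rq_get()/uclamp_rq_set() helpers introduced above are not defined in this file; judging from the READ_ONCE()/WRITE_ONCE() accesses they replace, they are presumably thin accessors in kernel/sched/sched.h along these lines (sketch, not verbatim):

static inline unsigned long uclamp_rq_get(struct rq *rq,
                                          enum uclamp_id clamp_id)
{
        return READ_ONCE(rq->uclamp[clamp_id].value);
}

static inline void uclamp_rq_set(struct rq *rq, enum uclamp_id clamp_id,
                                 unsigned int value)
{
        WRITE_ONCE(rq->uclamp[clamp_id].value, value);
}
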
....@@ -1761,6 +1692,9 @@
17611692
17621693 void activate_task(struct rq *rq, struct task_struct *p, int flags)
17631694 {
1695
+ if (task_on_rq_migrating(p))
1696
+ flags |= ENQUEUE_MIGRATED;
1697
+
17641698 enqueue_task(rq, p, flags);
17651699
17661700 p->on_rq = TASK_ON_RQ_QUEUED;
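
Setting ENQUEUE_MIGRATED for task_on_rq_migrating() tasks tells the fair class that the entity is arriving from another runqueue, so its (relative) vruntime has to be re-normalized against the destination cfs_rq; previously only the wakeup path passed the flag (via WF_MIGRATED in try_to_wake_up()). The consumer in kernel/sched/fair.c looks roughly like this (paraphrased sketch, not verbatim):

/* enqueue_entity(): re-add min_vruntime for entities that were dequeued
 * elsewhere, i.e. anything that is not a plain same-CPU wakeup. */
bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);

if (renorm)
        se->vruntime += cfs_rq->min_vruntime;
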
....@@ -1870,82 +1804,6 @@
18701804
18711805 #ifdef CONFIG_SMP
18721806
1873
-static void
1874
-__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags);
1875
-
1876
-static int __set_cpus_allowed_ptr(struct task_struct *p,
1877
- const struct cpumask *new_mask,
1878
- u32 flags);
1879
-
1880
-static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
1881
-{
1882
- if (likely(!p->migration_disabled))
1883
- return;
1884
-
1885
- if (p->cpus_ptr != &p->cpus_mask)
1886
- return;
1887
-
1888
- /*
1889
- * Violates locking rules! see comment in __do_set_cpus_allowed().
1890
- */
1891
- __do_set_cpus_allowed(p, cpumask_of(rq->cpu), SCA_MIGRATE_DISABLE);
1892
-}
1893
-
1894
-void migrate_disable(void)
1895
-{
1896
- struct task_struct *p = current;
1897
-
1898
- if (p->migration_disabled) {
1899
- p->migration_disabled++;
1900
- return;
1901
- }
1902
-
1903
- trace_sched_migrate_disable_tp(p);
1904
-
1905
- preempt_disable();
1906
- this_rq()->nr_pinned++;
1907
- p->migration_disabled = 1;
1908
- preempt_lazy_disable();
1909
- preempt_enable();
1910
-}
1911
-EXPORT_SYMBOL_GPL(migrate_disable);
1912
-
1913
-void migrate_enable(void)
1914
-{
1915
- struct task_struct *p = current;
1916
-
1917
- if (p->migration_disabled > 1) {
1918
- p->migration_disabled--;
1919
- return;
1920
- }
1921
-
1922
- /*
1923
- * Ensure stop_task runs either before or after this, and that
1924
- * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
1925
- */
1926
- preempt_disable();
1927
- if (p->cpus_ptr != &p->cpus_mask)
1928
- __set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE);
1929
- /*
1930
- * Mustn't clear migration_disabled() until cpus_ptr points back at the
1931
- * regular cpus_mask, otherwise things that race (eg.
1932
- * select_fallback_rq) get confused.
1933
- */
1934
- barrier();
1935
- p->migration_disabled = 0;
1936
- this_rq()->nr_pinned--;
1937
- preempt_lazy_enable();
1938
- preempt_enable();
1939
-
1940
- trace_sched_migrate_enable_tp(p);
1941
-}
1942
-EXPORT_SYMBOL_GPL(migrate_enable);
1943
-
1944
-static inline bool rq_has_pinned_tasks(struct rq *rq)
1945
-{
1946
- return rq->nr_pinned;
1947
-}
1948
-
19491807 /*
19501808 * Per-CPU kthreads are allowed to run on !active && online CPUs, see
19511809 * __set_cpus_allowed_ptr() and select_fallback_rq().
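
The migrate_disable()/migrate_enable() implementation removed above pins the calling task to its current CPU while leaving it fully preemptible, with nesting tracked in p->migration_disabled and pinned tasks counted in rq->nr_pinned. A walk-through sketch of the semantics as implemented by that code:

        migrate_disable();      /* p->migration_disabled 0 -> 1, rq->nr_pinned++ */
        migrate_disable();      /* nested: 1 -> 2, no further side effects */

        /*
         * In this region the task may sleep and be preempted, but it is not
         * migrated, so smp_processor_id() stays stable and per-CPU data
         * stays local.  A concurrent set_cpus_allowed_ptr() blocks until the
         * outermost migrate_enable().
         */

        migrate_enable();       /* 2 -> 1 */
        migrate_enable();       /* 1 -> 0: cpus_ptr restored to &p->cpus_mask,
                                 * rq->nr_pinned--, deferred affinity applied */
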
....@@ -1955,7 +1813,7 @@
19551813 if (!cpumask_test_cpu(cpu, p->cpus_ptr))
19561814 return false;
19571815
1958
- if (is_per_cpu_kthread(p) || is_migration_disabled(p))
1816
+ if (is_per_cpu_kthread(p))
19591817 return cpu_online(cpu);
19601818
19611819 if (!cpu_active(cpu))
....@@ -2015,21 +1873,8 @@
20151873 }
20161874
20171875 struct migration_arg {
2018
- struct task_struct *task;
2019
- int dest_cpu;
2020
- struct set_affinity_pending *pending;
2021
-};
2022
-
2023
-/*
2024
- * @refs: number of wait_for_completion()
2025
- * @stop_pending: is @stop_work in use
2026
- */
2027
-struct set_affinity_pending {
2028
- refcount_t refs;
2029
- unsigned int stop_pending;
2030
- struct completion done;
2031
- struct cpu_stop_work stop_work;
2032
- struct migration_arg arg;
1876
+ struct task_struct *task;
1877
+ int dest_cpu;
20331878 };
20341879
20351880 /*
....@@ -2062,17 +1907,15 @@
20621907 static int migration_cpu_stop(void *data)
20631908 {
20641909 struct migration_arg *arg = data;
2065
- struct set_affinity_pending *pending = arg->pending;
20661910 struct task_struct *p = arg->task;
20671911 struct rq *rq = this_rq();
2068
- bool complete = false;
20691912 struct rq_flags rf;
20701913
20711914 /*
20721915 * The original target CPU might have gone down and we might
20731916 * be on another CPU but it doesn't matter.
20741917 */
2075
- local_irq_save(rf.flags);
1918
+ local_irq_disable();
20761919 /*
20771920 * We need to explicitly wake pending tasks before running
20781921 * __migrate_task() such that we will not miss enforcing cpus_ptr
....@@ -2082,121 +1925,21 @@
20821925
20831926 raw_spin_lock(&p->pi_lock);
20841927 rq_lock(rq, &rf);
2085
-
20861928 /*
20871929 * If task_rq(p) != rq, it cannot be migrated here, because we're
20881930 * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
20891931 * we're holding p->pi_lock.
20901932 */
20911933 if (task_rq(p) == rq) {
2092
- if (is_migration_disabled(p))
2093
- goto out;
2094
-
2095
- if (pending) {
2096
- if (p->migration_pending == pending)
2097
- p->migration_pending = NULL;
2098
- complete = true;
2099
-
2100
- if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask))
2101
- goto out;
2102
- }
2103
-
21041934 if (task_on_rq_queued(p))
21051935 rq = __migrate_task(rq, &rf, p, arg->dest_cpu);
21061936 else
21071937 p->wake_cpu = arg->dest_cpu;
2108
-
2109
- /*
2110
- * XXX __migrate_task() can fail, at which point we might end
2111
- * up running on a dodgy CPU, AFAICT this can only happen
2112
- * during CPU hotplug, at which point we'll get pushed out
2113
- * anyway, so it's probably not a big deal.
2114
- */
2115
-
2116
- } else if (pending) {
2117
- /*
2118
- * This happens when we get migrated between migrate_enable()'s
2119
- * preempt_enable() and scheduling the stopper task. At that
2120
- * point we're a regular task again and not current anymore.
2121
- *
2122
- * A !PREEMPT kernel has a giant hole here, which makes it far
2123
- * more likely.
2124
- */
2125
-
2126
- /*
2127
- * The task moved before the stopper got to run. We're holding
2128
- * ->pi_lock, so the allowed mask is stable - if it got
2129
- * somewhere allowed, we're done.
2130
- */
2131
- if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
2132
- if (p->migration_pending == pending)
2133
- p->migration_pending = NULL;
2134
- complete = true;
2135
- goto out;
2136
- }
2137
-
2138
- /*
2139
- * When migrate_enable() hits a rq mis-match we can't reliably
2140
- * determine is_migration_disabled() and so have to chase after
2141
- * it.
2142
- */
2143
- WARN_ON_ONCE(!pending->stop_pending);
2144
- task_rq_unlock(rq, p, &rf);
2145
- stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
2146
- &pending->arg, &pending->stop_work);
2147
- return 0;
21481938 }
2149
-out:
2150
- if (pending)
2151
- pending->stop_pending = false;
2152
- task_rq_unlock(rq, p, &rf);
1939
+ rq_unlock(rq, &rf);
1940
+ raw_spin_unlock(&p->pi_lock);
21531941
2154
- if (complete)
2155
- complete_all(&pending->done);
2156
-
2157
- return 0;
2158
-}
2159
-
2160
-int push_cpu_stop(void *arg)
2161
-{
2162
- struct rq *lowest_rq = NULL, *rq = this_rq();
2163
- struct task_struct *p = arg;
2164
-
2165
- raw_spin_lock_irq(&p->pi_lock);
2166
- raw_spin_lock(&rq->lock);
2167
-
2168
- if (task_rq(p) != rq)
2169
- goto out_unlock;
2170
-
2171
- if (is_migration_disabled(p)) {
2172
- p->migration_flags |= MDF_PUSH;
2173
- goto out_unlock;
2174
- }
2175
-
2176
- p->migration_flags &= ~MDF_PUSH;
2177
-
2178
- if (p->sched_class->find_lock_rq)
2179
- lowest_rq = p->sched_class->find_lock_rq(p, rq);
2180
-
2181
- if (!lowest_rq)
2182
- goto out_unlock;
2183
-
2184
- // XXX validate p is still the highest prio task
2185
- if (task_rq(p) == rq) {
2186
- deactivate_task(rq, p, 0);
2187
- set_task_cpu(p, lowest_rq->cpu);
2188
- activate_task(lowest_rq, p, 0);
2189
- resched_curr(lowest_rq);
2190
- }
2191
-
2192
- double_unlock_balance(rq, lowest_rq);
2193
-
2194
-out_unlock:
2195
- rq->push_busy = false;
2196
- raw_spin_unlock(&rq->lock);
2197
- raw_spin_unlock_irq(&p->pi_lock);
2198
-
2199
- put_task_struct(p);
1942
+ local_irq_enable();
22001943 return 0;
22011944 }
22021945
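
migration_cpu_stop() is a cpu_stop callback: it is handed to the per-CPU stopper thread ("migration/N"), which runs it on the target CPU ahead of every other task. A generic sketch of that API (the callback here is hypothetical), which __set_cpus_allowed_ptr_locked() below uses with migration_cpu_stop() and a struct migration_arg:

#include <linux/printk.h>
#include <linux/smp.h>
#include <linux/stop_machine.h>

/* Hypothetical callback: runs in the stopper thread on the chosen CPU. */
static int hello_stop(void *arg)
{
        pr_info("stopper running on CPU%d\n", smp_processor_id());
        return 0;
}

static int poke_cpu(unsigned int cpu)
{
        /* Sleeps until hello_stop() has completed on @cpu. */
        return stop_one_cpu(cpu, hello_stop, NULL);
}
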
....@@ -2204,40 +1947,19 @@
22041947 * sched_class::set_cpus_allowed must do the below, but is not required to
22051948 * actually call this function.
22061949 */
2207
-void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags)
1950
+void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
22081951 {
2209
- if (flags & (SCA_MIGRATE_ENABLE | SCA_MIGRATE_DISABLE)) {
2210
- p->cpus_ptr = new_mask;
2211
- return;
2212
- }
2213
-
22141952 cpumask_copy(&p->cpus_mask, new_mask);
22151953 p->nr_cpus_allowed = cpumask_weight(new_mask);
22161954 trace_android_rvh_set_cpus_allowed_comm(p, new_mask);
22171955 }
22181956
2219
-static void
2220
-__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags)
1957
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
22211958 {
22221959 struct rq *rq = task_rq(p);
22231960 bool queued, running;
22241961
2225
- /*
2226
- * This here violates the locking rules for affinity, since we're only
2227
- * supposed to change these variables while holding both rq->lock and
2228
- * p->pi_lock.
2229
- *
2230
- * HOWEVER, it magically works, because ttwu() is the only code that
2231
- * accesses these variables under p->pi_lock and only does so after
2232
- * smp_cond_load_acquire(&p->on_cpu, !VAL), and we're in __schedule()
2233
- * before finish_task().
2234
- *
2235
- * XXX do further audits, this smells like something putrid.
2236
- */
2237
- if (flags & SCA_MIGRATE_DISABLE)
2238
- SCHED_WARN_ON(!p->on_cpu);
2239
- else
2240
- lockdep_assert_held(&p->pi_lock);
1962
+ lockdep_assert_held(&p->pi_lock);
22411963
22421964 queued = task_on_rq_queued(p);
22431965 running = task_current(rq, p);
....@@ -2253,7 +1975,7 @@
22531975 if (running)
22541976 put_prev_task(rq, p);
22551977
2256
- p->sched_class->set_cpus_allowed(p, new_mask, flags);
1978
+ p->sched_class->set_cpus_allowed(p, new_mask);
22571979
22581980 if (queued)
22591981 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
....@@ -2261,14 +1983,12 @@
22611983 set_next_task(rq, p);
22621984 }
22631985
2264
-static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
2265
- int dest_cpu, unsigned int flags);
22661986 /*
22671987 * Called with both p->pi_lock and rq->lock held; drops both before returning.
22681988 */
22691989 static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
22701990 const struct cpumask *new_mask,
2271
- u32 flags,
1991
+ bool check,
22721992 struct rq *rq,
22731993 struct rq_flags *rf)
22741994 {
....@@ -2279,14 +1999,9 @@
22791999
22802000 update_rq_clock(rq);
22812001
2282
- if (p->flags & PF_KTHREAD || is_migration_disabled(p)) {
2002
+ if (p->flags & PF_KTHREAD) {
22832003 /*
2284
- * Kernel threads are allowed on online && !active CPUs.
2285
- *
2286
- * Specifically, migration_disabled() tasks must not fail the
2287
- * cpumask_any_and_distribute() pick below, esp. so on
2288
- * SCA_MIGRATE_ENABLE, otherwise we'll not call
2289
- * set_cpus_allowed_common() and actually reset p->cpus_ptr.
2004
+ * Kernel threads are allowed on online && !active CPUs
22902005 */
22912006 cpu_valid_mask = cpu_online_mask;
22922007 } else if (!cpumask_subset(new_mask, cpu_allowed_mask)) {
....@@ -2298,22 +2013,13 @@
22982013 * Must re-check here, to close a race against __kthread_bind(),
22992014 * sched_setaffinity() is not guaranteed to observe the flag.
23002015 */
2301
- if ((flags & SCA_CHECK) && (p->flags & PF_NO_SETAFFINITY)) {
2016
+ if (check && (p->flags & PF_NO_SETAFFINITY)) {
23022017 ret = -EINVAL;
23032018 goto out;
23042019 }
23052020
2306
- if (!(flags & SCA_MIGRATE_ENABLE)) {
2307
- if (cpumask_equal(&p->cpus_mask, new_mask))
2308
- goto out;
2309
-
2310
- if (WARN_ON_ONCE(p == current &&
2311
- is_migration_disabled(p) &&
2312
- !cpumask_test_cpu(task_cpu(p), new_mask))) {
2313
- ret = -EBUSY;
2314
- goto out;
2315
- }
2316
- }
2021
+ if (cpumask_equal(&p->cpus_mask, new_mask))
2022
+ goto out;
23172023
23182024 /*
23192025 * Picking a ~random cpu helps in cases where we are changing affinity
....@@ -2326,7 +2032,7 @@
23262032 goto out;
23272033 }
23282034
2329
- __do_set_cpus_allowed(p, new_mask, flags);
2035
+ do_set_cpus_allowed(p, new_mask);
23302036
23312037 if (p->flags & PF_KTHREAD) {
23322038 /*
....@@ -2338,227 +2044,27 @@
23382044 p->nr_cpus_allowed != 1);
23392045 }
23402046
2341
- return affine_move_task(rq, p, rf, dest_cpu, flags);
2047
+ /* Can the task run on the task's current CPU? If so, we're done */
2048
+ if (cpumask_test_cpu(task_cpu(p), new_mask))
2049
+ goto out;
2050
+
2051
+ if (task_running(rq, p) || p->state == TASK_WAKING) {
2052
+ struct migration_arg arg = { p, dest_cpu };
2053
+ /* Need help from migration thread: drop lock and wait. */
2054
+ task_rq_unlock(rq, p, rf);
2055
+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
2056
+ return 0;
2057
+ } else if (task_on_rq_queued(p)) {
2058
+ /*
2059
+ * OK, since we're going to drop the lock immediately
2060
+ * afterwards anyway.
2061
+ */
2062
+ rq = move_queued_task(rq, rf, p, dest_cpu);
2063
+ }
23422064 out:
23432065 task_rq_unlock(rq, p, rf);
23442066
23452067 return ret;
2346
-}
2347
-
2348
-void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
2349
-{
2350
- __do_set_cpus_allowed(p, new_mask, 0);
2351
-}
2352
-
2353
-/*
2354
- * This function is wildly self concurrent; here be dragons.
2355
- *
2356
- *
2357
- * When given a valid mask, __set_cpus_allowed_ptr() must block until the
2358
- * designated task is enqueued on an allowed CPU. If that task is currently
2359
- * running, we have to kick it out using the CPU stopper.
2360
- *
2361
- * Migrate-Disable comes along and tramples all over our nice sandcastle.
2362
- * Consider:
2363
- *
2364
- * Initial conditions: P0->cpus_mask = [0, 1]
2365
- *
2366
- * P0@CPU0 P1
2367
- *
2368
- * migrate_disable();
2369
- * <preempted>
2370
- * set_cpus_allowed_ptr(P0, [1]);
2371
- *
2372
- * P1 *cannot* return from this set_cpus_allowed_ptr() call until P0 executes
2373
- * its outermost migrate_enable() (i.e. it exits its Migrate-Disable region).
2374
- * This means we need the following scheme:
2375
- *
2376
- * P0@CPU0 P1
2377
- *
2378
- * migrate_disable();
2379
- * <preempted>
2380
- * set_cpus_allowed_ptr(P0, [1]);
2381
- * <blocks>
2382
- * <resumes>
2383
- * migrate_enable();
2384
- * __set_cpus_allowed_ptr();
2385
- * <wakes local stopper>
2386
- * `--> <woken on migration completion>
2387
- *
2388
- * Now the fun stuff: there may be several P1-like tasks, i.e. multiple
2389
- * concurrent set_cpus_allowed_ptr(P0, [*]) calls. CPU affinity changes of any
2390
- * task p are serialized by p->pi_lock, which we can leverage: the one that
2391
- * should come into effect at the end of the Migrate-Disable region is the last
2392
- * one. This means we only need to track a single cpumask (i.e. p->cpus_mask),
2393
- * but we still need to properly signal those waiting tasks at the appropriate
2394
- * moment.
2395
- *
2396
- * This is implemented using struct set_affinity_pending. The first
2397
- * __set_cpus_allowed_ptr() caller within a given Migrate-Disable region will
2398
- * setup an instance of that struct and install it on the targeted task_struct.
2399
- * Any and all further callers will reuse that instance. Those then wait for
2400
- * a completion signaled at the tail of the CPU stopper callback (1), triggered
2401
- * on the end of the Migrate-Disable region (i.e. outermost migrate_enable()).
2402
- *
2403
- *
2404
- * (1) In the cases covered above. There is one more where the completion is
2405
- * signaled within affine_move_task() itself: when a subsequent affinity request
2406
- * cancels the need for an active migration. Consider:
2407
- *
2408
- * Initial conditions: P0->cpus_mask = [0, 1]
2409
- *
2410
- * P0@CPU0 P1 P2
2411
- *
2412
- * migrate_disable();
2413
- * <preempted>
2414
- * set_cpus_allowed_ptr(P0, [1]);
2415
- * <blocks>
2416
- * set_cpus_allowed_ptr(P0, [0, 1]);
2417
- * <signal completion>
2418
- * <awakes>
2419
- *
2420
- * Note that the above is safe vs a concurrent migrate_enable(), as any
2421
- * pending affinity completion is preceded an uninstallion of
2422
- * p->migration_pending done with p->pi_lock held.
2423
- */
2424
-static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
2425
- int dest_cpu, unsigned int flags)
2426
-{
2427
- struct set_affinity_pending my_pending = { }, *pending = NULL;
2428
- bool stop_pending, complete = false;
2429
-
2430
- /* Can the task run on the task's current CPU? If so, we're done */
2431
- if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
2432
- struct task_struct *push_task = NULL;
2433
-
2434
- if ((flags & SCA_MIGRATE_ENABLE) &&
2435
- (p->migration_flags & MDF_PUSH) && !rq->push_busy) {
2436
- rq->push_busy = true;
2437
- push_task = get_task_struct(p);
2438
- }
2439
-
2440
- /*
2441
- * If there are pending waiters, but no pending stop_work,
2442
- * then complete now.
2443
- */
2444
- pending = p->migration_pending;
2445
- if (pending && !pending->stop_pending) {
2446
- p->migration_pending = NULL;
2447
- complete = true;
2448
- }
2449
-
2450
- task_rq_unlock(rq, p, rf);
2451
-
2452
- if (push_task) {
2453
- stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
2454
- p, &rq->push_work);
2455
- }
2456
-
2457
- if (complete)
2458
- complete_all(&pending->done);
2459
-
2460
- return 0;
2461
- }
2462
-
2463
- if (!(flags & SCA_MIGRATE_ENABLE)) {
2464
- /* serialized by p->pi_lock */
2465
- if (!p->migration_pending) {
2466
- /* Install the request */
2467
- refcount_set(&my_pending.refs, 1);
2468
- init_completion(&my_pending.done);
2469
- my_pending.arg = (struct migration_arg) {
2470
- .task = p,
2471
- .dest_cpu = dest_cpu,
2472
- .pending = &my_pending,
2473
- };
2474
-
2475
- p->migration_pending = &my_pending;
2476
- } else {
2477
- pending = p->migration_pending;
2478
- refcount_inc(&pending->refs);
2479
- /*
2480
- * Affinity has changed, but we've already installed a
2481
- * pending. migration_cpu_stop() *must* see this, else
2482
- * we risk a completion of the pending despite having a
2483
- * task on a disallowed CPU.
2484
- *
2485
- * Serialized by p->pi_lock, so this is safe.
2486
- */
2487
- pending->arg.dest_cpu = dest_cpu;
2488
- }
2489
- }
2490
- pending = p->migration_pending;
2491
- /*
2492
- * - !MIGRATE_ENABLE:
2493
- * we'll have installed a pending if there wasn't one already.
2494
- *
2495
- * - MIGRATE_ENABLE:
2496
- * we're here because the current CPU isn't matching anymore,
2497
- * the only way that can happen is because of a concurrent
2498
- * set_cpus_allowed_ptr() call, which should then still be
2499
- * pending completion.
2500
- *
2501
- * Either way, we really should have a @pending here.
2502
- */
2503
- if (WARN_ON_ONCE(!pending)) {
2504
- task_rq_unlock(rq, p, rf);
2505
- return -EINVAL;
2506
- }
2507
-
2508
- if (task_running(rq, p) || p->state == TASK_WAKING) {
2509
- /*
2510
- * MIGRATE_ENABLE gets here because 'p == current', but for
2511
- * anything else we cannot do is_migration_disabled(), punt
2512
- * and have the stopper function handle it all race-free.
2513
- */
2514
- stop_pending = pending->stop_pending;
2515
- if (!stop_pending)
2516
- pending->stop_pending = true;
2517
-
2518
- if (flags & SCA_MIGRATE_ENABLE)
2519
- p->migration_flags &= ~MDF_PUSH;
2520
-
2521
- task_rq_unlock(rq, p, rf);
2522
-
2523
- if (!stop_pending) {
2524
- stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
2525
- &pending->arg, &pending->stop_work);
2526
- }
2527
-
2528
- if (flags & SCA_MIGRATE_ENABLE)
2529
- return 0;
2530
- } else {
2531
-
2532
- if (!is_migration_disabled(p)) {
2533
- if (task_on_rq_queued(p))
2534
- rq = move_queued_task(rq, rf, p, dest_cpu);
2535
-
2536
- if (!pending->stop_pending) {
2537
- p->migration_pending = NULL;
2538
- complete = true;
2539
- }
2540
- }
2541
- task_rq_unlock(rq, p, rf);
2542
-
2543
- if (complete)
2544
- complete_all(&pending->done);
2545
- }
2546
-
2547
- wait_for_completion(&pending->done);
2548
-
2549
- if (refcount_dec_and_test(&pending->refs))
2550
- wake_up_var(&pending->refs); /* No UaF, just an address */
2551
-
2552
- /*
2553
- * Block the original owner of &pending until all subsequent callers
2554
- * have seen the completion and decremented the refcount
2555
- */
2556
- wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs));
2557
-
2558
- /* ARGH */
2559
- WARN_ON_ONCE(my_pending.stop_pending);
2560
-
2561
- return 0;
25622068 }
25632069
25642070 /*
....@@ -2571,19 +2077,18 @@
25712077 * call is not atomic; no spinlocks may be held.
25722078 */
25732079 static int __set_cpus_allowed_ptr(struct task_struct *p,
2574
- const struct cpumask *new_mask,
2575
- u32 flags)
2080
+ const struct cpumask *new_mask, bool check)
25762081 {
25772082 struct rq_flags rf;
25782083 struct rq *rq;
25792084
25802085 rq = task_rq_lock(p, &rf);
2581
- return __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, &rf);
2086
+ return __set_cpus_allowed_ptr_locked(p, new_mask, check, rq, &rf);
25822087 }
25832088
25842089 int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
25852090 {
2586
- return __set_cpus_allowed_ptr(p, new_mask, 0);
2091
+ return __set_cpus_allowed_ptr(p, new_mask, false);
25872092 }
25882093 EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
25892094
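
From a caller's point of view the path above is reached through set_cpus_allowed_ptr(); a minimal sketch (process context, may sleep if the task has to be chased off a now-disallowed CPU):

#include <linux/cpumask.h>
#include <linux/sched.h>

/* Restrict @p to a single CPU. */
static int pin_task_to_cpu(struct task_struct *p, int cpu)
{
        /*
         * If @p is currently running on a disallowed CPU, this blocks in
         * stop_one_cpu() until migration_cpu_stop() has moved it away.
         */
        return set_cpus_allowed_ptr(p, cpumask_of(cpu));
}
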
....@@ -2692,8 +2197,6 @@
26922197 * Clearly, migrating tasks to offline CPUs is a fairly daft thing.
26932198 */
26942199 WARN_ON_ONCE(!cpu_online(new_cpu));
2695
-
2696
- WARN_ON_ONCE(is_migration_disabled(p));
26972200 #endif
26982201
26992202 trace_sched_migrate_task(p, new_cpu);
....@@ -2827,18 +2330,6 @@
28272330 }
28282331 EXPORT_SYMBOL_GPL(migrate_swap);
28292332
2830
-static bool check_task_state(struct task_struct *p, long match_state)
2831
-{
2832
- bool match = false;
2833
-
2834
- raw_spin_lock_irq(&p->pi_lock);
2835
- if (p->state == match_state || p->saved_state == match_state)
2836
- match = true;
2837
- raw_spin_unlock_irq(&p->pi_lock);
2838
-
2839
- return match;
2840
-}
2841
-
28422333 /*
28432334 * wait_task_inactive - wait for a thread to unschedule.
28442335 *
....@@ -2883,7 +2374,7 @@
28832374 * is actually now running somewhere else!
28842375 */
28852376 while (task_running(rq, p)) {
2886
- if (match_state && !check_task_state(p, match_state))
2377
+ if (match_state && unlikely(p->state != match_state))
28872378 return 0;
28882379 cpu_relax();
28892380 }
....@@ -2898,8 +2389,7 @@
28982389 running = task_running(rq, p);
28992390 queued = task_on_rq_queued(p);
29002391 ncsw = 0;
2901
- if (!match_state || p->state == match_state ||
2902
- p->saved_state == match_state)
2392
+ if (!match_state || p->state == match_state)
29032393 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
29042394 task_rq_unlock(rq, p, &rf);
29052395
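
For reference, wait_task_inactive() is used by callers such as ptrace_check_attach() to make sure the target has really scheduled out in the expected state before its context is touched; the calling pattern is roughly:

        /* e.g. before accessing a traced child's registers: */
        if (!wait_task_inactive(child, __TASK_TRACED))
                ret = -ESRCH;   /* it ran again or changed state */
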
....@@ -2933,7 +2423,7 @@
29332423 ktime_t to = NSEC_PER_SEC / HZ;
29342424
29352425 set_current_state(TASK_UNINTERRUPTIBLE);
2936
- schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
2426
+ schedule_hrtimeout(&to, HRTIMER_MODE_REL);
29372427 continue;
29382428 }
29392429
....@@ -3040,12 +2530,6 @@
30402530 }
30412531 fallthrough;
30422532 case possible:
3043
- /*
3044
- * XXX When called from select_task_rq() we only
3045
- * hold p->pi_lock and again violate locking order.
3046
- *
3047
- * More yuck to audit.
3048
- */
30492533 do_set_cpus_allowed(p, task_cpu_possible_mask(p));
30502534 state = fail;
30512535 break;
....@@ -3079,7 +2563,7 @@
30792563 {
30802564 lockdep_assert_held(&p->pi_lock);
30812565
3082
- if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p))
2566
+ if (p->nr_cpus_allowed > 1)
30832567 cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
30842568 else
30852569 cpu = cpumask_any(p->cpus_ptr);
....@@ -3102,7 +2586,6 @@
31022586
31032587 void sched_set_stop_task(int cpu, struct task_struct *stop)
31042588 {
3105
- static struct lock_class_key stop_pi_lock;
31062589 struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
31072590 struct task_struct *old_stop = cpu_rq(cpu)->stop;
31082591
....@@ -3118,20 +2601,6 @@
31182601 sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
31192602
31202603 stop->sched_class = &stop_sched_class;
3121
-
3122
- /*
3123
- * The PI code calls rt_mutex_setprio() with ->pi_lock held to
3124
- * adjust the effective priority of a task. As a result,
3125
- * rt_mutex_setprio() can trigger (RT) balancing operations,
3126
- * which can then trigger wakeups of the stop thread to push
3127
- * around the current task.
3128
- *
3129
- * The stop task itself will never be part of the PI-chain, it
3130
- * never blocks, therefore that ->pi_lock recursion is safe.
3131
- * Tell lockdep about this by placing the stop->pi_lock in its
3132
- * own class.
3133
- */
3134
- lockdep_set_class(&stop->pi_lock, &stop_pi_lock);
31352604 }
31362605
31372606 cpu_rq(cpu)->stop = stop;
....@@ -3145,23 +2614,15 @@
31452614 }
31462615 }
31472616
3148
-#else /* CONFIG_SMP */
2617
+#else
31492618
31502619 static inline int __set_cpus_allowed_ptr(struct task_struct *p,
3151
- const struct cpumask *new_mask,
3152
- u32 flags)
2620
+ const struct cpumask *new_mask, bool check)
31532621 {
31542622 return set_cpus_allowed_ptr(p, new_mask);
31552623 }
31562624
3157
-static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { }
3158
-
3159
-static inline bool rq_has_pinned_tasks(struct rq *rq)
3160
-{
3161
- return false;
3162
-}
3163
-
3164
-#endif /* !CONFIG_SMP */
2625
+#endif /* CONFIG_SMP */
31652626
31662627 static void
31672628 ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
....@@ -3595,7 +3056,7 @@
35953056 int cpu, success = 0;
35963057
35973058 preempt_disable();
3598
- if (!IS_ENABLED(CONFIG_PREEMPT_RT) && p == current) {
3059
+ if (p == current) {
35993060 /*
36003061 * We're waking current, this means 'p->on_rq' and 'task_cpu(p)
36013062 * == smp_processor_id()'. Together this means we can special
....@@ -3625,26 +3086,8 @@
36253086 */
36263087 raw_spin_lock_irqsave(&p->pi_lock, flags);
36273088 smp_mb__after_spinlock();
3628
- if (!(p->state & state)) {
3629
- /*
3630
- * The task might be running due to a spinlock sleeper
3631
- * wakeup. Check the saved state and set it to running
3632
- * if the wakeup condition is true.
3633
- */
3634
- if (!(wake_flags & WF_LOCK_SLEEPER)) {
3635
- if (p->saved_state & state) {
3636
- p->saved_state = TASK_RUNNING;
3637
- success = 1;
3638
- }
3639
- }
3089
+ if (!(p->state & state))
36403090 goto unlock;
3641
- }
3642
- /*
3643
- * If this is a regular wakeup, then we can unconditionally
3644
- * clear the saved state of a "lock sleeper".
3645
- */
3646
- if (!(wake_flags & WF_LOCK_SLEEPER))
3647
- p->saved_state = TASK_RUNNING;
36483091
36493092 #ifdef CONFIG_FREEZER
36503093 /*
....@@ -3853,18 +3296,6 @@
38533296 }
38543297 EXPORT_SYMBOL(wake_up_process);
38553298
3856
-/**
3857
- * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
3858
- * @p: The process to be woken up.
3859
- *
3860
- * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
3861
- * the nature of the wakeup.
3862
- */
3863
-int wake_up_lock_sleeper(struct task_struct *p)
3864
-{
3865
- return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER);
3866
-}
3867
-
38683299 int wake_up_state(struct task_struct *p, unsigned int state)
38693300 {
38703301 return try_to_wake_up(p, state, 0);
....@@ -3920,7 +3351,6 @@
39203351 init_numa_balancing(clone_flags, p);
39213352 #ifdef CONFIG_SMP
39223353 p->wake_entry.u_flags = CSD_TYPE_TTWU;
3923
- p->migration_pending = NULL;
39243354 #endif
39253355 }
39263356
....@@ -4099,9 +3529,6 @@
40993529 p->on_cpu = 0;
41003530 #endif
41013531 init_task_preempt_count(p);
4102
-#ifdef CONFIG_HAVE_PREEMPT_LAZY
4103
- task_thread_info(p)->preempt_lazy_count = 0;
4104
-#endif
41053532 #ifdef CONFIG_SMP
41063533 plist_node_init(&p->pushable_tasks, MAX_PRIO);
41073534 RB_CLEAR_NODE(&p->pushable_dl_tasks);
....@@ -4329,90 +3756,6 @@
43293756 #endif
43303757 }
43313758
4332
-#ifdef CONFIG_SMP
4333
-
4334
-static void do_balance_callbacks(struct rq *rq, struct callback_head *head)
4335
-{
4336
- void (*func)(struct rq *rq);
4337
- struct callback_head *next;
4338
-
4339
- lockdep_assert_held(&rq->lock);
4340
-
4341
- while (head) {
4342
- func = (void (*)(struct rq *))head->func;
4343
- next = head->next;
4344
- head->next = NULL;
4345
- head = next;
4346
-
4347
- func(rq);
4348
- }
4349
-}
4350
-
4351
-static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
4352
-{
4353
- struct callback_head *head = rq->balance_callback;
4354
-
4355
- lockdep_assert_held(&rq->lock);
4356
- if (head) {
4357
- rq->balance_callback = NULL;
4358
- rq->balance_flags &= ~BALANCE_WORK;
4359
- }
4360
-
4361
- return head;
4362
-}
4363
-
4364
-static void __balance_callbacks(struct rq *rq)
4365
-{
4366
- do_balance_callbacks(rq, splice_balance_callbacks(rq));
4367
-}
4368
-
4369
-static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
4370
-{
4371
- unsigned long flags;
4372
-
4373
- if (unlikely(head)) {
4374
- raw_spin_lock_irqsave(&rq->lock, flags);
4375
- do_balance_callbacks(rq, head);
4376
- raw_spin_unlock_irqrestore(&rq->lock, flags);
4377
- }
4378
-}
4379
-
4380
-static void balance_push(struct rq *rq);
4381
-
4382
-static inline void balance_switch(struct rq *rq)
4383
-{
4384
- if (likely(!rq->balance_flags))
4385
- return;
4386
-
4387
- if (rq->balance_flags & BALANCE_PUSH) {
4388
- balance_push(rq);
4389
- return;
4390
- }
4391
-
4392
- __balance_callbacks(rq);
4393
-}
4394
-
4395
-#else
4396
-
4397
-static inline void __balance_callbacks(struct rq *rq)
4398
-{
4399
-}
4400
-
4401
-static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
4402
-{
4403
- return NULL;
4404
-}
4405
-
4406
-static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
4407
-{
4408
-}
4409
-
4410
-static inline void balance_switch(struct rq *rq)
4411
-{
4412
-}
4413
-
4414
-#endif
4415
-
44163759 static inline void
44173760 prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
44183761 {
....@@ -4438,7 +3781,6 @@
44383781 * prev into current:
44393782 */
44403783 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
4441
- balance_switch(rq);
44423784 raw_spin_unlock_irq(&rq->lock);
44433785 }
44443786
....@@ -4453,22 +3795,6 @@
44533795 #ifndef finish_arch_post_lock_switch
44543796 # define finish_arch_post_lock_switch() do { } while (0)
44553797 #endif
4456
-
4457
-static inline void kmap_local_sched_out(void)
4458
-{
4459
-#ifdef CONFIG_KMAP_LOCAL
4460
- if (unlikely(current->kmap_ctrl.idx))
4461
- __kmap_local_sched_out();
4462
-#endif
4463
-}
4464
-
4465
-static inline void kmap_local_sched_in(void)
4466
-{
4467
-#ifdef CONFIG_KMAP_LOCAL
4468
- if (unlikely(current->kmap_ctrl.idx))
4469
- __kmap_local_sched_in();
4470
-#endif
4471
-}
44723798
44733799 /**
44743800 * prepare_task_switch - prepare to switch tasks
....@@ -4492,7 +3818,6 @@
44923818 perf_event_task_sched_out(prev, next);
44933819 rseq_preempt(prev);
44943820 fire_sched_out_preempt_notifiers(prev, next);
4495
- kmap_local_sched_out();
44963821 prepare_task(next);
44973822 prepare_arch_switch(next);
44983823 }
....@@ -4559,7 +3884,6 @@
45593884 finish_lock_switch(rq);
45603885 finish_arch_post_lock_switch();
45613886 kcov_finish_switch(current);
4562
- kmap_local_sched_in();
45633887
45643888 fire_sched_in_preempt_notifiers(current);
45653889 /*
....@@ -4574,17 +3898,23 @@
45743898 * provided by mmdrop(),
45753899 * - a sync_core for SYNC_CORE.
45763900 */
4577
- /*
4578
- * We use mmdrop_delayed() here so we don't have to do the
4579
- * full __mmdrop() when we are the last user.
4580
- */
45813901 if (mm) {
45823902 membarrier_mm_sync_core_before_usermode(mm);
4583
- mmdrop_delayed(mm);
3903
+ mmdrop(mm);
45843904 }
45853905 if (unlikely(prev_state == TASK_DEAD)) {
45863906 if (prev->sched_class->task_dead)
45873907 prev->sched_class->task_dead(prev);
3908
+
3909
+ /*
3910
+ * Remove function-return probe instances associated with this
3911
+ * task and put them back on the free list.
3912
+ */
3913
+ kprobe_flush_task(prev);
3914
+ trace_android_rvh_flush_task(prev);
3915
+
3916
+ /* Task is done with its stack. */
3917
+ put_task_stack(prev);
45883918
45893919 put_task_struct_rcu_user(prev);
45903920 }
....@@ -4592,6 +3922,43 @@
45923922 tick_nohz_task_switch();
45933923 return rq;
45943924 }
3925
+
3926
+#ifdef CONFIG_SMP
3927
+
3928
+/* rq->lock is NOT held, but preemption is disabled */
3929
+static void __balance_callback(struct rq *rq)
3930
+{
3931
+ struct callback_head *head, *next;
3932
+ void (*func)(struct rq *rq);
3933
+ unsigned long flags;
3934
+
3935
+ raw_spin_lock_irqsave(&rq->lock, flags);
3936
+ head = rq->balance_callback;
3937
+ rq->balance_callback = NULL;
3938
+ while (head) {
3939
+ func = (void (*)(struct rq *))head->func;
3940
+ next = head->next;
3941
+ head->next = NULL;
3942
+ head = next;
3943
+
3944
+ func(rq);
3945
+ }
3946
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
3947
+}
3948
+
3949
+static inline void balance_callback(struct rq *rq)
3950
+{
3951
+ if (unlikely(rq->balance_callback))
3952
+ __balance_callback(rq);
3953
+}
3954
+
3955
+#else
3956
+
3957
+static inline void balance_callback(struct rq *rq)
3958
+{
3959
+}
3960
+
3961
+#endif
45953962
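
balance_callback() drains the single-linked list of struct callback_head hanging off rq->balance_callback. The producer side is queue_balance_callback() in kernel/sched/sched.h, which the RT and deadline classes use to defer push/pull operations until rq->lock can be dropped; its shape is roughly (sketch, not part of this patch):

static inline void
queue_balance_callback(struct rq *rq, struct callback_head *head,
                       void (*func)(struct rq *rq))
{
        lockdep_assert_held(&rq->lock);

        if (unlikely(head->next))       /* already queued */
                return;

        head->func = (void (*)(struct callback_head *))func;
        head->next = rq->balance_callback;
        rq->balance_callback = head;
}
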
45963963 /**
45973964 * schedule_tail - first thing a freshly forked thread must call.
....@@ -4612,6 +3979,7 @@
46123979 */
46133980
46143981 rq = finish_task_switch(prev);
3982
+ balance_callback(rq);
46153983 preempt_enable();
46163984
46173985 if (current->set_child_tid)
....@@ -5170,8 +4538,7 @@
51704538 pr_err("Preemption disabled at:");
51714539 print_ip_sym(KERN_ERR, preempt_disable_ip);
51724540 }
5173
- if (panic_on_warn)
5174
- panic("scheduling while atomic\n");
4541
+ check_panic_on_warn("scheduling while atomic");
51754542
51764543 trace_android_rvh_schedule_bug(prev);
51774544
....@@ -5317,7 +4684,7 @@
53174684 *
53184685 * WARNING: must be called with preemption disabled!
53194686 */
5320
-static void __sched notrace __schedule(bool preempt, bool spinning_lock)
4687
+static void __sched notrace __schedule(bool preempt)
53214688 {
53224689 struct task_struct *prev, *next;
53234690 unsigned long *switch_count;
....@@ -5370,7 +4737,7 @@
53704737 * - ptrace_{,un}freeze_traced() can change ->state underneath us.
53714738 */
53724739 prev_state = prev->state;
5373
- if ((!preempt || spinning_lock) && prev_state) {
4740
+ if (!preempt && prev_state) {
53744741 if (signal_pending_state(prev_state, prev)) {
53754742 prev->state = TASK_RUNNING;
53764743 } else {
....@@ -5405,7 +4772,6 @@
54054772
54064773 next = pick_next_task(rq, prev, &rf);
54074774 clear_tsk_need_resched(prev);
5408
- clear_tsk_need_resched_lazy(prev);
54094775 clear_preempt_need_resched();
54104776
54114777 trace_android_rvh_schedule(prev, next, rq);
....@@ -5432,7 +4798,6 @@
54324798 */
54334799 ++*switch_count;
54344800
5435
- migrate_disable_switch(rq, prev);
54364801 psi_sched_switch(prev, next, !task_on_rq_queued(prev));
54374802
54384803 trace_sched_switch(preempt, prev, next);
....@@ -5441,11 +4806,10 @@
54414806 rq = context_switch(rq, prev, next, &rf);
54424807 } else {
54434808 rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
5444
-
5445
- rq_unpin_lock(rq, &rf);
5446
- __balance_callbacks(rq);
5447
- raw_spin_unlock_irq(&rq->lock);
4809
+ rq_unlock_irq(rq, &rf);
54484810 }
4811
+
4812
+ balance_callback(rq);
54494813 }
54504814
54514815 void __noreturn do_task_dead(void)
....@@ -5456,7 +4820,7 @@
54564820 /* Tell freezer to ignore us: */
54574821 current->flags |= PF_NOFREEZE;
54584822
5459
- __schedule(false, false);
4823
+ __schedule(false);
54604824 BUG();
54614825
54624826 /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */
....@@ -5489,6 +4853,9 @@
54894853 preempt_enable_no_resched();
54904854 }
54914855
4856
+ if (tsk_is_pi_blocked(tsk))
4857
+ return;
4858
+
54924859 /*
54934860 * If we are going to sleep and we have plugged IO queued,
54944861 * make sure to submit it to avoid deadlocks.
....@@ -5514,7 +4881,7 @@
55144881 sched_submit_work(tsk);
55154882 do {
55164883 preempt_disable();
5517
- __schedule(false, false);
4884
+ __schedule(false);
55184885 sched_preempt_enable_no_resched();
55194886 } while (need_resched());
55204887 sched_update_worker(tsk);
....@@ -5542,7 +4909,7 @@
55424909 */
55434910 WARN_ON_ONCE(current->state);
55444911 do {
5545
- __schedule(false, false);
4912
+ __schedule(false);
55464913 } while (need_resched());
55474914 }
55484915
....@@ -5595,7 +4962,7 @@
55954962 */
55964963 preempt_disable_notrace();
55974964 preempt_latency_start(1);
5598
- __schedule(true, false);
4965
+ __schedule(true);
55994966 preempt_latency_stop(1);
56004967 preempt_enable_no_resched_notrace();
56014968
....@@ -5605,30 +4972,6 @@
56054972 */
56064973 } while (need_resched());
56074974 }
5608
-
5609
-#ifdef CONFIG_PREEMPT_LAZY
5610
-/*
5611
- * If TIF_NEED_RESCHED is then we allow to be scheduled away since this is
5612
- * set by a RT task. Oterwise we try to avoid beeing scheduled out as long as
5613
- * preempt_lazy_count counter >0.
5614
- */
5615
-static __always_inline int preemptible_lazy(void)
5616
-{
5617
- if (test_thread_flag(TIF_NEED_RESCHED))
5618
- return 1;
5619
- if (current_thread_info()->preempt_lazy_count)
5620
- return 0;
5621
- return 1;
5622
-}
5623
-
5624
-#else
5625
-
5626
-static inline int preemptible_lazy(void)
5627
-{
5628
- return 1;
5629
-}
5630
-
5631
-#endif
56324975
56334976 #ifdef CONFIG_PREEMPTION
56344977 /*
....@@ -5643,25 +4986,11 @@
56434986 */
56444987 if (likely(!preemptible()))
56454988 return;
5646
- if (!preemptible_lazy())
5647
- return;
4989
+
56484990 preempt_schedule_common();
56494991 }
56504992 NOKPROBE_SYMBOL(preempt_schedule);
56514993 EXPORT_SYMBOL(preempt_schedule);
5652
-
5653
-#ifdef CONFIG_PREEMPT_RT
5654
-void __sched notrace preempt_schedule_lock(void)
5655
-{
5656
- do {
5657
- preempt_disable();
5658
- __schedule(true, true);
5659
- sched_preempt_enable_no_resched();
5660
- } while (need_resched());
5661
-}
5662
-NOKPROBE_SYMBOL(preempt_schedule_lock);
5663
-EXPORT_SYMBOL(preempt_schedule_lock);
5664
-#endif
56654994
56664995 /**
56674996 * preempt_schedule_notrace - preempt_schedule called by tracing
....@@ -5682,9 +5011,6 @@
56825011 enum ctx_state prev_ctx;
56835012
56845013 if (likely(!preemptible()))
5685
- return;
5686
-
5687
- if (!preemptible_lazy())
56885014 return;
56895015
56905016 do {
....@@ -5709,7 +5035,7 @@
57095035 * an infinite recursion.
57105036 */
57115037 prev_ctx = exception_enter();
5712
- __schedule(true, false);
5038
+ __schedule(true);
57135039 exception_exit(prev_ctx);
57145040
57155041 preempt_latency_stop(1);
....@@ -5738,7 +5064,7 @@
57385064 do {
57395065 preempt_disable();
57405066 local_irq_enable();
5741
- __schedule(true, false);
5067
+ __schedule(true);
57425068 local_irq_disable();
57435069 sched_preempt_enable_no_resched();
57445070 } while (need_resched());
....@@ -5905,11 +5231,9 @@
59055231 out_unlock:
59065232 /* Avoid rq from going away on us: */
59075233 preempt_disable();
5234
+ __task_rq_unlock(rq, &rf);
59085235
5909
- rq_unpin_lock(rq, &rf);
5910
- __balance_callbacks(rq);
5911
- raw_spin_unlock(&rq->lock);
5912
-
5236
+ balance_callback(rq);
59135237 preempt_enable();
59145238 }
59155239 #else
....@@ -6154,11 +5478,11 @@
61545478 int oldpolicy = -1, policy = attr->sched_policy;
61555479 int retval, oldprio, newprio, queued, running;
61565480 const struct sched_class *prev_class;
6157
- struct callback_head *head;
61585481 struct rq_flags rf;
61595482 int reset_on_fork;
61605483 int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
61615484 struct rq *rq;
5485
+ bool cpuset_locked = false;
61625486
61635487 /* The pi code expects interrupts enabled */
61645488 BUG_ON(pi && in_interrupt());
....@@ -6261,6 +5585,15 @@
62615585 }
62625586
62635587 /*
5588
+ * SCHED_DEADLINE bandwidth accounting relies on stable cpusets
5589
+ * information.
5590
+ */
5591
+ if (dl_policy(policy) || dl_policy(p->policy)) {
5592
+ cpuset_locked = true;
5593
+ cpuset_lock();
5594
+ }
5595
+
5596
+ /*
62645597 * Make sure no PI-waiters arrive (or leave) while we are
62655598 * changing the priority of the task:
62665599 *
....@@ -6334,6 +5667,8 @@
63345667 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
63355668 policy = oldpolicy = -1;
63365669 task_rq_unlock(rq, p, &rf);
5670
+ if (cpuset_locked)
5671
+ cpuset_unlock();
63375672 goto recheck;
63385673 }
63395674
....@@ -6397,20 +5732,24 @@
63975732
63985733 /* Avoid rq from going away on us: */
63995734 preempt_disable();
6400
- head = splice_balance_callbacks(rq);
64015735 task_rq_unlock(rq, p, &rf);
64025736
6403
- if (pi)
5737
+ if (pi) {
5738
+ if (cpuset_locked)
5739
+ cpuset_unlock();
64045740 rt_mutex_adjust_pi(p);
5741
+ }
64055742
64065743 /* Run balance callbacks after we've adjusted the PI chain: */
6407
- balance_callbacks(rq, head);
5744
+ balance_callback(rq);
64085745 preempt_enable();
64095746
64105747 return 0;
64115748
64125749 unlock:
64135750 task_rq_unlock(rq, p, &rf);
5751
+ if (cpuset_locked)
5752
+ cpuset_unlock();
64145753 return retval;
64155754 }
64165755
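
The new cpuset_lock()/cpuset_unlock() bracketing only triggers for transitions to or from SCHED_DEADLINE, i.e. for requests like the userspace sketch below (struct sched_attr is re-declared locally because glibc ships no wrapper; the runtime/deadline/period values are arbitrary examples):

#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/sched.h>                /* SCHED_DEADLINE */

struct sched_attr {                     /* mirrors uapi/linux/sched/types.h */
        uint32_t size;
        uint32_t sched_policy;
        uint64_t sched_flags;
        int32_t  sched_nice;
        uint32_t sched_priority;
        uint64_t sched_runtime;
        uint64_t sched_deadline;
        uint64_t sched_period;
};

int main(void)
{
        struct sched_attr attr = {
                .size           = sizeof(attr),
                .sched_policy   = SCHED_DEADLINE,
                .sched_runtime  = 10000000,     /* 10 ms of budget ... */
                .sched_deadline = 30000000,     /* ... due within 30 ms ... */
                .sched_period   = 30000000,     /* ... every 30 ms */
        };

        if (syscall(SYS_sched_setattr, 0, &attr, 0))
                perror("sched_setattr");
        return 0;
}
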
....@@ -6916,7 +6255,7 @@
69166255 }
69176256 #endif
69186257 again:
6919
- retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK);
6258
+ retval = __set_cpus_allowed_ptr(p, new_mask, true);
69206259
69216260 if (!retval) {
69226261 cpuset_cpus_allowed(p, cpus_allowed);
....@@ -7024,14 +6363,14 @@
70246363 if (len & (sizeof(unsigned long)-1))
70256364 return -EINVAL;
70266365
7027
- if (!alloc_cpumask_var(&mask, GFP_KERNEL))
6366
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
70286367 return -ENOMEM;
70296368
70306369 ret = sched_getaffinity(pid, mask);
70316370 if (ret == 0) {
70326371 unsigned int retlen = min(len, cpumask_size());
70336372
7034
- if (copy_to_user(user_mask_ptr, mask, retlen))
6373
+ if (copy_to_user(user_mask_ptr, cpumask_bits(mask), retlen))
70356374 ret = -EFAULT;
70366375 else
70376376 ret = retlen;
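
The switch to zalloc_cpumask_var() and cpumask_bits() matters because, as shown above, the kernel copies back min(len, cpumask_size()) bytes, which can cover more memory than the nr_cpu_ids bits actually written; starting from a zeroed mask avoids copying uninitialized kernel memory to userspace. The corresponding userspace call, for reference:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
        cpu_set_t set;

        CPU_ZERO(&set);
        if (sched_getaffinity(0, sizeof(set), &set) == 0)
                printf("CPU0 allowed for this task: %d\n", CPU_ISSET(0, &set));
        return 0;
}
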
....@@ -7498,7 +6837,7 @@
74986837 *
74996838 * And since this is boot we can forgo the serialization.
75006839 */
7501
- set_cpus_allowed_common(idle, cpumask_of(cpu), 0);
6840
+ set_cpus_allowed_common(idle, cpumask_of(cpu));
75026841 #endif
75036842 /*
75046843 * We're having a chicken and egg problem, even though we are
....@@ -7525,9 +6864,7 @@
75256864
75266865 /* Set the preempt count _outside_ the spinlocks! */
75276866 init_idle_preempt_count(idle, cpu);
7528
-#ifdef CONFIG_HAVE_PREEMPT_LAZY
7529
- task_thread_info(idle)->preempt_lazy_count = 0;
7530
-#endif
6867
+
75316868 /*
75326869 * The idle tasks have their own, simple scheduling class:
75336870 */
....@@ -7554,8 +6891,7 @@
75546891 return ret;
75556892 }
75566893
7557
-int task_can_attach(struct task_struct *p,
7558
- const struct cpumask *cs_effective_cpus)
6894
+int task_can_attach(struct task_struct *p)
75596895 {
75606896 int ret = 0;
75616897
....@@ -7568,21 +6904,9 @@
75686904 * success of set_cpus_allowed_ptr() on all attached tasks
75696905 * before cpus_mask may be changed.
75706906 */
7571
- if (p->flags & PF_NO_SETAFFINITY) {
6907
+ if (p->flags & PF_NO_SETAFFINITY)
75726908 ret = -EINVAL;
7573
- goto out;
7574
- }
75756909
7576
- if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
7577
- cs_effective_cpus)) {
7578
- int cpu = cpumask_any_and(cpu_active_mask, cs_effective_cpus);
7579
-
7580
- if (unlikely(cpu >= nr_cpu_ids))
7581
- return -EINVAL;
7582
- ret = dl_cpu_busy(cpu, p);
7583
- }
7584
-
7585
-out:
75866910 return ret;
75876911 }
75886912
....@@ -7637,7 +6961,6 @@
76376961 #endif /* CONFIG_NUMA_BALANCING */
76386962
76396963 #ifdef CONFIG_HOTPLUG_CPU
7640
-
76416964 /*
76426965 * Ensure that the idle task is using init_mm right before its CPU goes
76436966 * offline.
....@@ -7657,124 +6980,166 @@
76576980 /* finish_cpu(), as ran on the BP, will clean up the active_mm state */
76586981 }
76596982
7660
-static int __balance_push_cpu_stop(void *arg)
6983
+/*
6984
+ * Since this CPU is going 'away' for a while, fold any nr_active delta
6985
+ * we might have. Assumes we're called after migrate_tasks() so that the
6986
+ * nr_active count is stable. We need to take the teardown thread which
6987
+ * is calling this into account, so we hand in adjust = 1 to the load
6988
+ * calculation.
6989
+ *
6990
+ * Also see the comment "Global load-average calculations".
6991
+ */
6992
+static void calc_load_migrate(struct rq *rq)
76616993 {
7662
- struct task_struct *p = arg;
7663
- struct rq *rq = this_rq();
7664
- struct rq_flags rf;
7665
- int cpu;
6994
+ long delta = calc_load_fold_active(rq, 1);
6995
+ if (delta)
6996
+ atomic_long_add(delta, &calc_load_tasks);
6997
+}
76666998
7667
- raw_spin_lock_irq(&p->pi_lock);
7668
- rq_lock(rq, &rf);
6999
+static struct task_struct *__pick_migrate_task(struct rq *rq)
7000
+{
7001
+ const struct sched_class *class;
7002
+ struct task_struct *next;
76697003
7004
+ for_each_class(class) {
7005
+ next = class->pick_next_task(rq);
7006
+ if (next) {
7007
+ next->sched_class->put_prev_task(rq, next);
7008
+ return next;
7009
+ }
7010
+ }
7011
+
7012
+ /* The idle class should always have a runnable task */
7013
+ BUG();
7014
+}
7015
+
7016
+/*
7017
+ * Migrate all tasks from the rq, sleeping tasks will be migrated by
7018
+ * try_to_wake_up()->select_task_rq().
7019
+ *
7020
+ * Called with rq->lock held even though we'er in stop_machine() and
7021
+ * there's no concurrency possible, we hold the required locks anyway
7022
+ * because of lock validation efforts.
7023
+ *
7024
+ * force: if false, the function will skip CPU pinned kthreads.
7025
+ */
7026
+static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf, bool force)
7027
+{
7028
+ struct rq *rq = dead_rq;
7029
+ struct task_struct *next, *tmp, *stop = rq->stop;
7030
+ LIST_HEAD(percpu_kthreads);
7031
+ struct rq_flags orf = *rf;
7032
+ int dest_cpu;
7033
+
7034
+ /*
7035
+ * Fudge the rq selection such that the below task selection loop
7036
+ * doesn't get stuck on the currently eligible stop task.
7037
+ *
7038
+ * We're currently inside stop_machine() and the rq is either stuck
7039
+ * in the stop_machine_cpu_stop() loop, or we're executing this code,
7040
+ * either way we should never end up calling schedule() until we're
7041
+ * done here.
7042
+ */
7043
+ rq->stop = NULL;
7044
+
7045
+ /*
7046
+ * put_prev_task() and pick_next_task() sched
7047
+ * class method both need to have an up-to-date
7048
+ * value of rq->clock[_task]
7049
+ */
76707050 update_rq_clock(rq);
76717051
7672
- if (task_rq(p) == rq && task_on_rq_queued(p)) {
7673
- cpu = select_fallback_rq(rq->cpu, p);
7674
- rq = __migrate_task(rq, &rf, p, cpu);
7675
- }
7052
+#ifdef CONFIG_SCHED_DEBUG
7053
+ /* note the clock update in orf */
7054
+ orf.clock_update_flags |= RQCF_UPDATED;
7055
+#endif
76767056
7677
- rq_unlock(rq, &rf);
7678
- raw_spin_unlock_irq(&p->pi_lock);
7679
-
7680
- put_task_struct(p);
7681
-
7682
- return 0;
7683
-}
7684
-
7685
-static DEFINE_PER_CPU(struct cpu_stop_work, push_work);
7686
-
7687
-/*
7688
- * Ensure we only run per-cpu kthreads once the CPU goes !active.
7689
- */
7690
-
7691
-
7692
-static void balance_push(struct rq *rq)
7693
-{
7694
- struct task_struct *push_task = rq->curr;
7695
-
7696
- lockdep_assert_held(&rq->lock);
7697
- SCHED_WARN_ON(rq->cpu != smp_processor_id());
7698
-
7699
- /*
7700
- * Both the cpu-hotplug and stop task are in this case and are
7701
- * required to complete the hotplug process.
7702
- */
7703
- if (is_per_cpu_kthread(push_task) || is_migration_disabled(push_task)) {
7057
+ for (;;) {
77047058 /*
7705
- * If this is the idle task on the outgoing CPU try to wake
7706
- * up the hotplug control thread which might wait for the
7707
- * last task to vanish. The rcuwait_active() check is
7708
- * accurate here because the waiter is pinned on this CPU
7709
- * and can't obviously be running in parallel.
7710
- *
7711
- * On RT kernels this also has to check whether there are
7712
- * pinned and scheduled out tasks on the runqueue. They
7713
- * need to leave the migrate disabled section first.
7059
+ * There's this thread running, bail when that's the only
7060
+ * remaining thread:
77147061 */
7715
- if (!rq->nr_running && !rq_has_pinned_tasks(rq) &&
7716
- rcuwait_active(&rq->hotplug_wait)) {
7717
- raw_spin_unlock(&rq->lock);
7718
- rcuwait_wake_up(&rq->hotplug_wait);
7719
- raw_spin_lock(&rq->lock);
7062
+ if (rq->nr_running == 1)
7063
+ break;
7064
+
7065
+ next = __pick_migrate_task(rq);
7066
+
7067
+ /*
7068
+ * Argh ... no iterator for tasks, we need to remove the
7069
+ * kthread from the run-queue to continue.
7070
+ */
7071
+ if (!force && is_per_cpu_kthread(next)) {
7072
+ INIT_LIST_HEAD(&next->percpu_kthread_node);
7073
+ list_add(&next->percpu_kthread_node, &percpu_kthreads);
7074
+
7075
+ /* DEQUEUE_SAVE not used due to move_entity in rt */
7076
+ deactivate_task(rq, next,
7077
+ DEQUEUE_NOCLOCK);
7078
+ continue;
77207079 }
7721
- return;
7080
+
7081
+ /*
7082
+ * Rules for changing task_struct::cpus_mask are holding
7083
+ * both pi_lock and rq->lock, such that holding either
7084
+ * stabilizes the mask.
7085
+ *
7086
+ * Drop rq->lock is not quite as disastrous as it usually is
7087
+ * because !cpu_active at this point, which means load-balance
7088
+ * will not interfere. Also, stop-machine.
7089
+ */
7090
+ rq_unlock(rq, rf);
7091
+ raw_spin_lock(&next->pi_lock);
7092
+ rq_relock(rq, rf);
7093
+
7094
+ /*
7095
+ * Since we're inside stop-machine, _nothing_ should have
7096
+ * changed the task, WARN if weird stuff happened, because in
7097
+ * that case the above rq->lock drop is a fail too.
7098
+ */
7099
+ if (task_rq(next) != rq || !task_on_rq_queued(next)) {
7100
+ /*
7101
+ * In the !force case, there is a hole between
7102
+ * rq_unlock() and rq_relock(), where another CPU might
7103
+ * not observe an up to date cpu_active_mask and try to
7104
+ * move tasks around.
7105
+ */
7106
+ WARN_ON(force);
7107
+ raw_spin_unlock(&next->pi_lock);
7108
+ continue;
7109
+ }
7110
+
7111
+ /* Find suitable destination for @next, with force if needed. */
7112
+ dest_cpu = select_fallback_rq(dead_rq->cpu, next);
7113
+ rq = __migrate_task(rq, rf, next, dest_cpu);
7114
+ if (rq != dead_rq) {
7115
+ rq_unlock(rq, rf);
7116
+ rq = dead_rq;
7117
+ *rf = orf;
7118
+ rq_relock(rq, rf);
7119
+ }
7120
+ raw_spin_unlock(&next->pi_lock);
77227121 }
77237122
7724
- get_task_struct(push_task);
7725
- /*
7726
- * Temporarily drop rq->lock such that we can wake-up the stop task.
7727
- * Both preemption and IRQs are still disabled.
7728
- */
7729
- raw_spin_unlock(&rq->lock);
7730
- stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task,
7731
- this_cpu_ptr(&push_work));
7732
- /*
7733
- * At this point need_resched() is true and we'll take the loop in
7734
- * schedule(). The next pick is obviously going to be the stop task
7735
- * which is_per_cpu_kthread() and will push this task away.
7736
- */
7737
- raw_spin_lock(&rq->lock);
7738
-}
7123
+ list_for_each_entry_safe(next, tmp, &percpu_kthreads,
7124
+ percpu_kthread_node) {
77397125
7740
-static void balance_push_set(int cpu, bool on)
7741
-{
7742
- struct rq *rq = cpu_rq(cpu);
7743
- struct rq_flags rf;
7126
+ /* ENQUEUE_RESTORE not used due to move_entity in rt */
7127
+ activate_task(rq, next, ENQUEUE_NOCLOCK);
7128
+ list_del(&next->percpu_kthread_node);
7129
+ }
77447130
7745
- rq_lock_irqsave(rq, &rf);
7746
- if (on)
7747
- rq->balance_flags |= BALANCE_PUSH;
7748
- else
7749
- rq->balance_flags &= ~BALANCE_PUSH;
7750
- rq_unlock_irqrestore(rq, &rf);
7751
-}
7752
-
7753
-/*
7754
- * Invoked from a CPUs hotplug control thread after the CPU has been marked
7755
- * inactive. All tasks which are not per CPU kernel threads are either
7756
- * pushed off this CPU now via balance_push() or placed on a different CPU
7757
- * during wakeup. Wait until the CPU is quiescent.
7758
- */
7759
-static void balance_hotplug_wait(void)
7760
-{
7761
- struct rq *rq = this_rq();
7762
-
7763
- rcuwait_wait_event(&rq->hotplug_wait,
7764
- rq->nr_running == 1 && !rq_has_pinned_tasks(rq),
7765
- TASK_UNINTERRUPTIBLE);
7131
+ rq->stop = stop;
77667132 }
77677133
77687134 static int drain_rq_cpu_stop(void *data)
77697135 {
7770
-#ifndef CONFIG_PREEMPT_RT
77717136 struct rq *rq = this_rq();
77727137 struct rq_flags rf;
77737138
77747139 rq_lock_irqsave(rq, &rf);
77757140 migrate_tasks(rq, &rf, false);
77767141 rq_unlock_irqrestore(rq, &rf);
7777
-#endif
7142
+
77787143 return 0;
77797144 }
77807145
....@@ -7799,21 +7164,6 @@
77997164 if (rq_drain->done)
78007165 cpu_stop_work_wait(rq_drain);
78017166 }
7802
-
7803
-#else
7804
-
7805
-static inline void balance_push(struct rq *rq)
7806
-{
7807
-}
7808
-
7809
-static inline void balance_push_set(int cpu, bool on)
7810
-{
7811
-}
7812
-
7813
-static inline void balance_hotplug_wait(void)
7814
-{
7815
-}
7816
-
78177167 #endif /* CONFIG_HOTPLUG_CPU */
78187168
78197169 void set_rq_online(struct rq *rq)
....@@ -7884,7 +7234,7 @@
78847234 static int cpuset_cpu_inactive(unsigned int cpu)
78857235 {
78867236 if (!cpuhp_tasks_frozen) {
7887
- int ret = dl_cpu_busy(cpu, NULL);
7237
+ int ret = dl_bw_check_overflow(cpu);
78887238
78897239 if (ret)
78907240 return ret;
....@@ -7900,8 +7250,6 @@
79007250 {
79017251 struct rq *rq = cpu_rq(cpu);
79027252 struct rq_flags rf;
7903
-
7904
- balance_push_set(cpu, false);
79057253
79067254 #ifdef CONFIG_SCHED_SMT
79077255 /*
....@@ -7956,21 +7304,9 @@
79567304
79577305 int _sched_cpu_deactivate(unsigned int cpu)
79587306 {
7959
- struct rq *rq = cpu_rq(cpu);
7960
- struct rq_flags rf;
79617307 int ret;
79627308
79637309 set_cpu_active(cpu, false);
7964
-
7965
- balance_push_set(cpu, true);
7966
-
7967
- rq_lock_irqsave(rq, &rf);
7968
- if (rq->rd) {
7969
- update_rq_clock(rq);
7970
- BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
7971
- set_rq_offline(rq);
7972
- }
7973
- rq_unlock_irqrestore(rq, &rf);
79747310
79757311 #ifdef CONFIG_SCHED_SMT
79767312 /*
....@@ -7985,7 +7321,6 @@
79857321
79867322 ret = cpuset_cpu_inactive(cpu);
79877323 if (ret) {
7988
- balance_push_set(cpu, false);
79897324 set_cpu_active(cpu, true);
79907325 return ret;
79917326 }
....@@ -8049,41 +7384,6 @@
80497384 }
80507385
80517386 #ifdef CONFIG_HOTPLUG_CPU
8052
-
8053
-/*
8054
- * Invoked immediately before the stopper thread is invoked to bring the
8055
- * CPU down completely. At this point all per CPU kthreads except the
8056
- * hotplug thread (current) and the stopper thread (inactive) have been
8057
- * either parked or have been unbound from the outgoing CPU. Ensure that
8058
- * any of those which might be on the way out are gone.
8059
- *
8060
- * If after this point a bound task is being woken on this CPU then the
8061
- * responsible hotplug callback has failed to do it's job.
8062
- * sched_cpu_dying() will catch it with the appropriate fireworks.
8063
- */
8064
-int sched_cpu_wait_empty(unsigned int cpu)
8065
-{
8066
- balance_hotplug_wait();
8067
- return 0;
8068
-}
8069
-
8070
-/*
8071
- * Since this CPU is going 'away' for a while, fold any nr_active delta we
8072
- * might have. Called from the CPU stopper task after ensuring that the
8073
- * stopper is the last running task on the CPU, so nr_active count is
8074
- * stable. We need to take the teardown thread which is calling this into
8075
- * account, so we hand in adjust = 1 to the load calculation.
8076
- *
8077
- * Also see the comment "Global load-average calculations".
8078
- */
8079
-static void calc_load_migrate(struct rq *rq)
8080
-{
8081
- long delta = calc_load_fold_active(rq, 1);
8082
-
8083
- if (delta)
8084
- atomic_long_add(delta, &calc_load_tasks);
8085
-}
8086
-
80877387 int sched_cpu_dying(unsigned int cpu)
80887388 {
80897389 struct rq *rq = cpu_rq(cpu);
....@@ -8093,7 +7393,12 @@
80937393 sched_tick_stop(cpu);
80947394
80957395 rq_lock_irqsave(rq, &rf);
8096
- BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq));
7396
+ if (rq->rd) {
7397
+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
7398
+ set_rq_offline(rq);
7399
+ }
7400
+ migrate_tasks(rq, &rf, true);
7401
+ BUG_ON(rq->nr_running != 1);
80977402 rq_unlock_irqrestore(rq, &rf);
80987403
80997404 trace_android_rvh_sched_cpu_dying(cpu);
....@@ -8304,9 +7609,6 @@
83047609
83057610 rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func);
83067611 #endif
8307
-#ifdef CONFIG_HOTPLUG_CPU
8308
- rcuwait_init(&rq->hotplug_wait);
8309
-#endif
83107612 #endif /* CONFIG_SMP */
83117613 hrtick_rq_init(rq);
83127614 atomic_set(&rq->nr_iowait, 0);
....@@ -8347,7 +7649,7 @@
83477649 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
83487650 static inline int preempt_count_equals(int preempt_offset)
83497651 {
8350
- int nested = preempt_count() + sched_rcu_preempt_depth();
7652
+ int nested = preempt_count() + rcu_preempt_depth();
83517653
83527654 return (nested == preempt_offset);
83537655 }
....@@ -8447,39 +7749,6 @@
84477749 add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
84487750 }
84497751 EXPORT_SYMBOL_GPL(__cant_sleep);
8450
-
8451
-#ifdef CONFIG_SMP
8452
-void __cant_migrate(const char *file, int line)
8453
-{
8454
- static unsigned long prev_jiffy;
8455
-
8456
- if (irqs_disabled())
8457
- return;
8458
-
8459
- if (is_migration_disabled(current))
8460
- return;
8461
-
8462
- if (!IS_ENABLED(CONFIG_PREEMPT_COUNT))
8463
- return;
8464
-
8465
- if (preempt_count() > 0)
8466
- return;
8467
-
8468
- if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
8469
- return;
8470
- prev_jiffy = jiffies;
8471
-
8472
- pr_err("BUG: assuming non migratable context at %s:%d\n", file, line);
8473
- pr_err("in_atomic(): %d, irqs_disabled(): %d, migration_disabled() %u pid: %d, name: %s\n",
8474
- in_atomic(), irqs_disabled(), is_migration_disabled(current),
8475
- current->pid, current->comm);
8476
-
8477
- debug_show_held_locks(current);
8478
- dump_stack();
8479
- add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
8480
-}
8481
-EXPORT_SYMBOL_GPL(__cant_migrate);
8482
-#endif
84837752 #endif
84847753
84857754 #ifdef CONFIG_MAGIC_SYSRQ