2023-12-11 6778948f9de86c3cfaf36725a7c87dcff9ba247f
kernel/kernel/sched/core.c
@@ -78,11 +78,7 @@
  * Number of tasks to iterate in a single balance run.
  * Limited because this is done with IRQs disabled.
  */
-#ifdef CONFIG_PREEMPT_RT
-const_debug unsigned int sysctl_sched_nr_migrate = 8;
-#else
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
-#endif
 
 /*
  * period over which we measure -rt task CPU usage in us.
@@ -531,15 +527,9 @@
 #endif
 #endif
 
-static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task,
-			 bool sleeper)
+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task)
 {
-	struct wake_q_node *node;
-
-	if (sleeper)
-		node = &task->wake_q_sleeper;
-	else
-		node = &task->wake_q;
+	struct wake_q_node *node = &task->wake_q;
 
 	/*
 	 * Atomically grab the task, if ->wake_q is !nil already it means
@@ -576,13 +566,7 @@
  */
 void wake_q_add(struct wake_q_head *head, struct task_struct *task)
 {
-	if (__wake_q_add(head, task, false))
-		get_task_struct(task);
-}
-
-void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task)
-{
-	if (__wake_q_add(head, task, true))
+	if (__wake_q_add(head, task))
 		get_task_struct(task);
 }
 
@@ -605,40 +589,29 @@
  */
 void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task)
 {
-	if (!__wake_q_add(head, task, false))
+	if (!__wake_q_add(head, task))
 		put_task_struct(task);
 }
 
-void __wake_up_q(struct wake_q_head *head, bool sleeper)
+void wake_up_q(struct wake_q_head *head)
 {
 	struct wake_q_node *node = head->first;
 
 	while (node != WAKE_Q_TAIL) {
 		struct task_struct *task;
 
-		if (sleeper)
-			task = container_of(node, struct task_struct, wake_q_sleeper);
-		else
-			task = container_of(node, struct task_struct, wake_q);
-
+		task = container_of(node, struct task_struct, wake_q);
 		BUG_ON(!task);
 		/* Task can safely be re-inserted now: */
 		node = node->next;
+		task->wake_q.next = NULL;
 		task->wake_q_count = head->count;
-		if (sleeper)
-			task->wake_q_sleeper.next = NULL;
-		else
-			task->wake_q.next = NULL;
 
 		/*
 		 * wake_up_process() executes a full barrier, which pairs with
 		 * the queueing in wake_q_add() so as not to miss wakeups.
 		 */
-		if (sleeper)
-			wake_up_lock_sleeper(task);
-		else
-			wake_up_process(task);
-
+		wake_up_process(task);
 		task->wake_q_count = 0;
 		put_task_struct(task);
 	}
@@ -675,48 +648,6 @@
675648 trace_sched_wake_idle_without_ipi(cpu);
676649 }
677650 EXPORT_SYMBOL_GPL(resched_curr);
678
-
679
-#ifdef CONFIG_PREEMPT_LAZY
680
-
681
-static int tsk_is_polling(struct task_struct *p)
682
-{
683
-#ifdef TIF_POLLING_NRFLAG
684
- return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
685
-#else
686
- return 0;
687
-#endif
688
-}
689
-
690
-void resched_curr_lazy(struct rq *rq)
691
-{
692
- struct task_struct *curr = rq->curr;
693
- int cpu;
694
-
695
- if (!sched_feat(PREEMPT_LAZY)) {
696
- resched_curr(rq);
697
- return;
698
- }
699
-
700
- lockdep_assert_held(&rq->lock);
701
-
702
- if (test_tsk_need_resched(curr))
703
- return;
704
-
705
- if (test_tsk_need_resched_lazy(curr))
706
- return;
707
-
708
- set_tsk_need_resched_lazy(curr);
709
-
710
- cpu = cpu_of(rq);
711
- if (cpu == smp_processor_id())
712
- return;
713
-
714
- /* NEED_RESCHED_LAZY must be visible before we test polling */
715
- smp_mb();
716
- if (!tsk_is_polling(curr))
717
- smp_send_reschedule(cpu);
718
-}
719
-#endif
720651
721652 void resched_cpu(int cpu)
722653 {
@@ -1870,82 +1801,6 @@
18701801
18711802 #ifdef CONFIG_SMP
18721803
1873
-static void
1874
-__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags);
1875
-
1876
-static int __set_cpus_allowed_ptr(struct task_struct *p,
1877
- const struct cpumask *new_mask,
1878
- u32 flags);
1879
-
1880
-static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
1881
-{
1882
- if (likely(!p->migration_disabled))
1883
- return;
1884
-
1885
- if (p->cpus_ptr != &p->cpus_mask)
1886
- return;
1887
-
1888
- /*
1889
- * Violates locking rules! see comment in __do_set_cpus_allowed().
1890
- */
1891
- __do_set_cpus_allowed(p, cpumask_of(rq->cpu), SCA_MIGRATE_DISABLE);
1892
-}
1893
-
1894
-void migrate_disable(void)
1895
-{
1896
- struct task_struct *p = current;
1897
-
1898
- if (p->migration_disabled) {
1899
- p->migration_disabled++;
1900
- return;
1901
- }
1902
-
1903
- trace_sched_migrate_disable_tp(p);
1904
-
1905
- preempt_disable();
1906
- this_rq()->nr_pinned++;
1907
- p->migration_disabled = 1;
1908
- preempt_lazy_disable();
1909
- preempt_enable();
1910
-}
1911
-EXPORT_SYMBOL_GPL(migrate_disable);
1912
-
1913
-void migrate_enable(void)
1914
-{
1915
- struct task_struct *p = current;
1916
-
1917
- if (p->migration_disabled > 1) {
1918
- p->migration_disabled--;
1919
- return;
1920
- }
1921
-
1922
- /*
1923
- * Ensure stop_task runs either before or after this, and that
1924
- * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
1925
- */
1926
- preempt_disable();
1927
- if (p->cpus_ptr != &p->cpus_mask)
1928
- __set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE);
1929
- /*
1930
- * Mustn't clear migration_disabled() until cpus_ptr points back at the
1931
- * regular cpus_mask, otherwise things that race (eg.
1932
- * select_fallback_rq) get confused.
1933
- */
1934
- barrier();
1935
- p->migration_disabled = 0;
1936
- this_rq()->nr_pinned--;
1937
- preempt_lazy_enable();
1938
- preempt_enable();
1939
-
1940
- trace_sched_migrate_enable_tp(p);
1941
-}
1942
-EXPORT_SYMBOL_GPL(migrate_enable);
1943
-
1944
-static inline bool rq_has_pinned_tasks(struct rq *rq)
1945
-{
1946
- return rq->nr_pinned;
1947
-}
1948
-
19491804 /*
19501805 * Per-CPU kthreads are allowed to run on !active && online CPUs, see
19511806 * __set_cpus_allowed_ptr() and select_fallback_rq().
@@ -1955,7 +1810,7 @@
 	if (!cpumask_test_cpu(cpu, p->cpus_ptr))
 		return false;
 
-	if (is_per_cpu_kthread(p) || is_migration_disabled(p))
+	if (is_per_cpu_kthread(p))
 		return cpu_online(cpu);
 
 	if (!cpu_active(cpu))
@@ -2015,21 +1870,8 @@
 }
 
 struct migration_arg {
-	struct task_struct *task;
-	int dest_cpu;
-	struct set_affinity_pending *pending;
-};
-
-/*
- * @refs: number of wait_for_completion()
- * @stop_pending: is @stop_work in use
- */
-struct set_affinity_pending {
-	refcount_t refs;
-	unsigned int stop_pending;
-	struct completion done;
-	struct cpu_stop_work stop_work;
-	struct migration_arg arg;
+	struct task_struct *task;
+	int dest_cpu;
 };
 
 /*
@@ -2062,17 +1904,15 @@
 static int migration_cpu_stop(void *data)
 {
 	struct migration_arg *arg = data;
-	struct set_affinity_pending *pending = arg->pending;
 	struct task_struct *p = arg->task;
 	struct rq *rq = this_rq();
-	bool complete = false;
 	struct rq_flags rf;
 
 	/*
 	 * The original target CPU might have gone down and we might
 	 * be on another CPU but it doesn't matter.
 	 */
-	local_irq_save(rf.flags);
+	local_irq_disable();
 	/*
 	 * We need to explicitly wake pending tasks before running
 	 * __migrate_task() such that we will not miss enforcing cpus_ptr
@@ -2082,121 +1922,21 @@
20821922
20831923 raw_spin_lock(&p->pi_lock);
20841924 rq_lock(rq, &rf);
2085
-
20861925 /*
20871926 * If task_rq(p) != rq, it cannot be migrated here, because we're
20881927 * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
20891928 * we're holding p->pi_lock.
20901929 */
20911930 if (task_rq(p) == rq) {
2092
- if (is_migration_disabled(p))
2093
- goto out;
2094
-
2095
- if (pending) {
2096
- if (p->migration_pending == pending)
2097
- p->migration_pending = NULL;
2098
- complete = true;
2099
-
2100
- if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask))
2101
- goto out;
2102
- }
2103
-
21041931 if (task_on_rq_queued(p))
21051932 rq = __migrate_task(rq, &rf, p, arg->dest_cpu);
21061933 else
21071934 p->wake_cpu = arg->dest_cpu;
2108
-
2109
- /*
2110
- * XXX __migrate_task() can fail, at which point we might end
2111
- * up running on a dodgy CPU, AFAICT this can only happen
2112
- * during CPU hotplug, at which point we'll get pushed out
2113
- * anyway, so it's probably not a big deal.
2114
- */
2115
-
2116
- } else if (pending) {
2117
- /*
2118
- * This happens when we get migrated between migrate_enable()'s
2119
- * preempt_enable() and scheduling the stopper task. At that
2120
- * point we're a regular task again and not current anymore.
2121
- *
2122
- * A !PREEMPT kernel has a giant hole here, which makes it far
2123
- * more likely.
2124
- */
2125
-
2126
- /*
2127
- * The task moved before the stopper got to run. We're holding
2128
- * ->pi_lock, so the allowed mask is stable - if it got
2129
- * somewhere allowed, we're done.
2130
- */
2131
- if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
2132
- if (p->migration_pending == pending)
2133
- p->migration_pending = NULL;
2134
- complete = true;
2135
- goto out;
2136
- }
2137
-
2138
- /*
2139
- * When migrate_enable() hits a rq mis-match we can't reliably
2140
- * determine is_migration_disabled() and so have to chase after
2141
- * it.
2142
- */
2143
- WARN_ON_ONCE(!pending->stop_pending);
2144
- task_rq_unlock(rq, p, &rf);
2145
- stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
2146
- &pending->arg, &pending->stop_work);
2147
- return 0;
21481935 }
2149
-out:
2150
- if (pending)
2151
- pending->stop_pending = false;
2152
- task_rq_unlock(rq, p, &rf);
1936
+ rq_unlock(rq, &rf);
1937
+ raw_spin_unlock(&p->pi_lock);
21531938
2154
- if (complete)
2155
- complete_all(&pending->done);
2156
-
2157
- return 0;
2158
-}
2159
-
2160
-int push_cpu_stop(void *arg)
2161
-{
2162
- struct rq *lowest_rq = NULL, *rq = this_rq();
2163
- struct task_struct *p = arg;
2164
-
2165
- raw_spin_lock_irq(&p->pi_lock);
2166
- raw_spin_lock(&rq->lock);
2167
-
2168
- if (task_rq(p) != rq)
2169
- goto out_unlock;
2170
-
2171
- if (is_migration_disabled(p)) {
2172
- p->migration_flags |= MDF_PUSH;
2173
- goto out_unlock;
2174
- }
2175
-
2176
- p->migration_flags &= ~MDF_PUSH;
2177
-
2178
- if (p->sched_class->find_lock_rq)
2179
- lowest_rq = p->sched_class->find_lock_rq(p, rq);
2180
-
2181
- if (!lowest_rq)
2182
- goto out_unlock;
2183
-
2184
- // XXX validate p is still the highest prio task
2185
- if (task_rq(p) == rq) {
2186
- deactivate_task(rq, p, 0);
2187
- set_task_cpu(p, lowest_rq->cpu);
2188
- activate_task(lowest_rq, p, 0);
2189
- resched_curr(lowest_rq);
2190
- }
2191
-
2192
- double_unlock_balance(rq, lowest_rq);
2193
-
2194
-out_unlock:
2195
- rq->push_busy = false;
2196
- raw_spin_unlock(&rq->lock);
2197
- raw_spin_unlock_irq(&p->pi_lock);
2198
-
2199
- put_task_struct(p);
1939
+ local_irq_enable();
22001940 return 0;
22011941 }
22021942
@@ -2204,40 +1944,19 @@
22041944 * sched_class::set_cpus_allowed must do the below, but is not required to
22051945 * actually call this function.
22061946 */
2207
-void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags)
1947
+void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
22081948 {
2209
- if (flags & (SCA_MIGRATE_ENABLE | SCA_MIGRATE_DISABLE)) {
2210
- p->cpus_ptr = new_mask;
2211
- return;
2212
- }
2213
-
22141949 cpumask_copy(&p->cpus_mask, new_mask);
22151950 p->nr_cpus_allowed = cpumask_weight(new_mask);
22161951 trace_android_rvh_set_cpus_allowed_comm(p, new_mask);
22171952 }
22181953
2219
-static void
2220
-__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags)
1954
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
22211955 {
22221956 struct rq *rq = task_rq(p);
22231957 bool queued, running;
22241958
2225
- /*
2226
- * This here violates the locking rules for affinity, since we're only
2227
- * supposed to change these variables while holding both rq->lock and
2228
- * p->pi_lock.
2229
- *
2230
- * HOWEVER, it magically works, because ttwu() is the only code that
2231
- * accesses these variables under p->pi_lock and only does so after
2232
- * smp_cond_load_acquire(&p->on_cpu, !VAL), and we're in __schedule()
2233
- * before finish_task().
2234
- *
2235
- * XXX do further audits, this smells like something putrid.
2236
- */
2237
- if (flags & SCA_MIGRATE_DISABLE)
2238
- SCHED_WARN_ON(!p->on_cpu);
2239
- else
2240
- lockdep_assert_held(&p->pi_lock);
1959
+ lockdep_assert_held(&p->pi_lock);
22411960
22421961 queued = task_on_rq_queued(p);
22431962 running = task_current(rq, p);
....@@ -2253,7 +1972,7 @@
22531972 if (running)
22541973 put_prev_task(rq, p);
22551974
2256
- p->sched_class->set_cpus_allowed(p, new_mask, flags);
1975
+ p->sched_class->set_cpus_allowed(p, new_mask);
22571976
22581977 if (queued)
22591978 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
@@ -2261,14 +1980,12 @@
22611980 set_next_task(rq, p);
22621981 }
22631982
2264
-static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
2265
- int dest_cpu, unsigned int flags);
22661983 /*
22671984 * Called with both p->pi_lock and rq->lock held; drops both before returning.
22681985 */
22691986 static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
22701987 const struct cpumask *new_mask,
2271
- u32 flags,
1988
+ bool check,
22721989 struct rq *rq,
22731990 struct rq_flags *rf)
22741991 {
@@ -2279,14 +1996,9 @@
22791996
22801997 update_rq_clock(rq);
22811998
2282
- if (p->flags & PF_KTHREAD || is_migration_disabled(p)) {
1999
+ if (p->flags & PF_KTHREAD) {
22832000 /*
2284
- * Kernel threads are allowed on online && !active CPUs.
2285
- *
2286
- * Specifically, migration_disabled() tasks must not fail the
2287
- * cpumask_any_and_distribute() pick below, esp. so on
2288
- * SCA_MIGRATE_ENABLE, otherwise we'll not call
2289
- * set_cpus_allowed_common() and actually reset p->cpus_ptr.
2001
+ * Kernel threads are allowed on online && !active CPUs
22902002 */
22912003 cpu_valid_mask = cpu_online_mask;
22922004 } else if (!cpumask_subset(new_mask, cpu_allowed_mask)) {
@@ -2298,22 +2010,13 @@
22982010 * Must re-check here, to close a race against __kthread_bind(),
22992011 * sched_setaffinity() is not guaranteed to observe the flag.
23002012 */
2301
- if ((flags & SCA_CHECK) && (p->flags & PF_NO_SETAFFINITY)) {
2013
+ if (check && (p->flags & PF_NO_SETAFFINITY)) {
23022014 ret = -EINVAL;
23032015 goto out;
23042016 }
23052017
2306
- if (!(flags & SCA_MIGRATE_ENABLE)) {
2307
- if (cpumask_equal(&p->cpus_mask, new_mask))
2308
- goto out;
2309
-
2310
- if (WARN_ON_ONCE(p == current &&
2311
- is_migration_disabled(p) &&
2312
- !cpumask_test_cpu(task_cpu(p), new_mask))) {
2313
- ret = -EBUSY;
2314
- goto out;
2315
- }
2316
- }
2018
+ if (cpumask_equal(&p->cpus_mask, new_mask))
2019
+ goto out;
23172020
23182021 /*
23192022 * Picking a ~random cpu helps in cases where we are changing affinity
@@ -2326,7 +2029,7 @@
 		goto out;
 	}
 
-	__do_set_cpus_allowed(p, new_mask, flags);
+	do_set_cpus_allowed(p, new_mask);
 
 	if (p->flags & PF_KTHREAD) {
 		/*
@@ -2338,227 +2041,27 @@
23382041 p->nr_cpus_allowed != 1);
23392042 }
23402043
2341
- return affine_move_task(rq, p, rf, dest_cpu, flags);
2044
+ /* Can the task run on the task's current CPU? If so, we're done */
2045
+ if (cpumask_test_cpu(task_cpu(p), new_mask))
2046
+ goto out;
2047
+
2048
+ if (task_running(rq, p) || p->state == TASK_WAKING) {
2049
+ struct migration_arg arg = { p, dest_cpu };
2050
+ /* Need help from migration thread: drop lock and wait. */
2051
+ task_rq_unlock(rq, p, rf);
2052
+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
2053
+ return 0;
2054
+ } else if (task_on_rq_queued(p)) {
2055
+ /*
2056
+ * OK, since we're going to drop the lock immediately
2057
+ * afterwards anyway.
2058
+ */
2059
+ rq = move_queued_task(rq, rf, p, dest_cpu);
2060
+ }
23422061 out:
23432062 task_rq_unlock(rq, p, rf);
23442063
23452064 return ret;
2346
-}
2347
-
2348
-void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
2349
-{
2350
- __do_set_cpus_allowed(p, new_mask, 0);
2351
-}
2352
-
2353
-/*
2354
- * This function is wildly self concurrent; here be dragons.
2355
- *
2356
- *
2357
- * When given a valid mask, __set_cpus_allowed_ptr() must block until the
2358
- * designated task is enqueued on an allowed CPU. If that task is currently
2359
- * running, we have to kick it out using the CPU stopper.
2360
- *
2361
- * Migrate-Disable comes along and tramples all over our nice sandcastle.
2362
- * Consider:
2363
- *
2364
- * Initial conditions: P0->cpus_mask = [0, 1]
2365
- *
2366
- * P0@CPU0 P1
2367
- *
2368
- * migrate_disable();
2369
- * <preempted>
2370
- * set_cpus_allowed_ptr(P0, [1]);
2371
- *
2372
- * P1 *cannot* return from this set_cpus_allowed_ptr() call until P0 executes
2373
- * its outermost migrate_enable() (i.e. it exits its Migrate-Disable region).
2374
- * This means we need the following scheme:
2375
- *
2376
- * P0@CPU0 P1
2377
- *
2378
- * migrate_disable();
2379
- * <preempted>
2380
- * set_cpus_allowed_ptr(P0, [1]);
2381
- * <blocks>
2382
- * <resumes>
2383
- * migrate_enable();
2384
- * __set_cpus_allowed_ptr();
2385
- * <wakes local stopper>
2386
- * `--> <woken on migration completion>
2387
- *
2388
- * Now the fun stuff: there may be several P1-like tasks, i.e. multiple
2389
- * concurrent set_cpus_allowed_ptr(P0, [*]) calls. CPU affinity changes of any
2390
- * task p are serialized by p->pi_lock, which we can leverage: the one that
2391
- * should come into effect at the end of the Migrate-Disable region is the last
2392
- * one. This means we only need to track a single cpumask (i.e. p->cpus_mask),
2393
- * but we still need to properly signal those waiting tasks at the appropriate
2394
- * moment.
2395
- *
2396
- * This is implemented using struct set_affinity_pending. The first
2397
- * __set_cpus_allowed_ptr() caller within a given Migrate-Disable region will
2398
- * setup an instance of that struct and install it on the targeted task_struct.
2399
- * Any and all further callers will reuse that instance. Those then wait for
2400
- * a completion signaled at the tail of the CPU stopper callback (1), triggered
2401
- * on the end of the Migrate-Disable region (i.e. outermost migrate_enable()).
2402
- *
2403
- *
2404
- * (1) In the cases covered above. There is one more where the completion is
2405
- * signaled within affine_move_task() itself: when a subsequent affinity request
2406
- * cancels the need for an active migration. Consider:
2407
- *
2408
- * Initial conditions: P0->cpus_mask = [0, 1]
2409
- *
2410
- * P0@CPU0 P1 P2
2411
- *
2412
- * migrate_disable();
2413
- * <preempted>
2414
- * set_cpus_allowed_ptr(P0, [1]);
2415
- * <blocks>
2416
- * set_cpus_allowed_ptr(P0, [0, 1]);
2417
- * <signal completion>
2418
- * <awakes>
2419
- *
2420
- * Note that the above is safe vs a concurrent migrate_enable(), as any
2421
- * pending affinity completion is preceded an uninstallion of
2422
- * p->migration_pending done with p->pi_lock held.
2423
- */
2424
-static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
2425
- int dest_cpu, unsigned int flags)
2426
-{
2427
- struct set_affinity_pending my_pending = { }, *pending = NULL;
2428
- bool stop_pending, complete = false;
2429
-
2430
- /* Can the task run on the task's current CPU? If so, we're done */
2431
- if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
2432
- struct task_struct *push_task = NULL;
2433
-
2434
- if ((flags & SCA_MIGRATE_ENABLE) &&
2435
- (p->migration_flags & MDF_PUSH) && !rq->push_busy) {
2436
- rq->push_busy = true;
2437
- push_task = get_task_struct(p);
2438
- }
2439
-
2440
- /*
2441
- * If there are pending waiters, but no pending stop_work,
2442
- * then complete now.
2443
- */
2444
- pending = p->migration_pending;
2445
- if (pending && !pending->stop_pending) {
2446
- p->migration_pending = NULL;
2447
- complete = true;
2448
- }
2449
-
2450
- task_rq_unlock(rq, p, rf);
2451
-
2452
- if (push_task) {
2453
- stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
2454
- p, &rq->push_work);
2455
- }
2456
-
2457
- if (complete)
2458
- complete_all(&pending->done);
2459
-
2460
- return 0;
2461
- }
2462
-
2463
- if (!(flags & SCA_MIGRATE_ENABLE)) {
2464
- /* serialized by p->pi_lock */
2465
- if (!p->migration_pending) {
2466
- /* Install the request */
2467
- refcount_set(&my_pending.refs, 1);
2468
- init_completion(&my_pending.done);
2469
- my_pending.arg = (struct migration_arg) {
2470
- .task = p,
2471
- .dest_cpu = dest_cpu,
2472
- .pending = &my_pending,
2473
- };
2474
-
2475
- p->migration_pending = &my_pending;
2476
- } else {
2477
- pending = p->migration_pending;
2478
- refcount_inc(&pending->refs);
2479
- /*
2480
- * Affinity has changed, but we've already installed a
2481
- * pending. migration_cpu_stop() *must* see this, else
2482
- * we risk a completion of the pending despite having a
2483
- * task on a disallowed CPU.
2484
- *
2485
- * Serialized by p->pi_lock, so this is safe.
2486
- */
2487
- pending->arg.dest_cpu = dest_cpu;
2488
- }
2489
- }
2490
- pending = p->migration_pending;
2491
- /*
2492
- * - !MIGRATE_ENABLE:
2493
- * we'll have installed a pending if there wasn't one already.
2494
- *
2495
- * - MIGRATE_ENABLE:
2496
- * we're here because the current CPU isn't matching anymore,
2497
- * the only way that can happen is because of a concurrent
2498
- * set_cpus_allowed_ptr() call, which should then still be
2499
- * pending completion.
2500
- *
2501
- * Either way, we really should have a @pending here.
2502
- */
2503
- if (WARN_ON_ONCE(!pending)) {
2504
- task_rq_unlock(rq, p, rf);
2505
- return -EINVAL;
2506
- }
2507
-
2508
- if (task_running(rq, p) || p->state == TASK_WAKING) {
2509
- /*
2510
- * MIGRATE_ENABLE gets here because 'p == current', but for
2511
- * anything else we cannot do is_migration_disabled(), punt
2512
- * and have the stopper function handle it all race-free.
2513
- */
2514
- stop_pending = pending->stop_pending;
2515
- if (!stop_pending)
2516
- pending->stop_pending = true;
2517
-
2518
- if (flags & SCA_MIGRATE_ENABLE)
2519
- p->migration_flags &= ~MDF_PUSH;
2520
-
2521
- task_rq_unlock(rq, p, rf);
2522
-
2523
- if (!stop_pending) {
2524
- stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
2525
- &pending->arg, &pending->stop_work);
2526
- }
2527
-
2528
- if (flags & SCA_MIGRATE_ENABLE)
2529
- return 0;
2530
- } else {
2531
-
2532
- if (!is_migration_disabled(p)) {
2533
- if (task_on_rq_queued(p))
2534
- rq = move_queued_task(rq, rf, p, dest_cpu);
2535
-
2536
- if (!pending->stop_pending) {
2537
- p->migration_pending = NULL;
2538
- complete = true;
2539
- }
2540
- }
2541
- task_rq_unlock(rq, p, rf);
2542
-
2543
- if (complete)
2544
- complete_all(&pending->done);
2545
- }
2546
-
2547
- wait_for_completion(&pending->done);
2548
-
2549
- if (refcount_dec_and_test(&pending->refs))
2550
- wake_up_var(&pending->refs); /* No UaF, just an address */
2551
-
2552
- /*
2553
- * Block the original owner of &pending until all subsequent callers
2554
- * have seen the completion and decremented the refcount
2555
- */
2556
- wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs));
2557
-
2558
- /* ARGH */
2559
- WARN_ON_ONCE(my_pending.stop_pending);
2560
-
2561
- return 0;
25622065 }
25632066
25642067 /*
@@ -2571,19 +2074,18 @@
  * call is not atomic; no spinlocks may be held.
  */
 static int __set_cpus_allowed_ptr(struct task_struct *p,
-				  const struct cpumask *new_mask,
-				  u32 flags)
+				  const struct cpumask *new_mask, bool check)
 {
 	struct rq_flags rf;
 	struct rq *rq;
 
 	rq = task_rq_lock(p, &rf);
-	return __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, &rf);
+	return __set_cpus_allowed_ptr_locked(p, new_mask, check, rq, &rf);
 }
 
 int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 {
-	return __set_cpus_allowed_ptr(p, new_mask, 0);
+	return __set_cpus_allowed_ptr(p, new_mask, false);
 }
 EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
 
@@ -2692,8 +2194,6 @@
 	 * Clearly, migrating tasks to offline CPUs is a fairly daft thing.
 	 */
 	WARN_ON_ONCE(!cpu_online(new_cpu));
-
-	WARN_ON_ONCE(is_migration_disabled(p));
 #endif
 
 	trace_sched_migrate_task(p, new_cpu);
@@ -2827,18 +2327,6 @@
 }
 EXPORT_SYMBOL_GPL(migrate_swap);
 
-static bool check_task_state(struct task_struct *p, long match_state)
-{
-	bool match = false;
-
-	raw_spin_lock_irq(&p->pi_lock);
-	if (p->state == match_state || p->saved_state == match_state)
-		match = true;
-	raw_spin_unlock_irq(&p->pi_lock);
-
-	return match;
-}
-
 /*
  * wait_task_inactive - wait for a thread to unschedule.
  *
@@ -2883,7 +2371,7 @@
 	 * is actually now running somewhere else!
 	 */
 	while (task_running(rq, p)) {
-		if (match_state && !check_task_state(p, match_state))
+		if (match_state && unlikely(p->state != match_state))
 			return 0;
 		cpu_relax();
 	}
@@ -2898,8 +2386,7 @@
 	running = task_running(rq, p);
 	queued = task_on_rq_queued(p);
 	ncsw = 0;
-	if (!match_state || p->state == match_state ||
-	    p->saved_state == match_state)
+	if (!match_state || p->state == match_state)
 		ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
 	task_rq_unlock(rq, p, &rf);
 
@@ -2933,7 +2420,7 @@
 		ktime_t to = NSEC_PER_SEC / HZ;
 
 		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
+		schedule_hrtimeout(&to, HRTIMER_MODE_REL);
 		continue;
 	}
 
@@ -3040,12 +2527,6 @@
 		}
 		fallthrough;
 	case possible:
-		/*
-		 * XXX When called from select_task_rq() we only
-		 * hold p->pi_lock and again violate locking order.
-		 *
-		 * More yuck to audit.
-		 */
 		do_set_cpus_allowed(p, task_cpu_possible_mask(p));
 		state = fail;
 		break;
@@ -3079,7 +2560,7 @@
 {
 	lockdep_assert_held(&p->pi_lock);
 
-	if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p))
+	if (p->nr_cpus_allowed > 1)
 		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
 	else
 		cpu = cpumask_any(p->cpus_ptr);
@@ -3102,7 +2583,6 @@
 
 void sched_set_stop_task(int cpu, struct task_struct *stop)
 {
-	static struct lock_class_key stop_pi_lock;
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
 	struct task_struct *old_stop = cpu_rq(cpu)->stop;
 
@@ -3118,20 +2598,6 @@
31182598 sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
31192599
31202600 stop->sched_class = &stop_sched_class;
3121
-
3122
- /*
3123
- * The PI code calls rt_mutex_setprio() with ->pi_lock held to
3124
- * adjust the effective priority of a task. As a result,
3125
- * rt_mutex_setprio() can trigger (RT) balancing operations,
3126
- * which can then trigger wakeups of the stop thread to push
3127
- * around the current task.
3128
- *
3129
- * The stop task itself will never be part of the PI-chain, it
3130
- * never blocks, therefore that ->pi_lock recursion is safe.
3131
- * Tell lockdep about this by placing the stop->pi_lock in its
3132
- * own class.
3133
- */
3134
- lockdep_set_class(&stop->pi_lock, &stop_pi_lock);
31352601 }
31362602
31372603 cpu_rq(cpu)->stop = stop;
@@ -3145,23 +2611,15 @@
31452611 }
31462612 }
31472613
3148
-#else /* CONFIG_SMP */
2614
+#else
31492615
31502616 static inline int __set_cpus_allowed_ptr(struct task_struct *p,
3151
- const struct cpumask *new_mask,
3152
- u32 flags)
2617
+ const struct cpumask *new_mask, bool check)
31532618 {
31542619 return set_cpus_allowed_ptr(p, new_mask);
31552620 }
31562621
3157
-static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { }
3158
-
3159
-static inline bool rq_has_pinned_tasks(struct rq *rq)
3160
-{
3161
- return false;
3162
-}
3163
-
3164
-#endif /* !CONFIG_SMP */
2622
+#endif /* CONFIG_SMP */
31652623
31662624 static void
31672625 ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
@@ -3595,7 +3053,7 @@
 	int cpu, success = 0;
 
 	preempt_disable();
-	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && p == current) {
+	if (p == current) {
 		/*
 		 * We're waking current, this means 'p->on_rq' and 'task_cpu(p)
 		 * == smp_processor_id()'. Together this means we can special
@@ -3625,26 +3083,8 @@
36253083 */
36263084 raw_spin_lock_irqsave(&p->pi_lock, flags);
36273085 smp_mb__after_spinlock();
3628
- if (!(p->state & state)) {
3629
- /*
3630
- * The task might be running due to a spinlock sleeper
3631
- * wakeup. Check the saved state and set it to running
3632
- * if the wakeup condition is true.
3633
- */
3634
- if (!(wake_flags & WF_LOCK_SLEEPER)) {
3635
- if (p->saved_state & state) {
3636
- p->saved_state = TASK_RUNNING;
3637
- success = 1;
3638
- }
3639
- }
3086
+ if (!(p->state & state))
36403087 goto unlock;
3641
- }
3642
- /*
3643
- * If this is a regular wakeup, then we can unconditionally
3644
- * clear the saved state of a "lock sleeper".
3645
- */
3646
- if (!(wake_flags & WF_LOCK_SLEEPER))
3647
- p->saved_state = TASK_RUNNING;
36483088
36493089 #ifdef CONFIG_FREEZER
36503090 /*
@@ -3853,18 +3293,6 @@
38533293 }
38543294 EXPORT_SYMBOL(wake_up_process);
38553295
3856
-/**
3857
- * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
3858
- * @p: The process to be woken up.
3859
- *
3860
- * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
3861
- * the nature of the wakeup.
3862
- */
3863
-int wake_up_lock_sleeper(struct task_struct *p)
3864
-{
3865
- return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER);
3866
-}
3867
-
38683296 int wake_up_state(struct task_struct *p, unsigned int state)
38693297 {
38703298 return try_to_wake_up(p, state, 0);
@@ -3920,7 +3348,6 @@
 	init_numa_balancing(clone_flags, p);
 #ifdef CONFIG_SMP
 	p->wake_entry.u_flags = CSD_TYPE_TTWU;
-	p->migration_pending = NULL;
 #endif
 }
 
@@ -4099,9 +3526,6 @@
 	p->on_cpu = 0;
 #endif
 	init_task_preempt_count(p);
-#ifdef CONFIG_HAVE_PREEMPT_LAZY
-	task_thread_info(p)->preempt_lazy_count = 0;
-#endif
 #ifdef CONFIG_SMP
 	plist_node_init(&p->pushable_tasks, MAX_PRIO);
 	RB_CLEAR_NODE(&p->pushable_dl_tasks);
@@ -4329,90 +3753,6 @@
43293753 #endif
43303754 }
43313755
4332
-#ifdef CONFIG_SMP
4333
-
4334
-static void do_balance_callbacks(struct rq *rq, struct callback_head *head)
4335
-{
4336
- void (*func)(struct rq *rq);
4337
- struct callback_head *next;
4338
-
4339
- lockdep_assert_held(&rq->lock);
4340
-
4341
- while (head) {
4342
- func = (void (*)(struct rq *))head->func;
4343
- next = head->next;
4344
- head->next = NULL;
4345
- head = next;
4346
-
4347
- func(rq);
4348
- }
4349
-}
4350
-
4351
-static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
4352
-{
4353
- struct callback_head *head = rq->balance_callback;
4354
-
4355
- lockdep_assert_held(&rq->lock);
4356
- if (head) {
4357
- rq->balance_callback = NULL;
4358
- rq->balance_flags &= ~BALANCE_WORK;
4359
- }
4360
-
4361
- return head;
4362
-}
4363
-
4364
-static void __balance_callbacks(struct rq *rq)
4365
-{
4366
- do_balance_callbacks(rq, splice_balance_callbacks(rq));
4367
-}
4368
-
4369
-static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
4370
-{
4371
- unsigned long flags;
4372
-
4373
- if (unlikely(head)) {
4374
- raw_spin_lock_irqsave(&rq->lock, flags);
4375
- do_balance_callbacks(rq, head);
4376
- raw_spin_unlock_irqrestore(&rq->lock, flags);
4377
- }
4378
-}
4379
-
4380
-static void balance_push(struct rq *rq);
4381
-
4382
-static inline void balance_switch(struct rq *rq)
4383
-{
4384
- if (likely(!rq->balance_flags))
4385
- return;
4386
-
4387
- if (rq->balance_flags & BALANCE_PUSH) {
4388
- balance_push(rq);
4389
- return;
4390
- }
4391
-
4392
- __balance_callbacks(rq);
4393
-}
4394
-
4395
-#else
4396
-
4397
-static inline void __balance_callbacks(struct rq *rq)
4398
-{
4399
-}
4400
-
4401
-static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
4402
-{
4403
- return NULL;
4404
-}
4405
-
4406
-static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
4407
-{
4408
-}
4409
-
4410
-static inline void balance_switch(struct rq *rq)
4411
-{
4412
-}
4413
-
4414
-#endif
4415
-
44163756 static inline void
44173757 prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
44183758 {
....@@ -4438,7 +3778,6 @@
44383778 * prev into current:
44393779 */
44403780 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
4441
- balance_switch(rq);
44423781 raw_spin_unlock_irq(&rq->lock);
44433782 }
44443783
@@ -4453,22 +3792,6 @@
44533792 #ifndef finish_arch_post_lock_switch
44543793 # define finish_arch_post_lock_switch() do { } while (0)
44553794 #endif
4456
-
4457
-static inline void kmap_local_sched_out(void)
4458
-{
4459
-#ifdef CONFIG_KMAP_LOCAL
4460
- if (unlikely(current->kmap_ctrl.idx))
4461
- __kmap_local_sched_out();
4462
-#endif
4463
-}
4464
-
4465
-static inline void kmap_local_sched_in(void)
4466
-{
4467
-#ifdef CONFIG_KMAP_LOCAL
4468
- if (unlikely(current->kmap_ctrl.idx))
4469
- __kmap_local_sched_in();
4470
-#endif
4471
-}
44723795
44733796 /**
44743797 * prepare_task_switch - prepare to switch tasks
@@ -4492,7 +3815,6 @@
 	perf_event_task_sched_out(prev, next);
 	rseq_preempt(prev);
 	fire_sched_out_preempt_notifiers(prev, next);
-	kmap_local_sched_out();
 	prepare_task(next);
 	prepare_arch_switch(next);
 }
@@ -4559,7 +3881,6 @@
 	finish_lock_switch(rq);
 	finish_arch_post_lock_switch();
 	kcov_finish_switch(current);
-	kmap_local_sched_in();
 
 	fire_sched_in_preempt_notifiers(current);
 	/*
@@ -4574,17 +3895,23 @@
45743895 * provided by mmdrop(),
45753896 * - a sync_core for SYNC_CORE.
45763897 */
4577
- /*
4578
- * We use mmdrop_delayed() here so we don't have to do the
4579
- * full __mmdrop() when we are the last user.
4580
- */
45813898 if (mm) {
45823899 membarrier_mm_sync_core_before_usermode(mm);
4583
- mmdrop_delayed(mm);
3900
+ mmdrop(mm);
45843901 }
45853902 if (unlikely(prev_state == TASK_DEAD)) {
45863903 if (prev->sched_class->task_dead)
45873904 prev->sched_class->task_dead(prev);
3905
+
3906
+ /*
3907
+ * Remove function-return probe instances associated with this
3908
+ * task and put them back on the free list.
3909
+ */
3910
+ kprobe_flush_task(prev);
3911
+ trace_android_rvh_flush_task(prev);
3912
+
3913
+ /* Task is done with its stack. */
3914
+ put_task_stack(prev);
45883915
45893916 put_task_struct_rcu_user(prev);
45903917 }
@@ -4592,6 +3919,43 @@
45923919 tick_nohz_task_switch();
45933920 return rq;
45943921 }
3922
+
3923
+#ifdef CONFIG_SMP
3924
+
3925
+/* rq->lock is NOT held, but preemption is disabled */
3926
+static void __balance_callback(struct rq *rq)
3927
+{
3928
+ struct callback_head *head, *next;
3929
+ void (*func)(struct rq *rq);
3930
+ unsigned long flags;
3931
+
3932
+ raw_spin_lock_irqsave(&rq->lock, flags);
3933
+ head = rq->balance_callback;
3934
+ rq->balance_callback = NULL;
3935
+ while (head) {
3936
+ func = (void (*)(struct rq *))head->func;
3937
+ next = head->next;
3938
+ head->next = NULL;
3939
+ head = next;
3940
+
3941
+ func(rq);
3942
+ }
3943
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
3944
+}
3945
+
3946
+static inline void balance_callback(struct rq *rq)
3947
+{
3948
+ if (unlikely(rq->balance_callback))
3949
+ __balance_callback(rq);
3950
+}
3951
+
3952
+#else
3953
+
3954
+static inline void balance_callback(struct rq *rq)
3955
+{
3956
+}
3957
+
3958
+#endif
45953959
45963960 /**
45973961 * schedule_tail - first thing a freshly forked thread must call.
@@ -4612,6 +3976,7 @@
 	 */
 
 	rq = finish_task_switch(prev);
+	balance_callback(rq);
 	preempt_enable();
 
 	if (current->set_child_tid)
@@ -5317,7 +4682,7 @@
  *
  * WARNING: must be called with preemption disabled!
  */
-static void __sched notrace __schedule(bool preempt, bool spinning_lock)
+static void __sched notrace __schedule(bool preempt)
 {
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
@@ -5370,7 +4735,7 @@
 	 * - ptrace_{,un}freeze_traced() can change ->state underneath us.
 	 */
 	prev_state = prev->state;
-	if ((!preempt || spinning_lock) && prev_state) {
+	if (!preempt && prev_state) {
 		if (signal_pending_state(prev_state, prev)) {
 			prev->state = TASK_RUNNING;
 		} else {
@@ -5405,7 +4770,6 @@
 
 	next = pick_next_task(rq, prev, &rf);
 	clear_tsk_need_resched(prev);
-	clear_tsk_need_resched_lazy(prev);
 	clear_preempt_need_resched();
 
 	trace_android_rvh_schedule(prev, next, rq);
@@ -5432,7 +4796,6 @@
 	 */
 	++*switch_count;
 
-	migrate_disable_switch(rq, prev);
 	psi_sched_switch(prev, next, !task_on_rq_queued(prev));
 
 	trace_sched_switch(preempt, prev, next);
@@ -5441,11 +4804,10 @@
54414804 rq = context_switch(rq, prev, next, &rf);
54424805 } else {
54434806 rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
5444
-
5445
- rq_unpin_lock(rq, &rf);
5446
- __balance_callbacks(rq);
5447
- raw_spin_unlock_irq(&rq->lock);
4807
+ rq_unlock_irq(rq, &rf);
54484808 }
4809
+
4810
+ balance_callback(rq);
54494811 }
54504812
54514813 void __noreturn do_task_dead(void)
@@ -5456,7 +4818,7 @@
54564818 /* Tell freezer to ignore us: */
54574819 current->flags |= PF_NOFREEZE;
54584820
5459
- __schedule(false, false);
4821
+ __schedule(false);
54604822 BUG();
54614823
54624824 /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */
@@ -5489,6 +4851,9 @@
54894851 preempt_enable_no_resched();
54904852 }
54914853
4854
+ if (tsk_is_pi_blocked(tsk))
4855
+ return;
4856
+
54924857 /*
54934858 * If we are going to sleep and we have plugged IO queued,
54944859 * make sure to submit it to avoid deadlocks.
@@ -5514,7 +4879,7 @@
55144879 sched_submit_work(tsk);
55154880 do {
55164881 preempt_disable();
5517
- __schedule(false, false);
4882
+ __schedule(false);
55184883 sched_preempt_enable_no_resched();
55194884 } while (need_resched());
55204885 sched_update_worker(tsk);
@@ -5542,7 +4907,7 @@
55424907 */
55434908 WARN_ON_ONCE(current->state);
55444909 do {
5545
- __schedule(false, false);
4910
+ __schedule(false);
55464911 } while (need_resched());
55474912 }
55484913
@@ -5595,7 +4960,7 @@
55954960 */
55964961 preempt_disable_notrace();
55974962 preempt_latency_start(1);
5598
- __schedule(true, false);
4963
+ __schedule(true);
55994964 preempt_latency_stop(1);
56004965 preempt_enable_no_resched_notrace();
56014966
@@ -5605,30 +4970,6 @@
56054970 */
56064971 } while (need_resched());
56074972 }
5608
-
5609
-#ifdef CONFIG_PREEMPT_LAZY
5610
-/*
5611
- * If TIF_NEED_RESCHED is then we allow to be scheduled away since this is
5612
- * set by a RT task. Oterwise we try to avoid beeing scheduled out as long as
5613
- * preempt_lazy_count counter >0.
5614
- */
5615
-static __always_inline int preemptible_lazy(void)
5616
-{
5617
- if (test_thread_flag(TIF_NEED_RESCHED))
5618
- return 1;
5619
- if (current_thread_info()->preempt_lazy_count)
5620
- return 0;
5621
- return 1;
5622
-}
5623
-
5624
-#else
5625
-
5626
-static inline int preemptible_lazy(void)
5627
-{
5628
- return 1;
5629
-}
5630
-
5631
-#endif
56324973
56334974 #ifdef CONFIG_PREEMPTION
56344975 /*
@@ -5643,25 +4984,11 @@
56434984 */
56444985 if (likely(!preemptible()))
56454986 return;
5646
- if (!preemptible_lazy())
5647
- return;
4987
+
56484988 preempt_schedule_common();
56494989 }
56504990 NOKPROBE_SYMBOL(preempt_schedule);
56514991 EXPORT_SYMBOL(preempt_schedule);
5652
-
5653
-#ifdef CONFIG_PREEMPT_RT
5654
-void __sched notrace preempt_schedule_lock(void)
5655
-{
5656
- do {
5657
- preempt_disable();
5658
- __schedule(true, true);
5659
- sched_preempt_enable_no_resched();
5660
- } while (need_resched());
5661
-}
5662
-NOKPROBE_SYMBOL(preempt_schedule_lock);
5663
-EXPORT_SYMBOL(preempt_schedule_lock);
5664
-#endif
56654992
56664993 /**
56674994 * preempt_schedule_notrace - preempt_schedule called by tracing
@@ -5682,9 +5009,6 @@
56825009 enum ctx_state prev_ctx;
56835010
56845011 if (likely(!preemptible()))
5685
- return;
5686
-
5687
- if (!preemptible_lazy())
56885012 return;
56895013
56905014 do {
@@ -5709,7 +5033,7 @@
57095033 * an infinite recursion.
57105034 */
57115035 prev_ctx = exception_enter();
5712
- __schedule(true, false);
5036
+ __schedule(true);
57135037 exception_exit(prev_ctx);
57145038
57155039 preempt_latency_stop(1);
@@ -5738,7 +5062,7 @@
57385062 do {
57395063 preempt_disable();
57405064 local_irq_enable();
5741
- __schedule(true, false);
5065
+ __schedule(true);
57425066 local_irq_disable();
57435067 sched_preempt_enable_no_resched();
57445068 } while (need_resched());
@@ -5905,11 +5229,9 @@
59055229 out_unlock:
59065230 /* Avoid rq from going away on us: */
59075231 preempt_disable();
5232
+ __task_rq_unlock(rq, &rf);
59085233
5909
- rq_unpin_lock(rq, &rf);
5910
- __balance_callbacks(rq);
5911
- raw_spin_unlock(&rq->lock);
5912
-
5234
+ balance_callback(rq);
59135235 preempt_enable();
59145236 }
59155237 #else
@@ -6154,7 +5476,6 @@
61545476 int oldpolicy = -1, policy = attr->sched_policy;
61555477 int retval, oldprio, newprio, queued, running;
61565478 const struct sched_class *prev_class;
6157
- struct callback_head *head;
61585479 struct rq_flags rf;
61595480 int reset_on_fork;
61605481 int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
@@ -6397,14 +5718,13 @@
63975718
63985719 /* Avoid rq from going away on us: */
63995720 preempt_disable();
6400
- head = splice_balance_callbacks(rq);
64015721 task_rq_unlock(rq, p, &rf);
64025722
64035723 if (pi)
64045724 rt_mutex_adjust_pi(p);
64055725
64065726 /* Run balance callbacks after we've adjusted the PI chain: */
6407
- balance_callbacks(rq, head);
5727
+ balance_callback(rq);
64085728 preempt_enable();
64095729
64105730 return 0;
@@ -6916,7 +6236,7 @@
69166236 }
69176237 #endif
69186238 again:
6919
- retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK);
6239
+ retval = __set_cpus_allowed_ptr(p, new_mask, true);
69206240
69216241 if (!retval) {
69226242 cpuset_cpus_allowed(p, cpus_allowed);
@@ -7498,7 +6818,7 @@
74986818 *
74996819 * And since this is boot we can forgo the serialization.
75006820 */
7501
- set_cpus_allowed_common(idle, cpumask_of(cpu), 0);
6821
+ set_cpus_allowed_common(idle, cpumask_of(cpu));
75026822 #endif
75036823 /*
75046824 * We're having a chicken and egg problem, even though we are
@@ -7525,9 +6845,7 @@
75256845
75266846 /* Set the preempt count _outside_ the spinlocks! */
75276847 init_idle_preempt_count(idle, cpu);
7528
-#ifdef CONFIG_HAVE_PREEMPT_LAZY
7529
- task_thread_info(idle)->preempt_lazy_count = 0;
7530
-#endif
6848
+
75316849 /*
75326850 * The idle tasks have their own, simple scheduling class:
75336851 */
@@ -7637,7 +6955,6 @@
76376955 #endif /* CONFIG_NUMA_BALANCING */
76386956
76396957 #ifdef CONFIG_HOTPLUG_CPU
7640
-
76416958 /*
76426959 * Ensure that the idle task is using init_mm right before its CPU goes
76436960 * offline.
@@ -7657,124 +6974,166 @@
76576974 /* finish_cpu(), as ran on the BP, will clean up the active_mm state */
76586975 }
76596976
7660
-static int __balance_push_cpu_stop(void *arg)
6977
+/*
6978
+ * Since this CPU is going 'away' for a while, fold any nr_active delta
6979
+ * we might have. Assumes we're called after migrate_tasks() so that the
6980
+ * nr_active count is stable. We need to take the teardown thread which
6981
+ * is calling this into account, so we hand in adjust = 1 to the load
6982
+ * calculation.
6983
+ *
6984
+ * Also see the comment "Global load-average calculations".
6985
+ */
6986
+static void calc_load_migrate(struct rq *rq)
76616987 {
7662
- struct task_struct *p = arg;
7663
- struct rq *rq = this_rq();
7664
- struct rq_flags rf;
7665
- int cpu;
6988
+ long delta = calc_load_fold_active(rq, 1);
6989
+ if (delta)
6990
+ atomic_long_add(delta, &calc_load_tasks);
6991
+}
76666992
7667
- raw_spin_lock_irq(&p->pi_lock);
7668
- rq_lock(rq, &rf);
6993
+static struct task_struct *__pick_migrate_task(struct rq *rq)
6994
+{
6995
+ const struct sched_class *class;
6996
+ struct task_struct *next;
76696997
6998
+ for_each_class(class) {
6999
+ next = class->pick_next_task(rq);
7000
+ if (next) {
7001
+ next->sched_class->put_prev_task(rq, next);
7002
+ return next;
7003
+ }
7004
+ }
7005
+
7006
+ /* The idle class should always have a runnable task */
7007
+ BUG();
7008
+}
7009
+
7010
+/*
7011
+ * Migrate all tasks from the rq, sleeping tasks will be migrated by
7012
+ * try_to_wake_up()->select_task_rq().
7013
+ *
7014
+ * Called with rq->lock held even though we'er in stop_machine() and
7015
+ * there's no concurrency possible, we hold the required locks anyway
7016
+ * because of lock validation efforts.
7017
+ *
7018
+ * force: if false, the function will skip CPU pinned kthreads.
7019
+ */
7020
+static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf, bool force)
7021
+{
7022
+ struct rq *rq = dead_rq;
7023
+ struct task_struct *next, *tmp, *stop = rq->stop;
7024
+ LIST_HEAD(percpu_kthreads);
7025
+ struct rq_flags orf = *rf;
7026
+ int dest_cpu;
7027
+
7028
+ /*
7029
+ * Fudge the rq selection such that the below task selection loop
7030
+ * doesn't get stuck on the currently eligible stop task.
7031
+ *
7032
+ * We're currently inside stop_machine() and the rq is either stuck
7033
+ * in the stop_machine_cpu_stop() loop, or we're executing this code,
7034
+ * either way we should never end up calling schedule() until we're
7035
+ * done here.
7036
+ */
7037
+ rq->stop = NULL;
7038
+
7039
+ /*
7040
+ * put_prev_task() and pick_next_task() sched
7041
+ * class method both need to have an up-to-date
7042
+ * value of rq->clock[_task]
7043
+ */
76707044 update_rq_clock(rq);
76717045
7672
- if (task_rq(p) == rq && task_on_rq_queued(p)) {
7673
- cpu = select_fallback_rq(rq->cpu, p);
7674
- rq = __migrate_task(rq, &rf, p, cpu);
7675
- }
7046
+#ifdef CONFIG_SCHED_DEBUG
7047
+ /* note the clock update in orf */
7048
+ orf.clock_update_flags |= RQCF_UPDATED;
7049
+#endif
76767050
7677
- rq_unlock(rq, &rf);
7678
- raw_spin_unlock_irq(&p->pi_lock);
7679
-
7680
- put_task_struct(p);
7681
-
7682
- return 0;
7683
-}
7684
-
7685
-static DEFINE_PER_CPU(struct cpu_stop_work, push_work);
7686
-
7687
-/*
7688
- * Ensure we only run per-cpu kthreads once the CPU goes !active.
7689
- */
7690
-
7691
-
7692
-static void balance_push(struct rq *rq)
7693
-{
7694
- struct task_struct *push_task = rq->curr;
7695
-
7696
- lockdep_assert_held(&rq->lock);
7697
- SCHED_WARN_ON(rq->cpu != smp_processor_id());
7698
-
7699
- /*
7700
- * Both the cpu-hotplug and stop task are in this case and are
7701
- * required to complete the hotplug process.
7702
- */
7703
- if (is_per_cpu_kthread(push_task) || is_migration_disabled(push_task)) {
7051
+ for (;;) {
77047052 /*
7705
- * If this is the idle task on the outgoing CPU try to wake
7706
- * up the hotplug control thread which might wait for the
7707
- * last task to vanish. The rcuwait_active() check is
7708
- * accurate here because the waiter is pinned on this CPU
7709
- * and can't obviously be running in parallel.
7710
- *
7711
- * On RT kernels this also has to check whether there are
7712
- * pinned and scheduled out tasks on the runqueue. They
7713
- * need to leave the migrate disabled section first.
7053
+ * There's this thread running, bail when that's the only
7054
+ * remaining thread:
77147055 */
7715
- if (!rq->nr_running && !rq_has_pinned_tasks(rq) &&
7716
- rcuwait_active(&rq->hotplug_wait)) {
7717
- raw_spin_unlock(&rq->lock);
7718
- rcuwait_wake_up(&rq->hotplug_wait);
7719
- raw_spin_lock(&rq->lock);
7056
+ if (rq->nr_running == 1)
7057
+ break;
7058
+
7059
+ next = __pick_migrate_task(rq);
7060
+
7061
+ /*
7062
+ * Argh ... no iterator for tasks, we need to remove the
7063
+ * kthread from the run-queue to continue.
7064
+ */
7065
+ if (!force && is_per_cpu_kthread(next)) {
7066
+ INIT_LIST_HEAD(&next->percpu_kthread_node);
7067
+ list_add(&next->percpu_kthread_node, &percpu_kthreads);
7068
+
7069
+ /* DEQUEUE_SAVE not used due to move_entity in rt */
7070
+ deactivate_task(rq, next,
7071
+ DEQUEUE_NOCLOCK);
7072
+ continue;
77207073 }
7721
- return;
7074
+
7075
+ /*
7076
+ * Rules for changing task_struct::cpus_mask are holding
7077
+ * both pi_lock and rq->lock, such that holding either
7078
+ * stabilizes the mask.
7079
+ *
7080
+ * Drop rq->lock is not quite as disastrous as it usually is
7081
+ * because !cpu_active at this point, which means load-balance
7082
+ * will not interfere. Also, stop-machine.
7083
+ */
7084
+ rq_unlock(rq, rf);
7085
+ raw_spin_lock(&next->pi_lock);
7086
+ rq_relock(rq, rf);
7087
+
7088
+ /*
7089
+ * Since we're inside stop-machine, _nothing_ should have
7090
+ * changed the task, WARN if weird stuff happened, because in
7091
+ * that case the above rq->lock drop is a fail too.
7092
+ */
7093
+ if (task_rq(next) != rq || !task_on_rq_queued(next)) {
7094
+ /*
7095
+ * In the !force case, there is a hole between
7096
+ * rq_unlock() and rq_relock(), where another CPU might
7097
+ * not observe an up to date cpu_active_mask and try to
7098
+ * move tasks around.
7099
+ */
7100
+ WARN_ON(force);
7101
+ raw_spin_unlock(&next->pi_lock);
7102
+ continue;
7103
+ }
7104
+
7105
+ /* Find suitable destination for @next, with force if needed. */
7106
+ dest_cpu = select_fallback_rq(dead_rq->cpu, next);
7107
+ rq = __migrate_task(rq, rf, next, dest_cpu);
7108
+ if (rq != dead_rq) {
7109
+ rq_unlock(rq, rf);
7110
+ rq = dead_rq;
7111
+ *rf = orf;
7112
+ rq_relock(rq, rf);
7113
+ }
7114
+ raw_spin_unlock(&next->pi_lock);
77227115 }
77237116
7724
- get_task_struct(push_task);
7725
- /*
7726
- * Temporarily drop rq->lock such that we can wake-up the stop task.
7727
- * Both preemption and IRQs are still disabled.
7728
- */
7729
- raw_spin_unlock(&rq->lock);
7730
- stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task,
7731
- this_cpu_ptr(&push_work));
7732
- /*
7733
- * At this point need_resched() is true and we'll take the loop in
7734
- * schedule(). The next pick is obviously going to be the stop task
7735
- * which is_per_cpu_kthread() and will push this task away.
7736
- */
7737
- raw_spin_lock(&rq->lock);
7738
-}
7117
+ list_for_each_entry_safe(next, tmp, &percpu_kthreads,
7118
+ percpu_kthread_node) {
77397119
7740
-static void balance_push_set(int cpu, bool on)
7741
-{
7742
- struct rq *rq = cpu_rq(cpu);
7743
- struct rq_flags rf;
7120
+ /* ENQUEUE_RESTORE not used due to move_entity in rt */
7121
+ activate_task(rq, next, ENQUEUE_NOCLOCK);
7122
+ list_del(&next->percpu_kthread_node);
7123
+ }
77447124
7745
- rq_lock_irqsave(rq, &rf);
7746
- if (on)
7747
- rq->balance_flags |= BALANCE_PUSH;
7748
- else
7749
- rq->balance_flags &= ~BALANCE_PUSH;
7750
- rq_unlock_irqrestore(rq, &rf);
7751
-}
7752
-
7753
-/*
7754
- * Invoked from a CPUs hotplug control thread after the CPU has been marked
7755
- * inactive. All tasks which are not per CPU kernel threads are either
7756
- * pushed off this CPU now via balance_push() or placed on a different CPU
7757
- * during wakeup. Wait until the CPU is quiescent.
7758
- */
7759
-static void balance_hotplug_wait(void)
7760
-{
7761
- struct rq *rq = this_rq();
7762
-
7763
- rcuwait_wait_event(&rq->hotplug_wait,
7764
- rq->nr_running == 1 && !rq_has_pinned_tasks(rq),
7765
- TASK_UNINTERRUPTIBLE);
7125
+ rq->stop = stop;
77667126 }
77677127
77687128 static int drain_rq_cpu_stop(void *data)
77697129 {
7770
-#ifndef CONFIG_PREEMPT_RT
77717130 struct rq *rq = this_rq();
77727131 struct rq_flags rf;
77737132
77747133 rq_lock_irqsave(rq, &rf);
77757134 migrate_tasks(rq, &rf, false);
77767135 rq_unlock_irqrestore(rq, &rf);
7777
-#endif
7136
+
77787137 return 0;
77797138 }
77807139
@@ -7799,21 +7158,6 @@
77997158 if (rq_drain->done)
78007159 cpu_stop_work_wait(rq_drain);
78017160 }
7802
-
7803
-#else
7804
-
7805
-static inline void balance_push(struct rq *rq)
7806
-{
7807
-}
7808
-
7809
-static inline void balance_push_set(int cpu, bool on)
7810
-{
7811
-}
7812
-
7813
-static inline void balance_hotplug_wait(void)
7814
-{
7815
-}
7816
-
78177161 #endif /* CONFIG_HOTPLUG_CPU */
78187162
78197163 void set_rq_online(struct rq *rq)
@@ -7901,8 +7245,6 @@
79017245 struct rq *rq = cpu_rq(cpu);
79027246 struct rq_flags rf;
79037247
7904
- balance_push_set(cpu, false);
7905
-
79067248 #ifdef CONFIG_SCHED_SMT
79077249 /*
79087250 * When going up, increment the number of cores with SMT present.
@@ -7956,21 +7298,9 @@
79567298
79577299 int _sched_cpu_deactivate(unsigned int cpu)
79587300 {
7959
- struct rq *rq = cpu_rq(cpu);
7960
- struct rq_flags rf;
79617301 int ret;
79627302
79637303 set_cpu_active(cpu, false);
7964
-
7965
- balance_push_set(cpu, true);
7966
-
7967
- rq_lock_irqsave(rq, &rf);
7968
- if (rq->rd) {
7969
- update_rq_clock(rq);
7970
- BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
7971
- set_rq_offline(rq);
7972
- }
7973
- rq_unlock_irqrestore(rq, &rf);
79747304
79757305 #ifdef CONFIG_SCHED_SMT
79767306 /*
@@ -7985,7 +7315,6 @@
79857315
79867316 ret = cpuset_cpu_inactive(cpu);
79877317 if (ret) {
7988
- balance_push_set(cpu, false);
79897318 set_cpu_active(cpu, true);
79907319 return ret;
79917320 }
@@ -8049,41 +7378,6 @@
80497378 }
80507379
80517380 #ifdef CONFIG_HOTPLUG_CPU
8052
-
8053
-/*
8054
- * Invoked immediately before the stopper thread is invoked to bring the
8055
- * CPU down completely. At this point all per CPU kthreads except the
8056
- * hotplug thread (current) and the stopper thread (inactive) have been
8057
- * either parked or have been unbound from the outgoing CPU. Ensure that
8058
- * any of those which might be on the way out are gone.
8059
- *
8060
- * If after this point a bound task is being woken on this CPU then the
8061
- * responsible hotplug callback has failed to do it's job.
8062
- * sched_cpu_dying() will catch it with the appropriate fireworks.
8063
- */
8064
-int sched_cpu_wait_empty(unsigned int cpu)
8065
-{
8066
- balance_hotplug_wait();
8067
- return 0;
8068
-}
8069
-
8070
-/*
8071
- * Since this CPU is going 'away' for a while, fold any nr_active delta we
8072
- * might have. Called from the CPU stopper task after ensuring that the
8073
- * stopper is the last running task on the CPU, so nr_active count is
8074
- * stable. We need to take the teardown thread which is calling this into
8075
- * account, so we hand in adjust = 1 to the load calculation.
8076
- *
8077
- * Also see the comment "Global load-average calculations".
8078
- */
8079
-static void calc_load_migrate(struct rq *rq)
8080
-{
8081
- long delta = calc_load_fold_active(rq, 1);
8082
-
8083
- if (delta)
8084
- atomic_long_add(delta, &calc_load_tasks);
8085
-}
8086
-
80877381 int sched_cpu_dying(unsigned int cpu)
80887382 {
80897383 struct rq *rq = cpu_rq(cpu);
@@ -8093,7 +7387,12 @@
80937387 sched_tick_stop(cpu);
80947388
80957389 rq_lock_irqsave(rq, &rf);
8096
- BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq));
7390
+ if (rq->rd) {
7391
+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
7392
+ set_rq_offline(rq);
7393
+ }
7394
+ migrate_tasks(rq, &rf, true);
7395
+ BUG_ON(rq->nr_running != 1);
80977396 rq_unlock_irqrestore(rq, &rf);
80987397
80997398 trace_android_rvh_sched_cpu_dying(cpu);
@@ -8304,9 +7603,6 @@
83047603
83057604 rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func);
83067605 #endif
8307
-#ifdef CONFIG_HOTPLUG_CPU
8308
- rcuwait_init(&rq->hotplug_wait);
8309
-#endif
83107606 #endif /* CONFIG_SMP */
83117607 hrtick_rq_init(rq);
83127608 atomic_set(&rq->nr_iowait, 0);
@@ -8347,7 +7643,7 @@
83477643 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
83487644 static inline int preempt_count_equals(int preempt_offset)
83497645 {
8350
- int nested = preempt_count() + sched_rcu_preempt_depth();
7646
+ int nested = preempt_count() + rcu_preempt_depth();
83517647
83527648 return (nested == preempt_offset);
83537649 }
@@ -8447,39 +7743,6 @@
84477743 add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
84487744 }
84497745 EXPORT_SYMBOL_GPL(__cant_sleep);
8450
-
8451
-#ifdef CONFIG_SMP
8452
-void __cant_migrate(const char *file, int line)
8453
-{
8454
- static unsigned long prev_jiffy;
8455
-
8456
- if (irqs_disabled())
8457
- return;
8458
-
8459
- if (is_migration_disabled(current))
8460
- return;
8461
-
8462
- if (!IS_ENABLED(CONFIG_PREEMPT_COUNT))
8463
- return;
8464
-
8465
- if (preempt_count() > 0)
8466
- return;
8467
-
8468
- if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
8469
- return;
8470
- prev_jiffy = jiffies;
8471
-
8472
- pr_err("BUG: assuming non migratable context at %s:%d\n", file, line);
8473
- pr_err("in_atomic(): %d, irqs_disabled(): %d, migration_disabled() %u pid: %d, name: %s\n",
8474
- in_atomic(), irqs_disabled(), is_migration_disabled(current),
8475
- current->pid, current->comm);
8476
-
8477
- debug_show_held_locks(current);
8478
- dump_stack();
8479
- add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
8480
-}
8481
-EXPORT_SYMBOL_GPL(__cant_migrate);
8482
-#endif
84837746 #endif
84847747
84857748 #ifdef CONFIG_MAGIC_SYSRQ