2024-05-10 commit 61598093bbdd283a7edc367d900f223070ead8d2
kernel/kernel/sched/core.c
....@@ -78,11 +78,7 @@
7878 * Number of tasks to iterate in a single balance run.
7979 * Limited because this is done with IRQs disabled.
8080 */
81
-#ifdef CONFIG_PREEMPT_RT
82
-const_debug unsigned int sysctl_sched_nr_migrate = 8;
83
-#else
8481 const_debug unsigned int sysctl_sched_nr_migrate = 32;
85
-#endif
8682
8783 /*
8884 * period over which we measure -rt task CPU usage in us.
....@@ -531,15 +527,9 @@
531527 #endif
532528 #endif
533529
534
-static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task,
535
- bool sleeper)
530
+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task)
536531 {
537
- struct wake_q_node *node;
538
-
539
- if (sleeper)
540
- node = &task->wake_q_sleeper;
541
- else
542
- node = &task->wake_q;
532
+ struct wake_q_node *node = &task->wake_q;
543533
544534 /*
545535 * Atomically grab the task, if ->wake_q is !nil already it means
....@@ -576,13 +566,7 @@
576566 */
577567 void wake_q_add(struct wake_q_head *head, struct task_struct *task)
578568 {
579
- if (__wake_q_add(head, task, false))
580
- get_task_struct(task);
581
-}
582
-
583
-void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task)
584
-{
585
- if (__wake_q_add(head, task, true))
569
+ if (__wake_q_add(head, task))
586570 get_task_struct(task);
587571 }
588572
....@@ -605,40 +589,29 @@
605589 */
606590 void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task)
607591 {
608
- if (!__wake_q_add(head, task, false))
592
+ if (!__wake_q_add(head, task))
609593 put_task_struct(task);
610594 }
611595
612
-void __wake_up_q(struct wake_q_head *head, bool sleeper)
596
+void wake_up_q(struct wake_q_head *head)
613597 {
614598 struct wake_q_node *node = head->first;
615599
616600 while (node != WAKE_Q_TAIL) {
617601 struct task_struct *task;
618602
619
- if (sleeper)
620
- task = container_of(node, struct task_struct, wake_q_sleeper);
621
- else
622
- task = container_of(node, struct task_struct, wake_q);
623
-
603
+ task = container_of(node, struct task_struct, wake_q);
624604 BUG_ON(!task);
625605 /* Task can safely be re-inserted now: */
626606 node = node->next;
607
+ task->wake_q.next = NULL;
627608 task->wake_q_count = head->count;
628
- if (sleeper)
629
- task->wake_q_sleeper.next = NULL;
630
- else
631
- task->wake_q.next = NULL;
632609
633610 /*
634611 * wake_up_process() executes a full barrier, which pairs with
635612 * the queueing in wake_q_add() so as not to miss wakeups.
636613 */
637
- if (sleeper)
638
- wake_up_lock_sleeper(task);
639
- else
640
- wake_up_process(task);
641
-
614
+ wake_up_process(task);
642615 task->wake_q_count = 0;
643616 put_task_struct(task);
644617 }
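
For context, the wake_q machinery patched above batches wakeups: callers collect tasks on a stack-local wake_q_head while holding a spinlock and issue the actual wakeups after dropping it, which is why wake_q_add() takes a task reference that wake_up_q() drops. A minimal usage sketch (the waiter structure and list are hypothetical; the helpers come from <linux/sched/wake_q.h>):

#include <linux/list.h>
#include <linux/sched/wake_q.h>
#include <linux/spinlock.h>

/* Hypothetical waiter record kept on a lock-protected list. */
struct waiter {
        struct list_head        node;
        struct task_struct      *task;
};

static void wake_all(spinlock_t *lock, struct list_head *waiters)
{
        DEFINE_WAKE_Q(wake_q);          /* empty head on the stack */
        struct waiter *w;

        spin_lock(lock);
        list_for_each_entry(w, waiters, node)
                wake_q_add(&wake_q, w->task);   /* grabs a task reference */
        spin_unlock(lock);

        /* Wakeups happen outside the lock; each task reference is dropped. */
        wake_up_q(&wake_q);
}
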
....@@ -675,48 +648,6 @@
675648 trace_sched_wake_idle_without_ipi(cpu);
676649 }
677650 EXPORT_SYMBOL_GPL(resched_curr);
678
-
679
-#ifdef CONFIG_PREEMPT_LAZY
680
-
681
-static int tsk_is_polling(struct task_struct *p)
682
-{
683
-#ifdef TIF_POLLING_NRFLAG
684
- return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
685
-#else
686
- return 0;
687
-#endif
688
-}
689
-
690
-void resched_curr_lazy(struct rq *rq)
691
-{
692
- struct task_struct *curr = rq->curr;
693
- int cpu;
694
-
695
- if (!sched_feat(PREEMPT_LAZY)) {
696
- resched_curr(rq);
697
- return;
698
- }
699
-
700
- lockdep_assert_held(&rq->lock);
701
-
702
- if (test_tsk_need_resched(curr))
703
- return;
704
-
705
- if (test_tsk_need_resched_lazy(curr))
706
- return;
707
-
708
- set_tsk_need_resched_lazy(curr);
709
-
710
- cpu = cpu_of(rq);
711
- if (cpu == smp_processor_id())
712
- return;
713
-
714
- /* NEED_RESCHED_LAZY must be visible before we test polling */
715
- smp_mb();
716
- if (!tsk_is_polling(curr))
717
- smp_send_reschedule(cpu);
718
-}
719
-#endif
720651
721652 void resched_cpu(int cpu)
722653 {
....@@ -1087,7 +1018,7 @@
10871018 if (!(rq->uclamp_flags & UCLAMP_FLAG_IDLE))
10881019 return;
10891020
1090
- WRITE_ONCE(rq->uclamp[clamp_id].value, clamp_value);
1021
+ uclamp_rq_set(rq, clamp_id, clamp_value);
10911022 }
10921023
10931024 static inline
....@@ -1280,8 +1211,8 @@
12801211 if (bucket->tasks == 1 || uc_se->value > bucket->value)
12811212 bucket->value = uc_se->value;
12821213
1283
- if (uc_se->value > READ_ONCE(uc_rq->value))
1284
- WRITE_ONCE(uc_rq->value, uc_se->value);
1214
+ if (uc_se->value > uclamp_rq_get(rq, clamp_id))
1215
+ uclamp_rq_set(rq, clamp_id, uc_se->value);
12851216 }
12861217
12871218 /*
....@@ -1347,7 +1278,7 @@
13471278 if (likely(bucket->tasks))
13481279 return;
13491280
1350
- rq_clamp = READ_ONCE(uc_rq->value);
1281
+ rq_clamp = uclamp_rq_get(rq, clamp_id);
13511282 /*
13521283 * Defensive programming: this should never happen. If it happens,
13531284 * e.g. due to future modification, warn and fixup the expected value.
....@@ -1355,7 +1286,7 @@
13551286 SCHED_WARN_ON(bucket->value > rq_clamp);
13561287 if (bucket->value >= rq_clamp) {
13571288 bkt_clamp = uclamp_rq_max_value(rq, clamp_id, uc_se->value);
1358
- WRITE_ONCE(uc_rq->value, bkt_clamp);
1289
+ uclamp_rq_set(rq, clamp_id, bkt_clamp);
13591290 }
13601291 }
13611292
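
The uclamp_rq_get()/uclamp_rq_set() helpers introduced above are not defined in this file; judging from the READ_ONCE()/WRITE_ONCE() accesses they replace, they are presumably thin accessors in kernel/sched/sched.h along these lines (sketch, not verbatim):

static inline unsigned long uclamp_rq_get(struct rq *rq,
                                          enum uclamp_id clamp_id)
{
        return READ_ONCE(rq->uclamp[clamp_id].value);
}

static inline void uclamp_rq_set(struct rq *rq, enum uclamp_id clamp_id,
                                 unsigned int value)
{
        WRITE_ONCE(rq->uclamp[clamp_id].value, value);
}
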
....@@ -1761,6 +1692,9 @@
17611692
17621693 void activate_task(struct rq *rq, struct task_struct *p, int flags)
17631694 {
1695
+ if (task_on_rq_migrating(p))
1696
+ flags |= ENQUEUE_MIGRATED;
1697
+
17641698 enqueue_task(rq, p, flags);
17651699
17661700 p->on_rq = TASK_ON_RQ_QUEUED;
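
Setting ENQUEUE_MIGRATED for task_on_rq_migrating() tasks tells the fair class that the entity is arriving from another runqueue, so its (relative) vruntime has to be re-normalized against the destination cfs_rq; previously only the wakeup path passed the flag (via WF_MIGRATED in try_to_wake_up()). The consumer in kernel/sched/fair.c looks roughly like this (paraphrased sketch, not verbatim):

/* enqueue_entity(): re-add min_vruntime for entities that were dequeued
 * elsewhere, i.e. anything that is not a plain same-CPU wakeup. */
bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);

if (renorm)
        se->vruntime += cfs_rq->min_vruntime;
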
....@@ -1870,82 +1804,6 @@
18701804
18711805 #ifdef CONFIG_SMP
18721806
1873
-static void
1874
-__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags);
1875
-
1876
-static int __set_cpus_allowed_ptr(struct task_struct *p,
1877
- const struct cpumask *new_mask,
1878
- u32 flags);
1879
-
1880
-static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
1881
-{
1882
- if (likely(!p->migration_disabled))
1883
- return;
1884
-
1885
- if (p->cpus_ptr != &p->cpus_mask)
1886
- return;
1887
-
1888
- /*
1889
- * Violates locking rules! see comment in __do_set_cpus_allowed().
1890
- */
1891
- __do_set_cpus_allowed(p, cpumask_of(rq->cpu), SCA_MIGRATE_DISABLE);
1892
-}
1893
-
1894
-void migrate_disable(void)
1895
-{
1896
- struct task_struct *p = current;
1897
-
1898
- if (p->migration_disabled) {
1899
- p->migration_disabled++;
1900
- return;
1901
- }
1902
-
1903
- trace_sched_migrate_disable_tp(p);
1904
-
1905
- preempt_disable();
1906
- this_rq()->nr_pinned++;
1907
- p->migration_disabled = 1;
1908
- preempt_lazy_disable();
1909
- preempt_enable();
1910
-}
1911
-EXPORT_SYMBOL_GPL(migrate_disable);
1912
-
1913
-void migrate_enable(void)
1914
-{
1915
- struct task_struct *p = current;
1916
-
1917
- if (p->migration_disabled > 1) {
1918
- p->migration_disabled--;
1919
- return;
1920
- }
1921
-
1922
- /*
1923
- * Ensure stop_task runs either before or after this, and that
1924
- * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
1925
- */
1926
- preempt_disable();
1927
- if (p->cpus_ptr != &p->cpus_mask)
1928
- __set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE);
1929
- /*
1930
- * Mustn't clear migration_disabled() until cpus_ptr points back at the
1931
- * regular cpus_mask, otherwise things that race (eg.
1932
- * select_fallback_rq) get confused.
1933
- */
1934
- barrier();
1935
- p->migration_disabled = 0;
1936
- this_rq()->nr_pinned--;
1937
- preempt_lazy_enable();
1938
- preempt_enable();
1939
-
1940
- trace_sched_migrate_enable_tp(p);
1941
-}
1942
-EXPORT_SYMBOL_GPL(migrate_enable);
1943
-
1944
-static inline bool rq_has_pinned_tasks(struct rq *rq)
1945
-{
1946
- return rq->nr_pinned;
1947
-}
1948
-
19491807 /*
19501808 * Per-CPU kthreads are allowed to run on !active && online CPUs, see
19511809 * __set_cpus_allowed_ptr() and select_fallback_rq().
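
The migrate_disable()/migrate_enable() implementation removed above pins the calling task to its current CPU while leaving it fully preemptible, with nesting tracked in p->migration_disabled and pinned tasks counted in rq->nr_pinned. A walk-through sketch of the semantics as implemented by that code:

        migrate_disable();      /* p->migration_disabled 0 -> 1, rq->nr_pinned++ */
        migrate_disable();      /* nested: 1 -> 2, no further side effects */

        /*
         * In this region the task may sleep and be preempted, but it is not
         * migrated, so smp_processor_id() stays stable and per-CPU data
         * stays local.  A concurrent set_cpus_allowed_ptr() blocks until the
         * outermost migrate_enable().
         */

        migrate_enable();       /* 2 -> 1 */
        migrate_enable();       /* 1 -> 0: cpus_ptr restored to &p->cpus_mask,
                                 * rq->nr_pinned--, deferred affinity applied */
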
....@@ -1955,7 +1813,7 @@
19551813 if (!cpumask_test_cpu(cpu, p->cpus_ptr))
19561814 return false;
19571815
1958
- if (is_per_cpu_kthread(p) || is_migration_disabled(p))
1816
+ if (is_per_cpu_kthread(p))
19591817 return cpu_online(cpu);
19601818
19611819 if (!cpu_active(cpu))
....@@ -2015,21 +1873,8 @@
20151873 }
20161874
20171875 struct migration_arg {
2018
- struct task_struct *task;
2019
- int dest_cpu;
2020
- struct set_affinity_pending *pending;
2021
-};
2022
-
2023
-/*
2024
- * @refs: number of wait_for_completion()
2025
- * @stop_pending: is @stop_work in use
2026
- */
2027
-struct set_affinity_pending {
2028
- refcount_t refs;
2029
- unsigned int stop_pending;
2030
- struct completion done;
2031
- struct cpu_stop_work stop_work;
2032
- struct migration_arg arg;
1876
+ struct task_struct *task;
1877
+ int dest_cpu;
20331878 };
20341879
20351880 /*
....@@ -2062,17 +1907,15 @@
20621907 static int migration_cpu_stop(void *data)
20631908 {
20641909 struct migration_arg *arg = data;
2065
- struct set_affinity_pending *pending = arg->pending;
20661910 struct task_struct *p = arg->task;
20671911 struct rq *rq = this_rq();
2068
- bool complete = false;
20691912 struct rq_flags rf;
20701913
20711914 /*
20721915 * The original target CPU might have gone down and we might
20731916 * be on another CPU but it doesn't matter.
20741917 */
2075
- local_irq_save(rf.flags);
1918
+ local_irq_disable();
20761919 /*
20771920 * We need to explicitly wake pending tasks before running
20781921 * __migrate_task() such that we will not miss enforcing cpus_ptr
....@@ -2082,121 +1925,21 @@
20821925
20831926 raw_spin_lock(&p->pi_lock);
20841927 rq_lock(rq, &rf);
2085
-
20861928 /*
20871929 * If task_rq(p) != rq, it cannot be migrated here, because we're
20881930 * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
20891931 * we're holding p->pi_lock.
20901932 */
20911933 if (task_rq(p) == rq) {
2092
- if (is_migration_disabled(p))
2093
- goto out;
2094
-
2095
- if (pending) {
2096
- if (p->migration_pending == pending)
2097
- p->migration_pending = NULL;
2098
- complete = true;
2099
-
2100
- if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask))
2101
- goto out;
2102
- }
2103
-
21041934 if (task_on_rq_queued(p))
21051935 rq = __migrate_task(rq, &rf, p, arg->dest_cpu);
21061936 else
21071937 p->wake_cpu = arg->dest_cpu;
2108
-
2109
- /*
2110
- * XXX __migrate_task() can fail, at which point we might end
2111
- * up running on a dodgy CPU, AFAICT this can only happen
2112
- * during CPU hotplug, at which point we'll get pushed out
2113
- * anyway, so it's probably not a big deal.
2114
- */
2115
-
2116
- } else if (pending) {
2117
- /*
2118
- * This happens when we get migrated between migrate_enable()'s
2119
- * preempt_enable() and scheduling the stopper task. At that
2120
- * point we're a regular task again and not current anymore.
2121
- *
2122
- * A !PREEMPT kernel has a giant hole here, which makes it far
2123
- * more likely.
2124
- */
2125
-
2126
- /*
2127
- * The task moved before the stopper got to run. We're holding
2128
- * ->pi_lock, so the allowed mask is stable - if it got
2129
- * somewhere allowed, we're done.
2130
- */
2131
- if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
2132
- if (p->migration_pending == pending)
2133
- p->migration_pending = NULL;
2134
- complete = true;
2135
- goto out;
2136
- }
2137
-
2138
- /*
2139
- * When migrate_enable() hits a rq mis-match we can't reliably
2140
- * determine is_migration_disabled() and so have to chase after
2141
- * it.
2142
- */
2143
- WARN_ON_ONCE(!pending->stop_pending);
2144
- task_rq_unlock(rq, p, &rf);
2145
- stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
2146
- &pending->arg, &pending->stop_work);
2147
- return 0;
21481938 }
2149
-out:
2150
- if (pending)
2151
- pending->stop_pending = false;
2152
- task_rq_unlock(rq, p, &rf);
1939
+ rq_unlock(rq, &rf);
1940
+ raw_spin_unlock(&p->pi_lock);
21531941
2154
- if (complete)
2155
- complete_all(&pending->done);
2156
-
2157
- return 0;
2158
-}
2159
-
2160
-int push_cpu_stop(void *arg)
2161
-{
2162
- struct rq *lowest_rq = NULL, *rq = this_rq();
2163
- struct task_struct *p = arg;
2164
-
2165
- raw_spin_lock_irq(&p->pi_lock);
2166
- raw_spin_lock(&rq->lock);
2167
-
2168
- if (task_rq(p) != rq)
2169
- goto out_unlock;
2170
-
2171
- if (is_migration_disabled(p)) {
2172
- p->migration_flags |= MDF_PUSH;
2173
- goto out_unlock;
2174
- }
2175
-
2176
- p->migration_flags &= ~MDF_PUSH;
2177
-
2178
- if (p->sched_class->find_lock_rq)
2179
- lowest_rq = p->sched_class->find_lock_rq(p, rq);
2180
-
2181
- if (!lowest_rq)
2182
- goto out_unlock;
2183
-
2184
- // XXX validate p is still the highest prio task
2185
- if (task_rq(p) == rq) {
2186
- deactivate_task(rq, p, 0);
2187
- set_task_cpu(p, lowest_rq->cpu);
2188
- activate_task(lowest_rq, p, 0);
2189
- resched_curr(lowest_rq);
2190
- }
2191
-
2192
- double_unlock_balance(rq, lowest_rq);
2193
-
2194
-out_unlock:
2195
- rq->push_busy = false;
2196
- raw_spin_unlock(&rq->lock);
2197
- raw_spin_unlock_irq(&p->pi_lock);
2198
-
2199
- put_task_struct(p);
1942
+ local_irq_enable();
22001943 return 0;
22011944 }
22021945
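
migration_cpu_stop() is a cpu_stop callback: it is handed to the per-CPU stopper thread ("migration/N"), which runs it on the target CPU ahead of every other task. A generic sketch of that API (the callback here is hypothetical), which __set_cpus_allowed_ptr_locked() below uses with migration_cpu_stop() and a struct migration_arg:

#include <linux/printk.h>
#include <linux/smp.h>
#include <linux/stop_machine.h>

/* Hypothetical callback: runs in the stopper thread on the chosen CPU. */
static int hello_stop(void *arg)
{
        pr_info("stopper running on CPU%d\n", smp_processor_id());
        return 0;
}

static int poke_cpu(unsigned int cpu)
{
        /* Sleeps until hello_stop() has completed on @cpu. */
        return stop_one_cpu(cpu, hello_stop, NULL);
}
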
....@@ -2204,40 +1947,19 @@
22041947 * sched_class::set_cpus_allowed must do the below, but is not required to
22051948 * actually call this function.
22061949 */
2207
-void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags)
1950
+void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
22081951 {
2209
- if (flags & (SCA_MIGRATE_ENABLE | SCA_MIGRATE_DISABLE)) {
2210
- p->cpus_ptr = new_mask;
2211
- return;
2212
- }
2213
-
22141952 cpumask_copy(&p->cpus_mask, new_mask);
22151953 p->nr_cpus_allowed = cpumask_weight(new_mask);
22161954 trace_android_rvh_set_cpus_allowed_comm(p, new_mask);
22171955 }
22181956
2219
-static void
2220
-__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags)
1957
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
22211958 {
22221959 struct rq *rq = task_rq(p);
22231960 bool queued, running;
22241961
2225
- /*
2226
- * This here violates the locking rules for affinity, since we're only
2227
- * supposed to change these variables while holding both rq->lock and
2228
- * p->pi_lock.
2229
- *
2230
- * HOWEVER, it magically works, because ttwu() is the only code that
2231
- * accesses these variables under p->pi_lock and only does so after
2232
- * smp_cond_load_acquire(&p->on_cpu, !VAL), and we're in __schedule()
2233
- * before finish_task().
2234
- *
2235
- * XXX do further audits, this smells like something putrid.
2236
- */
2237
- if (flags & SCA_MIGRATE_DISABLE)
2238
- SCHED_WARN_ON(!p->on_cpu);
2239
- else
2240
- lockdep_assert_held(&p->pi_lock);
1962
+ lockdep_assert_held(&p->pi_lock);
22411963
22421964 queued = task_on_rq_queued(p);
22431965 running = task_current(rq, p);
....@@ -2253,7 +1975,7 @@
22531975 if (running)
22541976 put_prev_task(rq, p);
22551977
2256
- p->sched_class->set_cpus_allowed(p, new_mask, flags);
1978
+ p->sched_class->set_cpus_allowed(p, new_mask);
22571979
22581980 if (queued)
22591981 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
....@@ -2261,14 +1983,12 @@
22611983 set_next_task(rq, p);
22621984 }
22631985
2264
-static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
2265
- int dest_cpu, unsigned int flags);
22661986 /*
22671987 * Called with both p->pi_lock and rq->lock held; drops both before returning.
22681988 */
22691989 static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
22701990 const struct cpumask *new_mask,
2271
- u32 flags,
1991
+ bool check,
22721992 struct rq *rq,
22731993 struct rq_flags *rf)
22741994 {
....@@ -2279,14 +1999,9 @@
22791999
22802000 update_rq_clock(rq);
22812001
2282
- if (p->flags & PF_KTHREAD || is_migration_disabled(p)) {
2002
+ if (p->flags & PF_KTHREAD) {
22832003 /*
2284
- * Kernel threads are allowed on online && !active CPUs.
2285
- *
2286
- * Specifically, migration_disabled() tasks must not fail the
2287
- * cpumask_any_and_distribute() pick below, esp. so on
2288
- * SCA_MIGRATE_ENABLE, otherwise we'll not call
2289
- * set_cpus_allowed_common() and actually reset p->cpus_ptr.
2004
+ * Kernel threads are allowed on online && !active CPUs
22902005 */
22912006 cpu_valid_mask = cpu_online_mask;
22922007 } else if (!cpumask_subset(new_mask, cpu_allowed_mask)) {
....@@ -2298,22 +2013,13 @@
22982013 * Must re-check here, to close a race against __kthread_bind(),
22992014 * sched_setaffinity() is not guaranteed to observe the flag.
23002015 */
2301
- if ((flags & SCA_CHECK) && (p->flags & PF_NO_SETAFFINITY)) {
2016
+ if (check && (p->flags & PF_NO_SETAFFINITY)) {
23022017 ret = -EINVAL;
23032018 goto out;
23042019 }
23052020
2306
- if (!(flags & SCA_MIGRATE_ENABLE)) {
2307
- if (cpumask_equal(&p->cpus_mask, new_mask))
2308
- goto out;
2309
-
2310
- if (WARN_ON_ONCE(p == current &&
2311
- is_migration_disabled(p) &&
2312
- !cpumask_test_cpu(task_cpu(p), new_mask))) {
2313
- ret = -EBUSY;
2314
- goto out;
2315
- }
2316
- }
2021
+ if (cpumask_equal(&p->cpus_mask, new_mask))
2022
+ goto out;
23172023
23182024 /*
23192025 * Picking a ~random cpu helps in cases where we are changing affinity
....@@ -2326,7 +2032,7 @@
23262032 goto out;
23272033 }
23282034
2329
- __do_set_cpus_allowed(p, new_mask, flags);
2035
+ do_set_cpus_allowed(p, new_mask);
23302036
23312037 if (p->flags & PF_KTHREAD) {
23322038 /*
....@@ -2338,227 +2044,27 @@
23382044 p->nr_cpus_allowed != 1);
23392045 }
23402046
2341
- return affine_move_task(rq, p, rf, dest_cpu, flags);
2047
+ /* Can the task run on the task's current CPU? If so, we're done */
2048
+ if (cpumask_test_cpu(task_cpu(p), new_mask))
2049
+ goto out;
2050
+
2051
+ if (task_running(rq, p) || p->state == TASK_WAKING) {
2052
+ struct migration_arg arg = { p, dest_cpu };
2053
+ /* Need help from migration thread: drop lock and wait. */
2054
+ task_rq_unlock(rq, p, rf);
2055
+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
2056
+ return 0;
2057
+ } else if (task_on_rq_queued(p)) {
2058
+ /*
2059
+ * OK, since we're going to drop the lock immediately
2060
+ * afterwards anyway.
2061
+ */
2062
+ rq = move_queued_task(rq, rf, p, dest_cpu);
2063
+ }
23422064 out:
23432065 task_rq_unlock(rq, p, rf);
23442066
23452067 return ret;
2346
-}
2347
-
2348
-void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
2349
-{
2350
- __do_set_cpus_allowed(p, new_mask, 0);
2351
-}
2352
-
2353
-/*
2354
- * This function is wildly self concurrent; here be dragons.
2355
- *
2356
- *
2357
- * When given a valid mask, __set_cpus_allowed_ptr() must block until the
2358
- * designated task is enqueued on an allowed CPU. If that task is currently
2359
- * running, we have to kick it out using the CPU stopper.
2360
- *
2361
- * Migrate-Disable comes along and tramples all over our nice sandcastle.
2362
- * Consider:
2363
- *
2364
- * Initial conditions: P0->cpus_mask = [0, 1]
2365
- *
2366
- * P0@CPU0 P1
2367
- *
2368
- * migrate_disable();
2369
- * <preempted>
2370
- * set_cpus_allowed_ptr(P0, [1]);
2371
- *
2372
- * P1 *cannot* return from this set_cpus_allowed_ptr() call until P0 executes
2373
- * its outermost migrate_enable() (i.e. it exits its Migrate-Disable region).
2374
- * This means we need the following scheme:
2375
- *
2376
- * P0@CPU0 P1
2377
- *
2378
- * migrate_disable();
2379
- * <preempted>
2380
- * set_cpus_allowed_ptr(P0, [1]);
2381
- * <blocks>
2382
- * <resumes>
2383
- * migrate_enable();
2384
- * __set_cpus_allowed_ptr();
2385
- * <wakes local stopper>
2386
- * `--> <woken on migration completion>
2387
- *
2388
- * Now the fun stuff: there may be several P1-like tasks, i.e. multiple
2389
- * concurrent set_cpus_allowed_ptr(P0, [*]) calls. CPU affinity changes of any
2390
- * task p are serialized by p->pi_lock, which we can leverage: the one that
2391
- * should come into effect at the end of the Migrate-Disable region is the last
2392
- * one. This means we only need to track a single cpumask (i.e. p->cpus_mask),
2393
- * but we still need to properly signal those waiting tasks at the appropriate
2394
- * moment.
2395
- *
2396
- * This is implemented using struct set_affinity_pending. The first
2397
- * __set_cpus_allowed_ptr() caller within a given Migrate-Disable region will
2398
- * setup an instance of that struct and install it on the targeted task_struct.
2399
- * Any and all further callers will reuse that instance. Those then wait for
2400
- * a completion signaled at the tail of the CPU stopper callback (1), triggered
2401
- * on the end of the Migrate-Disable region (i.e. outermost migrate_enable()).
2402
- *
2403
- *
2404
- * (1) In the cases covered above. There is one more where the completion is
2405
- * signaled within affine_move_task() itself: when a subsequent affinity request
2406
- * cancels the need for an active migration. Consider:
2407
- *
2408
- * Initial conditions: P0->cpus_mask = [0, 1]
2409
- *
2410
- * P0@CPU0 P1 P2
2411
- *
2412
- * migrate_disable();
2413
- * <preempted>
2414
- * set_cpus_allowed_ptr(P0, [1]);
2415
- * <blocks>
2416
- * set_cpus_allowed_ptr(P0, [0, 1]);
2417
- * <signal completion>
2418
- * <awakes>
2419
- *
2420
- * Note that the above is safe vs a concurrent migrate_enable(), as any
2421
- * pending affinity completion is preceded an uninstallion of
2422
- * p->migration_pending done with p->pi_lock held.
2423
- */
2424
-static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
2425
- int dest_cpu, unsigned int flags)
2426
-{
2427
- struct set_affinity_pending my_pending = { }, *pending = NULL;
2428
- bool stop_pending, complete = false;
2429
-
2430
- /* Can the task run on the task's current CPU? If so, we're done */
2431
- if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
2432
- struct task_struct *push_task = NULL;
2433
-
2434
- if ((flags & SCA_MIGRATE_ENABLE) &&
2435
- (p->migration_flags & MDF_PUSH) && !rq->push_busy) {
2436
- rq->push_busy = true;
2437
- push_task = get_task_struct(p);
2438
- }
2439
-
2440
- /*
2441
- * If there are pending waiters, but no pending stop_work,
2442
- * then complete now.
2443
- */
2444
- pending = p->migration_pending;
2445
- if (pending && !pending->stop_pending) {
2446
- p->migration_pending = NULL;
2447
- complete = true;
2448
- }
2449
-
2450
- task_rq_unlock(rq, p, rf);
2451
-
2452
- if (push_task) {
2453
- stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
2454
- p, &rq->push_work);
2455
- }
2456
-
2457
- if (complete)
2458
- complete_all(&pending->done);
2459
-
2460
- return 0;
2461
- }
2462
-
2463
- if (!(flags & SCA_MIGRATE_ENABLE)) {
2464
- /* serialized by p->pi_lock */
2465
- if (!p->migration_pending) {
2466
- /* Install the request */
2467
- refcount_set(&my_pending.refs, 1);
2468
- init_completion(&my_pending.done);
2469
- my_pending.arg = (struct migration_arg) {
2470
- .task = p,
2471
- .dest_cpu = dest_cpu,
2472
- .pending = &my_pending,
2473
- };
2474
-
2475
- p->migration_pending = &my_pending;
2476
- } else {
2477
- pending = p->migration_pending;
2478
- refcount_inc(&pending->refs);
2479
- /*
2480
- * Affinity has changed, but we've already installed a
2481
- * pending. migration_cpu_stop() *must* see this, else
2482
- * we risk a completion of the pending despite having a
2483
- * task on a disallowed CPU.
2484
- *
2485
- * Serialized by p->pi_lock, so this is safe.
2486
- */
2487
- pending->arg.dest_cpu = dest_cpu;
2488
- }
2489
- }
2490
- pending = p->migration_pending;
2491
- /*
2492
- * - !MIGRATE_ENABLE:
2493
- * we'll have installed a pending if there wasn't one already.
2494
- *
2495
- * - MIGRATE_ENABLE:
2496
- * we're here because the current CPU isn't matching anymore,
2497
- * the only way that can happen is because of a concurrent
2498
- * set_cpus_allowed_ptr() call, which should then still be
2499
- * pending completion.
2500
- *
2501
- * Either way, we really should have a @pending here.
2502
- */
2503
- if (WARN_ON_ONCE(!pending)) {
2504
- task_rq_unlock(rq, p, rf);
2505
- return -EINVAL;
2506
- }
2507
-
2508
- if (task_running(rq, p) || p->state == TASK_WAKING) {
2509
- /*
2510
- * MIGRATE_ENABLE gets here because 'p == current', but for
2511
- * anything else we cannot do is_migration_disabled(), punt
2512
- * and have the stopper function handle it all race-free.
2513
- */
2514
- stop_pending = pending->stop_pending;
2515
- if (!stop_pending)
2516
- pending->stop_pending = true;
2517
-
2518
- if (flags & SCA_MIGRATE_ENABLE)
2519
- p->migration_flags &= ~MDF_PUSH;
2520
-
2521
- task_rq_unlock(rq, p, rf);
2522
-
2523
- if (!stop_pending) {
2524
- stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
2525
- &pending->arg, &pending->stop_work);
2526
- }
2527
-
2528
- if (flags & SCA_MIGRATE_ENABLE)
2529
- return 0;
2530
- } else {
2531
-
2532
- if (!is_migration_disabled(p)) {
2533
- if (task_on_rq_queued(p))
2534
- rq = move_queued_task(rq, rf, p, dest_cpu);
2535
-
2536
- if (!pending->stop_pending) {
2537
- p->migration_pending = NULL;
2538
- complete = true;
2539
- }
2540
- }
2541
- task_rq_unlock(rq, p, rf);
2542
-
2543
- if (complete)
2544
- complete_all(&pending->done);
2545
- }
2546
-
2547
- wait_for_completion(&pending->done);
2548
-
2549
- if (refcount_dec_and_test(&pending->refs))
2550
- wake_up_var(&pending->refs); /* No UaF, just an address */
2551
-
2552
- /*
2553
- * Block the original owner of &pending until all subsequent callers
2554
- * have seen the completion and decremented the refcount
2555
- */
2556
- wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs));
2557
-
2558
- /* ARGH */
2559
- WARN_ON_ONCE(my_pending.stop_pending);
2560
-
2561
- return 0;
25622068 }
25632069
25642070 /*
....@@ -2571,19 +2077,18 @@
25712077 * call is not atomic; no spinlocks may be held.
25722078 */
25732079 static int __set_cpus_allowed_ptr(struct task_struct *p,
2574
- const struct cpumask *new_mask,
2575
- u32 flags)
2080
+ const struct cpumask *new_mask, bool check)
25762081 {
25772082 struct rq_flags rf;
25782083 struct rq *rq;
25792084
25802085 rq = task_rq_lock(p, &rf);
2581
- return __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, &rf);
2086
+ return __set_cpus_allowed_ptr_locked(p, new_mask, check, rq, &rf);
25822087 }
25832088
25842089 int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
25852090 {
2586
- return __set_cpus_allowed_ptr(p, new_mask, 0);
2091
+ return __set_cpus_allowed_ptr(p, new_mask, false);
25872092 }
25882093 EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
25892094
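
From a caller's point of view the path above is reached through set_cpus_allowed_ptr(); a minimal sketch (process context, may sleep if the task has to be chased off a now-disallowed CPU):

#include <linux/cpumask.h>
#include <linux/sched.h>

/* Restrict @p to a single CPU. */
static int pin_task_to_cpu(struct task_struct *p, int cpu)
{
        /*
         * If @p is currently running on a disallowed CPU, this blocks in
         * stop_one_cpu() until migration_cpu_stop() has moved it away.
         */
        return set_cpus_allowed_ptr(p, cpumask_of(cpu));
}
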
....@@ -2692,8 +2197,6 @@
26922197 * Clearly, migrating tasks to offline CPUs is a fairly daft thing.
26932198 */
26942199 WARN_ON_ONCE(!cpu_online(new_cpu));
2695
-
2696
- WARN_ON_ONCE(is_migration_disabled(p));
26972200 #endif
26982201
26992202 trace_sched_migrate_task(p, new_cpu);
....@@ -2827,18 +2330,6 @@
28272330 }
28282331 EXPORT_SYMBOL_GPL(migrate_swap);
28292332
2830
-static bool check_task_state(struct task_struct *p, long match_state)
2831
-{
2832
- bool match = false;
2833
-
2834
- raw_spin_lock_irq(&p->pi_lock);
2835
- if (p->state == match_state || p->saved_state == match_state)
2836
- match = true;
2837
- raw_spin_unlock_irq(&p->pi_lock);
2838
-
2839
- return match;
2840
-}
2841
-
28422333 /*
28432334 * wait_task_inactive - wait for a thread to unschedule.
28442335 *
....@@ -2883,7 +2374,7 @@
28832374 * is actually now running somewhere else!
28842375 */
28852376 while (task_running(rq, p)) {
2886
- if (match_state && !check_task_state(p, match_state))
2377
+ if (match_state && unlikely(p->state != match_state))
28872378 return 0;
28882379 cpu_relax();
28892380 }
....@@ -2898,8 +2389,7 @@
28982389 running = task_running(rq, p);
28992390 queued = task_on_rq_queued(p);
29002391 ncsw = 0;
2901
- if (!match_state || p->state == match_state ||
2902
- p->saved_state == match_state)
2392
+ if (!match_state || p->state == match_state)
29032393 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
29042394 task_rq_unlock(rq, p, &rf);
29052395
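
For reference, wait_task_inactive() is used by callers such as ptrace_check_attach() to make sure the target has really scheduled out in the expected state before its context is touched; the calling pattern is roughly:

        /* e.g. before accessing a traced child's registers: */
        if (!wait_task_inactive(child, __TASK_TRACED))
                ret = -ESRCH;   /* it ran again or changed state */
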
....@@ -2933,7 +2423,7 @@
29332423 ktime_t to = NSEC_PER_SEC / HZ;
29342424
29352425 set_current_state(TASK_UNINTERRUPTIBLE);
2936
- schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
2426
+ schedule_hrtimeout(&to, HRTIMER_MODE_REL);
29372427 continue;
29382428 }
29392429
....@@ -3040,12 +2530,6 @@
30402530 }
30412531 fallthrough;
30422532 case possible:
3043
- /*
3044
- * XXX When called from select_task_rq() we only
3045
- * hold p->pi_lock and again violate locking order.
3046
- *
3047
- * More yuck to audit.
3048
- */
30492533 do_set_cpus_allowed(p, task_cpu_possible_mask(p));
30502534 state = fail;
30512535 break;
....@@ -3079,7 +2563,7 @@
30792563 {
30802564 lockdep_assert_held(&p->pi_lock);
30812565
3082
- if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p))
2566
+ if (p->nr_cpus_allowed > 1)
30832567 cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
30842568 else
30852569 cpu = cpumask_any(p->cpus_ptr);
....@@ -3102,7 +2586,6 @@
31022586
31032587 void sched_set_stop_task(int cpu, struct task_struct *stop)
31042588 {
3105
- static struct lock_class_key stop_pi_lock;
31062589 struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
31072590 struct task_struct *old_stop = cpu_rq(cpu)->stop;
31082591
....@@ -3118,20 +2601,6 @@
31182601 sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
31192602
31202603 stop->sched_class = &stop_sched_class;
3121
-
3122
- /*
3123
- * The PI code calls rt_mutex_setprio() with ->pi_lock held to
3124
- * adjust the effective priority of a task. As a result,
3125
- * rt_mutex_setprio() can trigger (RT) balancing operations,
3126
- * which can then trigger wakeups of the stop thread to push
3127
- * around the current task.
3128
- *
3129
- * The stop task itself will never be part of the PI-chain, it
3130
- * never blocks, therefore that ->pi_lock recursion is safe.
3131
- * Tell lockdep about this by placing the stop->pi_lock in its
3132
- * own class.
3133
- */
3134
- lockdep_set_class(&stop->pi_lock, &stop_pi_lock);
31352604 }
31362605
31372606 cpu_rq(cpu)->stop = stop;
....@@ -3145,23 +2614,15 @@
31452614 }
31462615 }
31472616
3148
-#else /* CONFIG_SMP */
2617
+#else
31492618
31502619 static inline int __set_cpus_allowed_ptr(struct task_struct *p,
3151
- const struct cpumask *new_mask,
3152
- u32 flags)
2620
+ const struct cpumask *new_mask, bool check)
31532621 {
31542622 return set_cpus_allowed_ptr(p, new_mask);
31552623 }
31562624
3157
-static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { }
3158
-
3159
-static inline bool rq_has_pinned_tasks(struct rq *rq)
3160
-{
3161
- return false;
3162
-}
3163
-
3164
-#endif /* !CONFIG_SMP */
2625
+#endif /* CONFIG_SMP */
31652626
31662627 static void
31672628 ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
....@@ -3595,7 +3056,7 @@
35953056 int cpu, success = 0;
35963057
35973058 preempt_disable();
3598
- if (!IS_ENABLED(CONFIG_PREEMPT_RT) && p == current) {
3059
+ if (p == current) {
35993060 /*
36003061 * We're waking current, this means 'p->on_rq' and 'task_cpu(p)
36013062 * == smp_processor_id()'. Together this means we can special
....@@ -3625,26 +3086,8 @@
36253086 */
36263087 raw_spin_lock_irqsave(&p->pi_lock, flags);
36273088 smp_mb__after_spinlock();
3628
- if (!(p->state & state)) {
3629
- /*
3630
- * The task might be running due to a spinlock sleeper
3631
- * wakeup. Check the saved state and set it to running
3632
- * if the wakeup condition is true.
3633
- */
3634
- if (!(wake_flags & WF_LOCK_SLEEPER)) {
3635
- if (p->saved_state & state) {
3636
- p->saved_state = TASK_RUNNING;
3637
- success = 1;
3638
- }
3639
- }
3089
+ if (!(p->state & state))
36403090 goto unlock;
3641
- }
3642
- /*
3643
- * If this is a regular wakeup, then we can unconditionally
3644
- * clear the saved state of a "lock sleeper".
3645
- */
3646
- if (!(wake_flags & WF_LOCK_SLEEPER))
3647
- p->saved_state = TASK_RUNNING;
36483091
36493092 #ifdef CONFIG_FREEZER
36503093 /*
....@@ -3853,18 +3296,6 @@
38533296 }
38543297 EXPORT_SYMBOL(wake_up_process);
38553298
3856
-/**
3857
- * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
3858
- * @p: The process to be woken up.
3859
- *
3860
- * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
3861
- * the nature of the wakeup.
3862
- */
3863
-int wake_up_lock_sleeper(struct task_struct *p)
3864
-{
3865
- return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER);
3866
-}
3867
-
38683299 int wake_up_state(struct task_struct *p, unsigned int state)
38693300 {
38703301 return try_to_wake_up(p, state, 0);
....@@ -3920,7 +3351,6 @@
39203351 init_numa_balancing(clone_flags, p);
39213352 #ifdef CONFIG_SMP
39223353 p->wake_entry.u_flags = CSD_TYPE_TTWU;
3923
- p->migration_pending = NULL;
39243354 #endif
39253355 }
39263356
....@@ -4099,9 +3529,6 @@
40993529 p->on_cpu = 0;
41003530 #endif
41013531 init_task_preempt_count(p);
4102
-#ifdef CONFIG_HAVE_PREEMPT_LAZY
4103
- task_thread_info(p)->preempt_lazy_count = 0;
4104
-#endif
41053532 #ifdef CONFIG_SMP
41063533 plist_node_init(&p->pushable_tasks, MAX_PRIO);
41073534 RB_CLEAR_NODE(&p->pushable_dl_tasks);
....@@ -4329,90 +3756,6 @@
43293756 #endif
43303757 }
43313758
4332
-#ifdef CONFIG_SMP
4333
-
4334
-static void do_balance_callbacks(struct rq *rq, struct callback_head *head)
4335
-{
4336
- void (*func)(struct rq *rq);
4337
- struct callback_head *next;
4338
-
4339
- lockdep_assert_held(&rq->lock);
4340
-
4341
- while (head) {
4342
- func = (void (*)(struct rq *))head->func;
4343
- next = head->next;
4344
- head->next = NULL;
4345
- head = next;
4346
-
4347
- func(rq);
4348
- }
4349
-}
4350
-
4351
-static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
4352
-{
4353
- struct callback_head *head = rq->balance_callback;
4354
-
4355
- lockdep_assert_held(&rq->lock);
4356
- if (head) {
4357
- rq->balance_callback = NULL;
4358
- rq->balance_flags &= ~BALANCE_WORK;
4359
- }
4360
-
4361
- return head;
4362
-}
4363
-
4364
-static void __balance_callbacks(struct rq *rq)
4365
-{
4366
- do_balance_callbacks(rq, splice_balance_callbacks(rq));
4367
-}
4368
-
4369
-static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
4370
-{
4371
- unsigned long flags;
4372
-
4373
- if (unlikely(head)) {
4374
- raw_spin_lock_irqsave(&rq->lock, flags);
4375
- do_balance_callbacks(rq, head);
4376
- raw_spin_unlock_irqrestore(&rq->lock, flags);
4377
- }
4378
-}
4379
-
4380
-static void balance_push(struct rq *rq);
4381
-
4382
-static inline void balance_switch(struct rq *rq)
4383
-{
4384
- if (likely(!rq->balance_flags))
4385
- return;
4386
-
4387
- if (rq->balance_flags & BALANCE_PUSH) {
4388
- balance_push(rq);
4389
- return;
4390
- }
4391
-
4392
- __balance_callbacks(rq);
4393
-}
4394
-
4395
-#else
4396
-
4397
-static inline void __balance_callbacks(struct rq *rq)
4398
-{
4399
-}
4400
-
4401
-static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
4402
-{
4403
- return NULL;
4404
-}
4405
-
4406
-static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
4407
-{
4408
-}
4409
-
4410
-static inline void balance_switch(struct rq *rq)
4411
-{
4412
-}
4413
-
4414
-#endif
4415
-
44163759 static inline void
44173760 prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
44183761 {
....@@ -4438,7 +3781,6 @@
44383781 * prev into current:
44393782 */
44403783 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
4441
- balance_switch(rq);
44423784 raw_spin_unlock_irq(&rq->lock);
44433785 }
44443786
....@@ -4453,22 +3795,6 @@
44533795 #ifndef finish_arch_post_lock_switch
44543796 # define finish_arch_post_lock_switch() do { } while (0)
44553797 #endif
4456
-
4457
-static inline void kmap_local_sched_out(void)
4458
-{
4459
-#ifdef CONFIG_KMAP_LOCAL
4460
- if (unlikely(current->kmap_ctrl.idx))
4461
- __kmap_local_sched_out();
4462
-#endif
4463
-}
4464
-
4465
-static inline void kmap_local_sched_in(void)
4466
-{
4467
-#ifdef CONFIG_KMAP_LOCAL
4468
- if (unlikely(current->kmap_ctrl.idx))
4469
- __kmap_local_sched_in();
4470
-#endif
4471
-}
44723798
44733799 /**
44743800 * prepare_task_switch - prepare to switch tasks
....@@ -4492,7 +3818,6 @@
44923818 perf_event_task_sched_out(prev, next);
44933819 rseq_preempt(prev);
44943820 fire_sched_out_preempt_notifiers(prev, next);
4495
- kmap_local_sched_out();
44963821 prepare_task(next);
44973822 prepare_arch_switch(next);
44983823 }
....@@ -4559,7 +3884,6 @@
45593884 finish_lock_switch(rq);
45603885 finish_arch_post_lock_switch();
45613886 kcov_finish_switch(current);
4562
- kmap_local_sched_in();
45633887
45643888 fire_sched_in_preempt_notifiers(current);
45653889 /*
....@@ -4574,17 +3898,23 @@
45743898 * provided by mmdrop(),
45753899 * - a sync_core for SYNC_CORE.
45763900 */
4577
- /*
4578
- * We use mmdrop_delayed() here so we don't have to do the
4579
- * full __mmdrop() when we are the last user.
4580
- */
45813901 if (mm) {
45823902 membarrier_mm_sync_core_before_usermode(mm);
4583
- mmdrop_delayed(mm);
3903
+ mmdrop(mm);
45843904 }
45853905 if (unlikely(prev_state == TASK_DEAD)) {
45863906 if (prev->sched_class->task_dead)
45873907 prev->sched_class->task_dead(prev);
3908
+
3909
+ /*
3910
+ * Remove function-return probe instances associated with this
3911
+ * task and put them back on the free list.
3912
+ */
3913
+ kprobe_flush_task(prev);
3914
+ trace_android_rvh_flush_task(prev);
3915
+
3916
+ /* Task is done with its stack. */
3917
+ put_task_stack(prev);
45883918
45893919 put_task_struct_rcu_user(prev);
45903920 }
....@@ -4592,6 +3922,43 @@
45923922 tick_nohz_task_switch();
45933923 return rq;
45943924 }
3925
+
3926
+#ifdef CONFIG_SMP
3927
+
3928
+/* rq->lock is NOT held, but preemption is disabled */
3929
+static void __balance_callback(struct rq *rq)
3930
+{
3931
+ struct callback_head *head, *next;
3932
+ void (*func)(struct rq *rq);
3933
+ unsigned long flags;
3934
+
3935
+ raw_spin_lock_irqsave(&rq->lock, flags);
3936
+ head = rq->balance_callback;
3937
+ rq->balance_callback = NULL;
3938
+ while (head) {
3939
+ func = (void (*)(struct rq *))head->func;
3940
+ next = head->next;
3941
+ head->next = NULL;
3942
+ head = next;
3943
+
3944
+ func(rq);
3945
+ }
3946
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
3947
+}
3948
+
3949
+static inline void balance_callback(struct rq *rq)
3950
+{
3951
+ if (unlikely(rq->balance_callback))
3952
+ __balance_callback(rq);
3953
+}
3954
+
3955
+#else
3956
+
3957
+static inline void balance_callback(struct rq *rq)
3958
+{
3959
+}
3960
+
3961
+#endif
45953962
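
balance_callback() drains the single-linked list of struct callback_head hanging off rq->balance_callback. The producer side is queue_balance_callback() in kernel/sched/sched.h, which the RT and deadline classes use to defer push/pull operations until rq->lock can be dropped; its shape is roughly (sketch, not part of this patch):

static inline void
queue_balance_callback(struct rq *rq, struct callback_head *head,
                       void (*func)(struct rq *rq))
{
        lockdep_assert_held(&rq->lock);

        if (unlikely(head->next))       /* already queued */
                return;

        head->func = (void (*)(struct callback_head *))func;
        head->next = rq->balance_callback;
        rq->balance_callback = head;
}
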
45963963 /**
45973964 * schedule_tail - first thing a freshly forked thread must call.
....@@ -4612,6 +3979,7 @@
46123979 */
46133980
46143981 rq = finish_task_switch(prev);
3982
+ balance_callback(rq);
46153983 preempt_enable();
46163984
46173985 if (current->set_child_tid)
....@@ -5170,8 +4538,7 @@
51704538 pr_err("Preemption disabled at:");
51714539 print_ip_sym(KERN_ERR, preempt_disable_ip);
51724540 }
5173
- if (panic_on_warn)
5174
- panic("scheduling while atomic\n");
4541
+ check_panic_on_warn("scheduling while atomic");
51754542
51764543 trace_android_rvh_schedule_bug(prev);
51774544
....@@ -5317,7 +4684,7 @@
53174684 *
53184685 * WARNING: must be called with preemption disabled!
53194686 */
5320
-static void __sched notrace __schedule(bool preempt, bool spinning_lock)
4687
+static void __sched notrace __schedule(bool preempt)
53214688 {
53224689 struct task_struct *prev, *next;
53234690 unsigned long *switch_count;
....@@ -5370,7 +4737,7 @@
53704737 * - ptrace_{,un}freeze_traced() can change ->state underneath us.
53714738 */
53724739 prev_state = prev->state;
5373
- if ((!preempt || spinning_lock) && prev_state) {
4740
+ if (!preempt && prev_state) {
53744741 if (signal_pending_state(prev_state, prev)) {
53754742 prev->state = TASK_RUNNING;
53764743 } else {
....@@ -5405,7 +4772,6 @@
54054772
54064773 next = pick_next_task(rq, prev, &rf);
54074774 clear_tsk_need_resched(prev);
5408
- clear_tsk_need_resched_lazy(prev);
54094775 clear_preempt_need_resched();
54104776
54114777 trace_android_rvh_schedule(prev, next, rq);
....@@ -5432,7 +4798,6 @@
54324798 */
54334799 ++*switch_count;
54344800
5435
- migrate_disable_switch(rq, prev);
54364801 psi_sched_switch(prev, next, !task_on_rq_queued(prev));
54374802
54384803 trace_sched_switch(preempt, prev, next);
....@@ -5441,11 +4806,10 @@
54414806 rq = context_switch(rq, prev, next, &rf);
54424807 } else {
54434808 rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
5444
-
5445
- rq_unpin_lock(rq, &rf);
5446
- __balance_callbacks(rq);
5447
- raw_spin_unlock_irq(&rq->lock);
4809
+ rq_unlock_irq(rq, &rf);
54484810 }
4811
+
4812
+ balance_callback(rq);
54494813 }
54504814
54514815 void __noreturn do_task_dead(void)
....@@ -5456,7 +4820,7 @@
54564820 /* Tell freezer to ignore us: */
54574821 current->flags |= PF_NOFREEZE;
54584822
5459
- __schedule(false, false);
4823
+ __schedule(false);
54604824 BUG();
54614825
54624826 /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */
....@@ -5489,6 +4853,9 @@
54894853 preempt_enable_no_resched();
54904854 }
54914855
4856
+ if (tsk_is_pi_blocked(tsk))
4857
+ return;
4858
+
54924859 /*
54934860 * If we are going to sleep and we have plugged IO queued,
54944861 * make sure to submit it to avoid deadlocks.
....@@ -5514,7 +4881,7 @@
55144881 sched_submit_work(tsk);
55154882 do {
55164883 preempt_disable();
5517
- __schedule(false, false);
4884
+ __schedule(false);
55184885 sched_preempt_enable_no_resched();
55194886 } while (need_resched());
55204887 sched_update_worker(tsk);
....@@ -5542,7 +4909,7 @@
55424909 */
55434910 WARN_ON_ONCE(current->state);
55444911 do {
5545
- __schedule(false, false);
4912
+ __schedule(false);
55464913 } while (need_resched());
55474914 }
55484915
....@@ -5595,7 +4962,7 @@
55954962 */
55964963 preempt_disable_notrace();
55974964 preempt_latency_start(1);
5598
- __schedule(true, false);
4965
+ __schedule(true);
55994966 preempt_latency_stop(1);
56004967 preempt_enable_no_resched_notrace();
56014968
....@@ -5605,30 +4972,6 @@
56054972 */
56064973 } while (need_resched());
56074974 }
5608
-
5609
-#ifdef CONFIG_PREEMPT_LAZY
5610
-/*
5611
- * If TIF_NEED_RESCHED is then we allow to be scheduled away since this is
5612
- * set by a RT task. Oterwise we try to avoid beeing scheduled out as long as
5613
- * preempt_lazy_count counter >0.
5614
- */
5615
-static __always_inline int preemptible_lazy(void)
5616
-{
5617
- if (test_thread_flag(TIF_NEED_RESCHED))
5618
- return 1;
5619
- if (current_thread_info()->preempt_lazy_count)
5620
- return 0;
5621
- return 1;
5622
-}
5623
-
5624
-#else
5625
-
5626
-static inline int preemptible_lazy(void)
5627
-{
5628
- return 1;
5629
-}
5630
-
5631
-#endif
56324975
56334976 #ifdef CONFIG_PREEMPTION
56344977 /*
....@@ -5643,25 +4986,11 @@
56434986 */
56444987 if (likely(!preemptible()))
56454988 return;
5646
- if (!preemptible_lazy())
5647
- return;
4989
+
56484990 preempt_schedule_common();
56494991 }
56504992 NOKPROBE_SYMBOL(preempt_schedule);
56514993 EXPORT_SYMBOL(preempt_schedule);
5652
-
5653
-#ifdef CONFIG_PREEMPT_RT
5654
-void __sched notrace preempt_schedule_lock(void)
5655
-{
5656
- do {
5657
- preempt_disable();
5658
- __schedule(true, true);
5659
- sched_preempt_enable_no_resched();
5660
- } while (need_resched());
5661
-}
5662
-NOKPROBE_SYMBOL(preempt_schedule_lock);
5663
-EXPORT_SYMBOL(preempt_schedule_lock);
5664
-#endif
56654994
56664995 /**
56674996 * preempt_schedule_notrace - preempt_schedule called by tracing
....@@ -5682,9 +5011,6 @@
56825011 enum ctx_state prev_ctx;
56835012
56845013 if (likely(!preemptible()))
5685
- return;
5686
-
5687
- if (!preemptible_lazy())
56885014 return;
56895015
56905016 do {
....@@ -5709,7 +5035,7 @@
57095035 * an infinite recursion.
57105036 */
57115037 prev_ctx = exception_enter();
5712
- __schedule(true, false);
5038
+ __schedule(true);
57135039 exception_exit(prev_ctx);
57145040
57155041 preempt_latency_stop(1);
....@@ -5738,7 +5064,7 @@
57385064 do {
57395065 preempt_disable();
57405066 local_irq_enable();
5741
- __schedule(true, false);
5067
+ __schedule(true);
57425068 local_irq_disable();
57435069 sched_preempt_enable_no_resched();
57445070 } while (need_resched());
....@@ -5905,11 +5231,9 @@
59055231 out_unlock:
59065232 /* Avoid rq from going away on us: */
59075233 preempt_disable();
5234
+ __task_rq_unlock(rq, &rf);
59085235
5909
- rq_unpin_lock(rq, &rf);
5910
- __balance_callbacks(rq);
5911
- raw_spin_unlock(&rq->lock);
5912
-
5236
+ balance_callback(rq);
59135237 preempt_enable();
59145238 }
59155239 #else
....@@ -6154,11 +5478,11 @@
61545478 int oldpolicy = -1, policy = attr->sched_policy;
61555479 int retval, oldprio, newprio, queued, running;
61565480 const struct sched_class *prev_class;
6157
- struct callback_head *head;
61585481 struct rq_flags rf;
61595482 int reset_on_fork;
61605483 int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
61615484 struct rq *rq;
5485
+ bool cpuset_locked = false;
61625486
61635487 /* The pi code expects interrupts enabled */
61645488 BUG_ON(pi && in_interrupt());
....@@ -6261,6 +5585,15 @@
62615585 }
62625586
62635587 /*
5588
+ * SCHED_DEADLINE bandwidth accounting relies on stable cpusets
5589
+ * information.
5590
+ */
5591
+ if (dl_policy(policy) || dl_policy(p->policy)) {
5592
+ cpuset_locked = true;
5593
+ cpuset_lock();
5594
+ }
5595
+
5596
+ /*
62645597 * Make sure no PI-waiters arrive (or leave) while we are
62655598 * changing the priority of the task:
62665599 *
....@@ -6334,6 +5667,8 @@
63345667 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
63355668 policy = oldpolicy = -1;
63365669 task_rq_unlock(rq, p, &rf);
5670
+ if (cpuset_locked)
5671
+ cpuset_unlock();
63375672 goto recheck;
63385673 }
63395674
....@@ -6397,20 +5732,24 @@
63975732
63985733 /* Avoid rq from going away on us: */
63995734 preempt_disable();
6400
- head = splice_balance_callbacks(rq);
64015735 task_rq_unlock(rq, p, &rf);
64025736
6403
- if (pi)
5737
+ if (pi) {
5738
+ if (cpuset_locked)
5739
+ cpuset_unlock();
64045740 rt_mutex_adjust_pi(p);
5741
+ }
64055742
64065743 /* Run balance callbacks after we've adjusted the PI chain: */
6407
- balance_callbacks(rq, head);
5744
+ balance_callback(rq);
64085745 preempt_enable();
64095746
64105747 return 0;
64115748
64125749 unlock:
64135750 task_rq_unlock(rq, p, &rf);
5751
+ if (cpuset_locked)
5752
+ cpuset_unlock();
64145753 return retval;
64155754 }
64165755
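
The new cpuset_lock()/cpuset_unlock() bracketing only triggers for transitions to or from SCHED_DEADLINE, i.e. for requests like the userspace sketch below (struct sched_attr is re-declared locally because glibc ships no wrapper; the runtime/deadline/period values are arbitrary examples):

#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/sched.h>                /* SCHED_DEADLINE */

struct sched_attr {                     /* mirrors uapi/linux/sched/types.h */
        uint32_t size;
        uint32_t sched_policy;
        uint64_t sched_flags;
        int32_t  sched_nice;
        uint32_t sched_priority;
        uint64_t sched_runtime;
        uint64_t sched_deadline;
        uint64_t sched_period;
};

int main(void)
{
        struct sched_attr attr = {
                .size           = sizeof(attr),
                .sched_policy   = SCHED_DEADLINE,
                .sched_runtime  = 10000000,     /* 10 ms of budget ... */
                .sched_deadline = 30000000,     /* ... due within 30 ms ... */
                .sched_period   = 30000000,     /* ... every 30 ms */
        };

        if (syscall(SYS_sched_setattr, 0, &attr, 0))
                perror("sched_setattr");
        return 0;
}
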
....@@ -6916,7 +6255,7 @@
69166255 }
69176256 #endif
69186257 again:
6919
- retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK);
6258
+ retval = __set_cpus_allowed_ptr(p, new_mask, true);
69206259
69216260 if (!retval) {
69226261 cpuset_cpus_allowed(p, cpus_allowed);
....@@ -7024,14 +6363,14 @@
70246363 if (len & (sizeof(unsigned long)-1))
70256364 return -EINVAL;
70266365
7027
- if (!alloc_cpumask_var(&mask, GFP_KERNEL))
6366
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
70286367 return -ENOMEM;
70296368
70306369 ret = sched_getaffinity(pid, mask);
70316370 if (ret == 0) {
70326371 unsigned int retlen = min(len, cpumask_size());
70336372
7034
- if (copy_to_user(user_mask_ptr, mask, retlen))
6373
+ if (copy_to_user(user_mask_ptr, cpumask_bits(mask), retlen))
70356374 ret = -EFAULT;
70366375 else
70376376 ret = retlen;
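
The switch to zalloc_cpumask_var() and cpumask_bits() matters because, as shown above, the kernel copies back min(len, cpumask_size()) bytes, which can cover more memory than the nr_cpu_ids bits actually written; starting from a zeroed mask avoids copying uninitialized kernel memory to userspace. The corresponding userspace call, for reference:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
        cpu_set_t set;

        CPU_ZERO(&set);
        if (sched_getaffinity(0, sizeof(set), &set) == 0)
                printf("CPU0 allowed for this task: %d\n", CPU_ISSET(0, &set));
        return 0;
}
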
....@@ -7498,7 +6837,7 @@
74986837 *
74996838 * And since this is boot we can forgo the serialization.
75006839 */
7501
- set_cpus_allowed_common(idle, cpumask_of(cpu), 0);
6840
+ set_cpus_allowed_common(idle, cpumask_of(cpu));
75026841 #endif
75036842 /*
75046843 * We're having a chicken and egg problem, even though we are
....@@ -7525,9 +6864,7 @@
75256864
75266865 /* Set the preempt count _outside_ the spinlocks! */
75276866 init_idle_preempt_count(idle, cpu);
7528
-#ifdef CONFIG_HAVE_PREEMPT_LAZY
7529
- task_thread_info(idle)->preempt_lazy_count = 0;
7530
-#endif
6867
+
75316868 /*
75326869 * The idle tasks have their own, simple scheduling class:
75336870 */
....@@ -7554,8 +6891,7 @@
75546891 return ret;
75556892 }
75566893
7557
-int task_can_attach(struct task_struct *p,
7558
- const struct cpumask *cs_effective_cpus)
6894
+int task_can_attach(struct task_struct *p)
75596895 {
75606896 int ret = 0;
75616897
....@@ -7568,21 +6904,9 @@
75686904 * success of set_cpus_allowed_ptr() on all attached tasks
75696905 * before cpus_mask may be changed.
75706906 */
7571
- if (p->flags & PF_NO_SETAFFINITY) {
6907
+ if (p->flags & PF_NO_SETAFFINITY)
75726908 ret = -EINVAL;
7573
- goto out;
7574
- }
75756909
7576
- if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
7577
- cs_effective_cpus)) {
7578
- int cpu = cpumask_any_and(cpu_active_mask, cs_effective_cpus);
7579
-
7580
- if (unlikely(cpu >= nr_cpu_ids))
7581
- return -EINVAL;
7582
- ret = dl_cpu_busy(cpu, p);
7583
- }
7584
-
7585
-out:
75866910 return ret;
75876911 }
75886912
....@@ -7637,7 +6961,6 @@
76376961 #endif /* CONFIG_NUMA_BALANCING */
76386962
76396963 #ifdef CONFIG_HOTPLUG_CPU
7640
-
76416964 /*
76426965 * Ensure that the idle task is using init_mm right before its CPU goes
76436966 * offline.
....@@ -7657,124 +6980,166 @@
76576980 /* finish_cpu(), as ran on the BP, will clean up the active_mm state */
76586981 }
76596982
7660
-static int __balance_push_cpu_stop(void *arg)
6983
+/*
6984
+ * Since this CPU is going 'away' for a while, fold any nr_active delta
6985
+ * we might have. Assumes we're called after migrate_tasks() so that the
6986
+ * nr_active count is stable. We need to take the teardown thread which
6987
+ * is calling this into account, so we hand in adjust = 1 to the load
6988
+ * calculation.
6989
+ *
6990
+ * Also see the comment "Global load-average calculations".
6991
+ */
6992
+static void calc_load_migrate(struct rq *rq)
76616993 {
7662
- struct task_struct *p = arg;
7663
- struct rq *rq = this_rq();
7664
- struct rq_flags rf;
7665
- int cpu;
6994
+ long delta = calc_load_fold_active(rq, 1);
6995
+ if (delta)
6996
+ atomic_long_add(delta, &calc_load_tasks);
6997
+}
76666998
7667
- raw_spin_lock_irq(&p->pi_lock);
7668
- rq_lock(rq, &rf);
6999
+static struct task_struct *__pick_migrate_task(struct rq *rq)
7000
+{
7001
+ const struct sched_class *class;
7002
+ struct task_struct *next;
76697003
7004
+ for_each_class(class) {
7005
+ next = class->pick_next_task(rq);
7006
+ if (next) {
7007
+ next->sched_class->put_prev_task(rq, next);
7008
+ return next;
7009
+ }
7010
+ }
7011
+
7012
+ /* The idle class should always have a runnable task */
7013
+ BUG();
7014
+}
7015
+
7016
+/*
7017
+ * Migrate all tasks from the rq, sleeping tasks will be migrated by
7018
+ * try_to_wake_up()->select_task_rq().
7019
+ *
7020
+ * Called with rq->lock held even though we'er in stop_machine() and
7021
+ * there's no concurrency possible, we hold the required locks anyway
7022
+ * because of lock validation efforts.
7023
+ *
7024
+ * force: if false, the function will skip CPU pinned kthreads.
7025
+ */
7026
+static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf, bool force)
7027
+{
7028
+ struct rq *rq = dead_rq;
7029
+ struct task_struct *next, *tmp, *stop = rq->stop;
7030
+ LIST_HEAD(percpu_kthreads);
7031
+ struct rq_flags orf = *rf;
7032
+ int dest_cpu;
7033
+
7034
+ /*
7035
+ * Fudge the rq selection such that the below task selection loop
7036
+ * doesn't get stuck on the currently eligible stop task.
7037
+ *
7038
+ * We're currently inside stop_machine() and the rq is either stuck
7039
+ * in the stop_machine_cpu_stop() loop, or we're executing this code,
7040
+ * either way we should never end up calling schedule() until we're
7041
+ * done here.
7042
+ */
7043
+ rq->stop = NULL;
7044
+
7045
+ /*
7046
+ * put_prev_task() and pick_next_task() sched
7047
+ * class method both need to have an up-to-date
7048
+ * value of rq->clock[_task]
7049
+ */
76707050 update_rq_clock(rq);
76717051
7672
- if (task_rq(p) == rq && task_on_rq_queued(p)) {
7673
- cpu = select_fallback_rq(rq->cpu, p);
7674
- rq = __migrate_task(rq, &rf, p, cpu);
7675
- }
7052
+#ifdef CONFIG_SCHED_DEBUG
7053
+ /* note the clock update in orf */
7054
+ orf.clock_update_flags |= RQCF_UPDATED;
7055
+#endif
76767056
7677
- rq_unlock(rq, &rf);
7678
- raw_spin_unlock_irq(&p->pi_lock);
7679
-
7680
- put_task_struct(p);
7681
-
7682
- return 0;
7683
-}
7684
-
7685
-static DEFINE_PER_CPU(struct cpu_stop_work, push_work);
7686
-
7687
-/*
7688
- * Ensure we only run per-cpu kthreads once the CPU goes !active.
7689
- */
7690
-
7691
-
7692
-static void balance_push(struct rq *rq)
7693
-{
7694
- struct task_struct *push_task = rq->curr;
7695
-
7696
- lockdep_assert_held(&rq->lock);
7697
- SCHED_WARN_ON(rq->cpu != smp_processor_id());
7698
-
7699
- /*
7700
- * Both the cpu-hotplug and stop task are in this case and are
7701
- * required to complete the hotplug process.
7702
- */
7703
- if (is_per_cpu_kthread(push_task) || is_migration_disabled(push_task)) {
7057
+ for (;;) {
77047058 /*
7705
- * If this is the idle task on the outgoing CPU try to wake
7706
- * up the hotplug control thread which might wait for the
7707
- * last task to vanish. The rcuwait_active() check is
7708
- * accurate here because the waiter is pinned on this CPU
7709
- * and can't obviously be running in parallel.
7710
- *
7711
- * On RT kernels this also has to check whether there are
7712
- * pinned and scheduled out tasks on the runqueue. They
7713
- * need to leave the migrate disabled section first.
7059
+ * There's this thread running, bail when that's the only
7060
+ * remaining thread:
77147061 */
7715
- if (!rq->nr_running && !rq_has_pinned_tasks(rq) &&
7716
- rcuwait_active(&rq->hotplug_wait)) {
7717
- raw_spin_unlock(&rq->lock);
7718
- rcuwait_wake_up(&rq->hotplug_wait);
7719
- raw_spin_lock(&rq->lock);
7062
+ if (rq->nr_running == 1)
7063
+ break;
7064
+
7065
+ next = __pick_migrate_task(rq);
7066
+
7067
+ /*
7068
+ * Argh ... no iterator for tasks, we need to remove the
7069
+ * kthread from the run-queue to continue.
7070
+ */
7071
+ if (!force && is_per_cpu_kthread(next)) {
7072
+ INIT_LIST_HEAD(&next->percpu_kthread_node);
7073
+ list_add(&next->percpu_kthread_node, &percpu_kthreads);
7074
+
7075
+ /* DEQUEUE_SAVE not used due to move_entity in rt */
7076
+ deactivate_task(rq, next,
7077
+ DEQUEUE_NOCLOCK);
7078
+ continue;
77207079 }
7721
- return;
7080
+
7081
+ /*
7082
+ * Rules for changing task_struct::cpus_mask are holding
7083
+ * both pi_lock and rq->lock, such that holding either
7084
+ * stabilizes the mask.
7085
+ *
7086
+ * Drop rq->lock is not quite as disastrous as it usually is
7087
+ * because !cpu_active at this point, which means load-balance
7088
+ * will not interfere. Also, stop-machine.
7089
+ */
7090
+ rq_unlock(rq, rf);
7091
+ raw_spin_lock(&next->pi_lock);
7092
+ rq_relock(rq, rf);
7093
+
7094
+ /*
7095
+ * Since we're inside stop-machine, _nothing_ should have
7096
+ * changed the task, WARN if weird stuff happened, because in
7097
+ * that case the above rq->lock drop is a fail too.
7098
+ */
7099
+ if (task_rq(next) != rq || !task_on_rq_queued(next)) {
7100
+ /*
7101
+ * In the !force case, there is a hole between
7102
+ * rq_unlock() and rq_relock(), where another CPU might
7103
+ * not observe an up to date cpu_active_mask and try to
7104
+ * move tasks around.
7105
+ */
7106
+ WARN_ON(force);
7107
+ raw_spin_unlock(&next->pi_lock);
7108
+ continue;
7109
+ }
7110
+
7111
+ /* Find suitable destination for @next, with force if needed. */
7112
+ dest_cpu = select_fallback_rq(dead_rq->cpu, next);
7113
+ rq = __migrate_task(rq, rf, next, dest_cpu);
7114
+ if (rq != dead_rq) {
7115
+ rq_unlock(rq, rf);
7116
+ rq = dead_rq;
7117
+ *rf = orf;
7118
+ rq_relock(rq, rf);
7119
+ }
7120
+ raw_spin_unlock(&next->pi_lock);
77227121 }
77237122
7724
- get_task_struct(push_task);
7725
- /*
7726
- * Temporarily drop rq->lock such that we can wake-up the stop task.
7727
- * Both preemption and IRQs are still disabled.
7728
- */
7729
- raw_spin_unlock(&rq->lock);
7730
- stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task,
7731
- this_cpu_ptr(&push_work));
7732
- /*
7733
- * At this point need_resched() is true and we'll take the loop in
7734
- * schedule(). The next pick is obviously going to be the stop task
7735
- * which is_per_cpu_kthread() and will push this task away.
7736
- */
7737
- raw_spin_lock(&rq->lock);
7738
-}
7123
+ list_for_each_entry_safe(next, tmp, &percpu_kthreads,
7124
+ percpu_kthread_node) {
77397125
7740
-static void balance_push_set(int cpu, bool on)
7741
-{
7742
- struct rq *rq = cpu_rq(cpu);
7743
- struct rq_flags rf;
7126
+ /* ENQUEUE_RESTORE not used due to move_entity in rt */
7127
+ activate_task(rq, next, ENQUEUE_NOCLOCK);
7128
+ list_del(&next->percpu_kthread_node);
7129
+ }
77447130
7745
- rq_lock_irqsave(rq, &rf);
7746
- if (on)
7747
- rq->balance_flags |= BALANCE_PUSH;
7748
- else
7749
- rq->balance_flags &= ~BALANCE_PUSH;
7750
- rq_unlock_irqrestore(rq, &rf);
7751
-}
7752
-
7753
-/*
7754
- * Invoked from a CPUs hotplug control thread after the CPU has been marked
7755
- * inactive. All tasks which are not per CPU kernel threads are either
7756
- * pushed off this CPU now via balance_push() or placed on a different CPU
7757
- * during wakeup. Wait until the CPU is quiescent.
7758
- */
7759
-static void balance_hotplug_wait(void)
7760
-{
7761
- struct rq *rq = this_rq();
7762
-
7763
- rcuwait_wait_event(&rq->hotplug_wait,
7764
- rq->nr_running == 1 && !rq_has_pinned_tasks(rq),
7765
- TASK_UNINTERRUPTIBLE);
7131
+ rq->stop = stop;
77667132 }
77677133
77687134 static int drain_rq_cpu_stop(void *data)
77697135 {
7770
-#ifndef CONFIG_PREEMPT_RT
77717136 struct rq *rq = this_rq();
77727137 struct rq_flags rf;
77737138
77747139 rq_lock_irqsave(rq, &rf);
77757140 migrate_tasks(rq, &rf, false);
77767141 rq_unlock_irqrestore(rq, &rf);
7777
-#endif
7142
+
77787143 return 0;
77797144 }
77807145
....@@ -7799,21 +7164,6 @@
77997164 if (rq_drain->done)
78007165 cpu_stop_work_wait(rq_drain);
78017166 }
7802
-
7803
-#else
7804
-
7805
-static inline void balance_push(struct rq *rq)
7806
-{
7807
-}
7808
-
7809
-static inline void balance_push_set(int cpu, bool on)
7810
-{
7811
-}
7812
-
7813
-static inline void balance_hotplug_wait(void)
7814
-{
7815
-}
7816
-
78177167 #endif /* CONFIG_HOTPLUG_CPU */
78187168
78197169 void set_rq_online(struct rq *rq)
....@@ -7884,7 +7234,7 @@
78847234 static int cpuset_cpu_inactive(unsigned int cpu)
78857235 {
78867236 if (!cpuhp_tasks_frozen) {
7887
- int ret = dl_cpu_busy(cpu, NULL);
7237
+ int ret = dl_bw_check_overflow(cpu);
78887238
78897239 if (ret)
78907240 return ret;
....@@ -7900,8 +7250,6 @@
79007250 {
79017251 struct rq *rq = cpu_rq(cpu);
79027252 struct rq_flags rf;
7903
-
7904
- balance_push_set(cpu, false);
79057253
79067254 #ifdef CONFIG_SCHED_SMT
79077255 /*
....@@ -7956,21 +7304,9 @@
79567304
79577305 int _sched_cpu_deactivate(unsigned int cpu)
79587306 {
7959
- struct rq *rq = cpu_rq(cpu);
7960
- struct rq_flags rf;
79617307 int ret;
79627308
79637309 set_cpu_active(cpu, false);
7964
-
7965
- balance_push_set(cpu, true);
7966
-
7967
- rq_lock_irqsave(rq, &rf);
7968
- if (rq->rd) {
7969
- update_rq_clock(rq);
7970
- BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
7971
- set_rq_offline(rq);
7972
- }
7973
- rq_unlock_irqrestore(rq, &rf);
79747310
79757311 #ifdef CONFIG_SCHED_SMT
79767312 /*
....@@ -7985,7 +7321,6 @@
79857321
79867322 ret = cpuset_cpu_inactive(cpu);
79877323 if (ret) {
7988
- balance_push_set(cpu, false);
79897324 set_cpu_active(cpu, true);
79907325 return ret;
79917326 }
....@@ -8049,41 +7384,6 @@
80497384 }
80507385
80517386 #ifdef CONFIG_HOTPLUG_CPU
8052
-
8053
-/*
8054
- * Invoked immediately before the stopper thread is invoked to bring the
8055
- * CPU down completely. At this point all per CPU kthreads except the
8056
- * hotplug thread (current) and the stopper thread (inactive) have been
8057
- * either parked or have been unbound from the outgoing CPU. Ensure that
8058
- * any of those which might be on the way out are gone.
8059
- *
8060
- * If after this point a bound task is being woken on this CPU then the
8061
- * responsible hotplug callback has failed to do it's job.
8062
- * sched_cpu_dying() will catch it with the appropriate fireworks.
8063
- */
8064
-int sched_cpu_wait_empty(unsigned int cpu)
8065
-{
8066
- balance_hotplug_wait();
8067
- return 0;
8068
-}
8069
-
8070
-/*
8071
- * Since this CPU is going 'away' for a while, fold any nr_active delta we
8072
- * might have. Called from the CPU stopper task after ensuring that the
8073
- * stopper is the last running task on the CPU, so nr_active count is
8074
- * stable. We need to take the teardown thread which is calling this into
8075
- * account, so we hand in adjust = 1 to the load calculation.
8076
- *
8077
- * Also see the comment "Global load-average calculations".
8078
- */
8079
-static void calc_load_migrate(struct rq *rq)
8080
-{
8081
- long delta = calc_load_fold_active(rq, 1);
8082
-
8083
- if (delta)
8084
- atomic_long_add(delta, &calc_load_tasks);
8085
-}
8086
-
80877387 int sched_cpu_dying(unsigned int cpu)
80887388 {
80897389 struct rq *rq = cpu_rq(cpu);
....@@ -8093,7 +7393,12 @@
80937393 sched_tick_stop(cpu);
80947394
80957395 rq_lock_irqsave(rq, &rf);
8096
- BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq));
7396
+ if (rq->rd) {
7397
+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
7398
+ set_rq_offline(rq);
7399
+ }
7400
+ migrate_tasks(rq, &rf, true);
7401
+ BUG_ON(rq->nr_running != 1);
80977402 rq_unlock_irqrestore(rq, &rf);
80987403
80997404 trace_android_rvh_sched_cpu_dying(cpu);
....@@ -8304,9 +7609,6 @@
83047609
83057610 rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func);
83067611 #endif
8307
-#ifdef CONFIG_HOTPLUG_CPU
8308
- rcuwait_init(&rq->hotplug_wait);
8309
-#endif
83107612 #endif /* CONFIG_SMP */
83117613 hrtick_rq_init(rq);
83127614 atomic_set(&rq->nr_iowait, 0);
....@@ -8347,7 +7649,7 @@
83477649 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
83487650 static inline int preempt_count_equals(int preempt_offset)
83497651 {
8350
- int nested = preempt_count() + sched_rcu_preempt_depth();
7652
+ int nested = preempt_count() + rcu_preempt_depth();
83517653
83527654 return (nested == preempt_offset);
83537655 }
....@@ -8447,39 +7749,6 @@
84477749 add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
84487750 }
84497751 EXPORT_SYMBOL_GPL(__cant_sleep);
8450
-
8451
-#ifdef CONFIG_SMP
8452
-void __cant_migrate(const char *file, int line)
8453
-{
8454
- static unsigned long prev_jiffy;
8455
-
8456
- if (irqs_disabled())
8457
- return;
8458
-
8459
- if (is_migration_disabled(current))
8460
- return;
8461
-
8462
- if (!IS_ENABLED(CONFIG_PREEMPT_COUNT))
8463
- return;
8464
-
8465
- if (preempt_count() > 0)
8466
- return;
8467
-
8468
- if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
8469
- return;
8470
- prev_jiffy = jiffies;
8471
-
8472
- pr_err("BUG: assuming non migratable context at %s:%d\n", file, line);
8473
- pr_err("in_atomic(): %d, irqs_disabled(): %d, migration_disabled() %u pid: %d, name: %s\n",
8474
- in_atomic(), irqs_disabled(), is_migration_disabled(current),
8475
- current->pid, current->comm);
8476
-
8477
- debug_show_held_locks(current);
8478
- dump_stack();
8479
- add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
8480
-}
8481
-EXPORT_SYMBOL_GPL(__cant_migrate);
8482
-#endif
84837752 #endif
84847753
84857754 #ifdef CONFIG_MAGIC_SYSRQ