forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/kernel/sched/core.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * kernel/sched/core.c
34 *
....@@ -5,6 +6,10 @@
56 *
67 * Copyright (C) 1991-2002 Linus Torvalds
78 */
9
+#define CREATE_TRACE_POINTS
10
+#include <trace/events/sched.h>
11
+#undef CREATE_TRACE_POINTS
12
+
813 #include "sched.h"
914
1015 #include <linux/nospec.h>
....@@ -16,14 +21,41 @@
1621 #include <asm/tlb.h>
1722
1823 #include "../workqueue_internal.h"
24
+#include "../../io_uring/io-wq.h"
1925 #include "../smpboot.h"
2026
2127 #include "pelt.h"
28
+#include "smp.h"
2229
23
-#define CREATE_TRACE_POINTS
24
-#include <trace/events/sched.h>
30
+#include <trace/hooks/sched.h>
31
+#include <trace/hooks/dtask.h>
32
+
33
+/*
34
+ * Export tracepoints that act as a bare tracehook (ie: have no trace event
35
+ * associated with them) to allow external modules to probe them.
36
+ */
37
+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_cfs_tp);
38
+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp);
39
+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp);
40
+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp);
41
+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp);
42
+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_thermal_tp);
43
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_cpu_capacity_tp);
44
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp);
45
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_cfs_tp);
46
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_se_tp);
47
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp);
48
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_switch);
49
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_waking);
50
+#ifdef CONFIG_SCHEDSTATS
51
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_sleep);
52
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_wait);
53
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_iowait);
54
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_blocked);
55
+#endif
2556
2657 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
58
+EXPORT_SYMBOL_GPL(runqueues);
2759
2860 #ifdef CONFIG_SCHED_DEBUG
2961 /*
....@@ -38,6 +70,7 @@
3870 const_debug unsigned int sysctl_sched_features =
3971 #include "features.h"
4072 0;
73
+EXPORT_SYMBOL_GPL(sysctl_sched_features);
4174 #undef SCHED_FEAT
4275 #endif
4376
....@@ -45,7 +78,7 @@
4578 * Number of tasks to iterate in a single balance run.
4679 * Limited because this is done with IRQs disabled.
4780 */
48
-#ifdef CONFIG_PREEMPT_RT_FULL
81
+#ifdef CONFIG_PREEMPT_RT
4982 const_debug unsigned int sysctl_sched_nr_migrate = 8;
5083 #else
5184 const_debug unsigned int sysctl_sched_nr_migrate = 32;
....@@ -64,6 +97,100 @@
6497 * default: 0.95s
6598 */
6699 int sysctl_sched_rt_runtime = 950000;
100
+
101
+
102
+/*
103
+ * Serialization rules:
104
+ *
105
+ * Lock order:
106
+ *
107
+ * p->pi_lock
108
+ * rq->lock
109
+ * hrtimer_cpu_base->lock (hrtimer_start() for bandwidth controls)
110
+ *
111
+ * rq1->lock
112
+ * rq2->lock where: rq1 < rq2
113
+ *
114
+ * Regular state:
115
+ *
116
+ * Normal scheduling state is serialized by rq->lock. __schedule() takes the
117
+ * local CPU's rq->lock, it optionally removes the task from the runqueue and
118
+ * always looks at the local rq data structures to find the most eligible task
119
+ * to run next.
120
+ *
121
+ * Task enqueue is also under rq->lock, possibly taken from another CPU.
122
+ * Wakeups from another LLC domain might use an IPI to transfer the enqueue to
123
+ * the local CPU to avoid bouncing the runqueue state around [ see
124
+ * ttwu_queue_wakelist() ]
125
+ *
126
+ * Task wakeup, specifically wakeups that involve migration, are horribly
127
+ * complicated to avoid having to take two rq->locks.
128
+ *
129
+ * Special state:
130
+ *
131
+ * System-calls and anything external will use task_rq_lock() which acquires
132
+ * both p->pi_lock and rq->lock. As a consequence the state they change is
133
+ * stable while holding either lock:
134
+ *
135
+ * - sched_setaffinity()/
136
+ * set_cpus_allowed_ptr(): p->cpus_ptr, p->nr_cpus_allowed
137
+ * - set_user_nice(): p->se.load, p->*prio
138
+ * - __sched_setscheduler(): p->sched_class, p->policy, p->*prio,
139
+ * p->se.load, p->rt_priority,
140
+ * p->dl.dl_{runtime, deadline, period, flags, bw, density}
141
+ * - sched_setnuma(): p->numa_preferred_nid
142
+ * - sched_move_task()/
143
+ * cpu_cgroup_fork(): p->sched_task_group
144
+ * - uclamp_update_active() p->uclamp*
145
+ *
146
+ * p->state <- TASK_*:
147
+ *
148
+ * is changed locklessly using set_current_state(), __set_current_state() or
149
+ * set_special_state(), see their respective comments, or by
150
+ * try_to_wake_up(). This latter uses p->pi_lock to serialize against
151
+ * concurrent self.
152
+ *
153
+ * p->on_rq <- { 0, 1 = TASK_ON_RQ_QUEUED, 2 = TASK_ON_RQ_MIGRATING }:
154
+ *
155
+ * is set by activate_task() and cleared by deactivate_task(), under
156
+ * rq->lock. Non-zero indicates the task is runnable, the special
157
+ * ON_RQ_MIGRATING state is used for migration without holding both
158
+ * rq->locks. It indicates task_cpu() is not stable, see task_rq_lock().
159
+ *
160
+ * p->on_cpu <- { 0, 1 }:
161
+ *
162
+ * is set by prepare_task() and cleared by finish_task() such that it will be
163
+ * set before p is scheduled-in and cleared after p is scheduled-out, both
164
+ * under rq->lock. Non-zero indicates the task is running on its CPU.
165
+ *
166
+ * [ The astute reader will observe that it is possible for two tasks on one
167
+ * CPU to have ->on_cpu = 1 at the same time. ]
168
+ *
169
+ * task_cpu(p): is changed by set_task_cpu(), the rules are:
170
+ *
171
+ * - Don't call set_task_cpu() on a blocked task:
172
+ *
173
+ * We don't care what CPU we're not running on, this simplifies hotplug,
174
+ * the CPU assignment of blocked tasks isn't required to be valid.
175
+ *
176
+ * - for try_to_wake_up(), called under p->pi_lock:
177
+ *
178
+ * This allows try_to_wake_up() to only take one rq->lock, see its comment.
179
+ *
180
+ * - for migration called under rq->lock:
181
+ * [ see task_on_rq_migrating() in task_rq_lock() ]
182
+ *
183
+ * o move_queued_task()
184
+ * o detach_task()
185
+ *
186
+ * - for migration called under double_rq_lock():
187
+ *
188
+ * o __migrate_swap_task()
189
+ * o push_rt_task() / pull_rt_task()
190
+ * o push_dl_task() / pull_dl_task()
191
+ * o dl_task_offline_migration()
192
+ *
193
+ */
67194
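
A minimal sketch of the nesting documented above (illustrative only, not part of this patch; the helper name is invented, and the real __task_rq_lock()/task_rq_lock() below additionally re-check task_rq(p) in a loop, since the task can migrate between the two acquisitions):

static struct rq *sketch_task_rq_lock(struct task_struct *p, unsigned long *flags)
{
	struct rq *rq;

	raw_spin_lock_irqsave(&p->pi_lock, *flags);	/* outer: serializes wakeups and affinity */
	rq = task_rq(p);
	raw_spin_lock(&rq->lock);			/* inner: serializes enqueue/dequeue/pick */

	/* With both locks held, p's scheduling state is stable. */
	return rq;
}
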
68195 /*
69196 * __task_rq_lock - lock the rq @p resides on.
....@@ -88,6 +215,7 @@
88215 cpu_relax();
89216 }
90217 }
218
+EXPORT_SYMBOL_GPL(__task_rq_lock);
91219
92220 /*
93221 * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
....@@ -130,6 +258,7 @@
130258 cpu_relax();
131259 }
132260 }
261
+EXPORT_SYMBOL_GPL(task_rq_lock);
133262
134263 /*
135264 * RQ-clock updating methods:
....@@ -210,7 +339,15 @@
210339 rq->clock += delta;
211340 update_rq_clock_task(rq, delta);
212341 }
342
+EXPORT_SYMBOL_GPL(update_rq_clock);
213343
344
+static inline void
345
+rq_csd_init(struct rq *rq, struct __call_single_data *csd, smp_call_func_t func)
346
+{
347
+ csd->flags = 0;
348
+ csd->func = func;
349
+ csd->info = rq;
350
+}
214351
215352 #ifdef CONFIG_SCHED_HRTICK
216353 /*
....@@ -247,8 +384,9 @@
247384 static void __hrtick_restart(struct rq *rq)
248385 {
249386 struct hrtimer *timer = &rq->hrtick_timer;
387
+ ktime_t time = rq->hrtick_time;
250388
251
- hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
389
+ hrtimer_start(timer, time, HRTIMER_MODE_ABS_PINNED_HARD);
252390 }
253391
254392 /*
....@@ -261,7 +399,6 @@
261399
262400 rq_lock(rq, &rf);
263401 __hrtick_restart(rq);
264
- rq->hrtick_csd_pending = 0;
265402 rq_unlock(rq, &rf);
266403 }
267404
....@@ -273,7 +410,6 @@
273410 void hrtick_start(struct rq *rq, u64 delay)
274411 {
275412 struct hrtimer *timer = &rq->hrtick_timer;
276
- ktime_t time;
277413 s64 delta;
278414
279415 /*
....@@ -281,16 +417,12 @@
281417 * doesn't make sense and can cause timer DoS.
282418 */
283419 delta = max_t(s64, delay, 10000LL);
284
- time = ktime_add_ns(timer->base->get_time(), delta);
420
+ rq->hrtick_time = ktime_add_ns(timer->base->get_time(), delta);
285421
286
- hrtimer_set_expires(timer, time);
287
-
288
- if (rq == this_rq()) {
422
+ if (rq == this_rq())
289423 __hrtick_restart(rq);
290
- } else if (!rq->hrtick_csd_pending) {
424
+ else
291425 smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd);
292
- rq->hrtick_csd_pending = 1;
293
- }
294426 }
295427
296428 #else
....@@ -307,20 +439,16 @@
307439 */
308440 delay = max_t(u64, delay, 10000LL);
309441 hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
310
- HRTIMER_MODE_REL_PINNED);
442
+ HRTIMER_MODE_REL_PINNED_HARD);
311443 }
444
+
312445 #endif /* CONFIG_SMP */
313446
314447 static void hrtick_rq_init(struct rq *rq)
315448 {
316449 #ifdef CONFIG_SMP
317
- rq->hrtick_csd_pending = 0;
318
-
319
- rq->hrtick_csd.flags = 0;
320
- rq->hrtick_csd.func = __hrtick_start;
321
- rq->hrtick_csd.info = rq;
450
+ rq_csd_init(rq, &rq->hrtick_csd, __hrtick_start);
322451 #endif
323
-
324452 hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
325453 rq->hrtick_timer.function = hrtick;
326454 }
....@@ -403,8 +531,8 @@
403531 #endif
404532 #endif
405533
406
-void __wake_q_add(struct wake_q_head *head, struct task_struct *task,
407
- bool sleeper)
534
+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task,
535
+ bool sleeper)
408536 {
409537 struct wake_q_node *node;
410538
....@@ -422,23 +550,65 @@
422550 * state, even in the failed case, an explicit smp_mb() must be used.
423551 */
424552 smp_mb__before_atomic();
425
- if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))
426
- return;
427
-
428
- head->count++;
429
-
430
- get_task_struct(task);
553
+ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)))
554
+ return false;
431555
432556 /*
433557 * The head is context local, there can be no concurrency.
434558 */
435559 *head->lastp = node;
436560 head->lastp = &node->next;
561
+ head->count++;
562
+ return true;
437563 }
438564
439
-static int
440
-try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags,
441
- int sibling_count_hint);
565
+/**
566
+ * wake_q_add() - queue a wakeup for 'later' waking.
567
+ * @head: the wake_q_head to add @task to
568
+ * @task: the task to queue for 'later' wakeup
569
+ *
570
+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the
571
+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
572
+ * instantly.
573
+ *
574
+ * This function must be used as-if it were wake_up_process(); IOW the task
575
+ * must be ready to be woken at this location.
576
+ */
577
+void wake_q_add(struct wake_q_head *head, struct task_struct *task)
578
+{
579
+ if (__wake_q_add(head, task, false))
580
+ get_task_struct(task);
581
+}
582
+
583
+void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task)
584
+{
585
+ if (__wake_q_add(head, task, true))
586
+ get_task_struct(task);
587
+}
588
+
589
+/**
590
+ * wake_q_add_safe() - safely queue a wakeup for 'later' waking.
591
+ * @head: the wake_q_head to add @task to
592
+ * @task: the task to queue for 'later' wakeup
593
+ *
594
+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the
595
+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
596
+ * instantly.
597
+ *
598
+ * This function must be used as-if it were wake_up_process(); IOW the task
599
+ * must be ready to be woken at this location.
600
+ *
601
+ * This function is essentially a task-safe equivalent to wake_q_add(). Callers
602
+ * that already hold a reference to @task can call the 'safe' version and trust
603
+ * wake_q to do the right thing depending on whether or not the @task is already
604
+ * queued for wakeup.
605
+ */
606
+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task)
607
+{
608
+ if (!__wake_q_add(head, task, false))
609
+ put_task_struct(task);
610
+}
611
+
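
A minimal usage sketch for the wake_q API above (illustrative only; the waiter structure and its lock are hypothetical, while DEFINE_WAKE_Q(), wake_q_add() and wake_up_q() are the real interfaces from <linux/sched/wake_q.h>). Wakeups are queued while a lock is held and only issued after it has been dropped:

#include <linux/sched/wake_q.h>
#include <linux/spinlock.h>
#include <linux/list.h>

struct my_waiter {				/* hypothetical waiter object */
	struct task_struct	*task;
	struct list_head	node;
};

static DEFINE_SPINLOCK(my_waiters_lock);	/* hypothetical lock for the list */

static void my_wake_all(struct list_head *waiters)
{
	DEFINE_WAKE_Q(wake_q);
	struct my_waiter *w;

	spin_lock(&my_waiters_lock);
	list_for_each_entry(w, waiters, node)
		wake_q_add(&wake_q, w->task);	/* takes a task reference if queued */
	spin_unlock(&my_waiters_lock);

	wake_up_q(&wake_q);			/* the actual wakeups, outside the lock */
}
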
442612 void __wake_up_q(struct wake_q_head *head, bool sleeper)
443613 {
444614 struct wake_q_node *node = head->first;
....@@ -450,13 +620,16 @@
450620 task = container_of(node, struct task_struct, wake_q_sleeper);
451621 else
452622 task = container_of(node, struct task_struct, wake_q);
623
+
453624 BUG_ON(!task);
454625 /* Task can safely be re-inserted now: */
455626 node = node->next;
627
+ task->wake_q_count = head->count;
456628 if (sleeper)
457629 task->wake_q_sleeper.next = NULL;
458630 else
459631 task->wake_q.next = NULL;
632
+
460633 /*
461634 * wake_up_process() executes a full barrier, which pairs with
462635 * the queueing in wake_q_add() so as not to miss wakeups.
....@@ -466,6 +639,7 @@
466639 else
467640 wake_up_process(task);
468641
642
+ task->wake_q_count = 0;
469643 put_task_struct(task);
470644 }
471645 }
....@@ -495,15 +669,12 @@
495669 return;
496670 }
497671
498
-#ifdef CONFIG_PREEMPT
499672 if (set_nr_and_not_polling(curr))
500
-#else
501
- if (set_nr_and_not_polling(curr) && (rq->curr == rq->idle))
502
-#endif
503673 smp_send_reschedule(cpu);
504674 else
505675 trace_sched_wake_idle_without_ipi(cpu);
506676 }
677
+EXPORT_SYMBOL_GPL(resched_curr);
507678
508679 #ifdef CONFIG_PREEMPT_LAZY
509680
....@@ -570,27 +741,49 @@
570741 */
571742 int get_nohz_timer_target(void)
572743 {
573
- int i, cpu = smp_processor_id();
744
+ int i, cpu = smp_processor_id(), default_cpu = -1;
574745 struct sched_domain *sd;
575746
576
- if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER))
577
- return cpu;
747
+ if (housekeeping_cpu(cpu, HK_FLAG_TIMER) && cpu_active(cpu)) {
748
+ if (!idle_cpu(cpu))
749
+ return cpu;
750
+ default_cpu = cpu;
751
+ }
578752
579753 rcu_read_lock();
580754 for_each_domain(cpu, sd) {
581
- for_each_cpu(i, sched_domain_span(sd)) {
755
+ for_each_cpu_and(i, sched_domain_span(sd),
756
+ housekeeping_cpumask(HK_FLAG_TIMER)) {
582757 if (cpu == i)
583758 continue;
584759
585
- if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) {
760
+ if (!idle_cpu(i)) {
586761 cpu = i;
587762 goto unlock;
588763 }
589764 }
590765 }
591766
592
- if (!housekeeping_cpu(cpu, HK_FLAG_TIMER))
593
- cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
767
+ if (default_cpu == -1) {
768
+ for_each_cpu_and(i, cpu_active_mask,
769
+ housekeeping_cpumask(HK_FLAG_TIMER)) {
770
+ if (cpu == i)
771
+ continue;
772
+
773
+ if (!idle_cpu(i)) {
774
+ cpu = i;
775
+ goto unlock;
776
+ }
777
+ }
778
+
779
+ /* no active, not-idle, housekeeping CPU found. */
780
+ default_cpu = cpumask_any(cpu_active_mask);
781
+
782
+ if (unlikely(default_cpu >= nr_cpu_ids))
783
+ goto unlock;
784
+ }
785
+
786
+ cpu = default_cpu;
594787 unlock:
595788 rcu_read_unlock();
596789 return cpu;
....@@ -650,29 +843,23 @@
650843 wake_up_idle_cpu(cpu);
651844 }
652845
653
-static inline bool got_nohz_idle_kick(void)
846
+static void nohz_csd_func(void *info)
654847 {
655
- int cpu = smp_processor_id();
656
-
657
- if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK))
658
- return false;
659
-
660
- if (idle_cpu(cpu) && !need_resched())
661
- return true;
848
+ struct rq *rq = info;
849
+ int cpu = cpu_of(rq);
850
+ unsigned int flags;
662851
663852 /*
664
- * We can't run Idle Load Balance on this CPU for this time so we
665
- * cancel it and clear NOHZ_BALANCE_KICK
853
+ * Release the rq::nohz_csd.
666854 */
667
- atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu));
668
- return false;
669
-}
855
+ flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu));
856
+ WARN_ON(!(flags & NOHZ_KICK_MASK));
670857
671
-#else /* CONFIG_NO_HZ_COMMON */
672
-
673
-static inline bool got_nohz_idle_kick(void)
674
-{
675
- return false;
858
+ rq->idle_balance = idle_cpu(cpu);
859
+ if (rq->idle_balance && !need_resched()) {
860
+ rq->nohz_idle_balance = flags;
861
+ raise_softirq_irqoff(SCHED_SOFTIRQ);
862
+ }
676863 }
677864
678865 #endif /* CONFIG_NO_HZ_COMMON */
....@@ -763,18 +950,18 @@
763950 }
764951 #endif
765952
766
-static void set_load_weight(struct task_struct *p, bool update_load)
953
+static void set_load_weight(struct task_struct *p)
767954 {
955
+ bool update_load = !(READ_ONCE(p->state) & TASK_NEW);
768956 int prio = p->static_prio - MAX_RT_PRIO;
769957 struct load_weight *load = &p->se.load;
770958
771959 /*
772960 * SCHED_IDLE tasks get minimal weight:
773961 */
774
- if (idle_policy(p->policy)) {
962
+ if (task_has_idle_policy(p)) {
775963 load->weight = scale_load(WEIGHT_IDLEPRIO);
776964 load->inv_weight = WMULT_IDLEPRIO;
777
- p->se.runnable_weight = load->weight;
778965 return;
779966 }
780967
....@@ -787,7 +974,6 @@
787974 } else {
788975 load->weight = scale_load(sched_prio_to_weight[prio]);
789976 load->inv_weight = sched_prio_to_wmult[prio];
790
- p->se.runnable_weight = load->weight;
791977 }
792978 }
793979
....@@ -810,8 +996,46 @@
810996 /* Max allowed maximum utilization */
811997 unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE;
812998
999
+/*
1000
+ * By default RT tasks run at the maximum performance point/capacity of the
1001
+ * system. Uclamp enforces this by always setting UCLAMP_MIN of RT tasks to
1002
+ * SCHED_CAPACITY_SCALE.
1003
+ *
1004
+ * This knob allows admins to change the default behavior when uclamp is being
1005
+ * used. In battery powered devices, particularly, running at the maximum
1006
+ * capacity and frequency will increase energy consumption and shorten the
1007
+ * battery life.
1008
+ *
1009
+ * This knob only affects RT tasks whose uclamp_se->user_defined == false.
1010
+ *
1011
+ * This knob will not override the system default sched_util_clamp_min defined
1012
+ * above.
1013
+ */
1014
+unsigned int sysctl_sched_uclamp_util_min_rt_default = SCHED_CAPACITY_SCALE;
1015
+
8131016 /* All clamps are required to be less or equal than these values */
8141017 static struct uclamp_se uclamp_default[UCLAMP_CNT];
1018
+
1019
+/*
1020
+ * This static key is used to reduce the uclamp overhead in the fast path. It
1021
+ * primarily disables the call to uclamp_rq_{inc, dec}() in
1022
+ * enqueue/dequeue_task().
1023
+ *
1024
+ * This allows users to continue to enable uclamp in their kernel config with
1025
+ * minimum uclamp overhead in the fast path.
1026
+ *
1027
+ * As soon as userspace modifies any of the uclamp knobs, the static key is
1028
+ * enabled, since we have actual users that make use of uclamp
1029
+ * functionality.
1030
+ *
1031
+ * The knobs that would enable this static key are:
1032
+ *
1033
+ * * A task modifying its uclamp value with sched_setattr().
1034
+ * * An admin modifying the sysctl_sched_uclamp_{min, max} via procfs.
1035
+ * * An admin modifying the cgroup cpu.uclamp.{min, max}
1036
+ */
1037
+DEFINE_STATIC_KEY_FALSE(sched_uclamp_used);
1038
+EXPORT_SYMBOL_GPL(sched_uclamp_used);
8151039
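
For context, the first knob listed above is reached from userspace roughly as sketched below (illustrative only: the struct mirrors the sched_setattr(2) uapi layout, there is no glibc wrapper so the raw syscall is used, and the helper name is invented). A successful call enables the sched_uclamp_used static key defined here; pid 0 means the calling thread, and clamp values range over 0..SCHED_CAPACITY_SCALE (1024).

#define _GNU_SOURCE
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

#define SCHED_FLAG_KEEP_ALL		0x18	/* keep current policy and params */
#define SCHED_FLAG_UTIL_CLAMP_MIN	0x20
#define SCHED_FLAG_UTIL_CLAMP_MAX	0x40

struct sched_attr {				/* mirrors uapi/linux/sched/types.h */
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime, sched_deadline, sched_period;
	uint32_t sched_util_min, sched_util_max;
};

static int set_task_uclamp(pid_t pid, uint32_t umin, uint32_t umax)
{
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size	    = sizeof(attr);
	attr.sched_flags    = SCHED_FLAG_KEEP_ALL |
			      SCHED_FLAG_UTIL_CLAMP_MIN | SCHED_FLAG_UTIL_CLAMP_MAX;
	attr.sched_util_min = umin;
	attr.sched_util_max = umax;

	return syscall(SYS_sched_setattr, pid, &attr, 0);
}
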
8161040 /* Integer rounded range for each bucket */
8171041 #define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS)
....@@ -822,11 +1046,6 @@
8221046 static inline unsigned int uclamp_bucket_id(unsigned int clamp_value)
8231047 {
8241048 return min_t(unsigned int, clamp_value / UCLAMP_BUCKET_DELTA, UCLAMP_BUCKETS - 1);
825
-}
826
-
827
-static inline unsigned int uclamp_bucket_base_value(unsigned int clamp_value)
828
-{
829
- return UCLAMP_BUCKET_DELTA * uclamp_bucket_id(clamp_value);
8301049 }
8311050
8321051 static inline unsigned int uclamp_none(enum uclamp_id clamp_id)
....@@ -892,12 +1111,79 @@
8921111 return uclamp_idle_value(rq, clamp_id, clamp_value);
8931112 }
8941113
1114
+static void __uclamp_update_util_min_rt_default(struct task_struct *p)
1115
+{
1116
+ unsigned int default_util_min;
1117
+ struct uclamp_se *uc_se;
1118
+
1119
+ lockdep_assert_held(&p->pi_lock);
1120
+
1121
+ uc_se = &p->uclamp_req[UCLAMP_MIN];
1122
+
1123
+ /* Only sync if user didn't override the default */
1124
+ if (uc_se->user_defined)
1125
+ return;
1126
+
1127
+ default_util_min = sysctl_sched_uclamp_util_min_rt_default;
1128
+ uclamp_se_set(uc_se, default_util_min, false);
1129
+}
1130
+
1131
+static void uclamp_update_util_min_rt_default(struct task_struct *p)
1132
+{
1133
+ struct rq_flags rf;
1134
+ struct rq *rq;
1135
+
1136
+ if (!rt_task(p))
1137
+ return;
1138
+
1139
+ /* Protect updates to p->uclamp_* */
1140
+ rq = task_rq_lock(p, &rf);
1141
+ __uclamp_update_util_min_rt_default(p);
1142
+ task_rq_unlock(rq, p, &rf);
1143
+}
1144
+
1145
+static void uclamp_sync_util_min_rt_default(void)
1146
+{
1147
+ struct task_struct *g, *p;
1148
+
1149
+ /*
1150
+ * copy_process() sysctl_uclamp
1151
+ * uclamp_min_rt = X;
1152
+ * write_lock(&tasklist_lock) read_lock(&tasklist_lock)
1153
+ * // link thread smp_mb__after_spinlock()
1154
+ * write_unlock(&tasklist_lock) read_unlock(&tasklist_lock);
1155
+ * sched_post_fork() for_each_process_thread()
1156
+ * __uclamp_sync_rt() __uclamp_sync_rt()
1157
+ *
1158
+ * Ensures that either sched_post_fork() will observe the new
1159
+ * uclamp_min_rt or for_each_process_thread() will observe the new
1160
+ * task.
1161
+ */
1162
+ read_lock(&tasklist_lock);
1163
+ smp_mb__after_spinlock();
1164
+ read_unlock(&tasklist_lock);
1165
+
1166
+ rcu_read_lock();
1167
+ for_each_process_thread(g, p)
1168
+ uclamp_update_util_min_rt_default(p);
1169
+ rcu_read_unlock();
1170
+}
1171
+
1172
+#if IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE)
1173
+void rockchip_perf_uclamp_sync_util_min_rt_default(void)
1174
+{
1175
+ uclamp_sync_util_min_rt_default();
1176
+}
1177
+EXPORT_SYMBOL(rockchip_perf_uclamp_sync_util_min_rt_default);
1178
+#endif
1179
+
8951180 static inline struct uclamp_se
8961181 uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id)
8971182 {
1183
+ /* Copy by value as we could modify it */
8981184 struct uclamp_se uc_req = p->uclamp_req[clamp_id];
8991185 #ifdef CONFIG_UCLAMP_TASK_GROUP
900
- struct uclamp_se uc_max;
1186
+ unsigned int tg_min, tg_max, value;
9011187
9021188 /*
9031189 * Tasks in autogroups or root task group will be
....@@ -908,9 +1194,11 @@
9081194 if (task_group(p) == &root_task_group)
9091195 return uc_req;
9101196
911
- uc_max = task_group(p)->uclamp[clamp_id];
912
- if (uc_req.value > uc_max.value || !uc_req.user_defined)
913
- return uc_max;
1197
+ tg_min = task_group(p)->uclamp[UCLAMP_MIN].value;
1198
+ tg_max = task_group(p)->uclamp[UCLAMP_MAX].value;
1199
+ value = uc_req.value;
1200
+ value = clamp(value, tg_min, tg_max);
1201
+ uclamp_se_set(&uc_req, value, false);
9141202 #endif
9151203
9161204 return uc_req;
....@@ -929,6 +1217,12 @@
9291217 {
9301218 struct uclamp_se uc_req = uclamp_tg_restrict(p, clamp_id);
9311219 struct uclamp_se uc_max = uclamp_default[clamp_id];
1220
+ struct uclamp_se uc_eff;
1221
+ int ret = 0;
1222
+
1223
+ trace_android_rvh_uclamp_eff_get(p, clamp_id, &uc_max, &uc_eff, &ret);
1224
+ if (ret)
1225
+ return uc_eff;
9321226
9331227 /* System default restrictions always apply */
9341228 if (unlikely(uc_req.value > uc_max.value))
....@@ -949,6 +1243,7 @@
9491243
9501244 return (unsigned long)uc_eff.value;
9511245 }
1246
+EXPORT_SYMBOL_GPL(uclamp_eff_value);
9521247
9531248 /*
9541249 * When a task is enqueued on a rq, the clamp bucket currently defined by the
....@@ -1009,10 +1304,38 @@
10091304
10101305 lockdep_assert_held(&rq->lock);
10111306
1307
+ /*
1308
+ * If sched_uclamp_used was enabled after task @p was enqueued,
1309
+ * we could end up with unbalanced call to uclamp_rq_dec_id().
1310
+ *
1311
+ * In this case the uc_se->active flag should be false since no uclamp
1312
+ * accounting was performed at enqueue time and we can just return
1313
+ * here.
1314
+ *
1315
+ * Need to be careful of the following enqueue/dequeue ordering
1316
+ * problem too
1317
+ *
1318
+ * enqueue(taskA)
1319
+ * // sched_uclamp_used gets enabled
1320
+ * enqueue(taskB)
1321
+ * dequeue(taskA)
1322
+ * // Must not decrement bucket->tasks here
1323
+ * dequeue(taskB)
1324
+ *
1325
+ * where we could end up with stale data in uc_se and
1326
+ * bucket[uc_se->bucket_id].
1327
+ *
1328
+ * The following check here eliminates the possibility of such race.
1329
+ */
1330
+ if (unlikely(!uc_se->active))
1331
+ return;
1332
+
10121333 bucket = &uc_rq->bucket[uc_se->bucket_id];
1334
+
10131335 SCHED_WARN_ON(!bucket->tasks);
10141336 if (likely(bucket->tasks))
10151337 bucket->tasks--;
1338
+
10161339 uc_se->active = false;
10171340
10181341 /*
....@@ -1040,6 +1363,15 @@
10401363 {
10411364 enum uclamp_id clamp_id;
10421365
1366
+ /*
1367
+ * Avoid any overhead until uclamp is actually used by the userspace.
1368
+ *
1369
+ * The condition is constructed such that a NOP is generated when
1370
+ * sched_uclamp_used is disabled.
1371
+ */
1372
+ if (!static_branch_unlikely(&sched_uclamp_used))
1373
+ return;
1374
+
10431375 if (unlikely(!p->sched_class->uclamp_enabled))
10441376 return;
10451377
....@@ -1055,6 +1387,15 @@
10551387 {
10561388 enum uclamp_id clamp_id;
10571389
1390
+ /*
1391
+ * Avoid any overhead until uclamp is actually used by the userspace.
1392
+ *
1393
+ * The condition is constructed such that a NOP is generated when
1394
+ * sched_uclamp_used is disabled.
1395
+ */
1396
+ if (!static_branch_unlikely(&sched_uclamp_used))
1397
+ return;
1398
+
10581399 if (unlikely(!p->sched_class->uclamp_enabled))
10591400 return;
10601401
....@@ -1062,9 +1403,27 @@
10621403 uclamp_rq_dec_id(rq, p, clamp_id);
10631404 }
10641405
1065
-static inline void
1066
-uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
1406
+static inline void uclamp_rq_reinc_id(struct rq *rq, struct task_struct *p,
1407
+ enum uclamp_id clamp_id)
10671408 {
1409
+ if (!p->uclamp[clamp_id].active)
1410
+ return;
1411
+
1412
+ uclamp_rq_dec_id(rq, p, clamp_id);
1413
+ uclamp_rq_inc_id(rq, p, clamp_id);
1414
+
1415
+ /*
1416
+ * Make sure to clear the idle flag if we've transiently reached 0
1417
+ * active tasks on rq.
1418
+ */
1419
+ if (clamp_id == UCLAMP_MAX && (rq->uclamp_flags & UCLAMP_FLAG_IDLE))
1420
+ rq->uclamp_flags &= ~UCLAMP_FLAG_IDLE;
1421
+}
1422
+
1423
+static inline void
1424
+uclamp_update_active(struct task_struct *p)
1425
+{
1426
+ enum uclamp_id clamp_id;
10681427 struct rq_flags rf;
10691428 struct rq *rq;
10701429
....@@ -1084,30 +1443,22 @@
10841443 * affecting a valid clamp bucket, the next time it's enqueued,
10851444 * it will already see the updated clamp bucket value.
10861445 */
1087
- if (p->uclamp[clamp_id].active) {
1088
- uclamp_rq_dec_id(rq, p, clamp_id);
1089
- uclamp_rq_inc_id(rq, p, clamp_id);
1090
- }
1446
+ for_each_clamp_id(clamp_id)
1447
+ uclamp_rq_reinc_id(rq, p, clamp_id);
10911448
10921449 task_rq_unlock(rq, p, &rf);
10931450 }
10941451
10951452 #ifdef CONFIG_UCLAMP_TASK_GROUP
10961453 static inline void
1097
-uclamp_update_active_tasks(struct cgroup_subsys_state *css,
1098
- unsigned int clamps)
1454
+uclamp_update_active_tasks(struct cgroup_subsys_state *css)
10991455 {
1100
- enum uclamp_id clamp_id;
11011456 struct css_task_iter it;
11021457 struct task_struct *p;
11031458
11041459 css_task_iter_start(css, 0, &it);
1105
- while ((p = css_task_iter_next(&it))) {
1106
- for_each_clamp_id(clamp_id) {
1107
- if ((0x1 << clamp_id) & clamps)
1108
- uclamp_update_active(p, clamp_id);
1109
- }
1110
- }
1460
+ while ((p = css_task_iter_next(&it)))
1461
+ uclamp_update_active(p);
11111462 css_task_iter_end(&it);
11121463 }
11131464
....@@ -1130,16 +1481,16 @@
11301481 #endif
11311482
11321483 int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
1133
- void __user *buffer, size_t *lenp,
1134
- loff_t *ppos)
1484
+ void *buffer, size_t *lenp, loff_t *ppos)
11351485 {
11361486 bool update_root_tg = false;
1137
- int old_min, old_max;
1487
+ int old_min, old_max, old_min_rt;
11381488 int result;
11391489
11401490 mutex_lock(&uclamp_mutex);
11411491 old_min = sysctl_sched_uclamp_util_min;
11421492 old_max = sysctl_sched_uclamp_util_max;
1493
+ old_min_rt = sysctl_sched_uclamp_util_min_rt_default;
11431494
11441495 result = proc_dointvec(table, write, buffer, lenp, ppos);
11451496 if (result)
....@@ -1148,7 +1499,9 @@
11481499 goto done;
11491500
11501501 if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max ||
1151
- sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE) {
1502
+ sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE ||
1503
+ sysctl_sched_uclamp_util_min_rt_default > SCHED_CAPACITY_SCALE) {
1504
+
11521505 result = -EINVAL;
11531506 goto undo;
11541507 }
....@@ -1164,8 +1517,15 @@
11641517 update_root_tg = true;
11651518 }
11661519
1167
- if (update_root_tg)
1520
+ if (update_root_tg) {
1521
+ static_branch_enable(&sched_uclamp_used);
11681522 uclamp_update_root_tg();
1523
+ }
1524
+
1525
+ if (old_min_rt != sysctl_sched_uclamp_util_min_rt_default) {
1526
+ static_branch_enable(&sched_uclamp_used);
1527
+ uclamp_sync_util_min_rt_default();
1528
+ }
11691529
11701530 /*
11711531 * We update all RUNNABLE tasks only when task groups are in use.
....@@ -1178,6 +1538,7 @@
11781538 undo:
11791539 sysctl_sched_uclamp_util_min = old_min;
11801540 sysctl_sched_uclamp_util_max = old_max;
1541
+ sysctl_sched_uclamp_util_min_rt_default = old_min_rt;
11811542 done:
11821543 mutex_unlock(&uclamp_mutex);
11831544
....@@ -1187,20 +1548,61 @@
11871548 static int uclamp_validate(struct task_struct *p,
11881549 const struct sched_attr *attr)
11891550 {
1190
- unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value;
1191
- unsigned int upper_bound = p->uclamp_req[UCLAMP_MAX].value;
1551
+ int util_min = p->uclamp_req[UCLAMP_MIN].value;
1552
+ int util_max = p->uclamp_req[UCLAMP_MAX].value;
11921553
1193
- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN)
1194
- lower_bound = attr->sched_util_min;
1195
- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX)
1196
- upper_bound = attr->sched_util_max;
1554
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
1555
+ util_min = attr->sched_util_min;
11971556
1198
- if (lower_bound > upper_bound)
1557
+ if (util_min + 1 > SCHED_CAPACITY_SCALE + 1)
1558
+ return -EINVAL;
1559
+ }
1560
+
1561
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
1562
+ util_max = attr->sched_util_max;
1563
+
1564
+ if (util_max + 1 > SCHED_CAPACITY_SCALE + 1)
1565
+ return -EINVAL;
1566
+ }
1567
+
1568
+ if (util_min != -1 && util_max != -1 && util_min > util_max)
11991569 return -EINVAL;
1200
- if (upper_bound > SCHED_CAPACITY_SCALE)
1201
- return -EINVAL;
1570
+
1571
+ /*
1572
+ * We have valid uclamp attributes; make sure uclamp is enabled.
1573
+ *
1574
+ * We need to do that here, because enabling static branches is a
1575
+ * blocking operation which obviously cannot be done while holding
1576
+ * scheduler locks.
1577
+ */
1578
+ static_branch_enable(&sched_uclamp_used);
12021579
12031580 return 0;
1581
+}
1582
+
1583
+static bool uclamp_reset(const struct sched_attr *attr,
1584
+ enum uclamp_id clamp_id,
1585
+ struct uclamp_se *uc_se)
1586
+{
1587
+ /* Reset on sched class change for a non user-defined clamp value. */
1588
+ if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)) &&
1589
+ !uc_se->user_defined)
1590
+ return true;
1591
+
1592
+ /* Reset on sched_util_{min,max} == -1. */
1593
+ if (clamp_id == UCLAMP_MIN &&
1594
+ attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN &&
1595
+ attr->sched_util_min == -1) {
1596
+ return true;
1597
+ }
1598
+
1599
+ if (clamp_id == UCLAMP_MAX &&
1600
+ attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX &&
1601
+ attr->sched_util_max == -1) {
1602
+ return true;
1603
+ }
1604
+
1605
+ return false;
12041606 }
12051607
12061608 static void __setscheduler_uclamp(struct task_struct *p,
....@@ -1208,40 +1610,41 @@
12081610 {
12091611 enum uclamp_id clamp_id;
12101612
1211
- /*
1212
- * On scheduling class change, reset to default clamps for tasks
1213
- * without a task-specific value.
1214
- */
12151613 for_each_clamp_id(clamp_id) {
12161614 struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
1217
- unsigned int clamp_value = uclamp_none(clamp_id);
1615
+ unsigned int value;
12181616
1219
- /* Keep using defined clamps across class changes */
1220
- if (uc_se->user_defined)
1617
+ if (!uclamp_reset(attr, clamp_id, uc_se))
12211618 continue;
12221619
1223
- /* By default, RT tasks always get 100% boost */
1224
- if (sched_feat(SUGOV_RT_MAX_FREQ) &&
1225
- unlikely(rt_task(p) &&
1226
- clamp_id == UCLAMP_MIN)) {
1620
+ /*
1621
+ * RT by default have a 100% boost value that could be modified
1622
+ * at runtime.
1623
+ */
1624
+ if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
1625
+ value = sysctl_sched_uclamp_util_min_rt_default;
1626
+ else
1627
+ value = uclamp_none(clamp_id);
12271628
1228
- clamp_value = uclamp_none(UCLAMP_MAX);
1229
- }
1629
+ uclamp_se_set(uc_se, value, false);
12301630
1231
- uclamp_se_set(uc_se, clamp_value, false);
12321631 }
12331632
12341633 if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
12351634 return;
12361635
1237
- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
1636
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN &&
1637
+ attr->sched_util_min != -1) {
12381638 uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
12391639 attr->sched_util_min, true);
1640
+ trace_android_vh_setscheduler_uclamp(p, UCLAMP_MIN, attr->sched_util_min);
12401641 }
12411642
1242
- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
1643
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX &&
1644
+ attr->sched_util_max != -1) {
12431645 uclamp_se_set(&p->uclamp_req[UCLAMP_MAX],
12441646 attr->sched_util_max, true);
1647
+ trace_android_vh_setscheduler_uclamp(p, UCLAMP_MAX, attr->sched_util_max);
12451648 }
12461649 }
12471650
....@@ -1249,6 +1652,10 @@
12491652 {
12501653 enum uclamp_id clamp_id;
12511654
1655
+ /*
1656
+ * We don't need to hold task_rq_lock() when updating p->uclamp_* here
1657
+ * as the task is still at its early fork stages.
1658
+ */
12521659 for_each_clamp_id(clamp_id)
12531660 p->uclamp[clamp_id].active = false;
12541661
....@@ -1261,39 +1668,24 @@
12611668 }
12621669 }
12631670
1264
-#ifdef CONFIG_SMP
1265
-unsigned int uclamp_task(struct task_struct *p)
1671
+static void uclamp_post_fork(struct task_struct *p)
12661672 {
1267
- unsigned long util;
1268
-
1269
- util = task_util_est(p);
1270
- util = max(util, uclamp_eff_value(p, UCLAMP_MIN));
1271
- util = min(util, uclamp_eff_value(p, UCLAMP_MAX));
1272
-
1273
- return util;
1673
+ uclamp_update_util_min_rt_default(p);
12741674 }
12751675
1276
-bool uclamp_boosted(struct task_struct *p)
1676
+static void __init init_uclamp_rq(struct rq *rq)
12771677 {
1278
- return uclamp_eff_value(p, UCLAMP_MIN) > 0;
1678
+ enum uclamp_id clamp_id;
1679
+ struct uclamp_rq *uc_rq = rq->uclamp;
1680
+
1681
+ for_each_clamp_id(clamp_id) {
1682
+ uc_rq[clamp_id] = (struct uclamp_rq) {
1683
+ .value = uclamp_none(clamp_id)
1684
+ };
1685
+ }
1686
+
1687
+ rq->uclamp_flags = UCLAMP_FLAG_IDLE;
12791688 }
1280
-
1281
-bool uclamp_latency_sensitive(struct task_struct *p)
1282
-{
1283
-#ifdef CONFIG_UCLAMP_TASK_GROUP
1284
- struct cgroup_subsys_state *css = task_css(p, cpu_cgrp_id);
1285
- struct task_group *tg;
1286
-
1287
- if (!css)
1288
- return false;
1289
- tg = container_of(css, struct task_group, css);
1290
-
1291
- return tg->latency_sensitive;
1292
-#else
1293
- return false;
1294
-#endif
1295
-}
1296
-#endif /* CONFIG_SMP */
12971689
12981690 static void __init init_uclamp(void)
12991691 {
....@@ -1301,13 +1693,8 @@
13011693 enum uclamp_id clamp_id;
13021694 int cpu;
13031695
1304
- mutex_init(&uclamp_mutex);
1305
-
1306
- for_each_possible_cpu(cpu) {
1307
- memset(&cpu_rq(cpu)->uclamp, 0,
1308
- sizeof(struct uclamp_rq)*UCLAMP_CNT);
1309
- cpu_rq(cpu)->uclamp_flags = 0;
1310
- }
1696
+ for_each_possible_cpu(cpu)
1697
+ init_uclamp_rq(cpu_rq(cpu));
13111698
13121699 for_each_clamp_id(clamp_id) {
13131700 uclamp_se_set(&init_task.uclamp_req[clamp_id],
....@@ -1336,41 +1723,7 @@
13361723 static void __setscheduler_uclamp(struct task_struct *p,
13371724 const struct sched_attr *attr) { }
13381725 static inline void uclamp_fork(struct task_struct *p) { }
1339
-
1340
-long schedtune_task_margin(struct task_struct *task);
1341
-
1342
-#ifdef CONFIG_SMP
1343
-unsigned int uclamp_task(struct task_struct *p)
1344
-{
1345
- unsigned long util = task_util_est(p);
1346
-#ifdef CONFIG_SCHED_TUNE
1347
- long margin = schedtune_task_margin(p);
1348
-
1349
- trace_sched_boost_task(p, util, margin);
1350
-
1351
- util += margin;
1352
-#endif
1353
-
1354
- return util;
1355
-}
1356
-
1357
-bool uclamp_boosted(struct task_struct *p)
1358
-{
1359
-#ifdef CONFIG_SCHED_TUNE
1360
- return schedtune_task_boost(p) > 0;
1361
-#endif
1362
- return false;
1363
-}
1364
-
1365
-bool uclamp_latency_sensitive(struct task_struct *p)
1366
-{
1367
-#ifdef CONFIG_SCHED_TUNE
1368
- return schedtune_prefer_idle(p) != 0;
1369
-#endif
1370
- return false;
1371
-}
1372
-#endif /* CONFIG_SMP */
1373
-
1726
+static inline void uclamp_post_fork(struct task_struct *p) { }
13741727 static inline void init_uclamp(void) { }
13751728 #endif /* CONFIG_UCLAMP_TASK */
13761729
....@@ -1385,7 +1738,9 @@
13851738 }
13861739
13871740 uclamp_rq_inc(rq, p);
1741
+ trace_android_rvh_enqueue_task(rq, p, flags);
13881742 p->sched_class->enqueue_task(rq, p, flags);
1743
+ trace_android_rvh_after_enqueue_task(rq, p);
13891744 }
13901745
13911746 static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
....@@ -1399,31 +1754,39 @@
13991754 }
14001755
14011756 uclamp_rq_dec(rq, p);
1757
+ trace_android_rvh_dequeue_task(rq, p, flags);
14021758 p->sched_class->dequeue_task(rq, p, flags);
1759
+ trace_android_rvh_after_dequeue_task(rq, p);
14031760 }
14041761
14051762 void activate_task(struct rq *rq, struct task_struct *p, int flags)
14061763 {
1407
- if (task_contributes_to_load(p))
1408
- rq->nr_uninterruptible--;
1409
-
14101764 enqueue_task(rq, p, flags);
1765
+
1766
+ p->on_rq = TASK_ON_RQ_QUEUED;
14111767 }
1768
+EXPORT_SYMBOL_GPL(activate_task);
14121769
14131770 void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
14141771 {
1415
- if (task_contributes_to_load(p))
1416
- rq->nr_uninterruptible++;
1772
+ p->on_rq = (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING;
14171773
14181774 dequeue_task(rq, p, flags);
14191775 }
1776
+EXPORT_SYMBOL_GPL(deactivate_task);
14201777
1421
-/*
1422
- * __normal_prio - return the priority that is based on the static prio
1423
- */
1424
-static inline int __normal_prio(struct task_struct *p)
1778
+static inline int __normal_prio(int policy, int rt_prio, int nice)
14251779 {
1426
- return p->static_prio;
1780
+ int prio;
1781
+
1782
+ if (dl_policy(policy))
1783
+ prio = MAX_DL_PRIO - 1;
1784
+ else if (rt_policy(policy))
1785
+ prio = MAX_RT_PRIO - 1 - rt_prio;
1786
+ else
1787
+ prio = NICE_TO_PRIO(nice);
1788
+
1789
+ return prio;
14271790 }
14281791
14291792 /*
....@@ -1435,15 +1798,7 @@
14351798 */
14361799 static inline int normal_prio(struct task_struct *p)
14371800 {
1438
- int prio;
1439
-
1440
- if (task_has_dl_policy(p))
1441
- prio = MAX_DL_PRIO-1;
1442
- else if (task_has_rt_policy(p))
1443
- prio = MAX_RT_PRIO-1 - p->rt_priority;
1444
- else
1445
- prio = __normal_prio(p);
1446
- return prio;
1801
+ return __normal_prio(p->policy, p->rt_priority, PRIO_TO_NICE(p->static_prio));
14471802 }
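
Worked examples for the helper above, using the stock priority maps (MAX_DL_PRIO is 0, MAX_RT_PRIO is 100, and NICE_TO_PRIO(n) is 120 + n); illustrative only, not part of the patch:

	__normal_prio(SCHED_DEADLINE,  0,  0);	/* MAX_DL_PRIO - 1      = -1  */
	__normal_prio(SCHED_FIFO,     50,  0);	/* MAX_RT_PRIO - 1 - 50 = 49  */
	__normal_prio(SCHED_NORMAL,    0,  0);	/* NICE_TO_PRIO(0)      = 120 */
	__normal_prio(SCHED_NORMAL,    0, 10);	/* NICE_TO_PRIO(10)     = 130 */
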
14481803
14491804 /*
....@@ -1499,20 +1854,10 @@
14991854
15001855 void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
15011856 {
1502
- const struct sched_class *class;
1503
-
1504
- if (p->sched_class == rq->curr->sched_class) {
1857
+ if (p->sched_class == rq->curr->sched_class)
15051858 rq->curr->sched_class->check_preempt_curr(rq, p, flags);
1506
- } else {
1507
- for_each_class(class) {
1508
- if (class == rq->curr->sched_class)
1509
- break;
1510
- if (class == p->sched_class) {
1511
- resched_curr(rq);
1512
- break;
1513
- }
1514
- }
1515
- }
1859
+ else if (p->sched_class > rq->curr->sched_class)
1860
+ resched_curr(rq);
15161861
15171862 /*
15181863 * A queue event has occurred, and we're going to schedule. In
....@@ -1521,22 +1866,88 @@
15211866 if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
15221867 rq_clock_skip_update(rq);
15231868 }
1869
+EXPORT_SYMBOL_GPL(check_preempt_curr);
15241870
15251871 #ifdef CONFIG_SMP
15261872
1527
-static inline bool is_per_cpu_kthread(struct task_struct *p)
1873
+static void
1874
+__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags);
1875
+
1876
+static int __set_cpus_allowed_ptr(struct task_struct *p,
1877
+ const struct cpumask *new_mask,
1878
+ u32 flags);
1879
+
1880
+static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
15281881 {
1529
- if (!(p->flags & PF_KTHREAD))
1530
- return false;
1882
+ if (likely(!p->migration_disabled))
1883
+ return;
15311884
1532
- if (p->nr_cpus_allowed != 1)
1533
- return false;
1885
+ if (p->cpus_ptr != &p->cpus_mask)
1886
+ return;
15341887
1535
- return true;
1888
+ /*
1889
+ * Violates locking rules! see comment in __do_set_cpus_allowed().
1890
+ */
1891
+ __do_set_cpus_allowed(p, cpumask_of(rq->cpu), SCA_MIGRATE_DISABLE);
1892
+}
1893
+
1894
+void migrate_disable(void)
1895
+{
1896
+ struct task_struct *p = current;
1897
+
1898
+ if (p->migration_disabled) {
1899
+ p->migration_disabled++;
1900
+ return;
1901
+ }
1902
+
1903
+ trace_sched_migrate_disable_tp(p);
1904
+
1905
+ preempt_disable();
1906
+ this_rq()->nr_pinned++;
1907
+ p->migration_disabled = 1;
1908
+ preempt_lazy_disable();
1909
+ preempt_enable();
1910
+}
1911
+EXPORT_SYMBOL_GPL(migrate_disable);
1912
+
1913
+void migrate_enable(void)
1914
+{
1915
+ struct task_struct *p = current;
1916
+
1917
+ if (p->migration_disabled > 1) {
1918
+ p->migration_disabled--;
1919
+ return;
1920
+ }
1921
+
1922
+ /*
1923
+ * Ensure stop_task runs either before or after this, and that
1924
+ * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
1925
+ */
1926
+ preempt_disable();
1927
+ if (p->cpus_ptr != &p->cpus_mask)
1928
+ __set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE);
1929
+ /*
1930
+ * Mustn't clear migration_disabled() until cpus_ptr points back at the
1931
+ * regular cpus_mask, otherwise things that race (eg.
1932
+ * select_fallback_rq) get confused.
1933
+ */
1934
+ barrier();
1935
+ p->migration_disabled = 0;
1936
+ this_rq()->nr_pinned--;
1937
+ preempt_lazy_enable();
1938
+ preempt_enable();
1939
+
1940
+ trace_sched_migrate_enable_tp(p);
1941
+}
1942
+EXPORT_SYMBOL_GPL(migrate_enable);
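
A minimal sketch of the calling pattern for the pair above (hypothetical caller, not part of the patch). Unlike a preempt_disable() section, the region may still be preempted; the task is only guaranteed not to change CPU, and the calls nest via p->migration_disabled:

static void example_migrate_disable_region(void)
{
	migrate_disable();	/* current may be preempted, but stays on this CPU */

	/*
	 * Safe to rely on staying on the same CPU here, e.g. for per-CPU
	 * state that is only touched from task context.
	 */

	migrate_disable();	/* nests: only the outermost enable unpins */
	migrate_enable();

	migrate_enable();	/* migration is allowed again from here on */
}
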
1943
+
1944
+static inline bool rq_has_pinned_tasks(struct rq *rq)
1945
+{
1946
+ return rq->nr_pinned;
15361947 }
15371948
15381949 /*
1539
- * Per-CPU kthreads are allowed to run on !actie && online CPUs, see
1950
+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see
15401951 * __set_cpus_allowed_ptr() and select_fallback_rq().
15411952 */
15421953 static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
....@@ -1544,10 +1955,13 @@
15441955 if (!cpumask_test_cpu(cpu, p->cpus_ptr))
15451956 return false;
15461957
1547
- if (is_per_cpu_kthread(p) || __migrate_disabled(p))
1958
+ if (is_per_cpu_kthread(p) || is_migration_disabled(p))
15481959 return cpu_online(cpu);
15491960
1550
- return cpu_active(cpu);
1961
+ if (!cpu_active(cpu))
1962
+ return false;
1963
+
1964
+ return cpumask_test_cpu(cpu, task_cpu_possible_mask(p));
15511965 }
15521966
15531967 /*
....@@ -1572,28 +1986,50 @@
15721986 static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
15731987 struct task_struct *p, int new_cpu)
15741988 {
1989
+ int detached = 0;
1990
+
15751991 lockdep_assert_held(&rq->lock);
15761992
1577
- WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
1578
- dequeue_task(rq, p, DEQUEUE_NOCLOCK);
1579
- set_task_cpu(p, new_cpu);
1580
- rq_unlock(rq, rf);
1993
+ /*
1994
+ * The vendor hook may drop the lock temporarily, so
1995
+ * pass the rq flags to unpin the lock. We expect the
1996
+ * rq lock to be held after return.
1997
+ */
1998
+ trace_android_rvh_migrate_queued_task(rq, rf, p, new_cpu, &detached);
1999
+ if (detached)
2000
+ goto attach;
15812001
2002
+ deactivate_task(rq, p, DEQUEUE_NOCLOCK);
2003
+ set_task_cpu(p, new_cpu);
2004
+
2005
+attach:
2006
+ rq_unlock(rq, rf);
15822007 rq = cpu_rq(new_cpu);
15832008
15842009 rq_lock(rq, rf);
15852010 BUG_ON(task_cpu(p) != new_cpu);
1586
- enqueue_task(rq, p, 0);
1587
- p->on_rq = TASK_ON_RQ_QUEUED;
2011
+ activate_task(rq, p, 0);
15882012 check_preempt_curr(rq, p, 0);
15892013
15902014 return rq;
15912015 }
15922016
15932017 struct migration_arg {
1594
- struct task_struct *task;
1595
- int dest_cpu;
1596
- bool done;
2018
+ struct task_struct *task;
2019
+ int dest_cpu;
2020
+ struct set_affinity_pending *pending;
2021
+};
2022
+
2023
+/*
2024
+ * @refs: number of wait_for_completion()
2025
+ * @stop_pending: is @stop_work in use
2026
+ */
2027
+struct set_affinity_pending {
2028
+ refcount_t refs;
2029
+ unsigned int stop_pending;
2030
+ struct completion done;
2031
+ struct cpu_stop_work stop_work;
2032
+ struct migration_arg arg;
15972033 };
15982034
15992035 /*
....@@ -1626,44 +2062,141 @@
16262062 static int migration_cpu_stop(void *data)
16272063 {
16282064 struct migration_arg *arg = data;
2065
+ struct set_affinity_pending *pending = arg->pending;
16292066 struct task_struct *p = arg->task;
16302067 struct rq *rq = this_rq();
2068
+ bool complete = false;
16312069 struct rq_flags rf;
1632
- int dest_cpu = arg->dest_cpu;
1633
-
1634
- /* We don't look at arg after this point. */
1635
- smp_mb();
1636
- arg->done = true;
16372070
16382071 /*
16392072 * The original target CPU might have gone down and we might
16402073 * be on another CPU but it doesn't matter.
16412074 */
1642
- local_irq_disable();
2075
+ local_irq_save(rf.flags);
16432076 /*
16442077 * We need to explicitly wake pending tasks before running
16452078 * __migrate_task() such that we will not miss enforcing cpus_ptr
16462079 * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
16472080 */
1648
- sched_ttwu_pending();
2081
+ flush_smp_call_function_from_idle();
16492082
16502083 raw_spin_lock(&p->pi_lock);
16512084 rq_lock(rq, &rf);
2085
+
16522086 /*
16532087 * If task_rq(p) != rq, it cannot be migrated here, because we're
16542088 * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
16552089 * we're holding p->pi_lock.
16562090 */
16572091 if (task_rq(p) == rq) {
1658
- if (task_on_rq_queued(p))
1659
- rq = __migrate_task(rq, &rf, p, dest_cpu);
1660
- else
1661
- p->wake_cpu = dest_cpu;
1662
- }
1663
- rq_unlock(rq, &rf);
1664
- raw_spin_unlock(&p->pi_lock);
2092
+ if (is_migration_disabled(p))
2093
+ goto out;
16652094
1666
- local_irq_enable();
2095
+ if (pending) {
2096
+ if (p->migration_pending == pending)
2097
+ p->migration_pending = NULL;
2098
+ complete = true;
2099
+
2100
+ if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask))
2101
+ goto out;
2102
+ }
2103
+
2104
+ if (task_on_rq_queued(p))
2105
+ rq = __migrate_task(rq, &rf, p, arg->dest_cpu);
2106
+ else
2107
+ p->wake_cpu = arg->dest_cpu;
2108
+
2109
+ /*
2110
+ * XXX __migrate_task() can fail, at which point we might end
2111
+ * up running on a dodgy CPU, AFAICT this can only happen
2112
+ * during CPU hotplug, at which point we'll get pushed out
2113
+ * anyway, so it's probably not a big deal.
2114
+ */
2115
+
2116
+ } else if (pending) {
2117
+ /*
2118
+ * This happens when we get migrated between migrate_enable()'s
2119
+ * preempt_enable() and scheduling the stopper task. At that
2120
+ * point we're a regular task again and not current anymore.
2121
+ *
2122
+ * A !PREEMPT kernel has a giant hole here, which makes it far
2123
+ * more likely.
2124
+ */
2125
+
2126
+ /*
2127
+ * The task moved before the stopper got to run. We're holding
2128
+ * ->pi_lock, so the allowed mask is stable - if it got
2129
+ * somewhere allowed, we're done.
2130
+ */
2131
+ if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
2132
+ if (p->migration_pending == pending)
2133
+ p->migration_pending = NULL;
2134
+ complete = true;
2135
+ goto out;
2136
+ }
2137
+
2138
+ /*
2139
+ * When migrate_enable() hits a rq mis-match we can't reliably
2140
+ * determine is_migration_disabled() and so have to chase after
2141
+ * it.
2142
+ */
2143
+ WARN_ON_ONCE(!pending->stop_pending);
2144
+ task_rq_unlock(rq, p, &rf);
2145
+ stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
2146
+ &pending->arg, &pending->stop_work);
2147
+ return 0;
2148
+ }
2149
+out:
2150
+ if (pending)
2151
+ pending->stop_pending = false;
2152
+ task_rq_unlock(rq, p, &rf);
2153
+
2154
+ if (complete)
2155
+ complete_all(&pending->done);
2156
+
2157
+ return 0;
2158
+}
2159
+
2160
+int push_cpu_stop(void *arg)
2161
+{
2162
+ struct rq *lowest_rq = NULL, *rq = this_rq();
2163
+ struct task_struct *p = arg;
2164
+
2165
+ raw_spin_lock_irq(&p->pi_lock);
2166
+ raw_spin_lock(&rq->lock);
2167
+
2168
+ if (task_rq(p) != rq)
2169
+ goto out_unlock;
2170
+
2171
+ if (is_migration_disabled(p)) {
2172
+ p->migration_flags |= MDF_PUSH;
2173
+ goto out_unlock;
2174
+ }
2175
+
2176
+ p->migration_flags &= ~MDF_PUSH;
2177
+
2178
+ if (p->sched_class->find_lock_rq)
2179
+ lowest_rq = p->sched_class->find_lock_rq(p, rq);
2180
+
2181
+ if (!lowest_rq)
2182
+ goto out_unlock;
2183
+
2184
+ // XXX validate p is still the highest prio task
2185
+ if (task_rq(p) == rq) {
2186
+ deactivate_task(rq, p, 0);
2187
+ set_task_cpu(p, lowest_rq->cpu);
2188
+ activate_task(lowest_rq, p, 0);
2189
+ resched_curr(lowest_rq);
2190
+ }
2191
+
2192
+ double_unlock_balance(rq, lowest_rq);
2193
+
2194
+out_unlock:
2195
+ rq->push_busy = false;
2196
+ raw_spin_unlock(&rq->lock);
2197
+ raw_spin_unlock_irq(&p->pi_lock);
2198
+
2199
+ put_task_struct(p);
16672200 return 0;
16682201 }
16692202
....@@ -1671,27 +2204,40 @@
16712204 * sched_class::set_cpus_allowed must do the below, but is not required to
16722205 * actually call this function.
16732206 */
1674
-void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
2207
+void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags)
16752208 {
2209
+ if (flags & (SCA_MIGRATE_ENABLE | SCA_MIGRATE_DISABLE)) {
2210
+ p->cpus_ptr = new_mask;
2211
+ return;
2212
+ }
2213
+
16762214 cpumask_copy(&p->cpus_mask, new_mask);
1677
- if (p->cpus_ptr == &p->cpus_mask)
1678
- p->nr_cpus_allowed = cpumask_weight(new_mask);
2215
+ p->nr_cpus_allowed = cpumask_weight(new_mask);
2216
+ trace_android_rvh_set_cpus_allowed_comm(p, new_mask);
16792217 }
16802218
1681
-#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
1682
-int __migrate_disabled(struct task_struct *p)
1683
-{
1684
- return p->migrate_disable;
1685
-}
1686
-EXPORT_SYMBOL_GPL(__migrate_disabled);
1687
-#endif
1688
-
1689
-void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
2219
+static void
2220
+__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags)
16902221 {
16912222 struct rq *rq = task_rq(p);
16922223 bool queued, running;
16932224
1694
- lockdep_assert_held(&p->pi_lock);
2225
+ /*
2226
+ * This here violates the locking rules for affinity, since we're only
2227
+ * supposed to change these variables while holding both rq->lock and
2228
+ * p->pi_lock.
2229
+ *
2230
+ * HOWEVER, it magically works, because ttwu() is the only code that
2231
+ * accesses these variables under p->pi_lock and only does so after
2232
+ * smp_cond_load_acquire(&p->on_cpu, !VAL), and we're in __schedule()
2233
+ * before finish_task().
2234
+ *
2235
+ * XXX do further audits, this smells like something putrid.
2236
+ */
2237
+ if (flags & SCA_MIGRATE_DISABLE)
2238
+ SCHED_WARN_ON(!p->on_cpu);
2239
+ else
2240
+ lockdep_assert_held(&p->pi_lock);
16952241
16962242 queued = task_on_rq_queued(p);
16972243 running = task_current(rq, p);
....@@ -1707,12 +2253,312 @@
17072253 if (running)
17082254 put_prev_task(rq, p);
17092255
1710
- p->sched_class->set_cpus_allowed(p, new_mask);
2256
+ p->sched_class->set_cpus_allowed(p, new_mask, flags);
17112257
17122258 if (queued)
17132259 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
17142260 if (running)
1715
- set_curr_task(rq, p);
2261
+ set_next_task(rq, p);
2262
+}
2263
+
2264
+static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
2265
+ int dest_cpu, unsigned int flags);
2266
+/*
2267
+ * Called with both p->pi_lock and rq->lock held; drops both before returning.
2268
+ */
2269
+static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
2270
+ const struct cpumask *new_mask,
2271
+ u32 flags,
2272
+ struct rq *rq,
2273
+ struct rq_flags *rf)
2274
+{
2275
+ const struct cpumask *cpu_valid_mask = cpu_active_mask;
2276
+ const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p);
2277
+ unsigned int dest_cpu;
2278
+ int ret = 0;
2279
+
2280
+ update_rq_clock(rq);
2281
+
2282
+ if (p->flags & PF_KTHREAD || is_migration_disabled(p)) {
2283
+ /*
2284
+ * Kernel threads are allowed on online && !active CPUs.
2285
+ *
2286
+ * Specifically, migration_disabled() tasks must not fail the
2287
+ * cpumask_any_and_distribute() pick below, esp. so on
2288
+ * SCA_MIGRATE_ENABLE, otherwise we'll not call
2289
+ * set_cpus_allowed_common() and actually reset p->cpus_ptr.
2290
+ */
2291
+ cpu_valid_mask = cpu_online_mask;
2292
+ } else if (!cpumask_subset(new_mask, cpu_allowed_mask)) {
2293
+ ret = -EINVAL;
2294
+ goto out;
2295
+ }
2296
+
2297
+ /*
2298
+ * Must re-check here, to close a race against __kthread_bind(),
2299
+ * sched_setaffinity() is not guaranteed to observe the flag.
2300
+ */
2301
+ if ((flags & SCA_CHECK) && (p->flags & PF_NO_SETAFFINITY)) {
2302
+ ret = -EINVAL;
2303
+ goto out;
2304
+ }
2305
+
2306
+ if (!(flags & SCA_MIGRATE_ENABLE)) {
2307
+ if (cpumask_equal(&p->cpus_mask, new_mask))
2308
+ goto out;
2309
+
2310
+ if (WARN_ON_ONCE(p == current &&
2311
+ is_migration_disabled(p) &&
2312
+ !cpumask_test_cpu(task_cpu(p), new_mask))) {
2313
+ ret = -EBUSY;
2314
+ goto out;
2315
+ }
2316
+ }
2317
+
2318
+ /*
2319
+ * Picking a ~random cpu helps in cases where we are changing affinity
2320
+ * for groups of tasks (ie. cpuset), so that load balancing is not
2321
+ * immediately required to distribute the tasks within their new mask.
2322
+ */
2323
+ dest_cpu = cpumask_any_and_distribute(cpu_valid_mask, new_mask);
2324
+ if (dest_cpu >= nr_cpu_ids) {
2325
+ ret = -EINVAL;
2326
+ goto out;
2327
+ }
2328
+
2329
+ __do_set_cpus_allowed(p, new_mask, flags);
2330
+
2331
+ if (p->flags & PF_KTHREAD) {
2332
+ /*
2333
+ * For kernel threads that do indeed end up on online &&
2334
+ * !active we want to ensure they are strict per-CPU threads.
2335
+ */
2336
+ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
2337
+ !cpumask_intersects(new_mask, cpu_active_mask) &&
2338
+ p->nr_cpus_allowed != 1);
2339
+ }
2340
+
2341
+ return affine_move_task(rq, p, rf, dest_cpu, flags);
2342
+out:
2343
+ task_rq_unlock(rq, p, rf);
2344
+
2345
+ return ret;
2346
+}
2347
+
2348
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
2349
+{
2350
+ __do_set_cpus_allowed(p, new_mask, 0);
2351
+}
2352
+
2353
+/*
2354
+ * This function is wildly self concurrent; here be dragons.
2355
+ *
2356
+ *
2357
+ * When given a valid mask, __set_cpus_allowed_ptr() must block until the
2358
+ * designated task is enqueued on an allowed CPU. If that task is currently
2359
+ * running, we have to kick it out using the CPU stopper.
2360
+ *
2361
+ * Migrate-Disable comes along and tramples all over our nice sandcastle.
2362
+ * Consider:
2363
+ *
2364
+ * Initial conditions: P0->cpus_mask = [0, 1]
2365
+ *
2366
+ * P0@CPU0 P1
2367
+ *
2368
+ * migrate_disable();
2369
+ * <preempted>
2370
+ * set_cpus_allowed_ptr(P0, [1]);
2371
+ *
2372
+ * P1 *cannot* return from this set_cpus_allowed_ptr() call until P0 executes
2373
+ * its outermost migrate_enable() (i.e. it exits its Migrate-Disable region).
2374
+ * This means we need the following scheme:
2375
+ *
2376
+ * P0@CPU0 P1
2377
+ *
2378
+ * migrate_disable();
2379
+ * <preempted>
2380
+ * set_cpus_allowed_ptr(P0, [1]);
2381
+ * <blocks>
2382
+ * <resumes>
2383
+ * migrate_enable();
2384
+ * __set_cpus_allowed_ptr();
2385
+ * <wakes local stopper>
2386
+ * `--> <woken on migration completion>
2387
+ *
2388
+ * Now the fun stuff: there may be several P1-like tasks, i.e. multiple
2389
+ * concurrent set_cpus_allowed_ptr(P0, [*]) calls. CPU affinity changes of any
2390
+ * task p are serialized by p->pi_lock, which we can leverage: the one that
2391
+ * should come into effect at the end of the Migrate-Disable region is the last
2392
+ * one. This means we only need to track a single cpumask (i.e. p->cpus_mask),
2393
+ * but we still need to properly signal those waiting tasks at the appropriate
2394
+ * moment.
2395
+ *
2396
+ * This is implemented using struct set_affinity_pending. The first
2397
+ * __set_cpus_allowed_ptr() caller within a given Migrate-Disable region will
2398
+ * setup an instance of that struct and install it on the targeted task_struct.
2399
+ * Any and all further callers will reuse that instance. Those then wait for
2400
+ * a completion signaled at the tail of the CPU stopper callback (1), triggered
2401
+ * on the end of the Migrate-Disable region (i.e. outermost migrate_enable()).
2402
+ *
2403
+ *
2404
+ * (1) In the cases covered above. There is one more where the completion is
2405
+ * signaled within affine_move_task() itself: when a subsequent affinity request
2406
+ * cancels the need for an active migration. Consider:
2407
+ *
2408
+ * Initial conditions: P0->cpus_mask = [0, 1]
2409
+ *
2410
+ * P0@CPU0 P1 P2
2411
+ *
2412
+ * migrate_disable();
2413
+ * <preempted>
2414
+ * set_cpus_allowed_ptr(P0, [1]);
2415
+ * <blocks>
2416
+ * set_cpus_allowed_ptr(P0, [0, 1]);
2417
+ * <signal completion>
2418
+ * <awakes>
2419
+ *
2420
+ * Note that the above is safe vs a concurrent migrate_enable(), as any
2421
+ * pending affinity completion is preceded by an uninstallation of
2422
+ * p->migration_pending done with p->pi_lock held.
2423
+ */
2424
+static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
2425
+ int dest_cpu, unsigned int flags)
2426
+{
2427
+ struct set_affinity_pending my_pending = { }, *pending = NULL;
2428
+ bool stop_pending, complete = false;
2429
+
2430
+ /* Can the task run on the task's current CPU? If so, we're done */
2431
+ if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
2432
+ struct task_struct *push_task = NULL;
2433
+
2434
+ if ((flags & SCA_MIGRATE_ENABLE) &&
2435
+ (p->migration_flags & MDF_PUSH) && !rq->push_busy) {
2436
+ rq->push_busy = true;
2437
+ push_task = get_task_struct(p);
2438
+ }
2439
+
2440
+ /*
2441
+ * If there are pending waiters, but no pending stop_work,
2442
+ * then complete now.
2443
+ */
2444
+ pending = p->migration_pending;
2445
+ if (pending && !pending->stop_pending) {
2446
+ p->migration_pending = NULL;
2447
+ complete = true;
2448
+ }
2449
+
2450
+ task_rq_unlock(rq, p, rf);
2451
+
2452
+ if (push_task) {
2453
+ stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
2454
+ p, &rq->push_work);
2455
+ }
2456
+
2457
+ if (complete)
2458
+ complete_all(&pending->done);
2459
+
2460
+ return 0;
2461
+ }
2462
+
2463
+ if (!(flags & SCA_MIGRATE_ENABLE)) {
2464
+ /* serialized by p->pi_lock */
2465
+ if (!p->migration_pending) {
2466
+ /* Install the request */
2467
+ refcount_set(&my_pending.refs, 1);
2468
+ init_completion(&my_pending.done);
2469
+ my_pending.arg = (struct migration_arg) {
2470
+ .task = p,
2471
+ .dest_cpu = dest_cpu,
2472
+ .pending = &my_pending,
2473
+ };
2474
+
2475
+ p->migration_pending = &my_pending;
2476
+ } else {
2477
+ pending = p->migration_pending;
2478
+ refcount_inc(&pending->refs);
2479
+ /*
2480
+ * Affinity has changed, but we've already installed a
2481
+ * pending. migration_cpu_stop() *must* see this, else
2482
+ * we risk a completion of the pending despite having a
2483
+ * task on a disallowed CPU.
2484
+ *
2485
+ * Serialized by p->pi_lock, so this is safe.
2486
+ */
2487
+ pending->arg.dest_cpu = dest_cpu;
2488
+ }
2489
+ }
2490
+ pending = p->migration_pending;
2491
+ /*
2492
+ * - !MIGRATE_ENABLE:
2493
+ * we'll have installed a pending if there wasn't one already.
2494
+ *
2495
+ * - MIGRATE_ENABLE:
2496
+ * we're here because the current CPU isn't matching anymore,
2497
+ * the only way that can happen is because of a concurrent
2498
+ * set_cpus_allowed_ptr() call, which should then still be
2499
+ * pending completion.
2500
+ *
2501
+ * Either way, we really should have a @pending here.
2502
+ */
2503
+ if (WARN_ON_ONCE(!pending)) {
2504
+ task_rq_unlock(rq, p, rf);
2505
+ return -EINVAL;
2506
+ }
2507
+
2508
+ if (task_running(rq, p) || p->state == TASK_WAKING) {
2509
+ /*
2510
+ * MIGRATE_ENABLE gets here because 'p == current', but for
2511
+ * anything else we cannot do is_migration_disabled(), punt
2512
+ * and have the stopper function handle it all race-free.
2513
+ */
2514
+ stop_pending = pending->stop_pending;
2515
+ if (!stop_pending)
2516
+ pending->stop_pending = true;
2517
+
2518
+ if (flags & SCA_MIGRATE_ENABLE)
2519
+ p->migration_flags &= ~MDF_PUSH;
2520
+
2521
+ task_rq_unlock(rq, p, rf);
2522
+
2523
+ if (!stop_pending) {
2524
+ stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
2525
+ &pending->arg, &pending->stop_work);
2526
+ }
2527
+
2528
+ if (flags & SCA_MIGRATE_ENABLE)
2529
+ return 0;
2530
+ } else {
2531
+
2532
+ if (!is_migration_disabled(p)) {
2533
+ if (task_on_rq_queued(p))
2534
+ rq = move_queued_task(rq, rf, p, dest_cpu);
2535
+
2536
+ if (!pending->stop_pending) {
2537
+ p->migration_pending = NULL;
2538
+ complete = true;
2539
+ }
2540
+ }
2541
+ task_rq_unlock(rq, p, rf);
2542
+
2543
+ if (complete)
2544
+ complete_all(&pending->done);
2545
+ }
2546
+
2547
+ wait_for_completion(&pending->done);
2548
+
2549
+ if (refcount_dec_and_test(&pending->refs))
2550
+ wake_up_var(&pending->refs); /* No UaF, just an address */
2551
+
2552
+ /*
2553
+ * Block the original owner of &pending until all subsequent callers
2554
+ * have seen the completion and decremented the refcount
2555
+ */
2556
+ wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs));
2557
+
2558
+ /* ARGH */
2559
+ WARN_ON_ONCE(my_pending.stop_pending);
2560
+
2561
+ return 0;
17162562 }
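
The completion-based handshake above is only visible indirectly from the rest of the kernel: a migrate-disabled region delays any concurrent affinity change until the outermost migrate_enable(). A hedged sketch of the two sides, assuming the migrate_disable()/migrate_enable() API this series backports; the per-CPU counter, the demo_* names and the choice of CPU 1 are illustrative only:

#include <linux/preempt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>

static DEFINE_PER_CPU(u64, demo_hits);

/* P0: touch per-CPU state without being migrated away mid-update. */
static void demo_migrate_disabled_region(void)
{
	migrate_disable();
	this_cpu_inc(demo_hits);	/* stays on this CPU until ...        */
	migrate_enable();		/* ... here, where a pending affinity
					 * change (and its stopper work) fires */
}

/* P1: does not return before P0 has left its migrate-disabled region. */
static int demo_move_to_cpu1(struct task_struct *p)
{
	return set_cpus_allowed_ptr(p, cpumask_of(1));
}
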
17172563
17182564 /*
....@@ -1725,84 +2571,89 @@
17252571 * call is not atomic; no spinlocks may be held.
17262572 */
17272573 static int __set_cpus_allowed_ptr(struct task_struct *p,
1728
- const struct cpumask *new_mask, bool check)
2574
+ const struct cpumask *new_mask,
2575
+ u32 flags)
17292576 {
1730
- const struct cpumask *cpu_valid_mask = cpu_active_mask;
1731
- unsigned int dest_cpu;
17322577 struct rq_flags rf;
17332578 struct rq *rq;
1734
- int ret = 0;
17352579
17362580 rq = task_rq_lock(p, &rf);
1737
- update_rq_clock(rq);
1738
-
1739
- if (p->flags & PF_KTHREAD) {
1740
- /*
1741
- * Kernel threads are allowed on online && !active CPUs
1742
- */
1743
- cpu_valid_mask = cpu_online_mask;
1744
- }
1745
-
1746
- /*
1747
- * Must re-check here, to close a race against __kthread_bind(),
1748
- * sched_setaffinity() is not guaranteed to observe the flag.
1749
- */
1750
- if (check && (p->flags & PF_NO_SETAFFINITY)) {
1751
- ret = -EINVAL;
1752
- goto out;
1753
- }
1754
-
1755
- if (cpumask_equal(&p->cpus_mask, new_mask))
1756
- goto out;
1757
-
1758
- dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
1759
- if (dest_cpu >= nr_cpu_ids) {
1760
- ret = -EINVAL;
1761
- goto out;
1762
- }
1763
-
1764
- do_set_cpus_allowed(p, new_mask);
1765
-
1766
- if (p->flags & PF_KTHREAD) {
1767
- /*
1768
- * For kernel threads that do indeed end up on online &&
1769
- * !active we want to ensure they are strict per-CPU threads.
1770
- */
1771
- WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
1772
- !cpumask_intersects(new_mask, cpu_active_mask) &&
1773
- p->nr_cpus_allowed != 1);
1774
- }
1775
-
1776
- /* Can the task run on the task's current CPU? If so, we're done */
1777
- if (cpumask_test_cpu(task_cpu(p), new_mask) ||
1778
- p->cpus_ptr != &p->cpus_mask)
1779
- goto out;
1780
-
1781
- if (task_running(rq, p) || p->state == TASK_WAKING) {
1782
- struct migration_arg arg = { p, dest_cpu };
1783
- /* Need help from migration thread: drop lock and wait. */
1784
- task_rq_unlock(rq, p, &rf);
1785
- stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
1786
- tlb_migrate_finish(p->mm);
1787
- return 0;
1788
- } else if (task_on_rq_queued(p)) {
1789
- /*
1790
- * OK, since we're going to drop the lock immediately
1791
- * afterwards anyway.
1792
- */
1793
- rq = move_queued_task(rq, &rf, p, dest_cpu);
1794
- }
1795
-out:
1796
- task_rq_unlock(rq, p, &rf);
1797
-
1798
- return ret;
2581
+ return __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, &rf);
17992582 }
18002583
18012584 int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
18022585 {
1803
- return __set_cpus_allowed_ptr(p, new_mask, false);
2586
+ return __set_cpus_allowed_ptr(p, new_mask, 0);
18042587 }
18052588 EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
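
For completeness, the same request as seen from userspace: sched_setaffinity(2) funnels into this path with SCA_CHECK set, so an empty or disallowed mask comes back as -EINVAL. A small hedged userspace probe; the choice of CPU 1 is arbitrary:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(1, &set);		/* ask to run only on CPU 1 */

	if (sched_setaffinity(0, sizeof(set), &set))
		fprintf(stderr, "sched_setaffinity: %s\n", strerror(errno));
	else
		printf("now restricted to CPU 1\n");

	return 0;
}
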
2589
+
2590
+/*
2591
+ * Change a given task's CPU affinity to the intersection of its current
2592
+ * affinity mask and @subset_mask, writing the resulting mask to @new_mask.
2593
+ * If the resulting mask is empty, leave the affinity unchanged and return
2594
+ * -EINVAL.
2595
+ */
2596
+static int restrict_cpus_allowed_ptr(struct task_struct *p,
2597
+ struct cpumask *new_mask,
2598
+ const struct cpumask *subset_mask)
2599
+{
2600
+ struct rq_flags rf;
2601
+ struct rq *rq;
2602
+
2603
+ rq = task_rq_lock(p, &rf);
2604
+ if (!cpumask_and(new_mask, &p->cpus_mask, subset_mask)) {
2605
+ task_rq_unlock(rq, p, &rf);
2606
+ return -EINVAL;
2607
+ }
2608
+
2609
+ return __set_cpus_allowed_ptr_locked(p, new_mask, false, rq, &rf);
2610
+}
2611
+
2612
+/*
2613
+ * Restrict a given task's CPU affinity so that it is a subset of
2614
+ * task_cpu_possible_mask(). If the resulting mask is empty, we warn and
2615
+ * walk up the cpuset hierarchy until we find a suitable mask.
2616
+ */
2617
+void force_compatible_cpus_allowed_ptr(struct task_struct *p)
2618
+{
2619
+ cpumask_var_t new_mask;
2620
+ const struct cpumask *override_mask = task_cpu_possible_mask(p);
2621
+
2622
+ alloc_cpumask_var(&new_mask, GFP_KERNEL);
2623
+
2624
+ /*
2625
+ * __migrate_task() can fail silently in the face of concurrent
2626
+ * offlining of the chosen destination CPU, so take the hotplug
2627
+ * lock to ensure that the migration succeeds.
2628
+ */
2629
+ trace_android_rvh_force_compatible_pre(NULL);
2630
+ cpus_read_lock();
2631
+ if (!cpumask_available(new_mask))
2632
+ goto out_set_mask;
2633
+
2634
+ if (!restrict_cpus_allowed_ptr(p, new_mask, override_mask))
2635
+ goto out_free_mask;
2636
+
2637
+ /*
2638
+ * We failed to find a valid subset of the affinity mask for the
2639
+ * task, so override it based on its cpuset hierarchy.
2640
+ */
2641
+ cpuset_cpus_allowed(p, new_mask);
2642
+ override_mask = new_mask;
2643
+
2644
+out_set_mask:
2645
+ if (printk_ratelimit()) {
2646
+ printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n",
2647
+ task_pid_nr(p), p->comm,
2648
+ cpumask_pr_args(override_mask));
2649
+ }
2650
+
2651
+ WARN_ON(set_cpus_allowed_ptr(p, override_mask));
2652
+out_free_mask:
2653
+ cpus_read_unlock();
2654
+ trace_android_rvh_force_compatible_post(NULL);
2655
+ free_cpumask_var(new_mask);
2656
+}
18062657
18072658 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
18082659 {
....@@ -1841,6 +2692,8 @@
18412692 * Clearly, migrating tasks to offline CPUs is a fairly daft thing.
18422693 */
18432694 WARN_ON_ONCE(!cpu_online(new_cpu));
2695
+
2696
+ WARN_ON_ONCE(is_migration_disabled(p));
18442697 #endif
18452698
18462699 trace_sched_migrate_task(p, new_cpu);
....@@ -1851,12 +2704,13 @@
18512704 p->se.nr_migrations++;
18522705 rseq_migrate(p);
18532706 perf_event_task_migrate(p);
2707
+ trace_android_rvh_set_task_cpu(p, new_cpu);
18542708 }
18552709
18562710 __set_task_cpu(p, new_cpu);
18572711 }
2712
+EXPORT_SYMBOL_GPL(set_task_cpu);
18582713
1859
-#ifdef CONFIG_NUMA_BALANCING
18602714 static void __migrate_swap_task(struct task_struct *p, int cpu)
18612715 {
18622716 if (task_on_rq_queued(p)) {
....@@ -1869,11 +2723,9 @@
18692723 rq_pin_lock(src_rq, &srf);
18702724 rq_pin_lock(dst_rq, &drf);
18712725
1872
- p->on_rq = TASK_ON_RQ_MIGRATING;
18732726 deactivate_task(src_rq, p, 0);
18742727 set_task_cpu(p, cpu);
18752728 activate_task(dst_rq, p, 0);
1876
- p->on_rq = TASK_ON_RQ_QUEUED;
18772729 check_preempt_curr(dst_rq, p, 0);
18782730
18792731 rq_unpin_lock(dst_rq, &drf);
....@@ -1973,7 +2825,7 @@
19732825 out:
19742826 return ret;
19752827 }
1976
-#endif /* CONFIG_NUMA_BALANCING */
2828
+EXPORT_SYMBOL_GPL(migrate_swap);
19772829
19782830 static bool check_task_state(struct task_struct *p, long match_state)
19792831 {
....@@ -2081,7 +2933,7 @@
20812933 ktime_t to = NSEC_PER_SEC / HZ;
20822934
20832935 set_current_state(TASK_UNINTERRUPTIBLE);
2084
- schedule_hrtimeout(&to, HRTIMER_MODE_REL);
2936
+ schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
20852937 continue;
20862938 }
20872939
....@@ -2148,7 +3000,11 @@
21483000 int nid = cpu_to_node(cpu);
21493001 const struct cpumask *nodemask = NULL;
21503002 enum { cpuset, possible, fail } state = cpuset;
2151
- int dest_cpu;
3003
+ int dest_cpu = -1;
3004
+
3005
+ trace_android_rvh_select_fallback_rq(cpu, p, &dest_cpu);
3006
+ if (dest_cpu >= 0)
3007
+ return dest_cpu;
21523008
21533009 /*
21543010 * If the node that the CPU is on has been offlined, cpu_to_node()
....@@ -2160,9 +3016,7 @@
21603016
21613017 /* Look for allowed, online CPU in same node. */
21623018 for_each_cpu(dest_cpu, nodemask) {
2163
- if (!cpu_active(dest_cpu))
2164
- continue;
2165
- if (cpumask_test_cpu(dest_cpu, p->cpus_ptr))
3019
+ if (is_cpu_allowed(p, dest_cpu))
21663020 return dest_cpu;
21673021 }
21683022 }
....@@ -2184,12 +3038,17 @@
21843038 state = possible;
21853039 break;
21863040 }
2187
- /* Fall-through */
3041
+ fallthrough;
21883042 case possible:
2189
- do_set_cpus_allowed(p, cpu_possible_mask);
3043
+ /*
3044
+ * XXX When called from select_task_rq() we only
3045
+ * hold p->pi_lock and again violate locking order.
3046
+ *
3047
+ * More yuck to audit.
3048
+ */
3049
+ do_set_cpus_allowed(p, task_cpu_possible_mask(p));
21903050 state = fail;
21913051 break;
2192
-
21933052 case fail:
21943053 BUG();
21953054 break;
....@@ -2216,14 +3075,12 @@
22163075 * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
22173076 */
22183077 static inline
2219
-int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags,
2220
- int sibling_count_hint)
3078
+int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
22213079 {
22223080 lockdep_assert_held(&p->pi_lock);
22233081
2224
- if (p->nr_cpus_allowed > 1)
2225
- cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags,
2226
- sibling_count_hint);
3082
+ if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p))
3083
+ cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
22273084 else
22283085 cpu = cpumask_any(p->cpus_ptr);
22293086
....@@ -2243,14 +3100,9 @@
22433100 return cpu;
22443101 }
22453102
2246
-static void update_avg(u64 *avg, u64 sample)
2247
-{
2248
- s64 diff = sample - *avg;
2249
- *avg += diff >> 3;
2250
-}
2251
-
22523103 void sched_set_stop_task(int cpu, struct task_struct *stop)
22533104 {
3105
+ static struct lock_class_key stop_pi_lock;
22543106 struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
22553107 struct task_struct *old_stop = cpu_rq(cpu)->stop;
22563108
....@@ -2266,6 +3118,20 @@
22663118 sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
22673119
22683120 stop->sched_class = &stop_sched_class;
3121
+
3122
+ /*
3123
+ * The PI code calls rt_mutex_setprio() with ->pi_lock held to
3124
+ * adjust the effective priority of a task. As a result,
3125
+ * rt_mutex_setprio() can trigger (RT) balancing operations,
3126
+ * which can then trigger wakeups of the stop thread to push
3127
+ * around the current task.
3128
+ *
3129
+ * The stop task itself will never be part of the PI-chain, it
3130
+ * never blocks, therefore that ->pi_lock recursion is safe.
3131
+ * Tell lockdep about this by placing the stop->pi_lock in its
3132
+ * own class.
3133
+ */
3134
+ lockdep_set_class(&stop->pi_lock, &stop_pi_lock);
22693135 }
22703136
22713137 cpu_rq(cpu)->stop = stop;
....@@ -2279,15 +3145,23 @@
22793145 }
22803146 }
22813147
2282
-#else
3148
+#else /* CONFIG_SMP */
22833149
22843150 static inline int __set_cpus_allowed_ptr(struct task_struct *p,
2285
- const struct cpumask *new_mask, bool check)
3151
+ const struct cpumask *new_mask,
3152
+ u32 flags)
22863153 {
22873154 return set_cpus_allowed_ptr(p, new_mask);
22883155 }
22893156
2290
-#endif /* CONFIG_SMP */
3157
+static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { }
3158
+
3159
+static inline bool rq_has_pinned_tasks(struct rq *rq)
3160
+{
3161
+ return false;
3162
+}
3163
+
3164
+#endif /* !CONFIG_SMP */
22913165
22923166 static void
22933167 ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
....@@ -2326,12 +3200,6 @@
23263200
23273201 if (wake_flags & WF_SYNC)
23283202 __schedstat_inc(p->se.statistics.nr_wakeups_sync);
2329
-}
2330
-
2331
-static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
2332
-{
2333
- activate_task(rq, p, en_flags);
2334
- p->on_rq = TASK_ON_RQ_QUEUED;
23353203 }
23363204
23373205 /*
....@@ -2375,27 +3243,54 @@
23753243 {
23763244 int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK;
23773245
3246
+ if (wake_flags & WF_SYNC)
3247
+ en_flags |= ENQUEUE_WAKEUP_SYNC;
3248
+
23783249 lockdep_assert_held(&rq->lock);
23793250
2380
-#ifdef CONFIG_SMP
23813251 if (p->sched_contributes_to_load)
23823252 rq->nr_uninterruptible--;
23833253
3254
+#ifdef CONFIG_SMP
23843255 if (wake_flags & WF_MIGRATED)
23853256 en_flags |= ENQUEUE_MIGRATED;
3257
+ else
23863258 #endif
3259
+ if (p->in_iowait) {
3260
+ delayacct_blkio_end(p);
3261
+ atomic_dec(&task_rq(p)->nr_iowait);
3262
+ }
23873263
2388
- ttwu_activate(rq, p, en_flags);
3264
+ activate_task(rq, p, en_flags);
23893265 ttwu_do_wakeup(rq, p, wake_flags, rf);
23903266 }
23913267
23923268 /*
2393
- * Called in case the task @p isn't fully descheduled from its runqueue,
2394
- * in this case we must do a remote wakeup. Its a 'light' wakeup though,
2395
- * since all we need to do is flip p->state to TASK_RUNNING, since
2396
- * the task is still ->on_rq.
3269
+ * Consider @p being inside a wait loop:
3270
+ *
3271
+ * for (;;) {
3272
+ * set_current_state(TASK_UNINTERRUPTIBLE);
3273
+ *
3274
+ * if (CONDITION)
3275
+ * break;
3276
+ *
3277
+ * schedule();
3278
+ * }
3279
+ * __set_current_state(TASK_RUNNING);
3280
+ *
3281
+ * between set_current_state() and schedule(). In this case @p is still
3282
+ * runnable, so all that needs doing is change p->state back to TASK_RUNNING in
3283
+ * an atomic manner.
3284
+ *
3285
+ * By taking task_rq(p)->lock we serialize against schedule(), if @p->on_rq
3286
+ * then schedule() must still happen and p->state can be changed to
3287
+ * TASK_RUNNING. Otherwise we lost the race, schedule() has happened, and we
3288
+ * need to do a full wakeup with enqueue.
3289
+ *
3290
+ * Returns: %true when the wakeup is done,
3291
+ * %false otherwise.
23973292 */
2398
-static int ttwu_remote(struct task_struct *p, int wake_flags)
3293
+static int ttwu_runnable(struct task_struct *p, int wake_flags)
23993294 {
24003295 struct rq_flags rf;
24013296 struct rq *rq;
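
The wait-loop comment above describes the usual open-coded sleep/wake pairing that ttwu_runnable() has to race against. A minimal hedged sketch of both halves; the demo_* names and flag are invented, and real code would normally use wait_event()/wake_up() or a completion and keep a reference on the sleeping task:

#include <linux/err.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/sched.h>

static bool demo_done;
static struct task_struct *demo_waiter;

static int demo_wait_fn(void *unused)
{
	set_current_state(TASK_UNINTERRUPTIBLE);
	while (!READ_ONCE(demo_done)) {
		schedule();			/* a racing wakeup lands in
						 * ttwu_runnable() right here */
		set_current_state(TASK_UNINTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

static void demo_complete(void)
{
	WRITE_ONCE(demo_done, true);	/* CONDITION = 1 ...                  */
	wake_up_process(demo_waiter);	/* ... then try_to_wake_up(TASK_NORMAL) */
}

static int __init demo_init(void)
{
	demo_waiter = kthread_run(demo_wait_fn, NULL, "demo_waiter");
	return PTR_ERR_OR_ZERO(demo_waiter);
}
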
....@@ -2414,75 +3309,63 @@
24143309 }
24153310
24163311 #ifdef CONFIG_SMP
2417
-void sched_ttwu_pending(void)
3312
+void sched_ttwu_pending(void *arg)
24183313 {
3314
+ struct llist_node *llist = arg;
24193315 struct rq *rq = this_rq();
2420
- struct llist_node *llist = llist_del_all(&rq->wake_list);
24213316 struct task_struct *p, *t;
24223317 struct rq_flags rf;
24233318
24243319 if (!llist)
24253320 return;
24263321
3322
+ /*
3323
+ * rq::ttwu_pending racy indication of out-standing wakeups.
3324
+ * Races such that false-negatives are possible, since they
3325
+ * are shorter lived than false-positives would be.
3326
+ */
3327
+ WRITE_ONCE(rq->ttwu_pending, 0);
3328
+
24273329 rq_lock_irqsave(rq, &rf);
24283330 update_rq_clock(rq);
24293331
2430
- llist_for_each_entry_safe(p, t, llist, wake_entry)
3332
+ llist_for_each_entry_safe(p, t, llist, wake_entry.llist) {
3333
+ if (WARN_ON_ONCE(p->on_cpu))
3334
+ smp_cond_load_acquire(&p->on_cpu, !VAL);
3335
+
3336
+ if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq)))
3337
+ set_task_cpu(p, cpu_of(rq));
3338
+
24313339 ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf);
3340
+ }
24323341
24333342 rq_unlock_irqrestore(rq, &rf);
24343343 }
24353344
2436
-void scheduler_ipi(void)
3345
+void send_call_function_single_ipi(int cpu)
24373346 {
2438
- /*
2439
- * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
2440
- * TIF_NEED_RESCHED remotely (for the first time) will also send
2441
- * this IPI.
2442
- */
2443
- preempt_fold_need_resched();
3347
+ struct rq *rq = cpu_rq(cpu);
24443348
2445
- if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
2446
- return;
2447
-
2448
- /*
2449
- * Not all reschedule IPI handlers call irq_enter/irq_exit, since
2450
- * traditionally all their work was done from the interrupt return
2451
- * path. Now that we actually do some work, we need to make sure
2452
- * we do call them.
2453
- *
2454
- * Some archs already do call them, luckily irq_enter/exit nest
2455
- * properly.
2456
- *
2457
- * Arguably we should visit all archs and update all handlers,
2458
- * however a fair share of IPIs are still resched only so this would
2459
- * somewhat pessimize the simple resched case.
2460
- */
2461
- irq_enter();
2462
- sched_ttwu_pending();
2463
-
2464
- /*
2465
- * Check if someone kicked us for doing the nohz idle load balance.
2466
- */
2467
- if (unlikely(got_nohz_idle_kick())) {
2468
- this_rq()->idle_balance = 1;
2469
- raise_softirq_irqoff(SCHED_SOFTIRQ);
2470
- }
2471
- irq_exit();
3349
+ if (!set_nr_if_polling(rq->idle))
3350
+ arch_send_call_function_single_ipi(cpu);
3351
+ else
3352
+ trace_sched_wake_idle_without_ipi(cpu);
24723353 }
24733354
2474
-static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags)
3355
+/*
3356
+ * Queue a task on the target CPUs wake_list and wake the CPU via IPI if
3357
+ * necessary. The wakee CPU on receipt of the IPI will queue the task
3358
+ * via sched_ttwu_pending() for activation so the wakee incurs the cost
3359
+ * of the wakeup instead of the waker.
3360
+ */
3361
+static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
24753362 {
24763363 struct rq *rq = cpu_rq(cpu);
24773364
24783365 p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED);
24793366
2480
- if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) {
2481
- if (!set_nr_if_polling(rq->idle))
2482
- smp_send_reschedule(cpu);
2483
- else
2484
- trace_sched_wake_idle_without_ipi(cpu);
2485
- }
3367
+ WRITE_ONCE(rq->ttwu_pending, 1);
3368
+ __smp_call_single_queue(cpu, &p->wake_entry.llist);
24863369 }
24873370
24883371 void wake_up_if_idle(int cpu)
....@@ -2508,6 +3391,7 @@
25083391 out:
25093392 rcu_read_unlock();
25103393 }
3394
+EXPORT_SYMBOL_GPL(wake_up_if_idle);
25113395
25123396 bool cpus_share_cache(int this_cpu, int that_cpu)
25133397 {
....@@ -2516,6 +3400,58 @@
25163400
25173401 return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
25183402 }
3403
+
3404
+static inline bool ttwu_queue_cond(int cpu, int wake_flags)
3405
+{
3406
+ /*
3407
+ * If the CPU does not share cache, then queue the task on the
3408
+ * remote rqs wakelist to avoid accessing remote data.
3409
+ */
3410
+ if (!cpus_share_cache(smp_processor_id(), cpu))
3411
+ return true;
3412
+
3413
+ /*
3414
+ * If the task is descheduling and the only running task on the
3415
+ * CPU then use the wakelist to offload the task activation to
3416
+ * the soon-to-be-idle CPU as the current CPU is likely busy.
3417
+ * nr_running is checked to avoid unnecessary task stacking.
3418
+ *
3419
+ * Note that we can only get here with (wakee) p->on_rq=0,
3420
+ * p->on_cpu can be whatever, we've done the dequeue, so
3421
+ * the wakee has been accounted out of ->nr_running.
3422
+ */
3423
+ if ((wake_flags & WF_ON_CPU) && !cpu_rq(cpu)->nr_running)
3424
+ return true;
3425
+
3426
+ return false;
3427
+}
3428
+
3429
+static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
3430
+{
3431
+ bool cond = false;
3432
+
3433
+ trace_android_rvh_ttwu_cond(&cond);
3434
+
3435
+ if ((sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) ||
3436
+ cond) {
3437
+ if (WARN_ON_ONCE(cpu == smp_processor_id()))
3438
+ return false;
3439
+
3440
+ sched_clock_cpu(cpu); /* Sync clocks across CPUs */
3441
+ __ttwu_queue_wakelist(p, cpu, wake_flags);
3442
+ return true;
3443
+ }
3444
+
3445
+ return false;
3446
+}
3447
+
3448
+#else /* !CONFIG_SMP */
3449
+
3450
+static inline bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
3451
+{
3452
+ return false;
3453
+}
3454
+
25193455 #endif /* CONFIG_SMP */
25203456
25213457 static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
....@@ -2523,13 +3459,8 @@
25233459 struct rq *rq = cpu_rq(cpu);
25243460 struct rq_flags rf;
25253461
2526
-#if defined(CONFIG_SMP)
2527
- if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) {
2528
- sched_clock_cpu(cpu); /* Sync clocks across CPUs */
2529
- ttwu_queue_remote(p, cpu, wake_flags);
3462
+ if (ttwu_queue_wakelist(p, cpu, wake_flags))
25303463 return;
2531
- }
2532
-#endif
25333464
25343465 rq_lock(rq, &rf);
25353466 update_rq_clock(rq);
....@@ -2585,8 +3516,8 @@
25853516 * migration. However the means are completely different as there is no lock
25863517 * chain to provide order. Instead we do:
25873518 *
2588
- * 1) smp_store_release(X->on_cpu, 0)
2589
- * 2) smp_cond_load_acquire(!X->on_cpu)
3519
+ * 1) smp_store_release(X->on_cpu, 0) -- finish_task()
3520
+ * 2) smp_cond_load_acquire(!X->on_cpu) -- try_to_wake_up()
25903521 *
25913522 * Example:
25923523 *
....@@ -2625,34 +3556,72 @@
26253556 * @p: the thread to be awakened
26263557 * @state: the mask of task states that can be woken
26273558 * @wake_flags: wake modifier flags (WF_*)
2628
- * @sibling_count_hint: A hint at the number of threads that are being woken up
2629
- * in this event.
26303559 *
2631
- * If (@state & @p->state) @p->state = TASK_RUNNING.
3560
+ * Conceptually does:
3561
+ *
3562
+ * If (@state & @p->state) @p->state = TASK_RUNNING.
26323563 *
26333564 * If the task was not queued/runnable, also place it back on a runqueue.
26343565 *
2635
- * Atomic against schedule() which would dequeue a task, also see
2636
- * set_current_state().
3566
+ * This function is atomic against schedule() which would dequeue the task.
26373567 *
2638
- * This function executes a full memory barrier before accessing the task
2639
- * state; see set_current_state().
3568
+ * It issues a full memory barrier before accessing @p->state, see the comment
3569
+ * with set_current_state().
3570
+ *
3571
+ * Uses p->pi_lock to serialize against concurrent wake-ups.
3572
+ *
3573
+ * Relies on p->pi_lock stabilizing:
3574
+ * - p->sched_class
3575
+ * - p->cpus_ptr
3576
+ * - p->sched_task_group
3577
+ * in order to do migration, see its use of select_task_rq()/set_task_cpu().
3578
+ *
3579
+ * Tries really hard to only take one task_rq(p)->lock for performance.
3580
+ * Takes rq->lock in:
3581
+ * - ttwu_runnable() -- old rq, unavoidable, see comment there;
3582
+ * - ttwu_queue() -- new rq, for enqueue of the task;
3583
+ * - psi_ttwu_dequeue() -- much sadness :-( accounting will kill us.
3584
+ *
3585
+ * As a consequence we race really badly with just about everything. See the
3586
+ * many memory barriers and their comments for details.
26403587 *
26413588 * Return: %true if @p->state changes (an actual wakeup was done),
26423589 * %false otherwise.
26433590 */
26443591 static int
2645
-try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags,
2646
- int sibling_count_hint)
3592
+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
26473593 {
26483594 unsigned long flags;
26493595 int cpu, success = 0;
26503596
3597
+ preempt_disable();
3598
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && p == current) {
3599
+ /*
3600
+ * We're waking current, this means 'p->on_rq' and 'task_cpu(p)
3601
+ * == smp_processor_id()'. Together this means we can special
3602
+ * case the whole 'p->on_rq && ttwu_runnable()' case below
3603
+ * without taking any locks.
3604
+ *
3605
+ * In particular:
3606
+ * - we rely on Program-Order guarantees for all the ordering,
3607
+ * - we're serialized against set_special_state() by virtue of
3608
+ * it disabling IRQs (this allows not taking ->pi_lock).
3609
+ */
3610
+ if (!(p->state & state))
3611
+ goto out;
3612
+
3613
+ success = 1;
3614
+ trace_sched_waking(p);
3615
+ p->state = TASK_RUNNING;
3616
+ trace_sched_wakeup(p);
3617
+ goto out;
3618
+ }
3619
+
26513620 /*
26523621 * If we are going to wake up a thread waiting for CONDITION we
26533622 * need to ensure that CONDITION=1 done by the caller can not be
2654
- * reordered with p->state check below. This pairs with mb() in
2655
- * set_current_state() the waiting thread does.
3623
+ * reordered with p->state check below. This pairs with smp_store_mb()
3624
+ * in set_current_state() that the waiting thread does.
26563625 */
26573626 raw_spin_lock_irqsave(&p->pi_lock, flags);
26583627 smp_mb__after_spinlock();
....@@ -2668,9 +3637,8 @@
26683637 success = 1;
26693638 }
26703639 }
2671
- goto out;
3640
+ goto unlock;
26723641 }
2673
-
26743642 /*
26753643 * If this is a regular wakeup, then we can unconditionally
26763644 * clear the saved state of a "lock sleeper".
....@@ -2678,11 +3646,23 @@
26783646 if (!(wake_flags & WF_LOCK_SLEEPER))
26793647 p->saved_state = TASK_RUNNING;
26803648
3649
+#ifdef CONFIG_FREEZER
3650
+ /*
3651
+ * If we're going to wake up a thread which may be frozen, then
3652
+ * we can only do so if we have an active CPU which is capable of
3653
+ * running it. This may not be the case when resuming from suspend,
3654
+ * as the secondary CPUs may not yet be back online. See __thaw_task()
3655
+ * for the actual wakeup.
3656
+ */
3657
+ if (unlikely(frozen_or_skipped(p)) &&
3658
+ !cpumask_intersects(cpu_active_mask, task_cpu_possible_mask(p)))
3659
+ goto unlock;
3660
+#endif
3661
+
26813662 trace_sched_waking(p);
26823663
26833664 /* We're going to change ->state: */
26843665 success = 1;
2685
- cpu = task_cpu(p);
26863666
26873667 /*
26883668 * Ensure we load p->on_rq _after_ p->state, otherwise it would
....@@ -2703,10 +3683,15 @@
27033683 *
27043684 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
27053685 * __schedule(). See the comment for smp_mb__after_spinlock().
3686
+ *
3687
+ * A similar smp_rmb() lives in try_invoke_on_locked_down_task().
27063688 */
27073689 smp_rmb();
2708
- if (p->on_rq && ttwu_remote(p, wake_flags))
2709
- goto stat;
3690
+ if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
3691
+ goto unlock;
3692
+
3693
+ if (p->state & TASK_UNINTERRUPTIBLE)
3694
+ trace_sched_blocked_reason(p);
27103695
27113696 #ifdef CONFIG_SMP
27123697 /*
....@@ -2727,8 +3712,43 @@
27273712 *
27283713 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
27293714 * __schedule(). See the comment for smp_mb__after_spinlock().
3715
+ *
3716
+ * Form a control-dep-acquire with p->on_rq == 0 above, to ensure
3717
+ * schedule()'s deactivate_task() has 'happened' and p will no longer
3718
+ * care about its own p->state. See the comment in __schedule().
27303719 */
2731
- smp_rmb();
3720
+ smp_acquire__after_ctrl_dep();
3721
+
3722
+ /*
3723
+ * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq
3724
+ * == 0), which means we need to do an enqueue, change p->state to
3725
+ * TASK_WAKING such that we can unlock p->pi_lock before doing the
3726
+ * enqueue, such as ttwu_queue_wakelist().
3727
+ */
3728
+ p->state = TASK_WAKING;
3729
+
3730
+ /*
3731
+ * If the owning (remote) CPU is still in the middle of schedule() with
3732
+ * this task as prev, consider queueing p on the remote CPU's wake_list
3733
+ * which potentially sends an IPI instead of spinning on p->on_cpu to
3734
+ * let the waker make forward progress. This is safe because IRQs are
3735
+ * disabled and the IPI will deliver after on_cpu is cleared.
3736
+ *
3737
+ * Ensure we load task_cpu(p) after p->on_cpu:
3738
+ *
3739
+ * set_task_cpu(p, cpu);
3740
+ * STORE p->cpu = @cpu
3741
+ * __schedule() (switch to task 'p')
3742
+ * LOCK rq->lock
3743
+ * smp_mb__after_spin_lock() smp_cond_load_acquire(&p->on_cpu)
3744
+ * STORE p->on_cpu = 1 LOAD p->cpu
3745
+ *
3746
+ * to ensure we observe the correct CPU on which the task is currently
3747
+ * scheduling.
3748
+ */
3749
+ if (smp_load_acquire(&p->on_cpu) &&
3750
+ ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU))
3751
+ goto unlock;
27323752
27333753 /*
27343754 * If the owning (remote) CPU is still in the middle of schedule() with
....@@ -2741,38 +3761,79 @@
27413761 */
27423762 smp_cond_load_acquire(&p->on_cpu, !VAL);
27433763
2744
- p->sched_contributes_to_load = !!task_contributes_to_load(p);
2745
- p->state = TASK_WAKING;
3764
+ trace_android_rvh_try_to_wake_up(p);
27463765
2747
- if (p->in_iowait) {
2748
- delayacct_blkio_end(p);
2749
- atomic_dec(&task_rq(p)->nr_iowait);
2750
- }
2751
-
2752
- cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags,
2753
- sibling_count_hint);
3766
+ cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
27543767 if (task_cpu(p) != cpu) {
3768
+ if (p->in_iowait) {
3769
+ delayacct_blkio_end(p);
3770
+ atomic_dec(&task_rq(p)->nr_iowait);
3771
+ }
3772
+
27553773 wake_flags |= WF_MIGRATED;
27563774 psi_ttwu_dequeue(p);
27573775 set_task_cpu(p, cpu);
27583776 }
2759
-
2760
-#else /* CONFIG_SMP */
2761
-
2762
- if (p->in_iowait) {
2763
- delayacct_blkio_end(p);
2764
- atomic_dec(&task_rq(p)->nr_iowait);
2765
- }
2766
-
3777
+#else
3778
+ cpu = task_cpu(p);
27673779 #endif /* CONFIG_SMP */
27683780
27693781 ttwu_queue(p, cpu, wake_flags);
2770
-stat:
2771
- ttwu_stat(p, cpu, wake_flags);
2772
-out:
3782
+unlock:
27733783 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
3784
+out:
3785
+ if (success) {
3786
+ trace_android_rvh_try_to_wake_up_success(p);
3787
+ ttwu_stat(p, task_cpu(p), wake_flags);
3788
+ }
3789
+ preempt_enable();
27743790
27753791 return success;
3792
+}
3793
+
3794
+/**
3795
+ * try_invoke_on_locked_down_task - Invoke a function on task in fixed state
3796
+ * @p: Process for which the function is to be invoked, can be @current.
3797
+ * @func: Function to invoke.
3798
+ * @arg: Argument to function.
3799
+ *
3800
+ * If the specified task can be quickly locked into a definite state
3801
+ * (either sleeping or on a given runqueue), arrange to keep it in that
3802
+ * state while invoking @func(@arg). This function can use ->on_rq and
3803
+ * task_curr() to work out what the state is, if required. Given that
3804
+ * @func can be invoked with a runqueue lock held, it had better be quite
3805
+ * lightweight.
3806
+ *
3807
+ * Returns:
3808
+ * @false if the task slipped out from under the locks.
3809
+ * @true if the task was locked onto a runqueue or is sleeping.
3810
+ * However, @func can override this by returning @false.
3811
+ */
3812
+bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg)
3813
+{
3814
+ struct rq_flags rf;
3815
+ bool ret = false;
3816
+ struct rq *rq;
3817
+
3818
+ raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
3819
+ if (p->on_rq) {
3820
+ rq = __task_rq_lock(p, &rf);
3821
+ if (task_rq(p) == rq)
3822
+ ret = func(p, arg);
3823
+ rq_unlock(rq, &rf);
3824
+ } else {
3825
+ switch (p->state) {
3826
+ case TASK_RUNNING:
3827
+ case TASK_WAKING:
3828
+ break;
3829
+ default:
3830
+ smp_rmb(); // See smp_rmb() comment in try_to_wake_up().
3831
+ if (!p->on_rq)
3832
+ ret = func(p, arg);
3833
+ }
3834
+ }
3835
+ raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
3836
+ return ret;
27763837 }
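
A hedged example of a caller of try_invoke_on_locked_down_task(): the callback only reads fields that are stable while the task is pinned, and returns true so the caller sees success. Everything beyond the documented signature (the demo_* names, the printed fields) is illustrative:

#include <linux/printk.h>
#include <linux/sched.h>

static bool demo_report_state(struct task_struct *t, void *arg)
{
	pr_info("%s/%d: state=%ld on_rq=%d\n",
		t->comm, t->pid, t->state, t->on_rq);
	return true;	/* report success to the caller */
}

static void demo_probe_task(struct task_struct *p)
{
	if (!try_invoke_on_locked_down_task(p, demo_report_state, NULL))
		pr_info("%s/%d slipped out from under the locks\n",
			p->comm, p->pid);
}
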
27773838
27783839 /**
....@@ -2788,7 +3849,7 @@
27883849 */
27893850 int wake_up_process(struct task_struct *p)
27903851 {
2791
- return try_to_wake_up(p, TASK_NORMAL, 0, 1);
3852
+ return try_to_wake_up(p, TASK_NORMAL, 0);
27923853 }
27933854 EXPORT_SYMBOL(wake_up_process);
27943855
....@@ -2801,12 +3862,12 @@
28013862 */
28023863 int wake_up_lock_sleeper(struct task_struct *p)
28033864 {
2804
- return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER, 1);
3865
+ return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER);
28053866 }
28063867
28073868 int wake_up_state(struct task_struct *p, unsigned int state)
28083869 {
2809
- return try_to_wake_up(p, state, 0, 1);
3870
+ return try_to_wake_up(p, state, 0);
28103871 }
28113872
28123873 /*
....@@ -2831,6 +3892,8 @@
28313892 p->se.cfs_rq = NULL;
28323893 #endif
28333894
3895
+ trace_android_rvh_sched_fork_init(p);
3896
+
28343897 #ifdef CONFIG_SCHEDSTATS
28353898 /* Even if schedstat is disabled, there should not be garbage */
28363899 memset(&p->se.statistics, 0, sizeof(p->se.statistics));
....@@ -2851,7 +3914,14 @@
28513914 INIT_HLIST_HEAD(&p->preempt_notifiers);
28523915 #endif
28533916
3917
+#ifdef CONFIG_COMPACTION
3918
+ p->capture_control = NULL;
3919
+#endif
28543920 init_numa_balancing(clone_flags, p);
3921
+#ifdef CONFIG_SMP
3922
+ p->wake_entry.u_flags = CSD_TYPE_TTWU;
3923
+ p->migration_pending = NULL;
3924
+#endif
28553925 }
28563926
28573927 DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
....@@ -2868,7 +3938,7 @@
28683938
28693939 #ifdef CONFIG_PROC_SYSCTL
28703940 int sysctl_numa_balancing(struct ctl_table *table, int write,
2871
- void __user *buffer, size_t *lenp, loff_t *ppos)
3941
+ void *buffer, size_t *lenp, loff_t *ppos)
28723942 {
28733943 struct ctl_table t;
28743944 int err;
....@@ -2942,8 +4012,8 @@
29424012 }
29434013
29444014 #ifdef CONFIG_PROC_SYSCTL
2945
-int sysctl_schedstats(struct ctl_table *table, int write,
2946
- void __user *buffer, size_t *lenp, loff_t *ppos)
4015
+int sysctl_schedstats(struct ctl_table *table, int write, void *buffer,
4016
+ size_t *lenp, loff_t *ppos)
29474017 {
29484018 struct ctl_table t;
29494019 int err;
....@@ -2971,7 +4041,7 @@
29714041 */
29724042 int sched_fork(unsigned long clone_flags, struct task_struct *p)
29734043 {
2974
- unsigned long flags;
4044
+ trace_android_rvh_sched_fork(p);
29754045
29764046 __sched_fork(clone_flags, p);
29774047 /*
....@@ -2985,6 +4055,7 @@
29854055 * Make sure we do not leak PI boosting priority to the child.
29864056 */
29874057 p->prio = current->normal_prio;
4058
+ trace_android_rvh_prepare_prio_fork(p);
29884059
29894060 uclamp_fork(p);
29904061
....@@ -2999,8 +4070,8 @@
29994070 } else if (PRIO_TO_NICE(p->static_prio) < 0)
30004071 p->static_prio = NICE_TO_PRIO(0);
30014072
3002
- p->prio = p->normal_prio = __normal_prio(p);
3003
- set_load_weight(p, false);
4073
+ p->prio = p->normal_prio = p->static_prio;
4074
+ set_load_weight(p);
30044075
30054076 /*
30064077 * We don't need the reset flag anymore after the fork. It has
....@@ -3017,24 +4088,8 @@
30174088 p->sched_class = &fair_sched_class;
30184089
30194090 init_entity_runnable_average(&p->se);
4091
+ trace_android_rvh_finish_prio_fork(p);
30204092
3021
- /*
3022
- * The child is not yet in the pid-hash so no cgroup attach races,
3023
- * and the cgroup is pinned to this child due to cgroup_fork()
3024
- * is ran before sched_fork().
3025
- *
3026
- * Silence PROVE_RCU.
3027
- */
3028
- raw_spin_lock_irqsave(&p->pi_lock, flags);
3029
- rseq_migrate(p);
3030
- /*
3031
- * We're setting the CPU for the first time, we don't migrate,
3032
- * so use __set_task_cpu().
3033
- */
3034
- __set_task_cpu(p, smp_processor_id());
3035
- if (p->sched_class->task_fork)
3036
- p->sched_class->task_fork(p);
3037
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
30384093
30394094 #ifdef CONFIG_SCHED_INFO
30404095 if (likely(sched_info_on()))
....@@ -3052,6 +4107,41 @@
30524107 RB_CLEAR_NODE(&p->pushable_dl_tasks);
30534108 #endif
30544109 return 0;
4110
+}
4111
+
4112
+void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
4113
+{
4114
+ unsigned long flags;
4115
+
4116
+ /*
4117
+ * Because we're not yet on the pid-hash, p->pi_lock isn't strictly
4118
+ * required yet, but lockdep gets upset if rules are violated.
4119
+ */
4120
+ raw_spin_lock_irqsave(&p->pi_lock, flags);
4121
+#ifdef CONFIG_CGROUP_SCHED
4122
+ if (1) {
4123
+ struct task_group *tg;
4124
+
4125
+ tg = container_of(kargs->cset->subsys[cpu_cgrp_id],
4126
+ struct task_group, css);
4127
+ tg = autogroup_task_group(p, tg);
4128
+ p->sched_task_group = tg;
4129
+ }
4130
+#endif
4131
+ rseq_migrate(p);
4132
+ /*
4133
+ * We're setting the CPU for the first time, we don't migrate,
4134
+ * so use __set_task_cpu().
4135
+ */
4136
+ __set_task_cpu(p, smp_processor_id());
4137
+ if (p->sched_class->task_fork)
4138
+ p->sched_class->task_fork(p);
4139
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
4140
+}
4141
+
4142
+void sched_post_fork(struct task_struct *p)
4143
+{
4144
+ uclamp_post_fork(p);
30554145 }
30564146
30574147 unsigned long to_ratio(u64 period, u64 runtime)
....@@ -3082,6 +4172,8 @@
30824172 struct rq_flags rf;
30834173 struct rq *rq;
30844174
4175
+ trace_android_rvh_wake_up_new_task(p);
4176
+
30854177 raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
30864178 p->state = TASK_RUNNING;
30874179 #ifdef CONFIG_SMP
....@@ -3095,14 +4187,14 @@
30954187 */
30964188 p->recent_used_cpu = task_cpu(p);
30974189 rseq_migrate(p);
3098
- __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0, 1));
4190
+ __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
30994191 #endif
31004192 rq = __task_rq_lock(p, &rf);
31014193 update_rq_clock(rq);
3102
- post_init_entity_util_avg(&p->se);
4194
+ post_init_entity_util_avg(p);
4195
+ trace_android_rvh_new_task_stats(p);
31034196
31044197 activate_task(rq, p, ENQUEUE_NOCLOCK);
3105
- p->on_rq = TASK_ON_RQ_QUEUED;
31064198 trace_sched_wakeup_new(p);
31074199 check_preempt_curr(rq, p, WF_FORK);
31084200 #ifdef CONFIG_SMP
....@@ -3212,8 +4304,10 @@
32124304 /*
32134305 * Claim the task as running, we do this before switching to it
32144306 * such that any running task will have this set.
4307
+ *
4308
+ * See the ttwu() WF_ON_CPU case and its ordering comment.
32154309 */
3216
- next->on_cpu = 1;
4310
+ WRITE_ONCE(next->on_cpu, 1);
32174311 #endif
32184312 }
32194313
....@@ -3221,8 +4315,9 @@
32214315 {
32224316 #ifdef CONFIG_SMP
32234317 /*
3224
- * After ->on_cpu is cleared, the task can be moved to a different CPU.
3225
- * We must ensure this doesn't happen until the switch is completely
4318
+ * This must be the very last reference to @prev from this CPU. After
4319
+ * p->on_cpu is cleared, the task can be moved to a different CPU. We
4320
+ * must ensure this doesn't happen until the switch is completely
32264321 * finished.
32274322 *
32284323 * In particular, the load of prev->state in finish_task_switch() must
....@@ -3234,6 +4329,90 @@
32344329 #endif
32354330 }
32364331
4332
+#ifdef CONFIG_SMP
4333
+
4334
+static void do_balance_callbacks(struct rq *rq, struct callback_head *head)
4335
+{
4336
+ void (*func)(struct rq *rq);
4337
+ struct callback_head *next;
4338
+
4339
+ lockdep_assert_held(&rq->lock);
4340
+
4341
+ while (head) {
4342
+ func = (void (*)(struct rq *))head->func;
4343
+ next = head->next;
4344
+ head->next = NULL;
4345
+ head = next;
4346
+
4347
+ func(rq);
4348
+ }
4349
+}
4350
+
4351
+static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
4352
+{
4353
+ struct callback_head *head = rq->balance_callback;
4354
+
4355
+ lockdep_assert_held(&rq->lock);
4356
+ if (head) {
4357
+ rq->balance_callback = NULL;
4358
+ rq->balance_flags &= ~BALANCE_WORK;
4359
+ }
4360
+
4361
+ return head;
4362
+}
4363
+
4364
+static void __balance_callbacks(struct rq *rq)
4365
+{
4366
+ do_balance_callbacks(rq, splice_balance_callbacks(rq));
4367
+}
4368
+
4369
+static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
4370
+{
4371
+ unsigned long flags;
4372
+
4373
+ if (unlikely(head)) {
4374
+ raw_spin_lock_irqsave(&rq->lock, flags);
4375
+ do_balance_callbacks(rq, head);
4376
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
4377
+ }
4378
+}
4379
+
4380
+static void balance_push(struct rq *rq);
4381
+
4382
+static inline void balance_switch(struct rq *rq)
4383
+{
4384
+ if (likely(!rq->balance_flags))
4385
+ return;
4386
+
4387
+ if (rq->balance_flags & BALANCE_PUSH) {
4388
+ balance_push(rq);
4389
+ return;
4390
+ }
4391
+
4392
+ __balance_callbacks(rq);
4393
+}
4394
+
4395
+#else
4396
+
4397
+static inline void __balance_callbacks(struct rq *rq)
4398
+{
4399
+}
4400
+
4401
+static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
4402
+{
4403
+ return NULL;
4404
+}
4405
+
4406
+static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
4407
+{
4408
+}
4409
+
4410
+static inline void balance_switch(struct rq *rq)
4411
+{
4412
+}
4413
+
4414
+#endif
4415
+
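
Balance callbacks are queued while rq->lock is held and run from __balance_callbacks()/balance_switch() once it is safe to drop the lock. A hedged sketch, assuming it lives in a scheduler-internal file so that struct rq, cpu_of() and queue_balance_callback() from kernel/sched/sched.h are visible; the per-CPU head and empty callback body are illustrative:

#include "sched.h"

static DEFINE_PER_CPU(struct callback_head, demo_balance_head);

static void demo_balance_fn(struct rq *rq)
{
	/* Runs via do_balance_callbacks() with rq->lock held. */
}

/* Caller must hold rq->lock, e.g. from a sched class hook. */
static void demo_queue_balance_work(struct rq *rq)
{
	queue_balance_callback(rq, &per_cpu(demo_balance_head, cpu_of(rq)),
			       demo_balance_fn);
}
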
32374416 static inline void
32384417 prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
32394418 {
....@@ -3244,7 +4423,7 @@
32444423 * do an early lockdep release here:
32454424 */
32464425 rq_unpin_lock(rq, rf);
3247
- spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
4426
+ spin_release(&rq->lock.dep_map, _THIS_IP_);
32484427 #ifdef CONFIG_DEBUG_SPINLOCK
32494428 /* this is a valid case when another task releases the spinlock */
32504429 rq->lock.owner = next;
....@@ -3259,6 +4438,7 @@
32594438 * prev into current:
32604439 */
32614440 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
4441
+ balance_switch(rq);
32624442 raw_spin_unlock_irq(&rq->lock);
32634443 }
32644444
....@@ -3273,6 +4453,22 @@
32734453 #ifndef finish_arch_post_lock_switch
32744454 # define finish_arch_post_lock_switch() do { } while (0)
32754455 #endif
4456
+
4457
+static inline void kmap_local_sched_out(void)
4458
+{
4459
+#ifdef CONFIG_KMAP_LOCAL
4460
+ if (unlikely(current->kmap_ctrl.idx))
4461
+ __kmap_local_sched_out();
4462
+#endif
4463
+}
4464
+
4465
+static inline void kmap_local_sched_in(void)
4466
+{
4467
+#ifdef CONFIG_KMAP_LOCAL
4468
+ if (unlikely(current->kmap_ctrl.idx))
4469
+ __kmap_local_sched_in();
4470
+#endif
4471
+}
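
These hooks exist so that kmap_local mappings survive a context switch: prepare_task_switch() tears the outgoing task's mappings down via kmap_local_sched_out() and finish_task_switch() restores them via kmap_local_sched_in(). A hedged sketch of the user-side pattern this supports, assuming this tree provides kmap_local_page()/kunmap_local() under CONFIG_KMAP_LOCAL; the demo_* name and parameters are illustrative:

#include <linux/highmem.h>
#include <linux/string.h>

static void demo_copy_from_page(struct page *page, void *dst, size_t len)
{
	void *src = kmap_local_page(page);

	/*
	 * The mapping is CPU-local but the task may be preempted here; the
	 * sched_out/sched_in hooks keep the mapping valid across the switch.
	 */
	memcpy(dst, src, len);
	kunmap_local(src);
}
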
32764472
32774473 /**
32784474 * prepare_task_switch - prepare to switch tasks
....@@ -3296,6 +4492,7 @@
32964492 perf_event_task_sched_out(prev, next);
32974493 rseq_preempt(prev);
32984494 fire_sched_out_preempt_notifiers(prev, next);
4495
+ kmap_local_sched_out();
32994496 prepare_task(next);
33004497 prepare_arch_switch(next);
33014498 }
....@@ -3362,6 +4559,7 @@
33624559 finish_lock_switch(rq);
33634560 finish_arch_post_lock_switch();
33644561 kcov_finish_switch(current);
4562
+ kmap_local_sched_in();
33654563
33664564 fire_sched_in_preempt_notifiers(current);
33674565 /*
....@@ -3388,49 +4586,12 @@
33884586 if (prev->sched_class->task_dead)
33894587 prev->sched_class->task_dead(prev);
33904588
3391
- put_task_struct(prev);
4589
+ put_task_struct_rcu_user(prev);
33924590 }
33934591
33944592 tick_nohz_task_switch();
33954593 return rq;
33964594 }
3397
-
3398
-#ifdef CONFIG_SMP
3399
-
3400
-/* rq->lock is NOT held, but preemption is disabled */
3401
-static void __balance_callback(struct rq *rq)
3402
-{
3403
- struct callback_head *head, *next;
3404
- void (*func)(struct rq *rq);
3405
- unsigned long flags;
3406
-
3407
- raw_spin_lock_irqsave(&rq->lock, flags);
3408
- head = rq->balance_callback;
3409
- rq->balance_callback = NULL;
3410
- while (head) {
3411
- func = (void (*)(struct rq *))head->func;
3412
- next = head->next;
3413
- head->next = NULL;
3414
- head = next;
3415
-
3416
- func(rq);
3417
- }
3418
- raw_spin_unlock_irqrestore(&rq->lock, flags);
3419
-}
3420
-
3421
-static inline void balance_callback(struct rq *rq)
3422
-{
3423
- if (unlikely(rq->balance_callback))
3424
- __balance_callback(rq);
3425
-}
3426
-
3427
-#else
3428
-
3429
-static inline void balance_callback(struct rq *rq)
3430
-{
3431
-}
3432
-
3433
-#endif
34344595
34354596 /**
34364597 * schedule_tail - first thing a freshly forked thread must call.
....@@ -3451,7 +4612,6 @@
34514612 */
34524613
34534614 rq = finish_task_switch(prev);
3454
- balance_callback(rq);
34554615 preempt_enable();
34564616
34574617 if (current->set_child_tid)
....@@ -3467,12 +4627,8 @@
34674627 context_switch(struct rq *rq, struct task_struct *prev,
34684628 struct task_struct *next, struct rq_flags *rf)
34694629 {
3470
- struct mm_struct *mm, *oldmm;
3471
-
34724630 prepare_task_switch(rq, prev, next);
34734631
3474
- mm = next->mm;
3475
- oldmm = prev->active_mm;
34764632 /*
34774633 * For paravirt, this is coupled with an exit in switch_to to
34784634 * combine the page table reload and the switch backend into
....@@ -3481,22 +4637,37 @@
34814637 arch_start_context_switch(prev);
34824638
34834639 /*
3484
- * If mm is non-NULL, we pass through switch_mm(). If mm is
3485
- * NULL, we will pass through mmdrop() in finish_task_switch().
3486
- * Both of these contain the full memory barrier required by
3487
- * membarrier after storing to rq->curr, before returning to
3488
- * user-space.
4640
+ * kernel -> kernel lazy + transfer active
4641
+ * user -> kernel lazy + mmgrab() active
4642
+ *
4643
+ * kernel -> user switch + mmdrop() active
4644
+ * user -> user switch
34894645 */
3490
- if (!mm) {
3491
- next->active_mm = oldmm;
3492
- mmgrab(oldmm);
3493
- enter_lazy_tlb(oldmm, next);
3494
- } else
3495
- switch_mm_irqs_off(oldmm, mm, next);
4646
+ if (!next->mm) { // to kernel
4647
+ enter_lazy_tlb(prev->active_mm, next);
34964648
3497
- if (!prev->mm) {
3498
- prev->active_mm = NULL;
3499
- rq->prev_mm = oldmm;
4649
+ next->active_mm = prev->active_mm;
4650
+ if (prev->mm) // from user
4651
+ mmgrab(prev->active_mm);
4652
+ else
4653
+ prev->active_mm = NULL;
4654
+ } else { // to user
4655
+ membarrier_switch_mm(rq, prev->active_mm, next->mm);
4656
+ /*
4657
+ * sys_membarrier() requires an smp_mb() between setting
4658
+ * rq->curr / membarrier_switch_mm() and returning to userspace.
4659
+ *
4660
+ * The below provides this either through switch_mm(), or in
4661
+ * case 'prev->active_mm == next->mm' through
4662
+ * finish_task_switch()'s mmdrop().
4663
+ */
4664
+ switch_mm_irqs_off(prev->active_mm, next->mm, next);
4665
+
4666
+ if (!prev->mm) { // from kernel
4667
+ /* will mmdrop() in finish_task_switch(). */
4668
+ rq->prev_mm = prev->active_mm;
4669
+ prev->active_mm = NULL;
4670
+ }
35004671 }
35014672
35024673 rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
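
The membarrier_switch_mm()/smp_mb() requirement described above is driven by the membarrier(2) syscall. A hedged userspace sketch of the expedited private command that depends on this ordering; the raw syscall wrapper is only for illustration:

#define _GNU_SOURCE
#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

static int membarrier(int cmd, unsigned int flags)
{
	return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
	if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0))
		perror("register");
	else if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0))
		perror("barrier");
	else
		printf("all threads of this process passed a full barrier\n");
	return 0;
}
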
....@@ -3533,7 +4704,7 @@
35334704 * preemption, thus the result might have a time-of-check-to-time-of-use
35344705 * race. The caller is responsible to use it correctly, for example:
35354706 *
3536
- * - from a non-preemptable section (of course)
4707
+ * - from a non-preemptible section (of course)
35374708 *
35384709 * - from a thread that is bound to a single CPU
35394710 *
....@@ -3554,6 +4725,18 @@
35544725 sum += cpu_rq(i)->nr_switches;
35554726
35564727 return sum;
4728
+}
4729
+
4730
+/*
4731
+ * Consumers of these two interfaces, like for example the cpuidle menu
4732
+ * governor, are using nonsensical data. They prefer shallow idle state selection
4733
+ * for a CPU that has IO-wait which might not even end up running the task when
4734
+ * it does become runnable.
4735
+ */
4736
+
4737
+unsigned long nr_iowait_cpu(int cpu)
4738
+{
4739
+ return atomic_read(&cpu_rq(cpu)->nr_iowait);
35574740 }
35584741
35594742 /*
....@@ -3591,29 +4774,9 @@
35914774 unsigned long i, sum = 0;
35924775
35934776 for_each_possible_cpu(i)
3594
- sum += atomic_read(&cpu_rq(i)->nr_iowait);
4777
+ sum += nr_iowait_cpu(i);
35954778
35964779 return sum;
3597
-}
3598
-
3599
-/*
3600
- * Consumers of these two interfaces, like for example the cpufreq menu
3601
- * governor are using nonsensical data. Boosting frequency for a CPU that has
3602
- * IO-wait which might not even end up running the task when it does become
3603
- * runnable.
3604
- */
3605
-
3606
-unsigned long nr_iowait_cpu(int cpu)
3607
-{
3608
- struct rq *this = cpu_rq(cpu);
3609
- return atomic_read(&this->nr_iowait);
3610
-}
3611
-
3612
-void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
3613
-{
3614
- struct rq *rq = this_rq();
3615
- *nr_waiters = atomic_read(&rq->nr_iowait);
3616
- *load = rq->load.weight;
36174780 }
36184781
36194782 #ifdef CONFIG_SMP
....@@ -3627,9 +4790,14 @@
36274790 struct task_struct *p = current;
36284791 unsigned long flags;
36294792 int dest_cpu;
4793
+ bool cond = false;
4794
+
4795
+ trace_android_rvh_sched_exec(&cond);
4796
+ if (cond)
4797
+ return;
36304798
36314799 raw_spin_lock_irqsave(&p->pi_lock, flags);
3632
- dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0, 1);
4800
+ dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0);
36334801 if (dest_cpu == smp_processor_id())
36344802 goto unlock;
36354803
....@@ -3712,6 +4880,7 @@
37124880
37134881 return ns;
37144882 }
4883
+EXPORT_SYMBOL_GPL(task_sched_runtime);
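
task_sched_runtime() ultimately backs the per-thread CPU-time clocks. A hedged userspace check that exercises that path through clock_gettime(2):

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	/* CLOCK_THREAD_CPUTIME_ID reads the calling thread's accumulated runtime. */
	if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0)
		printf("thread cpu time: %ld.%09ld s\n",
		       (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}
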
37154884
37164885 /*
37174886 * This function gets called by the timer code, with HZ frequency.
....@@ -3723,14 +4892,18 @@
37234892 struct rq *rq = cpu_rq(cpu);
37244893 struct task_struct *curr = rq->curr;
37254894 struct rq_flags rf;
4895
+ unsigned long thermal_pressure;
37264896
4897
+ arch_scale_freq_tick();
37274898 sched_clock_tick();
37284899
37294900 rq_lock(rq, &rf);
37304901
4902
+ trace_android_rvh_tick_entry(rq);
37314903 update_rq_clock(rq);
4904
+ thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq));
4905
+ update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure);
37324906 curr->sched_class->task_tick(rq, curr, 0);
3733
- cpu_load_update_active(rq);
37344907 calc_global_load_tick(rq);
37354908 psi_task_tick(rq);
37364909
....@@ -3742,6 +4915,8 @@
37424915 rq->idle_balance = idle_cpu(cpu);
37434916 trigger_load_balance(rq);
37444917 #endif
4918
+
4919
+ trace_android_vh_scheduler_tick(rq);
37454920 }
37464921
37474922 #ifdef CONFIG_NO_HZ_FULL
....@@ -3799,28 +4974,31 @@
37994974 * statistics and checks timeslices in a time-independent way, regardless
38004975 * of when exactly it is running.
38014976 */
3802
- if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu))
4977
+ if (!tick_nohz_tick_stopped_cpu(cpu))
38034978 goto out_requeue;
38044979
38054980 rq_lock_irq(rq, &rf);
38064981 curr = rq->curr;
3807
- if (is_idle_task(curr) || cpu_is_offline(cpu))
4982
+ if (cpu_is_offline(cpu))
38084983 goto out_unlock;
38094984
38104985 update_rq_clock(rq);
3811
- delta = rq_clock_task(rq) - curr->se.exec_start;
38124986
3813
- /*
3814
- * Make sure the next tick runs within a reasonable
3815
- * amount of time.
3816
- */
3817
- WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
4987
+ if (!is_idle_task(curr)) {
4988
+ /*
4989
+ * Make sure the next tick runs within a reasonable
4990
+ * amount of time.
4991
+ */
4992
+ delta = rq_clock_task(rq) - curr->se.exec_start;
4993
+ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
4994
+ }
38184995 curr->sched_class->task_tick(rq, curr, 0);
38194996
4997
+ calc_load_nohz_remote(rq);
38204998 out_unlock:
38214999 rq_unlock_irq(rq, &rf);
3822
-
38235000 out_requeue:
5001
+
38245002 /*
38255003 * Run the remote tick once per second (1Hz). This arbitrary
38265004 * frequency is large enough to avoid overload but short enough
....@@ -3884,7 +5062,7 @@
38845062 static inline void sched_tick_stop(int cpu) { }
38855063 #endif
38865064
3887
-#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
5065
+#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \
38885066 defined(CONFIG_TRACE_PREEMPT_TOGGLE))
38895067 /*
38905068 * If the value passed in is equal to the current preempt count
....@@ -3990,11 +5168,12 @@
39905168 if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
39915169 && in_atomic_preempt_off()) {
39925170 pr_err("Preemption disabled at:");
3993
- print_ip_sym(preempt_disable_ip);
3994
- pr_cont("\n");
5171
+ print_ip_sym(KERN_ERR, preempt_disable_ip);
39955172 }
39965173 if (panic_on_warn)
39975174 panic("scheduling while atomic\n");
5175
+
5176
+ trace_android_rvh_schedule_bug(prev);
39985177
39995178 dump_stack();
40005179 add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
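
__schedule_bug() is what produces the familiar "scheduling while atomic" splat. A hedged illustration of the kind of caller that trips it; the lock name and sleep length are arbitrary:

#include <linux/spinlock.h>
#include <linux/delay.h>

static DEFINE_SPINLOCK(demo_lock);

static void demo_bad_sleep(void)
{
	spin_lock(&demo_lock);		/* preempt_count() is now non-zero      */
	msleep(10);			/* sleeps -> __schedule_bug() fires     */
	spin_unlock(&demo_lock);
}
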
....@@ -4003,11 +5182,23 @@
40035182 /*
40045183 * Various schedule()-time debugging checks and statistics:
40055184 */
4006
-static inline void schedule_debug(struct task_struct *prev)
5185
+static inline void schedule_debug(struct task_struct *prev, bool preempt)
40075186 {
40085187 #ifdef CONFIG_SCHED_STACK_END_CHECK
40095188 if (task_stack_end_corrupted(prev))
40105189 panic("corrupted stack end detected inside scheduler\n");
5190
+
5191
+ if (task_scs_end_corrupted(prev))
5192
+ panic("corrupted shadow stack detected inside scheduler\n");
5193
+#endif
5194
+
5195
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
5196
+ if (!preempt && prev->state && prev->non_block_count) {
5197
+ printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n",
5198
+ prev->comm, prev->pid, prev->non_block_count);
5199
+ dump_stack();
5200
+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
5201
+ }
40115202 #endif
40125203
40135204 if (unlikely(in_atomic_preempt_off())) {
....@@ -4019,6 +5210,28 @@
40195210 profile_hit(SCHED_PROFILING, __builtin_return_address(0));
40205211
40215212 schedstat_inc(this_rq()->sched_count);
5213
+}
5214
+
5215
+static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
5216
+ struct rq_flags *rf)
5217
+{
5218
+#ifdef CONFIG_SMP
5219
+ const struct sched_class *class;
5220
+ /*
5221
+ * We must do the balancing pass before put_prev_task(), such
5222
+ * that when we release the rq->lock the task is in the same
5223
+ * state as before we took rq->lock.
5224
+ *
5225
+ * We can terminate the balance pass as soon as we know there is
5226
+ * a runnable task of @class priority or higher.
5227
+ */
5228
+ for_class_range(class, prev->sched_class, &idle_sched_class) {
5229
+ if (class->balance(rq, prev, rf))
5230
+ break;
5231
+ }
5232
+#endif
5233
+
5234
+ put_prev_task(rq, prev);
40225235 }
40235236
40245237 /*
....@@ -4036,36 +5249,34 @@
40365249	 * higher scheduling class, because otherwise those lose the
40375250 * opportunity to pull in more work from other CPUs.
40385251 */
4039
- if (likely((prev->sched_class == &idle_sched_class ||
4040
- prev->sched_class == &fair_sched_class) &&
5252
+ if (likely(prev->sched_class <= &fair_sched_class &&
40415253 rq->nr_running == rq->cfs.h_nr_running)) {
40425254
4043
- p = fair_sched_class.pick_next_task(rq, prev, rf);
5255
+ p = pick_next_task_fair(rq, prev, rf);
40445256 if (unlikely(p == RETRY_TASK))
4045
- goto again;
5257
+ goto restart;
40465258
40475259 /* Assumes fair_sched_class->next == idle_sched_class */
4048
- if (unlikely(!p))
4049
- p = idle_sched_class.pick_next_task(rq, prev, rf);
5260
+ if (!p) {
5261
+ put_prev_task(rq, prev);
5262
+ p = pick_next_task_idle(rq);
5263
+ }
40505264
40515265 return p;
40525266 }
40535267
4054
-again:
5268
+restart:
5269
+ put_prev_task_balance(rq, prev, rf);
5270
+
40555271 for_each_class(class) {
4056
- p = class->pick_next_task(rq, prev, rf);
4057
- if (p) {
4058
- if (unlikely(p == RETRY_TASK))
4059
- goto again;
5272
+ p = class->pick_next_task(rq);
5273
+ if (p)
40605274 return p;
4061
- }
40625275 }
40635276
40645277 /* The idle class should always have a runnable task: */
40655278 BUG();
40665279 }
4067
-
4068
-static void migrate_disabled_sched(struct task_struct *p);
40695280
40705281 /*
40715282 * __schedule() is the main scheduler function.
....@@ -4087,7 +5298,7 @@
40875298 * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets
40885299 * called on the nearest possible occasion:
40895300 *
4090
- * - If the kernel is preemptible (CONFIG_PREEMPT=y):
5301
+ * - If the kernel is preemptible (CONFIG_PREEMPTION=y):
40915302 *
40925303	 *       - in syscall or exception context, at the next outermost
40935304 * preempt_enable(). (this might be as soon as the wake_up()'s
....@@ -4096,7 +5307,7 @@
40965307 * - in IRQ context, return from interrupt-handler to
40975308 * preemptible context
40985309 *
4099
- * - If the kernel is not preemptible (CONFIG_PREEMPT is not set)
5310
+ * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set)
41005311 * then at the next:
41015312 *
41025313 * - cond_resched() call
....@@ -4106,10 +5317,11 @@
41065317 *
41075318 * WARNING: must be called with preemption disabled!
41085319 */
4109
-static void __sched notrace __schedule(bool preempt)
5320
+static void __sched notrace __schedule(bool preempt, bool spinning_lock)
41105321 {
41115322 struct task_struct *prev, *next;
41125323 unsigned long *switch_count;
5324
+ unsigned long prev_state;
41135325 struct rq_flags rf;
41145326 struct rq *rq;
41155327 int cpu;
....@@ -4118,7 +5330,7 @@
41185330 rq = cpu_rq(cpu);
41195331 prev = rq->curr;
41205332
4121
- schedule_debug(prev);
5333
+ schedule_debug(prev, preempt);
41225334
41235335 if (sched_feat(HRTICK))
41245336 hrtick_clear(rq);
....@@ -4129,28 +5341,59 @@
41295341 /*
41305342 * Make sure that signal_pending_state()->signal_pending() below
41315343 * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
4132
- * done by the caller to avoid the race with signal_wake_up().
5344
+ * done by the caller to avoid the race with signal_wake_up():
41335345 *
4134
- * The membarrier system call requires a full memory barrier
5346
+ * __set_current_state(@state) signal_wake_up()
5347
+ * schedule() set_tsk_thread_flag(p, TIF_SIGPENDING)
5348
+ * wake_up_state(p, state)
5349
+ * LOCK rq->lock LOCK p->pi_state
5350
+ * smp_mb__after_spinlock() smp_mb__after_spinlock()
5351
+ * if (signal_pending_state()) if (p->state & @state)
5352
+ *
5353
+ * Also, the membarrier system call requires a full memory barrier
41355354 * after coming from user-space, before storing to rq->curr.
41365355 */
41375356 rq_lock(rq, &rf);
41385357 smp_mb__after_spinlock();
4139
-
4140
- if (__migrate_disabled(prev))
4141
- migrate_disabled_sched(prev);
41425358
41435359 /* Promote REQ to ACT */
41445360 rq->clock_update_flags <<= 1;
41455361 update_rq_clock(rq);
41465362
41475363 switch_count = &prev->nivcsw;
4148
- if (!preempt && prev->state) {
4149
- if (unlikely(signal_pending_state(prev->state, prev))) {
5364
+
5365
+ /*
5366
+ * We must load prev->state once (task_struct::state is volatile), such
5367
+ * that:
5368
+ *
5369
+ * - we form a control dependency vs deactivate_task() below.
5370
+ * - ptrace_{,un}freeze_traced() can change ->state underneath us.
5371
+ */
5372
+ prev_state = prev->state;
5373
+ if ((!preempt || spinning_lock) && prev_state) {
5374
+ if (signal_pending_state(prev_state, prev)) {
41505375 prev->state = TASK_RUNNING;
41515376 } else {
5377
+ prev->sched_contributes_to_load =
5378
+ (prev_state & TASK_UNINTERRUPTIBLE) &&
5379
+ !(prev_state & TASK_NOLOAD) &&
5380
+ !(prev->flags & PF_FROZEN);
5381
+
5382
+ if (prev->sched_contributes_to_load)
5383
+ rq->nr_uninterruptible++;
5384
+
5385
+ /*
5386
+ * __schedule() ttwu()
5387
+ * prev_state = prev->state; if (p->on_rq && ...)
5388
+ * if (prev_state) goto out;
5389
+ * p->on_rq = 0; smp_acquire__after_ctrl_dep();
5390
+ * p->state = TASK_WAKING
5391
+ *
5392
+ * Where __schedule() and ttwu() have matching control dependencies.
5393
+ *
5394
+ * After this, schedule() must not care about p->state any more.
5395
+ */
41525396 deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
4153
- prev->on_rq = 0;
41545397
41555398 if (prev->in_iowait) {
41565399 atomic_inc(&rq->nr_iowait);
....@@ -4165,9 +5408,14 @@
41655408 clear_tsk_need_resched_lazy(prev);
41665409 clear_preempt_need_resched();
41675410
5411
+ trace_android_rvh_schedule(prev, next, rq);
41685412 if (likely(prev != next)) {
41695413 rq->nr_switches++;
4170
- rq->curr = next;
5414
+ /*
5415
+ * RCU users of rcu_dereference(rq->curr) may not see
5416
+ * changes to task_struct made by pick_next_task().
5417
+ */
5418
+ RCU_INIT_POINTER(rq->curr, next);
41715419 /*
41725420 * The membarrier system call requires each architecture
41735421 * to have a full memory barrier after updating
....@@ -4184,16 +5432,20 @@
41845432 */
41855433 ++*switch_count;
41865434
5435
+ migrate_disable_switch(rq, prev);
5436
+ psi_sched_switch(prev, next, !task_on_rq_queued(prev));
5437
+
41875438 trace_sched_switch(preempt, prev, next);
41885439
41895440 /* Also unlocks the rq: */
41905441 rq = context_switch(rq, prev, next, &rf);
41915442 } else {
41925443 rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
4193
- rq_unlock_irq(rq, &rf);
4194
- }
41955444
4196
- balance_callback(rq);
5445
+ rq_unpin_lock(rq, &rf);
5446
+ __balance_callbacks(rq);
5447
+ raw_spin_unlock_irq(&rq->lock);
5448
+ }
41975449 }
41985450
41995451 void __noreturn do_task_dead(void)
....@@ -4204,7 +5456,7 @@
42045456 /* Tell freezer to ignore us: */
42055457 current->flags |= PF_NOFREEZE;
42065458
4207
- __schedule(false);
5459
+ __schedule(false, false);
42085460 BUG();
42095461
42105462 /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */
....@@ -4214,24 +5466,28 @@
42145466
42155467 static inline void sched_submit_work(struct task_struct *tsk)
42165468 {
5469
+ unsigned int task_flags;
5470
+
42175471 if (!tsk->state)
42185472 return;
42195473
5474
+ task_flags = tsk->flags;
42205475 /*
42215476 * If a worker went to sleep, notify and ask workqueue whether
42225477 * it wants to wake up a task to maintain concurrency.
42235478 * As this function is called inside the schedule() context,
42245479 * we disable preemption to avoid it calling schedule() again
4225
- * in the possible wakeup of a kworker.
5480
+ * in the possible wakeup of a kworker and because wq_worker_sleeping()
5481
+ * requires it.
42265482 */
4227
- if (tsk->flags & PF_WQ_WORKER) {
5483
+ if (task_flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
42285484 preempt_disable();
4229
- wq_worker_sleeping(tsk);
5485
+ if (task_flags & PF_WQ_WORKER)
5486
+ wq_worker_sleeping(tsk);
5487
+ else
5488
+ io_wq_worker_sleeping(tsk);
42305489 preempt_enable_no_resched();
42315490 }
4232
-
4233
- if (tsk_is_pi_blocked(tsk))
4234
- return;
42355491
42365492 /*
42375493 * If we are going to sleep and we have plugged IO queued,
....@@ -4243,8 +5499,12 @@
42435499
42445500 static void sched_update_worker(struct task_struct *tsk)
42455501 {
4246
- if (tsk->flags & PF_WQ_WORKER)
4247
- wq_worker_running(tsk);
5502
+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
5503
+ if (tsk->flags & PF_WQ_WORKER)
5504
+ wq_worker_running(tsk);
5505
+ else
5506
+ io_wq_worker_running(tsk);
5507
+ }
42485508 }
42495509
42505510 asmlinkage __visible void __sched schedule(void)
....@@ -4254,7 +5514,7 @@
42545514 sched_submit_work(tsk);
42555515 do {
42565516 preempt_disable();
4257
- __schedule(false);
5517
+ __schedule(false, false);
42585518 sched_preempt_enable_no_resched();
42595519 } while (need_resched());
42605520 sched_update_worker(tsk);
....@@ -4282,7 +5542,7 @@
42825542 */
42835543 WARN_ON_ONCE(current->state);
42845544 do {
4285
- __schedule(false);
5545
+ __schedule(false, false);
42865546 } while (need_resched());
42875547 }
42885548
....@@ -4335,7 +5595,7 @@
43355595 */
43365596 preempt_disable_notrace();
43375597 preempt_latency_start(1);
4338
- __schedule(true);
5598
+ __schedule(true, false);
43395599 preempt_latency_stop(1);
43405600 preempt_enable_no_resched_notrace();
43415601
....@@ -4370,11 +5630,10 @@
43705630
43715631 #endif
43725632
4373
-#ifdef CONFIG_PREEMPT
5633
+#ifdef CONFIG_PREEMPTION
43745634 /*
4375
- * this is the entry point to schedule() from in-kernel preemption
4376
- * off of preempt_enable. Kernel preemptions off return from interrupt
4377
- * occur there and call schedule directly.
5635
+ * This is the entry point to schedule() from in-kernel preemption
5636
+ * off of preempt_enable.
43785637 */
43795638 asmlinkage __visible void __sched notrace preempt_schedule(void)
43805639 {
....@@ -4390,6 +5649,19 @@
43905649 }
43915650 NOKPROBE_SYMBOL(preempt_schedule);
43925651 EXPORT_SYMBOL(preempt_schedule);
5652
+
5653
+#ifdef CONFIG_PREEMPT_RT
5654
+void __sched notrace preempt_schedule_lock(void)
5655
+{
5656
+ do {
5657
+ preempt_disable();
5658
+ __schedule(true, true);
5659
+ sched_preempt_enable_no_resched();
5660
+ } while (need_resched());
5661
+}
5662
+NOKPROBE_SYMBOL(preempt_schedule_lock);
5663
+EXPORT_SYMBOL(preempt_schedule_lock);
5664
+#endif
43935665
43945666 /**
43955667 * preempt_schedule_notrace - preempt_schedule called by tracing
....@@ -4437,7 +5709,7 @@
44375709 * an infinite recursion.
44385710 */
44395711 prev_ctx = exception_enter();
4440
- __schedule(true);
5712
+ __schedule(true, false);
44415713 exception_exit(prev_ctx);
44425714
44435715 preempt_latency_stop(1);
....@@ -4446,10 +5718,10 @@
44465718 }
44475719 EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
44485720
4449
-#endif /* CONFIG_PREEMPT */
5721
+#endif /* CONFIG_PREEMPTION */
44505722
44515723 /*
4452
- * this is the entry point to schedule() from kernel preemption
5724
+ * This is the entry point to schedule() from kernel preemption
44535725 * off of irq context.
44545726 * Note, that this is called and return with irqs disabled. This will
44555727 * protect us against recursive calling from irq.
....@@ -4466,7 +5738,7 @@
44665738 do {
44675739 preempt_disable();
44685740 local_irq_enable();
4469
- __schedule(true);
5741
+ __schedule(true, false);
44705742 local_irq_disable();
44715743 sched_preempt_enable_no_resched();
44725744 } while (need_resched());
....@@ -4477,9 +5749,22 @@
44775749 int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
44785750 void *key)
44795751 {
4480
- return try_to_wake_up(curr->private, mode, wake_flags, 1);
5752
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~(WF_SYNC | WF_ANDROID_VENDOR));
5753
+ return try_to_wake_up(curr->private, mode, wake_flags);
44815754 }
44825755 EXPORT_SYMBOL(default_wake_function);
5756
+
5757
+static void __setscheduler_prio(struct task_struct *p, int prio)
5758
+{
5759
+ if (dl_prio(prio))
5760
+ p->sched_class = &dl_sched_class;
5761
+ else if (rt_prio(prio))
5762
+ p->sched_class = &rt_sched_class;
5763
+ else
5764
+ p->sched_class = &fair_sched_class;
5765
+
5766
+ p->prio = prio;
5767
+}
44835768
44845769 #ifdef CONFIG_RT_MUTEXES
44855770
....@@ -4517,6 +5802,7 @@
45175802 struct rq_flags rf;
45185803 struct rq *rq;
45195804
5805
+ trace_android_rvh_rtmutex_prepare_setprio(p, pi_task);
45205806 /* XXX used to be waiter->prio, not waiter->task->prio */
45215807 prio = __rt_effective_prio(pi_task, p->normal_prio);
45225808
....@@ -4591,39 +5877,39 @@
45915877 if (!dl_prio(p->normal_prio) ||
45925878 (pi_task && dl_prio(pi_task->prio) &&
45935879 dl_entity_preempt(&pi_task->dl, &p->dl))) {
4594
- p->dl.dl_boosted = 1;
5880
+ p->dl.pi_se = pi_task->dl.pi_se;
45955881 queue_flag |= ENQUEUE_REPLENISH;
4596
- } else
4597
- p->dl.dl_boosted = 0;
4598
- p->sched_class = &dl_sched_class;
5882
+ } else {
5883
+ p->dl.pi_se = &p->dl;
5884
+ }
45995885 } else if (rt_prio(prio)) {
46005886 if (dl_prio(oldprio))
4601
- p->dl.dl_boosted = 0;
5887
+ p->dl.pi_se = &p->dl;
46025888 if (oldprio < prio)
46035889 queue_flag |= ENQUEUE_HEAD;
4604
- p->sched_class = &rt_sched_class;
46055890 } else {
46065891 if (dl_prio(oldprio))
4607
- p->dl.dl_boosted = 0;
5892
+ p->dl.pi_se = &p->dl;
46085893 if (rt_prio(oldprio))
46095894 p->rt.timeout = 0;
4610
- p->sched_class = &fair_sched_class;
46115895 }
46125896
4613
- p->prio = prio;
5897
+ __setscheduler_prio(p, prio);
46145898
46155899 if (queued)
46165900 enqueue_task(rq, p, queue_flag);
46175901 if (running)
4618
- set_curr_task(rq, p);
5902
+ set_next_task(rq, p);
46195903
46205904 check_class_changed(rq, p, prev_class, oldprio);
46215905 out_unlock:
46225906 /* Avoid rq from going away on us: */
46235907 preempt_disable();
4624
- __task_rq_unlock(rq, &rf);
46255908
4626
- balance_callback(rq);
5909
+ rq_unpin_lock(rq, &rf);
5910
+ __balance_callbacks(rq);
5911
+ raw_spin_unlock(&rq->lock);
5912
+
46275913 preempt_enable();
46285914 }
46295915 #else
....@@ -4635,12 +5921,13 @@
46355921
46365922 void set_user_nice(struct task_struct *p, long nice)
46375923 {
4638
- bool queued, running;
4639
- int old_prio, delta;
5924
+ bool queued, running, allowed = false;
5925
+ int old_prio;
46405926 struct rq_flags rf;
46415927 struct rq *rq;
46425928
4643
- if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE)
5929
+ trace_android_rvh_set_user_nice(p, &nice, &allowed);
5930
+ if ((task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) && !allowed)
46445931 return;
46455932 /*
46465933 * We have to be careful, if called from sys_setpriority(),
....@@ -4667,22 +5954,21 @@
46675954 put_prev_task(rq, p);
46685955
46695956 p->static_prio = NICE_TO_PRIO(nice);
4670
- set_load_weight(p, true);
5957
+ set_load_weight(p);
46715958 old_prio = p->prio;
46725959 p->prio = effective_prio(p);
4673
- delta = p->prio - old_prio;
46745960
4675
- if (queued) {
5961
+ if (queued)
46765962 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
4677
- /*
4678
- * If the task increased its priority or is running and
4679
- * lowered its priority, then reschedule its CPU:
4680
- */
4681
- if (delta < 0 || (delta > 0 && task_running(rq, p)))
4682
- resched_curr(rq);
4683
- }
46845963 if (running)
4685
- set_curr_task(rq, p);
5964
+ set_next_task(rq, p);
5965
+
5966
+ /*
5967
+ * If the task increased its priority or is running and
5968
+ * lowered its priority, then reschedule its CPU:
5969
+ */
5970
+ p->sched_class->prio_changed(rq, p, old_prio);
5971
+
46865972 out_unlock:
46875973 task_rq_unlock(rq, p, &rf);
46885974 }
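
A minimal userspace sketch of the request that eventually reaches set_user_nice() above, via setpriority(2); this is not part of the patch, and the nice value 10 is only an example:

#include <errno.h>
#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
	/* PRIO_PROCESS with pid 0 means the calling process. */
	if (setpriority(PRIO_PROCESS, 0, 10) == -1) {
		perror("setpriority");
		return 1;
	}

	/* getpriority() may legitimately return -1, so check errno. */
	errno = 0;
	int prio = getpriority(PRIO_PROCESS, 0);
	if (prio == -1 && errno != 0) {
		perror("getpriority");
		return 1;
	}
	printf("nice is now %d\n", prio);
	return 0;
}
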
....@@ -4767,7 +6053,7 @@
47676053 return 0;
47686054
47696055 #ifdef CONFIG_SMP
4770
- if (!llist_empty(&rq->wake_list))
6056
+ if (rq->ttwu_pending)
47716057 return 0;
47726058 #endif
47736059
....@@ -4790,6 +6076,7 @@
47906076
47916077 return 1;
47926078 }
6079
+EXPORT_SYMBOL_GPL(available_idle_cpu);
47936080
47946081 /**
47956082 * idle_task - return the idle task for a given CPU.
....@@ -4841,36 +6128,7 @@
48416128 */
48426129 p->rt_priority = attr->sched_priority;
48436130 p->normal_prio = normal_prio(p);
4844
- set_load_weight(p, true);
4845
-}
4846
-
4847
-/* Actually do priority change: must hold pi & rq lock. */
4848
-static void __setscheduler(struct rq *rq, struct task_struct *p,
4849
- const struct sched_attr *attr, bool keep_boost)
4850
-{
4851
- /*
4852
- * If params can't change scheduling class changes aren't allowed
4853
- * either.
4854
- */
4855
- if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)
4856
- return;
4857
-
4858
- __setscheduler_params(p, attr);
4859
-
4860
- /*
4861
- * Keep a potential priority boosting if called from
4862
- * sched_setscheduler().
4863
- */
4864
- p->prio = normal_prio(p);
4865
- if (keep_boost)
4866
- p->prio = rt_effective_prio(p, p->prio);
4867
-
4868
- if (dl_prio(p->prio))
4869
- p->sched_class = &dl_sched_class;
4870
- else if (rt_prio(p->prio))
4871
- p->sched_class = &rt_sched_class;
4872
- else
4873
- p->sched_class = &fair_sched_class;
6131
+ set_load_weight(p);
48746132 }
48756133
48766134 /*
....@@ -4893,11 +6151,10 @@
48936151 const struct sched_attr *attr,
48946152 bool user, bool pi)
48956153 {
4896
- int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
4897
- MAX_RT_PRIO - 1 - attr->sched_priority;
4898
- int retval, oldprio, oldpolicy = -1, queued, running;
4899
- int new_effective_prio, policy = attr->sched_policy;
6154
+ int oldpolicy = -1, policy = attr->sched_policy;
6155
+ int retval, oldprio, newprio, queued, running;
49006156 const struct sched_class *prev_class;
6157
+ struct callback_head *head;
49016158 struct rq_flags rf;
49026159 int reset_on_fork;
49036160 int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
....@@ -4969,7 +6226,7 @@
49696226 * Treat SCHED_IDLE as nice 20. Only allow a switch to
49706227 * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
49716228 */
4972
- if (idle_policy(p->policy) && !idle_policy(policy)) {
6229
+ if (task_has_idle_policy(p) && !idle_policy(policy)) {
49736230 if (!can_nice(p, task_nice(p)))
49746231 return -EPERM;
49756232 }
....@@ -4980,6 +6237,10 @@
49806237
49816238 /* Normal users shall not reset the sched_reset_on_fork flag: */
49826239 if (p->sched_reset_on_fork && !reset_on_fork)
6240
+ return -EPERM;
6241
+
6242
+ /* Can't change util-clamps */
6243
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
49836244 return -EPERM;
49846245 }
49856246
....@@ -5013,8 +6274,8 @@
50136274	 * Changing the policy of the stop threads is a very bad idea:
50146275 */
50156276 if (p == rq->stop) {
5016
- task_rq_unlock(rq, p, &rf);
5017
- return -EINVAL;
6277
+ retval = -EINVAL;
6278
+ goto unlock;
50186279 }
50196280
50206281 /*
....@@ -5032,8 +6293,8 @@
50326293 goto change;
50336294
50346295 p->sched_reset_on_fork = reset_on_fork;
5035
- task_rq_unlock(rq, p, &rf);
5036
- return 0;
6296
+ retval = 0;
6297
+ goto unlock;
50376298 }
50386299 change:
50396300
....@@ -5046,8 +6307,8 @@
50466307 if (rt_bandwidth_enabled() && rt_policy(policy) &&
50476308 task_group(p)->rt_bandwidth.rt_runtime == 0 &&
50486309 !task_group_is_autogroup(task_group(p))) {
5049
- task_rq_unlock(rq, p, &rf);
5050
- return -EPERM;
6310
+ retval = -EPERM;
6311
+ goto unlock;
50516312 }
50526313 #endif
50536314 #ifdef CONFIG_SMP
....@@ -5062,8 +6323,8 @@
50626323 */
50636324 if (!cpumask_subset(span, p->cpus_ptr) ||
50646325 rq->rd->dl_bw.bw == 0) {
5065
- task_rq_unlock(rq, p, &rf);
5066
- return -EPERM;
6326
+ retval = -EPERM;
6327
+ goto unlock;
50676328 }
50686329 }
50696330 #endif
....@@ -5082,13 +6343,14 @@
50826343 * is available.
50836344 */
50846345 if ((dl_policy(policy) || dl_task(p)) && sched_dl_overflow(p, policy, attr)) {
5085
- task_rq_unlock(rq, p, &rf);
5086
- return -EBUSY;
6346
+ retval = -EBUSY;
6347
+ goto unlock;
50876348 }
50886349
50896350 p->sched_reset_on_fork = reset_on_fork;
50906351 oldprio = p->prio;
50916352
6353
+ newprio = __normal_prio(policy, attr->sched_priority, attr->sched_nice);
50926354 if (pi) {
50936355 /*
50946356 * Take priority boosted tasks into account. If the new
....@@ -5097,8 +6359,8 @@
50976359 * the runqueue. This will be done when the task deboost
50986360 * itself.
50996361 */
5100
- new_effective_prio = rt_effective_prio(p, newprio);
5101
- if (new_effective_prio == oldprio)
6362
+ newprio = rt_effective_prio(p, newprio);
6363
+ if (newprio == oldprio)
51026364 queue_flags &= ~DEQUEUE_MOVE;
51036365 }
51046366
....@@ -5111,7 +6373,11 @@
51116373
51126374 prev_class = p->sched_class;
51136375
5114
- __setscheduler(rq, p, attr, pi);
6376
+ if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
6377
+ __setscheduler_params(p, attr);
6378
+ __setscheduler_prio(p, newprio);
6379
+ trace_android_rvh_setscheduler(p);
6380
+ }
51156381 __setscheduler_uclamp(p, attr);
51166382
51176383 if (queued) {
....@@ -5125,22 +6391,27 @@
51256391 enqueue_task(rq, p, queue_flags);
51266392 }
51276393 if (running)
5128
- set_curr_task(rq, p);
6394
+ set_next_task(rq, p);
51296395
51306396 check_class_changed(rq, p, prev_class, oldprio);
51316397
51326398 /* Avoid rq from going away on us: */
51336399 preempt_disable();
6400
+ head = splice_balance_callbacks(rq);
51346401 task_rq_unlock(rq, p, &rf);
51356402
51366403 if (pi)
51376404 rt_mutex_adjust_pi(p);
51386405
51396406 /* Run balance callbacks after we've adjusted the PI chain: */
5140
- balance_callback(rq);
6407
+ balance_callbacks(rq, head);
51416408 preempt_enable();
51426409
51436410 return 0;
6411
+
6412
+unlock:
6413
+ task_rq_unlock(rq, p, &rf);
6414
+ return retval;
51446415 }
51456416
51466417 static int _sched_setscheduler(struct task_struct *p, int policy,
....@@ -5152,6 +6423,14 @@
51526423 .sched_nice = PRIO_TO_NICE(p->static_prio),
51536424 };
51546425
6426
+ if (IS_ENABLED(CONFIG_ROCKCHIP_OPTIMIZE_RT_PRIO) &&
6427
+ ((policy == SCHED_FIFO) || (policy == SCHED_RR))) {
6428
+ attr.sched_priority /= 2;
6429
+ if (!check)
6430
+ attr.sched_priority += MAX_RT_PRIO / 2;
6431
+ if (!attr.sched_priority)
6432
+ attr.sched_priority = 1;
6433
+ }
51556434 /* Fixup the legacy SCHED_RESET_ON_FORK hack. */
51566435 if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) {
51576436 attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK;
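
The CONFIG_ROCKCHIP_OPTIMIZE_RT_PRIO block above compresses requested RT priorities into a smaller range. A standalone restatement of that mapping, for illustration only; the helper name is made up, and check mirrors the check argument of _sched_setscheduler() (true for the userspace-checked path):

#define MAX_RT_PRIO 100				/* value used by this tree */

static int rockchip_remap_rt_prio(int prio, int check)
{
	prio /= 2;				/* halve the requested priority */
	if (!check)
		prio += MAX_RT_PRIO / 2;	/* kernel callers land in the upper half */
	if (!prio)
		prio = 1;			/* SCHED_FIFO/RR priority 0 is not valid */
	return prio;
}

With MAX_RT_PRIO == 100, a checked (userspace) request of 90 maps to 45, while a nocheck (kernel-internal) request of 90 maps to 45 + 50 = 95.
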
....@@ -5166,6 +6445,8 @@
51666445 * @p: the task in question.
51676446 * @policy: new policy.
51686447 * @param: structure containing the new RT priority.
6448
+ *
6449
+ * Use sched_set_fifo(), read its comment.
51696450 *
51706451 * Return: 0 on success. An error code otherwise.
51716452 *
....@@ -5188,6 +6469,7 @@
51886469 {
51896470 return __sched_setscheduler(p, attr, false, true);
51906471 }
6472
+EXPORT_SYMBOL_GPL(sched_setattr_nocheck);
51916473
51926474 /**
51936475 * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
....@@ -5208,6 +6490,51 @@
52086490 return _sched_setscheduler(p, policy, param, false);
52096491 }
52106492 EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck);
6493
+
6494
+/*
6495
+ * SCHED_FIFO is a broken scheduler model; that is, it is fundamentally
6496
+ * incapable of resource management, which is the one thing an OS really should
6497
+ * be doing.
6498
+ *
6499
+ * This is of course the reason it is limited to privileged users only.
6500
+ *
6501
+ * Worse still; it is fundamentally impossible to compose static priority
6502
+ * workloads. You cannot take two correctly working static prio workloads
6503
+ * and smash them together and still expect them to work.
6504
+ *
6505
+ * For this reason 'all' FIFO tasks the kernel creates are basically at:
6506
+ *
6507
+ * MAX_RT_PRIO / 2
6508
+ *
6509
+ * The administrator _MUST_ configure the system, the kernel simply doesn't
6510
+ * know enough information to make a sensible choice.
6511
+ */
6512
+void sched_set_fifo(struct task_struct *p)
6513
+{
6514
+ struct sched_param sp = { .sched_priority = MAX_RT_PRIO / 2 };
6515
+ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0);
6516
+}
6517
+EXPORT_SYMBOL_GPL(sched_set_fifo);
6518
+
6519
+/*
6520
+ * For when you don't much care about FIFO, but want to be above SCHED_NORMAL.
6521
+ */
6522
+void sched_set_fifo_low(struct task_struct *p)
6523
+{
6524
+ struct sched_param sp = { .sched_priority = 1 };
6525
+ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0);
6526
+}
6527
+EXPORT_SYMBOL_GPL(sched_set_fifo_low);
6528
+
6529
+void sched_set_normal(struct task_struct *p, int nice)
6530
+{
6531
+ struct sched_attr attr = {
6532
+ .sched_policy = SCHED_NORMAL,
6533
+ .sched_nice = nice,
6534
+ };
6535
+ WARN_ON_ONCE(sched_setattr_nocheck(p, &attr) != 0);
6536
+}
6537
+EXPORT_SYMBOL_GPL(sched_set_normal);
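
From userspace, the administrator-side counterpart of the comment above is sched_setscheduler(2). A hedged sketch that puts an existing task at SCHED_FIFO priority 50, roughly the MAX_RT_PRIO/2 midpoint used by the kernel helpers; target_pid is a placeholder and the call requires CAP_SYS_NICE:

#include <sched.h>
#include <stdio.h>
#include <sys/types.h>

int main(void)
{
	pid_t target_pid = 1234;	/* placeholder: pid of the task to adjust */
	struct sched_param sp = { .sched_priority = 50 };

	if (sched_setscheduler(target_pid, SCHED_FIFO, &sp) == -1) {
		perror("sched_setscheduler");
		return 1;
	}
	return 0;
}
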
52116538
52126539 static int
52136540 do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
....@@ -5239,9 +6566,6 @@
52396566 u32 size;
52406567 int ret;
52416568
5242
- if (!access_ok(VERIFY_WRITE, uattr, SCHED_ATTR_SIZE_VER0))
5243
- return -EFAULT;
5244
-
52456569 /* Zero the full structure, so that a short copy will be nice: */
52466570 memset(attr, 0, sizeof(*attr));
52476571
....@@ -5249,44 +6573,18 @@
52496573 if (ret)
52506574 return ret;
52516575
5252
- /* Bail out on silly large: */
5253
- if (size > PAGE_SIZE)
5254
- goto err_size;
5255
-
52566576 /* ABI compatibility quirk: */
52576577 if (!size)
52586578 size = SCHED_ATTR_SIZE_VER0;
5259
-
5260
- if (size < SCHED_ATTR_SIZE_VER0)
6579
+ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE)
52616580 goto err_size;
52626581
5263
- /*
5264
- * If we're handed a bigger struct than we know of,
5265
- * ensure all the unknown bits are 0 - i.e. new
5266
- * user-space does not rely on any kernel feature
5267
- * extensions we dont know about yet.
5268
- */
5269
- if (size > sizeof(*attr)) {
5270
- unsigned char __user *addr;
5271
- unsigned char __user *end;
5272
- unsigned char val;
5273
-
5274
- addr = (void __user *)uattr + sizeof(*attr);
5275
- end = (void __user *)uattr + size;
5276
-
5277
- for (; addr < end; addr++) {
5278
- ret = get_user(val, addr);
5279
- if (ret)
5280
- return ret;
5281
- if (val)
5282
- goto err_size;
5283
- }
5284
- size = sizeof(*attr);
6582
+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size);
6583
+ if (ret) {
6584
+ if (ret == -E2BIG)
6585
+ goto err_size;
6586
+ return ret;
52856587 }
5286
-
5287
- ret = copy_from_user(attr, uattr, size);
5288
- if (ret)
5289
- return -EFAULT;
52906588
52916589 if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) &&
52926590 size < SCHED_ATTR_SIZE_VER1)
....@@ -5303,6 +6601,16 @@
53036601 err_size:
53046602 put_user(sizeof(*attr), &uattr->size);
53056603 return -E2BIG;
6604
+}
6605
+
6606
+static void get_params(struct task_struct *p, struct sched_attr *attr)
6607
+{
6608
+ if (task_has_dl_policy(p))
6609
+ __getparam_dl(p, attr);
6610
+ else if (task_has_rt_policy(p))
6611
+ attr->sched_priority = p->rt_priority;
6612
+ else
6613
+ attr->sched_nice = task_nice(p);
53066614 }
53076615
53086616 /**
....@@ -5366,6 +6674,8 @@
53666674 rcu_read_unlock();
53676675
53686676 if (likely(p)) {
6677
+ if (attr.sched_flags & SCHED_FLAG_KEEP_PARAMS)
6678
+ get_params(p, &attr);
53696679 retval = sched_setattr(p, &attr);
53706680 put_task_struct(p);
53716681 }
....@@ -5459,7 +6769,7 @@
54596769 {
54606770 unsigned int ksize = sizeof(*kattr);
54616771
5462
- if (!access_ok(VERIFY_WRITE, uattr, usize))
6772
+ if (!access_ok(uattr, usize))
54636773 return -EFAULT;
54646774
54656775 /*
....@@ -5487,7 +6797,7 @@
54876797 * sys_sched_getattr - similar to sched_getparam, but with sched_attr
54886798 * @pid: the pid in question.
54896799 * @uattr: structure containing the extended parameters.
5490
- * @usize: sizeof(attr) that user-space knows about, for forwards and backwards compatibility.
6800
+ * @usize: sizeof(attr) for fwd/bwd comp.
54916801 * @flags: for future extension.
54926802 */
54936803 SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
....@@ -5514,14 +6824,15 @@
55146824 kattr.sched_policy = p->policy;
55156825 if (p->sched_reset_on_fork)
55166826 kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK;
5517
- if (task_has_dl_policy(p))
5518
- __getparam_dl(p, &kattr);
5519
- else if (task_has_rt_policy(p))
5520
- kattr.sched_priority = p->rt_priority;
5521
- else
5522
- kattr.sched_nice = task_nice(p);
6827
+ get_params(p, &kattr);
6828
+ kattr.sched_flags &= SCHED_FLAG_ALL;
55236829
55246830 #ifdef CONFIG_UCLAMP_TASK
6831
+ /*
6832
+ * This could race with another potential updater, but this is fine
6833
+ * because it'll correctly read the old or the new value. We don't need
6834
+ * to guarantee who wins the race as long as it doesn't return garbage.
6835
+ */
55256836 kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value;
55266837 kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value;
55276838 #endif
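
There is no glibc wrapper for sched_getattr(2), so a userspace reader of these fields goes through syscall(2). A sketch, assuming __NR_sched_getattr is provided via <sys/syscall.h> and using the SCHED_ATTR_SIZE_VER1 layout documented in the sched_setattr(2) man page:

#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;		/* SCHED_DEADLINE parameters */
	uint64_t sched_deadline;
	uint64_t sched_period;
	uint32_t sched_util_min;	/* VER1: utilization clamps */
	uint32_t sched_util_max;
};

int main(void)
{
	struct sched_attr attr = { 0 };

	/* pid 0 queries the calling thread; the final 0 is the flags argument. */
	if (syscall(__NR_sched_getattr, 0, &attr, sizeof(attr), 0) == -1) {
		perror("sched_getattr");
		return 1;
	}
	printf("policy=%u nice=%d util_min=%u util_max=%u\n",
	       (unsigned)attr.sched_policy, (int)attr.sched_nice,
	       (unsigned)attr.sched_util_min, (unsigned)attr.sched_util_max);
	return 0;
}
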
....@@ -5540,6 +6851,7 @@
55406851 cpumask_var_t cpus_allowed, new_mask;
55416852 struct task_struct *p;
55426853 int retval;
6854
+ int skip = 0;
55436855
55446856 rcu_read_lock();
55456857
....@@ -5575,6 +6887,9 @@
55756887 rcu_read_unlock();
55766888 }
55776889
6890
+ trace_android_vh_sched_setaffinity_early(p, in_mask, &skip);
6891
+ if (skip)
6892
+ goto out_free_new_mask;
55786893 retval = security_task_setscheduler(p);
55796894 if (retval)
55806895 goto out_free_new_mask;
....@@ -5601,7 +6916,7 @@
56016916 }
56026917 #endif
56036918 again:
5604
- retval = __set_cpus_allowed_ptr(p, new_mask, true);
6919
+ retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK);
56056920
56066921 if (!retval) {
56076922 cpuset_cpus_allowed(p, cpus_allowed);
....@@ -5615,6 +6930,9 @@
56156930 goto again;
56166931 }
56176932 }
6933
+
6934
+ trace_android_rvh_sched_setaffinity(p, in_mask, &retval);
6935
+
56186936 out_free_new_mask:
56196937 free_cpumask_var(new_mask);
56206938 out_free_cpus_allowed:
....@@ -5623,7 +6941,6 @@
56236941 put_task_struct(p);
56246942 return retval;
56256943 }
5626
-EXPORT_SYMBOL_GPL(sched_setaffinity);
56276944
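
The user-visible entry point for this path is sched_setaffinity(2). A minimal sketch, not part of the patch, that restricts the calling thread to CPU 0:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);	/* allow CPU 0 only */

	/* pid 0 means the calling thread. */
	if (sched_setaffinity(0, sizeof(set), &set) == -1) {
		perror("sched_setaffinity");
		return 1;
	}
	printf("now restricted to CPU 0\n");
	return 0;
}
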
56286945 static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
56296946 struct cpumask *new_mask)
....@@ -5742,6 +7059,8 @@
57427059 schedstat_inc(rq->yld_count);
57437060 current->sched_class->yield_task(rq);
57447061
7062
+ trace_android_rvh_do_sched_yield(rq);
7063
+
57457064 preempt_disable();
57467065 rq_unlock_irq(rq, &rf);
57477066 sched_preempt_enable_no_resched();
....@@ -5755,7 +7074,7 @@
57557074 return 0;
57567075 }
57577076
5758
-#ifndef CONFIG_PREEMPT
7077
+#ifndef CONFIG_PREEMPTION
57597078 int __sched _cond_resched(void)
57607079 {
57617080 if (should_resched(0)) {
....@@ -5772,7 +7091,7 @@
57727091 * __cond_resched_lock() - if a reschedule is pending, drop the given lock,
57737092 * call schedule, and on return reacquire the lock.
57747093 *
5775
- * This works OK both with and without CONFIG_PREEMPT. We do strange low-level
7094
+ * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level
57767095 * operations here to prevent schedule() from being called twice (once via
57777096 * spin_unlock(), once by hand).
57787097 */
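
A hedged in-kernel sketch of the usual calling pattern for this helper through cond_resched_lock(); my_lock, my_list and struct foo are illustrative names, not from this file:

#include <linux/list.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

struct foo {
	struct list_head node;
};

static LIST_HEAD(my_list);
static DEFINE_SPINLOCK(my_lock);

static void scan_all(void)
{
	struct foo *f;

	spin_lock(&my_lock);
	list_for_each_entry(f, &my_list, node) {
		/* ... per-entry work ... */

		/*
		 * If a reschedule is pending, cond_resched_lock() drops
		 * my_lock, calls schedule() and retakes the lock. Bail out
		 * here because the list may have changed in the meantime.
		 */
		if (cond_resched_lock(&my_lock))
			break;
	}
	spin_unlock(&my_lock);
}
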
....@@ -5876,7 +7195,7 @@
58767195 if (task_running(p_rq, p) || p->state)
58777196 goto out_unlock;
58787197
5879
- yielded = curr->sched_class->yield_to_task(rq, p, preempt);
7198
+ yielded = curr->sched_class->yield_to_task(rq, p);
58807199 if (yielded) {
58817200 schedstat_inc(rq->yld_count);
58827201 /*
....@@ -6042,7 +7361,7 @@
60427361 * an error code.
60437362 */
60447363 SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
6045
- struct timespec __user *, interval)
7364
+ struct __kernel_timespec __user *, interval)
60467365 {
60477366 struct timespec64 t;
60487367 int retval = sched_rr_get_interval(pid, &t);
....@@ -6053,16 +7372,15 @@
60537372 return retval;
60547373 }
60557374
6056
-#ifdef CONFIG_COMPAT
6057
-COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval,
6058
- compat_pid_t, pid,
6059
- struct compat_timespec __user *, interval)
7375
+#ifdef CONFIG_COMPAT_32BIT_TIME
7376
+SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid,
7377
+ struct old_timespec32 __user *, interval)
60607378 {
60617379 struct timespec64 t;
60627380 int retval = sched_rr_get_interval(pid, &t);
60637381
60647382 if (retval == 0)
6065
- retval = compat_put_timespec64(&t, interval);
7383
+ retval = put_old_timespec32(&t, interval);
60667384 return retval;
60677385 }
60687386 #endif
....@@ -6075,10 +7393,10 @@
60757393 if (!try_get_task_stack(p))
60767394 return;
60777395
6078
- printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p));
7396
+ pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p));
60797397
60807398 if (p->state == TASK_RUNNING)
6081
- printk(KERN_CONT " running task ");
7399
+ pr_cont(" running task ");
60827400 #ifdef CONFIG_DEBUG_STACK_USAGE
60837401 free = stack_not_used(p);
60847402 #endif
....@@ -6087,12 +7405,13 @@
60877405 if (pid_alive(p))
60887406 ppid = task_pid_nr(rcu_dereference(p->real_parent));
60897407 rcu_read_unlock();
6090
- printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free,
6091
- task_pid_nr(p), ppid,
7408
+ pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n",
7409
+ free, task_pid_nr(p), ppid,
60927410 (unsigned long)task_thread_info(p)->flags);
60937411
60947412 print_worker_info(KERN_INFO, p);
6095
- show_stack(p, NULL);
7413
+ trace_android_vh_sched_show_task(p);
7414
+ show_stack(p, NULL, KERN_INFO);
60967415 put_task_stack(p);
60977416 }
60987417 EXPORT_SYMBOL_GPL(sched_show_task);
....@@ -6123,13 +7442,6 @@
61237442 {
61247443 struct task_struct *g, *p;
61257444
6126
-#if BITS_PER_LONG == 32
6127
- printk(KERN_INFO
6128
- " task PC stack pid father\n");
6129
-#else
6130
- printk(KERN_INFO
6131
- " task PC stack pid father\n");
6132
-#endif
61337445 rcu_read_lock();
61347446 for_each_process_thread(g, p) {
61357447 /*
....@@ -6165,7 +7477,7 @@
61657477 * NOTE: this function does not set the idle thread's NEED_RESCHED
61667478 * flag, to make booting more robust.
61677479 */
6168
-void init_idle(struct task_struct *idle, int cpu)
7480
+void __init init_idle(struct task_struct *idle, int cpu)
61697481 {
61707482 struct rq *rq = cpu_rq(cpu);
61717483 unsigned long flags;
....@@ -6179,9 +7491,6 @@
61797491 idle->se.exec_start = sched_clock();
61807492 idle->flags |= PF_IDLE;
61817493
6182
- scs_task_reset(idle);
6183
- kasan_unpoison_task_stack(idle);
6184
-
61857494 #ifdef CONFIG_SMP
61867495 /*
61877496	 * It's possible that init_idle() gets called multiple times on a task,
....@@ -6189,7 +7498,7 @@
61897498 *
61907499 * And since this is boot we can forgo the serialization.
61917500 */
6192
- set_cpus_allowed_common(idle, cpumask_of(cpu));
7501
+ set_cpus_allowed_common(idle, cpumask_of(cpu), 0);
61937502 #endif
61947503 /*
61957504 * We're having a chicken and egg problem, even though we are
....@@ -6205,7 +7514,8 @@
62057514 __set_task_cpu(idle, cpu);
62067515 rcu_read_unlock();
62077516
6208
- rq->curr = rq->idle = idle;
7517
+ rq->idle = idle;
7518
+ rcu_assign_pointer(rq->curr, idle);
62097519 idle->on_rq = TASK_ON_RQ_QUEUED;
62107520 #ifdef CONFIG_SMP
62117521 idle->on_cpu = 1;
....@@ -6245,7 +7555,7 @@
62457555 }
62467556
62477557 int task_can_attach(struct task_struct *p,
6248
- const struct cpumask *cs_cpus_allowed)
7558
+ const struct cpumask *cs_effective_cpus)
62497559 {
62507560 int ret = 0;
62517561
....@@ -6264,8 +7574,13 @@
62647574 }
62657575
62667576 if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
6267
- cs_cpus_allowed))
6268
- ret = dl_task_can_attach(p, cs_cpus_allowed);
7577
+ cs_effective_cpus)) {
7578
+ int cpu = cpumask_any_and(cpu_active_mask, cs_effective_cpus);
7579
+
7580
+ if (unlikely(cpu >= nr_cpu_ids))
7581
+ return -EINVAL;
7582
+ ret = dl_cpu_busy(cpu, p);
7583
+ }
62697584
62707585 out:
62717586 return ret;
....@@ -6316,7 +7631,7 @@
63167631 if (queued)
63177632 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
63187633 if (running)
6319
- set_curr_task(rq, p);
7634
+ set_next_task(rq, p);
63207635 task_rq_unlock(rq, p, &rf);
63217636 }
63227637 #endif /* CONFIG_NUMA_BALANCING */
....@@ -6342,125 +7657,163 @@
63427657 /* finish_cpu(), as ran on the BP, will clean up the active_mm state */
63437658 }
63447659
6345
-/*
6346
- * Since this CPU is going 'away' for a while, fold any nr_active delta
6347
- * we might have. Assumes we're called after migrate_tasks() so that the
6348
- * nr_active count is stable. We need to take the teardown thread which
6349
- * is calling this into account, so we hand in adjust = 1 to the load
6350
- * calculation.
6351
- *
6352
- * Also see the comment "Global load-average calculations".
6353
- */
6354
-static void calc_load_migrate(struct rq *rq)
7660
+static int __balance_push_cpu_stop(void *arg)
63557661 {
6356
- long delta = calc_load_fold_active(rq, 1);
6357
- if (delta)
6358
- atomic_long_add(delta, &calc_load_tasks);
6359
-}
7662
+ struct task_struct *p = arg;
7663
+ struct rq *rq = this_rq();
7664
+ struct rq_flags rf;
7665
+ int cpu;
63607666
6361
-static void put_prev_task_fake(struct rq *rq, struct task_struct *prev)
6362
-{
6363
-}
7667
+ raw_spin_lock_irq(&p->pi_lock);
7668
+ rq_lock(rq, &rf);
63647669
6365
-static const struct sched_class fake_sched_class = {
6366
- .put_prev_task = put_prev_task_fake,
6367
-};
6368
-
6369
-static struct task_struct fake_task = {
6370
- /*
6371
- * Avoid pull_{rt,dl}_task()
6372
- */
6373
- .prio = MAX_PRIO + 1,
6374
- .sched_class = &fake_sched_class,
6375
-};
6376
-
6377
-/*
6378
- * Migrate all tasks from the rq, sleeping tasks will be migrated by
6379
- * try_to_wake_up()->select_task_rq().
6380
- *
6381
- * Called with rq->lock held even though we'er in stop_machine() and
6382
- * there's no concurrency possible, we hold the required locks anyway
6383
- * because of lock validation efforts.
6384
- */
6385
-static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
6386
-{
6387
- struct rq *rq = dead_rq;
6388
- struct task_struct *next, *stop = rq->stop;
6389
- struct rq_flags orf = *rf;
6390
- int dest_cpu;
6391
-
6392
- /*
6393
- * Fudge the rq selection such that the below task selection loop
6394
- * doesn't get stuck on the currently eligible stop task.
6395
- *
6396
- * We're currently inside stop_machine() and the rq is either stuck
6397
- * in the stop_machine_cpu_stop() loop, or we're executing this code,
6398
- * either way we should never end up calling schedule() until we're
6399
- * done here.
6400
- */
6401
- rq->stop = NULL;
6402
-
6403
- /*
6404
- * put_prev_task() and pick_next_task() sched
6405
- * class method both need to have an up-to-date
6406
- * value of rq->clock[_task]
6407
- */
64087670 update_rq_clock(rq);
64097671
6410
- for (;;) {
6411
- /*
6412
- * There's this thread running, bail when that's the only
6413
- * remaining thread:
6414
- */
6415
- if (rq->nr_running == 1)
6416
- break;
6417
-
6418
- /*
6419
- * pick_next_task() assumes pinned rq->lock:
6420
- */
6421
- next = pick_next_task(rq, &fake_task, rf);
6422
- BUG_ON(!next);
6423
- put_prev_task(rq, next);
6424
-
6425
- WARN_ON_ONCE(__migrate_disabled(next));
6426
-
6427
- /*
6428
- * Rules for changing task_struct::cpus_mask are holding
6429
- * both pi_lock and rq->lock, such that holding either
6430
- * stabilizes the mask.
6431
- *
6432
- * Drop rq->lock is not quite as disastrous as it usually is
6433
- * because !cpu_active at this point, which means load-balance
6434
- * will not interfere. Also, stop-machine.
6435
- */
6436
- rq_unlock(rq, rf);
6437
- raw_spin_lock(&next->pi_lock);
6438
- rq_relock(rq, rf);
6439
-
6440
- /*
6441
- * Since we're inside stop-machine, _nothing_ should have
6442
- * changed the task, WARN if weird stuff happened, because in
6443
- * that case the above rq->lock drop is a fail too.
6444
- */
6445
- if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
6446
- raw_spin_unlock(&next->pi_lock);
6447
- continue;
6448
- }
6449
-
6450
- /* Find suitable destination for @next, with force if needed. */
6451
- dest_cpu = select_fallback_rq(dead_rq->cpu, next);
6452
- rq = __migrate_task(rq, rf, next, dest_cpu);
6453
- if (rq != dead_rq) {
6454
- rq_unlock(rq, rf);
6455
- rq = dead_rq;
6456
- *rf = orf;
6457
- rq_relock(rq, rf);
6458
- }
6459
- raw_spin_unlock(&next->pi_lock);
7672
+ if (task_rq(p) == rq && task_on_rq_queued(p)) {
7673
+ cpu = select_fallback_rq(rq->cpu, p);
7674
+ rq = __migrate_task(rq, &rf, p, cpu);
64607675 }
64617676
6462
- rq->stop = stop;
7677
+ rq_unlock(rq, &rf);
7678
+ raw_spin_unlock_irq(&p->pi_lock);
7679
+
7680
+ put_task_struct(p);
7681
+
7682
+ return 0;
64637683 }
7684
+
7685
+static DEFINE_PER_CPU(struct cpu_stop_work, push_work);
7686
+
7687
+/*
7688
+ * Ensure we only run per-cpu kthreads once the CPU goes !active.
7689
+ */
7690
+
7691
+
7692
+static void balance_push(struct rq *rq)
7693
+{
7694
+ struct task_struct *push_task = rq->curr;
7695
+
7696
+ lockdep_assert_held(&rq->lock);
7697
+ SCHED_WARN_ON(rq->cpu != smp_processor_id());
7698
+
7699
+ /*
7700
+ * Both the cpu-hotplug and stop task are in this case and are
7701
+ * required to complete the hotplug process.
7702
+ */
7703
+ if (is_per_cpu_kthread(push_task) || is_migration_disabled(push_task)) {
7704
+ /*
7705
+ * If this is the idle task on the outgoing CPU try to wake
7706
+ * up the hotplug control thread which might wait for the
7707
+ * last task to vanish. The rcuwait_active() check is
7708
+ * accurate here because the waiter is pinned on this CPU
7709
+ * and can't obviously be running in parallel.
7710
+ *
7711
+ * On RT kernels this also has to check whether there are
7712
+ * pinned and scheduled out tasks on the runqueue. They
7713
+ * need to leave the migrate disabled section first.
7714
+ */
7715
+ if (!rq->nr_running && !rq_has_pinned_tasks(rq) &&
7716
+ rcuwait_active(&rq->hotplug_wait)) {
7717
+ raw_spin_unlock(&rq->lock);
7718
+ rcuwait_wake_up(&rq->hotplug_wait);
7719
+ raw_spin_lock(&rq->lock);
7720
+ }
7721
+ return;
7722
+ }
7723
+
7724
+ get_task_struct(push_task);
7725
+ /*
7726
+ * Temporarily drop rq->lock such that we can wake-up the stop task.
7727
+ * Both preemption and IRQs are still disabled.
7728
+ */
7729
+ raw_spin_unlock(&rq->lock);
7730
+ stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task,
7731
+ this_cpu_ptr(&push_work));
7732
+ /*
7733
+ * At this point need_resched() is true and we'll take the loop in
7734
+ * schedule(). The next pick is obviously going to be the stop task
7735
+ * which is_per_cpu_kthread() and will push this task away.
7736
+ */
7737
+ raw_spin_lock(&rq->lock);
7738
+}
7739
+
7740
+static void balance_push_set(int cpu, bool on)
7741
+{
7742
+ struct rq *rq = cpu_rq(cpu);
7743
+ struct rq_flags rf;
7744
+
7745
+ rq_lock_irqsave(rq, &rf);
7746
+ if (on)
7747
+ rq->balance_flags |= BALANCE_PUSH;
7748
+ else
7749
+ rq->balance_flags &= ~BALANCE_PUSH;
7750
+ rq_unlock_irqrestore(rq, &rf);
7751
+}
7752
+
7753
+/*
7754
+ * Invoked from a CPU's hotplug control thread after the CPU has been marked
7755
+ * inactive. All tasks which are not per CPU kernel threads are either
7756
+ * pushed off this CPU now via balance_push() or placed on a different CPU
7757
+ * during wakeup. Wait until the CPU is quiescent.
7758
+ */
7759
+static void balance_hotplug_wait(void)
7760
+{
7761
+ struct rq *rq = this_rq();
7762
+
7763
+ rcuwait_wait_event(&rq->hotplug_wait,
7764
+ rq->nr_running == 1 && !rq_has_pinned_tasks(rq),
7765
+ TASK_UNINTERRUPTIBLE);
7766
+}
7767
+
7768
+static int drain_rq_cpu_stop(void *data)
7769
+{
7770
+#ifndef CONFIG_PREEMPT_RT
7771
+ struct rq *rq = this_rq();
7772
+ struct rq_flags rf;
7773
+
7774
+ rq_lock_irqsave(rq, &rf);
7775
+ migrate_tasks(rq, &rf, false);
7776
+ rq_unlock_irqrestore(rq, &rf);
7777
+#endif
7778
+ return 0;
7779
+}
7780
+
7781
+int sched_cpu_drain_rq(unsigned int cpu)
7782
+{
7783
+ struct cpu_stop_work *rq_drain = &(cpu_rq(cpu)->drain);
7784
+ struct cpu_stop_done *rq_drain_done = &(cpu_rq(cpu)->drain_done);
7785
+
7786
+ if (idle_cpu(cpu)) {
7787
+ rq_drain->done = NULL;
7788
+ return 0;
7789
+ }
7790
+
7791
+ return stop_one_cpu_async(cpu, drain_rq_cpu_stop, NULL, rq_drain,
7792
+ rq_drain_done);
7793
+}
7794
+
7795
+void sched_cpu_drain_rq_wait(unsigned int cpu)
7796
+{
7797
+ struct cpu_stop_work *rq_drain = &(cpu_rq(cpu)->drain);
7798
+
7799
+ if (rq_drain->done)
7800
+ cpu_stop_work_wait(rq_drain);
7801
+}
7802
+
7803
+#else
7804
+
7805
+static inline void balance_push(struct rq *rq)
7806
+{
7807
+}
7808
+
7809
+static inline void balance_push_set(int cpu, bool on)
7810
+{
7811
+}
7812
+
7813
+static inline void balance_hotplug_wait(void)
7814
+{
7815
+}
7816
+
64647817 #endif /* CONFIG_HOTPLUG_CPU */
64657818
64667819 void set_rq_online(struct rq *rq)
....@@ -6531,8 +7884,10 @@
65317884 static int cpuset_cpu_inactive(unsigned int cpu)
65327885 {
65337886 if (!cpuhp_tasks_frozen) {
6534
- if (dl_cpu_busy(cpu))
6535
- return -EBUSY;
7887
+ int ret = dl_cpu_busy(cpu, NULL);
7888
+
7889
+ if (ret)
7890
+ return ret;
65367891 cpuset_update_active_cpus();
65377892 } else {
65387893 num_cpus_frozen++;
....@@ -6545,6 +7900,8 @@
65457900 {
65467901 struct rq *rq = cpu_rq(cpu);
65477902 struct rq_flags rf;
7903
+
7904
+ balance_push_set(cpu, false);
65487905
65497906 #ifdef CONFIG_SCHED_SMT
65507907 /*
....@@ -6581,19 +7938,39 @@
65817938 return 0;
65827939 }
65837940
6584
-int sched_cpu_deactivate(unsigned int cpu)
7941
+int sched_cpus_activate(struct cpumask *cpus)
65857942 {
7943
+ unsigned int cpu;
7944
+
7945
+ for_each_cpu(cpu, cpus) {
7946
+ if (sched_cpu_activate(cpu)) {
7947
+ for_each_cpu_and(cpu, cpus, cpu_active_mask)
7948
+ sched_cpu_deactivate(cpu);
7949
+
7950
+ return -EBUSY;
7951
+ }
7952
+ }
7953
+
7954
+ return 0;
7955
+}
7956
+
7957
+int _sched_cpu_deactivate(unsigned int cpu)
7958
+{
7959
+ struct rq *rq = cpu_rq(cpu);
7960
+ struct rq_flags rf;
65867961 int ret;
65877962
65887963 set_cpu_active(cpu, false);
6589
- /*
6590
- * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU
6591
- * users of this state to go away such that all new such users will
6592
- * observe it.
6593
- *
6594
- * Do sync before park smpboot threads to take care the rcu boost case.
6595
- */
6596
- synchronize_rcu_mult(call_rcu, call_rcu_sched);
7964
+
7965
+ balance_push_set(cpu, true);
7966
+
7967
+ rq_lock_irqsave(rq, &rf);
7968
+ if (rq->rd) {
7969
+ update_rq_clock(rq);
7970
+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
7971
+ set_rq_offline(rq);
7972
+ }
7973
+ rq_unlock_irqrestore(rq, &rf);
65977974
65987975 #ifdef CONFIG_SCHED_SMT
65997976 /*
....@@ -6608,10 +7985,51 @@
66087985
66097986 ret = cpuset_cpu_inactive(cpu);
66107987 if (ret) {
7988
+ balance_push_set(cpu, false);
66117989 set_cpu_active(cpu, true);
66127990 return ret;
66137991 }
66147992 sched_domains_numa_masks_clear(cpu);
7993
+
7994
+ update_max_interval();
7995
+
7996
+ return 0;
7997
+}
7998
+
7999
+int sched_cpu_deactivate(unsigned int cpu)
8000
+{
8001
+ int ret = _sched_cpu_deactivate(cpu);
8002
+
8003
+ if (ret)
8004
+ return ret;
8005
+
8006
+ /*
8007
+ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU
8008
+ * users of this state to go away such that all new such users will
8009
+ * observe it.
8010
+ *
8011
+ * Do sync before park smpboot threads to take care the rcu boost case.
8012
+ */
8013
+ synchronize_rcu();
8014
+
8015
+ return 0;
8016
+}
8017
+
8018
+int sched_cpus_deactivate_nosync(struct cpumask *cpus)
8019
+{
8020
+ unsigned int cpu;
8021
+
8022
+ for_each_cpu(cpu, cpus) {
8023
+ if (_sched_cpu_deactivate(cpu)) {
8024
+ for_each_cpu(cpu, cpus) {
8025
+ if (!cpu_active(cpu))
8026
+ sched_cpu_activate(cpu);
8027
+ }
8028
+
8029
+ return -EBUSY;
8030
+ }
8031
+ }
8032
+
66158033 return 0;
66168034 }
66178035
....@@ -6620,37 +8038,67 @@
66208038 struct rq *rq = cpu_rq(cpu);
66218039
66228040 rq->calc_load_update = calc_load_update;
6623
- update_max_interval();
66248041 }
66258042
66268043 int sched_cpu_starting(unsigned int cpu)
66278044 {
66288045 sched_rq_cpu_starting(cpu);
66298046 sched_tick_start(cpu);
8047
+ trace_android_rvh_sched_cpu_starting(cpu);
66308048 return 0;
66318049 }
66328050
66338051 #ifdef CONFIG_HOTPLUG_CPU
8052
+
8053
+/*
8054
+ * Invoked immediately before the stopper thread is invoked to bring the
8055
+ * CPU down completely. At this point all per CPU kthreads except the
8056
+ * hotplug thread (current) and the stopper thread (inactive) have been
8057
+ * either parked or have been unbound from the outgoing CPU. Ensure that
8058
+ * any of those which might be on the way out are gone.
8059
+ *
8060
+ * If after this point a bound task is being woken on this CPU then the
8061
+ * responsible hotplug callback has failed to do its job.
8062
+ * sched_cpu_dying() will catch it with the appropriate fireworks.
8063
+ */
8064
+int sched_cpu_wait_empty(unsigned int cpu)
8065
+{
8066
+ balance_hotplug_wait();
8067
+ return 0;
8068
+}
8069
+
8070
+/*
8071
+ * Since this CPU is going 'away' for a while, fold any nr_active delta we
8072
+ * might have. Called from the CPU stopper task after ensuring that the
8073
+ * stopper is the last running task on the CPU, so nr_active count is
8074
+ * stable. We need to take the teardown thread which is calling this into
8075
+ * account, so we hand in adjust = 1 to the load calculation.
8076
+ *
8077
+ * Also see the comment "Global load-average calculations".
8078
+ */
8079
+static void calc_load_migrate(struct rq *rq)
8080
+{
8081
+ long delta = calc_load_fold_active(rq, 1);
8082
+
8083
+ if (delta)
8084
+ atomic_long_add(delta, &calc_load_tasks);
8085
+}
8086
+
66348087 int sched_cpu_dying(unsigned int cpu)
66358088 {
66368089 struct rq *rq = cpu_rq(cpu);
66378090 struct rq_flags rf;
66388091
66398092 /* Handle pending wakeups and then migrate everything off */
6640
- sched_ttwu_pending();
66418093 sched_tick_stop(cpu);
66428094
66438095 rq_lock_irqsave(rq, &rf);
6644
- if (rq->rd) {
6645
- BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
6646
- set_rq_offline(rq);
6647
- }
6648
- migrate_tasks(rq, &rf);
6649
- BUG_ON(rq->nr_running != 1);
8096
+ BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq));
66508097 rq_unlock_irqrestore(rq, &rf);
66518098
8099
+ trace_android_rvh_sched_cpu_dying(cpu);
8100
+
66528101 calc_load_migrate(rq);
6653
- update_max_interval();
66548102 nohz_balance_exit_idle(rq);
66558103 hrtick_clear(rq);
66568104 return 0;
....@@ -6664,18 +8112,16 @@
66648112 /*
66658113 * There's no userspace yet to cause hotplug operations; hence all the
66668114 * CPU masks are stable and all blatant races in the below code cannot
6667
- * happen. The hotplug lock is nevertheless taken to satisfy lockdep,
6668
- * but there won't be any contention on it.
8115
+ * happen.
66698116 */
6670
- cpus_read_lock();
66718117 mutex_lock(&sched_domains_mutex);
66728118 sched_init_domains(cpu_active_mask);
66738119 mutex_unlock(&sched_domains_mutex);
6674
- cpus_read_unlock();
66758120
66768121 /* Move init over to a non-isolated CPU */
66778122 if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
66788123 BUG();
8124
+
66798125 sched_init_granularity();
66808126
66818127 init_sched_rt_class();
....@@ -6686,7 +8132,7 @@
66868132
66878133 static int __init migration_init(void)
66888134 {
6689
- sched_rq_cpu_starting(smp_processor_id());
8135
+ sched_cpu_starting(smp_processor_id());
66908136 return 0;
66918137 }
66928138 early_initcall(migration_init);
....@@ -6711,7 +8157,9 @@
67118157 * Every task in system belongs to this group at bootup.
67128158 */
67138159 struct task_group root_task_group;
8160
+EXPORT_SYMBOL_GPL(root_task_group);
67148161 LIST_HEAD(task_groups);
8162
+EXPORT_SYMBOL_GPL(task_groups);
67158163
67168164 /* Cacheline aligned slab cache for task_group */
67178165 static struct kmem_cache *task_group_cache __read_mostly;
....@@ -6722,19 +8170,27 @@
67228170
67238171 void __init sched_init(void)
67248172 {
6725
- int i, j;
6726
- unsigned long alloc_size = 0, ptr;
8173
+ unsigned long ptr = 0;
8174
+ int i;
8175
+
8176
+ /* Make sure the linker didn't screw up */
8177
+ BUG_ON(&idle_sched_class + 1 != &fair_sched_class ||
8178
+ &fair_sched_class + 1 != &rt_sched_class ||
8179
+ &rt_sched_class + 1 != &dl_sched_class);
8180
+#ifdef CONFIG_SMP
8181
+ BUG_ON(&dl_sched_class + 1 != &stop_sched_class);
8182
+#endif
67278183
67288184 wait_bit_init();
67298185
67308186 #ifdef CONFIG_FAIR_GROUP_SCHED
6731
- alloc_size += 2 * nr_cpu_ids * sizeof(void **);
8187
+ ptr += 2 * nr_cpu_ids * sizeof(void **);
67328188 #endif
67338189 #ifdef CONFIG_RT_GROUP_SCHED
6734
- alloc_size += 2 * nr_cpu_ids * sizeof(void **);
8190
+ ptr += 2 * nr_cpu_ids * sizeof(void **);
67358191 #endif
6736
- if (alloc_size) {
6737
- ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
8192
+ if (ptr) {
8193
+ ptr = (unsigned long)kzalloc(ptr, GFP_NOWAIT);
67388194
67398195 #ifdef CONFIG_FAIR_GROUP_SCHED
67408196 root_task_group.se = (struct sched_entity **)ptr;
....@@ -6743,6 +8199,8 @@
67438199 root_task_group.cfs_rq = (struct cfs_rq **)ptr;
67448200 ptr += nr_cpu_ids * sizeof(void **);
67458201
8202
+ root_task_group.shares = ROOT_TASK_GROUP_LOAD;
8203
+ init_cfs_bandwidth(&root_task_group.cfs_bandwidth);
67468204 #endif /* CONFIG_FAIR_GROUP_SCHED */
67478205 #ifdef CONFIG_RT_GROUP_SCHED
67488206 root_task_group.rt_se = (struct sched_rt_entity **)ptr;
....@@ -6795,7 +8253,6 @@
67958253 init_rt_rq(&rq->rt);
67968254 init_dl_rq(&rq->dl);
67978255 #ifdef CONFIG_FAIR_GROUP_SCHED
6798
- root_task_group.shares = ROOT_TASK_GROUP_LOAD;
67998256 INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
68008257 rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
68018258 /*
....@@ -6817,7 +8274,6 @@
68178274 * We achieve this by letting root_task_group's tasks sit
68188275 * directly in rq->cfs (i.e root_task_group->se[] = NULL).
68198276 */
6820
- init_cfs_bandwidth(&root_task_group.cfs_bandwidth);
68218277 init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
68228278 #endif /* CONFIG_FAIR_GROUP_SCHED */
68238279
....@@ -6825,10 +8281,6 @@
68258281 #ifdef CONFIG_RT_GROUP_SCHED
68268282 init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
68278283 #endif
6828
-
6829
- for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
6830
- rq->cpu_load[j] = 0;
6831
-
68328284 #ifdef CONFIG_SMP
68338285 rq->sd = NULL;
68348286 rq->rd = NULL;
....@@ -6847,16 +8299,20 @@
68478299
68488300 rq_attach_root(rq, &def_root_domain);
68498301 #ifdef CONFIG_NO_HZ_COMMON
6850
- rq->last_load_update_tick = jiffies;
68518302 rq->last_blocked_load_update_tick = jiffies;
68528303 atomic_set(&rq->nohz_flags, 0);
8304
+
8305
+ rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func);
8306
+#endif
8307
+#ifdef CONFIG_HOTPLUG_CPU
8308
+ rcuwait_init(&rq->hotplug_wait);
68538309 #endif
68548310 #endif /* CONFIG_SMP */
68558311 hrtick_rq_init(rq);
68568312 atomic_set(&rq->nr_iowait, 0);
68578313 }
68588314
6859
- set_load_weight(&init_task, false);
8315
+ set_load_weight(&init_task);
68608316
68618317 /*
68628318 * The boot idle thread does lazy MMU switching as well:
....@@ -6925,7 +8381,7 @@
69258381 rcu_sleep_check();
69268382
69278383 if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
6928
- !is_idle_task(current)) ||
8384
+ !is_idle_task(current) && !current->non_block_count) ||
69298385 system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING ||
69308386 oops_in_progress)
69318387 return;
....@@ -6941,8 +8397,8 @@
69418397 "BUG: sleeping function called from invalid context at %s:%d\n",
69428398 file, line);
69438399 printk(KERN_ERR
6944
- "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
6945
- in_atomic(), irqs_disabled(),
8400
+ "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n",
8401
+ in_atomic(), irqs_disabled(), current->non_block_count,
69468402 current->pid, current->comm);
69478403
69488404 if (task_stack_end_corrupted(current))
....@@ -6954,13 +8410,76 @@
69548410 if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
69558411 && !preempt_count_equals(preempt_offset)) {
69568412 pr_err("Preemption disabled at:");
6957
- print_ip_sym(preempt_disable_ip);
6958
- pr_cont("\n");
8413
+ print_ip_sym(KERN_ERR, preempt_disable_ip);
69598414 }
8415
+
8416
+ trace_android_rvh_schedule_bug(NULL);
8417
+
69608418 dump_stack();
69618419 add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
69628420 }
69638421 EXPORT_SYMBOL(___might_sleep);
8422
+
8423
+void __cant_sleep(const char *file, int line, int preempt_offset)
8424
+{
8425
+ static unsigned long prev_jiffy;
8426
+
8427
+ if (irqs_disabled())
8428
+ return;
8429
+
8430
+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT))
8431
+ return;
8432
+
8433
+ if (preempt_count() > preempt_offset)
8434
+ return;
8435
+
8436
+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
8437
+ return;
8438
+ prev_jiffy = jiffies;
8439
+
8440
+ printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line);
8441
+ printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
8442
+ in_atomic(), irqs_disabled(),
8443
+ current->pid, current->comm);
8444
+
8445
+ debug_show_held_locks(current);
8446
+ dump_stack();
8447
+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
8448
+}
8449
+EXPORT_SYMBOL_GPL(__cant_sleep);
8450
+
8451
+#ifdef CONFIG_SMP
8452
+void __cant_migrate(const char *file, int line)
8453
+{
8454
+ static unsigned long prev_jiffy;
8455
+
8456
+ if (irqs_disabled())
8457
+ return;
8458
+
8459
+ if (is_migration_disabled(current))
8460
+ return;
8461
+
8462
+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT))
8463
+ return;
8464
+
8465
+ if (preempt_count() > 0)
8466
+ return;
8467
+
8468
+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
8469
+ return;
8470
+ prev_jiffy = jiffies;
8471
+
8472
+ pr_err("BUG: assuming non migratable context at %s:%d\n", file, line);
8473
+ pr_err("in_atomic(): %d, irqs_disabled(): %d, migration_disabled() %u pid: %d, name: %s\n",
8474
+ in_atomic(), irqs_disabled(), is_migration_disabled(current),
8475
+ current->pid, current->comm);
8476
+
8477
+ debug_show_held_locks(current);
8478
+ dump_stack();
8479
+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
8480
+}
8481
+EXPORT_SYMBOL_GPL(__cant_migrate);
8482
+#endif
69648483 #endif
69658484
69668485 #ifdef CONFIG_MAGIC_SYSRQ
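The hunk above extends ___might_sleep() with a current->non_block_count check and adds the __cant_sleep()/__cant_migrate() assertions. A minimal caller-side sketch of how these checks are normally reached, assuming this tree also carries the usual CONFIG_DEBUG_ATOMIC_SLEEP wrappers from <linux/kernel.h> (cant_sleep(), cant_migrate(), non_block_start()/non_block_end()); those wrapper names are an assumption of this sketch, not something introduced by the diff:

#include <linux/kernel.h>
#include <linux/sched.h>

static void example_atomic_path(void)
{
	cant_sleep();	/* ends up in __cant_sleep(); warns if this context could in fact block */
	cant_migrate();	/* ends up in __cant_migrate(); warns if the task could still migrate */
}

static void example_non_blocking_region(void)
{
	non_block_start();	/* increments current->non_block_count ...              */
	/* ... so ___might_sleep() above now also fires for sleeps in this region */
	non_block_end();
}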
....@@ -7029,7 +8548,7 @@
70298548
70308549 #ifdef CONFIG_IA64
70318550 /**
7032
- * set_curr_task - set the current task for a given CPU.
8551
+ * ia64_set_curr_task - set the current task for a given CPU.
70338552 * @cpu: the processor in question.
70348553 * @p: the task pointer to set.
70358554 *
....@@ -7195,8 +8714,15 @@
71958714
71968715 if (queued)
71978716 enqueue_task(rq, tsk, queue_flags);
7198
- if (running)
7199
- set_curr_task(rq, tsk);
8717
+ if (running) {
8718
+ set_next_task(rq, tsk);
8719
+ /*
8720
+ * After changing group, the running task may have joined a
8721
+ * throttled one but it's still the running task. Trigger a
8722
+ * resched to make sure that task can still run.
8723
+ */
8724
+ resched_curr(rq);
8725
+ }
72008726
72018727 task_rq_unlock(rq, tsk, &rf);
72028728 }
....@@ -7235,9 +8761,14 @@
72358761
72368762 #ifdef CONFIG_UCLAMP_TASK_GROUP
72378763 /* Propagate the effective uclamp value for the new group */
8764
+ mutex_lock(&uclamp_mutex);
8765
+ rcu_read_lock();
72388766 cpu_util_update_eff(css);
8767
+ rcu_read_unlock();
8768
+ mutex_unlock(&uclamp_mutex);
72398769 #endif
72408770
8771
+ trace_android_rvh_cpu_cgroup_online(css);
72418772 return 0;
72428773 }
72438774
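For context on the locking added above (and asserted further down in cpu_util_update_eff()): the effective-uclamp propagation is expected to run with uclamp_mutex held and inside an RCU read-side section. A minimal sketch of that caller contract, mirroring the cpu_cgroup_css_online() hunk; illustrative only, since cpu_util_update_eff() is local to this file:

	mutex_lock(&uclamp_mutex);	/* serialize effective-uclamp updates */
	rcu_read_lock();		/* the update walks css descendants under RCU */
	cpu_util_update_eff(css);
	rcu_read_unlock();
	mutex_unlock(&uclamp_mutex);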
....@@ -7303,6 +8834,9 @@
73038834 if (ret)
73048835 break;
73058836 }
8837
+
8838
+ trace_android_rvh_cpu_cgroup_can_attach(tset, &ret);
8839
+
73068840 return ret;
73078841 }
73088842
....@@ -7313,6 +8847,8 @@
73138847
73148848 cgroup_taskset_for_each(task, css, tset)
73158849 sched_move_task(task);
8850
+
8851
+ trace_android_rvh_cpu_cgroup_attach(tset);
73168852 }
73178853
73188854 #ifdef CONFIG_UCLAMP_TASK_GROUP
....@@ -7324,6 +8860,9 @@
73248860 unsigned int eff[UCLAMP_CNT];
73258861 enum uclamp_id clamp_id;
73268862 unsigned int clamps;
8863
+
8864
+ lockdep_assert_held(&uclamp_mutex);
8865
+ SCHED_WARN_ON(!rcu_read_lock_held());
73278866
73288867 css_for_each_descendant_pre(css, top_css) {
73298868 uc_parent = css_tg(css)->parent
....@@ -7357,7 +8896,7 @@
73578896 }
73588897
73598898 /* Immediately update descendants RUNNABLE tasks */
7360
- uclamp_update_active_tasks(css, clamps);
8899
+ uclamp_update_active_tasks(css);
73618900 }
73628901 }
73638902
....@@ -7414,6 +8953,8 @@
74148953 req = capacity_from_percent(buf);
74158954 if (req.ret)
74168955 return req.ret;
8956
+
8957
+ static_branch_enable(&sched_uclamp_used);
74178958
74188959 mutex_lock(&uclamp_mutex);
74198960 rcu_read_lock();
....@@ -7529,7 +9070,9 @@
75299070 static DEFINE_MUTEX(cfs_constraints_mutex);
75309071
75319072 const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */
7532
-const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */
9073
+static const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */
9074
+/* More than 203 days if BW_SHIFT equals 20. */
9075
+static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC;
75339076
75349077 static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
75359078
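For reference, the "more than 203 days" figure in the comment above follows from the bandwidth fixed-point limits, assuming MAX_BW is ((1ULL << (64 - BW_SHIFT)) - 1), i.e. 2^44 - 1 microseconds, as defined in kernel/sched/sched.h of this series:

	2^44 us  =  17,592,186,044,416 us
	         ~= 17,592,186 s
	         ~= 203.6 days

max_cfs_runtime is that same bound converted to nanoseconds via NSEC_PER_USEC, and the hunk below makes tg_set_cfs_bandwidth() reject any finite quota larger than it, keeping the quota small enough for the later BW_SHIFT fixed-point arithmetic.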
....@@ -7555,6 +9098,12 @@
75559098 * feasibility.
75569099 */
75579100 if (period > max_cfs_quota_period)
9101
+ return -EINVAL;
9102
+
9103
+ /*
9104
+ * Bound quota to defend quota against overflow during bandwidth shift.
9105
+ */
9106
+ if (quota != RUNTIME_INF && quota > max_cfs_runtime)
75589107 return -EINVAL;
75599108
75609109 /*
....@@ -7609,7 +9158,7 @@
76099158 return ret;
76109159 }
76119160
7612
-int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
9161
+static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
76139162 {
76149163 u64 quota, period;
76159164
....@@ -7624,7 +9173,7 @@
76249173 return tg_set_cfs_bandwidth(tg, period, quota);
76259174 }
76269175
7627
-long tg_get_cfs_quota(struct task_group *tg)
9176
+static long tg_get_cfs_quota(struct task_group *tg)
76289177 {
76299178 u64 quota_us;
76309179
....@@ -7637,7 +9186,7 @@
76379186 return quota_us;
76389187 }
76399188
7640
-int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
9189
+static int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
76419190 {
76429191 u64 quota, period;
76439192
....@@ -7650,7 +9199,7 @@
76509199 return tg_set_cfs_bandwidth(tg, period, quota);
76519200 }
76529201
7653
-long tg_get_cfs_period(struct task_group *tg)
9202
+static long tg_get_cfs_period(struct task_group *tg)
76549203 {
76559204 u64 cfs_period_us;
76569205
....@@ -8127,172 +9676,7 @@
81279676 /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
81289677 };
81299678
8130
-#undef CREATE_TRACE_POINTS
8131
-
8132
-#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
8133
-
8134
-static inline void
8135
-update_nr_migratory(struct task_struct *p, long delta)
9679
+void call_trace_sched_update_nr_running(struct rq *rq, int count)
81369680 {
8137
- if (unlikely((p->sched_class == &rt_sched_class ||
8138
- p->sched_class == &dl_sched_class) &&
8139
- p->nr_cpus_allowed > 1)) {
8140
- if (p->sched_class == &rt_sched_class)
8141
- task_rq(p)->rt.rt_nr_migratory += delta;
8142
- else
8143
- task_rq(p)->dl.dl_nr_migratory += delta;
8144
- }
9681
+ trace_sched_update_nr_running_tp(rq, count);
81459682 }
8146
-
8147
-static inline void
8148
-migrate_disable_update_cpus_allowed(struct task_struct *p)
8149
-{
8150
- p->cpus_ptr = cpumask_of(smp_processor_id());
8151
- update_nr_migratory(p, -1);
8152
- p->nr_cpus_allowed = 1;
8153
-}
8154
-
8155
-static inline void
8156
-migrate_enable_update_cpus_allowed(struct task_struct *p)
8157
-{
8158
- struct rq *rq;
8159
- struct rq_flags rf;
8160
-
8161
- rq = task_rq_lock(p, &rf);
8162
- p->cpus_ptr = &p->cpus_mask;
8163
- p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
8164
- update_nr_migratory(p, 1);
8165
- task_rq_unlock(rq, p, &rf);
8166
-}
8167
-
8168
-void migrate_disable(void)
8169
-{
8170
- preempt_disable();
8171
-
8172
- if (++current->migrate_disable == 1) {
8173
- this_rq()->nr_pinned++;
8174
- preempt_lazy_disable();
8175
-#ifdef CONFIG_SCHED_DEBUG
8176
- WARN_ON_ONCE(current->pinned_on_cpu >= 0);
8177
- current->pinned_on_cpu = smp_processor_id();
8178
-#endif
8179
- }
8180
-
8181
- preempt_enable();
8182
-}
8183
-EXPORT_SYMBOL(migrate_disable);
8184
-
8185
-static void migrate_disabled_sched(struct task_struct *p)
8186
-{
8187
- if (p->migrate_disable_scheduled)
8188
- return;
8189
-
8190
- migrate_disable_update_cpus_allowed(p);
8191
- p->migrate_disable_scheduled = 1;
8192
-}
8193
-
8194
-static DEFINE_PER_CPU(struct cpu_stop_work, migrate_work);
8195
-static DEFINE_PER_CPU(struct migration_arg, migrate_arg);
8196
-
8197
-void migrate_enable(void)
8198
-{
8199
- struct task_struct *p = current;
8200
- struct rq *rq = this_rq();
8201
- int cpu = task_cpu(p);
8202
-
8203
- WARN_ON_ONCE(p->migrate_disable <= 0);
8204
- if (p->migrate_disable > 1) {
8205
- p->migrate_disable--;
8206
- return;
8207
- }
8208
-
8209
- preempt_disable();
8210
-
8211
-#ifdef CONFIG_SCHED_DEBUG
8212
- WARN_ON_ONCE(current->pinned_on_cpu != cpu);
8213
- current->pinned_on_cpu = -1;
8214
-#endif
8215
-
8216
- WARN_ON_ONCE(rq->nr_pinned < 1);
8217
-
8218
- p->migrate_disable = 0;
8219
- rq->nr_pinned--;
8220
-#ifdef CONFIG_HOTPLUG_CPU
8221
- if (rq->nr_pinned == 0 && unlikely(!cpu_active(cpu)) &&
8222
- takedown_cpu_task)
8223
- wake_up_process(takedown_cpu_task);
8224
-#endif
8225
-
8226
- if (!p->migrate_disable_scheduled)
8227
- goto out;
8228
-
8229
- p->migrate_disable_scheduled = 0;
8230
-
8231
- migrate_enable_update_cpus_allowed(p);
8232
-
8233
- WARN_ON(smp_processor_id() != cpu);
8234
- if (!is_cpu_allowed(p, cpu)) {
8235
- struct migration_arg __percpu *arg;
8236
- struct cpu_stop_work __percpu *work;
8237
- struct rq_flags rf;
8238
-
8239
- work = this_cpu_ptr(&migrate_work);
8240
- arg = this_cpu_ptr(&migrate_arg);
8241
- WARN_ON_ONCE(!arg->done && !work->disabled && work->arg);
8242
-
8243
- arg->task = p;
8244
- arg->done = false;
8245
-
8246
- rq = task_rq_lock(p, &rf);
8247
- update_rq_clock(rq);
8248
- arg->dest_cpu = select_fallback_rq(cpu, p);
8249
- task_rq_unlock(rq, p, &rf);
8250
-
8251
- stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
8252
- arg, work);
8253
- tlb_migrate_finish(p->mm);
8254
- }
8255
-
8256
-out:
8257
- preempt_lazy_enable();
8258
- preempt_enable();
8259
-}
8260
-EXPORT_SYMBOL(migrate_enable);
8261
-
8262
-int cpu_nr_pinned(int cpu)
8263
-{
8264
- struct rq *rq = cpu_rq(cpu);
8265
-
8266
- return rq->nr_pinned;
8267
-}
8268
-
8269
-#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
8270
-static void migrate_disabled_sched(struct task_struct *p)
8271
-{
8272
-}
8273
-
8274
-void migrate_disable(void)
8275
-{
8276
-#ifdef CONFIG_SCHED_DEBUG
8277
- current->migrate_disable++;
8278
-#endif
8279
- barrier();
8280
-}
8281
-EXPORT_SYMBOL(migrate_disable);
8282
-
8283
-void migrate_enable(void)
8284
-{
8285
-#ifdef CONFIG_SCHED_DEBUG
8286
- struct task_struct *p = current;
8287
-
8288
- WARN_ON_ONCE(p->migrate_disable <= 0);
8289
- p->migrate_disable--;
8290
-#endif
8291
- barrier();
8292
-}
8293
-EXPORT_SYMBOL(migrate_enable);
8294
-#else
8295
-static void migrate_disabled_sched(struct task_struct *p)
8296
-{
8297
-}
8298
-#endif