2024-09-20 cf4ce59b3b70238352c7f1729f0f7223214828ad
kernel/kernel/sched/core.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * kernel/sched/core.c
34 *
....@@ -5,6 +6,10 @@
56 *
67 * Copyright (C) 1991-2002 Linus Torvalds
78 */
9
+#define CREATE_TRACE_POINTS
10
+#include <trace/events/sched.h>
11
+#undef CREATE_TRACE_POINTS
12
+
813 #include "sched.h"
914
1015 #include <linux/nospec.h>
....@@ -16,14 +21,41 @@
1621 #include <asm/tlb.h>
1722
1823 #include "../workqueue_internal.h"
24
+#include "../../io_uring/io-wq.h"
1925 #include "../smpboot.h"
2026
2127 #include "pelt.h"
28
+#include "smp.h"
2229
23
-#define CREATE_TRACE_POINTS
24
-#include <trace/events/sched.h>
30
+#include <trace/hooks/sched.h>
31
+#include <trace/hooks/dtask.h>
32
+
33
+/*
34
+ * Export tracepoints that act as a bare tracehook (ie: have no trace event
35
+ * associated with them) to allow external modules to probe them.
36
+ */
37
+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_cfs_tp);
38
+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp);
39
+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp);
40
+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp);
41
+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp);
42
+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_thermal_tp);
43
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_cpu_capacity_tp);
44
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp);
45
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_cfs_tp);
46
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_se_tp);
47
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp);
48
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_switch);
49
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_waking);
50
+#ifdef CONFIG_SCHEDSTATS
51
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_sleep);
52
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_wait);
53
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_iowait);
54
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_blocked);
55
+#endif
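
For context, a minimal sketch of how an external module can attach to one of the bare tracepoints exported above. The probe prototype is assumed to match the DECLARE_TRACE() declaration in include/trace/events/sched.h (pelt_cfs_tp passes only the cfs_rq); struct cfs_rq stays opaque to modules that do not have access to kernel/sched/sched.h.

#include <linux/module.h>
#include <trace/events/sched.h>

static void probe_pelt_cfs(void *data, struct cfs_rq *cfs_rq)
{
	/* PELT signal for this cfs_rq was just updated; cfs_rq is opaque here. */
}

static int __init pelt_probe_init(void)
{
	return register_trace_pelt_cfs_tp(probe_pelt_cfs, NULL);
}

static void __exit pelt_probe_exit(void)
{
	unregister_trace_pelt_cfs_tp(probe_pelt_cfs, NULL);
}

module_init(pelt_probe_init);
module_exit(pelt_probe_exit);
MODULE_LICENSE("GPL");
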
2556
2657 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
58
+EXPORT_SYMBOL_GPL(runqueues);
2759
2860 #ifdef CONFIG_SCHED_DEBUG
2961 /*
....@@ -38,6 +70,7 @@
3870 const_debug unsigned int sysctl_sched_features =
3971 #include "features.h"
4072 0;
73
+EXPORT_SYMBOL_GPL(sysctl_sched_features);
4174 #undef SCHED_FEAT
4275 #endif
4376
....@@ -60,6 +93,100 @@
6093 * default: 0.95s
6194 */
6295 int sysctl_sched_rt_runtime = 950000;
96
+
97
+
98
+/*
99
+ * Serialization rules:
100
+ *
101
+ * Lock order:
102
+ *
103
+ * p->pi_lock
104
+ * rq->lock
105
+ * hrtimer_cpu_base->lock (hrtimer_start() for bandwidth controls)
106
+ *
107
+ * rq1->lock
108
+ * rq2->lock where: rq1 < rq2
109
+ *
110
+ * Regular state:
111
+ *
112
+ * Normal scheduling state is serialized by rq->lock. __schedule() takes the
113
+ * local CPU's rq->lock, it optionally removes the task from the runqueue and
114
+ * always looks at the local rq data structures to find the most eligible task
115
+ * to run next.
116
+ *
117
+ * Task enqueue is also under rq->lock, possibly taken from another CPU.
118
+ * Wakeups from another LLC domain might use an IPI to transfer the enqueue to
119
+ * the local CPU to avoid bouncing the runqueue state around [ see
120
+ * ttwu_queue_wakelist() ]
121
+ *
122
+ * Task wakeup, specifically wakeups that involve migration, are horribly
123
+ * complicated to avoid having to take two rq->locks.
124
+ *
125
+ * Special state:
126
+ *
127
+ * System-calls and anything external will use task_rq_lock() which acquires
128
+ * both p->pi_lock and rq->lock. As a consequence the state they change is
129
+ * stable while holding either lock:
130
+ *
131
+ * - sched_setaffinity()/
132
+ * set_cpus_allowed_ptr(): p->cpus_ptr, p->nr_cpus_allowed
133
+ * - set_user_nice(): p->se.load, p->*prio
134
+ * - __sched_setscheduler(): p->sched_class, p->policy, p->*prio,
135
+ * p->se.load, p->rt_priority,
136
+ * p->dl.dl_{runtime, deadline, period, flags, bw, density}
137
+ * - sched_setnuma(): p->numa_preferred_nid
138
+ * - sched_move_task()/
139
+ * cpu_cgroup_fork(): p->sched_task_group
140
+ * - uclamp_update_active() p->uclamp*
141
+ *
142
+ * p->state <- TASK_*:
143
+ *
144
+ * is changed locklessly using set_current_state(), __set_current_state() or
145
+ * set_special_state(), see their respective comments, or by
146
+ * try_to_wake_up(). This latter uses p->pi_lock to serialize against
147
+ * concurrent self.
148
+ *
149
+ * p->on_rq <- { 0, 1 = TASK_ON_RQ_QUEUED, 2 = TASK_ON_RQ_MIGRATING }:
150
+ *
151
+ * is set by activate_task() and cleared by deactivate_task(), under
152
+ * rq->lock. Non-zero indicates the task is runnable, the special
153
+ * ON_RQ_MIGRATING state is used for migration without holding both
154
+ * rq->locks. It indicates task_cpu() is not stable, see task_rq_lock().
155
+ *
156
+ * p->on_cpu <- { 0, 1 }:
157
+ *
158
+ * is set by prepare_task() and cleared by finish_task() such that it will be
159
+ * set before p is scheduled-in and cleared after p is scheduled-out, both
160
+ * under rq->lock. Non-zero indicates the task is running on its CPU.
161
+ *
162
+ * [ The astute reader will observe that it is possible for two tasks on one
163
+ * CPU to have ->on_cpu = 1 at the same time. ]
164
+ *
165
+ * task_cpu(p): is changed by set_task_cpu(), the rules are:
166
+ *
167
+ * - Don't call set_task_cpu() on a blocked task:
168
+ *
169
+ * We don't care what CPU we're not running on, this simplifies hotplug,
170
+ * the CPU assignment of blocked tasks isn't required to be valid.
171
+ *
172
+ * - for try_to_wake_up(), called under p->pi_lock:
173
+ *
174
+ * This allows try_to_wake_up() to only take one rq->lock, see its comment.
175
+ *
176
+ * - for migration called under rq->lock:
177
+ * [ see task_on_rq_migrating() in task_rq_lock() ]
178
+ *
179
+ * o move_queued_task()
180
+ * o detach_task()
181
+ *
182
+ * - for migration called under double_rq_lock():
183
+ *
184
+ * o __migrate_swap_task()
185
+ * o push_rt_task() / pull_rt_task()
186
+ * o push_dl_task() / pull_dl_task()
187
+ * o dl_task_offline_migration()
188
+ *
189
+ */
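
A minimal sketch of the "special state" rule above, assuming the task_rq_lock()/task_rq_unlock() helpers from kernel/sched/sched.h; the function name is illustrative only.

static void example_inspect_task(struct task_struct *p)
{
	struct rq_flags rf;
	struct rq *rq;

	/* Takes p->pi_lock then rq->lock, following the documented lock order. */
	rq = task_rq_lock(p, &rf);

	/*
	 * p->policy, p->*prio, p->sched_class, p->cpus_ptr etc. are stable
	 * here because both locks are held.
	 */

	task_rq_unlock(rq, p, &rf);
}
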
63190
64191 /*
65192 * __task_rq_lock - lock the rq @p resides on.
....@@ -84,6 +211,7 @@
84211 cpu_relax();
85212 }
86213 }
214
+EXPORT_SYMBOL_GPL(__task_rq_lock);
87215
88216 /*
89217 * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
....@@ -126,6 +254,7 @@
126254 cpu_relax();
127255 }
128256 }
257
+EXPORT_SYMBOL_GPL(task_rq_lock);
129258
130259 /*
131260 * RQ-clock updating methods:
....@@ -206,7 +335,15 @@
206335 rq->clock += delta;
207336 update_rq_clock_task(rq, delta);
208337 }
338
+EXPORT_SYMBOL_GPL(update_rq_clock);
209339
340
+static inline void
341
+rq_csd_init(struct rq *rq, struct __call_single_data *csd, smp_call_func_t func)
342
+{
343
+ csd->flags = 0;
344
+ csd->func = func;
345
+ csd->info = rq;
346
+}
210347
211348 #ifdef CONFIG_SCHED_HRTICK
212349 /*
....@@ -243,8 +380,9 @@
243380 static void __hrtick_restart(struct rq *rq)
244381 {
245382 struct hrtimer *timer = &rq->hrtick_timer;
383
+ ktime_t time = rq->hrtick_time;
246384
247
- hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
385
+ hrtimer_start(timer, time, HRTIMER_MODE_ABS_PINNED_HARD);
248386 }
249387
250388 /*
....@@ -257,7 +395,6 @@
257395
258396 rq_lock(rq, &rf);
259397 __hrtick_restart(rq);
260
- rq->hrtick_csd_pending = 0;
261398 rq_unlock(rq, &rf);
262399 }
263400
....@@ -269,7 +406,6 @@
269406 void hrtick_start(struct rq *rq, u64 delay)
270407 {
271408 struct hrtimer *timer = &rq->hrtick_timer;
272
- ktime_t time;
273409 s64 delta;
274410
275411 /*
....@@ -277,16 +413,12 @@
277413 * doesn't make sense and can cause timer DoS.
278414 */
279415 delta = max_t(s64, delay, 10000LL);
280
- time = ktime_add_ns(timer->base->get_time(), delta);
416
+ rq->hrtick_time = ktime_add_ns(timer->base->get_time(), delta);
281417
282
- hrtimer_set_expires(timer, time);
283
-
284
- if (rq == this_rq()) {
418
+ if (rq == this_rq())
285419 __hrtick_restart(rq);
286
- } else if (!rq->hrtick_csd_pending) {
420
+ else
287421 smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd);
288
- rq->hrtick_csd_pending = 1;
289
- }
290422 }
291423
292424 #else
....@@ -303,21 +435,17 @@
303435 */
304436 delay = max_t(u64, delay, 10000LL);
305437 hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
306
- HRTIMER_MODE_REL_PINNED);
438
+ HRTIMER_MODE_REL_PINNED_HARD);
307439 }
440
+
308441 #endif /* CONFIG_SMP */
309442
310443 static void hrtick_rq_init(struct rq *rq)
311444 {
312445 #ifdef CONFIG_SMP
313
- rq->hrtick_csd_pending = 0;
314
-
315
- rq->hrtick_csd.flags = 0;
316
- rq->hrtick_csd.func = __hrtick_start;
317
- rq->hrtick_csd.info = rq;
446
+ rq_csd_init(rq, &rq->hrtick_csd, __hrtick_start);
318447 #endif
319
-
320
- hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
448
+ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
321449 rq->hrtick_timer.function = hrtick;
322450 }
323451 #else /* CONFIG_SCHED_HRTICK */
....@@ -399,7 +527,7 @@
399527 #endif
400528 #endif
401529
402
-void wake_q_add(struct wake_q_head *head, struct task_struct *task)
530
+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task)
403531 {
404532 struct wake_q_node *node = &task->wake_q;
405533
....@@ -412,23 +540,58 @@
412540 * state, even in the failed case, an explicit smp_mb() must be used.
413541 */
414542 smp_mb__before_atomic();
415
- if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))
416
- return;
417
-
418
- head->count++;
419
-
420
- get_task_struct(task);
543
+ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)))
544
+ return false;
421545
422546 /*
423547 * The head is context local, there can be no concurrency.
424548 */
425549 *head->lastp = node;
426550 head->lastp = &node->next;
551
+ head->count++;
552
+ return true;
427553 }
428554
429
-static int
430
-try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags,
431
- int sibling_count_hint);
555
+/**
556
+ * wake_q_add() - queue a wakeup for 'later' waking.
557
+ * @head: the wake_q_head to add @task to
558
+ * @task: the task to queue for 'later' wakeup
559
+ *
560
+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the
561
+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
562
+ * instantly.
563
+ *
564
+ * This function must be used as-if it were wake_up_process(); IOW the task
565
+ * must be ready to be woken at this location.
566
+ */
567
+void wake_q_add(struct wake_q_head *head, struct task_struct *task)
568
+{
569
+ if (__wake_q_add(head, task))
570
+ get_task_struct(task);
571
+}
572
+
573
+/**
574
+ * wake_q_add_safe() - safely queue a wakeup for 'later' waking.
575
+ * @head: the wake_q_head to add @task to
576
+ * @task: the task to queue for 'later' wakeup
577
+ *
578
+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the
579
+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
580
+ * instantly.
581
+ *
582
+ * This function must be used as-if it were wake_up_process(); IOW the task
583
+ * must be ready to be woken at this location.
584
+ *
585
+ * This function is essentially a task-safe equivalent to wake_q_add(). Callers
586
+ * that already hold a reference to @task can call the 'safe' version and trust
587
+ * wake_q to do the right thing depending on whether or not @task is already
588
+ * queued for wakeup.
589
+ */
590
+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task)
591
+{
592
+ if (!__wake_q_add(head, task))
593
+ put_task_struct(task);
594
+}
432595
433596 void wake_up_q(struct wake_q_head *head)
434597 {
....@@ -442,12 +605,14 @@
442605 /* Task can safely be re-inserted now: */
443606 node = node->next;
444607 task->wake_q.next = NULL;
608
+ task->wake_q_count = head->count;
445609
446610 /*
447
- * try_to_wake_up() executes a full barrier, which pairs with
611
+ * wake_up_process() executes a full barrier, which pairs with
448612 * the queueing in wake_q_add() so as not to miss wakeups.
449613 */
450
- try_to_wake_up(task, TASK_NORMAL, 0, head->count);
614
+ wake_up_process(task);
615
+ task->wake_q_count = 0;
451616 put_task_struct(task);
452617 }
453618 }
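
A short usage sketch of the wake_q API documented above, assuming <linux/sched/wake_q.h>; the waiter structure and list are illustrative. Wakeups are batched while a lock is held and issued after it is dropped, which is what the reference taken in wake_q_add() and dropped in wake_up_q() is for.

struct example_waiter {			/* hypothetical waiter bookkeeping */
	struct list_head	list;
	struct task_struct	*task;
};

static void example_wake_all(spinlock_t *lock, struct list_head *waiters)
{
	DEFINE_WAKE_Q(wake_q);
	struct example_waiter *w;

	spin_lock(lock);
	list_for_each_entry(w, waiters, list)
		wake_q_add(&wake_q, w->task);	/* takes a task reference */
	spin_unlock(lock);

	wake_up_q(&wake_q);			/* wakes tasks, drops references */
}
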
....@@ -477,15 +642,12 @@
477642 return;
478643 }
479644
480
-#ifdef CONFIG_PREEMPT
481645 if (set_nr_and_not_polling(curr))
482
-#else
483
- if (set_nr_and_not_polling(curr) && (rq->curr == rq->idle))
484
-#endif
485646 smp_send_reschedule(cpu);
486647 else
487648 trace_sched_wake_idle_without_ipi(cpu);
488649 }
650
+EXPORT_SYMBOL_GPL(resched_curr);
489651
490652 void resched_cpu(int cpu)
491653 {
....@@ -510,27 +672,49 @@
510672 */
511673 int get_nohz_timer_target(void)
512674 {
513
- int i, cpu = smp_processor_id();
675
+ int i, cpu = smp_processor_id(), default_cpu = -1;
514676 struct sched_domain *sd;
515677
516
- if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER))
517
- return cpu;
678
+ if (housekeeping_cpu(cpu, HK_FLAG_TIMER) && cpu_active(cpu)) {
679
+ if (!idle_cpu(cpu))
680
+ return cpu;
681
+ default_cpu = cpu;
682
+ }
518683
519684 rcu_read_lock();
520685 for_each_domain(cpu, sd) {
521
- for_each_cpu(i, sched_domain_span(sd)) {
686
+ for_each_cpu_and(i, sched_domain_span(sd),
687
+ housekeeping_cpumask(HK_FLAG_TIMER)) {
522688 if (cpu == i)
523689 continue;
524690
525
- if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) {
691
+ if (!idle_cpu(i)) {
526692 cpu = i;
527693 goto unlock;
528694 }
529695 }
530696 }
531697
532
- if (!housekeeping_cpu(cpu, HK_FLAG_TIMER))
533
- cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
698
+ if (default_cpu == -1) {
699
+ for_each_cpu_and(i, cpu_active_mask,
700
+ housekeeping_cpumask(HK_FLAG_TIMER)) {
701
+ if (cpu == i)
702
+ continue;
703
+
704
+ if (!idle_cpu(i)) {
705
+ cpu = i;
706
+ goto unlock;
707
+ }
708
+ }
709
+
710
+ /* no active, not-idle, housekeeping CPU found. */
711
+ default_cpu = cpumask_any(cpu_active_mask);
712
+
713
+ if (unlikely(default_cpu >= nr_cpu_ids))
714
+ goto unlock;
715
+ }
716
+
717
+ cpu = default_cpu;
534718 unlock:
535719 rcu_read_unlock();
536720 return cpu;
....@@ -590,29 +774,23 @@
590774 wake_up_idle_cpu(cpu);
591775 }
592776
593
-static inline bool got_nohz_idle_kick(void)
777
+static void nohz_csd_func(void *info)
594778 {
595
- int cpu = smp_processor_id();
596
-
597
- if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK))
598
- return false;
599
-
600
- if (idle_cpu(cpu) && !need_resched())
601
- return true;
779
+ struct rq *rq = info;
780
+ int cpu = cpu_of(rq);
781
+ unsigned int flags;
602782
603783 /*
604
- * We can't run Idle Load Balance on this CPU for this time so we
605
- * cancel it and clear NOHZ_BALANCE_KICK
784
+ * Release the rq::nohz_csd.
606785 */
607
- atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu));
608
- return false;
609
-}
786
+ flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu));
787
+ WARN_ON(!(flags & NOHZ_KICK_MASK));
610788
611
-#else /* CONFIG_NO_HZ_COMMON */
612
-
613
-static inline bool got_nohz_idle_kick(void)
614
-{
615
- return false;
789
+ rq->idle_balance = idle_cpu(cpu);
790
+ if (rq->idle_balance && !need_resched()) {
791
+ rq->nohz_idle_balance = flags;
792
+ raise_softirq_irqoff(SCHED_SOFTIRQ);
793
+ }
616794 }
617795
618796 #endif /* CONFIG_NO_HZ_COMMON */
....@@ -703,18 +881,18 @@
703881 }
704882 #endif
705883
706
-static void set_load_weight(struct task_struct *p, bool update_load)
884
+static void set_load_weight(struct task_struct *p)
707885 {
886
+ bool update_load = !(READ_ONCE(p->state) & TASK_NEW);
708887 int prio = p->static_prio - MAX_RT_PRIO;
709888 struct load_weight *load = &p->se.load;
710889
711890 /*
712891 * SCHED_IDLE tasks get minimal weight:
713892 */
714
- if (idle_policy(p->policy)) {
893
+ if (task_has_idle_policy(p)) {
715894 load->weight = scale_load(WEIGHT_IDLEPRIO);
716895 load->inv_weight = WMULT_IDLEPRIO;
717
- p->se.runnable_weight = load->weight;
718896 return;
719897 }
720898
....@@ -727,7 +905,6 @@
727905 } else {
728906 load->weight = scale_load(sched_prio_to_weight[prio]);
729907 load->inv_weight = sched_prio_to_wmult[prio];
730
- p->se.runnable_weight = load->weight;
731908 }
732909 }
733910
....@@ -750,8 +927,46 @@
750927 /* Max allowed maximum utilization */
751928 unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE;
752929
930
+/*
931
+ * By default RT tasks run at the maximum performance point/capacity of the
932
+ * system. Uclamp enforces this by always setting UCLAMP_MIN of RT tasks to
933
+ * SCHED_CAPACITY_SCALE.
934
+ *
935
+ * This knob allows admins to change the default behavior when uclamp is being
936
+ * used. In battery powered devices, particularly, running at the maximum
937
+ * capacity and frequency will increase energy consumption and shorten the
938
+ * battery life.
939
+ *
940
+ * This knob only affects RT tasks whose uclamp_se->user_defined == false.
941
+ *
942
+ * This knob will not override the system default sched_util_clamp_min defined
943
+ * above.
944
+ */
945
+unsigned int sysctl_sched_uclamp_util_min_rt_default = SCHED_CAPACITY_SCALE;
946
+
753947 /* All clamps are required to be less or equal than these values */
754948 static struct uclamp_se uclamp_default[UCLAMP_CNT];
949
+
950
+/*
951
+ * This static key is used to reduce the uclamp overhead in the fast path. It
952
+ * primarily disables the call to uclamp_rq_{inc, dec}() in
953
+ * enqueue/dequeue_task().
954
+ *
955
+ * This allows users to continue to enable uclamp in their kernel config with
956
+ * minimum uclamp overhead in the fast path.
957
+ *
958
+ * As soon as userspace modifies any of the uclamp knobs, the static key is
959
+ * enabled, since we have actual users that make use of uclamp
960
+ * functionality.
961
+ *
962
+ * The knobs that would enable this static key are:
963
+ *
964
+ * * A task modifying its uclamp value with sched_setattr().
965
+ * * An admin modifying the sysctl_sched_uclamp_{min, max} via procfs.
966
+ * * An admin modifying the cgroup cpu.uclamp.{min, max}
967
+ */
968
+DEFINE_STATIC_KEY_FALSE(sched_uclamp_used);
969
+EXPORT_SYMBOL_GPL(sched_uclamp_used);
755970
756971 /* Integer rounded range for each bucket */
757972 #define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS)
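
The pattern enabled by the sched_uclamp_used static key introduced above, sketched for illustration (the function name is hypothetical): fast-path callers bail out while the key is disabled, so per-bucket accounting costs nothing until userspace first touches a uclamp knob.

static inline void example_uclamp_fast_path(struct rq *rq, struct task_struct *p)
{
	/* Compiles to a NOP-patched branch while uclamp is unused. */
	if (!static_branch_unlikely(&sched_uclamp_used))
		return;

	/* Slow path: per-clamp_id, per-bucket accounting as in uclamp_rq_inc(). */
}
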
....@@ -762,11 +977,6 @@
762977 static inline unsigned int uclamp_bucket_id(unsigned int clamp_value)
763978 {
764979 return min_t(unsigned int, clamp_value / UCLAMP_BUCKET_DELTA, UCLAMP_BUCKETS - 1);
765
-}
766
-
767
-static inline unsigned int uclamp_bucket_base_value(unsigned int clamp_value)
768
-{
769
- return UCLAMP_BUCKET_DELTA * uclamp_bucket_id(clamp_value);
770980 }
771981
772982 static inline unsigned int uclamp_none(enum uclamp_id clamp_id)
....@@ -808,7 +1018,7 @@
8081018 if (!(rq->uclamp_flags & UCLAMP_FLAG_IDLE))
8091019 return;
8101020
811
- WRITE_ONCE(rq->uclamp[clamp_id].value, clamp_value);
1021
+ uclamp_rq_set(rq, clamp_id, clamp_value);
8121022 }
8131023
8141024 static inline
....@@ -832,12 +1042,79 @@
8321042 return uclamp_idle_value(rq, clamp_id, clamp_value);
8331043 }
8341044
1045
+static void __uclamp_update_util_min_rt_default(struct task_struct *p)
1046
+{
1047
+ unsigned int default_util_min;
1048
+ struct uclamp_se *uc_se;
1049
+
1050
+ lockdep_assert_held(&p->pi_lock);
1051
+
1052
+ uc_se = &p->uclamp_req[UCLAMP_MIN];
1053
+
1054
+ /* Only sync if user didn't override the default */
1055
+ if (uc_se->user_defined)
1056
+ return;
1057
+
1058
+ default_util_min = sysctl_sched_uclamp_util_min_rt_default;
1059
+ uclamp_se_set(uc_se, default_util_min, false);
1060
+}
1061
+
1062
+static void uclamp_update_util_min_rt_default(struct task_struct *p)
1063
+{
1064
+ struct rq_flags rf;
1065
+ struct rq *rq;
1066
+
1067
+ if (!rt_task(p))
1068
+ return;
1069
+
1070
+ /* Protect updates to p->uclamp_* */
1071
+ rq = task_rq_lock(p, &rf);
1072
+ __uclamp_update_util_min_rt_default(p);
1073
+ task_rq_unlock(rq, p, &rf);
1074
+}
1075
+
1076
+static void uclamp_sync_util_min_rt_default(void)
1077
+{
1078
+ struct task_struct *g, *p;
1079
+
1080
+ /*
1081
+ * copy_process() sysctl_uclamp
1082
+ * uclamp_min_rt = X;
1083
+ * write_lock(&tasklist_lock) read_lock(&tasklist_lock)
1084
+ * // link thread smp_mb__after_spinlock()
1085
+ * write_unlock(&tasklist_lock) read_unlock(&tasklist_lock);
1086
+ * sched_post_fork() for_each_process_thread()
1087
+ * __uclamp_sync_rt() __uclamp_sync_rt()
1088
+ *
1089
+ * Ensures that either sched_post_fork() will observe the new
1090
+ * uclamp_min_rt or for_each_process_thread() will observe the new
1091
+ * task.
1092
+ */
1093
+ read_lock(&tasklist_lock);
1094
+ smp_mb__after_spinlock();
1095
+ read_unlock(&tasklist_lock);
1096
+
1097
+ rcu_read_lock();
1098
+ for_each_process_thread(g, p)
1099
+ uclamp_update_util_min_rt_default(p);
1100
+ rcu_read_unlock();
1101
+}
1102
+
1103
+#if IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE)
1104
+void rockchip_perf_uclamp_sync_util_min_rt_default(void)
1105
+{
1106
+ uclamp_sync_util_min_rt_default();
1107
+}
1108
+EXPORT_SYMBOL(rockchip_perf_uclamp_sync_util_min_rt_default);
1109
+#endif
1110
+
8351111 static inline struct uclamp_se
8361112 uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id)
8371113 {
1114
+ /* Copy by value as we could modify it */
8381115 struct uclamp_se uc_req = p->uclamp_req[clamp_id];
8391116 #ifdef CONFIG_UCLAMP_TASK_GROUP
840
- struct uclamp_se uc_max;
1117
+ unsigned int tg_min, tg_max, value;
8411118
8421119 /*
8431120 * Tasks in autogroups or root task group will be
....@@ -848,9 +1125,11 @@
8481125 if (task_group(p) == &root_task_group)
8491126 return uc_req;
8501127
851
- uc_max = task_group(p)->uclamp[clamp_id];
852
- if (uc_req.value > uc_max.value || !uc_req.user_defined)
853
- return uc_max;
1128
+ tg_min = task_group(p)->uclamp[UCLAMP_MIN].value;
1129
+ tg_max = task_group(p)->uclamp[UCLAMP_MAX].value;
1130
+ value = uc_req.value;
1131
+ value = clamp(value, tg_min, tg_max);
1132
+ uclamp_se_set(&uc_req, value, false);
8541133 #endif
8551134
8561135 return uc_req;
....@@ -869,6 +1148,12 @@
8691148 {
8701149 struct uclamp_se uc_req = uclamp_tg_restrict(p, clamp_id);
8711150 struct uclamp_se uc_max = uclamp_default[clamp_id];
1151
+ struct uclamp_se uc_eff;
1152
+ int ret = 0;
1153
+
1154
+ trace_android_rvh_uclamp_eff_get(p, clamp_id, &uc_max, &uc_eff, &ret);
1155
+ if (ret)
1156
+ return uc_eff;
8721157
8731158 /* System default restrictions always apply */
8741159 if (unlikely(uc_req.value > uc_max.value))
....@@ -889,6 +1174,7 @@
8891174
8901175 return (unsigned long)uc_eff.value;
8911176 }
1177
+EXPORT_SYMBOL_GPL(uclamp_eff_value);
8921178
8931179 /*
8941180 * When a task is enqueued on a rq, the clamp bucket currently defined by the
....@@ -925,8 +1211,8 @@
9251211 if (bucket->tasks == 1 || uc_se->value > bucket->value)
9261212 bucket->value = uc_se->value;
9271213
928
- if (uc_se->value > READ_ONCE(uc_rq->value))
929
- WRITE_ONCE(uc_rq->value, uc_se->value);
1214
+ if (uc_se->value > uclamp_rq_get(rq, clamp_id))
1215
+ uclamp_rq_set(rq, clamp_id, uc_se->value);
9301216 }
9311217
9321218 /*
....@@ -949,10 +1235,38 @@
9491235
9501236 lockdep_assert_held(&rq->lock);
9511237
1238
+ /*
1239
+ * If sched_uclamp_used was enabled after task @p was enqueued,
1240
+ * we could end up with unbalanced call to uclamp_rq_dec_id().
1241
+ *
1242
+ * In this case the uc_se->active flag should be false since no uclamp
1243
+ * accounting was performed at enqueue time and we can just return
1244
+ * here.
1245
+ *
1246
+ * Need to be careful of the following enqeueue/dequeue ordering
1247
+ * problem too
1248
+ *
1249
+ * enqueue(taskA)
1250
+ * // sched_uclamp_used gets enabled
1251
+ * enqueue(taskB)
1252
+ * dequeue(taskA)
1253
+ * // Must not decrement bukcet->tasks here
1254
+ * dequeue(taskB)
1255
+ *
1256
+ * where we could end up with stale data in uc_se and
1257
+ * bucket[uc_se->bucket_id].
1258
+ *
1259
+ * The following check here eliminates the possibility of such race.
1260
+ */
1261
+ if (unlikely(!uc_se->active))
1262
+ return;
1263
+
9521264 bucket = &uc_rq->bucket[uc_se->bucket_id];
1265
+
9531266 SCHED_WARN_ON(!bucket->tasks);
9541267 if (likely(bucket->tasks))
9551268 bucket->tasks--;
1269
+
9561270 uc_se->active = false;
9571271
9581272 /*
....@@ -964,7 +1278,7 @@
9641278 if (likely(bucket->tasks))
9651279 return;
9661280
967
- rq_clamp = READ_ONCE(uc_rq->value);
1281
+ rq_clamp = uclamp_rq_get(rq, clamp_id);
9681282 /*
9691283 * Defensive programming: this should never happen. If it happens,
9701284 * e.g. due to future modification, warn and fixup the expected value.
....@@ -972,13 +1286,22 @@
9721286 SCHED_WARN_ON(bucket->value > rq_clamp);
9731287 if (bucket->value >= rq_clamp) {
9741288 bkt_clamp = uclamp_rq_max_value(rq, clamp_id, uc_se->value);
975
- WRITE_ONCE(uc_rq->value, bkt_clamp);
1289
+ uclamp_rq_set(rq, clamp_id, bkt_clamp);
9761290 }
9771291 }
9781292
9791293 static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
9801294 {
9811295 enum uclamp_id clamp_id;
1296
+
1297
+ /*
1298
+ * Avoid any overhead until uclamp is actually used by the userspace.
1299
+ *
1300
+ * The condition is constructed such that a NOP is generated when
1301
+ * sched_uclamp_used is disabled.
1302
+ */
1303
+ if (!static_branch_unlikely(&sched_uclamp_used))
1304
+ return;
9821305
9831306 if (unlikely(!p->sched_class->uclamp_enabled))
9841307 return;
....@@ -995,6 +1318,15 @@
9951318 {
9961319 enum uclamp_id clamp_id;
9971320
1321
+ /*
1322
+ * Avoid any overhead until uclamp is actually used by the userspace.
1323
+ *
1324
+ * The condition is constructed such that a NOP is generated when
1325
+ * sched_uclamp_used is disabled.
1326
+ */
1327
+ if (!static_branch_unlikely(&sched_uclamp_used))
1328
+ return;
1329
+
9981330 if (unlikely(!p->sched_class->uclamp_enabled))
9991331 return;
10001332
....@@ -1002,9 +1334,27 @@
10021334 uclamp_rq_dec_id(rq, p, clamp_id);
10031335 }
10041336
1005
-static inline void
1006
-uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
1337
+static inline void uclamp_rq_reinc_id(struct rq *rq, struct task_struct *p,
1338
+ enum uclamp_id clamp_id)
10071339 {
1340
+ if (!p->uclamp[clamp_id].active)
1341
+ return;
1342
+
1343
+ uclamp_rq_dec_id(rq, p, clamp_id);
1344
+ uclamp_rq_inc_id(rq, p, clamp_id);
1345
+
1346
+ /*
1347
+ * Make sure to clear the idle flag if we've transiently reached 0
1348
+ * active tasks on rq.
1349
+ */
1350
+ if (clamp_id == UCLAMP_MAX && (rq->uclamp_flags & UCLAMP_FLAG_IDLE))
1351
+ rq->uclamp_flags &= ~UCLAMP_FLAG_IDLE;
1352
+}
1353
+
1354
+static inline void
1355
+uclamp_update_active(struct task_struct *p)
1356
+{
1357
+ enum uclamp_id clamp_id;
10081358 struct rq_flags rf;
10091359 struct rq *rq;
10101360
....@@ -1024,30 +1374,22 @@
10241374 * affecting a valid clamp bucket, the next time it's enqueued,
10251375 * it will already see the updated clamp bucket value.
10261376 */
1027
- if (p->uclamp[clamp_id].active) {
1028
- uclamp_rq_dec_id(rq, p, clamp_id);
1029
- uclamp_rq_inc_id(rq, p, clamp_id);
1030
- }
1377
+ for_each_clamp_id(clamp_id)
1378
+ uclamp_rq_reinc_id(rq, p, clamp_id);
10311379
10321380 task_rq_unlock(rq, p, &rf);
10331381 }
10341382
10351383 #ifdef CONFIG_UCLAMP_TASK_GROUP
10361384 static inline void
1037
-uclamp_update_active_tasks(struct cgroup_subsys_state *css,
1038
- unsigned int clamps)
1385
+uclamp_update_active_tasks(struct cgroup_subsys_state *css)
10391386 {
1040
- enum uclamp_id clamp_id;
10411387 struct css_task_iter it;
10421388 struct task_struct *p;
10431389
10441390 css_task_iter_start(css, 0, &it);
1045
- while ((p = css_task_iter_next(&it))) {
1046
- for_each_clamp_id(clamp_id) {
1047
- if ((0x1 << clamp_id) & clamps)
1048
- uclamp_update_active(p, clamp_id);
1049
- }
1050
- }
1391
+ while ((p = css_task_iter_next(&it)))
1392
+ uclamp_update_active(p);
10511393 css_task_iter_end(&it);
10521394 }
10531395
....@@ -1070,16 +1412,16 @@
10701412 #endif
10711413
10721414 int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
1073
- void __user *buffer, size_t *lenp,
1074
- loff_t *ppos)
1415
+ void *buffer, size_t *lenp, loff_t *ppos)
10751416 {
10761417 bool update_root_tg = false;
1077
- int old_min, old_max;
1418
+ int old_min, old_max, old_min_rt;
10781419 int result;
10791420
10801421 mutex_lock(&uclamp_mutex);
10811422 old_min = sysctl_sched_uclamp_util_min;
10821423 old_max = sysctl_sched_uclamp_util_max;
1424
+ old_min_rt = sysctl_sched_uclamp_util_min_rt_default;
10831425
10841426 result = proc_dointvec(table, write, buffer, lenp, ppos);
10851427 if (result)
....@@ -1088,7 +1430,9 @@
10881430 goto done;
10891431
10901432 if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max ||
1091
- sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE) {
1433
+ sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE ||
1434
+ sysctl_sched_uclamp_util_min_rt_default > SCHED_CAPACITY_SCALE) {
1435
+
10921436 result = -EINVAL;
10931437 goto undo;
10941438 }
....@@ -1104,8 +1448,15 @@
11041448 update_root_tg = true;
11051449 }
11061450
1107
- if (update_root_tg)
1451
+ if (update_root_tg) {
1452
+ static_branch_enable(&sched_uclamp_used);
11081453 uclamp_update_root_tg();
1454
+ }
1455
+
1456
+ if (old_min_rt != sysctl_sched_uclamp_util_min_rt_default) {
1457
+ static_branch_enable(&sched_uclamp_used);
1458
+ uclamp_sync_util_min_rt_default();
1459
+ }
11091460
11101461 /*
11111462 * We update all RUNNABLE tasks only when task groups are in use.
....@@ -1118,6 +1469,7 @@
11181469 undo:
11191470 sysctl_sched_uclamp_util_min = old_min;
11201471 sysctl_sched_uclamp_util_max = old_max;
1472
+ sysctl_sched_uclamp_util_min_rt_default = old_min_rt;
11211473 done:
11221474 mutex_unlock(&uclamp_mutex);
11231475
....@@ -1127,20 +1479,61 @@
11271479 static int uclamp_validate(struct task_struct *p,
11281480 const struct sched_attr *attr)
11291481 {
1130
- unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value;
1131
- unsigned int upper_bound = p->uclamp_req[UCLAMP_MAX].value;
1482
+ int util_min = p->uclamp_req[UCLAMP_MIN].value;
1483
+ int util_max = p->uclamp_req[UCLAMP_MAX].value;
11321484
1133
- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN)
1134
- lower_bound = attr->sched_util_min;
1135
- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX)
1136
- upper_bound = attr->sched_util_max;
1485
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
1486
+ util_min = attr->sched_util_min;
11371487
1138
- if (lower_bound > upper_bound)
1488
+ if (util_min + 1 > SCHED_CAPACITY_SCALE + 1)
1489
+ return -EINVAL;
1490
+ }
1491
+
1492
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
1493
+ util_max = attr->sched_util_max;
1494
+
1495
+ if (util_max + 1 > SCHED_CAPACITY_SCALE + 1)
1496
+ return -EINVAL;
1497
+ }
1498
+
1499
+ if (util_min != -1 && util_max != -1 && util_min > util_max)
11391500 return -EINVAL;
1140
- if (upper_bound > SCHED_CAPACITY_SCALE)
1141
- return -EINVAL;
1501
+
1502
+ /*
1503
+ * We have valid uclamp attributes; make sure uclamp is enabled.
1504
+ *
1505
+ * We need to do that here, because enabling static branches is a
1506
+ * blocking operation which obviously cannot be done while holding
1507
+ * scheduler locks.
1508
+ */
1509
+ static_branch_enable(&sched_uclamp_used);
11421510
11431511 return 0;
1512
+}
1513
+
1514
+static bool uclamp_reset(const struct sched_attr *attr,
1515
+ enum uclamp_id clamp_id,
1516
+ struct uclamp_se *uc_se)
1517
+{
1518
+ /* Reset on sched class change for a non user-defined clamp value. */
1519
+ if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)) &&
1520
+ !uc_se->user_defined)
1521
+ return true;
1522
+
1523
+ /* Reset on sched_util_{min,max} == -1. */
1524
+ if (clamp_id == UCLAMP_MIN &&
1525
+ attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN &&
1526
+ attr->sched_util_min == -1) {
1527
+ return true;
1528
+ }
1529
+
1530
+ if (clamp_id == UCLAMP_MAX &&
1531
+ attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX &&
1532
+ attr->sched_util_max == -1) {
1533
+ return true;
1534
+ }
1535
+
1536
+ return false;
11441537 }
11451538
11461539 static void __setscheduler_uclamp(struct task_struct *p,
....@@ -1148,40 +1541,41 @@
11481541 {
11491542 enum uclamp_id clamp_id;
11501543
1151
- /*
1152
- * On scheduling class change, reset to default clamps for tasks
1153
- * without a task-specific value.
1154
- */
11551544 for_each_clamp_id(clamp_id) {
11561545 struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
1157
- unsigned int clamp_value = uclamp_none(clamp_id);
1546
+ unsigned int value;
11581547
1159
- /* Keep using defined clamps across class changes */
1160
- if (uc_se->user_defined)
1548
+ if (!uclamp_reset(attr, clamp_id, uc_se))
11611549 continue;
11621550
1163
- /* By default, RT tasks always get 100% boost */
1164
- if (sched_feat(SUGOV_RT_MAX_FREQ) &&
1165
- unlikely(rt_task(p) &&
1166
- clamp_id == UCLAMP_MIN)) {
1551
+ /*
1552
+ * RT by default have a 100% boost value that could be modified
1553
+ * at runtime.
1554
+ */
1555
+ if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
1556
+ value = sysctl_sched_uclamp_util_min_rt_default;
1557
+ else
1558
+ value = uclamp_none(clamp_id);
11671559
1168
- clamp_value = uclamp_none(UCLAMP_MAX);
1169
- }
1560
+ uclamp_se_set(uc_se, value, false);
11701561
1171
- uclamp_se_set(uc_se, clamp_value, false);
11721562 }
11731563
11741564 if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
11751565 return;
11761566
1177
- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
1567
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN &&
1568
+ attr->sched_util_min != -1) {
11781569 uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
11791570 attr->sched_util_min, true);
1571
+ trace_android_vh_setscheduler_uclamp(p, UCLAMP_MIN, attr->sched_util_min);
11801572 }
11811573
1182
- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
1574
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX &&
1575
+ attr->sched_util_max != -1) {
11831576 uclamp_se_set(&p->uclamp_req[UCLAMP_MAX],
11841577 attr->sched_util_max, true);
1578
+ trace_android_vh_setscheduler_uclamp(p, UCLAMP_MAX, attr->sched_util_max);
11851579 }
11861580 }
11871581
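
For reference, a hedged userspace sketch of driving the per-task clamps handled by __setscheduler_uclamp() above. It assumes a kernel with CONFIG_UCLAMP_TASK and uapi headers that expose sched_util_{min,max}; glibc has no sched_setattr() wrapper, so the raw syscall is used. Per the reset logic above, a value of -1 restores the default for that clamp.

#include <linux/sched.h>	/* SCHED_FLAG_UTIL_CLAMP_{MIN,MAX}, SCHED_FLAG_KEEP_ALL */
#include <linux/sched/types.h>	/* struct sched_attr */
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>

static long set_task_uclamp(pid_t pid, int util_min, int util_max)
{
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.sched_flags = SCHED_FLAG_KEEP_ALL |
			   SCHED_FLAG_UTIL_CLAMP_MIN |
			   SCHED_FLAG_UTIL_CLAMP_MAX;
	attr.sched_util_min = util_min;	/* 0..1024, or -1 to reset */
	attr.sched_util_max = util_max;

	return syscall(SYS_sched_setattr, pid, &attr, 0);
}
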
....@@ -1189,6 +1583,10 @@
11891583 {
11901584 enum uclamp_id clamp_id;
11911585
1586
+ /*
1587
+ * We don't need to hold task_rq_lock() when updating p->uclamp_* here
1588
+ * as the task is still at its early fork stages.
1589
+ */
11921590 for_each_clamp_id(clamp_id)
11931591 p->uclamp[clamp_id].active = false;
11941592
....@@ -1201,39 +1599,24 @@
12011599 }
12021600 }
12031601
1204
-#ifdef CONFIG_SMP
1205
-unsigned int uclamp_task(struct task_struct *p)
1602
+static void uclamp_post_fork(struct task_struct *p)
12061603 {
1207
- unsigned long util;
1208
-
1209
- util = task_util_est(p);
1210
- util = max(util, uclamp_eff_value(p, UCLAMP_MIN));
1211
- util = min(util, uclamp_eff_value(p, UCLAMP_MAX));
1212
-
1213
- return util;
1604
+ uclamp_update_util_min_rt_default(p);
12141605 }
12151606
1216
-bool uclamp_boosted(struct task_struct *p)
1607
+static void __init init_uclamp_rq(struct rq *rq)
12171608 {
1218
- return uclamp_eff_value(p, UCLAMP_MIN) > 0;
1609
+ enum uclamp_id clamp_id;
1610
+ struct uclamp_rq *uc_rq = rq->uclamp;
1611
+
1612
+ for_each_clamp_id(clamp_id) {
1613
+ uc_rq[clamp_id] = (struct uclamp_rq) {
1614
+ .value = uclamp_none(clamp_id)
1615
+ };
1616
+ }
1617
+
1618
+ rq->uclamp_flags = UCLAMP_FLAG_IDLE;
12191619 }
1220
-
1221
-bool uclamp_latency_sensitive(struct task_struct *p)
1222
-{
1223
-#ifdef CONFIG_UCLAMP_TASK_GROUP
1224
- struct cgroup_subsys_state *css = task_css(p, cpu_cgrp_id);
1225
- struct task_group *tg;
1226
-
1227
- if (!css)
1228
- return false;
1229
- tg = container_of(css, struct task_group, css);
1230
-
1231
- return tg->latency_sensitive;
1232
-#else
1233
- return false;
1234
-#endif
1235
-}
1236
-#endif /* CONFIG_SMP */
12371620
12381621 static void __init init_uclamp(void)
12391622 {
....@@ -1241,13 +1624,8 @@
12411624 enum uclamp_id clamp_id;
12421625 int cpu;
12431626
1244
- mutex_init(&uclamp_mutex);
1245
-
1246
- for_each_possible_cpu(cpu) {
1247
- memset(&cpu_rq(cpu)->uclamp, 0,
1248
- sizeof(struct uclamp_rq)*UCLAMP_CNT);
1249
- cpu_rq(cpu)->uclamp_flags = 0;
1250
- }
1627
+ for_each_possible_cpu(cpu)
1628
+ init_uclamp_rq(cpu_rq(cpu));
12511629
12521630 for_each_clamp_id(clamp_id) {
12531631 uclamp_se_set(&init_task.uclamp_req[clamp_id],
....@@ -1276,41 +1654,7 @@
12761654 static void __setscheduler_uclamp(struct task_struct *p,
12771655 const struct sched_attr *attr) { }
12781656 static inline void uclamp_fork(struct task_struct *p) { }
1279
-
1280
-long schedtune_task_margin(struct task_struct *task);
1281
-
1282
-#ifdef CONFIG_SMP
1283
-unsigned int uclamp_task(struct task_struct *p)
1284
-{
1285
- unsigned long util = task_util_est(p);
1286
-#ifdef CONFIG_SCHED_TUNE
1287
- long margin = schedtune_task_margin(p);
1288
-
1289
- trace_sched_boost_task(p, util, margin);
1290
-
1291
- util += margin;
1292
-#endif
1293
-
1294
- return util;
1295
-}
1296
-
1297
-bool uclamp_boosted(struct task_struct *p)
1298
-{
1299
-#ifdef CONFIG_SCHED_TUNE
1300
- return schedtune_task_boost(p) > 0;
1301
-#endif
1302
- return false;
1303
-}
1304
-
1305
-bool uclamp_latency_sensitive(struct task_struct *p)
1306
-{
1307
-#ifdef CONFIG_SCHED_TUNE
1308
- return schedtune_prefer_idle(p) != 0;
1309
-#endif
1310
- return false;
1311
-}
1312
-#endif /* CONFIG_SMP */
1313
-
1657
+static inline void uclamp_post_fork(struct task_struct *p) { }
13141658 static inline void init_uclamp(void) { }
13151659 #endif /* CONFIG_UCLAMP_TASK */
13161660
....@@ -1325,7 +1669,9 @@
13251669 }
13261670
13271671 uclamp_rq_inc(rq, p);
1672
+ trace_android_rvh_enqueue_task(rq, p, flags);
13281673 p->sched_class->enqueue_task(rq, p, flags);
1674
+ trace_android_rvh_after_enqueue_task(rq, p);
13291675 }
13301676
13311677 static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
....@@ -1339,31 +1685,42 @@
13391685 }
13401686
13411687 uclamp_rq_dec(rq, p);
1688
+ trace_android_rvh_dequeue_task(rq, p, flags);
13421689 p->sched_class->dequeue_task(rq, p, flags);
1690
+ trace_android_rvh_after_dequeue_task(rq, p);
13431691 }
13441692
13451693 void activate_task(struct rq *rq, struct task_struct *p, int flags)
13461694 {
1347
- if (task_contributes_to_load(p))
1348
- rq->nr_uninterruptible--;
1695
+ if (task_on_rq_migrating(p))
1696
+ flags |= ENQUEUE_MIGRATED;
13491697
13501698 enqueue_task(rq, p, flags);
1699
+
1700
+ p->on_rq = TASK_ON_RQ_QUEUED;
13511701 }
1702
+EXPORT_SYMBOL_GPL(activate_task);
13521703
13531704 void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
13541705 {
1355
- if (task_contributes_to_load(p))
1356
- rq->nr_uninterruptible++;
1706
+ p->on_rq = (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING;
13571707
13581708 dequeue_task(rq, p, flags);
13591709 }
1710
+EXPORT_SYMBOL_GPL(deactivate_task);
13601711
1361
-/*
1362
- * __normal_prio - return the priority that is based on the static prio
1363
- */
1364
-static inline int __normal_prio(struct task_struct *p)
1712
+static inline int __normal_prio(int policy, int rt_prio, int nice)
13651713 {
1366
- return p->static_prio;
1714
+ int prio;
1715
+
1716
+ if (dl_policy(policy))
1717
+ prio = MAX_DL_PRIO - 1;
1718
+ else if (rt_policy(policy))
1719
+ prio = MAX_RT_PRIO - 1 - rt_prio;
1720
+ else
1721
+ prio = NICE_TO_PRIO(nice);
1722
+
1723
+ return prio;
13671724 }
13681725
13691726 /*
....@@ -1375,15 +1732,7 @@
13751732 */
13761733 static inline int normal_prio(struct task_struct *p)
13771734 {
1378
- int prio;
1379
-
1380
- if (task_has_dl_policy(p))
1381
- prio = MAX_DL_PRIO-1;
1382
- else if (task_has_rt_policy(p))
1383
- prio = MAX_RT_PRIO-1 - p->rt_priority;
1384
- else
1385
- prio = __normal_prio(p);
1386
- return prio;
1735
+ return __normal_prio(p->policy, p->rt_priority, PRIO_TO_NICE(p->static_prio));
13871736 }
13881737
13891738 /*
....@@ -1439,20 +1788,10 @@
14391788
14401789 void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
14411790 {
1442
- const struct sched_class *class;
1443
-
1444
- if (p->sched_class == rq->curr->sched_class) {
1791
+ if (p->sched_class == rq->curr->sched_class)
14451792 rq->curr->sched_class->check_preempt_curr(rq, p, flags);
1446
- } else {
1447
- for_each_class(class) {
1448
- if (class == rq->curr->sched_class)
1449
- break;
1450
- if (class == p->sched_class) {
1451
- resched_curr(rq);
1452
- break;
1453
- }
1454
- }
1455
- }
1793
+ else if (p->sched_class > rq->curr->sched_class)
1794
+ resched_curr(rq);
14561795
14571796 /*
14581797 * A queue event has occurred, and we're going to schedule. In
....@@ -1461,33 +1800,26 @@
14611800 if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
14621801 rq_clock_skip_update(rq);
14631802 }
1803
+EXPORT_SYMBOL_GPL(check_preempt_curr);
14641804
14651805 #ifdef CONFIG_SMP
14661806
1467
-static inline bool is_per_cpu_kthread(struct task_struct *p)
1468
-{
1469
- if (!(p->flags & PF_KTHREAD))
1470
- return false;
1471
-
1472
- if (p->nr_cpus_allowed != 1)
1473
- return false;
1474
-
1475
- return true;
1476
-}
1477
-
14781807 /*
1479
- * Per-CPU kthreads are allowed to run on !actie && online CPUs, see
1808
+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see
14801809 * __set_cpus_allowed_ptr() and select_fallback_rq().
14811810 */
14821811 static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
14831812 {
1484
- if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
1813
+ if (!cpumask_test_cpu(cpu, p->cpus_ptr))
14851814 return false;
14861815
14871816 if (is_per_cpu_kthread(p))
14881817 return cpu_online(cpu);
14891818
1490
- return cpu_active(cpu);
1819
+ if (!cpu_active(cpu))
1820
+ return false;
1821
+
1822
+ return cpumask_test_cpu(cpu, task_cpu_possible_mask(p));
14911823 }
14921824
14931825 /*
....@@ -1512,19 +1844,29 @@
15121844 static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
15131845 struct task_struct *p, int new_cpu)
15141846 {
1847
+ int detached = 0;
1848
+
15151849 lockdep_assert_held(&rq->lock);
15161850
1517
- WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
1518
- dequeue_task(rq, p, DEQUEUE_NOCLOCK);
1519
- set_task_cpu(p, new_cpu);
1520
- rq_unlock(rq, rf);
1851
+ /*
1852
+ * The vendor hook may drop the lock temporarily, so
1853
+ * pass the rq flags to unpin lock. We expect the
1854
+ * rq lock to be held after return.
1855
+ */
1856
+ trace_android_rvh_migrate_queued_task(rq, rf, p, new_cpu, &detached);
1857
+ if (detached)
1858
+ goto attach;
15211859
1860
+ deactivate_task(rq, p, DEQUEUE_NOCLOCK);
1861
+ set_task_cpu(p, new_cpu);
1862
+
1863
+attach:
1864
+ rq_unlock(rq, rf);
15221865 rq = cpu_rq(new_cpu);
15231866
15241867 rq_lock(rq, rf);
15251868 BUG_ON(task_cpu(p) != new_cpu);
1526
- enqueue_task(rq, p, 0);
1527
- p->on_rq = TASK_ON_RQ_QUEUED;
1869
+ activate_task(rq, p, 0);
15281870 check_preempt_curr(rq, p, 0);
15291871
15301872 return rq;
....@@ -1576,10 +1918,10 @@
15761918 local_irq_disable();
15771919 /*
15781920 * We need to explicitly wake pending tasks before running
1579
- * __migrate_task() such that we will not miss enforcing cpus_allowed
1921
+ * __migrate_task() such that we will not miss enforcing cpus_ptr
15801922 * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
15811923 */
1582
- sched_ttwu_pending();
1924
+ flush_smp_call_function_from_idle();
15831925
15841926 raw_spin_lock(&p->pi_lock);
15851927 rq_lock(rq, &rf);
....@@ -1607,8 +1949,9 @@
16071949 */
16081950 void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
16091951 {
1610
- cpumask_copy(&p->cpus_allowed, new_mask);
1952
+ cpumask_copy(&p->cpus_mask, new_mask);
16111953 p->nr_cpus_allowed = cpumask_weight(new_mask);
1954
+ trace_android_rvh_set_cpus_allowed_comm(p, new_mask);
16121955 }
16131956
16141957 void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
....@@ -1637,28 +1980,23 @@
16371980 if (queued)
16381981 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
16391982 if (running)
1640
- set_curr_task(rq, p);
1983
+ set_next_task(rq, p);
16411984 }
16421985
16431986 /*
1644
- * Change a given task's CPU affinity. Migrate the thread to a
1645
- * proper CPU and schedule it away if the CPU it's executing on
1646
- * is removed from the allowed bitmask.
1647
- *
1648
- * NOTE: the caller must have a valid reference to the task, the
1649
- * task must not exit() & deallocate itself prematurely. The
1650
- * call is not atomic; no spinlocks may be held.
1987
+ * Called with both p->pi_lock and rq->lock held; drops both before returning.
16511988 */
1652
-static int __set_cpus_allowed_ptr(struct task_struct *p,
1653
- const struct cpumask *new_mask, bool check)
1989
+static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
1990
+ const struct cpumask *new_mask,
1991
+ bool check,
1992
+ struct rq *rq,
1993
+ struct rq_flags *rf)
16541994 {
16551995 const struct cpumask *cpu_valid_mask = cpu_active_mask;
1996
+ const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p);
16561997 unsigned int dest_cpu;
1657
- struct rq_flags rf;
1658
- struct rq *rq;
16591998 int ret = 0;
16601999
1661
- rq = task_rq_lock(p, &rf);
16622000 update_rq_clock(rq);
16632001
16642002 if (p->flags & PF_KTHREAD) {
....@@ -1666,6 +2004,9 @@
16662004 * Kernel threads are allowed on online && !active CPUs
16672005 */
16682006 cpu_valid_mask = cpu_online_mask;
2007
+ } else if (!cpumask_subset(new_mask, cpu_allowed_mask)) {
2008
+ ret = -EINVAL;
2009
+ goto out;
16692010 }
16702011
16712012 /*
....@@ -1677,10 +2018,15 @@
16772018 goto out;
16782019 }
16792020
1680
- if (cpumask_equal(&p->cpus_allowed, new_mask))
2021
+ if (cpumask_equal(&p->cpus_mask, new_mask))
16812022 goto out;
16822023
1683
- dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
2024
+ /*
2025
+ * Picking a ~random cpu helps in cases where we are changing affinity
2026
+ * for groups of tasks (ie. cpuset), so that load balancing is not
2027
+ * immediately required to distribute the tasks within their new mask.
2028
+ */
2029
+ dest_cpu = cpumask_any_and_distribute(cpu_valid_mask, new_mask);
16842030 if (dest_cpu >= nr_cpu_ids) {
16852031 ret = -EINVAL;
16862032 goto out;
....@@ -1705,21 +2051,39 @@
17052051 if (task_running(rq, p) || p->state == TASK_WAKING) {
17062052 struct migration_arg arg = { p, dest_cpu };
17072053 /* Need help from migration thread: drop lock and wait. */
1708
- task_rq_unlock(rq, p, &rf);
2054
+ task_rq_unlock(rq, p, rf);
17092055 stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
1710
- tlb_migrate_finish(p->mm);
17112056 return 0;
17122057 } else if (task_on_rq_queued(p)) {
17132058 /*
17142059 * OK, since we're going to drop the lock immediately
17152060 * afterwards anyway.
17162061 */
1717
- rq = move_queued_task(rq, &rf, p, dest_cpu);
2062
+ rq = move_queued_task(rq, rf, p, dest_cpu);
17182063 }
17192064 out:
1720
- task_rq_unlock(rq, p, &rf);
2065
+ task_rq_unlock(rq, p, rf);
17212066
17222067 return ret;
2068
+}
2069
+
2070
+/*
2071
+ * Change a given task's CPU affinity. Migrate the thread to a
2072
+ * proper CPU and schedule it away if the CPU it's executing on
2073
+ * is removed from the allowed bitmask.
2074
+ *
2075
+ * NOTE: the caller must have a valid reference to the task, the
2076
+ * task must not exit() & deallocate itself prematurely. The
2077
+ * call is not atomic; no spinlocks may be held.
2078
+ */
2079
+static int __set_cpus_allowed_ptr(struct task_struct *p,
2080
+ const struct cpumask *new_mask, bool check)
2081
+{
2082
+ struct rq_flags rf;
2083
+ struct rq *rq;
2084
+
2085
+ rq = task_rq_lock(p, &rf);
2086
+ return __set_cpus_allowed_ptr_locked(p, new_mask, check, rq, &rf);
17232087 }
17242088
17252089 int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
....@@ -1727,6 +2091,74 @@
17272091 return __set_cpus_allowed_ptr(p, new_mask, false);
17282092 }
17292093 EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
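
A brief sketch of a typical in-kernel caller of set_cpus_allowed_ptr(), which the hunk above keeps exported; the names are illustrative and kthread_bind() would work equally well for the single-CPU case.

#include <linux/kthread.h>
#include <linux/sched.h>

static int example_thread_fn(void *data)
{
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

static struct task_struct *example_spawn_pinned(int cpu)
{
	struct task_struct *tsk;

	tsk = kthread_create(example_thread_fn, NULL, "example/%d", cpu);
	if (!IS_ERR(tsk)) {
		/* Restrict the new thread to @cpu before its first wakeup. */
		set_cpus_allowed_ptr(tsk, cpumask_of(cpu));
		wake_up_process(tsk);
	}
	return tsk;
}
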
2094
+
2095
+/*
2096
+ * Change a given task's CPU affinity to the intersection of its current
2097
+ * affinity mask and @subset_mask, writing the resulting mask to @new_mask.
2098
+ * If the resulting mask is empty, leave the affinity unchanged and return
2099
+ * -EINVAL.
2100
+ */
2101
+static int restrict_cpus_allowed_ptr(struct task_struct *p,
2102
+ struct cpumask *new_mask,
2103
+ const struct cpumask *subset_mask)
2104
+{
2105
+ struct rq_flags rf;
2106
+ struct rq *rq;
2107
+
2108
+ rq = task_rq_lock(p, &rf);
2109
+ if (!cpumask_and(new_mask, &p->cpus_mask, subset_mask)) {
2110
+ task_rq_unlock(rq, p, &rf);
2111
+ return -EINVAL;
2112
+ }
2113
+
2114
+ return __set_cpus_allowed_ptr_locked(p, new_mask, false, rq, &rf);
2115
+}
2116
+
2117
+/*
2118
+ * Restrict a given task's CPU affinity so that it is a subset of
2119
+ * task_cpu_possible_mask(). If the resulting mask is empty, we warn and
2120
+ * walk up the cpuset hierarchy until we find a suitable mask.
2121
+ */
2122
+void force_compatible_cpus_allowed_ptr(struct task_struct *p)
2123
+{
2124
+ cpumask_var_t new_mask;
2125
+ const struct cpumask *override_mask = task_cpu_possible_mask(p);
2126
+
2127
+ alloc_cpumask_var(&new_mask, GFP_KERNEL);
2128
+
2129
+ /*
2130
+ * __migrate_task() can fail silently in the face of concurrent
2131
+ * offlining of the chosen destination CPU, so take the hotplug
2132
+ * lock to ensure that the migration succeeds.
2133
+ */
2134
+ trace_android_rvh_force_compatible_pre(NULL);
2135
+ cpus_read_lock();
2136
+ if (!cpumask_available(new_mask))
2137
+ goto out_set_mask;
2138
+
2139
+ if (!restrict_cpus_allowed_ptr(p, new_mask, override_mask))
2140
+ goto out_free_mask;
2141
+
2142
+ /*
2143
+ * We failed to find a valid subset of the affinity mask for the
2144
+ * task, so override it based on its cpuset hierarchy.
2145
+ */
2146
+ cpuset_cpus_allowed(p, new_mask);
2147
+ override_mask = new_mask;
2148
+
2149
+out_set_mask:
2150
+ if (printk_ratelimit()) {
2151
+ printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n",
2152
+ task_pid_nr(p), p->comm,
2153
+ cpumask_pr_args(override_mask));
2154
+ }
2155
+
2156
+ WARN_ON(set_cpus_allowed_ptr(p, override_mask));
2157
+out_free_mask:
2158
+ cpus_read_unlock();
2159
+ trace_android_rvh_force_compatible_post(NULL);
2160
+ free_cpumask_var(new_mask);
2161
+}
17302162
17312163 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
17322164 {
....@@ -1775,12 +2207,13 @@
17752207 p->se.nr_migrations++;
17762208 rseq_migrate(p);
17772209 perf_event_task_migrate(p);
2210
+ trace_android_rvh_set_task_cpu(p, new_cpu);
17782211 }
17792212
17802213 __set_task_cpu(p, new_cpu);
17812214 }
2215
+EXPORT_SYMBOL_GPL(set_task_cpu);
17822216
1783
-#ifdef CONFIG_NUMA_BALANCING
17842217 static void __migrate_swap_task(struct task_struct *p, int cpu)
17852218 {
17862219 if (task_on_rq_queued(p)) {
....@@ -1793,11 +2226,9 @@
17932226 rq_pin_lock(src_rq, &srf);
17942227 rq_pin_lock(dst_rq, &drf);
17952228
1796
- p->on_rq = TASK_ON_RQ_MIGRATING;
17972229 deactivate_task(src_rq, p, 0);
17982230 set_task_cpu(p, cpu);
17992231 activate_task(dst_rq, p, 0);
1800
- p->on_rq = TASK_ON_RQ_QUEUED;
18012232 check_preempt_curr(dst_rq, p, 0);
18022233
18032234 rq_unpin_lock(dst_rq, &drf);
....@@ -1840,10 +2271,10 @@
18402271 if (task_cpu(arg->src_task) != arg->src_cpu)
18412272 goto unlock;
18422273
1843
- if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed))
2274
+ if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr))
18442275 goto unlock;
18452276
1846
- if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed))
2277
+ if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr))
18472278 goto unlock;
18482279
18492280 __migrate_swap_task(arg->src_task, arg->dst_cpu);
....@@ -1885,10 +2316,10 @@
18852316 if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu))
18862317 goto out;
18872318
1888
- if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed))
2319
+ if (!cpumask_test_cpu(arg.dst_cpu, arg.src_task->cpus_ptr))
18892320 goto out;
18902321
1891
- if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed))
2322
+ if (!cpumask_test_cpu(arg.src_cpu, arg.dst_task->cpus_ptr))
18922323 goto out;
18932324
18942325 trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu);
....@@ -1897,7 +2328,7 @@
18972328 out:
18982329 return ret;
18992330 }
1900
-#endif /* CONFIG_NUMA_BALANCING */
2331
+EXPORT_SYMBOL_GPL(migrate_swap);
19012332
19022333 /*
19032334 * wait_task_inactive - wait for a thread to unschedule.
....@@ -2033,7 +2464,7 @@
20332464 EXPORT_SYMBOL_GPL(kick_process);
20342465
20352466 /*
2036
- * ->cpus_allowed is protected by both rq->lock and p->pi_lock
2467
+ * ->cpus_ptr is protected by both rq->lock and p->pi_lock
20372468 *
20382469 * A few notes on cpu_active vs cpu_online:
20392470 *
....@@ -2059,7 +2490,11 @@
20592490 int nid = cpu_to_node(cpu);
20602491 const struct cpumask *nodemask = NULL;
20612492 enum { cpuset, possible, fail } state = cpuset;
2062
- int dest_cpu;
2493
+ int dest_cpu = -1;
2494
+
2495
+ trace_android_rvh_select_fallback_rq(cpu, p, &dest_cpu);
2496
+ if (dest_cpu >= 0)
2497
+ return dest_cpu;
20632498
20642499 /*
20652500 * If the node that the CPU is on has been offlined, cpu_to_node()
....@@ -2071,16 +2506,14 @@
20712506
20722507 /* Look for allowed, online CPU in same node. */
20732508 for_each_cpu(dest_cpu, nodemask) {
2074
- if (!cpu_active(dest_cpu))
2075
- continue;
2076
- if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
2509
+ if (is_cpu_allowed(p, dest_cpu))
20772510 return dest_cpu;
20782511 }
20792512 }
20802513
20812514 for (;;) {
20822515 /* Any allowed, online CPU? */
2083
- for_each_cpu(dest_cpu, &p->cpus_allowed) {
2516
+ for_each_cpu(dest_cpu, p->cpus_ptr) {
20842517 if (!is_cpu_allowed(p, dest_cpu))
20852518 continue;
20862519
....@@ -2095,12 +2528,11 @@
20952528 state = possible;
20962529 break;
20972530 }
2098
- /* Fall-through */
2531
+ fallthrough;
20992532 case possible:
2100
- do_set_cpus_allowed(p, cpu_possible_mask);
2533
+ do_set_cpus_allowed(p, task_cpu_possible_mask(p));
21012534 state = fail;
21022535 break;
2103
-
21042536 case fail:
21052537 BUG();
21062538 break;
....@@ -2124,23 +2556,21 @@
21242556 }
21252557
21262558 /*
2127
- * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
2559
+ * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
21282560 */
21292561 static inline
2130
-int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags,
2131
- int sibling_count_hint)
2562
+int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
21322563 {
21332564 lockdep_assert_held(&p->pi_lock);
21342565
21352566 if (p->nr_cpus_allowed > 1)
2136
- cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags,
2137
- sibling_count_hint);
2567
+ cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
21382568 else
2139
- cpu = cpumask_any(&p->cpus_allowed);
2569
+ cpu = cpumask_any(p->cpus_ptr);
21402570
21412571 /*
21422572 * In order not to call set_task_cpu() on a blocking task we need
2143
- * to rely on ttwu() to place the task on a valid ->cpus_allowed
2573
+ * to rely on ttwu() to place the task on a valid ->cpus_ptr
21442574 * CPU.
21452575 *
21462576 * Since this is common to all placement strategies, this lives here.
....@@ -2152,12 +2582,6 @@
21522582 cpu = select_fallback_rq(task_cpu(p), p);
21532583
21542584 return cpu;
2155
-}
2156
-
2157
-static void update_avg(u64 *avg, u64 sample)
2158
-{
2159
- s64 diff = sample - *avg;
2160
- *avg += diff >> 3;
21612585 }
21622586
21632587 void sched_set_stop_task(int cpu, struct task_struct *stop)
....@@ -2239,16 +2663,6 @@
22392663 __schedstat_inc(p->se.statistics.nr_wakeups_sync);
22402664 }
22412665
2242
-static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
2243
-{
2244
- activate_task(rq, p, en_flags);
2245
- p->on_rq = TASK_ON_RQ_QUEUED;
2246
-
2247
- /* If a worker is waking up, notify the workqueue: */
2248
- if (p->flags & PF_WQ_WORKER)
2249
- wq_worker_waking_up(p, cpu_of(rq));
2250
-}
2251
-
22522666 /*
22532667 * Mark the task runnable and perform wakeup-preemption.
22542668 */
....@@ -2290,27 +2704,54 @@
22902704 {
22912705 int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK;
22922706
2707
+ if (wake_flags & WF_SYNC)
2708
+ en_flags |= ENQUEUE_WAKEUP_SYNC;
2709
+
22932710 lockdep_assert_held(&rq->lock);
22942711
2295
-#ifdef CONFIG_SMP
22962712 if (p->sched_contributes_to_load)
22972713 rq->nr_uninterruptible--;
22982714
2715
+#ifdef CONFIG_SMP
22992716 if (wake_flags & WF_MIGRATED)
23002717 en_flags |= ENQUEUE_MIGRATED;
2718
+ else
23012719 #endif
2720
+ if (p->in_iowait) {
2721
+ delayacct_blkio_end(p);
2722
+ atomic_dec(&task_rq(p)->nr_iowait);
2723
+ }
23022724
2303
- ttwu_activate(rq, p, en_flags);
2725
+ activate_task(rq, p, en_flags);
23042726 ttwu_do_wakeup(rq, p, wake_flags, rf);
23052727 }
23062728
23072729 /*
2308
- * Called in case the task @p isn't fully descheduled from its runqueue,
2309
- * in this case we must do a remote wakeup. Its a 'light' wakeup though,
2310
- * since all we need to do is flip p->state to TASK_RUNNING, since
2311
- * the task is still ->on_rq.
2730
+ * Consider @p being inside a wait loop:
2731
+ *
2732
+ * for (;;) {
2733
+ * set_current_state(TASK_UNINTERRUPTIBLE);
2734
+ *
2735
+ * if (CONDITION)
2736
+ * break;
2737
+ *
2738
+ * schedule();
2739
+ * }
2740
+ * __set_current_state(TASK_RUNNING);
2741
+ *
2742
+ * between set_current_state() and schedule(). In this case @p is still
2743
+ * runnable, so all that needs doing is to change p->state back to TASK_RUNNING in
2744
+ * an atomic manner.
2745
+ *
2746
+ * By taking task_rq(p)->lock we serialize against schedule(); if @p->on_rq
2747
+ * then schedule() must still happen and p->state can be changed to
2748
+ * TASK_RUNNING. Otherwise we lost the race, schedule() has happened, and we
2749
+ * need to do a full wakeup with enqueue.
2750
+ *
2751
+ * Returns: %true when the wakeup is done,
2752
+ * %false otherwise.
23122753 */
2313
-static int ttwu_remote(struct task_struct *p, int wake_flags)
2754
+static int ttwu_runnable(struct task_struct *p, int wake_flags)
23142755 {
23152756 struct rq_flags rf;
23162757 struct rq *rq;
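
Illustrative note: the wait-loop documented above only works when the waker publishes CONDITION before issuing the wakeup, pairing with set_current_state()'s barrier. A minimal kernel-style sketch of both sides; my_cond and my_waiter are hypothetical names, not part of this patch:

#include <linux/sched.h>

static int my_cond;
static struct task_struct *my_waiter;

static void my_wait_side(void)
{
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (READ_ONCE(my_cond))		/* CONDITION */
			break;
		schedule();			/* may race with ttwu_runnable() */
	}
	__set_current_state(TASK_RUNNING);
}

static void my_wake_side(void)
{
	WRITE_ONCE(my_cond, 1);			/* CONDITION = 1, before the wakeup */
	wake_up_process(my_waiter);		/* ends up in try_to_wake_up() */
}
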
....@@ -2329,75 +2770,63 @@
23292770 }
23302771
23312772 #ifdef CONFIG_SMP
2332
-void sched_ttwu_pending(void)
2773
+void sched_ttwu_pending(void *arg)
23332774 {
2775
+ struct llist_node *llist = arg;
23342776 struct rq *rq = this_rq();
2335
- struct llist_node *llist = llist_del_all(&rq->wake_list);
23362777 struct task_struct *p, *t;
23372778 struct rq_flags rf;
23382779
23392780 if (!llist)
23402781 return;
23412782
2783
+ /*
2784
+ * rq::ttwu_pending is a racy indication of outstanding wakeups.
2785
+ * Races are such that false-negatives are possible, since they
2786
+ * are shorter lived than false-positives would be.
2787
+ */
2788
+ WRITE_ONCE(rq->ttwu_pending, 0);
2789
+
23422790 rq_lock_irqsave(rq, &rf);
23432791 update_rq_clock(rq);
23442792
2345
- llist_for_each_entry_safe(p, t, llist, wake_entry)
2793
+ llist_for_each_entry_safe(p, t, llist, wake_entry.llist) {
2794
+ if (WARN_ON_ONCE(p->on_cpu))
2795
+ smp_cond_load_acquire(&p->on_cpu, !VAL);
2796
+
2797
+ if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq)))
2798
+ set_task_cpu(p, cpu_of(rq));
2799
+
23462800 ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf);
2801
+ }
23472802
23482803 rq_unlock_irqrestore(rq, &rf);
23492804 }
23502805
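
Illustrative note: sched_ttwu_pending() above drains a lock-free llist that remote CPUs push wakeups onto. The underlying <linux/llist.h> pattern, reduced to a self-contained sketch; struct my_item and both functions are hypothetical:

#include <linux/llist.h>
#include <linux/slab.h>

struct my_item {
	int payload;
	struct llist_node node;
};

static LLIST_HEAD(my_queue);

/* Producer side, e.g. a remote CPU queueing work: lock-free, multi-producer. */
static void my_produce(int v)
{
	struct my_item *it = kmalloc(sizeof(*it), GFP_ATOMIC);

	if (!it)
		return;
	it->payload = v;
	llist_add(&it->node, &my_queue);
}

/* Consumer side, e.g. an IPI handler: grab the whole list at once, then walk it. */
static void my_consume(void)
{
	struct llist_node *first = llist_del_all(&my_queue);
	struct my_item *it, *tmp;

	llist_for_each_entry_safe(it, tmp, first, node) {
		/* process it->payload; cf. ttwu_do_activate() per queued task */
		kfree(it);
	}
}
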
2351
-void scheduler_ipi(void)
2806
+void send_call_function_single_ipi(int cpu)
23522807 {
2353
- /*
2354
- * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
2355
- * TIF_NEED_RESCHED remotely (for the first time) will also send
2356
- * this IPI.
2357
- */
2358
- preempt_fold_need_resched();
2808
+ struct rq *rq = cpu_rq(cpu);
23592809
2360
- if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
2361
- return;
2362
-
2363
- /*
2364
- * Not all reschedule IPI handlers call irq_enter/irq_exit, since
2365
- * traditionally all their work was done from the interrupt return
2366
- * path. Now that we actually do some work, we need to make sure
2367
- * we do call them.
2368
- *
2369
- * Some archs already do call them, luckily irq_enter/exit nest
2370
- * properly.
2371
- *
2372
- * Arguably we should visit all archs and update all handlers,
2373
- * however a fair share of IPIs are still resched only so this would
2374
- * somewhat pessimize the simple resched case.
2375
- */
2376
- irq_enter();
2377
- sched_ttwu_pending();
2378
-
2379
- /*
2380
- * Check if someone kicked us for doing the nohz idle load balance.
2381
- */
2382
- if (unlikely(got_nohz_idle_kick())) {
2383
- this_rq()->idle_balance = 1;
2384
- raise_softirq_irqoff(SCHED_SOFTIRQ);
2385
- }
2386
- irq_exit();
2810
+ if (!set_nr_if_polling(rq->idle))
2811
+ arch_send_call_function_single_ipi(cpu);
2812
+ else
2813
+ trace_sched_wake_idle_without_ipi(cpu);
23872814 }
23882815
2389
-static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags)
2816
+/*
2817
+ * Queue a task on the target CPU's wake_list and wake the CPU via IPI if
2818
+ * necessary. The wakee CPU on receipt of the IPI will queue the task
2819
+ * via sched_ttwu_pending() for activation so the wakee incurs the cost
2820
+ * of the wakeup instead of the waker.
2821
+ */
2822
+static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
23902823 {
23912824 struct rq *rq = cpu_rq(cpu);
23922825
23932826 p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED);
23942827
2395
- if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) {
2396
- if (!set_nr_if_polling(rq->idle))
2397
- smp_send_reschedule(cpu);
2398
- else
2399
- trace_sched_wake_idle_without_ipi(cpu);
2400
- }
2828
+ WRITE_ONCE(rq->ttwu_pending, 1);
2829
+ __smp_call_single_queue(cpu, &p->wake_entry.llist);
24012830 }
24022831
24032832 void wake_up_if_idle(int cpu)
....@@ -2423,6 +2852,7 @@
24232852 out:
24242853 rcu_read_unlock();
24252854 }
2855
+EXPORT_SYMBOL_GPL(wake_up_if_idle);
24262856
24272857 bool cpus_share_cache(int this_cpu, int that_cpu)
24282858 {
....@@ -2431,6 +2861,58 @@
24312861
24322862 return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
24332863 }
2864
+
2865
+static inline bool ttwu_queue_cond(int cpu, int wake_flags)
2866
+{
2867
+ /*
2868
+ * If the CPU does not share cache, then queue the task on the
2869
+ * remote rq's wakelist to avoid accessing remote data.
2870
+ */
2871
+ if (!cpus_share_cache(smp_processor_id(), cpu))
2872
+ return true;
2873
+
2874
+ /*
2875
+ * If the task is descheduling and is the only running task on the
2876
+ * CPU, then use the wakelist to offload the task activation to
2877
+ * the soon-to-be-idle CPU as the current CPU is likely busy.
2878
+ * nr_running is checked to avoid unnecessary task stacking.
2879
+ *
2880
+ * Note that we can only get here with (wakee) p->on_rq=0,
2881
+ * p->on_cpu can be whatever, we've done the dequeue, so
2882
+ * the wakee has been accounted out of ->nr_running.
2883
+ */
2884
+ if ((wake_flags & WF_ON_CPU) && !cpu_rq(cpu)->nr_running)
2885
+ return true;
2886
+
2887
+ return false;
2888
+}
2889
+
2890
+static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
2891
+{
2892
+ bool cond = false;
2893
+
2894
+ trace_android_rvh_ttwu_cond(&cond);
2895
+
2896
+ if ((sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) ||
2897
+ cond) {
2898
+ if (WARN_ON_ONCE(cpu == smp_processor_id()))
2899
+ return false;
2900
+
2901
+ sched_clock_cpu(cpu); /* Sync clocks across CPUs */
2902
+ __ttwu_queue_wakelist(p, cpu, wake_flags);
2903
+ return true;
2904
+ }
2905
+
2906
+ return false;
2907
+}
2908
+
2909
+#else /* !CONFIG_SMP */
2910
+
2911
+static inline bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
2912
+{
2913
+ return false;
2914
+}
2915
+
24342916 #endif /* CONFIG_SMP */
24352917
24362918 static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
....@@ -2438,13 +2920,8 @@
24382920 struct rq *rq = cpu_rq(cpu);
24392921 struct rq_flags rf;
24402922
2441
-#if defined(CONFIG_SMP)
2442
- if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) {
2443
- sched_clock_cpu(cpu); /* Sync clocks across CPUs */
2444
- ttwu_queue_remote(p, cpu, wake_flags);
2923
+ if (ttwu_queue_wakelist(p, cpu, wake_flags))
24452924 return;
2446
- }
2447
-#endif
24482925
24492926 rq_lock(rq, &rf);
24502927 update_rq_clock(rq);
....@@ -2500,8 +2977,8 @@
25002977 * migration. However the means are completely different as there is no lock
25012978 * chain to provide order. Instead we do:
25022979 *
2503
- * 1) smp_store_release(X->on_cpu, 0)
2504
- * 2) smp_cond_load_acquire(!X->on_cpu)
2980
+ * 1) smp_store_release(X->on_cpu, 0) -- finish_task()
2981
+ * 2) smp_cond_load_acquire(!X->on_cpu) -- try_to_wake_up()
25052982 *
25062983 * Example:
25072984 *
....@@ -2540,45 +3017,95 @@
25403017 * @p: the thread to be awakened
25413018 * @state: the mask of task states that can be woken
25423019 * @wake_flags: wake modifier flags (WF_*)
2543
- * @sibling_count_hint: A hint at the number of threads that are being woken up
2544
- * in this event.
25453020 *
2546
- * If (@state & @p->state) @p->state = TASK_RUNNING.
3021
+ * Conceptually does:
3022
+ *
3023
+ * If (@state & @p->state) @p->state = TASK_RUNNING.
25473024 *
25483025 * If the task was not queued/runnable, also place it back on a runqueue.
25493026 *
2550
- * Atomic against schedule() which would dequeue a task, also see
2551
- * set_current_state().
3027
+ * This function is atomic against schedule() which would dequeue the task.
25523028 *
2553
- * This function executes a full memory barrier before accessing the task
2554
- * state; see set_current_state().
3029
+ * It issues a full memory barrier before accessing @p->state, see the comment
3030
+ * with set_current_state().
3031
+ *
3032
+ * Uses p->pi_lock to serialize against concurrent wake-ups.
3033
+ *
3034
+ * Relies on p->pi_lock stabilizing:
3035
+ * - p->sched_class
3036
+ * - p->cpus_ptr
3037
+ * - p->sched_task_group
3038
+ * in order to do migration, see its use of select_task_rq()/set_task_cpu().
3039
+ *
3040
+ * Tries really hard to only take one task_rq(p)->lock for performance.
3041
+ * Takes rq->lock in:
3042
+ * - ttwu_runnable() -- old rq, unavoidable, see comment there;
3043
+ * - ttwu_queue() -- new rq, for enqueue of the task;
3044
+ * - psi_ttwu_dequeue() -- much sadness :-( accounting will kill us.
3045
+ *
3046
+ * As a consequence we race really badly with just about everything. See the
3047
+ * many memory barriers and their comments for details.
25553048 *
25563049 * Return: %true if @p->state changes (an actual wakeup was done),
25573050 * %false otherwise.
25583051 */
25593052 static int
2560
-try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags,
2561
- int sibling_count_hint)
3053
+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
25623054 {
25633055 unsigned long flags;
25643056 int cpu, success = 0;
25653057
3058
+ preempt_disable();
3059
+ if (p == current) {
3060
+ /*
3061
+ * We're waking current, this means 'p->on_rq' and 'task_cpu(p)
3062
+ * == smp_processor_id()'. Together this means we can special
3063
+ * case the whole 'p->on_rq && ttwu_runnable()' case below
3064
+ * without taking any locks.
3065
+ *
3066
+ * In particular:
3067
+ * - we rely on Program-Order guarantees for all the ordering,
3068
+ * - we're serialized against set_special_state() by virtue of
3069
+ * it disabling IRQs (this allows not taking ->pi_lock).
3070
+ */
3071
+ if (!(p->state & state))
3072
+ goto out;
3073
+
3074
+ success = 1;
3075
+ trace_sched_waking(p);
3076
+ p->state = TASK_RUNNING;
3077
+ trace_sched_wakeup(p);
3078
+ goto out;
3079
+ }
3080
+
25663081 /*
25673082 * If we are going to wake up a thread waiting for CONDITION we
25683083 * need to ensure that CONDITION=1 done by the caller can not be
2569
- * reordered with p->state check below. This pairs with mb() in
2570
- * set_current_state() the waiting thread does.
3084
+ * reordered with p->state check below. This pairs with smp_store_mb()
3085
+ * in set_current_state() that the waiting thread does.
25713086 */
25723087 raw_spin_lock_irqsave(&p->pi_lock, flags);
25733088 smp_mb__after_spinlock();
25743089 if (!(p->state & state))
2575
- goto out;
3090
+ goto unlock;
3091
+
3092
+#ifdef CONFIG_FREEZER
3093
+ /*
3094
+ * If we're going to wake up a thread which may be frozen, then
3095
+ * we can only do so if we have an active CPU which is capable of
3096
+ * running it. This may not be the case when resuming from suspend,
3097
+ * as the secondary CPUs may not yet be back online. See __thaw_task()
3098
+ * for the actual wakeup.
3099
+ */
3100
+ if (unlikely(frozen_or_skipped(p)) &&
3101
+ !cpumask_intersects(cpu_active_mask, task_cpu_possible_mask(p)))
3102
+ goto unlock;
3103
+#endif
25763104
25773105 trace_sched_waking(p);
25783106
25793107 /* We're going to change ->state: */
25803108 success = 1;
2581
- cpu = task_cpu(p);
25823109
25833110 /*
25843111 * Ensure we load p->on_rq _after_ p->state, otherwise it would
....@@ -2599,10 +3126,15 @@
25993126 *
26003127 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
26013128 * __schedule(). See the comment for smp_mb__after_spinlock().
3129
+ *
3130
+ * A similar smp_rmb() lives in try_invoke_on_locked_down_task().
26023131 */
26033132 smp_rmb();
2604
- if (p->on_rq && ttwu_remote(p, wake_flags))
2605
- goto stat;
3133
+ if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
3134
+ goto unlock;
3135
+
3136
+ if (p->state & TASK_UNINTERRUPTIBLE)
3137
+ trace_sched_blocked_reason(p);
26063138
26073139 #ifdef CONFIG_SMP
26083140 /*
....@@ -2623,8 +3155,43 @@
26233155 *
26243156 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
26253157 * __schedule(). See the comment for smp_mb__after_spinlock().
3158
+ *
3159
+ * Form a control-dep-acquire with p->on_rq == 0 above, to ensure
3160
+ * schedule()'s deactivate_task() has 'happened' and p will no longer
3161
+ * care about its own p->state. See the comment in __schedule().
26263162 */
2627
- smp_rmb();
3163
+ smp_acquire__after_ctrl_dep();
3164
+
3165
+ /*
3166
+ * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq
3167
+ * == 0), which means we need to do an enqueue, change p->state to
3168
+ * TASK_WAKING such that we can unlock p->pi_lock before doing the
3169
+ * enqueue, such as ttwu_queue_wakelist().
3170
+ */
3171
+ p->state = TASK_WAKING;
3172
+
3173
+ /*
3174
+ * If the owning (remote) CPU is still in the middle of schedule() with
3175
+ * this task as prev, consider queueing p on the remote CPU's wake_list
3176
+ * which potentially sends an IPI instead of spinning on p->on_cpu to
3177
+ * let the waker make forward progress. This is safe because IRQs are
3178
+ * disabled and the IPI will deliver after on_cpu is cleared.
3179
+ *
3180
+ * Ensure we load task_cpu(p) after p->on_cpu:
3181
+ *
3182
+ *   set_task_cpu(p, cpu);
3183
+ *     STORE p->cpu = @cpu
3184
+ *   __schedule() (switch to task 'p')
3185
+ *     LOCK rq->lock
3186
+ *     smp_mb__after_spin_lock()          smp_cond_load_acquire(&p->on_cpu)
3187
+ *     STORE p->on_cpu = 1                  LOAD p->cpu
3188
+ *
3189
+ * to ensure we observe the correct CPU on which the task is currently
3190
+ * scheduling.
3191
+ */
3192
+ if (smp_load_acquire(&p->on_cpu) &&
3193
+ ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU))
3194
+ goto unlock;
26283195
26293196 /*
26303197 * If the owning (remote) CPU is still in the middle of schedule() with
....@@ -2637,88 +3204,79 @@
26373204 */
26383205 smp_cond_load_acquire(&p->on_cpu, !VAL);
26393206
2640
- p->sched_contributes_to_load = !!task_contributes_to_load(p);
2641
- p->state = TASK_WAKING;
3207
+ trace_android_rvh_try_to_wake_up(p);
26423208
2643
- if (p->in_iowait) {
2644
- delayacct_blkio_end(p);
2645
- atomic_dec(&task_rq(p)->nr_iowait);
2646
- }
2647
-
2648
- cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags,
2649
- sibling_count_hint);
3209
+ cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
26503210 if (task_cpu(p) != cpu) {
3211
+ if (p->in_iowait) {
3212
+ delayacct_blkio_end(p);
3213
+ atomic_dec(&task_rq(p)->nr_iowait);
3214
+ }
3215
+
26513216 wake_flags |= WF_MIGRATED;
26523217 psi_ttwu_dequeue(p);
26533218 set_task_cpu(p, cpu);
26543219 }
2655
-
2656
-#else /* CONFIG_SMP */
2657
-
2658
- if (p->in_iowait) {
2659
- delayacct_blkio_end(p);
2660
- atomic_dec(&task_rq(p)->nr_iowait);
2661
- }
2662
-
3220
+#else
3221
+ cpu = task_cpu(p);
26633222 #endif /* CONFIG_SMP */
26643223
26653224 ttwu_queue(p, cpu, wake_flags);
2666
-stat:
2667
- ttwu_stat(p, cpu, wake_flags);
2668
-out:
3225
+unlock:
26693226 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
3227
+out:
3228
+ if (success) {
3229
+ trace_android_rvh_try_to_wake_up_success(p);
3230
+ ttwu_stat(p, task_cpu(p), wake_flags);
3231
+ }
3232
+ preempt_enable();
26703233
26713234 return success;
26723235 }
26733236
26743237 /**
2675
- * try_to_wake_up_local - try to wake up a local task with rq lock held
2676
- * @p: the thread to be awakened
2677
- * @rf: request-queue flags for pinning
3238
+ * try_invoke_on_locked_down_task - Invoke a function on task in fixed state
3239
+ * @p: Process for which the function is to be invoked, can be @current.
3240
+ * @func: Function to invoke.
3241
+ * @arg: Argument to function.
26783242 *
2679
- * Put @p on the run-queue if it's not already there. The caller must
2680
- * ensure that this_rq() is locked, @p is bound to this_rq() and not
2681
- * the current task.
3243
+ * If the specified task can be quickly locked into a definite state
3244
+ * (either sleeping or on a given runqueue), arrange to keep it in that
3245
+ * state while invoking @func(@arg). This function can use ->on_rq and
3246
+ * task_curr() to work out what the state is, if required. Given that
3247
+ * @func can be invoked with a runqueue lock held, it had better be quite
3248
+ * lightweight.
3249
+ *
3250
+ * Returns:
3251
+ * @false if the task slipped out from under the locks.
3252
+ * @true if the task was locked onto a runqueue or is sleeping.
3253
+ * However, @func can override this by returning @false.
26823254 */
2683
-static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf)
3255
+bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg)
26843256 {
2685
- struct rq *rq = task_rq(p);
3257
+ struct rq_flags rf;
3258
+ bool ret = false;
3259
+ struct rq *rq;
26863260
2687
- if (WARN_ON_ONCE(rq != this_rq()) ||
2688
- WARN_ON_ONCE(p == current))
2689
- return;
2690
-
2691
- lockdep_assert_held(&rq->lock);
2692
-
2693
- if (!raw_spin_trylock(&p->pi_lock)) {
2694
- /*
2695
- * This is OK, because current is on_cpu, which avoids it being
2696
- * picked for load-balance and preemption/IRQs are still
2697
- * disabled avoiding further scheduler activity on it and we've
2698
- * not yet picked a replacement task.
2699
- */
2700
- rq_unlock(rq, rf);
2701
- raw_spin_lock(&p->pi_lock);
2702
- rq_relock(rq, rf);
2703
- }
2704
-
2705
- if (!(p->state & TASK_NORMAL))
2706
- goto out;
2707
-
2708
- trace_sched_waking(p);
2709
-
2710
- if (!task_on_rq_queued(p)) {
2711
- if (p->in_iowait) {
2712
- delayacct_blkio_end(p);
2713
- atomic_dec(&rq->nr_iowait);
3261
+ raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
3262
+ if (p->on_rq) {
3263
+ rq = __task_rq_lock(p, &rf);
3264
+ if (task_rq(p) == rq)
3265
+ ret = func(p, arg);
3266
+ rq_unlock(rq, &rf);
3267
+ } else {
3268
+ switch (p->state) {
3269
+ case TASK_RUNNING:
3270
+ case TASK_WAKING:
3271
+ break;
3272
+ default:
3273
+ smp_rmb(); // See smp_rmb() comment in try_to_wake_up().
3274
+ if (!p->on_rq)
3275
+ ret = func(p, arg);
27143276 }
2715
- ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK);
27163277 }
2717
-
2718
- ttwu_do_wakeup(rq, p, 0, rf);
2719
- ttwu_stat(p, smp_processor_id(), 0);
2720
-out:
2721
- raw_spin_unlock(&p->pi_lock);
3278
+ raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
3279
+ return ret;
27223280 }
27233281
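
Illustrative note: a sketch of how a caller might use try_invoke_on_locked_down_task(); the callback runs with @p pinned either sleeping or on its runqueue, so it must stay lightweight. my_report_cpu() and my_probe() are hypothetical:

#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/printk.h>

static bool my_report_cpu(struct task_struct *t, void *arg)
{
	*(int *)arg = task_cpu(t);	/* t is pinned: either on its rq or sleeping */
	return true;			/* report that a stable state was seen */
}

static void my_probe(struct task_struct *p)
{
	int cpu = -1;

	if (try_invoke_on_locked_down_task(p, my_report_cpu, &cpu))
		pr_info("%s/%d last ran on CPU %d\n", p->comm, p->pid, cpu);
}
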
27243282 /**
....@@ -2734,13 +3292,13 @@
27343292 */
27353293 int wake_up_process(struct task_struct *p)
27363294 {
2737
- return try_to_wake_up(p, TASK_NORMAL, 0, 1);
3295
+ return try_to_wake_up(p, TASK_NORMAL, 0);
27383296 }
27393297 EXPORT_SYMBOL(wake_up_process);
27403298
27413299 int wake_up_state(struct task_struct *p, unsigned int state)
27423300 {
2743
- return try_to_wake_up(p, state, 0, 1);
3301
+ return try_to_wake_up(p, state, 0);
27443302 }
27453303
27463304 /*
....@@ -2765,6 +3323,8 @@
27653323 p->se.cfs_rq = NULL;
27663324 #endif
27673325
3326
+ trace_android_rvh_sched_fork_init(p);
3327
+
27683328 #ifdef CONFIG_SCHEDSTATS
27693329 /* Even if schedstat is disabled, there should not be garbage */
27703330 memset(&p->se.statistics, 0, sizeof(p->se.statistics));
....@@ -2785,7 +3345,13 @@
27853345 INIT_HLIST_HEAD(&p->preempt_notifiers);
27863346 #endif
27873347
3348
+#ifdef CONFIG_COMPACTION
3349
+ p->capture_control = NULL;
3350
+#endif
27883351 init_numa_balancing(clone_flags, p);
3352
+#ifdef CONFIG_SMP
3353
+ p->wake_entry.u_flags = CSD_TYPE_TTWU;
3354
+#endif
27893355 }
27903356
27913357 DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
....@@ -2802,7 +3368,7 @@
28023368
28033369 #ifdef CONFIG_PROC_SYSCTL
28043370 int sysctl_numa_balancing(struct ctl_table *table, int write,
2805
- void __user *buffer, size_t *lenp, loff_t *ppos)
3371
+ void *buffer, size_t *lenp, loff_t *ppos)
28063372 {
28073373 struct ctl_table t;
28083374 int err;
....@@ -2876,8 +3442,8 @@
28763442 }
28773443
28783444 #ifdef CONFIG_PROC_SYSCTL
2879
-int sysctl_schedstats(struct ctl_table *table, int write,
2880
- void __user *buffer, size_t *lenp, loff_t *ppos)
3445
+int sysctl_schedstats(struct ctl_table *table, int write, void *buffer,
3446
+ size_t *lenp, loff_t *ppos)
28813447 {
28823448 struct ctl_table t;
28833449 int err;
....@@ -2905,7 +3471,7 @@
29053471 */
29063472 int sched_fork(unsigned long clone_flags, struct task_struct *p)
29073473 {
2908
- unsigned long flags;
3474
+ trace_android_rvh_sched_fork(p);
29093475
29103476 __sched_fork(clone_flags, p);
29113477 /*
....@@ -2919,6 +3485,7 @@
29193485 * Make sure we do not leak PI boosting priority to the child.
29203486 */
29213487 p->prio = current->normal_prio;
3488
+ trace_android_rvh_prepare_prio_fork(p);
29223489
29233490 uclamp_fork(p);
29243491
....@@ -2933,8 +3500,8 @@
29333500 } else if (PRIO_TO_NICE(p->static_prio) < 0)
29343501 p->static_prio = NICE_TO_PRIO(0);
29353502
2936
- p->prio = p->normal_prio = __normal_prio(p);
2937
- set_load_weight(p, false);
3503
+ p->prio = p->normal_prio = p->static_prio;
3504
+ set_load_weight(p);
29383505
29393506 /*
29403507 * We don't need the reset flag anymore after the fork. It has
....@@ -2951,24 +3518,8 @@
29513518 p->sched_class = &fair_sched_class;
29523519
29533520 init_entity_runnable_average(&p->se);
3521
+ trace_android_rvh_finish_prio_fork(p);
29543522
2955
- /*
2956
- * The child is not yet in the pid-hash so no cgroup attach races,
2957
- * and the cgroup is pinned to this child due to cgroup_fork()
2958
- * is ran before sched_fork().
2959
- *
2960
- * Silence PROVE_RCU.
2961
- */
2962
- raw_spin_lock_irqsave(&p->pi_lock, flags);
2963
- rseq_migrate(p);
2964
- /*
2965
- * We're setting the CPU for the first time, we don't migrate,
2966
- * so use __set_task_cpu().
2967
- */
2968
- __set_task_cpu(p, smp_processor_id());
2969
- if (p->sched_class->task_fork)
2970
- p->sched_class->task_fork(p);
2971
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
29723523
29733524 #ifdef CONFIG_SCHED_INFO
29743525 if (likely(sched_info_on()))
....@@ -2983,6 +3534,41 @@
29833534 RB_CLEAR_NODE(&p->pushable_dl_tasks);
29843535 #endif
29853536 return 0;
3537
+}
3538
+
3539
+void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
3540
+{
3541
+ unsigned long flags;
3542
+
3543
+ /*
3544
+ * Because we're not yet on the pid-hash, p->pi_lock isn't strictly
3545
+ * required yet, but lockdep gets upset if rules are violated.
3546
+ */
3547
+ raw_spin_lock_irqsave(&p->pi_lock, flags);
3548
+#ifdef CONFIG_CGROUP_SCHED
3549
+ if (1) {
3550
+ struct task_group *tg;
3551
+
3552
+ tg = container_of(kargs->cset->subsys[cpu_cgrp_id],
3553
+ struct task_group, css);
3554
+ tg = autogroup_task_group(p, tg);
3555
+ p->sched_task_group = tg;
3556
+ }
3557
+#endif
3558
+ rseq_migrate(p);
3559
+ /*
3560
+ * We're setting the CPU for the first time, we don't migrate,
3561
+ * so use __set_task_cpu().
3562
+ */
3563
+ __set_task_cpu(p, smp_processor_id());
3564
+ if (p->sched_class->task_fork)
3565
+ p->sched_class->task_fork(p);
3566
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
3567
+}
3568
+
3569
+void sched_post_fork(struct task_struct *p)
3570
+{
3571
+ uclamp_post_fork(p);
29863572 }
29873573
29883574 unsigned long to_ratio(u64 period, u64 runtime)
....@@ -3013,12 +3599,14 @@
30133599 struct rq_flags rf;
30143600 struct rq *rq;
30153601
3602
+ trace_android_rvh_wake_up_new_task(p);
3603
+
30163604 raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
30173605 p->state = TASK_RUNNING;
30183606 #ifdef CONFIG_SMP
30193607 /*
30203608 * Fork balancing, do it here and not earlier because:
3021
- * - cpus_allowed can change in the fork path
3609
+ * - cpus_ptr can change in the fork path
30223610 * - any previously selected CPU might disappear through hotplug
30233611 *
30243612 * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
....@@ -3026,14 +3614,14 @@
30263614 */
30273615 p->recent_used_cpu = task_cpu(p);
30283616 rseq_migrate(p);
3029
- __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0, 1));
3617
+ __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
30303618 #endif
30313619 rq = __task_rq_lock(p, &rf);
30323620 update_rq_clock(rq);
3033
- post_init_entity_util_avg(&p->se);
3621
+ post_init_entity_util_avg(p);
3622
+ trace_android_rvh_new_task_stats(p);
30343623
30353624 activate_task(rq, p, ENQUEUE_NOCLOCK);
3036
- p->on_rq = TASK_ON_RQ_QUEUED;
30373625 trace_sched_wakeup_new(p);
30383626 check_preempt_curr(rq, p, WF_FORK);
30393627 #ifdef CONFIG_SMP
....@@ -3143,8 +3731,10 @@
31433731 /*
31443732 * Claim the task as running, we do this before switching to it
31453733 * such that any running task will have this set.
3734
+ *
3735
+ * See the ttwu() WF_ON_CPU case and its ordering comment.
31463736 */
3147
- next->on_cpu = 1;
3737
+ WRITE_ONCE(next->on_cpu, 1);
31483738 #endif
31493739 }
31503740
....@@ -3152,8 +3742,9 @@
31523742 {
31533743 #ifdef CONFIG_SMP
31543744 /*
3155
- * After ->on_cpu is cleared, the task can be moved to a different CPU.
3156
- * We must ensure this doesn't happen until the switch is completely
3745
+ * This must be the very last reference to @prev from this CPU. After
3746
+ * p->on_cpu is cleared, the task can be moved to a different CPU. We
3747
+ * must ensure this doesn't happen until the switch is completely
31573748 * finished.
31583749 *
31593750 * In particular, the load of prev->state in finish_task_switch() must
....@@ -3175,7 +3766,7 @@
31753766 * do an early lockdep release here:
31763767 */
31773768 rq_unpin_lock(rq, rf);
3178
- spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
3769
+ spin_release(&rq->lock.dep_map, _THIS_IP_);
31793770 #ifdef CONFIG_DEBUG_SPINLOCK
31803771 /* this is a valid case when another task releases the spinlock */
31813772 rq->lock.owner = next;
....@@ -3320,11 +3911,12 @@
33203911 * task and put them back on the free list.
33213912 */
33223913 kprobe_flush_task(prev);
3914
+ trace_android_rvh_flush_task(prev);
33233915
33243916 /* Task is done with its stack. */
33253917 put_task_stack(prev);
33263918
3327
- put_task_struct(prev);
3919
+ put_task_struct_rcu_user(prev);
33283920 }
33293921
33303922 tick_nohz_task_switch();
....@@ -3403,12 +3995,8 @@
34033995 context_switch(struct rq *rq, struct task_struct *prev,
34043996 struct task_struct *next, struct rq_flags *rf)
34053997 {
3406
- struct mm_struct *mm, *oldmm;
3407
-
34083998 prepare_task_switch(rq, prev, next);
34093999
3410
- mm = next->mm;
3411
- oldmm = prev->active_mm;
34124000 /*
34134001 * For paravirt, this is coupled with an exit in switch_to to
34144002 * combine the page table reload and the switch backend into
....@@ -3417,22 +4005,37 @@
34174005 arch_start_context_switch(prev);
34184006
34194007 /*
3420
- * If mm is non-NULL, we pass through switch_mm(). If mm is
3421
- * NULL, we will pass through mmdrop() in finish_task_switch().
3422
- * Both of these contain the full memory barrier required by
3423
- * membarrier after storing to rq->curr, before returning to
3424
- * user-space.
4008
+ * kernel -> kernel lazy + transfer active
4009
+ * user -> kernel lazy + mmgrab() active
4010
+ *
4011
+ * kernel -> user switch + mmdrop() active
4012
+ * user -> user switch
34254013 */
3426
- if (!mm) {
3427
- next->active_mm = oldmm;
3428
- mmgrab(oldmm);
3429
- enter_lazy_tlb(oldmm, next);
3430
- } else
3431
- switch_mm_irqs_off(oldmm, mm, next);
4014
+ if (!next->mm) { // to kernel
4015
+ enter_lazy_tlb(prev->active_mm, next);
34324016
3433
- if (!prev->mm) {
3434
- prev->active_mm = NULL;
3435
- rq->prev_mm = oldmm;
4017
+ next->active_mm = prev->active_mm;
4018
+ if (prev->mm) // from user
4019
+ mmgrab(prev->active_mm);
4020
+ else
4021
+ prev->active_mm = NULL;
4022
+ } else { // to user
4023
+ membarrier_switch_mm(rq, prev->active_mm, next->mm);
4024
+ /*
4025
+ * sys_membarrier() requires an smp_mb() between setting
4026
+ * rq->curr / membarrier_switch_mm() and returning to userspace.
4027
+ *
4028
+ * The below provides this either through switch_mm(), or in
4029
+ * case 'prev->active_mm == next->mm' through
4030
+ * finish_task_switch()'s mmdrop().
4031
+ */
4032
+ switch_mm_irqs_off(prev->active_mm, next->mm, next);
4033
+
4034
+ if (!prev->mm) { // from kernel
4035
+ /* will mmdrop() in finish_task_switch(). */
4036
+ rq->prev_mm = prev->active_mm;
4037
+ prev->active_mm = NULL;
4038
+ }
34364039 }
34374040
34384041 rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
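
Illustrative note: the lazy-mm cases above rely on the mmgrab()/mmdrop() reference rule: whoever keeps a borrowed mm as ->active_mm must hold an mm_struct reference until it is dropped. A minimal sketch; my_borrow_mm() is hypothetical:

#include <linux/sched/mm.h>

static void my_borrow_mm(struct mm_struct *mm)
{
	mmgrab(mm);	/* pins the mm_struct itself (unlike mmget(), which pins the address space) */
	/* ... mm may now be used as ->active_mm by a kernel thread ... */
	mmdrop(mm);	/* may free the mm_struct when the last reference goes away */
}
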
....@@ -3469,7 +4072,7 @@
34694072 * preemption, thus the result might have a time-of-check-to-time-of-use
34704073 * race. The caller is responsible to use it correctly, for example:
34714074 *
3472
- * - from a non-preemptable section (of course)
4075
+ * - from a non-preemptible section (of course)
34734076 *
34744077 * - from a thread that is bound to a single CPU
34754078 *
....@@ -3490,6 +4093,18 @@
34904093 sum += cpu_rq(i)->nr_switches;
34914094
34924095 return sum;
4096
+}
4097
+
4098
+/*
4099
+ * Consumers of these two interfaces, like for example the cpuidle menu
4100
+ * governor, are using nonsensical data: they prefer a shallow idle state for
4101
+ * a CPU that has IO-wait pending, even though the waiting task might not even
4102
+ * end up running on that CPU when it does become runnable.
4103
+ */
4104
+
4105
+unsigned long nr_iowait_cpu(int cpu)
4106
+{
4107
+ return atomic_read(&cpu_rq(cpu)->nr_iowait);
34934108 }
34944109
34954110 /*
....@@ -3527,29 +4142,9 @@
35274142 unsigned long i, sum = 0;
35284143
35294144 for_each_possible_cpu(i)
3530
- sum += atomic_read(&cpu_rq(i)->nr_iowait);
4145
+ sum += nr_iowait_cpu(i);
35314146
35324147 return sum;
3533
-}
3534
-
3535
-/*
3536
- * Consumers of these two interfaces, like for example the cpufreq menu
3537
- * governor are using nonsensical data. Boosting frequency for a CPU that has
3538
- * IO-wait which might not even end up running the task when it does become
3539
- * runnable.
3540
- */
3541
-
3542
-unsigned long nr_iowait_cpu(int cpu)
3543
-{
3544
- struct rq *this = cpu_rq(cpu);
3545
- return atomic_read(&this->nr_iowait);
3546
-}
3547
-
3548
-void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
3549
-{
3550
- struct rq *rq = this_rq();
3551
- *nr_waiters = atomic_read(&rq->nr_iowait);
3552
- *load = rq->load.weight;
35534148 }
35544149
35554150 #ifdef CONFIG_SMP
....@@ -3563,9 +4158,14 @@
35634158 struct task_struct *p = current;
35644159 unsigned long flags;
35654160 int dest_cpu;
4161
+ bool cond = false;
4162
+
4163
+ trace_android_rvh_sched_exec(&cond);
4164
+ if (cond)
4165
+ return;
35664166
35674167 raw_spin_lock_irqsave(&p->pi_lock, flags);
3568
- dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0, 1);
4168
+ dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0);
35694169 if (dest_cpu == smp_processor_id())
35704170 goto unlock;
35714171
....@@ -3648,6 +4248,7 @@
36484248
36494249 return ns;
36504250 }
4251
+EXPORT_SYMBOL_GPL(task_sched_runtime);
36514252
36524253 /*
36534254 * This function gets called by the timer code, with HZ frequency.
....@@ -3659,14 +4260,18 @@
36594260 struct rq *rq = cpu_rq(cpu);
36604261 struct task_struct *curr = rq->curr;
36614262 struct rq_flags rf;
4263
+ unsigned long thermal_pressure;
36624264
4265
+ arch_scale_freq_tick();
36634266 sched_clock_tick();
36644267
36654268 rq_lock(rq, &rf);
36664269
4270
+ trace_android_rvh_tick_entry(rq);
36674271 update_rq_clock(rq);
4272
+ thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq));
4273
+ update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure);
36684274 curr->sched_class->task_tick(rq, curr, 0);
3669
- cpu_load_update_active(rq);
36704275 calc_global_load_tick(rq);
36714276 psi_task_tick(rq);
36724277
....@@ -3678,6 +4283,8 @@
36784283 rq->idle_balance = idle_cpu(cpu);
36794284 trigger_load_balance(rq);
36804285 #endif
4286
+
4287
+ trace_android_vh_scheduler_tick(rq);
36814288 }
36824289
36834290 #ifdef CONFIG_NO_HZ_FULL
....@@ -3735,28 +4342,31 @@
37354342 * statistics and checks timeslices in a time-independent way, regardless
37364343 * of when exactly it is running.
37374344 */
3738
- if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu))
4345
+ if (!tick_nohz_tick_stopped_cpu(cpu))
37394346 goto out_requeue;
37404347
37414348 rq_lock_irq(rq, &rf);
37424349 curr = rq->curr;
3743
- if (is_idle_task(curr) || cpu_is_offline(cpu))
4350
+ if (cpu_is_offline(cpu))
37444351 goto out_unlock;
37454352
37464353 update_rq_clock(rq);
3747
- delta = rq_clock_task(rq) - curr->se.exec_start;
37484354
3749
- /*
3750
- * Make sure the next tick runs within a reasonable
3751
- * amount of time.
3752
- */
3753
- WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
4355
+ if (!is_idle_task(curr)) {
4356
+ /*
4357
+ * Make sure the next tick runs within a reasonable
4358
+ * amount of time.
4359
+ */
4360
+ delta = rq_clock_task(rq) - curr->se.exec_start;
4361
+ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
4362
+ }
37544363 curr->sched_class->task_tick(rq, curr, 0);
37554364
4365
+ calc_load_nohz_remote(rq);
37564366 out_unlock:
37574367 rq_unlock_irq(rq, &rf);
3758
-
37594368 out_requeue:
4369
+
37604370 /*
37614371 * Run the remote tick once per second (1Hz). This arbitrary
37624372 * frequency is large enough to avoid overload but short enough
....@@ -3820,7 +4430,7 @@
38204430 static inline void sched_tick_stop(int cpu) { }
38214431 #endif
38224432
3823
-#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
4433
+#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \
38244434 defined(CONFIG_TRACE_PREEMPT_TOGGLE))
38254435 /*
38264436 * If the value passed in is equal to the current preempt count
....@@ -3926,11 +4536,11 @@
39264536 if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
39274537 && in_atomic_preempt_off()) {
39284538 pr_err("Preemption disabled at:");
3929
- print_ip_sym(preempt_disable_ip);
3930
- pr_cont("\n");
4539
+ print_ip_sym(KERN_ERR, preempt_disable_ip);
39314540 }
3932
- if (panic_on_warn)
3933
- panic("scheduling while atomic\n");
4541
+ check_panic_on_warn("scheduling while atomic");
4542
+
4543
+ trace_android_rvh_schedule_bug(prev);
39344544
39354545 dump_stack();
39364546 add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
....@@ -3939,11 +4549,23 @@
39394549 /*
39404550 * Various schedule()-time debugging checks and statistics:
39414551 */
3942
-static inline void schedule_debug(struct task_struct *prev)
4552
+static inline void schedule_debug(struct task_struct *prev, bool preempt)
39434553 {
39444554 #ifdef CONFIG_SCHED_STACK_END_CHECK
39454555 if (task_stack_end_corrupted(prev))
39464556 panic("corrupted stack end detected inside scheduler\n");
4557
+
4558
+ if (task_scs_end_corrupted(prev))
4559
+ panic("corrupted shadow stack detected inside scheduler\n");
4560
+#endif
4561
+
4562
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
4563
+ if (!preempt && prev->state && prev->non_block_count) {
4564
+ printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n",
4565
+ prev->comm, prev->pid, prev->non_block_count);
4566
+ dump_stack();
4567
+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
4568
+ }
39474569 #endif
39484570
39494571 if (unlikely(in_atomic_preempt_off())) {
....@@ -3955,6 +4577,28 @@
39554577 profile_hit(SCHED_PROFILING, __builtin_return_address(0));
39564578
39574579 schedstat_inc(this_rq()->sched_count);
4580
+}
4581
+
4582
+static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
4583
+ struct rq_flags *rf)
4584
+{
4585
+#ifdef CONFIG_SMP
4586
+ const struct sched_class *class;
4587
+ /*
4588
+ * We must do the balancing pass before put_prev_task(), such
4589
+ * that when we release the rq->lock the task is in the same
4590
+ * state as before we took rq->lock.
4591
+ *
4592
+ * We can terminate the balance pass as soon as we know there is
4593
+ * a runnable task of @class priority or higher.
4594
+ */
4595
+ for_class_range(class, prev->sched_class, &idle_sched_class) {
4596
+ if (class->balance(rq, prev, rf))
4597
+ break;
4598
+ }
4599
+#endif
4600
+
4601
+ put_prev_task(rq, prev);
39584602 }
39594603
39604604 /*
....@@ -3972,29 +4616,29 @@
39724616 * higher scheduling class, because otherwise those loose the
39734617 * opportunity to pull in more work from other CPUs.
39744618 */
3975
- if (likely((prev->sched_class == &idle_sched_class ||
3976
- prev->sched_class == &fair_sched_class) &&
4619
+ if (likely(prev->sched_class <= &fair_sched_class &&
39774620 rq->nr_running == rq->cfs.h_nr_running)) {
39784621
3979
- p = fair_sched_class.pick_next_task(rq, prev, rf);
4622
+ p = pick_next_task_fair(rq, prev, rf);
39804623 if (unlikely(p == RETRY_TASK))
3981
- goto again;
4624
+ goto restart;
39824625
39834626 /* Assumes fair_sched_class->next == idle_sched_class */
3984
- if (unlikely(!p))
3985
- p = idle_sched_class.pick_next_task(rq, prev, rf);
4627
+ if (!p) {
4628
+ put_prev_task(rq, prev);
4629
+ p = pick_next_task_idle(rq);
4630
+ }
39864631
39874632 return p;
39884633 }
39894634
3990
-again:
4635
+restart:
4636
+ put_prev_task_balance(rq, prev, rf);
4637
+
39914638 for_each_class(class) {
3992
- p = class->pick_next_task(rq, prev, rf);
3993
- if (p) {
3994
- if (unlikely(p == RETRY_TASK))
3995
- goto again;
4639
+ p = class->pick_next_task(rq);
4640
+ if (p)
39964641 return p;
3997
- }
39984642 }
39994643
40004644 /* The idle class should always have a runnable task: */
....@@ -4021,7 +4665,7 @@
40214665 * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets
40224666 * called on the nearest possible occasion:
40234667 *
4024
- * - If the kernel is preemptible (CONFIG_PREEMPT=y):
4668
+ * - If the kernel is preemptible (CONFIG_PREEMPTION=y):
40254669 *
40264670 * - in syscall or exception context, at the next outmost
40274671 * preempt_enable(). (this might be as soon as the wake_up()'s
....@@ -4030,7 +4674,7 @@
40304674 * - in IRQ context, return from interrupt-handler to
40314675 * preemptible context
40324676 *
4033
- * - If the kernel is not preemptible (CONFIG_PREEMPT is not set)
4677
+ * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set)
40344678 * then at the next:
40354679 *
40364680 * - cond_resched() call
....@@ -4044,6 +4688,7 @@
40444688 {
40454689 struct task_struct *prev, *next;
40464690 unsigned long *switch_count;
4691
+ unsigned long prev_state;
40474692 struct rq_flags rf;
40484693 struct rq *rq;
40494694 int cpu;
....@@ -4052,7 +4697,7 @@
40524697 rq = cpu_rq(cpu);
40534698 prev = rq->curr;
40544699
4055
- schedule_debug(prev);
4700
+ schedule_debug(prev, preempt);
40564701
40574702 if (sched_feat(HRTICK))
40584703 hrtick_clear(rq);
....@@ -4063,9 +4708,16 @@
40634708 /*
40644709 * Make sure that signal_pending_state()->signal_pending() below
40654710 * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
4066
- * done by the caller to avoid the race with signal_wake_up().
4711
+ * done by the caller to avoid the race with signal_wake_up():
40674712 *
4068
- * The membarrier system call requires a full memory barrier
4713
+ *   __set_current_state(@state)          signal_wake_up()
4714
+ *   schedule()                              set_tsk_thread_flag(p, TIF_SIGPENDING)
4715
+ *                                           wake_up_state(p, state)
4716
+ *   LOCK rq->lock                           LOCK p->pi_lock
4717
+ *   smp_mb__after_spinlock()                smp_mb__after_spinlock()
4718
+ *   if (signal_pending_state())             if (p->state & @state)
4719
+ *
4720
+ * Also, the membarrier system call requires a full memory barrier
40694721 * after coming from user-space, before storing to rq->curr.
40704722 */
40714723 rq_lock(rq, &rf);
....@@ -4076,29 +4728,43 @@
40764728 update_rq_clock(rq);
40774729
40784730 switch_count = &prev->nivcsw;
4079
- if (!preempt && prev->state) {
4080
- if (unlikely(signal_pending_state(prev->state, prev))) {
4731
+
4732
+ /*
4733
+ * We must load prev->state once (task_struct::state is volatile), such
4734
+ * that:
4735
+ *
4736
+ * - we form a control dependency vs deactivate_task() below.
4737
+ * - ptrace_{,un}freeze_traced() can change ->state underneath us.
4738
+ */
4739
+ prev_state = prev->state;
4740
+ if (!preempt && prev_state) {
4741
+ if (signal_pending_state(prev_state, prev)) {
40814742 prev->state = TASK_RUNNING;
40824743 } else {
4744
+ prev->sched_contributes_to_load =
4745
+ (prev_state & TASK_UNINTERRUPTIBLE) &&
4746
+ !(prev_state & TASK_NOLOAD) &&
4747
+ !(prev->flags & PF_FROZEN);
4748
+
4749
+ if (prev->sched_contributes_to_load)
4750
+ rq->nr_uninterruptible++;
4751
+
4752
+ /*
4753
+ *   __schedule()                     ttwu()
4754
+ *     prev_state = prev->state;        if (p->on_rq && ...)
4755
+ *     if (prev_state)                    goto out;
4756
+ *       p->on_rq = 0;                  smp_acquire__after_ctrl_dep();
4757
+ *                                      p->state = TASK_WAKING
4758
+ *
4759
+ * Where __schedule() and ttwu() have matching control dependencies.
4760
+ *
4761
+ * After this, schedule() must not care about p->state any more.
4762
+ */
40834763 deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
4084
- prev->on_rq = 0;
40854764
40864765 if (prev->in_iowait) {
40874766 atomic_inc(&rq->nr_iowait);
40884767 delayacct_blkio_start();
4089
- }
4090
-
4091
- /*
4092
- * If a worker went to sleep, notify and ask workqueue
4093
- * whether it wants to wake up a task to maintain
4094
- * concurrency.
4095
- */
4096
- if (prev->flags & PF_WQ_WORKER) {
4097
- struct task_struct *to_wakeup;
4098
-
4099
- to_wakeup = wq_worker_sleeping(prev);
4100
- if (to_wakeup)
4101
- try_to_wake_up_local(to_wakeup, &rf);
41024768 }
41034769 }
41044770 switch_count = &prev->nvcsw;
....@@ -4108,9 +4774,14 @@
41084774 clear_tsk_need_resched(prev);
41094775 clear_preempt_need_resched();
41104776
4777
+ trace_android_rvh_schedule(prev, next, rq);
41114778 if (likely(prev != next)) {
41124779 rq->nr_switches++;
4113
- rq->curr = next;
4780
+ /*
4781
+ * RCU users of rcu_dereference(rq->curr) may not see
4782
+ * changes to task_struct made by pick_next_task().
4783
+ */
4784
+ RCU_INIT_POINTER(rq->curr, next);
41144785 /*
41154786 * The membarrier system call requires each architecture
41164787 * to have a full memory barrier after updating
....@@ -4126,6 +4797,8 @@
41264797 * is a RELEASE barrier),
41274798 */
41284799 ++*switch_count;
4800
+
4801
+ psi_sched_switch(prev, next, !task_on_rq_queued(prev));
41294802
41304803 trace_sched_switch(preempt, prev, next);
41314804
....@@ -4157,14 +4830,48 @@
41574830
41584831 static inline void sched_submit_work(struct task_struct *tsk)
41594832 {
4160
- if (!tsk->state || tsk_is_pi_blocked(tsk))
4833
+ unsigned int task_flags;
4834
+
4835
+ if (!tsk->state)
41614836 return;
4837
+
4838
+ task_flags = tsk->flags;
4839
+ /*
4840
+ * If a worker went to sleep, notify and ask workqueue whether
4841
+ * it wants to wake up a task to maintain concurrency.
4842
+ * As this function is called inside the schedule() context,
4843
+ * we disable preemption to avoid a recursive call to schedule() from
4844
+ * the possible wakeup of a kworker, and because wq_worker_sleeping()
4845
+ * requires it.
4846
+ */
4847
+ if (task_flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
4848
+ preempt_disable();
4849
+ if (task_flags & PF_WQ_WORKER)
4850
+ wq_worker_sleeping(tsk);
4851
+ else
4852
+ io_wq_worker_sleeping(tsk);
4853
+ preempt_enable_no_resched();
4854
+ }
4855
+
4856
+ if (tsk_is_pi_blocked(tsk))
4857
+ return;
4858
+
41624859 /*
41634860 * If we are going to sleep and we have plugged IO queued,
41644861 * make sure to submit it to avoid deadlocks.
41654862 */
41664863 if (blk_needs_flush_plug(tsk))
41674864 blk_schedule_flush_plug(tsk);
4865
+}
4866
+
4867
+static void sched_update_worker(struct task_struct *tsk)
4868
+{
4869
+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
4870
+ if (tsk->flags & PF_WQ_WORKER)
4871
+ wq_worker_running(tsk);
4872
+ else
4873
+ io_wq_worker_running(tsk);
4874
+ }
41684875 }
41694876
41704877 asmlinkage __visible void __sched schedule(void)
....@@ -4177,6 +4884,7 @@
41774884 __schedule(false);
41784885 sched_preempt_enable_no_resched();
41794886 } while (need_resched());
4887
+ sched_update_worker(tsk);
41804888 }
41814889 EXPORT_SYMBOL(schedule);
41824890
....@@ -4265,11 +4973,10 @@
42654973 } while (need_resched());
42664974 }
42674975
4268
-#ifdef CONFIG_PREEMPT
4976
+#ifdef CONFIG_PREEMPTION
42694977 /*
4270
- * this is the entry point to schedule() from in-kernel preemption
4271
- * off of preempt_enable. Kernel preemptions off return from interrupt
4272
- * occur there and call schedule directly.
4978
+ * This is the entry point to schedule() from in-kernel preemption
4979
+ * off of preempt_enable.
42734980 */
42744981 asmlinkage __visible void __sched notrace preempt_schedule(void)
42754982 {
....@@ -4337,10 +5044,10 @@
43375044 }
43385045 EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
43395046
4340
-#endif /* CONFIG_PREEMPT */
5047
+#endif /* CONFIG_PREEMPTION */
43415048
43425049 /*
4343
- * this is the entry point to schedule() from kernel preemption
5050
+ * This is the entry point to schedule() from kernel preemption
43445051 * off of irq context.
43455052 * Note, that this is called and return with irqs disabled. This will
43465053 * protect us against recursive calling from irq.
....@@ -4368,9 +5075,22 @@
43685075 int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
43695076 void *key)
43705077 {
4371
- return try_to_wake_up(curr->private, mode, wake_flags, 1);
5078
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~(WF_SYNC | WF_ANDROID_VENDOR));
5079
+ return try_to_wake_up(curr->private, mode, wake_flags);
43725080 }
43735081 EXPORT_SYMBOL(default_wake_function);
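
Illustrative note: default_wake_function() is what the generic waitqueue machinery funnels into. A self-contained sketch of a waiter/waker pair that reaches it via wake_up(); my_wq and my_done are hypothetical:

#include <linux/wait.h>
#include <linux/sched.h>

static DECLARE_WAIT_QUEUE_HEAD(my_wq);
static int my_done;

static int my_wait_for_done(void)
{
	/* Sleeps interruptibly until my_done becomes non-zero. */
	return wait_event_interruptible(my_wq, READ_ONCE(my_done));
}

static void my_complete(void)
{
	WRITE_ONCE(my_done, 1);
	wake_up(&my_wq);	/* walks the waitqueue -> default_wake_function() -> try_to_wake_up() */
}
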
5082
+
5083
+static void __setscheduler_prio(struct task_struct *p, int prio)
5084
+{
5085
+ if (dl_prio(prio))
5086
+ p->sched_class = &dl_sched_class;
5087
+ else if (rt_prio(prio))
5088
+ p->sched_class = &rt_sched_class;
5089
+ else
5090
+ p->sched_class = &fair_sched_class;
5091
+
5092
+ p->prio = prio;
5093
+}
43745094
43755095 #ifdef CONFIG_RT_MUTEXES
43765096
....@@ -4408,6 +5128,7 @@
44085128 struct rq_flags rf;
44095129 struct rq *rq;
44105130
5131
+ trace_android_rvh_rtmutex_prepare_setprio(p, pi_task);
44115132 /* XXX used to be waiter->prio, not waiter->task->prio */
44125133 prio = __rt_effective_prio(pi_task, p->normal_prio);
44135134
....@@ -4482,31 +5203,29 @@
44825203 if (!dl_prio(p->normal_prio) ||
44835204 (pi_task && dl_prio(pi_task->prio) &&
44845205 dl_entity_preempt(&pi_task->dl, &p->dl))) {
4485
- p->dl.dl_boosted = 1;
5206
+ p->dl.pi_se = pi_task->dl.pi_se;
44865207 queue_flag |= ENQUEUE_REPLENISH;
4487
- } else
4488
- p->dl.dl_boosted = 0;
4489
- p->sched_class = &dl_sched_class;
5208
+ } else {
5209
+ p->dl.pi_se = &p->dl;
5210
+ }
44905211 } else if (rt_prio(prio)) {
44915212 if (dl_prio(oldprio))
4492
- p->dl.dl_boosted = 0;
5213
+ p->dl.pi_se = &p->dl;
44935214 if (oldprio < prio)
44945215 queue_flag |= ENQUEUE_HEAD;
4495
- p->sched_class = &rt_sched_class;
44965216 } else {
44975217 if (dl_prio(oldprio))
4498
- p->dl.dl_boosted = 0;
5218
+ p->dl.pi_se = &p->dl;
44995219 if (rt_prio(oldprio))
45005220 p->rt.timeout = 0;
4501
- p->sched_class = &fair_sched_class;
45025221 }
45035222
4504
- p->prio = prio;
5223
+ __setscheduler_prio(p, prio);
45055224
45065225 if (queued)
45075226 enqueue_task(rq, p, queue_flag);
45085227 if (running)
4509
- set_curr_task(rq, p);
5228
+ set_next_task(rq, p);
45105229
45115230 check_class_changed(rq, p, prev_class, oldprio);
45125231 out_unlock:
....@@ -4526,12 +5245,13 @@
45265245
45275246 void set_user_nice(struct task_struct *p, long nice)
45285247 {
4529
- bool queued, running;
4530
- int old_prio, delta;
5248
+ bool queued, running, allowed = false;
5249
+ int old_prio;
45315250 struct rq_flags rf;
45325251 struct rq *rq;
45335252
4534
- if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE)
5253
+ trace_android_rvh_set_user_nice(p, &nice, &allowed);
5254
+ if ((task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) && !allowed)
45355255 return;
45365256 /*
45375257 * We have to be careful, if called from sys_setpriority(),
....@@ -4558,22 +5278,21 @@
45585278 put_prev_task(rq, p);
45595279
45605280 p->static_prio = NICE_TO_PRIO(nice);
4561
- set_load_weight(p, true);
5281
+ set_load_weight(p);
45625282 old_prio = p->prio;
45635283 p->prio = effective_prio(p);
4564
- delta = p->prio - old_prio;
45655284
4566
- if (queued) {
5285
+ if (queued)
45675286 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
4568
- /*
4569
- * If the task increased its priority or is running and
4570
- * lowered its priority, then reschedule its CPU:
4571
- */
4572
- if (delta < 0 || (delta > 0 && task_running(rq, p)))
4573
- resched_curr(rq);
4574
- }
45755287 if (running)
4576
- set_curr_task(rq, p);
5288
+ set_next_task(rq, p);
5289
+
5290
+ /*
5291
+ * If the task increased its priority or is running and
5292
+ * lowered its priority, then reschedule its CPU:
5293
+ */
5294
+ p->sched_class->prio_changed(rq, p, old_prio);
5295
+
45775296 out_unlock:
45785297 task_rq_unlock(rq, p, &rf);
45795298 }
....@@ -4658,7 +5377,7 @@
46585377 return 0;
46595378
46605379 #ifdef CONFIG_SMP
4661
- if (!llist_empty(&rq->wake_list))
5380
+ if (rq->ttwu_pending)
46625381 return 0;
46635382 #endif
46645383
....@@ -4681,6 +5400,7 @@
46815400
46825401 return 1;
46835402 }
5403
+EXPORT_SYMBOL_GPL(available_idle_cpu);
46845404
46855405 /**
46865406 * idle_task - return the idle task for a given CPU.
....@@ -4732,36 +5452,7 @@
47325452 */
47335453 p->rt_priority = attr->sched_priority;
47345454 p->normal_prio = normal_prio(p);
4735
- set_load_weight(p, true);
4736
-}
4737
-
4738
-/* Actually do priority change: must hold pi & rq lock. */
4739
-static void __setscheduler(struct rq *rq, struct task_struct *p,
4740
- const struct sched_attr *attr, bool keep_boost)
4741
-{
4742
- /*
4743
- * If params can't change scheduling class changes aren't allowed
4744
- * either.
4745
- */
4746
- if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)
4747
- return;
4748
-
4749
- __setscheduler_params(p, attr);
4750
-
4751
- /*
4752
- * Keep a potential priority boosting if called from
4753
- * sched_setscheduler().
4754
- */
4755
- p->prio = normal_prio(p);
4756
- if (keep_boost)
4757
- p->prio = rt_effective_prio(p, p->prio);
4758
-
4759
- if (dl_prio(p->prio))
4760
- p->sched_class = &dl_sched_class;
4761
- else if (rt_prio(p->prio))
4762
- p->sched_class = &rt_sched_class;
4763
- else
4764
- p->sched_class = &fair_sched_class;
5455
+ set_load_weight(p);
47655456 }
47665457
47675458 /*
....@@ -4784,15 +5475,14 @@
47845475 const struct sched_attr *attr,
47855476 bool user, bool pi)
47865477 {
4787
- int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
4788
- MAX_RT_PRIO - 1 - attr->sched_priority;
4789
- int retval, oldprio, oldpolicy = -1, queued, running;
4790
- int new_effective_prio, policy = attr->sched_policy;
5478
+ int oldpolicy = -1, policy = attr->sched_policy;
5479
+ int retval, oldprio, newprio, queued, running;
47915480 const struct sched_class *prev_class;
47925481 struct rq_flags rf;
47935482 int reset_on_fork;
47945483 int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
47955484 struct rq *rq;
5485
+ bool cpuset_locked = false;
47965486
47975487 /* The pi code expects interrupts enabled */
47985488 BUG_ON(pi && in_interrupt());
....@@ -4860,7 +5550,7 @@
48605550 * Treat SCHED_IDLE as nice 20. Only allow a switch to
48615551 * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
48625552 */
4863
- if (idle_policy(p->policy) && !idle_policy(policy)) {
5553
+ if (task_has_idle_policy(p) && !idle_policy(policy)) {
48645554 if (!can_nice(p, task_nice(p)))
48655555 return -EPERM;
48665556 }
....@@ -4871,6 +5561,10 @@
48715561
48725562 /* Normal users shall not reset the sched_reset_on_fork flag: */
48735563 if (p->sched_reset_on_fork && !reset_on_fork)
5564
+ return -EPERM;
5565
+
5566
+ /* Can't change util-clamps */
5567
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
48745568 return -EPERM;
48755569 }
48765570
....@@ -4891,6 +5585,15 @@
48915585 }
48925586
48935587 /*
5588
+ * SCHED_DEADLINE bandwidth accounting relies on stable cpusets
5589
+ * information.
5590
+ */
5591
+ if (dl_policy(policy) || dl_policy(p->policy)) {
5592
+ cpuset_locked = true;
5593
+ cpuset_lock();
5594
+ }
5595
+
5596
+ /*
48945597 * Make sure no PI-waiters arrive (or leave) while we are
48955598 * changing the priority of the task:
48965599 *
....@@ -4904,8 +5607,8 @@
49045607 * Changing the policy of the stop threads its a very bad idea:
49055608 */
49065609 if (p == rq->stop) {
4907
- task_rq_unlock(rq, p, &rf);
4908
- return -EINVAL;
5610
+ retval = -EINVAL;
5611
+ goto unlock;
49095612 }
49105613
49115614 /*
....@@ -4923,8 +5626,8 @@
49235626 goto change;
49245627
49255628 p->sched_reset_on_fork = reset_on_fork;
4926
- task_rq_unlock(rq, p, &rf);
4927
- return 0;
5629
+ retval = 0;
5630
+ goto unlock;
49285631 }
49295632 change:
49305633
....@@ -4937,8 +5640,8 @@
49375640 if (rt_bandwidth_enabled() && rt_policy(policy) &&
49385641 task_group(p)->rt_bandwidth.rt_runtime == 0 &&
49395642 !task_group_is_autogroup(task_group(p))) {
4940
- task_rq_unlock(rq, p, &rf);
4941
- return -EPERM;
5643
+ retval = -EPERM;
5644
+ goto unlock;
49425645 }
49435646 #endif
49445647 #ifdef CONFIG_SMP
....@@ -4951,10 +5654,10 @@
49515654 * the entire root_domain to become SCHED_DEADLINE. We
49525655 * will also fail if there's no bandwidth available.
49535656 */
4954
- if (!cpumask_subset(span, &p->cpus_allowed) ||
5657
+ if (!cpumask_subset(span, p->cpus_ptr) ||
49555658 rq->rd->dl_bw.bw == 0) {
4956
- task_rq_unlock(rq, p, &rf);
4957
- return -EPERM;
5659
+ retval = -EPERM;
5660
+ goto unlock;
49585661 }
49595662 }
49605663 #endif
....@@ -4964,6 +5667,8 @@
49645667 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
49655668 policy = oldpolicy = -1;
49665669 task_rq_unlock(rq, p, &rf);
5670
+ if (cpuset_locked)
5671
+ cpuset_unlock();
49675672 goto recheck;
49685673 }
49695674
....@@ -4973,13 +5678,14 @@
49735678 * is available.
49745679 */
49755680 if ((dl_policy(policy) || dl_task(p)) && sched_dl_overflow(p, policy, attr)) {
4976
- task_rq_unlock(rq, p, &rf);
4977
- return -EBUSY;
5681
+ retval = -EBUSY;
5682
+ goto unlock;
49785683 }
49795684
49805685 p->sched_reset_on_fork = reset_on_fork;
49815686 oldprio = p->prio;
49825687
5688
+ newprio = __normal_prio(policy, attr->sched_priority, attr->sched_nice);
49835689 if (pi) {
49845690 /*
49855691 * Take priority boosted tasks into account. If the new
....@@ -4988,8 +5694,8 @@
49885694 * the runqueue. This will be done when the task deboost
49895695 * itself.
49905696 */
4991
- new_effective_prio = rt_effective_prio(p, newprio);
4992
- if (new_effective_prio == oldprio)
5697
+ newprio = rt_effective_prio(p, newprio);
5698
+ if (newprio == oldprio)
49935699 queue_flags &= ~DEQUEUE_MOVE;
49945700 }
49955701
....@@ -5002,7 +5708,11 @@
50025708
50035709 prev_class = p->sched_class;
50045710
5005
- __setscheduler(rq, p, attr, pi);
5711
+ if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
5712
+ __setscheduler_params(p, attr);
5713
+ __setscheduler_prio(p, newprio);
5714
+ trace_android_rvh_setscheduler(p);
5715
+ }
50065716 __setscheduler_uclamp(p, attr);
50075717
50085718 if (queued) {
....@@ -5016,7 +5726,7 @@
50165726 enqueue_task(rq, p, queue_flags);
50175727 }
50185728 if (running)
5019
- set_curr_task(rq, p);
5729
+ set_next_task(rq, p);
50205730
50215731 check_class_changed(rq, p, prev_class, oldprio);
50225732
....@@ -5024,14 +5734,23 @@
50245734 preempt_disable();
50255735 task_rq_unlock(rq, p, &rf);
50265736
5027
- if (pi)
5737
+ if (pi) {
5738
+ if (cpuset_locked)
5739
+ cpuset_unlock();
50285740 rt_mutex_adjust_pi(p);
5741
+ }
50295742
50305743 /* Run balance callbacks after we've adjusted the PI chain: */
50315744 balance_callback(rq);
50325745 preempt_enable();
50335746
50345747 return 0;
5748
+
5749
+unlock:
5750
+ task_rq_unlock(rq, p, &rf);
5751
+ if (cpuset_locked)
5752
+ cpuset_unlock();
5753
+ return retval;
50355754 }
50365755
50375756 static int _sched_setscheduler(struct task_struct *p, int policy,
....@@ -5043,6 +5762,14 @@
50435762 .sched_nice = PRIO_TO_NICE(p->static_prio),
50445763 };
50455764
5765
+ if (IS_ENABLED(CONFIG_ROCKCHIP_OPTIMIZE_RT_PRIO) &&
5766
+ ((policy == SCHED_FIFO) || (policy == SCHED_RR))) {
5767
+ attr.sched_priority /= 2;
5768
+ if (!check)
5769
+ attr.sched_priority += MAX_RT_PRIO / 2;
5770
+ if (!attr.sched_priority)
5771
+ attr.sched_priority = 1;
5772
+ }
50465773 /* Fixup the legacy SCHED_RESET_ON_FORK hack. */
50475774 if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) {
50485775 attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK;
....@@ -5057,6 +5784,8 @@
50575784 * @p: the task in question.
50585785 * @policy: new policy.
50595786 * @param: structure containing the new RT priority.
5787
+ *
5788
+ * Use sched_set_fifo(), read its comment.
50605789 *
50615790 * Return: 0 on success. An error code otherwise.
50625791 *
....@@ -5079,6 +5808,7 @@
50795808 {
50805809 return __sched_setscheduler(p, attr, false, true);
50815810 }
5811
+EXPORT_SYMBOL_GPL(sched_setattr_nocheck);
50825812
50835813 /**
50845814 * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
....@@ -5099,6 +5829,51 @@
50995829 return _sched_setscheduler(p, policy, param, false);
51005830 }
51015831 EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck);
5832
+
5833
+/*
5834
+ * SCHED_FIFO is a broken scheduler model; that is, it is fundamentally
5835
+ * incapable of resource management, which is the one thing an OS really should
5836
+ * be doing.
5837
+ *
5838
+ * This is of course the reason it is limited to privileged users only.
5839
+ *
5840
+ * Worse still; it is fundamentally impossible to compose static priority
5841
+ * workloads. You cannot take two correctly working static prio workloads
5842
+ * and smash them together and still expect them to work.
5843
+ *
5844
+ * For this reason 'all' FIFO tasks the kernel creates are basically at:
5845
+ *
5846
+ * MAX_RT_PRIO / 2
5847
+ *
5848
+ * The administrator _MUST_ configure the system, the kernel simply doesn't
5849
+ * know enough information to make a sensible choice.
5850
+ */
5851
+void sched_set_fifo(struct task_struct *p)
5852
+{
5853
+ struct sched_param sp = { .sched_priority = MAX_RT_PRIO / 2 };
5854
+ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0);
5855
+}
5856
+EXPORT_SYMBOL_GPL(sched_set_fifo);
5857
+
5858
+/*
5859
+ * For when you don't much care about FIFO, but want to be above SCHED_NORMAL.
5860
+ */
5861
+void sched_set_fifo_low(struct task_struct *p)
5862
+{
5863
+ struct sched_param sp = { .sched_priority = 1 };
5864
+ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0);
5865
+}
5866
+EXPORT_SYMBOL_GPL(sched_set_fifo_low);
5867
+
5868
+void sched_set_normal(struct task_struct *p, int nice)
5869
+{
5870
+ struct sched_attr attr = {
5871
+ .sched_policy = SCHED_NORMAL,
5872
+ .sched_nice = nice,
5873
+ };
5874
+ WARN_ON_ONCE(sched_setattr_nocheck(p, &attr) != 0);
5875
+}
5876
+EXPORT_SYMBOL_GPL(sched_set_normal);
51025877
51035878 static int
51045879 do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
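The sched_set_fifo(), sched_set_fifo_low() and sched_set_normal() helpers added above exist so that kernel code stops open-coding sched_setscheduler_nocheck() with hand-picked RT priorities. A minimal in-kernel usage sketch, assuming a kthread-based driver; the thread function and all "example_" names are illustrative, not from this file:

#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/sched.h>

static int example_worker_fn(void *data)
{
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();		/* sleep until woken with work (illustrative) */
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

static struct task_struct *example_start_worker(void)
{
	struct task_struct *tsk;

	tsk = kthread_create(example_worker_fn, NULL, "example-fifo");
	if (IS_ERR(tsk))
		return tsk;

	/*
	 * Take the generic in-kernel FIFO slot (MAX_RT_PRIO / 2) rather than
	 * inventing a special priority; the administrator can still
	 * re-prioritize the thread from userspace.
	 */
	sched_set_fifo(tsk);
	wake_up_process(tsk);
	return tsk;
}

sched_set_fifo_low() is the variant for threads that merely need to sit above SCHED_NORMAL, and sched_set_normal() undoes either of them.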
....@@ -5130,9 +5905,6 @@
51305905 u32 size;
51315906 int ret;
51325907
5133
- if (!access_ok(VERIFY_WRITE, uattr, SCHED_ATTR_SIZE_VER0))
5134
- return -EFAULT;
5135
-
51365908 /* Zero the full structure, so that a short copy will be nice: */
51375909 memset(attr, 0, sizeof(*attr));
51385910
....@@ -5140,44 +5912,18 @@
51405912 if (ret)
51415913 return ret;
51425914
5143
- /* Bail out on silly large: */
5144
- if (size > PAGE_SIZE)
5145
- goto err_size;
5146
-
51475915 /* ABI compatibility quirk: */
51485916 if (!size)
51495917 size = SCHED_ATTR_SIZE_VER0;
5150
-
5151
- if (size < SCHED_ATTR_SIZE_VER0)
5918
+ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE)
51525919 goto err_size;
51535920
5154
- /*
5155
- * If we're handed a bigger struct than we know of,
5156
- * ensure all the unknown bits are 0 - i.e. new
5157
- * user-space does not rely on any kernel feature
5158
- * extensions we dont know about yet.
5159
- */
5160
- if (size > sizeof(*attr)) {
5161
- unsigned char __user *addr;
5162
- unsigned char __user *end;
5163
- unsigned char val;
5164
-
5165
- addr = (void __user *)uattr + sizeof(*attr);
5166
- end = (void __user *)uattr + size;
5167
-
5168
- for (; addr < end; addr++) {
5169
- ret = get_user(val, addr);
5170
- if (ret)
5171
- return ret;
5172
- if (val)
5173
- goto err_size;
5174
- }
5175
- size = sizeof(*attr);
5921
+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size);
5922
+ if (ret) {
5923
+ if (ret == -E2BIG)
5924
+ goto err_size;
5925
+ return ret;
51765926 }
5177
-
5178
- ret = copy_from_user(attr, uattr, size);
5179
- if (ret)
5180
- return -EFAULT;
51815927
51825928 if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) &&
51835929 size < SCHED_ATTR_SIZE_VER1)
....@@ -5194,6 +5940,16 @@
51945940 err_size:
51955941 put_user(sizeof(*attr), &uattr->size);
51965942 return -E2BIG;
5943
+}
5944
+
5945
+static void get_params(struct task_struct *p, struct sched_attr *attr)
5946
+{
5947
+ if (task_has_dl_policy(p))
5948
+ __getparam_dl(p, attr);
5949
+ else if (task_has_rt_policy(p))
5950
+ attr->sched_priority = p->rt_priority;
5951
+ else
5952
+ attr->sched_nice = task_nice(p);
51975953 }
51985954
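The sched_copy_attr() rework above replaces the hand-rolled access_ok()/get_user() loop with copy_struct_from_user(), the generic helper for user-extensible structs. Its contract, modelled in plain C below (a sketch of the semantics only; the kernel helper naturally uses copy_from_user() and check_zeroed_user() instead of direct memory access):

#include <errno.h>
#include <stddef.h>
#include <string.h>

/*
 * Model of copy_struct_from_user(dst, ksize, src, usize):
 *  - newer userspace (usize > ksize): the unknown trailing bytes must all be
 *    zero, otherwise the call fails with -E2BIG;
 *  - older userspace (usize < ksize): the kernel-side tail is zero-filled;
 *  - finally min(ksize, usize) bytes are copied.
 */
static int copy_struct_model(void *dst, size_t ksize, const void *src, size_t usize)
{
	size_t size = usize < ksize ? usize : ksize;
	const unsigned char *rest = (const unsigned char *)src + ksize;
	size_t i;

	if (usize > ksize) {
		for (i = 0; i < usize - ksize; i++) {
			if (rest[i])
				return -E2BIG;
		}
	}
	if (usize < ksize)
		memset((unsigned char *)dst + usize, 0, ksize - usize);
	memcpy(dst, src, size);
	return 0;
}

In sched_copy_attr() the -E2BIG case is routed to err_size, so userspace additionally learns the kernel's sched_attr size through the put_user() there.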
51995955 /**
....@@ -5257,6 +6013,8 @@
52576013 rcu_read_unlock();
52586014
52596015 if (likely(p)) {
6016
+ if (attr.sched_flags & SCHED_FLAG_KEEP_PARAMS)
6017
+ get_params(p, &attr);
52606018 retval = sched_setattr(p, &attr);
52616019 put_task_struct(p);
52626020 }
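With get_params() filling in the current scheduling parameters whenever SCHED_FLAG_KEEP_PARAMS is passed, userspace can change a single attribute, typically a utilization clamp, without respecifying policy, priority or nice value. A minimal userspace sketch using the raw syscall; it assumes the uapi headers providing struct sched_attr and the SCHED_FLAG_* bits (linux/sched/types.h and linux/sched.h) are installed:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/sched.h>		/* SCHED_FLAG_KEEP_ALL, SCHED_FLAG_UTIL_CLAMP_MIN */
#include <linux/sched/types.h>		/* struct sched_attr */

/* Raise only the uclamp minimum of the calling task; keep policy and params. */
static int set_uclamp_min(unsigned int util_min)
{
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.sched_flags = SCHED_FLAG_KEEP_ALL | SCHED_FLAG_UTIL_CLAMP_MIN;
	attr.sched_util_min = util_min;

	return syscall(SYS_sched_setattr, 0 /* current task */, &attr, 0 /* flags */);
}

int main(void)
{
	if (set_uclamp_min(128))
		perror("sched_setattr");
	return 0;
}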
....@@ -5350,7 +6108,7 @@
53506108 {
53516109 unsigned int ksize = sizeof(*kattr);
53526110
5353
- if (!access_ok(VERIFY_WRITE, uattr, usize))
6111
+ if (!access_ok(uattr, usize))
53546112 return -EFAULT;
53556113
53566114 /*
....@@ -5378,7 +6136,7 @@
53786136 * sys_sched_getattr - similar to sched_getparam, but with sched_attr
53796137 * @pid: the pid in question.
53806138 * @uattr: structure containing the extended parameters.
5381
- * @usize: sizeof(attr) that user-space knows about, for forwards and backwards compatibility.
6139
+ * @usize: sizeof(attr) for fwd/bwd comp.
53826140 * @flags: for future extension.
53836141 */
53846142 SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
....@@ -5405,14 +6163,15 @@
54056163 kattr.sched_policy = p->policy;
54066164 if (p->sched_reset_on_fork)
54076165 kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK;
5408
- if (task_has_dl_policy(p))
5409
- __getparam_dl(p, &kattr);
5410
- else if (task_has_rt_policy(p))
5411
- kattr.sched_priority = p->rt_priority;
5412
- else
5413
- kattr.sched_nice = task_nice(p);
6166
+ get_params(p, &kattr);
6167
+ kattr.sched_flags &= SCHED_FLAG_ALL;
54146168
54156169 #ifdef CONFIG_UCLAMP_TASK
6170
+ /*
6171
+ * This could race with another potential updater, but this is fine
6172
+ * because it'll correctly read the old or the new value. We don't need
6173
+ * to guarantee who wins the race as long as it doesn't return garbage.
6174
+ */
54166175 kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value;
54176176 kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value;
54186177 #endif
....@@ -5431,6 +6190,7 @@
54316190 cpumask_var_t cpus_allowed, new_mask;
54326191 struct task_struct *p;
54336192 int retval;
6193
+ int skip = 0;
54346194
54356195 rcu_read_lock();
54366196
....@@ -5466,6 +6226,9 @@
54666226 rcu_read_unlock();
54676227 }
54686228
6229
+ trace_android_vh_sched_setaffinity_early(p, in_mask, &skip);
6230
+ if (skip)
6231
+ goto out_free_new_mask;
54696232 retval = security_task_setscheduler(p);
54706233 if (retval)
54716234 goto out_free_new_mask;
....@@ -5506,6 +6269,9 @@
55066269 goto again;
55076270 }
55086271 }
6272
+
6273
+ trace_android_rvh_sched_setaffinity(p, in_mask, &retval);
6274
+
55096275 out_free_new_mask:
55106276 free_cpumask_var(new_mask);
55116277 out_free_cpus_allowed:
....@@ -5514,7 +6280,6 @@
55146280 put_task_struct(p);
55156281 return retval;
55166282 }
5517
-EXPORT_SYMBOL_GPL(sched_setaffinity);
55186283
55196284 static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
55206285 struct cpumask *new_mask)
....@@ -5569,7 +6334,7 @@
55696334 goto out_unlock;
55706335
55716336 raw_spin_lock_irqsave(&p->pi_lock, flags);
5572
- cpumask_and(mask, &p->cpus_allowed, cpu_active_mask);
6337
+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask);
55736338 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
55746339
55756340 out_unlock:
....@@ -5598,14 +6363,14 @@
55986363 if (len & (sizeof(unsigned long)-1))
55996364 return -EINVAL;
56006365
5601
- if (!alloc_cpumask_var(&mask, GFP_KERNEL))
6366
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
56026367 return -ENOMEM;
56036368
56046369 ret = sched_getaffinity(pid, mask);
56056370 if (ret == 0) {
56066371 unsigned int retlen = min(len, cpumask_size());
56076372
5608
- if (copy_to_user(user_mask_ptr, mask, retlen))
6373
+ if (copy_to_user(user_mask_ptr, cpumask_bits(mask), retlen))
56096374 ret = -EFAULT;
56106375 else
56116376 ret = retlen;
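Two details interact in the hunk above: the temporary mask is now zero-allocated and copied via cpumask_bits(), so the retlen bytes handed back to userspace cannot contain uninitialized data past nr_cpu_ids, and the raw syscall (unlike the glibc wrapper, which returns 0) reports that byte count to the caller. A small userspace sketch of the raw call:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	cpu_set_t set;
	long ret;

	CPU_ZERO(&set);
	/* Raw syscall: on success it returns the number of bytes written (retlen). */
	ret = syscall(SYS_sched_getaffinity, 0, sizeof(set), &set);
	if (ret < 0) {
		perror("sched_getaffinity");
		return 1;
	}
	printf("kernel wrote %ld bytes; %d CPUs allowed\n", ret, CPU_COUNT(&set));
	return 0;
}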
....@@ -5633,6 +6398,8 @@
56336398 schedstat_inc(rq->yld_count);
56346399 current->sched_class->yield_task(rq);
56356400
6401
+ trace_android_rvh_do_sched_yield(rq);
6402
+
56366403 preempt_disable();
56376404 rq_unlock_irq(rq, &rf);
56386405 sched_preempt_enable_no_resched();
....@@ -5646,7 +6413,7 @@
56466413 return 0;
56476414 }
56486415
5649
-#ifndef CONFIG_PREEMPT
6416
+#ifndef CONFIG_PREEMPTION
56506417 int __sched _cond_resched(void)
56516418 {
56526419 if (should_resched(0)) {
....@@ -5663,7 +6430,7 @@
56636430 * __cond_resched_lock() - if a reschedule is pending, drop the given lock,
56646431 * call schedule, and on return reacquire the lock.
56656432 *
5666
- * This works OK both with and without CONFIG_PREEMPT. We do strange low-level
6433
+ * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level
56676434 * operations here to prevent schedule() from being called twice (once via
56686435 * spin_unlock(), once by hand).
56696436 */
....@@ -5767,7 +6534,7 @@
57676534 if (task_running(p_rq, p) || p->state)
57686535 goto out_unlock;
57696536
5770
- yielded = curr->sched_class->yield_to_task(rq, p, preempt);
6537
+ yielded = curr->sched_class->yield_to_task(rq, p);
57716538 if (yielded) {
57726539 schedstat_inc(rq->yld_count);
57736540 /*
....@@ -5933,7 +6700,7 @@
59336700 * an error code.
59346701 */
59356702 SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
5936
- struct timespec __user *, interval)
6703
+ struct __kernel_timespec __user *, interval)
59376704 {
59386705 struct timespec64 t;
59396706 int retval = sched_rr_get_interval(pid, &t);
....@@ -5944,16 +6711,15 @@
59446711 return retval;
59456712 }
59466713
5947
-#ifdef CONFIG_COMPAT
5948
-COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval,
5949
- compat_pid_t, pid,
5950
- struct compat_timespec __user *, interval)
6714
+#ifdef CONFIG_COMPAT_32BIT_TIME
6715
+SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid,
6716
+ struct old_timespec32 __user *, interval)
59516717 {
59526718 struct timespec64 t;
59536719 int retval = sched_rr_get_interval(pid, &t);
59546720
59556721 if (retval == 0)
5956
- retval = compat_put_timespec64(&t, interval);
6722
+ retval = put_old_timespec32(&t, interval);
59576723 return retval;
59586724 }
59596725 #endif
....@@ -5966,10 +6732,10 @@
59666732 if (!try_get_task_stack(p))
59676733 return;
59686734
5969
- printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p));
6735
+ pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p));
59706736
59716737 if (p->state == TASK_RUNNING)
5972
- printk(KERN_CONT " running task ");
6738
+ pr_cont(" running task ");
59736739 #ifdef CONFIG_DEBUG_STACK_USAGE
59746740 free = stack_not_used(p);
59756741 #endif
....@@ -5978,12 +6744,13 @@
59786744 if (pid_alive(p))
59796745 ppid = task_pid_nr(rcu_dereference(p->real_parent));
59806746 rcu_read_unlock();
5981
- printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free,
5982
- task_pid_nr(p), ppid,
6747
+ pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n",
6748
+ free, task_pid_nr(p), ppid,
59836749 (unsigned long)task_thread_info(p)->flags);
59846750
59856751 print_worker_info(KERN_INFO, p);
5986
- show_stack(p, NULL);
6752
+ trace_android_vh_sched_show_task(p);
6753
+ show_stack(p, NULL, KERN_INFO);
59876754 put_task_stack(p);
59886755 }
59896756 EXPORT_SYMBOL_GPL(sched_show_task);
....@@ -6014,13 +6781,6 @@
60146781 {
60156782 struct task_struct *g, *p;
60166783
6017
-#if BITS_PER_LONG == 32
6018
- printk(KERN_INFO
6019
- " task PC stack pid father\n");
6020
-#else
6021
- printk(KERN_INFO
6022
- " task PC stack pid father\n");
6023
-#endif
60246784 rcu_read_lock();
60256785 for_each_process_thread(g, p) {
60266786 /*
....@@ -6056,7 +6816,7 @@
60566816 * NOTE: this function does not set the idle thread's NEED_RESCHED
60576817 * flag, to make booting more robust.
60586818 */
6059
-void init_idle(struct task_struct *idle, int cpu)
6819
+void __init init_idle(struct task_struct *idle, int cpu)
60606820 {
60616821 struct rq *rq = cpu_rq(cpu);
60626822 unsigned long flags;
....@@ -6069,9 +6829,6 @@
60696829 idle->state = TASK_RUNNING;
60706830 idle->se.exec_start = sched_clock();
60716831 idle->flags |= PF_IDLE;
6072
-
6073
- scs_task_reset(idle);
6074
- kasan_unpoison_task_stack(idle);
60756832
60766833 #ifdef CONFIG_SMP
60776834 /*
....@@ -6096,7 +6853,8 @@
60966853 __set_task_cpu(idle, cpu);
60976854 rcu_read_unlock();
60986855
6099
- rq->curr = rq->idle = idle;
6856
+ rq->idle = idle;
6857
+ rcu_assign_pointer(rq->curr, idle);
61006858 idle->on_rq = TASK_ON_RQ_QUEUED;
61016859 #ifdef CONFIG_SMP
61026860 idle->on_cpu = 1;
....@@ -6133,8 +6891,7 @@
61336891 return ret;
61346892 }
61356893
6136
-int task_can_attach(struct task_struct *p,
6137
- const struct cpumask *cs_cpus_allowed)
6894
+int task_can_attach(struct task_struct *p)
61386895 {
61396896 int ret = 0;
61406897
....@@ -6145,18 +6902,11 @@
61456902 * allowed nodes is unnecessary. Thus, cpusets are not
61466903 * applicable for such threads. This prevents checking for
61476904 * success of set_cpus_allowed_ptr() on all attached tasks
6148
- * before cpus_allowed may be changed.
6905
+ * before cpus_mask may be changed.
61496906 */
6150
- if (p->flags & PF_NO_SETAFFINITY) {
6907
+ if (p->flags & PF_NO_SETAFFINITY)
61516908 ret = -EINVAL;
6152
- goto out;
6153
- }
61546909
6155
- if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
6156
- cs_cpus_allowed))
6157
- ret = dl_task_can_attach(p, cs_cpus_allowed);
6158
-
6159
-out:
61606910 return ret;
61616911 }
61626912
....@@ -6172,7 +6922,7 @@
61726922 if (curr_cpu == target_cpu)
61736923 return 0;
61746924
6175
- if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed))
6925
+ if (!cpumask_test_cpu(target_cpu, p->cpus_ptr))
61766926 return -EINVAL;
61776927
61786928 /* TODO: This is not properly updating schedstats */
....@@ -6205,7 +6955,7 @@
62056955 if (queued)
62066956 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
62076957 if (running)
6208
- set_curr_task(rq, p);
6958
+ set_next_task(rq, p);
62096959 task_rq_unlock(rq, p, &rf);
62106960 }
62116961 #endif /* CONFIG_NUMA_BALANCING */
....@@ -6246,21 +6996,22 @@
62466996 atomic_long_add(delta, &calc_load_tasks);
62476997 }
62486998
6249
-static void put_prev_task_fake(struct rq *rq, struct task_struct *prev)
6999
+static struct task_struct *__pick_migrate_task(struct rq *rq)
62507000 {
7001
+ const struct sched_class *class;
7002
+ struct task_struct *next;
7003
+
7004
+ for_each_class(class) {
7005
+ next = class->pick_next_task(rq);
7006
+ if (next) {
7007
+ next->sched_class->put_prev_task(rq, next);
7008
+ return next;
7009
+ }
7010
+ }
7011
+
7012
+ /* The idle class should always have a runnable task */
7013
+ BUG();
62517014 }
6252
-
6253
-static const struct sched_class fake_sched_class = {
6254
- .put_prev_task = put_prev_task_fake,
6255
-};
6256
-
6257
-static struct task_struct fake_task = {
6258
- /*
6259
- * Avoid pull_{rt,dl}_task()
6260
- */
6261
- .prio = MAX_PRIO + 1,
6262
- .sched_class = &fake_sched_class,
6263
-};
62647015
62657016 /*
62667017 * Migrate all tasks from the rq, sleeping tasks will be migrated by
....@@ -6269,11 +7020,14 @@
62697020 * Called with rq->lock held even though we're in stop_machine() and
62707021 * there's no concurrency possible, we hold the required locks anyway
62717022 * because of lock validation efforts.
7023
+ *
7024
+ * force: if false, the function will skip CPU-pinned kthreads.
62727025 */
6273
-static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
7026
+static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf, bool force)
62747027 {
62757028 struct rq *rq = dead_rq;
6276
- struct task_struct *next, *stop = rq->stop;
7029
+ struct task_struct *next, *tmp, *stop = rq->stop;
7030
+ LIST_HEAD(percpu_kthreads);
62777031 struct rq_flags orf = *rf;
62787032 int dest_cpu;
62797033
....@@ -6295,6 +7049,11 @@
62957049 */
62967050 update_rq_clock(rq);
62977051
7052
+#ifdef CONFIG_SCHED_DEBUG
7053
+ /* note the clock update in orf */
7054
+ orf.clock_update_flags |= RQCF_UPDATED;
7055
+#endif
7056
+
62987057 for (;;) {
62997058 /*
63007059 * There's this thread running, bail when that's the only
....@@ -6303,15 +7062,24 @@
63037062 if (rq->nr_running == 1)
63047063 break;
63057064
6306
- /*
6307
- * pick_next_task() assumes pinned rq->lock:
6308
- */
6309
- next = pick_next_task(rq, &fake_task, rf);
6310
- BUG_ON(!next);
6311
- put_prev_task(rq, next);
7065
+ next = __pick_migrate_task(rq);
63127066
63137067 /*
6314
- * Rules for changing task_struct::cpus_allowed are holding
7068
+ * Argh ... no iterator for tasks, we need to remove the
7069
+ * kthread from the run-queue to continue.
7070
+ */
7071
+ if (!force && is_per_cpu_kthread(next)) {
7072
+ INIT_LIST_HEAD(&next->percpu_kthread_node);
7073
+ list_add(&next->percpu_kthread_node, &percpu_kthreads);
7074
+
7075
+ /* DEQUEUE_SAVE not used due to move_entity in rt */
7076
+ deactivate_task(rq, next,
7077
+ DEQUEUE_NOCLOCK);
7078
+ continue;
7079
+ }
7080
+
7081
+ /*
7082
+ * Rules for changing task_struct::cpus_mask are holding
63157083 * both pi_lock and rq->lock, such that holding either
63167084 * stabilizes the mask.
63177085 *
....@@ -6328,7 +7096,14 @@
63287096 * changed the task, WARN if weird stuff happened, because in
63297097 * that case the above rq->lock drop is a fail too.
63307098 */
6331
- if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
7099
+ if (task_rq(next) != rq || !task_on_rq_queued(next)) {
7100
+ /*
7101
+ * In the !force case, there is a hole between
7102
+ * rq_unlock() and rq_relock(), where another CPU might
7103
+ * not observe an up-to-date cpu_active_mask and try to
7104
+ * move tasks around.
7105
+ */
7106
+ WARN_ON(force);
63327107 raw_spin_unlock(&next->pi_lock);
63337108 continue;
63347109 }
....@@ -6345,7 +7120,49 @@
63457120 raw_spin_unlock(&next->pi_lock);
63467121 }
63477122
7123
+ list_for_each_entry_safe(next, tmp, &percpu_kthreads,
7124
+ percpu_kthread_node) {
7125
+
7126
+ /* ENQUEUE_RESTORE not used due to move_entity in rt */
7127
+ activate_task(rq, next, ENQUEUE_NOCLOCK);
7128
+ list_del(&next->percpu_kthread_node);
7129
+ }
7130
+
63487131 rq->stop = stop;
7132
+}
7133
+
7134
+static int drain_rq_cpu_stop(void *data)
7135
+{
7136
+ struct rq *rq = this_rq();
7137
+ struct rq_flags rf;
7138
+
7139
+ rq_lock_irqsave(rq, &rf);
7140
+ migrate_tasks(rq, &rf, false);
7141
+ rq_unlock_irqrestore(rq, &rf);
7142
+
7143
+ return 0;
7144
+}
7145
+
7146
+int sched_cpu_drain_rq(unsigned int cpu)
7147
+{
7148
+ struct cpu_stop_work *rq_drain = &(cpu_rq(cpu)->drain);
7149
+ struct cpu_stop_done *rq_drain_done = &(cpu_rq(cpu)->drain_done);
7150
+
7151
+ if (idle_cpu(cpu)) {
7152
+ rq_drain->done = NULL;
7153
+ return 0;
7154
+ }
7155
+
7156
+ return stop_one_cpu_async(cpu, drain_rq_cpu_stop, NULL, rq_drain,
7157
+ rq_drain_done);
7158
+}
7159
+
7160
+void sched_cpu_drain_rq_wait(unsigned int cpu)
7161
+{
7162
+ struct cpu_stop_work *rq_drain = &(cpu_rq(cpu)->drain);
7163
+
7164
+ if (rq_drain->done)
7165
+ cpu_stop_work_wait(rq_drain);
63497166 }
63507167 #endif /* CONFIG_HOTPLUG_CPU */
63517168
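sched_cpu_drain_rq() above only queues drain_rq_cpu_stop() on the target CPU through stop_one_cpu_async() and returns; sched_cpu_drain_rq_wait() later blocks until that stop work has run (idle CPUs are skipped outright, which is why the wait path checks rq_drain->done). A hypothetical caller in a CPU pause path would drain a set of CPUs in parallel and then wait for all of them; this caller is illustrative and not part of this tree:

#include <linux/cpumask.h>

/* Hypothetical pause-path caller: drain several runqueues in parallel. */
static int example_drain_cpus(const struct cpumask *cpus)
{
	unsigned int cpu;
	int ret;

	for_each_cpu(cpu, cpus) {
		ret = sched_cpu_drain_rq(cpu);	/* async: queues the stopper work */
		if (ret)
			return ret;
	}

	for_each_cpu(cpu, cpus)
		sched_cpu_drain_rq_wait(cpu);	/* wait for each drain to finish */

	return 0;
}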
....@@ -6417,8 +7234,10 @@
64177234 static int cpuset_cpu_inactive(unsigned int cpu)
64187235 {
64197236 if (!cpuhp_tasks_frozen) {
6420
- if (dl_cpu_busy(cpu))
6421
- return -EBUSY;
7237
+ int ret = dl_bw_check_overflow(cpu);
7238
+
7239
+ if (ret)
7240
+ return ret;
64227241 cpuset_update_active_cpus();
64237242 } else {
64247243 num_cpus_frozen++;
....@@ -6467,19 +7286,27 @@
64677286 return 0;
64687287 }
64697288
6470
-int sched_cpu_deactivate(unsigned int cpu)
7289
+int sched_cpus_activate(struct cpumask *cpus)
7290
+{
7291
+ unsigned int cpu;
7292
+
7293
+ for_each_cpu(cpu, cpus) {
7294
+ if (sched_cpu_activate(cpu)) {
7295
+ for_each_cpu_and(cpu, cpus, cpu_active_mask)
7296
+ sched_cpu_deactivate(cpu);
7297
+
7298
+ return -EBUSY;
7299
+ }
7300
+ }
7301
+
7302
+ return 0;
7303
+}
7304
+
7305
+int _sched_cpu_deactivate(unsigned int cpu)
64717306 {
64727307 int ret;
64737308
64747309 set_cpu_active(cpu, false);
6475
- /*
6476
- * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU
6477
- * users of this state to go away such that all new such users will
6478
- * observe it.
6479
- *
6480
- * Do sync before park smpboot threads to take care the rcu boost case.
6481
- */
6482
- synchronize_rcu_mult(call_rcu, call_rcu_sched);
64837310
64847311 #ifdef CONFIG_SCHED_SMT
64857312 /*
....@@ -6498,6 +7325,46 @@
64987325 return ret;
64997326 }
65007327 sched_domains_numa_masks_clear(cpu);
7328
+
7329
+ update_max_interval();
7330
+
7331
+ return 0;
7332
+}
7333
+
7334
+int sched_cpu_deactivate(unsigned int cpu)
7335
+{
7336
+ int ret = _sched_cpu_deactivate(cpu);
7337
+
7338
+ if (ret)
7339
+ return ret;
7340
+
7341
+ /*
7342
+ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU
7343
+ * users of this state to go away such that all new such users will
7344
+ * observe it.
7345
+ *
7346
+ * Do sync before park smpboot threads to take care the rcu boost case.
7347
+ */
7348
+ synchronize_rcu();
7349
+
7350
+ return 0;
7351
+}
7352
+
7353
+int sched_cpus_deactivate_nosync(struct cpumask *cpus)
7354
+{
7355
+ unsigned int cpu;
7356
+
7357
+ for_each_cpu(cpu, cpus) {
7358
+ if (_sched_cpu_deactivate(cpu)) {
7359
+ for_each_cpu(cpu, cpus) {
7360
+ if (!cpu_active(cpu))
7361
+ sched_cpu_activate(cpu);
7362
+ }
7363
+
7364
+ return -EBUSY;
7365
+ }
7366
+ }
7367
+
65017368 return 0;
65027369 }
65037370
....@@ -6506,13 +7373,13 @@
65067373 struct rq *rq = cpu_rq(cpu);
65077374
65087375 rq->calc_load_update = calc_load_update;
6509
- update_max_interval();
65107376 }
65117377
65127378 int sched_cpu_starting(unsigned int cpu)
65137379 {
65147380 sched_rq_cpu_starting(cpu);
65157381 sched_tick_start(cpu);
7382
+ trace_android_rvh_sched_cpu_starting(cpu);
65167383 return 0;
65177384 }
65187385
....@@ -6523,7 +7390,6 @@
65237390 struct rq_flags rf;
65247391
65257392 /* Handle pending wakeups and then migrate everything off */
6526
- sched_ttwu_pending();
65277393 sched_tick_stop(cpu);
65287394
65297395 rq_lock_irqsave(rq, &rf);
....@@ -6531,12 +7397,13 @@
65317397 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
65327398 set_rq_offline(rq);
65337399 }
6534
- migrate_tasks(rq, &rf);
7400
+ migrate_tasks(rq, &rf, true);
65357401 BUG_ON(rq->nr_running != 1);
65367402 rq_unlock_irqrestore(rq, &rf);
65377403
7404
+ trace_android_rvh_sched_cpu_dying(cpu);
7405
+
65387406 calc_load_migrate(rq);
6539
- update_max_interval();
65407407 nohz_balance_exit_idle(rq);
65417408 hrtick_clear(rq);
65427409 return 0;
....@@ -6550,18 +7417,16 @@
65507417 /*
65517418 * There's no userspace yet to cause hotplug operations; hence all the
65527419 * CPU masks are stable and all blatant races in the below code cannot
6553
- * happen. The hotplug lock is nevertheless taken to satisfy lockdep,
6554
- * but there won't be any contention on it.
7420
+ * happen.
65557421 */
6556
- cpus_read_lock();
65577422 mutex_lock(&sched_domains_mutex);
65587423 sched_init_domains(cpu_active_mask);
65597424 mutex_unlock(&sched_domains_mutex);
6560
- cpus_read_unlock();
65617425
65627426 /* Move init over to a non-isolated CPU */
65637427 if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
65647428 BUG();
7429
+
65657430 sched_init_granularity();
65667431
65677432 init_sched_rt_class();
....@@ -6572,7 +7437,7 @@
65727437
65737438 static int __init migration_init(void)
65747439 {
6575
- sched_rq_cpu_starting(smp_processor_id());
7440
+ sched_cpu_starting(smp_processor_id());
65767441 return 0;
65777442 }
65787443 early_initcall(migration_init);
....@@ -6597,7 +7462,9 @@
65977462 * Every task in system belongs to this group at bootup.
65987463 */
65997464 struct task_group root_task_group;
7465
+EXPORT_SYMBOL_GPL(root_task_group);
66007466 LIST_HEAD(task_groups);
7467
+EXPORT_SYMBOL_GPL(task_groups);
66017468
66027469 /* Cacheline aligned slab cache for task_group */
66037470 static struct kmem_cache *task_group_cache __read_mostly;
....@@ -6608,19 +7475,27 @@
66087475
66097476 void __init sched_init(void)
66107477 {
6611
- int i, j;
6612
- unsigned long alloc_size = 0, ptr;
7478
+ unsigned long ptr = 0;
7479
+ int i;
7480
+
7481
+ /* Make sure the linker didn't screw up */
7482
+ BUG_ON(&idle_sched_class + 1 != &fair_sched_class ||
7483
+ &fair_sched_class + 1 != &rt_sched_class ||
7484
+ &rt_sched_class + 1 != &dl_sched_class);
7485
+#ifdef CONFIG_SMP
7486
+ BUG_ON(&dl_sched_class + 1 != &stop_sched_class);
7487
+#endif
66137488
66147489 wait_bit_init();
66157490
66167491 #ifdef CONFIG_FAIR_GROUP_SCHED
6617
- alloc_size += 2 * nr_cpu_ids * sizeof(void **);
7492
+ ptr += 2 * nr_cpu_ids * sizeof(void **);
66187493 #endif
66197494 #ifdef CONFIG_RT_GROUP_SCHED
6620
- alloc_size += 2 * nr_cpu_ids * sizeof(void **);
7495
+ ptr += 2 * nr_cpu_ids * sizeof(void **);
66217496 #endif
6622
- if (alloc_size) {
6623
- ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
7497
+ if (ptr) {
7498
+ ptr = (unsigned long)kzalloc(ptr, GFP_NOWAIT);
66247499
66257500 #ifdef CONFIG_FAIR_GROUP_SCHED
66267501 root_task_group.se = (struct sched_entity **)ptr;
....@@ -6629,6 +7504,8 @@
66297504 root_task_group.cfs_rq = (struct cfs_rq **)ptr;
66307505 ptr += nr_cpu_ids * sizeof(void **);
66317506
7507
+ root_task_group.shares = ROOT_TASK_GROUP_LOAD;
7508
+ init_cfs_bandwidth(&root_task_group.cfs_bandwidth);
66327509 #endif /* CONFIG_FAIR_GROUP_SCHED */
66337510 #ifdef CONFIG_RT_GROUP_SCHED
66347511 root_task_group.rt_se = (struct sched_rt_entity **)ptr;
....@@ -6681,7 +7558,6 @@
66817558 init_rt_rq(&rq->rt);
66827559 init_dl_rq(&rq->dl);
66837560 #ifdef CONFIG_FAIR_GROUP_SCHED
6684
- root_task_group.shares = ROOT_TASK_GROUP_LOAD;
66857561 INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
66867562 rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
66877563 /*
....@@ -6703,7 +7579,6 @@
67037579 * We achieve this by letting root_task_group's tasks sit
67047580 * directly in rq->cfs (i.e root_task_group->se[] = NULL).
67057581 */
6706
- init_cfs_bandwidth(&root_task_group.cfs_bandwidth);
67077582 init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
67087583 #endif /* CONFIG_FAIR_GROUP_SCHED */
67097584
....@@ -6711,10 +7586,6 @@
67117586 #ifdef CONFIG_RT_GROUP_SCHED
67127587 init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
67137588 #endif
6714
-
6715
- for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
6716
- rq->cpu_load[j] = 0;
6717
-
67187589 #ifdef CONFIG_SMP
67197590 rq->sd = NULL;
67207591 rq->rd = NULL;
....@@ -6733,16 +7604,17 @@
67337604
67347605 rq_attach_root(rq, &def_root_domain);
67357606 #ifdef CONFIG_NO_HZ_COMMON
6736
- rq->last_load_update_tick = jiffies;
67377607 rq->last_blocked_load_update_tick = jiffies;
67387608 atomic_set(&rq->nohz_flags, 0);
7609
+
7610
+ rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func);
67397611 #endif
67407612 #endif /* CONFIG_SMP */
67417613 hrtick_rq_init(rq);
67427614 atomic_set(&rq->nr_iowait, 0);
67437615 }
67447616
6745
- set_load_weight(&init_task, false);
7617
+ set_load_weight(&init_task);
67467618
67477619 /*
67487620 * The boot idle thread does lazy MMU switching as well:
....@@ -6811,7 +7683,7 @@
68117683 rcu_sleep_check();
68127684
68137685 if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
6814
- !is_idle_task(current)) ||
7686
+ !is_idle_task(current) && !current->non_block_count) ||
68157687 system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING ||
68167688 oops_in_progress)
68177689 return;
....@@ -6827,8 +7699,8 @@
68277699 "BUG: sleeping function called from invalid context at %s:%d\n",
68287700 file, line);
68297701 printk(KERN_ERR
6830
- "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
6831
- in_atomic(), irqs_disabled(),
7702
+ "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n",
7703
+ in_atomic(), irqs_disabled(), current->non_block_count,
68327704 current->pid, current->comm);
68337705
68347706 if (task_stack_end_corrupted(current))
....@@ -6840,13 +7712,43 @@
68407712 if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
68417713 && !preempt_count_equals(preempt_offset)) {
68427714 pr_err("Preemption disabled at:");
6843
- print_ip_sym(preempt_disable_ip);
6844
- pr_cont("\n");
7715
+ print_ip_sym(KERN_ERR, preempt_disable_ip);
68457716 }
7717
+
7718
+ trace_android_rvh_schedule_bug(NULL);
7719
+
68467720 dump_stack();
68477721 add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
68487722 }
68497723 EXPORT_SYMBOL(___might_sleep);
7724
+
7725
+void __cant_sleep(const char *file, int line, int preempt_offset)
7726
+{
7727
+ static unsigned long prev_jiffy;
7728
+
7729
+ if (irqs_disabled())
7730
+ return;
7731
+
7732
+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT))
7733
+ return;
7734
+
7735
+ if (preempt_count() > preempt_offset)
7736
+ return;
7737
+
7738
+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
7739
+ return;
7740
+ prev_jiffy = jiffies;
7741
+
7742
+ printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line);
7743
+ printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
7744
+ in_atomic(), irqs_disabled(),
7745
+ current->pid, current->comm);
7746
+
7747
+ debug_show_held_locks(current);
7748
+ dump_stack();
7749
+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
7750
+}
7751
+EXPORT_SYMBOL_GPL(__cant_sleep);
68507752 #endif
68517753
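__cant_sleep() added above is the inverse of ___might_sleep(): rather than flagging a sleep attempted in atomic context, it flags code that is supposed to run in atomic (non-preemptible) context but finds itself preemptible, printing the "assuming atomic context" splat. Callers normally reach it through the cant_sleep() wrapper from the debug-atomic-sleep machinery; a minimal sketch of such an annotation, with an illustrative per-CPU helper:

#include <linux/kernel.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(u64, example_counter);

/* Must be called with preemption (or interrupts) disabled. */
static void example_update_this_cpu(u64 delta)
{
	cant_sleep();	/* complains if this CPU could be preempted right here */
	__this_cpu_add(example_counter, delta);
}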
68527754 #ifdef CONFIG_MAGIC_SYSRQ
....@@ -6915,7 +7817,7 @@
69157817
69167818 #ifdef CONFIG_IA64
69177819 /**
6918
- * set_curr_task - set the current task for a given CPU.
7820
+ * ia64_set_curr_task - set the current task for a given CPU.
69197821 * @cpu: the processor in question.
69207822 * @p: the task pointer to set.
69217823 *
....@@ -7081,8 +7983,15 @@
70817983
70827984 if (queued)
70837985 enqueue_task(rq, tsk, queue_flags);
7084
- if (running)
7085
- set_curr_task(rq, tsk);
7986
+ if (running) {
7987
+ set_next_task(rq, tsk);
7988
+ /*
7989
+ * After changing group, the running task may have joined a
7990
+ * throttled one but it's still the running task. Trigger a
7991
+ * resched to make sure that task can still run.
7992
+ */
7993
+ resched_curr(rq);
7994
+ }
70867995
70877996 task_rq_unlock(rq, tsk, &rf);
70887997 }
....@@ -7121,9 +8030,14 @@
71218030
71228031 #ifdef CONFIG_UCLAMP_TASK_GROUP
71238032 /* Propagate the effective uclamp value for the new group */
8033
+ mutex_lock(&uclamp_mutex);
8034
+ rcu_read_lock();
71248035 cpu_util_update_eff(css);
8036
+ rcu_read_unlock();
8037
+ mutex_unlock(&uclamp_mutex);
71258038 #endif
71268039
8040
+ trace_android_rvh_cpu_cgroup_online(css);
71278041 return 0;
71288042 }
71298043
....@@ -7189,6 +8103,9 @@
71898103 if (ret)
71908104 break;
71918105 }
8106
+
8107
+ trace_android_rvh_cpu_cgroup_can_attach(tset, &ret);
8108
+
71928109 return ret;
71938110 }
71948111
....@@ -7199,6 +8116,8 @@
71998116
72008117 cgroup_taskset_for_each(task, css, tset)
72018118 sched_move_task(task);
8119
+
8120
+ trace_android_rvh_cpu_cgroup_attach(tset);
72028121 }
72038122
72048123 #ifdef CONFIG_UCLAMP_TASK_GROUP
....@@ -7210,6 +8129,9 @@
72108129 unsigned int eff[UCLAMP_CNT];
72118130 enum uclamp_id clamp_id;
72128131 unsigned int clamps;
8132
+
8133
+ lockdep_assert_held(&uclamp_mutex);
8134
+ SCHED_WARN_ON(!rcu_read_lock_held());
72138135
72148136 css_for_each_descendant_pre(css, top_css) {
72158137 uc_parent = css_tg(css)->parent
....@@ -7243,7 +8165,7 @@
72438165 }
72448166
72458167 /* Immediately update descendants RUNNABLE tasks */
7246
- uclamp_update_active_tasks(css, clamps);
8168
+ uclamp_update_active_tasks(css);
72478169 }
72488170 }
72498171
....@@ -7300,6 +8222,8 @@
73008222 req = capacity_from_percent(buf);
73018223 if (req.ret)
73028224 return req.ret;
8225
+
8226
+ static_branch_enable(&sched_uclamp_used);
73038227
73048228 mutex_lock(&uclamp_mutex);
73058229 rcu_read_lock();
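The hunk above arms the sched_uclamp_used static key the first time a clamp value is actually written, so the enqueue/dequeue fast path can skip the uclamp accounting until the feature is in use. The underlying jump-label pattern, as a generic sketch in which every name apart from the API is illustrative:

#include <linux/jump_label.h>

static DEFINE_STATIC_KEY_FALSE(example_feature_used);

/* Hot path: a patched jump/NOP, no flag load, until the key is enabled. */
static inline void example_hot_path(void)
{
	if (!static_branch_unlikely(&example_feature_used))
		return;			/* feature never configured: skip the work */

	/* ... per-task accounting would go here ... */
}

/* Slow path: the first configuration request switches the key on, once. */
static void example_enable_feature(void)
{
	static_branch_enable(&example_feature_used);
}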
....@@ -7415,7 +8339,9 @@
74158339 static DEFINE_MUTEX(cfs_constraints_mutex);
74168340
74178341 const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */
7418
-const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */
8342
+static const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */
8343
+/* More than 203 days if BW_SHIFT equals 20. */
8344
+static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC;
74198345
74208346 static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
74218347
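The 203 days in the new comment can be reconstructed from the bandwidth fixed-point limits, assuming kernel/sched/sched.h still defines BW_SHIFT as 20 and MAX_BW as (1ULL << (64 - BW_SHIFT)) - 1: MAX_BW = 2^44 - 1, read as microseconds of quota, is about 1.76 * 10^13 us, i.e. about 1.76 * 10^7 seconds or roughly 203.6 days, and max_cfs_runtime is that same bound converted to nanoseconds. Larger quotas would overflow 64-bit arithmetic once the bandwidth code scales runtimes by 2^BW_SHIFT in to_ratio(), which is what the new cutoff in tg_set_cfs_bandwidth() defends against.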
....@@ -7441,6 +8367,12 @@
74418367 * feasibility.
74428368 */
74438369 if (period > max_cfs_quota_period)
8370
+ return -EINVAL;
8371
+
8372
+ /*
8373
+ * Bound quota to defend quota against overflow during bandwidth shift.
8374
+ */
8375
+ if (quota != RUNTIME_INF && quota > max_cfs_runtime)
74448376 return -EINVAL;
74458377
74468378 /*
....@@ -7495,7 +8427,7 @@
74958427 return ret;
74968428 }
74978429
7498
-int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
8430
+static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
74998431 {
75008432 u64 quota, period;
75018433
....@@ -7510,7 +8442,7 @@
75108442 return tg_set_cfs_bandwidth(tg, period, quota);
75118443 }
75128444
7513
-long tg_get_cfs_quota(struct task_group *tg)
8445
+static long tg_get_cfs_quota(struct task_group *tg)
75148446 {
75158447 u64 quota_us;
75168448
....@@ -7523,7 +8455,7 @@
75238455 return quota_us;
75248456 }
75258457
7526
-int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
8458
+static int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
75278459 {
75288460 u64 quota, period;
75298461
....@@ -7536,7 +8468,7 @@
75368468 return tg_set_cfs_bandwidth(tg, period, quota);
75378469 }
75388470
7539
-long tg_get_cfs_period(struct task_group *tg)
8471
+static long tg_get_cfs_period(struct task_group *tg)
75408472 {
75418473 u64 cfs_period_us;
75428474
....@@ -8013,4 +8945,7 @@
80138945 /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
80148946 };
80158947
8016
-#undef CREATE_TRACE_POINTS
8948
+void call_trace_sched_update_nr_running(struct rq *rq, int count)
8949
+{
8950
+ trace_sched_update_nr_running_tp(rq, count);
8951
+}