hc
2024-05-10 61598093bbdd283a7edc367d900f223070ead8d2
kernel/kernel/sched/deadline.c
@@ -17,6 +17,7 @@
  */
 #include "sched.h"
 #include "pelt.h"
+#include <linux/cpuset.h>
 
 struct dl_bandwidth def_dl_bandwidth;
 
@@ -43,6 +44,28 @@
 	return !RB_EMPTY_NODE(&dl_se->rb_node);
 }
 
+#ifdef CONFIG_RT_MUTEXES
+static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
+{
+	return dl_se->pi_se;
+}
+
+static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
+{
+	return pi_of(dl_se) != dl_se;
+}
+#else
+static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
+{
+	return dl_se;
+}
+
+static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
+{
+	return false;
+}
+#endif
+
 #ifdef CONFIG_SMP
 static inline struct dl_bw *dl_bw_of(int i)
 {
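A minimal usage sketch (illustrative only, not part of the patch): replenishment paths read the inherited parameters through pi_of(); with CONFIG_RT_MUTEXES=n the helper degenerates to the entity itself, so the same code falls back to the task's own parameters.

/* Sketch: refill dl_se from the (possibly PI-donated) parameters. */
static void sketch_refill(struct sched_dl_entity *dl_se, u64 now)
{
	dl_se->deadline = now + pi_of(dl_se)->dl_deadline;
	dl_se->runtime  = pi_of(dl_se)->dl_runtime;
}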
@@ -54,14 +77,48 @@
 static inline int dl_bw_cpus(int i)
 {
 	struct root_domain *rd = cpu_rq(i)->rd;
-	int cpus = 0;
+	int cpus;
 
 	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
			 "sched RCU must be held");
+
+	if (cpumask_subset(rd->span, cpu_active_mask))
+		return cpumask_weight(rd->span);
+
+	cpus = 0;
+
 	for_each_cpu_and(i, rd->span, cpu_active_mask)
 		cpus++;
 
 	return cpus;
+}
+
+static inline unsigned long __dl_bw_capacity(int i)
+{
+	struct root_domain *rd = cpu_rq(i)->rd;
+	unsigned long cap = 0;
+
+	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+			 "sched RCU must be held");
+
+	for_each_cpu_and(i, rd->span, cpu_active_mask)
+		cap += capacity_orig_of(i);
+
+	return cap;
+}
+
+/*
+ * XXX Fix: If 'rq->rd == def_root_domain' perform AC against capacity
+ * of the CPU the task is running on rather rd's \Sum CPU capacity.
+ */
+static inline unsigned long dl_bw_capacity(int i)
+{
+	if (!static_branch_unlikely(&sched_asym_cpucapacity) &&
+	    capacity_orig_of(i) == SCHED_CAPACITY_SCALE) {
+		return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
+	} else {
+		return __dl_bw_capacity(i);
+	}
 }
 #else
 static inline struct dl_bw *dl_bw_of(int i)
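A worked example to make the fast path concrete (assumes a symmetric 4-CPU root domain with every CPU at SCHED_CAPACITY_SCALE == 1024; not part of the patch):

/* Sketch: on such a system both branches of dl_bw_capacity() agree:
 *   fast path: dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT = 4 << 10 = 4096
 *   slow path: __dl_bw_capacity(i) = 1024 + 1024 + 1024 + 1024 = 4096
 * On asymmetric (big.LITTLE) systems only the per-CPU sum is correct,
 * which is why admission control now uses capacity instead of a CPU count. */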
@@ -72,6 +129,11 @@
 static inline int dl_bw_cpus(int i)
 {
 	return 1;
+}
+
+static inline unsigned long dl_bw_capacity(int i)
+{
+	return SCHED_CAPACITY_SCALE;
 }
 #endif
 
@@ -153,7 +215,7 @@
 	__sub_running_bw(dl_se->dl_bw, dl_rq);
 }
 
-void dl_change_utilization(struct task_struct *p, u64 new_bw)
+static void dl_change_utilization(struct task_struct *p, u64 new_bw)
 {
 	struct rq *rq;
 
@@ -287,7 +349,7 @@
 
 	dl_se->dl_non_contending = 1;
 	get_task_struct(p);
-	hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL);
+	hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL_HARD);
 }
 
 static void task_contending(struct sched_dl_entity *dl_se, int flags)
@@ -333,6 +395,8 @@
 
 	return dl_rq->root.rb_leftmost == &dl_se->rb_node;
 }
+
+static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
 
 void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
 {
@@ -539,7 +603,7 @@
 		 * If we cannot preempt any rq, fall back to pick any
 		 * online CPU:
 		 */
-		cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
+		cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr);
 		if (cpu >= nr_cpu_ids) {
 			/*
 			 * Failed to find any suitable CPU.
@@ -657,7 +721,7 @@
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
-	WARN_ON(dl_se->dl_boosted);
+	WARN_ON(is_dl_boosted(dl_se));
 	WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
 
 	/*
@@ -695,21 +759,20 @@
  * could happen are, typically, a entity voluntarily trying to overcome its
  * runtime, or it just underestimated it during sched_setattr().
  */
-static void replenish_dl_entity(struct sched_dl_entity *dl_se,
-				struct sched_dl_entity *pi_se)
+static void replenish_dl_entity(struct sched_dl_entity *dl_se)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
-	BUG_ON(pi_se->dl_runtime <= 0);
+	BUG_ON(pi_of(dl_se)->dl_runtime <= 0);
 
 	/*
 	 * This could be the case for a !-dl task that is boosted.
 	 * Just go with full inherited parameters.
 	 */
 	if (dl_se->dl_deadline == 0) {
-		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
-		dl_se->runtime = pi_se->dl_runtime;
+		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+		dl_se->runtime = pi_of(dl_se)->dl_runtime;
 	}
 
 	if (dl_se->dl_yielded && dl_se->runtime > 0)
@@ -722,8 +785,8 @@
 	 * arbitrary large.
 	 */
 	while (dl_se->runtime <= 0) {
-		dl_se->deadline += pi_se->dl_period;
-		dl_se->runtime += pi_se->dl_runtime;
+		dl_se->deadline += pi_of(dl_se)->dl_period;
+		dl_se->runtime += pi_of(dl_se)->dl_runtime;
 	}
 
 	/*
@@ -737,8 +800,8 @@
 	 */
 	if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
 		printk_deferred_once("sched: DL replenish lagged too much\n");
-		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
-		dl_se->runtime = pi_se->dl_runtime;
+		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+		dl_se->runtime = pi_of(dl_se)->dl_runtime;
 	}
 
 	if (dl_se->dl_yielded)
@@ -759,7 +822,7 @@
  * refill the runtime and set the deadline a period in the future,
  * because keeping the current (absolute) deadline of the task would
  * result in breaking guarantees promised to other tasks (refer to
- * Documentation/scheduler/sched-deadline.txt for more informations).
+ * Documentation/scheduler/sched-deadline.rst for more information).
  *
  * This function returns true if:
 *
@@ -771,8 +834,7 @@
  * task with deadline equal to period this is the same of using
  * dl_period instead of dl_deadline in the equation above.
  */
-static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
-			       struct sched_dl_entity *pi_se, u64 t)
+static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
 {
 	u64 left, right;
 
@@ -794,9 +856,9 @@
 	 * of anything below microseconds resolution is actually fiction
 	 * (but still we want to give the user that illusion >;).
 	 */
-	left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
+	left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
 	right = ((dl_se->deadline - t) >> DL_SCALE) *
-		(pi_se->dl_runtime >> DL_SCALE);
+		(pi_of(dl_se)->dl_runtime >> DL_SCALE);
 
 	return dl_time_before(right, left);
 }
@@ -881,24 +943,23 @@
  * Please refer to the comments update_dl_revised_wakeup() function to find
  * more about the Revised CBS rule.
  */
-static void update_dl_entity(struct sched_dl_entity *dl_se,
-			     struct sched_dl_entity *pi_se)
+static void update_dl_entity(struct sched_dl_entity *dl_se)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
-	    dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
+	    dl_entity_overflow(dl_se, rq_clock(rq))) {
 
 		if (unlikely(!dl_is_implicit(dl_se) &&
			     !dl_time_before(dl_se->deadline, rq_clock(rq)) &&
-			     !dl_se->dl_boosted)){
+			     !is_dl_boosted(dl_se))) {
 			update_dl_revised_wakeup(dl_se, rq);
 			return;
 		}
 
-		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
-		dl_se->runtime = pi_se->dl_runtime;
+		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+		dl_se->runtime = pi_of(dl_se)->dl_runtime;
 	}
 }
 
@@ -956,7 +1017,7 @@
 	 */
 	if (!hrtimer_is_queued(timer)) {
 		get_task_struct(p);
-		hrtimer_start(timer, act, HRTIMER_MODE_ABS);
+		hrtimer_start(timer, act, HRTIMER_MODE_ABS_HARD);
 	}
 
 	return 1;
@@ -997,7 +1058,7 @@
 	 * The task might have been boosted by someone else and might be in the
 	 * boosting/deboosting path, its not throttled.
 	 */
-	if (dl_se->dl_boosted)
+	if (is_dl_boosted(dl_se))
 		goto unlock;
 
 	/*
@@ -1025,7 +1086,7 @@
 	 * but do not enqueue -- wait for our wakeup to do that.
 	 */
 	if (!task_on_rq_queued(p)) {
-		replenish_dl_entity(dl_se, dl_se);
+		replenish_dl_entity(dl_se);
 		goto unlock;
 	}
 
@@ -1086,7 +1147,7 @@
 {
 	struct hrtimer *timer = &dl_se->dl_timer;
 
-	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	timer->function = dl_task_timer;
 }
 
@@ -1096,7 +1157,7 @@
  * cannot use the runtime, and so it replenishes the task. This rule
  * works fine for implicit deadline tasks (deadline == period), and the
  * CBS was designed for implicit deadline tasks. However, a task with
- * constrained deadline (deadine < period) might be awakened after the
+ * constrained deadline (deadline < period) might be awakened after the
 * deadline, but before the next period. In this case, replenishing the
 * task would allow it to run for runtime / deadline. As in this case
 * deadline < period, CBS enables a task to run for more than the
@@ -1115,7 +1176,7 @@
 
 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
	    dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
-		if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
+		if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p)))
 			return;
 		dl_se->dl_throttled = 1;
 		if (dl_se->runtime > 0)
@@ -1228,7 +1289,7 @@
						 &curr->dl);
 	} else {
 		unsigned long scale_freq = arch_scale_freq_capacity(cpu);
-		unsigned long scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
+		unsigned long scale_cpu = arch_scale_cpu_capacity(cpu);
 
 		scaled_delta_exec = cap_scale(delta_exec, scale_freq);
 		scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
@@ -1246,7 +1307,7 @@
 			dl_se->dl_overrun = 1;
 
 		__dequeue_task_dl(rq, curr, 0);
-		if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
+		if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr)))
 			enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
 
 		if (!is_leftmost(curr, &rq->dl))
@@ -1325,7 +1386,7 @@
 {
 	struct hrtimer *timer = &dl_se->inactive_timer;
 
-	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	timer->function = inactive_task_timer;
 }
 
@@ -1440,8 +1501,7 @@
 }
 
 static void
-enqueue_dl_entity(struct sched_dl_entity *dl_se,
-		  struct sched_dl_entity *pi_se, int flags)
+enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
 {
 	BUG_ON(on_dl_rq(dl_se));
 
@@ -1452,9 +1512,9 @@
 	 */
 	if (flags & ENQUEUE_WAKEUP) {
 		task_contending(dl_se, flags);
-		update_dl_entity(dl_se, pi_se);
+		update_dl_entity(dl_se);
 	} else if (flags & ENQUEUE_REPLENISH) {
-		replenish_dl_entity(dl_se, pi_se);
+		replenish_dl_entity(dl_se);
 	} else if ((flags & ENQUEUE_RESTORE) &&
		  dl_time_before(dl_se->deadline,
				 rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
@@ -1471,28 +1531,43 @@
 
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
-	struct task_struct *pi_task = rt_mutex_get_top_task(p);
-	struct sched_dl_entity *pi_se = &p->dl;
-
-	/*
-	 * Use the scheduling parameters of the top pi-waiter task if:
-	 * - we have a top pi-waiter which is a SCHED_DEADLINE task AND
-	 * - our dl_boosted is set (i.e. the pi-waiter's (absolute) deadline is
-	 *   smaller than our deadline OR we are a !SCHED_DEADLINE task getting
-	 *   boosted due to a SCHED_DEADLINE pi-waiter).
-	 * Otherwise we keep our runtime and deadline.
-	 */
-	if (pi_task && dl_prio(pi_task->normal_prio) && p->dl.dl_boosted) {
-		pi_se = &pi_task->dl;
+	if (is_dl_boosted(&p->dl)) {
+		/*
+		 * Because of delays in the detection of the overrun of a
+		 * thread's runtime, it might be the case that a thread
+		 * goes to sleep in a rt mutex with negative runtime. As
+		 * a consequence, the thread will be throttled.
+		 *
+		 * While waiting for the mutex, this thread can also be
+		 * boosted via PI, resulting in a thread that is throttled
+		 * and boosted at the same time.
+		 *
+		 * In this case, the boost overrides the throttle.
+		 */
+		if (p->dl.dl_throttled) {
+			/*
+			 * The replenish timer needs to be canceled. No
+			 * problem if it fires concurrently: boosted threads
+			 * are ignored in dl_task_timer().
+			 */
+			hrtimer_try_to_cancel(&p->dl.dl_timer);
+			p->dl.dl_throttled = 0;
+		}
 	} else if (!dl_prio(p->normal_prio)) {
 		/*
-		 * Special case in which we have a !SCHED_DEADLINE task
-		 * that is going to be deboosted, but exceeds its
-		 * runtime while doing so. No point in replenishing
-		 * it, as it's going to return back to its original
-		 * scheduling class after this.
+		 * Special case in which we have a !SCHED_DEADLINE task that is going
+		 * to be deboosted, but exceeds its runtime while doing so. No point in
+		 * replenishing it, as it's going to return back to its original
+		 * scheduling class after this. If it has been throttled, we need to
+		 * clear the flag, otherwise the task may wake up as throttled after
+		 * being boosted again with no means to replenish the runtime and clear
+		 * the throttle.
		 */
-		BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH);
+		p->dl.dl_throttled = 0;
+		if (!(flags & ENQUEUE_REPLENISH))
+			printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n",
+					     task_pid_nr(p));
+
 		return;
 	}
 
@@ -1529,7 +1604,7 @@
 		return;
 	}
 
-	enqueue_dl_entity(&p->dl, pi_se, flags);
+	enqueue_dl_entity(&p->dl, flags);
 
 	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
 		enqueue_pushable_dl_task(rq, p);
@@ -1599,10 +1674,10 @@
 static int find_later_rq(struct task_struct *task);
 
 static int
-select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags,
-		  int sibling_count_hint)
+select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
 	struct task_struct *curr;
+	bool select_rq;
 	struct rq *rq;
 
 	if (sd_flag != SD_BALANCE_WAKE)
@@ -1622,10 +1697,19 @@
 	 * other hand, if it has a shorter deadline, we
 	 * try to make it stay here, it might be important.
 	 */
-	if (unlikely(dl_task(curr)) &&
-	    (curr->nr_cpus_allowed < 2 ||
-	     !dl_entity_preempt(&p->dl, &curr->dl)) &&
-	    (p->nr_cpus_allowed > 1)) {
+	select_rq = unlikely(dl_task(curr)) &&
+		    (curr->nr_cpus_allowed < 2 ||
+		     !dl_entity_preempt(&p->dl, &curr->dl)) &&
+		    p->nr_cpus_allowed > 1;
+
+	/*
+	 * Take the capacity of the CPU into account to
+	 * ensure it fits the requirement of the task.
+	 */
+	if (static_branch_unlikely(&sched_asym_cpucapacity))
+		select_rq |= !dl_task_fits_capacity(p, cpu);
+
+	if (select_rq) {
 		int target = find_later_rq(p);
 
 		if (target != -1 &&
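dl_task_fits_capacity() comes from the capacity-awareness work and lives outside this file; roughly, a task fits a CPU when its bandwidth still holds after scaling the deadline by that CPU's capacity. A sketch of the check under that assumption (not copied from this patch):

/* Sketch: cap_scale(x, cap) ~= x * cap / SCHED_CAPACITY_SCALE, so a CPU at
 * half capacity must satisfy deadline/2 >= runtime for the task to "fit". */
static bool sketch_dl_fits(struct task_struct *p, int cpu)
{
	unsigned long cap = arch_scale_cpu_capacity(cpu);

	return cap_scale(p->dl.dl_deadline, cap) >= p->dl.dl_runtime;
}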
@@ -1693,6 +1777,22 @@
 	resched_curr(rq);
 }
 
+static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+{
+	if (!on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
+		/*
+		 * This is OK, because current is on_cpu, which avoids it being
+		 * picked for load-balance and preemption/IRQs are still
+		 * disabled avoiding further scheduler activity on it and we've
+		 * not yet started the picking loop.
+		 */
+		rq_unpin_lock(rq, rf);
+		pull_dl_task(rq);
+		rq_repin_lock(rq, rf);
+	}
+
+	return sched_stop_runnable(rq) || sched_dl_runnable(rq);
+}
 #endif /* CONFIG_SMP */
 
 /*
@@ -1729,8 +1829,26 @@
 }
 #endif
 
-static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
-						   struct dl_rq *dl_rq)
+static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
+{
+	p->se.exec_start = rq_clock_task(rq);
+
+	/* You can't push away the running task */
+	dequeue_pushable_dl_task(rq, p);
+
+	if (!first)
+		return;
+
+	if (hrtick_enabled(rq))
+		start_hrtick_dl(rq, p);
+
+	if (rq->curr->sched_class != &dl_sched_class)
+		update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
+
+	deadline_queue_push_tasks(rq);
+}
+
+static struct sched_dl_entity *pick_next_dl_entity(struct dl_rq *dl_rq)
 {
 	struct rb_node *left = rb_first_cached(&dl_rq->root);
 
@@ -1740,63 +1858,19 @@
 	return rb_entry(left, struct sched_dl_entity, rb_node);
 }
 
-static struct task_struct *
-pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static struct task_struct *pick_next_task_dl(struct rq *rq)
 {
 	struct sched_dl_entity *dl_se;
+	struct dl_rq *dl_rq = &rq->dl;
 	struct task_struct *p;
-	struct dl_rq *dl_rq;
 
-	dl_rq = &rq->dl;
-
-	if (need_pull_dl_task(rq, prev)) {
-		/*
-		 * This is OK, because current is on_cpu, which avoids it being
-		 * picked for load-balance and preemption/IRQs are still
-		 * disabled avoiding further scheduler activity on it and we're
-		 * being very careful to re-start the picking loop.
-		 */
-		rq_unpin_lock(rq, rf);
-		pull_dl_task(rq);
-		rq_repin_lock(rq, rf);
-		/*
-		 * pull_dl_task() can drop (and re-acquire) rq->lock; this
-		 * means a stop task can slip in, in which case we need to
-		 * re-start task selection.
-		 */
-		if (rq->stop && task_on_rq_queued(rq->stop))
-			return RETRY_TASK;
-	}
-
-	/*
-	 * When prev is DL, we may throttle it in put_prev_task().
-	 * So, we update time before we check for dl_nr_running.
-	 */
-	if (prev->sched_class == &dl_sched_class)
-		update_curr_dl(rq);
-
-	if (unlikely(!dl_rq->dl_nr_running))
+	if (!sched_dl_runnable(rq))
 		return NULL;
 
-	put_prev_task(rq, prev);
-
-	dl_se = pick_next_dl_entity(rq, dl_rq);
+	dl_se = pick_next_dl_entity(dl_rq);
 	BUG_ON(!dl_se);
-
 	p = dl_task_of(dl_se);
-	p->se.exec_start = rq_clock_task(rq);
-
-	/* Running task will never be pushed. */
-	dequeue_pushable_dl_task(rq, p);
-
-	if (hrtick_enabled(rq))
-		start_hrtick_dl(rq, p);
-
-	deadline_queue_push_tasks(rq);
-
-	if (rq->curr->sched_class != &dl_sched_class)
-		update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
-
+	set_next_task_dl(rq, p, true);
 	return p;
 }
 
@@ -1840,16 +1914,6 @@
 	 */
 }
 
-static void set_curr_task_dl(struct rq *rq)
-{
-	struct task_struct *p = rq->curr;
-
-	p->se.exec_start = rq_clock_task(rq);
-
-	/* You can't push away the running task */
-	dequeue_pushable_dl_task(rq, p);
-}
-
 #ifdef CONFIG_SMP
 
 /* Only try algorithms three times */
@@ -1858,7 +1922,7 @@
 static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
-	    cpumask_test_cpu(cpu, &p->cpus_allowed))
+	    cpumask_test_cpu(cpu, p->cpus_ptr))
 		return 1;
 	return 0;
 }
@@ -2008,7 +2072,7 @@
 		/* Retry if something changed. */
 		if (double_lock_balance(rq, later_rq)) {
 			if (unlikely(task_rq(task) != rq ||
-				     !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) ||
+				     !cpumask_test_cpu(later_rq->cpu, task->cpus_ptr) ||
				     task_running(rq, task) ||
				     !dl_task(task) ||
				     !task_on_rq_queued(task))) {
@@ -2075,10 +2139,8 @@
 		return 0;
 
 retry:
-	if (unlikely(next_task == rq->curr)) {
-		WARN_ON(1);
+	if (WARN_ON(next_task == rq->curr))
 		return 0;
-	}
 
 	/*
 	 * If next_task preempts rq->curr, and rq->curr
@@ -2124,17 +2186,13 @@
 	}
 
 	deactivate_task(rq, next_task, 0);
-	sub_running_bw(&next_task->dl, &rq->dl);
-	sub_rq_bw(&next_task->dl, &rq->dl);
 	set_task_cpu(next_task, later_rq->cpu);
-	add_rq_bw(&next_task->dl, &later_rq->dl);
 
 	/*
 	 * Update the later_rq clock here, because the clock is used
 	 * by the cpufreq_update_util() inside __add_running_bw().
 	 */
 	update_rq_clock(later_rq);
-	add_running_bw(&next_task->dl, &later_rq->dl);
 	activate_task(later_rq, next_task, ENQUEUE_NOCLOCK);
 	ret = 1;
 
@@ -2222,11 +2280,7 @@
 			resched = true;
 
 			deactivate_task(src_rq, p, 0);
-			sub_running_bw(&p->dl, &src_rq->dl);
-			sub_rq_bw(&p->dl, &src_rq->dl);
 			set_task_cpu(p, this_cpu);
-			add_rq_bw(&p->dl, &this_rq->dl);
-			add_running_bw(&p->dl, &this_rq->dl);
 			activate_task(this_rq, p, 0);
 			dmin = p->dl.deadline;
 
@@ -2319,6 +2373,39 @@
					GFP_KERNEL, cpu_to_node(i));
 }
 
+void dl_add_task_root_domain(struct task_struct *p)
+{
+	struct rq_flags rf;
+	struct rq *rq;
+	struct dl_bw *dl_b;
+
+	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
+	if (!dl_task(p)) {
+		raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
+		return;
+	}
+
+	rq = __task_rq_lock(p, &rf);
+
+	dl_b = &rq->rd->dl_bw;
+	raw_spin_lock(&dl_b->lock);
+
+	__dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
+
+	raw_spin_unlock(&dl_b->lock);
+
+	task_rq_unlock(rq, p, &rf);
+}
+
+void dl_clear_root_domain(struct root_domain *rd)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&rd->dl_bw.lock, flags);
+	rd->dl_bw.total_bw = 0;
+	raw_spin_unlock_irqrestore(&rd->dl_bw.lock, flags);
+}
+
 #endif /* CONFIG_SMP */
 
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
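A hedged sketch of the intended use of the two helpers above (the function name and task list below are hypothetical; the real caller is the root-domain rebuild path in the topology/cpuset code):

/* Hypothetical sketch: re-derive rd->dl_bw.total_bw after a rebuild by
 * wiping the stale total and re-accounting each DEADLINE task that ends
 * up attached to this root domain. */
static void sketch_rebuild_dl_accounting(struct root_domain *rd,
					 struct task_struct **dl_tasks, int nr)
{
	int i;

	dl_clear_root_domain(rd);
	for (i = 0; i < nr; i++)
		dl_add_task_root_domain(dl_tasks[i]);
}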
@@ -2333,6 +2420,12 @@
 	 */
 	if (task_on_rq_queued(p) && p->dl.dl_runtime)
 		task_non_contending(p);
+
+	/*
+	 * In case a task is setscheduled out from SCHED_DEADLINE we need to
+	 * keep track of that on its cpuset (for correct bandwidth tracking).
+	 */
+	dec_dl_tasks_cs(p);
 
 	if (!task_on_rq_queued(p)) {
 		/*
@@ -2374,6 +2467,12 @@
 	if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
 		put_task_struct(p);
 
+	/*
+	 * In case a task is setscheduled to SCHED_DEADLINE we need to keep
+	 * track of that on its cpuset (for correct bandwidth tracking).
+	 */
+	inc_dl_tasks_cs(p);
+
 	/* If p is not queued we will update its parameters at next wakeup. */
 	if (!task_on_rq_queued(p)) {
 		add_rq_bw(&p->dl, &rq->dl);
@@ -2390,6 +2489,8 @@
 			check_preempt_curr_dl(rq, p, 0);
 		else
 			resched_curr(rq);
+	} else {
+		update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
 	}
 }
 
@@ -2429,8 +2530,8 @@
 	}
 }
 
-const struct sched_class dl_sched_class = {
-	.next			= &rt_sched_class,
+const struct sched_class dl_sched_class
+	__section("__dl_sched_class") = {
 	.enqueue_task		= enqueue_task_dl,
 	.dequeue_task		= dequeue_task_dl,
 	.yield_task		= yield_task_dl,
@@ -2439,8 +2540,10 @@
 
 	.pick_next_task		= pick_next_task_dl,
 	.put_prev_task		= put_prev_task_dl,
+	.set_next_task		= set_next_task_dl,
 
 #ifdef CONFIG_SMP
+	.balance		= balance_dl,
 	.select_task_rq		= select_task_rq_dl,
 	.migrate_task_rq	= migrate_task_rq_dl,
 	.set_cpus_allowed	= set_cpus_allowed_dl,
@@ -2449,7 +2552,6 @@
 	.task_woken		= task_woken_dl,
 #endif
 
-	.set_curr_task		= set_curr_task_dl,
 	.task_tick		= task_tick_dl,
 	.task_fork		= task_fork_dl,
 
@@ -2497,7 +2599,7 @@
 	return ret;
 }
 
-void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
+static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
 {
 	if (global_rt_runtime() == RUNTIME_INF) {
 		dl_rq->bw_ratio = 1 << RATIO_SHIFT;
@@ -2550,11 +2652,12 @@
 int sched_dl_overflow(struct task_struct *p, int policy,
		      const struct sched_attr *attr)
 {
-	struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
 	u64 period = attr->sched_period ?: attr->sched_deadline;
 	u64 runtime = attr->sched_runtime;
 	u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
-	int cpus, err = -1;
+	int cpus, err = -1, cpu = task_cpu(p);
+	struct dl_bw *dl_b = dl_bw_of(cpu);
+	unsigned long cap;
 
 	if (attr->sched_flags & SCHED_FLAG_SUGOV)
 		return 0;
@@ -2569,15 +2672,17 @@
 	 * allocated bandwidth of the container.
 	 */
 	raw_spin_lock(&dl_b->lock);
-	cpus = dl_bw_cpus(task_cpu(p));
+	cpus = dl_bw_cpus(cpu);
+	cap = dl_bw_capacity(cpu);
+
 	if (dl_policy(policy) && !task_has_dl_policy(p) &&
-	    !__dl_overflow(dl_b, cpus, 0, new_bw)) {
+	    !__dl_overflow(dl_b, cap, 0, new_bw)) {
 		if (hrtimer_active(&p->dl.inactive_timer))
 			__dl_sub(dl_b, p->dl.dl_bw, cpus);
 		__dl_add(dl_b, new_bw, cpus);
 		err = 0;
 	} else if (dl_policy(policy) && task_has_dl_policy(p) &&
-		   !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) {
+		   !__dl_overflow(dl_b, cap, p->dl.dl_bw, new_bw)) {
 		/*
 		 * XXX this is slightly incorrect: when the task
 		 * utilization decreases, we should delay the total
@@ -2635,6 +2740,14 @@
 }
 
 /*
+ * Default limits for DL period; on the top end we guard against small util
+ * tasks still getting ridiculous long effective runtimes, on the bottom end we
+ * guard against timer DoS.
+ */
+unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
+unsigned int sysctl_sched_dl_period_min = 100;     /* 100 us */
+
+/*
 * This function validates the new parameters of a -deadline task.
 * We ask for the deadline not being zero, and greater or equal
 * than the runtime, as well as the period of being zero or
@@ -2646,6 +2759,8 @@
  */
 bool __checkparam_dl(const struct sched_attr *attr)
 {
+	u64 period, max, min;
+
 	/* special dl tasks don't actually use any parameter */
 	if (attr->sched_flags & SCHED_FLAG_SUGOV)
 		return true;
@@ -2669,10 +2784,19 @@
	    attr->sched_period & (1ULL << 63))
 		return false;
 
+	period = attr->sched_period;
+	if (!period)
+		period = attr->sched_deadline;
+
 	/* runtime <= deadline <= period (if period != 0) */
-	if ((attr->sched_period != 0 &&
-	     attr->sched_period < attr->sched_deadline) ||
+	if (period < attr->sched_deadline ||
	    attr->sched_deadline < attr->sched_runtime)
+		return false;
+
+	max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
+	min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
+
+	if (period < min || period > max)
 		return false;
 
 	return true;
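To make the new bounds concrete, a short worked example using the defaults introduced earlier in this patch (the sysctls are expressed in microseconds):

/* Sketch, not part of the patch:
 *   max = (1 << 22) * NSEC_PER_USEC = 4194304 us ~= 4.19 s
 *   min = 100 * NSEC_PER_USEC       = 100 us
 * so a sched_period of 50 us is rejected (timer-DoS guard) and a
 * sched_period of 10 s is rejected (tiny-utilization guard). */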
@@ -2692,11 +2816,14 @@
 	dl_se->dl_bw = 0;
 	dl_se->dl_density = 0;
 
-	dl_se->dl_boosted = 0;
 	dl_se->dl_throttled = 0;
 	dl_se->dl_yielded = 0;
 	dl_se->dl_non_contending = 0;
 	dl_se->dl_overrun = 0;
+
+#ifdef CONFIG_RT_MUTEXES
+	dl_se->pi_se = dl_se;
+#endif
 }
 
 bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
@@ -2713,39 +2840,6 @@
 }
 
 #ifdef CONFIG_SMP
-int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
-{
-	unsigned int dest_cpu;
-	struct dl_bw *dl_b;
-	bool overflow;
-	int cpus, ret;
-	unsigned long flags;
-
-	dest_cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
-
-	rcu_read_lock_sched();
-	dl_b = dl_bw_of(dest_cpu);
-	raw_spin_lock_irqsave(&dl_b->lock, flags);
-	cpus = dl_bw_cpus(dest_cpu);
-	overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw);
-	if (overflow) {
-		ret = -EBUSY;
-	} else {
-		/*
-		 * We reserve space for this task in the destination
-		 * root_domain, as we can't fail after this point.
-		 * We will free resources in the source root_domain
-		 * later on (see set_cpus_allowed_dl()).
-		 */
-		__dl_add(dl_b, p->dl.dl_bw, cpus);
-		ret = 0;
-	}
-	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-	rcu_read_unlock_sched();
-
-	return ret;
-}
-
 int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
				 const struct cpumask *trial)
 {
@@ -2767,22 +2861,59 @@
 	return ret;
 }
 
-bool dl_cpu_busy(unsigned int cpu)
+enum dl_bw_request {
+	dl_bw_req_check_overflow = 0,
+	dl_bw_req_alloc,
+	dl_bw_req_free
+};
+
+static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
 {
 	unsigned long flags;
 	struct dl_bw *dl_b;
-	bool overflow;
-	int cpus;
+	bool overflow = 0;
 
 	rcu_read_lock_sched();
 	dl_b = dl_bw_of(cpu);
 	raw_spin_lock_irqsave(&dl_b->lock, flags);
-	cpus = dl_bw_cpus(cpu);
-	overflow = __dl_overflow(dl_b, cpus, 0, 0);
+
+	if (req == dl_bw_req_free) {
+		__dl_sub(dl_b, dl_bw, dl_bw_cpus(cpu));
+	} else {
+		unsigned long cap = dl_bw_capacity(cpu);
+
+		overflow = __dl_overflow(dl_b, cap, 0, dl_bw);
+
+		if (req == dl_bw_req_alloc && !overflow) {
+			/*
+			 * We reserve space in the destination
+			 * root_domain, as we can't fail after this point.
+			 * We will free resources in the source root_domain
+			 * later on (see set_cpus_allowed_dl()).
+			 */
+			__dl_add(dl_b, dl_bw, dl_bw_cpus(cpu));
+		}
+	}
+
 	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
 	rcu_read_unlock_sched();
 
-	return overflow;
+	return overflow ? -EBUSY : 0;
+}
+
+int dl_bw_check_overflow(int cpu)
+{
+	return dl_bw_manage(dl_bw_req_check_overflow, cpu, 0);
+}
+
+int dl_bw_alloc(int cpu, u64 dl_bw)
+{
+	return dl_bw_manage(dl_bw_req_alloc, cpu, dl_bw);
+}
+
+void dl_bw_free(int cpu, u64 dl_bw)
+{
+	dl_bw_manage(dl_bw_req_free, cpu, dl_bw);
 }
 #endif
 
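A hedged sketch of how the new interface is meant to be consumed (the helper name below is hypothetical; the real callers are the cpuset attach/cancel paths):

/* Hypothetical sketch: reserve DEADLINE bandwidth for @p on @dest_cpu
 * before a cpuset move, and hand it back if the move is later aborted. */
static int sketch_reserve_dl_bw(struct task_struct *p, int dest_cpu)
{
	int ret = dl_bw_alloc(dest_cpu, p->dl.dl_bw);

	if (ret)
		return ret;	/* -EBUSY: admission control would overflow */

	/* ... if the move fails afterwards: dl_bw_free(dest_cpu, p->dl.dl_bw); */
	return 0;
}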