.. | .. |
---|
43 | 43 | return !RB_EMPTY_NODE(&dl_se->rb_node); |
---|
44 | 44 | } |
---|
45 | 45 | |
---|
| 46 | +#ifdef CONFIG_RT_MUTEXES |
---|
| 47 | +static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se) |
---|
| 48 | +{ |
---|
| 49 | + return dl_se->pi_se; |
---|
| 50 | +} |
---|
| 51 | + |
---|
| 52 | +static inline bool is_dl_boosted(struct sched_dl_entity *dl_se) |
---|
| 53 | +{ |
---|
| 54 | + return pi_of(dl_se) != dl_se; |
---|
| 55 | +} |
---|
| 56 | +#else |
---|
| 57 | +static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se) |
---|
| 58 | +{ |
---|
| 59 | + return dl_se; |
---|
| 60 | +} |
---|
| 61 | + |
---|
| 62 | +static inline bool is_dl_boosted(struct sched_dl_entity *dl_se) |
---|
| 63 | +{ |
---|
| 64 | + return false; |
---|
| 65 | +} |
---|
| 66 | +#endif |
---|
| 67 | + |
---|
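This hunk introduces pi_of()/is_dl_boosted() so the PI donor entity is reachable from the boosted entity itself instead of being threaded through every call as a `pi_se` argument; later hunks switch callers to `pi_of(dl_se)->dl_runtime` and friends. A minimal user-space sketch of the pattern, with the struct trimmed to the fields used here (not kernel code):

```c
/*
 * User-space sketch of the pi_of()/is_dl_boosted() pattern from this hunk.
 * The struct is trimmed to the fields used here; this is not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

struct sched_dl_entity {
	unsigned long long dl_runtime;
	unsigned long long dl_deadline;
	struct sched_dl_entity *pi_se;	/* donor entity; points to itself when not boosted */
};

static struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
{
	return dl_se->pi_se;
}

static bool is_dl_boosted(struct sched_dl_entity *dl_se)
{
	return pi_of(dl_se) != dl_se;
}

int main(void)
{
	struct sched_dl_entity donor = { 5000000, 10000000, &donor };
	struct sched_dl_entity task  = { 1000000, 50000000, &task };

	printf("boosted=%d, effective runtime=%llu\n",
	       is_dl_boosted(&task), pi_of(&task)->dl_runtime);

	task.pi_se = &donor;	/* boost: replenish/update now use the donor's parameters */
	printf("boosted=%d, effective runtime=%llu\n",
	       is_dl_boosted(&task), pi_of(&task)->dl_runtime);
	return 0;
}
```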
46 | 68 | #ifdef CONFIG_SMP |
---|
47 | 69 | static inline struct dl_bw *dl_bw_of(int i) |
---|
48 | 70 | { |
---|
.. | .. |
---|
54 | 76 | static inline int dl_bw_cpus(int i) |
---|
55 | 77 | { |
---|
56 | 78 | struct root_domain *rd = cpu_rq(i)->rd; |
---|
57 | | - int cpus = 0; |
---|
| 79 | + int cpus; |
---|
58 | 80 | |
---|
59 | 81 | RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(), |
---|
60 | 82 | "sched RCU must be held"); |
---|
| 83 | + |
---|
| 84 | + if (cpumask_subset(rd->span, cpu_active_mask)) |
---|
| 85 | + return cpumask_weight(rd->span); |
---|
| 86 | + |
---|
| 87 | + cpus = 0; |
---|
| 88 | + |
---|
61 | 89 | for_each_cpu_and(i, rd->span, cpu_active_mask) |
---|
62 | 90 | cpus++; |
---|
63 | 91 | |
---|
64 | 92 | return cpus; |
---|
| 93 | +} |
---|
| 94 | + |
---|
| 95 | +static inline unsigned long __dl_bw_capacity(int i) |
---|
| 96 | +{ |
---|
| 97 | + struct root_domain *rd = cpu_rq(i)->rd; |
---|
| 98 | + unsigned long cap = 0; |
---|
| 99 | + |
---|
| 100 | + RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(), |
---|
| 101 | + "sched RCU must be held"); |
---|
| 102 | + |
---|
| 103 | + for_each_cpu_and(i, rd->span, cpu_active_mask) |
---|
| 104 | + cap += capacity_orig_of(i); |
---|
| 105 | + |
---|
| 106 | + return cap; |
---|
| 107 | +} |
---|
| 108 | + |
---|
| 109 | +/* |
---|
| 110 | + * XXX Fix: If 'rq->rd == def_root_domain' perform AC against capacity |
---|
| 111 | + * of the CPU the task is running on rather than rd's \Sum CPU capacity.
---|
| 112 | + */ |
---|
| 113 | +static inline unsigned long dl_bw_capacity(int i) |
---|
| 114 | +{ |
---|
| 115 | + if (!static_branch_unlikely(&sched_asym_cpucapacity) && |
---|
| 116 | + capacity_orig_of(i) == SCHED_CAPACITY_SCALE) { |
---|
| 117 | + return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT; |
---|
| 118 | + } else { |
---|
| 119 | + return __dl_bw_capacity(i); |
---|
| 120 | + } |
---|
65 | 121 | } |
---|
66 | 122 | #else |
---|
67 | 123 | static inline struct dl_bw *dl_bw_of(int i) |
---|
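In the hunk above, dl_bw_cpus() gains a fast path (when the whole rd->span is active, the weight of the span is enough) and the new dl_bw_capacity() turns the CPU count into a capacity: on a symmetric system at full SCHED_CAPACITY_SCALE it is just the active CPU count shifted by SCHED_CAPACITY_SHIFT, otherwise the per-CPU original capacities are summed. A small arithmetic sketch with invented capacities:

```c
/*
 * Arithmetic sketch of dl_bw_capacity(): fast path for symmetric systems
 * vs. summed per-CPU capacities. The capacities below are invented examples.
 */
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1UL << SCHED_CAPACITY_SHIFT)

int main(void)
{
	/* Symmetric: 4 active CPUs, each at full scale. */
	unsigned long cpus = 4;
	unsigned long sym_cap = cpus << SCHED_CAPACITY_SHIFT;

	/* Asymmetric (e.g. big.LITTLE): sum of per-CPU original capacities. */
	unsigned long caps[] = { 1024, 1024, 446, 446 };
	unsigned long asym_cap = 0;

	for (unsigned int i = 0; i < sizeof(caps) / sizeof(caps[0]); i++)
		asym_cap += caps[i];

	printf("symmetric rd capacity  = %lu\n", sym_cap);	/* 4096 */
	printf("asymmetric rd capacity = %lu\n", asym_cap);	/* 2940 */
	return 0;
}
```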
.. | .. |
---|
72 | 128 | static inline int dl_bw_cpus(int i) |
---|
73 | 129 | { |
---|
74 | 130 | return 1; |
---|
| 131 | +} |
---|
| 132 | + |
---|
| 133 | +static inline unsigned long dl_bw_capacity(int i) |
---|
| 134 | +{ |
---|
| 135 | + return SCHED_CAPACITY_SCALE; |
---|
75 | 136 | } |
---|
76 | 137 | #endif |
---|
77 | 138 | |
---|
.. | .. |
---|
153 | 214 | __sub_running_bw(dl_se->dl_bw, dl_rq); |
---|
154 | 215 | } |
---|
155 | 216 | |
---|
156 | | -void dl_change_utilization(struct task_struct *p, u64 new_bw) |
---|
| 217 | +static void dl_change_utilization(struct task_struct *p, u64 new_bw) |
---|
157 | 218 | { |
---|
158 | 219 | struct rq *rq; |
---|
159 | 220 | |
---|
.. | .. |
---|
287 | 348 | |
---|
288 | 349 | dl_se->dl_non_contending = 1; |
---|
289 | 350 | get_task_struct(p); |
---|
290 | | - hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL); |
---|
| 351 | + hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL_HARD); |
---|
291 | 352 | } |
---|
292 | 353 | |
---|
293 | 354 | static void task_contending(struct sched_dl_entity *dl_se, int flags) |
---|
.. | .. |
---|
333 | 394 | |
---|
334 | 395 | return dl_rq->root.rb_leftmost == &dl_se->rb_node; |
---|
335 | 396 | } |
---|
| 397 | + |
---|
| 398 | +static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq); |
---|
336 | 399 | |
---|
337 | 400 | void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime) |
---|
338 | 401 | { |
---|
.. | .. |
---|
502 | 565 | |
---|
503 | 566 | static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev) |
---|
504 | 567 | { |
---|
505 | | - return dl_task(prev); |
---|
| 568 | + return rq->online && dl_task(prev); |
---|
506 | 569 | } |
---|
507 | 570 | |
---|
508 | 571 | static DEFINE_PER_CPU(struct callback_head, dl_push_head); |
---|
.. | .. |
---|
539 | 602 | * If we cannot preempt any rq, fall back to pick any |
---|
540 | 603 | * online CPU: |
---|
541 | 604 | */ |
---|
542 | | - cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); |
---|
| 605 | + cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr); |
---|
543 | 606 | if (cpu >= nr_cpu_ids) { |
---|
544 | 607 | /* |
---|
545 | 608 | * Failed to find any suitable CPU. |
---|
.. | .. |
---|
657 | 720 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); |
---|
658 | 721 | struct rq *rq = rq_of_dl_rq(dl_rq); |
---|
659 | 722 | |
---|
660 | | - WARN_ON(dl_se->dl_boosted); |
---|
| 723 | + WARN_ON(is_dl_boosted(dl_se)); |
---|
661 | 724 | WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline)); |
---|
662 | 725 | |
---|
663 | 726 | /* |
---|
.. | .. |
---|
695 | 758 | * could happen are, typically, an entity voluntarily trying to overcome its
---|
696 | 759 | * runtime, or it just underestimated it during sched_setattr(). |
---|
697 | 760 | */ |
---|
698 | | -static void replenish_dl_entity(struct sched_dl_entity *dl_se, |
---|
699 | | - struct sched_dl_entity *pi_se) |
---|
| 761 | +static void replenish_dl_entity(struct sched_dl_entity *dl_se) |
---|
700 | 762 | { |
---|
701 | 763 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); |
---|
702 | 764 | struct rq *rq = rq_of_dl_rq(dl_rq); |
---|
703 | 765 | |
---|
704 | | - BUG_ON(pi_se->dl_runtime <= 0); |
---|
| 766 | + BUG_ON(pi_of(dl_se)->dl_runtime <= 0); |
---|
705 | 767 | |
---|
706 | 768 | /* |
---|
707 | 769 | * This could be the case for a !-dl task that is boosted. |
---|
708 | 770 | * Just go with full inherited parameters. |
---|
709 | 771 | */ |
---|
710 | 772 | if (dl_se->dl_deadline == 0) { |
---|
711 | | - dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; |
---|
712 | | - dl_se->runtime = pi_se->dl_runtime; |
---|
| 773 | + dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline; |
---|
| 774 | + dl_se->runtime = pi_of(dl_se)->dl_runtime; |
---|
713 | 775 | } |
---|
714 | 776 | |
---|
715 | 777 | if (dl_se->dl_yielded && dl_se->runtime > 0) |
---|
.. | .. |
---|
722 | 784 | * arbitrary large. |
---|
723 | 785 | */ |
---|
724 | 786 | while (dl_se->runtime <= 0) { |
---|
725 | | - dl_se->deadline += pi_se->dl_period; |
---|
726 | | - dl_se->runtime += pi_se->dl_runtime; |
---|
| 787 | + dl_se->deadline += pi_of(dl_se)->dl_period; |
---|
| 788 | + dl_se->runtime += pi_of(dl_se)->dl_runtime; |
---|
727 | 789 | } |
---|
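The loop above repays a negative runtime one donor period at a time, so a large overrun can push the deadline several periods into the future. A standalone sketch of that arithmetic with example parameters:

```c
/*
 * Sketch of the replenishment loop: repay a negative runtime by pushing
 * the deadline forward one period at a time. Parameters are examples only.
 */
#include <stdio.h>

int main(void)
{
	long long dl_runtime = 5000000;		/*  5 ms budget per period */
	long long dl_period  = 20000000;	/* 20 ms period */
	long long runtime    = -12000000;	/* 12 ms overrun */
	long long deadline   = 100000000;	/* current absolute deadline */

	while (runtime <= 0) {
		deadline += dl_period;
		runtime  += dl_runtime;
	}

	/* Three replenishments: runtime ends at 3 ms, deadline moved 60 ms out. */
	printf("runtime = %lld, deadline = %lld\n", runtime, deadline);
	return 0;
}
```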
728 | 790 | |
---|
729 | 791 | /* |
---|
.. | .. |
---|
737 | 799 | */ |
---|
738 | 800 | if (dl_time_before(dl_se->deadline, rq_clock(rq))) { |
---|
739 | 801 | printk_deferred_once("sched: DL replenish lagged too much\n"); |
---|
740 | | - dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; |
---|
741 | | - dl_se->runtime = pi_se->dl_runtime; |
---|
| 802 | + dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline; |
---|
| 803 | + dl_se->runtime = pi_of(dl_se)->dl_runtime; |
---|
742 | 804 | } |
---|
743 | 805 | |
---|
744 | 806 | if (dl_se->dl_yielded) |
---|
.. | .. |
---|
759 | 821 | * refill the runtime and set the deadline a period in the future, |
---|
760 | 822 | * because keeping the current (absolute) deadline of the task would |
---|
761 | 823 | * result in breaking guarantees promised to other tasks (refer to |
---|
762 | | - * Documentation/scheduler/sched-deadline.txt for more informations). |
---|
| 824 | + * Documentation/scheduler/sched-deadline.rst for more information). |
---|
763 | 825 | * |
---|
764 | 826 | * This function returns true if: |
---|
765 | 827 | * |
---|
.. | .. |
---|
771 | 833 | * task with deadline equal to period this is the same of using |
---|
772 | 834 | * dl_period instead of dl_deadline in the equation above. |
---|
773 | 835 | */ |
---|
774 | | -static bool dl_entity_overflow(struct sched_dl_entity *dl_se, |
---|
775 | | - struct sched_dl_entity *pi_se, u64 t) |
---|
| 836 | +static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t) |
---|
776 | 837 | { |
---|
777 | 838 | u64 left, right; |
---|
778 | 839 | |
---|
.. | .. |
---|
794 | 855 | * of anything below microseconds resolution is actually fiction |
---|
795 | 856 | * (but still we want to give the user that illusion >;). |
---|
796 | 857 | */ |
---|
797 | | - left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); |
---|
| 858 | + left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); |
---|
798 | 859 | right = ((dl_se->deadline - t) >> DL_SCALE) * |
---|
799 | | - (pi_se->dl_runtime >> DL_SCALE); |
---|
| 860 | + (pi_of(dl_se)->dl_runtime >> DL_SCALE); |
---|
800 | 861 | |
---|
801 | 862 | return dl_time_before(right, left); |
---|
802 | 863 | } |
---|
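With `pi_se` folded into pi_of(), the overflow test is unchanged in substance: it declares overflow when runtime / (deadline - t) exceeds the reserved density dl_runtime / dl_deadline, evaluated as a cross-multiplication with both operands shifted down by DL_SCALE so the products stay within 64 bits. A standalone sketch of the comparison (plain `<` instead of the wraparound-safe dl_time_before(), example numbers):

```c
/*
 * Sketch of the dl_entity_overflow() comparison. DL_SCALE is the kernel's
 * down-shift (10); the timing values below are invented.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DL_SCALE 10

static bool dl_entity_overflow(uint64_t dl_deadline, uint64_t dl_runtime,
			       uint64_t deadline, uint64_t runtime, uint64_t t)
{
	uint64_t left  = (dl_deadline >> DL_SCALE) * (runtime >> DL_SCALE);
	uint64_t right = ((deadline - t) >> DL_SCALE) * (dl_runtime >> DL_SCALE);

	return right < left;	/* ignoring the wraparound handling of dl_time_before() */
}

int main(void)
{
	/* Reserved 10 ms / 100 ms; 30 ms of budget left with 40 ms to deadline. */
	printf("overflow: %d\n",
	       dl_entity_overflow(100000000ULL, 10000000ULL,
				  140000000ULL, 30000000ULL, 100000000ULL));
	return 0;
}
```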
.. | .. |
---|
881 | 942 | * Please refer to the comments update_dl_revised_wakeup() function to find |
---|
882 | 943 | * more about the Revised CBS rule. |
---|
883 | 944 | */ |
---|
884 | | -static void update_dl_entity(struct sched_dl_entity *dl_se, |
---|
885 | | - struct sched_dl_entity *pi_se) |
---|
| 945 | +static void update_dl_entity(struct sched_dl_entity *dl_se) |
---|
886 | 946 | { |
---|
887 | 947 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); |
---|
888 | 948 | struct rq *rq = rq_of_dl_rq(dl_rq); |
---|
889 | 949 | |
---|
890 | 950 | if (dl_time_before(dl_se->deadline, rq_clock(rq)) || |
---|
891 | | - dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) { |
---|
| 951 | + dl_entity_overflow(dl_se, rq_clock(rq))) { |
---|
892 | 952 | |
---|
893 | 953 | if (unlikely(!dl_is_implicit(dl_se) && |
---|
894 | 954 | !dl_time_before(dl_se->deadline, rq_clock(rq)) && |
---|
895 | | - !dl_se->dl_boosted)){ |
---|
| 955 | + !is_dl_boosted(dl_se))) { |
---|
896 | 956 | update_dl_revised_wakeup(dl_se, rq); |
---|
897 | 957 | return; |
---|
898 | 958 | } |
---|
899 | 959 | |
---|
900 | | - dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; |
---|
901 | | - dl_se->runtime = pi_se->dl_runtime; |
---|
| 960 | + dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline; |
---|
| 961 | + dl_se->runtime = pi_of(dl_se)->dl_runtime; |
---|
902 | 962 | } |
---|
903 | 963 | } |
---|
904 | 964 | |
---|
.. | .. |
---|
956 | 1016 | */ |
---|
957 | 1017 | if (!hrtimer_is_queued(timer)) { |
---|
958 | 1018 | get_task_struct(p); |
---|
959 | | - hrtimer_start(timer, act, HRTIMER_MODE_ABS); |
---|
| 1019 | + hrtimer_start(timer, act, HRTIMER_MODE_ABS_HARD); |
---|
960 | 1020 | } |
---|
961 | 1021 | |
---|
962 | 1022 | return 1; |
---|
.. | .. |
---|
997 | 1057 | * The task might have been boosted by someone else and might be in the |
---|
999 | 1059 | * boosting/deboosting path, it's not throttled.
---|
999 | 1059 | */ |
---|
1000 | | - if (dl_se->dl_boosted) |
---|
| 1060 | + if (is_dl_boosted(dl_se)) |
---|
1001 | 1061 | goto unlock; |
---|
1002 | 1062 | |
---|
1003 | 1063 | /* |
---|
.. | .. |
---|
1025 | 1085 | * but do not enqueue -- wait for our wakeup to do that. |
---|
1026 | 1086 | */ |
---|
1027 | 1087 | if (!task_on_rq_queued(p)) { |
---|
1028 | | - replenish_dl_entity(dl_se, dl_se); |
---|
| 1088 | + replenish_dl_entity(dl_se); |
---|
1029 | 1089 | goto unlock; |
---|
1030 | 1090 | } |
---|
1031 | 1091 | |
---|
.. | .. |
---|
1086 | 1146 | { |
---|
1087 | 1147 | struct hrtimer *timer = &dl_se->dl_timer; |
---|
1088 | 1148 | |
---|
1089 | | - hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
---|
| 1149 | + hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); |
---|
1090 | 1150 | timer->function = dl_task_timer; |
---|
1091 | 1151 | } |
---|
1092 | 1152 | |
---|
.. | .. |
---|
1096 | 1156 | * cannot use the runtime, and so it replenishes the task. This rule |
---|
1097 | 1157 | * works fine for implicit deadline tasks (deadline == period), and the |
---|
1098 | 1158 | * CBS was designed for implicit deadline tasks. However, a task with |
---|
1099 | | - * constrained deadline (deadine < period) might be awakened after the |
---|
| 1159 | + * constrained deadline (deadline < period) might be awakened after the |
---|
1100 | 1160 | * deadline, but before the next period. In this case, replenishing the |
---|
1101 | 1161 | * task would allow it to run for runtime / deadline. As in this case |
---|
1102 | 1162 | * deadline < period, CBS enables a task to run for more than the |
---|
.. | .. |
---|
1115 | 1175 | |
---|
1116 | 1176 | if (dl_time_before(dl_se->deadline, rq_clock(rq)) && |
---|
1117 | 1177 | dl_time_before(rq_clock(rq), dl_next_period(dl_se))) { |
---|
1118 | | - if (unlikely(dl_se->dl_boosted || !start_dl_timer(p))) |
---|
| 1178 | + if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p))) |
---|
1119 | 1179 | return; |
---|
1120 | 1180 | dl_se->dl_throttled = 1; |
---|
1121 | 1181 | if (dl_se->runtime > 0) |
---|
.. | .. |
---|
1228 | 1288 | &curr->dl); |
---|
1229 | 1289 | } else { |
---|
1230 | 1290 | unsigned long scale_freq = arch_scale_freq_capacity(cpu); |
---|
1231 | | - unsigned long scale_cpu = arch_scale_cpu_capacity(NULL, cpu); |
---|
| 1291 | + unsigned long scale_cpu = arch_scale_cpu_capacity(cpu); |
---|
1232 | 1292 | |
---|
1233 | 1293 | scaled_delta_exec = cap_scale(delta_exec, scale_freq); |
---|
1234 | 1294 | scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu); |
---|
.. | .. |
---|
1246 | 1306 | dl_se->dl_overrun = 1; |
---|
1247 | 1307 | |
---|
1248 | 1308 | __dequeue_task_dl(rq, curr, 0); |
---|
1249 | | - if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr))) |
---|
| 1309 | + if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr))) |
---|
1250 | 1310 | enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH); |
---|
1251 | 1311 | |
---|
1252 | 1312 | if (!is_leftmost(curr, &rq->dl)) |
---|
.. | .. |
---|
1325 | 1385 | { |
---|
1326 | 1386 | struct hrtimer *timer = &dl_se->inactive_timer; |
---|
1327 | 1387 | |
---|
1328 | | - hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
---|
| 1388 | + hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); |
---|
1329 | 1389 | timer->function = inactive_task_timer; |
---|
1330 | 1390 | } |
---|
1331 | 1391 | |
---|
.. | .. |
---|
1440 | 1500 | } |
---|
1441 | 1501 | |
---|
1442 | 1502 | static void |
---|
1443 | | -enqueue_dl_entity(struct sched_dl_entity *dl_se, |
---|
1444 | | - struct sched_dl_entity *pi_se, int flags) |
---|
| 1503 | +enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags) |
---|
1445 | 1504 | { |
---|
1446 | 1505 | BUG_ON(on_dl_rq(dl_se)); |
---|
1447 | 1506 | |
---|
.. | .. |
---|
1452 | 1511 | */ |
---|
1453 | 1512 | if (flags & ENQUEUE_WAKEUP) { |
---|
1454 | 1513 | task_contending(dl_se, flags); |
---|
1455 | | - update_dl_entity(dl_se, pi_se); |
---|
| 1514 | + update_dl_entity(dl_se); |
---|
1456 | 1515 | } else if (flags & ENQUEUE_REPLENISH) { |
---|
1457 | | - replenish_dl_entity(dl_se, pi_se); |
---|
| 1516 | + replenish_dl_entity(dl_se); |
---|
1458 | 1517 | } else if ((flags & ENQUEUE_RESTORE) && |
---|
1459 | 1518 | dl_time_before(dl_se->deadline, |
---|
1460 | 1519 | rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) { |
---|
.. | .. |
---|
1471 | 1530 | |
---|
1472 | 1531 | static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) |
---|
1473 | 1532 | { |
---|
1474 | | - struct task_struct *pi_task = rt_mutex_get_top_task(p); |
---|
1475 | | - struct sched_dl_entity *pi_se = &p->dl; |
---|
1476 | | - |
---|
1477 | | - /* |
---|
1478 | | - * Use the scheduling parameters of the top pi-waiter task if: |
---|
1479 | | - * - we have a top pi-waiter which is a SCHED_DEADLINE task AND |
---|
1480 | | - * - our dl_boosted is set (i.e. the pi-waiter's (absolute) deadline is |
---|
1481 | | - * smaller than our deadline OR we are a !SCHED_DEADLINE task getting |
---|
1482 | | - * boosted due to a SCHED_DEADLINE pi-waiter). |
---|
1483 | | - * Otherwise we keep our runtime and deadline. |
---|
1484 | | - */ |
---|
1485 | | - if (pi_task && dl_prio(pi_task->normal_prio) && p->dl.dl_boosted) { |
---|
1486 | | - pi_se = &pi_task->dl; |
---|
| 1533 | + if (is_dl_boosted(&p->dl)) { |
---|
| 1534 | + /* |
---|
| 1535 | + * Because of delays in the detection of the overrun of a |
---|
| 1536 | + * thread's runtime, it might be the case that a thread |
---|
| 1537 | + * goes to sleep in a rt mutex with negative runtime. As |
---|
| 1538 | + * a consequence, the thread will be throttled. |
---|
| 1539 | + * |
---|
| 1540 | + * While waiting for the mutex, this thread can also be |
---|
| 1541 | + * boosted via PI, resulting in a thread that is throttled |
---|
| 1542 | + * and boosted at the same time. |
---|
| 1543 | + * |
---|
| 1544 | + * In this case, the boost overrides the throttle. |
---|
| 1545 | + */ |
---|
| 1546 | + if (p->dl.dl_throttled) { |
---|
| 1547 | + /* |
---|
| 1548 | + * The replenish timer needs to be canceled. No |
---|
| 1549 | + * problem if it fires concurrently: boosted threads |
---|
| 1550 | + * are ignored in dl_task_timer(). |
---|
| 1551 | + */ |
---|
| 1552 | + hrtimer_try_to_cancel(&p->dl.dl_timer); |
---|
| 1553 | + p->dl.dl_throttled = 0; |
---|
| 1554 | + } |
---|
1487 | 1555 | } else if (!dl_prio(p->normal_prio)) { |
---|
1488 | 1556 | /* |
---|
1489 | | - * Special case in which we have a !SCHED_DEADLINE task |
---|
1490 | | - * that is going to be deboosted, but exceeds its |
---|
1491 | | - * runtime while doing so. No point in replenishing |
---|
1492 | | - * it, as it's going to return back to its original |
---|
1493 | | - * scheduling class after this. |
---|
| 1557 | + * Special case in which we have a !SCHED_DEADLINE task that is going |
---|
| 1558 | + * to be deboosted, but exceeds its runtime while doing so. No point in |
---|
| 1559 | + * replenishing it, as it's going to return back to its original |
---|
| 1560 | + * scheduling class after this. If it has been throttled, we need to |
---|
| 1561 | + * clear the flag, otherwise the task may wake up as throttled after |
---|
| 1562 | + * being boosted again with no means to replenish the runtime and clear |
---|
| 1563 | + * the throttle. |
---|
1494 | 1564 | */ |
---|
1495 | | - BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH); |
---|
| 1565 | + p->dl.dl_throttled = 0; |
---|
| 1566 | + if (!(flags & ENQUEUE_REPLENISH)) |
---|
| 1567 | + printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n", |
---|
| 1568 | + task_pid_nr(p)); |
---|
| 1569 | + |
---|
1496 | 1570 | return; |
---|
1497 | 1571 | } |
---|
1498 | 1572 | |
---|
.. | .. |
---|
1529 | 1603 | return; |
---|
1530 | 1604 | } |
---|
1531 | 1605 | |
---|
1532 | | - enqueue_dl_entity(&p->dl, pi_se, flags); |
---|
| 1606 | + enqueue_dl_entity(&p->dl, flags); |
---|
1533 | 1607 | |
---|
1534 | 1608 | if (!task_current(rq, p) && p->nr_cpus_allowed > 1) |
---|
1535 | 1609 | enqueue_pushable_dl_task(rq, p); |
---|
.. | .. |
---|
1599 | 1673 | static int find_later_rq(struct task_struct *task); |
---|
1600 | 1674 | |
---|
1601 | 1675 | static int |
---|
1602 | | -select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags, |
---|
1603 | | - int sibling_count_hint) |
---|
| 1676 | +select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) |
---|
1604 | 1677 | { |
---|
1605 | 1678 | struct task_struct *curr; |
---|
| 1679 | + bool select_rq; |
---|
1606 | 1680 | struct rq *rq; |
---|
1607 | 1681 | |
---|
1608 | 1682 | if (sd_flag != SD_BALANCE_WAKE) |
---|
.. | .. |
---|
1622 | 1696 | * other hand, if it has a shorter deadline, we |
---|
1623 | 1697 | * try to make it stay here, it might be important. |
---|
1624 | 1698 | */ |
---|
1625 | | - if (unlikely(dl_task(curr)) && |
---|
1626 | | - (curr->nr_cpus_allowed < 2 || |
---|
1627 | | - !dl_entity_preempt(&p->dl, &curr->dl)) && |
---|
1628 | | - (p->nr_cpus_allowed > 1)) { |
---|
| 1699 | + select_rq = unlikely(dl_task(curr)) && |
---|
| 1700 | + (curr->nr_cpus_allowed < 2 || |
---|
| 1701 | + !dl_entity_preempt(&p->dl, &curr->dl)) && |
---|
| 1702 | + p->nr_cpus_allowed > 1; |
---|
| 1703 | + |
---|
| 1704 | + /* |
---|
| 1705 | + * Take the capacity of the CPU into account to |
---|
| 1706 | + * ensure it fits the requirement of the task. |
---|
| 1707 | + */ |
---|
| 1708 | + if (static_branch_unlikely(&sched_asym_cpucapacity)) |
---|
| 1709 | + select_rq |= !dl_task_fits_capacity(p, cpu); |
---|
| 1710 | + |
---|
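On asymmetric-capacity systems the wakeup path now also redirects the task when the waking CPU cannot fit it; the helper named in the hunk is dl_task_fits_capacity(). The sketch below only illustrates the underlying fitness idea (required bandwidth runtime/deadline against the CPU's capacity fraction); it is not the kernel implementation:

```c
/*
 * Illustration of a capacity-fit test for a deadline task: the task's
 * required bandwidth (runtime/deadline) must not exceed the fraction of
 * full capacity this CPU offers. Mirrors the idea behind
 * dl_task_fits_capacity(), not its exact kernel code.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10

static bool fits_capacity(uint64_t dl_runtime, uint64_t dl_deadline,
			  unsigned long cpu_cap)
{
	/* deadline scaled by capacity must still cover the runtime */
	return ((dl_deadline * cpu_cap) >> SCHED_CAPACITY_SHIFT) >= dl_runtime;
}

int main(void)
{
	/* 8 ms every 16 ms: needs half of a full-capacity CPU. */
	printf("on big (1024):   %d\n", fits_capacity(8000000, 16000000, 1024));
	printf("on LITTLE (446): %d\n", fits_capacity(8000000, 16000000, 446));
	return 0;
}
```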
| 1711 | + if (select_rq) { |
---|
1629 | 1712 | int target = find_later_rq(p); |
---|
1630 | 1713 | |
---|
1631 | 1714 | if (target != -1 && |
---|
.. | .. |
---|
1693 | 1776 | resched_curr(rq); |
---|
1694 | 1777 | } |
---|
1695 | 1778 | |
---|
| 1779 | +static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf) |
---|
| 1780 | +{ |
---|
| 1781 | + if (!on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) { |
---|
| 1782 | + /* |
---|
| 1783 | + * This is OK, because current is on_cpu, which avoids it being |
---|
| 1784 | + * picked for load-balance and preemption/IRQs are still |
---|
| 1785 | + * disabled avoiding further scheduler activity on it and we've |
---|
| 1786 | + * not yet started the picking loop. |
---|
| 1787 | + */ |
---|
| 1788 | + rq_unpin_lock(rq, rf); |
---|
| 1789 | + pull_dl_task(rq); |
---|
| 1790 | + rq_repin_lock(rq, rf); |
---|
| 1791 | + } |
---|
| 1792 | + |
---|
| 1793 | + return sched_stop_runnable(rq) || sched_dl_runnable(rq); |
---|
| 1794 | +} |
---|
1696 | 1795 | #endif /* CONFIG_SMP */ |
---|
1697 | 1796 | |
---|
1698 | 1797 | /* |
---|
.. | .. |
---|
1729 | 1828 | } |
---|
1730 | 1829 | #endif |
---|
1731 | 1830 | |
---|
| 1831 | +static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first) |
---|
| 1832 | +{ |
---|
| 1833 | + p->se.exec_start = rq_clock_task(rq); |
---|
| 1834 | + |
---|
| 1835 | + /* You can't push away the running task */ |
---|
| 1836 | + dequeue_pushable_dl_task(rq, p); |
---|
| 1837 | + |
---|
| 1838 | + if (!first) |
---|
| 1839 | + return; |
---|
| 1840 | + |
---|
| 1841 | + if (hrtick_enabled(rq)) |
---|
| 1842 | + start_hrtick_dl(rq, p); |
---|
| 1843 | + |
---|
| 1844 | + if (rq->curr->sched_class != &dl_sched_class) |
---|
| 1845 | + update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0); |
---|
| 1846 | + |
---|
| 1847 | + deadline_queue_push_tasks(rq); |
---|
| 1848 | +} |
---|
| 1849 | + |
---|
1732 | 1850 | static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq, |
---|
1733 | 1851 | struct dl_rq *dl_rq) |
---|
1734 | 1852 | { |
---|
.. | .. |
---|
1740 | 1858 | return rb_entry(left, struct sched_dl_entity, rb_node); |
---|
1741 | 1859 | } |
---|
1742 | 1860 | |
---|
1743 | | -static struct task_struct * |
---|
1744 | | -pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) |
---|
| 1861 | +static struct task_struct *pick_next_task_dl(struct rq *rq) |
---|
1745 | 1862 | { |
---|
1746 | 1863 | struct sched_dl_entity *dl_se; |
---|
| 1864 | + struct dl_rq *dl_rq = &rq->dl; |
---|
1747 | 1865 | struct task_struct *p; |
---|
1748 | | - struct dl_rq *dl_rq; |
---|
1749 | 1866 | |
---|
1750 | | - dl_rq = &rq->dl; |
---|
1751 | | - |
---|
1752 | | - if (need_pull_dl_task(rq, prev)) { |
---|
1753 | | - /* |
---|
1754 | | - * This is OK, because current is on_cpu, which avoids it being |
---|
1755 | | - * picked for load-balance and preemption/IRQs are still |
---|
1756 | | - * disabled avoiding further scheduler activity on it and we're |
---|
1757 | | - * being very careful to re-start the picking loop. |
---|
1758 | | - */ |
---|
1759 | | - rq_unpin_lock(rq, rf); |
---|
1760 | | - pull_dl_task(rq); |
---|
1761 | | - rq_repin_lock(rq, rf); |
---|
1762 | | - /* |
---|
1763 | | - * pull_dl_task() can drop (and re-acquire) rq->lock; this |
---|
1764 | | - * means a stop task can slip in, in which case we need to |
---|
1765 | | - * re-start task selection. |
---|
1766 | | - */ |
---|
1767 | | - if (rq->stop && task_on_rq_queued(rq->stop)) |
---|
1768 | | - return RETRY_TASK; |
---|
1769 | | - } |
---|
1770 | | - |
---|
1771 | | - /* |
---|
1772 | | - * When prev is DL, we may throttle it in put_prev_task(). |
---|
1773 | | - * So, we update time before we check for dl_nr_running. |
---|
1774 | | - */ |
---|
1775 | | - if (prev->sched_class == &dl_sched_class) |
---|
1776 | | - update_curr_dl(rq); |
---|
1777 | | - |
---|
1778 | | - if (unlikely(!dl_rq->dl_nr_running)) |
---|
| 1867 | + if (!sched_dl_runnable(rq)) |
---|
1779 | 1868 | return NULL; |
---|
1780 | | - |
---|
1781 | | - put_prev_task(rq, prev); |
---|
1782 | 1869 | |
---|
1783 | 1870 | dl_se = pick_next_dl_entity(rq, dl_rq); |
---|
1784 | 1871 | BUG_ON(!dl_se); |
---|
1785 | | - |
---|
1786 | 1872 | p = dl_task_of(dl_se); |
---|
1787 | | - p->se.exec_start = rq_clock_task(rq); |
---|
1788 | | - |
---|
1789 | | - /* Running task will never be pushed. */ |
---|
1790 | | - dequeue_pushable_dl_task(rq, p); |
---|
1791 | | - |
---|
1792 | | - if (hrtick_enabled(rq)) |
---|
1793 | | - start_hrtick_dl(rq, p); |
---|
1794 | | - |
---|
1795 | | - deadline_queue_push_tasks(rq); |
---|
1796 | | - |
---|
1797 | | - if (rq->curr->sched_class != &dl_sched_class) |
---|
1798 | | - update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0); |
---|
1799 | | - |
---|
| 1873 | + set_next_task_dl(rq, p, true); |
---|
1800 | 1874 | return p; |
---|
1801 | 1875 | } |
---|
1802 | 1876 | |
---|
.. | .. |
---|
1840 | 1914 | */ |
---|
1841 | 1915 | } |
---|
1842 | 1916 | |
---|
1843 | | -static void set_curr_task_dl(struct rq *rq) |
---|
1844 | | -{ |
---|
1845 | | - struct task_struct *p = rq->curr; |
---|
1846 | | - |
---|
1847 | | - p->se.exec_start = rq_clock_task(rq); |
---|
1848 | | - |
---|
1849 | | - /* You can't push away the running task */ |
---|
1850 | | - dequeue_pushable_dl_task(rq, p); |
---|
1851 | | -} |
---|
1852 | | - |
---|
1853 | 1917 | #ifdef CONFIG_SMP |
---|
1854 | 1918 | |
---|
1855 | 1919 | /* Only try algorithms three times */ |
---|
.. | .. |
---|
1858 | 1922 | static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) |
---|
1859 | 1923 | { |
---|
1860 | 1924 | if (!task_running(rq, p) && |
---|
1861 | | - cpumask_test_cpu(cpu, &p->cpus_allowed)) |
---|
| 1925 | + cpumask_test_cpu(cpu, &p->cpus_mask)) |
---|
1862 | 1926 | return 1; |
---|
1863 | 1927 | return 0; |
---|
1864 | 1928 | } |
---|
.. | .. |
---|
1948 | 2012 | return this_cpu; |
---|
1949 | 2013 | } |
---|
1950 | 2014 | |
---|
1951 | | - best_cpu = cpumask_first_and(later_mask, |
---|
1952 | | - sched_domain_span(sd)); |
---|
| 2015 | + best_cpu = cpumask_any_and_distribute(later_mask, |
---|
| 2016 | + sched_domain_span(sd)); |
---|
1953 | 2017 | /* |
---|
1954 | 2018 | * Last chance: if a CPU being in both later_mask |
---|
1955 | 2019 | * and current sd span is valid, that becomes our |
---|
.. | .. |
---|
1971 | 2035 | if (this_cpu != -1) |
---|
1972 | 2036 | return this_cpu; |
---|
1973 | 2037 | |
---|
1974 | | - cpu = cpumask_any(later_mask); |
---|
| 2038 | + cpu = cpumask_any_distribute(later_mask); |
---|
1975 | 2039 | if (cpu < nr_cpu_ids) |
---|
1976 | 2040 | return cpu; |
---|
1977 | 2041 | |
---|
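find_later_rq() switches to the *_distribute() cpumask helpers, which continue the search after the CPU they returned previously (the kernel keeps that previous pick per CPU) so repeated picks are spread across the mask instead of always landing on the lowest-numbered CPU. A user-space illustration of that round-robin idea, not the kernel implementation:

```c
/*
 * Illustration of the "distribute" idea: start scanning after the CPU
 * returned last time and wrap around, so repeated calls over the same
 * mask spread their picks. A single static variable stands in for the
 * kernel's per-CPU previous pick.
 */
#include <stdio.h>

#define NR_CPUS 8

static int prev_cpu = -1;

static int any_and_distribute(unsigned int mask_a, unsigned int mask_b)
{
	unsigned int both = mask_a & mask_b;

	for (int i = 1; i <= NR_CPUS; i++) {
		int cpu = (prev_cpu + i) % NR_CPUS;

		if (both & (1u << cpu)) {
			prev_cpu = cpu;
			return cpu;
		}
	}
	return NR_CPUS;	/* empty intersection: mirrors "cpu >= nr_cpu_ids" */
}

int main(void)
{
	unsigned int later_mask = 0x6c;	/* CPUs 2, 3, 5, 6 */
	unsigned int sd_span    = 0xff;	/* CPUs 0-7 */

	for (int i = 0; i < 5; i++)
		printf("pick %d -> CPU %d\n", i, any_and_distribute(later_mask, sd_span));
	return 0;
}
```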
.. | .. |
---|
2008 | 2072 | /* Retry if something changed. */ |
---|
2009 | 2073 | if (double_lock_balance(rq, later_rq)) { |
---|
2010 | 2074 | if (unlikely(task_rq(task) != rq || |
---|
2011 | | - !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) || |
---|
| 2075 | + !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) || |
---|
2012 | 2076 | task_running(rq, task) || |
---|
2013 | 2077 | !dl_task(task) || |
---|
2014 | 2078 | !task_on_rq_queued(task))) { |
---|
.. | .. |
---|
2075 | 2139 | return 0; |
---|
2076 | 2140 | |
---|
2077 | 2141 | retry: |
---|
2078 | | - if (unlikely(next_task == rq->curr)) { |
---|
2079 | | - WARN_ON(1); |
---|
| 2142 | + if (is_migration_disabled(next_task)) |
---|
2080 | 2143 | return 0; |
---|
2081 | | - } |
---|
| 2144 | + |
---|
| 2145 | + if (WARN_ON(next_task == rq->curr)) |
---|
| 2146 | + return 0; |
---|
2082 | 2147 | |
---|
2083 | 2148 | /* |
---|
2084 | 2149 | * If next_task preempts rq->curr, and rq->curr |
---|
.. | .. |
---|
2124 | 2189 | } |
---|
2125 | 2190 | |
---|
2126 | 2191 | deactivate_task(rq, next_task, 0); |
---|
2127 | | - sub_running_bw(&next_task->dl, &rq->dl); |
---|
2128 | | - sub_rq_bw(&next_task->dl, &rq->dl); |
---|
2129 | 2192 | set_task_cpu(next_task, later_rq->cpu); |
---|
2130 | | - add_rq_bw(&next_task->dl, &later_rq->dl); |
---|
2131 | 2193 | |
---|
2132 | 2194 | /* |
---|
2133 | 2195 | * Update the later_rq clock here, because the clock is used |
---|
2134 | 2196 | * by the cpufreq_update_util() inside __add_running_bw(). |
---|
2135 | 2197 | */ |
---|
2136 | 2198 | update_rq_clock(later_rq); |
---|
2137 | | - add_running_bw(&next_task->dl, &later_rq->dl); |
---|
2138 | 2199 | activate_task(later_rq, next_task, ENQUEUE_NOCLOCK); |
---|
2139 | 2200 | ret = 1; |
---|
2140 | 2201 | |
---|
.. | .. |
---|
2158 | 2219 | static void pull_dl_task(struct rq *this_rq) |
---|
2159 | 2220 | { |
---|
2160 | 2221 | int this_cpu = this_rq->cpu, cpu; |
---|
2161 | | - struct task_struct *p; |
---|
| 2222 | + struct task_struct *p, *push_task; |
---|
2162 | 2223 | bool resched = false; |
---|
2163 | 2224 | struct rq *src_rq; |
---|
2164 | 2225 | u64 dmin = LONG_MAX; |
---|
.. | .. |
---|
2188 | 2249 | continue; |
---|
2189 | 2250 | |
---|
2190 | 2251 | /* Might drop this_rq->lock */ |
---|
| 2252 | + push_task = NULL; |
---|
2191 | 2253 | double_lock_balance(this_rq, src_rq); |
---|
2192 | 2254 | |
---|
2193 | 2255 | /* |
---|
.. | .. |
---|
2219 | 2281 | src_rq->curr->dl.deadline)) |
---|
2220 | 2282 | goto skip; |
---|
2221 | 2283 | |
---|
2222 | | - resched = true; |
---|
2223 | | - |
---|
2224 | | - deactivate_task(src_rq, p, 0); |
---|
2225 | | - sub_running_bw(&p->dl, &src_rq->dl); |
---|
2226 | | - sub_rq_bw(&p->dl, &src_rq->dl); |
---|
2227 | | - set_task_cpu(p, this_cpu); |
---|
2228 | | - add_rq_bw(&p->dl, &this_rq->dl); |
---|
2229 | | - add_running_bw(&p->dl, &this_rq->dl); |
---|
2230 | | - activate_task(this_rq, p, 0); |
---|
2231 | | - dmin = p->dl.deadline; |
---|
| 2284 | + if (is_migration_disabled(p)) { |
---|
| 2285 | + trace_sched_migrate_pull_tp(p); |
---|
| 2286 | + push_task = get_push_task(src_rq); |
---|
| 2287 | + } else { |
---|
| 2288 | + deactivate_task(src_rq, p, 0); |
---|
| 2289 | + set_task_cpu(p, this_cpu); |
---|
| 2290 | + activate_task(this_rq, p, 0); |
---|
| 2291 | + dmin = p->dl.deadline; |
---|
| 2292 | + resched = true; |
---|
| 2293 | + } |
---|
2232 | 2294 | |
---|
2233 | 2295 | /* Is there any other task even earlier? */ |
---|
2234 | 2296 | } |
---|
2235 | 2297 | skip: |
---|
2236 | 2298 | double_unlock_balance(this_rq, src_rq); |
---|
| 2299 | + |
---|
| 2300 | + if (push_task) { |
---|
| 2301 | + raw_spin_unlock(&this_rq->lock); |
---|
| 2302 | + stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop, |
---|
| 2303 | + push_task, &src_rq->push_work); |
---|
| 2304 | + raw_spin_lock(&this_rq->lock); |
---|
| 2305 | + } |
---|
2237 | 2306 | } |
---|
2238 | 2307 | |
---|
2239 | 2308 | if (resched) |
---|
.. | .. |
---|
2257 | 2326 | } |
---|
2258 | 2327 | |
---|
2259 | 2328 | static void set_cpus_allowed_dl(struct task_struct *p, |
---|
2260 | | - const struct cpumask *new_mask) |
---|
| 2329 | + const struct cpumask *new_mask, |
---|
| 2330 | + u32 flags) |
---|
2261 | 2331 | { |
---|
2262 | 2332 | struct root_domain *src_rd; |
---|
2263 | 2333 | struct rq *rq; |
---|
.. | .. |
---|
2286 | 2356 | raw_spin_unlock(&src_dl_b->lock); |
---|
2287 | 2357 | } |
---|
2288 | 2358 | |
---|
2289 | | - set_cpus_allowed_common(p, new_mask); |
---|
| 2359 | + set_cpus_allowed_common(p, new_mask, flags); |
---|
2290 | 2360 | } |
---|
2291 | 2361 | |
---|
2292 | 2362 | /* Assumes rq->lock is held */ |
---|
.. | .. |
---|
2317 | 2387 | for_each_possible_cpu(i) |
---|
2318 | 2388 | zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i), |
---|
2319 | 2389 | GFP_KERNEL, cpu_to_node(i)); |
---|
| 2390 | +} |
---|
| 2391 | + |
---|
| 2392 | +void dl_add_task_root_domain(struct task_struct *p) |
---|
| 2393 | +{ |
---|
| 2394 | + struct rq_flags rf; |
---|
| 2395 | + struct rq *rq; |
---|
| 2396 | + struct dl_bw *dl_b; |
---|
| 2397 | + |
---|
| 2398 | + raw_spin_lock_irqsave(&p->pi_lock, rf.flags); |
---|
| 2399 | + if (!dl_task(p)) { |
---|
| 2400 | + raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags); |
---|
| 2401 | + return; |
---|
| 2402 | + } |
---|
| 2403 | + |
---|
| 2404 | + rq = __task_rq_lock(p, &rf); |
---|
| 2405 | + |
---|
| 2406 | + dl_b = &rq->rd->dl_bw; |
---|
| 2407 | + raw_spin_lock(&dl_b->lock); |
---|
| 2408 | + |
---|
| 2409 | + __dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span)); |
---|
| 2410 | + |
---|
| 2411 | + raw_spin_unlock(&dl_b->lock); |
---|
| 2412 | + |
---|
| 2413 | + task_rq_unlock(rq, p, &rf); |
---|
| 2414 | +} |
---|
| 2415 | + |
---|
| 2416 | +void dl_clear_root_domain(struct root_domain *rd) |
---|
| 2417 | +{ |
---|
| 2418 | + unsigned long flags; |
---|
| 2419 | + |
---|
| 2420 | + raw_spin_lock_irqsave(&rd->dl_bw.lock, flags); |
---|
| 2421 | + rd->dl_bw.total_bw = 0; |
---|
| 2422 | + raw_spin_unlock_irqrestore(&rd->dl_bw.lock, flags); |
---|
2320 | 2423 | } |
---|
2321 | 2424 | |
---|
2322 | 2425 | #endif /* CONFIG_SMP */ |
---|
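dl_clear_root_domain() and dl_add_task_root_domain() let the DEADLINE bandwidth accounting of a root domain be rebuilt when domains change (e.g. cpuset reconfiguration): zero total_bw, then re-add every DL task's dl_bw. Per task, dl_bw is the fixed-point ratio runtime/period. A sketch of that accounting, assuming the kernel's BW_SHIFT of 20 and invented task parameters:

```c
/*
 * Sketch of root-domain DL bandwidth accounting: each task contributes
 * to_ratio(period, runtime) = (runtime << BW_SHIFT) / period to total_bw.
 * BW_SHIFT of 20 matches the kernel's fixed-point unit; tasks are invented.
 */
#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT 20

struct dl_bw {
	uint64_t total_bw;
};

static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
	return (runtime << BW_SHIFT) / period;
}

static void dl_clear(struct dl_bw *b)           { b->total_bw = 0; }
static void dl_add(struct dl_bw *b, uint64_t w) { b->total_bw += w; }

int main(void)
{
	struct dl_bw rd_bw;
	/* Two tasks: 10 ms / 100 ms (10%) and 2 ms / 10 ms (20%). */
	uint64_t bw1 = to_ratio(100000000, 10000000);
	uint64_t bw2 = to_ratio(10000000, 2000000);

	dl_clear(&rd_bw);	/* as dl_clear_root_domain() does for the domain */
	dl_add(&rd_bw, bw1);	/* as dl_add_task_root_domain() does per task */
	dl_add(&rd_bw, bw2);

	printf("total_bw = %llu (%.1f%% of one CPU)\n",
	       (unsigned long long)rd_bw.total_bw,
	       100.0 * rd_bw.total_bw / (1 << BW_SHIFT));
	return 0;
}
```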
.. | .. |
---|
2390 | 2493 | check_preempt_curr_dl(rq, p, 0); |
---|
2391 | 2494 | else |
---|
2392 | 2495 | resched_curr(rq); |
---|
| 2496 | + } else { |
---|
| 2497 | + update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0); |
---|
2393 | 2498 | } |
---|
2394 | 2499 | } |
---|
2395 | 2500 | |
---|
.. | .. |
---|
2429 | 2534 | } |
---|
2430 | 2535 | } |
---|
2431 | 2536 | |
---|
2432 | | -const struct sched_class dl_sched_class = { |
---|
2433 | | - .next = &rt_sched_class, |
---|
| 2537 | +const struct sched_class dl_sched_class |
---|
| 2538 | + __section("__dl_sched_class") = { |
---|
2434 | 2539 | .enqueue_task = enqueue_task_dl, |
---|
2435 | 2540 | .dequeue_task = dequeue_task_dl, |
---|
2436 | 2541 | .yield_task = yield_task_dl, |
---|
.. | .. |
---|
2439 | 2544 | |
---|
2440 | 2545 | .pick_next_task = pick_next_task_dl, |
---|
2441 | 2546 | .put_prev_task = put_prev_task_dl, |
---|
| 2547 | + .set_next_task = set_next_task_dl, |
---|
2442 | 2548 | |
---|
2443 | 2549 | #ifdef CONFIG_SMP |
---|
| 2550 | + .balance = balance_dl, |
---|
2444 | 2551 | .select_task_rq = select_task_rq_dl, |
---|
2445 | 2552 | .migrate_task_rq = migrate_task_rq_dl, |
---|
2446 | 2553 | .set_cpus_allowed = set_cpus_allowed_dl, |
---|
2447 | 2554 | .rq_online = rq_online_dl, |
---|
2448 | 2555 | .rq_offline = rq_offline_dl, |
---|
2449 | 2556 | .task_woken = task_woken_dl, |
---|
| 2557 | + .find_lock_rq = find_lock_later_rq, |
---|
2450 | 2558 | #endif |
---|
2451 | 2559 | |
---|
2452 | | - .set_curr_task = set_curr_task_dl, |
---|
2453 | 2560 | .task_tick = task_tick_dl, |
---|
2454 | 2561 | .task_fork = task_fork_dl, |
---|
2455 | 2562 | |
---|
.. | .. |
---|
2497 | 2604 | return ret; |
---|
2498 | 2605 | } |
---|
2499 | 2606 | |
---|
2500 | | -void init_dl_rq_bw_ratio(struct dl_rq *dl_rq) |
---|
| 2607 | +static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq) |
---|
2501 | 2608 | { |
---|
2502 | 2609 | if (global_rt_runtime() == RUNTIME_INF) { |
---|
2503 | 2610 | dl_rq->bw_ratio = 1 << RATIO_SHIFT; |
---|
.. | .. |
---|
2550 | 2657 | int sched_dl_overflow(struct task_struct *p, int policy, |
---|
2551 | 2658 | const struct sched_attr *attr) |
---|
2552 | 2659 | { |
---|
2553 | | - struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); |
---|
2554 | 2660 | u64 period = attr->sched_period ?: attr->sched_deadline; |
---|
2555 | 2661 | u64 runtime = attr->sched_runtime; |
---|
2556 | 2662 | u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0; |
---|
2557 | | - int cpus, err = -1; |
---|
| 2663 | + int cpus, err = -1, cpu = task_cpu(p); |
---|
| 2664 | + struct dl_bw *dl_b = dl_bw_of(cpu); |
---|
| 2665 | + unsigned long cap; |
---|
2558 | 2666 | |
---|
2559 | 2667 | if (attr->sched_flags & SCHED_FLAG_SUGOV) |
---|
2560 | 2668 | return 0; |
---|
.. | .. |
---|
2569 | 2677 | * allocated bandwidth of the container. |
---|
2570 | 2678 | */ |
---|
2571 | 2679 | raw_spin_lock(&dl_b->lock); |
---|
2572 | | - cpus = dl_bw_cpus(task_cpu(p)); |
---|
| 2680 | + cpus = dl_bw_cpus(cpu); |
---|
| 2681 | + cap = dl_bw_capacity(cpu); |
---|
| 2682 | + |
---|
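The admission checks that follow now compare against the capacity just computed rather than a CPU count, so a big.LITTLE root domain admits less total bandwidth than a same-sized symmetric one. A sketch mirroring (not reproducing) the capacity-scaled __dl_overflow() test, with invented capacities and a 95% per-capacity-unit limit:

```c
/*
 * Sketch of the capacity-scaled admission check. limit is the admissible
 * bandwidth per unit of capacity in BW_SHIFT fixed point; capacities and
 * task bandwidths below are invented examples.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT		20
#define SCHED_CAPACITY_SHIFT	10

static uint64_t cap_scale(uint64_t bw, unsigned long cap)
{
	return (bw * cap) >> SCHED_CAPACITY_SHIFT;
}

static bool dl_overflow(uint64_t limit, unsigned long cap,
			uint64_t total_bw, uint64_t old_bw, uint64_t new_bw)
{
	return cap_scale(limit, cap) < total_bw - old_bw + new_bw;
}

int main(void)
{
	uint64_t limit = (95ULL << BW_SHIFT) / 100;	/* 95% per capacity unit */
	uint64_t task  = (70ULL << BW_SHIFT) / 100;	/* each task asks for 70% */

	/* Admit a second 70% task: 2 x 1024 capacity vs 1024 + 446 (big.LITTLE). */
	printf("symmetric (cap 2048):  overflow=%d\n",
	       dl_overflow(limit, 2048, task, 0, task));
	printf("asymmetric (cap 1470): overflow=%d\n",
	       dl_overflow(limit, 1470, task, 0, task));
	return 0;
}
```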
2573 | 2683 | if (dl_policy(policy) && !task_has_dl_policy(p) && |
---|
2574 | | - !__dl_overflow(dl_b, cpus, 0, new_bw)) { |
---|
| 2684 | + !__dl_overflow(dl_b, cap, 0, new_bw)) { |
---|
2575 | 2685 | if (hrtimer_active(&p->dl.inactive_timer)) |
---|
2576 | 2686 | __dl_sub(dl_b, p->dl.dl_bw, cpus); |
---|
2577 | 2687 | __dl_add(dl_b, new_bw, cpus); |
---|
2578 | 2688 | err = 0; |
---|
2579 | 2689 | } else if (dl_policy(policy) && task_has_dl_policy(p) && |
---|
2580 | | - !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) { |
---|
| 2690 | + !__dl_overflow(dl_b, cap, p->dl.dl_bw, new_bw)) { |
---|
2581 | 2691 | /* |
---|
2582 | 2692 | * XXX this is slightly incorrect: when the task |
---|
2583 | 2693 | * utilization decreases, we should delay the total |
---|
.. | .. |
---|
2635 | 2745 | } |
---|
2636 | 2746 | |
---|
2637 | 2747 | /* |
---|
| 2748 | + * Default limits for DL period; on the top end we guard against small util |
---|
| 2749 | + * tasks still getting ridiculously long effective runtimes, on the bottom end we
---|
| 2750 | + * guard against timer DoS. |
---|
| 2751 | + */ |
---|
| 2752 | +unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */ |
---|
| 2753 | +unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */ |
---|
| 2754 | + |
---|
| 2755 | +/* |
---|
2638 | 2756 | * This function validates the new parameters of a -deadline task. |
---|
2639 | 2757 | * We ask for the deadline not being zero, and greater or equal |
---|
2640 | 2758 | * than the runtime, as well as the period of being zero or |
---|
.. | .. |
---|
2646 | 2764 | */ |
---|
2647 | 2765 | bool __checkparam_dl(const struct sched_attr *attr) |
---|
2648 | 2766 | { |
---|
| 2767 | + u64 period, max, min; |
---|
| 2768 | + |
---|
2649 | 2769 | /* special dl tasks don't actually use any parameter */ |
---|
2650 | 2770 | if (attr->sched_flags & SCHED_FLAG_SUGOV) |
---|
2651 | 2771 | return true; |
---|
.. | .. |
---|
2669 | 2789 | attr->sched_period & (1ULL << 63)) |
---|
2670 | 2790 | return false; |
---|
2671 | 2791 | |
---|
| 2792 | + period = attr->sched_period; |
---|
| 2793 | + if (!period) |
---|
| 2794 | + period = attr->sched_deadline; |
---|
| 2795 | + |
---|
2672 | 2796 | /* runtime <= deadline <= period (if period != 0) */ |
---|
2673 | | - if ((attr->sched_period != 0 && |
---|
2674 | | - attr->sched_period < attr->sched_deadline) || |
---|
| 2797 | + if (period < attr->sched_deadline || |
---|
2675 | 2798 | attr->sched_deadline < attr->sched_runtime) |
---|
| 2799 | + return false; |
---|
| 2800 | + |
---|
| 2801 | + max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC; |
---|
| 2802 | + min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC; |
---|
| 2803 | + |
---|
| 2804 | + if (period < min || period > max) |
---|
2676 | 2805 | return false; |
---|
2677 | 2806 | |
---|
2678 | 2807 | return true; |
---|
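__checkparam_dl() now derives the effective period (sched_period, or sched_deadline when the period is 0), keeps the runtime <= deadline <= period ordering, and additionally bounds the period by the new sysctls. A standalone sketch of that validation, omitting the sign-bit and flag checks of the original:

```c
/*
 * Sketch of the parameter validation added to __checkparam_dl():
 * runtime <= deadline <= period, with the effective period bounded by
 * [sysctl_sched_dl_period_min, sysctl_sched_dl_period_max] (in us).
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_USEC 1000ULL

static unsigned int sysctl_sched_dl_period_max = 1 << 22;	/* ~4 seconds */
static unsigned int sysctl_sched_dl_period_min = 100;		/* 100 us */

static bool checkparam_dl(uint64_t runtime, uint64_t deadline, uint64_t period)
{
	uint64_t max = (uint64_t)sysctl_sched_dl_period_max * NSEC_PER_USEC;
	uint64_t min = (uint64_t)sysctl_sched_dl_period_min * NSEC_PER_USEC;

	if (!period)
		period = deadline;

	if (period < deadline || deadline < runtime)
		return false;

	return period >= min && period <= max;
}

int main(void)
{
	/* 1 ms / 10 ms / 100 ms: accepted. */
	printf("%d\n", checkparam_dl(1000000, 10000000, 100000000));
	/* 50 us effective period: below the 100 us minimum, rejected. */
	printf("%d\n", checkparam_dl(10000, 50000, 0));
	/* 10 s period: above the ~4 s maximum, rejected. */
	printf("%d\n", checkparam_dl(1000000, 10000000, 10000000000ULL));
	return 0;
}
```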
.. | .. |
---|
2692 | 2821 | dl_se->dl_bw = 0; |
---|
2693 | 2822 | dl_se->dl_density = 0; |
---|
2694 | 2823 | |
---|
2695 | | - dl_se->dl_boosted = 0; |
---|
2696 | 2824 | dl_se->dl_throttled = 0; |
---|
2697 | 2825 | dl_se->dl_yielded = 0; |
---|
2698 | 2826 | dl_se->dl_non_contending = 0; |
---|
2699 | 2827 | dl_se->dl_overrun = 0; |
---|
| 2828 | + |
---|
| 2829 | +#ifdef CONFIG_RT_MUTEXES |
---|
| 2830 | + dl_se->pi_se = dl_se; |
---|
| 2831 | +#endif |
---|
2700 | 2832 | } |
---|
2701 | 2833 | |
---|
2702 | 2834 | bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr) |
---|
.. | .. |
---|
2713 | 2845 | } |
---|
2714 | 2846 | |
---|
2715 | 2847 | #ifdef CONFIG_SMP |
---|
2716 | | -int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed) |
---|
2717 | | -{ |
---|
2718 | | - unsigned int dest_cpu; |
---|
2719 | | - struct dl_bw *dl_b; |
---|
2720 | | - bool overflow; |
---|
2721 | | - int cpus, ret; |
---|
2722 | | - unsigned long flags; |
---|
2723 | | - |
---|
2724 | | - dest_cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed); |
---|
2725 | | - |
---|
2726 | | - rcu_read_lock_sched(); |
---|
2727 | | - dl_b = dl_bw_of(dest_cpu); |
---|
2728 | | - raw_spin_lock_irqsave(&dl_b->lock, flags); |
---|
2729 | | - cpus = dl_bw_cpus(dest_cpu); |
---|
2730 | | - overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw); |
---|
2731 | | - if (overflow) { |
---|
2732 | | - ret = -EBUSY; |
---|
2733 | | - } else { |
---|
2734 | | - /* |
---|
2735 | | - * We reserve space for this task in the destination |
---|
2736 | | - * root_domain, as we can't fail after this point. |
---|
2737 | | - * We will free resources in the source root_domain |
---|
2738 | | - * later on (see set_cpus_allowed_dl()). |
---|
2739 | | - */ |
---|
2740 | | - __dl_add(dl_b, p->dl.dl_bw, cpus); |
---|
2741 | | - ret = 0; |
---|
2742 | | - } |
---|
2743 | | - raw_spin_unlock_irqrestore(&dl_b->lock, flags); |
---|
2744 | | - rcu_read_unlock_sched(); |
---|
2745 | | - |
---|
2746 | | - return ret; |
---|
2747 | | -} |
---|
2748 | | - |
---|
2749 | 2848 | int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, |
---|
2750 | 2849 | const struct cpumask *trial) |
---|
2751 | 2850 | { |
---|
.. | .. |
---|
2767 | 2866 | return ret; |
---|
2768 | 2867 | } |
---|
2769 | 2868 | |
---|
2770 | | -bool dl_cpu_busy(unsigned int cpu) |
---|
| 2869 | +int dl_cpu_busy(int cpu, struct task_struct *p) |
---|
2771 | 2870 | { |
---|
2772 | | - unsigned long flags; |
---|
| 2871 | + unsigned long flags, cap; |
---|
2773 | 2872 | struct dl_bw *dl_b; |
---|
2774 | 2873 | bool overflow; |
---|
2775 | | - int cpus; |
---|
2776 | 2874 | |
---|
2777 | 2875 | rcu_read_lock_sched(); |
---|
2778 | 2876 | dl_b = dl_bw_of(cpu); |
---|
2779 | 2877 | raw_spin_lock_irqsave(&dl_b->lock, flags); |
---|
2780 | | - cpus = dl_bw_cpus(cpu); |
---|
2781 | | - overflow = __dl_overflow(dl_b, cpus, 0, 0); |
---|
| 2878 | + cap = dl_bw_capacity(cpu); |
---|
| 2879 | + overflow = __dl_overflow(dl_b, cap, 0, p ? p->dl.dl_bw : 0); |
---|
| 2880 | + |
---|
| 2881 | + if (!overflow && p) { |
---|
| 2882 | + /* |
---|
| 2883 | + * We reserve space for this task in the destination |
---|
| 2884 | + * root_domain, as we can't fail after this point. |
---|
| 2885 | + * We will free resources in the source root_domain |
---|
| 2886 | + * later on (see set_cpus_allowed_dl()). |
---|
| 2887 | + */ |
---|
| 2888 | + __dl_add(dl_b, p->dl.dl_bw, dl_bw_cpus(cpu)); |
---|
| 2889 | + } |
---|
| 2890 | + |
---|
2782 | 2891 | raw_spin_unlock_irqrestore(&dl_b->lock, flags); |
---|
2783 | 2892 | rcu_read_unlock_sched(); |
---|
2784 | 2893 | |
---|
2785 | | - return overflow; |
---|
| 2894 | + return overflow ? -EBUSY : 0; |
---|
2786 | 2895 | } |
---|
2787 | 2896 | #endif |
---|
2788 | 2897 | |
---|
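dl_cpu_busy() now returns 0 or -EBUSY and, when given a task, reserves that task's bandwidth in the destination root domain under the same dl_bw lock, absorbing what dl_task_can_attach() did before its removal above. A user-space sketch of that check-and-reserve-atomically pattern, with a pthread mutex standing in for the raw spinlock and made-up bandwidth numbers:

```c
/*
 * Sketch of the check-and-reserve pattern now in dl_cpu_busy(): the
 * overflow test and the reservation happen under one lock so admission
 * cannot race with a concurrent attach.
 */
#include <errno.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct dl_bw {
	pthread_mutex_t lock;
	uint64_t cap;		/* admissible bandwidth of the domain */
	uint64_t total_bw;	/* bandwidth already reserved */
};

static int dl_cpu_busy(struct dl_bw *b, uint64_t task_bw)
{
	int ret = 0;

	pthread_mutex_lock(&b->lock);
	if (b->total_bw + task_bw > b->cap)
		ret = -EBUSY;
	else
		b->total_bw += task_bw;	/* reserve; the source domain frees it later */
	pthread_mutex_unlock(&b->lock);

	return ret;
}

int main(void)
{
	struct dl_bw b = { .cap = 1000, .total_bw = 900 };

	pthread_mutex_init(&b.lock, NULL);
	printf("attach 50:  %d\n", dl_cpu_busy(&b, 50));	/* 0 */
	printf("attach 200: %d\n", dl_cpu_busy(&b, 200));	/* -EBUSY */
	pthread_mutex_destroy(&b.lock);
	return 0;
}
```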