2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/kernel/sched/rt.c
@@ -7,8 +7,12 @@
 
 #include "pelt.h"
 
+#include <trace/hooks/sched.h>
+
 int sched_rr_timeslice = RR_TIMESLICE;
 int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
+/* More than 4 hours if BW_SHIFT equals 20. */
+static const u64 max_rt_runtime = MAX_BW;
 
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
 
@@ -45,8 +49,8 @@
 
 	raw_spin_lock_init(&rt_b->rt_runtime_lock);
 
-	hrtimer_init(&rt_b->rt_period_timer,
-			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL_HARD);
 	rt_b->rt_period_timer.function = sched_rt_period_timer;
 }
 
@@ -64,7 +68,8 @@
 		 * to update the period.
 		 */
 		hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
-		hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start_expires(&rt_b->rt_period_timer,
+				      HRTIMER_MODE_ABS_PINNED_HARD);
 	}
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
 }
@@ -267,7 +272,7 @@
 static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
 {
 	/* Try to pull RT tasks here if we lower this rq's prio */
-	return rq->rt.highest_prio.curr > prev->prio;
+	return rq->online && rq->rt.highest_prio.curr > prev->prio;
 }
 
 static inline int rt_overloaded(struct rq *rq)
@@ -434,7 +439,7 @@
 #endif /* CONFIG_SMP */
 
 static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
-static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
+static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);
 
 static inline int on_rt_rq(struct sched_rt_entity *rt_se)
 {
@@ -555,7 +560,7 @@
 	rt_se = rt_rq->tg->rt_se[cpu];
 
 	if (!rt_se) {
-		dequeue_top_rt_rq(rt_rq);
+		dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
 		/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
 		cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
 	}
@@ -641,7 +646,7 @@
 
 static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
-	dequeue_top_rt_rq(rt_rq);
+	dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
 }
 
 static inline int rt_rq_throttled(struct rt_rq *rt_rq)
@@ -973,6 +978,13 @@
 		if (likely(rt_b->rt_runtime)) {
 			rt_rq->rt_throttled = 1;
 			printk_deferred_once("sched: RT throttling activated\n");
+
+			trace_android_vh_dump_throttled_rt_tasks(
+				raw_smp_processor_id(),
+				rq_clock(rq_of_rt_rq(rt_rq)),
+				sched_rt_period(rt_rq),
+				runtime,
+				hrtimer_get_expires_ns(&rt_b->rt_period_timer));
 		} else {
 			/*
 			 * In case we did anyway, make it go away,
@@ -1019,6 +1031,8 @@
 	curr->se.exec_start = now;
 	cgroup_account_cputime(curr, delta_exec);
 
+	trace_android_vh_sched_stat_runtime_rt(curr, delta_exec);
+
 	if (!rt_bandwidth_enabled())
 		return;
 
@@ -1040,7 +1054,7 @@
 }
 
 static void
-dequeue_top_rt_rq(struct rt_rq *rt_rq)
+dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count)
 {
 	struct rq *rq = rq_of_rt_rq(rt_rq);
 
@@ -1051,7 +1065,7 @@
 
 	BUG_ON(!rq->nr_running);
 
-	sub_nr_running(rq, rt_rq->rt_nr_running);
+	sub_nr_running(rq, count);
 	rt_rq->rt_queued = 0;
 
 }
@@ -1330,18 +1344,21 @@
 static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
 {
 	struct sched_rt_entity *back = NULL;
+	unsigned int rt_nr_running;
 
 	for_each_sched_rt_entity(rt_se) {
 		rt_se->back = back;
 		back = rt_se;
 	}
 
-	dequeue_top_rt_rq(rt_rq_of_se(back));
+	rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
 
 	for (rt_se = back; rt_se; rt_se = rt_se->back) {
 		if (on_rt_rq(rt_se))
 			__dequeue_rt_entity(rt_se, flags);
 	}
+
+	dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
 }
 
 static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
@@ -1369,6 +1386,27 @@
 	enqueue_top_rt_rq(&rq->rt);
 }
 
+#ifdef CONFIG_SMP
+static inline bool should_honor_rt_sync(struct rq *rq, struct task_struct *p,
+					bool sync)
+{
+	/*
+	 * If the waker is CFS, then an RT sync wakeup would preempt the waker
+	 * and force it to run for a likely small time after the RT wakee is
+	 * done. So, only honor RT sync wakeups from RT wakers.
+	 */
+	return sync && task_has_rt_policy(rq->curr) &&
+	       p->prio <= rq->rt.highest_prio.next &&
+	       rq->rt.rt_nr_running <= 2;
+}
+#else
+static inline bool should_honor_rt_sync(struct rq *rq, struct task_struct *p,
+					bool sync)
+{
+	return 0;
+}
+#endif
+
 /*
  * Adding/removing a task to/from a priority array:
  */
@@ -1376,23 +1414,21 @@
 enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct sched_rt_entity *rt_se = &p->rt;
-
-	schedtune_enqueue_task(p, cpu_of(rq));
+	bool sync = !!(flags & ENQUEUE_WAKEUP_SYNC);
 
 	if (flags & ENQUEUE_WAKEUP)
 		rt_se->timeout = 0;
 
 	enqueue_rt_entity(rt_se, flags);
 
-	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
+	if (!task_current(rq, p) && p->nr_cpus_allowed > 1 &&
+	    !should_honor_rt_sync(rq, p, sync))
 		enqueue_pushable_task(rq, p);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct sched_rt_entity *rt_se = &p->rt;
-
-	schedtune_dequeue_task(p, cpu_of(rq));
 
 	update_curr_rt(rq);
 	dequeue_rt_entity(rt_se, flags);
@@ -1437,13 +1473,43 @@
 #ifdef CONFIG_SMP
 static int find_lowest_rq(struct task_struct *task);
 
+#ifdef CONFIG_RT_SOFTINT_OPTIMIZATION
+/*
+ * Return whether the task on the given cpu is currently non-preemptible
+ * while handling a potentially long softint, or if the task is likely
+ * to block preemptions soon because it is a ksoftirq thread that is
+ * handling slow softints.
+ */
+bool
+task_may_not_preempt(struct task_struct *task, int cpu)
+{
+	__u32 softirqs = per_cpu(active_softirqs, cpu) |
+			 __IRQ_STAT(cpu, __softirq_pending);
+
+	struct task_struct *cpu_ksoftirqd = per_cpu(ksoftirqd, cpu);
+	return ((softirqs & LONG_SOFTIRQ_MASK) &&
+		(task == cpu_ksoftirqd ||
+		 task_thread_info(task)->preempt_count & SOFTIRQ_MASK));
+}
+EXPORT_SYMBOL_GPL(task_may_not_preempt);
+#endif /* CONFIG_RT_SOFTINT_OPTIMIZATION */
+
 static int
-select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags,
-		  int sibling_count_hint)
+select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
 	struct task_struct *curr;
 	struct rq *rq;
+	struct rq *this_cpu_rq;
 	bool test;
+	int target_cpu = -1;
+	bool may_not_preempt;
+	bool sync = !!(flags & WF_SYNC);
+	int this_cpu;
+
+	trace_android_rvh_select_task_rq_rt(p, cpu, sd_flag,
+					    flags, &target_cpu);
+	if (target_cpu >= 0)
+		return target_cpu;
 
 	/* For anything but wake ups, just return the task_cpu */
 	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
@@ -1453,9 +1519,16 @@
 
 	rcu_read_lock();
 	curr = READ_ONCE(rq->curr); /* unlocked access */
+	this_cpu = smp_processor_id();
+	this_cpu_rq = cpu_rq(this_cpu);
 
 	/*
-	 * If the current task on @p's runqueue is an RT task, then
+	 * If the current task on @p's runqueue is a softirq task,
+	 * it may run without preemption for a time that is
+	 * ill-suited for a waiting RT task. Therefore, try to
+	 * wake this RT task on another runqueue.
+	 *
+	 * Also, if the current task on @p's runqueue is an RT task, then
 	 * try to see if we can wake this RT task up on another
 	 * runqueue. Otherwise simply start this RT task
 	 * on its current runqueue.
@@ -1480,9 +1553,21 @@
 	 * requirement of the task - which is only important on heterogeneous
 	 * systems like big.LITTLE.
 	 */
-	test = curr &&
-	       unlikely(rt_task(curr)) &&
-	       (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);
+	may_not_preempt = task_may_not_preempt(curr, cpu);
+	test = (curr && (may_not_preempt ||
+		(unlikely(rt_task(curr)) &&
+		 (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio))));
+
+	if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE))
+		test |= rockchip_perf_misfit_rt(cpu);
+	/*
+	 * Respect the sync flag as long as the task can run on this CPU.
+	 */
+	if (should_honor_rt_sync(this_cpu_rq, p, sync) &&
+	    cpumask_test_cpu(this_cpu, p->cpus_ptr)) {
+		cpu = this_cpu;
+		goto out_unlock;
+	}
 
 	if (test || !rt_task_fits_capacity(p, cpu)) {
 		int target = find_lowest_rq(p);
@@ -1495,11 +1580,14 @@
 			goto out_unlock;
 
 		/*
-		 * Don't bother moving it if the destination CPU is
+		 * If cpu is non-preemptible, prefer remote cpu
+		 * even if it's running a higher-prio task.
+		 * Otherwise: Don't bother moving it if the destination CPU is
 		 * not running a lower priority task.
 		 */
 		if (target != -1 &&
-		    p->prio < cpu_rq(target)->rt.highest_prio.curr)
+		    (may_not_preempt ||
+		     p->prio < cpu_rq(target)->rt.highest_prio.curr))
 			cpu = target;
 	}
 
@@ -1537,6 +1625,26 @@
 	resched_curr(rq);
 }
 
+static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+{
+	if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
+		int done = 0;
+
+		/*
+		 * This is OK, because current is on_cpu, which avoids it being
+		 * picked for load-balance and preemption/IRQs are still
+		 * disabled avoiding further scheduler activity on it and we've
+		 * not yet started the picking loop.
+		 */
+		rq_unpin_lock(rq, rf);
+		trace_android_rvh_sched_balance_rt(rq, p, &done);
+		if (!done)
+			pull_rt_task(rq);
+		rq_repin_lock(rq, rf);
+	}
+
+	return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
+}
 #endif /* CONFIG_SMP */
 
 /*
@@ -1567,6 +1675,27 @@
 #endif
 }
 
+static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
+{
+	p->se.exec_start = rq_clock_task(rq);
+
+	/* The running task is never eligible for pushing */
+	dequeue_pushable_task(rq, p);
+
+	if (!first)
+		return;
+
+	/*
+	 * If prev task was rt, put_prev_task() has already updated the
+	 * utilization. We only care of the case where we start to schedule a
+	 * rt task
+	 */
+	if (rq->curr->sched_class != &rt_sched_class)
+		update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
+
+	rt_queue_push_tasks(rq);
+}
+
 static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
 						   struct rt_rq *rt_rq)
 {
@@ -1587,7 +1716,6 @@
 static struct task_struct *_pick_next_task_rt(struct rq *rq)
 {
 	struct sched_rt_entity *rt_se;
-	struct task_struct *p;
 	struct rt_rq *rt_rq = &rq->rt;
 
 	do {
@@ -1596,65 +1724,18 @@
 		rt_rq = group_rt_rq(rt_se);
 	} while (rt_rq);
 
-	p = rt_task_of(rt_se);
-	p->se.exec_start = rq_clock_task(rq);
-
-	return p;
+	return rt_task_of(rt_se);
 }
 
-static struct task_struct *
-pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static struct task_struct *pick_next_task_rt(struct rq *rq)
 {
 	struct task_struct *p;
-	struct rt_rq *rt_rq = &rq->rt;
 
-	if (need_pull_rt_task(rq, prev)) {
-		/*
-		 * This is OK, because current is on_cpu, which avoids it being
-		 * picked for load-balance and preemption/IRQs are still
-		 * disabled avoiding further scheduler activity on it and we're
-		 * being very careful to re-start the picking loop.
-		 */
-		rq_unpin_lock(rq, rf);
-		pull_rt_task(rq);
-		rq_repin_lock(rq, rf);
-		/*
-		 * pull_rt_task() can drop (and re-acquire) rq->lock; this
-		 * means a dl or stop task can slip in, in which case we need
-		 * to re-start task selection.
-		 */
-		if (unlikely((rq->stop && task_on_rq_queued(rq->stop)) ||
-			     rq->dl.dl_nr_running))
-			return RETRY_TASK;
-	}
-
-	/*
-	 * We may dequeue prev's rt_rq in put_prev_task().
-	 * So, we update time before rt_nr_running check.
-	 */
-	if (prev->sched_class == &rt_sched_class)
-		update_curr_rt(rq);
-
-	if (!rt_rq->rt_queued)
+	if (!sched_rt_runnable(rq))
 		return NULL;
 
-	put_prev_task(rq, prev);
-
 	p = _pick_next_task_rt(rq);
-
-	/* The running task is never eligible for pushing */
-	dequeue_pushable_task(rq, p);
-
-	rt_queue_push_tasks(rq);
-
-	/*
-	 * If prev task was rt, put_prev_task() has already updated the
-	 * utilization. We only care of the case where we start to schedule a
-	 * rt task
-	 */
-	if (rq->curr->sched_class != &rt_sched_class)
-		update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
-
+	set_next_task_rt(rq, p, true);
 	return p;
 }
 
@@ -1680,7 +1761,7 @@
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
-	    cpumask_test_cpu(cpu, &p->cpus_allowed))
+	    cpumask_test_cpu(cpu, &p->cpus_mask))
 		return 1;
 
 	return 0;
@@ -1690,7 +1771,7 @@
  * Return the highest pushable rq's task, which is suitable to be executed
  * on the CPU, NULL otherwise
  */
-static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
+struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
 {
 	struct plist_head *head = &rq->rt.pushable_tasks;
 	struct task_struct *p;
@@ -1705,6 +1786,7 @@
 
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(pick_highest_pushable_task);
 
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
 
@@ -1713,7 +1795,7 @@
 	struct sched_domain *sd;
 	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
 	int this_cpu = smp_processor_id();
-	int cpu = task_cpu(task);
+	int cpu = -1;
 	int ret;
 
 	/* Make sure the mask is initialized first */
@@ -1738,9 +1820,17 @@
 				  task, lowest_mask);
 	}
 
+	trace_android_rvh_find_lowest_rq(task, lowest_mask, ret, &cpu);
+	if (cpu >= 0)
+		return cpu;
+
 	if (!ret)
 		return -1; /* No targets found */
 
+	cpu = task_cpu(task);
+
+	if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE))
+		cpu = rockchip_perf_select_rt_cpu(cpu, lowest_mask);
 	/*
 	 * At this point we have built a mask of CPUs representing the
 	 * lowest priority tasks in the system. Now we want to elect
@@ -1774,8 +1864,8 @@
 				return this_cpu;
 			}
 
-			best_cpu = cpumask_first_and(lowest_mask,
-						     sched_domain_span(sd));
+			best_cpu = cpumask_any_and_distribute(lowest_mask,
+							      sched_domain_span(sd));
 			if (best_cpu < nr_cpu_ids) {
 				rcu_read_unlock();
 				return best_cpu;
@@ -1792,7 +1882,7 @@
 	if (this_cpu != -1)
 		return this_cpu;
 
-	cpu = cpumask_any(lowest_mask);
+	cpu = cpumask_any_distribute(lowest_mask);
 	if (cpu < nr_cpu_ids)
 		return cpu;
 
@@ -1833,7 +1923,7 @@
 			 * Also make sure that it wasn't scheduled on its rq.
 			 */
 			if (unlikely(task_rq(task) != rq ||
-				     !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) ||
+				     !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
 				     task_running(rq, task) ||
 				     !rt_task(task) ||
 				     !task_on_rq_queued(task))) {
@@ -1881,7 +1971,7 @@
  * running task can migrate over to a CPU that is running a task
  * of lesser priority.
  */
-static int push_rt_task(struct rq *rq)
+static int push_rt_task(struct rq *rq, bool pull)
 {
 	struct task_struct *next_task;
 	struct rq *lowest_rq;
@@ -1895,10 +1985,41 @@
 		return 0;
 
 retry:
-	if (unlikely(next_task == rq->curr)) {
-		WARN_ON(1);
+	if (is_migration_disabled(next_task)) {
+		struct task_struct *push_task = NULL;
+		int cpu;
+
+		if (!pull)
+			return 0;
+
+		trace_sched_migrate_pull_tp(next_task);
+
+		if (rq->push_busy)
+			return 0;
+
+		cpu = find_lowest_rq(rq->curr);
+		if (cpu == -1 || cpu == rq->cpu)
+			return 0;
+
+		/*
+		 * Given we found a CPU with lower priority than @next_task,
+		 * therefore it should be running. However we cannot migrate it
+		 * to this other CPU, instead attempt to push the current
+		 * running task on this CPU away.
+		 */
+		push_task = get_push_task(rq);
+		if (push_task) {
+			raw_spin_unlock(&rq->lock);
+			stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
+					    push_task, &rq->push_work);
+			raw_spin_lock(&rq->lock);
+		}
+
 		return 0;
 	}
+
+	if (WARN_ON(next_task == rq->curr))
+		return 0;
 
 	/*
 	 * It's possible that the next_task slipped in of
@@ -1951,12 +2072,10 @@
 	deactivate_task(rq, next_task, 0);
 	set_task_cpu(next_task, lowest_rq->cpu);
 	activate_task(lowest_rq, next_task, 0);
+	resched_curr(lowest_rq);
 	ret = 1;
 
-	resched_curr(lowest_rq);
-
 	double_unlock_balance(rq, lowest_rq);
-
 out:
 	put_task_struct(next_task);
 
@@ -1966,7 +2085,7 @@
 static void push_rt_tasks(struct rq *rq)
 {
 	/* push_rt_task will return true if it moved an RT */
-	while (push_rt_task(rq))
+	while (push_rt_task(rq, false))
 		;
 }
 
@@ -2119,7 +2238,8 @@
 	 */
 	if (has_pushable_tasks(rq)) {
 		raw_spin_lock(&rq->lock);
-		push_rt_tasks(rq);
+		while (push_rt_task(rq, true))
+			;
 		raw_spin_unlock(&rq->lock);
 	}
 
@@ -2144,7 +2264,7 @@
 {
 	int this_cpu = this_rq->cpu, cpu;
 	bool resched = false;
-	struct task_struct *p;
+	struct task_struct *p, *push_task;
 	struct rq *src_rq;
 	int rt_overload_count = rt_overloaded(this_rq);
 
@@ -2191,6 +2311,7 @@
 		 * double_lock_balance, and another CPU could
 		 * alter this_rq
 		 */
+		push_task = NULL;
 		double_lock_balance(this_rq, src_rq);
 
 		/*
@@ -2218,11 +2339,15 @@
 			if (p->prio < src_rq->curr->prio)
 				goto skip;
 
-			resched = true;
-
-			deactivate_task(src_rq, p, 0);
-			set_task_cpu(p, this_cpu);
-			activate_task(this_rq, p, 0);
+			if (is_migration_disabled(p)) {
+				trace_sched_migrate_pull_tp(p);
+				push_task = get_push_task(src_rq);
+			} else {
+				deactivate_task(src_rq, p, 0);
+				set_task_cpu(p, this_cpu);
+				activate_task(this_rq, p, 0);
+				resched = true;
+			}
 			/*
 			 * We continue with the search, just in
 			 * case there's an even higher prio task
@@ -2232,6 +2357,13 @@
 		}
 skip:
 		double_unlock_balance(this_rq, src_rq);
+
+		if (push_task) {
+			raw_spin_unlock(&this_rq->lock);
+			stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
+					    push_task, &src_rq->push_work);
+			raw_spin_lock(&this_rq->lock);
+		}
 	}
 
 	if (resched)
@@ -2315,13 +2447,20 @@
 static void switched_to_rt(struct rq *rq, struct task_struct *p)
 {
 	/*
-	 * If we are already running, then there's nothing
-	 * that needs to be done. But if we are not running
-	 * we may need to preempt the current running task.
-	 * If that current running task is also an RT task
+	 * If we are running, update the avg_rt tracking, as the running time
+	 * will now on be accounted into the latter.
+	 */
+	if (task_current(rq, p)) {
+		update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
+		return;
+	}
+
+	/*
+	 * If we are not running we may need to preempt the current
+	 * running task. If that current running task is also an RT task
 	 * then see if we can move to another run queue.
 	 */
-	if (task_on_rq_queued(p) && rq->curr != p) {
+	if (task_on_rq_queued(p)) {
 #ifdef CONFIG_SMP
 		if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
 			rt_queue_push_tasks(rq);
@@ -2390,8 +2529,10 @@
 		}
 
 		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
-		if (p->rt.timeout > next)
-			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
+		if (p->rt.timeout > next) {
+			posix_cputimers_rt_watchdog(&p->posix_cputimers,
+						    p->se.sum_exec_runtime);
+		}
 	}
 }
 #else
@@ -2440,16 +2581,6 @@
 	}
 }
 
-static void set_curr_task_rt(struct rq *rq)
-{
-	struct task_struct *p = rq->curr;
-
-	p->se.exec_start = rq_clock_task(rq);
-
-	/* The running task is never eligible for pushing */
-	dequeue_pushable_task(rq, p);
-}
-
 static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 {
 	/*
@@ -2461,8 +2592,8 @@
 	return 0;
 }
 
-const struct sched_class rt_sched_class = {
-	.next			= &fair_sched_class,
+const struct sched_class rt_sched_class
+	__section("__rt_sched_class") = {
 	.enqueue_task		= enqueue_task_rt,
 	.dequeue_task		= dequeue_task_rt,
 	.yield_task		= yield_task_rt,
@@ -2471,18 +2602,19 @@
 
 	.pick_next_task		= pick_next_task_rt,
 	.put_prev_task		= put_prev_task_rt,
+	.set_next_task		= set_next_task_rt,
 
 #ifdef CONFIG_SMP
+	.balance		= balance_rt,
 	.select_task_rq		= select_task_rq_rt,
-
 	.set_cpus_allowed	= set_cpus_allowed_common,
 	.rq_online		= rq_online_rt,
 	.rq_offline		= rq_offline_rt,
 	.task_woken		= task_woken_rt,
 	.switched_from		= switched_from_rt,
+	.find_lock_rq		= find_lock_lowest_rq,
 #endif
 
-	.set_curr_task		= set_curr_task_rt,
 	.task_tick		= task_tick_rt,
 
 	.get_rr_interval	= get_rr_interval_rt,
@@ -2503,10 +2635,11 @@
  */
 static DEFINE_MUTEX(rt_constraints_mutex);
 
-/* Must be called with tasklist_lock held */
 static inline int tg_has_rt_tasks(struct task_group *tg)
 {
-	struct task_struct *g, *p;
+	struct task_struct *task;
+	struct css_task_iter it;
+	int ret = 0;
 
 	/*
 	 * Autogroups do not have RT tasks; see autogroup_create().
@@ -2514,12 +2647,12 @@
 	if (task_group_is_autogroup(tg))
 		return 0;
 
-	for_each_process_thread(g, p) {
-		if (rt_task(p) && task_group(p) == tg)
-			return 1;
-	}
+	css_task_iter_start(&tg->css, 0, &it);
+	while (!ret && (task = css_task_iter_next(&it)))
+		ret |= rt_task(task);
+	css_task_iter_end(&it);
 
-	return 0;
+	return ret;
 }
 
 struct rt_schedulable_data {
@@ -2550,9 +2683,10 @@
 		return -EINVAL;
 
 	/*
-	 * Ensure we don't starve existing RT tasks.
+	 * Ensure we don't starve existing RT tasks if runtime turns zero.
 	 */
-	if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
+	if (rt_bandwidth_enabled() && !runtime &&
+	    tg->rt_bandwidth.rt_runtime && tg_has_rt_tasks(tg))
 		return -EBUSY;
 
 	total = to_ratio(period, runtime);
@@ -2617,8 +2751,13 @@
 	if (rt_period == 0)
 		return -EINVAL;
 
+	/*
+	 * Bound quota to defend quota against overflow during bandwidth shift.
+	 */
+	if (rt_runtime != RUNTIME_INF && rt_runtime > max_rt_runtime)
+		return -EINVAL;
+
 	mutex_lock(&rt_constraints_mutex);
-	read_lock(&tasklist_lock);
 	err = __rt_schedulable(tg, rt_period, rt_runtime);
 	if (err)
 		goto unlock;
@@ -2636,7 +2775,6 @@
 	}
 	raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
 unlock:
-	read_unlock(&tasklist_lock);
 	mutex_unlock(&rt_constraints_mutex);
 
 	return err;
26952833 int ret = 0;
26962834
26972835 mutex_lock(&rt_constraints_mutex);
2698
- read_lock(&tasklist_lock);
26992836 ret = __rt_schedulable(NULL, 0, 0);
2700
- read_unlock(&tasklist_lock);
27012837 mutex_unlock(&rt_constraints_mutex);
27022838
27032839 return ret;
@@ -2738,7 +2874,9 @@
 		return -EINVAL;
 
 	if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
-		(sysctl_sched_rt_runtime > sysctl_sched_rt_period))
+		((sysctl_sched_rt_runtime > sysctl_sched_rt_period) ||
+		 ((u64)sysctl_sched_rt_runtime *
+			NSEC_PER_USEC > max_rt_runtime)))
 		return -EINVAL;
 
 	return 0;
@@ -2754,9 +2892,8 @@
 	raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
 }
 
-int sched_rt_handler(struct ctl_table *table, int write,
-		     void __user *buffer, size_t *lenp,
-		     loff_t *ppos)
+int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
 {
 	int old_period, old_runtime;
 	static DEFINE_MUTEX(mutex);
@@ -2794,9 +2931,8 @@
 	return ret;
 }
 
-int sched_rr_handler(struct ctl_table *table, int write,
-		     void __user *buffer, size_t *lenp,
-		     loff_t *ppos)
+int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
 {
 	int ret;
 	static DEFINE_MUTEX(mutex);