2024-05-10 61598093bbdd283a7edc367d900f223070ead8d2
kernel/kernel/sched/rt.c
@@ -7,8 +7,12 @@
 
 #include "pelt.h"
 
+#include <trace/hooks/sched.h>
+
 int sched_rr_timeslice = RR_TIMESLICE;
 int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
+/* More than 4 hours if BW_SHIFT equals 20. */
+static const u64 max_rt_runtime = MAX_BW;
 
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
 
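A quick standalone check of the "more than 4 hours" comment above. It assumes MAX_BW is ((1ULL << (64 - BW_SHIFT)) - 1) nanoseconds with BW_SHIFT == 20, which is how kernel/sched/sched.h of this era defines it (an assumption, not something shown in this patch); the bound matters because to_ratio() computes runtime << BW_SHIFT, which must not overflow a u64.

/* Sanity check for max_rt_runtime, assuming MAX_BW == (1ULL << (64 - 20)) - 1 ns. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        const unsigned int bw_shift = 20;                         /* BW_SHIFT (assumed) */
        const uint64_t max_bw_ns = (1ULL << (64 - bw_shift)) - 1; /* MAX_BW (assumed) */

        printf("max_rt_runtime = %llu ns = %.2f hours\n",
               (unsigned long long)max_bw_ns,
               (double)max_bw_ns / 1e9 / 3600.0);                 /* ~4.89 hours */
        return 0;
}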
@@ -45,8 +49,8 @@
 
         raw_spin_lock_init(&rt_b->rt_runtime_lock);
 
-        hrtimer_init(&rt_b->rt_period_timer,
-                        CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+        hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
+                     HRTIMER_MODE_REL_HARD);
         rt_b->rt_period_timer.function = sched_rt_period_timer;
 }
 
@@ -64,7 +68,8 @@
                  * to update the period.
                  */
                 hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
-                hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
+                hrtimer_start_expires(&rt_b->rt_period_timer,
+                                      HRTIMER_MODE_ABS_PINNED_HARD);
         }
         raw_spin_unlock(&rt_b->rt_runtime_lock);
 }
@@ -434,7 +439,7 @@
 #endif /* CONFIG_SMP */
 
 static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
-static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
+static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);
 
 static inline int on_rt_rq(struct sched_rt_entity *rt_se)
 {
@@ -555,7 +560,7 @@
         rt_se = rt_rq->tg->rt_se[cpu];
 
         if (!rt_se) {
-                dequeue_top_rt_rq(rt_rq);
+                dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
                 /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
                 cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
         }
@@ -641,7 +646,7 @@
 
 static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
-        dequeue_top_rt_rq(rt_rq);
+        dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
 }
 
 static inline int rt_rq_throttled(struct rt_rq *rt_rq)
@@ -973,6 +978,13 @@
                 if (likely(rt_b->rt_runtime)) {
                         rt_rq->rt_throttled = 1;
                         printk_deferred_once("sched: RT throttling activated\n");
+
+                        trace_android_vh_dump_throttled_rt_tasks(
+                                raw_smp_processor_id(),
+                                rq_clock(rq_of_rt_rq(rt_rq)),
+                                sched_rt_period(rt_rq),
+                                runtime,
+                                hrtimer_get_expires_ns(&rt_b->rt_period_timer));
                 } else {
                         /*
                          * In case we did anyway, make it go away,
@@ -1019,6 +1031,8 @@
         curr->se.exec_start = now;
         cgroup_account_cputime(curr, delta_exec);
 
+        trace_android_vh_sched_stat_runtime_rt(curr, delta_exec);
+
         if (!rt_bandwidth_enabled())
                 return;
 
@@ -1040,7 +1054,7 @@
 }
 
 static void
-dequeue_top_rt_rq(struct rt_rq *rt_rq)
+dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count)
 {
         struct rq *rq = rq_of_rt_rq(rt_rq);
 
@@ -1051,7 +1065,7 @@
 
         BUG_ON(!rq->nr_running);
 
-        sub_nr_running(rq, rt_rq->rt_nr_running);
+        sub_nr_running(rq, count);
         rt_rq->rt_queued = 0;
 
 }
@@ -1330,18 +1344,21 @@
 static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
 {
         struct sched_rt_entity *back = NULL;
+        unsigned int rt_nr_running;
 
         for_each_sched_rt_entity(rt_se) {
                 rt_se->back = back;
                 back = rt_se;
         }
 
-        dequeue_top_rt_rq(rt_rq_of_se(back));
+        rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
 
         for (rt_se = back; rt_se; rt_se = rt_se->back) {
                 if (on_rt_rq(rt_se))
                         __dequeue_rt_entity(rt_se, flags);
         }
+
+        dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
 }
 
 static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
@@ -1369,6 +1386,27 @@
         enqueue_top_rt_rq(&rq->rt);
 }
 
+#ifdef CONFIG_SMP
+static inline bool should_honor_rt_sync(struct rq *rq, struct task_struct *p,
+                                        bool sync)
+{
+        /*
+         * If the waker is CFS, then an RT sync wakeup would preempt the waker
+         * and force it to run for a likely small time after the RT wakee is
+         * done. So, only honor RT sync wakeups from RT wakers.
+         */
+        return sync && task_has_rt_policy(rq->curr) &&
+                p->prio <= rq->rt.highest_prio.next &&
+                rq->rt.rt_nr_running <= 2;
+}
+#else
+static inline bool should_honor_rt_sync(struct rq *rq, struct task_struct *p,
+                                        bool sync)
+{
+        return 0;
+}
+#endif
+
 /*
  * Adding/removing a task to/from a priority array:
  */
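A standalone toy model of the predicate above, to make its three conditions concrete (types, fields, and numbers are illustrative only, not kernel code): the sync hint is honored only when the waker itself has an RT policy, the wakee is at least as high-priority as the next queued RT task, and the waker's runqueue holds at most two RT tasks.

#include <stdbool.h>
#include <stdio.h>

struct toy_rq {
        bool curr_is_rt;        /* stands in for task_has_rt_policy(rq->curr) */
        int  highest_prio_next; /* stands in for rq->rt.highest_prio.next */
        int  rt_nr_running;     /* stands in for rq->rt.rt_nr_running */
};

static bool toy_should_honor_rt_sync(const struct toy_rq *rq, int wakee_prio,
                                     bool sync)
{
        /* Lower prio value means higher RT priority, as in the kernel. */
        return sync && rq->curr_is_rt &&
               wakee_prio <= rq->highest_prio_next &&
               rq->rt_nr_running <= 2;
}

int main(void)
{
        struct toy_rq rt_waker  = { true,  99, 1 }; /* lone RT waker on the rq */
        struct toy_rq cfs_waker = { false, 99, 0 }; /* CFS waker: never honored */

        printf("%d %d\n",
               toy_should_honor_rt_sync(&rt_waker, 50, true),   /* 1 */
               toy_should_honor_rt_sync(&cfs_waker, 50, true)); /* 0 */
        return 0;
}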
@@ -1376,23 +1414,21 @@
 enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 {
         struct sched_rt_entity *rt_se = &p->rt;
-
-        schedtune_enqueue_task(p, cpu_of(rq));
+        bool sync = !!(flags & ENQUEUE_WAKEUP_SYNC);
 
         if (flags & ENQUEUE_WAKEUP)
                 rt_se->timeout = 0;
 
         enqueue_rt_entity(rt_se, flags);
 
-        if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
+        if (!task_current(rq, p) && p->nr_cpus_allowed > 1 &&
+            !should_honor_rt_sync(rq, p, sync))
                 enqueue_pushable_task(rq, p);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 {
         struct sched_rt_entity *rt_se = &p->rt;
-
-        schedtune_dequeue_task(p, cpu_of(rq));
 
         update_curr_rt(rq);
         dequeue_rt_entity(rt_se, flags);
@@ -1437,13 +1473,43 @@
 #ifdef CONFIG_SMP
 static int find_lowest_rq(struct task_struct *task);
 
+#ifdef CONFIG_RT_SOFTINT_OPTIMIZATION
+/*
+ * Return whether the task on the given cpu is currently non-preemptible
+ * while handling a potentially long softint, or if the task is likely
+ * to block preemptions soon because it is a ksoftirq thread that is
+ * handling slow softints.
+ */
+bool
+task_may_not_preempt(struct task_struct *task, int cpu)
+{
+        __u32 softirqs = per_cpu(active_softirqs, cpu) |
+                         __IRQ_STAT(cpu, __softirq_pending);
+
+        struct task_struct *cpu_ksoftirqd = per_cpu(ksoftirqd, cpu);
+        return ((softirqs & LONG_SOFTIRQ_MASK) &&
+                (task == cpu_ksoftirqd ||
+                 task_thread_info(task)->preempt_count & SOFTIRQ_MASK));
+}
+EXPORT_SYMBOL_GPL(task_may_not_preempt);
+#endif /* CONFIG_RT_SOFTINT_OPTIMIZATION */
+
 static int
-select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags,
-                  int sibling_count_hint)
+select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
         struct task_struct *curr;
         struct rq *rq;
+        struct rq *this_cpu_rq;
         bool test;
+        int target_cpu = -1;
+        bool may_not_preempt;
+        bool sync = !!(flags & WF_SYNC);
+        int this_cpu;
+
+        trace_android_rvh_select_task_rq_rt(p, cpu, sd_flag,
+                                            flags, &target_cpu);
+        if (target_cpu >= 0)
+                return target_cpu;
 
         /* For anything but wake ups, just return the task_cpu */
         if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
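For illustration, a userspace toy of the task_may_not_preempt() decision above (the bit positions and the mask are made up here; the real LONG_SOFTIRQ_MASK and SOFTIRQ_MASK come from kernel headers): the CPU is avoided when a "long" softirq is active or pending there and the candidate task is either that CPU's ksoftirqd or already has softirq processing accounted in its preempt count.

#include <stdbool.h>
#include <stdio.h>

#define TOY_NET_RX      (1u << 3)       /* illustrative softirq bits */
#define TOY_BLOCK       (1u << 4)
#define TOY_LONG_MASK   (TOY_NET_RX | TOY_BLOCK)

static bool toy_may_not_preempt(unsigned int active, unsigned int pending,
                                bool is_cpu_ksoftirqd, bool in_softirq)
{
        /* Combine softirqs being serviced with those still pending. */
        unsigned int softirqs = active | pending;

        return (softirqs & TOY_LONG_MASK) &&
               (is_cpu_ksoftirqd || in_softirq);
}

int main(void)
{
        /* ksoftirqd with a long NET_RX softirq pending: avoid this CPU -> 1 */
        printf("%d\n", toy_may_not_preempt(0, TOY_NET_RX, true, false));
        /* ordinary task, only a short softirq active: CPU is fine -> 0 */
        printf("%d\n", toy_may_not_preempt(1u << 0, 0, false, false));
        return 0;
}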
@@ -1453,9 +1519,16 @@
 
         rcu_read_lock();
         curr = READ_ONCE(rq->curr); /* unlocked access */
+        this_cpu = smp_processor_id();
+        this_cpu_rq = cpu_rq(this_cpu);
 
         /*
-         * If the current task on @p's runqueue is an RT task, then
+         * If the current task on @p's runqueue is a softirq task,
+         * it may run without preemption for a time that is
+         * ill-suited for a waiting RT task. Therefore, try to
+         * wake this RT task on another runqueue.
+         *
+         * Also, if the current task on @p's runqueue is an RT task, then
          * try to see if we can wake this RT task up on another
          * runqueue. Otherwise simply start this RT task
          * on its current runqueue.
@@ -1480,9 +1553,21 @@
          * requirement of the task - which is only important on heterogeneous
          * systems like big.LITTLE.
          */
-        test = curr &&
-               unlikely(rt_task(curr)) &&
-               (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);
+        may_not_preempt = task_may_not_preempt(curr, cpu);
+        test = (curr && (may_not_preempt ||
+                (unlikely(rt_task(curr)) &&
+                 (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio))));
+
+        if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE))
+                test |= rockchip_perf_misfit_rt(cpu);
+        /*
+         * Respect the sync flag as long as the task can run on this CPU.
+         */
+        if (should_honor_rt_sync(this_cpu_rq, p, sync) &&
+            cpumask_test_cpu(this_cpu, p->cpus_ptr)) {
+                cpu = this_cpu;
+                goto out_unlock;
+        }
 
         if (test || !rt_task_fits_capacity(p, cpu)) {
                 int target = find_lowest_rq(p);
@@ -1495,11 +1580,14 @@
                         goto out_unlock;
 
                 /*
-                 * Don't bother moving it if the destination CPU is
+                 * If cpu is non-preemptible, prefer remote cpu
+                 * even if it's running a higher-prio task.
+                 * Otherwise: Don't bother moving it if the destination CPU is
                  * not running a lower priority task.
                  */
                 if (target != -1 &&
-                    p->prio < cpu_rq(target)->rt.highest_prio.curr)
+                    (may_not_preempt ||
+                     p->prio < cpu_rq(target)->rt.highest_prio.curr))
                         cpu = target;
         }
 
@@ -1537,6 +1625,26 @@
                 resched_curr(rq);
 }
 
+static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+{
+        if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
+                int done = 0;
+
+                /*
+                 * This is OK, because current is on_cpu, which avoids it being
+                 * picked for load-balance and preemption/IRQs are still
+                 * disabled avoiding further scheduler activity on it and we've
+                 * not yet started the picking loop.
+                 */
+                rq_unpin_lock(rq, rf);
+                trace_android_rvh_sched_balance_rt(rq, p, &done);
+                if (!done)
+                        pull_rt_task(rq);
+                rq_repin_lock(rq, rf);
+        }
+
+        return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
+}
 #endif /* CONFIG_SMP */
 
 /*
@@ -1567,8 +1675,28 @@
 #endif
 }
 
-static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
-                                                   struct rt_rq *rt_rq)
+static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
+{
+        p->se.exec_start = rq_clock_task(rq);
+
+        /* The running task is never eligible for pushing */
+        dequeue_pushable_task(rq, p);
+
+        if (!first)
+                return;
+
+        /*
+         * If prev task was rt, put_prev_task() has already updated the
+         * utilization. We only care of the case where we start to schedule a
+         * rt task
+         */
+        if (rq->curr->sched_class != &rt_sched_class)
+                update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
+
+        rt_queue_push_tasks(rq);
+}
+
+static struct sched_rt_entity *pick_next_rt_entity(struct rt_rq *rt_rq)
 {
         struct rt_prio_array *array = &rt_rq->active;
         struct sched_rt_entity *next = NULL;
@@ -1579,6 +1707,8 @@
         BUG_ON(idx >= MAX_RT_PRIO);
 
         queue = array->queue + idx;
+        if (SCHED_WARN_ON(list_empty(queue)))
+                return NULL;
         next = list_entry(queue->next, struct sched_rt_entity, run_list);
 
         return next;
@@ -1587,74 +1717,27 @@
 static struct task_struct *_pick_next_task_rt(struct rq *rq)
 {
         struct sched_rt_entity *rt_se;
-        struct task_struct *p;
         struct rt_rq *rt_rq = &rq->rt;
 
         do {
-                rt_se = pick_next_rt_entity(rq, rt_rq);
-                BUG_ON(!rt_se);
+                rt_se = pick_next_rt_entity(rt_rq);
+                if (unlikely(!rt_se))
+                        return NULL;
                 rt_rq = group_rt_rq(rt_se);
         } while (rt_rq);
 
-        p = rt_task_of(rt_se);
-        p->se.exec_start = rq_clock_task(rq);
-
-        return p;
+        return rt_task_of(rt_se);
 }
 
-static struct task_struct *
-pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static struct task_struct *pick_next_task_rt(struct rq *rq)
 {
         struct task_struct *p;
-        struct rt_rq *rt_rq = &rq->rt;
 
-        if (need_pull_rt_task(rq, prev)) {
-                /*
-                 * This is OK, because current is on_cpu, which avoids it being
-                 * picked for load-balance and preemption/IRQs are still
-                 * disabled avoiding further scheduler activity on it and we're
-                 * being very careful to re-start the picking loop.
-                 */
-                rq_unpin_lock(rq, rf);
-                pull_rt_task(rq);
-                rq_repin_lock(rq, rf);
-                /*
-                 * pull_rt_task() can drop (and re-acquire) rq->lock; this
-                 * means a dl or stop task can slip in, in which case we need
-                 * to re-start task selection.
-                 */
-                if (unlikely((rq->stop && task_on_rq_queued(rq->stop)) ||
-                             rq->dl.dl_nr_running))
-                        return RETRY_TASK;
-        }
-
-        /*
-         * We may dequeue prev's rt_rq in put_prev_task().
-         * So, we update time before rt_nr_running check.
-         */
-        if (prev->sched_class == &rt_sched_class)
-                update_curr_rt(rq);
-
-        if (!rt_rq->rt_queued)
+        if (!sched_rt_runnable(rq))
                 return NULL;
 
-        put_prev_task(rq, prev);
-
         p = _pick_next_task_rt(rq);
-
-        /* The running task is never eligible for pushing */
-        dequeue_pushable_task(rq, p);
-
-        rt_queue_push_tasks(rq);
-
-        /*
-         * If prev task was rt, put_prev_task() has already updated the
-         * utilization. We only care of the case where we start to schedule a
-         * rt task
-         */
-        if (rq->curr->sched_class != &rt_sched_class)
-                update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
-
+        set_next_task_rt(rq, p, true);
         return p;
 }
 
@@ -1680,7 +1763,7 @@
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
         if (!task_running(rq, p) &&
-            cpumask_test_cpu(cpu, &p->cpus_allowed))
+            cpumask_test_cpu(cpu, p->cpus_ptr))
                 return 1;
 
         return 0;
@@ -1690,7 +1773,7 @@
  * Return the highest pushable rq's task, which is suitable to be executed
  * on the CPU, NULL otherwise
  */
-static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
+struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
 {
         struct plist_head *head = &rq->rt.pushable_tasks;
         struct task_struct *p;
@@ -1705,6 +1788,7 @@
 
         return NULL;
 }
+EXPORT_SYMBOL_GPL(pick_highest_pushable_task);
 
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
 
@@ -1713,7 +1797,7 @@
         struct sched_domain *sd;
         struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
         int this_cpu = smp_processor_id();
-        int cpu = task_cpu(task);
+        int cpu = -1;
         int ret;
 
         /* Make sure the mask is initialized first */
@@ -1738,9 +1822,17 @@
                                   task, lowest_mask);
         }
 
+        trace_android_rvh_find_lowest_rq(task, lowest_mask, ret, &cpu);
+        if (cpu >= 0)
+                return cpu;
+
         if (!ret)
                 return -1; /* No targets found */
 
+        cpu = task_cpu(task);
+
+        if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE))
+                cpu = rockchip_perf_select_rt_cpu(cpu, lowest_mask);
         /*
          * At this point we have built a mask of CPUs representing the
          * lowest priority tasks in the system. Now we want to elect
@@ -1833,7 +1925,7 @@
                  * Also make sure that it wasn't scheduled on its rq.
                  */
                 if (unlikely(task_rq(task) != rq ||
-                             !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) ||
+                             !cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) ||
                              task_running(rq, task) ||
                              !rt_task(task) ||
                              !task_on_rq_queued(task))) {
@@ -1895,10 +1987,8 @@
                 return 0;
 
 retry:
-        if (unlikely(next_task == rq->curr)) {
-                WARN_ON(1);
+        if (WARN_ON(next_task == rq->curr))
                 return 0;
-        }
 
         /*
          * It's possible that the next_task slipped in of
@@ -2315,13 +2405,20 @@
 static void switched_to_rt(struct rq *rq, struct task_struct *p)
 {
         /*
-         * If we are already running, then there's nothing
-         * that needs to be done. But if we are not running
-         * we may need to preempt the current running task.
-         * If that current running task is also an RT task
+         * If we are running, update the avg_rt tracking, as the running time
+         * will now on be accounted into the latter.
+         */
+        if (task_current(rq, p)) {
+                update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
+                return;
+        }
+
+        /*
+         * If we are not running we may need to preempt the current
+         * running task. If that current running task is also an RT task
          * then see if we can move to another run queue.
          */
-        if (task_on_rq_queued(p) && rq->curr != p) {
+        if (task_on_rq_queued(p)) {
 #ifdef CONFIG_SMP
                 if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
                         rt_queue_push_tasks(rq);
@@ -2390,8 +2487,10 @@
                 }
 
                 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
-                if (p->rt.timeout > next)
-                        p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
+                if (p->rt.timeout > next) {
+                        posix_cputimers_rt_watchdog(&p->posix_cputimers,
+                                                    p->se.sum_exec_runtime);
+                }
         }
 }
 #else
@@ -2440,16 +2539,6 @@
         }
 }
 
-static void set_curr_task_rt(struct rq *rq)
-{
-        struct task_struct *p = rq->curr;
-
-        p->se.exec_start = rq_clock_task(rq);
-
-        /* The running task is never eligible for pushing */
-        dequeue_pushable_task(rq, p);
-}
-
 static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 {
         /*
@@ -2461,8 +2550,8 @@
         return 0;
 }
 
-const struct sched_class rt_sched_class = {
-        .next                   = &fair_sched_class,
+const struct sched_class rt_sched_class
+        __section("__rt_sched_class") = {
         .enqueue_task           = enqueue_task_rt,
         .dequeue_task           = dequeue_task_rt,
         .yield_task             = yield_task_rt,
@@ -2471,10 +2560,11 @@
 
         .pick_next_task         = pick_next_task_rt,
         .put_prev_task          = put_prev_task_rt,
+        .set_next_task          = set_next_task_rt,
 
 #ifdef CONFIG_SMP
+        .balance                = balance_rt,
         .select_task_rq         = select_task_rq_rt,
-
         .set_cpus_allowed       = set_cpus_allowed_common,
         .rq_online              = rq_online_rt,
         .rq_offline             = rq_offline_rt,
@@ -2482,7 +2572,6 @@
         .switched_from          = switched_from_rt,
 #endif
 
-        .set_curr_task          = set_curr_task_rt,
         .task_tick              = task_tick_rt,
 
         .get_rr_interval        = get_rr_interval_rt,
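The rt_sched_class hunks above drop the old .next = &fair_sched_class chaining and place the class in a dedicated __rt_sched_class linker section, presumably so class precedence comes from link-time placement rather than explicit pointers. A generic userspace illustration of that mechanism (GCC/Clang with GNU ld; toy names, not the kernel's actual section macros):

#include <stdio.h>

struct toy_class { const char *name; };

/* Objects dropped into one named section are laid out in declaration/link
 * order, and GNU ld provides __start_/__stop_ symbols to walk them. */
#define DEFINE_TOY_CLASS(var, n)                                        \
        static const struct toy_class var                               \
        __attribute__((used, section("toy_classes"))) = { .name = n }

DEFINE_TOY_CLASS(toy_stop, "stop");     /* listed first = highest precedence */
DEFINE_TOY_CLASS(toy_rt,   "rt");
DEFINE_TOY_CLASS(toy_fair, "fair");

extern const struct toy_class __start_toy_classes[];
extern const struct toy_class __stop_toy_classes[];

int main(void)
{
        for (const struct toy_class *c = __start_toy_classes;
             c < __stop_toy_classes; c++)
                printf("%s\n", c->name);        /* prints: stop, rt, fair */
        return 0;
}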
@@ -2503,10 +2592,11 @@
  */
 static DEFINE_MUTEX(rt_constraints_mutex);
 
-/* Must be called with tasklist_lock held */
 static inline int tg_has_rt_tasks(struct task_group *tg)
 {
-        struct task_struct *g, *p;
+        struct task_struct *task;
+        struct css_task_iter it;
+        int ret = 0;
 
         /*
          * Autogroups do not have RT tasks; see autogroup_create().
@@ -2514,12 +2604,12 @@
         if (task_group_is_autogroup(tg))
                 return 0;
 
-        for_each_process_thread(g, p) {
-                if (rt_task(p) && task_group(p) == tg)
-                        return 1;
-        }
+        css_task_iter_start(&tg->css, 0, &it);
+        while (!ret && (task = css_task_iter_next(&it)))
+                ret |= rt_task(task);
+        css_task_iter_end(&it);
 
-        return 0;
+        return ret;
 }
 
 struct rt_schedulable_data {
@@ -2550,9 +2640,10 @@
                 return -EINVAL;
 
         /*
-         * Ensure we don't starve existing RT tasks.
+         * Ensure we don't starve existing RT tasks if runtime turns zero.
          */
-        if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
+        if (rt_bandwidth_enabled() && !runtime &&
+            tg->rt_bandwidth.rt_runtime && tg_has_rt_tasks(tg))
                 return -EBUSY;
 
         total = to_ratio(period, runtime);
@@ -2617,8 +2708,13 @@
         if (rt_period == 0)
                 return -EINVAL;
 
+        /*
+         * Bound quota to defend quota against overflow during bandwidth shift.
+         */
+        if (rt_runtime != RUNTIME_INF && rt_runtime > max_rt_runtime)
+                return -EINVAL;
+
         mutex_lock(&rt_constraints_mutex);
-        read_lock(&tasklist_lock);
         err = __rt_schedulable(tg, rt_period, rt_runtime);
         if (err)
                 goto unlock;
@@ -2636,7 +2732,6 @@
         }
         raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
 unlock:
-        read_unlock(&tasklist_lock);
         mutex_unlock(&rt_constraints_mutex);
 
         return err;
@@ -2695,9 +2790,7 @@
         int ret = 0;
 
         mutex_lock(&rt_constraints_mutex);
-        read_lock(&tasklist_lock);
         ret = __rt_schedulable(NULL, 0, 0);
-        read_unlock(&tasklist_lock);
         mutex_unlock(&rt_constraints_mutex);
 
         return ret;
@@ -2738,7 +2831,9 @@
                 return -EINVAL;
 
         if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
-            (sysctl_sched_rt_runtime > sysctl_sched_rt_period))
+            ((sysctl_sched_rt_runtime > sysctl_sched_rt_period) ||
+             ((u64)sysctl_sched_rt_runtime *
+                        NSEC_PER_USEC > max_rt_runtime)))
                 return -EINVAL;
 
         return 0;
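The sysctl value is specified in microseconds, so the new clause converts with NSEC_PER_USEC before comparing against max_rt_runtime, which is in nanoseconds. Under the same MAX_BW assumption as the sketch near the top of this file, the largest accepted sysctl_sched_rt_runtime works out as follows:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        const uint64_t max_rt_runtime_ns = (1ULL << 44) - 1; /* assumed MAX_BW */
        const uint64_t nsec_per_usec = 1000;                 /* NSEC_PER_USEC */

        /* ~17,592,186,044 us: the same ~4.89 hour ceiling, in microseconds */
        printf("max sysctl_sched_rt_runtime = %llu us\n",
               (unsigned long long)(max_rt_runtime_ns / nsec_per_usec));
        return 0;
}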
@@ -2754,9 +2849,8 @@
         raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
 }
 
-int sched_rt_handler(struct ctl_table *table, int write,
-                     void __user *buffer, size_t *lenp,
-                     loff_t *ppos)
+int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
+                     size_t *lenp, loff_t *ppos)
 {
         int old_period, old_runtime;
         static DEFINE_MUTEX(mutex);
@@ -2794,9 +2888,8 @@
         return ret;
 }
 
-int sched_rr_handler(struct ctl_table *table, int write,
-                     void __user *buffer, size_t *lenp,
-                     loff_t *ppos)
+int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
+                     size_t *lenp, loff_t *ppos)
 {
         int ret;
         static DEFINE_MUTEX(mutex);