hc
2023-12-09 95099d4622f8cb224d94e314c7a8e0df60b13f87
kernel/kernel/sched/deadline.c
@@ -43,6 +43,28 @@
 	return !RB_EMPTY_NODE(&dl_se->rb_node);
 }
 
+#ifdef CONFIG_RT_MUTEXES
+static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
+{
+	return dl_se->pi_se;
+}
+
+static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
+{
+	return pi_of(dl_se) != dl_se;
+}
+#else
+static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
+{
+	return dl_se;
+}
+
+static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
+{
+	return false;
+}
+#endif
+
 #ifdef CONFIG_SMP
 static inline struct dl_bw *dl_bw_of(int i)
 {
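
Editor's note: the pi_se pointer introduced above replaces the old dl_boosted flag -- an entity counts as boosted whenever pi_se no longer points at itself, and every parameter lookup goes through pi_of() so a boosted entity transparently uses its donor's runtime/deadline/period. A minimal user-space sketch of that convention (struct dl_entity below is a hypothetical stand-in, not the kernel's sched_dl_entity):

```c
#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in: only the fields the pi_of()/is_dl_boosted() pattern needs. */
struct dl_entity {
	unsigned long long dl_runtime;
	unsigned long long dl_deadline;
	struct dl_entity *pi_se;	/* donor entity; points to self when not boosted */
};

static struct dl_entity *pi_of(struct dl_entity *se)
{
	return se->pi_se;
}

static bool is_dl_boosted(struct dl_entity *se)
{
	return pi_of(se) != se;
}

int main(void)
{
	struct dl_entity donor  = { .dl_runtime = 5000000,  .dl_deadline = 20000000 };
	struct dl_entity waiter = { .dl_runtime = 1000000,  .dl_deadline = 100000000 };

	donor.pi_se = &donor;
	waiter.pi_se = &waiter;		/* not boosted: self-reference */
	printf("boosted=%d runtime=%llu\n", is_dl_boosted(&waiter), pi_of(&waiter)->dl_runtime);

	waiter.pi_se = &donor;		/* boosted: parameters now come from the donor */
	printf("boosted=%d runtime=%llu\n", is_dl_boosted(&waiter), pi_of(&waiter)->dl_runtime);
	return 0;
}
```

Non-boosted entities keep pi_se pointing at themselves (see the __dl_clear_params() hunk near the end of this patch), so pi_of() stays branch-free on the hot paths.
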
@@ -54,14 +76,48 @@
 static inline int dl_bw_cpus(int i)
 {
 	struct root_domain *rd = cpu_rq(i)->rd;
-	int cpus = 0;
+	int cpus;
 
 	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
 			 "sched RCU must be held");
+
+	if (cpumask_subset(rd->span, cpu_active_mask))
+		return cpumask_weight(rd->span);
+
+	cpus = 0;
+
 	for_each_cpu_and(i, rd->span, cpu_active_mask)
 		cpus++;
 
 	return cpus;
+}
+
+static inline unsigned long __dl_bw_capacity(int i)
+{
+	struct root_domain *rd = cpu_rq(i)->rd;
+	unsigned long cap = 0;
+
+	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+			 "sched RCU must be held");
+
+	for_each_cpu_and(i, rd->span, cpu_active_mask)
+		cap += capacity_orig_of(i);
+
+	return cap;
+}
+
+/*
+ * XXX Fix: If 'rq->rd == def_root_domain' perform AC against capacity
+ * of the CPU the task is running on rather rd's \Sum CPU capacity.
+ */
+static inline unsigned long dl_bw_capacity(int i)
+{
+	if (!static_branch_unlikely(&sched_asym_cpucapacity) &&
+	    capacity_orig_of(i) == SCHED_CAPACITY_SCALE) {
+		return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
+	} else {
+		return __dl_bw_capacity(i);
+	}
 }
 #else
 static inline struct dl_bw *dl_bw_of(int i)
@@ -72,6 +128,11 @@
 static inline int dl_bw_cpus(int i)
 {
 	return 1;
+}
+
+static inline unsigned long dl_bw_capacity(int i)
+{
+	return SCHED_CAPACITY_SCALE;
 }
 #endif
 
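
Editor's note: with asymmetric CPU capacities the admission-control "size" of a root domain is no longer just its CPU count. dl_bw_capacity() sums capacity_orig_of() over the active CPUs and only falls back to dl_bw_cpus() << SCHED_CAPACITY_SHIFT when every CPU provides the full 1024. A rough user-space sketch of that arithmetic (the capacity values are made-up numbers and the cpumask walk is replaced by an array):

```c
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1UL << SCHED_CAPACITY_SHIFT)

/* Sum the original capacities of the CPUs in a root domain, as
 * __dl_bw_capacity() does over rd->span & cpu_active_mask. */
static unsigned long dl_bw_capacity(const unsigned long *cap, int nr_cpus, int asym)
{
	unsigned long total = 0;
	int i;

	if (!asym)	/* every CPU at SCHED_CAPACITY_SCALE: count << shift */
		return (unsigned long)nr_cpus << SCHED_CAPACITY_SHIFT;

	for (i = 0; i < nr_cpus; i++)
		total += cap[i];
	return total;
}

int main(void)
{
	unsigned long big_little[6] = { 446, 446, 446, 446, 1024, 1024 };
	unsigned long smp[4] = { 1024, 1024, 1024, 1024 };

	printf("symmetric 4-CPU rd:  %lu\n", dl_bw_capacity(smp, 4, 0));        /* 4096 */
	printf("big.LITTLE 6-CPU rd: %lu\n", dl_bw_capacity(big_little, 6, 1)); /* 3832 */
	return 0;
}
```
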
@@ -153,7 +214,7 @@
 	__sub_running_bw(dl_se->dl_bw, dl_rq);
 }
 
-void dl_change_utilization(struct task_struct *p, u64 new_bw)
+static void dl_change_utilization(struct task_struct *p, u64 new_bw)
 {
 	struct rq *rq;
 
@@ -334,6 +395,8 @@
 	return dl_rq->root.rb_leftmost == &dl_se->rb_node;
 }
 
+static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
+
 void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
 {
 	raw_spin_lock_init(&dl_b->dl_runtime_lock);
@@ -502,7 +565,7 @@
 
 static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
 {
-	return dl_task(prev);
+	return rq->online && dl_task(prev);
 }
 
 static DEFINE_PER_CPU(struct callback_head, dl_push_head);
@@ -657,7 +720,7 @@
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
-	WARN_ON(dl_se->dl_boosted);
+	WARN_ON(is_dl_boosted(dl_se));
 	WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
 
 	/*
@@ -695,21 +758,20 @@
  * could happen are, typically, a entity voluntarily trying to overcome its
  * runtime, or it just underestimated it during sched_setattr().
  */
-static void replenish_dl_entity(struct sched_dl_entity *dl_se,
-				struct sched_dl_entity *pi_se)
+static void replenish_dl_entity(struct sched_dl_entity *dl_se)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
-	BUG_ON(pi_se->dl_runtime <= 0);
+	BUG_ON(pi_of(dl_se)->dl_runtime <= 0);
 
 	/*
 	 * This could be the case for a !-dl task that is boosted.
 	 * Just go with full inherited parameters.
 	 */
 	if (dl_se->dl_deadline == 0) {
-		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
-		dl_se->runtime = pi_se->dl_runtime;
+		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+		dl_se->runtime = pi_of(dl_se)->dl_runtime;
 	}
 
 	if (dl_se->dl_yielded && dl_se->runtime > 0)
@@ -722,8 +784,8 @@
 	 * arbitrary large.
 	 */
 	while (dl_se->runtime <= 0) {
-		dl_se->deadline += pi_se->dl_period;
-		dl_se->runtime += pi_se->dl_runtime;
+		dl_se->deadline += pi_of(dl_se)->dl_period;
+		dl_se->runtime += pi_of(dl_se)->dl_runtime;
 	}
 
 	/*
@@ -737,8 +799,8 @@
 	 */
 	if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
 		printk_deferred_once("sched: DL replenish lagged too much\n");
-		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
-		dl_se->runtime = pi_se->dl_runtime;
+		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+		dl_se->runtime = pi_of(dl_se)->dl_runtime;
 	}
 
 	if (dl_se->dl_yielded)
@@ -759,7 +821,7 @@
  * refill the runtime and set the deadline a period in the future,
  * because keeping the current (absolute) deadline of the task would
  * result in breaking guarantees promised to other tasks (refer to
- * Documentation/scheduler/sched-deadline.txt for more informations).
+ * Documentation/scheduler/sched-deadline.rst for more information).
  *
  * This function returns true if:
  *
@@ -771,8 +833,7 @@
  * task with deadline equal to period this is the same of using
  * dl_period instead of dl_deadline in the equation above.
  */
-static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
-			       struct sched_dl_entity *pi_se, u64 t)
+static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
 {
 	u64 left, right;
 
@@ -794,9 +855,9 @@
 	 * of anything below microseconds resolution is actually fiction
 	 * (but still we want to give the user that illusion >;).
 	 */
-	left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
+	left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
 	right = ((dl_se->deadline - t) >> DL_SCALE) *
-		(pi_se->dl_runtime >> DL_SCALE);
+		(pi_of(dl_se)->dl_runtime >> DL_SCALE);
 
 	return dl_time_before(right, left);
 }
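
Editor's note: the test above compares the entity's residual density, runtime / (deadline - t), against its reserved density, dl_runtime / dl_deadline, by cross-multiplying; both factors are shifted right by DL_SCALE first so the products stay within 64 bits. A standalone sketch with concrete numbers (values chosen only for illustration):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DL_SCALE 10	/* same shift the kernel uses to keep the products in 64 bits */

/*
 * Same test as dl_entity_overflow() above:
 *	runtime / (deadline - t)  >  dl_runtime / dl_deadline
 * rewritten as a cross-multiplication with both factors pre-shifted.
 */
static bool dl_entity_overflow(uint64_t dl_runtime, uint64_t dl_deadline,
			       uint64_t runtime, uint64_t deadline, uint64_t t)
{
	uint64_t left, right;

	left = (dl_deadline >> DL_SCALE) * (runtime >> DL_SCALE);
	right = ((deadline - t) >> DL_SCALE) * (dl_runtime >> DL_SCALE);

	return right < left;	/* dl_time_before(right, left), ignoring wraparound */
}

int main(void)
{
	/* Reservation: 10ms runtime every 100ms; all values in nanoseconds. */
	uint64_t dl_runtime = 10000000, dl_deadline = 100000000;
	uint64_t deadline = 500000000;	/* current absolute deadline */

	/* 6ms of budget left, 70ms to the deadline: density 0.086 <= 0.1, prints 0. */
	printf("%d\n", dl_entity_overflow(dl_runtime, dl_deadline,
					  6000000, deadline, 430000000));
	/* Same budget but only 5ms to the deadline: density 1.2 > 0.1, prints 1. */
	printf("%d\n", dl_entity_overflow(dl_runtime, dl_deadline,
					  6000000, deadline, 495000000));
	return 0;
}
```

When it returns true, update_dl_entity() hands out a fresh deadline and a full runtime instead of keeping the current ones.
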
@@ -881,24 +942,23 @@
  * Please refer to the comments update_dl_revised_wakeup() function to find
  * more about the Revised CBS rule.
  */
-static void update_dl_entity(struct sched_dl_entity *dl_se,
-			     struct sched_dl_entity *pi_se)
+static void update_dl_entity(struct sched_dl_entity *dl_se)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
-	    dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
+	    dl_entity_overflow(dl_se, rq_clock(rq))) {
 
 		if (unlikely(!dl_is_implicit(dl_se) &&
 			     !dl_time_before(dl_se->deadline, rq_clock(rq)) &&
-			     !dl_se->dl_boosted)){
+			     !is_dl_boosted(dl_se))) {
 			update_dl_revised_wakeup(dl_se, rq);
 			return;
 		}
 
-		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
-		dl_se->runtime = pi_se->dl_runtime;
+		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+		dl_se->runtime = pi_of(dl_se)->dl_runtime;
 	}
 }
 
@@ -956,7 +1016,7 @@
 	 */
 	if (!hrtimer_is_queued(timer)) {
 		get_task_struct(p);
-		hrtimer_start(timer, act, HRTIMER_MODE_ABS);
+		hrtimer_start(timer, act, HRTIMER_MODE_ABS_HARD);
 	}
 
 	return 1;
@@ -997,7 +1057,7 @@
 	 * The task might have been boosted by someone else and might be in the
 	 * boosting/deboosting path, its not throttled.
 	 */
-	if (dl_se->dl_boosted)
+	if (is_dl_boosted(dl_se))
 		goto unlock;
 
 	/*
@@ -1025,7 +1085,7 @@
 	 * but do not enqueue -- wait for our wakeup to do that.
 	 */
 	if (!task_on_rq_queued(p)) {
-		replenish_dl_entity(dl_se, dl_se);
+		replenish_dl_entity(dl_se);
 		goto unlock;
 	}
 
@@ -1096,7 +1156,7 @@
 * cannot use the runtime, and so it replenishes the task. This rule
 * works fine for implicit deadline tasks (deadline == period), and the
 * CBS was designed for implicit deadline tasks. However, a task with
- * constrained deadline (deadine < period) might be awakened after the
+ * constrained deadline (deadline < period) might be awakened after the
 * deadline, but before the next period. In this case, replenishing the
 * task would allow it to run for runtime / deadline. As in this case
 * deadline < period, CBS enables a task to run for more than the
@@ -1115,7 +1175,7 @@
 
 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
 	    dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
-		if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
+		if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p)))
 			return;
 		dl_se->dl_throttled = 1;
 		if (dl_se->runtime > 0)
@@ -1228,7 +1288,7 @@
 						 &curr->dl);
 	} else {
 		unsigned long scale_freq = arch_scale_freq_capacity(cpu);
-		unsigned long scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
+		unsigned long scale_cpu = arch_scale_cpu_capacity(cpu);
 
 		scaled_delta_exec = cap_scale(delta_exec, scale_freq);
 		scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
@@ -1246,7 +1306,7 @@
 			dl_se->dl_overrun = 1;
 
 		__dequeue_task_dl(rq, curr, 0);
-		if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
+		if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr)))
 			enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
 
 		if (!is_leftmost(curr, &rq->dl))
@@ -1440,8 +1500,7 @@
 }
 
 static void
-enqueue_dl_entity(struct sched_dl_entity *dl_se,
-		  struct sched_dl_entity *pi_se, int flags)
+enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
 {
 	BUG_ON(on_dl_rq(dl_se));
 
@@ -1452,9 +1511,9 @@
 	 */
 	if (flags & ENQUEUE_WAKEUP) {
 		task_contending(dl_se, flags);
-		update_dl_entity(dl_se, pi_se);
+		update_dl_entity(dl_se);
 	} else if (flags & ENQUEUE_REPLENISH) {
-		replenish_dl_entity(dl_se, pi_se);
+		replenish_dl_entity(dl_se);
 	} else if ((flags & ENQUEUE_RESTORE) &&
 		  dl_time_before(dl_se->deadline,
 				 rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
@@ -1471,28 +1530,43 @@
 
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
-	struct task_struct *pi_task = rt_mutex_get_top_task(p);
-	struct sched_dl_entity *pi_se = &p->dl;
-
-	/*
-	 * Use the scheduling parameters of the top pi-waiter task if:
-	 * - we have a top pi-waiter which is a SCHED_DEADLINE task AND
-	 * - our dl_boosted is set (i.e. the pi-waiter's (absolute) deadline is
-	 *   smaller than our deadline OR we are a !SCHED_DEADLINE task getting
-	 *   boosted due to a SCHED_DEADLINE pi-waiter).
-	 * Otherwise we keep our runtime and deadline.
-	 */
-	if (pi_task && dl_prio(pi_task->normal_prio) && p->dl.dl_boosted) {
-		pi_se = &pi_task->dl;
+	if (is_dl_boosted(&p->dl)) {
+		/*
+		 * Because of delays in the detection of the overrun of a
+		 * thread's runtime, it might be the case that a thread
+		 * goes to sleep in a rt mutex with negative runtime. As
+		 * a consequence, the thread will be throttled.
+		 *
+		 * While waiting for the mutex, this thread can also be
+		 * boosted via PI, resulting in a thread that is throttled
+		 * and boosted at the same time.
+		 *
+		 * In this case, the boost overrides the throttle.
+		 */
+		if (p->dl.dl_throttled) {
+			/*
+			 * The replenish timer needs to be canceled. No
+			 * problem if it fires concurrently: boosted threads
+			 * are ignored in dl_task_timer().
+			 */
+			hrtimer_try_to_cancel(&p->dl.dl_timer);
+			p->dl.dl_throttled = 0;
+		}
 	} else if (!dl_prio(p->normal_prio)) {
 		/*
-		 * Special case in which we have a !SCHED_DEADLINE task
-		 * that is going to be deboosted, but exceeds its
-		 * runtime while doing so. No point in replenishing
-		 * it, as it's going to return back to its original
-		 * scheduling class after this.
+		 * Special case in which we have a !SCHED_DEADLINE task that is going
+		 * to be deboosted, but exceeds its runtime while doing so. No point in
+		 * replenishing it, as it's going to return back to its original
+		 * scheduling class after this. If it has been throttled, we need to
+		 * clear the flag, otherwise the task may wake up as throttled after
+		 * being boosted again with no means to replenish the runtime and clear
+		 * the throttle.
 		 */
-		BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH);
+		p->dl.dl_throttled = 0;
+		if (!(flags & ENQUEUE_REPLENISH))
+			printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n",
+					     task_pid_nr(p));
+
 		return;
 	}
 
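
Editor's note: the rewritten branch above handles a task that is throttled and PI-boosted at the same time; the boost wins, so the replenish timer is cancelled and the throttle cleared before the task is enqueued on its donor's parameters. A toy user-space model of just that state transition (the struct and the timer helper are stubs, not kernel APIs):

```c
#include <stdbool.h>
#include <stdio.h>

/* Toy model of the enqueue path: only the flags that matter here. */
struct dl_entity {
	bool dl_throttled;
	bool boosted;		/* stands in for is_dl_boosted() */
	bool timer_armed;	/* stands in for the dl_timer being queued */
};

/* Stand-in for hrtimer_try_to_cancel(&p->dl.dl_timer). */
static void cancel_replenish_timer(struct dl_entity *se)
{
	se->timer_armed = false;
}

static void enqueue_boosted(struct dl_entity *se)
{
	if (se->boosted && se->dl_throttled) {
		/*
		 * Throttled *and* boosted: the boost wins. Cancel the
		 * replenish timer and clear the throttle so the task can
		 * run on the donor's parameters right away.
		 */
		cancel_replenish_timer(se);
		se->dl_throttled = false;
	}
}

int main(void)
{
	struct dl_entity se = { .dl_throttled = true, .boosted = true, .timer_armed = true };

	enqueue_boosted(&se);
	printf("throttled=%d timer=%d\n", se.dl_throttled, se.timer_armed); /* 0 0 */
	return 0;
}
```
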
@@ -1529,7 +1603,7 @@
 		return;
 	}
 
-	enqueue_dl_entity(&p->dl, pi_se, flags);
+	enqueue_dl_entity(&p->dl, flags);
 
 	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
 		enqueue_pushable_dl_task(rq, p);
@@ -1599,10 +1673,10 @@
 static int find_later_rq(struct task_struct *task);
 
 static int
-select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags,
-		  int sibling_count_hint)
+select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
 	struct task_struct *curr;
+	bool select_rq;
 	struct rq *rq;
 
 	if (sd_flag != SD_BALANCE_WAKE)
@@ -1622,10 +1696,19 @@
 	 * other hand, if it has a shorter deadline, we
 	 * try to make it stay here, it might be important.
 	 */
-	if (unlikely(dl_task(curr)) &&
-	    (curr->nr_cpus_allowed < 2 ||
-	     !dl_entity_preempt(&p->dl, &curr->dl)) &&
-	    (p->nr_cpus_allowed > 1)) {
+	select_rq = unlikely(dl_task(curr)) &&
+		    (curr->nr_cpus_allowed < 2 ||
+		     !dl_entity_preempt(&p->dl, &curr->dl)) &&
+		    p->nr_cpus_allowed > 1;
+
+	/*
+	 * Take the capacity of the CPU into account to
+	 * ensure it fits the requirement of the task.
+	 */
+	if (static_branch_unlikely(&sched_asym_cpucapacity))
+		select_rq |= !dl_task_fits_capacity(p, cpu);
+
+	if (select_rq) {
 		int target = find_later_rq(p);
 
 		if (target != -1 &&
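
Editor's note: select_task_rq_dl() now also redirects the wakeup when the task would not fit on the waking CPU. dl_task_fits_capacity() itself is not part of this hunk; in mainline it amounts to checking dl_runtime / dl_deadline against cap / 1024, roughly as sketched below (treat the exact form as an assumption):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10

/*
 * Fitness test in the spirit of dl_task_fits_capacity(): a CPU with
 * capacity 'cap' (out of 1024) fits the task if
 *	dl_runtime / dl_deadline <= cap / 1024
 * evaluated without divisions.
 */
static bool dl_task_fits_capacity(uint64_t dl_runtime, uint64_t dl_deadline,
				  unsigned long cap)
{
	return (dl_deadline * cap >> SCHED_CAPACITY_SHIFT) >= dl_runtime;
}

int main(void)
{
	/* 5ms every 16ms: needs ~31% of a full-capacity CPU. */
	uint64_t runtime = 5000000, deadline = 16000000;

	printf("big (1024):   %d\n", dl_task_fits_capacity(runtime, deadline, 1024)); /* 1 */
	printf("LITTLE (446): %d\n", dl_task_fits_capacity(runtime, deadline, 446));  /* 1: 6.97ms >= 5ms */
	printf("tiny (160):   %d\n", dl_task_fits_capacity(runtime, deadline, 160));  /* 0: 2.5ms < 5ms */
	return 0;
}
```
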
@@ -1693,6 +1776,22 @@
 	resched_curr(rq);
 }
 
+static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+{
+	if (!on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
+		/*
+		 * This is OK, because current is on_cpu, which avoids it being
+		 * picked for load-balance and preemption/IRQs are still
+		 * disabled avoiding further scheduler activity on it and we've
+		 * not yet started the picking loop.
+		 */
+		rq_unpin_lock(rq, rf);
+		pull_dl_task(rq);
+		rq_repin_lock(rq, rf);
+	}
+
+	return sched_stop_runnable(rq) || sched_dl_runnable(rq);
+}
 #endif /* CONFIG_SMP */
 
 /*
@@ -1729,6 +1828,25 @@
 }
 #endif
 
+static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
+{
+	p->se.exec_start = rq_clock_task(rq);
+
+	/* You can't push away the running task */
+	dequeue_pushable_dl_task(rq, p);
+
+	if (!first)
+		return;
+
+	if (hrtick_enabled(rq))
+		start_hrtick_dl(rq, p);
+
+	if (rq->curr->sched_class != &dl_sched_class)
+		update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
+
+	deadline_queue_push_tasks(rq);
+}
+
 static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
 						   struct dl_rq *dl_rq)
 {
@@ -1740,63 +1858,19 @@
 	return rb_entry(left, struct sched_dl_entity, rb_node);
 }
 
-static struct task_struct *
-pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static struct task_struct *pick_next_task_dl(struct rq *rq)
 {
 	struct sched_dl_entity *dl_se;
+	struct dl_rq *dl_rq = &rq->dl;
 	struct task_struct *p;
-	struct dl_rq *dl_rq;
 
-	dl_rq = &rq->dl;
-
-	if (need_pull_dl_task(rq, prev)) {
-		/*
-		 * This is OK, because current is on_cpu, which avoids it being
-		 * picked for load-balance and preemption/IRQs are still
-		 * disabled avoiding further scheduler activity on it and we're
-		 * being very careful to re-start the picking loop.
-		 */
-		rq_unpin_lock(rq, rf);
-		pull_dl_task(rq);
-		rq_repin_lock(rq, rf);
-		/*
-		 * pull_dl_task() can drop (and re-acquire) rq->lock; this
-		 * means a stop task can slip in, in which case we need to
-		 * re-start task selection.
-		 */
-		if (rq->stop && task_on_rq_queued(rq->stop))
-			return RETRY_TASK;
-	}
-
-	/*
-	 * When prev is DL, we may throttle it in put_prev_task().
-	 * So, we update time before we check for dl_nr_running.
-	 */
-	if (prev->sched_class == &dl_sched_class)
-		update_curr_dl(rq);
-
-	if (unlikely(!dl_rq->dl_nr_running))
+	if (!sched_dl_runnable(rq))
 		return NULL;
-
-	put_prev_task(rq, prev);
 
 	dl_se = pick_next_dl_entity(rq, dl_rq);
 	BUG_ON(!dl_se);
-
 	p = dl_task_of(dl_se);
-	p->se.exec_start = rq_clock_task(rq);
-
-	/* Running task will never be pushed. */
-	dequeue_pushable_dl_task(rq, p);
-
-	if (hrtick_enabled(rq))
-		start_hrtick_dl(rq, p);
-
-	deadline_queue_push_tasks(rq);
-
-	if (rq->curr->sched_class != &dl_sched_class)
-		update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
-
+	set_next_task_dl(rq, p, true);
 	return p;
 }
 
@@ -1840,16 +1914,6 @@
 	 */
 }
 
-static void set_curr_task_dl(struct rq *rq)
-{
-	struct task_struct *p = rq->curr;
-
-	p->se.exec_start = rq_clock_task(rq);
-
-	/* You can't push away the running task */
-	dequeue_pushable_dl_task(rq, p);
-}
-
 #ifdef CONFIG_SMP
 
 /* Only try algorithms three times */
@@ -1858,7 +1922,7 @@
 static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
-	    cpumask_test_cpu(cpu, p->cpus_ptr))
+	    cpumask_test_cpu(cpu, &p->cpus_mask))
 		return 1;
 	return 0;
 }
@@ -1948,8 +2012,8 @@
 			return this_cpu;
 		}
 
-		best_cpu = cpumask_first_and(later_mask,
-					     sched_domain_span(sd));
+		best_cpu = cpumask_any_and_distribute(later_mask,
+						      sched_domain_span(sd));
 		/*
 		 * Last chance: if a CPU being in both later_mask
 		 * and current sd span is valid, that becomes our
@@ -1971,7 +2035,7 @@
 	if (this_cpu != -1)
 		return this_cpu;
 
-	cpu = cpumask_any(later_mask);
+	cpu = cpumask_any_distribute(later_mask);
 	if (cpu < nr_cpu_ids)
 		return cpu;
 
@@ -2008,7 +2072,7 @@
 		/* Retry if something changed. */
 		if (double_lock_balance(rq, later_rq)) {
 			if (unlikely(task_rq(task) != rq ||
-				     !cpumask_test_cpu(later_rq->cpu, task->cpus_ptr) ||
+				     !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
 				     task_running(rq, task) ||
 				     !dl_task(task) ||
 				     !task_on_rq_queued(task))) {
@@ -2075,10 +2139,11 @@
 		return 0;
 
 retry:
-	if (unlikely(next_task == rq->curr)) {
-		WARN_ON(1);
+	if (is_migration_disabled(next_task))
 		return 0;
-	}
+
+	if (WARN_ON(next_task == rq->curr))
+		return 0;
 
 	/*
 	 * If next_task preempts rq->curr, and rq->curr
@@ -2124,17 +2189,13 @@
 	}
 
 	deactivate_task(rq, next_task, 0);
-	sub_running_bw(&next_task->dl, &rq->dl);
-	sub_rq_bw(&next_task->dl, &rq->dl);
 	set_task_cpu(next_task, later_rq->cpu);
-	add_rq_bw(&next_task->dl, &later_rq->dl);
 
 	/*
 	 * Update the later_rq clock here, because the clock is used
 	 * by the cpufreq_update_util() inside __add_running_bw().
 	 */
 	update_rq_clock(later_rq);
-	add_running_bw(&next_task->dl, &later_rq->dl);
 	activate_task(later_rq, next_task, ENQUEUE_NOCLOCK);
 	ret = 1;
 
@@ -2158,7 +2219,7 @@
 static void pull_dl_task(struct rq *this_rq)
 {
 	int this_cpu = this_rq->cpu, cpu;
-	struct task_struct *p;
+	struct task_struct *p, *push_task;
 	bool resched = false;
 	struct rq *src_rq;
 	u64 dmin = LONG_MAX;
@@ -2188,6 +2249,7 @@
 			continue;
 
 		/* Might drop this_rq->lock */
+		push_task = NULL;
 		double_lock_balance(this_rq, src_rq);
 
 		/*
@@ -2219,21 +2281,28 @@
 					   src_rq->curr->dl.deadline))
 				goto skip;
 
-			resched = true;
-
-			deactivate_task(src_rq, p, 0);
-			sub_running_bw(&p->dl, &src_rq->dl);
-			sub_rq_bw(&p->dl, &src_rq->dl);
-			set_task_cpu(p, this_cpu);
-			add_rq_bw(&p->dl, &this_rq->dl);
-			add_running_bw(&p->dl, &this_rq->dl);
-			activate_task(this_rq, p, 0);
-			dmin = p->dl.deadline;
+			if (is_migration_disabled(p)) {
+				trace_sched_migrate_pull_tp(p);
+				push_task = get_push_task(src_rq);
+			} else {
+				deactivate_task(src_rq, p, 0);
+				set_task_cpu(p, this_cpu);
+				activate_task(this_rq, p, 0);
+				dmin = p->dl.deadline;
+				resched = true;
+			}
 
 			/* Is there any other task even earlier? */
 		}
 skip:
 		double_unlock_balance(this_rq, src_rq);
+
+		if (push_task) {
+			raw_spin_unlock(&this_rq->lock);
+			stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
+					    push_task, &src_rq->push_work);
+			raw_spin_lock(&this_rq->lock);
+		}
 	}
 
 	if (resched)
@@ -2257,7 +2326,8 @@
 }
 
 static void set_cpus_allowed_dl(struct task_struct *p,
-				const struct cpumask *new_mask)
+				const struct cpumask *new_mask,
+				u32 flags)
 {
 	struct root_domain *src_rd;
 	struct rq *rq;
@@ -2286,7 +2356,7 @@
 		raw_spin_unlock(&src_dl_b->lock);
 	}
 
-	set_cpus_allowed_common(p, new_mask);
+	set_cpus_allowed_common(p, new_mask, flags);
 }
 
 /* Assumes rq->lock is held */
@@ -2317,6 +2387,39 @@
 	for_each_possible_cpu(i)
 		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
 					GFP_KERNEL, cpu_to_node(i));
+}
+
+void dl_add_task_root_domain(struct task_struct *p)
+{
+	struct rq_flags rf;
+	struct rq *rq;
+	struct dl_bw *dl_b;
+
+	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
+	if (!dl_task(p)) {
+		raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
+		return;
+	}
+
+	rq = __task_rq_lock(p, &rf);
+
+	dl_b = &rq->rd->dl_bw;
+	raw_spin_lock(&dl_b->lock);
+
+	__dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
+
+	raw_spin_unlock(&dl_b->lock);
+
+	task_rq_unlock(rq, p, &rf);
+}
+
+void dl_clear_root_domain(struct root_domain *rd)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&rd->dl_bw.lock, flags);
+	rd->dl_bw.total_bw = 0;
+	raw_spin_unlock_irqrestore(&rd->dl_bw.lock, flags);
 }
 
 #endif /* CONFIG_SMP */
@@ -2390,6 +2493,8 @@
 			check_preempt_curr_dl(rq, p, 0);
 		else
 			resched_curr(rq);
+	} else {
+		update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
 	}
 }
 
@@ -2429,8 +2534,8 @@
 	}
 }
 
-const struct sched_class dl_sched_class = {
-	.next = &rt_sched_class,
+const struct sched_class dl_sched_class
+	__section("__dl_sched_class") = {
 	.enqueue_task = enqueue_task_dl,
 	.dequeue_task = dequeue_task_dl,
 	.yield_task = yield_task_dl,
@@ -2439,17 +2544,19 @@
 
 	.pick_next_task = pick_next_task_dl,
 	.put_prev_task = put_prev_task_dl,
+	.set_next_task = set_next_task_dl,
 
 #ifdef CONFIG_SMP
+	.balance = balance_dl,
 	.select_task_rq = select_task_rq_dl,
 	.migrate_task_rq = migrate_task_rq_dl,
 	.set_cpus_allowed = set_cpus_allowed_dl,
 	.rq_online = rq_online_dl,
 	.rq_offline = rq_offline_dl,
 	.task_woken = task_woken_dl,
+	.find_lock_rq = find_lock_later_rq,
 #endif
 
-	.set_curr_task = set_curr_task_dl,
 	.task_tick = task_tick_dl,
 	.task_fork = task_fork_dl,
 
@@ -2497,7 +2604,7 @@
 	return ret;
 }
 
-void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
+static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
 {
 	if (global_rt_runtime() == RUNTIME_INF) {
 		dl_rq->bw_ratio = 1 << RATIO_SHIFT;
@@ -2550,11 +2657,12 @@
 int sched_dl_overflow(struct task_struct *p, int policy,
 		      const struct sched_attr *attr)
 {
-	struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
 	u64 period = attr->sched_period ?: attr->sched_deadline;
 	u64 runtime = attr->sched_runtime;
 	u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
-	int cpus, err = -1;
+	int cpus, err = -1, cpu = task_cpu(p);
+	struct dl_bw *dl_b = dl_bw_of(cpu);
+	unsigned long cap;
 
 	if (attr->sched_flags & SCHED_FLAG_SUGOV)
 		return 0;
@@ -2569,15 +2677,17 @@
 	 * allocated bandwidth of the container.
 	 */
 	raw_spin_lock(&dl_b->lock);
-	cpus = dl_bw_cpus(task_cpu(p));
+	cpus = dl_bw_cpus(cpu);
+	cap = dl_bw_capacity(cpu);
+
 	if (dl_policy(policy) && !task_has_dl_policy(p) &&
-	    !__dl_overflow(dl_b, cpus, 0, new_bw)) {
+	    !__dl_overflow(dl_b, cap, 0, new_bw)) {
 		if (hrtimer_active(&p->dl.inactive_timer))
 			__dl_sub(dl_b, p->dl.dl_bw, cpus);
 		__dl_add(dl_b, new_bw, cpus);
 		err = 0;
 	} else if (dl_policy(policy) && task_has_dl_policy(p) &&
-		   !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) {
+		   !__dl_overflow(dl_b, cap, p->dl.dl_bw, new_bw)) {
 		/*
 		 * XXX this is slightly incorrect: when the task
 		 * utilization decreases, we should delay the total
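
Editor's note: admission control above now tests against the root domain's summed capacity (cap from dl_bw_capacity()) rather than its CPU count. A rough user-space rendering of the arithmetic -- to_ratio() mirrors the kernel's 20-bit fixed point, while dl_overflow() below paraphrases __dl_overflow() and omits the "unlimited bandwidth" (-1) case and all locking:

```c
#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT		20	/* bandwidth stored as a 20-bit fixed-point fraction */
#define SCHED_CAPACITY_SHIFT	10

/* to_ratio(period, runtime): runtime/period << BW_SHIFT */
static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
	return (runtime << BW_SHIFT) / period;
}

/*
 * Capacity-aware admission test: with 'cap' being the summed capacity of the
 * root domain (1024 per full CPU), the new total utilization must stay below
 * the global limit scaled by that capacity.
 */
static int dl_overflow(uint64_t limit_bw, unsigned long cap,
		       uint64_t total_bw, uint64_t old_bw, uint64_t new_bw)
{
	return (limit_bw * cap >> SCHED_CAPACITY_SHIFT) < total_bw - old_bw + new_bw;
}

int main(void)
{
	/* Global limit: 95% (the default 950000/1000000 rt runtime/period). */
	uint64_t limit_bw = to_ratio(1000000, 950000);
	/* One big CPU (1024) plus one LITTLE CPU (446). */
	unsigned long cap = 1024 + 446;
	/* Already admitted: 60% of one CPU; new task asks for 25ms/40ms = 62.5%. */
	uint64_t total_bw = to_ratio(100, 60);
	uint64_t new_bw = to_ratio(40000000, 25000000);

	printf("new_bw = %llu (%.3f CPUs)\n",
	       (unsigned long long)new_bw, (double)new_bw / (1 << BW_SHIFT));
	printf("overflow = %d\n", dl_overflow(limit_bw, cap, total_bw, 0, new_bw)); /* 0: admitted */
	return 0;
}
```
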
@@ -2635,6 +2745,14 @@
 }
 
 /*
+ * Default limits for DL period; on the top end we guard against small util
+ * tasks still getting rediculous long effective runtimes, on the bottom end we
+ * guard against timer DoS.
+ */
+unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
+unsigned int sysctl_sched_dl_period_min = 100;     /* 100 us */
+
+/*
 * This function validates the new parameters of a -deadline task.
 * We ask for the deadline not being zero, and greater or equal
 * than the runtime, as well as the period of being zero or
@@ -2646,6 +2764,8 @@
  */
 bool __checkparam_dl(const struct sched_attr *attr)
 {
+	u64 period, max, min;
+
 	/* special dl tasks don't actually use any parameter */
 	if (attr->sched_flags & SCHED_FLAG_SUGOV)
 		return true;
@@ -2669,10 +2789,19 @@
 	    attr->sched_period & (1ULL << 63))
 		return false;
 
+	period = attr->sched_period;
+	if (!period)
+		period = attr->sched_deadline;
+
 	/* runtime <= deadline <= period (if period != 0) */
-	if ((attr->sched_period != 0 &&
-	     attr->sched_period < attr->sched_deadline) ||
+	if (period < attr->sched_deadline ||
 	    attr->sched_deadline < attr->sched_runtime)
+		return false;
+
+	max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
+	min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
+
+	if (period < min || period > max)
 		return false;
 
 	return true;
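
Editor's note: the new sysctls bound the effective period accepted by __checkparam_dl(); converted to nanoseconds the window is [100 us, ~4.19 s]. A standalone sketch of just the added checks (sysctl handling and the other attribute checks are omitted):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_USEC	1000ULL

/* Same defaults as the sysctls added above (values in microseconds). */
static unsigned int sysctl_sched_dl_period_max = 1 << 22;	/* ~4.19 s */
static unsigned int sysctl_sched_dl_period_min = 100;		/* 100 us */

/* The period-related part of __checkparam_dl(): runtime <= deadline <= period,
 * and the (effective) period must sit inside [min, max]. */
static bool checkparam_dl(uint64_t runtime, uint64_t deadline, uint64_t period)
{
	uint64_t max = (uint64_t)sysctl_sched_dl_period_max * NSEC_PER_USEC;
	uint64_t min = (uint64_t)sysctl_sched_dl_period_min * NSEC_PER_USEC;

	if (!period)
		period = deadline;	/* implicit-deadline task */

	if (period < deadline || deadline < runtime)
		return false;

	return period >= min && period <= max;
}

int main(void)
{
	/* 10ms / 30ms / 100ms: accepted */
	printf("%d\n", checkparam_dl(10000000, 30000000, 100000000));	    /* 1 */
	/* 10us period: below the 100us floor, rejected (timer DoS guard) */
	printf("%d\n", checkparam_dl(1000, 5000, 10000));		    /* 0 */
	/* 10s period: above the ~4.19s cap, rejected */
	printf("%d\n", checkparam_dl(10000000, 10000000000, 10000000000)); /* 0 */
	return 0;
}
```
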
@@ -2692,11 +2821,14 @@
 	dl_se->dl_bw = 0;
 	dl_se->dl_density = 0;
 
-	dl_se->dl_boosted = 0;
 	dl_se->dl_throttled = 0;
 	dl_se->dl_yielded = 0;
 	dl_se->dl_non_contending = 0;
 	dl_se->dl_overrun = 0;
+
+#ifdef CONFIG_RT_MUTEXES
+	dl_se->pi_se = dl_se;
+#endif
 }
 
 bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
@@ -2713,39 +2845,6 @@
 }
 
 #ifdef CONFIG_SMP
-int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
-{
-	unsigned int dest_cpu;
-	struct dl_bw *dl_b;
-	bool overflow;
-	int cpus, ret;
-	unsigned long flags;
-
-	dest_cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
-
-	rcu_read_lock_sched();
-	dl_b = dl_bw_of(dest_cpu);
-	raw_spin_lock_irqsave(&dl_b->lock, flags);
-	cpus = dl_bw_cpus(dest_cpu);
-	overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw);
-	if (overflow) {
-		ret = -EBUSY;
-	} else {
-		/*
-		 * We reserve space for this task in the destination
-		 * root_domain, as we can't fail after this point.
-		 * We will free resources in the source root_domain
-		 * later on (see set_cpus_allowed_dl()).
-		 */
-		__dl_add(dl_b, p->dl.dl_bw, cpus);
-		ret = 0;
-	}
-	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-	rcu_read_unlock_sched();
-
-	return ret;
-}
-
 int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
 				 const struct cpumask *trial)
 {
@@ -2767,22 +2866,32 @@
 	return ret;
 }
 
-bool dl_cpu_busy(unsigned int cpu)
+int dl_cpu_busy(int cpu, struct task_struct *p)
 {
-	unsigned long flags;
+	unsigned long flags, cap;
 	struct dl_bw *dl_b;
 	bool overflow;
-	int cpus;
 
 	rcu_read_lock_sched();
 	dl_b = dl_bw_of(cpu);
 	raw_spin_lock_irqsave(&dl_b->lock, flags);
-	cpus = dl_bw_cpus(cpu);
-	overflow = __dl_overflow(dl_b, cpus, 0, 0);
+	cap = dl_bw_capacity(cpu);
+	overflow = __dl_overflow(dl_b, cap, 0, p ? p->dl.dl_bw : 0);
+
+	if (!overflow && p) {
+		/*
+		 * We reserve space for this task in the destination
+		 * root_domain, as we can't fail after this point.
+		 * We will free resources in the source root_domain
+		 * later on (see set_cpus_allowed_dl()).
+		 */
+		__dl_add(dl_b, p->dl.dl_bw, dl_bw_cpus(cpu));
+	}
+
 	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
 	rcu_read_unlock_sched();
 
-	return overflow;
+	return overflow ? -EBUSY : 0;
 }
 #endif
 