2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/kernel/cgroup/cpuset.c
@@ -165,6 +165,14 @@
 	 */
 	int use_parent_ecpus;
 	int child_ecpus_count;
+
+	/*
+	 * number of SCHED_DEADLINE tasks attached to this cpuset, so that we
+	 * know when to rebuild associated root domain bandwidth information.
+	 */
+	int nr_deadline_tasks;
+	int nr_migrate_dl_tasks;
+	u64 sum_migrate_dl_bw;
 };
 
 /*
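
The three new fields fall into two groups. nr_deadline_tasks is persistent accounting, maintained by the inc_dl_tasks_cs()/dec_dl_tasks_cs() helpers added in the next hunk, and lets the root-domain rebuild path skip cpusets that host no SCHED_DEADLINE tasks. nr_migrate_dl_tasks and sum_migrate_dl_bw are scratch state for a single attach operation: populated in cpuset_can_attach(), consumed in cpuset_attach(), and cleared via reset_migrate_dl_data() on completion or rollback (see the hunks further down).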
@@ -208,6 +216,20 @@
 static inline struct cpuset *parent_cs(struct cpuset *cs)
 {
 	return css_cs(cs->css.parent);
+}
+
+void inc_dl_tasks_cs(struct task_struct *p)
+{
+	struct cpuset *cs = task_cs(p);
+
+	cs->nr_deadline_tasks++;
+}
+
+void dec_dl_tasks_cs(struct task_struct *p)
+{
+	struct cpuset *cs = task_cs(p);
+
+	cs->nr_deadline_tasks--;
 }
 
 /* bits in struct cpuset flags field */
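
inc_dl_tasks_cs()/dec_dl_tasks_cs() have no callers in this file; they are for the scheduler, which must invoke them whenever a task enters or leaves SCHED_DEADLINE, with cpuset_mutex held (via the cpuset_lock() wrapper added below) so that task_cs(p) and the counter stay stable. The scheduler-side hunks are outside this diff; a plausible sketch of the call sites, following the shape of the upstream series (which hooks the class-switch paths in kernel/sched/deadline.c):

	/* kernel/sched/deadline.c -- sketch only, not part of this diff */
	static void switched_to_dl(struct rq *rq, struct task_struct *p)
	{
		/* p has just become SCHED_DEADLINE: count it in its cpuset */
		inc_dl_tasks_cs(p);
		/* ... existing class-switch handling ... */
	}

	static void switched_from_dl(struct rq *rq, struct task_struct *p)
	{
		/* p is leaving SCHED_DEADLINE: uncount it */
		dec_dl_tasks_cs(p);
		/* ... existing class-switch handling ... */
	}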
@@ -339,7 +361,18 @@
  */
 
 static DEFINE_MUTEX(cpuset_mutex);
-static DEFINE_RAW_SPINLOCK(callback_lock);
+
+void cpuset_lock(void)
+{
+	mutex_lock(&cpuset_mutex);
+}
+
+void cpuset_unlock(void)
+{
+	mutex_unlock(&cpuset_mutex);
+}
+
+static DEFINE_SPINLOCK(callback_lock);
 
 static struct workqueue_struct *cpuset_migrate_mm_wq;
 
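
Two things change here: callback_lock drops from raw_spinlock_t to a plain spinlock_t (every acquisition in the rest of the file is converted to match), and cpuset_lock()/cpuset_unlock() expose cpuset_mutex outside this file. The wrappers let the scheduler pin a task's cpuset across SCHED_DEADLINE admission control; roughly, and assuming the upstream shape of __sched_setscheduler() (the scheduler hunks are not in this diff):

	/* kernel/sched/core.c, __sched_setscheduler() -- sketch only:
	 * hold cpuset_mutex so the task cannot change cpuset while its
	 * deadline bandwidth request is validated against it.
	 */
	cpuset_lock();
	/* ... validate and apply the new policy and bandwidth ... */
	cpuset_unlock();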
@@ -925,10 +958,13 @@
 	return ndoms;
 }
 
-static void update_tasks_root_domain(struct cpuset *cs)
+static void dl_update_tasks_root_domain(struct cpuset *cs)
 {
 	struct css_task_iter it;
 	struct task_struct *task;
+
+	if (cs->nr_deadline_tasks == 0)
+		return;
 
 	css_task_iter_start(&cs->css, 0, &it);
 
@@ -938,7 +974,7 @@
 	css_task_iter_end(&it);
 }
 
-static void rebuild_root_domains(void)
+static void dl_rebuild_rd_accounting(void)
 {
 	struct cpuset *cs = NULL;
 	struct cgroup_subsys_state *pos_css;
@@ -966,7 +1002,7 @@
 
 	rcu_read_unlock();
 
-	update_tasks_root_domain(cs);
+	dl_update_tasks_root_domain(cs);
 
 	rcu_read_lock();
 	css_put(&cs->css);
@@ -980,7 +1016,7 @@
 {
 	mutex_lock(&sched_domains_mutex);
 	partition_sched_domains_locked(ndoms_new, doms_new, dattr_new);
-	rebuild_root_domains();
+	dl_rebuild_rd_accounting();
 	mutex_unlock(&sched_domains_mutex);
 }
 
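
Flow after the renames: partition_sched_domains_locked() tears down and rebuilds the root domains, then dl_rebuild_rd_accounting() walks the cpuset hierarchy and restores each task's bandwidth contribution, and the new nr_deadline_tasks check makes dl_update_tasks_root_domain() bail out before iterating the tasks of a cpuset that has no SCHED_DEADLINE tasks at all. The loop body sits in the context elided between the hunks above; in the upstream tree it reads:

	css_task_iter_start(&cs->css, 0, &it);

	while ((task = css_task_iter_next(&it)))
		dl_add_task_root_domain(task);

	css_task_iter_end(&it);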
@@ -1315,7 +1351,7 @@
 	 * Newly added CPUs will be removed from effective_cpus and
 	 * newly deleted ones will be added back to effective_cpus.
 	 */
-	raw_spin_lock_irq(&callback_lock);
+	spin_lock_irq(&callback_lock);
 	if (adding) {
 		cpumask_or(parent->subparts_cpus,
 			   parent->subparts_cpus, tmp->addmask);
@@ -1337,7 +1373,7 @@
 
 	if (cpuset->partition_root_state != new_prs)
 		cpuset->partition_root_state = new_prs;
-	raw_spin_unlock_irq(&callback_lock);
+	spin_unlock_irq(&callback_lock);
 
 	return cmd == partcmd_update;
 }
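
This pair of hunks is the first of the mechanical callback_lock conversions that make up most of the remainder of the diff: every raw_spin_lock_irq()/raw_spin_unlock_irq() call (and the irqsave/irqrestore variants near the end of the file) becomes the corresponding spin_*() call on the now-plain spinlock_t, with no change to what the lock protects.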
@@ -1440,7 +1476,7 @@
 			continue;
 		rcu_read_unlock();
 
-		raw_spin_lock_irq(&callback_lock);
+		spin_lock_irq(&callback_lock);
 
 		cpumask_copy(cp->effective_cpus, tmp->new_cpus);
 		if (cp->nr_subparts_cpus && (new_prs != PRS_ENABLED)) {
@@ -1474,7 +1510,7 @@
 		if (new_prs != cp->partition_root_state)
 			cp->partition_root_state = new_prs;
 
-		raw_spin_unlock_irq(&callback_lock);
+		spin_unlock_irq(&callback_lock);
 
 		WARN_ON(!is_in_v2_mode() &&
 			!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
@@ -1603,7 +1639,7 @@
 		return -EINVAL;
 	}
 
-	raw_spin_lock_irq(&callback_lock);
+	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
 	cpumask_copy(cs->cpus_requested, trialcs->cpus_requested);
 
@@ -1614,7 +1650,7 @@
 		cpumask_and(cs->subparts_cpus, cs->subparts_cpus, cs->cpus_allowed);
 		cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus);
 	}
-	raw_spin_unlock_irq(&callback_lock);
+	spin_unlock_irq(&callback_lock);
 
 	update_cpumasks_hier(cs, &tmp);
 
@@ -1808,9 +1844,9 @@
 			continue;
 		rcu_read_unlock();
 
-		raw_spin_lock_irq(&callback_lock);
+		spin_lock_irq(&callback_lock);
 		cp->effective_mems = *new_mems;
-		raw_spin_unlock_irq(&callback_lock);
+		spin_unlock_irq(&callback_lock);
 
 		WARN_ON(!is_in_v2_mode() &&
 			!nodes_equal(cp->mems_allowed, cp->effective_mems));
@@ -1878,9 +1914,9 @@
 	if (retval < 0)
 		goto done;
 
-	raw_spin_lock_irq(&callback_lock);
+	spin_lock_irq(&callback_lock);
 	cs->mems_allowed = trialcs->mems_allowed;
-	raw_spin_unlock_irq(&callback_lock);
+	spin_unlock_irq(&callback_lock);
 
 	/* use trialcs->mems_allowed as a temp variable */
 	update_nodemasks_hier(cs, &trialcs->mems_allowed);
@@ -1971,9 +2007,9 @@
 	spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
 			|| (is_spread_page(cs) != is_spread_page(trialcs)));
 
-	raw_spin_lock_irq(&callback_lock);
+	spin_lock_irq(&callback_lock);
 	cs->flags = trialcs->flags;
-	raw_spin_unlock_irq(&callback_lock);
+	spin_unlock_irq(&callback_lock);
 
 	if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
 		rebuild_sched_domains_locked();
@@ -2059,9 +2095,9 @@
 		rebuild_sched_domains_locked();
 out:
 	if (!err) {
-		raw_spin_lock_irq(&callback_lock);
+		spin_lock_irq(&callback_lock);
 		cs->partition_root_state = new_prs;
-		raw_spin_unlock_irq(&callback_lock);
+		spin_unlock_irq(&callback_lock);
 	}
 
 	free_cpumasks(NULL, &tmpmask);
@@ -2171,16 +2207,23 @@
 
 static struct cpuset *cpuset_attach_old_cs;
 
+static void reset_migrate_dl_data(struct cpuset *cs)
+{
+	cs->nr_migrate_dl_tasks = 0;
+	cs->sum_migrate_dl_bw = 0;
+}
+
 /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
 static int cpuset_can_attach(struct cgroup_taskset *tset)
 {
 	struct cgroup_subsys_state *css;
-	struct cpuset *cs;
+	struct cpuset *cs, *oldcs;
 	struct task_struct *task;
 	int ret;
 
 	/* used later by cpuset_attach() */
 	cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
+	oldcs = cpuset_attach_old_cs;
 	cs = css_cs(css);
 
 	mutex_lock(&cpuset_mutex);
@@ -2192,14 +2235,39 @@
 		goto out_unlock;
 
 	cgroup_taskset_for_each(task, css, tset) {
-		ret = task_can_attach(task, cs->effective_cpus);
+		ret = task_can_attach(task);
 		if (ret)
 			goto out_unlock;
 		ret = security_task_setscheduler(task);
 		if (ret)
 			goto out_unlock;
+
+		if (dl_task(task)) {
+			cs->nr_migrate_dl_tasks++;
+			cs->sum_migrate_dl_bw += task->dl.dl_bw;
+		}
 	}
 
+	if (!cs->nr_migrate_dl_tasks)
+		goto out_success;
+
+	if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) {
+		int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus);
+
+		if (unlikely(cpu >= nr_cpu_ids)) {
+			reset_migrate_dl_data(cs);
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw);
+		if (ret) {
+			reset_migrate_dl_data(cs);
+			goto out_unlock;
+		}
+	}
+
+out_success:
 	/*
 	 * Mark attach is in progress. This makes validate_change() fail
 	 * changes which zero cpus/mems_allowed.
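
The deadline admission logic above is two-phase. The per-task loop counts the migrating SCHED_DEADLINE tasks and sums their admitted bandwidth (task->dl.dl_bw). If the destination cpuset shares no effective CPUs with the source, the tasks are moving to a different root domain, so the summed bandwidth must be reserved there before any task moves: one active destination CPU is enough to name the target root domain, and a failed reservation (or an attach racing with hotplug that leaves no active CPU) aborts the whole operation. dl_bw_alloc()/dl_bw_free() come from the scheduler side of this series; as merged upstream, their contract is roughly:

	/* kernel/sched/core.c (scheduler side of the series; shown for
	 * reference only): reserve or release dl_bw units of utilization
	 * in the root domain containing @cpu. dl_bw_alloc() returns 0 on
	 * success or -EBUSY when the domain lacks spare capacity.
	 */
	int dl_bw_alloc(int cpu, u64 dl_bw);
	void dl_bw_free(int cpu, u64 dl_bw);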
@@ -2214,11 +2282,23 @@
 static void cpuset_cancel_attach(struct cgroup_taskset *tset)
 {
 	struct cgroup_subsys_state *css;
+	struct cpuset *cs;
 
 	cgroup_taskset_first(tset, &css);
+	cs = css_cs(css);
 
 	mutex_lock(&cpuset_mutex);
-	css_cs(css)->attach_in_progress--;
+	cs->attach_in_progress--;
+	if (!cs->attach_in_progress)
+		wake_up(&cpuset_attach_wq);
+
+	if (cs->nr_migrate_dl_tasks) {
+		int cpu = cpumask_any(cs->effective_cpus);
+
+		dl_bw_free(cpu, cs->sum_migrate_dl_bw);
+		reset_migrate_dl_data(cs);
+	}
+
 	mutex_unlock(&cpuset_mutex);
 }
 
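
The cancel path mirrors cpuset_can_attach(): wake any waiters once attach_in_progress drops to zero and, if deadline bandwidth was reserved, hand it back with dl_bw_free() before clearing the scratch counters; cpumask_any(cs->effective_cpus) picks an arbitrary CPU only to identify the destination root domain.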
@@ -2290,6 +2370,12 @@
 	}
 
 	cs->old_mems_allowed = cpuset_attach_nodemask_to;
+
+	if (cs->nr_migrate_dl_tasks) {
+		cs->nr_deadline_tasks += cs->nr_migrate_dl_tasks;
+		oldcs->nr_deadline_tasks -= cs->nr_migrate_dl_tasks;
+		reset_migrate_dl_data(cs);
+	}
 
 	cs->attach_in_progress--;
 	if (!cs->attach_in_progress)
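
On the success path the scratch counters become persistent accounting: the destination cpuset folds the migrated tasks into nr_deadline_tasks, the source gives them up, and reset_migrate_dl_data() rearms the fields for the next attach. oldcs is the source cpuset saved through cpuset_attach_old_cs in cpuset_can_attach(); the hunk declaring it inside cpuset_attach() is not part of this excerpt.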
@@ -2476,7 +2562,7 @@
 	cpuset_filetype_t type = seq_cft(sf)->private;
 	int ret = 0;
 
-	raw_spin_lock_irq(&callback_lock);
+	spin_lock_irq(&callback_lock);
 
 	switch (type) {
 	case FILE_CPULIST:
@@ -2498,7 +2584,7 @@
 		ret = -EINVAL;
 	}
 
-	raw_spin_unlock_irq(&callback_lock);
+	spin_unlock_irq(&callback_lock);
 	return ret;
 }
 
@@ -2811,14 +2897,14 @@
 
 	cpuset_inc();
 
-	raw_spin_lock_irq(&callback_lock);
+	spin_lock_irq(&callback_lock);
 	if (is_in_v2_mode()) {
 		cpumask_copy(cs->effective_cpus, parent->effective_cpus);
 		cs->effective_mems = parent->effective_mems;
 		cs->use_parent_ecpus = true;
 		parent->child_ecpus_count++;
 	}
-	raw_spin_unlock_irq(&callback_lock);
+	spin_unlock_irq(&callback_lock);
 
 	if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
 		goto out_unlock;
@@ -2845,13 +2931,13 @@
 	}
 	rcu_read_unlock();
 
-	raw_spin_lock_irq(&callback_lock);
+	spin_lock_irq(&callback_lock);
 	cs->mems_allowed = parent->mems_allowed;
 	cs->effective_mems = parent->mems_allowed;
 	cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
 	cpumask_copy(cs->cpus_requested, parent->cpus_requested);
 	cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
-	raw_spin_unlock_irq(&callback_lock);
+	spin_unlock_irq(&callback_lock);
 out_unlock:
 	mutex_unlock(&cpuset_mutex);
 	put_online_cpus();
@@ -2907,7 +2993,7 @@
 static void cpuset_bind(struct cgroup_subsys_state *root_css)
 {
 	mutex_lock(&cpuset_mutex);
-	raw_spin_lock_irq(&callback_lock);
+	spin_lock_irq(&callback_lock);
 
 	if (is_in_v2_mode()) {
 		cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
@@ -2918,7 +3004,7 @@
 		top_cpuset.mems_allowed = top_cpuset.effective_mems;
 	}
 
-	raw_spin_unlock_irq(&callback_lock);
+	spin_unlock_irq(&callback_lock);
 	mutex_unlock(&cpuset_mutex);
 }
 
@@ -3018,12 +3104,12 @@
 {
 	bool is_empty;
 
-	raw_spin_lock_irq(&callback_lock);
+	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->cpus_allowed, new_cpus);
 	cpumask_copy(cs->effective_cpus, new_cpus);
 	cs->mems_allowed = *new_mems;
 	cs->effective_mems = *new_mems;
-	raw_spin_unlock_irq(&callback_lock);
+	spin_unlock_irq(&callback_lock);
 
 	/*
 	 * Don't call update_tasks_cpumask() if the cpuset becomes empty,
@@ -3060,10 +3146,10 @@
 	if (nodes_empty(*new_mems))
 		*new_mems = parent_cs(cs)->effective_mems;
 
-	raw_spin_lock_irq(&callback_lock);
+	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->effective_cpus, new_cpus);
 	cs->effective_mems = *new_mems;
-	raw_spin_unlock_irq(&callback_lock);
+	spin_unlock_irq(&callback_lock);
 
 	if (cpus_updated)
 		update_tasks_cpumask(cs);
@@ -3130,10 +3216,10 @@
 	if (is_partition_root(cs) && (cpumask_empty(&new_cpus) ||
 	   (parent->partition_root_state == PRS_ERROR))) {
 		if (cs->nr_subparts_cpus) {
-			raw_spin_lock_irq(&callback_lock);
+			spin_lock_irq(&callback_lock);
 			cs->nr_subparts_cpus = 0;
 			cpumask_clear(cs->subparts_cpus);
-			raw_spin_unlock_irq(&callback_lock);
+			spin_unlock_irq(&callback_lock);
 			compute_effective_cpumask(&new_cpus, cs, parent);
 		}
 
@@ -3147,9 +3233,9 @@
 		    cpumask_empty(&new_cpus)) {
 			update_parent_subparts_cpumask(cs, partcmd_disable,
 						       NULL, tmp);
-			raw_spin_lock_irq(&callback_lock);
+			spin_lock_irq(&callback_lock);
 			cs->partition_root_state = PRS_ERROR;
-			raw_spin_unlock_irq(&callback_lock);
+			spin_unlock_irq(&callback_lock);
 		}
 		cpuset_force_rebuild();
 	}
@@ -3229,7 +3315,7 @@
 
 	/* synchronize cpus_allowed to cpu_active_mask */
 	if (cpus_updated) {
-		raw_spin_lock_irq(&callback_lock);
+		spin_lock_irq(&callback_lock);
 		if (!on_dfl)
 			cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
 		/*
@@ -3249,17 +3335,17 @@
 			}
 		}
 		cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
-		raw_spin_unlock_irq(&callback_lock);
+		spin_unlock_irq(&callback_lock);
 		/* we don't mess with cpumasks of tasks in top_cpuset */
 	}
 
 	/* synchronize mems_allowed to N_MEMORY */
 	if (mems_updated) {
-		raw_spin_lock_irq(&callback_lock);
+		spin_lock_irq(&callback_lock);
 		if (!on_dfl)
 			top_cpuset.mems_allowed = new_mems;
 		top_cpuset.effective_mems = new_mems;
-		raw_spin_unlock_irq(&callback_lock);
+		spin_unlock_irq(&callback_lock);
 		update_tasks_nodemask(&top_cpuset);
 	}
 
@@ -3368,11 +3454,11 @@
 {
 	unsigned long flags;
 
-	raw_spin_lock_irqsave(&callback_lock, flags);
+	spin_lock_irqsave(&callback_lock, flags);
 	rcu_read_lock();
 	guarantee_online_cpus(tsk, pmask);
 	rcu_read_unlock();
-	raw_spin_unlock_irqrestore(&callback_lock, flags);
+	spin_unlock_irqrestore(&callback_lock, flags);
 }
 EXPORT_SYMBOL_GPL(cpuset_cpus_allowed);
 /**
@@ -3441,11 +3527,11 @@
 	nodemask_t mask;
 	unsigned long flags;
 
-	raw_spin_lock_irqsave(&callback_lock, flags);
+	spin_lock_irqsave(&callback_lock, flags);
 	rcu_read_lock();
 	guarantee_online_mems(task_cs(tsk), &mask);
 	rcu_read_unlock();
-	raw_spin_unlock_irqrestore(&callback_lock, flags);
+	spin_unlock_irqrestore(&callback_lock, flags);
 
 	return mask;
 }
@@ -3537,14 +3623,14 @@
 		return true;
 
 	/* Not hardwall and node outside mems_allowed: scan up cpusets */
-	raw_spin_lock_irqsave(&callback_lock, flags);
+	spin_lock_irqsave(&callback_lock, flags);
 
 	rcu_read_lock();
 	cs = nearest_hardwall_ancestor(task_cs(current));
 	allowed = node_isset(node, cs->mems_allowed);
 	rcu_read_unlock();
 
-	raw_spin_unlock_irqrestore(&callback_lock, flags);
+	spin_unlock_irqrestore(&callback_lock, flags);
 	return allowed;
 }
 