.. | .. |
---|
33 | 33 | #include <linux/interrupt.h> |
---|
34 | 34 | #include <linux/kernel.h> |
---|
35 | 35 | #include <linux/kmod.h> |
---|
| 36 | +#include <linux/kthread.h> |
---|
36 | 37 | #include <linux/list.h> |
---|
37 | 38 | #include <linux/mempolicy.h> |
---|
38 | 39 | #include <linux/mm.h> |
---|
39 | 40 | #include <linux/memory.h> |
---|
40 | 41 | #include <linux/export.h> |
---|
41 | 42 | #include <linux/mount.h> |
---|
| 43 | +#include <linux/fs_context.h> |
---|
42 | 44 | #include <linux/namei.h> |
---|
43 | 45 | #include <linux/pagemap.h> |
---|
44 | 46 | #include <linux/proc_fs.h> |
---|
45 | 47 | #include <linux/rcupdate.h> |
---|
46 | 48 | #include <linux/sched.h> |
---|
| 49 | +#include <linux/sched/deadline.h> |
---|
47 | 50 | #include <linux/sched/mm.h> |
---|
48 | 51 | #include <linux/sched/task.h> |
---|
49 | 52 | #include <linux/seq_file.h> |
---|
.. | .. |
---|
63 | 66 | #include <linux/mutex.h> |
---|
64 | 67 | #include <linux/cgroup.h> |
---|
65 | 68 | #include <linux/wait.h> |
---|
| 69 | + |
---|
| 70 | +#include <trace/hooks/sched.h> |
---|
| 71 | +#include <trace/hooks/cgroup.h> |
---|
66 | 72 | |
---|
67 | 73 | DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key); |
---|
68 | 74 | DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key); |
---|
.. | .. |
---|
111 | 117 | nodemask_t effective_mems; |
---|
112 | 118 | |
---|
113 | 119 | /* |
---|
| 120 | + * CPUs allocated to child sub-partitions (default hierarchy only) |
---|
| 121 | + * - CPUs granted by the parent = effective_cpus U subparts_cpus |
---|
| 122 | + * - effective_cpus and subparts_cpus are mutually exclusive. |
---|
| 123 | + * |
---|
| 124 | + * effective_cpus contains only onlined CPUs, but subparts_cpus |
---|
| 125 | + * may have offlined ones. |
---|
| 126 | + */ |
---|
| 127 | + cpumask_var_t subparts_cpus; |
---|
| 128 | + |
---|
| 129 | + /* |
---|
114 | 130 | * This is old Memory Nodes tasks took on. |
---|
115 | 131 | * |
---|
116 | 132 | * - top_cpuset.old_mems_allowed is initialized to mems_allowed. |
---|
.. | .. |
---|
135 | 151 | |
---|
136 | 152 | /* for custom sched domain */ |
---|
137 | 153 | int relax_domain_level; |
---|
| 154 | + |
---|
| 155 | + /* number of CPUs in subparts_cpus */ |
---|
| 156 | + int nr_subparts_cpus; |
---|
| 157 | + |
---|
| 158 | + /* partition root state */ |
---|
| 159 | + int partition_root_state; |
---|
| 160 | + |
---|
| 161 | + /* |
---|
| 162 | + * Default hierarchy only: |
---|
| 163 | + * use_parent_ecpus - set if using parent's effective_cpus |
---|
| 164 | + * child_ecpus_count - # of children with use_parent_ecpus set |
---|
| 165 | + */ |
---|
| 166 | + int use_parent_ecpus; |
---|
| 167 | + int child_ecpus_count; |
---|
| 168 | + |
---|
| 169 | + /* |
---|
| 170 | + * number of SCHED_DEADLINE tasks attached to this cpuset, so that we |
---|
| 171 | + * know when to rebuild associated root domain bandwidth information. |
---|
| 172 | + */ |
---|
| 173 | + int nr_deadline_tasks; |
---|
| 174 | + int nr_migrate_dl_tasks; |
---|
| 175 | + u64 sum_migrate_dl_bw; |
---|
| 176 | +}; |
---|
| 177 | + |
---|
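For intuition, here is a small hypothetical illustration of the effective_cpus/subparts_cpus invariant documented in the struct fields above (the CPU numbers are invented for the example):

```c
/*
 * Hypothetical example for one cpuset on the default hierarchy:
 *
 *   CPUs granted by the parent : 0-7
 *   owned by child partitions  : 4-7   ->  subparts_cpus  = 4-7
 *   left for this cpuset       : 0-3   ->  effective_cpus = 0-3
 *
 * effective_cpus U subparts_cpus == the CPUs granted by the parent,
 * and the two masks never overlap. If CPU 7 goes offline it stays in
 * subparts_cpus but can never appear in effective_cpus.
 */
```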
| 178 | +/* |
---|
| 179 | + * Partition root states: |
---|
| 180 | + * |
---|
| 181 | + * 0 - not a partition root |
---|
| 182 | + * |
---|
| 183 | + * 1 - partition root |
---|
| 184 | + * |
---|
| 185 | + * -1 - invalid partition root |
---|
| 186 | + * None of the cpus in cpus_allowed can be put into the parent's |
---|
| 187 | + * subparts_cpus. In this case, the cpuset is not a real partition |
---|
| 188 | + * root anymore. However, the CPU_EXCLUSIVE bit will still be set |
---|
| 189 | + * and the cpuset can be restored back to a partition root if the |
---|
| 190 | + * parent cpuset can give more CPUs back to this child cpuset. |
---|
| 191 | + */ |
---|
| 192 | +#define PRS_DISABLED 0 |
---|
| 193 | +#define PRS_ENABLED 1 |
---|
| 194 | +#define PRS_ERROR -1 |
---|
| 195 | + |
---|
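As a minimal sketch of how these three states surface to userspace, the mapping below is illustrative only: prs_label() is a hypothetical helper, and the exact strings shown by the cgroup v2 `cpuset.cpus.partition` file are an assumption that may differ by kernel version.

```c
/* Illustrative mapping of partition_root_state to a user-visible label. */
static const char *prs_label(int prs)
{
	switch (prs) {
	case PRS_ENABLED:
		return "root";		/* assumed label */
	case PRS_ERROR:
		return "root invalid";	/* assumed label */
	case PRS_DISABLED:
	default:
		return "member";	/* assumed label */
	}
}
```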
| 196 | +/* |
---|
| 197 | + * Temporary cpumasks for working with partitions that are passed among |
---|
| 198 | + * functions to avoid memory allocation in inner functions. |
---|
| 199 | + */ |
---|
| 200 | +struct tmpmasks { |
---|
| 201 | + cpumask_var_t addmask, delmask; /* For partition root */ |
---|
| 202 | + cpumask_var_t new_cpus; /* For update_cpumasks_hier() */ |
---|
138 | 203 | }; |
---|
139 | 204 | |
---|
140 | 205 | static inline struct cpuset *css_cs(struct cgroup_subsys_state *css) |
---|
.. | .. |
---|
153 | 218 | return css_cs(cs->css.parent); |
---|
154 | 219 | } |
---|
155 | 220 | |
---|
156 | | -#ifdef CONFIG_NUMA |
---|
157 | | -static inline bool task_has_mempolicy(struct task_struct *task) |
---|
| 221 | +void inc_dl_tasks_cs(struct task_struct *p) |
---|
158 | 222 | { |
---|
159 | | - return task->mempolicy; |
---|
160 | | -} |
---|
161 | | -#else |
---|
162 | | -static inline bool task_has_mempolicy(struct task_struct *task) |
---|
163 | | -{ |
---|
164 | | - return false; |
---|
165 | | -} |
---|
166 | | -#endif |
---|
| 223 | + struct cpuset *cs = task_cs(p); |
---|
167 | 224 | |
---|
| 225 | + cs->nr_deadline_tasks++; |
---|
| 226 | +} |
---|
| 227 | + |
---|
| 228 | +void dec_dl_tasks_cs(struct task_struct *p) |
---|
| 229 | +{ |
---|
| 230 | + struct cpuset *cs = task_cs(p); |
---|
| 231 | + |
---|
| 232 | + cs->nr_deadline_tasks--; |
---|
| 233 | +} |
---|
168 | 234 | |
---|
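The two helpers above are meant to be driven from the scheduler whenever a task's policy switches into or out of SCHED_DEADLINE, with cpuset_mutex held via the cpuset_lock()/cpuset_unlock() wrappers added later in this patch. A hedged sketch of such a call site follows; example_policy_change() and its arguments are hypothetical, not part of the patch.

```c
/*
 * Hypothetical call-site sketch: keep the per-cpuset SCHED_DEADLINE
 * count in sync when a task's policy changes. The real call sites live
 * on the scheduler side; this only shows the intended pairing.
 */
static void example_policy_change(struct task_struct *p,
				  bool was_dl, bool becomes_dl)
{
	cpuset_lock();			/* serializes against cpuset changes */
	if (becomes_dl && !was_dl)
		inc_dl_tasks_cs(p);
	else if (was_dl && !becomes_dl)
		dec_dl_tasks_cs(p);
	cpuset_unlock();
}
```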
169 | 235 | /* bits in struct cpuset flags field */ |
---|
170 | 236 | typedef enum { |
---|
.. | .. |
---|
219 | 285 | return test_bit(CS_SPREAD_SLAB, &cs->flags); |
---|
220 | 286 | } |
---|
221 | 287 | |
---|
| 288 | +static inline int is_partition_root(const struct cpuset *cs) |
---|
| 289 | +{ |
---|
| 290 | + return cs->partition_root_state > 0; |
---|
| 291 | +} |
---|
| 292 | + |
---|
222 | 293 | static struct cpuset top_cpuset = { |
---|
223 | 294 | .flags = ((1 << CS_ONLINE) | (1 << CS_CPU_EXCLUSIVE) | |
---|
224 | 295 | (1 << CS_MEM_EXCLUSIVE)), |
---|
| 296 | + .partition_root_state = PRS_ENABLED, |
---|
225 | 297 | }; |
---|
226 | 298 | |
---|
227 | 299 | /** |
---|
.. | .. |
---|
289 | 361 | */ |
---|
290 | 362 | |
---|
291 | 363 | static DEFINE_MUTEX(cpuset_mutex); |
---|
292 | | -static DEFINE_RAW_SPINLOCK(callback_lock); |
---|
| 364 | + |
---|
| 365 | +void cpuset_lock(void) |
---|
| 366 | +{ |
---|
| 367 | + mutex_lock(&cpuset_mutex); |
---|
| 368 | +} |
---|
| 369 | + |
---|
| 370 | +void cpuset_unlock(void) |
---|
| 371 | +{ |
---|
| 372 | + mutex_unlock(&cpuset_mutex); |
---|
| 373 | +} |
---|
| 374 | + |
---|
| 375 | +static DEFINE_SPINLOCK(callback_lock); |
---|
293 | 376 | |
---|
294 | 377 | static struct workqueue_struct *cpuset_migrate_mm_wq; |
---|
295 | 378 | |
---|
296 | 379 | /* |
---|
297 | | - * CPU / memory hotplug is handled asynchronously. |
---|
| 380 | + * CPU / memory hotplug is handled asynchronously for hotplug, |
---|
| 381 | + * but synchronously for resume_cpus. |
---|
298 | 382 | */ |
---|
299 | | -static void cpuset_hotplug_workfn(struct work_struct *work); |
---|
300 | 383 | static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); |
---|
301 | 384 | |
---|
302 | 385 | static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq); |
---|
303 | 386 | |
---|
304 | 387 | /* |
---|
305 | | - * Cgroup v2 behavior is used when on default hierarchy or the |
---|
306 | | - * cgroup_v2_mode flag is set. |
---|
| 388 | + * Cgroup v2 behavior is used on the "cpus" and "mems" control files when |
---|
| 389 | + * on default hierarchy or when the cpuset_v2_mode flag is set by mounting |
---|
| 390 | + * the v1 cpuset cgroup filesystem with the "cpuset_v2_mode" mount option. |
---|
| 391 | + * With v2 behavior, "cpus" and "mems" are always what the users have |
---|
| 392 | + * requested and won't be changed by hotplug events. Only the effective |
---|
| 393 | + * cpus or mems will be affected. |
---|
307 | 394 | */ |
---|
308 | 395 | static inline bool is_in_v2_mode(void) |
---|
309 | 396 | { |
---|
.. | .. |
---|
312 | 399 | } |
---|
313 | 400 | |
---|
314 | 401 | /* |
---|
315 | | - * This is ugly, but preserves the userspace API for existing cpuset |
---|
316 | | - * users. If someone tries to mount the "cpuset" filesystem, we |
---|
317 | | - * silently switch it to mount "cgroup" instead |
---|
318 | | - */ |
---|
319 | | -static struct dentry *cpuset_mount(struct file_system_type *fs_type, |
---|
320 | | - int flags, const char *unused_dev_name, void *data) |
---|
321 | | -{ |
---|
322 | | - struct file_system_type *cgroup_fs = get_fs_type("cgroup"); |
---|
323 | | - struct dentry *ret = ERR_PTR(-ENODEV); |
---|
324 | | - if (cgroup_fs) { |
---|
325 | | - char mountopts[] = |
---|
326 | | - "cpuset,noprefix," |
---|
327 | | - "release_agent=/sbin/cpuset_release_agent"; |
---|
328 | | - ret = cgroup_fs->mount(cgroup_fs, flags, |
---|
329 | | - unused_dev_name, mountopts); |
---|
330 | | - put_filesystem(cgroup_fs); |
---|
331 | | - } |
---|
332 | | - return ret; |
---|
333 | | -} |
---|
334 | | - |
---|
335 | | -static struct file_system_type cpuset_fs_type = { |
---|
336 | | - .name = "cpuset", |
---|
337 | | - .mount = cpuset_mount, |
---|
338 | | -}; |
---|
339 | | - |
---|
340 | | -/* |
---|
341 | | - * Return in pmask the portion of a cpusets's cpus_allowed that |
---|
342 | | - * are online. If none are online, walk up the cpuset hierarchy |
---|
343 | | - * until we find one that does have some online cpus. |
---|
| 402 | + * Return in pmask the portion of a task's cpuset's cpus_allowed that |
---|
| 403 | + * are online and are capable of running the task. If none are found, |
---|
| 404 | + * walk up the cpuset hierarchy until we find one that does have some |
---|
| 405 | + * appropriate cpus. |
---|
344 | 406 | * |
---|
345 | 407 | * One way or another, we guarantee to return some non-empty subset |
---|
346 | | - * of cpu_online_mask. |
---|
| 408 | + * of cpu_active_mask. |
---|
347 | 409 | * |
---|
348 | 410 | * Call with callback_lock or cpuset_mutex held. |
---|
349 | 411 | */ |
---|
350 | | -static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) |
---|
| 412 | +static void guarantee_online_cpus(struct task_struct *tsk, |
---|
| 413 | + struct cpumask *pmask) |
---|
351 | 414 | { |
---|
352 | | - while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) { |
---|
| 415 | + const struct cpumask *possible_mask = task_cpu_possible_mask(tsk); |
---|
| 416 | + struct cpuset *cs; |
---|
| 417 | + |
---|
| 418 | + if (WARN_ON(!cpumask_and(pmask, possible_mask, cpu_active_mask))) |
---|
| 419 | + cpumask_copy(pmask, cpu_active_mask); |
---|
| 420 | + |
---|
| 421 | + rcu_read_lock(); |
---|
| 422 | + cs = task_cs(tsk); |
---|
| 423 | + |
---|
| 424 | + while (!cpumask_intersects(cs->effective_cpus, pmask)) { |
---|
353 | 425 | cs = parent_cs(cs); |
---|
354 | 426 | if (unlikely(!cs)) { |
---|
355 | 427 | /* |
---|
356 | 428 | * The top cpuset doesn't have any online cpu as a |
---|
357 | 429 | * consequence of a race between cpuset_hotplug_work |
---|
358 | 430 | * and cpu hotplug notifier. But we know the top |
---|
359 | | - * cpuset's effective_cpus is on its way to to be |
---|
| 431 | + * cpuset's effective_cpus is on its way to be |
---|
360 | 432 | * identical to cpu_online_mask. |
---|
361 | 433 | */ |
---|
362 | | - cpumask_copy(pmask, cpu_online_mask); |
---|
363 | | - return; |
---|
| 434 | + goto out_unlock; |
---|
364 | 435 | } |
---|
365 | 436 | } |
---|
366 | | - cpumask_and(pmask, cs->effective_cpus, cpu_online_mask); |
---|
| 437 | + cpumask_and(pmask, pmask, cs->effective_cpus); |
---|
| 438 | + |
---|
| 439 | +out_unlock: |
---|
| 440 | + rcu_read_unlock(); |
---|
367 | 441 | } |
---|
368 | 442 | |
---|
369 | 443 | /* |
---|
.. | .. |
---|
420 | 494 | } |
---|
421 | 495 | |
---|
422 | 496 | /** |
---|
| 497 | + * alloc_cpumasks - allocate cpumasks for a cpuset or a tmpmasks structure |
---|
| 498 | + * @cs: the cpuset that has cpumasks to be allocated. |
---|
| 499 | + * @tmp: the tmpmasks structure pointer |
---|
| 500 | + * Return: 0 if successful, -ENOMEM otherwise. |
---|
| 501 | + * |
---|
| 502 | + * Only one of the two input arguments should be non-NULL. |
---|
| 503 | + */ |
---|
| 504 | +static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp) |
---|
| 505 | +{ |
---|
| 506 | + cpumask_var_t *pmask1, *pmask2, *pmask3; |
---|
| 507 | + |
---|
| 508 | + if (cs) { |
---|
| 509 | + pmask1 = &cs->cpus_allowed; |
---|
| 510 | + pmask2 = &cs->effective_cpus; |
---|
| 511 | + pmask3 = &cs->subparts_cpus; |
---|
| 512 | + } else { |
---|
| 513 | + pmask1 = &tmp->new_cpus; |
---|
| 514 | + pmask2 = &tmp->addmask; |
---|
| 515 | + pmask3 = &tmp->delmask; |
---|
| 516 | + } |
---|
| 517 | + |
---|
| 518 | + if (!zalloc_cpumask_var(pmask1, GFP_KERNEL)) |
---|
| 519 | + return -ENOMEM; |
---|
| 520 | + |
---|
| 521 | + if (!zalloc_cpumask_var(pmask2, GFP_KERNEL)) |
---|
| 522 | + goto free_one; |
---|
| 523 | + |
---|
| 524 | + if (!zalloc_cpumask_var(pmask3, GFP_KERNEL)) |
---|
| 525 | + goto free_two; |
---|
| 526 | + |
---|
| 527 | + if (cs && !zalloc_cpumask_var(&cs->cpus_requested, GFP_KERNEL)) |
---|
| 528 | + goto free_three; |
---|
| 529 | + |
---|
| 530 | + return 0; |
---|
| 531 | + |
---|
| 532 | +free_three: |
---|
| 533 | + free_cpumask_var(*pmask3); |
---|
| 534 | +free_two: |
---|
| 535 | + free_cpumask_var(*pmask2); |
---|
| 536 | +free_one: |
---|
| 537 | + free_cpumask_var(*pmask1); |
---|
| 538 | + return -ENOMEM; |
---|
| 539 | +} |
---|
| 540 | + |
---|
| 541 | +/** |
---|
| 542 | + * free_cpumasks - free cpumasks in a cpuset or a tmpmasks structure |
---|
| 543 | + * @cs: the cpuset that has cpumasks to be freed. |
---|
| 544 | + * @tmp: the tmpmasks structure pointer |
---|
| 545 | + */ |
---|
| 546 | +static inline void free_cpumasks(struct cpuset *cs, struct tmpmasks *tmp) |
---|
| 547 | +{ |
---|
| 548 | + if (cs) { |
---|
| 549 | + free_cpumask_var(cs->cpus_allowed); |
---|
| 550 | + free_cpumask_var(cs->cpus_requested); |
---|
| 551 | + free_cpumask_var(cs->effective_cpus); |
---|
| 552 | + free_cpumask_var(cs->subparts_cpus); |
---|
| 553 | + } |
---|
| 554 | + if (tmp) { |
---|
| 555 | + free_cpumask_var(tmp->new_cpus); |
---|
| 556 | + free_cpumask_var(tmp->addmask); |
---|
| 557 | + free_cpumask_var(tmp->delmask); |
---|
| 558 | + } |
---|
| 559 | +} |
---|
| 560 | + |
---|
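A minimal sketch of how the two helpers above are meant to be paired for a scratch tmpmasks; this mirrors update_prstate() later in this patch, while example_with_tmpmasks() itself is hypothetical.

```c
/* Hypothetical caller: allocate a scratch tmpmasks, use it, free it. */
static int example_with_tmpmasks(void)
{
	struct tmpmasks tmp;

	if (alloc_cpumasks(NULL, &tmp))		/* tmpmasks-only allocation */
		return -ENOMEM;

	/* ... compute into tmp.addmask / tmp.delmask / tmp.new_cpus ... */

	free_cpumasks(NULL, &tmp);		/* must mirror the NULL @cs */
	return 0;
}
```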
| 561 | +/** |
---|
423 | 562 | * alloc_trial_cpuset - allocate a trial cpuset |
---|
424 | 563 | * @cs: the cpuset that the trial cpuset duplicates |
---|
425 | 564 | */ |
---|
.. | .. |
---|
431 | 570 | if (!trial) |
---|
432 | 571 | return NULL; |
---|
433 | 572 | |
---|
434 | | - if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) |
---|
435 | | - goto free_cs; |
---|
436 | | - if (!alloc_cpumask_var(&trial->cpus_requested, GFP_KERNEL)) |
---|
437 | | - goto free_allowed; |
---|
438 | | - if (!alloc_cpumask_var(&trial->effective_cpus, GFP_KERNEL)) |
---|
439 | | - goto free_cpus; |
---|
| 573 | + if (alloc_cpumasks(trial, NULL)) { |
---|
| 574 | + kfree(trial); |
---|
| 575 | + return NULL; |
---|
| 576 | + } |
---|
440 | 577 | |
---|
441 | 578 | cpumask_copy(trial->cpus_allowed, cs->cpus_allowed); |
---|
442 | 579 | cpumask_copy(trial->cpus_requested, cs->cpus_requested); |
---|
443 | 580 | cpumask_copy(trial->effective_cpus, cs->effective_cpus); |
---|
444 | 581 | return trial; |
---|
445 | | - |
---|
446 | | -free_cpus: |
---|
447 | | - free_cpumask_var(trial->cpus_requested); |
---|
448 | | -free_allowed: |
---|
449 | | - free_cpumask_var(trial->cpus_allowed); |
---|
450 | | -free_cs: |
---|
451 | | - kfree(trial); |
---|
452 | | - return NULL; |
---|
453 | 582 | } |
---|
454 | 583 | |
---|
455 | 584 | /** |
---|
456 | | - * free_trial_cpuset - free the trial cpuset |
---|
457 | | - * @trial: the trial cpuset to be freed |
---|
| 585 | + * free_cpuset - free the cpuset |
---|
| 586 | + * @cs: the cpuset to be freed |
---|
458 | 587 | */ |
---|
459 | | -static void free_trial_cpuset(struct cpuset *trial) |
---|
| 588 | +static inline void free_cpuset(struct cpuset *cs) |
---|
460 | 589 | { |
---|
461 | | - free_cpumask_var(trial->effective_cpus); |
---|
462 | | - free_cpumask_var(trial->cpus_requested); |
---|
463 | | - free_cpumask_var(trial->cpus_allowed); |
---|
464 | | - kfree(trial); |
---|
| 590 | + free_cpumasks(cs, NULL); |
---|
| 591 | + kfree(cs); |
---|
465 | 592 | } |
---|
466 | 593 | |
---|
467 | 594 | /* |
---|
.. | .. |
---|
612 | 739 | * load balancing domains (sched domains) as specified by that partial |
---|
613 | 740 | * partition. |
---|
614 | 741 | * |
---|
615 | | - * See "What is sched_load_balance" in Documentation/cgroup-v1/cpusets.txt |
---|
| 742 | + * See "What is sched_load_balance" in Documentation/admin-guide/cgroup-v1/cpusets.rst |
---|
616 | 743 | * for a background explanation of this. |
---|
617 | 744 | * |
---|
618 | 745 | * Does not return errors, on the theory that the callers of this |
---|
.. | .. |
---|
623 | 750 | * Must be called with cpuset_mutex held. |
---|
624 | 751 | * |
---|
625 | 752 | * The three key local variables below are: |
---|
626 | | - * q - a linked-list queue of cpuset pointers, used to implement a |
---|
627 | | - * top-down scan of all cpusets. This scan loads a pointer |
---|
628 | | - * to each cpuset marked is_sched_load_balance into the |
---|
629 | | - * array 'csa'. For our purposes, rebuilding the schedulers |
---|
630 | | - * sched domains, we can ignore !is_sched_load_balance cpusets. |
---|
| 753 | + * cp - cpuset pointer, used (together with pos_css) to perform a |
---|
| 754 | + * top-down scan of all cpusets. For our purposes, rebuilding |
---|
| 755 | + * the schedulers sched domains, we can ignore !is_sched_load_ |
---|
| 756 | + * balance cpusets. |
---|
631 | 757 | * csa - (for CpuSet Array) Array of pointers to all the cpusets |
---|
632 | 758 | * that need to be load balanced, for convenient iterative |
---|
633 | 759 | * access by the subsequent code that finds the best partition, |
---|
.. | .. |
---|
658 | 784 | static int generate_sched_domains(cpumask_var_t **domains, |
---|
659 | 785 | struct sched_domain_attr **attributes) |
---|
660 | 786 | { |
---|
661 | | - struct cpuset *cp; /* scans q */ |
---|
| 787 | + struct cpuset *cp; /* top-down scan of cpusets */ |
---|
662 | 788 | struct cpuset **csa; /* array of all cpuset ptrs */ |
---|
663 | 789 | int csn; /* how many cpuset ptrs in csa so far */ |
---|
664 | 790 | int i, j, k; /* indices for partition finding loops */ |
---|
.. | .. |
---|
667 | 793 | int ndoms = 0; /* number of sched domains in result */ |
---|
668 | 794 | int nslot; /* next empty doms[] struct cpumask slot */ |
---|
669 | 795 | struct cgroup_subsys_state *pos_css; |
---|
| 796 | + bool root_load_balance = is_sched_load_balance(&top_cpuset); |
---|
670 | 797 | |
---|
671 | 798 | doms = NULL; |
---|
672 | 799 | dattr = NULL; |
---|
673 | 800 | csa = NULL; |
---|
674 | 801 | |
---|
675 | 802 | /* Special case for the 99% of systems with one, full, sched domain */ |
---|
676 | | - if (is_sched_load_balance(&top_cpuset)) { |
---|
| 803 | + if (root_load_balance && !top_cpuset.nr_subparts_cpus) { |
---|
677 | 804 | ndoms = 1; |
---|
678 | 805 | doms = alloc_sched_domains(ndoms); |
---|
679 | 806 | if (!doms) |
---|
.. | .. |
---|
696 | 823 | csn = 0; |
---|
697 | 824 | |
---|
698 | 825 | rcu_read_lock(); |
---|
| 826 | + if (root_load_balance) |
---|
| 827 | + csa[csn++] = &top_cpuset; |
---|
699 | 828 | cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) { |
---|
700 | 829 | if (cp == &top_cpuset) |
---|
701 | 830 | continue; |
---|
.. | .. |
---|
706 | 835 | * parent's cpus, so just skip them, and then we call |
---|
707 | 836 | * update_domain_attr_tree() to calc relax_domain_level of |
---|
708 | 837 | * the corresponding sched domain. |
---|
| 838 | + * |
---|
| 839 | + * If root is load-balancing, we can skip @cp if it |
---|
| 840 | + * is a subset of the root's effective_cpus. |
---|
709 | 841 | */ |
---|
710 | 842 | if (!cpumask_empty(cp->cpus_allowed) && |
---|
711 | 843 | !(is_sched_load_balance(cp) && |
---|
.. | .. |
---|
713 | 845 | housekeeping_cpumask(HK_FLAG_DOMAIN)))) |
---|
714 | 846 | continue; |
---|
715 | 847 | |
---|
716 | | - if (is_sched_load_balance(cp)) |
---|
| 848 | + if (root_load_balance && |
---|
| 849 | + cpumask_subset(cp->cpus_allowed, top_cpuset.effective_cpus)) |
---|
| 850 | + continue; |
---|
| 851 | + |
---|
| 852 | + if (is_sched_load_balance(cp) && |
---|
| 853 | + !cpumask_empty(cp->effective_cpus)) |
---|
717 | 854 | csa[csn++] = cp; |
---|
718 | 855 | |
---|
719 | | - /* skip @cp's subtree */ |
---|
720 | | - pos_css = css_rightmost_descendant(pos_css); |
---|
| 856 | + /* skip @cp's subtree if not a partition root */ |
---|
| 857 | + if (!is_partition_root(cp)) |
---|
| 858 | + pos_css = css_rightmost_descendant(pos_css); |
---|
721 | 859 | } |
---|
722 | 860 | rcu_read_unlock(); |
---|
723 | 861 | |
---|
.. | .. |
---|
820 | 958 | return ndoms; |
---|
821 | 959 | } |
---|
822 | 960 | |
---|
| 961 | +static void dl_update_tasks_root_domain(struct cpuset *cs) |
---|
| 962 | +{ |
---|
| 963 | + struct css_task_iter it; |
---|
| 964 | + struct task_struct *task; |
---|
| 965 | + |
---|
| 966 | + if (cs->nr_deadline_tasks == 0) |
---|
| 967 | + return; |
---|
| 968 | + |
---|
| 969 | + css_task_iter_start(&cs->css, 0, &it); |
---|
| 970 | + |
---|
| 971 | + while ((task = css_task_iter_next(&it))) |
---|
| 972 | + dl_add_task_root_domain(task); |
---|
| 973 | + |
---|
| 974 | + css_task_iter_end(&it); |
---|
| 975 | +} |
---|
| 976 | + |
---|
| 977 | +static void dl_rebuild_rd_accounting(void) |
---|
| 978 | +{ |
---|
| 979 | + struct cpuset *cs = NULL; |
---|
| 980 | + struct cgroup_subsys_state *pos_css; |
---|
| 981 | + |
---|
| 982 | + lockdep_assert_held(&cpuset_mutex); |
---|
| 983 | + lockdep_assert_cpus_held(); |
---|
| 984 | + lockdep_assert_held(&sched_domains_mutex); |
---|
| 985 | + |
---|
| 986 | + rcu_read_lock(); |
---|
| 987 | + |
---|
| 988 | + /* |
---|
| 989 | + * Clear default root domain DL accounting, it will be computed again |
---|
| 990 | + * if a task belongs to it. |
---|
| 991 | + */ |
---|
| 992 | + dl_clear_root_domain(&def_root_domain); |
---|
| 993 | + |
---|
| 994 | + cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { |
---|
| 995 | + |
---|
| 996 | + if (cpumask_empty(cs->effective_cpus)) { |
---|
| 997 | + pos_css = css_rightmost_descendant(pos_css); |
---|
| 998 | + continue; |
---|
| 999 | + } |
---|
| 1000 | + |
---|
| 1001 | + css_get(&cs->css); |
---|
| 1002 | + |
---|
| 1003 | + rcu_read_unlock(); |
---|
| 1004 | + |
---|
| 1005 | + dl_update_tasks_root_domain(cs); |
---|
| 1006 | + |
---|
| 1007 | + rcu_read_lock(); |
---|
| 1008 | + css_put(&cs->css); |
---|
| 1009 | + } |
---|
| 1010 | + rcu_read_unlock(); |
---|
| 1011 | +} |
---|
| 1012 | + |
---|
| 1013 | +static void |
---|
| 1014 | +partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[], |
---|
| 1015 | + struct sched_domain_attr *dattr_new) |
---|
| 1016 | +{ |
---|
| 1017 | + mutex_lock(&sched_domains_mutex); |
---|
| 1018 | + partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); |
---|
| 1019 | + dl_rebuild_rd_accounting(); |
---|
| 1020 | + mutex_unlock(&sched_domains_mutex); |
---|
| 1021 | +} |
---|
| 1022 | + |
---|
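For reference, this is the lock nesting that the three lockdep assertions in dl_rebuild_rd_accounting() encode, as inferred from the callers added in this patch:

```c
/*
 * Expected nesting when dl_rebuild_rd_accounting() runs, e.g. via
 * rebuild_sched_domains():
 *
 *   get_online_cpus();
 *     mutex_lock(&cpuset_mutex);
 *       ... rebuild_sched_domains_locked() ...
 *         mutex_lock(&sched_domains_mutex);   // partition_and_rebuild_sched_domains()
 *           dl_rebuild_rd_accounting();
 *         mutex_unlock(&sched_domains_mutex);
 *     mutex_unlock(&cpuset_mutex);
 *   put_online_cpus();
 */
```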
823 | 1023 | /* |
---|
824 | 1024 | * Rebuild scheduler domains. |
---|
825 | 1025 | * |
---|
.. | .. |
---|
833 | 1033 | */ |
---|
834 | 1034 | static void rebuild_sched_domains_locked(void) |
---|
835 | 1035 | { |
---|
| 1036 | + struct cgroup_subsys_state *pos_css; |
---|
836 | 1037 | struct sched_domain_attr *attr; |
---|
837 | 1038 | cpumask_var_t *doms; |
---|
| 1039 | + struct cpuset *cs; |
---|
838 | 1040 | int ndoms; |
---|
839 | 1041 | |
---|
840 | 1042 | lockdep_assert_held(&cpuset_mutex); |
---|
841 | | - get_online_cpus(); |
---|
842 | 1043 | |
---|
843 | 1044 | /* |
---|
844 | | - * We have raced with CPU hotplug. Don't do anything to avoid |
---|
| 1045 | + * If we have raced with CPU hotplug, return early to avoid |
---|
845 | 1046 | * passing doms with offlined cpu to partition_sched_domains(). |
---|
846 | | - * Anyways, hotplug work item will rebuild sched domains. |
---|
| 1047 | + * Anyways, cpuset_hotplug_workfn() will rebuild sched domains. |
---|
| 1048 | + * |
---|
| 1049 | + * With no CPUs in any subpartitions, top_cpuset's effective CPUs |
---|
| 1050 | + * should be the same as the active CPUs, so checking only top_cpuset |
---|
| 1051 | + * is enough to detect racing CPU offlines. |
---|
847 | 1052 | */ |
---|
848 | | - if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask)) |
---|
849 | | - goto out; |
---|
| 1053 | + if (!top_cpuset.nr_subparts_cpus && |
---|
| 1054 | + !cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask)) |
---|
| 1055 | + return; |
---|
| 1056 | + |
---|
| 1057 | + /* |
---|
| 1058 | + * With subpartition CPUs, however, the effective CPUs of a partition |
---|
| 1059 | + * root should be only a subset of the active CPUs. Since a CPU in any |
---|
| 1060 | + * partition root could be offlined, all must be checked. |
---|
| 1061 | + */ |
---|
| 1062 | + if (top_cpuset.nr_subparts_cpus) { |
---|
| 1063 | + rcu_read_lock(); |
---|
| 1064 | + cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { |
---|
| 1065 | + if (!is_partition_root(cs)) { |
---|
| 1066 | + pos_css = css_rightmost_descendant(pos_css); |
---|
| 1067 | + continue; |
---|
| 1068 | + } |
---|
| 1069 | + if (!cpumask_subset(cs->effective_cpus, |
---|
| 1070 | + cpu_active_mask)) { |
---|
| 1071 | + rcu_read_unlock(); |
---|
| 1072 | + return; |
---|
| 1073 | + } |
---|
| 1074 | + } |
---|
| 1075 | + rcu_read_unlock(); |
---|
| 1076 | + } |
---|
850 | 1077 | |
---|
851 | 1078 | /* Generate domain masks and attrs */ |
---|
852 | 1079 | ndoms = generate_sched_domains(&doms, &attr); |
---|
853 | 1080 | |
---|
854 | 1081 | /* Have scheduler rebuild the domains */ |
---|
855 | | - partition_sched_domains(ndoms, doms, attr); |
---|
856 | | -out: |
---|
857 | | - put_online_cpus(); |
---|
| 1082 | + partition_and_rebuild_sched_domains(ndoms, doms, attr); |
---|
858 | 1083 | } |
---|
859 | 1084 | #else /* !CONFIG_SMP */ |
---|
860 | 1085 | static void rebuild_sched_domains_locked(void) |
---|
.. | .. |
---|
864 | 1089 | |
---|
865 | 1090 | void rebuild_sched_domains(void) |
---|
866 | 1091 | { |
---|
| 1092 | + get_online_cpus(); |
---|
867 | 1093 | mutex_lock(&cpuset_mutex); |
---|
868 | 1094 | rebuild_sched_domains_locked(); |
---|
869 | 1095 | mutex_unlock(&cpuset_mutex); |
---|
| 1096 | + put_online_cpus(); |
---|
| 1097 | +} |
---|
| 1098 | + |
---|
| 1099 | +static int update_cpus_allowed(struct cpuset *cs, struct task_struct *p, |
---|
| 1100 | + const struct cpumask *new_mask) |
---|
| 1101 | +{ |
---|
| 1102 | + int ret = -EINVAL; |
---|
| 1103 | + |
---|
| 1104 | + trace_android_rvh_update_cpus_allowed(p, cs->cpus_requested, new_mask, &ret); |
---|
| 1105 | + if (!ret) |
---|
| 1106 | + return ret; |
---|
| 1107 | + |
---|
| 1108 | + return set_cpus_allowed_ptr(p, new_mask); |
---|
870 | 1109 | } |
---|
871 | 1110 | |
---|
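The wrapper above gives an Android restricted vendor hook the first chance to apply the affinity change: if a registered handler sets ret to 0, the default set_cpus_allowed_ptr() path is skipped. A hedged sketch of what such a handler could look like follows; the handler name, its kthread-only policy, and the usual register_trace_android_rvh_update_cpus_allowed() registration step are assumptions of this note, not part of the patch.

```c
/* Hypothetical vendor-module handler for the update_cpus_allowed hook. */
static void example_update_cpus_allowed(void *unused, struct task_struct *p,
					cpumask_var_t cpus_requested,
					const struct cpumask *new_mask,
					int *ret)
{
	/* Example policy: only take over the update for kernel threads. */
	if (p->flags & PF_KTHREAD)
		*ret = set_cpus_allowed_ptr(p, new_mask);  /* 0 == handled */
	/* otherwise leave *ret non-zero so the default path runs */
}
```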
872 | 1111 | /** |
---|
.. | .. |
---|
881 | 1120 | { |
---|
882 | 1121 | struct css_task_iter it; |
---|
883 | 1122 | struct task_struct *task; |
---|
| 1123 | + bool top_cs = cs == &top_cpuset; |
---|
884 | 1124 | |
---|
885 | 1125 | css_task_iter_start(&cs->css, 0, &it); |
---|
886 | | - while ((task = css_task_iter_next(&it))) |
---|
887 | | - set_cpus_allowed_ptr(task, cs->effective_cpus); |
---|
| 1126 | + while ((task = css_task_iter_next(&it))) { |
---|
| 1127 | + /* |
---|
| 1128 | + * Percpu kthreads in top_cpuset are ignored |
---|
| 1129 | + */ |
---|
| 1130 | + if (top_cs && (task->flags & PF_KTHREAD) && |
---|
| 1131 | + kthread_is_per_cpu(task)) |
---|
| 1132 | + continue; |
---|
| 1133 | + update_cpus_allowed(cs, task, cs->effective_cpus); |
---|
| 1134 | + } |
---|
888 | 1135 | css_task_iter_end(&it); |
---|
| 1136 | +} |
---|
| 1137 | + |
---|
| 1138 | +/** |
---|
| 1139 | + * compute_effective_cpumask - Compute the effective cpumask of the cpuset |
---|
| 1140 | + * @new_cpus: the temp variable for the new effective_cpus mask |
---|
| 1142 | + * @cs: the cpuset that needs to recompute its new effective_cpus mask |
---|
| 1142 | + * @parent: the parent cpuset |
---|
| 1143 | + * |
---|
| 1144 | + * If the parent has subpartition CPUs, include them in the list of |
---|
| 1145 | + * allowable CPUs in computing the new effective_cpus mask. Since offlined |
---|
| 1146 | + * CPUs are not removed from subparts_cpus, we have to use cpu_active_mask |
---|
| 1147 | + * to mask those out. |
---|
| 1148 | + */ |
---|
| 1149 | +static void compute_effective_cpumask(struct cpumask *new_cpus, |
---|
| 1150 | + struct cpuset *cs, struct cpuset *parent) |
---|
| 1151 | +{ |
---|
| 1152 | + if (parent->nr_subparts_cpus) { |
---|
| 1153 | + cpumask_or(new_cpus, parent->effective_cpus, |
---|
| 1154 | + parent->subparts_cpus); |
---|
| 1155 | + cpumask_and(new_cpus, new_cpus, cs->cpus_requested); |
---|
| 1156 | + cpumask_and(new_cpus, new_cpus, cpu_active_mask); |
---|
| 1157 | + } else { |
---|
| 1158 | + cpumask_and(new_cpus, cs->cpus_requested, parent_cs(cs)->effective_cpus); |
---|
| 1159 | + } |
---|
| 1160 | +} |
---|
| 1161 | + |
---|
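A hypothetical worked example of compute_effective_cpumask() with small, invented masks:

```c
/*
 * Hypothetical input:
 *
 *   parent->effective_cpus = 0-1
 *   parent->subparts_cpus  = 2-3        (CPU 3 currently offline)
 *   cs->cpus_requested     = 1-3
 *   cpu_active_mask        = 0-2,4-7
 *
 * Since parent->nr_subparts_cpus != 0, the first branch runs:
 *
 *   new_cpus = ((0-1 | 2-3) & 1-3) & 0-2,4-7 = 1-2
 *
 * The offline CPU 3 is granted by the parent but filtered out through
 * cpu_active_mask, exactly as the comment above describes.
 */
```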
| 1162 | +/* |
---|
| 1163 | + * Commands for update_parent_subparts_cpumask |
---|
| 1164 | + */ |
---|
| 1165 | +enum subparts_cmd { |
---|
| 1166 | + partcmd_enable, /* Enable partition root */ |
---|
| 1167 | + partcmd_disable, /* Disable partition root */ |
---|
| 1168 | + partcmd_update, /* Update parent's subparts_cpus */ |
---|
| 1169 | +}; |
---|
| 1170 | + |
---|
| 1171 | +/** |
---|
| 1172 | + * update_parent_subparts_cpumask - update subparts_cpus mask of parent cpuset |
---|
| 1173 | + * @cpuset: The cpuset that requests change in partition root state |
---|
| 1174 | + * @cmd: Partition root state change command |
---|
| 1175 | + * @newmask: Optional new cpumask for partcmd_update |
---|
| 1176 | + * @tmp: Temporary addmask and delmask |
---|
| 1177 | + * Return: 0, 1 or an error code |
---|
| 1178 | + * |
---|
| 1179 | + * For partcmd_enable, the cpuset is being transformed from a non-partition |
---|
| 1180 | + * root to a partition root. The cpus_allowed mask of the given cpuset will |
---|
| 1181 | + * be put into parent's subparts_cpus and taken away from parent's |
---|
| 1182 | + * effective_cpus. The function will return 0 if all the CPUs listed in |
---|
| 1183 | + * cpus_allowed can be granted or an error code will be returned. |
---|
| 1184 | + * |
---|
| 1185 | + * For partcmd_disable, the cpuset is being transformed from a partition |
---|
| 1186 | + * root back to a non-partition root. Any CPUs in cpus_allowed that are in |
---|
| 1187 | + * parent's subparts_cpus will be taken away from that cpumask and put back |
---|
| 1188 | + * into parent's effective_cpus. 0 should always be returned. |
---|
| 1189 | + * |
---|
| 1190 | + * For partcmd_update, if the optional newmask is specified, the cpu |
---|
| 1191 | + * list is to be changed from cpus_allowed to newmask. Otherwise, |
---|
| 1192 | + * cpus_allowed is assumed to remain the same. The cpuset should either |
---|
| 1193 | + * be a partition root or an invalid partition root. The partition root |
---|
| 1194 | + * state may change if newmask is NULL and none of the requested CPUs can |
---|
| 1195 | + * be granted by the parent. The function will return 1 if changes to |
---|
| 1196 | + * parent's subparts_cpus and effective_cpus happen or 0 otherwise. |
---|
| 1197 | + * Error code should only be returned when newmask is non-NULL. |
---|
| 1198 | + * |
---|
| 1199 | + * The partcmd_enable and partcmd_disable commands are used by |
---|
| 1200 | + * update_prstate(). The partcmd_update command is used by |
---|
| 1201 | + * update_cpumasks_hier() with newmask NULL and update_cpumask() with |
---|
| 1202 | + * newmask set. |
---|
| 1203 | + * |
---|
| 1204 | + * The checking is more strict when enabling partition root than the |
---|
| 1205 | + * other two commands. |
---|
| 1206 | + * |
---|
| 1207 | + * Because of the implicit cpu exclusive nature of a partition root, |
---|
| 1208 | + * cpumask changes that violate the cpu exclusivity rule will not be |
---|
| 1209 | + * permitted when checked by validate_change(). The validate_change() |
---|
| 1210 | + * function will also prevent any changes to the cpu list if it is not |
---|
| 1211 | + * a superset of children's cpu lists. |
---|
| 1212 | + */ |
---|
| 1213 | +static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, |
---|
| 1214 | + struct cpumask *newmask, |
---|
| 1215 | + struct tmpmasks *tmp) |
---|
| 1216 | +{ |
---|
| 1217 | + struct cpuset *parent = parent_cs(cpuset); |
---|
| 1218 | + int adding; /* Moving cpus from effective_cpus to subparts_cpus */ |
---|
| 1219 | + int deleting; /* Moving cpus from subparts_cpus to effective_cpus */ |
---|
| 1220 | + int new_prs; |
---|
| 1221 | + bool part_error = false; /* Partition error? */ |
---|
| 1222 | + |
---|
| 1223 | + lockdep_assert_held(&cpuset_mutex); |
---|
| 1224 | + |
---|
| 1225 | + /* |
---|
| 1226 | + * The parent must be a partition root. |
---|
| 1227 | + * The new cpumask, if present, or the current cpus_allowed must |
---|
| 1228 | + * not be empty. |
---|
| 1229 | + */ |
---|
| 1230 | + if (!is_partition_root(parent) || |
---|
| 1231 | + (newmask && cpumask_empty(newmask)) || |
---|
| 1232 | + (!newmask && cpumask_empty(cpuset->cpus_allowed))) |
---|
| 1233 | + return -EINVAL; |
---|
| 1234 | + |
---|
| 1235 | + /* |
---|
| 1236 | + * Enabling/disabling partition root is not allowed if there are |
---|
| 1237 | + * online children. |
---|
| 1238 | + */ |
---|
| 1239 | + if ((cmd != partcmd_update) && css_has_online_children(&cpuset->css)) |
---|
| 1240 | + return -EBUSY; |
---|
| 1241 | + |
---|
| 1242 | + /* |
---|
| 1243 | + * Enabling partition root is not allowed unless all the CPUs |
---|
| 1244 | + * can be granted from parent's effective_cpus and at least one |
---|
| 1245 | + * CPU would still be left in the parent after the grant. |
---|
| 1246 | + */ |
---|
| 1247 | + if ((cmd == partcmd_enable) && |
---|
| 1248 | + (!cpumask_subset(cpuset->cpus_allowed, parent->effective_cpus) || |
---|
| 1249 | + cpumask_equal(cpuset->cpus_allowed, parent->effective_cpus))) |
---|
| 1250 | + return -EINVAL; |
---|
| 1251 | + |
---|
| 1252 | + /* |
---|
| 1253 | + * A cpumask update cannot make parent's effective_cpus become empty. |
---|
| 1254 | + */ |
---|
| 1255 | + adding = deleting = false; |
---|
| 1256 | + new_prs = cpuset->partition_root_state; |
---|
| 1257 | + if (cmd == partcmd_enable) { |
---|
| 1258 | + cpumask_copy(tmp->addmask, cpuset->cpus_allowed); |
---|
| 1259 | + adding = true; |
---|
| 1260 | + } else if (cmd == partcmd_disable) { |
---|
| 1261 | + deleting = cpumask_and(tmp->delmask, cpuset->cpus_allowed, |
---|
| 1262 | + parent->subparts_cpus); |
---|
| 1263 | + } else if (newmask) { |
---|
| 1264 | + /* |
---|
| 1265 | + * partcmd_update with newmask: |
---|
| 1266 | + * |
---|
| 1267 | + * delmask = cpus_allowed & ~newmask & parent->subparts_cpus |
---|
| 1268 | + * addmask = newmask & parent->effective_cpus |
---|
| 1269 | + * & ~parent->subparts_cpus |
---|
| 1270 | + */ |
---|
| 1271 | + cpumask_andnot(tmp->delmask, cpuset->cpus_allowed, newmask); |
---|
| 1272 | + deleting = cpumask_and(tmp->delmask, tmp->delmask, |
---|
| 1273 | + parent->subparts_cpus); |
---|
| 1274 | + |
---|
| 1275 | + cpumask_and(tmp->addmask, newmask, parent->effective_cpus); |
---|
| 1276 | + adding = cpumask_andnot(tmp->addmask, tmp->addmask, |
---|
| 1277 | + parent->subparts_cpus); |
---|
| 1278 | + /* |
---|
| 1279 | + * Return error if the new effective_cpus could become empty. |
---|
| 1280 | + */ |
---|
| 1281 | + if (adding && |
---|
| 1282 | + cpumask_equal(parent->effective_cpus, tmp->addmask)) { |
---|
| 1283 | + if (!deleting) |
---|
| 1284 | + return -EINVAL; |
---|
| 1285 | + /* |
---|
| 1286 | + * As some of the CPUs in subparts_cpus might have |
---|
| 1287 | + * been offlined, we need to compute the real delmask |
---|
| 1288 | + * to confirm that. |
---|
| 1289 | + */ |
---|
| 1290 | + if (!cpumask_and(tmp->addmask, tmp->delmask, |
---|
| 1291 | + cpu_active_mask)) |
---|
| 1292 | + return -EINVAL; |
---|
| 1293 | + cpumask_copy(tmp->addmask, parent->effective_cpus); |
---|
| 1294 | + } |
---|
| 1295 | + } else { |
---|
| 1296 | + /* |
---|
| 1297 | + * partcmd_update w/o newmask: |
---|
| 1298 | + * |
---|
| 1299 | + * addmask = cpus_allowed & parent->effective_cpus |
---|
| 1300 | + * |
---|
| 1301 | + * Note that parent's subparts_cpus may have been |
---|
| 1302 | + * pre-shrunk in case there is a change in the cpu list. |
---|
| 1303 | + * So no deletion is needed. |
---|
| 1304 | + */ |
---|
| 1305 | + adding = cpumask_and(tmp->addmask, cpuset->cpus_allowed, |
---|
| 1306 | + parent->effective_cpus); |
---|
| 1307 | + part_error = cpumask_equal(tmp->addmask, |
---|
| 1308 | + parent->effective_cpus); |
---|
| 1309 | + } |
---|
| 1310 | + |
---|
| 1311 | + if (cmd == partcmd_update) { |
---|
| 1312 | + int prev_prs = cpuset->partition_root_state; |
---|
| 1313 | + |
---|
| 1314 | + /* |
---|
| 1315 | + * Check for possible transition between PRS_ENABLED |
---|
| 1316 | + * and PRS_ERROR. |
---|
| 1317 | + */ |
---|
| 1318 | + switch (cpuset->partition_root_state) { |
---|
| 1319 | + case PRS_ENABLED: |
---|
| 1320 | + if (part_error) |
---|
| 1321 | + new_prs = PRS_ERROR; |
---|
| 1322 | + break; |
---|
| 1323 | + case PRS_ERROR: |
---|
| 1324 | + if (!part_error) |
---|
| 1325 | + new_prs = PRS_ENABLED; |
---|
| 1326 | + break; |
---|
| 1327 | + } |
---|
| 1328 | + /* |
---|
| 1329 | + * Set part_error if previously in invalid state. |
---|
| 1330 | + */ |
---|
| 1331 | + part_error = (prev_prs == PRS_ERROR); |
---|
| 1332 | + } |
---|
| 1333 | + |
---|
| 1334 | + if (!part_error && (new_prs == PRS_ERROR)) |
---|
| 1335 | + return 0; /* Nothing needs to be done */ |
---|
| 1336 | + |
---|
| 1337 | + if (new_prs == PRS_ERROR) { |
---|
| 1338 | + /* |
---|
| 1339 | + * Remove all its cpus from parent's subparts_cpus. |
---|
| 1340 | + */ |
---|
| 1341 | + adding = false; |
---|
| 1342 | + deleting = cpumask_and(tmp->delmask, cpuset->cpus_allowed, |
---|
| 1343 | + parent->subparts_cpus); |
---|
| 1344 | + } |
---|
| 1345 | + |
---|
| 1346 | + if (!adding && !deleting && (new_prs == cpuset->partition_root_state)) |
---|
| 1347 | + return 0; |
---|
| 1348 | + |
---|
| 1349 | + /* |
---|
| 1350 | + * Change the parent's subparts_cpus. |
---|
| 1351 | + * Newly added CPUs will be removed from effective_cpus and |
---|
| 1352 | + * newly deleted ones will be added back to effective_cpus. |
---|
| 1353 | + */ |
---|
| 1354 | + spin_lock_irq(&callback_lock); |
---|
| 1355 | + if (adding) { |
---|
| 1356 | + cpumask_or(parent->subparts_cpus, |
---|
| 1357 | + parent->subparts_cpus, tmp->addmask); |
---|
| 1358 | + cpumask_andnot(parent->effective_cpus, |
---|
| 1359 | + parent->effective_cpus, tmp->addmask); |
---|
| 1360 | + } |
---|
| 1361 | + if (deleting) { |
---|
| 1362 | + cpumask_andnot(parent->subparts_cpus, |
---|
| 1363 | + parent->subparts_cpus, tmp->delmask); |
---|
| 1364 | + /* |
---|
| 1365 | + * Some of the CPUs in subparts_cpus might have been offlined. |
---|
| 1366 | + */ |
---|
| 1367 | + cpumask_and(tmp->delmask, tmp->delmask, cpu_active_mask); |
---|
| 1368 | + cpumask_or(parent->effective_cpus, |
---|
| 1369 | + parent->effective_cpus, tmp->delmask); |
---|
| 1370 | + } |
---|
| 1371 | + |
---|
| 1372 | + parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus); |
---|
| 1373 | + |
---|
| 1374 | + if (cpuset->partition_root_state != new_prs) |
---|
| 1375 | + cpuset->partition_root_state = new_prs; |
---|
| 1376 | + spin_unlock_irq(&callback_lock); |
---|
| 1377 | + |
---|
| 1378 | + return cmd == partcmd_update; |
---|
889 | 1379 | } |
---|
890 | 1380 | |
---|
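To make the partcmd_update arithmetic concrete, here is a hypothetical worked example; all mask values are invented for illustration.

```c
/*
 * partcmd_update with a newmask, hypothetical values:
 *
 *   cpuset->cpus_allowed    = 2-5      (current partition CPUs)
 *   newmask                 = 4-7      (requested new cpu list)
 *   parent->subparts_cpus   = 2-5
 *   parent->effective_cpus  = 0-1,6-7
 *
 *   delmask = cpus_allowed & ~newmask & parent->subparts_cpus = 2-3
 *   addmask = newmask & parent->effective_cpus & ~parent->subparts_cpus = 6-7
 *
 * Under callback_lock the parent then becomes (assuming CPUs 2-3 are
 * still online):
 *
 *   parent->subparts_cpus   = 4-7      (drop 2-3, add 6-7)
 *   parent->effective_cpus  = 0-3      (get 2-3 back, lose 6-7)
 *
 * Both adding and deleting are true, so the function returns 1 to signal
 * that the parent's subparts_cpus and effective_cpus have changed.
 */
```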
891 | 1381 | /* |
---|
892 | 1382 | * update_cpumasks_hier - Update effective cpumasks and tasks in the subtree |
---|
893 | | - * @cs: the cpuset to consider |
---|
894 | | - * @new_cpus: temp variable for calculating new effective_cpus |
---|
| 1383 | + * @cs: the cpuset to consider |
---|
| 1384 | + * @tmp: temp variables for calculating effective_cpus & partition setup |
---|
895 | 1385 | * |
---|
896 | 1386 | * When the configured cpumask is changed, the effective cpumasks of this cpuset |
---|
897 | 1387 | * and all its descendants need to be updated. |
---|
.. | .. |
---|
900 | 1390 | * |
---|
901 | 1391 | * Called with cpuset_mutex held |
---|
902 | 1392 | */ |
---|
903 | | -static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) |
---|
| 1393 | +static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp) |
---|
904 | 1394 | { |
---|
905 | 1395 | struct cpuset *cp; |
---|
906 | 1396 | struct cgroup_subsys_state *pos_css; |
---|
907 | 1397 | bool need_rebuild_sched_domains = false; |
---|
| 1398 | + int new_prs; |
---|
908 | 1399 | |
---|
909 | 1400 | rcu_read_lock(); |
---|
910 | 1401 | cpuset_for_each_descendant_pre(cp, pos_css, cs) { |
---|
911 | 1402 | struct cpuset *parent = parent_cs(cp); |
---|
912 | 1403 | |
---|
913 | | - cpumask_and(new_cpus, cp->cpus_allowed, parent->effective_cpus); |
---|
| 1404 | + compute_effective_cpumask(tmp->new_cpus, cp, parent); |
---|
914 | 1405 | |
---|
915 | 1406 | /* |
---|
916 | 1407 | * If it becomes empty, inherit the effective mask of the |
---|
917 | 1408 | * parent, which is guaranteed to have some CPUs. |
---|
918 | 1409 | */ |
---|
919 | | - if (is_in_v2_mode() && cpumask_empty(new_cpus)) |
---|
920 | | - cpumask_copy(new_cpus, parent->effective_cpus); |
---|
| 1410 | + if (is_in_v2_mode() && cpumask_empty(tmp->new_cpus)) { |
---|
| 1411 | + cpumask_copy(tmp->new_cpus, parent->effective_cpus); |
---|
| 1412 | + if (!cp->use_parent_ecpus) { |
---|
| 1413 | + cp->use_parent_ecpus = true; |
---|
| 1414 | + parent->child_ecpus_count++; |
---|
| 1415 | + } |
---|
| 1416 | + } else if (cp->use_parent_ecpus) { |
---|
| 1417 | + cp->use_parent_ecpus = false; |
---|
| 1418 | + WARN_ON_ONCE(!parent->child_ecpus_count); |
---|
| 1419 | + parent->child_ecpus_count--; |
---|
| 1420 | + } |
---|
921 | 1421 | |
---|
922 | | - /* Skip the whole subtree if the cpumask remains the same. */ |
---|
923 | | - if (cpumask_equal(new_cpus, cp->effective_cpus)) { |
---|
| 1422 | + /* |
---|
| 1423 | + * Skip the whole subtree if the cpumask remains the same |
---|
| 1424 | + * and has no partition root state. |
---|
| 1425 | + */ |
---|
| 1426 | + if (!cp->partition_root_state && |
---|
| 1427 | + cpumask_equal(tmp->new_cpus, cp->effective_cpus)) { |
---|
924 | 1428 | pos_css = css_rightmost_descendant(pos_css); |
---|
925 | 1429 | continue; |
---|
| 1430 | + } |
---|
| 1431 | + |
---|
| 1432 | + /* |
---|
| 1433 | + * update_parent_subparts_cpumask() should have been called |
---|
| 1434 | + * for cs already in update_cpumask(). We should also call |
---|
| 1435 | + * update_tasks_cpumask() again for tasks in the parent |
---|
| 1436 | + * cpuset if the parent's subparts_cpus changes. |
---|
| 1437 | + */ |
---|
| 1438 | + new_prs = cp->partition_root_state; |
---|
| 1439 | + if ((cp != cs) && new_prs) { |
---|
| 1440 | + switch (parent->partition_root_state) { |
---|
| 1441 | + case PRS_DISABLED: |
---|
| 1442 | + /* |
---|
| 1443 | + * If parent is not a partition root or an |
---|
| 1444 | + * invalid partition root, clear its state |
---|
| 1445 | + * and its CS_CPU_EXCLUSIVE flag. |
---|
| 1446 | + */ |
---|
| 1447 | + WARN_ON_ONCE(cp->partition_root_state |
---|
| 1448 | + != PRS_ERROR); |
---|
| 1449 | + new_prs = PRS_DISABLED; |
---|
| 1450 | + |
---|
| 1451 | + /* |
---|
| 1452 | + * clear_bit() is an atomic operation and |
---|
| 1453 | + * readers aren't interested in the state |
---|
| 1454 | + * of CS_CPU_EXCLUSIVE anyway. So we can |
---|
| 1455 | + * just update the flag without holding |
---|
| 1456 | + * the callback_lock. |
---|
| 1457 | + */ |
---|
| 1458 | + clear_bit(CS_CPU_EXCLUSIVE, &cp->flags); |
---|
| 1459 | + break; |
---|
| 1460 | + |
---|
| 1461 | + case PRS_ENABLED: |
---|
| 1462 | + if (update_parent_subparts_cpumask(cp, partcmd_update, NULL, tmp)) |
---|
| 1463 | + update_tasks_cpumask(parent); |
---|
| 1464 | + break; |
---|
| 1465 | + |
---|
| 1466 | + case PRS_ERROR: |
---|
| 1467 | + /* |
---|
| 1468 | + * When the parent is invalid, this cpuset has to be invalid too. |
---|
| 1469 | + */ |
---|
| 1470 | + new_prs = PRS_ERROR; |
---|
| 1471 | + break; |
---|
| 1472 | + } |
---|
926 | 1473 | } |
---|
927 | 1474 | |
---|
928 | 1475 | if (!css_tryget_online(&cp->css)) |
---|
929 | 1476 | continue; |
---|
930 | 1477 | rcu_read_unlock(); |
---|
931 | 1478 | |
---|
932 | | - raw_spin_lock_irq(&callback_lock); |
---|
933 | | - cpumask_copy(cp->effective_cpus, new_cpus); |
---|
934 | | - raw_spin_unlock_irq(&callback_lock); |
---|
| 1479 | + spin_lock_irq(&callback_lock); |
---|
| 1480 | + |
---|
| 1481 | + cpumask_copy(cp->effective_cpus, tmp->new_cpus); |
---|
| 1482 | + if (cp->nr_subparts_cpus && (new_prs != PRS_ENABLED)) { |
---|
| 1483 | + cp->nr_subparts_cpus = 0; |
---|
| 1484 | + cpumask_clear(cp->subparts_cpus); |
---|
| 1485 | + } else if (cp->nr_subparts_cpus) { |
---|
| 1486 | + /* |
---|
| 1487 | + * Make sure that effective_cpus & subparts_cpus |
---|
| 1488 | + * are mutually exclusive. |
---|
| 1489 | + * |
---|
| 1490 | + * In the unlikely event that effective_cpus |
---|
| 1491 | + * becomes empty, we clear cp->nr_subparts_cpus and |
---|
| 1492 | + * let its child partition roots compete for |
---|
| 1493 | + * CPUs again. |
---|
| 1494 | + */ |
---|
| 1495 | + cpumask_andnot(cp->effective_cpus, cp->effective_cpus, |
---|
| 1496 | + cp->subparts_cpus); |
---|
| 1497 | + if (cpumask_empty(cp->effective_cpus)) { |
---|
| 1498 | + cpumask_copy(cp->effective_cpus, tmp->new_cpus); |
---|
| 1499 | + cpumask_clear(cp->subparts_cpus); |
---|
| 1500 | + cp->nr_subparts_cpus = 0; |
---|
| 1501 | + } else if (!cpumask_subset(cp->subparts_cpus, |
---|
| 1502 | + tmp->new_cpus)) { |
---|
| 1503 | + cpumask_andnot(cp->subparts_cpus, |
---|
| 1504 | + cp->subparts_cpus, tmp->new_cpus); |
---|
| 1505 | + cp->nr_subparts_cpus |
---|
| 1506 | + = cpumask_weight(cp->subparts_cpus); |
---|
| 1507 | + } |
---|
| 1508 | + } |
---|
| 1509 | + |
---|
| 1510 | + if (new_prs != cp->partition_root_state) |
---|
| 1511 | + cp->partition_root_state = new_prs; |
---|
| 1512 | + |
---|
| 1513 | + spin_unlock_irq(&callback_lock); |
---|
935 | 1514 | |
---|
936 | 1515 | WARN_ON(!is_in_v2_mode() && |
---|
937 | 1516 | !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); |
---|
.. | .. |
---|
939 | 1518 | update_tasks_cpumask(cp); |
---|
940 | 1519 | |
---|
941 | 1520 | /* |
---|
942 | | - * If the effective cpumask of any non-empty cpuset is changed, |
---|
943 | | - * we need to rebuild sched domains. |
---|
| 1521 | + * On legacy hierarchy, if the effective cpumask of any non- |
---|
| 1522 | + * empty cpuset is changed, we need to rebuild sched domains. |
---|
| 1523 | + * On default hierarchy, the cpuset needs to be a partition |
---|
| 1524 | + * root as well. |
---|
944 | 1525 | */ |
---|
945 | 1526 | if (!cpumask_empty(cp->cpus_allowed) && |
---|
946 | | - is_sched_load_balance(cp)) |
---|
| 1527 | + is_sched_load_balance(cp) && |
---|
| 1528 | + (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) || |
---|
| 1529 | + is_partition_root(cp))) |
---|
947 | 1530 | need_rebuild_sched_domains = true; |
---|
948 | 1531 | |
---|
949 | 1532 | rcu_read_lock(); |
---|
.. | .. |
---|
956 | 1539 | } |
---|
957 | 1540 | |
---|
958 | 1541 | /** |
---|
| 1542 | + * update_sibling_cpumasks - Update siblings' cpumasks |
---|
| 1543 | + * @parent: Parent cpuset |
---|
| 1544 | + * @cs: Current cpuset |
---|
| 1545 | + * @tmp: Temp variables |
---|
| 1546 | + */ |
---|
| 1547 | +static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, |
---|
| 1548 | + struct tmpmasks *tmp) |
---|
| 1549 | +{ |
---|
| 1550 | + struct cpuset *sibling; |
---|
| 1551 | + struct cgroup_subsys_state *pos_css; |
---|
| 1552 | + |
---|
| 1553 | + lockdep_assert_held(&cpuset_mutex); |
---|
| 1554 | + |
---|
| 1555 | + /* |
---|
| 1556 | + * Check all its siblings and call update_cpumasks_hier() |
---|
| 1557 | + * if their use_parent_ecpus flag is set in order for them |
---|
| 1558 | + * to use the right effective_cpus value. |
---|
| 1559 | + * |
---|
| 1560 | + * The update_cpumasks_hier() function may sleep. So we have to |
---|
| 1561 | + * release the RCU read lock before calling it. |
---|
| 1562 | + */ |
---|
| 1563 | + rcu_read_lock(); |
---|
| 1564 | + cpuset_for_each_child(sibling, pos_css, parent) { |
---|
| 1565 | + if (sibling == cs) |
---|
| 1566 | + continue; |
---|
| 1567 | + if (!sibling->use_parent_ecpus) |
---|
| 1568 | + continue; |
---|
| 1569 | + if (!css_tryget_online(&sibling->css)) |
---|
| 1570 | + continue; |
---|
| 1571 | + |
---|
| 1572 | + rcu_read_unlock(); |
---|
| 1573 | + update_cpumasks_hier(sibling, tmp); |
---|
| 1574 | + rcu_read_lock(); |
---|
| 1575 | + css_put(&sibling->css); |
---|
| 1576 | + } |
---|
| 1577 | + rcu_read_unlock(); |
---|
| 1578 | +} |
---|
| 1579 | + |
---|
| 1580 | +/** |
---|
959 | 1581 | * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it |
---|
960 | 1582 | * @cs: the cpuset to consider |
---|
961 | 1583 | * @trialcs: trial cpuset |
---|
.. | .. |
---|
965 | 1587 | const char *buf) |
---|
966 | 1588 | { |
---|
967 | 1589 | int retval; |
---|
| 1590 | + struct tmpmasks tmp; |
---|
968 | 1591 | |
---|
969 | 1592 | /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */ |
---|
970 | 1593 | if (cs == &top_cpuset) |
---|
.. | .. |
---|
997 | 1620 | if (retval < 0) |
---|
998 | 1621 | return retval; |
---|
999 | 1622 | |
---|
1000 | | - raw_spin_lock_irq(&callback_lock); |
---|
| 1623 | +#ifdef CONFIG_CPUMASK_OFFSTACK |
---|
| 1624 | + /* |
---|
| 1625 | + * Use the cpumasks in trialcs for tmpmasks when they are pointers |
---|
| 1626 | + * to allocated cpumasks. |
---|
| 1627 | + */ |
---|
| 1628 | + tmp.addmask = trialcs->subparts_cpus; |
---|
| 1629 | + tmp.delmask = trialcs->effective_cpus; |
---|
| 1630 | + tmp.new_cpus = trialcs->cpus_allowed; |
---|
| 1631 | +#endif |
---|
| 1632 | + |
---|
| 1633 | + if (cs->partition_root_state) { |
---|
| 1634 | + /* Cpumask of a partition root cannot be empty */ |
---|
| 1635 | + if (cpumask_empty(trialcs->cpus_allowed)) |
---|
| 1636 | + return -EINVAL; |
---|
| 1637 | + if (update_parent_subparts_cpumask(cs, partcmd_update, |
---|
| 1638 | + trialcs->cpus_allowed, &tmp) < 0) |
---|
| 1639 | + return -EINVAL; |
---|
| 1640 | + } |
---|
| 1641 | + |
---|
| 1642 | + spin_lock_irq(&callback_lock); |
---|
1001 | 1643 | cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); |
---|
1002 | 1644 | cpumask_copy(cs->cpus_requested, trialcs->cpus_requested); |
---|
1003 | | - raw_spin_unlock_irq(&callback_lock); |
---|
1004 | 1645 | |
---|
1005 | | - /* use trialcs->cpus_allowed as a temp variable */ |
---|
1006 | | - update_cpumasks_hier(cs, trialcs->cpus_allowed); |
---|
| 1646 | + /* |
---|
| 1647 | + * Make sure that subparts_cpus is a subset of cpus_allowed. |
---|
| 1648 | + */ |
---|
| 1649 | + if (cs->nr_subparts_cpus) { |
---|
| 1650 | + cpumask_and(cs->subparts_cpus, cs->subparts_cpus, cs->cpus_allowed); |
---|
| 1651 | + cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus); |
---|
| 1652 | + } |
---|
| 1653 | + spin_unlock_irq(&callback_lock); |
---|
| 1654 | + |
---|
| 1655 | + update_cpumasks_hier(cs, &tmp); |
---|
| 1656 | + |
---|
| 1657 | + if (cs->partition_root_state) { |
---|
| 1658 | + struct cpuset *parent = parent_cs(cs); |
---|
| 1659 | + |
---|
| 1660 | + /* |
---|
| 1661 | + * For partition root, update the cpumasks of sibling |
---|
| 1662 | + * cpusets if they use parent's effective_cpus. |
---|
| 1663 | + */ |
---|
| 1664 | + if (parent->child_ecpus_count) |
---|
| 1665 | + update_sibling_cpumasks(parent, cs, &tmp); |
---|
| 1666 | + } |
---|
1007 | 1667 | return 0; |
---|
1008 | 1668 | } |
---|
1009 | 1669 | |
---|
.. | .. |
---|
1104 | 1764 | guarantee_online_mems(cs, &newmems); |
---|
1105 | 1765 | |
---|
1106 | 1766 | /* |
---|
1107 | | - * The mpol_rebind_mm() call takes mmap_sem, which we couldn't |
---|
| 1767 | + * The mpol_rebind_mm() call takes mmap_lock, which we couldn't |
---|
1108 | 1768 | * take while holding tasklist_lock. Forks can happen - the |
---|
1109 | 1769 | * mpol_dup() cpuset_being_rebound check will catch such forks, |
---|
1110 | 1770 | * and rebind their vma mempolicies too. Because we still hold |
---|
.. | .. |
---|
1184 | 1844 | continue; |
---|
1185 | 1845 | rcu_read_unlock(); |
---|
1186 | 1846 | |
---|
1187 | | - raw_spin_lock_irq(&callback_lock); |
---|
| 1847 | + spin_lock_irq(&callback_lock); |
---|
1188 | 1848 | cp->effective_mems = *new_mems; |
---|
1189 | | - raw_spin_unlock_irq(&callback_lock); |
---|
| 1849 | + spin_unlock_irq(&callback_lock); |
---|
1190 | 1850 | |
---|
1191 | 1851 | WARN_ON(!is_in_v2_mode() && |
---|
1192 | 1852 | !nodes_equal(cp->mems_allowed, cp->effective_mems)); |
---|
.. | .. |
---|
1209 | 1869 | * |
---|
1210 | 1870 | * Call with cpuset_mutex held. May take callback_lock during call. |
---|
1211 | 1871 | * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, |
---|
1212 | | - * lock each such tasks mm->mmap_sem, scan its vma's and rebind |
---|
| 1872 | + * lock each such tasks mm->mmap_lock, scan its vma's and rebind |
---|
1213 | 1873 | * their mempolicies to the cpusets new mems_allowed. |
---|
1214 | 1874 | */ |
---|
1215 | 1875 | static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, |
---|
.. | .. |
---|
1254 | 1914 | if (retval < 0) |
---|
1255 | 1915 | goto done; |
---|
1256 | 1916 | |
---|
1257 | | - raw_spin_lock_irq(&callback_lock); |
---|
| 1917 | + spin_lock_irq(&callback_lock); |
---|
1258 | 1918 | cs->mems_allowed = trialcs->mems_allowed; |
---|
1259 | | - raw_spin_unlock_irq(&callback_lock); |
---|
| 1919 | + spin_unlock_irq(&callback_lock); |
---|
1260 | 1920 | |
---|
1261 | 1921 | /* use trialcs->mems_allowed as a temp variable */ |
---|
1262 | 1922 | update_nodemasks_hier(cs, &trialcs->mems_allowed); |
---|
.. | .. |
---|
1347 | 2007 | spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) |
---|
1348 | 2008 | || (is_spread_page(cs) != is_spread_page(trialcs))); |
---|
1349 | 2009 | |
---|
1350 | | - raw_spin_lock_irq(&callback_lock); |
---|
| 2010 | + spin_lock_irq(&callback_lock); |
---|
1351 | 2011 | cs->flags = trialcs->flags; |
---|
1352 | | - raw_spin_unlock_irq(&callback_lock); |
---|
| 2012 | + spin_unlock_irq(&callback_lock); |
---|
1353 | 2013 | |
---|
1354 | 2014 | if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) |
---|
1355 | 2015 | rebuild_sched_domains_locked(); |
---|
.. | .. |
---|
1357 | 2017 | if (spread_flag_changed) |
---|
1358 | 2018 | update_tasks_flags(cs); |
---|
1359 | 2019 | out: |
---|
1360 | | - free_trial_cpuset(trialcs); |
---|
| 2020 | + free_cpuset(trialcs); |
---|
| 2021 | + return err; |
---|
| 2022 | +} |
---|
| 2023 | + |
---|
| 2024 | +/* |
---|
| 2025 | + * update_prstate - update partition_root_state |
---|
| 2026 | + * cs: the cpuset to update |
---|
| 2027 | + * new_prs: new partition root state |
---|
| 2028 | + * |
---|
| 2029 | + * Call with cpuset_mutex held. |
---|
| 2030 | + */ |
---|
| 2031 | +static int update_prstate(struct cpuset *cs, int new_prs) |
---|
| 2032 | +{ |
---|
| 2033 | + int err, old_prs = cs->partition_root_state; |
---|
| 2034 | + struct cpuset *parent = parent_cs(cs); |
---|
| 2035 | + struct tmpmasks tmpmask; |
---|
| 2036 | + |
---|
| 2037 | + if (old_prs == new_prs) |
---|
| 2038 | + return 0; |
---|
| 2039 | + |
---|
| 2040 | + /* |
---|
| 2041 | + * Cannot force a partial or invalid partition root to a full |
---|
| 2042 | + * partition root. |
---|
| 2043 | + */ |
---|
| 2044 | + if (new_prs && (old_prs == PRS_ERROR)) |
---|
| 2045 | + return -EINVAL; |
---|
| 2046 | + |
---|
| 2047 | + if (alloc_cpumasks(NULL, &tmpmask)) |
---|
| 2048 | + return -ENOMEM; |
---|
| 2049 | + |
---|
| 2050 | + err = -EINVAL; |
---|
| 2051 | + if (!old_prs) { |
---|
| 2052 | + /* |
---|
| 2053 | + * Turning on partition root requires setting the |
---|
| 2054 | + * CS_CPU_EXCLUSIVE bit implicitly as well and cpus_allowed |
---|
| 2055 | + * cannot be NULL. |
---|
| 2056 | + */ |
---|
| 2057 | + if (cpumask_empty(cs->cpus_allowed)) |
---|
| 2058 | + goto out; |
---|
| 2059 | + |
---|
| 2060 | + err = update_flag(CS_CPU_EXCLUSIVE, cs, 1); |
---|
| 2061 | + if (err) |
---|
| 2062 | + goto out; |
---|
| 2063 | + |
---|
| 2064 | + err = update_parent_subparts_cpumask(cs, partcmd_enable, |
---|
| 2065 | + NULL, &tmpmask); |
---|
| 2066 | + if (err) { |
---|
| 2067 | + update_flag(CS_CPU_EXCLUSIVE, cs, 0); |
---|
| 2068 | + goto out; |
---|
| 2069 | + } |
---|
| 2070 | + } else { |
---|
| 2071 | + /* |
---|
| 2072 | + * Turning off partition root will clear the |
---|
| 2073 | + * CS_CPU_EXCLUSIVE bit. |
---|
| 2074 | + */ |
---|
| 2075 | + if (old_prs == PRS_ERROR) { |
---|
| 2076 | + update_flag(CS_CPU_EXCLUSIVE, cs, 0); |
---|
| 2077 | + err = 0; |
---|
| 2078 | + goto out; |
---|
| 2079 | + } |
---|
| 2080 | + |
---|
| 2081 | + err = update_parent_subparts_cpumask(cs, partcmd_disable, |
---|
| 2082 | + NULL, &tmpmask); |
---|
| 2083 | + if (err) |
---|
| 2084 | + goto out; |
---|
| 2085 | + |
---|
| 2086 | + /* Turning off CS_CPU_EXCLUSIVE will not return error */ |
---|
| 2087 | + update_flag(CS_CPU_EXCLUSIVE, cs, 0); |
---|
| 2088 | + } |
---|
| 2089 | + |
---|
| 2090 | + update_tasks_cpumask(parent); |
---|
| 2091 | + |
---|
| 2092 | + if (parent->child_ecpus_count) |
---|
| 2093 | + update_sibling_cpumasks(parent, cs, &tmpmask); |
---|
| 2094 | + |
---|
| 2095 | + rebuild_sched_domains_locked(); |
---|
| 2096 | +out: |
---|
| 2097 | + if (!err) { |
---|
| 2098 | + spin_lock_irq(&callback_lock); |
---|
| 2099 | + cs->partition_root_state = new_prs; |
---|
| 2100 | + spin_unlock_irq(&callback_lock); |
---|
| 2101 | + } |
---|
| 2102 | + |
---|
| 2103 | + free_cpumasks(NULL, &tmpmask); |
---|
1361 | 2104 | return err; |
---|
1362 | 2105 | } |
---|
1363 | 2106 | |
---|
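
update_prstate() is the single place where a cpuset moves between the member, partition-root and invalid-root states. The rules it enforces are: an unchanged state is a no-op, an invalid root (PRS_ERROR) cannot be promoted straight back to a full root, and enabling a partition root requires a non-empty cpus_allowed plus an implicitly set CS_CPU_EXCLUSIVE. A minimal userspace model of just those checks (illustrative only, not kernel code):

```c
#include <stdbool.h>
#include <stdio.h>

/* Illustrative mirror of the three partition states used above. */
enum prs { PRS_DISABLED = 0, PRS_ENABLED = 1, PRS_ERROR = -1 };

/*
 * Simplified model of the checks in update_prstate():
 *  - an unchanged state is always accepted (no-op),
 *  - an invalid (PRS_ERROR) root cannot be forced straight to PRS_ENABLED,
 *  - enabling requires a non-empty cpus_allowed.
 */
static bool prstate_change_allowed(enum prs old_prs, enum prs new_prs,
				   bool cpus_empty)
{
	if (old_prs == new_prs)
		return true;
	if (new_prs == PRS_ENABLED && old_prs == PRS_ERROR)
		return false;
	if (new_prs == PRS_ENABLED && cpus_empty)
		return false;
	return true;
}

int main(void)
{
	printf("member->root, cpus set:   %d\n",
	       prstate_change_allowed(PRS_DISABLED, PRS_ENABLED, false));
	printf("member->root, cpus empty: %d\n",
	       prstate_change_allowed(PRS_DISABLED, PRS_ENABLED, true));
	printf("invalid->root:            %d\n",
	       prstate_change_allowed(PRS_ERROR, PRS_ENABLED, false));
	return 0;
}
```
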
.. | .. |
---|
1464 | 2207 | |
---|
1465 | 2208 | static struct cpuset *cpuset_attach_old_cs; |
---|
1466 | 2209 | |
---|
| 2210 | +static void reset_migrate_dl_data(struct cpuset *cs) |
---|
| 2211 | +{ |
---|
| 2212 | + cs->nr_migrate_dl_tasks = 0; |
---|
| 2213 | + cs->sum_migrate_dl_bw = 0; |
---|
| 2214 | +} |
---|
| 2215 | + |
---|
1467 | 2216 | /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */ |
---|
1468 | 2217 | static int cpuset_can_attach(struct cgroup_taskset *tset) |
---|
1469 | 2218 | { |
---|
1470 | 2219 | struct cgroup_subsys_state *css; |
---|
1471 | | - struct cpuset *cs; |
---|
| 2220 | + struct cpuset *cs, *oldcs; |
---|
1472 | 2221 | struct task_struct *task; |
---|
1473 | 2222 | int ret; |
---|
1474 | 2223 | |
---|
1475 | 2224 | /* used later by cpuset_attach() */ |
---|
1476 | 2225 | cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css)); |
---|
| 2226 | + oldcs = cpuset_attach_old_cs; |
---|
1477 | 2227 | cs = css_cs(css); |
---|
1478 | 2228 | |
---|
1479 | 2229 | mutex_lock(&cpuset_mutex); |
---|
.. | .. |
---|
1485 | 2235 | goto out_unlock; |
---|
1486 | 2236 | |
---|
1487 | 2237 | cgroup_taskset_for_each(task, css, tset) { |
---|
1488 | | - ret = task_can_attach(task, cs->cpus_allowed); |
---|
| 2238 | + ret = task_can_attach(task); |
---|
1489 | 2239 | if (ret) |
---|
1490 | 2240 | goto out_unlock; |
---|
1491 | 2241 | ret = security_task_setscheduler(task); |
---|
1492 | 2242 | if (ret) |
---|
1493 | 2243 | goto out_unlock; |
---|
| 2244 | + |
---|
| 2245 | + if (dl_task(task)) { |
---|
| 2246 | + cs->nr_migrate_dl_tasks++; |
---|
| 2247 | + cs->sum_migrate_dl_bw += task->dl.dl_bw; |
---|
| 2248 | + } |
---|
1494 | 2249 | } |
---|
1495 | 2250 | |
---|
| 2251 | + if (!cs->nr_migrate_dl_tasks) |
---|
| 2252 | + goto out_success; |
---|
| 2253 | + |
---|
| 2254 | + if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) { |
---|
| 2255 | + int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus); |
---|
| 2256 | + |
---|
| 2257 | + if (unlikely(cpu >= nr_cpu_ids)) { |
---|
| 2258 | + reset_migrate_dl_data(cs); |
---|
| 2259 | + ret = -EINVAL; |
---|
| 2260 | + goto out_unlock; |
---|
| 2261 | + } |
---|
| 2262 | + |
---|
| 2263 | + ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw); |
---|
| 2264 | + if (ret) { |
---|
| 2265 | + reset_migrate_dl_data(cs); |
---|
| 2266 | + goto out_unlock; |
---|
| 2267 | + } |
---|
| 2268 | + } |
---|
| 2269 | + |
---|
| 2270 | +out_success: |
---|
1496 | 2271 | /* |
---|
1497 | 2272 | * Mark attach is in progress. This makes validate_change() fail |
---|
1498 | 2273 | * changes which zero cpus/mems_allowed. |
---|
.. | .. |
---|
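
The cpuset_can_attach() changes above pre-reserve SCHED_DEADLINE bandwidth before any task migrates: deadline tasks in the taskset are counted, their per-task dl_bw values summed into sum_migrate_dl_bw, and dl_bw_alloc() is called on one CPU of the destination's effective_cpus, but only when source and destination effective_cpus do not intersect (cpuset_cancel_attach() below releases the reservation again). Per-task bandwidth is, conceptually, runtime/period in fixed point; the sketch below assumes the scheduler's usual 20-bit shift, which is an assumption here rather than something this diff shows.

```c
#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT 20	/* assumed fixed-point shift used for DL bandwidth */

/* bandwidth of one SCHED_DEADLINE task: runtime/period, scaled by 2^BW_SHIFT */
static uint64_t dl_task_bw(uint64_t runtime_ns, uint64_t period_ns)
{
	return (runtime_ns << BW_SHIFT) / period_ns;
}

int main(void)
{
	/* hypothetical taskset: two DL tasks being attached to a cpuset */
	struct { uint64_t runtime, period; } tasks[] = {
		{ 10 * 1000 * 1000, 100 * 1000 * 1000 },  /* 10ms / 100ms -> 10% */
		{ 25 * 1000 * 1000, 100 * 1000 * 1000 },  /* 25ms / 100ms -> 25% */
	};
	uint64_t sum_migrate_dl_bw = 0;
	int nr_migrate_dl_tasks = 0;

	for (unsigned i = 0; i < sizeof(tasks) / sizeof(tasks[0]); i++) {
		nr_migrate_dl_tasks++;
		sum_migrate_dl_bw += dl_task_bw(tasks[i].runtime, tasks[i].period);
	}

	/* this total is what cpuset_can_attach() would ask dl_bw_alloc() for */
	printf("%d DL tasks, total bw = %llu/%llu (~%.1f%%)\n",
	       nr_migrate_dl_tasks,
	       (unsigned long long)sum_migrate_dl_bw,
	       (unsigned long long)(1ULL << BW_SHIFT),
	       100.0 * sum_migrate_dl_bw / (1ULL << BW_SHIFT));
	return 0;
}
```
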
1513 | 2288 | cs = css_cs(css); |
---|
1514 | 2289 | |
---|
1515 | 2290 | mutex_lock(&cpuset_mutex); |
---|
1516 | | - css_cs(css)->attach_in_progress--; |
---|
| 2291 | + cs->attach_in_progress--; |
---|
| 2292 | + if (!cs->attach_in_progress) |
---|
| 2293 | + wake_up(&cpuset_attach_wq); |
---|
| 2294 | + |
---|
| 2295 | + if (cs->nr_migrate_dl_tasks) { |
---|
| 2296 | + int cpu = cpumask_any(cs->effective_cpus); |
---|
| 2297 | + |
---|
| 2298 | + dl_bw_free(cpu, cs->sum_migrate_dl_bw); |
---|
| 2299 | + reset_migrate_dl_data(cs); |
---|
| 2300 | + } |
---|
| 2301 | + |
---|
1517 | 2302 | mutex_unlock(&cpuset_mutex); |
---|
1518 | 2303 | } |
---|
1519 | 2304 | |
---|
.. | .. |
---|
1537 | 2322 | cgroup_taskset_first(tset, &css); |
---|
1538 | 2323 | cs = css_cs(css); |
---|
1539 | 2324 | |
---|
1540 | | - cpus_read_lock(); |
---|
| 2325 | + lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */ |
---|
1541 | 2326 | mutex_lock(&cpuset_mutex); |
---|
1542 | | - |
---|
1543 | | - /* prepare for attach */ |
---|
1544 | | - if (cs == &top_cpuset) |
---|
1545 | | - cpumask_copy(cpus_attach, cpu_possible_mask); |
---|
1546 | | - else |
---|
1547 | | - guarantee_online_cpus(cs, cpus_attach); |
---|
1548 | 2327 | |
---|
1549 | 2328 | guarantee_online_mems(cs, &cpuset_attach_nodemask_to); |
---|
1550 | 2329 | |
---|
1551 | 2330 | cgroup_taskset_for_each(task, css, tset) { |
---|
| 2331 | + if (cs != &top_cpuset) |
---|
| 2332 | + guarantee_online_cpus(task, cpus_attach); |
---|
| 2333 | + else |
---|
| 2334 | + cpumask_copy(cpus_attach, task_cpu_possible_mask(task)); |
---|
1552 | 2335 | /* |
---|
1553 | 2336 | * can_attach beforehand should guarantee that this doesn't |
---|
1554 | 2337 | * fail. TODO: have a better way to handle failure here |
---|
1555 | 2338 | */ |
---|
1556 | | - WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach)); |
---|
| 2339 | + WARN_ON_ONCE(update_cpus_allowed(cs, task, cpus_attach)); |
---|
1557 | 2340 | |
---|
1558 | 2341 | cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); |
---|
1559 | 2342 | cpuset_update_task_spread_flag(cs, task); |
---|
.. | .. |
---|
1588 | 2371 | |
---|
1589 | 2372 | cs->old_mems_allowed = cpuset_attach_nodemask_to; |
---|
1590 | 2373 | |
---|
| 2374 | + if (cs->nr_migrate_dl_tasks) { |
---|
| 2375 | + cs->nr_deadline_tasks += cs->nr_migrate_dl_tasks; |
---|
| 2376 | + oldcs->nr_deadline_tasks -= cs->nr_migrate_dl_tasks; |
---|
| 2377 | + reset_migrate_dl_data(cs); |
---|
| 2378 | + } |
---|
| 2379 | + |
---|
1591 | 2380 | cs->attach_in_progress--; |
---|
1592 | 2381 | if (!cs->attach_in_progress) |
---|
1593 | 2382 | wake_up(&cpuset_attach_wq); |
---|
1594 | 2383 | |
---|
1595 | 2384 | mutex_unlock(&cpuset_mutex); |
---|
1596 | | - cpus_read_unlock(); |
---|
1597 | 2385 | } |
---|
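
With this change cpuset_attach() no longer builds one cpus_attach mask for the whole taskset up front; the allowed set is recomputed per task, so that guarantee_online_cpus() (or task_cpu_possible_mask() for the top cpuset) can take the task's own possible CPUs into account on asymmetric systems. A simplified model of that per-task intersection, using plain 64-bit masks instead of struct cpumask (all names here are ours):

```c
#include <stdint.h>
#include <stdio.h>

/*
 * Simplified model: effective CPUs of the destination cpuset, the set of
 * currently online CPUs, and the per-task "possible" mask (which may be
 * narrower than the system mask on asymmetric CPUs).
 */
static uint64_t resolve_cpus_attach(uint64_t cs_effective, uint64_t online,
				    uint64_t task_possible, int is_top_cpuset)
{
	uint64_t mask;

	if (is_top_cpuset)
		return task_possible;	/* top cpuset: task keeps its possible mask */

	/* rough mirror of guarantee_online_cpus(): clamp to online, task-possible CPUs */
	mask = cs_effective & online & task_possible;
	if (!mask)
		mask = online & task_possible;	/* fall back rather than go empty */
	return mask;
}

int main(void)
{
	uint64_t online = 0xff;		/* CPUs 0-7 online */
	uint64_t cs_effective = 0xf0;	/* cpuset allows CPUs 4-7 */
	uint64_t task32bit = 0x0f;	/* e.g. a 32-bit task limited to CPUs 0-3 */

	printf("normal task:  %#llx\n",
	       (unsigned long long)resolve_cpus_attach(cs_effective, online, online, 0));
	printf("limited task: %#llx\n",
	       (unsigned long long)resolve_cpus_attach(cs_effective, online, task32bit, 0));
	return 0;
}
```
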
1598 | 2386 | |
---|
1599 | 2387 | /* The various types of files and directories in a cpuset file system */ |
---|
.. | .. |
---|
1604 | 2392 | FILE_MEMLIST, |
---|
1605 | 2393 | FILE_EFFECTIVE_CPULIST, |
---|
1606 | 2394 | FILE_EFFECTIVE_MEMLIST, |
---|
| 2395 | + FILE_SUBPARTS_CPULIST, |
---|
1607 | 2396 | FILE_CPU_EXCLUSIVE, |
---|
1608 | 2397 | FILE_MEM_EXCLUSIVE, |
---|
1609 | 2398 | FILE_MEM_HARDWALL, |
---|
1610 | 2399 | FILE_SCHED_LOAD_BALANCE, |
---|
| 2400 | + FILE_PARTITION_ROOT, |
---|
1611 | 2401 | FILE_SCHED_RELAX_DOMAIN_LEVEL, |
---|
1612 | 2402 | FILE_MEMORY_PRESSURE_ENABLED, |
---|
1613 | 2403 | FILE_MEMORY_PRESSURE, |
---|
.. | .. |
---|
1622 | 2412 | cpuset_filetype_t type = cft->private; |
---|
1623 | 2413 | int retval = 0; |
---|
1624 | 2414 | |
---|
| 2415 | + get_online_cpus(); |
---|
1625 | 2416 | mutex_lock(&cpuset_mutex); |
---|
1626 | 2417 | if (!is_cpuset_online(cs)) { |
---|
1627 | 2418 | retval = -ENODEV; |
---|
.. | .. |
---|
1659 | 2450 | } |
---|
1660 | 2451 | out_unlock: |
---|
1661 | 2452 | mutex_unlock(&cpuset_mutex); |
---|
| 2453 | + put_online_cpus(); |
---|
1662 | 2454 | return retval; |
---|
1663 | 2455 | } |
---|
1664 | 2456 | |
---|
.. | .. |
---|
1669 | 2461 | cpuset_filetype_t type = cft->private; |
---|
1670 | 2462 | int retval = -ENODEV; |
---|
1671 | 2463 | |
---|
| 2464 | + get_online_cpus(); |
---|
1672 | 2465 | mutex_lock(&cpuset_mutex); |
---|
1673 | 2466 | if (!is_cpuset_online(cs)) |
---|
1674 | 2467 | goto out_unlock; |
---|
.. | .. |
---|
1683 | 2476 | } |
---|
1684 | 2477 | out_unlock: |
---|
1685 | 2478 | mutex_unlock(&cpuset_mutex); |
---|
| 2479 | + put_online_cpus(); |
---|
1686 | 2480 | return retval; |
---|
1687 | 2481 | } |
---|
1688 | 2482 | |
---|
.. | .. |
---|
1721 | 2515 | kernfs_break_active_protection(of->kn); |
---|
1722 | 2516 | flush_work(&cpuset_hotplug_work); |
---|
1723 | 2517 | |
---|
| 2518 | + get_online_cpus(); |
---|
1724 | 2519 | mutex_lock(&cpuset_mutex); |
---|
1725 | 2520 | if (!is_cpuset_online(cs)) |
---|
1726 | 2521 | goto out_unlock; |
---|
.. | .. |
---|
1743 | 2538 | break; |
---|
1744 | 2539 | } |
---|
1745 | 2540 | |
---|
1746 | | - free_trial_cpuset(trialcs); |
---|
| 2541 | + free_cpuset(trialcs); |
---|
1747 | 2542 | out_unlock: |
---|
1748 | 2543 | mutex_unlock(&cpuset_mutex); |
---|
| 2544 | + put_online_cpus(); |
---|
1749 | 2545 | kernfs_unbreak_active_protection(of->kn); |
---|
1750 | 2546 | css_put(&cs->css); |
---|
1751 | 2547 | flush_workqueue(cpuset_migrate_mm_wq); |
---|
.. | .. |
---|
1766 | 2562 | cpuset_filetype_t type = seq_cft(sf)->private; |
---|
1767 | 2563 | int ret = 0; |
---|
1768 | 2564 | |
---|
1769 | | - raw_spin_lock_irq(&callback_lock); |
---|
| 2565 | + spin_lock_irq(&callback_lock); |
---|
1770 | 2566 | |
---|
1771 | 2567 | switch (type) { |
---|
1772 | 2568 | case FILE_CPULIST: |
---|
.. | .. |
---|
1781 | 2577 | case FILE_EFFECTIVE_MEMLIST: |
---|
1782 | 2578 | seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems)); |
---|
1783 | 2579 | break; |
---|
| 2580 | + case FILE_SUBPARTS_CPULIST: |
---|
| 2581 | + seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->subparts_cpus)); |
---|
| 2582 | + break; |
---|
1784 | 2583 | default: |
---|
1785 | 2584 | ret = -EINVAL; |
---|
1786 | 2585 | } |
---|
1787 | 2586 | |
---|
1788 | | - raw_spin_unlock_irq(&callback_lock); |
---|
| 2587 | + spin_unlock_irq(&callback_lock); |
---|
1789 | 2588 | return ret; |
---|
1790 | 2589 | } |
---|
1791 | 2590 | |
---|
.. | .. |
---|
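
cpuset_common_seq_show() prints all of these masks with the %*pbl cpulist format, i.e. comma-separated ranges such as "0-3,8"; the new cpuset.cpus.subpartitions file (debug-only, see dfl_files below) uses the same format. For tooling that consumes those files, a small self-contained parser might look like this (helper name is ours, and it only covers CPUs 0-63):

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Parse a kernel cpulist string such as "0-3,8" into a 64-bit CPU mask. */
static uint64_t parse_cpulist(const char *s)
{
	uint64_t mask = 0;

	while (*s && *s != '\n') {
		char *end;
		long lo = strtol(s, &end, 10);
		long hi = lo;

		if (end == s || lo < 0)		/* not a number: stop */
			break;
		if (*end == '-')
			hi = strtol(end + 1, &end, 10);
		for (long cpu = lo; cpu <= hi && cpu < 64; cpu++)
			mask |= 1ULL << cpu;
		s = (*end == ',') ? end + 1 : end;
	}
	return mask;
}

int main(void)
{
	printf("%#llx\n", (unsigned long long)parse_cpulist("0-3,8"));	/* 0x10f */
	printf("%#llx\n", (unsigned long long)parse_cpulist("2\n"));	/* 0x4   */
	return 0;
}
```
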
1835 | 2634 | return 0; |
---|
1836 | 2635 | } |
---|
1837 | 2636 | |
---|
| 2637 | +static int sched_partition_show(struct seq_file *seq, void *v) |
---|
| 2638 | +{ |
---|
| 2639 | + struct cpuset *cs = css_cs(seq_css(seq)); |
---|
| 2640 | + |
---|
| 2641 | + switch (cs->partition_root_state) { |
---|
| 2642 | + case PRS_ENABLED: |
---|
| 2643 | + seq_puts(seq, "root\n"); |
---|
| 2644 | + break; |
---|
| 2645 | + case PRS_DISABLED: |
---|
| 2646 | + seq_puts(seq, "member\n"); |
---|
| 2647 | + break; |
---|
| 2648 | + case PRS_ERROR: |
---|
| 2649 | + seq_puts(seq, "root invalid\n"); |
---|
| 2650 | + break; |
---|
| 2651 | + } |
---|
| 2652 | + return 0; |
---|
| 2653 | +} |
---|
| 2654 | + |
---|
| 2655 | +static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf, |
---|
| 2656 | + size_t nbytes, loff_t off) |
---|
| 2657 | +{ |
---|
| 2658 | + struct cpuset *cs = css_cs(of_css(of)); |
---|
| 2659 | + int val; |
---|
| 2660 | + int retval = -ENODEV; |
---|
| 2661 | + |
---|
| 2662 | + buf = strstrip(buf); |
---|
| 2663 | + |
---|
| 2664 | + /* |
---|
| 2665 | + * Convert "root" to ENABLED, and convert "member" to DISABLED. |
---|
| 2666 | + */ |
---|
| 2667 | + if (!strcmp(buf, "root")) |
---|
| 2668 | + val = PRS_ENABLED; |
---|
| 2669 | + else if (!strcmp(buf, "member")) |
---|
| 2670 | + val = PRS_DISABLED; |
---|
| 2671 | + else |
---|
| 2672 | + return -EINVAL; |
---|
| 2673 | + |
---|
| 2674 | + css_get(&cs->css); |
---|
| 2675 | + get_online_cpus(); |
---|
| 2676 | + mutex_lock(&cpuset_mutex); |
---|
| 2677 | + if (!is_cpuset_online(cs)) |
---|
| 2678 | + goto out_unlock; |
---|
| 2679 | + |
---|
| 2680 | + retval = update_prstate(cs, val); |
---|
| 2681 | +out_unlock: |
---|
| 2682 | + mutex_unlock(&cpuset_mutex); |
---|
| 2683 | + put_online_cpus(); |
---|
| 2684 | + css_put(&cs->css); |
---|
| 2685 | + return retval ?: nbytes; |
---|
| 2686 | +} |
---|
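
sched_partition_show()/sched_partition_write() back the cgroup v2 cpuset.cpus.partition file: writes of "root" or "member" are funnelled into update_prstate() under get_online_cpus() and cpuset_mutex, and reads report "root", "member" or "root invalid". A minimal userspace sketch of driving it; the cgroup path is hypothetical and error handling is trimmed.

```c
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical cgroup v2 directory for the cpuset being turned into a partition. */
#define CGDIR "/sys/fs/cgroup/rt-partition"

static int write_file(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	char state[64] = "";
	int fd;

	/* Give the child cpuset some CPUs first, then ask for partition root. */
	write_file(CGDIR "/cpuset.cpus", "2-3");
	write_file(CGDIR "/cpuset.cpus.partition", "root");

	/* Read back: "root", "member" or "root invalid". */
	fd = open(CGDIR "/cpuset.cpus.partition", O_RDONLY);
	if (fd >= 0) {
		ssize_t n = read(fd, state, sizeof(state) - 1);

		if (n > 0)
			state[n] = '\0';
		close(fd);
	}
	printf("partition state: %s", state);
	return 0;
}
```
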
1838 | 2687 | |
---|
1839 | 2688 | /* |
---|
1840 | 2689 | * for the common functions, 'private' gives the type of file |
---|
1841 | 2690 | */ |
---|
1842 | 2691 | |
---|
1843 | | -static struct cftype files[] = { |
---|
| 2692 | +static struct cftype legacy_files[] = { |
---|
1844 | 2693 | { |
---|
1845 | 2694 | .name = "cpus", |
---|
1846 | 2695 | .seq_show = cpuset_common_seq_show, |
---|
.. | .. |
---|
1943 | 2792 | }; |
---|
1944 | 2793 | |
---|
1945 | 2794 | /* |
---|
| 2795 | + * This is currently a minimal set for the default hierarchy. It can be |
---|
| 2796 | + * expanded later on by migrating more features and control files from v1. |
---|
| 2797 | + */ |
---|
| 2798 | +static struct cftype dfl_files[] = { |
---|
| 2799 | + { |
---|
| 2800 | + .name = "cpus", |
---|
| 2801 | + .seq_show = cpuset_common_seq_show, |
---|
| 2802 | + .write = cpuset_write_resmask, |
---|
| 2803 | + .max_write_len = (100U + 6 * NR_CPUS), |
---|
| 2804 | + .private = FILE_CPULIST, |
---|
| 2805 | + .flags = CFTYPE_NOT_ON_ROOT, |
---|
| 2806 | + }, |
---|
| 2807 | + |
---|
| 2808 | + { |
---|
| 2809 | + .name = "mems", |
---|
| 2810 | + .seq_show = cpuset_common_seq_show, |
---|
| 2811 | + .write = cpuset_write_resmask, |
---|
| 2812 | + .max_write_len = (100U + 6 * MAX_NUMNODES), |
---|
| 2813 | + .private = FILE_MEMLIST, |
---|
| 2814 | + .flags = CFTYPE_NOT_ON_ROOT, |
---|
| 2815 | + }, |
---|
| 2816 | + |
---|
| 2817 | + { |
---|
| 2818 | + .name = "cpus.effective", |
---|
| 2819 | + .seq_show = cpuset_common_seq_show, |
---|
| 2820 | + .private = FILE_EFFECTIVE_CPULIST, |
---|
| 2821 | + }, |
---|
| 2822 | + |
---|
| 2823 | + { |
---|
| 2824 | + .name = "mems.effective", |
---|
| 2825 | + .seq_show = cpuset_common_seq_show, |
---|
| 2826 | + .private = FILE_EFFECTIVE_MEMLIST, |
---|
| 2827 | + }, |
---|
| 2828 | + |
---|
| 2829 | + { |
---|
| 2830 | + .name = "cpus.partition", |
---|
| 2831 | + .seq_show = sched_partition_show, |
---|
| 2832 | + .write = sched_partition_write, |
---|
| 2833 | + .private = FILE_PARTITION_ROOT, |
---|
| 2834 | + .flags = CFTYPE_NOT_ON_ROOT, |
---|
| 2835 | + }, |
---|
| 2836 | + |
---|
| 2837 | + { |
---|
| 2838 | + .name = "cpus.subpartitions", |
---|
| 2839 | + .seq_show = cpuset_common_seq_show, |
---|
| 2840 | + .private = FILE_SUBPARTS_CPULIST, |
---|
| 2841 | + .flags = CFTYPE_DEBUG, |
---|
| 2842 | + }, |
---|
| 2843 | + |
---|
| 2844 | + { } /* terminate */ |
---|
| 2845 | +}; |
---|
| 2846 | + |
---|
| 2847 | + |
---|
| 2848 | +/* |
---|
1946 | 2849 | * cpuset_css_alloc - allocate a cpuset css |
---|
1947 | 2850 | * cgrp: control group that the new cpuset will be part of |
---|
1948 | 2851 | */ |
---|
.. | .. |
---|
1958 | 2861 | cs = kzalloc(sizeof(*cs), GFP_KERNEL); |
---|
1959 | 2862 | if (!cs) |
---|
1960 | 2863 | return ERR_PTR(-ENOMEM); |
---|
1961 | | - if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) |
---|
1962 | | - goto free_cs; |
---|
1963 | | - if (!alloc_cpumask_var(&cs->cpus_requested, GFP_KERNEL)) |
---|
1964 | | - goto free_allowed; |
---|
1965 | | - if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL)) |
---|
1966 | | - goto free_requested; |
---|
| 2864 | + |
---|
| 2865 | + if (alloc_cpumasks(cs, NULL)) { |
---|
| 2866 | + kfree(cs); |
---|
| 2867 | + return ERR_PTR(-ENOMEM); |
---|
| 2868 | + } |
---|
1967 | 2869 | |
---|
1968 | 2870 | set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); |
---|
1969 | | - cpumask_clear(cs->cpus_allowed); |
---|
1970 | | - cpumask_clear(cs->cpus_requested); |
---|
1971 | 2871 | nodes_clear(cs->mems_allowed); |
---|
1972 | | - cpumask_clear(cs->effective_cpus); |
---|
1973 | 2872 | nodes_clear(cs->effective_mems); |
---|
1974 | 2873 | fmeter_init(&cs->fmeter); |
---|
1975 | 2874 | cs->relax_domain_level = -1; |
---|
1976 | 2875 | |
---|
1977 | 2876 | return &cs->css; |
---|
1978 | | - |
---|
1979 | | -free_requested: |
---|
1980 | | - free_cpumask_var(cs->cpus_requested); |
---|
1981 | | -free_allowed: |
---|
1982 | | - free_cpumask_var(cs->cpus_allowed); |
---|
1983 | | -free_cs: |
---|
1984 | | - kfree(cs); |
---|
1985 | | - return ERR_PTR(-ENOMEM); |
---|
1986 | 2877 | } |
---|
1987 | 2878 | |
---|
1988 | 2879 | static int cpuset_css_online(struct cgroup_subsys_state *css) |
---|
.. | .. |
---|
1995 | 2886 | if (!parent) |
---|
1996 | 2887 | return 0; |
---|
1997 | 2888 | |
---|
| 2889 | + get_online_cpus(); |
---|
1998 | 2890 | mutex_lock(&cpuset_mutex); |
---|
1999 | 2891 | |
---|
2000 | 2892 | set_bit(CS_ONLINE, &cs->flags); |
---|
.. | .. |
---|
2005 | 2897 | |
---|
2006 | 2898 | cpuset_inc(); |
---|
2007 | 2899 | |
---|
2008 | | - raw_spin_lock_irq(&callback_lock); |
---|
| 2900 | + spin_lock_irq(&callback_lock); |
---|
2009 | 2901 | if (is_in_v2_mode()) { |
---|
2010 | 2902 | cpumask_copy(cs->effective_cpus, parent->effective_cpus); |
---|
2011 | 2903 | cs->effective_mems = parent->effective_mems; |
---|
| 2904 | + cs->use_parent_ecpus = true; |
---|
| 2905 | + parent->child_ecpus_count++; |
---|
2012 | 2906 | } |
---|
2013 | | - raw_spin_unlock_irq(&callback_lock); |
---|
| 2907 | + spin_unlock_irq(&callback_lock); |
---|
2014 | 2908 | |
---|
2015 | 2909 | if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) |
---|
2016 | 2910 | goto out_unlock; |
---|
.. | .. |
---|
2037 | 2931 | } |
---|
2038 | 2932 | rcu_read_unlock(); |
---|
2039 | 2933 | |
---|
2040 | | - raw_spin_lock_irq(&callback_lock); |
---|
| 2934 | + spin_lock_irq(&callback_lock); |
---|
2041 | 2935 | cs->mems_allowed = parent->mems_allowed; |
---|
2042 | 2936 | cs->effective_mems = parent->mems_allowed; |
---|
2043 | 2937 | cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); |
---|
2044 | 2938 | cpumask_copy(cs->cpus_requested, parent->cpus_requested); |
---|
2045 | 2939 | cpumask_copy(cs->effective_cpus, parent->cpus_allowed); |
---|
2046 | | - raw_spin_unlock_irq(&callback_lock); |
---|
| 2940 | + spin_unlock_irq(&callback_lock); |
---|
2047 | 2941 | out_unlock: |
---|
2048 | 2942 | mutex_unlock(&cpuset_mutex); |
---|
| 2943 | + put_online_cpus(); |
---|
2049 | 2944 | return 0; |
---|
2050 | 2945 | } |
---|
2051 | 2946 | |
---|
2052 | 2947 | /* |
---|
2053 | 2948 | * If the cpuset being removed has its flag 'sched_load_balance' |
---|
2054 | 2949 | * enabled, then simulate turning sched_load_balance off, which |
---|
2055 | | - * will call rebuild_sched_domains_locked(). |
---|
| 2950 | + * will call rebuild_sched_domains_locked(). That is not needed |
---|
| 2951 | + * in the default hierarchy where only changes in partition |
---|
| 2952 | + * will cause repartitioning. |
---|
| 2953 | + * |
---|
| 2954 | + * If the cpuset has the 'sched.partition' flag enabled, simulate |
---|
| 2955 | + * turning 'sched.partition' off. |
---|
2056 | 2956 | */ |
---|
2057 | 2957 | |
---|
2058 | 2958 | static void cpuset_css_offline(struct cgroup_subsys_state *css) |
---|
2059 | 2959 | { |
---|
2060 | 2960 | struct cpuset *cs = css_cs(css); |
---|
2061 | 2961 | |
---|
| 2962 | + get_online_cpus(); |
---|
2062 | 2963 | mutex_lock(&cpuset_mutex); |
---|
2063 | 2964 | |
---|
2064 | | - if (is_sched_load_balance(cs)) |
---|
| 2965 | + if (is_partition_root(cs)) |
---|
| 2966 | + update_prstate(cs, 0); |
---|
| 2967 | + |
---|
| 2968 | + if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && |
---|
| 2969 | + is_sched_load_balance(cs)) |
---|
2065 | 2970 | update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); |
---|
| 2971 | + |
---|
| 2972 | + if (cs->use_parent_ecpus) { |
---|
| 2973 | + struct cpuset *parent = parent_cs(cs); |
---|
| 2974 | + |
---|
| 2975 | + cs->use_parent_ecpus = false; |
---|
| 2976 | + parent->child_ecpus_count--; |
---|
| 2977 | + } |
---|
2066 | 2978 | |
---|
2067 | 2979 | cpuset_dec(); |
---|
2068 | 2980 | clear_bit(CS_ONLINE, &cs->flags); |
---|
2069 | 2981 | |
---|
2070 | 2982 | mutex_unlock(&cpuset_mutex); |
---|
| 2983 | + put_online_cpus(); |
---|
2071 | 2984 | } |
---|
2072 | 2985 | |
---|
2073 | 2986 | static void cpuset_css_free(struct cgroup_subsys_state *css) |
---|
2074 | 2987 | { |
---|
2075 | 2988 | struct cpuset *cs = css_cs(css); |
---|
2076 | 2989 | |
---|
2077 | | - free_cpumask_var(cs->effective_cpus); |
---|
2078 | | - free_cpumask_var(cs->cpus_allowed); |
---|
2079 | | - free_cpumask_var(cs->cpus_requested); |
---|
2080 | | - kfree(cs); |
---|
| 2990 | + free_cpuset(cs); |
---|
2081 | 2991 | } |
---|
2082 | 2992 | |
---|
2083 | 2993 | static void cpuset_bind(struct cgroup_subsys_state *root_css) |
---|
2084 | 2994 | { |
---|
2085 | 2995 | mutex_lock(&cpuset_mutex); |
---|
2086 | | - raw_spin_lock_irq(&callback_lock); |
---|
| 2996 | + spin_lock_irq(&callback_lock); |
---|
2087 | 2997 | |
---|
2088 | 2998 | if (is_in_v2_mode()) { |
---|
2089 | 2999 | cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); |
---|
.. | .. |
---|
2094 | 3004 | top_cpuset.mems_allowed = top_cpuset.effective_mems; |
---|
2095 | 3005 | } |
---|
2096 | 3006 | |
---|
2097 | | - raw_spin_unlock_irq(&callback_lock); |
---|
| 3007 | + spin_unlock_irq(&callback_lock); |
---|
2098 | 3008 | mutex_unlock(&cpuset_mutex); |
---|
2099 | 3009 | } |
---|
2100 | 3010 | |
---|
.. | .. |
---|
2105 | 3015 | */ |
---|
2106 | 3016 | static void cpuset_fork(struct task_struct *task) |
---|
2107 | 3017 | { |
---|
| 3018 | + int inherit_cpus = 0; |
---|
2108 | 3019 | if (task_css_is_root(task, cpuset_cgrp_id)) |
---|
2109 | 3020 | return; |
---|
2110 | 3021 | |
---|
2111 | | - set_cpus_allowed_ptr(task, current->cpus_ptr); |
---|
| 3022 | + trace_android_rvh_cpuset_fork(task, &inherit_cpus); |
---|
| 3023 | + if (!inherit_cpus) |
---|
| 3024 | + set_cpus_allowed_ptr(task, current->cpus_ptr); |
---|
2112 | 3025 | task->mems_allowed = current->mems_allowed; |
---|
2113 | 3026 | } |
---|
2114 | 3027 | |
---|
.. | .. |
---|
2123 | 3036 | .post_attach = cpuset_post_attach, |
---|
2124 | 3037 | .bind = cpuset_bind, |
---|
2125 | 3038 | .fork = cpuset_fork, |
---|
2126 | | - .legacy_cftypes = files, |
---|
| 3039 | + .legacy_cftypes = legacy_files, |
---|
| 3040 | + .dfl_cftypes = dfl_files, |
---|
2127 | 3041 | .early_init = true, |
---|
| 3042 | + .threaded = true, |
---|
2128 | 3043 | }; |
---|
2129 | 3044 | |
---|
2130 | 3045 | /** |
---|
2131 | 3046 | * cpuset_init - initialize cpusets at system boot |
---|
2132 | 3047 | * |
---|
2133 | | - * Description: Initialize top_cpuset and the cpuset internal file system, |
---|
| 3048 | + * Description: Initialize top_cpuset |
---|
2134 | 3049 | **/ |
---|
2135 | 3050 | |
---|
2136 | 3051 | int __init cpuset_init(void) |
---|
2137 | 3052 | { |
---|
2138 | | - int err = 0; |
---|
2139 | | - |
---|
2140 | 3053 | BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL)); |
---|
2141 | 3054 | BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL)); |
---|
| 3055 | + BUG_ON(!zalloc_cpumask_var(&top_cpuset.subparts_cpus, GFP_KERNEL)); |
---|
2142 | 3056 | BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_requested, GFP_KERNEL)); |
---|
2143 | 3057 | |
---|
2144 | 3058 | cpumask_setall(top_cpuset.cpus_allowed); |
---|
.. | .. |
---|
2150 | 3064 | fmeter_init(&top_cpuset.fmeter); |
---|
2151 | 3065 | set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags); |
---|
2152 | 3066 | top_cpuset.relax_domain_level = -1; |
---|
2153 | | - |
---|
2154 | | - err = register_filesystem(&cpuset_fs_type); |
---|
2155 | | - if (err < 0) |
---|
2156 | | - return err; |
---|
2157 | 3067 | |
---|
2158 | 3068 | BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL)); |
---|
2159 | 3069 | |
---|
.. | .. |
---|
2194 | 3104 | { |
---|
2195 | 3105 | bool is_empty; |
---|
2196 | 3106 | |
---|
2197 | | - raw_spin_lock_irq(&callback_lock); |
---|
| 3107 | + spin_lock_irq(&callback_lock); |
---|
2198 | 3108 | cpumask_copy(cs->cpus_allowed, new_cpus); |
---|
2199 | 3109 | cpumask_copy(cs->effective_cpus, new_cpus); |
---|
2200 | 3110 | cs->mems_allowed = *new_mems; |
---|
2201 | 3111 | cs->effective_mems = *new_mems; |
---|
2202 | | - raw_spin_unlock_irq(&callback_lock); |
---|
| 3112 | + spin_unlock_irq(&callback_lock); |
---|
2203 | 3113 | |
---|
2204 | 3114 | /* |
---|
2205 | 3115 | * Don't call update_tasks_cpumask() if the cpuset becomes empty, |
---|
.. | .. |
---|
2236 | 3146 | if (nodes_empty(*new_mems)) |
---|
2237 | 3147 | *new_mems = parent_cs(cs)->effective_mems; |
---|
2238 | 3148 | |
---|
2239 | | - raw_spin_lock_irq(&callback_lock); |
---|
| 3149 | + spin_lock_irq(&callback_lock); |
---|
2240 | 3150 | cpumask_copy(cs->effective_cpus, new_cpus); |
---|
2241 | 3151 | cs->effective_mems = *new_mems; |
---|
2242 | | - raw_spin_unlock_irq(&callback_lock); |
---|
| 3152 | + spin_unlock_irq(&callback_lock); |
---|
2243 | 3153 | |
---|
2244 | 3154 | if (cpus_updated) |
---|
2245 | 3155 | update_tasks_cpumask(cs); |
---|
.. | .. |
---|
2247 | 3157 | update_tasks_nodemask(cs); |
---|
2248 | 3158 | } |
---|
2249 | 3159 | |
---|
| 3160 | +static bool force_rebuild; |
---|
| 3161 | + |
---|
| 3162 | +void cpuset_force_rebuild(void) |
---|
| 3163 | +{ |
---|
| 3164 | + force_rebuild = true; |
---|
| 3165 | +} |
---|
| 3166 | + |
---|
2250 | 3167 | /** |
---|
2251 | 3168 | * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug |
---|
2252 | 3169 | * @cs: cpuset in interest |
---|
| 3170 | + * @tmp: the tmpmasks structure pointer |
---|
2253 | 3171 | * |
---|
2254 | 3172 | * Compare @cs's cpu and mem masks against top_cpuset and if some have gone |
---|
2255 | 3173 | * offline, update @cs accordingly. If @cs ends up with no CPU or memory, |
---|
2256 | 3174 | * all its tasks are moved to the nearest ancestor with both resources. |
---|
2257 | 3175 | */ |
---|
2258 | | -static void cpuset_hotplug_update_tasks(struct cpuset *cs) |
---|
| 3176 | +static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) |
---|
2259 | 3177 | { |
---|
2260 | 3178 | static cpumask_t new_cpus; |
---|
2261 | 3179 | static nodemask_t new_mems; |
---|
2262 | 3180 | bool cpus_updated; |
---|
2263 | 3181 | bool mems_updated; |
---|
| 3182 | + struct cpuset *parent; |
---|
2264 | 3183 | retry: |
---|
2265 | 3184 | wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); |
---|
2266 | 3185 | |
---|
.. | .. |
---|
2275 | 3194 | goto retry; |
---|
2276 | 3195 | } |
---|
2277 | 3196 | |
---|
2278 | | - cpumask_and(&new_cpus, cs->cpus_requested, parent_cs(cs)->effective_cpus); |
---|
2279 | | - nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems); |
---|
| 3197 | + parent = parent_cs(cs); |
---|
| 3198 | + compute_effective_cpumask(&new_cpus, cs, parent); |
---|
| 3199 | + nodes_and(new_mems, cs->mems_allowed, parent->effective_mems); |
---|
2280 | 3200 | |
---|
| 3201 | + if (cs->nr_subparts_cpus) |
---|
| 3202 | + /* |
---|
| 3203 | + * Make sure that CPUs allocated to child partitions |
---|
| 3204 | + * do not show up in effective_cpus. |
---|
| 3205 | + */ |
---|
| 3206 | + cpumask_andnot(&new_cpus, &new_cpus, cs->subparts_cpus); |
---|
| 3207 | + |
---|
| 3208 | + if (!tmp || !cs->partition_root_state) |
---|
| 3209 | + goto update_tasks; |
---|
| 3210 | + |
---|
| 3211 | + /* |
---|
| 3212 | + * In the unlikely event that a partition root has empty |
---|
| 3213 | + * effective_cpus or its parent becomes erroneous, we have to |
---|
| 3214 | + * transition it to the erroneous state. |
---|
| 3215 | + */ |
---|
| 3216 | + if (is_partition_root(cs) && (cpumask_empty(&new_cpus) || |
---|
| 3217 | + (parent->partition_root_state == PRS_ERROR))) { |
---|
| 3218 | + if (cs->nr_subparts_cpus) { |
---|
| 3219 | + spin_lock_irq(&callback_lock); |
---|
| 3220 | + cs->nr_subparts_cpus = 0; |
---|
| 3221 | + cpumask_clear(cs->subparts_cpus); |
---|
| 3222 | + spin_unlock_irq(&callback_lock); |
---|
| 3223 | + compute_effective_cpumask(&new_cpus, cs, parent); |
---|
| 3224 | + } |
---|
| 3225 | + |
---|
| 3226 | + /* |
---|
| 3227 | + * If the effective_cpus is empty because the child |
---|
| 3228 | + * partitions take away all the CPUs, we can keep |
---|
| 3229 | + * the current partition and let the child partitions |
---|
| 3230 | + * fight for available CPUs. |
---|
| 3231 | + */ |
---|
| 3232 | + if ((parent->partition_root_state == PRS_ERROR) || |
---|
| 3233 | + cpumask_empty(&new_cpus)) { |
---|
| 3234 | + update_parent_subparts_cpumask(cs, partcmd_disable, |
---|
| 3235 | + NULL, tmp); |
---|
| 3236 | + spin_lock_irq(&callback_lock); |
---|
| 3237 | + cs->partition_root_state = PRS_ERROR; |
---|
| 3238 | + spin_unlock_irq(&callback_lock); |
---|
| 3239 | + } |
---|
| 3240 | + cpuset_force_rebuild(); |
---|
| 3241 | + } |
---|
| 3242 | + |
---|
| 3243 | + /* |
---|
| 3244 | + * On the other hand, an erroneous partition root may be transitioned |
---|
| 3245 | + * back to a regular one or a partition root with no CPU allocated |
---|
| 3246 | + * from the parent may change to erroneous. |
---|
| 3247 | + */ |
---|
| 3248 | + if (is_partition_root(parent) && |
---|
| 3249 | + ((cs->partition_root_state == PRS_ERROR) || |
---|
| 3250 | + !cpumask_intersects(&new_cpus, parent->subparts_cpus)) && |
---|
| 3251 | + update_parent_subparts_cpumask(cs, partcmd_update, NULL, tmp)) |
---|
| 3252 | + cpuset_force_rebuild(); |
---|
| 3253 | + |
---|
| 3254 | +update_tasks: |
---|
2281 | 3255 | cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); |
---|
2282 | 3256 | mems_updated = !nodes_equal(new_mems, cs->effective_mems); |
---|
2283 | 3257 | |
---|
.. | .. |
---|
2289 | 3263 | cpus_updated, mems_updated); |
---|
2290 | 3264 | |
---|
2291 | 3265 | mutex_unlock(&cpuset_mutex); |
---|
2292 | | -} |
---|
2293 | | - |
---|
2294 | | -static bool force_rebuild; |
---|
2295 | | - |
---|
2296 | | -void cpuset_force_rebuild(void) |
---|
2297 | | -{ |
---|
2298 | | - force_rebuild = true; |
---|
2299 | 3266 | } |
---|
2300 | 3267 | |
---|
2301 | 3268 | /** |
---|
.. | .. |
---|
2314 | 3281 | * Note that CPU offlining during suspend is ignored. We don't modify |
---|
2315 | 3282 | * cpusets across suspend/resume cycles at all. |
---|
2316 | 3283 | */ |
---|
2317 | | -static void cpuset_hotplug_workfn(struct work_struct *work) |
---|
| 3284 | +void cpuset_hotplug_workfn(struct work_struct *work) |
---|
2318 | 3285 | { |
---|
2319 | 3286 | static cpumask_t new_cpus; |
---|
2320 | 3287 | static nodemask_t new_mems; |
---|
2321 | 3288 | bool cpus_updated, mems_updated; |
---|
2322 | 3289 | bool on_dfl = is_in_v2_mode(); |
---|
| 3290 | + struct tmpmasks tmp, *ptmp = NULL; |
---|
| 3291 | + |
---|
| 3292 | + if (on_dfl && !alloc_cpumasks(NULL, &tmp)) |
---|
| 3293 | + ptmp = &tmp; |
---|
2323 | 3294 | |
---|
2324 | 3295 | mutex_lock(&cpuset_mutex); |
---|
2325 | 3296 | |
---|
.. | .. |
---|
2327 | 3298 | cpumask_copy(&new_cpus, cpu_active_mask); |
---|
2328 | 3299 | new_mems = node_states[N_MEMORY]; |
---|
2329 | 3300 | |
---|
| 3301 | + /* |
---|
| 3302 | + * If subparts_cpus is populated, it is likely that the check below |
---|
| 3303 | + * will produce a false positive on cpus_updated when the cpu list |
---|
| 3304 | + * isn't changed. It is extra work, but it is better to be safe. |
---|
| 3305 | + */ |
---|
2330 | 3306 | cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus); |
---|
2331 | 3307 | mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems); |
---|
2332 | 3308 | |
---|
| 3309 | + /* |
---|
| 3310 | + * In the rare case that hotplug removes all the cpus in subparts_cpus, |
---|
| 3311 | + * we assume that cpus are updated. |
---|
| 3312 | + */ |
---|
| 3313 | + if (!cpus_updated && top_cpuset.nr_subparts_cpus) |
---|
| 3314 | + cpus_updated = true; |
---|
| 3315 | + |
---|
2333 | 3316 | /* synchronize cpus_allowed to cpu_active_mask */ |
---|
2334 | 3317 | if (cpus_updated) { |
---|
2335 | | - raw_spin_lock_irq(&callback_lock); |
---|
| 3318 | + spin_lock_irq(&callback_lock); |
---|
2336 | 3319 | if (!on_dfl) |
---|
2337 | 3320 | cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); |
---|
| 3321 | + /* |
---|
| 3322 | + * Make sure that CPUs allocated to child partitions |
---|
| 3323 | + * do not show up in effective_cpus. If no CPU is left, |
---|
| 3324 | + * we clear the subparts_cpus & let the child partitions |
---|
| 3325 | + * fight for the CPUs again. |
---|
| 3326 | + */ |
---|
| 3327 | + if (top_cpuset.nr_subparts_cpus) { |
---|
| 3328 | + if (cpumask_subset(&new_cpus, |
---|
| 3329 | + top_cpuset.subparts_cpus)) { |
---|
| 3330 | + top_cpuset.nr_subparts_cpus = 0; |
---|
| 3331 | + cpumask_clear(top_cpuset.subparts_cpus); |
---|
| 3332 | + } else { |
---|
| 3333 | + cpumask_andnot(&new_cpus, &new_cpus, |
---|
| 3334 | + top_cpuset.subparts_cpus); |
---|
| 3335 | + } |
---|
| 3336 | + } |
---|
2338 | 3337 | cpumask_copy(top_cpuset.effective_cpus, &new_cpus); |
---|
2339 | | - raw_spin_unlock_irq(&callback_lock); |
---|
| 3338 | + spin_unlock_irq(&callback_lock); |
---|
2340 | 3339 | /* we don't mess with cpumasks of tasks in top_cpuset */ |
---|
2341 | 3340 | } |
---|
2342 | 3341 | |
---|
2343 | 3342 | /* synchronize mems_allowed to N_MEMORY */ |
---|
2344 | 3343 | if (mems_updated) { |
---|
2345 | | - raw_spin_lock_irq(&callback_lock); |
---|
| 3344 | + spin_lock_irq(&callback_lock); |
---|
2346 | 3345 | if (!on_dfl) |
---|
2347 | 3346 | top_cpuset.mems_allowed = new_mems; |
---|
2348 | 3347 | top_cpuset.effective_mems = new_mems; |
---|
2349 | | - raw_spin_unlock_irq(&callback_lock); |
---|
| 3348 | + spin_unlock_irq(&callback_lock); |
---|
2350 | 3349 | update_tasks_nodemask(&top_cpuset); |
---|
2351 | 3350 | } |
---|
2352 | 3351 | |
---|
.. | .. |
---|
2363 | 3362 | continue; |
---|
2364 | 3363 | rcu_read_unlock(); |
---|
2365 | 3364 | |
---|
2366 | | - cpuset_hotplug_update_tasks(cs); |
---|
| 3365 | + cpuset_hotplug_update_tasks(cs, ptmp); |
---|
2367 | 3366 | |
---|
2368 | 3367 | rcu_read_lock(); |
---|
2369 | 3368 | css_put(&cs->css); |
---|
.. | .. |
---|
2376 | 3375 | force_rebuild = false; |
---|
2377 | 3376 | rebuild_sched_domains(); |
---|
2378 | 3377 | } |
---|
| 3378 | + |
---|
| 3379 | + free_cpumasks(NULL, ptmp); |
---|
2379 | 3380 | } |
---|
2380 | 3381 | |
---|
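
cpuset_hotplug_workfn() runs from a workqueue after a CPU comes or goes (cpuset_update_active_cpus() below just schedules it), resynchronising top_cpuset and, on the default hierarchy, every descendant and its partitions with cpu_active_mask. Because the work is deferred, userspace only sees cpuset.cpus.effective change shortly after the hotplug event itself. A small observation sketch, assuming the usual sysfs/cgroup2 paths and root privileges; the one-second sleep is just a crude way of letting the deferred work run.

```c
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Assumed sysfs/cgroup2 locations; adjust for the system at hand. */
#define EFFECTIVE  "/sys/fs/cgroup/cpuset.cpus.effective"
#define CPU3_CTRL  "/sys/devices/system/cpu/cpu3/online"

static void read_line(const char *path, char *buf, size_t len)
{
	FILE *f = fopen(path, "r");

	buf[0] = '\0';
	if (f) {
		if (fgets(buf, len, f))
			buf[strcspn(buf, "\n")] = '\0';
		fclose(f);
	}
}

static void write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (f) {
		fputs(val, f);
		fclose(f);
	}
}

int main(void)
{
	char before[256], after[256];

	read_line(EFFECTIVE, before, sizeof(before));
	write_str(CPU3_CTRL, "0");		/* offline CPU 3 (needs root) */
	sleep(1);				/* hotplug work is asynchronous */
	read_line(EFFECTIVE, after, sizeof(after));
	write_str(CPU3_CTRL, "1");		/* bring it back */

	printf("effective before: %s\nafter offline:    %s\n", before, after);
	return 0;
}
```
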
2381 | 3382 | void cpuset_update_active_cpus(void) |
---|
.. | .. |
---|
2386 | 3387 | * to a work item to avoid reverse locking order. |
---|
2387 | 3388 | */ |
---|
2388 | 3389 | schedule_work(&cpuset_hotplug_work); |
---|
| 3390 | +} |
---|
| 3391 | + |
---|
| 3392 | +void cpuset_update_active_cpus_affine(int cpu) |
---|
| 3393 | +{ |
---|
| 3394 | + schedule_work_on(cpu, &cpuset_hotplug_work); |
---|
2389 | 3395 | } |
---|
2390 | 3396 | |
---|
2391 | 3397 | void cpuset_wait_for_hotplug(void) |
---|
.. | .. |
---|
2417 | 3423 | */ |
---|
2418 | 3424 | void __init cpuset_init_smp(void) |
---|
2419 | 3425 | { |
---|
2420 | | - cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); |
---|
2421 | | - top_cpuset.mems_allowed = node_states[N_MEMORY]; |
---|
| 3426 | + /* |
---|
| 3427 | + * cpus_allowed/mems_allowed set to v2 values in the initial |
---|
| 3428 | + * cpuset_bind() call will be reset to v1 values in another |
---|
| 3429 | + * cpuset_bind() call when v1 cpuset is mounted. |
---|
| 3430 | + */ |
---|
2422 | 3431 | top_cpuset.old_mems_allowed = top_cpuset.mems_allowed; |
---|
2423 | 3432 | |
---|
2424 | 3433 | cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask); |
---|
.. | .. |
---|
2445 | 3454 | { |
---|
2446 | 3455 | unsigned long flags; |
---|
2447 | 3456 | |
---|
2448 | | - raw_spin_lock_irqsave(&callback_lock, flags); |
---|
| 3457 | + spin_lock_irqsave(&callback_lock, flags); |
---|
2449 | 3458 | rcu_read_lock(); |
---|
2450 | | - guarantee_online_cpus(task_cs(tsk), pmask); |
---|
| 3459 | + guarantee_online_cpus(tsk, pmask); |
---|
2451 | 3460 | rcu_read_unlock(); |
---|
2452 | | - raw_spin_unlock_irqrestore(&callback_lock, flags); |
---|
| 3461 | + spin_unlock_irqrestore(&callback_lock, flags); |
---|
2453 | 3462 | } |
---|
2454 | | - |
---|
| 3463 | +EXPORT_SYMBOL_GPL(cpuset_cpus_allowed); |
---|
2455 | 3464 | /** |
---|
2456 | 3465 | * cpuset_cpus_allowed_fallback - final fallback before complete catastrophe. |
---|
2457 | 3466 | * @tsk: pointer to task_struct with which the scheduler is struggling |
---|
.. | .. |
---|
2466 | 3475 | |
---|
2467 | 3476 | void cpuset_cpus_allowed_fallback(struct task_struct *tsk) |
---|
2468 | 3477 | { |
---|
| 3478 | + const struct cpumask *possible_mask = task_cpu_possible_mask(tsk); |
---|
| 3479 | + const struct cpumask *cs_mask; |
---|
| 3480 | + |
---|
2469 | 3481 | rcu_read_lock(); |
---|
2470 | | - do_set_cpus_allowed(tsk, is_in_v2_mode() ? |
---|
2471 | | - task_cs(tsk)->cpus_allowed : cpu_possible_mask); |
---|
| 3482 | + cs_mask = task_cs(tsk)->cpus_allowed; |
---|
| 3483 | + |
---|
| 3484 | + if (!is_in_v2_mode() || !cpumask_subset(cs_mask, possible_mask)) |
---|
| 3485 | + goto unlock; /* select_fallback_rq will try harder */ |
---|
| 3486 | + |
---|
| 3487 | + do_set_cpus_allowed(tsk, cs_mask); |
---|
| 3488 | +unlock: |
---|
2472 | 3489 | rcu_read_unlock(); |
---|
2473 | 3490 | |
---|
2474 | 3491 | /* |
---|
.. | .. |
---|
2510 | 3527 | nodemask_t mask; |
---|
2511 | 3528 | unsigned long flags; |
---|
2512 | 3529 | |
---|
2513 | | - raw_spin_lock_irqsave(&callback_lock, flags); |
---|
| 3530 | + spin_lock_irqsave(&callback_lock, flags); |
---|
2514 | 3531 | rcu_read_lock(); |
---|
2515 | 3532 | guarantee_online_mems(task_cs(tsk), &mask); |
---|
2516 | 3533 | rcu_read_unlock(); |
---|
2517 | | - raw_spin_unlock_irqrestore(&callback_lock, flags); |
---|
| 3534 | + spin_unlock_irqrestore(&callback_lock, flags); |
---|
2518 | 3535 | |
---|
2519 | 3536 | return mask; |
---|
2520 | 3537 | } |
---|
.. | .. |
---|
2606 | 3623 | return true; |
---|
2607 | 3624 | |
---|
2608 | 3625 | /* Not hardwall and node outside mems_allowed: scan up cpusets */ |
---|
2609 | | - raw_spin_lock_irqsave(&callback_lock, flags); |
---|
| 3626 | + spin_lock_irqsave(&callback_lock, flags); |
---|
2610 | 3627 | |
---|
2611 | 3628 | rcu_read_lock(); |
---|
2612 | 3629 | cs = nearest_hardwall_ancestor(task_cs(current)); |
---|
2613 | 3630 | allowed = node_isset(node, cs->mems_allowed); |
---|
2614 | 3631 | rcu_read_unlock(); |
---|
2615 | 3632 | |
---|
2616 | | - raw_spin_unlock_irqrestore(&callback_lock, flags); |
---|
| 3633 | + spin_unlock_irqrestore(&callback_lock, flags); |
---|
2617 | 3634 | return allowed; |
---|
2618 | 3635 | } |
---|
2619 | 3636 | |
---|
.. | .. |
---|
2699 | 3716 | rcu_read_lock(); |
---|
2700 | 3717 | |
---|
2701 | 3718 | cgrp = task_cs(current)->css.cgroup; |
---|
2702 | | - pr_info("%s cpuset=", current->comm); |
---|
| 3719 | + pr_cont(",cpuset="); |
---|
2703 | 3720 | pr_cont_cgroup_name(cgrp); |
---|
2704 | | - pr_cont(" mems_allowed=%*pbl\n", |
---|
| 3721 | + pr_cont(",mems_allowed=%*pbl", |
---|
2705 | 3722 | nodemask_pr_args(¤t->mems_allowed)); |
---|
2706 | 3723 | |
---|
2707 | 3724 | rcu_read_unlock(); |
---|