| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | #include "cgroup-internal.h" |
|---|
| 2 | 3 | |
|---|
| 3 | 4 | #include <linux/ctype.h> |
|---|
| .. | .. |
|---|
| 13 | 14 | #include <linux/delayacct.h> |
|---|
| 14 | 15 | #include <linux/pid_namespace.h> |
|---|
| 15 | 16 | #include <linux/cgroupstats.h> |
|---|
| 17 | +#include <linux/fs_parser.h> |
|---|
| 16 | 18 | |
|---|
| 17 | 19 | #include <trace/events/cgroup.h> |
|---|
| 20 | +#include <trace/hooks/cgroup.h> |
|---|
| 18 | 21 | |
|---|
| 19 | 22 | /* |
|---|
| 20 | 23 | * pidlists linger the following amount before being destroyed. The goal |
|---|
| .. | .. |
|---|
| 36 | 39 | */ |
|---|
| 37 | 40 | static struct workqueue_struct *cgroup_pidlist_destroy_wq; |
|---|
| 38 | 41 | |
|---|
| 39 | | -/* |
|---|
| 40 | | - * Protects cgroup_subsys->release_agent_path. Modifying it also requires |
|---|
| 41 | | - * cgroup_mutex. Reading requires either cgroup_mutex or this spinlock. |
|---|
| 42 | | - */ |
|---|
| 42 | +/* protects cgroup_subsys->release_agent_path */ |
|---|
| 43 | 43 | static DEFINE_SPINLOCK(release_agent_path_lock); |
|---|
| 44 | 44 | |
|---|
| 45 | 45 | bool cgroup1_ssid_disabled(int ssid) |
|---|
| .. | .. |
|---|
| 58 | 58 | int retval = 0; |
|---|
| 59 | 59 | |
|---|
| 60 | 60 | mutex_lock(&cgroup_mutex); |
|---|
| 61 | + cpus_read_lock(); |
|---|
| 61 | 62 | percpu_down_write(&cgroup_threadgroup_rwsem); |
|---|
| 62 | 63 | for_each_root(root) { |
|---|
| 63 | 64 | struct cgroup *from_cgrp; |
|---|
| .. | .. |
|---|
| 74 | 75 | break; |
|---|
| 75 | 76 | } |
|---|
| 76 | 77 | percpu_up_write(&cgroup_threadgroup_rwsem); |
|---|
| 78 | + cpus_read_unlock(); |
|---|
| 77 | 79 | mutex_unlock(&cgroup_mutex); |
|---|
| 78 | 80 | |
|---|
| 79 | 81 | return retval; |
|---|
| .. | .. |
|---|
| 190 | 192 | }; |
|---|
| 191 | 193 | |
|---|
| 192 | 194 | /* |
|---|
| 193 | | - * The following two functions "fix" the issue where there are more pids |
|---|
| 194 | | - * than kmalloc will give memory for; in such cases, we use vmalloc/vfree. |
|---|
| 195 | | - * TODO: replace with a kernel-wide solution to this problem |
|---|
| 196 | | - */ |
|---|
| 197 | | -#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2)) |
|---|
| 198 | | -static void *pidlist_allocate(int count) |
|---|
| 199 | | -{ |
|---|
| 200 | | - if (PIDLIST_TOO_LARGE(count)) |
|---|
| 201 | | - return vmalloc(array_size(count, sizeof(pid_t))); |
|---|
| 202 | | - else |
|---|
| 203 | | - return kmalloc_array(count, sizeof(pid_t), GFP_KERNEL); |
|---|
| 204 | | -} |
|---|
| 205 | | - |
|---|
| 206 | | -static void pidlist_free(void *p) |
|---|
| 207 | | -{ |
|---|
| 208 | | - kvfree(p); |
|---|
| 209 | | -} |
|---|
| 210 | | - |
|---|
| 211 | | -/* |
|---|
| 212 | 195 | * Used to destroy all pidlists lingering waiting for destroy timer. None |
|---|
| 213 | 196 | * should be left afterwards. |
|---|
| 214 | 197 | */ |
|---|
| .. | .. |
|---|
| 240 | 223 | */ |
|---|
| 241 | 224 | if (!delayed_work_pending(dwork)) { |
|---|
| 242 | 225 | list_del(&l->links); |
|---|
| 243 | | - pidlist_free(l->list); |
|---|
| 226 | + kvfree(l->list); |
|---|
| 244 | 227 | put_pid_ns(l->key.ns); |
|---|
| 245 | 228 | tofree = l; |
|---|
| 246 | 229 | } |
|---|
| .. | .. |
|---|
| 361 | 344 | * show up until sometime later on. |
|---|
| 362 | 345 | */ |
|---|
| 363 | 346 | length = cgroup_task_count(cgrp); |
|---|
| 364 | | - array = pidlist_allocate(length); |
|---|
| 347 | + array = kvmalloc_array(length, sizeof(pid_t), GFP_KERNEL); |
|---|
| 365 | 348 | if (!array) |
|---|
| 366 | 349 | return -ENOMEM; |
|---|
| 367 | 350 | /* now, populate the array */ |
|---|
| .. | .. |
|---|
| 386 | 369 | |
|---|
| 387 | 370 | l = cgroup_pidlist_find_create(cgrp, type); |
|---|
| 388 | 371 | if (!l) { |
|---|
| 389 | | - pidlist_free(array); |
|---|
| 372 | + kvfree(array); |
|---|
| 390 | 373 | return -ENOMEM; |
|---|
| 391 | 374 | } |
|---|
| 392 | 375 | |
|---|
| 393 | 376 | /* store array, freeing old if necessary */ |
|---|
| 394 | | - pidlist_free(l->list); |
|---|
| 377 | + kvfree(l->list); |
|---|
| 395 | 378 | l->list = array; |
|---|
| 396 | 379 | l->length = length; |
|---|
| 397 | 380 | *lp = l; |
|---|
| .. | .. |
|---|
| 413 | 396 | * next pid to display, if any |
|---|
| 414 | 397 | */ |
|---|
| 415 | 398 | struct kernfs_open_file *of = s->private; |
|---|
| 399 | + struct cgroup_file_ctx *ctx = of->priv; |
|---|
| 416 | 400 | struct cgroup *cgrp = seq_css(s)->cgroup; |
|---|
| 417 | 401 | struct cgroup_pidlist *l; |
|---|
| 418 | 402 | enum cgroup_filetype type = seq_cft(s)->private; |
|---|
| .. | .. |
|---|
| 422 | 406 | mutex_lock(&cgrp->pidlist_mutex); |
|---|
| 423 | 407 | |
|---|
| 424 | 408 | /* |
|---|
| 425 | | - * !NULL @of->priv indicates that this isn't the first start() |
|---|
| 426 | | - * after open. If the matching pidlist is around, we can use that. |
|---|
| 427 | | - * Look for it. Note that @of->priv can't be used directly. It |
|---|
| 428 | | - * could already have been destroyed. |
|---|
| 409 | + * !NULL @ctx->procs1.pidlist indicates that this isn't the first |
|---|
| 410 | + * start() after open. If the matching pidlist is around, we can use |
|---|
| 411 | + * that. Look for it. Note that @ctx->procs1.pidlist can't be used |
|---|
| 412 | + * directly. It could already have been destroyed. |
|---|
| 429 | 413 | */ |
|---|
| 430 | | - if (of->priv) |
|---|
| 431 | | - of->priv = cgroup_pidlist_find(cgrp, type); |
|---|
| 414 | + if (ctx->procs1.pidlist) |
|---|
| 415 | + ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type); |
|---|
| 432 | 416 | |
|---|
| 433 | 417 | /* |
|---|
| 434 | 418 | * Either this is the first start() after open or the matching |
|---|
| 435 | 419 | * pidlist has been destroyed in between. Create a new one. |
|---|
| 436 | 420 | */ |
|---|
| 437 | | - if (!of->priv) { |
|---|
| 438 | | - ret = pidlist_array_load(cgrp, type, |
|---|
| 439 | | - (struct cgroup_pidlist **)&of->priv); |
|---|
| 421 | + if (!ctx->procs1.pidlist) { |
|---|
| 422 | + ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist); |
|---|
| 440 | 423 | if (ret) |
|---|
| 441 | 424 | return ERR_PTR(ret); |
|---|
| 442 | 425 | } |
|---|
| 443 | | - l = of->priv; |
|---|
| 426 | + l = ctx->procs1.pidlist; |
|---|
| 444 | 427 | |
|---|
| 445 | 428 | if (pid) { |
|---|
| 446 | 429 | int end = l->length; |
|---|
| .. | .. |
|---|
| 468 | 451 | static void cgroup_pidlist_stop(struct seq_file *s, void *v) |
|---|
| 469 | 452 | { |
|---|
| 470 | 453 | struct kernfs_open_file *of = s->private; |
|---|
| 471 | | - struct cgroup_pidlist *l = of->priv; |
|---|
| 454 | + struct cgroup_file_ctx *ctx = of->priv; |
|---|
| 455 | + struct cgroup_pidlist *l = ctx->procs1.pidlist; |
|---|
| 472 | 456 | |
|---|
| 473 | 457 | if (l) |
|---|
| 474 | 458 | mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, |
|---|
| .. | .. |
|---|
| 479 | 463 | static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) |
|---|
| 480 | 464 | { |
|---|
| 481 | 465 | struct kernfs_open_file *of = s->private; |
|---|
| 482 | | - struct cgroup_pidlist *l = of->priv; |
|---|
| 466 | + struct cgroup_file_ctx *ctx = of->priv; |
|---|
| 467 | + struct cgroup_pidlist *l = ctx->procs1.pidlist; |
|---|
| 483 | 468 | pid_t *p = v; |
|---|
| 484 | 469 | pid_t *end = l->list + l->length; |
|---|
| 485 | 470 | /* |
|---|
| .. | .. |
|---|
| 511 | 496 | struct task_struct *task; |
|---|
| 512 | 497 | const struct cred *cred, *tcred; |
|---|
| 513 | 498 | ssize_t ret; |
|---|
| 499 | + bool locked; |
|---|
| 514 | 500 | |
|---|
| 515 | 501 | cgrp = cgroup_kn_lock_live(of->kn, false); |
|---|
| 516 | 502 | if (!cgrp) |
|---|
| 517 | 503 | return -ENODEV; |
|---|
| 518 | 504 | |
|---|
| 519 | | - task = cgroup_procs_write_start(buf, threadgroup); |
|---|
| 505 | + task = cgroup_procs_write_start(buf, threadgroup, &locked, cgrp); |
|---|
| 520 | 506 | ret = PTR_ERR_OR_ZERO(task); |
|---|
| 521 | 507 | if (ret) |
|---|
| 522 | 508 | goto out_unlock; |
|---|
| 523 | 509 | |
|---|
| 524 | 510 | /* |
|---|
| 525 | | - * Even if we're attaching all tasks in the thread group, we only |
|---|
| 526 | | - * need to check permissions on one of them. |
|---|
| 511 | + * Even if we're attaching all tasks in the thread group, we only need |
|---|
| 512 | + * to check permissions on one of them. Check permissions using the |
|---|
| 513 | + * credentials from file open to protect against inherited fd attacks. |
|---|
| 527 | 514 | */ |
|---|
| 528 | | - cred = current_cred(); |
|---|
| 515 | + cred = of->file->f_cred; |
|---|
| 529 | 516 | tcred = get_task_cred(task); |
|---|
| 530 | 517 | if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && |
|---|
| 531 | 518 | !uid_eq(cred->euid, tcred->uid) && |
|---|
| .. | .. |
|---|
| 537 | 524 | goto out_finish; |
|---|
| 538 | 525 | |
|---|
| 539 | 526 | ret = cgroup_attach_task(cgrp, task, threadgroup); |
|---|
| 527 | + trace_android_vh_cgroup_set_task(ret, task); |
|---|
| 540 | 528 | |
|---|
| 541 | 529 | out_finish: |
|---|
| 542 | | - cgroup_procs_write_finish(task); |
|---|
| 530 | + cgroup_procs_write_finish(task, locked); |
|---|
| 543 | 531 | out_unlock: |
|---|
| 544 | 532 | cgroup_kn_unlock(of->kn); |
|---|
| 545 | 533 | |
|---|
| .. | .. |
|---|
| 562 | 550 | char *buf, size_t nbytes, loff_t off) |
|---|
| 563 | 551 | { |
|---|
| 564 | 552 | struct cgroup *cgrp; |
|---|
| 553 | + struct cgroup_file_ctx *ctx; |
|---|
| 565 | 554 | |
|---|
| 566 | 555 | BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); |
|---|
| 567 | 556 | |
|---|
| .. | .. |
|---|
| 569 | 558 | * Release agent gets called with all capabilities, |
|---|
| 570 | 559 | * require capabilities to set release agent. |
|---|
| 571 | 560 | */ |
|---|
| 572 | | - if ((of->file->f_cred->user_ns != &init_user_ns) || |
|---|
| 573 | | - !capable(CAP_SYS_ADMIN)) |
|---|
| 561 | + ctx = of->priv; |
|---|
| 562 | + if ((ctx->ns->user_ns != &init_user_ns) || |
|---|
| 563 | + !file_ns_capable(of->file, &init_user_ns, CAP_SYS_ADMIN)) |
|---|
| 574 | 564 | return -EPERM; |
|---|
| 575 | 565 | |
|---|
| 576 | 566 | cgrp = cgroup_kn_lock_live(of->kn, false); |
|---|
| .. | .. |
|---|
| 800 | 790 | { |
|---|
| 801 | 791 | struct cgroup *cgrp = |
|---|
| 802 | 792 | container_of(work, struct cgroup, release_agent_work); |
|---|
| 803 | | - char *pathbuf = NULL, *agentbuf = NULL; |
|---|
| 793 | + char *pathbuf, *agentbuf; |
|---|
| 804 | 794 | char *argv[3], *envp[3]; |
|---|
| 805 | 795 | int ret; |
|---|
| 806 | 796 | |
|---|
| 807 | | - mutex_lock(&cgroup_mutex); |
|---|
| 797 | + /* snoop agent path and exit early if empty */ |
|---|
| 798 | + if (!cgrp->root->release_agent_path[0]) |
|---|
| 799 | + return; |
|---|
| 808 | 800 | |
|---|
| 801 | + /* prepare argument buffers */ |
|---|
| 809 | 802 | pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); |
|---|
| 810 | | - agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL); |
|---|
| 811 | | - if (!pathbuf || !agentbuf || !strlen(agentbuf)) |
|---|
| 812 | | - goto out; |
|---|
| 803 | + agentbuf = kmalloc(PATH_MAX, GFP_KERNEL); |
|---|
| 804 | + if (!pathbuf || !agentbuf) |
|---|
| 805 | + goto out_free; |
|---|
| 813 | 806 | |
|---|
| 814 | | - spin_lock_irq(&css_set_lock); |
|---|
| 815 | | - ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns); |
|---|
| 816 | | - spin_unlock_irq(&css_set_lock); |
|---|
| 807 | + spin_lock(&release_agent_path_lock); |
|---|
| 808 | + strlcpy(agentbuf, cgrp->root->release_agent_path, PATH_MAX); |
|---|
| 809 | + spin_unlock(&release_agent_path_lock); |
|---|
| 810 | + if (!agentbuf[0]) |
|---|
| 811 | + goto out_free; |
|---|
| 812 | + |
|---|
| 813 | + ret = cgroup_path_ns(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns); |
|---|
| 817 | 814 | if (ret < 0 || ret >= PATH_MAX) |
|---|
| 818 | | - goto out; |
|---|
| 815 | + goto out_free; |
|---|
| 819 | 816 | |
|---|
| 820 | 817 | argv[0] = agentbuf; |
|---|
| 821 | 818 | argv[1] = pathbuf; |
|---|
| .. | .. |
|---|
| 826 | 823 | envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; |
|---|
| 827 | 824 | envp[2] = NULL; |
|---|
| 828 | 825 | |
|---|
| 829 | | - mutex_unlock(&cgroup_mutex); |
|---|
| 830 | 826 | call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); |
|---|
| 831 | | - goto out_free; |
|---|
| 832 | | -out: |
|---|
| 833 | | - mutex_unlock(&cgroup_mutex); |
|---|
| 834 | 827 | out_free: |
|---|
| 835 | 828 | kfree(agentbuf); |
|---|
| 836 | 829 | kfree(pathbuf); |
|---|
| .. | .. |
|---|
| 904 | 897 | return 0; |
|---|
| 905 | 898 | } |
|---|
| 906 | 899 | |
|---|
| 907 | | -static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) |
|---|
| 900 | +enum cgroup1_param { |
|---|
| 901 | + Opt_all, |
|---|
| 902 | + Opt_clone_children, |
|---|
| 903 | + Opt_cpuset_v2_mode, |
|---|
| 904 | + Opt_name, |
|---|
| 905 | + Opt_none, |
|---|
| 906 | + Opt_noprefix, |
|---|
| 907 | + Opt_release_agent, |
|---|
| 908 | + Opt_xattr, |
|---|
| 909 | +}; |
|---|
| 910 | + |
|---|
| 911 | +const struct fs_parameter_spec cgroup1_fs_parameters[] = { |
|---|
| 912 | + fsparam_flag ("all", Opt_all), |
|---|
| 913 | + fsparam_flag ("clone_children", Opt_clone_children), |
|---|
| 914 | + fsparam_flag ("cpuset_v2_mode", Opt_cpuset_v2_mode), |
|---|
| 915 | + fsparam_string("name", Opt_name), |
|---|
| 916 | + fsparam_flag ("none", Opt_none), |
|---|
| 917 | + fsparam_flag ("noprefix", Opt_noprefix), |
|---|
| 918 | + fsparam_string("release_agent", Opt_release_agent), |
|---|
| 919 | + fsparam_flag ("xattr", Opt_xattr), |
|---|
| 920 | + {} |
|---|
| 921 | +}; |
|---|
| 922 | + |
|---|
| 923 | +int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) |
|---|
| 908 | 924 | { |
|---|
| 909 | | - char *token, *o = data; |
|---|
| 910 | | - bool all_ss = false, one_ss = false; |
|---|
| 911 | | - u16 mask = U16_MAX; |
|---|
| 925 | + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); |
|---|
| 912 | 926 | struct cgroup_subsys *ss; |
|---|
| 913 | | - int nr_opts = 0; |
|---|
| 927 | + struct fs_parse_result result; |
|---|
| 928 | + int opt, i; |
|---|
| 929 | + |
|---|
| 930 | + opt = fs_parse(fc, cgroup1_fs_parameters, param, &result); |
|---|
| 931 | + if (opt == -ENOPARAM) { |
|---|
| 932 | + if (strcmp(param->key, "source") == 0) { |
|---|
| 933 | + if (param->type != fs_value_is_string) |
|---|
| 934 | + return invalf(fc, "Non-string source"); |
|---|
| 935 | + if (fc->source) |
|---|
| 936 | + return invalf(fc, "Multiple sources not supported"); |
|---|
| 937 | + fc->source = param->string; |
|---|
| 938 | + param->string = NULL; |
|---|
| 939 | + return 0; |
|---|
| 940 | + } |
|---|
| 941 | + for_each_subsys(ss, i) { |
|---|
| 942 | + if (strcmp(param->key, ss->legacy_name)) |
|---|
| 943 | + continue; |
|---|
| 944 | + if (!cgroup_ssid_enabled(i) || cgroup1_ssid_disabled(i)) |
|---|
| 945 | + return invalfc(fc, "Disabled controller '%s'", |
|---|
| 946 | + param->key); |
|---|
| 947 | + ctx->subsys_mask |= (1 << i); |
|---|
| 948 | + return 0; |
|---|
| 949 | + } |
|---|
| 950 | + return invalfc(fc, "Unknown subsys name '%s'", param->key); |
|---|
| 951 | + } |
|---|
| 952 | + if (opt < 0) |
|---|
| 953 | + return opt; |
|---|
| 954 | + |
|---|
| 955 | + switch (opt) { |
|---|
| 956 | + case Opt_none: |
|---|
| 957 | + /* Explicitly have no subsystems */ |
|---|
| 958 | + ctx->none = true; |
|---|
| 959 | + break; |
|---|
| 960 | + case Opt_all: |
|---|
| 961 | + ctx->all_ss = true; |
|---|
| 962 | + break; |
|---|
| 963 | + case Opt_noprefix: |
|---|
| 964 | + ctx->flags |= CGRP_ROOT_NOPREFIX; |
|---|
| 965 | + break; |
|---|
| 966 | + case Opt_clone_children: |
|---|
| 967 | + ctx->cpuset_clone_children = true; |
|---|
| 968 | + break; |
|---|
| 969 | + case Opt_cpuset_v2_mode: |
|---|
| 970 | + ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE; |
|---|
| 971 | + break; |
|---|
| 972 | + case Opt_xattr: |
|---|
| 973 | + ctx->flags |= CGRP_ROOT_XATTR; |
|---|
| 974 | + break; |
|---|
| 975 | + case Opt_release_agent: |
|---|
| 976 | + /* Specifying two release agents is forbidden */ |
|---|
| 977 | + if (ctx->release_agent) |
|---|
| 978 | + return invalfc(fc, "release_agent respecified"); |
|---|
| 979 | + /* |
|---|
| 980 | + * Release agent gets called with all capabilities, |
|---|
| 981 | + * require capabilities to set release agent. |
|---|
| 982 | + */ |
|---|
| 983 | + if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN)) |
|---|
| 984 | + return invalfc(fc, "Setting release_agent not allowed"); |
|---|
| 985 | + ctx->release_agent = param->string; |
|---|
| 986 | + param->string = NULL; |
|---|
| 987 | + break; |
|---|
| 988 | + case Opt_name: |
|---|
| 989 | + /* blocked by boot param? */ |
|---|
| 990 | + if (cgroup_no_v1_named) |
|---|
| 991 | + return -ENOENT; |
|---|
| 992 | + /* Can't specify an empty name */ |
|---|
| 993 | + if (!param->size) |
|---|
| 994 | + return invalfc(fc, "Empty name"); |
|---|
| 995 | + if (param->size > MAX_CGROUP_ROOT_NAMELEN - 1) |
|---|
| 996 | + return invalfc(fc, "Name too long"); |
|---|
| 997 | + /* Must match [\w.-]+ */ |
|---|
| 998 | + for (i = 0; i < param->size; i++) { |
|---|
| 999 | + char c = param->string[i]; |
|---|
| 1000 | + if (isalnum(c)) |
|---|
| 1001 | + continue; |
|---|
| 1002 | + if ((c == '.') || (c == '-') || (c == '_')) |
|---|
| 1003 | + continue; |
|---|
| 1004 | + return invalfc(fc, "Invalid name"); |
|---|
| 1005 | + } |
|---|
| 1006 | + /* Specifying two names is forbidden */ |
|---|
| 1007 | + if (ctx->name) |
|---|
| 1008 | + return invalfc(fc, "name respecified"); |
|---|
| 1009 | + ctx->name = param->string; |
|---|
| 1010 | + param->string = NULL; |
|---|
| 1011 | + break; |
|---|
| 1012 | + } |
|---|
| 1013 | + return 0; |
|---|
| 1014 | +} |
|---|
| 1015 | + |
|---|
| 1016 | +static int check_cgroupfs_options(struct fs_context *fc) |
|---|
| 1017 | +{ |
|---|
| 1018 | + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); |
|---|
| 1019 | + u16 mask = U16_MAX; |
|---|
| 1020 | + u16 enabled = 0; |
|---|
| 1021 | + struct cgroup_subsys *ss; |
|---|
| 914 | 1022 | int i; |
|---|
| 915 | 1023 | |
|---|
| 916 | 1024 | #ifdef CONFIG_CPUSETS |
|---|
| 917 | 1025 | mask = ~((u16)1 << cpuset_cgrp_id); |
|---|
| 918 | 1026 | #endif |
|---|
| 1027 | + for_each_subsys(ss, i) |
|---|
| 1028 | + if (cgroup_ssid_enabled(i) && !cgroup1_ssid_disabled(i)) |
|---|
| 1029 | + enabled |= 1 << i; |
|---|
| 919 | 1030 | |
|---|
| 920 | | - memset(opts, 0, sizeof(*opts)); |
|---|
| 921 | | - |
|---|
| 922 | | - while ((token = strsep(&o, ",")) != NULL) { |
|---|
| 923 | | - nr_opts++; |
|---|
| 924 | | - |
|---|
| 925 | | - if (!*token) |
|---|
| 926 | | - return -EINVAL; |
|---|
| 927 | | - if (!strcmp(token, "none")) { |
|---|
| 928 | | - /* Explicitly have no subsystems */ |
|---|
| 929 | | - opts->none = true; |
|---|
| 930 | | - continue; |
|---|
| 931 | | - } |
|---|
| 932 | | - if (!strcmp(token, "all")) { |
|---|
| 933 | | - /* Mutually exclusive option 'all' + subsystem name */ |
|---|
| 934 | | - if (one_ss) |
|---|
| 935 | | - return -EINVAL; |
|---|
| 936 | | - all_ss = true; |
|---|
| 937 | | - continue; |
|---|
| 938 | | - } |
|---|
| 939 | | - if (!strcmp(token, "noprefix")) { |
|---|
| 940 | | - opts->flags |= CGRP_ROOT_NOPREFIX; |
|---|
| 941 | | - continue; |
|---|
| 942 | | - } |
|---|
| 943 | | - if (!strcmp(token, "clone_children")) { |
|---|
| 944 | | - opts->cpuset_clone_children = true; |
|---|
| 945 | | - continue; |
|---|
| 946 | | - } |
|---|
| 947 | | - if (!strcmp(token, "cpuset_v2_mode")) { |
|---|
| 948 | | - opts->flags |= CGRP_ROOT_CPUSET_V2_MODE; |
|---|
| 949 | | - continue; |
|---|
| 950 | | - } |
|---|
| 951 | | - if (!strcmp(token, "xattr")) { |
|---|
| 952 | | - opts->flags |= CGRP_ROOT_XATTR; |
|---|
| 953 | | - continue; |
|---|
| 954 | | - } |
|---|
| 955 | | - if (!strncmp(token, "release_agent=", 14)) { |
|---|
| 956 | | - /* Specifying two release agents is forbidden */ |
|---|
| 957 | | - if (opts->release_agent) |
|---|
| 958 | | - return -EINVAL; |
|---|
| 959 | | - opts->release_agent = |
|---|
| 960 | | - kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); |
|---|
| 961 | | - if (!opts->release_agent) |
|---|
| 962 | | - return -ENOMEM; |
|---|
| 963 | | - continue; |
|---|
| 964 | | - } |
|---|
| 965 | | - if (!strncmp(token, "name=", 5)) { |
|---|
| 966 | | - const char *name = token + 5; |
|---|
| 967 | | - |
|---|
| 968 | | - /* blocked by boot param? */ |
|---|
| 969 | | - if (cgroup_no_v1_named) |
|---|
| 970 | | - return -ENOENT; |
|---|
| 971 | | - /* Can't specify an empty name */ |
|---|
| 972 | | - if (!strlen(name)) |
|---|
| 973 | | - return -EINVAL; |
|---|
| 974 | | - /* Must match [\w.-]+ */ |
|---|
| 975 | | - for (i = 0; i < strlen(name); i++) { |
|---|
| 976 | | - char c = name[i]; |
|---|
| 977 | | - if (isalnum(c)) |
|---|
| 978 | | - continue; |
|---|
| 979 | | - if ((c == '.') || (c == '-') || (c == '_')) |
|---|
| 980 | | - continue; |
|---|
| 981 | | - return -EINVAL; |
|---|
| 982 | | - } |
|---|
| 983 | | - /* Specifying two names is forbidden */ |
|---|
| 984 | | - if (opts->name) |
|---|
| 985 | | - return -EINVAL; |
|---|
| 986 | | - opts->name = kstrndup(name, |
|---|
| 987 | | - MAX_CGROUP_ROOT_NAMELEN - 1, |
|---|
| 988 | | - GFP_KERNEL); |
|---|
| 989 | | - if (!opts->name) |
|---|
| 990 | | - return -ENOMEM; |
|---|
| 991 | | - |
|---|
| 992 | | - continue; |
|---|
| 993 | | - } |
|---|
| 994 | | - |
|---|
| 995 | | - for_each_subsys(ss, i) { |
|---|
| 996 | | - if (strcmp(token, ss->legacy_name)) |
|---|
| 997 | | - continue; |
|---|
| 998 | | - if (!cgroup_ssid_enabled(i)) |
|---|
| 999 | | - continue; |
|---|
| 1000 | | - if (cgroup1_ssid_disabled(i)) |
|---|
| 1001 | | - continue; |
|---|
| 1002 | | - |
|---|
| 1003 | | - /* Mutually exclusive option 'all' + subsystem name */ |
|---|
| 1004 | | - if (all_ss) |
|---|
| 1005 | | - return -EINVAL; |
|---|
| 1006 | | - opts->subsys_mask |= (1 << i); |
|---|
| 1007 | | - one_ss = true; |
|---|
| 1008 | | - |
|---|
| 1009 | | - break; |
|---|
| 1010 | | - } |
|---|
| 1011 | | - if (i == CGROUP_SUBSYS_COUNT) |
|---|
| 1012 | | - return -ENOENT; |
|---|
| 1013 | | - } |
|---|
| 1031 | + ctx->subsys_mask &= enabled; |
|---|
| 1014 | 1032 | |
|---|
| 1015 | 1033 | /* |
|---|
| 1016 | | - * If the 'all' option was specified select all the subsystems, |
|---|
| 1017 | | - * otherwise if 'none', 'name=' and a subsystem name options were |
|---|
| 1018 | | - * not specified, let's default to 'all' |
|---|
| 1034 | + * In the absence of 'none', 'name=' or subsystem name options, |
|---|
| 1035 | + * let's default to 'all'. |
|---|
| 1019 | 1036 | */ |
|---|
| 1020 | | - if (all_ss || (!one_ss && !opts->none && !opts->name)) |
|---|
| 1021 | | - for_each_subsys(ss, i) |
|---|
| 1022 | | - if (cgroup_ssid_enabled(i) && !cgroup1_ssid_disabled(i)) |
|---|
| 1023 | | - opts->subsys_mask |= (1 << i); |
|---|
| 1037 | + if (!ctx->subsys_mask && !ctx->none && !ctx->name) |
|---|
| 1038 | + ctx->all_ss = true; |
|---|
| 1039 | + |
|---|
| 1040 | + if (ctx->all_ss) { |
|---|
| 1041 | + /* Mutually exclusive option 'all' + subsystem name */ |
|---|
| 1042 | + if (ctx->subsys_mask) |
|---|
| 1043 | + return invalfc(fc, "subsys name conflicts with all"); |
|---|
| 1044 | + /* 'all' => select all the subsystems */ |
|---|
| 1045 | + ctx->subsys_mask = enabled; |
|---|
| 1046 | + } |
|---|
| 1024 | 1047 | |
|---|
| 1025 | 1048 | /* |
|---|
| 1026 | 1049 | * We either have to specify by name or by subsystems. (So all |
|---|
| 1027 | 1050 | * empty hierarchies must have a name). |
|---|
| 1028 | 1051 | */ |
|---|
| 1029 | | - if (!opts->subsys_mask && !opts->name) |
|---|
| 1030 | | - return -EINVAL; |
|---|
| 1052 | + if (!ctx->subsys_mask && !ctx->name) |
|---|
| 1053 | + return invalfc(fc, "Need name or subsystem set"); |
|---|
| 1031 | 1054 | |
|---|
| 1032 | 1055 | /* |
|---|
| 1033 | 1056 | * Option noprefix was introduced just for backward compatibility |
|---|
| 1034 | 1057 | * with the old cpuset, so we allow noprefix only if mounting just |
|---|
| 1035 | 1058 | * the cpuset subsystem. |
|---|
| 1036 | 1059 | */ |
|---|
| 1037 | | - if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask)) |
|---|
| 1038 | | - return -EINVAL; |
|---|
| 1060 | + if ((ctx->flags & CGRP_ROOT_NOPREFIX) && (ctx->subsys_mask & mask)) |
|---|
| 1061 | + return invalfc(fc, "noprefix used incorrectly"); |
|---|
| 1039 | 1062 | |
|---|
| 1040 | 1063 | /* Can't specify "none" and some subsystems */ |
|---|
| 1041 | | - if (opts->subsys_mask && opts->none) |
|---|
| 1042 | | - return -EINVAL; |
|---|
| 1064 | + if (ctx->subsys_mask && ctx->none) |
|---|
| 1065 | + return invalfc(fc, "none used incorrectly"); |
|---|
| 1043 | 1066 | |
|---|
| 1044 | 1067 | return 0; |
|---|
| 1045 | 1068 | } |
|---|
| 1046 | 1069 | |
|---|
| 1047 | | -static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data) |
|---|
| 1070 | +int cgroup1_reconfigure(struct fs_context *fc) |
|---|
| 1048 | 1071 | { |
|---|
| 1049 | | - int ret = 0; |
|---|
| 1072 | + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); |
|---|
| 1073 | + struct kernfs_root *kf_root = kernfs_root_from_sb(fc->root->d_sb); |
|---|
| 1050 | 1074 | struct cgroup_root *root = cgroup_root_from_kf(kf_root); |
|---|
| 1051 | | - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; |
|---|
| 1052 | | - struct cgroup_sb_opts opts; |
|---|
| 1075 | + int ret = 0; |
|---|
| 1053 | 1076 | u16 added_mask, removed_mask; |
|---|
| 1054 | 1077 | |
|---|
| 1055 | 1078 | cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); |
|---|
| 1056 | 1079 | |
|---|
| 1057 | 1080 | /* See what subsystems are wanted */ |
|---|
| 1058 | | - ret = parse_cgroupfs_options(data, &opts); |
|---|
| 1081 | + ret = check_cgroupfs_options(fc); |
|---|
| 1059 | 1082 | if (ret) |
|---|
| 1060 | 1083 | goto out_unlock; |
|---|
| 1061 | 1084 | |
|---|
| 1062 | | - if (opts.subsys_mask != root->subsys_mask || opts.release_agent) |
|---|
| 1085 | + if (ctx->subsys_mask != root->subsys_mask || ctx->release_agent) |
|---|
| 1063 | 1086 | pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n", |
|---|
| 1064 | 1087 | task_tgid_nr(current), current->comm); |
|---|
| 1065 | | - /* See cgroup1_mount release_agent handling */ |
|---|
| 1066 | | - if (opts.release_agent && |
|---|
| 1067 | | - ((ns->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN))) { |
|---|
| 1068 | | - ret = -EINVAL; |
|---|
| 1069 | | - goto out_unlock; |
|---|
| 1070 | | - } |
|---|
| 1071 | 1088 | |
|---|
| 1072 | | - added_mask = opts.subsys_mask & ~root->subsys_mask; |
|---|
| 1073 | | - removed_mask = root->subsys_mask & ~opts.subsys_mask; |
|---|
| 1089 | + added_mask = ctx->subsys_mask & ~root->subsys_mask; |
|---|
| 1090 | + removed_mask = root->subsys_mask & ~ctx->subsys_mask; |
|---|
| 1074 | 1091 | |
|---|
| 1075 | 1092 | /* Don't allow flags or name to change at remount */ |
|---|
| 1076 | | - if ((opts.flags ^ root->flags) || |
|---|
| 1077 | | - (opts.name && strcmp(opts.name, root->name))) { |
|---|
| 1078 | | - pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n", |
|---|
| 1079 | | - opts.flags, opts.name ?: "", root->flags, root->name); |
|---|
| 1093 | + if ((ctx->flags ^ root->flags) || |
|---|
| 1094 | + (ctx->name && strcmp(ctx->name, root->name))) { |
|---|
| 1095 | + errorfc(fc, "option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"", |
|---|
| 1096 | + ctx->flags, ctx->name ?: "", root->flags, root->name); |
|---|
| 1080 | 1097 | ret = -EINVAL; |
|---|
| 1081 | 1098 | goto out_unlock; |
|---|
| 1082 | 1099 | } |
|---|
| .. | .. |
|---|
| 1093 | 1110 | |
|---|
| 1094 | 1111 | WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask)); |
|---|
| 1095 | 1112 | |
|---|
| 1096 | | - if (opts.release_agent) { |
|---|
| 1113 | + if (ctx->release_agent) { |
|---|
| 1097 | 1114 | spin_lock(&release_agent_path_lock); |
|---|
| 1098 | | - strcpy(root->release_agent_path, opts.release_agent); |
|---|
| 1115 | + strcpy(root->release_agent_path, ctx->release_agent); |
|---|
| 1099 | 1116 | spin_unlock(&release_agent_path_lock); |
|---|
| 1100 | 1117 | } |
|---|
| 1101 | 1118 | |
|---|
| 1102 | 1119 | trace_cgroup_remount(root); |
|---|
| 1103 | 1120 | |
|---|
| 1104 | 1121 | out_unlock: |
|---|
| 1105 | | - kfree(opts.release_agent); |
|---|
| 1106 | | - kfree(opts.name); |
|---|
| 1107 | 1122 | mutex_unlock(&cgroup_mutex); |
|---|
| 1108 | 1123 | return ret; |
|---|
| 1109 | 1124 | } |
|---|
| .. | .. |
|---|
| 1111 | 1126 | struct kernfs_syscall_ops cgroup1_kf_syscall_ops = { |
|---|
| 1112 | 1127 | .rename = cgroup1_rename, |
|---|
| 1113 | 1128 | .show_options = cgroup1_show_options, |
|---|
| 1114 | | - .remount_fs = cgroup1_remount, |
|---|
| 1115 | 1129 | .mkdir = cgroup_mkdir, |
|---|
| 1116 | 1130 | .rmdir = cgroup_rmdir, |
|---|
| 1117 | 1131 | .show_path = cgroup_show_path, |
|---|
| 1118 | 1132 | }; |
|---|
| 1119 | 1133 | |
|---|
| 1120 | | -struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, |
|---|
| 1121 | | - void *data, unsigned long magic, |
|---|
| 1122 | | - struct cgroup_namespace *ns) |
|---|
| 1134 | +/* |
|---|
| 1135 | + * The guts of cgroup1 mount - find or create cgroup_root to use. |
|---|
| 1136 | + * Called with cgroup_mutex held; returns 0 on success, -E... on |
|---|
| 1137 | + * error, and a positive value when the candidate is busy dying. |
|---|
| 1138 | + * On success it stashes a reference to cgroup_root into given |
|---|
| 1139 | + * cgroup_fs_context; that reference is *NOT* counting towards the |
|---|
| 1140 | + * cgroup_root refcount. |
|---|
| 1141 | + */ |
|---|
| 1142 | +static int cgroup1_root_to_use(struct fs_context *fc) |
|---|
| 1123 | 1143 | { |
|---|
| 1124 | | - struct cgroup_sb_opts opts; |
|---|
| 1144 | + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); |
|---|
| 1125 | 1145 | struct cgroup_root *root; |
|---|
| 1126 | 1146 | struct cgroup_subsys *ss; |
|---|
| 1127 | | - struct dentry *dentry; |
|---|
| 1128 | 1147 | int i, ret; |
|---|
| 1129 | 1148 | |
|---|
| 1130 | | - cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); |
|---|
| 1131 | | - |
|---|
| 1132 | 1149 | /* First find the desired set of subsystems */ |
|---|
| 1133 | | - ret = parse_cgroupfs_options(data, &opts); |
|---|
| 1150 | + ret = check_cgroupfs_options(fc); |
|---|
| 1134 | 1151 | if (ret) |
|---|
| 1135 | | - goto out_unlock; |
|---|
| 1152 | + return ret; |
|---|
| 1136 | 1153 | |
|---|
| 1137 | 1154 | /* |
|---|
| 1138 | 1155 | * Destruction of cgroup root is asynchronous, so subsystems may |
|---|
| .. | .. |
|---|
| 1142 | 1159 | * starting. Testing ref liveness is good enough. |
|---|
| 1143 | 1160 | */ |
|---|
| 1144 | 1161 | for_each_subsys(ss, i) { |
|---|
| 1145 | | - if (!(opts.subsys_mask & (1 << i)) || |
|---|
| 1162 | + if (!(ctx->subsys_mask & (1 << i)) || |
|---|
| 1146 | 1163 | ss->root == &cgrp_dfl_root) |
|---|
| 1147 | 1164 | continue; |
|---|
| 1148 | 1165 | |
|---|
| 1149 | | - if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) { |
|---|
| 1150 | | - mutex_unlock(&cgroup_mutex); |
|---|
| 1151 | | - msleep(10); |
|---|
| 1152 | | - ret = restart_syscall(); |
|---|
| 1153 | | - goto out_free; |
|---|
| 1154 | | - } |
|---|
| 1166 | + if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) |
|---|
| 1167 | + return 1; /* restart */ |
|---|
| 1155 | 1168 | cgroup_put(&ss->root->cgrp); |
|---|
| 1156 | 1169 | } |
|---|
| 1157 | 1170 | |
|---|
| .. | .. |
|---|
| 1166 | 1179 | * name matches but subsys_mask doesn't, we should fail. |
|---|
| 1167 | 1180 | * Remember whether name matched. |
|---|
| 1168 | 1181 | */ |
|---|
| 1169 | | - if (opts.name) { |
|---|
| 1170 | | - if (strcmp(opts.name, root->name)) |
|---|
| 1182 | + if (ctx->name) { |
|---|
| 1183 | + if (strcmp(ctx->name, root->name)) |
|---|
| 1171 | 1184 | continue; |
|---|
| 1172 | 1185 | name_match = true; |
|---|
| 1173 | 1186 | } |
|---|
| .. | .. |
|---|
| 1176 | 1189 | * If we asked for subsystems (or explicitly for no |
|---|
| 1177 | 1190 | * subsystems) then they must match. |
|---|
| 1178 | 1191 | */ |
|---|
| 1179 | | - if ((opts.subsys_mask || opts.none) && |
|---|
| 1180 | | - (opts.subsys_mask != root->subsys_mask)) { |
|---|
| 1192 | + if ((ctx->subsys_mask || ctx->none) && |
|---|
| 1193 | + (ctx->subsys_mask != root->subsys_mask)) { |
|---|
| 1181 | 1194 | if (!name_match) |
|---|
| 1182 | 1195 | continue; |
|---|
| 1183 | | - ret = -EBUSY; |
|---|
| 1184 | | - goto out_unlock; |
|---|
| 1196 | + return -EBUSY; |
|---|
| 1185 | 1197 | } |
|---|
| 1186 | 1198 | |
|---|
| 1187 | | - if (root->flags ^ opts.flags) |
|---|
| 1199 | + if (root->flags ^ ctx->flags) |
|---|
| 1188 | 1200 | pr_warn("new mount options do not match the existing superblock, will be ignored\n"); |
|---|
| 1189 | 1201 | |
|---|
| 1190 | | - ret = 0; |
|---|
| 1191 | | - goto out_unlock; |
|---|
| 1202 | + ctx->root = root; |
|---|
| 1203 | + return 0; |
|---|
| 1192 | 1204 | } |
|---|
| 1193 | 1205 | |
|---|
| 1194 | 1206 | /* |
|---|
| .. | .. |
|---|
| 1196 | 1208 | * specification is allowed for already existing hierarchies but we |
|---|
| 1197 | 1209 | * can't create new one without subsys specification. |
|---|
| 1198 | 1210 | */ |
|---|
| 1199 | | - if (!opts.subsys_mask && !opts.none) { |
|---|
| 1200 | | - ret = -EINVAL; |
|---|
| 1201 | | - goto out_unlock; |
|---|
| 1202 | | - } |
|---|
| 1211 | + if (!ctx->subsys_mask && !ctx->none) |
|---|
| 1212 | + return invalfc(fc, "No subsys list or none specified"); |
|---|
| 1203 | 1213 | |
|---|
| 1204 | 1214 | /* Hierarchies may only be created in the initial cgroup namespace. */ |
|---|
| 1205 | | - if (ns != &init_cgroup_ns) { |
|---|
| 1206 | | - ret = -EPERM; |
|---|
| 1207 | | - goto out_unlock; |
|---|
| 1208 | | - } |
|---|
| 1209 | | - /* |
|---|
| 1210 | | - * Release agent gets called with all capabilities, |
|---|
| 1211 | | - * require capabilities to set release agent. |
|---|
| 1212 | | - */ |
|---|
| 1213 | | - if (opts.release_agent && |
|---|
| 1214 | | - ((ns->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN))) { |
|---|
| 1215 | | - ret = -EINVAL; |
|---|
| 1216 | | - goto out_unlock; |
|---|
| 1217 | | - } |
|---|
| 1215 | + if (ctx->ns != &init_cgroup_ns) |
|---|
| 1216 | + return -EPERM; |
|---|
| 1218 | 1217 | |
|---|
| 1219 | 1218 | root = kzalloc(sizeof(*root), GFP_KERNEL); |
|---|
| 1220 | | - if (!root) { |
|---|
| 1221 | | - ret = -ENOMEM; |
|---|
| 1222 | | - goto out_unlock; |
|---|
| 1223 | | - } |
|---|
| 1219 | + if (!root) |
|---|
| 1220 | + return -ENOMEM; |
|---|
| 1224 | 1221 | |
|---|
| 1225 | | - init_cgroup_root(root, &opts); |
|---|
| 1222 | + ctx->root = root; |
|---|
| 1223 | + init_cgroup_root(ctx); |
|---|
| 1226 | 1224 | |
|---|
| 1227 | | - ret = cgroup_setup_root(root, opts.subsys_mask); |
|---|
| 1225 | + ret = cgroup_setup_root(root, ctx->subsys_mask); |
|---|
| 1228 | 1226 | if (ret) |
|---|
| 1229 | 1227 | cgroup_free_root(root); |
|---|
| 1228 | + return ret; |
|---|
| 1229 | +} |
|---|
| 1230 | 1230 | |
|---|
| 1231 | | -out_unlock: |
|---|
| 1232 | | - if (!ret && !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { |
|---|
| 1233 | | - mutex_unlock(&cgroup_mutex); |
|---|
| 1234 | | - msleep(10); |
|---|
| 1235 | | - ret = restart_syscall(); |
|---|
| 1236 | | - goto out_free; |
|---|
| 1237 | | - } |
|---|
| 1231 | +int cgroup1_get_tree(struct fs_context *fc) |
|---|
| 1232 | +{ |
|---|
| 1233 | + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); |
|---|
| 1234 | + int ret; |
|---|
| 1235 | + |
|---|
| 1236 | + /* Check if the caller has permission to mount. */ |
|---|
| 1237 | + if (!ns_capable(ctx->ns->user_ns, CAP_SYS_ADMIN)) |
|---|
| 1238 | + return -EPERM; |
|---|
| 1239 | + |
|---|
| 1240 | + cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); |
|---|
| 1241 | + |
|---|
| 1242 | + ret = cgroup1_root_to_use(fc); |
|---|
| 1243 | + if (!ret && !percpu_ref_tryget_live(&ctx->root->cgrp.self.refcnt)) |
|---|
| 1244 | + ret = 1; /* restart */ |
|---|
| 1245 | + |
|---|
| 1238 | 1246 | mutex_unlock(&cgroup_mutex); |
|---|
| 1239 | | -out_free: |
|---|
| 1240 | | - kfree(opts.release_agent); |
|---|
| 1241 | | - kfree(opts.name); |
|---|
| 1242 | 1247 | |
|---|
| 1243 | | - if (ret) |
|---|
| 1244 | | - return ERR_PTR(ret); |
|---|
| 1248 | + if (!ret) |
|---|
| 1249 | + ret = cgroup_do_get_tree(fc); |
|---|
| 1245 | 1250 | |
|---|
| 1246 | | - dentry = cgroup_do_mount(&cgroup_fs_type, flags, root, |
|---|
| 1247 | | - CGROUP_SUPER_MAGIC, ns); |
|---|
| 1248 | | - |
|---|
| 1249 | | - if (!IS_ERR(dentry) && percpu_ref_is_dying(&root->cgrp.self.refcnt)) { |
|---|
| 1250 | | - struct super_block *sb = dentry->d_sb; |
|---|
| 1251 | | - dput(dentry); |
|---|
| 1252 | | - deactivate_locked_super(sb); |
|---|
| 1253 | | - msleep(10); |
|---|
| 1254 | | - dentry = ERR_PTR(restart_syscall()); |
|---|
| 1251 | + if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) { |
|---|
| 1252 | + fc_drop_locked(fc); |
|---|
| 1253 | + ret = 1; |
|---|
| 1255 | 1254 | } |
|---|
| 1256 | | - return dentry; |
|---|
| 1255 | + |
|---|
| 1256 | + if (unlikely(ret > 0)) { |
|---|
| 1257 | + msleep(10); |
|---|
| 1258 | + return restart_syscall(); |
|---|
| 1259 | + } |
|---|
| 1260 | + return ret; |
|---|
| 1257 | 1261 | } |
|---|
| 1258 | 1262 | |
|---|
| 1259 | 1263 | static int __init cgroup1_wq_init(void) |
|---|