.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * Copyright (C) 2006 IBM Corporation |
---|
3 | 4 | * |
---|
4 | 5 | * Author: Serge Hallyn <serue@us.ibm.com> |
---|
5 | | - * |
---|
6 | | - * This program is free software; you can redistribute it and/or |
---|
7 | | - * modify it under the terms of the GNU General Public License as |
---|
8 | | - * published by the Free Software Foundation, version 2 of the |
---|
9 | | - * License. |
---|
10 | 6 | * |
---|
11 | 7 | * Jun 2006 - namespaces support |
---|
12 | 8 | * OpenVZ, SWsoft Inc. |
---|
.. | .. |
---|
22 | 18 | #include <linux/pid_namespace.h> |
---|
23 | 19 | #include <net/net_namespace.h> |
---|
24 | 20 | #include <linux/ipc_namespace.h> |
---|
| 21 | +#include <linux/time_namespace.h> |
---|
| 22 | +#include <linux/fs_struct.h> |
---|
| 23 | +#include <linux/proc_fs.h> |
---|
25 | 24 | #include <linux/proc_ns.h> |
---|
26 | 25 | #include <linux/file.h> |
---|
27 | 26 | #include <linux/syscalls.h> |
---|
.. | .. |
---|
43 | 42 | #endif |
---|
44 | 43 | #ifdef CONFIG_CGROUPS |
---|
45 | 44 | .cgroup_ns = &init_cgroup_ns, |
---|
| 45 | +#endif |
---|
| 46 | +#ifdef CONFIG_TIME_NS |
---|
| 47 | + .time_ns = &init_time_ns, |
---|
| 48 | + .time_ns_for_children = &init_time_ns, |
---|
46 | 49 | #endif |
---|
47 | 50 | }; |
---|
48 | 51 | |
---|
.. | .. |
---|
110 | 113 | goto out_net; |
---|
111 | 114 | } |
---|
112 | 115 | |
---|
| 116 | + new_nsp->time_ns_for_children = copy_time_ns(flags, user_ns, |
---|
| 117 | + tsk->nsproxy->time_ns_for_children); |
---|
| 118 | + if (IS_ERR(new_nsp->time_ns_for_children)) { |
---|
| 119 | + err = PTR_ERR(new_nsp->time_ns_for_children); |
---|
| 120 | + goto out_time; |
---|
| 121 | + } |
---|
| 122 | + new_nsp->time_ns = get_time_ns(tsk->nsproxy->time_ns); |
---|
| 123 | + |
---|
113 | 124 | return new_nsp; |
---|
114 | 125 | |
---|
| 126 | +out_time: |
---|
| 127 | + put_net(new_nsp->net_ns); |
---|
115 | 128 | out_net: |
---|
116 | 129 | put_cgroup_ns(new_nsp->cgroup_ns); |
---|
117 | 130 | out_cgroup: |
---|
.. | .. |
---|
140 | 153 | struct nsproxy *old_ns = tsk->nsproxy; |
---|
141 | 154 | struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns); |
---|
142 | 155 | struct nsproxy *new_ns; |
---|
| 156 | + int ret; |
---|
143 | 157 | |
---|
144 | 158 | if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | |
---|
145 | 159 | CLONE_NEWPID | CLONE_NEWNET | |
---|
146 | | - CLONE_NEWCGROUP)))) { |
---|
147 | | - get_nsproxy(old_ns); |
---|
148 | | - return 0; |
---|
149 | | - } |
---|
150 | | - |
---|
151 | | - if (!ns_capable(user_ns, CAP_SYS_ADMIN)) |
---|
| 160 | + CLONE_NEWCGROUP | CLONE_NEWTIME)))) { |
---|
| 161 | + if (likely(old_ns->time_ns_for_children == old_ns->time_ns)) { |
---|
| 162 | + get_nsproxy(old_ns); |
---|
| 163 | + return 0; |
---|
| 164 | + } |
---|
| 165 | + } else if (!ns_capable(user_ns, CAP_SYS_ADMIN)) |
---|
152 | 166 | return -EPERM; |
---|
153 | 167 | |
---|
154 | 168 | /* |
---|
.. | .. |
---|
166 | 180 | if (IS_ERR(new_ns)) |
---|
167 | 181 | return PTR_ERR(new_ns); |
---|
168 | 182 | |
---|
| 183 | + ret = timens_on_fork(new_ns, tsk); |
---|
| 184 | + if (ret) { |
---|
| 185 | + free_nsproxy(new_ns); |
---|
| 186 | + return ret; |
---|
| 187 | + } |
---|
| 188 | + |
---|
169 | 189 | tsk->nsproxy = new_ns; |
---|
170 | 190 | return 0; |
---|
171 | 191 | } |
---|
.. | .. |
---|
180 | 200 | put_ipc_ns(ns->ipc_ns); |
---|
181 | 201 | if (ns->pid_ns_for_children) |
---|
182 | 202 | put_pid_ns(ns->pid_ns_for_children); |
---|
| 203 | + if (ns->time_ns) |
---|
| 204 | + put_time_ns(ns->time_ns); |
---|
| 205 | + if (ns->time_ns_for_children) |
---|
| 206 | + put_time_ns(ns->time_ns_for_children); |
---|
183 | 207 | put_cgroup_ns(ns->cgroup_ns); |
---|
184 | 208 | put_net(ns->net_ns); |
---|
185 | 209 | kmem_cache_free(nsproxy_cachep, ns); |
---|
.. | .. |
---|
196 | 220 | int err = 0; |
---|
197 | 221 | |
---|
198 | 222 | if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | |
---|
199 | | - CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP))) |
---|
| 223 | + CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP | |
---|
| 224 | + CLONE_NEWTIME))) |
---|
200 | 225 | return 0; |
---|
201 | 226 | |
---|
202 | 227 | user_ns = new_cred ? new_cred->user_ns : current_user_ns(); |
---|
.. | .. |
---|
234 | 259 | switch_task_namespaces(p, NULL); |
---|
235 | 260 | } |
---|
236 | 261 | |
---|
237 | | -SYSCALL_DEFINE2(setns, int, fd, int, nstype) |
---|
| 262 | +static int check_setns_flags(unsigned long flags) |
---|
238 | 263 | { |
---|
239 | | - struct task_struct *tsk = current; |
---|
240 | | - struct nsproxy *new_nsproxy; |
---|
| 264 | + if (!flags || (flags & ~(CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | |
---|
| 265 | + CLONE_NEWNET | CLONE_NEWTIME | CLONE_NEWUSER | |
---|
| 266 | + CLONE_NEWPID | CLONE_NEWCGROUP))) |
---|
| 267 | + return -EINVAL; |
---|
| 268 | + |
---|
| 269 | +#ifndef CONFIG_USER_NS |
---|
| 270 | + if (flags & CLONE_NEWUSER) |
---|
| 271 | + return -EINVAL; |
---|
| 272 | +#endif |
---|
| 273 | +#ifndef CONFIG_PID_NS |
---|
| 274 | + if (flags & CLONE_NEWPID) |
---|
| 275 | + return -EINVAL; |
---|
| 276 | +#endif |
---|
| 277 | +#ifndef CONFIG_UTS_NS |
---|
| 278 | + if (flags & CLONE_NEWUTS) |
---|
| 279 | + return -EINVAL; |
---|
| 280 | +#endif |
---|
| 281 | +#ifndef CONFIG_IPC_NS |
---|
| 282 | + if (flags & CLONE_NEWIPC) |
---|
| 283 | + return -EINVAL; |
---|
| 284 | +#endif |
---|
| 285 | +#ifndef CONFIG_CGROUPS |
---|
| 286 | + if (flags & CLONE_NEWCGROUP) |
---|
| 287 | + return -EINVAL; |
---|
| 288 | +#endif |
---|
| 289 | +#ifndef CONFIG_NET_NS |
---|
| 290 | + if (flags & CLONE_NEWNET) |
---|
| 291 | + return -EINVAL; |
---|
| 292 | +#endif |
---|
| 293 | +#ifndef CONFIG_TIME_NS |
---|
| 294 | + if (flags & CLONE_NEWTIME) |
---|
| 295 | + return -EINVAL; |
---|
| 296 | +#endif |
---|
| 297 | + |
---|
| 298 | + return 0; |
---|
| 299 | +} |
---|
| 300 | + |
---|
| 301 | +static void put_nsset(struct nsset *nsset) |
---|
| 302 | +{ |
---|
| 303 | + unsigned flags = nsset->flags; |
---|
| 304 | + |
---|
| 305 | + if (flags & CLONE_NEWUSER) |
---|
| 306 | + put_cred(nsset_cred(nsset)); |
---|
| 307 | + /* |
---|
| 308 | + * We only created a temporary copy if we attached to more than just |
---|
| 309 | + * the mount namespace. |
---|
| 310 | + */ |
---|
| 311 | + if (nsset->fs && (flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS)) |
---|
| 312 | + free_fs_struct(nsset->fs); |
---|
| 313 | + if (nsset->nsproxy) |
---|
| 314 | + free_nsproxy(nsset->nsproxy); |
---|
| 315 | +} |
---|
| 316 | + |
---|
| 317 | +static int prepare_nsset(unsigned flags, struct nsset *nsset) |
---|
| 318 | +{ |
---|
| 319 | + struct task_struct *me = current; |
---|
| 320 | + |
---|
| 321 | + nsset->nsproxy = create_new_namespaces(0, me, current_user_ns(), me->fs); |
---|
| 322 | + if (IS_ERR(nsset->nsproxy)) |
---|
| 323 | + return PTR_ERR(nsset->nsproxy); |
---|
| 324 | + |
---|
| 325 | + if (flags & CLONE_NEWUSER) |
---|
| 326 | + nsset->cred = prepare_creds(); |
---|
| 327 | + else |
---|
| 328 | + nsset->cred = current_cred(); |
---|
| 329 | + if (!nsset->cred) |
---|
| 330 | + goto out; |
---|
| 331 | + |
---|
| 332 | + /* Only create a temporary copy of fs_struct if we really need to. */ |
---|
| 333 | + if (flags == CLONE_NEWNS) { |
---|
| 334 | + nsset->fs = me->fs; |
---|
| 335 | + } else if (flags & CLONE_NEWNS) { |
---|
| 336 | + nsset->fs = copy_fs_struct(me->fs); |
---|
| 337 | + if (!nsset->fs) |
---|
| 338 | + goto out; |
---|
| 339 | + } |
---|
| 340 | + |
---|
| 341 | + nsset->flags = flags; |
---|
| 342 | + return 0; |
---|
| 343 | + |
---|
| 344 | +out: |
---|
| 345 | + put_nsset(nsset); |
---|
| 346 | + return -ENOMEM; |
---|
| 347 | +} |
---|
| 348 | + |
---|
| 349 | +static inline int validate_ns(struct nsset *nsset, struct ns_common *ns) |
---|
| 350 | +{ |
---|
| 351 | + return ns->ops->install(nsset, ns); |
---|
| 352 | +} |
---|
| 353 | + |
---|
| 354 | +/* |
---|
| 355 | + * This is the inverse operation to unshare(). |
---|
| 356 | + * Ordering is equivalent to the standard ordering used everywhere else |
---|
| 357 | + * during unshare and process creation. The switch to the new set of |
---|
| 358 | + * namespaces occurs at the point of no return after installation of |
---|
| 359 | + * all requested namespaces was successful in commit_nsset(). |
---|
| 360 | + */ |
---|
| 361 | +static int validate_nsset(struct nsset *nsset, struct pid *pid) |
---|
| 362 | +{ |
---|
| 363 | + int ret = 0; |
---|
| 364 | + unsigned flags = nsset->flags; |
---|
| 365 | + struct user_namespace *user_ns = NULL; |
---|
| 366 | + struct pid_namespace *pid_ns = NULL; |
---|
| 367 | + struct nsproxy *nsp; |
---|
| 368 | + struct task_struct *tsk; |
---|
| 369 | + |
---|
| 370 | + /* Take a "snapshot" of the target task's namespaces. */ |
---|
| 371 | + rcu_read_lock(); |
---|
| 372 | + tsk = pid_task(pid, PIDTYPE_PID); |
---|
| 373 | + if (!tsk) { |
---|
| 374 | + rcu_read_unlock(); |
---|
| 375 | + return -ESRCH; |
---|
| 376 | + } |
---|
| 377 | + |
---|
| 378 | + if (!ptrace_may_access(tsk, PTRACE_MODE_READ_REALCREDS)) { |
---|
| 379 | + rcu_read_unlock(); |
---|
| 380 | + return -EPERM; |
---|
| 381 | + } |
---|
| 382 | + |
---|
| 383 | + task_lock(tsk); |
---|
| 384 | + nsp = tsk->nsproxy; |
---|
| 385 | + if (nsp) |
---|
| 386 | + get_nsproxy(nsp); |
---|
| 387 | + task_unlock(tsk); |
---|
| 388 | + if (!nsp) { |
---|
| 389 | + rcu_read_unlock(); |
---|
| 390 | + return -ESRCH; |
---|
| 391 | + } |
---|
| 392 | + |
---|
| 393 | +#ifdef CONFIG_PID_NS |
---|
| 394 | + if (flags & CLONE_NEWPID) { |
---|
| 395 | + pid_ns = task_active_pid_ns(tsk); |
---|
| 396 | + if (unlikely(!pid_ns)) { |
---|
| 397 | + rcu_read_unlock(); |
---|
| 398 | + ret = -ESRCH; |
---|
| 399 | + goto out; |
---|
| 400 | + } |
---|
| 401 | + get_pid_ns(pid_ns); |
---|
| 402 | + } |
---|
| 403 | +#endif |
---|
| 404 | + |
---|
| 405 | +#ifdef CONFIG_USER_NS |
---|
| 406 | + if (flags & CLONE_NEWUSER) |
---|
| 407 | + user_ns = get_user_ns(__task_cred(tsk)->user_ns); |
---|
| 408 | +#endif |
---|
| 409 | + rcu_read_unlock(); |
---|
| 410 | + |
---|
| 411 | + /* |
---|
| 412 | + * Install requested namespaces. The caller will have |
---|
| 413 | + * verified earlier that the requested namespaces are |
---|
| 414 | + * supported on this kernel. We don't report errors here |
---|
| 415 | + * if a namespace is requested that isn't supported. |
---|
| 416 | + */ |
---|
| 417 | +#ifdef CONFIG_USER_NS |
---|
| 418 | + if (flags & CLONE_NEWUSER) { |
---|
| 419 | + ret = validate_ns(nsset, &user_ns->ns); |
---|
| 420 | + if (ret) |
---|
| 421 | + goto out; |
---|
| 422 | + } |
---|
| 423 | +#endif |
---|
| 424 | + |
---|
| 425 | + if (flags & CLONE_NEWNS) { |
---|
| 426 | + ret = validate_ns(nsset, from_mnt_ns(nsp->mnt_ns)); |
---|
| 427 | + if (ret) |
---|
| 428 | + goto out; |
---|
| 429 | + } |
---|
| 430 | + |
---|
| 431 | +#ifdef CONFIG_UTS_NS |
---|
| 432 | + if (flags & CLONE_NEWUTS) { |
---|
| 433 | + ret = validate_ns(nsset, &nsp->uts_ns->ns); |
---|
| 434 | + if (ret) |
---|
| 435 | + goto out; |
---|
| 436 | + } |
---|
| 437 | +#endif |
---|
| 438 | + |
---|
| 439 | +#ifdef CONFIG_IPC_NS |
---|
| 440 | + if (flags & CLONE_NEWIPC) { |
---|
| 441 | + ret = validate_ns(nsset, &nsp->ipc_ns->ns); |
---|
| 442 | + if (ret) |
---|
| 443 | + goto out; |
---|
| 444 | + } |
---|
| 445 | +#endif |
---|
| 446 | + |
---|
| 447 | +#ifdef CONFIG_PID_NS |
---|
| 448 | + if (flags & CLONE_NEWPID) { |
---|
| 449 | + ret = validate_ns(nsset, &pid_ns->ns); |
---|
| 450 | + if (ret) |
---|
| 451 | + goto out; |
---|
| 452 | + } |
---|
| 453 | +#endif |
---|
| 454 | + |
---|
| 455 | +#ifdef CONFIG_CGROUPS |
---|
| 456 | + if (flags & CLONE_NEWCGROUP) { |
---|
| 457 | + ret = validate_ns(nsset, &nsp->cgroup_ns->ns); |
---|
| 458 | + if (ret) |
---|
| 459 | + goto out; |
---|
| 460 | + } |
---|
| 461 | +#endif |
---|
| 462 | + |
---|
| 463 | +#ifdef CONFIG_NET_NS |
---|
| 464 | + if (flags & CLONE_NEWNET) { |
---|
| 465 | + ret = validate_ns(nsset, &nsp->net_ns->ns); |
---|
| 466 | + if (ret) |
---|
| 467 | + goto out; |
---|
| 468 | + } |
---|
| 469 | +#endif |
---|
| 470 | + |
---|
| 471 | +#ifdef CONFIG_TIME_NS |
---|
| 472 | + if (flags & CLONE_NEWTIME) { |
---|
| 473 | + ret = validate_ns(nsset, &nsp->time_ns->ns); |
---|
| 474 | + if (ret) |
---|
| 475 | + goto out; |
---|
| 476 | + } |
---|
| 477 | +#endif |
---|
| 478 | + |
---|
| 479 | +out: |
---|
| 480 | + if (pid_ns) |
---|
| 481 | + put_pid_ns(pid_ns); |
---|
| 482 | + if (nsp) |
---|
| 483 | + put_nsproxy(nsp); |
---|
| 484 | + put_user_ns(user_ns); |
---|
| 485 | + |
---|
| 486 | + return ret; |
---|
| 487 | +} |
---|
| 488 | + |
---|
| 489 | +/* |
---|
| 490 | + * This is the point of no return. There are just a few namespaces |
---|
| 491 | + * that do some actual work here and it's sufficiently minimal that |
---|
| 492 | + * a separate ns_common operation seems unnecessary for now. |
---|
| 493 | + * Unshare is doing the same thing. If we'll end up needing to do |
---|
| 494 | + * more in a given namespace or a helper here is ultimately not |
---|
| 495 | + * exported anymore a simple commit handler for each namespace |
---|
| 496 | + * should be added to ns_common. |
---|
| 497 | + */ |
---|
| 498 | +static void commit_nsset(struct nsset *nsset) |
---|
| 499 | +{ |
---|
| 500 | + unsigned flags = nsset->flags; |
---|
| 501 | + struct task_struct *me = current; |
---|
| 502 | + |
---|
| 503 | +#ifdef CONFIG_USER_NS |
---|
| 504 | + if (flags & CLONE_NEWUSER) { |
---|
| 505 | + /* transfer ownership */ |
---|
| 506 | + commit_creds(nsset_cred(nsset)); |
---|
| 507 | + nsset->cred = NULL; |
---|
| 508 | + } |
---|
| 509 | +#endif |
---|
| 510 | + |
---|
| 511 | + /* We only need to commit if we have used a temporary fs_struct. */ |
---|
| 512 | + if ((flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS)) { |
---|
| 513 | + set_fs_root(me->fs, &nsset->fs->root); |
---|
| 514 | + set_fs_pwd(me->fs, &nsset->fs->pwd); |
---|
| 515 | + } |
---|
| 516 | + |
---|
| 517 | +#ifdef CONFIG_IPC_NS |
---|
| 518 | + if (flags & CLONE_NEWIPC) |
---|
| 519 | + exit_sem(me); |
---|
| 520 | +#endif |
---|
| 521 | + |
---|
| 522 | +#ifdef CONFIG_TIME_NS |
---|
| 523 | + if (flags & CLONE_NEWTIME) |
---|
| 524 | + timens_commit(me, nsset->nsproxy->time_ns); |
---|
| 525 | +#endif |
---|
| 526 | + |
---|
| 527 | + /* transfer ownership */ |
---|
| 528 | + switch_task_namespaces(me, nsset->nsproxy); |
---|
| 529 | + nsset->nsproxy = NULL; |
---|
| 530 | +} |
---|
| 531 | + |
---|
| 532 | +SYSCALL_DEFINE2(setns, int, fd, int, flags) |
---|
| 533 | +{ |
---|
241 | 534 | struct file *file; |
---|
242 | | - struct ns_common *ns; |
---|
243 | | - int err; |
---|
| 535 | + struct ns_common *ns = NULL; |
---|
| 536 | + struct nsset nsset = {}; |
---|
| 537 | + int err = 0; |
---|
244 | 538 | |
---|
245 | | - file = proc_ns_fget(fd); |
---|
246 | | - if (IS_ERR(file)) |
---|
247 | | - return PTR_ERR(file); |
---|
| 539 | + file = fget(fd); |
---|
| 540 | + if (!file) |
---|
| 541 | + return -EBADF; |
---|
248 | 542 | |
---|
249 | | - err = -EINVAL; |
---|
250 | | - ns = get_proc_ns(file_inode(file)); |
---|
251 | | - if (nstype && (ns->ops->type != nstype)) |
---|
252 | | - goto out; |
---|
253 | | - |
---|
254 | | - new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs); |
---|
255 | | - if (IS_ERR(new_nsproxy)) { |
---|
256 | | - err = PTR_ERR(new_nsproxy); |
---|
257 | | - goto out; |
---|
| 543 | + if (proc_ns_file(file)) { |
---|
| 544 | + ns = get_proc_ns(file_inode(file)); |
---|
| 545 | + if (flags && (ns->ops->type != flags)) |
---|
| 546 | + err = -EINVAL; |
---|
| 547 | + flags = ns->ops->type; |
---|
| 548 | + } else if (!IS_ERR(pidfd_pid(file))) { |
---|
| 549 | + err = check_setns_flags(flags); |
---|
| 550 | + } else { |
---|
| 551 | + err = -EINVAL; |
---|
258 | 552 | } |
---|
259 | | - |
---|
260 | | - err = ns->ops->install(new_nsproxy, ns); |
---|
261 | | - if (err) { |
---|
262 | | - free_nsproxy(new_nsproxy); |
---|
| 553 | + if (err) |
---|
263 | 554 | goto out; |
---|
264 | | - } |
---|
265 | | - switch_task_namespaces(tsk, new_nsproxy); |
---|
266 | 555 | |
---|
267 | | - perf_event_namespaces(tsk); |
---|
| 556 | + err = prepare_nsset(flags, &nsset); |
---|
| 557 | + if (err) |
---|
| 558 | + goto out; |
---|
| 559 | + |
---|
| 560 | + if (proc_ns_file(file)) |
---|
| 561 | + err = validate_ns(&nsset, ns); |
---|
| 562 | + else |
---|
| 563 | + err = validate_nsset(&nsset, file->private_data); |
---|
| 564 | + if (!err) { |
---|
| 565 | + commit_nsset(&nsset); |
---|
| 566 | + perf_event_namespaces(current); |
---|
| 567 | + } |
---|
| 568 | + put_nsset(&nsset); |
---|
268 | 569 | out: |
---|
269 | 570 | fput(file); |
---|
270 | 571 | return err; |
---|