From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/kernel/nsproxy.c | 373 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 files changed, 337 insertions(+), 36 deletions(-) diff --git a/kernel/kernel/nsproxy.c b/kernel/kernel/nsproxy.c index f6c5d33..12dd41b 100644 --- a/kernel/kernel/nsproxy.c +++ b/kernel/kernel/nsproxy.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2006 IBM Corporation * * Author: Serge Hallyn <serue@us.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation, version 2 of the - * License. * * Jun 2006 - namespaces support * OpenVZ, SWsoft Inc. @@ -22,6 +18,9 @@ #include <linux/pid_namespace.h> #include <net/net_namespace.h> #include <linux/ipc_namespace.h> +#include <linux/time_namespace.h> +#include <linux/fs_struct.h> +#include <linux/proc_fs.h> #include <linux/proc_ns.h> #include <linux/file.h> #include <linux/syscalls.h> @@ -43,6 +42,10 @@ #endif #ifdef CONFIG_CGROUPS .cgroup_ns = &init_cgroup_ns, +#endif +#ifdef CONFIG_TIME_NS + .time_ns = &init_time_ns, + .time_ns_for_children = &init_time_ns, #endif }; @@ -110,8 +113,18 @@ goto out_net; } + new_nsp->time_ns_for_children = copy_time_ns(flags, user_ns, + tsk->nsproxy->time_ns_for_children); + if (IS_ERR(new_nsp->time_ns_for_children)) { + err = PTR_ERR(new_nsp->time_ns_for_children); + goto out_time; + } + new_nsp->time_ns = get_time_ns(tsk->nsproxy->time_ns); + return new_nsp; +out_time: + put_net(new_nsp->net_ns); out_net: put_cgroup_ns(new_nsp->cgroup_ns); out_cgroup: @@ -140,15 +153,16 @@ struct nsproxy *old_ns = tsk->nsproxy; struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns); struct nsproxy *new_ns; + int ret; if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWNET | - CLONE_NEWCGROUP)))) { - get_nsproxy(old_ns); - return 0; - } - - if (!ns_capable(user_ns, CAP_SYS_ADMIN)) + CLONE_NEWCGROUP | CLONE_NEWTIME)))) { + if (likely(old_ns->time_ns_for_children == old_ns->time_ns)) { + get_nsproxy(old_ns); + return 0; + } + } else if (!ns_capable(user_ns, CAP_SYS_ADMIN)) return -EPERM; /* @@ -166,6 +180,12 @@ if (IS_ERR(new_ns)) return PTR_ERR(new_ns); + ret = timens_on_fork(new_ns, tsk); + if (ret) { + free_nsproxy(new_ns); + return ret; + } + tsk->nsproxy = new_ns; return 0; } @@ -180,6 +200,10 @@ put_ipc_ns(ns->ipc_ns); if (ns->pid_ns_for_children) put_pid_ns(ns->pid_ns_for_children); + if (ns->time_ns) + put_time_ns(ns->time_ns); + if (ns->time_ns_for_children) + put_time_ns(ns->time_ns_for_children); put_cgroup_ns(ns->cgroup_ns); put_net(ns->net_ns); kmem_cache_free(nsproxy_cachep, ns); @@ -196,7 +220,8 @@ int err = 0; if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | - CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP))) + CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP | + CLONE_NEWTIME))) return 0; user_ns = new_cred ? new_cred->user_ns : current_user_ns(); @@ -234,37 +259,313 @@ switch_task_namespaces(p, NULL); } -SYSCALL_DEFINE2(setns, int, fd, int, nstype) +static int check_setns_flags(unsigned long flags) { - struct task_struct *tsk = current; - struct nsproxy *new_nsproxy; + if (!flags || (flags & ~(CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | + CLONE_NEWNET | CLONE_NEWTIME | CLONE_NEWUSER | + CLONE_NEWPID | CLONE_NEWCGROUP))) + return -EINVAL; + +#ifndef CONFIG_USER_NS + if (flags & CLONE_NEWUSER) + return -EINVAL; +#endif +#ifndef CONFIG_PID_NS + if (flags & CLONE_NEWPID) + return -EINVAL; +#endif +#ifndef CONFIG_UTS_NS + if (flags & CLONE_NEWUTS) + return -EINVAL; +#endif +#ifndef CONFIG_IPC_NS + if (flags & CLONE_NEWIPC) + return -EINVAL; +#endif +#ifndef CONFIG_CGROUPS + if (flags & CLONE_NEWCGROUP) + return -EINVAL; +#endif +#ifndef CONFIG_NET_NS + if (flags & CLONE_NEWNET) + return -EINVAL; +#endif +#ifndef CONFIG_TIME_NS + if (flags & CLONE_NEWTIME) + return -EINVAL; +#endif + + return 0; +} + +static void put_nsset(struct nsset *nsset) +{ + unsigned flags = nsset->flags; + + if (flags & CLONE_NEWUSER) + put_cred(nsset_cred(nsset)); + /* + * We only created a temporary copy if we attached to more than just + * the mount namespace. + */ + if (nsset->fs && (flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS)) + free_fs_struct(nsset->fs); + if (nsset->nsproxy) + free_nsproxy(nsset->nsproxy); +} + +static int prepare_nsset(unsigned flags, struct nsset *nsset) +{ + struct task_struct *me = current; + + nsset->nsproxy = create_new_namespaces(0, me, current_user_ns(), me->fs); + if (IS_ERR(nsset->nsproxy)) + return PTR_ERR(nsset->nsproxy); + + if (flags & CLONE_NEWUSER) + nsset->cred = prepare_creds(); + else + nsset->cred = current_cred(); + if (!nsset->cred) + goto out; + + /* Only create a temporary copy of fs_struct if we really need to. */ + if (flags == CLONE_NEWNS) { + nsset->fs = me->fs; + } else if (flags & CLONE_NEWNS) { + nsset->fs = copy_fs_struct(me->fs); + if (!nsset->fs) + goto out; + } + + nsset->flags = flags; + return 0; + +out: + put_nsset(nsset); + return -ENOMEM; +} + +static inline int validate_ns(struct nsset *nsset, struct ns_common *ns) +{ + return ns->ops->install(nsset, ns); +} + +/* + * This is the inverse operation to unshare(). + * Ordering is equivalent to the standard ordering used everywhere else + * during unshare and process creation. The switch to the new set of + * namespaces occurs at the point of no return after installation of + * all requested namespaces was successful in commit_nsset(). + */ +static int validate_nsset(struct nsset *nsset, struct pid *pid) +{ + int ret = 0; + unsigned flags = nsset->flags; + struct user_namespace *user_ns = NULL; + struct pid_namespace *pid_ns = NULL; + struct nsproxy *nsp; + struct task_struct *tsk; + + /* Take a "snapshot" of the target task's namespaces. */ + rcu_read_lock(); + tsk = pid_task(pid, PIDTYPE_PID); + if (!tsk) { + rcu_read_unlock(); + return -ESRCH; + } + + if (!ptrace_may_access(tsk, PTRACE_MODE_READ_REALCREDS)) { + rcu_read_unlock(); + return -EPERM; + } + + task_lock(tsk); + nsp = tsk->nsproxy; + if (nsp) + get_nsproxy(nsp); + task_unlock(tsk); + if (!nsp) { + rcu_read_unlock(); + return -ESRCH; + } + +#ifdef CONFIG_PID_NS + if (flags & CLONE_NEWPID) { + pid_ns = task_active_pid_ns(tsk); + if (unlikely(!pid_ns)) { + rcu_read_unlock(); + ret = -ESRCH; + goto out; + } + get_pid_ns(pid_ns); + } +#endif + +#ifdef CONFIG_USER_NS + if (flags & CLONE_NEWUSER) + user_ns = get_user_ns(__task_cred(tsk)->user_ns); +#endif + rcu_read_unlock(); + + /* + * Install requested namespaces. The caller will have + * verified earlier that the requested namespaces are + * supported on this kernel. We don't report errors here + * if a namespace is requested that isn't supported. + */ +#ifdef CONFIG_USER_NS + if (flags & CLONE_NEWUSER) { + ret = validate_ns(nsset, &user_ns->ns); + if (ret) + goto out; + } +#endif + + if (flags & CLONE_NEWNS) { + ret = validate_ns(nsset, from_mnt_ns(nsp->mnt_ns)); + if (ret) + goto out; + } + +#ifdef CONFIG_UTS_NS + if (flags & CLONE_NEWUTS) { + ret = validate_ns(nsset, &nsp->uts_ns->ns); + if (ret) + goto out; + } +#endif + +#ifdef CONFIG_IPC_NS + if (flags & CLONE_NEWIPC) { + ret = validate_ns(nsset, &nsp->ipc_ns->ns); + if (ret) + goto out; + } +#endif + +#ifdef CONFIG_PID_NS + if (flags & CLONE_NEWPID) { + ret = validate_ns(nsset, &pid_ns->ns); + if (ret) + goto out; + } +#endif + +#ifdef CONFIG_CGROUPS + if (flags & CLONE_NEWCGROUP) { + ret = validate_ns(nsset, &nsp->cgroup_ns->ns); + if (ret) + goto out; + } +#endif + +#ifdef CONFIG_NET_NS + if (flags & CLONE_NEWNET) { + ret = validate_ns(nsset, &nsp->net_ns->ns); + if (ret) + goto out; + } +#endif + +#ifdef CONFIG_TIME_NS + if (flags & CLONE_NEWTIME) { + ret = validate_ns(nsset, &nsp->time_ns->ns); + if (ret) + goto out; + } +#endif + +out: + if (pid_ns) + put_pid_ns(pid_ns); + if (nsp) + put_nsproxy(nsp); + put_user_ns(user_ns); + + return ret; +} + +/* + * This is the point of no return. There are just a few namespaces + * that do some actual work here and it's sufficiently minimal that + * a separate ns_common operation seems unnecessary for now. + * Unshare is doing the same thing. If we'll end up needing to do + * more in a given namespace or a helper here is ultimately not + * exported anymore a simple commit handler for each namespace + * should be added to ns_common. + */ +static void commit_nsset(struct nsset *nsset) +{ + unsigned flags = nsset->flags; + struct task_struct *me = current; + +#ifdef CONFIG_USER_NS + if (flags & CLONE_NEWUSER) { + /* transfer ownership */ + commit_creds(nsset_cred(nsset)); + nsset->cred = NULL; + } +#endif + + /* We only need to commit if we have used a temporary fs_struct. */ + if ((flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS)) { + set_fs_root(me->fs, &nsset->fs->root); + set_fs_pwd(me->fs, &nsset->fs->pwd); + } + +#ifdef CONFIG_IPC_NS + if (flags & CLONE_NEWIPC) + exit_sem(me); +#endif + +#ifdef CONFIG_TIME_NS + if (flags & CLONE_NEWTIME) + timens_commit(me, nsset->nsproxy->time_ns); +#endif + + /* transfer ownership */ + switch_task_namespaces(me, nsset->nsproxy); + nsset->nsproxy = NULL; +} + +SYSCALL_DEFINE2(setns, int, fd, int, flags) +{ struct file *file; - struct ns_common *ns; - int err; + struct ns_common *ns = NULL; + struct nsset nsset = {}; + int err = 0; - file = proc_ns_fget(fd); - if (IS_ERR(file)) - return PTR_ERR(file); + file = fget(fd); + if (!file) + return -EBADF; - err = -EINVAL; - ns = get_proc_ns(file_inode(file)); - if (nstype && (ns->ops->type != nstype)) - goto out; - - new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs); - if (IS_ERR(new_nsproxy)) { - err = PTR_ERR(new_nsproxy); - goto out; + if (proc_ns_file(file)) { + ns = get_proc_ns(file_inode(file)); + if (flags && (ns->ops->type != flags)) + err = -EINVAL; + flags = ns->ops->type; + } else if (!IS_ERR(pidfd_pid(file))) { + err = check_setns_flags(flags); + } else { + err = -EINVAL; } - - err = ns->ops->install(new_nsproxy, ns); - if (err) { - free_nsproxy(new_nsproxy); + if (err) goto out; - } - switch_task_namespaces(tsk, new_nsproxy); - perf_event_namespaces(tsk); + err = prepare_nsset(flags, &nsset); + if (err) + goto out; + + if (proc_ns_file(file)) + err = validate_ns(&nsset, ns); + else + err = validate_nsset(&nsset, file->private_data); + if (!err) { + commit_nsset(&nsset); + perf_event_namespaces(current); + } + put_nsset(&nsset); out: fput(file); return err; -- Gitblit v1.6.2