hc
2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
kernel/kernel/pid_namespace.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Pid namespaces
34 *
....@@ -25,8 +26,6 @@
2526
2627 static DEFINE_MUTEX(pid_caches_mutex);
2728 static struct kmem_cache *pid_ns_cachep;
28
-/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
29
-#define MAX_PID_NS_LEVEL 32
3029 /* Write once array, filled from the beginning. */
3130 static struct kmem_cache *pid_cache[MAX_PID_NS_LEVEL];
3231
....@@ -57,12 +56,6 @@
5756 mutex_unlock(&pid_caches_mutex);
5857 /* current can fail, but someone else can succeed. */
5958 return READ_ONCE(*pkc);
60
-}
61
-
62
-static void proc_cleanup_work(struct work_struct *work)
63
-{
64
- struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work);
65
- pid_ns_release_proc(ns);
6659 }
6760
6861 static struct ucounts *inc_pid_namespaces(struct user_namespace *ns)
....@@ -116,7 +109,6 @@
116109 ns->user_ns = get_user_ns(user_ns);
117110 ns->ucounts = ucounts;
118111 ns->pid_allocated = PIDNS_ADDING;
119
- INIT_WORK(&ns->proc_work, proc_cleanup_work);
120112
121113 return ns;
122114
....@@ -217,7 +209,7 @@
217209 idr_for_each_entry_continue(&pid_ns->idr, pid, nr) {
218210 task = pid_task(pid, PIDTYPE_PID);
219211 if (task && !__fatal_signal_pending(task))
220
- send_sig_info(SIGKILL, SEND_SIG_FORCED, task);
212
+ group_send_sig_info(SIGKILL, SEND_SIG_PRIV, task, PIDTYPE_MAX);
221213 }
222214 read_unlock(&tasklist_lock);
223215 rcu_read_unlock();
....@@ -233,20 +225,27 @@
233225 } while (rc != -ECHILD);
234226
235227 /*
236
- * kernel_wait4() above can't reap the EXIT_DEAD children but we do not
237
- * really care, we could reparent them to the global init. We could
238
- * exit and reap ->child_reaper even if it is not the last thread in
239
- * this pid_ns, free_pid(pid_allocated == 0) calls proc_cleanup_work(),
240
- * pid_ns can not go away until proc_kill_sb() drops the reference.
228
+ * kernel_wait4() misses EXIT_DEAD children, and EXIT_ZOMBIE
229
+ * processes whose parent processes are outside of the pid
230
+ * namespace. Such processes are created with setns()+fork().
241231 *
242
- * But this ns can also have other tasks injected by setns()+fork().
243
- * Again, ignoring the user visible semantics we do not really need
244
- * to wait until they are all reaped, but they can be reparented to
245
- * us and thus we need to ensure that pid->child_reaper stays valid
246
- * until they all go away. See free_pid()->wake_up_process().
232
+ * If those EXIT_ZOMBIE processes are not reaped by their
233
+ * parents before their parents exit, they will be reparented
234
+ * to pid_ns->child_reaper. Thus pid_ns->child_reaper needs to
235
+ * stay valid until they all go away.
247236 *
248
- * We rely on ignored SIGCHLD, an injected zombie must be autoreaped
249
- * if reparented.
237
+ * The code relies on the pid_ns->child_reaper ignoring
238
+ * SIGCHLD to cause those EXIT_ZOMBIE processes to be
239
+ * autoreaped if reparented.
240
+ *
241
+ * Semantically it is also desirable to wait for EXIT_ZOMBIE
242
+ * processes before allowing the child_reaper to be reaped, as
243
+ * that gives the invariant that when the init process of a
244
+ * pid namespace is reaped all of the processes in the pid
245
+ * namespace are gone.
246
+ *
247
+ * Once all of the other tasks are gone from the pid_namespace
248
+ * free_pid() will awaken this task.
250249 */
251250 for (;;) {
252251 set_current_state(TASK_INTERRUPTIBLE);
....@@ -265,13 +264,13 @@
265264
266265 #ifdef CONFIG_CHECKPOINT_RESTORE
267266 static int pid_ns_ctl_handler(struct ctl_table *table, int write,
268
- void __user *buffer, size_t *lenp, loff_t *ppos)
267
+ void *buffer, size_t *lenp, loff_t *ppos)
269268 {
270269 struct pid_namespace *pid_ns = task_active_pid_ns(current);
271270 struct ctl_table tmp = *table;
272271 int ret, next;
273272
274
- if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN))
273
+ if (write && !checkpoint_restore_ns_capable(pid_ns->user_ns))
275274 return -EPERM;
276275
277276 /*
....@@ -291,14 +290,13 @@
291290 }
292291
293292 extern int pid_max;
294
-static int zero = 0;
295293 static struct ctl_table pid_ns_ctl_table[] = {
296294 {
297295 .procname = "ns_last_pid",
298296 .maxlen = sizeof(int),
299297 .mode = 0666, /* permissions are checked in the handler */
300298 .proc_handler = pid_ns_ctl_handler,
301
- .extra1 = &zero,
299
+ .extra1 = SYSCTL_ZERO,
302300 .extra2 = &pid_max,
303301 },
304302 { }
....@@ -381,13 +379,14 @@
381379 put_pid_ns(to_pid_ns(ns));
382380 }
383381
384
-static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
382
+static int pidns_install(struct nsset *nsset, struct ns_common *ns)
385383 {
384
+ struct nsproxy *nsproxy = nsset->nsproxy;
386385 struct pid_namespace *active = task_active_pid_ns(current);
387386 struct pid_namespace *ancestor, *new = to_pid_ns(ns);
388387
389388 if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) ||
390
- !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
389
+ !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
391390 return -EPERM;
392391
393392 /*