| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Pid namespaces |
|---|
| 3 | 4 | * |
|---|
| .. | .. |
|---|
| 25 | 26 | |
|---|
| 26 | 27 | static DEFINE_MUTEX(pid_caches_mutex); |
|---|
| 27 | 28 | static struct kmem_cache *pid_ns_cachep; |
|---|
| 28 | | -/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ |
|---|
| 29 | | -#define MAX_PID_NS_LEVEL 32 |
|---|
| 30 | 29 | /* Write once array, filled from the beginning. */ |
|---|
| 31 | 30 | static struct kmem_cache *pid_cache[MAX_PID_NS_LEVEL]; |
|---|
| 32 | 31 | |
|---|
| .. | .. |
|---|
| 57 | 56 | mutex_unlock(&pid_caches_mutex); |
|---|
| 58 | 57 | /* current can fail, but someone else can succeed. */ |
|---|
| 59 | 58 | return READ_ONCE(*pkc); |
|---|
| 60 | | -} |
|---|
| 61 | | - |
|---|
| 62 | | -static void proc_cleanup_work(struct work_struct *work) |
|---|
| 63 | | -{ |
|---|
| 64 | | - struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work); |
|---|
| 65 | | - pid_ns_release_proc(ns); |
|---|
| 66 | 59 | } |
|---|
| 67 | 60 | |
|---|
| 68 | 61 | static struct ucounts *inc_pid_namespaces(struct user_namespace *ns) |
|---|
| .. | .. |
|---|
| 116 | 109 | ns->user_ns = get_user_ns(user_ns); |
|---|
| 117 | 110 | ns->ucounts = ucounts; |
|---|
| 118 | 111 | ns->pid_allocated = PIDNS_ADDING; |
|---|
| 119 | | - INIT_WORK(&ns->proc_work, proc_cleanup_work); |
|---|
| 120 | 112 | |
|---|
| 121 | 113 | return ns; |
|---|
| 122 | 114 | |
|---|
| .. | .. |
|---|
| 217 | 209 | idr_for_each_entry_continue(&pid_ns->idr, pid, nr) { |
|---|
| 218 | 210 | task = pid_task(pid, PIDTYPE_PID); |
|---|
| 219 | 211 | if (task && !__fatal_signal_pending(task)) |
|---|
| 220 | | - send_sig_info(SIGKILL, SEND_SIG_FORCED, task); |
|---|
| 212 | + group_send_sig_info(SIGKILL, SEND_SIG_PRIV, task, PIDTYPE_MAX); |
|---|
| 221 | 213 | } |
|---|
| 222 | 214 | read_unlock(&tasklist_lock); |
|---|
| 223 | 215 | rcu_read_unlock(); |
|---|
| .. | .. |
|---|
| 233 | 225 | } while (rc != -ECHILD); |
|---|
| 234 | 226 | |
|---|
| 235 | 227 | /* |
|---|
| 236 | | - * kernel_wait4() above can't reap the EXIT_DEAD children but we do not |
|---|
| 237 | | - * really care, we could reparent them to the global init. We could |
|---|
| 238 | | - * exit and reap ->child_reaper even if it is not the last thread in |
|---|
| 239 | | - * this pid_ns, free_pid(pid_allocated == 0) calls proc_cleanup_work(), |
|---|
| 240 | | - * pid_ns can not go away until proc_kill_sb() drops the reference. |
|---|
| 228 | + * kernel_wait4() misses EXIT_DEAD children, and EXIT_ZOMBIE |
|---|
| 229 | + * processes whose parents are outside of the pid |
|---|
| 230 | + * namespace. Such processes are created with setns()+fork(). |
|---|
| 241 | 231 | * |
|---|
| 242 | | - * But this ns can also have other tasks injected by setns()+fork(). |
|---|
| 243 | | - * Again, ignoring the user visible semantics we do not really need |
|---|
| 244 | | - * to wait until they are all reaped, but they can be reparented to |
|---|
| 245 | | - * us and thus we need to ensure that pid->child_reaper stays valid |
|---|
| 246 | | - * until they all go away. See free_pid()->wake_up_process(). |
|---|
| 232 | + * If those EXIT_ZOMBIE processes are not reaped by their |
|---|
| 233 | + * parents before their parents exit, they will be reparented |
|---|
| 234 | + * to pid_ns->child_reaper. Thus pid_ns->child_reaper needs to |
|---|
| 235 | + * stay valid until they all go away. |
|---|
| 247 | 236 | * |
|---|
| 248 | | - * We rely on ignored SIGCHLD, an injected zombie must be autoreaped |
|---|
| 249 | | - * if reparented. |
|---|
| 237 | + * The code relies on the pid_ns->child_reaper ignoring |
|---|
| 238 | + * SIGCHLD to cause those EXIT_ZOMBIE processes to be |
|---|
| 239 | + * autoreaped if reparented. |
|---|
| 240 | + * |
|---|
| 241 | + * Semantically it is also desirable to wait for EXIT_ZOMBIE |
|---|
| 242 | + * processes before allowing the child_reaper to be reaped, as |
|---|
| 243 | + * that gives the invariant that when the init process of a |
|---|
| 244 | + * pid namespace is reaped all of the processes in the pid |
|---|
| 245 | + * namespace are gone. |
|---|
| 246 | + * |
|---|
| 247 | + * Once all of the other tasks are gone from the pid_namespace |
|---|
| 248 | + * free_pid() will awaken this task. |
|---|
| 250 | 249 | */ |
|---|
| 251 | 250 | for (;;) { |
|---|
| 252 | 251 | set_current_state(TASK_INTERRUPTIBLE); |
|---|
| 253 | 252 | if (pid_ns->pid_allocated == init_pids) |
|---|
| 254 | 253 | break; |
|---|
| 254 | + /* |
|---|
| 255 | + * Release tasks_rcu_exit_srcu to avoid following deadlock: |
|---|
| 256 | + * |
|---|
| 257 | + * 1) TASK A unshare(CLONE_NEWPID) |
|---|
| 258 | + * 2) TASK A fork() twice -> TASK B (child reaper for new ns) |
|---|
| 259 | + * and TASK C |
|---|
| 260 | + * 3) TASK B exits, kills TASK C, waits for TASK A to reap it |
|---|
| 261 | + * 4) TASK A calls synchronize_rcu_tasks() |
|---|
| 262 | + * -> synchronize_srcu(tasks_rcu_exit_srcu) |
|---|
| 263 | + * 5) *DEADLOCK* |
|---|
| 264 | + * |
|---|
| 265 | + * It is considered safe to release tasks_rcu_exit_srcu here |
|---|
| 266 | + * because we assume the current task can not be concurrently |
|---|
| 267 | + * reaped at this point. |
|---|
| 268 | + */ |
|---|
| 269 | + exit_tasks_rcu_stop(); |
|---|
| 255 | 270 | schedule(); |
|---|
| 271 | + exit_tasks_rcu_start(); |
|---|
| 256 | 272 | } |
|---|
| 257 | 273 | __set_current_state(TASK_RUNNING); |
|---|
| 258 | 274 | |
|---|
| .. | .. |
|---|
| 265 | 281 | |
|---|
| 266 | 282 | #ifdef CONFIG_CHECKPOINT_RESTORE |
|---|
| 267 | 283 | static int pid_ns_ctl_handler(struct ctl_table *table, int write, |
|---|
| 268 | | - void __user *buffer, size_t *lenp, loff_t *ppos) |
|---|
| 284 | + void *buffer, size_t *lenp, loff_t *ppos) |
|---|
| 269 | 285 | { |
|---|
| 270 | 286 | struct pid_namespace *pid_ns = task_active_pid_ns(current); |
|---|
| 271 | 287 | struct ctl_table tmp = *table; |
|---|
| 272 | 288 | int ret, next; |
|---|
| 273 | 289 | |
|---|
| 274 | | - if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN)) |
|---|
| 290 | + if (write && !checkpoint_restore_ns_capable(pid_ns->user_ns)) |
|---|
| 275 | 291 | return -EPERM; |
|---|
| 276 | 292 | |
|---|
| 277 | 293 | /* |
|---|
| .. | .. |
|---|
| 291 | 307 | } |
|---|
| 292 | 308 | |
|---|
| 293 | 309 | extern int pid_max; |
|---|
| 294 | | -static int zero = 0; |
|---|
| 295 | 310 | static struct ctl_table pid_ns_ctl_table[] = { |
|---|
| 296 | 311 | { |
|---|
| 297 | 312 | .procname = "ns_last_pid", |
|---|
| 298 | 313 | .maxlen = sizeof(int), |
|---|
| 299 | 314 | .mode = 0666, /* permissions are checked in the handler */ |
|---|
| 300 | 315 | .proc_handler = pid_ns_ctl_handler, |
|---|
| 301 | | - .extra1 = &zero, |
|---|
| 316 | + .extra1 = SYSCTL_ZERO, |
|---|
| 302 | 317 | .extra2 = &pid_max, |
|---|
| 303 | 318 | }, |
|---|
| 304 | 319 | { } |
|---|
| .. | .. |
|---|
| 381 | 396 | put_pid_ns(to_pid_ns(ns)); |
|---|
| 382 | 397 | } |
|---|
| 383 | 398 | |
|---|
| 384 | | -static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns) |
|---|
| 399 | +static int pidns_install(struct nsset *nsset, struct ns_common *ns) |
|---|
| 385 | 400 | { |
|---|
| 401 | + struct nsproxy *nsproxy = nsset->nsproxy; |
|---|
| 386 | 402 | struct pid_namespace *active = task_active_pid_ns(current); |
|---|
| 387 | 403 | struct pid_namespace *ancestor, *new = to_pid_ns(ns); |
|---|
| 388 | 404 | |
|---|
| 389 | 405 | if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) || |
|---|
| 390 | | - !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) |
|---|
| 406 | + !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN)) |
|---|
| 391 | 407 | return -EPERM; |
|---|
| 392 | 408 | |
|---|
| 393 | 409 | /* |
|---|