~hc/RK356X_SDK_RELEASE.git

..	..	@@ -1,8 +1,8 @@
	1	+// SPDX-License-Identifier: GPL-2.0-only
1	2	/*
2	3	* linux/fs/namespace.c
3	4	*
4	5	* (C) Copyright Al Viro 2000, 2001
5		- * Released under GPL v2.
6	6	*
7	7	* Based on code from fs/super.c, copyright Linus Torvalds and others.
8	8	* Heavily rewritten.
..	..	@@ -20,12 +20,16 @@
20	20	#include <linux/init.h> /* init_rootfs */
21	21	#include <linux/fs_struct.h> /* get_fs_root et.al. */
22	22	#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
	23	+#include <linux/file.h>
23	24	#include <linux/uaccess.h>
24	25	#include <linux/proc_ns.h>
25	26	#include <linux/magic.h>
26		-#include <linux/bootmem.h>
	27	+#include <linux/memblock.h>
27	28	#include <linux/task_work.h>
28	29	#include <linux/sched/task.h>
	30	+#include <uapi/linux/mount.h>
	31	+#include <linux/fs_context.h>
	32	+#include <linux/shmem_fs.h>
29	33
30	34	#include "pnode.h"
31	35	#include "internal.h"
..	..	@@ -66,6 +70,8 @@
66	70	static struct hlist_head *mountpoint_hashtable __read_mostly;
67	71	static struct kmem_cache *mnt_cache __read_mostly;
68	72	static DECLARE_RWSEM(namespace_sem);
	73	+static HLIST_HEAD(unmounted); /* protected by namespace_sem */
	74	+static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
69	75
70	76	/* /sys/fs */
71	77	struct kobject *fs_kobj;
..	..	@@ -150,10 +156,10 @@
150	156	/*
151	157	* vfsmount lock must be held for write
152	158	*/
153		-unsigned int mnt_get_count(struct mount *mnt)
	159	+int mnt_get_count(struct mount *mnt)
154	160	{
155	161	#ifdef CONFIG_SMP
156		- unsigned int count = 0;
	162	+ int count = 0;
157	163	int cpu;
158	164
159	165	for_each_possible_cpu(cpu) {
..	..	@@ -164,14 +170,6 @@
164	170	#else
165	171	return mnt->mnt_count;
166	172	#endif
167		-}
168		-
169		-static void drop_mountpoint(struct fs_pin *p)
170		-{
171		- struct mount *m = container_of(p, struct mount, mnt_umount);
172		- dput(m->mnt_ex_mountpoint);
173		- pin_remove(p);
174		- mntput(&m->mnt);
175	173	}
176	174
177	175	static struct mount alloc_vfsmnt(const char name)
..	..	@@ -200,7 +198,6 @@
200	198	mnt->mnt_count = 1;
201	199	mnt->mnt_writers = 0;
202	200	#endif
203		- mnt->mnt.data = NULL;
204	201
205	202	INIT_HLIST_NODE(&mnt->mnt_hash);
206	203	INIT_LIST_HEAD(&mnt->mnt_child);
..	..	@@ -212,7 +209,7 @@
212	209	INIT_LIST_HEAD(&mnt->mnt_slave);
213	210	INIT_HLIST_NODE(&mnt->mnt_mp_list);
214	211	INIT_LIST_HEAD(&mnt->mnt_umounting);
215		- init_fs_pin(&mnt->mnt_umount, drop_mountpoint);
	212	+ INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
216	213	}
217	214	return mnt;
218	215
..	..	@@ -246,13 +243,9 @@
246	243	* mnt_want/drop_write() will _keep_ the filesystem
247	244	* r/w.
248	245	*/
249		-int __mnt_is_readonly(struct vfsmount *mnt)
	246	+bool __mnt_is_readonly(struct vfsmount *mnt)
250	247	{
251		- if (mnt->mnt_flags & MNT_READONLY)
252		- return 1;
253		- if (sb_rdonly(mnt->mnt_sb))
254		- return 1;
255		- return 0;
	248	+ return (mnt->mnt_flags & MNT_READONLY) \|\| sb_rdonly(mnt->mnt_sb);
256	249	}
257	250	EXPORT_SYMBOL_GPL(__mnt_is_readonly);
258	251
..	..	@@ -422,7 +415,7 @@
422	415	sb_end_write(file_inode(file)->i_sb);
423	416	return ret;
424	417	}
425		-EXPORT_SYMBOL_GPL(mnt_want_write_file);
	418	+EXPORT_SYMBOL_NS_GPL(mnt_want_write_file, ANDROID_GKI_VFS_EXPORT_ONLY);
426	419
427	420	/**
428	421	* __mnt_drop_write - give up write access to a mount
..	..	@@ -464,7 +457,7 @@
464	457	__mnt_drop_write_file(file);
465	458	sb_end_write(file_inode(file)->i_sb);
466	459	}
467		-EXPORT_SYMBOL(mnt_drop_write_file);
	460	+EXPORT_SYMBOL_NS(mnt_drop_write_file, ANDROID_GKI_VFS_EXPORT_ONLY);
468	461
469	462	static int mnt_make_readonly(struct mount *mnt)
470	463	{
..	..	@@ -508,11 +501,12 @@
508	501	return ret;
509	502	}
510	503
511		-static void __mnt_unmake_readonly(struct mount *mnt)
	504	+static int __mnt_unmake_readonly(struct mount *mnt)
512	505	{
513	506	lock_mount_hash();
514	507	mnt->mnt.mnt_flags &= ~MNT_READONLY;
515	508	unlock_mount_hash();
	509	+ return 0;
516	510	}
517	511
518	512	int sb_prepare_remount_readonly(struct super_block *sb)
..	..	@@ -553,7 +547,6 @@
553	547
554	548	static void free_vfsmnt(struct mount *mnt)
555	549	{
556		- kfree(mnt->mnt.data);
557	550	kfree_const(mnt->mnt_devname);
558	551	#ifdef CONFIG_SMP
559	552	free_percpu(mnt->mnt_pcp);
..	..	@@ -655,6 +648,21 @@
655	648	return m;
656	649	}
657	650
	651	+static inline void lock_ns_list(struct mnt_namespace *ns)
	652	+{
	653	+ spin_lock(&ns->ns_lock);
	654	+}
	655	+
	656	+static inline void unlock_ns_list(struct mnt_namespace *ns)
	657	+{
	658	+ spin_unlock(&ns->ns_lock);
	659	+}
	660	+
	661	+static inline bool mnt_is_cursor(struct mount *mnt)
	662	+{
	663	+ return mnt->mnt.mnt_flags & MNT_CURSOR;
	664	+}
	665	+
658	666	/*
659	667	* __is_local_mountpoint - Test to see if dentry is a mountpoint in the
660	668	* current mount namespace.
..	..	@@ -676,17 +684,18 @@
676	684	struct mount *mnt;
677	685	bool is_covered = false;
678	686
679		- if (!d_mountpoint(dentry))
680		- goto out;
681		-
682	687	down_read(&namespace_sem);
	688	+ lock_ns_list(ns);
683	689	list_for_each_entry(mnt, &ns->list, mnt_list) {
	690	+ if (mnt_is_cursor(mnt))
	691	+ continue;
684	692	is_covered = (mnt->mnt_mountpoint == dentry);
685	693	if (is_covered)
686	694	break;
687	695	}
	696	+ unlock_ns_list(ns);
688	697	up_read(&namespace_sem);
689		-out:
	698	+
690	699	return is_covered;
691	700	}
692	701
..	..	@@ -741,7 +750,7 @@
741	750
742	751	/* Add the new mountpoint to the hash table */
743	752	read_seqlock_excl(&mount_lock);
744		- new->m_dentry = dentry;
	753	+ new->m_dentry = dget(dentry);
745	754	new->m_count = 1;
746	755	hlist_add_head(&new->m_hash, mp_hash(dentry));
747	756	INIT_HLIST_HEAD(&new->m_list);
..	..	@@ -754,7 +763,11 @@
754	763	return mp;
755	764	}
756	765
757		-static void put_mountpoint(struct mountpoint *mp)
	766	+/*
	767	+ * vfsmount lock must be held. Additionally, the caller is responsible
	768	+ * for serializing calls for given disposal list.
	769	+ */
	770	+static void __put_mountpoint(struct mountpoint mp, struct list_head list)
758	771	{
759	772	if (!--mp->m_count) {
760	773	struct dentry *dentry = mp->m_dentry;
..	..	@@ -762,9 +775,16 @@
762	775	spin_lock(&dentry->d_lock);
763	776	dentry->d_flags &= ~DCACHE_MOUNTED;
764	777	spin_unlock(&dentry->d_lock);
	778	+ dput_to_list(dentry, list);
765	779	hlist_del(&mp->m_hash);
766	780	kfree(mp);
767	781	}
	782	+}
	783	+
	784	+/* called with namespace_lock and vfsmount lock */
	785	+static void put_mountpoint(struct mountpoint *mp)
	786	+{
	787	+ __put_mountpoint(mp, &ex_mountpoints);
768	788	}
769	789
770	790	static inline int check_mnt(struct mount *mnt)
..	..	@@ -797,25 +817,17 @@
797	817	/*
798	818	* vfsmount lock must be held for write
799	819	*/
800		-static void unhash_mnt(struct mount *mnt)
	820	+static struct mountpoint unhash_mnt(struct mount mnt)
801	821	{
	822	+ struct mountpoint *mp;
802	823	mnt->mnt_parent = mnt;
803	824	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
804	825	list_del_init(&mnt->mnt_child);
805	826	hlist_del_init_rcu(&mnt->mnt_hash);
806	827	hlist_del_init(&mnt->mnt_mp_list);
807		- put_mountpoint(mnt->mnt_mp);
	828	+ mp = mnt->mnt_mp;
808	829	mnt->mnt_mp = NULL;
809		-}
810		-
811		-/*
812		- * vfsmount lock must be held for write
813		- */
814		-static void detach_mnt(struct mount mnt, struct path old_path)
815		-{
816		- old_path->dentry = mnt->mnt_mountpoint;
817		- old_path->mnt = &mnt->mnt_parent->mnt;
818		- unhash_mnt(mnt);
	830	+ return mp;
819	831	}
820	832
821	833	/*
..	..	@@ -823,9 +835,7 @@
823	835	*/
824	836	static void umount_mnt(struct mount *mnt)
825	837	{
826		- /* old mountpoint will be dropped when we can do that */
827		- mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint;
828		- unhash_mnt(mnt);
	838	+ put_mountpoint(unhash_mnt(mnt));
829	839	}
830	840
831	841	/*
..	..	@@ -837,7 +847,7 @@
837	847	{
838	848	mp->m_count++;
839	849	mnt_add_count(mnt, 1); /* essentially, that's mntget */
840		- child_mnt->mnt_mountpoint = dget(mp->m_dentry);
	850	+ child_mnt->mnt_mountpoint = mp->m_dentry;
841	851	child_mnt->mnt_parent = mnt;
842	852	child_mnt->mnt_mp = mp;
843	853	hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
..	..	@@ -864,7 +874,6 @@
864	874	void mnt_change_mountpoint(struct mount parent, struct mountpoint mp, struct mount *mnt)
865	875	{
866	876	struct mountpoint *old_mp = mnt->mnt_mp;
867		- struct dentry *old_mountpoint = mnt->mnt_mountpoint;
868	877	struct mount *old_parent = mnt->mnt_parent;
869	878
870	879	list_del_init(&mnt->mnt_child);
..	..	@@ -874,22 +883,6 @@
874	883	attach_mnt(mnt, parent, mp);
875	884
876	885	put_mountpoint(old_mp);
877		-
878		- /*
879		- * Safely avoid even the suggestion this code might sleep or
880		- * lock the mount hash by taking advantage of the knowledge that
881		- * mnt_change_mountpoint will not release the final reference
882		- * to a mountpoint.
883		- *
884		- * During mounting, the mount passed in as the parent mount will
885		- * continue to use the old mountpoint and during unmounting, the
886		- * old mountpoint will continue to exist until namespace_unlock,
887		- * which happens well after mnt_change_mountpoint.
888		- */
889		- spin_lock(&old_mountpoint->d_lock);
890		- old_mountpoint->d_lockref.count--;
891		- spin_unlock(&old_mountpoint->d_lock);
892		-
893	886	mnt_add_count(old_parent, -1);
894	887	}
895	888
..	..	@@ -944,45 +937,80 @@
944	937	return p;
945	938	}
946	939
947		-struct vfsmount *
948		-vfs_kern_mount(struct file_system_type type, int flags, const char name, void *data)
	940	+/**
	941	+ * vfs_create_mount - Create a mount for a configured superblock
	942	+ * @fc: The configuration context with the superblock attached
	943	+ *
	944	+ * Create a mount to an already configured superblock. If necessary, the
	945	+ * caller should invoke vfs_get_tree() before calling this.
	946	+ *
	947	+ * Note that this does not attach the mount to anything.
	948	+ */
	949	+struct vfsmount vfs_create_mount(struct fs_context fc)
949	950	{
950	951	struct mount *mnt;
951		- struct dentry *root;
952	952
953		- if (!type)
954		- return ERR_PTR(-ENODEV);
	953	+ if (!fc->root)
	954	+ return ERR_PTR(-EINVAL);
955	955
956		- mnt = alloc_vfsmnt(name);
	956	+ mnt = alloc_vfsmnt(fc->source ?: "none");
957	957	if (!mnt)
958	958	return ERR_PTR(-ENOMEM);
959	959
960		- if (type->alloc_mnt_data) {
961		- mnt->mnt.data = type->alloc_mnt_data();
962		- if (!mnt->mnt.data) {
963		- mnt_free_id(mnt);
964		- free_vfsmnt(mnt);
965		- return ERR_PTR(-ENOMEM);
966		- }
967		- }
968		- if (flags & SB_KERNMOUNT)
	960	+ if (fc->sb_flags & SB_KERNMOUNT)
969	961	mnt->mnt.mnt_flags = MNT_INTERNAL;
970	962
971		- root = mount_fs(type, flags, name, &mnt->mnt, data);
972		- if (IS_ERR(root)) {
973		- mnt_free_id(mnt);
974		- free_vfsmnt(mnt);
975		- return ERR_CAST(root);
976		- }
	963	+ atomic_inc(&fc->root->d_sb->s_active);
	964	+ mnt->mnt.mnt_sb = fc->root->d_sb;
	965	+ mnt->mnt.mnt_root = dget(fc->root);
	966	+ mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	967	+ mnt->mnt_parent = mnt;
977	968
978		- mnt->mnt.mnt_root = root;
979		- mnt->mnt.mnt_sb = root->d_sb;
980		- mnt->mnt_mountpoint = mnt->mnt.mnt_root;
981		- mnt->mnt_parent = mnt;
982	969	lock_mount_hash();
983		- list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
	970	+ list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
984	971	unlock_mount_hash();
985	972	return &mnt->mnt;
	973	+}
	974	+EXPORT_SYMBOL(vfs_create_mount);
	975	+
	976	+struct vfsmount fc_mount(struct fs_context fc)
	977	+{
	978	+ int err = vfs_get_tree(fc);
	979	+ if (!err) {
	980	+ up_write(&fc->root->d_sb->s_umount);
	981	+ return vfs_create_mount(fc);
	982	+ }
	983	+ return ERR_PTR(err);
	984	+}
	985	+EXPORT_SYMBOL(fc_mount);
	986	+
	987	+struct vfsmount vfs_kern_mount(struct file_system_type type,
	988	+ int flags, const char *name,
	989	+ void *data)
	990	+{
	991	+ struct fs_context *fc;
	992	+ struct vfsmount *mnt;
	993	+ int ret = 0;
	994	+
	995	+ if (!type)
	996	+ return ERR_PTR(-EINVAL);
	997	+
	998	+ fc = fs_context_for_mount(type, flags);
	999	+ if (IS_ERR(fc))
	1000	+ return ERR_CAST(fc);
	1001	+
	1002	+ if (name)
	1003	+ ret = vfs_parse_fs_string(fc, "source",
	1004	+ name, strlen(name));
	1005	+ if (!ret)
	1006	+ ret = parse_monolithic_mount_data(fc, data);
	1007	+ if (!ret)
	1008	+ mnt = fc_mount(fc);
	1009	+ else
	1010	+ mnt = ERR_PTR(ret);
	1011	+
	1012	+ put_fs_context(fc);
	1013	+ return mnt;
986	1014	}
987	1015	EXPORT_SYMBOL_GPL(vfs_kern_mount);
988	1016
..	..	@@ -1012,14 +1040,6 @@
1012	1040	if (!mnt)
1013	1041	return ERR_PTR(-ENOMEM);
1014	1042
1015		- if (sb->s_op->clone_mnt_data) {
1016		- mnt->mnt.data = sb->s_op->clone_mnt_data(old->mnt.data);
1017		- if (!mnt->mnt.data) {
1018		- err = -ENOMEM;
1019		- goto out_free;
1020		- }
1021		- }
1022		-
1023	1043	if (flag & (CL_SLAVE \| CL_PRIVATE \| CL_SHARED_TO_SLAVE))
1024	1044	mnt->mnt_group_id = 0; /* not a peer of original */
1025	1045	else
..	..	@@ -1033,27 +1053,6 @@
1033	1053
1034	1054	mnt->mnt.mnt_flags = old->mnt.mnt_flags;
1035	1055	mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD\|MNT_MARKED\|MNT_INTERNAL);
1036		- /* Don't allow unprivileged users to change mount flags */
1037		- if (flag & CL_UNPRIVILEGED) {
1038		- mnt->mnt.mnt_flags \|= MNT_LOCK_ATIME;
1039		-
1040		- if (mnt->mnt.mnt_flags & MNT_READONLY)
1041		- mnt->mnt.mnt_flags \|= MNT_LOCK_READONLY;
1042		-
1043		- if (mnt->mnt.mnt_flags & MNT_NODEV)
1044		- mnt->mnt.mnt_flags \|= MNT_LOCK_NODEV;
1045		-
1046		- if (mnt->mnt.mnt_flags & MNT_NOSUID)
1047		- mnt->mnt.mnt_flags \|= MNT_LOCK_NOSUID;
1048		-
1049		- if (mnt->mnt.mnt_flags & MNT_NOEXEC)
1050		- mnt->mnt.mnt_flags \|= MNT_LOCK_NOEXEC;
1051		- }
1052		-
1053		- /* Don't allow unprivileged users to reveal what is under a mount */
1054		- if ((flag & CL_UNPRIVILEGED) &&
1055		- (!(flag & CL_EXPIRE) \|\| list_empty(&old->mnt_expire)))
1056		- mnt->mnt.mnt_flags \|= MNT_LOCKED;
1057	1056
1058	1057	atomic_inc(&sb->s_active);
1059	1058	mnt->mnt.mnt_sb = sb;
..	..	@@ -1098,19 +1097,22 @@
1098	1097
1099	1098	static void cleanup_mnt(struct mount *mnt)
1100	1099	{
	1100	+ struct hlist_node *p;
	1101	+ struct mount *m;
1101	1102	/*
1102		- * This probably indicates that somebody messed
1103		- * up a mnt_want/drop_write() pair. If this
1104		- * happens, the filesystem was probably unable
1105		- * to make r/w->r/o transitions.
1106		- */
1107		- /*
	1103	+ * The warning here probably indicates that somebody messed
	1104	+ * up a mnt_want/drop_write() pair. If this happens, the
	1105	+ * filesystem was probably unable to make r/w->r/o transitions.
1108	1106	* The locking used to deal with mnt_count decrement provides barriers,
1109	1107	* so mnt_get_writers() below is safe.
1110	1108	*/
1111	1109	WARN_ON(mnt_get_writers(mnt));
1112	1110	if (unlikely(mnt->mnt_pins.first))
1113	1111	mnt_pin_kill(mnt);
	1112	+ hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) {
	1113	+ hlist_del(&m->mnt_umount);
	1114	+ mntput(&m->mnt);
	1115	+ }
1114	1116	fsnotify_vfsmount_delete(&mnt->mnt);
1115	1117	dput(mnt->mnt.mnt_root);
1116	1118	deactivate_super(mnt->mnt.mnt_sb);
..	..	@@ -1136,6 +1138,9 @@
1136	1138
1137	1139	static void mntput_no_expire(struct mount *mnt)
1138	1140	{
	1141	+ LIST_HEAD(list);
	1142	+ int count;
	1143	+
1139	1144	rcu_read_lock();
1140	1145	if (likely(READ_ONCE(mnt->mnt_ns))) {
1141	1146	/*
..	..	@@ -1158,7 +1163,9 @@
1158	1163	*/
1159	1164	smp_mb();
1160	1165	mnt_add_count(mnt, -1);
1161		- if (mnt_get_count(mnt)) {
	1166	+ count = mnt_get_count(mnt);
	1167	+ if (count != 0) {
	1168	+ WARN_ON(count < 0);
1162	1169	rcu_read_unlock();
1163	1170	unlock_mount_hash();
1164	1171	return;
..	..	@@ -1176,16 +1183,18 @@
1176	1183	if (unlikely(!list_empty(&mnt->mnt_mounts))) {
1177	1184	struct mount p, tmp;
1178	1185	list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
1179		- umount_mnt(p);
	1186	+ __put_mountpoint(unhash_mnt(p), &list);
	1187	+ hlist_add_head(&p->mnt_umount, &mnt->mnt_stuck_children);
1180	1188	}
1181	1189	}
1182	1190	unlock_mount_hash();
	1191	+ shrink_dentry_list(&list);
1183	1192
1184	1193	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
1185	1194	struct task_struct *task = current;
1186	1195	if (likely(!(task->flags & PF_KTHREAD))) {
1187	1196	init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
1188		- if (!task_work_add(task, &mnt->mnt_rcu, true))
	1197	+ if (!task_work_add(task, &mnt->mnt_rcu, TWA_RESUME))
1189	1198	return;
1190	1199	}
1191	1200	if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
..	..	@@ -1255,46 +1264,71 @@
1255	1264	}
1256	1265
1257	1266	#ifdef CONFIG_PROC_FS
	1267	+static struct mount mnt_list_next(struct mnt_namespace ns,
	1268	+ struct list_head *p)
	1269	+{
	1270	+ struct mount mnt, ret = NULL;
	1271	+
	1272	+ lock_ns_list(ns);
	1273	+ list_for_each_continue(p, &ns->list) {
	1274	+ mnt = list_entry(p, typeof(*mnt), mnt_list);
	1275	+ if (!mnt_is_cursor(mnt)) {
	1276	+ ret = mnt;
	1277	+ break;
	1278	+ }
	1279	+ }
	1280	+ unlock_ns_list(ns);
	1281	+
	1282	+ return ret;
	1283	+}
	1284	+
1258	1285	/* iterator; we want it to have access to namespace_sem, thus here... */
1259	1286	static void m_start(struct seq_file m, loff_t *pos)
1260	1287	{
1261	1288	struct proc_mounts *p = m->private;
	1289	+ struct list_head *prev;
1262	1290
1263	1291	down_read(&namespace_sem);
1264		- if (p->cached_event == p->ns->event) {
1265		- void *v = p->cached_mount;
1266		- if (*pos == p->cached_index)
1267		- return v;
1268		- if (*pos == p->cached_index + 1) {
1269		- v = seq_list_next(v, &p->ns->list, &p->cached_index);
1270		- return p->cached_mount = v;
1271		- }
	1292	+ if (!*pos) {
	1293	+ prev = &p->ns->list;
	1294	+ } else {
	1295	+ prev = &p->cursor.mnt_list;
	1296	+
	1297	+ /* Read after we'd reached the end? */
	1298	+ if (list_empty(prev))
	1299	+ return NULL;
1272	1300	}
1273	1301
1274		- p->cached_event = p->ns->event;
1275		- p->cached_mount = seq_list_start(&p->ns->list, *pos);
1276		- p->cached_index = *pos;
1277		- return p->cached_mount;
	1302	+ return mnt_list_next(p->ns, prev);
1278	1303	}
1279	1304
1280	1305	static void m_next(struct seq_file m, void v, loff_t pos)
1281	1306	{
1282	1307	struct proc_mounts *p = m->private;
	1308	+ struct mount *mnt = v;
1283	1309
1284		- p->cached_mount = seq_list_next(v, &p->ns->list, pos);
1285		- p->cached_index = *pos;
1286		- return p->cached_mount;
	1310	+ ++*pos;
	1311	+ return mnt_list_next(p->ns, &mnt->mnt_list);
1287	1312	}
1288	1313
1289	1314	static void m_stop(struct seq_file m, void v)
1290	1315	{
	1316	+ struct proc_mounts *p = m->private;
	1317	+ struct mount *mnt = v;
	1318	+
	1319	+ lock_ns_list(p->ns);
	1320	+ if (mnt)
	1321	+ list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list);
	1322	+ else
	1323	+ list_del_init(&p->cursor.mnt_list);
	1324	+ unlock_ns_list(p->ns);
1291	1325	up_read(&namespace_sem);
1292	1326	}
1293	1327
1294	1328	static int m_show(struct seq_file m, void v)
1295	1329	{
1296	1330	struct proc_mounts *p = m->private;
1297		- struct mount *r = list_entry(v, struct mount, mnt_list);
	1331	+ struct mount *r = v;
1298	1332	return p->show(m, &r->mnt);
1299	1333	}
1300	1334
..	..	@@ -1304,6 +1338,15 @@
1304	1338	.stop = m_stop,
1305	1339	.show = m_show,
1306	1340	};
	1341	+
	1342	+void mnt_cursor_del(struct mnt_namespace ns, struct mount cursor)
	1343	+{
	1344	+ down_read(&namespace_sem);
	1345	+ lock_ns_list(ns);
	1346	+ list_del(&cursor->mnt_list);
	1347	+ unlock_ns_list(ns);
	1348	+ up_read(&namespace_sem);
	1349	+}
1307	1350	#endif /* CONFIG_PROC_FS */
1308	1351
1309	1352	/**
..	..	@@ -1365,22 +1408,29 @@
1365	1408
1366	1409	EXPORT_SYMBOL(may_umount);
1367	1410
1368		-static HLIST_HEAD(unmounted); /* protected by namespace_sem */
1369		-
1370	1411	static void namespace_unlock(void)
1371	1412	{
1372	1413	struct hlist_head head;
	1414	+ struct hlist_node *p;
	1415	+ struct mount *m;
	1416	+ LIST_HEAD(list);
1373	1417
1374	1418	hlist_move_list(&unmounted, &head);
	1419	+ list_splice_init(&ex_mountpoints, &list);
1375	1420
1376	1421	up_write(&namespace_sem);
	1422	+
	1423	+ shrink_dentry_list(&list);
1377	1424
1378	1425	if (likely(hlist_empty(&head)))
1379	1426	return;
1380	1427
1381		- synchronize_rcu();
	1428	+ synchronize_rcu_expedited();
1382	1429
1383		- group_pin_kill(&head);
	1430	+ hlist_for_each_entry_safe(m, p, &head, mnt_umount) {
	1431	+ hlist_del(&m->mnt_umount);
	1432	+ mntput(&m->mnt);
	1433	+ }
1384	1434	}
1385	1435
1386	1436	static inline void namespace_lock(void)
..	..	@@ -1466,9 +1516,6 @@
1466	1516	p->mnt.mnt_flags \|= MNT_SYNC_UMOUNT;
1467	1517
1468	1518	disconnect = disconnect_mount(p, how);
1469		-
1470		- pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
1471		- disconnect ? &unmounted : NULL);
1472	1519	if (mnt_has_parent(p)) {
1473	1520	mnt_add_count(p->mnt_parent, -1);
1474	1521	if (!disconnect) {
..	..	@@ -1479,10 +1526,35 @@
1479	1526	}
1480	1527	}
1481	1528	change_mnt_propagation(p, MS_PRIVATE);
	1529	+ if (disconnect)
	1530	+ hlist_add_head(&p->mnt_umount, &unmounted);
1482	1531	}
1483	1532	}
1484	1533
1485	1534	static void shrink_submounts(struct mount *mnt);
	1535	+
	1536	+static int do_umount_root(struct super_block *sb)
	1537	+{
	1538	+ int ret = 0;
	1539	+
	1540	+ down_write(&sb->s_umount);
	1541	+ if (!sb_rdonly(sb)) {
	1542	+ struct fs_context *fc;
	1543	+
	1544	+ fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY,
	1545	+ SB_RDONLY);
	1546	+ if (IS_ERR(fc)) {
	1547	+ ret = PTR_ERR(fc);
	1548	+ } else {
	1549	+ ret = parse_monolithic_mount_data(fc, NULL);
	1550	+ if (!ret)
	1551	+ ret = reconfigure_super(fc);
	1552	+ put_fs_context(fc);
	1553	+ }
	1554	+ }
	1555	+ up_write(&sb->s_umount);
	1556	+ return ret;
	1557	+}
1486	1558
1487	1559	static int do_umount(struct mount *mnt, int flags)
1488	1560	{
..	..	@@ -1549,11 +1621,7 @@
1549	1621	*/
1550	1622	if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
1551	1623	return -EPERM;
1552		- down_write(&sb->s_umount);
1553		- if (!sb_rdonly(sb))
1554		- retval = do_remount_sb(sb, SB_RDONLY, NULL, 0);
1555		- up_write(&sb->s_umount);
1556		- return retval;
	1624	+ return do_umount_root(sb);
1557	1625	}
1558	1626
1559	1627	namespace_lock();
..	..	@@ -1602,15 +1670,15 @@
1602	1670	namespace_lock();
1603	1671	lock_mount_hash();
1604	1672	mp = lookup_mountpoint(dentry);
1605		- if (IS_ERR_OR_NULL(mp))
	1673	+ if (!mp)
1606	1674	goto out_unlock;
1607	1675
1608	1676	event++;
1609	1677	while (!hlist_empty(&mp->m_list)) {
1610	1678	mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
1611	1679	if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
1612		- hlist_add_head(&mnt->mnt_umount.s_list, &unmounted);
1613	1680	umount_mnt(mnt);
	1681	+ hlist_add_head(&mnt->mnt_umount, &unmounted);
1614	1682	}
1615	1683	else umount_tree(mnt, UMOUNT_CONNECTED);
1616	1684	}
..	..	@@ -1645,52 +1713,55 @@
1645	1713	}
1646	1714	#endif
1647	1715
1648		-/*
1649		- * Now umount can handle mount points as well as block devices.
1650		- * This is important for filesystems which use unnamed block devices.
1651		- *
1652		- * We now support a flag for forced unmount like the other 'big iron'
1653		- * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
1654		- */
1655		-
1656		-int ksys_umount(char __user *name, int flags)
	1716	+static int can_umount(const struct path *path, int flags)
1657	1717	{
1658		- struct path path;
1659		- struct mount *mnt;
1660		- int retval;
1661		- int lookup_flags = 0;
1662		-
1663		- if (flags & ~(MNT_FORCE \| MNT_DETACH \| MNT_EXPIRE \| UMOUNT_NOFOLLOW))
1664		- return -EINVAL;
	1718	+ struct mount *mnt = real_mount(path->mnt);
1665	1719
1666	1720	if (!may_mount())
1667	1721	return -EPERM;
	1722	+ if (path->dentry != path->mnt->mnt_root)
	1723	+ return -EINVAL;
	1724	+ if (!check_mnt(mnt))
	1725	+ return -EINVAL;
	1726	+ if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */
	1727	+ return -EINVAL;
	1728	+ if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
	1729	+ return -EPERM;
	1730	+ return 0;
	1731	+}
	1732	+
	1733	+// caller is responsible for flags being sane
	1734	+int path_umount(struct path *path, int flags)
	1735	+{
	1736	+ struct mount *mnt = real_mount(path->mnt);
	1737	+ int ret;
	1738	+
	1739	+ ret = can_umount(path, flags);
	1740	+ if (!ret)
	1741	+ ret = do_umount(mnt, flags);
	1742	+
	1743	+ /* we mustn't call path_put() as that would clear mnt_expiry_mark */
	1744	+ dput(path->dentry);
	1745	+ mntput_no_expire(mnt);
	1746	+ return ret;
	1747	+}
	1748	+
	1749	+static int ksys_umount(char __user *name, int flags)
	1750	+{
	1751	+ int lookup_flags = LOOKUP_MOUNTPOINT;
	1752	+ struct path path;
	1753	+ int ret;
	1754	+
	1755	+ // basic validity checks done first
	1756	+ if (flags & ~(MNT_FORCE \| MNT_DETACH \| MNT_EXPIRE \| UMOUNT_NOFOLLOW))
	1757	+ return -EINVAL;
1668	1758
1669	1759	if (!(flags & UMOUNT_NOFOLLOW))
1670	1760	lookup_flags \|= LOOKUP_FOLLOW;
1671		-
1672		- retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path);
1673		- if (retval)
1674		- goto out;
1675		- mnt = real_mount(path.mnt);
1676		- retval = -EINVAL;
1677		- if (path.dentry != path.mnt->mnt_root)
1678		- goto dput_and_out;
1679		- if (!check_mnt(mnt))
1680		- goto dput_and_out;
1681		- if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */
1682		- goto dput_and_out;
1683		- retval = -EPERM;
1684		- if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
1685		- goto dput_and_out;
1686		-
1687		- retval = do_umount(mnt, flags);
1688		-dput_and_out:
1689		- /* we mustn't call path_put() as that would clear mnt_expiry_mark */
1690		- dput(path.dentry);
1691		- mntput_no_expire(mnt);
1692		-out:
1693		- return retval;
	1761	+ ret = user_path_at(AT_FDCWD, name, lookup_flags, &path);
	1762	+ if (ret)
	1763	+ return ret;
	1764	+ return path_umount(&path, flags);
1694	1765	}
1695	1766
1696	1767	SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
..	..	@@ -1717,9 +1788,14 @@
1717	1788	dentry->d_fsdata == &mntns_operations;
1718	1789	}
1719	1790
1720		-struct mnt_namespace to_mnt_ns(struct ns_common ns)
	1791	+static struct mnt_namespace to_mnt_ns(struct ns_common ns)
1721	1792	{
1722	1793	return container_of(ns, struct mnt_namespace, ns);
	1794	+}
	1795	+
	1796	+struct ns_common from_mnt_ns(struct mnt_namespace mnt)
	1797	+{
	1798	+ return &mnt->ns;
1723	1799	}
1724	1800
1725	1801	static bool mnt_ns_loop(struct dentry *dentry)
..	..	@@ -1817,6 +1893,27 @@
1817	1893	return &tree->mnt;
1818	1894	}
1819	1895
	1896	+static void free_mnt_ns(struct mnt_namespace *);
	1897	+static struct mnt_namespace alloc_mnt_ns(struct user_namespace , bool);
	1898	+
	1899	+void dissolve_on_fput(struct vfsmount *mnt)
	1900	+{
	1901	+ struct mnt_namespace *ns;
	1902	+ namespace_lock();
	1903	+ lock_mount_hash();
	1904	+ ns = real_mount(mnt)->mnt_ns;
	1905	+ if (ns) {
	1906	+ if (is_anon_ns(ns))
	1907	+ umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
	1908	+ else
	1909	+ ns = NULL;
	1910	+ }
	1911	+ unlock_mount_hash();
	1912	+ namespace_unlock();
	1913	+ if (ns)
	1914	+ free_mnt_ns(ns);
	1915	+}
	1916	+
1820	1917	void drop_collected_mounts(struct vfsmount *mnt)
1821	1918	{
1822	1919	namespace_lock();
..	..	@@ -1870,6 +1967,9 @@
1870	1967	if (IS_ERR(new_mnt))
1871	1968	return ERR_CAST(new_mnt);
1872	1969
	1970	+ /* Longterm mount to be removed by kern_unmount*() */
	1971	+ new_mnt->mnt_ns = MNT_NS_INTERNAL;
	1972	+
1873	1973	return &new_mnt->mnt;
1874	1974
1875	1975	invalid:
..	..	@@ -1891,6 +1991,33 @@
1891	1991	return res;
1892	1992	}
1893	1993	return 0;
	1994	+}
	1995	+
	1996	+static void lock_mnt_tree(struct mount *mnt)
	1997	+{
	1998	+ struct mount *p;
	1999	+
	2000	+ for (p = mnt; p; p = next_mnt(p, mnt)) {
	2001	+ int flags = p->mnt.mnt_flags;
	2002	+ /* Don't allow unprivileged users to change mount flags */
	2003	+ flags \|= MNT_LOCK_ATIME;
	2004	+
	2005	+ if (flags & MNT_READONLY)
	2006	+ flags \|= MNT_LOCK_READONLY;
	2007	+
	2008	+ if (flags & MNT_NODEV)
	2009	+ flags \|= MNT_LOCK_NODEV;
	2010	+
	2011	+ if (flags & MNT_NOSUID)
	2012	+ flags \|= MNT_LOCK_NOSUID;
	2013	+
	2014	+ if (flags & MNT_NOEXEC)
	2015	+ flags \|= MNT_LOCK_NOEXEC;
	2016	+ /* Don't allow unprivileged users to reveal what is under a mount */
	2017	+ if (list_empty(&p->mnt_expire))
	2018	+ flags \|= MNT_LOCKED;
	2019	+ p->mnt.mnt_flags = flags;
	2020	+ }
1894	2021	}
1895	2022
1896	2023	static void cleanup_group_ids(struct mount mnt, struct mount end)
..	..	@@ -2008,8 +2135,9 @@
2008	2135	static int attach_recursive_mnt(struct mount *source_mnt,
2009	2136	struct mount *dest_mnt,
2010	2137	struct mountpoint *dest_mp,
2011		- struct path *parent_path)
	2138	+ bool moving)
2012	2139	{
	2140	+ struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
2013	2141	HLIST_HEAD(tree_list);
2014	2142	struct mnt_namespace *ns = dest_mnt->mnt_ns;
2015	2143	struct mountpoint *smp;
..	..	@@ -2025,7 +2153,7 @@
2025	2153	return PTR_ERR(smp);
2026	2154
2027	2155	/* Is there space to add these mounts to the mount namespace? */
2028		- if (!parent_path) {
	2156	+ if (!moving) {
2029	2157	err = count_mounts(ns, source_mnt);
2030	2158	if (err)
2031	2159	goto out;
..	..	@@ -2044,11 +2172,15 @@
2044	2172	} else {
2045	2173	lock_mount_hash();
2046	2174	}
2047		- if (parent_path) {
2048		- detach_mnt(source_mnt, parent_path);
	2175	+ if (moving) {
	2176	+ unhash_mnt(source_mnt);
2049	2177	attach_mnt(source_mnt, dest_mnt, dest_mp);
2050	2178	touch_mnt_namespace(source_mnt->mnt_ns);
2051	2179	} else {
	2180	+ if (source_mnt->mnt_ns) {
	2181	+ /* move from anon - the caller will destroy */
	2182	+ list_del_init(&source_mnt->mnt_ns->list);
	2183	+ }
2052	2184	mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
2053	2185	commit_tree(source_mnt);
2054	2186	}
..	..	@@ -2060,6 +2192,10 @@
2060	2192	child->mnt_mountpoint);
2061	2193	if (q)
2062	2194	mnt_change_mountpoint(child, smp, q);
	2195	+ /* Notice when we are propagating across user namespaces */
	2196	+ if (child->mnt_parent->mnt_ns->user_ns != user_ns)
	2197	+ lock_mnt_tree(child);
	2198	+ child->mnt.mnt_flags &= ~MNT_LOCKED;
2063	2199	commit_tree(child);
2064	2200	}
2065	2201	put_mountpoint(smp);
..	..	@@ -2135,7 +2271,7 @@
2135	2271	d_is_dir(mnt->mnt.mnt_root))
2136	2272	return -ENOTDIR;
2137	2273
2138		- return attach_recursive_mnt(mnt, p, mp, NULL);
	2274	+ return attach_recursive_mnt(mnt, p, mp, false);
2139	2275	}
2140	2276
2141	2277	/*
..	..	@@ -2190,6 +2326,30 @@
2190	2326	return err;
2191	2327	}
2192	2328
	2329	+static struct mount __do_loopback(struct path old_path, int recurse)
	2330	+{
	2331	+ struct mount mnt = ERR_PTR(-EINVAL), old = real_mount(old_path->mnt);
	2332	+
	2333	+ if (IS_MNT_UNBINDABLE(old))
	2334	+ return mnt;
	2335	+
	2336	+ if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations)
	2337	+ return mnt;
	2338	+
	2339	+ if (!recurse && has_locked_children(old, old_path->dentry))
	2340	+ return mnt;
	2341	+
	2342	+ if (recurse)
	2343	+ mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
	2344	+ else
	2345	+ mnt = clone_mnt(old, old_path->dentry, 0);
	2346	+
	2347	+ if (!IS_ERR(mnt))
	2348	+ mnt->mnt.mnt_flags &= ~MNT_LOCKED;
	2349	+
	2350	+ return mnt;
	2351	+}
	2352	+
2193	2353	/*
2194	2354	* do loopback mount.
2195	2355	*/
..	..	@@ -2197,7 +2357,7 @@
2197	2357	int recurse)
2198	2358	{
2199	2359	struct path old_path;
2200		- struct mount mnt = NULL, old, *parent;
	2360	+ struct mount mnt = NULL, parent;
2201	2361	struct mountpoint *mp;
2202	2362	int err;
2203	2363	if (!old_name \|\| !*old_name)
..	..	@@ -2211,37 +2371,20 @@
2211	2371	goto out;
2212	2372
2213	2373	mp = lock_mount(path);
2214		- err = PTR_ERR(mp);
2215		- if (IS_ERR(mp))
	2374	+ if (IS_ERR(mp)) {
	2375	+ err = PTR_ERR(mp);
2216	2376	goto out;
	2377	+ }
2217	2378
2218		- old = real_mount(old_path.mnt);
2219	2379	parent = real_mount(path->mnt);
2220		-
2221		- err = -EINVAL;
2222		- if (IS_MNT_UNBINDABLE(old))
2223		- goto out2;
2224		-
2225	2380	if (!check_mnt(parent))
2226	2381	goto out2;
2227	2382
2228		- if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations)
2229		- goto out2;
2230		-
2231		- if (!recurse && has_locked_children(old, old_path.dentry))
2232		- goto out2;
2233		-
2234		- if (recurse)
2235		- mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
2236		- else
2237		- mnt = clone_mnt(old, old_path.dentry, 0);
2238		-
	2383	+ mnt = __do_loopback(&old_path, recurse);
2239	2384	if (IS_ERR(mnt)) {
2240	2385	err = PTR_ERR(mnt);
2241	2386	goto out2;
2242	2387	}
2243		-
2244		- mnt->mnt.mnt_flags &= ~MNT_LOCKED;
2245	2388
2246	2389	err = graft_tree(mnt, parent, mp);
2247	2390	if (err) {
..	..	@@ -2256,21 +2399,206 @@
2256	2399	return err;
2257	2400	}
2258	2401
2259		-static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
	2402	+static struct file *open_detached_copy(struct path *path, bool recursive)
2260	2403	{
2261		- int error = 0;
2262		- int readonly_request = 0;
	2404	+ struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
	2405	+ struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true);
	2406	+ struct mount *mnt, *p;
	2407	+ struct file *file;
2263	2408
2264		- if (ms_flags & MS_RDONLY)
2265		- readonly_request = 1;
2266		- if (readonly_request == __mnt_is_readonly(mnt))
	2409	+ if (IS_ERR(ns))
	2410	+ return ERR_CAST(ns);
	2411	+
	2412	+ namespace_lock();
	2413	+ mnt = __do_loopback(path, recursive);
	2414	+ if (IS_ERR(mnt)) {
	2415	+ namespace_unlock();
	2416	+ free_mnt_ns(ns);
	2417	+ return ERR_CAST(mnt);
	2418	+ }
	2419	+
	2420	+ lock_mount_hash();
	2421	+ for (p = mnt; p; p = next_mnt(p, mnt)) {
	2422	+ p->mnt_ns = ns;
	2423	+ ns->mounts++;
	2424	+ }
	2425	+ ns->root = mnt;
	2426	+ list_add_tail(&ns->list, &mnt->mnt_list);
	2427	+ mntget(&mnt->mnt);
	2428	+ unlock_mount_hash();
	2429	+ namespace_unlock();
	2430	+
	2431	+ mntput(path->mnt);
	2432	+ path->mnt = &mnt->mnt;
	2433	+ file = dentry_open(path, O_PATH, current_cred());
	2434	+ if (IS_ERR(file))
	2435	+ dissolve_on_fput(path->mnt);
	2436	+ else
	2437	+ file->f_mode |= FMODE_NEED_UNMOUNT;
	2438	+ return file;
	2439	+}
	2440	+
	2441	+SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags)
	2442	+{
	2443	+ struct file *file;
	2444	+ struct path path;
	2445	+ int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
	2446	+ bool detached = flags & OPEN_TREE_CLONE;
	2447	+ int error;
	2448	+ int fd;
	2449	+
	2450	+ BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);
	2451	+
	2452	+ if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
	2453	+ AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
	2454	+ OPEN_TREE_CLOEXEC))
	2455	+ return -EINVAL;
	2456	+
	2457	+ if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE)
	2458	+ return -EINVAL;
	2459	+
	2460	+ if (flags & AT_NO_AUTOMOUNT)
	2461	+ lookup_flags &= ~LOOKUP_AUTOMOUNT;
	2462	+ if (flags & AT_SYMLINK_NOFOLLOW)
	2463	+ lookup_flags &= ~LOOKUP_FOLLOW;
	2464	+ if (flags & AT_EMPTY_PATH)
	2465	+ lookup_flags |= LOOKUP_EMPTY;
	2466	+
	2467	+ if (detached && !may_mount())
	2468	+ return -EPERM;
	2469	+
	2470	+ fd = get_unused_fd_flags(flags & O_CLOEXEC);
	2471	+ if (fd < 0)
	2472	+ return fd;
	2473	+
	2474	+ error = user_path_at(dfd, filename, lookup_flags, &path);
	2475	+ if (unlikely(error)) {
	2476	+ file = ERR_PTR(error);
	2477	+ } else {
	2478	+ if (detached)
	2479	+ file = open_detached_copy(&path, flags & AT_RECURSIVE);
	2480	+ else
	2481	+ file = dentry_open(&path, O_PATH, current_cred());
	2482	+ path_put(&path);
	2483	+ }
	2484	+ if (IS_ERR(file)) {
	2485	+ put_unused_fd(fd);
	2486	+ return PTR_ERR(file);
	2487	+ }
	2488	+ fd_install(fd, file);
	2489	+ return fd;
	2490	+}
	2491	+
	2492	+/*
	2493	+ * Don't allow locked mount flags to be cleared.
	2494	+ *
	2495	+ * No locks need to be held here while testing the various MNT_LOCK
	2496	+ * flags because those flags can never be cleared once they are set.
	2497	+ */
	2498	+static bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags)
	2499	+{
	2500	+ unsigned int fl = mnt->mnt.mnt_flags;
	2501	+
	2502	+ if ((fl & MNT_LOCK_READONLY) &&
	2503	+ !(mnt_flags & MNT_READONLY))
	2504	+ return false;
	2505	+
	2506	+ if ((fl & MNT_LOCK_NODEV) &&
	2507	+ !(mnt_flags & MNT_NODEV))
	2508	+ return false;
	2509	+
	2510	+ if ((fl & MNT_LOCK_NOSUID) &&
	2511	+ !(mnt_flags & MNT_NOSUID))
	2512	+ return false;
	2513	+
	2514	+ if ((fl & MNT_LOCK_NOEXEC) &&
	2515	+ !(mnt_flags & MNT_NOEXEC))
	2516	+ return false;
	2517	+
	2518	+ if ((fl & MNT_LOCK_ATIME) &&
	2519	+ ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK)))
	2520	+ return false;
	2521	+
	2522	+ return true;
	2523	+}
	2524	+
	2525	+static int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags)
	2526	+{
	2527	+ bool readonly_request = (mnt_flags & MNT_READONLY);
	2528	+
	2529	+ if (readonly_request == __mnt_is_readonly(&mnt->mnt))
2267	2530	return 0;
2268	2531
2269	2532	if (readonly_request)
2270		- error = mnt_make_readonly(real_mount(mnt));
2271		- else
2272		- __mnt_unmake_readonly(real_mount(mnt));
2273		- return error;
	2533	+ return mnt_make_readonly(mnt);
	2534	+
	2535	+ return __mnt_unmake_readonly(mnt);
	2536	+}
	2537	+
	2538	+/*
	2539	+ * Update the user-settable attributes on a mount. The caller must hold
	2540	+ * sb->s_umount for writing.
	2541	+ */
	2542	+static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags)
	2543	+{
	2544	+ lock_mount_hash();
	2545	+ mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
	2546	+ mnt->mnt.mnt_flags = mnt_flags;
	2547	+ touch_mnt_namespace(mnt->mnt_ns);
	2548	+ unlock_mount_hash();
	2549	+}
	2550	+
	2551	+static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *mnt)
	2552	+{
	2553	+ struct super_block *sb = mnt->mnt_sb;
	2554	+
	2555	+ if (!__mnt_is_readonly(mnt) &&
	2556	+ (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) {
	2557	+ char *buf = (char *)__get_free_page(GFP_KERNEL);
	2558	+ char *mntpath = buf ? d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM);
	2559	+ struct tm tm;
	2560	+
	2561	+ time64_to_tm(sb->s_time_max, 0, &tm);
	2562	+
	2563	+ pr_warn("%s filesystem being %s at %s supports timestamps until %04ld (0x%llx)\n",
	2564	+ sb->s_type->name,
	2565	+ is_mounted(mnt) ? "remounted" : "mounted",
	2566	+ mntpath,
	2567	+ tm.tm_year+1900, (unsigned long long)sb->s_time_max);
	2568	+
	2569	+ free_page((unsigned long)buf);
	2570	+ }
	2571	+}
	2572	+
	2573	+/*
	2574	+ * Handle reconfiguration of the mountpoint only without alteration of the
	2575	+ * superblock it refers to. This is triggered by specifying MS_REMOUNT|MS_BIND
	2576	+ * to mount(2).
	2577	+ */
	2578	+static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
	2579	+{
	2580	+ struct super_block *sb = path->mnt->mnt_sb;
	2581	+ struct mount *mnt = real_mount(path->mnt);
	2582	+ int ret;
	2583	+
	2584	+ if (!check_mnt(mnt))
	2585	+ return -EINVAL;
	2586	+
	2587	+ if (path->dentry != mnt->mnt.mnt_root)
	2588	+ return -EINVAL;
	2589	+
	2590	+ if (!can_change_locked_flags(mnt, mnt_flags))
	2591	+ return -EPERM;
	2592	+
	2593	+ down_write(&sb->s_umount);
	2594	+ ret = change_mount_ro_state(mnt, mnt_flags);
	2595	+ if (ret == 0)
	2596	+ set_mount_attributes(mnt, mnt_flags);
	2597	+ up_write(&sb->s_umount);
	2598	+
	2599	+ mnt_warn_timestamp_expiry(path, &mnt->mnt);
	2600	+
	2601	+ return ret;
2274	2602	}
2275	2603
2276	2604	/*
..	..	@@ -2284,6 +2612,7 @@
2284	2612	int err;
2285	2613	struct super_block *sb = path->mnt->mnt_sb;
2286	2614	struct mount *mnt = real_mount(path->mnt);
	2615	+ struct fs_context *fc;
2287	2616
2288	2617	if (!check_mnt(mnt))
2289	2618	return -EINVAL;
..	..	@@ -2291,58 +2620,29 @@
2291	2620	if (path->dentry != path->mnt->mnt_root)
2292	2621	return -EINVAL;
2293	2622
2294		- /* Don't allow changing of locked mnt flags.
2295		- *
2296		- * No locks need to be held here while testing the various
2297		- * MNT_LOCK flags because those flags can never be cleared
2298		- * once they are set.
2299		- */
2300		- if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
2301		- !(mnt_flags & MNT_READONLY)) {
	2623	+ if (!can_change_locked_flags(mnt, mnt_flags))
2302	2624	return -EPERM;
2303		- }
2304		- if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
2305		- !(mnt_flags & MNT_NODEV)) {
2306		- return -EPERM;
2307		- }
2308		- if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
2309		- !(mnt_flags & MNT_NOSUID)) {
2310		- return -EPERM;
2311		- }
2312		- if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
2313		- !(mnt_flags & MNT_NOEXEC)) {
2314		- return -EPERM;
2315		- }
2316		- if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
2317		- ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
2318		- return -EPERM;
2319		- }
2320	2625
2321		- err = security_sb_remount(sb, data);
2322		- if (err)
2323		- return err;
	2626	+ fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK);
	2627	+ if (IS_ERR(fc))
	2628	+ return PTR_ERR(fc);
2324	2629
2325		- down_write(&sb->s_umount);
2326		- if (ms_flags & MS_BIND)
2327		- err = change_mount_flags(path->mnt, ms_flags);
2328		- else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
2329		- err = -EPERM;
2330		- else {
2331		- err = do_remount_sb2(path->mnt, sb, sb_flags, data, 0);
2332		- namespace_lock();
2333		- lock_mount_hash();
2334		- propagate_remount(mnt);
2335		- unlock_mount_hash();
2336		- namespace_unlock();
2337		- }
	2630	+ fc->oldapi = true;
	2631	+ err = parse_monolithic_mount_data(fc, data);
2338	2632	if (!err) {
2339		- lock_mount_hash();
2340		- mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
2341		- mnt->mnt.mnt_flags = mnt_flags;
2342		- touch_mnt_namespace(mnt->mnt_ns);
2343		- unlock_mount_hash();
	2633	+ down_write(&sb->s_umount);
	2634	+ err = -EPERM;
	2635	+ if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
	2636	+ err = reconfigure_super(fc);
	2637	+ if (!err)
	2638	+ set_mount_attributes(mnt, mnt_flags);
	2639	+ }
	2640	+ up_write(&sb->s_umount);
2344	2641	}
2345		- up_write(&sb->s_umount);
	2642	+
	2643	+ mnt_warn_timestamp_expiry(path, &mnt->mnt);
	2644	+
	2645	+ put_fs_context(fc);
2346	2646	return err;
2347	2647	}
2348	2648
..	..	@@ -2356,144 +2656,200 @@
2356	2656	return 0;
2357	2657	}
2358	2658
2359		-static int do_move_mount(struct path *path, const char *old_name)
	2659	+/*
	2660	+ * Check that there aren't references to earlier/same mount namespaces in the
	2661	+ * specified subtree. Such references can act as pins for mount namespaces
	2662	+ * that aren't checked by the mount-cycle checking code, thereby allowing
	2663	+ * cycles to be made.
	2664	+ */
	2665	+static bool check_for_nsfs_mounts(struct mount *subtree)
2360	2666	{
2361		- struct path old_path, parent_path;
	2667	+ struct mount *p;
	2668	+ bool ret = false;
	2669	+
	2670	+ lock_mount_hash();
	2671	+ for (p = subtree; p; p = next_mnt(p, subtree))
	2672	+ if (mnt_ns_loop(p->mnt.mnt_root))
	2673	+ goto out;
	2674	+
	2675	+ ret = true;
	2676	+out:
	2677	+ unlock_mount_hash();
	2678	+ return ret;
	2679	+}
	2680	+
	2681	+static int do_move_mount(struct path *old_path, struct path *new_path)
	2682	+{
	2683	+ struct mnt_namespace *ns;
2362	2684	struct mount *p;
2363	2685	struct mount *old;
2364		- struct mountpoint *mp;
	2686	+ struct mount *parent;
	2687	+ struct mountpoint *mp, *old_mp;
2365	2688	int err;
2366		- if (!old_name || !*old_name)
2367		- return -EINVAL;
2368		- err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
2369		- if (err)
2370		- return err;
	2689	+ bool attached;
2371	2690
2372		- mp = lock_mount(path);
2373		- err = PTR_ERR(mp);
	2691	+ mp = lock_mount(new_path);
2374	2692	if (IS_ERR(mp))
	2693	+ return PTR_ERR(mp);
	2694	+
	2695	+ old = real_mount(old_path->mnt);
	2696	+ p = real_mount(new_path->mnt);
	2697	+ parent = old->mnt_parent;
	2698	+ attached = mnt_has_parent(old);
	2699	+ old_mp = old->mnt_mp;
	2700	+ ns = old->mnt_ns;
	2701	+
	2702	+ err = -EINVAL;
	2703	+ /* The mountpoint must be in our namespace. */
	2704	+ if (!check_mnt(p))
2375	2705	goto out;
2376	2706
2377		- old = real_mount(old_path.mnt);
2378		- p = real_mount(path->mnt);
	2707	+ /* The thing moved must be mounted... */
	2708	+ if (!is_mounted(&old->mnt))
	2709	+ goto out;
2379	2710
2380		- err = -EINVAL;
2381		- if (!check_mnt(p) || !check_mnt(old))
2382		- goto out1;
	2711	+ /* ... and either ours or the root of anon namespace */
	2712	+ if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
	2713	+ goto out;
2383	2714
2384	2715	if (old->mnt.mnt_flags & MNT_LOCKED)
2385		- goto out1;
	2716	+ goto out;
2386	2717
2387		- err = -EINVAL;
2388		- if (old_path.dentry != old_path.mnt->mnt_root)
2389		- goto out1;
	2718	+ if (old_path->dentry != old_path->mnt->mnt_root)
	2719	+ goto out;
2390	2720
2391		- if (!mnt_has_parent(old))
2392		- goto out1;
2393		-
2394		- if (d_is_dir(path->dentry) !=
2395		- d_is_dir(old_path.dentry))
2396		- goto out1;
	2721	+ if (d_is_dir(new_path->dentry) !=
	2722	+ d_is_dir(old_path->dentry))
	2723	+ goto out;
2397	2724	/*
2398	2725	* Don't move a mount residing in a shared parent.
2399	2726	*/
2400		- if (IS_MNT_SHARED(old->mnt_parent))
2401		- goto out1;
	2727	+ if (attached && IS_MNT_SHARED(parent))
	2728	+ goto out;
2402	2729	/*
2403	2730	* Don't move a mount tree containing unbindable mounts to a destination
2404	2731	* mount which is shared.
2405	2732	*/
2406	2733	if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
2407		- goto out1;
	2734	+ goto out;
2408	2735	err = -ELOOP;
	2736	+ if (!check_for_nsfs_mounts(old))
	2737	+ goto out;
2409	2738	for (; mnt_has_parent(p); p = p->mnt_parent)
2410	2739	if (p == old)
2411		- goto out1;
	2740	+ goto out;
2412	2741
2413		- err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path);
	2742	+ err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
	2743	+ attached);
2414	2744	if (err)
2415		- goto out1;
	2745	+ goto out;
2416	2746
2417	2747	/* if the mount is moved, it should no longer be expire
2418	2748	* automatically */
2419	2749	list_del_init(&old->mnt_expire);
2420		-out1:
2421		- unlock_mount(mp);
	2750	+ if (attached)
	2751	+ put_mountpoint(old_mp);
2422	2752	out:
2423		- if (!err)
2424		- path_put(&parent_path);
2425		- path_put(&old_path);
	2753	+ unlock_mount(mp);
	2754	+ if (!err) {
	2755	+ if (attached)
	2756	+ mntput_no_expire(parent);
	2757	+ else
	2758	+ free_mnt_ns(ns);
	2759	+ }
2426	2760	return err;
2427	2761	}
2428	2762
2429		-static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
	2763	+static int do_move_mount_old(struct path *path, const char *old_name)
2430	2764	{
	2765	+ struct path old_path;
2431	2766	int err;
2432		- const char *subtype = strchr(fstype, '.');
2433		- if (subtype) {
2434		- subtype++;
2435		- err = -EINVAL;
2436		- if (!subtype[0])
2437		- goto err;
2438		- } else
2439		- subtype = "";
2440	2767
2441		- mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
2442		- err = -ENOMEM;
2443		- if (!mnt->mnt_sb->s_subtype)
2444		- goto err;
2445		- return mnt;
	2768	+ if (!old_name || !*old_name)
	2769	+ return -EINVAL;
2446	2770
2447		- err:
2448		- mntput(mnt);
2449		- return ERR_PTR(err);
	2771	+ err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
	2772	+ if (err)
	2773	+ return err;
	2774	+
	2775	+ err = do_move_mount(&old_path, path);
	2776	+ path_put(&old_path);
	2777	+ return err;
2450	2778	}
2451	2779
2452	2780	/*
2453	2781	* add a mount into a namespace's mount tree
2454	2782	*/
2455		-static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
	2783	+static int do_add_mount(struct mount *newmnt, struct mountpoint *mp,
	2784	+ struct path *path, int mnt_flags)
2456	2785	{
2457		- struct mountpoint *mp;
2458		- struct mount *parent;
2459		- int err;
	2786	+ struct mount *parent = real_mount(path->mnt);
2460	2787
2461	2788	mnt_flags &= ~MNT_INTERNAL_FLAGS;
2462	2789
2463		- mp = lock_mount(path);
2464		- if (IS_ERR(mp))
2465		- return PTR_ERR(mp);
2466		-
2467		- parent = real_mount(path->mnt);
2468		- err = -EINVAL;
2469	2790	if (unlikely(!check_mnt(parent))) {
2470	2791	/* that's acceptable only for automounts done in private ns */
2471	2792	if (!(mnt_flags & MNT_SHRINKABLE))
2472		- goto unlock;
	2793	+ return -EINVAL;
2473	2794	/* ... and for those we'd better have mountpoint still alive */
2474	2795	if (!parent->mnt_ns)
2475		- goto unlock;
	2796	+ return -EINVAL;
2476	2797	}
2477	2798
2478	2799	/* Refuse the same filesystem on the same mount point */
2479		- err = -EBUSY;
2480	2800	if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
2481	2801	path->mnt->mnt_root == path->dentry)
2482		- goto unlock;
	2802	+ return -EBUSY;
2483	2803
2484		- err = -EINVAL;
2485	2804	if (d_is_symlink(newmnt->mnt.mnt_root))
2486		- goto unlock;
	2805	+ return -EINVAL;
2487	2806
2488	2807	newmnt->mnt.mnt_flags = mnt_flags;
2489		- err = graft_tree(newmnt, parent, mp);
2490		-
2491		-unlock:
2492		- unlock_mount(mp);
2493		- return err;
	2808	+ return graft_tree(newmnt, parent, mp);
2494	2809	}
2495	2810
2496		-static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags);
	2811	+static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags);
	2812	+
	2813	+/*
	2814	+ * Create a new mount using a superblock configuration and request it
	2815	+ * be added to the namespace tree.
	2816	+ */
	2817	+static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
	2818	+ unsigned int mnt_flags)
	2819	+{
	2820	+ struct vfsmount *mnt;
	2821	+ struct mountpoint *mp;
	2822	+ struct super_block *sb = fc->root->d_sb;
	2823	+ int error;
	2824	+
	2825	+ error = security_sb_kern_mount(sb);
	2826	+ if (!error && mount_too_revealing(sb, &mnt_flags))
	2827	+ error = -EPERM;
	2828	+
	2829	+ if (unlikely(error)) {
	2830	+ fc_drop_locked(fc);
	2831	+ return error;
	2832	+ }
	2833	+
	2834	+ up_write(&sb->s_umount);
	2835	+
	2836	+ mnt = vfs_create_mount(fc);
	2837	+ if (IS_ERR(mnt))
	2838	+ return PTR_ERR(mnt);
	2839	+
	2840	+ mnt_warn_timestamp_expiry(mountpoint, mnt);
	2841	+
	2842	+ mp = lock_mount(mountpoint);
	2843	+ if (IS_ERR(mp)) {
	2844	+ mntput(mnt);
	2845	+ return PTR_ERR(mp);
	2846	+ }
	2847	+ error = do_add_mount(real_mount(mnt), mp, mountpoint, mnt_flags);
	2848	+ unlock_mount(mp);
	2849	+ if (error < 0)
	2850	+ mntput(mnt);
	2851	+ return error;
	2852	+}
2497	2853
2498	2854	/*
2499	2855	* create a new mount for userspace and request it to be added into the
..	..	@@ -2503,8 +2859,9 @@
2503	2859	int mnt_flags, const char *name, void *data)
2504	2860	{
2505	2861	struct file_system_type *type;
2506		- struct vfsmount *mnt;
2507		- int err;
	2862	+ struct fs_context *fc;
	2863	+ const char *subtype = NULL;
	2864	+ int err = 0;
2508	2865
2509	2866	if (!fstype)
2510	2867	return -EINVAL;
..	..	@@ -2513,45 +2870,99 @@
2513	2870	if (!type)
2514	2871	return -ENODEV;
2515	2872
2516		- mnt = vfs_kern_mount(type, sb_flags, name, data);
2517		- if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
2518		- !mnt->mnt_sb->s_subtype)
2519		- mnt = fs_set_subtype(mnt, fstype);
2520		-
2521		- put_filesystem(type);
2522		- if (IS_ERR(mnt))
2523		- return PTR_ERR(mnt);
2524		-
2525		- if (mount_too_revealing(mnt, &mnt_flags)) {
2526		- mntput(mnt);
2527		- return -EPERM;
	2873	+ if (type->fs_flags & FS_HAS_SUBTYPE) {
	2874	+ subtype = strchr(fstype, '.');
	2875	+ if (subtype) {
	2876	+ subtype++;
	2877	+ if (!*subtype) {
	2878	+ put_filesystem(type);
	2879	+ return -EINVAL;
	2880	+ }
	2881	+ }
2528	2882	}
2529	2883
2530		- err = do_add_mount(real_mount(mnt), path, mnt_flags);
2531		- if (err)
2532		- mntput(mnt);
	2884	+ fc = fs_context_for_mount(type, sb_flags);
	2885	+ put_filesystem(type);
	2886	+ if (IS_ERR(fc))
	2887	+ return PTR_ERR(fc);
	2888	+
	2889	+ if (subtype)
	2890	+ err = vfs_parse_fs_string(fc, "subtype",
	2891	+ subtype, strlen(subtype));
	2892	+ if (!err && name)
	2893	+ err = vfs_parse_fs_string(fc, "source", name, strlen(name));
	2894	+ if (!err)
	2895	+ err = parse_monolithic_mount_data(fc, data);
	2896	+ if (!err && !mount_capable(fc))
	2897	+ err = -EPERM;
	2898	+ if (!err)
	2899	+ err = vfs_get_tree(fc);
	2900	+ if (!err)
	2901	+ err = do_new_mount_fc(fc, path, mnt_flags);
	2902	+
	2903	+ put_fs_context(fc);
2533	2904	return err;
2534	2905	}
2535	2906
2536	2907	int finish_automount(struct vfsmount *m, struct path *path)
2537	2908	{
2538		- struct mount *mnt = real_mount(m);
	2909	+ struct dentry *dentry = path->dentry;
	2910	+ struct mountpoint *mp;
	2911	+ struct mount *mnt;
2539	2912	int err;
	2913	+
	2914	+ if (!m)
	2915	+ return 0;
	2916	+ if (IS_ERR(m))
	2917	+ return PTR_ERR(m);
	2918	+
	2919	+ mnt = real_mount(m);
2540	2920	/* The new mount record should have at least 2 refs to prevent it being
2541	2921	* expired before we get a chance to add it
2542	2922	*/
2543	2923	BUG_ON(mnt_get_count(mnt) < 2);
2544	2924
2545	2925	if (m->mnt_sb == path->mnt->mnt_sb &&
2546		- m->mnt_root == path->dentry) {
	2926	+ m->mnt_root == dentry) {
2547	2927	err = -ELOOP;
2548		- goto fail;
	2928	+ goto discard;
2549	2929	}
2550	2930
2551		- err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
2552		- if (!err)
2553		- return 0;
2554		-fail:
	2931	+ /*
	2932	+ * we don't want to use lock_mount() - in this case finding something
	2933	+ * that overmounts our mountpoint to be means "quitely drop what we've
	2934	+ * got", not "try to mount it on top".
	2935	+ */
	2936	+ inode_lock(dentry->d_inode);
	2937	+ namespace_lock();
	2938	+ if (unlikely(cant_mount(dentry))) {
	2939	+ err = -ENOENT;
	2940	+ goto discard_locked;
	2941	+ }
	2942	+ rcu_read_lock();
	2943	+ if (unlikely(__lookup_mnt(path->mnt, dentry))) {
	2944	+ rcu_read_unlock();
	2945	+ err = 0;
	2946	+ goto discard_locked;
	2947	+ }
	2948	+ rcu_read_unlock();
	2949	+ mp = get_mountpoint(dentry);
	2950	+ if (IS_ERR(mp)) {
	2951	+ err = PTR_ERR(mp);
	2952	+ goto discard_locked;
	2953	+ }
	2954	+
	2955	+ err = do_add_mount(mnt, mp, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
	2956	+ unlock_mount(mp);
	2957	+ if (unlikely(err))
	2958	+ goto discard;
	2959	+ mntput(m);
	2960	+ return 0;
	2961	+
	2962	+discard_locked:
	2963	+ namespace_unlock();
	2964	+ inode_unlock(dentry->d_inode);
	2965	+discard:
2555	2966	/* remove m from any expiration list it may be on */
2556	2967	if (!list_empty(&mnt->mnt_expire)) {
2557	2968	namespace_lock();
..	..	@@ -2685,39 +3096,10 @@
2685	3096	}
2686	3097	}
2687	3098
2688		-/*
2689		- * Some copy_from_user() implementations do not return the exact number of
2690		- * bytes remaining to copy on a fault. But copy_mount_options() requires that.
2691		- * Note that this function differs from copy_from_user() in that it will oops
2692		- * on bad values of `to', rather than returning a short copy.
2693		- */
2694		-static long exact_copy_from_user(void *to, const void __user *from,
2695		- unsigned long n)
	3099	+static void *copy_mount_options(const void __user *data)
2696	3100	{
2697		- char *t = to;
2698		- const char __user *f = from;
2699		- char c;
2700		-
2701		- if (!access_ok(VERIFY_READ, from, n))
2702		- return n;
2703		-
2704		- while (n) {
2705		- if (__get_user(c, f)) {
2706		- memset(t, 0, n);
2707		- break;
2708		- }
2709		- *t++ = c;
2710		- f++;
2711		- n--;
2712		- }
2713		- return n;
2714		-}
2715		-
2716		-void *copy_mount_options(const void __user *data)
2717		-{
2718		- int i;
2719		- unsigned long size;
2720	3101	char *copy;
	3102	+ unsigned left, offset;
2721	3103
2722	3104	if (!data)
2723	3105	return NULL;
..	..	@@ -2726,28 +3108,33 @@
2726	3108	if (!copy)
2727	3109	return ERR_PTR(-ENOMEM);
2728	3110
2729		- /* We only care that some data at the address the user
2730		- * gave us is valid. Just in case, we'll zero
2731		- * the remainder of the page.
2732		- */
2733		- /* copy_from_user cannot cross TASK_SIZE ! */
2734		- size = TASK_SIZE - (unsigned long)untagged_addr(data);
2735		- if (size > PAGE_SIZE)
2736		- size = PAGE_SIZE;
	3111	+ left = copy_from_user(copy, data, PAGE_SIZE);
2737	3112
2738		- i = size - exact_copy_from_user(copy, data, size);
2739		- if (!i) {
	3113	+ /*
	3114	+ * Not all architectures have an exact copy_from_user(). Resort to
	3115	+ * byte at a time.
	3116	+ */
	3117	+ offset = PAGE_SIZE - left;
	3118	+ while (left) {
	3119	+ char c;
	3120	+ if (get_user(c, (const char __user *)data + offset))
	3121	+ break;
	3122	+ copy[offset] = c;
	3123	+ left--;
	3124	+ offset++;
	3125	+ }
	3126	+
	3127	+ if (left == PAGE_SIZE) {
2740	3128	kfree(copy);
2741	3129	return ERR_PTR(-EFAULT);
2742	3130	}
2743		- if (i != PAGE_SIZE)
2744		- memset(copy + i, 0, PAGE_SIZE - i);
	3131	+
2745	3132	return copy;
2746	3133	}
2747	3134
2748		-char *copy_mount_string(const void __user *data)
	3135	+static char *copy_mount_string(const void __user *data)
2749	3136	{
2750		- return data ? strndup_user(data, PAGE_SIZE) : NULL;
	3137	+ return data ? strndup_user(data, PATH_MAX) : NULL;
2751	3138	}
2752	3139
2753	3140	/*
..	..	@@ -2764,12 +3151,11 @@
2764	3151	* Therefore, if this magic number is present, it carries no information
2765	3152	* and must be discarded.
2766	3153	*/
2767		-long do_mount(const char *dev_name, const char __user *dir_name,
	3154	+int path_mount(const char *dev_name, struct path *path,
2768	3155	const char *type_page, unsigned long flags, void *data_page)
2769	3156	{
2770		- struct path path;
2771	3157	unsigned int mnt_flags = 0, sb_flags;
2772		- int retval = 0;
	3158	+ int ret;
2773	3159
2774	3160	/* Discard magic */
2775	3161	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
..	..	@@ -2782,19 +3168,13 @@
2782	3168	if (flags & MS_NOUSER)
2783	3169	return -EINVAL;
2784	3170
2785		- /* ... and get the mountpoint */
2786		- retval = user_path(dir_name, &path);
2787		- if (retval)
2788		- return retval;
2789		-
2790		- retval = security_sb_mount(dev_name, &path,
2791		- type_page, flags, data_page);
2792		- if (!retval && !may_mount())
2793		- retval = -EPERM;
2794		- if (!retval && (flags & SB_MANDLOCK) && !may_mandlock())
2795		- retval = -EPERM;
2796		- if (retval)
2797		- goto dput_out;
	3171	+ ret = security_sb_mount(dev_name, path, type_page, flags, data_page);
	3172	+ if (ret)
	3173	+ return ret;
	3174	+ if (!may_mount())
	3175	+ return -EPERM;
	3176	+ if ((flags & SB_MANDLOCK) && !may_mandlock())
	3177	+ return -EPERM;
2798	3178
2799	3179	/* Default to relatime unless overriden */
2800	3180	if (!(flags & MS_NOATIME))
..	..	@@ -2815,13 +3195,15 @@
2815	3195	mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
2816	3196	if (flags & MS_RDONLY)
2817	3197	mnt_flags |= MNT_READONLY;
	3198	+ if (flags & MS_NOSYMFOLLOW)
	3199	+ mnt_flags |= MNT_NOSYMFOLLOW;
2818	3200
2819	3201	/* The default atime for remount is preservation */
2820	3202	if ((flags & MS_REMOUNT) &&
2821	3203	((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
2822	3204	MS_STRICTATIME)) == 0)) {
2823	3205	mnt_flags &= ~MNT_ATIME_MASK;
2824		- mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
	3206	+ mnt_flags |= path->mnt->mnt_flags & MNT_ATIME_MASK;
2825	3207	}
2826	3208
2827	3209	sb_flags = flags & (SB_RDONLY |
..	..	@@ -2833,21 +3215,33 @@
2833	3215	SB_LAZYTIME |
2834	3216	SB_I_VERSION);
2835	3217
	3218	+ if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND))
	3219	+ return do_reconfigure_mnt(path, mnt_flags);
2836	3220	if (flags & MS_REMOUNT)
2837		- retval = do_remount(&path, flags, sb_flags, mnt_flags,
2838		- data_page);
2839		- else if (flags & MS_BIND)
2840		- retval = do_loopback(&path, dev_name, flags & MS_REC);
2841		- else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
2842		- retval = do_change_type(&path, flags);
2843		- else if (flags & MS_MOVE)
2844		- retval = do_move_mount(&path, dev_name);
2845		- else
2846		- retval = do_new_mount(&path, type_page, sb_flags, mnt_flags,
2847		- dev_name, data_page);
2848		-dput_out:
	3221	+ return do_remount(path, flags, sb_flags, mnt_flags, data_page);
	3222	+ if (flags & MS_BIND)
	3223	+ return do_loopback(path, dev_name, flags & MS_REC);
	3224	+ if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
	3225	+ return do_change_type(path, flags);
	3226	+ if (flags & MS_MOVE)
	3227	+ return do_move_mount_old(path, dev_name);
	3228	+
	3229	+ return do_new_mount(path, type_page, sb_flags, mnt_flags, dev_name,
	3230	+ data_page);
	3231	+}
	3232	+
	3233	+long do_mount(const char *dev_name, const char __user *dir_name,
	3234	+ const char *type_page, unsigned long flags, void *data_page)
	3235	+{
	3236	+ struct path path;
	3237	+ int ret;
	3238	+
	3239	+ ret = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path);
	3240	+ if (ret)
	3241	+ return ret;
	3242	+ ret = path_mount(dev_name, &path, type_page, flags, data_page);
2849	3243	path_put(&path);
2850		- return retval;
	3244	+ return ret;
2851	3245	}
2852	3246
2853	3247	static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
..	..	@@ -2862,7 +3256,8 @@
2862	3256
2863	3257	static void free_mnt_ns(struct mnt_namespace *ns)
2864	3258	{
2865		- ns_free_inum(&ns->ns);
	3259	+ if (!is_anon_ns(ns))
	3260	+ ns_free_inum(&ns->ns);
2866	3261	dec_mnt_namespaces(ns->ucounts);
2867	3262	put_user_ns(ns->user_ns);
2868	3263	kfree(ns);
..	..	@@ -2877,7 +3272,7 @@
2877	3272	*/
2878	3273	static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
2879	3274
2880		-static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
	3275	+static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon)
2881	3276	{
2882	3277	struct mnt_namespace *new_ns;
2883	3278	struct ucounts *ucounts;
..	..	@@ -2887,28 +3282,28 @@
2887	3282	if (!ucounts)
2888	3283	return ERR_PTR(-ENOSPC);
2889	3284
2890		- new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
	3285	+ new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
2891	3286	if (!new_ns) {
2892	3287	dec_mnt_namespaces(ucounts);
2893	3288	return ERR_PTR(-ENOMEM);
2894	3289	}
2895		- ret = ns_alloc_inum(&new_ns->ns);
2896		- if (ret) {
2897		- kfree(new_ns);
2898		- dec_mnt_namespaces(ucounts);
2899		- return ERR_PTR(ret);
	3290	+ if (!anon) {
	3291	+ ret = ns_alloc_inum(&new_ns->ns);
	3292	+ if (ret) {
	3293	+ kfree(new_ns);
	3294	+ dec_mnt_namespaces(ucounts);
	3295	+ return ERR_PTR(ret);
	3296	+ }
2900	3297	}
2901	3298	new_ns->ns.ops = &mntns_operations;
2902		- new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
	3299	+ if (!anon)
	3300	+ new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
2903	3301	atomic_set(&new_ns->count, 1);
2904		- new_ns->root = NULL;
2905	3302	INIT_LIST_HEAD(&new_ns->list);
2906	3303	init_waitqueue_head(&new_ns->poll);
2907		- new_ns->event = 0;
	3304	+ spin_lock_init(&new_ns->ns_lock);
2908	3305	new_ns->user_ns = get_user_ns(user_ns);
2909	3306	new_ns->ucounts = ucounts;
2910		- new_ns->mounts = 0;
2911		- new_ns->pending_mounts = 0;
2912	3307	return new_ns;
2913	3308	}
2914	3309
..	..	@@ -2932,7 +3327,7 @@
2932	3327
2933	3328	old = ns->root;
2934	3329
2935		- new_ns = alloc_mnt_ns(user_ns);
	3330	+ new_ns = alloc_mnt_ns(user_ns, false);
2936	3331	if (IS_ERR(new_ns))
2937	3332	return new_ns;
2938	3333
..	..	@@ -2940,12 +3335,17 @@
2940	3335	/* First pass: copy the tree topology */
2941	3336	copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
2942	3337	if (user_ns != ns->user_ns)
2943		- copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
	3338	+ copy_flags |= CL_SHARED_TO_SLAVE;
2944	3339	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
2945	3340	if (IS_ERR(new)) {
2946	3341	namespace_unlock();
2947	3342	free_mnt_ns(new_ns);
2948	3343	return ERR_CAST(new);
	3344	+ }
	3345	+ if (user_ns != ns->user_ns) {
	3346	+ lock_mount_hash();
	3347	+ lock_mnt_tree(new);
	3348	+ unlock_mount_hash();
2949	3349	}
2950	3350	new_ns->root = new;
2951	3351	list_add_tail(&new_ns->list, &new->mnt_list);
..	..	@@ -2987,37 +3387,25 @@
2987	3387	return new_ns;
2988	3388	}
2989	3389
2990		-/**
2991		- * create_mnt_ns - creates a private namespace and adds a root filesystem
2992		- * @mnt: pointer to the new root filesystem mountpoint
2993		- */
2994		-static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
	3390	+struct dentry *mount_subtree(struct vfsmount *m, const char *name)
2995	3391	{
2996		- struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
2997		- if (!IS_ERR(new_ns)) {
2998		- struct mount *mnt = real_mount(m);
2999		- mnt->mnt_ns = new_ns;
3000		- new_ns->root = mnt;
3001		- new_ns->mounts++;
3002		- list_add(&mnt->mnt_list, &new_ns->list);
3003		- } else {
3004		- mntput(m);
3005		- }
3006		- return new_ns;
3007		-}
3008		-
3009		-struct dentry mount_subtree(struct vfsmount mnt, const char *name)
3010		-{
	3392	+ struct mount *mnt = real_mount(m);
3011	3393	struct mnt_namespace *ns;
3012	3394	struct super_block *s;
3013	3395	struct path path;
3014	3396	int err;
3015	3397
3016		- ns = create_mnt_ns(mnt);
3017		- if (IS_ERR(ns))
	3398	+ ns = alloc_mnt_ns(&init_user_ns, true);
	3399	+ if (IS_ERR(ns)) {
	3400	+ mntput(m);
3018	3401	return ERR_CAST(ns);
	3402	+ }
	3403	+ mnt->mnt_ns = ns;
	3404	+ ns->root = mnt;
	3405	+ ns->mounts++;
	3406	+ list_add(&mnt->mnt_list, &ns->list);
3019	3407
3020		- err = vfs_path_lookup(mnt->mnt_root, mnt,
	3408	+ err = vfs_path_lookup(m->mnt_root, m,
3021	3409	name, LOOKUP_FOLLOW\|LOOKUP_AUTOMOUNT, &path);
3022	3410
3023	3411	put_mnt_ns(ns);
..	..	@@ -3036,8 +3424,8 @@
3036	3424	}
3037	3425	EXPORT_SYMBOL(mount_subtree);
3038	3426
3039		-int ksys_mount(char __user dev_name, char __user dir_name, char __user *type,
3040		- unsigned long flags, void __user *data)
	3427	+SYSCALL_DEFINE5(mount, char __user , dev_name, char __user , dir_name,
	3428	+ char __user , type, unsigned long, flags, void __user , data)
3041	3429	{
3042	3430	int ret;
3043	3431	char *kernel_type;
..	..	@@ -3070,10 +3458,202 @@
3070	3458	return ret;
3071	3459	}
3072	3460
3073		-SYSCALL_DEFINE5(mount, char __user , dev_name, char __user , dir_name,
3074		- char __user , type, unsigned long, flags, void __user , data)
	3461	+/*
	3462	+ * Create a kernel mount representation for a new, prepared superblock
	3463	+ * (specified by fs_fd) and attach to an open_tree-like file descriptor.
	3464	+ */
	3465	+SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
	3466	+ unsigned int, attr_flags)
3075	3467	{
3076		- return ksys_mount(dev_name, dir_name, type, flags, data);
	3468	+ struct mnt_namespace *ns;
	3469	+ struct fs_context *fc;
	3470	+ struct file *file;
	3471	+ struct path newmount;
	3472	+ struct mount *mnt;
	3473	+ struct fd f;
	3474	+ unsigned int mnt_flags = 0;
	3475	+ long ret;
	3476	+
	3477	+ if (!may_mount())
	3478	+ return -EPERM;
	3479	+
	3480	+ if ((flags & ~(FSMOUNT_CLOEXEC)) != 0)
	3481	+ return -EINVAL;
	3482	+
	3483	+ if (attr_flags & ~(MOUNT_ATTR_RDONLY \|
	3484	+ MOUNT_ATTR_NOSUID \|
	3485	+ MOUNT_ATTR_NODEV \|
	3486	+ MOUNT_ATTR_NOEXEC \|
	3487	+ MOUNT_ATTR__ATIME \|
	3488	+ MOUNT_ATTR_NODIRATIME))
	3489	+ return -EINVAL;
	3490	+
	3491	+ if (attr_flags & MOUNT_ATTR_RDONLY)
	3492	+ mnt_flags \|= MNT_READONLY;
	3493	+ if (attr_flags & MOUNT_ATTR_NOSUID)
	3494	+ mnt_flags \|= MNT_NOSUID;
	3495	+ if (attr_flags & MOUNT_ATTR_NODEV)
	3496	+ mnt_flags \|= MNT_NODEV;
	3497	+ if (attr_flags & MOUNT_ATTR_NOEXEC)
	3498	+ mnt_flags \|= MNT_NOEXEC;
	3499	+ if (attr_flags & MOUNT_ATTR_NODIRATIME)
	3500	+ mnt_flags \|= MNT_NODIRATIME;
	3501	+
	3502	+ switch (attr_flags & MOUNT_ATTR__ATIME) {
	3503	+ case MOUNT_ATTR_STRICTATIME:
	3504	+ break;
	3505	+ case MOUNT_ATTR_NOATIME:
	3506	+ mnt_flags \|= MNT_NOATIME;
	3507	+ break;
	3508	+ case MOUNT_ATTR_RELATIME:
	3509	+ mnt_flags \|= MNT_RELATIME;
	3510	+ break;
	3511	+ default:
	3512	+ return -EINVAL;
	3513	+ }
	3514	+
	3515	+ f = fdget(fs_fd);
	3516	+ if (!f.file)
	3517	+ return -EBADF;
	3518	+
	3519	+ ret = -EINVAL;
	3520	+ if (f.file->f_op != &fscontext_fops)
	3521	+ goto err_fsfd;
	3522	+
	3523	+ fc = f.file->private_data;
	3524	+
	3525	+ ret = mutex_lock_interruptible(&fc->uapi_mutex);
	3526	+ if (ret < 0)
	3527	+ goto err_fsfd;
	3528	+
	3529	+ /* There must be a valid superblock or we can't mount it */
	3530	+ ret = -EINVAL;
	3531	+ if (!fc->root)
	3532	+ goto err_unlock;
	3533	+
	3534	+ ret = -EPERM;
	3535	+ if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
	3536	+ pr_warn("VFS: Mount too revealing\n");
	3537	+ goto err_unlock;
	3538	+ }
	3539	+
	3540	+ ret = -EBUSY;
	3541	+ if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
	3542	+ goto err_unlock;
	3543	+
	3544	+ ret = -EPERM;
	3545	+ if ((fc->sb_flags & SB_MANDLOCK) && !may_mandlock())
	3546	+ goto err_unlock;
	3547	+
	3548	+ newmount.mnt = vfs_create_mount(fc);
	3549	+ if (IS_ERR(newmount.mnt)) {
	3550	+ ret = PTR_ERR(newmount.mnt);
	3551	+ goto err_unlock;
	3552	+ }
	3553	+ newmount.dentry = dget(fc->root);
	3554	+ newmount.mnt->mnt_flags = mnt_flags;
	3555	+
	3556	+ /* We've done the mount bit - now move the file context into more or
	3557	+ * less the same state as if we'd done an fspick(). We don't want to
	3558	+ * do any memory allocation or anything like that at this point as we
	3559	+ * don't want to have to handle any errors incurred.
	3560	+ */
	3561	+ vfs_clean_context(fc);
	3562	+
	3563	+ ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true);
	3564	+ if (IS_ERR(ns)) {
	3565	+ ret = PTR_ERR(ns);
	3566	+ goto err_path;
	3567	+ }
	3568	+ mnt = real_mount(newmount.mnt);
	3569	+ mnt->mnt_ns = ns;
	3570	+ ns->root = mnt;
	3571	+ ns->mounts = 1;
	3572	+ list_add(&mnt->mnt_list, &ns->list);
	3573	+ mntget(newmount.mnt);
	3574	+
	3575	+ /* Attach to an apparent O_PATH fd with a note that we need to unmount
	3576	+ * it, not just simply put it.
	3577	+ */
	3578	+ file = dentry_open(&newmount, O_PATH, fc->cred);
	3579	+ if (IS_ERR(file)) {
	3580	+ dissolve_on_fput(newmount.mnt);
	3581	+ ret = PTR_ERR(file);
	3582	+ goto err_path;
	3583	+ }
	3584	+ file->f_mode \|= FMODE_NEED_UNMOUNT;
	3585	+
	3586	+ ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0);
	3587	+ if (ret >= 0)
	3588	+ fd_install(ret, file);
	3589	+ else
	3590	+ fput(file);
	3591	+
	3592	+err_path:
	3593	+ path_put(&newmount);
	3594	+err_unlock:
	3595	+ mutex_unlock(&fc->uapi_mutex);
	3596	+err_fsfd:
	3597	+ fdput(f);
	3598	+ return ret;
	3599	+}
	3600	+
	3601	+/*
	3602	+ * Move a mount from one place to another. In combination with
	3603	+ * fsopen()/fsmount() this is used to install a new mount and in combination
	3604	+ * with open_tree(OPEN_TREE_CLONE [\| AT_RECURSIVE]) it can be used to copy
	3605	+ * a mount subtree.
	3606	+ *
	3607	+ * Note the flags value is a combination of MOVE_MOUNT_* flags.
	3608	+ */
	3609	+SYSCALL_DEFINE5(move_mount,
	3610	+ int, from_dfd, const char __user *, from_pathname,
	3611	+ int, to_dfd, const char __user *, to_pathname,
	3612	+ unsigned int, flags)
	3613	+{
	3614	+ struct path from_path, to_path;
	3615	+ unsigned int lflags;
	3616	+ int ret = 0;
	3617	+
	3618	+ if (!may_mount())
	3619	+ return -EPERM;
	3620	+
	3621	+ if (flags & ~MOVE_MOUNT__MASK)
	3622	+ return -EINVAL;
	3623	+
	3624	+ /* If someone gives a pathname, they aren't permitted to move
	3625	+ * from an fd that requires unmount as we can't get at the flag
	3626	+ * to clear it afterwards.
	3627	+ */
	3628	+ lflags = 0;
	3629	+ if (flags & MOVE_MOUNT_F_SYMLINKS) lflags \|= LOOKUP_FOLLOW;
	3630	+ if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags \|= LOOKUP_AUTOMOUNT;
	3631	+ if (flags & MOVE_MOUNT_F_EMPTY_PATH) lflags \|= LOOKUP_EMPTY;
	3632	+
	3633	+ ret = user_path_at(from_dfd, from_pathname, lflags, &from_path);
	3634	+ if (ret < 0)
	3635	+ return ret;
	3636	+
	3637	+ lflags = 0;
	3638	+ if (flags & MOVE_MOUNT_T_SYMLINKS) lflags \|= LOOKUP_FOLLOW;
	3639	+ if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags \|= LOOKUP_AUTOMOUNT;
	3640	+ if (flags & MOVE_MOUNT_T_EMPTY_PATH) lflags \|= LOOKUP_EMPTY;
	3641	+
	3642	+ ret = user_path_at(to_dfd, to_pathname, lflags, &to_path);
	3643	+ if (ret < 0)
	3644	+ goto out_from;
	3645	+
	3646	+ ret = security_move_mount(&from_path, &to_path);
	3647	+ if (ret < 0)
	3648	+ goto out_to;
	3649	+
	3650	+ ret = do_move_mount(&from_path, &to_path);
	3651	+
	3652	+out_to:
	3653	+ path_put(&to_path);
	3654	+out_from:
	3655	+ path_put(&from_path);
	3656	+ return ret;
3077	3657	}
3078	3658
3079	3659	/*
..	..	@@ -3115,7 +3695,7 @@
3115	3695	* file system may be mounted on put_old. After all, new_root is a mountpoint.
3116	3696	*
3117	3697	* Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
3118		- * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives
	3698	+ * See Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives
3119	3699	* in this situation.
3120	3700	*
3121	3701	* Notes:
..	..	@@ -3129,19 +3709,21 @@
3129	3709	SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
3130	3710	const char __user *, put_old)
3131	3711	{
3132		- struct path new, old, parent_path, root_parent, root;
3133		- struct mount new_mnt, root_mnt, *old_mnt;
	3712	+ struct path new, old, root;
	3713	+ struct mount new_mnt, root_mnt, old_mnt, root_parent, *ex_parent;
3134	3714	struct mountpoint old_mp, root_mp;
3135	3715	int error;
3136	3716
3137	3717	if (!may_mount())
3138	3718	return -EPERM;
3139	3719
3140		- error = user_path_dir(new_root, &new);
	3720	+ error = user_path_at(AT_FDCWD, new_root,
	3721	+ LOOKUP_FOLLOW \| LOOKUP_DIRECTORY, &new);
3141	3722	if (error)
3142	3723	goto out0;
3143	3724
3144		- error = user_path_dir(put_old, &old);
	3725	+ error = user_path_at(AT_FDCWD, put_old,
	3726	+ LOOKUP_FOLLOW \| LOOKUP_DIRECTORY, &old);
3145	3727	if (error)
3146	3728	goto out1;
3147	3729
..	..	@@ -3159,9 +3741,11 @@
3159	3741	new_mnt = real_mount(new.mnt);
3160	3742	root_mnt = real_mount(root.mnt);
3161	3743	old_mnt = real_mount(old.mnt);
	3744	+ ex_parent = new_mnt->mnt_parent;
	3745	+ root_parent = root_mnt->mnt_parent;
3162	3746	if (IS_MNT_SHARED(old_mnt) \|\|
3163		- IS_MNT_SHARED(new_mnt->mnt_parent) \|\|
3164		- IS_MNT_SHARED(root_mnt->mnt_parent))
	3747	+ IS_MNT_SHARED(ex_parent) \|\|
	3748	+ IS_MNT_SHARED(root_parent))
3165	3749	goto out4;
3166	3750	if (!check_mnt(root_mnt) \|\| !check_mnt(new_mnt))
3167	3751	goto out4;
..	..	@@ -3178,7 +3762,6 @@
3178	3762	goto out4; /* not a mountpoint */
3179	3763	if (!mnt_has_parent(root_mnt))
3180	3764	goto out4; /* not attached */
3181		- root_mp = root_mnt->mnt_mp;
3182	3765	if (new.mnt->mnt_root != new.dentry)
3183	3766	goto out4; /* not a mountpoint */
3184	3767	if (!mnt_has_parent(new_mnt))
..	..	@@ -3190,9 +3773,8 @@
3190	3773	if (!is_path_reachable(new_mnt, new.dentry, &root))
3191	3774	goto out4;
3192	3775	lock_mount_hash();
3193		- root_mp->m_count++; /* pin it so it won't go away */
3194		- detach_mnt(new_mnt, &parent_path);
3195		- detach_mnt(root_mnt, &root_parent);
	3776	+ umount_mnt(new_mnt);
	3777	+ root_mp = unhash_mnt(root_mnt); /* we'll need its mountpoint */
3196	3778	if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
3197	3779	new_mnt->mnt.mnt_flags \|= MNT_LOCKED;
3198	3780	root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
..	..	@@ -3200,7 +3782,8 @@
3200	3782	/* mount old root on put_old */
3201	3783	attach_mnt(root_mnt, old_mnt, old_mp);
3202	3784	/* mount new_root on / */
3203		- attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
	3785	+ attach_mnt(new_mnt, root_parent, root_mp);
	3786	+ mnt_add_count(root_parent, -1);
3204	3787	touch_mnt_namespace(current->nsproxy->mnt_ns);
3205	3788	/* A moved mount should not expire automatically */
3206	3789	list_del_init(&new_mnt->mnt_expire);
..	..	@@ -3210,10 +3793,8 @@
3210	3793	error = 0;
3211	3794	out4:
3212	3795	unlock_mount(old_mp);
3213		- if (!error) {
3214		- path_put(&root_parent);
3215		- path_put(&parent_path);
3216		- }
	3796	+ if (!error)
	3797	+ mntput_no_expire(ex_parent);
3217	3798	out3:
3218	3799	path_put(&root);
3219	3800	out2:
..	..	@@ -3227,22 +3808,22 @@
3227	3808	static void __init init_mount_tree(void)
3228	3809	{
3229	3810	struct vfsmount *mnt;
	3811	+ struct mount *m;
3230	3812	struct mnt_namespace *ns;
3231	3813	struct path root;
3232		- struct file_system_type *type;
3233	3814
3234		- type = get_fs_type("rootfs");
3235		- if (!type)
3236		- panic("Can't find rootfs type");
3237		- mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
3238		- put_filesystem(type);
	3815	+ mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
3239	3816	if (IS_ERR(mnt))
3240	3817	panic("Can't create rootfs");
3241	3818
3242		- ns = create_mnt_ns(mnt);
	3819	+ ns = alloc_mnt_ns(&init_user_ns, false);
3243	3820	if (IS_ERR(ns))
3244	3821	panic("Can't allocate initial namespace");
3245		-
	3822	+ m = real_mount(mnt);
	3823	+ m->mnt_ns = ns;
	3824	+ ns->root = m;
	3825	+ ns->mounts = 1;
	3826	+ list_add(&m->mnt_list, &ns->list);
3246	3827	init_task.nsproxy->mnt_ns = ns;
3247	3828	get_mnt_ns(ns);
3248	3829
..	..	@@ -3284,6 +3865,7 @@
3284	3865	fs_kobj = kobject_create_and_add("fs", NULL);
3285	3866	if (!fs_kobj)
3286	3867	printk(KERN_WARNING "%s: kobj create error\n", __func__);
	3868	+ shmem_init();
3287	3869	init_rootfs();
3288	3870	init_mount_tree();
3289	3871	}
..	..	@@ -3296,10 +3878,10 @@
3296	3878	free_mnt_ns(ns);
3297	3879	}
3298	3880
3299		-struct vfsmount kern_mount_data(struct file_system_type type, void *data)
	3881	+struct vfsmount kern_mount(struct file_system_type type)
3300	3882	{
3301	3883	struct vfsmount *mnt;
3302		- mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, data);
	3884	+ mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);
3303	3885	if (!IS_ERR(mnt)) {
3304	3886	/*
3305	3887	* it is a longterm mount, don't release mnt until
..	..	@@ -3309,7 +3891,7 @@
3309	3891	}
3310	3892	return mnt;
3311	3893	}
3312		-EXPORT_SYMBOL_GPL(kern_mount_data);
	3894	+EXPORT_SYMBOL_GPL(kern_mount);
3313	3895
3314	3896	void kern_unmount(struct vfsmount *mnt)
3315	3897	{
..	..	@@ -3321,6 +3903,19 @@
3321	3903	}
3322	3904	}
3323	3905	EXPORT_SYMBOL(kern_unmount);
	3906	+
	3907	+void kern_unmount_array(struct vfsmount *mnt[], unsigned int num)
	3908	+{
	3909	+ unsigned int i;
	3910	+
	3911	+ for (i = 0; i < num; i++)
	3912	+ if (mnt[i])
	3913	+ real_mount(mnt[i])->mnt_ns = NULL;
	3914	+ synchronize_rcu_expedited();
	3915	+ for (i = 0; i < num; i++)
	3916	+ mntput(mnt[i]);
	3917	+}
	3918	+EXPORT_SYMBOL(kern_unmount_array);
3324	3919
3325	3920	bool our_mnt(struct vfsmount *mnt)
3326	3921	{
..	..	@@ -3351,7 +3946,8 @@
3351	3946	return chrooted;
3352	3947	}
3353	3948
3354		-static bool mnt_already_visible(struct mnt_namespace ns, struct vfsmount new,
	3949	+static bool mnt_already_visible(struct mnt_namespace *ns,
	3950	+ const struct super_block *sb,
3355	3951	int *new_mnt_flags)
3356	3952	{
3357	3953	int new_flags = *new_mnt_flags;
..	..	@@ -3359,11 +3955,15 @@
3359	3955	bool visible = false;
3360	3956
3361	3957	down_read(&namespace_sem);
	3958	+ lock_ns_list(ns);
3362	3959	list_for_each_entry(mnt, &ns->list, mnt_list) {
3363	3960	struct mount *child;
3364	3961	int mnt_flags;
3365	3962
3366		- if (mnt->mnt.mnt_sb->s_type != new->mnt_sb->s_type)
	3963	+ if (mnt_is_cursor(mnt))
	3964	+ continue;
	3965	+
	3966	+ if (mnt->mnt.mnt_sb->s_type != sb->s_type)
3367	3967	continue;
3368	3968
3369	3969	/* This mount is not fully visible if it's root directory
..	..	@@ -3410,11 +4010,12 @@
3410	4010	next: ;
3411	4011	}
3412	4012	found:
	4013	+ unlock_ns_list(ns);
3413	4014	up_read(&namespace_sem);
3414	4015	return visible;
3415	4016	}
3416	4017
3417		-static bool mount_too_revealing(struct vfsmount mnt, int new_mnt_flags)
	4018	+static bool mount_too_revealing(const struct super_block sb, int new_mnt_flags)
3418	4019	{
3419	4020	const unsigned long required_iflags = SB_I_NOEXEC \| SB_I_NODEV;
3420	4021	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
..	..	@@ -3424,7 +4025,7 @@
3424	4025	return false;
3425	4026
3426	4027	/* Can this filesystem be too revealing? */
3427		- s_iflags = mnt->mnt_sb->s_iflags;
	4028	+ s_iflags = sb->s_iflags;
3428	4029	if (!(s_iflags & SB_I_USERNS_VISIBLE))
3429	4030	return false;
3430	4031
..	..	@@ -3434,7 +4035,7 @@
3434	4035	return true;
3435	4036	}
3436	4037
3437		- return !mnt_already_visible(ns, mnt, new_mnt_flags);
	4038	+ return !mnt_already_visible(ns, sb, new_mnt_flags);
3438	4039	}
3439	4040
3440	4041	bool mnt_may_suid(struct vfsmount *mnt)
..	..	@@ -3471,18 +4072,23 @@
3471	4072	put_mnt_ns(to_mnt_ns(ns));
3472	4073	}
3473	4074
3474		-static int mntns_install(struct nsproxy nsproxy, struct ns_common ns)
	4075	+static int mntns_install(struct nsset nsset, struct ns_common ns)
3475	4076	{
3476		- struct fs_struct *fs = current->fs;
	4077	+ struct nsproxy *nsproxy = nsset->nsproxy;
	4078	+ struct fs_struct *fs = nsset->fs;
3477	4079	struct mnt_namespace mnt_ns = to_mnt_ns(ns), old_mnt_ns;
	4080	+ struct user_namespace *user_ns = nsset->cred->user_ns;
3478	4081	struct path root;
3479	4082	int err;
3480	4083
3481	4084	if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) \|\|
3482		- !ns_capable(current_user_ns(), CAP_SYS_CHROOT) \|\|
3483		- !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
	4085	+ !ns_capable(user_ns, CAP_SYS_CHROOT) \|\|
	4086	+ !ns_capable(user_ns, CAP_SYS_ADMIN))
3484	4087	return -EPERM;
3485	4088
	4089	+ if (is_anon_ns(mnt_ns))
	4090	+ return -EINVAL;
	4091	+
3486	4092	if (fs->users != 1)
3487	4093	return -EINVAL;
3488	4094