.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * linux/fs/namespace.c |
---|
3 | 4 | * |
---|
4 | 5 | * (C) Copyright Al Viro 2000, 2001 |
---|
5 | | - * Released under GPL v2. |
---|
6 | 6 | * |
---|
7 | 7 | * Based on code from fs/super.c, copyright Linus Torvalds and others. |
---|
8 | 8 | * Heavily rewritten. |
---|
.. | .. |
---|
21 | 21 | #include <linux/init.h> /* init_rootfs */ |
---|
22 | 22 | #include <linux/fs_struct.h> /* get_fs_root et.al. */ |
---|
23 | 23 | #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ |
---|
| 24 | +#include <linux/file.h> |
---|
24 | 25 | #include <linux/uaccess.h> |
---|
25 | 26 | #include <linux/proc_ns.h> |
---|
26 | 27 | #include <linux/magic.h> |
---|
27 | | -#include <linux/bootmem.h> |
---|
| 28 | +#include <linux/memblock.h> |
---|
28 | 29 | #include <linux/task_work.h> |
---|
29 | 30 | #include <linux/sched/task.h> |
---|
| 31 | +#include <uapi/linux/mount.h> |
---|
| 32 | +#include <linux/fs_context.h> |
---|
| 33 | +#include <linux/shmem_fs.h> |
---|
30 | 34 | |
---|
31 | 35 | #include "pnode.h" |
---|
32 | 36 | #include "internal.h" |
---|
.. | .. |
---|
67 | 71 | static struct hlist_head *mountpoint_hashtable __read_mostly; |
---|
68 | 72 | static struct kmem_cache *mnt_cache __read_mostly; |
---|
69 | 73 | static DECLARE_RWSEM(namespace_sem); |
---|
| 74 | +static HLIST_HEAD(unmounted); /* protected by namespace_sem */ |
---|
| 75 | +static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */ |
---|
70 | 76 | |
---|
71 | 77 | /* /sys/fs */ |
---|
72 | 78 | struct kobject *fs_kobj; |
---|
.. | .. |
---|
151 | 157 | /* |
---|
152 | 158 | * vfsmount lock must be held for write |
---|
153 | 159 | */ |
---|
154 | | -unsigned int mnt_get_count(struct mount *mnt) |
---|
| 160 | +int mnt_get_count(struct mount *mnt) |
---|
155 | 161 | { |
---|
156 | 162 | #ifdef CONFIG_SMP |
---|
157 | | - unsigned int count = 0; |
---|
| 163 | + int count = 0; |
---|
158 | 164 | int cpu; |
---|
159 | 165 | |
---|
160 | 166 | for_each_possible_cpu(cpu) { |
---|
.. | .. |
---|
165 | 171 | #else |
---|
166 | 172 | return mnt->mnt_count; |
---|
167 | 173 | #endif |
---|
168 | | -} |
---|
169 | | - |
---|
170 | | -static void drop_mountpoint(struct fs_pin *p) |
---|
171 | | -{ |
---|
172 | | - struct mount *m = container_of(p, struct mount, mnt_umount); |
---|
173 | | - dput(m->mnt_ex_mountpoint); |
---|
174 | | - pin_remove(p); |
---|
175 | | - mntput(&m->mnt); |
---|
176 | 174 | } |
---|
177 | 175 | |
---|
178 | 176 | static struct mount *alloc_vfsmnt(const char *name) |
---|
.. | .. |
---|
201 | 199 | mnt->mnt_count = 1; |
---|
202 | 200 | mnt->mnt_writers = 0; |
---|
203 | 201 | #endif |
---|
204 | | - mnt->mnt.data = NULL; |
---|
205 | 202 | |
---|
206 | 203 | INIT_HLIST_NODE(&mnt->mnt_hash); |
---|
207 | 204 | INIT_LIST_HEAD(&mnt->mnt_child); |
---|
.. | .. |
---|
213 | 210 | INIT_LIST_HEAD(&mnt->mnt_slave); |
---|
214 | 211 | INIT_HLIST_NODE(&mnt->mnt_mp_list); |
---|
215 | 212 | INIT_LIST_HEAD(&mnt->mnt_umounting); |
---|
216 | | - init_fs_pin(&mnt->mnt_umount, drop_mountpoint); |
---|
| 213 | + INIT_HLIST_HEAD(&mnt->mnt_stuck_children); |
---|
217 | 214 | } |
---|
218 | 215 | return mnt; |
---|
219 | 216 | |
---|
.. | .. |
---|
247 | 244 | * mnt_want/drop_write() will _keep_ the filesystem |
---|
248 | 245 | * r/w. |
---|
249 | 246 | */ |
---|
250 | | -int __mnt_is_readonly(struct vfsmount *mnt) |
---|
| 247 | +bool __mnt_is_readonly(struct vfsmount *mnt) |
---|
251 | 248 | { |
---|
252 | | - if (mnt->mnt_flags & MNT_READONLY) |
---|
253 | | - return 1; |
---|
254 | | - if (sb_rdonly(mnt->mnt_sb)) |
---|
255 | | - return 1; |
---|
256 | | - return 0; |
---|
| 249 | + return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb); |
---|
257 | 250 | } |
---|
258 | 251 | EXPORT_SYMBOL_GPL(__mnt_is_readonly); |
---|
259 | 252 | |
---|
.. | .. |
---|
426 | 419 | sb_end_write(file_inode(file)->i_sb); |
---|
427 | 420 | return ret; |
---|
428 | 421 | } |
---|
429 | | -EXPORT_SYMBOL_GPL(mnt_want_write_file); |
---|
| 422 | +EXPORT_SYMBOL_NS_GPL(mnt_want_write_file, ANDROID_GKI_VFS_EXPORT_ONLY); |
---|
430 | 423 | |
---|
431 | 424 | /** |
---|
432 | 425 | * __mnt_drop_write - give up write access to a mount |
---|
.. | .. |
---|
468 | 461 | __mnt_drop_write_file(file); |
---|
469 | 462 | sb_end_write(file_inode(file)->i_sb); |
---|
470 | 463 | } |
---|
471 | | -EXPORT_SYMBOL(mnt_drop_write_file); |
---|
| 464 | +EXPORT_SYMBOL_NS(mnt_drop_write_file, ANDROID_GKI_VFS_EXPORT_ONLY); |
---|
472 | 465 | |
---|
473 | 466 | static int mnt_make_readonly(struct mount *mnt) |
---|
474 | 467 | { |
---|
.. | .. |
---|
512 | 505 | return ret; |
---|
513 | 506 | } |
---|
514 | 507 | |
---|
515 | | -static void __mnt_unmake_readonly(struct mount *mnt) |
---|
| 508 | +static int __mnt_unmake_readonly(struct mount *mnt) |
---|
516 | 509 | { |
---|
517 | 510 | lock_mount_hash(); |
---|
518 | 511 | mnt->mnt.mnt_flags &= ~MNT_READONLY; |
---|
519 | 512 | unlock_mount_hash(); |
---|
| 513 | + return 0; |
---|
520 | 514 | } |
---|
521 | 515 | |
---|
522 | 516 | int sb_prepare_remount_readonly(struct super_block *sb) |
---|
.. | .. |
---|
557 | 551 | |
---|
558 | 552 | static void free_vfsmnt(struct mount *mnt) |
---|
559 | 553 | { |
---|
560 | | - kfree(mnt->mnt.data); |
---|
561 | 554 | kfree_const(mnt->mnt_devname); |
---|
562 | 555 | #ifdef CONFIG_SMP |
---|
563 | 556 | free_percpu(mnt->mnt_pcp); |
---|
.. | .. |
---|
659 | 652 | return m; |
---|
660 | 653 | } |
---|
661 | 654 | |
---|
| 655 | +static inline void lock_ns_list(struct mnt_namespace *ns) |
---|
| 656 | +{ |
---|
| 657 | + spin_lock(&ns->ns_lock); |
---|
| 658 | +} |
---|
| 659 | + |
---|
| 660 | +static inline void unlock_ns_list(struct mnt_namespace *ns) |
---|
| 661 | +{ |
---|
| 662 | + spin_unlock(&ns->ns_lock); |
---|
| 663 | +} |
---|
| 664 | + |
---|
| 665 | +static inline bool mnt_is_cursor(struct mount *mnt) |
---|
| 666 | +{ |
---|
| 667 | + return mnt->mnt.mnt_flags & MNT_CURSOR; |
---|
| 668 | +} |
---|
| 669 | + |
---|
662 | 670 | /* |
---|
663 | 671 | * __is_local_mountpoint - Test to see if dentry is a mountpoint in the |
---|
664 | 672 | * current mount namespace. |
---|
.. | .. |
---|
680 | 688 | struct mount *mnt; |
---|
681 | 689 | bool is_covered = false; |
---|
682 | 690 | |
---|
683 | | - if (!d_mountpoint(dentry)) |
---|
684 | | - goto out; |
---|
685 | | - |
---|
686 | 691 | down_read(&namespace_sem); |
---|
| 692 | + lock_ns_list(ns); |
---|
687 | 693 | list_for_each_entry(mnt, &ns->list, mnt_list) { |
---|
| 694 | + if (mnt_is_cursor(mnt)) |
---|
| 695 | + continue; |
---|
688 | 696 | is_covered = (mnt->mnt_mountpoint == dentry); |
---|
689 | 697 | if (is_covered) |
---|
690 | 698 | break; |
---|
691 | 699 | } |
---|
| 700 | + unlock_ns_list(ns); |
---|
692 | 701 | up_read(&namespace_sem); |
---|
693 | | -out: |
---|
| 702 | + |
---|
694 | 703 | return is_covered; |
---|
695 | 704 | } |
---|
696 | 705 | |
---|
.. | .. |
---|
745 | 754 | |
---|
746 | 755 | /* Add the new mountpoint to the hash table */ |
---|
747 | 756 | read_seqlock_excl(&mount_lock); |
---|
748 | | - new->m_dentry = dentry; |
---|
| 757 | + new->m_dentry = dget(dentry); |
---|
749 | 758 | new->m_count = 1; |
---|
750 | 759 | hlist_add_head(&new->m_hash, mp_hash(dentry)); |
---|
751 | 760 | INIT_HLIST_HEAD(&new->m_list); |
---|
.. | .. |
---|
758 | 767 | return mp; |
---|
759 | 768 | } |
---|
760 | 769 | |
---|
761 | | -static void put_mountpoint(struct mountpoint *mp) |
---|
| 770 | +/* |
---|
| 771 | + * vfsmount lock must be held. Additionally, the caller is responsible |
---|
| 772 | + * for serializing calls for given disposal list. |
---|
| 773 | + */ |
---|
| 774 | +static void __put_mountpoint(struct mountpoint *mp, struct list_head *list) |
---|
762 | 775 | { |
---|
763 | 776 | if (!--mp->m_count) { |
---|
764 | 777 | struct dentry *dentry = mp->m_dentry; |
---|
.. | .. |
---|
766 | 779 | spin_lock(&dentry->d_lock); |
---|
767 | 780 | dentry->d_flags &= ~DCACHE_MOUNTED; |
---|
768 | 781 | spin_unlock(&dentry->d_lock); |
---|
| 782 | + dput_to_list(dentry, list); |
---|
769 | 783 | hlist_del(&mp->m_hash); |
---|
770 | 784 | kfree(mp); |
---|
771 | 785 | } |
---|
| 786 | +} |
---|
| 787 | + |
---|
| 788 | +/* called with namespace_lock and vfsmount lock */ |
---|
| 789 | +static void put_mountpoint(struct mountpoint *mp) |
---|
| 790 | +{ |
---|
| 791 | + __put_mountpoint(mp, &ex_mountpoints); |
---|
772 | 792 | } |
---|
773 | 793 | |
---|
774 | 794 | static inline int check_mnt(struct mount *mnt) |
---|
.. | .. |
---|
801 | 821 | /* |
---|
802 | 822 | * vfsmount lock must be held for write |
---|
803 | 823 | */ |
---|
804 | | -static void unhash_mnt(struct mount *mnt) |
---|
| 824 | +static struct mountpoint *unhash_mnt(struct mount *mnt) |
---|
805 | 825 | { |
---|
| 826 | + struct mountpoint *mp; |
---|
806 | 827 | mnt->mnt_parent = mnt; |
---|
807 | 828 | mnt->mnt_mountpoint = mnt->mnt.mnt_root; |
---|
808 | 829 | list_del_init(&mnt->mnt_child); |
---|
809 | 830 | hlist_del_init_rcu(&mnt->mnt_hash); |
---|
810 | 831 | hlist_del_init(&mnt->mnt_mp_list); |
---|
811 | | - put_mountpoint(mnt->mnt_mp); |
---|
| 832 | + mp = mnt->mnt_mp; |
---|
812 | 833 | mnt->mnt_mp = NULL; |
---|
813 | | -} |
---|
814 | | - |
---|
815 | | -/* |
---|
816 | | - * vfsmount lock must be held for write |
---|
817 | | - */ |
---|
818 | | -static void detach_mnt(struct mount *mnt, struct path *old_path) |
---|
819 | | -{ |
---|
820 | | - old_path->dentry = mnt->mnt_mountpoint; |
---|
821 | | - old_path->mnt = &mnt->mnt_parent->mnt; |
---|
822 | | - unhash_mnt(mnt); |
---|
| 834 | + return mp; |
---|
823 | 835 | } |
---|
824 | 836 | |
---|
825 | 837 | /* |
---|
.. | .. |
---|
827 | 839 | */ |
---|
828 | 840 | static void umount_mnt(struct mount *mnt) |
---|
829 | 841 | { |
---|
830 | | - /* old mountpoint will be dropped when we can do that */ |
---|
831 | | - mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint; |
---|
832 | | - unhash_mnt(mnt); |
---|
| 842 | + put_mountpoint(unhash_mnt(mnt)); |
---|
833 | 843 | } |
---|
834 | 844 | |
---|
835 | 845 | /* |
---|
.. | .. |
---|
841 | 851 | { |
---|
842 | 852 | mp->m_count++; |
---|
843 | 853 | mnt_add_count(mnt, 1); /* essentially, that's mntget */ |
---|
844 | | - child_mnt->mnt_mountpoint = dget(mp->m_dentry); |
---|
| 854 | + child_mnt->mnt_mountpoint = mp->m_dentry; |
---|
845 | 855 | child_mnt->mnt_parent = mnt; |
---|
846 | 856 | child_mnt->mnt_mp = mp; |
---|
847 | 857 | hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list); |
---|
.. | .. |
---|
868 | 878 | void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt) |
---|
869 | 879 | { |
---|
870 | 880 | struct mountpoint *old_mp = mnt->mnt_mp; |
---|
871 | | - struct dentry *old_mountpoint = mnt->mnt_mountpoint; |
---|
872 | 881 | struct mount *old_parent = mnt->mnt_parent; |
---|
873 | 882 | |
---|
874 | 883 | list_del_init(&mnt->mnt_child); |
---|
.. | .. |
---|
878 | 887 | attach_mnt(mnt, parent, mp); |
---|
879 | 888 | |
---|
880 | 889 | put_mountpoint(old_mp); |
---|
881 | | - |
---|
882 | | - /* |
---|
883 | | - * Safely avoid even the suggestion this code might sleep or |
---|
884 | | - * lock the mount hash by taking advantage of the knowledge that |
---|
885 | | - * mnt_change_mountpoint will not release the final reference |
---|
886 | | - * to a mountpoint. |
---|
887 | | - * |
---|
888 | | - * During mounting, the mount passed in as the parent mount will |
---|
889 | | - * continue to use the old mountpoint and during unmounting, the |
---|
890 | | - * old mountpoint will continue to exist until namespace_unlock, |
---|
891 | | - * which happens well after mnt_change_mountpoint. |
---|
892 | | - */ |
---|
893 | | - spin_lock(&old_mountpoint->d_lock); |
---|
894 | | - old_mountpoint->d_lockref.count--; |
---|
895 | | - spin_unlock(&old_mountpoint->d_lock); |
---|
896 | | - |
---|
897 | 890 | mnt_add_count(old_parent, -1); |
---|
898 | 891 | } |
---|
899 | 892 | |
---|
.. | .. |
---|
948 | 941 | return p; |
---|
949 | 942 | } |
---|
950 | 943 | |
---|
951 | | -struct vfsmount * |
---|
952 | | -vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) |
---|
| 944 | +/** |
---|
| 945 | + * vfs_create_mount - Create a mount for a configured superblock |
---|
| 946 | + * @fc: The configuration context with the superblock attached |
---|
| 947 | + * |
---|
| 948 | + * Create a mount to an already configured superblock. If necessary, the |
---|
| 949 | + * caller should invoke vfs_get_tree() before calling this. |
---|
| 950 | + * |
---|
| 951 | + * Note that this does not attach the mount to anything. |
---|
| 952 | + */ |
---|
| 953 | +struct vfsmount *vfs_create_mount(struct fs_context *fc) |
---|
953 | 954 | { |
---|
954 | 955 | struct mount *mnt; |
---|
955 | | - struct dentry *root; |
---|
956 | 956 | |
---|
957 | | - if (!type) |
---|
958 | | - return ERR_PTR(-ENODEV); |
---|
| 957 | + if (!fc->root) |
---|
| 958 | + return ERR_PTR(-EINVAL); |
---|
959 | 959 | |
---|
960 | | - mnt = alloc_vfsmnt(name); |
---|
| 960 | + mnt = alloc_vfsmnt(fc->source ?: "none"); |
---|
961 | 961 | if (!mnt) |
---|
962 | 962 | return ERR_PTR(-ENOMEM); |
---|
963 | 963 | |
---|
964 | | - if (type->alloc_mnt_data) { |
---|
965 | | - mnt->mnt.data = type->alloc_mnt_data(); |
---|
966 | | - if (!mnt->mnt.data) { |
---|
967 | | - mnt_free_id(mnt); |
---|
968 | | - free_vfsmnt(mnt); |
---|
969 | | - return ERR_PTR(-ENOMEM); |
---|
970 | | - } |
---|
971 | | - } |
---|
972 | | - if (flags & SB_KERNMOUNT) |
---|
| 964 | + if (fc->sb_flags & SB_KERNMOUNT) |
---|
973 | 965 | mnt->mnt.mnt_flags = MNT_INTERNAL; |
---|
974 | 966 | |
---|
975 | | - root = mount_fs(type, flags, name, &mnt->mnt, data); |
---|
976 | | - if (IS_ERR(root)) { |
---|
977 | | - mnt_free_id(mnt); |
---|
978 | | - free_vfsmnt(mnt); |
---|
979 | | - return ERR_CAST(root); |
---|
980 | | - } |
---|
| 967 | + atomic_inc(&fc->root->d_sb->s_active); |
---|
| 968 | + mnt->mnt.mnt_sb = fc->root->d_sb; |
---|
| 969 | + mnt->mnt.mnt_root = dget(fc->root); |
---|
| 970 | + mnt->mnt_mountpoint = mnt->mnt.mnt_root; |
---|
| 971 | + mnt->mnt_parent = mnt; |
---|
981 | 972 | |
---|
982 | | - mnt->mnt.mnt_root = root; |
---|
983 | | - mnt->mnt.mnt_sb = root->d_sb; |
---|
984 | | - mnt->mnt_mountpoint = mnt->mnt.mnt_root; |
---|
985 | | - mnt->mnt_parent = mnt; |
---|
986 | 973 | lock_mount_hash(); |
---|
987 | | - list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); |
---|
| 974 | + list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts); |
---|
988 | 975 | unlock_mount_hash(); |
---|
989 | 976 | return &mnt->mnt; |
---|
| 977 | +} |
---|
| 978 | +EXPORT_SYMBOL(vfs_create_mount); |
---|
| 979 | + |
---|
| 980 | +struct vfsmount *fc_mount(struct fs_context *fc) |
---|
| 981 | +{ |
---|
| 982 | + int err = vfs_get_tree(fc); |
---|
| 983 | + if (!err) { |
---|
| 984 | + up_write(&fc->root->d_sb->s_umount); |
---|
| 985 | + return vfs_create_mount(fc); |
---|
| 986 | + } |
---|
| 987 | + return ERR_PTR(err); |
---|
| 988 | +} |
---|
| 989 | +EXPORT_SYMBOL(fc_mount); |
---|
| 990 | + |
---|
| 991 | +struct vfsmount *vfs_kern_mount(struct file_system_type *type, |
---|
| 992 | + int flags, const char *name, |
---|
| 993 | + void *data) |
---|
| 994 | +{ |
---|
| 995 | + struct fs_context *fc; |
---|
| 996 | + struct vfsmount *mnt; |
---|
| 997 | + int ret = 0; |
---|
| 998 | + |
---|
| 999 | + if (!type) |
---|
| 1000 | + return ERR_PTR(-EINVAL); |
---|
| 1001 | + |
---|
| 1002 | + fc = fs_context_for_mount(type, flags); |
---|
| 1003 | + if (IS_ERR(fc)) |
---|
| 1004 | + return ERR_CAST(fc); |
---|
| 1005 | + |
---|
| 1006 | + if (name) |
---|
| 1007 | + ret = vfs_parse_fs_string(fc, "source", |
---|
| 1008 | + name, strlen(name)); |
---|
| 1009 | + if (!ret) |
---|
| 1010 | + ret = parse_monolithic_mount_data(fc, data); |
---|
| 1011 | + if (!ret) |
---|
| 1012 | + mnt = fc_mount(fc); |
---|
| 1013 | + else |
---|
| 1014 | + mnt = ERR_PTR(ret); |
---|
| 1015 | + |
---|
| 1016 | + put_fs_context(fc); |
---|
| 1017 | + return mnt; |
---|
990 | 1018 | } |
---|
991 | 1019 | EXPORT_SYMBOL_GPL(vfs_kern_mount); |
---|
992 | 1020 | |
---|
.. | .. |
---|
1016 | 1044 | if (!mnt) |
---|
1017 | 1045 | return ERR_PTR(-ENOMEM); |
---|
1018 | 1046 | |
---|
1019 | | - if (sb->s_op->clone_mnt_data) { |
---|
1020 | | - mnt->mnt.data = sb->s_op->clone_mnt_data(old->mnt.data); |
---|
1021 | | - if (!mnt->mnt.data) { |
---|
1022 | | - err = -ENOMEM; |
---|
1023 | | - goto out_free; |
---|
1024 | | - } |
---|
1025 | | - } |
---|
1026 | | - |
---|
1027 | 1047 | if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE)) |
---|
1028 | 1048 | mnt->mnt_group_id = 0; /* not a peer of original */ |
---|
1029 | 1049 | else |
---|
.. | .. |
---|
1037 | 1057 | |
---|
1038 | 1058 | mnt->mnt.mnt_flags = old->mnt.mnt_flags; |
---|
1039 | 1059 | mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL); |
---|
1040 | | - /* Don't allow unprivileged users to change mount flags */ |
---|
1041 | | - if (flag & CL_UNPRIVILEGED) { |
---|
1042 | | - mnt->mnt.mnt_flags |= MNT_LOCK_ATIME; |
---|
1043 | | - |
---|
1044 | | - if (mnt->mnt.mnt_flags & MNT_READONLY) |
---|
1045 | | - mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; |
---|
1046 | | - |
---|
1047 | | - if (mnt->mnt.mnt_flags & MNT_NODEV) |
---|
1048 | | - mnt->mnt.mnt_flags |= MNT_LOCK_NODEV; |
---|
1049 | | - |
---|
1050 | | - if (mnt->mnt.mnt_flags & MNT_NOSUID) |
---|
1051 | | - mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID; |
---|
1052 | | - |
---|
1053 | | - if (mnt->mnt.mnt_flags & MNT_NOEXEC) |
---|
1054 | | - mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC; |
---|
1055 | | - } |
---|
1056 | | - |
---|
1057 | | - /* Don't allow unprivileged users to reveal what is under a mount */ |
---|
1058 | | - if ((flag & CL_UNPRIVILEGED) && |
---|
1059 | | - (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire))) |
---|
1060 | | - mnt->mnt.mnt_flags |= MNT_LOCKED; |
---|
1061 | 1060 | |
---|
1062 | 1061 | atomic_inc(&sb->s_active); |
---|
1063 | 1062 | mnt->mnt.mnt_sb = sb; |
---|
.. | .. |
---|
1102 | 1101 | |
---|
1103 | 1102 | static void cleanup_mnt(struct mount *mnt) |
---|
1104 | 1103 | { |
---|
| 1104 | + struct hlist_node *p; |
---|
| 1105 | + struct mount *m; |
---|
1105 | 1106 | /* |
---|
1106 | | - * This probably indicates that somebody messed |
---|
1107 | | - * up a mnt_want/drop_write() pair. If this |
---|
1108 | | - * happens, the filesystem was probably unable |
---|
1109 | | - * to make r/w->r/o transitions. |
---|
1110 | | - */ |
---|
1111 | | - /* |
---|
| 1107 | + * The warning here probably indicates that somebody messed |
---|
| 1108 | + * up a mnt_want/drop_write() pair. If this happens, the |
---|
| 1109 | + * filesystem was probably unable to make r/w->r/o transitions. |
---|
1112 | 1110 | * The locking used to deal with mnt_count decrement provides barriers, |
---|
1113 | 1111 | * so mnt_get_writers() below is safe. |
---|
1114 | 1112 | */ |
---|
1115 | 1113 | WARN_ON(mnt_get_writers(mnt)); |
---|
1116 | 1114 | if (unlikely(mnt->mnt_pins.first)) |
---|
1117 | 1115 | mnt_pin_kill(mnt); |
---|
| 1116 | + hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) { |
---|
| 1117 | + hlist_del(&m->mnt_umount); |
---|
| 1118 | + mntput(&m->mnt); |
---|
| 1119 | + } |
---|
1118 | 1120 | fsnotify_vfsmount_delete(&mnt->mnt); |
---|
1119 | 1121 | dput(mnt->mnt.mnt_root); |
---|
1120 | 1122 | deactivate_super(mnt->mnt.mnt_sb); |
---|
.. | .. |
---|
1140 | 1142 | |
---|
1141 | 1143 | static void mntput_no_expire(struct mount *mnt) |
---|
1142 | 1144 | { |
---|
| 1145 | + LIST_HEAD(list); |
---|
| 1146 | + int count; |
---|
| 1147 | + |
---|
1143 | 1148 | rcu_read_lock(); |
---|
1144 | 1149 | if (likely(READ_ONCE(mnt->mnt_ns))) { |
---|
1145 | 1150 | /* |
---|
.. | .. |
---|
1162 | 1167 | */ |
---|
1163 | 1168 | smp_mb(); |
---|
1164 | 1169 | mnt_add_count(mnt, -1); |
---|
1165 | | - if (mnt_get_count(mnt)) { |
---|
| 1170 | + count = mnt_get_count(mnt); |
---|
| 1171 | + if (count != 0) { |
---|
| 1172 | + WARN_ON(count < 0); |
---|
1166 | 1173 | rcu_read_unlock(); |
---|
1167 | 1174 | unlock_mount_hash(); |
---|
1168 | 1175 | return; |
---|
.. | .. |
---|
1180 | 1187 | if (unlikely(!list_empty(&mnt->mnt_mounts))) { |
---|
1181 | 1188 | struct mount *p, *tmp; |
---|
1182 | 1189 | list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) { |
---|
1183 | | - umount_mnt(p); |
---|
| 1190 | + __put_mountpoint(unhash_mnt(p), &list); |
---|
| 1191 | + hlist_add_head(&p->mnt_umount, &mnt->mnt_stuck_children); |
---|
1184 | 1192 | } |
---|
1185 | 1193 | } |
---|
1186 | 1194 | unlock_mount_hash(); |
---|
| 1195 | + shrink_dentry_list(&list); |
---|
1187 | 1196 | |
---|
1188 | 1197 | if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) { |
---|
1189 | 1198 | struct task_struct *task = current; |
---|
1190 | 1199 | if (likely(!(task->flags & PF_KTHREAD))) { |
---|
1191 | 1200 | init_task_work(&mnt->mnt_rcu, __cleanup_mnt); |
---|
1192 | | - if (!task_work_add(task, &mnt->mnt_rcu, true)) |
---|
| 1201 | + if (!task_work_add(task, &mnt->mnt_rcu, TWA_RESUME)) |
---|
1193 | 1202 | return; |
---|
1194 | 1203 | } |
---|
1195 | 1204 | if (llist_add(&mnt->mnt_llist, &delayed_mntput_list)) |
---|
.. | .. |
---|
1259 | 1268 | } |
---|
1260 | 1269 | |
---|
1261 | 1270 | #ifdef CONFIG_PROC_FS |
---|
| 1271 | +static struct mount *mnt_list_next(struct mnt_namespace *ns, |
---|
| 1272 | + struct list_head *p) |
---|
| 1273 | +{ |
---|
| 1274 | + struct mount *mnt, *ret = NULL; |
---|
| 1275 | + |
---|
| 1276 | + lock_ns_list(ns); |
---|
| 1277 | + list_for_each_continue(p, &ns->list) { |
---|
| 1278 | + mnt = list_entry(p, typeof(*mnt), mnt_list); |
---|
| 1279 | + if (!mnt_is_cursor(mnt)) { |
---|
| 1280 | + ret = mnt; |
---|
| 1281 | + break; |
---|
| 1282 | + } |
---|
| 1283 | + } |
---|
| 1284 | + unlock_ns_list(ns); |
---|
| 1285 | + |
---|
| 1286 | + return ret; |
---|
| 1287 | +} |
---|
| 1288 | + |
---|
1262 | 1289 | /* iterator; we want it to have access to namespace_sem, thus here... */ |
---|
1263 | 1290 | static void *m_start(struct seq_file *m, loff_t *pos) |
---|
1264 | 1291 | { |
---|
1265 | 1292 | struct proc_mounts *p = m->private; |
---|
| 1293 | + struct list_head *prev; |
---|
1266 | 1294 | |
---|
1267 | 1295 | down_read(&namespace_sem); |
---|
1268 | | - if (p->cached_event == p->ns->event) { |
---|
1269 | | - void *v = p->cached_mount; |
---|
1270 | | - if (*pos == p->cached_index) |
---|
1271 | | - return v; |
---|
1272 | | - if (*pos == p->cached_index + 1) { |
---|
1273 | | - v = seq_list_next(v, &p->ns->list, &p->cached_index); |
---|
1274 | | - return p->cached_mount = v; |
---|
1275 | | - } |
---|
| 1296 | + if (!*pos) { |
---|
| 1297 | + prev = &p->ns->list; |
---|
| 1298 | + } else { |
---|
| 1299 | + prev = &p->cursor.mnt_list; |
---|
| 1300 | + |
---|
| 1301 | + /* Read after we'd reached the end? */ |
---|
| 1302 | + if (list_empty(prev)) |
---|
| 1303 | + return NULL; |
---|
1276 | 1304 | } |
---|
1277 | 1305 | |
---|
1278 | | - p->cached_event = p->ns->event; |
---|
1279 | | - p->cached_mount = seq_list_start(&p->ns->list, *pos); |
---|
1280 | | - p->cached_index = *pos; |
---|
1281 | | - return p->cached_mount; |
---|
| 1306 | + return mnt_list_next(p->ns, prev); |
---|
1282 | 1307 | } |
---|
1283 | 1308 | |
---|
1284 | 1309 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) |
---|
1285 | 1310 | { |
---|
1286 | 1311 | struct proc_mounts *p = m->private; |
---|
| 1312 | + struct mount *mnt = v; |
---|
1287 | 1313 | |
---|
1288 | | - p->cached_mount = seq_list_next(v, &p->ns->list, pos); |
---|
1289 | | - p->cached_index = *pos; |
---|
1290 | | - return p->cached_mount; |
---|
| 1314 | + ++*pos; |
---|
| 1315 | + return mnt_list_next(p->ns, &mnt->mnt_list); |
---|
1291 | 1316 | } |
---|
1292 | 1317 | |
---|
1293 | 1318 | static void m_stop(struct seq_file *m, void *v) |
---|
1294 | 1319 | { |
---|
| 1320 | + struct proc_mounts *p = m->private; |
---|
| 1321 | + struct mount *mnt = v; |
---|
| 1322 | + |
---|
| 1323 | + lock_ns_list(p->ns); |
---|
| 1324 | + if (mnt) |
---|
| 1325 | + list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list); |
---|
| 1326 | + else |
---|
| 1327 | + list_del_init(&p->cursor.mnt_list); |
---|
| 1328 | + unlock_ns_list(p->ns); |
---|
1295 | 1329 | up_read(&namespace_sem); |
---|
1296 | 1330 | } |
---|
1297 | 1331 | |
---|
1298 | 1332 | static int m_show(struct seq_file *m, void *v) |
---|
1299 | 1333 | { |
---|
1300 | 1334 | struct proc_mounts *p = m->private; |
---|
1301 | | - struct mount *r = list_entry(v, struct mount, mnt_list); |
---|
| 1335 | + struct mount *r = v; |
---|
1302 | 1336 | return p->show(m, &r->mnt); |
---|
1303 | 1337 | } |
---|
1304 | 1338 | |
---|
.. | .. |
---|
1308 | 1342 | .stop = m_stop, |
---|
1309 | 1343 | .show = m_show, |
---|
1310 | 1344 | }; |
---|
| 1345 | + |
---|
| 1346 | +void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor) |
---|
| 1347 | +{ |
---|
| 1348 | + down_read(&namespace_sem); |
---|
| 1349 | + lock_ns_list(ns); |
---|
| 1350 | + list_del(&cursor->mnt_list); |
---|
| 1351 | + unlock_ns_list(ns); |
---|
| 1352 | + up_read(&namespace_sem); |
---|
| 1353 | +} |
---|
1311 | 1354 | #endif /* CONFIG_PROC_FS */ |
---|
1312 | 1355 | |
---|
1313 | 1356 | /** |
---|
.. | .. |
---|
1369 | 1412 | |
---|
1370 | 1413 | EXPORT_SYMBOL(may_umount); |
---|
1371 | 1414 | |
---|
1372 | | -static HLIST_HEAD(unmounted); /* protected by namespace_sem */ |
---|
1373 | | - |
---|
1374 | 1415 | static void namespace_unlock(void) |
---|
1375 | 1416 | { |
---|
1376 | 1417 | struct hlist_head head; |
---|
| 1418 | + struct hlist_node *p; |
---|
| 1419 | + struct mount *m; |
---|
| 1420 | + LIST_HEAD(list); |
---|
1377 | 1421 | |
---|
1378 | 1422 | hlist_move_list(&unmounted, &head); |
---|
| 1423 | + list_splice_init(&ex_mountpoints, &list); |
---|
1379 | 1424 | |
---|
1380 | 1425 | up_write(&namespace_sem); |
---|
| 1426 | + |
---|
| 1427 | + shrink_dentry_list(&list); |
---|
1381 | 1428 | |
---|
1382 | 1429 | if (likely(hlist_empty(&head))) |
---|
1383 | 1430 | return; |
---|
1384 | 1431 | |
---|
1385 | | - synchronize_rcu(); |
---|
| 1432 | + synchronize_rcu_expedited(); |
---|
1386 | 1433 | |
---|
1387 | | - group_pin_kill(&head); |
---|
| 1434 | + hlist_for_each_entry_safe(m, p, &head, mnt_umount) { |
---|
| 1435 | + hlist_del(&m->mnt_umount); |
---|
| 1436 | + mntput(&m->mnt); |
---|
| 1437 | + } |
---|
1388 | 1438 | } |
---|
1389 | 1439 | |
---|
1390 | 1440 | static inline void namespace_lock(void) |
---|
.. | .. |
---|
1470 | 1520 | p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; |
---|
1471 | 1521 | |
---|
1472 | 1522 | disconnect = disconnect_mount(p, how); |
---|
1473 | | - |
---|
1474 | | - pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, |
---|
1475 | | - disconnect ? &unmounted : NULL); |
---|
1476 | 1523 | if (mnt_has_parent(p)) { |
---|
1477 | 1524 | mnt_add_count(p->mnt_parent, -1); |
---|
1478 | 1525 | if (!disconnect) { |
---|
.. | .. |
---|
1483 | 1530 | } |
---|
1484 | 1531 | } |
---|
1485 | 1532 | change_mnt_propagation(p, MS_PRIVATE); |
---|
| 1533 | + if (disconnect) |
---|
| 1534 | + hlist_add_head(&p->mnt_umount, &unmounted); |
---|
1486 | 1535 | } |
---|
1487 | 1536 | } |
---|
1488 | 1537 | |
---|
1489 | 1538 | static void shrink_submounts(struct mount *mnt); |
---|
| 1539 | + |
---|
| 1540 | +static int do_umount_root(struct super_block *sb) |
---|
| 1541 | +{ |
---|
| 1542 | + int ret = 0; |
---|
| 1543 | + |
---|
| 1544 | + down_write(&sb->s_umount); |
---|
| 1545 | + if (!sb_rdonly(sb)) { |
---|
| 1546 | + struct fs_context *fc; |
---|
| 1547 | + |
---|
| 1548 | + fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY, |
---|
| 1549 | + SB_RDONLY); |
---|
| 1550 | + if (IS_ERR(fc)) { |
---|
| 1551 | + ret = PTR_ERR(fc); |
---|
| 1552 | + } else { |
---|
| 1553 | + ret = parse_monolithic_mount_data(fc, NULL); |
---|
| 1554 | + if (!ret) |
---|
| 1555 | + ret = reconfigure_super(fc); |
---|
| 1556 | + put_fs_context(fc); |
---|
| 1557 | + } |
---|
| 1558 | + } |
---|
| 1559 | + up_write(&sb->s_umount); |
---|
| 1560 | + return ret; |
---|
| 1561 | +} |
---|
1490 | 1562 | |
---|
1491 | 1563 | static int do_umount(struct mount *mnt, int flags) |
---|
1492 | 1564 | { |
---|
.. | .. |
---|
1553 | 1625 | */ |
---|
1554 | 1626 | if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) |
---|
1555 | 1627 | return -EPERM; |
---|
1556 | | - down_write(&sb->s_umount); |
---|
1557 | | - if (!sb_rdonly(sb)) |
---|
1558 | | - retval = do_remount_sb(sb, SB_RDONLY, NULL, 0); |
---|
1559 | | - up_write(&sb->s_umount); |
---|
1560 | | - return retval; |
---|
| 1628 | + return do_umount_root(sb); |
---|
1561 | 1629 | } |
---|
1562 | 1630 | |
---|
1563 | 1631 | namespace_lock(); |
---|
.. | .. |
---|
1606 | 1674 | namespace_lock(); |
---|
1607 | 1675 | lock_mount_hash(); |
---|
1608 | 1676 | mp = lookup_mountpoint(dentry); |
---|
1609 | | - if (IS_ERR_OR_NULL(mp)) |
---|
| 1677 | + if (!mp) |
---|
1610 | 1678 | goto out_unlock; |
---|
1611 | 1679 | |
---|
1612 | 1680 | event++; |
---|
1613 | 1681 | while (!hlist_empty(&mp->m_list)) { |
---|
1614 | 1682 | mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); |
---|
1615 | 1683 | if (mnt->mnt.mnt_flags & MNT_UMOUNT) { |
---|
1616 | | - hlist_add_head(&mnt->mnt_umount.s_list, &unmounted); |
---|
1617 | 1684 | umount_mnt(mnt); |
---|
| 1685 | + hlist_add_head(&mnt->mnt_umount, &unmounted); |
---|
1618 | 1686 | } |
---|
1619 | 1687 | else umount_tree(mnt, UMOUNT_CONNECTED); |
---|
1620 | 1688 | } |
---|
.. | .. |
---|
1649 | 1717 | } |
---|
1650 | 1718 | #endif |
---|
1651 | 1719 | |
---|
1652 | | -/* |
---|
1653 | | - * Now umount can handle mount points as well as block devices. |
---|
1654 | | - * This is important for filesystems which use unnamed block devices. |
---|
1655 | | - * |
---|
1656 | | - * We now support a flag for forced unmount like the other 'big iron' |
---|
1657 | | - * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD |
---|
1658 | | - */ |
---|
1659 | | - |
---|
1660 | | -int ksys_umount(char __user *name, int flags) |
---|
| 1720 | +static int can_umount(const struct path *path, int flags) |
---|
1661 | 1721 | { |
---|
1662 | | - struct path path; |
---|
1663 | | - struct mount *mnt; |
---|
1664 | | - int retval; |
---|
1665 | | - int lookup_flags = 0; |
---|
1666 | | - |
---|
1667 | | - if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) |
---|
1668 | | - return -EINVAL; |
---|
| 1722 | + struct mount *mnt = real_mount(path->mnt); |
---|
1669 | 1723 | |
---|
1670 | 1724 | if (!may_mount()) |
---|
1671 | 1725 | return -EPERM; |
---|
| 1726 | + if (path->dentry != path->mnt->mnt_root) |
---|
| 1727 | + return -EINVAL; |
---|
| 1728 | + if (!check_mnt(mnt)) |
---|
| 1729 | + return -EINVAL; |
---|
| 1730 | + if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */ |
---|
| 1731 | + return -EINVAL; |
---|
| 1732 | + if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) |
---|
| 1733 | + return -EPERM; |
---|
| 1734 | + return 0; |
---|
| 1735 | +} |
---|
| 1736 | + |
---|
| 1737 | +// caller is responsible for flags being sane |
---|
| 1738 | +int path_umount(struct path *path, int flags) |
---|
| 1739 | +{ |
---|
| 1740 | + struct mount *mnt = real_mount(path->mnt); |
---|
| 1741 | + int ret; |
---|
| 1742 | + |
---|
| 1743 | + ret = can_umount(path, flags); |
---|
| 1744 | + if (!ret) |
---|
| 1745 | + ret = do_umount(mnt, flags); |
---|
| 1746 | + |
---|
| 1747 | + /* we mustn't call path_put() as that would clear mnt_expiry_mark */ |
---|
| 1748 | + dput(path->dentry); |
---|
| 1749 | + mntput_no_expire(mnt); |
---|
| 1750 | + return ret; |
---|
| 1751 | +} |
---|
| 1752 | + |
---|
| 1753 | +static int ksys_umount(char __user *name, int flags) |
---|
| 1754 | +{ |
---|
| 1755 | + int lookup_flags = LOOKUP_MOUNTPOINT; |
---|
| 1756 | + struct path path; |
---|
| 1757 | + int ret; |
---|
| 1758 | + |
---|
| 1759 | + // basic validity checks done first |
---|
| 1760 | + if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) |
---|
| 1761 | + return -EINVAL; |
---|
1672 | 1762 | |
---|
1673 | 1763 | if (!(flags & UMOUNT_NOFOLLOW)) |
---|
1674 | 1764 | lookup_flags |= LOOKUP_FOLLOW; |
---|
1675 | | - |
---|
1676 | | - retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path); |
---|
1677 | | - if (retval) |
---|
1678 | | - goto out; |
---|
1679 | | - mnt = real_mount(path.mnt); |
---|
1680 | | - retval = -EINVAL; |
---|
1681 | | - if (path.dentry != path.mnt->mnt_root) |
---|
1682 | | - goto dput_and_out; |
---|
1683 | | - if (!check_mnt(mnt)) |
---|
1684 | | - goto dput_and_out; |
---|
1685 | | - if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */ |
---|
1686 | | - goto dput_and_out; |
---|
1687 | | - retval = -EPERM; |
---|
1688 | | - if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) |
---|
1689 | | - goto dput_and_out; |
---|
1690 | | - |
---|
1691 | | - retval = do_umount(mnt, flags); |
---|
1692 | | -dput_and_out: |
---|
1693 | | - /* we mustn't call path_put() as that would clear mnt_expiry_mark */ |
---|
1694 | | - dput(path.dentry); |
---|
1695 | | - mntput_no_expire(mnt); |
---|
1696 | | -out: |
---|
1697 | | - return retval; |
---|
| 1765 | + ret = user_path_at(AT_FDCWD, name, lookup_flags, &path); |
---|
| 1766 | + if (ret) |
---|
| 1767 | + return ret; |
---|
| 1768 | + return path_umount(&path, flags); |
---|
1698 | 1769 | } |
---|
1699 | 1770 | |
---|
1700 | 1771 | SYSCALL_DEFINE2(umount, char __user *, name, int, flags) |
---|
.. | .. |
---|
1721 | 1792 | dentry->d_fsdata == &mntns_operations; |
---|
1722 | 1793 | } |
---|
1723 | 1794 | |
---|
1724 | | -struct mnt_namespace *to_mnt_ns(struct ns_common *ns) |
---|
| 1795 | +static struct mnt_namespace *to_mnt_ns(struct ns_common *ns) |
---|
1725 | 1796 | { |
---|
1726 | 1797 | return container_of(ns, struct mnt_namespace, ns); |
---|
| 1798 | +} |
---|
| 1799 | + |
---|
| 1800 | +struct ns_common *from_mnt_ns(struct mnt_namespace *mnt) |
---|
| 1801 | +{ |
---|
| 1802 | + return &mnt->ns; |
---|
1727 | 1803 | } |
---|
1728 | 1804 | |
---|
1729 | 1805 | static bool mnt_ns_loop(struct dentry *dentry) |
---|
.. | .. |
---|
1821 | 1897 | return &tree->mnt; |
---|
1822 | 1898 | } |
---|
1823 | 1899 | |
---|
| 1900 | +static void free_mnt_ns(struct mnt_namespace *); |
---|
| 1901 | +static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool); |
---|
| 1902 | + |
---|
| 1903 | +void dissolve_on_fput(struct vfsmount *mnt) |
---|
| 1904 | +{ |
---|
| 1905 | + struct mnt_namespace *ns; |
---|
| 1906 | + namespace_lock(); |
---|
| 1907 | + lock_mount_hash(); |
---|
| 1908 | + ns = real_mount(mnt)->mnt_ns; |
---|
| 1909 | + if (ns) { |
---|
| 1910 | + if (is_anon_ns(ns)) |
---|
| 1911 | + umount_tree(real_mount(mnt), UMOUNT_CONNECTED); |
---|
| 1912 | + else |
---|
| 1913 | + ns = NULL; |
---|
| 1914 | + } |
---|
| 1915 | + unlock_mount_hash(); |
---|
| 1916 | + namespace_unlock(); |
---|
| 1917 | + if (ns) |
---|
| 1918 | + free_mnt_ns(ns); |
---|
| 1919 | +} |
---|
| 1920 | + |
---|
1824 | 1921 | void drop_collected_mounts(struct vfsmount *mnt) |
---|
1825 | 1922 | { |
---|
1826 | 1923 | namespace_lock(); |
---|
.. | .. |
---|
1874 | 1971 | if (IS_ERR(new_mnt)) |
---|
1875 | 1972 | return ERR_CAST(new_mnt); |
---|
1876 | 1973 | |
---|
| 1974 | + /* Longterm mount to be removed by kern_unmount*() */ |
---|
| 1975 | + new_mnt->mnt_ns = MNT_NS_INTERNAL; |
---|
| 1976 | + |
---|
1877 | 1977 | return &new_mnt->mnt; |
---|
1878 | 1978 | |
---|
1879 | 1979 | invalid: |
---|
.. | .. |
---|
1895 | 1995 | return res; |
---|
1896 | 1996 | } |
---|
1897 | 1997 | return 0; |
---|
| 1998 | +} |
---|
| 1999 | + |
---|
| 2000 | +static void lock_mnt_tree(struct mount *mnt) |
---|
| 2001 | +{ |
---|
| 2002 | + struct mount *p; |
---|
| 2003 | + |
---|
| 2004 | + for (p = mnt; p; p = next_mnt(p, mnt)) { |
---|
| 2005 | + int flags = p->mnt.mnt_flags; |
---|
| 2006 | + /* Don't allow unprivileged users to change mount flags */ |
---|
| 2007 | + flags |= MNT_LOCK_ATIME; |
---|
| 2008 | + |
---|
| 2009 | + if (flags & MNT_READONLY) |
---|
| 2010 | + flags |= MNT_LOCK_READONLY; |
---|
| 2011 | + |
---|
| 2012 | + if (flags & MNT_NODEV) |
---|
| 2013 | + flags |= MNT_LOCK_NODEV; |
---|
| 2014 | + |
---|
| 2015 | + if (flags & MNT_NOSUID) |
---|
| 2016 | + flags |= MNT_LOCK_NOSUID; |
---|
| 2017 | + |
---|
| 2018 | + if (flags & MNT_NOEXEC) |
---|
| 2019 | + flags |= MNT_LOCK_NOEXEC; |
---|
| 2020 | + /* Don't allow unprivileged users to reveal what is under a mount */ |
---|
| 2021 | + if (list_empty(&p->mnt_expire)) |
---|
| 2022 | + flags |= MNT_LOCKED; |
---|
| 2023 | + p->mnt.mnt_flags = flags; |
---|
| 2024 | + } |
---|
1898 | 2025 | } |
---|
1899 | 2026 | |
---|
1900 | 2027 | static void cleanup_group_ids(struct mount *mnt, struct mount *end) |
---|
.. | .. |
---|
2012 | 2139 | static int attach_recursive_mnt(struct mount *source_mnt, |
---|
2013 | 2140 | struct mount *dest_mnt, |
---|
2014 | 2141 | struct mountpoint *dest_mp, |
---|
2015 | | - struct path *parent_path) |
---|
| 2142 | + bool moving) |
---|
2016 | 2143 | { |
---|
| 2144 | + struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; |
---|
2017 | 2145 | HLIST_HEAD(tree_list); |
---|
2018 | 2146 | struct mnt_namespace *ns = dest_mnt->mnt_ns; |
---|
2019 | 2147 | struct mountpoint *smp; |
---|
.. | .. |
---|
2029 | 2157 | return PTR_ERR(smp); |
---|
2030 | 2158 | |
---|
2031 | 2159 | /* Is there space to add these mounts to the mount namespace? */ |
---|
2032 | | - if (!parent_path) { |
---|
| 2160 | + if (!moving) { |
---|
2033 | 2161 | err = count_mounts(ns, source_mnt); |
---|
2034 | 2162 | if (err) |
---|
2035 | 2163 | goto out; |
---|
.. | .. |
---|
2048 | 2176 | } else { |
---|
2049 | 2177 | lock_mount_hash(); |
---|
2050 | 2178 | } |
---|
2051 | | - if (parent_path) { |
---|
2052 | | - detach_mnt(source_mnt, parent_path); |
---|
| 2179 | + if (moving) { |
---|
| 2180 | + unhash_mnt(source_mnt); |
---|
2053 | 2181 | attach_mnt(source_mnt, dest_mnt, dest_mp); |
---|
2054 | 2182 | touch_mnt_namespace(source_mnt->mnt_ns); |
---|
2055 | 2183 | } else { |
---|
| 2184 | + if (source_mnt->mnt_ns) { |
---|
| 2185 | + /* move from anon - the caller will destroy */ |
---|
| 2186 | + list_del_init(&source_mnt->mnt_ns->list); |
---|
| 2187 | + } |
---|
2056 | 2188 | mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); |
---|
2057 | 2189 | commit_tree(source_mnt); |
---|
2058 | 2190 | } |
---|
.. | .. |
---|
2064 | 2196 | child->mnt_mountpoint); |
---|
2065 | 2197 | if (q) |
---|
2066 | 2198 | mnt_change_mountpoint(child, smp, q); |
---|
| 2199 | + /* Notice when we are propagating across user namespaces */ |
---|
| 2200 | + if (child->mnt_parent->mnt_ns->user_ns != user_ns) |
---|
| 2201 | + lock_mnt_tree(child); |
---|
| 2202 | + child->mnt.mnt_flags &= ~MNT_LOCKED; |
---|
2067 | 2203 | commit_tree(child); |
---|
2068 | 2204 | } |
---|
2069 | 2205 | put_mountpoint(smp); |
---|
.. | .. |
---|
2139 | 2275 | d_is_dir(mnt->mnt.mnt_root)) |
---|
2140 | 2276 | return -ENOTDIR; |
---|
2141 | 2277 | |
---|
2142 | | - return attach_recursive_mnt(mnt, p, mp, NULL); |
---|
| 2278 | + return attach_recursive_mnt(mnt, p, mp, false); |
---|
2143 | 2279 | } |
---|
2144 | 2280 | |
---|
2145 | 2281 | /* |
---|
.. | .. |
---|
2194 | 2330 | return err; |
---|
2195 | 2331 | } |
---|
2196 | 2332 | |
---|
| 2333 | +static struct mount *__do_loopback(struct path *old_path, int recurse) |
---|
| 2334 | +{ |
---|
| 2335 | + struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt); |
---|
| 2336 | + |
---|
| 2337 | + if (IS_MNT_UNBINDABLE(old)) |
---|
| 2338 | + return mnt; |
---|
| 2339 | + |
---|
| 2340 | + if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations) |
---|
| 2341 | + return mnt; |
---|
| 2342 | + |
---|
| 2343 | + if (!recurse && has_locked_children(old, old_path->dentry)) |
---|
| 2344 | + return mnt; |
---|
| 2345 | + |
---|
| 2346 | + if (recurse) |
---|
| 2347 | + mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE); |
---|
| 2348 | + else |
---|
| 2349 | + mnt = clone_mnt(old, old_path->dentry, 0); |
---|
| 2350 | + |
---|
| 2351 | + if (!IS_ERR(mnt)) |
---|
| 2352 | + mnt->mnt.mnt_flags &= ~MNT_LOCKED; |
---|
| 2353 | + |
---|
| 2354 | + return mnt; |
---|
| 2355 | +} |
---|
| 2356 | + |
---|
2197 | 2357 | /* |
---|
2198 | 2358 | * do loopback mount. |
---|
2199 | 2359 | */ |
---|
.. | .. |
---|
2201 | 2361 | int recurse) |
---|
2202 | 2362 | { |
---|
2203 | 2363 | struct path old_path; |
---|
2204 | | - struct mount *mnt = NULL, *old, *parent; |
---|
| 2364 | + struct mount *mnt = NULL, *parent; |
---|
2205 | 2365 | struct mountpoint *mp; |
---|
2206 | 2366 | int err; |
---|
2207 | 2367 | if (!old_name || !*old_name) |
---|
.. | .. |
---|
2215 | 2375 | goto out; |
---|
2216 | 2376 | |
---|
2217 | 2377 | mp = lock_mount(path); |
---|
2218 | | - err = PTR_ERR(mp); |
---|
2219 | | - if (IS_ERR(mp)) |
---|
| 2378 | + if (IS_ERR(mp)) { |
---|
| 2379 | + err = PTR_ERR(mp); |
---|
2220 | 2380 | goto out; |
---|
| 2381 | + } |
---|
2221 | 2382 | |
---|
2222 | | - old = real_mount(old_path.mnt); |
---|
2223 | 2383 | parent = real_mount(path->mnt); |
---|
2224 | | - |
---|
2225 | | - err = -EINVAL; |
---|
2226 | | - if (IS_MNT_UNBINDABLE(old)) |
---|
2227 | | - goto out2; |
---|
2228 | | - |
---|
2229 | 2384 | if (!check_mnt(parent)) |
---|
2230 | 2385 | goto out2; |
---|
2231 | 2386 | |
---|
2232 | | - if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations) |
---|
2233 | | - goto out2; |
---|
2234 | | - |
---|
2235 | | - if (!recurse && has_locked_children(old, old_path.dentry)) |
---|
2236 | | - goto out2; |
---|
2237 | | - |
---|
2238 | | - if (recurse) |
---|
2239 | | - mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE); |
---|
2240 | | - else |
---|
2241 | | - mnt = clone_mnt(old, old_path.dentry, 0); |
---|
2242 | | - |
---|
| 2387 | + mnt = __do_loopback(&old_path, recurse); |
---|
2243 | 2388 | if (IS_ERR(mnt)) { |
---|
2244 | 2389 | err = PTR_ERR(mnt); |
---|
2245 | 2390 | goto out2; |
---|
2246 | 2391 | } |
---|
2247 | | - |
---|
2248 | | - mnt->mnt.mnt_flags &= ~MNT_LOCKED; |
---|
2249 | 2392 | |
---|
2250 | 2393 | err = graft_tree(mnt, parent, mp); |
---|
2251 | 2394 | if (err) { |
---|
.. | .. |
---|
2260 | 2403 | return err; |
---|
2261 | 2404 | } |
---|
2262 | 2405 | |
---|
2263 | | -static int change_mount_flags(struct vfsmount *mnt, int ms_flags) |
---|
| 2406 | +static struct file *open_detached_copy(struct path *path, bool recursive) |
---|
2264 | 2407 | { |
---|
2265 | | - int error = 0; |
---|
2266 | | - int readonly_request = 0; |
---|
| 2408 | + struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; |
---|
| 2409 | + struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true); |
---|
| 2410 | + struct mount *mnt, *p; |
---|
| 2411 | + struct file *file; |
---|
2267 | 2412 | |
---|
2268 | | - if (ms_flags & MS_RDONLY) |
---|
2269 | | - readonly_request = 1; |
---|
2270 | | - if (readonly_request == __mnt_is_readonly(mnt)) |
---|
| 2413 | + if (IS_ERR(ns)) |
---|
| 2414 | + return ERR_CAST(ns); |
---|
| 2415 | + |
---|
| 2416 | + namespace_lock(); |
---|
| 2417 | + mnt = __do_loopback(path, recursive); |
---|
| 2418 | + if (IS_ERR(mnt)) { |
---|
| 2419 | + namespace_unlock(); |
---|
| 2420 | + free_mnt_ns(ns); |
---|
| 2421 | + return ERR_CAST(mnt); |
---|
| 2422 | + } |
---|
| 2423 | + |
---|
| 2424 | + lock_mount_hash(); |
---|
| 2425 | + for (p = mnt; p; p = next_mnt(p, mnt)) { |
---|
| 2426 | + p->mnt_ns = ns; |
---|
| 2427 | + ns->mounts++; |
---|
| 2428 | + } |
---|
| 2429 | + ns->root = mnt; |
---|
| 2430 | + list_add_tail(&ns->list, &mnt->mnt_list); |
---|
| 2431 | + mntget(&mnt->mnt); |
---|
| 2432 | + unlock_mount_hash(); |
---|
| 2433 | + namespace_unlock(); |
---|
| 2434 | + |
---|
| 2435 | + mntput(path->mnt); |
---|
| 2436 | + path->mnt = &mnt->mnt; |
---|
| 2437 | + file = dentry_open(path, O_PATH, current_cred()); |
---|
| 2438 | + if (IS_ERR(file)) |
---|
| 2439 | + dissolve_on_fput(path->mnt); |
---|
| 2440 | + else |
---|
| 2441 | + file->f_mode |= FMODE_NEED_UNMOUNT; |
---|
| 2442 | + return file; |
---|
| 2443 | +} |
---|
| 2444 | + |
---|
| 2445 | +SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags) |
---|
| 2446 | +{ |
---|
| 2447 | + struct file *file; |
---|
| 2448 | + struct path path; |
---|
| 2449 | + int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW; |
---|
| 2450 | + bool detached = flags & OPEN_TREE_CLONE; |
---|
| 2451 | + int error; |
---|
| 2452 | + int fd; |
---|
| 2453 | + |
---|
| 2454 | + BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC); |
---|
| 2455 | + |
---|
| 2456 | + if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE | |
---|
| 2457 | + AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE | |
---|
| 2458 | + OPEN_TREE_CLOEXEC)) |
---|
| 2459 | + return -EINVAL; |
---|
| 2460 | + |
---|
| 2461 | + if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE) |
---|
| 2462 | + return -EINVAL; |
---|
| 2463 | + |
---|
| 2464 | + if (flags & AT_NO_AUTOMOUNT) |
---|
| 2465 | + lookup_flags &= ~LOOKUP_AUTOMOUNT; |
---|
| 2466 | + if (flags & AT_SYMLINK_NOFOLLOW) |
---|
| 2467 | + lookup_flags &= ~LOOKUP_FOLLOW; |
---|
| 2468 | + if (flags & AT_EMPTY_PATH) |
---|
| 2469 | + lookup_flags |= LOOKUP_EMPTY; |
---|
| 2470 | + |
---|
| 2471 | + if (detached && !may_mount()) |
---|
| 2472 | + return -EPERM; |
---|
| 2473 | + |
---|
| 2474 | + fd = get_unused_fd_flags(flags & O_CLOEXEC); |
---|
| 2475 | + if (fd < 0) |
---|
| 2476 | + return fd; |
---|
| 2477 | + |
---|
| 2478 | + error = user_path_at(dfd, filename, lookup_flags, &path); |
---|
| 2479 | + if (unlikely(error)) { |
---|
| 2480 | + file = ERR_PTR(error); |
---|
| 2481 | + } else { |
---|
| 2482 | + if (detached) |
---|
| 2483 | + file = open_detached_copy(&path, flags & AT_RECURSIVE); |
---|
| 2484 | + else |
---|
| 2485 | + file = dentry_open(&path, O_PATH, current_cred()); |
---|
| 2486 | + path_put(&path); |
---|
| 2487 | + } |
---|
| 2488 | + if (IS_ERR(file)) { |
---|
| 2489 | + put_unused_fd(fd); |
---|
| 2490 | + return PTR_ERR(file); |
---|
| 2491 | + } |
---|
| 2492 | + fd_install(fd, file); |
---|
| 2493 | + return fd; |
---|
| 2494 | +} |
---|
| 2495 | + |
---|
| 2496 | +/* |
---|
| 2497 | + * Don't allow locked mount flags to be cleared. |
---|
| 2498 | + * |
---|
| 2499 | + * No locks need to be held here while testing the various MNT_LOCK |
---|
| 2500 | + * flags because those flags can never be cleared once they are set. |
---|
| 2501 | + */ |
---|
| 2502 | +static bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags) |
---|
| 2503 | +{ |
---|
| 2504 | + unsigned int fl = mnt->mnt.mnt_flags; |
---|
| 2505 | + |
---|
| 2506 | + if ((fl & MNT_LOCK_READONLY) && |
---|
| 2507 | + !(mnt_flags & MNT_READONLY)) |
---|
| 2508 | + return false; |
---|
| 2509 | + |
---|
| 2510 | + if ((fl & MNT_LOCK_NODEV) && |
---|
| 2511 | + !(mnt_flags & MNT_NODEV)) |
---|
| 2512 | + return false; |
---|
| 2513 | + |
---|
| 2514 | + if ((fl & MNT_LOCK_NOSUID) && |
---|
| 2515 | + !(mnt_flags & MNT_NOSUID)) |
---|
| 2516 | + return false; |
---|
| 2517 | + |
---|
| 2518 | + if ((fl & MNT_LOCK_NOEXEC) && |
---|
| 2519 | + !(mnt_flags & MNT_NOEXEC)) |
---|
| 2520 | + return false; |
---|
| 2521 | + |
---|
| 2522 | + if ((fl & MNT_LOCK_ATIME) && |
---|
| 2523 | + ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) |
---|
| 2524 | + return false; |
---|
| 2525 | + |
---|
| 2526 | + return true; |
---|
| 2527 | +} |
---|
| 2528 | + |
---|
| 2529 | +static int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags) |
---|
| 2530 | +{ |
---|
| 2531 | + bool readonly_request = (mnt_flags & MNT_READONLY); |
---|
| 2532 | + |
---|
| 2533 | + if (readonly_request == __mnt_is_readonly(&mnt->mnt)) |
---|
2271 | 2534 | return 0; |
---|
2272 | 2535 | |
---|
2273 | 2536 | if (readonly_request) |
---|
2274 | | - error = mnt_make_readonly(real_mount(mnt)); |
---|
2275 | | - else |
---|
2276 | | - __mnt_unmake_readonly(real_mount(mnt)); |
---|
2277 | | - return error; |
---|
| 2537 | + return mnt_make_readonly(mnt); |
---|
| 2538 | + |
---|
| 2539 | + return __mnt_unmake_readonly(mnt); |
---|
| 2540 | +} |
---|
| 2541 | + |
---|
| 2542 | +/* |
---|
| 2543 | + * Update the user-settable attributes on a mount. The caller must hold |
---|
| 2544 | + * sb->s_umount for writing. |
---|
| 2545 | + */ |
---|
| 2546 | +static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags) |
---|
| 2547 | +{ |
---|
| 2548 | + lock_mount_hash(); |
---|
| 2549 | + mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; |
---|
| 2550 | + mnt->mnt.mnt_flags = mnt_flags; |
---|
| 2551 | + touch_mnt_namespace(mnt->mnt_ns); |
---|
| 2552 | + unlock_mount_hash(); |
---|
| 2553 | +} |
---|
| 2554 | + |
---|
| 2555 | +static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *mnt) |
---|
| 2556 | +{ |
---|
| 2557 | + struct super_block *sb = mnt->mnt_sb; |
---|
| 2558 | + |
---|
| 2559 | + if (!__mnt_is_readonly(mnt) && |
---|
| 2560 | + (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) { |
---|
| 2561 | + char *buf = (char *)__get_free_page(GFP_KERNEL); |
---|
| 2562 | + char *mntpath = buf ? d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM); |
---|
| 2563 | + struct tm tm; |
---|
| 2564 | + |
---|
| 2565 | + time64_to_tm(sb->s_time_max, 0, &tm); |
---|
| 2566 | + |
---|
| 2567 | + pr_warn("%s filesystem being %s at %s supports timestamps until %04ld (0x%llx)\n", |
---|
| 2568 | + sb->s_type->name, |
---|
| 2569 | + is_mounted(mnt) ? "remounted" : "mounted", |
---|
| 2570 | + mntpath, |
---|
| 2571 | + tm.tm_year+1900, (unsigned long long)sb->s_time_max); |
---|
| 2572 | + |
---|
| 2573 | + free_page((unsigned long)buf); |
---|
| 2574 | + } |
---|
| 2575 | +} |
---|
| 2576 | + |
---|
| 2577 | +/* |
---|
| 2578 | + * Handle reconfiguration of the mountpoint only without alteration of the |
---|
| 2579 | + * superblock it refers to. This is triggered by specifying MS_REMOUNT|MS_BIND |
---|
| 2580 | + * to mount(2). |
---|
| 2581 | + */ |
---|
| 2582 | +static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags) |
---|
| 2583 | +{ |
---|
| 2584 | + struct super_block *sb = path->mnt->mnt_sb; |
---|
| 2585 | + struct mount *mnt = real_mount(path->mnt); |
---|
| 2586 | + int ret; |
---|
| 2587 | + |
---|
| 2588 | + if (!check_mnt(mnt)) |
---|
| 2589 | + return -EINVAL; |
---|
| 2590 | + |
---|
| 2591 | + if (path->dentry != mnt->mnt.mnt_root) |
---|
| 2592 | + return -EINVAL; |
---|
| 2593 | + |
---|
| 2594 | + if (!can_change_locked_flags(mnt, mnt_flags)) |
---|
| 2595 | + return -EPERM; |
---|
| 2596 | + |
---|
| 2597 | + down_write(&sb->s_umount); |
---|
| 2598 | + ret = change_mount_ro_state(mnt, mnt_flags); |
---|
| 2599 | + if (ret == 0) |
---|
| 2600 | + set_mount_attributes(mnt, mnt_flags); |
---|
| 2601 | + up_write(&sb->s_umount); |
---|
| 2602 | + |
---|
| 2603 | + mnt_warn_timestamp_expiry(path, &mnt->mnt); |
---|
| 2604 | + |
---|
| 2605 | + return ret; |
---|
2278 | 2606 | } |
---|
2279 | 2607 | |
---|
2280 | 2608 | /* |
---|
.. | .. |
---|
2288 | 2616 | int err; |
---|
2289 | 2617 | struct super_block *sb = path->mnt->mnt_sb; |
---|
2290 | 2618 | struct mount *mnt = real_mount(path->mnt); |
---|
| 2619 | + struct fs_context *fc; |
---|
2291 | 2620 | |
---|
2292 | 2621 | if (!check_mnt(mnt)) |
---|
2293 | 2622 | return -EINVAL; |
---|
.. | .. |
---|
2295 | 2624 | if (path->dentry != path->mnt->mnt_root) |
---|
2296 | 2625 | return -EINVAL; |
---|
2297 | 2626 | |
---|
2298 | | - /* Don't allow changing of locked mnt flags. |
---|
2299 | | - * |
---|
2300 | | - * No locks need to be held here while testing the various |
---|
2301 | | - * MNT_LOCK flags because those flags can never be cleared |
---|
2302 | | - * once they are set. |
---|
2303 | | - */ |
---|
2304 | | - if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) && |
---|
2305 | | - !(mnt_flags & MNT_READONLY)) { |
---|
| 2627 | + if (!can_change_locked_flags(mnt, mnt_flags)) |
---|
2306 | 2628 | return -EPERM; |
---|
2307 | | - } |
---|
2308 | | - if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && |
---|
2309 | | - !(mnt_flags & MNT_NODEV)) { |
---|
2310 | | - return -EPERM; |
---|
2311 | | - } |
---|
2312 | | - if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) && |
---|
2313 | | - !(mnt_flags & MNT_NOSUID)) { |
---|
2314 | | - return -EPERM; |
---|
2315 | | - } |
---|
2316 | | - if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) && |
---|
2317 | | - !(mnt_flags & MNT_NOEXEC)) { |
---|
2318 | | - return -EPERM; |
---|
2319 | | - } |
---|
2320 | | - if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) && |
---|
2321 | | - ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) { |
---|
2322 | | - return -EPERM; |
---|
2323 | | - } |
---|
2324 | 2629 | |
---|
2325 | | - err = security_sb_remount(sb, data); |
---|
2326 | | - if (err) |
---|
2327 | | - return err; |
---|
| 2630 | + fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK); |
---|
| 2631 | + if (IS_ERR(fc)) |
---|
| 2632 | + return PTR_ERR(fc); |
---|
2328 | 2633 | |
---|
2329 | | - down_write(&sb->s_umount); |
---|
2330 | | - if (ms_flags & MS_BIND) |
---|
2331 | | - err = change_mount_flags(path->mnt, ms_flags); |
---|
2332 | | - else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) |
---|
2333 | | - err = -EPERM; |
---|
2334 | | - else { |
---|
2335 | | - err = do_remount_sb2(path->mnt, sb, sb_flags, data, 0); |
---|
2336 | | - namespace_lock(); |
---|
2337 | | - lock_mount_hash(); |
---|
2338 | | - propagate_remount(mnt); |
---|
2339 | | - unlock_mount_hash(); |
---|
2340 | | - namespace_unlock(); |
---|
2341 | | - } |
---|
| 2634 | + fc->oldapi = true; |
---|
| 2635 | + err = parse_monolithic_mount_data(fc, data); |
---|
2342 | 2636 | if (!err) { |
---|
2343 | | - lock_mount_hash(); |
---|
2344 | | - mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; |
---|
2345 | | - mnt->mnt.mnt_flags = mnt_flags; |
---|
2346 | | - touch_mnt_namespace(mnt->mnt_ns); |
---|
2347 | | - unlock_mount_hash(); |
---|
| 2637 | + down_write(&sb->s_umount); |
---|
| 2638 | + err = -EPERM; |
---|
| 2639 | + if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { |
---|
| 2640 | + err = reconfigure_super(fc); |
---|
| 2641 | + if (!err) |
---|
| 2642 | + set_mount_attributes(mnt, mnt_flags); |
---|
| 2643 | + } |
---|
| 2644 | + up_write(&sb->s_umount); |
---|
2348 | 2645 | } |
---|
2349 | | - up_write(&sb->s_umount); |
---|
| 2646 | + |
---|
| 2647 | + mnt_warn_timestamp_expiry(path, &mnt->mnt); |
---|
| 2648 | + |
---|
| 2649 | + put_fs_context(fc); |
---|
2350 | 2650 | return err; |
---|
2351 | 2651 | } |
---|
2352 | 2652 | |
---|
.. | .. |
---|
2360 | 2660 | return 0; |
---|
2361 | 2661 | } |
---|
2362 | 2662 | |
---|
2363 | | -static int do_move_mount(struct path *path, const char *old_name) |
---|
| 2663 | +/* |
---|
| 2664 | + * Check that there aren't references to earlier/same mount namespaces in the |
---|
| 2665 | + * specified subtree. Such references can act as pins for mount namespaces |
---|
| 2666 | + * that aren't checked by the mount-cycle checking code, thereby allowing |
---|
| 2667 | + * cycles to be made. |
---|
| 2668 | + */ |
---|
| 2669 | +static bool check_for_nsfs_mounts(struct mount *subtree) |
---|
2364 | 2670 | { |
---|
2365 | | - struct path old_path, parent_path; |
---|
| 2671 | + struct mount *p; |
---|
| 2672 | + bool ret = false; |
---|
| 2673 | + |
---|
| 2674 | + lock_mount_hash(); |
---|
| 2675 | + for (p = subtree; p; p = next_mnt(p, subtree)) |
---|
| 2676 | + if (mnt_ns_loop(p->mnt.mnt_root)) |
---|
| 2677 | + goto out; |
---|
| 2678 | + |
---|
| 2679 | + ret = true; |
---|
| 2680 | +out: |
---|
| 2681 | + unlock_mount_hash(); |
---|
| 2682 | + return ret; |
---|
| 2683 | +} |
---|
| 2684 | + |
---|
| 2685 | +static int do_move_mount(struct path *old_path, struct path *new_path) |
---|
| 2686 | +{ |
---|
| 2687 | + struct mnt_namespace *ns; |
---|
2366 | 2688 | struct mount *p; |
---|
2367 | 2689 | struct mount *old; |
---|
2368 | | - struct mountpoint *mp; |
---|
| 2690 | + struct mount *parent; |
---|
| 2691 | + struct mountpoint *mp, *old_mp; |
---|
2369 | 2692 | int err; |
---|
2370 | | - if (!old_name || !*old_name) |
---|
2371 | | - return -EINVAL; |
---|
2372 | | - err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); |
---|
2373 | | - if (err) |
---|
2374 | | - return err; |
---|
| 2693 | + bool attached; |
---|
2375 | 2694 | |
---|
2376 | | - mp = lock_mount(path); |
---|
2377 | | - err = PTR_ERR(mp); |
---|
| 2695 | + mp = lock_mount(new_path); |
---|
2378 | 2696 | if (IS_ERR(mp)) |
---|
| 2697 | + return PTR_ERR(mp); |
---|
| 2698 | + |
---|
| 2699 | + old = real_mount(old_path->mnt); |
---|
| 2700 | + p = real_mount(new_path->mnt); |
---|
| 2701 | + parent = old->mnt_parent; |
---|
| 2702 | + attached = mnt_has_parent(old); |
---|
| 2703 | + old_mp = old->mnt_mp; |
---|
| 2704 | + ns = old->mnt_ns; |
---|
| 2705 | + |
---|
| 2706 | + err = -EINVAL; |
---|
| 2707 | + /* The mountpoint must be in our namespace. */ |
---|
| 2708 | + if (!check_mnt(p)) |
---|
2379 | 2709 | goto out; |
---|
2380 | 2710 | |
---|
2381 | | - old = real_mount(old_path.mnt); |
---|
2382 | | - p = real_mount(path->mnt); |
---|
| 2711 | + /* The thing moved must be mounted... */ |
---|
| 2712 | + if (!is_mounted(&old->mnt)) |
---|
| 2713 | + goto out; |
---|
2383 | 2714 | |
---|
2384 | | - err = -EINVAL; |
---|
2385 | | - if (!check_mnt(p) || !check_mnt(old)) |
---|
2386 | | - goto out1; |
---|
| 2715 | + /* ... and either ours or the root of anon namespace */ |
---|
| 2716 | + if (!(attached ? check_mnt(old) : is_anon_ns(ns))) |
---|
| 2717 | + goto out; |
---|
2387 | 2718 | |
---|
2388 | 2719 | if (old->mnt.mnt_flags & MNT_LOCKED) |
---|
2389 | | - goto out1; |
---|
| 2720 | + goto out; |
---|
2390 | 2721 | |
---|
2391 | | - err = -EINVAL; |
---|
2392 | | - if (old_path.dentry != old_path.mnt->mnt_root) |
---|
2393 | | - goto out1; |
---|
| 2722 | + if (old_path->dentry != old_path->mnt->mnt_root) |
---|
| 2723 | + goto out; |
---|
2394 | 2724 | |
---|
2395 | | - if (!mnt_has_parent(old)) |
---|
2396 | | - goto out1; |
---|
2397 | | - |
---|
2398 | | - if (d_is_dir(path->dentry) != |
---|
2399 | | - d_is_dir(old_path.dentry)) |
---|
2400 | | - goto out1; |
---|
| 2725 | + if (d_is_dir(new_path->dentry) != |
---|
| 2726 | + d_is_dir(old_path->dentry)) |
---|
| 2727 | + goto out; |
---|
2401 | 2728 | /* |
---|
2402 | 2729 | * Don't move a mount residing in a shared parent. |
---|
2403 | 2730 | */ |
---|
2404 | | - if (IS_MNT_SHARED(old->mnt_parent)) |
---|
2405 | | - goto out1; |
---|
| 2731 | + if (attached && IS_MNT_SHARED(parent)) |
---|
| 2732 | + goto out; |
---|
2406 | 2733 | /* |
---|
2407 | 2734 | * Don't move a mount tree containing unbindable mounts to a destination |
---|
2408 | 2735 | * mount which is shared. |
---|
2409 | 2736 | */ |
---|
2410 | 2737 | if (IS_MNT_SHARED(p) && tree_contains_unbindable(old)) |
---|
2411 | | - goto out1; |
---|
| 2738 | + goto out; |
---|
2412 | 2739 | err = -ELOOP; |
---|
| 2740 | + if (!check_for_nsfs_mounts(old)) |
---|
| 2741 | + goto out; |
---|
2413 | 2742 | for (; mnt_has_parent(p); p = p->mnt_parent) |
---|
2414 | 2743 | if (p == old) |
---|
2415 | | - goto out1; |
---|
| 2744 | + goto out; |
---|
2416 | 2745 | |
---|
2417 | | - err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path); |
---|
| 2746 | + err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp, |
---|
| 2747 | + attached); |
---|
2418 | 2748 | if (err) |
---|
2419 | | - goto out1; |
---|
| 2749 | + goto out; |
---|
2420 | 2750 | |
---|
2421 | 2751 | /* if the mount is moved, it should no longer be expire |
---|
2422 | 2752 | * automatically */ |
---|
2423 | 2753 | list_del_init(&old->mnt_expire); |
---|
2424 | | -out1: |
---|
2425 | | - unlock_mount(mp); |
---|
| 2754 | + if (attached) |
---|
| 2755 | + put_mountpoint(old_mp); |
---|
2426 | 2756 | out: |
---|
2427 | | - if (!err) |
---|
2428 | | - path_put(&parent_path); |
---|
2429 | | - path_put(&old_path); |
---|
| 2757 | + unlock_mount(mp); |
---|
| 2758 | + if (!err) { |
---|
| 2759 | + if (attached) |
---|
| 2760 | + mntput_no_expire(parent); |
---|
| 2761 | + else |
---|
| 2762 | + free_mnt_ns(ns); |
---|
| 2763 | + } |
---|
2430 | 2764 | return err; |
---|
2431 | 2765 | } |
---|
2432 | 2766 | |
---|
2433 | | -static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) |
---|
| 2767 | +static int do_move_mount_old(struct path *path, const char *old_name) |
---|
2434 | 2768 | { |
---|
| 2769 | + struct path old_path; |
---|
2435 | 2770 | int err; |
---|
2436 | | - const char *subtype = strchr(fstype, '.'); |
---|
2437 | | - if (subtype) { |
---|
2438 | | - subtype++; |
---|
2439 | | - err = -EINVAL; |
---|
2440 | | - if (!subtype[0]) |
---|
2441 | | - goto err; |
---|
2442 | | - } else |
---|
2443 | | - subtype = ""; |
---|
2444 | 2771 | |
---|
2445 | | - mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL); |
---|
2446 | | - err = -ENOMEM; |
---|
2447 | | - if (!mnt->mnt_sb->s_subtype) |
---|
2448 | | - goto err; |
---|
2449 | | - return mnt; |
---|
| 2772 | + if (!old_name || !*old_name) |
---|
| 2773 | + return -EINVAL; |
---|
2450 | 2774 | |
---|
2451 | | - err: |
---|
2452 | | - mntput(mnt); |
---|
2453 | | - return ERR_PTR(err); |
---|
| 2775 | + err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); |
---|
| 2776 | + if (err) |
---|
| 2777 | + return err; |
---|
| 2778 | + |
---|
| 2779 | + err = do_move_mount(&old_path, path); |
---|
| 2780 | + path_put(&old_path); |
---|
| 2781 | + return err; |
---|
2454 | 2782 | } |
---|
2455 | 2783 | |
---|
2456 | 2784 | /* |
---|
2457 | 2785 | * add a mount into a namespace's mount tree |
---|
2458 | 2786 | */ |
---|
2459 | | -static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) |
---|
| 2787 | +static int do_add_mount(struct mount *newmnt, struct mountpoint *mp, |
---|
| 2788 | + struct path *path, int mnt_flags) |
---|
2460 | 2789 | { |
---|
2461 | | - struct mountpoint *mp; |
---|
2462 | | - struct mount *parent; |
---|
2463 | | - int err; |
---|
| 2790 | + struct mount *parent = real_mount(path->mnt); |
---|
2464 | 2791 | |
---|
2465 | 2792 | mnt_flags &= ~MNT_INTERNAL_FLAGS; |
---|
2466 | 2793 | |
---|
2467 | | - mp = lock_mount(path); |
---|
2468 | | - if (IS_ERR(mp)) |
---|
2469 | | - return PTR_ERR(mp); |
---|
2470 | | - |
---|
2471 | | - parent = real_mount(path->mnt); |
---|
2472 | | - err = -EINVAL; |
---|
2473 | 2794 | if (unlikely(!check_mnt(parent))) { |
---|
2474 | 2795 | /* that's acceptable only for automounts done in private ns */ |
---|
2475 | 2796 | if (!(mnt_flags & MNT_SHRINKABLE)) |
---|
2476 | | - goto unlock; |
---|
| 2797 | + return -EINVAL; |
---|
2477 | 2798 | /* ... and for those we'd better have mountpoint still alive */ |
---|
2478 | 2799 | if (!parent->mnt_ns) |
---|
2479 | | - goto unlock; |
---|
| 2800 | + return -EINVAL; |
---|
2480 | 2801 | } |
---|
2481 | 2802 | |
---|
2482 | 2803 | /* Refuse the same filesystem on the same mount point */ |
---|
2483 | | - err = -EBUSY; |
---|
2484 | 2804 | if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb && |
---|
2485 | 2805 | path->mnt->mnt_root == path->dentry) |
---|
2486 | | - goto unlock; |
---|
| 2806 | + return -EBUSY; |
---|
2487 | 2807 | |
---|
2488 | | - err = -EINVAL; |
---|
2489 | 2808 | if (d_is_symlink(newmnt->mnt.mnt_root)) |
---|
2490 | | - goto unlock; |
---|
| 2809 | + return -EINVAL; |
---|
2491 | 2810 | |
---|
2492 | 2811 | newmnt->mnt.mnt_flags = mnt_flags; |
---|
2493 | | - err = graft_tree(newmnt, parent, mp); |
---|
2494 | | - |
---|
2495 | | -unlock: |
---|
2496 | | - unlock_mount(mp); |
---|
2497 | | - return err; |
---|
| 2812 | + return graft_tree(newmnt, parent, mp); |
---|
2498 | 2813 | } |
---|
2499 | 2814 | |
---|
2500 | | -static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags); |
---|
| 2815 | +static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags); |
---|
| 2816 | + |
---|
| 2817 | +/* |
---|
| 2818 | + * Create a new mount using a superblock configuration and request it |
---|
| 2819 | + * be added to the namespace tree. |
---|
| 2820 | + */ |
---|
| 2821 | +static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint, |
---|
| 2822 | + unsigned int mnt_flags) |
---|
| 2823 | +{ |
---|
| 2824 | + struct vfsmount *mnt; |
---|
| 2825 | + struct mountpoint *mp; |
---|
| 2826 | + struct super_block *sb = fc->root->d_sb; |
---|
| 2827 | + int error; |
---|
| 2828 | + |
---|
| 2829 | + error = security_sb_kern_mount(sb); |
---|
| 2830 | + if (!error && mount_too_revealing(sb, &mnt_flags)) |
---|
| 2831 | + error = -EPERM; |
---|
| 2832 | + |
---|
| 2833 | + if (unlikely(error)) { |
---|
| 2834 | + fc_drop_locked(fc); |
---|
| 2835 | + return error; |
---|
| 2836 | + } |
---|
| 2837 | + |
---|
| 2838 | + up_write(&sb->s_umount); |
---|
| 2839 | + |
---|
| 2840 | + mnt = vfs_create_mount(fc); |
---|
| 2841 | + if (IS_ERR(mnt)) |
---|
| 2842 | + return PTR_ERR(mnt); |
---|
| 2843 | + |
---|
| 2844 | + mnt_warn_timestamp_expiry(mountpoint, mnt); |
---|
| 2845 | + |
---|
| 2846 | + mp = lock_mount(mountpoint); |
---|
| 2847 | + if (IS_ERR(mp)) { |
---|
| 2848 | + mntput(mnt); |
---|
| 2849 | + return PTR_ERR(mp); |
---|
| 2850 | + } |
---|
| 2851 | + error = do_add_mount(real_mount(mnt), mp, mountpoint, mnt_flags); |
---|
| 2852 | + unlock_mount(mp); |
---|
| 2853 | + if (error < 0) |
---|
| 2854 | + mntput(mnt); |
---|
| 2855 | + return error; |
---|
| 2856 | +} |
---|
2501 | 2857 | |
---|
2502 | 2858 | /* |
---|
2503 | 2859 | * create a new mount for userspace and request it to be added into the |
---|
.. | .. |
---|
2507 | 2863 | int mnt_flags, const char *name, void *data) |
---|
2508 | 2864 | { |
---|
2509 | 2865 | struct file_system_type *type; |
---|
2510 | | - struct vfsmount *mnt; |
---|
2511 | | - int err; |
---|
| 2866 | + struct fs_context *fc; |
---|
| 2867 | + const char *subtype = NULL; |
---|
| 2868 | + int err = 0; |
---|
2512 | 2869 | |
---|
2513 | 2870 | if (!fstype) |
---|
2514 | 2871 | return -EINVAL; |
---|
.. | .. |
---|
2517 | 2874 | if (!type) |
---|
2518 | 2875 | return -ENODEV; |
---|
2519 | 2876 | |
---|
2520 | | - mnt = vfs_kern_mount(type, sb_flags, name, data); |
---|
2521 | | - if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && |
---|
2522 | | - !mnt->mnt_sb->s_subtype) |
---|
2523 | | - mnt = fs_set_subtype(mnt, fstype); |
---|
2524 | | - |
---|
2525 | | - put_filesystem(type); |
---|
2526 | | - if (IS_ERR(mnt)) |
---|
2527 | | - return PTR_ERR(mnt); |
---|
2528 | | - |
---|
2529 | | - if (mount_too_revealing(mnt, &mnt_flags)) { |
---|
2530 | | - mntput(mnt); |
---|
2531 | | - return -EPERM; |
---|
| 2877 | + if (type->fs_flags & FS_HAS_SUBTYPE) { |
---|
| 2878 | + subtype = strchr(fstype, '.'); |
---|
| 2879 | + if (subtype) { |
---|
| 2880 | + subtype++; |
---|
| 2881 | + if (!*subtype) { |
---|
| 2882 | + put_filesystem(type); |
---|
| 2883 | + return -EINVAL; |
---|
| 2884 | + } |
---|
| 2885 | + } |
---|
2532 | 2886 | } |
---|
2533 | 2887 | |
---|
2534 | | - err = do_add_mount(real_mount(mnt), path, mnt_flags); |
---|
2535 | | - if (err) |
---|
2536 | | - mntput(mnt); |
---|
| 2888 | + fc = fs_context_for_mount(type, sb_flags); |
---|
| 2889 | + put_filesystem(type); |
---|
| 2890 | + if (IS_ERR(fc)) |
---|
| 2891 | + return PTR_ERR(fc); |
---|
| 2892 | + |
---|
| 2893 | + if (subtype) |
---|
| 2894 | + err = vfs_parse_fs_string(fc, "subtype", |
---|
| 2895 | + subtype, strlen(subtype)); |
---|
| 2896 | + if (!err && name) |
---|
| 2897 | + err = vfs_parse_fs_string(fc, "source", name, strlen(name)); |
---|
| 2898 | + if (!err) |
---|
| 2899 | + err = parse_monolithic_mount_data(fc, data); |
---|
| 2900 | + if (!err && !mount_capable(fc)) |
---|
| 2901 | + err = -EPERM; |
---|
| 2902 | + if (!err) |
---|
| 2903 | + err = vfs_get_tree(fc); |
---|
| 2904 | + if (!err) |
---|
| 2905 | + err = do_new_mount_fc(fc, path, mnt_flags); |
---|
| 2906 | + |
---|
| 2907 | + put_fs_context(fc); |
---|
2537 | 2908 | return err; |
---|
2538 | 2909 | } |
---|
2539 | 2910 | |
---|
2540 | 2911 | int finish_automount(struct vfsmount *m, struct path *path) |
---|
2541 | 2912 | { |
---|
2542 | | - struct mount *mnt = real_mount(m); |
---|
| 2913 | + struct dentry *dentry = path->dentry; |
---|
| 2914 | + struct mountpoint *mp; |
---|
| 2915 | + struct mount *mnt; |
---|
2543 | 2916 | int err; |
---|
| 2917 | + |
---|
| 2918 | + if (!m) |
---|
| 2919 | + return 0; |
---|
| 2920 | + if (IS_ERR(m)) |
---|
| 2921 | + return PTR_ERR(m); |
---|
| 2922 | + |
---|
| 2923 | + mnt = real_mount(m); |
---|
2544 | 2924 | /* The new mount record should have at least 2 refs to prevent it being |
---|
2545 | 2925 | * expired before we get a chance to add it |
---|
2546 | 2926 | */ |
---|
2547 | 2927 | BUG_ON(mnt_get_count(mnt) < 2); |
---|
2548 | 2928 | |
---|
2549 | 2929 | if (m->mnt_sb == path->mnt->mnt_sb && |
---|
2550 | | - m->mnt_root == path->dentry) { |
---|
| 2930 | + m->mnt_root == dentry) { |
---|
2551 | 2931 | err = -ELOOP; |
---|
2552 | | - goto fail; |
---|
| 2932 | + goto discard; |
---|
2553 | 2933 | } |
---|
2554 | 2934 | |
---|
2555 | | - err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE); |
---|
2556 | | - if (!err) |
---|
2557 | | - return 0; |
---|
2558 | | -fail: |
---|
| 2935 | + /* |
---|
| 2936 | + * we don't want to use lock_mount() - in this case finding something |
---|
| 2937 | + * that overmounts our mountpoint-to-be means "quietly drop what we've |
---|
| 2938 | + * got", not "try to mount it on top". |
---|
| 2939 | + */ |
---|
| 2940 | + inode_lock(dentry->d_inode); |
---|
| 2941 | + namespace_lock(); |
---|
| 2942 | + if (unlikely(cant_mount(dentry))) { |
---|
| 2943 | + err = -ENOENT; |
---|
| 2944 | + goto discard_locked; |
---|
| 2945 | + } |
---|
| 2946 | + rcu_read_lock(); |
---|
| 2947 | + if (unlikely(__lookup_mnt(path->mnt, dentry))) { |
---|
| 2948 | + rcu_read_unlock(); |
---|
| 2949 | + err = 0; |
---|
| 2950 | + goto discard_locked; |
---|
| 2951 | + } |
---|
| 2952 | + rcu_read_unlock(); |
---|
| 2953 | + mp = get_mountpoint(dentry); |
---|
| 2954 | + if (IS_ERR(mp)) { |
---|
| 2955 | + err = PTR_ERR(mp); |
---|
| 2956 | + goto discard_locked; |
---|
| 2957 | + } |
---|
| 2958 | + |
---|
| 2959 | + err = do_add_mount(mnt, mp, path, path->mnt->mnt_flags | MNT_SHRINKABLE); |
---|
| 2960 | + unlock_mount(mp); |
---|
| 2961 | + if (unlikely(err)) |
---|
| 2962 | + goto discard; |
---|
| 2963 | + mntput(m); |
---|
| 2964 | + return 0; |
---|
| 2965 | + |
---|
| 2966 | +discard_locked: |
---|
| 2967 | + namespace_unlock(); |
---|
| 2968 | + inode_unlock(dentry->d_inode); |
---|
| 2969 | +discard: |
---|
2559 | 2970 | /* remove m from any expiration list it may be on */ |
---|
2560 | 2971 | if (!list_empty(&mnt->mnt_expire)) { |
---|
2561 | 2972 | namespace_lock(); |
---|
.. | .. |
---|
2689 | 3100 | } |
---|
2690 | 3101 | } |
---|
2691 | 3102 | |
---|
2692 | | -/* |
---|
2693 | | - * Some copy_from_user() implementations do not return the exact number of |
---|
2694 | | - * bytes remaining to copy on a fault. But copy_mount_options() requires that. |
---|
2695 | | - * Note that this function differs from copy_from_user() in that it will oops |
---|
2696 | | - * on bad values of `to', rather than returning a short copy. |
---|
2697 | | - */ |
---|
2698 | | -static long exact_copy_from_user(void *to, const void __user * from, |
---|
2699 | | - unsigned long n) |
---|
| 3103 | +static void *copy_mount_options(const void __user * data) |
---|
2700 | 3104 | { |
---|
2701 | | - char *t = to; |
---|
2702 | | - const char __user *f = from; |
---|
2703 | | - char c; |
---|
2704 | | - |
---|
2705 | | - if (!access_ok(VERIFY_READ, from, n)) |
---|
2706 | | - return n; |
---|
2707 | | - |
---|
2708 | | - while (n) { |
---|
2709 | | - if (__get_user(c, f)) { |
---|
2710 | | - memset(t, 0, n); |
---|
2711 | | - break; |
---|
2712 | | - } |
---|
2713 | | - *t++ = c; |
---|
2714 | | - f++; |
---|
2715 | | - n--; |
---|
2716 | | - } |
---|
2717 | | - return n; |
---|
2718 | | -} |
---|
2719 | | - |
---|
2720 | | -void *copy_mount_options(const void __user * data) |
---|
2721 | | -{ |
---|
2722 | | - int i; |
---|
2723 | | - unsigned long size; |
---|
2724 | 3105 | char *copy; |
---|
| 3106 | + unsigned left, offset; |
---|
2725 | 3107 | |
---|
2726 | 3108 | if (!data) |
---|
2727 | 3109 | return NULL; |
---|
.. | .. |
---|
2730 | 3112 | if (!copy) |
---|
2731 | 3113 | return ERR_PTR(-ENOMEM); |
---|
2732 | 3114 | |
---|
2733 | | - /* We only care that *some* data at the address the user |
---|
2734 | | - * gave us is valid. Just in case, we'll zero |
---|
2735 | | - * the remainder of the page. |
---|
2736 | | - */ |
---|
2737 | | - /* copy_from_user cannot cross TASK_SIZE ! */ |
---|
2738 | | - size = TASK_SIZE - (unsigned long)untagged_addr(data); |
---|
2739 | | - if (size > PAGE_SIZE) |
---|
2740 | | - size = PAGE_SIZE; |
---|
| 3115 | + left = copy_from_user(copy, data, PAGE_SIZE); |
---|
2741 | 3116 | |
---|
2742 | | - i = size - exact_copy_from_user(copy, data, size); |
---|
2743 | | - if (!i) { |
---|
| 3117 | + /* |
---|
| 3118 | + * Not all architectures have an exact copy_from_user(). Resort to |
---|
| 3119 | + * byte at a time. |
---|
| 3120 | + */ |
---|
| 3121 | + offset = PAGE_SIZE - left; |
---|
| 3122 | + while (left) { |
---|
| 3123 | + char c; |
---|
| 3124 | + if (get_user(c, (const char __user *)data + offset)) |
---|
| 3125 | + break; |
---|
| 3126 | + copy[offset] = c; |
---|
| 3127 | + left--; |
---|
| 3128 | + offset++; |
---|
| 3129 | + } |
---|
| 3130 | + |
---|
| 3131 | + if (left == PAGE_SIZE) { |
---|
2744 | 3132 | kfree(copy); |
---|
2745 | 3133 | return ERR_PTR(-EFAULT); |
---|
2746 | 3134 | } |
---|
2747 | | - if (i != PAGE_SIZE) |
---|
2748 | | - memset(copy + i, 0, PAGE_SIZE - i); |
---|
| 3135 | + |
---|
2749 | 3136 | return copy; |
---|
2750 | 3137 | } |
---|
2751 | 3138 | |
---|
2752 | | -char *copy_mount_string(const void __user *data) |
---|
| 3139 | +static char *copy_mount_string(const void __user *data) |
---|
2753 | 3140 | { |
---|
2754 | | - return data ? strndup_user(data, PAGE_SIZE) : NULL; |
---|
| 3141 | + return data ? strndup_user(data, PATH_MAX) : NULL; |
---|
2755 | 3142 | } |
---|
2756 | 3143 | |
---|
2757 | 3144 | /* |
---|
.. | .. |
---|
2768 | 3155 | * Therefore, if this magic number is present, it carries no information |
---|
2769 | 3156 | * and must be discarded. |
---|
2770 | 3157 | */ |
---|
2771 | | -long do_mount(const char *dev_name, const char __user *dir_name, |
---|
| 3158 | +int path_mount(const char *dev_name, struct path *path, |
---|
2772 | 3159 | const char *type_page, unsigned long flags, void *data_page) |
---|
2773 | 3160 | { |
---|
2774 | | - struct path path; |
---|
2775 | 3161 | unsigned int mnt_flags = 0, sb_flags; |
---|
2776 | | - int retval = 0; |
---|
| 3162 | + int ret; |
---|
2777 | 3163 | |
---|
2778 | 3164 | /* Discard magic */ |
---|
2779 | 3165 | if ((flags & MS_MGC_MSK) == MS_MGC_VAL) |
---|
.. | .. |
---|
2786 | 3172 | if (flags & MS_NOUSER) |
---|
2787 | 3173 | return -EINVAL; |
---|
2788 | 3174 | |
---|
2789 | | - /* ... and get the mountpoint */ |
---|
2790 | | - retval = user_path(dir_name, &path); |
---|
2791 | | - if (retval) |
---|
2792 | | - return retval; |
---|
2793 | | - |
---|
2794 | | - retval = security_sb_mount(dev_name, &path, |
---|
2795 | | - type_page, flags, data_page); |
---|
2796 | | - if (!retval && !may_mount()) |
---|
2797 | | - retval = -EPERM; |
---|
2798 | | - if (!retval && (flags & SB_MANDLOCK) && !may_mandlock()) |
---|
2799 | | - retval = -EPERM; |
---|
2800 | | - if (retval) |
---|
2801 | | - goto dput_out; |
---|
| 3175 | + ret = security_sb_mount(dev_name, path, type_page, flags, data_page); |
---|
| 3176 | + if (ret) |
---|
| 3177 | + return ret; |
---|
| 3178 | + if (!may_mount()) |
---|
| 3179 | + return -EPERM; |
---|
| 3180 | + if ((flags & SB_MANDLOCK) && !may_mandlock()) |
---|
| 3181 | + return -EPERM; |
---|
2802 | 3182 | |
---|
2803 | 3183 | /* Default to relatime unless overridden */ |
---|
2804 | 3184 | if (!(flags & MS_NOATIME)) |
---|
.. | .. |
---|
2819 | 3199 | mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME); |
---|
2820 | 3200 | if (flags & MS_RDONLY) |
---|
2821 | 3201 | mnt_flags |= MNT_READONLY; |
---|
| 3202 | + if (flags & MS_NOSYMFOLLOW) |
---|
| 3203 | + mnt_flags |= MNT_NOSYMFOLLOW; |
---|
2822 | 3204 | |
---|
2823 | 3205 | /* The default atime for remount is preservation */ |
---|
2824 | 3206 | if ((flags & MS_REMOUNT) && |
---|
2825 | 3207 | ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME | |
---|
2826 | 3208 | MS_STRICTATIME)) == 0)) { |
---|
2827 | 3209 | mnt_flags &= ~MNT_ATIME_MASK; |
---|
2828 | | - mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK; |
---|
| 3210 | + mnt_flags |= path->mnt->mnt_flags & MNT_ATIME_MASK; |
---|
2829 | 3211 | } |
---|
2830 | 3212 | |
---|
2831 | 3213 | sb_flags = flags & (SB_RDONLY | |
---|
.. | .. |
---|
2837 | 3219 | SB_LAZYTIME | |
---|
2838 | 3220 | SB_I_VERSION); |
---|
2839 | 3221 | |
---|
| 3222 | + if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND)) |
---|
| 3223 | + return do_reconfigure_mnt(path, mnt_flags); |
---|
2840 | 3224 | if (flags & MS_REMOUNT) |
---|
2841 | | - retval = do_remount(&path, flags, sb_flags, mnt_flags, |
---|
2842 | | - data_page); |
---|
2843 | | - else if (flags & MS_BIND) |
---|
2844 | | - retval = do_loopback(&path, dev_name, flags & MS_REC); |
---|
2845 | | - else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) |
---|
2846 | | - retval = do_change_type(&path, flags); |
---|
2847 | | - else if (flags & MS_MOVE) |
---|
2848 | | - retval = do_move_mount(&path, dev_name); |
---|
2849 | | - else |
---|
2850 | | - retval = do_new_mount(&path, type_page, sb_flags, mnt_flags, |
---|
2851 | | - dev_name, data_page); |
---|
2852 | | -dput_out: |
---|
| 3225 | + return do_remount(path, flags, sb_flags, mnt_flags, data_page); |
---|
| 3226 | + if (flags & MS_BIND) |
---|
| 3227 | + return do_loopback(path, dev_name, flags & MS_REC); |
---|
| 3228 | + if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) |
---|
| 3229 | + return do_change_type(path, flags); |
---|
| 3230 | + if (flags & MS_MOVE) |
---|
| 3231 | + return do_move_mount_old(path, dev_name); |
---|
| 3232 | + |
---|
| 3233 | + return do_new_mount(path, type_page, sb_flags, mnt_flags, dev_name, |
---|
| 3234 | + data_page); |
---|
| 3235 | +} |
---|
| 3236 | + |
---|
| 3237 | +long do_mount(const char *dev_name, const char __user *dir_name, |
---|
| 3238 | + const char *type_page, unsigned long flags, void *data_page) |
---|
| 3239 | +{ |
---|
| 3240 | + struct path path; |
---|
| 3241 | + int ret; |
---|
| 3242 | + |
---|
| 3243 | + ret = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path); |
---|
| 3244 | + if (ret) |
---|
| 3245 | + return ret; |
---|
| 3246 | + ret = path_mount(dev_name, &path, type_page, flags, data_page); |
---|
2853 | 3247 | path_put(&path); |
---|
2854 | | - return retval; |
---|
| 3248 | + return ret; |
---|
2855 | 3249 | } |
---|
2856 | 3250 | |
---|
2857 | 3251 | static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns) |
---|
.. | .. |
---|
2866 | 3260 | |
---|
2867 | 3261 | static void free_mnt_ns(struct mnt_namespace *ns) |
---|
2868 | 3262 | { |
---|
2869 | | - ns_free_inum(&ns->ns); |
---|
| 3263 | + if (!is_anon_ns(ns)) |
---|
| 3264 | + ns_free_inum(&ns->ns); |
---|
2870 | 3265 | dec_mnt_namespaces(ns->ucounts); |
---|
2871 | 3266 | put_user_ns(ns->user_ns); |
---|
2872 | 3267 | kfree(ns); |
---|
.. | .. |
---|
2881 | 3276 | */ |
---|
2882 | 3277 | static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); |
---|
2883 | 3278 | |
---|
2884 | | -static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) |
---|
| 3279 | +static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon) |
---|
2885 | 3280 | { |
---|
2886 | 3281 | struct mnt_namespace *new_ns; |
---|
2887 | 3282 | struct ucounts *ucounts; |
---|
.. | .. |
---|
2891 | 3286 | if (!ucounts) |
---|
2892 | 3287 | return ERR_PTR(-ENOSPC); |
---|
2893 | 3288 | |
---|
2894 | | - new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); |
---|
| 3289 | + new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL); |
---|
2895 | 3290 | if (!new_ns) { |
---|
2896 | 3291 | dec_mnt_namespaces(ucounts); |
---|
2897 | 3292 | return ERR_PTR(-ENOMEM); |
---|
2898 | 3293 | } |
---|
2899 | | - ret = ns_alloc_inum(&new_ns->ns); |
---|
2900 | | - if (ret) { |
---|
2901 | | - kfree(new_ns); |
---|
2902 | | - dec_mnt_namespaces(ucounts); |
---|
2903 | | - return ERR_PTR(ret); |
---|
| 3294 | + if (!anon) { |
---|
| 3295 | + ret = ns_alloc_inum(&new_ns->ns); |
---|
| 3296 | + if (ret) { |
---|
| 3297 | + kfree(new_ns); |
---|
| 3298 | + dec_mnt_namespaces(ucounts); |
---|
| 3299 | + return ERR_PTR(ret); |
---|
| 3300 | + } |
---|
2904 | 3301 | } |
---|
2905 | 3302 | new_ns->ns.ops = &mntns_operations; |
---|
2906 | | - new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); |
---|
| 3303 | + if (!anon) |
---|
| 3304 | + new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); |
---|
2907 | 3305 | atomic_set(&new_ns->count, 1); |
---|
2908 | | - new_ns->root = NULL; |
---|
2909 | 3306 | INIT_LIST_HEAD(&new_ns->list); |
---|
2910 | 3307 | init_waitqueue_head(&new_ns->poll); |
---|
2911 | | - new_ns->event = 0; |
---|
| 3308 | + spin_lock_init(&new_ns->ns_lock); |
---|
2912 | 3309 | new_ns->user_ns = get_user_ns(user_ns); |
---|
2913 | 3310 | new_ns->ucounts = ucounts; |
---|
2914 | | - new_ns->mounts = 0; |
---|
2915 | | - new_ns->pending_mounts = 0; |
---|
2916 | 3311 | return new_ns; |
---|
2917 | 3312 | } |
---|
2918 | 3313 | |
---|
.. | .. |
---|
2936 | 3331 | |
---|
2937 | 3332 | old = ns->root; |
---|
2938 | 3333 | |
---|
2939 | | - new_ns = alloc_mnt_ns(user_ns); |
---|
| 3334 | + new_ns = alloc_mnt_ns(user_ns, false); |
---|
2940 | 3335 | if (IS_ERR(new_ns)) |
---|
2941 | 3336 | return new_ns; |
---|
2942 | 3337 | |
---|
.. | .. |
---|
2944 | 3339 | /* First pass: copy the tree topology */ |
---|
2945 | 3340 | copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE; |
---|
2946 | 3341 | if (user_ns != ns->user_ns) |
---|
2947 | | - copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; |
---|
| 3342 | + copy_flags |= CL_SHARED_TO_SLAVE; |
---|
2948 | 3343 | new = copy_tree(old, old->mnt.mnt_root, copy_flags); |
---|
2949 | 3344 | if (IS_ERR(new)) { |
---|
2950 | 3345 | namespace_unlock(); |
---|
2951 | 3346 | free_mnt_ns(new_ns); |
---|
2952 | 3347 | return ERR_CAST(new); |
---|
| 3348 | + } |
---|
| 3349 | + if (user_ns != ns->user_ns) { |
---|
| 3350 | + lock_mount_hash(); |
---|
| 3351 | + lock_mnt_tree(new); |
---|
| 3352 | + unlock_mount_hash(); |
---|
2953 | 3353 | } |
---|
2954 | 3354 | new_ns->root = new; |
---|
2955 | 3355 | list_add_tail(&new_ns->list, &new->mnt_list); |
---|
.. | .. |
---|
2991 | 3391 | return new_ns; |
---|
2992 | 3392 | } |
---|
2993 | 3393 | |
---|
2994 | | -/** |
---|
2995 | | - * create_mnt_ns - creates a private namespace and adds a root filesystem |
---|
2996 | | - * @mnt: pointer to the new root filesystem mountpoint |
---|
2997 | | - */ |
---|
2998 | | -static struct mnt_namespace *create_mnt_ns(struct vfsmount *m) |
---|
| 3394 | +struct dentry *mount_subtree(struct vfsmount *m, const char *name) |
---|
2999 | 3395 | { |
---|
3000 | | - struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns); |
---|
3001 | | - if (!IS_ERR(new_ns)) { |
---|
3002 | | - struct mount *mnt = real_mount(m); |
---|
3003 | | - mnt->mnt_ns = new_ns; |
---|
3004 | | - new_ns->root = mnt; |
---|
3005 | | - new_ns->mounts++; |
---|
3006 | | - list_add(&mnt->mnt_list, &new_ns->list); |
---|
3007 | | - } else { |
---|
3008 | | - mntput(m); |
---|
3009 | | - } |
---|
3010 | | - return new_ns; |
---|
3011 | | -} |
---|
3012 | | - |
---|
3013 | | -struct dentry *mount_subtree(struct vfsmount *mnt, const char *name) |
---|
3014 | | -{ |
---|
| 3396 | + struct mount *mnt = real_mount(m); |
---|
3015 | 3397 | struct mnt_namespace *ns; |
---|
3016 | 3398 | struct super_block *s; |
---|
3017 | 3399 | struct path path; |
---|
3018 | 3400 | int err; |
---|
3019 | 3401 | |
---|
3020 | | - ns = create_mnt_ns(mnt); |
---|
3021 | | - if (IS_ERR(ns)) |
---|
| 3402 | + ns = alloc_mnt_ns(&init_user_ns, true); |
---|
| 3403 | + if (IS_ERR(ns)) { |
---|
| 3404 | + mntput(m); |
---|
3022 | 3405 | return ERR_CAST(ns); |
---|
| 3406 | + } |
---|
| 3407 | + mnt->mnt_ns = ns; |
---|
| 3408 | + ns->root = mnt; |
---|
| 3409 | + ns->mounts++; |
---|
| 3410 | + list_add(&mnt->mnt_list, &ns->list); |
---|
3023 | 3411 | |
---|
3024 | | - err = vfs_path_lookup(mnt->mnt_root, mnt, |
---|
| 3412 | + err = vfs_path_lookup(m->mnt_root, m, |
---|
3025 | 3413 | name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); |
---|
3026 | 3414 | |
---|
3027 | 3415 | put_mnt_ns(ns); |
---|
.. | .. |
---|
3040 | 3428 | } |
---|
3041 | 3429 | EXPORT_SYMBOL(mount_subtree); |
---|
3042 | 3430 | |
---|
3043 | | -int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type, |
---|
3044 | | - unsigned long flags, void __user *data) |
---|
| 3431 | +SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, |
---|
| 3432 | + char __user *, type, unsigned long, flags, void __user *, data) |
---|
3045 | 3433 | { |
---|
3046 | 3434 | int ret; |
---|
3047 | 3435 | char *kernel_type; |
---|
.. | .. |
---|
3074 | 3462 | return ret; |
---|
3075 | 3463 | } |
---|
3076 | 3464 | |
---|
3077 | | -SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, |
---|
3078 | | - char __user *, type, unsigned long, flags, void __user *, data) |
---|
| 3465 | +/* |
---|
| 3466 | + * Create a kernel mount representation for a new, prepared superblock |
---|
| 3467 | + * (specified by fs_fd) and attach to an open_tree-like file descriptor. |
---|
| 3468 | + */ |
---|
| 3469 | +SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags, |
---|
| 3470 | + unsigned int, attr_flags) |
---|
3079 | 3471 | { |
---|
3080 | | - return ksys_mount(dev_name, dir_name, type, flags, data); |
---|
| 3472 | + struct mnt_namespace *ns; |
---|
| 3473 | + struct fs_context *fc; |
---|
| 3474 | + struct file *file; |
---|
| 3475 | + struct path newmount; |
---|
| 3476 | + struct mount *mnt; |
---|
| 3477 | + struct fd f; |
---|
| 3478 | + unsigned int mnt_flags = 0; |
---|
| 3479 | + long ret; |
---|
| 3480 | + |
---|
| 3481 | + if (!may_mount()) |
---|
| 3482 | + return -EPERM; |
---|
| 3483 | + |
---|
| 3484 | + if ((flags & ~(FSMOUNT_CLOEXEC)) != 0) |
---|
| 3485 | + return -EINVAL; |
---|
| 3486 | + |
---|
| 3487 | + if (attr_flags & ~(MOUNT_ATTR_RDONLY | |
---|
| 3488 | + MOUNT_ATTR_NOSUID | |
---|
| 3489 | + MOUNT_ATTR_NODEV | |
---|
| 3490 | + MOUNT_ATTR_NOEXEC | |
---|
| 3491 | + MOUNT_ATTR__ATIME | |
---|
| 3492 | + MOUNT_ATTR_NODIRATIME)) |
---|
| 3493 | + return -EINVAL; |
---|
| 3494 | + |
---|
| 3495 | + if (attr_flags & MOUNT_ATTR_RDONLY) |
---|
| 3496 | + mnt_flags |= MNT_READONLY; |
---|
| 3497 | + if (attr_flags & MOUNT_ATTR_NOSUID) |
---|
| 3498 | + mnt_flags |= MNT_NOSUID; |
---|
| 3499 | + if (attr_flags & MOUNT_ATTR_NODEV) |
---|
| 3500 | + mnt_flags |= MNT_NODEV; |
---|
| 3501 | + if (attr_flags & MOUNT_ATTR_NOEXEC) |
---|
| 3502 | + mnt_flags |= MNT_NOEXEC; |
---|
| 3503 | + if (attr_flags & MOUNT_ATTR_NODIRATIME) |
---|
| 3504 | + mnt_flags |= MNT_NODIRATIME; |
---|
| 3505 | + |
---|
| 3506 | + switch (attr_flags & MOUNT_ATTR__ATIME) { |
---|
| 3507 | + case MOUNT_ATTR_STRICTATIME: |
---|
| 3508 | + break; |
---|
| 3509 | + case MOUNT_ATTR_NOATIME: |
---|
| 3510 | + mnt_flags |= MNT_NOATIME; |
---|
| 3511 | + break; |
---|
| 3512 | + case MOUNT_ATTR_RELATIME: |
---|
| 3513 | + mnt_flags |= MNT_RELATIME; |
---|
| 3514 | + break; |
---|
| 3515 | + default: |
---|
| 3516 | + return -EINVAL; |
---|
| 3517 | + } |
---|
| 3518 | + |
---|
| 3519 | + f = fdget(fs_fd); |
---|
| 3520 | + if (!f.file) |
---|
| 3521 | + return -EBADF; |
---|
| 3522 | + |
---|
| 3523 | + ret = -EINVAL; |
---|
| 3524 | + if (f.file->f_op != &fscontext_fops) |
---|
| 3525 | + goto err_fsfd; |
---|
| 3526 | + |
---|
| 3527 | + fc = f.file->private_data; |
---|
| 3528 | + |
---|
| 3529 | + ret = mutex_lock_interruptible(&fc->uapi_mutex); |
---|
| 3530 | + if (ret < 0) |
---|
| 3531 | + goto err_fsfd; |
---|
| 3532 | + |
---|
| 3533 | + /* There must be a valid superblock or we can't mount it */ |
---|
| 3534 | + ret = -EINVAL; |
---|
| 3535 | + if (!fc->root) |
---|
| 3536 | + goto err_unlock; |
---|
| 3537 | + |
---|
| 3538 | + ret = -EPERM; |
---|
| 3539 | + if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) { |
---|
| 3540 | + pr_warn("VFS: Mount too revealing\n"); |
---|
| 3541 | + goto err_unlock; |
---|
| 3542 | + } |
---|
| 3543 | + |
---|
| 3544 | + ret = -EBUSY; |
---|
| 3545 | + if (fc->phase != FS_CONTEXT_AWAITING_MOUNT) |
---|
| 3546 | + goto err_unlock; |
---|
| 3547 | + |
---|
| 3548 | + ret = -EPERM; |
---|
| 3549 | + if ((fc->sb_flags & SB_MANDLOCK) && !may_mandlock()) |
---|
| 3550 | + goto err_unlock; |
---|
| 3551 | + |
---|
| 3552 | + newmount.mnt = vfs_create_mount(fc); |
---|
| 3553 | + if (IS_ERR(newmount.mnt)) { |
---|
| 3554 | + ret = PTR_ERR(newmount.mnt); |
---|
| 3555 | + goto err_unlock; |
---|
| 3556 | + } |
---|
| 3557 | + newmount.dentry = dget(fc->root); |
---|
| 3558 | + newmount.mnt->mnt_flags = mnt_flags; |
---|
| 3559 | + |
---|
| 3560 | + /* We've done the mount bit - now move the file context into more or |
---|
| 3561 | + * less the same state as if we'd done an fspick(). We don't want to |
---|
| 3562 | + * do any memory allocation or anything like that at this point as we |
---|
| 3563 | + * don't want to have to handle any errors incurred. |
---|
| 3564 | + */ |
---|
| 3565 | + vfs_clean_context(fc); |
---|
| 3566 | + |
---|
| 3567 | + ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true); |
---|
| 3568 | + if (IS_ERR(ns)) { |
---|
| 3569 | + ret = PTR_ERR(ns); |
---|
| 3570 | + goto err_path; |
---|
| 3571 | + } |
---|
| 3572 | + mnt = real_mount(newmount.mnt); |
---|
| 3573 | + mnt->mnt_ns = ns; |
---|
| 3574 | + ns->root = mnt; |
---|
| 3575 | + ns->mounts = 1; |
---|
| 3576 | + list_add(&mnt->mnt_list, &ns->list); |
---|
| 3577 | + mntget(newmount.mnt); |
---|
| 3578 | + |
---|
| 3579 | + /* Attach to an apparent O_PATH fd with a note that we need to unmount |
---|
| 3580 | + * it, not just simply put it. |
---|
| 3581 | + */ |
---|
| 3582 | + file = dentry_open(&newmount, O_PATH, fc->cred); |
---|
| 3583 | + if (IS_ERR(file)) { |
---|
| 3584 | + dissolve_on_fput(newmount.mnt); |
---|
| 3585 | + ret = PTR_ERR(file); |
---|
| 3586 | + goto err_path; |
---|
| 3587 | + } |
---|
| 3588 | + file->f_mode |= FMODE_NEED_UNMOUNT; |
---|
| 3589 | + |
---|
| 3590 | + ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0); |
---|
| 3591 | + if (ret >= 0) |
---|
| 3592 | + fd_install(ret, file); |
---|
| 3593 | + else |
---|
| 3594 | + fput(file); |
---|
| 3595 | + |
---|
| 3596 | +err_path: |
---|
| 3597 | + path_put(&newmount); |
---|
| 3598 | +err_unlock: |
---|
| 3599 | + mutex_unlock(&fc->uapi_mutex); |
---|
| 3600 | +err_fsfd: |
---|
| 3601 | + fdput(f); |
---|
| 3602 | + return ret; |
---|
| 3603 | +} |
---|
| 3604 | + |
---|
| 3605 | +/* |
---|
| 3606 | + * Move a mount from one place to another. In combination with |
---|
| 3607 | + * fsopen()/fsmount() this is used to install a new mount and in combination |
---|
| 3608 | + * with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be used to copy |
---|
| 3609 | + * a mount subtree. |
---|
| 3610 | + * |
---|
| 3611 | + * Note the flags value is a combination of MOVE_MOUNT_* flags. |
---|
| 3612 | + */ |
---|
| 3613 | +SYSCALL_DEFINE5(move_mount, |
---|
| 3614 | + int, from_dfd, const char __user *, from_pathname, |
---|
| 3615 | + int, to_dfd, const char __user *, to_pathname, |
---|
| 3616 | + unsigned int, flags) |
---|
| 3617 | +{ |
---|
| 3618 | + struct path from_path, to_path; |
---|
| 3619 | + unsigned int lflags; |
---|
| 3620 | + int ret = 0; |
---|
| 3621 | + |
---|
| 3622 | + if (!may_mount()) |
---|
| 3623 | + return -EPERM; |
---|
| 3624 | + |
---|
| 3625 | + if (flags & ~MOVE_MOUNT__MASK) |
---|
| 3626 | + return -EINVAL; |
---|
| 3627 | + |
---|
| 3628 | + /* If someone gives a pathname, they aren't permitted to move |
---|
| 3629 | + * from an fd that requires unmount as we can't get at the flag |
---|
| 3630 | + * to clear it afterwards. |
---|
| 3631 | + */ |
---|
| 3632 | + lflags = 0; |
---|
| 3633 | + if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW; |
---|
| 3634 | + if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT; |
---|
| 3635 | + if (flags & MOVE_MOUNT_F_EMPTY_PATH) lflags |= LOOKUP_EMPTY; |
---|
| 3636 | + |
---|
| 3637 | + ret = user_path_at(from_dfd, from_pathname, lflags, &from_path); |
---|
| 3638 | + if (ret < 0) |
---|
| 3639 | + return ret; |
---|
| 3640 | + |
---|
| 3641 | + lflags = 0; |
---|
| 3642 | + if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW; |
---|
| 3643 | + if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT; |
---|
| 3644 | + if (flags & MOVE_MOUNT_T_EMPTY_PATH) lflags |= LOOKUP_EMPTY; |
---|
| 3645 | + |
---|
| 3646 | + ret = user_path_at(to_dfd, to_pathname, lflags, &to_path); |
---|
| 3647 | + if (ret < 0) |
---|
| 3648 | + goto out_from; |
---|
| 3649 | + |
---|
| 3650 | + ret = security_move_mount(&from_path, &to_path); |
---|
| 3651 | + if (ret < 0) |
---|
| 3652 | + goto out_to; |
---|
| 3653 | + |
---|
| 3654 | + ret = do_move_mount(&from_path, &to_path); |
---|
| 3655 | + |
---|
| 3656 | +out_to: |
---|
| 3657 | + path_put(&to_path); |
---|
| 3658 | +out_from: |
---|
| 3659 | + path_put(&from_path); |
---|
| 3660 | + return ret; |
---|
3081 | 3661 | } |
---|
3082 | 3662 | |
---|
3083 | 3663 | /* |
---|
.. | .. |
---|
3119 | 3699 | * file system may be mounted on put_old. After all, new_root is a mountpoint. |
---|
3120 | 3700 | * |
---|
3121 | 3701 | * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem. |
---|
3122 | | - * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives |
---|
| 3702 | + * See Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives |
---|
3123 | 3703 | * in this situation. |
---|
3124 | 3704 | * |
---|
3125 | 3705 | * Notes: |
---|
.. | .. |
---|
3133 | 3713 | SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, |
---|
3134 | 3714 | const char __user *, put_old) |
---|
3135 | 3715 | { |
---|
3136 | | - struct path new, old, parent_path, root_parent, root; |
---|
3137 | | - struct mount *new_mnt, *root_mnt, *old_mnt; |
---|
| 3716 | + struct path new, old, root; |
---|
| 3717 | + struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent, *ex_parent; |
---|
3138 | 3718 | struct mountpoint *old_mp, *root_mp; |
---|
3139 | 3719 | int error; |
---|
3140 | 3720 | |
---|
3141 | 3721 | if (!may_mount()) |
---|
3142 | 3722 | return -EPERM; |
---|
3143 | 3723 | |
---|
3144 | | - error = user_path_dir(new_root, &new); |
---|
| 3724 | + error = user_path_at(AT_FDCWD, new_root, |
---|
| 3725 | + LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &new); |
---|
3145 | 3726 | if (error) |
---|
3146 | 3727 | goto out0; |
---|
3147 | 3728 | |
---|
3148 | | - error = user_path_dir(put_old, &old); |
---|
| 3729 | + error = user_path_at(AT_FDCWD, put_old, |
---|
| 3730 | + LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old); |
---|
3149 | 3731 | if (error) |
---|
3150 | 3732 | goto out1; |
---|
3151 | 3733 | |
---|
.. | .. |
---|
3163 | 3745 | new_mnt = real_mount(new.mnt); |
---|
3164 | 3746 | root_mnt = real_mount(root.mnt); |
---|
3165 | 3747 | old_mnt = real_mount(old.mnt); |
---|
| 3748 | + ex_parent = new_mnt->mnt_parent; |
---|
| 3749 | + root_parent = root_mnt->mnt_parent; |
---|
3166 | 3750 | if (IS_MNT_SHARED(old_mnt) || |
---|
3167 | | - IS_MNT_SHARED(new_mnt->mnt_parent) || |
---|
3168 | | - IS_MNT_SHARED(root_mnt->mnt_parent)) |
---|
| 3751 | + IS_MNT_SHARED(ex_parent) || |
---|
| 3752 | + IS_MNT_SHARED(root_parent)) |
---|
3169 | 3753 | goto out4; |
---|
3170 | 3754 | if (!check_mnt(root_mnt) || !check_mnt(new_mnt)) |
---|
3171 | 3755 | goto out4; |
---|
.. | .. |
---|
3182 | 3766 | goto out4; /* not a mountpoint */ |
---|
3183 | 3767 | if (!mnt_has_parent(root_mnt)) |
---|
3184 | 3768 | goto out4; /* not attached */ |
---|
3185 | | - root_mp = root_mnt->mnt_mp; |
---|
3186 | 3769 | if (new.mnt->mnt_root != new.dentry) |
---|
3187 | 3770 | goto out4; /* not a mountpoint */ |
---|
3188 | 3771 | if (!mnt_has_parent(new_mnt)) |
---|
.. | .. |
---|
3194 | 3777 | if (!is_path_reachable(new_mnt, new.dentry, &root)) |
---|
3195 | 3778 | goto out4; |
---|
3196 | 3779 | lock_mount_hash(); |
---|
3197 | | - root_mp->m_count++; /* pin it so it won't go away */ |
---|
3198 | | - detach_mnt(new_mnt, &parent_path); |
---|
3199 | | - detach_mnt(root_mnt, &root_parent); |
---|
| 3780 | + umount_mnt(new_mnt); |
---|
| 3781 | + root_mp = unhash_mnt(root_mnt); /* we'll need its mountpoint */ |
---|
3200 | 3782 | if (root_mnt->mnt.mnt_flags & MNT_LOCKED) { |
---|
3201 | 3783 | new_mnt->mnt.mnt_flags |= MNT_LOCKED; |
---|
3202 | 3784 | root_mnt->mnt.mnt_flags &= ~MNT_LOCKED; |
---|
.. | .. |
---|
3204 | 3786 | /* mount old root on put_old */ |
---|
3205 | 3787 | attach_mnt(root_mnt, old_mnt, old_mp); |
---|
3206 | 3788 | /* mount new_root on / */ |
---|
3207 | | - attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp); |
---|
| 3789 | + attach_mnt(new_mnt, root_parent, root_mp); |
---|
| 3790 | + mnt_add_count(root_parent, -1); |
---|
3208 | 3791 | touch_mnt_namespace(current->nsproxy->mnt_ns); |
---|
3209 | 3792 | /* A moved mount should not expire automatically */ |
---|
3210 | 3793 | list_del_init(&new_mnt->mnt_expire); |
---|
.. | .. |
---|
3214 | 3797 | error = 0; |
---|
3215 | 3798 | out4: |
---|
3216 | 3799 | unlock_mount(old_mp); |
---|
3217 | | - if (!error) { |
---|
3218 | | - path_put(&root_parent); |
---|
3219 | | - path_put(&parent_path); |
---|
3220 | | - } |
---|
| 3800 | + if (!error) |
---|
| 3801 | + mntput_no_expire(ex_parent); |
---|
3221 | 3802 | out3: |
---|
3222 | 3803 | path_put(&root); |
---|
3223 | 3804 | out2: |
---|
.. | .. |
---|
3231 | 3812 | static void __init init_mount_tree(void) |
---|
3232 | 3813 | { |
---|
3233 | 3814 | struct vfsmount *mnt; |
---|
| 3815 | + struct mount *m; |
---|
3234 | 3816 | struct mnt_namespace *ns; |
---|
3235 | 3817 | struct path root; |
---|
3236 | | - struct file_system_type *type; |
---|
3237 | 3818 | |
---|
3238 | | - type = get_fs_type("rootfs"); |
---|
3239 | | - if (!type) |
---|
3240 | | - panic("Can't find rootfs type"); |
---|
3241 | | - mnt = vfs_kern_mount(type, 0, "rootfs", NULL); |
---|
3242 | | - put_filesystem(type); |
---|
| 3819 | + mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL); |
---|
3243 | 3820 | if (IS_ERR(mnt)) |
---|
3244 | 3821 | panic("Can't create rootfs"); |
---|
3245 | 3822 | |
---|
3246 | | - ns = create_mnt_ns(mnt); |
---|
| 3823 | + ns = alloc_mnt_ns(&init_user_ns, false); |
---|
3247 | 3824 | if (IS_ERR(ns)) |
---|
3248 | 3825 | panic("Can't allocate initial namespace"); |
---|
3249 | | - |
---|
| 3826 | + m = real_mount(mnt); |
---|
| 3827 | + m->mnt_ns = ns; |
---|
| 3828 | + ns->root = m; |
---|
| 3829 | + ns->mounts = 1; |
---|
| 3830 | + list_add(&m->mnt_list, &ns->list); |
---|
3250 | 3831 | init_task.nsproxy->mnt_ns = ns; |
---|
3251 | 3832 | get_mnt_ns(ns); |
---|
3252 | 3833 | |
---|
.. | .. |
---|
3288 | 3869 | fs_kobj = kobject_create_and_add("fs", NULL); |
---|
3289 | 3870 | if (!fs_kobj) |
---|
3290 | 3871 | printk(KERN_WARNING "%s: kobj create error\n", __func__); |
---|
| 3872 | + shmem_init(); |
---|
3291 | 3873 | init_rootfs(); |
---|
3292 | 3874 | init_mount_tree(); |
---|
3293 | 3875 | } |
---|
.. | .. |
---|
3300 | 3882 | free_mnt_ns(ns); |
---|
3301 | 3883 | } |
---|
3302 | 3884 | |
---|
3303 | | -struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) |
---|
| 3885 | +struct vfsmount *kern_mount(struct file_system_type *type) |
---|
3304 | 3886 | { |
---|
3305 | 3887 | struct vfsmount *mnt; |
---|
3306 | | - mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, data); |
---|
| 3888 | + mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL); |
---|
3307 | 3889 | if (!IS_ERR(mnt)) { |
---|
3308 | 3890 | /* |
---|
3309 | 3891 | * it is a longterm mount, don't release mnt until |
---|
.. | .. |
---|
3313 | 3895 | } |
---|
3314 | 3896 | return mnt; |
---|
3315 | 3897 | } |
---|
3316 | | -EXPORT_SYMBOL_GPL(kern_mount_data); |
---|
| 3898 | +EXPORT_SYMBOL_GPL(kern_mount); |
---|
3317 | 3899 | |
---|
3318 | 3900 | void kern_unmount(struct vfsmount *mnt) |
---|
3319 | 3901 | { |
---|
.. | .. |
---|
3325 | 3907 | } |
---|
3326 | 3908 | } |
---|
3327 | 3909 | EXPORT_SYMBOL(kern_unmount); |
---|
| 3910 | + |
---|
| 3911 | +void kern_unmount_array(struct vfsmount *mnt[], unsigned int num) |
---|
| 3912 | +{ /* Drop the num mounts in mnt[] as one batch: clear ->mnt_ns on each, wait once, then mntput() each. */ |
---|
| 3913 | + unsigned int i; |
---|
| 3914 | + |
---|
| 3915 | + for (i = 0; i < num; i++) /* pass 1: clear the namespace pointer of every populated slot */ |
---|
| 3916 | + if (mnt[i]) /* NULL slots are skipped in this pass */ |
---|
| 3917 | + real_mount(mnt[i])->mnt_ns = NULL; |
---|
| 3918 | + synchronize_rcu_expedited(); /* called once for the whole array, not once per mount; NOTE(review): presumably lets RCU readers that saw the old ->mnt_ns finish - confirm */ |
---|
| 3919 | + for (i = 0; i < num; i++) /* pass 2: drop the references */ |
---|
| 3920 | + mntput(mnt[i]); /* no NULL check here, so NULL slots reach mntput() too - presumably it tolerates NULL; confirm */ |
---|
| 3921 | +} |
---|
| 3922 | +EXPORT_SYMBOL(kern_unmount_array); |
---|
3328 | 3923 | |
---|
3329 | 3924 | bool our_mnt(struct vfsmount *mnt) |
---|
3330 | 3925 | { |
---|
.. | .. |
---|
3355 | 3950 | return chrooted; |
---|
3356 | 3951 | } |
---|
3357 | 3952 | |
---|
3358 | | -static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new, |
---|
| 3953 | +static bool mnt_already_visible(struct mnt_namespace *ns, |
---|
| 3954 | + const struct super_block *sb, |
---|
3359 | 3955 | int *new_mnt_flags) |
---|
3360 | 3956 | { |
---|
3361 | 3957 | int new_flags = *new_mnt_flags; |
---|
.. | .. |
---|
3363 | 3959 | bool visible = false; |
---|
3364 | 3960 | |
---|
3365 | 3961 | down_read(&namespace_sem); |
---|
| 3962 | + lock_ns_list(ns); |
---|
3366 | 3963 | list_for_each_entry(mnt, &ns->list, mnt_list) { |
---|
3367 | 3964 | struct mount *child; |
---|
3368 | 3965 | int mnt_flags; |
---|
3369 | 3966 | |
---|
3370 | | - if (mnt->mnt.mnt_sb->s_type != new->mnt_sb->s_type) |
---|
| 3967 | + if (mnt_is_cursor(mnt)) |
---|
| 3968 | + continue; |
---|
| 3969 | + |
---|
| 3970 | + if (mnt->mnt.mnt_sb->s_type != sb->s_type) |
---|
3371 | 3971 | continue; |
---|
3372 | 3972 | |
---|
3373 | 3973 | /* This mount is not fully visible if it's root directory |
---|
.. | .. |
---|
3414 | 4014 | next: ; |
---|
3415 | 4015 | } |
---|
3416 | 4016 | found: |
---|
| 4017 | + unlock_ns_list(ns); |
---|
3417 | 4018 | up_read(&namespace_sem); |
---|
3418 | 4019 | return visible; |
---|
3419 | 4020 | } |
---|
3420 | 4021 | |
---|
3421 | | -static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags) |
---|
| 4022 | +static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags) |
---|
3422 | 4023 | { |
---|
3423 | 4024 | const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV; |
---|
3424 | 4025 | struct mnt_namespace *ns = current->nsproxy->mnt_ns; |
---|
.. | .. |
---|
3428 | 4029 | return false; |
---|
3429 | 4030 | |
---|
3430 | 4031 | /* Can this filesystem be too revealing? */ |
---|
3431 | | - s_iflags = mnt->mnt_sb->s_iflags; |
---|
| 4032 | + s_iflags = sb->s_iflags; |
---|
3432 | 4033 | if (!(s_iflags & SB_I_USERNS_VISIBLE)) |
---|
3433 | 4034 | return false; |
---|
3434 | 4035 | |
---|
.. | .. |
---|
3438 | 4039 | return true; |
---|
3439 | 4040 | } |
---|
3440 | 4041 | |
---|
3441 | | - return !mnt_already_visible(ns, mnt, new_mnt_flags); |
---|
| 4042 | + return !mnt_already_visible(ns, sb, new_mnt_flags); |
---|
3442 | 4043 | } |
---|
3443 | 4044 | |
---|
3444 | 4045 | bool mnt_may_suid(struct vfsmount *mnt) |
---|
.. | .. |
---|
3475 | 4076 | put_mnt_ns(to_mnt_ns(ns)); |
---|
3476 | 4077 | } |
---|
3477 | 4078 | |
---|
3478 | | -static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns) |
---|
| 4079 | +static int mntns_install(struct nsset *nsset, struct ns_common *ns) |
---|
3479 | 4080 | { |
---|
3480 | | - struct fs_struct *fs = current->fs; |
---|
| 4081 | + struct nsproxy *nsproxy = nsset->nsproxy; |
---|
| 4082 | + struct fs_struct *fs = nsset->fs; |
---|
3481 | 4083 | struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns; |
---|
| 4084 | + struct user_namespace *user_ns = nsset->cred->user_ns; |
---|
3482 | 4085 | struct path root; |
---|
3483 | 4086 | int err; |
---|
3484 | 4087 | |
---|
3485 | 4088 | if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || |
---|
3486 | | - !ns_capable(current_user_ns(), CAP_SYS_CHROOT) || |
---|
3487 | | - !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) |
---|
| 4089 | + !ns_capable(user_ns, CAP_SYS_CHROOT) || |
---|
| 4090 | + !ns_capable(user_ns, CAP_SYS_ADMIN)) |
---|
3488 | 4091 | return -EPERM; |
---|
3489 | 4092 | |
---|
| 4093 | + if (is_anon_ns(mnt_ns)) |
---|
| 4094 | + return -EINVAL; |
---|
| 4095 | + |
---|
3490 | 4096 | if (fs->users != 1) |
---|
3491 | 4097 | return -EINVAL; |
---|
3492 | 4098 | |
---|