.. | .. |
---|
10 | 10 | #include <linux/syscalls.h> |
---|
11 | 11 | #include <linux/export.h> |
---|
12 | 12 | #include <linux/fs.h> |
---|
| 13 | +#include <linux/kernel.h> |
---|
13 | 14 | #include <linux/mm.h> |
---|
14 | 15 | #include <linux/sched/signal.h> |
---|
15 | 16 | #include <linux/slab.h> |
---|
.. | .. |
---|
18 | 19 | #include <linux/bitops.h> |
---|
19 | 20 | #include <linux/spinlock.h> |
---|
20 | 21 | #include <linux/rcupdate.h> |
---|
| 22 | +#include <linux/close_range.h> |
---|
| 23 | +#include <net/sock.h> |
---|
| 24 | + |
---|
| 25 | +#include "internal.h" |
---|
21 | 26 | |
---|
22 | 27 | unsigned int sysctl_nr_open __read_mostly = 1024*1024; |
---|
23 | 28 | unsigned int sysctl_nr_open_min = BITS_PER_LONG; |
---|
.. | .. |
---|
82 | 87 | copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds); |
---|
83 | 88 | } |
---|
84 | 89 | |
---|
| 90 | +/* |
---|
| 91 | + * Note how the fdtable bitmap allocations very much have to be a multiple of |
---|
| 92 | + * BITS_PER_LONG. This is not only because we walk those things in chunks of |
---|
| 93 | + * 'unsigned long' in some places, but simply because that is how the Linux |
---|
| 94 | + * kernel bitmaps are defined to work: they are not "bits in an array of bytes", |
---|
| 95 | + * they are very much "bits in an array of unsigned long". |
---|
| 96 | + * |
---|
| 97 | + * The ALIGN(nr, BITS_PER_LONG) here is for clarity: since we just multiplied |
---|
| 98 | + * by that "1024/sizeof(ptr)" before, we already know there are sufficient |
---|
| 99 | + * clear low bits. Clang seems to realize that, gcc ends up being confused. |
---|
| 100 | + * |
---|
| 101 | + * On a 128-bit machine, the ALIGN() would actually matter. In the meantime, |
---|
| 102 | + * let's consider it documentation (and maybe a test-case for gcc to improve |
---|
| 103 | + * its code generation ;) |
---|
| 104 | + */ |
---|
85 | 105 | static struct fdtable * alloc_fdtable(unsigned int nr) |
---|
86 | 106 | { |
---|
87 | 107 | struct fdtable *fdt; |
---|
.. | .. |
---|
97 | 117 | nr /= (1024 / sizeof(struct file *)); |
---|
98 | 118 | nr = roundup_pow_of_two(nr + 1); |
---|
99 | 119 | nr *= (1024 / sizeof(struct file *)); |
---|
| 120 | + nr = ALIGN(nr, BITS_PER_LONG); |
---|
100 | 121 | /* |
---|
101 | 122 | * Note that this can drive nr *below* what we had passed if sysctl_nr_open |
---|
102 | 123 | * had been set lower between the check in expand_files() and here. Deal |
---|
.. | .. |
---|
158 | 179 | * or have finished their rcu_read_lock_sched() section. |
---|
159 | 180 | */ |
---|
160 | 181 | if (atomic_read(&files->count) > 1) |
---|
161 | | - synchronize_sched(); |
---|
| 182 | + synchronize_rcu(); |
---|
162 | 183 | |
---|
163 | 184 | spin_lock(&files->file_lock); |
---|
164 | 185 | if (!new_fdt) |
---|
.. | .. |
---|
265 | 286 | } |
---|
266 | 287 | |
---|
267 | 288 | /* |
---|
| 289 | + * Note that a sane fdtable size always has to be a multiple of |
---|
| 290 | + * BITS_PER_LONG, since we have bitmaps that are sized by this. |
---|
| 291 | + * |
---|
| 292 | + * 'max_fds' will normally already be properly aligned, but it |
---|
| 293 | + * turns out that in the close_range() -> __close_range() -> |
---|
| 294 | + * unshare_fd() -> dup_fd() -> sane_fdtable_size() we can end |
---|
| 295 | + * up having a 'max_fds' value that isn't already aligned. |
---|
| 296 | + * |
---|
| 297 | + * Rather than make close_range() have to worry about this, |
---|
| 298 | + * just make that BITS_PER_LONG alignment be part of a sane |
---|
| 299 | + * fdtable size. Because that's really what it is. |
---|
| 300 | + */ |
---|
| 301 | +static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds) |
---|
| 302 | +{ |
---|
| 303 | + unsigned int count; |
---|
| 304 | + |
---|
| 305 | + count = count_open_files(fdt); |
---|
| 306 | + if (max_fds < NR_OPEN_DEFAULT) |
---|
| 307 | + max_fds = NR_OPEN_DEFAULT; |
---|
| 308 | + return ALIGN(min(count, max_fds), BITS_PER_LONG); |
---|
| 309 | +} |
---|
| 310 | + |
---|
| 311 | +/* |
---|
268 | 312 | * Allocate a new files structure and copy contents from the |
---|
269 | 313 | * passed in files structure. |
---|
270 | 314 | * errorp will be valid only when the returned files_struct is NULL. |
---|
271 | 315 | */ |
---|
272 | | -struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) |
---|
| 316 | +struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp) |
---|
273 | 317 | { |
---|
274 | 318 | struct files_struct *newf; |
---|
275 | 319 | struct file **old_fds, **new_fds; |
---|
.. | .. |
---|
296 | 340 | |
---|
297 | 341 | spin_lock(&oldf->file_lock); |
---|
298 | 342 | old_fdt = files_fdtable(oldf); |
---|
299 | | - open_files = count_open_files(old_fdt); |
---|
| 343 | + open_files = sane_fdtable_size(old_fdt, max_fds); |
---|
300 | 344 | |
---|
301 | 345 | /* |
---|
302 | 346 | * Check whether we need to allocate a larger fd array and fd set. |
---|
.. | .. |
---|
327 | 371 | */ |
---|
328 | 372 | spin_lock(&oldf->file_lock); |
---|
329 | 373 | old_fdt = files_fdtable(oldf); |
---|
330 | | - open_files = count_open_files(old_fdt); |
---|
| 374 | + open_files = sane_fdtable_size(old_fdt, max_fds); |
---|
331 | 375 | } |
---|
332 | 376 | |
---|
333 | 377 | copy_fd_bitmaps(new_fdt, old_fdt, open_files); |
---|
.. | .. |
---|
540 | 584 | return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags); |
---|
541 | 585 | } |
---|
542 | 586 | |
---|
| 587 | +int __get_unused_fd_flags(unsigned flags, unsigned long nofile) |
---|
| 588 | +{ |
---|
| 589 | + return __alloc_fd(current->files, 0, nofile, flags); |
---|
| 590 | +} |
---|
| 591 | + |
---|
543 | 592 | int get_unused_fd_flags(unsigned flags) |
---|
544 | 593 | { |
---|
545 | | - return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags); |
---|
| 594 | + return __get_unused_fd_flags(flags, rlimit(RLIMIT_NOFILE)); |
---|
546 | 595 | } |
---|
547 | 596 | EXPORT_SYMBOL(get_unused_fd_flags); |
---|
548 | 597 | |
---|
.. | .. |
---|
608 | 657 | rcu_read_unlock_sched(); |
---|
609 | 658 | } |
---|
610 | 659 | |
---|
| 660 | +/* |
---|
| 661 | + * This consumes the "file" refcount, so callers should treat it |
---|
| 662 | + * as if they had called fput(file). |
---|
| 663 | + */ |
---|
611 | 664 | void fd_install(unsigned int fd, struct file *file) |
---|
612 | 665 | { |
---|
613 | 666 | __fd_install(current->files, fd, file); |
---|
.. | .. |
---|
615 | 668 | |
---|
616 | 669 | EXPORT_SYMBOL(fd_install); |
---|
617 | 670 | |
---|
618 | | -/* |
---|
619 | | - * The same warnings as for __alloc_fd()/__fd_install() apply here... |
---|
620 | | - */ |
---|
621 | | -int __close_fd(struct files_struct *files, unsigned fd) |
---|
| 671 | +static struct file *pick_file(struct files_struct *files, unsigned fd) |
---|
622 | 672 | { |
---|
623 | | - struct file *file; |
---|
| 673 | + struct file *file = NULL; |
---|
624 | 674 | struct fdtable *fdt; |
---|
625 | 675 | |
---|
626 | 676 | spin_lock(&files->file_lock); |
---|
.. | .. |
---|
632 | 682 | goto out_unlock; |
---|
633 | 683 | rcu_assign_pointer(fdt->fd[fd], NULL); |
---|
634 | 684 | __put_unused_fd(files, fd); |
---|
635 | | - spin_unlock(&files->file_lock); |
---|
636 | | - return filp_close(file, files); |
---|
637 | 685 | |
---|
638 | 686 | out_unlock: |
---|
639 | 687 | spin_unlock(&files->file_lock); |
---|
640 | | - return -EBADF; |
---|
| 688 | + return file; |
---|
| 689 | +} |
---|
| 690 | + |
---|
| 691 | +/* |
---|
| 692 | + * The same warnings as for __alloc_fd()/__fd_install() apply here... |
---|
| 693 | + */ |
---|
| 694 | +int __close_fd(struct files_struct *files, unsigned fd) |
---|
| 695 | +{ |
---|
| 696 | + struct file *file; |
---|
| 697 | + |
---|
| 698 | + file = pick_file(files, fd); |
---|
| 699 | + if (!file) |
---|
| 700 | + return -EBADF; |
---|
| 701 | + |
---|
| 702 | + return filp_close(file, files); |
---|
641 | 703 | } |
---|
642 | 704 | EXPORT_SYMBOL(__close_fd); /* for ksys_close() */ |
---|
| 705 | + |
---|
| 706 | +/** |
---|
| 707 | + * __close_range() - Close all file descriptors in a given range. |
---|
| 708 | + * |
---|
| 709 | + * @fd: starting file descriptor to close |
---|
| 710 | + * @max_fd: last file descriptor to close |
---|
| 711 | + * |
---|
| 712 | + * This closes a range of file descriptors. All file descriptors |
---|
| 713 | + * from @fd up to and including @max_fd are closed. |
---|
| 714 | + */ |
---|
| 715 | +int __close_range(unsigned fd, unsigned max_fd, unsigned int flags) |
---|
| 716 | +{ |
---|
| 717 | + unsigned int cur_max; |
---|
| 718 | + struct task_struct *me = current; |
---|
| 719 | + struct files_struct *cur_fds = me->files, *fds = NULL; |
---|
| 720 | + |
---|
| 721 | + if (flags & ~CLOSE_RANGE_UNSHARE) |
---|
| 722 | + return -EINVAL; |
---|
| 723 | + |
---|
| 724 | + if (fd > max_fd) |
---|
| 725 | + return -EINVAL; |
---|
| 726 | + |
---|
| 727 | + rcu_read_lock(); |
---|
| 728 | + cur_max = files_fdtable(cur_fds)->max_fds; |
---|
| 729 | + rcu_read_unlock(); |
---|
| 730 | + |
---|
| 731 | + /* cap to last valid index into fdtable */ |
---|
| 732 | + cur_max--; |
---|
| 733 | + |
---|
| 734 | + if (flags & CLOSE_RANGE_UNSHARE) { |
---|
| 735 | + int ret; |
---|
| 736 | + unsigned int max_unshare_fds = NR_OPEN_MAX; |
---|
| 737 | + |
---|
| 738 | + /* |
---|
| 739 | + * If the requested range is greater than the current maximum, |
---|
| 740 | + * we're closing everything so only copy all file descriptors |
---|
| 741 | + * beneath the lowest file descriptor. |
---|
| 742 | + */ |
---|
| 743 | + if (max_fd >= cur_max) |
---|
| 744 | + max_unshare_fds = fd; |
---|
| 745 | + |
---|
| 746 | + ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds); |
---|
| 747 | + if (ret) |
---|
| 748 | + return ret; |
---|
| 749 | + |
---|
| 750 | + /* |
---|
| 751 | + * We used to share our file descriptor table, and have now |
---|
| 752 | + * created a private one, make sure we're using it below. |
---|
| 753 | + */ |
---|
| 754 | + if (fds) |
---|
| 755 | + swap(cur_fds, fds); |
---|
| 756 | + } |
---|
| 757 | + |
---|
| 758 | + max_fd = min(max_fd, cur_max); |
---|
| 759 | + while (fd <= max_fd) { |
---|
| 760 | + struct file *file; |
---|
| 761 | + |
---|
| 762 | + file = pick_file(cur_fds, fd++); |
---|
| 763 | + if (!file) |
---|
| 764 | + continue; |
---|
| 765 | + |
---|
| 766 | + filp_close(file, cur_fds); |
---|
| 767 | + cond_resched(); |
---|
| 768 | + } |
---|
| 769 | + |
---|
| 770 | + if (fds) { |
---|
| 771 | + /* |
---|
| 772 | + * We're done closing the files we were supposed to. Time to install |
---|
| 773 | + * the new file descriptor table and drop the old one. |
---|
| 774 | + */ |
---|
| 775 | + task_lock(me); |
---|
| 776 | + me->files = cur_fds; |
---|
| 777 | + task_unlock(me); |
---|
| 778 | + put_files_struct(fds); |
---|
| 779 | + } |
---|
| 780 | + |
---|
| 781 | + return 0; |
---|
| 782 | +} |
---|
| 783 | + |
---|
| 784 | +/* |
---|
| 785 | + * See close_fd_get_file() below, this variant assumes current->files->file_lock |
---|
| 786 | + * is held. |
---|
| 787 | + */ |
---|
| 788 | +int __close_fd_get_file(unsigned int fd, struct file **res) |
---|
| 789 | +{ |
---|
| 790 | + struct files_struct *files = current->files; |
---|
| 791 | + struct file *file; |
---|
| 792 | + struct fdtable *fdt; |
---|
| 793 | + |
---|
| 794 | + fdt = files_fdtable(files); |
---|
| 795 | + if (fd >= fdt->max_fds) |
---|
| 796 | + goto out_err; |
---|
| 797 | + file = fdt->fd[fd]; |
---|
| 798 | + if (!file) |
---|
| 799 | + goto out_err; |
---|
| 800 | + rcu_assign_pointer(fdt->fd[fd], NULL); |
---|
| 801 | + __put_unused_fd(files, fd); |
---|
| 802 | + get_file(file); |
---|
| 803 | + *res = file; |
---|
| 804 | + return 0; |
---|
| 805 | +out_err: |
---|
| 806 | + *res = NULL; |
---|
| 807 | + return -ENOENT; |
---|
| 808 | +} |
---|
| 809 | + |
---|
| 810 | +/* |
---|
| 811 | + * variant of close_fd that gets a ref on the file for later fput. |
---|
| 812 | + * The caller must ensure that filp_close() is called on the file, and then |
---|
| 813 | + * an fput(). |
---|
| 814 | + */ |
---|
| 815 | +int close_fd_get_file(unsigned int fd, struct file **res) |
---|
| 816 | +{ |
---|
| 817 | + struct files_struct *files = current->files; |
---|
| 818 | + int ret; |
---|
| 819 | + |
---|
| 820 | + spin_lock(&files->file_lock); |
---|
| 821 | + ret = __close_fd_get_file(fd, res); |
---|
| 822 | + spin_unlock(&files->file_lock); |
---|
| 823 | + |
---|
| 824 | + return ret; |
---|
| 825 | +} |
---|
643 | 826 | |
---|
644 | 827 | void do_close_on_exec(struct files_struct *files) |
---|
645 | 828 | { |
---|
.. | .. |
---|
678 | 861 | } |
---|
679 | 862 | |
---|
680 | 863 | static inline struct file *__fget_files_rcu(struct files_struct *files, |
---|
681 | | - unsigned int fd, fmode_t mask, unsigned int refs) |
---|
| 864 | + unsigned int fd, fmode_t mask, unsigned int refs) |
---|
682 | 865 | { |
---|
683 | 866 | for (;;) { |
---|
684 | 867 | struct file *file; |
---|
.. | .. |
---|
732 | 915 | } |
---|
733 | 916 | } |
---|
734 | 917 | |
---|
735 | | - |
---|
736 | | -static struct file *__fget(unsigned int fd, fmode_t mask, unsigned int refs) |
---|
| 918 | +static struct file *__fget_files(struct files_struct *files, unsigned int fd, |
---|
| 919 | + fmode_t mask, unsigned int refs) |
---|
737 | 920 | { |
---|
738 | | - struct files_struct *files = current->files; |
---|
739 | 921 | struct file *file; |
---|
740 | 922 | |
---|
741 | 923 | rcu_read_lock(); |
---|
.. | .. |
---|
743 | 925 | rcu_read_unlock(); |
---|
744 | 926 | |
---|
745 | 927 | return file; |
---|
| 928 | +} |
---|
| 929 | + |
---|
| 930 | +static inline struct file *__fget(unsigned int fd, fmode_t mask, |
---|
| 931 | + unsigned int refs) |
---|
| 932 | +{ |
---|
| 933 | + return __fget_files(current->files, fd, mask, refs); |
---|
746 | 934 | } |
---|
747 | 935 | |
---|
748 | 936 | struct file *fget_many(unsigned int fd, unsigned int refs) |
---|
.. | .. |
---|
761 | 949 | return __fget(fd, 0, 1); |
---|
762 | 950 | } |
---|
763 | 951 | EXPORT_SYMBOL(fget_raw); |
---|
| 952 | + |
---|
| 953 | +struct file *fget_task(struct task_struct *task, unsigned int fd) |
---|
| 954 | +{ |
---|
| 955 | + struct file *file = NULL; |
---|
| 956 | + |
---|
| 957 | + task_lock(task); |
---|
| 958 | + if (task->files) |
---|
| 959 | + file = __fget_files(task->files, fd, 0, 1); |
---|
| 960 | + task_unlock(task); |
---|
| 961 | + |
---|
| 962 | + return file; |
---|
| 963 | +} |
---|
764 | 964 | |
---|
765 | 965 | /* |
---|
766 | 966 | * Lightweight file lookup - no refcnt increment if fd table isn't shared. |
---|
.. | .. |
---|
922 | 1122 | return err; |
---|
923 | 1123 | } |
---|
924 | 1124 | |
---|
| 1125 | +/** |
---|
| 1126 | + * __receive_fd() - Install received file into file descriptor table |
---|
| 1127 | + * |
---|
| 1128 | + * @fd: fd to install into (if negative, a new fd will be allocated) |
---|
| 1129 | + * @file: struct file that was received from another process |
---|
| 1130 | + * @ufd: __user pointer to write new fd number to |
---|
| 1131 | + * @o_flags: the O_* flags to apply to the new fd entry |
---|
| 1132 | + * |
---|
| 1133 | + * Installs a received file into the file descriptor table, with appropriate |
---|
| 1134 | + * checks and count updates. Optionally writes the fd number to userspace, if |
---|
| 1135 | + * @ufd is non-NULL. |
---|
| 1136 | + * |
---|
| 1137 | + * This helper handles its own reference counting of the incoming |
---|
| 1138 | + * struct file. |
---|
| 1139 | + * |
---|
| 1140 | + * Returns newly installed fd or -ve on error. |
---|
| 1141 | + */ |
---|
| 1142 | +int __receive_fd(int fd, struct file *file, int __user *ufd, unsigned int o_flags) |
---|
| 1143 | +{ |
---|
| 1144 | + int new_fd; |
---|
| 1145 | + int error; |
---|
| 1146 | + |
---|
| 1147 | + error = security_file_receive(file); |
---|
| 1148 | + if (error) |
---|
| 1149 | + return error; |
---|
| 1150 | + |
---|
| 1151 | + if (fd < 0) { |
---|
| 1152 | + new_fd = get_unused_fd_flags(o_flags); |
---|
| 1153 | + if (new_fd < 0) |
---|
| 1154 | + return new_fd; |
---|
| 1155 | + } else { |
---|
| 1156 | + new_fd = fd; |
---|
| 1157 | + } |
---|
| 1158 | + |
---|
| 1159 | + if (ufd) { |
---|
| 1160 | + error = put_user(new_fd, ufd); |
---|
| 1161 | + if (error) { |
---|
| 1162 | + if (fd < 0) |
---|
| 1163 | + put_unused_fd(new_fd); |
---|
| 1164 | + return error; |
---|
| 1165 | + } |
---|
| 1166 | + } |
---|
| 1167 | + |
---|
| 1168 | + if (fd < 0) { |
---|
| 1169 | + fd_install(new_fd, get_file(file)); |
---|
| 1170 | + } else { |
---|
| 1171 | + error = replace_fd(new_fd, file, o_flags); |
---|
| 1172 | + if (error) |
---|
| 1173 | + return error; |
---|
| 1174 | + } |
---|
| 1175 | + |
---|
| 1176 | + /* Bump the sock usage counts, if any. */ |
---|
| 1177 | + __receive_sock(file); |
---|
| 1178 | + return new_fd; |
---|
| 1179 | +} |
---|
| 1180 | + |
---|
925 | 1181 | static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags) |
---|
926 | 1182 | { |
---|
927 | 1183 | int err = -EBADF; |
---|
.. | .. |
---|
976 | 1232 | return ksys_dup3(oldfd, newfd, 0); |
---|
977 | 1233 | } |
---|
978 | 1234 | |
---|
979 | | -int ksys_dup(unsigned int fildes) |
---|
| 1235 | +SYSCALL_DEFINE1(dup, unsigned int, fildes) |
---|
980 | 1236 | { |
---|
981 | 1237 | int ret = -EBADF; |
---|
982 | 1238 | struct file *file = fget_raw(fildes); |
---|
.. | .. |
---|
989 | 1245 | fput(file); |
---|
990 | 1246 | } |
---|
991 | 1247 | return ret; |
---|
992 | | -} |
---|
993 | | - |
---|
994 | | -SYSCALL_DEFINE1(dup, unsigned int, fildes) |
---|
995 | | -{ |
---|
996 | | - return ksys_dup(fildes); |
---|
997 | 1248 | } |
---|
998 | 1249 | |
---|
999 | 1250 | int f_dupfd(unsigned int from, struct file *file, unsigned flags) |
---|