From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/fs/open.c | 413 +++++++++++++++++++++++++++++++++++++++++----------------- 1 files changed, 291 insertions(+), 122 deletions(-) diff --git a/kernel/fs/open.c b/kernel/fs/open.c index b14aef0..965230a 100644 --- a/kernel/fs/open.c +++ b/kernel/fs/open.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/open.c * @@ -33,9 +34,10 @@ #include <linux/compat.h> #include "internal.h" +#include <trace/hooks/syscall_check.h> -int do_truncate2(struct vfsmount *mnt, struct dentry *dentry, loff_t length, - unsigned int time_attrs, struct file *filp) +int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, + struct file *filp) { int ret; struct iattr newattrs; @@ -60,24 +62,17 @@ inode_lock(dentry->d_inode); /* Note any delegations or leases have already been broken: */ - ret = notify_change2(mnt, dentry, &newattrs, NULL); + ret = notify_change(dentry, &newattrs, NULL); inode_unlock(dentry->d_inode); return ret; -} -int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, - struct file *filp) -{ - return do_truncate2(NULL, dentry, length, time_attrs, filp); } long vfs_truncate(const struct path *path, loff_t length) { struct inode *inode; - struct vfsmount *mnt; long error; inode = path->dentry->d_inode; - mnt = path->mnt; /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ if (S_ISDIR(inode->i_mode)) @@ -89,7 +84,7 @@ if (error) goto out; - error = inode_permission2(mnt, inode, MAY_WRITE); + error = inode_permission(inode, MAY_WRITE); if (error) goto mnt_drop_write_and_out; @@ -113,7 +108,7 @@ if (!error) error = security_path_truncate(path); if (!error) - error = do_truncate2(mnt, path->dentry, length, 0, NULL); + error = do_truncate(path->dentry, length, 0, NULL); put_write_and_out: put_write_access(inode); @@ -162,7 +157,6 @@ { struct inode *inode; struct dentry *dentry; - struct vfsmount *mnt; struct fd f; int error; @@ -179,7 +173,6 @@ small = 0; dentry = f.file->f_path.dentry; - mnt = f.file->f_path.mnt; inode = dentry->d_inode; error = -EINVAL; if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE)) @@ -200,7 +193,7 @@ if (!error) error = security_path_truncate(&f.file->f_path); if (!error) - error = do_truncate2(mnt, dentry, length, ATTR_MTIME|ATTR_CTIME, f.file); + error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file); sb_end_write(inode->i_sb); out_putf: fdput(f); @@ -353,22 +346,14 @@ * We do this by temporarily clearing all FS-related capabilities and * switching the fsuid/fsgid around to the real ones. */ -long do_faccessat(int dfd, const char __user *filename, int mode) +static const struct cred *access_override_creds(void) { const struct cred *old_cred; struct cred *override_cred; - struct path path; - struct inode *inode; - struct vfsmount *mnt; - int res; - unsigned int lookup_flags = LOOKUP_FOLLOW; - - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ - return -EINVAL; override_cred = prepare_creds(); if (!override_cred) - return -ENOMEM; + return NULL; override_cred->fsuid = override_cred->uid; override_cred->fsgid = override_cred->gid; @@ -403,13 +388,44 @@ override_cred->non_rcu = 1; old_cred = override_creds(override_cred); + + /* override_cred() gets its own ref */ + put_cred(override_cred); + + return old_cred; +} + +static long do_faccessat(int dfd, const char __user *filename, int mode, int flags) +{ + struct path path; + struct inode *inode; + int res; + unsigned int lookup_flags = LOOKUP_FOLLOW; + const struct cred *old_cred = NULL; + + if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ + return -EINVAL; + + if (flags & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) + return -EINVAL; + + if (flags & AT_SYMLINK_NOFOLLOW) + lookup_flags &= ~LOOKUP_FOLLOW; + if (flags & AT_EMPTY_PATH) + lookup_flags |= LOOKUP_EMPTY; + + if (!(flags & AT_EACCESS)) { + old_cred = access_override_creds(); + if (!old_cred) + return -ENOMEM; + } + retry: res = user_path_at(dfd, filename, lookup_flags, &path); if (res) goto out; inode = d_backing_inode(path.dentry); - mnt = path.mnt; if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) { /* @@ -421,7 +437,7 @@ goto out_path_release; } - res = inode_permission2(mnt, inode, mode | MAY_ACCESS); + res = inode_permission(inode, mode | MAY_ACCESS); /* SuS v2 requires we report a read only fs too */ if (res || !(mode & S_IWOTH) || special_file(inode->i_mode)) goto out_path_release; @@ -445,22 +461,29 @@ goto retry; } out: - revert_creds(old_cred); - put_cred(override_cred); + if (old_cred) + revert_creds(old_cred); + return res; } SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) { - return do_faccessat(dfd, filename, mode); + return do_faccessat(dfd, filename, mode, 0); +} + +SYSCALL_DEFINE4(faccessat2, int, dfd, const char __user *, filename, int, mode, + int, flags) +{ + return do_faccessat(dfd, filename, mode, flags); } SYSCALL_DEFINE2(access, const char __user *, filename, int, mode) { - return do_faccessat(AT_FDCWD, filename, mode); + return do_faccessat(AT_FDCWD, filename, mode, 0); } -int ksys_chdir(const char __user *filename) +SYSCALL_DEFINE1(chdir, const char __user *, filename) { struct path path; int error; @@ -470,7 +493,7 @@ if (error) goto out; - error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); + error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; @@ -486,11 +509,6 @@ return error; } -SYSCALL_DEFINE1(chdir, const char __user *, filename) -{ - return ksys_chdir(filename); -} - SYSCALL_DEFINE1(fchdir, unsigned int, fd) { struct fd f = fdget_raw(fd); @@ -504,8 +522,7 @@ if (!d_can_lookup(f.file->f_path.dentry)) goto out_putf; - error = inode_permission2(f.file->f_path.mnt, file_inode(f.file), - MAY_EXEC | MAY_CHDIR); + error = inode_permission(file_inode(f.file), MAY_EXEC | MAY_CHDIR); if (!error) set_fs_pwd(current->fs, &f.file->f_path); out_putf: @@ -514,7 +531,7 @@ return error; } -int ksys_chroot(const char __user *filename) +SYSCALL_DEFINE1(chroot, const char __user *, filename) { struct path path; int error; @@ -524,7 +541,7 @@ if (error) goto out; - error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); + error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; @@ -547,12 +564,7 @@ return error; } -SYSCALL_DEFINE1(chroot, const char __user *, filename) -{ - return ksys_chroot(filename); -} - -static int chmod_common(const struct path *path, umode_t mode) +int chmod_common(const struct path *path, umode_t mode) { struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; @@ -569,7 +581,7 @@ goto out_unlock; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode); + error = notify_change(path->dentry, &newattrs, &delegated_inode); out_unlock: inode_unlock(inode); if (delegated_inode) { @@ -581,25 +593,25 @@ return error; } -int ksys_fchmod(unsigned int fd, umode_t mode) +int vfs_fchmod(struct file *file, umode_t mode) +{ + audit_file(file); + return chmod_common(&file->f_path, mode); +} + +SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) { struct fd f = fdget(fd); int err = -EBADF; if (f.file) { - audit_file(f.file); - err = chmod_common(&f.file->f_path, mode); + err = vfs_fchmod(f.file, mode); fdput(f); } return err; } -SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) -{ - return ksys_fchmod(fd, mode); -} - -int do_fchmodat(int dfd, const char __user *filename, umode_t mode) +static int do_fchmodat(int dfd, const char __user *filename, umode_t mode) { struct path path; int error; @@ -628,7 +640,7 @@ return do_fchmodat(AT_FDCWD, filename, mode); } -static int chown_common(const struct path *path, uid_t user, gid_t group) +int chown_common(const struct path *path, uid_t user, gid_t group) { struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; @@ -654,13 +666,13 @@ newattrs.ia_valid |= ATTR_GID; newattrs.ia_gid = gid; } - if (!S_ISDIR(inode->i_mode)) - newattrs.ia_valid |= - ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; inode_lock(inode); + if (!S_ISDIR(inode->i_mode)) + newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV | + setattr_should_drop_sgid(inode); error = security_path_chown(path, uid, gid); if (!error) - error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode); + error = notify_change(path->dentry, &newattrs, &delegated_inode); inode_unlock(inode); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); @@ -719,23 +731,28 @@ AT_SYMLINK_NOFOLLOW); } +int vfs_fchown(struct file *file, uid_t user, gid_t group) +{ + int error; + + error = mnt_want_write_file(file); + if (error) + return error; + audit_file(file); + error = chown_common(&file->f_path, user, group); + mnt_drop_write_file(file); + return error; +} + int ksys_fchown(unsigned int fd, uid_t user, gid_t group) { struct fd f = fdget(fd); int error = -EBADF; - if (!f.file) - goto out; - - error = mnt_want_write_file(f.file); - if (error) - goto out_fput; - audit_file(f.file); - error = chown_common(&f.file->f_path, user, group); - mnt_drop_write_file(f.file); -out_fput: - fdput(f); -out: + if (f.file) { + error = vfs_fchown(f.file, user, group); + fdput(f); + } return error; } @@ -754,20 +771,13 @@ path_get(&f->f_path); f->f_inode = inode; f->f_mapping = inode->i_mapping; - - /* Ensure that we skip any errors that predate opening of the file */ f->f_wb_err = filemap_sample_wb_err(f->f_mapping); + f->f_sb_err = file_sample_sb_err(f); if (unlikely(f->f_flags & O_PATH)) { f->f_mode = FMODE_PATH | FMODE_OPENED; f->f_op = &empty_fops; return 0; - } - - /* Any file opened for execve()/uselib() has to be a regular file. */ - if (unlikely(f->f_flags & FMODE_EXEC && !S_ISREG(inode->i_mode))) { - error = -EACCES; - goto cleanup_file; } if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { @@ -787,10 +797,11 @@ f->f_mode |= FMODE_ATOMIC_POS; f->f_op = fops_get(inode->i_fop); - if (unlikely(WARN_ON(!f->f_op))) { + if (WARN_ON(!f->f_op)) { error = -ENODEV; goto cleanup_all; } + trace_android_vh_check_file_open(f); error = security_file_open(f); if (error) @@ -829,6 +840,23 @@ if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) return -EINVAL; } + + /* + * XXX: Huge page cache doesn't support writing yet. Drop all page + * cache for this file before processing writes. + */ + if (f->f_mode & FMODE_WRITE) { + /* + * Paired with smp_mb() in collapse_file() to ensure nr_thps + * is up to date and the update to i_writecount by + * get_write_access() is visible. Ensures subsequent insertion + * of THPs into the page cache will fail. + */ + smp_mb(); + if (filemap_nr_thps(inode->i_mapping)) + truncate_pagecache(inode, 0); + } + return 0; cleanup_all: @@ -955,24 +983,85 @@ } EXPORT_SYMBOL(open_with_fake_path); -static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) +#define WILL_CREATE(flags) (flags & (O_CREAT | __O_TMPFILE)) +#define O_PATH_FLAGS (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC) + +inline struct open_how build_open_how(int flags, umode_t mode) { + struct open_how how = { + .flags = flags & VALID_OPEN_FLAGS, + .mode = mode & S_IALLUGO, + }; + + /* O_PATH beats everything else. */ + if (how.flags & O_PATH) + how.flags &= O_PATH_FLAGS; + /* Modes should only be set for create-like flags. */ + if (!WILL_CREATE(how.flags)) + how.mode = 0; + return how; +} + +inline int build_open_flags(const struct open_how *how, struct open_flags *op) +{ + u64 flags = how->flags; + u64 strip = FMODE_NONOTIFY | O_CLOEXEC; int lookup_flags = 0; int acc_mode = ACC_MODE(flags); + BUILD_BUG_ON_MSG(upper_32_bits(VALID_OPEN_FLAGS), + "struct open_flags doesn't yet handle flags > 32 bits"); + /* - * Clear out all open flags we don't know about so that we don't report - * them in fcntl(F_GETFD) or similar interfaces. + * Strip flags that either shouldn't be set by userspace like + * FMODE_NONOTIFY or that aren't relevant in determining struct + * open_flags like O_CLOEXEC. */ - flags &= VALID_OPEN_FLAGS; + flags &= ~strip; - if (flags & (O_CREAT | __O_TMPFILE)) - op->mode = (mode & S_IALLUGO) | S_IFREG; - else + /* + * Older syscalls implicitly clear all of the invalid flags or argument + * values before calling build_open_flags(), but openat2(2) checks all + * of its arguments. + */ + if (flags & ~VALID_OPEN_FLAGS) + return -EINVAL; + if (how->resolve & ~VALID_RESOLVE_FLAGS) + return -EINVAL; + + /* Scoping flags are mutually exclusive. */ + if ((how->resolve & RESOLVE_BENEATH) && (how->resolve & RESOLVE_IN_ROOT)) + return -EINVAL; + + /* Deal with the mode. */ + if (WILL_CREATE(flags)) { + if (how->mode & ~S_IALLUGO) + return -EINVAL; + op->mode = how->mode | S_IFREG; + } else { + if (how->mode != 0) + return -EINVAL; op->mode = 0; + } - /* Must never be set by userspace */ - flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC; + /* + * In order to ensure programs get explicit errors when trying to use + * O_TMPFILE on old kernels, O_TMPFILE is implemented such that it + * looks like (O_DIRECTORY|O_RDWR & ~O_CREAT) to old kernels. But we + * have to require userspace to explicitly set it. + */ + if (flags & __O_TMPFILE) { + if ((flags & O_TMPFILE_MASK) != O_TMPFILE) + return -EINVAL; + if (!(acc_mode & MAY_WRITE)) + return -EINVAL; + } + if (flags & O_PATH) { + /* O_PATH only permits certain other flags to be set. */ + if (flags & ~O_PATH_FLAGS) + return -EINVAL; + acc_mode = 0; + } /* * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only @@ -982,20 +1071,6 @@ */ if (flags & __O_SYNC) flags |= O_DSYNC; - - if (flags & __O_TMPFILE) { - if ((flags & O_TMPFILE_MASK) != O_TMPFILE) - return -EINVAL; - if (!(acc_mode & MAY_WRITE)) - return -EINVAL; - } else if (flags & O_PATH) { - /* - * If we have O_PATH in the open flag. Then we - * cannot have anything other than the below set of flags - */ - flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH; - acc_mode = 0; - } op->open_flag = flags; @@ -1014,14 +1089,34 @@ if (flags & O_CREAT) { op->intent |= LOOKUP_CREATE; - if (flags & O_EXCL) + if (flags & O_EXCL) { op->intent |= LOOKUP_EXCL; + flags |= O_NOFOLLOW; + } } if (flags & O_DIRECTORY) lookup_flags |= LOOKUP_DIRECTORY; if (!(flags & O_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; + + if (how->resolve & RESOLVE_NO_XDEV) + lookup_flags |= LOOKUP_NO_XDEV; + if (how->resolve & RESOLVE_NO_MAGICLINKS) + lookup_flags |= LOOKUP_NO_MAGICLINKS; + if (how->resolve & RESOLVE_NO_SYMLINKS) + lookup_flags |= LOOKUP_NO_SYMLINKS; + if (how->resolve & RESOLVE_BENEATH) + lookup_flags |= LOOKUP_BENEATH; + if (how->resolve & RESOLVE_IN_ROOT) + lookup_flags |= LOOKUP_IN_ROOT; + if (how->resolve & RESOLVE_CACHED) { + /* Don't bother even trying for create/truncate/tmpfile open */ + if (flags & (O_TRUNC | O_CREAT | __O_TMPFILE)) + return -EAGAIN; + lookup_flags |= LOOKUP_CACHED; + } + op->lookup_flags = lookup_flags; return 0; } @@ -1040,8 +1135,11 @@ struct file *file_open_name(struct filename *name, int flags, umode_t mode) { struct open_flags op; - int err = build_open_flags(flags, mode, &op); - return err ? ERR_PTR(err) : do_filp_open(AT_FDCWD, name, &op); + struct open_how how = build_open_how(flags, mode); + int err = build_open_flags(&how, &op); + if (err) + return ERR_PTR(err); + return do_filp_open(AT_FDCWD, name, &op); } /** @@ -1066,23 +1164,45 @@ } return file; } -EXPORT_SYMBOL(filp_open); +EXPORT_SYMBOL_NS(filp_open, ANDROID_GKI_VFS_EXPORT_ONLY); + +/* ANDROID: Allow drivers to open only block files from kernel mode */ +struct file *filp_open_block(const char *filename, int flags, umode_t mode) +{ + struct file *file; + + file = filp_open(filename, flags, mode); + if (IS_ERR(file)) + goto err_out; + + /* Drivers should only be allowed to open block devices */ + if (!S_ISBLK(file->f_mapping->host->i_mode)) { + filp_close(file, NULL); + file = ERR_PTR(-ENOTBLK); + } + +err_out: + return file; +} +EXPORT_SYMBOL_GPL(filp_open_block); struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, const char *filename, int flags, umode_t mode) { struct open_flags op; - int err = build_open_flags(flags, mode, &op); + struct open_how how = build_open_how(flags, mode); + int err = build_open_flags(&how, &op); if (err) return ERR_PTR(err); return do_file_open_root(dentry, mnt, filename, &op); } EXPORT_SYMBOL(file_open_root); -long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) +static long do_sys_openat2(int dfd, const char __user *filename, + struct open_how *how) { struct open_flags op; - int fd = build_open_flags(flags, mode, &op); + int fd = build_open_flags(how, &op); struct filename *tmp; if (fd) @@ -1092,7 +1212,7 @@ if (IS_ERR(tmp)) return PTR_ERR(tmp); - fd = get_unused_fd_flags(flags); + fd = get_unused_fd_flags(how->flags); if (fd >= 0) { struct file *f = do_filp_open(dfd, tmp, &op); if (IS_ERR(f)) { @@ -1107,11 +1227,17 @@ return fd; } +long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) +{ + struct open_how how = build_open_how(flags, mode); + return do_sys_openat2(dfd, filename, &how); +} + + SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode) { if (force_o_largefile()) flags |= O_LARGEFILE; - return do_sys_open(AT_FDCWD, filename, flags, mode); } @@ -1120,8 +1246,30 @@ { if (force_o_largefile()) flags |= O_LARGEFILE; - return do_sys_open(dfd, filename, flags, mode); +} + +SYSCALL_DEFINE4(openat2, int, dfd, const char __user *, filename, + struct open_how __user *, how, size_t, usize) +{ + int err; + struct open_how tmp; + + BUILD_BUG_ON(sizeof(struct open_how) < OPEN_HOW_SIZE_VER0); + BUILD_BUG_ON(sizeof(struct open_how) != OPEN_HOW_SIZE_LATEST); + + if (unlikely(usize < OPEN_HOW_SIZE_VER0)) + return -EINVAL; + + err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize); + if (err) + return err; + + /* O_LARGEFILE is only allowed for non-O_PATH. */ + if (!(tmp.flags & O_PATH) && force_o_largefile()) + tmp.flags |= O_LARGEFILE; + + return do_sys_openat2(dfd, filename, &tmp); } #ifdef CONFIG_COMPAT @@ -1152,9 +1300,12 @@ */ SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode) { - return ksys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); -} + int flags = O_CREAT | O_WRONLY | O_TRUNC; + if (force_o_largefile()) + flags |= O_LARGEFILE; + return do_sys_open(AT_FDCWD, pathname, flags, mode); +} #endif /* @@ -1202,6 +1353,23 @@ return retval; } +/** + * close_range() - Close all file descriptors in a given range. + * + * @fd: starting file descriptor to close + * @max_fd: last file descriptor to close + * @flags: reserved for future extensions + * + * This closes a range of file descriptors. All file descriptors + * from @fd up to and including @max_fd are closed. + * Currently, errors to close a given file descriptor are ignored. + */ +SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd, + unsigned int, flags) +{ + return __close_range(fd, max_fd, flags); +} + /* * This routine simulates a hangup on the tty, to arrange that users * are given clean terminals at login time. @@ -1228,7 +1396,7 @@ return 0; } -EXPORT_SYMBOL(generic_file_open); +EXPORT_SYMBOL_NS(generic_file_open, ANDROID_GKI_VFS_EXPORT_ONLY); /* * This is used by subsystems that don't want seekable @@ -1247,8 +1415,9 @@ /* * stream_open is used by subsystems that want stream-like file descriptors. * Such file descriptors are not seekable and don't have notion of position - * (file.f_pos is always 0). Contrary to file descriptors of other regular - * files, .read() and .write() can run simultaneously. + * (file.f_pos is always 0 and ppos passed to .read()/.write() is always NULL). + * Contrary to file descriptors of other regular files, .read() and .write() + * can run simultaneously. * * stream_open never fails and is marked to return int so that it could be * directly used as file_operations.open . -- Gitblit v1.6.2