From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/fs/xfs/xfs_ioctl.c | 1372 +++++++++++++++++++++++++++++++++-------------------------- 1 files changed, 769 insertions(+), 603 deletions(-) diff --git a/kernel/fs/xfs/xfs_ioctl.c b/kernel/fs/xfs/xfs_ioctl.c index f123089..103fa83 100644 --- a/kernel/fs/xfs/xfs_ioctl.c +++ b/kernel/fs/xfs/xfs_ioctl.c @@ -11,9 +11,8 @@ #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" -#include "xfs_ioctl.h" -#include "xfs_alloc.h" #include "xfs_rtalloc.h" +#include "xfs_iwalk.h" #include "xfs_itable.h" #include "xfs_error.h" #include "xfs_attr.h" @@ -25,7 +24,6 @@ #include "xfs_export.h" #include "xfs_trace.h" #include "xfs_icache.h" -#include "xfs_symlink.h" #include "xfs_trans.h" #include "xfs_acl.h" #include "xfs_btree.h" @@ -33,15 +31,15 @@ #include "xfs_fsmap.h" #include "scrub/xfs_scrub.h" #include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_health.h" +#include "xfs_reflink.h" +#include "xfs_ioctl.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" -#include <linux/capability.h> -#include <linux/cred.h> -#include <linux/dcache.h> #include <linux/mount.h> #include <linux/namei.h> -#include <linux/pagemap.h> -#include <linux/slab.h> -#include <linux/exportfs.h> /* * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to @@ -73,7 +71,7 @@ return -EBADF; inode = file_inode(f.file); } else { - error = user_lpath((const char __user *)hreq->path, &path); + error = user_path_at(AT_FDCWD, hreq->path, 0, &path); if (error) return error; inode = d_inode(path.dentry); @@ -296,138 +294,173 @@ return error; } -int -xfs_set_dmattrs( - xfs_inode_t *ip, - uint evmask, - uint16_t state) +/* + * Format an attribute and copy it out to the user's buffer. + * Take care to check values and protect against them changing later, + * we may be reading them directly out of a user buffer. + */ +static void +xfs_ioc_attr_put_listent( + struct xfs_attr_list_context *context, + int flags, + unsigned char *name, + int namelen, + int valuelen) { - xfs_mount_t *mp = ip->i_mount; - xfs_trans_t *tp; - int error; + struct xfs_attrlist *alist = context->buffer; + struct xfs_attrlist_ent *aep; + int arraytop; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + ASSERT(!context->seen_enough); + ASSERT(context->count >= 0); + ASSERT(context->count < (ATTR_MAX_VALUELEN/8)); + ASSERT(context->firstu >= sizeof(*alist)); + ASSERT(context->firstu <= context->bufsize); - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; + /* + * Only list entries in the right namespace. + */ + if (context->attr_filter != (flags & XFS_ATTR_NSP_ONDISK_MASK)) + return; - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); - if (error) - return error; + arraytop = sizeof(*alist) + + context->count * sizeof(alist->al_offset[0]); - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + /* decrement by the actual bytes used by the attr */ + context->firstu -= round_up(offsetof(struct xfs_attrlist_ent, a_name) + + namelen + 1, sizeof(uint32_t)); + if (context->firstu < arraytop) { + trace_xfs_attr_list_full(context); + alist->al_more = 1; + context->seen_enough = 1; + return; + } - ip->i_d.di_dmevmask = evmask; - ip->i_d.di_dmstate = state; - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_trans_commit(tp); - - return error; + aep = context->buffer + context->firstu; + aep->a_valuelen = valuelen; + memcpy(aep->a_name, name, namelen); + aep->a_name[namelen] = 0; + alist->al_offset[context->count++] = context->firstu; + alist->al_count = context->count; + trace_xfs_attr_list_add(context); } -STATIC int -xfs_fssetdm_by_handle( - struct file *parfilp, - void __user *arg) +static unsigned int +xfs_attr_filter( + u32 ioc_flags) { - int error; - struct fsdmidata fsd; - xfs_fsop_setdm_handlereq_t dmhreq; - struct dentry *dentry; + if (ioc_flags & XFS_IOC_ATTR_ROOT) + return XFS_ATTR_ROOT; + if (ioc_flags & XFS_IOC_ATTR_SECURE) + return XFS_ATTR_SECURE; + return 0; +} - if (!capable(CAP_MKNOD)) - return -EPERM; - if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) +static unsigned int +xfs_attr_flags( + u32 ioc_flags) +{ + if (ioc_flags & XFS_IOC_ATTR_CREATE) + return XATTR_CREATE; + if (ioc_flags & XFS_IOC_ATTR_REPLACE) + return XATTR_REPLACE; + return 0; +} + +int +xfs_ioc_attr_list( + struct xfs_inode *dp, + void __user *ubuf, + size_t bufsize, + int flags, + struct xfs_attrlist_cursor __user *ucursor) +{ + struct xfs_attr_list_context context = { }; + struct xfs_attrlist *alist; + void *buffer; + int error; + + if (bufsize < sizeof(struct xfs_attrlist) || + bufsize > XFS_XATTR_LIST_MAX) + return -EINVAL; + + /* + * Reject flags, only allow namespaces. + */ + if (flags & ~(XFS_IOC_ATTR_ROOT | XFS_IOC_ATTR_SECURE)) + return -EINVAL; + if (flags == (XFS_IOC_ATTR_ROOT | XFS_IOC_ATTR_SECURE)) + return -EINVAL; + + /* + * Validate the cursor. + */ + if (copy_from_user(&context.cursor, ucursor, sizeof(context.cursor))) return -EFAULT; + if (context.cursor.pad1 || context.cursor.pad2) + return -EINVAL; + if (!context.cursor.initted && + (context.cursor.hashval || context.cursor.blkno || + context.cursor.offset)) + return -EINVAL; - error = mnt_want_write_file(parfilp); + buffer = kvzalloc(bufsize, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + /* + * Initialize the output buffer. + */ + context.dp = dp; + context.resynch = 1; + context.attr_filter = xfs_attr_filter(flags); + context.buffer = buffer; + context.bufsize = round_down(bufsize, sizeof(uint32_t)); + context.firstu = context.bufsize; + context.put_listent = xfs_ioc_attr_put_listent; + + alist = context.buffer; + alist->al_count = 0; + alist->al_more = 0; + alist->al_offset[0] = context.bufsize; + + error = xfs_attr_list(&context); if (error) - return error; + goto out_free; - dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq); - if (IS_ERR(dentry)) { - mnt_drop_write_file(parfilp); - return PTR_ERR(dentry); - } - - if (IS_IMMUTABLE(d_inode(dentry)) || IS_APPEND(d_inode(dentry))) { - error = -EPERM; - goto out; - } - - if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) { + if (copy_to_user(ubuf, buffer, bufsize) || + copy_to_user(ucursor, &context.cursor, sizeof(context.cursor))) error = -EFAULT; - goto out; - } - - error = xfs_set_dmattrs(XFS_I(d_inode(dentry)), fsd.fsd_dmevmask, - fsd.fsd_dmstate); - - out: - mnt_drop_write_file(parfilp); - dput(dentry); +out_free: + kmem_free(buffer); return error; } STATIC int xfs_attrlist_by_handle( struct file *parfilp, - void __user *arg) + struct xfs_fsop_attrlist_handlereq __user *p) { - int error = -ENOMEM; - attrlist_cursor_kern_t *cursor; - struct xfs_fsop_attrlist_handlereq __user *p = arg; - xfs_fsop_attrlist_handlereq_t al_hreq; + struct xfs_fsop_attrlist_handlereq al_hreq; struct dentry *dentry; - char *kbuf; + int error = -ENOMEM; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) + if (copy_from_user(&al_hreq, p, sizeof(al_hreq))) return -EFAULT; - if (al_hreq.buflen < sizeof(struct attrlist) || - al_hreq.buflen > XFS_XATTR_LIST_MAX) - return -EINVAL; - - /* - * Reject flags, only allow namespaces. - */ - if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) - return -EINVAL; dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq); if (IS_ERR(dentry)) return PTR_ERR(dentry); - kbuf = kmem_zalloc_large(al_hreq.buflen, KM_SLEEP); - if (!kbuf) - goto out_dput; - - cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; - error = xfs_attr_list(XFS_I(d_inode(dentry)), kbuf, al_hreq.buflen, - al_hreq.flags, cursor); - if (error) - goto out_kfree; - - if (copy_to_user(&p->pos, cursor, sizeof(attrlist_cursor_kern_t))) { - error = -EFAULT; - goto out_kfree; - } - - if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen)) - error = -EFAULT; - -out_kfree: - kmem_free(kbuf); -out_dput: + error = xfs_ioc_attr_list(XFS_I(d_inode(dentry)), al_hreq.buffer, + al_hreq.buflen, al_hreq.flags, &p->pos); dput(dentry); return error; } -int +static int xfs_attrmulti_attr_get( struct inode *inode, unsigned char *name, @@ -435,28 +468,33 @@ uint32_t *len, uint32_t flags) { - unsigned char *kbuf; - int error = -EFAULT; + struct xfs_da_args args = { + .dp = XFS_I(inode), + .attr_filter = xfs_attr_filter(flags), + .attr_flags = xfs_attr_flags(flags), + .name = name, + .namelen = strlen(name), + .valuelen = *len, + }; + int error; if (*len > XFS_XATTR_SIZE_MAX) return -EINVAL; - kbuf = kmem_zalloc_large(*len, KM_SLEEP); - if (!kbuf) - return -ENOMEM; - error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags); + error = xfs_attr_get(&args); if (error) goto out_kfree; - if (copy_to_user(ubuf, kbuf, *len)) + *len = args.valuelen; + if (copy_to_user(ubuf, args.value, args.valuelen)) error = -EFAULT; out_kfree: - kmem_free(kbuf); + kmem_free(args.value); return error; } -int +static int xfs_attrmulti_attr_set( struct inode *inode, unsigned char *name, @@ -464,38 +502,75 @@ uint32_t len, uint32_t flags) { - unsigned char *kbuf; + struct xfs_da_args args = { + .dp = XFS_I(inode), + .attr_filter = xfs_attr_filter(flags), + .attr_flags = xfs_attr_flags(flags), + .name = name, + .namelen = strlen(name), + }; int error; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; - if (len > XFS_XATTR_SIZE_MAX) - return -EINVAL; - kbuf = memdup_user(ubuf, len); - if (IS_ERR(kbuf)) - return PTR_ERR(kbuf); + if (ubuf) { + if (len > XFS_XATTR_SIZE_MAX) + return -EINVAL; + args.value = memdup_user(ubuf, len); + if (IS_ERR(args.value)) + return PTR_ERR(args.value); + args.valuelen = len; + } - error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); - if (!error) - xfs_forget_acl(inode, name, flags); - kfree(kbuf); + error = xfs_attr_set(&args); + if (!error && (flags & XFS_IOC_ATTR_ROOT)) + xfs_forget_acl(inode, name); + kfree(args.value); return error; } int -xfs_attrmulti_attr_remove( +xfs_ioc_attrmulti_one( + struct file *parfilp, struct inode *inode, - unsigned char *name, + uint32_t opcode, + void __user *uname, + void __user *value, + uint32_t *len, uint32_t flags) { + unsigned char *name; int error; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return -EPERM; - error = xfs_attr_remove(XFS_I(inode), name, flags); - if (!error) - xfs_forget_acl(inode, name, flags); + if ((flags & XFS_IOC_ATTR_ROOT) && (flags & XFS_IOC_ATTR_SECURE)) + return -EINVAL; + + name = strndup_user(uname, MAXNAMELEN); + if (IS_ERR(name)) + return PTR_ERR(name); + + switch (opcode) { + case ATTR_OP_GET: + error = xfs_attrmulti_attr_get(inode, name, value, len, flags); + break; + case ATTR_OP_REMOVE: + value = NULL; + *len = 0; + /* fall through */ + case ATTR_OP_SET: + error = mnt_want_write_file(parfilp); + if (error) + break; + error = xfs_attrmulti_attr_set(inode, name, value, *len, flags); + mnt_drop_write_file(parfilp); + break; + default: + error = -EINVAL; + break; + } + + kfree(name); return error; } @@ -509,7 +584,6 @@ xfs_fsop_attrmulti_handlereq_t am_hreq; struct dentry *dentry; unsigned int i, size; - unsigned char *attr_name; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -535,56 +609,17 @@ goto out_dput; } - error = -ENOMEM; - attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); - if (!attr_name) - goto out_kfree_ops; - error = 0; for (i = 0; i < am_hreq.opcount; i++) { - ops[i].am_error = strncpy_from_user((char *)attr_name, - ops[i].am_attrname, MAXNAMELEN); - if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) - error = -ERANGE; - if (ops[i].am_error < 0) - break; - - switch (ops[i].am_opcode) { - case ATTR_OP_GET: - ops[i].am_error = xfs_attrmulti_attr_get( - d_inode(dentry), attr_name, - ops[i].am_attrvalue, &ops[i].am_length, - ops[i].am_flags); - break; - case ATTR_OP_SET: - ops[i].am_error = mnt_want_write_file(parfilp); - if (ops[i].am_error) - break; - ops[i].am_error = xfs_attrmulti_attr_set( - d_inode(dentry), attr_name, - ops[i].am_attrvalue, ops[i].am_length, - ops[i].am_flags); - mnt_drop_write_file(parfilp); - break; - case ATTR_OP_REMOVE: - ops[i].am_error = mnt_want_write_file(parfilp); - if (ops[i].am_error) - break; - ops[i].am_error = xfs_attrmulti_attr_remove( - d_inode(dentry), attr_name, - ops[i].am_flags); - mnt_drop_write_file(parfilp); - break; - default: - ops[i].am_error = -EINVAL; - } + ops[i].am_error = xfs_ioc_attrmulti_one(parfilp, + d_inode(dentry), ops[i].am_opcode, + ops[i].am_attrname, ops[i].am_attrvalue, + &ops[i].am_length, ops[i].am_flags); } if (copy_to_user(am_hreq.ops, ops, size)) error = -EFAULT; - kfree(attr_name); - out_kfree_ops: kfree(ops); out_dput: dput(dentry); @@ -594,23 +629,14 @@ int xfs_ioc_space( struct file *filp, - unsigned int cmd, xfs_flock64_t *bf) { struct inode *inode = file_inode(filp); struct xfs_inode *ip = XFS_I(inode); struct iattr iattr; - enum xfs_prealloc_flags flags = 0; + enum xfs_prealloc_flags flags = XFS_PREALLOC_CLEAR; uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; int error; - - /* - * Only allow the sys admin to reserve space unless - * unwritten extents are enabled. - */ - if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) && - !capable(CAP_SYS_ADMIN)) - return -EPERM; if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) return -EPERM; @@ -620,6 +646,9 @@ if (!S_ISREG(inode->i_mode)) return -EINVAL; + + if (xfs_is_always_cow_inode(ip)) + return -EOPNOTSUPP; if (filp->f_flags & O_DSYNC) flags |= XFS_PREALLOC_SYNC; @@ -634,6 +663,7 @@ error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP); if (error) goto out_unlock; + inode_dio_wait(inode); switch (bf->l_whence) { case 0: /*SEEK_SET*/ @@ -649,74 +679,22 @@ goto out_unlock; } - /* - * length of <= 0 for resv/unresv/zero is invalid. length for - * alloc/free is ignored completely and we have no idea what userspace - * might have set it to, so set it to zero to allow range - * checks to pass. - */ - switch (cmd) { - case XFS_IOC_ZERO_RANGE: - case XFS_IOC_RESVSP: - case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP: - case XFS_IOC_UNRESVSP64: - if (bf->l_len <= 0) { - error = -EINVAL; - goto out_unlock; - } - break; - default: - bf->l_len = 0; - break; - } - - if (bf->l_start < 0 || - bf->l_start > inode->i_sb->s_maxbytes || - bf->l_start + bf->l_len < 0 || - bf->l_start + bf->l_len >= inode->i_sb->s_maxbytes) { + if (bf->l_start < 0 || bf->l_start > inode->i_sb->s_maxbytes) { error = -EINVAL; goto out_unlock; } - switch (cmd) { - case XFS_IOC_ZERO_RANGE: - flags |= XFS_PREALLOC_SET; - error = xfs_zero_file_space(ip, bf->l_start, bf->l_len); - break; - case XFS_IOC_RESVSP: - case XFS_IOC_RESVSP64: - flags |= XFS_PREALLOC_SET; - error = xfs_alloc_file_space(ip, bf->l_start, bf->l_len, - XFS_BMAPI_PREALLOC); - break; - case XFS_IOC_UNRESVSP: - case XFS_IOC_UNRESVSP64: - error = xfs_free_file_space(ip, bf->l_start, bf->l_len); - break; - case XFS_IOC_ALLOCSP: - case XFS_IOC_ALLOCSP64: - case XFS_IOC_FREESP: - case XFS_IOC_FREESP64: - flags |= XFS_PREALLOC_CLEAR; - if (bf->l_start > XFS_ISIZE(ip)) { - error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), - bf->l_start - XFS_ISIZE(ip), - XFS_BMAPI_PREALLOC); - if (error) - goto out_unlock; - } - - iattr.ia_valid = ATTR_SIZE; - iattr.ia_size = bf->l_start; - - error = xfs_vn_setattr_size(file_dentry(filp), &iattr); - break; - default: - ASSERT(0); - error = -EINVAL; + if (bf->l_start > XFS_ISIZE(ip)) { + error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), + bf->l_start - XFS_ISIZE(ip), + XFS_BMAPI_PREALLOC); + if (error) + goto out_unlock; } + iattr.ia_valid = ATTR_SIZE; + iattr.ia_size = bf->l_start; + error = xfs_vn_setattr_size(file_dentry(filp), &iattr); if (error) goto out_unlock; @@ -728,16 +706,45 @@ return error; } +/* Return 0 on success or positive error */ +int +xfs_fsbulkstat_one_fmt( + struct xfs_ibulk *breq, + const struct xfs_bulkstat *bstat) +{ + struct xfs_bstat bs1; + + xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat); + if (copy_to_user(breq->ubuffer, &bs1, sizeof(bs1))) + return -EFAULT; + return xfs_ibulk_advance(breq, sizeof(struct xfs_bstat)); +} + +int +xfs_fsinumbers_fmt( + struct xfs_ibulk *breq, + const struct xfs_inumbers *igrp) +{ + struct xfs_inogrp ig1; + + xfs_inumbers_to_inogrp(&ig1, igrp); + if (copy_to_user(breq->ubuffer, &ig1, sizeof(struct xfs_inogrp))) + return -EFAULT; + return xfs_ibulk_advance(breq, sizeof(struct xfs_inogrp)); +} + STATIC int -xfs_ioc_bulkstat( +xfs_ioc_fsbulkstat( xfs_mount_t *mp, unsigned int cmd, void __user *arg) { - xfs_fsop_bulkreq_t bulkreq; - int count; /* # of records returned */ - xfs_ino_t inlast; /* last inode number */ - int done; + struct xfs_fsop_bulkreq bulkreq; + struct xfs_ibulk breq = { + .mp = mp, + .ocount = 0, + }; + xfs_ino_t lastino; int error; /* done = 1 if there are more stats to get and if bulkstat */ @@ -749,79 +756,291 @@ if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) + if (copy_from_user(&bulkreq, arg, sizeof(struct xfs_fsop_bulkreq))) return -EFAULT; - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) return -EFAULT; - if ((count = bulkreq.icount) <= 0) + if (bulkreq.icount <= 0) return -EINVAL; if (bulkreq.ubuffer == NULL) return -EINVAL; - if (cmd == XFS_IOC_FSINUMBERS) - error = xfs_inumbers(mp, &inlast, &count, - bulkreq.ubuffer, xfs_inumbers_fmt); - else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) - error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer, - sizeof(xfs_bstat_t), NULL, &done); - else /* XFS_IOC_FSBULKSTAT */ - error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, - sizeof(xfs_bstat_t), bulkreq.ubuffer, - &done); + breq.ubuffer = bulkreq.ubuffer; + breq.icount = bulkreq.icount; + + /* + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect + * that *lastip contains either zero or the number of the last inode to + * be examined by the previous call and return results starting with + * the next inode after that. The new bulk request back end functions + * take the inode to start with, so we have to compute the startino + * parameter from lastino to maintain correct function. lastino == 0 + * is a special case because it has traditionally meant "first inode + * in filesystem". + */ + if (cmd == XFS_IOC_FSINUMBERS) { + breq.startino = lastino ? lastino + 1 : 0; + error = xfs_inumbers(&breq, xfs_fsinumbers_fmt); + lastino = breq.startino - 1; + } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) { + breq.startino = lastino; + breq.icount = 1; + error = xfs_bulkstat_one(&breq, xfs_fsbulkstat_one_fmt); + } else { /* XFS_IOC_FSBULKSTAT */ + breq.startino = lastino ? lastino + 1 : 0; + error = xfs_bulkstat(&breq, xfs_fsbulkstat_one_fmt); + lastino = breq.startino - 1; + } if (error) return error; - if (bulkreq.ocount != NULL) { - if (copy_to_user(bulkreq.lastip, &inlast, - sizeof(xfs_ino_t))) - return -EFAULT; + if (bulkreq.lastip != NULL && + copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t))) + return -EFAULT; - if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) - return -EFAULT; + if (bulkreq.ocount != NULL && + copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32))) + return -EFAULT; + + return 0; +} + +/* Return 0 on success or positive error */ +static int +xfs_bulkstat_fmt( + struct xfs_ibulk *breq, + const struct xfs_bulkstat *bstat) +{ + if (copy_to_user(breq->ubuffer, bstat, sizeof(struct xfs_bulkstat))) + return -EFAULT; + return xfs_ibulk_advance(breq, sizeof(struct xfs_bulkstat)); +} + +/* + * Check the incoming bulk request @hdr from userspace and initialize the + * internal @breq bulk request appropriately. Returns 0 if the bulk request + * should proceed; -ECANCELED if there's nothing to do; or the usual + * negative error code. + */ +static int +xfs_bulk_ireq_setup( + struct xfs_mount *mp, + struct xfs_bulk_ireq *hdr, + struct xfs_ibulk *breq, + void __user *ubuffer) +{ + if (hdr->icount == 0 || + (hdr->flags & ~XFS_BULK_IREQ_FLAGS_ALL) || + memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved))) + return -EINVAL; + + breq->startino = hdr->ino; + breq->ubuffer = ubuffer; + breq->icount = hdr->icount; + breq->ocount = 0; + breq->flags = 0; + + /* + * The @ino parameter is a special value, so we must look it up here. + * We're not allowed to have IREQ_AGNO, and we only return one inode + * worth of data. + */ + if (hdr->flags & XFS_BULK_IREQ_SPECIAL) { + if (hdr->flags & XFS_BULK_IREQ_AGNO) + return -EINVAL; + + switch (hdr->ino) { + case XFS_BULK_IREQ_SPECIAL_ROOT: + hdr->ino = mp->m_sb.sb_rootino; + break; + default: + return -EINVAL; + } + breq->icount = 1; } + + /* + * The IREQ_AGNO flag means that we only want results from a given AG. + * If @hdr->ino is zero, we start iterating in that AG. If @hdr->ino is + * beyond the specified AG then we return no results. + */ + if (hdr->flags & XFS_BULK_IREQ_AGNO) { + if (hdr->agno >= mp->m_sb.sb_agcount) + return -EINVAL; + + if (breq->startino == 0) + breq->startino = XFS_AGINO_TO_INO(mp, hdr->agno, 0); + else if (XFS_INO_TO_AGNO(mp, breq->startino) < hdr->agno) + return -EINVAL; + + breq->flags |= XFS_IBULK_SAME_AG; + + /* Asking for an inode past the end of the AG? We're done! */ + if (XFS_INO_TO_AGNO(mp, breq->startino) > hdr->agno) + return -ECANCELED; + } else if (hdr->agno) + return -EINVAL; + + /* Asking for an inode past the end of the FS? We're done! */ + if (XFS_INO_TO_AGNO(mp, breq->startino) >= mp->m_sb.sb_agcount) + return -ECANCELED; + + return 0; +} + +/* + * Update the userspace bulk request @hdr to reflect the end state of the + * internal bulk request @breq. + */ +static void +xfs_bulk_ireq_teardown( + struct xfs_bulk_ireq *hdr, + struct xfs_ibulk *breq) +{ + hdr->ino = breq->startino; + hdr->ocount = breq->ocount; +} + +/* Handle the v5 bulkstat ioctl. */ +STATIC int +xfs_ioc_bulkstat( + struct xfs_mount *mp, + unsigned int cmd, + struct xfs_bulkstat_req __user *arg) +{ + struct xfs_bulk_ireq hdr; + struct xfs_ibulk breq = { + .mp = mp, + }; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr))) + return -EFAULT; + + error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->bulkstat); + if (error == -ECANCELED) + goto out_teardown; + if (error < 0) + return error; + + error = xfs_bulkstat(&breq, xfs_bulkstat_fmt); + if (error) + return error; + +out_teardown: + xfs_bulk_ireq_teardown(&hdr, &breq); + if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr))) + return -EFAULT; return 0; } STATIC int -xfs_ioc_fsgeometry_v1( - xfs_mount_t *mp, - void __user *arg) +xfs_inumbers_fmt( + struct xfs_ibulk *breq, + const struct xfs_inumbers *igrp) { - xfs_fsop_geom_t fsgeo; - int error; + if (copy_to_user(breq->ubuffer, igrp, sizeof(struct xfs_inumbers))) + return -EFAULT; + return xfs_ibulk_advance(breq, sizeof(struct xfs_inumbers)); +} - error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 3); +/* Handle the v5 inumbers ioctl. */ +STATIC int +xfs_ioc_inumbers( + struct xfs_mount *mp, + unsigned int cmd, + struct xfs_inumbers_req __user *arg) +{ + struct xfs_bulk_ireq hdr; + struct xfs_ibulk breq = { + .mp = mp, + }; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr))) + return -EFAULT; + + error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->inumbers); + if (error == -ECANCELED) + goto out_teardown; + if (error < 0) + return error; + + error = xfs_inumbers(&breq, xfs_inumbers_fmt); if (error) return error; - /* - * Caller should have passed an argument of type - * xfs_fsop_geom_v1_t. This is a proper subset of the - * xfs_fsop_geom_t that xfs_fs_geometry() fills in. - */ - if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t))) +out_teardown: + xfs_bulk_ireq_teardown(&hdr, &breq); + if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr))) return -EFAULT; + return 0; } STATIC int xfs_ioc_fsgeometry( - xfs_mount_t *mp, + struct xfs_mount *mp, + void __user *arg, + int struct_version) +{ + struct xfs_fsop_geom fsgeo; + size_t len; + + xfs_fs_geometry(&mp->m_sb, &fsgeo, struct_version); + + if (struct_version <= 3) + len = sizeof(struct xfs_fsop_geom_v1); + else if (struct_version == 4) + len = sizeof(struct xfs_fsop_geom_v4); + else { + xfs_fsop_geom_health(mp, &fsgeo); + len = sizeof(fsgeo); + } + + if (copy_to_user(arg, &fsgeo, len)) + return -EFAULT; + return 0; +} + +STATIC int +xfs_ioc_ag_geometry( + struct xfs_mount *mp, void __user *arg) { - xfs_fsop_geom_t fsgeo; + struct xfs_ag_geometry ageo; int error; - error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 4); + if (copy_from_user(&ageo, arg, sizeof(ageo))) + return -EFAULT; + if (ageo.ag_flags) + return -EINVAL; + if (memchr_inv(&ageo.ag_reserved, 0, sizeof(ageo.ag_reserved))) + return -EINVAL; + + error = xfs_ag_get_geometry(mp, ageo.ag_number, &ageo); if (error) return error; - if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) + if (copy_to_user(arg, &ageo, sizeof(ageo))) return -EFAULT; return 0; } @@ -857,13 +1076,18 @@ xflags |= FS_XFLAG_NODUMP; else xflags &= ~FS_XFLAG_NODUMP; + if (flags & FS_DAX_FL) + xflags |= FS_XFLAG_DAX; + else + xflags &= ~FS_XFLAG_DAX; return xflags; } STATIC unsigned int xfs_di2lxflags( - uint16_t di_flags) + uint16_t di_flags, + uint64_t di_flags2) { unsigned int flags = 0; @@ -877,7 +1101,29 @@ flags |= FS_NOATIME_FL; if (di_flags & XFS_DIFLAG_NODUMP) flags |= FS_NODUMP_FL; + if (di_flags2 & XFS_DIFLAG2_DAX) { + flags |= FS_DAX_FL; + } return flags; +} + +static void +xfs_fill_fsxattr( + struct xfs_inode *ip, + bool attr, + struct fsxattr *fa) +{ + struct xfs_ifork *ifp = attr ? ip->i_afp : &ip->i_df; + + simple_fill_fsxattr(fa, xfs_ip2xflags(ip)); + fa->fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; + fa->fsx_cowextsize = ip->i_d.di_cowextsize << + ip->i_mount->m_sb.sb_blocklog; + fa->fsx_projid = ip->i_d.di_projid; + if (ifp && (ifp->if_flags & XFS_IFEXTENTS)) + fa->fsx_nextents = xfs_iext_count(ifp); + else + fa->fsx_nextents = xfs_ifork_nextents(ifp); } STATIC int @@ -888,29 +1134,8 @@ { struct fsxattr fa; - memset(&fa, 0, sizeof(struct fsxattr)); - xfs_ilock(ip, XFS_ILOCK_SHARED); - fa.fsx_xflags = xfs_ip2xflags(ip); - fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; - fa.fsx_cowextsize = ip->i_d.di_cowextsize << - ip->i_mount->m_sb.sb_blocklog; - fa.fsx_projid = xfs_get_projid(ip); - - if (attr) { - if (ip->i_afp) { - if (ip->i_afp->if_flags & XFS_IFEXTENTS) - fa.fsx_nextents = xfs_iext_count(ip->i_afp); - else - fa.fsx_nextents = ip->i_d.di_anextents; - } else - fa.fsx_nextents = 0; - } else { - if (ip->i_df.if_flags & XFS_IFEXTENTS) - fa.fsx_nextents = xfs_iext_count(&ip->i_df); - else - fa.fsx_nextents = ip->i_d.di_nextents; - } + xfs_fill_fsxattr(ip, attr, &fa); xfs_iunlock(ip, XFS_ILOCK_SHARED); if (copy_to_user(arg, &fa, sizeof(fa))) @@ -966,7 +1191,8 @@ unsigned int xflags) { uint64_t di_flags2 = - (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK); + (ip->i_d.di_flags2 & (XFS_DIFLAG2_REFLINK | + XFS_DIFLAG2_BIGTIME)); if (xflags & FS_XFLAG_DAX) di_flags2 |= XFS_DIFLAG2_DAX; @@ -974,37 +1200,6 @@ di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; return di_flags2; -} - -STATIC void -xfs_diflags_to_linux( - struct xfs_inode *ip) -{ - struct inode *inode = VFS_I(ip); - unsigned int xflags = xfs_ip2xflags(ip); - - if (xflags & FS_XFLAG_IMMUTABLE) - inode->i_flags |= S_IMMUTABLE; - else - inode->i_flags &= ~S_IMMUTABLE; - if (xflags & FS_XFLAG_APPEND) - inode->i_flags |= S_APPEND; - else - inode->i_flags &= ~S_APPEND; - if (xflags & FS_XFLAG_SYNC) - inode->i_flags |= S_SYNC; - else - inode->i_flags &= ~S_SYNC; - if (xflags & FS_XFLAG_NOATIME) - inode->i_flags |= S_NOATIME; - else - inode->i_flags &= ~S_NOATIME; -#if 0 /* disabled until the flag switching races are sorted out */ - if (xflags & FS_XFLAG_DAX) - inode->i_flags |= S_DAX; - else - inode->i_flags &= ~S_DAX; -#endif } static int @@ -1017,7 +1212,7 @@ uint64_t di_flags2; /* Can't change realtime flag if any extents are allocated. */ - if ((ip->i_d.di_nextents || ip->i_delayed_blks) && + if ((ip->i_df.if_nextents || ip->i_delayed_blks) && XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME)) return -EINVAL; @@ -1036,90 +1231,41 @@ if ((fa->fsx_xflags & FS_XFLAG_DAX) && xfs_is_reflink_inode(ip)) return -EINVAL; - /* - * Can't modify an immutable/append-only file unless - * we have appropriate permission. - */ - if (((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND)) || - (fa->fsx_xflags & (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND))) && - !capable(CAP_LINUX_IMMUTABLE)) - return -EPERM; - /* diflags2 only valid for v3 inodes. */ di_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags); - if (di_flags2 && ip->i_d.di_version < 3) + if (di_flags2 && !xfs_sb_version_has_v3inode(&mp->m_sb)) return -EINVAL; ip->i_d.di_flags = xfs_flags2diflags(ip, fa->fsx_xflags); ip->i_d.di_flags2 = di_flags2; - xfs_diflags_to_linux(ip); + xfs_diflags_to_iflags(ip, false); xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(mp, xs_ig_attrchg); return 0; } -/* - * If we are changing DAX flags, we have to ensure the file is clean and any - * cached objects in the address space are invalidated and removed. This - * requires us to lock out other IO and page faults similar to a truncate - * operation. The locks need to be held until the transaction has been committed - * so that the cache invalidation is atomic with respect to the DAX flag - * manipulation. - */ -static int -xfs_ioctl_setattr_dax_invalidate( +static void +xfs_ioctl_setattr_prepare_dax( struct xfs_inode *ip, - struct fsxattr *fa, - int *join_flags) + struct fsxattr *fa) { - struct inode *inode = VFS_I(ip); - struct super_block *sb = inode->i_sb; - int error; - - *join_flags = 0; - - /* - * It is only valid to set the DAX flag on regular files and - * directories on filesystems where the block size is equal to the page - * size. On directories it serves as an inherited hint so we don't - * have to check the device for dax support or flush pagecache. - */ - if (fa->fsx_xflags & FS_XFLAG_DAX) { - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) - return -EINVAL; - if (S_ISREG(inode->i_mode) && - !bdev_dax_supported(xfs_find_bdev_for_inode(VFS_I(ip)), - sb->s_blocksize)) - return -EINVAL; - } - - /* If the DAX state is not changing, we have nothing to do here. */ - if ((fa->fsx_xflags & FS_XFLAG_DAX) && IS_DAX(inode)) - return 0; - if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode)) - return 0; + struct xfs_mount *mp = ip->i_mount; + struct inode *inode = VFS_I(ip); if (S_ISDIR(inode->i_mode)) - return 0; + return; - /* lock, flush and invalidate mapping in preparation for flag change */ - xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL); - error = filemap_write_and_wait(inode->i_mapping); - if (error) - goto out_unlock; - error = invalidate_inode_pages2(inode->i_mapping); - if (error) - goto out_unlock; + if ((mp->m_flags & XFS_MOUNT_DAX_ALWAYS) || + (mp->m_flags & XFS_MOUNT_DAX_NEVER)) + return; - *join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL; - return 0; - -out_unlock: - xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL); - return error; - + if (((fa->fsx_xflags & FS_XFLAG_DAX) && + !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) || + (!(fa->fsx_xflags & FS_XFLAG_DAX) && + (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX))) + d_mark_dontcache(inode); } /* @@ -1127,17 +1273,10 @@ * have permission to do so. On success, return a clean transaction and the * inode locked exclusively ready for further operation specific checks. On * failure, return an error without modifying or locking the inode. - * - * The inode might already be IO locked on call. If this is the case, it is - * indicated in @join_flags and we take full responsibility for ensuring they - * are unlocked from now on. Hence if we have an error here, we still have to - * unlock them. Otherwise, once they are joined to the transaction, they will - * be unlocked on commit/cancel. */ static struct xfs_trans * xfs_ioctl_setattr_get_trans( - struct xfs_inode *ip, - int join_flags) + struct xfs_inode *ip) { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; @@ -1151,11 +1290,10 @@ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); if (error) - return ERR_PTR(error); + goto out_unlock; xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags); - join_flags = 0; + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); /* * CAP_FOWNER overrides the following restrictions: @@ -1176,8 +1314,6 @@ out_cancel: xfs_trans_cancel(tp); out_unlock: - if (join_flags) - xfs_iunlock(ip, join_flags); return ERR_PTR(error); } @@ -1203,39 +1339,31 @@ struct fsxattr *fa) { struct xfs_mount *mp = ip->i_mount; + xfs_extlen_t size; + xfs_fsblock_t extsize_fsb; - if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(VFS_I(ip)->i_mode)) - return -EINVAL; - - if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) && - !S_ISDIR(VFS_I(ip)->i_mode)) - return -EINVAL; - - if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_d.di_nextents && + if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents && ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) return -EINVAL; - if (fa->fsx_extsize != 0) { - xfs_extlen_t size; - xfs_fsblock_t extsize_fsb; + if (fa->fsx_extsize == 0) + return 0; - extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); - if (extsize_fsb > MAXEXTLEN) + extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); + if (extsize_fsb > MAXEXTLEN) + return -EINVAL; + + if (XFS_IS_REALTIME_INODE(ip) || + (fa->fsx_xflags & FS_XFLAG_REALTIME)) { + size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; + } else { + size = mp->m_sb.sb_blocksize; + if (extsize_fsb > mp->m_sb.sb_agblocks / 2) return -EINVAL; + } - if (XFS_IS_REALTIME_INODE(ip) || - (fa->fsx_xflags & FS_XFLAG_REALTIME)) { - size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; - } else { - size = mp->m_sb.sb_blocksize; - if (extsize_fsb > mp->m_sb.sb_agblocks / 2) - return -EINVAL; - } - - if (fa->fsx_extsize % size) - return -EINVAL; - } else - fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT); + if (fa->fsx_extsize % size) + return -EINVAL; return 0; } @@ -1261,33 +1389,28 @@ struct fsxattr *fa) { struct xfs_mount *mp = ip->i_mount; + xfs_extlen_t size; + xfs_fsblock_t cowextsize_fsb; if (!(fa->fsx_xflags & FS_XFLAG_COWEXTSIZE)) return 0; - if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb) || - ip->i_d.di_version != 3) + if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb)) return -EINVAL; - if (!S_ISREG(VFS_I(ip)->i_mode) && !S_ISDIR(VFS_I(ip)->i_mode)) + if (fa->fsx_cowextsize == 0) + return 0; + + cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize); + if (cowextsize_fsb > MAXEXTLEN) return -EINVAL; - if (fa->fsx_cowextsize != 0) { - xfs_extlen_t size; - xfs_fsblock_t cowextsize_fsb; + size = mp->m_sb.sb_blocksize; + if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2) + return -EINVAL; - cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize); - if (cowextsize_fsb > MAXEXTLEN) - return -EINVAL; - - size = mp->m_sb.sb_blocksize; - if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2) - return -EINVAL; - - if (fa->fsx_cowextsize % size) - return -EINVAL; - } else - fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE; + if (fa->fsx_cowextsize % size) + return -EINVAL; return 0; } @@ -1301,21 +1424,6 @@ if (fa->fsx_projid > (uint16_t)-1 && !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) return -EINVAL; - - /* - * Project Quota ID state is only allowed to change from within the init - * namespace. Enforce that restriction only if we are trying to change - * the quota ID state. Everything else is allowed in user namespaces. - */ - if (current_user_ns() == &init_user_ns) - return 0; - - if (xfs_get_projid(ip) != fa->fsx_projid) - return -EINVAL; - if ((fa->fsx_xflags & FS_XFLAG_PROJINHERIT) != - (ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)) - return -EINVAL; - return 0; } @@ -1324,13 +1432,12 @@ xfs_inode_t *ip, struct fsxattr *fa) { + struct fsxattr old_fa; struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; - struct xfs_dquot *udqp = NULL; struct xfs_dquot *pdqp = NULL; struct xfs_dquot *olddquot = NULL; int code; - int join_flags = 0; trace_xfs_ioctl_setattr(ip); @@ -1347,38 +1454,33 @@ * because the i_*dquot fields will get updated anyway. */ if (XFS_IS_QUOTA_ON(mp)) { - code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, - ip->i_d.di_gid, fa->fsx_projid, - XFS_QMOPT_PQUOTA, &udqp, NULL, &pdqp); + code = xfs_qm_vop_dqalloc(ip, VFS_I(ip)->i_uid, + VFS_I(ip)->i_gid, fa->fsx_projid, + XFS_QMOPT_PQUOTA, NULL, NULL, &pdqp); if (code) return code; } - /* - * Changing DAX config may require inode locking for mapping - * invalidation. These need to be held all the way to transaction commit - * or cancel time, so need to be passed through to - * xfs_ioctl_setattr_get_trans() so it can apply them to the join call - * appropriately. - */ - code = xfs_ioctl_setattr_dax_invalidate(ip, fa, &join_flags); - if (code) - goto error_free_dquots; + xfs_ioctl_setattr_prepare_dax(ip, fa); - tp = xfs_ioctl_setattr_get_trans(ip, join_flags); + tp = xfs_ioctl_setattr_get_trans(ip); if (IS_ERR(tp)) { code = PTR_ERR(tp); goto error_free_dquots; } - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) && - xfs_get_projid(ip) != fa->fsx_projid) { - code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, pdqp, + ip->i_d.di_projid != fa->fsx_projid) { + code = xfs_qm_vop_chown_reserve(tp, ip, NULL, NULL, pdqp, capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); if (code) /* out of quota */ goto error_trans_cancel; } + + xfs_fill_fsxattr(ip, false, &old_fa); + code = vfs_ioc_fssetxattr_check(VFS_I(ip), &old_fa, fa); + if (code) + goto error_trans_cancel; code = xfs_ioctl_setattr_check_extsize(ip, fa); if (code) @@ -1405,13 +1507,12 @@ VFS_I(ip)->i_mode &= ~(S_ISUID|S_ISGID); /* Change the ownerships and register project quota modifications */ - if (xfs_get_projid(ip) != fa->fsx_projid) { + if (ip->i_d.di_projid != fa->fsx_projid) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { olddquot = xfs_qm_vop_chown(tp, ip, &ip->i_pdquot, pdqp); } - ASSERT(ip->i_d.di_version > 1); - xfs_set_projid(ip, fa->fsx_projid); + ip->i_d.di_projid = fa->fsx_projid; } /* @@ -1423,7 +1524,7 @@ ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; else ip->i_d.di_extsize = 0; - if (ip->i_d.di_version == 3 && + if (xfs_sb_version_has_v3inode(&mp->m_sb) && (ip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) ip->i_d.di_cowextsize = fa->fsx_cowextsize >> mp->m_sb.sb_blocklog; @@ -1436,7 +1537,6 @@ * Release any dquot(s) the inode had kept before chown. */ xfs_qm_dqrele(olddquot); - xfs_qm_dqrele(udqp); xfs_qm_dqrele(pdqp); return code; @@ -1444,7 +1544,6 @@ error_trans_cancel: xfs_trans_cancel(tp); error_free_dquots: - xfs_qm_dqrele(udqp); xfs_qm_dqrele(pdqp); return code; } @@ -1476,7 +1575,7 @@ { unsigned int flags; - flags = xfs_di2lxflags(ip->i_d.di_flags); + flags = xfs_di2lxflags(ip->i_d.di_flags, ip->i_d.di_flags2); if (copy_to_user(arg, &flags, sizeof(flags))) return -EFAULT; return 0; @@ -1490,8 +1589,8 @@ { struct xfs_trans *tp; struct fsxattr fa; + struct fsxattr old_fa; unsigned int flags; - int join_flags = 0; int error; if (copy_from_user(&flags, arg, sizeof(flags))) @@ -1499,7 +1598,7 @@ if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ FS_NOATIME_FL | FS_NODUMP_FL | \ - FS_SYNC_FL)) + FS_SYNC_FL | FS_DAX_FL)) return -EOPNOTSUPP; fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); @@ -1508,20 +1607,18 @@ if (error) return error; - /* - * Changing DAX config may require inode locking for mapping - * invalidation. These need to be held all the way to transaction commit - * or cancel time, so need to be passed through to - * xfs_ioctl_setattr_get_trans() so it can apply them to the join call - * appropriately. - */ - error = xfs_ioctl_setattr_dax_invalidate(ip, &fa, &join_flags); - if (error) - goto out_drop_write; + xfs_ioctl_setattr_prepare_dax(ip, &fa); - tp = xfs_ioctl_setattr_get_trans(ip, join_flags); + tp = xfs_ioctl_setattr_get_trans(ip); if (IS_ERR(tp)) { error = PTR_ERR(tp); + goto out_drop_write; + } + + xfs_fill_fsxattr(ip, false, &old_fa); + error = vfs_ioc_fssetxattr_check(VFS_I(ip), &old_fa, &fa); + if (error) { + xfs_trans_cancel(tp); goto out_drop_write; } @@ -1592,10 +1689,10 @@ if (bmx.bmv_count < 2) return -EINVAL; - if (bmx.bmv_count > ULONG_MAX / recsize) + if (bmx.bmv_count >= INT_MAX / recsize) return -ENOMEM; - buf = kmem_zalloc_large(bmx.bmv_count * sizeof(*buf), 0); + buf = kvzalloc(bmx.bmv_count * sizeof(*buf), GFP_KERNEL); if (!buf) return -ENOMEM; @@ -1620,39 +1717,17 @@ return error; } -struct getfsmap_info { - struct xfs_mount *mp; - struct fsmap_head __user *data; - unsigned int idx; - __u32 last_flags; -}; - -STATIC int -xfs_getfsmap_format(struct xfs_fsmap *xfm, void *priv) -{ - struct getfsmap_info *info = priv; - struct fsmap fm; - - trace_xfs_getfsmap_mapping(info->mp, xfm); - - info->last_flags = xfm->fmr_flags; - xfs_fsmap_from_internal(&fm, xfm); - if (copy_to_user(&info->data->fmh_recs[info->idx++], &fm, - sizeof(struct fsmap))) - return -EFAULT; - - return 0; -} - STATIC int xfs_ioc_getfsmap( struct xfs_inode *ip, struct fsmap_head __user *arg) { - struct getfsmap_info info = { NULL }; struct xfs_fsmap_head xhead = {0}; struct fsmap_head head; - bool aborted = false; + struct fsmap *recs; + unsigned int count; + __u32 last_flags = 0; + bool done = false; int error; if (copy_from_user(&head, arg, sizeof(struct fsmap_head))) @@ -1664,38 +1739,112 @@ sizeof(head.fmh_keys[1].fmr_reserved))) return -EINVAL; + /* + * Use an internal memory buffer so that we don't have to copy fsmap + * data to userspace while holding locks. Start by trying to allocate + * up to 128k for the buffer, but fall back to a single page if needed. + */ + count = min_t(unsigned int, head.fmh_count, + 131072 / sizeof(struct fsmap)); + recs = kvzalloc(count * sizeof(struct fsmap), GFP_KERNEL); + if (!recs) { + count = min_t(unsigned int, head.fmh_count, + PAGE_SIZE / sizeof(struct fsmap)); + recs = kvzalloc(count * sizeof(struct fsmap), GFP_KERNEL); + if (!recs) + return -ENOMEM; + } + xhead.fmh_iflags = head.fmh_iflags; - xhead.fmh_count = head.fmh_count; xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]); xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]); trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]); - info.mp = ip->i_mount; - info.data = arg; - error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info); - if (error == XFS_BTREE_QUERY_RANGE_ABORT) { - error = 0; - aborted = true; - } else if (error) - return error; + head.fmh_entries = 0; + do { + struct fsmap __user *user_recs; + struct fsmap *last_rec; - /* If we didn't abort, set the "last" flag in the last fmx */ - if (!aborted && info.idx) { - info.last_flags |= FMR_OF_LAST; - if (copy_to_user(&info.data->fmh_recs[info.idx - 1].fmr_flags, - &info.last_flags, sizeof(info.last_flags))) - return -EFAULT; + user_recs = &arg->fmh_recs[head.fmh_entries]; + xhead.fmh_entries = 0; + xhead.fmh_count = min_t(unsigned int, count, + head.fmh_count - head.fmh_entries); + + /* Run query, record how many entries we got. */ + error = xfs_getfsmap(ip->i_mount, &xhead, recs); + switch (error) { + case 0: + /* + * There are no more records in the result set. Copy + * whatever we got to userspace and break out. + */ + done = true; + break; + case -ECANCELED: + /* + * The internal memory buffer is full. Copy whatever + * records we got to userspace and go again if we have + * not yet filled the userspace buffer. + */ + error = 0; + break; + default: + goto out_free; + } + head.fmh_entries += xhead.fmh_entries; + head.fmh_oflags = xhead.fmh_oflags; + + /* + * If the caller wanted a record count or there aren't any + * new records to return, we're done. + */ + if (head.fmh_count == 0 || xhead.fmh_entries == 0) + break; + + /* Copy all the records we got out to userspace. */ + if (copy_to_user(user_recs, recs, + xhead.fmh_entries * sizeof(struct fsmap))) { + error = -EFAULT; + goto out_free; + } + + /* Remember the last record flags we copied to userspace. */ + last_rec = &recs[xhead.fmh_entries - 1]; + last_flags = last_rec->fmr_flags; + + /* Set up the low key for the next iteration. */ + xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec); + trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); + } while (!done && head.fmh_entries < head.fmh_count); + + /* + * If there are no more records in the query result set and we're not + * in counting mode, mark the last record returned with the LAST flag. + */ + if (done && head.fmh_count > 0 && head.fmh_entries > 0) { + struct fsmap __user *user_rec; + + last_flags |= FMR_OF_LAST; + user_rec = &arg->fmh_recs[head.fmh_entries - 1]; + + if (copy_to_user(&user_rec->fmr_flags, &last_flags, + sizeof(last_flags))) { + error = -EFAULT; + goto out_free; + } } /* copy back header */ - head.fmh_entries = xhead.fmh_entries; - head.fmh_oflags = xhead.fmh_oflags; - if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) - return -EFAULT; + if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) { + error = -EFAULT; + goto out_free; + } - return 0; +out_free: + kmem_free(recs); + return error; } STATIC int @@ -1884,6 +2033,41 @@ return error; } +static inline int +xfs_fs_eofblocks_from_user( + struct xfs_fs_eofblocks *src, + struct xfs_eofblocks *dst) +{ + if (src->eof_version != XFS_EOFBLOCKS_VERSION) + return -EINVAL; + + if (src->eof_flags & ~XFS_EOF_FLAGS_VALID) + return -EINVAL; + + if (memchr_inv(&src->pad32, 0, sizeof(src->pad32)) || + memchr_inv(src->pad64, 0, sizeof(src->pad64))) + return -EINVAL; + + dst->eof_flags = src->eof_flags; + dst->eof_prid = src->eof_prid; + dst->eof_min_file_size = src->eof_min_file_size; + + dst->eof_uid = INVALID_UID; + if (src->eof_flags & XFS_EOF_FLAGS_UID) { + dst->eof_uid = make_kuid(current_user_ns(), src->eof_uid); + if (!uid_valid(dst->eof_uid)) + return -EINVAL; + } + + dst->eof_gid = INVALID_GID; + if (src->eof_flags & XFS_EOF_FLAGS_GID) { + dst->eof_gid = make_kgid(current_user_ns(), src->eof_gid); + if (!gid_valid(dst->eof_gid)) + return -EINVAL; + } + return 0; +} + /* * Note: some of the ioctl's return positive numbers as a * byte count indicating success, such as readlink_by_handle. @@ -1913,24 +2097,17 @@ return xfs_ioc_setlabel(filp, mp, arg); case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: - case XFS_IOC_RESVSP: - case XFS_IOC_UNRESVSP: case XFS_IOC_ALLOCSP64: - case XFS_IOC_FREESP64: - case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP64: - case XFS_IOC_ZERO_RANGE: { + case XFS_IOC_FREESP64: { xfs_flock64_t bf; if (copy_from_user(&bf, arg, sizeof(bf))) return -EFAULT; - return xfs_ioc_space(filp, cmd, &bf); + return xfs_ioc_space(filp, &bf); } case XFS_IOC_DIOINFO: { - struct dioattr da; - xfs_buftarg_t *target = - XFS_IS_REALTIME_INODE(ip) ? - mp->m_rtdev_targp : mp->m_ddev_targp; + struct xfs_buftarg *target = xfs_inode_buftarg(ip); + struct dioattr da; da.d_mem = da.d_miniosz = target->bt_logical_sectorsize; da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); @@ -1943,13 +2120,22 @@ case XFS_IOC_FSBULKSTAT_SINGLE: case XFS_IOC_FSBULKSTAT: case XFS_IOC_FSINUMBERS: + return xfs_ioc_fsbulkstat(mp, cmd, arg); + + case XFS_IOC_BULKSTAT: return xfs_ioc_bulkstat(mp, cmd, arg); + case XFS_IOC_INUMBERS: + return xfs_ioc_inumbers(mp, cmd, arg); case XFS_IOC_FSGEOMETRY_V1: - return xfs_ioc_fsgeometry_v1(mp, arg); - + return xfs_ioc_fsgeometry(mp, arg, 3); + case XFS_IOC_FSGEOMETRY_V4: + return xfs_ioc_fsgeometry(mp, arg, 4); case XFS_IOC_FSGEOMETRY: - return xfs_ioc_fsgeometry(mp, arg); + return xfs_ioc_fsgeometry(mp, arg, 5); + + case XFS_IOC_AG_GEOMETRY: + return xfs_ioc_ag_geometry(mp, arg); case XFS_IOC_GETVERSION: return put_user(inode->i_generation, (int __user *)arg); @@ -1964,22 +2150,6 @@ return xfs_ioc_getxflags(ip, arg); case XFS_IOC_SETXFLAGS: return xfs_ioc_setxflags(ip, filp, arg); - - case XFS_IOC_FSSETDM: { - struct fsdmidata dmi; - - if (copy_from_user(&dmi, arg, sizeof(dmi))) - return -EFAULT; - - error = mnt_want_write_file(filp); - if (error) - return error; - - error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, - dmi.fsd_dmstate); - mnt_drop_write_file(filp); - return error; - } case XFS_IOC_GETBMAP: case XFS_IOC_GETBMAPA: @@ -2008,8 +2178,6 @@ return -EFAULT; return xfs_open_by_handle(filp, &hreq); } - case XFS_IOC_FSSETDM_BY_HANDLE: - return xfs_fssetdm_by_handle(filp, arg); case XFS_IOC_READLINK_BY_HANDLE: { xfs_fsop_handlereq_t hreq; @@ -2040,9 +2208,7 @@ case XFS_IOC_FSCOUNTS: { xfs_fsop_counts_t out; - error = xfs_fs_counts(mp, &out); - if (error) - return error; + xfs_fs_counts(mp, &out); if (copy_to_user(arg, &out, sizeof(out))) return -EFAULT; -- Gitblit v1.6.2