| .. | .. |
|---|
| 10 | 10 | #include "xfs_log_format.h" |
|---|
| 11 | 11 | #include "xfs_trans_resv.h" |
|---|
| 12 | 12 | #include "xfs_mount.h" |
|---|
| 13 | | -#include "xfs_da_format.h" |
|---|
| 14 | 13 | #include "xfs_inode.h" |
|---|
| 15 | | -#include "xfs_bmap.h" |
|---|
| 16 | | -#include "xfs_bmap_util.h" |
|---|
| 17 | 14 | #include "xfs_acl.h" |
|---|
| 18 | 15 | #include "xfs_quota.h" |
|---|
| 19 | | -#include "xfs_error.h" |
|---|
| 20 | 16 | #include "xfs_attr.h" |
|---|
| 21 | 17 | #include "xfs_trans.h" |
|---|
| 22 | 18 | #include "xfs_trace.h" |
|---|
| 23 | 19 | #include "xfs_icache.h" |
|---|
| 24 | 20 | #include "xfs_symlink.h" |
|---|
| 25 | | -#include "xfs_da_btree.h" |
|---|
| 26 | 21 | #include "xfs_dir2.h" |
|---|
| 27 | | -#include "xfs_trans_space.h" |
|---|
| 28 | 22 | #include "xfs_iomap.h" |
|---|
| 29 | | -#include "xfs_defer.h" |
|---|
| 23 | +#include "xfs_error.h" |
|---|
| 30 | 24 | |
|---|
| 31 | | -#include <linux/capability.h> |
|---|
| 32 | | -#include <linux/xattr.h> |
|---|
| 33 | 25 | #include <linux/posix_acl.h> |
|---|
| 34 | 26 | #include <linux/security.h> |
|---|
| 35 | | -#include <linux/iomap.h> |
|---|
| 36 | | -#include <linux/slab.h> |
|---|
| 37 | 27 | #include <linux/iversion.h> |
|---|
| 28 | +#include <linux/fiemap.h> |
|---|
| 38 | 29 | |
|---|
| 39 | 30 | /* |
|---|
| 40 | | - * Directories have different lock order w.r.t. mmap_sem compared to regular |
|---|
| 31 | + * Directories have different lock order w.r.t. mmap_lock compared to regular |
|---|
| 41 | 32 | * files. This is due to readdir potentially triggering page faults on a user |
|---|
| 42 | 33 | * buffer inside filldir(), and this happens with the ilock on the directory |
|---|
| 43 | 34 | * held. For regular files, the lock order is the other way around - the |
|---|
| 44 | | - * mmap_sem is taken during the page fault, and then we lock the ilock to do |
|---|
| 35 | + * mmap_lock is taken during the page fault, and then we lock the ilock to do |
|---|
| 45 | 36 | * block mapping. Hence we need a different class for the directory ilock so |
|---|
| 46 | 37 | * that lockdep can tell them apart. |
|---|
| 47 | 38 | */ |
|---|
| .. | .. |
|---|
| 59 | 50 | int error = 0; |
|---|
| 60 | 51 | |
|---|
| 61 | 52 | for (xattr = xattr_array; xattr->name != NULL; xattr++) { |
|---|
| 62 | | - error = xfs_attr_set(ip, xattr->name, xattr->value, |
|---|
| 63 | | - xattr->value_len, ATTR_SECURE); |
|---|
| 53 | + struct xfs_da_args args = { |
|---|
| 54 | + .dp = ip, |
|---|
| 55 | + .attr_filter = XFS_ATTR_SECURE, |
|---|
| 56 | + .name = xattr->name, |
|---|
| 57 | + .namelen = strlen(xattr->name), |
|---|
| 58 | + .value = xattr->value, |
|---|
| 59 | + .valuelen = xattr->value_len, |
|---|
| 60 | + }; |
|---|
| 61 | + error = xfs_attr_set(&args); |
|---|
| 64 | 62 | if (error < 0) |
|---|
| 65 | 63 | break; |
|---|
| 66 | 64 | } |
|---|
| .. | .. |
|---|
| 239 | 237 | umode_t mode, |
|---|
| 240 | 238 | bool flags) |
|---|
| 241 | 239 | { |
|---|
| 242 | | - return xfs_vn_mknod(dir, dentry, mode, 0); |
|---|
| 240 | + return xfs_generic_create(dir, dentry, mode, 0, false); |
|---|
| 243 | 241 | } |
|---|
| 244 | 242 | |
|---|
| 245 | 243 | STATIC int |
|---|
| .. | .. |
|---|
| 248 | 246 | struct dentry *dentry, |
|---|
| 249 | 247 | umode_t mode) |
|---|
| 250 | 248 | { |
|---|
| 251 | | - return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0); |
|---|
| 249 | + return xfs_generic_create(dir, dentry, mode | S_IFDIR, 0, false); |
|---|
| 252 | 250 | } |
|---|
| 253 | 251 | |
|---|
| 254 | 252 | STATIC struct dentry * |
|---|
| .. | .. |
|---|
| 480 | 478 | struct inode *inode, |
|---|
| 481 | 479 | struct delayed_call *done) |
|---|
| 482 | 480 | { |
|---|
| 481 | + struct xfs_inode *ip = XFS_I(inode); |
|---|
| 483 | 482 | char *link; |
|---|
| 484 | 483 | |
|---|
| 485 | | - ASSERT(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE); |
|---|
| 484 | + ASSERT(ip->i_df.if_flags & XFS_IFINLINE); |
|---|
| 486 | 485 | |
|---|
| 487 | 486 | /* |
|---|
| 488 | 487 | * The VFS crashes on a NULL pointer, so return -EFSCORRUPTED if |
|---|
| 489 | 488 | * if_data is junk. |
|---|
| 490 | 489 | */ |
|---|
| 491 | | - link = XFS_I(inode)->i_df.if_u1.if_data; |
|---|
| 492 | | - if (!link) |
|---|
| 490 | + link = ip->i_df.if_u1.if_data; |
|---|
| 491 | + if (XFS_IS_CORRUPT(ip->i_mount, !link)) |
|---|
| 493 | 492 | return ERR_PTR(-EFSCORRUPTED); |
|---|
| 494 | 493 | return link; |
|---|
| 494 | +} |
|---|
| 495 | + |
|---|
| 496 | +static uint32_t |
|---|
| 497 | +xfs_stat_blksize( |
|---|
| 498 | + struct xfs_inode *ip) |
|---|
| 499 | +{ |
|---|
| 500 | + struct xfs_mount *mp = ip->i_mount; |
|---|
| 501 | + |
|---|
| 502 | + /* |
|---|
| 503 | + * If the file blocks are being allocated from a realtime volume, then |
|---|
| 504 | + * always return the realtime extent size. |
|---|
| 505 | + */ |
|---|
| 506 | + if (XFS_IS_REALTIME_INODE(ip)) |
|---|
| 507 | + return xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; |
|---|
| 508 | + |
|---|
| 509 | + /* |
|---|
| 510 | + * Allow large block sizes to be reported to userspace programs if the |
|---|
| 511 | + * "largeio" mount option is used. |
|---|
| 512 | + * |
|---|
| 513 | + * If compatibility mode is specified, simply return the basic unit of |
|---|
| 514 | + * caching so that we don't get inefficient read/modify/write I/O from |
|---|
| 515 | + * user apps. Otherwise.... |
|---|
| 516 | + * |
|---|
| 517 | + * If the underlying volume is a stripe, then return the stripe width in |
|---|
| 518 | + * bytes as the recommended I/O size. If it is not a stripe and we've set a |
|---|
| 519 | + * default buffered I/O size, return that; otherwise return the compat |
|---|
| 520 | + * default. |
|---|
| 521 | + */ |
|---|
| 522 | + if (mp->m_flags & XFS_MOUNT_LARGEIO) { |
|---|
| 523 | + if (mp->m_swidth) |
|---|
| 524 | + return mp->m_swidth << mp->m_sb.sb_blocklog; |
|---|
| 525 | + if (mp->m_flags & XFS_MOUNT_ALLOCSIZE) |
|---|
| 526 | + return 1U << mp->m_allocsize_log; |
|---|
| 527 | + } |
|---|
| 528 | + |
|---|
| 529 | + return PAGE_SIZE; |
|---|
| 495 | 530 | } |
|---|
| 496 | 531 | |
|---|
| 497 | 532 | STATIC int |
|---|
| .. | .. |
|---|
| 523 | 558 | stat->blocks = |
|---|
| 524 | 559 | XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); |
|---|
| 525 | 560 | |
|---|
| 526 | | - if (ip->i_d.di_version == 3) { |
|---|
| 561 | + if (xfs_sb_version_has_v3inode(&mp->m_sb)) { |
|---|
| 527 | 562 | if (request_mask & STATX_BTIME) { |
|---|
| 528 | 563 | stat->result_mask |= STATX_BTIME; |
|---|
| 529 | | - stat->btime.tv_sec = ip->i_d.di_crtime.t_sec; |
|---|
| 530 | | - stat->btime.tv_nsec = ip->i_d.di_crtime.t_nsec; |
|---|
| 564 | + stat->btime = ip->i_d.di_crtime; |
|---|
| 531 | 565 | } |
|---|
| 532 | 566 | } |
|---|
| 533 | 567 | |
|---|
| .. | .. |
|---|
| 553 | 587 | stat->rdev = inode->i_rdev; |
|---|
| 554 | 588 | break; |
|---|
| 555 | 589 | default: |
|---|
| 556 | | - if (XFS_IS_REALTIME_INODE(ip)) { |
|---|
| 557 | | - /* |
|---|
| 558 | | - * If the file blocks are being allocated from a |
|---|
| 559 | | - * realtime volume, then return the inode's realtime |
|---|
| 560 | | - * extent size or the realtime volume's extent size. |
|---|
| 561 | | - */ |
|---|
| 562 | | - stat->blksize = |
|---|
| 563 | | - xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; |
|---|
| 564 | | - } else |
|---|
| 565 | | - stat->blksize = xfs_preferred_iosize(mp); |
|---|
| 590 | + stat->blksize = xfs_stat_blksize(ip); |
|---|
| 566 | 591 | stat->rdev = 0; |
|---|
| 567 | 592 | break; |
|---|
| 568 | 593 | } |
|---|
| .. | .. |
|---|
| 672 | 697 | */ |
|---|
| 673 | 698 | ASSERT(udqp == NULL); |
|---|
| 674 | 699 | ASSERT(gdqp == NULL); |
|---|
| 675 | | - error = xfs_qm_vop_dqalloc(ip, xfs_kuid_to_uid(uid), |
|---|
| 676 | | - xfs_kgid_to_gid(gid), |
|---|
| 677 | | - xfs_get_projid(ip), |
|---|
| 700 | + error = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid, |
|---|
| 678 | 701 | qflags, &udqp, &gdqp, NULL); |
|---|
| 679 | 702 | if (error) |
|---|
| 680 | 703 | return error; |
|---|
| .. | .. |
|---|
| 716 | 739 | if (error) /* out of quota */ |
|---|
| 717 | 740 | goto out_cancel; |
|---|
| 718 | 741 | } |
|---|
| 719 | | - } |
|---|
| 720 | 742 | |
|---|
| 721 | | - /* |
|---|
| 722 | | - * Change file ownership. Must be the owner or privileged. |
|---|
| 723 | | - */ |
|---|
| 724 | | - if (mask & (ATTR_UID|ATTR_GID)) { |
|---|
| 725 | 743 | /* |
|---|
| 726 | 744 | * CAP_FSETID overrides the following restrictions: |
|---|
| 727 | 745 | * |
|---|
| .. | .. |
|---|
| 743 | 761 | olddquot1 = xfs_qm_vop_chown(tp, ip, |
|---|
| 744 | 762 | &ip->i_udquot, udqp); |
|---|
| 745 | 763 | } |
|---|
| 746 | | - ip->i_d.di_uid = xfs_kuid_to_uid(uid); |
|---|
| 747 | 764 | inode->i_uid = uid; |
|---|
| 748 | 765 | } |
|---|
| 749 | 766 | if (!gid_eq(igid, gid)) { |
|---|
| .. | .. |
|---|
| 755 | 772 | olddquot2 = xfs_qm_vop_chown(tp, ip, |
|---|
| 756 | 773 | &ip->i_gdquot, gdqp); |
|---|
| 757 | 774 | } |
|---|
| 758 | | - ip->i_d.di_gid = xfs_kgid_to_gid(gid); |
|---|
| 759 | 775 | inode->i_gid = gid; |
|---|
| 760 | 776 | } |
|---|
| 761 | 777 | } |
|---|
| .. | .. |
|---|
| 857 | 873 | /* |
|---|
| 858 | 874 | * Short circuit the truncate case for zero length files. |
|---|
| 859 | 875 | */ |
|---|
| 860 | | - if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) { |
|---|
| 876 | + if (newsize == 0 && oldsize == 0 && ip->i_df.if_nextents == 0) { |
|---|
| 861 | 877 | if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME))) |
|---|
| 862 | 878 | return 0; |
|---|
| 863 | 879 | |
|---|
| .. | .. |
|---|
| 893 | 909 | if (newsize > oldsize) { |
|---|
| 894 | 910 | trace_xfs_zero_eof(ip, oldsize, newsize - oldsize); |
|---|
| 895 | 911 | error = iomap_zero_range(inode, oldsize, newsize - oldsize, |
|---|
| 896 | | - &did_zeroing, &xfs_iomap_ops); |
|---|
| 912 | + &did_zeroing, &xfs_buffered_write_iomap_ops); |
|---|
| 897 | 913 | } else { |
|---|
| 898 | 914 | /* |
|---|
| 899 | 915 | * iomap won't detect a dirty page over an unwritten block (or a |
|---|
| .. | .. |
|---|
| 906 | 922 | if (error) |
|---|
| 907 | 923 | return error; |
|---|
| 908 | 924 | error = iomap_truncate_page(inode, newsize, &did_zeroing, |
|---|
| 909 | | - &xfs_iomap_ops); |
|---|
| 925 | + &xfs_buffered_write_iomap_ops); |
|---|
| 910 | 926 | } |
|---|
| 911 | 927 | |
|---|
| 912 | 928 | if (error) |
|---|
| .. | .. |
|---|
| 1134 | 1150 | &xfs_xattr_iomap_ops); |
|---|
| 1135 | 1151 | } else { |
|---|
| 1136 | 1152 | error = iomap_fiemap(inode, fieinfo, start, length, |
|---|
| 1137 | | - &xfs_iomap_ops); |
|---|
| 1153 | + &xfs_read_iomap_ops); |
|---|
| 1138 | 1154 | } |
|---|
| 1139 | 1155 | xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED); |
|---|
| 1140 | 1156 | |
|---|
| .. | .. |
|---|
| 1233 | 1249 | { |
|---|
| 1234 | 1250 | struct xfs_mount *mp = ip->i_mount; |
|---|
| 1235 | 1251 | |
|---|
| 1236 | | - /* Only supported on non-reflinked files. */ |
|---|
| 1237 | | - if (!S_ISREG(VFS_I(ip)->i_mode) || xfs_is_reflink_inode(ip)) |
|---|
| 1252 | + /* Only supported on regular files. */ |
|---|
| 1253 | + if (!S_ISREG(VFS_I(ip)->i_mode)) |
|---|
| 1238 | 1254 | return false; |
|---|
| 1239 | 1255 | |
|---|
| 1240 | | - /* DAX mount option or DAX iflag must be set. */ |
|---|
| 1241 | | - if (!(mp->m_flags & XFS_MOUNT_DAX) && |
|---|
| 1242 | | - !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) |
|---|
| 1256 | + /* Only supported on non-reflinked files. */ |
|---|
| 1257 | + if (xfs_is_reflink_inode(ip)) |
|---|
| 1243 | 1258 | return false; |
|---|
| 1244 | 1259 | |
|---|
| 1245 | 1260 | /* Block size must match page size */ |
|---|
| .. | .. |
|---|
| 1247 | 1262 | return false; |
|---|
| 1248 | 1263 | |
|---|
| 1249 | 1264 | /* Device has to support DAX too. */ |
|---|
| 1250 | | - return xfs_find_daxdev_for_inode(VFS_I(ip)) != NULL; |
|---|
| 1265 | + return xfs_inode_buftarg(ip)->bt_daxdev != NULL; |
|---|
| 1251 | 1266 | } |
|---|
| 1252 | 1267 | |
|---|
| 1253 | | -STATIC void |
|---|
| 1254 | | -xfs_diflags_to_iflags( |
|---|
| 1255 | | - struct inode *inode, |
|---|
| 1256 | | - struct xfs_inode *ip) |
|---|
| 1268 | +static bool |
|---|
| 1269 | +xfs_inode_should_enable_dax( |
|---|
| 1270 | + struct xfs_inode *ip) |
|---|
| 1257 | 1271 | { |
|---|
| 1258 | | - uint16_t flags = ip->i_d.di_flags; |
|---|
| 1272 | + if (!IS_ENABLED(CONFIG_FS_DAX)) |
|---|
| 1273 | + return false; |
|---|
| 1274 | + if (ip->i_mount->m_flags & XFS_MOUNT_DAX_NEVER) |
|---|
| 1275 | + return false; |
|---|
| 1276 | + if (!xfs_inode_supports_dax(ip)) |
|---|
| 1277 | + return false; |
|---|
| 1278 | + if (ip->i_mount->m_flags & XFS_MOUNT_DAX_ALWAYS) |
|---|
| 1279 | + return true; |
|---|
| 1280 | + if (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX) |
|---|
| 1281 | + return true; |
|---|
| 1282 | + return false; |
|---|
| 1283 | +} |
|---|
| 1259 | 1284 | |
|---|
| 1260 | | - inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC | |
|---|
| 1261 | | - S_NOATIME | S_DAX); |
|---|
| 1285 | +void |
|---|
| 1286 | +xfs_diflags_to_iflags( |
|---|
| 1287 | + struct xfs_inode *ip, |
|---|
| 1288 | + bool init) |
|---|
| 1289 | +{ |
|---|
| 1290 | + struct inode *inode = VFS_I(ip); |
|---|
| 1291 | + unsigned int xflags = xfs_ip2xflags(ip); |
|---|
| 1292 | + unsigned int flags = 0; |
|---|
| 1262 | 1293 | |
|---|
| 1263 | | - if (flags & XFS_DIFLAG_IMMUTABLE) |
|---|
| 1264 | | - inode->i_flags |= S_IMMUTABLE; |
|---|
| 1265 | | - if (flags & XFS_DIFLAG_APPEND) |
|---|
| 1266 | | - inode->i_flags |= S_APPEND; |
|---|
| 1267 | | - if (flags & XFS_DIFLAG_SYNC) |
|---|
| 1268 | | - inode->i_flags |= S_SYNC; |
|---|
| 1269 | | - if (flags & XFS_DIFLAG_NOATIME) |
|---|
| 1270 | | - inode->i_flags |= S_NOATIME; |
|---|
| 1271 | | - if (xfs_inode_supports_dax(ip)) |
|---|
| 1272 | | - inode->i_flags |= S_DAX; |
|---|
| 1294 | + ASSERT(!(IS_DAX(inode) && init)); |
|---|
| 1295 | + |
|---|
| 1296 | + if (xflags & FS_XFLAG_IMMUTABLE) |
|---|
| 1297 | + flags |= S_IMMUTABLE; |
|---|
| 1298 | + if (xflags & FS_XFLAG_APPEND) |
|---|
| 1299 | + flags |= S_APPEND; |
|---|
| 1300 | + if (xflags & FS_XFLAG_SYNC) |
|---|
| 1301 | + flags |= S_SYNC; |
|---|
| 1302 | + if (xflags & FS_XFLAG_NOATIME) |
|---|
| 1303 | + flags |= S_NOATIME; |
|---|
| 1304 | + if (init && xfs_inode_should_enable_dax(ip)) |
|---|
| 1305 | + flags |= S_DAX; |
|---|
| 1306 | + |
|---|
| 1307 | + /* |
|---|
| 1308 | + * S_DAX can only be set during inode initialization and is never set by |
|---|
| 1309 | + * the VFS, so we cannot mask off S_DAX in i_flags. |
|---|
| 1310 | + */ |
|---|
| 1311 | + inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC | S_NOATIME); |
|---|
| 1312 | + inode->i_flags |= flags; |
|---|
| 1273 | 1313 | } |
|---|
| 1274 | 1314 | |
|---|
| 1275 | 1315 | /* |
|---|
| .. | .. |
|---|
| 1288 | 1328 | gfp_t gfp_mask; |
|---|
| 1289 | 1329 | |
|---|
| 1290 | 1330 | inode->i_ino = ip->i_ino; |
|---|
| 1291 | | - inode->i_state = I_NEW; |
|---|
| 1331 | + inode->i_state |= I_NEW; |
|---|
| 1292 | 1332 | |
|---|
| 1293 | 1333 | inode_sb_list_add(inode); |
|---|
| 1294 | 1334 | /* make the inode look hashed for the writeback code */ |
|---|
| 1295 | 1335 | inode_fake_hash(inode); |
|---|
| 1296 | 1336 | |
|---|
| 1297 | | - inode->i_uid = xfs_uid_to_kuid(ip->i_d.di_uid); |
|---|
| 1298 | | - inode->i_gid = xfs_gid_to_kgid(ip->i_d.di_gid); |
|---|
| 1299 | | - |
|---|
| 1300 | 1337 | i_size_write(inode, ip->i_d.di_size); |
|---|
| 1301 | | - xfs_diflags_to_iflags(inode, ip); |
|---|
| 1338 | + xfs_diflags_to_iflags(ip, true); |
|---|
| 1302 | 1339 | |
|---|
| 1303 | 1340 | if (S_ISDIR(inode->i_mode)) { |
|---|
| 1304 | 1341 | /* |
|---|
| .. | .. |
|---|
| 1310 | 1347 | lockdep_set_class(&inode->i_rwsem, |
|---|
| 1311 | 1348 | &inode->i_sb->s_type->i_mutex_dir_key); |
|---|
| 1312 | 1349 | lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class); |
|---|
| 1313 | | - ip->d_ops = ip->i_mount->m_dir_inode_ops; |
|---|
| 1314 | 1350 | } else { |
|---|
| 1315 | | - ip->d_ops = ip->i_mount->m_nondir_inode_ops; |
|---|
| 1316 | 1351 | lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class); |
|---|
| 1317 | 1352 | } |
|---|
| 1318 | 1353 | |
|---|