.. | .. |
---|
10 | 10 | #include "xfs_log_format.h" |
---|
11 | 11 | #include "xfs_trans_resv.h" |
---|
12 | 12 | #include "xfs_mount.h" |
---|
13 | | -#include "xfs_da_format.h" |
---|
14 | 13 | #include "xfs_inode.h" |
---|
15 | | -#include "xfs_bmap.h" |
---|
16 | | -#include "xfs_bmap_util.h" |
---|
17 | 14 | #include "xfs_acl.h" |
---|
18 | 15 | #include "xfs_quota.h" |
---|
19 | | -#include "xfs_error.h" |
---|
20 | 16 | #include "xfs_attr.h" |
---|
21 | 17 | #include "xfs_trans.h" |
---|
22 | 18 | #include "xfs_trace.h" |
---|
23 | 19 | #include "xfs_icache.h" |
---|
24 | 20 | #include "xfs_symlink.h" |
---|
25 | | -#include "xfs_da_btree.h" |
---|
26 | 21 | #include "xfs_dir2.h" |
---|
27 | | -#include "xfs_trans_space.h" |
---|
28 | 22 | #include "xfs_iomap.h" |
---|
29 | | -#include "xfs_defer.h" |
---|
| 23 | +#include "xfs_error.h" |
---|
30 | 24 | |
---|
31 | | -#include <linux/capability.h> |
---|
32 | | -#include <linux/xattr.h> |
---|
33 | 25 | #include <linux/posix_acl.h> |
---|
34 | 26 | #include <linux/security.h> |
---|
35 | | -#include <linux/iomap.h> |
---|
36 | | -#include <linux/slab.h> |
---|
37 | 27 | #include <linux/iversion.h> |
---|
| 28 | +#include <linux/fiemap.h> |
---|
38 | 29 | |
---|
39 | 30 | /* |
---|
40 | | - * Directories have different lock order w.r.t. mmap_sem compared to regular |
---|
| 31 | + * Directories have different lock order w.r.t. mmap_lock compared to regular |
---|
41 | 32 | * files. This is due to readdir potentially triggering page faults on a user |
---|
42 | 33 | * buffer inside filldir(), and this happens with the ilock on the directory |
---|
43 | 34 | * held. For regular files, the lock order is the other way around - the |
---|
44 | | - * mmap_sem is taken during the page fault, and then we lock the ilock to do |
---|
| 35 | + * mmap_lock is taken during the page fault, and then we lock the ilock to do |
---|
45 | 36 | * block mapping. Hence we need a different class for the directory ilock so |
---|
46 | 37 | * that lockdep can tell them apart. |
---|
47 | 38 | */ |
---|
.. | .. |
---|
59 | 50 | int error = 0; |
---|
60 | 51 | |
---|
61 | 52 | for (xattr = xattr_array; xattr->name != NULL; xattr++) { |
---|
62 | | - error = xfs_attr_set(ip, xattr->name, xattr->value, |
---|
63 | | - xattr->value_len, ATTR_SECURE); |
---|
| 53 | + struct xfs_da_args args = { |
---|
| 54 | + .dp = ip, |
---|
| 55 | + .attr_filter = XFS_ATTR_SECURE, |
---|
| 56 | + .name = xattr->name, |
---|
| 57 | + .namelen = strlen(xattr->name), |
---|
| 58 | + .value = xattr->value, |
---|
| 59 | + .valuelen = xattr->value_len, |
---|
| 60 | + }; |
---|
| 61 | + error = xfs_attr_set(&args); |
---|
64 | 62 | if (error < 0) |
---|
65 | 63 | break; |
---|
66 | 64 | } |
---|
.. | .. |
---|
239 | 237 | umode_t mode, |
---|
240 | 238 | bool flags) |
---|
241 | 239 | { |
---|
242 | | - return xfs_vn_mknod(dir, dentry, mode, 0); |
---|
| 240 | + return xfs_generic_create(dir, dentry, mode, 0, false); |
---|
243 | 241 | } |
---|
244 | 242 | |
---|
245 | 243 | STATIC int |
---|
.. | .. |
---|
248 | 246 | struct dentry *dentry, |
---|
249 | 247 | umode_t mode) |
---|
250 | 248 | { |
---|
251 | | - return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0); |
---|
| 249 | + return xfs_generic_create(dir, dentry, mode | S_IFDIR, 0, false); |
---|
252 | 250 | } |
---|
253 | 251 | |
---|
254 | 252 | STATIC struct dentry * |
---|
.. | .. |
---|
480 | 478 | struct inode *inode, |
---|
481 | 479 | struct delayed_call *done) |
---|
482 | 480 | { |
---|
| 481 | + struct xfs_inode *ip = XFS_I(inode); |
---|
483 | 482 | char *link; |
---|
484 | 483 | |
---|
485 | | - ASSERT(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE); |
---|
| 484 | + ASSERT(ip->i_df.if_flags & XFS_IFINLINE); |
---|
486 | 485 | |
---|
487 | 486 | /* |
---|
488 | 487 | * The VFS crashes on a NULL pointer, so return -EFSCORRUPTED if |
---|
489 | 488 | * if_data is junk. |
---|
490 | 489 | */ |
---|
491 | | - link = XFS_I(inode)->i_df.if_u1.if_data; |
---|
492 | | - if (!link) |
---|
| 490 | + link = ip->i_df.if_u1.if_data; |
---|
| 491 | + if (XFS_IS_CORRUPT(ip->i_mount, !link)) |
---|
493 | 492 | return ERR_PTR(-EFSCORRUPTED); |
---|
494 | 493 | return link; |
---|
| 494 | +} |
---|
| 495 | + |
---|
| 496 | +static uint32_t |
---|
| 497 | +xfs_stat_blksize( |
---|
| 498 | + struct xfs_inode *ip) |
---|
| 499 | +{ |
---|
| 500 | + struct xfs_mount *mp = ip->i_mount; |
---|
| 501 | + |
---|
| 502 | + /* |
---|
| 503 | + * If the file blocks are being allocated from a realtime volume, then |
---|
| 504 | + * always return the realtime extent size. |
---|
| 505 | + */ |
---|
| 506 | + if (XFS_IS_REALTIME_INODE(ip)) |
---|
| 507 | + return xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; |
---|
| 508 | + |
---|
| 509 | + /* |
---|
| 510 | + * Allow large block sizes to be reported to userspace programs if the |
---|
| 511 | + * "largeio" mount option is used. |
---|
| 512 | + * |
---|
| 513 | + * If compatibility mode is specified, simply return the basic unit of |
---|
| 514 | + * caching so that we don't get inefficient read/modify/write I/O from |
---|
| 515 | + * user apps. Otherwise.... |
---|
| 516 | + * |
---|
| 517 | + * If the underlying volume is a stripe, then return the stripe width in |
---|
| 518 | + * bytes as the recommended I/O size. If it is not a stripe and we've set a |
---|
| 519 | + * default buffered I/O size, return that, otherwise return the compat |
---|
| 520 | + * default. |
---|
| 521 | + */ |
---|
| 522 | + if (mp->m_flags & XFS_MOUNT_LARGEIO) { |
---|
| 523 | + if (mp->m_swidth) |
---|
| 524 | + return mp->m_swidth << mp->m_sb.sb_blocklog; |
---|
| 525 | + if (mp->m_flags & XFS_MOUNT_ALLOCSIZE) |
---|
| 526 | + return 1U << mp->m_allocsize_log; |
---|
| 527 | + } |
---|
| 528 | + |
---|
| 529 | + return PAGE_SIZE; |
---|
495 | 530 | } |
---|
496 | 531 | |
---|
497 | 532 | STATIC int |
---|
.. | .. |
---|
523 | 558 | stat->blocks = |
---|
524 | 559 | XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); |
---|
525 | 560 | |
---|
526 | | - if (ip->i_d.di_version == 3) { |
---|
| 561 | + if (xfs_sb_version_has_v3inode(&mp->m_sb)) { |
---|
527 | 562 | if (request_mask & STATX_BTIME) { |
---|
528 | 563 | stat->result_mask |= STATX_BTIME; |
---|
529 | | - stat->btime.tv_sec = ip->i_d.di_crtime.t_sec; |
---|
530 | | - stat->btime.tv_nsec = ip->i_d.di_crtime.t_nsec; |
---|
| 564 | + stat->btime = ip->i_d.di_crtime; |
---|
531 | 565 | } |
---|
532 | 566 | } |
---|
533 | 567 | |
---|
.. | .. |
---|
553 | 587 | stat->rdev = inode->i_rdev; |
---|
554 | 588 | break; |
---|
555 | 589 | default: |
---|
556 | | - if (XFS_IS_REALTIME_INODE(ip)) { |
---|
557 | | - /* |
---|
558 | | - * If the file blocks are being allocated from a |
---|
559 | | - * realtime volume, then return the inode's realtime |
---|
560 | | - * extent size or the realtime volume's extent size. |
---|
561 | | - */ |
---|
562 | | - stat->blksize = |
---|
563 | | - xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; |
---|
564 | | - } else |
---|
565 | | - stat->blksize = xfs_preferred_iosize(mp); |
---|
| 590 | + stat->blksize = xfs_stat_blksize(ip); |
---|
566 | 591 | stat->rdev = 0; |
---|
567 | 592 | break; |
---|
568 | 593 | } |
---|
.. | .. |
---|
672 | 697 | */ |
---|
673 | 698 | ASSERT(udqp == NULL); |
---|
674 | 699 | ASSERT(gdqp == NULL); |
---|
675 | | - error = xfs_qm_vop_dqalloc(ip, xfs_kuid_to_uid(uid), |
---|
676 | | - xfs_kgid_to_gid(gid), |
---|
677 | | - xfs_get_projid(ip), |
---|
| 700 | + error = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid, |
---|
678 | 701 | qflags, &udqp, &gdqp, NULL); |
---|
679 | 702 | if (error) |
---|
680 | 703 | return error; |
---|
.. | .. |
---|
716 | 739 | if (error) /* out of quota */ |
---|
717 | 740 | goto out_cancel; |
---|
718 | 741 | } |
---|
719 | | - } |
---|
720 | 742 | |
---|
721 | | - /* |
---|
722 | | - * Change file ownership. Must be the owner or privileged. |
---|
723 | | - */ |
---|
724 | | - if (mask & (ATTR_UID|ATTR_GID)) { |
---|
725 | 743 | /* |
---|
726 | 744 | * CAP_FSETID overrides the following restrictions: |
---|
727 | 745 | * |
---|
.. | .. |
---|
743 | 761 | olddquot1 = xfs_qm_vop_chown(tp, ip, |
---|
744 | 762 | &ip->i_udquot, udqp); |
---|
745 | 763 | } |
---|
746 | | - ip->i_d.di_uid = xfs_kuid_to_uid(uid); |
---|
747 | 764 | inode->i_uid = uid; |
---|
748 | 765 | } |
---|
749 | 766 | if (!gid_eq(igid, gid)) { |
---|
.. | .. |
---|
755 | 772 | olddquot2 = xfs_qm_vop_chown(tp, ip, |
---|
756 | 773 | &ip->i_gdquot, gdqp); |
---|
757 | 774 | } |
---|
758 | | - ip->i_d.di_gid = xfs_kgid_to_gid(gid); |
---|
759 | 775 | inode->i_gid = gid; |
---|
760 | 776 | } |
---|
761 | 777 | } |
---|
.. | .. |
---|
857 | 873 | /* |
---|
858 | 874 | * Short circuit the truncate case for zero length files. |
---|
859 | 875 | */ |
---|
860 | | - if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) { |
---|
| 876 | + if (newsize == 0 && oldsize == 0 && ip->i_df.if_nextents == 0) { |
---|
861 | 877 | if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME))) |
---|
862 | 878 | return 0; |
---|
863 | 879 | |
---|
.. | .. |
---|
893 | 909 | if (newsize > oldsize) { |
---|
894 | 910 | trace_xfs_zero_eof(ip, oldsize, newsize - oldsize); |
---|
895 | 911 | error = iomap_zero_range(inode, oldsize, newsize - oldsize, |
---|
896 | | - &did_zeroing, &xfs_iomap_ops); |
---|
| 912 | + &did_zeroing, &xfs_buffered_write_iomap_ops); |
---|
897 | 913 | } else { |
---|
898 | 914 | /* |
---|
899 | 915 | * iomap won't detect a dirty page over an unwritten block (or a |
---|
.. | .. |
---|
906 | 922 | if (error) |
---|
907 | 923 | return error; |
---|
908 | 924 | error = iomap_truncate_page(inode, newsize, &did_zeroing, |
---|
909 | | - &xfs_iomap_ops); |
---|
| 925 | + &xfs_buffered_write_iomap_ops); |
---|
910 | 926 | } |
---|
911 | 927 | |
---|
912 | 928 | if (error) |
---|
.. | .. |
---|
1134 | 1150 | &xfs_xattr_iomap_ops); |
---|
1135 | 1151 | } else { |
---|
1136 | 1152 | error = iomap_fiemap(inode, fieinfo, start, length, |
---|
1137 | | - &xfs_iomap_ops); |
---|
| 1153 | + &xfs_read_iomap_ops); |
---|
1138 | 1154 | } |
---|
1139 | 1155 | xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED); |
---|
1140 | 1156 | |
---|
.. | .. |
---|
1233 | 1249 | { |
---|
1234 | 1250 | struct xfs_mount *mp = ip->i_mount; |
---|
1235 | 1251 | |
---|
1236 | | - /* Only supported on non-reflinked files. */ |
---|
1237 | | - if (!S_ISREG(VFS_I(ip)->i_mode) || xfs_is_reflink_inode(ip)) |
---|
| 1252 | + /* Only supported on regular files. */ |
---|
| 1253 | + if (!S_ISREG(VFS_I(ip)->i_mode)) |
---|
1238 | 1254 | return false; |
---|
1239 | 1255 | |
---|
1240 | | - /* DAX mount option or DAX iflag must be set. */ |
---|
1241 | | - if (!(mp->m_flags & XFS_MOUNT_DAX) && |
---|
1242 | | - !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) |
---|
| 1256 | + /* Only supported on non-reflinked files. */ |
---|
| 1257 | + if (xfs_is_reflink_inode(ip)) |
---|
1243 | 1258 | return false; |
---|
1244 | 1259 | |
---|
1245 | 1260 | /* Block size must match page size */ |
---|
.. | .. |
---|
1247 | 1262 | return false; |
---|
1248 | 1263 | |
---|
1249 | 1264 | /* Device has to support DAX too. */ |
---|
1250 | | - return xfs_find_daxdev_for_inode(VFS_I(ip)) != NULL; |
---|
| 1265 | + return xfs_inode_buftarg(ip)->bt_daxdev != NULL; |
---|
1251 | 1266 | } |
---|
1252 | 1267 | |
---|
1253 | | -STATIC void |
---|
1254 | | -xfs_diflags_to_iflags( |
---|
1255 | | - struct inode *inode, |
---|
1256 | | - struct xfs_inode *ip) |
---|
| 1268 | +static bool |
---|
| 1269 | +xfs_inode_should_enable_dax( |
---|
| 1270 | + struct xfs_inode *ip) |
---|
1257 | 1271 | { |
---|
1258 | | - uint16_t flags = ip->i_d.di_flags; |
---|
| 1272 | + if (!IS_ENABLED(CONFIG_FS_DAX)) |
---|
| 1273 | + return false; |
---|
| 1274 | + if (ip->i_mount->m_flags & XFS_MOUNT_DAX_NEVER) |
---|
| 1275 | + return false; |
---|
| 1276 | + if (!xfs_inode_supports_dax(ip)) |
---|
| 1277 | + return false; |
---|
| 1278 | + if (ip->i_mount->m_flags & XFS_MOUNT_DAX_ALWAYS) |
---|
| 1279 | + return true; |
---|
| 1280 | + if (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX) |
---|
| 1281 | + return true; |
---|
| 1282 | + return false; |
---|
| 1283 | +} |
---|
1259 | 1284 | |
---|
1260 | | - inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC | |
---|
1261 | | - S_NOATIME | S_DAX); |
---|
| 1285 | +void |
---|
| 1286 | +xfs_diflags_to_iflags( |
---|
| 1287 | + struct xfs_inode *ip, |
---|
| 1288 | + bool init) |
---|
| 1289 | +{ |
---|
| 1290 | + struct inode *inode = VFS_I(ip); |
---|
| 1291 | + unsigned int xflags = xfs_ip2xflags(ip); |
---|
| 1292 | + unsigned int flags = 0; |
---|
1262 | 1293 | |
---|
1263 | | - if (flags & XFS_DIFLAG_IMMUTABLE) |
---|
1264 | | - inode->i_flags |= S_IMMUTABLE; |
---|
1265 | | - if (flags & XFS_DIFLAG_APPEND) |
---|
1266 | | - inode->i_flags |= S_APPEND; |
---|
1267 | | - if (flags & XFS_DIFLAG_SYNC) |
---|
1268 | | - inode->i_flags |= S_SYNC; |
---|
1269 | | - if (flags & XFS_DIFLAG_NOATIME) |
---|
1270 | | - inode->i_flags |= S_NOATIME; |
---|
1271 | | - if (xfs_inode_supports_dax(ip)) |
---|
1272 | | - inode->i_flags |= S_DAX; |
---|
| 1294 | + ASSERT(!(IS_DAX(inode) && init)); |
---|
| 1295 | + |
---|
| 1296 | + if (xflags & FS_XFLAG_IMMUTABLE) |
---|
| 1297 | + flags |= S_IMMUTABLE; |
---|
| 1298 | + if (xflags & FS_XFLAG_APPEND) |
---|
| 1299 | + flags |= S_APPEND; |
---|
| 1300 | + if (xflags & FS_XFLAG_SYNC) |
---|
| 1301 | + flags |= S_SYNC; |
---|
| 1302 | + if (xflags & FS_XFLAG_NOATIME) |
---|
| 1303 | + flags |= S_NOATIME; |
---|
| 1304 | + if (init && xfs_inode_should_enable_dax(ip)) |
---|
| 1305 | + flags |= S_DAX; |
---|
| 1306 | + |
---|
| 1307 | + /* |
---|
| 1308 | + * S_DAX can only be set during inode initialization and is never set by |
---|
| 1309 | + * the VFS, so we cannot mask off S_DAX in i_flags. |
---|
| 1310 | + */ |
---|
| 1311 | + inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC | S_NOATIME); |
---|
| 1312 | + inode->i_flags |= flags; |
---|
1273 | 1313 | } |
---|
1274 | 1314 | |
---|
1275 | 1315 | /* |
---|
.. | .. |
---|
1288 | 1328 | gfp_t gfp_mask; |
---|
1289 | 1329 | |
---|
1290 | 1330 | inode->i_ino = ip->i_ino; |
---|
1291 | | - inode->i_state = I_NEW; |
---|
| 1331 | + inode->i_state |= I_NEW; |
---|
1292 | 1332 | |
---|
1293 | 1333 | inode_sb_list_add(inode); |
---|
1294 | 1334 | /* make the inode look hashed for the writeback code */ |
---|
1295 | 1335 | inode_fake_hash(inode); |
---|
1296 | 1336 | |
---|
1297 | | - inode->i_uid = xfs_uid_to_kuid(ip->i_d.di_uid); |
---|
1298 | | - inode->i_gid = xfs_gid_to_kgid(ip->i_d.di_gid); |
---|
1299 | | - |
---|
1300 | 1337 | i_size_write(inode, ip->i_d.di_size); |
---|
1301 | | - xfs_diflags_to_iflags(inode, ip); |
---|
| 1338 | + xfs_diflags_to_iflags(ip, true); |
---|
1302 | 1339 | |
---|
1303 | 1340 | if (S_ISDIR(inode->i_mode)) { |
---|
1304 | 1341 | /* |
---|
.. | .. |
---|
1310 | 1347 | lockdep_set_class(&inode->i_rwsem, |
---|
1311 | 1348 | &inode->i_sb->s_type->i_mutex_dir_key); |
---|
1312 | 1349 | lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class); |
---|
1313 | | - ip->d_ops = ip->i_mount->m_dir_inode_ops; |
---|
1314 | 1350 | } else { |
---|
1315 | | - ip->d_ops = ip->i_mount->m_nondir_inode_ops; |
---|
1316 | 1351 | lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class); |
---|
1317 | 1352 | } |
---|
1318 | 1353 | |
---|