hc
2024-05-10 9999e48639b3cecb08ffb37358bcba3b48161b29
kernel/fs/xfs/xfs_iops.c
....@@ -10,38 +10,29 @@
1010 #include "xfs_log_format.h"
1111 #include "xfs_trans_resv.h"
1212 #include "xfs_mount.h"
13
-#include "xfs_da_format.h"
1413 #include "xfs_inode.h"
15
-#include "xfs_bmap.h"
16
-#include "xfs_bmap_util.h"
1714 #include "xfs_acl.h"
1815 #include "xfs_quota.h"
19
-#include "xfs_error.h"
2016 #include "xfs_attr.h"
2117 #include "xfs_trans.h"
2218 #include "xfs_trace.h"
2319 #include "xfs_icache.h"
2420 #include "xfs_symlink.h"
25
-#include "xfs_da_btree.h"
2621 #include "xfs_dir2.h"
27
-#include "xfs_trans_space.h"
2822 #include "xfs_iomap.h"
29
-#include "xfs_defer.h"
23
+#include "xfs_error.h"
3024
31
-#include <linux/capability.h>
32
-#include <linux/xattr.h>
3325 #include <linux/posix_acl.h>
3426 #include <linux/security.h>
35
-#include <linux/iomap.h>
36
-#include <linux/slab.h>
3727 #include <linux/iversion.h>
28
+#include <linux/fiemap.h>
3829
3930 /*
40
- * Directories have different lock order w.r.t. mmap_sem compared to regular
31
+ * Directories have different lock order w.r.t. mmap_lock compared to regular
4132 * files. This is due to readdir potentially triggering page faults on a user
4233 * buffer inside filldir(), and this happens with the ilock on the directory
4334 * held. For regular files, the lock order is the other way around - the
44
- * mmap_sem is taken during the page fault, and then we lock the ilock to do
35
+ * mmap_lock is taken during the page fault, and then we lock the ilock to do
4536 * block mapping. Hence we need a different class for the directory ilock so
4637 * that lockdep can tell them apart.
4738 */
....@@ -59,8 +50,15 @@
5950 int error = 0;
6051
6152 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
62
- error = xfs_attr_set(ip, xattr->name, xattr->value,
63
- xattr->value_len, ATTR_SECURE);
53
+ struct xfs_da_args args = {
54
+ .dp = ip,
55
+ .attr_filter = XFS_ATTR_SECURE,
56
+ .name = xattr->name,
57
+ .namelen = strlen(xattr->name),
58
+ .value = xattr->value,
59
+ .valuelen = xattr->value_len,
60
+ };
61
+ error = xfs_attr_set(&args);
6462 if (error < 0)
6563 break;
6664 }
....@@ -239,7 +237,7 @@
239237 umode_t mode,
240238 bool flags)
241239 {
242
- return xfs_vn_mknod(dir, dentry, mode, 0);
240
+ return xfs_generic_create(dir, dentry, mode, 0, false);
243241 }
244242
245243 STATIC int
....@@ -248,7 +246,7 @@
248246 struct dentry *dentry,
249247 umode_t mode)
250248 {
251
- return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
249
+ return xfs_generic_create(dir, dentry, mode | S_IFDIR, 0, false);
252250 }
253251
254252 STATIC struct dentry *
....@@ -480,18 +478,55 @@
480478 struct inode *inode,
481479 struct delayed_call *done)
482480 {
481
+ struct xfs_inode *ip = XFS_I(inode);
483482 char *link;
484483
485
- ASSERT(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE);
484
+ ASSERT(ip->i_df.if_flags & XFS_IFINLINE);
486485
487486 /*
488487 * The VFS crashes on a NULL pointer, so return -EFSCORRUPTED if
489488 * if_data is junk.
490489 */
491
- link = XFS_I(inode)->i_df.if_u1.if_data;
492
- if (!link)
490
+ link = ip->i_df.if_u1.if_data;
491
+ if (XFS_IS_CORRUPT(ip->i_mount, !link))
493492 return ERR_PTR(-EFSCORRUPTED);
494493 return link;
494
+}
495
+
496
+static uint32_t
497
+xfs_stat_blksize(
498
+ struct xfs_inode *ip)
499
+{
500
+ struct xfs_mount *mp = ip->i_mount;
501
+
502
+ /*
503
+ * If the file blocks are being allocated from a realtime volume, then
504
+ * always return the realtime extent size.
505
+ */
506
+ if (XFS_IS_REALTIME_INODE(ip))
507
+ return xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
508
+
509
+ /*
510
+ * Allow large block sizes to be reported to userspace programs if the
511
+ * "largeio" mount option is used.
512
+ *
513
+ * If compatibility mode is specified, simply return the basic unit of
514
+ * caching so that we don't get inefficient read/modify/write I/O from
515
+ * user apps. Otherwise....
516
+ *
517
+ * If the underlying volume is a stripe, then return the stripe width in
518
+ * bytes as the recommended I/O size. If it is not a stripe and we've set a
519
+ * default buffered I/O size, return that, otherwise return the compat
520
+ * default.
521
+ */
522
+ if (mp->m_flags & XFS_MOUNT_LARGEIO) {
523
+ if (mp->m_swidth)
524
+ return mp->m_swidth << mp->m_sb.sb_blocklog;
525
+ if (mp->m_flags & XFS_MOUNT_ALLOCSIZE)
526
+ return 1U << mp->m_allocsize_log;
527
+ }
528
+
529
+ return PAGE_SIZE;
495530 }
496531
497532 STATIC int
....@@ -523,11 +558,10 @@
523558 stat->blocks =
524559 XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
525560
526
- if (ip->i_d.di_version == 3) {
561
+ if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
527562 if (request_mask & STATX_BTIME) {
528563 stat->result_mask |= STATX_BTIME;
529
- stat->btime.tv_sec = ip->i_d.di_crtime.t_sec;
530
- stat->btime.tv_nsec = ip->i_d.di_crtime.t_nsec;
564
+ stat->btime = ip->i_d.di_crtime;
531565 }
532566 }
533567
....@@ -553,52 +587,12 @@
553587 stat->rdev = inode->i_rdev;
554588 break;
555589 default:
556
- if (XFS_IS_REALTIME_INODE(ip)) {
557
- /*
558
- * If the file blocks are being allocated from a
559
- * realtime volume, then return the inode's realtime
560
- * extent size or the realtime volume's extent size.
561
- */
562
- stat->blksize =
563
- xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
564
- } else
565
- stat->blksize = xfs_preferred_iosize(mp);
590
+ stat->blksize = xfs_stat_blksize(ip);
566591 stat->rdev = 0;
567592 break;
568593 }
569594
570595 return 0;
571
-}
572
-
573
-static void
574
-xfs_setattr_mode(
575
- struct xfs_inode *ip,
576
- struct iattr *iattr)
577
-{
578
- struct inode *inode = VFS_I(ip);
579
- umode_t mode = iattr->ia_mode;
580
-
581
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
582
-
583
- inode->i_mode &= S_IFMT;
584
- inode->i_mode |= mode & ~S_IFMT;
585
-}
586
-
587
-void
588
-xfs_setattr_time(
589
- struct xfs_inode *ip,
590
- struct iattr *iattr)
591
-{
592
- struct inode *inode = VFS_I(ip);
593
-
594
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
595
-
596
- if (iattr->ia_valid & ATTR_ATIME)
597
- inode->i_atime = iattr->ia_atime;
598
- if (iattr->ia_valid & ATTR_CTIME)
599
- inode->i_ctime = iattr->ia_ctime;
600
- if (iattr->ia_valid & ATTR_MTIME)
601
- inode->i_mtime = iattr->ia_mtime;
602596 }
603597
604598 static int
....@@ -672,9 +666,7 @@
672666 */
673667 ASSERT(udqp == NULL);
674668 ASSERT(gdqp == NULL);
675
- error = xfs_qm_vop_dqalloc(ip, xfs_kuid_to_uid(uid),
676
- xfs_kgid_to_gid(gid),
677
- xfs_get_projid(ip),
669
+ error = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid,
678670 qflags, &udqp, &gdqp, NULL);
679671 if (error)
680672 return error;
....@@ -716,21 +708,6 @@
716708 if (error) /* out of quota */
717709 goto out_cancel;
718710 }
719
- }
720
-
721
- /*
722
- * Change file ownership. Must be the owner or privileged.
723
- */
724
- if (mask & (ATTR_UID|ATTR_GID)) {
725
- /*
726
- * CAP_FSETID overrides the following restrictions:
727
- *
728
- * The set-user-ID and set-group-ID bits of a file will be
729
- * cleared upon successful return from chown()
730
- */
731
- if ((inode->i_mode & (S_ISUID|S_ISGID)) &&
732
- !capable(CAP_FSETID))
733
- inode->i_mode &= ~(S_ISUID|S_ISGID);
734711
735712 /*
736713 * Change the ownerships and register quota modifications
....@@ -743,8 +720,6 @@
743720 olddquot1 = xfs_qm_vop_chown(tp, ip,
744721 &ip->i_udquot, udqp);
745722 }
746
- ip->i_d.di_uid = xfs_kuid_to_uid(uid);
747
- inode->i_uid = uid;
748723 }
749724 if (!gid_eq(igid, gid)) {
750725 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
....@@ -755,16 +730,10 @@
755730 olddquot2 = xfs_qm_vop_chown(tp, ip,
756731 &ip->i_gdquot, gdqp);
757732 }
758
- ip->i_d.di_gid = xfs_kgid_to_gid(gid);
759
- inode->i_gid = gid;
760733 }
761734 }
762735
763
- if (mask & ATTR_MODE)
764
- xfs_setattr_mode(ip, iattr);
765
- if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
766
- xfs_setattr_time(ip, iattr);
767
-
736
+ setattr_copy(inode, iattr);
768737 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
769738
770739 XFS_STATS_INC(mp, xs_ig_attrchg);
....@@ -857,7 +826,7 @@
857826 /*
858827 * Short circuit the truncate case for zero length files.
859828 */
860
- if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) {
829
+ if (newsize == 0 && oldsize == 0 && ip->i_df.if_nextents == 0) {
861830 if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME)))
862831 return 0;
863832
....@@ -893,7 +862,7 @@
893862 if (newsize > oldsize) {
894863 trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
895864 error = iomap_zero_range(inode, oldsize, newsize - oldsize,
896
- &did_zeroing, &xfs_iomap_ops);
865
+ &did_zeroing, &xfs_buffered_write_iomap_ops);
897866 } else {
898867 /*
899868 * iomap won't detect a dirty page over an unwritten block (or a
....@@ -906,7 +875,7 @@
906875 if (error)
907876 return error;
908877 error = iomap_truncate_page(inode, newsize, &did_zeroing,
909
- &xfs_iomap_ops);
878
+ &xfs_buffered_write_iomap_ops);
910879 }
911880
912881 if (error)
....@@ -1009,11 +978,8 @@
1009978 xfs_inode_clear_eofblocks_tag(ip);
1010979 }
1011980
1012
- if (iattr->ia_valid & ATTR_MODE)
1013
- xfs_setattr_mode(ip, iattr);
1014
- if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
1015
- xfs_setattr_time(ip, iattr);
1016
-
981
+ ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID)));
982
+ setattr_copy(inode, iattr);
1017983 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1018984
1019985 XFS_STATS_INC(mp, xs_ig_attrchg);
....@@ -1134,7 +1100,7 @@
11341100 &xfs_xattr_iomap_ops);
11351101 } else {
11361102 error = iomap_fiemap(inode, fieinfo, start, length,
1137
- &xfs_iomap_ops);
1103
+ &xfs_read_iomap_ops);
11381104 }
11391105 xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED);
11401106
....@@ -1233,13 +1199,12 @@
12331199 {
12341200 struct xfs_mount *mp = ip->i_mount;
12351201
1236
- /* Only supported on non-reflinked files. */
1237
- if (!S_ISREG(VFS_I(ip)->i_mode) || xfs_is_reflink_inode(ip))
1202
+ /* Only supported on regular files. */
1203
+ if (!S_ISREG(VFS_I(ip)->i_mode))
12381204 return false;
12391205
1240
- /* DAX mount option or DAX iflag must be set. */
1241
- if (!(mp->m_flags & XFS_MOUNT_DAX) &&
1242
- !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX))
1206
+ /* Only supported on non-reflinked files. */
1207
+ if (xfs_is_reflink_inode(ip))
12431208 return false;
12441209
12451210 /* Block size must match page size */
....@@ -1247,29 +1212,54 @@
12471212 return false;
12481213
12491214 /* Device has to support DAX too. */
1250
- return xfs_find_daxdev_for_inode(VFS_I(ip)) != NULL;
1215
+ return xfs_inode_buftarg(ip)->bt_daxdev != NULL;
12511216 }
12521217
1253
-STATIC void
1254
-xfs_diflags_to_iflags(
1255
- struct inode *inode,
1256
- struct xfs_inode *ip)
1218
+static bool
1219
+xfs_inode_should_enable_dax(
1220
+ struct xfs_inode *ip)
12571221 {
1258
- uint16_t flags = ip->i_d.di_flags;
1222
+ if (!IS_ENABLED(CONFIG_FS_DAX))
1223
+ return false;
1224
+ if (ip->i_mount->m_flags & XFS_MOUNT_DAX_NEVER)
1225
+ return false;
1226
+ if (!xfs_inode_supports_dax(ip))
1227
+ return false;
1228
+ if (ip->i_mount->m_flags & XFS_MOUNT_DAX_ALWAYS)
1229
+ return true;
1230
+ if (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
1231
+ return true;
1232
+ return false;
1233
+}
12591234
1260
- inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC |
1261
- S_NOATIME | S_DAX);
1235
+void
1236
+xfs_diflags_to_iflags(
1237
+ struct xfs_inode *ip,
1238
+ bool init)
1239
+{
1240
+ struct inode *inode = VFS_I(ip);
1241
+ unsigned int xflags = xfs_ip2xflags(ip);
1242
+ unsigned int flags = 0;
12621243
1263
- if (flags & XFS_DIFLAG_IMMUTABLE)
1264
- inode->i_flags |= S_IMMUTABLE;
1265
- if (flags & XFS_DIFLAG_APPEND)
1266
- inode->i_flags |= S_APPEND;
1267
- if (flags & XFS_DIFLAG_SYNC)
1268
- inode->i_flags |= S_SYNC;
1269
- if (flags & XFS_DIFLAG_NOATIME)
1270
- inode->i_flags |= S_NOATIME;
1271
- if (xfs_inode_supports_dax(ip))
1272
- inode->i_flags |= S_DAX;
1244
+ ASSERT(!(IS_DAX(inode) && init));
1245
+
1246
+ if (xflags & FS_XFLAG_IMMUTABLE)
1247
+ flags |= S_IMMUTABLE;
1248
+ if (xflags & FS_XFLAG_APPEND)
1249
+ flags |= S_APPEND;
1250
+ if (xflags & FS_XFLAG_SYNC)
1251
+ flags |= S_SYNC;
1252
+ if (xflags & FS_XFLAG_NOATIME)
1253
+ flags |= S_NOATIME;
1254
+ if (init && xfs_inode_should_enable_dax(ip))
1255
+ flags |= S_DAX;
1256
+
1257
+ /*
1258
+ * S_DAX can only be set during inode initialization and is never set by
1259
+ * the VFS, so we cannot mask off S_DAX in i_flags.
1260
+ */
1261
+ inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC | S_NOATIME);
1262
+ inode->i_flags |= flags;
12731263 }
12741264
12751265 /*
....@@ -1288,17 +1278,14 @@
12881278 gfp_t gfp_mask;
12891279
12901280 inode->i_ino = ip->i_ino;
1291
- inode->i_state = I_NEW;
1281
+ inode->i_state |= I_NEW;
12921282
12931283 inode_sb_list_add(inode);
12941284 /* make the inode look hashed for the writeback code */
12951285 inode_fake_hash(inode);
12961286
1297
- inode->i_uid = xfs_uid_to_kuid(ip->i_d.di_uid);
1298
- inode->i_gid = xfs_gid_to_kgid(ip->i_d.di_gid);
1299
-
13001287 i_size_write(inode, ip->i_d.di_size);
1301
- xfs_diflags_to_iflags(inode, ip);
1288
+ xfs_diflags_to_iflags(ip, true);
13021289
13031290 if (S_ISDIR(inode->i_mode)) {
13041291 /*
....@@ -1310,9 +1297,7 @@
13101297 lockdep_set_class(&inode->i_rwsem,
13111298 &inode->i_sb->s_type->i_mutex_dir_key);
13121299 lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
1313
- ip->d_ops = ip->i_mount->m_dir_inode_ops;
13141300 } else {
1315
- ip->d_ops = ip->i_mount->m_nondir_inode_ops;
13161301 lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
13171302 }
13181303