forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-11 6778948f9de86c3cfaf36725a7c87dcff9ba247f
kernel/fs/xfs/xfs_file.c
....@@ -10,14 +10,11 @@
1010 #include "xfs_log_format.h"
1111 #include "xfs_trans_resv.h"
1212 #include "xfs_mount.h"
13
-#include "xfs_da_format.h"
14
-#include "xfs_da_btree.h"
1513 #include "xfs_inode.h"
1614 #include "xfs_trans.h"
1715 #include "xfs_inode_item.h"
1816 #include "xfs_bmap.h"
1917 #include "xfs_bmap_util.h"
20
-#include "xfs_error.h"
2118 #include "xfs_dir2.h"
2219 #include "xfs_dir2_priv.h"
2320 #include "xfs_ioctl.h"
....@@ -28,13 +25,45 @@
2825 #include "xfs_iomap.h"
2926 #include "xfs_reflink.h"
3027
31
-#include <linux/dcache.h>
3228 #include <linux/falloc.h>
33
-#include <linux/pagevec.h>
3429 #include <linux/backing-dev.h>
3530 #include <linux/mman.h>
31
+#include <linux/fadvise.h>
3632
3733 static const struct vm_operations_struct xfs_file_vm_ops;
34
+
35
+/*
36
+ * Decide if the given file range is aligned to the size of the fundamental
37
+ * allocation unit for the file.
38
+ */
39
+static bool
40
+xfs_is_falloc_aligned(
41
+ struct xfs_inode *ip,
42
+ loff_t pos,
43
+ long long int len)
44
+{
45
+ struct xfs_mount *mp = ip->i_mount;
46
+ uint64_t mask;
47
+
48
+ if (XFS_IS_REALTIME_INODE(ip)) {
49
+ if (!is_power_of_2(mp->m_sb.sb_rextsize)) {
50
+ u64 rextbytes;
51
+ u32 mod;
52
+
53
+ rextbytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
54
+ div_u64_rem(pos, rextbytes, &mod);
55
+ if (mod)
56
+ return false;
57
+ div_u64_rem(len, rextbytes, &mod);
58
+ return mod == 0;
59
+ }
60
+ mask = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize) - 1;
61
+ } else {
62
+ mask = mp->m_sb.sb_blocksize - 1;
63
+ }
64
+
65
+ return !((pos | len) & mask);
66
+}
3867
3968 int
4069 xfs_update_prealloc_flags(
....@@ -84,19 +113,57 @@
84113 int datasync)
85114 {
86115 struct xfs_inode *ip = XFS_I(file->f_mapping->host);
87
- struct xfs_mount *mp = ip->i_mount;
88
- xfs_lsn_t lsn = 0;
89116
90117 trace_xfs_dir_fsync(ip);
118
+ return xfs_log_force_inode(ip);
119
+}
120
+
121
+static xfs_csn_t
122
+xfs_fsync_seq(
123
+ struct xfs_inode *ip,
124
+ bool datasync)
125
+{
126
+ if (!xfs_ipincount(ip))
127
+ return 0;
128
+ if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
129
+ return 0;
130
+ return ip->i_itemp->ili_commit_seq;
131
+}
132
+
133
+/*
134
+ * All metadata updates are logged, which means that we just have to flush the
135
+ * log up to the latest LSN that touched the inode.
136
+ *
137
+ * If we have concurrent fsync/fdatasync() calls, we need them to all block on
138
+ * the log force before we clear the ili_fsync_fields field. This ensures that
139
+ * we don't get a racing sync operation that does not wait for the metadata to
140
+ * hit the journal before returning. If we race with clearing ili_fsync_fields,
141
+ * then all that will happen is the log force will do nothing as the lsn will
142
+ * already be on disk. We can't race with setting ili_fsync_fields because that
143
+ * is done under XFS_ILOCK_EXCL, and that can't happen because we hold the lock
144
+ * shared until after the ili_fsync_fields is cleared.
145
+ */
146
+static int
147
+xfs_fsync_flush_log(
148
+ struct xfs_inode *ip,
149
+ bool datasync,
150
+ int *log_flushed)
151
+{
152
+ int error = 0;
153
+ xfs_csn_t seq;
91154
92155 xfs_ilock(ip, XFS_ILOCK_SHARED);
93
- if (xfs_ipincount(ip))
94
- lsn = ip->i_itemp->ili_last_lsn;
95
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
156
+ seq = xfs_fsync_seq(ip, datasync);
157
+ if (seq) {
158
+ error = xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC,
159
+ log_flushed);
96160
97
- if (!lsn)
98
- return 0;
99
- return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
161
+ spin_lock(&ip->i_itemp->ili_lock);
162
+ ip->i_itemp->ili_fsync_fields = 0;
163
+ spin_unlock(&ip->i_itemp->ili_lock);
164
+ }
165
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
166
+ return error;
100167 }
101168
102169 STATIC int
....@@ -106,12 +173,10 @@
106173 loff_t end,
107174 int datasync)
108175 {
109
- struct inode *inode = file->f_mapping->host;
110
- struct xfs_inode *ip = XFS_I(inode);
176
+ struct xfs_inode *ip = XFS_I(file->f_mapping->host);
111177 struct xfs_mount *mp = ip->i_mount;
112178 int error = 0;
113179 int log_flushed = 0;
114
- xfs_lsn_t lsn = 0;
115180
116181 trace_xfs_file_fsync(ip);
117182
....@@ -135,31 +200,7 @@
135200 else if (mp->m_logdev_targp != mp->m_ddev_targp)
136201 xfs_blkdev_issue_flush(mp->m_ddev_targp);
137202
138
- /*
139
- * All metadata updates are logged, which means that we just have to
140
- * flush the log up to the latest LSN that touched the inode. If we have
141
- * concurrent fsync/fdatasync() calls, we need them to all block on the
142
- * log force before we clear the ili_fsync_fields field. This ensures
143
- * that we don't get a racing sync operation that does not wait for the
144
- * metadata to hit the journal before returning. If we race with
145
- * clearing the ili_fsync_fields, then all that will happen is the log
146
- * force will do nothing as the lsn will already be on disk. We can't
147
- * race with setting ili_fsync_fields because that is done under
148
- * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
149
- * until after the ili_fsync_fields is cleared.
150
- */
151
- xfs_ilock(ip, XFS_ILOCK_SHARED);
152
- if (xfs_ipincount(ip)) {
153
- if (!datasync ||
154
- (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
155
- lsn = ip->i_itemp->ili_last_lsn;
156
- }
157
-
158
- if (lsn) {
159
- error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
160
- ip->i_itemp->ili_fsync_fields = 0;
161
- }
162
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
203
+ error = xfs_fsync_flush_log(ip, datasync, &log_flushed);
163204
164205 /*
165206 * If we only have a single device, and the log force about was
....@@ -191,8 +232,14 @@
191232
192233 file_accessed(iocb->ki_filp);
193234
194
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
195
- ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL);
235
+ if (iocb->ki_flags & IOCB_NOWAIT) {
236
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
237
+ return -EAGAIN;
238
+ } else {
239
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
240
+ }
241
+ ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL,
242
+ is_sync_kiocb(iocb));
196243 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
197244
198245 return ret;
....@@ -219,7 +266,7 @@
219266 xfs_ilock(ip, XFS_IOLOCK_SHARED);
220267 }
221268
222
- ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
269
+ ret = dax_iomap_rw(iocb, to, &xfs_read_iomap_ops);
223270 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
224271
225272 file_accessed(iocb->ki_filp);
....@@ -355,7 +402,7 @@
355402
356403 trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
357404 error = iomap_zero_range(inode, isize, iocb->ki_pos - isize,
358
- NULL, &xfs_iomap_ops);
405
+ NULL, &xfs_buffered_write_iomap_ops);
359406 if (error)
360407 return error;
361408 } else
....@@ -367,40 +414,30 @@
367414 * lock above. Eventually we should look into a way to avoid
368415 * the pointless lock roundtrip.
369416 */
370
- if (likely(!(file->f_mode & FMODE_NOCMTIME))) {
371
- error = file_update_time(file);
372
- if (error)
373
- return error;
374
- }
375
-
376
- /*
377
- * If we're writing the file then make sure to clear the setuid and
378
- * setgid bits if the process is not being run by root. This keeps
379
- * people from modifying setuid and setgid binaries.
380
- */
381
- if (!IS_NOSEC(inode))
382
- return file_remove_privs(file);
383
- return 0;
417
+ return file_modified(file);
384418 }
385419
386420 static int
387421 xfs_dio_write_end_io(
388422 struct kiocb *iocb,
389423 ssize_t size,
424
+ int error,
390425 unsigned flags)
391426 {
392427 struct inode *inode = file_inode(iocb->ki_filp);
393428 struct xfs_inode *ip = XFS_I(inode);
394429 loff_t offset = iocb->ki_pos;
395
- int error = 0;
430
+ unsigned int nofs_flag;
396431
397432 trace_xfs_end_io_direct_write(ip, offset, size);
398433
399434 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
400435 return -EIO;
401436
402
- if (size <= 0)
403
- return size;
437
+ if (error)
438
+ return error;
439
+ if (!size)
440
+ return 0;
404441
405442 /*
406443 * Capture amount written on completion as we can't reliably account
....@@ -408,10 +445,17 @@
408445 */
409446 XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);
410447
448
+ /*
449
+ * We can allocate memory here while doing writeback on behalf of
450
+ * memory reclaim. To avoid memory allocation deadlocks set the
451
+ * task-wide nofs context for the following operations.
452
+ */
453
+ nofs_flag = memalloc_nofs_save();
454
+
411455 if (flags & IOMAP_DIO_COW) {
412456 error = xfs_reflink_end_cow(ip, offset, size);
413457 if (error)
414
- return error;
458
+ goto out;
415459 }
416460
417461 /*
....@@ -420,8 +464,10 @@
420464 * earlier allows a racing dio read to find unwritten extents before
421465 * they are converted.
422466 */
423
- if (flags & IOMAP_DIO_UNWRITTEN)
424
- return xfs_iomap_write_unwritten(ip, offset, size, true);
467
+ if (flags & IOMAP_DIO_UNWRITTEN) {
468
+ error = xfs_iomap_write_unwritten(ip, offset, size, true);
469
+ goto out;
470
+ }
425471
426472 /*
427473 * We need to update the in-core inode size here so that we don't end up
....@@ -443,8 +489,14 @@
443489 spin_unlock(&ip->i_flags_lock);
444490 }
445491
492
+out:
493
+ memalloc_nofs_restore(nofs_flag);
446494 return error;
447495 }
496
+
497
+static const struct iomap_dio_ops xfs_dio_write_ops = {
498
+ .end_io = xfs_dio_write_end_io,
499
+};
448500
449501 /*
450502 * xfs_file_dio_aio_write - handle direct IO writes
....@@ -485,8 +537,7 @@
485537 int unaligned_io = 0;
486538 int iolock;
487539 size_t count = iov_iter_count(from);
488
- struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
489
- mp->m_rtdev_targp : mp->m_ddev_targp;
540
+ struct xfs_buftarg *target = xfs_inode_buftarg(ip);
490541
491542 /* DIO must be aligned to device logical sector size */
492543 if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
....@@ -507,9 +558,9 @@
507558 * We can't properly handle unaligned direct I/O to reflink
508559 * files yet, as we can't unshare a partial block.
509560 */
510
- if (xfs_is_reflink_inode(ip)) {
561
+ if (xfs_is_cow_inode(ip)) {
511562 trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count);
512
- return -EREMCHG;
563
+ return -ENOTBLK;
513564 }
514565 iolock = XFS_IOLOCK_EXCL;
515566 } else {
....@@ -546,21 +597,19 @@
546597 }
547598
548599 trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
549
- ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
550
-
551600 /*
552
- * If unaligned, this is the only IO in-flight. If it has not yet
553
- * completed, wait on it before we release the iolock to prevent
554
- * subsequent overlapping IO.
601
+ * If unaligned, this is the only IO in-flight. Wait on it before we
602
+ * release the iolock to prevent subsequent overlapping IO.
555603 */
556
- if (ret == -EIOCBQUEUED && unaligned_io)
557
- inode_dio_wait(inode);
604
+ ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
605
+ &xfs_dio_write_ops,
606
+ is_sync_kiocb(iocb) || unaligned_io);
558607 out:
559608 xfs_iunlock(ip, iolock);
560609
561610 /*
562
- * No fallback to buffered IO on errors for XFS, direct IO will either
563
- * complete fully or fail.
611
+ * No fallback to buffered IO after short writes for XFS, direct I/O
612
+ * will either complete fully or return an error.
564613 */
565614 ASSERT(ret < 0 || ret == count);
566615 return ret;
....@@ -593,7 +642,7 @@
593642 count = iov_iter_count(from);
594643
595644 trace_xfs_file_dax_write(ip, count, pos);
596
- ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops);
645
+ ret = dax_iomap_rw(iocb, from, &xfs_direct_write_iomap_ops);
597646 if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
598647 i_size_write(inode, iocb->ki_pos);
599648 error = xfs_setfilesize(ip, pos, ret);
....@@ -640,7 +689,8 @@
640689 current->backing_dev_info = inode_to_bdi(inode);
641690
642691 trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
643
- ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
692
+ ret = iomap_file_buffered_write(iocb, from,
693
+ &xfs_buffered_write_iomap_ops);
644694 if (likely(ret >= 0))
645695 iocb->ki_pos += ret;
646696
....@@ -719,7 +769,7 @@
719769 * allow an operation to fall back to buffered mode.
720770 */
721771 ret = xfs_file_dio_aio_write(iocb, from);
722
- if (ret != -EREMCHG)
772
+ if (ret != -ENOTBLK)
723773 return ret;
724774 }
725775
....@@ -817,14 +867,42 @@
817867 if (error)
818868 goto out_unlock;
819869
870
+ /*
871
+ * Must wait for all AIO to complete before we continue as AIO can
872
+ * change the file size on completion without holding any locks we
873
+ * currently hold. We must do this first because AIO can update both
874
+ * the on disk and in memory inode sizes, and the operations that follow
875
+ * require the in-memory size to be fully up-to-date.
876
+ */
877
+ inode_dio_wait(inode);
878
+
879
+ /*
880
+ * Now that AIO and DIO have drained we flush and (if necessary) invalidate
881
+ * the cached range over the first operation we are about to run.
882
+ *
883
+ * We care about zero and collapse here because they both run a hole
884
+ * punch over the range first. Because that can zero data, and the range
885
+ * of invalidation for the shift operations is much larger, we still do
886
+ * the required flush for collapse in xfs_prepare_shift().
887
+ *
888
+ * Insert has the same range requirements as collapse, and we extend the
889
+ * file first which can zero data. Hence insert has the same
890
+ * flush/invalidate requirements as collapse and so they are both
891
+ * handled at the right time by xfs_prepare_shift().
892
+ */
893
+ if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE |
894
+ FALLOC_FL_COLLAPSE_RANGE)) {
895
+ error = xfs_flush_unmap_range(ip, offset, len);
896
+ if (error)
897
+ goto out_unlock;
898
+ }
899
+
820900 if (mode & FALLOC_FL_PUNCH_HOLE) {
821901 error = xfs_free_file_space(ip, offset, len);
822902 if (error)
823903 goto out_unlock;
824904 } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
825
- unsigned int blksize_mask = i_blocksize(inode) - 1;
826
-
827
- if (offset & blksize_mask || len & blksize_mask) {
905
+ if (!xfs_is_falloc_aligned(ip, offset, len)) {
828906 error = -EINVAL;
829907 goto out_unlock;
830908 }
....@@ -844,10 +922,9 @@
844922 if (error)
845923 goto out_unlock;
846924 } else if (mode & FALLOC_FL_INSERT_RANGE) {
847
- unsigned int blksize_mask = i_blocksize(inode) - 1;
848925 loff_t isize = i_size_read(inode);
849926
850
- if (offset & blksize_mask || len & blksize_mask) {
927
+ if (!xfs_is_falloc_aligned(ip, offset, len)) {
851928 error = -EINVAL;
852929 goto out_unlock;
853930 }
....@@ -879,19 +956,48 @@
879956 goto out_unlock;
880957 }
881958
882
- if (mode & FALLOC_FL_ZERO_RANGE)
883
- error = xfs_zero_file_space(ip, offset, len);
884
- else {
885
- if (mode & FALLOC_FL_UNSHARE_RANGE) {
886
- error = xfs_reflink_unshare(ip, offset, len);
887
- if (error)
888
- goto out_unlock;
959
+ if (mode & FALLOC_FL_ZERO_RANGE) {
960
+ /*
961
+ * Punch a hole and prealloc the range. We use a hole
962
+ * punch rather than unwritten extent conversion for two
963
+ * reasons:
964
+ *
965
+ * 1.) Hole punch handles partial block zeroing for us.
966
+ * 2.) If prealloc returns ENOSPC, the file range is
967
+ * still zero-valued by virtue of the hole punch.
968
+ */
969
+ unsigned int blksize = i_blocksize(inode);
970
+
971
+ trace_xfs_zero_file_space(ip);
972
+
973
+ error = xfs_free_file_space(ip, offset, len);
974
+ if (error)
975
+ goto out_unlock;
976
+
977
+ len = round_up(offset + len, blksize) -
978
+ round_down(offset, blksize);
979
+ offset = round_down(offset, blksize);
980
+ } else if (mode & FALLOC_FL_UNSHARE_RANGE) {
981
+ error = xfs_reflink_unshare(ip, offset, len);
982
+ if (error)
983
+ goto out_unlock;
984
+ } else {
985
+ /*
986
+ * If always_cow mode we can't use preallocations and
987
+ * thus should not create them.
988
+ */
989
+ if (xfs_is_always_cow_inode(ip)) {
990
+ error = -EOPNOTSUPP;
991
+ goto out_unlock;
889992 }
993
+ }
994
+
995
+ if (!xfs_is_always_cow_inode(ip)) {
890996 error = xfs_alloc_file_space(ip, offset, len,
891997 XFS_BMAPI_PREALLOC);
998
+ if (error)
999
+ goto out_unlock;
8921000 }
893
- if (error)
894
- goto out_unlock;
8951001 }
8961002
8971003 if (file->f_flags & O_DSYNC)
....@@ -927,27 +1033,109 @@
9271033 }
9281034
9291035 STATIC int
930
-xfs_file_clone_range(
931
- struct file *file_in,
932
- loff_t pos_in,
933
- struct file *file_out,
934
- loff_t pos_out,
935
- u64 len)
1036
+xfs_file_fadvise(
1037
+ struct file *file,
1038
+ loff_t start,
1039
+ loff_t end,
1040
+ int advice)
9361041 {
937
- return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
938
- len, false);
1042
+ struct xfs_inode *ip = XFS_I(file_inode(file));
1043
+ int ret;
1044
+ int lockflags = 0;
1045
+
1046
+ /*
1047
+ * Operations creating pages in page cache need protection from hole
1048
+ * punching and similar ops
1049
+ */
1050
+ if (advice == POSIX_FADV_WILLNEED) {
1051
+ lockflags = XFS_IOLOCK_SHARED;
1052
+ xfs_ilock(ip, lockflags);
1053
+ }
1054
+ ret = generic_fadvise(file, start, end, advice);
1055
+ if (lockflags)
1056
+ xfs_iunlock(ip, lockflags);
1057
+ return ret;
9391058 }
9401059
941
-STATIC int
942
-xfs_file_dedupe_range(
943
- struct file *file_in,
944
- loff_t pos_in,
945
- struct file *file_out,
946
- loff_t pos_out,
947
- u64 len)
1060
+/* Does this file, inode, or mount want synchronous writes? */
1061
+static inline bool xfs_file_sync_writes(struct file *filp)
9481062 {
949
- return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
950
- len, true);
1063
+ struct xfs_inode *ip = XFS_I(file_inode(filp));
1064
+
1065
+ if (ip->i_mount->m_flags & XFS_MOUNT_WSYNC)
1066
+ return true;
1067
+ if (filp->f_flags & (__O_SYNC | O_DSYNC))
1068
+ return true;
1069
+ if (IS_SYNC(file_inode(filp)))
1070
+ return true;
1071
+
1072
+ return false;
1073
+}
1074
+
1075
+STATIC loff_t
1076
+xfs_file_remap_range(
1077
+ struct file *file_in,
1078
+ loff_t pos_in,
1079
+ struct file *file_out,
1080
+ loff_t pos_out,
1081
+ loff_t len,
1082
+ unsigned int remap_flags)
1083
+{
1084
+ struct inode *inode_in = file_inode(file_in);
1085
+ struct xfs_inode *src = XFS_I(inode_in);
1086
+ struct inode *inode_out = file_inode(file_out);
1087
+ struct xfs_inode *dest = XFS_I(inode_out);
1088
+ struct xfs_mount *mp = src->i_mount;
1089
+ loff_t remapped = 0;
1090
+ xfs_extlen_t cowextsize;
1091
+ int ret;
1092
+
1093
+ if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
1094
+ return -EINVAL;
1095
+
1096
+ if (!xfs_sb_version_hasreflink(&mp->m_sb))
1097
+ return -EOPNOTSUPP;
1098
+
1099
+ if (XFS_FORCED_SHUTDOWN(mp))
1100
+ return -EIO;
1101
+
1102
+ /* Prepare and then clone file data. */
1103
+ ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
1104
+ &len, remap_flags);
1105
+ if (ret || len == 0)
1106
+ return ret;
1107
+
1108
+ trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
1109
+
1110
+ ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len,
1111
+ &remapped);
1112
+ if (ret)
1113
+ goto out_unlock;
1114
+
1115
+ /*
1116
+ * Carry the cowextsize hint from src to dest if we're sharing the
1117
+ * entire source file to the entire destination file, the source file
1118
+ * has a cowextsize hint, and the destination file does not.
1119
+ */
1120
+ cowextsize = 0;
1121
+ if (pos_in == 0 && len == i_size_read(inode_in) &&
1122
+ (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
1123
+ pos_out == 0 && len >= i_size_read(inode_out) &&
1124
+ !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
1125
+ cowextsize = src->i_d.di_cowextsize;
1126
+
1127
+ ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
1128
+ remap_flags);
1129
+ if (ret)
1130
+ goto out_unlock;
1131
+
1132
+ if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out))
1133
+ xfs_log_force_inode(dest);
1134
+out_unlock:
1135
+ xfs_iunlock2_io_mmap(src, dest);
1136
+ if (ret)
1137
+ trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
1138
+ return remapped > 0 ? remapped : ret;
9511139 }
9521140
9531141 STATIC int
....@@ -959,7 +1147,7 @@
9591147 return -EFBIG;
9601148 if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
9611149 return -EIO;
962
- file->f_mode |= FMODE_NOWAIT;
1150
+ file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
9631151 return 0;
9641152 }
9651153
....@@ -981,8 +1169,8 @@
9811169 * certain to have the next operation be a read there.
9821170 */
9831171 mode = xfs_ilock_data_map_shared(ip);
984
- if (ip->i_d.di_nextents > 0)
985
- error = xfs_dir3_data_readahead(ip, 0, -1);
1172
+ if (ip->i_df.if_nextents > 0)
1173
+ error = xfs_dir3_data_readahead(ip, 0, 0);
9861174 xfs_iunlock(ip, mode);
9871175 return error;
9881176 }
....@@ -1036,10 +1224,10 @@
10361224 default:
10371225 return generic_file_llseek(file, offset, whence);
10381226 case SEEK_HOLE:
1039
- offset = iomap_seek_hole(inode, offset, &xfs_iomap_ops);
1227
+ offset = iomap_seek_hole(inode, offset, &xfs_seek_iomap_ops);
10401228 break;
10411229 case SEEK_DATA:
1042
- offset = iomap_seek_data(inode, offset, &xfs_iomap_ops);
1230
+ offset = iomap_seek_data(inode, offset, &xfs_seek_iomap_ops);
10431231 break;
10441232 }
10451233
....@@ -1052,7 +1240,7 @@
10521240 * Locking for serialisation of IO during page faults. This results in a lock
10531241 * ordering of:
10541242 *
1055
- * mmap_sem (MM)
1243
+ * mmap_lock (MM)
10561244 * sb_start_pagefault(vfs, freeze)
10571245 * i_mmaplock (XFS - truncate serialisation)
10581246 * page_lock (MM)
....@@ -1079,12 +1267,16 @@
10791267 if (IS_DAX(inode)) {
10801268 pfn_t pfn;
10811269
1082
- ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, &xfs_iomap_ops);
1270
+ ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL,
1271
+ (write_fault && !vmf->cow_page) ?
1272
+ &xfs_direct_write_iomap_ops :
1273
+ &xfs_read_iomap_ops);
10831274 if (ret & VM_FAULT_NEEDDSYNC)
10841275 ret = dax_finish_sync_fault(vmf, pe_size, pfn);
10851276 } else {
10861277 if (write_fault)
1087
- ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
1278
+ ret = iomap_page_mkwrite(vmf,
1279
+ &xfs_buffered_write_iomap_ops);
10881280 else
10891281 ret = filemap_fault(vmf);
10901282 }
....@@ -1146,29 +1338,47 @@
11461338 return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
11471339 }
11481340
1341
+static vm_fault_t
1342
+xfs_filemap_map_pages(
1343
+ struct vm_fault *vmf,
1344
+ pgoff_t start_pgoff,
1345
+ pgoff_t end_pgoff)
1346
+{
1347
+ struct inode *inode = file_inode(vmf->vma->vm_file);
1348
+ vm_fault_t ret;
1349
+
1350
+ xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1351
+ ret = filemap_map_pages(vmf, start_pgoff, end_pgoff);
1352
+ xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1353
+ return ret;
1354
+}
1355
+
11491356 static const struct vm_operations_struct xfs_file_vm_ops = {
11501357 .fault = xfs_filemap_fault,
11511358 .huge_fault = xfs_filemap_huge_fault,
1152
- .map_pages = filemap_map_pages,
1359
+ .map_pages = xfs_filemap_map_pages,
11531360 .page_mkwrite = xfs_filemap_page_mkwrite,
11541361 .pfn_mkwrite = xfs_filemap_pfn_mkwrite,
11551362 };
11561363
11571364 STATIC int
11581365 xfs_file_mmap(
1159
- struct file *filp,
1160
- struct vm_area_struct *vma)
1366
+ struct file *file,
1367
+ struct vm_area_struct *vma)
11611368 {
1369
+ struct inode *inode = file_inode(file);
1370
+ struct xfs_buftarg *target = xfs_inode_buftarg(XFS_I(inode));
1371
+
11621372 /*
1163
- * We don't support synchronous mappings for non-DAX files. At least
1164
- * until someone comes with a sensible use case.
1373
+ * We don't support synchronous mappings for non-DAX files and
1374
+ * for DAX files if the underlying dax_device is not synchronous.
11651375 */
1166
- if (!IS_DAX(file_inode(filp)) && (vma->vm_flags & VM_SYNC))
1376
+ if (!daxdev_mapping_supported(vma, target->bt_daxdev))
11671377 return -EOPNOTSUPP;
11681378
1169
- file_accessed(filp);
1379
+ file_accessed(file);
11701380 vma->vm_ops = &xfs_file_vm_ops;
1171
- if (IS_DAX(file_inode(filp)))
1381
+ if (IS_DAX(inode))
11721382 vma->vm_flags |= VM_HUGEPAGE;
11731383 return 0;
11741384 }
....@@ -1179,6 +1389,7 @@
11791389 .write_iter = xfs_file_write_iter,
11801390 .splice_read = generic_file_splice_read,
11811391 .splice_write = iter_file_splice_write,
1392
+ .iopoll = iomap_dio_iopoll,
11821393 .unlocked_ioctl = xfs_file_ioctl,
11831394 #ifdef CONFIG_COMPAT
11841395 .compat_ioctl = xfs_file_compat_ioctl,
....@@ -1190,8 +1401,8 @@
11901401 .fsync = xfs_file_fsync,
11911402 .get_unmapped_area = thp_get_unmapped_area,
11921403 .fallocate = xfs_file_fallocate,
1193
- .clone_file_range = xfs_file_clone_range,
1194
- .dedupe_file_range = xfs_file_dedupe_range,
1404
+ .fadvise = xfs_file_fadvise,
1405
+ .remap_file_range = xfs_file_remap_range,
11951406 };
11961407
11971408 const struct file_operations xfs_dir_file_operations = {