~hc/RK356X_SDK_RELEASE.git

..	..	@@ -10,14 +10,11 @@
10	10	#include "xfs_log_format.h"
11	11	#include "xfs_trans_resv.h"
12	12	#include "xfs_mount.h"
13		-#include "xfs_da_format.h"
14		-#include "xfs_da_btree.h"
15	13	#include "xfs_inode.h"
16	14	#include "xfs_trans.h"
17	15	#include "xfs_inode_item.h"
18	16	#include "xfs_bmap.h"
19	17	#include "xfs_bmap_util.h"
20		-#include "xfs_error.h"
21	18	#include "xfs_dir2.h"
22	19	#include "xfs_dir2_priv.h"
23	20	#include "xfs_ioctl.h"
..	..	@@ -28,13 +25,45 @@
28	25	#include "xfs_iomap.h"
29	26	#include "xfs_reflink.h"
30	27
31		-#include <linux/dcache.h>
32	28	#include <linux/falloc.h>
33		-#include <linux/pagevec.h>
34	29	#include <linux/backing-dev.h>
35	30	#include <linux/mman.h>
	31	+#include <linux/fadvise.h>
36	32
37	33	static const struct vm_operations_struct xfs_file_vm_ops;
	34	+
	35	+/*
	36	+ * Decide if the given file range is aligned to the size of the fundamental
	37	+ * allocation unit for the file.
	38	+ */
	39	+static bool
	40	+xfs_is_falloc_aligned(
	41	+ struct xfs_inode *ip,
	42	+ loff_t pos,
	43	+ long long int len)
	44	+{
	45	+ struct xfs_mount *mp = ip->i_mount;
	46	+ uint64_t mask;
	47	+
	48	+ if (XFS_IS_REALTIME_INODE(ip)) {
	49	+ if (!is_power_of_2(mp->m_sb.sb_rextsize)) {
	50	+ u64 rextbytes;
	51	+ u32 mod;
	52	+
	53	+ rextbytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
	54	+ div_u64_rem(pos, rextbytes, &mod);
	55	+ if (mod)
	56	+ return false;
	57	+ div_u64_rem(len, rextbytes, &mod);
	58	+ return mod == 0;
	59	+ }
	60	+ mask = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize) - 1;
	61	+ } else {
	62	+ mask = mp->m_sb.sb_blocksize - 1;
	63	+ }
	64	+
	65	+ return !((pos | len) & mask);
	66	+}
38	67
39	68	int
40	69	xfs_update_prealloc_flags(
..	..	@@ -65,8 +94,6 @@
65	94	ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
66	95
67	96	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
68		- if (flags & XFS_PREALLOC_SYNC)
69		- xfs_trans_set_sync(tp);
70	97	return xfs_trans_commit(tp);
71	98	}
72	99
..	..	@@ -84,19 +111,57 @@
84	111	int datasync)
85	112	{
86	113	struct xfs_inode *ip = XFS_I(file->f_mapping->host);
87		- struct xfs_mount *mp = ip->i_mount;
88		- xfs_lsn_t lsn = 0;
89	114
90	115	trace_xfs_dir_fsync(ip);
	116	+ return xfs_log_force_inode(ip);
	117	+}
	118	+
	119	+static xfs_csn_t
	120	+xfs_fsync_seq(
	121	+ struct xfs_inode *ip,
	122	+ bool datasync)
	123	+{
	124	+ if (!xfs_ipincount(ip))
	125	+ return 0;
	126	+ if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
	127	+ return 0;
	128	+ return ip->i_itemp->ili_commit_seq;
	129	+}
	130	+
	131	+/*
	132	+ * All metadata updates are logged, which means that we just have to flush the
	133	+ * log up to the latest LSN that touched the inode.
	134	+ *
	135	+ * If we have concurrent fsync/fdatasync() calls, we need them to all block on
	136	+ * the log force before we clear the ili_fsync_fields field. This ensures that
	137	+ * we don't get a racing sync operation that does not wait for the metadata to
	138	+ * hit the journal before returning. If we race with clearing ili_fsync_fields,
	139	+ * then all that will happen is the log force will do nothing as the lsn will
	140	+ * already be on disk. We can't race with setting ili_fsync_fields because that
	141	+ * is done under XFS_ILOCK_EXCL, and that can't happen because we hold the lock
	142	+ * shared until after the ili_fsync_fields is cleared.
	143	+ */
	144	+static int
	145	+xfs_fsync_flush_log(
	146	+ struct xfs_inode *ip,
	147	+ bool datasync,
	148	+ int *log_flushed)
	149	+{
	150	+ int error = 0;
	151	+ xfs_csn_t seq;
91	152
92	153	xfs_ilock(ip, XFS_ILOCK_SHARED);
93		- if (xfs_ipincount(ip))
94		- lsn = ip->i_itemp->ili_last_lsn;
95		- xfs_iunlock(ip, XFS_ILOCK_SHARED);
	154	+ seq = xfs_fsync_seq(ip, datasync);
	155	+ if (seq) {
	156	+ error = xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC,
	157	+ log_flushed);
96	158
97		- if (!lsn)
98		- return 0;
99		- return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
	159	+ spin_lock(&ip->i_itemp->ili_lock);
	160	+ ip->i_itemp->ili_fsync_fields = 0;
	161	+ spin_unlock(&ip->i_itemp->ili_lock);
	162	+ }
	163	+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
	164	+ return error;
100	165	}
101	166
102	167	STATIC int
..	..	@@ -106,12 +171,10 @@
106	171	loff_t end,
107	172	int datasync)
108	173	{
109		- struct inode *inode = file->f_mapping->host;
110		- struct xfs_inode *ip = XFS_I(inode);
	174	+ struct xfs_inode *ip = XFS_I(file->f_mapping->host);
111	175	struct xfs_mount *mp = ip->i_mount;
112	176	int error = 0;
113	177	int log_flushed = 0;
114		- xfs_lsn_t lsn = 0;
115	178
116	179	trace_xfs_file_fsync(ip);
117	180
..	..	@@ -135,31 +198,7 @@
135	198	else if (mp->m_logdev_targp != mp->m_ddev_targp)
136	199	xfs_blkdev_issue_flush(mp->m_ddev_targp);
137	200
138		- /*
139		- * All metadata updates are logged, which means that we just have to
140		- * flush the log up to the latest LSN that touched the inode. If we have
141		- * concurrent fsync/fdatasync() calls, we need them to all block on the
142		- * log force before we clear the ili_fsync_fields field. This ensures
143		- * that we don't get a racing sync operation that does not wait for the
144		- * metadata to hit the journal before returning. If we race with
145		- * clearing the ili_fsync_fields, then all that will happen is the log
146		- * force will do nothing as the lsn will already be on disk. We can't
147		- * race with setting ili_fsync_fields because that is done under
148		- * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
149		- * until after the ili_fsync_fields is cleared.
150		- */
151		- xfs_ilock(ip, XFS_ILOCK_SHARED);
152		- if (xfs_ipincount(ip)) {
153		- if (!datasync ||
154		- (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
155		- lsn = ip->i_itemp->ili_last_lsn;
156		- }
157		-
158		- if (lsn) {
159		- error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
160		- ip->i_itemp->ili_fsync_fields = 0;
161		- }
162		- xfs_iunlock(ip, XFS_ILOCK_SHARED);
	201	+ error = xfs_fsync_flush_log(ip, datasync, &log_flushed);
163	202
164	203	/*
165	204	* If we only have a single device, and the log force about was
..	..	@@ -191,8 +230,14 @@
191	230
192	231	file_accessed(iocb->ki_filp);
193	232
194		- xfs_ilock(ip, XFS_IOLOCK_SHARED);
195		- ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL);
	233	+ if (iocb->ki_flags & IOCB_NOWAIT) {
	234	+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
	235	+ return -EAGAIN;
	236	+ } else {
	237	+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
	238	+ }
	239	+ ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL,
	240	+ is_sync_kiocb(iocb));
196	241	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
197	242
198	243	return ret;
..	..	@@ -219,7 +264,7 @@
219	264	xfs_ilock(ip, XFS_IOLOCK_SHARED);
220	265	}
221	266
222		- ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
	267	+ ret = dax_iomap_rw(iocb, to, &xfs_read_iomap_ops);
223	268	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
224	269
225	270	file_accessed(iocb->ki_filp);
..	..	@@ -355,7 +400,7 @@
355	400
356	401	trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
357	402	error = iomap_zero_range(inode, isize, iocb->ki_pos - isize,
358		- NULL, &xfs_iomap_ops);
	403	+ NULL, &xfs_buffered_write_iomap_ops);
359	404	if (error)
360	405	return error;
361	406	} else
..	..	@@ -367,40 +412,30 @@
367	412	* lock above. Eventually we should look into a way to avoid
368	413	* the pointless lock roundtrip.
369	414	*/
370		- if (likely(!(file->f_mode & FMODE_NOCMTIME))) {
371		- error = file_update_time(file);
372		- if (error)
373		- return error;
374		- }
375		-
376		- /*
377		- * If we're writing the file then make sure to clear the setuid and
378		- * setgid bits if the process is not being run by root. This keeps
379		- * people from modifying setuid and setgid binaries.
380		- */
381		- if (!IS_NOSEC(inode))
382		- return file_remove_privs(file);
383		- return 0;
	415	+ return file_modified(file);
384	416	}
385	417
386	418	static int
387	419	xfs_dio_write_end_io(
388	420	struct kiocb *iocb,
389	421	ssize_t size,
	422	+ int error,
390	423	unsigned flags)
391	424	{
392	425	struct inode *inode = file_inode(iocb->ki_filp);
393	426	struct xfs_inode *ip = XFS_I(inode);
394	427	loff_t offset = iocb->ki_pos;
395		- int error = 0;
	428	+ unsigned int nofs_flag;
396	429
397	430	trace_xfs_end_io_direct_write(ip, offset, size);
398	431
399	432	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
400	433	return -EIO;
401	434
402		- if (size <= 0)
403		- return size;
	435	+ if (error)
	436	+ return error;
	437	+ if (!size)
	438	+ return 0;
404	439
405	440	/*
406	441	* Capture amount written on completion as we can't reliably account
..	..	@@ -408,10 +443,17 @@
408	443	*/
409	444	XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);
410	445
	446	+ /*
	447	+ * We can allocate memory here while doing writeback on behalf of
	448	+ * memory reclaim. To avoid memory allocation deadlocks set the
	449	+ * task-wide nofs context for the following operations.
	450	+ */
	451	+ nofs_flag = memalloc_nofs_save();
	452	+
411	453	if (flags & IOMAP_DIO_COW) {
412	454	error = xfs_reflink_end_cow(ip, offset, size);
413	455	if (error)
414		- return error;
	456	+ goto out;
415	457	}
416	458
417	459	/*
..	..	@@ -420,8 +462,10 @@
420	462	* earlier allows a racing dio read to find unwritten extents before
421	463	* they are converted.
422	464	*/
423		- if (flags & IOMAP_DIO_UNWRITTEN)
424		- return xfs_iomap_write_unwritten(ip, offset, size, true);
	465	+ if (flags & IOMAP_DIO_UNWRITTEN) {
	466	+ error = xfs_iomap_write_unwritten(ip, offset, size, true);
	467	+ goto out;
	468	+ }
425	469
426	470	/*
427	471	* We need to update the in-core inode size here so that we don't end up
..	..	@@ -443,8 +487,14 @@
443	487	spin_unlock(&ip->i_flags_lock);
444	488	}
445	489
	490	+out:
	491	+ memalloc_nofs_restore(nofs_flag);
446	492	return error;
447	493	}
	494	+
	495	+static const struct iomap_dio_ops xfs_dio_write_ops = {
	496	+ .end_io = xfs_dio_write_end_io,
	497	+};
448	498
449	499	/*
450	500	* xfs_file_dio_aio_write - handle direct IO writes
..	..	@@ -485,8 +535,7 @@
485	535	int unaligned_io = 0;
486	536	int iolock;
487	537	size_t count = iov_iter_count(from);
488		- struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
489		- mp->m_rtdev_targp : mp->m_ddev_targp;
	538	+ struct xfs_buftarg *target = xfs_inode_buftarg(ip);
490	539
491	540	/* DIO must be aligned to device logical sector size */
492	541	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
..	..	@@ -507,9 +556,9 @@
507	556	* We can't properly handle unaligned direct I/O to reflink
508	557	* files yet, as we can't unshare a partial block.
509	558	*/
510		- if (xfs_is_reflink_inode(ip)) {
	559	+ if (xfs_is_cow_inode(ip)) {
511	560	trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count);
512		- return -EREMCHG;
	561	+ return -ENOTBLK;
513	562	}
514	563	iolock = XFS_IOLOCK_EXCL;
515	564	} else {
..	..	@@ -546,21 +595,19 @@
546	595	}
547	596
548	597	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
549		- ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
550		-
551	598	/*
552		- * If unaligned, this is the only IO in-flight. If it has not yet
553		- * completed, wait on it before we release the iolock to prevent
554		- * subsequent overlapping IO.
	599	+ * If unaligned, this is the only IO in-flight. Wait on it before we
	600	+ * release the iolock to prevent subsequent overlapping IO.
555	601	*/
556		- if (ret == -EIOCBQUEUED && unaligned_io)
557		- inode_dio_wait(inode);
	602	+ ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
	603	+ &xfs_dio_write_ops,
	604	+ is_sync_kiocb(iocb) || unaligned_io);
558	605	out:
559	606	xfs_iunlock(ip, iolock);
560	607
561	608	/*
562		- * No fallback to buffered IO on errors for XFS, direct IO will either
563		- * complete fully or fail.
	609	+ * No fallback to buffered IO after short writes for XFS, direct I/O
	610	+ * will either complete fully or return an error.
564	611	*/
565	612	ASSERT(ret < 0 || ret == count);
566	613	return ret;
..	..	@@ -593,7 +640,7 @@
593	640	count = iov_iter_count(from);
594	641
595	642	trace_xfs_file_dax_write(ip, count, pos);
596		- ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops);
	643	+ ret = dax_iomap_rw(iocb, from, &xfs_direct_write_iomap_ops);
597	644	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
598	645	i_size_write(inode, iocb->ki_pos);
599	646	error = xfs_setfilesize(ip, pos, ret);
..	..	@@ -640,7 +687,8 @@
640	687	current->backing_dev_info = inode_to_bdi(inode);
641	688
642	689	trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
643		- ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
	690	+ ret = iomap_file_buffered_write(iocb, from,
	691	+ &xfs_buffered_write_iomap_ops);
644	692	if (likely(ret >= 0))
645	693	iocb->ki_pos += ret;
646	694
..	..	@@ -719,7 +767,7 @@
719	767	* allow an operation to fall back to buffered mode.
720	768	*/
721	769	ret = xfs_file_dio_aio_write(iocb, from);
722		- if (ret != -EREMCHG)
	770	+ if (ret != -ENOTBLK)
723	771	return ret;
724	772	}
725	773
..	..	@@ -802,7 +850,6 @@
802	850	struct inode *inode = file_inode(file);
803	851	struct xfs_inode *ip = XFS_I(inode);
804	852	long error;
805		- enum xfs_prealloc_flags flags = 0;
806	853	uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
807	854	loff_t new_size = 0;
808	855	bool do_file_insert = false;
..	..	@@ -817,14 +864,46 @@
817	864	if (error)
818	865	goto out_unlock;
819	866
	867	+ /*
	868	+ * Must wait for all AIO to complete before we continue as AIO can
	869	+ * change the file size on completion without holding any locks we
	870	+ * currently hold. We must do this first because AIO can update both
	871	+ * the on disk and in memory inode sizes, and the operations that follow
	872	+ * require the in-memory size to be fully up-to-date.
	873	+ */
	874	+ inode_dio_wait(inode);
	875	+
	876	+ /*
	877	+ * Now AIO and DIO has drained we flush and (if necessary) invalidate
	878	+ * the cached range over the first operation we are about to run.
	879	+ *
	880	+ * We care about zero and collapse here because they both run a hole
	881	+ * punch over the range first. Because that can zero data, and the range
	882	+ * of invalidation for the shift operations is much larger, we still do
	883	+ * the required flush for collapse in xfs_prepare_shift().
	884	+ *
	885	+ * Insert has the same range requirements as collapse, and we extend the
	886	+ * file first which can zero data. Hence insert has the same
	887	+ * flush/invalidate requirements as collapse and so they are both
	888	+ * handled at the right time by xfs_prepare_shift().
	889	+ */
	890	+ if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE |
	891	+ FALLOC_FL_COLLAPSE_RANGE)) {
	892	+ error = xfs_flush_unmap_range(ip, offset, len);
	893	+ if (error)
	894	+ goto out_unlock;
	895	+ }
	896	+
	897	+ error = file_modified(file);
	898	+ if (error)
	899	+ goto out_unlock;
	900	+
820	901	if (mode & FALLOC_FL_PUNCH_HOLE) {
821	902	error = xfs_free_file_space(ip, offset, len);
822	903	if (error)
823	904	goto out_unlock;
824	905	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
825		- unsigned int blksize_mask = i_blocksize(inode) - 1;
826		-
827		- if (offset & blksize_mask || len & blksize_mask) {
	906	+ if (!xfs_is_falloc_aligned(ip, offset, len)) {
828	907	error = -EINVAL;
829	908	goto out_unlock;
830	909	}
..	..	@@ -844,10 +923,9 @@
844	923	if (error)
845	924	goto out_unlock;
846	925	} else if (mode & FALLOC_FL_INSERT_RANGE) {
847		- unsigned int blksize_mask = i_blocksize(inode) - 1;
848	926	loff_t isize = i_size_read(inode);
849	927
850		- if (offset & blksize_mask || len & blksize_mask) {
	928	+ if (!xfs_is_falloc_aligned(ip, offset, len)) {
851	929	error = -EINVAL;
852	930	goto out_unlock;
853	931	}
..	..	@@ -869,8 +947,6 @@
869	947	}
870	948	do_file_insert = true;
871	949	} else {
872		- flags |= XFS_PREALLOC_SET;
873		-
874	950	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
875	951	offset + len > i_size_read(inode)) {
876	952	new_size = offset + len;
..	..	@@ -879,27 +955,49 @@
879	955	goto out_unlock;
880	956	}
881	957
882		- if (mode & FALLOC_FL_ZERO_RANGE)
883		- error = xfs_zero_file_space(ip, offset, len);
884		- else {
885		- if (mode & FALLOC_FL_UNSHARE_RANGE) {
886		- error = xfs_reflink_unshare(ip, offset, len);
887		- if (error)
888		- goto out_unlock;
	958	+ if (mode & FALLOC_FL_ZERO_RANGE) {
	959	+ /*
	960	+ * Punch a hole and prealloc the range. We use a hole
	961	+ * punch rather than unwritten extent conversion for two
	962	+ * reasons:
	963	+ *
	964	+ * 1.) Hole punch handles partial block zeroing for us.
	965	+ * 2.) If prealloc returns ENOSPC, the file range is
	966	+ * still zero-valued by virtue of the hole punch.
	967	+ */
	968	+ unsigned int blksize = i_blocksize(inode);
	969	+
	970	+ trace_xfs_zero_file_space(ip);
	971	+
	972	+ error = xfs_free_file_space(ip, offset, len);
	973	+ if (error)
	974	+ goto out_unlock;
	975	+
	976	+ len = round_up(offset + len, blksize) -
	977	+ round_down(offset, blksize);
	978	+ offset = round_down(offset, blksize);
	979	+ } else if (mode & FALLOC_FL_UNSHARE_RANGE) {
	980	+ error = xfs_reflink_unshare(ip, offset, len);
	981	+ if (error)
	982	+ goto out_unlock;
	983	+ } else {
	984	+ /*
	985	+ * If always_cow mode we can't use preallocations and
	986	+ * thus should not create them.
	987	+ */
	988	+ if (xfs_is_always_cow_inode(ip)) {
	989	+ error = -EOPNOTSUPP;
	990	+ goto out_unlock;
889	991	}
	992	+ }
	993	+
	994	+ if (!xfs_is_always_cow_inode(ip)) {
890	995	error = xfs_alloc_file_space(ip, offset, len,
891	996	XFS_BMAPI_PREALLOC);
	997	+ if (error)
	998	+ goto out_unlock;
892	999	}
893		- if (error)
894		- goto out_unlock;
895	1000	}
896		-
897		- if (file->f_flags & O_DSYNC)
898		- flags |= XFS_PREALLOC_SYNC;
899		-
900		- error = xfs_update_prealloc_flags(ip, flags);
901		- if (error)
902		- goto out_unlock;
903	1001
904	1002	/* Change file size if needed */
905	1003	if (new_size) {
..	..	@@ -918,8 +1016,14 @@
918	1016	* leave shifted extents past EOF and hence losing access to
919	1017	* the data that is contained within them.
920	1018	*/
921		- if (do_file_insert)
	1019	+ if (do_file_insert) {
922	1020	error = xfs_insert_file_space(ip, offset, len);
	1021	+ if (error)
	1022	+ goto out_unlock;
	1023	+ }
	1024	+
	1025	+ if (file->f_flags & O_DSYNC)
	1026	+ error = xfs_log_force_inode(ip);
923	1027
924	1028	out_unlock:
925	1029	xfs_iunlock(ip, iolock);
..	..	@@ -927,27 +1031,109 @@
927	1031	}
928	1032
929	1033	STATIC int
930		-xfs_file_clone_range(
931		- struct file *file_in,
932		- loff_t pos_in,
933		- struct file *file_out,
934		- loff_t pos_out,
935		- u64 len)
	1034	+xfs_file_fadvise(
	1035	+ struct file *file,
	1036	+ loff_t start,
	1037	+ loff_t end,
	1038	+ int advice)
936	1039	{
937		- return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
938		- len, false);
	1040	+ struct xfs_inode *ip = XFS_I(file_inode(file));
	1041	+ int ret;
	1042	+ int lockflags = 0;
	1043	+
	1044	+ /*
	1045	+ * Operations creating pages in page cache need protection from hole
	1046	+ * punching and similar ops
	1047	+ */
	1048	+ if (advice == POSIX_FADV_WILLNEED) {
	1049	+ lockflags = XFS_IOLOCK_SHARED;
	1050	+ xfs_ilock(ip, lockflags);
	1051	+ }
	1052	+ ret = generic_fadvise(file, start, end, advice);
	1053	+ if (lockflags)
	1054	+ xfs_iunlock(ip, lockflags);
	1055	+ return ret;
939	1056	}
940	1057
941		-STATIC int
942		-xfs_file_dedupe_range(
943		- struct file *file_in,
944		- loff_t pos_in,
945		- struct file *file_out,
946		- loff_t pos_out,
947		- u64 len)
	1058	+/* Does this file, inode, or mount want synchronous writes? */
	1059	+static inline bool xfs_file_sync_writes(struct file *filp)
948	1060	{
949		- return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
950		- len, true);
	1061	+ struct xfs_inode *ip = XFS_I(file_inode(filp));
	1062	+
	1063	+ if (ip->i_mount->m_flags & XFS_MOUNT_WSYNC)
	1064	+ return true;
	1065	+ if (filp->f_flags & (__O_SYNC | O_DSYNC))
	1066	+ return true;
	1067	+ if (IS_SYNC(file_inode(filp)))
	1068	+ return true;
	1069	+
	1070	+ return false;
	1071	+}
	1072	+
	1073	+STATIC loff_t
	1074	+xfs_file_remap_range(
	1075	+ struct file *file_in,
	1076	+ loff_t pos_in,
	1077	+ struct file *file_out,
	1078	+ loff_t pos_out,
	1079	+ loff_t len,
	1080	+ unsigned int remap_flags)
	1081	+{
	1082	+ struct inode *inode_in = file_inode(file_in);
	1083	+ struct xfs_inode *src = XFS_I(inode_in);
	1084	+ struct inode *inode_out = file_inode(file_out);
	1085	+ struct xfs_inode *dest = XFS_I(inode_out);
	1086	+ struct xfs_mount *mp = src->i_mount;
	1087	+ loff_t remapped = 0;
	1088	+ xfs_extlen_t cowextsize;
	1089	+ int ret;
	1090	+
	1091	+ if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
	1092	+ return -EINVAL;
	1093	+
	1094	+ if (!xfs_sb_version_hasreflink(&mp->m_sb))
	1095	+ return -EOPNOTSUPP;
	1096	+
	1097	+ if (XFS_FORCED_SHUTDOWN(mp))
	1098	+ return -EIO;
	1099	+
	1100	+ /* Prepare and then clone file data. */
	1101	+ ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
	1102	+ &len, remap_flags);
	1103	+ if (ret || len == 0)
	1104	+ return ret;
	1105	+
	1106	+ trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
	1107	+
	1108	+ ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len,
	1109	+ &remapped);
	1110	+ if (ret)
	1111	+ goto out_unlock;
	1112	+
	1113	+ /*
	1114	+ * Carry the cowextsize hint from src to dest if we're sharing the
	1115	+ * entire source file to the entire destination file, the source file
	1116	+ * has a cowextsize hint, and the destination file does not.
	1117	+ */
	1118	+ cowextsize = 0;
	1119	+ if (pos_in == 0 && len == i_size_read(inode_in) &&
	1120	+ (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
	1121	+ pos_out == 0 && len >= i_size_read(inode_out) &&
	1122	+ !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
	1123	+ cowextsize = src->i_d.di_cowextsize;
	1124	+
	1125	+ ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
	1126	+ remap_flags);
	1127	+ if (ret)
	1128	+ goto out_unlock;
	1129	+
	1130	+ if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out))
	1131	+ xfs_log_force_inode(dest);
	1132	+out_unlock:
	1133	+ xfs_iunlock2_io_mmap(src, dest);
	1134	+ if (ret)
	1135	+ trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
	1136	+ return remapped > 0 ? remapped : ret;
951	1137	}
952	1138
953	1139	STATIC int
..	..	@@ -959,7 +1145,7 @@
959	1145	return -EFBIG;
960	1146	if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
961	1147	return -EIO;
962		- file->f_mode |= FMODE_NOWAIT;
	1148	+ file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
963	1149	return 0;
964	1150	}
965	1151
..	..	@@ -981,8 +1167,8 @@
981	1167	* certain to have the next operation be a read there.
982	1168	*/
983	1169	mode = xfs_ilock_data_map_shared(ip);
984		- if (ip->i_d.di_nextents > 0)
985		- error = xfs_dir3_data_readahead(ip, 0, -1);
	1170	+ if (ip->i_df.if_nextents > 0)
	1171	+ error = xfs_dir3_data_readahead(ip, 0, 0);
986	1172	xfs_iunlock(ip, mode);
987	1173	return error;
988	1174	}
..	..	@@ -1036,10 +1222,10 @@
1036	1222	default:
1037	1223	return generic_file_llseek(file, offset, whence);
1038	1224	case SEEK_HOLE:
1039		- offset = iomap_seek_hole(inode, offset, &xfs_iomap_ops);
	1225	+ offset = iomap_seek_hole(inode, offset, &xfs_seek_iomap_ops);
1040	1226	break;
1041	1227	case SEEK_DATA:
1042		- offset = iomap_seek_data(inode, offset, &xfs_iomap_ops);
	1228	+ offset = iomap_seek_data(inode, offset, &xfs_seek_iomap_ops);
1043	1229	break;
1044	1230	}
1045	1231
..	..	@@ -1052,7 +1238,7 @@
1052	1238	* Locking for serialisation of IO during page faults. This results in a lock
1053	1239	* ordering of:
1054	1240	*
1055		- * mmap_sem (MM)
	1241	+ * mmap_lock (MM)
1056	1242	* sb_start_pagefault(vfs, freeze)
1057	1243	* i_mmaplock (XFS - truncate serialisation)
1058	1244	* page_lock (MM)
..	..	@@ -1079,12 +1265,16 @@
1079	1265	if (IS_DAX(inode)) {
1080	1266	pfn_t pfn;
1081	1267
1082		- ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, &xfs_iomap_ops);
	1268	+ ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL,
	1269	+ (write_fault && !vmf->cow_page) ?
	1270	+ &xfs_direct_write_iomap_ops :
	1271	+ &xfs_read_iomap_ops);
1083	1272	if (ret & VM_FAULT_NEEDDSYNC)
1084	1273	ret = dax_finish_sync_fault(vmf, pe_size, pfn);
1085	1274	} else {
1086	1275	if (write_fault)
1087		- ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
	1276	+ ret = iomap_page_mkwrite(vmf,
	1277	+ &xfs_buffered_write_iomap_ops);
1088	1278	else
1089	1279	ret = filemap_fault(vmf);
1090	1280	}
..	..	@@ -1146,29 +1336,47 @@
1146	1336	return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
1147	1337	}
1148	1338
	1339	+static vm_fault_t
	1340	+xfs_filemap_map_pages(
	1341	+ struct vm_fault *vmf,
	1342	+ pgoff_t start_pgoff,
	1343	+ pgoff_t end_pgoff)
	1344	+{
	1345	+ struct inode *inode = file_inode(vmf->vma->vm_file);
	1346	+ vm_fault_t ret;
	1347	+
	1348	+ xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
	1349	+ ret = filemap_map_pages(vmf, start_pgoff, end_pgoff);
	1350	+ xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
	1351	+ return ret;
	1352	+}
	1353	+
1149	1354	static const struct vm_operations_struct xfs_file_vm_ops = {
1150	1355	.fault = xfs_filemap_fault,
1151	1356	.huge_fault = xfs_filemap_huge_fault,
1152		- .map_pages = filemap_map_pages,
	1357	+ .map_pages = xfs_filemap_map_pages,
1153	1358	.page_mkwrite = xfs_filemap_page_mkwrite,
1154	1359	.pfn_mkwrite = xfs_filemap_pfn_mkwrite,
1155	1360	};
1156	1361
1157	1362	STATIC int
1158	1363	xfs_file_mmap(
1159		- struct file *filp,
1160		- struct vm_area_struct *vma)
	1364	+ struct file *file,
	1365	+ struct vm_area_struct *vma)
1161	1366	{
	1367	+ struct inode *inode = file_inode(file);
	1368	+ struct xfs_buftarg *target = xfs_inode_buftarg(XFS_I(inode));
	1369	+
1162	1370	/*
1163		- * We don't support synchronous mappings for non-DAX files. At least
1164		- * until someone comes with a sensible use case.
	1371	+ * We don't support synchronous mappings for non-DAX files and
	1372	+ * for DAX files if underneath dax_device is not synchronous.
1165	1373	*/
1166		- if (!IS_DAX(file_inode(filp)) && (vma->vm_flags & VM_SYNC))
	1374	+ if (!daxdev_mapping_supported(vma, target->bt_daxdev))
1167	1375	return -EOPNOTSUPP;
1168	1376
1169		- file_accessed(filp);
	1377	+ file_accessed(file);
1170	1378	vma->vm_ops = &xfs_file_vm_ops;
1171		- if (IS_DAX(file_inode(filp)))
	1379	+ if (IS_DAX(inode))
1172	1380	vma->vm_flags |= VM_HUGEPAGE;
1173	1381	return 0;
1174	1382	}
..	..	@@ -1179,6 +1387,7 @@
1179	1387	.write_iter = xfs_file_write_iter,
1180	1388	.splice_read = generic_file_splice_read,
1181	1389	.splice_write = iter_file_splice_write,
	1390	+ .iopoll = iomap_dio_iopoll,
1182	1391	.unlocked_ioctl = xfs_file_ioctl,
1183	1392	#ifdef CONFIG_COMPAT
1184	1393	.compat_ioctl = xfs_file_compat_ioctl,
..	..	@@ -1190,8 +1399,8 @@
1190	1399	.fsync = xfs_file_fsync,
1191	1400	.get_unmapped_area = thp_get_unmapped_area,
1192	1401	.fallocate = xfs_file_fallocate,
1193		- .clone_file_range = xfs_file_clone_range,
1194		- .dedupe_file_range = xfs_file_dedupe_range,
	1402	+ .fadvise = xfs_file_fadvise,
	1403	+ .remap_file_range = xfs_file_remap_range,
1195	1404	};
1196	1405
1197	1406	const struct file_operations xfs_dir_file_operations = {