.. | ..
12 | 12 | #include "xfs_mount.h"
13 | 13 | #include "xfs_inode.h"
14 | 14 | #include "xfs_trans.h"
15 | | -#include "xfs_inode_item.h"
16 | | -#include "xfs_alloc.h"
17 | | -#include "xfs_error.h"
18 | 15 | #include "xfs_iomap.h"
19 | 16 | #include "xfs_trace.h"
20 | 17 | #include "xfs_bmap.h"
21 | 18 | #include "xfs_bmap_util.h"
22 | | -#include "xfs_bmap_btree.h"
23 | 19 | #include "xfs_reflink.h"
24 | | -#include <linux/writeback.h>
25 | 20 |
26 | | -/*
27 | | - * structure owned by writepages passed to individual writepage calls
28 | | - */
29 | 21 | struct xfs_writepage_ctx {
30 | | - struct xfs_bmbt_irec imap;
31 | | - unsigned int io_type;
| 22 | + struct iomap_writepage_ctx ctx;
| 23 | + unsigned int data_seq;
32 | 24 | unsigned int cow_seq;
33 | | - struct xfs_ioend *ioend;
34 | 25 | };
35 | 26 |
36 | | -struct block_device *
37 | | -xfs_find_bdev_for_inode(
38 | | - struct inode *inode)
| 27 | +static inline struct xfs_writepage_ctx *
| 28 | +XFS_WPC(struct iomap_writepage_ctx *ctx)
39 | 29 | {
40 | | - struct xfs_inode *ip = XFS_I(inode);
41 | | - struct xfs_mount *mp = ip->i_mount;
42 | | -
43 | | - if (XFS_IS_REALTIME_INODE(ip))
44 | | - return mp->m_rtdev_targp->bt_bdev;
45 | | - else
46 | | - return mp->m_ddev_targp->bt_bdev;
47 | | -}
48 | | -
49 | | -struct dax_device *
50 | | -xfs_find_daxdev_for_inode(
51 | | - struct inode *inode)
52 | | -{
53 | | - struct xfs_inode *ip = XFS_I(inode);
54 | | - struct xfs_mount *mp = ip->i_mount;
55 | | -
56 | | - if (XFS_IS_REALTIME_INODE(ip))
57 | | - return mp->m_rtdev_targp->bt_daxdev;
58 | | - else
59 | | - return mp->m_ddev_targp->bt_daxdev;
60 | | -}
61 | | -
62 | | -static void
63 | | -xfs_finish_page_writeback(
64 | | - struct inode *inode,
65 | | - struct bio_vec *bvec,
66 | | - int error)
67 | | -{
68 | | - struct iomap_page *iop = to_iomap_page(bvec->bv_page);
69 | | -
70 | | - if (error) {
71 | | - SetPageError(bvec->bv_page);
72 | | - mapping_set_error(inode->i_mapping, -EIO);
73 | | - }
74 | | -
75 | | - ASSERT(iop || i_blocksize(inode) == PAGE_SIZE);
76 | | - ASSERT(!iop || atomic_read(&iop->write_count) > 0);
77 | | -
78 | | - if (!iop || atomic_dec_and_test(&iop->write_count))
79 | | - end_page_writeback(bvec->bv_page);
80 | | -}
81 | | -
82 | | -/*
83 | | - * We're now finished for good with this ioend structure. Update the page
84 | | - * state, release holds on bios, and finally free up memory. Do not use the
85 | | - * ioend after this.
86 | | - */
87 | | -STATIC void
88 | | -xfs_destroy_ioend(
89 | | - struct xfs_ioend *ioend,
90 | | - int error)
91 | | -{
92 | | - struct inode *inode = ioend->io_inode;
93 | | - struct bio *bio = &ioend->io_inline_bio;
94 | | - struct bio *last = ioend->io_bio, *next;
95 | | - u64 start = bio->bi_iter.bi_sector;
96 | | - bool quiet = bio_flagged(bio, BIO_QUIET);
97 | | -
98 | | - for (bio = &ioend->io_inline_bio; bio; bio = next) {
99 | | - struct bio_vec *bvec;
100 | | - int i;
101 | | -
102 | | - /*
103 | | - * For the last bio, bi_private points to the ioend, so we
104 | | - * need to explicitly end the iteration here.
105 | | - */
106 | | - if (bio == last)
107 | | - next = NULL;
108 | | - else
109 | | - next = bio->bi_private;
110 | | -
111 | | - /* walk each page on bio, ending page IO on them */
112 | | - bio_for_each_segment_all(bvec, bio, i)
113 | | - xfs_finish_page_writeback(inode, bvec, error);
114 | | - bio_put(bio);
115 | | - }
116 | | -
117 | | - if (unlikely(error && !quiet)) {
118 | | - xfs_err_ratelimited(XFS_I(inode)->i_mount,
119 | | - "writeback error on sector %llu", start);
120 | | - }
| 30 | + return container_of(ctx, struct xfs_writepage_ctx, ctx);
121 | 31 | }
122 | 32 |
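The conversion hinges on the embedding trick visible in the hunk above: struct xfs_writepage_ctx now wraps the generic struct iomap_writepage_ctx as its first member, and XFS_WPC() uses container_of() to get back from the pointer the iomap layer hands to the callbacks. A minimal userspace-only sketch of the same pattern (all names below are invented for the demo, not kernel API):

```c
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct generic_ctx { int dummy; };	/* stands in for iomap_writepage_ctx */

struct fs_ctx {				/* stands in for xfs_writepage_ctx */
	struct generic_ctx ctx;		/* embedded generic context */
	unsigned int data_seq;
	unsigned int cow_seq;
};

static struct fs_ctx *FS_CTX(struct generic_ctx *ctx)
{
	return container_of(ctx, struct fs_ctx, ctx);
}

int main(void)
{
	struct fs_ctx wpc = { .data_seq = 42 };
	struct generic_ctx *g = &wpc.ctx;	/* what the generic layer sees */

	printf("%u\n", FS_CTX(g)->data_seq);	/* prints 42 */
	return 0;
}
```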
123 | 33 | /*
124 | 34 | * Fast and loose check if this write could update the on-disk inode size.
125 | 35 | */
126 | | -static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
| 36 | +static inline bool xfs_ioend_is_append(struct iomap_ioend *ioend)
127 | 37 | {
128 | 38 | return ioend->io_offset + ioend->io_size >
129 | 39 | XFS_I(ioend->io_inode)->i_d.di_size;
.. | ..
131 | 41 |
132 | 42 | STATIC int
133 | 43 | xfs_setfilesize_trans_alloc(
134 | | - struct xfs_ioend *ioend)
| 44 | + struct iomap_ioend *ioend)
135 | 45 | {
136 | 46 | struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
137 | 47 | struct xfs_trans *tp;
138 | 48 | int error;
139 | 49 |
140 | | - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0,
141 | | - XFS_TRANS_NOFS, &tp);
| 50 | + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
142 | 51 | if (error)
143 | 52 | return error;
144 | 53 |
145 | | - ioend->io_append_trans = tp;
| 54 | + ioend->io_private = tp;
146 | 55 |
147 | 56 | /*
148 | 57 | * We may pass freeze protection with a transaction. So tell lockdep
.. | ..
153 | 62 | * We hand off the transaction to the completion thread now, so
154 | 63 | * clear the flag here.
155 | 64 | */
156 | | - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
| 65 | + xfs_trans_clear_context(tp);
157 | 66 | return 0;
158 | 67 | }
159 | 68 |
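Worth spelling out: the transaction is allocated here in the submission thread but committed much later from the completion side, so the task-level context has to travel with it. A summary of the hand-off as it reads from this and the following hunks (not new code):

```c
/*
 * Life of the preallocated size-update transaction (summary):
 *
 *   submission thread                     completion worker
 *   -----------------                     -----------------
 *   xfs_trans_alloc(..., &tp)
 *   ioend->io_private = tp
 *   xfs_trans_clear_context(tp)   --->    xfs_trans_set_context(tp)
 *                                         __sb_writers_acquired(...)
 *                                         commit in xfs_setfilesize_ioend()
 */
```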
.. | ..
205 | 114 |
206 | 115 | STATIC int
207 | 116 | xfs_setfilesize_ioend(
208 | | - struct xfs_ioend *ioend,
| 117 | + struct iomap_ioend *ioend,
209 | 118 | int error)
210 | 119 | {
211 | 120 | struct xfs_inode *ip = XFS_I(ioend->io_inode);
212 | | - struct xfs_trans *tp = ioend->io_append_trans;
| 121 | + struct xfs_trans *tp = ioend->io_private;
213 | 122 |
214 | 123 | /*
215 | 124 | * The transaction may have been allocated in the I/O submission thread,
216 | 125 | * thus we need to mark ourselves as being in a transaction manually.
217 | 126 | * Similarly for freeze protection.
218 | 127 | */
219 | | - current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
| 128 | + xfs_trans_set_context(tp);
220 | 129 | __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
221 | 130 |
222 | 131 | /* we abort the update if there was an IO error */
.. | ..
232 | 141 | * IO write completion.
233 | 142 | */
234 | 143 | STATIC void
235 | | -xfs_end_io(
236 | | - struct work_struct *work)
| 144 | +xfs_end_ioend(
| 145 | + struct iomap_ioend *ioend)
237 | 146 | {
238 | | - struct xfs_ioend *ioend =
239 | | - container_of(work, struct xfs_ioend, io_work);
240 | 147 | struct xfs_inode *ip = XFS_I(ioend->io_inode);
| 148 | + struct xfs_mount *mp = ip->i_mount;
241 | 149 | xfs_off_t offset = ioend->io_offset;
242 | 150 | size_t size = ioend->io_size;
| 151 | + unsigned int nofs_flag;
243 | 152 | int error;
| 153 | +
| 154 | + /*
| 155 | + * We can allocate memory here while doing writeback on behalf of
| 156 | + * memory reclaim. To avoid memory allocation deadlocks set the
| 157 | + * task-wide nofs context for the following operations.
| 158 | + */
| 159 | + nofs_flag = memalloc_nofs_save();
244 | 160 |
245 | 161 | /*
246 | 162 | * Just clean up the in-memory structures if the fs has been shut down.
247 | 163 | */
248 | | - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
| 164 | + if (XFS_FORCED_SHUTDOWN(mp)) {
249 | 165 | error = -EIO;
250 | 166 | goto done;
251 | 167 | }
252 | 168 |
253 | 169 | /*
254 | | - * Clean up any COW blocks on an I/O error.
| 170 | + * Clean up all COW blocks and underlying data fork delalloc blocks on
| 171 | + * I/O error. The delalloc punch is required because this ioend was
| 172 | + * mapped to blocks in the COW fork and the associated pages are no
| 173 | + * longer dirty. If we don't remove delalloc blocks here, they become
| 174 | + * stale and can corrupt free space accounting on unmount.
255 | 175 | */
256 | 176 | error = blk_status_to_errno(ioend->io_bio->bi_status);
257 | 177 | if (unlikely(error)) {
258 | | - switch (ioend->io_type) {
259 | | - case XFS_IO_COW:
| 178 | + if (ioend->io_flags & IOMAP_F_SHARED) {
260 | 179 | xfs_reflink_cancel_cow_range(ip, offset, size, true);
261 | | - break;
| 180 | + xfs_bmap_punch_delalloc_range(ip,
| 181 | + XFS_B_TO_FSBT(mp, offset),
| 182 | + XFS_B_TO_FSB(mp, size));
262 | 183 | }
263 | | -
264 | 184 | goto done;
265 | 185 | }
266 | 186 |
267 | 187 | /*
268 | | - * Success: commit the COW or unwritten blocks if needed.
| 188 | + * Success: commit the COW or unwritten blocks if needed.
269 | 189 | */
270 | | - switch (ioend->io_type) {
271 | | - case XFS_IO_COW:
| 190 | + if (ioend->io_flags & IOMAP_F_SHARED)
272 | 191 | error = xfs_reflink_end_cow(ip, offset, size);
273 | | - break;
274 | | - case XFS_IO_UNWRITTEN:
275 | | - /* writeback should never update isize */
| 192 | + else if (ioend->io_type == IOMAP_UNWRITTEN)
276 | 193 | error = xfs_iomap_write_unwritten(ip, offset, size, false);
277 | | - break;
278 | | - default:
279 | | - ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
280 | | - break;
281 | | - }
| 194 | + else
| 195 | + ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_private);
282 | 196 |
283 | 197 | done:
284 | | - if (ioend->io_append_trans)
| 198 | + if (ioend->io_private)
285 | 199 | error = xfs_setfilesize_ioend(ioend, error);
286 | | - xfs_destroy_ioend(ioend, error);
| 200 | + iomap_finish_ioends(ioend, error);
| 201 | + memalloc_nofs_restore(nofs_flag);
| 202 | +}
| 203 | +
| 204 | +/*
| 205 | + * If the to be merged ioend has a preallocated transaction for file
| 206 | + * size updates we need to ensure the ioend it is merged into also
| 207 | + * has one. If it already has one we can simply cancel the transaction
| 208 | + * as it is guaranteed to be clean.
| 209 | + */
| 210 | +static void
| 211 | +xfs_ioend_merge_private(
| 212 | + struct iomap_ioend *ioend,
| 213 | + struct iomap_ioend *next)
| 214 | +{
| 215 | + if (!ioend->io_private) {
| 216 | + ioend->io_private = next->io_private;
| 217 | + next->io_private = NULL;
| 218 | + } else {
| 219 | + xfs_setfilesize_ioend(next, -ECANCELED);
| 220 | + }
| 221 | +}
| 222 | +
| 223 | +/* Finish all pending io completions. */
| 224 | +void
| 225 | +xfs_end_io(
| 226 | + struct work_struct *work)
| 227 | +{
| 228 | + struct xfs_inode *ip =
| 229 | + container_of(work, struct xfs_inode, i_ioend_work);
| 230 | + struct iomap_ioend *ioend;
| 231 | + struct list_head tmp;
| 232 | + unsigned long flags;
| 233 | +
| 234 | + spin_lock_irqsave(&ip->i_ioend_lock, flags);
| 235 | + list_replace_init(&ip->i_ioend_list, &tmp);
| 236 | + spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
| 237 | +
| 238 | + iomap_sort_ioends(&tmp);
| 239 | + while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend,
| 240 | + io_list))) {
| 241 | + list_del_init(&ioend->io_list);
| 242 | + iomap_ioend_try_merge(ioend, &tmp, xfs_ioend_merge_private);
| 243 | + xfs_end_ioend(ioend);
| 244 | + }
| 245 | +}
| 246 | +
| 247 | +static inline bool xfs_ioend_needs_workqueue(struct iomap_ioend *ioend)
| 248 | +{
| 249 | + return ioend->io_private ||
| 250 | + ioend->io_type == IOMAP_UNWRITTEN ||
| 251 | + (ioend->io_flags & IOMAP_F_SHARED);
287 | 252 | }
288 | 253 |
289 | 254 | STATIC void
290 | 255 | xfs_end_bio(
291 | 256 | struct bio *bio)
292 | 257 | {
293 | | - struct xfs_ioend *ioend = bio->bi_private;
294 | | - struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
| 258 | + struct iomap_ioend *ioend = bio->bi_private;
| 259 | + struct xfs_inode *ip = XFS_I(ioend->io_inode);
| 260 | + unsigned long flags;
295 | 261 |
296 | | - if (ioend->io_type == XFS_IO_UNWRITTEN || ioend->io_type == XFS_IO_COW)
297 | | - queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
298 | | - else if (ioend->io_append_trans)
299 | | - queue_work(mp->m_data_workqueue, &ioend->io_work);
300 | | - else
301 | | - xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
| 262 | + ASSERT(xfs_ioend_needs_workqueue(ioend));
| 263 | +
| 264 | + spin_lock_irqsave(&ip->i_ioend_lock, flags);
| 265 | + if (list_empty(&ip->i_ioend_list))
| 266 | + WARN_ON_ONCE(!queue_work(ip->i_mount->m_unwritten_workqueue,
| 267 | + &ip->i_ioend_work));
| 268 | + list_add_tail(&ioend->io_list, &ip->i_ioend_list);
| 269 | + spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
302 | 270 | }
303 | 271 |
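xfs_end_bio() can run in interrupt context, which is why completions are now staged on a per-inode list under an irq-safe lock and drained by a single work item; the work is queued only on the empty-to-non-empty transition, so a burst of bio completions costs one workqueue dispatch. The essential shape of the pattern (a sketch, not the verbatim code above):

```c
/* producer, possibly in interrupt context */
spin_lock_irqsave(&ip->i_ioend_lock, flags);
if (list_empty(&ip->i_ioend_list))		/* first completion of a batch? */
	queue_work(wq, &ip->i_ioend_work);	/* one work item drains them all */
list_add_tail(&ioend->io_list, &ip->i_ioend_list);
spin_unlock_irqrestore(&ip->i_ioend_lock, flags);

/* consumer, process context: steal the whole batch, then process unlocked */
spin_lock_irqsave(&ip->i_ioend_lock, flags);
list_replace_init(&ip->i_ioend_list, &tmp);
spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
iomap_sort_ioends(&tmp);		/* sort so adjacent ioends can merge */
```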
304 | | -STATIC int
| 272 | +/*
| 273 | + * Fast revalidation of the cached writeback mapping. Return true if the current
| 274 | + * mapping is valid, false otherwise.
| 275 | + */
| 276 | +static bool
| 277 | +xfs_imap_valid(
| 278 | + struct iomap_writepage_ctx *wpc,
| 279 | + struct xfs_inode *ip,
| 280 | + loff_t offset)
| 281 | +{
| 282 | + if (offset < wpc->iomap.offset ||
| 283 | + offset >= wpc->iomap.offset + wpc->iomap.length)
| 284 | + return false;
| 285 | + /*
| 286 | + * If this is a COW mapping, it is sufficient to check that the mapping
| 287 | + * covers the offset. Be careful to check this first because the caller
| 288 | + * can revalidate a COW mapping without updating the data seqno.
| 289 | + */
| 290 | + if (wpc->iomap.flags & IOMAP_F_SHARED)
| 291 | + return true;
| 292 | +
| 293 | + /*
| 294 | + * This is not a COW mapping. Check the sequence number of the data fork
| 295 | + * because concurrent changes could have invalidated the extent. Check
| 296 | + * the COW fork because concurrent changes since the last time we
| 297 | + * checked (and found nothing at this offset) could have added
| 298 | + * overlapping blocks.
| 299 | + */
| 300 | + if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq))
| 301 | + return false;
| 302 | + if (xfs_inode_has_cow_data(ip) &&
| 303 | + XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq))
| 304 | + return false;
| 305 | + return true;
| 306 | +}
| 307 | +
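The revalidation above is a classic sequence-count cache: each fork bumps if_seq whenever its extent tree changes, the writeback context records the value it saw at lookup time, and a cached mapping is trusted only if both the range and the sequence number still match. A userspace sketch of the idea (names invented for illustration):

```c
#include <stdbool.h>

struct fork   { unsigned seq; };	/* bumped on every extent-tree change */
struct cached { long long off, len; unsigned seq; };

static bool mapping_valid(const struct cached *c, const struct fork *f,
			  long long offset)
{
	if (offset < c->off || offset >= c->off + c->len)
		return false;		/* offset outside the cached range */
	return c->seq == f->seq;	/* stale if the tree changed since lookup */
}
```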
| 308 | +/*
| 309 | + * Pass in a delalloc extent and convert it to real extents, return the real
| 310 | + * extent that maps offset_fsb in wpc->iomap.
| 311 | + *
| 312 | + * The current page is held locked so nothing could have removed the block
| 313 | + * backing offset_fsb, although it could have moved from the COW to the data
| 314 | + * fork by another thread.
| 315 | + */
| 316 | +static int
| 317 | +xfs_convert_blocks(
| 318 | + struct iomap_writepage_ctx *wpc,
| 319 | + struct xfs_inode *ip,
| 320 | + int whichfork,
| 321 | + loff_t offset)
| 322 | +{
| 323 | + int error;
| 324 | + unsigned *seq;
| 325 | +
| 326 | + if (whichfork == XFS_COW_FORK)
| 327 | + seq = &XFS_WPC(wpc)->cow_seq;
| 328 | + else
| 329 | + seq = &XFS_WPC(wpc)->data_seq;
| 330 | +
| 331 | + /*
| 332 | + * Attempt to allocate whatever delalloc extent currently backs offset
| 333 | + * and put the result into wpc->iomap. Allocate in a loop because it
| 334 | + * may take several attempts to allocate real blocks for a contiguous
| 335 | + * delalloc extent if free space is sufficiently fragmented.
| 336 | + */
| 337 | + do {
| 338 | + error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
| 339 | + &wpc->iomap, seq);
| 340 | + if (error)
| 341 | + return error;
| 342 | + } while (wpc->iomap.offset + wpc->iomap.length <= offset);
| 343 | +
| 344 | + return 0;
| 345 | +}
| 346 | +
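A worked example may help with why the conversion runs in a loop rather than assuming one call suffices (block numbers are hypothetical):

```c
/*
 * Say a delalloc extent backs file blocks [0, 16) and writeback asks for
 * block 10. With badly fragmented free space each call to
 * xfs_bmapi_convert_delalloc() may only carve out a short real extent
 * from the front of the delalloc range:
 *
 *   call 1: wpc->iomap covers blocks [0, 4)   - does not reach block 10
 *   call 2: wpc->iomap covers blocks [4, 9)   - still short
 *   call 3: wpc->iomap covers blocks [9, 12)  - covers block 10, loop ends
 *
 * Hence the do/while keeps converting until iomap.offset + iomap.length
 * has moved past the requested offset.
 */
```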
| 347 | +static int
305 | 348 | xfs_map_blocks(
306 | | - struct xfs_writepage_ctx *wpc,
| 349 | + struct iomap_writepage_ctx *wpc,
307 | 350 | struct inode *inode,
308 | 351 | loff_t offset)
309 | 352 | {
310 | 353 | struct xfs_inode *ip = XFS_I(inode);
311 | 354 | struct xfs_mount *mp = ip->i_mount;
312 | 355 | ssize_t count = i_blocksize(inode);
313 | | - xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
314 | | - xfs_fileoff_t cow_fsb = NULLFILEOFF;
| 356 | + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
| 357 | + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count);
| 358 | + xfs_fileoff_t cow_fsb;
| 359 | + int whichfork;
315 | 360 | struct xfs_bmbt_irec imap;
316 | | - int whichfork = XFS_DATA_FORK;
317 | 361 | struct xfs_iext_cursor icur;
318 | | - bool imap_valid;
| 362 | + int retries = 0;
319 | 363 | int error = 0;
320 | 364 |
321 | | - /*
322 | | - * We have to make sure the cached mapping is within EOF to protect
323 | | - * against eofblocks trimming on file release leaving us with a stale
324 | | - * mapping. Otherwise, a page for a subsequent file extending buffered
325 | | - * write could get picked up by this writeback cycle and written to the
326 | | - * wrong blocks.
327 | | - *
328 | | - * Note that what we really want here is a generic mapping invalidation
329 | | - * mechanism to protect us from arbitrary extent modifying contexts, not
330 | | - * just eofblocks.
331 | | - */
332 | | - xfs_trim_extent_eof(&wpc->imap, ip);
| 365 | + if (XFS_FORCED_SHUTDOWN(mp))
| 366 | + return -EIO;
333 | 367 |
334 | 368 | /*
335 | 369 | * COW fork blocks can overlap data fork blocks even if the blocks
.. | ..
346 | 380 | * against concurrent updates and provides a memory barrier on the way
347 | 381 | * out that ensures that we always see the current value.
348 | 382 | */
349 | | - imap_valid = offset_fsb >= wpc->imap.br_startoff &&
350 | | - offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
351 | | - if (imap_valid &&
352 | | - (!xfs_inode_has_cow_data(ip) ||
353 | | - wpc->io_type == XFS_IO_COW ||
354 | | - wpc->cow_seq == READ_ONCE(ip->i_cowfp->if_seq)))
| 383 | + if (xfs_imap_valid(wpc, ip, offset))
355 | 384 | return 0;
356 | | -
357 | | - if (XFS_FORCED_SHUTDOWN(mp))
358 | | - return -EIO;
359 | 385 |
360 | 386 | /*
361 | 387 | * If we don't have a valid map, now it's time to get a new one for this
.. | ..
363 | 389 | * into real extents. If we return without a valid map, it means we
364 | 390 | * landed in a hole and we skip the block.
365 | 391 | */
| 392 | +retry:
| 393 | + cow_fsb = NULLFILEOFF;
| 394 | + whichfork = XFS_DATA_FORK;
366 | 395 | xfs_ilock(ip, XFS_ILOCK_SHARED);
367 | | - ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
| 396 | + ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
368 | 397 | (ip->i_df.if_flags & XFS_IFEXTENTS));
369 | | - ASSERT(offset <= mp->m_super->s_maxbytes);
370 | | -
371 | | - if (offset > mp->m_super->s_maxbytes - count)
372 | | - count = mp->m_super->s_maxbytes - offset;
373 | | - end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
374 | 398 |
375 | 399 | /*
376 | 400 | * Check if this offset is covered by a COW extent, and if yes use
.. | ..
380 | 404 | xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
381 | 405 | cow_fsb = imap.br_startoff;
382 | 406 | if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
383 | | - wpc->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
| 407 | + XFS_WPC(wpc)->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
384 | 408 | xfs_iunlock(ip, XFS_ILOCK_SHARED);
385 | | - /*
386 | | - * Truncate can race with writeback since writeback doesn't
387 | | - * take the iolock and truncate decreases the file size before
388 | | - * it starts truncating the pages between new_size and old_size.
389 | | - * Therefore, we can end up in the situation where writeback
390 | | - * gets a CoW fork mapping but the truncate makes the mapping
391 | | - * invalid and we end up in here trying to get a new mapping.
392 | | - * bail out here so that we simply never get a valid mapping
393 | | - * and so we drop the write altogether. The page truncation
394 | | - * will kill the contents anyway.
395 | | - */
396 | | - if (offset > i_size_read(inode)) {
397 | | - wpc->io_type = XFS_IO_HOLE;
398 | | - return 0;
399 | | - }
| 409 | +
400 | 410 | whichfork = XFS_COW_FORK;
401 | | - wpc->io_type = XFS_IO_COW;
402 | 411 | goto allocate_blocks;
403 | 412 | }
404 | 413 |
405 | 414 | /*
406 | | - * Map valid and no COW extent in the way? We're done.
| 415 | + * No COW extent overlap. Revalidate now that we may have updated
| 416 | + * ->cow_seq. If the data mapping is still valid, we're done.
407 | 417 | */
408 | | - if (imap_valid) {
| 418 | + if (xfs_imap_valid(wpc, ip, offset)) {
409 | 419 | xfs_iunlock(ip, XFS_ILOCK_SHARED);
410 | 420 | return 0;
411 | 421 | }
.. | ..
417 | 427 | */
418 | 428 | if (!xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap))
419 | 429 | imap.br_startoff = end_fsb; /* fake a hole past EOF */
| 430 | + XFS_WPC(wpc)->data_seq = READ_ONCE(ip->i_df.if_seq);
420 | 431 | xfs_iunlock(ip, XFS_ILOCK_SHARED);
421 | 432 |
| 433 | + /* landed in a hole or beyond EOF? */
422 | 434 | if (imap.br_startoff > offset_fsb) {
423 | | - /* landed in a hole or beyond EOF */
424 | 435 | imap.br_blockcount = imap.br_startoff - offset_fsb;
425 | 436 | imap.br_startoff = offset_fsb;
426 | 437 | imap.br_startblock = HOLESTARTBLOCK;
427 | | - wpc->io_type = XFS_IO_HOLE;
428 | | - } else {
429 | | - /*
430 | | - * Truncate to the next COW extent if there is one. This is the
431 | | - * only opportunity to do this because we can skip COW fork
432 | | - * lookups for the subsequent blocks in the mapping; however,
433 | | - * the requirement to treat the COW range separately remains.
434 | | - */
435 | | - if (cow_fsb != NULLFILEOFF &&
436 | | - cow_fsb < imap.br_startoff + imap.br_blockcount)
437 | | - imap.br_blockcount = cow_fsb - imap.br_startoff;
438 | | -
439 | | - if (isnullstartblock(imap.br_startblock)) {
440 | | - /* got a delalloc extent */
441 | | - wpc->io_type = XFS_IO_DELALLOC;
442 | | - goto allocate_blocks;
443 | | - }
444 | | -
445 | | - if (imap.br_state == XFS_EXT_UNWRITTEN)
446 | | - wpc->io_type = XFS_IO_UNWRITTEN;
447 | | - else
448 | | - wpc->io_type = XFS_IO_OVERWRITE;
| 438 | + imap.br_state = XFS_EXT_NORM;
449 | 439 | }
450 | 440 |
451 | | - wpc->imap = imap;
452 | | - xfs_trim_extent_eof(&wpc->imap, ip);
453 | | - trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
| 441 | + /*
| 442 | + * Truncate to the next COW extent if there is one. This is the only
| 443 | + * opportunity to do this because we can skip COW fork lookups for the
| 444 | + * subsequent blocks in the mapping; however, the requirement to treat
| 445 | + * the COW range separately remains.
| 446 | + */
| 447 | + if (cow_fsb != NULLFILEOFF &&
| 448 | + cow_fsb < imap.br_startoff + imap.br_blockcount)
| 449 | + imap.br_blockcount = cow_fsb - imap.br_startoff;
| 450 | +
| 451 | + /* got a delalloc extent? */
| 452 | + if (imap.br_startblock != HOLESTARTBLOCK &&
| 453 | + isnullstartblock(imap.br_startblock))
| 454 | + goto allocate_blocks;
| 455 | +
| 456 | + xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0);
| 457 | + trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
454 | 458 | return 0;
455 | 459 | allocate_blocks:
456 | | - error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap,
457 | | - &wpc->cow_seq);
458 | | - if (error)
| 460 | + error = xfs_convert_blocks(wpc, ip, whichfork, offset);
| 461 | + if (error) {
| 462 | + /*
| 463 | + * If we failed to find the extent in the COW fork we might have
| 464 | + * raced with a COW to data fork conversion or truncate.
| 465 | + * Restart the lookup to catch the extent in the data fork for
| 466 | + * the former case, but prevent additional retries to avoid
| 467 | + * looping forever for the latter case.
| 468 | + */
| 469 | + if (error == -EAGAIN && whichfork == XFS_COW_FORK && !retries++)
| 470 | + goto retry;
| 471 | + ASSERT(error != -EAGAIN);
459 | 472 | return error;
460 | | - ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
461 | | - imap.br_startoff + imap.br_blockcount <= cow_fsb);
462 | | - wpc->imap = imap;
463 | | - xfs_trim_extent_eof(&wpc->imap, ip);
464 | | - trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
| 473 | + }
| 474 | +
| 475 | + /*
| 476 | + * Due to merging the returned real extent might be larger than the
| 477 | + * original delalloc one. Trim the returned extent to the next COW
| 478 | + * boundary again to force a re-lookup.
| 479 | + */
| 480 | + if (whichfork != XFS_COW_FORK && cow_fsb != NULLFILEOFF) {
| 481 | + loff_t cow_offset = XFS_FSB_TO_B(mp, cow_fsb);
| 482 | +
| 483 | + if (cow_offset < wpc->iomap.offset + wpc->iomap.length)
| 484 | + wpc->iomap.length = cow_offset - wpc->iomap.offset;
| 485 | + }
| 486 | +
| 487 | + ASSERT(wpc->iomap.offset <= offset);
| 488 | + ASSERT(wpc->iomap.offset + wpc->iomap.length > offset);
| 489 | + trace_xfs_map_blocks_alloc(ip, offset, count, whichfork, &imap);
465 | 490 | return 0;
466 | 491 | }
467 | 492 |
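To make the COW trimming in xfs_map_blocks() concrete, consider a hypothetical layout (all block numbers invented for illustration):

```c
/*
 * data fork extent:  [0, 100)   returned by the extent lookup
 * COW fork extent:   [40, 60)   so cow_fsb == 40
 *
 * Writing back block 0 finds the data fork extent, but the mapping handed
 * to the iomap layer is cut back to [0, 40). When writeback reaches block
 * 40 the cached mapping no longer covers it, xfs_map_blocks() runs again,
 * sees cow_fsb <= offset_fsb and routes blocks [40, 60) through the COW
 * fork instead. The same trim is applied after allocation, since merging
 * in xfs_convert_blocks() can hand back an extent that grew past cow_fsb.
 */
```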
468 | | -/*
469 | | - * Submit the bio for an ioend. We are passed an ioend with a bio attached to
470 | | - * it, and we submit that bio. The ioend may be used for multiple bio
471 | | - * submissions, so we only want to allocate an append transaction for the ioend
472 | | - * once. In the case of multiple bio submission, each bio will take an IO
473 | | - * reference to the ioend to ensure that the ioend completion is only done once
474 | | - * all bios have been submitted and the ioend is really done.
475 | | - *
476 | | - * If @fail is non-zero, it means that we have a situation where some part of
477 | | - * the submission process has failed after we have marked paged for writeback
478 | | - * and unlocked them. In this situation, we need to fail the bio and ioend
479 | | - * rather than submit it to IO. This typically only happens on a filesystem
480 | | - * shutdown.
481 | | - */
482 | | -STATIC int
483 | | -xfs_submit_ioend(
484 | | - struct writeback_control *wbc,
485 | | - struct xfs_ioend *ioend,
| 493 | +static int
| 494 | +xfs_prepare_ioend(
| 495 | + struct iomap_ioend *ioend,
486 | 496 | int status)
487 | 497 | {
488 | | - /* Convert CoW extents to regular */
489 | | - if (!status && ioend->io_type == XFS_IO_COW) {
490 | | - /*
491 | | - * Yuk. This can do memory allocation, but is not a
492 | | - * transactional operation so everything is done in GFP_KERNEL
493 | | - * context. That can deadlock, because we hold pages in
494 | | - * writeback state and GFP_KERNEL allocations can block on them.
495 | | - * Hence we must operate in nofs conditions here.
496 | | - */
497 | | - unsigned nofs_flag;
| 498 | + unsigned int nofs_flag;
498 | 499 |
499 | | - nofs_flag = memalloc_nofs_save();
| 500 | + /*
| 501 | + * We can allocate memory here while doing writeback on behalf of
| 502 | + * memory reclaim. To avoid memory allocation deadlocks set the
| 503 | + * task-wide nofs context for the following operations.
| 504 | + */
| 505 | + nofs_flag = memalloc_nofs_save();
| 506 | +
| 507 | + /* Convert CoW extents to regular */
| 508 | + if (!status && (ioend->io_flags & IOMAP_F_SHARED)) {
500 | 509 | status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
501 | 510 | ioend->io_offset, ioend->io_size);
502 | | - memalloc_nofs_restore(nofs_flag);
503 | 511 | }
504 | 512 |
505 | 513 | /* Reserve log space if we might write beyond the on-disk inode size. */
506 | 514 | if (!status &&
507 | | - ioend->io_type != XFS_IO_UNWRITTEN &&
| 515 | + ((ioend->io_flags & IOMAP_F_SHARED) ||
| 516 | + ioend->io_type != IOMAP_UNWRITTEN) &&
508 | 517 | xfs_ioend_is_append(ioend) &&
509 | | - !ioend->io_append_trans)
| 518 | + !ioend->io_private)
510 | 519 | status = xfs_setfilesize_trans_alloc(ioend);
511 | 520 |
512 | | - ioend->io_bio->bi_private = ioend;
513 | | - ioend->io_bio->bi_end_io = xfs_end_bio;
514 | | - ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
| 521 | + memalloc_nofs_restore(nofs_flag);
515 | 522 |
516 | | - /*
517 | | - * If we are failing the IO now, just mark the ioend with an
518 | | - * error and finish it. This will run IO completion immediately
519 | | - * as there is only one reference to the ioend at this point in
520 | | - * time.
521 | | - */
522 | | - if (status) {
523 | | - ioend->io_bio->bi_status = errno_to_blk_status(status);
524 | | - bio_endio(ioend->io_bio);
525 | | - return status;
526 | | - }
527 | | -
528 | | - ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
529 | | - submit_bio(ioend->io_bio);
530 | | - return 0;
531 | | -}
532 | | -
533 | | -static struct xfs_ioend *
534 | | -xfs_alloc_ioend(
535 | | - struct inode *inode,
536 | | - unsigned int type,
537 | | - xfs_off_t offset,
538 | | - struct block_device *bdev,
539 | | - sector_t sector)
540 | | -{
541 | | - struct xfs_ioend *ioend;
542 | | - struct bio *bio;
543 | | -
544 | | - bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset);
545 | | - bio_set_dev(bio, bdev);
546 | | - bio->bi_iter.bi_sector = sector;
547 | | -
548 | | - ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
549 | | - INIT_LIST_HEAD(&ioend->io_list);
550 | | - ioend->io_type = type;
551 | | - ioend->io_inode = inode;
552 | | - ioend->io_size = 0;
553 | | - ioend->io_offset = offset;
554 | | - INIT_WORK(&ioend->io_work, xfs_end_io);
555 | | - ioend->io_append_trans = NULL;
556 | | - ioend->io_bio = bio;
557 | | - return ioend;
558 | | -}
559 | | -
560 | | -/*
561 | | - * Allocate a new bio, and chain the old bio to the new one.
562 | | - *
563 | | - * Note that we have to do perform the chaining in this unintuitive order
564 | | - * so that the bi_private linkage is set up in the right direction for the
565 | | - * traversal in xfs_destroy_ioend().
566 | | - */
567 | | -static void
568 | | -xfs_chain_bio(
569 | | - struct xfs_ioend *ioend,
570 | | - struct writeback_control *wbc,
571 | | - struct block_device *bdev,
572 | | - sector_t sector)
573 | | -{
574 | | - struct bio *new;
575 | | -
576 | | - new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
577 | | - bio_set_dev(new, bdev);
578 | | - new->bi_iter.bi_sector = sector;
579 | | - bio_chain(ioend->io_bio, new);
580 | | - bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
581 | | - ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
582 | | - ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
583 | | - submit_bio(ioend->io_bio);
584 | | - ioend->io_bio = new;
585 | | -}
586 | | -
587 | | -/*
588 | | - * Test to see if we have an existing ioend structure that we could append to
589 | | - * first, otherwise finish off the current ioend and start another.
590 | | - */
591 | | -STATIC void
592 | | -xfs_add_to_ioend(
593 | | - struct inode *inode,
594 | | - xfs_off_t offset,
595 | | - struct page *page,
596 | | - struct iomap_page *iop,
597 | | - struct xfs_writepage_ctx *wpc,
598 | | - struct writeback_control *wbc,
599 | | - struct list_head *iolist)
600 | | -{
601 | | - struct xfs_inode *ip = XFS_I(inode);
602 | | - struct xfs_mount *mp = ip->i_mount;
603 | | - struct block_device *bdev = xfs_find_bdev_for_inode(inode);
604 | | - unsigned len = i_blocksize(inode);
605 | | - unsigned poff = offset & (PAGE_SIZE - 1);
606 | | - sector_t sector;
607 | | -
608 | | - sector = xfs_fsb_to_db(ip, wpc->imap.br_startblock) +
609 | | - ((offset - XFS_FSB_TO_B(mp, wpc->imap.br_startoff)) >> 9);
610 | | -
611 | | - if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
612 | | - sector != bio_end_sector(wpc->ioend->io_bio) ||
613 | | - offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
614 | | - if (wpc->ioend)
615 | | - list_add(&wpc->ioend->io_list, iolist);
616 | | - wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset,
617 | | - bdev, sector);
618 | | - }
619 | | -
620 | | - if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff)) {
621 | | - if (iop)
622 | | - atomic_inc(&iop->write_count);
623 | | - if (bio_full(wpc->ioend->io_bio))
624 | | - xfs_chain_bio(wpc->ioend, wbc, bdev, sector);
625 | | - __bio_add_page(wpc->ioend->io_bio, page, len, poff);
626 | | - }
627 | | -
628 | | - wpc->ioend->io_size += len;
629 | | -}
630 | | -
631 | | -STATIC void
632 | | -xfs_vm_invalidatepage(
633 | | - struct page *page,
634 | | - unsigned int offset,
635 | | - unsigned int length)
636 | | -{
637 | | - trace_xfs_invalidatepage(page->mapping->host, page, offset, length);
638 | | - iomap_invalidatepage(page, offset, length);
| 523 | + if (xfs_ioend_needs_workqueue(ioend))
| 524 | + ioend->io_bio->bi_end_io = xfs_end_bio;
| 525 | + return status;
639 | 526 | }
640 | 527 |
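With bio allocation and submission now owned by the iomap layer, xfs_prepare_ioend() only installs a custom bi_end_io when completion actually needs XFS work. The resulting routing, summarized (not new code):

```c
/*
 * ioend state                               completed by
 * -----------                               ------------
 * io_private set (on-disk size update)  \
 * io_type == IOMAP_UNWRITTEN             }  xfs_end_bio -> per-inode list
 * io_flags & IOMAP_F_SHARED (COW remap) /   -> workqueue -> xfs_end_ioend()
 * none of the above (plain overwrite)       generic iomap bio completion,
 *                                           no XFS-specific code at all
 */
```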
641 | 528 | /*
.. | ..
649 | 536 | * transaction as there is no space left for block reservation (typically why we
650 | 537 | * see an ENOSPC in writeback).
651 | 538 | */
652 | | -STATIC void
653 | | -xfs_aops_discard_page(
654 | | - struct page *page)
| 539 | +static void
| 540 | +xfs_discard_page(
| 541 | + struct page *page,
| 542 | + loff_t fileoff)
655 | 543 | {
656 | 544 | struct inode *inode = page->mapping->host;
657 | 545 | struct xfs_inode *ip = XFS_I(inode);
658 | 546 | struct xfs_mount *mp = ip->i_mount;
659 | | - loff_t offset = page_offset(page);
660 | | - xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, offset);
| 547 | + unsigned int pageoff = offset_in_page(fileoff);
| 548 | + xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, fileoff);
| 549 | + xfs_fileoff_t pageoff_fsb = XFS_B_TO_FSBT(mp, pageoff);
661 | 550 | int error;
662 | 551 |
663 | 552 | if (XFS_FORCED_SHUTDOWN(mp))
664 | 553 | goto out_invalidate;
665 | 554 |
666 | | - xfs_alert(mp,
| 555 | + xfs_alert_ratelimited(mp,
667 | 556 | "page discard on page "PTR_FMT", inode 0x%llx, offset %llu.",
668 | | - page, ip->i_ino, offset);
| 557 | + page, ip->i_ino, fileoff);
669 | 558 |
670 | 559 | error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
671 | | - PAGE_SIZE / i_blocksize(inode));
| 560 | + i_blocks_per_page(inode, page) - pageoff_fsb);
672 | 561 | if (error && !XFS_FORCED_SHUTDOWN(mp))
673 | 562 | xfs_alert(mp, "page discard unable to remove delalloc mapping.");
674 | 563 | out_invalidate:
675 | | - xfs_vm_invalidatepage(page, 0, PAGE_SIZE);
| 564 | + iomap_invalidatepage(page, pageoff, PAGE_SIZE - pageoff);
676 | 565 | }
677 | 566 |
678 | | -/*
679 | | - * We implement an immediate ioend submission policy here to avoid needing to
680 | | - * chain multiple ioends and hence nest mempool allocations which can violate
681 | | - * forward progress guarantees we need to provide. The current ioend we are
682 | | - * adding blocks to is cached on the writepage context, and if the new block
683 | | - * does not append to the cached ioend it will create a new ioend and cache that
684 | | - * instead.
685 | | - *
686 | | - * If a new ioend is created and cached, the old ioend is returned and queued
687 | | - * locally for submission once the entire page is processed or an error has been
688 | | - * detected. While ioends are submitted immediately after they are completed,
689 | | - * batching optimisations are provided by higher level block plugging.
690 | | - *
691 | | - * At the end of a writeback pass, there will be a cached ioend remaining on the
692 | | - * writepage context that the caller will need to submit.
693 | | - */
694 | | -static int
695 | | -xfs_writepage_map(
696 | | - struct xfs_writepage_ctx *wpc,
697 | | - struct writeback_control *wbc,
698 | | - struct inode *inode,
699 | | - struct page *page,
700 | | - uint64_t end_offset)
701 | | -{
702 | | - LIST_HEAD(submit_list);
703 | | - struct iomap_page *iop = to_iomap_page(page);
704 | | - unsigned len = i_blocksize(inode);
705 | | - struct xfs_ioend *ioend, *next;
706 | | - uint64_t file_offset; /* file offset of page */
707 | | - int error = 0, count = 0, i;
708 | | -
709 | | - ASSERT(iop || i_blocksize(inode) == PAGE_SIZE);
710 | | - ASSERT(!iop || atomic_read(&iop->write_count) == 0);
711 | | -
712 | | - /*
713 | | - * Walk through the page to find areas to write back. If we run off the
714 | | - * end of the current map or find the current map invalid, grab a new
715 | | - * one.
716 | | - */
717 | | - for (i = 0, file_offset = page_offset(page);
718 | | - i < (PAGE_SIZE >> inode->i_blkbits) && file_offset < end_offset;
719 | | - i++, file_offset += len) {
720 | | - if (iop && !test_bit(i, iop->uptodate))
721 | | - continue;
722 | | -
723 | | - error = xfs_map_blocks(wpc, inode, file_offset);
724 | | - if (error)
725 | | - break;
726 | | - if (wpc->io_type == XFS_IO_HOLE)
727 | | - continue;
728 | | - xfs_add_to_ioend(inode, file_offset, page, iop, wpc, wbc,
729 | | - &submit_list);
730 | | - count++;
731 | | - }
732 | | -
733 | | - ASSERT(wpc->ioend || list_empty(&submit_list));
734 | | - ASSERT(PageLocked(page));
735 | | - ASSERT(!PageWriteback(page));
736 | | -
737 | | - /*
738 | | - * On error, we have to fail the ioend here because we may have set
739 | | - * pages under writeback, we have to make sure we run IO completion to
740 | | - * mark the error state of the IO appropriately, so we can't cancel the
741 | | - * ioend directly here. That means we have to mark this page as under
742 | | - * writeback if we included any blocks from it in the ioend chain so
743 | | - * that completion treats it correctly.
744 | | - *
745 | | - * If we didn't include the page in the ioend, the on error we can
746 | | - * simply discard and unlock it as there are no other users of the page
747 | | - * now. The caller will still need to trigger submission of outstanding
748 | | - * ioends on the writepage context so they are treated correctly on
749 | | - * error.
750 | | - */
751 | | - if (unlikely(error)) {
752 | | - if (!count) {
753 | | - xfs_aops_discard_page(page);
754 | | - ClearPageUptodate(page);
755 | | - unlock_page(page);
756 | | - goto done;
757 | | - }
758 | | -
759 | | - /*
760 | | - * If the page was not fully cleaned, we need to ensure that the
761 | | - * higher layers come back to it correctly. That means we need
762 | | - * to keep the page dirty, and for WB_SYNC_ALL writeback we need
763 | | - * to ensure the PAGECACHE_TAG_TOWRITE index mark is not removed
764 | | - * so another attempt to write this page in this writeback sweep
765 | | - * will be made.
766 | | - */
767 | | - set_page_writeback_keepwrite(page);
768 | | - } else {
769 | | - clear_page_dirty_for_io(page);
770 | | - set_page_writeback(page);
771 | | - }
772 | | -
773 | | - unlock_page(page);
774 | | -
775 | | - /*
776 | | - * Preserve the original error if there was one, otherwise catch
777 | | - * submission errors here and propagate into subsequent ioend
778 | | - * submissions.
779 | | - */
780 | | - list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
781 | | - int error2;
782 | | -
783 | | - list_del_init(&ioend->io_list);
784 | | - error2 = xfs_submit_ioend(wbc, ioend, error);
785 | | - if (error2 && !error)
786 | | - error = error2;
787 | | - }
788 | | -
789 | | - /*
790 | | - * We can end up here with no error and nothing to write only if we race
791 | | - * with a partial page truncate on a sub-page block sized filesystem.
792 | | - */
793 | | - if (!count)
794 | | - end_page_writeback(page);
795 | | -done:
796 | | - mapping_set_error(page->mapping, error);
797 | | - return error;
798 | | -}
799 | | -
800 | | -/*
801 | | - * Write out a dirty page.
802 | | - *
803 | | - * For delalloc space on the page we need to allocate space and flush it.
804 | | - * For unwritten space on the page we need to start the conversion to
805 | | - * regular allocated space.
806 | | - */
807 | | -STATIC int
808 | | -xfs_do_writepage(
809 | | - struct page *page,
810 | | - struct writeback_control *wbc,
811 | | - void *data)
812 | | -{
813 | | - struct xfs_writepage_ctx *wpc = data;
814 | | - struct inode *inode = page->mapping->host;
815 | | - loff_t offset;
816 | | - uint64_t end_offset;
817 | | - pgoff_t end_index;
818 | | -
819 | | - trace_xfs_writepage(inode, page, 0, 0);
820 | | -
821 | | - /*
822 | | - * Refuse to write the page out if we are called from reclaim context.
823 | | - *
824 | | - * This avoids stack overflows when called from deeply used stacks in
825 | | - * random callers for direct reclaim or memcg reclaim. We explicitly
826 | | - * allow reclaim from kswapd as the stack usage there is relatively low.
827 | | - *
828 | | - * This should never happen except in the case of a VM regression so
829 | | - * warn about it.
830 | | - */
831 | | - if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
832 | | - PF_MEMALLOC))
833 | | - goto redirty;
834 | | -
835 | | - /*
836 | | - * Given that we do not allow direct reclaim to call us, we should
837 | | - * never be called while in a filesystem transaction.
838 | | - */
839 | | - if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS))
840 | | - goto redirty;
841 | | -
842 | | - /*
843 | | - * Is this page beyond the end of the file?
844 | | - *
845 | | - * The page index is less than the end_index, adjust the end_offset
846 | | - * to the highest offset that this page should represent.
847 | | - * -----------------------------------------------------
848 | | - * | file mapping | <EOF> |
849 | | - * -----------------------------------------------------
850 | | - * | Page ... | Page N-2 | Page N-1 | Page N | |
851 | | - * ^--------------------------------^----------|--------
852 | | - * | desired writeback range | see else |
853 | | - * ---------------------------------^------------------|
854 | | - */
855 | | - offset = i_size_read(inode);
856 | | - end_index = offset >> PAGE_SHIFT;
857 | | - if (page->index < end_index)
858 | | - end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT;
859 | | - else {
860 | | - /*
861 | | - * Check whether the page to write out is beyond or straddles
862 | | - * i_size or not.
863 | | - * -------------------------------------------------------
864 | | - * | file mapping | <EOF> |
865 | | - * -------------------------------------------------------
866 | | - * | Page ... | Page N-2 | Page N-1 | Page N | Beyond |
867 | | - * ^--------------------------------^-----------|---------
868 | | - * | | Straddles |
869 | | - * ---------------------------------^-----------|--------|
870 | | - */
871 | | - unsigned offset_into_page = offset & (PAGE_SIZE - 1);
872 | | -
873 | | - /*
874 | | - * Skip the page if it is fully outside i_size, e.g. due to a
875 | | - * truncate operation that is in progress. We must redirty the
876 | | - * page so that reclaim stops reclaiming it. Otherwise
877 | | - * xfs_vm_releasepage() is called on it and gets confused.
878 | | - *
879 | | - * Note that the end_index is unsigned long, it would overflow
880 | | - * if the given offset is greater than 16TB on 32-bit system
881 | | - * and if we do check the page is fully outside i_size or not
882 | | - * via "if (page->index >= end_index + 1)" as "end_index + 1"
883 | | - * will be evaluated to 0. Hence this page will be redirtied
884 | | - * and be written out repeatedly which would result in an
885 | | - * infinite loop, the user program that perform this operation
886 | | - * will hang. Instead, we can verify this situation by checking
887 | | - * if the page to write is totally beyond the i_size or if it's
888 | | - * offset is just equal to the EOF.
889 | | - */
890 | | - if (page->index > end_index ||
891 | | - (page->index == end_index && offset_into_page == 0))
892 | | - goto redirty;
893 | | -
894 | | - /*
895 | | - * The page straddles i_size. It must be zeroed out on each
896 | | - * and every writepage invocation because it may be mmapped.
897 | | - * "A file is mapped in multiples of the page size. For a file
898 | | - * that is not a multiple of the page size, the remaining
899 | | - * memory is zeroed when mapped, and writes to that region are
900 | | - * not written out to the file."
901 | | - */
902 | | - zero_user_segment(page, offset_into_page, PAGE_SIZE);
903 | | -
904 | | - /* Adjust the end_offset to the end of file */
905 | | - end_offset = offset;
906 | | - }
907 | | -
908 | | - return xfs_writepage_map(wpc, wbc, inode, page, end_offset);
909 | | -
910 | | -redirty:
911 | | - redirty_page_for_writepage(wbc, page);
912 | | - unlock_page(page);
913 | | - return 0;
914 | | -}
| 567 | +static const struct iomap_writeback_ops xfs_writeback_ops = {
| 568 | + .map_blocks = xfs_map_blocks,
| 569 | + .prepare_ioend = xfs_prepare_ioend,
| 570 | + .discard_page = xfs_discard_page,
| 571 | +};
915 | 572 |
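These three callbacks are now the entire surface XFS exposes to the generic buffered writeback path. Roughly how iomap drives them, simplified from the generic code this patch targets (a control flow sketch, not the verbatim implementation):

```c
/*
 * iomap_writepage() / iomap_writepages(), in outline:
 *
 *   for each dirty block in the page:
 *           ops->map_blocks(wpc, inode, offset);    // xfs_map_blocks
 *           // append the block to wpc->ioend, opening a new ioend (and
 *           // bio) whenever it does not extend the current one
 *   ops->prepare_ioend(ioend, status);              // xfs_prepare_ioend
 *   submit the ioend's bio(s);
 *
 *   on error before any block was added to an ioend:
 *           ops->discard_page(page, file_offset);   // xfs_discard_page
 */
```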
916 | 573 | STATIC int
917 | 574 | xfs_vm_writepage(
918 | 575 | struct page *page,
919 | 576 | struct writeback_control *wbc)
920 | 577 | {
921 | | - struct xfs_writepage_ctx wpc = {
922 | | - .io_type = XFS_IO_INVALID,
923 | | - };
924 | | - int ret;
| 578 | + struct xfs_writepage_ctx wpc = { };
925 | 579 |
926 | | - ret = xfs_do_writepage(page, wbc, &wpc);
927 | | - if (wpc.ioend)
928 | | - ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
929 | | - return ret;
| 580 | + if (WARN_ON_ONCE(current->journal_info)) {
| 581 | + redirty_page_for_writepage(wbc, page);
| 582 | + unlock_page(page);
| 583 | + return 0;
| 584 | + }
| 585 | +
| 586 | + return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops);
930 | 587 | }
931 | 588 |
932 | 589 | STATIC int
.. | ..
934 | 591 | struct address_space *mapping,
935 | 592 | struct writeback_control *wbc)
936 | 593 | {
937 | | - struct xfs_writepage_ctx wpc = {
938 | | - .io_type = XFS_IO_INVALID,
939 | | - };
940 | | - int ret;
| 594 | + struct xfs_writepage_ctx wpc = { };
| 595 | +
| 596 | + /*
| 597 | + * Writing back data in a transaction context can result in recursive
| 598 | + * transactions. This is bad, so issue a warning and get out of here.
| 599 | + */
| 600 | + if (WARN_ON_ONCE(current->journal_info))
| 601 | + return 0;
941 | 602 |
942 | 603 | xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
943 | | - ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
944 | | - if (wpc.ioend)
945 | | - ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
946 | | - return ret;
| 604 | + return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
947 | 605 | }
948 | 606 |
949 | 607 | STATIC int
.. | ..
951 | 609 | struct address_space *mapping,
952 | 610 | struct writeback_control *wbc)
953 | 611 | {
954 | | - xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
955 | | - return dax_writeback_mapping_range(mapping,
956 | | - xfs_find_bdev_for_inode(mapping->host), wbc);
957 | | -}
| 612 | + struct xfs_inode *ip = XFS_I(mapping->host);
958 | 613 |
959 | | -STATIC int
960 | | -xfs_vm_releasepage(
961 | | - struct page *page,
962 | | - gfp_t gfp_mask)
963 | | -{
964 | | - trace_xfs_releasepage(page->mapping->host, page, 0, 0);
965 | | - return iomap_releasepage(page, gfp_mask);
| 614 | + xfs_iflags_clear(ip, XFS_ITRUNCATED);
| 615 | + return dax_writeback_mapping_range(mapping,
| 616 | + xfs_inode_buftarg(ip)->bt_daxdev, wbc);
966 | 617 | }
967 | 618 |
968 | 619 | STATIC sector_t
.. | ..
983 | 634 | * Since we don't pass back blockdev info, we can't return bmap
984 | 635 | * information for rt files either.
985 | 636 | */
986 | | - if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
| 637 | + if (xfs_is_cow_inode(ip) || XFS_IS_REALTIME_INODE(ip))
987 | 638 | return 0;
988 | | - return iomap_bmap(mapping, block, &xfs_iomap_ops);
| 639 | + return iomap_bmap(mapping, block, &xfs_read_iomap_ops);
989 | 640 | }
990 | 641 |
991 | 642 | STATIC int
.. | ..
993 | 644 | struct file *unused,
994 | 645 | struct page *page)
995 | 646 | {
996 | | - trace_xfs_vm_readpage(page->mapping->host, 1);
997 | | - return iomap_readpage(page, &xfs_iomap_ops);
| 647 | + return iomap_readpage(page, &xfs_read_iomap_ops);
998 | 648 | }
999 | 649 |
1000 | | -STATIC int
1001 | | -xfs_vm_readpages(
1002 | | - struct file *unused,
1003 | | - struct address_space *mapping,
1004 | | - struct list_head *pages,
1005 | | - unsigned nr_pages)
| 650 | +STATIC void
| 651 | +xfs_vm_readahead(
| 652 | + struct readahead_control *rac)
1006 | 653 | {
1007 | | - trace_xfs_vm_readpages(mapping->host, nr_pages);
1008 | | - return iomap_readpages(mapping, pages, nr_pages, &xfs_iomap_ops);
| 654 | + iomap_readahead(rac, &xfs_read_iomap_ops);
1009 | 655 | }
1010 | 656 |
1011 | 657 | static int
.. | ..
1014 | 660 | struct file *swap_file,
1015 | 661 | sector_t *span)
1016 | 662 | {
1017 | | - sis->bdev = xfs_find_bdev_for_inode(file_inode(swap_file));
1018 | | - return iomap_swapfile_activate(sis, swap_file, span, &xfs_iomap_ops);
| 663 | + sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
| 664 | + return iomap_swapfile_activate(sis, swap_file, span,
| 665 | + &xfs_read_iomap_ops);
1019 | 666 | }
1020 | 667 |
1021 | 668 | const struct address_space_operations xfs_address_space_operations = {
1022 | 669 | .readpage = xfs_vm_readpage,
1023 | | - .readpages = xfs_vm_readpages,
| 670 | + .readahead = xfs_vm_readahead,
1024 | 671 | .writepage = xfs_vm_writepage,
1025 | 672 | .writepages = xfs_vm_writepages,
1026 | 673 | .set_page_dirty = iomap_set_page_dirty,
1027 | | - .releasepage = xfs_vm_releasepage,
1028 | | - .invalidatepage = xfs_vm_invalidatepage,
| 674 | + .releasepage = iomap_releasepage,
| 675 | + .invalidatepage = iomap_invalidatepage,
1029 | 676 | .bmap = xfs_vm_bmap,
1030 | 677 | .direct_IO = noop_direct_IO,
1031 | 678 | .migratepage = iomap_migrate_page,
---|