2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
kernel/fs/xfs/xfs_aops.c
@@ -12,118 +12,28 @@
 #include "xfs_mount.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
 #include "xfs_iomap.h"
 #include "xfs_trace.h"
 #include "xfs_bmap.h"
 #include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
 #include "xfs_reflink.h"
-#include <linux/writeback.h>
 
-/*
- * structure owned by writepages passed to individual writepage calls
- */
 struct xfs_writepage_ctx {
-        struct xfs_bmbt_irec imap;
-        unsigned int io_type;
+        struct iomap_writepage_ctx ctx;
+        unsigned int data_seq;
         unsigned int cow_seq;
-        struct xfs_ioend *ioend;
 };
 
-struct block_device *
-xfs_find_bdev_for_inode(
-        struct inode *inode)
+static inline struct xfs_writepage_ctx *
+XFS_WPC(struct iomap_writepage_ctx *ctx)
 {
-        struct xfs_inode *ip = XFS_I(inode);
-        struct xfs_mount *mp = ip->i_mount;
-
-        if (XFS_IS_REALTIME_INODE(ip))
-                return mp->m_rtdev_targp->bt_bdev;
-        else
-                return mp->m_ddev_targp->bt_bdev;
-}
-
-struct dax_device *
-xfs_find_daxdev_for_inode(
-        struct inode *inode)
-{
-        struct xfs_inode *ip = XFS_I(inode);
-        struct xfs_mount *mp = ip->i_mount;
-
-        if (XFS_IS_REALTIME_INODE(ip))
-                return mp->m_rtdev_targp->bt_daxdev;
-        else
-                return mp->m_ddev_targp->bt_daxdev;
-}
-
-static void
-xfs_finish_page_writeback(
-        struct inode *inode,
-        struct bio_vec *bvec,
-        int error)
-{
-        struct iomap_page *iop = to_iomap_page(bvec->bv_page);
-
-        if (error) {
-                SetPageError(bvec->bv_page);
-                mapping_set_error(inode->i_mapping, -EIO);
-        }
-
-        ASSERT(iop || i_blocksize(inode) == PAGE_SIZE);
-        ASSERT(!iop || atomic_read(&iop->write_count) > 0);
-
-        if (!iop || atomic_dec_and_test(&iop->write_count))
-                end_page_writeback(bvec->bv_page);
-}
-
-/*
- * We're now finished for good with this ioend structure. Update the page
- * state, release holds on bios, and finally free up memory. Do not use the
- * ioend after this.
- */
-STATIC void
-xfs_destroy_ioend(
-        struct xfs_ioend *ioend,
-        int error)
-{
-        struct inode *inode = ioend->io_inode;
-        struct bio *bio = &ioend->io_inline_bio;
-        struct bio *last = ioend->io_bio, *next;
-        u64 start = bio->bi_iter.bi_sector;
-        bool quiet = bio_flagged(bio, BIO_QUIET);
-
-        for (bio = &ioend->io_inline_bio; bio; bio = next) {
-                struct bio_vec *bvec;
-                int i;
-
-                /*
-                 * For the last bio, bi_private points to the ioend, so we
-                 * need to explicitly end the iteration here.
-                 */
-                if (bio == last)
-                        next = NULL;
-                else
-                        next = bio->bi_private;
-
-                /* walk each page on bio, ending page IO on them */
-                bio_for_each_segment_all(bvec, bio, i)
-                        xfs_finish_page_writeback(inode, bvec, error);
-                bio_put(bio);
-        }
-
-        if (unlikely(error && !quiet)) {
-                xfs_err_ratelimited(XFS_I(inode)->i_mount,
-                        "writeback error on sector %llu", start);
-        }
+        return container_of(ctx, struct xfs_writepage_ctx, ctx);
 }
 
 /*
  * Fast and loose check if this write could update the on-disk inode size.
  */
-static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
+static inline bool xfs_ioend_is_append(struct iomap_ioend *ioend)
 {
         return ioend->io_offset + ioend->io_size >
                 XFS_I(ioend->io_inode)->i_d.di_size;
@@ -131,18 +41,17 @@
 
 STATIC int
 xfs_setfilesize_trans_alloc(
-        struct xfs_ioend *ioend)
+        struct iomap_ioend *ioend)
 {
         struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
         struct xfs_trans *tp;
         int error;
 
-        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0,
-                        XFS_TRANS_NOFS, &tp);
+        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
         if (error)
                 return error;
 
-        ioend->io_append_trans = tp;
+        ioend->io_private = tp;
 
         /*
          * We may pass freeze protection with a transaction. So tell lockdep
@@ -153,7 +62,7 @@
          * We hand off the transaction to the completion thread now, so
          * clear the flag here.
          */
-        current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
+        xfs_trans_clear_context(tp);
         return 0;
 }
 
@@ -205,18 +114,18 @@
 
 STATIC int
 xfs_setfilesize_ioend(
-        struct xfs_ioend *ioend,
+        struct iomap_ioend *ioend,
         int error)
 {
         struct xfs_inode *ip = XFS_I(ioend->io_inode);
-        struct xfs_trans *tp = ioend->io_append_trans;
+        struct xfs_trans *tp = ioend->io_private;
 
         /*
          * The transaction may have been allocated in the I/O submission thread,
          * thus we need to mark ourselves as being in a transaction manually.
          * Similarly for freeze protection.
          */
-        current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
+        xfs_trans_set_context(tp);
         __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
 
         /* we abort the update if there was an IO error */
@@ -232,104 +141,229 @@
  * IO write completion.
  */
 STATIC void
-xfs_end_io(
-        struct work_struct *work)
+xfs_end_ioend(
+        struct iomap_ioend *ioend)
 {
-        struct xfs_ioend *ioend =
-                container_of(work, struct xfs_ioend, io_work);
         struct xfs_inode *ip = XFS_I(ioend->io_inode);
+        struct xfs_mount *mp = ip->i_mount;
         xfs_off_t offset = ioend->io_offset;
         size_t size = ioend->io_size;
+        unsigned int nofs_flag;
         int error;
+
+        /*
+         * We can allocate memory here while doing writeback on behalf of
+         * memory reclaim. To avoid memory allocation deadlocks set the
+         * task-wide nofs context for the following operations.
+         */
+        nofs_flag = memalloc_nofs_save();
 
         /*
          * Just clean up the in-memory strutures if the fs has been shut down.
         */
-        if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+        if (XFS_FORCED_SHUTDOWN(mp)) {
                 error = -EIO;
                 goto done;
         }
 
         /*
-         * Clean up any COW blocks on an I/O error.
+         * Clean up all COW blocks and underlying data fork delalloc blocks on
+         * I/O error. The delalloc punch is required because this ioend was
+         * mapped to blocks in the COW fork and the associated pages are no
+         * longer dirty. If we don't remove delalloc blocks here, they become
+         * stale and can corrupt free space accounting on unmount.
          */
         error = blk_status_to_errno(ioend->io_bio->bi_status);
         if (unlikely(error)) {
-                switch (ioend->io_type) {
-                case XFS_IO_COW:
+                if (ioend->io_flags & IOMAP_F_SHARED) {
                         xfs_reflink_cancel_cow_range(ip, offset, size, true);
-                        break;
+                        xfs_bmap_punch_delalloc_range(ip,
+                                        XFS_B_TO_FSBT(mp, offset),
+                                        XFS_B_TO_FSB(mp, size));
                 }
-
                 goto done;
         }
 
         /*
-         * Success:  commit the COW or unwritten blocks if needed.
+         * Success: commit the COW or unwritten blocks if needed.
          */
-        switch (ioend->io_type) {
-        case XFS_IO_COW:
+        if (ioend->io_flags & IOMAP_F_SHARED)
                 error = xfs_reflink_end_cow(ip, offset, size);
-                break;
-        case XFS_IO_UNWRITTEN:
-                /* writeback should never update isize */
+        else if (ioend->io_type == IOMAP_UNWRITTEN)
                 error = xfs_iomap_write_unwritten(ip, offset, size, false);
-                break;
-        default:
-                ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
-                break;
-        }
+        else
+                ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_private);
 
 done:
-        if (ioend->io_append_trans)
+        if (ioend->io_private)
                 error = xfs_setfilesize_ioend(ioend, error);
-        xfs_destroy_ioend(ioend, error);
+        iomap_finish_ioends(ioend, error);
+        memalloc_nofs_restore(nofs_flag);
+}
+
+/*
+ * If the to be merged ioend has a preallocated transaction for file
+ * size updates we need to ensure the ioend it is merged into also
+ * has one. If it already has one we can simply cancel the transaction
+ * as it is guaranteed to be clean.
+ */
+static void
+xfs_ioend_merge_private(
+        struct iomap_ioend *ioend,
+        struct iomap_ioend *next)
+{
+        if (!ioend->io_private) {
+                ioend->io_private = next->io_private;
+                next->io_private = NULL;
+        } else {
+                xfs_setfilesize_ioend(next, -ECANCELED);
+        }
+}
+
+/* Finish all pending io completions. */
+void
+xfs_end_io(
+        struct work_struct *work)
+{
+        struct xfs_inode *ip =
+                container_of(work, struct xfs_inode, i_ioend_work);
+        struct iomap_ioend *ioend;
+        struct list_head tmp;
+        unsigned long flags;
+
+        spin_lock_irqsave(&ip->i_ioend_lock, flags);
+        list_replace_init(&ip->i_ioend_list, &tmp);
+        spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
+
+        iomap_sort_ioends(&tmp);
+        while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend,
+                        io_list))) {
+                list_del_init(&ioend->io_list);
+                iomap_ioend_try_merge(ioend, &tmp, xfs_ioend_merge_private);
+                xfs_end_ioend(ioend);
+        }
+}
+
+static inline bool xfs_ioend_needs_workqueue(struct iomap_ioend *ioend)
+{
+        return ioend->io_private ||
+                ioend->io_type == IOMAP_UNWRITTEN ||
+                (ioend->io_flags & IOMAP_F_SHARED);
 }
 
 STATIC void
 xfs_end_bio(
         struct bio *bio)
 {
-        struct xfs_ioend *ioend = bio->bi_private;
-        struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
+        struct iomap_ioend *ioend = bio->bi_private;
+        struct xfs_inode *ip = XFS_I(ioend->io_inode);
+        unsigned long flags;
 
-        if (ioend->io_type == XFS_IO_UNWRITTEN || ioend->io_type == XFS_IO_COW)
-                queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
-        else if (ioend->io_append_trans)
-                queue_work(mp->m_data_workqueue, &ioend->io_work);
-        else
-                xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
+        ASSERT(xfs_ioend_needs_workqueue(ioend));
+
+        spin_lock_irqsave(&ip->i_ioend_lock, flags);
+        if (list_empty(&ip->i_ioend_list))
+                WARN_ON_ONCE(!queue_work(ip->i_mount->m_unwritten_workqueue,
+                                &ip->i_ioend_work));
+        list_add_tail(&ioend->io_list, &ip->i_ioend_list);
+        spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
 }
 
-STATIC int
+/*
+ * Fast revalidation of the cached writeback mapping. Return true if the current
+ * mapping is valid, false otherwise.
+ */
+static bool
+xfs_imap_valid(
+        struct iomap_writepage_ctx *wpc,
+        struct xfs_inode *ip,
+        loff_t offset)
+{
+        if (offset < wpc->iomap.offset ||
+            offset >= wpc->iomap.offset + wpc->iomap.length)
+                return false;
+        /*
+         * If this is a COW mapping, it is sufficient to check that the mapping
+         * covers the offset. Be careful to check this first because the caller
+         * can revalidate a COW mapping without updating the data seqno.
+         */
+        if (wpc->iomap.flags & IOMAP_F_SHARED)
+                return true;
+
+        /*
+         * This is not a COW mapping. Check the sequence number of the data fork
+         * because concurrent changes could have invalidated the extent. Check
+         * the COW fork because concurrent changes since the last time we
+         * checked (and found nothing at this offset) could have added
+         * overlapping blocks.
+         */
+        if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq))
+                return false;
+        if (xfs_inode_has_cow_data(ip) &&
+            XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq))
+                return false;
+        return true;
+}
+
+/*
+ * Pass in a dellalloc extent and convert it to real extents, return the real
+ * extent that maps offset_fsb in wpc->iomap.
+ *
+ * The current page is held locked so nothing could have removed the block
+ * backing offset_fsb, although it could have moved from the COW to the data
+ * fork by another thread.
+ */
+static int
+xfs_convert_blocks(
+        struct iomap_writepage_ctx *wpc,
+        struct xfs_inode *ip,
+        int whichfork,
+        loff_t offset)
+{
+        int error;
+        unsigned *seq;
+
+        if (whichfork == XFS_COW_FORK)
+                seq = &XFS_WPC(wpc)->cow_seq;
+        else
+                seq = &XFS_WPC(wpc)->data_seq;
+
+        /*
+         * Attempt to allocate whatever delalloc extent currently backs offset
+         * and put the result into wpc->iomap. Allocate in a loop because it
+         * may take several attempts to allocate real blocks for a contiguous
+         * delalloc extent if free space is sufficiently fragmented.
+         */
+        do {
+                error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
+                                &wpc->iomap, seq);
+                if (error)
+                        return error;
+        } while (wpc->iomap.offset + wpc->iomap.length <= offset);
+
+        return 0;
+}
+
+static int
 xfs_map_blocks(
-        struct xfs_writepage_ctx *wpc,
+        struct iomap_writepage_ctx *wpc,
         struct inode *inode,
         loff_t offset)
 {
         struct xfs_inode *ip = XFS_I(inode);
         struct xfs_mount *mp = ip->i_mount;
         ssize_t count = i_blocksize(inode);
-        xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
-        xfs_fileoff_t cow_fsb = NULLFILEOFF;
+        xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
+        xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count);
+        xfs_fileoff_t cow_fsb;
+        int whichfork;
         struct xfs_bmbt_irec imap;
-        int whichfork = XFS_DATA_FORK;
         struct xfs_iext_cursor icur;
-        bool imap_valid;
+        int retries = 0;
         int error = 0;
 
-        /*
-         * We have to make sure the cached mapping is within EOF to protect
-         * against eofblocks trimming on file release leaving us with a stale
-         * mapping. Otherwise, a page for a subsequent file extending buffered
-         * write could get picked up by this writeback cycle and written to the
-         * wrong blocks.
-         *
-         * Note that what we really want here is a generic mapping invalidation
-         * mechanism to protect us from arbitrary extent modifying contexts, not
-         * just eofblocks.
-         */
-        xfs_trim_extent_eof(&wpc->imap, ip);
+        if (XFS_FORCED_SHUTDOWN(mp))
+                return -EIO;
 
         /*
          * COW fork blocks can overlap data fork blocks even if the blocks
@@ -346,16 +380,8 @@
          * against concurrent updates and provides a memory barrier on the way
          * out that ensures that we always see the current value.
          */
-        imap_valid = offset_fsb >= wpc->imap.br_startoff &&
-                     offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
-        if (imap_valid &&
-            (!xfs_inode_has_cow_data(ip) ||
-             wpc->io_type == XFS_IO_COW ||
-             wpc->cow_seq == READ_ONCE(ip->i_cowfp->if_seq)))
+        if (xfs_imap_valid(wpc, ip, offset))
                 return 0;
-
-        if (XFS_FORCED_SHUTDOWN(mp))
-                return -EIO;
 
         /*
          * If we don't have a valid map, now it's time to get a new one for this
@@ -363,14 +389,12 @@
          * into real extents. If we return without a valid map, it means we
          * landed in a hole and we skip the block.
          */
+retry:
+        cow_fsb = NULLFILEOFF;
+        whichfork = XFS_DATA_FORK;
         xfs_ilock(ip, XFS_ILOCK_SHARED);
-        ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
+        ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
                (ip->i_df.if_flags & XFS_IFEXTENTS));
-        ASSERT(offset <= mp->m_super->s_maxbytes);
-
-        if (offset > mp->m_super->s_maxbytes - count)
-                count = mp->m_super->s_maxbytes - offset;
-        end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
 
         /*
          * Check if this is offset is covered by a COW extents, and if yes use
@@ -380,32 +404,18 @@
             xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
                 cow_fsb = imap.br_startoff;
         if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
-                wpc->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
+                XFS_WPC(wpc)->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
                 xfs_iunlock(ip, XFS_ILOCK_SHARED);
-                /*
-                 * Truncate can race with writeback since writeback doesn't
-                 * take the iolock and truncate decreases the file size before
-                 * it starts truncating the pages between new_size and old_size.
-                 * Therefore, we can end up in the situation where writeback
-                 * gets a CoW fork mapping but the truncate makes the mapping
-                 * invalid and we end up in here trying to get a new mapping.
-                 * bail out here so that we simply never get a valid mapping
-                 * and so we drop the write altogether. The page truncation
-                 * will kill the contents anyway.
-                 */
-                if (offset > i_size_read(inode)) {
-                        wpc->io_type = XFS_IO_HOLE;
-                        return 0;
-                }
+
                 whichfork = XFS_COW_FORK;
-                wpc->io_type = XFS_IO_COW;
                 goto allocate_blocks;
         }
 
         /*
-         * Map valid and no COW extent in the way? We're done.
+         * No COW extent overlap. Revalidate now that we may have updated
+         * ->cow_seq. If the data mapping is still valid, we're done.
          */
-        if (imap_valid) {
+        if (xfs_imap_valid(wpc, ip, offset)) {
                 xfs_iunlock(ip, XFS_ILOCK_SHARED);
                 return 0;
         }
@@ -417,225 +427,102 @@
          */
         if (!xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap))
                 imap.br_startoff = end_fsb;     /* fake a hole past EOF */
+        XFS_WPC(wpc)->data_seq = READ_ONCE(ip->i_df.if_seq);
         xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
+        /* landed in a hole or beyond EOF? */
         if (imap.br_startoff > offset_fsb) {
-                /* landed in a hole or beyond EOF */
                 imap.br_blockcount = imap.br_startoff - offset_fsb;
                 imap.br_startoff = offset_fsb;
                 imap.br_startblock = HOLESTARTBLOCK;
-                wpc->io_type = XFS_IO_HOLE;
-        } else {
-                /*
-                 * Truncate to the next COW extent if there is one. This is the
-                 * only opportunity to do this because we can skip COW fork
-                 * lookups for the subsequent blocks in the mapping; however,
-                 * the requirement to treat the COW range separately remains.
-                 */
-                if (cow_fsb != NULLFILEOFF &&
-                    cow_fsb < imap.br_startoff + imap.br_blockcount)
-                        imap.br_blockcount = cow_fsb - imap.br_startoff;
-
-                if (isnullstartblock(imap.br_startblock)) {
-                        /* got a delalloc extent */
-                        wpc->io_type = XFS_IO_DELALLOC;
-                        goto allocate_blocks;
-                }
-
-                if (imap.br_state == XFS_EXT_UNWRITTEN)
-                        wpc->io_type = XFS_IO_UNWRITTEN;
-                else
-                        wpc->io_type = XFS_IO_OVERWRITE;
+                imap.br_state = XFS_EXT_NORM;
         }
 
-        wpc->imap = imap;
-        xfs_trim_extent_eof(&wpc->imap, ip);
-        trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
+        /*
+         * Truncate to the next COW extent if there is one. This is the only
+         * opportunity to do this because we can skip COW fork lookups for the
+         * subsequent blocks in the mapping; however, the requirement to treat
+         * the COW range separately remains.
+         */
+        if (cow_fsb != NULLFILEOFF &&
+            cow_fsb < imap.br_startoff + imap.br_blockcount)
+                imap.br_blockcount = cow_fsb - imap.br_startoff;
+
+        /* got a delalloc extent? */
+        if (imap.br_startblock != HOLESTARTBLOCK &&
+            isnullstartblock(imap.br_startblock))
+                goto allocate_blocks;
+
+        xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0);
+        trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
         return 0;
 allocate_blocks:
-        error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap,
-                        &wpc->cow_seq);
-        if (error)
+        error = xfs_convert_blocks(wpc, ip, whichfork, offset);
+        if (error) {
+                /*
+                 * If we failed to find the extent in the COW fork we might have
+                 * raced with a COW to data fork conversion or truncate.
+                 * Restart the lookup to catch the extent in the data fork for
+                 * the former case, but prevent additional retries to avoid
+                 * looping forever for the latter case.
+                 */
+                if (error == -EAGAIN && whichfork == XFS_COW_FORK && !retries++)
+                        goto retry;
+                ASSERT(error != -EAGAIN);
                 return error;
-        ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
-               imap.br_startoff + imap.br_blockcount <= cow_fsb);
-        wpc->imap = imap;
-        xfs_trim_extent_eof(&wpc->imap, ip);
-        trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
+        }
+
+        /*
+         * Due to merging the return real extent might be larger than the
+         * original delalloc one. Trim the return extent to the next COW
+         * boundary again to force a re-lookup.
+         */
+        if (whichfork != XFS_COW_FORK && cow_fsb != NULLFILEOFF) {
+                loff_t cow_offset = XFS_FSB_TO_B(mp, cow_fsb);
+
+                if (cow_offset < wpc->iomap.offset + wpc->iomap.length)
+                        wpc->iomap.length = cow_offset - wpc->iomap.offset;
+        }
+
+        ASSERT(wpc->iomap.offset <= offset);
+        ASSERT(wpc->iomap.offset + wpc->iomap.length > offset);
+        trace_xfs_map_blocks_alloc(ip, offset, count, whichfork, &imap);
         return 0;
 }
 
-/*
- * Submit the bio for an ioend. We are passed an ioend with a bio attached to
- * it, and we submit that bio. The ioend may be used for multiple bio
- * submissions, so we only want to allocate an append transaction for the ioend
- * once. In the case of multiple bio submission, each bio will take an IO
- * reference to the ioend to ensure that the ioend completion is only done once
- * all bios have been submitted and the ioend is really done.
- *
- * If @fail is non-zero, it means that we have a situation where some part of
- * the submission process has failed after we have marked paged for writeback
- * and unlocked them. In this situation, we need to fail the bio and ioend
- * rather than submit it to IO. This typically only happens on a filesystem
- * shutdown.
- */
-STATIC int
-xfs_submit_ioend(
-        struct writeback_control *wbc,
-        struct xfs_ioend *ioend,
+static int
+xfs_prepare_ioend(
+        struct iomap_ioend *ioend,
         int status)
 {
-        /* Convert CoW extents to regular */
-        if (!status && ioend->io_type == XFS_IO_COW) {
-                /*
-                 * Yuk. This can do memory allocation, but is not a
-                 * transactional operation so everything is done in GFP_KERNEL
-                 * context. That can deadlock, because we hold pages in
-                 * writeback state and GFP_KERNEL allocations can block on them.
-                 * Hence we must operate in nofs conditions here.
-                 */
-                unsigned nofs_flag;
+        unsigned int nofs_flag;
 
-                nofs_flag = memalloc_nofs_save();
+        /*
+         * We can allocate memory here while doing writeback on behalf of
+         * memory reclaim. To avoid memory allocation deadlocks set the
+         * task-wide nofs context for the following operations.
+         */
+        nofs_flag = memalloc_nofs_save();
+
+        /* Convert CoW extents to regular */
+        if (!status && (ioend->io_flags & IOMAP_F_SHARED)) {
                 status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
                                 ioend->io_offset, ioend->io_size);
-                memalloc_nofs_restore(nofs_flag);
         }
 
         /* Reserve log space if we might write beyond the on-disk inode size. */
         if (!status &&
-            ioend->io_type != XFS_IO_UNWRITTEN &&
+            ((ioend->io_flags & IOMAP_F_SHARED) ||
+             ioend->io_type != IOMAP_UNWRITTEN) &&
             xfs_ioend_is_append(ioend) &&
-            !ioend->io_append_trans)
+            !ioend->io_private)
                 status = xfs_setfilesize_trans_alloc(ioend);
 
-        ioend->io_bio->bi_private = ioend;
-        ioend->io_bio->bi_end_io = xfs_end_bio;
-        ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+        memalloc_nofs_restore(nofs_flag);
 
-        /*
-         * If we are failing the IO now, just mark the ioend with an
-         * error and finish it. This will run IO completion immediately
-         * as there is only one reference to the ioend at this point in
-         * time.
-         */
-        if (status) {
-                ioend->io_bio->bi_status = errno_to_blk_status(status);
-                bio_endio(ioend->io_bio);
-                return status;
-        }
-
-        ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
-        submit_bio(ioend->io_bio);
-        return 0;
-}
-
-static struct xfs_ioend *
-xfs_alloc_ioend(
-        struct inode *inode,
-        unsigned int type,
-        xfs_off_t offset,
-        struct block_device *bdev,
-        sector_t sector)
-{
-        struct xfs_ioend *ioend;
-        struct bio *bio;
-
-        bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset);
-        bio_set_dev(bio, bdev);
-        bio->bi_iter.bi_sector = sector;
-
-        ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
-        INIT_LIST_HEAD(&ioend->io_list);
-        ioend->io_type = type;
-        ioend->io_inode = inode;
-        ioend->io_size = 0;
-        ioend->io_offset = offset;
-        INIT_WORK(&ioend->io_work, xfs_end_io);
-        ioend->io_append_trans = NULL;
-        ioend->io_bio = bio;
-        return ioend;
-}
-
-/*
- * Allocate a new bio, and chain the old bio to the new one.
- *
- * Note that we have to do perform the chaining in this unintuitive order
- * so that the bi_private linkage is set up in the right direction for the
- * traversal in xfs_destroy_ioend().
- */
-static void
-xfs_chain_bio(
-        struct xfs_ioend *ioend,
-        struct writeback_control *wbc,
-        struct block_device *bdev,
-        sector_t sector)
-{
-        struct bio *new;
-
-        new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
-        bio_set_dev(new, bdev);
-        new->bi_iter.bi_sector = sector;
-        bio_chain(ioend->io_bio, new);
-        bio_get(ioend->io_bio);         /* for xfs_destroy_ioend */
-        ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
-        ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
-        submit_bio(ioend->io_bio);
-        ioend->io_bio = new;
-}
-
-/*
- * Test to see if we have an existing ioend structure that we could append to
- * first, otherwise finish off the current ioend and start another.
- */
-STATIC void
-xfs_add_to_ioend(
-        struct inode *inode,
-        xfs_off_t offset,
-        struct page *page,
-        struct iomap_page *iop,
-        struct xfs_writepage_ctx *wpc,
-        struct writeback_control *wbc,
-        struct list_head *iolist)
-{
-        struct xfs_inode *ip = XFS_I(inode);
-        struct xfs_mount *mp = ip->i_mount;
-        struct block_device *bdev = xfs_find_bdev_for_inode(inode);
-        unsigned len = i_blocksize(inode);
-        unsigned poff = offset & (PAGE_SIZE - 1);
-        sector_t sector;
-
-        sector = xfs_fsb_to_db(ip, wpc->imap.br_startblock) +
-                ((offset - XFS_FSB_TO_B(mp, wpc->imap.br_startoff)) >> 9);
-
-        if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
-            sector != bio_end_sector(wpc->ioend->io_bio) ||
-            offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
-                if (wpc->ioend)
-                        list_add(&wpc->ioend->io_list, iolist);
-                wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset,
-                                bdev, sector);
-        }
-
-        if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff)) {
-                if (iop)
-                        atomic_inc(&iop->write_count);
-                if (bio_full(wpc->ioend->io_bio))
-                        xfs_chain_bio(wpc->ioend, wbc, bdev, sector);
-                __bio_add_page(wpc->ioend->io_bio, page, len, poff);
-        }
-
-        wpc->ioend->io_size += len;
-}
-
-STATIC void
-xfs_vm_invalidatepage(
-        struct page *page,
-        unsigned int offset,
-        unsigned int length)
-{
-        trace_xfs_invalidatepage(page->mapping->host, page, offset, length);
-        iomap_invalidatepage(page, offset, length);
+        if (xfs_ioend_needs_workqueue(ioend))
+                ioend->io_bio->bi_end_io = xfs_end_bio;
+        return status;
 }
 
 /*
@@ -649,284 +536,54 @@
  * transaction as there is no space left for block reservation (typically why we
  * see a ENOSPC in writeback).
  */
-STATIC void
-xfs_aops_discard_page(
-        struct page *page)
+static void
+xfs_discard_page(
+        struct page *page,
+        loff_t fileoff)
 {
         struct inode *inode = page->mapping->host;
         struct xfs_inode *ip = XFS_I(inode);
         struct xfs_mount *mp = ip->i_mount;
-        loff_t offset = page_offset(page);
-        xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, offset);
+        unsigned int pageoff = offset_in_page(fileoff);
+        xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, fileoff);
+        xfs_fileoff_t pageoff_fsb = XFS_B_TO_FSBT(mp, pageoff);
         int error;
 
         if (XFS_FORCED_SHUTDOWN(mp))
                 goto out_invalidate;
 
-        xfs_alert(mp,
+        xfs_alert_ratelimited(mp,
                 "page discard on page "PTR_FMT", inode 0x%llx, offset %llu.",
-                        page, ip->i_ino, offset);
+                        page, ip->i_ino, fileoff);
 
         error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-                        PAGE_SIZE / i_blocksize(inode));
+                        i_blocks_per_page(inode, page) - pageoff_fsb);
         if (error && !XFS_FORCED_SHUTDOWN(mp))
                 xfs_alert(mp, "page discard unable to remove delalloc mapping.");
 out_invalidate:
-        xfs_vm_invalidatepage(page, 0, PAGE_SIZE);
+        iomap_invalidatepage(page, pageoff, PAGE_SIZE - pageoff);
 }
 
-/*
- * We implement an immediate ioend submission policy here to avoid needing to
- * chain multiple ioends and hence nest mempool allocations which can violate
- * forward progress guarantees we need to provide. The current ioend we are
- * adding blocks to is cached on the writepage context, and if the new block
- * does not append to the cached ioend it will create a new ioend and cache that
- * instead.
- *
- * If a new ioend is created and cached, the old ioend is returned and queued
- * locally for submission once the entire page is processed or an error has been
- * detected. While ioends are submitted immediately after they are completed,
- * batching optimisations are provided by higher level block plugging.
- *
- * At the end of a writeback pass, there will be a cached ioend remaining on the
- * writepage context that the caller will need to submit.
- */
-static int
-xfs_writepage_map(
-        struct xfs_writepage_ctx *wpc,
-        struct writeback_control *wbc,
-        struct inode *inode,
-        struct page *page,
-        uint64_t end_offset)
-{
-        LIST_HEAD(submit_list);
-        struct iomap_page *iop = to_iomap_page(page);
-        unsigned len = i_blocksize(inode);
-        struct xfs_ioend *ioend, *next;
-        uint64_t file_offset;   /* file offset of page */
-        int error = 0, count = 0, i;
-
-        ASSERT(iop || i_blocksize(inode) == PAGE_SIZE);
-        ASSERT(!iop || atomic_read(&iop->write_count) == 0);
-
-        /*
-         * Walk through the page to find areas to write back. If we run off the
-         * end of the current map or find the current map invalid, grab a new
-         * one.
-         */
-        for (i = 0, file_offset = page_offset(page);
-             i < (PAGE_SIZE >> inode->i_blkbits) && file_offset < end_offset;
-             i++, file_offset += len) {
-                if (iop && !test_bit(i, iop->uptodate))
-                        continue;
-
-                error = xfs_map_blocks(wpc, inode, file_offset);
-                if (error)
-                        break;
-                if (wpc->io_type == XFS_IO_HOLE)
-                        continue;
-                xfs_add_to_ioend(inode, file_offset, page, iop, wpc, wbc,
-                                &submit_list);
-                count++;
-        }
-
-        ASSERT(wpc->ioend || list_empty(&submit_list));
-        ASSERT(PageLocked(page));
-        ASSERT(!PageWriteback(page));
-
-        /*
-         * On error, we have to fail the ioend here because we may have set
-         * pages under writeback, we have to make sure we run IO completion to
-         * mark the error state of the IO appropriately, so we can't cancel the
-         * ioend directly here. That means we have to mark this page as under
-         * writeback if we included any blocks from it in the ioend chain so
-         * that completion treats it correctly.
-         *
-         * If we didn't include the page in the ioend, the on error we can
-         * simply discard and unlock it as there are no other users of the page
-         * now. The caller will still need to trigger submission of outstanding
-         * ioends on the writepage context so they are treated correctly on
-         * error.
-         */
-        if (unlikely(error)) {
-                if (!count) {
-                        xfs_aops_discard_page(page);
-                        ClearPageUptodate(page);
-                        unlock_page(page);
-                        goto done;
-                }
-
-                /*
-                 * If the page was not fully cleaned, we need to ensure that the
-                 * higher layers come back to it correctly. That means we need
-                 * to keep the page dirty, and for WB_SYNC_ALL writeback we need
-                 * to ensure the PAGECACHE_TAG_TOWRITE index mark is not removed
-                 * so another attempt to write this page in this writeback sweep
-                 * will be made.
-                 */
-                set_page_writeback_keepwrite(page);
-        } else {
-                clear_page_dirty_for_io(page);
-                set_page_writeback(page);
-        }
-
-        unlock_page(page);
-
-        /*
-         * Preserve the original error if there was one, otherwise catch
-         * submission errors here and propagate into subsequent ioend
-         * submissions.
-         */
-        list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
-                int error2;
-
-                list_del_init(&ioend->io_list);
-                error2 = xfs_submit_ioend(wbc, ioend, error);
-                if (error2 && !error)
-                        error = error2;
-        }
-
-        /*
-         * We can end up here with no error and nothing to write only if we race
-         * with a partial page truncate on a sub-page block sized filesystem.
-         */
-        if (!count)
-                end_page_writeback(page);
-done:
-        mapping_set_error(page->mapping, error);
-        return error;
-}
-
-/*
- * Write out a dirty page.
- *
- * For delalloc space on the page we need to allocate space and flush it.
- * For unwritten space on the page we need to start the conversion to
- * regular allocated space.
- */
-STATIC int
-xfs_do_writepage(
-        struct page *page,
-        struct writeback_control *wbc,
-        void *data)
-{
-        struct xfs_writepage_ctx *wpc = data;
-        struct inode *inode = page->mapping->host;
-        loff_t offset;
-        uint64_t end_offset;
-        pgoff_t end_index;
-
-        trace_xfs_writepage(inode, page, 0, 0);
-
-        /*
-         * Refuse to write the page out if we are called from reclaim context.
-         *
-         * This avoids stack overflows when called from deeply used stacks in
-         * random callers for direct reclaim or memcg reclaim. We explicitly
-         * allow reclaim from kswapd as the stack usage there is relatively low.
-         *
-         * This should never happen except in the case of a VM regression so
-         * warn about it.
-         */
-        if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
-                        PF_MEMALLOC))
-                goto redirty;
-
-        /*
-         * Given that we do not allow direct reclaim to call us, we should
-         * never be called while in a filesystem transaction.
-         */
-        if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS))
-                goto redirty;
-
-        /*
-         * Is this page beyond the end of the file?
-         *
-         * The page index is less than the end_index, adjust the end_offset
-         * to the highest offset that this page should represent.
-         * -----------------------------------------------------
-         * |                    file mapping           | <EOF> |
-         * -----------------------------------------------------
-         * | Page ... | Page N-2 | Page N-1 |  Page N  |       |
-         * ^--------------------------------^----------|--------
-         * |     desired writeback range    |      see else    |
-         * ---------------------------------^------------------|
-         */
-        offset = i_size_read(inode);
-        end_index = offset >> PAGE_SHIFT;
-        if (page->index < end_index)
-                end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT;
-        else {
-                /*
-                 * Check whether the page to write out is beyond or straddles
-                 * i_size or not.
-                 * -------------------------------------------------------
-                 * |                file mapping                | <EOF>  |
-                 * -------------------------------------------------------
-                 * | Page ... | Page N-2 | Page N-1 |  Page N   | Beyond |
-                 * ^--------------------------------^-----------|---------
-                 * |                                |   Straddles       |
-                 * ---------------------------------^-----------|--------|
-                 */
-                unsigned offset_into_page = offset & (PAGE_SIZE - 1);
-
-                /*
-                 * Skip the page if it is fully outside i_size, e.g. due to a
-                 * truncate operation that is in progress. We must redirty the
-                 * page so that reclaim stops reclaiming it. Otherwise
-                 * xfs_vm_releasepage() is called on it and gets confused.
-                 *
-                 * Note that the end_index is unsigned long, it would overflow
-                 * if the given offset is greater than 16TB on 32-bit system
-                 * and if we do check the page is fully outside i_size or not
-                 * via "if (page->index >= end_index + 1)" as "end_index + 1"
-                 * will be evaluated to 0. Hence this page will be redirtied
-                 * and be written out repeatedly which would result in an
-                 * infinite loop, the user program that perform this operation
-                 * will hang. Instead, we can verify this situation by checking
-                 * if the page to write is totally beyond the i_size or if it's
-                 * offset is just equal to the EOF.
-                 */
-                if (page->index > end_index ||
-                    (page->index == end_index && offset_into_page == 0))
-                        goto redirty;
-
-                /*
-                 * The page straddles i_size. It must be zeroed out on each
-                 * and every writepage invocation because it may be mmapped.
-                 * "A file is mapped in multiples of the page size. For a file
-                 * that is not a multiple of the page size, the remaining
-                 * memory is zeroed when mapped, and writes to that region are
-                 * not written out to the file."
-                 */
-                zero_user_segment(page, offset_into_page, PAGE_SIZE);
-
-                /* Adjust the end_offset to the end of file */
-                end_offset = offset;
-        }
-
-        return xfs_writepage_map(wpc, wbc, inode, page, end_offset);
-
-redirty:
-        redirty_page_for_writepage(wbc, page);
-        unlock_page(page);
-        return 0;
-}
+static const struct iomap_writeback_ops xfs_writeback_ops = {
+        .map_blocks = xfs_map_blocks,
+        .prepare_ioend = xfs_prepare_ioend,
+        .discard_page = xfs_discard_page,
+};
 
 STATIC int
 xfs_vm_writepage(
         struct page *page,
         struct writeback_control *wbc)
 {
-        struct xfs_writepage_ctx wpc = {
-                .io_type = XFS_IO_INVALID,
-        };
-        int ret;
+        struct xfs_writepage_ctx wpc = { };
 
-        ret = xfs_do_writepage(page, wbc, &wpc);
-        if (wpc.ioend)
-                ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
-        return ret;
+        if (WARN_ON_ONCE(current->journal_info)) {
+                redirty_page_for_writepage(wbc, page);
+                unlock_page(page);
+                return 0;
+        }
+
+        return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops);
 }
 
 STATIC int
@@ -934,16 +591,17 @@
         struct address_space *mapping,
         struct writeback_control *wbc)
 {
-        struct xfs_writepage_ctx wpc = {
-                .io_type = XFS_IO_INVALID,
-        };
-        int ret;
+        struct xfs_writepage_ctx wpc = { };
+
+        /*
+         * Writing back data in a transaction context can result in recursive
+         * transactions. This is bad, so issue a warning and get out of here.
+         */
+        if (WARN_ON_ONCE(current->journal_info))
+                return 0;
 
         xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
-        ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
-        if (wpc.ioend)
-                ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
-        return ret;
+        return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
 }
 
 STATIC int
@@ -951,18 +609,11 @@
         struct address_space *mapping,
         struct writeback_control *wbc)
 {
-        xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
-        return dax_writeback_mapping_range(mapping,
-                        xfs_find_bdev_for_inode(mapping->host), wbc);
-}
+        struct xfs_inode *ip = XFS_I(mapping->host);
 
-STATIC int
-xfs_vm_releasepage(
-        struct page *page,
-        gfp_t gfp_mask)
-{
-        trace_xfs_releasepage(page->mapping->host, page, 0, 0);
-        return iomap_releasepage(page, gfp_mask);
+        xfs_iflags_clear(ip, XFS_ITRUNCATED);
+        return dax_writeback_mapping_range(mapping,
+                        xfs_inode_buftarg(ip)->bt_daxdev, wbc);
 }
 
 STATIC sector_t
@@ -983,9 +634,9 @@
          * Since we don't pass back blockdev info, we can't return bmap
          * information for rt files either.
          */
-        if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
+        if (xfs_is_cow_inode(ip) || XFS_IS_REALTIME_INODE(ip))
                 return 0;
-        return iomap_bmap(mapping, block, &xfs_iomap_ops);
+        return iomap_bmap(mapping, block, &xfs_read_iomap_ops);
 }
 
 STATIC int
@@ -993,19 +644,14 @@
         struct file *unused,
         struct page *page)
 {
-        trace_xfs_vm_readpage(page->mapping->host, 1);
-        return iomap_readpage(page, &xfs_iomap_ops);
+        return iomap_readpage(page, &xfs_read_iomap_ops);
 }
 
-STATIC int
-xfs_vm_readpages(
-        struct file *unused,
-        struct address_space *mapping,
-        struct list_head *pages,
-        unsigned nr_pages)
+STATIC void
+xfs_vm_readahead(
+        struct readahead_control *rac)
 {
-        trace_xfs_vm_readpages(mapping->host, nr_pages);
-        return iomap_readpages(mapping, pages, nr_pages, &xfs_iomap_ops);
+        iomap_readahead(rac, &xfs_read_iomap_ops);
 }
 
 static int
@@ -1014,18 +660,19 @@
         struct file *swap_file,
         sector_t *span)
 {
-        sis->bdev = xfs_find_bdev_for_inode(file_inode(swap_file));
-        return iomap_swapfile_activate(sis, swap_file, span, &xfs_iomap_ops);
+        sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
+        return iomap_swapfile_activate(sis, swap_file, span,
+                        &xfs_read_iomap_ops);
 }
 
 const struct address_space_operations xfs_address_space_operations = {
         .readpage = xfs_vm_readpage,
-        .readpages = xfs_vm_readpages,
+        .readahead = xfs_vm_readahead,
         .writepage = xfs_vm_writepage,
         .writepages = xfs_vm_writepages,
         .set_page_dirty = iomap_set_page_dirty,
-        .releasepage = xfs_vm_releasepage,
-        .invalidatepage = xfs_vm_invalidatepage,
+        .releasepage = iomap_releasepage,
+        .invalidatepage = iomap_invalidatepage,
         .bmap = xfs_vm_bmap,
         .direct_IO = noop_direct_IO,
         .migratepage = iomap_migrate_page,
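
The central structural move in this diff is that struct xfs_writepage_ctx now embeds the generic struct iomap_writepage_ctx as its first member, and XFS_WPC() uses container_of() to recover the XFS-private wrapper (with its data_seq/cow_seq revalidation counters) from the generic pointer that the iomap writeback code passes to callbacks such as ->map_blocks. The sketch below is a minimal, self-contained userspace illustration of that embed-and-recover idiom only; it is not kernel code, and the names generic_ctx, fs_ctx, FS_CTX and map_blocks are hypothetical stand-ins for the structures in the patch.

#include <stdio.h>
#include <stddef.h>

/* Userspace stand-in for the kernel's container_of() macro. */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

/* Stand-in for struct iomap_writepage_ctx: the generic, shared state. */
struct generic_ctx {
        long long offset;
        long long length;
};

/* Stand-in for struct xfs_writepage_ctx: embeds the generic ctx. */
struct fs_ctx {
        struct generic_ctx ctx;         /* recovered via container_of() */
        unsigned int data_seq;
        unsigned int cow_seq;
};

/* Mirror of XFS_WPC(): get the wrapper back from the embedded member. */
static inline struct fs_ctx *FS_CTX(struct generic_ctx *ctx)
{
        return container_of(ctx, struct fs_ctx, ctx);
}

/* A callback that, like ->map_blocks, only sees the generic pointer. */
static void map_blocks(struct generic_ctx *ctx)
{
        struct fs_ctx *wpc = FS_CTX(ctx);

        /* The filesystem-private sequence counters are reachable again. */
        printf("data_seq=%u cow_seq=%u\n", wpc->data_seq, wpc->cow_seq);
}

int main(void)
{
        struct fs_ctx wpc = { .data_seq = 42, .cow_seq = 7 };

        map_blocks(&wpc.ctx);   /* pass only the embedded generic part */
        return 0;
}

The same idiom is what lets the generic iomap writeback machinery stay oblivious to the XFS-only sequence-counter state while each xfs_map_blocks() call in the patch can still reach it through XFS_WPC().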