hc
2024-05-16 8d2a02b24d66aa359e83eebc1ed3c0f85367a1cb
kernel/fs/xfs/xfs_aops.c
@@ -12,149 +12,31 @@
 #include "xfs_mount.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
 #include "xfs_iomap.h"
 #include "xfs_trace.h"
 #include "xfs_bmap.h"
 #include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
 #include "xfs_reflink.h"
-#include <linux/writeback.h>
 
-/*
- * structure owned by writepages passed to individual writepage calls
- */
 struct xfs_writepage_ctx {
- struct xfs_bmbt_irec imap;
- unsigned int io_type;
+ struct iomap_writepage_ctx ctx;
+ unsigned int data_seq;
  unsigned int cow_seq;
- struct xfs_ioend *ioend;
 };
 
-struct block_device *
-xfs_find_bdev_for_inode(
- struct inode *inode)
+static inline struct xfs_writepage_ctx *
+XFS_WPC(struct iomap_writepage_ctx *ctx)
 {
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
-
- if (XFS_IS_REALTIME_INODE(ip))
- return mp->m_rtdev_targp->bt_bdev;
- else
- return mp->m_ddev_targp->bt_bdev;
-}
-
-struct dax_device *
-xfs_find_daxdev_for_inode(
- struct inode *inode)
-{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
-
- if (XFS_IS_REALTIME_INODE(ip))
- return mp->m_rtdev_targp->bt_daxdev;
- else
- return mp->m_ddev_targp->bt_daxdev;
-}
-
-static void
-xfs_finish_page_writeback(
- struct inode *inode,
- struct bio_vec *bvec,
- int error)
-{
- struct iomap_page *iop = to_iomap_page(bvec->bv_page);
-
- if (error) {
- SetPageError(bvec->bv_page);
- mapping_set_error(inode->i_mapping, -EIO);
- }
-
- ASSERT(iop || i_blocksize(inode) == PAGE_SIZE);
- ASSERT(!iop || atomic_read(&iop->write_count) > 0);
-
- if (!iop || atomic_dec_and_test(&iop->write_count))
- end_page_writeback(bvec->bv_page);
-}
-
-/*
- * We're now finished for good with this ioend structure. Update the page
- * state, release holds on bios, and finally free up memory. Do not use the
- * ioend after this.
- */
-STATIC void
-xfs_destroy_ioend(
- struct xfs_ioend *ioend,
- int error)
-{
- struct inode *inode = ioend->io_inode;
- struct bio *bio = &ioend->io_inline_bio;
- struct bio *last = ioend->io_bio, *next;
- u64 start = bio->bi_iter.bi_sector;
- bool quiet = bio_flagged(bio, BIO_QUIET);
-
- for (bio = &ioend->io_inline_bio; bio; bio = next) {
- struct bio_vec *bvec;
- int i;
-
- /*
- * For the last bio, bi_private points to the ioend, so we
- * need to explicitly end the iteration here.
- */
- if (bio == last)
- next = NULL;
- else
- next = bio->bi_private;
-
- /* walk each page on bio, ending page IO on them */
- bio_for_each_segment_all(bvec, bio, i)
- xfs_finish_page_writeback(inode, bvec, error);
- bio_put(bio);
- }
-
- if (unlikely(error && !quiet)) {
- xfs_err_ratelimited(XFS_I(inode)->i_mount,
- "writeback error on sector %llu", start);
- }
+ return container_of(ctx, struct xfs_writepage_ctx, ctx);
 }
 
 /*
  * Fast and loose check if this write could update the on-disk inode size.
  */
-static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
+static inline bool xfs_ioend_is_append(struct iomap_ioend *ioend)
 {
  return ioend->io_offset + ioend->io_size >
  XFS_I(ioend->io_inode)->i_d.di_size;
-}
-
-STATIC int
-xfs_setfilesize_trans_alloc(
- struct xfs_ioend *ioend)
-{
- struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
- struct xfs_trans *tp;
- int error;
-
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0,
- XFS_TRANS_NOFS, &tp);
- if (error)
- return error;
-
- ioend->io_append_trans = tp;
-
- /*
- * We may pass freeze protection with a transaction. So tell lockdep
- * we released it.
- */
- __sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS);
- /*
- * We hand off the transaction to the completion thread now, so
- * clear the flag here.
- */
- current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
- return 0;
 }
 
 /*
@@ -205,18 +87,18 @@
 
 STATIC int
 xfs_setfilesize_ioend(
- struct xfs_ioend *ioend,
+ struct iomap_ioend *ioend,
  int error)
 {
  struct xfs_inode *ip = XFS_I(ioend->io_inode);
- struct xfs_trans *tp = ioend->io_append_trans;
+ struct xfs_trans *tp = ioend->io_private;
 
  /*
  * The transaction may have been allocated in the I/O submission thread,
  * thus we need to mark ourselves as being in a transaction manually.
  * Similarly for freeze protection.
  */
- current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
+ xfs_trans_set_context(tp);
  __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
 
  /* we abort the update if there was an IO error */
@@ -232,104 +114,225 @@
  * IO write completion.
  */
 STATIC void
-xfs_end_io(
- struct work_struct *work)
+xfs_end_ioend(
+ struct iomap_ioend *ioend)
 {
- struct xfs_ioend *ioend =
- container_of(work, struct xfs_ioend, io_work);
  struct xfs_inode *ip = XFS_I(ioend->io_inode);
+ struct xfs_mount *mp = ip->i_mount;
  xfs_off_t offset = ioend->io_offset;
  size_t size = ioend->io_size;
+ unsigned int nofs_flag;
  int error;
+
+ /*
+ * We can allocate memory here while doing writeback on behalf of
+ * memory reclaim. To avoid memory allocation deadlocks set the
+ * task-wide nofs context for the following operations.
+ */
+ nofs_flag = memalloc_nofs_save();
 
  /*
  * Just clean up the in-memory strutures if the fs has been shut down.
  */
- if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+ if (XFS_FORCED_SHUTDOWN(mp)) {
  error = -EIO;
  goto done;
  }
 
  /*
- * Clean up any COW blocks on an I/O error.
+ * Clean up all COW blocks and underlying data fork delalloc blocks on
+ * I/O error. The delalloc punch is required because this ioend was
+ * mapped to blocks in the COW fork and the associated pages are no
+ * longer dirty. If we don't remove delalloc blocks here, they become
+ * stale and can corrupt free space accounting on unmount.
  */
  error = blk_status_to_errno(ioend->io_bio->bi_status);
  if (unlikely(error)) {
- switch (ioend->io_type) {
- case XFS_IO_COW:
+ if (ioend->io_flags & IOMAP_F_SHARED) {
  xfs_reflink_cancel_cow_range(ip, offset, size, true);
- break;
+ xfs_bmap_punch_delalloc_range(ip,
+ XFS_B_TO_FSBT(mp, offset),
+ XFS_B_TO_FSB(mp, size));
  }
-
  goto done;
  }
 
  /*
- * Success: commit the COW or unwritten blocks if needed.
+ * Success: commit the COW or unwritten blocks if needed.
  */
- switch (ioend->io_type) {
- case XFS_IO_COW:
+ if (ioend->io_flags & IOMAP_F_SHARED)
  error = xfs_reflink_end_cow(ip, offset, size);
- break;
- case XFS_IO_UNWRITTEN:
- /* writeback should never update isize */
+ else if (ioend->io_type == IOMAP_UNWRITTEN)
  error = xfs_iomap_write_unwritten(ip, offset, size, false);
- break;
- default:
- ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
- break;
- }
 
+ if (!error && xfs_ioend_is_append(ioend))
+ error = xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
 done:
- if (ioend->io_append_trans)
- error = xfs_setfilesize_ioend(ioend, error);
- xfs_destroy_ioend(ioend, error);
+ iomap_finish_ioends(ioend, error);
+ memalloc_nofs_restore(nofs_flag);
+}
+
+/*
+ * If the to be merged ioend has a preallocated transaction for file
+ * size updates we need to ensure the ioend it is merged into also
+ * has one. If it already has one we can simply cancel the transaction
+ * as it is guaranteed to be clean.
+ */
+static void
+xfs_ioend_merge_private(
+ struct iomap_ioend *ioend,
+ struct iomap_ioend *next)
+{
+ if (!ioend->io_private) {
+ ioend->io_private = next->io_private;
+ next->io_private = NULL;
+ } else {
+ xfs_setfilesize_ioend(next, -ECANCELED);
+ }
+}
+
+/* Finish all pending io completions. */
+void
+xfs_end_io(
+ struct work_struct *work)
+{
+ struct xfs_inode *ip =
+ container_of(work, struct xfs_inode, i_ioend_work);
+ struct iomap_ioend *ioend;
+ struct list_head tmp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ip->i_ioend_lock, flags);
+ list_replace_init(&ip->i_ioend_list, &tmp);
+ spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
+
+ iomap_sort_ioends(&tmp);
+ while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend,
+ io_list))) {
+ list_del_init(&ioend->io_list);
+ iomap_ioend_try_merge(ioend, &tmp, xfs_ioend_merge_private);
+ xfs_end_ioend(ioend);
+ }
+}
+
+static inline bool xfs_ioend_needs_workqueue(struct iomap_ioend *ioend)
+{
+ return xfs_ioend_is_append(ioend) ||
+ ioend->io_type == IOMAP_UNWRITTEN ||
+ (ioend->io_flags & IOMAP_F_SHARED);
 }
 
 STATIC void
 xfs_end_bio(
  struct bio *bio)
 {
- struct xfs_ioend *ioend = bio->bi_private;
- struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
+ struct iomap_ioend *ioend = bio->bi_private;
+ struct xfs_inode *ip = XFS_I(ioend->io_inode);
+ unsigned long flags;
 
- if (ioend->io_type == XFS_IO_UNWRITTEN || ioend->io_type == XFS_IO_COW)
- queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
- else if (ioend->io_append_trans)
- queue_work(mp->m_data_workqueue, &ioend->io_work);
- else
- xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
+ spin_lock_irqsave(&ip->i_ioend_lock, flags);
+ if (list_empty(&ip->i_ioend_list))
+ WARN_ON_ONCE(!queue_work(ip->i_mount->m_unwritten_workqueue,
+ &ip->i_ioend_work));
+ list_add_tail(&ioend->io_list, &ip->i_ioend_list);
+ spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
 }
 
-STATIC int
+/*
+ * Fast revalidation of the cached writeback mapping. Return true if the current
+ * mapping is valid, false otherwise.
+ */
+static bool
+xfs_imap_valid(
+ struct iomap_writepage_ctx *wpc,
+ struct xfs_inode *ip,
+ loff_t offset)
+{
+ if (offset < wpc->iomap.offset ||
+ offset >= wpc->iomap.offset + wpc->iomap.length)
+ return false;
+ /*
+ * If this is a COW mapping, it is sufficient to check that the mapping
+ * covers the offset. Be careful to check this first because the caller
+ * can revalidate a COW mapping without updating the data seqno.
+ */
+ if (wpc->iomap.flags & IOMAP_F_SHARED)
+ return true;
+
+ /*
+ * This is not a COW mapping. Check the sequence number of the data fork
+ * because concurrent changes could have invalidated the extent. Check
+ * the COW fork because concurrent changes since the last time we
+ * checked (and found nothing at this offset) could have added
+ * overlapping blocks.
+ */
+ if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq))
+ return false;
+ if (xfs_inode_has_cow_data(ip) &&
+ XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq))
+ return false;
+ return true;
+}
+
+/*
+ * Pass in a dellalloc extent and convert it to real extents, return the real
+ * extent that maps offset_fsb in wpc->iomap.
+ *
+ * The current page is held locked so nothing could have removed the block
+ * backing offset_fsb, although it could have moved from the COW to the data
+ * fork by another thread.
+ */
+static int
+xfs_convert_blocks(
+ struct iomap_writepage_ctx *wpc,
+ struct xfs_inode *ip,
+ int whichfork,
+ loff_t offset)
+{
+ int error;
+ unsigned *seq;
+
+ if (whichfork == XFS_COW_FORK)
+ seq = &XFS_WPC(wpc)->cow_seq;
+ else
+ seq = &XFS_WPC(wpc)->data_seq;
+
+ /*
+ * Attempt to allocate whatever delalloc extent currently backs offset
+ * and put the result into wpc->iomap. Allocate in a loop because it
+ * may take several attempts to allocate real blocks for a contiguous
+ * delalloc extent if free space is sufficiently fragmented.
+ */
+ do {
+ error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
+ &wpc->iomap, seq);
+ if (error)
+ return error;
+ } while (wpc->iomap.offset + wpc->iomap.length <= offset);
+
+ return 0;
+}
+
+static int
 xfs_map_blocks(
- struct xfs_writepage_ctx *wpc,
+ struct iomap_writepage_ctx *wpc,
  struct inode *inode,
  loff_t offset)
 {
  struct xfs_inode *ip = XFS_I(inode);
  struct xfs_mount *mp = ip->i_mount;
  ssize_t count = i_blocksize(inode);
- xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
- xfs_fileoff_t cow_fsb = NULLFILEOFF;
+ xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count);
+ xfs_fileoff_t cow_fsb;
+ int whichfork;
  struct xfs_bmbt_irec imap;
- int whichfork = XFS_DATA_FORK;
  struct xfs_iext_cursor icur;
- bool imap_valid;
+ int retries = 0;
  int error = 0;
 
- /*
- * We have to make sure the cached mapping is within EOF to protect
- * against eofblocks trimming on file release leaving us with a stale
- * mapping. Otherwise, a page for a subsequent file extending buffered
- * write could get picked up by this writeback cycle and written to the
- * wrong blocks.
- *
- * Note that what we really want here is a generic mapping invalidation
- * mechanism to protect us from arbitrary extent modifying contexts, not
- * just eofblocks.
- */
- xfs_trim_extent_eof(&wpc->imap, ip);
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
 
  /*
  * COW fork blocks can overlap data fork blocks even if the blocks
@@ -346,16 +349,8 @@
  * against concurrent updates and provides a memory barrier on the way
  * out that ensures that we always see the current value.
  */
- imap_valid = offset_fsb >= wpc->imap.br_startoff &&
- offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
- if (imap_valid &&
- (!xfs_inode_has_cow_data(ip) ||
- wpc->io_type == XFS_IO_COW ||
- wpc->cow_seq == READ_ONCE(ip->i_cowfp->if_seq)))
+ if (xfs_imap_valid(wpc, ip, offset))
  return 0;
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -EIO;
 
  /*
  * If we don't have a valid map, now it's time to get a new one for this
@@ -363,14 +358,12 @@
  * into real extents. If we return without a valid map, it means we
  * landed in a hole and we skip the block.
  */
+retry:
+ cow_fsb = NULLFILEOFF;
+ whichfork = XFS_DATA_FORK;
  xfs_ilock(ip, XFS_ILOCK_SHARED);
- ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
+ ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
  (ip->i_df.if_flags & XFS_IFEXTENTS));
- ASSERT(offset <= mp->m_super->s_maxbytes);
-
- if (offset > mp->m_super->s_maxbytes - count)
- count = mp->m_super->s_maxbytes - offset;
- end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
 
  /*
  * Check if this is offset is covered by a COW extents, and if yes use
@@ -380,32 +373,18 @@
  xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
  cow_fsb = imap.br_startoff;
  if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
- wpc->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
+ XFS_WPC(wpc)->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
  xfs_iunlock(ip, XFS_ILOCK_SHARED);
- /*
- * Truncate can race with writeback since writeback doesn't
- * take the iolock and truncate decreases the file size before
- * it starts truncating the pages between new_size and old_size.
- * Therefore, we can end up in the situation where writeback
- * gets a CoW fork mapping but the truncate makes the mapping
- * invalid and we end up in here trying to get a new mapping.
- * bail out here so that we simply never get a valid mapping
- * and so we drop the write altogether. The page truncation
- * will kill the contents anyway.
- */
- if (offset > i_size_read(inode)) {
- wpc->io_type = XFS_IO_HOLE;
- return 0;
- }
+
  whichfork = XFS_COW_FORK;
- wpc->io_type = XFS_IO_COW;
  goto allocate_blocks;
  }
 
  /*
- * Map valid and no COW extent in the way? We're done.
+ * No COW extent overlap. Revalidate now that we may have updated
+ * ->cow_seq. If the data mapping is still valid, we're done.
  */
- if (imap_valid) {
+ if (xfs_imap_valid(wpc, ip, offset)) {
  xfs_iunlock(ip, XFS_ILOCK_SHARED);
  return 0;
  }
@@ -417,225 +396,94 @@
  */
  if (!xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap))
  imap.br_startoff = end_fsb; /* fake a hole past EOF */
+ XFS_WPC(wpc)->data_seq = READ_ONCE(ip->i_df.if_seq);
  xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
+ /* landed in a hole or beyond EOF? */
  if (imap.br_startoff > offset_fsb) {
- /* landed in a hole or beyond EOF */
  imap.br_blockcount = imap.br_startoff - offset_fsb;
  imap.br_startoff = offset_fsb;
  imap.br_startblock = HOLESTARTBLOCK;
- wpc->io_type = XFS_IO_HOLE;
- } else {
- /*
- * Truncate to the next COW extent if there is one. This is the
- * only opportunity to do this because we can skip COW fork
- * lookups for the subsequent blocks in the mapping; however,
- * the requirement to treat the COW range separately remains.
- */
- if (cow_fsb != NULLFILEOFF &&
- cow_fsb < imap.br_startoff + imap.br_blockcount)
- imap.br_blockcount = cow_fsb - imap.br_startoff;
-
- if (isnullstartblock(imap.br_startblock)) {
- /* got a delalloc extent */
- wpc->io_type = XFS_IO_DELALLOC;
- goto allocate_blocks;
- }
-
- if (imap.br_state == XFS_EXT_UNWRITTEN)
- wpc->io_type = XFS_IO_UNWRITTEN;
- else
- wpc->io_type = XFS_IO_OVERWRITE;
+ imap.br_state = XFS_EXT_NORM;
  }
-
- wpc->imap = imap;
- xfs_trim_extent_eof(&wpc->imap, ip);
- trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
- return 0;
-allocate_blocks:
- error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap,
- &wpc->cow_seq);
- if (error)
- return error;
- ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
- imap.br_startoff + imap.br_blockcount <= cow_fsb);
- wpc->imap = imap;
- xfs_trim_extent_eof(&wpc->imap, ip);
- trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
- return 0;
-}
-
-/*
- * Submit the bio for an ioend. We are passed an ioend with a bio attached to
- * it, and we submit that bio. The ioend may be used for multiple bio
- * submissions, so we only want to allocate an append transaction for the ioend
- * once. In the case of multiple bio submission, each bio will take an IO
- * reference to the ioend to ensure that the ioend completion is only done once
- * all bios have been submitted and the ioend is really done.
- *
- * If @fail is non-zero, it means that we have a situation where some part of
- * the submission process has failed after we have marked paged for writeback
- * and unlocked them. In this situation, we need to fail the bio and ioend
- * rather than submit it to IO. This typically only happens on a filesystem
- * shutdown.
- */
-STATIC int
-xfs_submit_ioend(
- struct writeback_control *wbc,
- struct xfs_ioend *ioend,
- int status)
-{
- /* Convert CoW extents to regular */
- if (!status && ioend->io_type == XFS_IO_COW) {
- /*
- * Yuk. This can do memory allocation, but is not a
- * transactional operation so everything is done in GFP_KERNEL
- * context. That can deadlock, because we hold pages in
- * writeback state and GFP_KERNEL allocations can block on them.
- * Hence we must operate in nofs conditions here.
- */
- unsigned nofs_flag;
-
- nofs_flag = memalloc_nofs_save();
- status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
- ioend->io_offset, ioend->io_size);
- memalloc_nofs_restore(nofs_flag);
- }
-
- /* Reserve log space if we might write beyond the on-disk inode size. */
- if (!status &&
- ioend->io_type != XFS_IO_UNWRITTEN &&
- xfs_ioend_is_append(ioend) &&
- !ioend->io_append_trans)
- status = xfs_setfilesize_trans_alloc(ioend);
-
- ioend->io_bio->bi_private = ioend;
- ioend->io_bio->bi_end_io = xfs_end_bio;
- ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
 
  /*
- * If we are failing the IO now, just mark the ioend with an
- * error and finish it. This will run IO completion immediately
- * as there is only one reference to the ioend at this point in
- * time.
+ * Truncate to the next COW extent if there is one. This is the only
+ * opportunity to do this because we can skip COW fork lookups for the
+ * subsequent blocks in the mapping; however, the requirement to treat
+ * the COW range separately remains.
  */
- if (status) {
- ioend->io_bio->bi_status = errno_to_blk_status(status);
- bio_endio(ioend->io_bio);
- return status;
+ if (cow_fsb != NULLFILEOFF &&
+ cow_fsb < imap.br_startoff + imap.br_blockcount)
+ imap.br_blockcount = cow_fsb - imap.br_startoff;
+
+ /* got a delalloc extent? */
+ if (imap.br_startblock != HOLESTARTBLOCK &&
+ isnullstartblock(imap.br_startblock))
+ goto allocate_blocks;
+
+ xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0);
+ trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
+ return 0;
+allocate_blocks:
+ error = xfs_convert_blocks(wpc, ip, whichfork, offset);
+ if (error) {
+ /*
+ * If we failed to find the extent in the COW fork we might have
+ * raced with a COW to data fork conversion or truncate.
+ * Restart the lookup to catch the extent in the data fork for
+ * the former case, but prevent additional retries to avoid
+ * looping forever for the latter case.
+ */
+ if (error == -EAGAIN && whichfork == XFS_COW_FORK && !retries++)
+ goto retry;
+ ASSERT(error != -EAGAIN);
+ return error;
  }
 
- ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
- submit_bio(ioend->io_bio);
+ /*
+ * Due to merging the return real extent might be larger than the
+ * original delalloc one. Trim the return extent to the next COW
+ * boundary again to force a re-lookup.
+ */
+ if (whichfork != XFS_COW_FORK && cow_fsb != NULLFILEOFF) {
+ loff_t cow_offset = XFS_FSB_TO_B(mp, cow_fsb);
+
+ if (cow_offset < wpc->iomap.offset + wpc->iomap.length)
+ wpc->iomap.length = cow_offset - wpc->iomap.offset;
+ }
+
+ ASSERT(wpc->iomap.offset <= offset);
+ ASSERT(wpc->iomap.offset + wpc->iomap.length > offset);
+ trace_xfs_map_blocks_alloc(ip, offset, count, whichfork, &imap);
  return 0;
 }
 
-static struct xfs_ioend *
-xfs_alloc_ioend(
- struct inode *inode,
- unsigned int type,
- xfs_off_t offset,
- struct block_device *bdev,
- sector_t sector)
+static int
+xfs_prepare_ioend(
+ struct iomap_ioend *ioend,
+ int status)
 {
- struct xfs_ioend *ioend;
- struct bio *bio;
+ unsigned int nofs_flag;
 
- bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset);
- bio_set_dev(bio, bdev);
- bio->bi_iter.bi_sector = sector;
+ /*
+ * We can allocate memory here while doing writeback on behalf of
+ * memory reclaim. To avoid memory allocation deadlocks set the
+ * task-wide nofs context for the following operations.
+ */
+ nofs_flag = memalloc_nofs_save();
 
- ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
- INIT_LIST_HEAD(&ioend->io_list);
- ioend->io_type = type;
- ioend->io_inode = inode;
- ioend->io_size = 0;
- ioend->io_offset = offset;
- INIT_WORK(&ioend->io_work, xfs_end_io);
- ioend->io_append_trans = NULL;
- ioend->io_bio = bio;
- return ioend;
-}
-
-/*
- * Allocate a new bio, and chain the old bio to the new one.
- *
- * Note that we have to do perform the chaining in this unintuitive order
- * so that the bi_private linkage is set up in the right direction for the
- * traversal in xfs_destroy_ioend().
- */
-static void
-xfs_chain_bio(
- struct xfs_ioend *ioend,
- struct writeback_control *wbc,
- struct block_device *bdev,
- sector_t sector)
-{
- struct bio *new;
-
- new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
- bio_set_dev(new, bdev);
- new->bi_iter.bi_sector = sector;
- bio_chain(ioend->io_bio, new);
- bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
- ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
- ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
- submit_bio(ioend->io_bio);
- ioend->io_bio = new;
-}
-
-/*
- * Test to see if we have an existing ioend structure that we could append to
- * first, otherwise finish off the current ioend and start another.
- */
-STATIC void
-xfs_add_to_ioend(
- struct inode *inode,
- xfs_off_t offset,
- struct page *page,
- struct iomap_page *iop,
- struct xfs_writepage_ctx *wpc,
- struct writeback_control *wbc,
- struct list_head *iolist)
-{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- struct block_device *bdev = xfs_find_bdev_for_inode(inode);
- unsigned len = i_blocksize(inode);
- unsigned poff = offset & (PAGE_SIZE - 1);
- sector_t sector;
-
- sector = xfs_fsb_to_db(ip, wpc->imap.br_startblock) +
- ((offset - XFS_FSB_TO_B(mp, wpc->imap.br_startoff)) >> 9);
-
- if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
- sector != bio_end_sector(wpc->ioend->io_bio) ||
- offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
- if (wpc->ioend)
- list_add(&wpc->ioend->io_list, iolist);
- wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset,
- bdev, sector);
+ /* Convert CoW extents to regular */
+ if (!status && (ioend->io_flags & IOMAP_F_SHARED)) {
+ status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
+ ioend->io_offset, ioend->io_size);
  }
 
- if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff)) {
- if (iop)
- atomic_inc(&iop->write_count);
- if (bio_full(wpc->ioend->io_bio))
- xfs_chain_bio(wpc->ioend, wbc, bdev, sector);
- __bio_add_page(wpc->ioend->io_bio, page, len, poff);
- }
+ memalloc_nofs_restore(nofs_flag);
 
- wpc->ioend->io_size += len;
-}
-
-STATIC void
-xfs_vm_invalidatepage(
- struct page *page,
- unsigned int offset,
- unsigned int length)
-{
- trace_xfs_invalidatepage(page->mapping->host, page, offset, length);
- iomap_invalidatepage(page, offset, length);
+ if (xfs_ioend_needs_workqueue(ioend))
+ ioend->io_bio->bi_end_io = xfs_end_bio;
+ return status;
 }
 
 /*
@@ -649,284 +497,54 @@
  * transaction as there is no space left for block reservation (typically why we
  * see a ENOSPC in writeback).
  */
-STATIC void
-xfs_aops_discard_page(
- struct page *page)
+static void
+xfs_discard_page(
+ struct page *page,
+ loff_t fileoff)
 {
  struct inode *inode = page->mapping->host;
  struct xfs_inode *ip = XFS_I(inode);
  struct xfs_mount *mp = ip->i_mount;
- loff_t offset = page_offset(page);
- xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, offset);
+ unsigned int pageoff = offset_in_page(fileoff);
+ xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, fileoff);
+ xfs_fileoff_t pageoff_fsb = XFS_B_TO_FSBT(mp, pageoff);
  int error;
 
  if (XFS_FORCED_SHUTDOWN(mp))
  goto out_invalidate;
 
- xfs_alert(mp,
+ xfs_alert_ratelimited(mp,
  "page discard on page "PTR_FMT", inode 0x%llx, offset %llu.",
- page, ip->i_ino, offset);
+ page, ip->i_ino, fileoff);
 
  error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
- PAGE_SIZE / i_blocksize(inode));
+ i_blocks_per_page(inode, page) - pageoff_fsb);
  if (error && !XFS_FORCED_SHUTDOWN(mp))
  xfs_alert(mp, "page discard unable to remove delalloc mapping.");
 out_invalidate:
- xfs_vm_invalidatepage(page, 0, PAGE_SIZE);
+ iomap_invalidatepage(page, pageoff, PAGE_SIZE - pageoff);
 }
 
-/*
- * We implement an immediate ioend submission policy here to avoid needing to
- * chain multiple ioends and hence nest mempool allocations which can violate
- * forward progress guarantees we need to provide. The current ioend we are
- * adding blocks to is cached on the writepage context, and if the new block
- * does not append to the cached ioend it will create a new ioend and cache that
- * instead.
- *
- * If a new ioend is created and cached, the old ioend is returned and queued
- * locally for submission once the entire page is processed or an error has been
- * detected. While ioends are submitted immediately after they are completed,
- * batching optimisations are provided by higher level block plugging.
- *
- * At the end of a writeback pass, there will be a cached ioend remaining on the
- * writepage context that the caller will need to submit.
- */
-static int
-xfs_writepage_map(
- struct xfs_writepage_ctx *wpc,
- struct writeback_control *wbc,
- struct inode *inode,
- struct page *page,
- uint64_t end_offset)
-{
- LIST_HEAD(submit_list);
- struct iomap_page *iop = to_iomap_page(page);
- unsigned len = i_blocksize(inode);
- struct xfs_ioend *ioend, *next;
- uint64_t file_offset; /* file offset of page */
- int error = 0, count = 0, i;
-
- ASSERT(iop || i_blocksize(inode) == PAGE_SIZE);
- ASSERT(!iop || atomic_read(&iop->write_count) == 0);
-
- /*
- * Walk through the page to find areas to write back. If we run off the
- * end of the current map or find the current map invalid, grab a new
- * one.
- */
- for (i = 0, file_offset = page_offset(page);
- i < (PAGE_SIZE >> inode->i_blkbits) && file_offset < end_offset;
- i++, file_offset += len) {
- if (iop && !test_bit(i, iop->uptodate))
- continue;
-
- error = xfs_map_blocks(wpc, inode, file_offset);
- if (error)
- break;
- if (wpc->io_type == XFS_IO_HOLE)
- continue;
- xfs_add_to_ioend(inode, file_offset, page, iop, wpc, wbc,
- &submit_list);
- count++;
- }
-
- ASSERT(wpc->ioend || list_empty(&submit_list));
- ASSERT(PageLocked(page));
- ASSERT(!PageWriteback(page));
-
- /*
- * On error, we have to fail the ioend here because we may have set
- * pages under writeback, we have to make sure we run IO completion to
- * mark the error state of the IO appropriately, so we can't cancel the
- * ioend directly here. That means we have to mark this page as under
- * writeback if we included any blocks from it in the ioend chain so
- * that completion treats it correctly.
- *
- * If we didn't include the page in the ioend, the on error we can
- * simply discard and unlock it as there are no other users of the page
- * now. The caller will still need to trigger submission of outstanding
- * ioends on the writepage context so they are treated correctly on
- * error.
- */
- if (unlikely(error)) {
- if (!count) {
- xfs_aops_discard_page(page);
- ClearPageUptodate(page);
- unlock_page(page);
- goto done;
- }
-
- /*
- * If the page was not fully cleaned, we need to ensure that the
- * higher layers come back to it correctly. That means we need
- * to keep the page dirty, and for WB_SYNC_ALL writeback we need
- * to ensure the PAGECACHE_TAG_TOWRITE index mark is not removed
- * so another attempt to write this page in this writeback sweep
- * will be made.
- */
- set_page_writeback_keepwrite(page);
- } else {
- clear_page_dirty_for_io(page);
- set_page_writeback(page);
- }
-
- unlock_page(page);
-
- /*
- * Preserve the original error if there was one, otherwise catch
- * submission errors here and propagate into subsequent ioend
- * submissions.
- */
- list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
- int error2;
-
- list_del_init(&ioend->io_list);
- error2 = xfs_submit_ioend(wbc, ioend, error);
- if (error2 && !error)
- error = error2;
- }
-
- /*
- * We can end up here with no error and nothing to write only if we race
- * with a partial page truncate on a sub-page block sized filesystem.
- */
- if (!count)
- end_page_writeback(page);
-done:
- mapping_set_error(page->mapping, error);
- return error;
-}
-
-/*
- * Write out a dirty page.
- *
- * For delalloc space on the page we need to allocate space and flush it.
- * For unwritten space on the page we need to start the conversion to
- * regular allocated space.
- */
-STATIC int
-xfs_do_writepage(
- struct page *page,
- struct writeback_control *wbc,
- void *data)
-{
- struct xfs_writepage_ctx *wpc = data;
- struct inode *inode = page->mapping->host;
- loff_t offset;
- uint64_t end_offset;
- pgoff_t end_index;
-
- trace_xfs_writepage(inode, page, 0, 0);
-
- /*
- * Refuse to write the page out if we are called from reclaim context.
- *
- * This avoids stack overflows when called from deeply used stacks in
- * random callers for direct reclaim or memcg reclaim. We explicitly
- * allow reclaim from kswapd as the stack usage there is relatively low.
- *
- * This should never happen except in the case of a VM regression so
- * warn about it.
- */
- if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
- PF_MEMALLOC))
- goto redirty;
-
- /*
- * Given that we do not allow direct reclaim to call us, we should
- * never be called while in a filesystem transaction.
- */
- if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS))
- goto redirty;
-
- /*
- * Is this page beyond the end of the file?
- *
- * The page index is less than the end_index, adjust the end_offset
- * to the highest offset that this page should represent.
- * -----------------------------------------------------
- * | file mapping | <EOF> |
- * -----------------------------------------------------
- * | Page ... | Page N-2 | Page N-1 | Page N | |
- * ^--------------------------------^----------|--------
- * | desired writeback range | see else |
- * ---------------------------------^------------------|
- */
- offset = i_size_read(inode);
- end_index = offset >> PAGE_SHIFT;
- if (page->index < end_index)
- end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT;
- else {
- /*
- * Check whether the page to write out is beyond or straddles
- * i_size or not.
- * -------------------------------------------------------
- * | file mapping | <EOF> |
- * -------------------------------------------------------
- * | Page ... | Page N-2 | Page N-1 | Page N | Beyond |
- * ^--------------------------------^-----------|---------
- * | | Straddles |
- * ---------------------------------^-----------|--------|
- */
- unsigned offset_into_page = offset & (PAGE_SIZE - 1);
-
- /*
- * Skip the page if it is fully outside i_size, e.g. due to a
- * truncate operation that is in progress. We must redirty the
- * page so that reclaim stops reclaiming it. Otherwise
- * xfs_vm_releasepage() is called on it and gets confused.
- *
- * Note that the end_index is unsigned long, it would overflow
- * if the given offset is greater than 16TB on 32-bit system
- * and if we do check the page is fully outside i_size or not
- * via "if (page->index >= end_index + 1)" as "end_index + 1"
- * will be evaluated to 0. Hence this page will be redirtied
- * and be written out repeatedly which would result in an
- * infinite loop, the user program that perform this operation
- * will hang. Instead, we can verify this situation by checking
- * if the page to write is totally beyond the i_size or if it's
- * offset is just equal to the EOF.
- */
- if (page->index > end_index ||
- (page->index == end_index && offset_into_page == 0))
- goto redirty;
-
- /*
- * The page straddles i_size. It must be zeroed out on each
- * and every writepage invocation because it may be mmapped.
- * "A file is mapped in multiples of the page size. For a file
- * that is not a multiple of the page size, the remaining
- * memory is zeroed when mapped, and writes to that region are
- * not written out to the file."
- */
- zero_user_segment(page, offset_into_page, PAGE_SIZE);
-
- /* Adjust the end_offset to the end of file */
- end_offset = offset;
- }
-
- return xfs_writepage_map(wpc, wbc, inode, page, end_offset);
-
-redirty:
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
-}
+static const struct iomap_writeback_ops xfs_writeback_ops = {
+ .map_blocks = xfs_map_blocks,
+ .prepare_ioend = xfs_prepare_ioend,
+ .discard_page = xfs_discard_page,
+};
 
 STATIC int
 xfs_vm_writepage(
  struct page *page,
  struct writeback_control *wbc)
 {
- struct xfs_writepage_ctx wpc = {
- .io_type = XFS_IO_INVALID,
- };
- int ret;
+ struct xfs_writepage_ctx wpc = { };
 
- ret = xfs_do_writepage(page, wbc, &wpc);
- if (wpc.ioend)
- ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
- return ret;
+ if (WARN_ON_ONCE(current->journal_info)) {
+ redirty_page_for_writepage(wbc, page);
+ unlock_page(page);
+ return 0;
+ }
+
+ return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops);
 }
 
 STATIC int
@@ -934,16 +552,17 @@
  struct address_space *mapping,
  struct writeback_control *wbc)
 {
- struct xfs_writepage_ctx wpc = {
- .io_type = XFS_IO_INVALID,
- };
- int ret;
+ struct xfs_writepage_ctx wpc = { };
+
+ /*
+ * Writing back data in a transaction context can result in recursive
+ * transactions. This is bad, so issue a warning and get out of here.
+ */
+ if (WARN_ON_ONCE(current->journal_info))
+ return 0;
 
  xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
- ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
- if (wpc.ioend)
- ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
- return ret;
+ return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
 }
 
 STATIC int
@@ -951,18 +570,11 @@
  struct address_space *mapping,
  struct writeback_control *wbc)
 {
- xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
- return dax_writeback_mapping_range(mapping,
- xfs_find_bdev_for_inode(mapping->host), wbc);
-}
+ struct xfs_inode *ip = XFS_I(mapping->host);
 
-STATIC int
-xfs_vm_releasepage(
- struct page *page,
- gfp_t gfp_mask)
-{
- trace_xfs_releasepage(page->mapping->host, page, 0, 0);
- return iomap_releasepage(page, gfp_mask);
+ xfs_iflags_clear(ip, XFS_ITRUNCATED);
+ return dax_writeback_mapping_range(mapping,
+ xfs_inode_buftarg(ip)->bt_daxdev, wbc);
 }
 
 STATIC sector_t
@@ -983,9 +595,9 @@
  * Since we don't pass back blockdev info, we can't return bmap
  * information for rt files either.
  */
- if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
+ if (xfs_is_cow_inode(ip) || XFS_IS_REALTIME_INODE(ip))
  return 0;
- return iomap_bmap(mapping, block, &xfs_iomap_ops);
+ return iomap_bmap(mapping, block, &xfs_read_iomap_ops);
 }
 
 STATIC int
@@ -993,19 +605,14 @@
  struct file *unused,
  struct page *page)
 {
- trace_xfs_vm_readpage(page->mapping->host, 1);
- return iomap_readpage(page, &xfs_iomap_ops);
+ return iomap_readpage(page, &xfs_read_iomap_ops);
 }
 
-STATIC int
-xfs_vm_readpages(
- struct file *unused,
- struct address_space *mapping,
- struct list_head *pages,
- unsigned nr_pages)
+STATIC void
+xfs_vm_readahead(
+ struct readahead_control *rac)
 {
- trace_xfs_vm_readpages(mapping->host, nr_pages);
- return iomap_readpages(mapping, pages, nr_pages, &xfs_iomap_ops);
+ iomap_readahead(rac, &xfs_read_iomap_ops);
 }
 
 static int
@@ -1014,18 +621,19 @@
  struct file *swap_file,
  sector_t *span)
 {
- sis->bdev = xfs_find_bdev_for_inode(file_inode(swap_file));
- return iomap_swapfile_activate(sis, swap_file, span, &xfs_iomap_ops);
+ sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
+ return iomap_swapfile_activate(sis, swap_file, span,
+ &xfs_read_iomap_ops);
 }
 
 const struct address_space_operations xfs_address_space_operations = {
  .readpage = xfs_vm_readpage,
- .readpages = xfs_vm_readpages,
+ .readahead = xfs_vm_readahead,
  .writepage = xfs_vm_writepage,
  .writepages = xfs_vm_writepages,
  .set_page_dirty = iomap_set_page_dirty,
- .releasepage = xfs_vm_releasepage,
- .invalidatepage = xfs_vm_invalidatepage,
+ .releasepage = iomap_releasepage,
+ .invalidatepage = iomap_invalidatepage,
  .bmap = xfs_vm_bmap,
  .direct_IO = noop_direct_IO,
  .migratepage = iomap_migrate_page,