hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/fs/xfs/xfs_buf_item.c
....@@ -5,19 +5,22 @@
55 */
66 #include "xfs.h"
77 #include "xfs_fs.h"
8
+#include "xfs_shared.h"
89 #include "xfs_format.h"
910 #include "xfs_log_format.h"
1011 #include "xfs_trans_resv.h"
1112 #include "xfs_bit.h"
12
-#include "xfs_sb.h"
1313 #include "xfs_mount.h"
1414 #include "xfs_trans.h"
15
-#include "xfs_buf_item.h"
1615 #include "xfs_trans_priv.h"
17
-#include "xfs_error.h"
16
+#include "xfs_buf_item.h"
17
+#include "xfs_inode.h"
18
+#include "xfs_inode_item.h"
19
+#include "xfs_quota.h"
20
+#include "xfs_dquot_item.h"
21
+#include "xfs_dquot.h"
1822 #include "xfs_trace.h"
1923 #include "xfs_log.h"
20
-#include "xfs_inode.h"
2124
2225
2326 kmem_zone_t *xfs_buf_item_zone;
....@@ -27,7 +30,22 @@
2730 return container_of(lip, struct xfs_buf_log_item, bli_item);
2831 }
2932
30
-STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
33
+/* Is this log iovec plausibly large enough to contain the buffer log format? */
34
+bool
35
+xfs_buf_log_check_iovec(
36
+ struct xfs_log_iovec *iovec)
37
+{
38
+ struct xfs_buf_log_format *blfp = iovec->i_addr;
39
+ char *bmp_end;
40
+ char *item_end;
41
+
42
+ if (offsetof(struct xfs_buf_log_format, blf_data_map) > iovec->i_len)
43
+ return false;
44
+
45
+ item_end = (char *)iovec->i_addr + iovec->i_len;
46
+ bmp_end = (char *)&blfp->blf_data_map[blfp->blf_map_size];
47
+ return bmp_end <= item_end;
48
+}
3149
3250 static inline int
3351 xfs_buf_log_format_size(
....@@ -38,14 +56,12 @@
3856 }
3957
4058 /*
41
- * This returns the number of log iovecs needed to log the
42
- * given buf log item.
59
+ * Return the number of log iovecs and space needed to log the given buf log
60
+ * item segment.
4361 *
44
- * It calculates this as 1 iovec for the buf log format structure
45
- * and 1 for each stretch of non-contiguous chunks to be logged.
46
- * Contiguous chunks are logged in a single iovec.
47
- *
48
- * If the XFS_BLI_STALE flag has been set, then log nothing.
62
+ * It calculates this as 1 iovec for the buf log format structure and 1 for each
63
+ * stretch of non-contiguous chunks to be logged. Contiguous chunks are logged
64
+ * in a single iovec.
4965 */
5066 STATIC void
5167 xfs_buf_item_size_segment(
....@@ -101,13 +117,10 @@
101117 }
102118
103119 /*
104
- * This returns the number of log iovecs needed to log the given buf log item.
120
+ * Return the number of log iovecs and space needed to log the given buf log
121
+ * item.
105122 *
106
- * It calculates this as 1 iovec for the buf log format structure and 1 for each
107
- * stretch of non-contiguous chunks to be logged. Contiguous chunks are logged
108
- * in a single iovec.
109
- *
110
- * Discontiguous buffers need a format structure per region that that is being
123
+ * Discontiguous buffers need a format structure per region that is being
111124 * logged. This makes the changes in the buffer appear to log recovery as though
112125 * they came from separate buffers, just like would occur if multiple buffers
113126 * were used instead of a single discontiguous buffer. This enables
....@@ -115,7 +128,11 @@
115128 * what ends up on disk.
116129 *
117130 * If the XFS_BLI_STALE flag has been set, then log nothing but the buf log
118
- * format structures.
131
+ * format structures. If the item has previously been logged and has dirty
132
+ * regions, we do not relog them in stale buffers. This has the effect of
133
+ * reducing the size of the relogged item by the amount of dirty data tracked
134
+ * by the log item. This can result in the committing transaction reducing the
135
+ * amount of space being consumed by the CIL.
119136 */
120137 STATIC void
121138 xfs_buf_item_size(
....@@ -129,9 +146,9 @@
129146 ASSERT(atomic_read(&bip->bli_refcount) > 0);
130147 if (bip->bli_flags & XFS_BLI_STALE) {
131148 /*
132
- * The buffer is stale, so all we need to log
133
- * is the buf log format structure with the
134
- * cancel flag in it.
149
+ * The buffer is stale, so all we need to log is the buf log
150
+ * format structure with the cancel flag in it as we are never
151
+ * going to replay the changes tracked in the log item.
135152 */
136153 trace_xfs_buf_item_size_stale(bip);
137154 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
....@@ -146,9 +163,9 @@
146163
147164 if (bip->bli_flags & XFS_BLI_ORDERED) {
148165 /*
149
- * The buffer has been logged just to order it.
150
- * It is not being included in the transaction
151
- * commit, so no vectors are used at all.
166
+ * The buffer has been logged just to order it. It is not being
167
+ * included in the transaction commit, so no vectors are used at
168
+ * all.
152169 */
153170 trace_xfs_buf_item_size_ordered(bip);
154171 *nvecs = XFS_LOG_VEC_ORDERED;
....@@ -330,7 +347,7 @@
330347 * occurs during recovery.
331348 */
332349 if (bip->bli_flags & XFS_BLI_INODE_BUF) {
333
- if (xfs_sb_version_hascrc(&lip->li_mountp->m_sb) ||
350
+ if (xfs_sb_version_has_v3inode(&lip->li_mountp->m_sb) ||
334351 !((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
335352 xfs_log_item_in_current_chkpt(lip)))
336353 bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF;
....@@ -376,17 +393,8 @@
376393 }
377394
378395 /*
379
- * This is called to unpin the buffer associated with the buf log
380
- * item which was previously pinned with a call to xfs_buf_item_pin().
381
- *
382
- * Also drop the reference to the buf item for the current transaction.
383
- * If the XFS_BLI_STALE flag is set and we are the last reference,
384
- * then free up the buf log item and unlock the buffer.
385
- *
386
- * If the remove flag is set we are called from uncommit in the
387
- * forced-shutdown path. If that is true and the reference count on
388
- * the log item is going to drop to zero we need to free the item's
389
- * descriptor in the transaction.
396
+ * This is called to unpin the buffer associated with the buf log item which
397
+ * was previously pinned with a call to xfs_buf_item_pin().
390398 */
391399 STATIC void
392400 xfs_buf_item_unpin(
....@@ -395,7 +403,6 @@
395403 {
396404 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
397405 xfs_buf_t *bp = bip->bli_buf;
398
- struct xfs_ail *ailp = lip->li_ailp;
399406 int stale = bip->bli_flags & XFS_BLI_STALE;
400407 int freed;
401408
....@@ -404,90 +411,62 @@
404411
405412 trace_xfs_buf_item_unpin(bip);
406413
414
+ /*
415
+ * Drop the bli ref associated with the pin and grab the hold required
416
+ * for the I/O simulation failure in the abort case. We have to do this
417
+ * before the pin count drops because the AIL doesn't acquire a bli
418
+ * reference. Therefore if the refcount drops to zero, the bli could
419
+ * still be AIL resident and the buffer submitted for I/O (and freed on
420
+ * completion) at any point before we return. This can be removed once
421
+ * the AIL properly holds a reference on the bli.
422
+ */
407423 freed = atomic_dec_and_test(&bip->bli_refcount);
408
-
424
+ if (freed && !stale && remove)
425
+ xfs_buf_hold(bp);
409426 if (atomic_dec_and_test(&bp->b_pin_count))
410427 wake_up_all(&bp->b_waiters);
411428
412
- if (freed && stale) {
429
+ /* nothing to do but drop the pin count if the bli is active */
430
+ if (!freed)
431
+ return;
432
+
433
+ if (stale) {
413434 ASSERT(bip->bli_flags & XFS_BLI_STALE);
414435 ASSERT(xfs_buf_islocked(bp));
415436 ASSERT(bp->b_flags & XBF_STALE);
416437 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
438
+ ASSERT(list_empty(&lip->li_trans));
439
+ ASSERT(!bp->b_transp);
417440
418441 trace_xfs_buf_item_unpin_stale(bip);
419442
420
- if (remove) {
421
- /*
422
- * If we are in a transaction context, we have to
423
- * remove the log item from the transaction as we are
424
- * about to release our reference to the buffer. If we
425
- * don't, the unlock that occurs later in
426
- * xfs_trans_uncommit() will try to reference the
427
- * buffer which we no longer have a hold on.
428
- */
429
- if (!list_empty(&lip->li_trans))
430
- xfs_trans_del_item(lip);
431
-
432
- /*
433
- * Since the transaction no longer refers to the buffer,
434
- * the buffer should no longer refer to the transaction.
435
- */
436
- bp->b_transp = NULL;
437
- }
438
-
439443 /*
440
- * If we get called here because of an IO error, we may
441
- * or may not have the item on the AIL. xfs_trans_ail_delete()
442
- * will take care of that situation.
443
- * xfs_trans_ail_delete() drops the AIL lock.
444
+ * If we get called here because of an IO error, we may or may
445
+ * not have the item on the AIL. xfs_trans_ail_delete() will
446
+ * take care of that situation. xfs_trans_ail_delete() drops
447
+ * the AIL lock.
444448 */
445449 if (bip->bli_flags & XFS_BLI_STALE_INODE) {
446
- xfs_buf_do_callbacks(bp);
447
- bp->b_log_item = NULL;
448
- list_del_init(&bp->b_li_list);
449
- bp->b_iodone = NULL;
450
+ xfs_buf_item_done(bp);
451
+ xfs_buf_inode_iodone(bp);
452
+ ASSERT(list_empty(&bp->b_li_list));
450453 } else {
451
- spin_lock(&ailp->ail_lock);
452
- xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR);
454
+ xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
453455 xfs_buf_item_relse(bp);
454456 ASSERT(bp->b_log_item == NULL);
455457 }
456458 xfs_buf_relse(bp);
457
- } else if (freed && remove) {
459
+ } else if (remove) {
458460 /*
459
- * There are currently two references to the buffer - the active
460
- * LRU reference and the buf log item. What we are about to do
461
- * here - simulate a failed IO completion - requires 3
462
- * references.
463
- *
464
- * The LRU reference is removed by the xfs_buf_stale() call. The
465
- * buf item reference is removed by the xfs_buf_iodone()
466
- * callback that is run by xfs_buf_do_callbacks() during ioend
467
- * processing (via the bp->b_iodone callback), and then finally
468
- * the ioend processing will drop the IO reference if the buffer
469
- * is marked XBF_ASYNC.
470
- *
471
- * Hence we need to take an additional reference here so that IO
472
- * completion processing doesn't free the buffer prematurely.
461
+ * The buffer must be locked and held by the caller to simulate
462
+ * an async I/O failure. We acquired the hold for this case
463
+ * before the buffer was unpinned.
473464 */
474465 xfs_buf_lock(bp);
475
- xfs_buf_hold(bp);
476466 bp->b_flags |= XBF_ASYNC;
477
- xfs_buf_ioerror(bp, -EIO);
478
- bp->b_flags &= ~XBF_DONE;
479
- xfs_buf_stale(bp);
480
- xfs_buf_ioend(bp);
467
+ xfs_buf_ioend_fail(bp);
481468 }
482469 }
483
-
484
-/*
485
- * Buffer IO error rate limiting. Limit it to no more than 10 messages per 30
486
- * seconds so as to not spam logs too much on repeated detection of the same
487
- * buffer being bad..
488
- */
489
-
490
-static DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10);
491470
492471 STATIC uint
493472 xfs_buf_item_push(
....@@ -518,11 +497,10 @@
518497 trace_xfs_buf_item_push(bip);
519498
520499 /* has a previous flush failed due to IO errors? */
521
- if ((bp->b_flags & XBF_WRITE_FAIL) &&
522
- ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) {
523
- xfs_warn(bp->b_target->bt_mount,
524
-"Failing async write on buffer block 0x%llx. Retrying async write.",
525
- (long long)bp->b_bn);
500
+ if (bp->b_flags & XBF_WRITE_FAIL) {
501
+ xfs_buf_alert_ratelimited(bp, "XFS: Failing async write",
502
+ "Failing async write on buffer block 0x%llx. Retrying async write.",
503
+ (long long)bp->b_bn);
526504 }
527505
528506 if (!xfs_buf_delwri_queue(bp, buffer_list))
....@@ -569,7 +547,7 @@
569547 * state.
570548 */
571549 if (aborted)
572
- xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR);
550
+ xfs_trans_ail_delete(lip, 0);
573551 xfs_buf_item_relse(bip->bli_buf);
574552 return true;
575553 }
....@@ -594,7 +572,7 @@
594572 * free the item.
595573 */
596574 STATIC void
597
-xfs_buf_item_unlock(
575
+xfs_buf_item_release(
598576 struct xfs_log_item *lip)
599577 {
600578 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
....@@ -605,9 +583,11 @@
605583 #if defined(DEBUG) || defined(XFS_WARN)
606584 bool ordered = bip->bli_flags & XFS_BLI_ORDERED;
607585 bool dirty = bip->bli_flags & XFS_BLI_DIRTY;
586
+ bool aborted = test_bit(XFS_LI_ABORTED,
587
+ &lip->li_flags);
608588 #endif
609589
610
- trace_xfs_buf_item_unlock(bip);
590
+ trace_xfs_buf_item_release(bip);
611591
612592 /*
613593 * The bli dirty state should match whether the blf has logged segments
....@@ -633,8 +613,16 @@
633613 released = xfs_buf_item_put(bip);
634614 if (hold || (stale && !released))
635615 return;
636
- ASSERT(!stale || test_bit(XFS_LI_ABORTED, &lip->li_flags));
616
+ ASSERT(!stale || aborted);
637617 xfs_buf_relse(bp);
618
+}
619
+
620
+STATIC void
621
+xfs_buf_item_committing(
622
+ struct xfs_log_item *lip,
623
+ xfs_csn_t seq)
624
+{
625
+ return xfs_buf_item_release(lip);
638626 }
639627
640628 /*
....@@ -669,28 +657,18 @@
669657 return lsn;
670658 }
671659
672
-STATIC void
673
-xfs_buf_item_committing(
674
- struct xfs_log_item *lip,
675
- xfs_lsn_t commit_lsn)
676
-{
677
-}
678
-
679
-/*
680
- * This is the ops vector shared by all buf log items.
681
- */
682660 static const struct xfs_item_ops xfs_buf_item_ops = {
683661 .iop_size = xfs_buf_item_size,
684662 .iop_format = xfs_buf_item_format,
685663 .iop_pin = xfs_buf_item_pin,
686664 .iop_unpin = xfs_buf_item_unpin,
687
- .iop_unlock = xfs_buf_item_unlock,
665
+ .iop_release = xfs_buf_item_release,
666
+ .iop_committing = xfs_buf_item_committing,
688667 .iop_committed = xfs_buf_item_committed,
689668 .iop_push = xfs_buf_item_push,
690
- .iop_committing = xfs_buf_item_committing
691669 };
692670
693
-STATIC int
671
+STATIC void
694672 xfs_buf_item_get_format(
695673 struct xfs_buf_log_item *bip,
696674 int count)
....@@ -700,14 +678,11 @@
700678
701679 if (count == 1) {
702680 bip->bli_formats = &bip->__bli_format;
703
- return 0;
681
+ return;
704682 }
705683
706684 bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format),
707
- KM_SLEEP);
708
- if (!bip->bli_formats)
709
- return -ENOMEM;
710
- return 0;
685
+ 0);
711686 }
712687
713688 STATIC void
....@@ -733,7 +708,6 @@
733708 struct xfs_buf_log_item *bip = bp->b_log_item;
734709 int chunks;
735710 int map_size;
736
- int error;
737711 int i;
738712
739713 /*
....@@ -741,7 +715,7 @@
741715 * this buffer. If we do already have one, there is
742716 * nothing to do here so return.
743717 */
744
- ASSERT(bp->b_target->bt_mount == mp);
718
+ ASSERT(bp->b_mount == mp);
745719 if (bip) {
746720 ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
747721 ASSERT(!bp->b_transp);
....@@ -749,7 +723,7 @@
749723 return 0;
750724 }
751725
752
- bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP);
726
+ bip = kmem_cache_zalloc(xfs_buf_item_zone, GFP_KERNEL | __GFP_NOFAIL);
753727 xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
754728 bip->bli_buf = bp;
755729
....@@ -762,18 +736,21 @@
762736 * Discontiguous buffer support follows the layout of the underlying
763737 * buffer. This makes the implementation as simple as possible.
764738 */
765
- error = xfs_buf_item_get_format(bip, bp->b_map_count);
766
- ASSERT(error == 0);
767
- if (error) { /* to stop gcc throwing set-but-unused warnings */
768
- kmem_zone_free(xfs_buf_item_zone, bip);
769
- return error;
770
- }
771
-
739
+ xfs_buf_item_get_format(bip, bp->b_map_count);
772740
773741 for (i = 0; i < bip->bli_format_count; i++) {
774742 chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len),
775743 XFS_BLF_CHUNK);
776744 map_size = DIV_ROUND_UP(chunks, NBWORD);
745
+
746
+ if (map_size > XFS_BLF_DATAMAP_SIZE) {
747
+ kmem_cache_free(xfs_buf_item_zone, bip);
748
+ xfs_err(mp,
749
+ "buffer item dirty bitmap (%u uints) too small to reflect %u bytes!",
750
+ map_size,
751
+ BBTOB(bp->b_maps[i].bm_len));
752
+ return -EFSCORRUPTED;
753
+ }
777754
778755 bip->bli_formats[i].blf_type = XFS_LI_BUF;
779756 bip->bli_formats[i].blf_blkno = bp->b_maps[i].bm_bn;
....@@ -806,6 +783,9 @@
806783 uint bit;
807784 uint end_bit;
808785 uint mask;
786
+
787
+ ASSERT(first < XFS_BLF_DATAMAP_SIZE * XFS_BLF_CHUNK * NBWORD);
788
+ ASSERT(last < XFS_BLF_DATAMAP_SIZE * XFS_BLF_CHUNK * NBWORD);
809789
810790 /*
811791 * Convert byte offsets to bit numbers.
....@@ -853,7 +833,7 @@
853833 * first_bit and last_bit.
854834 */
855835 while ((bits_to_set - bits_set) >= NBWORD) {
856
- *wordp |= 0xffffffff;
836
+ *wordp = 0xffffffff;
857837 bits_set += NBWORD;
858838 wordp++;
859839 }
....@@ -941,15 +921,11 @@
941921 {
942922 xfs_buf_item_free_format(bip);
943923 kmem_free(bip->bli_item.li_lv_shadow);
944
- kmem_zone_free(xfs_buf_item_zone, bip);
924
+ kmem_cache_free(xfs_buf_item_zone, bip);
945925 }
946926
947927 /*
948
- * This is called when the buf log item is no longer needed. It should
949
- * free the buf log item associated with the given buffer and clear
950
- * the buffer's pointer to the buf log item. If there are no more
951
- * items in the list, clear the b_iodone field of the buffer (see
952
- * xfs_buf_attach_iodone() below).
928
+ * xfs_buf_item_relse() is called when the buf log item is no longer needed.
953929 */
954930 void
955931 xfs_buf_item_relse(
....@@ -958,315 +934,31 @@
958934 struct xfs_buf_log_item *bip = bp->b_log_item;
959935
960936 trace_xfs_buf_item_relse(bp, _RET_IP_);
961
- ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
937
+ ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
962938
963939 bp->b_log_item = NULL;
964
- if (list_empty(&bp->b_li_list))
965
- bp->b_iodone = NULL;
966
-
967940 xfs_buf_rele(bp);
968941 xfs_buf_item_free(bip);
969942 }
970943
971
-
972
-/*
973
- * Add the given log item with its callback to the list of callbacks
974
- * to be called when the buffer's I/O completes. If it is not set
975
- * already, set the buffer's b_iodone() routine to be
976
- * xfs_buf_iodone_callbacks() and link the log item into the list of
977
- * items rooted at b_li_list.
978
- */
979944 void
980
-xfs_buf_attach_iodone(
981
- xfs_buf_t *bp,
982
- void (*cb)(xfs_buf_t *, xfs_log_item_t *),
983
- xfs_log_item_t *lip)
984
-{
985
- ASSERT(xfs_buf_islocked(bp));
986
-
987
- lip->li_cb = cb;
988
- list_add_tail(&lip->li_bio_list, &bp->b_li_list);
989
-
990
- ASSERT(bp->b_iodone == NULL ||
991
- bp->b_iodone == xfs_buf_iodone_callbacks);
992
- bp->b_iodone = xfs_buf_iodone_callbacks;
993
-}
994
-
995
-/*
996
- * We can have many callbacks on a buffer. Running the callbacks individually
997
- * can cause a lot of contention on the AIL lock, so we allow for a single
998
- * callback to be able to scan the remaining items in bp->b_li_list for other
999
- * items of the same type and callback to be processed in the first call.
1000
- *
1001
- * As a result, the loop walking the callback list below will also modify the
1002
- * list. it removes the first item from the list and then runs the callback.
1003
- * The loop then restarts from the new first item int the list. This allows the
1004
- * callback to scan and modify the list attached to the buffer and we don't
1005
- * have to care about maintaining a next item pointer.
1006
- */
1007
-STATIC void
1008
-xfs_buf_do_callbacks(
1009
- struct xfs_buf *bp)
1010
-{
1011
- struct xfs_buf_log_item *blip = bp->b_log_item;
1012
- struct xfs_log_item *lip;
1013
-
1014
- /* If there is a buf_log_item attached, run its callback */
1015
- if (blip) {
1016
- lip = &blip->bli_item;
1017
- lip->li_cb(bp, lip);
1018
- }
1019
-
1020
- while (!list_empty(&bp->b_li_list)) {
1021
- lip = list_first_entry(&bp->b_li_list, struct xfs_log_item,
1022
- li_bio_list);
1023
-
1024
- /*
1025
- * Remove the item from the list, so we don't have any
1026
- * confusion if the item is added to another buf.
1027
- * Don't touch the log item after calling its
1028
- * callback, because it could have freed itself.
1029
- */
1030
- list_del_init(&lip->li_bio_list);
1031
- lip->li_cb(bp, lip);
1032
- }
1033
-}
1034
-
1035
-/*
1036
- * Invoke the error state callback for each log item affected by the failed I/O.
1037
- *
1038
- * If a metadata buffer write fails with a non-permanent error, the buffer is
1039
- * eventually resubmitted and so the completion callbacks are not run. The error
1040
- * state may need to be propagated to the log items attached to the buffer,
1041
- * however, so the next AIL push of the item knows hot to handle it correctly.
1042
- */
1043
-STATIC void
1044
-xfs_buf_do_callbacks_fail(
1045
- struct xfs_buf *bp)
1046
-{
1047
- struct xfs_log_item *lip;
1048
- struct xfs_ail *ailp;
1049
-
1050
- /*
1051
- * Buffer log item errors are handled directly by xfs_buf_item_push()
1052
- * and xfs_buf_iodone_callback_error, and they have no IO error
1053
- * callbacks. Check only for items in b_li_list.
1054
- */
1055
- if (list_empty(&bp->b_li_list))
1056
- return;
1057
-
1058
- lip = list_first_entry(&bp->b_li_list, struct xfs_log_item,
1059
- li_bio_list);
1060
- ailp = lip->li_ailp;
1061
- spin_lock(&ailp->ail_lock);
1062
- list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
1063
- if (lip->li_ops->iop_error)
1064
- lip->li_ops->iop_error(lip, bp);
1065
- }
1066
- spin_unlock(&ailp->ail_lock);
1067
-}
1068
-
1069
-static bool
1070
-xfs_buf_iodone_callback_error(
1071
- struct xfs_buf *bp)
1072
-{
1073
- struct xfs_buf_log_item *bip = bp->b_log_item;
1074
- struct xfs_log_item *lip;
1075
- struct xfs_mount *mp;
1076
- static ulong lasttime;
1077
- static xfs_buftarg_t *lasttarg;
1078
- struct xfs_error_cfg *cfg;
1079
-
1080
- /*
1081
- * The failed buffer might not have a buf_log_item attached or the
1082
- * log_item list might be empty. Get the mp from the available
1083
- * xfs_log_item
1084
- */
1085
- lip = list_first_entry_or_null(&bp->b_li_list, struct xfs_log_item,
1086
- li_bio_list);
1087
- mp = lip ? lip->li_mountp : bip->bli_item.li_mountp;
1088
-
1089
- /*
1090
- * If we've already decided to shutdown the filesystem because of
1091
- * I/O errors, there's no point in giving this a retry.
1092
- */
1093
- if (XFS_FORCED_SHUTDOWN(mp))
1094
- goto out_stale;
1095
-
1096
- if (bp->b_target != lasttarg ||
1097
- time_after(jiffies, (lasttime + 5*HZ))) {
1098
- lasttime = jiffies;
1099
- xfs_buf_ioerror_alert(bp, __func__);
1100
- }
1101
- lasttarg = bp->b_target;
1102
-
1103
- /* synchronous writes will have callers process the error */
1104
- if (!(bp->b_flags & XBF_ASYNC))
1105
- goto out_stale;
1106
-
1107
- trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
1108
- ASSERT(bp->b_iodone != NULL);
1109
-
1110
- cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
1111
-
1112
- /*
1113
- * If the write was asynchronous then no one will be looking for the
1114
- * error. If this is the first failure of this type, clear the error
1115
- * state and write the buffer out again. This means we always retry an
1116
- * async write failure at least once, but we also need to set the buffer
1117
- * up to behave correctly now for repeated failures.
1118
- */
1119
- if (!(bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL)) ||
1120
- bp->b_last_error != bp->b_error) {
1121
- bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
1122
- bp->b_last_error = bp->b_error;
1123
- if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
1124
- !bp->b_first_retry_time)
1125
- bp->b_first_retry_time = jiffies;
1126
-
1127
- xfs_buf_ioerror(bp, 0);
1128
- xfs_buf_submit(bp);
1129
- return true;
1130
- }
1131
-
1132
- /*
1133
- * Repeated failure on an async write. Take action according to the
1134
- * error configuration we have been set up to use.
1135
- */
1136
-
1137
- if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
1138
- ++bp->b_retries > cfg->max_retries)
1139
- goto permanent_error;
1140
- if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
1141
- time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
1142
- goto permanent_error;
1143
-
1144
- /* At unmount we may treat errors differently */
1145
- if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
1146
- goto permanent_error;
1147
-
1148
- /*
1149
- * Still a transient error, run IO completion failure callbacks and let
1150
- * the higher layers retry the buffer.
1151
- */
1152
- xfs_buf_do_callbacks_fail(bp);
1153
- xfs_buf_ioerror(bp, 0);
1154
- xfs_buf_relse(bp);
1155
- return true;
1156
-
1157
- /*
1158
- * Permanent error - we need to trigger a shutdown if we haven't already
1159
- * to indicate that inconsistency will result from this action.
1160
- */
1161
-permanent_error:
1162
- xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1163
-out_stale:
1164
- xfs_buf_stale(bp);
1165
- bp->b_flags |= XBF_DONE;
1166
- trace_xfs_buf_error_relse(bp, _RET_IP_);
1167
- return false;
1168
-}
1169
-
1170
-/*
1171
- * This is the iodone() function for buffers which have had callbacks attached
1172
- * to them by xfs_buf_attach_iodone(). We need to iterate the items on the
1173
- * callback list, mark the buffer as having no more callbacks and then push the
1174
- * buffer through IO completion processing.
1175
- */
1176
-void
1177
-xfs_buf_iodone_callbacks(
945
+xfs_buf_item_done(
1178946 struct xfs_buf *bp)
1179947 {
1180948 /*
1181
- * If there is an error, process it. Some errors require us
1182
- * to run callbacks after failure processing is done so we
1183
- * detect that and take appropriate action.
1184
- */
1185
- if (bp->b_error && xfs_buf_iodone_callback_error(bp))
1186
- return;
1187
-
1188
- /*
1189
- * Successful IO or permanent error. Either way, we can clear the
1190
- * retry state here in preparation for the next error that may occur.
1191
- */
1192
- bp->b_last_error = 0;
1193
- bp->b_retries = 0;
1194
- bp->b_first_retry_time = 0;
1195
-
1196
- xfs_buf_do_callbacks(bp);
1197
- bp->b_log_item = NULL;
1198
- list_del_init(&bp->b_li_list);
1199
- bp->b_iodone = NULL;
1200
- xfs_buf_ioend(bp);
1201
-}
1202
-
1203
-/*
1204
- * This is the iodone() function for buffers which have been
1205
- * logged. It is called when they are eventually flushed out.
1206
- * It should remove the buf item from the AIL, and free the buf item.
1207
- * It is called by xfs_buf_iodone_callbacks() above which will take
1208
- * care of cleaning up the buffer itself.
1209
- */
1210
-void
1211
-xfs_buf_iodone(
1212
- struct xfs_buf *bp,
1213
- struct xfs_log_item *lip)
1214
-{
1215
- struct xfs_ail *ailp = lip->li_ailp;
1216
-
1217
- ASSERT(BUF_ITEM(lip)->bli_buf == bp);
1218
-
1219
- xfs_buf_rele(bp);
1220
-
1221
- /*
1222
- * If we are forcibly shutting down, this may well be
1223
- * off the AIL already. That's because we simulate the
1224
- * log-committed callbacks to unpin these buffers. Or we may never
1225
- * have put this item on AIL because of the transaction was
1226
- * aborted forcibly. xfs_trans_ail_delete() takes care of these.
949
+ * If we are forcibly shutting down, this may well be off the AIL
950
+ * already. That's because we simulate the log-committed callbacks to
951
+ * unpin these buffers. Or we may never have put this item on AIL
952
+ * because of the transaction was aborted forcibly.
953
+ * xfs_trans_ail_delete() takes care of these.
1227954 *
1228955 * Either way, AIL is useless if we're forcing a shutdown.
956
+ *
957
+ * Note that log recovery writes might have buffer items that are not on
958
+ * the AIL even when the file system is not shut down.
1229959 */
1230
- spin_lock(&ailp->ail_lock);
1231
- xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
1232
- xfs_buf_item_free(BUF_ITEM(lip));
1233
-}
1234
-
1235
-/*
1236
- * Requeue a failed buffer for writeback.
1237
- *
1238
- * We clear the log item failed state here as well, but we have to be careful
1239
- * about reference counts because the only active reference counts on the buffer
1240
- * may be the failed log items. Hence if we clear the log item failed state
1241
- * before queuing the buffer for IO we can release all active references to
1242
- * the buffer and free it, leading to use after free problems in
1243
- * xfs_buf_delwri_queue. It makes no difference to the buffer or log items which
1244
- * order we process them in - the buffer is locked, and we own the buffer list
1245
- * so nothing on them is going to change while we are performing this action.
1246
- *
1247
- * Hence we can safely queue the buffer for IO before we clear the failed log
1248
- * item state, therefore always having an active reference to the buffer and
1249
- * avoiding the transient zero-reference state that leads to use-after-free.
1250
- *
1251
- * Return true if the buffer was added to the buffer list, false if it was
1252
- * already on the buffer list.
1253
- */
1254
-bool
1255
-xfs_buf_resubmit_failed_buffers(
1256
- struct xfs_buf *bp,
1257
- struct list_head *buffer_list)
1258
-{
1259
- struct xfs_log_item *lip;
1260
- bool ret;
1261
-
1262
- ret = xfs_buf_delwri_queue(bp, buffer_list);
1263
-
1264
- /*
1265
- * XFS_LI_FAILED set/clear is protected by ail_lock, caller of this
1266
- * function already have it acquired
1267
- */
1268
- list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
1269
- xfs_clear_li_failed(lip);
1270
-
1271
- return ret;
960
+ xfs_trans_ail_delete(&bp->b_log_item->bli_item,
961
+ (bp->b_flags & _XBF_LOGRECOVERY) ? 0 :
962
+ SHUTDOWN_CORRUPT_INCORE);
963
+ xfs_buf_item_relse(bp);
1272964 }