commit 9999e48639b3cecb08ffb37358bcba3b48161b29
Date: 2024-05-10
--- a/kernel/fs/xfs/xfs_trans_ail.c
+++ b/kernel/fs/xfs/xfs_trans_ail.c
@@ -6,6 +6,7 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
@@ -31,6 +32,7 @@
 xfs_ail_check(
         struct xfs_ail          *ailp,
         struct xfs_log_item     *lip)
+        __must_hold(&ailp->ail_lock)
 {
         struct xfs_log_item     *prev_lip;
         struct xfs_log_item     *next_lip;
@@ -74,29 +76,29 @@
  * Return a pointer to the last item in the AIL. If the AIL is empty, then
  * return NULL.
  */
-static xfs_log_item_t *
+static struct xfs_log_item *
 xfs_ail_max(
         struct xfs_ail          *ailp)
 {
         if (list_empty(&ailp->ail_head))
                 return NULL;

-        return list_entry(ailp->ail_head.prev, xfs_log_item_t, li_ail);
+        return list_entry(ailp->ail_head.prev, struct xfs_log_item, li_ail);
 }

 /*
  * Return a pointer to the item which follows the given item in the AIL. If
  * the given item is the last item in the list, then return NULL.
  */
-static xfs_log_item_t *
+static struct xfs_log_item *
 xfs_ail_next(
-        struct xfs_ail  *ailp,
-        xfs_log_item_t  *lip)
+        struct xfs_ail          *ailp,
+        struct xfs_log_item     *lip)
 {
         if (lip->li_ail.next == &ailp->ail_head)
                 return NULL;

-        return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
+        return list_first_entry(&lip->li_ail, struct xfs_log_item, li_ail);
 }

 /*
@@ -107,17 +109,25 @@
  * We need the AIL lock in order to get a coherent read of the lsn of the last
  * item in the AIL.
  */
+static xfs_lsn_t
+__xfs_ail_min_lsn(
+        struct xfs_ail          *ailp)
+{
+        struct xfs_log_item     *lip = xfs_ail_min(ailp);
+
+        if (lip)
+                return lip->li_lsn;
+        return 0;
+}
+
 xfs_lsn_t
 xfs_ail_min_lsn(
-        struct xfs_ail  *ailp)
+        struct xfs_ail          *ailp)
 {
-        xfs_lsn_t       lsn = 0;
-        xfs_log_item_t  *lip;
+        xfs_lsn_t               lsn;

         spin_lock(&ailp->ail_lock);
-        lip = xfs_ail_min(ailp);
-        if (lip)
-                lsn = lip->li_lsn;
+        lsn = __xfs_ail_min_lsn(ailp);
         spin_unlock(&ailp->ail_lock);

         return lsn;
@@ -128,10 +138,10 @@
  */
 static xfs_lsn_t
 xfs_ail_max_lsn(
-        struct xfs_ail  *ailp)
+        struct xfs_ail          *ailp)
 {
-        xfs_lsn_t       lsn = 0;
-        xfs_log_item_t  *lip;
+        xfs_lsn_t               lsn = 0;
+        struct xfs_log_item     *lip;

         spin_lock(&ailp->ail_lock);
         lip = xfs_ail_max(ailp);
@@ -216,13 +226,13 @@
  * ascending traversal. Pass a @lsn of zero to initialise the cursor to the
  * first item in the AIL. Returns NULL if the list is empty.
  */
-xfs_log_item_t *
+struct xfs_log_item *
 xfs_trans_ail_cursor_first(
         struct xfs_ail          *ailp,
         struct xfs_ail_cursor   *cur,
         xfs_lsn_t               lsn)
 {
-        xfs_log_item_t  *lip;
+        struct xfs_log_item     *lip;

         xfs_trans_ail_cursor_init(ailp, cur);
@@ -248,7 +258,7 @@
         struct xfs_ail          *ailp,
         xfs_lsn_t               lsn)
 {
-        xfs_log_item_t  *lip;
+        struct xfs_log_item     *lip;

         list_for_each_entry_reverse(lip, &ailp->ail_head, li_ail) {
                 if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
@@ -327,12 +337,55 @@
  */
 static void
 xfs_ail_delete(
-        struct xfs_ail  *ailp,
-        xfs_log_item_t  *lip)
+        struct xfs_ail          *ailp,
+        struct xfs_log_item     *lip)
 {
         xfs_ail_check(ailp, lip);
         list_del(&lip->li_ail);
         xfs_trans_ail_cursor_clear(ailp, lip);
+}
+
+/*
+ * Requeue a failed buffer for writeback.
+ *
+ * We clear the log item failed state here as well, but we have to be careful
+ * about reference counts because the only active reference counts on the buffer
+ * may be the failed log items. Hence if we clear the log item failed state
+ * before queuing the buffer for IO we can release all active references to
+ * the buffer and free it, leading to use after free problems in
+ * xfs_buf_delwri_queue. It makes no difference to the buffer or log items which
+ * order we process them in - the buffer is locked, and we own the buffer list
+ * so nothing on them is going to change while we are performing this action.
+ *
+ * Hence we can safely queue the buffer for IO before we clear the failed log
+ * item state, therefore always having an active reference to the buffer and
+ * avoiding the transient zero-reference state that leads to use-after-free.
+ */
+static inline int
+xfsaild_resubmit_item(
+        struct xfs_log_item     *lip,
+        struct list_head        *buffer_list)
+{
+        struct xfs_buf          *bp = lip->li_buf;
+
+        if (!xfs_buf_trylock(bp))
+                return XFS_ITEM_LOCKED;
+
+        if (!xfs_buf_delwri_queue(bp, buffer_list)) {
+                xfs_buf_unlock(bp);
+                return XFS_ITEM_FLUSHING;
+        }
+
+        /* protected by ail_lock */
+        list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
+                if (bp->b_flags & _XBF_INODES)
+                        clear_bit(XFS_LI_FAILED, &lip->li_flags);
+                else
+                        xfs_clear_li_failed(lip);
+        }
+
+        xfs_buf_unlock(bp);
+        return XFS_ITEM_SUCCESS;
 }

 static inline uint
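Note (not part of the patch): the ordering constraint described in the comment above is the heart of this hunk. The failed log items may hold the only references to the buffer, so clearing XFS_LI_FAILED before xfs_buf_delwri_queue() takes its own reference can drop the count to zero. A minimal standalone C model of that invariant follows; all names here are illustrative, not the kernel's.

    /*
     * Standalone model of the refcount ordering argument above.
     * Build: cc -o uaf uaf.c.  Not kernel code.
     */
    #include <assert.h>
    #include <stdio.h>

    struct buf {
            int ref;        /* active references */
            int queued;     /* on the delwri list? */
    };

    static void bget(struct buf *b) { b->ref++; }

    static void bput(struct buf *b)
    {
            if (--b->ref == 0)
                    printf("buffer freed\n");   /* any later use is a UAF */
    }

    /* Models xfs_buf_delwri_queue(): the queue takes its own reference. */
    static void delwri_queue(struct buf *b)
    {
            assert(b->ref > 0);     /* buffer must still be alive here */
            bget(b);
            b->queued = 1;
    }

    int main(void)
    {
            /* Two failed log items hold the only references to the buffer. */
            struct buf b = { .ref = 2, .queued = 0 };

            delwri_queue(&b);       /* safe order: queue first (ref 2 -> 3) */
            bput(&b);               /* clear item 1 failed state (3 -> 2) */
            bput(&b);               /* clear item 2 failed state (2 -> 1) */
            assert(b.queued && b.ref == 1); /* the queue keeps it alive */
            return 0;
    }

Reversing the two steps in main() would drop the count to zero before the queue could take its hold, which is exactly the transient zero-reference state the comment warns about.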
@@ -347,6 +400,16 @@
         if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN))
                 return XFS_ITEM_PINNED;

+        /*
+         * Consider the item pinned if a push callback is not defined so the
+         * caller will force the log. This should only happen for intent items
+         * as they are unpinned once the associated done item is committed to
+         * the on-disk log.
+         */
+        if (!lip->li_ops->iop_push)
+                return XFS_ITEM_PINNED;
+        if (test_bit(XFS_LI_FAILED, &lip->li_flags))
+                return xfsaild_resubmit_item(lip, &ailp->ail_buf_list);
         return lip->li_ops->iop_push(lip, &ailp->ail_buf_list);
 }

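Note (not part of the patch): the new dispatch order above is small but subtle. Items without an iop_push callback report as pinned so the caller forces the log, and failed items are routed to xfsaild_resubmit_item() before a normal push is attempted. A standalone sketch of that order, with illustrative names only:

    /*
     * Standalone sketch of the push dispatch order added above.
     * Build: cc -o push push.c.  Not the kernel API.
     */
    #include <stdio.h>

    enum { ITEM_SUCCESS, ITEM_PINNED, ITEM_LOCKED, ITEM_FLUSHING };

    struct item {
            int (*push)(struct item *); /* NULL models a missing iop_push */
            int failed;                 /* models XFS_LI_FAILED */
    };

    static int resubmit(struct item *ip) { (void)ip; return ITEM_SUCCESS; }
    static int push_ok(struct item *ip)  { (void)ip; return ITEM_SUCCESS; }

    static int push_item(struct item *ip)
    {
            if (!ip->push)
                    return ITEM_PINNED;     /* caller will force the log */
            if (ip->failed)
                    return resubmit(ip);    /* requeue the failed buffer */
            return ip->push(ip);            /* the normal push path */
    }

    int main(void)
    {
            struct item intent = { .push = NULL,    .failed = 0 };
            struct item failed = { .push = push_ok, .failed = 1 };
            struct item normal = { .push = push_ok, .failed = 0 };

            /* prints "1 0 0": pinned, resubmitted, pushed */
            printf("%d %d %d\n", push_item(&intent),
                            push_item(&failed), push_item(&normal));
            return 0;
    }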
@@ -356,7 +419,7 @@
 {
         xfs_mount_t             *mp = ailp->ail_mount;
         struct xfs_ail_cursor   cur;
-        xfs_log_item_t          *lip;
+        struct xfs_log_item     *lip;
         xfs_lsn_t               lsn;
         xfs_lsn_t               target;
         long                    tout;
@@ -385,16 +448,10 @@
         target = ailp->ail_target;
         ailp->ail_target_prev = target;

+        /* we're done if the AIL is empty or our push has reached the end */
         lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->ail_last_pushed_lsn);
-        if (!lip) {
-                /*
-                 * If the AIL is empty or our push has reached the end we are
-                 * done now.
-                 */
-                xfs_trans_ail_cursor_done(&cur);
-                spin_unlock(&ailp->ail_lock);
+        if (!lip)
                 goto out_done;
-        }

         XFS_STATS_INC(mp, xs_push_ail);

@@ -418,15 +475,15 @@

         case XFS_ITEM_FLUSHING:
                 /*
-                 * The item or its backing buffer is already beeing
+                 * The item or its backing buffer is already being
                 * flushed. The typical reason for that is that an
                 * inode buffer is locked because we already pushed the
                 * updates to it as part of inode clustering.
                 *
-                 * We do not want to to stop flushing just because lots
-                 * of items are already beeing flushed, but we need to
+                 * We do not want to stop flushing just because lots
+                 * of items are already being flushed, but we need to
                 * re-try the flushing relatively soon if most of the
-                 * AIL is beeing flushed.
+                 * AIL is being flushed.
                 */
                 XFS_STATS_INC(mp, xs_push_ail_flushing);
                 trace_xfs_ail_flushing(lip);
@@ -458,7 +515,7 @@
                 /*
                 * Are there too many items we can't do anything with?
                 *
-                 * If we we are skipping too many items because we can't flush
+                 * If we are skipping too many items because we can't flush
                 * them or they are already being flushed, we back off and
                 * given them time to complete whatever operation is being
                 * done. i.e. remove pressure from the AIL while we can't make
@@ -476,6 +533,8 @@
                         break;
                 lsn = lip->li_lsn;
         }
+
+out_done:
         xfs_trans_ail_cursor_done(&cur);
         spin_unlock(&ailp->ail_lock);

@@ -483,7 +542,6 @@
                 ailp->ail_log_flush++;

         if (!count || XFS_LSN_CMP(lsn, target) >= 0) {
-out_done:
                 /*
                 * We reached the target or the AIL is empty, so wait a bit
                 * longer for I/O to complete and remove pushed items from the
@@ -575,7 +633,8 @@
         */
        smp_rmb();
        if (!xfs_ail_min(ailp) &&
-            ailp->ail_target == ailp->ail_target_prev) {
+            ailp->ail_target == ailp->ail_target_prev &&
+            list_empty(&ailp->ail_buf_list)) {
                spin_unlock(&ailp->ail_lock);
                freezable_schedule();
                tout = 0;
@@ -605,7 +664,7 @@
 * The push is run asynchronously in a workqueue, which means the caller needs
 * to handle waiting on the async flush for space to become available.
 * We don't want to interrupt any push that is in progress, hence we only queue
- * work if we set the pushing bit approriately.
+ * work if we set the pushing bit appropriately.
 *
 * We do this unlocked - we only need to know whether there is anything in the
 * AIL at the time we are called. We don't need to access the contents of
@@ -613,10 +672,10 @@
 */
 void
 xfs_ail_push(
-        struct xfs_ail  *ailp,
-        xfs_lsn_t       threshold_lsn)
+        struct xfs_ail          *ailp,
+        xfs_lsn_t               threshold_lsn)
 {
-        xfs_log_item_t  *lip;
+        struct xfs_log_item     *lip;

         lip = xfs_ail_min(ailp);
         if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) ||
@@ -671,6 +730,28 @@
         finish_wait(&ailp->ail_empty, &wait);
 }

+void
+xfs_ail_update_finish(
+        struct xfs_ail          *ailp,
+        xfs_lsn_t               old_lsn) __releases(ailp->ail_lock)
+{
+        struct xfs_mount        *mp = ailp->ail_mount;
+
+        /* if the tail lsn hasn't changed, don't do updates or wakeups. */
+        if (!old_lsn || old_lsn == __xfs_ail_min_lsn(ailp)) {
+                spin_unlock(&ailp->ail_lock);
+                return;
+        }
+
+        if (!XFS_FORCED_SHUTDOWN(mp))
+                xlog_assign_tail_lsn_locked(mp);
+
+        if (list_empty(&ailp->ail_head))
+                wake_up_all(&ailp->ail_empty);
+        spin_unlock(&ailp->ail_lock);
+        xfs_log_space_wake(mp);
+}
+
 /*
 * xfs_trans_ail_update - bulk AIL insertion operation.
 *
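Note (not part of the patch): xfs_ail_update_finish() consolidates logic that the update and delete paths previously open-coded. Callers capture the tail LSN before touching the list; the helper recomputes the minimum and only moves the log tail and wakes waiters when it actually changed. A standalone userspace sketch of that contract, with illustrative names only:

    /*
     * Standalone sketch of the tail-LSN contract above: deletion returns the
     * removed item's LSN only if it was the tail item, and the finish step
     * acts only when the minimum actually moved.  Build: cc -o tail tail.c.
     */
    #include <stdio.h>

    static long ail[8] = { 100, 200, 300 };     /* item LSNs, ail[0] = tail */
    static int count = 3;

    /* Mirrors the new xfs_ail_delete_one() return contract. */
    static long delete_one(int i)
    {
            long lsn = ail[i];
            int was_tail = (i == 0);

            for (; i < count - 1; i++)
                    ail[i] = ail[i + 1];
            count--;
            return was_tail ? lsn : 0;
    }

    /* Mirrors xfs_ail_update_finish(): skip the work if nothing moved. */
    static void update_finish(long old_lsn)
    {
            long min = count ? ail[0] : 0;

            if (!old_lsn || old_lsn == min)
                    return;     /* tail unchanged: no update, no wakeups */
            printf("log tail moves from %ld to %ld\n", old_lsn, min);
    }

    int main(void)
    {
            update_finish(delete_one(1));   /* middle item: prints nothing */
            update_finish(delete_one(0));   /* tail item: 100 -> 300 */
            return 0;
    }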
@@ -701,8 +782,8 @@
         int                     nr_items,
         xfs_lsn_t               lsn) __releases(ailp->ail_lock)
 {
-        xfs_log_item_t          *mlip;
-        int                     mlip_changed = 0;
+        struct xfs_log_item     *mlip;
+        xfs_lsn_t               tail_lsn = 0;
         int                     i;
         LIST_HEAD(tmp);

@@ -717,9 +798,10 @@
                                 continue;

                         trace_xfs_ail_move(lip, lip->li_lsn, lsn);
+                        if (mlip == lip && !tail_lsn)
+                                tail_lsn = lip->li_lsn;
+
                         xfs_ail_delete(ailp, lip);
-                        if (mlip == lip)
-                                mlip_changed = 1;
                 } else {
                         trace_xfs_ail_insert(lip, 0, lsn);
                 }
@@ -730,66 +812,58 @@
         if (!list_empty(&tmp))
                 xfs_ail_splice(ailp, cur, &tmp, lsn);

-        if (mlip_changed) {
-                if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
-                        xlog_assign_tail_lsn_locked(ailp->ail_mount);
-                spin_unlock(&ailp->ail_lock);
-
-                xfs_log_space_wake(ailp->ail_mount);
-        } else {
-                spin_unlock(&ailp->ail_lock);
-        }
+        xfs_ail_update_finish(ailp, tail_lsn);
 }

-bool
+/* Insert a log item into the AIL. */
+void
+xfs_trans_ail_insert(
+        struct xfs_ail          *ailp,
+        struct xfs_log_item     *lip,
+        xfs_lsn_t               lsn)
+{
+        spin_lock(&ailp->ail_lock);
+        xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
+}
+
+/*
+ * Delete one log item from the AIL.
+ *
+ * If this item was at the tail of the AIL, return the LSN of the log item so
+ * that we can use it to check if the LSN of the tail of the log has moved
+ * when finishing up the AIL delete process in xfs_ail_update_finish().
+ */
+xfs_lsn_t
 xfs_ail_delete_one(
         struct xfs_ail          *ailp,
         struct xfs_log_item     *lip)
 {
         struct xfs_log_item     *mlip = xfs_ail_min(ailp);
+        xfs_lsn_t               lsn = lip->li_lsn;

         trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
         xfs_ail_delete(ailp, lip);
-        xfs_clear_li_failed(lip);
         clear_bit(XFS_LI_IN_AIL, &lip->li_flags);
         lip->li_lsn = 0;

-        return mlip == lip;
+        if (mlip == lip)
+                return lsn;
+        return 0;
 }

-/**
- * Remove a log items from the AIL
- *
- * @xfs_trans_ail_delete_bulk takes an array of log items that all need to
- * removed from the AIL. The caller is already holding the AIL lock, and done
- * all the checks necessary to ensure the items passed in via @log_items are
- * ready for deletion. This includes checking that the items are in the AIL.
- *
- * For each log item to be removed, unlink it from the AIL, clear the IN_AIL
- * flag from the item and reset the item's lsn to 0. If we remove the first
- * item in the AIL, update the log tail to match the new minimum LSN in the
- * AIL.
- *
- * This function will not drop the AIL lock until all items are removed from
- * the AIL to minimise the amount of lock traffic on the AIL. This does not
- * greatly increase the AIL hold time, but does significantly reduce the amount
- * of traffic on the lock, especially during IO completion.
- *
- * This function must be called with the AIL lock held. The lock is dropped
- * before returning.
- */
 void
 xfs_trans_ail_delete(
-        struct xfs_ail          *ailp,
         struct xfs_log_item     *lip,
-        int                     shutdown_type) __releases(ailp->ail_lock)
+        int                     shutdown_type)
 {
+        struct xfs_ail          *ailp = lip->li_ailp;
         struct xfs_mount        *mp = ailp->ail_mount;
-        bool                    mlip_changed;
+        xfs_lsn_t               tail_lsn;

+        spin_lock(&ailp->ail_lock);
         if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
                 spin_unlock(&ailp->ail_lock);
-                if (!XFS_FORCED_SHUTDOWN(mp)) {
+                if (shutdown_type && !XFS_FORCED_SHUTDOWN(mp)) {
                         xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
                 "%s: attempting to delete a log item that is not in the AIL",
                                         __func__);
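Note (not part of the patch): this hunk also changes the calling convention. xfs_trans_ail_delete() now derives the AIL from lip->li_ailp and takes the AIL lock itself, and passing a shutdown_type of 0 suppresses the not-in-AIL alert. A minimal pthread-based model of the new convention, with hypothetical names only:

    /*
     * Model of the new xfs_trans_ail_delete() convention: the callee finds
     * the AIL through the item and handles its own locking.
     * Build: cc -pthread -o del del.c.  Not the kernel API.
     */
    #include <pthread.h>
    #include <stdio.h>

    struct ail { pthread_mutex_t lock; };
    struct log_item { struct ail *ailp; int in_ail; };

    static void trans_ail_delete(struct log_item *lip, int shutdown_type)
    {
            struct ail *ailp = lip->ailp;   /* like lip->li_ailp */

            pthread_mutex_lock(&ailp->lock);
            if (!lip->in_ail) {
                    pthread_mutex_unlock(&ailp->lock);
                    /* shutdown_type == 0 suppresses the alert */
                    if (shutdown_type)
                            fprintf(stderr, "item not in AIL\n");
                    return;
            }
            lip->in_ail = 0;
            /* in XFS, xfs_ail_update_finish() drops the lock */
            pthread_mutex_unlock(&ailp->lock);
    }

    int main(void)
    {
            struct ail a = { .lock = PTHREAD_MUTEX_INITIALIZER };
            struct log_item li = { .ailp = &a, .in_ail = 1 };

            trans_ail_delete(&li, 1);   /* removes the item */
            trans_ail_delete(&li, 0);   /* repeat delete: silent no-op */
            return 0;
    }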
@@ -798,17 +872,10 @@
                 return;
         }

-        mlip_changed = xfs_ail_delete_one(ailp, lip);
-        if (mlip_changed) {
-                if (!XFS_FORCED_SHUTDOWN(mp))
-                        xlog_assign_tail_lsn_locked(mp);
-                if (list_empty(&ailp->ail_head))
-                        wake_up_all(&ailp->ail_empty);
-        }
-
-        spin_unlock(&ailp->ail_lock);
-        if (mlip_changed)
-                xfs_log_space_wake(ailp->ail_mount);
+        /* xfs_ail_update_finish() drops the AIL lock */
+        xfs_clear_li_failed(lip);
+        tail_lsn = xfs_ail_delete_one(ailp, lip);
+        xfs_ail_update_finish(ailp, tail_lsn);
 }

 int
@@ -829,7 +896,7 @@
         init_waitqueue_head(&ailp->ail_empty);

         ailp->ail_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
-                        ailp->ail_mount->m_fsname);
+                        ailp->ail_mount->m_super->s_id);
         if (IS_ERR(ailp->ail_task))
                 goto out_free_ailp;