2024-09-20 a36159eec6ca17402b0e146b86efaf76568dc353
kernel/fs/xfs/libxfs/xfs_defer.c
@@ -9,8 +9,6 @@
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_trans.h"
@@ -18,6 +16,8 @@
 #include "xfs_inode.h"
 #include "xfs_inode_item.h"
 #include "xfs_trace.h"
+#include "xfs_icache.h"
+#include "xfs_log.h"
 
 /*
  * Deferred Operations in XFS
@@ -172,7 +172,26 @@
  * reoccur.
  */
 
-static const struct xfs_defer_op_type *defer_op_types[XFS_DEFER_OPS_TYPE_MAX];
+static const struct xfs_defer_op_type *defer_op_types[] = {
+	[XFS_DEFER_OPS_TYPE_BMAP]	= &xfs_bmap_update_defer_type,
+	[XFS_DEFER_OPS_TYPE_REFCOUNT]	= &xfs_refcount_update_defer_type,
+	[XFS_DEFER_OPS_TYPE_RMAP]	= &xfs_rmap_update_defer_type,
+	[XFS_DEFER_OPS_TYPE_FREE]	= &xfs_extent_free_defer_type,
+	[XFS_DEFER_OPS_TYPE_AGFL_FREE]	= &xfs_agfl_free_defer_type,
+};
+
+static void
+xfs_defer_create_intent(
+	struct xfs_trans		*tp,
+	struct xfs_defer_pending	*dfp,
+	bool				sort)
+{
+	const struct xfs_defer_op_type	*ops = defer_op_types[dfp->dfp_type];
+
+	if (!dfp->dfp_intent)
+		dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work,
+				dfp->dfp_count, sort);
+}
 
 /*
  * For each pending item in the intake list, log its intent item and the
@@ -183,17 +202,11 @@
 xfs_defer_create_intents(
 	struct xfs_trans		*tp)
 {
-	struct list_head		*li;
 	struct xfs_defer_pending	*dfp;
 
 	list_for_each_entry(dfp, &tp->t_dfops, dfp_list) {
-		dfp->dfp_intent = dfp->dfp_type->create_intent(tp,
-				dfp->dfp_count);
 		trace_xfs_defer_create_intent(tp->t_mountp, dfp);
-		list_sort(tp->t_mountp, &dfp->dfp_work,
-				dfp->dfp_type->diff_items);
-		list_for_each(li, &dfp->dfp_work)
-			dfp->dfp_type->log_item(tp, dfp->dfp_intent, li);
+		xfs_defer_create_intent(tp, dfp, true);
 	}
 }
 
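The two hunks above replace run-time registration of defer op types (xfs_defer_init_op_type(), deleted at the end of this patch) with a table bound at compile time: struct xfs_defer_pending now records the enum in dfp_type, and every call site dispatches through defer_op_types[dfp->dfp_type]. A minimal userspace sketch of that pattern follows; the demo_* names are hypothetical stand-ins, not the kernel's types.

#include <stdio.h>

enum demo_ops_type { DEMO_OPS_FREE, DEMO_OPS_RMAP, DEMO_OPS_TYPE_MAX };

struct demo_op_type {
	const char	*name;
	void		(*finish)(int item);
};

static void demo_free_finish(int item) { printf("free %d\n", item); }
static void demo_rmap_finish(int item) { printf("rmap %d\n", item); }

static const struct demo_op_type demo_free_type = { "free", demo_free_finish };
static const struct demo_op_type demo_rmap_type = { "rmap", demo_rmap_finish };

/* Bound at compile time: no init call, no writable global state. */
static const struct demo_op_type *demo_op_types[] = {
	[DEMO_OPS_FREE]	= &demo_free_type,
	[DEMO_OPS_RMAP]	= &demo_rmap_type,
};

/* Same shape of check as the BUILD_BUG_ON this patch adds to xfs_defer_add(). */
_Static_assert(sizeof(demo_op_types) / sizeof(demo_op_types[0]) ==
	       DEMO_OPS_TYPE_MAX, "table must cover every type");

int main(void)
{
	enum demo_ops_type type = DEMO_OPS_RMAP;

	demo_op_types[type]->finish(42);	/* dispatch through the table */
	return 0;
}

Storing the enum instead of an ops pointer also lets xfs_defer_create_intent() and the relog path added below look the ops up lazily, and the const table can live in read-only data.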
@@ -204,14 +217,16 @@
 	struct list_head		*dop_pending)
 {
 	struct xfs_defer_pending	*dfp;
+	const struct xfs_defer_op_type	*ops;
 
 	trace_xfs_defer_trans_abort(tp, _RET_IP_);
 
 	/* Abort intent items that don't have a done item. */
 	list_for_each_entry(dfp, dop_pending, dfp_list) {
+		ops = defer_op_types[dfp->dfp_type];
 		trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
 		if (dfp->dfp_intent && !dfp->dfp_done) {
-			dfp->dfp_type->abort_intent(dfp->dfp_intent);
+			ops->abort_intent(dfp->dfp_intent);
 			dfp->dfp_intent = NULL;
 		}
 	}
@@ -228,9 +243,12 @@
 	struct xfs_log_item		*lip;
 	struct xfs_buf			*bplist[XFS_DEFER_OPS_NR_BUFS];
 	struct xfs_inode		*iplist[XFS_DEFER_OPS_NR_INODES];
+	unsigned int			ordered = 0; /* bitmap */
 	int				bpcount = 0, ipcount = 0;
 	int				i;
 	int				error;
+
+	BUILD_BUG_ON(NBBY * sizeof(ordered) < XFS_DEFER_OPS_NR_BUFS);
 
 	list_for_each_entry(lip, &tp->t_items, li_trans) {
 		switch (lip->li_type) {
@@ -242,7 +260,10 @@
 				ASSERT(0);
 				return -EFSCORRUPTED;
 			}
-			xfs_trans_dirty_buf(tp, bli->bli_buf);
+			if (bli->bli_flags & XFS_BLI_ORDERED)
+				ordered |= (1U << bpcount);
+			else
+				xfs_trans_dirty_buf(tp, bli->bli_buf);
 			bplist[bpcount++] = bli->bli_buf;
 		}
 		break;
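A rolled transaction must not call xfs_trans_dirty_buf() on an XFS_BLI_ORDERED buffer, so the hunks above and below remember which bplist[] slots held ordered buffers in the ordered bitmap and re-mark them with xfs_trans_ordered_buf() after the roll. The BUILD_BUG_ON proves at compile time that the bitmap is wide enough for every slot. A standalone sketch of that guard, using _Static_assert and CHAR_BIT as userspace stand-ins for the kernel's BUILD_BUG_ON and NBBY (NR_BUFS is a made-up bound):

#include <limits.h>
#include <stdio.h>

#define NR_BUFS 8	/* hypothetical, stands in for XFS_DEFER_OPS_NR_BUFS */

/* Fail the build if the bitmap cannot cover every slot. */
_Static_assert(CHAR_BIT * sizeof(unsigned int) >= NR_BUFS,
	       "ordered bitmap too narrow");

int main(void)
{
	unsigned int ordered = 0;	/* one bit per tracked buffer */

	ordered |= 1U << 3;		/* mark slot 3, as the patch does */

	for (int i = 0; i < NR_BUFS; i++)
		if (ordered & (1U << i))
			printf("slot %d is ordered\n", i);
	return 0;
}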
@@ -283,28 +304,14 @@
 	/* Rejoin the buffers and dirty them so the log moves forward. */
 	for (i = 0; i < bpcount; i++) {
 		xfs_trans_bjoin(tp, bplist[i]);
+		if (ordered & (1U << i))
+			xfs_trans_ordered_buf(tp, bplist[i]);
 		xfs_trans_bhold(tp, bplist[i]);
 	}
 
 	if (error)
 		trace_xfs_defer_trans_roll_error(tp, error);
 	return error;
-}
-
-/*
- * Reset an already used dfops after finish.
- */
-static void
-xfs_defer_reset(
	struct xfs_trans	*tp)
-{
-	ASSERT(list_empty(&tp->t_dfops));
-
-	/*
-	 * Low mode state transfers across transaction rolls to mirror dfops
-	 * lifetime. Clear it now that dfops is reset.
-	 */
-	tp->t_flags &= ~XFS_TRANS_LOWMODE;
 }
 
 /*
@@ -319,22 +326,124 @@
 	struct xfs_defer_pending	*pli;
 	struct list_head		*pwi;
 	struct list_head		*n;
+	const struct xfs_defer_op_type	*ops;
 
 	/*
 	 * Free the pending items. Caller should already have arranged
 	 * for the intent items to be released.
 	 */
 	list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) {
+		ops = defer_op_types[dfp->dfp_type];
 		trace_xfs_defer_cancel_list(mp, dfp);
 		list_del(&dfp->dfp_list);
 		list_for_each_safe(pwi, n, &dfp->dfp_work) {
 			list_del(pwi);
 			dfp->dfp_count--;
-			dfp->dfp_type->cancel_item(pwi);
+			ops->cancel_item(pwi);
 		}
 		ASSERT(dfp->dfp_count == 0);
 		kmem_free(dfp);
 	}
+}
+
+/*
+ * Prevent a log intent item from pinning the tail of the log by logging a
+ * done item to release the intent item; and then log a new intent item.
+ * The caller should provide a fresh transaction and roll it after we're done.
+ */
+static int
+xfs_defer_relog(
+	struct xfs_trans		**tpp,
+	struct list_head		*dfops)
+{
+	struct xlog			*log = (*tpp)->t_mountp->m_log;
+	struct xfs_defer_pending	*dfp;
+	xfs_lsn_t			threshold_lsn = NULLCOMMITLSN;
+
+
+	ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES);
+
+	list_for_each_entry(dfp, dfops, dfp_list) {
+		/*
+		 * If the log intent item for this deferred op is not a part of
+		 * the current log checkpoint, relog the intent item to keep
+		 * the log tail moving forward. We're ok with this being racy
+		 * because an incorrect decision means we'll be a little slower
+		 * at pushing the tail.
+		 */
+		if (dfp->dfp_intent == NULL ||
+		    xfs_log_item_in_current_chkpt(dfp->dfp_intent))
+			continue;
+
+		/*
+		 * Figure out where we need the tail to be in order to maintain
+		 * the minimum required free space in the log. Only sample
+		 * the log threshold once per call.
+		 */
+		if (threshold_lsn == NULLCOMMITLSN) {
+			threshold_lsn = xlog_grant_push_threshold(log, 0);
+			if (threshold_lsn == NULLCOMMITLSN)
+				break;
+		}
+		if (XFS_LSN_CMP(dfp->dfp_intent->li_lsn, threshold_lsn) >= 0)
+			continue;
+
+		trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp);
+		XFS_STATS_INC((*tpp)->t_mountp, defer_relog);
+		dfp->dfp_intent = xfs_trans_item_relog(dfp->dfp_intent, *tpp);
+	}
+
+	if ((*tpp)->t_flags & XFS_TRANS_DIRTY)
+		return xfs_defer_trans_roll(tpp);
+	return 0;
+}
+
+/*
+ * Log an intent-done item for the first pending intent, and finish the work
+ * items.
+ */
+static int
+xfs_defer_finish_one(
+	struct xfs_trans		*tp,
+	struct xfs_defer_pending	*dfp)
+{
+	const struct xfs_defer_op_type	*ops = defer_op_types[dfp->dfp_type];
+	struct xfs_btree_cur		*state = NULL;
+	struct list_head		*li, *n;
+	int				error;
+
+	trace_xfs_defer_pending_finish(tp->t_mountp, dfp);
+
+	dfp->dfp_done = ops->create_done(tp, dfp->dfp_intent, dfp->dfp_count);
+	list_for_each_safe(li, n, &dfp->dfp_work) {
+		list_del(li);
+		dfp->dfp_count--;
+		error = ops->finish_item(tp, dfp->dfp_done, li, &state);
+		if (error == -EAGAIN) {
+			/*
+			 * Caller wants a fresh transaction; put the work item
+			 * back on the list and log a new log intent item to
+			 * replace the old one. See "Requesting a Fresh
+			 * Transaction while Finishing Deferred Work" above.
+			 */
+			list_add(li, &dfp->dfp_work);
+			dfp->dfp_count++;
+			dfp->dfp_done = NULL;
+			dfp->dfp_intent = NULL;
+			xfs_defer_create_intent(tp, dfp, false);
+		}
+
+		if (error)
+			goto out;
+	}
+
+	/* Done with the dfp, free it. */
+	list_del(&dfp->dfp_list);
+	kmem_free(dfp);
+out:
+	if (ops->finish_cleanup)
+		ops->finish_cleanup(tp, state, error);
+	return error;
 }
 
 /*
@@ -350,11 +459,7 @@
 	struct xfs_trans		**tp)
 {
 	struct xfs_defer_pending	*dfp;
-	struct list_head		*li;
-	struct list_head		*n;
-	void				*state;
 	int				error = 0;
-	void				(*cleanup_fn)(struct xfs_trans *, void *, int);
 	LIST_HEAD(dop_pending);
 
 	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
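xfs_defer_relog(), added above, relogs an intent only when it exists, is not already part of the open checkpoint, and has an LSN behind the grant-push threshold, i.e. it is what pins the log tail; the threshold is sampled once per pass. A toy model of that filter, with plain longs standing in for xfs_lsn_t and a constant standing in for xlog_grant_push_threshold() (all names hypothetical):

#include <stdbool.h>
#include <stdio.h>

#define NO_LSN (-1L)	/* stands in for a NULL dfp_intent / NULLCOMMITLSN */

static bool in_current_checkpoint(long lsn)
{
	return lsn >= 900;	/* pretend the open checkpoint starts at 900 */
}

/* Mirror the two early-continue tests in xfs_defer_relog(). */
static bool should_relog(long intent_lsn, long threshold_lsn)
{
	if (intent_lsn == NO_LSN || in_current_checkpoint(intent_lsn))
		return false;			/* already fresh enough */
	return intent_lsn < threshold_lsn;	/* behind the push target */
}

int main(void)
{
	long threshold = 500;	/* sampled once per pass, as in the patch */
	long lsns[] = { NO_LSN, 100, 700, 950 };

	for (unsigned i = 0; i < sizeof(lsns) / sizeof(lsns[0]); i++)
		printf("lsn %ld: relog=%d\n", lsns[i],
		       should_relog(lsns[i], threshold));
	return 0;
}

The racy-check caveat in the new comment applies to the model too: a stale answer only delays a tail push, it cannot corrupt anything.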
@@ -363,88 +468,44 @@
 
 	/* Until we run out of pending work to finish... */
 	while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) {
-		/* log intents and pull in intake items */
-		xfs_defer_create_intents(*tp);
-		list_splice_tail_init(&(*tp)->t_dfops, &dop_pending);
-
 		/*
-		 * Roll the transaction.
+		 * Deferred items that are created in the process of finishing
+		 * other deferred work items should be queued at the head of
+		 * the pending list, which puts them ahead of the deferred work
+		 * that was created by the caller. This keeps the number of
+		 * pending work items to a minimum, which decreases the amount
+		 * of time that any one intent item can stick around in memory,
+		 * pinning the log tail.
 		 */
+		xfs_defer_create_intents(*tp);
+		list_splice_init(&(*tp)->t_dfops, &dop_pending);
+
 		error = xfs_defer_trans_roll(tp);
 		if (error)
-			goto out;
+			goto out_shutdown;
 
-		/* Log an intent-done item for the first pending item. */
+		/* Possibly relog intent items to keep the log moving. */
+		error = xfs_defer_relog(tp, &dop_pending);
+		if (error)
+			goto out_shutdown;
+
 		dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
 				dfp_list);
-		trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp);
-		dfp->dfp_done = dfp->dfp_type->create_done(*tp, dfp->dfp_intent,
-				dfp->dfp_count);
-		cleanup_fn = dfp->dfp_type->finish_cleanup;
-
-		/* Finish the work items. */
-		state = NULL;
-		list_for_each_safe(li, n, &dfp->dfp_work) {
-			list_del(li);
-			dfp->dfp_count--;
-			error = dfp->dfp_type->finish_item(*tp, li,
-					dfp->dfp_done, &state);
-			if (error == -EAGAIN) {
-				/*
-				 * Caller wants a fresh transaction;
-				 * put the work item back on the list
-				 * and jump out.
-				 */
-				list_add(li, &dfp->dfp_work);
-				dfp->dfp_count++;
-				break;
-			} else if (error) {
-				/*
-				 * Clean up after ourselves and jump out.
-				 * xfs_defer_cancel will take care of freeing
-				 * all these lists and stuff.
-				 */
-				if (cleanup_fn)
-					cleanup_fn(*tp, state, error);
-				goto out;
-			}
-		}
-		if (error == -EAGAIN) {
-			/*
-			 * Caller wants a fresh transaction, so log a
-			 * new log intent item to replace the old one
-			 * and roll the transaction. See "Requesting
-			 * a Fresh Transaction while Finishing
-			 * Deferred Work" above.
-			 */
-			dfp->dfp_intent = dfp->dfp_type->create_intent(*tp,
-					dfp->dfp_count);
-			dfp->dfp_done = NULL;
-			list_for_each(li, &dfp->dfp_work)
-				dfp->dfp_type->log_item(*tp, dfp->dfp_intent,
-						li);
-		} else {
-			/* Done with the dfp, free it. */
-			list_del(&dfp->dfp_list);
-			kmem_free(dfp);
-		}
-
-		if (cleanup_fn)
-			cleanup_fn(*tp, state, error);
-	}
-
-out:
-	if (error) {
-		xfs_defer_trans_abort(*tp, &dop_pending);
-		xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
-		trace_xfs_defer_finish_error(*tp, error);
-		xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending);
-		xfs_defer_cancel(*tp);
-		return error;
+		error = xfs_defer_finish_one(*tp, dfp);
+		if (error && error != -EAGAIN)
+			goto out_shutdown;
 	}
 
 	trace_xfs_defer_finish_done(*tp, _RET_IP_);
 	return 0;
+
+out_shutdown:
+	xfs_defer_trans_abort(*tp, &dop_pending);
+	xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
+	trace_xfs_defer_finish_error(*tp, error);
+	xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending);
+	xfs_defer_cancel(*tp);
+	return error;
 }
 
 int
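Note the switch from list_splice_tail_init() to list_splice_init() in the loop above: per the new comment, dfops created while finishing earlier work are spliced at the head of dop_pending so they are finished first and their intents die quickly. A minimal singly-linked model of why head-splicing changes processing order (hypothetical names, not the kernel list API):

#include <stdio.h>
#include <stddef.h>

struct node { const char *name; struct node *next; };

/* Put src's chain in front of dst's current contents ("head splice"). */
static struct node *splice_head(struct node *src, struct node *dst)
{
	struct node *tail = src;

	if (!src)
		return dst;
	while (tail->next)
		tail = tail->next;
	tail->next = dst;
	return src;
}

int main(void)
{
	struct node old2 = { "old-2", NULL };
	struct node old1 = { "old-1", &old2 };
	struct node new1 = { "new-1", NULL };
	struct node *pending = &old1;

	/* Finishing "old-1" spawned "new-1"; head-splice runs it next. */
	pending = splice_head(&new1, pending);

	for (struct node *n = pending; n; n = n->next)
		printf("%s\n", n->name);	/* new-1, old-1, old-2 */
	return 0;
}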
@@ -468,7 +529,10 @@
 			return error;
 		}
 	}
-	xfs_defer_reset(*tp);
+
+	/* Reset LOWMODE now that we've finished all the dfops. */
+	ASSERT(list_empty(&(*tp)->t_dfops));
+	(*tp)->t_flags &= ~XFS_TRANS_LOWMODE;
 	return 0;
 }
 
@@ -490,8 +554,10 @@
 	struct list_head		*li)
 {
 	struct xfs_defer_pending	*dfp = NULL;
+	const struct xfs_defer_op_type	*ops;
 
 	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+	BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX);
 
 	/*
	 * Add the item to a pending item at the end of the intake list.
@@ -501,15 +567,15 @@
 	if (!list_empty(&tp->t_dfops)) {
 		dfp = list_last_entry(&tp->t_dfops,
 				struct xfs_defer_pending, dfp_list);
-		if (dfp->dfp_type->type != type ||
-		    (dfp->dfp_type->max_items &&
-		     dfp->dfp_count >= dfp->dfp_type->max_items))
+		ops = defer_op_types[dfp->dfp_type];
+		if (dfp->dfp_type != type ||
+		    (ops->max_items && dfp->dfp_count >= ops->max_items))
 			dfp = NULL;
 	}
 	if (!dfp) {
 		dfp = kmem_alloc(sizeof(struct xfs_defer_pending),
-				KM_SLEEP | KM_NOFS);
-		dfp->dfp_type = defer_op_types[type];
+				KM_NOFS);
+		dfp->dfp_type = type;
 		dfp->dfp_intent = NULL;
 		dfp->dfp_done = NULL;
 		dfp->dfp_count = 0;
@@ -519,14 +585,6 @@
 
 	list_add_tail(li, &dfp->dfp_work);
 	dfp->dfp_count++;
-}
-
-/* Initialize a deferred operation list. */
-void
-xfs_defer_init_op_type(
-	const struct xfs_defer_op_type *type)
-{
-	defer_op_types[type->type] = type;
 }
 
 /*
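In xfs_defer_add(), a new work item coalesces into the most recent pending item only while the defer type matches and ops->max_items (0 meaning unlimited) has not been reached; otherwise a fresh xfs_defer_pending is allocated. A toy model of just that rule (hypothetical names):

#include <stdbool.h>
#include <stdio.h>

/* Mirror the dfp-reuse test in xfs_defer_add(). */
static bool can_coalesce(int last_type, int last_count,
			 int new_type, int max_items)
{
	if (last_type != new_type)
		return false;
	return max_items == 0 || last_count < max_items;
}

int main(void)
{
	int count = 0;

	/* Same type throughout, limit 3: the 4th item forces a new entry. */
	for (int i = 0; i < 4; i++) {
		if (!can_coalesce(1, count, 1, 3)) {
			printf("allocate new pending item\n");
			count = 0;
		}
		count++;
		printf("append, count=%d\n", count);
	}
	return 0;
}

The BUILD_BUG_ON added to xfs_defer_add() earlier in this patch pins the other invariant: defer_op_types[] must have an entry for every XFS_DEFER_OPS_TYPE_* value.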
@@ -548,6 +606,139 @@
 	 * that behavior.
 	 */
 	dtp->t_flags |= (stp->t_flags & XFS_TRANS_LOWMODE);
+	stp->t_flags &= ~XFS_TRANS_LOWMODE;
+}
 
-	xfs_defer_reset(stp);
+/*
+ * Prepare a chain of fresh deferred ops work items to be completed later. Log
+ * recovery requires the ability to put off until later the actual finishing
+ * work so that it can process unfinished items recovered from the log in
+ * correct order.
+ *
+ * Create and log intent items for all the work that we're capturing so that we
+ * can be assured that the items will get replayed if the system goes down
+ * before log recovery gets a chance to finish the work it put off. The entire
+ * deferred ops state is transferred to the capture structure and the
+ * transaction is then ready for the caller to commit it. If there are no
+ * intent items to capture, this function returns NULL.
+ *
+ * If capture_ip is not NULL, the capture structure will obtain an extra
+ * reference to the inode.
+ */
+static struct xfs_defer_capture *
+xfs_defer_ops_capture(
+	struct xfs_trans		*tp,
+	struct xfs_inode		*capture_ip)
+{
+	struct xfs_defer_capture	*dfc;
+
+	if (list_empty(&tp->t_dfops))
+		return NULL;
+
+	/* Create an object to capture the defer ops. */
+	dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS);
+	INIT_LIST_HEAD(&dfc->dfc_list);
+	INIT_LIST_HEAD(&dfc->dfc_dfops);
+
+	xfs_defer_create_intents(tp);
+
+	/* Move the dfops chain and transaction state to the capture struct. */
+	list_splice_init(&tp->t_dfops, &dfc->dfc_dfops);
+	dfc->dfc_tpflags = tp->t_flags & XFS_TRANS_LOWMODE;
+	tp->t_flags &= ~XFS_TRANS_LOWMODE;
+
+	/* Capture the remaining block reservations along with the dfops. */
+	dfc->dfc_blkres = tp->t_blk_res - tp->t_blk_res_used;
+	dfc->dfc_rtxres = tp->t_rtx_res - tp->t_rtx_res_used;
+
+	/* Preserve the log reservation size. */
+	dfc->dfc_logres = tp->t_log_res;
+
+	/*
+	 * Grab an extra reference to this inode and attach it to the capture
+	 * structure.
+	 */
+	if (capture_ip) {
+		ihold(VFS_I(capture_ip));
+		dfc->dfc_capture_ip = capture_ip;
+	}
+
+	return dfc;
+}
+
+/* Release all resources that we used to capture deferred ops. */
+void
+xfs_defer_ops_release(
+	struct xfs_mount		*mp,
+	struct xfs_defer_capture	*dfc)
+{
+	xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
+	if (dfc->dfc_capture_ip)
+		xfs_irele(dfc->dfc_capture_ip);
+	kmem_free(dfc);
+}
+
+/*
+ * Capture any deferred ops and commit the transaction. This is the last step
+ * needed to finish a log intent item that we recovered from the log. If any
+ * of the deferred ops operate on an inode, the caller must pass in that inode
+ * so that the reference can be transferred to the capture structure. The
+ * caller must hold ILOCK_EXCL on the inode, and must unlock it before calling
+ * xfs_defer_ops_continue.
+ */
+int
+xfs_defer_ops_capture_and_commit(
+	struct xfs_trans		*tp,
+	struct xfs_inode		*capture_ip,
+	struct list_head		*capture_list)
+{
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_defer_capture	*dfc;
+	int				error;
+
+	ASSERT(!capture_ip || xfs_isilocked(capture_ip, XFS_ILOCK_EXCL));
+
+	/* If we don't capture anything, commit transaction and exit. */
+	dfc = xfs_defer_ops_capture(tp, capture_ip);
+	if (!dfc)
+		return xfs_trans_commit(tp);
+
+	/* Commit the transaction and add the capture structure to the list. */
+	error = xfs_trans_commit(tp);
+	if (error) {
+		xfs_defer_ops_release(mp, dfc);
+		return error;
+	}
+
+	list_add_tail(&dfc->dfc_list, capture_list);
+	return 0;
+}
+
+/*
+ * Attach a chain of captured deferred ops to a new transaction and free the
+ * capture structure. If an inode was captured, it will be passed back to the
+ * caller with ILOCK_EXCL held and joined to the transaction with lockflags==0.
+ * The caller now owns the inode reference.
+ */
+void
+xfs_defer_ops_continue(
+	struct xfs_defer_capture	*dfc,
+	struct xfs_trans		*tp,
+	struct xfs_inode		**captured_ipp)
+{
+	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+	ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
+
+	/* Lock and join the captured inode to the new transaction. */
+	if (dfc->dfc_capture_ip) {
+		xfs_ilock(dfc->dfc_capture_ip, XFS_ILOCK_EXCL);
+		xfs_trans_ijoin(tp, dfc->dfc_capture_ip, 0);
+	}
+	*captured_ipp = dfc->dfc_capture_ip;
+
+	/* Move captured dfops chain and state to the transaction. */
+	list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
+	tp->t_flags |= dfc->dfc_tpflags;
+
+	kmem_free(dfc);
 }
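Taken together, xfs_defer_ops_capture_and_commit() and xfs_defer_ops_continue() let log recovery move a whole dfops chain, the LOWMODE flag, the unused block/rt reservations, and an optional inode reference out of one transaction and into a later one. A plain-C model of that ownership handoff, with a string and an int standing in for the real chain and flags (all names hypothetical):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct trans {
	char	work[32];	/* stands in for the t_dfops chain */
	int	flags;		/* stands in for XFS_TRANS_LOWMODE */
};

struct capture {
	char	work[32];
	int	tpflags;
};

/* Like xfs_defer_ops_capture(): move state out of tp; NULL if nothing to do. */
static struct capture *capture(struct trans *tp)
{
	struct capture *dfc;

	if (tp->work[0] == '\0')
		return NULL;
	dfc = calloc(1, sizeof(*dfc));
	strcpy(dfc->work, tp->work);
	dfc->tpflags = tp->flags;
	tp->work[0] = '\0';	/* tp no longer owns the chain */
	tp->flags = 0;
	return dfc;
}

/* Like xfs_defer_ops_continue(): move state into a new tp, free the capture. */
static void cont(struct capture *dfc, struct trans *tp)
{
	strcpy(tp->work, dfc->work);
	tp->flags |= dfc->tpflags;
	free(dfc);
}

int main(void)
{
	struct trans recovery = { "unfinished-intents", 1 };
	struct trans fresh = { "", 0 };
	struct capture *dfc = capture(&recovery);

	if (dfc)
		cont(dfc, &fresh);	/* later, in a new transaction */
	printf("fresh tp owns: %s (flags %d)\n", fresh.work, fresh.flags);
	return 0;
}

The point of the indirection is that the recovery transaction can commit, releasing its log reservation, without finishing the work; the capture object keeps the chain, and the intents logged by xfs_defer_create_intents() guarantee replay if the system crashes in between.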