From 05e59e5fb0064c97a1c10921ecd549f2d4a58565 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Wed, 09 Oct 2024 06:14:40 +0000
Subject: [PATCH] add REDIRECT
---
kernel/fs/xfs/libxfs/xfs_defer.c | 429 ++++++++++++++++++++++++++++++++++++++--------------
1 files changed, 310 insertions(+), 119 deletions(-)
diff --git a/kernel/fs/xfs/libxfs/xfs_defer.c b/kernel/fs/xfs/libxfs/xfs_defer.c
index c52beee..eff4a12 100644
--- a/kernel/fs/xfs/libxfs/xfs_defer.c
+++ b/kernel/fs/xfs/libxfs/xfs_defer.c
@@ -9,8 +9,6 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
@@ -18,6 +16,8 @@
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"
+#include "xfs_icache.h"
+#include "xfs_log.h"
/*
* Deferred Operations in XFS
@@ -172,7 +172,26 @@
* reoccur.
*/
-static const struct xfs_defer_op_type *defer_op_types[XFS_DEFER_OPS_TYPE_MAX];
+static const struct xfs_defer_op_type *defer_op_types[] = {
+ [XFS_DEFER_OPS_TYPE_BMAP] = &xfs_bmap_update_defer_type,
+ [XFS_DEFER_OPS_TYPE_REFCOUNT] = &xfs_refcount_update_defer_type,
+ [XFS_DEFER_OPS_TYPE_RMAP] = &xfs_rmap_update_defer_type,
+ [XFS_DEFER_OPS_TYPE_FREE] = &xfs_extent_free_defer_type,
+ [XFS_DEFER_OPS_TYPE_AGFL_FREE] = &xfs_agfl_free_defer_type,
+};
+
+static void
+xfs_defer_create_intent(
+ struct xfs_trans *tp,
+ struct xfs_defer_pending *dfp,
+ bool sort)
+{
+ const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type];
+
+ if (!dfp->dfp_intent)
+ dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work,
+ dfp->dfp_count, sort);
+}
/*
* For each pending item in the intake list, log its intent item and the
@@ -183,17 +202,11 @@
xfs_defer_create_intents(
struct xfs_trans *tp)
{
- struct list_head *li;
struct xfs_defer_pending *dfp;
list_for_each_entry(dfp, &tp->t_dfops, dfp_list) {
- dfp->dfp_intent = dfp->dfp_type->create_intent(tp,
- dfp->dfp_count);
trace_xfs_defer_create_intent(tp->t_mountp, dfp);
- list_sort(tp->t_mountp, &dfp->dfp_work,
- dfp->dfp_type->diff_items);
- list_for_each(li, &dfp->dfp_work)
- dfp->dfp_type->log_item(tp, dfp->dfp_intent, li);
+ xfs_defer_create_intent(tp, dfp, true);
}
}
@@ -204,14 +217,16 @@
struct list_head *dop_pending)
{
struct xfs_defer_pending *dfp;
+ const struct xfs_defer_op_type *ops;
trace_xfs_defer_trans_abort(tp, _RET_IP_);
/* Abort intent items that don't have a done item. */
list_for_each_entry(dfp, dop_pending, dfp_list) {
+ ops = defer_op_types[dfp->dfp_type];
trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
if (dfp->dfp_intent && !dfp->dfp_done) {
- dfp->dfp_type->abort_intent(dfp->dfp_intent);
+ ops->abort_intent(dfp->dfp_intent);
dfp->dfp_intent = NULL;
}
}
@@ -228,9 +243,12 @@
struct xfs_log_item *lip;
struct xfs_buf *bplist[XFS_DEFER_OPS_NR_BUFS];
struct xfs_inode *iplist[XFS_DEFER_OPS_NR_INODES];
+ unsigned int ordered = 0; /* bitmap */
int bpcount = 0, ipcount = 0;
int i;
int error;
+
+ BUILD_BUG_ON(NBBY * sizeof(ordered) < XFS_DEFER_OPS_NR_BUFS);
list_for_each_entry(lip, &tp->t_items, li_trans) {
switch (lip->li_type) {
@@ -242,7 +260,10 @@
ASSERT(0);
return -EFSCORRUPTED;
}
- xfs_trans_dirty_buf(tp, bli->bli_buf);
+ if (bli->bli_flags & XFS_BLI_ORDERED)
+ ordered |= (1U << bpcount);
+ else
+ xfs_trans_dirty_buf(tp, bli->bli_buf);
bplist[bpcount++] = bli->bli_buf;
}
break;
@@ -283,28 +304,14 @@
/* Rejoin the buffers and dirty them so the log moves forward. */
for (i = 0; i < bpcount; i++) {
xfs_trans_bjoin(tp, bplist[i]);
+ if (ordered & (1U << i))
+ xfs_trans_ordered_buf(tp, bplist[i]);
xfs_trans_bhold(tp, bplist[i]);
}
if (error)
trace_xfs_defer_trans_roll_error(tp, error);
return error;
-}
-
-/*
- * Reset an already used dfops after finish.
- */
-static void
-xfs_defer_reset(
- struct xfs_trans *tp)
-{
- ASSERT(list_empty(&tp->t_dfops));
-
- /*
- * Low mode state transfers across transaction rolls to mirror dfops
- * lifetime. Clear it now that dfops is reset.
- */
- tp->t_flags &= ~XFS_TRANS_LOWMODE;
}
/*
@@ -319,22 +326,124 @@
struct xfs_defer_pending *pli;
struct list_head *pwi;
struct list_head *n;
+ const struct xfs_defer_op_type *ops;
/*
* Free the pending items. Caller should already have arranged
* for the intent items to be released.
*/
list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) {
+ ops = defer_op_types[dfp->dfp_type];
trace_xfs_defer_cancel_list(mp, dfp);
list_del(&dfp->dfp_list);
list_for_each_safe(pwi, n, &dfp->dfp_work) {
list_del(pwi);
dfp->dfp_count--;
- dfp->dfp_type->cancel_item(pwi);
+ ops->cancel_item(pwi);
}
ASSERT(dfp->dfp_count == 0);
kmem_free(dfp);
}
+}
+
+/*
+ * Prevent a log intent item from pinning the tail of the log by logging a
+ * done item to release the intent item; and then log a new intent item.
+ * The caller should provide a fresh transaction and roll it after we're done.
+ */
+static int
+xfs_defer_relog(
+ struct xfs_trans **tpp,
+ struct list_head *dfops)
+{
+ struct xlog *log = (*tpp)->t_mountp->m_log;
+ struct xfs_defer_pending *dfp;
+ xfs_lsn_t threshold_lsn = NULLCOMMITLSN;
+
+
+ ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES);
+
+ list_for_each_entry(dfp, dfops, dfp_list) {
+ /*
+ * If the log intent item for this deferred op is not a part of
+ * the current log checkpoint, relog the intent item to keep
+ * the log tail moving forward. We're ok with this being racy
+ * because an incorrect decision means we'll be a little slower
+ * at pushing the tail.
+ */
+ if (dfp->dfp_intent == NULL ||
+ xfs_log_item_in_current_chkpt(dfp->dfp_intent))
+ continue;
+
+ /*
+ * Figure out where we need the tail to be in order to maintain
+ * the minimum required free space in the log. Only sample
+ * the log threshold once per call.
+ */
+ if (threshold_lsn == NULLCOMMITLSN) {
+ threshold_lsn = xlog_grant_push_threshold(log, 0);
+ if (threshold_lsn == NULLCOMMITLSN)
+ break;
+ }
+ if (XFS_LSN_CMP(dfp->dfp_intent->li_lsn, threshold_lsn) >= 0)
+ continue;
+
+ trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp);
+ XFS_STATS_INC((*tpp)->t_mountp, defer_relog);
+ dfp->dfp_intent = xfs_trans_item_relog(dfp->dfp_intent, *tpp);
+ }
+
+ if ((*tpp)->t_flags & XFS_TRANS_DIRTY)
+ return xfs_defer_trans_roll(tpp);
+ return 0;
+}
+
+/*
+ * Log an intent-done item for the first pending intent, and finish the work
+ * items.
+ */
+static int
+xfs_defer_finish_one(
+ struct xfs_trans *tp,
+ struct xfs_defer_pending *dfp)
+{
+ const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type];
+ struct xfs_btree_cur *state = NULL;
+ struct list_head *li, *n;
+ int error;
+
+ trace_xfs_defer_pending_finish(tp->t_mountp, dfp);
+
+ dfp->dfp_done = ops->create_done(tp, dfp->dfp_intent, dfp->dfp_count);
+ list_for_each_safe(li, n, &dfp->dfp_work) {
+ list_del(li);
+ dfp->dfp_count--;
+ error = ops->finish_item(tp, dfp->dfp_done, li, &state);
+ if (error == -EAGAIN) {
+ /*
+ * Caller wants a fresh transaction; put the work item
+ * back on the list and log a new log intent item to
+ * replace the old one. See "Requesting a Fresh
+ * Transaction while Finishing Deferred Work" above.
+ */
+ list_add(li, &dfp->dfp_work);
+ dfp->dfp_count++;
+ dfp->dfp_done = NULL;
+ dfp->dfp_intent = NULL;
+ xfs_defer_create_intent(tp, dfp, false);
+ }
+
+ if (error)
+ goto out;
+ }
+
+ /* Done with the dfp, free it. */
+ list_del(&dfp->dfp_list);
+ kmem_free(dfp);
+out:
+ if (ops->finish_cleanup)
+ ops->finish_cleanup(tp, state, error);
+ return error;
}
/*
@@ -350,11 +459,7 @@
struct xfs_trans **tp)
{
struct xfs_defer_pending *dfp;
- struct list_head *li;
- struct list_head *n;
- void *state;
int error = 0;
- void (*cleanup_fn)(struct xfs_trans *, void *, int);
LIST_HEAD(dop_pending);
ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -363,88 +468,44 @@
/* Until we run out of pending work to finish... */
while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) {
- /* log intents and pull in intake items */
- xfs_defer_create_intents(*tp);
- list_splice_tail_init(&(*tp)->t_dfops, &dop_pending);
-
/*
- * Roll the transaction.
+ * Deferred items that are created in the process of finishing
+ * other deferred work items should be queued at the head of
+ * the pending list, which puts them ahead of the deferred work
+ * that was created by the caller. This keeps the number of
+ * pending work items to a minimum, which decreases the amount
+ * of time that any one intent item can stick around in memory,
+ * pinning the log tail.
*/
+ xfs_defer_create_intents(*tp);
+ list_splice_init(&(*tp)->t_dfops, &dop_pending);
+
error = xfs_defer_trans_roll(tp);
if (error)
- goto out;
+ goto out_shutdown;
- /* Log an intent-done item for the first pending item. */
+ /* Possibly relog intent items to keep the log moving. */
+ error = xfs_defer_relog(tp, &dop_pending);
+ if (error)
+ goto out_shutdown;
+
dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
dfp_list);
- trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp);
- dfp->dfp_done = dfp->dfp_type->create_done(*tp, dfp->dfp_intent,
- dfp->dfp_count);
- cleanup_fn = dfp->dfp_type->finish_cleanup;
-
- /* Finish the work items. */
- state = NULL;
- list_for_each_safe(li, n, &dfp->dfp_work) {
- list_del(li);
- dfp->dfp_count--;
- error = dfp->dfp_type->finish_item(*tp, li,
- dfp->dfp_done, &state);
- if (error == -EAGAIN) {
- /*
- * Caller wants a fresh transaction;
- * put the work item back on the list
- * and jump out.
- */
- list_add(li, &dfp->dfp_work);
- dfp->dfp_count++;
- break;
- } else if (error) {
- /*
- * Clean up after ourselves and jump out.
- * xfs_defer_cancel will take care of freeing
- * all these lists and stuff.
- */
- if (cleanup_fn)
- cleanup_fn(*tp, state, error);
- goto out;
- }
- }
- if (error == -EAGAIN) {
- /*
- * Caller wants a fresh transaction, so log a
- * new log intent item to replace the old one
- * and roll the transaction. See "Requesting
- * a Fresh Transaction while Finishing
- * Deferred Work" above.
- */
- dfp->dfp_intent = dfp->dfp_type->create_intent(*tp,
- dfp->dfp_count);
- dfp->dfp_done = NULL;
- list_for_each(li, &dfp->dfp_work)
- dfp->dfp_type->log_item(*tp, dfp->dfp_intent,
- li);
- } else {
- /* Done with the dfp, free it. */
- list_del(&dfp->dfp_list);
- kmem_free(dfp);
- }
-
- if (cleanup_fn)
- cleanup_fn(*tp, state, error);
- }
-
-out:
- if (error) {
- xfs_defer_trans_abort(*tp, &dop_pending);
- xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
- trace_xfs_defer_finish_error(*tp, error);
- xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending);
- xfs_defer_cancel(*tp);
- return error;
+ error = xfs_defer_finish_one(*tp, dfp);
+ if (error && error != -EAGAIN)
+ goto out_shutdown;
}
trace_xfs_defer_finish_done(*tp, _RET_IP_);
return 0;
+
+out_shutdown:
+ xfs_defer_trans_abort(*tp, &dop_pending);
+ xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
+ trace_xfs_defer_finish_error(*tp, error);
+ xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending);
+ xfs_defer_cancel(*tp);
+ return error;
}
int
@@ -468,7 +529,10 @@
return error;
}
}
- xfs_defer_reset(*tp);
+
+ /* Reset LOWMODE now that we've finished all the dfops. */
+ ASSERT(list_empty(&(*tp)->t_dfops));
+ (*tp)->t_flags &= ~XFS_TRANS_LOWMODE;
return 0;
}
@@ -490,8 +554,10 @@
struct list_head *li)
{
struct xfs_defer_pending *dfp = NULL;
+ const struct xfs_defer_op_type *ops;
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+ BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX);
/*
* Add the item to a pending item at the end of the intake list.
@@ -501,15 +567,15 @@
if (!list_empty(&tp->t_dfops)) {
dfp = list_last_entry(&tp->t_dfops,
struct xfs_defer_pending, dfp_list);
- if (dfp->dfp_type->type != type ||
- (dfp->dfp_type->max_items &&
- dfp->dfp_count >= dfp->dfp_type->max_items))
+ ops = defer_op_types[dfp->dfp_type];
+ if (dfp->dfp_type != type ||
+ (ops->max_items && dfp->dfp_count >= ops->max_items))
dfp = NULL;
}
if (!dfp) {
dfp = kmem_alloc(sizeof(struct xfs_defer_pending),
- KM_SLEEP | KM_NOFS);
- dfp->dfp_type = defer_op_types[type];
+ KM_NOFS);
+ dfp->dfp_type = type;
dfp->dfp_intent = NULL;
dfp->dfp_done = NULL;
dfp->dfp_count = 0;
@@ -519,14 +585,6 @@
list_add_tail(li, &dfp->dfp_work);
dfp->dfp_count++;
-}
-
-/* Initialize a deferred operation list. */
-void
-xfs_defer_init_op_type(
- const struct xfs_defer_op_type *type)
-{
- defer_op_types[type->type] = type;
}
/*
@@ -548,6 +606,139 @@
* that behavior.
*/
dtp->t_flags |= (stp->t_flags & XFS_TRANS_LOWMODE);
+ stp->t_flags &= ~XFS_TRANS_LOWMODE;
+}
- xfs_defer_reset(stp);
+/*
+ * Prepare a chain of fresh deferred ops work items to be completed later. Log
+ * recovery requires the ability to put off until later the actual finishing
+ * work so that it can process unfinished items recovered from the log in
+ * correct order.
+ *
+ * Create and log intent items for all the work that we're capturing so that we
+ * can be assured that the items will get replayed if the system goes down
+ * before log recovery gets a chance to finish the work it put off. The entire
+ * deferred ops state is transferred to the capture structure and the
+ * transaction is then ready for the caller to commit it. If there are no
+ * intent items to capture, this function returns NULL.
+ *
+ * If capture_ip is not NULL, the capture structure will obtain an extra
+ * reference to the inode.
+ */
+static struct xfs_defer_capture *
+xfs_defer_ops_capture(
+ struct xfs_trans *tp,
+ struct xfs_inode *capture_ip)
+{
+ struct xfs_defer_capture *dfc;
+
+ if (list_empty(&tp->t_dfops))
+ return NULL;
+
+ /* Create an object to capture the defer ops. */
+ dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS);
+ INIT_LIST_HEAD(&dfc->dfc_list);
+ INIT_LIST_HEAD(&dfc->dfc_dfops);
+
+ xfs_defer_create_intents(tp);
+
+ /* Move the dfops chain and transaction state to the capture struct. */
+ list_splice_init(&tp->t_dfops, &dfc->dfc_dfops);
+ dfc->dfc_tpflags = tp->t_flags & XFS_TRANS_LOWMODE;
+ tp->t_flags &= ~XFS_TRANS_LOWMODE;
+
+ /* Capture the remaining block reservations along with the dfops. */
+ dfc->dfc_blkres = tp->t_blk_res - tp->t_blk_res_used;
+ dfc->dfc_rtxres = tp->t_rtx_res - tp->t_rtx_res_used;
+
+ /* Preserve the log reservation size. */
+ dfc->dfc_logres = tp->t_log_res;
+
+ /*
+ * Grab an extra reference to this inode and attach it to the capture
+ * structure.
+ */
+ if (capture_ip) {
+ ihold(VFS_I(capture_ip));
+ dfc->dfc_capture_ip = capture_ip;
+ }
+
+ return dfc;
+}
+
+/* Release all resources that we used to capture deferred ops. */
+void
+xfs_defer_ops_release(
+ struct xfs_mount *mp,
+ struct xfs_defer_capture *dfc)
+{
+ xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
+ if (dfc->dfc_capture_ip)
+ xfs_irele(dfc->dfc_capture_ip);
+ kmem_free(dfc);
+}
+
+/*
+ * Capture any deferred ops and commit the transaction. This is the last step
+ * needed to finish a log intent item that we recovered from the log. If any
+ * of the deferred ops operate on an inode, the caller must pass in that inode
+ * so that the reference can be transferred to the capture structure. The
+ * caller must hold ILOCK_EXCL on the inode, and must unlock it before calling
+ * xfs_defer_ops_continue.
+ */
+int
+xfs_defer_ops_capture_and_commit(
+ struct xfs_trans *tp,
+ struct xfs_inode *capture_ip,
+ struct list_head *capture_list)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_defer_capture *dfc;
+ int error;
+
+ ASSERT(!capture_ip || xfs_isilocked(capture_ip, XFS_ILOCK_EXCL));
+
+ /* If we don't capture anything, commit transaction and exit. */
+ dfc = xfs_defer_ops_capture(tp, capture_ip);
+ if (!dfc)
+ return xfs_trans_commit(tp);
+
+ /* Commit the transaction and add the capture structure to the list. */
+ error = xfs_trans_commit(tp);
+ if (error) {
+ xfs_defer_ops_release(mp, dfc);
+ return error;
+ }
+
+ list_add_tail(&dfc->dfc_list, capture_list);
+ return 0;
+}
+
+/*
+ * Attach a chain of captured deferred ops to a new transaction and free the
+ * capture structure. If an inode was captured, it will be passed back to the
+ * caller with ILOCK_EXCL held and joined to the transaction with lockflags==0.
+ * The caller now owns the inode reference.
+ */
+void
+xfs_defer_ops_continue(
+ struct xfs_defer_capture *dfc,
+ struct xfs_trans *tp,
+ struct xfs_inode **captured_ipp)
+{
+ ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+ ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
+
+ /* Lock and join the captured inode to the new transaction. */
+ if (dfc->dfc_capture_ip) {
+ xfs_ilock(dfc->dfc_capture_ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, dfc->dfc_capture_ip, 0);
+ }
+ *captured_ipp = dfc->dfc_capture_ip;
+
+ /* Move captured dfops chain and state to the transaction. */
+ list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
+ tp->t_flags |= dfc->dfc_tpflags;
+
+ kmem_free(dfc);
}
--
Gitblit v1.6.2