From 9999e48639b3cecb08ffb37358bcba3b48161b29 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Fri, 10 May 2024 08:50:17 +0000
Subject: [PATCH] add ax88772_rst
---
kernel/fs/xfs/xfs_log_cil.c | 234 +++++++++++++++++++++++++++++++++++-----------------------
1 files changed, 142 insertions(+), 92 deletions(-)
diff --git a/kernel/fs/xfs/xfs_log_cil.c b/kernel/fs/xfs/xfs_log_cil.c
index d3884e0..fbe160d 100644
--- a/kernel/fs/xfs/xfs_log_cil.c
+++ b/kernel/fs/xfs/xfs_log_cil.c
@@ -10,10 +10,7 @@
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_error.h"
-#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
-#include "xfs_discard.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
@@ -40,8 +37,7 @@
{
struct xlog_ticket *tic;
- tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0,
- KM_SLEEP|KM_NOFS);
+ tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0);
/*
* set the current reservation to zero so we know to steal the basic
@@ -182,14 +178,14 @@
/*
* We free and allocate here as a realloc would copy
- * unecessary data. We don't use kmem_zalloc() for the
+ * unnecessary data. We don't use kmem_zalloc() for the
* same reason - we don't need to zero the data area in
* the buffer, only the log vector header and the iovec
* storage.
*/
kmem_free(lip->li_lv_shadow);
- lv = kmem_alloc_large(buf_size, KM_SLEEP | KM_NOFS);
+ lv = kmem_alloc_large(buf_size, KM_NOFS);
memset(lv, 0, xlog_cil_iovec_space(niovecs));
lv->lv_item = lip;
@@ -243,10 +239,11 @@
* this CIL context and so we need to pin it. If we are replacing the
* old_lv, then remove the space it accounts for and make it the shadow
* buffer for later freeing. In both cases we are now switching to the
- * shadow buffer, so update the the pointer to it appropriately.
+ * shadow buffer, so update the pointer to it appropriately.
*/
if (!old_lv) {
- lv->lv_item->li_ops->iop_pin(lv->lv_item);
+ if (lv->lv_item->li_ops->iop_pin)
+ lv->lv_item->li_ops->iop_pin(lv->lv_item);
lv->lv_item->li_lv_shadow = NULL;
} else if (old_lv != lv) {
ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
@@ -576,11 +573,23 @@
*/
static void
xlog_cil_committed(
- void *args,
- int abort)
+ struct xfs_cil_ctx *ctx)
{
- struct xfs_cil_ctx *ctx = args;
struct xfs_mount *mp = ctx->cil->xc_log->l_mp;
+ bool abort = XLOG_FORCED_SHUTDOWN(ctx->cil->xc_log);
+
+ /*
+ * If the I/O failed, we're aborting the commit and already shutdown.
+ * Wake any commit waiters before aborting the log items so we don't
+ * block async log pushers on callbacks. Async log pushers explicitly do
+ * not wait on log force completion because they may be holding locks
+ * required to unpin items.
+ */
+ if (abort) {
+ spin_lock(&ctx->cil->xc_push_lock);
+ wake_up_all(&ctx->cil->xc_commit_wait);
+ spin_unlock(&ctx->cil->xc_push_lock);
+ }
xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
ctx->start_lsn, abort);
@@ -589,15 +598,7 @@
xfs_extent_busy_clear(mp, &ctx->busy_extents,
(mp->m_flags & XFS_MOUNT_DISCARD) && !abort);
- /*
- * If we are aborting the commit, wake up anyone waiting on the
- * committing list. If we don't, then a shutdown we can leave processes
- * waiting in xlog_cil_force_lsn() waiting on a sequence commit that
- * will never happen because we aborted it.
- */
spin_lock(&ctx->cil->xc_push_lock);
- if (abort)
- wake_up_all(&ctx->cil->xc_commit_wait);
list_del(&ctx->committing);
spin_unlock(&ctx->cil->xc_push_lock);
@@ -609,25 +610,40 @@
kmem_free(ctx);
}
+void
+xlog_cil_process_committed(
+ struct list_head *list)
+{
+ struct xfs_cil_ctx *ctx;
+
+ while ((ctx = list_first_entry_or_null(list,
+ struct xfs_cil_ctx, iclog_entry))) {
+ list_del(&ctx->iclog_entry);
+ xlog_cil_committed(ctx);
+ }
+}
+
/*
- * Push the Committed Item List to the log. If @push_seq flag is zero, then it
- * is a background flush and so we can chose to ignore it. Otherwise, if the
- * current sequence is the same as @push_seq we need to do a flush. If
- * @push_seq is less than the current sequence, then it has already been
+ * Push the Committed Item List to the log.
+ *
+ * If the current sequence is the same as xc_push_seq we need to do a flush. If
+ * xc_push_seq is less than the current sequence, then it has already been
* flushed and we don't need to do anything - the caller will wait for it to
* complete if necessary.
*
- * @push_seq is a value rather than a flag because that allows us to do an
- * unlocked check of the sequence number for a match. Hence we can allows log
- * forces to run racily and not issue pushes for the same sequence twice. If we
- * get a race between multiple pushes for the same sequence they will block on
- * the first one and then abort, hence avoiding needless pushes.
+ * xc_push_seq is checked unlocked against the sequence number for a match.
+ * Hence we can allow log forces to run racily and not issue pushes for the
+ * same sequence twice. If we get a race between multiple pushes for the same
+ * sequence they will block on the first one and then abort, hence avoiding
+ * needless pushes.
*/
-STATIC int
-xlog_cil_push(
- struct xlog *log)
+static void
+xlog_cil_push_work(
+ struct work_struct *work)
{
- struct xfs_cil *cil = log->l_cilp;
+ struct xfs_cil *cil =
+ container_of(work, struct xfs_cil, xc_push_work);
+ struct xlog *log = cil->xc_log;
struct xfs_log_vec *lv;
struct xfs_cil_ctx *ctx;
struct xfs_cil_ctx *new_ctx;
@@ -641,10 +657,7 @@
xfs_lsn_t commit_lsn;
xfs_lsn_t push_seq;
- if (!cil)
- return 0;
-
- new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
+ new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_NOFS);
new_ctx->ticket = xlog_cil_ticket_alloc(log);
down_write(&cil->xc_ctx_lock);
@@ -653,6 +666,17 @@
spin_lock(&cil->xc_push_lock);
push_seq = cil->xc_push_seq;
ASSERT(push_seq <= ctx->sequence);
+
+ /*
+ * As we are about to switch to a new, empty CIL context, we no longer
+ * need to throttle tasks on CIL space overruns. Wake any waiters that
+ * the hard push throttle may have caught so they can start committing
+ * to the new context. The ctx->xc_push_lock provides the serialisation
+ * necessary for safely using the lockless waitqueue_active() check in
+ * this context.
+ */
+ if (waitqueue_active(&cil->xc_push_wait))
+ wake_up_all(&cil->xc_push_wait);
/*
* Check if we've anything to push. If there is nothing, then we don't
@@ -666,7 +690,7 @@
}
- /* check for a previously pushed seqeunce */
+ /* check for a previously pushed sequence */
if (push_seq < cil->xc_ctx->sequence) {
spin_unlock(&cil->xc_push_lock);
goto out_skip;
@@ -724,7 +748,7 @@
/*
* initialise the new context and attach it to the CIL. Then attach
- * the current context to the CIL committing lsit so it can be found
+ * the current context to the CIL committing list so it can be found
* during log forces to extract the commit lsn of the sequence that
* needs to be forced.
*/
@@ -753,7 +777,7 @@
* that higher sequences will wait for us to write out a commit record
* before they do.
*
- * xfs_log_force_lsn requires us to mirror the new sequence into the cil
+ * xfs_log_force_seq requires us to mirror the new sequence into the cil
* structure atomically with the addition of this sequence to the
* committing list. This also ensures that we can do unlocked checks
* against the current sequence in log forces without risking
@@ -787,7 +811,7 @@
lvhdr.lv_iovecp = &lhdr;
lvhdr.lv_next = ctx->lv_chain;
- error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0);
+ error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0, true);
if (error)
goto out_abort_free_ticket;
@@ -825,17 +849,21 @@
}
spin_unlock(&cil->xc_push_lock);
- /* xfs_log_done always frees the ticket on error. */
- commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, false);
- if (commit_lsn == -1)
- goto out_abort;
-
- /* attach all the transactions w/ busy extents to iclog */
- ctx->log_cb.cb_func = xlog_cil_committed;
- ctx->log_cb.cb_arg = ctx;
- error = xfs_log_notify(commit_iclog, &ctx->log_cb);
+ error = xlog_commit_record(log, tic, &commit_iclog, &commit_lsn);
if (error)
+ goto out_abort_free_ticket;
+
+ xfs_log_ticket_ungrant(log, tic);
+
+ spin_lock(&commit_iclog->ic_callback_lock);
+ if (commit_iclog->ic_state == XLOG_STATE_IOERROR) {
+ spin_unlock(&commit_iclog->ic_callback_lock);
goto out_abort;
+ }
+ ASSERT_ALWAYS(commit_iclog->ic_state == XLOG_STATE_ACTIVE ||
+ commit_iclog->ic_state == XLOG_STATE_WANT_SYNC);
+ list_add_tail(&ctx->iclog_entry, &commit_iclog->ic_callbacks);
+ spin_unlock(&commit_iclog->ic_callback_lock);
/*
* now the checkpoint commit is complete and we've attached the
@@ -848,28 +876,20 @@
spin_unlock(&cil->xc_push_lock);
/* release the hounds! */
- return xfs_log_release_iclog(log->l_mp, commit_iclog);
+ xfs_log_release_iclog(commit_iclog);
+ return;
out_skip:
up_write(&cil->xc_ctx_lock);
xfs_log_ticket_put(new_ctx->ticket);
kmem_free(new_ctx);
- return 0;
+ return;
out_abort_free_ticket:
- xfs_log_ticket_put(tic);
+ xfs_log_ticket_ungrant(log, tic);
out_abort:
- xlog_cil_committed(ctx, XFS_LI_ABORTED);
- return -EIO;
-}
-
-static void
-xlog_cil_push_work(
- struct work_struct *work)
-{
- struct xfs_cil *cil = container_of(work, struct xfs_cil,
- xc_push_work);
- xlog_cil_push(cil->xc_log);
+ ASSERT(XLOG_FORCED_SHUTDOWN(log));
+ xlog_cil_committed(ctx);
}
/*
@@ -881,7 +901,7 @@
*/
static void
xlog_cil_push_background(
- struct xlog *log)
+ struct xlog *log) __releases(cil->xc_ctx_lock)
{
struct xfs_cil *cil = log->l_cilp;
@@ -892,17 +912,46 @@
ASSERT(!list_empty(&cil->xc_cil));
/*
- * don't do a background push if we haven't used up all the
+ * Don't do a background push if we haven't used up all the
* space available yet.
*/
- if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+ if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) {
+ up_read(&cil->xc_ctx_lock);
return;
+ }
spin_lock(&cil->xc_push_lock);
if (cil->xc_push_seq < cil->xc_current_sequence) {
cil->xc_push_seq = cil->xc_current_sequence;
queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
}
+
+ /*
+ * Drop the context lock now, we can't hold that if we need to sleep
+ * because we are over the blocking threshold. The push_lock is still
+ * held, so blocking threshold sleep/wakeup is still correctly
+ * serialised here.
+ */
+ up_read(&cil->xc_ctx_lock);
+
+ /*
+ * If we are well over the space limit, throttle the work that is being
+ * done until the push work on this context has begun. Enforce the hard
+ * throttle on all transaction commits once it has been activated, even
+ * if the committing transactions have resulted in the space usage
+ * dipping back down under the hard limit.
+ *
+ * The ctx->xc_push_lock provides the serialisation necessary for safely
+ * using the lockless waitqueue_active() check in this context.
+ */
+ if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log) ||
+ waitqueue_active(&cil->xc_push_wait)) {
+ trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket);
+ ASSERT(cil->xc_ctx->space_used < log->l_logsize);
+ xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock);
+ return;
+ }
+
spin_unlock(&cil->xc_push_lock);
}
@@ -971,15 +1020,14 @@
* allowed again.
*/
void
-xfs_log_commit_cil(
- struct xfs_mount *mp,
+xlog_cil_commit(
+ struct xlog *log,
struct xfs_trans *tp,
- xfs_lsn_t *commit_lsn,
+ xfs_csn_t *commit_seq,
bool regrant)
{
- struct xlog *log = mp->m_log;
struct xfs_cil *cil = log->l_cilp;
- xfs_lsn_t xc_commit_lsn;
+ struct xfs_log_item *lip, *next;
/*
* Do all necessary memory allocation before we lock the CIL.
@@ -993,17 +1041,16 @@
xlog_cil_insert_items(log, tp);
- xc_commit_lsn = cil->xc_ctx->sequence;
- if (commit_lsn)
- *commit_lsn = xc_commit_lsn;
-
- xfs_log_done(mp, tp->t_ticket, NULL, regrant);
+ if (regrant && !XLOG_FORCED_SHUTDOWN(log))
+ xfs_log_ticket_regrant(log, tp->t_ticket);
+ else
+ xfs_log_ticket_ungrant(log, tp->t_ticket);
tp->t_ticket = NULL;
xfs_trans_unreserve_and_mod_sb(tp);
/*
* Once all the items of the transaction have been copied to the CIL,
- * the items can be unlocked and freed.
+ * the items can be unlocked and possibly freed.
*
* This needs to be done before we drop the CIL context lock because we
* have to update state in the log items and unlock them before they go
@@ -1012,11 +1059,17 @@
* the log items. This affects (at least) processing of stale buffers,
* inodes and EFIs.
*/
- xfs_trans_free_items(tp, xc_commit_lsn, false);
+ trace_xfs_trans_commit_items(tp, _RET_IP_);
+ list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
+ xfs_trans_del_item(lip);
+ if (lip->li_ops->iop_committing)
+ lip->li_ops->iop_committing(lip, cil->xc_ctx->sequence);
+ }
+ if (commit_seq)
+ *commit_seq = cil->xc_ctx->sequence;
+ /* xlog_cil_push_background() releases cil->xc_ctx_lock */
xlog_cil_push_background(log);
-
- up_read(&cil->xc_ctx_lock);
}
/*
@@ -1030,9 +1083,9 @@
* iclog flush is necessary following this call.
*/
xfs_lsn_t
-xlog_cil_force_lsn(
+xlog_cil_force_seq(
struct xlog *log,
- xfs_lsn_t sequence)
+ xfs_csn_t sequence)
{
struct xfs_cil *cil = log->l_cilp;
struct xfs_cil_ctx *ctx;
@@ -1126,23 +1179,19 @@
*/
bool
xfs_log_item_in_current_chkpt(
- struct xfs_log_item *lip)
+ struct xfs_log_item *lip)
{
- struct xfs_cil_ctx *ctx;
+ struct xfs_cil *cil = lip->li_mountp->m_log->l_cilp;
if (list_empty(&lip->li_cil))
return false;
-
- ctx = lip->li_mountp->m_log->l_cilp->xc_ctx;
/*
* li_seq is written on the first commit of a log item to record the
* first checkpoint it is written to. Hence if it is different to the
* current sequence, we're in a new checkpoint.
*/
- if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0)
- return false;
- return true;
+ return lip->li_seq == READ_ONCE(cil->xc_current_sequence);
}
/*
@@ -1155,11 +1204,11 @@
struct xfs_cil *cil;
struct xfs_cil_ctx *ctx;
- cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
+ cil = kmem_zalloc(sizeof(*cil), KM_MAYFAIL);
if (!cil)
return -ENOMEM;
- ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL);
+ ctx = kmem_zalloc(sizeof(*ctx), KM_MAYFAIL);
if (!ctx) {
kmem_free(cil);
return -ENOMEM;
@@ -1170,6 +1219,7 @@
INIT_LIST_HEAD(&cil->xc_committing);
spin_lock_init(&cil->xc_cil_lock);
spin_lock_init(&cil->xc_push_lock);
+ init_waitqueue_head(&cil->xc_push_wait);
init_rwsem(&cil->xc_ctx_lock);
init_waitqueue_head(&cil->xc_commit_wait);
--
Gitblit v1.6.2