From 6778948f9de86c3cfaf36725a7c87dcff9ba247f Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Mon, 11 Dec 2023 08:20:59 +0000 Subject: [PATCH] kernel_5.10 no rt --- kernel/fs/xfs/xfs_log_cil.c | 234 +++++++++++++++++++++++++++++++++++----------------------- 1 files changed, 142 insertions(+), 92 deletions(-) diff --git a/kernel/fs/xfs/xfs_log_cil.c b/kernel/fs/xfs/xfs_log_cil.c index d3884e0..fbe160d 100644 --- a/kernel/fs/xfs/xfs_log_cil.c +++ b/kernel/fs/xfs/xfs_log_cil.c @@ -10,10 +10,7 @@ #include "xfs_shared.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_error.h" -#include "xfs_alloc.h" #include "xfs_extent_busy.h" -#include "xfs_discard.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_log.h" @@ -40,8 +37,7 @@ { struct xlog_ticket *tic; - tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0, - KM_SLEEP|KM_NOFS); + tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0); /* * set the current reservation to zero so we know to steal the basic @@ -182,14 +178,14 @@ /* * We free and allocate here as a realloc would copy - * unecessary data. We don't use kmem_zalloc() for the + * unnecessary data. We don't use kmem_zalloc() for the * same reason - we don't need to zero the data area in * the buffer, only the log vector header and the iovec * storage. */ kmem_free(lip->li_lv_shadow); - lv = kmem_alloc_large(buf_size, KM_SLEEP | KM_NOFS); + lv = kmem_alloc_large(buf_size, KM_NOFS); memset(lv, 0, xlog_cil_iovec_space(niovecs)); lv->lv_item = lip; @@ -243,10 +239,11 @@ * this CIL context and so we need to pin it. If we are replacing the * old_lv, then remove the space it accounts for and make it the shadow * buffer for later freeing. In both cases we are now switching to the - * shadow buffer, so update the the pointer to it appropriately. + * shadow buffer, so update the pointer to it appropriately. */ if (!old_lv) { - lv->lv_item->li_ops->iop_pin(lv->lv_item); + if (lv->lv_item->li_ops->iop_pin) + lv->lv_item->li_ops->iop_pin(lv->lv_item); lv->lv_item->li_lv_shadow = NULL; } else if (old_lv != lv) { ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED); @@ -576,11 +573,23 @@ */ static void xlog_cil_committed( - void *args, - int abort) + struct xfs_cil_ctx *ctx) { - struct xfs_cil_ctx *ctx = args; struct xfs_mount *mp = ctx->cil->xc_log->l_mp; + bool abort = XLOG_FORCED_SHUTDOWN(ctx->cil->xc_log); + + /* + * If the I/O failed, we're aborting the commit and already shutdown. + * Wake any commit waiters before aborting the log items so we don't + * block async log pushers on callbacks. Async log pushers explicitly do + * not wait on log force completion because they may be holding locks + * required to unpin items. + */ + if (abort) { + spin_lock(&ctx->cil->xc_push_lock); + wake_up_all(&ctx->cil->xc_commit_wait); + spin_unlock(&ctx->cil->xc_push_lock); + } xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, ctx->start_lsn, abort); @@ -589,15 +598,7 @@ xfs_extent_busy_clear(mp, &ctx->busy_extents, (mp->m_flags & XFS_MOUNT_DISCARD) && !abort); - /* - * If we are aborting the commit, wake up anyone waiting on the - * committing list. If we don't, then a shutdown we can leave processes - * waiting in xlog_cil_force_lsn() waiting on a sequence commit that - * will never happen because we aborted it. - */ spin_lock(&ctx->cil->xc_push_lock); - if (abort) - wake_up_all(&ctx->cil->xc_commit_wait); list_del(&ctx->committing); spin_unlock(&ctx->cil->xc_push_lock); @@ -609,25 +610,40 @@ kmem_free(ctx); } +void +xlog_cil_process_committed( + struct list_head *list) +{ + struct xfs_cil_ctx *ctx; + + while ((ctx = list_first_entry_or_null(list, + struct xfs_cil_ctx, iclog_entry))) { + list_del(&ctx->iclog_entry); + xlog_cil_committed(ctx); + } +} + /* - * Push the Committed Item List to the log. If @push_seq flag is zero, then it - * is a background flush and so we can chose to ignore it. Otherwise, if the - * current sequence is the same as @push_seq we need to do a flush. If - * @push_seq is less than the current sequence, then it has already been + * Push the Committed Item List to the log. + * + * If the current sequence is the same as xc_push_seq we need to do a flush. If + * xc_push_seq is less than the current sequence, then it has already been * flushed and we don't need to do anything - the caller will wait for it to * complete if necessary. * - * @push_seq is a value rather than a flag because that allows us to do an - * unlocked check of the sequence number for a match. Hence we can allows log - * forces to run racily and not issue pushes for the same sequence twice. If we - * get a race between multiple pushes for the same sequence they will block on - * the first one and then abort, hence avoiding needless pushes. + * xc_push_seq is checked unlocked against the sequence number for a match. + * Hence we can allow log forces to run racily and not issue pushes for the + * same sequence twice. If we get a race between multiple pushes for the same + * sequence they will block on the first one and then abort, hence avoiding + * needless pushes. */ -STATIC int -xlog_cil_push( - struct xlog *log) +static void +xlog_cil_push_work( + struct work_struct *work) { - struct xfs_cil *cil = log->l_cilp; + struct xfs_cil *cil = + container_of(work, struct xfs_cil, xc_push_work); + struct xlog *log = cil->xc_log; struct xfs_log_vec *lv; struct xfs_cil_ctx *ctx; struct xfs_cil_ctx *new_ctx; @@ -641,10 +657,7 @@ xfs_lsn_t commit_lsn; xfs_lsn_t push_seq; - if (!cil) - return 0; - - new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); + new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_NOFS); new_ctx->ticket = xlog_cil_ticket_alloc(log); down_write(&cil->xc_ctx_lock); @@ -653,6 +666,17 @@ spin_lock(&cil->xc_push_lock); push_seq = cil->xc_push_seq; ASSERT(push_seq <= ctx->sequence); + + /* + * As we are about to switch to a new, empty CIL context, we no longer + * need to throttle tasks on CIL space overruns. Wake any waiters that + * the hard push throttle may have caught so they can start committing + * to the new context. The ctx->xc_push_lock provides the serialisation + * necessary for safely using the lockless waitqueue_active() check in + * this context. + */ + if (waitqueue_active(&cil->xc_push_wait)) + wake_up_all(&cil->xc_push_wait); /* * Check if we've anything to push. If there is nothing, then we don't @@ -666,7 +690,7 @@ } - /* check for a previously pushed seqeunce */ + /* check for a previously pushed sequence */ if (push_seq < cil->xc_ctx->sequence) { spin_unlock(&cil->xc_push_lock); goto out_skip; @@ -724,7 +748,7 @@ /* * initialise the new context and attach it to the CIL. Then attach - * the current context to the CIL committing lsit so it can be found + * the current context to the CIL committing list so it can be found * during log forces to extract the commit lsn of the sequence that * needs to be forced. */ @@ -753,7 +777,7 @@ * that higher sequences will wait for us to write out a commit record * before they do. * - * xfs_log_force_lsn requires us to mirror the new sequence into the cil + * xfs_log_force_seq requires us to mirror the new sequence into the cil * structure atomically with the addition of this sequence to the * committing list. This also ensures that we can do unlocked checks * against the current sequence in log forces without risking @@ -787,7 +811,7 @@ lvhdr.lv_iovecp = &lhdr; lvhdr.lv_next = ctx->lv_chain; - error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0); + error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0, true); if (error) goto out_abort_free_ticket; @@ -825,17 +849,21 @@ } spin_unlock(&cil->xc_push_lock); - /* xfs_log_done always frees the ticket on error. */ - commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, false); - if (commit_lsn == -1) - goto out_abort; - - /* attach all the transactions w/ busy extents to iclog */ - ctx->log_cb.cb_func = xlog_cil_committed; - ctx->log_cb.cb_arg = ctx; - error = xfs_log_notify(commit_iclog, &ctx->log_cb); + error = xlog_commit_record(log, tic, &commit_iclog, &commit_lsn); if (error) + goto out_abort_free_ticket; + + xfs_log_ticket_ungrant(log, tic); + + spin_lock(&commit_iclog->ic_callback_lock); + if (commit_iclog->ic_state == XLOG_STATE_IOERROR) { + spin_unlock(&commit_iclog->ic_callback_lock); goto out_abort; + } + ASSERT_ALWAYS(commit_iclog->ic_state == XLOG_STATE_ACTIVE || + commit_iclog->ic_state == XLOG_STATE_WANT_SYNC); + list_add_tail(&ctx->iclog_entry, &commit_iclog->ic_callbacks); + spin_unlock(&commit_iclog->ic_callback_lock); /* * now the checkpoint commit is complete and we've attached the @@ -848,28 +876,20 @@ spin_unlock(&cil->xc_push_lock); /* release the hounds! */ - return xfs_log_release_iclog(log->l_mp, commit_iclog); + xfs_log_release_iclog(commit_iclog); + return; out_skip: up_write(&cil->xc_ctx_lock); xfs_log_ticket_put(new_ctx->ticket); kmem_free(new_ctx); - return 0; + return; out_abort_free_ticket: - xfs_log_ticket_put(tic); + xfs_log_ticket_ungrant(log, tic); out_abort: - xlog_cil_committed(ctx, XFS_LI_ABORTED); - return -EIO; -} - -static void -xlog_cil_push_work( - struct work_struct *work) -{ - struct xfs_cil *cil = container_of(work, struct xfs_cil, - xc_push_work); - xlog_cil_push(cil->xc_log); + ASSERT(XLOG_FORCED_SHUTDOWN(log)); + xlog_cil_committed(ctx); } /* @@ -881,7 +901,7 @@ */ static void xlog_cil_push_background( - struct xlog *log) + struct xlog *log) __releases(cil->xc_ctx_lock) { struct xfs_cil *cil = log->l_cilp; @@ -892,17 +912,46 @@ ASSERT(!list_empty(&cil->xc_cil)); /* - * don't do a background push if we haven't used up all the + * Don't do a background push if we haven't used up all the * space available yet. */ - if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) + if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) { + up_read(&cil->xc_ctx_lock); return; + } spin_lock(&cil->xc_push_lock); if (cil->xc_push_seq < cil->xc_current_sequence) { cil->xc_push_seq = cil->xc_current_sequence; queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work); } + + /* + * Drop the context lock now, we can't hold that if we need to sleep + * because we are over the blocking threshold. The push_lock is still + * held, so blocking threshold sleep/wakeup is still correctly + * serialised here. + */ + up_read(&cil->xc_ctx_lock); + + /* + * If we are well over the space limit, throttle the work that is being + * done until the push work on this context has begun. Enforce the hard + * throttle on all transaction commits once it has been activated, even + * if the committing transactions have resulted in the space usage + * dipping back down under the hard limit. + * + * The ctx->xc_push_lock provides the serialisation necessary for safely + * using the lockless waitqueue_active() check in this context. + */ + if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log) || + waitqueue_active(&cil->xc_push_wait)) { + trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket); + ASSERT(cil->xc_ctx->space_used < log->l_logsize); + xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock); + return; + } + spin_unlock(&cil->xc_push_lock); } @@ -971,15 +1020,14 @@ * allowed again. */ void -xfs_log_commit_cil( - struct xfs_mount *mp, +xlog_cil_commit( + struct xlog *log, struct xfs_trans *tp, - xfs_lsn_t *commit_lsn, + xfs_csn_t *commit_seq, bool regrant) { - struct xlog *log = mp->m_log; struct xfs_cil *cil = log->l_cilp; - xfs_lsn_t xc_commit_lsn; + struct xfs_log_item *lip, *next; /* * Do all necessary memory allocation before we lock the CIL. @@ -993,17 +1041,16 @@ xlog_cil_insert_items(log, tp); - xc_commit_lsn = cil->xc_ctx->sequence; - if (commit_lsn) - *commit_lsn = xc_commit_lsn; - - xfs_log_done(mp, tp->t_ticket, NULL, regrant); + if (regrant && !XLOG_FORCED_SHUTDOWN(log)) + xfs_log_ticket_regrant(log, tp->t_ticket); + else + xfs_log_ticket_ungrant(log, tp->t_ticket); tp->t_ticket = NULL; xfs_trans_unreserve_and_mod_sb(tp); /* * Once all the items of the transaction have been copied to the CIL, - * the items can be unlocked and freed. + * the items can be unlocked and possibly freed. * * This needs to be done before we drop the CIL context lock because we * have to update state in the log items and unlock them before they go @@ -1012,11 +1059,17 @@ * the log items. This affects (at least) processing of stale buffers, * inodes and EFIs. */ - xfs_trans_free_items(tp, xc_commit_lsn, false); + trace_xfs_trans_commit_items(tp, _RET_IP_); + list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { + xfs_trans_del_item(lip); + if (lip->li_ops->iop_committing) + lip->li_ops->iop_committing(lip, cil->xc_ctx->sequence); + } + if (commit_seq) + *commit_seq = cil->xc_ctx->sequence; + /* xlog_cil_push_background() releases cil->xc_ctx_lock */ xlog_cil_push_background(log); - - up_read(&cil->xc_ctx_lock); } /* @@ -1030,9 +1083,9 @@ * iclog flush is necessary following this call. */ xfs_lsn_t -xlog_cil_force_lsn( +xlog_cil_force_seq( struct xlog *log, - xfs_lsn_t sequence) + xfs_csn_t sequence) { struct xfs_cil *cil = log->l_cilp; struct xfs_cil_ctx *ctx; @@ -1126,23 +1179,19 @@ */ bool xfs_log_item_in_current_chkpt( - struct xfs_log_item *lip) + struct xfs_log_item *lip) { - struct xfs_cil_ctx *ctx; + struct xfs_cil *cil = lip->li_mountp->m_log->l_cilp; if (list_empty(&lip->li_cil)) return false; - - ctx = lip->li_mountp->m_log->l_cilp->xc_ctx; /* * li_seq is written on the first commit of a log item to record the * first checkpoint it is written to. Hence if it is different to the * current sequence, we're in a new checkpoint. */ - if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0) - return false; - return true; + return lip->li_seq == READ_ONCE(cil->xc_current_sequence); } /* @@ -1155,11 +1204,11 @@ struct xfs_cil *cil; struct xfs_cil_ctx *ctx; - cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL); + cil = kmem_zalloc(sizeof(*cil), KM_MAYFAIL); if (!cil) return -ENOMEM; - ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL); + ctx = kmem_zalloc(sizeof(*ctx), KM_MAYFAIL); if (!ctx) { kmem_free(cil); return -ENOMEM; @@ -1170,6 +1219,7 @@ INIT_LIST_HEAD(&cil->xc_committing); spin_lock_init(&cil->xc_cil_lock); spin_lock_init(&cil->xc_push_lock); + init_waitqueue_head(&cil->xc_push_wait); init_rwsem(&cil->xc_ctx_lock); init_waitqueue_head(&cil->xc_commit_wait); -- Gitblit v1.6.2