From e636c8d336489bf3eed5878299e6cc045bbad077 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 20 Feb 2024 01:17:29 +0000
Subject: [PATCH] debug lk
---
kernel/fs/jbd2/transaction.c | 632 ++++++++++++++++++++++++++++++++++-----------------------
1 files changed, 377 insertions(+), 255 deletions(-)
diff --git a/kernel/fs/jbd2/transaction.c b/kernel/fs/jbd2/transaction.c
index 8c30559..1baf2d6 100644
--- a/kernel/fs/jbd2/transaction.c
+++ b/kernel/fs/jbd2/transaction.c
@@ -63,9 +63,31 @@
}
/*
+ * Base amount of descriptor blocks we reserve for each transaction.
+ */
+static int jbd2_descriptor_blocks_per_trans(journal_t *journal)
+{
+ int tag_space = journal->j_blocksize - sizeof(journal_header_t);
+ int tags_per_block;
+
+ /* Subtract UUID */
+ tag_space -= 16;
+ if (jbd2_journal_has_csum_v2or3(journal))
+ tag_space -= sizeof(struct jbd2_journal_block_tail);
+ /* Commit code leaves a slack space of 16 bytes at the end of block */
+ tags_per_block = (tag_space - 16) / journal_tag_bytes(journal);
+ /*
+ * Revoke descriptors are accounted separately so we need to reserve
+ * space for commit block and normal transaction descriptor blocks.
+ */
+ return 1 + DIV_ROUND_UP(journal->j_max_transaction_buffers,
+ tags_per_block);
+}
+
+/*
* jbd2_get_transaction: obtain a new transaction_t object.
*
- * Simply allocate and initialise a new transaction. Create it in
+ * Simply initialise a new transaction. Initialize it in
* RUNNING state and add it to the current journal (which should not
* have an existing running transaction: we only make a new transaction
* once we have started to commit the old one).
@@ -77,8 +99,8 @@
*
*/
-static transaction_t *
-jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
+static void jbd2_get_transaction(journal_t *journal,
+ transaction_t *transaction)
{
transaction->t_journal = journal;
transaction->t_state = T_RUNNING;
@@ -88,7 +110,9 @@
spin_lock_init(&transaction->t_handle_lock);
atomic_set(&transaction->t_updates, 0);
atomic_set(&transaction->t_outstanding_credits,
+ jbd2_descriptor_blocks_per_trans(journal) +
atomic_read(&journal->j_reserved_credits));
+ atomic_set(&transaction->t_outstanding_revokes, 0);
atomic_set(&transaction->t_handle_count, 0);
INIT_LIST_HEAD(&transaction->t_inode_list);
INIT_LIST_HEAD(&transaction->t_private_list);
@@ -102,8 +126,6 @@
transaction->t_max_wait = 0;
transaction->t_start = jiffies;
transaction->t_requested = 0;
-
- return transaction;
}
/*
@@ -140,9 +162,9 @@
}
/*
- * Wait until running transaction passes T_LOCKED state. Also starts the commit
- * if needed. The function expects running transaction to exist and releases
- * j_state_lock.
+ * Wait until running transaction passes to T_FLUSH state and new transaction
+ * can thus be started. Also starts the commit if needed. The function expects
+ * running transaction to exist and releases j_state_lock.
*/
static void wait_transaction_locked(journal_t *journal)
__releases(journal->j_state_lock)
@@ -151,13 +173,41 @@
int need_to_start;
tid_t tid = journal->j_running_transaction->t_tid;
- prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
+ prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait,
TASK_UNINTERRUPTIBLE);
need_to_start = !tid_geq(journal->j_commit_request, tid);
read_unlock(&journal->j_state_lock);
if (need_to_start)
jbd2_log_start_commit(journal, tid);
jbd2_might_wait_for_commit(journal);
+ schedule();
+ finish_wait(&journal->j_wait_transaction_locked, &wait);
+}
+
+/*
+ * Wait until running transaction transitions from T_SWITCH to T_FLUSH
+ * state and new transaction can thus be started. The function releases
+ * j_state_lock.
+ */
+static void wait_transaction_switching(journal_t *journal)
+ __releases(journal->j_state_lock)
+{
+ DEFINE_WAIT(wait);
+
+ if (WARN_ON(!journal->j_running_transaction ||
+ journal->j_running_transaction->t_state != T_SWITCH)) {
+ read_unlock(&journal->j_state_lock);
+ return;
+ }
+ prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait,
+ TASK_UNINTERRUPTIBLE);
+ read_unlock(&journal->j_state_lock);
+ /*
+ * We don't call jbd2_might_wait_for_commit() here as there's no
+ * waiting for outstanding handles happening anymore in T_SWITCH state
+ * and handling of reserved handles actually relies on that for
+ * correctness.
+ */
schedule();
finish_wait(&journal->j_wait_transaction_locked, &wait);
}
@@ -185,7 +235,8 @@
* If the current transaction is locked down for commit, wait
* for the lock to be released.
*/
- if (t->t_state == T_LOCKED) {
+ if (t->t_state != T_RUNNING) {
+ WARN_ON_ONCE(t->t_state >= T_FLUSH);
wait_transaction_locked(journal);
return 1;
}
@@ -233,12 +284,13 @@
* *before* starting to dirty potentially checkpointed buffers
* in the new transaction.
*/
- if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) {
+ if (jbd2_log_space_left(journal) < journal->j_max_transaction_buffers) {
atomic_sub(total, &t->t_outstanding_credits);
read_unlock(&journal->j_state_lock);
jbd2_might_wait_for_commit(journal);
write_lock(&journal->j_state_lock);
- if (jbd2_log_space_left(journal) < jbd2_space_needed(journal))
+ if (jbd2_log_space_left(journal) <
+ journal->j_max_transaction_buffers)
__jbd2_log_wait_for_space(journal);
write_unlock(&journal->j_state_lock);
return 1;
@@ -274,12 +326,12 @@
gfp_t gfp_mask)
{
transaction_t *transaction, *new_transaction = NULL;
- int blocks = handle->h_buffer_credits;
+ int blocks = handle->h_total_credits;
int rsv_blocks = 0;
unsigned long ts = jiffies;
if (handle->h_rsv_handle)
- rsv_blocks = handle->h_rsv_handle->h_buffer_credits;
+ rsv_blocks = handle->h_rsv_handle->h_total_credits;
/*
* Limit the number of reserved credits to 1/2 of maximum transaction
@@ -297,7 +349,12 @@
}
alloc_transaction:
- if (!journal->j_running_transaction) {
+ /*
+ * This check is racy but it is just an optimization of allocating new
+ * transaction early if there are high chances we'll need it. If we
+ * guess wrong, we'll retry or free unused transaction.
+ */
+ if (!data_race(journal->j_running_transaction)) {
/*
* If __GFP_FS is not present, then we may be being called from
* inside the fs writeback layer, so we MUST NOT fail.
@@ -362,8 +419,14 @@
/*
* We have handle reserved so we are allowed to join T_LOCKED
* transaction and we don't have to check for transaction size
- * and journal space.
+ * and journal space. But we still have to wait while running
+ * transaction is being switched to a committing one as it
+ * won't wait for any handles anymore.
*/
+ if (transaction->t_state == T_SWITCH) {
+ wait_transaction_switching(journal);
+ goto repeat;
+ }
sub_reserved_credits(journal, blocks);
handle->h_reserved = 0;
}
@@ -374,6 +437,7 @@
update_t_max_wait(transaction, ts);
handle->h_transaction = transaction;
handle->h_requested_credits = blocks;
+ handle->h_revoke_credits_requested = handle->h_revoke_credits;
handle->h_start_jiffies = jiffies;
atomic_inc(&transaction->t_updates);
atomic_inc(&transaction->t_handle_count);
@@ -400,15 +464,15 @@
handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
if (!handle)
return NULL;
- handle->h_buffer_credits = nblocks;
+ handle->h_total_credits = nblocks;
handle->h_ref = 1;
return handle;
}
handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
- gfp_t gfp_mask, unsigned int type,
- unsigned int line_no)
+ int revoke_records, gfp_t gfp_mask,
+ unsigned int type, unsigned int line_no)
{
handle_t *handle = journal_current_handle();
int err;
@@ -422,6 +486,8 @@
return handle;
}
+ nblocks += DIV_ROUND_UP(revoke_records,
+ journal->j_revoke_records_per_block);
handle = new_handle(nblocks);
if (!handle)
return ERR_PTR(-ENOMEM);
@@ -437,6 +503,7 @@
rsv_handle->h_journal = journal;
handle->h_rsv_handle = rsv_handle;
}
+ handle->h_revoke_credits = revoke_records;
err = start_this_handle(journal, handle, gfp_mask);
if (err < 0) {
@@ -457,7 +524,7 @@
/**
- * handle_t *jbd2_journal_start() - Obtain a new handle.
+ * jbd2_journal_start() - Obtain a new handle.
* @journal: Journal to start transaction on.
* @nblocks: number of block buffer we might modify
*
@@ -465,7 +532,7 @@
* modified buffers in the log. We block until the log can guarantee
* that much space. Additionally, if rsv_blocks > 0, we also create another
* handle with rsv_blocks reserved blocks in the journal. This handle is
- * is stored in h_rsv_handle. It is not attached to any particular transaction
+ * stored in h_rsv_handle. It is not attached to any particular transaction
* and thus doesn't block transaction commit. If the caller uses this reserved
* handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop()
* on the parent handle will dispose the reserved one. Reserved handle has to
@@ -477,22 +544,34 @@
*/
handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
{
- return jbd2__journal_start(journal, nblocks, 0, GFP_NOFS, 0, 0);
+ return jbd2__journal_start(journal, nblocks, 0, 0, GFP_NOFS, 0, 0);
}
EXPORT_SYMBOL(jbd2_journal_start);
+
+static void __jbd2_journal_unreserve_handle(handle_t *handle, transaction_t *t)
+{
+ journal_t *journal = handle->h_journal;
+
+ WARN_ON(!handle->h_reserved);
+ sub_reserved_credits(journal, handle->h_total_credits);
+ if (t)
+ atomic_sub(handle->h_total_credits, &t->t_outstanding_credits);
+}
void jbd2_journal_free_reserved(handle_t *handle)
{
journal_t *journal = handle->h_journal;
- WARN_ON(!handle->h_reserved);
- sub_reserved_credits(journal, handle->h_buffer_credits);
+ /* Get j_state_lock to pin running transaction if it exists */
+ read_lock(&journal->j_state_lock);
+ __jbd2_journal_unreserve_handle(handle, journal->j_running_transaction);
+ read_unlock(&journal->j_state_lock);
jbd2_free_handle(handle);
}
EXPORT_SYMBOL(jbd2_journal_free_reserved);
/**
- * int jbd2_journal_start_reserved() - start reserved handle
+ * jbd2_journal_start_reserved() - start reserved handle
* @handle: handle to start
* @type: for handle statistics
* @line_no: for handle statistics
@@ -538,14 +617,18 @@
}
handle->h_type = type;
handle->h_line_no = line_no;
+ trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
+ handle->h_transaction->t_tid, type,
+ line_no, handle->h_total_credits);
return 0;
}
EXPORT_SYMBOL(jbd2_journal_start_reserved);
/**
- * int jbd2_journal_extend() - extend buffer credits.
+ * jbd2_journal_extend() - extend buffer credits.
* @handle: handle to 'extend'
* @nblocks: nr blocks to try to extend by.
+ * @revoke_records: number of revoke records to try to extend by.
*
* Some transactions, such as large extends and truncates, can be done
* atomically all at once or in several stages. The operation requests
@@ -562,7 +645,7 @@
* return code < 0 implies an error
* return code > 0 implies normal transaction-full status.
*/
-int jbd2_journal_extend(handle_t *handle, int nblocks)
+int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal;
@@ -584,6 +667,12 @@
goto error_out;
}
+ nblocks += DIV_ROUND_UP(
+ handle->h_revoke_credits_requested + revoke_records,
+ journal->j_revoke_records_per_block) -
+ DIV_ROUND_UP(
+ handle->h_revoke_credits_requested,
+ journal->j_revoke_records_per_block);
spin_lock(&transaction->t_handle_lock);
wanted = atomic_add_return(nblocks,
&transaction->t_outstanding_credits);
@@ -595,22 +684,16 @@
goto unlock;
}
- if (wanted + (wanted >> JBD2_CONTROL_BLOCKS_SHIFT) >
- jbd2_log_space_left(journal)) {
- jbd_debug(3, "denied handle %p %d blocks: "
- "insufficient log space\n", handle, nblocks);
- atomic_sub(nblocks, &transaction->t_outstanding_credits);
- goto unlock;
- }
-
trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
transaction->t_tid,
handle->h_type, handle->h_line_no,
- handle->h_buffer_credits,
+ handle->h_total_credits,
nblocks);
- handle->h_buffer_credits += nblocks;
+ handle->h_total_credits += nblocks;
handle->h_requested_credits += nblocks;
+ handle->h_revoke_credits += revoke_records;
+ handle->h_revoke_credits_requested += revoke_records;
result = 0;
jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
@@ -621,11 +704,56 @@
return result;
}
+static void stop_this_handle(handle_t *handle)
+{
+ transaction_t *transaction = handle->h_transaction;
+ journal_t *journal = transaction->t_journal;
+ int revokes;
+
+ J_ASSERT(journal_current_handle() == handle);
+ J_ASSERT(atomic_read(&transaction->t_updates) > 0);
+ current->journal_info = NULL;
+ /*
+ * Subtract necessary revoke descriptor blocks from handle credits. We
+ * take care to account only for revoke descriptor blocks the
+ * transaction will really need as large sequences of transactions with
+ * small numbers of revokes are relatively common.
+ */
+ revokes = handle->h_revoke_credits_requested - handle->h_revoke_credits;
+ if (revokes) {
+ int t_revokes, revoke_descriptors;
+ int rr_per_blk = journal->j_revoke_records_per_block;
+
+ WARN_ON_ONCE(DIV_ROUND_UP(revokes, rr_per_blk)
+ > handle->h_total_credits);
+ t_revokes = atomic_add_return(revokes,
+ &transaction->t_outstanding_revokes);
+ revoke_descriptors =
+ DIV_ROUND_UP(t_revokes, rr_per_blk) -
+ DIV_ROUND_UP(t_revokes - revokes, rr_per_blk);
+ handle->h_total_credits -= revoke_descriptors;
+ }
+ atomic_sub(handle->h_total_credits,
+ &transaction->t_outstanding_credits);
+ if (handle->h_rsv_handle)
+ __jbd2_journal_unreserve_handle(handle->h_rsv_handle,
+ transaction);
+ if (atomic_dec_and_test(&transaction->t_updates))
+ wake_up(&journal->j_wait_updates);
+
+ rwsem_release(&journal->j_trans_commit_map, _THIS_IP_);
+ /*
+ * Scope of the GFP_NOFS context is over here and so we can restore the
+ * original alloc context.
+ */
+ memalloc_nofs_restore(handle->saved_alloc_context);
+}
/**
- * int jbd2_journal_restart() - restart a handle .
+ * jbd2__journal_restart() - restart a handle .
* @handle: handle to restart
* @nblocks: nr credits requested
+ * @revoke_records: number of revoke record credits requested
* @gfp_mask: memory allocation flags (for start_this_handle)
*
* Restart a handle for a multi-transaction filesystem
@@ -638,56 +766,48 @@
* credits. We preserve reserved handle if there's any attached to the
* passed in handle.
*/
-int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
+int jbd2__journal_restart(handle_t *handle, int nblocks, int revoke_records,
+ gfp_t gfp_mask)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal;
tid_t tid;
- int need_to_start, ret;
+ int need_to_start;
+ int ret;
/* If we've had an abort of any type, don't even think about
* actually doing the restart! */
if (is_handle_aborted(handle))
return 0;
journal = transaction->t_journal;
+ tid = transaction->t_tid;
/*
* First unlink the handle from its current transaction, and start the
* commit on that.
*/
- J_ASSERT(atomic_read(&transaction->t_updates) > 0);
- J_ASSERT(journal_current_handle() == handle);
-
- read_lock(&journal->j_state_lock);
- spin_lock(&transaction->t_handle_lock);
- atomic_sub(handle->h_buffer_credits,
- &transaction->t_outstanding_credits);
- if (handle->h_rsv_handle) {
- sub_reserved_credits(journal,
- handle->h_rsv_handle->h_buffer_credits);
- }
- if (atomic_dec_and_test(&transaction->t_updates))
- wake_up(&journal->j_wait_updates);
- tid = transaction->t_tid;
- spin_unlock(&transaction->t_handle_lock);
- handle->h_transaction = NULL;
- current->journal_info = NULL;
-
jbd_debug(2, "restarting handle %p\n", handle);
+ stop_this_handle(handle);
+ handle->h_transaction = NULL;
+
+ /*
+ * TODO: If we use READ_ONCE / WRITE_ONCE for j_commit_request we can
+ * get rid of pointless j_state_lock traffic like this.
+ */
+ read_lock(&journal->j_state_lock);
need_to_start = !tid_geq(journal->j_commit_request, tid);
read_unlock(&journal->j_state_lock);
if (need_to_start)
jbd2_log_start_commit(journal, tid);
-
- rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_);
- handle->h_buffer_credits = nblocks;
- /*
- * Restore the original nofs context because the journal restart
- * is basically the same thing as journal stop and start.
- * start_this_handle will start a new nofs context.
- */
- memalloc_nofs_restore(handle->saved_alloc_context);
+ handle->h_total_credits = nblocks +
+ DIV_ROUND_UP(revoke_records,
+ journal->j_revoke_records_per_block);
+ handle->h_revoke_credits = revoke_records;
ret = start_this_handle(journal, handle, gfp_mask);
+ trace_jbd2_handle_restart(journal->j_fs_dev->bd_dev,
+ ret ? 0 : handle->h_transaction->t_tid,
+ handle->h_type, handle->h_line_no,
+ handle->h_total_credits);
return ret;
}
EXPORT_SYMBOL(jbd2__journal_restart);
@@ -695,12 +815,12 @@
int jbd2_journal_restart(handle_t *handle, int nblocks)
{
- return jbd2__journal_restart(handle, nblocks, GFP_NOFS);
+ return jbd2__journal_restart(handle, nblocks, 0, GFP_NOFS);
}
EXPORT_SYMBOL(jbd2_journal_restart);
/**
- * void jbd2_journal_lock_updates () - establish a transaction barrier.
+ * jbd2_journal_lock_updates () - establish a transaction barrier.
* @journal: Journal to establish a barrier on.
*
* This locks out any further updates from being started, and blocks
@@ -759,7 +879,7 @@
}
/**
- * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier
+ * jbd2_journal_unlock_updates () - release barrier
* @journal: Journal to release the barrier on.
*
* Release a transaction barrier obtained with jbd2_journal_lock_updates().
@@ -774,7 +894,7 @@
write_lock(&journal->j_state_lock);
--journal->j_barrier_count;
write_unlock(&journal->j_state_lock);
- wake_up(&journal->j_wait_transaction_locked);
+ wake_up_all(&journal->j_wait_transaction_locked);
}
static void warn_dirty_buffer(struct buffer_head *bh)
@@ -843,7 +963,7 @@
start_lock = jiffies;
lock_buffer(bh);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
/* If it takes too long to lock the buffer, trace it */
time_lock = jbd2_time_diff(start_lock, jiffies);
@@ -864,36 +984,28 @@
* ie. locked but not dirty) or tune2fs (which may actually have
* the buffer dirtied, ugh.) */
- if (buffer_dirty(bh)) {
+ if (buffer_dirty(bh) && jh->b_transaction) {
+ warn_dirty_buffer(bh);
/*
- * First question: is this buffer already part of the current
- * transaction or the existing committing transaction?
- */
- if (jh->b_transaction) {
- J_ASSERT_JH(jh,
- jh->b_transaction == transaction ||
- jh->b_transaction ==
- journal->j_committing_transaction);
- if (jh->b_next_transaction)
- J_ASSERT_JH(jh, jh->b_next_transaction ==
- transaction);
- warn_dirty_buffer(bh);
- }
- /*
- * In any case we need to clean the dirty flag and we must
- * do it under the buffer lock to be sure we don't race
- * with running write-out.
+ * We need to clean the dirty flag and we must do it under the
+ * buffer lock to be sure we don't race with running write-out.
*/
JBUFFER_TRACE(jh, "Journalling dirty buffer");
clear_buffer_dirty(bh);
+ /*
+ * The buffer is going to be added to BJ_Reserved list now and
+ * nothing guarantees jbd2_journal_dirty_metadata() will be
+ * ever called for it. So we need to set jbddirty bit here to
+ * make sure the buffer is dirtied and written out when the
+ * journaling machinery is done with it.
+ */
set_buffer_jbddirty(bh);
}
- unlock_buffer(bh);
-
error = -EROFS;
if (is_handle_aborted(handle)) {
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
+ unlock_buffer(bh);
goto out;
}
error = 0;
@@ -903,14 +1015,16 @@
* b_next_transaction points to it
*/
if (jh->b_transaction == transaction ||
- jh->b_next_transaction == transaction)
+ jh->b_next_transaction == transaction) {
+ unlock_buffer(bh);
goto done;
+ }
/*
* this is the first time this transaction is touching this buffer,
* reset the modified flag
*/
- jh->b_modified = 0;
+ jh->b_modified = 0;
/*
* If the buffer is not journaled right now, we need to make sure it
@@ -928,10 +1042,24 @@
*/
smp_wmb();
spin_lock(&journal->j_list_lock);
+ if (test_clear_buffer_dirty(bh)) {
+ /*
+ * Execute buffer dirty clearing and jh->b_transaction
+ * assignment under journal->j_list_lock locked to
+ * prevent bh being removed from checkpoint list if
+ * the buffer is in an intermediate state (not dirty
+ * and jh->b_transaction is NULL).
+ */
+ JBUFFER_TRACE(jh, "Journalling dirty buffer");
+ set_buffer_jbddirty(bh);
+ }
__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
spin_unlock(&journal->j_list_lock);
+ unlock_buffer(bh);
goto done;
}
+ unlock_buffer(bh);
+
/*
* If there is already a copy-out version of this buffer, then we don't
* need to make another one
@@ -957,7 +1085,7 @@
*/
if (buffer_shadow(bh)) {
JBUFFER_TRACE(jh, "on shadow: sleep");
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE);
goto repeat;
}
@@ -978,7 +1106,7 @@
JBUFFER_TRACE(jh, "generate frozen data");
if (!frozen_buffer) {
JBUFFER_TRACE(jh, "allocate memory for buffer");
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size,
GFP_NOFS | __GFP_NOFAIL);
goto repeat;
@@ -997,7 +1125,7 @@
jh->b_next_transaction = transaction;
done:
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
/*
* If we are about to journal a buffer, then any revoke pending on it is
@@ -1067,7 +1195,8 @@
}
/**
- * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
+ * jbd2_journal_get_write_access() - notify intent to modify a buffer
+ * for metadata (not data) update.
* @handle: transaction to add buffer modifications to
* @bh: bh to be used for metadata writes
*
@@ -1111,7 +1240,7 @@
* unlocked buffer beforehand. */
/**
- * int jbd2_journal_get_create_access () - notify intent to use newly created bh
+ * jbd2_journal_get_create_access () - notify intent to use newly created bh
* @handle: transaction to new buffer to
* @bh: new buffer.
*
@@ -1139,7 +1268,7 @@
* that case: the transaction must have deleted the buffer for it to be
* reused here.
*/
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
jh->b_transaction == NULL ||
(jh->b_transaction == journal->j_committing_transaction &&
@@ -1174,7 +1303,7 @@
jh->b_next_transaction = transaction;
spin_unlock(&journal->j_list_lock);
}
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
/*
* akpm: I added this. ext3_alloc_branch can pick up new indirect
@@ -1191,7 +1320,7 @@
}
/**
- * int jbd2_journal_get_undo_access() - Notify intent to modify metadata with
+ * jbd2_journal_get_undo_access() - Notify intent to modify metadata with
* non-rewindable consequences
* @handle: transaction
* @bh: buffer to undo
@@ -1245,13 +1374,13 @@
committed_data = jbd2_alloc(jh2bh(jh)->b_size,
GFP_NOFS|__GFP_NOFAIL);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
if (!jh->b_committed_data) {
/* Copy out the current buffer contents into the
* preserved, committed copy. */
JBUFFER_TRACE(jh, "generate b_committed data");
if (!committed_data) {
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
goto repeat;
}
@@ -1259,7 +1388,7 @@
committed_data = NULL;
memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
}
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
out:
jbd2_journal_put_journal_head(jh);
if (unlikely(committed_data))
@@ -1268,7 +1397,7 @@
}
/**
- * void jbd2_journal_set_triggers() - Add triggers for commit writeout
+ * jbd2_journal_set_triggers() - Add triggers for commit writeout
* @bh: buffer to trigger on
* @type: struct jbd2_buffer_trigger_type containing the trigger(s).
*
@@ -1310,7 +1439,7 @@
}
/**
- * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
+ * jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
* @handle: transaction to add buffer to.
* @bh: buffer to mark
*
@@ -1339,8 +1468,6 @@
struct journal_head *jh;
int ret = 0;
- if (is_handle_aborted(handle))
- return -EROFS;
if (!buffer_jbd(bh))
return -EUCLEAN;
@@ -1358,18 +1485,18 @@
* crucial to catch bugs so let's do a reliable check until the
* lockless handling is fully proven.
*/
- if (jh->b_transaction != transaction &&
- jh->b_next_transaction != transaction) {
- jbd_lock_bh_state(bh);
+ if (data_race(jh->b_transaction != transaction &&
+ jh->b_next_transaction != transaction)) {
+ spin_lock(&jh->b_state_lock);
J_ASSERT_JH(jh, jh->b_transaction == transaction ||
jh->b_next_transaction == transaction);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
}
if (jh->b_modified == 1) {
/* If it's in our transaction it must be in BJ_Metadata list. */
- if (jh->b_transaction == transaction &&
- jh->b_jlist != BJ_Metadata) {
- jbd_lock_bh_state(bh);
+ if (data_race(jh->b_transaction == transaction &&
+ jh->b_jlist != BJ_Metadata)) {
+ spin_lock(&jh->b_state_lock);
if (jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata)
pr_err("JBD2: assertion failure: h_type=%u "
@@ -1379,13 +1506,25 @@
jh->b_jlist);
J_ASSERT_JH(jh, jh->b_transaction != transaction ||
jh->b_jlist == BJ_Metadata);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
}
goto out;
}
journal = transaction->t_journal;
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
+
+ if (is_handle_aborted(handle)) {
+ /*
+ * Check journal aborting with @jh->b_state_lock locked,
+ * since 'jh->b_transaction' could be replaced with
+ * 'jh->b_next_transaction' during old transaction
+ * committing if journal aborted, which may fail
+ * assertion on 'jh->b_frozen_data == NULL'.
+ */
+ ret = -EROFS;
+ goto out_unlock_bh;
+ }
if (jh->b_modified == 0) {
/*
@@ -1393,12 +1532,12 @@
* of the transaction. This needs to be done
* once a transaction -bzzz
*/
- if (handle->h_buffer_credits <= 0) {
+ if (WARN_ON_ONCE(jbd2_handle_buffer_credits(handle) <= 0)) {
ret = -ENOSPC;
goto out_unlock_bh;
}
jh->b_modified = 1;
- handle->h_buffer_credits--;
+ handle->h_total_credits--;
}
/*
@@ -1471,14 +1610,14 @@
__jbd2_journal_file_buffer(jh, transaction, BJ_Metadata);
spin_unlock(&journal->j_list_lock);
out_unlock_bh:
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
out:
JBUFFER_TRACE(jh, "exit");
return ret;
}
/**
- * void jbd2_journal_forget() - bforget() for potentially-journaled buffers.
+ * jbd2_journal_forget() - bforget() for potentially-journaled buffers.
* @handle: transaction handle
* @bh: bh to 'forget'
*
@@ -1494,7 +1633,7 @@
* Allow this call even if the handle has aborted --- it may be part of
* the caller's cleanup after an abort.
*/
-int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
+int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal;
@@ -1509,18 +1648,20 @@
BUFFER_TRACE(bh, "entry");
- jbd_lock_bh_state(bh);
+ jh = jbd2_journal_grab_journal_head(bh);
+ if (!jh) {
+ __bforget(bh);
+ return 0;
+ }
- if (!buffer_jbd(bh))
- goto not_jbd;
- jh = bh2jh(bh);
+ spin_lock(&jh->b_state_lock);
/* Critical error: attempting to delete a bitmap buffer, maybe?
* Don't do any jbd operations, and return an error. */
if (!J_EXPECT_JH(jh, !jh->b_committed_data,
"inconsistent data on disk")) {
err = -EIO;
- goto not_jbd;
+ goto drop;
}
/* keep track of whether or not this transaction modified us */
@@ -1568,12 +1709,7 @@
__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
} else {
__jbd2_journal_unfile_buffer(jh);
- if (!buffer_jbd(bh)) {
- spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
- __bforget(bh);
- goto drop;
- }
+ jbd2_journal_put_journal_head(jh);
}
spin_unlock(&journal->j_list_lock);
} else if (jh->b_transaction) {
@@ -1605,21 +1741,52 @@
if (was_modified)
drop_reserve = 1;
}
- }
+ } else {
+ /*
+ * Finally, if the buffer is not belongs to any
+ * transaction, we can just drop it now if it has no
+ * checkpoint.
+ */
+ spin_lock(&journal->j_list_lock);
+ if (!jh->b_cp_transaction) {
+ JBUFFER_TRACE(jh, "belongs to none transaction");
+ spin_unlock(&journal->j_list_lock);
+ goto drop;
+ }
-not_jbd:
- jbd_unlock_bh_state(bh);
- __brelse(bh);
+ /*
+ * Otherwise, if the buffer has been written to disk,
+ * it is safe to remove the checkpoint and drop it.
+ */
+ if (!buffer_dirty(bh)) {
+ __jbd2_journal_remove_checkpoint(jh);
+ spin_unlock(&journal->j_list_lock);
+ goto drop;
+ }
+
+ /*
+ * The buffer is still not written to disk, we should
+ * attach this buffer to current transaction so that the
+ * buffer can be checkpointed only after the current
+ * transaction commits.
+ */
+ clear_buffer_dirty(bh);
+ __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
+ spin_unlock(&journal->j_list_lock);
+ }
drop:
+ __brelse(bh);
+ spin_unlock(&jh->b_state_lock);
+ jbd2_journal_put_journal_head(jh);
if (drop_reserve) {
/* no need to reserve log space for this block -bzzz */
- handle->h_buffer_credits++;
+ handle->h_total_credits++;
}
return err;
}
/**
- * int jbd2_journal_stop() - complete a transaction
+ * jbd2_journal_stop() - complete a transaction
* @handle: transaction to complete.
*
* All done for a particular handle.
@@ -1642,45 +1809,34 @@
tid_t tid;
pid_t pid;
+ if (--handle->h_ref > 0) {
+ jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
+ handle->h_ref);
+ if (is_handle_aborted(handle))
+ return -EIO;
+ return 0;
+ }
if (!transaction) {
/*
- * Handle is already detached from the transaction so
- * there is nothing to do other than decrease a refcount,
- * or free the handle if refcount drops to zero
+ * Handle is already detached from the transaction so there is
+ * nothing to do other than free the handle.
*/
- if (--handle->h_ref > 0) {
- jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
- handle->h_ref);
- return err;
- } else {
- if (handle->h_rsv_handle)
- jbd2_free_handle(handle->h_rsv_handle);
- goto free_and_exit;
- }
+ memalloc_nofs_restore(handle->saved_alloc_context);
+ goto free_and_exit;
}
journal = transaction->t_journal;
-
- J_ASSERT(journal_current_handle() == handle);
+ tid = transaction->t_tid;
if (is_handle_aborted(handle))
err = -EIO;
- else
- J_ASSERT(atomic_read(&transaction->t_updates) > 0);
-
- if (--handle->h_ref > 0) {
- jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
- handle->h_ref);
- return err;
- }
jbd_debug(4, "Handle %p going down\n", handle);
trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
- transaction->t_tid,
- handle->h_type, handle->h_line_no,
+ tid, handle->h_type, handle->h_line_no,
jiffies - handle->h_start_jiffies,
handle->h_sync, handle->h_requested_credits,
(handle->h_requested_credits -
- handle->h_buffer_credits));
+ handle->h_total_credits));
/*
* Implement synchronous transaction batching. If the handle
@@ -1740,19 +1896,13 @@
if (handle->h_sync)
transaction->t_synchronous_commit = 1;
- current->journal_info = NULL;
- atomic_sub(handle->h_buffer_credits,
- &transaction->t_outstanding_credits);
/*
* If the handle is marked SYNC, we need to set another commit
- * going! We also want to force a commit if the current
- * transaction is occupying too much of the log, or if the
- * transaction is too old now.
+ * going! We also want to force a commit if the transaction is too
+ * old now.
*/
if (handle->h_sync ||
- (atomic_read(&transaction->t_outstanding_credits) >
- journal->j_max_transaction_buffers) ||
time_after_eq(jiffies, transaction->t_expires)) {
/* Do this even for aborted journals: an abort still
* completes the commit thread, it just doesn't write
@@ -1761,7 +1911,7 @@
jbd_debug(2, "transaction too old, requesting commit for "
"handle %p\n", handle);
/* This is non-blocking */
- jbd2_log_start_commit(journal, transaction->t_tid);
+ jbd2_log_start_commit(journal, tid);
/*
* Special case: JBD2_SYNC synchronous updates require us
@@ -1772,31 +1922,19 @@
}
/*
- * Once we drop t_updates, if it goes to zero the transaction
- * could start committing on us and eventually disappear. So
- * once we do this, we must not dereference transaction
- * pointer again.
+ * Once stop_this_handle() drops t_updates, the transaction could start
+ * committing on us and eventually disappear. So we must not
+ * dereference transaction pointer again after calling
+ * stop_this_handle().
*/
- tid = transaction->t_tid;
- if (atomic_dec_and_test(&transaction->t_updates)) {
- wake_up(&journal->j_wait_updates);
- if (journal->j_barrier_count)
- wake_up(&journal->j_wait_transaction_locked);
- }
-
- rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_);
+ stop_this_handle(handle);
if (wait_for_commit)
err = jbd2_log_wait_commit(journal, tid);
- if (handle->h_rsv_handle)
- jbd2_journal_free_reserved(handle->h_rsv_handle);
free_and_exit:
- /*
- * Scope of the GFP_NOFS context is over here and so we can restore the
- * original alloc context.
- */
- memalloc_nofs_restore(handle->saved_alloc_context);
+ if (handle->h_rsv_handle)
+ jbd2_free_handle(handle->h_rsv_handle);
jbd2_free_handle(handle);
return err;
}
@@ -1814,7 +1952,7 @@
*
* j_list_lock is held.
*
- * jbd_lock_bh_state(jh2bh(jh)) is held.
+ * jh->b_state_lock is held.
*/
static inline void
@@ -1838,7 +1976,7 @@
*
* Called with j_list_lock held, and the journal may not be locked.
*
- * jbd_lock_bh_state(jh2bh(jh)) is held.
+ * jh->b_state_lock is held.
*/
static inline void
@@ -1870,7 +2008,7 @@
transaction_t *transaction;
struct buffer_head *bh = jh2bh(jh);
- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
+ lockdep_assert_held(&jh->b_state_lock);
transaction = jh->b_transaction;
if (transaction)
assert_spin_locked(&transaction->t_journal->j_list_lock);
@@ -1907,11 +2045,10 @@
}
/*
- * Remove buffer from all transactions.
+ * Remove buffer from all transactions. The caller is responsible for dropping
+ * the jh reference that belonged to the transaction.
*
* Called with bh_state lock and j_list_lock
- *
- * jh and bh may be already freed when this function returns.
*/
static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
{
@@ -1920,7 +2057,6 @@
__jbd2_journal_temp_unlink_buffer(jh);
jh->b_transaction = NULL;
- jbd2_journal_put_journal_head(jh);
}
void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
@@ -1929,18 +2065,19 @@
/* Get reference so that buffer cannot be freed before we unlock it */
get_bh(bh);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
spin_lock(&journal->j_list_lock);
__jbd2_journal_unfile_buffer(jh);
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
+ jbd2_journal_put_journal_head(jh);
__brelse(bh);
}
/*
* Called from jbd2_journal_try_to_free_buffers().
*
- * Called under jbd_lock_bh_state(bh)
+ * Called under jh->b_state_lock
*/
static void
__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
@@ -1967,13 +2104,9 @@
}
/**
- * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
+ * jbd2_journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
* @page: to try and free
- * @gfp_mask: we use the mask to detect how hard should we try to release
- * buffers. If __GFP_DIRECT_RECLAIM and __GFP_FS is set, we wait for commit
- * code to release the buffers.
- *
*
* For all the buffers on this page,
* if they are fully written out ordered data, move them onto BUF_CLEAN
@@ -2004,8 +2137,7 @@
*
* Return 0 on failure, 1 on success
*/
-int jbd2_journal_try_to_free_buffers(journal_t *journal,
- struct page *page, gfp_t gfp_mask)
+int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page)
{
struct buffer_head *head;
struct buffer_head *bh;
@@ -2028,10 +2160,10 @@
if (!jh)
continue;
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
__journal_try_to_free_buffer(journal, bh);
+ spin_unlock(&jh->b_state_lock);
jbd2_journal_put_journal_head(jh);
- jbd_unlock_bh_state(bh);
if (buffer_jbd(bh))
goto busy;
@@ -2067,7 +2199,7 @@
*
* Called under j_list_lock.
*
- * Called under jbd_lock_bh_state(bh).
+ * Called under jh->b_state_lock.
*/
static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
{
@@ -2088,6 +2220,7 @@
} else {
JBUFFER_TRACE(jh, "on running transaction");
__jbd2_journal_unfile_buffer(jh);
+ jbd2_journal_put_journal_head(jh);
}
return may_free;
}
@@ -2154,17 +2287,14 @@
* holding the page lock. --sct
*/
- if (!buffer_jbd(bh))
+ jh = jbd2_journal_grab_journal_head(bh);
+ if (!jh)
goto zap_buffer_unlocked;
/* OK, we have data buffer in journaled mode */
write_lock(&journal->j_state_lock);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
spin_lock(&journal->j_list_lock);
-
- jh = jbd2_journal_grab_journal_head(bh);
- if (!jh)
- goto zap_buffer_no_jh;
/*
* We cannot remove the buffer from checkpoint lists until the
@@ -2244,10 +2374,13 @@
* for commit and try again.
*/
if (partial_page) {
- jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
write_unlock(&journal->j_state_lock);
+ jbd2_journal_put_journal_head(jh);
+ /* Already zapped buffer? Nothing to do... */
+ if (!bh->b_bdev)
+ return 0;
return -EBUSY;
}
/*
@@ -2261,10 +2394,10 @@
if (journal->j_running_transaction && buffer_jbddirty(bh))
jh->b_next_transaction = journal->j_running_transaction;
jh->b_modified = 0;
- jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
write_unlock(&journal->j_state_lock);
+ jbd2_journal_put_journal_head(jh);
return 0;
} else {
/* Good, the buffer belongs to the running transaction.
@@ -2288,11 +2421,10 @@
* here.
*/
jh->b_modified = 0;
- jbd2_journal_put_journal_head(jh);
-zap_buffer_no_jh:
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
write_unlock(&journal->j_state_lock);
+ jbd2_journal_put_journal_head(jh);
zap_buffer_unlocked:
clear_buffer_dirty(bh);
J_ASSERT_BH(bh, !buffer_jbddirty(bh));
@@ -2306,7 +2438,7 @@
}
/**
- * void jbd2_journal_invalidatepage()
+ * jbd2_journal_invalidatepage()
* @journal: journal to use for flush...
* @page: page to flush
* @offset: start of the range to invalidate
@@ -2379,7 +2511,7 @@
int was_dirty = 0;
struct buffer_head *bh = jh2bh(jh);
- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
+ lockdep_assert_held(&jh->b_state_lock);
assert_spin_locked(&transaction->t_journal->j_list_lock);
J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
@@ -2441,11 +2573,11 @@
void jbd2_journal_file_buffer(struct journal_head *jh,
transaction_t *transaction, int jlist)
{
- jbd_lock_bh_state(jh2bh(jh));
+ spin_lock(&jh->b_state_lock);
spin_lock(&transaction->t_journal->j_list_lock);
__jbd2_journal_file_buffer(jh, transaction, jlist);
spin_unlock(&transaction->t_journal->j_list_lock);
- jbd_unlock_bh_state(jh2bh(jh));
+ spin_unlock(&jh->b_state_lock);
}
/*
@@ -2455,23 +2587,25 @@
* buffer on that transaction's metadata list.
*
* Called under j_list_lock
- * Called under jbd_lock_bh_state(jh2bh(jh))
+ * Called under jh->b_state_lock
*
- * jh and bh may be already free when this function returns
+ * When this function returns true, there's no next transaction to refile to
+ * and the caller has to drop jh reference through
+ * jbd2_journal_put_journal_head().
*/
-void __jbd2_journal_refile_buffer(struct journal_head *jh)
+bool __jbd2_journal_refile_buffer(struct journal_head *jh)
{
int was_dirty, jlist;
struct buffer_head *bh = jh2bh(jh);
- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
+ lockdep_assert_held(&jh->b_state_lock);
if (jh->b_transaction)
assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);
/* If the buffer is now unused, just drop it. */
if (jh->b_next_transaction == NULL) {
__jbd2_journal_unfile_buffer(jh);
- return;
+ return true;
}
/*
@@ -2506,6 +2640,7 @@
if (was_dirty)
set_buffer_jbddirty(bh);
+ return false;
}
/*
@@ -2516,16 +2651,15 @@
*/
void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
{
- struct buffer_head *bh = jh2bh(jh);
+ bool drop;
- /* Get reference so that buffer cannot be freed before we unlock it */
- get_bh(bh);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
spin_lock(&journal->j_list_lock);
- __jbd2_journal_refile_buffer(jh);
- jbd_unlock_bh_state(bh);
+ drop = __jbd2_journal_refile_buffer(jh);
+ spin_unlock(&jh->b_state_lock);
spin_unlock(&journal->j_list_lock);
- __brelse(bh);
+ if (drop)
+ jbd2_journal_put_journal_head(jh);
}
/*
@@ -2584,18 +2718,6 @@
spin_unlock(&journal->j_list_lock);
return 0;
-}
-
-int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode)
-{
- return jbd2_journal_file_inode(handle, jinode,
- JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX);
-}
-
-int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode)
-{
- return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0,
- LLONG_MAX);
}
int jbd2_journal_inode_ranged_write(handle_t *handle,
--
Gitblit v1.6.2