2024-05-10 23fa18eaa71266feff7ba8d83022d9e1cc83c65a
kernel/fs/jbd2/transaction.c
@@ -63,9 +63,31 @@
 }

 /*
+ * Base amount of descriptor blocks we reserve for each transaction.
+ */
+static int jbd2_descriptor_blocks_per_trans(journal_t *journal)
+{
+ int tag_space = journal->j_blocksize - sizeof(journal_header_t);
+ int tags_per_block;
+
+ /* Subtract UUID */
+ tag_space -= 16;
+ if (jbd2_journal_has_csum_v2or3(journal))
+ tag_space -= sizeof(struct jbd2_journal_block_tail);
+ /* Commit code leaves a slack space of 16 bytes at the end of block */
+ tags_per_block = (tag_space - 16) / journal_tag_bytes(journal);
+ /*
+ * Revoke descriptors are accounted separately so we need to reserve
+ * space for commit block and normal transaction descriptor blocks.
+ */
+ return 1 + DIV_ROUND_UP(journal->j_max_transaction_buffers,
+ tags_per_block);
+}
+
+/*
 * jbd2_get_transaction: obtain a new transaction_t object.
 *
- * Simply allocate and initialise a new transaction. Create it in
+ * Simply initialise a new transaction. Initialize it in
 * RUNNING state and add it to the current journal (which should not
 * have an existing running transaction: we only make a new transaction
 * once we have started to commit the old one).
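To get a feel for what the new helper reserves, the standalone sketch below redoes the arithmetic for one assumed configuration (4096-byte journal blocks, a 12-byte journal_header_t, a 4-byte jbd2_journal_block_tail, 16-byte block tags and 8192 max transaction buffers; these sizes are illustrative assumptions, not values read from a running kernel):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        int blocksize = 4096;                 /* assumed journal block size */
        int header = 12;                      /* assumed sizeof(journal_header_t) */
        int uuid = 16;                        /* UUID copied into the descriptor */
        int csum_tail = 4;                    /* assumed sizeof(struct jbd2_journal_block_tail) */
        int slack = 16;                       /* slack the commit code leaves at the end */
        int tag_bytes = 16;                   /* assumed journal_tag_bytes() result */
        int max_transaction_buffers = 8192;   /* assumed j_max_transaction_buffers */

        int tag_space = blocksize - header - uuid - csum_tail;
        int tags_per_block = (tag_space - slack) / tag_bytes;

        /* one commit block plus enough descriptor blocks to tag every buffer */
        int reserved = 1 + DIV_ROUND_UP(max_transaction_buffers, tags_per_block);

        printf("tags_per_block=%d reserved=%d\n", tags_per_block, reserved);
        return 0;
}

With these numbers tag_space is 4064, tags_per_block is 253, and the helper would reserve 1 + 33 = 34 descriptor/commit blocks per transaction.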
@@ -77,8 +99,8 @@
 *
 */

-static transaction_t *
-jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
+static void jbd2_get_transaction(journal_t *journal,
+ transaction_t *transaction)
 {
 transaction->t_journal = journal;
 transaction->t_state = T_RUNNING;
@@ -88,7 +110,9 @@
 spin_lock_init(&transaction->t_handle_lock);
 atomic_set(&transaction->t_updates, 0);
 atomic_set(&transaction->t_outstanding_credits,
+ jbd2_descriptor_blocks_per_trans(journal) +
 atomic_read(&journal->j_reserved_credits));
+ atomic_set(&transaction->t_outstanding_revokes, 0);
 atomic_set(&transaction->t_handle_count, 0);
 INIT_LIST_HEAD(&transaction->t_inode_list);
 INIT_LIST_HEAD(&transaction->t_private_list);
@@ -102,8 +126,6 @@
 transaction->t_max_wait = 0;
 transaction->t_start = jiffies;
 transaction->t_requested = 0;
-
- return transaction;
 }

 /*
....@@ -140,9 +162,9 @@
140162 }
141163
142164 /*
143
- * Wait until running transaction passes T_LOCKED state. Also starts the commit
144
- * if needed. The function expects running transaction to exist and releases
145
- * j_state_lock.
165
+ * Wait until running transaction passes to T_FLUSH state and new transaction
166
+ * can thus be started. Also starts the commit if needed. The function expects
167
+ * running transaction to exist and releases j_state_lock.
146168 */
147169 static void wait_transaction_locked(journal_t *journal)
148170 __releases(journal->j_state_lock)
....@@ -151,13 +173,41 @@
151173 int need_to_start;
152174 tid_t tid = journal->j_running_transaction->t_tid;
153175
154
- prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
176
+ prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait,
155177 TASK_UNINTERRUPTIBLE);
156178 need_to_start = !tid_geq(journal->j_commit_request, tid);
157179 read_unlock(&journal->j_state_lock);
158180 if (need_to_start)
159181 jbd2_log_start_commit(journal, tid);
160182 jbd2_might_wait_for_commit(journal);
183
+ schedule();
184
+ finish_wait(&journal->j_wait_transaction_locked, &wait);
185
+}
186
+
187
+/*
188
+ * Wait until running transaction transitions from T_SWITCH to T_FLUSH
189
+ * state and new transaction can thus be started. The function releases
190
+ * j_state_lock.
191
+ */
192
+static void wait_transaction_switching(journal_t *journal)
193
+ __releases(journal->j_state_lock)
194
+{
195
+ DEFINE_WAIT(wait);
196
+
197
+ if (WARN_ON(!journal->j_running_transaction ||
198
+ journal->j_running_transaction->t_state != T_SWITCH)) {
199
+ read_unlock(&journal->j_state_lock);
200
+ return;
201
+ }
202
+ prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait,
203
+ TASK_UNINTERRUPTIBLE);
204
+ read_unlock(&journal->j_state_lock);
205
+ /*
206
+ * We don't call jbd2_might_wait_for_commit() here as there's no
207
+ * waiting for outstanding handles happening anymore in T_SWITCH state
208
+ * and handling of reserved handles actually relies on that for
209
+ * correctness.
210
+ */
161211 schedule();
162212 finish_wait(&journal->j_wait_transaction_locked, &wait);
163213 }
....@@ -185,7 +235,8 @@
185235 * If the current transaction is locked down for commit, wait
186236 * for the lock to be released.
187237 */
188
- if (t->t_state == T_LOCKED) {
238
+ if (t->t_state != T_RUNNING) {
239
+ WARN_ON_ONCE(t->t_state >= T_FLUSH);
189240 wait_transaction_locked(journal);
190241 return 1;
191242 }
....@@ -233,12 +284,13 @@
233284 * *before* starting to dirty potentially checkpointed buffers
234285 * in the new transaction.
235286 */
236
- if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) {
287
+ if (jbd2_log_space_left(journal) < journal->j_max_transaction_buffers) {
237288 atomic_sub(total, &t->t_outstanding_credits);
238289 read_unlock(&journal->j_state_lock);
239290 jbd2_might_wait_for_commit(journal);
240291 write_lock(&journal->j_state_lock);
241
- if (jbd2_log_space_left(journal) < jbd2_space_needed(journal))
292
+ if (jbd2_log_space_left(journal) <
293
+ journal->j_max_transaction_buffers)
242294 __jbd2_log_wait_for_space(journal);
243295 write_unlock(&journal->j_state_lock);
244296 return 1;
....@@ -274,12 +326,12 @@
274326 gfp_t gfp_mask)
275327 {
276328 transaction_t *transaction, *new_transaction = NULL;
277
- int blocks = handle->h_buffer_credits;
329
+ int blocks = handle->h_total_credits;
278330 int rsv_blocks = 0;
279331 unsigned long ts = jiffies;
280332
281333 if (handle->h_rsv_handle)
282
- rsv_blocks = handle->h_rsv_handle->h_buffer_credits;
334
+ rsv_blocks = handle->h_rsv_handle->h_total_credits;
283335
284336 /*
285337 * Limit the number of reserved credits to 1/2 of maximum transaction
....@@ -297,7 +349,12 @@
297349 }
298350
299351 alloc_transaction:
300
- if (!journal->j_running_transaction) {
352
+ /*
353
+ * This check is racy but it is just an optimization of allocating new
354
+ * transaction early if there are high chances we'll need it. If we
355
+ * guess wrong, we'll retry or free unused transaction.
356
+ */
357
+ if (!data_race(journal->j_running_transaction)) {
301358 /*
302359 * If __GFP_FS is not present, then we may be being called from
303360 * inside the fs writeback layer, so we MUST NOT fail.
....@@ -362,8 +419,14 @@
362419 /*
363420 * We have handle reserved so we are allowed to join T_LOCKED
364421 * transaction and we don't have to check for transaction size
365
- * and journal space.
422
+ * and journal space. But we still have to wait while running
423
+ * transaction is being switched to a committing one as it
424
+ * won't wait for any handles anymore.
366425 */
426
+ if (transaction->t_state == T_SWITCH) {
427
+ wait_transaction_switching(journal);
428
+ goto repeat;
429
+ }
367430 sub_reserved_credits(journal, blocks);
368431 handle->h_reserved = 0;
369432 }
....@@ -374,6 +437,7 @@
374437 update_t_max_wait(transaction, ts);
375438 handle->h_transaction = transaction;
376439 handle->h_requested_credits = blocks;
440
+ handle->h_revoke_credits_requested = handle->h_revoke_credits;
377441 handle->h_start_jiffies = jiffies;
378442 atomic_inc(&transaction->t_updates);
379443 atomic_inc(&transaction->t_handle_count);
....@@ -400,15 +464,15 @@
400464 handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
401465 if (!handle)
402466 return NULL;
403
- handle->h_buffer_credits = nblocks;
467
+ handle->h_total_credits = nblocks;
404468 handle->h_ref = 1;
405469
406470 return handle;
407471 }
408472
409473 handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
410
- gfp_t gfp_mask, unsigned int type,
411
- unsigned int line_no)
474
+ int revoke_records, gfp_t gfp_mask,
475
+ unsigned int type, unsigned int line_no)
412476 {
413477 handle_t *handle = journal_current_handle();
414478 int err;
@@ -422,6 +486,8 @@
 return handle;
 }

+ nblocks += DIV_ROUND_UP(revoke_records,
+ journal->j_revoke_records_per_block);
 handle = new_handle(nblocks);
 if (!handle)
 return ERR_PTR(-ENOMEM);
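From a caller's point of view the extra revoke_records argument is simply folded into the buffer credits as shown above. A minimal caller sketch (the journal pointer and the credit/record counts are placeholders, and the statistics type/line arguments are just 0 here):

handle_t *handle;

/* Ask for 20 buffer credits plus room for 100 revoke records. */
handle = jbd2__journal_start(journal, 20, 0, 100, GFP_NOFS, 0, 0);
if (IS_ERR(handle))
        return PTR_ERR(handle);
/*
 * Internally nblocks grows by DIV_ROUND_UP(100,
 * journal->j_revoke_records_per_block) before new_handle() runs.
 */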
....@@ -437,6 +503,7 @@
437503 rsv_handle->h_journal = journal;
438504 handle->h_rsv_handle = rsv_handle;
439505 }
506
+ handle->h_revoke_credits = revoke_records;
440507
441508 err = start_this_handle(journal, handle, gfp_mask);
442509 if (err < 0) {
....@@ -457,7 +524,7 @@
457524
458525
459526 /**
460
- * handle_t *jbd2_journal_start() - Obtain a new handle.
527
+ * jbd2_journal_start() - Obtain a new handle.
461528 * @journal: Journal to start transaction on.
462529 * @nblocks: number of block buffer we might modify
463530 *
....@@ -465,7 +532,7 @@
465532 * modified buffers in the log. We block until the log can guarantee
466533 * that much space. Additionally, if rsv_blocks > 0, we also create another
467534 * handle with rsv_blocks reserved blocks in the journal. This handle is
468
- * is stored in h_rsv_handle. It is not attached to any particular transaction
535
+ * stored in h_rsv_handle. It is not attached to any particular transaction
469536 * and thus doesn't block transaction commit. If the caller uses this reserved
470537 * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop()
471538 * on the parent handle will dispose the reserved one. Reserved handle has to
....@@ -477,22 +544,34 @@
477544 */
478545 handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
479546 {
480
- return jbd2__journal_start(journal, nblocks, 0, GFP_NOFS, 0, 0);
547
+ return jbd2__journal_start(journal, nblocks, 0, 0, GFP_NOFS, 0, 0);
481548 }
482549 EXPORT_SYMBOL(jbd2_journal_start);
550
+
551
+static void __jbd2_journal_unreserve_handle(handle_t *handle, transaction_t *t)
552
+{
553
+ journal_t *journal = handle->h_journal;
554
+
555
+ WARN_ON(!handle->h_reserved);
556
+ sub_reserved_credits(journal, handle->h_total_credits);
557
+ if (t)
558
+ atomic_sub(handle->h_total_credits, &t->t_outstanding_credits);
559
+}
483560
484561 void jbd2_journal_free_reserved(handle_t *handle)
485562 {
486563 journal_t *journal = handle->h_journal;
487564
488
- WARN_ON(!handle->h_reserved);
489
- sub_reserved_credits(journal, handle->h_buffer_credits);
565
+ /* Get j_state_lock to pin running transaction if it exists */
566
+ read_lock(&journal->j_state_lock);
567
+ __jbd2_journal_unreserve_handle(handle, journal->j_running_transaction);
568
+ read_unlock(&journal->j_state_lock);
490569 jbd2_free_handle(handle);
491570 }
492571 EXPORT_SYMBOL(jbd2_journal_free_reserved);
493572
494573 /**
495
- * int jbd2_journal_start_reserved() - start reserved handle
574
+ * jbd2_journal_start_reserved() - start reserved handle
496575 * @handle: handle to start
497576 * @type: for handle statistics
498577 * @line_no: for handle statistics
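The reserved-handle rules spelled out in the jbd2_journal_start() comment above translate into roughly the following caller pattern; this is a hedged sketch with illustrative credit counts and 0 for the statistics type/line arguments:

handle_t *handle, *rsv;
int err;

/* Start a normal handle and pre-reserve 4 credits for a later step. */
handle = jbd2__journal_start(journal, 16, 4, 0, GFP_NOFS, 0, 0);
if (IS_ERR(handle))
        return PTR_ERR(handle);
rsv = handle->h_rsv_handle;

/* ... use the normal handle ... */

/* Keep jbd2_journal_stop() from disposing of the reserved handle. */
handle->h_rsv_handle = NULL;
jbd2_journal_stop(handle);

/* Later: convert the reservation into a usable handle. */
err = jbd2_journal_start_reserved(rsv, 0, 0);

If the reservation turns out not to be needed, it is dropped with jbd2_journal_free_reserved(rsv) instead, which now pins the running transaction under j_state_lock so the reserved credits can be subtracted from t_outstanding_credits as well.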
....@@ -538,14 +617,18 @@
538617 }
539618 handle->h_type = type;
540619 handle->h_line_no = line_no;
620
+ trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
621
+ handle->h_transaction->t_tid, type,
622
+ line_no, handle->h_total_credits);
541623 return 0;
542624 }
543625 EXPORT_SYMBOL(jbd2_journal_start_reserved);
544626
545627 /**
546
- * int jbd2_journal_extend() - extend buffer credits.
628
+ * jbd2_journal_extend() - extend buffer credits.
547629 * @handle: handle to 'extend'
548630 * @nblocks: nr blocks to try to extend by.
631
+ * @revoke_records: number of revoke records to try to extend by.
549632 *
550633 * Some transactions, such as large extends and truncates, can be done
551634 * atomically all at once or in several stages. The operation requests
....@@ -562,7 +645,7 @@
562645 * return code < 0 implies an error
563646 * return code > 0 implies normal transaction-full status.
564647 */
565
-int jbd2_journal_extend(handle_t *handle, int nblocks)
648
+int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
566649 {
567650 transaction_t *transaction = handle->h_transaction;
568651 journal_t *journal;
@@ -584,6 +667,12 @@
 goto error_out;
 }

+ nblocks += DIV_ROUND_UP(
+ handle->h_revoke_credits_requested + revoke_records,
+ journal->j_revoke_records_per_block) -
+ DIV_ROUND_UP(
+ handle->h_revoke_credits_requested,
+ journal->j_revoke_records_per_block);
 spin_lock(&transaction->t_handle_lock);
 wanted = atomic_add_return(nblocks,
 &transaction->t_outstanding_credits);
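The extension only pays for descriptor blocks that the extra revoke records add on top of what the handle already requested. A small sketch of the same delta, using a hypothetical helper name and an assumed j_revoke_records_per_block of 1000 (the real value depends on block size and checksum format):

/* Hypothetical helper mirroring the computation above. */
static int extra_revoke_descriptors(int already_requested, int extra, int rr_per_blk)
{
        return DIV_ROUND_UP(already_requested + extra, rr_per_blk) -
               DIV_ROUND_UP(already_requested, rr_per_blk);
}

/*
 * extra_revoke_descriptors(10, 1500, 1000) == 2 - 1 == 1, so asking for
 * 1500 more revoke records adds just one descriptor-block credit to
 * nblocks before trying to grow t_outstanding_credits.
 */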
....@@ -595,22 +684,16 @@
595684 goto unlock;
596685 }
597686
598
- if (wanted + (wanted >> JBD2_CONTROL_BLOCKS_SHIFT) >
599
- jbd2_log_space_left(journal)) {
600
- jbd_debug(3, "denied handle %p %d blocks: "
601
- "insufficient log space\n", handle, nblocks);
602
- atomic_sub(nblocks, &transaction->t_outstanding_credits);
603
- goto unlock;
604
- }
605
-
606687 trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
607688 transaction->t_tid,
608689 handle->h_type, handle->h_line_no,
609
- handle->h_buffer_credits,
690
+ handle->h_total_credits,
610691 nblocks);
611692
612
- handle->h_buffer_credits += nblocks;
693
+ handle->h_total_credits += nblocks;
613694 handle->h_requested_credits += nblocks;
695
+ handle->h_revoke_credits += revoke_records;
696
+ handle->h_revoke_credits_requested += revoke_records;
614697 result = 0;
615698
616699 jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
@@ -621,11 +704,56 @@
 return result;
 }

+static void stop_this_handle(handle_t *handle)
+{
+ transaction_t *transaction = handle->h_transaction;
+ journal_t *journal = transaction->t_journal;
+ int revokes;
+
+ J_ASSERT(journal_current_handle() == handle);
+ J_ASSERT(atomic_read(&transaction->t_updates) > 0);
+ current->journal_info = NULL;
+ /*
+ * Subtract necessary revoke descriptor blocks from handle credits. We
+ * take care to account only for revoke descriptor blocks the
+ * transaction will really need as large sequences of transactions with
+ * small numbers of revokes are relatively common.
+ */
+ revokes = handle->h_revoke_credits_requested - handle->h_revoke_credits;
+ if (revokes) {
+ int t_revokes, revoke_descriptors;
+ int rr_per_blk = journal->j_revoke_records_per_block;
+
+ WARN_ON_ONCE(DIV_ROUND_UP(revokes, rr_per_blk)
+ > handle->h_total_credits);
+ t_revokes = atomic_add_return(revokes,
+ &transaction->t_outstanding_revokes);
+ revoke_descriptors =
+ DIV_ROUND_UP(t_revokes, rr_per_blk) -
+ DIV_ROUND_UP(t_revokes - revokes, rr_per_blk);
+ handle->h_total_credits -= revoke_descriptors;
+ }
+ atomic_sub(handle->h_total_credits,
+ &transaction->t_outstanding_credits);
+ if (handle->h_rsv_handle)
+ __jbd2_journal_unreserve_handle(handle->h_rsv_handle,
+ transaction);
+ if (atomic_dec_and_test(&transaction->t_updates))
+ wake_up(&journal->j_wait_updates);
+
+ rwsem_release(&journal->j_trans_commit_map, _THIS_IP_);
+ /*
+ * Scope of the GFP_NOFS context is over here and so we can restore the
+ * original alloc context.
+ */
+ memalloc_nofs_restore(handle->saved_alloc_context);
+}

 /**
- * int jbd2_journal_restart() - restart a handle .
+ * jbd2__journal_restart() - restart a handle .
 * @handle: handle to restart
 * @nblocks: nr credits requested
+ * @revoke_records: number of revoke record credits requested
 * @gfp_mask: memory allocation flags (for start_this_handle)
 *
 * Restart a handle for a multi-transaction filesystem
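The same marginal accounting shows up in stop_this_handle() above: only the descriptor blocks that this handle's revokes actually push the transaction into needing stay charged to t_outstanding_credits. A worked example, again with an assumed j_revoke_records_per_block of 1000 (illustrative only):

        t_outstanding_revokes before the handle:  900
        revokes used by this handle:              300  (h_revoke_credits_requested - h_revoke_credits)
        t_revokes after atomic_add_return():     1200

        revoke_descriptors = DIV_ROUND_UP(1200, 1000) - DIV_ROUND_UP(900, 1000)
                           = 2 - 1 = 1

so one credit is held back from h_total_credits before the unused credits are returned to the transaction, and a handle whose revokes fit into an already-charged descriptor block holds back nothing extra.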
....@@ -638,56 +766,48 @@
638766 * credits. We preserve reserved handle if there's any attached to the
639767 * passed in handle.
640768 */
641
-int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
769
+int jbd2__journal_restart(handle_t *handle, int nblocks, int revoke_records,
770
+ gfp_t gfp_mask)
642771 {
643772 transaction_t *transaction = handle->h_transaction;
644773 journal_t *journal;
645774 tid_t tid;
646
- int need_to_start, ret;
775
+ int need_to_start;
776
+ int ret;
647777
648778 /* If we've had an abort of any type, don't even think about
649779 * actually doing the restart! */
650780 if (is_handle_aborted(handle))
651781 return 0;
652782 journal = transaction->t_journal;
783
+ tid = transaction->t_tid;
653784
654785 /*
655786 * First unlink the handle from its current transaction, and start the
656787 * commit on that.
657788 */
658
- J_ASSERT(atomic_read(&transaction->t_updates) > 0);
659
- J_ASSERT(journal_current_handle() == handle);
660
-
661
- read_lock(&journal->j_state_lock);
662
- spin_lock(&transaction->t_handle_lock);
663
- atomic_sub(handle->h_buffer_credits,
664
- &transaction->t_outstanding_credits);
665
- if (handle->h_rsv_handle) {
666
- sub_reserved_credits(journal,
667
- handle->h_rsv_handle->h_buffer_credits);
668
- }
669
- if (atomic_dec_and_test(&transaction->t_updates))
670
- wake_up(&journal->j_wait_updates);
671
- tid = transaction->t_tid;
672
- spin_unlock(&transaction->t_handle_lock);
673
- handle->h_transaction = NULL;
674
- current->journal_info = NULL;
675
-
676789 jbd_debug(2, "restarting handle %p\n", handle);
790
+ stop_this_handle(handle);
791
+ handle->h_transaction = NULL;
792
+
793
+ /*
794
+ * TODO: If we use READ_ONCE / WRITE_ONCE for j_commit_request we can
795
+ * get rid of pointless j_state_lock traffic like this.
796
+ */
797
+ read_lock(&journal->j_state_lock);
677798 need_to_start = !tid_geq(journal->j_commit_request, tid);
678799 read_unlock(&journal->j_state_lock);
679800 if (need_to_start)
680801 jbd2_log_start_commit(journal, tid);
681
-
682
- rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_);
683
- handle->h_buffer_credits = nblocks;
684
- /*
685
- * Restore the original nofs context because the journal restart
686
- * is basically the same thing as journal stop and start.
687
- * start_this_handle will start a new nofs context.
688
- */
689
- memalloc_nofs_restore(handle->saved_alloc_context);
802
+ handle->h_total_credits = nblocks +
803
+ DIV_ROUND_UP(revoke_records,
804
+ journal->j_revoke_records_per_block);
805
+ handle->h_revoke_credits = revoke_records;
690806 ret = start_this_handle(journal, handle, gfp_mask);
807
+ trace_jbd2_handle_restart(journal->j_fs_dev->bd_dev,
808
+ ret ? 0 : handle->h_transaction->t_tid,
809
+ handle->h_type, handle->h_line_no,
810
+ handle->h_total_credits);
691811 return ret;
692812 }
693813 EXPORT_SYMBOL(jbd2__journal_restart);
....@@ -695,12 +815,12 @@
695815
696816 int jbd2_journal_restart(handle_t *handle, int nblocks)
697817 {
698
- return jbd2__journal_restart(handle, nblocks, GFP_NOFS);
818
+ return jbd2__journal_restart(handle, nblocks, 0, GFP_NOFS);
699819 }
700820 EXPORT_SYMBOL(jbd2_journal_restart);
701821
702822 /**
703
- * void jbd2_journal_lock_updates () - establish a transaction barrier.
823
+ * jbd2_journal_lock_updates () - establish a transaction barrier.
704824 * @journal: Journal to establish a barrier on.
705825 *
706826 * This locks out any further updates from being started, and blocks
....@@ -759,7 +879,7 @@
759879 }
760880
761881 /**
762
- * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier
882
+ * jbd2_journal_unlock_updates () - release barrier
763883 * @journal: Journal to release the barrier on.
764884 *
765885 * Release a transaction barrier obtained with jbd2_journal_lock_updates().
....@@ -774,7 +894,7 @@
774894 write_lock(&journal->j_state_lock);
775895 --journal->j_barrier_count;
776896 write_unlock(&journal->j_state_lock);
777
- wake_up(&journal->j_wait_transaction_locked);
897
+ wake_up_all(&journal->j_wait_transaction_locked);
778898 }
779899
780900 static void warn_dirty_buffer(struct buffer_head *bh)
....@@ -843,7 +963,7 @@
843963
844964 start_lock = jiffies;
845965 lock_buffer(bh);
846
- jbd_lock_bh_state(bh);
966
+ spin_lock(&jh->b_state_lock);
847967
848968 /* If it takes too long to lock the buffer, trace it */
849969 time_lock = jbd2_time_diff(start_lock, jiffies);
....@@ -864,36 +984,28 @@
864984 * ie. locked but not dirty) or tune2fs (which may actually have
865985 * the buffer dirtied, ugh.) */
866986
867
- if (buffer_dirty(bh)) {
987
+ if (buffer_dirty(bh) && jh->b_transaction) {
988
+ warn_dirty_buffer(bh);
868989 /*
869
- * First question: is this buffer already part of the current
870
- * transaction or the existing committing transaction?
871
- */
872
- if (jh->b_transaction) {
873
- J_ASSERT_JH(jh,
874
- jh->b_transaction == transaction ||
875
- jh->b_transaction ==
876
- journal->j_committing_transaction);
877
- if (jh->b_next_transaction)
878
- J_ASSERT_JH(jh, jh->b_next_transaction ==
879
- transaction);
880
- warn_dirty_buffer(bh);
881
- }
882
- /*
883
- * In any case we need to clean the dirty flag and we must
884
- * do it under the buffer lock to be sure we don't race
885
- * with running write-out.
990
+ * We need to clean the dirty flag and we must do it under the
991
+ * buffer lock to be sure we don't race with running write-out.
886992 */
887993 JBUFFER_TRACE(jh, "Journalling dirty buffer");
888994 clear_buffer_dirty(bh);
995
+ /*
996
+ * The buffer is going to be added to BJ_Reserved list now and
997
+ * nothing guarantees jbd2_journal_dirty_metadata() will be
998
+ * ever called for it. So we need to set jbddirty bit here to
999
+ * make sure the buffer is dirtied and written out when the
1000
+ * journaling machinery is done with it.
1001
+ */
8891002 set_buffer_jbddirty(bh);
8901003 }
8911004
892
- unlock_buffer(bh);
893
-
8941005 error = -EROFS;
8951006 if (is_handle_aborted(handle)) {
896
- jbd_unlock_bh_state(bh);
1007
+ spin_unlock(&jh->b_state_lock);
1008
+ unlock_buffer(bh);
8971009 goto out;
8981010 }
8991011 error = 0;
....@@ -903,14 +1015,16 @@
9031015 * b_next_transaction points to it
9041016 */
9051017 if (jh->b_transaction == transaction ||
906
- jh->b_next_transaction == transaction)
1018
+ jh->b_next_transaction == transaction) {
1019
+ unlock_buffer(bh);
9071020 goto done;
1021
+ }
9081022
9091023 /*
9101024 * this is the first time this transaction is touching this buffer,
9111025 * reset the modified flag
9121026 */
913
- jh->b_modified = 0;
1027
+ jh->b_modified = 0;
9141028
9151029 /*
9161030 * If the buffer is not journaled right now, we need to make sure it
....@@ -928,10 +1042,24 @@
9281042 */
9291043 smp_wmb();
9301044 spin_lock(&journal->j_list_lock);
1045
+ if (test_clear_buffer_dirty(bh)) {
1046
+ /*
1047
+ * Execute buffer dirty clearing and jh->b_transaction
1048
+ * assignment under journal->j_list_lock locked to
1049
+ * prevent bh being removed from checkpoint list if
1050
+ * the buffer is in an intermediate state (not dirty
1051
+ * and jh->b_transaction is NULL).
1052
+ */
1053
+ JBUFFER_TRACE(jh, "Journalling dirty buffer");
1054
+ set_buffer_jbddirty(bh);
1055
+ }
9311056 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
9321057 spin_unlock(&journal->j_list_lock);
1058
+ unlock_buffer(bh);
9331059 goto done;
9341060 }
1061
+ unlock_buffer(bh);
1062
+
9351063 /*
9361064 * If there is already a copy-out version of this buffer, then we don't
9371065 * need to make another one
....@@ -957,7 +1085,7 @@
9571085 */
9581086 if (buffer_shadow(bh)) {
9591087 JBUFFER_TRACE(jh, "on shadow: sleep");
960
- jbd_unlock_bh_state(bh);
1088
+ spin_unlock(&jh->b_state_lock);
9611089 wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE);
9621090 goto repeat;
9631091 }
....@@ -978,7 +1106,7 @@
9781106 JBUFFER_TRACE(jh, "generate frozen data");
9791107 if (!frozen_buffer) {
9801108 JBUFFER_TRACE(jh, "allocate memory for buffer");
981
- jbd_unlock_bh_state(bh);
1109
+ spin_unlock(&jh->b_state_lock);
9821110 frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size,
9831111 GFP_NOFS | __GFP_NOFAIL);
9841112 goto repeat;
....@@ -997,7 +1125,7 @@
9971125 jh->b_next_transaction = transaction;
9981126
9991127 done:
1000
- jbd_unlock_bh_state(bh);
1128
+ spin_unlock(&jh->b_state_lock);
10011129
10021130 /*
10031131 * If we are about to journal a buffer, then any revoke pending on it is
....@@ -1067,7 +1195,8 @@
10671195 }
10681196
10691197 /**
1070
- * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
1198
+ * jbd2_journal_get_write_access() - notify intent to modify a buffer
1199
+ * for metadata (not data) update.
10711200 * @handle: transaction to add buffer modifications to
10721201 * @bh: bh to be used for metadata writes
10731202 *
....@@ -1111,7 +1240,7 @@
11111240 * unlocked buffer beforehand. */
11121241
11131242 /**
1114
- * int jbd2_journal_get_create_access () - notify intent to use newly created bh
1243
+ * jbd2_journal_get_create_access () - notify intent to use newly created bh
11151244 * @handle: transaction to new buffer to
11161245 * @bh: new buffer.
11171246 *
....@@ -1139,7 +1268,7 @@
11391268 * that case: the transaction must have deleted the buffer for it to be
11401269 * reused here.
11411270 */
1142
- jbd_lock_bh_state(bh);
1271
+ spin_lock(&jh->b_state_lock);
11431272 J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
11441273 jh->b_transaction == NULL ||
11451274 (jh->b_transaction == journal->j_committing_transaction &&
....@@ -1174,7 +1303,7 @@
11741303 jh->b_next_transaction = transaction;
11751304 spin_unlock(&journal->j_list_lock);
11761305 }
1177
- jbd_unlock_bh_state(bh);
1306
+ spin_unlock(&jh->b_state_lock);
11781307
11791308 /*
11801309 * akpm: I added this. ext3_alloc_branch can pick up new indirect
....@@ -1191,7 +1320,7 @@
11911320 }
11921321
11931322 /**
1194
- * int jbd2_journal_get_undo_access() - Notify intent to modify metadata with
1323
+ * jbd2_journal_get_undo_access() - Notify intent to modify metadata with
11951324 * non-rewindable consequences
11961325 * @handle: transaction
11971326 * @bh: buffer to undo
....@@ -1245,13 +1374,13 @@
12451374 committed_data = jbd2_alloc(jh2bh(jh)->b_size,
12461375 GFP_NOFS|__GFP_NOFAIL);
12471376
1248
- jbd_lock_bh_state(bh);
1377
+ spin_lock(&jh->b_state_lock);
12491378 if (!jh->b_committed_data) {
12501379 /* Copy out the current buffer contents into the
12511380 * preserved, committed copy. */
12521381 JBUFFER_TRACE(jh, "generate b_committed data");
12531382 if (!committed_data) {
1254
- jbd_unlock_bh_state(bh);
1383
+ spin_unlock(&jh->b_state_lock);
12551384 goto repeat;
12561385 }
12571386
....@@ -1259,7 +1388,7 @@
12591388 committed_data = NULL;
12601389 memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
12611390 }
1262
- jbd_unlock_bh_state(bh);
1391
+ spin_unlock(&jh->b_state_lock);
12631392 out:
12641393 jbd2_journal_put_journal_head(jh);
12651394 if (unlikely(committed_data))
....@@ -1268,7 +1397,7 @@
12681397 }
12691398
12701399 /**
1271
- * void jbd2_journal_set_triggers() - Add triggers for commit writeout
1400
+ * jbd2_journal_set_triggers() - Add triggers for commit writeout
12721401 * @bh: buffer to trigger on
12731402 * @type: struct jbd2_buffer_trigger_type containing the trigger(s).
12741403 *
....@@ -1310,7 +1439,7 @@
13101439 }
13111440
13121441 /**
1313
- * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
1442
+ * jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
13141443 * @handle: transaction to add buffer to.
13151444 * @bh: buffer to mark
13161445 *
....@@ -1339,8 +1468,6 @@
13391468 struct journal_head *jh;
13401469 int ret = 0;
13411470
1342
- if (is_handle_aborted(handle))
1343
- return -EROFS;
13441471 if (!buffer_jbd(bh))
13451472 return -EUCLEAN;
13461473
....@@ -1358,18 +1485,18 @@
13581485 * crucial to catch bugs so let's do a reliable check until the
13591486 * lockless handling is fully proven.
13601487 */
1361
- if (jh->b_transaction != transaction &&
1362
- jh->b_next_transaction != transaction) {
1363
- jbd_lock_bh_state(bh);
1488
+ if (data_race(jh->b_transaction != transaction &&
1489
+ jh->b_next_transaction != transaction)) {
1490
+ spin_lock(&jh->b_state_lock);
13641491 J_ASSERT_JH(jh, jh->b_transaction == transaction ||
13651492 jh->b_next_transaction == transaction);
1366
- jbd_unlock_bh_state(bh);
1493
+ spin_unlock(&jh->b_state_lock);
13671494 }
13681495 if (jh->b_modified == 1) {
13691496 /* If it's in our transaction it must be in BJ_Metadata list. */
1370
- if (jh->b_transaction == transaction &&
1371
- jh->b_jlist != BJ_Metadata) {
1372
- jbd_lock_bh_state(bh);
1497
+ if (data_race(jh->b_transaction == transaction &&
1498
+ jh->b_jlist != BJ_Metadata)) {
1499
+ spin_lock(&jh->b_state_lock);
13731500 if (jh->b_transaction == transaction &&
13741501 jh->b_jlist != BJ_Metadata)
13751502 pr_err("JBD2: assertion failure: h_type=%u "
....@@ -1379,13 +1506,25 @@
13791506 jh->b_jlist);
13801507 J_ASSERT_JH(jh, jh->b_transaction != transaction ||
13811508 jh->b_jlist == BJ_Metadata);
1382
- jbd_unlock_bh_state(bh);
1509
+ spin_unlock(&jh->b_state_lock);
13831510 }
13841511 goto out;
13851512 }
13861513
13871514 journal = transaction->t_journal;
1388
- jbd_lock_bh_state(bh);
1515
+ spin_lock(&jh->b_state_lock);
1516
+
1517
+ if (is_handle_aborted(handle)) {
1518
+ /*
1519
+ * Check journal aborting with @jh->b_state_lock locked,
1520
+ * since 'jh->b_transaction' could be replaced with
1521
+ * 'jh->b_next_transaction' during old transaction
1522
+ * committing if journal aborted, which may fail
1523
+ * assertion on 'jh->b_frozen_data == NULL'.
1524
+ */
1525
+ ret = -EROFS;
1526
+ goto out_unlock_bh;
1527
+ }
13891528
13901529 if (jh->b_modified == 0) {
13911530 /*
....@@ -1393,12 +1532,12 @@
13931532 * of the transaction. This needs to be done
13941533 * once a transaction -bzzz
13951534 */
1396
- if (handle->h_buffer_credits <= 0) {
1535
+ if (WARN_ON_ONCE(jbd2_handle_buffer_credits(handle) <= 0)) {
13971536 ret = -ENOSPC;
13981537 goto out_unlock_bh;
13991538 }
14001539 jh->b_modified = 1;
1401
- handle->h_buffer_credits--;
1540
+ handle->h_total_credits--;
14021541 }
14031542
14041543 /*
....@@ -1471,14 +1610,14 @@
14711610 __jbd2_journal_file_buffer(jh, transaction, BJ_Metadata);
14721611 spin_unlock(&journal->j_list_lock);
14731612 out_unlock_bh:
1474
- jbd_unlock_bh_state(bh);
1613
+ spin_unlock(&jh->b_state_lock);
14751614 out:
14761615 JBUFFER_TRACE(jh, "exit");
14771616 return ret;
14781617 }
14791618
14801619 /**
1481
- * void jbd2_journal_forget() - bforget() for potentially-journaled buffers.
1620
+ * jbd2_journal_forget() - bforget() for potentially-journaled buffers.
14821621 * @handle: transaction handle
14831622 * @bh: bh to 'forget'
14841623 *
....@@ -1494,7 +1633,7 @@
14941633 * Allow this call even if the handle has aborted --- it may be part of
14951634 * the caller's cleanup after an abort.
14961635 */
1497
-int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1636
+int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
14981637 {
14991638 transaction_t *transaction = handle->h_transaction;
15001639 journal_t *journal;
....@@ -1509,18 +1648,20 @@
15091648
15101649 BUFFER_TRACE(bh, "entry");
15111650
1512
- jbd_lock_bh_state(bh);
1651
+ jh = jbd2_journal_grab_journal_head(bh);
1652
+ if (!jh) {
1653
+ __bforget(bh);
1654
+ return 0;
1655
+ }
15131656
1514
- if (!buffer_jbd(bh))
1515
- goto not_jbd;
1516
- jh = bh2jh(bh);
1657
+ spin_lock(&jh->b_state_lock);
15171658
15181659 /* Critical error: attempting to delete a bitmap buffer, maybe?
15191660 * Don't do any jbd operations, and return an error. */
15201661 if (!J_EXPECT_JH(jh, !jh->b_committed_data,
15211662 "inconsistent data on disk")) {
15221663 err = -EIO;
1523
- goto not_jbd;
1664
+ goto drop;
15241665 }
15251666
15261667 /* keep track of whether or not this transaction modified us */
....@@ -1568,12 +1709,7 @@
15681709 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
15691710 } else {
15701711 __jbd2_journal_unfile_buffer(jh);
1571
- if (!buffer_jbd(bh)) {
1572
- spin_unlock(&journal->j_list_lock);
1573
- jbd_unlock_bh_state(bh);
1574
- __bforget(bh);
1575
- goto drop;
1576
- }
1712
+ jbd2_journal_put_journal_head(jh);
15771713 }
15781714 spin_unlock(&journal->j_list_lock);
15791715 } else if (jh->b_transaction) {
....@@ -1605,21 +1741,52 @@
16051741 if (was_modified)
16061742 drop_reserve = 1;
16071743 }
1608
- }
1744
+ } else {
1745
+ /*
1746
+ * Finally, if the buffer is not belongs to any
1747
+ * transaction, we can just drop it now if it has no
1748
+ * checkpoint.
1749
+ */
1750
+ spin_lock(&journal->j_list_lock);
1751
+ if (!jh->b_cp_transaction) {
1752
+ JBUFFER_TRACE(jh, "belongs to none transaction");
1753
+ spin_unlock(&journal->j_list_lock);
1754
+ goto drop;
1755
+ }
16091756
1610
-not_jbd:
1611
- jbd_unlock_bh_state(bh);
1612
- __brelse(bh);
1757
+ /*
1758
+ * Otherwise, if the buffer has been written to disk,
1759
+ * it is safe to remove the checkpoint and drop it.
1760
+ */
1761
+ if (!buffer_dirty(bh)) {
1762
+ __jbd2_journal_remove_checkpoint(jh);
1763
+ spin_unlock(&journal->j_list_lock);
1764
+ goto drop;
1765
+ }
1766
+
1767
+ /*
1768
+ * The buffer is still not written to disk, we should
1769
+ * attach this buffer to current transaction so that the
1770
+ * buffer can be checkpointed only after the current
1771
+ * transaction commits.
1772
+ */
1773
+ clear_buffer_dirty(bh);
1774
+ __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
1775
+ spin_unlock(&journal->j_list_lock);
1776
+ }
16131777 drop:
1778
+ __brelse(bh);
1779
+ spin_unlock(&jh->b_state_lock);
1780
+ jbd2_journal_put_journal_head(jh);
16141781 if (drop_reserve) {
16151782 /* no need to reserve log space for this block -bzzz */
1616
- handle->h_buffer_credits++;
1783
+ handle->h_total_credits++;
16171784 }
16181785 return err;
16191786 }
16201787
16211788 /**
1622
- * int jbd2_journal_stop() - complete a transaction
1789
+ * jbd2_journal_stop() - complete a transaction
16231790 * @handle: transaction to complete.
16241791 *
16251792 * All done for a particular handle.
....@@ -1642,45 +1809,34 @@
16421809 tid_t tid;
16431810 pid_t pid;
16441811
1812
+ if (--handle->h_ref > 0) {
1813
+ jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
1814
+ handle->h_ref);
1815
+ if (is_handle_aborted(handle))
1816
+ return -EIO;
1817
+ return 0;
1818
+ }
16451819 if (!transaction) {
16461820 /*
1647
- * Handle is already detached from the transaction so
1648
- * there is nothing to do other than decrease a refcount,
1649
- * or free the handle if refcount drops to zero
1821
+ * Handle is already detached from the transaction so there is
1822
+ * nothing to do other than free the handle.
16501823 */
1651
- if (--handle->h_ref > 0) {
1652
- jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
1653
- handle->h_ref);
1654
- return err;
1655
- } else {
1656
- if (handle->h_rsv_handle)
1657
- jbd2_free_handle(handle->h_rsv_handle);
1658
- goto free_and_exit;
1659
- }
1824
+ memalloc_nofs_restore(handle->saved_alloc_context);
1825
+ goto free_and_exit;
16601826 }
16611827 journal = transaction->t_journal;
1662
-
1663
- J_ASSERT(journal_current_handle() == handle);
1828
+ tid = transaction->t_tid;
16641829
16651830 if (is_handle_aborted(handle))
16661831 err = -EIO;
1667
- else
1668
- J_ASSERT(atomic_read(&transaction->t_updates) > 0);
1669
-
1670
- if (--handle->h_ref > 0) {
1671
- jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
1672
- handle->h_ref);
1673
- return err;
1674
- }
16751832
16761833 jbd_debug(4, "Handle %p going down\n", handle);
16771834 trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
1678
- transaction->t_tid,
1679
- handle->h_type, handle->h_line_no,
1835
+ tid, handle->h_type, handle->h_line_no,
16801836 jiffies - handle->h_start_jiffies,
16811837 handle->h_sync, handle->h_requested_credits,
16821838 (handle->h_requested_credits -
1683
- handle->h_buffer_credits));
1839
+ handle->h_total_credits));
16841840
16851841 /*
16861842 * Implement synchronous transaction batching. If the handle
....@@ -1740,19 +1896,13 @@
17401896
17411897 if (handle->h_sync)
17421898 transaction->t_synchronous_commit = 1;
1743
- current->journal_info = NULL;
1744
- atomic_sub(handle->h_buffer_credits,
1745
- &transaction->t_outstanding_credits);
17461899
17471900 /*
17481901 * If the handle is marked SYNC, we need to set another commit
1749
- * going! We also want to force a commit if the current
1750
- * transaction is occupying too much of the log, or if the
1751
- * transaction is too old now.
1902
+ * going! We also want to force a commit if the transaction is too
1903
+ * old now.
17521904 */
17531905 if (handle->h_sync ||
1754
- (atomic_read(&transaction->t_outstanding_credits) >
1755
- journal->j_max_transaction_buffers) ||
17561906 time_after_eq(jiffies, transaction->t_expires)) {
17571907 /* Do this even for aborted journals: an abort still
17581908 * completes the commit thread, it just doesn't write
....@@ -1761,7 +1911,7 @@
17611911 jbd_debug(2, "transaction too old, requesting commit for "
17621912 "handle %p\n", handle);
17631913 /* This is non-blocking */
1764
- jbd2_log_start_commit(journal, transaction->t_tid);
1914
+ jbd2_log_start_commit(journal, tid);
17651915
17661916 /*
17671917 * Special case: JBD2_SYNC synchronous updates require us
....@@ -1772,31 +1922,19 @@
17721922 }
17731923
17741924 /*
1775
- * Once we drop t_updates, if it goes to zero the transaction
1776
- * could start committing on us and eventually disappear. So
1777
- * once we do this, we must not dereference transaction
1778
- * pointer again.
1925
+ * Once stop_this_handle() drops t_updates, the transaction could start
1926
+ * committing on us and eventually disappear. So we must not
1927
+ * dereference transaction pointer again after calling
1928
+ * stop_this_handle().
17791929 */
1780
- tid = transaction->t_tid;
1781
- if (atomic_dec_and_test(&transaction->t_updates)) {
1782
- wake_up(&journal->j_wait_updates);
1783
- if (journal->j_barrier_count)
1784
- wake_up(&journal->j_wait_transaction_locked);
1785
- }
1786
-
1787
- rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_);
1930
+ stop_this_handle(handle);
17881931
17891932 if (wait_for_commit)
17901933 err = jbd2_log_wait_commit(journal, tid);
17911934
1792
- if (handle->h_rsv_handle)
1793
- jbd2_journal_free_reserved(handle->h_rsv_handle);
17941935 free_and_exit:
1795
- /*
1796
- * Scope of the GFP_NOFS context is over here and so we can restore the
1797
- * original alloc context.
1798
- */
1799
- memalloc_nofs_restore(handle->saved_alloc_context);
1936
+ if (handle->h_rsv_handle)
1937
+ jbd2_free_handle(handle->h_rsv_handle);
18001938 jbd2_free_handle(handle);
18011939 return err;
18021940 }
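With jbd2_journal_stop() now delegating the teardown to stop_this_handle(), a typical handle's credit flow looks roughly like the sketch below (the journal and buffer_head pointers, the counts and the trimmed error handling are illustrative; only calls that appear in this patch are used):

handle = jbd2__journal_start(journal, 8, 0, 16, GFP_NOFS, 0, 0);
if (IS_ERR(handle))
        return PTR_ERR(handle);

err = jbd2_journal_get_write_access(handle, bh);
if (!err)
        err = jbd2_journal_dirty_metadata(handle, bh);  /* consumes one h_total_credits */

/* Running low? Ask for 4 more buffer credits and 8 more revoke records. */
if (jbd2_handle_buffer_credits(handle) < 4) {
        ret = jbd2_journal_extend(handle, 4, 8);
        if (ret > 0)                            /* transaction-full status */
                ret = jbd2_journal_restart(handle, 8);
}

err = jbd2_journal_stop(handle);        /* stop_this_handle() returns the unused credits */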
....@@ -1814,7 +1952,7 @@
18141952 *
18151953 * j_list_lock is held.
18161954 *
1817
- * jbd_lock_bh_state(jh2bh(jh)) is held.
1955
+ * jh->b_state_lock is held.
18181956 */
18191957
18201958 static inline void
....@@ -1838,7 +1976,7 @@
18381976 *
18391977 * Called with j_list_lock held, and the journal may not be locked.
18401978 *
1841
- * jbd_lock_bh_state(jh2bh(jh)) is held.
1979
+ * jh->b_state_lock is held.
18421980 */
18431981
18441982 static inline void
....@@ -1870,7 +2008,7 @@
18702008 transaction_t *transaction;
18712009 struct buffer_head *bh = jh2bh(jh);
18722010
1873
- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
2011
+ lockdep_assert_held(&jh->b_state_lock);
18742012 transaction = jh->b_transaction;
18752013 if (transaction)
18762014 assert_spin_locked(&transaction->t_journal->j_list_lock);
....@@ -1907,11 +2045,10 @@
19072045 }
19082046
19092047 /*
1910
- * Remove buffer from all transactions.
2048
+ * Remove buffer from all transactions. The caller is responsible for dropping
2049
+ * the jh reference that belonged to the transaction.
19112050 *
19122051 * Called with bh_state lock and j_list_lock
1913
- *
1914
- * jh and bh may be already freed when this function returns.
19152052 */
19162053 static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
19172054 {
....@@ -1920,7 +2057,6 @@
19202057
19212058 __jbd2_journal_temp_unlink_buffer(jh);
19222059 jh->b_transaction = NULL;
1923
- jbd2_journal_put_journal_head(jh);
19242060 }
19252061
19262062 void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
....@@ -1929,18 +2065,19 @@
19292065
19302066 /* Get reference so that buffer cannot be freed before we unlock it */
19312067 get_bh(bh);
1932
- jbd_lock_bh_state(bh);
2068
+ spin_lock(&jh->b_state_lock);
19332069 spin_lock(&journal->j_list_lock);
19342070 __jbd2_journal_unfile_buffer(jh);
19352071 spin_unlock(&journal->j_list_lock);
1936
- jbd_unlock_bh_state(bh);
2072
+ spin_unlock(&jh->b_state_lock);
2073
+ jbd2_journal_put_journal_head(jh);
19372074 __brelse(bh);
19382075 }
19392076
19402077 /*
19412078 * Called from jbd2_journal_try_to_free_buffers().
19422079 *
1943
- * Called under jbd_lock_bh_state(bh)
2080
+ * Called under jh->b_state_lock
19442081 */
19452082 static void
19462083 __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
....@@ -1967,13 +2104,9 @@
19672104 }
19682105
19692106 /**
1970
- * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
2107
+ * jbd2_journal_try_to_free_buffers() - try to free page buffers.
19712108 * @journal: journal for operation
19722109 * @page: to try and free
1973
- * @gfp_mask: we use the mask to detect how hard should we try to release
1974
- * buffers. If __GFP_DIRECT_RECLAIM and __GFP_FS is set, we wait for commit
1975
- * code to release the buffers.
1976
- *
19772110 *
19782111 * For all the buffers on this page,
19792112 * if they are fully written out ordered data, move them onto BUF_CLEAN
....@@ -2004,8 +2137,7 @@
20042137 *
20052138 * Return 0 on failure, 1 on success
20062139 */
2007
-int jbd2_journal_try_to_free_buffers(journal_t *journal,
2008
- struct page *page, gfp_t gfp_mask)
2140
+int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page)
20092141 {
20102142 struct buffer_head *head;
20112143 struct buffer_head *bh;
....@@ -2028,10 +2160,10 @@
20282160 if (!jh)
20292161 continue;
20302162
2031
- jbd_lock_bh_state(bh);
2163
+ spin_lock(&jh->b_state_lock);
20322164 __journal_try_to_free_buffer(journal, bh);
2165
+ spin_unlock(&jh->b_state_lock);
20332166 jbd2_journal_put_journal_head(jh);
2034
- jbd_unlock_bh_state(bh);
20352167 if (buffer_jbd(bh))
20362168 goto busy;
20372169
....@@ -2067,7 +2199,7 @@
20672199 *
20682200 * Called under j_list_lock.
20692201 *
2070
- * Called under jbd_lock_bh_state(bh).
2202
+ * Called under jh->b_state_lock.
20712203 */
20722204 static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
20732205 {
....@@ -2088,6 +2220,7 @@
20882220 } else {
20892221 JBUFFER_TRACE(jh, "on running transaction");
20902222 __jbd2_journal_unfile_buffer(jh);
2223
+ jbd2_journal_put_journal_head(jh);
20912224 }
20922225 return may_free;
20932226 }
....@@ -2154,17 +2287,14 @@
21542287 * holding the page lock. --sct
21552288 */
21562289
2157
- if (!buffer_jbd(bh))
2290
+ jh = jbd2_journal_grab_journal_head(bh);
2291
+ if (!jh)
21582292 goto zap_buffer_unlocked;
21592293
21602294 /* OK, we have data buffer in journaled mode */
21612295 write_lock(&journal->j_state_lock);
2162
- jbd_lock_bh_state(bh);
2296
+ spin_lock(&jh->b_state_lock);
21632297 spin_lock(&journal->j_list_lock);
2164
-
2165
- jh = jbd2_journal_grab_journal_head(bh);
2166
- if (!jh)
2167
- goto zap_buffer_no_jh;
21682298
21692299 /*
21702300 * We cannot remove the buffer from checkpoint lists until the
....@@ -2244,10 +2374,13 @@
22442374 * for commit and try again.
22452375 */
22462376 if (partial_page) {
2247
- jbd2_journal_put_journal_head(jh);
22482377 spin_unlock(&journal->j_list_lock);
2249
- jbd_unlock_bh_state(bh);
2378
+ spin_unlock(&jh->b_state_lock);
22502379 write_unlock(&journal->j_state_lock);
2380
+ jbd2_journal_put_journal_head(jh);
2381
+ /* Already zapped buffer? Nothing to do... */
2382
+ if (!bh->b_bdev)
2383
+ return 0;
22512384 return -EBUSY;
22522385 }
22532386 /*
....@@ -2261,10 +2394,10 @@
22612394 if (journal->j_running_transaction && buffer_jbddirty(bh))
22622395 jh->b_next_transaction = journal->j_running_transaction;
22632396 jh->b_modified = 0;
2264
- jbd2_journal_put_journal_head(jh);
22652397 spin_unlock(&journal->j_list_lock);
2266
- jbd_unlock_bh_state(bh);
2398
+ spin_unlock(&jh->b_state_lock);
22672399 write_unlock(&journal->j_state_lock);
2400
+ jbd2_journal_put_journal_head(jh);
22682401 return 0;
22692402 } else {
22702403 /* Good, the buffer belongs to the running transaction.
....@@ -2288,11 +2421,10 @@
22882421 * here.
22892422 */
22902423 jh->b_modified = 0;
2291
- jbd2_journal_put_journal_head(jh);
2292
-zap_buffer_no_jh:
22932424 spin_unlock(&journal->j_list_lock);
2294
- jbd_unlock_bh_state(bh);
2425
+ spin_unlock(&jh->b_state_lock);
22952426 write_unlock(&journal->j_state_lock);
2427
+ jbd2_journal_put_journal_head(jh);
22962428 zap_buffer_unlocked:
22972429 clear_buffer_dirty(bh);
22982430 J_ASSERT_BH(bh, !buffer_jbddirty(bh));
....@@ -2306,7 +2438,7 @@
23062438 }
23072439
23082440 /**
2309
- * void jbd2_journal_invalidatepage()
2441
+ * jbd2_journal_invalidatepage()
23102442 * @journal: journal to use for flush...
23112443 * @page: page to flush
23122444 * @offset: start of the range to invalidate
....@@ -2379,7 +2511,7 @@
23792511 int was_dirty = 0;
23802512 struct buffer_head *bh = jh2bh(jh);
23812513
2382
- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
2514
+ lockdep_assert_held(&jh->b_state_lock);
23832515 assert_spin_locked(&transaction->t_journal->j_list_lock);
23842516
23852517 J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
....@@ -2441,11 +2573,11 @@
24412573 void jbd2_journal_file_buffer(struct journal_head *jh,
24422574 transaction_t *transaction, int jlist)
24432575 {
2444
- jbd_lock_bh_state(jh2bh(jh));
2576
+ spin_lock(&jh->b_state_lock);
24452577 spin_lock(&transaction->t_journal->j_list_lock);
24462578 __jbd2_journal_file_buffer(jh, transaction, jlist);
24472579 spin_unlock(&transaction->t_journal->j_list_lock);
2448
- jbd_unlock_bh_state(jh2bh(jh));
2580
+ spin_unlock(&jh->b_state_lock);
24492581 }
24502582
24512583 /*
....@@ -2455,23 +2587,25 @@
24552587 * buffer on that transaction's metadata list.
24562588 *
24572589 * Called under j_list_lock
2458
- * Called under jbd_lock_bh_state(jh2bh(jh))
2590
+ * Called under jh->b_state_lock
24592591 *
2460
- * jh and bh may be already free when this function returns
2592
+ * When this function returns true, there's no next transaction to refile to
2593
+ * and the caller has to drop jh reference through
2594
+ * jbd2_journal_put_journal_head().
24612595 */
2462
-void __jbd2_journal_refile_buffer(struct journal_head *jh)
2596
+bool __jbd2_journal_refile_buffer(struct journal_head *jh)
24632597 {
24642598 int was_dirty, jlist;
24652599 struct buffer_head *bh = jh2bh(jh);
24662600
2467
- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
2601
+ lockdep_assert_held(&jh->b_state_lock);
24682602 if (jh->b_transaction)
24692603 assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);
24702604
24712605 /* If the buffer is now unused, just drop it. */
24722606 if (jh->b_next_transaction == NULL) {
24732607 __jbd2_journal_unfile_buffer(jh);
2474
- return;
2608
+ return true;
24752609 }
24762610
24772611 /*
....@@ -2506,6 +2640,7 @@
25062640
25072641 if (was_dirty)
25082642 set_buffer_jbddirty(bh);
2643
+ return false;
25092644 }
25102645
25112646 /*
....@@ -2516,16 +2651,15 @@
25162651 */
25172652 void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
25182653 {
2519
- struct buffer_head *bh = jh2bh(jh);
2654
+ bool drop;
25202655
2521
- /* Get reference so that buffer cannot be freed before we unlock it */
2522
- get_bh(bh);
2523
- jbd_lock_bh_state(bh);
2656
+ spin_lock(&jh->b_state_lock);
25242657 spin_lock(&journal->j_list_lock);
2525
- __jbd2_journal_refile_buffer(jh);
2526
- jbd_unlock_bh_state(bh);
2658
+ drop = __jbd2_journal_refile_buffer(jh);
2659
+ spin_unlock(&jh->b_state_lock);
25272660 spin_unlock(&journal->j_list_lock);
2528
- __brelse(bh);
2661
+ if (drop)
2662
+ jbd2_journal_put_journal_head(jh);
25292663 }
25302664
25312665 /*
....@@ -2584,18 +2718,6 @@
25842718 spin_unlock(&journal->j_list_lock);
25852719
25862720 return 0;
2587
-}
2588
-
2589
-int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode)
2590
-{
2591
- return jbd2_journal_file_inode(handle, jinode,
2592
- JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX);
2593
-}
2594
-
2595
-int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode)
2596
-{
2597
- return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0,
2598
- LLONG_MAX);
25992721 }
26002722
26012723 int jbd2_journal_inode_ranged_write(handle_t *handle,