hc
2024-05-14 bedbef8ad3e75a304af6361af235302bcc61d06b
kernel/fs/jbd2/journal.c
....@@ -66,9 +66,6 @@
6666 EXPORT_SYMBOL(jbd2_journal_set_triggers);
6767 EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
6868 EXPORT_SYMBOL(jbd2_journal_forget);
69
-#if 0
70
-EXPORT_SYMBOL(journal_sync_buffer);
71
-#endif
7269 EXPORT_SYMBOL(jbd2_journal_flush);
7370 EXPORT_SYMBOL(jbd2_journal_revoke);
7471
....@@ -92,16 +89,15 @@
9289 EXPORT_SYMBOL(jbd2_journal_invalidatepage);
9390 EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
9491 EXPORT_SYMBOL(jbd2_journal_force_commit);
95
-EXPORT_SYMBOL(jbd2_journal_inode_add_write);
96
-EXPORT_SYMBOL(jbd2_journal_inode_add_wait);
9792 EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
9893 EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
94
+EXPORT_SYMBOL(jbd2_journal_submit_inode_data_buffers);
95
+EXPORT_SYMBOL(jbd2_journal_finish_inode_data_buffers);
9996 EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
10097 EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
10198 EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
10299 EXPORT_SYMBOL(jbd2_inode_cache);
103100
104
-static void __journal_abort_soft (journal_t *journal, int errno);
105101 static int jbd2_journal_create_slab(size_t slab_size);
106102
107103 #ifdef CONFIG_JBD2_DEBUG
....@@ -144,22 +140,6 @@
144140 return cpu_to_be32(csum);
145141 }
146142
147
-static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
148
-{
149
- if (!jbd2_journal_has_csum_v2or3(j))
150
- return 1;
151
-
152
- return sb->s_checksum == jbd2_superblock_csum(j, sb);
153
-}
154
-
155
-static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb)
156
-{
157
- if (!jbd2_journal_has_csum_v2or3(j))
158
- return;
159
-
160
- sb->s_checksum = jbd2_superblock_csum(j, sb);
161
-}
162
-
163143 /*
164144 * Helper function used to manage commit timeouts
165145 */
....@@ -179,7 +159,9 @@
179159 *
180160 * 1) COMMIT: Every so often we need to commit the current state of the
181161 * filesystem to disk. The journal thread is responsible for writing
182
- * all of the metadata buffers to disk.
162
+ * all of the metadata buffers to disk. If a fast commit is ongoing
163
+ * journal thread waits until it's done and then continues from
164
+ * there on.
183165 *
184166 * 2) CHECKPOINT: We cannot reuse a used section of the log file until all
185167 * of the data in that part of the log has been rewritten elsewhere on
....@@ -221,7 +203,7 @@
221203 if (journal->j_flags & JBD2_UNMOUNT)
222204 goto end_loop;
223205
224
- jbd_debug(1, "commit_sequence=%d, commit_request=%d\n",
206
+ jbd_debug(1, "commit_sequence=%u, commit_request=%u\n",
225207 journal->j_commit_sequence, journal->j_commit_request);
226208
227209 if (journal->j_commit_sequence != journal->j_commit_request) {
....@@ -342,7 +324,7 @@
342324 * IO is in progress. do_get_write_access() handles this.
343325 *
344326 * The function returns a pointer to the buffer_head to be used for IO.
345
- *
327
+ *
346328 *
347329 * Return value:
348330 * <0: Error
....@@ -384,7 +366,7 @@
384366 /* keep subsequent assertions sane */
385367 atomic_set(&new_bh->b_count, 1);
386368
387
- jbd_lock_bh_state(bh_in);
369
+ spin_lock(&jh_in->b_state_lock);
388370 repeat:
389371 /*
390372 * If a new transaction has already done a buffer copy-out, then
....@@ -426,13 +408,13 @@
426408 if (need_copy_out && !done_copy_out) {
427409 char *tmp;
428410
429
- jbd_unlock_bh_state(bh_in);
411
+ spin_unlock(&jh_in->b_state_lock);
430412 tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
431413 if (!tmp) {
432414 brelse(new_bh);
433415 return -ENOMEM;
434416 }
435
- jbd_lock_bh_state(bh_in);
417
+ spin_lock(&jh_in->b_state_lock);
436418 if (jh_in->b_frozen_data) {
437419 jbd2_free(tmp, bh_in->b_size);
438420 goto repeat;
....@@ -485,7 +467,7 @@
485467 __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
486468 spin_unlock(&journal->j_list_lock);
487469 set_buffer_shadow(bh_in);
488
- jbd_unlock_bh_state(bh_in);
470
+ spin_unlock(&jh_in->b_state_lock);
489471
490472 return do_escape | (done_copy_out << 1);
491473 }
....@@ -518,7 +500,7 @@
518500 */
519501
520502 journal->j_commit_request = target;
521
- jbd_debug(1, "JBD2: requesting commit %d/%d\n",
503
+ jbd_debug(1, "JBD2: requesting commit %u/%u\n",
522504 journal->j_commit_request,
523505 journal->j_commit_sequence);
524506 journal->j_running_transaction->t_requested = jiffies;
....@@ -531,7 +513,7 @@
531513 WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n",
532514 journal->j_commit_request,
533515 journal->j_commit_sequence,
534
- target, journal->j_running_transaction ?
516
+ target, journal->j_running_transaction ?
535517 journal->j_running_transaction->t_tid : 0);
536518 return 0;
537519 }
....@@ -584,12 +566,14 @@
584566 }
585567
586568 /**
587
- * Force and wait upon a commit if the calling process is not within
588
- * transaction. This is used for forcing out undo-protected data which contains
589
- * bitmaps, when the fs is running out of space.
569
+ * jbd2_journal_force_commit_nested - Force and wait upon a commit if the
570
+ * calling process is not within transaction.
590571 *
591572 * @journal: journal to force
592573 * Returns true if progress was made.
574
+ *
575
+ * This is used for forcing out undo-protected data which contains
576
+ * bitmaps, when the fs is running out of space.
593577 */
594578 int jbd2_journal_force_commit_nested(journal_t *journal)
595579 {
....@@ -600,7 +584,7 @@
600584 }
601585
602586 /**
603
- * int journal_force_commit() - force any uncommitted transactions
587
+ * jbd2_journal_force_commit() - force any uncommitted transactions
604588 * @journal: journal to force
605589 *
606590 * Caller want unconditional commit. We can only force the running transaction
....@@ -716,12 +700,12 @@
716700 #ifdef CONFIG_JBD2_DEBUG
717701 if (!tid_geq(journal->j_commit_request, tid)) {
718702 printk(KERN_ERR
719
- "%s: error: j_commit_request=%d, tid=%d\n",
703
+ "%s: error: j_commit_request=%u, tid=%u\n",
720704 __func__, journal->j_commit_request, tid);
721705 }
722706 #endif
723707 while (tid_gt(tid, journal->j_commit_sequence)) {
724
- jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n",
708
+ jbd_debug(1, "JBD2: want %u, j_commit_sequence=%u\n",
725709 tid, journal->j_commit_sequence);
726710 read_unlock(&journal->j_state_lock);
727711 wake_up(&journal->j_wait_commit);
....@@ -735,6 +719,87 @@
735719 err = -EIO;
736720 return err;
737721 }
722
+
723
+/*
724
+ * Start a fast commit. If there's an ongoing fast or full commit, wait for
725
+ * it to complete. Returns 0 if a new fast commit was started. Returns -EALREADY
726
+ * if a fast commit is not needed, either because there's already a commit
727
+ * going on or this tid has already been committed (wrap-safe tid_geq() check).
728
+ * Returns -EINVAL if no jbd2 commit has yet been performed, -EIO if aborted.
729
+ */
730
+int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
731
+{
732
+ if (unlikely(is_journal_aborted(journal)))
733
+ return -EIO;
734
+ /*
735
+ * Fast commits only allowed if at least one full commit has
736
+ * been processed.
737
+ */
738
+ if (!journal->j_stats.ts_tid)
739
+ return -EINVAL;
740
+
741
+ write_lock(&journal->j_state_lock);
742
+ if (tid_geq(journal->j_commit_sequence, tid)) {
743
+ write_unlock(&journal->j_state_lock);
744
+ return -EALREADY;
745
+ }
746
+
747
+ if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
748
+ (journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) {
749
+ DEFINE_WAIT(wait);
750
+
751
+ prepare_to_wait(&journal->j_fc_wait, &wait,
752
+ TASK_UNINTERRUPTIBLE);
753
+ write_unlock(&journal->j_state_lock);
754
+ schedule();
755
+ finish_wait(&journal->j_fc_wait, &wait);
756
+ return -EALREADY;
757
+ }
758
+ journal->j_flags |= JBD2_FAST_COMMIT_ONGOING;
759
+ write_unlock(&journal->j_state_lock);
760
+ jbd2_journal_lock_updates(journal);
761
+
762
+ return 0;
763
+}
764
+EXPORT_SYMBOL(jbd2_fc_begin_commit);
765
+
766
+/*
767
+ * Stop a fast commit and wake j_fc_wait. If fallback is set, this function starts
768
+ * commit of TID tid before any other fast commit can start; otherwise returns 0.
769
+ */
770
+static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
771
+{
772
+ jbd2_journal_unlock_updates(journal);
773
+ if (journal->j_fc_cleanup_callback)
774
+ journal->j_fc_cleanup_callback(journal, 0);
775
+ write_lock(&journal->j_state_lock);
776
+ journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
777
+ if (fallback)
778
+ journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
779
+ write_unlock(&journal->j_state_lock);
780
+ wake_up(&journal->j_fc_wait);
781
+ if (fallback)
782
+ return jbd2_complete_transaction(journal, tid);
783
+ return 0;
784
+}
785
+
786
+int jbd2_fc_end_commit(journal_t *journal)
787
+{
788
+ return __jbd2_fc_end_commit(journal, 0, false);
789
+}
790
+EXPORT_SYMBOL(jbd2_fc_end_commit);
791
+
792
+int jbd2_fc_end_commit_fallback(journal_t *journal)
793
+{
794
+ tid_t tid;
795
+
796
+ read_lock(&journal->j_state_lock);
797
+ tid = journal->j_running_transaction ?
798
+ journal->j_running_transaction->t_tid : 0;
799
+ read_unlock(&journal->j_state_lock);
800
+ return __jbd2_fc_end_commit(journal, tid, true);
801
+}
802
+EXPORT_SYMBOL(jbd2_fc_end_commit_fallback);
738803
739804 /* Return 1 when transaction with given tid has already committed. */
740805 int jbd2_transaction_committed(journal_t *journal, tid_t tid)
....@@ -804,6 +869,106 @@
804869 return jbd2_journal_bmap(journal, blocknr, retp);
805870 }
806871
872
+/* Map one fast commit buffer for use by the file system. Returns 0 or a negative errno. */
873
+int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
874
+{
875
+ unsigned long long pblock;
876
+ unsigned long blocknr;
877
+ int ret = 0;
878
+ struct buffer_head *bh;
879
+ int fc_off;
880
+
881
+ *bh_out = NULL;
882
+
883
+ if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) {
884
+ fc_off = journal->j_fc_off;
885
+ blocknr = journal->j_fc_first + fc_off;
886
+ /* j_fc_off is advanced below, once nothing can fail any more */
887
+ } else {
888
+ ret = -EINVAL;
889
+ }
890
+
891
+ if (ret)
892
+ return ret;
893
+
894
+ ret = jbd2_journal_bmap(journal, blocknr, &pblock);
895
+ if (ret)
896
+ return ret;
897
+
898
+ bh = __getblk(journal->j_dev, pblock, journal->j_blocksize);
899
+ if (!bh)
900
+ return -ENOMEM;
901
+
902
+ journal->j_fc_off++;
903
+ journal->j_fc_wbuf[fc_off] = bh;
904
+
905
+ *bh_out = bh;
906
+
907
+ return 0;
908
+}
909
+EXPORT_SYMBOL(jbd2_fc_get_buf);
910
+
911
+/*
912
+ * Wait for IO completion on the last num_blks fast commit buffers that were
913
+ * allocated by jbd2_fc_get_buf. Returns 0, or -EIO if a buffer is not uptodate.
914
+ */
915
+int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
916
+{
917
+ struct buffer_head *bh;
918
+ int i, j_fc_off;
919
+
920
+ j_fc_off = journal->j_fc_off;
921
+
922
+ /*
923
+ * Wait in reverse order to minimize chances of us being woken up before
924
+ * all IOs have completed
925
+ */
926
+ for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) {
927
+ bh = journal->j_fc_wbuf[i];
928
+ wait_on_buffer(bh);
929
+ /*
930
+ * On error, update j_fc_off so that jbd2_fc_release_bufs can release
931
+ * the remaining buffer heads.
932
+ */
933
+ if (unlikely(!buffer_uptodate(bh))) {
934
+ journal->j_fc_off = i + 1;
935
+ return -EIO;
936
+ }
937
+ put_bh(bh);
938
+ journal->j_fc_wbuf[i] = NULL;
939
+ }
940
+
941
+ return 0;
942
+}
943
+EXPORT_SYMBOL(jbd2_fc_wait_bufs);
944
+
945
+/*
946
+ * Release the references on fast commit buffers that were allocated by
947
+ * jbd2_fc_get_buf and are still recorded in j_fc_wbuf. Always returns 0.
948
+ */
949
+int jbd2_fc_release_bufs(journal_t *journal)
950
+{
951
+ struct buffer_head *bh;
952
+ int i, j_fc_off;
953
+
954
+ j_fc_off = journal->j_fc_off;
955
+
956
+ /*
957
+ * Walk down from j_fc_off - 1 and stop at the first empty slot; buffers
958
+ * already waited on were released and cleared by jbd2_fc_wait_bufs
959
+ */
960
+ for (i = j_fc_off - 1; i >= 0; i--) {
961
+ bh = journal->j_fc_wbuf[i];
962
+ if (!bh)
963
+ break;
964
+ put_bh(bh);
965
+ journal->j_fc_wbuf[i] = NULL;
966
+ }
967
+
968
+ return 0;
969
+}
970
+EXPORT_SYMBOL(jbd2_fc_release_bufs);
971
+
807972 /*
808973 * Conversion of logical to physical block numbers for the journal
809974 *
....@@ -816,18 +981,22 @@
816981 {
817982 int err = 0;
818983 unsigned long long ret;
984
+ sector_t block = 0;
819985
820986 if (journal->j_inode) {
821
- ret = bmap(journal->j_inode, blocknr);
822
- if (ret)
823
- *retp = ret;
824
- else {
987
+ block = blocknr;
988
+ ret = bmap(journal->j_inode, &block);
989
+
990
+ if (ret || !block) {
825991 printk(KERN_ALERT "%s: journal block not found "
826992 "at offset %lu on %s\n",
827993 __func__, blocknr, journal->j_devname);
828994 err = -EIO;
829
- __journal_abort_soft(journal, err);
995
+ jbd2_journal_abort(journal, err);
996
+ } else {
997
+ *retp = block;
830998 }
999
+
8311000 } else {
8321001 *retp = blocknr; /* +journal->j_blk_offset */
8331002 }
....@@ -861,6 +1030,7 @@
8611030 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
8621031 if (!bh)
8631032 return NULL;
1033
+ atomic_dec(&transaction->t_outstanding_credits);
8641034 lock_buffer(bh);
8651035 memset(bh->b_data, 0, journal->j_blocksize);
8661036 header = (journal_header_t *)bh->b_data;
....@@ -962,7 +1132,7 @@
9621132
9631133 trace_jbd2_update_log_tail(journal, tid, block, freed);
9641134 jbd_debug(1,
965
- "Cleaning journal tail from %d to %d (offset %lu), "
1135
+ "Cleaning journal tail from %u to %u (offset %lu), "
9661136 "freeing %lu\n",
9671137 journal->j_tail_sequence, tid, block, freed);
9681138
....@@ -1095,12 +1265,11 @@
10951265 return seq_release(inode, file);
10961266 }
10971267
1098
-static const struct file_operations jbd2_seq_info_fops = {
1099
- .owner = THIS_MODULE,
1100
- .open = jbd2_seq_info_open,
1101
- .read = seq_read,
1102
- .llseek = seq_lseek,
1103
- .release = jbd2_seq_info_release,
1268
+static const struct proc_ops jbd2_info_proc_ops = {
1269
+ .proc_open = jbd2_seq_info_open,
1270
+ .proc_read = seq_read,
1271
+ .proc_lseek = seq_lseek,
1272
+ .proc_release = jbd2_seq_info_release,
11041273 };
11051274
11061275 static struct proc_dir_entry *proc_jbd2_stats;
....@@ -1110,7 +1279,7 @@
11101279 journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats);
11111280 if (journal->j_proc_entry) {
11121281 proc_create_data("info", S_IRUGO, journal->j_proc_entry,
1113
- &jbd2_seq_info_fops, journal);
1282
+ &jbd2_info_proc_ops, journal);
11141283 }
11151284 }
11161285
....@@ -1118,6 +1287,16 @@
11181287 {
11191288 remove_proc_entry("info", journal->j_proc_entry);
11201289 remove_proc_entry(journal->j_devname, proc_jbd2_stats);
1290
+}
1291
+
1292
+/* Minimum size of descriptor tag */
1293
+static int jbd2_min_tag_size(void)
1294
+{
1295
+ /*
1296
+ * Tag with 32-bit block numbers does not use last four bytes of the
1297
+ * structure
1298
+ */
1299
+ return sizeof(journal_block_tag_t) - 4;
11211300 }
11221301
11231302 /*
....@@ -1148,6 +1327,8 @@
11481327 init_waitqueue_head(&journal->j_wait_commit);
11491328 init_waitqueue_head(&journal->j_wait_updates);
11501329 init_waitqueue_head(&journal->j_wait_reserved);
1330
+ init_waitqueue_head(&journal->j_fc_wait);
1331
+ mutex_init(&journal->j_abort_mutex);
11511332 mutex_init(&journal->j_barrier);
11521333 mutex_init(&journal->j_checkpoint_mutex);
11531334 spin_lock_init(&journal->j_revoke_lock);
....@@ -1177,9 +1358,11 @@
11771358 journal->j_dev = bdev;
11781359 journal->j_fs_dev = fs_dev;
11791360 journal->j_blk_offset = start;
1180
- journal->j_maxlen = len;
1181
- n = journal->j_blocksize / sizeof(journal_block_tag_t);
1361
+ journal->j_total_len = len;
1362
+ /* We need enough buffers to write out full descriptor block. */
1363
+ n = journal->j_blocksize / jbd2_min_tag_size();
11821364 journal->j_wbufsize = n;
1365
+ journal->j_fc_wbuf = NULL;
11831366 journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
11841367 GFP_KERNEL);
11851368 if (!journal->j_wbuf)
....@@ -1254,11 +1437,14 @@
12541437 journal_t *jbd2_journal_init_inode(struct inode *inode)
12551438 {
12561439 journal_t *journal;
1440
+ sector_t blocknr;
12571441 char *p;
1258
- unsigned long long blocknr;
1442
+ int err = 0;
12591443
1260
- blocknr = bmap(inode, 0);
1261
- if (!blocknr) {
1444
+ blocknr = 0;
1445
+ err = bmap(inode, &blocknr);
1446
+
1447
+ if (err || !blocknr) {
12621448 pr_err("%s: Cannot locate journal superblock\n",
12631449 __func__);
12641450 return NULL;
....@@ -1288,7 +1474,7 @@
12881474 * superblock as being NULL to prevent the journal destroy from writing
12891475 * back a bogus superblock.
12901476 */
1291
-static void journal_fail_superblock (journal_t *journal)
1477
+static void journal_fail_superblock(journal_t *journal)
12921478 {
12931479 struct buffer_head *bh = journal->j_sb_buffer;
12941480 brelse(bh);
....@@ -1319,15 +1505,22 @@
13191505 journal->j_first = first;
13201506 journal->j_last = last;
13211507
1322
- journal->j_head = first;
1323
- journal->j_tail = first;
1324
- journal->j_free = last - first;
1508
+ journal->j_head = journal->j_first;
1509
+ journal->j_tail = journal->j_first;
1510
+ journal->j_free = journal->j_last - journal->j_first;
13251511
13261512 journal->j_tail_sequence = journal->j_transaction_sequence;
13271513 journal->j_commit_sequence = journal->j_transaction_sequence - 1;
13281514 journal->j_commit_request = journal->j_commit_sequence;
13291515
1330
- journal->j_max_transaction_buffers = journal->j_maxlen / 4;
1516
+ journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal);
1517
+
1518
+ /*
1519
+ * Now that journal recovery is done, turn fast commits off here. This
1520
+ * way, if fast commit was enabled before the crash but the FS has now
1521
+ * disabled it, we don't enable fast commits.
1522
+ */
1523
+ jbd2_clear_feature_fast_commit(journal);
13311524
13321525 /*
13331526 * As a special case, if the on-disk copy is already marked as needing
....@@ -1337,7 +1530,7 @@
13371530 */
13381531 if (sb->s_start == 0) {
13391532 jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
1340
- "(start %ld, seq %d, errno %d)\n",
1533
+ "(start %ld, seq %u, errno %d)\n",
13411534 journal->j_tail, journal->j_tail_sequence,
13421535 journal->j_errno);
13431536 journal->j_flags |= JBD2_FLUSHED;
....@@ -1393,7 +1586,8 @@
13931586 clear_buffer_write_io_error(bh);
13941587 set_buffer_uptodate(bh);
13951588 }
1396
- jbd2_superblock_csum_set(journal, sb);
1589
+ if (jbd2_journal_has_csum_v2or3(journal))
1590
+ sb->s_checksum = jbd2_superblock_csum(journal, sb);
13971591 get_bh(bh);
13981592 bh->b_end_io = end_buffer_write_sync;
13991593 ret = submit_bh(REQ_OP_WRITE, write_flags, bh);
....@@ -1407,7 +1601,8 @@
14071601 printk(KERN_ERR "JBD2: Error %d detected when updating "
14081602 "journal superblock for %s.\n", ret,
14091603 journal->j_devname);
1410
- jbd2_journal_abort(journal, ret);
1604
+ if (!is_journal_aborted(journal))
1605
+ jbd2_journal_abort(journal, ret);
14111606 }
14121607
14131608 return ret;
....@@ -1465,6 +1660,7 @@
14651660 static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
14661661 {
14671662 journal_superblock_t *sb = journal->j_superblock;
1663
+ bool had_fast_commit = false;
14681664
14691665 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
14701666 lock_buffer(journal->j_sb_buffer);
....@@ -1473,13 +1669,24 @@
14731669 return;
14741670 }
14751671
1476
- jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
1672
+ jbd_debug(1, "JBD2: Marking journal as empty (seq %u)\n",
14771673 journal->j_tail_sequence);
14781674
14791675 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
14801676 sb->s_start = cpu_to_be32(0);
1677
+ if (jbd2_has_feature_fast_commit(journal)) {
1678
+ /*
1679
+ * When journal is clean, no need to commit fast commit flag and
1680
+ * make file system incompatible with older kernels.
1681
+ */
1682
+ jbd2_clear_feature_fast_commit(journal);
1683
+ had_fast_commit = true;
1684
+ }
14811685
14821686 jbd2_write_superblock(journal, write_op);
1687
+
1688
+ if (had_fast_commit)
1689
+ jbd2_set_feature_fast_commit(journal);
14831690
14841691 /* Log is no longer empty */
14851692 write_lock(&journal->j_state_lock);
....@@ -1510,6 +1717,21 @@
15101717 jbd2_write_superblock(journal, REQ_SYNC | REQ_FUA);
15111718 }
15121719 EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
1720
+
1721
+static int journal_revoke_records_per_block(journal_t *journal)
1722
+{
1723
+ int record_size;
1724
+ int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t);
1725
+
1726
+ if (jbd2_has_feature_64bit(journal))
1727
+ record_size = 8;
1728
+ else
1729
+ record_size = 4;
1730
+
1731
+ if (jbd2_journal_has_csum_v2or3(journal))
1732
+ space -= sizeof(struct jbd2_journal_block_tail);
1733
+ return space / record_size;
1734
+}
15131735
15141736 /*
15151737 * Read the superblock for a given journal, performing initial
....@@ -1559,15 +1781,15 @@
15591781 goto out;
15601782 }
15611783
1562
- if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
1563
- journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
1564
- else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
1784
+ if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
1785
+ journal->j_total_len = be32_to_cpu(sb->s_maxlen);
1786
+ else if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) {
15651787 printk(KERN_WARNING "JBD2: journal file too short\n");
15661788 goto out;
15671789 }
15681790
15691791 if (be32_to_cpu(sb->s_first) == 0 ||
1570
- be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
1792
+ be32_to_cpu(sb->s_first) >= journal->j_total_len) {
15711793 printk(KERN_WARNING
15721794 "JBD2: Invalid start block of journal: %u\n",
15731795 be32_to_cpu(sb->s_first));
....@@ -1606,18 +1828,21 @@
16061828 }
16071829 }
16081830
1609
- /* Check superblock checksum */
1610
- if (!jbd2_superblock_csum_verify(journal, sb)) {
1611
- printk(KERN_ERR "JBD2: journal checksum error\n");
1612
- err = -EFSBADCRC;
1613
- goto out;
1614
- }
1831
+ if (jbd2_journal_has_csum_v2or3(journal)) {
1832
+ /* Check superblock checksum */
1833
+ if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
1834
+ printk(KERN_ERR "JBD2: journal checksum error\n");
1835
+ err = -EFSBADCRC;
1836
+ goto out;
1837
+ }
16151838
1616
- /* Precompute checksum seed for all metadata */
1617
- if (jbd2_journal_has_csum_v2or3(journal))
1839
+ /* Precompute checksum seed for all metadata */
16181840 journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
16191841 sizeof(sb->s_uuid));
1842
+ }
16201843
1844
+ journal->j_revoke_records_per_block =
1845
+ journal_revoke_records_per_block(journal);
16211846 set_buffer_verified(bh);
16221847
16231848 return 0;
....@@ -1636,6 +1861,7 @@
16361861 {
16371862 int err;
16381863 journal_superblock_t *sb;
1864
+ int num_fc_blocks;
16391865
16401866 err = journal_get_superblock(journal);
16411867 if (err)
....@@ -1646,15 +1872,26 @@
16461872 journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
16471873 journal->j_tail = be32_to_cpu(sb->s_start);
16481874 journal->j_first = be32_to_cpu(sb->s_first);
1649
- journal->j_last = be32_to_cpu(sb->s_maxlen);
16501875 journal->j_errno = be32_to_cpu(sb->s_errno);
1876
+ journal->j_last = be32_to_cpu(sb->s_maxlen);
1877
+
1878
+ if (jbd2_has_feature_fast_commit(journal)) {
1879
+ journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
1880
+ num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks);
1881
+ if (!num_fc_blocks)
1882
+ num_fc_blocks = JBD2_MIN_FC_BLOCKS;
1883
+ if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS)
1884
+ journal->j_last = journal->j_fc_last - num_fc_blocks;
1885
+ journal->j_fc_first = journal->j_last + 1;
1886
+ journal->j_fc_off = 0;
1887
+ }
16511888
16521889 return 0;
16531890 }
16541891
16551892
16561893 /**
1657
- * int jbd2_journal_load() - Read journal from disk.
1894
+ * jbd2_journal_load() - Read journal from disk.
16581895 * @journal: Journal to act on.
16591896 *
16601897 * Given a journal_t structure which tells us which disk blocks contain
....@@ -1724,7 +1961,7 @@
17241961 }
17251962
17261963 /**
1727
- * void jbd2_journal_destroy() - Release a journal_t structure.
1964
+ * jbd2_journal_destroy() - Release a journal_t structure.
17281965 * @journal: Journal to act on.
17291966 *
17301967 * Release a journal_t structure once it is no longer in use by the
....@@ -1792,6 +2029,7 @@
17922029 jbd2_journal_destroy_revoke(journal);
17932030 if (journal->j_chksum_driver)
17942031 crypto_free_shash(journal->j_chksum_driver);
2032
+ kfree(journal->j_fc_wbuf);
17952033 kfree(journal->j_wbuf);
17962034 kfree(journal);
17972035
....@@ -1800,7 +2038,7 @@
18002038
18012039
18022040 /**
1803
- *int jbd2_journal_check_used_features () - Check if features specified are used.
2041
+ * jbd2_journal_check_used_features() - Check if features specified are used.
18042042 * @journal: Journal to check.
18052043 * @compat: bitmask of compatible features
18062044 * @ro: bitmask of features that force read-only mount
....@@ -1810,7 +2048,7 @@
18102048 * features. Return true (non-zero) if it does.
18112049 **/
18122050
1813
-int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat,
2051
+int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat,
18142052 unsigned long ro, unsigned long incompat)
18152053 {
18162054 journal_superblock_t *sb;
....@@ -1835,7 +2073,7 @@
18352073 }
18362074
18372075 /**
1838
- * int jbd2_journal_check_available_features() - Check feature set in journalling layer
2076
+ * jbd2_journal_check_available_features() - Check feature set in journalling layer
18392077 * @journal: Journal to check.
18402078 * @compat: bitmask of compatible features
18412079 * @ro: bitmask of features that force read-only mount
....@@ -1845,7 +2083,7 @@
18452083 * all of a given set of features on this journal. Return true
18462084 * (non-zero) if it can. */
18472085
1848
-int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat,
2086
+int jbd2_journal_check_available_features(journal_t *journal, unsigned long compat,
18492087 unsigned long ro, unsigned long incompat)
18502088 {
18512089 if (!compat && !ro && !incompat)
....@@ -1866,8 +2104,39 @@
18662104 return 0;
18672105 }
18682106
2107
+static int
2108
+jbd2_journal_initialize_fast_commit(journal_t *journal)
2109
+{
2110
+ journal_superblock_t *sb = journal->j_superblock;
2111
+ unsigned long long num_fc_blks;
2112
+
2113
+ num_fc_blks = be32_to_cpu(sb->s_num_fc_blks);
2114
+ if (num_fc_blks == 0)
2115
+ num_fc_blks = JBD2_MIN_FC_BLOCKS;
2116
+ if (journal->j_last < num_fc_blks + JBD2_MIN_JOURNAL_BLOCKS)
2117
+ return -ENOSPC; /* also rejects num_fc_blks > j_last without unsigned wrap */
2118
+
2119
+ /* Are we called twice? */
2120
+ WARN_ON(journal->j_fc_wbuf != NULL);
2121
+ journal->j_fc_wbuf = kmalloc_array(num_fc_blks,
2122
+ sizeof(struct buffer_head *), GFP_KERNEL);
2123
+ if (!journal->j_fc_wbuf)
2124
+ return -ENOMEM;
2125
+
2126
+ journal->j_fc_wbufsize = num_fc_blks;
2127
+ journal->j_fc_last = journal->j_last;
2128
+ journal->j_last = journal->j_fc_last - num_fc_blks;
2129
+ journal->j_fc_first = journal->j_last + 1;
2130
+ journal->j_fc_off = 0;
2131
+ journal->j_free = journal->j_last - journal->j_first;
2132
+ journal->j_max_transaction_buffers =
2133
+ jbd2_journal_get_max_txn_bufs(journal);
2134
+
2135
+ return 0;
2136
+}
2137
+
18692138 /**
1870
- * int jbd2_journal_set_features () - Mark a given journal feature in the superblock
2139
+ * jbd2_journal_set_features() - Mark a given journal feature in the superblock
18712140 * @journal: Journal to act on.
18722141 * @compat: bitmask of compatible features
18732142 * @ro: bitmask of features that force read-only mount
....@@ -1878,7 +2147,7 @@
18782147 *
18792148 */
18802149
1881
-int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
2150
+int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
18822151 unsigned long ro, unsigned long incompat)
18832152 {
18842153 #define INCOMPAT_FEATURE_ON(f) \
....@@ -1908,6 +2177,13 @@
19082177 compat, ro, incompat);
19092178
19102179 sb = journal->j_superblock;
2180
+
2181
+ if (incompat & JBD2_FEATURE_INCOMPAT_FAST_COMMIT) {
2182
+ if (jbd2_journal_initialize_fast_commit(journal)) {
2183
+ pr_err("JBD2: Cannot enable fast commits.\n");
2184
+ return 0;
2185
+ }
2186
+ }
19112187
19122188 /* Load the checksum driver if necessary */
19132189 if ((journal->j_chksum_driver == NULL) &&
....@@ -1942,6 +2218,8 @@
19422218 sb->s_feature_ro_compat |= cpu_to_be32(ro);
19432219 sb->s_feature_incompat |= cpu_to_be32(incompat);
19442220 unlock_buffer(journal->j_sb_buffer);
2221
+ journal->j_revoke_records_per_block =
2222
+ journal_revoke_records_per_block(journal);
19452223
19462224 return 1;
19472225 #undef COMPAT_FEATURE_ON
....@@ -1949,7 +2227,7 @@
19492227 }
19502228
19512229 /*
1952
- * jbd2_journal_clear_features () - Clear a given journal feature in the
2230
+ * jbd2_journal_clear_features() - Clear a given journal feature in the
19532231 * superblock
19542232 * @journal: Journal to act on.
19552233 * @compat: bitmask of compatible features
....@@ -1972,11 +2250,13 @@
19722250 sb->s_feature_compat &= ~cpu_to_be32(compat);
19732251 sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
19742252 sb->s_feature_incompat &= ~cpu_to_be32(incompat);
2253
+ journal->j_revoke_records_per_block =
2254
+ journal_revoke_records_per_block(journal);
19752255 }
19762256 EXPORT_SYMBOL(jbd2_journal_clear_features);
19772257
19782258 /**
1979
- * int jbd2_journal_flush () - Flush journal
2259
+ * jbd2_journal_flush() - Flush journal
19802260 * @journal: Journal to act on.
19812261 *
19822262 * Flush all data for a given journal to disk and empty the journal.
....@@ -2051,7 +2331,7 @@
20512331 }
20522332
20532333 /**
2054
- * int jbd2_journal_wipe() - Wipe journal contents
2334
+ * jbd2_journal_wipe() - Wipe journal contents
20552335 * @journal: Journal to act on.
20562336 * @write: flag (see below)
20572337 *
....@@ -2082,7 +2362,7 @@
20822362 err = jbd2_journal_skip_recovery(journal);
20832363 if (write) {
20842364 /* Lock to make assertions happy... */
2085
- mutex_lock(&journal->j_checkpoint_mutex);
2365
+ mutex_lock_io(&journal->j_checkpoint_mutex);
20862366 jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
20872367 mutex_unlock(&journal->j_checkpoint_mutex);
20882368 }
....@@ -2091,66 +2371,8 @@
20912371 return err;
20922372 }
20932373
2094
-/*
2095
- * Journal abort has very specific semantics, which we describe
2096
- * for journal abort.
2097
- *
2098
- * Two internal functions, which provide abort to the jbd layer
2099
- * itself are here.
2100
- */
2101
-
2102
-/*
2103
- * Quick version for internal journal use (doesn't lock the journal).
2104
- * Aborts hard --- we mark the abort as occurred, but do _nothing_ else,
2105
- * and don't attempt to make any other journal updates.
2106
- */
2107
-void __jbd2_journal_abort_hard(journal_t *journal)
2108
-{
2109
- transaction_t *transaction;
2110
-
2111
- if (journal->j_flags & JBD2_ABORT)
2112
- return;
2113
-
2114
- printk(KERN_ERR "Aborting journal on device %s.\n",
2115
- journal->j_devname);
2116
-
2117
- write_lock(&journal->j_state_lock);
2118
- journal->j_flags |= JBD2_ABORT;
2119
- transaction = journal->j_running_transaction;
2120
- if (transaction)
2121
- __jbd2_log_start_commit(journal, transaction->t_tid);
2122
- write_unlock(&journal->j_state_lock);
2123
-}
2124
-
2125
-/* Soft abort: record the abort error status in the journal superblock,
2126
- * but don't do any other IO. */
2127
-static void __journal_abort_soft (journal_t *journal, int errno)
2128
-{
2129
- int old_errno;
2130
-
2131
- write_lock(&journal->j_state_lock);
2132
- old_errno = journal->j_errno;
2133
- if (!journal->j_errno || errno == -ESHUTDOWN)
2134
- journal->j_errno = errno;
2135
-
2136
- if (journal->j_flags & JBD2_ABORT) {
2137
- write_unlock(&journal->j_state_lock);
2138
- if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN)
2139
- jbd2_journal_update_sb_errno(journal);
2140
- return;
2141
- }
2142
- write_unlock(&journal->j_state_lock);
2143
-
2144
- __jbd2_journal_abort_hard(journal);
2145
-
2146
- jbd2_journal_update_sb_errno(journal);
2147
- write_lock(&journal->j_state_lock);
2148
- journal->j_flags |= JBD2_REC_ERR;
2149
- write_unlock(&journal->j_state_lock);
2150
-}
2151
-
21522374 /**
2153
- * void jbd2_journal_abort () - Shutdown the journal immediately.
2375
+ * jbd2_journal_abort() - Shutdown the journal immediately.
21542376 * @journal: the journal to shutdown.
21552377 * @errno: an error number to record in the journal indicating
21562378 * the reason for the shutdown.
....@@ -2192,11 +2414,56 @@
21922414
21932415 void jbd2_journal_abort(journal_t *journal, int errno)
21942416 {
2195
- __journal_abort_soft(journal, errno);
2417
+ transaction_t *transaction;
2418
+
2419
+ /*
2420
+ * Lock the aborting procedure until everything is done; this avoids
2421
+ * races with the filesystem's error handling flow (e.g. ext4_abort())
2422
+ * and ensures that a panic happens only after the error info is
2423
+ * written into the journal's superblock.
2424
+ */
2425
+ mutex_lock(&journal->j_abort_mutex);
2426
+ /*
2427
+ * ESHUTDOWN always takes precedence because a file system check
2428
+ * caused by any other journal abort error is not required after
2429
+ * a shutdown is triggered.
2430
+ */
2431
+ write_lock(&journal->j_state_lock);
2432
+ if (journal->j_flags & JBD2_ABORT) {
2433
+ int old_errno = journal->j_errno;
2434
+
2435
+ write_unlock(&journal->j_state_lock);
2436
+ if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) {
2437
+ journal->j_errno = errno;
2438
+ jbd2_journal_update_sb_errno(journal);
2439
+ }
2440
+ mutex_unlock(&journal->j_abort_mutex);
2441
+ return;
2442
+ }
2443
+
2444
+ /*
2445
+ * Mark the abort as occurred and start committing the running
2446
+ * transaction to release all journaled buffers.
2447
+ */
2448
+ pr_err("Aborting journal on device %s.\n", journal->j_devname);
2449
+
2450
+ journal->j_flags |= JBD2_ABORT;
2451
+ journal->j_errno = errno;
2452
+ transaction = journal->j_running_transaction;
2453
+ if (transaction)
2454
+ __jbd2_log_start_commit(journal, transaction->t_tid);
2455
+ write_unlock(&journal->j_state_lock);
2456
+
2457
+ /*
2458
+ * Record errno to the journal super block, so that fsck and the jbd2
2459
+ * layer can realise that a filesystem check is needed.
2460
+ */
2461
+ jbd2_journal_update_sb_errno(journal);
2462
+ mutex_unlock(&journal->j_abort_mutex);
21962463 }
21972464
21982465 /**
2199
- * int jbd2_journal_errno () - returns the journal's error state.
2466
+ * jbd2_journal_errno() - returns the journal's error state.
22002467 * @journal: journal to examine.
22012468 *
22022469 * This is the errno number set with jbd2_journal_abort(), the last
....@@ -2220,7 +2487,7 @@
22202487 }
22212488
22222489 /**
2223
- * int jbd2_journal_clear_err () - clears the journal's error state
2490
+ * jbd2_journal_clear_err() - clears the journal's error state
22242491 * @journal: journal to act on.
22252492 *
22262493 * An error must be cleared or acked to take a FS out of readonly
....@@ -2240,7 +2507,7 @@
22402507 }
22412508
22422509 /**
2243
- * void jbd2_journal_ack_err() - Ack journal err.
2510
+ * jbd2_journal_ack_err() - Ack journal err.
22442511 * @journal: journal to act on.
22452512 *
22462513 * An error must be cleared or acked to take a FS out of readonly
....@@ -2428,6 +2695,8 @@
24282695 ret = kmem_cache_zalloc(jbd2_journal_head_cache,
24292696 GFP_NOFS | __GFP_NOFAIL);
24302697 }
2698
+ if (ret)
2699
+ spin_lock_init(&ret->b_state_lock);
24312700 return ret;
24322701 }
24332702
....@@ -2534,12 +2803,12 @@
25342803 jbd_unlock_bh_journal_head(bh);
25352804 return jh;
25362805 }
2806
+EXPORT_SYMBOL(jbd2_journal_grab_journal_head);
25372807
25382808 static void __journal_remove_journal_head(struct buffer_head *bh)
25392809 {
25402810 struct journal_head *jh = bh2jh(bh);
25412811
2542
- J_ASSERT_JH(jh, jh->b_jcount >= 0);
25432812 J_ASSERT_JH(jh, jh->b_transaction == NULL);
25442813 J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
25452814 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
....@@ -2547,17 +2816,23 @@
25472816 J_ASSERT_BH(bh, buffer_jbd(bh));
25482817 J_ASSERT_BH(bh, jh2bh(jh) == bh);
25492818 BUFFER_TRACE(bh, "remove journal_head");
2550
- if (jh->b_frozen_data) {
2551
- printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
2552
- jbd2_free(jh->b_frozen_data, bh->b_size);
2553
- }
2554
- if (jh->b_committed_data) {
2555
- printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
2556
- jbd2_free(jh->b_committed_data, bh->b_size);
2557
- }
2819
+
2820
+ /* Unlink before dropping the lock */
25582821 bh->b_private = NULL;
25592822 jh->b_bh = NULL; /* debug, really */
25602823 clear_buffer_jbd(bh);
2824
+}
2825
+
2826
+static void journal_release_journal_head(struct journal_head *jh, size_t b_size)
2827
+{
2828
+ if (jh->b_frozen_data) {
2829
+ printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
2830
+ jbd2_free(jh->b_frozen_data, b_size);
2831
+ }
2832
+ if (jh->b_committed_data) {
2833
+ printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
2834
+ jbd2_free(jh->b_committed_data, b_size);
2835
+ }
25612836 journal_free_journal_head(jh);
25622837 }
25632838
....@@ -2575,10 +2850,13 @@
25752850 if (!jh->b_jcount) {
25762851 __journal_remove_journal_head(bh);
25772852 jbd_unlock_bh_journal_head(bh);
2853
+ journal_release_journal_head(jh, bh->b_size);
25782854 __brelse(bh);
2579
- } else
2855
+ } else {
25802856 jbd_unlock_bh_journal_head(bh);
2857
+ }
25812858 }
2859
+EXPORT_SYMBOL(jbd2_journal_put_journal_head);
25822860
25832861 /*
25842862 * Initialize jbd inode head
....@@ -2744,6 +3022,7 @@
27443022 }
27453023
27463024 MODULE_LICENSE("GPL");
3025
+MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY);
27473026 module_init(journal_init);
27483027 module_exit(journal_exit);
27493028