From bedbef8ad3e75a304af6361af235302bcc61d06b Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 14 May 2024 06:39:01 +0000 Subject: [PATCH] 修改内核路径 --- kernel/fs/jbd2/journal.c | 603 ++++++++++++++++++++++++++++++++++++++++-------------- 1 files changed, 441 insertions(+), 162 deletions(-) diff --git a/kernel/fs/jbd2/journal.c b/kernel/fs/jbd2/journal.c index 8a50722..f706d93 100644 --- a/kernel/fs/jbd2/journal.c +++ b/kernel/fs/jbd2/journal.c @@ -66,9 +66,6 @@ EXPORT_SYMBOL(jbd2_journal_set_triggers); EXPORT_SYMBOL(jbd2_journal_dirty_metadata); EXPORT_SYMBOL(jbd2_journal_forget); -#if 0 -EXPORT_SYMBOL(journal_sync_buffer); -#endif EXPORT_SYMBOL(jbd2_journal_flush); EXPORT_SYMBOL(jbd2_journal_revoke); @@ -92,16 +89,15 @@ EXPORT_SYMBOL(jbd2_journal_invalidatepage); EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); EXPORT_SYMBOL(jbd2_journal_force_commit); -EXPORT_SYMBOL(jbd2_journal_inode_add_write); -EXPORT_SYMBOL(jbd2_journal_inode_add_wait); EXPORT_SYMBOL(jbd2_journal_inode_ranged_write); EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait); +EXPORT_SYMBOL(jbd2_journal_submit_inode_data_buffers); +EXPORT_SYMBOL(jbd2_journal_finish_inode_data_buffers); EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); EXPORT_SYMBOL(jbd2_inode_cache); -static void __journal_abort_soft (journal_t *journal, int errno); static int jbd2_journal_create_slab(size_t slab_size); #ifdef CONFIG_JBD2_DEBUG @@ -144,22 +140,6 @@ return cpu_to_be32(csum); } -static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) -{ - if (!jbd2_journal_has_csum_v2or3(j)) - return 1; - - return sb->s_checksum == jbd2_superblock_csum(j, sb); -} - -static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) -{ - if (!jbd2_journal_has_csum_v2or3(j)) - return; - - sb->s_checksum = jbd2_superblock_csum(j, sb); -} - /* * Helper function used to manage commit timeouts */ @@ -179,7 +159,9 @@ * * 1) COMMIT: Every so often we need to commit the current state of the * filesystem to disk. The journal thread is responsible for writing - * all of the metadata buffers to disk. + * all of the metadata buffers to disk. If a fast commit is ongoing + * journal thread waits until it's done and then continues from + * there on. * * 2) CHECKPOINT: We cannot reuse a used section of the log file until all * of the data in that part of the log has been rewritten elsewhere on @@ -221,7 +203,7 @@ if (journal->j_flags & JBD2_UNMOUNT) goto end_loop; - jbd_debug(1, "commit_sequence=%d, commit_request=%d\n", + jbd_debug(1, "commit_sequence=%u, commit_request=%u\n", journal->j_commit_sequence, journal->j_commit_request); if (journal->j_commit_sequence != journal->j_commit_request) { @@ -342,7 +324,7 @@ * IO is in progress. do_get_write_access() handles this. * * The function returns a pointer to the buffer_head to be used for IO. - * + * * * Return value: * <0: Error @@ -384,7 +366,7 @@ /* keep subsequent assertions sane */ atomic_set(&new_bh->b_count, 1); - jbd_lock_bh_state(bh_in); + spin_lock(&jh_in->b_state_lock); repeat: /* * If a new transaction has already done a buffer copy-out, then @@ -426,13 +408,13 @@ if (need_copy_out && !done_copy_out) { char *tmp; - jbd_unlock_bh_state(bh_in); + spin_unlock(&jh_in->b_state_lock); tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); if (!tmp) { brelse(new_bh); return -ENOMEM; } - jbd_lock_bh_state(bh_in); + spin_lock(&jh_in->b_state_lock); if (jh_in->b_frozen_data) { jbd2_free(tmp, bh_in->b_size); goto repeat; @@ -485,7 +467,7 @@ __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); spin_unlock(&journal->j_list_lock); set_buffer_shadow(bh_in); - jbd_unlock_bh_state(bh_in); + spin_unlock(&jh_in->b_state_lock); return do_escape | (done_copy_out << 1); } @@ -518,7 +500,7 @@ */ journal->j_commit_request = target; - jbd_debug(1, "JBD2: requesting commit %d/%d\n", + jbd_debug(1, "JBD2: requesting commit %u/%u\n", journal->j_commit_request, journal->j_commit_sequence); journal->j_running_transaction->t_requested = jiffies; @@ -531,7 +513,7 @@ WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n", journal->j_commit_request, journal->j_commit_sequence, - target, journal->j_running_transaction ? + target, journal->j_running_transaction ? journal->j_running_transaction->t_tid : 0); return 0; } @@ -584,12 +566,14 @@ } /** - * Force and wait upon a commit if the calling process is not within - * transaction. This is used for forcing out undo-protected data which contains - * bitmaps, when the fs is running out of space. + * jbd2_journal_force_commit_nested - Force and wait upon a commit if the + * calling process is not within transaction. * * @journal: journal to force * Returns true if progress was made. + * + * This is used for forcing out undo-protected data which contains + * bitmaps, when the fs is running out of space. */ int jbd2_journal_force_commit_nested(journal_t *journal) { @@ -600,7 +584,7 @@ } /** - * int journal_force_commit() - force any uncommitted transactions + * jbd2_journal_force_commit() - force any uncommitted transactions * @journal: journal to force * * Caller want unconditional commit. We can only force the running transaction @@ -716,12 +700,12 @@ #ifdef CONFIG_JBD2_DEBUG if (!tid_geq(journal->j_commit_request, tid)) { printk(KERN_ERR - "%s: error: j_commit_request=%d, tid=%d\n", + "%s: error: j_commit_request=%u, tid=%u\n", __func__, journal->j_commit_request, tid); } #endif while (tid_gt(tid, journal->j_commit_sequence)) { - jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n", + jbd_debug(1, "JBD2: want %u, j_commit_sequence=%u\n", tid, journal->j_commit_sequence); read_unlock(&journal->j_state_lock); wake_up(&journal->j_wait_commit); @@ -735,6 +719,87 @@ err = -EIO; return err; } + +/* + * Start a fast commit. If there's an ongoing fast or full commit wait for + * it to complete. Returns 0 if a new fast commit was started. Returns -EALREADY + * if a fast commit is not needed, either because there's an already a commit + * going on or this tid has already been committed. Returns -EINVAL if no jbd2 + * commit has yet been performed. + */ +int jbd2_fc_begin_commit(journal_t *journal, tid_t tid) +{ + if (unlikely(is_journal_aborted(journal))) + return -EIO; + /* + * Fast commits only allowed if at least one full commit has + * been processed. + */ + if (!journal->j_stats.ts_tid) + return -EINVAL; + + write_lock(&journal->j_state_lock); + if (tid <= journal->j_commit_sequence) { + write_unlock(&journal->j_state_lock); + return -EALREADY; + } + + if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING || + (journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) { + DEFINE_WAIT(wait); + + prepare_to_wait(&journal->j_fc_wait, &wait, + TASK_UNINTERRUPTIBLE); + write_unlock(&journal->j_state_lock); + schedule(); + finish_wait(&journal->j_fc_wait, &wait); + return -EALREADY; + } + journal->j_flags |= JBD2_FAST_COMMIT_ONGOING; + write_unlock(&journal->j_state_lock); + jbd2_journal_lock_updates(journal); + + return 0; +} +EXPORT_SYMBOL(jbd2_fc_begin_commit); + +/* + * Stop a fast commit. If fallback is set, this function starts commit of + * TID tid before any other fast commit can start. + */ +static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback) +{ + jbd2_journal_unlock_updates(journal); + if (journal->j_fc_cleanup_callback) + journal->j_fc_cleanup_callback(journal, 0); + write_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING; + if (fallback) + journal->j_flags |= JBD2_FULL_COMMIT_ONGOING; + write_unlock(&journal->j_state_lock); + wake_up(&journal->j_fc_wait); + if (fallback) + return jbd2_complete_transaction(journal, tid); + return 0; +} + +int jbd2_fc_end_commit(journal_t *journal) +{ + return __jbd2_fc_end_commit(journal, 0, false); +} +EXPORT_SYMBOL(jbd2_fc_end_commit); + +int jbd2_fc_end_commit_fallback(journal_t *journal) +{ + tid_t tid; + + read_lock(&journal->j_state_lock); + tid = journal->j_running_transaction ? + journal->j_running_transaction->t_tid : 0; + read_unlock(&journal->j_state_lock); + return __jbd2_fc_end_commit(journal, tid, true); +} +EXPORT_SYMBOL(jbd2_fc_end_commit_fallback); /* Return 1 when transaction with given tid has already committed. */ int jbd2_transaction_committed(journal_t *journal, tid_t tid) @@ -804,6 +869,106 @@ return jbd2_journal_bmap(journal, blocknr, retp); } +/* Map one fast commit buffer for use by the file system */ +int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out) +{ + unsigned long long pblock; + unsigned long blocknr; + int ret = 0; + struct buffer_head *bh; + int fc_off; + + *bh_out = NULL; + + if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) { + fc_off = journal->j_fc_off; + blocknr = journal->j_fc_first + fc_off; + journal->j_fc_off++; + } else { + ret = -EINVAL; + } + + if (ret) + return ret; + + ret = jbd2_journal_bmap(journal, blocknr, &pblock); + if (ret) + return ret; + + bh = __getblk(journal->j_dev, pblock, journal->j_blocksize); + if (!bh) + return -ENOMEM; + + + journal->j_fc_wbuf[fc_off] = bh; + + *bh_out = bh; + + return 0; +} +EXPORT_SYMBOL(jbd2_fc_get_buf); + +/* + * Wait on fast commit buffers that were allocated by jbd2_fc_get_buf + * for completion. + */ +int jbd2_fc_wait_bufs(journal_t *journal, int num_blks) +{ + struct buffer_head *bh; + int i, j_fc_off; + + j_fc_off = journal->j_fc_off; + + /* + * Wait in reverse order to minimize chances of us being woken up before + * all IOs have completed + */ + for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) { + bh = journal->j_fc_wbuf[i]; + wait_on_buffer(bh); + /* + * Update j_fc_off so jbd2_fc_release_bufs can release remain + * buffer head. + */ + if (unlikely(!buffer_uptodate(bh))) { + journal->j_fc_off = i + 1; + return -EIO; + } + put_bh(bh); + journal->j_fc_wbuf[i] = NULL; + } + + return 0; +} +EXPORT_SYMBOL(jbd2_fc_wait_bufs); + +/* + * Wait on fast commit buffers that were allocated by jbd2_fc_get_buf + * for completion. + */ +int jbd2_fc_release_bufs(journal_t *journal) +{ + struct buffer_head *bh; + int i, j_fc_off; + + j_fc_off = journal->j_fc_off; + + /* + * Wait in reverse order to minimize chances of us being woken up before + * all IOs have completed + */ + for (i = j_fc_off - 1; i >= 0; i--) { + bh = journal->j_fc_wbuf[i]; + if (!bh) + break; + put_bh(bh); + journal->j_fc_wbuf[i] = NULL; + } + + return 0; +} +EXPORT_SYMBOL(jbd2_fc_release_bufs); + /* * Conversion of logical to physical block numbers for the journal * @@ -816,18 +981,22 @@ { int err = 0; unsigned long long ret; + sector_t block = 0; if (journal->j_inode) { - ret = bmap(journal->j_inode, blocknr); - if (ret) - *retp = ret; - else { + block = blocknr; + ret = bmap(journal->j_inode, &block); + + if (ret || !block) { printk(KERN_ALERT "%s: journal block not found " "at offset %lu on %s\n", __func__, blocknr, journal->j_devname); err = -EIO; - __journal_abort_soft(journal, err); + jbd2_journal_abort(journal, err); + } else { + *retp = block; } + } else { *retp = blocknr; /* +journal->j_blk_offset */ } @@ -861,6 +1030,7 @@ bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); if (!bh) return NULL; + atomic_dec(&transaction->t_outstanding_credits); lock_buffer(bh); memset(bh->b_data, 0, journal->j_blocksize); header = (journal_header_t *)bh->b_data; @@ -962,7 +1132,7 @@ trace_jbd2_update_log_tail(journal, tid, block, freed); jbd_debug(1, - "Cleaning journal tail from %d to %d (offset %lu), " + "Cleaning journal tail from %u to %u (offset %lu), " "freeing %lu\n", journal->j_tail_sequence, tid, block, freed); @@ -1095,12 +1265,11 @@ return seq_release(inode, file); } -static const struct file_operations jbd2_seq_info_fops = { - .owner = THIS_MODULE, - .open = jbd2_seq_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = jbd2_seq_info_release, +static const struct proc_ops jbd2_info_proc_ops = { + .proc_open = jbd2_seq_info_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = jbd2_seq_info_release, }; static struct proc_dir_entry *proc_jbd2_stats; @@ -1110,7 +1279,7 @@ journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats); if (journal->j_proc_entry) { proc_create_data("info", S_IRUGO, journal->j_proc_entry, - &jbd2_seq_info_fops, journal); + &jbd2_info_proc_ops, journal); } } @@ -1118,6 +1287,16 @@ { remove_proc_entry("info", journal->j_proc_entry); remove_proc_entry(journal->j_devname, proc_jbd2_stats); +} + +/* Minimum size of descriptor tag */ +static int jbd2_min_tag_size(void) +{ + /* + * Tag with 32-bit block numbers does not use last four bytes of the + * structure + */ + return sizeof(journal_block_tag_t) - 4; } /* @@ -1148,6 +1327,8 @@ init_waitqueue_head(&journal->j_wait_commit); init_waitqueue_head(&journal->j_wait_updates); init_waitqueue_head(&journal->j_wait_reserved); + init_waitqueue_head(&journal->j_fc_wait); + mutex_init(&journal->j_abort_mutex); mutex_init(&journal->j_barrier); mutex_init(&journal->j_checkpoint_mutex); spin_lock_init(&journal->j_revoke_lock); @@ -1177,9 +1358,11 @@ journal->j_dev = bdev; journal->j_fs_dev = fs_dev; journal->j_blk_offset = start; - journal->j_maxlen = len; - n = journal->j_blocksize / sizeof(journal_block_tag_t); + journal->j_total_len = len; + /* We need enough buffers to write out full descriptor block. */ + n = journal->j_blocksize / jbd2_min_tag_size(); journal->j_wbufsize = n; + journal->j_fc_wbuf = NULL; journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *), GFP_KERNEL); if (!journal->j_wbuf) @@ -1254,11 +1437,14 @@ journal_t *jbd2_journal_init_inode(struct inode *inode) { journal_t *journal; + sector_t blocknr; char *p; - unsigned long long blocknr; + int err = 0; - blocknr = bmap(inode, 0); - if (!blocknr) { + blocknr = 0; + err = bmap(inode, &blocknr); + + if (err || !blocknr) { pr_err("%s: Cannot locate journal superblock\n", __func__); return NULL; @@ -1288,7 +1474,7 @@ * superblock as being NULL to prevent the journal destroy from writing * back a bogus superblock. */ -static void journal_fail_superblock (journal_t *journal) +static void journal_fail_superblock(journal_t *journal) { struct buffer_head *bh = journal->j_sb_buffer; brelse(bh); @@ -1319,15 +1505,22 @@ journal->j_first = first; journal->j_last = last; - journal->j_head = first; - journal->j_tail = first; - journal->j_free = last - first; + journal->j_head = journal->j_first; + journal->j_tail = journal->j_first; + journal->j_free = journal->j_last - journal->j_first; journal->j_tail_sequence = journal->j_transaction_sequence; journal->j_commit_sequence = journal->j_transaction_sequence - 1; journal->j_commit_request = journal->j_commit_sequence; - journal->j_max_transaction_buffers = journal->j_maxlen / 4; + journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal); + + /* + * Now that journal recovery is done, turn fast commits off here. This + * way, if fast commit was enabled before the crash but if now FS has + * disabled it, we don't enable fast commits. + */ + jbd2_clear_feature_fast_commit(journal); /* * As a special case, if the on-disk copy is already marked as needing @@ -1337,7 +1530,7 @@ */ if (sb->s_start == 0) { jbd_debug(1, "JBD2: Skipping superblock update on recovered sb " - "(start %ld, seq %d, errno %d)\n", + "(start %ld, seq %u, errno %d)\n", journal->j_tail, journal->j_tail_sequence, journal->j_errno); journal->j_flags |= JBD2_FLUSHED; @@ -1393,7 +1586,8 @@ clear_buffer_write_io_error(bh); set_buffer_uptodate(bh); } - jbd2_superblock_csum_set(journal, sb); + if (jbd2_journal_has_csum_v2or3(journal)) + sb->s_checksum = jbd2_superblock_csum(journal, sb); get_bh(bh); bh->b_end_io = end_buffer_write_sync; ret = submit_bh(REQ_OP_WRITE, write_flags, bh); @@ -1407,7 +1601,8 @@ printk(KERN_ERR "JBD2: Error %d detected when updating " "journal superblock for %s.\n", ret, journal->j_devname); - jbd2_journal_abort(journal, ret); + if (!is_journal_aborted(journal)) + jbd2_journal_abort(journal, ret); } return ret; @@ -1465,6 +1660,7 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op) { journal_superblock_t *sb = journal->j_superblock; + bool had_fast_commit = false; BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); lock_buffer(journal->j_sb_buffer); @@ -1473,13 +1669,24 @@ return; } - jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", + jbd_debug(1, "JBD2: Marking journal as empty (seq %u)\n", journal->j_tail_sequence); sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); sb->s_start = cpu_to_be32(0); + if (jbd2_has_feature_fast_commit(journal)) { + /* + * When journal is clean, no need to commit fast commit flag and + * make file system incompatible with older kernels. + */ + jbd2_clear_feature_fast_commit(journal); + had_fast_commit = true; + } jbd2_write_superblock(journal, write_op); + + if (had_fast_commit) + jbd2_set_feature_fast_commit(journal); /* Log is no longer empty */ write_lock(&journal->j_state_lock); @@ -1510,6 +1717,21 @@ jbd2_write_superblock(journal, REQ_SYNC | REQ_FUA); } EXPORT_SYMBOL(jbd2_journal_update_sb_errno); + +static int journal_revoke_records_per_block(journal_t *journal) +{ + int record_size; + int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t); + + if (jbd2_has_feature_64bit(journal)) + record_size = 8; + else + record_size = 4; + + if (jbd2_journal_has_csum_v2or3(journal)) + space -= sizeof(struct jbd2_journal_block_tail); + return space / record_size; +} /* * Read the superblock for a given journal, performing initial @@ -1559,15 +1781,15 @@ goto out; } - if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) - journal->j_maxlen = be32_to_cpu(sb->s_maxlen); - else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { + if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len) + journal->j_total_len = be32_to_cpu(sb->s_maxlen); + else if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) { printk(KERN_WARNING "JBD2: journal file too short\n"); goto out; } if (be32_to_cpu(sb->s_first) == 0 || - be32_to_cpu(sb->s_first) >= journal->j_maxlen) { + be32_to_cpu(sb->s_first) >= journal->j_total_len) { printk(KERN_WARNING "JBD2: Invalid start block of journal: %u\n", be32_to_cpu(sb->s_first)); @@ -1606,18 +1828,21 @@ } } - /* Check superblock checksum */ - if (!jbd2_superblock_csum_verify(journal, sb)) { - printk(KERN_ERR "JBD2: journal checksum error\n"); - err = -EFSBADCRC; - goto out; - } + if (jbd2_journal_has_csum_v2or3(journal)) { + /* Check superblock checksum */ + if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) { + printk(KERN_ERR "JBD2: journal checksum error\n"); + err = -EFSBADCRC; + goto out; + } - /* Precompute checksum seed for all metadata */ - if (jbd2_journal_has_csum_v2or3(journal)) + /* Precompute checksum seed for all metadata */ journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, sizeof(sb->s_uuid)); + } + journal->j_revoke_records_per_block = + journal_revoke_records_per_block(journal); set_buffer_verified(bh); return 0; @@ -1636,6 +1861,7 @@ { int err; journal_superblock_t *sb; + int num_fc_blocks; err = journal_get_superblock(journal); if (err) @@ -1646,15 +1872,26 @@ journal->j_tail_sequence = be32_to_cpu(sb->s_sequence); journal->j_tail = be32_to_cpu(sb->s_start); journal->j_first = be32_to_cpu(sb->s_first); - journal->j_last = be32_to_cpu(sb->s_maxlen); journal->j_errno = be32_to_cpu(sb->s_errno); + journal->j_last = be32_to_cpu(sb->s_maxlen); + + if (jbd2_has_feature_fast_commit(journal)) { + journal->j_fc_last = be32_to_cpu(sb->s_maxlen); + num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks); + if (!num_fc_blocks) + num_fc_blocks = JBD2_MIN_FC_BLOCKS; + if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS) + journal->j_last = journal->j_fc_last - num_fc_blocks; + journal->j_fc_first = journal->j_last + 1; + journal->j_fc_off = 0; + } return 0; } /** - * int jbd2_journal_load() - Read journal from disk. + * jbd2_journal_load() - Read journal from disk. * @journal: Journal to act on. * * Given a journal_t structure which tells us which disk blocks contain @@ -1724,7 +1961,7 @@ } /** - * void jbd2_journal_destroy() - Release a journal_t structure. + * jbd2_journal_destroy() - Release a journal_t structure. * @journal: Journal to act on. * * Release a journal_t structure once it is no longer in use by the @@ -1792,6 +2029,7 @@ jbd2_journal_destroy_revoke(journal); if (journal->j_chksum_driver) crypto_free_shash(journal->j_chksum_driver); + kfree(journal->j_fc_wbuf); kfree(journal->j_wbuf); kfree(journal); @@ -1800,7 +2038,7 @@ /** - *int jbd2_journal_check_used_features () - Check if features specified are used. + * jbd2_journal_check_used_features() - Check if features specified are used. * @journal: Journal to check. * @compat: bitmask of compatible features * @ro: bitmask of features that force read-only mount @@ -1810,7 +2048,7 @@ * features. Return true (non-zero) if it does. **/ -int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, +int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat, unsigned long ro, unsigned long incompat) { journal_superblock_t *sb; @@ -1835,7 +2073,7 @@ } /** - * int jbd2_journal_check_available_features() - Check feature set in journalling layer + * jbd2_journal_check_available_features() - Check feature set in journalling layer * @journal: Journal to check. * @compat: bitmask of compatible features * @ro: bitmask of features that force read-only mount @@ -1845,7 +2083,7 @@ * all of a given set of features on this journal. Return true * (non-zero) if it can. */ -int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat, +int jbd2_journal_check_available_features(journal_t *journal, unsigned long compat, unsigned long ro, unsigned long incompat) { if (!compat && !ro && !incompat) @@ -1866,8 +2104,39 @@ return 0; } +static int +jbd2_journal_initialize_fast_commit(journal_t *journal) +{ + journal_superblock_t *sb = journal->j_superblock; + unsigned long long num_fc_blks; + + num_fc_blks = be32_to_cpu(sb->s_num_fc_blks); + if (num_fc_blks == 0) + num_fc_blks = JBD2_MIN_FC_BLOCKS; + if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS) + return -ENOSPC; + + /* Are we called twice? */ + WARN_ON(journal->j_fc_wbuf != NULL); + journal->j_fc_wbuf = kmalloc_array(num_fc_blks, + sizeof(struct buffer_head *), GFP_KERNEL); + if (!journal->j_fc_wbuf) + return -ENOMEM; + + journal->j_fc_wbufsize = num_fc_blks; + journal->j_fc_last = journal->j_last; + journal->j_last = journal->j_fc_last - num_fc_blks; + journal->j_fc_first = journal->j_last + 1; + journal->j_fc_off = 0; + journal->j_free = journal->j_last - journal->j_first; + journal->j_max_transaction_buffers = + jbd2_journal_get_max_txn_bufs(journal); + + return 0; +} + /** - * int jbd2_journal_set_features () - Mark a given journal feature in the superblock + * jbd2_journal_set_features() - Mark a given journal feature in the superblock * @journal: Journal to act on. * @compat: bitmask of compatible features * @ro: bitmask of features that force read-only mount @@ -1878,7 +2147,7 @@ * */ -int jbd2_journal_set_features (journal_t *journal, unsigned long compat, +int jbd2_journal_set_features(journal_t *journal, unsigned long compat, unsigned long ro, unsigned long incompat) { #define INCOMPAT_FEATURE_ON(f) \ @@ -1908,6 +2177,13 @@ compat, ro, incompat); sb = journal->j_superblock; + + if (incompat & JBD2_FEATURE_INCOMPAT_FAST_COMMIT) { + if (jbd2_journal_initialize_fast_commit(journal)) { + pr_err("JBD2: Cannot enable fast commits.\n"); + return 0; + } + } /* Load the checksum driver if necessary */ if ((journal->j_chksum_driver == NULL) && @@ -1942,6 +2218,8 @@ sb->s_feature_ro_compat |= cpu_to_be32(ro); sb->s_feature_incompat |= cpu_to_be32(incompat); unlock_buffer(journal->j_sb_buffer); + journal->j_revoke_records_per_block = + journal_revoke_records_per_block(journal); return 1; #undef COMPAT_FEATURE_ON @@ -1949,7 +2227,7 @@ } /* - * jbd2_journal_clear_features () - Clear a given journal feature in the + * jbd2_journal_clear_features() - Clear a given journal feature in the * superblock * @journal: Journal to act on. * @compat: bitmask of compatible features @@ -1972,11 +2250,13 @@ sb->s_feature_compat &= ~cpu_to_be32(compat); sb->s_feature_ro_compat &= ~cpu_to_be32(ro); sb->s_feature_incompat &= ~cpu_to_be32(incompat); + journal->j_revoke_records_per_block = + journal_revoke_records_per_block(journal); } EXPORT_SYMBOL(jbd2_journal_clear_features); /** - * int jbd2_journal_flush () - Flush journal + * jbd2_journal_flush() - Flush journal * @journal: Journal to act on. * * Flush all data for a given journal to disk and empty the journal. @@ -2051,7 +2331,7 @@ } /** - * int jbd2_journal_wipe() - Wipe journal contents + * jbd2_journal_wipe() - Wipe journal contents * @journal: Journal to act on. * @write: flag (see below) * @@ -2082,7 +2362,7 @@ err = jbd2_journal_skip_recovery(journal); if (write) { /* Lock to make assertions happy... */ - mutex_lock(&journal->j_checkpoint_mutex); + mutex_lock_io(&journal->j_checkpoint_mutex); jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } @@ -2091,66 +2371,8 @@ return err; } -/* - * Journal abort has very specific semantics, which we describe - * for journal abort. - * - * Two internal functions, which provide abort to the jbd layer - * itself are here. - */ - -/* - * Quick version for internal journal use (doesn't lock the journal). - * Aborts hard --- we mark the abort as occurred, but do _nothing_ else, - * and don't attempt to make any other journal updates. - */ -void __jbd2_journal_abort_hard(journal_t *journal) -{ - transaction_t *transaction; - - if (journal->j_flags & JBD2_ABORT) - return; - - printk(KERN_ERR "Aborting journal on device %s.\n", - journal->j_devname); - - write_lock(&journal->j_state_lock); - journal->j_flags |= JBD2_ABORT; - transaction = journal->j_running_transaction; - if (transaction) - __jbd2_log_start_commit(journal, transaction->t_tid); - write_unlock(&journal->j_state_lock); -} - -/* Soft abort: record the abort error status in the journal superblock, - * but don't do any other IO. */ -static void __journal_abort_soft (journal_t *journal, int errno) -{ - int old_errno; - - write_lock(&journal->j_state_lock); - old_errno = journal->j_errno; - if (!journal->j_errno || errno == -ESHUTDOWN) - journal->j_errno = errno; - - if (journal->j_flags & JBD2_ABORT) { - write_unlock(&journal->j_state_lock); - if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) - jbd2_journal_update_sb_errno(journal); - return; - } - write_unlock(&journal->j_state_lock); - - __jbd2_journal_abort_hard(journal); - - jbd2_journal_update_sb_errno(journal); - write_lock(&journal->j_state_lock); - journal->j_flags |= JBD2_REC_ERR; - write_unlock(&journal->j_state_lock); -} - /** - * void jbd2_journal_abort () - Shutdown the journal immediately. + * jbd2_journal_abort () - Shutdown the journal immediately. * @journal: the journal to shutdown. * @errno: an error number to record in the journal indicating * the reason for the shutdown. @@ -2192,11 +2414,56 @@ void jbd2_journal_abort(journal_t *journal, int errno) { - __journal_abort_soft(journal, errno); + transaction_t *transaction; + + /* + * Lock the aborting procedure until everything is done, this avoid + * races between filesystem's error handling flow (e.g. ext4_abort()), + * ensure panic after the error info is written into journal's + * superblock. + */ + mutex_lock(&journal->j_abort_mutex); + /* + * ESHUTDOWN always takes precedence because a file system check + * caused by any other journal abort error is not required after + * a shutdown triggered. + */ + write_lock(&journal->j_state_lock); + if (journal->j_flags & JBD2_ABORT) { + int old_errno = journal->j_errno; + + write_unlock(&journal->j_state_lock); + if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) { + journal->j_errno = errno; + jbd2_journal_update_sb_errno(journal); + } + mutex_unlock(&journal->j_abort_mutex); + return; + } + + /* + * Mark the abort as occurred and start current running transaction + * to release all journaled buffer. + */ + pr_err("Aborting journal on device %s.\n", journal->j_devname); + + journal->j_flags |= JBD2_ABORT; + journal->j_errno = errno; + transaction = journal->j_running_transaction; + if (transaction) + __jbd2_log_start_commit(journal, transaction->t_tid); + write_unlock(&journal->j_state_lock); + + /* + * Record errno to the journal super block, so that fsck and jbd2 + * layer could realise that a filesystem check is needed. + */ + jbd2_journal_update_sb_errno(journal); + mutex_unlock(&journal->j_abort_mutex); } /** - * int jbd2_journal_errno () - returns the journal's error state. + * jbd2_journal_errno() - returns the journal's error state. * @journal: journal to examine. * * This is the errno number set with jbd2_journal_abort(), the last @@ -2220,7 +2487,7 @@ } /** - * int jbd2_journal_clear_err () - clears the journal's error state + * jbd2_journal_clear_err() - clears the journal's error state * @journal: journal to act on. * * An error must be cleared or acked to take a FS out of readonly @@ -2240,7 +2507,7 @@ } /** - * void jbd2_journal_ack_err() - Ack journal err. + * jbd2_journal_ack_err() - Ack journal err. * @journal: journal to act on. * * An error must be cleared or acked to take a FS out of readonly @@ -2428,6 +2695,8 @@ ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS | __GFP_NOFAIL); } + if (ret) + spin_lock_init(&ret->b_state_lock); return ret; } @@ -2534,12 +2803,12 @@ jbd_unlock_bh_journal_head(bh); return jh; } +EXPORT_SYMBOL(jbd2_journal_grab_journal_head); static void __journal_remove_journal_head(struct buffer_head *bh) { struct journal_head *jh = bh2jh(bh); - J_ASSERT_JH(jh, jh->b_jcount >= 0); J_ASSERT_JH(jh, jh->b_transaction == NULL); J_ASSERT_JH(jh, jh->b_next_transaction == NULL); J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); @@ -2547,17 +2816,23 @@ J_ASSERT_BH(bh, buffer_jbd(bh)); J_ASSERT_BH(bh, jh2bh(jh) == bh); BUFFER_TRACE(bh, "remove journal_head"); - if (jh->b_frozen_data) { - printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); - jbd2_free(jh->b_frozen_data, bh->b_size); - } - if (jh->b_committed_data) { - printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); - jbd2_free(jh->b_committed_data, bh->b_size); - } + + /* Unlink before dropping the lock */ bh->b_private = NULL; jh->b_bh = NULL; /* debug, really */ clear_buffer_jbd(bh); +} + +static void journal_release_journal_head(struct journal_head *jh, size_t b_size) +{ + if (jh->b_frozen_data) { + printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); + jbd2_free(jh->b_frozen_data, b_size); + } + if (jh->b_committed_data) { + printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); + jbd2_free(jh->b_committed_data, b_size); + } journal_free_journal_head(jh); } @@ -2575,10 +2850,13 @@ if (!jh->b_jcount) { __journal_remove_journal_head(bh); jbd_unlock_bh_journal_head(bh); + journal_release_journal_head(jh, bh->b_size); __brelse(bh); - } else + } else { jbd_unlock_bh_journal_head(bh); + } } +EXPORT_SYMBOL(jbd2_journal_put_journal_head); /* * Initialize jbd inode head @@ -2744,6 +3022,7 @@ } MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); module_init(journal_init); module_exit(journal_exit); -- Gitblit v1.6.2