From 95099d4622f8cb224d94e314c7a8e0df60b13f87 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Sat, 09 Dec 2023 08:38:01 +0000
Subject: [PATCH] enable docker ppp
---
kernel/fs/ocfs2/alloc.c | 250 ++++++++++++++++++++++++++++---------------------
1 files changed, 142 insertions(+), 108 deletions(-)
diff --git a/kernel/fs/ocfs2/alloc.c b/kernel/fs/ocfs2/alloc.c
index 046f5e3..a9a6276 100644
--- a/kernel/fs/ocfs2/alloc.c
+++ b/kernel/fs/ocfs2/alloc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
@@ -6,21 +7,6 @@
* Extent allocs and frees
*
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
*/
#include <linux/fs.h>
@@ -1074,7 +1060,6 @@
brelse(bhs[i]);
bhs[i] = NULL;
}
- mlog_errno(status);
}
return status;
}
@@ -2302,9 +2287,9 @@
int ret = 0;
int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
- if (handle->h_buffer_credits < credits)
+ if (jbd2_handle_buffer_credits(handle) < credits)
ret = ocfs2_extend_trans(handle,
- credits - handle->h_buffer_credits);
+ credits - jbd2_handle_buffer_credits(handle));
return ret;
}
@@ -2381,7 +2366,7 @@
struct ocfs2_path *right_path,
struct ocfs2_path **ret_left_path)
{
- int ret, start, orig_credits = handle->h_buffer_credits;
+ int ret, start, orig_credits = jbd2_handle_buffer_credits(handle);
u32 cpos;
struct ocfs2_path *left_path = NULL;
struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
@@ -3162,7 +3147,7 @@
struct ocfs2_path *path,
struct ocfs2_cached_dealloc_ctxt *dealloc)
{
- int ret, orig_credits = handle->h_buffer_credits;
+ int ret, orig_credits = jbd2_handle_buffer_credits(handle);
struct ocfs2_path *tmp_path = NULL, *restart_path = NULL;
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el;
@@ -3400,8 +3385,8 @@
right_path);
ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
- handle->h_buffer_credits,
- right_path);
+ jbd2_handle_buffer_credits(handle),
+ right_path);
if (ret) {
mlog_errno(ret);
goto out;
@@ -3562,8 +3547,8 @@
right_path);
ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
- handle->h_buffer_credits,
- left_path);
+ jbd2_handle_buffer_credits(handle),
+ left_path);
if (ret) {
mlog_errno(ret);
goto out;
@@ -3637,7 +3622,7 @@
le16_to_cpu(el->l_next_free_rec) == 1) {
/* extend credit for ocfs2_remove_rightmost_path */
ret = ocfs2_extend_rotate_transaction(handle, 0,
- handle->h_buffer_credits,
+ jbd2_handle_buffer_credits(handle),
right_path);
if (ret) {
mlog_errno(ret);
@@ -3683,7 +3668,7 @@
if (ctxt->c_split_covers_rec && ctxt->c_has_empty_extent) {
/* extend credit for ocfs2_remove_rightmost_path */
ret = ocfs2_extend_rotate_transaction(handle, 0,
- handle->h_buffer_credits,
+ jbd2_handle_buffer_credits(handle),
path);
if (ret) {
mlog_errno(ret);
@@ -3739,7 +3724,7 @@
/* extend credit for ocfs2_remove_rightmost_path */
ret = ocfs2_extend_rotate_transaction(handle, 0,
- handle->h_buffer_credits,
+ jbd2_handle_buffer_credits(handle),
path);
if (ret) {
mlog_errno(ret);
@@ -3769,7 +3754,7 @@
/* extend credit for ocfs2_remove_rightmost_path */
ret = ocfs2_extend_rotate_transaction(handle, 0,
- handle->h_buffer_credits,
+ jbd2_handle_buffer_credits(handle),
path);
if (ret) {
mlog_errno(ret);
@@ -3813,7 +3798,7 @@
if (ctxt->c_split_covers_rec) {
/* extend credit for ocfs2_remove_rightmost_path */
ret = ocfs2_extend_rotate_transaction(handle, 0,
- handle->h_buffer_credits,
+ jbd2_handle_buffer_credits(handle),
path);
if (ret) {
mlog_errno(ret);
@@ -3956,7 +3941,7 @@
* above.
*
* This leaf needs to have space, either by the empty 1st
- * extent record, or by virtue of an l_next_rec < l_count.
+ * extent record, or by virtue of an l_next_free_rec < l_count.
*/
ocfs2_rotate_leaf(el, insert_rec);
}
@@ -4722,7 +4707,7 @@
struct ocfs2_alloc_context *meta_ac)
{
int status;
- int uninitialized_var(free_records);
+ int free_records;
struct buffer_head *last_eb_bh = NULL;
struct ocfs2_insert_type insert = {0, };
struct ocfs2_extent_rec rec;
@@ -5106,8 +5091,6 @@
* rightmost extent list.
*/
if (path->p_tree_depth) {
- struct ocfs2_extent_block *eb;
-
ret = ocfs2_read_extent_block(et->et_ci,
ocfs2_et_get_last_eb_blk(et),
&last_eb_bh);
@@ -5115,8 +5098,6 @@
mlog_errno(ret);
goto out;
}
-
- eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
}
if (rec->e_cpos == split_rec->e_cpos &&
@@ -5376,7 +5357,7 @@
if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
/* extend credit for ocfs2_remove_rightmost_path */
ret = ocfs2_extend_rotate_transaction(handle, 0,
- handle->h_buffer_credits,
+ jbd2_handle_buffer_credits(handle),
path);
if (ret) {
mlog_errno(ret);
@@ -5445,8 +5426,8 @@
}
ret = ocfs2_extend_rotate_transaction(handle, 0,
- handle->h_buffer_credits,
- path);
+ jbd2_handle_buffer_credits(handle),
+ path);
if (ret) {
mlog_errno(ret);
goto out;
@@ -6011,6 +5992,7 @@
struct buffer_head *data_alloc_bh = NULL;
struct ocfs2_dinode *di;
struct ocfs2_truncate_log *tl;
+ struct ocfs2_journal *journal = osb->journal;
BUG_ON(inode_trylock(tl_inode));
@@ -6028,6 +6010,20 @@
num_to_flush);
if (!num_to_flush) {
status = 0;
+ goto out;
+ }
+
+ /* Appending truncate log(TA) and flushing truncate log(TF) are
+ * two separated transactions. They can be both committed but not
+ * checkpointed. If crash occurs then, both two transaction will be
+ * replayed with several already released to global bitmap clusters.
+ * Then truncate log will be replayed resulting in cluster double free.
+ */
+ jbd2_journal_lock_updates(journal->j_journal);
+ status = jbd2_journal_flush(journal->j_journal);
+ jbd2_journal_unlock_updates(journal->j_journal);
+ if (status < 0) {
+ mlog_errno(status);
goto out;
}
@@ -6209,16 +6205,16 @@
if (le16_to_cpu(tl->tl_used)) {
trace_ocfs2_truncate_log_recovery_num(le16_to_cpu(tl->tl_used));
- *tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
+ /*
+ * Assuming the write-out below goes well, this copy will be
+ * passed back to recovery for processing.
+ */
+ *tl_copy = kmemdup(tl_bh->b_data, tl_bh->b_size, GFP_KERNEL);
if (!(*tl_copy)) {
status = -ENOMEM;
mlog_errno(status);
goto bail;
}
-
- /* Assuming the write-out below goes well, this copy
- * will be passed back to recovery for processing. */
- memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size);
/* All we need to do to clear the truncate log is set
* tl_used. */
@@ -6810,6 +6806,8 @@
struct page *page, int zero, u64 *phys)
{
int ret, partial = 0;
+ loff_t start_byte = ((loff_t)page->index << PAGE_SHIFT) + from;
+ loff_t length = to - from;
ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0);
if (ret)
@@ -6829,7 +6827,8 @@
if (ret < 0)
mlog_errno(ret);
else if (ocfs2_should_order_data(inode)) {
- ret = ocfs2_jbd2_file_inode(handle, inode);
+ ret = ocfs2_jbd2_inode_add_write(handle, inode,
+ start_byte, length);
if (ret < 0)
mlog_errno(ret);
}
@@ -7052,7 +7051,7 @@
int need_free = 0;
u32 bit_off, num;
handle_t *handle;
- u64 uninitialized_var(block);
+ u64 block;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
@@ -7518,10 +7517,11 @@
return count;
}
-int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
+static
+int ocfs2_trim_mainbm(struct super_block *sb, struct fstrim_range *range)
{
struct ocfs2_super *osb = OCFS2_SB(sb);
- u64 start, len, trimmed, first_group, last_group, group;
+ u64 start, len, trimmed = 0, first_group, last_group = 0, group = 0;
int ret, cnt;
u32 first_bit, last_bit, minlen;
struct buffer_head *main_bm_bh = NULL;
@@ -7529,7 +7529,6 @@
struct buffer_head *gd_bh = NULL;
struct ocfs2_dinode *main_bm;
struct ocfs2_group_desc *gd = NULL;
- struct ocfs2_trim_fs_info info, *pinfo = NULL;
start = range->start >> osb->s_clustersize_bits;
len = range->len >> osb->s_clustersize_bits;
@@ -7538,6 +7537,9 @@
if (minlen >= osb->bitmap_cpg || range->len < sb->s_blocksize)
return -EINVAL;
+ trace_ocfs2_trim_mainbm(start, len, minlen);
+
+next_group:
main_bm_inode = ocfs2_get_system_file_inode(osb,
GLOBAL_BITMAP_SYSTEM_INODE,
OCFS2_INVALID_SLOT);
@@ -7556,64 +7558,34 @@
}
main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data;
- if (start >= le32_to_cpu(main_bm->i_clusters)) {
- ret = -EINVAL;
- goto out_unlock;
- }
-
- len = range->len >> osb->s_clustersize_bits;
- if (start + len > le32_to_cpu(main_bm->i_clusters))
- len = le32_to_cpu(main_bm->i_clusters) - start;
-
- trace_ocfs2_trim_fs(start, len, minlen);
-
- ocfs2_trim_fs_lock_res_init(osb);
- ret = ocfs2_trim_fs_lock(osb, NULL, 1);
- if (ret < 0) {
- if (ret != -EAGAIN) {
- mlog_errno(ret);
- ocfs2_trim_fs_lock_res_uninit(osb);
+ /*
+ * Do some check before trim the first group.
+ */
+ if (!group) {
+ if (start >= le32_to_cpu(main_bm->i_clusters)) {
+ ret = -EINVAL;
goto out_unlock;
}
- mlog(ML_NOTICE, "Wait for trim on device (%s) to "
- "finish, which is running from another node.\n",
- osb->dev_str);
- ret = ocfs2_trim_fs_lock(osb, &info, 0);
- if (ret < 0) {
- mlog_errno(ret);
- ocfs2_trim_fs_lock_res_uninit(osb);
- goto out_unlock;
- }
+ if (start + len > le32_to_cpu(main_bm->i_clusters))
+ len = le32_to_cpu(main_bm->i_clusters) - start;
- if (info.tf_valid && info.tf_success &&
- info.tf_start == start && info.tf_len == len &&
- info.tf_minlen == minlen) {
- /* Avoid sending duplicated trim to a shared device */
- mlog(ML_NOTICE, "The same trim on device (%s) was "
- "just done from node (%u), return.\n",
- osb->dev_str, info.tf_nodenum);
- range->len = info.tf_trimlen;
- goto out_trimunlock;
- }
+ /*
+ * Determine first and last group to examine based on
+ * start and len
+ */
+ first_group = ocfs2_which_cluster_group(main_bm_inode, start);
+ if (first_group == osb->first_cluster_group_blkno)
+ first_bit = start;
+ else
+ first_bit = start - ocfs2_blocks_to_clusters(sb,
+ first_group);
+ last_group = ocfs2_which_cluster_group(main_bm_inode,
+ start + len - 1);
+ group = first_group;
}
- info.tf_nodenum = osb->node_num;
- info.tf_start = start;
- info.tf_len = len;
- info.tf_minlen = minlen;
-
- /* Determine first and last group to examine based on start and len */
- first_group = ocfs2_which_cluster_group(main_bm_inode, start);
- if (first_group == osb->first_cluster_group_blkno)
- first_bit = start;
- else
- first_bit = start - ocfs2_blocks_to_clusters(sb, first_group);
- last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1);
- last_bit = osb->bitmap_cpg;
-
- trimmed = 0;
- for (group = first_group; group <= last_group;) {
+ do {
if (first_bit + len >= osb->bitmap_cpg)
last_bit = osb->bitmap_cpg;
else
@@ -7645,21 +7617,83 @@
group = ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
else
group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
- }
- range->len = trimmed * sb->s_blocksize;
+ } while (0);
- info.tf_trimlen = range->len;
- info.tf_success = (ret ? 0 : 1);
- pinfo = &info;
-out_trimunlock:
- ocfs2_trim_fs_unlock(osb, pinfo);
- ocfs2_trim_fs_lock_res_uninit(osb);
out_unlock:
ocfs2_inode_unlock(main_bm_inode, 0);
brelse(main_bm_bh);
+ main_bm_bh = NULL;
out_mutex:
inode_unlock(main_bm_inode);
iput(main_bm_inode);
+
+ /*
+ * If all the groups trim are not done or failed, but we should release
+ * main_bm related locks for avoiding the current IO starve, then go to
+ * trim the next group
+ */
+ if (ret >= 0 && group <= last_group) {
+ cond_resched();
+ goto next_group;
+ }
out:
+ range->len = trimmed * sb->s_blocksize;
+ return ret;
+}
+
+int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
+{
+ int ret;
+ struct ocfs2_super *osb = OCFS2_SB(sb);
+ struct ocfs2_trim_fs_info info, *pinfo = NULL;
+
+ ocfs2_trim_fs_lock_res_init(osb);
+
+ trace_ocfs2_trim_fs(range->start, range->len, range->minlen);
+
+ ret = ocfs2_trim_fs_lock(osb, NULL, 1);
+ if (ret < 0) {
+ if (ret != -EAGAIN) {
+ mlog_errno(ret);
+ ocfs2_trim_fs_lock_res_uninit(osb);
+ return ret;
+ }
+
+ mlog(ML_NOTICE, "Wait for trim on device (%s) to "
+ "finish, which is running from another node.\n",
+ osb->dev_str);
+ ret = ocfs2_trim_fs_lock(osb, &info, 0);
+ if (ret < 0) {
+ mlog_errno(ret);
+ ocfs2_trim_fs_lock_res_uninit(osb);
+ return ret;
+ }
+
+ if (info.tf_valid && info.tf_success &&
+ info.tf_start == range->start &&
+ info.tf_len == range->len &&
+ info.tf_minlen == range->minlen) {
+ /* Avoid sending duplicated trim to a shared device */
+ mlog(ML_NOTICE, "The same trim on device (%s) was "
+ "just done from node (%u), return.\n",
+ osb->dev_str, info.tf_nodenum);
+ range->len = info.tf_trimlen;
+ goto out;
+ }
+ }
+
+ info.tf_nodenum = osb->node_num;
+ info.tf_start = range->start;
+ info.tf_len = range->len;
+ info.tf_minlen = range->minlen;
+
+ ret = ocfs2_trim_mainbm(sb, range);
+
+ info.tf_trimlen = range->len;
+ info.tf_success = (ret < 0 ? 0 : 1);
+ pinfo = &info;
+out:
+ ocfs2_trim_fs_unlock(osb, pinfo);
+ ocfs2_trim_fs_lock_res_uninit(osb);
return ret;
}
--
Gitblit v1.6.2