hc
2024-05-10 9999e48639b3cecb08ffb37358bcba3b48161b29
kernel/fs/ocfs2/alloc.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /* -*- mode: c; c-basic-offset: 8; -*-
23 * vim: noexpandtab sw=8 ts=8 sts=0:
34 *
....@@ -6,21 +7,6 @@
67 * Extent allocs and frees
78 *
89 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
9
- *
10
- * This program is free software; you can redistribute it and/or
11
- * modify it under the terms of the GNU General Public
12
- * License as published by the Free Software Foundation; either
13
- * version 2 of the License, or (at your option) any later version.
14
- *
15
- * This program is distributed in the hope that it will be useful,
16
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18
- * General Public License for more details.
19
- *
20
- * You should have received a copy of the GNU General Public
21
- * License along with this program; if not, write to the
22
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23
- * Boston, MA 021110-1307, USA.
2410 */
2511
2612 #include <linux/fs.h>
....@@ -1074,7 +1060,6 @@
10741060 brelse(bhs[i]);
10751061 bhs[i] = NULL;
10761062 }
1077
- mlog_errno(status);
10781063 }
10791064 return status;
10801065 }
....@@ -2302,9 +2287,9 @@
23022287 int ret = 0;
23032288 int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
23042289
2305
- if (handle->h_buffer_credits < credits)
2290
+ if (jbd2_handle_buffer_credits(handle) < credits)
23062291 ret = ocfs2_extend_trans(handle,
2307
- credits - handle->h_buffer_credits);
2292
+ credits - jbd2_handle_buffer_credits(handle));
23082293
23092294 return ret;
23102295 }
....@@ -2381,7 +2366,7 @@
23812366 struct ocfs2_path *right_path,
23822367 struct ocfs2_path **ret_left_path)
23832368 {
2384
- int ret, start, orig_credits = handle->h_buffer_credits;
2369
+ int ret, start, orig_credits = jbd2_handle_buffer_credits(handle);
23852370 u32 cpos;
23862371 struct ocfs2_path *left_path = NULL;
23872372 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
....@@ -3162,7 +3147,7 @@
31623147 struct ocfs2_path *path,
31633148 struct ocfs2_cached_dealloc_ctxt *dealloc)
31643149 {
3165
- int ret, orig_credits = handle->h_buffer_credits;
3150
+ int ret, orig_credits = jbd2_handle_buffer_credits(handle);
31663151 struct ocfs2_path *tmp_path = NULL, *restart_path = NULL;
31673152 struct ocfs2_extent_block *eb;
31683153 struct ocfs2_extent_list *el;
....@@ -3400,8 +3385,8 @@
34003385 right_path);
34013386
34023387 ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
3403
- handle->h_buffer_credits,
3404
- right_path);
3388
+ jbd2_handle_buffer_credits(handle),
3389
+ right_path);
34053390 if (ret) {
34063391 mlog_errno(ret);
34073392 goto out;
....@@ -3562,8 +3547,8 @@
35623547 right_path);
35633548
35643549 ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
3565
- handle->h_buffer_credits,
3566
- left_path);
3550
+ jbd2_handle_buffer_credits(handle),
3551
+ left_path);
35673552 if (ret) {
35683553 mlog_errno(ret);
35693554 goto out;
....@@ -3637,7 +3622,7 @@
36373622 le16_to_cpu(el->l_next_free_rec) == 1) {
36383623 /* extend credit for ocfs2_remove_rightmost_path */
36393624 ret = ocfs2_extend_rotate_transaction(handle, 0,
3640
- handle->h_buffer_credits,
3625
+ jbd2_handle_buffer_credits(handle),
36413626 right_path);
36423627 if (ret) {
36433628 mlog_errno(ret);
....@@ -3683,7 +3668,7 @@
36833668 if (ctxt->c_split_covers_rec && ctxt->c_has_empty_extent) {
36843669 /* extend credit for ocfs2_remove_rightmost_path */
36853670 ret = ocfs2_extend_rotate_transaction(handle, 0,
3686
- handle->h_buffer_credits,
3671
+ jbd2_handle_buffer_credits(handle),
36873672 path);
36883673 if (ret) {
36893674 mlog_errno(ret);
....@@ -3739,7 +3724,7 @@
37393724
37403725 /* extend credit for ocfs2_remove_rightmost_path */
37413726 ret = ocfs2_extend_rotate_transaction(handle, 0,
3742
- handle->h_buffer_credits,
3727
+ jbd2_handle_buffer_credits(handle),
37433728 path);
37443729 if (ret) {
37453730 mlog_errno(ret);
....@@ -3769,7 +3754,7 @@
37693754
37703755 /* extend credit for ocfs2_remove_rightmost_path */
37713756 ret = ocfs2_extend_rotate_transaction(handle, 0,
3772
- handle->h_buffer_credits,
3757
+ jbd2_handle_buffer_credits(handle),
37733758 path);
37743759 if (ret) {
37753760 mlog_errno(ret);
....@@ -3813,7 +3798,7 @@
38133798 if (ctxt->c_split_covers_rec) {
38143799 /* extend credit for ocfs2_remove_rightmost_path */
38153800 ret = ocfs2_extend_rotate_transaction(handle, 0,
3816
- handle->h_buffer_credits,
3801
+ jbd2_handle_buffer_credits(handle),
38173802 path);
38183803 if (ret) {
38193804 mlog_errno(ret);
....@@ -3956,7 +3941,7 @@
39563941 * above.
39573942 *
39583943 * This leaf needs to have space, either by the empty 1st
3959
- * extent record, or by virtue of an l_next_rec < l_count.
3944
+ * extent record, or by virtue of an l_next_free_rec < l_count.
39603945 */
39613946 ocfs2_rotate_leaf(el, insert_rec);
39623947 }
....@@ -4722,7 +4707,7 @@
47224707 struct ocfs2_alloc_context *meta_ac)
47234708 {
47244709 int status;
4725
- int uninitialized_var(free_records);
4710
+ int free_records;
47264711 struct buffer_head *last_eb_bh = NULL;
47274712 struct ocfs2_insert_type insert = {0, };
47284713 struct ocfs2_extent_rec rec;
....@@ -5106,8 +5091,6 @@
51065091 * rightmost extent list.
51075092 */
51085093 if (path->p_tree_depth) {
5109
- struct ocfs2_extent_block *eb;
5110
-
51115094 ret = ocfs2_read_extent_block(et->et_ci,
51125095 ocfs2_et_get_last_eb_blk(et),
51135096 &last_eb_bh);
....@@ -5115,8 +5098,6 @@
51155098 mlog_errno(ret);
51165099 goto out;
51175100 }
5118
-
5119
- eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
51205101 }
51215102
51225103 if (rec->e_cpos == split_rec->e_cpos &&
....@@ -5376,7 +5357,7 @@
53765357 if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
53775358 /* extend credit for ocfs2_remove_rightmost_path */
53785359 ret = ocfs2_extend_rotate_transaction(handle, 0,
5379
- handle->h_buffer_credits,
5360
+ jbd2_handle_buffer_credits(handle),
53805361 path);
53815362 if (ret) {
53825363 mlog_errno(ret);
....@@ -5445,8 +5426,8 @@
54455426 }
54465427
54475428 ret = ocfs2_extend_rotate_transaction(handle, 0,
5448
- handle->h_buffer_credits,
5449
- path);
5429
+ jbd2_handle_buffer_credits(handle),
5430
+ path);
54505431 if (ret) {
54515432 mlog_errno(ret);
54525433 goto out;
....@@ -6011,6 +5992,7 @@
60115992 struct buffer_head *data_alloc_bh = NULL;
60125993 struct ocfs2_dinode *di;
60135994 struct ocfs2_truncate_log *tl;
5995
+ struct ocfs2_journal *journal = osb->journal;
60145996
60155997 BUG_ON(inode_trylock(tl_inode));
60165998
....@@ -6028,6 +6010,20 @@
60286010 num_to_flush);
60296011 if (!num_to_flush) {
60306012 status = 0;
6013
+ goto out;
6014
+ }
6015
+
6016
+ /* Appending truncate log(TA) and flushing truncate log(TF) are
6017
+ * two separate transactions. They can both be committed but not
6018
+ * checkpointed. If a crash occurs then, both transactions will be
6019
+ * replayed with several clusters already released to the global bitmap.
6020
+ * Then truncate log will be replayed resulting in cluster double free.
6021
+ */
6022
+ jbd2_journal_lock_updates(journal->j_journal);
6023
+ status = jbd2_journal_flush(journal->j_journal);
6024
+ jbd2_journal_unlock_updates(journal->j_journal);
6025
+ if (status < 0) {
6026
+ mlog_errno(status);
60316027 goto out;
60326028 }
60336029
....@@ -6209,16 +6205,16 @@
62096205 if (le16_to_cpu(tl->tl_used)) {
62106206 trace_ocfs2_truncate_log_recovery_num(le16_to_cpu(tl->tl_used));
62116207
6212
- *tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
6208
+ /*
6209
+ * Assuming the write-out below goes well, this copy will be
6210
+ * passed back to recovery for processing.
6211
+ */
6212
+ *tl_copy = kmemdup(tl_bh->b_data, tl_bh->b_size, GFP_KERNEL);
62136213 if (!(*tl_copy)) {
62146214 status = -ENOMEM;
62156215 mlog_errno(status);
62166216 goto bail;
62176217 }
6218
-
6219
- /* Assuming the write-out below goes well, this copy
6220
- * will be passed back to recovery for processing. */
6221
- memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size);
62226218
62236219 /* All we need to do to clear the truncate log is set
62246220 * tl_used. */
....@@ -6810,6 +6806,8 @@
68106806 struct page *page, int zero, u64 *phys)
68116807 {
68126808 int ret, partial = 0;
6809
+ loff_t start_byte = ((loff_t)page->index << PAGE_SHIFT) + from;
6810
+ loff_t length = to - from;
68136811
68146812 ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0);
68156813 if (ret)
....@@ -6829,7 +6827,8 @@
68296827 if (ret < 0)
68306828 mlog_errno(ret);
68316829 else if (ocfs2_should_order_data(inode)) {
6832
- ret = ocfs2_jbd2_file_inode(handle, inode);
6830
+ ret = ocfs2_jbd2_inode_add_write(handle, inode,
6831
+ start_byte, length);
68336832 if (ret < 0)
68346833 mlog_errno(ret);
68356834 }
....@@ -7052,7 +7051,7 @@
70527051 int need_free = 0;
70537052 u32 bit_off, num;
70547053 handle_t *handle;
7055
- u64 uninitialized_var(block);
7054
+ u64 block;
70567055 struct ocfs2_inode_info *oi = OCFS2_I(inode);
70577056 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
70587057 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
....@@ -7518,10 +7517,11 @@
75187517 return count;
75197518 }
75207519
7521
-int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
7520
+static
7521
+int ocfs2_trim_mainbm(struct super_block *sb, struct fstrim_range *range)
75227522 {
75237523 struct ocfs2_super *osb = OCFS2_SB(sb);
7524
- u64 start, len, trimmed, first_group, last_group, group;
7524
+ u64 start, len, trimmed = 0, first_group, last_group = 0, group = 0;
75257525 int ret, cnt;
75267526 u32 first_bit, last_bit, minlen;
75277527 struct buffer_head *main_bm_bh = NULL;
....@@ -7529,7 +7529,6 @@
75297529 struct buffer_head *gd_bh = NULL;
75307530 struct ocfs2_dinode *main_bm;
75317531 struct ocfs2_group_desc *gd = NULL;
7532
- struct ocfs2_trim_fs_info info, *pinfo = NULL;
75337532
75347533 start = range->start >> osb->s_clustersize_bits;
75357534 len = range->len >> osb->s_clustersize_bits;
....@@ -7538,6 +7537,9 @@
75387537 if (minlen >= osb->bitmap_cpg || range->len < sb->s_blocksize)
75397538 return -EINVAL;
75407539
7540
+ trace_ocfs2_trim_mainbm(start, len, minlen);
7541
+
7542
+next_group:
75417543 main_bm_inode = ocfs2_get_system_file_inode(osb,
75427544 GLOBAL_BITMAP_SYSTEM_INODE,
75437545 OCFS2_INVALID_SLOT);
....@@ -7556,64 +7558,34 @@
75567558 }
75577559 main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data;
75587560
7559
- if (start >= le32_to_cpu(main_bm->i_clusters)) {
7560
- ret = -EINVAL;
7561
- goto out_unlock;
7562
- }
7563
-
7564
- len = range->len >> osb->s_clustersize_bits;
7565
- if (start + len > le32_to_cpu(main_bm->i_clusters))
7566
- len = le32_to_cpu(main_bm->i_clusters) - start;
7567
-
7568
- trace_ocfs2_trim_fs(start, len, minlen);
7569
-
7570
- ocfs2_trim_fs_lock_res_init(osb);
7571
- ret = ocfs2_trim_fs_lock(osb, NULL, 1);
7572
- if (ret < 0) {
7573
- if (ret != -EAGAIN) {
7574
- mlog_errno(ret);
7575
- ocfs2_trim_fs_lock_res_uninit(osb);
7561
+ /*
7562
+ * Do some checks before trimming the first group.
7563
+ */
7564
+ if (!group) {
7565
+ if (start >= le32_to_cpu(main_bm->i_clusters)) {
7566
+ ret = -EINVAL;
75767567 goto out_unlock;
75777568 }
75787569
7579
- mlog(ML_NOTICE, "Wait for trim on device (%s) to "
7580
- "finish, which is running from another node.\n",
7581
- osb->dev_str);
7582
- ret = ocfs2_trim_fs_lock(osb, &info, 0);
7583
- if (ret < 0) {
7584
- mlog_errno(ret);
7585
- ocfs2_trim_fs_lock_res_uninit(osb);
7586
- goto out_unlock;
7587
- }
7570
+ if (start + len > le32_to_cpu(main_bm->i_clusters))
7571
+ len = le32_to_cpu(main_bm->i_clusters) - start;
75887572
7589
- if (info.tf_valid && info.tf_success &&
7590
- info.tf_start == start && info.tf_len == len &&
7591
- info.tf_minlen == minlen) {
7592
- /* Avoid sending duplicated trim to a shared device */
7593
- mlog(ML_NOTICE, "The same trim on device (%s) was "
7594
- "just done from node (%u), return.\n",
7595
- osb->dev_str, info.tf_nodenum);
7596
- range->len = info.tf_trimlen;
7597
- goto out_trimunlock;
7598
- }
7573
+ /*
7574
+ * Determine first and last group to examine based on
7575
+ * start and len
7576
+ */
7577
+ first_group = ocfs2_which_cluster_group(main_bm_inode, start);
7578
+ if (first_group == osb->first_cluster_group_blkno)
7579
+ first_bit = start;
7580
+ else
7581
+ first_bit = start - ocfs2_blocks_to_clusters(sb,
7582
+ first_group);
7583
+ last_group = ocfs2_which_cluster_group(main_bm_inode,
7584
+ start + len - 1);
7585
+ group = first_group;
75997586 }
76007587
7601
- info.tf_nodenum = osb->node_num;
7602
- info.tf_start = start;
7603
- info.tf_len = len;
7604
- info.tf_minlen = minlen;
7605
-
7606
- /* Determine first and last group to examine based on start and len */
7607
- first_group = ocfs2_which_cluster_group(main_bm_inode, start);
7608
- if (first_group == osb->first_cluster_group_blkno)
7609
- first_bit = start;
7610
- else
7611
- first_bit = start - ocfs2_blocks_to_clusters(sb, first_group);
7612
- last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1);
7613
- last_bit = osb->bitmap_cpg;
7614
-
7615
- trimmed = 0;
7616
- for (group = first_group; group <= last_group;) {
7588
+ do {
76177589 if (first_bit + len >= osb->bitmap_cpg)
76187590 last_bit = osb->bitmap_cpg;
76197591 else
....@@ -7645,21 +7617,83 @@
76457617 group = ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
76467618 else
76477619 group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
7648
- }
7649
- range->len = trimmed * sb->s_blocksize;
7620
+ } while (0);
76507621
7651
- info.tf_trimlen = range->len;
7652
- info.tf_success = (ret ? 0 : 1);
7653
- pinfo = &info;
7654
-out_trimunlock:
7655
- ocfs2_trim_fs_unlock(osb, pinfo);
7656
- ocfs2_trim_fs_lock_res_uninit(osb);
76577622 out_unlock:
76587623 ocfs2_inode_unlock(main_bm_inode, 0);
76597624 brelse(main_bm_bh);
7625
+ main_bm_bh = NULL;
76607626 out_mutex:
76617627 inode_unlock(main_bm_inode);
76627628 iput(main_bm_inode);
7629
+
7630
+ /*
7631
+ * If not all groups have been trimmed and no error has occurred, release
7632
+ * the main_bm related locks first to avoid starving current IO, then go
7633
+ * on to trim the next group.
7634
+ */
7635
+ if (ret >= 0 && group <= last_group) {
7636
+ cond_resched();
7637
+ goto next_group;
7638
+ }
76637639 out:
7640
+ range->len = trimmed * sb->s_blocksize;
7641
+ return ret;
7642
+}
7643
+
7644
+int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
7645
+{
7646
+ int ret;
7647
+ struct ocfs2_super *osb = OCFS2_SB(sb);
7648
+ struct ocfs2_trim_fs_info info, *pinfo = NULL;
7649
+
7650
+ ocfs2_trim_fs_lock_res_init(osb);
7651
+
7652
+ trace_ocfs2_trim_fs(range->start, range->len, range->minlen);
7653
+
7654
+ ret = ocfs2_trim_fs_lock(osb, NULL, 1);
7655
+ if (ret < 0) {
7656
+ if (ret != -EAGAIN) {
7657
+ mlog_errno(ret);
7658
+ ocfs2_trim_fs_lock_res_uninit(osb);
7659
+ return ret;
7660
+ }
7661
+
7662
+ mlog(ML_NOTICE, "Wait for trim on device (%s) to "
7663
+ "finish, which is running from another node.\n",
7664
+ osb->dev_str);
7665
+ ret = ocfs2_trim_fs_lock(osb, &info, 0);
7666
+ if (ret < 0) {
7667
+ mlog_errno(ret);
7668
+ ocfs2_trim_fs_lock_res_uninit(osb);
7669
+ return ret;
7670
+ }
7671
+
7672
+ if (info.tf_valid && info.tf_success &&
7673
+ info.tf_start == range->start &&
7674
+ info.tf_len == range->len &&
7675
+ info.tf_minlen == range->minlen) {
7676
+ /* Avoid sending duplicated trim to a shared device */
7677
+ mlog(ML_NOTICE, "The same trim on device (%s) was "
7678
+ "just done from node (%u), return.\n",
7679
+ osb->dev_str, info.tf_nodenum);
7680
+ range->len = info.tf_trimlen;
7681
+ goto out;
7682
+ }
7683
+ }
7684
+
7685
+ info.tf_nodenum = osb->node_num;
7686
+ info.tf_start = range->start;
7687
+ info.tf_len = range->len;
7688
+ info.tf_minlen = range->minlen;
7689
+
7690
+ ret = ocfs2_trim_mainbm(sb, range);
7691
+
7692
+ info.tf_trimlen = range->len;
7693
+ info.tf_success = (ret < 0 ? 0 : 1);
7694
+ pinfo = &info;
7695
+out:
7696
+ ocfs2_trim_fs_unlock(osb, pinfo);
7697
+ ocfs2_trim_fs_lock_res_uninit(osb);
76647698 return ret;
76657699 }