hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/fs/btrfs/delayed-inode.c
....@@ -7,11 +7,13 @@
77 #include <linux/slab.h>
88 #include <linux/iversion.h>
99 #include <linux/sched/mm.h>
10
+#include "misc.h"
1011 #include "delayed-inode.h"
1112 #include "disk-io.h"
1213 #include "transaction.h"
1314 #include "ctree.h"
1415 #include "qgroup.h"
16
+#include "locking.h"
1517
1618 #define BTRFS_DELAYED_WRITEBACK 512
1719 #define BTRFS_DELAYED_BACKGROUND 128
....@@ -43,8 +45,8 @@
4345 delayed_node->root = root;
4446 delayed_node->inode_id = inode_id;
4547 refcount_set(&delayed_node->refs, 0);
46
- delayed_node->ins_root = RB_ROOT;
47
- delayed_node->del_root = RB_ROOT;
48
+ delayed_node->ins_root = RB_ROOT_CACHED;
49
+ delayed_node->del_root = RB_ROOT_CACHED;
4850 mutex_init(&delayed_node->mutex);
4951 INIT_LIST_HEAD(&delayed_node->n_list);
5052 INIT_LIST_HEAD(&delayed_node->p_list);
....@@ -391,7 +393,7 @@
391393 struct btrfs_delayed_node *delayed_node,
392394 struct btrfs_key *key)
393395 {
394
- return __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
396
+ return __btrfs_lookup_delayed_item(&delayed_node->ins_root.rb_root, key,
395397 NULL, NULL);
396398 }
397399
....@@ -401,9 +403,10 @@
401403 {
402404 struct rb_node **p, *node;
403405 struct rb_node *parent_node = NULL;
404
- struct rb_root *root;
406
+ struct rb_root_cached *root;
405407 struct btrfs_delayed_item *item;
406408 int cmp;
409
+ bool leftmost = true;
407410
408411 if (action == BTRFS_DELAYED_INSERTION_ITEM)
409412 root = &delayed_node->ins_root;
....@@ -411,7 +414,7 @@
411414 root = &delayed_node->del_root;
412415 else
413416 BUG();
414
- p = &root->rb_node;
417
+ p = &root->rb_root.rb_node;
415418 node = &ins->rb_node;
416419
417420 while (*p) {
....@@ -420,16 +423,18 @@
420423 rb_node);
421424
422425 cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
423
- if (cmp < 0)
426
+ if (cmp < 0) {
424427 p = &(*p)->rb_right;
425
- else if (cmp > 0)
428
+ leftmost = false;
429
+ } else if (cmp > 0) {
426430 p = &(*p)->rb_left;
427
- else
431
+ } else {
428432 return -EEXIST;
433
+ }
429434 }
430435
431436 rb_link_node(node, parent_node, p);
432
- rb_insert_color(node, root);
437
+ rb_insert_color_cached(node, root, leftmost);
433438 ins->delayed_node = delayed_node;
434439 ins->ins_or_del = action;
435440
....@@ -469,9 +474,12 @@
469474
470475 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
471476 {
472
- struct rb_root *root;
477
+ struct rb_root_cached *root;
473478 struct btrfs_delayed_root *delayed_root;
474479
480
+ /* Not associated with any delayed_node */
481
+ if (!delayed_item->delayed_node)
482
+ return;
475483 delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
476484
477485 BUG_ON(!delayed_root);
....@@ -483,7 +491,7 @@
483491 else
484492 root = &delayed_item->delayed_node->del_root;
485493
486
- rb_erase(&delayed_item->rb_node, root);
494
+ rb_erase_cached(&delayed_item->rb_node, root);
487495 delayed_item->delayed_node->count--;
488496
489497 finish_one_item(delayed_root);
....@@ -504,7 +512,7 @@
504512 struct rb_node *p;
505513 struct btrfs_delayed_item *item = NULL;
506514
507
- p = rb_first(&delayed_node->ins_root);
515
+ p = rb_first_cached(&delayed_node->ins_root);
508516 if (p)
509517 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
510518
....@@ -517,7 +525,7 @@
517525 struct rb_node *p;
518526 struct btrfs_delayed_item *item = NULL;
519527
520
- p = rb_first(&delayed_node->del_root);
528
+ p = rb_first_cached(&delayed_node->del_root);
521529 if (p)
522530 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
523531
....@@ -553,14 +561,14 @@
553561 src_rsv = trans->block_rsv;
554562 dst_rsv = &fs_info->delayed_block_rsv;
555563
556
- num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
564
+ num_bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
557565
558566 /*
559567 * Here we migrate space rsv from transaction rsv, since we have
560568 * already reserved space when starting a transaction. So there is
561569 * no need to reserve qgroup space here.
562570 */
563
- ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
571
+ ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, true);
564572 if (!ret) {
565573 trace_btrfs_space_reservation(fs_info, "delayed_item",
566574 item->key.objectid,
....@@ -588,8 +596,7 @@
588596 trace_btrfs_space_reservation(fs_info, "delayed_item",
589597 item->key.objectid, item->bytes_reserved,
590598 0);
591
- btrfs_block_rsv_release(fs_info, rsv,
592
- item->bytes_reserved);
599
+ btrfs_block_rsv_release(fs_info, rsv, item->bytes_reserved, NULL);
593600 }
594601
595602 static int btrfs_delayed_inode_reserve_metadata(
....@@ -607,7 +614,7 @@
607614 src_rsv = trans->block_rsv;
608615 dst_rsv = &fs_info->delayed_block_rsv;
609616
610
- num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
617
+ num_bytes = btrfs_calc_metadata_size(fs_info, 1);
611618
612619 /*
613620 * btrfs_dirty_inode will update the inode under btrfs_join_transaction
....@@ -620,7 +627,8 @@
620627 */
621628 if (!src_rsv || (!trans->bytes_reserved &&
622629 src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
623
- ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
630
+ ret = btrfs_qgroup_reserve_meta(root, num_bytes,
631
+ BTRFS_QGROUP_RSV_META_PREALLOC, true);
624632 if (ret < 0)
625633 return ret;
626634 ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
....@@ -647,7 +655,7 @@
647655 return ret;
648656 }
649657
650
- ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
658
+ ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, true);
651659 if (!ret) {
652660 trace_btrfs_space_reservation(fs_info, "delayed_inode",
653661 btrfs_ino(inode), num_bytes, 1);
....@@ -669,8 +677,7 @@
669677 rsv = &fs_info->delayed_block_rsv;
670678 trace_btrfs_space_reservation(fs_info, "delayed_inode",
671679 node->inode_id, node->bytes_reserved, 0);
672
- btrfs_block_rsv_release(fs_info, rsv,
673
- node->bytes_reserved);
680
+ btrfs_block_rsv_release(fs_info, rsv, node->bytes_reserved, NULL);
674681 if (qgroup_free)
675682 btrfs_qgroup_free_meta_prealloc(node->root,
676683 node->bytes_reserved);
....@@ -688,7 +695,6 @@
688695 struct btrfs_path *path,
689696 struct btrfs_delayed_item *item)
690697 {
691
- struct btrfs_fs_info *fs_info = root->fs_info;
692698 struct btrfs_delayed_item *curr, *next;
693699 int free_space;
694700 int total_data_size = 0, total_size = 0;
....@@ -705,7 +711,7 @@
705711 BUG_ON(!path->nodes[0]);
706712
707713 leaf = path->nodes[0];
708
- free_space = btrfs_leaf_free_space(fs_info, leaf);
714
+ free_space = btrfs_leaf_free_space(leaf);
709715 INIT_LIST_HEAD(&head);
710716
711717 next = item;
....@@ -762,12 +768,8 @@
762768 i++;
763769 }
764770
765
- /* reset all the locked nodes in the patch to spinning locks. */
766
- btrfs_clear_path_blocking(path, NULL, 0);
767
-
768771 /* insert the keys of the items */
769
- setup_items_for_insert(root, path, keys, data_size,
770
- total_data_size, total_size, nitems);
772
+ setup_items_for_insert(root, path, keys, data_size, nitems);
771773
772774 /* insert the dir index items */
773775 slot = path->slots[0];
....@@ -1153,7 +1155,7 @@
11531155 int ret = 0;
11541156 bool count = (nr > 0);
11551157
1156
- if (trans->aborted)
1158
+ if (TRANS_ABORTED(trans))
11571159 return -EIO;
11581160
11591161 path = btrfs_alloc_path();
....@@ -1171,20 +1173,33 @@
11711173 ret = __btrfs_commit_inode_delayed_items(trans, path,
11721174 curr_node);
11731175 if (ret) {
1174
- btrfs_release_delayed_node(curr_node);
1175
- curr_node = NULL;
11761176 btrfs_abort_transaction(trans, ret);
11771177 break;
11781178 }
11791179
11801180 prev_node = curr_node;
11811181 curr_node = btrfs_next_delayed_node(curr_node);
1182
+ /*
1183
+ * See the comment below about releasing path before releasing
1184
+ * node. If the commit of delayed items was successful the path
1185
+ * should always be released, but in case of an error, it may
1186
+ * point to locked extent buffers (a leaf at the very least).
1187
+ */
1188
+ ASSERT(path->nodes[0] == NULL);
11821189 btrfs_release_delayed_node(prev_node);
11831190 }
11841191
1192
+ /*
1193
+ * Release the path to avoid a potential deadlock and lockdep splat when
1194
+ * releasing the delayed node, as that requires taking the delayed node's
1195
+ * mutex. If another task starts running delayed items before we take
1196
+ * the mutex, it will first lock the mutex and then it may try to lock
1197
+ * the same btree path (leaf).
1198
+ */
1199
+ btrfs_free_path(path);
1200
+
11851201 if (curr_node)
11861202 btrfs_release_delayed_node(curr_node);
1187
- btrfs_free_path(path);
11881203 trans->block_rsv = block_rsv;
11891204
11901205 return ret;
....@@ -1382,8 +1397,8 @@
13821397 return -ENOMEM;
13831398
13841399 async_work->delayed_root = delayed_root;
1385
- btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
1386
- btrfs_async_run_delayed_root, NULL, NULL);
1400
+ btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, NULL,
1401
+ NULL);
13871402 async_work->nr = nr;
13881403
13891404 btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
....@@ -1480,7 +1495,7 @@
14801495 if (unlikely(ret)) {
14811496 btrfs_err(trans->fs_info,
14821497 "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1483
- name_len, name, delayed_node->root->objectid,
1498
+ name_len, name, delayed_node->root->root_key.objectid,
14841499 delayed_node->inode_id, ret);
14851500 BUG();
14861501 }
....@@ -1544,15 +1559,22 @@
15441559 * we have reserved enough space when we start a new transaction,
15451560 * so a metadata reservation failure should be impossible.
15461561 */
1547
- BUG_ON(ret);
1562
+ if (ret < 0) {
1563
+ btrfs_err(trans->fs_info,
1564
+"metadata reservation failed for delayed dir item deltiona, should have been reserved");
1565
+ btrfs_release_delayed_item(item);
1566
+ goto end;
1567
+ }
15481568
15491569 mutex_lock(&node->mutex);
15501570 ret = __btrfs_add_delayed_deletion_item(node, item);
15511571 if (unlikely(ret)) {
15521572 btrfs_err(trans->fs_info,
15531573 "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1554
- index, node->root->objectid, node->inode_id, ret);
1555
- BUG();
1574
+ index, node->root->root_key.objectid,
1575
+ node->inode_id, ret);
1576
+ btrfs_delayed_item_release_metadata(dir->root, item);
1577
+ btrfs_release_delayed_item(item);
15561578 }
15571579 mutex_unlock(&node->mutex);
15581580 end:
....@@ -1709,7 +1731,7 @@
17091731 name = (char *)(di + 1);
17101732 name_len = btrfs_stack_dir_name_len(di);
17111733
1712
- d_type = btrfs_filetype_table[di->type];
1734
+ d_type = fs_ftype_to_dtype(di->type);
17131735 btrfs_disk_key_to_cpu(&location, &di->location);
17141736
17151737 over = !dir_emit(ctx, name, name_len,
....@@ -1767,6 +1789,7 @@
17671789
17681790 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
17691791 {
1792
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
17701793 struct btrfs_delayed_node *delayed_node;
17711794 struct btrfs_inode_item *inode_item;
17721795
....@@ -1786,6 +1809,8 @@
17861809 i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
17871810 i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
17881811 btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
1812
+ btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0,
1813
+ round_up(i_size_read(inode), fs_info->sectorsize));
17891814 inode->i_mode = btrfs_stack_inode_mode(inode_item);
17901815 set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
17911816 inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));