2024-05-14 bedbef8ad3e75a304af6361af235302bcc61d06b
kernel/fs/btrfs/backref.c
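The hunks below, among other changes, convert the prelim ref trees to rb_root_cached and introduce a commit-root backref iterator (btrfs_backref_iter_alloc(), btrfs_backref_iter_start(), btrfs_backref_iter_next()) together with the generic backref cache helpers. A minimal usage sketch of that iterator follows. It is illustrative only: it assumes the inline helpers declared in backref.h (btrfs_backref_iter_is_inline_ref(), btrfs_backref_iter_release(), btrfs_backref_iter_free()) behave as they are used in this file, and dump_tree_block_backrefs() is a made-up name for the example.

/*
 * Illustrative only: walk every backref of a tree block with the
 * iterator introduced in this change. Error handling mirrors how
 * btrfs_backref_add_tree_node() drives the same API.
 */
static int dump_tree_block_backrefs(struct btrfs_fs_info *fs_info, u64 bytenr)
{
	struct btrfs_backref_iter *iter;
	int ret;

	iter = btrfs_backref_iter_alloc(fs_info, GFP_NOFS);
	if (!iter)
		return -ENOMEM;

	/* Position at the first inline or keyed backref of @bytenr. */
	ret = btrfs_backref_iter_start(iter, bytenr);
	if (ret < 0)
		goto out;

	do {
		if (btrfs_backref_iter_is_inline_ref(iter)) {
			/*
			 * For inline refs, iter->cur_key still holds the
			 * extent/metadata item key; the actual ref type and
			 * offset must be decoded from iter->cur_ptr, as
			 * btrfs_backref_add_tree_node() does.
			 */
			btrfs_info(fs_info, "inline backref at offset %u",
				   iter->cur_ptr);
		} else {
			btrfs_info(fs_info, "keyed backref (%llu %u %llu)",
				   iter->cur_key.objectid, iter->cur_key.type,
				   iter->cur_key.offset);
		}
		ret = btrfs_backref_iter_next(iter);
	} while (ret == 0);

	/* ret > 0 just means there are no more backrefs for this bytenr. */
	if (ret > 0)
		ret = 0;
out:
	btrfs_backref_iter_release(iter);
	btrfs_backref_iter_free(iter);	/* assumed helper from backref.h */
	return ret;
}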
....@@ -13,6 +13,7 @@
1313 #include "transaction.h"
1414 #include "delayed-ref.h"
1515 #include "locking.h"
16
+#include "misc.h"
1617
1718 /* Just an arbitrary number so we can be sure this happened */
1819 #define BACKREF_FOUND_SHARED 6
....@@ -112,11 +113,11 @@
112113 }
113114
114115 struct preftree {
115
- struct rb_root root;
116
+ struct rb_root_cached root;
116117 unsigned int count;
117118 };
118119
119
-#define PREFTREE_INIT { .root = RB_ROOT, .count = 0 }
120
+#define PREFTREE_INIT { .root = RB_ROOT_CACHED, .count = 0 }
120121
121122 struct preftrees {
122123 struct preftree direct; /* BTRFS_SHARED_[DATA|BLOCK]_REF_KEY */
....@@ -136,6 +137,7 @@
136137 u64 root_objectid;
137138 u64 inum;
138139 int share_count;
140
+ bool have_delayed_delete_refs;
139141 };
140142
141143 static inline int extent_is_shared(struct share_check *sc)
....@@ -225,14 +227,15 @@
225227 struct prelim_ref *newref,
226228 struct share_check *sc)
227229 {
228
- struct rb_root *root;
230
+ struct rb_root_cached *root;
229231 struct rb_node **p;
230232 struct rb_node *parent = NULL;
231233 struct prelim_ref *ref;
232234 int result;
235
+ bool leftmost = true;
233236
234237 root = &preftree->root;
235
- p = &root->rb_node;
238
+ p = &root->rb_root.rb_node;
236239
237240 while (*p) {
238241 parent = *p;
....@@ -242,6 +245,7 @@
242245 p = &(*p)->rb_left;
243246 } else if (result > 0) {
244247 p = &(*p)->rb_right;
248
+ leftmost = false;
245249 } else {
246250 /* Identical refs, merge them and free @newref */
247251 struct extent_inode_elem *eie = ref->inode_list;
....@@ -272,7 +276,7 @@
272276 preftree->count++;
273277 trace_btrfs_prelim_ref_insert(fs_info, newref, NULL, preftree->count);
274278 rb_link_node(&newref->rbnode, parent, p);
275
- rb_insert_color(&newref->rbnode, root);
279
+ rb_insert_color_cached(&newref->rbnode, root, leftmost);
276280 }
277281
278282 /*
....@@ -283,11 +287,13 @@
283287 {
284288 struct prelim_ref *ref, *next_ref;
285289
286
- rbtree_postorder_for_each_entry_safe(ref, next_ref, &preftree->root,
287
- rbnode)
290
+ rbtree_postorder_for_each_entry_safe(ref, next_ref,
291
+ &preftree->root.rb_root, rbnode) {
292
+ free_inode_elem_list(ref->inode_list);
288293 free_pref(ref);
294
+ }
289295
290
- preftree->root = RB_ROOT;
296
+ preftree->root = RB_ROOT_CACHED;
291297 preftree->count = 0;
292298 }
293299
....@@ -345,33 +351,10 @@
345351 return -ENOMEM;
346352
347353 ref->root_id = root_id;
348
- if (key) {
354
+ if (key)
349355 ref->key_for_search = *key;
350
- /*
351
- * We can often find data backrefs with an offset that is too
352
- * large (>= LLONG_MAX, maximum allowed file offset) due to
353
- * underflows when subtracting a file's offset with the data
354
- * offset of its corresponding extent data item. This can
355
- * happen for example in the clone ioctl.
356
- * So if we detect such case we set the search key's offset to
357
- * zero to make sure we will find the matching file extent item
358
- * at add_all_parents(), otherwise we will miss it because the
359
- * offset taken form the backref is much larger then the offset
360
- * of the file extent item. This can make us scan a very large
361
- * number of file extent items, but at least it will not make
362
- * us miss any.
363
- * This is an ugly workaround for a behaviour that should have
364
- * never existed, but it does and a fix for the clone ioctl
365
- * would touch a lot of places, cause backwards incompatibility
366
- * and would not fix the problem for extents cloned with older
367
- * kernels.
368
- */
369
- if (ref->key_for_search.type == BTRFS_EXTENT_DATA_KEY &&
370
- ref->key_for_search.offset >= LLONG_MAX)
371
- ref->key_for_search.offset = 0;
372
- } else {
356
+ else
373357 memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));
374
- }
375358
376359 ref->inode_list = NULL;
377360 ref->level = level;
....@@ -407,10 +390,36 @@
407390 wanted_disk_byte, count, sc, gfp_mask);
408391 }
409392
393
+static int is_shared_data_backref(struct preftrees *preftrees, u64 bytenr)
394
+{
395
+ struct rb_node **p = &preftrees->direct.root.rb_root.rb_node;
396
+ struct rb_node *parent = NULL;
397
+ struct prelim_ref *ref = NULL;
398
+ struct prelim_ref target = {};
399
+ int result;
400
+
401
+ target.parent = bytenr;
402
+
403
+ while (*p) {
404
+ parent = *p;
405
+ ref = rb_entry(parent, struct prelim_ref, rbnode);
406
+ result = prelim_ref_compare(ref, &target);
407
+
408
+ if (result < 0)
409
+ p = &(*p)->rb_left;
410
+ else if (result > 0)
411
+ p = &(*p)->rb_right;
412
+ else
413
+ return 1;
414
+ }
415
+ return 0;
416
+}
417
+
410418 static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
411
- struct ulist *parents, struct prelim_ref *ref,
419
+ struct ulist *parents,
420
+ struct preftrees *preftrees, struct prelim_ref *ref,
412421 int level, u64 time_seq, const u64 *extent_item_pos,
413
- u64 total_refs, bool ignore_offset)
422
+ bool ignore_offset)
414423 {
415424 int ret = 0;
416425 int slot;
....@@ -422,6 +431,8 @@
422431 u64 disk_byte;
423432 u64 wanted_disk_byte = ref->wanted_disk_byte;
424433 u64 count = 0;
434
+ u64 data_offset;
435
+ u8 type;
425436
426437 if (level != 0) {
427438 eb = path->nodes[level];
....@@ -432,18 +443,26 @@
432443 }
433444
434445 /*
435
- * We normally enter this function with the path already pointing to
436
- * the first item to check. But sometimes, we may enter it with
437
- * slot==nritems. In that case, go to the next leaf before we continue.
446
+ * 1. We normally enter this function with the path already pointing to
447
+ * the first item to check. But sometimes, we may enter it with
448
+ * slot == nritems.
449
+ * 2. We are searching for normal backref but bytenr of this leaf
450
+ * matches shared data backref
451
+ * 3. The leaf owner is not equal to the root we are searching
452
+ *
453
+ * For these cases, go to the next leaf before we continue.
438454 */
439
- if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
455
+ eb = path->nodes[0];
456
+ if (path->slots[0] >= btrfs_header_nritems(eb) ||
457
+ is_shared_data_backref(preftrees, eb->start) ||
458
+ ref->root_id != btrfs_header_owner(eb)) {
440459 if (time_seq == SEQ_LAST)
441460 ret = btrfs_next_leaf(root, path);
442461 else
443462 ret = btrfs_next_old_leaf(root, path, time_seq);
444463 }
445464
446
- while (!ret && count < total_refs) {
465
+ while (!ret && count < ref->count) {
447466 eb = path->nodes[0];
448467 slot = path->slots[0];
449468
....@@ -453,13 +472,34 @@
453472 key.type != BTRFS_EXTENT_DATA_KEY)
454473 break;
455474
475
+ /*
476
+ * We are searching for normal backref but bytenr of this leaf
477
+ * matches shared data backref, OR
478
+ * the leaf owner is not equal to the root we are searching for
479
+ */
480
+ if (slot == 0 &&
481
+ (is_shared_data_backref(preftrees, eb->start) ||
482
+ ref->root_id != btrfs_header_owner(eb))) {
483
+ if (time_seq == SEQ_LAST)
484
+ ret = btrfs_next_leaf(root, path);
485
+ else
486
+ ret = btrfs_next_old_leaf(root, path, time_seq);
487
+ continue;
488
+ }
456489 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
490
+ type = btrfs_file_extent_type(eb, fi);
491
+ if (type == BTRFS_FILE_EXTENT_INLINE)
492
+ goto next;
457493 disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
494
+ data_offset = btrfs_file_extent_offset(eb, fi);
458495
459496 if (disk_byte == wanted_disk_byte) {
460497 eie = NULL;
461498 old = NULL;
462
- count++;
499
+ if (ref->key_for_search.offset == key.offset - data_offset)
500
+ count++;
501
+ else
502
+ goto next;
463503 if (extent_item_pos) {
464504 ret = check_extent_in_eb(&key, eb, fi,
465505 *extent_item_pos,
....@@ -500,33 +540,41 @@
500540 */
501541 static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
502542 struct btrfs_path *path, u64 time_seq,
543
+ struct preftrees *preftrees,
503544 struct prelim_ref *ref, struct ulist *parents,
504
- const u64 *extent_item_pos, u64 total_refs,
505
- bool ignore_offset)
545
+ const u64 *extent_item_pos, bool ignore_offset)
506546 {
507547 struct btrfs_root *root;
508
- struct btrfs_key root_key;
509548 struct extent_buffer *eb;
510549 int ret = 0;
511550 int root_level;
512551 int level = ref->level;
513
- int index;
552
+ struct btrfs_key search_key = ref->key_for_search;
514553
515
- root_key.objectid = ref->root_id;
516
- root_key.type = BTRFS_ROOT_ITEM_KEY;
517
- root_key.offset = (u64)-1;
518
-
519
- index = srcu_read_lock(&fs_info->subvol_srcu);
520
-
521
- root = btrfs_get_fs_root(fs_info, &root_key, false);
554
+ /*
555
+ * If we're search_commit_root we could possibly be holding locks on
556
+ * other tree nodes. This happens when qgroups does backref walks when
557
+ * adding new delayed refs. To deal with this we need to look in cache
558
+ * for the root, and if we don't find it then we need to search the
559
+ * tree_root's commit root, thus the btrfs_get_fs_root_commit_root usage
560
+ * here.
561
+ */
562
+ if (path->search_commit_root)
563
+ root = btrfs_get_fs_root_commit_root(fs_info, path, ref->root_id);
564
+ else
565
+ root = btrfs_get_fs_root(fs_info, ref->root_id, false);
522566 if (IS_ERR(root)) {
523
- srcu_read_unlock(&fs_info->subvol_srcu, index);
524567 ret = PTR_ERR(root);
568
+ goto out_free;
569
+ }
570
+
571
+ if (!path->search_commit_root &&
572
+ test_bit(BTRFS_ROOT_DELETING, &root->state)) {
573
+ ret = -ENOENT;
525574 goto out;
526575 }
527576
528577 if (btrfs_is_testing(fs_info)) {
529
- srcu_read_unlock(&fs_info->subvol_srcu, index);
530578 ret = -ENOENT;
531579 goto out;
532580 }
....@@ -538,21 +586,36 @@
538586 else
539587 root_level = btrfs_old_root_level(root, time_seq);
540588
541
- if (root_level + 1 == level) {
542
- srcu_read_unlock(&fs_info->subvol_srcu, index);
589
+ if (root_level + 1 == level)
543590 goto out;
544
- }
545591
592
+ /*
593
+ * We can often find data backrefs with an offset that is too large
594
+ * (>= LLONG_MAX, maximum allowed file offset) due to underflows when
595
+ * subtracting a file's offset with the data offset of its
596
+ * corresponding extent data item. This can happen for example in the
597
+ * clone ioctl.
598
+ *
599
+ * So if we detect such case we set the search key's offset to zero to
600
+ * make sure we will find the matching file extent item at
601
+ * add_all_parents(), otherwise we will miss it because the offset
602
+ * taken from the backref is much larger than the offset of the file
603
+ * extent item. This can make us scan a very large number of file
604
+ * extent items, but at least it will not make us miss any.
605
+ *
606
+ * This is an ugly workaround for a behaviour that should have never
607
+ * existed, but it does and a fix for the clone ioctl would touch a lot
608
+ * of places, cause backwards incompatibility and would not fix the
609
+ * problem for extents cloned with older kernels.
610
+ */
611
+ if (search_key.type == BTRFS_EXTENT_DATA_KEY &&
612
+ search_key.offset >= LLONG_MAX)
613
+ search_key.offset = 0;
546614 path->lowest_level = level;
547615 if (time_seq == SEQ_LAST)
548
- ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path,
549
- 0, 0);
616
+ ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
550617 else
551
- ret = btrfs_search_old_slot(root, &ref->key_for_search, path,
552
- time_seq);
553
-
554
- /* root node has been locked, we can release @subvol_srcu safely here */
555
- srcu_read_unlock(&fs_info->subvol_srcu, index);
618
+ ret = btrfs_search_old_slot(root, &search_key, path, time_seq);
556619
557620 btrfs_debug(fs_info,
558621 "search slot in root %llu (level %d, ref count %d) returned %d for key (%llu %u %llu)",
....@@ -572,9 +635,11 @@
572635 eb = path->nodes[level];
573636 }
574637
575
- ret = add_all_parents(root, path, parents, ref, level, time_seq,
576
- extent_item_pos, total_refs, ignore_offset);
638
+ ret = add_all_parents(root, path, parents, preftrees, ref, level,
639
+ time_seq, extent_item_pos, ignore_offset);
577640 out:
641
+ btrfs_put_root(root);
642
+out_free:
578643 path->lowest_level = 0;
579644 btrfs_release_path(path);
580645 return ret;
....@@ -588,8 +653,20 @@
588653 return (struct extent_inode_elem *)(uintptr_t)node->aux;
589654 }
590655
656
+static void free_leaf_list(struct ulist *ulist)
657
+{
658
+ struct ulist_node *node;
659
+ struct ulist_iterator uiter;
660
+
661
+ ULIST_ITER_INIT(&uiter);
662
+ while ((node = ulist_next(ulist, &uiter)))
663
+ free_inode_elem_list(unode_aux_to_inode_list(node));
664
+
665
+ ulist_free(ulist);
666
+}
667
+
591668 /*
592
- * We maintain three seperate rbtrees: one for direct refs, one for
669
+ * We maintain three separate rbtrees: one for direct refs, one for
593670 * indirect refs which have a key, and one for indirect refs which do not
594671 * have a key. Each tree does merge on insertion.
595672 *
....@@ -607,7 +684,7 @@
607684 static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
608685 struct btrfs_path *path, u64 time_seq,
609686 struct preftrees *preftrees,
610
- const u64 *extent_item_pos, u64 total_refs,
687
+ const u64 *extent_item_pos,
611688 struct share_check *sc, bool ignore_offset)
612689 {
613690 int err;
....@@ -627,7 +704,7 @@
627704 * freeing the entire indirect tree when we're done. In some test
628705 * cases, the tree can grow quite large (~200k objects).
629706 */
630
- while ((rnode = rb_first(&preftrees->indirect.root))) {
707
+ while ((rnode = rb_first_cached(&preftrees->indirect.root))) {
631708 struct prelim_ref *ref;
632709
633710 ref = rb_entry(rnode, struct prelim_ref, rbnode);
....@@ -637,7 +714,7 @@
637714 goto out;
638715 }
639716
640
- rb_erase(&ref->rbnode, &preftrees->indirect.root);
717
+ rb_erase_cached(&ref->rbnode, &preftrees->indirect.root);
641718 preftrees->indirect.count--;
642719
643720 if (ref->count == 0) {
....@@ -651,9 +728,9 @@
651728 ret = BACKREF_FOUND_SHARED;
652729 goto out;
653730 }
654
- err = resolve_indirect_ref(fs_info, path, time_seq, ref,
655
- parents, extent_item_pos,
656
- total_refs, ignore_offset);
731
+ err = resolve_indirect_ref(fs_info, path, time_seq, preftrees,
732
+ ref, parents, extent_item_pos,
733
+ ignore_offset);
657734 /*
658735 * we can only tolerate ENOENT,otherwise,we should catch error
659736 * and return directly.
....@@ -693,7 +770,7 @@
693770 }
694771
695772 /*
696
- * Now it's a direct ref, put it in the the direct tree. We must
773
+ * Now it's a direct ref, put it in the direct tree. We must
697774 * do this last because the ref could be merged/freed here.
698775 */
699776 prelim_ref_insert(fs_info, &preftrees->direct, ref, NULL);
....@@ -702,7 +779,11 @@
702779 cond_resched();
703780 }
704781 out:
705
- ulist_free(parents);
782
+ /*
783
+ * We may have inode lists attached to refs in the parents ulist, so we
784
+ * must free them before freeing the ulist and its refs.
785
+ */
786
+ free_leaf_list(parents);
706787 return ret;
707788 }
708789
....@@ -717,9 +798,9 @@
717798 struct preftree *tree = &preftrees->indirect_missing_keys;
718799 struct rb_node *node;
719800
720
- while ((node = rb_first(&tree->root))) {
801
+ while ((node = rb_first_cached(&tree->root))) {
721802 ref = rb_entry(node, struct prelim_ref, rbnode);
722
- rb_erase(node, &tree->root);
803
+ rb_erase_cached(node, &tree->root);
723804
724805 BUG_ON(ref->parent); /* should not be a direct ref */
725806 BUG_ON(ref->key_for_search.type);
....@@ -756,22 +837,16 @@
756837 */
757838 static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
758839 struct btrfs_delayed_ref_head *head, u64 seq,
759
- struct preftrees *preftrees, u64 *total_refs,
760
- struct share_check *sc)
840
+ struct preftrees *preftrees, struct share_check *sc)
761841 {
762842 struct btrfs_delayed_ref_node *node;
763
- struct btrfs_delayed_extent_op *extent_op = head->extent_op;
764843 struct btrfs_key key;
765
- struct btrfs_key tmp_op_key;
766844 struct rb_node *n;
767845 int count;
768846 int ret = 0;
769847
770
- if (extent_op && extent_op->update_key)
771
- btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key);
772
-
773848 spin_lock(&head->lock);
774
- for (n = rb_first(&head->ref_tree); n; n = rb_next(n)) {
849
+ for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) {
775850 node = rb_entry(n, struct btrfs_delayed_ref_node,
776851 ref_node);
777852 if (node->seq > seq)
....@@ -789,17 +864,22 @@
789864 count = node->ref_mod * -1;
790865 break;
791866 default:
792
- BUG_ON(1);
867
+ BUG();
793868 }
794
- *total_refs += count;
795869 switch (node->type) {
796870 case BTRFS_TREE_BLOCK_REF_KEY: {
797871 /* NORMAL INDIRECT METADATA backref */
798872 struct btrfs_delayed_tree_ref *ref;
873
+ struct btrfs_key *key_ptr = NULL;
874
+
875
+ if (head->extent_op && head->extent_op->update_key) {
876
+ btrfs_disk_key_to_cpu(&key, &head->extent_op->key);
877
+ key_ptr = &key;
878
+ }
799879
800880 ref = btrfs_delayed_node_to_tree_ref(node);
801881 ret = add_indirect_ref(fs_info, preftrees, ref->root,
802
- &tmp_op_key, ref->level + 1,
882
+ key_ptr, ref->level + 1,
803883 node->bytenr, count, sc,
804884 GFP_ATOMIC);
805885 break;
....@@ -825,13 +905,22 @@
825905 key.offset = ref->offset;
826906
827907 /*
828
- * Found a inum that doesn't match our known inum, we
829
- * know it's shared.
908
+ * If we have a share check context and a reference for
909
+ * another inode, we can't exit immediately. This is
910
+ * because even if this is a BTRFS_ADD_DELAYED_REF
911
+ * reference we may find next a BTRFS_DROP_DELAYED_REF
912
+ * which cancels out this ADD reference.
913
+ *
914
+ * If this is a DROP reference and there was no previous
915
+ * ADD reference, then we need to signal that when we
916
+ * process references from the extent tree (through
917
+ * add_inline_refs() and add_keyed_refs()), we should
918
+ * not exit early if we find a reference for another
919
+ * inode, because one of the delayed DROP references
920
+ * may cancel that reference in the extent tree.
830921 */
831
- if (sc && sc->inum && ref->objectid != sc->inum) {
832
- ret = BACKREF_FOUND_SHARED;
833
- goto out;
834
- }
922
+ if (sc && count < 0)
923
+ sc->have_delayed_delete_refs = true;
835924
836925 ret = add_indirect_ref(fs_info, preftrees, ref->root,
837926 &key, 0, node->bytenr, count, sc,
....@@ -861,7 +950,7 @@
861950 }
862951 if (!ret)
863952 ret = extent_is_shared(sc);
864
-out:
953
+
865954 spin_unlock(&head->lock);
866955 return ret;
867956 }
....@@ -874,7 +963,7 @@
874963 static int add_inline_refs(const struct btrfs_fs_info *fs_info,
875964 struct btrfs_path *path, u64 bytenr,
876965 int *info_level, struct preftrees *preftrees,
877
- u64 *total_refs, struct share_check *sc)
966
+ struct share_check *sc)
878967 {
879968 int ret = 0;
880969 int slot;
....@@ -898,7 +987,6 @@
898987
899988 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
900989 flags = btrfs_extent_flags(leaf, ei);
901
- *total_refs += btrfs_extent_refs(leaf, ei);
902990 btrfs_item_key_to_cpu(leaf, &found_key, slot);
903991
904992 ptr = (unsigned long)(ei + 1);
....@@ -965,7 +1053,8 @@
9651053 key.type = BTRFS_EXTENT_DATA_KEY;
9661054 key.offset = btrfs_extent_data_ref_offset(leaf, dref);
9671055
968
- if (sc && sc->inum && key.objectid != sc->inum) {
1056
+ if (sc && sc->inum && key.objectid != sc->inum &&
1057
+ !sc->have_delayed_delete_refs) {
9691058 ret = BACKREF_FOUND_SHARED;
9701059 break;
9711060 }
....@@ -975,6 +1064,7 @@
9751064 ret = add_indirect_ref(fs_info, preftrees, root,
9761065 &key, 0, bytenr, count,
9771066 sc, GFP_NOFS);
1067
+
9781068 break;
9791069 }
9801070 default:
....@@ -1064,7 +1154,8 @@
10641154 key.type = BTRFS_EXTENT_DATA_KEY;
10651155 key.offset = btrfs_extent_data_ref_offset(leaf, dref);
10661156
1067
- if (sc && sc->inum && key.objectid != sc->inum) {
1157
+ if (sc && sc->inum && key.objectid != sc->inum &&
1158
+ !sc->have_delayed_delete_refs) {
10681159 ret = BACKREF_FOUND_SHARED;
10691160 break;
10701161 }
....@@ -1123,8 +1214,6 @@
11231214 struct prelim_ref *ref;
11241215 struct rb_node *node;
11251216 struct extent_inode_elem *eie = NULL;
1126
- /* total of both direct AND indirect refs! */
1127
- u64 total_refs = 0;
11281217 struct preftrees preftrees = {
11291218 .direct = PREFTREE_INIT,
11301219 .indirect = PREFTREE_INIT,
....@@ -1198,7 +1287,7 @@
11981287 }
11991288 spin_unlock(&delayed_refs->lock);
12001289 ret = add_delayed_refs(fs_info, head, time_seq,
1201
- &preftrees, &total_refs, sc);
1290
+ &preftrees, sc);
12021291 mutex_unlock(&head->mutex);
12031292 if (ret)
12041293 goto out;
....@@ -1219,8 +1308,7 @@
12191308 (key.type == BTRFS_EXTENT_ITEM_KEY ||
12201309 key.type == BTRFS_METADATA_ITEM_KEY)) {
12211310 ret = add_inline_refs(fs_info, path, bytenr,
1222
- &info_level, &preftrees,
1223
- &total_refs, sc);
1311
+ &info_level, &preftrees, sc);
12241312 if (ret)
12251313 goto out;
12261314 ret = add_keyed_refs(fs_info, path, bytenr, info_level,
....@@ -1236,14 +1324,14 @@
12361324 if (ret)
12371325 goto out;
12381326
1239
- WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root));
1327
+ WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root.rb_root));
12401328
12411329 ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
1242
- extent_item_pos, total_refs, sc, ignore_offset);
1330
+ extent_item_pos, sc, ignore_offset);
12431331 if (ret)
12441332 goto out;
12451333
1246
- WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect.root));
1334
+ WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect.root.rb_root));
12471335
12481336 /*
12491337 * This walks the tree of merged and resolved refs. Tree blocks are
....@@ -1252,7 +1340,7 @@
12521340 *
12531341 * We release the entire tree in one go before returning.
12541342 */
1255
- node = rb_first(&preftrees.direct.root);
1343
+ node = rb_first_cached(&preftrees.direct.root);
12561344 while (node) {
12571345 ref = rb_entry(node, struct prelim_ref, rbnode);
12581346 node = rb_next(&ref->rbnode);
....@@ -1293,9 +1381,10 @@
12931381 ret = -EIO;
12941382 goto out;
12951383 }
1384
+
12961385 if (!path->skip_locking) {
12971386 btrfs_tree_read_lock(eb);
1298
- btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
1387
+ btrfs_set_lock_blocking_read(eb);
12991388 }
13001389 ret = find_extent_in_eb(eb, bytenr,
13011390 *extent_item_pos, &eie, ignore_offset);
....@@ -1305,6 +1394,12 @@
13051394 if (ret < 0)
13061395 goto out;
13071396 ref->inode_list = eie;
1397
+ /*
1398
+ * We transferred the list ownership to the ref,
1399
+ * so set to NULL to avoid a double free in case
1400
+ * an error happens after this.
1401
+ */
1402
+ eie = NULL;
13081403 }
13091404 ret = ulist_add_merge_ptr(refs, ref->parent,
13101405 ref->inode_list,
....@@ -1330,6 +1425,14 @@
13301425 eie->next = ref->inode_list;
13311426 }
13321427 eie = NULL;
1428
+ /*
1429
+ * We have transferred the inode list ownership from
1430
+ * this ref to the ref we added to the 'refs' ulist.
1431
+ * So set this ref's inode list to NULL to avoid
1432
+ * use-after-free when our caller uses it or double
1433
+ * frees in case an error happens before we return.
1434
+ */
1435
+ ref->inode_list = NULL;
13331436 }
13341437 cond_resched();
13351438 }
....@@ -1346,24 +1449,6 @@
13461449 return ret;
13471450 }
13481451
1349
-static void free_leaf_list(struct ulist *blocks)
1350
-{
1351
- struct ulist_node *node = NULL;
1352
- struct extent_inode_elem *eie;
1353
- struct ulist_iterator uiter;
1354
-
1355
- ULIST_ITER_INIT(&uiter);
1356
- while ((node = ulist_next(blocks, &uiter))) {
1357
- if (!node->aux)
1358
- continue;
1359
- eie = unode_aux_to_inode_list(node);
1360
- free_inode_elem_list(eie);
1361
- node->aux = 0;
1362
- }
1363
-
1364
- ulist_free(blocks);
1365
-}
1366
-
13671452 /*
13681453 * Finds all leafs with a reference to the specified combination of bytenr and
13691454 * offset. key_list_head will point to a list of corresponding keys (caller must
....@@ -1372,10 +1457,10 @@
13721457 *
13731458 * returns 0 on success, <0 on error
13741459 */
1375
-static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
1376
- struct btrfs_fs_info *fs_info, u64 bytenr,
1377
- u64 time_seq, struct ulist **leafs,
1378
- const u64 *extent_item_pos, bool ignore_offset)
1460
+int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
1461
+ struct btrfs_fs_info *fs_info, u64 bytenr,
1462
+ u64 time_seq, struct ulist **leafs,
1463
+ const u64 *extent_item_pos, bool ignore_offset)
13791464 {
13801465 int ret;
13811466
....@@ -1476,28 +1561,24 @@
14761561 *
14771562 * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
14781563 */
1479
-int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
1564
+int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
1565
+ struct ulist *roots, struct ulist *tmp)
14801566 {
14811567 struct btrfs_fs_info *fs_info = root->fs_info;
14821568 struct btrfs_trans_handle *trans;
1483
- struct ulist *tmp = NULL;
1484
- struct ulist *roots = NULL;
14851569 struct ulist_iterator uiter;
14861570 struct ulist_node *node;
14871571 struct seq_list elem = SEQ_LIST_INIT(elem);
14881572 int ret = 0;
14891573 struct share_check shared = {
1490
- .root_objectid = root->objectid,
1574
+ .root_objectid = root->root_key.objectid,
14911575 .inum = inum,
14921576 .share_count = 0,
1577
+ .have_delayed_delete_refs = false,
14931578 };
14941579
1495
- tmp = ulist_alloc(GFP_NOFS);
1496
- roots = ulist_alloc(GFP_NOFS);
1497
- if (!tmp || !roots) {
1498
- ret = -ENOMEM;
1499
- goto out;
1500
- }
1580
+ ulist_init(roots);
1581
+ ulist_init(tmp);
15011582
15021583 trans = btrfs_join_transaction_nostart(root);
15031584 if (IS_ERR(trans)) {
....@@ -1528,6 +1609,7 @@
15281609 break;
15291610 bytenr = node->val;
15301611 shared.share_count = 0;
1612
+ shared.have_delayed_delete_refs = false;
15311613 cond_resched();
15321614 }
15331615
....@@ -1538,8 +1620,8 @@
15381620 up_read(&fs_info->commit_root_sem);
15391621 }
15401622 out:
1541
- ulist_free(tmp);
1542
- ulist_free(roots);
1623
+ ulist_release(roots);
1624
+ ulist_release(tmp);
15431625 return ret;
15441626 }
15451627
....@@ -1671,7 +1753,7 @@
16711753 /* make sure we can use eb after releasing the path */
16721754 if (eb != eb_in) {
16731755 if (!path->skip_locking)
1674
- btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
1756
+ btrfs_set_lock_blocking_read(eb);
16751757 path->nodes[0] = NULL;
16761758 path->locks[0] = 0;
16771759 }
....@@ -1762,7 +1844,7 @@
17621844 else if (flags & BTRFS_EXTENT_FLAG_DATA)
17631845 *flags_ret = BTRFS_EXTENT_FLAG_DATA;
17641846 else
1765
- BUG_ON(1);
1847
+ BUG();
17661848 return 0;
17671849 }
17681850
....@@ -1982,10 +2064,29 @@
19822064 return ret;
19832065 }
19842066
2067
+static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
2068
+{
2069
+ struct btrfs_data_container *inodes = ctx;
2070
+ const size_t c = 3 * sizeof(u64);
2071
+
2072
+ if (inodes->bytes_left >= c) {
2073
+ inodes->bytes_left -= c;
2074
+ inodes->val[inodes->elem_cnt] = inum;
2075
+ inodes->val[inodes->elem_cnt + 1] = offset;
2076
+ inodes->val[inodes->elem_cnt + 2] = root;
2077
+ inodes->elem_cnt += 3;
2078
+ } else {
2079
+ inodes->bytes_missing += c - inodes->bytes_left;
2080
+ inodes->bytes_left = 0;
2081
+ inodes->elem_missed += 3;
2082
+ }
2083
+
2084
+ return 0;
2085
+}
2086
+
19852087 int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
19862088 struct btrfs_path *path,
1987
- iterate_extent_inodes_t *iterate, void *ctx,
1988
- bool ignore_offset)
2089
+ void *ctx, bool ignore_offset)
19892090 {
19902091 int ret;
19912092 u64 extent_item_pos;
....@@ -2003,7 +2104,7 @@
20032104 extent_item_pos = logical - found_key.objectid;
20042105 ret = iterate_extent_inodes(fs_info, found_key.objectid,
20052106 extent_item_pos, search_commit_root,
2006
- iterate, ctx, ignore_offset);
2107
+ build_ino_list, ctx, ignore_offset);
20072108
20082109 return ret;
20092110 }
....@@ -2047,9 +2148,6 @@
20472148 ret = -ENOMEM;
20482149 break;
20492150 }
2050
- extent_buffer_get(eb);
2051
- btrfs_tree_read_lock(eb);
2052
- btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
20532151 btrfs_release_path(path);
20542152
20552153 item = btrfs_item_nr(slot);
....@@ -2060,7 +2158,8 @@
20602158 /* path must be released before calling iterate()! */
20612159 btrfs_debug(fs_root->fs_info,
20622160 "following ref at offset %u for inode %llu in tree %llu",
2063
- cur, found_key.objectid, fs_root->objectid);
2161
+ cur, found_key.objectid,
2162
+ fs_root->root_key.objectid);
20642163 ret = iterate(parent, name_len,
20652164 (unsigned long)(iref + 1), eb, ctx);
20662165 if (ret)
....@@ -2068,7 +2167,6 @@
20682167 len = sizeof(*iref) + name_len;
20692168 iref = (struct btrfs_inode_ref *)((char *)iref + len);
20702169 }
2071
- btrfs_tree_read_unlock_blocking(eb);
20722170 free_extent_buffer(eb);
20732171 }
20742172
....@@ -2109,10 +2207,6 @@
21092207 ret = -ENOMEM;
21102208 break;
21112209 }
2112
- extent_buffer_get(eb);
2113
-
2114
- btrfs_tree_read_lock(eb);
2115
- btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
21162210 btrfs_release_path(path);
21172211
21182212 item_size = btrfs_item_size_nr(eb, slot);
....@@ -2133,7 +2227,6 @@
21332227 cur_offset += btrfs_inode_extref_name_len(eb, extref);
21342228 cur_offset += sizeof(*extref);
21352229 }
2136
- btrfs_tree_read_unlock_blocking(eb);
21372230 free_extent_buffer(eb);
21382231
21392232 offset++;
....@@ -2276,3 +2369,824 @@
22762369 kvfree(ipath->fspath);
22772370 kfree(ipath);
22782371 }
2372
+
2373
+struct btrfs_backref_iter *btrfs_backref_iter_alloc(
2374
+ struct btrfs_fs_info *fs_info, gfp_t gfp_flag)
2375
+{
2376
+ struct btrfs_backref_iter *ret;
2377
+
2378
+ ret = kzalloc(sizeof(*ret), gfp_flag);
2379
+ if (!ret)
2380
+ return NULL;
2381
+
2382
+ ret->path = btrfs_alloc_path();
2383
+ if (!ret->path) {
2384
+ kfree(ret);
2385
+ return NULL;
2386
+ }
2387
+
2388
+ /* Current backref iterator only supports iteration in commit root */
2389
+ ret->path->search_commit_root = 1;
2390
+ ret->path->skip_locking = 1;
2391
+ ret->fs_info = fs_info;
2392
+
2393
+ return ret;
2394
+}
2395
+
2396
+int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
2397
+{
2398
+ struct btrfs_fs_info *fs_info = iter->fs_info;
2399
+ struct btrfs_path *path = iter->path;
2400
+ struct btrfs_extent_item *ei;
2401
+ struct btrfs_key key;
2402
+ int ret;
2403
+
2404
+ key.objectid = bytenr;
2405
+ key.type = BTRFS_METADATA_ITEM_KEY;
2406
+ key.offset = (u64)-1;
2407
+ iter->bytenr = bytenr;
2408
+
2409
+ ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
2410
+ if (ret < 0)
2411
+ return ret;
2412
+ if (ret == 0) {
2413
+ ret = -EUCLEAN;
2414
+ goto release;
2415
+ }
2416
+ if (path->slots[0] == 0) {
2417
+ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
2418
+ ret = -EUCLEAN;
2419
+ goto release;
2420
+ }
2421
+ path->slots[0]--;
2422
+
2423
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2424
+ if ((key.type != BTRFS_EXTENT_ITEM_KEY &&
2425
+ key.type != BTRFS_METADATA_ITEM_KEY) || key.objectid != bytenr) {
2426
+ ret = -ENOENT;
2427
+ goto release;
2428
+ }
2429
+ memcpy(&iter->cur_key, &key, sizeof(key));
2430
+ iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
2431
+ path->slots[0]);
2432
+ iter->end_ptr = (u32)(iter->item_ptr +
2433
+ btrfs_item_size_nr(path->nodes[0], path->slots[0]));
2434
+ ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2435
+ struct btrfs_extent_item);
2436
+
2437
+ /*
2438
+ * Only support iteration on tree backref yet.
2439
+ *
2440
+ * This is an extra precaution for non skinny-metadata, where
2441
+ * EXTENT_ITEM is also used for tree blocks, that we can only use
2442
+ * extent flags to determine if it's a tree block.
2443
+ */
2444
+ if (btrfs_extent_flags(path->nodes[0], ei) & BTRFS_EXTENT_FLAG_DATA) {
2445
+ ret = -ENOTSUPP;
2446
+ goto release;
2447
+ }
2448
+ iter->cur_ptr = (u32)(iter->item_ptr + sizeof(*ei));
2449
+
2450
+ /* If there is no inline backref, go search for keyed backref */
2451
+ if (iter->cur_ptr >= iter->end_ptr) {
2452
+ ret = btrfs_next_item(fs_info->extent_root, path);
2453
+
2454
+ /* No inline nor keyed ref */
2455
+ if (ret > 0) {
2456
+ ret = -ENOENT;
2457
+ goto release;
2458
+ }
2459
+ if (ret < 0)
2460
+ goto release;
2461
+
2462
+ btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key,
2463
+ path->slots[0]);
2464
+ if (iter->cur_key.objectid != bytenr ||
2465
+ (iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
2466
+ iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY)) {
2467
+ ret = -ENOENT;
2468
+ goto release;
2469
+ }
2470
+ iter->cur_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
2471
+ path->slots[0]);
2472
+ iter->item_ptr = iter->cur_ptr;
2473
+ iter->end_ptr = (u32)(iter->item_ptr + btrfs_item_size_nr(
2474
+ path->nodes[0], path->slots[0]));
2475
+ }
2476
+
2477
+ return 0;
2478
+release:
2479
+ btrfs_backref_iter_release(iter);
2480
+ return ret;
2481
+}
2482
+
2483
+/*
2484
+ * Go to the next backref item of current bytenr, can be either inlined or
2485
+ * keyed.
2486
+ *
2487
+ * Caller needs to check whether it's inline ref or not by iter->cur_key.
2488
+ *
2489
+ * Return 0 if we get next backref without problem.
2490
+ * Return >0 if there is no extra backref for this bytenr.
2491
+ * Return <0 if something went wrong.
2492
+ */
2493
+int btrfs_backref_iter_next(struct btrfs_backref_iter *iter)
2494
+{
2495
+ struct extent_buffer *eb = btrfs_backref_get_eb(iter);
2496
+ struct btrfs_path *path = iter->path;
2497
+ struct btrfs_extent_inline_ref *iref;
2498
+ int ret;
2499
+ u32 size;
2500
+
2501
+ if (btrfs_backref_iter_is_inline_ref(iter)) {
2502
+ /* We're still inside the inline refs */
2503
+ ASSERT(iter->cur_ptr < iter->end_ptr);
2504
+
2505
+ if (btrfs_backref_has_tree_block_info(iter)) {
2506
+ /* First tree block info */
2507
+ size = sizeof(struct btrfs_tree_block_info);
2508
+ } else {
2509
+ /* Use inline ref type to determine the size */
2510
+ int type;
2511
+
2512
+ iref = (struct btrfs_extent_inline_ref *)
2513
+ ((unsigned long)iter->cur_ptr);
2514
+ type = btrfs_extent_inline_ref_type(eb, iref);
2515
+
2516
+ size = btrfs_extent_inline_ref_size(type);
2517
+ }
2518
+ iter->cur_ptr += size;
2519
+ if (iter->cur_ptr < iter->end_ptr)
2520
+ return 0;
2521
+
2522
+ /* All inline items iterated, fall through */
2523
+ }
2524
+
2525
+ /* We're at keyed items, there is no inline item, go to the next one */
2526
+ ret = btrfs_next_item(iter->fs_info->extent_root, iter->path);
2527
+ if (ret)
2528
+ return ret;
2529
+
2530
+ btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key, path->slots[0]);
2531
+ if (iter->cur_key.objectid != iter->bytenr ||
2532
+ (iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
2533
+ iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY))
2534
+ return 1;
2535
+ iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
2536
+ path->slots[0]);
2537
+ iter->cur_ptr = iter->item_ptr;
2538
+ iter->end_ptr = iter->item_ptr + (u32)btrfs_item_size_nr(path->nodes[0],
2539
+ path->slots[0]);
2540
+ return 0;
2541
+}
2542
+
2543
+void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info,
2544
+ struct btrfs_backref_cache *cache, int is_reloc)
2545
+{
2546
+ int i;
2547
+
2548
+ cache->rb_root = RB_ROOT;
2549
+ for (i = 0; i < BTRFS_MAX_LEVEL; i++)
2550
+ INIT_LIST_HEAD(&cache->pending[i]);
2551
+ INIT_LIST_HEAD(&cache->changed);
2552
+ INIT_LIST_HEAD(&cache->detached);
2553
+ INIT_LIST_HEAD(&cache->leaves);
2554
+ INIT_LIST_HEAD(&cache->pending_edge);
2555
+ INIT_LIST_HEAD(&cache->useless_node);
2556
+ cache->fs_info = fs_info;
2557
+ cache->is_reloc = is_reloc;
2558
+}
2559
+
2560
+struct btrfs_backref_node *btrfs_backref_alloc_node(
2561
+ struct btrfs_backref_cache *cache, u64 bytenr, int level)
2562
+{
2563
+ struct btrfs_backref_node *node;
2564
+
2565
+ ASSERT(level >= 0 && level < BTRFS_MAX_LEVEL);
2566
+ node = kzalloc(sizeof(*node), GFP_NOFS);
2567
+ if (!node)
2568
+ return node;
2569
+
2570
+ INIT_LIST_HEAD(&node->list);
2571
+ INIT_LIST_HEAD(&node->upper);
2572
+ INIT_LIST_HEAD(&node->lower);
2573
+ RB_CLEAR_NODE(&node->rb_node);
2574
+ cache->nr_nodes++;
2575
+ node->level = level;
2576
+ node->bytenr = bytenr;
2577
+
2578
+ return node;
2579
+}
2580
+
2581
+struct btrfs_backref_edge *btrfs_backref_alloc_edge(
2582
+ struct btrfs_backref_cache *cache)
2583
+{
2584
+ struct btrfs_backref_edge *edge;
2585
+
2586
+ edge = kzalloc(sizeof(*edge), GFP_NOFS);
2587
+ if (edge)
2588
+ cache->nr_edges++;
2589
+ return edge;
2590
+}
2591
+
2592
+/*
2593
+ * Drop the backref node from cache, also cleaning up all its
2594
+ * upper edges and any uncached nodes in the path.
2595
+ *
2596
+ * This cleanup happens bottom up, thus the node should either
2597
+ * be the lowest node in the cache or a detached node.
2598
+ */
2599
+void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
2600
+ struct btrfs_backref_node *node)
2601
+{
2602
+ struct btrfs_backref_node *upper;
2603
+ struct btrfs_backref_edge *edge;
2604
+
2605
+ if (!node)
2606
+ return;
2607
+
2608
+ BUG_ON(!node->lowest && !node->detached);
2609
+ while (!list_empty(&node->upper)) {
2610
+ edge = list_entry(node->upper.next, struct btrfs_backref_edge,
2611
+ list[LOWER]);
2612
+ upper = edge->node[UPPER];
2613
+ list_del(&edge->list[LOWER]);
2614
+ list_del(&edge->list[UPPER]);
2615
+ btrfs_backref_free_edge(cache, edge);
2616
+
2617
+ /*
2618
+ * Add the node to leaf node list if no other child block
2619
+ * cached.
2620
+ */
2621
+ if (list_empty(&upper->lower)) {
2622
+ list_add_tail(&upper->lower, &cache->leaves);
2623
+ upper->lowest = 1;
2624
+ }
2625
+ }
2626
+
2627
+ btrfs_backref_drop_node(cache, node);
2628
+}
2629
+
2630
+/*
2631
+ * Release all nodes/edges from current cache
2632
+ */
2633
+void btrfs_backref_release_cache(struct btrfs_backref_cache *cache)
2634
+{
2635
+ struct btrfs_backref_node *node;
2636
+ int i;
2637
+
2638
+ while (!list_empty(&cache->detached)) {
2639
+ node = list_entry(cache->detached.next,
2640
+ struct btrfs_backref_node, list);
2641
+ btrfs_backref_cleanup_node(cache, node);
2642
+ }
2643
+
2644
+ while (!list_empty(&cache->leaves)) {
2645
+ node = list_entry(cache->leaves.next,
2646
+ struct btrfs_backref_node, lower);
2647
+ btrfs_backref_cleanup_node(cache, node);
2648
+ }
2649
+
2650
+ cache->last_trans = 0;
2651
+
2652
+ for (i = 0; i < BTRFS_MAX_LEVEL; i++)
2653
+ ASSERT(list_empty(&cache->pending[i]));
2654
+ ASSERT(list_empty(&cache->pending_edge));
2655
+ ASSERT(list_empty(&cache->useless_node));
2656
+ ASSERT(list_empty(&cache->changed));
2657
+ ASSERT(list_empty(&cache->detached));
2658
+ ASSERT(RB_EMPTY_ROOT(&cache->rb_root));
2659
+ ASSERT(!cache->nr_nodes);
2660
+ ASSERT(!cache->nr_edges);
2661
+}
2662
+
2663
+/*
2664
+ * Handle direct tree backref
2665
+ *
2666
+ * Direct tree backref means, the backref item shows its parent bytenr
2667
+ * directly. This is for SHARED_BLOCK_REF backref (keyed or inlined).
2668
+ *
2669
+ * @ref_key: The converted backref key.
2670
+ * For keyed backref, it's the item key.
2671
+ * For inlined backref, objectid is the bytenr,
2672
+ * type is btrfs_inline_ref_type, offset is
2673
+ * btrfs_inline_ref_offset.
2674
+ */
2675
+static int handle_direct_tree_backref(struct btrfs_backref_cache *cache,
2676
+ struct btrfs_key *ref_key,
2677
+ struct btrfs_backref_node *cur)
2678
+{
2679
+ struct btrfs_backref_edge *edge;
2680
+ struct btrfs_backref_node *upper;
2681
+ struct rb_node *rb_node;
2682
+
2683
+ ASSERT(ref_key->type == BTRFS_SHARED_BLOCK_REF_KEY);
2684
+
2685
+ /* Only reloc root uses backref pointing to itself */
2686
+ if (ref_key->objectid == ref_key->offset) {
2687
+ struct btrfs_root *root;
2688
+
2689
+ cur->is_reloc_root = 1;
2690
+ /* Only reloc backref cache cares about a specific root */
2691
+ if (cache->is_reloc) {
2692
+ root = find_reloc_root(cache->fs_info, cur->bytenr);
2693
+ if (!root)
2694
+ return -ENOENT;
2695
+ cur->root = root;
2696
+ } else {
2697
+ /*
2698
+ * For generic purpose backref cache, reloc root node
2699
+ * is useless.
2700
+ */
2701
+ list_add(&cur->list, &cache->useless_node);
2702
+ }
2703
+ return 0;
2704
+ }
2705
+
2706
+ edge = btrfs_backref_alloc_edge(cache);
2707
+ if (!edge)
2708
+ return -ENOMEM;
2709
+
2710
+ rb_node = rb_simple_search(&cache->rb_root, ref_key->offset);
2711
+ if (!rb_node) {
2712
+ /* Parent node not yet cached */
2713
+ upper = btrfs_backref_alloc_node(cache, ref_key->offset,
2714
+ cur->level + 1);
2715
+ if (!upper) {
2716
+ btrfs_backref_free_edge(cache, edge);
2717
+ return -ENOMEM;
2718
+ }
2719
+
2720
+ /*
2721
+ * Backrefs for the upper level block isn't cached, add the
2722
+ * block to pending list
2723
+ */
2724
+ list_add_tail(&edge->list[UPPER], &cache->pending_edge);
2725
+ } else {
2726
+ /* Parent node already cached */
2727
+ upper = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
2728
+ ASSERT(upper->checked);
2729
+ INIT_LIST_HEAD(&edge->list[UPPER]);
2730
+ }
2731
+ btrfs_backref_link_edge(edge, cur, upper, LINK_LOWER);
2732
+ return 0;
2733
+}
2734
+
2735
+/*
2736
+ * Handle indirect tree backref
2737
+ *
2738
+ * Indirect tree backref means, we only know which tree the node belongs to.
2739
+ * We still need to do a tree search to find out the parents. This is for
2740
+ * TREE_BLOCK_REF backref (keyed or inlined).
2741
+ *
2742
+ * @ref_key: The same as @ref_key in handle_direct_tree_backref()
2743
+ * @tree_key: The first key of this tree block.
2744
+ * @path: A clean (released) path, to avoid allocating path every time
2745
+ * the function gets called.
2746
+ */
2747
+static int handle_indirect_tree_backref(struct btrfs_backref_cache *cache,
2748
+ struct btrfs_path *path,
2749
+ struct btrfs_key *ref_key,
2750
+ struct btrfs_key *tree_key,
2751
+ struct btrfs_backref_node *cur)
2752
+{
2753
+ struct btrfs_fs_info *fs_info = cache->fs_info;
2754
+ struct btrfs_backref_node *upper;
2755
+ struct btrfs_backref_node *lower;
2756
+ struct btrfs_backref_edge *edge;
2757
+ struct extent_buffer *eb;
2758
+ struct btrfs_root *root;
2759
+ struct rb_node *rb_node;
2760
+ int level;
2761
+ bool need_check = true;
2762
+ int ret;
2763
+
2764
+ root = btrfs_get_fs_root(fs_info, ref_key->offset, false);
2765
+ if (IS_ERR(root))
2766
+ return PTR_ERR(root);
2767
+ if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
2768
+ cur->cowonly = 1;
2769
+
2770
+ if (btrfs_root_level(&root->root_item) == cur->level) {
2771
+ /* Tree root */
2772
+ ASSERT(btrfs_root_bytenr(&root->root_item) == cur->bytenr);
2773
+ /*
2774
+ * For reloc backref cache, we may ignore reloc root. But for
2775
+ * general purpose backref cache, we can't rely on
2776
+ * btrfs_should_ignore_reloc_root() as it may conflict with
2777
+ * current running relocation and lead to missing root.
2778
+ *
2779
+ * For general purpose backref cache, reloc root detection is
2780
+ * completely relying on direct backref (key->offset is parent
2781
+ * bytenr), thus only do such check for reloc cache.
2782
+ */
2783
+ if (btrfs_should_ignore_reloc_root(root) && cache->is_reloc) {
2784
+ btrfs_put_root(root);
2785
+ list_add(&cur->list, &cache->useless_node);
2786
+ } else {
2787
+ cur->root = root;
2788
+ }
2789
+ return 0;
2790
+ }
2791
+
2792
+ level = cur->level + 1;
2793
+
2794
+ /* Search the tree to find parent blocks referring to the block */
2795
+ path->search_commit_root = 1;
2796
+ path->skip_locking = 1;
2797
+ path->lowest_level = level;
2798
+ ret = btrfs_search_slot(NULL, root, tree_key, path, 0, 0);
2799
+ path->lowest_level = 0;
2800
+ if (ret < 0) {
2801
+ btrfs_put_root(root);
2802
+ return ret;
2803
+ }
2804
+ if (ret > 0 && path->slots[level] > 0)
2805
+ path->slots[level]--;
2806
+
2807
+ eb = path->nodes[level];
2808
+ if (btrfs_node_blockptr(eb, path->slots[level]) != cur->bytenr) {
2809
+ btrfs_err(fs_info,
2810
+"couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)",
2811
+ cur->bytenr, level - 1, root->root_key.objectid,
2812
+ tree_key->objectid, tree_key->type, tree_key->offset);
2813
+ btrfs_put_root(root);
2814
+ ret = -ENOENT;
2815
+ goto out;
2816
+ }
2817
+ lower = cur;
2818
+
2819
+ /* Add all nodes and edges in the path */
2820
+ for (; level < BTRFS_MAX_LEVEL; level++) {
2821
+ if (!path->nodes[level]) {
2822
+ ASSERT(btrfs_root_bytenr(&root->root_item) ==
2823
+ lower->bytenr);
2824
+ /* Same as previous should_ignore_reloc_root() call */
2825
+ if (btrfs_should_ignore_reloc_root(root) &&
2826
+ cache->is_reloc) {
2827
+ btrfs_put_root(root);
2828
+ list_add(&lower->list, &cache->useless_node);
2829
+ } else {
2830
+ lower->root = root;
2831
+ }
2832
+ break;
2833
+ }
2834
+
2835
+ edge = btrfs_backref_alloc_edge(cache);
2836
+ if (!edge) {
2837
+ btrfs_put_root(root);
2838
+ ret = -ENOMEM;
2839
+ goto out;
2840
+ }
2841
+
2842
+ eb = path->nodes[level];
2843
+ rb_node = rb_simple_search(&cache->rb_root, eb->start);
2844
+ if (!rb_node) {
2845
+ upper = btrfs_backref_alloc_node(cache, eb->start,
2846
+ lower->level + 1);
2847
+ if (!upper) {
2848
+ btrfs_put_root(root);
2849
+ btrfs_backref_free_edge(cache, edge);
2850
+ ret = -ENOMEM;
2851
+ goto out;
2852
+ }
2853
+ upper->owner = btrfs_header_owner(eb);
2854
+ if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
2855
+ upper->cowonly = 1;
2856
+
2857
+ /*
2858
+ * If we know the block isn't shared we can avoid
2859
+ * checking its backrefs.
2860
+ */
2861
+ if (btrfs_block_can_be_shared(root, eb))
2862
+ upper->checked = 0;
2863
+ else
2864
+ upper->checked = 1;
2865
+
2866
+ /*
2867
+ * Add the block to pending list if we need to check its
2868
+ * backrefs, we only do this once while walking up a
2869
+ * tree as we will catch anything else later on.
2870
+ */
2871
+ if (!upper->checked && need_check) {
2872
+ need_check = false;
2873
+ list_add_tail(&edge->list[UPPER],
2874
+ &cache->pending_edge);
2875
+ } else {
2876
+ if (upper->checked)
2877
+ need_check = true;
2878
+ INIT_LIST_HEAD(&edge->list[UPPER]);
2879
+ }
2880
+ } else {
2881
+ upper = rb_entry(rb_node, struct btrfs_backref_node,
2882
+ rb_node);
2883
+ ASSERT(upper->checked);
2884
+ INIT_LIST_HEAD(&edge->list[UPPER]);
2885
+ if (!upper->owner)
2886
+ upper->owner = btrfs_header_owner(eb);
2887
+ }
2888
+ btrfs_backref_link_edge(edge, lower, upper, LINK_LOWER);
2889
+
2890
+ if (rb_node) {
2891
+ btrfs_put_root(root);
2892
+ break;
2893
+ }
2894
+ lower = upper;
2895
+ upper = NULL;
2896
+ }
2897
+out:
2898
+ btrfs_release_path(path);
2899
+ return ret;
2900
+}
2901
+
2902
+/*
2903
+ * Add backref node @cur into @cache.
2904
+ *
2905
+ * NOTE: Even if the function returned 0, @cur is not yet cached as its upper
2906
+ * links aren't yet bi-directional. Needs to finish such links.
2907
+ * Use btrfs_backref_finish_upper_links() to finish such linkage.
2908
+ *
2909
+ * @path: Released path for indirect tree backref lookup
2910
+ * @iter: Released backref iter for extent tree search
2911
+ * @node_key: The first key of the tree block
2912
+ */
2913
+int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
2914
+ struct btrfs_path *path,
2915
+ struct btrfs_backref_iter *iter,
2916
+ struct btrfs_key *node_key,
2917
+ struct btrfs_backref_node *cur)
2918
+{
2919
+ struct btrfs_fs_info *fs_info = cache->fs_info;
2920
+ struct btrfs_backref_edge *edge;
2921
+ struct btrfs_backref_node *exist;
2922
+ int ret;
2923
+
2924
+ ret = btrfs_backref_iter_start(iter, cur->bytenr);
2925
+ if (ret < 0)
2926
+ return ret;
2927
+ /*
2928
+ * We skip the first btrfs_tree_block_info, as we don't use the key
2929
+ * stored in it, but fetch it from the tree block
2930
+ */
2931
+ if (btrfs_backref_has_tree_block_info(iter)) {
2932
+ ret = btrfs_backref_iter_next(iter);
2933
+ if (ret < 0)
2934
+ goto out;
2935
+ /* No extra backref? This means the tree block is corrupted */
2936
+ if (ret > 0) {
2937
+ ret = -EUCLEAN;
2938
+ goto out;
2939
+ }
2940
+ }
2941
+ WARN_ON(cur->checked);
2942
+ if (!list_empty(&cur->upper)) {
2943
+ /*
2944
+ * The backref was added previously when processing backref of
2945
+ * type BTRFS_TREE_BLOCK_REF_KEY
2946
+ */
2947
+ ASSERT(list_is_singular(&cur->upper));
2948
+ edge = list_entry(cur->upper.next, struct btrfs_backref_edge,
2949
+ list[LOWER]);
2950
+ ASSERT(list_empty(&edge->list[UPPER]));
2951
+ exist = edge->node[UPPER];
2952
+ /*
2953
+ * Add the upper level block to pending list if we need to check
2954
+ * its backrefs
2955
+ */
2956
+ if (!exist->checked)
2957
+ list_add_tail(&edge->list[UPPER], &cache->pending_edge);
2958
+ } else {
2959
+ exist = NULL;
2960
+ }
2961
+
2962
+ for (; ret == 0; ret = btrfs_backref_iter_next(iter)) {
2963
+ struct extent_buffer *eb;
2964
+ struct btrfs_key key;
2965
+ int type;
2966
+
2967
+ cond_resched();
2968
+ eb = btrfs_backref_get_eb(iter);
2969
+
2970
+ key.objectid = iter->bytenr;
2971
+ if (btrfs_backref_iter_is_inline_ref(iter)) {
2972
+ struct btrfs_extent_inline_ref *iref;
2973
+
2974
+ /* Update key for inline backref */
2975
+ iref = (struct btrfs_extent_inline_ref *)
2976
+ ((unsigned long)iter->cur_ptr);
2977
+ type = btrfs_get_extent_inline_ref_type(eb, iref,
2978
+ BTRFS_REF_TYPE_BLOCK);
2979
+ if (type == BTRFS_REF_TYPE_INVALID) {
2980
+ ret = -EUCLEAN;
2981
+ goto out;
2982
+ }
2983
+ key.type = type;
2984
+ key.offset = btrfs_extent_inline_ref_offset(eb, iref);
2985
+ } else {
2986
+ key.type = iter->cur_key.type;
2987
+ key.offset = iter->cur_key.offset;
2988
+ }
2989
+
2990
+ /*
2991
+ * Parent node found and matches current inline ref, no need to
2992
+ * rebuild this node for this inline ref
2993
+ */
2994
+ if (exist &&
2995
+ ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
2996
+ exist->owner == key.offset) ||
2997
+ (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
2998
+ exist->bytenr == key.offset))) {
2999
+ exist = NULL;
3000
+ continue;
3001
+ }
3002
+
3003
+ /* SHARED_BLOCK_REF means key.offset is the parent bytenr */
3004
+ if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
3005
+ ret = handle_direct_tree_backref(cache, &key, cur);
3006
+ if (ret < 0)
3007
+ goto out;
3008
+ continue;
3009
+ } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
3010
+ ret = -EINVAL;
3011
+ btrfs_print_v0_err(fs_info);
3012
+ btrfs_handle_fs_error(fs_info, ret, NULL);
3013
+ goto out;
3014
+ } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
3015
+ continue;
3016
+ }
3017
+
3018
+ /*
3019
+ * key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref offset
3020
+ * means the root objectid. We need to search the tree to get
3021
+ * its parent bytenr.
3022
+ */
3023
+ ret = handle_indirect_tree_backref(cache, path, &key, node_key,
3024
+ cur);
3025
+ if (ret < 0)
3026
+ goto out;
3027
+ }
3028
+ ret = 0;
3029
+ cur->checked = 1;
3030
+ WARN_ON(exist);
3031
+out:
3032
+ btrfs_backref_iter_release(iter);
3033
+ return ret;
3034
+}
3035
+
3036
+/*
3037
+ * Finish the upwards linkage created by btrfs_backref_add_tree_node()
3038
+ */
3039
+int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
3040
+ struct btrfs_backref_node *start)
3041
+{
3042
+ struct list_head *useless_node = &cache->useless_node;
3043
+ struct btrfs_backref_edge *edge;
3044
+ struct rb_node *rb_node;
3045
+ LIST_HEAD(pending_edge);
3046
+
3047
+ ASSERT(start->checked);
3048
+
3049
+ /* Insert this node to cache if it's not COW-only */
3050
+ if (!start->cowonly) {
3051
+ rb_node = rb_simple_insert(&cache->rb_root, start->bytenr,
3052
+ &start->rb_node);
3053
+ if (rb_node)
3054
+ btrfs_backref_panic(cache->fs_info, start->bytenr,
3055
+ -EEXIST);
3056
+ list_add_tail(&start->lower, &cache->leaves);
3057
+ }
3058
+
3059
+ /*
3060
+ * Use breadth first search to iterate all related edges.
3061
+ *
3062
+ * The starting points are all the edges of this node
3063
+ */
3064
+ list_for_each_entry(edge, &start->upper, list[LOWER])
3065
+ list_add_tail(&edge->list[UPPER], &pending_edge);
3066
+
3067
+ while (!list_empty(&pending_edge)) {
3068
+ struct btrfs_backref_node *upper;
3069
+ struct btrfs_backref_node *lower;
3070
+
3071
+ edge = list_first_entry(&pending_edge,
3072
+ struct btrfs_backref_edge, list[UPPER]);
3073
+ list_del_init(&edge->list[UPPER]);
3074
+ upper = edge->node[UPPER];
3075
+ lower = edge->node[LOWER];
3076
+
3077
+ /* Parent is detached, no need to keep any edges */
3078
+ if (upper->detached) {
3079
+ list_del(&edge->list[LOWER]);
3080
+ btrfs_backref_free_edge(cache, edge);
3081
+
3082
+ /* Lower node is orphan, queue for cleanup */
3083
+ if (list_empty(&lower->upper))
3084
+ list_add(&lower->list, useless_node);
3085
+ continue;
3086
+ }
3087
+
3088
+ /*
3089
+ * All new nodes added in current build_backref_tree() haven't
3090
+ * been linked to the cache rb tree.
3091
+ * So if we have upper->rb_node populated, this means a cache
3092
+ * hit. We only need to link the edge, as @upper and all its
3093
+ * parents have already been linked.
3094
+ */
3095
+ if (!RB_EMPTY_NODE(&upper->rb_node)) {
3096
+ if (upper->lowest) {
3097
+ list_del_init(&upper->lower);
3098
+ upper->lowest = 0;
3099
+ }
3100
+
3101
+ list_add_tail(&edge->list[UPPER], &upper->lower);
3102
+ continue;
3103
+ }
3104
+
3105
+ /* Sanity check, we shouldn't have any unchecked nodes */
3106
+ if (!upper->checked) {
3107
+ ASSERT(0);
3108
+ return -EUCLEAN;
3109
+ }
3110
+
3111
+ /* Sanity check, COW-only node has non-COW-only parent */
3112
+ if (start->cowonly != upper->cowonly) {
3113
+ ASSERT(0);
3114
+ return -EUCLEAN;
3115
+ }
3116
+
3117
+ /* Only cache non-COW-only (subvolume trees) tree blocks */
3118
+ if (!upper->cowonly) {
3119
+ rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr,
3120
+ &upper->rb_node);
3121
+ if (rb_node) {
3122
+ btrfs_backref_panic(cache->fs_info,
3123
+ upper->bytenr, -EEXIST);
3124
+ return -EUCLEAN;
3125
+ }
3126
+ }
3127
+
3128
+ list_add_tail(&edge->list[UPPER], &upper->lower);
3129
+
3130
+ /*
3131
+ * Also queue all the parent edges of this uncached node
3132
+ * to finish the upper linkage
3133
+ */
3134
+ list_for_each_entry(edge, &upper->upper, list[LOWER])
3135
+ list_add_tail(&edge->list[UPPER], &pending_edge);
3136
+ }
3137
+ return 0;
3138
+}
3139
+
3140
+void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache,
3141
+ struct btrfs_backref_node *node)
3142
+{
3143
+ struct btrfs_backref_node *lower;
3144
+ struct btrfs_backref_node *upper;
3145
+ struct btrfs_backref_edge *edge;
3146
+
3147
+ while (!list_empty(&cache->useless_node)) {
3148
+ lower = list_first_entry(&cache->useless_node,
3149
+ struct btrfs_backref_node, list);
3150
+ list_del_init(&lower->list);
3151
+ }
3152
+ while (!list_empty(&cache->pending_edge)) {
3153
+ edge = list_first_entry(&cache->pending_edge,
3154
+ struct btrfs_backref_edge, list[UPPER]);
3155
+ list_del(&edge->list[UPPER]);
3156
+ list_del(&edge->list[LOWER]);
3157
+ lower = edge->node[LOWER];
3158
+ upper = edge->node[UPPER];
3159
+ btrfs_backref_free_edge(cache, edge);
3160
+
3161
+ /*
3162
+ * Lower is no longer linked to any upper backref nodes and
3163
+ * isn't in the cache, we can free it ourselves.
3164
+ */
3165
+ if (list_empty(&lower->upper) &&
3166
+ RB_EMPTY_NODE(&lower->rb_node))
3167
+ list_add(&lower->list, &cache->useless_node);
3168
+
3169
+ if (!RB_EMPTY_NODE(&upper->rb_node))
3170
+ continue;
3171
+
3172
+ /* Add this guy's upper edges to the list to process */
3173
+ list_for_each_entry(edge, &upper->upper, list[LOWER])
3174
+ list_add_tail(&edge->list[UPPER],
3175
+ &cache->pending_edge);
3176
+ if (list_empty(&upper->upper))
3177
+ list_add(&upper->list, &cache->useless_node);
3178
+ }
3179
+
3180
+ while (!list_empty(&cache->useless_node)) {
3181
+ lower = list_first_entry(&cache->useless_node,
3182
+ struct btrfs_backref_node, list);
3183
+ list_del_init(&lower->list);
3184
+ if (lower == node)
3185
+ node = NULL;
3186
+ btrfs_backref_drop_node(cache, lower);
3187
+ }
3188
+
3189
+ btrfs_backref_cleanup_node(cache, node);
3190
+ ASSERT(list_empty(&cache->useless_node) &&
3191
+ list_empty(&cache->pending_edge));
3192
+}