2024-05-10 9999e48639b3cecb08ffb37358bcba3b48161b29
kernel/fs/btrfs/relocation.c
@@ -9,6 +9,7 @@
 #include <linux/blkdev.h>
 #include <linux/rbtree.h>
 #include <linux/slab.h>
+#include <linux/error-injection.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -20,101 +21,67 @@
 #include "inode-map.h"
 #include "qgroup.h"
 #include "print-tree.h"
+#include "delalloc-space.h"
+#include "block-group.h"
+#include "backref.h"
+#include "misc.h"
 
 /*
- * backref_node, mapping_node and tree_block start with this
+ * Relocation overview
+ *
+ * [What does relocation do]
+ *
+ * The objective of relocation is to relocate all extents of the target block
+ * group to other block groups.
+ * This is used by resize (shrink only), profile conversion, space compaction,
+ * and the balance routine to spread chunks over devices.
+ *
+ *		Before		|		After
+ * ------------------------------------------------------------------
+ *  BG A: 10 data extents	| BG A: deleted
+ *  BG B:  2 data extents	| BG B: 10 data extents (2 old + 8 relocated)
+ *  BG C:  1 data extent	| BG C:  3 data extents (1 old + 2 relocated)
+ *
+ * [How does relocation work]
+ *
+ * 1.   Mark the target block group read-only
+ *      New extents won't be allocated from the target block group.
+ *
+ * 2.1  Record each extent in the target block group
+ *      To build a proper map of extents to be relocated.
+ *
+ * 2.2  Build data reloc tree and reloc trees
+ *      Data reloc tree will contain an inode, recording all newly relocated
+ *      data extents.
+ *      There will be only one data reloc tree for one data block group.
+ *
+ *      Reloc tree will be a special snapshot of its source tree, containing
+ *      relocated tree blocks.
+ *      Each tree referring to a tree block in the target block group will get
+ *      its reloc tree built.
+ *
+ * 2.3  Swap source tree with its corresponding reloc tree
+ *      Each involved tree only refers to new extents after the swap.
+ *
+ * 3.   Clean up reloc trees and the data reloc tree.
+ *      As old extents in the target block group are still referenced by reloc
+ *      trees, we need to clean them up before really freeing the target block
+ *      group.
+ *
+ * The main complexity is in steps 2.2 and 2.3.
+ *
+ * The entry point of relocation is the relocate_block_group() function.
 */
27
-struct tree_entry {
28
- struct rb_node rb_node;
29
- u64 bytenr;
30
-};
3175
32
-/*
33
- * present a tree block in the backref cache
34
- */
35
-struct backref_node {
36
- struct rb_node rb_node;
37
- u64 bytenr;
38
-
39
- u64 new_bytenr;
40
- /* objectid of tree block owner, can be not uptodate */
41
- u64 owner;
42
- /* link to pending, changed or detached list */
43
- struct list_head list;
44
- /* list of upper level blocks reference this block */
45
- struct list_head upper;
46
- /* list of child blocks in the cache */
47
- struct list_head lower;
48
- /* NULL if this node is not tree root */
49
- struct btrfs_root *root;
50
- /* extent buffer got by COW the block */
51
- struct extent_buffer *eb;
52
- /* level of tree block */
53
- unsigned int level:8;
54
- /* is the block in non-reference counted tree */
55
- unsigned int cowonly:1;
56
- /* 1 if no child node in the cache */
57
- unsigned int lowest:1;
58
- /* is the extent buffer locked */
59
- unsigned int locked:1;
60
- /* has the block been processed */
61
- unsigned int processed:1;
62
- /* have backrefs of this block been checked */
63
- unsigned int checked:1;
64
- /*
65
- * 1 if corresponding block has been cowed but some upper
66
- * level block pointers may not point to the new location
67
- */
68
- unsigned int pending:1;
69
- /*
70
- * 1 if the backref node isn't connected to any other
71
- * backref node.
72
- */
73
- unsigned int detached:1;
74
-};
75
-
76
-/*
77
- * present a block pointer in the backref cache
78
- */
79
-struct backref_edge {
80
- struct list_head list[2];
81
- struct backref_node *node[2];
82
-};
83
-
84
-#define LOWER 0
85
-#define UPPER 1
8676 #define RELOCATION_RESERVED_NODES 256
87
-
88
-struct backref_cache {
89
- /* red black tree of all backref nodes in the cache */
90
- struct rb_root rb_root;
91
- /* for passing backref nodes to btrfs_reloc_cow_block */
92
- struct backref_node *path[BTRFS_MAX_LEVEL];
93
- /*
94
- * list of blocks that have been cowed but some block
95
- * pointers in upper level blocks may not reflect the
96
- * new location
97
- */
98
- struct list_head pending[BTRFS_MAX_LEVEL];
99
- /* list of backref nodes with no child node */
100
- struct list_head leaves;
101
- /* list of blocks that have been cowed in current transaction */
102
- struct list_head changed;
103
- /* list of detached backref node. */
104
- struct list_head detached;
105
-
106
- u64 last_trans;
107
-
108
- int nr_nodes;
109
- int nr_edges;
110
-};
111
-
 /*
  * map address of tree root to tree
  */
 struct mapping_node {
-	struct rb_node rb_node;
-	u64 bytenr;
+	struct {
+		struct rb_node rb_node;
+		u64 bytenr;
+	}; /* Use rb_simple_node for search/insert */
 	void *data;
 };
 
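
The relocation overview added above maps onto the code roughly as follows. This is a simplified sketch for orientation only, not the literal kernel implementation; relocate_block_group() and rc->extents_found are real names from this file, while the surrounding driver loop is an assumption about how btrfs_relocate_block_group() uses them.

	/* Sketch: how the numbered steps of the overview are driven. */
	static int relocation_flow_sketch(struct reloc_control *rc)
	{
		int ret;

		/* Step 1: the caller has already marked rc->block_group read-only. */
		while (1) {
			/*
			 * Steps 2.1-2.3: record extents, build the data reloc tree
			 * and the per-subvolume reloc trees, relocate the extents,
			 * then merge/swap the reloc trees back into their sources.
			 */
			ret = relocate_block_group(rc);
			if (ret < 0)
				return ret;
			/* Step 3 (cleaning up reloc trees) happens inside
			 * relocate_block_group() via merge_reloc_roots(). */
			if (rc->extents_found == 0)
				break;
		}
		return 0;
	}
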
@@ -127,8 +94,10 @@
  * present a tree block to process
  */
 struct tree_block {
-	struct rb_node rb_node;
-	u64 bytenr;
+	struct {
+		struct rb_node rb_node;
+		u64 bytenr;
+	}; /* Use rb_simple_node for search/insert */
 	struct btrfs_key key;
 	unsigned int level:8;
 	unsigned int key_ready:1;
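
The anonymous leading struct added to mapping_node and tree_block mirrors struct rb_simple_node, so both types can be handled by the generic rb-tree helpers used throughout this patch. A sketch of what those helpers look like, assuming the rb_simple_node layout in misc.h (see misc.h for the authoritative definitions; rb_simple_insert() is the analogous insert-side helper):

	struct rb_simple_node {
		struct rb_node rb_node;
		u64 bytenr;
	};

	/* Works for any struct whose first members match rb_simple_node. */
	static inline struct rb_node *rb_simple_search(struct rb_root *root, u64 bytenr)
	{
		struct rb_node *node = root->rb_node;
		struct rb_simple_node *entry;

		while (node) {
			entry = rb_entry(node, struct rb_simple_node, rb_node);
			if (bytenr < entry->bytenr)
				node = node->rb_left;
			else if (bytenr > entry->bytenr)
				node = node->rb_right;
			else
				return node;
		}
		return NULL;
	}
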
....@@ -145,7 +114,7 @@
145114
146115 struct reloc_control {
147116 /* block group to relocate */
148
- struct btrfs_block_group_cache *block_group;
117
+ struct btrfs_block_group *block_group;
149118 /* extent tree */
150119 struct btrfs_root *extent_root;
151120 /* inode for moving data */
....@@ -153,7 +122,7 @@
153122
154123 struct btrfs_block_rsv *block_rsv;
155124
156
- struct backref_cache backref_cache;
125
+ struct btrfs_backref_cache backref_cache;
157126
158127 struct file_extent_cluster cluster;
159128 /* tree blocks have been processed */
....@@ -162,6 +131,8 @@
162131 struct mapping_tree reloc_root_tree;
163132 /* list of reloc trees */
164133 struct list_head reloc_roots;
134
+ /* list of subvolume trees that get relocated */
135
+ struct list_head dirty_subvol_roots;
165136 /* size of metadata reservation for merging reloc trees */
166137 u64 merging_rsv_size;
167138 /* size of relocated tree nodes */
@@ -182,10 +153,21 @@
 #define MOVE_DATA_EXTENTS 0
 #define UPDATE_DATA_PTRS 1
 
-static void remove_backref_node(struct backref_cache *cache,
-				struct backref_node *node);
-static void __mark_block_processed(struct reloc_control *rc,
-				   struct backref_node *node);
+static void mark_block_processed(struct reloc_control *rc,
+				 struct btrfs_backref_node *node)
+{
+	u32 blocksize;
+
+	if (node->level == 0 ||
+	    in_range(node->bytenr, rc->block_group->start,
+		     rc->block_group->length)) {
+		blocksize = rc->extent_root->fs_info->nodesize;
+		set_extent_bits(&rc->processed_blocks, node->bytenr,
+				node->bytenr + blocksize - 1, EXTENT_DIRTY);
+	}
+	node->processed = 1;
+}
+
 
 static void mapping_tree_init(struct mapping_tree *tree)
 {
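
mark_block_processed() records blocks in the rc->processed_blocks extent-io tree. The matching query helper elsewhere in this file is tree_block_processed(); a sketch of it is shown here for context (reproduced from memory, the exact signature may differ in this version):

	static int tree_block_processed(u64 bytenr, struct reloc_control *rc)
	{
		u32 blocksize = rc->extent_root->fs_info->nodesize;

		if (test_range_bit(&rc->processed_blocks, bytenr,
				   bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
			return 1;
		return 0;
	}
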
....@@ -193,156 +175,19 @@
193175 spin_lock_init(&tree->lock);
194176 }
195177
196
-static void backref_cache_init(struct backref_cache *cache)
197
-{
198
- int i;
199
- cache->rb_root = RB_ROOT;
200
- for (i = 0; i < BTRFS_MAX_LEVEL; i++)
201
- INIT_LIST_HEAD(&cache->pending[i]);
202
- INIT_LIST_HEAD(&cache->changed);
203
- INIT_LIST_HEAD(&cache->detached);
204
- INIT_LIST_HEAD(&cache->leaves);
205
-}
206
-
207
-static void backref_cache_cleanup(struct backref_cache *cache)
208
-{
209
- struct backref_node *node;
210
- int i;
211
-
212
- while (!list_empty(&cache->detached)) {
213
- node = list_entry(cache->detached.next,
214
- struct backref_node, list);
215
- remove_backref_node(cache, node);
216
- }
217
-
218
- while (!list_empty(&cache->leaves)) {
219
- node = list_entry(cache->leaves.next,
220
- struct backref_node, lower);
221
- remove_backref_node(cache, node);
222
- }
223
-
224
- cache->last_trans = 0;
225
-
226
- for (i = 0; i < BTRFS_MAX_LEVEL; i++)
227
- ASSERT(list_empty(&cache->pending[i]));
228
- ASSERT(list_empty(&cache->changed));
229
- ASSERT(list_empty(&cache->detached));
230
- ASSERT(RB_EMPTY_ROOT(&cache->rb_root));
231
- ASSERT(!cache->nr_nodes);
232
- ASSERT(!cache->nr_edges);
233
-}
234
-
235
-static struct backref_node *alloc_backref_node(struct backref_cache *cache)
236
-{
237
- struct backref_node *node;
238
-
239
- node = kzalloc(sizeof(*node), GFP_NOFS);
240
- if (node) {
241
- INIT_LIST_HEAD(&node->list);
242
- INIT_LIST_HEAD(&node->upper);
243
- INIT_LIST_HEAD(&node->lower);
244
- RB_CLEAR_NODE(&node->rb_node);
245
- cache->nr_nodes++;
246
- }
247
- return node;
248
-}
249
-
250
-static void free_backref_node(struct backref_cache *cache,
251
- struct backref_node *node)
252
-{
253
- if (node) {
254
- cache->nr_nodes--;
255
- kfree(node);
256
- }
257
-}
258
-
259
-static struct backref_edge *alloc_backref_edge(struct backref_cache *cache)
260
-{
261
- struct backref_edge *edge;
262
-
263
- edge = kzalloc(sizeof(*edge), GFP_NOFS);
264
- if (edge)
265
- cache->nr_edges++;
266
- return edge;
267
-}
268
-
269
-static void free_backref_edge(struct backref_cache *cache,
270
- struct backref_edge *edge)
271
-{
272
- if (edge) {
273
- cache->nr_edges--;
274
- kfree(edge);
275
- }
276
-}
277
-
278
-static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
279
- struct rb_node *node)
280
-{
281
- struct rb_node **p = &root->rb_node;
282
- struct rb_node *parent = NULL;
283
- struct tree_entry *entry;
284
-
285
- while (*p) {
286
- parent = *p;
287
- entry = rb_entry(parent, struct tree_entry, rb_node);
288
-
289
- if (bytenr < entry->bytenr)
290
- p = &(*p)->rb_left;
291
- else if (bytenr > entry->bytenr)
292
- p = &(*p)->rb_right;
293
- else
294
- return parent;
295
- }
296
-
297
- rb_link_node(node, parent, p);
298
- rb_insert_color(node, root);
299
- return NULL;
300
-}
301
-
302
-static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
303
-{
304
- struct rb_node *n = root->rb_node;
305
- struct tree_entry *entry;
306
-
307
- while (n) {
308
- entry = rb_entry(n, struct tree_entry, rb_node);
309
-
310
- if (bytenr < entry->bytenr)
311
- n = n->rb_left;
312
- else if (bytenr > entry->bytenr)
313
- n = n->rb_right;
314
- else
315
- return n;
316
- }
317
- return NULL;
318
-}
319
-
320
-static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr)
321
-{
322
-
323
- struct btrfs_fs_info *fs_info = NULL;
324
- struct backref_node *bnode = rb_entry(rb_node, struct backref_node,
325
- rb_node);
326
- if (bnode->root)
327
- fs_info = bnode->root->fs_info;
328
- btrfs_panic(fs_info, errno,
329
- "Inconsistency in backref cache found at offset %llu",
330
- bytenr);
331
-}
332
-
333178 /*
334179 * walk up backref nodes until reach node presents tree root
335180 */
336
-static struct backref_node *walk_up_backref(struct backref_node *node,
337
- struct backref_edge *edges[],
338
- int *index)
181
+static struct btrfs_backref_node *walk_up_backref(
182
+ struct btrfs_backref_node *node,
183
+ struct btrfs_backref_edge *edges[], int *index)
339184 {
340
- struct backref_edge *edge;
185
+ struct btrfs_backref_edge *edge;
341186 int idx = *index;
342187
343188 while (!list_empty(&node->upper)) {
344189 edge = list_entry(node->upper.next,
345
- struct backref_edge, list[LOWER]);
190
+ struct btrfs_backref_edge, list[LOWER]);
346191 edges[idx++] = edge;
347192 node = edge->node[UPPER];
348193 }
....@@ -354,11 +199,11 @@
354199 /*
355200 * walk down backref nodes to find start of next reference path
356201 */
357
-static struct backref_node *walk_down_backref(struct backref_edge *edges[],
358
- int *index)
202
+static struct btrfs_backref_node *walk_down_backref(
203
+ struct btrfs_backref_edge *edges[], int *index)
359204 {
360
- struct backref_edge *edge;
361
- struct backref_node *lower;
205
+ struct btrfs_backref_edge *edge;
206
+ struct btrfs_backref_node *lower;
362207 int idx = *index;
363208
364209 while (idx > 0) {
....@@ -369,7 +214,7 @@
369214 continue;
370215 }
371216 edge = list_entry(edge->list[LOWER].next,
372
- struct backref_edge, list[LOWER]);
217
+ struct btrfs_backref_edge, list[LOWER]);
373218 edges[idx - 1] = edge;
374219 *index = idx;
375220 return edge->node[UPPER];
....@@ -378,95 +223,24 @@
378223 return NULL;
379224 }
380225
381
-static void unlock_node_buffer(struct backref_node *node)
382
-{
383
- if (node->locked) {
384
- btrfs_tree_unlock(node->eb);
385
- node->locked = 0;
386
- }
387
-}
388
-
389
-static void drop_node_buffer(struct backref_node *node)
390
-{
391
- if (node->eb) {
392
- unlock_node_buffer(node);
393
- free_extent_buffer(node->eb);
394
- node->eb = NULL;
395
- }
396
-}
397
-
398
-static void drop_backref_node(struct backref_cache *tree,
399
- struct backref_node *node)
400
-{
401
- BUG_ON(!list_empty(&node->upper));
402
-
403
- drop_node_buffer(node);
404
- list_del(&node->list);
405
- list_del(&node->lower);
406
- if (!RB_EMPTY_NODE(&node->rb_node))
407
- rb_erase(&node->rb_node, &tree->rb_root);
408
- free_backref_node(tree, node);
409
-}
410
-
411
-/*
412
- * remove a backref node from the backref cache
413
- */
414
-static void remove_backref_node(struct backref_cache *cache,
415
- struct backref_node *node)
416
-{
417
- struct backref_node *upper;
418
- struct backref_edge *edge;
419
-
420
- if (!node)
421
- return;
422
-
423
- BUG_ON(!node->lowest && !node->detached);
424
- while (!list_empty(&node->upper)) {
425
- edge = list_entry(node->upper.next, struct backref_edge,
426
- list[LOWER]);
427
- upper = edge->node[UPPER];
428
- list_del(&edge->list[LOWER]);
429
- list_del(&edge->list[UPPER]);
430
- free_backref_edge(cache, edge);
431
-
432
- if (RB_EMPTY_NODE(&upper->rb_node)) {
433
- BUG_ON(!list_empty(&node->upper));
434
- drop_backref_node(cache, node);
435
- node = upper;
436
- node->lowest = 1;
437
- continue;
438
- }
439
- /*
440
- * add the node to leaf node list if no other
441
- * child block cached.
442
- */
443
- if (list_empty(&upper->lower)) {
444
- list_add_tail(&upper->lower, &cache->leaves);
445
- upper->lowest = 1;
446
- }
447
- }
448
-
449
- drop_backref_node(cache, node);
450
-}
451
-
452
-static void update_backref_node(struct backref_cache *cache,
453
- struct backref_node *node, u64 bytenr)
226
+static void update_backref_node(struct btrfs_backref_cache *cache,
227
+ struct btrfs_backref_node *node, u64 bytenr)
454228 {
455229 struct rb_node *rb_node;
456230 rb_erase(&node->rb_node, &cache->rb_root);
457231 node->bytenr = bytenr;
458
- rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
232
+ rb_node = rb_simple_insert(&cache->rb_root, node->bytenr, &node->rb_node);
459233 if (rb_node)
460
- backref_tree_panic(rb_node, -EEXIST, bytenr);
234
+ btrfs_backref_panic(cache->fs_info, bytenr, -EEXIST);
461235 }
462236
463237 /*
464238 * update backref cache after a transaction commit
465239 */
466240 static int update_backref_cache(struct btrfs_trans_handle *trans,
467
- struct backref_cache *cache)
241
+ struct btrfs_backref_cache *cache)
468242 {
469
- struct backref_node *node;
243
+ struct btrfs_backref_node *node;
470244 int level = 0;
471245
472246 if (cache->last_trans == 0) {
....@@ -484,13 +258,13 @@
484258 */
485259 while (!list_empty(&cache->detached)) {
486260 node = list_entry(cache->detached.next,
487
- struct backref_node, list);
488
- remove_backref_node(cache, node);
261
+ struct btrfs_backref_node, list);
262
+ btrfs_backref_cleanup_node(cache, node);
489263 }
490264
491265 while (!list_empty(&cache->changed)) {
492266 node = list_entry(cache->changed.next,
493
- struct backref_node, list);
267
+ struct btrfs_backref_node, list);
494268 list_del_init(&node->list);
495269 BUG_ON(node->pending);
496270 update_backref_node(cache, node, node->new_bytenr);
@@ -513,13 +287,46 @@
 	return 1;
 }
 
+static bool reloc_root_is_dead(struct btrfs_root *root)
+{
+	/*
+	 * Pair with set_bit/clear_bit in clean_dirty_subvols and
+	 * btrfs_update_reloc_root. We need to see the updated bit before
+	 * trying to access reloc_root.
+	 */
+	smp_rmb();
+	if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
+		return true;
+	return false;
+}
 
-static int should_ignore_root(struct btrfs_root *root)
+/*
+ * Check if this subvolume tree has a valid reloc tree.
+ *
+ * A reloc tree after the swap is considered dead, and thus not valid.
+ * This is enough for most callers, as they don't distinguish a dead reloc
+ * root from no reloc root. But btrfs_should_ignore_reloc_root() below is a
+ * special case.
+ */
+static bool have_reloc_root(struct btrfs_root *root)
+{
+	if (reloc_root_is_dead(root))
+		return false;
+	if (!root->reloc_root)
+		return false;
+	return true;
+}
+
+int btrfs_should_ignore_reloc_root(struct btrfs_root *root)
 {
 	struct btrfs_root *reloc_root;
 
-	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+	if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
 		return 0;
+
+	/* This root has been merged with its reloc tree, we can ignore it */
+	if (reloc_root_is_dead(root))
+		return 1;
 
 	reloc_root = root->reloc_root;
 	if (!reloc_root)
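
The smp_rmb() in reloc_root_is_dead() pairs with the smp_wmb() on the writer side (see btrfs_update_reloc_root() further down in this patch). The intended ordering, sketched as a comment; this is an illustration of the pairing described in the code comments, not an exact transcript of either function:

	/*
	 * Writer (merge finished)                 Reader (have_reloc_root() etc.)
	 *
	 * set_bit(BTRFS_ROOT_DEAD_RELOC_TREE);    smp_rmb();
	 * smp_wmb();                              if (test_bit(...DEAD_RELOC_TREE...))
	 * tear down the reloc root                        return;  /* don't deref */
	 *                                         use root->reloc_root;
	 *
	 * The bit is published before the reloc root is torn down, so a reader
	 * that sees the bit clear can still safely use root->reloc_root.
	 */
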
....@@ -536,615 +343,187 @@
536343 */
537344 return 1;
538345 }
346
+
539347 /*
540348 * find reloc tree by address of tree root
541349 */
542
-static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
543
- u64 bytenr)
350
+struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr)
544351 {
352
+ struct reloc_control *rc = fs_info->reloc_ctl;
545353 struct rb_node *rb_node;
546354 struct mapping_node *node;
547355 struct btrfs_root *root = NULL;
548356
357
+ ASSERT(rc);
549358 spin_lock(&rc->reloc_root_tree.lock);
550
- rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr);
359
+ rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root, bytenr);
551360 if (rb_node) {
552361 node = rb_entry(rb_node, struct mapping_node, rb_node);
553362 root = (struct btrfs_root *)node->data;
554363 }
555364 spin_unlock(&rc->reloc_root_tree.lock);
556
- return root;
557
-}
558
-
559
-static int is_cowonly_root(u64 root_objectid)
560
-{
561
- if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
562
- root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
563
- root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
564
- root_objectid == BTRFS_DEV_TREE_OBJECTID ||
565
- root_objectid == BTRFS_TREE_LOG_OBJECTID ||
566
- root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
567
- root_objectid == BTRFS_UUID_TREE_OBJECTID ||
568
- root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
569
- root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
570
- return 1;
571
- return 0;
572
-}
573
-
574
-static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
575
- u64 root_objectid)
576
-{
577
- struct btrfs_key key;
578
-
579
- key.objectid = root_objectid;
580
- key.type = BTRFS_ROOT_ITEM_KEY;
581
- if (is_cowonly_root(root_objectid))
582
- key.offset = 0;
583
- else
584
- key.offset = (u64)-1;
585
-
586
- return btrfs_get_fs_root(fs_info, &key, false);
587
-}
588
-
589
-static noinline_for_stack
590
-int find_inline_backref(struct extent_buffer *leaf, int slot,
591
- unsigned long *ptr, unsigned long *end)
592
-{
593
- struct btrfs_key key;
594
- struct btrfs_extent_item *ei;
595
- struct btrfs_tree_block_info *bi;
596
- u32 item_size;
597
-
598
- btrfs_item_key_to_cpu(leaf, &key, slot);
599
-
600
- item_size = btrfs_item_size_nr(leaf, slot);
601
- if (item_size < sizeof(*ei)) {
602
- btrfs_print_v0_err(leaf->fs_info);
603
- btrfs_handle_fs_error(leaf->fs_info, -EINVAL, NULL);
604
- return 1;
605
- }
606
- ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
607
- WARN_ON(!(btrfs_extent_flags(leaf, ei) &
608
- BTRFS_EXTENT_FLAG_TREE_BLOCK));
609
-
610
- if (key.type == BTRFS_EXTENT_ITEM_KEY &&
611
- item_size <= sizeof(*ei) + sizeof(*bi)) {
612
- WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
613
- return 1;
614
- }
615
- if (key.type == BTRFS_METADATA_ITEM_KEY &&
616
- item_size <= sizeof(*ei)) {
617
- WARN_ON(item_size < sizeof(*ei));
618
- return 1;
619
- }
620
-
621
- if (key.type == BTRFS_EXTENT_ITEM_KEY) {
622
- bi = (struct btrfs_tree_block_info *)(ei + 1);
623
- *ptr = (unsigned long)(bi + 1);
624
- } else {
625
- *ptr = (unsigned long)(ei + 1);
626
- }
627
- *end = (unsigned long)ei + item_size;
628
- return 0;
365
+ return btrfs_grab_root(root);
629366 }
630367
 /*
- * build backref tree for a given tree block. root of the backref tree
- * corresponds the tree block, leaves of the backref tree correspond
- * roots of b-trees that reference the tree block.
+ * For useless nodes, do two major cleanups:
  *
- * the basic idea of this function is check backrefs of a given block
- * to find upper level blocks that reference the block, and then check
- * backrefs of these upper level blocks recursively. the recursion stop
- * when tree root is reached or backrefs for the block is cached.
+ * - Cleanup the children edges and nodes
+ *   If a child node is also orphan (no parent) during cleanup, then the
+ *   child node will also be cleaned up.
  *
- * NOTE: if we find backrefs for a block are cached, we know backrefs
- * for all upper level blocks that directly/indirectly reference the
- * block are also cached.
+ * - Freeing up leaves (level 0), keeping nodes detached
+ *   For nodes, the node is still cached as "detached"
+ *
+ * Return false if @node is not in the @useless_nodes list.
+ * Return true if @node is in the @useless_nodes list.
  */
645
-static noinline_for_stack
646
-struct backref_node *build_backref_tree(struct reloc_control *rc,
647
- struct btrfs_key *node_key,
648
- int level, u64 bytenr)
381
+static bool handle_useless_nodes(struct reloc_control *rc,
382
+ struct btrfs_backref_node *node)
649383 {
650
- struct backref_cache *cache = &rc->backref_cache;
651
- struct btrfs_path *path1;
652
- struct btrfs_path *path2;
653
- struct extent_buffer *eb;
654
- struct btrfs_root *root;
655
- struct backref_node *cur;
656
- struct backref_node *upper;
657
- struct backref_node *lower;
658
- struct backref_node *node = NULL;
659
- struct backref_node *exist = NULL;
660
- struct backref_edge *edge;
661
- struct rb_node *rb_node;
662
- struct btrfs_key key;
663
- unsigned long end;
664
- unsigned long ptr;
665
- LIST_HEAD(list);
666
- LIST_HEAD(useless);
667
- int cowonly;
384
+ struct btrfs_backref_cache *cache = &rc->backref_cache;
385
+ struct list_head *useless_node = &cache->useless_node;
386
+ bool ret = false;
387
+
388
+ while (!list_empty(useless_node)) {
389
+ struct btrfs_backref_node *cur;
390
+
391
+ cur = list_first_entry(useless_node, struct btrfs_backref_node,
392
+ list);
393
+ list_del_init(&cur->list);
394
+
395
+ /* Only tree root nodes can be added to @useless_nodes */
396
+ ASSERT(list_empty(&cur->upper));
397
+
398
+ if (cur == node)
399
+ ret = true;
400
+
401
+ /* The node is the lowest node */
402
+ if (cur->lowest) {
403
+ list_del_init(&cur->lower);
404
+ cur->lowest = 0;
405
+ }
406
+
407
+ /* Cleanup the lower edges */
408
+ while (!list_empty(&cur->lower)) {
409
+ struct btrfs_backref_edge *edge;
410
+ struct btrfs_backref_node *lower;
411
+
412
+ edge = list_entry(cur->lower.next,
413
+ struct btrfs_backref_edge, list[UPPER]);
414
+ list_del(&edge->list[UPPER]);
415
+ list_del(&edge->list[LOWER]);
416
+ lower = edge->node[LOWER];
417
+ btrfs_backref_free_edge(cache, edge);
418
+
419
+ /* Child node is also orphan, queue for cleanup */
420
+ if (list_empty(&lower->upper))
421
+ list_add(&lower->list, useless_node);
422
+ }
423
+ /* Mark this block processed for relocation */
424
+ mark_block_processed(rc, cur);
425
+
426
+ /*
427
+ * Backref nodes for tree leaves are deleted from the cache.
428
+ * Backref nodes for upper level tree blocks are left in the
429
+ * cache to avoid unnecessary backref lookup.
430
+ */
431
+ if (cur->level > 0) {
432
+ list_add(&cur->list, &cache->detached);
433
+ cur->detached = 1;
434
+ } else {
435
+ rb_erase(&cur->rb_node, &cache->rb_root);
436
+ btrfs_backref_free_node(cache, cur);
437
+ }
438
+ }
439
+ return ret;
440
+}
+
+/*
+ * Build backref tree for a given tree block. Root of the backref tree
+ * corresponds to the tree block, leaves of the backref tree correspond to
+ * roots of b-trees that reference the tree block.
+ *
+ * The basic idea of this function is to check backrefs of a given block to
+ * find upper level blocks that reference the block, and then check backrefs
+ * of these upper level blocks recursively. The recursion stops when the tree
+ * root is reached or backrefs for the block are cached.
+ *
+ * NOTE: if we find that backrefs for a block are cached, we know backrefs for
+ * all upper level blocks that directly/indirectly reference the block are also
+ * cached.
+ */
456
+static noinline_for_stack struct btrfs_backref_node *build_backref_tree(
457
+ struct reloc_control *rc, struct btrfs_key *node_key,
458
+ int level, u64 bytenr)
459
+{
460
+ struct btrfs_backref_iter *iter;
461
+ struct btrfs_backref_cache *cache = &rc->backref_cache;
462
+ /* For searching parent of TREE_BLOCK_REF */
463
+ struct btrfs_path *path;
464
+ struct btrfs_backref_node *cur;
465
+ struct btrfs_backref_node *node = NULL;
466
+ struct btrfs_backref_edge *edge;
668467 int ret;
669468 int err = 0;
670
- bool need_check = true;
671469
672
- path1 = btrfs_alloc_path();
673
- path2 = btrfs_alloc_path();
674
- if (!path1 || !path2) {
470
+ iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info, GFP_NOFS);
471
+ if (!iter)
472
+ return ERR_PTR(-ENOMEM);
473
+ path = btrfs_alloc_path();
474
+ if (!path) {
675475 err = -ENOMEM;
676476 goto out;
677477 }
678
- path1->reada = READA_FORWARD;
679
- path2->reada = READA_FORWARD;
680478
681
- node = alloc_backref_node(cache);
479
+ node = btrfs_backref_alloc_node(cache, bytenr, level);
682480 if (!node) {
683481 err = -ENOMEM;
684482 goto out;
685483 }
686484
687
- node->bytenr = bytenr;
688
- node->level = level;
689485 node->lowest = 1;
690486 cur = node;
691
-again:
692
- end = 0;
693
- ptr = 0;
694
- key.objectid = cur->bytenr;
695
- key.type = BTRFS_METADATA_ITEM_KEY;
696
- key.offset = (u64)-1;
697487
698
- path1->search_commit_root = 1;
699
- path1->skip_locking = 1;
700
- ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1,
701
- 0, 0);
702
- if (ret < 0) {
703
- err = ret;
704
- goto out;
705
- }
706
- ASSERT(ret);
707
- ASSERT(path1->slots[0]);
708
-
709
- path1->slots[0]--;
710
-
711
- WARN_ON(cur->checked);
712
- if (!list_empty(&cur->upper)) {
713
- /*
714
- * the backref was added previously when processing
715
- * backref of type BTRFS_TREE_BLOCK_REF_KEY
716
- */
717
- ASSERT(list_is_singular(&cur->upper));
718
- edge = list_entry(cur->upper.next, struct backref_edge,
719
- list[LOWER]);
720
- ASSERT(list_empty(&edge->list[UPPER]));
721
- exist = edge->node[UPPER];
722
- /*
723
- * add the upper level block to pending list if we need
724
- * check its backrefs
725
- */
726
- if (!exist->checked)
727
- list_add_tail(&edge->list[UPPER], &list);
728
- } else {
729
- exist = NULL;
730
- }
731
-
732
- while (1) {
733
- cond_resched();
734
- eb = path1->nodes[0];
735
-
736
- if (ptr >= end) {
737
- if (path1->slots[0] >= btrfs_header_nritems(eb)) {
738
- ret = btrfs_next_leaf(rc->extent_root, path1);
739
- if (ret < 0) {
740
- err = ret;
741
- goto out;
742
- }
743
- if (ret > 0)
744
- break;
745
- eb = path1->nodes[0];
746
- }
747
-
748
- btrfs_item_key_to_cpu(eb, &key, path1->slots[0]);
749
- if (key.objectid != cur->bytenr) {
750
- WARN_ON(exist);
751
- break;
752
- }
753
-
754
- if (key.type == BTRFS_EXTENT_ITEM_KEY ||
755
- key.type == BTRFS_METADATA_ITEM_KEY) {
756
- ret = find_inline_backref(eb, path1->slots[0],
757
- &ptr, &end);
758
- if (ret)
759
- goto next;
760
- }
761
- }
762
-
763
- if (ptr < end) {
764
- /* update key for inline back ref */
765
- struct btrfs_extent_inline_ref *iref;
766
- int type;
767
- iref = (struct btrfs_extent_inline_ref *)ptr;
768
- type = btrfs_get_extent_inline_ref_type(eb, iref,
769
- BTRFS_REF_TYPE_BLOCK);
770
- if (type == BTRFS_REF_TYPE_INVALID) {
771
- err = -EUCLEAN;
772
- goto out;
773
- }
774
- key.type = type;
775
- key.offset = btrfs_extent_inline_ref_offset(eb, iref);
776
-
777
- WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY &&
778
- key.type != BTRFS_SHARED_BLOCK_REF_KEY);
779
- }
780
-
781
- if (exist &&
782
- ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
783
- exist->owner == key.offset) ||
784
- (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
785
- exist->bytenr == key.offset))) {
786
- exist = NULL;
787
- goto next;
788
- }
789
-
790
- if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
791
- if (key.objectid == key.offset) {
792
- /*
793
- * only root blocks of reloc trees use
794
- * backref of this type.
795
- */
796
- root = find_reloc_root(rc, cur->bytenr);
797
- ASSERT(root);
798
- cur->root = root;
799
- break;
800
- }
801
-
802
- edge = alloc_backref_edge(cache);
803
- if (!edge) {
804
- err = -ENOMEM;
805
- goto out;
806
- }
807
- rb_node = tree_search(&cache->rb_root, key.offset);
808
- if (!rb_node) {
809
- upper = alloc_backref_node(cache);
810
- if (!upper) {
811
- free_backref_edge(cache, edge);
812
- err = -ENOMEM;
813
- goto out;
814
- }
815
- upper->bytenr = key.offset;
816
- upper->level = cur->level + 1;
817
- /*
818
- * backrefs for the upper level block isn't
819
- * cached, add the block to pending list
820
- */
821
- list_add_tail(&edge->list[UPPER], &list);
822
- } else {
823
- upper = rb_entry(rb_node, struct backref_node,
824
- rb_node);
825
- ASSERT(upper->checked);
826
- INIT_LIST_HEAD(&edge->list[UPPER]);
827
- }
828
- list_add_tail(&edge->list[LOWER], &cur->upper);
829
- edge->node[LOWER] = cur;
830
- edge->node[UPPER] = upper;
831
-
832
- goto next;
833
- } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
834
- err = -EINVAL;
835
- btrfs_print_v0_err(rc->extent_root->fs_info);
836
- btrfs_handle_fs_error(rc->extent_root->fs_info, err,
837
- NULL);
838
- goto out;
839
- } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
840
- goto next;
841
- }
842
-
843
- /* key.type == BTRFS_TREE_BLOCK_REF_KEY */
844
- root = read_fs_root(rc->extent_root->fs_info, key.offset);
845
- if (IS_ERR(root)) {
846
- err = PTR_ERR(root);
847
- goto out;
848
- }
849
-
850
- if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
851
- cur->cowonly = 1;
852
-
853
- if (btrfs_root_level(&root->root_item) == cur->level) {
854
- /* tree root */
855
- ASSERT(btrfs_root_bytenr(&root->root_item) ==
856
- cur->bytenr);
857
- if (should_ignore_root(root))
858
- list_add(&cur->list, &useless);
859
- else
860
- cur->root = root;
861
- break;
862
- }
863
-
864
- level = cur->level + 1;
865
-
866
- /*
867
- * searching the tree to find upper level blocks
868
- * reference the block.
869
- */
870
- path2->search_commit_root = 1;
871
- path2->skip_locking = 1;
872
- path2->lowest_level = level;
873
- ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0);
874
- path2->lowest_level = 0;
488
+ /* Breadth-first search to build backref cache */
489
+ do {
490
+ ret = btrfs_backref_add_tree_node(cache, path, iter, node_key,
491
+ cur);
875492 if (ret < 0) {
876493 err = ret;
877494 goto out;
878495 }
879
- if (ret > 0 && path2->slots[level] > 0)
880
- path2->slots[level]--;
881
-
882
- eb = path2->nodes[level];
883
- if (btrfs_node_blockptr(eb, path2->slots[level]) !=
884
- cur->bytenr) {
885
- btrfs_err(root->fs_info,
886
- "couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)",
887
- cur->bytenr, level - 1, root->objectid,
888
- node_key->objectid, node_key->type,
889
- node_key->offset);
890
- err = -ENOENT;
891
- goto out;
496
+ edge = list_first_entry_or_null(&cache->pending_edge,
497
+ struct btrfs_backref_edge, list[UPPER]);
498
+ /*
499
+ * The pending list isn't empty, take the first block to
500
+ * process
501
+ */
502
+ if (edge) {
503
+ list_del_init(&edge->list[UPPER]);
504
+ cur = edge->node[UPPER];
892505 }
893
- lower = cur;
894
- need_check = true;
895
- for (; level < BTRFS_MAX_LEVEL; level++) {
896
- if (!path2->nodes[level]) {
897
- ASSERT(btrfs_root_bytenr(&root->root_item) ==
898
- lower->bytenr);
899
- if (should_ignore_root(root))
900
- list_add(&lower->list, &useless);
901
- else
902
- lower->root = root;
903
- break;
904
- }
506
+ } while (edge);
905507
906
- edge = alloc_backref_edge(cache);
907
- if (!edge) {
908
- err = -ENOMEM;
909
- goto out;
910
- }
911
-
912
- eb = path2->nodes[level];
913
- rb_node = tree_search(&cache->rb_root, eb->start);
914
- if (!rb_node) {
915
- upper = alloc_backref_node(cache);
916
- if (!upper) {
917
- free_backref_edge(cache, edge);
918
- err = -ENOMEM;
919
- goto out;
920
- }
921
- upper->bytenr = eb->start;
922
- upper->owner = btrfs_header_owner(eb);
923
- upper->level = lower->level + 1;
924
- if (!test_bit(BTRFS_ROOT_REF_COWS,
925
- &root->state))
926
- upper->cowonly = 1;
927
-
928
- /*
929
- * if we know the block isn't shared
930
- * we can void checking its backrefs.
931
- */
932
- if (btrfs_block_can_be_shared(root, eb))
933
- upper->checked = 0;
934
- else
935
- upper->checked = 1;
936
-
937
- /*
938
- * add the block to pending list if we
939
- * need check its backrefs, we only do this once
940
- * while walking up a tree as we will catch
941
- * anything else later on.
942
- */
943
- if (!upper->checked && need_check) {
944
- need_check = false;
945
- list_add_tail(&edge->list[UPPER],
946
- &list);
947
- } else {
948
- if (upper->checked)
949
- need_check = true;
950
- INIT_LIST_HEAD(&edge->list[UPPER]);
951
- }
952
- } else {
953
- upper = rb_entry(rb_node, struct backref_node,
954
- rb_node);
955
- ASSERT(upper->checked);
956
- INIT_LIST_HEAD(&edge->list[UPPER]);
957
- if (!upper->owner)
958
- upper->owner = btrfs_header_owner(eb);
959
- }
960
- list_add_tail(&edge->list[LOWER], &lower->upper);
961
- edge->node[LOWER] = lower;
962
- edge->node[UPPER] = upper;
963
-
964
- if (rb_node)
965
- break;
966
- lower = upper;
967
- upper = NULL;
968
- }
969
- btrfs_release_path(path2);
970
-next:
971
- if (ptr < end) {
972
- ptr += btrfs_extent_inline_ref_size(key.type);
973
- if (ptr >= end) {
974
- WARN_ON(ptr > end);
975
- ptr = 0;
976
- end = 0;
977
- }
978
- }
979
- if (ptr >= end)
980
- path1->slots[0]++;
981
- }
982
- btrfs_release_path(path1);
983
-
984
- cur->checked = 1;
985
- WARN_ON(exist);
986
-
987
- /* the pending list isn't empty, take the first block to process */
988
- if (!list_empty(&list)) {
989
- edge = list_entry(list.next, struct backref_edge, list[UPPER]);
990
- list_del_init(&edge->list[UPPER]);
991
- cur = edge->node[UPPER];
992
- goto again;
508
+ /* Finish the upper linkage of newly added edges/nodes */
509
+ ret = btrfs_backref_finish_upper_links(cache, node);
510
+ if (ret < 0) {
511
+ err = ret;
512
+ goto out;
993513 }
994514
995
- /*
996
- * everything goes well, connect backref nodes and insert backref nodes
997
- * into the cache.
998
- */
999
- ASSERT(node->checked);
1000
- cowonly = node->cowonly;
1001
- if (!cowonly) {
1002
- rb_node = tree_insert(&cache->rb_root, node->bytenr,
1003
- &node->rb_node);
1004
- if (rb_node)
1005
- backref_tree_panic(rb_node, -EEXIST, node->bytenr);
1006
- list_add_tail(&node->lower, &cache->leaves);
1007
- }
1008
-
1009
- list_for_each_entry(edge, &node->upper, list[LOWER])
1010
- list_add_tail(&edge->list[UPPER], &list);
1011
-
1012
- while (!list_empty(&list)) {
1013
- edge = list_entry(list.next, struct backref_edge, list[UPPER]);
1014
- list_del_init(&edge->list[UPPER]);
1015
- upper = edge->node[UPPER];
1016
- if (upper->detached) {
1017
- list_del(&edge->list[LOWER]);
1018
- lower = edge->node[LOWER];
1019
- free_backref_edge(cache, edge);
1020
- if (list_empty(&lower->upper))
1021
- list_add(&lower->list, &useless);
1022
- continue;
1023
- }
1024
-
1025
- if (!RB_EMPTY_NODE(&upper->rb_node)) {
1026
- if (upper->lowest) {
1027
- list_del_init(&upper->lower);
1028
- upper->lowest = 0;
1029
- }
1030
-
1031
- list_add_tail(&edge->list[UPPER], &upper->lower);
1032
- continue;
1033
- }
1034
-
1035
- if (!upper->checked) {
1036
- /*
1037
- * Still want to blow up for developers since this is a
1038
- * logic bug.
1039
- */
1040
- ASSERT(0);
1041
- err = -EINVAL;
1042
- goto out;
1043
- }
1044
- if (cowonly != upper->cowonly) {
1045
- ASSERT(0);
1046
- err = -EINVAL;
1047
- goto out;
1048
- }
1049
-
1050
- if (!cowonly) {
1051
- rb_node = tree_insert(&cache->rb_root, upper->bytenr,
1052
- &upper->rb_node);
1053
- if (rb_node)
1054
- backref_tree_panic(rb_node, -EEXIST,
1055
- upper->bytenr);
1056
- }
1057
-
1058
- list_add_tail(&edge->list[UPPER], &upper->lower);
1059
-
1060
- list_for_each_entry(edge, &upper->upper, list[LOWER])
1061
- list_add_tail(&edge->list[UPPER], &list);
1062
- }
1063
- /*
1064
- * process useless backref nodes. backref nodes for tree leaves
1065
- * are deleted from the cache. backref nodes for upper level
1066
- * tree blocks are left in the cache to avoid unnecessary backref
1067
- * lookup.
1068
- */
1069
- while (!list_empty(&useless)) {
1070
- upper = list_entry(useless.next, struct backref_node, list);
1071
- list_del_init(&upper->list);
1072
- ASSERT(list_empty(&upper->upper));
1073
- if (upper == node)
1074
- node = NULL;
1075
- if (upper->lowest) {
1076
- list_del_init(&upper->lower);
1077
- upper->lowest = 0;
1078
- }
1079
- while (!list_empty(&upper->lower)) {
1080
- edge = list_entry(upper->lower.next,
1081
- struct backref_edge, list[UPPER]);
1082
- list_del(&edge->list[UPPER]);
1083
- list_del(&edge->list[LOWER]);
1084
- lower = edge->node[LOWER];
1085
- free_backref_edge(cache, edge);
1086
-
1087
- if (list_empty(&lower->upper))
1088
- list_add(&lower->list, &useless);
1089
- }
1090
- __mark_block_processed(rc, upper);
1091
- if (upper->level > 0) {
1092
- list_add(&upper->list, &cache->detached);
1093
- upper->detached = 1;
1094
- } else {
1095
- rb_erase(&upper->rb_node, &cache->rb_root);
1096
- free_backref_node(cache, upper);
1097
- }
1098
- }
515
+ if (handle_useless_nodes(rc, node))
516
+ node = NULL;
1099517 out:
1100
- btrfs_free_path(path1);
1101
- btrfs_free_path(path2);
518
+ btrfs_backref_iter_free(iter);
519
+ btrfs_free_path(path);
1102520 if (err) {
1103
- while (!list_empty(&useless)) {
1104
- lower = list_entry(useless.next,
1105
- struct backref_node, list);
1106
- list_del_init(&lower->list);
1107
- }
1108
- while (!list_empty(&list)) {
1109
- edge = list_first_entry(&list, struct backref_edge,
1110
- list[UPPER]);
1111
- list_del(&edge->list[UPPER]);
1112
- list_del(&edge->list[LOWER]);
1113
- lower = edge->node[LOWER];
1114
- upper = edge->node[UPPER];
1115
- free_backref_edge(cache, edge);
1116
-
1117
- /*
1118
- * Lower is no longer linked to any upper backref nodes
1119
- * and isn't in the cache, we can free it ourselves.
1120
- */
1121
- if (list_empty(&lower->upper) &&
1122
- RB_EMPTY_NODE(&lower->rb_node))
1123
- list_add(&lower->list, &useless);
1124
-
1125
- if (!RB_EMPTY_NODE(&upper->rb_node))
1126
- continue;
1127
-
1128
- /* Add this guy's upper edges to the list to process */
1129
- list_for_each_entry(edge, &upper->upper, list[LOWER])
1130
- list_add_tail(&edge->list[UPPER], &list);
1131
- if (list_empty(&upper->upper))
1132
- list_add(&upper->list, &useless);
1133
- }
1134
-
1135
- while (!list_empty(&useless)) {
1136
- lower = list_entry(useless.next,
1137
- struct backref_node, list);
1138
- list_del_init(&lower->list);
1139
- if (lower == node)
1140
- node = NULL;
1141
- free_backref_node(cache, lower);
1142
- }
1143
-
1144
- remove_backref_node(cache, node);
521
+ btrfs_backref_error_cleanup(cache, node);
1145522 return ERR_PTR(err);
1146523 }
1147524 ASSERT(!node || !node->detached);
525
+ ASSERT(list_empty(&cache->useless_node) &&
526
+ list_empty(&cache->pending_edge));
1148527 return node;
1149528 }
1150529
....@@ -1159,19 +538,19 @@
1159538 struct btrfs_root *dest)
1160539 {
1161540 struct btrfs_root *reloc_root = src->reloc_root;
1162
- struct backref_cache *cache = &rc->backref_cache;
1163
- struct backref_node *node = NULL;
1164
- struct backref_node *new_node;
1165
- struct backref_edge *edge;
1166
- struct backref_edge *new_edge;
541
+ struct btrfs_backref_cache *cache = &rc->backref_cache;
542
+ struct btrfs_backref_node *node = NULL;
543
+ struct btrfs_backref_node *new_node;
544
+ struct btrfs_backref_edge *edge;
545
+ struct btrfs_backref_edge *new_edge;
1167546 struct rb_node *rb_node;
1168547
1169548 if (cache->last_trans > 0)
1170549 update_backref_cache(trans, cache);
1171550
1172
- rb_node = tree_search(&cache->rb_root, src->commit_root->start);
551
+ rb_node = rb_simple_search(&cache->rb_root, src->commit_root->start);
1173552 if (rb_node) {
1174
- node = rb_entry(rb_node, struct backref_node, rb_node);
553
+ node = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
1175554 if (node->detached)
1176555 node = NULL;
1177556 else
....@@ -1179,10 +558,10 @@
1179558 }
1180559
1181560 if (!node) {
1182
- rb_node = tree_search(&cache->rb_root,
1183
- reloc_root->commit_root->start);
561
+ rb_node = rb_simple_search(&cache->rb_root,
562
+ reloc_root->commit_root->start);
1184563 if (rb_node) {
1185
- node = rb_entry(rb_node, struct backref_node,
564
+ node = rb_entry(rb_node, struct btrfs_backref_node,
1186565 rb_node);
1187566 BUG_ON(node->detached);
1188567 }
....@@ -1191,35 +570,33 @@
1191570 if (!node)
1192571 return 0;
1193572
1194
- new_node = alloc_backref_node(cache);
573
+ new_node = btrfs_backref_alloc_node(cache, dest->node->start,
574
+ node->level);
1195575 if (!new_node)
1196576 return -ENOMEM;
1197577
1198
- new_node->bytenr = dest->node->start;
1199
- new_node->level = node->level;
1200578 new_node->lowest = node->lowest;
1201579 new_node->checked = 1;
1202
- new_node->root = dest;
580
+ new_node->root = btrfs_grab_root(dest);
581
+ ASSERT(new_node->root);
1203582
1204583 if (!node->lowest) {
1205584 list_for_each_entry(edge, &node->lower, list[UPPER]) {
1206
- new_edge = alloc_backref_edge(cache);
585
+ new_edge = btrfs_backref_alloc_edge(cache);
1207586 if (!new_edge)
1208587 goto fail;
1209588
1210
- new_edge->node[UPPER] = new_node;
1211
- new_edge->node[LOWER] = edge->node[LOWER];
1212
- list_add_tail(&new_edge->list[UPPER],
1213
- &new_node->lower);
589
+ btrfs_backref_link_edge(new_edge, edge->node[LOWER],
590
+ new_node, LINK_UPPER);
1214591 }
1215592 } else {
1216593 list_add_tail(&new_node->lower, &cache->leaves);
1217594 }
1218595
1219
- rb_node = tree_insert(&cache->rb_root, new_node->bytenr,
1220
- &new_node->rb_node);
596
+ rb_node = rb_simple_insert(&cache->rb_root, new_node->bytenr,
597
+ &new_node->rb_node);
1221598 if (rb_node)
1222
- backref_tree_panic(rb_node, -EEXIST, new_node->bytenr);
599
+ btrfs_backref_panic(trans->fs_info, new_node->bytenr, -EEXIST);
1223600
1224601 if (!new_node->lowest) {
1225602 list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
....@@ -1231,11 +608,11 @@
1231608 fail:
1232609 while (!list_empty(&new_node->lower)) {
1233610 new_edge = list_entry(new_node->lower.next,
1234
- struct backref_edge, list[UPPER]);
611
+ struct btrfs_backref_edge, list[UPPER]);
1235612 list_del(&new_edge->list[UPPER]);
1236
- free_backref_edge(cache, new_edge);
613
+ btrfs_backref_free_edge(cache, new_edge);
1237614 }
1238
- free_backref_node(cache, new_node);
615
+ btrfs_backref_free_node(cache, new_node);
1239616 return -ENOMEM;
1240617 }
1241618
....@@ -1257,8 +634,8 @@
1257634 node->data = root;
1258635
1259636 spin_lock(&rc->reloc_root_tree.lock);
1260
- rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
1261
- node->bytenr, &node->rb_node);
637
+ rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
638
+ node->bytenr, &node->rb_node);
1262639 spin_unlock(&rc->reloc_root_tree.lock);
1263640 if (rb_node) {
1264641 btrfs_panic(fs_info, -EEXIST,
....@@ -1280,11 +657,12 @@
1280657 struct rb_node *rb_node;
1281658 struct mapping_node *node = NULL;
1282659 struct reloc_control *rc = fs_info->reloc_ctl;
660
+ bool put_ref = false;
1283661
1284662 if (rc && root->node) {
1285663 spin_lock(&rc->reloc_root_tree.lock);
1286
- rb_node = tree_search(&rc->reloc_root_tree.rb_root,
1287
- root->commit_root->start);
664
+ rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
665
+ root->commit_root->start);
1288666 if (rb_node) {
1289667 node = rb_entry(rb_node, struct mapping_node, rb_node);
1290668 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
....@@ -1294,9 +672,22 @@
1294672 ASSERT(!node || (struct btrfs_root *)node->data == root);
1295673 }
1296674
675
+	/*
+	 * We only put the reloc root here if it's on the list. There's a lot
+	 * of places where the pattern is to splice the rc->reloc_roots, process
+	 * the reloc roots, and then add the reloc root back onto
+	 * rc->reloc_roots. If we call __del_reloc_root while it's off of the
+	 * list we don't want the reference being dropped, because the guy
+	 * messing with the list is in charge of the reference.
+	 */
 	spin_lock(&fs_info->trans_lock);
-	list_del_init(&root->root_list);
+	if (!list_empty(&root->root_list)) {
+		put_ref = true;
+		list_del_init(&root->root_list);
+	}
 	spin_unlock(&fs_info->trans_lock);
+	if (put_ref)
+		btrfs_put_root(root);
 	kfree(node);
 }
 
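
The put_ref logic above exists because of the splice pattern described in the comment; merge_reloc_roots() and clean_dirty_subvols() follow roughly this shape. The helper below is an illustration of the pattern only, not code from this file:

	static void reloc_roots_splice_sketch(struct reloc_control *rc)
	{
		struct btrfs_root *reloc_root;
		LIST_HEAD(reloc_roots);

		/* Take the roots off rc->reloc_roots; while they are off the
		 * list, the list reference belongs to this code, so
		 * __del_reloc_root() must not drop it. */
		list_splice_init(&rc->reloc_roots, &reloc_roots);
		while (!list_empty(&reloc_roots)) {
			reloc_root = list_first_entry(&reloc_roots,
						      struct btrfs_root, root_list);
			list_del_init(&reloc_root->root_list);
			/* ... merge or clean up the reloc root ... */
			list_add_tail(&reloc_root->root_list, &rc->reloc_roots);
		}
	}
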
....@@ -1312,8 +703,8 @@
1312703 struct reloc_control *rc = fs_info->reloc_ctl;
1313704
1314705 spin_lock(&rc->reloc_root_tree.lock);
1315
- rb_node = tree_search(&rc->reloc_root_tree.rb_root,
1316
- root->commit_root->start);
706
+ rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
707
+ root->commit_root->start);
1317708 if (rb_node) {
1318709 node = rb_entry(rb_node, struct mapping_node, rb_node);
1319710 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
....@@ -1326,11 +717,11 @@
1326717
1327718 spin_lock(&rc->reloc_root_tree.lock);
1328719 node->bytenr = root->node->start;
1329
- rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
1330
- node->bytenr, &node->rb_node);
720
+ rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
721
+ node->bytenr, &node->rb_node);
1331722 spin_unlock(&rc->reloc_root_tree.lock);
1332723 if (rb_node)
1333
- backref_tree_panic(rb_node, -EEXIST, node->bytenr);
724
+ btrfs_backref_panic(fs_info, node->bytenr, -EEXIST);
1334725 return 0;
1335726 }
1336727
....@@ -1342,10 +733,12 @@
1342733 struct extent_buffer *eb;
1343734 struct btrfs_root_item *root_item;
1344735 struct btrfs_key root_key;
1345
- int ret;
736
+ int ret = 0;
737
+ bool must_abort = false;
1346738
1347739 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
1348
- BUG_ON(!root_item);
740
+ if (!root_item)
741
+ return ERR_PTR(-ENOMEM);
1349742
1350743 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
1351744 root_key.type = BTRFS_ROOT_ITEM_KEY;
....@@ -1357,7 +750,9 @@
1357750 /* called by btrfs_init_reloc_root */
1358751 ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
1359752 BTRFS_TREE_RELOC_OBJECTID);
1360
- BUG_ON(ret);
753
+ if (ret)
754
+ goto fail;
755
+
1361756 /*
1362757 * Set the last_snapshot field to the generation of the commit
1363758 * root - like this ctree.c:btrfs_block_can_be_shared() behaves
....@@ -1378,8 +773,15 @@
1378773 */
1379774 ret = btrfs_copy_root(trans, root, root->node, &eb,
1380775 BTRFS_TREE_RELOC_OBJECTID);
1381
- BUG_ON(ret);
776
+ if (ret)
777
+ goto fail;
1382778 }
779
+
780
+ /*
781
+ * We have changed references at this point, we must abort the
782
+ * transaction if anything fails.
783
+ */
784
+ must_abort = true;
1383785
1384786 memcpy(root_item, &root->root_item, sizeof(*root_item));
1385787 btrfs_set_root_bytenr(root_item, eb->start);
....@@ -1398,18 +800,33 @@
1398800
1399801 ret = btrfs_insert_root(trans, fs_info->tree_root,
1400802 &root_key, root_item);
1401
- BUG_ON(ret);
803
+ if (ret)
804
+ goto fail;
805
+
1402806 kfree(root_item);
1403807
1404
- reloc_root = btrfs_read_fs_root(fs_info->tree_root, &root_key);
1405
- BUG_ON(IS_ERR(reloc_root));
808
+ reloc_root = btrfs_read_tree_root(fs_info->tree_root, &root_key);
809
+ if (IS_ERR(reloc_root)) {
810
+ ret = PTR_ERR(reloc_root);
811
+ goto abort;
812
+ }
813
+ set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
1406814 reloc_root->last_trans = trans->transid;
1407815 return reloc_root;
816
+fail:
817
+ kfree(root_item);
818
+abort:
819
+ if (must_abort)
820
+ btrfs_abort_transaction(trans, ret);
821
+ return ERR_PTR(ret);
1408822 }
1409823
1410824 /*
1411825 * create reloc tree for a given fs tree. reloc tree is just a
1412826 * snapshot of the fs tree with special root objectid.
827
+ *
828
+ * The reloc_root comes out of here with two references, one for
829
+ * root->reloc_root, and another for being on the rc->reloc_roots list.
1413830 */
1414831 int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
1415832 struct btrfs_root *root)
....@@ -1421,13 +838,35 @@
1421838 int clear_rsv = 0;
1422839 int ret;
1423840
841
+ if (!rc)
842
+ return 0;
843
+
844
+ /*
845
+ * The subvolume has reloc tree but the swap is finished, no need to
846
+ * create/update the dead reloc tree
847
+ */
848
+ if (reloc_root_is_dead(root))
849
+ return 0;
850
+
851
+ /*
852
+ * This is subtle but important. We do not do
853
+ * record_root_in_transaction for reloc roots, instead we record their
854
+ * corresponding fs root, and then here we update the last trans for the
855
+ * reloc root. This means that we have to do this for the entire life
856
+ * of the reloc root, regardless of which stage of the relocation we are
857
+ * in.
858
+ */
1424859 if (root->reloc_root) {
1425860 reloc_root = root->reloc_root;
1426861 reloc_root->last_trans = trans->transid;
1427862 return 0;
1428863 }
1429864
1430
- if (!rc || !rc->create_reloc_tree ||
865
+ /*
866
+ * We are merging reloc roots, we do not need new reloc trees. Also
867
+ * reloc trees never need their own reloc tree.
868
+ */
869
+ if (!rc->create_reloc_tree ||
1431870 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1432871 return 0;
1433872
....@@ -1442,7 +881,7 @@
1442881
1443882 ret = __add_reloc_root(reloc_root);
1444883 BUG_ON(ret < 0);
1445
- root->reloc_root = reloc_root;
884
+ root->reloc_root = btrfs_grab_root(reloc_root);
1446885 return 0;
1447886 }
1448887
....@@ -1457,15 +896,28 @@
1457896 struct btrfs_root_item *root_item;
1458897 int ret;
1459898
1460
- if (!root->reloc_root)
1461
- goto out;
899
+ if (!have_reloc_root(root))
900
+ return 0;
1462901
1463902 reloc_root = root->reloc_root;
1464903 root_item = &reloc_root->root_item;
1465904
905
+ /*
906
+ * We are probably ok here, but __del_reloc_root() will drop its ref of
907
+ * the root. We have the ref for root->reloc_root, but just in case
908
+ * hold it while we update the reloc root.
909
+ */
910
+ btrfs_grab_root(reloc_root);
911
+
912
+ /* root->reloc_root will stay until current relocation finished */
1466913 if (fs_info->reloc_ctl->merge_reloc_tree &&
1467914 btrfs_root_refs(root_item) == 0) {
1468
- root->reloc_root = NULL;
915
+ set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
916
+ /*
917
+ * Mark the tree as dead before we change reloc_root so
918
+ * have_reloc_root will not touch it from now on.
919
+ */
920
+ smp_wmb();
1469921 __del_reloc_root(reloc_root);
1470922 }
1471923
....@@ -1478,10 +930,8 @@
1478930
1479931 ret = btrfs_update_root(trans, fs_info->tree_root,
1480932 &reloc_root->root_key, root_item);
1481
- BUG_ON(ret);
1482
-
1483
-out:
1484
- return 0;
933
+ btrfs_put_root(reloc_root);
934
+ return ret;
1485935 }
1486936
1487937 /*
@@ -1536,15 +986,6 @@
 	}
 	spin_unlock(&root->inode_lock);
 	return NULL;
-}
-
-static int in_block_group(u64 bytenr,
-			  struct btrfs_block_group_cache *block_group)
-{
-	if (bytenr >= block_group->key.objectid &&
-	    bytenr < block_group->key.objectid + block_group->key.offset)
-		return 1;
-	return 0;
 }
 
 /*
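
in_block_group() is dropped in favour of the generic in_range() helper used in the hunks above. Roughly, that helper amounts to the following (see misc.h for the authoritative definition):

	/* Half-open range check: first <= b < first + len */
	#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
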
....@@ -1630,6 +1071,8 @@
16301071
16311072 nritems = btrfs_header_nritems(leaf);
16321073 for (i = 0; i < nritems; i++) {
1074
+ struct btrfs_ref ref = { 0 };
1075
+
16331076 cond_resched();
16341077 btrfs_item_key_to_cpu(leaf, &key, i);
16351078 if (key.type != BTRFS_EXTENT_DATA_KEY)
....@@ -1642,7 +1085,8 @@
16421085 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
16431086 if (bytenr == 0)
16441087 continue;
1645
- if (!in_block_group(bytenr, rc->block_group))
1088
+ if (!in_range(bytenr, rc->block_group->start,
1089
+ rc->block_group->length))
16461090 continue;
16471091
16481092 /*
....@@ -1690,18 +1134,23 @@
16901134 dirty = 1;
16911135
16921136 key.offset -= btrfs_file_extent_offset(leaf, fi);
1693
- ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
1694
- num_bytes, parent,
1695
- btrfs_header_owner(leaf),
1696
- key.objectid, key.offset);
1137
+ btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
1138
+ num_bytes, parent);
1139
+ ref.real_root = root->root_key.objectid;
1140
+ btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
1141
+ key.objectid, key.offset);
1142
+ ret = btrfs_inc_extent_ref(trans, &ref);
16971143 if (ret) {
16981144 btrfs_abort_transaction(trans, ret);
16991145 break;
17001146 }
17011147
1702
- ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1703
- parent, btrfs_header_owner(leaf),
1704
- key.objectid, key.offset);
1148
+ btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
1149
+ num_bytes, parent);
1150
+ ref.real_root = root->root_key.objectid;
1151
+ btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
1152
+ key.objectid, key.offset);
1153
+ ret = btrfs_free_extent(trans, &ref);
17051154 if (ret) {
17061155 btrfs_abort_transaction(trans, ret);
17071156 break;
....@@ -1735,7 +1184,7 @@
17351184 * errors, a negative error number is returned.
17361185 */
17371186 static noinline_for_stack
1738
-int replace_path(struct btrfs_trans_handle *trans,
1187
+int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
17391188 struct btrfs_root *dest, struct btrfs_root *src,
17401189 struct btrfs_path *path, struct btrfs_key *next_key,
17411190 int lowest_level, int max_level)
....@@ -1743,6 +1192,7 @@
17431192 struct btrfs_fs_info *fs_info = dest->fs_info;
17441193 struct extent_buffer *eb;
17451194 struct extent_buffer *parent;
1195
+ struct btrfs_ref ref = { 0 };
17461196 struct btrfs_key key;
17471197 u64 old_bytenr;
17481198 u64 new_bytenr;
....@@ -1764,7 +1214,7 @@
17641214 btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
17651215
17661216 eb = btrfs_lock_root_node(dest);
1767
- btrfs_set_lock_blocking(eb);
1217
+ btrfs_set_lock_blocking_write(eb);
17681218 level = btrfs_header_level(eb);
17691219
17701220 if (level < lowest_level) {
....@@ -1774,10 +1224,11 @@
17741224 }
17751225
17761226 if (cow) {
1777
- ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
1227
+ ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb,
1228
+ BTRFS_NESTING_COW);
17781229 BUG_ON(ret);
17791230 }
1780
- btrfs_set_lock_blocking(eb);
1231
+ btrfs_set_lock_blocking_write(eb);
17811232
17821233 if (next_key) {
17831234 next_key->objectid = (u64)-1;
....@@ -1792,7 +1243,9 @@
17921243 level = btrfs_header_level(parent);
17931244 ASSERT(level >= lowest_level);
17941245
1795
- ret = btrfs_bin_search(parent, &key, level, &slot);
1246
+ ret = btrfs_bin_search(parent, &key, &slot);
1247
+ if (ret < 0)
1248
+ break;
17961249 if (ret && slot > 0)
17971250 slot--;
17981251
....@@ -1840,10 +1293,11 @@
18401293 btrfs_tree_lock(eb);
18411294 if (cow) {
18421295 ret = btrfs_cow_block(trans, dest, eb, parent,
1843
- slot, &eb);
1296
+ slot, &eb,
1297
+ BTRFS_NESTING_COW);
18441298 BUG_ON(ret);
18451299 }
1846
- btrfs_set_lock_blocking(eb);
1300
+ btrfs_set_lock_blocking_write(eb);
18471301
18481302 btrfs_tree_unlock(parent);
18491303 free_extent_buffer(parent);
@@ -1876,20 +1330,18 @@
 	 *    If not traced, we will leak data numbers
 	 * 2) Fs subtree
 	 *    If not traced, we will double count old data
-	 *    and tree block numbers, if current trans doesn't free
-	 *    data reloc tree inode.
+	 *
+	 * We don't scan the subtree right now, but only record
+	 * the swapped tree blocks.
+	 * The real subtree rescan is delayed until we have new
+	 * CoW on the subtree root node before transaction commit.
 	 */
-	ret = btrfs_qgroup_trace_subtree(trans, parent,
-			btrfs_header_generation(parent),
-			btrfs_header_level(parent));
+	ret = btrfs_qgroup_add_swapped_blocks(trans, dest,
+			rc->block_group, parent, slot,
+			path->nodes[level], path->slots[level],
+			last_snapshot);
 	if (ret < 0)
 		break;
-	ret = btrfs_qgroup_trace_subtree(trans, path->nodes[level],
-			btrfs_header_generation(path->nodes[level]),
-			btrfs_header_level(path->nodes[level]));
-	if (ret < 0)
-		break;
-
 	/*
 	 * swap blocks in fs tree and reloc tree.
 	 */
....@@ -1903,23 +1355,31 @@
19031355 path->slots[level], old_ptr_gen);
19041356 btrfs_mark_buffer_dirty(path->nodes[level]);
19051357
1906
- ret = btrfs_inc_extent_ref(trans, src, old_bytenr,
1907
- blocksize, path->nodes[level]->start,
1908
- src->root_key.objectid, level - 1, 0);
1358
+ btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr,
1359
+ blocksize, path->nodes[level]->start);
1360
+ ref.skip_qgroup = true;
1361
+ btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
1362
+ ret = btrfs_inc_extent_ref(trans, &ref);
19091363 BUG_ON(ret);
1910
- ret = btrfs_inc_extent_ref(trans, dest, new_bytenr,
1911
- blocksize, 0, dest->root_key.objectid,
1912
- level - 1, 0);
1364
+ btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
1365
+ blocksize, 0);
1366
+ ref.skip_qgroup = true;
1367
+ btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
1368
+ ret = btrfs_inc_extent_ref(trans, &ref);
19131369 BUG_ON(ret);
19141370
1915
- ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
1916
- path->nodes[level]->start,
1917
- src->root_key.objectid, level - 1, 0);
1371
+ btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr,
1372
+ blocksize, path->nodes[level]->start);
1373
+ btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
1374
+ ref.skip_qgroup = true;
1375
+ ret = btrfs_free_extent(trans, &ref);
19181376 BUG_ON(ret);
19191377
1920
- ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
1921
- 0, dest->root_key.objectid, level - 1,
1922
- 0);
1378
+ btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr,
1379
+ blocksize, 0);
1380
+ btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
1381
+ ref.skip_qgroup = true;
1382
+ ret = btrfs_free_extent(trans, &ref);
19231383 BUG_ON(ret);
19241384
19251385 btrfs_unlock_up_safe(path, 0);
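
The reference updates in the hunks above (data refs in the first hunk, tree refs here) all move from the long btrfs_inc_extent_ref()/btrfs_free_extent() argument lists to a struct btrfs_ref built in two steps. A condensed sketch of the idiom, using only calls and fields that appear in the hunks themselves:

	struct btrfs_ref ref = { 0 };

	/* 1) describe the extent: action, bytenr, length, parent bytenr (0 for a keyed ref) */
	btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr, num_bytes, parent);

	/* 2) say what kind of backref it is ... */
	btrfs_init_tree_ref(&ref, level, root_objectid);	/* tree block */
	/*
	 * ... or, for file extents:
	 * btrfs_init_data_ref(&ref, owner_root_objectid, ino, file_offset);
	 */

	/* optional tweaks seen above: ref.real_root, ref.skip_qgroup */

	/* 3) queue the delayed ref */
	ret = btrfs_inc_extent_ref(trans, &ref);	/* or btrfs_free_extent(trans, &ref) */
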
....@@ -2117,6 +1577,81 @@
21171577 }
21181578
21191579 /*
1580
+ * Insert current subvolume into reloc_control::dirty_subvol_roots
1581
+ */
1582
+static void insert_dirty_subvol(struct btrfs_trans_handle *trans,
1583
+ struct reloc_control *rc,
1584
+ struct btrfs_root *root)
1585
+{
1586
+ struct btrfs_root *reloc_root = root->reloc_root;
1587
+ struct btrfs_root_item *reloc_root_item;
1588
+
1589
+ /* @root must be a subvolume tree root with a valid reloc tree */
1590
+ ASSERT(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1591
+ ASSERT(reloc_root);
1592
+
1593
+ reloc_root_item = &reloc_root->root_item;
1594
+ memset(&reloc_root_item->drop_progress, 0,
1595
+ sizeof(reloc_root_item->drop_progress));
1596
+ reloc_root_item->drop_level = 0;
1597
+ btrfs_set_root_refs(reloc_root_item, 0);
1598
+ btrfs_update_reloc_root(trans, root);
1599
+
1600
+ if (list_empty(&root->reloc_dirty_list)) {
1601
+ btrfs_grab_root(root);
1602
+ list_add_tail(&root->reloc_dirty_list, &rc->dirty_subvol_roots);
1603
+ }
1604
+}
1605
+
1606
+static int clean_dirty_subvols(struct reloc_control *rc)
1607
+{
1608
+ struct btrfs_root *root;
1609
+ struct btrfs_root *next;
1610
+ int ret = 0;
1611
+ int ret2;
1612
+
1613
+ list_for_each_entry_safe(root, next, &rc->dirty_subvol_roots,
1614
+ reloc_dirty_list) {
1615
+ if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1616
+ /* Merged subvolume, cleanup its reloc root */
1617
+ struct btrfs_root *reloc_root = root->reloc_root;
1618
+
1619
+ list_del_init(&root->reloc_dirty_list);
1620
+ root->reloc_root = NULL;
1621
+ /*
1622
+ * Need barrier to ensure clear_bit() only happens after
1623
+ * root->reloc_root = NULL. Pairs with have_reloc_root.
1624
+ */
1625
+ smp_wmb();
1626
+ clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
1627
+ if (reloc_root) {
1628
+ /*
1629
+ * btrfs_drop_snapshot drops our ref we hold for
1630
+ * ->reloc_root. If it fails however we must
1631
+ * drop the ref ourselves.
1632
+ */
1633
+ ret2 = btrfs_drop_snapshot(reloc_root, 0, 1);
1634
+ if (ret2 < 0) {
1635
+ btrfs_put_root(reloc_root);
1636
+ if (!ret)
1637
+ ret = ret2;
1638
+ }
1639
+ }
1640
+ btrfs_put_root(root);
1641
+ } else {
1642
+ /* Orphan reloc tree, just clean it up */
1643
+ ret2 = btrfs_drop_snapshot(root, 0, 1);
1644
+ if (ret2 < 0) {
1645
+ btrfs_put_root(root);
1646
+ if (!ret)
1647
+ ret = ret2;
1648
+ }
1649
+ }
1650
+ }
1651
+ return ret;
1652
+}
1653
+
1654
+/*
21201655 * merge the relocated tree blocks in reloc tree with corresponding
21211656 * fs tree.
21221657 */
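
The smp_wmb() in clean_dirty_subvols() above publishes root->reloc_root = NULL before the DEAD_RELOC_TREE bit is cleared. The reader it pairs with, have_reloc_root(), is only named in the comment, so its body here is an assumption; a plausible sketch of that side, mirroring the write order with a read barrier:

static bool have_reloc_root(struct btrfs_root *root)
{
	/* Pairs with the smp_wmb() in clean_dirty_subvols(). */
	smp_rmb();
	if (!test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
		return false;
	/*
	 * Either the bit is still set and reloc_root is checked for NULL
	 * below, or the bit is clear and reloc_root is never touched at all.
	 */
	if (!root->reloc_root)
		return false;
	return true;
}
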
....@@ -2124,7 +1659,6 @@
21241659 struct btrfs_root *root)
21251660 {
21261661 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
2127
- LIST_HEAD(inode_list);
21281662 struct btrfs_key key;
21291663 struct btrfs_key next_key;
21301664 struct btrfs_trans_handle *trans = NULL;
....@@ -2132,6 +1666,7 @@
21321666 struct btrfs_root_item *root_item;
21331667 struct btrfs_path *path;
21341668 struct extent_buffer *leaf;
1669
+ int reserve_level;
21351670 int level;
21361671 int max_level;
21371672 int replaced = 0;
....@@ -2149,7 +1684,7 @@
21491684
21501685 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
21511686 level = btrfs_root_level(root_item);
2152
- extent_buffer_get(reloc_root->node);
1687
+ atomic_inc(&reloc_root->node->refs);
21531688 path->nodes[level] = reloc_root->node;
21541689 path->slots[level] = 0;
21551690 } else {
....@@ -2172,12 +1707,21 @@
21721707 btrfs_unlock_up_safe(path, 0);
21731708 }
21741709
2175
- min_reserved = fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
1710
+ /*
1711
+ * In merge_reloc_root(), we modify the upper level pointer to swap the
1712
+ * tree blocks between reloc tree and subvolume tree. Thus for tree
1713
+ * block COW, we COW at most from level 1 to root level for each tree.
1714
+ *
1715
+ * Thus the needed metadata size is at most root_level * nodesize,
1716
+ * and * 2 since we have two trees to COW.
1717
+ */
1718
+ reserve_level = max_t(int, 1, btrfs_root_level(root_item));
1719
+ min_reserved = fs_info->nodesize * reserve_level * 2;
21761720 memset(&next_key, 0, sizeof(next_key));
21771721
21781722 while (1) {
21791723 ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
2180
- BTRFS_RESERVE_FLUSH_ALL);
1724
+ BTRFS_RESERVE_FLUSH_LIMIT);
21811725 if (ret) {
21821726 err = ret;
21831727 goto out;
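
As a worked example of the refill target computed in the hunk above (the nodesize and level are hypothetical, not from the patch): with a 16 KiB nodesize and a reloc root at level 3, at most three nodes per tree are COWed, for two trees:

	/* old:  nodesize * (BTRFS_MAX_LEVEL - 1) * 2 = 16384 * 7 * 2 = 224 KiB */
	/* new:  reserve_level = max_t(int, 1, root_level) = 3
	 *       min_reserved  = 16384 * 3 * 2            = 96 KiB per refill   */
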
....@@ -2188,6 +1732,18 @@
21881732 trans = NULL;
21891733 goto out;
21901734 }
1735
+
1736
+ /*
1737
+ * At this point we no longer have a reloc_control, so we can't
1738
+ * depend on btrfs_init_reloc_root to update our last_trans.
1739
+ *
1740
+ * But that's ok, we started the trans handle on our
1741
+ * corresponding fs_root, which means it's been added to the
1742
+ * dirty list. At commit time we'll still call
1743
+ * btrfs_update_reloc_root() and update our root item
1744
+ * appropriately.
1745
+ */
1746
+ reloc_root->last_trans = trans->transid;
21911747 trans->block_rsv = rc->block_rsv;
21921748
21931749 replaced = 0;
....@@ -2205,7 +1761,7 @@
22051761 btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
22061762 ret = 0;
22071763 } else {
2208
- ret = replace_path(trans, root, reloc_root, path,
1764
+ ret = replace_path(trans, rc, root, reloc_root, path,
22091765 &next_key, level, max_level);
22101766 }
22111767 if (ret < 0) {
....@@ -2247,7 +1803,8 @@
22471803 * relocated and the block is tree root.
22481804 */
22491805 leaf = btrfs_lock_root_node(root);
2250
- ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
1806
+ ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf,
1807
+ BTRFS_NESTING_COW);
22511808 btrfs_tree_unlock(leaf);
22521809 free_extent_buffer(leaf);
22531810 if (ret < 0)
....@@ -2255,13 +1812,8 @@
22551812 out:
22561813 btrfs_free_path(path);
22571814
2258
- if (err == 0) {
2259
- memset(&root_item->drop_progress, 0,
2260
- sizeof(root_item->drop_progress));
2261
- root_item->drop_level = 0;
2262
- btrfs_set_root_refs(root_item, 0);
2263
- btrfs_update_reloc_root(trans, root);
2264
- }
1815
+ if (err == 0)
1816
+ insert_dirty_subvol(trans, rc, root);
22651817
22661818 if (trans)
22671819 btrfs_end_transaction_throttle(trans);
....@@ -2303,7 +1855,7 @@
23031855 if (IS_ERR(trans)) {
23041856 if (!err)
23051857 btrfs_block_rsv_release(fs_info, rc->block_rsv,
2306
- num_bytes);
1858
+ num_bytes, NULL);
23071859 return PTR_ERR(trans);
23081860 }
23091861
....@@ -2311,7 +1863,7 @@
23111863 if (num_bytes != rc->merging_rsv_size) {
23121864 btrfs_end_transaction(trans);
23131865 btrfs_block_rsv_release(fs_info, rc->block_rsv,
2314
- num_bytes);
1866
+ num_bytes, NULL);
23151867 goto again;
23161868 }
23171869 }
....@@ -2323,7 +1875,8 @@
23231875 struct btrfs_root, root_list);
23241876 list_del_init(&reloc_root->root_list);
23251877
2326
- root = read_fs_root(fs_info, reloc_root->root_key.offset);
1878
+ root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
1879
+ false);
23271880 BUG_ON(IS_ERR(root));
23281881 BUG_ON(root->reloc_root != reloc_root);
23291882
....@@ -2336,12 +1889,13 @@
23361889 btrfs_update_reloc_root(trans, root);
23371890
23381891 list_add(&reloc_root->root_list, &reloc_roots);
1892
+ btrfs_put_root(root);
23391893 }
23401894
23411895 list_splice(&reloc_roots, &rc->reloc_roots);
23421896
23431897 if (!err)
2344
- btrfs_commit_transaction(trans);
1898
+ err = btrfs_commit_transaction(trans);
23451899 else
23461900 btrfs_end_transaction(trans);
23471901 return err;
....@@ -2350,17 +1904,10 @@
23501904 static noinline_for_stack
23511905 void free_reloc_roots(struct list_head *list)
23521906 {
2353
- struct btrfs_root *reloc_root;
1907
+ struct btrfs_root *reloc_root, *tmp;
23541908
2355
- while (!list_empty(list)) {
2356
- reloc_root = list_entry(list->next, struct btrfs_root,
2357
- root_list);
1909
+ list_for_each_entry_safe(reloc_root, tmp, list, root_list)
23581910 __del_reloc_root(reloc_root);
2359
- free_extent_buffer(reloc_root->node);
2360
- free_extent_buffer(reloc_root->commit_root);
2361
- reloc_root->node = NULL;
2362
- reloc_root->commit_root = NULL;
2363
- }
23641911 }
23651912
23661913 static noinline_for_stack
....@@ -2390,13 +1937,13 @@
23901937 reloc_root = list_entry(reloc_roots.next,
23911938 struct btrfs_root, root_list);
23921939
1940
+ root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
1941
+ false);
23931942 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
2394
- root = read_fs_root(fs_info,
2395
- reloc_root->root_key.offset);
23961943 BUG_ON(IS_ERR(root));
23971944 BUG_ON(root->reloc_root != reloc_root);
2398
-
23991945 ret = merge_reloc_root(rc, root);
1946
+ btrfs_put_root(root);
24001947 if (ret) {
24011948 if (list_empty(&reloc_root->root_list))
24021949 list_add_tail(&reloc_root->root_list,
....@@ -2404,15 +1951,20 @@
24041951 goto out;
24051952 }
24061953 } else {
2407
- list_del_init(&reloc_root->root_list);
2408
- }
1954
+ if (!IS_ERR(root)) {
1955
+ if (root->reloc_root == reloc_root) {
1956
+ root->reloc_root = NULL;
1957
+ btrfs_put_root(reloc_root);
1958
+ }
1959
+ clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE,
1960
+ &root->state);
1961
+ btrfs_put_root(root);
1962
+ }
24091963
2410
- ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
2411
- if (ret < 0) {
2412
- if (list_empty(&reloc_root->root_list))
2413
- list_add_tail(&reloc_root->root_list,
2414
- &reloc_roots);
2415
- goto out;
1964
+ list_del_init(&reloc_root->root_list);
1965
+ /* Don't forget to queue this reloc root for cleanup */
1966
+ list_add_tail(&reloc_root->reloc_dirty_list,
1967
+ &rc->dirty_subvol_roots);
24161968 }
24171969 }
24181970
....@@ -2423,15 +1975,13 @@
24231975 out:
24241976 if (ret) {
24251977 btrfs_handle_fs_error(fs_info, ret, NULL);
2426
- if (!list_empty(&reloc_roots))
2427
- free_reloc_roots(&reloc_roots);
1978
+ free_reloc_roots(&reloc_roots);
24281979
24291980 /* new reloc root may be added */
24301981 mutex_lock(&fs_info->reloc_mutex);
24311982 list_splice_init(&rc->reloc_roots, &reloc_roots);
24321983 mutex_unlock(&fs_info->reloc_mutex);
2433
- if (!list_empty(&reloc_roots))
2434
- free_reloc_roots(&reloc_roots);
1984
+ free_reloc_roots(&reloc_roots);
24351985 }
24361986
24371987 /*
....@@ -2467,24 +2017,27 @@
24672017 {
24682018 struct btrfs_fs_info *fs_info = reloc_root->fs_info;
24692019 struct btrfs_root *root;
2020
+ int ret;
24702021
24712022 if (reloc_root->last_trans == trans->transid)
24722023 return 0;
24732024
2474
- root = read_fs_root(fs_info, reloc_root->root_key.offset);
2025
+ root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset, false);
24752026 BUG_ON(IS_ERR(root));
24762027 BUG_ON(root->reloc_root != reloc_root);
2028
+ ret = btrfs_record_root_in_trans(trans, root);
2029
+ btrfs_put_root(root);
24772030
2478
- return btrfs_record_root_in_trans(trans, root);
2031
+ return ret;
24792032 }
24802033
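
record_reloc_root_in_trans() above is one of many call sites in this patch converted from read_fs_root() to btrfs_get_fs_root(); the new call returns a counted reference, so every successful lookup is now paired with btrfs_put_root(). The bare shape of the pattern (error handling simplified; the real sites above keep their BUG_ON checks):

	struct btrfs_root *root;

	root = btrfs_get_fs_root(fs_info, objectid, false);	/* takes a reference */
	if (IS_ERR(root))
		return PTR_ERR(root);
	/* ... use root ... */
	btrfs_put_root(root);					/* drop it when done */
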
24812034 static noinline_for_stack
24822035 struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
24832036 struct reloc_control *rc,
2484
- struct backref_node *node,
2485
- struct backref_edge *edges[])
2037
+ struct btrfs_backref_node *node,
2038
+ struct btrfs_backref_edge *edges[])
24862039 {
2487
- struct backref_node *next;
2040
+ struct btrfs_backref_node *next;
24882041 struct btrfs_root *root;
24892042 int index = 0;
24902043
....@@ -2494,7 +2047,7 @@
24942047 next = walk_up_backref(next, edges, &index);
24952048 root = next->root;
24962049 BUG_ON(!root);
2497
- BUG_ON(!test_bit(BTRFS_ROOT_REF_COWS, &root->state));
2050
+ BUG_ON(!test_bit(BTRFS_ROOT_SHAREABLE, &root->state));
24982051
24992052 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
25002053 record_reloc_root_in_trans(trans, root);
....@@ -2508,10 +2061,12 @@
25082061 BUG_ON(next->new_bytenr);
25092062 BUG_ON(!list_empty(&next->list));
25102063 next->new_bytenr = root->node->start;
2511
- next->root = root;
2064
+ btrfs_put_root(next->root);
2065
+ next->root = btrfs_grab_root(root);
2066
+ ASSERT(next->root);
25122067 list_add_tail(&next->list,
25132068 &rc->backref_cache.changed);
2514
- __mark_block_processed(rc, next);
2069
+ mark_block_processed(rc, next);
25152070 break;
25162071 }
25172072
....@@ -2536,18 +2091,21 @@
25362091 }
25372092
25382093 /*
2539
- * select a tree root for relocation. return NULL if the block
2540
- * is reference counted. we should use do_relocation() in this
2541
- * case. return a tree root pointer if the block isn't reference
2542
- * counted. return -ENOENT if the block is root of reloc tree.
2094
+ * Select a tree root for relocation.
2095
+ *
2096
+ * Return NULL if the block is not shareable. We should use do_relocation() in
2097
+ * this case.
2098
+ *
2099
+ * Return a tree root pointer if the block is shareable.
2100
+ * Return -ENOENT if the block is root of reloc tree.
25432101 */
25442102 static noinline_for_stack
2545
-struct btrfs_root *select_one_root(struct backref_node *node)
2103
+struct btrfs_root *select_one_root(struct btrfs_backref_node *node)
25462104 {
2547
- struct backref_node *next;
2105
+ struct btrfs_backref_node *next;
25482106 struct btrfs_root *root;
25492107 struct btrfs_root *fs_root = NULL;
2550
- struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2108
+ struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
25512109 int index = 0;
25522110
25532111 next = node;
....@@ -2557,8 +2115,8 @@
25572115 root = next->root;
25582116 BUG_ON(!root);
25592117
2560
- /* no other choice for non-references counted tree */
2561
- if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
2118
+ /* No other choice for non-shareable tree */
2119
+ if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
25622120 return root;
25632121
25642122 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
....@@ -2579,12 +2137,12 @@
25792137
25802138 static noinline_for_stack
25812139 u64 calcu_metadata_size(struct reloc_control *rc,
2582
- struct backref_node *node, int reserve)
2140
+ struct btrfs_backref_node *node, int reserve)
25832141 {
25842142 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
2585
- struct backref_node *next = node;
2586
- struct backref_edge *edge;
2587
- struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2143
+ struct btrfs_backref_node *next = node;
2144
+ struct btrfs_backref_edge *edge;
2145
+ struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
25882146 u64 num_bytes = 0;
25892147 int index = 0;
25902148
....@@ -2602,7 +2160,7 @@
26022160 break;
26032161
26042162 edge = list_entry(next->upper.next,
2605
- struct backref_edge, list[LOWER]);
2163
+ struct btrfs_backref_edge, list[LOWER]);
26062164 edges[index++] = edge;
26072165 next = edge->node[UPPER];
26082166 }
....@@ -2613,7 +2171,7 @@
26132171
26142172 static int reserve_metadata_space(struct btrfs_trans_handle *trans,
26152173 struct reloc_control *rc,
2616
- struct backref_node *node)
2174
+ struct btrfs_backref_node *node)
26172175 {
26182176 struct btrfs_root *root = rc->extent_root;
26192177 struct btrfs_fs_info *fs_info = root->fs_info;
....@@ -2641,7 +2199,7 @@
26412199 * only one thread can access block_rsv at this point,
26422200 * so we don't need hold lock to protect block_rsv.
26432201 * we expand more reservation size here to allow enough
2644
- * space for relocation and we will return eailer in
2202
+ * space for relocation and we will return earlier in
26452203 * enospc case.
26462204 */
26472205 rc->block_rsv->size = tmp + fs_info->nodesize *
....@@ -2661,14 +2219,14 @@
26612219 */
26622220 static int do_relocation(struct btrfs_trans_handle *trans,
26632221 struct reloc_control *rc,
2664
- struct backref_node *node,
2222
+ struct btrfs_backref_node *node,
26652223 struct btrfs_key *key,
26662224 struct btrfs_path *path, int lowest)
26672225 {
26682226 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
2669
- struct backref_node *upper;
2670
- struct backref_edge *edge;
2671
- struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2227
+ struct btrfs_backref_node *upper;
2228
+ struct btrfs_backref_edge *edge;
2229
+ struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
26722230 struct btrfs_root *root;
26732231 struct extent_buffer *eb;
26742232 u32 blocksize;
....@@ -2684,6 +2242,7 @@
26842242 rc->backref_cache.path[node->level] = node;
26852243 list_for_each_entry(edge, &node->upper, list[LOWER]) {
26862244 struct btrfs_key first_key;
2245
+ struct btrfs_ref ref = { 0 };
26872246
26882247 cond_resched();
26892248
....@@ -2693,14 +2252,17 @@
26932252
26942253 if (upper->eb && !upper->locked) {
26952254 if (!lowest) {
2696
- ret = btrfs_bin_search(upper->eb, key,
2697
- upper->level, &slot);
2255
+ ret = btrfs_bin_search(upper->eb, key, &slot);
2256
+ if (ret < 0) {
2257
+ err = ret;
2258
+ goto next;
2259
+ }
26982260 BUG_ON(ret);
26992261 bytenr = btrfs_node_blockptr(upper->eb, slot);
27002262 if (node->eb->start == bytenr)
27012263 goto next;
27022264 }
2703
- drop_node_buffer(upper);
2265
+ btrfs_backref_drop_node_buffer(upper);
27042266 }
27052267
27062268 if (!upper->eb) {
....@@ -2728,8 +2290,11 @@
27282290 slot = path->slots[upper->level];
27292291 btrfs_release_path(path);
27302292 } else {
2731
- ret = btrfs_bin_search(upper->eb, key, upper->level,
2732
- &slot);
2293
+ ret = btrfs_bin_search(upper->eb, key, &slot);
2294
+ if (ret < 0) {
2295
+ err = ret;
2296
+ goto next;
2297
+ }
27332298 BUG_ON(ret);
27342299 }
27352300
....@@ -2762,11 +2327,11 @@
27622327 goto next;
27632328 }
27642329 btrfs_tree_lock(eb);
2765
- btrfs_set_lock_blocking(eb);
2330
+ btrfs_set_lock_blocking_write(eb);
27662331
27672332 if (!node->eb) {
27682333 ret = btrfs_cow_block(trans, root, eb, upper->eb,
2769
- slot, &eb);
2334
+ slot, &eb, BTRFS_NESTING_COW);
27702335 btrfs_tree_unlock(eb);
27712336 free_extent_buffer(eb);
27722337 if (ret < 0) {
....@@ -2781,11 +2346,13 @@
27812346 trans->transid);
27822347 btrfs_mark_buffer_dirty(upper->eb);
27832348
2784
- ret = btrfs_inc_extent_ref(trans, root,
2785
- node->eb->start, blocksize,
2786
- upper->eb->start,
2787
- btrfs_header_owner(upper->eb),
2788
- node->level, 0);
2349
+ btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
2350
+ node->eb->start, blocksize,
2351
+ upper->eb->start);
2352
+ ref.real_root = root->root_key.objectid;
2353
+ btrfs_init_tree_ref(&ref, node->level,
2354
+ btrfs_header_owner(upper->eb));
2355
+ ret = btrfs_inc_extent_ref(trans, &ref);
27892356 BUG_ON(ret);
27902357
27912358 ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
....@@ -2793,15 +2360,15 @@
27932360 }
27942361 next:
27952362 if (!upper->pending)
2796
- drop_node_buffer(upper);
2363
+ btrfs_backref_drop_node_buffer(upper);
27972364 else
2798
- unlock_node_buffer(upper);
2365
+ btrfs_backref_unlock_node_buffer(upper);
27992366 if (err)
28002367 break;
28012368 }
28022369
28032370 if (!err && node->pending) {
2804
- drop_node_buffer(node);
2371
+ btrfs_backref_drop_node_buffer(node);
28052372 list_move_tail(&node->list, &rc->backref_cache.changed);
28062373 node->pending = 0;
28072374 }
....@@ -2813,7 +2380,7 @@
28132380
28142381 static int link_to_upper(struct btrfs_trans_handle *trans,
28152382 struct reloc_control *rc,
2816
- struct backref_node *node,
2383
+ struct btrfs_backref_node *node,
28172384 struct btrfs_path *path)
28182385 {
28192386 struct btrfs_key key;
....@@ -2827,15 +2394,15 @@
28272394 struct btrfs_path *path, int err)
28282395 {
28292396 LIST_HEAD(list);
2830
- struct backref_cache *cache = &rc->backref_cache;
2831
- struct backref_node *node;
2397
+ struct btrfs_backref_cache *cache = &rc->backref_cache;
2398
+ struct btrfs_backref_node *node;
28322399 int level;
28332400 int ret;
28342401
28352402 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
28362403 while (!list_empty(&cache->pending[level])) {
28372404 node = list_entry(cache->pending[level].next,
2838
- struct backref_node, list);
2405
+ struct btrfs_backref_node, list);
28392406 list_move_tail(&node->list, &list);
28402407 BUG_ON(!node->pending);
28412408
....@@ -2850,35 +2417,16 @@
28502417 return err;
28512418 }
28522419
2853
-static void mark_block_processed(struct reloc_control *rc,
2854
- u64 bytenr, u32 blocksize)
2855
-{
2856
- set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1,
2857
- EXTENT_DIRTY);
2858
-}
2859
-
2860
-static void __mark_block_processed(struct reloc_control *rc,
2861
- struct backref_node *node)
2862
-{
2863
- u32 blocksize;
2864
- if (node->level == 0 ||
2865
- in_block_group(node->bytenr, rc->block_group)) {
2866
- blocksize = rc->extent_root->fs_info->nodesize;
2867
- mark_block_processed(rc, node->bytenr, blocksize);
2868
- }
2869
- node->processed = 1;
2870
-}
2871
-
28722420 /*
28732421 * mark a block and all blocks directly/indirectly reference the block
28742422 * as processed.
28752423 */
28762424 static void update_processed_blocks(struct reloc_control *rc,
2877
- struct backref_node *node)
2425
+ struct btrfs_backref_node *node)
28782426 {
2879
- struct backref_node *next = node;
2880
- struct backref_edge *edge;
2881
- struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2427
+ struct btrfs_backref_node *next = node;
2428
+ struct btrfs_backref_edge *edge;
2429
+ struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
28822430 int index = 0;
28832431
28842432 while (next) {
....@@ -2887,13 +2435,13 @@
28872435 if (next->processed)
28882436 break;
28892437
2890
- __mark_block_processed(rc, next);
2438
+ mark_block_processed(rc, next);
28912439
28922440 if (list_empty(&next->upper))
28932441 break;
28942442
28952443 edge = list_entry(next->upper.next,
2896
- struct backref_edge, list[LOWER]);
2444
+ struct btrfs_backref_edge, list[LOWER]);
28972445 edges[index++] = edge;
28982446 next = edge->node[UPPER];
28992447 }
....@@ -2916,7 +2464,6 @@
29162464 {
29172465 struct extent_buffer *eb;
29182466
2919
- BUG_ON(block->key_ready);
29202467 eb = read_tree_block(fs_info, block->bytenr, block->key.offset,
29212468 block->level, NULL);
29222469 if (IS_ERR(eb)) {
....@@ -2925,7 +2472,6 @@
29252472 free_extent_buffer(eb);
29262473 return -EIO;
29272474 }
2928
- WARN_ON(btrfs_header_level(eb) != block->level);
29292475 if (block->level == 0)
29302476 btrfs_item_key_to_cpu(eb, &block->key, 0);
29312477 else
....@@ -2940,7 +2486,7 @@
29402486 */
29412487 static int relocate_tree_block(struct btrfs_trans_handle *trans,
29422488 struct reloc_control *rc,
2943
- struct backref_node *node,
2489
+ struct btrfs_backref_node *node,
29442490 struct btrfs_key *key,
29452491 struct btrfs_path *path)
29462492 {
....@@ -2950,6 +2496,14 @@
29502496 if (!node)
29512497 return 0;
29522498
2499
+ /*
2500
+ * If we fail here we want to drop our backref_node because we are going
2501
+ * to start over and regenerate the tree for it.
2502
+ */
2503
+ ret = reserve_metadata_space(trans, rc, node);
2504
+ if (ret)
2505
+ goto out;
2506
+
29532507 BUG_ON(node->processed);
29542508 root = select_one_root(node);
29552509 if (root == ERR_PTR(-ENOENT)) {
....@@ -2957,20 +2511,16 @@
29572511 goto out;
29582512 }
29592513
2960
- if (!root || test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
2961
- ret = reserve_metadata_space(trans, rc, node);
2962
- if (ret)
2963
- goto out;
2964
- }
2965
-
29662514 if (root) {
2967
- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
2515
+ if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
29682516 BUG_ON(node->new_bytenr);
29692517 BUG_ON(!list_empty(&node->list));
29702518 btrfs_record_root_in_trans(trans, root);
29712519 root = root->reloc_root;
29722520 node->new_bytenr = root->node->start;
2973
- node->root = root;
2521
+ btrfs_put_root(node->root);
2522
+ node->root = btrfs_grab_root(root);
2523
+ ASSERT(node->root);
29742524 list_add_tail(&node->list, &rc->backref_cache.changed);
29752525 } else {
29762526 path->lowest_level = node->level;
....@@ -2986,7 +2536,7 @@
29862536 }
29872537 out:
29882538 if (ret || node->level == 0 || node->cowonly)
2989
- remove_backref_node(&rc->backref_cache, node);
2539
+ btrfs_backref_cleanup_node(&rc->backref_cache, node);
29902540 return ret;
29912541 }
29922542
....@@ -2998,10 +2548,10 @@
29982548 struct reloc_control *rc, struct rb_root *blocks)
29992549 {
30002550 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3001
- struct backref_node *node;
2551
+ struct btrfs_backref_node *node;
30022552 struct btrfs_path *path;
30032553 struct tree_block *block;
3004
- struct rb_node *rb_node;
2554
+ struct tree_block *next;
30052555 int ret;
30062556 int err = 0;
30072557
....@@ -3011,29 +2561,23 @@
30112561 goto out_free_blocks;
30122562 }
30132563
3014
- rb_node = rb_first(blocks);
3015
- while (rb_node) {
3016
- block = rb_entry(rb_node, struct tree_block, rb_node);
2564
+ /* Kick in readahead for tree blocks with missing keys */
2565
+ rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
30172566 if (!block->key_ready)
30182567 readahead_tree_block(fs_info, block->bytenr);
3019
- rb_node = rb_next(rb_node);
30202568 }
30212569
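
The three open-coded rb_first()/rb_next() walks in this function are replaced by rbtree_postorder_for_each_entry_safe() from <linux/rbtree.h>. A short gloss of its parameters as used above:

	/*
	 * rbtree_postorder_for_each_entry_safe(pos, n, root, field)
	 *   pos   - cursor, a pointer to the containing type (struct tree_block *)
	 *   n     - scratch pointer of the same type holding the next entry
	 *   root  - the struct rb_root to walk (here: blocks)
	 *   field - name of the embedded rb_node member (here: rb_node)
	 *
	 * The walk is post-order and caches the next entry up front, so the
	 * current entry may be freed while iterating.
	 */
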
3022
- rb_node = rb_first(blocks);
3023
- while (rb_node) {
3024
- block = rb_entry(rb_node, struct tree_block, rb_node);
2570
+ /* Get first keys */
2571
+ rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
30252572 if (!block->key_ready) {
30262573 err = get_tree_block_key(fs_info, block);
30272574 if (err)
30282575 goto out_free_path;
30292576 }
3030
- rb_node = rb_next(rb_node);
30312577 }
30322578
3033
- rb_node = rb_first(blocks);
3034
- while (rb_node) {
3035
- block = rb_entry(rb_node, struct tree_block, rb_node);
3036
-
2579
+ /* Do tree relocation */
2580
+ rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
30372581 node = build_backref_tree(rc, &block->key,
30382582 block->level, block->bytenr);
30392583 if (IS_ERR(node)) {
....@@ -3044,11 +2588,9 @@
30442588 ret = relocate_tree_block(trans, rc, node, &block->key,
30452589 path);
30462590 if (ret < 0) {
3047
- if (ret != -EAGAIN || rb_node == rb_first(blocks))
3048
- err = ret;
3049
- goto out;
2591
+ err = ret;
2592
+ break;
30502593 }
3051
- rb_node = rb_next(rb_node);
30522594 }
30532595 out:
30542596 err = finish_pending_nodes(trans, rc, path, err);
....@@ -3060,58 +2602,50 @@
30602602 return err;
30612603 }
30622604
3063
-static noinline_for_stack
3064
-int prealloc_file_extent_cluster(struct inode *inode,
3065
- struct file_extent_cluster *cluster)
2605
+static noinline_for_stack int prealloc_file_extent_cluster(
2606
+ struct btrfs_inode *inode,
2607
+ struct file_extent_cluster *cluster)
30662608 {
30672609 u64 alloc_hint = 0;
30682610 u64 start;
30692611 u64 end;
3070
- u64 offset = BTRFS_I(inode)->index_cnt;
2612
+ u64 offset = inode->index_cnt;
30712613 u64 num_bytes;
3072
- int nr = 0;
2614
+ int nr;
30732615 int ret = 0;
30742616 u64 prealloc_start = cluster->start - offset;
30752617 u64 prealloc_end = cluster->end - offset;
3076
- u64 cur_offset;
3077
- struct extent_changeset *data_reserved = NULL;
2618
+ u64 cur_offset = prealloc_start;
30782619
30792620 BUG_ON(cluster->start != cluster->boundary[0]);
3080
- inode_lock(inode);
3081
-
3082
- ret = btrfs_check_data_free_space(inode, &data_reserved, prealloc_start,
3083
- prealloc_end + 1 - prealloc_start);
2621
+ ret = btrfs_alloc_data_chunk_ondemand(inode,
2622
+ prealloc_end + 1 - prealloc_start);
30842623 if (ret)
3085
- goto out;
2624
+ return ret;
30862625
3087
- cur_offset = prealloc_start;
3088
- while (nr < cluster->nr) {
2626
+ inode_lock(&inode->vfs_inode);
2627
+ for (nr = 0; nr < cluster->nr; nr++) {
30892628 start = cluster->boundary[nr] - offset;
30902629 if (nr + 1 < cluster->nr)
30912630 end = cluster->boundary[nr + 1] - 1 - offset;
30922631 else
30932632 end = cluster->end - offset;
30942633
3095
- lock_extent(&BTRFS_I(inode)->io_tree, start, end);
2634
+ lock_extent(&inode->io_tree, start, end);
30962635 num_bytes = end + 1 - start;
3097
- if (cur_offset < start)
3098
- btrfs_free_reserved_data_space(inode, data_reserved,
3099
- cur_offset, start - cur_offset);
3100
- ret = btrfs_prealloc_file_range(inode, 0, start,
2636
+ ret = btrfs_prealloc_file_range(&inode->vfs_inode, 0, start,
31012637 num_bytes, num_bytes,
31022638 end + 1, &alloc_hint);
31032639 cur_offset = end + 1;
3104
- unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
2640
+ unlock_extent(&inode->io_tree, start, end);
31052641 if (ret)
31062642 break;
3107
- nr++;
31082643 }
2644
+ inode_unlock(&inode->vfs_inode);
2645
+
31092646 if (cur_offset < prealloc_end)
3110
- btrfs_free_reserved_data_space(inode, data_reserved,
3111
- cur_offset, prealloc_end + 1 - cur_offset);
3112
-out:
3113
- inode_unlock(inode);
3114
- extent_changeset_free(data_reserved);
2647
+ btrfs_free_reserved_data_space_noquota(inode->root->fs_info,
2648
+ prealloc_end + 1 - cur_offset);
31152649 return ret;
31162650 }
31172651
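
The rewritten loop above issues one btrfs_prealloc_file_range() call per cluster boundary. Spelled out for a hypothetical three-extent cluster (offset is inode->index_cnt, as in the code):

	/*
	 * cluster->boundary[] = { B0, B1, B2 },  cluster->end = E
	 *
	 *   nr 0: [B0 - offset, B1 - 1 - offset]
	 *   nr 1: [B1 - offset, B2 - 1 - offset]
	 *   nr 2: [B2 - offset, E - offset]
	 *
	 * Each range is locked in the io_tree, preallocated, then unlocked;
	 * cur_offset tracks how far we got, so any shortfall after an error is
	 * released by the btrfs_free_reserved_data_space_noquota() call.
	 */
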
....@@ -3119,7 +2653,6 @@
31192653 int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
31202654 u64 block_start)
31212655 {
3122
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
31232656 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
31242657 struct extent_map *em;
31252658 int ret = 0;
....@@ -3132,7 +2665,6 @@
31322665 em->len = end + 1 - start;
31332666 em->block_len = em->len;
31342667 em->block_start = block_start;
3135
- em->bdev = fs_info->fs_devices->latest_bdev;
31362668 set_bit(EXTENT_FLAG_PINNED, &em->flags);
31372669
31382670 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
....@@ -3149,6 +2681,16 @@
31492681 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
31502682 return ret;
31512683 }
2684
+
2685
+/*
2686
+ * Allow error injection to test balance cancellation
2687
+ */
2688
+int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
2689
+{
2690
+ return atomic_read(&fs_info->balance_cancel_req) ||
2691
+ fatal_signal_pending(current);
2692
+}
2693
+ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
31522694
31532695 static int relocate_file_extent_cluster(struct inode *inode,
31542696 struct file_extent_cluster *cluster)
....@@ -3172,7 +2714,7 @@
31722714 if (!ra)
31732715 return -ENOMEM;
31742716
3175
- ret = prealloc_file_extent_cluster(inode, cluster);
2717
+ ret = prealloc_file_extent_cluster(BTRFS_I(inode), cluster);
31762718 if (ret)
31772719 goto out;
31782720
....@@ -3244,8 +2786,8 @@
32442786 nr++;
32452787 }
32462788
3247
- ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
3248
- NULL, 0);
2789
+ ret = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start,
2790
+ page_end, 0, NULL);
32492791 if (ret) {
32502792 unlock_page(page);
32512793 put_page(page);
....@@ -3271,6 +2813,10 @@
32712813 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
32722814 balance_dirty_pages_ratelimited(inode->i_mapping);
32732815 btrfs_throttle(fs_info);
2816
+ if (btrfs_should_cancel_balance(fs_info)) {
2817
+ ret = -ECANCELED;
2818
+ goto out;
2819
+ }
32742820 }
32752821 WARN_ON(nr != cluster->nr);
32762822 out:
....@@ -3362,9 +2908,10 @@
33622908 block->level = level;
33632909 block->key_ready = 0;
33642910
3365
- rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
2911
+ rb_node = rb_simple_insert(blocks, block->bytenr, &block->rb_node);
33662912 if (rb_node)
3367
- backref_tree_panic(rb_node, -EEXIST, block->bytenr);
2913
+ btrfs_backref_panic(rc->extent_root->fs_info, block->bytenr,
2914
+ -EEXIST);
33682915
33692916 return 0;
33702917 }
....@@ -3385,7 +2932,7 @@
33852932 if (tree_block_processed(bytenr, rc))
33862933 return 0;
33872934
3388
- if (tree_search(blocks, bytenr))
2935
+ if (rb_simple_search(blocks, bytenr))
33892936 return 0;
33902937
33912938 path = btrfs_alloc_path();
....@@ -3442,37 +2989,11 @@
34422989 return ret;
34432990 }
34442991
3445
-/*
3446
- * helper to check if the block use full backrefs for pointers in it
3447
- */
3448
-static int block_use_full_backref(struct reloc_control *rc,
3449
- struct extent_buffer *eb)
3450
-{
3451
- u64 flags;
3452
- int ret;
3453
-
3454
- if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) ||
3455
- btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV)
3456
- return 1;
3457
-
3458
- ret = btrfs_lookup_extent_info(NULL, rc->extent_root->fs_info,
3459
- eb->start, btrfs_header_level(eb), 1,
3460
- NULL, &flags);
3461
- BUG_ON(ret);
3462
-
3463
- if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
3464
- ret = 1;
3465
- else
3466
- ret = 0;
3467
- return ret;
3468
-}
3469
-
34702992 static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3471
- struct btrfs_block_group_cache *block_group,
2993
+ struct btrfs_block_group *block_group,
34722994 struct inode *inode,
34732995 u64 ino)
34742996 {
3475
- struct btrfs_key key;
34762997 struct btrfs_root *root = fs_info->tree_root;
34772998 struct btrfs_trans_handle *trans;
34782999 int ret = 0;
....@@ -3480,11 +3001,7 @@
34803001 if (inode)
34813002 goto truncate;
34823003
3483
- key.objectid = ino;
3484
- key.type = BTRFS_INODE_ITEM_KEY;
3485
- key.offset = 0;
3486
-
3487
- inode = btrfs_iget(fs_info->sb, &key, root, NULL);
3004
+ inode = btrfs_iget(fs_info->sb, ino, root);
34883005 if (IS_ERR(inode))
34893006 return -ENOENT;
34903007
....@@ -3510,172 +3027,45 @@
35103027 }
35113028
35123029 /*
3513
- * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
3514
- * this function scans fs tree to find blocks reference the data extent
3030
+ * Locate the free space cache EXTENT_DATA in root tree leaf and delete the
3031
+ * cache inode, to avoid free space cache data extent blocking data relocation.
35153032 */
3516
-static int find_data_references(struct reloc_control *rc,
3517
- struct btrfs_key *extent_key,
3518
- struct extent_buffer *leaf,
3519
- struct btrfs_extent_data_ref *ref,
3520
- struct rb_root *blocks)
3033
+static int delete_v1_space_cache(struct extent_buffer *leaf,
3034
+ struct btrfs_block_group *block_group,
3035
+ u64 data_bytenr)
35213036 {
3522
- struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3523
- struct btrfs_path *path;
3524
- struct tree_block *block;
3525
- struct btrfs_root *root;
3526
- struct btrfs_file_extent_item *fi;
3527
- struct rb_node *rb_node;
3037
+ u64 space_cache_ino;
3038
+ struct btrfs_file_extent_item *ei;
35283039 struct btrfs_key key;
3529
- u64 ref_root;
3530
- u64 ref_objectid;
3531
- u64 ref_offset;
3532
- u32 ref_count;
3533
- u32 nritems;
3534
- int err = 0;
3535
- int added = 0;
3536
- int counted;
3040
+ bool found = false;
3041
+ int i;
35373042 int ret;
35383043
3539
- ref_root = btrfs_extent_data_ref_root(leaf, ref);
3540
- ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
3541
- ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
3542
- ref_count = btrfs_extent_data_ref_count(leaf, ref);
3044
+ if (btrfs_header_owner(leaf) != BTRFS_ROOT_TREE_OBJECTID)
3045
+ return 0;
35433046
3544
- /*
3545
- * This is an extent belonging to the free space cache, lets just delete
3546
- * it and redo the search.
3547
- */
3548
- if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
3549
- ret = delete_block_group_cache(fs_info, rc->block_group,
3550
- NULL, ref_objectid);
3551
- if (ret != -ENOENT)
3552
- return ret;
3553
- ret = 0;
3554
- }
3047
+ for (i = 0; i < btrfs_header_nritems(leaf); i++) {
3048
+ u8 type;
35553049
3556
- path = btrfs_alloc_path();
3557
- if (!path)
3558
- return -ENOMEM;
3559
- path->reada = READA_FORWARD;
3050
+ btrfs_item_key_to_cpu(leaf, &key, i);
3051
+ if (key.type != BTRFS_EXTENT_DATA_KEY)
3052
+ continue;
3053
+ ei = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
3054
+ type = btrfs_file_extent_type(leaf, ei);
35603055
3561
- root = read_fs_root(fs_info, ref_root);
3562
- if (IS_ERR(root)) {
3563
- err = PTR_ERR(root);
3564
- goto out;
3565
- }
3566
-
3567
- key.objectid = ref_objectid;
3568
- key.type = BTRFS_EXTENT_DATA_KEY;
3569
- if (ref_offset > ((u64)-1 << 32))
3570
- key.offset = 0;
3571
- else
3572
- key.offset = ref_offset;
3573
-
3574
- path->search_commit_root = 1;
3575
- path->skip_locking = 1;
3576
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3577
- if (ret < 0) {
3578
- err = ret;
3579
- goto out;
3580
- }
3581
-
3582
- leaf = path->nodes[0];
3583
- nritems = btrfs_header_nritems(leaf);
3584
- /*
3585
- * the references in tree blocks that use full backrefs
3586
- * are not counted in
3587
- */
3588
- if (block_use_full_backref(rc, leaf))
3589
- counted = 0;
3590
- else
3591
- counted = 1;
3592
- rb_node = tree_search(blocks, leaf->start);
3593
- if (rb_node) {
3594
- if (counted)
3595
- added = 1;
3596
- else
3597
- path->slots[0] = nritems;
3598
- }
3599
-
3600
- while (ref_count > 0) {
3601
- while (path->slots[0] >= nritems) {
3602
- ret = btrfs_next_leaf(root, path);
3603
- if (ret < 0) {
3604
- err = ret;
3605
- goto out;
3606
- }
3607
- if (WARN_ON(ret > 0))
3608
- goto out;
3609
-
3610
- leaf = path->nodes[0];
3611
- nritems = btrfs_header_nritems(leaf);
3612
- added = 0;
3613
-
3614
- if (block_use_full_backref(rc, leaf))
3615
- counted = 0;
3616
- else
3617
- counted = 1;
3618
- rb_node = tree_search(blocks, leaf->start);
3619
- if (rb_node) {
3620
- if (counted)
3621
- added = 1;
3622
- else
3623
- path->slots[0] = nritems;
3624
- }
3625
- }
3626
-
3627
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3628
- if (WARN_ON(key.objectid != ref_objectid ||
3629
- key.type != BTRFS_EXTENT_DATA_KEY))
3056
+ if ((type == BTRFS_FILE_EXTENT_REG ||
3057
+ type == BTRFS_FILE_EXTENT_PREALLOC) &&
3058
+ btrfs_file_extent_disk_bytenr(leaf, ei) == data_bytenr) {
3059
+ found = true;
3060
+ space_cache_ino = key.objectid;
36303061 break;
3631
-
3632
- fi = btrfs_item_ptr(leaf, path->slots[0],
3633
- struct btrfs_file_extent_item);
3634
-
3635
- if (btrfs_file_extent_type(leaf, fi) ==
3636
- BTRFS_FILE_EXTENT_INLINE)
3637
- goto next;
3638
-
3639
- if (btrfs_file_extent_disk_bytenr(leaf, fi) !=
3640
- extent_key->objectid)
3641
- goto next;
3642
-
3643
- key.offset -= btrfs_file_extent_offset(leaf, fi);
3644
- if (key.offset != ref_offset)
3645
- goto next;
3646
-
3647
- if (counted)
3648
- ref_count--;
3649
- if (added)
3650
- goto next;
3651
-
3652
- if (!tree_block_processed(leaf->start, rc)) {
3653
- block = kmalloc(sizeof(*block), GFP_NOFS);
3654
- if (!block) {
3655
- err = -ENOMEM;
3656
- break;
3657
- }
3658
- block->bytenr = leaf->start;
3659
- btrfs_item_key_to_cpu(leaf, &block->key, 0);
3660
- block->level = 0;
3661
- block->key_ready = 1;
3662
- rb_node = tree_insert(blocks, block->bytenr,
3663
- &block->rb_node);
3664
- if (rb_node)
3665
- backref_tree_panic(rb_node, -EEXIST,
3666
- block->bytenr);
36673062 }
3668
- if (counted)
3669
- added = 1;
3670
- else
3671
- path->slots[0] = nritems;
3672
-next:
3673
- path->slots[0]++;
3674
-
36753063 }
3676
-out:
3677
- btrfs_free_path(path);
3678
- return err;
3064
+ if (!found)
3065
+ return -ENOENT;
3066
+ ret = delete_block_group_cache(leaf->fs_info, block_group, NULL,
3067
+ space_cache_ino);
3068
+ return ret;
36793069 }
36803070
36813071 /*
....@@ -3687,91 +3077,41 @@
36873077 struct btrfs_path *path,
36883078 struct rb_root *blocks)
36893079 {
3690
- struct btrfs_key key;
3691
- struct extent_buffer *eb;
3692
- struct btrfs_extent_data_ref *dref;
3693
- struct btrfs_extent_inline_ref *iref;
3694
- unsigned long ptr;
3695
- unsigned long end;
3696
- u32 blocksize = rc->extent_root->fs_info->nodesize;
3080
+ struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3081
+ struct ulist *leaves = NULL;
3082
+ struct ulist_iterator leaf_uiter;
3083
+ struct ulist_node *ref_node = NULL;
3084
+ const u32 blocksize = fs_info->nodesize;
36973085 int ret = 0;
3698
- int err = 0;
36993086
3700
- eb = path->nodes[0];
3701
- ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
3702
- end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
3703
- ptr += sizeof(struct btrfs_extent_item);
3704
-
3705
- while (ptr < end) {
3706
- iref = (struct btrfs_extent_inline_ref *)ptr;
3707
- key.type = btrfs_get_extent_inline_ref_type(eb, iref,
3708
- BTRFS_REF_TYPE_DATA);
3709
- if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3710
- key.offset = btrfs_extent_inline_ref_offset(eb, iref);
3711
- ret = __add_tree_block(rc, key.offset, blocksize,
3712
- blocks);
3713
- } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3714
- dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3715
- ret = find_data_references(rc, extent_key,
3716
- eb, dref, blocks);
3717
- } else {
3718
- ret = -EUCLEAN;
3719
- btrfs_err(rc->extent_root->fs_info,
3720
- "extent %llu slot %d has an invalid inline ref type",
3721
- eb->start, path->slots[0]);
3722
- }
3723
- if (ret) {
3724
- err = ret;
3725
- goto out;
3726
- }
3727
- ptr += btrfs_extent_inline_ref_size(key.type);
3728
- }
3729
- WARN_ON(ptr > end);
3730
-
3731
- while (1) {
3732
- cond_resched();
3733
- eb = path->nodes[0];
3734
- if (path->slots[0] >= btrfs_header_nritems(eb)) {
3735
- ret = btrfs_next_leaf(rc->extent_root, path);
3736
- if (ret < 0) {
3737
- err = ret;
3738
- break;
3739
- }
3740
- if (ret > 0)
3741
- break;
3742
- eb = path->nodes[0];
3743
- }
3744
-
3745
- btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
3746
- if (key.objectid != extent_key->objectid)
3747
- break;
3748
-
3749
- if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3750
- ret = __add_tree_block(rc, key.offset, blocksize,
3751
- blocks);
3752
- } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3753
- dref = btrfs_item_ptr(eb, path->slots[0],
3754
- struct btrfs_extent_data_ref);
3755
- ret = find_data_references(rc, extent_key,
3756
- eb, dref, blocks);
3757
- } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
3758
- btrfs_print_v0_err(eb->fs_info);
3759
- btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
3760
- ret = -EINVAL;
3761
- } else {
3762
- ret = 0;
3763
- }
3764
- if (ret) {
3765
- err = ret;
3766
- break;
3767
- }
3768
- path->slots[0]++;
3769
- }
3770
-out:
37713087 btrfs_release_path(path);
3772
- if (err)
3088
+ ret = btrfs_find_all_leafs(NULL, fs_info, extent_key->objectid,
3089
+ 0, &leaves, NULL, true);
3090
+ if (ret < 0)
3091
+ return ret;
3092
+
3093
+ ULIST_ITER_INIT(&leaf_uiter);
3094
+ while ((ref_node = ulist_next(leaves, &leaf_uiter))) {
3095
+ struct extent_buffer *eb;
3096
+
3097
+ eb = read_tree_block(fs_info, ref_node->val, 0, 0, NULL);
3098
+ if (IS_ERR(eb)) {
3099
+ ret = PTR_ERR(eb);
3100
+ break;
3101
+ }
3102
+ ret = delete_v1_space_cache(eb, rc->block_group,
3103
+ extent_key->objectid);
3104
+ free_extent_buffer(eb);
3105
+ if (ret < 0)
3106
+ break;
3107
+ ret = __add_tree_block(rc, ref_node->val, blocksize, blocks);
3108
+ if (ret < 0)
3109
+ break;
3110
+ }
3111
+ if (ret < 0)
37733112 free_block_list(blocks);
3774
- return err;
3113
+ ulist_free(leaves);
3114
+ return ret;
37753115 }
37763116
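
The body above replaces the old per-backref scan with a single backref.c lookup; a condensed restatement of the new flow:

	/*
	 * 1. btrfs_find_all_leafs() fills the 'leaves' ulist with the bytenr of
	 *    every tree leaf that references the data extent being relocated.
	 * 2. For each leaf: read it, let delete_v1_space_cache() drop a v1 free
	 *    space cache inode if the leaf belongs to the root tree, then queue
	 *    the leaf itself with __add_tree_block().
	 * 3. ulist_free() releases the list; on error the blocks collected so
	 *    far are dropped through free_block_list().
	 */
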
37773117 /*
....@@ -3787,7 +3127,7 @@
37873127 u64 start, end, last;
37883128 int ret;
37893129
3790
- last = rc->block_group->key.objectid + rc->block_group->key.offset;
3130
+ last = rc->block_group->start + rc->block_group->length;
37913131 while (1) {
37923132 cond_resched();
37933133 if (rc->search_start >= last) {
....@@ -3904,7 +3244,7 @@
39043244 return -ENOMEM;
39053245
39063246 memset(&rc->cluster, 0, sizeof(rc->cluster));
3907
- rc->search_start = rc->block_group->key.objectid;
3247
+ rc->search_start = rc->block_group->start;
39083248 rc->extents_found = 0;
39093249 rc->nodes_relocated = 0;
39103250 rc->merging_rsv_size = 0;
....@@ -3930,8 +3270,12 @@
39303270 */
39313271 return PTR_ERR(trans);
39323272 }
3933
- btrfs_commit_transaction(trans);
3934
- return 0;
3273
+
3274
+ ret = btrfs_commit_transaction(trans);
3275
+ if (ret)
3276
+ unset_reloc_control(rc);
3277
+
3278
+ return ret;
39353279 }
39363280
39373281 static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
....@@ -4023,12 +3367,6 @@
40233367 if (!RB_EMPTY_ROOT(&blocks)) {
40243368 ret = relocate_tree_blocks(trans, rc, &blocks);
40253369 if (ret < 0) {
4026
- /*
4027
- * if we fail to relocate tree blocks, force to update
4028
- * backref cache when committing transaction.
4029
- */
4030
- rc->backref_cache.last_trans = trans->transid - 1;
4031
-
40323370 if (ret != -EAGAIN) {
40333371 err = ret;
40343372 break;
....@@ -4051,6 +3389,10 @@
40513389 err = ret;
40523390 break;
40533391 }
3392
+ }
3393
+ if (btrfs_should_cancel_balance(fs_info)) {
3394
+ err = -ECANCELED;
3395
+ break;
40543396 }
40553397 }
40563398 if (trans && progress && err == -ENOSPC) {
....@@ -4080,16 +3422,24 @@
40803422 rc->create_reloc_tree = 0;
40813423 set_reloc_control(rc);
40823424
4083
- backref_cache_cleanup(&rc->backref_cache);
4084
- btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1);
3425
+ btrfs_backref_release_cache(&rc->backref_cache);
3426
+ btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
40853427
3428
+ /*
3429
+ * Even in the case when the relocation is cancelled, we should all go
3430
+ * through prepare_to_merge() and merge_reloc_roots().
3431
+ *
3432
+ * For error (including cancelled balance), prepare_to_merge() will
3433
+ * mark all reloc trees orphan, then queue them for cleanup in
3434
+ * merge_reloc_roots()
3435
+ */
40863436 err = prepare_to_merge(rc, err);
40873437
40883438 merge_reloc_roots(rc);
40893439
40903440 rc->merge_reloc_tree = 0;
40913441 unset_reloc_control(rc);
4092
- btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1);
3442
+ btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
40933443
40943444 /* get rid of pinned extents */
40953445 trans = btrfs_join_transaction(rc->extent_root);
....@@ -4097,8 +3447,13 @@
40973447 err = PTR_ERR(trans);
40983448 goto out_free;
40993449 }
4100
- btrfs_commit_transaction(trans);
3450
+ ret = btrfs_commit_transaction(trans);
3451
+ if (ret && !err)
3452
+ err = ret;
41013453 out_free:
3454
+ ret = clean_dirty_subvols(rc);
3455
+ if (ret < 0 && !err)
3456
+ err = ret;
41023457 btrfs_free_block_rsv(fs_info, rc->block_rsv);
41033458 btrfs_free_path(path);
41043459 return err;
....@@ -4140,22 +3495,20 @@
41403495 */
41413496 static noinline_for_stack
41423497 struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
4143
- struct btrfs_block_group_cache *group)
3498
+ struct btrfs_block_group *group)
41443499 {
41453500 struct inode *inode = NULL;
41463501 struct btrfs_trans_handle *trans;
41473502 struct btrfs_root *root;
4148
- struct btrfs_key key;
41493503 u64 objectid;
41503504 int err = 0;
41513505
4152
- root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
4153
- if (IS_ERR(root))
4154
- return ERR_CAST(root);
4155
-
3506
+ root = btrfs_grab_root(fs_info->data_reloc_root);
41563507 trans = btrfs_start_transaction(root, 6);
4157
- if (IS_ERR(trans))
3508
+ if (IS_ERR(trans)) {
3509
+ btrfs_put_root(root);
41583510 return ERR_CAST(trans);
3511
+ }
41593512
41603513 err = btrfs_find_free_objectid(root, &objectid);
41613514 if (err)
....@@ -4164,15 +3517,13 @@
41643517 err = __insert_orphan_inode(trans, root, objectid);
41653518 BUG_ON(err);
41663519
4167
- key.objectid = objectid;
4168
- key.type = BTRFS_INODE_ITEM_KEY;
4169
- key.offset = 0;
4170
- inode = btrfs_iget(fs_info->sb, &key, root, NULL);
3520
+ inode = btrfs_iget(fs_info->sb, objectid, root);
41713521 BUG_ON(IS_ERR(inode));
4172
- BTRFS_I(inode)->index_cnt = group->key.objectid;
3522
+ BTRFS_I(inode)->index_cnt = group->start;
41733523
41743524 err = btrfs_orphan_add(trans, BTRFS_I(inode));
41753525 out:
3526
+ btrfs_put_root(root);
41763527 btrfs_end_transaction(trans);
41773528 btrfs_btree_balance_dirty(fs_info);
41783529 if (err) {
....@@ -4183,7 +3534,7 @@
41833534 return inode;
41843535 }
41853536
4186
-static struct reloc_control *alloc_reloc_control(void)
3537
+static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
41873538 {
41883539 struct reloc_control *rc;
41893540
....@@ -4192,49 +3543,48 @@
41923543 return NULL;
41933544
41943545 INIT_LIST_HEAD(&rc->reloc_roots);
4195
- backref_cache_init(&rc->backref_cache);
3546
+ INIT_LIST_HEAD(&rc->dirty_subvol_roots);
3547
+ btrfs_backref_init_cache(fs_info, &rc->backref_cache, 1);
41963548 mapping_tree_init(&rc->reloc_root_tree);
4197
- extent_io_tree_init(&rc->processed_blocks, NULL);
3549
+ extent_io_tree_init(fs_info, &rc->processed_blocks,
3550
+ IO_TREE_RELOC_BLOCKS, NULL);
41983551 return rc;
3552
+}
3553
+
3554
+static void free_reloc_control(struct reloc_control *rc)
3555
+{
3556
+ struct mapping_node *node, *tmp;
3557
+
3558
+ free_reloc_roots(&rc->reloc_roots);
3559
+ rbtree_postorder_for_each_entry_safe(node, tmp,
3560
+ &rc->reloc_root_tree.rb_root, rb_node)
3561
+ kfree(node);
3562
+
3563
+ kfree(rc);
41993564 }
42003565
42013566 /*
42023567 * Print the block group being relocated
42033568 */
42043569 static void describe_relocation(struct btrfs_fs_info *fs_info,
4205
- struct btrfs_block_group_cache *block_group)
3570
+ struct btrfs_block_group *block_group)
42063571 {
4207
- char buf[128]; /* prefixed by a '|' that'll be dropped */
4208
- u64 flags = block_group->flags;
3572
+ char buf[128] = {'\0'};
42093573
4210
- /* Shouldn't happen */
4211
- if (!flags) {
4212
- strcpy(buf, "|NONE");
4213
- } else {
4214
- char *bp = buf;
4215
-
4216
-#define DESCRIBE_FLAG(f, d) \
4217
- if (flags & BTRFS_BLOCK_GROUP_##f) { \
4218
- bp += snprintf(bp, buf - bp + sizeof(buf), "|%s", d); \
4219
- flags &= ~BTRFS_BLOCK_GROUP_##f; \
4220
- }
4221
- DESCRIBE_FLAG(DATA, "data");
4222
- DESCRIBE_FLAG(SYSTEM, "system");
4223
- DESCRIBE_FLAG(METADATA, "metadata");
4224
- DESCRIBE_FLAG(RAID0, "raid0");
4225
- DESCRIBE_FLAG(RAID1, "raid1");
4226
- DESCRIBE_FLAG(DUP, "dup");
4227
- DESCRIBE_FLAG(RAID10, "raid10");
4228
- DESCRIBE_FLAG(RAID5, "raid5");
4229
- DESCRIBE_FLAG(RAID6, "raid6");
4230
- if (flags)
4231
- snprintf(bp, buf - bp + sizeof(buf), "|0x%llx", flags);
4232
-#undef DESCRIBE_FLAG
4233
- }
3574
+ btrfs_describe_block_groups(block_group->flags, buf, sizeof(buf));
42343575
42353576 btrfs_info(fs_info,
42363577 "relocating block group %llu flags %s",
4237
- block_group->key.objectid, buf + 1);
3578
+ block_group->start, buf);
3579
+}
3580
+
3581
+static const char *stage_to_string(int stage)
3582
+{
3583
+ if (stage == MOVE_DATA_EXTENTS)
3584
+ return "move data extents";
3585
+ if (stage == UPDATE_DATA_PTRS)
3586
+ return "update data pointers";
3587
+ return "unknown";
42383588 }
42393589
42403590 /*
....@@ -4242,6 +3592,7 @@
42423592 */
42433593 int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
42443594 {
3595
+ struct btrfs_block_group *bg;
42453596 struct btrfs_root *extent_root = fs_info->extent_root;
42463597 struct reloc_control *rc;
42473598 struct inode *inode;
....@@ -4250,16 +3601,25 @@
42503601 int rw = 0;
42513602 int err = 0;
42523603
4253
- rc = alloc_reloc_control();
4254
- if (!rc)
3604
+ bg = btrfs_lookup_block_group(fs_info, group_start);
3605
+ if (!bg)
3606
+ return -ENOENT;
3607
+
3608
+ if (btrfs_pinned_by_swapfile(fs_info, bg)) {
3609
+ btrfs_put_block_group(bg);
3610
+ return -ETXTBSY;
3611
+ }
3612
+
3613
+ rc = alloc_reloc_control(fs_info);
3614
+ if (!rc) {
3615
+ btrfs_put_block_group(bg);
42553616 return -ENOMEM;
3617
+ }
42563618
42573619 rc->extent_root = extent_root;
3620
+ rc->block_group = bg;
42583621
4259
- rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
4260
- BUG_ON(!rc->block_group);
4261
-
4262
- ret = btrfs_inc_block_group_ro(rc->block_group);
3622
+ ret = btrfs_inc_block_group_ro(rc->block_group, true);
42633623 if (ret) {
42643624 err = ret;
42653625 goto out;
....@@ -4272,7 +3632,7 @@
42723632 goto out;
42733633 }
42743634
4275
- inode = lookup_free_space_inode(fs_info, rc->block_group, path);
3635
+ inode = lookup_free_space_inode(rc->block_group, path);
42763636 btrfs_free_path(path);
42773637
42783638 if (!IS_ERR(inode))
....@@ -4297,16 +3657,19 @@
42973657 btrfs_wait_block_group_reservations(rc->block_group);
42983658 btrfs_wait_nocow_writers(rc->block_group);
42993659 btrfs_wait_ordered_roots(fs_info, U64_MAX,
4300
- rc->block_group->key.objectid,
4301
- rc->block_group->key.offset);
3660
+ rc->block_group->start,
3661
+ rc->block_group->length);
43023662
43033663 while (1) {
3664
+ int finishes_stage;
3665
+
43043666 mutex_lock(&fs_info->cleaner_mutex);
43053667 ret = relocate_block_group(rc);
43063668 mutex_unlock(&fs_info->cleaner_mutex);
43073669 if (ret < 0)
43083670 err = ret;
43093671
3672
+ finishes_stage = rc->stage;
43103673 /*
43113674 * We may have gotten ENOSPC after we already dirtied some
43123675 * extents. If writeout happens while we're relocating a
....@@ -4332,19 +3695,19 @@
43323695 if (rc->extents_found == 0)
43333696 break;
43343697
4335
- btrfs_info(fs_info, "found %llu extents", rc->extents_found);
4336
-
3698
+ btrfs_info(fs_info, "found %llu extents, stage: %s",
3699
+ rc->extents_found, stage_to_string(finishes_stage));
43373700 }
43383701
43393702 WARN_ON(rc->block_group->pinned > 0);
43403703 WARN_ON(rc->block_group->reserved > 0);
4341
- WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
3704
+ WARN_ON(rc->block_group->used > 0);
43423705 out:
43433706 if (err && rw)
43443707 btrfs_dec_block_group_ro(rc->block_group);
43453708 iput(rc->data_inode);
43463709 btrfs_put_block_group(rc->block_group);
4347
- kfree(rc);
3710
+ free_reloc_control(rc);
43483711 return err;
43493712 }
43503713
....@@ -4420,17 +3783,18 @@
44203783 key.type != BTRFS_ROOT_ITEM_KEY)
44213784 break;
44223785
4423
- reloc_root = btrfs_read_fs_root(root, &key);
3786
+ reloc_root = btrfs_read_tree_root(root, &key);
44243787 if (IS_ERR(reloc_root)) {
44253788 err = PTR_ERR(reloc_root);
44263789 goto out;
44273790 }
44283791
3792
+ set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
44293793 list_add(&reloc_root->root_list, &reloc_roots);
44303794
44313795 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
4432
- fs_root = read_fs_root(fs_info,
4433
- reloc_root->root_key.offset);
3796
+ fs_root = btrfs_get_fs_root(fs_info,
3797
+ reloc_root->root_key.offset, false);
44343798 if (IS_ERR(fs_root)) {
44353799 ret = PTR_ERR(fs_root);
44363800 if (ret != -ENOENT) {
....@@ -4442,6 +3806,8 @@
44423806 err = ret;
44433807 goto out;
44443808 }
3809
+ } else {
3810
+ btrfs_put_root(fs_root);
44453811 }
44463812 }
44473813
....@@ -4455,7 +3821,7 @@
44553821 if (list_empty(&reloc_roots))
44563822 goto out;
44573823
4458
- rc = alloc_reloc_control();
3824
+ rc = alloc_reloc_control(fs_info);
44593825 if (!rc) {
44603826 err = -ENOMEM;
44613827 goto out;
....@@ -4467,9 +3833,8 @@
44673833
44683834 trans = btrfs_join_transaction(rc->extent_root);
44693835 if (IS_ERR(trans)) {
4470
- unset_reloc_control(rc);
44713836 err = PTR_ERR(trans);
4472
- goto out_free;
3837
+ goto out_unset;
44733838 }
44743839
44753840 rc->merge_reloc_tree = 1;
....@@ -4485,21 +3850,24 @@
44853850 continue;
44863851 }
44873852
4488
- fs_root = read_fs_root(fs_info, reloc_root->root_key.offset);
3853
+ fs_root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
3854
+ false);
44893855 if (IS_ERR(fs_root)) {
44903856 err = PTR_ERR(fs_root);
44913857 list_add_tail(&reloc_root->root_list, &reloc_roots);
4492
- goto out_free;
3858
+ btrfs_end_transaction(trans);
3859
+ goto out_unset;
44933860 }
44943861
44953862 err = __add_reloc_root(reloc_root);
44963863 BUG_ON(err < 0); /* -ENOMEM or logic error */
4497
- fs_root->reloc_root = reloc_root;
3864
+ fs_root->reloc_root = btrfs_grab_root(reloc_root);
3865
+ btrfs_put_root(fs_root);
44983866 }
44993867
45003868 err = btrfs_commit_transaction(trans);
45013869 if (err)
4502
- goto out_free;
3870
+ goto out_unset;
45033871
45043872 merge_reloc_roots(rc);
45053873
....@@ -4508,24 +3876,27 @@
45083876 trans = btrfs_join_transaction(rc->extent_root);
45093877 if (IS_ERR(trans)) {
45103878 err = PTR_ERR(trans);
4511
- goto out_free;
3879
+ goto out_clean;
45123880 }
45133881 err = btrfs_commit_transaction(trans);
4514
-out_free:
4515
- kfree(rc);
3882
+out_clean:
3883
+ ret = clean_dirty_subvols(rc);
3884
+ if (ret < 0 && !err)
3885
+ err = ret;
3886
+out_unset:
3887
+ unset_reloc_control(rc);
3888
+ free_reloc_control(rc);
45163889 out:
4517
- if (!list_empty(&reloc_roots))
4518
- free_reloc_roots(&reloc_roots);
3890
+ free_reloc_roots(&reloc_roots);
45193891
45203892 btrfs_free_path(path);
45213893
45223894 if (err == 0) {
45233895 /* cleanup orphan inode in data relocation tree */
4524
- fs_root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
4525
- if (IS_ERR(fs_root))
4526
- err = PTR_ERR(fs_root);
4527
- else
4528
- err = btrfs_orphan_cleanup(fs_root);
3896
+ fs_root = btrfs_grab_root(fs_info->data_reloc_root);
3897
+ ASSERT(fs_root);
3898
+ err = btrfs_orphan_cleanup(fs_root);
3899
+ btrfs_put_root(fs_root);
45293900 }
45303901 return err;
45313902 }
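The reworked btrfs_recover_relocation() above relies on explicit root reference counting: lookups such as btrfs_get_fs_root() hand back counted references, storing a long-lived pointer like fs_root->reloc_root takes its own reference via btrfs_grab_root(), and every temporary reference is balanced with btrfs_put_root(). The toy user-space model below illustrates only that ownership discipline; struct root, root_alloc(), root_grab() and root_put() are made-up stand-ins, not the btrfs API.

#include <stdio.h>
#include <stdlib.h>

/* Toy stand-in for struct btrfs_root: only the refcount matters here. */
struct root {
	char name[16];
	int refs;
};

static struct root *root_alloc(const char *name)
{
	struct root *r = calloc(1, sizeof(*r));

	if (!r)
		exit(1);
	snprintf(r->name, sizeof(r->name), "%s", name);
	r->refs = 1;		/* the caller owns this first reference */
	return r;
}

static struct root *root_grab(struct root *r)	/* like btrfs_grab_root() */
{
	r->refs++;
	return r;
}

static void root_put(struct root *r)		/* like btrfs_put_root() */
{
	if (--r->refs == 0) {
		printf("%s freed\n", r->name);
		free(r);
	}
}

int main(void)
{
	struct root *fs_root = root_alloc("fs_root");	/* lookup reference */
	struct root *reloc_root = root_alloc("reloc_root");
	struct root *stored;

	/* A long-lived pointer (think fs_root->reloc_root) takes its own ref. */
	stored = root_grab(reloc_root);

	/* The temporary lookup reference is dropped as soon as it is unused. */
	root_put(fs_root);

	/* Teardown drops each remaining reference exactly once. */
	root_put(stored);
	root_put(reloc_root);
	return 0;
}

Missing either the grab or one of the puts is exactly the kind of leak or use-after-free the added btrfs_grab_root()/btrfs_put_root() calls in the hunks above guard against.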
....@@ -4536,9 +3907,9 @@
45363907 * cloning checksum properly handles the nodatasum extents.
45373908 * it also saves CPU time to re-calculate the checksum.
45383909 */
4539
-int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
3910
+int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len)
45403911 {
4541
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3912
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
45423913 struct btrfs_ordered_sum *sums;
45433914 struct btrfs_ordered_extent *ordered;
45443915 int ret;
....@@ -4547,9 +3918,9 @@
45473918 LIST_HEAD(list);
45483919
45493920 ordered = btrfs_lookup_ordered_extent(inode, file_pos);
4550
- BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
3921
+ BUG_ON(ordered->file_offset != file_pos || ordered->num_bytes != len);
45513922
4552
- disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
3923
+ disk_bytenr = file_pos + inode->index_cnt;
45533924 ret = btrfs_lookup_csums_range(fs_info->csum_root, disk_bytenr,
45543925 disk_bytenr + len - 1, &list, 0);
45553926 if (ret)
....@@ -4571,10 +3942,10 @@
45713942 * disk_len vs real len like with real inodes since it's all
45723943 * disk length.
45733944 */
4574
- new_bytenr = ordered->start + (sums->bytenr - disk_bytenr);
3945
+ new_bytenr = ordered->disk_bytenr + sums->bytenr - disk_bytenr;
45753946 sums->bytenr = new_bytenr;
45763947
4577
- btrfs_add_ordered_sum(inode, ordered, sums);
3948
+ btrfs_add_ordered_sum(ordered, sums);
45783949 }
45793950 out:
45803951 btrfs_put_ordered_extent(ordered);
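The checksum-cloning change above rebases each ordered csum entry from the old disk location onto the newly allocated one: the entry keeps its offset inside the relocated range (sums->bytenr - disk_bytenr), and that offset is re-applied on top of ordered->disk_bytenr. A tiny stand-alone illustration of the arithmetic, using made-up byte numbers:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	/* Old on-disk start of the relocated file range (made-up numbers). */
	unsigned long long disk_bytenr = 1048576;		/* 1 MiB */
	/* New on-disk start allocated for the ordered extent. */
	unsigned long long ordered_disk_bytenr = 8388608;	/* 8 MiB */
	/* One checksum entry covering data 128 KiB into the old range. */
	unsigned long long sums_bytenr = disk_bytenr + 131072;

	/* Same rebasing as the patch:
	 * new_bytenr = ordered->disk_bytenr + sums->bytenr - disk_bytenr
	 */
	unsigned long long new_bytenr =
		ordered_disk_bytenr + sums_bytenr - disk_bytenr;

	/* The entry keeps its 128 KiB offset, now relative to the new extent. */
	assert(new_bytenr - ordered_disk_bytenr == sums_bytenr - disk_bytenr);
	printf("old csum bytenr %llu -> new csum bytenr %llu\n",
	       sums_bytenr, new_bytenr);
	return 0;
}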
....@@ -4587,7 +3958,7 @@
45873958 {
45883959 struct btrfs_fs_info *fs_info = root->fs_info;
45893960 struct reloc_control *rc;
4590
- struct backref_node *node;
3961
+ struct btrfs_backref_node *node;
45913962 int first_cow = 0;
45923963 int level;
45933964 int ret = 0;
....@@ -4612,8 +3983,8 @@
46123983 BUG_ON(node->bytenr != buf->start &&
46133984 node->new_bytenr != buf->start);
46143985
4615
- drop_node_buffer(node);
4616
- extent_buffer_get(cow);
3986
+ btrfs_backref_drop_node_buffer(node);
3987
+ atomic_inc(&cow->refs);
46173988 node->eb = cow;
46183989 node->new_bytenr = cow->start;
46193990
....@@ -4624,7 +3995,7 @@
46243995 }
46253996
46263997 if (first_cow)
4627
- __mark_block_processed(rc, node);
3998
+ mark_block_processed(rc, node);
46283999
46294000 if (first_cow && level > 0)
46304001 rc->nodes_relocated += buf->len;
....@@ -4642,14 +4013,12 @@
46424013 void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
46434014 u64 *bytes_to_reserve)
46444015 {
4645
- struct btrfs_root *root;
4646
- struct reloc_control *rc;
4016
+ struct btrfs_root *root = pending->root;
4017
+ struct reloc_control *rc = root->fs_info->reloc_ctl;
46474018
4648
- root = pending->root;
4649
- if (!root->reloc_root)
4019
+ if (!rc || !have_reloc_root(root))
46504020 return;
46514021
4652
- rc = root->fs_info->reloc_ctl;
46534022 if (!rc->merge_reloc_tree)
46544023 return;
46554024
....@@ -4671,6 +4040,10 @@
46714040 /*
46724041 * called after snapshot is created. migrate block reservation
46734042 * and create reloc root for the newly created snapshot
4043
+ *
4044
+ * This is similar to btrfs_init_reloc_root(), we come out of here with two
4045
+ * references held on the reloc_root, one for root->reloc_root and one for
4046
+ * rc->reloc_roots.
46744047 */
46754048 int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
46764049 struct btrfs_pending_snapshot *pending)
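The comment added above documents that btrfs_reloc_post_snapshot() exits with two references held on the reloc root: one through the new root's reloc_root pointer and one through the reloc control's reloc_roots list. The toy below merely restates that invariant with a plain counter, to make explicit why teardown must drop one reference per owner; the struct and helpers are hypothetical:

#include <assert.h>
#include <stdio.h>

/* Toy reloc root: only the reference count matters for this illustration. */
struct toy_reloc_root {
	int refs;
};

static void grab(struct toy_reloc_root *r) { r->refs++; }
static void put(struct toy_reloc_root *r)  { r->refs--; }

int main(void)
{
	struct toy_reloc_root reloc_root = { .refs = 0 };

	/* Owner 1: the new snapshot root stores root->reloc_root. */
	grab(&reloc_root);
	/* Owner 2: the reloc control keeps the tree on rc->reloc_roots. */
	grab(&reloc_root);

	/* The invariant the added comment describes: two references held. */
	assert(reloc_root.refs == 2);
	printf("refs after post-snapshot: %d\n", reloc_root.refs);

	/* Teardown drops one reference per owner. */
	put(&reloc_root);	/* the root drops its reloc_root pointer */
	put(&reloc_root);	/* the reloc tree leaves rc->reloc_roots */
	assert(reloc_root.refs == 0);
	return 0;
}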
....@@ -4678,10 +4051,10 @@
46784051 struct btrfs_root *root = pending->root;
46794052 struct btrfs_root *reloc_root;
46804053 struct btrfs_root *new_root;
4681
- struct reloc_control *rc;
4054
+ struct reloc_control *rc = root->fs_info->reloc_ctl;
46824055 int ret;
46834056
4684
- if (!root->reloc_root)
4057
+ if (!rc || !have_reloc_root(root))
46854058 return 0;
46864059
46874060 rc = root->fs_info->reloc_ctl;
....@@ -4690,7 +4063,7 @@
46904063 if (rc->merge_reloc_tree) {
46914064 ret = btrfs_block_rsv_migrate(&pending->block_rsv,
46924065 rc->block_rsv,
4693
- rc->nodes_relocated, 1);
4066
+ rc->nodes_relocated, true);
46944067 if (ret)
46954068 return ret;
46964069 }
....@@ -4703,7 +4076,7 @@
47034076
47044077 ret = __add_reloc_root(reloc_root);
47054078 BUG_ON(ret < 0);
4706
- new_root->reloc_root = reloc_root;
4079
+ new_root->reloc_root = btrfs_grab_root(reloc_root);
47074080
47084081 if (rc->create_reloc_tree)
47094082 ret = clone_backref_node(trans, rc, root, reloc_root);