.. | .. |
---|
16 | 16 | #include <linux/percpu_counter.h> |
---|
17 | 17 | #include <linux/lockdep.h> |
---|
18 | 18 | #include <linux/crc32c.h> |
---|
| 19 | +#include "misc.h" |
---|
19 | 20 | #include "tree-log.h" |
---|
20 | 21 | #include "disk-io.h" |
---|
21 | 22 | #include "print-tree.h" |
---|
.. | .. |
---|
24 | 25 | #include "locking.h" |
---|
25 | 26 | #include "free-space-cache.h" |
---|
26 | 27 | #include "free-space-tree.h" |
---|
27 | | -#include "math.h" |
---|
28 | 28 | #include "sysfs.h" |
---|
29 | 29 | #include "qgroup.h" |
---|
30 | 30 | #include "ref-verify.h" |
---|
| 31 | +#include "space-info.h" |
---|
| 32 | +#include "block-rsv.h" |
---|
| 33 | +#include "delalloc-space.h" |
---|
| 34 | +#include "block-group.h" |
---|
| 35 | +#include "discard.h" |
---|
| 36 | +#include "rcu-string.h" |
---|
31 | 37 | |
---|
32 | 38 | #undef SCRAMBLE_DELAYED_REFS |
---|
33 | 39 | |
---|
34 | | -/* |
---|
35 | | - * control flags for do_chunk_alloc's force field |
---|
36 | | - * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk |
---|
37 | | - * if we really need one. |
---|
38 | | - * |
---|
39 | | - * CHUNK_ALLOC_LIMITED means to only try and allocate one |
---|
40 | | - * if we have very few chunks already allocated. This is |
---|
41 | | - * used as part of the clustering code to help make sure |
---|
42 | | - * we have a good pool of storage to cluster in, without |
---|
43 | | - * filling the FS with empty chunks |
---|
44 | | - * |
---|
45 | | - * CHUNK_ALLOC_FORCE means it must try to allocate one |
---|
46 | | - * |
---|
47 | | - */ |
---|
48 | | -enum { |
---|
49 | | - CHUNK_ALLOC_NO_FORCE = 0, |
---|
50 | | - CHUNK_ALLOC_LIMITED = 1, |
---|
51 | | - CHUNK_ALLOC_FORCE = 2, |
---|
52 | | -}; |
---|
53 | 40 | |
---|
54 | 41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
---|
55 | 42 | struct btrfs_delayed_ref_node *node, u64 parent, |
---|
.. | .. |
---|
66 | 53 | static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, |
---|
67 | 54 | struct btrfs_delayed_ref_node *node, |
---|
68 | 55 | struct btrfs_delayed_extent_op *extent_op); |
---|
69 | | -static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, |
---|
70 | | - int force); |
---|
71 | 56 | static int find_next_key(struct btrfs_path *path, int level, |
---|
72 | 57 | struct btrfs_key *key); |
---|
73 | | -static void dump_space_info(struct btrfs_fs_info *fs_info, |
---|
74 | | - struct btrfs_space_info *info, u64 bytes, |
---|
75 | | - int dump_block_groups); |
---|
76 | | -static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, |
---|
77 | | - u64 num_bytes); |
---|
78 | | -static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, |
---|
79 | | - struct btrfs_space_info *space_info, |
---|
80 | | - u64 num_bytes); |
---|
81 | | -static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info, |
---|
82 | | - struct btrfs_space_info *space_info, |
---|
83 | | - u64 num_bytes); |
---|
84 | 58 | |
---|
85 | | -static noinline int |
---|
86 | | -block_group_cache_done(struct btrfs_block_group_cache *cache) |
---|
87 | | -{ |
---|
88 | | - smp_mb(); |
---|
89 | | - return cache->cached == BTRFS_CACHE_FINISHED || |
---|
90 | | - cache->cached == BTRFS_CACHE_ERROR; |
---|
91 | | -} |
---|
92 | | - |
---|
93 | | -static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) |
---|
| 59 | +static int block_group_bits(struct btrfs_block_group *cache, u64 bits) |
---|
94 | 60 | { |
---|
95 | 61 | return (cache->flags & bits) == bits; |
---|
96 | 62 | } |
---|
97 | 63 | |
---|
98 | | -void btrfs_get_block_group(struct btrfs_block_group_cache *cache) |
---|
99 | | -{ |
---|
100 | | - atomic_inc(&cache->count); |
---|
101 | | -} |
---|
102 | | - |
---|
103 | | -void btrfs_put_block_group(struct btrfs_block_group_cache *cache) |
---|
104 | | -{ |
---|
105 | | - if (atomic_dec_and_test(&cache->count)) { |
---|
106 | | - WARN_ON(cache->pinned > 0); |
---|
107 | | - WARN_ON(cache->reserved > 0); |
---|
108 | | - |
---|
109 | | - /* |
---|
110 | | - * If not empty, someone is still holding mutex of |
---|
111 | | - * full_stripe_lock, which can only be released by caller. |
---|
112 | | - * And it will definitely cause use-after-free when caller |
---|
113 | | - * tries to release full stripe lock. |
---|
114 | | - * |
---|
115 | | - * No better way to resolve, but only to warn. |
---|
116 | | - */ |
---|
117 | | - WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root)); |
---|
118 | | - kfree(cache->free_space_ctl); |
---|
119 | | - kfree(cache); |
---|
120 | | - } |
---|
121 | | -} |
---|
122 | | - |
---|
123 | | -/* |
---|
124 | | - * this adds the block group to the fs_info rb tree for the block group |
---|
125 | | - * cache |
---|
126 | | - */ |
---|
127 | | -static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, |
---|
128 | | - struct btrfs_block_group_cache *block_group) |
---|
129 | | -{ |
---|
130 | | - struct rb_node **p; |
---|
131 | | - struct rb_node *parent = NULL; |
---|
132 | | - struct btrfs_block_group_cache *cache; |
---|
133 | | - |
---|
134 | | - spin_lock(&info->block_group_cache_lock); |
---|
135 | | - p = &info->block_group_cache_tree.rb_node; |
---|
136 | | - |
---|
137 | | - while (*p) { |
---|
138 | | - parent = *p; |
---|
139 | | - cache = rb_entry(parent, struct btrfs_block_group_cache, |
---|
140 | | - cache_node); |
---|
141 | | - if (block_group->key.objectid < cache->key.objectid) { |
---|
142 | | - p = &(*p)->rb_left; |
---|
143 | | - } else if (block_group->key.objectid > cache->key.objectid) { |
---|
144 | | - p = &(*p)->rb_right; |
---|
145 | | - } else { |
---|
146 | | - spin_unlock(&info->block_group_cache_lock); |
---|
147 | | - return -EEXIST; |
---|
148 | | - } |
---|
149 | | - } |
---|
150 | | - |
---|
151 | | - rb_link_node(&block_group->cache_node, parent, p); |
---|
152 | | - rb_insert_color(&block_group->cache_node, |
---|
153 | | - &info->block_group_cache_tree); |
---|
154 | | - |
---|
155 | | - if (info->first_logical_byte > block_group->key.objectid) |
---|
156 | | - info->first_logical_byte = block_group->key.objectid; |
---|
157 | | - |
---|
158 | | - spin_unlock(&info->block_group_cache_lock); |
---|
159 | | - |
---|
160 | | - return 0; |
---|
161 | | -} |
---|
162 | | - |
---|
163 | | -/* |
---|
164 | | - * This will return the block group at or after bytenr if contains is 0, else |
---|
165 | | - * it will return the block group that contains the bytenr |
---|
166 | | - */ |
---|
167 | | -static struct btrfs_block_group_cache * |
---|
168 | | -block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, |
---|
169 | | - int contains) |
---|
170 | | -{ |
---|
171 | | - struct btrfs_block_group_cache *cache, *ret = NULL; |
---|
172 | | - struct rb_node *n; |
---|
173 | | - u64 end, start; |
---|
174 | | - |
---|
175 | | - spin_lock(&info->block_group_cache_lock); |
---|
176 | | - n = info->block_group_cache_tree.rb_node; |
---|
177 | | - |
---|
178 | | - while (n) { |
---|
179 | | - cache = rb_entry(n, struct btrfs_block_group_cache, |
---|
180 | | - cache_node); |
---|
181 | | - end = cache->key.objectid + cache->key.offset - 1; |
---|
182 | | - start = cache->key.objectid; |
---|
183 | | - |
---|
184 | | - if (bytenr < start) { |
---|
185 | | - if (!contains && (!ret || start < ret->key.objectid)) |
---|
186 | | - ret = cache; |
---|
187 | | - n = n->rb_left; |
---|
188 | | - } else if (bytenr > start) { |
---|
189 | | - if (contains && bytenr <= end) { |
---|
190 | | - ret = cache; |
---|
191 | | - break; |
---|
192 | | - } |
---|
193 | | - n = n->rb_right; |
---|
194 | | - } else { |
---|
195 | | - ret = cache; |
---|
196 | | - break; |
---|
197 | | - } |
---|
198 | | - } |
---|
199 | | - if (ret) { |
---|
200 | | - btrfs_get_block_group(ret); |
---|
201 | | - if (bytenr == 0 && info->first_logical_byte > ret->key.objectid) |
---|
202 | | - info->first_logical_byte = ret->key.objectid; |
---|
203 | | - } |
---|
204 | | - spin_unlock(&info->block_group_cache_lock); |
---|
205 | | - |
---|
206 | | - return ret; |
---|
207 | | -} |
---|
208 | | - |
---|
209 | | -static int add_excluded_extent(struct btrfs_fs_info *fs_info, |
---|
210 | | - u64 start, u64 num_bytes) |
---|
| 64 | +int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info, |
---|
| 65 | + u64 start, u64 num_bytes) |
---|
211 | 66 | { |
---|
212 | 67 | u64 end = start + num_bytes - 1; |
---|
213 | | - set_extent_bits(&fs_info->freed_extents[0], |
---|
214 | | - start, end, EXTENT_UPTODATE); |
---|
215 | | - set_extent_bits(&fs_info->freed_extents[1], |
---|
216 | | - start, end, EXTENT_UPTODATE); |
---|
| 68 | + set_extent_bits(&fs_info->excluded_extents, start, end, |
---|
| 69 | + EXTENT_UPTODATE); |
---|
217 | 70 | return 0; |
---|
218 | 71 | } |
---|
219 | 72 | |
---|
220 | | -static void free_excluded_extents(struct btrfs_block_group_cache *cache) |
---|
| 73 | +void btrfs_free_excluded_extents(struct btrfs_block_group *cache) |
---|
221 | 74 | { |
---|
222 | 75 | struct btrfs_fs_info *fs_info = cache->fs_info; |
---|
223 | 76 | u64 start, end; |
---|
224 | 77 | |
---|
225 | | - start = cache->key.objectid; |
---|
226 | | - end = start + cache->key.offset - 1; |
---|
| 78 | + start = cache->start; |
---|
| 79 | + end = start + cache->length - 1; |
---|
227 | 80 | |
---|
228 | | - clear_extent_bits(&fs_info->freed_extents[0], |
---|
229 | | - start, end, EXTENT_UPTODATE); |
---|
230 | | - clear_extent_bits(&fs_info->freed_extents[1], |
---|
231 | | - start, end, EXTENT_UPTODATE); |
---|
232 | | -} |
---|
233 | | - |
---|
234 | | -static int exclude_super_stripes(struct btrfs_block_group_cache *cache) |
---|
235 | | -{ |
---|
236 | | - struct btrfs_fs_info *fs_info = cache->fs_info; |
---|
237 | | - u64 bytenr; |
---|
238 | | - u64 *logical; |
---|
239 | | - int stripe_len; |
---|
240 | | - int i, nr, ret; |
---|
241 | | - |
---|
242 | | - if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) { |
---|
243 | | - stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid; |
---|
244 | | - cache->bytes_super += stripe_len; |
---|
245 | | - ret = add_excluded_extent(fs_info, cache->key.objectid, |
---|
246 | | - stripe_len); |
---|
247 | | - if (ret) |
---|
248 | | - return ret; |
---|
249 | | - } |
---|
250 | | - |
---|
251 | | - for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { |
---|
252 | | - bytenr = btrfs_sb_offset(i); |
---|
253 | | - ret = btrfs_rmap_block(fs_info, cache->key.objectid, |
---|
254 | | - bytenr, &logical, &nr, &stripe_len); |
---|
255 | | - if (ret) |
---|
256 | | - return ret; |
---|
257 | | - |
---|
258 | | - while (nr--) { |
---|
259 | | - u64 start, len; |
---|
260 | | - |
---|
261 | | - if (logical[nr] > cache->key.objectid + |
---|
262 | | - cache->key.offset) |
---|
263 | | - continue; |
---|
264 | | - |
---|
265 | | - if (logical[nr] + stripe_len <= cache->key.objectid) |
---|
266 | | - continue; |
---|
267 | | - |
---|
268 | | - start = logical[nr]; |
---|
269 | | - if (start < cache->key.objectid) { |
---|
270 | | - start = cache->key.objectid; |
---|
271 | | - len = (logical[nr] + stripe_len) - start; |
---|
272 | | - } else { |
---|
273 | | - len = min_t(u64, stripe_len, |
---|
274 | | - cache->key.objectid + |
---|
275 | | - cache->key.offset - start); |
---|
276 | | - } |
---|
277 | | - |
---|
278 | | - cache->bytes_super += len; |
---|
279 | | - ret = add_excluded_extent(fs_info, start, len); |
---|
280 | | - if (ret) { |
---|
281 | | - kfree(logical); |
---|
282 | | - return ret; |
---|
283 | | - } |
---|
284 | | - } |
---|
285 | | - |
---|
286 | | - kfree(logical); |
---|
287 | | - } |
---|
288 | | - return 0; |
---|
289 | | -} |
---|
290 | | - |
---|
291 | | -static struct btrfs_caching_control * |
---|
292 | | -get_caching_control(struct btrfs_block_group_cache *cache) |
---|
293 | | -{ |
---|
294 | | - struct btrfs_caching_control *ctl; |
---|
295 | | - |
---|
296 | | - spin_lock(&cache->lock); |
---|
297 | | - if (!cache->caching_ctl) { |
---|
298 | | - spin_unlock(&cache->lock); |
---|
299 | | - return NULL; |
---|
300 | | - } |
---|
301 | | - |
---|
302 | | - ctl = cache->caching_ctl; |
---|
303 | | - refcount_inc(&ctl->count); |
---|
304 | | - spin_unlock(&cache->lock); |
---|
305 | | - return ctl; |
---|
306 | | -} |
---|
307 | | - |
---|
308 | | -static void put_caching_control(struct btrfs_caching_control *ctl) |
---|
309 | | -{ |
---|
310 | | - if (refcount_dec_and_test(&ctl->count)) |
---|
311 | | - kfree(ctl); |
---|
312 | | -} |
---|
313 | | - |
---|
314 | | -#ifdef CONFIG_BTRFS_DEBUG |
---|
315 | | -static void fragment_free_space(struct btrfs_block_group_cache *block_group) |
---|
316 | | -{ |
---|
317 | | - struct btrfs_fs_info *fs_info = block_group->fs_info; |
---|
318 | | - u64 start = block_group->key.objectid; |
---|
319 | | - u64 len = block_group->key.offset; |
---|
320 | | - u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ? |
---|
321 | | - fs_info->nodesize : fs_info->sectorsize; |
---|
322 | | - u64 step = chunk << 1; |
---|
323 | | - |
---|
324 | | - while (len > chunk) { |
---|
325 | | - btrfs_remove_free_space(block_group, start, chunk); |
---|
326 | | - start += step; |
---|
327 | | - if (len < step) |
---|
328 | | - len = 0; |
---|
329 | | - else |
---|
330 | | - len -= step; |
---|
331 | | - } |
---|
332 | | -} |
---|
333 | | -#endif |
---|
334 | | - |
---|
335 | | -/* |
---|
336 | | - * this is only called by cache_block_group, since we could have freed extents |
---|
337 | | - * we need to check the pinned_extents for any extents that can't be used yet |
---|
338 | | - * since their free space will be released as soon as the transaction commits. |
---|
339 | | - */ |
---|
340 | | -u64 add_new_free_space(struct btrfs_block_group_cache *block_group, |
---|
341 | | - u64 start, u64 end) |
---|
342 | | -{ |
---|
343 | | - struct btrfs_fs_info *info = block_group->fs_info; |
---|
344 | | - u64 extent_start, extent_end, size, total_added = 0; |
---|
345 | | - int ret; |
---|
346 | | - |
---|
347 | | - while (start < end) { |
---|
348 | | - ret = find_first_extent_bit(info->pinned_extents, start, |
---|
349 | | - &extent_start, &extent_end, |
---|
350 | | - EXTENT_DIRTY | EXTENT_UPTODATE, |
---|
351 | | - NULL); |
---|
352 | | - if (ret) |
---|
353 | | - break; |
---|
354 | | - |
---|
355 | | - if (extent_start <= start) { |
---|
356 | | - start = extent_end + 1; |
---|
357 | | - } else if (extent_start > start && extent_start < end) { |
---|
358 | | - size = extent_start - start; |
---|
359 | | - total_added += size; |
---|
360 | | - ret = btrfs_add_free_space(block_group, start, |
---|
361 | | - size); |
---|
362 | | - BUG_ON(ret); /* -ENOMEM or logic error */ |
---|
363 | | - start = extent_end + 1; |
---|
364 | | - } else { |
---|
365 | | - break; |
---|
366 | | - } |
---|
367 | | - } |
---|
368 | | - |
---|
369 | | - if (start < end) { |
---|
370 | | - size = end - start; |
---|
371 | | - total_added += size; |
---|
372 | | - ret = btrfs_add_free_space(block_group, start, size); |
---|
373 | | - BUG_ON(ret); /* -ENOMEM or logic error */ |
---|
374 | | - } |
---|
375 | | - |
---|
376 | | - return total_added; |
---|
377 | | -} |
---|
378 | | - |
---|
379 | | -static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl) |
---|
380 | | -{ |
---|
381 | | - struct btrfs_block_group_cache *block_group = caching_ctl->block_group; |
---|
382 | | - struct btrfs_fs_info *fs_info = block_group->fs_info; |
---|
383 | | - struct btrfs_root *extent_root = fs_info->extent_root; |
---|
384 | | - struct btrfs_path *path; |
---|
385 | | - struct extent_buffer *leaf; |
---|
386 | | - struct btrfs_key key; |
---|
387 | | - u64 total_found = 0; |
---|
388 | | - u64 last = 0; |
---|
389 | | - u32 nritems; |
---|
390 | | - int ret; |
---|
391 | | - bool wakeup = true; |
---|
392 | | - |
---|
393 | | - path = btrfs_alloc_path(); |
---|
394 | | - if (!path) |
---|
395 | | - return -ENOMEM; |
---|
396 | | - |
---|
397 | | - last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
---|
398 | | - |
---|
399 | | -#ifdef CONFIG_BTRFS_DEBUG |
---|
400 | | - /* |
---|
401 | | - * If we're fragmenting we don't want to make anybody think we can |
---|
402 | | - * allocate from this block group until we've had a chance to fragment |
---|
403 | | - * the free space. |
---|
404 | | - */ |
---|
405 | | - if (btrfs_should_fragment_free_space(block_group)) |
---|
406 | | - wakeup = false; |
---|
407 | | -#endif |
---|
408 | | - /* |
---|
409 | | - * We don't want to deadlock with somebody trying to allocate a new |
---|
410 | | - * extent for the extent root while also trying to search the extent |
---|
411 | | - * root to add free space. So we skip locking and search the commit |
---|
412 | | - * root, since its read-only |
---|
413 | | - */ |
---|
414 | | - path->skip_locking = 1; |
---|
415 | | - path->search_commit_root = 1; |
---|
416 | | - path->reada = READA_FORWARD; |
---|
417 | | - |
---|
418 | | - key.objectid = last; |
---|
419 | | - key.offset = 0; |
---|
420 | | - key.type = BTRFS_EXTENT_ITEM_KEY; |
---|
421 | | - |
---|
422 | | -next: |
---|
423 | | - ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); |
---|
424 | | - if (ret < 0) |
---|
425 | | - goto out; |
---|
426 | | - |
---|
427 | | - leaf = path->nodes[0]; |
---|
428 | | - nritems = btrfs_header_nritems(leaf); |
---|
429 | | - |
---|
430 | | - while (1) { |
---|
431 | | - if (btrfs_fs_closing(fs_info) > 1) { |
---|
432 | | - last = (u64)-1; |
---|
433 | | - break; |
---|
434 | | - } |
---|
435 | | - |
---|
436 | | - if (path->slots[0] < nritems) { |
---|
437 | | - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
---|
438 | | - } else { |
---|
439 | | - ret = find_next_key(path, 0, &key); |
---|
440 | | - if (ret) |
---|
441 | | - break; |
---|
442 | | - |
---|
443 | | - if (need_resched() || |
---|
444 | | - rwsem_is_contended(&fs_info->commit_root_sem)) { |
---|
445 | | - if (wakeup) |
---|
446 | | - caching_ctl->progress = last; |
---|
447 | | - btrfs_release_path(path); |
---|
448 | | - up_read(&fs_info->commit_root_sem); |
---|
449 | | - mutex_unlock(&caching_ctl->mutex); |
---|
450 | | - cond_resched(); |
---|
451 | | - mutex_lock(&caching_ctl->mutex); |
---|
452 | | - down_read(&fs_info->commit_root_sem); |
---|
453 | | - goto next; |
---|
454 | | - } |
---|
455 | | - |
---|
456 | | - ret = btrfs_next_leaf(extent_root, path); |
---|
457 | | - if (ret < 0) |
---|
458 | | - goto out; |
---|
459 | | - if (ret) |
---|
460 | | - break; |
---|
461 | | - leaf = path->nodes[0]; |
---|
462 | | - nritems = btrfs_header_nritems(leaf); |
---|
463 | | - continue; |
---|
464 | | - } |
---|
465 | | - |
---|
466 | | - if (key.objectid < last) { |
---|
467 | | - key.objectid = last; |
---|
468 | | - key.offset = 0; |
---|
469 | | - key.type = BTRFS_EXTENT_ITEM_KEY; |
---|
470 | | - |
---|
471 | | - if (wakeup) |
---|
472 | | - caching_ctl->progress = last; |
---|
473 | | - btrfs_release_path(path); |
---|
474 | | - goto next; |
---|
475 | | - } |
---|
476 | | - |
---|
477 | | - if (key.objectid < block_group->key.objectid) { |
---|
478 | | - path->slots[0]++; |
---|
479 | | - continue; |
---|
480 | | - } |
---|
481 | | - |
---|
482 | | - if (key.objectid >= block_group->key.objectid + |
---|
483 | | - block_group->key.offset) |
---|
484 | | - break; |
---|
485 | | - |
---|
486 | | - if (key.type == BTRFS_EXTENT_ITEM_KEY || |
---|
487 | | - key.type == BTRFS_METADATA_ITEM_KEY) { |
---|
488 | | - total_found += add_new_free_space(block_group, last, |
---|
489 | | - key.objectid); |
---|
490 | | - if (key.type == BTRFS_METADATA_ITEM_KEY) |
---|
491 | | - last = key.objectid + |
---|
492 | | - fs_info->nodesize; |
---|
493 | | - else |
---|
494 | | - last = key.objectid + key.offset; |
---|
495 | | - |
---|
496 | | - if (total_found > CACHING_CTL_WAKE_UP) { |
---|
497 | | - total_found = 0; |
---|
498 | | - if (wakeup) |
---|
499 | | - wake_up(&caching_ctl->wait); |
---|
500 | | - } |
---|
501 | | - } |
---|
502 | | - path->slots[0]++; |
---|
503 | | - } |
---|
504 | | - ret = 0; |
---|
505 | | - |
---|
506 | | - total_found += add_new_free_space(block_group, last, |
---|
507 | | - block_group->key.objectid + |
---|
508 | | - block_group->key.offset); |
---|
509 | | - caching_ctl->progress = (u64)-1; |
---|
510 | | - |
---|
511 | | -out: |
---|
512 | | - btrfs_free_path(path); |
---|
513 | | - return ret; |
---|
514 | | -} |
---|
515 | | - |
---|
516 | | -static noinline void caching_thread(struct btrfs_work *work) |
---|
517 | | -{ |
---|
518 | | - struct btrfs_block_group_cache *block_group; |
---|
519 | | - struct btrfs_fs_info *fs_info; |
---|
520 | | - struct btrfs_caching_control *caching_ctl; |
---|
521 | | - int ret; |
---|
522 | | - |
---|
523 | | - caching_ctl = container_of(work, struct btrfs_caching_control, work); |
---|
524 | | - block_group = caching_ctl->block_group; |
---|
525 | | - fs_info = block_group->fs_info; |
---|
526 | | - |
---|
527 | | - mutex_lock(&caching_ctl->mutex); |
---|
528 | | - down_read(&fs_info->commit_root_sem); |
---|
529 | | - |
---|
530 | | - if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) |
---|
531 | | - ret = load_free_space_tree(caching_ctl); |
---|
532 | | - else |
---|
533 | | - ret = load_extent_tree_free(caching_ctl); |
---|
534 | | - |
---|
535 | | - spin_lock(&block_group->lock); |
---|
536 | | - block_group->caching_ctl = NULL; |
---|
537 | | - block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED; |
---|
538 | | - spin_unlock(&block_group->lock); |
---|
539 | | - |
---|
540 | | -#ifdef CONFIG_BTRFS_DEBUG |
---|
541 | | - if (btrfs_should_fragment_free_space(block_group)) { |
---|
542 | | - u64 bytes_used; |
---|
543 | | - |
---|
544 | | - spin_lock(&block_group->space_info->lock); |
---|
545 | | - spin_lock(&block_group->lock); |
---|
546 | | - bytes_used = block_group->key.offset - |
---|
547 | | - btrfs_block_group_used(&block_group->item); |
---|
548 | | - block_group->space_info->bytes_used += bytes_used >> 1; |
---|
549 | | - spin_unlock(&block_group->lock); |
---|
550 | | - spin_unlock(&block_group->space_info->lock); |
---|
551 | | - fragment_free_space(block_group); |
---|
552 | | - } |
---|
553 | | -#endif |
---|
554 | | - |
---|
555 | | - caching_ctl->progress = (u64)-1; |
---|
556 | | - |
---|
557 | | - up_read(&fs_info->commit_root_sem); |
---|
558 | | - free_excluded_extents(block_group); |
---|
559 | | - mutex_unlock(&caching_ctl->mutex); |
---|
560 | | - |
---|
561 | | - wake_up(&caching_ctl->wait); |
---|
562 | | - |
---|
563 | | - put_caching_control(caching_ctl); |
---|
564 | | - btrfs_put_block_group(block_group); |
---|
565 | | -} |
---|
566 | | - |
---|
567 | | -static int cache_block_group(struct btrfs_block_group_cache *cache, |
---|
568 | | - int load_cache_only) |
---|
569 | | -{ |
---|
570 | | - DEFINE_WAIT(wait); |
---|
571 | | - struct btrfs_fs_info *fs_info = cache->fs_info; |
---|
572 | | - struct btrfs_caching_control *caching_ctl; |
---|
573 | | - int ret = 0; |
---|
574 | | - |
---|
575 | | - caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); |
---|
576 | | - if (!caching_ctl) |
---|
577 | | - return -ENOMEM; |
---|
578 | | - |
---|
579 | | - INIT_LIST_HEAD(&caching_ctl->list); |
---|
580 | | - mutex_init(&caching_ctl->mutex); |
---|
581 | | - init_waitqueue_head(&caching_ctl->wait); |
---|
582 | | - caching_ctl->block_group = cache; |
---|
583 | | - caching_ctl->progress = cache->key.objectid; |
---|
584 | | - refcount_set(&caching_ctl->count, 1); |
---|
585 | | - btrfs_init_work(&caching_ctl->work, btrfs_cache_helper, |
---|
586 | | - caching_thread, NULL, NULL); |
---|
587 | | - |
---|
588 | | - spin_lock(&cache->lock); |
---|
589 | | - /* |
---|
590 | | - * This should be a rare occasion, but this could happen I think in the |
---|
591 | | - * case where one thread starts to load the space cache info, and then |
---|
592 | | - * some other thread starts a transaction commit which tries to do an |
---|
593 | | - * allocation while the other thread is still loading the space cache |
---|
594 | | - * info. The previous loop should have kept us from choosing this block |
---|
595 | | - * group, but if we've moved to the state where we will wait on caching |
---|
596 | | - * block groups we need to first check if we're doing a fast load here, |
---|
597 | | - * so we can wait for it to finish, otherwise we could end up allocating |
---|
598 | | - * from a block group who's cache gets evicted for one reason or |
---|
599 | | - * another. |
---|
600 | | - */ |
---|
601 | | - while (cache->cached == BTRFS_CACHE_FAST) { |
---|
602 | | - struct btrfs_caching_control *ctl; |
---|
603 | | - |
---|
604 | | - ctl = cache->caching_ctl; |
---|
605 | | - refcount_inc(&ctl->count); |
---|
606 | | - prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE); |
---|
607 | | - spin_unlock(&cache->lock); |
---|
608 | | - |
---|
609 | | - schedule(); |
---|
610 | | - |
---|
611 | | - finish_wait(&ctl->wait, &wait); |
---|
612 | | - put_caching_control(ctl); |
---|
613 | | - spin_lock(&cache->lock); |
---|
614 | | - } |
---|
615 | | - |
---|
616 | | - if (cache->cached != BTRFS_CACHE_NO) { |
---|
617 | | - spin_unlock(&cache->lock); |
---|
618 | | - kfree(caching_ctl); |
---|
619 | | - return 0; |
---|
620 | | - } |
---|
621 | | - WARN_ON(cache->caching_ctl); |
---|
622 | | - cache->caching_ctl = caching_ctl; |
---|
623 | | - cache->cached = BTRFS_CACHE_FAST; |
---|
624 | | - spin_unlock(&cache->lock); |
---|
625 | | - |
---|
626 | | - if (btrfs_test_opt(fs_info, SPACE_CACHE)) { |
---|
627 | | - mutex_lock(&caching_ctl->mutex); |
---|
628 | | - ret = load_free_space_cache(fs_info, cache); |
---|
629 | | - |
---|
630 | | - spin_lock(&cache->lock); |
---|
631 | | - if (ret == 1) { |
---|
632 | | - cache->caching_ctl = NULL; |
---|
633 | | - cache->cached = BTRFS_CACHE_FINISHED; |
---|
634 | | - cache->last_byte_to_unpin = (u64)-1; |
---|
635 | | - caching_ctl->progress = (u64)-1; |
---|
636 | | - } else { |
---|
637 | | - if (load_cache_only) { |
---|
638 | | - cache->caching_ctl = NULL; |
---|
639 | | - cache->cached = BTRFS_CACHE_NO; |
---|
640 | | - } else { |
---|
641 | | - cache->cached = BTRFS_CACHE_STARTED; |
---|
642 | | - cache->has_caching_ctl = 1; |
---|
643 | | - } |
---|
644 | | - } |
---|
645 | | - spin_unlock(&cache->lock); |
---|
646 | | -#ifdef CONFIG_BTRFS_DEBUG |
---|
647 | | - if (ret == 1 && |
---|
648 | | - btrfs_should_fragment_free_space(cache)) { |
---|
649 | | - u64 bytes_used; |
---|
650 | | - |
---|
651 | | - spin_lock(&cache->space_info->lock); |
---|
652 | | - spin_lock(&cache->lock); |
---|
653 | | - bytes_used = cache->key.offset - |
---|
654 | | - btrfs_block_group_used(&cache->item); |
---|
655 | | - cache->space_info->bytes_used += bytes_used >> 1; |
---|
656 | | - spin_unlock(&cache->lock); |
---|
657 | | - spin_unlock(&cache->space_info->lock); |
---|
658 | | - fragment_free_space(cache); |
---|
659 | | - } |
---|
660 | | -#endif |
---|
661 | | - mutex_unlock(&caching_ctl->mutex); |
---|
662 | | - |
---|
663 | | - wake_up(&caching_ctl->wait); |
---|
664 | | - if (ret == 1) { |
---|
665 | | - put_caching_control(caching_ctl); |
---|
666 | | - free_excluded_extents(cache); |
---|
667 | | - return 0; |
---|
668 | | - } |
---|
669 | | - } else { |
---|
670 | | - /* |
---|
671 | | - * We're either using the free space tree or no caching at all. |
---|
672 | | - * Set cached to the appropriate value and wakeup any waiters. |
---|
673 | | - */ |
---|
674 | | - spin_lock(&cache->lock); |
---|
675 | | - if (load_cache_only) { |
---|
676 | | - cache->caching_ctl = NULL; |
---|
677 | | - cache->cached = BTRFS_CACHE_NO; |
---|
678 | | - } else { |
---|
679 | | - cache->cached = BTRFS_CACHE_STARTED; |
---|
680 | | - cache->has_caching_ctl = 1; |
---|
681 | | - } |
---|
682 | | - spin_unlock(&cache->lock); |
---|
683 | | - wake_up(&caching_ctl->wait); |
---|
684 | | - } |
---|
685 | | - |
---|
686 | | - if (load_cache_only) { |
---|
687 | | - put_caching_control(caching_ctl); |
---|
688 | | - return 0; |
---|
689 | | - } |
---|
690 | | - |
---|
691 | | - down_write(&fs_info->commit_root_sem); |
---|
692 | | - refcount_inc(&caching_ctl->count); |
---|
693 | | - list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); |
---|
694 | | - up_write(&fs_info->commit_root_sem); |
---|
695 | | - |
---|
696 | | - btrfs_get_block_group(cache); |
---|
697 | | - |
---|
698 | | - btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work); |
---|
699 | | - |
---|
700 | | - return ret; |
---|
701 | | -} |
---|
702 | | - |
---|
703 | | -/* |
---|
704 | | - * return the block group that starts at or after bytenr |
---|
705 | | - */ |
---|
706 | | -static struct btrfs_block_group_cache * |
---|
707 | | -btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) |
---|
708 | | -{ |
---|
709 | | - return block_group_cache_tree_search(info, bytenr, 0); |
---|
710 | | -} |
---|
711 | | - |
---|
712 | | -/* |
---|
713 | | - * return the block group that contains the given bytenr |
---|
714 | | - */ |
---|
715 | | -struct btrfs_block_group_cache *btrfs_lookup_block_group( |
---|
716 | | - struct btrfs_fs_info *info, |
---|
717 | | - u64 bytenr) |
---|
718 | | -{ |
---|
719 | | - return block_group_cache_tree_search(info, bytenr, 1); |
---|
720 | | -} |
---|
721 | | - |
---|
722 | | -static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, |
---|
723 | | - u64 flags) |
---|
724 | | -{ |
---|
725 | | - struct list_head *head = &info->space_info; |
---|
726 | | - struct btrfs_space_info *found; |
---|
727 | | - |
---|
728 | | - flags &= BTRFS_BLOCK_GROUP_TYPE_MASK; |
---|
729 | | - |
---|
730 | | - rcu_read_lock(); |
---|
731 | | - list_for_each_entry_rcu(found, head, list) { |
---|
732 | | - if (found->flags & flags) { |
---|
733 | | - rcu_read_unlock(); |
---|
734 | | - return found; |
---|
735 | | - } |
---|
736 | | - } |
---|
737 | | - rcu_read_unlock(); |
---|
738 | | - return NULL; |
---|
739 | | -} |
---|
740 | | - |
---|
741 | | -static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes, |
---|
742 | | - bool metadata, u64 root_objectid) |
---|
743 | | -{ |
---|
744 | | - struct btrfs_space_info *space_info; |
---|
745 | | - u64 flags; |
---|
746 | | - |
---|
747 | | - if (metadata) { |
---|
748 | | - if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID) |
---|
749 | | - flags = BTRFS_BLOCK_GROUP_SYSTEM; |
---|
750 | | - else |
---|
751 | | - flags = BTRFS_BLOCK_GROUP_METADATA; |
---|
752 | | - } else { |
---|
753 | | - flags = BTRFS_BLOCK_GROUP_DATA; |
---|
754 | | - } |
---|
755 | | - |
---|
756 | | - space_info = __find_space_info(fs_info, flags); |
---|
757 | | - ASSERT(space_info); |
---|
758 | | - percpu_counter_add_batch(&space_info->total_bytes_pinned, num_bytes, |
---|
759 | | - BTRFS_TOTAL_BYTES_PINNED_BATCH); |
---|
760 | | -} |
---|
761 | | - |
---|
762 | | -/* |
---|
763 | | - * after adding space to the filesystem, we need to clear the full flags |
---|
764 | | - * on all the space infos. |
---|
765 | | - */ |
---|
766 | | -void btrfs_clear_space_info_full(struct btrfs_fs_info *info) |
---|
767 | | -{ |
---|
768 | | - struct list_head *head = &info->space_info; |
---|
769 | | - struct btrfs_space_info *found; |
---|
770 | | - |
---|
771 | | - rcu_read_lock(); |
---|
772 | | - list_for_each_entry_rcu(found, head, list) |
---|
773 | | - found->full = 0; |
---|
774 | | - rcu_read_unlock(); |
---|
| 81 | + clear_extent_bits(&fs_info->excluded_extents, start, end, |
---|
| 82 | + EXTENT_UPTODATE); |
---|
775 | 83 | } |
---|
776 | 84 | |
---|
777 | 85 | /* simple helper to search for an existing data extent at a given offset */ |
---|
.. | .. |
---|
1037 | 345 | |
---|
1038 | 346 | /* |
---|
1039 | 347 | * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required, |
---|
1040 | | - * is_data == BTRFS_REF_TYPE_DATA, data type is requried, |
---|
| 348 | + * is_data == BTRFS_REF_TYPE_DATA, data type is required, |
---|
1041 | 349 | * is_data == BTRFS_REF_TYPE_ANY, either type is OK. |
---|
1042 | 350 | */ |
---|
1043 | 351 | int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb, |
---|
.. | .. |
---|
1092 | 400 | return BTRFS_REF_TYPE_INVALID; |
---|
1093 | 401 | } |
---|
1094 | 402 | |
---|
1095 | | -static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) |
---|
| 403 | +u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) |
---|
1096 | 404 | { |
---|
1097 | 405 | u32 high_crc = ~(u32)0; |
---|
1098 | 406 | u32 low_crc = ~(u32)0; |
---|
1099 | 407 | __le64 lenum; |
---|
1100 | 408 | |
---|
1101 | 409 | lenum = cpu_to_le64(root_objectid); |
---|
1102 | | - high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); |
---|
| 410 | + high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum)); |
---|
1103 | 411 | lenum = cpu_to_le64(owner); |
---|
1104 | | - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); |
---|
| 412 | + low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); |
---|
1105 | 413 | lenum = cpu_to_le64(offset); |
---|
1106 | | - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); |
---|
| 414 | + low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); |
---|
1107 | 415 | |
---|
1108 | 416 | return ((u64)high_crc << 31) ^ (u64)low_crc; |
---|
1109 | 417 | } |
---|
.. | .. |
---|
1549 | 857 | err = -ENOENT; |
---|
1550 | 858 | goto out; |
---|
1551 | 859 | } else if (WARN_ON(ret)) { |
---|
| 860 | + btrfs_print_leaf(path->nodes[0]); |
---|
| 861 | + btrfs_err(fs_info, |
---|
| 862 | +"extent item not found for insert, bytenr %llu num_bytes %llu parent %llu root_objectid %llu owner %llu offset %llu", |
---|
| 863 | + bytenr, num_bytes, parent, root_objectid, owner, |
---|
| 864 | + offset); |
---|
1552 | 865 | err = -EIO; |
---|
1553 | 866 | goto out; |
---|
1554 | 867 | } |
---|
.. | .. |
---|
1685 | 998 | type = extent_ref_type(parent, owner); |
---|
1686 | 999 | size = btrfs_extent_inline_ref_size(type); |
---|
1687 | 1000 | |
---|
1688 | | - btrfs_extend_item(fs_info, path, size); |
---|
| 1001 | + btrfs_extend_item(path, size); |
---|
1689 | 1002 | |
---|
1690 | 1003 | ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); |
---|
1691 | 1004 | refs = btrfs_extent_refs(leaf, ei); |
---|
.. | .. |
---|
1760 | 1073 | int *last_ref) |
---|
1761 | 1074 | { |
---|
1762 | 1075 | struct extent_buffer *leaf = path->nodes[0]; |
---|
1763 | | - struct btrfs_fs_info *fs_info = leaf->fs_info; |
---|
1764 | 1076 | struct btrfs_extent_item *ei; |
---|
1765 | 1077 | struct btrfs_extent_data_ref *dref = NULL; |
---|
1766 | 1078 | struct btrfs_shared_data_ref *sref = NULL; |
---|
.. | .. |
---|
1815 | 1127 | memmove_extent_buffer(leaf, ptr, ptr + size, |
---|
1816 | 1128 | end - ptr - size); |
---|
1817 | 1129 | item_size -= size; |
---|
1818 | | - btrfs_truncate_item(fs_info, path, item_size, 1); |
---|
| 1130 | + btrfs_truncate_item(path, item_size, 1); |
---|
1819 | 1131 | } |
---|
1820 | 1132 | btrfs_mark_buffer_dirty(leaf); |
---|
1821 | 1133 | } |
---|
.. | .. |
---|
1835 | 1147 | num_bytes, parent, root_objectid, |
---|
1836 | 1148 | owner, offset, 1); |
---|
1837 | 1149 | if (ret == 0) { |
---|
1838 | | - BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); |
---|
| 1150 | + /* |
---|
| 1151 | + * We're adding refs to a tree block we already own, this |
---|
| 1152 | + * should not happen at all. |
---|
| 1153 | + */ |
---|
| 1154 | + if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
---|
| 1155 | + btrfs_crit(trans->fs_info, |
---|
| 1156 | +"adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu", |
---|
| 1157 | + bytenr, num_bytes, root_objectid); |
---|
| 1158 | + if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) { |
---|
| 1159 | + WARN_ON(1); |
---|
| 1160 | + btrfs_crit(trans->fs_info, |
---|
| 1161 | + "path->slots[0]=%d path->nodes[0]:", path->slots[0]); |
---|
| 1162 | + btrfs_print_leaf(path->nodes[0]); |
---|
| 1163 | + } |
---|
| 1164 | + return -EUCLEAN; |
---|
| 1165 | + } |
---|
1839 | 1166 | update_inline_extent_backref(path, iref, refs_to_add, |
---|
1840 | 1167 | extent_op, NULL); |
---|
1841 | 1168 | } else if (ret == -ENOENT) { |
---|
.. | .. |
---|
1843 | 1170 | root_objectid, owner, offset, |
---|
1844 | 1171 | refs_to_add, extent_op); |
---|
1845 | 1172 | ret = 0; |
---|
1846 | | - } |
---|
1847 | | - return ret; |
---|
1848 | | -} |
---|
1849 | | - |
---|
1850 | | -static int insert_extent_backref(struct btrfs_trans_handle *trans, |
---|
1851 | | - struct btrfs_path *path, |
---|
1852 | | - u64 bytenr, u64 parent, u64 root_objectid, |
---|
1853 | | - u64 owner, u64 offset, int refs_to_add) |
---|
1854 | | -{ |
---|
1855 | | - int ret; |
---|
1856 | | - if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
---|
1857 | | - BUG_ON(refs_to_add != 1); |
---|
1858 | | - ret = insert_tree_block_ref(trans, path, bytenr, parent, |
---|
1859 | | - root_objectid); |
---|
1860 | | - } else { |
---|
1861 | | - ret = insert_extent_data_ref(trans, path, bytenr, parent, |
---|
1862 | | - root_objectid, owner, offset, |
---|
1863 | | - refs_to_add); |
---|
1864 | 1173 | } |
---|
1865 | 1174 | return ret; |
---|
1866 | 1175 | } |
---|
.. | .. |
---|
1886 | 1195 | return ret; |
---|
1887 | 1196 | } |
---|
1888 | 1197 | |
---|
1889 | | -#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len)) |
---|
1890 | 1198 | static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len, |
---|
1891 | 1199 | u64 *discarded_bytes) |
---|
1892 | 1200 | { |
---|
.. | .. |
---|
1962 | 1270 | int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, |
---|
1963 | 1271 | u64 num_bytes, u64 *actual_bytes) |
---|
1964 | 1272 | { |
---|
1965 | | - int ret; |
---|
| 1273 | + int ret = 0; |
---|
1966 | 1274 | u64 discarded_bytes = 0; |
---|
| 1275 | + u64 end = bytenr + num_bytes; |
---|
| 1276 | + u64 cur = bytenr; |
---|
1967 | 1277 | struct btrfs_bio *bbio = NULL; |
---|
1968 | 1278 | |
---|
1969 | 1279 | |
---|
.. | .. |
---|
1972 | 1282 | * associated to its stripes that don't go away while we are discarding. |
---|
1973 | 1283 | */ |
---|
1974 | 1284 | btrfs_bio_counter_inc_blocked(fs_info); |
---|
1975 | | - /* Tell the block device(s) that the sectors can be discarded */ |
---|
1976 | | - ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, bytenr, &num_bytes, |
---|
1977 | | - &bbio, 0); |
---|
1978 | | - /* Error condition is -ENOMEM */ |
---|
1979 | | - if (!ret) { |
---|
1980 | | - struct btrfs_bio_stripe *stripe = bbio->stripes; |
---|
| 1285 | + while (cur < end) { |
---|
| 1286 | + struct btrfs_bio_stripe *stripe; |
---|
1981 | 1287 | int i; |
---|
1982 | 1288 | |
---|
| 1289 | + num_bytes = end - cur; |
---|
| 1290 | + /* Tell the block device(s) that the sectors can be discarded */ |
---|
| 1291 | + ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur, |
---|
| 1292 | + &num_bytes, &bbio, 0); |
---|
| 1293 | + /* |
---|
| 1294 | + * Error can be -ENOMEM, -ENOENT (no such chunk mapping) or |
---|
| 1295 | + * -EOPNOTSUPP. For any such error, @num_bytes is not updated, |
---|
| 1296 | + * thus we can't continue anyway. |
---|
| 1297 | + */ |
---|
| 1298 | + if (ret < 0) |
---|
| 1299 | + goto out; |
---|
1983 | 1300 | |
---|
| 1301 | + stripe = bbio->stripes; |
---|
1984 | 1302 | for (i = 0; i < bbio->num_stripes; i++, stripe++) { |
---|
1985 | 1303 | u64 bytes; |
---|
1986 | 1304 | struct request_queue *req_q; |
---|
.. | .. |
---|
2001 | 1319 | stripe->physical, |
---|
2002 | 1320 | stripe->length, |
---|
2003 | 1321 | &bytes); |
---|
2004 | | - if (!ret) |
---|
| 1322 | + if (!ret) { |
---|
2005 | 1323 | discarded_bytes += bytes; |
---|
2006 | | - else if (ret != -EOPNOTSUPP) |
---|
2007 | | - break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */ |
---|
| 1324 | + } else if (ret != -EOPNOTSUPP) { |
---|
| 1325 | + /* |
---|
| 1326 | + * Logic errors or -ENOMEM, or -EIO, but |
---|
| 1327 | + * unlikely to happen. |
---|
| 1328 | + * |
---|
| 1329 | + * And since there are two loops, explicitly |
---|
| 1330 | + * go to out to avoid confusion. |
---|
| 1331 | + */ |
---|
| 1332 | + btrfs_put_bbio(bbio); |
---|
| 1333 | + goto out; |
---|
| 1334 | + } |
---|
2008 | 1335 | |
---|
2009 | 1336 | /* |
---|
2010 | 1337 | * Just in case we get back EOPNOTSUPP for some reason, |
---|
.. | .. |
---|
2014 | 1341 | ret = 0; |
---|
2015 | 1342 | } |
---|
2016 | 1343 | btrfs_put_bbio(bbio); |
---|
| 1344 | + cur += num_bytes; |
---|
2017 | 1345 | } |
---|
| 1346 | +out: |
---|
2018 | 1347 | btrfs_bio_counter_dec(fs_info); |
---|
2019 | 1348 | |
---|
2020 | 1349 | if (actual_bytes) |
---|
.. | .. |
---|
2028 | 1357 | |
---|
2029 | 1358 | /* Can return -ENOMEM */ |
---|
2030 | 1359 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
---|
2031 | | - struct btrfs_root *root, |
---|
2032 | | - u64 bytenr, u64 num_bytes, u64 parent, |
---|
2033 | | - u64 root_objectid, u64 owner, u64 offset) |
---|
| 1360 | + struct btrfs_ref *generic_ref) |
---|
2034 | 1361 | { |
---|
2035 | | - struct btrfs_fs_info *fs_info = root->fs_info; |
---|
2036 | | - int old_ref_mod, new_ref_mod; |
---|
| 1362 | + struct btrfs_fs_info *fs_info = trans->fs_info; |
---|
2037 | 1363 | int ret; |
---|
2038 | 1364 | |
---|
2039 | | - BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && |
---|
2040 | | - root_objectid == BTRFS_TREE_LOG_OBJECTID); |
---|
| 1365 | + ASSERT(generic_ref->type != BTRFS_REF_NOT_SET && |
---|
| 1366 | + generic_ref->action); |
---|
| 1367 | + BUG_ON(generic_ref->type == BTRFS_REF_METADATA && |
---|
| 1368 | + generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID); |
---|
2041 | 1369 | |
---|
2042 | | - btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, root_objectid, |
---|
2043 | | - owner, offset, BTRFS_ADD_DELAYED_REF); |
---|
| 1370 | + if (generic_ref->type == BTRFS_REF_METADATA) |
---|
| 1371 | + ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL); |
---|
| 1372 | + else |
---|
| 1373 | + ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0); |
---|
2044 | 1374 | |
---|
2045 | | - if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
---|
2046 | | - ret = btrfs_add_delayed_tree_ref(trans, bytenr, |
---|
2047 | | - num_bytes, parent, |
---|
2048 | | - root_objectid, (int)owner, |
---|
2049 | | - BTRFS_ADD_DELAYED_REF, NULL, |
---|
2050 | | - &old_ref_mod, &new_ref_mod); |
---|
2051 | | - } else { |
---|
2052 | | - ret = btrfs_add_delayed_data_ref(trans, bytenr, |
---|
2053 | | - num_bytes, parent, |
---|
2054 | | - root_objectid, owner, offset, |
---|
2055 | | - 0, BTRFS_ADD_DELAYED_REF, |
---|
2056 | | - &old_ref_mod, &new_ref_mod); |
---|
2057 | | - } |
---|
2058 | | - |
---|
2059 | | - if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) { |
---|
2060 | | - bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID; |
---|
2061 | | - |
---|
2062 | | - add_pinned_bytes(fs_info, -num_bytes, metadata, root_objectid); |
---|
2063 | | - } |
---|
| 1375 | + btrfs_ref_tree_mod(fs_info, generic_ref); |
---|
2064 | 1376 | |
---|
2065 | 1377 | return ret; |
---|
2066 | 1378 | } |
---|
2067 | 1379 | |
---|
2068 | 1380 | /* |
---|
2069 | 1381 | * __btrfs_inc_extent_ref - insert backreference for a given extent |
---|
| 1382 | + * |
---|
| 1383 | + * The counterpart is in __btrfs_free_extent(), with examples and more details |
---|
| 1384 | + * how it works. |
---|
2070 | 1385 | * |
---|
2071 | 1386 | * @trans: Handle of transaction |
---|
2072 | 1387 | * |
---|
.. | .. |
---|
2118 | 1433 | if (!path) |
---|
2119 | 1434 | return -ENOMEM; |
---|
2120 | 1435 | |
---|
2121 | | - path->reada = READA_FORWARD; |
---|
2122 | 1436 | path->leave_spinning = 1; |
---|
2123 | 1437 | /* this will setup the path even if it fails to insert the back ref */ |
---|
2124 | 1438 | ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes, |
---|
.. | .. |
---|
2143 | 1457 | btrfs_mark_buffer_dirty(leaf); |
---|
2144 | 1458 | btrfs_release_path(path); |
---|
2145 | 1459 | |
---|
2146 | | - path->reada = READA_FORWARD; |
---|
2147 | 1460 | path->leave_spinning = 1; |
---|
2148 | 1461 | /* now insert the actual backref */ |
---|
2149 | | - ret = insert_extent_backref(trans, path, bytenr, parent, root_objectid, |
---|
2150 | | - owner, offset, refs_to_add); |
---|
| 1462 | + if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
---|
| 1463 | + BUG_ON(refs_to_add != 1); |
---|
| 1464 | + ret = insert_tree_block_ref(trans, path, bytenr, parent, |
---|
| 1465 | + root_objectid); |
---|
| 1466 | + } else { |
---|
| 1467 | + ret = insert_extent_data_ref(trans, path, bytenr, parent, |
---|
| 1468 | + root_objectid, owner, offset, |
---|
| 1469 | + refs_to_add); |
---|
| 1470 | + } |
---|
2151 | 1471 | if (ret) |
---|
2152 | 1472 | btrfs_abort_transaction(trans, ret); |
---|
2153 | 1473 | out: |
---|
.. | .. |
---|
2232 | 1552 | int err = 0; |
---|
2233 | 1553 | int metadata = !extent_op->is_data; |
---|
2234 | 1554 | |
---|
2235 | | - if (trans->aborted) |
---|
| 1555 | + if (TRANS_ABORTED(trans)) |
---|
2236 | 1556 | return 0; |
---|
2237 | 1557 | |
---|
2238 | 1558 | if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) |
---|
.. | .. |
---|
2253 | 1573 | } |
---|
2254 | 1574 | |
---|
2255 | 1575 | again: |
---|
2256 | | - path->reada = READA_FORWARD; |
---|
2257 | 1576 | path->leave_spinning = 1; |
---|
2258 | 1577 | ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1); |
---|
2259 | 1578 | if (ret < 0) { |
---|
.. | .. |
---|
2352 | 1671 | { |
---|
2353 | 1672 | int ret = 0; |
---|
2354 | 1673 | |
---|
2355 | | - if (trans->aborted) { |
---|
| 1674 | + if (TRANS_ABORTED(trans)) { |
---|
2356 | 1675 | if (insert_reserved) |
---|
2357 | | - btrfs_pin_extent(trans->fs_info, node->bytenr, |
---|
2358 | | - node->num_bytes, 1); |
---|
| 1676 | + btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1); |
---|
2359 | 1677 | return 0; |
---|
2360 | 1678 | } |
---|
2361 | 1679 | |
---|
.. | .. |
---|
2370 | 1688 | else |
---|
2371 | 1689 | BUG(); |
---|
2372 | 1690 | if (ret && insert_reserved) |
---|
2373 | | - btrfs_pin_extent(trans->fs_info, node->bytenr, |
---|
2374 | | - node->num_bytes, 1); |
---|
| 1691 | + btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1); |
---|
| 1692 | + if (ret < 0) |
---|
| 1693 | + btrfs_err(trans->fs_info, |
---|
| 1694 | +"failed to run delayed ref for logical %llu num_bytes %llu type %u action %u ref_mod %d: %d", |
---|
| 1695 | + node->bytenr, node->num_bytes, node->type, |
---|
| 1696 | + node->action, node->ref_mod, ret); |
---|
2375 | 1697 | return ret; |
---|
2376 | 1698 | } |
---|
2377 | 1699 | |
---|
.. | .. |
---|
2380 | 1702 | { |
---|
2381 | 1703 | struct btrfs_delayed_ref_node *ref; |
---|
2382 | 1704 | |
---|
2383 | | - if (RB_EMPTY_ROOT(&head->ref_tree)) |
---|
| 1705 | + if (RB_EMPTY_ROOT(&head->ref_tree.rb_root)) |
---|
2384 | 1706 | return NULL; |
---|
2385 | 1707 | |
---|
2386 | 1708 | /* |
---|
.. | .. |
---|
2393 | 1715 | return list_first_entry(&head->ref_add_list, |
---|
2394 | 1716 | struct btrfs_delayed_ref_node, add_list); |
---|
2395 | 1717 | |
---|
2396 | | - ref = rb_entry(rb_first(&head->ref_tree), |
---|
| 1718 | + ref = rb_entry(rb_first_cached(&head->ref_tree), |
---|
2397 | 1719 | struct btrfs_delayed_ref_node, ref_node); |
---|
2398 | 1720 | ASSERT(list_empty(&ref->add_list)); |
---|
2399 | 1721 | return ref; |
---|
.. | .. |
---|
2409 | 1731 | btrfs_delayed_ref_unlock(head); |
---|
2410 | 1732 | } |
---|
2411 | 1733 | |
---|
2412 | | -static int cleanup_extent_op(struct btrfs_trans_handle *trans, |
---|
2413 | | - struct btrfs_delayed_ref_head *head) |
---|
| 1734 | +static struct btrfs_delayed_extent_op *cleanup_extent_op( |
---|
| 1735 | + struct btrfs_delayed_ref_head *head) |
---|
2414 | 1736 | { |
---|
2415 | 1737 | struct btrfs_delayed_extent_op *extent_op = head->extent_op; |
---|
| 1738 | + |
---|
| 1739 | + if (!extent_op) |
---|
| 1740 | + return NULL; |
---|
| 1741 | + |
---|
| 1742 | + if (head->must_insert_reserved) { |
---|
| 1743 | + head->extent_op = NULL; |
---|
| 1744 | + btrfs_free_delayed_extent_op(extent_op); |
---|
| 1745 | + return NULL; |
---|
| 1746 | + } |
---|
| 1747 | + return extent_op; |
---|
| 1748 | +} |
---|
| 1749 | + |
---|
| 1750 | +static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans, |
---|
| 1751 | + struct btrfs_delayed_ref_head *head) |
---|
| 1752 | +{ |
---|
| 1753 | + struct btrfs_delayed_extent_op *extent_op; |
---|
2416 | 1754 | int ret; |
---|
2417 | 1755 | |
---|
| 1756 | + extent_op = cleanup_extent_op(head); |
---|
2418 | 1757 | if (!extent_op) |
---|
2419 | 1758 | return 0; |
---|
2420 | 1759 | head->extent_op = NULL; |
---|
2421 | | - if (head->must_insert_reserved) { |
---|
2422 | | - btrfs_free_delayed_extent_op(extent_op); |
---|
2423 | | - return 0; |
---|
2424 | | - } |
---|
2425 | 1760 | spin_unlock(&head->lock); |
---|
2426 | 1761 | ret = run_delayed_extent_op(trans, head, extent_op); |
---|
2427 | 1762 | btrfs_free_delayed_extent_op(extent_op); |
---|
2428 | 1763 | return ret ? ret : 1; |
---|
| 1764 | +} |
---|
| 1765 | + |
---|
| 1766 | +void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, |
---|
| 1767 | + struct btrfs_delayed_ref_root *delayed_refs, |
---|
| 1768 | + struct btrfs_delayed_ref_head *head) |
---|
| 1769 | +{ |
---|
| 1770 | + int nr_items = 1; /* Dropping this ref head update. */ |
---|
| 1771 | + |
---|
| 1772 | + /* |
---|
| 1773 | + * We had csum deletions accounted for in our delayed refs rsv, we need |
---|
| 1774 | + * to drop the csum leaves for this update from our delayed_refs_rsv. |
---|
| 1775 | + */ |
---|
| 1776 | + if (head->total_ref_mod < 0 && head->is_data) { |
---|
| 1777 | + spin_lock(&delayed_refs->lock); |
---|
| 1778 | + delayed_refs->pending_csums -= head->num_bytes; |
---|
| 1779 | + spin_unlock(&delayed_refs->lock); |
---|
| 1780 | + nr_items += btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes); |
---|
| 1781 | + } |
---|
| 1782 | + |
---|
| 1783 | + /* |
---|
| 1784 | + * We were dropping refs, or had a new ref and dropped it, and thus must |
---|
| 1785 | + * adjust down our total_bytes_pinned, the space may or may not have |
---|
| 1786 | + * been pinned and so is accounted for properly in the pinned space by |
---|
| 1787 | + * now. |
---|
| 1788 | + */ |
---|
| 1789 | + if (head->total_ref_mod < 0 || |
---|
| 1790 | + (head->total_ref_mod == 0 && head->must_insert_reserved)) { |
---|
| 1791 | + u64 flags = btrfs_ref_head_to_space_flags(head); |
---|
| 1792 | + |
---|
| 1793 | + btrfs_mod_total_bytes_pinned(fs_info, flags, -head->num_bytes); |
---|
| 1794 | + } |
---|
| 1795 | + |
---|
| 1796 | + btrfs_delayed_refs_rsv_release(fs_info, nr_items); |
---|
2429 | 1797 | } |
---|
2430 | 1798 | |
---|
2431 | 1799 | static int cleanup_ref_head(struct btrfs_trans_handle *trans, |
---|
.. | .. |
---|
2438 | 1806 | |
---|
2439 | 1807 | delayed_refs = &trans->transaction->delayed_refs; |
---|
2440 | 1808 | |
---|
2441 | | - ret = cleanup_extent_op(trans, head); |
---|
| 1809 | + ret = run_and_cleanup_extent_op(trans, head); |
---|
2442 | 1810 | if (ret < 0) { |
---|
2443 | 1811 | unselect_delayed_ref_head(delayed_refs, head); |
---|
2444 | 1812 | btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); |
---|
.. | .. |
---|
2454 | 1822 | spin_unlock(&head->lock); |
---|
2455 | 1823 | spin_lock(&delayed_refs->lock); |
---|
2456 | 1824 | spin_lock(&head->lock); |
---|
2457 | | - if (!RB_EMPTY_ROOT(&head->ref_tree) || head->extent_op) { |
---|
| 1825 | + if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) { |
---|
2458 | 1826 | spin_unlock(&head->lock); |
---|
2459 | 1827 | spin_unlock(&delayed_refs->lock); |
---|
2460 | 1828 | return 1; |
---|
2461 | 1829 | } |
---|
2462 | | - delayed_refs->num_heads--; |
---|
2463 | | - rb_erase(&head->href_node, &delayed_refs->href_root); |
---|
2464 | | - RB_CLEAR_NODE(&head->href_node); |
---|
| 1830 | + btrfs_delete_ref_head(delayed_refs, head); |
---|
2465 | 1831 | spin_unlock(&head->lock); |
---|
2466 | 1832 | spin_unlock(&delayed_refs->lock); |
---|
2467 | | - atomic_dec(&delayed_refs->num_entries); |
---|
2468 | | - |
---|
2469 | | - trace_run_delayed_ref_head(fs_info, head, 0); |
---|
2470 | | - |
---|
2471 | | - if (head->total_ref_mod < 0) { |
---|
2472 | | - struct btrfs_space_info *space_info; |
---|
2473 | | - u64 flags; |
---|
2474 | | - |
---|
2475 | | - if (head->is_data) |
---|
2476 | | - flags = BTRFS_BLOCK_GROUP_DATA; |
---|
2477 | | - else if (head->is_system) |
---|
2478 | | - flags = BTRFS_BLOCK_GROUP_SYSTEM; |
---|
2479 | | - else |
---|
2480 | | - flags = BTRFS_BLOCK_GROUP_METADATA; |
---|
2481 | | - space_info = __find_space_info(fs_info, flags); |
---|
2482 | | - ASSERT(space_info); |
---|
2483 | | - percpu_counter_add_batch(&space_info->total_bytes_pinned, |
---|
2484 | | - -head->num_bytes, |
---|
2485 | | - BTRFS_TOTAL_BYTES_PINNED_BATCH); |
---|
2486 | | - |
---|
2487 | | - if (head->is_data) { |
---|
2488 | | - spin_lock(&delayed_refs->lock); |
---|
2489 | | - delayed_refs->pending_csums -= head->num_bytes; |
---|
2490 | | - spin_unlock(&delayed_refs->lock); |
---|
2491 | | - } |
---|
2492 | | - } |
---|
2493 | 1833 | |
---|
2494 | 1834 | if (head->must_insert_reserved) { |
---|
2495 | | - btrfs_pin_extent(fs_info, head->bytenr, |
---|
2496 | | - head->num_bytes, 1); |
---|
| 1835 | + btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1); |
---|
2497 | 1836 | if (head->is_data) { |
---|
2498 | 1837 | ret = btrfs_del_csums(trans, fs_info->csum_root, |
---|
2499 | 1838 | head->bytenr, head->num_bytes); |
---|
2500 | 1839 | } |
---|
2501 | 1840 | } |
---|
2502 | 1841 | |
---|
2503 | | - /* Also free its reserved qgroup space */ |
---|
2504 | | - btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root, |
---|
2505 | | - head->qgroup_reserved); |
---|
| 1842 | + btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head); |
---|
| 1843 | + |
---|
| 1844 | + trace_run_delayed_ref_head(fs_info, head, 0); |
---|
2506 | 1845 | btrfs_delayed_ref_unlock(head); |
---|
2507 | 1846 | btrfs_put_delayed_ref_head(head); |
---|
2508 | 1847 | return ret; |
---|
2509 | 1848 | } |
---|
2510 | 1849 | |
---|
2511 | | -/* |
---|
2512 | | - * Returns 0 on success or if called with an already aborted transaction. |
---|
2513 | | - * Returns -ENOMEM or -EIO on failure and will abort the transaction. |
---|
2514 | | - */ |
---|
2515 | | -static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
---|
2516 | | - unsigned long nr) |
---|
| 1850 | +static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head( |
---|
| 1851 | + struct btrfs_trans_handle *trans) |
---|
| 1852 | +{ |
---|
| 1853 | + struct btrfs_delayed_ref_root *delayed_refs = |
---|
| 1854 | + &trans->transaction->delayed_refs; |
---|
| 1855 | + struct btrfs_delayed_ref_head *head = NULL; |
---|
| 1856 | + int ret; |
---|
| 1857 | + |
---|
| 1858 | + spin_lock(&delayed_refs->lock); |
---|
| 1859 | + head = btrfs_select_ref_head(delayed_refs); |
---|
| 1860 | + if (!head) { |
---|
| 1861 | + spin_unlock(&delayed_refs->lock); |
---|
| 1862 | + return head; |
---|
| 1863 | + } |
---|
| 1864 | + |
---|
| 1865 | + /* |
---|
| 1866 | + * Grab the lock that says we are going to process all the refs for |
---|
| 1867 | + * this head |
---|
| 1868 | + */ |
---|
| 1869 | + ret = btrfs_delayed_ref_lock(delayed_refs, head); |
---|
| 1870 | + spin_unlock(&delayed_refs->lock); |
---|
| 1871 | + |
---|
| 1872 | + /* |
---|
| 1873 | + * We may have dropped the spin lock to get the head mutex lock, and |
---|
| 1874 | + * that might have given someone else time to free the head. If that's |
---|
| 1875 | + * true, it has been removed from our list and we can move on. |
---|
| 1876 | + */ |
---|
| 1877 | + if (ret == -EAGAIN) |
---|
| 1878 | + head = ERR_PTR(-EAGAIN); |
---|
| 1879 | + |
---|
| 1880 | + return head; |
---|
| 1881 | +} |
---|
| 1882 | + |
---|
| 1883 | +static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans, |
---|
| 1884 | + struct btrfs_delayed_ref_head *locked_ref, |
---|
| 1885 | + unsigned long *run_refs) |
---|
2517 | 1886 | { |
---|
2518 | 1887 | struct btrfs_fs_info *fs_info = trans->fs_info; |
---|
2519 | 1888 | struct btrfs_delayed_ref_root *delayed_refs; |
---|
2520 | | - struct btrfs_delayed_ref_node *ref; |
---|
2521 | | - struct btrfs_delayed_ref_head *locked_ref = NULL; |
---|
2522 | 1889 | struct btrfs_delayed_extent_op *extent_op; |
---|
2523 | | - ktime_t start = ktime_get(); |
---|
2524 | | - int ret; |
---|
2525 | | - unsigned long count = 0; |
---|
2526 | | - unsigned long actual_count = 0; |
---|
| 1890 | + struct btrfs_delayed_ref_node *ref; |
---|
2527 | 1891 | int must_insert_reserved = 0; |
---|
| 1892 | + int ret; |
---|
2528 | 1893 | |
---|
2529 | 1894 | delayed_refs = &trans->transaction->delayed_refs; |
---|
2530 | | - while (1) { |
---|
2531 | | - if (!locked_ref) { |
---|
2532 | | - if (count >= nr) |
---|
2533 | | - break; |
---|
2534 | 1895 | |
---|
2535 | | - spin_lock(&delayed_refs->lock); |
---|
2536 | | - locked_ref = btrfs_select_ref_head(trans); |
---|
2537 | | - if (!locked_ref) { |
---|
2538 | | - spin_unlock(&delayed_refs->lock); |
---|
2539 | | - break; |
---|
2540 | | - } |
---|
| 1896 | + lockdep_assert_held(&locked_ref->mutex); |
---|
| 1897 | + lockdep_assert_held(&locked_ref->lock); |
---|
2541 | 1898 | |
---|
2542 | | - /* grab the lock that says we are going to process |
---|
2543 | | - * all the refs for this head */ |
---|
2544 | | - ret = btrfs_delayed_ref_lock(trans, locked_ref); |
---|
2545 | | - spin_unlock(&delayed_refs->lock); |
---|
2546 | | - /* |
---|
2547 | | - * we may have dropped the spin lock to get the head |
---|
2548 | | - * mutex lock, and that might have given someone else |
---|
2549 | | - * time to free the head. If that's true, it has been |
---|
2550 | | - * removed from our list and we can move on. |
---|
2551 | | - */ |
---|
2552 | | - if (ret == -EAGAIN) { |
---|
2553 | | - locked_ref = NULL; |
---|
2554 | | - count++; |
---|
2555 | | - continue; |
---|
2556 | | - } |
---|
2557 | | - } |
---|
2558 | | - |
---|
2559 | | - /* |
---|
2560 | | - * We need to try and merge add/drops of the same ref since we |
---|
2561 | | - * can run into issues with relocate dropping the implicit ref |
---|
2562 | | - * and then it being added back again before the drop can |
---|
2563 | | - * finish. If we merged anything we need to re-loop so we can |
---|
2564 | | - * get a good ref. |
---|
2565 | | - * Or we can get node references of the same type that weren't |
---|
2566 | | - * merged when created due to bumps in the tree mod seq, and |
---|
2567 | | - * we need to merge them to prevent adding an inline extent |
---|
2568 | | - * backref before dropping it (triggering a BUG_ON at |
---|
2569 | | - * insert_inline_extent_backref()). |
---|
2570 | | - */ |
---|
2571 | | - spin_lock(&locked_ref->lock); |
---|
2572 | | - btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref); |
---|
2573 | | - |
---|
2574 | | - ref = select_delayed_ref(locked_ref); |
---|
2575 | | - |
---|
2576 | | - if (ref && ref->seq && |
---|
| 1899 | + while ((ref = select_delayed_ref(locked_ref))) { |
---|
| 1900 | + if (ref->seq && |
---|
2577 | 1901 | btrfs_check_delayed_seq(fs_info, ref->seq)) { |
---|
2578 | 1902 | spin_unlock(&locked_ref->lock); |
---|
2579 | 1903 | unselect_delayed_ref_head(delayed_refs, locked_ref); |
---|
2580 | | - locked_ref = NULL; |
---|
2581 | | - cond_resched(); |
---|
2582 | | - count++; |
---|
2583 | | - continue; |
---|
| 1904 | + return -EAGAIN; |
---|
2584 | 1905 | } |
---|
2585 | 1906 | |
---|
2586 | | - /* |
---|
2587 | | - * We're done processing refs in this ref_head, clean everything |
---|
2588 | | - * up and move on to the next ref_head. |
---|
2589 | | - */ |
---|
2590 | | - if (!ref) { |
---|
2591 | | - ret = cleanup_ref_head(trans, locked_ref); |
---|
2592 | | - if (ret > 0 ) { |
---|
2593 | | - /* We dropped our lock, we need to loop. */ |
---|
2594 | | - ret = 0; |
---|
2595 | | - continue; |
---|
2596 | | - } else if (ret) { |
---|
2597 | | - return ret; |
---|
2598 | | - } |
---|
2599 | | - locked_ref = NULL; |
---|
2600 | | - count++; |
---|
2601 | | - continue; |
---|
2602 | | - } |
---|
2603 | | - |
---|
2604 | | - actual_count++; |
---|
| 1907 | + (*run_refs)++; |
---|
2605 | 1908 | ref->in_tree = 0; |
---|
2606 | | - rb_erase(&ref->ref_node, &locked_ref->ref_tree); |
---|
| 1909 | + rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree); |
---|
2607 | 1910 | RB_CLEAR_NODE(&ref->ref_node); |
---|
2608 | 1911 | if (!list_empty(&ref->add_list)) |
---|
2609 | 1912 | list_del(&ref->add_list); |
---|
.. | .. |
---|
2625 | 1928 | atomic_dec(&delayed_refs->num_entries); |
---|
2626 | 1929 | |
---|
2627 | 1930 | /* |
---|
2628 | | - * Record the must-insert_reserved flag before we drop the spin |
---|
2629 | | - * lock. |
---|
| 1931 | + * Record the must_insert_reserved flag before we drop the |
---|
| 1932 | + * spin lock. |
---|
2630 | 1933 | */ |
---|
2631 | 1934 | must_insert_reserved = locked_ref->must_insert_reserved; |
---|
2632 | 1935 | locked_ref->must_insert_reserved = 0; |
---|
.. | .. |
---|
2642 | 1945 | if (ret) { |
---|
2643 | 1946 | unselect_delayed_ref_head(delayed_refs, locked_ref); |
---|
2644 | 1947 | btrfs_put_delayed_ref(ref); |
---|
2645 | | - btrfs_debug(fs_info, "run_one_delayed_ref returned %d", |
---|
2646 | | - ret); |
---|
2647 | 1948 | return ret; |
---|
2648 | 1949 | } |
---|
2649 | 1950 | |
---|
2650 | 1951 | btrfs_put_delayed_ref(ref); |
---|
2651 | | - count++; |
---|
2652 | 1952 | cond_resched(); |
---|
| 1953 | + |
---|
| 1954 | + spin_lock(&locked_ref->lock); |
---|
| 1955 | + btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref); |
---|
2653 | 1956 | } |
---|
| 1957 | + |
---|
| 1958 | + return 0; |
---|
| 1959 | +} |
---|
| 1960 | + |
---|
| 1961 | +/* |
---|
| 1962 | + * Returns 0 on success or if called with an already aborted transaction. |
---|
| 1963 | + * Returns -ENOMEM or -EIO on failure and will abort the transaction. |
---|
| 1964 | + */ |
---|
| 1965 | +static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
---|
| 1966 | + unsigned long nr) |
---|
| 1967 | +{ |
---|
| 1968 | + struct btrfs_fs_info *fs_info = trans->fs_info; |
---|
| 1969 | + struct btrfs_delayed_ref_root *delayed_refs; |
---|
| 1970 | + struct btrfs_delayed_ref_head *locked_ref = NULL; |
---|
| 1971 | + ktime_t start = ktime_get(); |
---|
| 1972 | + int ret; |
---|
| 1973 | + unsigned long count = 0; |
---|
| 1974 | + unsigned long actual_count = 0; |
---|
| 1975 | + |
---|
| 1976 | + delayed_refs = &trans->transaction->delayed_refs; |
---|
| 1977 | + do { |
---|
| 1978 | + if (!locked_ref) { |
---|
| 1979 | + locked_ref = btrfs_obtain_ref_head(trans); |
---|
| 1980 | + if (IS_ERR_OR_NULL(locked_ref)) { |
---|
| 1981 | + if (PTR_ERR(locked_ref) == -EAGAIN) { |
---|
| 1982 | + continue; |
---|
| 1983 | + } else { |
---|
| 1984 | + break; |
---|
| 1985 | + } |
---|
| 1986 | + } |
---|
| 1987 | + count++; |
---|
| 1988 | + } |
---|
| 1989 | + /* |
---|
| 1990 | + * We need to try and merge add/drops of the same ref since we |
---|
| 1991 | + * can run into issues with relocate dropping the implicit ref |
---|
| 1992 | + * and then it being added back again before the drop can |
---|
| 1993 | + * finish. If we merged anything we need to re-loop so we can |
---|
| 1994 | + * get a good ref. |
---|
| 1995 | + * Or we can get node references of the same type that weren't |
---|
| 1996 | + * merged when created due to bumps in the tree mod seq, and |
---|
| 1997 | + * we need to merge them to prevent adding an inline extent |
---|
| 1998 | + * backref before dropping it (triggering a BUG_ON at |
---|
| 1999 | + * insert_inline_extent_backref()). |
---|
| 2000 | + */ |
---|
| 2001 | + spin_lock(&locked_ref->lock); |
---|
| 2002 | + btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref); |
---|
| 2003 | + |
---|
| 2004 | + ret = btrfs_run_delayed_refs_for_head(trans, locked_ref, |
---|
| 2005 | + &actual_count); |
---|
| 2006 | + if (ret < 0 && ret != -EAGAIN) { |
---|
| 2007 | + /* |
---|
| 2008 | + * Error, btrfs_run_delayed_refs_for_head already |
---|
| 2009 | + * unlocked everything so just bail out |
---|
| 2010 | + */ |
---|
| 2011 | + return ret; |
---|
| 2012 | + } else if (!ret) { |
---|
| 2013 | + /* |
---|
| 2014 | + * Success, perform the usual cleanup of a processed |
---|
| 2015 | + * head |
---|
| 2016 | + */ |
---|
| 2017 | + ret = cleanup_ref_head(trans, locked_ref); |
---|
| 2018 | + if (ret > 0) { |
---|
| 2019 | + /* We dropped our lock, we need to loop. */ |
---|
| 2020 | + ret = 0; |
---|
| 2021 | + continue; |
---|
| 2022 | + } else if (ret) { |
---|
| 2023 | + return ret; |
---|
| 2024 | + } |
---|
| 2025 | + } |
---|
| 2026 | + |
---|
| 2027 | + /* |
---|
| 2028 | + * Either success case or btrfs_run_delayed_refs_for_head |
---|
| 2029 | + * returned -EAGAIN, meaning we need to select another head |
---|
| 2030 | + */ |
---|
| 2031 | + |
---|
| 2032 | + locked_ref = NULL; |
---|
| 2033 | + cond_resched(); |
---|
| 2034 | + } while ((nr != -1 && count < nr) || locked_ref); |
---|
2654 | 2035 | |
---|
2655 | 2036 | /* |
---|
2656 | 2037 | * We don't want to include ref heads since we can have empty ref heads |
---|
.. | .. |
---|
2716 | 2097 | } |
---|
2717 | 2098 | #endif |
---|
2718 | 2099 | |
---|
2719 | | -static inline u64 heads_to_leaves(struct btrfs_fs_info *fs_info, u64 heads) |
---|
2720 | | -{ |
---|
2721 | | - u64 num_bytes; |
---|
2722 | | - |
---|
2723 | | - num_bytes = heads * (sizeof(struct btrfs_extent_item) + |
---|
2724 | | - sizeof(struct btrfs_extent_inline_ref)); |
---|
2725 | | - if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA)) |
---|
2726 | | - num_bytes += heads * sizeof(struct btrfs_tree_block_info); |
---|
2727 | | - |
---|
2728 | | - /* |
---|
2729 | | - * We don't ever fill up leaves all the way so multiply by 2 just to be |
---|
2730 | | - * closer to what we're really going to want to use. |
---|
2731 | | - */ |
---|
2732 | | - return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(fs_info)); |
---|
2733 | | -} |
---|
2734 | | - |
---|
2735 | 2100 | /* |
---|
2736 | 2101 | * Takes the number of bytes to be csumm'ed and figures out how many leaves it |
---|
2737 | 2102 | * would require to store the csums for that many bytes. |
---|
.. | .. |
---|
2749 | 2114 | num_csums += num_csums_per_leaf - 1; |
---|
2750 | 2115 | num_csums = div64_u64(num_csums, num_csums_per_leaf); |
---|
2751 | 2116 | return num_csums; |
---|
2752 | | -} |
---|
2753 | | - |
---|
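btrfs_csum_bytes_to_leaves(), whose tail is kept above, converts a byte count into per-sector checksums and rounds up to whole leaves. A standalone sketch of the same arithmetic follows; the 4 KiB sector, 4-byte crc32c checksum and 16 KiB of usable leaf space are assumed example values, whereas the kernel version derives them from the filesystem geometry.

```c
#include <stdio.h>
#include <stdint.h>

/*
 * Illustrative model of the csum-bytes-to-leaves calculation: one checksum
 * per sector, packed into leaves, rounded up to a whole number of leaves.
 * All sizes here are example values.
 */
static uint64_t csum_bytes_to_leaves(uint64_t csum_bytes, uint64_t sectorsize,
				     uint64_t csum_size, uint64_t max_item_size)
{
	uint64_t csums_per_leaf = max_item_size / csum_size;
	uint64_t num_csums = csum_bytes / sectorsize;

	return (num_csums + csums_per_leaf - 1) / csums_per_leaf;
}

int main(void)
{
	/* 1 GiB of data, 4 KiB sectors, 4-byte crc32c, 16 KiB usable per leaf */
	uint64_t leaves = csum_bytes_to_leaves(1ULL << 30, 4096, 4, 16 * 1024);

	/* 262144 checksums / 4096 per leaf = 64 leaves */
	printf("leaves needed: %llu\n", (unsigned long long)leaves);
	return 0;
}
```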
2754 | | -int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, |
---|
2755 | | - struct btrfs_fs_info *fs_info) |
---|
2756 | | -{ |
---|
2757 | | - struct btrfs_block_rsv *global_rsv; |
---|
2758 | | - u64 num_heads = trans->transaction->delayed_refs.num_heads_ready; |
---|
2759 | | - u64 csum_bytes = trans->transaction->delayed_refs.pending_csums; |
---|
2760 | | - unsigned int num_dirty_bgs = trans->transaction->num_dirty_bgs; |
---|
2761 | | - u64 num_bytes, num_dirty_bgs_bytes; |
---|
2762 | | - int ret = 0; |
---|
2763 | | - |
---|
2764 | | - num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); |
---|
2765 | | - num_heads = heads_to_leaves(fs_info, num_heads); |
---|
2766 | | - if (num_heads > 1) |
---|
2767 | | - num_bytes += (num_heads - 1) * fs_info->nodesize; |
---|
2768 | | - num_bytes <<= 1; |
---|
2769 | | - num_bytes += btrfs_csum_bytes_to_leaves(fs_info, csum_bytes) * |
---|
2770 | | - fs_info->nodesize; |
---|
2771 | | - num_dirty_bgs_bytes = btrfs_calc_trans_metadata_size(fs_info, |
---|
2772 | | - num_dirty_bgs); |
---|
2773 | | - global_rsv = &fs_info->global_block_rsv; |
---|
2774 | | - |
---|
2775 | | - /* |
---|
2776 | | - * If we can't allocate any more chunks lets make sure we have _lots_ of |
---|
2777 | | - * wiggle room since running delayed refs can create more delayed refs. |
---|
2778 | | - */ |
---|
2779 | | - if (global_rsv->space_info->full) { |
---|
2780 | | - num_dirty_bgs_bytes <<= 1; |
---|
2781 | | - num_bytes <<= 1; |
---|
2782 | | - } |
---|
2783 | | - |
---|
2784 | | - spin_lock(&global_rsv->lock); |
---|
2785 | | - if (global_rsv->reserved <= num_bytes + num_dirty_bgs_bytes) |
---|
2786 | | - ret = 1; |
---|
2787 | | - spin_unlock(&global_rsv->lock); |
---|
2788 | | - return ret; |
---|
2789 | | -} |
---|
2790 | | - |
---|
2791 | | -int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, |
---|
2792 | | - struct btrfs_fs_info *fs_info) |
---|
2793 | | -{ |
---|
2794 | | - u64 num_entries = |
---|
2795 | | - atomic_read(&trans->transaction->delayed_refs.num_entries); |
---|
2796 | | - u64 avg_runtime; |
---|
2797 | | - u64 val; |
---|
2798 | | - |
---|
2799 | | - smp_mb(); |
---|
2800 | | - avg_runtime = fs_info->avg_delayed_ref_runtime; |
---|
2801 | | - val = num_entries * avg_runtime; |
---|
2802 | | - if (val >= NSEC_PER_SEC) |
---|
2803 | | - return 1; |
---|
2804 | | - if (val >= NSEC_PER_SEC / 2) |
---|
2805 | | - return 2; |
---|
2806 | | - |
---|
2807 | | - return btrfs_check_space_for_delayed_refs(trans, fs_info); |
---|
2808 | | -} |
---|
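The throttling helper removed just above is a backlog-times-average-cost estimate: if the queued delayed refs would take a second or more to run at the observed average runtime it returns 1, at half a second it returns 2, and otherwise it falls back to btrfs_check_space_for_delayed_refs(). A tiny standalone model of that decision follows; the entry counts and the 10 µs average runtime in main() are made-up example numbers.

```c
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC	1000000000ULL

/*
 * Illustrative model of btrfs_should_throttle_delayed_refs(): estimate how
 * long the backlog would take to run and map that to a throttle level.
 * Returning 0 stands in for "defer to the space reservation check".
 */
static int should_throttle(uint64_t num_entries, uint64_t avg_runtime_ns)
{
	uint64_t estimated_ns = num_entries * avg_runtime_ns;

	if (estimated_ns >= NSEC_PER_SEC)
		return 1;		/* a second or more of work queued */
	if (estimated_ns >= NSEC_PER_SEC / 2)
		return 2;		/* half a second queued */
	return 0;			/* small backlog, check space instead */
}

int main(void)
{
	/* Example: average delayed ref runtime of 10 microseconds */
	uint64_t avg_ns = 10 * 1000;

	printf("%d\n", should_throttle(10000, avg_ns));		/* 0.1s -> 0 */
	printf("%d\n", should_throttle(60000, avg_ns));		/* 0.6s -> 2 */
	printf("%d\n", should_throttle(200000, avg_ns));	/* 2.0s -> 1 */
	return 0;
}
```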
2809 | | - |
---|
2810 | | -struct async_delayed_refs { |
---|
2811 | | - struct btrfs_root *root; |
---|
2812 | | - u64 transid; |
---|
2813 | | - int count; |
---|
2814 | | - int error; |
---|
2815 | | - int sync; |
---|
2816 | | - struct completion wait; |
---|
2817 | | - struct btrfs_work work; |
---|
2818 | | -}; |
---|
2819 | | - |
---|
2820 | | -static inline struct async_delayed_refs * |
---|
2821 | | -to_async_delayed_refs(struct btrfs_work *work) |
---|
2822 | | -{ |
---|
2823 | | - return container_of(work, struct async_delayed_refs, work); |
---|
2824 | | -} |
---|
2825 | | - |
---|
2826 | | -static void delayed_ref_async_start(struct btrfs_work *work) |
---|
2827 | | -{ |
---|
2828 | | - struct async_delayed_refs *async = to_async_delayed_refs(work); |
---|
2829 | | - struct btrfs_trans_handle *trans; |
---|
2830 | | - struct btrfs_fs_info *fs_info = async->root->fs_info; |
---|
2831 | | - int ret; |
---|
2832 | | - |
---|
2833 | | - /* if the commit is already started, we don't need to wait here */ |
---|
2834 | | - if (btrfs_transaction_blocked(fs_info)) |
---|
2835 | | - goto done; |
---|
2836 | | - |
---|
2837 | | - trans = btrfs_join_transaction(async->root); |
---|
2838 | | - if (IS_ERR(trans)) { |
---|
2839 | | - async->error = PTR_ERR(trans); |
---|
2840 | | - goto done; |
---|
2841 | | - } |
---|
2842 | | - |
---|
2843 | | - /* |
---|
2844 | | - * trans->sync means that when we call end_transaction, we won't |
---|
2845 | | - * wait on delayed refs |
---|
2846 | | - */ |
---|
2847 | | - trans->sync = true; |
---|
2848 | | - |
---|
2849 | | - /* Don't bother flushing if we got into a different transaction */ |
---|
2850 | | - if (trans->transid > async->transid) |
---|
2851 | | - goto end; |
---|
2852 | | - |
---|
2853 | | - ret = btrfs_run_delayed_refs(trans, async->count); |
---|
2854 | | - if (ret) |
---|
2855 | | - async->error = ret; |
---|
2856 | | -end: |
---|
2857 | | - ret = btrfs_end_transaction(trans); |
---|
2858 | | - if (ret && !async->error) |
---|
2859 | | - async->error = ret; |
---|
2860 | | -done: |
---|
2861 | | - if (async->sync) |
---|
2862 | | - complete(&async->wait); |
---|
2863 | | - else |
---|
2864 | | - kfree(async); |
---|
2865 | | -} |
---|
2866 | | - |
---|
2867 | | -int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info, |
---|
2868 | | - unsigned long count, u64 transid, int wait) |
---|
2869 | | -{ |
---|
2870 | | - struct async_delayed_refs *async; |
---|
2871 | | - int ret; |
---|
2872 | | - |
---|
2873 | | - async = kmalloc(sizeof(*async), GFP_NOFS); |
---|
2874 | | - if (!async) |
---|
2875 | | - return -ENOMEM; |
---|
2876 | | - |
---|
2877 | | - async->root = fs_info->tree_root; |
---|
2878 | | - async->count = count; |
---|
2879 | | - async->error = 0; |
---|
2880 | | - async->transid = transid; |
---|
2881 | | - if (wait) |
---|
2882 | | - async->sync = 1; |
---|
2883 | | - else |
---|
2884 | | - async->sync = 0; |
---|
2885 | | - init_completion(&async->wait); |
---|
2886 | | - |
---|
2887 | | - btrfs_init_work(&async->work, btrfs_extent_refs_helper, |
---|
2888 | | - delayed_ref_async_start, NULL, NULL); |
---|
2889 | | - |
---|
2890 | | - btrfs_queue_work(fs_info->extent_workers, &async->work); |
---|
2891 | | - |
---|
2892 | | - if (wait) { |
---|
2893 | | - wait_for_completion(&async->wait); |
---|
2894 | | - ret = async->error; |
---|
2895 | | - kfree(async); |
---|
2896 | | - return ret; |
---|
2897 | | - } |
---|
2898 | | - return 0; |
---|
2899 | 2117 | } |
---|
2900 | 2118 | |
---|
2901 | 2119 | /* |
---|
.. | .. |
---|
2919 | 2137 | int run_all = count == (unsigned long)-1; |
---|
2920 | 2138 | |
---|
2921 | 2139 | /* We'll clean this up in btrfs_cleanup_transaction */ |
---|
2922 | | - if (trans->aborted) |
---|
| 2140 | + if (TRANS_ABORTED(trans)) |
---|
2923 | 2141 | return 0; |
---|
2924 | 2142 | |
---|
2925 | 2143 | if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags)) |
---|
.. | .. |
---|
2940 | 2158 | } |
---|
2941 | 2159 | |
---|
2942 | 2160 | if (run_all) { |
---|
2943 | | - if (!list_empty(&trans->new_bgs)) |
---|
2944 | | - btrfs_create_pending_block_groups(trans); |
---|
| 2161 | + btrfs_create_pending_block_groups(trans); |
---|
2945 | 2162 | |
---|
2946 | 2163 | spin_lock(&delayed_refs->lock); |
---|
2947 | | - node = rb_first(&delayed_refs->href_root); |
---|
| 2164 | + node = rb_first_cached(&delayed_refs->href_root); |
---|
2948 | 2165 | if (!node) { |
---|
2949 | 2166 | spin_unlock(&delayed_refs->lock); |
---|
2950 | 2167 | goto out; |
---|
.. | .. |
---|
2967 | 2184 | } |
---|
2968 | 2185 | |
---|
2969 | 2186 | int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, |
---|
2970 | | - struct btrfs_fs_info *fs_info, |
---|
2971 | | - u64 bytenr, u64 num_bytes, u64 flags, |
---|
| 2187 | + struct extent_buffer *eb, u64 flags, |
---|
2972 | 2188 | int level, int is_data) |
---|
2973 | 2189 | { |
---|
2974 | 2190 | struct btrfs_delayed_extent_op *extent_op; |
---|
.. | .. |
---|
2984 | 2200 | extent_op->is_data = is_data ? true : false; |
---|
2985 | 2201 | extent_op->level = level; |
---|
2986 | 2202 | |
---|
2987 | | - ret = btrfs_add_delayed_extent_op(fs_info, trans, bytenr, |
---|
2988 | | - num_bytes, extent_op); |
---|
| 2203 | + ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op); |
---|
2989 | 2204 | if (ret) |
---|
2990 | 2205 | btrfs_free_delayed_extent_op(extent_op); |
---|
2991 | 2206 | return ret; |
---|
.. | .. |
---|
3043 | 2258 | * XXX: We should replace this with a proper search function in the |
---|
3044 | 2259 | * future. |
---|
3045 | 2260 | */ |
---|
3046 | | - for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) { |
---|
| 2261 | + for (node = rb_first_cached(&head->ref_tree); node; |
---|
| 2262 | + node = rb_next(node)) { |
---|
3047 | 2263 | ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node); |
---|
3048 | 2264 | /* If it's a shared ref we know a cross reference exists */ |
---|
3049 | 2265 | if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) { |
---|
.. | .. |
---|
3072 | 2288 | |
---|
3073 | 2289 | static noinline int check_committed_ref(struct btrfs_root *root, |
---|
3074 | 2290 | struct btrfs_path *path, |
---|
3075 | | - u64 objectid, u64 offset, u64 bytenr) |
---|
| 2291 | + u64 objectid, u64 offset, u64 bytenr, |
---|
| 2292 | + bool strict) |
---|
3076 | 2293 | { |
---|
3077 | 2294 | struct btrfs_fs_info *fs_info = root->fs_info; |
---|
3078 | 2295 | struct btrfs_root *extent_root = fs_info->extent_root; |
---|
.. | .. |
---|
3109 | 2326 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); |
---|
3110 | 2327 | ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); |
---|
3111 | 2328 | |
---|
| 2329 | + /* If extent item has more than 1 inline ref then it's shared */ |
---|
3112 | 2330 | if (item_size != sizeof(*ei) + |
---|
3113 | 2331 | btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)) |
---|
3114 | 2332 | goto out; |
---|
3115 | 2333 | |
---|
3116 | | - if (btrfs_extent_generation(leaf, ei) <= |
---|
3117 | | - btrfs_root_last_snapshot(&root->root_item)) |
---|
| 2334 | + /* |
---|
| 2335 | + * If extent created before last snapshot => it's shared unless the |
---|
| 2336 | + * snapshot has been deleted. Use the heuristic if strict is false. |
---|
| 2337 | + */ |
---|
| 2338 | + if (!strict && |
---|
| 2339 | + (btrfs_extent_generation(leaf, ei) <= |
---|
| 2340 | + btrfs_root_last_snapshot(&root->root_item))) |
---|
3118 | 2341 | goto out; |
---|
3119 | 2342 | |
---|
3120 | 2343 | iref = (struct btrfs_extent_inline_ref *)(ei + 1); |
---|
3121 | 2344 | |
---|
| 2345 | + /* If this extent has SHARED_DATA_REF then it's shared */ |
---|
3122 | 2346 | type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA); |
---|
3123 | 2347 | if (type != BTRFS_EXTENT_DATA_REF_KEY) |
---|
3124 | 2348 | goto out; |
---|
.. | .. |
---|
3138 | 2362 | } |
---|
3139 | 2363 | |
---|
3140 | 2364 | int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset, |
---|
3141 | | - u64 bytenr) |
---|
| 2365 | + u64 bytenr, bool strict) |
---|
3142 | 2366 | { |
---|
3143 | 2367 | struct btrfs_path *path; |
---|
3144 | 2368 | int ret; |
---|
3145 | | - int ret2; |
---|
3146 | 2369 | |
---|
3147 | 2370 | path = btrfs_alloc_path(); |
---|
3148 | 2371 | if (!path) |
---|
.. | .. |
---|
3150 | 2373 | |
---|
3151 | 2374 | do { |
---|
3152 | 2375 | ret = check_committed_ref(root, path, objectid, |
---|
3153 | | - offset, bytenr); |
---|
| 2376 | + offset, bytenr, strict); |
---|
3154 | 2377 | if (ret && ret != -ENOENT) |
---|
3155 | 2378 | goto out; |
---|
3156 | 2379 | |
---|
3157 | | - ret2 = check_delayed_ref(root, path, objectid, |
---|
3158 | | - offset, bytenr); |
---|
3159 | | - } while (ret2 == -EAGAIN); |
---|
| 2380 | + ret = check_delayed_ref(root, path, objectid, offset, bytenr); |
---|
| 2381 | + } while (ret == -EAGAIN); |
---|
3160 | 2382 | |
---|
3161 | | - if (ret2 && ret2 != -ENOENT) { |
---|
3162 | | - ret = ret2; |
---|
3163 | | - goto out; |
---|
3164 | | - } |
---|
3165 | | - |
---|
3166 | | - if (ret != -ENOENT || ret2 != -ENOENT) |
---|
3167 | | - ret = 0; |
---|
3168 | 2383 | out: |
---|
3169 | 2384 | btrfs_free_path(path); |
---|
3170 | 2385 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) |
---|
.. | .. |
---|
3185 | 2400 | u32 nritems; |
---|
3186 | 2401 | struct btrfs_key key; |
---|
3187 | 2402 | struct btrfs_file_extent_item *fi; |
---|
| 2403 | + struct btrfs_ref generic_ref = { 0 }; |
---|
| 2404 | + bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC); |
---|
3188 | 2405 | int i; |
---|
| 2406 | + int action; |
---|
3189 | 2407 | int level; |
---|
3190 | 2408 | int ret = 0; |
---|
3191 | | - int (*process_func)(struct btrfs_trans_handle *, |
---|
3192 | | - struct btrfs_root *, |
---|
3193 | | - u64, u64, u64, u64, u64, u64); |
---|
3194 | | - |
---|
3195 | 2409 | |
---|
3196 | 2410 | if (btrfs_is_testing(fs_info)) |
---|
3197 | 2411 | return 0; |
---|
.. | .. |
---|
3200 | 2414 | nritems = btrfs_header_nritems(buf); |
---|
3201 | 2415 | level = btrfs_header_level(buf); |
---|
3202 | 2416 | |
---|
3203 | | - if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0) |
---|
| 2417 | + if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && level == 0) |
---|
3204 | 2418 | return 0; |
---|
3205 | | - |
---|
3206 | | - if (inc) |
---|
3207 | | - process_func = btrfs_inc_extent_ref; |
---|
3208 | | - else |
---|
3209 | | - process_func = btrfs_free_extent; |
---|
3210 | 2419 | |
---|
3211 | 2420 | if (full_backref) |
---|
3212 | 2421 | parent = buf->start; |
---|
3213 | 2422 | else |
---|
3214 | 2423 | parent = 0; |
---|
| 2424 | + if (inc) |
---|
| 2425 | + action = BTRFS_ADD_DELAYED_REF; |
---|
| 2426 | + else |
---|
| 2427 | + action = BTRFS_DROP_DELAYED_REF; |
---|
3215 | 2428 | |
---|
3216 | 2429 | for (i = 0; i < nritems; i++) { |
---|
3217 | 2430 | if (level == 0) { |
---|
.. | .. |
---|
3229 | 2442 | |
---|
3230 | 2443 | num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); |
---|
3231 | 2444 | key.offset -= btrfs_file_extent_offset(buf, fi); |
---|
3232 | | - ret = process_func(trans, root, bytenr, num_bytes, |
---|
3233 | | - parent, ref_root, key.objectid, |
---|
3234 | | - key.offset); |
---|
| 2445 | + btrfs_init_generic_ref(&generic_ref, action, bytenr, |
---|
| 2446 | + num_bytes, parent); |
---|
| 2447 | + generic_ref.real_root = root->root_key.objectid; |
---|
| 2448 | + btrfs_init_data_ref(&generic_ref, ref_root, key.objectid, |
---|
| 2449 | + key.offset); |
---|
| 2450 | + generic_ref.skip_qgroup = for_reloc; |
---|
| 2451 | + if (inc) |
---|
| 2452 | + ret = btrfs_inc_extent_ref(trans, &generic_ref); |
---|
| 2453 | + else |
---|
| 2454 | + ret = btrfs_free_extent(trans, &generic_ref); |
---|
3235 | 2455 | if (ret) |
---|
3236 | 2456 | goto fail; |
---|
3237 | 2457 | } else { |
---|
3238 | 2458 | bytenr = btrfs_node_blockptr(buf, i); |
---|
3239 | 2459 | num_bytes = fs_info->nodesize; |
---|
3240 | | - ret = process_func(trans, root, bytenr, num_bytes, |
---|
3241 | | - parent, ref_root, level - 1, 0); |
---|
| 2460 | + btrfs_init_generic_ref(&generic_ref, action, bytenr, |
---|
| 2461 | + num_bytes, parent); |
---|
| 2462 | + generic_ref.real_root = root->root_key.objectid; |
---|
| 2463 | + btrfs_init_tree_ref(&generic_ref, level - 1, ref_root); |
---|
| 2464 | + generic_ref.skip_qgroup = for_reloc; |
---|
| 2465 | + if (inc) |
---|
| 2466 | + ret = btrfs_inc_extent_ref(trans, &generic_ref); |
---|
| 2467 | + else |
---|
| 2468 | + ret = btrfs_free_extent(trans, &generic_ref); |
---|
3242 | 2469 | if (ret) |
---|
3243 | 2470 | goto fail; |
---|
3244 | 2471 | } |
---|
.. | .. |
---|
3260 | 2487 | return __btrfs_mod_ref(trans, root, buf, full_backref, 0); |
---|
3261 | 2488 | } |
---|
3262 | 2489 | |
---|
3263 | | -static int write_one_cache_group(struct btrfs_trans_handle *trans, |
---|
3264 | | - struct btrfs_fs_info *fs_info, |
---|
3265 | | - struct btrfs_path *path, |
---|
3266 | | - struct btrfs_block_group_cache *cache) |
---|
3267 | | -{ |
---|
3268 | | - int ret; |
---|
3269 | | - struct btrfs_root *extent_root = fs_info->extent_root; |
---|
3270 | | - unsigned long bi; |
---|
3271 | | - struct extent_buffer *leaf; |
---|
3272 | | - |
---|
3273 | | - ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); |
---|
3274 | | - if (ret) { |
---|
3275 | | - if (ret > 0) |
---|
3276 | | - ret = -ENOENT; |
---|
3277 | | - goto fail; |
---|
3278 | | - } |
---|
3279 | | - |
---|
3280 | | - leaf = path->nodes[0]; |
---|
3281 | | - bi = btrfs_item_ptr_offset(leaf, path->slots[0]); |
---|
3282 | | - write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); |
---|
3283 | | - btrfs_mark_buffer_dirty(leaf); |
---|
3284 | | -fail: |
---|
3285 | | - btrfs_release_path(path); |
---|
3286 | | - return ret; |
---|
3287 | | - |
---|
3288 | | -} |
---|
3289 | | - |
---|
3290 | | -static struct btrfs_block_group_cache * |
---|
3291 | | -next_block_group(struct btrfs_fs_info *fs_info, |
---|
3292 | | - struct btrfs_block_group_cache *cache) |
---|
3293 | | -{ |
---|
3294 | | - struct rb_node *node; |
---|
3295 | | - |
---|
3296 | | - spin_lock(&fs_info->block_group_cache_lock); |
---|
3297 | | - |
---|
3298 | | - /* If our block group was removed, we need a full search. */ |
---|
3299 | | - if (RB_EMPTY_NODE(&cache->cache_node)) { |
---|
3300 | | - const u64 next_bytenr = cache->key.objectid + cache->key.offset; |
---|
3301 | | - |
---|
3302 | | - spin_unlock(&fs_info->block_group_cache_lock); |
---|
3303 | | - btrfs_put_block_group(cache); |
---|
3304 | | - cache = btrfs_lookup_first_block_group(fs_info, next_bytenr); return cache; |
---|
3305 | | - } |
---|
3306 | | - node = rb_next(&cache->cache_node); |
---|
3307 | | - btrfs_put_block_group(cache); |
---|
3308 | | - if (node) { |
---|
3309 | | - cache = rb_entry(node, struct btrfs_block_group_cache, |
---|
3310 | | - cache_node); |
---|
3311 | | - btrfs_get_block_group(cache); |
---|
3312 | | - } else |
---|
3313 | | - cache = NULL; |
---|
3314 | | - spin_unlock(&fs_info->block_group_cache_lock); |
---|
3315 | | - return cache; |
---|
3316 | | -} |
---|
3317 | | - |
---|
3318 | | -static int cache_save_setup(struct btrfs_block_group_cache *block_group, |
---|
3319 | | - struct btrfs_trans_handle *trans, |
---|
3320 | | - struct btrfs_path *path) |
---|
3321 | | -{ |
---|
3322 | | - struct btrfs_fs_info *fs_info = block_group->fs_info; |
---|
3323 | | - struct btrfs_root *root = fs_info->tree_root; |
---|
3324 | | - struct inode *inode = NULL; |
---|
3325 | | - struct extent_changeset *data_reserved = NULL; |
---|
3326 | | - u64 alloc_hint = 0; |
---|
3327 | | - int dcs = BTRFS_DC_ERROR; |
---|
3328 | | - u64 num_pages = 0; |
---|
3329 | | - int retries = 0; |
---|
3330 | | - int ret = 0; |
---|
3331 | | - |
---|
3332 | | - /* |
---|
3333 | | - * If this block group is smaller than 100 megs don't bother caching the |
---|
3334 | | - * block group. |
---|
3335 | | - */ |
---|
3336 | | - if (block_group->key.offset < (100 * SZ_1M)) { |
---|
3337 | | - spin_lock(&block_group->lock); |
---|
3338 | | - block_group->disk_cache_state = BTRFS_DC_WRITTEN; |
---|
3339 | | - spin_unlock(&block_group->lock); |
---|
3340 | | - return 0; |
---|
3341 | | - } |
---|
3342 | | - |
---|
3343 | | - if (trans->aborted) |
---|
3344 | | - return 0; |
---|
3345 | | -again: |
---|
3346 | | - inode = lookup_free_space_inode(fs_info, block_group, path); |
---|
3347 | | - if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { |
---|
3348 | | - ret = PTR_ERR(inode); |
---|
3349 | | - btrfs_release_path(path); |
---|
3350 | | - goto out; |
---|
3351 | | - } |
---|
3352 | | - |
---|
3353 | | - if (IS_ERR(inode)) { |
---|
3354 | | - BUG_ON(retries); |
---|
3355 | | - retries++; |
---|
3356 | | - |
---|
3357 | | - if (block_group->ro) |
---|
3358 | | - goto out_free; |
---|
3359 | | - |
---|
3360 | | - ret = create_free_space_inode(fs_info, trans, block_group, |
---|
3361 | | - path); |
---|
3362 | | - if (ret) |
---|
3363 | | - goto out_free; |
---|
3364 | | - goto again; |
---|
3365 | | - } |
---|
3366 | | - |
---|
3367 | | - /* |
---|
3368 | | - * We want to set the generation to 0, that way if anything goes wrong |
---|
3369 | | - * from here on out we know not to trust this cache when we load up next |
---|
3370 | | - * time. |
---|
3371 | | - */ |
---|
3372 | | - BTRFS_I(inode)->generation = 0; |
---|
3373 | | - ret = btrfs_update_inode(trans, root, inode); |
---|
3374 | | - if (ret) { |
---|
3375 | | - /* |
---|
3376 | | - * So theoretically we could recover from this, simply set the |
---|
3377 | | - * super cache generation to 0 so we know to invalidate the |
---|
3378 | | - * cache, but then we'd have to keep track of the block groups |
---|
3379 | | - * that fail this way so we know we _have_ to reset this cache |
---|
3380 | | - * before the next commit or risk reading stale cache. So to |
---|
3381 | | - * limit our exposure to horrible edge cases lets just abort the |
---|
3382 | | - * transaction, this only happens in really bad situations |
---|
3383 | | - * anyway. |
---|
3384 | | - */ |
---|
3385 | | - btrfs_abort_transaction(trans, ret); |
---|
3386 | | - goto out_put; |
---|
3387 | | - } |
---|
3388 | | - WARN_ON(ret); |
---|
3389 | | - |
---|
3390 | | - /* We've already setup this transaction, go ahead and exit */ |
---|
3391 | | - if (block_group->cache_generation == trans->transid && |
---|
3392 | | - i_size_read(inode)) { |
---|
3393 | | - dcs = BTRFS_DC_SETUP; |
---|
3394 | | - goto out_put; |
---|
3395 | | - } |
---|
3396 | | - |
---|
3397 | | - if (i_size_read(inode) > 0) { |
---|
3398 | | - ret = btrfs_check_trunc_cache_free_space(fs_info, |
---|
3399 | | - &fs_info->global_block_rsv); |
---|
3400 | | - if (ret) |
---|
3401 | | - goto out_put; |
---|
3402 | | - |
---|
3403 | | - ret = btrfs_truncate_free_space_cache(trans, NULL, inode); |
---|
3404 | | - if (ret) |
---|
3405 | | - goto out_put; |
---|
3406 | | - } |
---|
3407 | | - |
---|
3408 | | - spin_lock(&block_group->lock); |
---|
3409 | | - if (block_group->cached != BTRFS_CACHE_FINISHED || |
---|
3410 | | - !btrfs_test_opt(fs_info, SPACE_CACHE)) { |
---|
3411 | | - /* |
---|
3412 | | - * don't bother trying to write stuff out _if_ |
---|
3413 | | - * a) we're not cached, |
---|
3414 | | - * b) we're with nospace_cache mount option, |
---|
3415 | | - * c) we're with v2 space_cache (FREE_SPACE_TREE). |
---|
3416 | | - */ |
---|
3417 | | - dcs = BTRFS_DC_WRITTEN; |
---|
3418 | | - spin_unlock(&block_group->lock); |
---|
3419 | | - goto out_put; |
---|
3420 | | - } |
---|
3421 | | - spin_unlock(&block_group->lock); |
---|
3422 | | - |
---|
3423 | | - /* |
---|
3424 | | - * We hit an ENOSPC when setting up the cache in this transaction, just |
---|
3425 | | - * skip doing the setup, we've already cleared the cache so we're safe. |
---|
3426 | | - */ |
---|
3427 | | - if (test_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags)) { |
---|
3428 | | - ret = -ENOSPC; |
---|
3429 | | - goto out_put; |
---|
3430 | | - } |
---|
3431 | | - |
---|
3432 | | - /* |
---|
3433 | | - * Try to preallocate enough space based on how big the block group is. |
---|
3434 | | - * Keep in mind this has to include any pinned space which could end up |
---|
3435 | | - * taking up quite a bit since it's not folded into the other space |
---|
3436 | | - * cache. |
---|
3437 | | - */ |
---|
3438 | | - num_pages = div_u64(block_group->key.offset, SZ_256M); |
---|
3439 | | - if (!num_pages) |
---|
3440 | | - num_pages = 1; |
---|
3441 | | - |
---|
3442 | | - num_pages *= 16; |
---|
3443 | | - num_pages *= PAGE_SIZE; |
---|
3444 | | - |
---|
3445 | | - ret = btrfs_check_data_free_space(inode, &data_reserved, 0, num_pages); |
---|
3446 | | - if (ret) |
---|
3447 | | - goto out_put; |
---|
3448 | | - |
---|
3449 | | - ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, |
---|
3450 | | - num_pages, num_pages, |
---|
3451 | | - &alloc_hint); |
---|
3452 | | - /* |
---|
3453 | | - * Our cache requires contiguous chunks so that we don't modify a bunch |
---|
3454 | | - * of metadata or split extents when writing the cache out, which means |
---|
3455 | | - * we can enospc if we are heavily fragmented in addition to just normal |
---|
3456 | | - * out of space conditions. So if we hit this just skip setting up any |
---|
3457 | | - * other block groups for this transaction, maybe we'll unpin enough |
---|
3458 | | - * space the next time around. |
---|
3459 | | - */ |
---|
3460 | | - if (!ret) |
---|
3461 | | - dcs = BTRFS_DC_SETUP; |
---|
3462 | | - else if (ret == -ENOSPC) |
---|
3463 | | - set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags); |
---|
3464 | | - |
---|
3465 | | -out_put: |
---|
3466 | | - iput(inode); |
---|
3467 | | -out_free: |
---|
3468 | | - btrfs_release_path(path); |
---|
3469 | | -out: |
---|
3470 | | - spin_lock(&block_group->lock); |
---|
3471 | | - if (!ret && dcs == BTRFS_DC_SETUP) |
---|
3472 | | - block_group->cache_generation = trans->transid; |
---|
3473 | | - block_group->disk_cache_state = dcs; |
---|
3474 | | - spin_unlock(&block_group->lock); |
---|
3475 | | - |
---|
3476 | | - extent_changeset_free(data_reserved); |
---|
3477 | | - return ret; |
---|
3478 | | -} |
---|
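cache_save_setup() above sizes the free-space-cache preallocation from the block group length: 16 pages per 256 MiB, with a one-unit floor, converted to bytes. A quick standalone model of that sizing follows; the 4 KiB page size is an assumption made for the example.

```c
#include <stdio.h>
#include <stdint.h>

#define SZ_256M		(256ULL * 1024 * 1024)
#define PAGE_SIZE	4096ULL		/* example page size */

/*
 * Illustrative model of the preallocation sizing in cache_save_setup():
 * 16 pages per 256 MiB of block group, at least 16 pages, returned in bytes.
 */
static uint64_t cache_prealloc_bytes(uint64_t block_group_len)
{
	uint64_t num_pages = block_group_len / SZ_256M;

	if (!num_pages)
		num_pages = 1;
	num_pages *= 16;

	return num_pages * PAGE_SIZE;
}

int main(void)
{
	/* A 1 GiB block group: 4 * 16 pages * 4 KiB = 256 KiB of cache space */
	printf("%llu KiB\n",
	       (unsigned long long)(cache_prealloc_bytes(1ULL << 30) / 1024));
	return 0;
}
```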
3479 | | - |
---|
3480 | | -int btrfs_setup_space_cache(struct btrfs_trans_handle *trans, |
---|
3481 | | - struct btrfs_fs_info *fs_info) |
---|
3482 | | -{ |
---|
3483 | | - struct btrfs_block_group_cache *cache, *tmp; |
---|
3484 | | - struct btrfs_transaction *cur_trans = trans->transaction; |
---|
3485 | | - struct btrfs_path *path; |
---|
3486 | | - |
---|
3487 | | - if (list_empty(&cur_trans->dirty_bgs) || |
---|
3488 | | - !btrfs_test_opt(fs_info, SPACE_CACHE)) |
---|
3489 | | - return 0; |
---|
3490 | | - |
---|
3491 | | - path = btrfs_alloc_path(); |
---|
3492 | | - if (!path) |
---|
3493 | | - return -ENOMEM; |
---|
3494 | | - |
---|
3495 | | - /* Could add new block groups, use _safe just in case */ |
---|
3496 | | - list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs, |
---|
3497 | | - dirty_list) { |
---|
3498 | | - if (cache->disk_cache_state == BTRFS_DC_CLEAR) |
---|
3499 | | - cache_save_setup(cache, trans, path); |
---|
3500 | | - } |
---|
3501 | | - |
---|
3502 | | - btrfs_free_path(path); |
---|
3503 | | - return 0; |
---|
3504 | | -} |
---|
3505 | | - |
---|
3506 | | -/* |
---|
3507 | | - * transaction commit does final block group cache writeback during a |
---|
3508 | | - * critical section where nothing is allowed to change the FS. This is |
---|
3509 | | - * required in order for the cache to actually match the block group, |
---|
3510 | | - * but can introduce a lot of latency into the commit. |
---|
3511 | | - * |
---|
3512 | | - * So, btrfs_start_dirty_block_groups is here to kick off block group |
---|
3513 | | - * cache IO. There's a chance we'll have to redo some of it if the |
---|
3514 | | - * block group changes again during the commit, but it greatly reduces |
---|
3515 | | - * the commit latency by getting rid of the easy block groups while |
---|
3516 | | - * we're still allowing others to join the commit. |
---|
3517 | | - */ |
---|
3518 | | -int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans) |
---|
3519 | | -{ |
---|
3520 | | - struct btrfs_fs_info *fs_info = trans->fs_info; |
---|
3521 | | - struct btrfs_block_group_cache *cache; |
---|
3522 | | - struct btrfs_transaction *cur_trans = trans->transaction; |
---|
3523 | | - int ret = 0; |
---|
3524 | | - int should_put; |
---|
3525 | | - struct btrfs_path *path = NULL; |
---|
3526 | | - LIST_HEAD(dirty); |
---|
3527 | | - struct list_head *io = &cur_trans->io_bgs; |
---|
3528 | | - int num_started = 0; |
---|
3529 | | - int loops = 0; |
---|
3530 | | - |
---|
3531 | | - spin_lock(&cur_trans->dirty_bgs_lock); |
---|
3532 | | - if (list_empty(&cur_trans->dirty_bgs)) { |
---|
3533 | | - spin_unlock(&cur_trans->dirty_bgs_lock); |
---|
3534 | | - return 0; |
---|
3535 | | - } |
---|
3536 | | - list_splice_init(&cur_trans->dirty_bgs, &dirty); |
---|
3537 | | - spin_unlock(&cur_trans->dirty_bgs_lock); |
---|
3538 | | - |
---|
3539 | | -again: |
---|
3540 | | - /* |
---|
3541 | | - * make sure all the block groups on our dirty list actually |
---|
3542 | | - * exist |
---|
3543 | | - */ |
---|
3544 | | - btrfs_create_pending_block_groups(trans); |
---|
3545 | | - |
---|
3546 | | - if (!path) { |
---|
3547 | | - path = btrfs_alloc_path(); |
---|
3548 | | - if (!path) |
---|
3549 | | - return -ENOMEM; |
---|
3550 | | - } |
---|
3551 | | - |
---|
3552 | | - /* |
---|
3553 | | - * cache_write_mutex is here only to save us from balance or automatic |
---|
3554 | | - * removal of empty block groups deleting this block group while we are |
---|
3555 | | - * writing out the cache |
---|
3556 | | - */ |
---|
3557 | | - mutex_lock(&trans->transaction->cache_write_mutex); |
---|
3558 | | - while (!list_empty(&dirty)) { |
---|
3559 | | - cache = list_first_entry(&dirty, |
---|
3560 | | - struct btrfs_block_group_cache, |
---|
3561 | | - dirty_list); |
---|
3562 | | - /* |
---|
3563 | | - * this can happen if something re-dirties a block |
---|
3564 | | - * group that is already under IO. Just wait for it to |
---|
3565 | | - * finish and then do it all again |
---|
3566 | | - */ |
---|
3567 | | - if (!list_empty(&cache->io_list)) { |
---|
3568 | | - list_del_init(&cache->io_list); |
---|
3569 | | - btrfs_wait_cache_io(trans, cache, path); |
---|
3570 | | - btrfs_put_block_group(cache); |
---|
3571 | | - } |
---|
3572 | | - |
---|
3573 | | - |
---|
3574 | | - /* |
---|
3575 | | - * btrfs_wait_cache_io uses the cache->dirty_list to decide |
---|
3576 | | - * if it should update the cache_state. Don't delete |
---|
3577 | | - * until after we wait. |
---|
3578 | | - * |
---|
3579 | | - * Since we're not running in the commit critical section |
---|
3580 | | - * we need the dirty_bgs_lock to protect from update_block_group |
---|
3581 | | - */ |
---|
3582 | | - spin_lock(&cur_trans->dirty_bgs_lock); |
---|
3583 | | - list_del_init(&cache->dirty_list); |
---|
3584 | | - spin_unlock(&cur_trans->dirty_bgs_lock); |
---|
3585 | | - |
---|
3586 | | - should_put = 1; |
---|
3587 | | - |
---|
3588 | | - cache_save_setup(cache, trans, path); |
---|
3589 | | - |
---|
3590 | | - if (cache->disk_cache_state == BTRFS_DC_SETUP) { |
---|
3591 | | - cache->io_ctl.inode = NULL; |
---|
3592 | | - ret = btrfs_write_out_cache(fs_info, trans, |
---|
3593 | | - cache, path); |
---|
3594 | | - if (ret == 0 && cache->io_ctl.inode) { |
---|
3595 | | - num_started++; |
---|
3596 | | - should_put = 0; |
---|
3597 | | - |
---|
3598 | | - /* |
---|
3599 | | - * The cache_write_mutex is protecting the |
---|
3600 | | - * io_list, also refer to the definition of |
---|
3601 | | - * btrfs_transaction::io_bgs for more details |
---|
3602 | | - */ |
---|
3603 | | - list_add_tail(&cache->io_list, io); |
---|
3604 | | - } else { |
---|
3605 | | - /* |
---|
3606 | | - * if we failed to write the cache, the |
---|
3607 | | - * generation will be bad and life goes on |
---|
3608 | | - */ |
---|
3609 | | - ret = 0; |
---|
3610 | | - } |
---|
3611 | | - } |
---|
3612 | | - if (!ret) { |
---|
3613 | | - ret = write_one_cache_group(trans, fs_info, |
---|
3614 | | - path, cache); |
---|
3615 | | - /* |
---|
3616 | | - * Our block group might still be attached to the list |
---|
3617 | | - * of new block groups in the transaction handle of some |
---|
3618 | | - * other task (struct btrfs_trans_handle->new_bgs). This |
---|
3619 | | - * means its block group item isn't yet in the extent |
---|
3620 | | - * tree. If this happens ignore the error, as we will |
---|
3621 | | - * try again later in the critical section of the |
---|
3622 | | - * transaction commit. |
---|
3623 | | - */ |
---|
3624 | | - if (ret == -ENOENT) { |
---|
3625 | | - ret = 0; |
---|
3626 | | - spin_lock(&cur_trans->dirty_bgs_lock); |
---|
3627 | | - if (list_empty(&cache->dirty_list)) { |
---|
3628 | | - list_add_tail(&cache->dirty_list, |
---|
3629 | | - &cur_trans->dirty_bgs); |
---|
3630 | | - btrfs_get_block_group(cache); |
---|
3631 | | - } |
---|
3632 | | - spin_unlock(&cur_trans->dirty_bgs_lock); |
---|
3633 | | - } else if (ret) { |
---|
3634 | | - btrfs_abort_transaction(trans, ret); |
---|
3635 | | - } |
---|
3636 | | - } |
---|
3637 | | - |
---|
3638 | | - /* if it's not on the io list, we need to put the block group */ |
---|
3639 | | - if (should_put) |
---|
3640 | | - btrfs_put_block_group(cache); |
---|
3641 | | - |
---|
3642 | | - if (ret) |
---|
3643 | | - break; |
---|
3644 | | - |
---|
3645 | | - /* |
---|
3646 | | - * Avoid blocking other tasks for too long. It might even save |
---|
3647 | | - * us from writing caches for block groups that are going to be |
---|
3648 | | - * removed. |
---|
3649 | | - */ |
---|
3650 | | - mutex_unlock(&trans->transaction->cache_write_mutex); |
---|
3651 | | - mutex_lock(&trans->transaction->cache_write_mutex); |
---|
3652 | | - } |
---|
3653 | | - mutex_unlock(&trans->transaction->cache_write_mutex); |
---|
3654 | | - |
---|
3655 | | - /* |
---|
3656 | | - * go through delayed refs for all the stuff we've just kicked off |
---|
3657 | | - * and then loop back (just once) |
---|
3658 | | - */ |
---|
3659 | | - ret = btrfs_run_delayed_refs(trans, 0); |
---|
3660 | | - if (!ret && loops == 0) { |
---|
3661 | | - loops++; |
---|
3662 | | - spin_lock(&cur_trans->dirty_bgs_lock); |
---|
3663 | | - list_splice_init(&cur_trans->dirty_bgs, &dirty); |
---|
3664 | | - /* |
---|
3665 | | - * dirty_bgs_lock protects us from concurrent block group |
---|
3666 | | - * deletes too (not just cache_write_mutex). |
---|
3667 | | - */ |
---|
3668 | | - if (!list_empty(&dirty)) { |
---|
3669 | | - spin_unlock(&cur_trans->dirty_bgs_lock); |
---|
3670 | | - goto again; |
---|
3671 | | - } |
---|
3672 | | - spin_unlock(&cur_trans->dirty_bgs_lock); |
---|
3673 | | - } else if (ret < 0) { |
---|
3674 | | - btrfs_cleanup_dirty_bgs(cur_trans, fs_info); |
---|
3675 | | - } |
---|
3676 | | - |
---|
3677 | | - btrfs_free_path(path); |
---|
3678 | | - return ret; |
---|
3679 | | -} |
---|
3680 | | - |
---|
3681 | | -int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, |
---|
3682 | | - struct btrfs_fs_info *fs_info) |
---|
3683 | | -{ |
---|
3684 | | - struct btrfs_block_group_cache *cache; |
---|
3685 | | - struct btrfs_transaction *cur_trans = trans->transaction; |
---|
3686 | | - int ret = 0; |
---|
3687 | | - int should_put; |
---|
3688 | | - struct btrfs_path *path; |
---|
3689 | | - struct list_head *io = &cur_trans->io_bgs; |
---|
3690 | | - int num_started = 0; |
---|
3691 | | - |
---|
3692 | | - path = btrfs_alloc_path(); |
---|
3693 | | - if (!path) |
---|
3694 | | - return -ENOMEM; |
---|
3695 | | - |
---|
3696 | | - /* |
---|
3697 | | - * Even though we are in the critical section of the transaction commit, |
---|
3698 | | - * we can still have concurrent tasks adding elements to this |
---|
3699 | | - * transaction's list of dirty block groups. These tasks correspond to |
---|
3700 | | - * endio free space workers started when writeback finishes for a |
---|
3701 | | - * space cache, which run inode.c:btrfs_finish_ordered_io(), and can |
---|
3702 | | - * allocate new block groups as a result of COWing nodes of the root |
---|
3703 | | - * tree when updating the free space inode. The writeback for the space |
---|
3704 | | - * caches is triggered by an earlier call to |
---|
3705 | | - * btrfs_start_dirty_block_groups() and iterations of the following |
---|
3706 | | - * loop. |
---|
3707 | | - * Also we want to do the cache_save_setup first and then run the |
---|
3708 | | - * delayed refs to make sure we have the best chance at doing this all |
---|
3709 | | - * in one shot. |
---|
3710 | | - */ |
---|
3711 | | - spin_lock(&cur_trans->dirty_bgs_lock); |
---|
3712 | | - while (!list_empty(&cur_trans->dirty_bgs)) { |
---|
3713 | | - cache = list_first_entry(&cur_trans->dirty_bgs, |
---|
3714 | | - struct btrfs_block_group_cache, |
---|
3715 | | - dirty_list); |
---|
3716 | | - |
---|
3717 | | - /* |
---|
3718 | | - * this can happen if cache_save_setup re-dirties a block |
---|
3719 | | - * group that is already under IO. Just wait for it to |
---|
3720 | | - * finish and then do it all again |
---|
3721 | | - */ |
---|
3722 | | - if (!list_empty(&cache->io_list)) { |
---|
3723 | | - spin_unlock(&cur_trans->dirty_bgs_lock); |
---|
3724 | | - list_del_init(&cache->io_list); |
---|
3725 | | - btrfs_wait_cache_io(trans, cache, path); |
---|
3726 | | - btrfs_put_block_group(cache); |
---|
3727 | | - spin_lock(&cur_trans->dirty_bgs_lock); |
---|
3728 | | - } |
---|
3729 | | - |
---|
3730 | | - /* |
---|
3731 | | - * don't remove from the dirty list until after we've waited |
---|
3732 | | - * on any pending IO |
---|
3733 | | - */ |
---|
3734 | | - list_del_init(&cache->dirty_list); |
---|
3735 | | - spin_unlock(&cur_trans->dirty_bgs_lock); |
---|
3736 | | - should_put = 1; |
---|
3737 | | - |
---|
3738 | | - cache_save_setup(cache, trans, path); |
---|
3739 | | - |
---|
3740 | | - if (!ret) |
---|
3741 | | - ret = btrfs_run_delayed_refs(trans, |
---|
3742 | | - (unsigned long) -1); |
---|
3743 | | - |
---|
3744 | | - if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) { |
---|
3745 | | - cache->io_ctl.inode = NULL; |
---|
3746 | | - ret = btrfs_write_out_cache(fs_info, trans, |
---|
3747 | | - cache, path); |
---|
3748 | | - if (ret == 0 && cache->io_ctl.inode) { |
---|
3749 | | - num_started++; |
---|
3750 | | - should_put = 0; |
---|
3751 | | - list_add_tail(&cache->io_list, io); |
---|
3752 | | - } else { |
---|
3753 | | - /* |
---|
3754 | | - * if we failed to write the cache, the |
---|
3755 | | - * generation will be bad and life goes on |
---|
3756 | | - */ |
---|
3757 | | - ret = 0; |
---|
3758 | | - } |
---|
3759 | | - } |
---|
3760 | | - if (!ret) { |
---|
3761 | | - ret = write_one_cache_group(trans, fs_info, |
---|
3762 | | - path, cache); |
---|
3763 | | - /* |
---|
3764 | | - * One of the free space endio workers might have |
---|
3765 | | - * created a new block group while updating a free space |
---|
3766 | | - * cache's inode (at inode.c:btrfs_finish_ordered_io()) |
---|
3767 | | - * and hasn't released its transaction handle yet, in |
---|
3768 | | - * which case the new block group is still attached to |
---|
3769 | | - * its transaction handle and its creation has not |
---|
3770 | | - * finished yet (no block group item in the extent tree |
---|
3771 | | - * yet, etc). If this is the case, wait for all free |
---|
3772 | | - * space endio workers to finish and retry. This is a |
---|
3773 | | - * very rare case so no need for a more efficient and |
---|
3774 | | - * complex approach. |
---|
3775 | | - */ |
---|
3776 | | - if (ret == -ENOENT) { |
---|
3777 | | - wait_event(cur_trans->writer_wait, |
---|
3778 | | - atomic_read(&cur_trans->num_writers) == 1); |
---|
3779 | | - ret = write_one_cache_group(trans, fs_info, |
---|
3780 | | - path, cache); |
---|
3781 | | - } |
---|
3782 | | - if (ret) |
---|
3783 | | - btrfs_abort_transaction(trans, ret); |
---|
3784 | | - } |
---|
3785 | | - |
---|
3786 | | - /* if it's not on the io list, we need to put the block group */ |
---|
3787 | | - if (should_put) |
---|
3788 | | - btrfs_put_block_group(cache); |
---|
3789 | | - spin_lock(&cur_trans->dirty_bgs_lock); |
---|
3790 | | - } |
---|
3791 | | - spin_unlock(&cur_trans->dirty_bgs_lock); |
---|
3792 | | - |
---|
3793 | | - /* |
---|
3794 | | - * Refer to the definition of io_bgs member for details why it's safe |
---|
3795 | | - * to use it without any locking |
---|
3796 | | - */ |
---|
3797 | | - while (!list_empty(io)) { |
---|
3798 | | - cache = list_first_entry(io, struct btrfs_block_group_cache, |
---|
3799 | | - io_list); |
---|
3800 | | - list_del_init(&cache->io_list); |
---|
3801 | | - btrfs_wait_cache_io(trans, cache, path); |
---|
3802 | | - btrfs_put_block_group(cache); |
---|
3803 | | - } |
---|
3804 | | - |
---|
3805 | | - btrfs_free_path(path); |
---|
3806 | | - return ret; |
---|
3807 | | -} |
---|
3808 | | - |
---|
3809 | 2490 | int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr) |
---|
3810 | 2491 | { |
---|
3811 | | - struct btrfs_block_group_cache *block_group; |
---|
| 2492 | + struct btrfs_block_group *block_group; |
---|
3812 | 2493 | int readonly = 0; |
---|
3813 | 2494 | |
---|
3814 | 2495 | block_group = btrfs_lookup_block_group(fs_info, bytenr); |
---|
.. | .. |
---|
3817 | 2498 | if (block_group) |
---|
3818 | 2499 | btrfs_put_block_group(block_group); |
---|
3819 | 2500 | return readonly; |
---|
3820 | | -} |
---|
3821 | | - |
---|
3822 | | -bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr) |
---|
3823 | | -{ |
---|
3824 | | - struct btrfs_block_group_cache *bg; |
---|
3825 | | - bool ret = true; |
---|
3826 | | - |
---|
3827 | | - bg = btrfs_lookup_block_group(fs_info, bytenr); |
---|
3828 | | - if (!bg) |
---|
3829 | | - return false; |
---|
3830 | | - |
---|
3831 | | - spin_lock(&bg->lock); |
---|
3832 | | - if (bg->ro) |
---|
3833 | | - ret = false; |
---|
3834 | | - else |
---|
3835 | | - atomic_inc(&bg->nocow_writers); |
---|
3836 | | - spin_unlock(&bg->lock); |
---|
3837 | | - |
---|
3838 | | - /* no put on block group, done by btrfs_dec_nocow_writers */ |
---|
3839 | | - if (!ret) |
---|
3840 | | - btrfs_put_block_group(bg); |
---|
3841 | | - |
---|
3842 | | - return ret; |
---|
3843 | | - |
---|
3844 | | -} |
---|
3845 | | - |
---|
3846 | | -void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr) |
---|
3847 | | -{ |
---|
3848 | | - struct btrfs_block_group_cache *bg; |
---|
3849 | | - |
---|
3850 | | - bg = btrfs_lookup_block_group(fs_info, bytenr); |
---|
3851 | | - ASSERT(bg); |
---|
3852 | | - if (atomic_dec_and_test(&bg->nocow_writers)) |
---|
3853 | | - wake_up_var(&bg->nocow_writers); |
---|
3854 | | - /* |
---|
3855 | | - * Once for our lookup and once for the lookup done by a previous call |
---|
3856 | | - * to btrfs_inc_nocow_writers() |
---|
3857 | | - */ |
---|
3858 | | - btrfs_put_block_group(bg); |
---|
3859 | | - btrfs_put_block_group(bg); |
---|
3860 | | -} |
---|
3861 | | - |
---|
3862 | | -void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg) |
---|
3863 | | -{ |
---|
3864 | | - wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers)); |
---|
3865 | | -} |
---|
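The three nocow-writer helpers removed from this file here form a small gate: btrfs_inc_nocow_writers() bumps a counter only while the block group is writable, btrfs_dec_nocow_writers() drops it and wakes waiters when it hits zero, and btrfs_wait_nocow_writers() blocks until the counter drains. A compact userspace model of the same counting scheme is sketched below, using a mutex and condition variable in place of the kernel's atomic counter, wake_up_var() and wait_var_event(); it is an illustration, not the kernel code.

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative userspace model of the nocow-writers gate; not kernel code. */
struct group_gate {
	pthread_mutex_t lock;
	pthread_cond_t drained;
	int nocow_writers;	/* stands in for the kernel's atomic_t */
	bool ro;
};

static struct group_gate gate = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.drained = PTHREAD_COND_INITIALIZER,
};

/* Like btrfs_inc_nocow_writers(): only enter while the group is writable. */
static bool gate_enter(struct group_gate *g)
{
	bool ok;

	pthread_mutex_lock(&g->lock);
	ok = !g->ro;
	if (ok)
		g->nocow_writers++;
	pthread_mutex_unlock(&g->lock);
	return ok;
}

/* Like btrfs_dec_nocow_writers(): drop the count and wake waiters at zero. */
static void gate_exit(struct group_gate *g)
{
	pthread_mutex_lock(&g->lock);
	if (--g->nocow_writers == 0)
		pthread_cond_broadcast(&g->drained);
	pthread_mutex_unlock(&g->lock);
}

/* Like btrfs_wait_nocow_writers(): wait for the count to drain to zero. */
static void gate_wait(struct group_gate *g)
{
	pthread_mutex_lock(&g->lock);
	while (g->nocow_writers)
		pthread_cond_wait(&g->drained, &g->lock);
	pthread_mutex_unlock(&g->lock);
}

int main(void)
{
	if (gate_enter(&gate)) {
		/* ... a NOCOW write against the group would happen here ... */
		gate_exit(&gate);
	}
	gate_wait(&gate);	/* returns immediately: the counter is zero */
	printf("gate drained\n");
	return 0;
}
```

The kernel versions additionally juggle block group references (hence the double btrfs_put_block_group() in the dec path); the sketch only models the counting.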
3866 | | - |
---|
3867 | | -static const char *alloc_name(u64 flags) |
---|
3868 | | -{ |
---|
3869 | | - switch (flags) { |
---|
3870 | | - case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA: |
---|
3871 | | - return "mixed"; |
---|
3872 | | - case BTRFS_BLOCK_GROUP_METADATA: |
---|
3873 | | - return "metadata"; |
---|
3874 | | - case BTRFS_BLOCK_GROUP_DATA: |
---|
3875 | | - return "data"; |
---|
3876 | | - case BTRFS_BLOCK_GROUP_SYSTEM: |
---|
3877 | | - return "system"; |
---|
3878 | | - default: |
---|
3879 | | - WARN_ON(1); |
---|
3880 | | - return "invalid-combination"; |
---|
3881 | | - }; |
---|
3882 | | -} |
---|
3883 | | - |
---|
3884 | | -static int create_space_info(struct btrfs_fs_info *info, u64 flags) |
---|
3885 | | -{ |
---|
3886 | | - |
---|
3887 | | - struct btrfs_space_info *space_info; |
---|
3888 | | - int i; |
---|
3889 | | - int ret; |
---|
3890 | | - |
---|
3891 | | - space_info = kzalloc(sizeof(*space_info), GFP_NOFS); |
---|
3892 | | - if (!space_info) |
---|
3893 | | - return -ENOMEM; |
---|
3894 | | - |
---|
3895 | | - ret = percpu_counter_init(&space_info->total_bytes_pinned, 0, |
---|
3896 | | - GFP_KERNEL); |
---|
3897 | | - if (ret) { |
---|
3898 | | - kfree(space_info); |
---|
3899 | | - return ret; |
---|
3900 | | - } |
---|
3901 | | - |
---|
3902 | | - for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) |
---|
3903 | | - INIT_LIST_HEAD(&space_info->block_groups[i]); |
---|
3904 | | - init_rwsem(&space_info->groups_sem); |
---|
3905 | | - spin_lock_init(&space_info->lock); |
---|
3906 | | - space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; |
---|
3907 | | - space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; |
---|
3908 | | - init_waitqueue_head(&space_info->wait); |
---|
3909 | | - INIT_LIST_HEAD(&space_info->ro_bgs); |
---|
3910 | | - INIT_LIST_HEAD(&space_info->tickets); |
---|
3911 | | - INIT_LIST_HEAD(&space_info->priority_tickets); |
---|
3912 | | - |
---|
3913 | | - ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype, |
---|
3914 | | - info->space_info_kobj, "%s", |
---|
3915 | | - alloc_name(space_info->flags)); |
---|
3916 | | - if (ret) { |
---|
3917 | | - kobject_put(&space_info->kobj); |
---|
3918 | | - return ret; |
---|
3919 | | - } |
---|
3920 | | - |
---|
3921 | | - list_add_rcu(&space_info->list, &info->space_info); |
---|
3922 | | - if (flags & BTRFS_BLOCK_GROUP_DATA) |
---|
3923 | | - info->data_sinfo = space_info; |
---|
3924 | | - |
---|
3925 | | - return ret; |
---|
3926 | | -} |
---|
3927 | | - |
---|
3928 | | -static void update_space_info(struct btrfs_fs_info *info, u64 flags, |
---|
3929 | | - u64 total_bytes, u64 bytes_used, |
---|
3930 | | - u64 bytes_readonly, |
---|
3931 | | - struct btrfs_space_info **space_info) |
---|
3932 | | -{ |
---|
3933 | | - struct btrfs_space_info *found; |
---|
3934 | | - int factor; |
---|
3935 | | - |
---|
3936 | | - factor = btrfs_bg_type_to_factor(flags); |
---|
3937 | | - |
---|
3938 | | - found = __find_space_info(info, flags); |
---|
3939 | | - ASSERT(found); |
---|
3940 | | - spin_lock(&found->lock); |
---|
3941 | | - found->total_bytes += total_bytes; |
---|
3942 | | - found->disk_total += total_bytes * factor; |
---|
3943 | | - found->bytes_used += bytes_used; |
---|
3944 | | - found->disk_used += bytes_used * factor; |
---|
3945 | | - found->bytes_readonly += bytes_readonly; |
---|
3946 | | - if (total_bytes > 0) |
---|
3947 | | - found->full = 0; |
---|
3948 | | - space_info_add_new_bytes(info, found, total_bytes - |
---|
3949 | | - bytes_used - bytes_readonly); |
---|
3950 | | - spin_unlock(&found->lock); |
---|
3951 | | - *space_info = found; |
---|
3952 | | -} |
---|
3953 | | - |
---|
3954 | | -static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) |
---|
3955 | | -{ |
---|
3956 | | - u64 extra_flags = chunk_to_extended(flags) & |
---|
3957 | | - BTRFS_EXTENDED_PROFILE_MASK; |
---|
3958 | | - |
---|
3959 | | - write_seqlock(&fs_info->profiles_lock); |
---|
3960 | | - if (flags & BTRFS_BLOCK_GROUP_DATA) |
---|
3961 | | - fs_info->avail_data_alloc_bits |= extra_flags; |
---|
3962 | | - if (flags & BTRFS_BLOCK_GROUP_METADATA) |
---|
3963 | | - fs_info->avail_metadata_alloc_bits |= extra_flags; |
---|
3964 | | - if (flags & BTRFS_BLOCK_GROUP_SYSTEM) |
---|
3965 | | - fs_info->avail_system_alloc_bits |= extra_flags; |
---|
3966 | | - write_sequnlock(&fs_info->profiles_lock); |
---|
3967 | | -} |
---|
3968 | | - |
---|
3969 | | -/* |
---|
3970 | | - * returns target flags in extended format or 0 if restripe for this |
---|
3971 | | - * chunk_type is not in progress |
---|
3972 | | - * |
---|
3973 | | - * should be called with balance_lock held |
---|
3974 | | - */ |
---|
3975 | | -static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags) |
---|
3976 | | -{ |
---|
3977 | | - struct btrfs_balance_control *bctl = fs_info->balance_ctl; |
---|
3978 | | - u64 target = 0; |
---|
3979 | | - |
---|
3980 | | - if (!bctl) |
---|
3981 | | - return 0; |
---|
3982 | | - |
---|
3983 | | - if (flags & BTRFS_BLOCK_GROUP_DATA && |
---|
3984 | | - bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) { |
---|
3985 | | - target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target; |
---|
3986 | | - } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM && |
---|
3987 | | - bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { |
---|
3988 | | - target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target; |
---|
3989 | | - } else if (flags & BTRFS_BLOCK_GROUP_METADATA && |
---|
3990 | | - bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) { |
---|
3991 | | - target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target; |
---|
3992 | | - } |
---|
3993 | | - |
---|
3994 | | - return target; |
---|
3995 | | -} |
---|
3996 | | - |
---|
3997 | | -/* |
---|
3998 | | - * @flags: available profiles in extended format (see ctree.h) |
---|
3999 | | - * |
---|
4000 | | - * Returns reduced profile in chunk format. If profile changing is in |
---|
4001 | | - * progress (either running or paused) picks the target profile (if it's |
---|
4002 | | - * already available), otherwise falls back to plain reducing. |
---|
4003 | | - */ |
---|
4004 | | -static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags) |
---|
4005 | | -{ |
---|
4006 | | - u64 num_devices = fs_info->fs_devices->rw_devices; |
---|
4007 | | - u64 target; |
---|
4008 | | - u64 raid_type; |
---|
4009 | | - u64 allowed = 0; |
---|
4010 | | - |
---|
4011 | | - /* |
---|
4012 | | - * see if restripe for this chunk_type is in progress, if so |
---|
4013 | | - * try to reduce to the target profile |
---|
4014 | | - */ |
---|
4015 | | - spin_lock(&fs_info->balance_lock); |
---|
4016 | | - target = get_restripe_target(fs_info, flags); |
---|
4017 | | - if (target) { |
---|
4018 | | - /* pick target profile only if it's already available */ |
---|
4019 | | - if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) { |
---|
4020 | | - spin_unlock(&fs_info->balance_lock); |
---|
4021 | | - return extended_to_chunk(target); |
---|
4022 | | - } |
---|
4023 | | - } |
---|
4024 | | - spin_unlock(&fs_info->balance_lock); |
---|
4025 | | - |
---|
4026 | | - /* First, mask out the RAID levels which aren't possible */ |
---|
4027 | | - for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) { |
---|
4028 | | - if (num_devices >= btrfs_raid_array[raid_type].devs_min) |
---|
4029 | | - allowed |= btrfs_raid_array[raid_type].bg_flag; |
---|
4030 | | - } |
---|
4031 | | - allowed &= flags; |
---|
4032 | | - |
---|
4033 | | - if (allowed & BTRFS_BLOCK_GROUP_RAID6) |
---|
4034 | | - allowed = BTRFS_BLOCK_GROUP_RAID6; |
---|
4035 | | - else if (allowed & BTRFS_BLOCK_GROUP_RAID5) |
---|
4036 | | - allowed = BTRFS_BLOCK_GROUP_RAID5; |
---|
4037 | | - else if (allowed & BTRFS_BLOCK_GROUP_RAID10) |
---|
4038 | | - allowed = BTRFS_BLOCK_GROUP_RAID10; |
---|
4039 | | - else if (allowed & BTRFS_BLOCK_GROUP_RAID1) |
---|
4040 | | - allowed = BTRFS_BLOCK_GROUP_RAID1; |
---|
4041 | | - else if (allowed & BTRFS_BLOCK_GROUP_RAID0) |
---|
4042 | | - allowed = BTRFS_BLOCK_GROUP_RAID0; |
---|
4043 | | - |
---|
4044 | | - flags &= ~BTRFS_BLOCK_GROUP_PROFILE_MASK; |
---|
4045 | | - |
---|
4046 | | - return extended_to_chunk(flags | allowed); |
---|
4047 | | -} |
---|
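
A minimal standalone sketch in plain C (illustrative only, placeholder bit values rather than the real BTRFS_BLOCK_GROUP_* flags) of the fallback selection order used by the removed btrfs_reduce_alloc_profile() above when no restripe target applies:

#include <stdio.h>
#include <stdint.h>

/* Placeholder bit values, not the real BTRFS_BLOCK_GROUP_* flags. */
enum { RAID0 = 1 << 0, RAID1 = 1 << 1, RAID10 = 1 << 2, RAID5 = 1 << 3, RAID6 = 1 << 4 };

/*
 * Keep only one profile from the allowed set, using the same preference
 * order as the removed btrfs_reduce_alloc_profile():
 * RAID6 > RAID5 > RAID10 > RAID1 > RAID0 > single.
 */
static uint64_t reduce_profile(uint64_t allowed)
{
	if (allowed & RAID6)
		return RAID6;
	if (allowed & RAID5)
		return RAID5;
	if (allowed & RAID10)
		return RAID10;
	if (allowed & RAID1)
		return RAID1;
	if (allowed & RAID0)
		return RAID0;
	return 0;	/* plain single chunks */
}

int main(void)
{
	printf("0x%llx\n", (unsigned long long)reduce_profile(RAID0 | RAID1 | RAID10));
	return 0;
}
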
4048 | | - |
---|
4049 | | -static u64 get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags) |
---|
4050 | | -{ |
---|
4051 | | - unsigned seq; |
---|
4052 | | - u64 flags; |
---|
4053 | | - |
---|
4054 | | - do { |
---|
4055 | | - flags = orig_flags; |
---|
4056 | | - seq = read_seqbegin(&fs_info->profiles_lock); |
---|
4057 | | - |
---|
4058 | | - if (flags & BTRFS_BLOCK_GROUP_DATA) |
---|
4059 | | - flags |= fs_info->avail_data_alloc_bits; |
---|
4060 | | - else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) |
---|
4061 | | - flags |= fs_info->avail_system_alloc_bits; |
---|
4062 | | - else if (flags & BTRFS_BLOCK_GROUP_METADATA) |
---|
4063 | | - flags |= fs_info->avail_metadata_alloc_bits; |
---|
4064 | | - } while (read_seqretry(&fs_info->profiles_lock, seq)); |
---|
4065 | | - |
---|
4066 | | - return btrfs_reduce_alloc_profile(fs_info, flags); |
---|
4067 | 2501 | } |
---|
4068 | 2502 | |
---|
4069 | 2503 | static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data) |
---|
.. | .. |
---|
4079 | 2513 | else |
---|
4080 | 2514 | flags = BTRFS_BLOCK_GROUP_METADATA; |
---|
4081 | 2515 | |
---|
4082 | | - ret = get_alloc_profile(fs_info, flags); |
---|
| 2516 | + ret = btrfs_get_alloc_profile(fs_info, flags); |
---|
4083 | 2517 | return ret; |
---|
4084 | | -} |
---|
4085 | | - |
---|
4086 | | -u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info) |
---|
4087 | | -{ |
---|
4088 | | - return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_DATA); |
---|
4089 | | -} |
---|
4090 | | - |
---|
4091 | | -u64 btrfs_metadata_alloc_profile(struct btrfs_fs_info *fs_info) |
---|
4092 | | -{ |
---|
4093 | | - return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_METADATA); |
---|
4094 | | -} |
---|
4095 | | - |
---|
4096 | | -u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info) |
---|
4097 | | -{ |
---|
4098 | | - return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); |
---|
4099 | | -} |
---|
4100 | | - |
---|
4101 | | -static u64 btrfs_space_info_used(struct btrfs_space_info *s_info, |
---|
4102 | | - bool may_use_included) |
---|
4103 | | -{ |
---|
4104 | | - ASSERT(s_info); |
---|
4105 | | - return s_info->bytes_used + s_info->bytes_reserved + |
---|
4106 | | - s_info->bytes_pinned + s_info->bytes_readonly + |
---|
4107 | | - (may_use_included ? s_info->bytes_may_use : 0); |
---|
4108 | | -} |
---|
4109 | | - |
---|
4110 | | -int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes) |
---|
4111 | | -{ |
---|
4112 | | - struct btrfs_root *root = inode->root; |
---|
4113 | | - struct btrfs_fs_info *fs_info = root->fs_info; |
---|
4114 | | - struct btrfs_space_info *data_sinfo = fs_info->data_sinfo; |
---|
4115 | | - u64 used; |
---|
4116 | | - int ret = 0; |
---|
4117 | | - int need_commit = 2; |
---|
4118 | | - int have_pinned_space; |
---|
4119 | | - |
---|
4120 | | - /* make sure bytes are sectorsize aligned */ |
---|
4121 | | - bytes = ALIGN(bytes, fs_info->sectorsize); |
---|
4122 | | - |
---|
4123 | | - if (btrfs_is_free_space_inode(inode)) { |
---|
4124 | | - need_commit = 0; |
---|
4125 | | - ASSERT(current->journal_info); |
---|
4126 | | - } |
---|
4127 | | - |
---|
4128 | | -again: |
---|
4129 | | - /* make sure we have enough space to handle the data first */ |
---|
4130 | | - spin_lock(&data_sinfo->lock); |
---|
4131 | | - used = btrfs_space_info_used(data_sinfo, true); |
---|
4132 | | - |
---|
4133 | | - if (used + bytes > data_sinfo->total_bytes) { |
---|
4134 | | - struct btrfs_trans_handle *trans; |
---|
4135 | | - |
---|
4136 | | - /* |
---|
4137 | | - * if we don't have enough free bytes in this space then we need |
---|
4138 | | - * to alloc a new chunk. |
---|
4139 | | - */ |
---|
4140 | | - if (!data_sinfo->full) { |
---|
4141 | | - u64 alloc_target; |
---|
4142 | | - |
---|
4143 | | - data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; |
---|
4144 | | - spin_unlock(&data_sinfo->lock); |
---|
4145 | | - |
---|
4146 | | - alloc_target = btrfs_data_alloc_profile(fs_info); |
---|
4147 | | - /* |
---|
4148 | | - * It is ugly that we don't call a nolock join
---|
4149 | | - * transaction for the free space inode case here.
---|
4150 | | - * But it is safe because we only do the data space
---|
4151 | | - * reservation for the free space cache in the
---|
4152 | | - * transaction context; the common join transaction
---|
4153 | | - * just increases the counter of the current transaction
---|
4154 | | - * handle and doesn't try to acquire the trans_lock of
---|
4155 | | - * the fs.
---|
4156 | | - */ |
---|
4157 | | - trans = btrfs_join_transaction(root); |
---|
4158 | | - if (IS_ERR(trans)) |
---|
4159 | | - return PTR_ERR(trans); |
---|
4160 | | - |
---|
4161 | | - ret = do_chunk_alloc(trans, alloc_target, |
---|
4162 | | - CHUNK_ALLOC_NO_FORCE); |
---|
4163 | | - btrfs_end_transaction(trans); |
---|
4164 | | - if (ret < 0) { |
---|
4165 | | - if (ret != -ENOSPC) |
---|
4166 | | - return ret; |
---|
4167 | | - else { |
---|
4168 | | - have_pinned_space = 1; |
---|
4169 | | - goto commit_trans; |
---|
4170 | | - } |
---|
4171 | | - } |
---|
4172 | | - |
---|
4173 | | - goto again; |
---|
4174 | | - } |
---|
4175 | | - |
---|
4176 | | - /* |
---|
4177 | | - * If we don't have enough pinned space to deal with this |
---|
4178 | | - * allocation, and no removed chunk in current transaction, |
---|
4179 | | - * don't bother committing the transaction. |
---|
4180 | | - */ |
---|
4181 | | - have_pinned_space = __percpu_counter_compare( |
---|
4182 | | - &data_sinfo->total_bytes_pinned, |
---|
4183 | | - used + bytes - data_sinfo->total_bytes, |
---|
4184 | | - BTRFS_TOTAL_BYTES_PINNED_BATCH); |
---|
4185 | | - spin_unlock(&data_sinfo->lock); |
---|
4186 | | - |
---|
4187 | | - /* commit the current transaction and try again */ |
---|
4188 | | -commit_trans: |
---|
4189 | | - if (need_commit) { |
---|
4190 | | - need_commit--; |
---|
4191 | | - |
---|
4192 | | - if (need_commit > 0) { |
---|
4193 | | - btrfs_start_delalloc_roots(fs_info, -1); |
---|
4194 | | - btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, |
---|
4195 | | - (u64)-1); |
---|
4196 | | - } |
---|
4197 | | - |
---|
4198 | | - trans = btrfs_join_transaction(root); |
---|
4199 | | - if (IS_ERR(trans)) |
---|
4200 | | - return PTR_ERR(trans); |
---|
4201 | | - if (have_pinned_space >= 0 || |
---|
4202 | | - test_bit(BTRFS_TRANS_HAVE_FREE_BGS, |
---|
4203 | | - &trans->transaction->flags) || |
---|
4204 | | - need_commit > 0) { |
---|
4205 | | - ret = btrfs_commit_transaction(trans); |
---|
4206 | | - if (ret) |
---|
4207 | | - return ret; |
---|
4208 | | - /* |
---|
4209 | | - * The cleaner kthread might still be doing iput |
---|
4210 | | - * operations. Wait for it to finish so that |
---|
4211 | | - * more space is released. |
---|
4212 | | - */ |
---|
4213 | | - mutex_lock(&fs_info->cleaner_delayed_iput_mutex); |
---|
4214 | | - mutex_unlock(&fs_info->cleaner_delayed_iput_mutex); |
---|
4215 | | - goto again; |
---|
4216 | | - } else { |
---|
4217 | | - btrfs_end_transaction(trans); |
---|
4218 | | - } |
---|
4219 | | - } |
---|
4220 | | - |
---|
4221 | | - trace_btrfs_space_reservation(fs_info, |
---|
4222 | | - "space_info:enospc", |
---|
4223 | | - data_sinfo->flags, bytes, 1); |
---|
4224 | | - return -ENOSPC; |
---|
4225 | | - } |
---|
4226 | | - data_sinfo->bytes_may_use += bytes; |
---|
4227 | | - trace_btrfs_space_reservation(fs_info, "space_info", |
---|
4228 | | - data_sinfo->flags, bytes, 1); |
---|
4229 | | - spin_unlock(&data_sinfo->lock); |
---|
4230 | | - |
---|
4231 | | - return 0; |
---|
4232 | | -} |
---|
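
A toy standalone sketch in plain C (illustrative only, all sizes and the "10% was pinned" guess made up) of the retry ladder the removed btrfs_alloc_data_chunk_ondemand() above walks before giving up with ENOSPC:

#include <stdio.h>
#include <stdint.h>

/*
 * Toy model (not kernel code):
 *   1. reserve if the data space info has room;
 *   2. otherwise force-allocate a new data chunk and retry;
 *   3. otherwise commit the transaction (at most twice) hoping pinned
 *      space gets freed, and retry;
 *   4. otherwise fail with ENOSPC.
 */
struct data_space {
	uint64_t total;
	uint64_t used;	/* models used + reserved + pinned + may_use */
	int full;	/* no more unallocated device space */
};

static int reserve_data(struct data_space *s, uint64_t bytes)
{
	int commits_left = 2;

again:
	if (s->used + bytes <= s->total) {
		s->used += bytes;		/* bytes_may_use += bytes */
		return 0;
	}
	if (!s->full) {				/* step 2: allocate a chunk */
		s->total += 1ULL << 30;		/* pretend a 1GiB chunk appeared */
		s->full = 1;			/* pretend the disk is now full */
		goto again;
	}
	if (commits_left--) {			/* step 3: commit the transaction */
		s->used -= s->used / 10;	/* pretend 10% was merely pinned */
		goto again;
	}
	return -1;				/* step 4: ENOSPC */
}

int main(void)
{
	struct data_space s = { .total = 1ULL << 30, .used = (1ULL << 30) - 4096 };

	printf("reserve 1MiB -> %d, total is now %llu MiB\n",
	       reserve_data(&s, 1 << 20), (unsigned long long)(s.total >> 20));
	return 0;
}
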
4233 | | - |
---|
4234 | | -int btrfs_check_data_free_space(struct inode *inode, |
---|
4235 | | - struct extent_changeset **reserved, u64 start, u64 len) |
---|
4236 | | -{ |
---|
4237 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
4238 | | - int ret; |
---|
4239 | | - |
---|
4240 | | - /* align the range */ |
---|
4241 | | - len = round_up(start + len, fs_info->sectorsize) - |
---|
4242 | | - round_down(start, fs_info->sectorsize); |
---|
4243 | | - start = round_down(start, fs_info->sectorsize); |
---|
4244 | | - |
---|
4245 | | - ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len); |
---|
4246 | | - if (ret < 0) |
---|
4247 | | - return ret; |
---|
4248 | | - |
---|
4249 | | - /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */ |
---|
4250 | | - ret = btrfs_qgroup_reserve_data(inode, reserved, start, len); |
---|
4251 | | - if (ret < 0) |
---|
4252 | | - btrfs_free_reserved_data_space_noquota(inode, start, len); |
---|
4253 | | - else |
---|
4254 | | - ret = 0; |
---|
4255 | | - return ret; |
---|
4256 | | -} |
---|
4257 | | - |
---|
4258 | | -/* |
---|
4259 | | - * Called if we need to clear a data reservation for this inode,
---|
4260 | | - * normally in an error case.
---|
4261 | | - *
---|
4262 | | - * This one will *NOT* use the accurate qgroup reserved space API; it is only
---|
4263 | | - * for cases where we can't sleep and are sure it won't affect qgroup reserved space.
---|
4264 | | - * Like clear_bit_hook(). |
---|
4265 | | - */ |
---|
4266 | | -void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start, |
---|
4267 | | - u64 len) |
---|
4268 | | -{ |
---|
4269 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
4270 | | - struct btrfs_space_info *data_sinfo; |
---|
4271 | | - |
---|
4272 | | - /* Make sure the range is aligned to sectorsize */ |
---|
4273 | | - len = round_up(start + len, fs_info->sectorsize) - |
---|
4274 | | - round_down(start, fs_info->sectorsize); |
---|
4275 | | - start = round_down(start, fs_info->sectorsize); |
---|
4276 | | - |
---|
4277 | | - data_sinfo = fs_info->data_sinfo; |
---|
4278 | | - spin_lock(&data_sinfo->lock); |
---|
4279 | | - if (WARN_ON(data_sinfo->bytes_may_use < len)) |
---|
4280 | | - data_sinfo->bytes_may_use = 0; |
---|
4281 | | - else |
---|
4282 | | - data_sinfo->bytes_may_use -= len; |
---|
4283 | | - trace_btrfs_space_reservation(fs_info, "space_info", |
---|
4284 | | - data_sinfo->flags, len, 0); |
---|
4285 | | - spin_unlock(&data_sinfo->lock); |
---|
4286 | | -} |
---|
4287 | | - |
---|
4288 | | -/* |
---|
4289 | | - * Called if we need to clear a data reservation for this inode,
---|
4290 | | - * normally in an error case.
---|
4291 | | - * |
---|
4292 | | - * This one will handle the per-inode data rsv map for accurate reserved |
---|
4293 | | - * space framework. |
---|
4294 | | - */ |
---|
4295 | | -void btrfs_free_reserved_data_space(struct inode *inode, |
---|
4296 | | - struct extent_changeset *reserved, u64 start, u64 len) |
---|
4297 | | -{ |
---|
4298 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
4299 | | - |
---|
4300 | | - /* Make sure the range is aligned to sectorsize */ |
---|
4301 | | - len = round_up(start + len, root->fs_info->sectorsize) - |
---|
4302 | | - round_down(start, root->fs_info->sectorsize); |
---|
4303 | | - start = round_down(start, root->fs_info->sectorsize); |
---|
4304 | | - |
---|
4305 | | - btrfs_free_reserved_data_space_noquota(inode, start, len); |
---|
4306 | | - btrfs_qgroup_free_data(inode, reserved, start, len); |
---|
4307 | | -} |
---|
4308 | | - |
---|
4309 | | -static void force_metadata_allocation(struct btrfs_fs_info *info) |
---|
4310 | | -{ |
---|
4311 | | - struct list_head *head = &info->space_info; |
---|
4312 | | - struct btrfs_space_info *found; |
---|
4313 | | - |
---|
4314 | | - rcu_read_lock(); |
---|
4315 | | - list_for_each_entry_rcu(found, head, list) { |
---|
4316 | | - if (found->flags & BTRFS_BLOCK_GROUP_METADATA) |
---|
4317 | | - found->force_alloc = CHUNK_ALLOC_FORCE; |
---|
4318 | | - } |
---|
4319 | | - rcu_read_unlock(); |
---|
4320 | | -} |
---|
4321 | | - |
---|
4322 | | -static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global) |
---|
4323 | | -{ |
---|
4324 | | - return (global->size << 1); |
---|
4325 | | -} |
---|
4326 | | - |
---|
4327 | | -static int should_alloc_chunk(struct btrfs_fs_info *fs_info, |
---|
4328 | | - struct btrfs_space_info *sinfo, int force) |
---|
4329 | | -{ |
---|
4330 | | - struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; |
---|
4331 | | - u64 bytes_used = btrfs_space_info_used(sinfo, false); |
---|
4332 | | - u64 thresh; |
---|
4333 | | - |
---|
4334 | | - if (force == CHUNK_ALLOC_FORCE) |
---|
4335 | | - return 1; |
---|
4336 | | - |
---|
4337 | | - /* |
---|
4338 | | - * We need to take into account the global rsv because for all intents |
---|
4339 | | - * and purposes it's used space. Don't worry about locking the |
---|
4340 | | - * global_rsv, it doesn't change except when the transaction commits. |
---|
4341 | | - */ |
---|
4342 | | - if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA) |
---|
4343 | | - bytes_used += calc_global_rsv_need_space(global_rsv); |
---|
4344 | | - |
---|
4345 | | - /* |
---|
4346 | | - * in limited mode, we want to have some free space up to |
---|
4347 | | - * about 1% of the FS size. |
---|
4348 | | - */ |
---|
4349 | | - if (force == CHUNK_ALLOC_LIMITED) { |
---|
4350 | | - thresh = btrfs_super_total_bytes(fs_info->super_copy); |
---|
4351 | | - thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1)); |
---|
4352 | | - |
---|
4353 | | - if (sinfo->total_bytes - bytes_used < thresh) |
---|
4354 | | - return 1; |
---|
4355 | | - } |
---|
4356 | | - |
---|
4357 | | - if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8)) |
---|
4358 | | - return 0; |
---|
4359 | | - return 1; |
---|
4360 | | -} |
---|
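
A standalone sketch in plain C (illustrative only; the real code measures the 1% threshold against the whole filesystem size from the super block, while this sketch uses a single "total" for both checks) of the thresholds in the removed should_alloc_chunk() above:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define SZ_2M	(2ULL * 1024 * 1024)
#define SZ_64M	(64ULL * 1024 * 1024)

/* Model of the limited-mode 1% / 64MiB check and the ~80% usage check. */
static bool should_alloc_chunk_model(uint64_t total, uint64_t used, bool limited)
{
	if (limited) {
		/* keep ~1% of the size free, but at least 64MiB */
		uint64_t thresh = total / 100;

		if (thresh < SZ_64M)
			thresh = SZ_64M;
		if (total - used < thresh)
			return true;
	}
	/* otherwise only allocate once the space info is ~80% used */
	return used + SZ_2M >= total * 8 / 10;
}

int main(void)
{
	uint64_t total = 100ULL << 30;	/* a 100GiB space info */

	printf("limited, 99.5%% used: %d\n",
	       should_alloc_chunk_model(total, total - total / 200, true));
	printf("no force, 50%% used:  %d\n",
	       should_alloc_chunk_model(total, total / 2, false));
	return 0;
}
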
4361 | | - |
---|
4362 | | -static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type) |
---|
4363 | | -{ |
---|
4364 | | - u64 num_dev; |
---|
4365 | | - |
---|
4366 | | - if (type & (BTRFS_BLOCK_GROUP_RAID10 | |
---|
4367 | | - BTRFS_BLOCK_GROUP_RAID0 | |
---|
4368 | | - BTRFS_BLOCK_GROUP_RAID5 | |
---|
4369 | | - BTRFS_BLOCK_GROUP_RAID6)) |
---|
4370 | | - num_dev = fs_info->fs_devices->rw_devices; |
---|
4371 | | - else if (type & BTRFS_BLOCK_GROUP_RAID1) |
---|
4372 | | - num_dev = 2; |
---|
4373 | | - else |
---|
4374 | | - num_dev = 1; /* DUP or single */ |
---|
4375 | | - |
---|
4376 | | - return num_dev; |
---|
4377 | | -} |
---|
4378 | | - |
---|
4379 | | -/* |
---|
4380 | | - * Reserve space in the SYSTEM space info needed for updating the device
---|
4381 | | - * items and the chunk item when a chunk of the given @type is allocated
---|
4382 | | - * or removed.
---|
4383 | | - */ |
---|
4384 | | -void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) |
---|
4385 | | -{ |
---|
4386 | | - struct btrfs_fs_info *fs_info = trans->fs_info; |
---|
4387 | | - struct btrfs_space_info *info; |
---|
4388 | | - u64 left; |
---|
4389 | | - u64 thresh; |
---|
4390 | | - int ret = 0; |
---|
4391 | | - u64 num_devs; |
---|
4392 | | - |
---|
4393 | | - /* |
---|
4394 | | - * Needed because we can end up allocating a system chunk and need an
---|
4395 | | - * atomic, race-free space reservation in the chunk block reserve.
---|
4396 | | - */ |
---|
4397 | | - lockdep_assert_held(&fs_info->chunk_mutex); |
---|
4398 | | - |
---|
4399 | | - info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); |
---|
4400 | | - spin_lock(&info->lock); |
---|
4401 | | - left = info->total_bytes - btrfs_space_info_used(info, true); |
---|
4402 | | - spin_unlock(&info->lock); |
---|
4403 | | - |
---|
4404 | | - num_devs = get_profile_num_devs(fs_info, type); |
---|
4405 | | - |
---|
4406 | | - /* num_devs device items to update and 1 chunk item to add or remove */ |
---|
4407 | | - thresh = btrfs_calc_trunc_metadata_size(fs_info, num_devs) + |
---|
4408 | | - btrfs_calc_trans_metadata_size(fs_info, 1); |
---|
4409 | | - |
---|
4410 | | - if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { |
---|
4411 | | - btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu", |
---|
4412 | | - left, thresh, type); |
---|
4413 | | - dump_space_info(fs_info, info, 0, 0); |
---|
4414 | | - } |
---|
4415 | | - |
---|
4416 | | - if (left < thresh) { |
---|
4417 | | - u64 flags = btrfs_system_alloc_profile(fs_info); |
---|
4418 | | - |
---|
4419 | | - /* |
---|
4420 | | - * Ignore failure to create system chunk. We might end up not |
---|
4421 | | - * needing it, as we might not need to COW all nodes/leafs from |
---|
4422 | | - * the paths we visit in the chunk tree (they were already COWed |
---|
4423 | | - * or created in the current transaction for example). |
---|
4424 | | - */ |
---|
4425 | | - ret = btrfs_alloc_chunk(trans, flags); |
---|
4426 | | - } |
---|
4427 | | - |
---|
4428 | | - if (!ret) { |
---|
4429 | | - ret = btrfs_block_rsv_add(fs_info->chunk_root, |
---|
4430 | | - &fs_info->chunk_block_rsv, |
---|
4431 | | - thresh, BTRFS_RESERVE_NO_FLUSH); |
---|
4432 | | - if (!ret) |
---|
4433 | | - trans->chunk_bytes_reserved += thresh; |
---|
4434 | | - } |
---|
4435 | | -} |
---|
4436 | | - |
---|
4437 | | -/* |
---|
4438 | | - * If force is CHUNK_ALLOC_FORCE: |
---|
4439 | | - * - return 1 if it successfully allocates a chunk, |
---|
4440 | | - * - return errors including -ENOSPC otherwise. |
---|
4441 | | - * If force is NOT CHUNK_ALLOC_FORCE: |
---|
4442 | | - * - return 0 if it doesn't need to allocate a new chunk, |
---|
4443 | | - * - return 1 if it successfully allocates a chunk, |
---|
4444 | | - * - return errors including -ENOSPC otherwise. |
---|
4445 | | - */ |
---|
4446 | | -static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, |
---|
4447 | | - int force) |
---|
4448 | | -{ |
---|
4449 | | - struct btrfs_fs_info *fs_info = trans->fs_info; |
---|
4450 | | - struct btrfs_space_info *space_info; |
---|
4451 | | - bool wait_for_alloc = false; |
---|
4452 | | - bool should_alloc = false; |
---|
4453 | | - int ret = 0; |
---|
4454 | | - |
---|
4455 | | - /* Don't re-enter if we're already allocating a chunk */ |
---|
4456 | | - if (trans->allocating_chunk) |
---|
4457 | | - return -ENOSPC; |
---|
4458 | | - |
---|
4459 | | - space_info = __find_space_info(fs_info, flags); |
---|
4460 | | - ASSERT(space_info); |
---|
4461 | | - |
---|
4462 | | - do { |
---|
4463 | | - spin_lock(&space_info->lock); |
---|
4464 | | - if (force < space_info->force_alloc) |
---|
4465 | | - force = space_info->force_alloc; |
---|
4466 | | - should_alloc = should_alloc_chunk(fs_info, space_info, force); |
---|
4467 | | - if (space_info->full) { |
---|
4468 | | - /* No more free physical space */ |
---|
4469 | | - if (should_alloc) |
---|
4470 | | - ret = -ENOSPC; |
---|
4471 | | - else |
---|
4472 | | - ret = 0; |
---|
4473 | | - spin_unlock(&space_info->lock); |
---|
4474 | | - return ret; |
---|
4475 | | - } else if (!should_alloc) { |
---|
4476 | | - spin_unlock(&space_info->lock); |
---|
4477 | | - return 0; |
---|
4478 | | - } else if (space_info->chunk_alloc) { |
---|
4479 | | - /* |
---|
4480 | | - * Someone is already allocating, so we need to block |
---|
4481 | | - * until this someone is finished and then loop to |
---|
4482 | | - * recheck if we should continue with our allocation |
---|
4483 | | - * attempt. |
---|
4484 | | - */ |
---|
4485 | | - wait_for_alloc = true; |
---|
4486 | | - spin_unlock(&space_info->lock); |
---|
4487 | | - mutex_lock(&fs_info->chunk_mutex); |
---|
4488 | | - mutex_unlock(&fs_info->chunk_mutex); |
---|
4489 | | - } else { |
---|
4490 | | - /* Proceed with allocation */ |
---|
4491 | | - space_info->chunk_alloc = 1; |
---|
4492 | | - wait_for_alloc = false; |
---|
4493 | | - spin_unlock(&space_info->lock); |
---|
4494 | | - } |
---|
4495 | | - |
---|
4496 | | - cond_resched(); |
---|
4497 | | - } while (wait_for_alloc); |
---|
4498 | | - |
---|
4499 | | - mutex_lock(&fs_info->chunk_mutex); |
---|
4500 | | - trans->allocating_chunk = true; |
---|
4501 | | - |
---|
4502 | | - /* |
---|
4503 | | - * If we have mixed data/metadata chunks we want to make sure we keep |
---|
4504 | | - * allocating mixed chunks instead of individual chunks. |
---|
4505 | | - */ |
---|
4506 | | - if (btrfs_mixed_space_info(space_info)) |
---|
4507 | | - flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA); |
---|
4508 | | - |
---|
4509 | | - /* |
---|
4510 | | - * if we're doing a data chunk, go ahead and make sure that |
---|
4511 | | - * we keep a reasonable number of metadata chunks allocated in the |
---|
4512 | | - * FS as well. |
---|
4513 | | - */ |
---|
4514 | | - if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { |
---|
4515 | | - fs_info->data_chunk_allocations++; |
---|
4516 | | - if (!(fs_info->data_chunk_allocations % |
---|
4517 | | - fs_info->metadata_ratio)) |
---|
4518 | | - force_metadata_allocation(fs_info); |
---|
4519 | | - } |
---|
4520 | | - |
---|
4521 | | - /* |
---|
4522 | | - * Check if we have enough space in SYSTEM chunk because we may need |
---|
4523 | | - * to update devices. |
---|
4524 | | - */ |
---|
4525 | | - check_system_chunk(trans, flags); |
---|
4526 | | - |
---|
4527 | | - ret = btrfs_alloc_chunk(trans, flags); |
---|
4528 | | - trans->allocating_chunk = false; |
---|
4529 | | - |
---|
4530 | | - spin_lock(&space_info->lock); |
---|
4531 | | - if (ret < 0) { |
---|
4532 | | - if (ret == -ENOSPC) |
---|
4533 | | - space_info->full = 1; |
---|
4534 | | - else |
---|
4535 | | - goto out; |
---|
4536 | | - } else { |
---|
4537 | | - ret = 1; |
---|
4538 | | - space_info->max_extent_size = 0; |
---|
4539 | | - } |
---|
4540 | | - |
---|
4541 | | - space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; |
---|
4542 | | -out: |
---|
4543 | | - space_info->chunk_alloc = 0; |
---|
4544 | | - spin_unlock(&space_info->lock); |
---|
4545 | | - mutex_unlock(&fs_info->chunk_mutex); |
---|
4546 | | - /* |
---|
4547 | | - * When we allocate a new chunk we reserve space in the chunk block |
---|
4548 | | - * reserve to make sure we can COW nodes/leafs in the chunk tree or |
---|
4549 | | - * add new nodes/leafs to it if we end up needing to do it when |
---|
4550 | | - * inserting the chunk item and updating device items as part of the |
---|
4551 | | - * second phase of chunk allocation, performed by |
---|
4552 | | - * btrfs_finish_chunk_alloc(). So make sure we don't accumulate a |
---|
4553 | | - * large number of new block groups to create in our transaction |
---|
4554 | | - * handle's new_bgs list to avoid exhausting the chunk block reserve |
---|
4555 | | - * in extreme cases - like having a single transaction create many new |
---|
4556 | | - * block groups when starting to write out the free space caches of all |
---|
4557 | | - * the block groups that were made dirty during the lifetime of the |
---|
4558 | | - * transaction. |
---|
4559 | | - */ |
---|
4560 | | - if (trans->chunk_bytes_reserved >= (u64)SZ_2M) |
---|
4561 | | - btrfs_create_pending_block_groups(trans); |
---|
4562 | | - |
---|
4563 | | - return ret; |
---|
4564 | | -} |
---|
4565 | | - |
---|
4566 | | -static int can_overcommit(struct btrfs_fs_info *fs_info, |
---|
4567 | | - struct btrfs_space_info *space_info, u64 bytes, |
---|
4568 | | - enum btrfs_reserve_flush_enum flush, |
---|
4569 | | - bool system_chunk) |
---|
4570 | | -{ |
---|
4571 | | - struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; |
---|
4572 | | - u64 profile; |
---|
4573 | | - u64 space_size; |
---|
4574 | | - u64 avail; |
---|
4575 | | - u64 used; |
---|
4576 | | - int factor; |
---|
4577 | | - |
---|
4578 | | - /* Don't overcommit when in mixed mode. */ |
---|
4579 | | - if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) |
---|
4580 | | - return 0; |
---|
4581 | | - |
---|
4582 | | - if (system_chunk) |
---|
4583 | | - profile = btrfs_system_alloc_profile(fs_info); |
---|
4584 | | - else |
---|
4585 | | - profile = btrfs_metadata_alloc_profile(fs_info); |
---|
4586 | | - |
---|
4587 | | - used = btrfs_space_info_used(space_info, false); |
---|
4588 | | - |
---|
4589 | | - /* |
---|
4590 | | - * We only want to allow over committing if we have lots of actual space |
---|
4591 | | - * free, but if we don't have enough space to handle the global reserve |
---|
4592 | | - * space then we could end up having a real enospc problem when trying |
---|
4593 | | - * to allocate a chunk or some other such important allocation. |
---|
4594 | | - */ |
---|
4595 | | - spin_lock(&global_rsv->lock); |
---|
4596 | | - space_size = calc_global_rsv_need_space(global_rsv); |
---|
4597 | | - spin_unlock(&global_rsv->lock); |
---|
4598 | | - if (used + space_size >= space_info->total_bytes) |
---|
4599 | | - return 0; |
---|
4600 | | - |
---|
4601 | | - used += space_info->bytes_may_use; |
---|
4602 | | - |
---|
4603 | | - avail = atomic64_read(&fs_info->free_chunk_space); |
---|
4604 | | - |
---|
4605 | | - /* |
---|
4606 | | - * If we have dup, raid1 or raid10 then only half of the free |
---|
4607 | | - * space is actually usable. For raid56, the space info used
---|
4608 | | - * doesn't include the parity drive, so we don't have to |
---|
4609 | | - * change the math |
---|
4610 | | - */ |
---|
4611 | | - factor = btrfs_bg_type_to_factor(profile); |
---|
4612 | | - avail = div_u64(avail, factor); |
---|
4613 | | - |
---|
4614 | | - /* |
---|
4615 | | - * If we aren't flushing all things, let us overcommit up to
---|
4616 | | - * half of the space. If we can flush, don't let us overcommit
---|
4617 | | - * too much; let it overcommit up to 1/8 of the space.
---|
4618 | | - */ |
---|
4619 | | - if (flush == BTRFS_RESERVE_FLUSH_ALL) |
---|
4620 | | - avail >>= 3; |
---|
4621 | | - else |
---|
4622 | | - avail >>= 1; |
---|
4623 | | - |
---|
4624 | | - if (used + bytes < space_info->total_bytes + avail) |
---|
4625 | | - return 1; |
---|
4626 | | - return 0; |
---|
4627 | | -} |
---|
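
A standalone sketch in plain C (illustrative only; the global-reserve and mixed-profile checks are left out) of the headroom math in the removed can_overcommit() above, showing how much may be promised beyond total_bytes:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static bool can_overcommit_model(uint64_t total, uint64_t used, uint64_t bytes,
				 uint64_t free_chunk_space, int raid_factor,
				 bool flush_all)
{
	/* unallocated device space, scaled down for mirrored/dup profiles */
	uint64_t avail = free_chunk_space / raid_factor;

	/* be conservative: 1/8 of it if we are allowed to flush, 1/2 otherwise */
	avail >>= flush_all ? 3 : 1;

	return used + bytes < total + avail;
}

int main(void)
{
	uint64_t gib = 1ULL << 30;

	printf("flush all: %d\n",
	       can_overcommit_model(4 * gib, 4 * gib, gib, 8 * gib, 2, true));
	printf("no flush:  %d\n",
	       can_overcommit_model(4 * gib, 4 * gib, gib, 8 * gib, 2, false));
	return 0;
}
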
4628 | | - |
---|
4629 | | -static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info, |
---|
4630 | | - unsigned long nr_pages, int nr_items) |
---|
4631 | | -{ |
---|
4632 | | - struct super_block *sb = fs_info->sb; |
---|
4633 | | - |
---|
4634 | | - if (down_read_trylock(&sb->s_umount)) { |
---|
4635 | | - writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE); |
---|
4636 | | - up_read(&sb->s_umount); |
---|
4637 | | - } else { |
---|
4638 | | - /* |
---|
4639 | | - * We needn't worry about the filesystem going from r/w to r/o even
---|
4640 | | - * though we don't acquire the ->s_umount mutex, because the filesystem
---|
4641 | | - * should guarantee that the delalloc inode list is empty after the
---|
4642 | | - * filesystem becomes read-only (all dirty pages have been written to
---|
4643 | | - * disk).
---|
4644 | | - */ |
---|
4645 | | - btrfs_start_delalloc_roots(fs_info, nr_items); |
---|
4646 | | - if (!current->journal_info) |
---|
4647 | | - btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1); |
---|
4648 | | - } |
---|
4649 | | -} |
---|
4650 | | - |
---|
4651 | | -static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info, |
---|
4652 | | - u64 to_reclaim) |
---|
4653 | | -{ |
---|
4654 | | - u64 bytes; |
---|
4655 | | - u64 nr; |
---|
4656 | | - |
---|
4657 | | - bytes = btrfs_calc_trans_metadata_size(fs_info, 1); |
---|
4658 | | - nr = div64_u64(to_reclaim, bytes); |
---|
4659 | | - if (!nr) |
---|
4660 | | - nr = 1; |
---|
4661 | | - return nr; |
---|
4662 | | -} |
---|
4663 | | - |
---|
4664 | | -#define EXTENT_SIZE_PER_ITEM SZ_256K |
---|
4665 | | - |
---|
4666 | | -/* |
---|
4667 | | - * shrink metadata reservation for delalloc |
---|
4668 | | - */ |
---|
4669 | | -static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim, |
---|
4670 | | - u64 orig, bool wait_ordered) |
---|
4671 | | -{ |
---|
4672 | | - struct btrfs_space_info *space_info; |
---|
4673 | | - struct btrfs_trans_handle *trans; |
---|
4674 | | - u64 delalloc_bytes; |
---|
4675 | | - u64 max_reclaim; |
---|
4676 | | - u64 items; |
---|
4677 | | - long time_left; |
---|
4678 | | - unsigned long nr_pages; |
---|
4679 | | - int loops; |
---|
4680 | | - |
---|
4681 | | - /* Calc the number of the pages we need flush for space reservation */ |
---|
4682 | | - items = calc_reclaim_items_nr(fs_info, to_reclaim); |
---|
4683 | | - to_reclaim = items * EXTENT_SIZE_PER_ITEM; |
---|
4684 | | - |
---|
4685 | | - trans = (struct btrfs_trans_handle *)current->journal_info; |
---|
4686 | | - space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); |
---|
4687 | | - |
---|
4688 | | - delalloc_bytes = percpu_counter_sum_positive( |
---|
4689 | | - &fs_info->delalloc_bytes); |
---|
4690 | | - if (delalloc_bytes == 0) { |
---|
4691 | | - if (trans) |
---|
4692 | | - return; |
---|
4693 | | - if (wait_ordered) |
---|
4694 | | - btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1); |
---|
4695 | | - return; |
---|
4696 | | - } |
---|
4697 | | - |
---|
4698 | | - loops = 0; |
---|
4699 | | - while (delalloc_bytes && loops < 3) { |
---|
4700 | | - max_reclaim = min(delalloc_bytes, to_reclaim); |
---|
4701 | | - nr_pages = max_reclaim >> PAGE_SHIFT; |
---|
4702 | | - btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items); |
---|
4703 | | - /* |
---|
4704 | | - * We need to wait for the async pages to actually start before |
---|
4705 | | - * we do anything. |
---|
4706 | | - */ |
---|
4707 | | - max_reclaim = atomic_read(&fs_info->async_delalloc_pages); |
---|
4708 | | - if (!max_reclaim) |
---|
4709 | | - goto skip_async; |
---|
4710 | | - |
---|
4711 | | - if (max_reclaim <= nr_pages) |
---|
4712 | | - max_reclaim = 0; |
---|
4713 | | - else |
---|
4714 | | - max_reclaim -= nr_pages; |
---|
4715 | | - |
---|
4716 | | - wait_event(fs_info->async_submit_wait, |
---|
4717 | | - atomic_read(&fs_info->async_delalloc_pages) <= |
---|
4718 | | - (int)max_reclaim); |
---|
4719 | | -skip_async: |
---|
4720 | | - spin_lock(&space_info->lock); |
---|
4721 | | - if (list_empty(&space_info->tickets) && |
---|
4722 | | - list_empty(&space_info->priority_tickets)) { |
---|
4723 | | - spin_unlock(&space_info->lock); |
---|
4724 | | - break; |
---|
4725 | | - } |
---|
4726 | | - spin_unlock(&space_info->lock); |
---|
4727 | | - |
---|
4728 | | - loops++; |
---|
4729 | | - if (wait_ordered && !trans) { |
---|
4730 | | - btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1); |
---|
4731 | | - } else { |
---|
4732 | | - time_left = schedule_timeout_killable(1); |
---|
4733 | | - if (time_left) |
---|
4734 | | - break; |
---|
4735 | | - } |
---|
4736 | | - delalloc_bytes = percpu_counter_sum_positive( |
---|
4737 | | - &fs_info->delalloc_bytes); |
---|
4738 | | - } |
---|
4739 | | -} |
---|
4740 | | - |
---|
4741 | | -struct reserve_ticket { |
---|
4742 | | - u64 bytes; |
---|
4743 | | - int error; |
---|
4744 | | - struct list_head list; |
---|
4745 | | - wait_queue_head_t wait; |
---|
4746 | | -}; |
---|
4747 | | - |
---|
4748 | | -/** |
---|
4749 | | - * may_commit_transaction - possibly commit the transaction if it's okay to
---|
4750 | | - * @fs_info - the filesystem we are reserving space in
---|
4751 | | - * @space_info - the space info we want to reserve space from; the bytes
---|
4752 | | - * needed are taken from its first pending reservation ticket
---|
4753 | | - * |
---|
4754 | | - * This will check to make sure that committing the transaction will actually |
---|
4755 | | - * get us somewhere and then commit the transaction if it does. Otherwise it |
---|
4756 | | - * will return -ENOSPC. |
---|
4757 | | - */ |
---|
4758 | | -static int may_commit_transaction(struct btrfs_fs_info *fs_info, |
---|
4759 | | - struct btrfs_space_info *space_info) |
---|
4760 | | -{ |
---|
4761 | | - struct reserve_ticket *ticket = NULL; |
---|
4762 | | - struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv; |
---|
4763 | | - struct btrfs_trans_handle *trans; |
---|
4764 | | - u64 bytes; |
---|
4765 | | - |
---|
4766 | | - trans = (struct btrfs_trans_handle *)current->journal_info; |
---|
4767 | | - if (trans) |
---|
4768 | | - return -EAGAIN; |
---|
4769 | | - |
---|
4770 | | - spin_lock(&space_info->lock); |
---|
4771 | | - if (!list_empty(&space_info->priority_tickets)) |
---|
4772 | | - ticket = list_first_entry(&space_info->priority_tickets, |
---|
4773 | | - struct reserve_ticket, list); |
---|
4774 | | - else if (!list_empty(&space_info->tickets)) |
---|
4775 | | - ticket = list_first_entry(&space_info->tickets, |
---|
4776 | | - struct reserve_ticket, list); |
---|
4777 | | - bytes = (ticket) ? ticket->bytes : 0; |
---|
4778 | | - spin_unlock(&space_info->lock); |
---|
4779 | | - |
---|
4780 | | - if (!bytes) |
---|
4781 | | - return 0; |
---|
4782 | | - |
---|
4783 | | - /* See if there is enough pinned space to make this reservation */ |
---|
4784 | | - if (__percpu_counter_compare(&space_info->total_bytes_pinned, |
---|
4785 | | - bytes, |
---|
4786 | | - BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0) |
---|
4787 | | - goto commit; |
---|
4788 | | - |
---|
4789 | | - /* |
---|
4790 | | - * See if there is some space in the delayed insertion reservation for |
---|
4791 | | - * this reservation. |
---|
4792 | | - */ |
---|
4793 | | - if (space_info != delayed_rsv->space_info) |
---|
4794 | | - return -ENOSPC; |
---|
4795 | | - |
---|
4796 | | - spin_lock(&delayed_rsv->lock); |
---|
4797 | | - if (delayed_rsv->size > bytes) |
---|
4798 | | - bytes = 0; |
---|
4799 | | - else |
---|
4800 | | - bytes -= delayed_rsv->size; |
---|
4801 | | - spin_unlock(&delayed_rsv->lock); |
---|
4802 | | - |
---|
4803 | | - if (__percpu_counter_compare(&space_info->total_bytes_pinned, |
---|
4804 | | - bytes, |
---|
4805 | | - BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0) { |
---|
4806 | | - return -ENOSPC; |
---|
4807 | | - } |
---|
4808 | | - |
---|
4809 | | -commit: |
---|
4810 | | - trans = btrfs_join_transaction(fs_info->extent_root); |
---|
4811 | | - if (IS_ERR(trans)) |
---|
4812 | | - return -ENOSPC; |
---|
4813 | | - |
---|
4814 | | - return btrfs_commit_transaction(trans); |
---|
4815 | | -} |
---|
4816 | | - |
---|
4817 | | -/* |
---|
4818 | | - * Try to flush some data based on policy set by @state. This is only advisory |
---|
4819 | | - * and may fail for various reasons. The caller is supposed to examine the |
---|
4820 | | - * state of @space_info to detect the outcome. |
---|
4821 | | - */ |
---|
4822 | | -static void flush_space(struct btrfs_fs_info *fs_info, |
---|
4823 | | - struct btrfs_space_info *space_info, u64 num_bytes, |
---|
4824 | | - int state) |
---|
4825 | | -{ |
---|
4826 | | - struct btrfs_root *root = fs_info->extent_root; |
---|
4827 | | - struct btrfs_trans_handle *trans; |
---|
4828 | | - int nr; |
---|
4829 | | - int ret = 0; |
---|
4830 | | - |
---|
4831 | | - switch (state) { |
---|
4832 | | - case FLUSH_DELAYED_ITEMS_NR: |
---|
4833 | | - case FLUSH_DELAYED_ITEMS: |
---|
4834 | | - if (state == FLUSH_DELAYED_ITEMS_NR) |
---|
4835 | | - nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2; |
---|
4836 | | - else |
---|
4837 | | - nr = -1; |
---|
4838 | | - |
---|
4839 | | - trans = btrfs_join_transaction(root); |
---|
4840 | | - if (IS_ERR(trans)) { |
---|
4841 | | - ret = PTR_ERR(trans); |
---|
4842 | | - break; |
---|
4843 | | - } |
---|
4844 | | - ret = btrfs_run_delayed_items_nr(trans, nr); |
---|
4845 | | - btrfs_end_transaction(trans); |
---|
4846 | | - break; |
---|
4847 | | - case FLUSH_DELALLOC: |
---|
4848 | | - case FLUSH_DELALLOC_WAIT: |
---|
4849 | | - shrink_delalloc(fs_info, num_bytes * 2, num_bytes, |
---|
4850 | | - state == FLUSH_DELALLOC_WAIT); |
---|
4851 | | - break; |
---|
4852 | | - case ALLOC_CHUNK: |
---|
4853 | | - trans = btrfs_join_transaction(root); |
---|
4854 | | - if (IS_ERR(trans)) { |
---|
4855 | | - ret = PTR_ERR(trans); |
---|
4856 | | - break; |
---|
4857 | | - } |
---|
4858 | | - ret = do_chunk_alloc(trans, |
---|
4859 | | - btrfs_metadata_alloc_profile(fs_info), |
---|
4860 | | - CHUNK_ALLOC_NO_FORCE); |
---|
4861 | | - btrfs_end_transaction(trans); |
---|
4862 | | - if (ret > 0 || ret == -ENOSPC) |
---|
4863 | | - ret = 0; |
---|
4864 | | - break; |
---|
4865 | | - case COMMIT_TRANS: |
---|
4866 | | - ret = may_commit_transaction(fs_info, space_info); |
---|
4867 | | - break; |
---|
4868 | | - default: |
---|
4869 | | - ret = -ENOSPC; |
---|
4870 | | - break; |
---|
4871 | | - } |
---|
4872 | | - |
---|
4873 | | - trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state, |
---|
4874 | | - ret); |
---|
4875 | | - return; |
---|
4876 | | -} |
---|
4877 | | - |
---|
4878 | | -static inline u64 |
---|
4879 | | -btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, |
---|
4880 | | - struct btrfs_space_info *space_info, |
---|
4881 | | - bool system_chunk) |
---|
4882 | | -{ |
---|
4883 | | - struct reserve_ticket *ticket; |
---|
4884 | | - u64 used; |
---|
4885 | | - u64 expected; |
---|
4886 | | - u64 to_reclaim = 0; |
---|
4887 | | - |
---|
4888 | | - list_for_each_entry(ticket, &space_info->tickets, list) |
---|
4889 | | - to_reclaim += ticket->bytes; |
---|
4890 | | - list_for_each_entry(ticket, &space_info->priority_tickets, list) |
---|
4891 | | - to_reclaim += ticket->bytes; |
---|
4892 | | - if (to_reclaim) |
---|
4893 | | - return to_reclaim; |
---|
4894 | | - |
---|
4895 | | - to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); |
---|
4896 | | - if (can_overcommit(fs_info, space_info, to_reclaim, |
---|
4897 | | - BTRFS_RESERVE_FLUSH_ALL, system_chunk)) |
---|
4898 | | - return 0; |
---|
4899 | | - |
---|
4900 | | - used = btrfs_space_info_used(space_info, true); |
---|
4901 | | - |
---|
4902 | | - if (can_overcommit(fs_info, space_info, SZ_1M, |
---|
4903 | | - BTRFS_RESERVE_FLUSH_ALL, system_chunk)) |
---|
4904 | | - expected = div_factor_fine(space_info->total_bytes, 95); |
---|
4905 | | - else |
---|
4906 | | - expected = div_factor_fine(space_info->total_bytes, 90); |
---|
4907 | | - |
---|
4908 | | - if (used > expected) |
---|
4909 | | - to_reclaim = used - expected; |
---|
4910 | | - else |
---|
4911 | | - to_reclaim = 0; |
---|
4912 | | - to_reclaim = min(to_reclaim, space_info->bytes_may_use + |
---|
4913 | | - space_info->bytes_reserved); |
---|
4914 | | - return to_reclaim; |
---|
4915 | | -} |
---|
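
A standalone sketch in plain C (illustrative only) of the removed btrfs_calc_reclaim_metadata_size() above for the case where no tickets are queued and overcommitting is no longer possible:

#include <stdio.h>
#include <stdint.h>

static uint64_t reclaim_target(uint64_t total, uint64_t used,
			       uint64_t reclaimable, int small_overcommit_ok)
{
	/* aim for ~90% usage, or ~95% if a small overcommit would still fit */
	uint64_t expected = small_overcommit_ok ? total * 95 / 100
						: total * 90 / 100;
	uint64_t to_reclaim = used > expected ? used - expected : 0;

	/* never ask for more than bytes_may_use + bytes_reserved */
	if (to_reclaim > reclaimable)
		to_reclaim = reclaimable;
	return to_reclaim;
}

int main(void)
{
	uint64_t gib = 1ULL << 30;

	printf("%llu MiB\n", (unsigned long long)
	       (reclaim_target(2 * gib, 2 * gib, gib, 0) >> 20));
	return 0;
}
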
4916 | | - |
---|
4917 | | -static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info, |
---|
4918 | | - struct btrfs_space_info *space_info, |
---|
4919 | | - u64 used, bool system_chunk) |
---|
4920 | | -{ |
---|
4921 | | - u64 thresh = div_factor_fine(space_info->total_bytes, 98); |
---|
4922 | | - |
---|
4923 | | - /* If we're just plain full then async reclaim just slows us down. */ |
---|
4924 | | - if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh) |
---|
4925 | | - return 0; |
---|
4926 | | - |
---|
4927 | | - if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info, |
---|
4928 | | - system_chunk)) |
---|
4929 | | - return 0; |
---|
4930 | | - |
---|
4931 | | - return (used >= thresh && !btrfs_fs_closing(fs_info) && |
---|
4932 | | - !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); |
---|
4933 | | -} |
---|
4934 | | - |
---|
4935 | | -static void wake_all_tickets(struct list_head *head) |
---|
4936 | | -{ |
---|
4937 | | - struct reserve_ticket *ticket; |
---|
4938 | | - |
---|
4939 | | - while (!list_empty(head)) { |
---|
4940 | | - ticket = list_first_entry(head, struct reserve_ticket, list); |
---|
4941 | | - list_del_init(&ticket->list); |
---|
4942 | | - ticket->error = -ENOSPC; |
---|
4943 | | - wake_up(&ticket->wait); |
---|
4944 | | - } |
---|
4945 | | -} |
---|
4946 | | - |
---|
4947 | | -/* |
---|
4948 | | - * This is for normal flushers, we can wait all goddamned day if we want to. We |
---|
4949 | | - * will loop and continuously try to flush as long as we are making progress. |
---|
4950 | | - * We count progress as clearing off tickets each time we have to loop. |
---|
4951 | | - */ |
---|
4952 | | -static void btrfs_async_reclaim_metadata_space(struct work_struct *work) |
---|
4953 | | -{ |
---|
4954 | | - struct btrfs_fs_info *fs_info; |
---|
4955 | | - struct btrfs_space_info *space_info; |
---|
4956 | | - u64 to_reclaim; |
---|
4957 | | - int flush_state; |
---|
4958 | | - int commit_cycles = 0; |
---|
4959 | | - u64 last_tickets_id; |
---|
4960 | | - |
---|
4961 | | - fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); |
---|
4962 | | - space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); |
---|
4963 | | - |
---|
4964 | | - spin_lock(&space_info->lock); |
---|
4965 | | - to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info, |
---|
4966 | | - false); |
---|
4967 | | - if (!to_reclaim) { |
---|
4968 | | - space_info->flush = 0; |
---|
4969 | | - spin_unlock(&space_info->lock); |
---|
4970 | | - return; |
---|
4971 | | - } |
---|
4972 | | - last_tickets_id = space_info->tickets_id; |
---|
4973 | | - spin_unlock(&space_info->lock); |
---|
4974 | | - |
---|
4975 | | - flush_state = FLUSH_DELAYED_ITEMS_NR; |
---|
4976 | | - do { |
---|
4977 | | - flush_space(fs_info, space_info, to_reclaim, flush_state); |
---|
4978 | | - spin_lock(&space_info->lock); |
---|
4979 | | - if (list_empty(&space_info->tickets)) { |
---|
4980 | | - space_info->flush = 0; |
---|
4981 | | - spin_unlock(&space_info->lock); |
---|
4982 | | - return; |
---|
4983 | | - } |
---|
4984 | | - to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, |
---|
4985 | | - space_info, |
---|
4986 | | - false); |
---|
4987 | | - if (last_tickets_id == space_info->tickets_id) { |
---|
4988 | | - flush_state++; |
---|
4989 | | - } else { |
---|
4990 | | - last_tickets_id = space_info->tickets_id; |
---|
4991 | | - flush_state = FLUSH_DELAYED_ITEMS_NR; |
---|
4992 | | - if (commit_cycles) |
---|
4993 | | - commit_cycles--; |
---|
4994 | | - } |
---|
4995 | | - |
---|
4996 | | - if (flush_state > COMMIT_TRANS) { |
---|
4997 | | - commit_cycles++; |
---|
4998 | | - if (commit_cycles > 2) { |
---|
4999 | | - wake_all_tickets(&space_info->tickets); |
---|
5000 | | - space_info->flush = 0; |
---|
5001 | | - } else { |
---|
5002 | | - flush_state = FLUSH_DELAYED_ITEMS_NR; |
---|
5003 | | - } |
---|
5004 | | - } |
---|
5005 | | - spin_unlock(&space_info->lock); |
---|
5006 | | - } while (flush_state <= COMMIT_TRANS); |
---|
5007 | | -} |
---|
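
A standalone sketch in plain C (illustrative only; the enum values are placeholders, only the ordering mirrors the flush states handled by flush_space() above) of the escalation loop in the removed btrfs_async_reclaim_metadata_space():

#include <stdio.h>

enum flush_state {
	FLUSH_DELAYED_ITEMS_NR = 1,
	FLUSH_DELAYED_ITEMS,
	FLUSH_DELALLOC,
	FLUSH_DELALLOC_WAIT,
	ALLOC_CHUNK,
	COMMIT_TRANS,
};

static const char *state_name(enum flush_state s)
{
	static const char * const names[] = {
		"?", "delayed items (nr)", "delayed items (all)", "delalloc",
		"delalloc + wait", "alloc chunk", "commit transaction",
	};
	return names[s];
}

int main(void)
{
	/*
	 * Model a worker that never makes progress (no ticket is ever
	 * satisfied): it walks the whole ladder, and after three full
	 * passes it gives up and fails the waiting tickets.
	 */
	enum flush_state state = FLUSH_DELAYED_ITEMS_NR;
	int commit_cycles = 0;

	for (;;) {
		printf("flush via: %s\n", state_name(state));
		state++;			/* no progress -> escalate */
		if (state > COMMIT_TRANS) {
			if (++commit_cycles > 2) {
				printf("giving up: wake tickets with -ENOSPC\n");
				break;
			}
			state = FLUSH_DELAYED_ITEMS_NR;
		}
	}
	return 0;
}
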
5008 | | - |
---|
5009 | | -void btrfs_init_async_reclaim_work(struct work_struct *work) |
---|
5010 | | -{ |
---|
5011 | | - INIT_WORK(work, btrfs_async_reclaim_metadata_space); |
---|
5012 | | -} |
---|
5013 | | - |
---|
5014 | | -static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info, |
---|
5015 | | - struct btrfs_space_info *space_info, |
---|
5016 | | - struct reserve_ticket *ticket) |
---|
5017 | | -{ |
---|
5018 | | - u64 to_reclaim; |
---|
5019 | | - int flush_state = FLUSH_DELAYED_ITEMS_NR; |
---|
5020 | | - |
---|
5021 | | - spin_lock(&space_info->lock); |
---|
5022 | | - to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info, |
---|
5023 | | - false); |
---|
5024 | | - if (!to_reclaim) { |
---|
5025 | | - spin_unlock(&space_info->lock); |
---|
5026 | | - return; |
---|
5027 | | - } |
---|
5028 | | - spin_unlock(&space_info->lock); |
---|
5029 | | - |
---|
5030 | | - do { |
---|
5031 | | - flush_space(fs_info, space_info, to_reclaim, flush_state); |
---|
5032 | | - flush_state++; |
---|
5033 | | - spin_lock(&space_info->lock); |
---|
5034 | | - if (ticket->bytes == 0) { |
---|
5035 | | - spin_unlock(&space_info->lock); |
---|
5036 | | - return; |
---|
5037 | | - } |
---|
5038 | | - spin_unlock(&space_info->lock); |
---|
5039 | | - |
---|
5040 | | - /* |
---|
5041 | | - * Priority flushers can't wait on delalloc without |
---|
5042 | | - * deadlocking. |
---|
5043 | | - */ |
---|
5044 | | - if (flush_state == FLUSH_DELALLOC || |
---|
5045 | | - flush_state == FLUSH_DELALLOC_WAIT) |
---|
5046 | | - flush_state = ALLOC_CHUNK; |
---|
5047 | | - } while (flush_state < COMMIT_TRANS); |
---|
5048 | | -} |
---|
5049 | | - |
---|
5050 | | -static int wait_reserve_ticket(struct btrfs_fs_info *fs_info, |
---|
5051 | | - struct btrfs_space_info *space_info, |
---|
5052 | | - struct reserve_ticket *ticket, u64 orig_bytes) |
---|
5053 | | - |
---|
5054 | | -{ |
---|
5055 | | - DEFINE_WAIT(wait); |
---|
5056 | | - int ret = 0; |
---|
5057 | | - |
---|
5058 | | - spin_lock(&space_info->lock); |
---|
5059 | | - while (ticket->bytes > 0 && ticket->error == 0) { |
---|
5060 | | - ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE); |
---|
5061 | | - if (ret) { |
---|
5062 | | - ret = -EINTR; |
---|
5063 | | - break; |
---|
5064 | | - } |
---|
5065 | | - spin_unlock(&space_info->lock); |
---|
5066 | | - |
---|
5067 | | - schedule(); |
---|
5068 | | - |
---|
5069 | | - finish_wait(&ticket->wait, &wait); |
---|
5070 | | - spin_lock(&space_info->lock); |
---|
5071 | | - } |
---|
5072 | | - if (!ret) |
---|
5073 | | - ret = ticket->error; |
---|
5074 | | - if (!list_empty(&ticket->list)) |
---|
5075 | | - list_del_init(&ticket->list); |
---|
5076 | | - if (ticket->bytes && ticket->bytes < orig_bytes) { |
---|
5077 | | - u64 num_bytes = orig_bytes - ticket->bytes; |
---|
5078 | | - space_info->bytes_may_use -= num_bytes; |
---|
5079 | | - trace_btrfs_space_reservation(fs_info, "space_info", |
---|
5080 | | - space_info->flags, num_bytes, 0); |
---|
5081 | | - } |
---|
5082 | | - spin_unlock(&space_info->lock); |
---|
5083 | | - |
---|
5084 | | - return ret; |
---|
5085 | | -} |
---|
5086 | | - |
---|
5087 | | -/** |
---|
5088 | | - * __reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
---|
5089 | | - * @fs_info - the filesystem we are allocating in
---|
5090 | | - * @space_info - the space info we want to allocate from |
---|
5091 | | - * @orig_bytes - the number of bytes we want |
---|
5092 | | - * @flush - whether or not we can flush to make our reservation |
---|
5093 | | - * |
---|
5094 | | - * This will reserve orig_bytes number of bytes from the space info associated |
---|
5095 | | - * with the block_rsv. If there is not enough space it will make an attempt to |
---|
5096 | | - * flush out space to make room. It will do this by flushing delalloc if |
---|
5097 | | - * possible or committing the transaction. If flush is 0 then no attempts to |
---|
5098 | | - * regain reservations will be made and this will fail if there is not enough |
---|
5099 | | - * space already. |
---|
5100 | | - */ |
---|
5101 | | -static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info, |
---|
5102 | | - struct btrfs_space_info *space_info, |
---|
5103 | | - u64 orig_bytes, |
---|
5104 | | - enum btrfs_reserve_flush_enum flush, |
---|
5105 | | - bool system_chunk) |
---|
5106 | | -{ |
---|
5107 | | - struct reserve_ticket ticket; |
---|
5108 | | - u64 used; |
---|
5109 | | - int ret = 0; |
---|
5110 | | - |
---|
5111 | | - ASSERT(orig_bytes); |
---|
5112 | | - ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL); |
---|
5113 | | - |
---|
5114 | | - spin_lock(&space_info->lock); |
---|
5115 | | - ret = -ENOSPC; |
---|
5116 | | - used = btrfs_space_info_used(space_info, true); |
---|
5117 | | - |
---|
5118 | | - /* |
---|
5119 | | - * If we have enough space then hooray, make our reservation and carry |
---|
5120 | | - * on. If not see if we can overcommit, and if we can, hooray carry on. |
---|
5121 | | - * If not things get more complicated. |
---|
5122 | | - */ |
---|
5123 | | - if (used + orig_bytes <= space_info->total_bytes) { |
---|
5124 | | - space_info->bytes_may_use += orig_bytes; |
---|
5125 | | - trace_btrfs_space_reservation(fs_info, "space_info", |
---|
5126 | | - space_info->flags, orig_bytes, 1); |
---|
5127 | | - ret = 0; |
---|
5128 | | - } else if (can_overcommit(fs_info, space_info, orig_bytes, flush, |
---|
5129 | | - system_chunk)) { |
---|
5130 | | - space_info->bytes_may_use += orig_bytes; |
---|
5131 | | - trace_btrfs_space_reservation(fs_info, "space_info", |
---|
5132 | | - space_info->flags, orig_bytes, 1); |
---|
5133 | | - ret = 0; |
---|
5134 | | - } |
---|
5135 | | - |
---|
5136 | | - /* |
---|
5137 | | - * If we couldn't make a reservation then setup our reservation ticket |
---|
5138 | | - * and kick the async worker if it's not already running. |
---|
5139 | | - * |
---|
5140 | | - * If we are a priority flusher then we just need to add our ticket to |
---|
5141 | | - * the list and we will do our own flushing further down. |
---|
5142 | | - */ |
---|
5143 | | - if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { |
---|
5144 | | - ticket.bytes = orig_bytes; |
---|
5145 | | - ticket.error = 0; |
---|
5146 | | - init_waitqueue_head(&ticket.wait); |
---|
5147 | | - if (flush == BTRFS_RESERVE_FLUSH_ALL) { |
---|
5148 | | - list_add_tail(&ticket.list, &space_info->tickets); |
---|
5149 | | - if (!space_info->flush) { |
---|
5150 | | - space_info->flush = 1; |
---|
5151 | | - trace_btrfs_trigger_flush(fs_info, |
---|
5152 | | - space_info->flags, |
---|
5153 | | - orig_bytes, flush, |
---|
5154 | | - "enospc"); |
---|
5155 | | - queue_work(system_unbound_wq, |
---|
5156 | | - &fs_info->async_reclaim_work); |
---|
5157 | | - } |
---|
5158 | | - } else { |
---|
5159 | | - list_add_tail(&ticket.list, |
---|
5160 | | - &space_info->priority_tickets); |
---|
5161 | | - } |
---|
5162 | | - } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { |
---|
5163 | | - used += orig_bytes; |
---|
5164 | | - /* |
---|
5165 | | - * We will do the space reservation dance during log replay, |
---|
5166 | | - * which means we won't have fs_info->fs_root set, so don't do |
---|
5167 | | - * the async reclaim as we will panic. |
---|
5168 | | - */ |
---|
5169 | | - if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) && |
---|
5170 | | - need_do_async_reclaim(fs_info, space_info, |
---|
5171 | | - used, system_chunk) && |
---|
5172 | | - !work_busy(&fs_info->async_reclaim_work)) { |
---|
5173 | | - trace_btrfs_trigger_flush(fs_info, space_info->flags, |
---|
5174 | | - orig_bytes, flush, "preempt"); |
---|
5175 | | - queue_work(system_unbound_wq, |
---|
5176 | | - &fs_info->async_reclaim_work); |
---|
5177 | | - } |
---|
5178 | | - } |
---|
5179 | | - spin_unlock(&space_info->lock); |
---|
5180 | | - if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) |
---|
5181 | | - return ret; |
---|
5182 | | - |
---|
5183 | | - if (flush == BTRFS_RESERVE_FLUSH_ALL) |
---|
5184 | | - return wait_reserve_ticket(fs_info, space_info, &ticket, |
---|
5185 | | - orig_bytes); |
---|
5186 | | - |
---|
5187 | | - ret = 0; |
---|
5188 | | - priority_reclaim_metadata_space(fs_info, space_info, &ticket); |
---|
5189 | | - spin_lock(&space_info->lock); |
---|
5190 | | - if (ticket.bytes) { |
---|
5191 | | - if (ticket.bytes < orig_bytes) { |
---|
5192 | | - u64 num_bytes = orig_bytes - ticket.bytes; |
---|
5193 | | - space_info->bytes_may_use -= num_bytes; |
---|
5194 | | - trace_btrfs_space_reservation(fs_info, "space_info", |
---|
5195 | | - space_info->flags, |
---|
5196 | | - num_bytes, 0); |
---|
5197 | | - |
---|
5198 | | - } |
---|
5199 | | - list_del_init(&ticket.list); |
---|
5200 | | - ret = -ENOSPC; |
---|
5201 | | - } |
---|
5202 | | - spin_unlock(&space_info->lock); |
---|
5203 | | - ASSERT(list_empty(&ticket.list)); |
---|
5204 | | - return ret; |
---|
5205 | | -} |
---|
5206 | | - |
---|
5207 | | -/** |
---|
5208 | | - * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space |
---|
5209 | | - * @root - the root we're allocating for |
---|
5210 | | - * @block_rsv - the block_rsv we're allocating for |
---|
5211 | | - * @orig_bytes - the number of bytes we want |
---|
5212 | | - * @flush - whether or not we can flush to make our reservation |
---|
5213 | | - * |
---|
5214 | | - * This will reserve orgi_bytes number of bytes from the space info associated |
---|
5215 | | - * with the block_rsv. If there is not enough space it will make an attempt to |
---|
5216 | | - * flush out space to make room. It will do this by flushing delalloc if |
---|
5217 | | - * possible or committing the transaction. If flush is 0 then no attempts to |
---|
5218 | | - * regain reservations will be made and this will fail if there is not enough |
---|
5219 | | - * space already. |
---|
5220 | | - */ |
---|
5221 | | -static int reserve_metadata_bytes(struct btrfs_root *root, |
---|
5222 | | - struct btrfs_block_rsv *block_rsv, |
---|
5223 | | - u64 orig_bytes, |
---|
5224 | | - enum btrfs_reserve_flush_enum flush) |
---|
5225 | | -{ |
---|
5226 | | - struct btrfs_fs_info *fs_info = root->fs_info; |
---|
5227 | | - struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; |
---|
5228 | | - int ret; |
---|
5229 | | - bool system_chunk = (root == fs_info->chunk_root); |
---|
5230 | | - |
---|
5231 | | - ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info, |
---|
5232 | | - orig_bytes, flush, system_chunk); |
---|
5233 | | - if (ret == -ENOSPC && |
---|
5234 | | - unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { |
---|
5235 | | - if (block_rsv != global_rsv && |
---|
5236 | | - !block_rsv_use_bytes(global_rsv, orig_bytes)) |
---|
5237 | | - ret = 0; |
---|
5238 | | - } |
---|
5239 | | - if (ret == -ENOSPC) { |
---|
5240 | | - trace_btrfs_space_reservation(fs_info, "space_info:enospc", |
---|
5241 | | - block_rsv->space_info->flags, |
---|
5242 | | - orig_bytes, 1); |
---|
5243 | | - |
---|
5244 | | - if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) |
---|
5245 | | - dump_space_info(fs_info, block_rsv->space_info, |
---|
5246 | | - orig_bytes, 0); |
---|
5247 | | - } |
---|
5248 | | - return ret; |
---|
5249 | | -} |
---|
5250 | | - |
---|
5251 | | -static struct btrfs_block_rsv *get_block_rsv( |
---|
5252 | | - const struct btrfs_trans_handle *trans, |
---|
5253 | | - const struct btrfs_root *root) |
---|
5254 | | -{ |
---|
5255 | | - struct btrfs_fs_info *fs_info = root->fs_info; |
---|
5256 | | - struct btrfs_block_rsv *block_rsv = NULL; |
---|
5257 | | - |
---|
5258 | | - if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || |
---|
5259 | | - (root == fs_info->csum_root && trans->adding_csums) || |
---|
5260 | | - (root == fs_info->uuid_root)) |
---|
5261 | | - block_rsv = trans->block_rsv; |
---|
5262 | | - |
---|
5263 | | - if (!block_rsv) |
---|
5264 | | - block_rsv = root->block_rsv; |
---|
5265 | | - |
---|
5266 | | - if (!block_rsv) |
---|
5267 | | - block_rsv = &fs_info->empty_block_rsv; |
---|
5268 | | - |
---|
5269 | | - return block_rsv; |
---|
5270 | | -} |
---|
5271 | | - |
---|
5272 | | -static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, |
---|
5273 | | - u64 num_bytes) |
---|
5274 | | -{ |
---|
5275 | | - int ret = -ENOSPC; |
---|
5276 | | - spin_lock(&block_rsv->lock); |
---|
5277 | | - if (block_rsv->reserved >= num_bytes) { |
---|
5278 | | - block_rsv->reserved -= num_bytes; |
---|
5279 | | - if (block_rsv->reserved < block_rsv->size) |
---|
5280 | | - block_rsv->full = 0; |
---|
5281 | | - ret = 0; |
---|
5282 | | - } |
---|
5283 | | - spin_unlock(&block_rsv->lock); |
---|
5284 | | - return ret; |
---|
5285 | | -} |
---|
5286 | | - |
---|
5287 | | -static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, |
---|
5288 | | - u64 num_bytes, int update_size) |
---|
5289 | | -{ |
---|
5290 | | - spin_lock(&block_rsv->lock); |
---|
5291 | | - block_rsv->reserved += num_bytes; |
---|
5292 | | - if (update_size) |
---|
5293 | | - block_rsv->size += num_bytes; |
---|
5294 | | - else if (block_rsv->reserved >= block_rsv->size) |
---|
5295 | | - block_rsv->full = 1; |
---|
5296 | | - spin_unlock(&block_rsv->lock); |
---|
5297 | | -} |
---|
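
The two helpers removed above keep the block reservation's invariant: `reserved` is what we actually hold, `size` is the target, and `full` flips when the target is met. The following is a minimal userspace sketch of that size/reserved/full arithmetic, assuming a simplified `struct rsv` with no locking; it is not the kernel API, only the accounting it implements.

#include <stdint.h>
#include <stdio.h>

struct rsv { uint64_t size, reserved; int full; };

static int rsv_use_bytes(struct rsv *r, uint64_t n)
{
	if (r->reserved < n)
		return -1;              /* -ENOSPC in the kernel */
	r->reserved -= n;
	if (r->reserved < r->size)
		r->full = 0;
	return 0;
}

static void rsv_add_bytes(struct rsv *r, uint64_t n, int update_size)
{
	r->reserved += n;
	if (update_size)
		r->size += n;           /* grow the target together with the bytes */
	else if (r->reserved >= r->size)
		r->full = 1;            /* refilled an existing target */
}

int main(void)
{
	struct rsv r = { .size = 1 << 20 };     /* want 1 MiB reserved */

	rsv_add_bytes(&r, 1 << 20, 0);          /* refill without growing size */
	printf("full=%d\n", r.full);            /* 1 */
	rsv_use_bytes(&r, 4096);
	printf("reserved=%llu full=%d\n",
	       (unsigned long long)r.reserved, r.full);  /* 1044480, 0 */
	return 0;
}
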
5298 | | - |
---|
5299 | | -int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, |
---|
5300 | | - struct btrfs_block_rsv *dest, u64 num_bytes, |
---|
5301 | | - int min_factor) |
---|
5302 | | -{ |
---|
5303 | | - struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; |
---|
5304 | | - u64 min_bytes; |
---|
5305 | | - |
---|
5306 | | - if (global_rsv->space_info != dest->space_info) |
---|
5307 | | - return -ENOSPC; |
---|
5308 | | - |
---|
5309 | | - spin_lock(&global_rsv->lock); |
---|
5310 | | - min_bytes = div_factor(global_rsv->size, min_factor); |
---|
5311 | | - if (global_rsv->reserved < min_bytes + num_bytes) { |
---|
5312 | | - spin_unlock(&global_rsv->lock); |
---|
5313 | | - return -ENOSPC; |
---|
5314 | | - } |
---|
5315 | | - global_rsv->reserved -= num_bytes; |
---|
5316 | | - if (global_rsv->reserved < global_rsv->size) |
---|
5317 | | - global_rsv->full = 0; |
---|
5318 | | - spin_unlock(&global_rsv->lock); |
---|
5319 | | - |
---|
5320 | | - block_rsv_add_bytes(dest, num_bytes, 1); |
---|
5321 | | - return 0; |
---|
5322 | | -} |
---|
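
btrfs_cond_migrate_bytes(), removed above, only lets bytes leave the global reservation when at least min_factor tenths of its size would remain reserved. The sketch below assumes div_factor(num, factor) means num * factor / 10, which is how the helper was defined in this era; the numbers in main() are made up to show one accepted and one refused migration.

#include <stdint.h>
#include <stdio.h>

static uint64_t div_factor(uint64_t num, int factor)
{
	return num * factor / 10;
}

/* Return 0 if @num_bytes may leave a rsv of @size/@reserved while keeping
 * at least @min_factor tenths of @size reserved, else -1 (ENOSPC). */
static int cond_migrate_ok(uint64_t size, uint64_t reserved,
			   uint64_t num_bytes, int min_factor)
{
	uint64_t min_bytes = div_factor(size, min_factor);

	return (reserved < min_bytes + num_bytes) ? -1 : 0;
}

int main(void)
{
	/* 512 MiB global rsv, fully reserved: taking 64 MiB keeps 448 MiB,
	 * still >= 5/10 of the size, so migration is allowed. */
	printf("%d\n", cond_migrate_ok(512ULL << 20, 512ULL << 20,
				       64ULL << 20, 5));       /* 0 */
	/* Taking 300 MiB would drop below the 256 MiB floor: refused. */
	printf("%d\n", cond_migrate_ok(512ULL << 20, 512ULL << 20,
				       300ULL << 20, 5));      /* -1 */
	return 0;
}
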
5323 | | - |
---|
5324 | | -/* |
---|
5325 | | - * This is for space we already have accounted in space_info->bytes_may_use, so |
---|
5326 | | - * basically when we're returning space from block_rsv's. |
---|
5327 | | - */ |
---|
5328 | | -static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info, |
---|
5329 | | - struct btrfs_space_info *space_info, |
---|
5330 | | - u64 num_bytes) |
---|
5331 | | -{ |
---|
5332 | | - struct reserve_ticket *ticket; |
---|
5333 | | - struct list_head *head; |
---|
5334 | | - u64 used; |
---|
5335 | | - enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH; |
---|
5336 | | - bool check_overcommit = false; |
---|
5337 | | - |
---|
5338 | | - spin_lock(&space_info->lock); |
---|
5339 | | - head = &space_info->priority_tickets; |
---|
5340 | | - |
---|
5341 | | - /* |
---|
5342 | | - * If we are over our limit then we need to check and see if we can |
---|
5343 | | - * overcommit, and if we can't then we just need to free up our space |
---|
5344 | | - * and not satisfy any requests. |
---|
5345 | | - */ |
---|
5346 | | - used = btrfs_space_info_used(space_info, true); |
---|
5347 | | - if (used - num_bytes >= space_info->total_bytes) |
---|
5348 | | - check_overcommit = true; |
---|
5349 | | -again: |
---|
5350 | | - while (!list_empty(head) && num_bytes) { |
---|
5351 | | - ticket = list_first_entry(head, struct reserve_ticket, |
---|
5352 | | - list); |
---|
5353 | | - /* |
---|
5354 | | - * We use 0 bytes because this space is already reserved, so |
---|
5355 | | - * adding the ticket space would be a double count. |
---|
5356 | | - */ |
---|
5357 | | - if (check_overcommit && |
---|
5358 | | - !can_overcommit(fs_info, space_info, 0, flush, false)) |
---|
5359 | | - break; |
---|
5360 | | - if (num_bytes >= ticket->bytes) { |
---|
5361 | | - list_del_init(&ticket->list); |
---|
5362 | | - num_bytes -= ticket->bytes; |
---|
5363 | | - ticket->bytes = 0; |
---|
5364 | | - space_info->tickets_id++; |
---|
5365 | | - wake_up(&ticket->wait); |
---|
5366 | | - } else { |
---|
5367 | | - ticket->bytes -= num_bytes; |
---|
5368 | | - num_bytes = 0; |
---|
5369 | | - } |
---|
5370 | | - } |
---|
5371 | | - |
---|
5372 | | - if (num_bytes && head == &space_info->priority_tickets) { |
---|
5373 | | - head = &space_info->tickets; |
---|
5374 | | - flush = BTRFS_RESERVE_FLUSH_ALL; |
---|
5375 | | - goto again; |
---|
5376 | | - } |
---|
5377 | | - space_info->bytes_may_use -= num_bytes; |
---|
5378 | | - trace_btrfs_space_reservation(fs_info, "space_info", |
---|
5379 | | - space_info->flags, num_bytes, 0); |
---|
5380 | | - spin_unlock(&space_info->lock); |
---|
5381 | | -} |
---|
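
The ticket loop in space_info_add_old_bytes() above hands returned bytes to waiters in FIFO order: a ticket whose whole request is covered is woken, a partially covered ticket at the head keeps the remainder. Below is a userspace sketch of just that hand-out loop; the overcommit check, the priority/normal list split and the wake_up() call are omitted, and the ticket array is hypothetical.

#include <stdint.h>
#include <stdio.h>

static uint64_t hand_out(uint64_t *tickets, int n, uint64_t freed)
{
	for (int i = 0; i < n && freed; i++) {
		if (!tickets[i])
			continue;               /* already satisfied */
		if (freed >= tickets[i]) {
			freed -= tickets[i];
			tickets[i] = 0;         /* would wake_up() the waiter here */
		} else {
			tickets[i] -= freed;
			freed = 0;
		}
	}
	return freed;   /* leftover goes back to bytes_may_use in the kernel */
}

int main(void)
{
	uint64_t tickets[3] = { 8192, 65536, 4096 };
	uint64_t left = hand_out(tickets, 3, 16384);

	/* first ticket satisfied, second shrunk to 57344, third untouched */
	printf("%llu %llu %llu left=%llu\n",
	       (unsigned long long)tickets[0], (unsigned long long)tickets[1],
	       (unsigned long long)tickets[2], (unsigned long long)left);
	return 0;
}
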
5382 | | - |
---|
5383 | | -/* |
---|
5384 | | - * This is for newly allocated space that isn't accounted in |
---|
5385 | | - * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent |
---|
5386 | | - * we use this helper. |
---|
5387 | | - */ |
---|
5388 | | -static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, |
---|
5389 | | - struct btrfs_space_info *space_info, |
---|
5390 | | - u64 num_bytes) |
---|
5391 | | -{ |
---|
5392 | | - struct reserve_ticket *ticket; |
---|
5393 | | - struct list_head *head = &space_info->priority_tickets; |
---|
5394 | | - |
---|
5395 | | -again: |
---|
5396 | | - while (!list_empty(head) && num_bytes) { |
---|
5397 | | - ticket = list_first_entry(head, struct reserve_ticket, |
---|
5398 | | - list); |
---|
5399 | | - if (num_bytes >= ticket->bytes) { |
---|
5400 | | - trace_btrfs_space_reservation(fs_info, "space_info", |
---|
5401 | | - space_info->flags, |
---|
5402 | | - ticket->bytes, 1); |
---|
5403 | | - list_del_init(&ticket->list); |
---|
5404 | | - num_bytes -= ticket->bytes; |
---|
5405 | | - space_info->bytes_may_use += ticket->bytes; |
---|
5406 | | - ticket->bytes = 0; |
---|
5407 | | - space_info->tickets_id++; |
---|
5408 | | - wake_up(&ticket->wait); |
---|
5409 | | - } else { |
---|
5410 | | - trace_btrfs_space_reservation(fs_info, "space_info", |
---|
5411 | | - space_info->flags, |
---|
5412 | | - num_bytes, 1); |
---|
5413 | | - space_info->bytes_may_use += num_bytes; |
---|
5414 | | - ticket->bytes -= num_bytes; |
---|
5415 | | - num_bytes = 0; |
---|
5416 | | - } |
---|
5417 | | - } |
---|
5418 | | - |
---|
5419 | | - if (num_bytes && head == &space_info->priority_tickets) { |
---|
5420 | | - head = &space_info->tickets; |
---|
5421 | | - goto again; |
---|
5422 | | - } |
---|
5423 | | -} |
---|
5424 | | - |
---|
5425 | | -static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, |
---|
5426 | | - struct btrfs_block_rsv *block_rsv, |
---|
5427 | | - struct btrfs_block_rsv *dest, u64 num_bytes, |
---|
5428 | | - u64 *qgroup_to_release_ret) |
---|
5429 | | -{ |
---|
5430 | | - struct btrfs_space_info *space_info = block_rsv->space_info; |
---|
5431 | | - u64 qgroup_to_release = 0; |
---|
5432 | | - u64 ret; |
---|
5433 | | - |
---|
5434 | | - spin_lock(&block_rsv->lock); |
---|
5435 | | - if (num_bytes == (u64)-1) { |
---|
5436 | | - num_bytes = block_rsv->size; |
---|
5437 | | - qgroup_to_release = block_rsv->qgroup_rsv_size; |
---|
5438 | | - } |
---|
5439 | | - block_rsv->size -= num_bytes; |
---|
5440 | | - if (block_rsv->reserved >= block_rsv->size) { |
---|
5441 | | - num_bytes = block_rsv->reserved - block_rsv->size; |
---|
5442 | | - block_rsv->reserved = block_rsv->size; |
---|
5443 | | - block_rsv->full = 1; |
---|
5444 | | - } else { |
---|
5445 | | - num_bytes = 0; |
---|
5446 | | - } |
---|
5447 | | - if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) { |
---|
5448 | | - qgroup_to_release = block_rsv->qgroup_rsv_reserved - |
---|
5449 | | - block_rsv->qgroup_rsv_size; |
---|
5450 | | - block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size; |
---|
5451 | | - } else { |
---|
5452 | | - qgroup_to_release = 0; |
---|
5453 | | - } |
---|
5454 | | - spin_unlock(&block_rsv->lock); |
---|
5455 | | - |
---|
5456 | | - ret = num_bytes; |
---|
5457 | | - if (num_bytes > 0) { |
---|
5458 | | - if (dest) { |
---|
5459 | | - spin_lock(&dest->lock); |
---|
5460 | | - if (!dest->full) { |
---|
5461 | | - u64 bytes_to_add; |
---|
5462 | | - |
---|
5463 | | - bytes_to_add = dest->size - dest->reserved; |
---|
5464 | | - bytes_to_add = min(num_bytes, bytes_to_add); |
---|
5465 | | - dest->reserved += bytes_to_add; |
---|
5466 | | - if (dest->reserved >= dest->size) |
---|
5467 | | - dest->full = 1; |
---|
5468 | | - num_bytes -= bytes_to_add; |
---|
5469 | | - } |
---|
5470 | | - spin_unlock(&dest->lock); |
---|
5471 | | - } |
---|
5472 | | - if (num_bytes) |
---|
5473 | | - space_info_add_old_bytes(fs_info, space_info, |
---|
5474 | | - num_bytes); |
---|
5475 | | - } |
---|
5476 | | - if (qgroup_to_release_ret) |
---|
5477 | | - *qgroup_to_release_ret = qgroup_to_release; |
---|
5478 | | - return ret; |
---|
5479 | | -} |
---|
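
block_rsv_release_bytes() above shrinks the reservation's size and deals with the surplus: whatever now exceeds the (smaller) size first tops up the destination rsv, and anything left over is returned to the space_info. A minimal sketch of that arithmetic, with the qgroup side dropped and hypothetical values in main():

#include <stdint.h>
#include <stdio.h>

struct rsv { uint64_t size, reserved; };

static uint64_t release_bytes(struct rsv *r, struct rsv *dest, uint64_t shrink)
{
	uint64_t excess = 0;

	r->size -= shrink;
	if (r->reserved > r->size) {
		excess = r->reserved - r->size;
		r->reserved = r->size;
	}
	if (dest && excess && dest->reserved < dest->size) {
		uint64_t to_add = dest->size - dest->reserved;

		if (to_add > excess)
			to_add = excess;
		dest->reserved += to_add;       /* top up the destination first */
		excess -= to_add;
	}
	return excess;          /* bytes handed back to the space_info */
}

int main(void)
{
	struct rsv inode_rsv = { .size = 1 << 20, .reserved = 1 << 20 };
	struct rsv global    = { .size = 1 << 20, .reserved = (1 << 20) - 4096 };

	uint64_t leftover = release_bytes(&inode_rsv, &global, 64 * 1024);

	printf("leftover=%llu global.reserved=%llu\n",
	       (unsigned long long)leftover,
	       (unsigned long long)global.reserved);   /* 61440, 1048576 */
	return 0;
}
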
5480 | | - |
---|
5481 | | -int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src, |
---|
5482 | | - struct btrfs_block_rsv *dst, u64 num_bytes, |
---|
5483 | | - int update_size) |
---|
5484 | | -{ |
---|
5485 | | - int ret; |
---|
5486 | | - |
---|
5487 | | - ret = block_rsv_use_bytes(src, num_bytes); |
---|
5488 | | - if (ret) |
---|
5489 | | - return ret; |
---|
5490 | | - |
---|
5491 | | - block_rsv_add_bytes(dst, num_bytes, update_size); |
---|
5492 | | - return 0; |
---|
5493 | | -} |
---|
5494 | | - |
---|
5495 | | -void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type) |
---|
5496 | | -{ |
---|
5497 | | - memset(rsv, 0, sizeof(*rsv)); |
---|
5498 | | - spin_lock_init(&rsv->lock); |
---|
5499 | | - rsv->type = type; |
---|
5500 | | -} |
---|
5501 | | - |
---|
5502 | | -void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info, |
---|
5503 | | - struct btrfs_block_rsv *rsv, |
---|
5504 | | - unsigned short type) |
---|
5505 | | -{ |
---|
5506 | | - btrfs_init_block_rsv(rsv, type); |
---|
5507 | | - rsv->space_info = __find_space_info(fs_info, |
---|
5508 | | - BTRFS_BLOCK_GROUP_METADATA); |
---|
5509 | | -} |
---|
5510 | | - |
---|
5511 | | -struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info, |
---|
5512 | | - unsigned short type) |
---|
5513 | | -{ |
---|
5514 | | - struct btrfs_block_rsv *block_rsv; |
---|
5515 | | - |
---|
5516 | | - block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); |
---|
5517 | | - if (!block_rsv) |
---|
5518 | | - return NULL; |
---|
5519 | | - |
---|
5520 | | - btrfs_init_metadata_block_rsv(fs_info, block_rsv, type); |
---|
5521 | | - return block_rsv; |
---|
5522 | | -} |
---|
5523 | | - |
---|
5524 | | -void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info, |
---|
5525 | | - struct btrfs_block_rsv *rsv) |
---|
5526 | | -{ |
---|
5527 | | - if (!rsv) |
---|
5528 | | - return; |
---|
5529 | | - btrfs_block_rsv_release(fs_info, rsv, (u64)-1); |
---|
5530 | | - kfree(rsv); |
---|
5531 | | -} |
---|
5532 | | - |
---|
5533 | | -int btrfs_block_rsv_add(struct btrfs_root *root, |
---|
5534 | | - struct btrfs_block_rsv *block_rsv, u64 num_bytes, |
---|
5535 | | - enum btrfs_reserve_flush_enum flush) |
---|
5536 | | -{ |
---|
5537 | | - int ret; |
---|
5538 | | - |
---|
5539 | | - if (num_bytes == 0) |
---|
5540 | | - return 0; |
---|
5541 | | - |
---|
5542 | | - ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); |
---|
5543 | | - if (!ret) { |
---|
5544 | | - block_rsv_add_bytes(block_rsv, num_bytes, 1); |
---|
5545 | | - return 0; |
---|
5546 | | - } |
---|
5547 | | - |
---|
5548 | | - return ret; |
---|
5549 | | -} |
---|
5550 | | - |
---|
5551 | | -int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor) |
---|
5552 | | -{ |
---|
5553 | | - u64 num_bytes = 0; |
---|
5554 | | - int ret = -ENOSPC; |
---|
5555 | | - |
---|
5556 | | - if (!block_rsv) |
---|
5557 | | - return 0; |
---|
5558 | | - |
---|
5559 | | - spin_lock(&block_rsv->lock); |
---|
5560 | | - num_bytes = div_factor(block_rsv->size, min_factor); |
---|
5561 | | - if (block_rsv->reserved >= num_bytes) |
---|
5562 | | - ret = 0; |
---|
5563 | | - spin_unlock(&block_rsv->lock); |
---|
5564 | | - |
---|
5565 | | - return ret; |
---|
5566 | | -} |
---|
5567 | | - |
---|
5568 | | -int btrfs_block_rsv_refill(struct btrfs_root *root, |
---|
5569 | | - struct btrfs_block_rsv *block_rsv, u64 min_reserved, |
---|
5570 | | - enum btrfs_reserve_flush_enum flush) |
---|
5571 | | -{ |
---|
5572 | | - u64 num_bytes = 0; |
---|
5573 | | - int ret = -ENOSPC; |
---|
5574 | | - |
---|
5575 | | - if (!block_rsv) |
---|
5576 | | - return 0; |
---|
5577 | | - |
---|
5578 | | - spin_lock(&block_rsv->lock); |
---|
5579 | | - num_bytes = min_reserved; |
---|
5580 | | - if (block_rsv->reserved >= num_bytes) |
---|
5581 | | - ret = 0; |
---|
5582 | | - else |
---|
5583 | | - num_bytes -= block_rsv->reserved; |
---|
5584 | | - spin_unlock(&block_rsv->lock); |
---|
5585 | | - |
---|
5586 | | - if (!ret) |
---|
5587 | | - return 0; |
---|
5588 | | - |
---|
5589 | | - ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); |
---|
5590 | | - if (!ret) { |
---|
5591 | | - block_rsv_add_bytes(block_rsv, num_bytes, 0); |
---|
5592 | | - return 0; |
---|
5593 | | - } |
---|
5594 | | - |
---|
5595 | | - return ret; |
---|
5596 | | -} |
---|
5597 | | - |
---|
5598 | | -/** |
---|
5599 | | - * btrfs_inode_rsv_refill - refill the inode block rsv. |
---|
5600 | | - * @inode - the inode we are refilling. |
---|
5601 | | - * @flush - the flushing restriction. |
---|
5602 | | - * |
---|
5603 | | - * Essentially the same as btrfs_block_rsv_refill, except it uses the |
---|
5604 | | - * block_rsv->size as the minimum size. We'll either refill the missing amount |
---|
5605 | | - * or return if we already have enough space. This will also handle the reserve |
---|
5606 | | - * tracepoint for the reserved amount. |
---|
5607 | | - */ |
---|
5608 | | -static int btrfs_inode_rsv_refill(struct btrfs_inode *inode, |
---|
5609 | | - enum btrfs_reserve_flush_enum flush) |
---|
5610 | | -{ |
---|
5611 | | - struct btrfs_root *root = inode->root; |
---|
5612 | | - struct btrfs_block_rsv *block_rsv = &inode->block_rsv; |
---|
5613 | | - u64 num_bytes = 0; |
---|
5614 | | - u64 qgroup_num_bytes = 0; |
---|
5615 | | - int ret = -ENOSPC; |
---|
5616 | | - |
---|
5617 | | - spin_lock(&block_rsv->lock); |
---|
5618 | | - if (block_rsv->reserved < block_rsv->size) |
---|
5619 | | - num_bytes = block_rsv->size - block_rsv->reserved; |
---|
5620 | | - if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size) |
---|
5621 | | - qgroup_num_bytes = block_rsv->qgroup_rsv_size - |
---|
5622 | | - block_rsv->qgroup_rsv_reserved; |
---|
5623 | | - spin_unlock(&block_rsv->lock); |
---|
5624 | | - |
---|
5625 | | - if (num_bytes == 0) |
---|
5626 | | - return 0; |
---|
5627 | | - |
---|
5628 | | - ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true); |
---|
5629 | | - if (ret) |
---|
5630 | | - return ret; |
---|
5631 | | - ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); |
---|
5632 | | - if (!ret) { |
---|
5633 | | - block_rsv_add_bytes(block_rsv, num_bytes, 0); |
---|
5634 | | - trace_btrfs_space_reservation(root->fs_info, "delalloc", |
---|
5635 | | - btrfs_ino(inode), num_bytes, 1); |
---|
5636 | | - |
---|
5637 | | - /* Don't forget to increase qgroup_rsv_reserved */ |
---|
5638 | | - spin_lock(&block_rsv->lock); |
---|
5639 | | - block_rsv->qgroup_rsv_reserved += qgroup_num_bytes; |
---|
5640 | | - spin_unlock(&block_rsv->lock); |
---|
5641 | | - } else |
---|
5642 | | - btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes); |
---|
5643 | | - return ret; |
---|
5644 | | -} |
---|
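
btrfs_inode_rsv_refill() above computes the metadata and qgroup deficits under the lock, takes the qgroup reservation first, then the metadata bytes, and rolls the qgroup bytes back if the second step fails. A sketch of that ordering follows; reserve_qgroup(), unreserve_qgroup() and reserve_metadata() are stand-ins invented for the example, not kernel APIs.

#include <stdint.h>
#include <stdio.h>

struct rsv { uint64_t size, reserved, qg_size, qg_reserved; };

static int reserve_qgroup(uint64_t n)    { (void)n; return 0; }
static void unreserve_qgroup(uint64_t n) { (void)n; }
static int reserve_metadata(uint64_t n)  { return n > (8ULL << 20) ? -1 : 0; }

static int inode_rsv_refill(struct rsv *r)
{
	uint64_t need = 0, qg_need = 0;
	int ret;

	if (r->reserved < r->size)
		need = r->size - r->reserved;
	if (r->qg_reserved < r->qg_size)
		qg_need = r->qg_size - r->qg_reserved;
	if (!need)
		return 0;

	if ((ret = reserve_qgroup(qg_need)))
		return ret;
	if ((ret = reserve_metadata(need))) {
		unreserve_qgroup(qg_need);      /* roll back on failure */
		return ret;
	}
	r->reserved += need;
	r->qg_reserved += qg_need;
	return 0;
}

int main(void)
{
	struct rsv r = { .size = 1 << 20, .reserved = 256 << 10,
			 .qg_size = 64 << 10, .qg_reserved = 0 };

	printf("ret=%d reserved=%llu\n", inode_rsv_refill(&r),
	       (unsigned long long)r.reserved);         /* 0, 1048576 */
	return 0;
}
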
5645 | | - |
---|
5646 | | -/** |
---|
5647 | | - * btrfs_inode_rsv_release - release any excessive reservation. |
---|
5648 | | - * @inode - the inode we need to release from. |
---|
5649 | | - * @qgroup_free - free or convert qgroup meta. |
---|
5650 | | - * Unlike normal operation, qgroup meta reservation needs to know if we are |
---|
5651 | | - * freeing qgroup reservation or just converting it into per-trans. Normally |
---|
5652 | | - * @qgroup_free is true for error handling, and false for normal release. |
---|
5653 | | - * |
---|
5654 | | - * This is the same as btrfs_block_rsv_release, except that it handles the |
---|
5655 | | - * tracepoint for the reservation. |
---|
5656 | | - */ |
---|
5657 | | -static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free) |
---|
5658 | | -{ |
---|
5659 | | - struct btrfs_fs_info *fs_info = inode->root->fs_info; |
---|
5660 | | - struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; |
---|
5661 | | - struct btrfs_block_rsv *block_rsv = &inode->block_rsv; |
---|
5662 | | - u64 released = 0; |
---|
5663 | | - u64 qgroup_to_release = 0; |
---|
5664 | | - |
---|
5665 | | - /* |
---|
5666 | | - * Since we statically set the block_rsv->size we just want to say we |
---|
5667 | | - * are releasing 0 bytes, and then we'll just get the reservation over |
---|
5668 | | - * the size freed. |
---|
5669 | | - */ |
---|
5670 | | - released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0, |
---|
5671 | | - &qgroup_to_release); |
---|
5672 | | - if (released > 0) |
---|
5673 | | - trace_btrfs_space_reservation(fs_info, "delalloc", |
---|
5674 | | - btrfs_ino(inode), released, 0); |
---|
5675 | | - if (qgroup_free) |
---|
5676 | | - btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release); |
---|
5677 | | - else |
---|
5678 | | - btrfs_qgroup_convert_reserved_meta(inode->root, |
---|
5679 | | - qgroup_to_release); |
---|
5680 | | -} |
---|
5681 | | - |
---|
5682 | | -void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, |
---|
5683 | | - struct btrfs_block_rsv *block_rsv, |
---|
5684 | | - u64 num_bytes) |
---|
5685 | | -{ |
---|
5686 | | - struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; |
---|
5687 | | - |
---|
5688 | | - if (global_rsv == block_rsv || |
---|
5689 | | - block_rsv->space_info != global_rsv->space_info) |
---|
5690 | | - global_rsv = NULL; |
---|
5691 | | - block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes, NULL); |
---|
5692 | | -} |
---|
5693 | | - |
---|
5694 | | -static void update_global_block_rsv(struct btrfs_fs_info *fs_info) |
---|
5695 | | -{ |
---|
5696 | | - struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; |
---|
5697 | | - struct btrfs_space_info *sinfo = block_rsv->space_info; |
---|
5698 | | - u64 num_bytes; |
---|
5699 | | - |
---|
5700 | | - /* |
---|
5701 | | - * The global block rsv is based on the size of the extent tree, the |
---|
5702 | | - * checksum tree and the root tree. If the fs is empty we want to set |
---|
5703 | | - * it to a minimal amount for safety. |
---|
5704 | | - */ |
---|
5705 | | - num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) + |
---|
5706 | | - btrfs_root_used(&fs_info->csum_root->root_item) + |
---|
5707 | | - btrfs_root_used(&fs_info->tree_root->root_item); |
---|
5708 | | - num_bytes = max_t(u64, num_bytes, SZ_16M); |
---|
5709 | | - |
---|
5710 | | - spin_lock(&sinfo->lock); |
---|
5711 | | - spin_lock(&block_rsv->lock); |
---|
5712 | | - |
---|
5713 | | - block_rsv->size = min_t(u64, num_bytes, SZ_512M); |
---|
5714 | | - |
---|
5715 | | - if (block_rsv->reserved < block_rsv->size) { |
---|
5716 | | - num_bytes = btrfs_space_info_used(sinfo, true); |
---|
5717 | | - if (sinfo->total_bytes > num_bytes) { |
---|
5718 | | - num_bytes = sinfo->total_bytes - num_bytes; |
---|
5719 | | - num_bytes = min(num_bytes, |
---|
5720 | | - block_rsv->size - block_rsv->reserved); |
---|
5721 | | - block_rsv->reserved += num_bytes; |
---|
5722 | | - sinfo->bytes_may_use += num_bytes; |
---|
5723 | | - trace_btrfs_space_reservation(fs_info, "space_info", |
---|
5724 | | - sinfo->flags, num_bytes, |
---|
5725 | | - 1); |
---|
5726 | | - } |
---|
5727 | | - } else if (block_rsv->reserved > block_rsv->size) { |
---|
5728 | | - num_bytes = block_rsv->reserved - block_rsv->size; |
---|
5729 | | - sinfo->bytes_may_use -= num_bytes; |
---|
5730 | | - trace_btrfs_space_reservation(fs_info, "space_info", |
---|
5731 | | - sinfo->flags, num_bytes, 0); |
---|
5732 | | - block_rsv->reserved = block_rsv->size; |
---|
5733 | | - } |
---|
5734 | | - |
---|
5735 | | - if (block_rsv->reserved == block_rsv->size) |
---|
5736 | | - block_rsv->full = 1; |
---|
5737 | | - else |
---|
5738 | | - block_rsv->full = 0; |
---|
5739 | | - |
---|
5740 | | - spin_unlock(&block_rsv->lock); |
---|
5741 | | - spin_unlock(&sinfo->lock); |
---|
5742 | | -} |
---|
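
update_global_block_rsv() above sizes the global reservation from the combined usage of the extent, csum and root trees and clamps it into a fixed window before topping it up from unreserved space. The sketch below shows only that target computation, assuming the 16 MiB floor and 512 MiB cap visible in the removed code; the root usage numbers are made up.

#include <stdint.h>
#include <stdio.h>

#define SZ_16M  (16ULL << 20)
#define SZ_512M (512ULL << 20)

static uint64_t global_rsv_target(uint64_t extent_root_used,
				  uint64_t csum_root_used,
				  uint64_t tree_root_used)
{
	uint64_t num_bytes = extent_root_used + csum_root_used + tree_root_used;

	if (num_bytes < SZ_16M)
		num_bytes = SZ_16M;     /* safety floor for a near-empty fs */
	if (num_bytes > SZ_512M)
		num_bytes = SZ_512M;    /* cap for very large filesystems */
	return num_bytes;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)
	       global_rsv_target(3ULL << 20, 1ULL << 20, 1ULL << 20));   /* 16 MiB */
	printf("%llu\n", (unsigned long long)
	       global_rsv_target(700ULL << 20, 80ULL << 20, 2ULL << 20)); /* 512 MiB */
	return 0;
}
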
5743 | | - |
---|
5744 | | -static void init_global_block_rsv(struct btrfs_fs_info *fs_info) |
---|
5745 | | -{ |
---|
5746 | | - struct btrfs_space_info *space_info; |
---|
5747 | | - |
---|
5748 | | - space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); |
---|
5749 | | - fs_info->chunk_block_rsv.space_info = space_info; |
---|
5750 | | - |
---|
5751 | | - space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); |
---|
5752 | | - fs_info->global_block_rsv.space_info = space_info; |
---|
5753 | | - fs_info->trans_block_rsv.space_info = space_info; |
---|
5754 | | - fs_info->empty_block_rsv.space_info = space_info; |
---|
5755 | | - fs_info->delayed_block_rsv.space_info = space_info; |
---|
5756 | | - |
---|
5757 | | - fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; |
---|
5758 | | - fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; |
---|
5759 | | - fs_info->dev_root->block_rsv = &fs_info->global_block_rsv; |
---|
5760 | | - fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; |
---|
5761 | | - if (fs_info->quota_root) |
---|
5762 | | - fs_info->quota_root->block_rsv = &fs_info->global_block_rsv; |
---|
5763 | | - fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; |
---|
5764 | | - |
---|
5765 | | - update_global_block_rsv(fs_info); |
---|
5766 | | -} |
---|
5767 | | - |
---|
5768 | | -static void release_global_block_rsv(struct btrfs_fs_info *fs_info) |
---|
5769 | | -{ |
---|
5770 | | - block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL, |
---|
5771 | | - (u64)-1, NULL); |
---|
5772 | | - WARN_ON(fs_info->trans_block_rsv.size > 0); |
---|
5773 | | - WARN_ON(fs_info->trans_block_rsv.reserved > 0); |
---|
5774 | | - WARN_ON(fs_info->chunk_block_rsv.size > 0); |
---|
5775 | | - WARN_ON(fs_info->chunk_block_rsv.reserved > 0); |
---|
5776 | | - WARN_ON(fs_info->delayed_block_rsv.size > 0); |
---|
5777 | | - WARN_ON(fs_info->delayed_block_rsv.reserved > 0); |
---|
5778 | | -} |
---|
5779 | | - |
---|
5780 | | - |
---|
5781 | | -/* |
---|
5782 | | - * To be called after all the new block groups attached to the transaction |
---|
5783 | | - * handle have been created (btrfs_create_pending_block_groups()). |
---|
5784 | | - */ |
---|
5785 | | -void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) |
---|
5786 | | -{ |
---|
5787 | | - struct btrfs_fs_info *fs_info = trans->fs_info; |
---|
5788 | | - |
---|
5789 | | - if (!trans->chunk_bytes_reserved) |
---|
5790 | | - return; |
---|
5791 | | - |
---|
5792 | | - WARN_ON_ONCE(!list_empty(&trans->new_bgs)); |
---|
5793 | | - |
---|
5794 | | - block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL, |
---|
5795 | | - trans->chunk_bytes_reserved, NULL); |
---|
5796 | | - trans->chunk_bytes_reserved = 0; |
---|
5797 | | -} |
---|
5798 | | - |
---|
5799 | | -/* |
---|
5800 | | - * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation |
---|
5801 | | - * root: the root of the parent directory |
---|
5802 | | - * rsv: block reservation |
---|
5803 | | - * items: the number of items that we need do reservation |
---|
5804 | | - * use_global_rsv: allow fallback to the global block reservation |
---|
5805 | | - * |
---|
5806 | | - * This function is used to reserve the space for snapshot/subvolume |
---|
5807 | | - * creation and deletion. Those operations differ from the common |
---|
5808 | | - * file/directory operations: they change two fs/file trees and |
---|
5809 | | - * the root tree, and the number of items that the qgroup reserves |
---|
5810 | | - * differs from the free space reservation. So we cannot use |
---|
5811 | | - * the space reservation mechanism in start_transaction(). |
---|
5812 | | - */ |
---|
5813 | | -int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, |
---|
5814 | | - struct btrfs_block_rsv *rsv, int items, |
---|
5815 | | - bool use_global_rsv) |
---|
5816 | | -{ |
---|
5817 | | - u64 qgroup_num_bytes = 0; |
---|
5818 | | - u64 num_bytes; |
---|
5819 | | - int ret; |
---|
5820 | | - struct btrfs_fs_info *fs_info = root->fs_info; |
---|
5821 | | - struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; |
---|
5822 | | - |
---|
5823 | | - if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { |
---|
5824 | | - /* One for parent inode, two for dir entries */ |
---|
5825 | | - qgroup_num_bytes = 3 * fs_info->nodesize; |
---|
5826 | | - ret = btrfs_qgroup_reserve_meta_prealloc(root, |
---|
5827 | | - qgroup_num_bytes, true); |
---|
5828 | | - if (ret) |
---|
5829 | | - return ret; |
---|
5830 | | - } |
---|
5831 | | - |
---|
5832 | | - num_bytes = btrfs_calc_trans_metadata_size(fs_info, items); |
---|
5833 | | - rsv->space_info = __find_space_info(fs_info, |
---|
5834 | | - BTRFS_BLOCK_GROUP_METADATA); |
---|
5835 | | - ret = btrfs_block_rsv_add(root, rsv, num_bytes, |
---|
5836 | | - BTRFS_RESERVE_FLUSH_ALL); |
---|
5837 | | - |
---|
5838 | | - if (ret == -ENOSPC && use_global_rsv) |
---|
5839 | | - ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1); |
---|
5840 | | - |
---|
5841 | | - if (ret && qgroup_num_bytes) |
---|
5842 | | - btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes); |
---|
5843 | | - |
---|
5844 | | - return ret; |
---|
5845 | | -} |
---|
5846 | | - |
---|
5847 | | -void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, |
---|
5848 | | - struct btrfs_block_rsv *rsv) |
---|
5849 | | -{ |
---|
5850 | | - btrfs_block_rsv_release(fs_info, rsv, (u64)-1); |
---|
5851 | | -} |
---|
5852 | | - |
---|
5853 | | -static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, |
---|
5854 | | - struct btrfs_inode *inode) |
---|
5855 | | -{ |
---|
5856 | | - struct btrfs_block_rsv *block_rsv = &inode->block_rsv; |
---|
5857 | | - u64 reserve_size = 0; |
---|
5858 | | - u64 qgroup_rsv_size = 0; |
---|
5859 | | - u64 csum_leaves; |
---|
5860 | | - unsigned outstanding_extents; |
---|
5861 | | - |
---|
5862 | | - lockdep_assert_held(&inode->lock); |
---|
5863 | | - outstanding_extents = inode->outstanding_extents; |
---|
5864 | | - if (outstanding_extents) |
---|
5865 | | - reserve_size = btrfs_calc_trans_metadata_size(fs_info, |
---|
5866 | | - outstanding_extents + 1); |
---|
5867 | | - csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, |
---|
5868 | | - inode->csum_bytes); |
---|
5869 | | - reserve_size += btrfs_calc_trans_metadata_size(fs_info, |
---|
5870 | | - csum_leaves); |
---|
5871 | | - /* |
---|
5872 | | - * For qgroup rsv, the calculation is very simple: |
---|
5873 | | - * account one nodesize for each outstanding extent |
---|
5874 | | - * |
---|
5875 | | - * This overestimates in most cases. |
---|
5876 | | - */ |
---|
5877 | | - qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize; |
---|
5878 | | - |
---|
5879 | | - spin_lock(&block_rsv->lock); |
---|
5880 | | - block_rsv->size = reserve_size; |
---|
5881 | | - block_rsv->qgroup_rsv_size = qgroup_rsv_size; |
---|
5882 | | - spin_unlock(&block_rsv->lock); |
---|
5883 | | -} |
---|
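
btrfs_calculate_inode_block_rsv_size() above sums the worst-case item cost for the outstanding extents (plus one for the inode item) and the csum leaves, and sizes the qgroup side at one nodesize per outstanding extent. A worked example follows; it assumes btrfs_calc_trans_metadata_size(fs_info, n) == nodesize * 2 * BTRFS_MAX_LEVEL * n (BTRFS_MAX_LEVEL == 8), which is how that helper was defined in this era of the code, and uses a 16 KiB nodesize.

#include <stdint.h>
#include <stdio.h>

#define BTRFS_MAX_LEVEL 8

static uint64_t calc_trans_metadata_size(uint64_t nodesize, uint64_t items)
{
	return nodesize * 2 * BTRFS_MAX_LEVEL * items;
}

int main(void)
{
	uint64_t nodesize = 16384;
	uint64_t outstanding_extents = 4;
	uint64_t csum_leaves = 1;

	/* outstanding extents + 1 for the inode item update */
	uint64_t reserve_size =
		calc_trans_metadata_size(nodesize, outstanding_extents + 1) +
		calc_trans_metadata_size(nodesize, csum_leaves);
	/* qgroup side: one nodesize per outstanding extent (overestimate) */
	uint64_t qgroup_rsv_size = outstanding_extents * nodesize;

	printf("reserve_size=%llu qgroup_rsv_size=%llu\n",
	       (unsigned long long)reserve_size,
	       (unsigned long long)qgroup_rsv_size);  /* 1572864 and 65536 */
	return 0;
}
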
5884 | | - |
---|
5885 | | -int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) |
---|
5886 | | -{ |
---|
5887 | | - struct btrfs_fs_info *fs_info = inode->root->fs_info; |
---|
5888 | | - unsigned nr_extents; |
---|
5889 | | - enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; |
---|
5890 | | - int ret = 0; |
---|
5891 | | - bool delalloc_lock = true; |
---|
5892 | | - |
---|
5893 | | - /* If we are a free space inode we need to not flush since we will be in |
---|
5894 | | - * the middle of a transaction commit. We also don't need the delalloc |
---|
5895 | | - * mutex since we won't race with anybody. We need this mostly to make |
---|
5896 | | - * lockdep shut its filthy mouth. |
---|
5897 | | - * |
---|
5898 | | - * If we have a transaction open (can happen if we call truncate_block |
---|
5899 | | - * from truncate), then we need FLUSH_LIMIT so we don't deadlock. |
---|
5900 | | - */ |
---|
5901 | | - if (btrfs_is_free_space_inode(inode)) { |
---|
5902 | | - flush = BTRFS_RESERVE_NO_FLUSH; |
---|
5903 | | - delalloc_lock = false; |
---|
5904 | | - } else { |
---|
5905 | | - if (current->journal_info) |
---|
5906 | | - flush = BTRFS_RESERVE_FLUSH_LIMIT; |
---|
5907 | | - |
---|
5908 | | - if (btrfs_transaction_in_commit(fs_info)) |
---|
5909 | | - schedule_timeout(1); |
---|
5910 | | - } |
---|
5911 | | - |
---|
5912 | | - if (delalloc_lock) |
---|
5913 | | - mutex_lock(&inode->delalloc_mutex); |
---|
5914 | | - |
---|
5915 | | - num_bytes = ALIGN(num_bytes, fs_info->sectorsize); |
---|
5916 | | - |
---|
5917 | | - /* Add our new extents and calculate the new rsv size. */ |
---|
5918 | | - spin_lock(&inode->lock); |
---|
5919 | | - nr_extents = count_max_extents(num_bytes); |
---|
5920 | | - btrfs_mod_outstanding_extents(inode, nr_extents); |
---|
5921 | | - inode->csum_bytes += num_bytes; |
---|
5922 | | - btrfs_calculate_inode_block_rsv_size(fs_info, inode); |
---|
5923 | | - spin_unlock(&inode->lock); |
---|
5924 | | - |
---|
5925 | | - ret = btrfs_inode_rsv_refill(inode, flush); |
---|
5926 | | - if (unlikely(ret)) |
---|
5927 | | - goto out_fail; |
---|
5928 | | - |
---|
5929 | | - if (delalloc_lock) |
---|
5930 | | - mutex_unlock(&inode->delalloc_mutex); |
---|
5931 | | - return 0; |
---|
5932 | | - |
---|
5933 | | -out_fail: |
---|
5934 | | - spin_lock(&inode->lock); |
---|
5935 | | - nr_extents = count_max_extents(num_bytes); |
---|
5936 | | - btrfs_mod_outstanding_extents(inode, -nr_extents); |
---|
5937 | | - inode->csum_bytes -= num_bytes; |
---|
5938 | | - btrfs_calculate_inode_block_rsv_size(fs_info, inode); |
---|
5939 | | - spin_unlock(&inode->lock); |
---|
5940 | | - |
---|
5941 | | - btrfs_inode_rsv_release(inode, true); |
---|
5942 | | - if (delalloc_lock) |
---|
5943 | | - mutex_unlock(&inode->delalloc_mutex); |
---|
5944 | | - return ret; |
---|
5945 | | -} |
---|
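
btrfs_delalloc_reserve_metadata() above bumps the outstanding extent count by count_max_extents() before refilling the inode rsv. The sketch below assumes count_max_extents(n) == DIV_ROUND_UP(n, BTRFS_MAX_EXTENT_SIZE) with BTRFS_MAX_EXTENT_SIZE == 128 MiB, as in this era of the code; each such extent is one potential outstanding extent to reserve for.

#include <stdint.h>
#include <stdio.h>

#define BTRFS_MAX_EXTENT_SIZE (128ULL << 20)

static uint64_t count_max_extents(uint64_t num_bytes)
{
	return (num_bytes + BTRFS_MAX_EXTENT_SIZE - 1) / BTRFS_MAX_EXTENT_SIZE;
}

int main(void)
{
	/* a 4 KiB write, a 128 MiB write and a 300 MiB write */
	printf("%llu %llu %llu\n",
	       (unsigned long long)count_max_extents(4096),
	       (unsigned long long)count_max_extents(128ULL << 20),
	       (unsigned long long)count_max_extents(300ULL << 20));  /* 1 1 3 */
	return 0;
}
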
5946 | | - |
---|
5947 | | -/** |
---|
5948 | | - * btrfs_delalloc_release_metadata - release a metadata reservation for an inode |
---|
5949 | | - * @inode: the inode to release the reservation for. |
---|
5950 | | - * @num_bytes: the number of bytes we are releasing. |
---|
5951 | | - * @qgroup_free: free qgroup reservation or convert it to per-trans reservation |
---|
5952 | | - * |
---|
5953 | | - * This will release the metadata reservation for an inode. This can be called |
---|
5954 | | - * once we complete IO for a given set of bytes to release their metadata |
---|
5955 | | - * reservations, or on error for the same reason. |
---|
5956 | | - */ |
---|
5957 | | -void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, |
---|
5958 | | - bool qgroup_free) |
---|
5959 | | -{ |
---|
5960 | | - struct btrfs_fs_info *fs_info = inode->root->fs_info; |
---|
5961 | | - |
---|
5962 | | - num_bytes = ALIGN(num_bytes, fs_info->sectorsize); |
---|
5963 | | - spin_lock(&inode->lock); |
---|
5964 | | - inode->csum_bytes -= num_bytes; |
---|
5965 | | - btrfs_calculate_inode_block_rsv_size(fs_info, inode); |
---|
5966 | | - spin_unlock(&inode->lock); |
---|
5967 | | - |
---|
5968 | | - if (btrfs_is_testing(fs_info)) |
---|
5969 | | - return; |
---|
5970 | | - |
---|
5971 | | - btrfs_inode_rsv_release(inode, qgroup_free); |
---|
5972 | | -} |
---|
5973 | | - |
---|
5974 | | -/** |
---|
5975 | | - * btrfs_delalloc_release_extents - release our outstanding_extents |
---|
5976 | | - * @inode: the inode to balance the reservation for. |
---|
5977 | | - * @num_bytes: the number of bytes we originally reserved |
---|
5978 | | - * @qgroup_free: do we need to free qgroup meta reservation or convert them. |
---|
5979 | | - * |
---|
5980 | | - * When we reserve space we increase outstanding_extents for the extents we may |
---|
5981 | | - * add. Once we've set the range as delalloc or created our ordered extents we |
---|
5982 | | - * have outstanding_extents to track the real usage, so we use this to free our |
---|
5983 | | - * temporarily tracked outstanding_extents. This _must_ be used in conjunction |
---|
5984 | | - * with btrfs_delalloc_reserve_metadata. |
---|
5985 | | - */ |
---|
5986 | | -void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes) |
---|
5987 | | -{ |
---|
5988 | | - struct btrfs_fs_info *fs_info = inode->root->fs_info; |
---|
5989 | | - unsigned num_extents; |
---|
5990 | | - |
---|
5991 | | - spin_lock(&inode->lock); |
---|
5992 | | - num_extents = count_max_extents(num_bytes); |
---|
5993 | | - btrfs_mod_outstanding_extents(inode, -num_extents); |
---|
5994 | | - btrfs_calculate_inode_block_rsv_size(fs_info, inode); |
---|
5995 | | - spin_unlock(&inode->lock); |
---|
5996 | | - |
---|
5997 | | - if (btrfs_is_testing(fs_info)) |
---|
5998 | | - return; |
---|
5999 | | - |
---|
6000 | | - btrfs_inode_rsv_release(inode, true); |
---|
6001 | | -} |
---|
6002 | | - |
---|
6003 | | -/** |
---|
6004 | | - * btrfs_delalloc_reserve_space - reserve data and metadata space for |
---|
6005 | | - * delalloc |
---|
6006 | | - * @inode: inode we're writing to |
---|
6007 | | - * @start: start range we are writing to |
---|
6008 | | - * @len: the length of the range we are writing |
---|
6009 | | - * @reserved: mandatory parameter, record actually reserved qgroup ranges of |
---|
6010 | | - * current reservation. |
---|
6011 | | - * |
---|
6012 | | - * This will do the following things |
---|
6013 | | - * |
---|
6014 | | - * o reserve space in data space info for num bytes |
---|
6015 | | - * and reserve precious corresponding qgroup space |
---|
6016 | | - * (Done in check_data_free_space) |
---|
6017 | | - * |
---|
6018 | | - * o reserve space for metadata space, based on the number of outstanding |
---|
6019 | | - * extents and how much csums will be needed |
---|
6020 | | - * also reserve metadata space in a per root over-reserve method. |
---|
6021 | | - * o add to the inodes->delalloc_bytes |
---|
6022 | | - * o add it to the fs_info's delalloc inodes list. |
---|
6023 | | - * (Above 3 all done in delalloc_reserve_metadata) |
---|
6024 | | - * |
---|
6025 | | - * Return 0 for success |
---|
6026 | | - * Return <0 for error (-ENOSPC or -EDQUOT) |
---|
6027 | | - */ |
---|
6028 | | -int btrfs_delalloc_reserve_space(struct inode *inode, |
---|
6029 | | - struct extent_changeset **reserved, u64 start, u64 len) |
---|
6030 | | -{ |
---|
6031 | | - int ret; |
---|
6032 | | - |
---|
6033 | | - ret = btrfs_check_data_free_space(inode, reserved, start, len); |
---|
6034 | | - if (ret < 0) |
---|
6035 | | - return ret; |
---|
6036 | | - ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len); |
---|
6037 | | - if (ret < 0) |
---|
6038 | | - btrfs_free_reserved_data_space(inode, *reserved, start, len); |
---|
6039 | | - return ret; |
---|
6040 | | -} |
---|
6041 | | - |
---|
6042 | | -/** |
---|
6043 | | - * btrfs_delalloc_release_space - release data and metadata space for delalloc |
---|
6044 | | - * @inode: inode we're releasing space for |
---|
6045 | | - * @start: start position of the space already reserved |
---|
6046 | | - * @len: the length of the space already reserved |
---|
6047 | | - * @qgroup_free: free qgroup reservation or convert it to per-trans reservation |
---|
6048 | | - * |
---|
6049 | | - * This function will release the metadata space that was not used and will |
---|
6050 | | - * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes |
---|
6051 | | - * list if there are no delalloc bytes left. |
---|
6052 | | - * Also it will handle the qgroup reserved space. |
---|
6053 | | - */ |
---|
6054 | | -void btrfs_delalloc_release_space(struct inode *inode, |
---|
6055 | | - struct extent_changeset *reserved, |
---|
6056 | | - u64 start, u64 len, bool qgroup_free) |
---|
6057 | | -{ |
---|
6058 | | - btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free); |
---|
6059 | | - btrfs_free_reserved_data_space(inode, reserved, start, len); |
---|
6060 | | -} |
---|
6061 | | - |
---|
6062 | | -static int update_block_group(struct btrfs_trans_handle *trans, |
---|
6063 | | - struct btrfs_fs_info *info, u64 bytenr, |
---|
6064 | | - u64 num_bytes, int alloc) |
---|
6065 | | -{ |
---|
6066 | | - struct btrfs_block_group_cache *cache = NULL; |
---|
6067 | | - u64 total = num_bytes; |
---|
6068 | | - u64 old_val; |
---|
6069 | | - u64 byte_in_group; |
---|
6070 | | - int factor; |
---|
6071 | | - |
---|
6072 | | - /* block accounting for super block */ |
---|
6073 | | - spin_lock(&info->delalloc_root_lock); |
---|
6074 | | - old_val = btrfs_super_bytes_used(info->super_copy); |
---|
6075 | | - if (alloc) |
---|
6076 | | - old_val += num_bytes; |
---|
6077 | | - else |
---|
6078 | | - old_val -= num_bytes; |
---|
6079 | | - btrfs_set_super_bytes_used(info->super_copy, old_val); |
---|
6080 | | - spin_unlock(&info->delalloc_root_lock); |
---|
6081 | | - |
---|
6082 | | - while (total) { |
---|
6083 | | - cache = btrfs_lookup_block_group(info, bytenr); |
---|
6084 | | - if (!cache) |
---|
6085 | | - return -ENOENT; |
---|
6086 | | - factor = btrfs_bg_type_to_factor(cache->flags); |
---|
6087 | | - |
---|
6088 | | - /* |
---|
6089 | | - * If this block group has free space cache written out, we |
---|
6090 | | - * need to make sure to load it if we are removing space. This |
---|
6091 | | - * is because we need the unpinning stage to actually add the |
---|
6092 | | - * space back to the block group, otherwise we will leak space. |
---|
6093 | | - */ |
---|
6094 | | - if (!alloc && cache->cached == BTRFS_CACHE_NO) |
---|
6095 | | - cache_block_group(cache, 1); |
---|
6096 | | - |
---|
6097 | | - byte_in_group = bytenr - cache->key.objectid; |
---|
6098 | | - WARN_ON(byte_in_group > cache->key.offset); |
---|
6099 | | - |
---|
6100 | | - spin_lock(&cache->space_info->lock); |
---|
6101 | | - spin_lock(&cache->lock); |
---|
6102 | | - |
---|
6103 | | - if (btrfs_test_opt(info, SPACE_CACHE) && |
---|
6104 | | - cache->disk_cache_state < BTRFS_DC_CLEAR) |
---|
6105 | | - cache->disk_cache_state = BTRFS_DC_CLEAR; |
---|
6106 | | - |
---|
6107 | | - old_val = btrfs_block_group_used(&cache->item); |
---|
6108 | | - num_bytes = min(total, cache->key.offset - byte_in_group); |
---|
6109 | | - if (alloc) { |
---|
6110 | | - old_val += num_bytes; |
---|
6111 | | - btrfs_set_block_group_used(&cache->item, old_val); |
---|
6112 | | - cache->reserved -= num_bytes; |
---|
6113 | | - cache->space_info->bytes_reserved -= num_bytes; |
---|
6114 | | - cache->space_info->bytes_used += num_bytes; |
---|
6115 | | - cache->space_info->disk_used += num_bytes * factor; |
---|
6116 | | - spin_unlock(&cache->lock); |
---|
6117 | | - spin_unlock(&cache->space_info->lock); |
---|
6118 | | - } else { |
---|
6119 | | - old_val -= num_bytes; |
---|
6120 | | - btrfs_set_block_group_used(&cache->item, old_val); |
---|
6121 | | - cache->pinned += num_bytes; |
---|
6122 | | - cache->space_info->bytes_pinned += num_bytes; |
---|
6123 | | - cache->space_info->bytes_used -= num_bytes; |
---|
6124 | | - cache->space_info->disk_used -= num_bytes * factor; |
---|
6125 | | - spin_unlock(&cache->lock); |
---|
6126 | | - spin_unlock(&cache->space_info->lock); |
---|
6127 | | - |
---|
6128 | | - trace_btrfs_space_reservation(info, "pinned", |
---|
6129 | | - cache->space_info->flags, |
---|
6130 | | - num_bytes, 1); |
---|
6131 | | - percpu_counter_add_batch(&cache->space_info->total_bytes_pinned, |
---|
6132 | | - num_bytes, |
---|
6133 | | - BTRFS_TOTAL_BYTES_PINNED_BATCH); |
---|
6134 | | - set_extent_dirty(info->pinned_extents, |
---|
6135 | | - bytenr, bytenr + num_bytes - 1, |
---|
6136 | | - GFP_NOFS | __GFP_NOFAIL); |
---|
6137 | | - } |
---|
6138 | | - |
---|
6139 | | - spin_lock(&trans->transaction->dirty_bgs_lock); |
---|
6140 | | - if (list_empty(&cache->dirty_list)) { |
---|
6141 | | - list_add_tail(&cache->dirty_list, |
---|
6142 | | - &trans->transaction->dirty_bgs); |
---|
6143 | | - trans->transaction->num_dirty_bgs++; |
---|
6144 | | - btrfs_get_block_group(cache); |
---|
6145 | | - } |
---|
6146 | | - spin_unlock(&trans->transaction->dirty_bgs_lock); |
---|
6147 | | - |
---|
6148 | | - /* |
---|
6149 | | - * No longer have used bytes in this block group, queue it for |
---|
6150 | | - * deletion. We do this after adding the block group to the |
---|
6151 | | - * dirty list to avoid races between cleaner kthread and space |
---|
6152 | | - * cache writeout. |
---|
6153 | | - */ |
---|
6154 | | - if (!alloc && old_val == 0) |
---|
6155 | | - btrfs_mark_bg_unused(cache); |
---|
6156 | | - |
---|
6157 | | - btrfs_put_block_group(cache); |
---|
6158 | | - total -= num_bytes; |
---|
6159 | | - bytenr += num_bytes; |
---|
6160 | | - } |
---|
6161 | | - return 0; |
---|
6162 | 2518 | } |
---|
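
The while (total) loop in update_block_group() above accounts an extent one block group at a time, clamping each step to what remains of the current group. A userspace sketch of that splitting follows; the block group start/length values and the struct are made up for the example.

#include <stdint.h>
#include <stdio.h>

struct bg { uint64_t start, length; };

/* Return the portion of [bytenr, bytenr + total) that falls in @bg. */
static uint64_t bytes_in_group(const struct bg *bg, uint64_t bytenr,
			       uint64_t total)
{
	uint64_t byte_in_group = bytenr - bg->start;
	uint64_t room = bg->length - byte_in_group;

	return total < room ? total : room;
}

int main(void)
{
	struct bg groups[] = {
		{ .start = 1 << 30,          .length = 1 << 30 },
		{ .start = (1ULL << 30) * 2, .length = 1 << 30 },
	};
	uint64_t bytenr = (1ULL << 31) - (64ULL << 20);  /* 64 MiB before the boundary */
	uint64_t total = 100ULL << 20;                   /* 100 MiB extent */

	for (int i = 0; total && i < 2; i++) {
		uint64_t chunk = bytes_in_group(&groups[i], bytenr, total);

		printf("group %d: %llu MiB\n", i,
		       (unsigned long long)(chunk >> 20));  /* 64 then 36 */
		total -= chunk;
		bytenr += chunk;
	}
	return 0;
}
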
6163 | 2519 | |
---|
6164 | 2520 | static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start) |
---|
6165 | 2521 | { |
---|
6166 | | - struct btrfs_block_group_cache *cache; |
---|
| 2522 | + struct btrfs_block_group *cache; |
---|
6167 | 2523 | u64 bytenr; |
---|
6168 | 2524 | |
---|
6169 | 2525 | spin_lock(&fs_info->block_group_cache_lock); |
---|
.. | .. |
---|
6177 | 2533 | if (!cache) |
---|
6178 | 2534 | return 0; |
---|
6179 | 2535 | |
---|
6180 | | - bytenr = cache->key.objectid; |
---|
| 2536 | + bytenr = cache->start; |
---|
6181 | 2537 | btrfs_put_block_group(cache); |
---|
6182 | 2538 | |
---|
6183 | 2539 | return bytenr; |
---|
6184 | 2540 | } |
---|
6185 | 2541 | |
---|
6186 | | -static int pin_down_extent(struct btrfs_fs_info *fs_info, |
---|
6187 | | - struct btrfs_block_group_cache *cache, |
---|
| 2542 | +static int pin_down_extent(struct btrfs_trans_handle *trans, |
---|
| 2543 | + struct btrfs_block_group *cache, |
---|
6188 | 2544 | u64 bytenr, u64 num_bytes, int reserved) |
---|
6189 | 2545 | { |
---|
| 2546 | + struct btrfs_fs_info *fs_info = cache->fs_info; |
---|
| 2547 | + |
---|
6190 | 2548 | spin_lock(&cache->space_info->lock); |
---|
6191 | 2549 | spin_lock(&cache->lock); |
---|
6192 | 2550 | cache->pinned += num_bytes; |
---|
6193 | | - cache->space_info->bytes_pinned += num_bytes; |
---|
| 2551 | + btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info, |
---|
| 2552 | + num_bytes); |
---|
6194 | 2553 | if (reserved) { |
---|
6195 | 2554 | cache->reserved -= num_bytes; |
---|
6196 | 2555 | cache->space_info->bytes_reserved -= num_bytes; |
---|
.. | .. |
---|
6198 | 2557 | spin_unlock(&cache->lock); |
---|
6199 | 2558 | spin_unlock(&cache->space_info->lock); |
---|
6200 | 2559 | |
---|
6201 | | - trace_btrfs_space_reservation(fs_info, "pinned", |
---|
6202 | | - cache->space_info->flags, num_bytes, 1); |
---|
6203 | | - percpu_counter_add_batch(&cache->space_info->total_bytes_pinned, |
---|
6204 | | - num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH); |
---|
6205 | | - set_extent_dirty(fs_info->pinned_extents, bytenr, |
---|
| 2560 | + __btrfs_mod_total_bytes_pinned(cache->space_info, num_bytes); |
---|
| 2561 | + set_extent_dirty(&trans->transaction->pinned_extents, bytenr, |
---|
6206 | 2562 | bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); |
---|
6207 | 2563 | return 0; |
---|
6208 | 2564 | } |
---|
6209 | 2565 | |
---|
6210 | | -/* |
---|
6211 | | - * this function must be called within transaction |
---|
6212 | | - */ |
---|
6213 | | -int btrfs_pin_extent(struct btrfs_fs_info *fs_info, |
---|
| 2566 | +int btrfs_pin_extent(struct btrfs_trans_handle *trans, |
---|
6214 | 2567 | u64 bytenr, u64 num_bytes, int reserved) |
---|
6215 | 2568 | { |
---|
6216 | | - struct btrfs_block_group_cache *cache; |
---|
| 2569 | + struct btrfs_block_group *cache; |
---|
6217 | 2570 | |
---|
6218 | | - cache = btrfs_lookup_block_group(fs_info, bytenr); |
---|
| 2571 | + cache = btrfs_lookup_block_group(trans->fs_info, bytenr); |
---|
6219 | 2572 | BUG_ON(!cache); /* Logic error */ |
---|
6220 | 2573 | |
---|
6221 | | - pin_down_extent(fs_info, cache, bytenr, num_bytes, reserved); |
---|
| 2574 | + pin_down_extent(trans, cache, bytenr, num_bytes, reserved); |
---|
6222 | 2575 | |
---|
6223 | 2576 | btrfs_put_block_group(cache); |
---|
6224 | 2577 | return 0; |
---|
.. | .. |
---|
6227 | 2580 | /* |
---|
6228 | 2581 | * this function must be called within transaction |
---|
6229 | 2582 | */ |
---|
6230 | | -int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info, |
---|
| 2583 | +int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, |
---|
6231 | 2584 | u64 bytenr, u64 num_bytes) |
---|
6232 | 2585 | { |
---|
6233 | | - struct btrfs_block_group_cache *cache; |
---|
| 2586 | + struct btrfs_block_group *cache; |
---|
6234 | 2587 | int ret; |
---|
6235 | 2588 | |
---|
6236 | | - cache = btrfs_lookup_block_group(fs_info, bytenr); |
---|
| 2589 | + btrfs_add_excluded_extent(trans->fs_info, bytenr, num_bytes); |
---|
| 2590 | + |
---|
| 2591 | + cache = btrfs_lookup_block_group(trans->fs_info, bytenr); |
---|
6237 | 2592 | if (!cache) |
---|
6238 | 2593 | return -EINVAL; |
---|
6239 | 2594 | |
---|
.. | .. |
---|
6243 | 2598 | * to one because the slow code to read in the free extents does check |
---|
6244 | 2599 | * the pinned extents. |
---|
6245 | 2600 | */ |
---|
6246 | | - cache_block_group(cache, 1); |
---|
| 2601 | + btrfs_cache_block_group(cache, 1); |
---|
6247 | 2602 | |
---|
6248 | | - pin_down_extent(fs_info, cache, bytenr, num_bytes, 0); |
---|
| 2603 | + pin_down_extent(trans, cache, bytenr, num_bytes, 0); |
---|
6249 | 2604 | |
---|
6250 | 2605 | /* remove us from the free space cache (if we're there at all) */ |
---|
6251 | 2606 | ret = btrfs_remove_free_space(cache, bytenr, num_bytes); |
---|
.. | .. |
---|
6257 | 2612 | u64 start, u64 num_bytes) |
---|
6258 | 2613 | { |
---|
6259 | 2614 | int ret; |
---|
6260 | | - struct btrfs_block_group_cache *block_group; |
---|
| 2615 | + struct btrfs_block_group *block_group; |
---|
6261 | 2616 | struct btrfs_caching_control *caching_ctl; |
---|
6262 | 2617 | |
---|
6263 | 2618 | block_group = btrfs_lookup_block_group(fs_info, start); |
---|
6264 | 2619 | if (!block_group) |
---|
6265 | 2620 | return -EINVAL; |
---|
6266 | 2621 | |
---|
6267 | | - cache_block_group(block_group, 0); |
---|
6268 | | - caching_ctl = get_caching_control(block_group); |
---|
| 2622 | + btrfs_cache_block_group(block_group, 0); |
---|
| 2623 | + caching_ctl = btrfs_get_caching_control(block_group); |
---|
6269 | 2624 | |
---|
6270 | 2625 | if (!caching_ctl) { |
---|
6271 | 2626 | /* Logic error */ |
---|
6272 | | - BUG_ON(!block_group_cache_done(block_group)); |
---|
| 2627 | + BUG_ON(!btrfs_block_group_done(block_group)); |
---|
6273 | 2628 | ret = btrfs_remove_free_space(block_group, start, num_bytes); |
---|
6274 | 2629 | } else { |
---|
6275 | 2630 | mutex_lock(&caching_ctl->mutex); |
---|
6276 | 2631 | |
---|
6277 | 2632 | if (start >= caching_ctl->progress) { |
---|
6278 | | - ret = add_excluded_extent(fs_info, start, num_bytes); |
---|
| 2633 | + ret = btrfs_add_excluded_extent(fs_info, start, |
---|
| 2634 | + num_bytes); |
---|
6279 | 2635 | } else if (start + num_bytes <= caching_ctl->progress) { |
---|
6280 | 2636 | ret = btrfs_remove_free_space(block_group, |
---|
6281 | 2637 | start, num_bytes); |
---|
.. | .. |
---|
6289 | 2645 | num_bytes = (start + num_bytes) - |
---|
6290 | 2646 | caching_ctl->progress; |
---|
6291 | 2647 | start = caching_ctl->progress; |
---|
6292 | | - ret = add_excluded_extent(fs_info, start, num_bytes); |
---|
| 2648 | + ret = btrfs_add_excluded_extent(fs_info, start, |
---|
| 2649 | + num_bytes); |
---|
6293 | 2650 | } |
---|
6294 | 2651 | out_lock: |
---|
6295 | 2652 | mutex_unlock(&caching_ctl->mutex); |
---|
6296 | | - put_caching_control(caching_ctl); |
---|
| 2653 | + btrfs_put_caching_control(caching_ctl); |
---|
6297 | 2654 | } |
---|
6298 | 2655 | btrfs_put_block_group(block_group); |
---|
6299 | 2656 | return ret; |
---|
6300 | 2657 | } |
---|
6301 | 2658 | |
---|
6302 | | -int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info, |
---|
6303 | | - struct extent_buffer *eb) |
---|
| 2659 | +int btrfs_exclude_logged_extents(struct extent_buffer *eb) |
---|
6304 | 2660 | { |
---|
| 2661 | + struct btrfs_fs_info *fs_info = eb->fs_info; |
---|
6305 | 2662 | struct btrfs_file_extent_item *item; |
---|
6306 | 2663 | struct btrfs_key key; |
---|
6307 | 2664 | int found_type; |
---|
.. | .. |
---|
6332 | 2689 | } |
---|
6333 | 2690 | |
---|
6334 | 2691 | static void |
---|
6335 | | -btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg) |
---|
| 2692 | +btrfs_inc_block_group_reservations(struct btrfs_block_group *bg) |
---|
6336 | 2693 | { |
---|
6337 | 2694 | atomic_inc(&bg->reservations); |
---|
6338 | | -} |
---|
6339 | | - |
---|
6340 | | -void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info, |
---|
6341 | | - const u64 start) |
---|
6342 | | -{ |
---|
6343 | | - struct btrfs_block_group_cache *bg; |
---|
6344 | | - |
---|
6345 | | - bg = btrfs_lookup_block_group(fs_info, start); |
---|
6346 | | - ASSERT(bg); |
---|
6347 | | - if (atomic_dec_and_test(&bg->reservations)) |
---|
6348 | | - wake_up_var(&bg->reservations); |
---|
6349 | | - btrfs_put_block_group(bg); |
---|
6350 | | -} |
---|
6351 | | - |
---|
6352 | | -void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) |
---|
6353 | | -{ |
---|
6354 | | - struct btrfs_space_info *space_info = bg->space_info; |
---|
6355 | | - |
---|
6356 | | - ASSERT(bg->ro); |
---|
6357 | | - |
---|
6358 | | - if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA)) |
---|
6359 | | - return; |
---|
6360 | | - |
---|
6361 | | - /* |
---|
6362 | | - * Our block group is read only but before we set it to read only, |
---|
6363 | | - * some task might have had allocated an extent from it already, but it |
---|
6364 | | - * has not yet created a respective ordered extent (and added it to a |
---|
6365 | | - * root's list of ordered extents). |
---|
6366 | | - * Therefore wait for any task currently allocating extents, since the |
---|
6367 | | - * block group's reservations counter is incremented while a read lock |
---|
6368 | | - * on the groups' semaphore is held and decremented after releasing |
---|
6369 | | - * the read access on that semaphore and creating the ordered extent. |
---|
6370 | | - */ |
---|
6371 | | - down_write(&space_info->groups_sem); |
---|
6372 | | - up_write(&space_info->groups_sem); |
---|
6373 | | - |
---|
6374 | | - wait_var_event(&bg->reservations, !atomic_read(&bg->reservations)); |
---|
6375 | | -} |
---|
6376 | | - |
---|
6377 | | -/** |
---|
6378 | | - * btrfs_add_reserved_bytes - update the block_group and space info counters |
---|
6379 | | - * @cache: The cache we are manipulating |
---|
6380 | | - * @ram_bytes: The number of bytes of file content, and will be same to |
---|
6381 | | - * @num_bytes except for the compress path. |
---|
6382 | | - * @num_bytes: The number of bytes in question |
---|
6383 | | - * @delalloc: The blocks are allocated for the delalloc write |
---|
6384 | | - * |
---|
6385 | | - * This is called by the allocator when it reserves space. If this is a |
---|
6386 | | - * reservation and the block group has become read only we cannot make the |
---|
6387 | | - * reservation and return -EAGAIN, otherwise this function always succeeds. |
---|
6388 | | - */ |
---|
6389 | | -static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, |
---|
6390 | | - u64 ram_bytes, u64 num_bytes, int delalloc) |
---|
6391 | | -{ |
---|
6392 | | - struct btrfs_space_info *space_info = cache->space_info; |
---|
6393 | | - int ret = 0; |
---|
6394 | | - |
---|
6395 | | - spin_lock(&space_info->lock); |
---|
6396 | | - spin_lock(&cache->lock); |
---|
6397 | | - if (cache->ro) { |
---|
6398 | | - ret = -EAGAIN; |
---|
6399 | | - } else { |
---|
6400 | | - cache->reserved += num_bytes; |
---|
6401 | | - space_info->bytes_reserved += num_bytes; |
---|
6402 | | - |
---|
6403 | | - trace_btrfs_space_reservation(cache->fs_info, |
---|
6404 | | - "space_info", space_info->flags, |
---|
6405 | | - ram_bytes, 0); |
---|
6406 | | - space_info->bytes_may_use -= ram_bytes; |
---|
6407 | | - if (delalloc) |
---|
6408 | | - cache->delalloc_bytes += num_bytes; |
---|
6409 | | - } |
---|
6410 | | - spin_unlock(&cache->lock); |
---|
6411 | | - spin_unlock(&space_info->lock); |
---|
6412 | | - return ret; |
---|
6413 | | -} |
---|
6414 | | - |
---|
6415 | | -/** |
---|
6416 | | - * btrfs_free_reserved_bytes - update the block_group and space info counters |
---|
6417 | | - * @cache: The cache we are manipulating |
---|
6418 | | - * @num_bytes: The number of bytes in question |
---|
6419 | | - * @delalloc: The blocks are allocated for the delalloc write |
---|
6420 | | - * |
---|
6421 | | - * This is called by somebody who is freeing space that was never actually used |
---|
6422 | | - * on disk. For example if you reserve some space for a new leaf in transaction |
---|
6423 | | - * A and before transaction A commits you free that leaf, you call this with |
---|
6424 | | - * reserve set to 0 in order to clear the reservation. |
---|
6425 | | - */ |
---|
6426 | | - |
---|
6427 | | -static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, |
---|
6428 | | - u64 num_bytes, int delalloc) |
---|
6429 | | -{ |
---|
6430 | | - struct btrfs_space_info *space_info = cache->space_info; |
---|
6431 | | - int ret = 0; |
---|
6432 | | - |
---|
6433 | | - spin_lock(&space_info->lock); |
---|
6434 | | - spin_lock(&cache->lock); |
---|
6435 | | - if (cache->ro) |
---|
6436 | | - space_info->bytes_readonly += num_bytes; |
---|
6437 | | - cache->reserved -= num_bytes; |
---|
6438 | | - space_info->bytes_reserved -= num_bytes; |
---|
6439 | | - space_info->max_extent_size = 0; |
---|
6440 | | - |
---|
6441 | | - if (delalloc) |
---|
6442 | | - cache->delalloc_bytes -= num_bytes; |
---|
6443 | | - spin_unlock(&cache->lock); |
---|
6444 | | - spin_unlock(&space_info->lock); |
---|
6445 | | - return ret; |
---|
6446 | | -} |
---|
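
btrfs_add_reserved_bytes() and btrfs_free_reserved_bytes() above move bytes between the space_info counters: reserving an allocation shifts them from bytes_may_use to bytes_reserved, and freeing a never-used reservation drops them from bytes_reserved again (crediting bytes_readonly if the group went read-only in between). A minimal sketch of those transitions, with locking and tracepoints omitted and example values invented:

#include <stdint.h>
#include <stdio.h>

struct space_info { uint64_t bytes_may_use, bytes_reserved, bytes_readonly; };

static int add_reserved_bytes(struct space_info *s, int group_ro,
			      uint64_t ram_bytes, uint64_t num_bytes)
{
	if (group_ro)
		return -1;                      /* -EAGAIN in the kernel */
	s->bytes_reserved += num_bytes;
	s->bytes_may_use -= ram_bytes;          /* may differ when compressing */
	return 0;
}

static void free_reserved_bytes(struct space_info *s, int group_ro,
				uint64_t num_bytes)
{
	if (group_ro)
		s->bytes_readonly += num_bytes;
	s->bytes_reserved -= num_bytes;
}

int main(void)
{
	struct space_info s = { .bytes_may_use = 1 << 20 };

	add_reserved_bytes(&s, 0, 1 << 20, 256 << 10);  /* compressed write */
	free_reserved_bytes(&s, 0, 256 << 10);          /* never hit the disk */
	printf("may_use=%llu reserved=%llu\n",
	       (unsigned long long)s.bytes_may_use,
	       (unsigned long long)s.bytes_reserved);   /* 0 0 */
	return 0;
}
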
6447 | | -void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info) |
---|
6448 | | -{ |
---|
6449 | | - struct btrfs_caching_control *next; |
---|
6450 | | - struct btrfs_caching_control *caching_ctl; |
---|
6451 | | - struct btrfs_block_group_cache *cache; |
---|
6452 | | - |
---|
6453 | | - down_write(&fs_info->commit_root_sem); |
---|
6454 | | - |
---|
6455 | | - list_for_each_entry_safe(caching_ctl, next, |
---|
6456 | | - &fs_info->caching_block_groups, list) { |
---|
6457 | | - cache = caching_ctl->block_group; |
---|
6458 | | - if (block_group_cache_done(cache)) { |
---|
6459 | | - cache->last_byte_to_unpin = (u64)-1; |
---|
6460 | | - list_del_init(&caching_ctl->list); |
---|
6461 | | - put_caching_control(caching_ctl); |
---|
6462 | | - } else { |
---|
6463 | | - cache->last_byte_to_unpin = caching_ctl->progress; |
---|
6464 | | - } |
---|
6465 | | - } |
---|
6466 | | - |
---|
6467 | | - if (fs_info->pinned_extents == &fs_info->freed_extents[0]) |
---|
6468 | | - fs_info->pinned_extents = &fs_info->freed_extents[1]; |
---|
6469 | | - else |
---|
6470 | | - fs_info->pinned_extents = &fs_info->freed_extents[0]; |
---|
6471 | | - |
---|
6472 | | - up_write(&fs_info->commit_root_sem); |
---|
6473 | | - |
---|
6474 | | - update_global_block_rsv(fs_info); |
---|
6475 | 2695 | } |
---|
6476 | 2696 | |
---|
6477 | 2697 | /* |
---|
.. | .. |
---|
6507 | 2727 | u64 start, u64 end, |
---|
6508 | 2728 | const bool return_free_space) |
---|
6509 | 2729 | { |
---|
6510 | | - struct btrfs_block_group_cache *cache = NULL; |
---|
| 2730 | + struct btrfs_block_group *cache = NULL; |
---|
6511 | 2731 | struct btrfs_space_info *space_info; |
---|
6512 | 2732 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; |
---|
6513 | 2733 | struct btrfs_free_cluster *cluster = NULL; |
---|
.. | .. |
---|
6519 | 2739 | while (start <= end) { |
---|
6520 | 2740 | readonly = false; |
---|
6521 | 2741 | if (!cache || |
---|
6522 | | - start >= cache->key.objectid + cache->key.offset) { |
---|
| 2742 | + start >= cache->start + cache->length) { |
---|
6523 | 2743 | if (cache) |
---|
6524 | 2744 | btrfs_put_block_group(cache); |
---|
6525 | 2745 | total_unpinned = 0; |
---|
.. | .. |
---|
6532 | 2752 | empty_cluster <<= 1; |
---|
6533 | 2753 | } |
---|
6534 | 2754 | |
---|
6535 | | - len = cache->key.objectid + cache->key.offset - start; |
---|
| 2755 | + len = cache->start + cache->length - start; |
---|
6536 | 2756 | len = min(len, end + 1 - start); |
---|
6537 | 2757 | |
---|
6538 | | - if (start < cache->last_byte_to_unpin) { |
---|
6539 | | - len = min(len, cache->last_byte_to_unpin - start); |
---|
6540 | | - if (return_free_space) |
---|
6541 | | - btrfs_add_free_space(cache, start, len); |
---|
| 2758 | + if (start < cache->last_byte_to_unpin && return_free_space) { |
---|
| 2759 | + u64 add_len = min(len, cache->last_byte_to_unpin - start); |
---|
| 2760 | + |
---|
| 2761 | + btrfs_add_free_space(cache, start, add_len); |
---|
6542 | 2762 | } |
---|
6543 | 2763 | |
---|
6544 | 2764 | start += len; |
---|
.. | .. |
---|
6561 | 2781 | spin_lock(&space_info->lock); |
---|
6562 | 2782 | spin_lock(&cache->lock); |
---|
6563 | 2783 | cache->pinned -= len; |
---|
6564 | | - space_info->bytes_pinned -= len; |
---|
6565 | | - |
---|
6566 | | - trace_btrfs_space_reservation(fs_info, "pinned", |
---|
6567 | | - space_info->flags, len, 0); |
---|
| 2784 | + btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len); |
---|
6568 | 2785 | space_info->max_extent_size = 0; |
---|
6569 | | - percpu_counter_add_batch(&space_info->total_bytes_pinned, |
---|
6570 | | - -len, BTRFS_TOTAL_BYTES_PINNED_BATCH); |
---|
| 2786 | + __btrfs_mod_total_bytes_pinned(space_info, -len); |
---|
6571 | 2787 | if (cache->ro) { |
---|
6572 | 2788 | space_info->bytes_readonly += len; |
---|
6573 | 2789 | readonly = true; |
---|
.. | .. |
---|
6582 | 2798 | to_add = min(len, global_rsv->size - |
---|
6583 | 2799 | global_rsv->reserved); |
---|
6584 | 2800 | global_rsv->reserved += to_add; |
---|
6585 | | - space_info->bytes_may_use += to_add; |
---|
| 2801 | + btrfs_space_info_update_bytes_may_use(fs_info, |
---|
| 2802 | + space_info, to_add); |
---|
6586 | 2803 | if (global_rsv->reserved >= global_rsv->size) |
---|
6587 | 2804 | global_rsv->full = 1; |
---|
6588 | | - trace_btrfs_space_reservation(fs_info, |
---|
6589 | | - "space_info", |
---|
6590 | | - space_info->flags, |
---|
6591 | | - to_add, 1); |
---|
6592 | 2805 | len -= to_add; |
---|
6593 | 2806 | } |
---|
6594 | 2807 | spin_unlock(&global_rsv->lock); |
---|
6595 | | - /* Add to any tickets we may have */ |
---|
6596 | | - if (len) |
---|
6597 | | - space_info_add_new_bytes(fs_info, space_info, |
---|
6598 | | - len); |
---|
6599 | 2808 | } |
---|
| 2809 | + /* Add to any tickets we may have */ |
---|
| 2810 | + if (!readonly && return_free_space && len) |
---|
| 2811 | + btrfs_try_granting_tickets(fs_info, space_info); |
---|
6600 | 2812 | spin_unlock(&space_info->lock); |
---|
6601 | 2813 | } |
---|
6602 | 2814 | |
---|
.. | .. |
---|
6608 | 2820 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans) |
---|
6609 | 2821 | { |
---|
6610 | 2822 | struct btrfs_fs_info *fs_info = trans->fs_info; |
---|
6611 | | - struct btrfs_block_group_cache *block_group, *tmp; |
---|
| 2823 | + struct btrfs_block_group *block_group, *tmp; |
---|
6612 | 2824 | struct list_head *deleted_bgs; |
---|
6613 | 2825 | struct extent_io_tree *unpin; |
---|
6614 | 2826 | u64 start; |
---|
6615 | 2827 | u64 end; |
---|
6616 | 2828 | int ret; |
---|
6617 | 2829 | |
---|
6618 | | - if (fs_info->pinned_extents == &fs_info->freed_extents[0]) |
---|
6619 | | - unpin = &fs_info->freed_extents[1]; |
---|
6620 | | - else |
---|
6621 | | - unpin = &fs_info->freed_extents[0]; |
---|
| 2830 | + unpin = &trans->transaction->pinned_extents; |
---|
6622 | 2831 | |
---|
6623 | | - while (!trans->aborted) { |
---|
| 2832 | + while (!TRANS_ABORTED(trans)) { |
---|
6624 | 2833 | struct extent_state *cached_state = NULL; |
---|
6625 | 2834 | |
---|
6626 | 2835 | mutex_lock(&fs_info->unused_bg_unpin_mutex); |
---|
.. | .. |
---|
6630 | 2839 | mutex_unlock(&fs_info->unused_bg_unpin_mutex); |
---|
6631 | 2840 | break; |
---|
6632 | 2841 | } |
---|
| 2842 | + if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) |
---|
| 2843 | + clear_extent_bits(&fs_info->excluded_extents, start, |
---|
| 2844 | + end, EXTENT_UPTODATE); |
---|
6633 | 2845 | |
---|
6634 | | - if (btrfs_test_opt(fs_info, DISCARD)) |
---|
| 2846 | + if (btrfs_test_opt(fs_info, DISCARD_SYNC)) |
---|
6635 | 2847 | ret = btrfs_discard_extent(fs_info, start, |
---|
6636 | 2848 | end + 1 - start, NULL); |
---|
6637 | 2849 | |
---|
.. | .. |
---|
6640 | 2852 | mutex_unlock(&fs_info->unused_bg_unpin_mutex); |
---|
6641 | 2853 | free_extent_state(cached_state); |
---|
6642 | 2854 | cond_resched(); |
---|
| 2855 | + } |
---|
| 2856 | + |
---|
| 2857 | + if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) { |
---|
| 2858 | + btrfs_discard_calc_delay(&fs_info->discard_ctl); |
---|
| 2859 | + btrfs_discard_schedule_work(&fs_info->discard_ctl, true); |
---|
6643 | 2860 | } |
---|
6644 | 2861 | |
---|
6645 | 2862 | /* |
---|
.. | .. |
---|
6652 | 2869 | u64 trimmed = 0; |
---|
6653 | 2870 | |
---|
6654 | 2871 | ret = -EROFS; |
---|
6655 | | - if (!trans->aborted) |
---|
| 2872 | + if (!TRANS_ABORTED(trans)) |
---|
6656 | 2873 | ret = btrfs_discard_extent(fs_info, |
---|
6657 | | - block_group->key.objectid, |
---|
6658 | | - block_group->key.offset, |
---|
| 2874 | + block_group->start, |
---|
| 2875 | + block_group->length, |
---|
6659 | 2876 | &trimmed); |
---|
6660 | 2877 | |
---|
6661 | 2878 | list_del_init(&block_group->bg_list); |
---|
6662 | | - btrfs_put_block_group_trimming(block_group); |
---|
| 2879 | + btrfs_unfreeze_block_group(block_group); |
---|
6663 | 2880 | btrfs_put_block_group(block_group); |
---|
6664 | 2881 | |
---|
6665 | 2882 | if (ret) { |
---|
.. | .. |
---|
6673 | 2890 | return 0; |
---|
6674 | 2891 | } |
---|
6675 | 2892 | |
---|
| 2893 | +/* |
---|
| 2894 | + * Drop one or more refs of @node. |
---|
| 2895 | + * |
---|
| 2896 | + * 1. Locate the extent refs. |
---|
| 2897 | + * It's either inline in the EXTENT/METADATA_ITEM or in a keyed SHARED_* item. |
---|
| 2898 | + * Locate it, then reduce the refs number or remove the ref line completely. |
---|
| 2899 | + * |
---|
| 2900 | + * 2. Update the refs count in EXTENT/METADATA_ITEM |
---|
| 2901 | + * |
---|
| 2902 | + * Inline backref case: |
---|
| 2903 | + * |
---|
| 2904 | + * in extent tree we have: |
---|
| 2905 | + * |
---|
| 2906 | + * item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16201 itemsize 82 |
---|
| 2907 | + * refs 2 gen 6 flags DATA |
---|
| 2908 | + * extent data backref root FS_TREE objectid 258 offset 0 count 1 |
---|
| 2909 | + * extent data backref root FS_TREE objectid 257 offset 0 count 1 |
---|
| 2910 | + * |
---|
| 2911 | + * This function gets called with: |
---|
| 2912 | + * |
---|
| 2913 | + * node->bytenr = 13631488 |
---|
| 2914 | + * node->num_bytes = 1048576 |
---|
| 2915 | + * root_objectid = FS_TREE |
---|
| 2916 | + * owner_objectid = 257 |
---|
| 2917 | + * owner_offset = 0 |
---|
| 2918 | + * refs_to_drop = 1 |
---|
| 2919 | + * |
---|
| 2920 | + * Then we should get something like: |
---|
| 2921 | + * |
---|
| 2922 | + * item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16201 itemsize 82 |
---|
| 2923 | + * refs 1 gen 6 flags DATA |
---|
| 2924 | + * extent data backref root FS_TREE objectid 258 offset 0 count 1 |
---|
| 2925 | + * |
---|
| 2926 | + * Keyed backref case: |
---|
| 2927 | + * |
---|
| 2928 | + * in extent tree we have: |
---|
| 2929 | + * |
---|
| 2930 | + * item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 3971 itemsize 24 |
---|
| 2931 | + * refs 754 gen 6 flags DATA |
---|
| 2932 | + * [...] |
---|
| 2933 | + * item 2 key (13631488 EXTENT_DATA_REF <HASH>) itemoff 3915 itemsize 28 |
---|
| 2934 | + * extent data backref root FS_TREE objectid 866 offset 0 count 1 |
---|
| 2935 | + * |
---|
| 2936 | + * This function gets called with: |
---|
| 2937 | + * |
---|
| 2938 | + * node->bytenr = 13631488 |
---|
| 2939 | + * node->num_bytes = 1048576 |
---|
| 2940 | + * root_objectid = FS_TREE |
---|
| 2941 | + * owner_objectid = 866 |
---|
| 2942 | + * owner_offset = 0 |
---|
| 2943 | + * refs_to_drop = 1 |
---|
| 2944 | + * |
---|
| 2945 | + * Then we should get something like: |
---|
| 2946 | + * |
---|
| 2947 | + * item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 3971 itemsize 24 |
---|
| 2948 | + * refs 753 gen 6 flags DATA |
---|
| 2949 | + * |
---|
| 2950 | + * And that (13631488 EXTENT_DATA_REF <HASH>) gets removed. |
---|
| 2951 | + */ |
---|
6676 | 2952 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
---|
6677 | 2953 | struct btrfs_delayed_ref_node *node, u64 parent, |
---|
6678 | 2954 | u64 root_objectid, u64 owner_objectid, |
---|
.. | .. |
---|
6702 | 2978 | if (!path) |
---|
6703 | 2979 | return -ENOMEM; |
---|
6704 | 2980 | |
---|
6705 | | - path->reada = READA_FORWARD; |
---|
6706 | 2981 | path->leave_spinning = 1; |
---|
6707 | 2982 | |
---|
6708 | 2983 | is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; |
---|
6709 | | - BUG_ON(!is_data && refs_to_drop != 1); |
---|
| 2984 | + |
---|
| 2985 | + if (!is_data && refs_to_drop != 1) { |
---|
| 2986 | + btrfs_crit(info, |
---|
| 2987 | +"invalid refs_to_drop, dropping more than 1 refs for tree block %llu refs_to_drop %u", |
---|
| 2988 | + node->bytenr, refs_to_drop); |
---|
| 2989 | + ret = -EINVAL; |
---|
| 2990 | + btrfs_abort_transaction(trans, ret); |
---|
| 2991 | + goto out; |
---|
| 2992 | + } |
---|
6710 | 2993 | |
---|
6711 | 2994 | if (is_data) |
---|
6712 | 2995 | skinny_metadata = false; |
---|
.. | .. |
---|
6715 | 2998 | parent, root_objectid, owner_objectid, |
---|
6716 | 2999 | owner_offset); |
---|
6717 | 3000 | if (ret == 0) { |
---|
| 3001 | + /* |
---|
| 3002 | + * Either the inline backref or the SHARED_DATA_REF/ |
---|
| 3003 | + * SHARED_BLOCK_REF is found |
---|
| 3004 | + * |
---|
| 3005 | + * Here is a quick path to locate EXTENT/METADATA_ITEM. |
---|
| 3006 | + * It's possible the EXTENT/METADATA_ITEM is near current slot. |
---|
| 3007 | + */ |
---|
6718 | 3008 | extent_slot = path->slots[0]; |
---|
6719 | 3009 | while (extent_slot >= 0) { |
---|
6720 | 3010 | btrfs_item_key_to_cpu(path->nodes[0], &key, |
---|
.. | .. |
---|
6731 | 3021 | found_extent = 1; |
---|
6732 | 3022 | break; |
---|
6733 | 3023 | } |
---|
| 3024 | + |
---|
| 3025 | + /* Quick path didn't find the EXTENT/METADATA_ITEM */ |
---|
6734 | 3026 | if (path->slots[0] - extent_slot > 5) |
---|
6735 | 3027 | break; |
---|
6736 | 3028 | extent_slot--; |
---|
6737 | 3029 | } |
---|
6738 | 3030 | |
---|
6739 | 3031 | if (!found_extent) { |
---|
6740 | | - BUG_ON(iref); |
---|
| 3032 | + if (iref) { |
---|
| 3033 | + btrfs_crit(info, |
---|
| 3034 | +"invalid iref, no EXTENT/METADATA_ITEM found but has inline extent ref"); |
---|
| 3035 | + btrfs_abort_transaction(trans, -EUCLEAN); |
---|
| 3036 | + goto err_dump; |
---|
| 3037 | + } |
---|
| 3038 | + /* Must be SHARED_* item, remove the backref first */ |
---|
6741 | 3039 | ret = remove_extent_backref(trans, path, NULL, |
---|
6742 | 3040 | refs_to_drop, |
---|
6743 | 3041 | is_data, &last_ref); |
---|
.. | .. |
---|
6748 | 3046 | btrfs_release_path(path); |
---|
6749 | 3047 | path->leave_spinning = 1; |
---|
6750 | 3048 | |
---|
| 3049 | + /* Slow path to locate EXTENT/METADATA_ITEM */ |
---|
6751 | 3050 | key.objectid = bytenr; |
---|
6752 | 3051 | key.type = BTRFS_EXTENT_ITEM_KEY; |
---|
6753 | 3052 | key.offset = num_bytes; |
---|
.. | .. |
---|
6822 | 3121 | if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID && |
---|
6823 | 3122 | key.type == BTRFS_EXTENT_ITEM_KEY) { |
---|
6824 | 3123 | struct btrfs_tree_block_info *bi; |
---|
6825 | | - BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); |
---|
| 3124 | + if (item_size < sizeof(*ei) + sizeof(*bi)) { |
---|
| 3125 | + btrfs_crit(info, |
---|
| 3126 | +"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %zu", |
---|
| 3127 | + key.objectid, key.type, key.offset, |
---|
| 3128 | + owner_objectid, item_size, |
---|
| 3129 | + sizeof(*ei) + sizeof(*bi)); |
---|
| 3130 | + btrfs_abort_transaction(trans, -EUCLEAN); |
---|
| 3131 | + goto err_dump; |
---|
| 3132 | + } |
---|
6826 | 3133 | bi = (struct btrfs_tree_block_info *)(ei + 1); |
---|
6827 | 3134 | WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi)); |
---|
6828 | 3135 | } |
---|
6829 | 3136 | |
---|
6830 | 3137 | refs = btrfs_extent_refs(leaf, ei); |
---|
6831 | 3138 | if (refs < refs_to_drop) { |
---|
6832 | | - btrfs_err(info, |
---|
6833 | | - "trying to drop %d refs but we only have %Lu for bytenr %Lu", |
---|
| 3139 | + btrfs_crit(info, |
---|
| 3140 | + "trying to drop %d refs but we only have %llu for bytenr %llu", |
---|
6834 | 3141 | refs_to_drop, refs, bytenr); |
---|
6835 | | - ret = -EINVAL; |
---|
6836 | | - btrfs_abort_transaction(trans, ret); |
---|
6837 | | - goto out; |
---|
| 3142 | + btrfs_abort_transaction(trans, -EUCLEAN); |
---|
| 3143 | + goto err_dump; |
---|
6838 | 3144 | } |
---|
6839 | 3145 | refs -= refs_to_drop; |
---|
6840 | 3146 | |
---|
.. | .. |
---|
6846 | 3152 | * be updated by remove_extent_backref |
---|
6847 | 3153 | */ |
---|
6848 | 3154 | if (iref) { |
---|
6849 | | - BUG_ON(!found_extent); |
---|
| 3155 | + if (!found_extent) { |
---|
| 3156 | + btrfs_crit(info, |
---|
| 3157 | +"invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found"); |
---|
| 3158 | + btrfs_abort_transaction(trans, -EUCLEAN); |
---|
| 3159 | + goto err_dump; |
---|
| 3160 | + } |
---|
6850 | 3161 | } else { |
---|
6851 | 3162 | btrfs_set_extent_refs(leaf, ei, refs); |
---|
6852 | 3163 | btrfs_mark_buffer_dirty(leaf); |
---|
.. | .. |
---|
6861 | 3172 | } |
---|
6862 | 3173 | } |
---|
6863 | 3174 | } else { |
---|
| 3175 | + /* In this branch refs == 1 */ |
---|
6864 | 3176 | if (found_extent) { |
---|
6865 | | - BUG_ON(is_data && refs_to_drop != |
---|
6866 | | - extent_data_ref_count(path, iref)); |
---|
| 3177 | + if (is_data && refs_to_drop != |
---|
| 3178 | + extent_data_ref_count(path, iref)) { |
---|
| 3179 | + btrfs_crit(info, |
---|
| 3180 | + "invalid refs_to_drop, current refs %u refs_to_drop %u", |
---|
| 3181 | + extent_data_ref_count(path, iref), |
---|
| 3182 | + refs_to_drop); |
---|
| 3183 | + btrfs_abort_transaction(trans, -EUCLEAN); |
---|
| 3184 | + goto err_dump; |
---|
| 3185 | + } |
---|
6867 | 3186 | if (iref) { |
---|
6868 | | - BUG_ON(path->slots[0] != extent_slot); |
---|
| 3187 | + if (path->slots[0] != extent_slot) { |
---|
| 3188 | + btrfs_crit(info, |
---|
| 3189 | +"invalid iref, extent item key (%llu %u %llu) doesn't have wanted iref", |
---|
| 3190 | + key.objectid, key.type, |
---|
| 3191 | + key.offset); |
---|
| 3192 | + btrfs_abort_transaction(trans, -EUCLEAN); |
---|
| 3193 | + goto err_dump; |
---|
| 3194 | + } |
---|
6869 | 3195 | } else { |
---|
6870 | | - BUG_ON(path->slots[0] != extent_slot + 1); |
---|
| 3196 | + /* |
---|
| 3196 | + * No inline ref, we must be at a SHARED_* item, |
---|
| 3197 | + * and since it's a single ref, the layout must be: |
---|
| 3199 | + * | extent_slot ||extent_slot + 1| |
---|
| 3200 | + * [ EXTENT/METADATA_ITEM ][ SHARED_* ITEM ] |
---|
| 3201 | + */ |
---|
| 3202 | + if (path->slots[0] != extent_slot + 1) { |
---|
| 3203 | + btrfs_crit(info, |
---|
| 3204 | + "invalid SHARED_* item, previous item is not EXTENT/METADATA_ITEM"); |
---|
| 3205 | + btrfs_abort_transaction(trans, -EUCLEAN); |
---|
| 3206 | + goto err_dump; |
---|
| 3207 | + } |
---|
6871 | 3208 | path->slots[0] = extent_slot; |
---|
6872 | 3209 | num_to_del = 2; |
---|
6873 | 3210 | } |
---|
.. | .. |
---|
6897 | 3234 | goto out; |
---|
6898 | 3235 | } |
---|
6899 | 3236 | |
---|
6900 | | - ret = update_block_group(trans, info, bytenr, num_bytes, 0); |
---|
| 3237 | + ret = btrfs_update_block_group(trans, bytenr, num_bytes, 0); |
---|
6901 | 3238 | if (ret) { |
---|
6902 | 3239 | btrfs_abort_transaction(trans, ret); |
---|
6903 | 3240 | goto out; |
---|
.. | .. |
---|
6908 | 3245 | out: |
---|
6909 | 3246 | btrfs_free_path(path); |
---|
6910 | 3247 | return ret; |
---|
| 3248 | +err_dump: |
---|
| 3249 | + /* |
---|
| 3250 | + * Leaf dump can take up a lot of log buffer, so we only do full leaf |
---|
| 3251 | + * dump for debug builds. |
---|
| 3252 | + */ |
---|
| 3253 | + if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) { |
---|
| 3254 | + btrfs_crit(info, "path->slots[0]=%d extent_slot=%d", |
---|
| 3255 | + path->slots[0], extent_slot); |
---|
| 3256 | + btrfs_print_leaf(path->nodes[0]); |
---|
| 3257 | + } |
---|
| 3258 | + |
---|
| 3259 | + btrfs_free_path(path); |
---|
| 3260 | + return -EUCLEAN; |
---|
6911 | 3261 | } |
---|
6912 | 3262 | |
---|
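A condensed sketch of the drop-ref flow implemented by __btrfs_free_extent() above (heavily simplified, with path handling and most error handling omitted; this is an editorial outline, not the actual kernel code):

	/* lookup_extent_backref() located either an inline ref or a keyed SHARED_* item */
	if (!found_extent) {
		/* Keyed SHARED_* backref: drop the backref item first, then
		 * re-search for the EXTENT/METADATA_ITEM itself. */
		remove_extent_backref(trans, path, NULL, refs_to_drop, is_data, &last_ref);
	}

	refs = btrfs_extent_refs(leaf, ei);
	if (refs < refs_to_drop)
		return -EUCLEAN;		/* corrupted extent tree, abort the transaction */
	refs -= refs_to_drop;

	if (refs > 0) {
		/* Still referenced: write back the new count and/or drop the backref. */
		btrfs_set_extent_refs(leaf, ei, refs);
	} else {
		/* Last ref: delete the extent item (plus the adjacent SHARED_* item
		 * in the keyed case) and return the space to the block group. */
		btrfs_del_items(trans, info->extent_root, path, path->slots[0], num_to_del);
		btrfs_update_block_group(trans, bytenr, num_bytes, 0);
	}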
6913 | 3263 | /* |
---|
.. | .. |
---|
6930 | 3280 | goto out_delayed_unlock; |
---|
6931 | 3281 | |
---|
6932 | 3282 | spin_lock(&head->lock); |
---|
6933 | | - if (!RB_EMPTY_ROOT(&head->ref_tree)) |
---|
| 3283 | + if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root)) |
---|
6934 | 3284 | goto out; |
---|
6935 | 3285 | |
---|
6936 | | - if (head->extent_op) { |
---|
6937 | | - if (!head->must_insert_reserved) |
---|
6938 | | - goto out; |
---|
6939 | | - btrfs_free_delayed_extent_op(head->extent_op); |
---|
6940 | | - head->extent_op = NULL; |
---|
6941 | | - } |
---|
| 3286 | + if (cleanup_extent_op(head) != NULL) |
---|
| 3287 | + goto out; |
---|
6942 | 3288 | |
---|
6943 | 3289 | /* |
---|
6944 | 3290 | * waiting for the lock here would deadlock. If someone else has it |
---|
.. | .. |
---|
6947 | 3293 | if (!mutex_trylock(&head->mutex)) |
---|
6948 | 3294 | goto out; |
---|
6949 | 3295 | |
---|
6950 | | - /* |
---|
6951 | | - * at this point we have a head with no other entries. Go |
---|
6952 | | - * ahead and process it. |
---|
6953 | | - */ |
---|
6954 | | - rb_erase(&head->href_node, &delayed_refs->href_root); |
---|
6955 | | - RB_CLEAR_NODE(&head->href_node); |
---|
6956 | | - atomic_dec(&delayed_refs->num_entries); |
---|
6957 | | - |
---|
6958 | | - /* |
---|
6959 | | - * we don't take a ref on the node because we're removing it from the |
---|
6960 | | - * tree, so we just steal the ref the tree was holding. |
---|
6961 | | - */ |
---|
6962 | | - delayed_refs->num_heads--; |
---|
6963 | | - if (head->processing == 0) |
---|
6964 | | - delayed_refs->num_heads_ready--; |
---|
| 3296 | + btrfs_delete_ref_head(delayed_refs, head); |
---|
6965 | 3297 | head->processing = 0; |
---|
| 3298 | + |
---|
6966 | 3299 | spin_unlock(&head->lock); |
---|
6967 | 3300 | spin_unlock(&delayed_refs->lock); |
---|
6968 | 3301 | |
---|
.. | .. |
---|
6970 | 3303 | if (head->must_insert_reserved) |
---|
6971 | 3304 | ret = 1; |
---|
6972 | 3305 | |
---|
| 3306 | + btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head); |
---|
6973 | 3307 | mutex_unlock(&head->mutex); |
---|
6974 | 3308 | btrfs_put_delayed_ref_head(head); |
---|
6975 | 3309 | return ret; |
---|
.. | .. |
---|
6987 | 3321 | u64 parent, int last_ref) |
---|
6988 | 3322 | { |
---|
6989 | 3323 | struct btrfs_fs_info *fs_info = root->fs_info; |
---|
6990 | | - int pin = 1; |
---|
| 3324 | + struct btrfs_ref generic_ref = { 0 }; |
---|
6991 | 3325 | int ret; |
---|
6992 | 3326 | |
---|
6993 | | - if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
---|
6994 | | - int old_ref_mod, new_ref_mod; |
---|
| 3327 | + btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF, |
---|
| 3328 | + buf->start, buf->len, parent); |
---|
| 3329 | + btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf), |
---|
| 3330 | + root->root_key.objectid); |
---|
6995 | 3331 | |
---|
6996 | | - btrfs_ref_tree_mod(root, buf->start, buf->len, parent, |
---|
6997 | | - root->root_key.objectid, |
---|
6998 | | - btrfs_header_level(buf), 0, |
---|
6999 | | - BTRFS_DROP_DELAYED_REF); |
---|
7000 | | - ret = btrfs_add_delayed_tree_ref(trans, buf->start, |
---|
7001 | | - buf->len, parent, |
---|
7002 | | - root->root_key.objectid, |
---|
7003 | | - btrfs_header_level(buf), |
---|
7004 | | - BTRFS_DROP_DELAYED_REF, NULL, |
---|
7005 | | - &old_ref_mod, &new_ref_mod); |
---|
| 3332 | + if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
---|
| 3333 | + btrfs_ref_tree_mod(fs_info, &generic_ref); |
---|
| 3334 | + ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL); |
---|
7006 | 3335 | BUG_ON(ret); /* -ENOMEM */ |
---|
7007 | | - pin = old_ref_mod >= 0 && new_ref_mod < 0; |
---|
7008 | 3336 | } |
---|
7009 | 3337 | |
---|
7010 | 3338 | if (last_ref && btrfs_header_generation(buf) == trans->transid) { |
---|
7011 | | - struct btrfs_block_group_cache *cache; |
---|
| 3339 | + struct btrfs_block_group *cache; |
---|
7012 | 3340 | |
---|
7013 | 3341 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
---|
7014 | 3342 | ret = check_ref_cleanup(trans, buf->start); |
---|
.. | .. |
---|
7016 | 3344 | goto out; |
---|
7017 | 3345 | } |
---|
7018 | 3346 | |
---|
7019 | | - pin = 0; |
---|
7020 | 3347 | cache = btrfs_lookup_block_group(fs_info, buf->start); |
---|
7021 | 3348 | |
---|
7022 | 3349 | if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { |
---|
7023 | | - pin_down_extent(fs_info, cache, buf->start, |
---|
7024 | | - buf->len, 1); |
---|
| 3350 | + pin_down_extent(trans, cache, buf->start, buf->len, 1); |
---|
7025 | 3351 | btrfs_put_block_group(cache); |
---|
7026 | 3352 | goto out; |
---|
7027 | 3353 | } |
---|
.. | .. |
---|
7034 | 3360 | trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len); |
---|
7035 | 3361 | } |
---|
7036 | 3362 | out: |
---|
7037 | | - if (pin) |
---|
7038 | | - add_pinned_bytes(fs_info, buf->len, true, |
---|
7039 | | - root->root_key.objectid); |
---|
7040 | | - |
---|
7041 | 3363 | if (last_ref) { |
---|
7042 | 3364 | /* |
---|
7043 | 3365 | * Deleting the buffer, clear the corrupt flag since it doesn't |
---|
.. | .. |
---|
7048 | 3370 | } |
---|
7049 | 3371 | |
---|
7050 | 3372 | /* Can return -ENOMEM */ |
---|
7051 | | -int btrfs_free_extent(struct btrfs_trans_handle *trans, |
---|
7052 | | - struct btrfs_root *root, |
---|
7053 | | - u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, |
---|
7054 | | - u64 owner, u64 offset) |
---|
| 3373 | +int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref) |
---|
7055 | 3374 | { |
---|
7056 | | - struct btrfs_fs_info *fs_info = root->fs_info; |
---|
7057 | | - int old_ref_mod, new_ref_mod; |
---|
| 3375 | + struct btrfs_fs_info *fs_info = trans->fs_info; |
---|
7058 | 3376 | int ret; |
---|
7059 | 3377 | |
---|
7060 | 3378 | if (btrfs_is_testing(fs_info)) |
---|
7061 | 3379 | return 0; |
---|
7062 | 3380 | |
---|
7063 | | - if (root_objectid != BTRFS_TREE_LOG_OBJECTID) |
---|
7064 | | - btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, |
---|
7065 | | - root_objectid, owner, offset, |
---|
7066 | | - BTRFS_DROP_DELAYED_REF); |
---|
7067 | | - |
---|
7068 | 3381 | /* |
---|
7069 | 3382 | * tree log blocks never actually go into the extent allocation |
---|
7070 | 3383 | * tree, just update pinning info and exit early. |
---|
7071 | 3384 | */ |
---|
7072 | | - if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { |
---|
7073 | | - WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); |
---|
| 3385 | + if ((ref->type == BTRFS_REF_METADATA && |
---|
| 3386 | + ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) || |
---|
| 3387 | + (ref->type == BTRFS_REF_DATA && |
---|
| 3388 | + ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) { |
---|
7074 | 3389 | /* unlocks the pinned mutex */ |
---|
7075 | | - btrfs_pin_extent(fs_info, bytenr, num_bytes, 1); |
---|
7076 | | - old_ref_mod = new_ref_mod = 0; |
---|
| 3390 | + btrfs_pin_extent(trans, ref->bytenr, ref->len, 1); |
---|
7077 | 3391 | ret = 0; |
---|
7078 | | - } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
---|
7079 | | - ret = btrfs_add_delayed_tree_ref(trans, bytenr, |
---|
7080 | | - num_bytes, parent, |
---|
7081 | | - root_objectid, (int)owner, |
---|
7082 | | - BTRFS_DROP_DELAYED_REF, NULL, |
---|
7083 | | - &old_ref_mod, &new_ref_mod); |
---|
| 3392 | + } else if (ref->type == BTRFS_REF_METADATA) { |
---|
| 3393 | + ret = btrfs_add_delayed_tree_ref(trans, ref, NULL); |
---|
7084 | 3394 | } else { |
---|
7085 | | - ret = btrfs_add_delayed_data_ref(trans, bytenr, |
---|
7086 | | - num_bytes, parent, |
---|
7087 | | - root_objectid, owner, offset, |
---|
7088 | | - 0, BTRFS_DROP_DELAYED_REF, |
---|
7089 | | - &old_ref_mod, &new_ref_mod); |
---|
| 3395 | + ret = btrfs_add_delayed_data_ref(trans, ref, 0); |
---|
7090 | 3396 | } |
---|
7091 | 3397 | |
---|
7092 | | - if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) { |
---|
7093 | | - bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID; |
---|
| 3398 | + if (!((ref->type == BTRFS_REF_METADATA && |
---|
| 3399 | + ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) || |
---|
| 3400 | + (ref->type == BTRFS_REF_DATA && |
---|
| 3401 | + ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID))) |
---|
| 3402 | + btrfs_ref_tree_mod(fs_info, ref); |
---|
7094 | 3403 | |
---|
7095 | | - add_pinned_bytes(fs_info, num_bytes, metadata, root_objectid); |
---|
7096 | | - } |
---|
7097 | | - |
---|
7098 | | - return ret; |
---|
7099 | | -} |
---|
7100 | | - |
---|
7101 | | -/* |
---|
7102 | | - * when we wait for progress in the block group caching, its because |
---|
7103 | | - * our allocation attempt failed at least once. So, we must sleep |
---|
7104 | | - * and let some progress happen before we try again. |
---|
7105 | | - * |
---|
7106 | | - * This function will sleep at least once waiting for new free space to |
---|
7107 | | - * show up, and then it will check the block group free space numbers |
---|
7108 | | - * for our min num_bytes. Another option is to have it go ahead |
---|
7109 | | - * and look in the rbtree for a free extent of a given size, but this |
---|
7110 | | - * is a good start. |
---|
7111 | | - * |
---|
7112 | | - * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using |
---|
7113 | | - * any of the information in this block group. |
---|
7114 | | - */ |
---|
7115 | | -static noinline void |
---|
7116 | | -wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, |
---|
7117 | | - u64 num_bytes) |
---|
7118 | | -{ |
---|
7119 | | - struct btrfs_caching_control *caching_ctl; |
---|
7120 | | - |
---|
7121 | | - caching_ctl = get_caching_control(cache); |
---|
7122 | | - if (!caching_ctl) |
---|
7123 | | - return; |
---|
7124 | | - |
---|
7125 | | - wait_event(caching_ctl->wait, block_group_cache_done(cache) || |
---|
7126 | | - (cache->free_space_ctl->free_space >= num_bytes)); |
---|
7127 | | - |
---|
7128 | | - put_caching_control(caching_ctl); |
---|
7129 | | -} |
---|
7130 | | - |
---|
7131 | | -static noinline int |
---|
7132 | | -wait_block_group_cache_done(struct btrfs_block_group_cache *cache) |
---|
7133 | | -{ |
---|
7134 | | - struct btrfs_caching_control *caching_ctl; |
---|
7135 | | - int ret = 0; |
---|
7136 | | - |
---|
7137 | | - caching_ctl = get_caching_control(cache); |
---|
7138 | | - if (!caching_ctl) |
---|
7139 | | - return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0; |
---|
7140 | | - |
---|
7141 | | - wait_event(caching_ctl->wait, block_group_cache_done(cache)); |
---|
7142 | | - if (cache->cached == BTRFS_CACHE_ERROR) |
---|
7143 | | - ret = -EIO; |
---|
7144 | | - put_caching_control(caching_ctl); |
---|
7145 | 3404 | return ret; |
---|
7146 | 3405 | } |
---|
7147 | 3406 | |
---|
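For reference, a minimal call-site sketch of the new single-argument btrfs_free_extent() for a tree block (variable names are placeholders; data extents would use btrfs_init_data_ref() instead of btrfs_init_tree_ref()):

	/* old API */
	ret = btrfs_free_extent(trans, root, bytenr, num_bytes, parent,
				root_objectid, owner, offset);

	/* new API: describe the reference once in a struct btrfs_ref */
	struct btrfs_ref ref = { 0 };

	btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
			       num_bytes, parent);
	btrfs_init_tree_ref(&ref, level, root_objectid);
	ret = btrfs_free_extent(trans, &ref);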
7148 | 3407 | enum btrfs_loop_type { |
---|
7149 | | - LOOP_CACHING_NOWAIT = 0, |
---|
7150 | | - LOOP_CACHING_WAIT = 1, |
---|
7151 | | - LOOP_ALLOC_CHUNK = 2, |
---|
7152 | | - LOOP_NO_EMPTY_SIZE = 3, |
---|
| 3408 | + LOOP_CACHING_NOWAIT, |
---|
| 3409 | + LOOP_CACHING_WAIT, |
---|
| 3410 | + LOOP_ALLOC_CHUNK, |
---|
| 3411 | + LOOP_NO_EMPTY_SIZE, |
---|
7153 | 3412 | }; |
---|
7154 | 3413 | |
---|
7155 | 3414 | static inline void |
---|
7156 | | -btrfs_lock_block_group(struct btrfs_block_group_cache *cache, |
---|
| 3415 | +btrfs_lock_block_group(struct btrfs_block_group *cache, |
---|
7157 | 3416 | int delalloc) |
---|
7158 | 3417 | { |
---|
7159 | 3418 | if (delalloc) |
---|
7160 | 3419 | down_read(&cache->data_rwsem); |
---|
7161 | 3420 | } |
---|
7162 | 3421 | |
---|
7163 | | -static inline void |
---|
7164 | | -btrfs_grab_block_group(struct btrfs_block_group_cache *cache, |
---|
| 3422 | +static inline void btrfs_grab_block_group(struct btrfs_block_group *cache, |
---|
7165 | 3423 | int delalloc) |
---|
7166 | 3424 | { |
---|
7167 | 3425 | btrfs_get_block_group(cache); |
---|
.. | .. |
---|
7169 | 3427 | down_read(&cache->data_rwsem); |
---|
7170 | 3428 | } |
---|
7171 | 3429 | |
---|
7172 | | -static struct btrfs_block_group_cache * |
---|
7173 | | -btrfs_lock_cluster(struct btrfs_block_group_cache *block_group, |
---|
| 3430 | +static struct btrfs_block_group *btrfs_lock_cluster( |
---|
| 3431 | + struct btrfs_block_group *block_group, |
---|
7174 | 3432 | struct btrfs_free_cluster *cluster, |
---|
7175 | 3433 | int delalloc) |
---|
| 3434 | + __acquires(&cluster->refill_lock) |
---|
7176 | 3435 | { |
---|
7177 | | - struct btrfs_block_group_cache *used_bg = NULL; |
---|
| 3436 | + struct btrfs_block_group *used_bg = NULL; |
---|
7178 | 3437 | |
---|
7179 | 3438 | spin_lock(&cluster->refill_lock); |
---|
7180 | 3439 | while (1) { |
---|
.. | .. |
---|
7208 | 3467 | } |
---|
7209 | 3468 | |
---|
7210 | 3469 | static inline void |
---|
7211 | | -btrfs_release_block_group(struct btrfs_block_group_cache *cache, |
---|
| 3470 | +btrfs_release_block_group(struct btrfs_block_group *cache, |
---|
7212 | 3471 | int delalloc) |
---|
7213 | 3472 | { |
---|
7214 | 3473 | if (delalloc) |
---|
7215 | 3474 | up_read(&cache->data_rwsem); |
---|
7216 | 3475 | btrfs_put_block_group(cache); |
---|
| 3476 | +} |
---|
| 3477 | + |
---|
| 3478 | +enum btrfs_extent_allocation_policy { |
---|
| 3479 | + BTRFS_EXTENT_ALLOC_CLUSTERED, |
---|
| 3480 | +}; |
---|
| 3481 | + |
---|
| 3482 | +/* |
---|
| 3483 | + * Structure used internally for find_free_extent() function. Wraps needed |
---|
| 3484 | + * parameters. |
---|
| 3485 | + */ |
---|
| 3486 | +struct find_free_extent_ctl { |
---|
| 3487 | + /* Basic allocation info */ |
---|
| 3488 | + u64 num_bytes; |
---|
| 3489 | + u64 empty_size; |
---|
| 3490 | + u64 flags; |
---|
| 3491 | + int delalloc; |
---|
| 3492 | + |
---|
| 3493 | + /* Where to start the search inside the bg */ |
---|
| 3494 | + u64 search_start; |
---|
| 3495 | + |
---|
| 3496 | + /* For clustered allocation */ |
---|
| 3497 | + u64 empty_cluster; |
---|
| 3498 | + struct btrfs_free_cluster *last_ptr; |
---|
| 3499 | + bool use_cluster; |
---|
| 3500 | + |
---|
| 3501 | + bool have_caching_bg; |
---|
| 3502 | + bool orig_have_caching_bg; |
---|
| 3503 | + |
---|
| 3504 | + /* RAID index, converted from flags */ |
---|
| 3505 | + int index; |
---|
| 3506 | + |
---|
| 3507 | + /* |
---|
| 3508 | + * Current loop number, check find_free_extent_update_loop() for details |
---|
| 3509 | + */ |
---|
| 3510 | + int loop; |
---|
| 3511 | + |
---|
| 3512 | + /* |
---|
| 3513 | + * Whether we're refilling a cluster, if true we need to re-search |
---|
| 3514 | + * the current block group but don't try to refill the cluster again. |
---|
| 3515 | + */ |
---|
| 3516 | + bool retry_clustered; |
---|
| 3517 | + |
---|
| 3518 | + /* |
---|
| 3519 | + * Whether we're updating free space cache, if true we need to re-search |
---|
| 3520 | + * the current block group but don't try updating the free space cache again. |
---|
| 3521 | + */ |
---|
| 3522 | + bool retry_unclustered; |
---|
| 3523 | + |
---|
| 3524 | + /* If current block group is cached */ |
---|
| 3525 | + int cached; |
---|
| 3526 | + |
---|
| 3527 | + /* Max contiguous hole found */ |
---|
| 3528 | + u64 max_extent_size; |
---|
| 3529 | + |
---|
| 3530 | + /* Total free space from free space cache, not always contiguous */ |
---|
| 3531 | + u64 total_free_space; |
---|
| 3532 | + |
---|
| 3533 | + /* Found result */ |
---|
| 3534 | + u64 found_offset; |
---|
| 3535 | + |
---|
| 3536 | + /* Hint where to start looking for an empty space */ |
---|
| 3537 | + u64 hint_byte; |
---|
| 3538 | + |
---|
| 3539 | + /* Allocation policy */ |
---|
| 3540 | + enum btrfs_extent_allocation_policy policy; |
---|
| 3541 | +}; |
---|
| 3542 | + |
---|
| 3543 | + |
---|
| 3544 | +/* |
---|
| 3545 | + * Helper function for find_free_extent(). |
---|
| 3546 | + * |
---|
| 3547 | + * Return -ENOENT to inform caller that we need to fall back to unclustered mode. |
---|
| 3548 | + * Return -EAGAIN to inform caller that we need to re-search this block group. |
---|
| 3549 | + * Return >0 to inform caller that we found nothing. |
---|
| 3550 | + * Return 0 means we have found a location and set ffe_ctl->found_offset. |
---|
| 3551 | + */ |
---|
| 3552 | +static int find_free_extent_clustered(struct btrfs_block_group *bg, |
---|
| 3553 | + struct find_free_extent_ctl *ffe_ctl, |
---|
| 3554 | + struct btrfs_block_group **cluster_bg_ret) |
---|
| 3555 | +{ |
---|
| 3556 | + struct btrfs_block_group *cluster_bg; |
---|
| 3557 | + struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr; |
---|
| 3558 | + u64 aligned_cluster; |
---|
| 3559 | + u64 offset; |
---|
| 3560 | + int ret; |
---|
| 3561 | + |
---|
| 3562 | + cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc); |
---|
| 3563 | + if (!cluster_bg) |
---|
| 3564 | + goto refill_cluster; |
---|
| 3565 | + if (cluster_bg != bg && (cluster_bg->ro || |
---|
| 3566 | + !block_group_bits(cluster_bg, ffe_ctl->flags))) |
---|
| 3567 | + goto release_cluster; |
---|
| 3568 | + |
---|
| 3569 | + offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr, |
---|
| 3570 | + ffe_ctl->num_bytes, cluster_bg->start, |
---|
| 3571 | + &ffe_ctl->max_extent_size); |
---|
| 3572 | + if (offset) { |
---|
| 3573 | + /* We have a block, we're done */ |
---|
| 3574 | + spin_unlock(&last_ptr->refill_lock); |
---|
| 3575 | + trace_btrfs_reserve_extent_cluster(cluster_bg, |
---|
| 3576 | + ffe_ctl->search_start, ffe_ctl->num_bytes); |
---|
| 3577 | + *cluster_bg_ret = cluster_bg; |
---|
| 3578 | + ffe_ctl->found_offset = offset; |
---|
| 3579 | + return 0; |
---|
| 3580 | + } |
---|
| 3581 | + WARN_ON(last_ptr->block_group != cluster_bg); |
---|
| 3582 | + |
---|
| 3583 | +release_cluster: |
---|
| 3584 | + /* |
---|
| 3585 | + * If we are on LOOP_NO_EMPTY_SIZE, we can't set up a new cluster, so |
---|
| 3586 | + * let's just skip it and let the allocator find whatever block it can |
---|
| 3587 | + * find. If we reach this point, we will have tried the cluster |
---|
| 3588 | + * allocator plenty of times and not have found anything, so we are |
---|
| 3589 | + * likely way too fragmented for the clustering stuff to find anything. |
---|
| 3590 | + * |
---|
| 3591 | + * However, if the cluster is taken from the current block group, |
---|
| 3592 | + * release the cluster first, so that we stand a better chance of |
---|
| 3593 | + * succeeding in the unclustered allocation. |
---|
| 3594 | + */ |
---|
| 3595 | + if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) { |
---|
| 3596 | + spin_unlock(&last_ptr->refill_lock); |
---|
| 3597 | + btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc); |
---|
| 3598 | + return -ENOENT; |
---|
| 3599 | + } |
---|
| 3600 | + |
---|
| 3601 | + /* This cluster didn't work out, free it and start over */ |
---|
| 3602 | + btrfs_return_cluster_to_free_space(NULL, last_ptr); |
---|
| 3603 | + |
---|
| 3604 | + if (cluster_bg != bg) |
---|
| 3605 | + btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc); |
---|
| 3606 | + |
---|
| 3607 | +refill_cluster: |
---|
| 3608 | + if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) { |
---|
| 3609 | + spin_unlock(&last_ptr->refill_lock); |
---|
| 3610 | + return -ENOENT; |
---|
| 3611 | + } |
---|
| 3612 | + |
---|
| 3613 | + aligned_cluster = max_t(u64, |
---|
| 3614 | + ffe_ctl->empty_cluster + ffe_ctl->empty_size, |
---|
| 3615 | + bg->full_stripe_len); |
---|
| 3616 | + ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start, |
---|
| 3617 | + ffe_ctl->num_bytes, aligned_cluster); |
---|
| 3618 | + if (ret == 0) { |
---|
| 3619 | + /* Now pull our allocation out of this cluster */ |
---|
| 3620 | + offset = btrfs_alloc_from_cluster(bg, last_ptr, |
---|
| 3621 | + ffe_ctl->num_bytes, ffe_ctl->search_start, |
---|
| 3622 | + &ffe_ctl->max_extent_size); |
---|
| 3623 | + if (offset) { |
---|
| 3624 | + /* We found one, proceed */ |
---|
| 3625 | + spin_unlock(&last_ptr->refill_lock); |
---|
| 3626 | + trace_btrfs_reserve_extent_cluster(bg, |
---|
| 3627 | + ffe_ctl->search_start, |
---|
| 3628 | + ffe_ctl->num_bytes); |
---|
| 3629 | + ffe_ctl->found_offset = offset; |
---|
| 3630 | + return 0; |
---|
| 3631 | + } |
---|
| 3632 | + } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT && |
---|
| 3633 | + !ffe_ctl->retry_clustered) { |
---|
| 3634 | + spin_unlock(&last_ptr->refill_lock); |
---|
| 3635 | + |
---|
| 3636 | + ffe_ctl->retry_clustered = true; |
---|
| 3637 | + btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes + |
---|
| 3638 | + ffe_ctl->empty_cluster + ffe_ctl->empty_size); |
---|
| 3639 | + return -EAGAIN; |
---|
| 3640 | + } |
---|
| 3641 | + /* |
---|
| 3642 | + * At this point we either didn't find a cluster or we weren't able to |
---|
| 3643 | + * allocate a block from our cluster. Free the cluster we've been |
---|
| 3644 | + * trying to use, and go to the next block group. |
---|
| 3645 | + */ |
---|
| 3646 | + btrfs_return_cluster_to_free_space(NULL, last_ptr); |
---|
| 3647 | + spin_unlock(&last_ptr->refill_lock); |
---|
| 3648 | + return 1; |
---|
| 3649 | +} |
---|
| 3650 | + |
---|
| 3651 | +/* |
---|
| 3652 | + * Return >0 to inform caller that we found nothing |
---|
| 3653 | + * Return 0 when we found a free extent and set ffe_ctl->found_offset |
---|
| 3654 | + * Return -EAGAIN to inform caller that we need to re-search this block group |
---|
| 3655 | + */ |
---|
| 3656 | +static int find_free_extent_unclustered(struct btrfs_block_group *bg, |
---|
| 3657 | + struct find_free_extent_ctl *ffe_ctl) |
---|
| 3658 | +{ |
---|
| 3659 | + struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr; |
---|
| 3660 | + u64 offset; |
---|
| 3661 | + |
---|
| 3662 | + /* |
---|
| 3663 | + * We are doing an unclustered allocation, set the fragmented flag so |
---|
| 3664 | + * we don't bother trying to set up a cluster again until we get more |
---|
| 3665 | + * space. |
---|
| 3666 | + */ |
---|
| 3667 | + if (unlikely(last_ptr)) { |
---|
| 3668 | + spin_lock(&last_ptr->lock); |
---|
| 3669 | + last_ptr->fragmented = 1; |
---|
| 3670 | + spin_unlock(&last_ptr->lock); |
---|
| 3671 | + } |
---|
| 3672 | + if (ffe_ctl->cached) { |
---|
| 3673 | + struct btrfs_free_space_ctl *free_space_ctl; |
---|
| 3674 | + |
---|
| 3675 | + free_space_ctl = bg->free_space_ctl; |
---|
| 3676 | + spin_lock(&free_space_ctl->tree_lock); |
---|
| 3677 | + if (free_space_ctl->free_space < |
---|
| 3678 | + ffe_ctl->num_bytes + ffe_ctl->empty_cluster + |
---|
| 3679 | + ffe_ctl->empty_size) { |
---|
| 3680 | + ffe_ctl->total_free_space = max_t(u64, |
---|
| 3681 | + ffe_ctl->total_free_space, |
---|
| 3682 | + free_space_ctl->free_space); |
---|
| 3683 | + spin_unlock(&free_space_ctl->tree_lock); |
---|
| 3684 | + return 1; |
---|
| 3685 | + } |
---|
| 3686 | + spin_unlock(&free_space_ctl->tree_lock); |
---|
| 3687 | + } |
---|
| 3688 | + |
---|
| 3689 | + offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start, |
---|
| 3690 | + ffe_ctl->num_bytes, ffe_ctl->empty_size, |
---|
| 3691 | + &ffe_ctl->max_extent_size); |
---|
| 3692 | + |
---|
| 3693 | + /* |
---|
| 3694 | + * If we didn't find a chunk, and we haven't failed on this block group |
---|
| 3695 | + * before, and this block group is in the middle of caching and we are |
---|
| 3696 | + * ok with waiting, then go ahead and wait for progress to be made, and |
---|
| 3697 | + * set @retry_unclustered to true. |
---|
| 3698 | + * |
---|
| 3699 | + * If @retry_unclustered is true then we've already waited on this |
---|
| 3700 | + * block group once and should move on to the next block group. |
---|
| 3701 | + */ |
---|
| 3702 | + if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached && |
---|
| 3703 | + ffe_ctl->loop > LOOP_CACHING_NOWAIT) { |
---|
| 3704 | + btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes + |
---|
| 3705 | + ffe_ctl->empty_size); |
---|
| 3706 | + ffe_ctl->retry_unclustered = true; |
---|
| 3707 | + return -EAGAIN; |
---|
| 3708 | + } else if (!offset) { |
---|
| 3709 | + return 1; |
---|
| 3710 | + } |
---|
| 3711 | + ffe_ctl->found_offset = offset; |
---|
| 3712 | + return 0; |
---|
| 3713 | +} |
---|
| 3714 | + |
---|
| 3715 | +static int do_allocation_clustered(struct btrfs_block_group *block_group, |
---|
| 3716 | + struct find_free_extent_ctl *ffe_ctl, |
---|
| 3717 | + struct btrfs_block_group **bg_ret) |
---|
| 3718 | +{ |
---|
| 3719 | + int ret; |
---|
| 3720 | + |
---|
| 3721 | + /* We want to try and use the cluster allocator, so let's look there */ |
---|
| 3722 | + if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) { |
---|
| 3723 | + ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret); |
---|
| 3724 | + if (ret >= 0 || ret == -EAGAIN) |
---|
| 3725 | + return ret; |
---|
| 3726 | + /* ret == -ENOENT case falls through */ |
---|
| 3727 | + } |
---|
| 3728 | + |
---|
| 3729 | + return find_free_extent_unclustered(block_group, ffe_ctl); |
---|
| 3730 | +} |
---|
| 3731 | + |
---|
| 3732 | +static int do_allocation(struct btrfs_block_group *block_group, |
---|
| 3733 | + struct find_free_extent_ctl *ffe_ctl, |
---|
| 3734 | + struct btrfs_block_group **bg_ret) |
---|
| 3735 | +{ |
---|
| 3736 | + switch (ffe_ctl->policy) { |
---|
| 3737 | + case BTRFS_EXTENT_ALLOC_CLUSTERED: |
---|
| 3738 | + return do_allocation_clustered(block_group, ffe_ctl, bg_ret); |
---|
| 3739 | + default: |
---|
| 3740 | + BUG(); |
---|
| 3741 | + } |
---|
| 3742 | +} |
---|
| 3743 | + |
---|
| 3744 | +static void release_block_group(struct btrfs_block_group *block_group, |
---|
| 3745 | + struct find_free_extent_ctl *ffe_ctl, |
---|
| 3746 | + int delalloc) |
---|
| 3747 | +{ |
---|
| 3748 | + switch (ffe_ctl->policy) { |
---|
| 3749 | + case BTRFS_EXTENT_ALLOC_CLUSTERED: |
---|
| 3750 | + ffe_ctl->retry_clustered = false; |
---|
| 3751 | + ffe_ctl->retry_unclustered = false; |
---|
| 3752 | + break; |
---|
| 3753 | + default: |
---|
| 3754 | + BUG(); |
---|
| 3755 | + } |
---|
| 3756 | + |
---|
| 3757 | + BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) != |
---|
| 3758 | + ffe_ctl->index); |
---|
| 3759 | + btrfs_release_block_group(block_group, delalloc); |
---|
| 3760 | +} |
---|
| 3761 | + |
---|
| 3762 | +static void found_extent_clustered(struct find_free_extent_ctl *ffe_ctl, |
---|
| 3763 | + struct btrfs_key *ins) |
---|
| 3764 | +{ |
---|
| 3765 | + struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr; |
---|
| 3766 | + |
---|
| 3767 | + if (!ffe_ctl->use_cluster && last_ptr) { |
---|
| 3768 | + spin_lock(&last_ptr->lock); |
---|
| 3769 | + last_ptr->window_start = ins->objectid; |
---|
| 3770 | + spin_unlock(&last_ptr->lock); |
---|
| 3771 | + } |
---|
| 3772 | +} |
---|
| 3773 | + |
---|
| 3774 | +static void found_extent(struct find_free_extent_ctl *ffe_ctl, |
---|
| 3775 | + struct btrfs_key *ins) |
---|
| 3776 | +{ |
---|
| 3777 | + switch (ffe_ctl->policy) { |
---|
| 3778 | + case BTRFS_EXTENT_ALLOC_CLUSTERED: |
---|
| 3779 | + found_extent_clustered(ffe_ctl, ins); |
---|
| 3780 | + break; |
---|
| 3781 | + default: |
---|
| 3782 | + BUG(); |
---|
| 3783 | + } |
---|
| 3784 | +} |
---|
| 3785 | + |
---|
| 3786 | +static int chunk_allocation_failed(struct find_free_extent_ctl *ffe_ctl) |
---|
| 3787 | +{ |
---|
| 3788 | + switch (ffe_ctl->policy) { |
---|
| 3789 | + case BTRFS_EXTENT_ALLOC_CLUSTERED: |
---|
| 3790 | + /* |
---|
| 3791 | + * If we can't allocate a new chunk, we've already looped through |
---|
| 3792 | + * at least once, so move on to the NO_EMPTY_SIZE case. |
---|
| 3793 | + */ |
---|
| 3794 | + ffe_ctl->loop = LOOP_NO_EMPTY_SIZE; |
---|
| 3795 | + return 0; |
---|
| 3796 | + default: |
---|
| 3797 | + BUG(); |
---|
| 3798 | + } |
---|
| 3799 | +} |
---|
| 3800 | + |
---|
| 3801 | +/* |
---|
| 3802 | + * Return >0 means caller needs to re-search for a free extent |
---|
| 3803 | + * Return 0 means we have the needed free extent. |
---|
| 3804 | + * Return <0 means we failed to locate any free extent. |
---|
| 3805 | + */ |
---|
| 3806 | +static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info, |
---|
| 3807 | + struct btrfs_key *ins, |
---|
| 3808 | + struct find_free_extent_ctl *ffe_ctl, |
---|
| 3809 | + bool full_search) |
---|
| 3810 | +{ |
---|
| 3811 | + struct btrfs_root *root = fs_info->extent_root; |
---|
| 3812 | + int ret; |
---|
| 3813 | + |
---|
| 3814 | + if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) && |
---|
| 3815 | + ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg) |
---|
| 3816 | + ffe_ctl->orig_have_caching_bg = true; |
---|
| 3817 | + |
---|
| 3818 | + if (!ins->objectid && ffe_ctl->loop >= LOOP_CACHING_WAIT && |
---|
| 3819 | + ffe_ctl->have_caching_bg) |
---|
| 3820 | + return 1; |
---|
| 3821 | + |
---|
| 3822 | + if (!ins->objectid && ++(ffe_ctl->index) < BTRFS_NR_RAID_TYPES) |
---|
| 3823 | + return 1; |
---|
| 3824 | + |
---|
| 3825 | + if (ins->objectid) { |
---|
| 3826 | + found_extent(ffe_ctl, ins); |
---|
| 3827 | + return 0; |
---|
| 3828 | + } |
---|
| 3829 | + |
---|
| 3830 | + /* |
---|
| 3831 | + * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking |
---|
| 3832 | + * caching kthreads as we move along |
---|
| 3833 | + * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching |
---|
| 3834 | + * LOOP_ALLOC_CHUNK, force a chunk allocation and try again |
---|
| 3835 | + * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try |
---|
| 3836 | + * again |
---|
| 3837 | + */ |
---|
| 3838 | + if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) { |
---|
| 3839 | + ffe_ctl->index = 0; |
---|
| 3840 | + if (ffe_ctl->loop == LOOP_CACHING_NOWAIT) { |
---|
| 3841 | + /* |
---|
| 3842 | + * We want to skip the LOOP_CACHING_WAIT step if we |
---|
| 3843 | + * don't have any uncached bgs and we've already done a |
---|
| 3844 | + * full search through. |
---|
| 3845 | + */ |
---|
| 3846 | + if (ffe_ctl->orig_have_caching_bg || !full_search) |
---|
| 3847 | + ffe_ctl->loop = LOOP_CACHING_WAIT; |
---|
| 3848 | + else |
---|
| 3849 | + ffe_ctl->loop = LOOP_ALLOC_CHUNK; |
---|
| 3850 | + } else { |
---|
| 3851 | + ffe_ctl->loop++; |
---|
| 3852 | + } |
---|
| 3853 | + |
---|
| 3854 | + if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) { |
---|
| 3855 | + struct btrfs_trans_handle *trans; |
---|
| 3856 | + int exist = 0; |
---|
| 3857 | + |
---|
| 3858 | + trans = current->journal_info; |
---|
| 3859 | + if (trans) |
---|
| 3860 | + exist = 1; |
---|
| 3861 | + else |
---|
| 3862 | + trans = btrfs_join_transaction(root); |
---|
| 3863 | + |
---|
| 3864 | + if (IS_ERR(trans)) { |
---|
| 3865 | + ret = PTR_ERR(trans); |
---|
| 3866 | + return ret; |
---|
| 3867 | + } |
---|
| 3868 | + |
---|
| 3869 | + ret = btrfs_chunk_alloc(trans, ffe_ctl->flags, |
---|
| 3870 | + CHUNK_ALLOC_FORCE); |
---|
| 3871 | + |
---|
| 3872 | + /* Do not bail out on ENOSPC since we can do more. */ |
---|
| 3873 | + if (ret == -ENOSPC) |
---|
| 3874 | + ret = chunk_allocation_failed(ffe_ctl); |
---|
| 3875 | + else if (ret < 0) |
---|
| 3876 | + btrfs_abort_transaction(trans, ret); |
---|
| 3877 | + else |
---|
| 3878 | + ret = 0; |
---|
| 3879 | + if (!exist) |
---|
| 3880 | + btrfs_end_transaction(trans); |
---|
| 3881 | + if (ret) |
---|
| 3882 | + return ret; |
---|
| 3883 | + } |
---|
| 3884 | + |
---|
| 3885 | + if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) { |
---|
| 3886 | + if (ffe_ctl->policy != BTRFS_EXTENT_ALLOC_CLUSTERED) |
---|
| 3887 | + return -ENOSPC; |
---|
| 3888 | + |
---|
| 3889 | + /* |
---|
| 3890 | + * Don't loop again if we already have no empty_size and |
---|
| 3891 | + * no empty_cluster. |
---|
| 3892 | + */ |
---|
| 3893 | + if (ffe_ctl->empty_size == 0 && |
---|
| 3894 | + ffe_ctl->empty_cluster == 0) |
---|
| 3895 | + return -ENOSPC; |
---|
| 3896 | + ffe_ctl->empty_size = 0; |
---|
| 3897 | + ffe_ctl->empty_cluster = 0; |
---|
| 3898 | + } |
---|
| 3899 | + return 1; |
---|
| 3900 | + } |
---|
| 3901 | + return -ENOSPC; |
---|
| 3902 | +} |
---|
| 3903 | + |
---|
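A sketch of how the search loop in find_free_extent() consumes this three-way return value (simplified from the code further below):

	ret = find_free_extent_update_loop(fs_info, ins, &ffe_ctl, full_search);
	if (ret > 0)
		goto search;	/* the loop level was bumped, rescan the block groups */
	if (ret == -ENOSPC)
		ins->offset = ffe_ctl.max_extent_size;	/* hint so the caller can retry smaller */
	/* ret == 0: ins->objectid and ins->offset already describe the reserved extent */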
| 3904 | +static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info, |
---|
| 3905 | + struct find_free_extent_ctl *ffe_ctl, |
---|
| 3906 | + struct btrfs_space_info *space_info, |
---|
| 3907 | + struct btrfs_key *ins) |
---|
| 3908 | +{ |
---|
| 3909 | + /* |
---|
| 3910 | + * If our free space is heavily fragmented we may not be able to make |
---|
| 3911 | + * big contiguous allocations, so instead of doing the expensive search |
---|
| 3912 | + * for free space, simply return ENOSPC with our max_extent_size so we |
---|
| 3913 | + * can go ahead and search for a more manageable chunk. |
---|
| 3914 | + * |
---|
| 3915 | + * If our max_extent_size is large enough for our allocation simply |
---|
| 3916 | + * disable clustering since we will likely not be able to find enough |
---|
| 3917 | + * space to create a cluster and induce latency trying. |
---|
| 3918 | + */ |
---|
| 3919 | + if (space_info->max_extent_size) { |
---|
| 3920 | + spin_lock(&space_info->lock); |
---|
| 3921 | + if (space_info->max_extent_size && |
---|
| 3922 | + ffe_ctl->num_bytes > space_info->max_extent_size) { |
---|
| 3923 | + ins->offset = space_info->max_extent_size; |
---|
| 3924 | + spin_unlock(&space_info->lock); |
---|
| 3925 | + return -ENOSPC; |
---|
| 3926 | + } else if (space_info->max_extent_size) { |
---|
| 3927 | + ffe_ctl->use_cluster = false; |
---|
| 3928 | + } |
---|
| 3929 | + spin_unlock(&space_info->lock); |
---|
| 3930 | + } |
---|
| 3931 | + |
---|
| 3932 | + ffe_ctl->last_ptr = fetch_cluster_info(fs_info, space_info, |
---|
| 3933 | + &ffe_ctl->empty_cluster); |
---|
| 3934 | + if (ffe_ctl->last_ptr) { |
---|
| 3935 | + struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr; |
---|
| 3936 | + |
---|
| 3937 | + spin_lock(&last_ptr->lock); |
---|
| 3938 | + if (last_ptr->block_group) |
---|
| 3939 | + ffe_ctl->hint_byte = last_ptr->window_start; |
---|
| 3940 | + if (last_ptr->fragmented) { |
---|
| 3941 | + /* |
---|
| 3942 | + * We still set window_start so we can keep track of the |
---|
| 3943 | + * last place we found an allocation to try and save |
---|
| 3944 | + * some time. |
---|
| 3945 | + */ |
---|
| 3946 | + ffe_ctl->hint_byte = last_ptr->window_start; |
---|
| 3947 | + ffe_ctl->use_cluster = false; |
---|
| 3948 | + } |
---|
| 3949 | + spin_unlock(&last_ptr->lock); |
---|
| 3950 | + } |
---|
| 3951 | + |
---|
| 3952 | + return 0; |
---|
| 3953 | +} |
---|
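A quick worked example of the max_extent_size shortcut in prepare_allocation_clustered() (numbers are illustrative): if a previous failed search left space_info->max_extent_size at 256K and the current request asks for num_bytes = 1M, the function returns -ENOSPC immediately with ins->offset = 256K so the caller can retry with a smaller, feasible size; if the request is only 128K, clustering is merely disabled (use_cluster = false) and the normal search proceeds.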
| 3954 | + |
---|
| 3955 | +static int prepare_allocation(struct btrfs_fs_info *fs_info, |
---|
| 3956 | + struct find_free_extent_ctl *ffe_ctl, |
---|
| 3957 | + struct btrfs_space_info *space_info, |
---|
| 3958 | + struct btrfs_key *ins) |
---|
| 3959 | +{ |
---|
| 3960 | + switch (ffe_ctl->policy) { |
---|
| 3961 | + case BTRFS_EXTENT_ALLOC_CLUSTERED: |
---|
| 3962 | + return prepare_allocation_clustered(fs_info, ffe_ctl, |
---|
| 3963 | + space_info, ins); |
---|
| 3964 | + default: |
---|
| 3965 | + BUG(); |
---|
| 3966 | + } |
---|
7217 | 3967 | } |
---|
7218 | 3968 | |
---|
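The switch-based dispatch above keeps each allocation policy self-contained; adding another policy only means extending the enum and each switch. A hypothetical sketch (the second policy value and its helper are made up for illustration):

	enum btrfs_extent_allocation_policy {
		BTRFS_EXTENT_ALLOC_CLUSTERED,
		BTRFS_EXTENT_ALLOC_EXAMPLE,		/* hypothetical */
	};

	static int do_allocation(struct btrfs_block_group *block_group,
				 struct find_free_extent_ctl *ffe_ctl,
				 struct btrfs_block_group **bg_ret)
	{
		switch (ffe_ctl->policy) {
		case BTRFS_EXTENT_ALLOC_CLUSTERED:
			return do_allocation_clustered(block_group, ffe_ctl, bg_ret);
		case BTRFS_EXTENT_ALLOC_EXAMPLE:
			return do_allocation_example(block_group, ffe_ctl, bg_ret);	/* hypothetical */
		default:
			BUG();
		}
	}

found_extent(), chunk_allocation_failed() and prepare_allocation() would grow matching cases in the same way.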
7219 | 3969 | /* |
---|
.. | .. |
---|
7226 | 3976 | * |
---|
7227 | 3977 | * If there is no suitable free space, we will record the max size of |
---|
7228 | 3978 | * the free space extent currently. |
---|
| 3979 | + * |
---|
| 3980 | + * The overall logic and call chain: |
---|
| 3981 | + * |
---|
| 3982 | + * find_free_extent() |
---|
| 3983 | + * |- Iterate through all block groups |
---|
| 3984 | + * | |- Get a valid block group |
---|
| 3985 | + * | |- Try to do clustered allocation in that block group |
---|
| 3986 | + * | |- Try to do unclustered allocation in that block group |
---|
| 3987 | + * | |- Check if the result is valid |
---|
| 3988 | + * | | |- If valid, then exit |
---|
| 3989 | + * | |- Jump to next block group |
---|
| 3990 | + * | |
---|
| 3991 | + * |- Push harder to find free extents |
---|
| 3992 | + * |- If not found, re-iterate all block groups |
---|
7229 | 3993 | */ |
---|
7230 | | -static noinline int find_free_extent(struct btrfs_fs_info *fs_info, |
---|
| 3994 | +static noinline int find_free_extent(struct btrfs_root *root, |
---|
7231 | 3995 | u64 ram_bytes, u64 num_bytes, u64 empty_size, |
---|
7232 | | - u64 hint_byte, struct btrfs_key *ins, |
---|
| 3996 | + u64 hint_byte_orig, struct btrfs_key *ins, |
---|
7233 | 3997 | u64 flags, int delalloc) |
---|
7234 | 3998 | { |
---|
| 3999 | + struct btrfs_fs_info *fs_info = root->fs_info; |
---|
7235 | 4000 | int ret = 0; |
---|
7236 | | - struct btrfs_root *root = fs_info->extent_root; |
---|
7237 | | - struct btrfs_free_cluster *last_ptr = NULL; |
---|
7238 | | - struct btrfs_block_group_cache *block_group = NULL; |
---|
7239 | | - u64 search_start = 0; |
---|
7240 | | - u64 max_extent_size = 0; |
---|
7241 | | - u64 max_free_space = 0; |
---|
7242 | | - u64 empty_cluster = 0; |
---|
| 4001 | + int cache_block_group_error = 0; |
---|
| 4002 | + struct btrfs_block_group *block_group = NULL; |
---|
| 4003 | + struct find_free_extent_ctl ffe_ctl = {0}; |
---|
7243 | 4004 | struct btrfs_space_info *space_info; |
---|
7244 | | - int loop = 0; |
---|
7245 | | - int index = btrfs_bg_flags_to_raid_index(flags); |
---|
7246 | | - bool failed_cluster_refill = false; |
---|
7247 | | - bool failed_alloc = false; |
---|
7248 | | - bool use_cluster = true; |
---|
7249 | | - bool have_caching_bg = false; |
---|
7250 | | - bool orig_have_caching_bg = false; |
---|
7251 | 4005 | bool full_search = false; |
---|
7252 | 4006 | |
---|
7253 | 4007 | WARN_ON(num_bytes < fs_info->sectorsize); |
---|
| 4008 | + |
---|
| 4009 | + ffe_ctl.num_bytes = num_bytes; |
---|
| 4010 | + ffe_ctl.empty_size = empty_size; |
---|
| 4011 | + ffe_ctl.flags = flags; |
---|
| 4012 | + ffe_ctl.search_start = 0; |
---|
| 4013 | + ffe_ctl.delalloc = delalloc; |
---|
| 4014 | + ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags); |
---|
| 4015 | + ffe_ctl.have_caching_bg = false; |
---|
| 4016 | + ffe_ctl.orig_have_caching_bg = false; |
---|
| 4017 | + ffe_ctl.found_offset = 0; |
---|
| 4018 | + ffe_ctl.hint_byte = hint_byte_orig; |
---|
| 4019 | + ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED; |
---|
| 4020 | + |
---|
| 4021 | + /* For clustered allocation */ |
---|
| 4022 | + ffe_ctl.retry_clustered = false; |
---|
| 4023 | + ffe_ctl.retry_unclustered = false; |
---|
| 4024 | + ffe_ctl.last_ptr = NULL; |
---|
| 4025 | + ffe_ctl.use_cluster = true; |
---|
| 4026 | + |
---|
7254 | 4027 | ins->type = BTRFS_EXTENT_ITEM_KEY; |
---|
7255 | 4028 | ins->objectid = 0; |
---|
7256 | 4029 | ins->offset = 0; |
---|
7257 | 4030 | |
---|
7258 | | - trace_find_free_extent(fs_info, num_bytes, empty_size, flags); |
---|
| 4031 | + trace_find_free_extent(root, num_bytes, empty_size, flags); |
---|
7259 | 4032 | |
---|
7260 | | - space_info = __find_space_info(fs_info, flags); |
---|
| 4033 | + space_info = btrfs_find_space_info(fs_info, flags); |
---|
7261 | 4034 | if (!space_info) { |
---|
7262 | 4035 | btrfs_err(fs_info, "No space info for %llu", flags); |
---|
7263 | 4036 | return -ENOSPC; |
---|
7264 | 4037 | } |
---|
7265 | 4038 | |
---|
7266 | | - /* |
---|
7267 | | - * If our free space is heavily fragmented we may not be able to make |
---|
7268 | | - * big contiguous allocations, so instead of doing the expensive search |
---|
7269 | | - * for free space, simply return ENOSPC with our max_extent_size so we |
---|
7270 | | - * can go ahead and search for a more manageable chunk. |
---|
7271 | | - * |
---|
7272 | | - * If our max_extent_size is large enough for our allocation simply |
---|
7273 | | - * disable clustering since we will likely not be able to find enough |
---|
7274 | | - * space to create a cluster and induce latency trying. |
---|
7275 | | - */ |
---|
7276 | | - if (unlikely(space_info->max_extent_size)) { |
---|
7277 | | - spin_lock(&space_info->lock); |
---|
7278 | | - if (space_info->max_extent_size && |
---|
7279 | | - num_bytes > space_info->max_extent_size) { |
---|
7280 | | - ins->offset = space_info->max_extent_size; |
---|
7281 | | - spin_unlock(&space_info->lock); |
---|
7282 | | - return -ENOSPC; |
---|
7283 | | - } else if (space_info->max_extent_size) { |
---|
7284 | | - use_cluster = false; |
---|
7285 | | - } |
---|
7286 | | - spin_unlock(&space_info->lock); |
---|
7287 | | - } |
---|
| 4039 | + ret = prepare_allocation(fs_info, &ffe_ctl, space_info, ins); |
---|
| 4040 | + if (ret < 0) |
---|
| 4041 | + return ret; |
---|
7288 | 4042 | |
---|
7289 | | - last_ptr = fetch_cluster_info(fs_info, space_info, &empty_cluster); |
---|
7290 | | - if (last_ptr) { |
---|
7291 | | - spin_lock(&last_ptr->lock); |
---|
7292 | | - if (last_ptr->block_group) |
---|
7293 | | - hint_byte = last_ptr->window_start; |
---|
7294 | | - if (last_ptr->fragmented) { |
---|
7295 | | - /* |
---|
7296 | | - * We still set window_start so we can keep track of the |
---|
7297 | | - * last place we found an allocation to try and save |
---|
7298 | | - * some time. |
---|
7299 | | - */ |
---|
7300 | | - hint_byte = last_ptr->window_start; |
---|
7301 | | - use_cluster = false; |
---|
7302 | | - } |
---|
7303 | | - spin_unlock(&last_ptr->lock); |
---|
7304 | | - } |
---|
7305 | | - |
---|
7306 | | - search_start = max(search_start, first_logical_byte(fs_info, 0)); |
---|
7307 | | - search_start = max(search_start, hint_byte); |
---|
7308 | | - if (search_start == hint_byte) { |
---|
7309 | | - block_group = btrfs_lookup_block_group(fs_info, search_start); |
---|
| 4043 | + ffe_ctl.search_start = max(ffe_ctl.search_start, |
---|
| 4044 | + first_logical_byte(fs_info, 0)); |
---|
| 4045 | + ffe_ctl.search_start = max(ffe_ctl.search_start, ffe_ctl.hint_byte); |
---|
| 4046 | + if (ffe_ctl.search_start == ffe_ctl.hint_byte) { |
---|
| 4047 | + block_group = btrfs_lookup_block_group(fs_info, |
---|
| 4048 | + ffe_ctl.search_start); |
---|
7310 | 4049 | /* |
---|
7311 | 4050 | * we don't want to use the block group if it doesn't match our |
---|
7312 | 4051 | * allocation bits, or if its not cached. |
---|
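The rewritten find_free_extent() above folds what used to be a pile of independent locals (search_start, empty_cluster, loop, index, the various retry/caching flags) into a single struct find_free_extent_ctl that is filled in once and then handed to helpers such as prepare_allocation() and, further down, do_allocation() and find_free_extent_update_loop(). A minimal userspace sketch of that "one control struct" pattern follows; the struct and field names are simplified stand-ins, not the kernel's find_free_extent_ctl.

    /*
     * Illustrative sketch: gather the search state once, pass one pointer to
     * every helper.  Fields and helper names are invented for illustration.
     */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct alloc_ctl {
        uint64_t num_bytes;     /* size being asked for */
        uint64_t empty_size;    /* extra slack requested by the caller */
        uint64_t search_start;  /* where the scan currently stands */
        uint64_t found_offset;  /* filled in by do_allocation() on success */
        int      loop;          /* current fallback stage */
        bool     use_cluster;   /* whether clustered allocation is still worth trying */
    };

    /* Helpers see one control struct instead of a dozen loose locals. */
    static int do_allocation(struct alloc_ctl *ctl)
    {
        /* pretend the first suitably aligned offset is free */
        ctl->found_offset = (ctl->search_start + 4095) & ~4095ULL;
        return 0;
    }

    int main(void)
    {
        struct alloc_ctl ctl = {
            .num_bytes = 16 * 1024,
            .search_start = 1 << 20,
            .use_cluster = true,
        };

        if (do_allocation(&ctl) == 0)
            printf("found %llu bytes at %llu\n",
                   (unsigned long long)ctl.num_bytes,
                   (unsigned long long)ctl.found_offset);
        return 0;
    }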
.. | .. |
---|
7328 | 4067 | btrfs_put_block_group(block_group); |
---|
7329 | 4068 | up_read(&space_info->groups_sem); |
---|
7330 | 4069 | } else { |
---|
7331 | | - index = btrfs_bg_flags_to_raid_index( |
---|
| 4070 | + ffe_ctl.index = btrfs_bg_flags_to_raid_index( |
---|
7332 | 4071 | block_group->flags); |
---|
7333 | 4072 | btrfs_lock_block_group(block_group, delalloc); |
---|
7334 | 4073 | goto have_block_group; |
---|
.. | .. |
---|
7338 | 4077 | } |
---|
7339 | 4078 | } |
---|
7340 | 4079 | search: |
---|
7341 | | - have_caching_bg = false; |
---|
7342 | | - if (index == 0 || index == btrfs_bg_flags_to_raid_index(flags)) |
---|
| 4080 | + ffe_ctl.have_caching_bg = false; |
---|
| 4081 | + if (ffe_ctl.index == btrfs_bg_flags_to_raid_index(flags) || |
---|
| 4082 | + ffe_ctl.index == 0) |
---|
7343 | 4083 | full_search = true; |
---|
7344 | 4084 | down_read(&space_info->groups_sem); |
---|
7345 | | - list_for_each_entry(block_group, &space_info->block_groups[index], |
---|
7346 | | - list) { |
---|
7347 | | - u64 offset; |
---|
7348 | | - int cached; |
---|
| 4085 | + list_for_each_entry(block_group, |
---|
| 4086 | + &space_info->block_groups[ffe_ctl.index], list) { |
---|
| 4087 | + struct btrfs_block_group *bg_ret; |
---|
7349 | 4088 | |
---|
7350 | 4089 | /* If the block group is read-only, we can skip it entirely. */ |
---|
7351 | 4090 | if (unlikely(block_group->ro)) |
---|
7352 | 4091 | continue; |
---|
7353 | 4092 | |
---|
7354 | 4093 | btrfs_grab_block_group(block_group, delalloc); |
---|
7355 | | - search_start = block_group->key.objectid; |
---|
| 4094 | + ffe_ctl.search_start = block_group->start; |
---|
7356 | 4095 | |
---|
7357 | 4096 | /* |
---|
7358 | 4097 | * this can happen if we end up cycling through all the |
---|
.. | .. |
---|
7361 | 4100 | */ |
---|
7362 | 4101 | if (!block_group_bits(block_group, flags)) { |
---|
7363 | 4102 | u64 extra = BTRFS_BLOCK_GROUP_DUP | |
---|
7364 | | - BTRFS_BLOCK_GROUP_RAID1 | |
---|
7365 | | - BTRFS_BLOCK_GROUP_RAID5 | |
---|
7366 | | - BTRFS_BLOCK_GROUP_RAID6 | |
---|
| 4103 | + BTRFS_BLOCK_GROUP_RAID1_MASK | |
---|
| 4104 | + BTRFS_BLOCK_GROUP_RAID56_MASK | |
---|
7367 | 4105 | BTRFS_BLOCK_GROUP_RAID10; |
---|
7368 | 4106 | |
---|
7369 | 4107 | /* |
---|
.. | .. |
---|
7384 | 4122 | } |
---|
7385 | 4123 | |
---|
7386 | 4124 | have_block_group: |
---|
7387 | | - cached = block_group_cache_done(block_group); |
---|
7388 | | - if (unlikely(!cached)) { |
---|
7389 | | - have_caching_bg = true; |
---|
7390 | | - ret = cache_block_group(block_group, 0); |
---|
7391 | | - BUG_ON(ret < 0); |
---|
| 4125 | + ffe_ctl.cached = btrfs_block_group_done(block_group); |
---|
| 4126 | + if (unlikely(!ffe_ctl.cached)) { |
---|
| 4127 | + ffe_ctl.have_caching_bg = true; |
---|
| 4128 | + ret = btrfs_cache_block_group(block_group, 0); |
---|
| 4129 | + |
---|
| 4130 | + /* |
---|
| 4131 | + * If we get ENOMEM here or something else we want to |
---|
| 4132 | + * try other block groups, because it may not be fatal. |
---|
| 4133 | + * However if we can't find anything else we need to |
---|
| 4134 | + * save our return here so that we return the actual |
---|
| 4135 | + * error that caused problems, not ENOSPC. |
---|
| 4136 | + */ |
---|
| 4137 | + if (ret < 0) { |
---|
| 4138 | + if (!cache_block_group_error) |
---|
| 4139 | + cache_block_group_error = ret; |
---|
| 4140 | + ret = 0; |
---|
| 4141 | + goto loop; |
---|
| 4142 | + } |
---|
7392 | 4143 | ret = 0; |
---|
7393 | 4144 | } |
---|
7394 | 4145 | |
---|
7395 | | - if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) |
---|
7396 | | - goto loop; |
---|
7397 | | - |
---|
7398 | | - /* |
---|
7399 | | - * Ok we want to try and use the cluster allocator, so |
---|
7400 | | - * lets look there |
---|
7401 | | - */ |
---|
7402 | | - if (last_ptr && use_cluster) { |
---|
7403 | | - struct btrfs_block_group_cache *used_block_group; |
---|
7404 | | - unsigned long aligned_cluster; |
---|
7405 | | - /* |
---|
7406 | | - * the refill lock keeps out other |
---|
7407 | | - * people trying to start a new cluster |
---|
7408 | | - */ |
---|
7409 | | - used_block_group = btrfs_lock_cluster(block_group, |
---|
7410 | | - last_ptr, |
---|
7411 | | - delalloc); |
---|
7412 | | - if (!used_block_group) |
---|
7413 | | - goto refill_cluster; |
---|
7414 | | - |
---|
7415 | | - if (used_block_group != block_group && |
---|
7416 | | - (used_block_group->ro || |
---|
7417 | | - !block_group_bits(used_block_group, flags))) |
---|
7418 | | - goto release_cluster; |
---|
7419 | | - |
---|
7420 | | - offset = btrfs_alloc_from_cluster(used_block_group, |
---|
7421 | | - last_ptr, |
---|
7422 | | - num_bytes, |
---|
7423 | | - used_block_group->key.objectid, |
---|
7424 | | - &max_extent_size); |
---|
7425 | | - if (offset) { |
---|
7426 | | - /* we have a block, we're done */ |
---|
7427 | | - spin_unlock(&last_ptr->refill_lock); |
---|
7428 | | - trace_btrfs_reserve_extent_cluster( |
---|
7429 | | - used_block_group, |
---|
7430 | | - search_start, num_bytes); |
---|
7431 | | - if (used_block_group != block_group) { |
---|
7432 | | - btrfs_release_block_group(block_group, |
---|
7433 | | - delalloc); |
---|
7434 | | - block_group = used_block_group; |
---|
7435 | | - } |
---|
7436 | | - goto checks; |
---|
7437 | | - } |
---|
7438 | | - |
---|
7439 | | - WARN_ON(last_ptr->block_group != used_block_group); |
---|
7440 | | -release_cluster: |
---|
7441 | | - /* If we are on LOOP_NO_EMPTY_SIZE, we can't |
---|
7442 | | - * set up a new clusters, so lets just skip it |
---|
7443 | | - * and let the allocator find whatever block |
---|
7444 | | - * it can find. If we reach this point, we |
---|
7445 | | - * will have tried the cluster allocator |
---|
7446 | | - * plenty of times and not have found |
---|
7447 | | - * anything, so we are likely way too |
---|
7448 | | - * fragmented for the clustering stuff to find |
---|
7449 | | - * anything. |
---|
7450 | | - * |
---|
7451 | | - * However, if the cluster is taken from the |
---|
7452 | | - * current block group, release the cluster |
---|
7453 | | - * first, so that we stand a better chance of |
---|
7454 | | - * succeeding in the unclustered |
---|
7455 | | - * allocation. */ |
---|
7456 | | - if (loop >= LOOP_NO_EMPTY_SIZE && |
---|
7457 | | - used_block_group != block_group) { |
---|
7458 | | - spin_unlock(&last_ptr->refill_lock); |
---|
7459 | | - btrfs_release_block_group(used_block_group, |
---|
7460 | | - delalloc); |
---|
7461 | | - goto unclustered_alloc; |
---|
7462 | | - } |
---|
7463 | | - |
---|
7464 | | - /* |
---|
7465 | | - * this cluster didn't work out, free it and |
---|
7466 | | - * start over |
---|
7467 | | - */ |
---|
7468 | | - btrfs_return_cluster_to_free_space(NULL, last_ptr); |
---|
7469 | | - |
---|
7470 | | - if (used_block_group != block_group) |
---|
7471 | | - btrfs_release_block_group(used_block_group, |
---|
7472 | | - delalloc); |
---|
7473 | | -refill_cluster: |
---|
7474 | | - if (loop >= LOOP_NO_EMPTY_SIZE) { |
---|
7475 | | - spin_unlock(&last_ptr->refill_lock); |
---|
7476 | | - goto unclustered_alloc; |
---|
7477 | | - } |
---|
7478 | | - |
---|
7479 | | - aligned_cluster = max_t(unsigned long, |
---|
7480 | | - empty_cluster + empty_size, |
---|
7481 | | - block_group->full_stripe_len); |
---|
7482 | | - |
---|
7483 | | - /* allocate a cluster in this block group */ |
---|
7484 | | - ret = btrfs_find_space_cluster(fs_info, block_group, |
---|
7485 | | - last_ptr, search_start, |
---|
7486 | | - num_bytes, |
---|
7487 | | - aligned_cluster); |
---|
7488 | | - if (ret == 0) { |
---|
7489 | | - /* |
---|
7490 | | - * now pull our allocation out of this |
---|
7491 | | - * cluster |
---|
7492 | | - */ |
---|
7493 | | - offset = btrfs_alloc_from_cluster(block_group, |
---|
7494 | | - last_ptr, |
---|
7495 | | - num_bytes, |
---|
7496 | | - search_start, |
---|
7497 | | - &max_extent_size); |
---|
7498 | | - if (offset) { |
---|
7499 | | - /* we found one, proceed */ |
---|
7500 | | - spin_unlock(&last_ptr->refill_lock); |
---|
7501 | | - trace_btrfs_reserve_extent_cluster( |
---|
7502 | | - block_group, search_start, |
---|
7503 | | - num_bytes); |
---|
7504 | | - goto checks; |
---|
7505 | | - } |
---|
7506 | | - } else if (!cached && loop > LOOP_CACHING_NOWAIT |
---|
7507 | | - && !failed_cluster_refill) { |
---|
7508 | | - spin_unlock(&last_ptr->refill_lock); |
---|
7509 | | - |
---|
7510 | | - failed_cluster_refill = true; |
---|
7511 | | - wait_block_group_cache_progress(block_group, |
---|
7512 | | - num_bytes + empty_cluster + empty_size); |
---|
7513 | | - goto have_block_group; |
---|
7514 | | - } |
---|
7515 | | - |
---|
7516 | | - /* |
---|
7517 | | - * at this point we either didn't find a cluster |
---|
7518 | | - * or we weren't able to allocate a block from our |
---|
7519 | | - * cluster. Free the cluster we've been trying |
---|
7520 | | - * to use, and go to the next block group |
---|
7521 | | - */ |
---|
7522 | | - btrfs_return_cluster_to_free_space(NULL, last_ptr); |
---|
7523 | | - spin_unlock(&last_ptr->refill_lock); |
---|
| 4146 | + if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) { |
---|
| 4147 | + if (!cache_block_group_error) |
---|
| 4148 | + cache_block_group_error = -EIO; |
---|
7524 | 4149 | goto loop; |
---|
7525 | 4150 | } |
---|
7526 | 4151 | |
---|
7527 | | -unclustered_alloc: |
---|
7528 | | - /* |
---|
7529 | | - * We are doing an unclustered alloc, set the fragmented flag so |
---|
7530 | | - * we don't bother trying to setup a cluster again until we get |
---|
7531 | | - * more space. |
---|
7532 | | - */ |
---|
7533 | | - if (unlikely(last_ptr)) { |
---|
7534 | | - spin_lock(&last_ptr->lock); |
---|
7535 | | - last_ptr->fragmented = 1; |
---|
7536 | | - spin_unlock(&last_ptr->lock); |
---|
7537 | | - } |
---|
7538 | | - if (cached) { |
---|
7539 | | - struct btrfs_free_space_ctl *ctl = |
---|
7540 | | - block_group->free_space_ctl; |
---|
7541 | | - |
---|
7542 | | - spin_lock(&ctl->tree_lock); |
---|
7543 | | - if (ctl->free_space < |
---|
7544 | | - num_bytes + empty_cluster + empty_size) { |
---|
7545 | | - max_free_space = max(max_free_space, |
---|
7546 | | - ctl->free_space); |
---|
7547 | | - spin_unlock(&ctl->tree_lock); |
---|
7548 | | - goto loop; |
---|
| 4152 | + bg_ret = NULL; |
---|
| 4153 | + ret = do_allocation(block_group, &ffe_ctl, &bg_ret); |
---|
| 4154 | + if (ret == 0) { |
---|
| 4155 | + if (bg_ret && bg_ret != block_group) { |
---|
| 4156 | + btrfs_release_block_group(block_group, delalloc); |
---|
| 4157 | + block_group = bg_ret; |
---|
7549 | 4158 | } |
---|
7550 | | - spin_unlock(&ctl->tree_lock); |
---|
7551 | | - } |
---|
7552 | | - |
---|
7553 | | - offset = btrfs_find_space_for_alloc(block_group, search_start, |
---|
7554 | | - num_bytes, empty_size, |
---|
7555 | | - &max_extent_size); |
---|
7556 | | - /* |
---|
7557 | | - * If we didn't find a chunk, and we haven't failed on this |
---|
7558 | | - * block group before, and this block group is in the middle of |
---|
7559 | | - * caching and we are ok with waiting, then go ahead and wait |
---|
7560 | | - * for progress to be made, and set failed_alloc to true. |
---|
7561 | | - * |
---|
7562 | | - * If failed_alloc is true then we've already waited on this |
---|
7563 | | - * block group once and should move on to the next block group. |
---|
7564 | | - */ |
---|
7565 | | - if (!offset && !failed_alloc && !cached && |
---|
7566 | | - loop > LOOP_CACHING_NOWAIT) { |
---|
7567 | | - wait_block_group_cache_progress(block_group, |
---|
7568 | | - num_bytes + empty_size); |
---|
7569 | | - failed_alloc = true; |
---|
| 4159 | + } else if (ret == -EAGAIN) { |
---|
7570 | 4160 | goto have_block_group; |
---|
7571 | | - } else if (!offset) { |
---|
| 4161 | + } else if (ret > 0) { |
---|
7572 | 4162 | goto loop; |
---|
7573 | 4163 | } |
---|
7574 | | -checks: |
---|
7575 | | - search_start = round_up(offset, fs_info->stripesize); |
---|
| 4164 | + |
---|
| 4165 | + /* Checks */ |
---|
| 4166 | + ffe_ctl.search_start = round_up(ffe_ctl.found_offset, |
---|
| 4167 | + fs_info->stripesize); |
---|
7576 | 4168 | |
---|
7577 | 4169 | /* move on to the next group */ |
---|
7578 | | - if (search_start + num_bytes > |
---|
7579 | | - block_group->key.objectid + block_group->key.offset) { |
---|
7580 | | - btrfs_add_free_space(block_group, offset, num_bytes); |
---|
| 4170 | + if (ffe_ctl.search_start + num_bytes > |
---|
| 4171 | + block_group->start + block_group->length) { |
---|
| 4172 | + btrfs_add_free_space(block_group, ffe_ctl.found_offset, |
---|
| 4173 | + num_bytes); |
---|
7581 | 4174 | goto loop; |
---|
7582 | 4175 | } |
---|
7583 | 4176 | |
---|
7584 | | - if (offset < search_start) |
---|
7585 | | - btrfs_add_free_space(block_group, offset, |
---|
7586 | | - search_start - offset); |
---|
| 4177 | + if (ffe_ctl.found_offset < ffe_ctl.search_start) |
---|
| 4178 | + btrfs_add_free_space(block_group, ffe_ctl.found_offset, |
---|
| 4179 | + ffe_ctl.search_start - ffe_ctl.found_offset); |
---|
7587 | 4180 | |
---|
7588 | 4181 | ret = btrfs_add_reserved_bytes(block_group, ram_bytes, |
---|
7589 | 4182 | num_bytes, delalloc); |
---|
7590 | 4183 | if (ret == -EAGAIN) { |
---|
7591 | | - btrfs_add_free_space(block_group, offset, num_bytes); |
---|
| 4184 | + btrfs_add_free_space(block_group, ffe_ctl.found_offset, |
---|
| 4185 | + num_bytes); |
---|
7592 | 4186 | goto loop; |
---|
7593 | 4187 | } |
---|
7594 | 4188 | btrfs_inc_block_group_reservations(block_group); |
---|
7595 | 4189 | |
---|
7596 | 4190 | /* we are all good, lets return */ |
---|
7597 | | - ins->objectid = search_start; |
---|
| 4191 | + ins->objectid = ffe_ctl.search_start; |
---|
7598 | 4192 | ins->offset = num_bytes; |
---|
7599 | 4193 | |
---|
7600 | | - trace_btrfs_reserve_extent(block_group, search_start, num_bytes); |
---|
| 4194 | + trace_btrfs_reserve_extent(block_group, ffe_ctl.search_start, |
---|
| 4195 | + num_bytes); |
---|
7601 | 4196 | btrfs_release_block_group(block_group, delalloc); |
---|
7602 | 4197 | break; |
---|
7603 | 4198 | loop: |
---|
7604 | | - failed_cluster_refill = false; |
---|
7605 | | - failed_alloc = false; |
---|
7606 | | - BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) != |
---|
7607 | | - index); |
---|
7608 | | - btrfs_release_block_group(block_group, delalloc); |
---|
| 4199 | + release_block_group(block_group, &ffe_ctl, delalloc); |
---|
7609 | 4200 | cond_resched(); |
---|
7610 | 4201 | } |
---|
7611 | 4202 | up_read(&space_info->groups_sem); |
---|
7612 | 4203 | |
---|
7613 | | - if ((loop == LOOP_CACHING_NOWAIT) && have_caching_bg |
---|
7614 | | - && !orig_have_caching_bg) |
---|
7615 | | - orig_have_caching_bg = true; |
---|
7616 | | - |
---|
7617 | | - if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg) |
---|
| 4204 | + ret = find_free_extent_update_loop(fs_info, ins, &ffe_ctl, full_search); |
---|
| 4205 | + if (ret > 0) |
---|
7618 | 4206 | goto search; |
---|
7619 | 4207 | |
---|
7620 | | - if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) |
---|
7621 | | - goto search; |
---|
7622 | | - |
---|
7623 | | - /* |
---|
7624 | | - * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking |
---|
7625 | | - * caching kthreads as we move along |
---|
7626 | | - * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching |
---|
7627 | | - * LOOP_ALLOC_CHUNK, force a chunk allocation and try again |
---|
7628 | | - * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try |
---|
7629 | | - * again |
---|
7630 | | - */ |
---|
7631 | | - if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) { |
---|
7632 | | - index = 0; |
---|
7633 | | - if (loop == LOOP_CACHING_NOWAIT) { |
---|
7634 | | - /* |
---|
7635 | | - * We want to skip the LOOP_CACHING_WAIT step if we |
---|
7636 | | - * don't have any uncached bgs and we've already done a |
---|
7637 | | - * full search through. |
---|
7638 | | - */ |
---|
7639 | | - if (orig_have_caching_bg || !full_search) |
---|
7640 | | - loop = LOOP_CACHING_WAIT; |
---|
7641 | | - else |
---|
7642 | | - loop = LOOP_ALLOC_CHUNK; |
---|
7643 | | - } else { |
---|
7644 | | - loop++; |
---|
7645 | | - } |
---|
7646 | | - |
---|
7647 | | - if (loop == LOOP_ALLOC_CHUNK) { |
---|
7648 | | - struct btrfs_trans_handle *trans; |
---|
7649 | | - int exist = 0; |
---|
7650 | | - |
---|
7651 | | - trans = current->journal_info; |
---|
7652 | | - if (trans) |
---|
7653 | | - exist = 1; |
---|
7654 | | - else |
---|
7655 | | - trans = btrfs_join_transaction(root); |
---|
7656 | | - |
---|
7657 | | - if (IS_ERR(trans)) { |
---|
7658 | | - ret = PTR_ERR(trans); |
---|
7659 | | - goto out; |
---|
7660 | | - } |
---|
7661 | | - |
---|
7662 | | - ret = do_chunk_alloc(trans, flags, CHUNK_ALLOC_FORCE); |
---|
7663 | | - |
---|
7664 | | - /* |
---|
7665 | | - * If we can't allocate a new chunk we've already looped |
---|
7666 | | - * through at least once, move on to the NO_EMPTY_SIZE |
---|
7667 | | - * case. |
---|
7668 | | - */ |
---|
7669 | | - if (ret == -ENOSPC) |
---|
7670 | | - loop = LOOP_NO_EMPTY_SIZE; |
---|
7671 | | - |
---|
7672 | | - /* |
---|
7673 | | - * Do not bail out on ENOSPC since we |
---|
7674 | | - * can do more things. |
---|
7675 | | - */ |
---|
7676 | | - if (ret < 0 && ret != -ENOSPC) |
---|
7677 | | - btrfs_abort_transaction(trans, ret); |
---|
7678 | | - else |
---|
7679 | | - ret = 0; |
---|
7680 | | - if (!exist) |
---|
7681 | | - btrfs_end_transaction(trans); |
---|
7682 | | - if (ret) |
---|
7683 | | - goto out; |
---|
7684 | | - } |
---|
7685 | | - |
---|
7686 | | - if (loop == LOOP_NO_EMPTY_SIZE) { |
---|
7687 | | - /* |
---|
7688 | | - * Don't loop again if we already have no empty_size and |
---|
7689 | | - * no empty_cluster. |
---|
7690 | | - */ |
---|
7691 | | - if (empty_size == 0 && |
---|
7692 | | - empty_cluster == 0) { |
---|
7693 | | - ret = -ENOSPC; |
---|
7694 | | - goto out; |
---|
7695 | | - } |
---|
7696 | | - empty_size = 0; |
---|
7697 | | - empty_cluster = 0; |
---|
7698 | | - } |
---|
7699 | | - |
---|
7700 | | - goto search; |
---|
7701 | | - } else if (!ins->objectid) { |
---|
7702 | | - ret = -ENOSPC; |
---|
7703 | | - } else if (ins->objectid) { |
---|
7704 | | - if (!use_cluster && last_ptr) { |
---|
7705 | | - spin_lock(&last_ptr->lock); |
---|
7706 | | - last_ptr->window_start = ins->objectid; |
---|
7707 | | - spin_unlock(&last_ptr->lock); |
---|
7708 | | - } |
---|
7709 | | - ret = 0; |
---|
7710 | | - } |
---|
7711 | | -out: |
---|
7712 | | - if (ret == -ENOSPC) { |
---|
7713 | | - if (!max_extent_size) |
---|
7714 | | - max_extent_size = max_free_space; |
---|
| 4208 | + if (ret == -ENOSPC && !cache_block_group_error) { |
---|
| 4209 | + /* |
---|
| 4210 | + * Use ffe_ctl->total_free_space as fallback if we can't find |
---|
| 4211 | + * any contiguous hole. |
---|
| 4212 | + */ |
---|
| 4213 | + if (!ffe_ctl.max_extent_size) |
---|
| 4214 | + ffe_ctl.max_extent_size = ffe_ctl.total_free_space; |
---|
7715 | 4215 | spin_lock(&space_info->lock); |
---|
7716 | | - space_info->max_extent_size = max_extent_size; |
---|
| 4216 | + space_info->max_extent_size = ffe_ctl.max_extent_size; |
---|
7717 | 4217 | spin_unlock(&space_info->lock); |
---|
7718 | | - ins->offset = max_extent_size; |
---|
| 4218 | + ins->offset = ffe_ctl.max_extent_size; |
---|
| 4219 | + } else if (ret == -ENOSPC) { |
---|
| 4220 | + ret = cache_block_group_error; |
---|
7719 | 4221 | } |
---|
7720 | 4222 | return ret; |
---|
7721 | | -} |
---|
7722 | | - |
---|
7723 | | -static void dump_space_info(struct btrfs_fs_info *fs_info, |
---|
7724 | | - struct btrfs_space_info *info, u64 bytes, |
---|
7725 | | - int dump_block_groups) |
---|
7726 | | -{ |
---|
7727 | | - struct btrfs_block_group_cache *cache; |
---|
7728 | | - int index = 0; |
---|
7729 | | - |
---|
7730 | | - spin_lock(&info->lock); |
---|
7731 | | - btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull", |
---|
7732 | | - info->flags, |
---|
7733 | | - info->total_bytes - btrfs_space_info_used(info, true), |
---|
7734 | | - info->full ? "" : "not "); |
---|
7735 | | - btrfs_info(fs_info, |
---|
7736 | | - "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu", |
---|
7737 | | - info->total_bytes, info->bytes_used, info->bytes_pinned, |
---|
7738 | | - info->bytes_reserved, info->bytes_may_use, |
---|
7739 | | - info->bytes_readonly); |
---|
7740 | | - spin_unlock(&info->lock); |
---|
7741 | | - |
---|
7742 | | - if (!dump_block_groups) |
---|
7743 | | - return; |
---|
7744 | | - |
---|
7745 | | - down_read(&info->groups_sem); |
---|
7746 | | -again: |
---|
7747 | | - list_for_each_entry(cache, &info->block_groups[index], list) { |
---|
7748 | | - spin_lock(&cache->lock); |
---|
7749 | | - btrfs_info(fs_info, |
---|
7750 | | - "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s", |
---|
7751 | | - cache->key.objectid, cache->key.offset, |
---|
7752 | | - btrfs_block_group_used(&cache->item), cache->pinned, |
---|
7753 | | - cache->reserved, cache->ro ? "[readonly]" : ""); |
---|
7754 | | - btrfs_dump_free_space(cache, bytes); |
---|
7755 | | - spin_unlock(&cache->lock); |
---|
7756 | | - } |
---|
7757 | | - if (++index < BTRFS_NR_RAID_TYPES) |
---|
7758 | | - goto again; |
---|
7759 | | - up_read(&info->groups_sem); |
---|
7760 | 4223 | } |
---|
7761 | 4224 | |
---|
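The block deleted above was the inline escalation ladder at the end of find_free_extent(): on failure the old code stepped through LOOP_CACHING_NOWAIT, LOOP_CACHING_WAIT, LOOP_ALLOC_CHUNK and LOOP_NO_EMPTY_SIZE before giving up with ENOSPC. That logic now lives in find_free_extent_update_loop(), and caching failures are remembered in cache_block_group_error so the final return reports the real error rather than a misleading ENOSPC. Below is a small self-contained model of the ladder; the stage names mirror the kernel's LOOP_* constants, but try_alloc() and the retry counting are stand-ins for illustration only.

    #include <stdbool.h>
    #include <stdio.h>

    enum loop_stage {
        LOOP_CACHING_NOWAIT,  /* only look at block groups that are already cached */
        LOOP_CACHING_WAIT,    /* wait for the caching kthreads to make progress */
        LOOP_ALLOC_CHUNK,     /* force-allocate a new chunk and search again */
        LOOP_NO_EMPTY_SIZE,   /* drop empty_size/empty_cluster and try once more */
    };

    static bool try_alloc(enum loop_stage stage, int attempt)
    {
        /* pretend the allocation only succeeds once a chunk was forced */
        return stage >= LOOP_ALLOC_CHUNK && attempt >= 3;
    }

    int main(void)
    {
        enum loop_stage stage = LOOP_CACHING_NOWAIT;
        int attempt = 0;

        while (1) {
            if (try_alloc(stage, attempt)) {
                printf("allocated at stage %d after %d attempts\n", stage, attempt);
                return 0;
            }
            attempt++;
            if (stage == LOOP_NO_EMPTY_SIZE) {
                /* nothing left to relax: report a genuine out-of-space */
                fprintf(stderr, "ENOSPC\n");
                return 1;
            }
            stage++;    /* relax one constraint and restart the search */
        }
    }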
7762 | 4225 | /* |
---|
.. | .. |
---|
7817 | 4280 | flags = get_alloc_profile_by_root(root, is_data); |
---|
7818 | 4281 | again: |
---|
7819 | 4282 | WARN_ON(num_bytes < fs_info->sectorsize); |
---|
7820 | | - ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size, |
---|
| 4283 | + ret = find_free_extent(root, ram_bytes, num_bytes, empty_size, |
---|
7821 | 4284 | hint_byte, ins, flags, delalloc); |
---|
7822 | 4285 | if (!ret && !is_data) { |
---|
7823 | 4286 | btrfs_dec_block_group_reservations(fs_info, ins->objectid); |
---|
.. | .. |
---|
7834 | 4297 | } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { |
---|
7835 | 4298 | struct btrfs_space_info *sinfo; |
---|
7836 | 4299 | |
---|
7837 | | - sinfo = __find_space_info(fs_info, flags); |
---|
| 4300 | + sinfo = btrfs_find_space_info(fs_info, flags); |
---|
7838 | 4301 | btrfs_err(fs_info, |
---|
7839 | 4302 | "allocation failed flags %llu, wanted %llu", |
---|
7840 | 4303 | flags, num_bytes); |
---|
7841 | 4304 | if (sinfo) |
---|
7842 | | - dump_space_info(fs_info, sinfo, num_bytes, 1); |
---|
| 4305 | + btrfs_dump_space_info(fs_info, sinfo, |
---|
| 4306 | + num_bytes, 1); |
---|
7843 | 4307 | } |
---|
7844 | 4308 | } |
---|
7845 | 4309 | |
---|
7846 | 4310 | return ret; |
---|
7847 | 4311 | } |
---|
7848 | 4312 | |
---|
7849 | | -static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, |
---|
7850 | | - u64 start, u64 len, |
---|
7851 | | - int pin, int delalloc) |
---|
| 4313 | +int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, |
---|
| 4314 | + u64 start, u64 len, int delalloc) |
---|
7852 | 4315 | { |
---|
7853 | | - struct btrfs_block_group_cache *cache; |
---|
7854 | | - int ret = 0; |
---|
| 4316 | + struct btrfs_block_group *cache; |
---|
7855 | 4317 | |
---|
7856 | 4318 | cache = btrfs_lookup_block_group(fs_info, start); |
---|
7857 | 4319 | if (!cache) { |
---|
.. | .. |
---|
7860 | 4322 | return -ENOSPC; |
---|
7861 | 4323 | } |
---|
7862 | 4324 | |
---|
7863 | | - if (pin) |
---|
7864 | | - pin_down_extent(fs_info, cache, start, len, 1); |
---|
7865 | | - else { |
---|
7866 | | - if (btrfs_test_opt(fs_info, DISCARD)) |
---|
7867 | | - ret = btrfs_discard_extent(fs_info, start, len, NULL); |
---|
7868 | | - btrfs_add_free_space(cache, start, len); |
---|
7869 | | - btrfs_free_reserved_bytes(cache, len, delalloc); |
---|
7870 | | - trace_btrfs_reserved_extent_free(fs_info, start, len); |
---|
7871 | | - } |
---|
| 4325 | + btrfs_add_free_space(cache, start, len); |
---|
| 4326 | + btrfs_free_reserved_bytes(cache, len, delalloc); |
---|
| 4327 | + trace_btrfs_reserved_extent_free(fs_info, start, len); |
---|
7872 | 4328 | |
---|
7873 | 4329 | btrfs_put_block_group(cache); |
---|
| 4330 | + return 0; |
---|
| 4331 | +} |
---|
| 4332 | + |
---|
| 4333 | +int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start, |
---|
| 4334 | + u64 len) |
---|
| 4335 | +{ |
---|
| 4336 | + struct btrfs_block_group *cache; |
---|
| 4337 | + int ret = 0; |
---|
| 4338 | + |
---|
| 4339 | + cache = btrfs_lookup_block_group(trans->fs_info, start); |
---|
| 4340 | + if (!cache) { |
---|
| 4341 | + btrfs_err(trans->fs_info, "unable to find block group for %llu", |
---|
| 4342 | + start); |
---|
| 4343 | + return -ENOSPC; |
---|
| 4344 | + } |
---|
| 4345 | + |
---|
| 4346 | + ret = pin_down_extent(trans, cache, start, len, 1); |
---|
| 4347 | + btrfs_put_block_group(cache); |
---|
7874 | 4348 | return ret; |
---|
7875 | | -} |
---|
7876 | | - |
---|
7877 | | -int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, |
---|
7878 | | - u64 start, u64 len, int delalloc) |
---|
7879 | | -{ |
---|
7880 | | - return __btrfs_free_reserved_extent(fs_info, start, len, 0, delalloc); |
---|
7881 | | -} |
---|
7882 | | - |
---|
7883 | | -int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info, |
---|
7884 | | - u64 start, u64 len) |
---|
7885 | | -{ |
---|
7886 | | - return __btrfs_free_reserved_extent(fs_info, start, len, 1, 0); |
---|
7887 | 4349 | } |
---|
7888 | 4350 | |
---|
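The old __btrfs_free_reserved_extent() and its pin flag are split in two above: btrfs_free_reserved_extent() hands the range straight back to the free-space cache, while the new btrfs_pin_reserved_extent() pins it through pin_down_extent() so the space only becomes reusable once pinned extents are released at commit. A toy model of that split is sketched below; the list bookkeeping is invented for illustration and is not the kernel's data structures.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct pinned_range {
        uint64_t start, len;
        struct pinned_range *next;
    };

    static uint64_t free_bytes;          /* stand-in for the free-space cache */
    static struct pinned_range *pinned;  /* stand-in for the pinned-extent list */

    static void free_reserved(uint64_t start, uint64_t len)
    {
        (void)start;
        free_bytes += len;               /* immediately reusable */
    }

    static void pin_reserved(uint64_t start, uint64_t len)
    {
        struct pinned_range *r = malloc(sizeof(*r));

        if (!r)
            abort();
        r->start = start;
        r->len = len;
        r->next = pinned;
        pinned = r;                      /* not reusable until commit */
    }

    static void commit_transaction(void)
    {
        while (pinned) {
            struct pinned_range *r = pinned;

            pinned = r->next;
            free_bytes += r->len;        /* unpin: the space is free again */
            free(r);
        }
    }

    int main(void)
    {
        free_reserved(4096, 4096);
        pin_reserved(8192, 4096);
        printf("before commit: %llu bytes free\n", (unsigned long long)free_bytes);
        commit_transaction();
        printf("after commit:  %llu bytes free\n", (unsigned long long)free_bytes);
        return 0;
    }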
7889 | 4351 | static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, |
---|
.. | .. |
---|
7950 | 4412 | if (ret) |
---|
7951 | 4413 | return ret; |
---|
7952 | 4414 | |
---|
7953 | | - ret = update_block_group(trans, fs_info, ins->objectid, ins->offset, 1); |
---|
| 4415 | + ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, 1); |
---|
7954 | 4416 | if (ret) { /* -ENOENT, logic error */ |
---|
7955 | 4417 | btrfs_err(fs_info, "update block group failed for %llu %llu", |
---|
7956 | 4418 | ins->objectid, ins->offset); |
---|
.. | .. |
---|
8040 | 4502 | if (ret) |
---|
8041 | 4503 | return ret; |
---|
8042 | 4504 | |
---|
8043 | | - ret = update_block_group(trans, fs_info, extent_key.objectid, |
---|
8044 | | - fs_info->nodesize, 1); |
---|
| 4505 | + ret = btrfs_update_block_group(trans, extent_key.objectid, |
---|
| 4506 | + fs_info->nodesize, 1); |
---|
8045 | 4507 | if (ret) { /* -ENOENT, logic error */ |
---|
8046 | 4508 | btrfs_err(fs_info, "update block group failed for %llu %llu", |
---|
8047 | 4509 | extent_key.objectid, extent_key.offset); |
---|
.. | .. |
---|
8058 | 4520 | u64 offset, u64 ram_bytes, |
---|
8059 | 4521 | struct btrfs_key *ins) |
---|
8060 | 4522 | { |
---|
8061 | | - int ret; |
---|
| 4523 | + struct btrfs_ref generic_ref = { 0 }; |
---|
8062 | 4524 | |
---|
8063 | 4525 | BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); |
---|
8064 | 4526 | |
---|
8065 | | - btrfs_ref_tree_mod(root, ins->objectid, ins->offset, 0, |
---|
8066 | | - root->root_key.objectid, owner, offset, |
---|
8067 | | - BTRFS_ADD_DELAYED_EXTENT); |
---|
| 4527 | + btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT, |
---|
| 4528 | + ins->objectid, ins->offset, 0); |
---|
| 4529 | + btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset); |
---|
| 4530 | + btrfs_ref_tree_mod(root->fs_info, &generic_ref); |
---|
8068 | 4531 | |
---|
8069 | | - ret = btrfs_add_delayed_data_ref(trans, ins->objectid, |
---|
8070 | | - ins->offset, 0, |
---|
8071 | | - root->root_key.objectid, owner, |
---|
8072 | | - offset, ram_bytes, |
---|
8073 | | - BTRFS_ADD_DELAYED_EXTENT, NULL, NULL); |
---|
8074 | | - return ret; |
---|
| 4532 | + return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes); |
---|
8075 | 4533 | } |
---|
8076 | 4534 | |
---|
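This hunk is part of the wider conversion to struct btrfs_ref: instead of passing bytenr, length, root, owner and offset as separate arguments to btrfs_ref_tree_mod() and btrfs_add_delayed_data_ref(), the reference is described once with btrfs_init_generic_ref()/btrfs_init_data_ref() and the single descriptor is handed to both consumers. A generic sketch of that descriptor pattern follows, with invented names that are not the kernel's btrfs_ref API.

    #include <stdint.h>
    #include <stdio.h>

    enum ref_action { REF_ADD, REF_DROP };

    struct extent_ref {
        enum ref_action action;
        uint64_t bytenr;     /* start of the extent being referenced */
        uint64_t num_bytes;  /* its length */
        uint64_t root;       /* owning tree */
        uint64_t owner;      /* inode number (data) or level (metadata) */
        uint64_t offset;     /* file offset for data refs, 0 for metadata */
    };

    static void init_ref(struct extent_ref *ref, enum ref_action action,
                         uint64_t bytenr, uint64_t num_bytes)
    {
        ref->action = action;
        ref->bytenr = bytenr;
        ref->num_bytes = num_bytes;
        ref->root = ref->owner = ref->offset = 0;
    }

    static void init_data_ref(struct extent_ref *ref, uint64_t root,
                              uint64_t owner, uint64_t offset)
    {
        ref->root = root;
        ref->owner = owner;
        ref->offset = offset;
    }

    /* Every consumer takes the same descriptor instead of six loose arguments. */
    static void queue_delayed_ref(const struct extent_ref *ref)
    {
        printf("%s ref: bytenr=%llu len=%llu root=%llu owner=%llu offset=%llu\n",
               ref->action == REF_ADD ? "add" : "drop",
               (unsigned long long)ref->bytenr,
               (unsigned long long)ref->num_bytes,
               (unsigned long long)ref->root,
               (unsigned long long)ref->owner,
               (unsigned long long)ref->offset);
    }

    int main(void)
    {
        struct extent_ref ref;

        init_ref(&ref, REF_ADD, 1 << 20, 16 * 1024);
        init_data_ref(&ref, 5 /* example tree id */, 257 /* example inode */, 0);
        queue_delayed_ref(&ref);
        return 0;
    }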
8077 | 4535 | /* |
---|
.. | .. |
---|
8085 | 4543 | { |
---|
8086 | 4544 | struct btrfs_fs_info *fs_info = trans->fs_info; |
---|
8087 | 4545 | int ret; |
---|
8088 | | - struct btrfs_block_group_cache *block_group; |
---|
| 4546 | + struct btrfs_block_group *block_group; |
---|
8089 | 4547 | struct btrfs_space_info *space_info; |
---|
8090 | 4548 | |
---|
8091 | 4549 | /* |
---|
.. | .. |
---|
8113 | 4571 | |
---|
8114 | 4572 | ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner, |
---|
8115 | 4573 | offset, ins, 1); |
---|
| 4574 | + if (ret) |
---|
| 4575 | + btrfs_pin_extent(trans, ins->objectid, ins->offset, 1); |
---|
8116 | 4576 | btrfs_put_block_group(block_group); |
---|
8117 | 4577 | return ret; |
---|
8118 | 4578 | } |
---|
8119 | 4579 | |
---|
8120 | 4580 | static struct extent_buffer * |
---|
8121 | 4581 | btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
---|
8122 | | - u64 bytenr, int level, u64 owner) |
---|
| 4582 | + u64 bytenr, int level, u64 owner, |
---|
| 4583 | + enum btrfs_lock_nesting nest) |
---|
8123 | 4584 | { |
---|
8124 | 4585 | struct btrfs_fs_info *fs_info = root->fs_info; |
---|
8125 | 4586 | struct extent_buffer *buf; |
---|
.. | .. |
---|
8141 | 4602 | return ERR_PTR(-EUCLEAN); |
---|
8142 | 4603 | } |
---|
8143 | 4604 | |
---|
8144 | | - btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); |
---|
8145 | | - btrfs_tree_lock(buf); |
---|
8146 | | - clean_tree_block(fs_info, buf); |
---|
| 4605 | + btrfs_set_buffer_lockdep_class(owner, buf, level); |
---|
| 4606 | + __btrfs_tree_lock(buf, nest); |
---|
| 4607 | + btrfs_clean_tree_block(buf); |
---|
8147 | 4608 | clear_bit(EXTENT_BUFFER_STALE, &buf->bflags); |
---|
8148 | 4609 | |
---|
8149 | | - btrfs_set_lock_blocking(buf); |
---|
| 4610 | + btrfs_set_lock_blocking_write(buf); |
---|
8150 | 4611 | set_extent_buffer_uptodate(buf); |
---|
8151 | 4612 | |
---|
8152 | 4613 | memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header)); |
---|
.. | .. |
---|
8155 | 4616 | btrfs_set_header_generation(buf, trans->transid); |
---|
8156 | 4617 | btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV); |
---|
8157 | 4618 | btrfs_set_header_owner(buf, owner); |
---|
8158 | | - write_extent_buffer_fsid(buf, fs_info->fsid); |
---|
| 4619 | + write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid); |
---|
8159 | 4620 | write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid); |
---|
8160 | 4621 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { |
---|
8161 | 4622 | buf->log_index = root->log_transid % 2; |
---|
8162 | 4623 | /* |
---|
8163 | 4624 | * we allow two log transactions at a time, use different |
---|
8164 | | - * EXENT bit to differentiate dirty pages. |
---|
| 4625 | + * EXTENT bit to differentiate dirty pages. |
---|
8165 | 4626 | */ |
---|
8166 | 4627 | if (buf->log_index == 0) |
---|
8167 | 4628 | set_extent_dirty(&root->dirty_log_pages, buf->start, |
---|
.. | .. |
---|
8179 | 4640 | return buf; |
---|
8180 | 4641 | } |
---|
8181 | 4642 | |
---|
8182 | | -static struct btrfs_block_rsv * |
---|
8183 | | -use_block_rsv(struct btrfs_trans_handle *trans, |
---|
8184 | | - struct btrfs_root *root, u32 blocksize) |
---|
8185 | | -{ |
---|
8186 | | - struct btrfs_fs_info *fs_info = root->fs_info; |
---|
8187 | | - struct btrfs_block_rsv *block_rsv; |
---|
8188 | | - struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; |
---|
8189 | | - int ret; |
---|
8190 | | - bool global_updated = false; |
---|
8191 | | - |
---|
8192 | | - block_rsv = get_block_rsv(trans, root); |
---|
8193 | | - |
---|
8194 | | - if (unlikely(block_rsv->size == 0)) |
---|
8195 | | - goto try_reserve; |
---|
8196 | | -again: |
---|
8197 | | - ret = block_rsv_use_bytes(block_rsv, blocksize); |
---|
8198 | | - if (!ret) |
---|
8199 | | - return block_rsv; |
---|
8200 | | - |
---|
8201 | | - if (block_rsv->failfast) |
---|
8202 | | - return ERR_PTR(ret); |
---|
8203 | | - |
---|
8204 | | - if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) { |
---|
8205 | | - global_updated = true; |
---|
8206 | | - update_global_block_rsv(fs_info); |
---|
8207 | | - goto again; |
---|
8208 | | - } |
---|
8209 | | - |
---|
8210 | | - if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { |
---|
8211 | | - static DEFINE_RATELIMIT_STATE(_rs, |
---|
8212 | | - DEFAULT_RATELIMIT_INTERVAL * 10, |
---|
8213 | | - /*DEFAULT_RATELIMIT_BURST*/ 1); |
---|
8214 | | - if (__ratelimit(&_rs)) |
---|
8215 | | - WARN(1, KERN_DEBUG |
---|
8216 | | - "BTRFS: block rsv returned %d\n", ret); |
---|
8217 | | - } |
---|
8218 | | -try_reserve: |
---|
8219 | | - ret = reserve_metadata_bytes(root, block_rsv, blocksize, |
---|
8220 | | - BTRFS_RESERVE_NO_FLUSH); |
---|
8221 | | - if (!ret) |
---|
8222 | | - return block_rsv; |
---|
8223 | | - /* |
---|
8224 | | - * If we couldn't reserve metadata bytes try and use some from |
---|
8225 | | - * the global reserve if its space type is the same as the global |
---|
8226 | | - * reservation. |
---|
8227 | | - */ |
---|
8228 | | - if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL && |
---|
8229 | | - block_rsv->space_info == global_rsv->space_info) { |
---|
8230 | | - ret = block_rsv_use_bytes(global_rsv, blocksize); |
---|
8231 | | - if (!ret) |
---|
8232 | | - return global_rsv; |
---|
8233 | | - } |
---|
8234 | | - return ERR_PTR(ret); |
---|
8235 | | -} |
---|
8236 | | - |
---|
8237 | | -static void unuse_block_rsv(struct btrfs_fs_info *fs_info, |
---|
8238 | | - struct btrfs_block_rsv *block_rsv, u32 blocksize) |
---|
8239 | | -{ |
---|
8240 | | - block_rsv_add_bytes(block_rsv, blocksize, 0); |
---|
8241 | | - block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL); |
---|
8242 | | -} |
---|
8243 | | - |
---|
8244 | 4643 | /* |
---|
8245 | 4644 | * finds a free extent and does all the dirty work required for allocation |
---|
8246 | 4645 | * returns the tree buffer or an ERR_PTR on error. |
---|
.. | .. |
---|
8250 | 4649 | u64 parent, u64 root_objectid, |
---|
8251 | 4650 | const struct btrfs_disk_key *key, |
---|
8252 | 4651 | int level, u64 hint, |
---|
8253 | | - u64 empty_size) |
---|
| 4652 | + u64 empty_size, |
---|
| 4653 | + enum btrfs_lock_nesting nest) |
---|
8254 | 4654 | { |
---|
8255 | 4655 | struct btrfs_fs_info *fs_info = root->fs_info; |
---|
8256 | 4656 | struct btrfs_key ins; |
---|
8257 | 4657 | struct btrfs_block_rsv *block_rsv; |
---|
8258 | 4658 | struct extent_buffer *buf; |
---|
8259 | 4659 | struct btrfs_delayed_extent_op *extent_op; |
---|
| 4660 | + struct btrfs_ref generic_ref = { 0 }; |
---|
8260 | 4661 | u64 flags = 0; |
---|
8261 | 4662 | int ret; |
---|
8262 | 4663 | u32 blocksize = fs_info->nodesize; |
---|
.. | .. |
---|
8265 | 4666 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS |
---|
8266 | 4667 | if (btrfs_is_testing(fs_info)) { |
---|
8267 | 4668 | buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr, |
---|
8268 | | - level, root_objectid); |
---|
| 4669 | + level, root_objectid, nest); |
---|
8269 | 4670 | if (!IS_ERR(buf)) |
---|
8270 | 4671 | root->alloc_bytenr += blocksize; |
---|
8271 | 4672 | return buf; |
---|
8272 | 4673 | } |
---|
8273 | 4674 | #endif |
---|
8274 | 4675 | |
---|
8275 | | - block_rsv = use_block_rsv(trans, root, blocksize); |
---|
| 4676 | + block_rsv = btrfs_use_block_rsv(trans, root, blocksize); |
---|
8276 | 4677 | if (IS_ERR(block_rsv)) |
---|
8277 | 4678 | return ERR_CAST(block_rsv); |
---|
8278 | 4679 | |
---|
.. | .. |
---|
8282 | 4683 | goto out_unuse; |
---|
8283 | 4684 | |
---|
8284 | 4685 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, level, |
---|
8285 | | - root_objectid); |
---|
| 4686 | + root_objectid, nest); |
---|
8286 | 4687 | if (IS_ERR(buf)) { |
---|
8287 | 4688 | ret = PTR_ERR(buf); |
---|
8288 | 4689 | goto out_free_reserved; |
---|
.. | .. |
---|
8311 | 4712 | extent_op->is_data = false; |
---|
8312 | 4713 | extent_op->level = level; |
---|
8313 | 4714 | |
---|
8314 | | - btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent, |
---|
8315 | | - root_objectid, level, 0, |
---|
8316 | | - BTRFS_ADD_DELAYED_EXTENT); |
---|
8317 | | - ret = btrfs_add_delayed_tree_ref(trans, ins.objectid, |
---|
8318 | | - ins.offset, parent, |
---|
8319 | | - root_objectid, level, |
---|
8320 | | - BTRFS_ADD_DELAYED_EXTENT, |
---|
8321 | | - extent_op, NULL, NULL); |
---|
| 4715 | + btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT, |
---|
| 4716 | + ins.objectid, ins.offset, parent); |
---|
| 4717 | + generic_ref.real_root = root->root_key.objectid; |
---|
| 4718 | + btrfs_init_tree_ref(&generic_ref, level, root_objectid); |
---|
| 4719 | + btrfs_ref_tree_mod(fs_info, &generic_ref); |
---|
| 4720 | + ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op); |
---|
8322 | 4721 | if (ret) |
---|
8323 | 4722 | goto out_free_delayed; |
---|
8324 | 4723 | } |
---|
.. | .. |
---|
8327 | 4726 | out_free_delayed: |
---|
8328 | 4727 | btrfs_free_delayed_extent_op(extent_op); |
---|
8329 | 4728 | out_free_buf: |
---|
| 4729 | + btrfs_tree_unlock(buf); |
---|
8330 | 4730 | free_extent_buffer(buf); |
---|
8331 | 4731 | out_free_reserved: |
---|
8332 | 4732 | btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0); |
---|
8333 | 4733 | out_unuse: |
---|
8334 | | - unuse_block_rsv(fs_info, block_rsv, blocksize); |
---|
| 4734 | + btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize); |
---|
8335 | 4735 | return ERR_PTR(ret); |
---|
8336 | 4736 | } |
---|
8337 | 4737 | |
---|
.. | .. |
---|
8339 | 4739 | u64 refs[BTRFS_MAX_LEVEL]; |
---|
8340 | 4740 | u64 flags[BTRFS_MAX_LEVEL]; |
---|
8341 | 4741 | struct btrfs_key update_progress; |
---|
| 4742 | + struct btrfs_key drop_progress; |
---|
| 4743 | + int drop_level; |
---|
8342 | 4744 | int stage; |
---|
8343 | 4745 | int level; |
---|
8344 | 4746 | int shared_level; |
---|
.. | .. |
---|
8346 | 4748 | int keep_locks; |
---|
8347 | 4749 | int reada_slot; |
---|
8348 | 4750 | int reada_count; |
---|
| 4751 | + int restarted; |
---|
8349 | 4752 | }; |
---|
8350 | 4753 | |
---|
8351 | 4754 | #define DROP_REFERENCE 1 |
---|
.. | .. |
---|
8490 | 4893 | BUG_ON(ret); /* -ENOMEM */ |
---|
8491 | 4894 | ret = btrfs_dec_ref(trans, root, eb, 0); |
---|
8492 | 4895 | BUG_ON(ret); /* -ENOMEM */ |
---|
8493 | | - ret = btrfs_set_disk_extent_flags(trans, fs_info, eb->start, |
---|
8494 | | - eb->len, flag, |
---|
| 4896 | + ret = btrfs_set_disk_extent_flags(trans, eb, flag, |
---|
8495 | 4897 | btrfs_header_level(eb), 0); |
---|
8496 | 4898 | BUG_ON(ret); /* -ENOMEM */ |
---|
8497 | 4899 | wc->flags[level] |= flag; |
---|
.. | .. |
---|
8506 | 4908 | path->locks[level] = 0; |
---|
8507 | 4909 | } |
---|
8508 | 4910 | return 0; |
---|
| 4911 | +} |
---|
| 4912 | + |
---|
| 4913 | +/* |
---|
| 4914 | + * This is used to verify a ref exists for this root to deal with a bug where we |
---|
| 4915 | + * would have a drop_progress key that hadn't been updated properly. |
---|
| 4916 | + */ |
---|
| 4917 | +static int check_ref_exists(struct btrfs_trans_handle *trans, |
---|
| 4918 | + struct btrfs_root *root, u64 bytenr, u64 parent, |
---|
| 4919 | + int level) |
---|
| 4920 | +{ |
---|
| 4921 | + struct btrfs_path *path; |
---|
| 4922 | + struct btrfs_extent_inline_ref *iref; |
---|
| 4923 | + int ret; |
---|
| 4924 | + |
---|
| 4925 | + path = btrfs_alloc_path(); |
---|
| 4926 | + if (!path) |
---|
| 4927 | + return -ENOMEM; |
---|
| 4928 | + |
---|
| 4929 | + ret = lookup_extent_backref(trans, path, &iref, bytenr, |
---|
| 4930 | + root->fs_info->nodesize, parent, |
---|
| 4931 | + root->root_key.objectid, level, 0); |
---|
| 4932 | + btrfs_free_path(path); |
---|
| 4933 | + if (ret == -ENOENT) |
---|
| 4934 | + return 0; |
---|
| 4935 | + if (ret < 0) |
---|
| 4936 | + return ret; |
---|
| 4937 | + return 1; |
---|
8509 | 4938 | } |
---|
8510 | 4939 | |
---|
8511 | 4940 | /* |
---|
.. | .. |
---|
8530 | 4959 | u64 bytenr; |
---|
8531 | 4960 | u64 generation; |
---|
8532 | 4961 | u64 parent; |
---|
8533 | | - u32 blocksize; |
---|
8534 | 4962 | struct btrfs_key key; |
---|
8535 | 4963 | struct btrfs_key first_key; |
---|
| 4964 | + struct btrfs_ref ref = { 0 }; |
---|
8536 | 4965 | struct extent_buffer *next; |
---|
8537 | 4966 | int level = wc->level; |
---|
8538 | 4967 | int reada = 0; |
---|
.. | .. |
---|
8555 | 4984 | bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); |
---|
8556 | 4985 | btrfs_node_key_to_cpu(path->nodes[level], &first_key, |
---|
8557 | 4986 | path->slots[level]); |
---|
8558 | | - blocksize = fs_info->nodesize; |
---|
8559 | 4987 | |
---|
8560 | 4988 | next = find_extent_buffer(fs_info, bytenr); |
---|
8561 | 4989 | if (!next) { |
---|
.. | .. |
---|
8568 | 4996 | reada = 1; |
---|
8569 | 4997 | } |
---|
8570 | 4998 | btrfs_tree_lock(next); |
---|
8571 | | - btrfs_set_lock_blocking(next); |
---|
| 4999 | + btrfs_set_lock_blocking_write(next); |
---|
8572 | 5000 | |
---|
8573 | 5001 | ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1, |
---|
8574 | 5002 | &wc->refs[level - 1], |
---|
.. | .. |
---|
8628 | 5056 | return -EIO; |
---|
8629 | 5057 | } |
---|
8630 | 5058 | btrfs_tree_lock(next); |
---|
8631 | | - btrfs_set_lock_blocking(next); |
---|
| 5059 | + btrfs_set_lock_blocking_write(next); |
---|
8632 | 5060 | } |
---|
8633 | 5061 | |
---|
8634 | 5062 | level--; |
---|
.. | .. |
---|
8664 | 5092 | parent = 0; |
---|
8665 | 5093 | } |
---|
8666 | 5094 | |
---|
8667 | | - if (need_account) { |
---|
| 5095 | + /* |
---|
| 5096 | + * If we had a drop_progress we need to verify the refs are set |
---|
| 5097 | + * as expected. If we find our ref then we know that from here |
---|
| 5098 | + * on out everything should be correct, and we can clear the |
---|
| 5099 | + * ->restarted flag. |
---|
| 5100 | + */ |
---|
| 5101 | + if (wc->restarted) { |
---|
| 5102 | + ret = check_ref_exists(trans, root, bytenr, parent, |
---|
| 5103 | + level - 1); |
---|
| 5104 | + if (ret < 0) |
---|
| 5105 | + goto out_unlock; |
---|
| 5106 | + if (ret == 0) |
---|
| 5107 | + goto no_delete; |
---|
| 5108 | + ret = 0; |
---|
| 5109 | + wc->restarted = 0; |
---|
| 5110 | + } |
---|
| 5111 | + |
---|
| 5112 | + /* |
---|
| 5113 | + * Reloc tree doesn't contribute to qgroup numbers, and we have |
---|
| 5114 | + * already accounted them at merge time (replace_path), |
---|
| 5115 | + * thus we could skip expensive subtree trace here. |
---|
| 5116 | + */ |
---|
| 5117 | + if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && |
---|
| 5118 | + need_account) { |
---|
8668 | 5119 | ret = btrfs_qgroup_trace_subtree(trans, next, |
---|
8669 | 5120 | generation, level - 1); |
---|
8670 | 5121 | if (ret) { |
---|
.. | .. |
---|
8673 | 5124 | ret); |
---|
8674 | 5125 | } |
---|
8675 | 5126 | } |
---|
8676 | | - ret = btrfs_free_extent(trans, root, bytenr, blocksize, |
---|
8677 | | - parent, root->root_key.objectid, |
---|
8678 | | - level - 1, 0); |
---|
| 5127 | + |
---|
| 5128 | + /* |
---|
| 5129 | + * We need to update the next key in our walk control so we can |
---|
| 5130 | + * update the drop_progress key accordingly. We don't care if |
---|
| 5131 | + * find_next_key doesn't find a key because that means we're at |
---|
| 5132 | + * the end and are going to clean up now. |
---|
| 5133 | + */ |
---|
| 5134 | + wc->drop_level = level; |
---|
| 5135 | + find_next_key(path, level, &wc->drop_progress); |
---|
| 5136 | + |
---|
| 5137 | + btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr, |
---|
| 5138 | + fs_info->nodesize, parent); |
---|
| 5139 | + btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid); |
---|
| 5140 | + ret = btrfs_free_extent(trans, &ref); |
---|
8679 | 5141 | if (ret) |
---|
8680 | 5142 | goto out_unlock; |
---|
8681 | 5143 | } |
---|
8682 | | - |
---|
| 5144 | +no_delete: |
---|
8683 | 5145 | *lookup_info = 1; |
---|
8684 | 5146 | ret = 1; |
---|
8685 | 5147 | |
---|
.. | .. |
---|
8734 | 5196 | if (!path->locks[level]) { |
---|
8735 | 5197 | BUG_ON(level == 0); |
---|
8736 | 5198 | btrfs_tree_lock(eb); |
---|
8737 | | - btrfs_set_lock_blocking(eb); |
---|
| 5199 | + btrfs_set_lock_blocking_write(eb); |
---|
8738 | 5200 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
---|
8739 | 5201 | |
---|
8740 | 5202 | ret = btrfs_lookup_extent_info(trans, fs_info, |
---|
.. | .. |
---|
8765 | 5227 | else |
---|
8766 | 5228 | ret = btrfs_dec_ref(trans, root, eb, 0); |
---|
8767 | 5229 | BUG_ON(ret); /* -ENOMEM */ |
---|
8768 | | - ret = btrfs_qgroup_trace_leaf_items(trans, eb); |
---|
8769 | | - if (ret) { |
---|
8770 | | - btrfs_err_rl(fs_info, |
---|
8771 | | - "error %d accounting leaf items. Quota is out of sync, rescan required.", |
---|
| 5230 | + if (is_fstree(root->root_key.objectid)) { |
---|
| 5231 | + ret = btrfs_qgroup_trace_leaf_items(trans, eb); |
---|
| 5232 | + if (ret) { |
---|
| 5233 | + btrfs_err_rl(fs_info, |
---|
| 5234 | + "error %d accounting leaf items, quota is out of sync, rescan required", |
---|
8772 | 5235 | ret); |
---|
| 5236 | + } |
---|
8773 | 5237 | } |
---|
8774 | 5238 | } |
---|
8775 | | - /* make block locked assertion in clean_tree_block happy */ |
---|
| 5239 | + /* make block locked assertion in btrfs_clean_tree_block happy */ |
---|
8776 | 5240 | if (!path->locks[level] && |
---|
8777 | 5241 | btrfs_header_generation(eb) == trans->transid) { |
---|
8778 | 5242 | btrfs_tree_lock(eb); |
---|
8779 | | - btrfs_set_lock_blocking(eb); |
---|
| 5243 | + btrfs_set_lock_blocking_write(eb); |
---|
8780 | 5244 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
---|
8781 | 5245 | } |
---|
8782 | | - clean_tree_block(fs_info, eb); |
---|
| 5246 | + btrfs_clean_tree_block(eb); |
---|
8783 | 5247 | } |
---|
8784 | 5248 | |
---|
8785 | 5249 | if (eb == root->node) { |
---|
.. | .. |
---|
8887 | 5351 | * |
---|
8888 | 5352 | * If called with for_reloc == 0, may exit early with -EAGAIN |
---|
8889 | 5353 | */ |
---|
8890 | | -int btrfs_drop_snapshot(struct btrfs_root *root, |
---|
8891 | | - struct btrfs_block_rsv *block_rsv, int update_ref, |
---|
8892 | | - int for_reloc) |
---|
| 5354 | +int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc) |
---|
8893 | 5355 | { |
---|
8894 | 5356 | struct btrfs_fs_info *fs_info = root->fs_info; |
---|
8895 | 5357 | struct btrfs_path *path; |
---|
.. | .. |
---|
8903 | 5365 | int level; |
---|
8904 | 5366 | bool root_dropped = false; |
---|
8905 | 5367 | |
---|
8906 | | - btrfs_debug(fs_info, "Drop subvolume %llu", root->objectid); |
---|
| 5368 | + btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid); |
---|
8907 | 5369 | |
---|
8908 | 5370 | path = btrfs_alloc_path(); |
---|
8909 | 5371 | if (!path) { |
---|
.. | .. |
---|
8918 | 5380 | goto out; |
---|
8919 | 5381 | } |
---|
8920 | 5382 | |
---|
8921 | | - trans = btrfs_start_transaction(tree_root, 0); |
---|
| 5383 | + /* |
---|
| 5384 | + * Use join to avoid potential EINTR from transaction start. See |
---|
| 5385 | + * wait_reserve_ticket and the whole reservation callchain. |
---|
| 5386 | + */ |
---|
| 5387 | + if (for_reloc) |
---|
| 5388 | + trans = btrfs_join_transaction(tree_root); |
---|
| 5389 | + else |
---|
| 5390 | + trans = btrfs_start_transaction(tree_root, 0); |
---|
8922 | 5391 | if (IS_ERR(trans)) { |
---|
8923 | 5392 | err = PTR_ERR(trans); |
---|
8924 | 5393 | goto out_free; |
---|
.. | .. |
---|
8928 | 5397 | if (err) |
---|
8929 | 5398 | goto out_end_trans; |
---|
8930 | 5399 | |
---|
8931 | | - if (block_rsv) |
---|
8932 | | - trans->block_rsv = block_rsv; |
---|
8933 | | - |
---|
| 5400 | + /* |
---|
| 5401 | + * This will help us catch people modifying the fs tree while we're |
---|
| 5402 | + * dropping it. It is unsafe to mess with the fs tree while it's being |
---|
| 5403 | + * dropped as we unlock the root node and parent nodes as we walk down |
---|
| 5404 | + * the tree, assuming nothing will change. If something does change |
---|
| 5405 | + * then we'll have stale information and drop references to blocks we've |
---|
| 5406 | + * already dropped. |
---|
| 5407 | + */ |
---|
| 5408 | + set_bit(BTRFS_ROOT_DELETING, &root->state); |
---|
8934 | 5409 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { |
---|
8935 | 5410 | level = btrfs_header_level(root->node); |
---|
8936 | 5411 | path->nodes[level] = btrfs_lock_root_node(root); |
---|
8937 | | - btrfs_set_lock_blocking(path->nodes[level]); |
---|
| 5412 | + btrfs_set_lock_blocking_write(path->nodes[level]); |
---|
8938 | 5413 | path->slots[level] = 0; |
---|
8939 | 5414 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
---|
8940 | 5415 | memset(&wc->update_progress, 0, |
---|
.. | .. |
---|
8964 | 5439 | level = btrfs_header_level(root->node); |
---|
8965 | 5440 | while (1) { |
---|
8966 | 5441 | btrfs_tree_lock(path->nodes[level]); |
---|
8967 | | - btrfs_set_lock_blocking(path->nodes[level]); |
---|
| 5442 | + btrfs_set_lock_blocking_write(path->nodes[level]); |
---|
8968 | 5443 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
---|
8969 | 5444 | |
---|
8970 | 5445 | ret = btrfs_lookup_extent_info(trans, fs_info, |
---|
.. | .. |
---|
8987 | 5462 | } |
---|
8988 | 5463 | } |
---|
8989 | 5464 | |
---|
| 5465 | + wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state); |
---|
8990 | 5466 | wc->level = level; |
---|
8991 | 5467 | wc->shared_level = -1; |
---|
8992 | 5468 | wc->stage = DROP_REFERENCE; |
---|
.. | .. |
---|
9014 | 5490 | } |
---|
9015 | 5491 | |
---|
9016 | 5492 | if (wc->stage == DROP_REFERENCE) { |
---|
9017 | | - level = wc->level; |
---|
9018 | | - btrfs_node_key(path->nodes[level], |
---|
9019 | | - &root_item->drop_progress, |
---|
9020 | | - path->slots[level]); |
---|
9021 | | - root_item->drop_level = level; |
---|
| 5493 | + wc->drop_level = wc->level; |
---|
| 5494 | + btrfs_node_key_to_cpu(path->nodes[wc->drop_level], |
---|
| 5495 | + &wc->drop_progress, |
---|
| 5496 | + path->slots[wc->drop_level]); |
---|
9022 | 5497 | } |
---|
| 5498 | + btrfs_cpu_key_to_disk(&root_item->drop_progress, |
---|
| 5499 | + &wc->drop_progress); |
---|
| 5500 | + root_item->drop_level = wc->drop_level; |
---|
9023 | 5501 | |
---|
9024 | 5502 | BUG_ON(wc->level == 0); |
---|
9025 | 5503 | if (btrfs_should_end_transaction(trans) || |
---|
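With drop_progress and drop_level now tracked in walk_control, do_walk_down() records the next key via find_next_key() and the loop above copies it into the root item with btrfs_cpu_key_to_disk() before the transaction is ended, so an interrupted snapshot drop can resume where it left off; wc->restarted together with check_ref_exists() covers resuming from a drop_progress key that had not been updated properly. Below is a minimal model of resumable deletion driven by a persisted progress key; the integer keys and the global checkpoint are stand-ins for illustration, not the on-disk btrfs structures.

    #include <stdbool.h>
    #include <stdio.h>

    #define NITEMS 10

    static int drop_progress = -1;  /* persisted checkpoint: last key handled */

    /* Delete a batch of items, recording how far we got before stopping. */
    static bool drop_some(int budget)
    {
        int key;

        for (key = drop_progress + 1; key < NITEMS; key++) {
            printf("dropping item %d\n", key);
            drop_progress = key;    /* would be written to the root item */
            if (--budget == 0)
                return false;       /* transaction full, stop and commit */
        }
        return true;                /* everything below this root is gone */
    }

    int main(void)
    {
        int pass = 0;

        /* each pass models one transaction; the checkpoint survives between them */
        while (!drop_some(4))
            printf("-- commit, resume after key %d (pass %d)\n",
                   drop_progress, ++pass);
        printf("subvolume fully dropped\n");
        return 0;
    }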
.. | .. |
---|
9041 | 5519 | goto out_free; |
---|
9042 | 5520 | } |
---|
9043 | 5521 | |
---|
9044 | | - trans = btrfs_start_transaction(tree_root, 0); |
---|
| 5522 | + /* |
---|
| 5523 | + * Use join to avoid potential EINTR from transaction |
---|
| 5524 | + * start. See wait_reserve_ticket and the whole |
---|
| 5525 | + * reservation callchain. |
---|
| 5526 | + */ |
---|
| 5527 | + if (for_reloc) |
---|
| 5528 | + trans = btrfs_join_transaction(tree_root); |
---|
| 5529 | + else |
---|
| 5530 | + trans = btrfs_start_transaction(tree_root, 0); |
---|
9045 | 5531 | if (IS_ERR(trans)) { |
---|
9046 | 5532 | err = PTR_ERR(trans); |
---|
9047 | 5533 | goto out_free; |
---|
9048 | 5534 | } |
---|
9049 | | - if (block_rsv) |
---|
9050 | | - trans->block_rsv = block_rsv; |
---|
9051 | 5535 | } |
---|
9052 | 5536 | } |
---|
9053 | 5537 | btrfs_release_path(path); |
---|
.. | .. |
---|
9079 | 5563 | } |
---|
9080 | 5564 | } |
---|
9081 | 5565 | |
---|
9082 | | - if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) { |
---|
| 5566 | + /* |
---|
| 5567 | + * This subvolume is going to be completely dropped, and won't be |
---|
| 5568 | + * recorded as dirty roots, thus pertrans meta rsv will not be freed at |
---|
| 5569 | + * commit transaction time. So free it here manually. |
---|
| 5570 | + */ |
---|
| 5571 | + btrfs_qgroup_convert_reserved_meta(root, INT_MAX); |
---|
| 5572 | + btrfs_qgroup_free_meta_all_pertrans(root); |
---|
| 5573 | + |
---|
| 5574 | + if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) |
---|
9083 | 5575 | btrfs_add_dropped_root(trans, root); |
---|
9084 | | - } else { |
---|
9085 | | - free_extent_buffer(root->node); |
---|
9086 | | - free_extent_buffer(root->commit_root); |
---|
9087 | | - btrfs_put_fs_root(root); |
---|
9088 | | - } |
---|
| 5576 | + else |
---|
| 5577 | + btrfs_put_root(root); |
---|
9089 | 5578 | root_dropped = true; |
---|
9090 | 5579 | out_end_trans: |
---|
9091 | 5580 | btrfs_end_transaction_throttle(trans); |
---|
.. | .. |
---|
9138 | 5627 | |
---|
9139 | 5628 | btrfs_assert_tree_locked(parent); |
---|
9140 | 5629 | parent_level = btrfs_header_level(parent); |
---|
9141 | | - extent_buffer_get(parent); |
---|
| 5630 | + atomic_inc(&parent->refs); |
---|
9142 | 5631 | path->nodes[parent_level] = parent; |
---|
9143 | 5632 | path->slots[parent_level] = btrfs_header_nritems(parent); |
---|
9144 | 5633 | |
---|
.. | .. |
---|
9176 | 5665 | return ret; |
---|
9177 | 5666 | } |
---|
9178 | 5667 | |
---|
9179 | | -static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags) |
---|
9180 | | -{ |
---|
9181 | | - u64 num_devices; |
---|
9182 | | - u64 stripped; |
---|
9183 | | - |
---|
9184 | | - /* |
---|
9185 | | - * if restripe for this chunk_type is on pick target profile and |
---|
9186 | | - * return, otherwise do the usual balance |
---|
9187 | | - */ |
---|
9188 | | - stripped = get_restripe_target(fs_info, flags); |
---|
9189 | | - if (stripped) |
---|
9190 | | - return extended_to_chunk(stripped); |
---|
9191 | | - |
---|
9192 | | - num_devices = fs_info->fs_devices->rw_devices; |
---|
9193 | | - |
---|
9194 | | - stripped = BTRFS_BLOCK_GROUP_RAID0 | |
---|
9195 | | - BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | |
---|
9196 | | - BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; |
---|
9197 | | - |
---|
9198 | | - if (num_devices == 1) { |
---|
9199 | | - stripped |= BTRFS_BLOCK_GROUP_DUP; |
---|
9200 | | - stripped = flags & ~stripped; |
---|
9201 | | - |
---|
9202 | | - /* turn raid0 into single device chunks */ |
---|
9203 | | - if (flags & BTRFS_BLOCK_GROUP_RAID0) |
---|
9204 | | - return stripped; |
---|
9205 | | - |
---|
9206 | | - /* turn mirroring into duplication */ |
---|
9207 | | - if (flags & (BTRFS_BLOCK_GROUP_RAID1 | |
---|
9208 | | - BTRFS_BLOCK_GROUP_RAID10)) |
---|
9209 | | - return stripped | BTRFS_BLOCK_GROUP_DUP; |
---|
9210 | | - } else { |
---|
9211 | | - /* they already had raid on here, just return */ |
---|
9212 | | - if (flags & stripped) |
---|
9213 | | - return flags; |
---|
9214 | | - |
---|
9215 | | - stripped |= BTRFS_BLOCK_GROUP_DUP; |
---|
9216 | | - stripped = flags & ~stripped; |
---|
9217 | | - |
---|
9218 | | - /* switch duplicated blocks with raid1 */ |
---|
9219 | | - if (flags & BTRFS_BLOCK_GROUP_DUP) |
---|
9220 | | - return stripped | BTRFS_BLOCK_GROUP_RAID1; |
---|
9221 | | - |
---|
9222 | | - /* this is drive concat, leave it alone */ |
---|
9223 | | - } |
---|
9224 | | - |
---|
9225 | | - return flags; |
---|
9226 | | -} |
---|
9227 | | - |
---|
9228 | | -static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force) |
---|
9229 | | -{ |
---|
9230 | | - struct btrfs_space_info *sinfo = cache->space_info; |
---|
9231 | | - u64 num_bytes; |
---|
9232 | | - u64 min_allocable_bytes; |
---|
9233 | | - int ret = -ENOSPC; |
---|
9234 | | - |
---|
9235 | | - /* |
---|
9236 | | - * We need some metadata space and system metadata space for |
---|
9237 | | - * allocating chunks in some corner cases until we force to set |
---|
9238 | | - * it to be readonly. |
---|
9239 | | - */ |
---|
9240 | | - if ((sinfo->flags & |
---|
9241 | | - (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) && |
---|
9242 | | - !force) |
---|
9243 | | - min_allocable_bytes = SZ_1M; |
---|
9244 | | - else |
---|
9245 | | - min_allocable_bytes = 0; |
---|
9246 | | - |
---|
9247 | | - spin_lock(&sinfo->lock); |
---|
9248 | | - spin_lock(&cache->lock); |
---|
9249 | | - |
---|
9250 | | - if (cache->ro) { |
---|
9251 | | - cache->ro++; |
---|
9252 | | - ret = 0; |
---|
9253 | | - goto out; |
---|
9254 | | - } |
---|
9255 | | - |
---|
9256 | | - num_bytes = cache->key.offset - cache->reserved - cache->pinned - |
---|
9257 | | - cache->bytes_super - btrfs_block_group_used(&cache->item); |
---|
9258 | | - |
---|
9259 | | - if (btrfs_space_info_used(sinfo, true) + num_bytes + |
---|
9260 | | - min_allocable_bytes <= sinfo->total_bytes) { |
---|
9261 | | - sinfo->bytes_readonly += num_bytes; |
---|
9262 | | - cache->ro++; |
---|
9263 | | - list_add_tail(&cache->ro_list, &sinfo->ro_bgs); |
---|
9264 | | - ret = 0; |
---|
9265 | | - } |
---|
9266 | | -out: |
---|
9267 | | - spin_unlock(&cache->lock); |
---|
9268 | | - spin_unlock(&sinfo->lock); |
---|
9269 | | - return ret; |
---|
9270 | | -} |
---|
9271 | | - |
---|
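
inc_block_group_ro(), removed above, is essentially an accounting check: the group's not-yet-used bytes may be folded into bytes_readonly only if the owning space_info can absorb them while keeping a small headroom for metadata/system chunk allocations. A rough sketch of that check follows, with simplified structures and a hypothetical try_set_readonly() helper; the field names are not the kernel's and no locking is shown.

```c
#include <stdbool.h>
#include <stdint.h>

/* simplified stand-ins for the kernel structures */
struct sinfo  { uint64_t total, used, bytes_readonly; };
struct bgroup { uint64_t length, used, reserved, pinned, bytes_super; int ro; };

/* hypothetical helper: try to mark @bg read only within @si */
static bool try_set_readonly(struct sinfo *si, struct bgroup *bg, bool force)
{
	/* keep ~1M headroom for metadata/system allocations unless forced */
	uint64_t min_allocable = force ? 0 : (1ULL << 20);
	uint64_t unused;

	if (bg->ro) {		/* already read only, just bump the count */
		bg->ro++;
		return true;
	}

	unused = bg->length - bg->reserved - bg->pinned -
		 bg->bytes_super - bg->used;

	if (si->used + unused + min_allocable > si->total)
		return false;	/* would leave no room to allocate */

	si->bytes_readonly += unused;
	bg->ro++;
	return true;
}
```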
9272 | | -int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache) |
---|
9273 | | - |
---|
9274 | | -{ |
---|
9275 | | - struct btrfs_fs_info *fs_info = cache->fs_info; |
---|
9276 | | - struct btrfs_trans_handle *trans; |
---|
9277 | | - u64 alloc_flags; |
---|
9278 | | - int ret; |
---|
9279 | | - |
---|
9280 | | -again: |
---|
9281 | | - trans = btrfs_join_transaction(fs_info->extent_root); |
---|
9282 | | - if (IS_ERR(trans)) |
---|
9283 | | - return PTR_ERR(trans); |
---|
9284 | | - |
---|
9285 | | - /* |
---|
9286 | | - * we're not allowed to set block groups readonly after the dirty |
---|
9287 | | - * block groups cache has started writing. If it already started, |
---|
9288 | | - * back off and let this transaction commit |
---|
9289 | | - */ |
---|
9290 | | - mutex_lock(&fs_info->ro_block_group_mutex); |
---|
9291 | | - if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) { |
---|
9292 | | - u64 transid = trans->transid; |
---|
9293 | | - |
---|
9294 | | - mutex_unlock(&fs_info->ro_block_group_mutex); |
---|
9295 | | - btrfs_end_transaction(trans); |
---|
9296 | | - |
---|
9297 | | - ret = btrfs_wait_for_commit(fs_info, transid); |
---|
9298 | | - if (ret) |
---|
9299 | | - return ret; |
---|
9300 | | - goto again; |
---|
9301 | | - } |
---|
9302 | | - |
---|
9303 | | - /* |
---|
9304 | | - * if we are changing raid levels, try to allocate a corresponding |
---|
9305 | | - * block group with the new raid level. |
---|
9306 | | - */ |
---|
9307 | | - alloc_flags = update_block_group_flags(fs_info, cache->flags); |
---|
9308 | | - if (alloc_flags != cache->flags) { |
---|
9309 | | - ret = do_chunk_alloc(trans, alloc_flags, |
---|
9310 | | - CHUNK_ALLOC_FORCE); |
---|
9311 | | - /* |
---|
9312 | | - * ENOSPC is allowed here, we may have enough space |
---|
9313 | | - * already allocated at the new raid level to |
---|
9314 | | - * carry on |
---|
9315 | | - */ |
---|
9316 | | - if (ret == -ENOSPC) |
---|
9317 | | - ret = 0; |
---|
9318 | | - if (ret < 0) |
---|
9319 | | - goto out; |
---|
9320 | | - } |
---|
9321 | | - |
---|
9322 | | - ret = inc_block_group_ro(cache, 0); |
---|
9323 | | - if (!ret) |
---|
9324 | | - goto out; |
---|
9325 | | - alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags); |
---|
9326 | | - ret = do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); |
---|
9327 | | - if (ret < 0) |
---|
9328 | | - goto out; |
---|
9329 | | - ret = inc_block_group_ro(cache, 0); |
---|
9330 | | -out: |
---|
9331 | | - if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { |
---|
9332 | | - alloc_flags = update_block_group_flags(fs_info, cache->flags); |
---|
9333 | | - mutex_lock(&fs_info->chunk_mutex); |
---|
9334 | | - check_system_chunk(trans, alloc_flags); |
---|
9335 | | - mutex_unlock(&fs_info->chunk_mutex); |
---|
9336 | | - } |
---|
9337 | | - mutex_unlock(&fs_info->ro_block_group_mutex); |
---|
9338 | | - |
---|
9339 | | - btrfs_end_transaction(trans); |
---|
9340 | | - return ret; |
---|
9341 | | -} |
---|
9342 | | - |
---|
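
btrfs_inc_block_group_ro(), removed above, also has to cope with a race against dirty block-group cache writeout: if the joined transaction has already set BTRFS_TRANS_DIRTY_BG_RUN it backs off, waits for that transaction to commit, and retries with a fresh one. A schematic of just that retry loop, with stubbed stand-ins for the transaction API (join_txn(), end_txn() and wait_for_commit() are hypothetical, not kernel functions):

```c
#include <stdbool.h>

struct txn { unsigned long transid; bool dirty_bg_run; };

/* hypothetical stand-ins for joining/ending/waiting on a transaction */
static struct txn *join_txn(void)             { static struct txn t; return &t; }
static void end_txn(struct txn *t)            { (void)t; }
static int wait_for_commit(unsigned long id)  { (void)id; return 0; }

static int set_ro_with_retry(void)
{
	struct txn *t;

again:
	t = join_txn();
	if (!t)
		return -1;

	if (t->dirty_bg_run) {
		/* cache writeout already started: wait for this commit */
		unsigned long transid = t->transid;

		end_txn(t);
		if (wait_for_commit(transid))
			return -1;
		goto again;
	}

	/* ...allocate a chunk in the new profile and flip ->ro here... */
	end_txn(t);
	return 0;
}
```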
9343 | | -int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type) |
---|
9344 | | -{ |
---|
9345 | | - u64 alloc_flags = get_alloc_profile(trans->fs_info, type); |
---|
9346 | | - |
---|
9347 | | - return do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); |
---|
9348 | | -} |
---|
9349 | | - |
---|
9350 | 5668 | /* |
---|
9351 | 5669 | * helper to account the unused space of all the readonly block groups in the
---|
9352 | 5670 | * space_info. takes mirrors into account. |
---|
9353 | 5671 | */ |
---|
9354 | 5672 | u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) |
---|
9355 | 5673 | { |
---|
9356 | | - struct btrfs_block_group_cache *block_group; |
---|
| 5674 | + struct btrfs_block_group *block_group; |
---|
9357 | 5675 | u64 free_bytes = 0; |
---|
9358 | 5676 | int factor; |
---|
9359 | 5677 | |
---|
.. | .. |
---|
9371 | 5689 | } |
---|
9372 | 5690 | |
---|
9373 | 5691 | factor = btrfs_bg_type_to_factor(block_group->flags); |
---|
9374 | | - free_bytes += (block_group->key.offset - |
---|
9375 | | - btrfs_block_group_used(&block_group->item)) * |
---|
9376 | | - factor; |
---|
| 5692 | + free_bytes += (block_group->length - |
---|
| 5693 | + block_group->used) * factor; |
---|
9377 | 5694 | |
---|
9378 | 5695 | spin_unlock(&block_group->lock); |
---|
9379 | 5696 | } |
---|
9380 | 5697 | spin_unlock(&sinfo->lock); |
---|
9381 | 5698 | |
---|
9382 | 5699 | return free_bytes; |
---|
9383 | | -} |
---|
9384 | | - |
---|
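
The factor multiplication above converts logical unused space into raw device space: profiles that keep two copies (DUP, RAID1, RAID10) count each unused byte twice. A compact worked sketch, assuming a hypothetical profile_factor() table rather than btrfs_bg_type_to_factor():

```c
#include <stdint.h>

struct ro_group { uint64_t length, used, flags; };

#define F_DUP    (1ULL << 0)	/* simplified profile bits */
#define F_RAID1  (1ULL << 1)
#define F_RAID10 (1ULL << 2)

/* hypothetical stand-in: DUP/RAID1/RAID10 store two copies of every byte */
static int profile_factor(uint64_t flags)
{
	return (flags & (F_DUP | F_RAID1 | F_RAID10)) ? 2 : 1;
}

/* sum of unused bytes across read-only groups, scaled to device space */
static uint64_t ro_free_bytes(const struct ro_group *groups, int nr)
{
	uint64_t free_bytes = 0;

	for (int i = 0; i < nr; i++)
		free_bytes += (groups[i].length - groups[i].used) *
			      profile_factor(groups[i].flags);
	return free_bytes;
}
```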
9385 | | -void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache) |
---|
9386 | | -{ |
---|
9387 | | - struct btrfs_space_info *sinfo = cache->space_info; |
---|
9388 | | - u64 num_bytes; |
---|
9389 | | - |
---|
9390 | | - BUG_ON(!cache->ro); |
---|
9391 | | - |
---|
9392 | | - spin_lock(&sinfo->lock); |
---|
9393 | | - spin_lock(&cache->lock); |
---|
9394 | | - if (!--cache->ro) { |
---|
9395 | | - num_bytes = cache->key.offset - cache->reserved - |
---|
9396 | | - cache->pinned - cache->bytes_super - |
---|
9397 | | - btrfs_block_group_used(&cache->item); |
---|
9398 | | - sinfo->bytes_readonly -= num_bytes; |
---|
9399 | | - list_del_init(&cache->ro_list); |
---|
9400 | | - } |
---|
9401 | | - spin_unlock(&cache->lock); |
---|
9402 | | - spin_unlock(&sinfo->lock); |
---|
9403 | | -} |
---|
9404 | | - |
---|
9405 | | -/* |
---|
9406 | | - * checks to see if it's even possible to relocate this block group.
---|
9407 | | - * |
---|
9408 | | - * @return - -1 if it's not a good idea to relocate this block group, 0 if it's
---|
9409 | | - * ok to go ahead and try. |
---|
9410 | | - */ |
---|
9411 | | -int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr) |
---|
9412 | | -{ |
---|
9413 | | - struct btrfs_root *root = fs_info->extent_root; |
---|
9414 | | - struct btrfs_block_group_cache *block_group; |
---|
9415 | | - struct btrfs_space_info *space_info; |
---|
9416 | | - struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; |
---|
9417 | | - struct btrfs_device *device; |
---|
9418 | | - struct btrfs_trans_handle *trans; |
---|
9419 | | - u64 min_free; |
---|
9420 | | - u64 dev_min = 1; |
---|
9421 | | - u64 dev_nr = 0; |
---|
9422 | | - u64 target; |
---|
9423 | | - int debug; |
---|
9424 | | - int index; |
---|
9425 | | - int full = 0; |
---|
9426 | | - int ret = 0; |
---|
9427 | | - |
---|
9428 | | - debug = btrfs_test_opt(fs_info, ENOSPC_DEBUG); |
---|
9429 | | - |
---|
9430 | | - block_group = btrfs_lookup_block_group(fs_info, bytenr); |
---|
9431 | | - |
---|
9432 | | - /* odd, couldn't find the block group, leave it alone */ |
---|
9433 | | - if (!block_group) { |
---|
9434 | | - if (debug) |
---|
9435 | | - btrfs_warn(fs_info, |
---|
9436 | | - "can't find block group for bytenr %llu", |
---|
9437 | | - bytenr); |
---|
9438 | | - return -1; |
---|
9439 | | - } |
---|
9440 | | - |
---|
9441 | | - min_free = btrfs_block_group_used(&block_group->item); |
---|
9442 | | - |
---|
9443 | | - /* no bytes used, we're good */ |
---|
9444 | | - if (!min_free) |
---|
9445 | | - goto out; |
---|
9446 | | - |
---|
9447 | | - space_info = block_group->space_info; |
---|
9448 | | - spin_lock(&space_info->lock); |
---|
9449 | | - |
---|
9450 | | - full = space_info->full; |
---|
9451 | | - |
---|
9452 | | - /* |
---|
9453 | | - * if this is the last block group we have in this space, we can't |
---|
9454 | | - * relocate it unless we're able to allocate a new chunk below. |
---|
9455 | | - * |
---|
9456 | | - * Otherwise, we need to make sure we have room in the space to handle |
---|
9457 | | - * all of the extents from this block group. If we can, we're good |
---|
9458 | | - */ |
---|
9459 | | - if ((space_info->total_bytes != block_group->key.offset) && |
---|
9460 | | - (btrfs_space_info_used(space_info, false) + min_free < |
---|
9461 | | - space_info->total_bytes)) { |
---|
9462 | | - spin_unlock(&space_info->lock); |
---|
9463 | | - goto out; |
---|
9464 | | - } |
---|
9465 | | - spin_unlock(&space_info->lock); |
---|
9466 | | - |
---|
9467 | | - /* |
---|
9468 | | - * ok we don't have enough space, but maybe we have free space on our |
---|
9469 | | - * devices to allocate new chunks for relocation, so loop through our |
---|
9470 | | - * alloc devices and guess if we have enough space. if this block |
---|
9471 | | - * group is going to be restriped, run checks against the target |
---|
9472 | | - * profile instead of the current one. |
---|
9473 | | - */ |
---|
9474 | | - ret = -1; |
---|
9475 | | - |
---|
9476 | | - /* |
---|
9477 | | - * index: |
---|
9478 | | - * 0: raid10 |
---|
9479 | | - * 1: raid1 |
---|
9480 | | - * 2: dup |
---|
9481 | | - * 3: raid0 |
---|
9482 | | - * 4: single |
---|
9483 | | - */ |
---|
9484 | | - target = get_restripe_target(fs_info, block_group->flags); |
---|
9485 | | - if (target) { |
---|
9486 | | - index = btrfs_bg_flags_to_raid_index(extended_to_chunk(target)); |
---|
9487 | | - } else { |
---|
9488 | | - /* |
---|
9489 | | - * this is just a balance, so if we were marked as full |
---|
9490 | | - * we know there is no space for a new chunk |
---|
9491 | | - */ |
---|
9492 | | - if (full) { |
---|
9493 | | - if (debug) |
---|
9494 | | - btrfs_warn(fs_info, |
---|
9495 | | - "no space to alloc new chunk for block group %llu", |
---|
9496 | | - block_group->key.objectid); |
---|
9497 | | - goto out; |
---|
9498 | | - } |
---|
9499 | | - |
---|
9500 | | - index = btrfs_bg_flags_to_raid_index(block_group->flags); |
---|
9501 | | - } |
---|
9502 | | - |
---|
9503 | | - if (index == BTRFS_RAID_RAID10) { |
---|
9504 | | - dev_min = 4; |
---|
9505 | | - /* Divide by 2 */ |
---|
9506 | | - min_free >>= 1; |
---|
9507 | | - } else if (index == BTRFS_RAID_RAID1) { |
---|
9508 | | - dev_min = 2; |
---|
9509 | | - } else if (index == BTRFS_RAID_DUP) { |
---|
9510 | | - /* Multiply by 2 */ |
---|
9511 | | - min_free <<= 1; |
---|
9512 | | - } else if (index == BTRFS_RAID_RAID0) { |
---|
9513 | | - dev_min = fs_devices->rw_devices; |
---|
9514 | | - min_free = div64_u64(min_free, dev_min); |
---|
9515 | | - } |
---|
9516 | | - |
---|
9517 | | - /* We need to do this so that we can look at pending chunks */ |
---|
9518 | | - trans = btrfs_join_transaction(root); |
---|
9519 | | - if (IS_ERR(trans)) { |
---|
9520 | | - ret = PTR_ERR(trans); |
---|
9521 | | - goto out; |
---|
9522 | | - } |
---|
9523 | | - |
---|
9524 | | - mutex_lock(&fs_info->chunk_mutex); |
---|
9525 | | - list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
---|
9526 | | - u64 dev_offset; |
---|
9527 | | - |
---|
9528 | | - /* |
---|
9529 | | - * check to make sure we can actually find a chunk with enough |
---|
9530 | | - * space to fit our block group in. |
---|
9531 | | - */ |
---|
9532 | | - if (device->total_bytes > device->bytes_used + min_free && |
---|
9533 | | - !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { |
---|
9534 | | - ret = find_free_dev_extent(trans, device, min_free, |
---|
9535 | | - &dev_offset, NULL); |
---|
9536 | | - if (!ret) |
---|
9537 | | - dev_nr++; |
---|
9538 | | - |
---|
9539 | | - if (dev_nr >= dev_min) |
---|
9540 | | - break; |
---|
9541 | | - |
---|
9542 | | - ret = -1; |
---|
9543 | | - } |
---|
9544 | | - } |
---|
9545 | | - if (debug && ret == -1) |
---|
9546 | | - btrfs_warn(fs_info, |
---|
9547 | | - "no space to allocate a new chunk for block group %llu", |
---|
9548 | | - block_group->key.objectid); |
---|
9549 | | - mutex_unlock(&fs_info->chunk_mutex); |
---|
9550 | | - btrfs_end_transaction(trans); |
---|
9551 | | -out: |
---|
9552 | | - btrfs_put_block_group(block_group); |
---|
9553 | | - return ret; |
---|
9554 | | -} |
---|
9555 | | - |
---|
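
btrfs_can_relocate(), removed above, scales the space it must find on spare devices by the raid profile of the group: RAID10 needs four devices but only half the bytes on each, RAID1 needs two devices, DUP needs double the bytes on a single device, and RAID0 spreads the requirement across all writable devices. A sketch of only that adjustment, with a hypothetical enum instead of the kernel's raid-index table:

```c
#include <stdint.h>

enum raid { RAID10, RAID1, DUP, RAID0, SINGLE };

struct reloc_req { uint64_t min_free; uint64_t dev_min; };

/* hypothetical helper: per-device space and device count needed to
 * re-create @used bytes of data somewhere else */
static struct reloc_req reloc_requirement(enum raid r, uint64_t used,
					  uint64_t rw_devices)
{
	struct reloc_req req = { .min_free = used, .dev_min = 1 };

	switch (r) {
	case RAID10:
		req.dev_min = 4;
		req.min_free >>= 1;	/* half the bytes per device */
		break;
	case RAID1:
		req.dev_min = 2;
		break;
	case DUP:
		req.min_free <<= 1;	/* two copies on one device */
		break;
	case RAID0:
		req.dev_min = rw_devices;
		req.min_free /= rw_devices ? rw_devices : 1;
		break;
	default:
		break;
	}
	return req;
}
```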
9556 | | -static int find_first_block_group(struct btrfs_fs_info *fs_info, |
---|
9557 | | - struct btrfs_path *path, |
---|
9558 | | - struct btrfs_key *key) |
---|
9559 | | -{ |
---|
9560 | | - struct btrfs_root *root = fs_info->extent_root; |
---|
9561 | | - int ret = 0; |
---|
9562 | | - struct btrfs_key found_key; |
---|
9563 | | - struct extent_buffer *leaf; |
---|
9564 | | - struct btrfs_block_group_item bg; |
---|
9565 | | - u64 flags; |
---|
9566 | | - int slot; |
---|
9567 | | - |
---|
9568 | | - ret = btrfs_search_slot(NULL, root, key, path, 0, 0); |
---|
9569 | | - if (ret < 0) |
---|
9570 | | - goto out; |
---|
9571 | | - |
---|
9572 | | - while (1) { |
---|
9573 | | - slot = path->slots[0]; |
---|
9574 | | - leaf = path->nodes[0]; |
---|
9575 | | - if (slot >= btrfs_header_nritems(leaf)) { |
---|
9576 | | - ret = btrfs_next_leaf(root, path); |
---|
9577 | | - if (ret == 0) |
---|
9578 | | - continue; |
---|
9579 | | - if (ret < 0) |
---|
9580 | | - goto out; |
---|
9581 | | - break; |
---|
9582 | | - } |
---|
9583 | | - btrfs_item_key_to_cpu(leaf, &found_key, slot); |
---|
9584 | | - |
---|
9585 | | - if (found_key.objectid >= key->objectid && |
---|
9586 | | - found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { |
---|
9587 | | - struct extent_map_tree *em_tree; |
---|
9588 | | - struct extent_map *em; |
---|
9589 | | - |
---|
9590 | | - em_tree = &root->fs_info->mapping_tree.map_tree; |
---|
9591 | | - read_lock(&em_tree->lock); |
---|
9592 | | - em = lookup_extent_mapping(em_tree, found_key.objectid, |
---|
9593 | | - found_key.offset); |
---|
9594 | | - read_unlock(&em_tree->lock); |
---|
9595 | | - if (!em) { |
---|
9596 | | - btrfs_err(fs_info, |
---|
9597 | | - "logical %llu len %llu found bg but no related chunk", |
---|
9598 | | - found_key.objectid, found_key.offset); |
---|
9599 | | - ret = -ENOENT; |
---|
9600 | | - } else if (em->start != found_key.objectid || |
---|
9601 | | - em->len != found_key.offset) { |
---|
9602 | | - btrfs_err(fs_info, |
---|
9603 | | - "block group %llu len %llu mismatch with chunk %llu len %llu", |
---|
9604 | | - found_key.objectid, found_key.offset, |
---|
9605 | | - em->start, em->len); |
---|
9606 | | - ret = -EUCLEAN; |
---|
9607 | | - } else { |
---|
9608 | | - read_extent_buffer(leaf, &bg, |
---|
9609 | | - btrfs_item_ptr_offset(leaf, slot), |
---|
9610 | | - sizeof(bg)); |
---|
9611 | | - flags = btrfs_block_group_flags(&bg) & |
---|
9612 | | - BTRFS_BLOCK_GROUP_TYPE_MASK; |
---|
9613 | | - |
---|
9614 | | - if (flags != (em->map_lookup->type & |
---|
9615 | | - BTRFS_BLOCK_GROUP_TYPE_MASK)) { |
---|
9616 | | - btrfs_err(fs_info, |
---|
9617 | | -"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx", |
---|
9618 | | - found_key.objectid, |
---|
9619 | | - found_key.offset, flags, |
---|
9620 | | - (BTRFS_BLOCK_GROUP_TYPE_MASK & |
---|
9621 | | - em->map_lookup->type)); |
---|
9622 | | - ret = -EUCLEAN; |
---|
9623 | | - } else { |
---|
9624 | | - ret = 0; |
---|
9625 | | - } |
---|
9626 | | - } |
---|
9627 | | - free_extent_map(em); |
---|
9628 | | - goto out; |
---|
9629 | | - } |
---|
9630 | | - path->slots[0]++; |
---|
9631 | | - } |
---|
9632 | | -out: |
---|
9633 | | - return ret; |
---|
9634 | | -} |
---|
9635 | | - |
---|
9636 | | -void btrfs_put_block_group_cache(struct btrfs_fs_info *info) |
---|
9637 | | -{ |
---|
9638 | | - struct btrfs_block_group_cache *block_group; |
---|
9639 | | - u64 last = 0; |
---|
9640 | | - |
---|
9641 | | - while (1) { |
---|
9642 | | - struct inode *inode; |
---|
9643 | | - |
---|
9644 | | - block_group = btrfs_lookup_first_block_group(info, last); |
---|
9645 | | - while (block_group) { |
---|
9646 | | - wait_block_group_cache_done(block_group); |
---|
9647 | | - spin_lock(&block_group->lock); |
---|
9648 | | - if (block_group->iref) |
---|
9649 | | - break; |
---|
9650 | | - spin_unlock(&block_group->lock); |
---|
9651 | | - block_group = next_block_group(info, block_group); |
---|
9652 | | - } |
---|
9653 | | - if (!block_group) { |
---|
9654 | | - if (last == 0) |
---|
9655 | | - break; |
---|
9656 | | - last = 0; |
---|
9657 | | - continue; |
---|
9658 | | - } |
---|
9659 | | - |
---|
9660 | | - inode = block_group->inode; |
---|
9661 | | - block_group->iref = 0; |
---|
9662 | | - block_group->inode = NULL; |
---|
9663 | | - spin_unlock(&block_group->lock); |
---|
9664 | | - ASSERT(block_group->io_ctl.inode == NULL); |
---|
9665 | | - iput(inode); |
---|
9666 | | - last = block_group->key.objectid + block_group->key.offset; |
---|
9667 | | - btrfs_put_block_group(block_group); |
---|
9668 | | - } |
---|
9669 | | -} |
---|
9670 | | - |
---|
9671 | | -/* |
---|
9672 | | - * Must be called only after stopping all workers, since we could have block |
---|
9673 | | - * group caching kthreads running, and therefore they could race with us if we |
---|
9674 | | - * freed the block groups before stopping them. |
---|
9675 | | - */ |
---|
9676 | | -int btrfs_free_block_groups(struct btrfs_fs_info *info) |
---|
9677 | | -{ |
---|
9678 | | - struct btrfs_block_group_cache *block_group; |
---|
9679 | | - struct btrfs_space_info *space_info; |
---|
9680 | | - struct btrfs_caching_control *caching_ctl; |
---|
9681 | | - struct rb_node *n; |
---|
9682 | | - |
---|
9683 | | - down_write(&info->commit_root_sem); |
---|
9684 | | - while (!list_empty(&info->caching_block_groups)) { |
---|
9685 | | - caching_ctl = list_entry(info->caching_block_groups.next, |
---|
9686 | | - struct btrfs_caching_control, list); |
---|
9687 | | - list_del(&caching_ctl->list); |
---|
9688 | | - put_caching_control(caching_ctl); |
---|
9689 | | - } |
---|
9690 | | - up_write(&info->commit_root_sem); |
---|
9691 | | - |
---|
9692 | | - spin_lock(&info->unused_bgs_lock); |
---|
9693 | | - while (!list_empty(&info->unused_bgs)) { |
---|
9694 | | - block_group = list_first_entry(&info->unused_bgs, |
---|
9695 | | - struct btrfs_block_group_cache, |
---|
9696 | | - bg_list); |
---|
9697 | | - list_del_init(&block_group->bg_list); |
---|
9698 | | - btrfs_put_block_group(block_group); |
---|
9699 | | - } |
---|
9700 | | - spin_unlock(&info->unused_bgs_lock); |
---|
9701 | | - |
---|
9702 | | - spin_lock(&info->block_group_cache_lock); |
---|
9703 | | - while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { |
---|
9704 | | - block_group = rb_entry(n, struct btrfs_block_group_cache, |
---|
9705 | | - cache_node); |
---|
9706 | | - rb_erase(&block_group->cache_node, |
---|
9707 | | - &info->block_group_cache_tree); |
---|
9708 | | - RB_CLEAR_NODE(&block_group->cache_node); |
---|
9709 | | - spin_unlock(&info->block_group_cache_lock); |
---|
9710 | | - |
---|
9711 | | - down_write(&block_group->space_info->groups_sem); |
---|
9712 | | - list_del(&block_group->list); |
---|
9713 | | - up_write(&block_group->space_info->groups_sem); |
---|
9714 | | - |
---|
9715 | | - /* |
---|
9716 | | - * We haven't cached this block group, which means we could |
---|
9717 | | - * possibly have excluded extents on this block group. |
---|
9718 | | - */ |
---|
9719 | | - if (block_group->cached == BTRFS_CACHE_NO || |
---|
9720 | | - block_group->cached == BTRFS_CACHE_ERROR) |
---|
9721 | | - free_excluded_extents(block_group); |
---|
9722 | | - |
---|
9723 | | - btrfs_remove_free_space_cache(block_group); |
---|
9724 | | - ASSERT(block_group->cached != BTRFS_CACHE_STARTED); |
---|
9725 | | - ASSERT(list_empty(&block_group->dirty_list)); |
---|
9726 | | - ASSERT(list_empty(&block_group->io_list)); |
---|
9727 | | - ASSERT(list_empty(&block_group->bg_list)); |
---|
9728 | | - ASSERT(atomic_read(&block_group->count) == 1); |
---|
9729 | | - btrfs_put_block_group(block_group); |
---|
9730 | | - |
---|
9731 | | - spin_lock(&info->block_group_cache_lock); |
---|
9732 | | - } |
---|
9733 | | - spin_unlock(&info->block_group_cache_lock); |
---|
9734 | | - |
---|
9735 | | - /* now that all the block groups are freed, go through and |
---|
9736 | | - * free all the space_info structs. This is only called during |
---|
9737 | | - * the final stages of unmount, and so we know nobody is |
---|
9738 | | - * using them. We call synchronize_rcu() once before we start, |
---|
9739 | | - * just to be on the safe side. |
---|
9740 | | - */ |
---|
9741 | | - synchronize_rcu(); |
---|
9742 | | - |
---|
9743 | | - release_global_block_rsv(info); |
---|
9744 | | - |
---|
9745 | | - while (!list_empty(&info->space_info)) { |
---|
9746 | | - int i; |
---|
9747 | | - |
---|
9748 | | - space_info = list_entry(info->space_info.next, |
---|
9749 | | - struct btrfs_space_info, |
---|
9750 | | - list); |
---|
9751 | | - |
---|
9752 | | - /* |
---|
9753 | | - * Do not hide this behind enospc_debug, this is actually |
---|
9754 | | - * important and indicates a real bug if this happens. |
---|
9755 | | - */ |
---|
9756 | | - if (WARN_ON(space_info->bytes_pinned > 0 || |
---|
9757 | | - space_info->bytes_reserved > 0 || |
---|
9758 | | - space_info->bytes_may_use > 0)) |
---|
9759 | | - dump_space_info(info, space_info, 0, 0); |
---|
9760 | | - list_del(&space_info->list); |
---|
9761 | | - for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { |
---|
9762 | | - struct kobject *kobj; |
---|
9763 | | - kobj = space_info->block_group_kobjs[i]; |
---|
9764 | | - space_info->block_group_kobjs[i] = NULL; |
---|
9765 | | - if (kobj) { |
---|
9766 | | - kobject_del(kobj); |
---|
9767 | | - kobject_put(kobj); |
---|
9768 | | - } |
---|
9769 | | - } |
---|
9770 | | - kobject_del(&space_info->kobj); |
---|
9771 | | - kobject_put(&space_info->kobj); |
---|
9772 | | - } |
---|
9773 | | - return 0; |
---|
9774 | | -} |
---|
9775 | | - |
---|
9776 | | -/* link_block_group will queue up kobjects to add when we're reclaim-safe */ |
---|
9777 | | -void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info) |
---|
9778 | | -{ |
---|
9779 | | - struct btrfs_space_info *space_info; |
---|
9780 | | - struct raid_kobject *rkobj; |
---|
9781 | | - LIST_HEAD(list); |
---|
9782 | | - int index; |
---|
9783 | | - int ret = 0; |
---|
9784 | | - |
---|
9785 | | - spin_lock(&fs_info->pending_raid_kobjs_lock); |
---|
9786 | | - list_splice_init(&fs_info->pending_raid_kobjs, &list); |
---|
9787 | | - spin_unlock(&fs_info->pending_raid_kobjs_lock); |
---|
9788 | | - |
---|
9789 | | - list_for_each_entry(rkobj, &list, list) { |
---|
9790 | | - space_info = __find_space_info(fs_info, rkobj->flags); |
---|
9791 | | - index = btrfs_bg_flags_to_raid_index(rkobj->flags); |
---|
9792 | | - |
---|
9793 | | - ret = kobject_add(&rkobj->kobj, &space_info->kobj, |
---|
9794 | | - "%s", get_raid_name(index)); |
---|
9795 | | - if (ret) { |
---|
9796 | | - kobject_put(&rkobj->kobj); |
---|
9797 | | - break; |
---|
9798 | | - } |
---|
9799 | | - } |
---|
9800 | | - if (ret) |
---|
9801 | | - btrfs_warn(fs_info, |
---|
9802 | | - "failed to add kobject for block cache, ignoring"); |
---|
9803 | | -} |
---|
9804 | | - |
---|
9805 | | -static void link_block_group(struct btrfs_block_group_cache *cache) |
---|
9806 | | -{ |
---|
9807 | | - struct btrfs_space_info *space_info = cache->space_info; |
---|
9808 | | - struct btrfs_fs_info *fs_info = cache->fs_info; |
---|
9809 | | - int index = btrfs_bg_flags_to_raid_index(cache->flags); |
---|
9810 | | - bool first = false; |
---|
9811 | | - |
---|
9812 | | - down_write(&space_info->groups_sem); |
---|
9813 | | - if (list_empty(&space_info->block_groups[index])) |
---|
9814 | | - first = true; |
---|
9815 | | - list_add_tail(&cache->list, &space_info->block_groups[index]); |
---|
9816 | | - up_write(&space_info->groups_sem); |
---|
9817 | | - |
---|
9818 | | - if (first) { |
---|
9819 | | - struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS); |
---|
9820 | | - if (!rkobj) { |
---|
9821 | | - btrfs_warn(cache->fs_info, |
---|
9822 | | - "couldn't alloc memory for raid level kobject"); |
---|
9823 | | - return; |
---|
9824 | | - } |
---|
9825 | | - rkobj->flags = cache->flags; |
---|
9826 | | - kobject_init(&rkobj->kobj, &btrfs_raid_ktype); |
---|
9827 | | - |
---|
9828 | | - spin_lock(&fs_info->pending_raid_kobjs_lock); |
---|
9829 | | - list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs); |
---|
9830 | | - spin_unlock(&fs_info->pending_raid_kobjs_lock); |
---|
9831 | | - space_info->block_group_kobjs[index] = &rkobj->kobj; |
---|
9832 | | - } |
---|
9833 | | -} |
---|
9834 | | - |
---|
9835 | | -static struct btrfs_block_group_cache * |
---|
9836 | | -btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info, |
---|
9837 | | - u64 start, u64 size) |
---|
9838 | | -{ |
---|
9839 | | - struct btrfs_block_group_cache *cache; |
---|
9840 | | - |
---|
9841 | | - cache = kzalloc(sizeof(*cache), GFP_NOFS); |
---|
9842 | | - if (!cache) |
---|
9843 | | - return NULL; |
---|
9844 | | - |
---|
9845 | | - cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), |
---|
9846 | | - GFP_NOFS); |
---|
9847 | | - if (!cache->free_space_ctl) { |
---|
9848 | | - kfree(cache); |
---|
9849 | | - return NULL; |
---|
9850 | | - } |
---|
9851 | | - |
---|
9852 | | - cache->key.objectid = start; |
---|
9853 | | - cache->key.offset = size; |
---|
9854 | | - cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; |
---|
9855 | | - |
---|
9856 | | - cache->fs_info = fs_info; |
---|
9857 | | - cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start); |
---|
9858 | | - set_free_space_tree_thresholds(cache); |
---|
9859 | | - |
---|
9860 | | - atomic_set(&cache->count, 1); |
---|
9861 | | - spin_lock_init(&cache->lock); |
---|
9862 | | - init_rwsem(&cache->data_rwsem); |
---|
9863 | | - INIT_LIST_HEAD(&cache->list); |
---|
9864 | | - INIT_LIST_HEAD(&cache->cluster_list); |
---|
9865 | | - INIT_LIST_HEAD(&cache->bg_list); |
---|
9866 | | - INIT_LIST_HEAD(&cache->ro_list); |
---|
9867 | | - INIT_LIST_HEAD(&cache->dirty_list); |
---|
9868 | | - INIT_LIST_HEAD(&cache->io_list); |
---|
9869 | | - btrfs_init_free_space_ctl(cache); |
---|
9870 | | - atomic_set(&cache->trimming, 0); |
---|
9871 | | - mutex_init(&cache->free_space_lock); |
---|
9872 | | - btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root); |
---|
9873 | | - |
---|
9874 | | - return cache; |
---|
9875 | | -} |
---|
9876 | | - |
---|
9877 | | - |
---|
9878 | | -/* |
---|
9879 | | - * Iterate all chunks and verify that each of them has the corresponding block |
---|
9880 | | - * group |
---|
9881 | | - */ |
---|
9882 | | -static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) |
---|
9883 | | -{ |
---|
9884 | | - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; |
---|
9885 | | - struct extent_map *em; |
---|
9886 | | - struct btrfs_block_group_cache *bg; |
---|
9887 | | - u64 start = 0; |
---|
9888 | | - int ret = 0; |
---|
9889 | | - |
---|
9890 | | - while (1) { |
---|
9891 | | - read_lock(&map_tree->map_tree.lock); |
---|
9892 | | - /* |
---|
9893 | | - * lookup_extent_mapping will return the first extent map |
---|
9894 | | - * intersecting the range, so setting @len to 1 is enough to |
---|
9895 | | - * get the first chunk. |
---|
9896 | | - */ |
---|
9897 | | - em = lookup_extent_mapping(&map_tree->map_tree, start, 1); |
---|
9898 | | - read_unlock(&map_tree->map_tree.lock); |
---|
9899 | | - if (!em) |
---|
9900 | | - break; |
---|
9901 | | - |
---|
9902 | | - bg = btrfs_lookup_block_group(fs_info, em->start); |
---|
9903 | | - if (!bg) { |
---|
9904 | | - btrfs_err(fs_info, |
---|
9905 | | - "chunk start=%llu len=%llu doesn't have corresponding block group", |
---|
9906 | | - em->start, em->len); |
---|
9907 | | - ret = -EUCLEAN; |
---|
9908 | | - free_extent_map(em); |
---|
9909 | | - break; |
---|
9910 | | - } |
---|
9911 | | - if (bg->key.objectid != em->start || |
---|
9912 | | - bg->key.offset != em->len || |
---|
9913 | | - (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) != |
---|
9914 | | - (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { |
---|
9915 | | - btrfs_err(fs_info, |
---|
9916 | | -"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx", |
---|
9917 | | - em->start, em->len, |
---|
9918 | | - em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK, |
---|
9919 | | - bg->key.objectid, bg->key.offset, |
---|
9920 | | - bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK); |
---|
9921 | | - ret = -EUCLEAN; |
---|
9922 | | - free_extent_map(em); |
---|
9923 | | - btrfs_put_block_group(bg); |
---|
9924 | | - break; |
---|
9925 | | - } |
---|
9926 | | - start = em->start + em->len; |
---|
9927 | | - free_extent_map(em); |
---|
9928 | | - btrfs_put_block_group(bg); |
---|
9929 | | - } |
---|
9930 | | - return ret; |
---|
9931 | | -} |
---|
9932 | | - |
---|
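
check_chunk_block_group_mappings(), removed above, walks every chunk by looking up the extent map that intersects [start, start + 1) and then advancing start past the returned mapping; a length of 1 is enough because the lookup returns the first intersecting map. The same iteration pattern over a toy, fully self-contained interval list (map_lookup() here is a hypothetical stand-in, not the kernel API):

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct mapping { uint64_t start, len; };

/* toy chunk layout standing in for the extent map tree */
static const struct mapping chunks[] = {
	{ 0, 8 }, { 8, 16 }, { 24, 32 },
};

/* hypothetical: first mapping intersecting [pos, pos + 1), or NULL */
static const struct mapping *map_lookup(uint64_t pos)
{
	for (size_t i = 0; i < sizeof(chunks) / sizeof(chunks[0]); i++)
		if (pos >= chunks[i].start &&
		    pos < chunks[i].start + chunks[i].len)
			return &chunks[i];
	return NULL;
}

int main(void)
{
	uint64_t start = 0;
	const struct mapping *m;

	/* same iteration pattern as the removed verifier */
	while ((m = map_lookup(start)) != NULL) {
		printf("chunk %llu +%llu\n",
		       (unsigned long long)m->start,
		       (unsigned long long)m->len);
		start = m->start + m->len;	/* jump past this chunk */
	}
	return 0;
}
```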
9933 | | -int btrfs_read_block_groups(struct btrfs_fs_info *info) |
---|
9934 | | -{ |
---|
9935 | | - struct btrfs_path *path; |
---|
9936 | | - int ret; |
---|
9937 | | - struct btrfs_block_group_cache *cache; |
---|
9938 | | - struct btrfs_space_info *space_info; |
---|
9939 | | - struct btrfs_key key; |
---|
9940 | | - struct btrfs_key found_key; |
---|
9941 | | - struct extent_buffer *leaf; |
---|
9942 | | - int need_clear = 0; |
---|
9943 | | - u64 cache_gen; |
---|
9944 | | - u64 feature; |
---|
9945 | | - int mixed; |
---|
9946 | | - |
---|
9947 | | - feature = btrfs_super_incompat_flags(info->super_copy); |
---|
9948 | | - mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS); |
---|
9949 | | - |
---|
9950 | | - key.objectid = 0; |
---|
9951 | | - key.offset = 0; |
---|
9952 | | - key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; |
---|
9953 | | - path = btrfs_alloc_path(); |
---|
9954 | | - if (!path) |
---|
9955 | | - return -ENOMEM; |
---|
9956 | | - path->reada = READA_FORWARD; |
---|
9957 | | - |
---|
9958 | | - cache_gen = btrfs_super_cache_generation(info->super_copy); |
---|
9959 | | - if (btrfs_test_opt(info, SPACE_CACHE) && |
---|
9960 | | - btrfs_super_generation(info->super_copy) != cache_gen) |
---|
9961 | | - need_clear = 1; |
---|
9962 | | - if (btrfs_test_opt(info, CLEAR_CACHE)) |
---|
9963 | | - need_clear = 1; |
---|
9964 | | - |
---|
9965 | | - while (1) { |
---|
9966 | | - ret = find_first_block_group(info, path, &key); |
---|
9967 | | - if (ret > 0) |
---|
9968 | | - break; |
---|
9969 | | - if (ret != 0) |
---|
9970 | | - goto error; |
---|
9971 | | - |
---|
9972 | | - leaf = path->nodes[0]; |
---|
9973 | | - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
---|
9974 | | - |
---|
9975 | | - cache = btrfs_create_block_group_cache(info, found_key.objectid, |
---|
9976 | | - found_key.offset); |
---|
9977 | | - if (!cache) { |
---|
9978 | | - ret = -ENOMEM; |
---|
9979 | | - goto error; |
---|
9980 | | - } |
---|
9981 | | - |
---|
9982 | | - if (need_clear) { |
---|
9983 | | - /* |
---|
9984 | | - * When we mount with old space cache, we need to |
---|
9985 | | - * set BTRFS_DC_CLEAR and set dirty flag. |
---|
9986 | | - * |
---|
9987 | | - * a) Setting 'BTRFS_DC_CLEAR' makes sure that we |
---|
9988 | | - * truncate the old free space cache inode and |
---|
9989 | | - * setup a new one. |
---|
9990 | | - * b) Setting 'dirty flag' makes sure that we flush |
---|
9991 | | - * the new space cache info onto disk. |
---|
9992 | | - */ |
---|
9993 | | - if (btrfs_test_opt(info, SPACE_CACHE)) |
---|
9994 | | - cache->disk_cache_state = BTRFS_DC_CLEAR; |
---|
9995 | | - } |
---|
9996 | | - |
---|
9997 | | - read_extent_buffer(leaf, &cache->item, |
---|
9998 | | - btrfs_item_ptr_offset(leaf, path->slots[0]), |
---|
9999 | | - sizeof(cache->item)); |
---|
10000 | | - cache->flags = btrfs_block_group_flags(&cache->item); |
---|
10001 | | - if (!mixed && |
---|
10002 | | - ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) && |
---|
10003 | | - (cache->flags & BTRFS_BLOCK_GROUP_DATA))) { |
---|
10004 | | - btrfs_err(info, |
---|
10005 | | -"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups", |
---|
10006 | | - cache->key.objectid); |
---|
10007 | | - btrfs_put_block_group(cache); |
---|
10008 | | - ret = -EINVAL; |
---|
10009 | | - goto error; |
---|
10010 | | - } |
---|
10011 | | - |
---|
10012 | | - key.objectid = found_key.objectid + found_key.offset; |
---|
10013 | | - btrfs_release_path(path); |
---|
10014 | | - |
---|
10015 | | - /* |
---|
10016 | | - * We need to exclude the super stripes now so that the space |
---|
10017 | | - * info has super bytes accounted for, otherwise we'll think |
---|
10018 | | - * we have more space than we actually do. |
---|
10019 | | - */ |
---|
10020 | | - ret = exclude_super_stripes(cache); |
---|
10021 | | - if (ret) { |
---|
10022 | | - /* |
---|
10023 | | - * We may have excluded something, so call this just in |
---|
10024 | | - * case. |
---|
10025 | | - */ |
---|
10026 | | - free_excluded_extents(cache); |
---|
10027 | | - btrfs_put_block_group(cache); |
---|
10028 | | - goto error; |
---|
10029 | | - } |
---|
10030 | | - |
---|
10031 | | - /* |
---|
10032 | | - * check for two cases, either we are full, and therefore |
---|
10033 | | - * don't need to bother with the caching work since we won't |
---|
10034 | | - * find any space, or we are empty, and we can just add all |
---|
10035 | | - * the space in and be done with it. This saves us a lot of
---|
10036 | | - * time, particularly in the full case. |
---|
10037 | | - */ |
---|
10038 | | - if (found_key.offset == btrfs_block_group_used(&cache->item)) { |
---|
10039 | | - cache->last_byte_to_unpin = (u64)-1; |
---|
10040 | | - cache->cached = BTRFS_CACHE_FINISHED; |
---|
10041 | | - free_excluded_extents(cache); |
---|
10042 | | - } else if (btrfs_block_group_used(&cache->item) == 0) { |
---|
10043 | | - cache->last_byte_to_unpin = (u64)-1; |
---|
10044 | | - cache->cached = BTRFS_CACHE_FINISHED; |
---|
10045 | | - add_new_free_space(cache, found_key.objectid, |
---|
10046 | | - found_key.objectid + |
---|
10047 | | - found_key.offset); |
---|
10048 | | - free_excluded_extents(cache); |
---|
10049 | | - } |
---|
10050 | | - |
---|
10051 | | - ret = btrfs_add_block_group_cache(info, cache); |
---|
10052 | | - if (ret) { |
---|
10053 | | - btrfs_remove_free_space_cache(cache); |
---|
10054 | | - btrfs_put_block_group(cache); |
---|
10055 | | - goto error; |
---|
10056 | | - } |
---|
10057 | | - |
---|
10058 | | - trace_btrfs_add_block_group(info, cache, 0); |
---|
10059 | | - update_space_info(info, cache->flags, found_key.offset, |
---|
10060 | | - btrfs_block_group_used(&cache->item), |
---|
10061 | | - cache->bytes_super, &space_info); |
---|
10062 | | - |
---|
10063 | | - cache->space_info = space_info; |
---|
10064 | | - |
---|
10065 | | - link_block_group(cache); |
---|
10066 | | - |
---|
10067 | | - set_avail_alloc_bits(info, cache->flags); |
---|
10068 | | - if (btrfs_chunk_readonly(info, cache->key.objectid)) { |
---|
10069 | | - inc_block_group_ro(cache, 1); |
---|
10070 | | - } else if (btrfs_block_group_used(&cache->item) == 0) { |
---|
10071 | | - ASSERT(list_empty(&cache->bg_list)); |
---|
10072 | | - btrfs_mark_bg_unused(cache); |
---|
10073 | | - } |
---|
10074 | | - } |
---|
10075 | | - |
---|
10076 | | - list_for_each_entry_rcu(space_info, &info->space_info, list) { |
---|
10077 | | - if (!(get_alloc_profile(info, space_info->flags) & |
---|
10078 | | - (BTRFS_BLOCK_GROUP_RAID10 | |
---|
10079 | | - BTRFS_BLOCK_GROUP_RAID1 | |
---|
10080 | | - BTRFS_BLOCK_GROUP_RAID5 | |
---|
10081 | | - BTRFS_BLOCK_GROUP_RAID6 | |
---|
10082 | | - BTRFS_BLOCK_GROUP_DUP))) |
---|
10083 | | - continue; |
---|
10084 | | - /* |
---|
10085 | | - * avoid allocating from un-mirrored block group if there are |
---|
10086 | | - * mirrored block groups. |
---|
10087 | | - */ |
---|
10088 | | - list_for_each_entry(cache, |
---|
10089 | | - &space_info->block_groups[BTRFS_RAID_RAID0], |
---|
10090 | | - list) |
---|
10091 | | - inc_block_group_ro(cache, 1); |
---|
10092 | | - list_for_each_entry(cache, |
---|
10093 | | - &space_info->block_groups[BTRFS_RAID_SINGLE], |
---|
10094 | | - list) |
---|
10095 | | - inc_block_group_ro(cache, 1); |
---|
10096 | | - } |
---|
10097 | | - |
---|
10098 | | - btrfs_add_raid_kobjects(info); |
---|
10099 | | - init_global_block_rsv(info); |
---|
10100 | | - ret = check_chunk_block_group_mappings(info); |
---|
10101 | | -error: |
---|
10102 | | - btrfs_free_path(path); |
---|
10103 | | - return ret; |
---|
10104 | | -} |
---|
10105 | | - |
---|
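
Two branches of the removed btrfs_read_block_groups() skip the background caching work entirely: a completely full group has no free space to discover, and a completely empty one can have its whole range inserted as free space immediately. A tiny sketch of that decision, with hypothetical add_free_range()/queue_caching() stubs in place of the real free-space and caching machinery:

```c
#include <stdbool.h>
#include <stdint.h>

struct bg { uint64_t start, length, used; bool cached; };

/* hypothetical stand-ins for the free-space tree and caching thread */
static void add_free_range(struct bg *g, uint64_t s, uint64_t e) { (void)g; (void)s; (void)e; }
static void queue_caching(struct bg *g)                          { (void)g; }

static void prime_block_group(struct bg *g)
{
	if (g->used == g->length) {
		/* full: nothing to find, skip the caching thread */
		g->cached = true;
	} else if (g->used == 0) {
		/* empty: the whole range is free, also skip caching */
		add_free_range(g, g->start, g->start + g->length);
		g->cached = true;
	} else {
		/* partially used: cache lazily in the background */
		queue_caching(g);
	}
}
```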
10106 | | -void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans) |
---|
10107 | | -{ |
---|
10108 | | - struct btrfs_fs_info *fs_info = trans->fs_info; |
---|
10109 | | - struct btrfs_block_group_cache *block_group; |
---|
10110 | | - struct btrfs_root *extent_root = fs_info->extent_root; |
---|
10111 | | - struct btrfs_block_group_item item; |
---|
10112 | | - struct btrfs_key key; |
---|
10113 | | - int ret = 0; |
---|
10114 | | - |
---|
10115 | | - if (!trans->can_flush_pending_bgs) |
---|
10116 | | - return; |
---|
10117 | | - |
---|
10118 | | - while (!list_empty(&trans->new_bgs)) { |
---|
10119 | | - block_group = list_first_entry(&trans->new_bgs, |
---|
10120 | | - struct btrfs_block_group_cache, |
---|
10121 | | - bg_list); |
---|
10122 | | - if (ret) |
---|
10123 | | - goto next; |
---|
10124 | | - |
---|
10125 | | - spin_lock(&block_group->lock); |
---|
10126 | | - memcpy(&item, &block_group->item, sizeof(item)); |
---|
10127 | | - memcpy(&key, &block_group->key, sizeof(key)); |
---|
10128 | | - spin_unlock(&block_group->lock); |
---|
10129 | | - |
---|
10130 | | - ret = btrfs_insert_item(trans, extent_root, &key, &item, |
---|
10131 | | - sizeof(item)); |
---|
10132 | | - if (ret) |
---|
10133 | | - btrfs_abort_transaction(trans, ret); |
---|
10134 | | - ret = btrfs_finish_chunk_alloc(trans, key.objectid, key.offset); |
---|
10135 | | - if (ret) |
---|
10136 | | - btrfs_abort_transaction(trans, ret); |
---|
10137 | | - add_block_group_free_space(trans, block_group); |
---|
10138 | | - /* already aborted the transaction if it failed. */ |
---|
10139 | | -next: |
---|
10140 | | - list_del_init(&block_group->bg_list); |
---|
10141 | | - } |
---|
10142 | | - btrfs_trans_release_chunk_metadata(trans); |
---|
10143 | | -} |
---|
10144 | | - |
---|
10145 | | -int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, |
---|
10146 | | - u64 type, u64 chunk_offset, u64 size) |
---|
10147 | | -{ |
---|
10148 | | - struct btrfs_fs_info *fs_info = trans->fs_info; |
---|
10149 | | - struct btrfs_block_group_cache *cache; |
---|
10150 | | - int ret; |
---|
10151 | | - |
---|
10152 | | - btrfs_set_log_full_commit(fs_info, trans); |
---|
10153 | | - |
---|
10154 | | - cache = btrfs_create_block_group_cache(fs_info, chunk_offset, size); |
---|
10155 | | - if (!cache) |
---|
10156 | | - return -ENOMEM; |
---|
10157 | | - |
---|
10158 | | - btrfs_set_block_group_used(&cache->item, bytes_used); |
---|
10159 | | - btrfs_set_block_group_chunk_objectid(&cache->item, |
---|
10160 | | - BTRFS_FIRST_CHUNK_TREE_OBJECTID); |
---|
10161 | | - btrfs_set_block_group_flags(&cache->item, type); |
---|
10162 | | - |
---|
10163 | | - cache->flags = type; |
---|
10164 | | - cache->last_byte_to_unpin = (u64)-1; |
---|
10165 | | - cache->cached = BTRFS_CACHE_FINISHED; |
---|
10166 | | - cache->needs_free_space = 1; |
---|
10167 | | - ret = exclude_super_stripes(cache); |
---|
10168 | | - if (ret) { |
---|
10169 | | - /* |
---|
10170 | | - * We may have excluded something, so call this just in |
---|
10171 | | - * case. |
---|
10172 | | - */ |
---|
10173 | | - free_excluded_extents(cache); |
---|
10174 | | - btrfs_put_block_group(cache); |
---|
10175 | | - return ret; |
---|
10176 | | - } |
---|
10177 | | - |
---|
10178 | | - add_new_free_space(cache, chunk_offset, chunk_offset + size); |
---|
10179 | | - |
---|
10180 | | - free_excluded_extents(cache); |
---|
10181 | | - |
---|
10182 | | -#ifdef CONFIG_BTRFS_DEBUG |
---|
10183 | | - if (btrfs_should_fragment_free_space(cache)) { |
---|
10184 | | - u64 new_bytes_used = size - bytes_used; |
---|
10185 | | - |
---|
10186 | | - bytes_used += new_bytes_used >> 1; |
---|
10187 | | - fragment_free_space(cache); |
---|
10188 | | - } |
---|
10189 | | -#endif |
---|
10190 | | - /* |
---|
10191 | | - * Ensure the corresponding space_info object is created and |
---|
10192 | | - * assigned to our block group. We want our bg to be added to the rbtree |
---|
10193 | | - * with its ->space_info set. |
---|
10194 | | - */ |
---|
10195 | | - cache->space_info = __find_space_info(fs_info, cache->flags); |
---|
10196 | | - ASSERT(cache->space_info); |
---|
10197 | | - |
---|
10198 | | - ret = btrfs_add_block_group_cache(fs_info, cache); |
---|
10199 | | - if (ret) { |
---|
10200 | | - btrfs_remove_free_space_cache(cache); |
---|
10201 | | - btrfs_put_block_group(cache); |
---|
10202 | | - return ret; |
---|
10203 | | - } |
---|
10204 | | - |
---|
10205 | | - /* |
---|
10206 | | - * Now that our block group has its ->space_info set and is inserted in |
---|
10207 | | - * the rbtree, update the space info's counters. |
---|
10208 | | - */ |
---|
10209 | | - trace_btrfs_add_block_group(fs_info, cache, 1); |
---|
10210 | | - update_space_info(fs_info, cache->flags, size, bytes_used, |
---|
10211 | | - cache->bytes_super, &cache->space_info); |
---|
10212 | | - update_global_block_rsv(fs_info); |
---|
10213 | | - |
---|
10214 | | - link_block_group(cache); |
---|
10215 | | - |
---|
10216 | | - list_add_tail(&cache->bg_list, &trans->new_bgs); |
---|
10217 | | - |
---|
10218 | | - set_avail_alloc_bits(fs_info, type); |
---|
10219 | | - return 0; |
---|
10220 | | -} |
---|
10221 | | - |
---|
10222 | | -static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) |
---|
10223 | | -{ |
---|
10224 | | - u64 extra_flags = chunk_to_extended(flags) & |
---|
10225 | | - BTRFS_EXTENDED_PROFILE_MASK; |
---|
10226 | | - |
---|
10227 | | - write_seqlock(&fs_info->profiles_lock); |
---|
10228 | | - if (flags & BTRFS_BLOCK_GROUP_DATA) |
---|
10229 | | - fs_info->avail_data_alloc_bits &= ~extra_flags; |
---|
10230 | | - if (flags & BTRFS_BLOCK_GROUP_METADATA) |
---|
10231 | | - fs_info->avail_metadata_alloc_bits &= ~extra_flags; |
---|
10232 | | - if (flags & BTRFS_BLOCK_GROUP_SYSTEM) |
---|
10233 | | - fs_info->avail_system_alloc_bits &= ~extra_flags; |
---|
10234 | | - write_sequnlock(&fs_info->profiles_lock); |
---|
10235 | | -} |
---|
10236 | | - |
---|
10237 | | -int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
---|
10238 | | - u64 group_start, struct extent_map *em) |
---|
10239 | | -{ |
---|
10240 | | - struct btrfs_fs_info *fs_info = trans->fs_info; |
---|
10241 | | - struct btrfs_root *root = fs_info->extent_root; |
---|
10242 | | - struct btrfs_path *path; |
---|
10243 | | - struct btrfs_block_group_cache *block_group; |
---|
10244 | | - struct btrfs_free_cluster *cluster; |
---|
10245 | | - struct btrfs_root *tree_root = fs_info->tree_root; |
---|
10246 | | - struct btrfs_key key; |
---|
10247 | | - struct inode *inode; |
---|
10248 | | - struct kobject *kobj = NULL; |
---|
10249 | | - int ret; |
---|
10250 | | - int index; |
---|
10251 | | - int factor; |
---|
10252 | | - struct btrfs_caching_control *caching_ctl = NULL; |
---|
10253 | | - bool remove_em; |
---|
10254 | | - |
---|
10255 | | - block_group = btrfs_lookup_block_group(fs_info, group_start); |
---|
10256 | | - BUG_ON(!block_group); |
---|
10257 | | - BUG_ON(!block_group->ro); |
---|
10258 | | - |
---|
10259 | | - trace_btrfs_remove_block_group(block_group); |
---|
10260 | | - /* |
---|
10261 | | - * Free the reserved super bytes from this block group before |
---|
10262 | | - * remove it. |
---|
10263 | | - */ |
---|
10264 | | - free_excluded_extents(block_group); |
---|
10265 | | - btrfs_free_ref_tree_range(fs_info, block_group->key.objectid, |
---|
10266 | | - block_group->key.offset); |
---|
10267 | | - |
---|
10268 | | - memcpy(&key, &block_group->key, sizeof(key)); |
---|
10269 | | - index = btrfs_bg_flags_to_raid_index(block_group->flags); |
---|
10270 | | - factor = btrfs_bg_type_to_factor(block_group->flags); |
---|
10271 | | - |
---|
10272 | | - /* make sure this block group isn't part of an allocation cluster */ |
---|
10273 | | - cluster = &fs_info->data_alloc_cluster; |
---|
10274 | | - spin_lock(&cluster->refill_lock); |
---|
10275 | | - btrfs_return_cluster_to_free_space(block_group, cluster); |
---|
10276 | | - spin_unlock(&cluster->refill_lock); |
---|
10277 | | - |
---|
10278 | | - /* |
---|
10279 | | - * make sure this block group isn't part of a metadata |
---|
10280 | | - * allocation cluster |
---|
10281 | | - */ |
---|
10282 | | - cluster = &fs_info->meta_alloc_cluster; |
---|
10283 | | - spin_lock(&cluster->refill_lock); |
---|
10284 | | - btrfs_return_cluster_to_free_space(block_group, cluster); |
---|
10285 | | - spin_unlock(&cluster->refill_lock); |
---|
10286 | | - |
---|
10287 | | - path = btrfs_alloc_path(); |
---|
10288 | | - if (!path) { |
---|
10289 | | - ret = -ENOMEM; |
---|
10290 | | - goto out; |
---|
10291 | | - } |
---|
10292 | | - |
---|
10293 | | - /* |
---|
10294 | | - * get the inode first so any iput calls done for the io_list |
---|
10295 | | - * aren't the final iput (no unlinks allowed now) |
---|
10296 | | - */ |
---|
10297 | | - inode = lookup_free_space_inode(fs_info, block_group, path); |
---|
10298 | | - |
---|
10299 | | - mutex_lock(&trans->transaction->cache_write_mutex); |
---|
10300 | | - /* |
---|
10301 | | - * make sure our free space cache IO is done before removing the
---|
10302 | | - * free space inode |
---|
10303 | | - */ |
---|
10304 | | - spin_lock(&trans->transaction->dirty_bgs_lock); |
---|
10305 | | - if (!list_empty(&block_group->io_list)) { |
---|
10306 | | - list_del_init(&block_group->io_list); |
---|
10307 | | - |
---|
10308 | | - WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode); |
---|
10309 | | - |
---|
10310 | | - spin_unlock(&trans->transaction->dirty_bgs_lock); |
---|
10311 | | - btrfs_wait_cache_io(trans, block_group, path); |
---|
10312 | | - btrfs_put_block_group(block_group); |
---|
10313 | | - spin_lock(&trans->transaction->dirty_bgs_lock); |
---|
10314 | | - } |
---|
10315 | | - |
---|
10316 | | - if (!list_empty(&block_group->dirty_list)) { |
---|
10317 | | - list_del_init(&block_group->dirty_list); |
---|
10318 | | - btrfs_put_block_group(block_group); |
---|
10319 | | - } |
---|
10320 | | - spin_unlock(&trans->transaction->dirty_bgs_lock); |
---|
10321 | | - mutex_unlock(&trans->transaction->cache_write_mutex); |
---|
10322 | | - |
---|
10323 | | - if (!IS_ERR(inode)) { |
---|
10324 | | - ret = btrfs_orphan_add(trans, BTRFS_I(inode)); |
---|
10325 | | - if (ret) { |
---|
10326 | | - btrfs_add_delayed_iput(inode); |
---|
10327 | | - goto out; |
---|
10328 | | - } |
---|
10329 | | - clear_nlink(inode); |
---|
10330 | | - /* One for the block groups ref */ |
---|
10331 | | - spin_lock(&block_group->lock); |
---|
10332 | | - if (block_group->iref) { |
---|
10333 | | - block_group->iref = 0; |
---|
10334 | | - block_group->inode = NULL; |
---|
10335 | | - spin_unlock(&block_group->lock); |
---|
10336 | | - iput(inode); |
---|
10337 | | - } else { |
---|
10338 | | - spin_unlock(&block_group->lock); |
---|
10339 | | - } |
---|
10340 | | - /* One for our lookup ref */ |
---|
10341 | | - btrfs_add_delayed_iput(inode); |
---|
10342 | | - } |
---|
10343 | | - |
---|
10344 | | - key.objectid = BTRFS_FREE_SPACE_OBJECTID; |
---|
10345 | | - key.offset = block_group->key.objectid; |
---|
10346 | | - key.type = 0; |
---|
10347 | | - |
---|
10348 | | - ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); |
---|
10349 | | - if (ret < 0) |
---|
10350 | | - goto out; |
---|
10351 | | - if (ret > 0) |
---|
10352 | | - btrfs_release_path(path); |
---|
10353 | | - if (ret == 0) { |
---|
10354 | | - ret = btrfs_del_item(trans, tree_root, path); |
---|
10355 | | - if (ret) |
---|
10356 | | - goto out; |
---|
10357 | | - btrfs_release_path(path); |
---|
10358 | | - } |
---|
10359 | | - |
---|
10360 | | - spin_lock(&fs_info->block_group_cache_lock); |
---|
10361 | | - rb_erase(&block_group->cache_node, |
---|
10362 | | - &fs_info->block_group_cache_tree); |
---|
10363 | | - RB_CLEAR_NODE(&block_group->cache_node); |
---|
10364 | | - |
---|
10365 | | - /* Once for the block groups rbtree */ |
---|
10366 | | - btrfs_put_block_group(block_group); |
---|
10367 | | - |
---|
10368 | | - if (fs_info->first_logical_byte == block_group->key.objectid) |
---|
10369 | | - fs_info->first_logical_byte = (u64)-1; |
---|
10370 | | - spin_unlock(&fs_info->block_group_cache_lock); |
---|
10371 | | - |
---|
10372 | | - down_write(&block_group->space_info->groups_sem); |
---|
10373 | | - /* |
---|
10374 | | - * we must use list_del_init so people can check to see if they |
---|
10375 | | - * are still on the list after taking the semaphore |
---|
10376 | | - */ |
---|
10377 | | - list_del_init(&block_group->list); |
---|
10378 | | - if (list_empty(&block_group->space_info->block_groups[index])) { |
---|
10379 | | - kobj = block_group->space_info->block_group_kobjs[index]; |
---|
10380 | | - block_group->space_info->block_group_kobjs[index] = NULL; |
---|
10381 | | - clear_avail_alloc_bits(fs_info, block_group->flags); |
---|
10382 | | - } |
---|
10383 | | - up_write(&block_group->space_info->groups_sem); |
---|
10384 | | - if (kobj) { |
---|
10385 | | - kobject_del(kobj); |
---|
10386 | | - kobject_put(kobj); |
---|
10387 | | - } |
---|
10388 | | - |
---|
10389 | | - if (block_group->has_caching_ctl) |
---|
10390 | | - caching_ctl = get_caching_control(block_group); |
---|
10391 | | - if (block_group->cached == BTRFS_CACHE_STARTED) |
---|
10392 | | - wait_block_group_cache_done(block_group); |
---|
10393 | | - if (block_group->has_caching_ctl) { |
---|
10394 | | - down_write(&fs_info->commit_root_sem); |
---|
10395 | | - if (!caching_ctl) { |
---|
10396 | | - struct btrfs_caching_control *ctl; |
---|
10397 | | - |
---|
10398 | | - list_for_each_entry(ctl, |
---|
10399 | | - &fs_info->caching_block_groups, list) |
---|
10400 | | - if (ctl->block_group == block_group) { |
---|
10401 | | - caching_ctl = ctl; |
---|
10402 | | - refcount_inc(&caching_ctl->count); |
---|
10403 | | - break; |
---|
10404 | | - } |
---|
10405 | | - } |
---|
10406 | | - if (caching_ctl) |
---|
10407 | | - list_del_init(&caching_ctl->list); |
---|
10408 | | - up_write(&fs_info->commit_root_sem); |
---|
10409 | | - if (caching_ctl) { |
---|
10410 | | - /* Once for the caching bgs list and once for us. */ |
---|
10411 | | - put_caching_control(caching_ctl); |
---|
10412 | | - put_caching_control(caching_ctl); |
---|
10413 | | - } |
---|
10414 | | - } |
---|
10415 | | - |
---|
10416 | | - spin_lock(&trans->transaction->dirty_bgs_lock); |
---|
10417 | | - if (!list_empty(&block_group->dirty_list)) { |
---|
10418 | | - WARN_ON(1); |
---|
10419 | | - } |
---|
10420 | | - if (!list_empty(&block_group->io_list)) { |
---|
10421 | | - WARN_ON(1); |
---|
10422 | | - } |
---|
10423 | | - spin_unlock(&trans->transaction->dirty_bgs_lock); |
---|
10424 | | - btrfs_remove_free_space_cache(block_group); |
---|
10425 | | - |
---|
10426 | | - spin_lock(&block_group->space_info->lock); |
---|
10427 | | - list_del_init(&block_group->ro_list); |
---|
10428 | | - |
---|
10429 | | - if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { |
---|
10430 | | - WARN_ON(block_group->space_info->total_bytes |
---|
10431 | | - < block_group->key.offset); |
---|
10432 | | - WARN_ON(block_group->space_info->bytes_readonly |
---|
10433 | | - < block_group->key.offset); |
---|
10434 | | - WARN_ON(block_group->space_info->disk_total |
---|
10435 | | - < block_group->key.offset * factor); |
---|
10436 | | - } |
---|
10437 | | - block_group->space_info->total_bytes -= block_group->key.offset; |
---|
10438 | | - block_group->space_info->bytes_readonly -= block_group->key.offset; |
---|
10439 | | - block_group->space_info->disk_total -= block_group->key.offset * factor; |
---|
10440 | | - |
---|
10441 | | - spin_unlock(&block_group->space_info->lock); |
---|
10442 | | - |
---|
10443 | | - memcpy(&key, &block_group->key, sizeof(key)); |
---|
10444 | | - |
---|
10445 | | - mutex_lock(&fs_info->chunk_mutex); |
---|
10446 | | - if (!list_empty(&em->list)) { |
---|
10447 | | - /* We're in the transaction->pending_chunks list. */ |
---|
10448 | | - free_extent_map(em); |
---|
10449 | | - } |
---|
10450 | | - spin_lock(&block_group->lock); |
---|
10451 | | - block_group->removed = 1; |
---|
10452 | | - /* |
---|
10453 | | - * At this point trimming can't start on this block group, because we |
---|
10454 | | - * removed the block group from the tree fs_info->block_group_cache_tree |
---|
10455 | | - * so no one can find it anymore and even if someone already got this
---|
10456 | | - * block group before we removed it from the rbtree, they have already |
---|
10457 | | - * incremented block_group->trimming - if they didn't, they won't find |
---|
10458 | | - * any free space entries because we already removed them all when we |
---|
10459 | | - * called btrfs_remove_free_space_cache(). |
---|
10460 | | - * |
---|
10461 | | - * And we must not remove the extent map from the fs_info->mapping_tree |
---|
10462 | | - * to prevent the same logical address range and physical device space |
---|
10463 | | - * ranges from being reused for a new block group. This is because our |
---|
10464 | | - * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is |
---|
10465 | | - * completely transactionless, so while it is trimming a range the |
---|
10466 | | - * currently running transaction might finish and a new one start, |
---|
10467 | | - * allowing for new block groups to be created that can reuse the same |
---|
10468 | | - * physical device locations unless we take this special care. |
---|
10469 | | - * |
---|
10470 | | - * There may also be an implicit trim operation if the file system |
---|
10471 | | - * is mounted with -odiscard. The same protections must remain |
---|
10472 | | - * in place until the extents have been discarded completely when |
---|
10473 | | - * the transaction commit has completed. |
---|
10474 | | - */ |
---|
10475 | | - remove_em = (atomic_read(&block_group->trimming) == 0); |
---|
10476 | | - /* |
---|
10477 | | - * Make sure a trimmer task always sees the em in the pinned_chunks list |
---|
10478 | | - * if it sees block_group->removed == 1 (needs to lock block_group->lock |
---|
10479 | | - * before checking block_group->removed). |
---|
10480 | | - */ |
---|
10481 | | - if (!remove_em) { |
---|
10482 | | - /* |
---|
10483 | | - * Our em might be in trans->transaction->pending_chunks which |
---|
10484 | | - * is protected by fs_info->chunk_mutex ([lock|unlock]_chunks), |
---|
10485 | | - * and so is the fs_info->pinned_chunks list. |
---|
10486 | | - * |
---|
10487 | | - * So at this point we must be holding the chunk_mutex to avoid |
---|
10488 | | - * any races with chunk allocation (more specifically at |
---|
10489 | | - * volumes.c:contains_pending_extent()), to ensure it always |
---|
10490 | | - * sees the em, either in the pending_chunks list or in the |
---|
10491 | | - * pinned_chunks list. |
---|
10492 | | - */ |
---|
10493 | | - list_move_tail(&em->list, &fs_info->pinned_chunks); |
---|
10494 | | - } |
---|
10495 | | - spin_unlock(&block_group->lock); |
---|
10496 | | - |
---|
10497 | | - mutex_unlock(&fs_info->chunk_mutex); |
---|
10498 | | - |
---|
10499 | | - ret = remove_block_group_free_space(trans, block_group); |
---|
10500 | | - if (ret) |
---|
10501 | | - goto out; |
---|
10502 | | - |
---|
10503 | | - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
---|
10504 | | - if (ret > 0) |
---|
10505 | | - ret = -EIO; |
---|
10506 | | - if (ret < 0) |
---|
10507 | | - goto out; |
---|
10508 | | - |
---|
10509 | | - ret = btrfs_del_item(trans, root, path); |
---|
10510 | | - if (ret) |
---|
10511 | | - goto out; |
---|
10512 | | - |
---|
10513 | | - if (remove_em) { |
---|
10514 | | - struct extent_map_tree *em_tree; |
---|
10515 | | - |
---|
10516 | | - em_tree = &fs_info->mapping_tree.map_tree; |
---|
10517 | | - write_lock(&em_tree->lock); |
---|
10518 | | - /* |
---|
10519 | | - * The em might be in the pending_chunks list, so make sure the |
---|
10520 | | - * chunk mutex is locked, since remove_extent_mapping() will |
---|
10521 | | - * delete us from that list. |
---|
10522 | | - */ |
---|
10523 | | - remove_extent_mapping(em_tree, em); |
---|
10524 | | - write_unlock(&em_tree->lock); |
---|
10525 | | - /* once for the tree */ |
---|
10526 | | - free_extent_map(em); |
---|
10527 | | - } |
---|
10528 | | - |
---|
10529 | | -out: |
---|
10530 | | - /* Once for the lookup reference */ |
---|
10531 | | - btrfs_put_block_group(block_group); |
---|
10532 | | - btrfs_free_path(path); |
---|
10533 | | - return ret; |
---|
10534 | | -} |
---|
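The long removal comment above relies on a simple ordering protocol between the remover and any trimmer task. As a rough, hypothetical trimmer-side sketch (example_try_start_trim is not a function in these sources; field names follow this era's btrfs_block_group_cache), either side always observes the other's state:

/*
 * Hypothetical sketch: ->removed is only set and tested under
 * block_group->lock, and ->trimming is raised before that lock is dropped,
 * so the remover either sees trimming > 0 and keeps the extent map pinned,
 * or the trimmer sees removed == 1 and backs off.
 */
static bool example_try_start_trim(struct btrfs_block_group_cache *bg)
{
	bool ok = true;

	spin_lock(&bg->lock);
	if (bg->removed)
		ok = false;			/* block group already gone */
	else
		atomic_inc(&bg->trimming);	/* pin the chunk mapping */
	spin_unlock(&bg->lock);

	return ok;
}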
10535 | | - |
---|
10536 | | -struct btrfs_trans_handle * |
---|
10537 | | -btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info, |
---|
10538 | | - const u64 chunk_offset) |
---|
10539 | | -{ |
---|
10540 | | - struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; |
---|
10541 | | - struct extent_map *em; |
---|
10542 | | - struct map_lookup *map; |
---|
10543 | | - unsigned int num_items; |
---|
10544 | | - |
---|
10545 | | - read_lock(&em_tree->lock); |
---|
10546 | | - em = lookup_extent_mapping(em_tree, chunk_offset, 1); |
---|
10547 | | - read_unlock(&em_tree->lock); |
---|
10548 | | - ASSERT(em && em->start == chunk_offset); |
---|
10549 | | - |
---|
10550 | | - /* |
---|
10551 | | - * We need to reserve 3 + N units from the metadata space info in order |
---|
10552 | | - * to remove a block group (done at btrfs_remove_chunk() and at |
---|
10553 | | - * btrfs_remove_block_group()), which are used for: |
---|
10554 | | - * |
---|
10555 | | - * 1 unit for adding the free space inode's orphan (located in the tree |
---|
10556 | | - * of tree roots). |
---|
10557 | | - * 1 unit for deleting the block group item (located in the extent |
---|
10558 | | - * tree). |
---|
10559 | | - * 1 unit for deleting the free space item (located in tree of tree |
---|
10560 | | - * roots). |
---|
10561 | | - * N units for deleting N device extent items corresponding to each |
---|
10562 | | - * stripe (located in the device tree). |
---|
10563 | | - * |
---|
10564 | | - * In order to remove a block group we also need to reserve units in the |
---|
10565 | | - * system space info in order to update the chunk tree (update one or |
---|
10566 | | - * more device items and remove one chunk item), but this is done at |
---|
10567 | | - * btrfs_remove_chunk() through a call to check_system_chunk(). |
---|
10568 | | - */ |
---|
10569 | | - map = em->map_lookup; |
---|
10570 | | - num_items = 3 + map->num_stripes; |
---|
10571 | | - free_extent_map(em); |
---|
10572 | | - |
---|
10573 | | - return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root, |
---|
10574 | | - num_items, 1); |
---|
10575 | | -} |
---|
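To make the "3 + N" reservation above concrete, here is a tiny standalone worked example, assuming a two-stripe chunk (the stripe count is illustrative, not taken from these sources):

#include <stdio.h>

int main(void)
{
	unsigned int num_stripes = 2;		/* e.g. a RAID1 chunk: two device extents */
	unsigned int num_items = 3 + num_stripes;

	/* 1 orphan item + 1 block group item + 1 free space item + N dev extent items */
	printf("metadata units to reserve: %u\n", num_items);	/* prints 5 */
	return 0;
}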
10576 | | - |
---|
10577 | | -/* |
---|
10578 | | - * Process the unused_bgs list and remove any that don't have any allocated |
---|
10579 | | - * space inside of them. |
---|
10580 | | - */ |
---|
10581 | | -void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) |
---|
10582 | | -{ |
---|
10583 | | - struct btrfs_block_group_cache *block_group; |
---|
10584 | | - struct btrfs_space_info *space_info; |
---|
10585 | | - struct btrfs_trans_handle *trans; |
---|
10586 | | - int ret = 0; |
---|
10587 | | - |
---|
10588 | | - if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags)) |
---|
10589 | | - return; |
---|
10590 | | - |
---|
10591 | | - spin_lock(&fs_info->unused_bgs_lock); |
---|
10592 | | - while (!list_empty(&fs_info->unused_bgs)) { |
---|
10593 | | - u64 start, end; |
---|
10594 | | - int trimming; |
---|
10595 | | - |
---|
10596 | | - block_group = list_first_entry(&fs_info->unused_bgs, |
---|
10597 | | - struct btrfs_block_group_cache, |
---|
10598 | | - bg_list); |
---|
10599 | | - list_del_init(&block_group->bg_list); |
---|
10600 | | - |
---|
10601 | | - space_info = block_group->space_info; |
---|
10602 | | - |
---|
10603 | | - if (ret || btrfs_mixed_space_info(space_info)) { |
---|
10604 | | - btrfs_put_block_group(block_group); |
---|
10605 | | - continue; |
---|
10606 | | - } |
---|
10607 | | - spin_unlock(&fs_info->unused_bgs_lock); |
---|
10608 | | - |
---|
10609 | | - mutex_lock(&fs_info->delete_unused_bgs_mutex); |
---|
10610 | | - |
---|
10611 | | - /* Don't want to race with allocators so take the groups_sem */ |
---|
10612 | | - down_write(&space_info->groups_sem); |
---|
10613 | | - spin_lock(&block_group->lock); |
---|
10614 | | - if (block_group->reserved || block_group->pinned || |
---|
10615 | | - btrfs_block_group_used(&block_group->item) || |
---|
10616 | | - block_group->ro || |
---|
10617 | | - list_is_singular(&block_group->list)) { |
---|
10618 | | - /* |
---|
10619 | | - * We want to bail if we made new allocations or have |
---|
10620 | | - * outstanding allocations in this block group. We do |
---|
10621 | | - * the ro check in case balance is currently acting on |
---|
10622 | | - * this block group. |
---|
10623 | | - */ |
---|
10624 | | - trace_btrfs_skip_unused_block_group(block_group); |
---|
10625 | | - spin_unlock(&block_group->lock); |
---|
10626 | | - up_write(&space_info->groups_sem); |
---|
10627 | | - goto next; |
---|
10628 | | - } |
---|
10629 | | - spin_unlock(&block_group->lock); |
---|
10630 | | - |
---|
10631 | | - /* We don't want to force the issue, only flip if it's ok. */ |
---|
10632 | | - ret = inc_block_group_ro(block_group, 0); |
---|
10633 | | - up_write(&space_info->groups_sem); |
---|
10634 | | - if (ret < 0) { |
---|
10635 | | - ret = 0; |
---|
10636 | | - goto next; |
---|
10637 | | - } |
---|
10638 | | - |
---|
10639 | | - /* |
---|
10641 | | - * We want to do this before we do anything else so we can recover |
---|
10641 | | - * properly if we fail to join the transaction. |
---|
10642 | | - */ |
---|
10643 | | - trans = btrfs_start_trans_remove_block_group(fs_info, |
---|
10644 | | - block_group->key.objectid); |
---|
10645 | | - if (IS_ERR(trans)) { |
---|
10646 | | - btrfs_dec_block_group_ro(block_group); |
---|
10647 | | - ret = PTR_ERR(trans); |
---|
10648 | | - goto next; |
---|
10649 | | - } |
---|
10650 | | - |
---|
10651 | | - /* |
---|
10652 | | - * We could have pending pinned extents for this block group, |
---|
10653 | | - * just delete them, we don't care about them anymore. |
---|
10654 | | - */ |
---|
10655 | | - start = block_group->key.objectid; |
---|
10656 | | - end = start + block_group->key.offset - 1; |
---|
10657 | | - /* |
---|
10658 | | - * Hold the unused_bg_unpin_mutex lock to avoid racing with |
---|
10659 | | - * btrfs_finish_extent_commit(). If we are at transaction N, |
---|
10660 | | - * another task might be running finish_extent_commit() for the |
---|
10661 | | - * previous transaction N - 1, and have seen a range belonging |
---|
10662 | | - * to the block group in freed_extents[] before we were able to |
---|
10663 | | - * clear the whole block group range from freed_extents[]. This |
---|
10665 | | - * means that task can look up the block group after we |
---|
10665 | | - * unpinned it from freed_extents[] and removed it, leading to |
---|
10666 | | - * a BUG_ON() at btrfs_unpin_extent_range(). |
---|
10667 | | - */ |
---|
10668 | | - mutex_lock(&fs_info->unused_bg_unpin_mutex); |
---|
10669 | | - ret = clear_extent_bits(&fs_info->freed_extents[0], start, end, |
---|
10670 | | - EXTENT_DIRTY); |
---|
10671 | | - if (ret) { |
---|
10672 | | - mutex_unlock(&fs_info->unused_bg_unpin_mutex); |
---|
10673 | | - btrfs_dec_block_group_ro(block_group); |
---|
10674 | | - goto end_trans; |
---|
10675 | | - } |
---|
10676 | | - ret = clear_extent_bits(&fs_info->freed_extents[1], start, end, |
---|
10677 | | - EXTENT_DIRTY); |
---|
10678 | | - if (ret) { |
---|
10679 | | - mutex_unlock(&fs_info->unused_bg_unpin_mutex); |
---|
10680 | | - btrfs_dec_block_group_ro(block_group); |
---|
10681 | | - goto end_trans; |
---|
10682 | | - } |
---|
10683 | | - mutex_unlock(&fs_info->unused_bg_unpin_mutex); |
---|
10684 | | - |
---|
10685 | | - /* Reset pinned so btrfs_put_block_group doesn't complain */ |
---|
10686 | | - spin_lock(&space_info->lock); |
---|
10687 | | - spin_lock(&block_group->lock); |
---|
10688 | | - |
---|
10689 | | - space_info->bytes_pinned -= block_group->pinned; |
---|
10690 | | - space_info->bytes_readonly += block_group->pinned; |
---|
10691 | | - percpu_counter_add_batch(&space_info->total_bytes_pinned, |
---|
10692 | | - -block_group->pinned, |
---|
10693 | | - BTRFS_TOTAL_BYTES_PINNED_BATCH); |
---|
10694 | | - block_group->pinned = 0; |
---|
10695 | | - |
---|
10696 | | - spin_unlock(&block_group->lock); |
---|
10697 | | - spin_unlock(&space_info->lock); |
---|
10698 | | - |
---|
10699 | | - /* DISCARD can flip during remount */ |
---|
10700 | | - trimming = btrfs_test_opt(fs_info, DISCARD); |
---|
10701 | | - |
---|
10702 | | - /* Implicit trim during transaction commit. */ |
---|
10703 | | - if (trimming) |
---|
10704 | | - btrfs_get_block_group_trimming(block_group); |
---|
10705 | | - |
---|
10706 | | - /* |
---|
10708 | | - * btrfs_remove_chunk() will abort the transaction if things go |
---|
10708 | | - * horribly wrong. |
---|
10709 | | - */ |
---|
10710 | | - ret = btrfs_remove_chunk(trans, block_group->key.objectid); |
---|
10711 | | - |
---|
10712 | | - if (ret) { |
---|
10713 | | - if (trimming) |
---|
10714 | | - btrfs_put_block_group_trimming(block_group); |
---|
10715 | | - goto end_trans; |
---|
10716 | | - } |
---|
10717 | | - |
---|
10718 | | - /* |
---|
10719 | | - * If we're not mounted with -odiscard, we can just forget |
---|
10720 | | - * about this block group. Otherwise we'll need to wait |
---|
10721 | | - * until transaction commit to do the actual discard. |
---|
10722 | | - */ |
---|
10723 | | - if (trimming) { |
---|
10724 | | - spin_lock(&fs_info->unused_bgs_lock); |
---|
10725 | | - /* |
---|
10726 | | - * A concurrent scrub might have added us to the list |
---|
10727 | | - * fs_info->unused_bgs, so use a list_move operation |
---|
10728 | | - * to add the block group to the deleted_bgs list. |
---|
10729 | | - */ |
---|
10730 | | - list_move(&block_group->bg_list, |
---|
10731 | | - &trans->transaction->deleted_bgs); |
---|
10732 | | - spin_unlock(&fs_info->unused_bgs_lock); |
---|
10733 | | - btrfs_get_block_group(block_group); |
---|
10734 | | - } |
---|
10735 | | -end_trans: |
---|
10736 | | - btrfs_end_transaction(trans); |
---|
10737 | | -next: |
---|
10738 | | - mutex_unlock(&fs_info->delete_unused_bgs_mutex); |
---|
10739 | | - btrfs_put_block_group(block_group); |
---|
10740 | | - spin_lock(&fs_info->unused_bgs_lock); |
---|
10741 | | - } |
---|
10742 | | - spin_unlock(&fs_info->unused_bgs_lock); |
---|
10743 | | -} |
---|
10744 | | - |
---|
10745 | | -int btrfs_init_space_info(struct btrfs_fs_info *fs_info) |
---|
10746 | | -{ |
---|
10747 | | - struct btrfs_super_block *disk_super; |
---|
10748 | | - u64 features; |
---|
10749 | | - u64 flags; |
---|
10750 | | - int mixed = 0; |
---|
10751 | | - int ret; |
---|
10752 | | - |
---|
10753 | | - disk_super = fs_info->super_copy; |
---|
10754 | | - if (!btrfs_super_root(disk_super)) |
---|
10755 | | - return -EINVAL; |
---|
10756 | | - |
---|
10757 | | - features = btrfs_super_incompat_flags(disk_super); |
---|
10758 | | - if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) |
---|
10759 | | - mixed = 1; |
---|
10760 | | - |
---|
10761 | | - flags = BTRFS_BLOCK_GROUP_SYSTEM; |
---|
10762 | | - ret = create_space_info(fs_info, flags); |
---|
10763 | | - if (ret) |
---|
10764 | | - goto out; |
---|
10765 | | - |
---|
10766 | | - if (mixed) { |
---|
10767 | | - flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; |
---|
10768 | | - ret = create_space_info(fs_info, flags); |
---|
10769 | | - } else { |
---|
10770 | | - flags = BTRFS_BLOCK_GROUP_METADATA; |
---|
10771 | | - ret = create_space_info(fs_info, flags); |
---|
10772 | | - if (ret) |
---|
10773 | | - goto out; |
---|
10774 | | - |
---|
10775 | | - flags = BTRFS_BLOCK_GROUP_DATA; |
---|
10776 | | - ret = create_space_info(fs_info, flags); |
---|
10777 | | - } |
---|
10778 | | -out: |
---|
10779 | | - return ret; |
---|
10780 | 5700 | } |
---|
10781 | 5701 | |
---|
10782 | 5702 | int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, |
---|
.. | .. |
---|
10805 | 5725 | * it while performing the free space search since we have already |
---|
10806 | 5726 | * held back allocations. |
---|
10807 | 5727 | */ |
---|
10808 | | -static int btrfs_trim_free_extents(struct btrfs_device *device, |
---|
10809 | | - u64 minlen, u64 *trimmed) |
---|
| 5728 | +static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed) |
---|
10810 | 5729 | { |
---|
10811 | | - u64 start = 0, len = 0; |
---|
| 5730 | + u64 start = SZ_1M, len = 0, end = 0; |
---|
10812 | 5731 | int ret; |
---|
10813 | 5732 | |
---|
10814 | 5733 | *trimmed = 0; |
---|
.. | .. |
---|
10817 | 5736 | if (!blk_queue_discard(bdev_get_queue(device->bdev))) |
---|
10818 | 5737 | return 0; |
---|
10819 | 5738 | |
---|
10820 | | - /* Not writeable = nothing to do. */ |
---|
| 5739 | + /* Not writable = nothing to do. */ |
---|
10821 | 5740 | if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) |
---|
10822 | 5741 | return 0; |
---|
10823 | 5742 | |
---|
.. | .. |
---|
10829 | 5748 | |
---|
10830 | 5749 | while (1) { |
---|
10831 | 5750 | struct btrfs_fs_info *fs_info = device->fs_info; |
---|
10832 | | - struct btrfs_transaction *trans; |
---|
10833 | 5751 | u64 bytes; |
---|
10834 | 5752 | |
---|
10835 | 5753 | ret = mutex_lock_interruptible(&fs_info->chunk_mutex); |
---|
10836 | 5754 | if (ret) |
---|
10837 | 5755 | break; |
---|
10838 | 5756 | |
---|
10839 | | - ret = down_read_killable(&fs_info->commit_root_sem); |
---|
10840 | | - if (ret) { |
---|
| 5757 | + find_first_clear_extent_bit(&device->alloc_state, start, |
---|
| 5758 | + &start, &end, |
---|
| 5759 | + CHUNK_TRIMMED | CHUNK_ALLOCATED); |
---|
| 5760 | + |
---|
| 5761 | + /* Check if there are any CHUNK_* bits left */ |
---|
| 5762 | + if (start > device->total_bytes) { |
---|
| 5763 | + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); |
---|
| 5764 | + btrfs_warn_in_rcu(fs_info, |
---|
| 5765 | +"ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu", |
---|
| 5766 | + start, end - start + 1, |
---|
| 5767 | + rcu_str_deref(device->name), |
---|
| 5768 | + device->total_bytes); |
---|
10841 | 5769 | mutex_unlock(&fs_info->chunk_mutex); |
---|
| 5770 | + ret = 0; |
---|
10842 | 5771 | break; |
---|
10843 | 5772 | } |
---|
10844 | 5773 | |
---|
10845 | | - spin_lock(&fs_info->trans_lock); |
---|
10846 | | - trans = fs_info->running_transaction; |
---|
10847 | | - if (trans) |
---|
10848 | | - refcount_inc(&trans->use_count); |
---|
10849 | | - spin_unlock(&fs_info->trans_lock); |
---|
| 5774 | + /* Ensure we skip the reserved area in the first 1M */ |
---|
| 5775 | + start = max_t(u64, start, SZ_1M); |
---|
10850 | 5776 | |
---|
10851 | | - if (!trans) |
---|
10852 | | - up_read(&fs_info->commit_root_sem); |
---|
| 5777 | + /* |
---|
 | 5778 | + * If find_first_clear_extent_bit() finds a range that spans the |
---|
 | 5779 | + * end of the device it will set end to -1; in this case it's up |
---|
| 5780 | + * to the caller to trim the value to the size of the device. |
---|
| 5781 | + */ |
---|
| 5782 | + end = min(end, device->total_bytes - 1); |
---|
10853 | 5783 | |
---|
10854 | | - ret = find_free_dev_extent_start(trans, device, minlen, start, |
---|
10855 | | - &start, &len); |
---|
10856 | | - if (trans) { |
---|
10857 | | - up_read(&fs_info->commit_root_sem); |
---|
10858 | | - btrfs_put_transaction(trans); |
---|
10859 | | - } |
---|
| 5784 | + len = end - start + 1; |
---|
10860 | 5785 | |
---|
10861 | | - if (ret) { |
---|
| 5786 | + /* We didn't find any extents */ |
---|
| 5787 | + if (!len) { |
---|
10862 | 5788 | mutex_unlock(&fs_info->chunk_mutex); |
---|
10863 | | - if (ret == -ENOSPC) |
---|
10864 | | - ret = 0; |
---|
| 5789 | + ret = 0; |
---|
10865 | 5790 | break; |
---|
10866 | 5791 | } |
---|
10867 | 5792 | |
---|
10868 | | - ret = btrfs_issue_discard(device->bdev, start, len, &bytes); |
---|
| 5793 | + ret = btrfs_issue_discard(device->bdev, start, len, |
---|
| 5794 | + &bytes); |
---|
| 5795 | + if (!ret) |
---|
| 5796 | + set_extent_bits(&device->alloc_state, start, |
---|
| 5797 | + start + bytes - 1, |
---|
| 5798 | + CHUNK_TRIMMED); |
---|
10869 | 5799 | mutex_unlock(&fs_info->chunk_mutex); |
---|
10870 | 5800 | |
---|
10871 | 5801 | if (ret) |
---|
.. | .. |
---|
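The rewritten loop above no longer walks the commit root looking for free device extents; it queries the per-device alloc_state bits and marks what it discards. A rough standalone analogue, using a plain per-unit byte map in place of the kernel's extent bit tree (purely illustrative):

#include <stdio.h>

#define CHUNK_ALLOCATED	0x1
#define CHUNK_TRIMMED	0x2
#define NR_UNITS	8

int main(void)
{
	/* stand-in for device->alloc_state: one byte of state per unit */
	unsigned char state[NR_UNITS] = {
		CHUNK_ALLOCATED, 0, 0, CHUNK_ALLOCATED, 0, CHUNK_TRIMMED, 0, 0
	};

	for (int i = 0; i < NR_UNITS; i++) {
		if (state[i] & (CHUNK_ALLOCATED | CHUNK_TRIMMED))
			continue;			/* in use or already trimmed */
		printf("discard unit %d\n", i);		/* btrfs_issue_discard() stand-in */
		state[i] |= CHUNK_TRIMMED;		/* skipped by later fstrim passes */
	}
	return 0;
}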
10896 | 5826 | */ |
---|
10897 | 5827 | int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) |
---|
10898 | 5828 | { |
---|
10899 | | - struct btrfs_block_group_cache *cache = NULL; |
---|
| 5829 | + struct btrfs_block_group *cache = NULL; |
---|
10900 | 5830 | struct btrfs_device *device; |
---|
10901 | 5831 | struct list_head *devices; |
---|
10902 | 5832 | u64 group_trimmed; |
---|
| 5833 | + u64 range_end = U64_MAX; |
---|
10903 | 5834 | u64 start; |
---|
10904 | 5835 | u64 end; |
---|
10905 | 5836 | u64 trimmed = 0; |
---|
.. | .. |
---|
10909 | 5840 | int dev_ret = 0; |
---|
10910 | 5841 | int ret = 0; |
---|
10911 | 5842 | |
---|
| 5843 | + /* |
---|
| 5844 | + * Check range overflow if range->len is set. |
---|
| 5845 | + * The default range->len is U64_MAX. |
---|
| 5846 | + */ |
---|
| 5847 | + if (range->len != U64_MAX && |
---|
| 5848 | + check_add_overflow(range->start, range->len, &range_end)) |
---|
| 5849 | + return -EINVAL; |
---|
| 5850 | + |
---|
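The overflow guard above is easy to check in isolation. A small standalone sketch with made-up values, using the compiler builtin that typically backs the kernel's check_add_overflow():

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t start = UINT64_MAX - 10;	/* pathological fstrim_range.start */
	uint64_t len = 100;			/* range->len != U64_MAX, so it is checked */
	uint64_t range_end;

	if (__builtin_add_overflow(start, len, &range_end)) {
		puts("start + len wraps: reject with -EINVAL");
		return 1;
	}
	printf("range_end = %llu\n", (unsigned long long)range_end);
	return 0;
}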
10912 | 5851 | cache = btrfs_lookup_first_block_group(fs_info, range->start); |
---|
10913 | | - for (; cache; cache = next_block_group(fs_info, cache)) { |
---|
10914 | | - if (cache->key.objectid >= (range->start + range->len)) { |
---|
| 5852 | + for (; cache; cache = btrfs_next_block_group(cache)) { |
---|
| 5853 | + if (cache->start >= range_end) { |
---|
10915 | 5854 | btrfs_put_block_group(cache); |
---|
10916 | 5855 | break; |
---|
10917 | 5856 | } |
---|
10918 | 5857 | |
---|
10919 | | - start = max(range->start, cache->key.objectid); |
---|
10920 | | - end = min(range->start + range->len, |
---|
10921 | | - cache->key.objectid + cache->key.offset); |
---|
| 5858 | + start = max(range->start, cache->start); |
---|
| 5859 | + end = min(range_end, cache->start + cache->length); |
---|
10922 | 5860 | |
---|
10923 | 5861 | if (end - start >= range->minlen) { |
---|
10924 | | - if (!block_group_cache_done(cache)) { |
---|
10925 | | - ret = cache_block_group(cache, 0); |
---|
| 5862 | + if (!btrfs_block_group_done(cache)) { |
---|
| 5863 | + ret = btrfs_cache_block_group(cache, 0); |
---|
10926 | 5864 | if (ret) { |
---|
10927 | 5865 | bg_failed++; |
---|
10928 | 5866 | bg_ret = ret; |
---|
10929 | 5867 | continue; |
---|
10930 | 5868 | } |
---|
10931 | | - ret = wait_block_group_cache_done(cache); |
---|
| 5869 | + ret = btrfs_wait_block_group_cache_done(cache); |
---|
10932 | 5870 | if (ret) { |
---|
10933 | 5871 | bg_failed++; |
---|
10934 | 5872 | bg_ret = ret; |
---|
.. | .. |
---|
10957 | 5895 | mutex_lock(&fs_info->fs_devices->device_list_mutex); |
---|
10958 | 5896 | devices = &fs_info->fs_devices->devices; |
---|
10959 | 5897 | list_for_each_entry(device, devices, dev_list) { |
---|
10960 | | - ret = btrfs_trim_free_extents(device, range->minlen, |
---|
10961 | | - &group_trimmed); |
---|
| 5898 | + if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) |
---|
| 5899 | + continue; |
---|
| 5900 | + |
---|
| 5901 | + ret = btrfs_trim_free_extents(device, &group_trimmed); |
---|
10962 | 5902 | if (ret) { |
---|
10963 | 5903 | dev_failed++; |
---|
10964 | 5904 | dev_ret = ret; |
---|
.. | .. |
---|
10977 | 5917 | if (bg_ret) |
---|
10978 | 5918 | return bg_ret; |
---|
10979 | 5919 | return dev_ret; |
---|
10980 | | -} |
---|
10981 | | - |
---|
10982 | | -/* |
---|
10983 | | - * btrfs_{start,end}_write_no_snapshotting() are similar to |
---|
10984 | | - * mnt_{want,drop}_write(); they are used to prevent some tasks from writing |
---|
10985 | | - * data into the page cache through nocow before the subvolume is snapshotted, |
---|
10986 | | - * but flush the data to disk after the snapshot creation, or to prevent |
---|
10987 | | - * operations while snapshotting is ongoing that would cause the snapshot to be |
---|
10988 | | - * inconsistent (writes followed by expanding truncates, for example). |
---|
10989 | | - */ |
---|
10990 | | -void btrfs_end_write_no_snapshotting(struct btrfs_root *root) |
---|
10991 | | -{ |
---|
10992 | | - percpu_counter_dec(&root->subv_writers->counter); |
---|
10993 | | - cond_wake_up(&root->subv_writers->wait); |
---|
10994 | | -} |
---|
10995 | | - |
---|
10996 | | -int btrfs_start_write_no_snapshotting(struct btrfs_root *root) |
---|
10997 | | -{ |
---|
10998 | | - if (atomic_read(&root->will_be_snapshotted)) |
---|
10999 | | - return 0; |
---|
11000 | | - |
---|
11001 | | - percpu_counter_inc(&root->subv_writers->counter); |
---|
11002 | | - /* |
---|
11003 | | - * Make sure counter is updated before we check for snapshot creation. |
---|
11004 | | - */ |
---|
11005 | | - smp_mb(); |
---|
11006 | | - if (atomic_read(&root->will_be_snapshotted)) { |
---|
11007 | | - btrfs_end_write_no_snapshotting(root); |
---|
11008 | | - return 0; |
---|
11009 | | - } |
---|
11010 | | - return 1; |
---|
11011 | | -} |
---|
11012 | | - |
---|
11013 | | -void btrfs_wait_for_snapshot_creation(struct btrfs_root *root) |
---|
11014 | | -{ |
---|
11015 | | - while (true) { |
---|
11016 | | - int ret; |
---|
11017 | | - |
---|
11018 | | - ret = btrfs_start_write_no_snapshotting(root); |
---|
11019 | | - if (ret) |
---|
11020 | | - break; |
---|
11021 | | - wait_var_event(&root->will_be_snapshotted, |
---|
11022 | | - !atomic_read(&root->will_be_snapshotted)); |
---|
11023 | | - } |
---|
11024 | | -} |
---|
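Callers paired these removed helpers roughly as follows; a hypothetical caller (example_nocow_write is not a function in these sources) showing the intended pattern:

/*
 * Hypothetical caller: hold the "no snapshotting" reference for the whole
 * nocow write, and fall back to COW when a snapshot is about to be created
 * (btrfs_start_write_no_snapshotting() returning 0).
 */
static int example_nocow_write(struct btrfs_root *root)
{
	if (!btrfs_start_write_no_snapshotting(root))
		return -EAGAIN;		/* snapshot pending: caller must COW instead */

	/* ... write through the existing (nocow) extents here ... */

	btrfs_end_write_no_snapshotting(root);
	return 0;
}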
11025 | | - |
---|
11026 | | -void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg) |
---|
11027 | | -{ |
---|
11028 | | - struct btrfs_fs_info *fs_info = bg->fs_info; |
---|
11029 | | - |
---|
11030 | | - spin_lock(&fs_info->unused_bgs_lock); |
---|
11031 | | - if (list_empty(&bg->bg_list)) { |
---|
11032 | | - btrfs_get_block_group(bg); |
---|
11033 | | - trace_btrfs_add_unused_block_group(bg); |
---|
11034 | | - list_add_tail(&bg->bg_list, &fs_info->unused_bgs); |
---|
11035 | | - } |
---|
11036 | | - spin_unlock(&fs_info->unused_bgs_lock); |
---|
11037 | 5920 | } |
---|