.. | .. |
---|
14 | 14 | #include <linux/prefetch.h> |
---|
15 | 15 | #include <linux/cleancache.h> |
---|
16 | 16 | #include "extent_io.h" |
---|
| 17 | +#include "extent-io-tree.h" |
---|
17 | 18 | #include "extent_map.h" |
---|
18 | 19 | #include "ctree.h" |
---|
19 | 20 | #include "btrfs_inode.h" |
---|
.. | .. |
---|
34 | 35 | } |
---|
35 | 36 | |
---|
36 | 37 | #ifdef CONFIG_BTRFS_DEBUG |
---|
37 | | -static LIST_HEAD(buffers); |
---|
38 | 38 | static LIST_HEAD(states); |
---|
39 | | - |
---|
40 | 39 | static DEFINE_SPINLOCK(leak_lock); |
---|
41 | 40 | |
---|
42 | | -static inline |
---|
43 | | -void btrfs_leak_debug_add(struct list_head *new, struct list_head *head) |
---|
| 41 | +static inline void btrfs_leak_debug_add(spinlock_t *lock, |
---|
| 42 | + struct list_head *new, |
---|
| 43 | + struct list_head *head) |
---|
44 | 44 | { |
---|
45 | 45 | unsigned long flags; |
---|
46 | 46 | |
---|
47 | | - spin_lock_irqsave(&leak_lock, flags); |
---|
| 47 | + spin_lock_irqsave(lock, flags); |
---|
48 | 48 | list_add(new, head); |
---|
49 | | - spin_unlock_irqrestore(&leak_lock, flags); |
---|
| 49 | + spin_unlock_irqrestore(lock, flags); |
---|
50 | 50 | } |
---|
51 | 51 | |
---|
52 | | -static inline |
---|
53 | | -void btrfs_leak_debug_del(struct list_head *entry) |
---|
| 52 | +static inline void btrfs_leak_debug_del(spinlock_t *lock, |
---|
| 53 | + struct list_head *entry) |
---|
54 | 54 | { |
---|
55 | 55 | unsigned long flags; |
---|
56 | 56 | |
---|
57 | | - spin_lock_irqsave(&leak_lock, flags); |
---|
| 57 | + spin_lock_irqsave(lock, flags); |
---|
58 | 58 | list_del(entry); |
---|
59 | | - spin_unlock_irqrestore(&leak_lock, flags); |
---|
| 59 | + spin_unlock_irqrestore(lock, flags); |
---|
60 | 60 | } |
---|
61 | 61 | |
---|
62 | | -static inline |
---|
63 | | -void btrfs_leak_debug_check(void) |
---|
| 62 | +void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info) |
---|
| 63 | +{ |
---|
| 64 | + struct extent_buffer *eb; |
---|
| 65 | + unsigned long flags; |
---|
| 66 | + |
---|
| 67 | + /* |
---|
| 68 | + * If we didn't get into open_ctree our allocated_ebs will not be |
---|
| 69 | + * initialized, so just skip this. |
---|
| 70 | + */ |
---|
| 71 | + if (!fs_info->allocated_ebs.next) |
---|
| 72 | + return; |
---|
| 73 | + |
---|
| 74 | + spin_lock_irqsave(&fs_info->eb_leak_lock, flags); |
---|
| 75 | + while (!list_empty(&fs_info->allocated_ebs)) { |
---|
| 76 | + eb = list_first_entry(&fs_info->allocated_ebs, |
---|
| 77 | + struct extent_buffer, leak_list); |
---|
| 78 | + pr_err( |
---|
| 79 | + "BTRFS: buffer leak start %llu len %lu refs %d bflags %lu owner %llu\n", |
---|
| 80 | + eb->start, eb->len, atomic_read(&eb->refs), eb->bflags, |
---|
| 81 | + btrfs_header_owner(eb)); |
---|
| 82 | + list_del(&eb->leak_list); |
---|
| 83 | + kmem_cache_free(extent_buffer_cache, eb); |
---|
| 84 | + } |
---|
| 85 | + spin_unlock_irqrestore(&fs_info->eb_leak_lock, flags); |
---|
| 86 | +} |
---|
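For context, a minimal sketch of how an extent buffer gets registered on the per-fs_info leak list that the check above drains; the allocation details around it are elided and the helper body here is only an assumption for illustration, not part of this hunk:

static struct extent_buffer *__alloc_extent_buffer(struct btrfs_fs_info *fs_info,
						   u64 start, unsigned long len)
{
	struct extent_buffer *eb;

	eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS | __GFP_NOFAIL);
	eb->start = start;
	eb->len = len;
	eb->fs_info = fs_info;
	/* ... lock, refcount and bflags initialization elided ... */

	/* CONFIG_BTRFS_DEBUG: track the buffer on the per-fs_info list */
	btrfs_leak_debug_add(&fs_info->eb_leak_lock, &eb->leak_list,
			     &fs_info->allocated_ebs);
	return eb;
}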
| 87 | + |
---|
| 88 | +static inline void btrfs_extent_state_leak_debug_check(void) |
---|
64 | 89 | { |
---|
65 | 90 | struct extent_state *state; |
---|
66 | | - struct extent_buffer *eb; |
---|
67 | 91 | |
---|
68 | 92 | while (!list_empty(&states)) { |
---|
69 | 93 | state = list_entry(states.next, struct extent_state, leak_list); |
---|
.. | .. |
---|
74 | 98 | list_del(&state->leak_list); |
---|
75 | 99 | kmem_cache_free(extent_state_cache, state); |
---|
76 | 100 | } |
---|
77 | | - |
---|
78 | | - while (!list_empty(&buffers)) { |
---|
79 | | - eb = list_entry(buffers.next, struct extent_buffer, leak_list); |
---|
80 | | - pr_err("BTRFS: buffer leak start %llu len %lu refs %d bflags %lu\n", |
---|
81 | | - eb->start, eb->len, atomic_read(&eb->refs), eb->bflags); |
---|
82 | | - list_del(&eb->leak_list); |
---|
83 | | - kmem_cache_free(extent_buffer_cache, eb); |
---|
84 | | - } |
---|
85 | 101 | } |
---|
86 | 102 | |
---|
87 | 103 | #define btrfs_debug_check_extent_io_range(tree, start, end) \ |
---|
.. | .. |
---|
89 | 105 | static inline void __btrfs_debug_check_extent_io_range(const char *caller, |
---|
90 | 106 | struct extent_io_tree *tree, u64 start, u64 end) |
---|
91 | 107 | { |
---|
92 | | - if (tree->ops && tree->ops->check_extent_io_range) |
---|
93 | | - tree->ops->check_extent_io_range(tree->private_data, caller, |
---|
94 | | - start, end); |
---|
| 108 | + struct inode *inode = tree->private_data; |
---|
| 109 | + u64 isize; |
---|
| 110 | + |
---|
| 111 | + if (!inode || !is_data_inode(inode)) |
---|
| 112 | + return; |
---|
| 113 | + |
---|
| 114 | + isize = i_size_read(inode); |
---|
| 115 | + if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { |
---|
| 116 | + btrfs_debug_rl(BTRFS_I(inode)->root->fs_info, |
---|
| 117 | + "%s: ino %llu isize %llu odd range [%llu,%llu]", |
---|
| 118 | + caller, btrfs_ino(BTRFS_I(inode)), isize, start, end); |
---|
| 119 | + } |
---|
95 | 120 | } |
---|
96 | 121 | #else |
---|
97 | | -#define btrfs_leak_debug_add(new, head) do {} while (0) |
---|
98 | | -#define btrfs_leak_debug_del(entry) do {} while (0) |
---|
99 | | -#define btrfs_leak_debug_check() do {} while (0) |
---|
| 122 | +#define btrfs_leak_debug_add(lock, new, head) do {} while (0) |
---|
| 123 | +#define btrfs_leak_debug_del(lock, entry) do {} while (0) |
---|
| 124 | +#define btrfs_extent_state_leak_debug_check() do {} while (0) |
---|
100 | 125 | #define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0) |
---|
101 | 126 | #endif |
---|
102 | | - |
---|
103 | | -#define BUFFER_LRU_MAX 64 |
---|
104 | 127 | |
---|
105 | 128 | struct tree_entry { |
---|
106 | 129 | u64 start; |
---|
.. | .. |
---|
110 | 133 | |
---|
111 | 134 | struct extent_page_data { |
---|
112 | 135 | struct bio *bio; |
---|
113 | | - struct extent_io_tree *tree; |
---|
114 | 136 | /* tells writepage not to lock the state bits for this range |
---|
115 | 137 | * it still does the unlocking |
---|
116 | 138 | */ |
---|
.. | .. |
---|
138 | 160 | return ret; |
---|
139 | 161 | } |
---|
140 | 162 | |
---|
141 | | -static int __must_check submit_one_bio(struct bio *bio, int mirror_num, |
---|
142 | | - unsigned long bio_flags) |
---|
| 163 | +int __must_check submit_one_bio(struct bio *bio, int mirror_num, |
---|
| 164 | + unsigned long bio_flags) |
---|
143 | 165 | { |
---|
144 | 166 | blk_status_t ret = 0; |
---|
145 | | - struct bio_vec *bvec = bio_last_bvec_all(bio); |
---|
146 | | - struct page *page = bvec->bv_page; |
---|
147 | 167 | struct extent_io_tree *tree = bio->bi_private; |
---|
148 | | - u64 start; |
---|
149 | | - |
---|
150 | | - start = page_offset(page) + bvec->bv_offset; |
---|
151 | 168 | |
---|
152 | 169 | bio->bi_private = NULL; |
---|
153 | 170 | |
---|
154 | | - if (tree->ops) |
---|
155 | | - ret = tree->ops->submit_bio_hook(tree->private_data, bio, |
---|
156 | | - mirror_num, bio_flags, start); |
---|
| 171 | + if (is_data_inode(tree->private_data)) |
---|
| 172 | + ret = btrfs_submit_data_bio(tree->private_data, bio, mirror_num, |
---|
| 173 | + bio_flags); |
---|
157 | 174 | else |
---|
158 | | - btrfsic_submit_bio(bio); |
---|
| 175 | + ret = btrfs_submit_metadata_bio(tree->private_data, bio, |
---|
| 176 | + mirror_num, bio_flags); |
---|
159 | 177 | |
---|
160 | 178 | return blk_status_to_errno(ret); |
---|
161 | 179 | } |
---|
.. | .. |
---|
194 | 212 | return ret; |
---|
195 | 213 | } |
---|
196 | 214 | |
---|
197 | | -int __init extent_io_init(void) |
---|
| 215 | +int __init extent_state_cache_init(void) |
---|
198 | 216 | { |
---|
199 | 217 | extent_state_cache = kmem_cache_create("btrfs_extent_state", |
---|
200 | 218 | sizeof(struct extent_state), 0, |
---|
201 | 219 | SLAB_MEM_SPREAD, NULL); |
---|
202 | 220 | if (!extent_state_cache) |
---|
203 | 221 | return -ENOMEM; |
---|
| 222 | + return 0; |
---|
| 223 | +} |
---|
204 | 224 | |
---|
| 225 | +int __init extent_io_init(void) |
---|
| 226 | +{ |
---|
205 | 227 | extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer", |
---|
206 | 228 | sizeof(struct extent_buffer), 0, |
---|
207 | 229 | SLAB_MEM_SPREAD, NULL); |
---|
208 | 230 | if (!extent_buffer_cache) |
---|
209 | | - goto free_state_cache; |
---|
| 231 | + return -ENOMEM; |
---|
210 | 232 | |
---|
211 | 233 | if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE, |
---|
212 | 234 | offsetof(struct btrfs_io_bio, bio), |
---|
.. | .. |
---|
224 | 246 | free_buffer_cache: |
---|
225 | 247 | kmem_cache_destroy(extent_buffer_cache); |
---|
226 | 248 | extent_buffer_cache = NULL; |
---|
227 | | - |
---|
228 | | -free_state_cache: |
---|
229 | | - kmem_cache_destroy(extent_state_cache); |
---|
230 | | - extent_state_cache = NULL; |
---|
231 | 249 | return -ENOMEM; |
---|
| 250 | +} |
---|
| 251 | + |
---|
| 252 | +void __cold extent_state_cache_exit(void) |
---|
| 253 | +{ |
---|
| 254 | + btrfs_extent_state_leak_debug_check(); |
---|
| 255 | + kmem_cache_destroy(extent_state_cache); |
---|
232 | 256 | } |
---|
233 | 257 | |
---|
234 | 258 | void __cold extent_io_exit(void) |
---|
235 | 259 | { |
---|
236 | | - btrfs_leak_debug_check(); |
---|
237 | | - |
---|
238 | 260 | /* |
---|
239 | 261 | * Make sure all delayed rcu free are flushed before we |
---|
240 | 262 | * destroy caches. |
---|
241 | 263 | */ |
---|
242 | 264 | rcu_barrier(); |
---|
243 | | - kmem_cache_destroy(extent_state_cache); |
---|
244 | 265 | kmem_cache_destroy(extent_buffer_cache); |
---|
245 | 266 | bioset_exit(&btrfs_bioset); |
---|
246 | 267 | } |
---|
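With extent_state_cache_init()/extent_state_cache_exit() split out from extent_io_init()/extent_io_exit(), the module init path can register and tear down the two caches independently. A hedged sketch of that ordering (the function name and unwind label are illustrative, not the real init_btrfs_fs()):

static int __init btrfs_caches_init_sketch(void)
{
	int err;

	err = extent_state_cache_init();
	if (err)
		return err;

	err = extent_io_init();
	if (err)
		goto free_state_cache;

	/* ... remaining module initialization ... */
	return 0;

free_state_cache:
	extent_state_cache_exit();
	return err;
}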
247 | 268 | |
---|
248 | | -void extent_io_tree_init(struct extent_io_tree *tree, |
---|
| 269 | +/* |
---|
| 270 | + * For the file_extent_tree, we want to hold the inode lock when we lookup and |
---|
| 271 | + * update the disk_i_size, but lockdep will complain because for the io_tree |
---|
| 272 | + * we hold the tree lock and then take the inode lock when setting delalloc. |
---|
| 273 | + * These two locking patterns are unrelated, so give the file_extent_tree its |
---|
| 274 | + * own lockdep class to keep the two from getting mixed up. |
---|
| 275 | + */ |
---|
| 276 | +static struct lock_class_key file_extent_tree_class; |
---|
| 277 | + |
---|
| 278 | +void extent_io_tree_init(struct btrfs_fs_info *fs_info, |
---|
| 279 | + struct extent_io_tree *tree, unsigned int owner, |
---|
249 | 280 | void *private_data) |
---|
250 | 281 | { |
---|
| 282 | + tree->fs_info = fs_info; |
---|
251 | 283 | tree->state = RB_ROOT; |
---|
252 | | - tree->ops = NULL; |
---|
253 | 284 | tree->dirty_bytes = 0; |
---|
254 | 285 | spin_lock_init(&tree->lock); |
---|
255 | 286 | tree->private_data = private_data; |
---|
| 287 | + tree->owner = owner; |
---|
| 288 | + if (owner == IO_TREE_INODE_FILE_EXTENT) |
---|
| 289 | + lockdep_set_class(&tree->lock, &file_extent_tree_class); |
---|
| 290 | +} |
---|
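The new signature ties every tree to its fs_info and an owner id, and the IO_TREE_INODE_FILE_EXTENT owner picks up the dedicated lockdep class above. A sketch of how an inode might now initialize its trees; IO_TREE_INODE_IO and the exact field names are assumptions for illustration:

static void btrfs_inode_io_trees_init_sketch(struct btrfs_fs_info *fs_info,
					     struct btrfs_inode *ei)
{
	/* regular inode io_tree: default lockdep class */
	extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO,
			    &ei->vfs_inode);
	/* file extent tree: gets file_extent_tree_class from above */
	extent_io_tree_init(fs_info, &ei->file_extent_tree,
			    IO_TREE_INODE_FILE_EXTENT, &ei->vfs_inode);
}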
| 291 | + |
---|
| 292 | +void extent_io_tree_release(struct extent_io_tree *tree) |
---|
| 293 | +{ |
---|
| 294 | + spin_lock(&tree->lock); |
---|
| 295 | + /* |
---|
| 296 | + * Do a single barrier for the waitqueue_active check here, the state |
---|
| 297 | + * of the waitqueue should not change once extent_io_tree_release is |
---|
| 298 | + * called. |
---|
| 299 | + */ |
---|
| 300 | + smp_mb(); |
---|
| 301 | + while (!RB_EMPTY_ROOT(&tree->state)) { |
---|
| 302 | + struct rb_node *node; |
---|
| 303 | + struct extent_state *state; |
---|
| 304 | + |
---|
| 305 | + node = rb_first(&tree->state); |
---|
| 306 | + state = rb_entry(node, struct extent_state, rb_node); |
---|
| 307 | + rb_erase(&state->rb_node, &tree->state); |
---|
| 308 | + RB_CLEAR_NODE(&state->rb_node); |
---|
| 309 | + /* |
---|
| 310 | + * btree io trees aren't supposed to have tasks waiting for |
---|
| 311 | + * changes in the flags of extent states ever. |
---|
| 312 | + */ |
---|
| 313 | + ASSERT(!waitqueue_active(&state->wq)); |
---|
| 314 | + free_extent_state(state); |
---|
| 315 | + |
---|
| 316 | + cond_resched_lock(&tree->lock); |
---|
| 317 | + } |
---|
| 318 | + spin_unlock(&tree->lock); |
---|
256 | 319 | } |
---|
257 | 320 | |
---|
258 | 321 | static struct extent_state *alloc_extent_state(gfp_t mask) |
---|
.. | .. |
---|
270 | 333 | state->state = 0; |
---|
271 | 334 | state->failrec = NULL; |
---|
272 | 335 | RB_CLEAR_NODE(&state->rb_node); |
---|
273 | | - btrfs_leak_debug_add(&state->leak_list, &states); |
---|
| 336 | + btrfs_leak_debug_add(&leak_lock, &state->leak_list, &states); |
---|
274 | 337 | refcount_set(&state->refs, 1); |
---|
275 | 338 | init_waitqueue_head(&state->wq); |
---|
276 | 339 | trace_alloc_extent_state(state, mask, _RET_IP_); |
---|
.. | .. |
---|
283 | 346 | return; |
---|
284 | 347 | if (refcount_dec_and_test(&state->refs)) { |
---|
285 | 348 | WARN_ON(extent_state_in_tree(state)); |
---|
286 | | - btrfs_leak_debug_del(&state->leak_list); |
---|
| 349 | + btrfs_leak_debug_del(&leak_lock, &state->leak_list); |
---|
287 | 350 | trace_free_extent_state(state, _RET_IP_); |
---|
288 | 351 | kmem_cache_free(extent_state_cache, state); |
---|
289 | 352 | } |
---|
.. | .. |
---|
325 | 388 | return NULL; |
---|
326 | 389 | } |
---|
327 | 390 | |
---|
| 391 | +/** |
---|
| 392 | + * __etree_search - search @tree for an entry that contains @offset. Such |
---|
| 393 | + * entry would have entry->start <= offset && entry->end >= offset. |
---|
| 394 | + * |
---|
| 395 | + * @tree - the tree to search |
---|
| 396 | + * @offset - offset that should fall within an entry in @tree |
---|
| 397 | + * @next_ret - pointer to the first entry whose range ends after @offset |
---|
| 398 | + * @prev_ret - pointer to the first entry whose range begins before @offset |
---|
| 399 | + * @p_ret - pointer where new node should be anchored (used when inserting an |
---|
| 400 | + * entry in the tree) |
---|
| 401 | + * @parent_ret - points to entry which would have been the parent of the entry, |
---|
| 402 | + * containing @offset |
---|
| 403 | + * |
---|
| 404 | + * This function returns a pointer to the entry that contains @offset byte |
---|
| 405 | + * address. If no such entry exists, then NULL is returned and the other |
---|
| 406 | + * pointer arguments to the function are filled, otherwise the found entry is |
---|
| 407 | + * returned and other pointers are left untouched. |
---|
| 408 | + */ |
---|
328 | 409 | static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, |
---|
329 | | - struct rb_node **prev_ret, |
---|
330 | 410 | struct rb_node **next_ret, |
---|
| 411 | + struct rb_node **prev_ret, |
---|
331 | 412 | struct rb_node ***p_ret, |
---|
332 | 413 | struct rb_node **parent_ret) |
---|
333 | 414 | { |
---|
.. | .. |
---|
356 | 437 | if (parent_ret) |
---|
357 | 438 | *parent_ret = prev; |
---|
358 | 439 | |
---|
359 | | - if (prev_ret) { |
---|
| 440 | + if (next_ret) { |
---|
360 | 441 | orig_prev = prev; |
---|
361 | 442 | while (prev && offset > prev_entry->end) { |
---|
362 | 443 | prev = rb_next(prev); |
---|
363 | 444 | prev_entry = rb_entry(prev, struct tree_entry, rb_node); |
---|
364 | 445 | } |
---|
365 | | - *prev_ret = prev; |
---|
| 446 | + *next_ret = prev; |
---|
366 | 447 | prev = orig_prev; |
---|
367 | 448 | } |
---|
368 | 449 | |
---|
369 | | - if (next_ret) { |
---|
| 450 | + if (prev_ret) { |
---|
370 | 451 | prev_entry = rb_entry(prev, struct tree_entry, rb_node); |
---|
371 | 452 | while (prev && offset < prev_entry->start) { |
---|
372 | 453 | prev = rb_prev(prev); |
---|
373 | 454 | prev_entry = rb_entry(prev, struct tree_entry, rb_node); |
---|
374 | 455 | } |
---|
375 | | - *next_ret = prev; |
---|
| 456 | + *prev_ret = prev; |
---|
376 | 457 | } |
---|
377 | 458 | return NULL; |
---|
378 | 459 | } |
---|
.. | .. |
---|
383 | 464 | struct rb_node ***p_ret, |
---|
384 | 465 | struct rb_node **parent_ret) |
---|
385 | 466 | { |
---|
386 | | - struct rb_node *prev = NULL; |
---|
| 467 | + struct rb_node *next = NULL; |
---|
387 | 468 | struct rb_node *ret; |
---|
388 | 469 | |
---|
389 | | - ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret); |
---|
| 470 | + ret = __etree_search(tree, offset, &next, NULL, p_ret, parent_ret); |
---|
390 | 471 | if (!ret) |
---|
391 | | - return prev; |
---|
| 472 | + return next; |
---|
392 | 473 | return ret; |
---|
393 | 474 | } |
---|
394 | 475 | |
---|
.. | .. |
---|
396 | 477 | u64 offset) |
---|
397 | 478 | { |
---|
398 | 479 | return tree_search_for_insert(tree, offset, NULL, NULL); |
---|
399 | | -} |
---|
400 | | - |
---|
401 | | -static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, |
---|
402 | | - struct extent_state *other) |
---|
403 | | -{ |
---|
404 | | - if (tree->ops && tree->ops->merge_extent_hook) |
---|
405 | | - tree->ops->merge_extent_hook(tree->private_data, new, other); |
---|
406 | 480 | } |
---|
407 | 481 | |
---|
408 | 482 | /* |
---|
.. | .. |
---|
420 | 494 | struct extent_state *other; |
---|
421 | 495 | struct rb_node *other_node; |
---|
422 | 496 | |
---|
423 | | - if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) |
---|
| 497 | + if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY)) |
---|
424 | 498 | return; |
---|
425 | 499 | |
---|
426 | 500 | other_node = rb_prev(&state->rb_node); |
---|
.. | .. |
---|
428 | 502 | other = rb_entry(other_node, struct extent_state, rb_node); |
---|
429 | 503 | if (other->end == state->start - 1 && |
---|
430 | 504 | other->state == state->state) { |
---|
431 | | - merge_cb(tree, state, other); |
---|
| 505 | + if (tree->private_data && |
---|
| 506 | + is_data_inode(tree->private_data)) |
---|
| 507 | + btrfs_merge_delalloc_extent(tree->private_data, |
---|
| 508 | + state, other); |
---|
432 | 509 | state->start = other->start; |
---|
433 | 510 | rb_erase(&other->rb_node, &tree->state); |
---|
434 | 511 | RB_CLEAR_NODE(&other->rb_node); |
---|
.. | .. |
---|
440 | 517 | other = rb_entry(other_node, struct extent_state, rb_node); |
---|
441 | 518 | if (other->start == state->end + 1 && |
---|
442 | 519 | other->state == state->state) { |
---|
443 | | - merge_cb(tree, state, other); |
---|
| 520 | + if (tree->private_data && |
---|
| 521 | + is_data_inode(tree->private_data)) |
---|
| 522 | + btrfs_merge_delalloc_extent(tree->private_data, |
---|
| 523 | + state, other); |
---|
444 | 524 | state->end = other->end; |
---|
445 | 525 | rb_erase(&other->rb_node, &tree->state); |
---|
446 | 526 | RB_CLEAR_NODE(&other->rb_node); |
---|
447 | 527 | free_extent_state(other); |
---|
448 | 528 | } |
---|
449 | 529 | } |
---|
450 | | -} |
---|
451 | | - |
---|
452 | | -static void set_state_cb(struct extent_io_tree *tree, |
---|
453 | | - struct extent_state *state, unsigned *bits) |
---|
454 | | -{ |
---|
455 | | - if (tree->ops && tree->ops->set_bit_hook) |
---|
456 | | - tree->ops->set_bit_hook(tree->private_data, state, bits); |
---|
457 | | -} |
---|
458 | | - |
---|
459 | | -static void clear_state_cb(struct extent_io_tree *tree, |
---|
460 | | - struct extent_state *state, unsigned *bits) |
---|
461 | | -{ |
---|
462 | | - if (tree->ops && tree->ops->clear_bit_hook) |
---|
463 | | - tree->ops->clear_bit_hook(tree->private_data, state, bits); |
---|
464 | 530 | } |
---|
465 | 531 | |
---|
466 | 532 | static void set_state_bits(struct extent_io_tree *tree, |
---|
.. | .. |
---|
485 | 551 | { |
---|
486 | 552 | struct rb_node *node; |
---|
487 | 553 | |
---|
488 | | - if (end < start) |
---|
489 | | - WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n", |
---|
490 | | - end, start); |
---|
| 554 | + if (end < start) { |
---|
| 555 | + btrfs_err(tree->fs_info, |
---|
| 556 | + "insert state: end < start %llu %llu", end, start); |
---|
| 557 | + WARN_ON(1); |
---|
| 558 | + } |
---|
491 | 559 | state->start = start; |
---|
492 | 560 | state->end = end; |
---|
493 | 561 | |
---|
.. | .. |
---|
497 | 565 | if (node) { |
---|
498 | 566 | struct extent_state *found; |
---|
499 | 567 | found = rb_entry(node, struct extent_state, rb_node); |
---|
500 | | - pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n", |
---|
| 568 | + btrfs_err(tree->fs_info, |
---|
| 569 | + "found node %llu %llu on insert of %llu %llu", |
---|
501 | 570 | found->start, found->end, start, end); |
---|
502 | 571 | return -EEXIST; |
---|
503 | 572 | } |
---|
504 | 573 | merge_state(tree, state); |
---|
505 | 574 | return 0; |
---|
506 | | -} |
---|
507 | | - |
---|
508 | | -static void split_cb(struct extent_io_tree *tree, struct extent_state *orig, |
---|
509 | | - u64 split) |
---|
510 | | -{ |
---|
511 | | - if (tree->ops && tree->ops->split_extent_hook) |
---|
512 | | - tree->ops->split_extent_hook(tree->private_data, orig, split); |
---|
513 | 575 | } |
---|
514 | 576 | |
---|
515 | 577 | /* |
---|
.. | .. |
---|
531 | 593 | { |
---|
532 | 594 | struct rb_node *node; |
---|
533 | 595 | |
---|
534 | | - split_cb(tree, orig, split); |
---|
| 596 | + if (tree->private_data && is_data_inode(tree->private_data)) |
---|
| 597 | + btrfs_split_delalloc_extent(tree->private_data, orig, split); |
---|
535 | 598 | |
---|
536 | 599 | prealloc->start = orig->start; |
---|
537 | 600 | prealloc->end = split - 1; |
---|
.. | .. |
---|
558 | 621 | |
---|
559 | 622 | /* |
---|
560 | 623 | * utility function to clear some bits in an extent state struct. |
---|
561 | | - * it will optionally wake up any one waiting on this state (wake == 1). |
---|
| 624 | + * it will optionally wake up anyone waiting on this state (wake == 1). |
---|
562 | 625 | * |
---|
563 | 626 | * If no bits are set on the state struct after clearing things, the |
---|
564 | 627 | * struct is freed and removed from the tree |
---|
.. | .. |
---|
577 | 640 | WARN_ON(range > tree->dirty_bytes); |
---|
578 | 641 | tree->dirty_bytes -= range; |
---|
579 | 642 | } |
---|
580 | | - clear_state_cb(tree, state, bits); |
---|
| 643 | + |
---|
| 644 | + if (tree->private_data && is_data_inode(tree->private_data)) |
---|
| 645 | + btrfs_clear_delalloc_extent(tree->private_data, state, bits); |
---|
| 646 | + |
---|
581 | 647 | ret = add_extent_changeset(state, bits_to_clear, changeset, 0); |
---|
582 | 648 | BUG_ON(ret < 0); |
---|
583 | 649 | state->state &= ~bits_to_clear; |
---|
.. | .. |
---|
610 | 676 | |
---|
611 | 677 | static void extent_io_tree_panic(struct extent_io_tree *tree, int err) |
---|
612 | 678 | { |
---|
613 | | - struct inode *inode = tree->private_data; |
---|
614 | | - |
---|
615 | | - btrfs_panic(btrfs_sb(inode->i_sb), err, |
---|
| 679 | + btrfs_panic(tree->fs_info, err, |
---|
616 | 680 | "locking error: extent tree was modified by another thread while locked"); |
---|
617 | 681 | } |
---|
618 | 682 | |
---|
.. | .. |
---|
642 | 706 | int clear = 0; |
---|
643 | 707 | |
---|
644 | 708 | btrfs_debug_check_extent_io_range(tree, start, end); |
---|
| 709 | + trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits); |
---|
645 | 710 | |
---|
646 | 711 | if (bits & EXTENT_DELALLOC) |
---|
647 | 712 | bits |= EXTENT_NORESERVE; |
---|
648 | 713 | |
---|
649 | 714 | if (delete) |
---|
650 | 715 | bits |= ~EXTENT_CTLBITS; |
---|
651 | | - bits |= EXTENT_FIRST_DELALLOC; |
---|
652 | 716 | |
---|
653 | | - if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) |
---|
| 717 | + if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY)) |
---|
654 | 718 | clear = 1; |
---|
655 | 719 | again: |
---|
656 | 720 | if (!prealloc && gfpflags_allow_blocking(mask)) { |
---|
.. | .. |
---|
854 | 918 | unsigned bits_to_set = *bits & ~EXTENT_CTLBITS; |
---|
855 | 919 | int ret; |
---|
856 | 920 | |
---|
857 | | - set_state_cb(tree, state, bits); |
---|
| 921 | + if (tree->private_data && is_data_inode(tree->private_data)) |
---|
| 922 | + btrfs_set_delalloc_extent(tree->private_data, state, bits); |
---|
| 923 | + |
---|
858 | 924 | if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { |
---|
859 | 925 | u64 range = state->end - state->start + 1; |
---|
860 | 926 | tree->dirty_bytes += range; |
---|
.. | .. |
---|
880 | 946 | struct extent_state **cached_ptr) |
---|
881 | 947 | { |
---|
882 | 948 | return cache_state_if_flags(state, cached_ptr, |
---|
883 | | - EXTENT_IOBITS | EXTENT_BOUNDARY); |
---|
| 949 | + EXTENT_LOCKED | EXTENT_BOUNDARY); |
---|
884 | 950 | } |
---|
885 | 951 | |
---|
886 | 952 | /* |
---|
.. | .. |
---|
910 | 976 | u64 last_end; |
---|
911 | 977 | |
---|
912 | 978 | btrfs_debug_check_extent_io_range(tree, start, end); |
---|
| 979 | + trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits); |
---|
913 | 980 | |
---|
914 | | - bits |= EXTENT_FIRST_DELALLOC; |
---|
915 | 981 | again: |
---|
916 | 982 | if (!prealloc && gfpflags_allow_blocking(mask)) { |
---|
917 | 983 | /* |
---|
.. | .. |
---|
1002 | 1068 | *failed_start = start; |
---|
1003 | 1069 | err = -EEXIST; |
---|
1004 | 1070 | goto out; |
---|
| 1071 | + } |
---|
| 1072 | + |
---|
| 1073 | + /* |
---|
| 1074 | + * If this extent already has all the bits we want set, then |
---|
| 1075 | + * skip it, not necessary to split it or do anything with it. |
---|
| 1076 | + */ |
---|
| 1077 | + if ((state->state & bits) == bits) { |
---|
| 1078 | + start = state->end + 1; |
---|
| 1079 | + cache_state(state, cached_state); |
---|
| 1080 | + goto search_again; |
---|
1005 | 1081 | } |
---|
1006 | 1082 | |
---|
1007 | 1083 | prealloc = alloc_extent_state_atomic(prealloc); |
---|
.. | .. |
---|
1143 | 1219 | bool first_iteration = true; |
---|
1144 | 1220 | |
---|
1145 | 1221 | btrfs_debug_check_extent_io_range(tree, start, end); |
---|
| 1222 | + trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits, |
---|
| 1223 | + clear_bits); |
---|
1146 | 1224 | |
---|
1147 | 1225 | again: |
---|
1148 | 1226 | if (!prealloc) { |
---|
.. | .. |
---|
1342 | 1420 | changeset); |
---|
1343 | 1421 | } |
---|
1344 | 1422 | |
---|
| 1423 | +int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end, |
---|
| 1424 | + unsigned bits) |
---|
| 1425 | +{ |
---|
| 1426 | + return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL, |
---|
| 1427 | + GFP_NOWAIT, NULL); |
---|
| 1428 | +} |
---|
| 1429 | + |
---|
1345 | 1430 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
---|
1346 | 1431 | unsigned bits, int wake, int delete, |
---|
1347 | 1432 | struct extent_state **cached) |
---|
.. | .. |
---|
1478 | 1563 | struct extent_state **cached_state) |
---|
1479 | 1564 | { |
---|
1480 | 1565 | struct extent_state *state; |
---|
1481 | | - struct rb_node *n; |
---|
1482 | 1566 | int ret = 1; |
---|
1483 | 1567 | |
---|
1484 | 1568 | spin_lock(&tree->lock); |
---|
1485 | 1569 | if (cached_state && *cached_state) { |
---|
1486 | 1570 | state = *cached_state; |
---|
1487 | 1571 | if (state->end == start - 1 && extent_state_in_tree(state)) { |
---|
1488 | | - n = rb_next(&state->rb_node); |
---|
1489 | | - while (n) { |
---|
1490 | | - state = rb_entry(n, struct extent_state, |
---|
1491 | | - rb_node); |
---|
| 1572 | + while ((state = next_state(state)) != NULL) { |
---|
1492 | 1573 | if (state->state & bits) |
---|
1493 | 1574 | goto got_it; |
---|
1494 | | - n = rb_next(n); |
---|
1495 | 1575 | } |
---|
1496 | 1576 | free_extent_state(*cached_state); |
---|
1497 | 1577 | *cached_state = NULL; |
---|
.. | .. |
---|
1514 | 1594 | return ret; |
---|
1515 | 1595 | } |
---|
1516 | 1596 | |
---|
| 1597 | +/** |
---|
| 1598 | + * find_contiguous_extent_bit: find a contiguous area of bits |
---|
| 1599 | + * @tree - io tree to check |
---|
| 1600 | + * @start - offset to start the search from |
---|
| 1601 | + * @start_ret - the first offset we found with the bits set |
---|
| 1602 | + * @end_ret - the final contiguous range of the bits that were set |
---|
| 1603 | + * @bits - bits to look for |
---|
| 1604 | + * |
---|
| 1605 | + * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges |
---|
| 1606 | + * to set bits appropriately, and then merge them again. During this time it |
---|
| 1607 | + * will drop the tree->lock, so use this helper if you want to find the actual |
---|
| 1608 | + * contiguous area for given bits. We will search to the first bit we find, and |
---|
| 1609 | + * then walk down the tree until we find a non-contiguous area. The area |
---|
| 1610 | + * returned will be the full contiguous area with the bits set. |
---|
| 1611 | + */ |
---|
| 1612 | +int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start, |
---|
| 1613 | + u64 *start_ret, u64 *end_ret, unsigned bits) |
---|
| 1614 | +{ |
---|
| 1615 | + struct extent_state *state; |
---|
| 1616 | + int ret = 1; |
---|
| 1617 | + |
---|
| 1618 | + spin_lock(&tree->lock); |
---|
| 1619 | + state = find_first_extent_bit_state(tree, start, bits); |
---|
| 1620 | + if (state) { |
---|
| 1621 | + *start_ret = state->start; |
---|
| 1622 | + *end_ret = state->end; |
---|
| 1623 | + while ((state = next_state(state)) != NULL) { |
---|
| 1624 | + if (state->start > (*end_ret + 1)) |
---|
| 1625 | + break; |
---|
| 1626 | + *end_ret = state->end; |
---|
| 1627 | + } |
---|
| 1628 | + ret = 0; |
---|
| 1629 | + } |
---|
| 1630 | + spin_unlock(&tree->lock); |
---|
| 1631 | + return ret; |
---|
| 1632 | +} |
---|
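A short usage sketch of the helper above: because a range can be split and re-merged while the tree lock is dropped, a caller that needs the whole contiguous run asks for it in one call. The EXTENT_DIRTY bit and the wrapper name are only illustrative:

static int find_dirty_run_sketch(struct extent_io_tree *tree, u64 offset,
				 u64 *run_start, u64 *run_end)
{
	/* returns 0 and fills [run_start, run_end] when a dirty run exists */
	return find_contiguous_extent_bit(tree, offset, run_start, run_end,
					  EXTENT_DIRTY);
}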
| 1633 | + |
---|
| 1634 | +/** |
---|
| 1635 | + * find_first_clear_extent_bit - find the first range that has @bits not set. |
---|
| 1636 | + * This range could start before @start. |
---|
| 1637 | + * |
---|
| 1638 | + * @tree - the tree to search |
---|
| 1639 | + * @start - the offset at/after which the found extent should start |
---|
| 1640 | + * @start_ret - records the beginning of the range |
---|
| 1641 | + * @end_ret - records the end of the range (inclusive) |
---|
| 1642 | + * @bits - the set of bits which must be unset |
---|
| 1643 | + * |
---|
| 1644 | + * Since unallocated range is also considered one which doesn't have the bits |
---|
| 1645 | + * set it's possible that @end_ret contains -1, this happens in case the range |
---|
| 1646 | + * spans (last_range_end, end of device]. In this case it's up to the caller to |
---|
| 1647 | + * trim @end_ret to the appropriate size. |
---|
| 1648 | + */ |
---|
| 1649 | +void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start, |
---|
| 1650 | + u64 *start_ret, u64 *end_ret, unsigned bits) |
---|
| 1651 | +{ |
---|
| 1652 | + struct extent_state *state; |
---|
| 1653 | + struct rb_node *node, *prev = NULL, *next; |
---|
| 1654 | + |
---|
| 1655 | + spin_lock(&tree->lock); |
---|
| 1656 | + |
---|
| 1657 | + /* Find first extent with bits cleared */ |
---|
| 1658 | + while (1) { |
---|
| 1659 | + node = __etree_search(tree, start, &next, &prev, NULL, NULL); |
---|
| 1660 | + if (!node && !next && !prev) { |
---|
| 1661 | + /* |
---|
| 1662 | + * Tree is completely empty, send full range and let |
---|
| 1663 | + * caller deal with it |
---|
| 1664 | + */ |
---|
| 1665 | + *start_ret = 0; |
---|
| 1666 | + *end_ret = -1; |
---|
| 1667 | + goto out; |
---|
| 1668 | + } else if (!node && !next) { |
---|
| 1669 | + /* |
---|
| 1670 | + * We are past the last allocated chunk, set start at |
---|
| 1671 | + * the end of the last extent. |
---|
| 1672 | + */ |
---|
| 1673 | + state = rb_entry(prev, struct extent_state, rb_node); |
---|
| 1674 | + *start_ret = state->end + 1; |
---|
| 1675 | + *end_ret = -1; |
---|
| 1676 | + goto out; |
---|
| 1677 | + } else if (!node) { |
---|
| 1678 | + node = next; |
---|
| 1679 | + } |
---|
| 1680 | + /* |
---|
| 1681 | + * At this point 'node' either contains 'start' or start is |
---|
| 1682 | + * before 'node' |
---|
| 1683 | + */ |
---|
| 1684 | + state = rb_entry(node, struct extent_state, rb_node); |
---|
| 1685 | + |
---|
| 1686 | + if (in_range(start, state->start, state->end - state->start + 1)) { |
---|
| 1687 | + if (state->state & bits) { |
---|
| 1688 | + /* |
---|
| 1689 | + * |--range with bits sets--| |
---|
| 1690 | + * | |
---|
| 1691 | + * start |
---|
| 1692 | + */ |
---|
| 1693 | + start = state->end + 1; |
---|
| 1694 | + } else { |
---|
| 1695 | + /* |
---|
| 1696 | + * 'start' falls within a range that doesn't |
---|
| 1697 | + * have the bits set, so take its start as |
---|
| 1698 | + * the beginning of the desired range |
---|
| 1699 | + * |
---|
| 1700 | + * |--range with bits cleared----| |
---|
| 1701 | + * | |
---|
| 1702 | + * start |
---|
| 1703 | + */ |
---|
| 1704 | + *start_ret = state->start; |
---|
| 1705 | + break; |
---|
| 1706 | + } |
---|
| 1707 | + } else { |
---|
| 1708 | + /* |
---|
| 1709 | + * |---prev range---|---hole/unset---|---node range---| |
---|
| 1710 | + * | |
---|
| 1711 | + * start |
---|
| 1712 | + * |
---|
| 1713 | + * or |
---|
| 1714 | + * |
---|
| 1715 | + * |---hole/unset--||--first node--| |
---|
| 1716 | + * 0 | |
---|
| 1717 | + * start |
---|
| 1718 | + */ |
---|
| 1719 | + if (prev) { |
---|
| 1720 | + state = rb_entry(prev, struct extent_state, |
---|
| 1721 | + rb_node); |
---|
| 1722 | + *start_ret = state->end + 1; |
---|
| 1723 | + } else { |
---|
| 1724 | + *start_ret = 0; |
---|
| 1725 | + } |
---|
| 1726 | + break; |
---|
| 1727 | + } |
---|
| 1728 | + } |
---|
| 1729 | + |
---|
| 1730 | + /* |
---|
| 1731 | + * Find the longest stretch from start until an entry which has the |
---|
| 1732 | + * bits set |
---|
| 1733 | + */ |
---|
| 1734 | + while (1) { |
---|
| 1735 | + state = rb_entry(node, struct extent_state, rb_node); |
---|
| 1736 | + if (state->end >= start && !(state->state & bits)) { |
---|
| 1737 | + *end_ret = state->end; |
---|
| 1738 | + } else { |
---|
| 1739 | + *end_ret = state->start - 1; |
---|
| 1740 | + break; |
---|
| 1741 | + } |
---|
| 1742 | + |
---|
| 1743 | + node = rb_next(node); |
---|
| 1744 | + if (!node) |
---|
| 1745 | + break; |
---|
| 1746 | + } |
---|
| 1747 | +out: |
---|
| 1748 | + spin_unlock(&tree->lock); |
---|
| 1749 | +} |
---|
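As the comment above notes, @end_ret can come back as -1 when the unset range runs to the end of the device, so the caller trims it. A hedged sketch of such a caller; CHUNK_ALLOCATED as the bit and the clamping policy are assumptions for illustration:

static void find_unallocated_hole_sketch(struct extent_io_tree *tree, u64 start,
					 u64 device_size, u64 *hole_start,
					 u64 *hole_end)
{
	find_first_clear_extent_bit(tree, start, hole_start, hole_end,
				    CHUNK_ALLOCATED);
	/* open-ended range: trim to the device size as the caller must */
	if (*hole_end == (u64)-1)
		*hole_end = device_size - 1;
}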
| 1750 | + |
---|
1517 | 1751 | /* |
---|
1518 | 1752 | * find a contiguous range of bytes in the file marked as delalloc, not |
---|
1519 | 1753 | * more than 'max_bytes'. start and end are used to return the range, |
---|
1520 | 1754 | * |
---|
1521 | | - * 1 is returned if we find something, 0 if nothing was in the tree |
---|
| 1755 | + * true is returned if we find something, false if nothing was in the tree |
---|
1522 | 1756 | */ |
---|
1523 | | -static noinline u64 find_delalloc_range(struct extent_io_tree *tree, |
---|
1524 | | - u64 *start, u64 *end, u64 max_bytes, |
---|
1525 | | - struct extent_state **cached_state) |
---|
| 1757 | +bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start, |
---|
| 1758 | + u64 *end, u64 max_bytes, |
---|
| 1759 | + struct extent_state **cached_state) |
---|
1526 | 1760 | { |
---|
1527 | 1761 | struct rb_node *node; |
---|
1528 | 1762 | struct extent_state *state; |
---|
1529 | 1763 | u64 cur_start = *start; |
---|
1530 | | - u64 found = 0; |
---|
| 1764 | + bool found = false; |
---|
1531 | 1765 | u64 total_bytes = 0; |
---|
1532 | 1766 | |
---|
1533 | 1767 | spin_lock(&tree->lock); |
---|
.. | .. |
---|
1538 | 1772 | */ |
---|
1539 | 1773 | node = tree_search(tree, cur_start); |
---|
1540 | 1774 | if (!node) { |
---|
1541 | | - if (!found) |
---|
1542 | | - *end = (u64)-1; |
---|
| 1775 | + *end = (u64)-1; |
---|
1543 | 1776 | goto out; |
---|
1544 | 1777 | } |
---|
1545 | 1778 | |
---|
.. | .. |
---|
1559 | 1792 | *cached_state = state; |
---|
1560 | 1793 | refcount_inc(&state->refs); |
---|
1561 | 1794 | } |
---|
1562 | | - found++; |
---|
| 1795 | + found = true; |
---|
1563 | 1796 | *end = state->end; |
---|
1564 | 1797 | cur_start = state->end + 1; |
---|
1565 | 1798 | node = rb_next(node); |
---|
.. | .. |
---|
1617 | 1850 | } |
---|
1618 | 1851 | |
---|
1619 | 1852 | /* |
---|
1620 | | - * find a contiguous range of bytes in the file marked as delalloc, not |
---|
1621 | | - * more than 'max_bytes'. start and end are used to return the range, |
---|
| 1853 | + * Find and lock a contiguous range of bytes in the file marked as delalloc, no |
---|
| 1854 | + * more than @max_bytes. @start and @end are used to return the range. |
---|
1622 | 1855 | * |
---|
1623 | | - * 1 is returned if we find something, 0 if nothing was in the tree |
---|
| 1856 | + * Return: true if we find something |
---|
| 1857 | + * false if nothing was in the tree |
---|
1624 | 1858 | */ |
---|
1625 | | -STATIC u64 find_lock_delalloc_range(struct inode *inode, |
---|
1626 | | - struct extent_io_tree *tree, |
---|
| 1859 | +EXPORT_FOR_TESTS |
---|
| 1860 | +noinline_for_stack bool find_lock_delalloc_range(struct inode *inode, |
---|
1627 | 1861 | struct page *locked_page, u64 *start, |
---|
1628 | | - u64 *end, u64 max_bytes) |
---|
| 1862 | + u64 *end) |
---|
1629 | 1863 | { |
---|
| 1864 | + struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; |
---|
| 1865 | + u64 max_bytes = BTRFS_MAX_EXTENT_SIZE; |
---|
1630 | 1866 | u64 delalloc_start; |
---|
1631 | 1867 | u64 delalloc_end; |
---|
1632 | | - u64 found; |
---|
| 1868 | + bool found; |
---|
1633 | 1869 | struct extent_state *cached_state = NULL; |
---|
1634 | 1870 | int ret; |
---|
1635 | 1871 | int loops = 0; |
---|
.. | .. |
---|
1638 | 1874 | /* step one, find a bunch of delalloc bytes starting at start */ |
---|
1639 | 1875 | delalloc_start = *start; |
---|
1640 | 1876 | delalloc_end = 0; |
---|
1641 | | - found = find_delalloc_range(tree, &delalloc_start, &delalloc_end, |
---|
1642 | | - max_bytes, &cached_state); |
---|
| 1877 | + found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end, |
---|
| 1878 | + max_bytes, &cached_state); |
---|
1643 | 1879 | if (!found || delalloc_end <= *start) { |
---|
1644 | 1880 | *start = delalloc_start; |
---|
1645 | 1881 | *end = delalloc_end; |
---|
1646 | 1882 | free_extent_state(cached_state); |
---|
1647 | | - return 0; |
---|
| 1883 | + return false; |
---|
1648 | 1884 | } |
---|
1649 | 1885 | |
---|
1650 | 1886 | /* |
---|
.. | .. |
---|
1664 | 1900 | /* step two, lock all the pages after the page that has start */ |
---|
1665 | 1901 | ret = lock_delalloc_pages(inode, locked_page, |
---|
1666 | 1902 | delalloc_start, delalloc_end); |
---|
| 1903 | + ASSERT(!ret || ret == -EAGAIN); |
---|
1667 | 1904 | if (ret == -EAGAIN) { |
---|
1668 | 1905 | /* some of the pages are gone, lets avoid looping by |
---|
1669 | 1906 | * shortening the size of the delalloc range we're searching |
---|
.. | .. |
---|
1675 | 1912 | loops = 1; |
---|
1676 | 1913 | goto again; |
---|
1677 | 1914 | } else { |
---|
1678 | | - found = 0; |
---|
| 1915 | + found = false; |
---|
1679 | 1916 | goto out_failed; |
---|
1680 | 1917 | } |
---|
1681 | 1918 | } |
---|
1682 | | - BUG_ON(ret); /* Only valid values are 0 and -EAGAIN */ |
---|
1683 | 1919 | |
---|
1684 | 1920 | /* step three, lock the state bits for the whole range */ |
---|
1685 | 1921 | lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state); |
---|
.. | .. |
---|
1741 | 1977 | if (page_ops & PAGE_SET_PRIVATE2) |
---|
1742 | 1978 | SetPagePrivate2(pages[i]); |
---|
1743 | 1979 | |
---|
1744 | | - if (pages[i] == locked_page) { |
---|
| 1980 | + if (locked_page && pages[i] == locked_page) { |
---|
1745 | 1981 | put_page(pages[i]); |
---|
1746 | 1982 | pages_locked++; |
---|
1747 | 1983 | continue; |
---|
.. | .. |
---|
1780 | 2016 | return err; |
---|
1781 | 2017 | } |
---|
1782 | 2018 | |
---|
1783 | | -void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, |
---|
1784 | | - u64 delalloc_end, struct page *locked_page, |
---|
1785 | | - unsigned clear_bits, |
---|
1786 | | - unsigned long page_ops) |
---|
| 2019 | +void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end, |
---|
| 2020 | + struct page *locked_page, |
---|
| 2021 | + unsigned clear_bits, |
---|
| 2022 | + unsigned long page_ops) |
---|
1787 | 2023 | { |
---|
1788 | | - clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0, |
---|
1789 | | - NULL); |
---|
| 2024 | + clear_extent_bit(&inode->io_tree, start, end, clear_bits, 1, 0, NULL); |
---|
1790 | 2025 | |
---|
1791 | | - __process_pages_contig(inode->i_mapping, locked_page, |
---|
| 2026 | + __process_pages_contig(inode->vfs_inode.i_mapping, locked_page, |
---|
1792 | 2027 | start >> PAGE_SHIFT, end >> PAGE_SHIFT, |
---|
1793 | 2028 | page_ops, NULL); |
---|
1794 | 2029 | } |
---|
.. | .. |
---|
1857 | 2092 | * set the private field for a given byte offset in the tree. If there isn't |
---|
1858 | 2093 | * an extent_state there already, this does nothing. |
---|
1859 | 2094 | */ |
---|
1860 | | -static noinline int set_state_failrec(struct extent_io_tree *tree, u64 start, |
---|
1861 | | - struct io_failure_record *failrec) |
---|
| 2095 | +int set_state_failrec(struct extent_io_tree *tree, u64 start, |
---|
| 2096 | + struct io_failure_record *failrec) |
---|
1862 | 2097 | { |
---|
1863 | 2098 | struct rb_node *node; |
---|
1864 | 2099 | struct extent_state *state; |
---|
.. | .. |
---|
1885 | 2120 | return ret; |
---|
1886 | 2121 | } |
---|
1887 | 2122 | |
---|
1888 | | -static noinline int get_state_failrec(struct extent_io_tree *tree, u64 start, |
---|
1889 | | - struct io_failure_record **failrec) |
---|
| 2123 | +struct io_failure_record *get_state_failrec(struct extent_io_tree *tree, u64 start) |
---|
1890 | 2124 | { |
---|
1891 | 2125 | struct rb_node *node; |
---|
1892 | 2126 | struct extent_state *state; |
---|
1893 | | - int ret = 0; |
---|
| 2127 | + struct io_failure_record *failrec; |
---|
1894 | 2128 | |
---|
1895 | 2129 | spin_lock(&tree->lock); |
---|
1896 | 2130 | /* |
---|
.. | .. |
---|
1899 | 2133 | */ |
---|
1900 | 2134 | node = tree_search(tree, start); |
---|
1901 | 2135 | if (!node) { |
---|
1902 | | - ret = -ENOENT; |
---|
| 2136 | + failrec = ERR_PTR(-ENOENT); |
---|
1903 | 2137 | goto out; |
---|
1904 | 2138 | } |
---|
1905 | 2139 | state = rb_entry(node, struct extent_state, rb_node); |
---|
1906 | 2140 | if (state->start != start) { |
---|
1907 | | - ret = -ENOENT; |
---|
| 2141 | + failrec = ERR_PTR(-ENOENT); |
---|
1908 | 2142 | goto out; |
---|
1909 | 2143 | } |
---|
1910 | | - *failrec = state->failrec; |
---|
| 2144 | + |
---|
| 2145 | + failrec = state->failrec; |
---|
1911 | 2146 | out: |
---|
1912 | 2147 | spin_unlock(&tree->lock); |
---|
1913 | | - return ret; |
---|
| 2148 | + return failrec; |
---|
1914 | 2149 | } |
---|
1915 | 2150 | |
---|
1916 | 2151 | /* |
---|
.. | .. |
---|
2096 | 2331 | return 0; |
---|
2097 | 2332 | } |
---|
2098 | 2333 | |
---|
2099 | | -int repair_eb_io_failure(struct btrfs_fs_info *fs_info, |
---|
2100 | | - struct extent_buffer *eb, int mirror_num) |
---|
| 2334 | +int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num) |
---|
2101 | 2335 | { |
---|
| 2336 | + struct btrfs_fs_info *fs_info = eb->fs_info; |
---|
2102 | 2337 | u64 start = eb->start; |
---|
2103 | 2338 | int i, num_pages = num_extent_pages(eb); |
---|
2104 | 2339 | int ret = 0; |
---|
.. | .. |
---|
2140 | 2375 | if (!ret) |
---|
2141 | 2376 | return 0; |
---|
2142 | 2377 | |
---|
2143 | | - ret = get_state_failrec(failure_tree, start, &failrec); |
---|
2144 | | - if (ret) |
---|
| 2378 | + failrec = get_state_failrec(failure_tree, start); |
---|
| 2379 | + if (IS_ERR(failrec)) |
---|
2145 | 2380 | return 0; |
---|
2146 | 2381 | |
---|
2147 | 2382 | BUG_ON(!failrec->this_mirror); |
---|
.. | .. |
---|
2213 | 2448 | spin_unlock(&failure_tree->lock); |
---|
2214 | 2449 | } |
---|
2215 | 2450 | |
---|
2216 | | -int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, |
---|
2217 | | - struct io_failure_record **failrec_ret) |
---|
| 2451 | +static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode, |
---|
| 2452 | + u64 start, u64 end) |
---|
2218 | 2453 | { |
---|
2219 | 2454 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
2220 | 2455 | struct io_failure_record *failrec; |
---|
.. | .. |
---|
2225 | 2460 | int ret; |
---|
2226 | 2461 | u64 logical; |
---|
2227 | 2462 | |
---|
2228 | | - ret = get_state_failrec(failure_tree, start, &failrec); |
---|
2229 | | - if (ret) { |
---|
2230 | | - failrec = kzalloc(sizeof(*failrec), GFP_NOFS); |
---|
2231 | | - if (!failrec) |
---|
2232 | | - return -ENOMEM; |
---|
2233 | | - |
---|
2234 | | - failrec->start = start; |
---|
2235 | | - failrec->len = end - start + 1; |
---|
2236 | | - failrec->this_mirror = 0; |
---|
2237 | | - failrec->bio_flags = 0; |
---|
2238 | | - failrec->in_validation = 0; |
---|
2239 | | - |
---|
2240 | | - read_lock(&em_tree->lock); |
---|
2241 | | - em = lookup_extent_mapping(em_tree, start, failrec->len); |
---|
2242 | | - if (!em) { |
---|
2243 | | - read_unlock(&em_tree->lock); |
---|
2244 | | - kfree(failrec); |
---|
2245 | | - return -EIO; |
---|
2246 | | - } |
---|
2247 | | - |
---|
2248 | | - if (em->start > start || em->start + em->len <= start) { |
---|
2249 | | - free_extent_map(em); |
---|
2250 | | - em = NULL; |
---|
2251 | | - } |
---|
2252 | | - read_unlock(&em_tree->lock); |
---|
2253 | | - if (!em) { |
---|
2254 | | - kfree(failrec); |
---|
2255 | | - return -EIO; |
---|
2256 | | - } |
---|
2257 | | - |
---|
2258 | | - logical = start - em->start; |
---|
2259 | | - logical = em->block_start + logical; |
---|
2260 | | - if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
---|
2261 | | - logical = em->block_start; |
---|
2262 | | - failrec->bio_flags = EXTENT_BIO_COMPRESSED; |
---|
2263 | | - extent_set_compress_type(&failrec->bio_flags, |
---|
2264 | | - em->compress_type); |
---|
2265 | | - } |
---|
2266 | | - |
---|
2267 | | - btrfs_debug(fs_info, |
---|
2268 | | - "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu", |
---|
2269 | | - logical, start, failrec->len); |
---|
2270 | | - |
---|
2271 | | - failrec->logical = logical; |
---|
2272 | | - free_extent_map(em); |
---|
2273 | | - |
---|
2274 | | - /* set the bits in the private failure tree */ |
---|
2275 | | - ret = set_extent_bits(failure_tree, start, end, |
---|
2276 | | - EXTENT_LOCKED | EXTENT_DIRTY); |
---|
2277 | | - if (ret >= 0) |
---|
2278 | | - ret = set_state_failrec(failure_tree, start, failrec); |
---|
2279 | | - /* set the bits in the inode's tree */ |
---|
2280 | | - if (ret >= 0) |
---|
2281 | | - ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED); |
---|
2282 | | - if (ret < 0) { |
---|
2283 | | - kfree(failrec); |
---|
2284 | | - return ret; |
---|
2285 | | - } |
---|
2286 | | - } else { |
---|
| 2463 | + failrec = get_state_failrec(failure_tree, start); |
---|
| 2464 | + if (!IS_ERR(failrec)) { |
---|
2287 | 2465 | btrfs_debug(fs_info, |
---|
2288 | 2466 | "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d", |
---|
2289 | 2467 | failrec->logical, failrec->start, failrec->len, |
---|
.. | .. |
---|
2293 | 2471 | * (e.g. with a list for failed_mirror) to make |
---|
2294 | 2472 | * clean_io_failure() clean all those errors at once. |
---|
2295 | 2473 | */ |
---|
| 2474 | + |
---|
| 2475 | + return failrec; |
---|
2296 | 2476 | } |
---|
2297 | 2477 | |
---|
2298 | | - *failrec_ret = failrec; |
---|
| 2478 | + failrec = kzalloc(sizeof(*failrec), GFP_NOFS); |
---|
| 2479 | + if (!failrec) |
---|
| 2480 | + return ERR_PTR(-ENOMEM); |
---|
2299 | 2481 | |
---|
2300 | | - return 0; |
---|
| 2482 | + failrec->start = start; |
---|
| 2483 | + failrec->len = end - start + 1; |
---|
| 2484 | + failrec->this_mirror = 0; |
---|
| 2485 | + failrec->bio_flags = 0; |
---|
| 2486 | + failrec->in_validation = 0; |
---|
| 2487 | + |
---|
| 2488 | + read_lock(&em_tree->lock); |
---|
| 2489 | + em = lookup_extent_mapping(em_tree, start, failrec->len); |
---|
| 2490 | + if (!em) { |
---|
| 2491 | + read_unlock(&em_tree->lock); |
---|
| 2492 | + kfree(failrec); |
---|
| 2493 | + return ERR_PTR(-EIO); |
---|
| 2494 | + } |
---|
| 2495 | + |
---|
| 2496 | + if (em->start > start || em->start + em->len <= start) { |
---|
| 2497 | + free_extent_map(em); |
---|
| 2498 | + em = NULL; |
---|
| 2499 | + } |
---|
| 2500 | + read_unlock(&em_tree->lock); |
---|
| 2501 | + if (!em) { |
---|
| 2502 | + kfree(failrec); |
---|
| 2503 | + return ERR_PTR(-EIO); |
---|
| 2504 | + } |
---|
| 2505 | + |
---|
| 2506 | + logical = start - em->start; |
---|
| 2507 | + logical = em->block_start + logical; |
---|
| 2508 | + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
---|
| 2509 | + logical = em->block_start; |
---|
| 2510 | + failrec->bio_flags = EXTENT_BIO_COMPRESSED; |
---|
| 2511 | + extent_set_compress_type(&failrec->bio_flags, em->compress_type); |
---|
| 2512 | + } |
---|
| 2513 | + |
---|
| 2514 | + btrfs_debug(fs_info, |
---|
| 2515 | + "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu", |
---|
| 2516 | + logical, start, failrec->len); |
---|
| 2517 | + |
---|
| 2518 | + failrec->logical = logical; |
---|
| 2519 | + free_extent_map(em); |
---|
| 2520 | + |
---|
| 2521 | + /* Set the bits in the private failure tree */ |
---|
| 2522 | + ret = set_extent_bits(failure_tree, start, end, |
---|
| 2523 | + EXTENT_LOCKED | EXTENT_DIRTY); |
---|
| 2524 | + if (ret >= 0) { |
---|
| 2525 | + ret = set_state_failrec(failure_tree, start, failrec); |
---|
| 2526 | + /* Set the bits in the inode's tree */ |
---|
| 2527 | + ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED); |
---|
| 2528 | + } else if (ret < 0) { |
---|
| 2529 | + kfree(failrec); |
---|
| 2530 | + return ERR_PTR(ret); |
---|
| 2531 | + } |
---|
| 2532 | + |
---|
| 2533 | + return failrec; |
---|
2301 | 2534 | } |
---|
2302 | 2535 | |
---|
2303 | | -bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages, |
---|
2304 | | - struct io_failure_record *failrec, int failed_mirror) |
---|
| 2536 | +static bool btrfs_check_repairable(struct inode *inode, bool needs_validation, |
---|
| 2537 | + struct io_failure_record *failrec, |
---|
| 2538 | + int failed_mirror) |
---|
2305 | 2539 | { |
---|
2306 | 2540 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
2307 | 2541 | int num_copies; |
---|
.. | .. |
---|
2324 | 2558 | * a) deliver good data to the caller |
---|
2325 | 2559 | * b) correct the bad sectors on disk |
---|
2326 | 2560 | */ |
---|
2327 | | - if (failed_bio_pages > 1) { |
---|
| 2561 | + if (needs_validation) { |
---|
2328 | 2562 | /* |
---|
2329 | 2563 | * to fulfill b), we need to know the exact failing sectors, as |
---|
2330 | 2564 | * we don't want to rewrite any more than the failed ones. thus, |
---|
.. | .. |
---|
2363 | 2597 | return true; |
---|
2364 | 2598 | } |
---|
2365 | 2599 | |
---|
2366 | | - |
---|
2367 | | -struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, |
---|
2368 | | - struct io_failure_record *failrec, |
---|
2369 | | - struct page *page, int pg_offset, int icsum, |
---|
2370 | | - bio_end_io_t *endio_func, void *data) |
---|
| 2600 | +static bool btrfs_io_needs_validation(struct inode *inode, struct bio *bio) |
---|
2371 | 2601 | { |
---|
2372 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
2373 | | - struct bio *bio; |
---|
2374 | | - struct btrfs_io_bio *btrfs_failed_bio; |
---|
2375 | | - struct btrfs_io_bio *btrfs_bio; |
---|
| 2602 | + u64 len = 0; |
---|
| 2603 | + const u32 blocksize = inode->i_sb->s_blocksize; |
---|
2376 | 2604 | |
---|
2377 | | - bio = btrfs_io_bio_alloc(1); |
---|
2378 | | - bio->bi_end_io = endio_func; |
---|
2379 | | - bio->bi_iter.bi_sector = failrec->logical >> 9; |
---|
2380 | | - bio_set_dev(bio, fs_info->fs_devices->latest_bdev); |
---|
2381 | | - bio->bi_iter.bi_size = 0; |
---|
2382 | | - bio->bi_private = data; |
---|
| 2605 | + /* |
---|
| 2606 | + * If bi_status is BLK_STS_OK, then this was a checksum error, not an |
---|
| 2607 | + * I/O error. In this case, we already know exactly which sector was |
---|
| 2608 | + * bad, so we don't need to validate. |
---|
| 2609 | + */ |
---|
| 2610 | + if (bio->bi_status == BLK_STS_OK) |
---|
| 2611 | + return false; |
---|
2383 | 2612 | |
---|
2384 | | - btrfs_failed_bio = btrfs_io_bio(failed_bio); |
---|
2385 | | - if (btrfs_failed_bio->csum) { |
---|
2386 | | - u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); |
---|
| 2613 | + /* |
---|
| 2614 | + * We need to validate each sector individually if the failed I/O was |
---|
| 2615 | + * for multiple sectors. |
---|
| 2616 | + * |
---|
| 2617 | + * There are a few possible bios that can end up here: |
---|
| 2618 | + * 1. A buffered read bio, which is not cloned. |
---|
| 2619 | + * 2. A direct I/O read bio, which is cloned. |
---|
| 2620 | + * 3. A (buffered or direct) repair bio, which is not cloned. |
---|
| 2621 | + * |
---|
| 2622 | + * For cloned bios (case 2), we can get the size from |
---|
| 2623 | + * btrfs_io_bio->iter; for non-cloned bios (cases 1 and 3), we can get |
---|
| 2624 | + * it from the bvecs. |
---|
| 2625 | + */ |
---|
| 2626 | + if (bio_flagged(bio, BIO_CLONED)) { |
---|
| 2627 | + if (btrfs_io_bio(bio)->iter.bi_size > blocksize) |
---|
| 2628 | + return true; |
---|
| 2629 | + } else { |
---|
| 2630 | + struct bio_vec *bvec; |
---|
| 2631 | + int i; |
---|
2387 | 2632 | |
---|
2388 | | - btrfs_bio = btrfs_io_bio(bio); |
---|
2389 | | - btrfs_bio->csum = btrfs_bio->csum_inline; |
---|
2390 | | - icsum *= csum_size; |
---|
2391 | | - memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum, |
---|
2392 | | - csum_size); |
---|
| 2633 | + bio_for_each_bvec_all(bvec, bio, i) { |
---|
| 2634 | + len += bvec->bv_len; |
---|
| 2635 | + if (len > blocksize) |
---|
| 2636 | + return true; |
---|
| 2637 | + } |
---|
2393 | 2638 | } |
---|
2394 | | - |
---|
2395 | | - bio_add_page(bio, page, failrec->len, pg_offset); |
---|
2396 | | - |
---|
2397 | | - return bio; |
---|
| 2639 | + return false; |
---|
2398 | 2640 | } |
---|
2399 | 2641 | |
---|
2400 | | -/* |
---|
2401 | | - * this is a generic handler for readpage errors (default |
---|
2402 | | - * readpage_io_failed_hook). if other copies exist, read those and write back |
---|
2403 | | - * good data to the failed position. does not investigate in remapping the |
---|
2404 | | - * failed extent elsewhere, hoping the device will be smart enough to do this as |
---|
2405 | | - * needed |
---|
2406 | | - */ |
---|
2407 | | - |
---|
2408 | | -static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, |
---|
2409 | | - struct page *page, u64 start, u64 end, |
---|
2410 | | - int failed_mirror) |
---|
| 2642 | +blk_status_t btrfs_submit_read_repair(struct inode *inode, |
---|
| 2643 | + struct bio *failed_bio, u64 phy_offset, |
---|
| 2644 | + struct page *page, unsigned int pgoff, |
---|
| 2645 | + u64 start, u64 end, int failed_mirror, |
---|
| 2646 | + submit_bio_hook_t *submit_bio_hook) |
---|
2411 | 2647 | { |
---|
2412 | 2648 | struct io_failure_record *failrec; |
---|
2413 | | - struct inode *inode = page->mapping->host; |
---|
| 2649 | + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
2414 | 2650 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; |
---|
2415 | 2651 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; |
---|
2416 | | - struct bio *bio; |
---|
2417 | | - int read_mode = 0; |
---|
| 2652 | + struct btrfs_io_bio *failed_io_bio = btrfs_io_bio(failed_bio); |
---|
| 2653 | + const int icsum = phy_offset >> inode->i_sb->s_blocksize_bits; |
---|
| 2654 | + bool need_validation; |
---|
| 2655 | + struct bio *repair_bio; |
---|
| 2656 | + struct btrfs_io_bio *repair_io_bio; |
---|
2418 | 2657 | blk_status_t status; |
---|
2419 | | - int ret; |
---|
2420 | | - unsigned failed_bio_pages = bio_pages_all(failed_bio); |
---|
| 2658 | + |
---|
| 2659 | + btrfs_debug(fs_info, |
---|
| 2660 | + "repair read error: read error at %llu", start); |
---|
2421 | 2661 | |
---|
2422 | 2662 | BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); |
---|
2423 | 2663 | |
---|
2424 | | - ret = btrfs_get_io_failure_record(inode, start, end, &failrec); |
---|
2425 | | - if (ret) |
---|
2426 | | - return ret; |
---|
| 2664 | + failrec = btrfs_get_io_failure_record(inode, start, end); |
---|
| 2665 | + if (IS_ERR(failrec)) |
---|
| 2666 | + return errno_to_blk_status(PTR_ERR(failrec)); |
---|
2427 | 2667 | |
---|
2428 | | - if (!btrfs_check_repairable(inode, failed_bio_pages, failrec, |
---|
| 2668 | + need_validation = btrfs_io_needs_validation(inode, failed_bio); |
---|
| 2669 | + |
---|
| 2670 | + if (!btrfs_check_repairable(inode, need_validation, failrec, |
---|
2429 | 2671 | failed_mirror)) { |
---|
2430 | 2672 | free_io_failure(failure_tree, tree, failrec); |
---|
2431 | | - return -EIO; |
---|
| 2673 | + return BLK_STS_IOERR; |
---|
2432 | 2674 | } |
---|
2433 | 2675 | |
---|
2434 | | - if (failed_bio_pages > 1) |
---|
2435 | | - read_mode |= REQ_FAILFAST_DEV; |
---|
| 2676 | + repair_bio = btrfs_io_bio_alloc(1); |
---|
| 2677 | + repair_io_bio = btrfs_io_bio(repair_bio); |
---|
| 2678 | + repair_bio->bi_opf = REQ_OP_READ; |
---|
| 2679 | + if (need_validation) |
---|
| 2680 | + repair_bio->bi_opf |= REQ_FAILFAST_DEV; |
---|
| 2681 | + repair_bio->bi_end_io = failed_bio->bi_end_io; |
---|
| 2682 | + repair_bio->bi_iter.bi_sector = failrec->logical >> 9; |
---|
| 2683 | + repair_bio->bi_private = failed_bio->bi_private; |
---|
2436 | 2684 | |
---|
2437 | | - phy_offset >>= inode->i_sb->s_blocksize_bits; |
---|
2438 | | - bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, |
---|
2439 | | - start - page_offset(page), |
---|
2440 | | - (int)phy_offset, failed_bio->bi_end_io, |
---|
2441 | | - NULL); |
---|
2442 | | - bio->bi_opf = REQ_OP_READ | read_mode; |
---|
| 2685 | + if (failed_io_bio->csum) { |
---|
| 2686 | + const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); |
---|
| 2687 | + |
---|
| 2688 | + repair_io_bio->csum = repair_io_bio->csum_inline; |
---|
| 2689 | + memcpy(repair_io_bio->csum, |
---|
| 2690 | + failed_io_bio->csum + csum_size * icsum, csum_size); |
---|
| 2691 | + } |
---|
| 2692 | + |
---|
| 2693 | + bio_add_page(repair_bio, page, failrec->len, pgoff); |
---|
| 2694 | + repair_io_bio->logical = failrec->start; |
---|
| 2695 | + repair_io_bio->iter = repair_bio->bi_iter; |
---|
2443 | 2696 | |
---|
2444 | 2697 | btrfs_debug(btrfs_sb(inode->i_sb), |
---|
2445 | | - "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d", |
---|
2446 | | - read_mode, failrec->this_mirror, failrec->in_validation); |
---|
| 2698 | +"repair read error: submitting new read to mirror %d, in_validation=%d", |
---|
| 2699 | + failrec->this_mirror, failrec->in_validation); |
---|
2447 | 2700 | |
---|
2448 | | - status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror, |
---|
2449 | | - failrec->bio_flags, 0); |
---|
| 2701 | + status = submit_bio_hook(inode, repair_bio, failrec->this_mirror, |
---|
| 2702 | + failrec->bio_flags); |
---|
2450 | 2703 | if (status) { |
---|
2451 | 2704 | free_io_failure(failure_tree, tree, failrec); |
---|
2452 | | - bio_put(bio); |
---|
2453 | | - ret = blk_status_to_errno(status); |
---|
| 2705 | + bio_put(repair_bio); |
---|
2454 | 2706 | } |
---|
2455 | | - |
---|
2456 | | - return ret; |
---|
| 2707 | + return status; |
---|
2457 | 2708 | } |
---|
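The repair path above allocates a single-sector bio and copies over only the checksum that covers the failed block, indexed by icsum = phy_offset >> blocksize_bits. A minimal userspace sketch of that index arithmetic; the 4 KiB block size and 4-byte crc32c checksum are assumed example values, not values read from a real superblock:

	#include <stdio.h>

	int main(void)
	{
		const unsigned int blocksize_bits = 12;	/* assumption: 4 KiB blocks */
		const unsigned int csum_size = 4;	/* assumption: crc32c */
		unsigned long long phy_offset = 16384;	/* byte offset of the bad block in the bio */
		unsigned int icsum = phy_offset >> blocksize_bits;

		/* the repair bio gets csum bytes [csum_size * icsum, csum_size * (icsum + 1)) */
		printf("block index %u, csum bytes [%u, %u)\n",
		       icsum, csum_size * icsum, csum_size * (icsum + 1));
		return 0;
	}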
2458 | 2709 | |
---|
2459 | 2710 | /* lots and lots of room for performance fixes in the end_bio funcs */ |
---|
.. | .. |
---|
2461 | 2712 | void end_extent_writepage(struct page *page, int err, u64 start, u64 end) |
---|
2462 | 2713 | { |
---|
2463 | 2714 | int uptodate = (err == 0); |
---|
2464 | | - struct extent_io_tree *tree; |
---|
2465 | 2715 | int ret = 0; |
---|
2466 | 2716 | |
---|
2467 | | - tree = &BTRFS_I(page->mapping->host)->io_tree; |
---|
2468 | | - |
---|
2469 | | - if (tree->ops && tree->ops->writepage_end_io_hook) |
---|
2470 | | - tree->ops->writepage_end_io_hook(page, start, end, NULL, |
---|
2471 | | - uptodate); |
---|
| 2717 | + btrfs_writepage_endio_finish_ordered(page, start, end, uptodate); |
---|
2472 | 2718 | |
---|
2473 | 2719 | if (!uptodate) { |
---|
2474 | 2720 | ClearPageUptodate(page); |
---|
.. | .. |
---|
2493 | 2739 | struct bio_vec *bvec; |
---|
2494 | 2740 | u64 start; |
---|
2495 | 2741 | u64 end; |
---|
2496 | | - int i; |
---|
| 2742 | + struct bvec_iter_all iter_all; |
---|
2497 | 2743 | |
---|
2498 | 2744 | ASSERT(!bio_flagged(bio, BIO_CLONED)); |
---|
2499 | | - bio_for_each_segment_all(bvec, bio, i) { |
---|
| 2745 | + bio_for_each_segment_all(bvec, bio, iter_all) { |
---|
2500 | 2746 | struct page *page = bvec->bv_page; |
---|
2501 | 2747 | struct inode *inode = page->mapping->host; |
---|
2502 | 2748 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
.. | .. |
---|
2564 | 2810 | u64 extent_len = 0; |
---|
2565 | 2811 | int mirror; |
---|
2566 | 2812 | int ret; |
---|
2567 | | - int i; |
---|
| 2813 | + struct bvec_iter_all iter_all; |
---|
2568 | 2814 | |
---|
2569 | 2815 | ASSERT(!bio_flagged(bio, BIO_CLONED)); |
---|
2570 | | - bio_for_each_segment_all(bvec, bio, i) { |
---|
| 2816 | + bio_for_each_segment_all(bvec, bio, iter_all) { |
---|
2571 | 2817 | struct page *page = bvec->bv_page; |
---|
2572 | 2818 | struct inode *inode = page->mapping->host; |
---|
2573 | 2819 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
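Both endio handlers in the hunks above move from an integer segment index to the newer bio_for_each_segment_all() form, which keeps its state in a struct bvec_iter_all. A hedged, kernel-style sketch of the idiom; the function name is made up for illustration and this is not a buildable standalone unit:

	/* Illustrative only: walk every page of a non-cloned bio at completion time. */
	static void example_end_io(struct bio *bio)
	{
		struct bio_vec *bvec;
		struct bvec_iter_all iter_all;

		ASSERT(!bio_flagged(bio, BIO_CLONED));
		bio_for_each_segment_all(bvec, bio, iter_all) {
			struct page *page = bvec->bv_page;

			/* per-page completion work goes here */
			end_page_writeback(page);
		}
		bio_put(bio);
	}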
.. | .. |
---|
2600 | 2846 | len = bvec->bv_len; |
---|
2601 | 2847 | |
---|
2602 | 2848 | mirror = io_bio->mirror_num; |
---|
2603 | | - if (likely(uptodate && tree->ops)) { |
---|
2604 | | - ret = tree->ops->readpage_end_io_hook(io_bio, offset, |
---|
2605 | | - page, start, end, |
---|
2606 | | - mirror); |
---|
| 2849 | + if (likely(uptodate)) { |
---|
| 2850 | + if (is_data_inode(inode)) |
---|
| 2851 | + ret = btrfs_verify_data_csum(io_bio, offset, page, |
---|
| 2852 | + start, end, mirror); |
---|
| 2853 | + else |
---|
| 2854 | + ret = btrfs_validate_metadata_buffer(io_bio, |
---|
| 2855 | + offset, page, start, end, mirror); |
---|
2607 | 2856 | if (ret) |
---|
2608 | 2857 | uptodate = 0; |
---|
2609 | 2858 | else |
---|
.. | .. |
---|
2616 | 2865 | if (likely(uptodate)) |
---|
2617 | 2866 | goto readpage_ok; |
---|
2618 | 2867 | |
---|
2619 | | - if (tree->ops) { |
---|
2620 | | - ret = tree->ops->readpage_io_failed_hook(page, mirror); |
---|
2621 | | - if (ret == -EAGAIN) { |
---|
2622 | | - /* |
---|
2623 | | - * Data inode's readpage_io_failed_hook() always |
---|
2624 | | - * returns -EAGAIN. |
---|
2625 | | - * |
---|
2626 | | - * The generic bio_readpage_error handles errors |
---|
2627 | | - * the following way: If possible, new read |
---|
2628 | | - * requests are created and submitted and will |
---|
2629 | | - * end up in end_bio_extent_readpage as well (if |
---|
2630 | | - * we're lucky, not in the !uptodate case). In |
---|
2631 | | - * that case it returns 0 and we just go on with |
---|
2632 | | - * the next page in our bio. If it can't handle |
---|
2633 | | - * the error it will return -EIO and we remain |
---|
2634 | | - * responsible for that page. |
---|
2635 | | - */ |
---|
2636 | | - ret = bio_readpage_error(bio, offset, page, |
---|
2637 | | - start, end, mirror); |
---|
2638 | | - if (ret == 0) { |
---|
2639 | | - uptodate = !bio->bi_status; |
---|
2640 | | - offset += len; |
---|
2641 | | - continue; |
---|
2642 | | - } |
---|
2643 | | - } |
---|
| 2868 | + if (is_data_inode(inode)) { |
---|
2644 | 2869 | |
---|
2645 | 2870 | /* |
---|
2646 | | - * metadata's readpage_io_failed_hook() always returns |
---|
2647 | | - * -EIO and fixes nothing. -EIO is also returned if |
---|
2648 | | - * data inode error could not be fixed. |
---|
| 2871 | + * The generic bio_readpage_error handles errors the |
---|
| 2872 | + * following way: If possible, new read requests are |
---|
| 2873 | + * created and submitted and will end up in |
---|
| 2874 | + * end_bio_extent_readpage as well (if we're lucky, |
---|
| 2875 | + * not in the !uptodate case). In that case it returns |
---|
| 2876 | + * 0 and we just go on with the next page in our bio. |
---|
| 2877 | + * If it can't handle the error it will return -EIO and |
---|
| 2878 | + * we remain responsible for that page. |
---|
2649 | 2879 | */ |
---|
2650 | | - ASSERT(ret == -EIO); |
---|
| 2880 | + if (!btrfs_submit_read_repair(inode, bio, offset, page, |
---|
| 2881 | + start - page_offset(page), |
---|
| 2882 | + start, end, mirror, |
---|
| 2883 | + btrfs_submit_data_bio)) { |
---|
| 2884 | + uptodate = !bio->bi_status; |
---|
| 2885 | + offset += len; |
---|
| 2886 | + continue; |
---|
| 2887 | + } |
---|
| 2888 | + } else { |
---|
| 2889 | + struct extent_buffer *eb; |
---|
| 2890 | + |
---|
| 2891 | + eb = (struct extent_buffer *)page->private; |
---|
| 2892 | + set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); |
---|
| 2893 | + eb->read_mirror = mirror; |
---|
| 2894 | + atomic_dec(&eb->io_pages); |
---|
| 2895 | + if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, |
---|
| 2896 | + &eb->bflags)) |
---|
| 2897 | + btree_readahead_hook(eb, -EIO); |
---|
2651 | 2898 | } |
---|
2652 | 2899 | readpage_ok: |
---|
2653 | 2900 | if (likely(uptodate)) { |
---|
.. | .. |
---|
2656 | 2903 | unsigned off; |
---|
2657 | 2904 | |
---|
2658 | 2905 | /* Zero out the end if this page straddles i_size */ |
---|
2659 | | - off = i_size & (PAGE_SIZE-1); |
---|
| 2906 | + off = offset_in_page(i_size); |
---|
2660 | 2907 | if (page->index == end_index && off) |
---|
2661 | 2908 | zero_user_segment(page, off, PAGE_SIZE); |
---|
2662 | 2909 | SetPageUptodate(page); |
---|
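offset_in_page() replaces the open-coded PAGE_SIZE mask when zeroing the tail of a page that straddles i_size. A small runnable sketch of that case; EXAMPLE_PAGE_SIZE and the local macro are stand-ins for the kernel definitions:

	#include <stdio.h>

	#define EXAMPLE_PAGE_SIZE 4096UL
	/* local stand-in for the kernel's offset_in_page() */
	#define example_offset_in_page(p) ((unsigned long)(p) & (EXAMPLE_PAGE_SIZE - 1))

	int main(void)
	{
		unsigned long long i_size = 0x12345;	/* file ends part-way into its last page */
		unsigned long off = example_offset_in_page(i_size);

		/* only [0, off) of the last page holds file data; the rest gets zeroed */
		printf("zero bytes [%lu, %lu) of the final page\n", off, EXAMPLE_PAGE_SIZE);
		return 0;
	}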
.. | .. |
---|
2693 | 2940 | if (extent_len) |
---|
2694 | 2941 | endio_readpage_release_extent(tree, extent_start, extent_len, |
---|
2695 | 2942 | uptodate); |
---|
2696 | | - if (io_bio->end_io) |
---|
2697 | | - io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status)); |
---|
| 2943 | + btrfs_io_bio_free_csum(io_bio); |
---|
2698 | 2944 | bio_put(bio); |
---|
2699 | 2945 | } |
---|
2700 | 2946 | |
---|
.. | .. |
---|
2713 | 2959 | * never fail. We're returning a bio right now but you can call btrfs_io_bio |
---|
2714 | 2960 | * for the appropriate container_of magic |
---|
2715 | 2961 | */ |
---|
2716 | | -struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte) |
---|
| 2962 | +struct bio *btrfs_bio_alloc(u64 first_byte) |
---|
2717 | 2963 | { |
---|
2718 | 2964 | struct bio *bio; |
---|
2719 | 2965 | |
---|
2720 | 2966 | bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset); |
---|
2721 | | - bio_set_dev(bio, bdev); |
---|
2722 | 2967 | bio->bi_iter.bi_sector = first_byte >> 9; |
---|
2723 | 2968 | btrfs_io_bio_init(btrfs_io_bio(bio)); |
---|
2724 | 2969 | return bio; |
---|
.. | .. |
---|
2766 | 3011 | |
---|
2767 | 3012 | /* |
---|
2768 | 3013 | * @opf: bio REQ_OP_* and REQ_* flags as one value |
---|
2769 | | - * @tree: tree so we can call our merge_bio hook |
---|
2770 | 3014 | * @wbc: optional writeback control for io accounting |
---|
2771 | 3015 | * @page: page to add to the bio |
---|
2772 | 3016 | * @pg_offset: offset of the new bio or to check whether we are adding |
---|
2773 | 3017 | * a contiguous page to the previous one |
---|
2774 | 3018 | * @size: portion of page that we want to write |
---|
2775 | 3019 | * @offset: starting offset in the page |
---|
2776 | | - * @bdev: attach newly created bios to this bdev |
---|
2777 | 3020 | * @bio_ret: must be valid pointer, newly allocated bio will be stored there |
---|
2778 | 3021 | * @end_io_func: end_io callback for new bio |
---|
2779 | 3022 | * @mirror_num: desired mirror to read/write |
---|
2780 | 3023 | * @prev_bio_flags: flags of previous bio to see if we can merge the current one |
---|
2781 | 3024 | * @bio_flags: flags of the current bio to see if we can merge them |
---|
2782 | 3025 | */ |
---|
2783 | | -static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree, |
---|
| 3026 | +static int submit_extent_page(unsigned int opf, |
---|
2784 | 3027 | struct writeback_control *wbc, |
---|
2785 | 3028 | struct page *page, u64 offset, |
---|
2786 | 3029 | size_t size, unsigned long pg_offset, |
---|
2787 | | - struct block_device *bdev, |
---|
2788 | 3030 | struct bio **bio_ret, |
---|
2789 | 3031 | bio_end_io_t end_io_func, |
---|
2790 | 3032 | int mirror_num, |
---|
.. | .. |
---|
2796 | 3038 | struct bio *bio; |
---|
2797 | 3039 | size_t page_size = min_t(size_t, size, PAGE_SIZE); |
---|
2798 | 3040 | sector_t sector = offset >> 9; |
---|
| 3041 | + struct extent_io_tree *tree = &BTRFS_I(page->mapping->host)->io_tree; |
---|
2799 | 3042 | |
---|
2800 | 3043 | ASSERT(bio_ret); |
---|
2801 | 3044 | |
---|
.. | .. |
---|
2809 | 3052 | else |
---|
2810 | 3053 | contig = bio_end_sector(bio) == sector; |
---|
2811 | 3054 | |
---|
2812 | | - if (tree->ops && btrfs_merge_bio_hook(page, offset, page_size, |
---|
2813 | | - bio, bio_flags)) |
---|
| 3055 | + if (btrfs_bio_fits_in_stripe(page, page_size, bio, bio_flags)) |
---|
2814 | 3056 | can_merge = false; |
---|
2815 | 3057 | |
---|
2816 | 3058 | if (prev_bio_flags != bio_flags || !contig || !can_merge || |
---|
.. | .. |
---|
2824 | 3066 | bio = NULL; |
---|
2825 | 3067 | } else { |
---|
2826 | 3068 | if (wbc) |
---|
2827 | | - wbc_account_io(wbc, page, page_size); |
---|
| 3069 | + wbc_account_cgroup_owner(wbc, page, page_size); |
---|
2828 | 3070 | return 0; |
---|
2829 | 3071 | } |
---|
2830 | 3072 | } |
---|
2831 | 3073 | |
---|
2832 | | - bio = btrfs_bio_alloc(bdev, offset); |
---|
| 3074 | + bio = btrfs_bio_alloc(offset); |
---|
2833 | 3075 | bio_add_page(bio, page, page_size, pg_offset); |
---|
2834 | 3076 | bio->bi_end_io = end_io_func; |
---|
2835 | 3077 | bio->bi_private = tree; |
---|
2836 | 3078 | bio->bi_write_hint = page->mapping->host->i_write_hint; |
---|
2837 | 3079 | bio->bi_opf = opf; |
---|
2838 | 3080 | if (wbc) { |
---|
| 3081 | + struct block_device *bdev; |
---|
| 3082 | + |
---|
| 3083 | + bdev = BTRFS_I(page->mapping->host)->root->fs_info->fs_devices->latest_bdev; |
---|
| 3084 | + bio_set_dev(bio, bdev); |
---|
2839 | 3085 | wbc_init_bio(wbc, bio); |
---|
2840 | | - wbc_account_io(wbc, page, page_size); |
---|
| 3086 | + wbc_account_cgroup_owner(wbc, page, page_size); |
---|
2841 | 3087 | } |
---|
2842 | 3088 | |
---|
2843 | 3089 | *bio_ret = bio; |
---|
.. | .. |
---|
2848 | 3094 | static void attach_extent_buffer_page(struct extent_buffer *eb, |
---|
2849 | 3095 | struct page *page) |
---|
2850 | 3096 | { |
---|
2851 | | - if (!PagePrivate(page)) { |
---|
2852 | | - SetPagePrivate(page); |
---|
2853 | | - get_page(page); |
---|
2854 | | - set_page_private(page, (unsigned long)eb); |
---|
2855 | | - } else { |
---|
| 3097 | + if (!PagePrivate(page)) |
---|
| 3098 | + attach_page_private(page, eb); |
---|
| 3099 | + else |
---|
2856 | 3100 | WARN_ON(page->private != (unsigned long)eb); |
---|
2857 | | - } |
---|
2858 | 3101 | } |
---|
2859 | 3102 | |
---|
2860 | 3103 | void set_page_extent_mapped(struct page *page) |
---|
2861 | 3104 | { |
---|
2862 | | - if (!PagePrivate(page)) { |
---|
2863 | | - SetPagePrivate(page); |
---|
2864 | | - get_page(page); |
---|
2865 | | - set_page_private(page, EXTENT_PAGE_PRIVATE); |
---|
2866 | | - } |
---|
| 3105 | + if (!PagePrivate(page)) |
---|
| 3106 | + attach_page_private(page, (void *)EXTENT_PAGE_PRIVATE); |
---|
2867 | 3107 | } |
---|
2868 | 3108 | |
---|
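attach_page_private() folds the take-a-reference/set-private/mark-private sequence that the deleted lines spelled out. A sketch of its approximate effect, reconstructed from the removed code above rather than copied from the generic helper's source:

	/* Approximate body of attach_page_private(page, data). */
	static inline void example_attach_page_private(struct page *page, void *data)
	{
		get_page(page);
		set_page_private(page, (unsigned long)data);
		SetPagePrivate(page);
	}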
2869 | 3109 | static struct extent_map * |
---|
2870 | 3110 | __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, |
---|
2871 | | - u64 start, u64 len, get_extent_t *get_extent, |
---|
2872 | | - struct extent_map **em_cached) |
---|
| 3111 | + u64 start, u64 len, struct extent_map **em_cached) |
---|
2873 | 3112 | { |
---|
2874 | 3113 | struct extent_map *em; |
---|
2875 | 3114 | |
---|
.. | .. |
---|
2885 | 3124 | *em_cached = NULL; |
---|
2886 | 3125 | } |
---|
2887 | 3126 | |
---|
2888 | | - em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0); |
---|
| 3127 | + em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, start, len); |
---|
2889 | 3128 | if (em_cached && !IS_ERR_OR_NULL(em)) { |
---|
2890 | 3129 | BUG_ON(*em_cached); |
---|
2891 | 3130 | refcount_inc(&em->refs); |
---|
.. | .. |
---|
2900 | 3139 | * XXX JDM: This needs looking at to ensure proper page locking |
---|
2901 | 3140 | * return 0 on success, otherwise return error |
---|
2902 | 3141 | */ |
---|
2903 | | -static int __do_readpage(struct extent_io_tree *tree, |
---|
2904 | | - struct page *page, |
---|
2905 | | - get_extent_t *get_extent, |
---|
2906 | | - struct extent_map **em_cached, |
---|
2907 | | - struct bio **bio, int mirror_num, |
---|
2908 | | - unsigned long *bio_flags, unsigned int read_flags, |
---|
2909 | | - u64 *prev_em_start) |
---|
| 3142 | +int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, |
---|
| 3143 | + struct bio **bio, unsigned long *bio_flags, |
---|
| 3144 | + unsigned int read_flags, u64 *prev_em_start) |
---|
2910 | 3145 | { |
---|
2911 | 3146 | struct inode *inode = page->mapping->host; |
---|
2912 | 3147 | u64 start = page_offset(page); |
---|
.. | .. |
---|
2917 | 3152 | u64 block_start; |
---|
2918 | 3153 | u64 cur_end; |
---|
2919 | 3154 | struct extent_map *em; |
---|
2920 | | - struct block_device *bdev; |
---|
2921 | 3155 | int ret = 0; |
---|
2922 | 3156 | int nr = 0; |
---|
2923 | 3157 | size_t pg_offset = 0; |
---|
.. | .. |
---|
2925 | 3159 | size_t disk_io_size; |
---|
2926 | 3160 | size_t blocksize = inode->i_sb->s_blocksize; |
---|
2927 | 3161 | unsigned long this_bio_flag = 0; |
---|
| 3162 | + struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; |
---|
2928 | 3163 | |
---|
2929 | 3164 | set_page_extent_mapped(page); |
---|
2930 | 3165 | |
---|
.. | .. |
---|
2938 | 3173 | |
---|
2939 | 3174 | if (page->index == last_byte >> PAGE_SHIFT) { |
---|
2940 | 3175 | char *userpage; |
---|
2941 | | - size_t zero_offset = last_byte & (PAGE_SIZE - 1); |
---|
| 3176 | + size_t zero_offset = offset_in_page(last_byte); |
---|
2942 | 3177 | |
---|
2943 | 3178 | if (zero_offset) { |
---|
2944 | 3179 | iosize = PAGE_SIZE - zero_offset; |
---|
.. | .. |
---|
2968 | 3203 | break; |
---|
2969 | 3204 | } |
---|
2970 | 3205 | em = __get_extent_map(inode, page, pg_offset, cur, |
---|
2971 | | - end - cur + 1, get_extent, em_cached); |
---|
| 3206 | + end - cur + 1, em_cached); |
---|
2972 | 3207 | if (IS_ERR_OR_NULL(em)) { |
---|
2973 | 3208 | SetPageError(page); |
---|
2974 | 3209 | unlock_extent(tree, cur, end); |
---|
.. | .. |
---|
2994 | 3229 | offset = em->block_start + extent_offset; |
---|
2995 | 3230 | disk_io_size = iosize; |
---|
2996 | 3231 | } |
---|
2997 | | - bdev = em->bdev; |
---|
2998 | 3232 | block_start = em->block_start; |
---|
2999 | 3233 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) |
---|
3000 | 3234 | block_start = EXTENT_MAP_HOLE; |
---|
3001 | 3235 | |
---|
3002 | 3236 | /* |
---|
3003 | 3237 | * If we have a file range that points to a compressed extent |
---|
3004 | | - * and it's followed by a consecutive file range that points to |
---|
| 3238 | + * and it's followed by a consecutive file range that points |
---|
3005 | 3239 | * to the same compressed extent (possibly with a different |
---|
3006 | 3240 | * offset and/or length, so it either points to the whole extent |
---|
3007 | 3241 | * or only part of it), we must make sure we do not submit a |
---|
.. | .. |
---|
3082 | 3316 | continue; |
---|
3083 | 3317 | } |
---|
3084 | 3318 | |
---|
3085 | | - ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL, |
---|
| 3319 | + ret = submit_extent_page(REQ_OP_READ | read_flags, NULL, |
---|
3086 | 3320 | page, offset, disk_io_size, |
---|
3087 | | - pg_offset, bdev, bio, |
---|
3088 | | - end_bio_extent_readpage, mirror_num, |
---|
| 3321 | + pg_offset, bio, |
---|
| 3322 | + end_bio_extent_readpage, 0, |
---|
3089 | 3323 | *bio_flags, |
---|
3090 | 3324 | this_bio_flag, |
---|
3091 | 3325 | force_bio_submit); |
---|
.. | .. |
---|
3109 | 3343 | return ret; |
---|
3110 | 3344 | } |
---|
3111 | 3345 | |
---|
3112 | | -static inline void __do_contiguous_readpages(struct extent_io_tree *tree, |
---|
3113 | | - struct page *pages[], int nr_pages, |
---|
| 3346 | +static inline void contiguous_readpages(struct page *pages[], int nr_pages, |
---|
3114 | 3347 | u64 start, u64 end, |
---|
3115 | 3348 | struct extent_map **em_cached, |
---|
3116 | 3349 | struct bio **bio, |
---|
3117 | 3350 | unsigned long *bio_flags, |
---|
3118 | 3351 | u64 *prev_em_start) |
---|
3119 | 3352 | { |
---|
3120 | | - struct inode *inode; |
---|
3121 | | - struct btrfs_ordered_extent *ordered; |
---|
| 3353 | + struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host); |
---|
3122 | 3354 | int index; |
---|
3123 | 3355 | |
---|
3124 | | - inode = pages[0]->mapping->host; |
---|
3125 | | - while (1) { |
---|
3126 | | - lock_extent(tree, start, end); |
---|
3127 | | - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start, |
---|
3128 | | - end - start + 1); |
---|
3129 | | - if (!ordered) |
---|
3130 | | - break; |
---|
3131 | | - unlock_extent(tree, start, end); |
---|
3132 | | - btrfs_start_ordered_extent(inode, ordered, 1); |
---|
3133 | | - btrfs_put_ordered_extent(ordered); |
---|
3134 | | - } |
---|
| 3356 | + btrfs_lock_and_flush_ordered_range(inode, start, end, NULL); |
---|
3135 | 3357 | |
---|
3136 | 3358 | for (index = 0; index < nr_pages; index++) { |
---|
3137 | | - __do_readpage(tree, pages[index], btrfs_get_extent, em_cached, |
---|
3138 | | - bio, 0, bio_flags, REQ_RAHEAD, prev_em_start); |
---|
| 3359 | + btrfs_do_readpage(pages[index], em_cached, bio, bio_flags, |
---|
| 3360 | + REQ_RAHEAD, prev_em_start); |
---|
3139 | 3361 | put_page(pages[index]); |
---|
3140 | 3362 | } |
---|
3141 | | -} |
---|
3142 | | - |
---|
3143 | | -static void __extent_readpages(struct extent_io_tree *tree, |
---|
3144 | | - struct page *pages[], |
---|
3145 | | - int nr_pages, |
---|
3146 | | - struct extent_map **em_cached, |
---|
3147 | | - struct bio **bio, unsigned long *bio_flags, |
---|
3148 | | - u64 *prev_em_start) |
---|
3149 | | -{ |
---|
3150 | | - u64 start = 0; |
---|
3151 | | - u64 end = 0; |
---|
3152 | | - u64 page_start; |
---|
3153 | | - int index; |
---|
3154 | | - int first_index = 0; |
---|
3155 | | - |
---|
3156 | | - for (index = 0; index < nr_pages; index++) { |
---|
3157 | | - page_start = page_offset(pages[index]); |
---|
3158 | | - if (!end) { |
---|
3159 | | - start = page_start; |
---|
3160 | | - end = start + PAGE_SIZE - 1; |
---|
3161 | | - first_index = index; |
---|
3162 | | - } else if (end + 1 == page_start) { |
---|
3163 | | - end += PAGE_SIZE; |
---|
3164 | | - } else { |
---|
3165 | | - __do_contiguous_readpages(tree, &pages[first_index], |
---|
3166 | | - index - first_index, start, |
---|
3167 | | - end, em_cached, |
---|
3168 | | - bio, bio_flags, |
---|
3169 | | - prev_em_start); |
---|
3170 | | - start = page_start; |
---|
3171 | | - end = start + PAGE_SIZE - 1; |
---|
3172 | | - first_index = index; |
---|
3173 | | - } |
---|
3174 | | - } |
---|
3175 | | - |
---|
3176 | | - if (end) |
---|
3177 | | - __do_contiguous_readpages(tree, &pages[first_index], |
---|
3178 | | - index - first_index, start, |
---|
3179 | | - end, em_cached, bio, |
---|
3180 | | - bio_flags, prev_em_start); |
---|
3181 | | -} |
---|
3182 | | - |
---|
3183 | | -static int __extent_read_full_page(struct extent_io_tree *tree, |
---|
3184 | | - struct page *page, |
---|
3185 | | - get_extent_t *get_extent, |
---|
3186 | | - struct bio **bio, int mirror_num, |
---|
3187 | | - unsigned long *bio_flags, |
---|
3188 | | - unsigned int read_flags) |
---|
3189 | | -{ |
---|
3190 | | - struct inode *inode = page->mapping->host; |
---|
3191 | | - struct btrfs_ordered_extent *ordered; |
---|
3192 | | - u64 start = page_offset(page); |
---|
3193 | | - u64 end = start + PAGE_SIZE - 1; |
---|
3194 | | - int ret; |
---|
3195 | | - |
---|
3196 | | - while (1) { |
---|
3197 | | - lock_extent(tree, start, end); |
---|
3198 | | - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start, |
---|
3199 | | - PAGE_SIZE); |
---|
3200 | | - if (!ordered) |
---|
3201 | | - break; |
---|
3202 | | - unlock_extent(tree, start, end); |
---|
3203 | | - btrfs_start_ordered_extent(inode, ordered, 1); |
---|
3204 | | - btrfs_put_ordered_extent(ordered); |
---|
3205 | | - } |
---|
3206 | | - |
---|
3207 | | - ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, |
---|
3208 | | - bio_flags, read_flags, NULL); |
---|
3209 | | - return ret; |
---|
3210 | | -} |
---|
3211 | | - |
---|
3212 | | -int extent_read_full_page(struct extent_io_tree *tree, struct page *page, |
---|
3213 | | - get_extent_t *get_extent, int mirror_num) |
---|
3214 | | -{ |
---|
3215 | | - struct bio *bio = NULL; |
---|
3216 | | - unsigned long bio_flags = 0; |
---|
3217 | | - int ret; |
---|
3218 | | - |
---|
3219 | | - ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num, |
---|
3220 | | - &bio_flags, 0); |
---|
3221 | | - if (bio) |
---|
3222 | | - ret = submit_one_bio(bio, mirror_num, bio_flags); |
---|
3223 | | - return ret; |
---|
3224 | 3363 | } |
---|
3225 | 3364 | |
---|
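btrfs_lock_and_flush_ordered_range() now hides the lock/lookup/wait loop that contiguous_readpages (and the removed __extent_read_full_page) used to open-code. A hedged sketch of that loop, paraphrased from the deleted lines above with the argument types simplified:

	/* Lock [start, end] and flush any ordered extents that still cover it. */
	while (1) {
		lock_extent(&inode->io_tree, start, end);
		ordered = btrfs_lookup_ordered_range(inode, start, end - start + 1);
		if (!ordered)
			break;			/* nothing pending, keep the range locked */
		unlock_extent(&inode->io_tree, start, end);
		btrfs_start_ordered_extent(inode, ordered, 1);	/* wait for it to finish */
		btrfs_put_ordered_extent(ordered);
	}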
3226 | 3365 | static void update_nr_written(struct writeback_control *wbc, |
---|
.. | .. |
---|
3239 | 3378 | * This returns 0 if all went well (page still locked) |
---|
3240 | 3379 | * This returns < 0 if there were errors (page still locked) |
---|
3241 | 3380 | */ |
---|
3242 | | -static noinline_for_stack int writepage_delalloc(struct inode *inode, |
---|
3243 | | - struct page *page, struct writeback_control *wbc, |
---|
3244 | | - struct extent_page_data *epd, |
---|
3245 | | - u64 delalloc_start, |
---|
3246 | | - unsigned long *nr_written) |
---|
| 3381 | +static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, |
---|
| 3382 | + struct page *page, struct writeback_control *wbc, |
---|
| 3383 | + u64 delalloc_start, unsigned long *nr_written) |
---|
3247 | 3384 | { |
---|
3248 | | - struct extent_io_tree *tree = epd->tree; |
---|
3249 | 3385 | u64 page_end = delalloc_start + PAGE_SIZE - 1; |
---|
3250 | | - u64 nr_delalloc; |
---|
| 3386 | + bool found; |
---|
3251 | 3387 | u64 delalloc_to_write = 0; |
---|
3252 | 3388 | u64 delalloc_end = 0; |
---|
3253 | 3389 | int ret; |
---|
3254 | 3390 | int page_started = 0; |
---|
3255 | 3391 | |
---|
3256 | | - if (epd->extent_locked) |
---|
3257 | | - return 0; |
---|
3258 | 3392 | |
---|
3259 | 3393 | while (delalloc_end < page_end) { |
---|
3260 | | - nr_delalloc = find_lock_delalloc_range(inode, tree, |
---|
3261 | | - page, |
---|
| 3394 | + found = find_lock_delalloc_range(&inode->vfs_inode, page, |
---|
3262 | 3395 | &delalloc_start, |
---|
3263 | | - &delalloc_end, |
---|
3264 | | - BTRFS_MAX_EXTENT_SIZE); |
---|
3265 | | - if (nr_delalloc == 0) { |
---|
| 3396 | + &delalloc_end); |
---|
| 3397 | + if (!found) { |
---|
3266 | 3398 | delalloc_start = delalloc_end + 1; |
---|
3267 | 3399 | continue; |
---|
3268 | 3400 | } |
---|
3269 | 3401 | ret = btrfs_run_delalloc_range(inode, page, delalloc_start, |
---|
3270 | 3402 | delalloc_end, &page_started, nr_written, wbc); |
---|
3271 | | - /* File system has been set read-only */ |
---|
3272 | 3403 | if (ret) { |
---|
3273 | 3404 | SetPageError(page); |
---|
3274 | 3405 | /* |
---|
.. | .. |
---|
3277 | 3408 | * started, so we don't want to return > 0 unless |
---|
3278 | 3409 | * things are going well. |
---|
3279 | 3410 | */ |
---|
3280 | | - ret = ret < 0 ? ret : -EIO; |
---|
3281 | | - goto done; |
---|
| 3411 | + return ret < 0 ? ret : -EIO; |
---|
3282 | 3412 | } |
---|
3283 | 3413 | /* |
---|
3284 | 3414 | * delalloc_end is already one less than the total length, so |
---|
.. | .. |
---|
3310 | 3440 | return 1; |
---|
3311 | 3441 | } |
---|
3312 | 3442 | |
---|
3313 | | - ret = 0; |
---|
3314 | | - |
---|
3315 | | -done: |
---|
3316 | | - return ret; |
---|
| 3443 | + return 0; |
---|
3317 | 3444 | } |
---|
3318 | 3445 | |
---|
3319 | 3446 | /* |
---|
.. | .. |
---|
3324 | 3451 | * 0 if all went well (page still locked) |
---|
3325 | 3452 | * < 0 if there were errors (page still locked) |
---|
3326 | 3453 | */ |
---|
3327 | | -static noinline_for_stack int __extent_writepage_io(struct inode *inode, |
---|
| 3454 | +static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, |
---|
3328 | 3455 | struct page *page, |
---|
3329 | 3456 | struct writeback_control *wbc, |
---|
3330 | 3457 | struct extent_page_data *epd, |
---|
3331 | 3458 | loff_t i_size, |
---|
3332 | 3459 | unsigned long nr_written, |
---|
3333 | | - unsigned int write_flags, int *nr_ret) |
---|
| 3460 | + int *nr_ret) |
---|
3334 | 3461 | { |
---|
3335 | | - struct extent_io_tree *tree = epd->tree; |
---|
| 3462 | + struct extent_io_tree *tree = &inode->io_tree; |
---|
3336 | 3463 | u64 start = page_offset(page); |
---|
3337 | 3464 | u64 page_end = start + PAGE_SIZE - 1; |
---|
3338 | 3465 | u64 end; |
---|
.. | .. |
---|
3341 | 3468 | u64 block_start; |
---|
3342 | 3469 | u64 iosize; |
---|
3343 | 3470 | struct extent_map *em; |
---|
3344 | | - struct block_device *bdev; |
---|
3345 | 3471 | size_t pg_offset = 0; |
---|
3346 | 3472 | size_t blocksize; |
---|
3347 | 3473 | int ret = 0; |
---|
3348 | 3474 | int nr = 0; |
---|
| 3475 | + const unsigned int write_flags = wbc_to_write_flags(wbc); |
---|
3349 | 3476 | bool compressed; |
---|
3350 | 3477 | |
---|
3351 | | - if (tree->ops && tree->ops->writepage_start_hook) { |
---|
3352 | | - ret = tree->ops->writepage_start_hook(page, start, |
---|
3353 | | - page_end); |
---|
3354 | | - if (ret) { |
---|
3355 | | - /* Fixup worker will requeue */ |
---|
3356 | | - if (ret == -EBUSY) |
---|
3357 | | - wbc->pages_skipped++; |
---|
3358 | | - else |
---|
3359 | | - redirty_page_for_writepage(wbc, page); |
---|
3360 | | - |
---|
3361 | | - update_nr_written(wbc, nr_written); |
---|
3362 | | - unlock_page(page); |
---|
3363 | | - return 1; |
---|
3364 | | - } |
---|
| 3478 | + ret = btrfs_writepage_cow_fixup(page, start, page_end); |
---|
| 3479 | + if (ret) { |
---|
| 3480 | + /* Fixup worker will requeue */ |
---|
| 3481 | + redirty_page_for_writepage(wbc, page); |
---|
| 3482 | + update_nr_written(wbc, nr_written); |
---|
| 3483 | + unlock_page(page); |
---|
| 3484 | + return 1; |
---|
3365 | 3485 | } |
---|
3366 | 3486 | |
---|
3367 | 3487 | /* |
---|
.. | .. |
---|
3371 | 3491 | update_nr_written(wbc, nr_written + 1); |
---|
3372 | 3492 | |
---|
3373 | 3493 | end = page_end; |
---|
3374 | | - if (i_size <= start) { |
---|
3375 | | - if (tree->ops && tree->ops->writepage_end_io_hook) |
---|
3376 | | - tree->ops->writepage_end_io_hook(page, start, |
---|
3377 | | - page_end, NULL, 1); |
---|
3378 | | - goto done; |
---|
3379 | | - } |
---|
3380 | | - |
---|
3381 | | - blocksize = inode->i_sb->s_blocksize; |
---|
| 3494 | + blocksize = inode->vfs_inode.i_sb->s_blocksize; |
---|
3382 | 3495 | |
---|
3383 | 3496 | while (cur <= end) { |
---|
3384 | 3497 | u64 em_end; |
---|
3385 | 3498 | u64 offset; |
---|
3386 | 3499 | |
---|
3387 | 3500 | if (cur >= i_size) { |
---|
3388 | | - if (tree->ops && tree->ops->writepage_end_io_hook) |
---|
3389 | | - tree->ops->writepage_end_io_hook(page, cur, |
---|
3390 | | - page_end, NULL, 1); |
---|
| 3501 | + btrfs_writepage_endio_finish_ordered(page, cur, |
---|
| 3502 | + page_end, 1); |
---|
3391 | 3503 | break; |
---|
3392 | 3504 | } |
---|
3393 | | - em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, cur, |
---|
3394 | | - end - cur + 1, 1); |
---|
| 3505 | + em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1); |
---|
3395 | 3506 | if (IS_ERR_OR_NULL(em)) { |
---|
3396 | 3507 | SetPageError(page); |
---|
3397 | 3508 | ret = PTR_ERR_OR_ZERO(em); |
---|
.. | .. |
---|
3405 | 3516 | iosize = min(em_end - cur, end - cur + 1); |
---|
3406 | 3517 | iosize = ALIGN(iosize, blocksize); |
---|
3407 | 3518 | offset = em->block_start + extent_offset; |
---|
3408 | | - bdev = em->bdev; |
---|
3409 | 3519 | block_start = em->block_start; |
---|
3410 | 3520 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
---|
3411 | 3521 | free_extent_map(em); |
---|
.. | .. |
---|
3417 | 3527 | */ |
---|
3418 | 3528 | if (compressed || block_start == EXTENT_MAP_HOLE || |
---|
3419 | 3529 | block_start == EXTENT_MAP_INLINE) { |
---|
3420 | | - /* |
---|
3421 | | - * end_io notification does not happen here for |
---|
3422 | | - * compressed extents |
---|
3423 | | - */ |
---|
3424 | | - if (!compressed && tree->ops && |
---|
3425 | | - tree->ops->writepage_end_io_hook) |
---|
3426 | | - tree->ops->writepage_end_io_hook(page, cur, |
---|
3427 | | - cur + iosize - 1, |
---|
3428 | | - NULL, 1); |
---|
3429 | | - else if (compressed) { |
---|
3430 | | - /* we don't want to end_page_writeback on |
---|
3431 | | - * a compressed extent. this happens |
---|
3432 | | - * elsewhere |
---|
3433 | | - */ |
---|
| 3530 | + if (compressed) |
---|
3434 | 3531 | nr++; |
---|
3435 | | - } |
---|
3436 | | - |
---|
| 3532 | + else |
---|
| 3533 | + btrfs_writepage_endio_finish_ordered(page, cur, |
---|
| 3534 | + cur + iosize - 1, 1); |
---|
3437 | 3535 | cur += iosize; |
---|
3438 | 3536 | pg_offset += iosize; |
---|
3439 | 3537 | continue; |
---|
.. | .. |
---|
3441 | 3539 | |
---|
3442 | 3540 | btrfs_set_range_writeback(tree, cur, cur + iosize - 1); |
---|
3443 | 3541 | if (!PageWriteback(page)) { |
---|
3444 | | - btrfs_err(BTRFS_I(inode)->root->fs_info, |
---|
| 3542 | + btrfs_err(inode->root->fs_info, |
---|
3445 | 3543 | "page %lu not writeback, cur %llu end %llu", |
---|
3446 | 3544 | page->index, cur, end); |
---|
3447 | 3545 | } |
---|
3448 | 3546 | |
---|
3449 | | - ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc, |
---|
| 3547 | + ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc, |
---|
3450 | 3548 | page, offset, iosize, pg_offset, |
---|
3451 | | - bdev, &epd->bio, |
---|
| 3549 | + &epd->bio, |
---|
3452 | 3550 | end_bio_extent_writepage, |
---|
3453 | 3551 | 0, 0, 0, false); |
---|
3454 | 3552 | if (ret) { |
---|
.. | .. |
---|
3461 | 3559 | pg_offset += iosize; |
---|
3462 | 3560 | nr++; |
---|
3463 | 3561 | } |
---|
3464 | | -done: |
---|
3465 | 3562 | *nr_ret = nr; |
---|
3466 | 3563 | return ret; |
---|
3467 | 3564 | } |
---|
.. | .. |
---|
3483 | 3580 | u64 page_end = start + PAGE_SIZE - 1; |
---|
3484 | 3581 | int ret; |
---|
3485 | 3582 | int nr = 0; |
---|
3486 | | - size_t pg_offset = 0; |
---|
| 3583 | + size_t pg_offset; |
---|
3487 | 3584 | loff_t i_size = i_size_read(inode); |
---|
3488 | 3585 | unsigned long end_index = i_size >> PAGE_SHIFT; |
---|
3489 | | - unsigned int write_flags = 0; |
---|
3490 | 3586 | unsigned long nr_written = 0; |
---|
3491 | | - |
---|
3492 | | - write_flags = wbc_to_write_flags(wbc); |
---|
3493 | 3587 | |
---|
3494 | 3588 | trace___extent_writepage(page, inode, wbc); |
---|
3495 | 3589 | |
---|
.. | .. |
---|
3497 | 3591 | |
---|
3498 | 3592 | ClearPageError(page); |
---|
3499 | 3593 | |
---|
3500 | | - pg_offset = i_size & (PAGE_SIZE - 1); |
---|
| 3594 | + pg_offset = offset_in_page(i_size); |
---|
3501 | 3595 | if (page->index > end_index || |
---|
3502 | 3596 | (page->index == end_index && !pg_offset)) { |
---|
3503 | 3597 | page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); |
---|
.. | .. |
---|
3515 | 3609 | flush_dcache_page(page); |
---|
3516 | 3610 | } |
---|
3517 | 3611 | |
---|
3518 | | - pg_offset = 0; |
---|
3519 | | - |
---|
3520 | 3612 | set_page_extent_mapped(page); |
---|
3521 | 3613 | |
---|
3522 | | - ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written); |
---|
3523 | | - if (ret == 1) |
---|
3524 | | - goto done_unlocked; |
---|
3525 | | - if (ret) |
---|
3526 | | - goto done; |
---|
| 3614 | + if (!epd->extent_locked) { |
---|
| 3615 | + ret = writepage_delalloc(BTRFS_I(inode), page, wbc, start, |
---|
| 3616 | + &nr_written); |
---|
| 3617 | + if (ret == 1) |
---|
| 3618 | + return 0; |
---|
| 3619 | + if (ret) |
---|
| 3620 | + goto done; |
---|
| 3621 | + } |
---|
3527 | 3622 | |
---|
3528 | | - ret = __extent_writepage_io(inode, page, wbc, epd, |
---|
3529 | | - i_size, nr_written, write_flags, &nr); |
---|
| 3623 | + ret = __extent_writepage_io(BTRFS_I(inode), page, wbc, epd, i_size, |
---|
| 3624 | + nr_written, &nr); |
---|
3530 | 3625 | if (ret == 1) |
---|
3531 | | - goto done_unlocked; |
---|
| 3626 | + return 0; |
---|
3532 | 3627 | |
---|
3533 | 3628 | done: |
---|
3534 | 3629 | if (nr == 0) { |
---|
.. | .. |
---|
3543 | 3638 | unlock_page(page); |
---|
3544 | 3639 | ASSERT(ret <= 0); |
---|
3545 | 3640 | return ret; |
---|
3546 | | - |
---|
3547 | | -done_unlocked: |
---|
3548 | | - return 0; |
---|
3549 | 3641 | } |
---|
3550 | 3642 | |
---|
3551 | 3643 | void wait_on_extent_buffer_writeback(struct extent_buffer *eb) |
---|
.. | .. |
---|
3568 | 3660 | * Return >0 is same as 0, except bio is not submitted |
---|
3569 | 3661 | * Return <0 if something went wrong, no page is locked |
---|
3570 | 3662 | */ |
---|
3571 | | -static noinline_for_stack int |
---|
3572 | | -lock_extent_buffer_for_io(struct extent_buffer *eb, |
---|
3573 | | - struct btrfs_fs_info *fs_info, |
---|
| 3663 | +static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb, |
---|
3574 | 3664 | struct extent_page_data *epd) |
---|
3575 | 3665 | { |
---|
| 3666 | + struct btrfs_fs_info *fs_info = eb->fs_info; |
---|
3576 | 3667 | int i, num_pages, failed_page_nr; |
---|
3577 | 3668 | int flush = 0; |
---|
3578 | 3669 | int ret = 0; |
---|
.. | .. |
---|
3672 | 3763 | static void set_btree_ioerr(struct page *page) |
---|
3673 | 3764 | { |
---|
3674 | 3765 | struct extent_buffer *eb = (struct extent_buffer *)page->private; |
---|
| 3766 | + struct btrfs_fs_info *fs_info; |
---|
3675 | 3767 | |
---|
3676 | 3768 | SetPageError(page); |
---|
3677 | 3769 | if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) |
---|
3678 | 3770 | return; |
---|
| 3771 | + |
---|
| 3772 | + /* |
---|
| 3773 | + * A read may stumble upon this buffer later, make sure that it gets an |
---|
| 3774 | + * error and knows there was an error. |
---|
| 3775 | + */ |
---|
| 3776 | + clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
---|
| 3777 | + |
---|
| 3778 | + /* |
---|
| 3779 | + * If we error out, we should add back the dirty_metadata_bytes |
---|
| 3780 | + * to make it consistent. |
---|
| 3781 | + */ |
---|
| 3782 | + fs_info = eb->fs_info; |
---|
| 3783 | + percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, |
---|
| 3784 | + eb->len, fs_info->dirty_metadata_batch); |
---|
3679 | 3785 | |
---|
3680 | 3786 | /* |
---|
3681 | 3787 | * If writeback for a btree extent that doesn't belong to a log tree |
---|
.. | .. |
---|
3734 | 3840 | { |
---|
3735 | 3841 | struct bio_vec *bvec; |
---|
3736 | 3842 | struct extent_buffer *eb; |
---|
3737 | | - int i, done; |
---|
| 3843 | + int done; |
---|
| 3844 | + struct bvec_iter_all iter_all; |
---|
3738 | 3845 | |
---|
3739 | 3846 | ASSERT(!bio_flagged(bio, BIO_CLONED)); |
---|
3740 | | - bio_for_each_segment_all(bvec, bio, i) { |
---|
| 3847 | + bio_for_each_segment_all(bvec, bio, iter_all) { |
---|
3741 | 3848 | struct page *page = bvec->bv_page; |
---|
3742 | 3849 | |
---|
3743 | 3850 | eb = (struct extent_buffer *)page->private; |
---|
.. | .. |
---|
3762 | 3869 | } |
---|
3763 | 3870 | |
---|
3764 | 3871 | static noinline_for_stack int write_one_eb(struct extent_buffer *eb, |
---|
3765 | | - struct btrfs_fs_info *fs_info, |
---|
3766 | 3872 | struct writeback_control *wbc, |
---|
3767 | 3873 | struct extent_page_data *epd) |
---|
3768 | 3874 | { |
---|
3769 | | - struct block_device *bdev = fs_info->fs_devices->latest_bdev; |
---|
3770 | | - struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree; |
---|
3771 | 3875 | u64 offset = eb->start; |
---|
3772 | 3876 | u32 nritems; |
---|
3773 | 3877 | int i, num_pages; |
---|
.. | .. |
---|
3791 | 3895 | * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0 |
---|
3792 | 3896 | */ |
---|
3793 | 3897 | start = btrfs_item_nr_offset(nritems); |
---|
3794 | | - end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, eb); |
---|
| 3898 | + end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(eb); |
---|
3795 | 3899 | memzero_extent_buffer(eb, start, end - start); |
---|
3796 | 3900 | } |
---|
3797 | 3901 | |
---|
.. | .. |
---|
3800 | 3904 | |
---|
3801 | 3905 | clear_page_dirty_for_io(p); |
---|
3802 | 3906 | set_page_writeback(p); |
---|
3803 | | - ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc, |
---|
3804 | | - p, offset, PAGE_SIZE, 0, bdev, |
---|
| 3907 | + ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc, |
---|
| 3908 | + p, offset, PAGE_SIZE, 0, |
---|
3805 | 3909 | &epd->bio, |
---|
3806 | 3910 | end_bio_extent_buffer_writepage, |
---|
3807 | 3911 | 0, 0, 0, false); |
---|
.. | .. |
---|
3833 | 3937 | int btree_write_cache_pages(struct address_space *mapping, |
---|
3834 | 3938 | struct writeback_control *wbc) |
---|
3835 | 3939 | { |
---|
3836 | | - struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree; |
---|
3837 | | - struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info; |
---|
3838 | 3940 | struct extent_buffer *eb, *prev_eb = NULL; |
---|
3839 | 3941 | struct extent_page_data epd = { |
---|
3840 | 3942 | .bio = NULL, |
---|
3841 | | - .tree = tree, |
---|
3842 | 3943 | .extent_locked = 0, |
---|
3843 | 3944 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
---|
3844 | 3945 | }; |
---|
| 3946 | + struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info; |
---|
3845 | 3947 | int ret = 0; |
---|
3846 | 3948 | int done = 0; |
---|
3847 | 3949 | int nr_to_write_done = 0; |
---|
.. | .. |
---|
3850 | 3952 | pgoff_t index; |
---|
3851 | 3953 | pgoff_t end; /* Inclusive */ |
---|
3852 | 3954 | int scanned = 0; |
---|
3853 | | - int tag; |
---|
| 3955 | + xa_mark_t tag; |
---|
3854 | 3956 | |
---|
3855 | 3957 | pagevec_init(&pvec); |
---|
3856 | 3958 | if (wbc->range_cyclic) { |
---|
3857 | 3959 | index = mapping->writeback_index; /* Start from prev offset */ |
---|
3858 | 3960 | end = -1; |
---|
| 3961 | + /* |
---|
| 3962 | + * Start from the beginning does not need to cycle over the |
---|
| 3963 | + * range, mark it as scanned. |
---|
| 3964 | + */ |
---|
| 3965 | + scanned = (index == 0); |
---|
3859 | 3966 | } else { |
---|
3860 | 3967 | index = wbc->range_start >> PAGE_SHIFT; |
---|
3861 | 3968 | end = wbc->range_end >> PAGE_SHIFT; |
---|
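Marking the range as scanned when index == 0 skips a pointless second pass: the wrap-around retry further down these functions (not visible in this hunk) only exists to cover [0, writeback_index) when the first pass started mid-file. An illustrative sketch of that shape, not the exact code:

	retry:
		while (!done && !nr_to_write_done && index <= end) {
			/* ... look up tagged pages and write them back ... */
		}
		if (!scanned && !done) {
			/* started mid-file: wrap around once to pick up [0, start) */
			scanned = 1;
			index = 0;
			goto retry;
		}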
.. | .. |
---|
3873 | 3980 | tag))) { |
---|
3874 | 3981 | unsigned i; |
---|
3875 | 3982 | |
---|
3876 | | - scanned = 1; |
---|
3877 | 3983 | for (i = 0; i < nr_pages; i++) { |
---|
3878 | 3984 | struct page *page = pvec.pages[i]; |
---|
3879 | 3985 | |
---|
.. | .. |
---|
3909 | 4015 | continue; |
---|
3910 | 4016 | |
---|
3911 | 4017 | prev_eb = eb; |
---|
3912 | | - ret = lock_extent_buffer_for_io(eb, fs_info, &epd); |
---|
| 4018 | + ret = lock_extent_buffer_for_io(eb, &epd); |
---|
3913 | 4019 | if (!ret) { |
---|
3914 | 4020 | free_extent_buffer(eb); |
---|
3915 | 4021 | continue; |
---|
.. | .. |
---|
3919 | 4025 | break; |
---|
3920 | 4026 | } |
---|
3921 | 4027 | |
---|
3922 | | - ret = write_one_eb(eb, fs_info, wbc, &epd); |
---|
| 4028 | + ret = write_one_eb(eb, wbc, &epd); |
---|
3923 | 4029 | if (ret) { |
---|
3924 | 4030 | done = 1; |
---|
3925 | 4031 | free_extent_buffer(eb); |
---|
.. | .. |
---|
3928 | 4034 | free_extent_buffer(eb); |
---|
3929 | 4035 | |
---|
3930 | 4036 | /* |
---|
3931 | | - * the filesystem may choose to bump up nr_to_write. |
---|
| 4037 | + * The filesystem may choose to bump up nr_to_write. |
---|
3932 | 4038 | * We have to make sure to honor the new nr_to_write |
---|
3933 | | - * at any time |
---|
| 4039 | + * at any time. |
---|
3934 | 4040 | */ |
---|
3935 | | - nr_to_write_done = wbc->nr_to_write <= 0; |
---|
| 4041 | + nr_to_write_done = (wbc->sync_mode == WB_SYNC_NONE && |
---|
| 4042 | + wbc->nr_to_write <= 0); |
---|
3936 | 4043 | } |
---|
3937 | 4044 | pagevec_release(&pvec); |
---|
3938 | 4045 | cond_resched(); |
---|
.. | .. |
---|
3981 | 4088 | if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { |
---|
3982 | 4089 | ret = flush_write_bio(&epd); |
---|
3983 | 4090 | } else { |
---|
3984 | | - ret = -EUCLEAN; |
---|
| 4091 | + ret = -EROFS; |
---|
3985 | 4092 | end_write_bio(&epd, ret); |
---|
3986 | 4093 | } |
---|
3987 | 4094 | return ret; |
---|
.. | .. |
---|
4016 | 4123 | pgoff_t done_index; |
---|
4017 | 4124 | int range_whole = 0; |
---|
4018 | 4125 | int scanned = 0; |
---|
4019 | | - int tag; |
---|
| 4126 | + xa_mark_t tag; |
---|
4020 | 4127 | |
---|
4021 | 4128 | /* |
---|
4022 | 4129 | * We have to hold onto the inode so that ordered extents can do their |
---|
.. | .. |
---|
4034 | 4141 | if (wbc->range_cyclic) { |
---|
4035 | 4142 | index = mapping->writeback_index; /* Start from prev offset */ |
---|
4036 | 4143 | end = -1; |
---|
| 4144 | + /* |
---|
| 4145 | + * Start from the beginning does not need to cycle over the |
---|
| 4146 | + * range, mark it as scanned. |
---|
| 4147 | + */ |
---|
| 4148 | + scanned = (index == 0); |
---|
4037 | 4149 | } else { |
---|
4038 | 4150 | index = wbc->range_start >> PAGE_SHIFT; |
---|
4039 | 4151 | end = wbc->range_end >> PAGE_SHIFT; |
---|
.. | .. |
---|
4067 | 4179 | &index, end, tag))) { |
---|
4068 | 4180 | unsigned i; |
---|
4069 | 4181 | |
---|
4070 | | - scanned = 1; |
---|
4071 | 4182 | for (i = 0; i < nr_pages; i++) { |
---|
4072 | 4183 | struct page *page = pvec.pages[i]; |
---|
4073 | 4184 | |
---|
.. | .. |
---|
4105 | 4216 | } |
---|
4106 | 4217 | |
---|
4107 | 4218 | ret = __extent_writepage(page, wbc, epd); |
---|
4108 | | - |
---|
4109 | | - if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { |
---|
4110 | | - unlock_page(page); |
---|
4111 | | - ret = 0; |
---|
4112 | | - } |
---|
4113 | 4219 | if (ret < 0) { |
---|
4114 | 4220 | done = 1; |
---|
4115 | 4221 | break; |
---|
.. | .. |
---|
4156 | 4262 | int ret; |
---|
4157 | 4263 | struct extent_page_data epd = { |
---|
4158 | 4264 | .bio = NULL, |
---|
4159 | | - .tree = &BTRFS_I(page->mapping->host)->io_tree, |
---|
4160 | 4265 | .extent_locked = 0, |
---|
4161 | 4266 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
---|
4162 | 4267 | }; |
---|
.. | .. |
---|
4177 | 4282 | int mode) |
---|
4178 | 4283 | { |
---|
4179 | 4284 | int ret = 0; |
---|
4180 | | - int flush_ret; |
---|
4181 | 4285 | struct address_space *mapping = inode->i_mapping; |
---|
4182 | | - struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; |
---|
4183 | 4286 | struct page *page; |
---|
4184 | 4287 | unsigned long nr_pages = (end - start + PAGE_SIZE) >> |
---|
4185 | 4288 | PAGE_SHIFT; |
---|
4186 | 4289 | |
---|
4187 | 4290 | struct extent_page_data epd = { |
---|
4188 | 4291 | .bio = NULL, |
---|
4189 | | - .tree = tree, |
---|
4190 | 4292 | .extent_locked = 1, |
---|
4191 | 4293 | .sync_io = mode == WB_SYNC_ALL, |
---|
4192 | 4294 | }; |
---|
.. | .. |
---|
4195 | 4297 | .nr_to_write = nr_pages * 2, |
---|
4196 | 4298 | .range_start = start, |
---|
4197 | 4299 | .range_end = end + 1, |
---|
| 4300 | + /* We're called from an async helper function */ |
---|
| 4301 | + .punt_to_cgroup = 1, |
---|
| 4302 | + .no_cgroup_owner = 1, |
---|
4198 | 4303 | }; |
---|
4199 | 4304 | |
---|
| 4305 | + wbc_attach_fdatawrite_inode(&wbc_writepages, inode); |
---|
4200 | 4306 | while (start <= end) { |
---|
4201 | 4307 | page = find_get_page(mapping, start >> PAGE_SHIFT); |
---|
4202 | 4308 | if (clear_page_dirty_for_io(page)) |
---|
4203 | 4309 | ret = __extent_writepage(page, &wbc_writepages, &epd); |
---|
4204 | 4310 | else { |
---|
4205 | | - if (tree->ops && tree->ops->writepage_end_io_hook) |
---|
4206 | | - tree->ops->writepage_end_io_hook(page, start, |
---|
4207 | | - start + PAGE_SIZE - 1, |
---|
4208 | | - NULL, 1); |
---|
| 4311 | + btrfs_writepage_endio_finish_ordered(page, start, |
---|
| 4312 | + start + PAGE_SIZE - 1, 1); |
---|
4209 | 4313 | unlock_page(page); |
---|
4210 | 4314 | } |
---|
4211 | 4315 | put_page(page); |
---|
4212 | 4316 | start += PAGE_SIZE; |
---|
4213 | 4317 | } |
---|
4214 | 4318 | |
---|
4215 | | - flush_ret = flush_write_bio(&epd); |
---|
4216 | | - BUG_ON(flush_ret < 0); |
---|
| 4319 | + ASSERT(ret <= 0); |
---|
| 4320 | + if (ret == 0) |
---|
| 4321 | + ret = flush_write_bio(&epd); |
---|
| 4322 | + else |
---|
| 4323 | + end_write_bio(&epd, ret); |
---|
| 4324 | + |
---|
| 4325 | + wbc_detach_inode(&wbc_writepages); |
---|
4217 | 4326 | return ret; |
---|
4218 | 4327 | } |
---|
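extent_write_locked_range() runs from an async helper, so it now builds its own writeback_control, attaches it to the inode for cgroup writeback accounting, and detaches it when done; punt_to_cgroup and no_cgroup_owner mark the I/O as not issued from the owning task's context. A minimal sketch of that attach/use/detach pairing, with the per-page submission elided:

	struct writeback_control wbc = {
		.sync_mode	 = WB_SYNC_ALL,
		.punt_to_cgroup	 = 1,	/* we are not running in the owner's context */
		.no_cgroup_owner = 1,
	};

	wbc_attach_fdatawrite_inode(&wbc, inode);
	/* ... submit pages, charging each via wbc_account_cgroup_owner() ... */
	wbc_detach_inode(&wbc);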
4219 | 4328 | |
---|
.. | .. |
---|
4221 | 4330 | struct writeback_control *wbc) |
---|
4222 | 4331 | { |
---|
4223 | 4332 | int ret = 0; |
---|
4224 | | - int flush_ret; |
---|
4225 | 4333 | struct extent_page_data epd = { |
---|
4226 | 4334 | .bio = NULL, |
---|
4227 | | - .tree = &BTRFS_I(mapping->host)->io_tree, |
---|
4228 | 4335 | .extent_locked = 0, |
---|
4229 | 4336 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
---|
4230 | 4337 | }; |
---|
4231 | 4338 | |
---|
4232 | 4339 | ret = extent_write_cache_pages(mapping, wbc, &epd); |
---|
4233 | | - flush_ret = flush_write_bio(&epd); |
---|
4234 | | - BUG_ON(flush_ret < 0); |
---|
| 4340 | + ASSERT(ret <= 0); |
---|
| 4341 | + if (ret < 0) { |
---|
| 4342 | + end_write_bio(&epd, ret); |
---|
| 4343 | + return ret; |
---|
| 4344 | + } |
---|
| 4345 | + ret = flush_write_bio(&epd); |
---|
4235 | 4346 | return ret; |
---|
4236 | 4347 | } |
---|
4237 | 4348 | |
---|
4238 | | -int extent_readpages(struct address_space *mapping, struct list_head *pages, |
---|
4239 | | - unsigned nr_pages) |
---|
| 4349 | +void extent_readahead(struct readahead_control *rac) |
---|
4240 | 4350 | { |
---|
4241 | 4351 | struct bio *bio = NULL; |
---|
4242 | | - unsigned page_idx; |
---|
4243 | 4352 | unsigned long bio_flags = 0; |
---|
4244 | 4353 | struct page *pagepool[16]; |
---|
4245 | | - struct page *page; |
---|
4246 | 4354 | struct extent_map *em_cached = NULL; |
---|
4247 | | - struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree; |
---|
4248 | | - int nr = 0; |
---|
4249 | 4355 | u64 prev_em_start = (u64)-1; |
---|
| 4356 | + int nr; |
---|
4250 | 4357 | |
---|
4251 | | - for (page_idx = 0; page_idx < nr_pages; page_idx++) { |
---|
4252 | | - page = list_entry(pages->prev, struct page, lru); |
---|
| 4358 | + while ((nr = readahead_page_batch(rac, pagepool))) { |
---|
| 4359 | + u64 contig_start = page_offset(pagepool[0]); |
---|
| 4360 | + u64 contig_end = page_offset(pagepool[nr - 1]) + PAGE_SIZE - 1; |
---|
4253 | 4361 | |
---|
4254 | | - prefetchw(&page->flags); |
---|
4255 | | - list_del(&page->lru); |
---|
4256 | | - if (add_to_page_cache_lru(page, mapping, |
---|
4257 | | - page->index, |
---|
4258 | | - readahead_gfp_mask(mapping))) { |
---|
4259 | | - put_page(page); |
---|
4260 | | - continue; |
---|
4261 | | - } |
---|
| 4362 | + ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end); |
---|
4262 | 4363 | |
---|
4263 | | - pagepool[nr++] = page; |
---|
4264 | | - if (nr < ARRAY_SIZE(pagepool)) |
---|
4265 | | - continue; |
---|
4266 | | - __extent_readpages(tree, pagepool, nr, &em_cached, &bio, |
---|
4267 | | - &bio_flags, &prev_em_start); |
---|
4268 | | - nr = 0; |
---|
| 4364 | + contiguous_readpages(pagepool, nr, contig_start, contig_end, |
---|
| 4365 | + &em_cached, &bio, &bio_flags, &prev_em_start); |
---|
4269 | 4366 | } |
---|
4270 | | - if (nr) |
---|
4271 | | - __extent_readpages(tree, pagepool, nr, &em_cached, &bio, |
---|
4272 | | - &bio_flags, &prev_em_start); |
---|
4273 | 4367 | |
---|
4274 | 4368 | if (em_cached) |
---|
4275 | 4369 | free_extent_map(em_cached); |
---|
4276 | 4370 | |
---|
4277 | | - BUG_ON(!list_empty(pages)); |
---|
4278 | | - if (bio) |
---|
4279 | | - return submit_one_bio(bio, 0, bio_flags); |
---|
4280 | | - return 0; |
---|
| 4371 | + if (bio) { |
---|
| 4372 | + if (submit_one_bio(bio, 0, bio_flags)) |
---|
| 4373 | + return; |
---|
| 4374 | + } |
---|
4281 | 4375 | } |
---|
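extent_readahead() drains the VFS readahead_control in fixed-size batches; each batch is file-contiguous, which is what the ASSERT above checks before handing the pages to contiguous_readpages(). A hedged sketch of the batching idiom on its own:

	/* Illustrative only: consume a readahead request 16 pages at a time. */
	struct page *pagepool[16];
	int nr;

	while ((nr = readahead_page_batch(rac, pagepool))) {
		u64 start = page_offset(pagepool[0]);
		u64 end = page_offset(pagepool[nr - 1]) + PAGE_SIZE - 1;

		/* pages [0, nr) map the contiguous file range [start, end] */
	}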
4282 | 4376 | |
---|
4283 | 4377 | /* |
---|
.. | .. |
---|
4299 | 4393 | |
---|
4300 | 4394 | lock_extent_bits(tree, start, end, &cached_state); |
---|
4301 | 4395 | wait_on_page_writeback(page); |
---|
4302 | | - clear_extent_bit(tree, start, end, |
---|
4303 | | - EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | |
---|
4304 | | - EXTENT_DO_ACCOUNTING, |
---|
4305 | | - 1, 1, &cached_state); |
---|
| 4396 | + clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DELALLOC | |
---|
| 4397 | + EXTENT_DO_ACCOUNTING, 1, 1, &cached_state); |
---|
4306 | 4398 | return 0; |
---|
4307 | 4399 | } |
---|
4308 | 4400 | |
---|
.. | .. |
---|
4318 | 4410 | u64 end = start + PAGE_SIZE - 1; |
---|
4319 | 4411 | int ret = 1; |
---|
4320 | 4412 | |
---|
4321 | | - if (test_range_bit(tree, start, end, |
---|
4322 | | - EXTENT_IOBITS, 0, NULL)) |
---|
| 4413 | + if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) { |
---|
4323 | 4414 | ret = 0; |
---|
4324 | | - else { |
---|
| 4415 | + } else { |
---|
4325 | 4416 | /* |
---|
4326 | 4417 | * at this point we can safely clear everything except the |
---|
4327 | 4418 | * locked bit and the nodatasum bit |
---|
.. | .. |
---|
4359 | 4450 | page->mapping->host->i_size > SZ_16M) { |
---|
4360 | 4451 | u64 len; |
---|
4361 | 4452 | while (start <= end) { |
---|
| 4453 | + struct btrfs_fs_info *fs_info; |
---|
| 4454 | + u64 cur_gen; |
---|
| 4455 | + |
---|
4362 | 4456 | len = end - start + 1; |
---|
4363 | 4457 | write_lock(&map->lock); |
---|
4364 | 4458 | em = lookup_extent_mapping(map, start, len); |
---|
.. | .. |
---|
4372 | 4466 | free_extent_map(em); |
---|
4373 | 4467 | break; |
---|
4374 | 4468 | } |
---|
4375 | | - if (!test_range_bit(tree, em->start, |
---|
4376 | | - extent_map_end(em) - 1, |
---|
4377 | | - EXTENT_LOCKED | EXTENT_WRITEBACK, |
---|
4378 | | - 0, NULL)) { |
---|
4379 | | - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
---|
4380 | | - &btrfs_inode->runtime_flags); |
---|
4381 | | - remove_extent_mapping(map, em); |
---|
4382 | | - /* once for the rb tree */ |
---|
4383 | | - free_extent_map(em); |
---|
4384 | | - } |
---|
| 4469 | + if (test_range_bit(tree, em->start, |
---|
| 4470 | + extent_map_end(em) - 1, |
---|
| 4471 | + EXTENT_LOCKED, 0, NULL)) |
---|
| 4472 | + goto next; |
---|
| 4473 | + /* |
---|
| 4474 | + * If it's not in the list of modified extents, used |
---|
| 4475 | + * by a fast fsync, we can remove it. If it's being |
---|
| 4476 | + * logged we can safely remove it since fsync took an |
---|
| 4477 | + * extra reference on the em. |
---|
| 4478 | + */ |
---|
| 4479 | + if (list_empty(&em->list) || |
---|
| 4480 | + test_bit(EXTENT_FLAG_LOGGING, &em->flags)) |
---|
| 4481 | + goto remove_em; |
---|
| 4482 | + /* |
---|
| 4483 | + * If it's in the list of modified extents, remove it |
---|
| 4485 | + * only if its generation is older than the current one,
---|
| 4485 | + * in which case we don't need it for a fast fsync. |
---|
| 4486 | + * Otherwise don't remove it, we could be racing with an |
---|
| 4487 | + * ongoing fast fsync that could miss the new extent. |
---|
| 4488 | + */ |
---|
| 4489 | + fs_info = btrfs_inode->root->fs_info; |
---|
| 4490 | + spin_lock(&fs_info->trans_lock); |
---|
| 4491 | + cur_gen = fs_info->generation; |
---|
| 4492 | + spin_unlock(&fs_info->trans_lock); |
---|
| 4493 | + if (em->generation >= cur_gen) |
---|
| 4494 | + goto next; |
---|
| 4495 | +remove_em: |
---|
| 4496 | + /* |
---|
| 4497 | + * We only remove extent maps that are not in the list of |
---|
| 4498 | + * modified extents or that are in the list but with a |
---|
| 4499 | + * generation lower than the current generation, so there
---|
| 4500 | + * is no need to set the full fsync flag on the inode (it |
---|
| 4501 | + * hurts the fsync performance for workloads with a data |
---|
| 4502 | + * size that exceeds or is close to the system's memory). |
---|
| 4503 | + */ |
---|
| 4504 | + remove_extent_mapping(map, em); |
---|
| 4505 | + /* once for the rb tree */ |
---|
| 4506 | + free_extent_map(em); |
---|
| 4507 | +next: |
---|
4385 | 4508 | start = extent_map_end(em); |
---|
4386 | 4509 | write_unlock(&map->lock); |
---|
4387 | 4510 | |
---|
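The reworked release path above only drops an extent map when a fast fsync can no longer need it. Condensed into a single predicate for readability; the helper name is hypothetical and does not exist in the kernel:

	/* Hypothetical helper: may this extent map be dropped from the tree? */
	static bool example_can_drop_em(const struct extent_map *em, u64 cur_gen,
					bool range_locked)
	{
		if (range_locked)				/* range still locked, keep it */
			return false;
		if (list_empty(&em->list) ||			/* not in the modified-extents list */
		    test_bit(EXTENT_FLAG_LOGGING, &em->flags))	/* fsync holds its own reference */
			return true;
		/* modified extent: safe only if it predates the running transaction */
		return em->generation < cur_gen;
	}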
.. | .. |
---|
4398 | 4521 | * helper function for fiemap, which doesn't want to see any holes. |
---|
4399 | 4522 | * This maps until we find something past 'last' |
---|
4400 | 4523 | */ |
---|
4401 | | -static struct extent_map *get_extent_skip_holes(struct inode *inode, |
---|
| 4524 | +static struct extent_map *get_extent_skip_holes(struct btrfs_inode *inode, |
---|
4402 | 4525 | u64 offset, u64 last) |
---|
4403 | 4526 | { |
---|
4404 | 4527 | u64 sectorsize = btrfs_inode_sectorsize(inode); |
---|
.. | .. |
---|
4413 | 4536 | if (len == 0) |
---|
4414 | 4537 | break; |
---|
4415 | 4538 | len = ALIGN(len, sectorsize); |
---|
4416 | | - em = btrfs_get_extent_fiemap(BTRFS_I(inode), NULL, 0, offset, |
---|
4417 | | - len, 0); |
---|
| 4539 | + em = btrfs_get_extent_fiemap(inode, offset, len); |
---|
4418 | 4540 | if (IS_ERR_OR_NULL(em)) |
---|
4419 | 4541 | return em; |
---|
4420 | 4542 | |
---|
.. | .. |
---|
4465 | 4587 | |
---|
4466 | 4588 | /* |
---|
4467 | 4589 | * Sanity check, extent_fiemap() should have ensured that new |
---|
4468 | | - * fiemap extent won't overlap with cahced one. |
---|
| 4590 | + * fiemap extent won't overlap with cached one. |
---|
4469 | 4591 | * Not recoverable. |
---|
4470 | 4592 | * |
---|
4471 | 4593 | * NOTE: Physical address can overlap, due to compression |
---|
.. | .. |
---|
4527 | 4649 | * In this case, the first extent range will be cached but not emitted. |
---|
4528 | 4650 | * So we must emit it before ending extent_fiemap(). |
---|
4529 | 4651 | */ |
---|
4530 | | -static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info, |
---|
4531 | | - struct fiemap_extent_info *fieinfo, |
---|
| 4652 | +static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo, |
---|
4532 | 4653 | struct fiemap_cache *cache) |
---|
4533 | 4654 | { |
---|
4534 | 4655 | int ret; |
---|
.. | .. |
---|
4544 | 4665 | return ret; |
---|
4545 | 4666 | } |
---|
4546 | 4667 | |
---|
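The fiemap cache exists so that logically and physically contiguous ranges can be merged into a single reported extent; since the merge decision can only be made when the *next* range shows up, the last cached range still has to be flushed explicitly, which is what emit_last_fiemap_cache() does. A hedged userspace sketch of that merge-then-flush pattern (not the kernel API, just the idea):

```c
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for struct fiemap_cache. */
struct cache {
	bool cached;
	unsigned long long offset, phys, len;
};

static void report(unsigned long long off, unsigned long long phys,
		   unsigned long long len)
{
	printf("extent: logical=%llu physical=%llu len=%llu\n", off, phys, len);
}

/* Merge with the cached range when contiguous, otherwise flush it first. */
static void emit(struct cache *c, unsigned long long off,
		 unsigned long long phys, unsigned long long len)
{
	if (c->cached && c->offset + c->len == off && c->phys + c->len == phys) {
		c->len += len;
		return;
	}
	if (c->cached)
		report(c->offset, c->phys, c->len);
	c->cached = true;
	c->offset = off;
	c->phys = phys;
	c->len = len;
}

/* Counterpart of emit_last_fiemap_cache(): flush whatever is still cached. */
static void emit_last(struct cache *c)
{
	if (c->cached)
		report(c->offset, c->phys, c->len);
	c->cached = false;
}

int main(void)
{
	struct cache c = { 0 };

	emit(&c, 0, 1000, 4096);
	emit(&c, 4096, 5096, 4096);	/* merges with the previous range */
	emit_last(&c);			/* merged range is only reported here */
	return 0;
}
```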
4547 | | -int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
---|
4548 | | - __u64 start, __u64 len) |
---|
| 4668 | +int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, |
---|
| 4669 | + u64 start, u64 len) |
---|
4549 | 4670 | { |
---|
4550 | 4671 | int ret = 0; |
---|
4551 | | - u64 off = start; |
---|
| 4672 | + u64 off; |
---|
4552 | 4673 | u64 max = start + len; |
---|
4553 | 4674 | u32 flags = 0; |
---|
4554 | 4675 | u32 found_type; |
---|
4555 | 4676 | u64 last; |
---|
4556 | 4677 | u64 last_for_get_extent = 0; |
---|
4557 | 4678 | u64 disko = 0; |
---|
4558 | | - u64 isize = i_size_read(inode); |
---|
| 4679 | + u64 isize = i_size_read(&inode->vfs_inode); |
---|
4559 | 4680 | struct btrfs_key found_key; |
---|
4560 | 4681 | struct extent_map *em = NULL; |
---|
4561 | 4682 | struct extent_state *cached_state = NULL; |
---|
4562 | 4683 | struct btrfs_path *path; |
---|
4563 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
| 4684 | + struct btrfs_root *root = inode->root; |
---|
4564 | 4685 | struct fiemap_cache cache = { 0 }; |
---|
| 4686 | + struct ulist *roots; |
---|
| 4687 | + struct ulist *tmp_ulist; |
---|
4565 | 4688 | int end = 0; |
---|
4566 | 4689 | u64 em_start = 0; |
---|
4567 | 4690 | u64 em_len = 0; |
---|
.. | .. |
---|
4575 | 4698 | return -ENOMEM; |
---|
4576 | 4699 | path->leave_spinning = 1; |
---|
4577 | 4700 | |
---|
| 4701 | + roots = ulist_alloc(GFP_KERNEL); |
---|
| 4702 | + tmp_ulist = ulist_alloc(GFP_KERNEL); |
---|
| 4703 | + if (!roots || !tmp_ulist) { |
---|
| 4704 | + ret = -ENOMEM; |
---|
| 4705 | + goto out_free_ulist; |
---|
| 4706 | + } |
---|
| 4707 | + |
---|
| 4708 | + /* |
---|
| 4709 | + * We can't initialize that to 'start' as this could miss extents due |
---|
| 4710 | + * to extent item merging |
---|
| 4711 | + */ |
---|
| 4712 | + off = 0; |
---|
4578 | 4713 | start = round_down(start, btrfs_inode_sectorsize(inode)); |
---|
4579 | 4714 | len = round_up(max, btrfs_inode_sectorsize(inode)) - start; |
---|
4580 | 4715 | |
---|
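Both ulists are allocated up front and released on the single out_free_ulist label, which is reached from every exit path; this only works cleanly when the free routine tolerates a NULL pointer (standard free() is guaranteed to, and helpers of this style commonly follow suit). The same shape in plain C, as a generic sketch rather than the btrfs code itself:

```c
#include <stdlib.h>

/* Skeleton of the allocate-two-things, one-exit-label pattern used above. */
static int do_work(void)
{
	int ret = 0;
	char *roots = malloc(64);
	char *tmp = malloc(64);

	if (!roots || !tmp) {
		ret = -1;		/* -ENOMEM in the kernel */
		goto out_free;
	}

	/* ... use roots and tmp ... */

out_free:
	free(roots);	/* free(NULL) is a no-op, so partial failures are fine */
	free(tmp);
	return ret;
}

int main(void)
{
	return do_work() ? 1 : 0;
}
```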
.. | .. |
---|
4582 | 4717 | * lookup the last file extent. We're not using i_size here |
---|
4583 | 4718 | * because there might be preallocation past i_size |
---|
4584 | 4719 | */ |
---|
4585 | | - ret = btrfs_lookup_file_extent(NULL, root, path, |
---|
4586 | | - btrfs_ino(BTRFS_I(inode)), -1, 0); |
---|
| 4720 | + ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), -1, |
---|
| 4721 | + 0); |
---|
4587 | 4722 | if (ret < 0) { |
---|
4588 | | - btrfs_free_path(path); |
---|
4589 | | - return ret; |
---|
| 4723 | + goto out_free_ulist; |
---|
4590 | 4724 | } else { |
---|
4591 | 4725 | WARN_ON(!ret); |
---|
4592 | 4726 | if (ret == 1) |
---|
.. | .. |
---|
4598 | 4732 | found_type = found_key.type; |
---|
4599 | 4733 | |
---|
4600 | 4734 | /* No extents, but there might be delalloc bits */ |
---|
4601 | | - if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) || |
---|
| 4735 | + if (found_key.objectid != btrfs_ino(inode) || |
---|
4602 | 4736 | found_type != BTRFS_EXTENT_DATA_KEY) { |
---|
4603 | 4737 | /* have to trust i_size as the end */ |
---|
4604 | 4738 | last = (u64)-1; |
---|
.. | .. |
---|
4624 | 4758 | last_for_get_extent = isize; |
---|
4625 | 4759 | } |
---|
4626 | 4760 | |
---|
4627 | | - lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, |
---|
| 4761 | + lock_extent_bits(&inode->io_tree, start, start + len - 1, |
---|
4628 | 4762 | &cached_state); |
---|
4629 | 4763 | |
---|
4630 | 4764 | em = get_extent_skip_holes(inode, start, last_for_get_extent); |
---|
.. | .. |
---|
4693 | 4827 | * then we're just getting a count and we can skip the |
---|
4694 | 4828 | * lookup stuff. |
---|
4695 | 4829 | */ |
---|
4696 | | - ret = btrfs_check_shared(root, |
---|
4697 | | - btrfs_ino(BTRFS_I(inode)), |
---|
4698 | | - bytenr); |
---|
| 4830 | + ret = btrfs_check_shared(root, btrfs_ino(inode), |
---|
| 4831 | + bytenr, roots, tmp_ulist); |
---|
4699 | 4832 | if (ret < 0) |
---|
4700 | 4833 | goto out_free; |
---|
4701 | 4834 | if (ret) |
---|
.. | .. |
---|
4735 | 4868 | } |
---|
4736 | 4869 | out_free: |
---|
4737 | 4870 | if (!ret) |
---|
4738 | | - ret = emit_last_fiemap_cache(root->fs_info, fieinfo, &cache); |
---|
| 4871 | + ret = emit_last_fiemap_cache(fieinfo, &cache); |
---|
4739 | 4872 | free_extent_map(em); |
---|
4740 | 4873 | out: |
---|
4741 | | - btrfs_free_path(path); |
---|
4742 | | - unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1, |
---|
| 4874 | + unlock_extent_cached(&inode->io_tree, start, start + len - 1, |
---|
4743 | 4875 | &cached_state); |
---|
| 4876 | + |
---|
| 4877 | +out_free_ulist: |
---|
| 4878 | + btrfs_free_path(path); |
---|
| 4879 | + ulist_free(roots); |
---|
| 4880 | + ulist_free(tmp_ulist); |
---|
4744 | 4881 | return ret; |
---|
4745 | 4882 | } |
---|
4746 | 4883 | |
---|
4747 | 4884 | static void __free_extent_buffer(struct extent_buffer *eb) |
---|
4748 | 4885 | { |
---|
4749 | | - btrfs_leak_debug_del(&eb->leak_list); |
---|
4750 | 4886 | kmem_cache_free(extent_buffer_cache, eb); |
---|
4751 | 4887 | } |
---|
4752 | 4888 | |
---|
4753 | | -int extent_buffer_under_io(struct extent_buffer *eb) |
---|
| 4889 | +int extent_buffer_under_io(const struct extent_buffer *eb) |
---|
4754 | 4890 | { |
---|
4755 | 4891 | return (atomic_read(&eb->io_pages) || |
---|
4756 | 4892 | test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) || |
---|
.. | .. |
---|
4792 | 4928 | * We need to make sure we haven't been attached |
---|
4793 | 4929 | * to a new eb. |
---|
4794 | 4930 | */ |
---|
4795 | | - ClearPagePrivate(page); |
---|
4796 | | - set_page_private(page, 0); |
---|
4797 | | - /* One for the page private */ |
---|
4798 | | - put_page(page); |
---|
| 4931 | + detach_page_private(page); |
---|
4799 | 4932 | } |
---|
4800 | 4933 | |
---|
4801 | 4934 | if (mapped) |
---|
.. | .. |
---|
4812 | 4945 | static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) |
---|
4813 | 4946 | { |
---|
4814 | 4947 | btrfs_release_extent_buffer_pages(eb); |
---|
| 4948 | + btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list); |
---|
4815 | 4949 | __free_extent_buffer(eb); |
---|
4816 | 4950 | } |
---|
4817 | 4951 | |
---|
.. | .. |
---|
4827 | 4961 | eb->fs_info = fs_info; |
---|
4828 | 4962 | eb->bflags = 0; |
---|
4829 | 4963 | rwlock_init(&eb->lock); |
---|
4830 | | - atomic_set(&eb->write_locks, 0); |
---|
4831 | | - atomic_set(&eb->read_locks, 0); |
---|
4832 | 4964 | atomic_set(&eb->blocking_readers, 0); |
---|
4833 | | - atomic_set(&eb->blocking_writers, 0); |
---|
4834 | | - atomic_set(&eb->spinning_readers, 0); |
---|
4835 | | - atomic_set(&eb->spinning_writers, 0); |
---|
4836 | | - eb->lock_nested = 0; |
---|
| 4965 | + eb->blocking_writers = 0; |
---|
| 4966 | + eb->lock_recursed = false; |
---|
4837 | 4967 | init_waitqueue_head(&eb->write_lock_wq); |
---|
4838 | 4968 | init_waitqueue_head(&eb->read_lock_wq); |
---|
4839 | 4969 | |
---|
4840 | | - btrfs_leak_debug_add(&eb->leak_list, &buffers); |
---|
| 4970 | + btrfs_leak_debug_add(&fs_info->eb_leak_lock, &eb->leak_list, |
---|
| 4971 | + &fs_info->allocated_ebs); |
---|
4841 | 4972 | |
---|
4842 | 4973 | spin_lock_init(&eb->refs_lock); |
---|
4843 | 4974 | atomic_set(&eb->refs, 1); |
---|
.. | .. |
---|
4850 | 4981 | > MAX_INLINE_EXTENT_BUFFER_SIZE); |
---|
4851 | 4982 | BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE); |
---|
4852 | 4983 | |
---|
| 4984 | +#ifdef CONFIG_BTRFS_DEBUG |
---|
| 4985 | + eb->spinning_writers = 0; |
---|
| 4986 | + atomic_set(&eb->spinning_readers, 0); |
---|
| 4987 | + atomic_set(&eb->read_locks, 0); |
---|
| 4988 | + eb->write_locks = 0; |
---|
| 4989 | +#endif |
---|
| 4990 | + |
---|
4853 | 4991 | return eb; |
---|
4854 | 4992 | } |
---|
4855 | 4993 | |
---|
4856 | | -struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) |
---|
| 4994 | +struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src) |
---|
4857 | 4995 | { |
---|
4858 | 4996 | int i; |
---|
4859 | 4997 | struct page *p; |
---|
.. | .. |
---|
5042 | 5180 | check_buffer_tree_ref(eb); |
---|
5043 | 5181 | set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags); |
---|
5044 | 5182 | |
---|
5045 | | - /* |
---|
5046 | | - * We will free dummy extent buffer's if they come into |
---|
5047 | | - * free_extent_buffer with a ref count of 2, but if we are using this we |
---|
5048 | | - * want the buffers to stay in memory until we're done with them, so |
---|
5049 | | - * bump the ref count again. |
---|
5050 | | - */ |
---|
5051 | | - atomic_inc(&eb->refs); |
---|
5052 | 5183 | return eb; |
---|
5053 | 5184 | free_eb: |
---|
5054 | 5185 | btrfs_release_extent_buffer(eb); |
---|
.. | .. |
---|
5187 | 5318 | } |
---|
5188 | 5319 | |
---|
5189 | 5320 | static int release_extent_buffer(struct extent_buffer *eb) |
---|
| 5321 | + __releases(&eb->refs_lock) |
---|
5190 | 5322 | { |
---|
5191 | 5323 | lockdep_assert_held(&eb->refs_lock); |
---|
5192 | 5324 | |
---|
.. | .. |
---|
5205 | 5337 | spin_unlock(&eb->refs_lock); |
---|
5206 | 5338 | } |
---|
5207 | 5339 | |
---|
| 5340 | + btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list); |
---|
5208 | 5341 | /* Should be safe to release our pages at this point */ |
---|
5209 | 5342 | btrfs_release_extent_buffer_pages(eb); |
---|
5210 | 5343 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS |
---|
.. | .. |
---|
5230 | 5363 | |
---|
5231 | 5364 | while (1) { |
---|
5232 | 5365 | refs = atomic_read(&eb->refs); |
---|
5233 | | - if (refs <= 3) |
---|
| 5366 | + if ((!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && refs <= 3) |
---|
| 5367 | + || (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && |
---|
| 5368 | + refs == 1)) |
---|
5234 | 5369 | break; |
---|
5235 | 5370 | old = atomic_cmpxchg(&eb->refs, refs, refs - 1); |
---|
5236 | 5371 | if (old == refs) |
---|
.. | .. |
---|
5238 | 5373 | } |
---|
5239 | 5374 | |
---|
5240 | 5375 | spin_lock(&eb->refs_lock); |
---|
5241 | | - if (atomic_read(&eb->refs) == 2 && |
---|
5242 | | - test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags)) |
---|
5243 | | - atomic_dec(&eb->refs); |
---|
5244 | | - |
---|
5245 | 5376 | if (atomic_read(&eb->refs) == 2 && |
---|
5246 | 5377 | test_bit(EXTENT_BUFFER_STALE, &eb->bflags) && |
---|
5247 | 5378 | !extent_buffer_under_io(eb) && |
---|
.. | .. |
---|
5269 | 5400 | release_extent_buffer(eb); |
---|
5270 | 5401 | } |
---|
5271 | 5402 | |
---|
5272 | | -void clear_extent_buffer_dirty(struct extent_buffer *eb) |
---|
| 5403 | +void clear_extent_buffer_dirty(const struct extent_buffer *eb) |
---|
5273 | 5404 | { |
---|
5274 | 5405 | int i; |
---|
5275 | 5406 | int num_pages; |
---|
.. | .. |
---|
5287 | 5418 | |
---|
5288 | 5419 | clear_page_dirty_for_io(page); |
---|
5289 | 5420 | xa_lock_irq(&page->mapping->i_pages); |
---|
5290 | | - if (!PageDirty(page)) { |
---|
5291 | | - radix_tree_tag_clear(&page->mapping->i_pages, |
---|
5292 | | - page_index(page), |
---|
5293 | | - PAGECACHE_TAG_DIRTY); |
---|
5294 | | - } |
---|
| 5421 | + if (!PageDirty(page)) |
---|
| 5422 | + __xa_clear_mark(&page->mapping->i_pages, |
---|
| 5423 | + page_index(page), PAGECACHE_TAG_DIRTY); |
---|
5295 | 5424 | xa_unlock_irq(&page->mapping->i_pages); |
---|
5296 | 5425 | ClearPageError(page); |
---|
5297 | 5426 | unlock_page(page); |
---|
.. | .. |
---|
5299 | 5428 | WARN_ON(atomic_read(&eb->refs) == 0); |
---|
5300 | 5429 | } |
---|
5301 | 5430 | |
---|
5302 | | -int set_extent_buffer_dirty(struct extent_buffer *eb) |
---|
| 5431 | +bool set_extent_buffer_dirty(struct extent_buffer *eb) |
---|
5303 | 5432 | { |
---|
5304 | 5433 | int i; |
---|
5305 | 5434 | int num_pages; |
---|
5306 | | - int was_dirty = 0; |
---|
| 5435 | + bool was_dirty; |
---|
5307 | 5436 | |
---|
5308 | 5437 | check_buffer_tree_ref(eb); |
---|
5309 | 5438 | |
---|
.. | .. |
---|
5313 | 5442 | WARN_ON(atomic_read(&eb->refs) == 0); |
---|
5314 | 5443 | WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)); |
---|
5315 | 5444 | |
---|
| 5445 | + if (!was_dirty) |
---|
| 5446 | + for (i = 0; i < num_pages; i++) |
---|
| 5447 | + set_page_dirty(eb->pages[i]); |
---|
| 5448 | + |
---|
| 5449 | +#ifdef CONFIG_BTRFS_DEBUG |
---|
5316 | 5450 | for (i = 0; i < num_pages; i++) |
---|
5317 | | - set_page_dirty(eb->pages[i]); |
---|
| 5451 | + ASSERT(PageDirty(eb->pages[i])); |
---|
| 5452 | +#endif |
---|
| 5453 | + |
---|
5318 | 5454 | return was_dirty; |
---|
5319 | 5455 | } |
---|
5320 | 5456 | |
---|
.. | .. |
---|
5347 | 5483 | } |
---|
5348 | 5484 | } |
---|
5349 | 5485 | |
---|
5350 | | -int read_extent_buffer_pages(struct extent_io_tree *tree, |
---|
5351 | | - struct extent_buffer *eb, int wait, int mirror_num) |
---|
| 5486 | +int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num) |
---|
5352 | 5487 | { |
---|
5353 | 5488 | int i; |
---|
5354 | 5489 | struct page *page; |
---|
.. | .. |
---|
5412 | 5547 | } |
---|
5413 | 5548 | |
---|
5414 | 5549 | ClearPageError(page); |
---|
5415 | | - err = __extent_read_full_page(tree, page, |
---|
5416 | | - btree_get_extent, &bio, |
---|
5417 | | - mirror_num, &bio_flags, |
---|
5418 | | - REQ_META); |
---|
| 5550 | + err = submit_extent_page(REQ_OP_READ | REQ_META, NULL, |
---|
| 5551 | + page, page_offset(page), PAGE_SIZE, 0, |
---|
| 5552 | + &bio, end_bio_extent_readpage, |
---|
| 5553 | + mirror_num, 0, 0, false); |
---|
5419 | 5554 | if (err) { |
---|
5420 | | - ret = err; |
---|
5421 | 5555 | /* |
---|
5422 | | - * We use &bio in above __extent_read_full_page, |
---|
5423 | | - * so we ensure that if it returns error, the |
---|
5424 | | - * current page fails to add itself to bio and |
---|
5425 | | - * it's been unlocked. |
---|
5426 | | - * |
---|
5427 | | - * We must dec io_pages by ourselves. |
---|
| 5556 | + * We failed to submit the bio so it's the |
---|
| 5557 | + * caller's responsibility to perform cleanup |
---|
| 5558 | + * i.e. unlock the page / set the error bit. |
---|
5428 | 5559 | */ |
---|
| 5560 | + ret = err; |
---|
| 5561 | + SetPageError(page); |
---|
| 5562 | + unlock_page(page); |
---|
5429 | 5563 | atomic_dec(&eb->io_pages); |
---|
5430 | 5564 | } |
---|
5431 | 5565 | } else { |
---|
.. | .. |
---|
5460 | 5594 | return ret; |
---|
5461 | 5595 | } |
---|
5462 | 5596 | |
---|
| 5597 | +static bool report_eb_range(const struct extent_buffer *eb, unsigned long start, |
---|
| 5598 | + unsigned long len) |
---|
| 5599 | +{ |
---|
| 5600 | + btrfs_warn(eb->fs_info, |
---|
| 5601 | + "access to eb bytenr %llu len %lu out of range start %lu len %lu", |
---|
| 5602 | + eb->start, eb->len, start, len); |
---|
| 5603 | + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); |
---|
| 5604 | + |
---|
| 5605 | + return true; |
---|
| 5606 | +} |
---|
| 5607 | + |
---|
| 5608 | +/* |
---|
| 5609 | + * Check if the [start, start + len) range is valid before reading/writing |
---|
| 5610 | + * the eb. |
---|
| 5611 | + * NOTE: @start and @len are offsets inside the eb, not logical addresses. |
---|
| 5612 | + * |
---|
| 5613 | + * Caller should not touch the dst/src memory if this function returns error. |
---|
| 5614 | + */ |
---|
| 5615 | +static inline int check_eb_range(const struct extent_buffer *eb, |
---|
| 5616 | + unsigned long start, unsigned long len) |
---|
| 5617 | +{ |
---|
| 5618 | + unsigned long offset; |
---|
| 5619 | + |
---|
| 5620 | + /* start, start + len should not go beyond eb->len nor overflow */ |
---|
| 5621 | + if (unlikely(check_add_overflow(start, len, &offset) || offset > eb->len)) |
---|
| 5622 | + return report_eb_range(eb, start, len); |
---|
| 5623 | + |
---|
| 5624 | + return false; |
---|
| 5625 | +} |
---|
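check_eb_range() folds the two classic pitfalls, overflow of start + len and a range running past eb->len, into a single test via check_add_overflow(). A standalone model using the GCC/Clang builtin that the kernel helper is built on:

```c
#include <stdbool.h>
#include <stdio.h>

/* Model of the range check: true means the range is invalid. */
static bool range_out_of_bounds(unsigned long start, unsigned long len,
				unsigned long eb_len)
{
	unsigned long end;

	/* Rejects both wrap-around and ranges that run past the buffer. */
	return __builtin_add_overflow(start, len, &end) || end > eb_len;
}

int main(void)
{
	printf("%d\n", range_out_of_bounds(0, 4096, 16384));	  /* 0: ok */
	printf("%d\n", range_out_of_bounds(16000, 1000, 16384)); /* 1: past end */
	printf("%d\n", range_out_of_bounds(-1UL, 2, 16384));	  /* 1: wraps */
	return 0;
}
```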
| 5626 | + |
---|
5463 | 5627 | void read_extent_buffer(const struct extent_buffer *eb, void *dstv, |
---|
5464 | 5628 | unsigned long start, unsigned long len) |
---|
5465 | 5629 | { |
---|
.. | .. |
---|
5468 | 5632 | struct page *page; |
---|
5469 | 5633 | char *kaddr; |
---|
5470 | 5634 | char *dst = (char *)dstv; |
---|
5471 | | - size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1); |
---|
5472 | | - unsigned long i = (start_offset + start) >> PAGE_SHIFT; |
---|
| 5635 | + unsigned long i = start >> PAGE_SHIFT; |
---|
5473 | 5636 | |
---|
5474 | | - if (start + len > eb->len) { |
---|
5475 | | - WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n", |
---|
5476 | | - eb->start, eb->len, start, len); |
---|
5477 | | - memset(dst, 0, len); |
---|
| 5637 | + if (check_eb_range(eb, start, len)) { |
---|
| 5638 | + /* |
---|
| 5639 | + * Invalid range hit, reset the memory, so callers won't get |
---|
| 5640 | + * some random garbage in their uninitialized memory. |
---|
| 5641 | + */ |
---|
| 5642 | + memset(dstv, 0, len); |
---|
5478 | 5643 | return; |
---|
5479 | 5644 | } |
---|
5480 | 5645 | |
---|
5481 | | - offset = (start_offset + start) & (PAGE_SIZE - 1); |
---|
| 5646 | + offset = offset_in_page(start); |
---|
5482 | 5647 | |
---|
5483 | 5648 | while (len > 0) { |
---|
5484 | 5649 | page = eb->pages[i]; |
---|
.. | .. |
---|
5503 | 5668 | struct page *page; |
---|
5504 | 5669 | char *kaddr; |
---|
5505 | 5670 | char __user *dst = (char __user *)dstv; |
---|
5506 | | - size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1); |
---|
5507 | | - unsigned long i = (start_offset + start) >> PAGE_SHIFT; |
---|
| 5671 | + unsigned long i = start >> PAGE_SHIFT; |
---|
5508 | 5672 | int ret = 0; |
---|
5509 | 5673 | |
---|
5510 | 5674 | WARN_ON(start > eb->len); |
---|
5511 | 5675 | WARN_ON(start + len > eb->start + eb->len); |
---|
5512 | 5676 | |
---|
5513 | | - offset = (start_offset + start) & (PAGE_SIZE - 1); |
---|
| 5677 | + offset = offset_in_page(start); |
---|
5514 | 5678 | |
---|
5515 | 5679 | while (len > 0) { |
---|
5516 | 5680 | page = eb->pages[i]; |
---|
5517 | 5681 | |
---|
5518 | 5682 | cur = min(len, (PAGE_SIZE - offset)); |
---|
5519 | 5683 | kaddr = page_address(page); |
---|
5520 | | - if (probe_user_write(dst, kaddr + offset, cur)) { |
---|
| 5684 | + if (copy_to_user_nofault(dst, kaddr + offset, cur)) { |
---|
5521 | 5685 | ret = -EFAULT; |
---|
5522 | 5686 | break; |
---|
5523 | 5687 | } |
---|
.. | .. |
---|
5531 | 5695 | return ret; |
---|
5532 | 5696 | } |
---|
5533 | 5697 | |
---|
5534 | | -/* |
---|
5535 | | - * return 0 if the item is found within a page. |
---|
5536 | | - * return 1 if the item spans two pages. |
---|
5537 | | - * return -EINVAL otherwise. |
---|
5538 | | - */ |
---|
5539 | | -int map_private_extent_buffer(const struct extent_buffer *eb, |
---|
5540 | | - unsigned long start, unsigned long min_len, |
---|
5541 | | - char **map, unsigned long *map_start, |
---|
5542 | | - unsigned long *map_len) |
---|
5543 | | -{ |
---|
5544 | | - size_t offset = start & (PAGE_SIZE - 1); |
---|
5545 | | - char *kaddr; |
---|
5546 | | - struct page *p; |
---|
5547 | | - size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1); |
---|
5548 | | - unsigned long i = (start_offset + start) >> PAGE_SHIFT; |
---|
5549 | | - unsigned long end_i = (start_offset + start + min_len - 1) >> |
---|
5550 | | - PAGE_SHIFT; |
---|
5551 | | - |
---|
5552 | | - if (start + min_len > eb->len) { |
---|
5553 | | - WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n", |
---|
5554 | | - eb->start, eb->len, start, min_len); |
---|
5555 | | - return -EINVAL; |
---|
5556 | | - } |
---|
5557 | | - |
---|
5558 | | - if (i != end_i) |
---|
5559 | | - return 1; |
---|
5560 | | - |
---|
5561 | | - if (i == 0) { |
---|
5562 | | - offset = start_offset; |
---|
5563 | | - *map_start = 0; |
---|
5564 | | - } else { |
---|
5565 | | - offset = 0; |
---|
5566 | | - *map_start = ((u64)i << PAGE_SHIFT) - start_offset; |
---|
5567 | | - } |
---|
5568 | | - |
---|
5569 | | - p = eb->pages[i]; |
---|
5570 | | - kaddr = page_address(p); |
---|
5571 | | - *map = kaddr + offset; |
---|
5572 | | - *map_len = PAGE_SIZE - offset; |
---|
5573 | | - return 0; |
---|
5574 | | -} |
---|
5575 | | - |
---|
5576 | 5698 | int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv, |
---|
5577 | 5699 | unsigned long start, unsigned long len) |
---|
5578 | 5700 | { |
---|
.. | .. |
---|
5581 | 5703 | struct page *page; |
---|
5582 | 5704 | char *kaddr; |
---|
5583 | 5705 | char *ptr = (char *)ptrv; |
---|
5584 | | - size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1); |
---|
5585 | | - unsigned long i = (start_offset + start) >> PAGE_SHIFT; |
---|
| 5706 | + unsigned long i = start >> PAGE_SHIFT; |
---|
5586 | 5707 | int ret = 0; |
---|
5587 | 5708 | |
---|
5588 | | - WARN_ON(start > eb->len); |
---|
5589 | | - WARN_ON(start + len > eb->start + eb->len); |
---|
| 5709 | + if (check_eb_range(eb, start, len)) |
---|
| 5710 | + return -EINVAL; |
---|
5590 | 5711 | |
---|
5591 | | - offset = (start_offset + start) & (PAGE_SIZE - 1); |
---|
| 5712 | + offset = offset_in_page(start); |
---|
5592 | 5713 | |
---|
5593 | 5714 | while (len > 0) { |
---|
5594 | 5715 | page = eb->pages[i]; |
---|
.. | .. |
---|
5608 | 5729 | return ret; |
---|
5609 | 5730 | } |
---|
5610 | 5731 | |
---|
5611 | | -void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb, |
---|
| 5732 | +void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb, |
---|
5612 | 5733 | const void *srcv) |
---|
5613 | 5734 | { |
---|
5614 | 5735 | char *kaddr; |
---|
.. | .. |
---|
5619 | 5740 | BTRFS_FSID_SIZE); |
---|
5620 | 5741 | } |
---|
5621 | 5742 | |
---|
5622 | | -void write_extent_buffer_fsid(struct extent_buffer *eb, const void *srcv) |
---|
| 5743 | +void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *srcv) |
---|
5623 | 5744 | { |
---|
5624 | 5745 | char *kaddr; |
---|
5625 | 5746 | |
---|
.. | .. |
---|
5629 | 5750 | BTRFS_FSID_SIZE); |
---|
5630 | 5751 | } |
---|
5631 | 5752 | |
---|
5632 | | -void write_extent_buffer(struct extent_buffer *eb, const void *srcv, |
---|
| 5753 | +void write_extent_buffer(const struct extent_buffer *eb, const void *srcv, |
---|
5633 | 5754 | unsigned long start, unsigned long len) |
---|
5634 | 5755 | { |
---|
5635 | 5756 | size_t cur; |
---|
.. | .. |
---|
5637 | 5758 | struct page *page; |
---|
5638 | 5759 | char *kaddr; |
---|
5639 | 5760 | char *src = (char *)srcv; |
---|
5640 | | - size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1); |
---|
5641 | | - unsigned long i = (start_offset + start) >> PAGE_SHIFT; |
---|
| 5761 | + unsigned long i = start >> PAGE_SHIFT; |
---|
5642 | 5762 | |
---|
5643 | | - WARN_ON(start > eb->len); |
---|
5644 | | - WARN_ON(start + len > eb->start + eb->len); |
---|
| 5763 | + if (check_eb_range(eb, start, len)) |
---|
| 5764 | + return; |
---|
5645 | 5765 | |
---|
5646 | | - offset = (start_offset + start) & (PAGE_SIZE - 1); |
---|
| 5766 | + offset = offset_in_page(start); |
---|
5647 | 5767 | |
---|
5648 | 5768 | while (len > 0) { |
---|
5649 | 5769 | page = eb->pages[i]; |
---|
.. | .. |
---|
5660 | 5780 | } |
---|
5661 | 5781 | } |
---|
5662 | 5782 | |
---|
5663 | | -void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start, |
---|
| 5783 | +void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start, |
---|
5664 | 5784 | unsigned long len) |
---|
5665 | 5785 | { |
---|
5666 | 5786 | size_t cur; |
---|
5667 | 5787 | size_t offset; |
---|
5668 | 5788 | struct page *page; |
---|
5669 | 5789 | char *kaddr; |
---|
5670 | | - size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1); |
---|
5671 | | - unsigned long i = (start_offset + start) >> PAGE_SHIFT; |
---|
| 5790 | + unsigned long i = start >> PAGE_SHIFT; |
---|
5672 | 5791 | |
---|
5673 | | - WARN_ON(start > eb->len); |
---|
5674 | | - WARN_ON(start + len > eb->start + eb->len); |
---|
| 5792 | + if (check_eb_range(eb, start, len)) |
---|
| 5793 | + return; |
---|
5675 | 5794 | |
---|
5676 | | - offset = (start_offset + start) & (PAGE_SIZE - 1); |
---|
| 5795 | + offset = offset_in_page(start); |
---|
5677 | 5796 | |
---|
5678 | 5797 | while (len > 0) { |
---|
5679 | 5798 | page = eb->pages[i]; |
---|
.. | .. |
---|
5689 | 5808 | } |
---|
5690 | 5809 | } |
---|
5691 | 5810 | |
---|
5692 | | -void copy_extent_buffer_full(struct extent_buffer *dst, |
---|
5693 | | - struct extent_buffer *src) |
---|
| 5811 | +void copy_extent_buffer_full(const struct extent_buffer *dst, |
---|
| 5812 | + const struct extent_buffer *src) |
---|
5694 | 5813 | { |
---|
5695 | 5814 | int i; |
---|
5696 | 5815 | int num_pages; |
---|
.. | .. |
---|
5703 | 5822 | page_address(src->pages[i])); |
---|
5704 | 5823 | } |
---|
5705 | 5824 | |
---|
5706 | | -void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, |
---|
| 5825 | +void copy_extent_buffer(const struct extent_buffer *dst, |
---|
| 5826 | + const struct extent_buffer *src, |
---|
5707 | 5827 | unsigned long dst_offset, unsigned long src_offset, |
---|
5708 | 5828 | unsigned long len) |
---|
5709 | 5829 | { |
---|
.. | .. |
---|
5712 | 5832 | size_t offset; |
---|
5713 | 5833 | struct page *page; |
---|
5714 | 5834 | char *kaddr; |
---|
5715 | | - size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1); |
---|
5716 | | - unsigned long i = (start_offset + dst_offset) >> PAGE_SHIFT; |
---|
| 5835 | + unsigned long i = dst_offset >> PAGE_SHIFT; |
---|
| 5836 | + |
---|
| 5837 | + if (check_eb_range(dst, dst_offset, len) || |
---|
| 5838 | + check_eb_range(src, src_offset, len)) |
---|
| 5839 | + return; |
---|
5717 | 5840 | |
---|
5718 | 5841 | WARN_ON(src->len != dst_len); |
---|
5719 | 5842 | |
---|
5720 | | - offset = (start_offset + dst_offset) & |
---|
5721 | | - (PAGE_SIZE - 1); |
---|
| 5843 | + offset = offset_in_page(dst_offset); |
---|
5722 | 5844 | |
---|
5723 | 5845 | while (len > 0) { |
---|
5724 | 5846 | page = dst->pages[i]; |
---|
.. | .. |
---|
5749 | 5871 | * This helper hides the ugliness of finding the byte in an extent buffer which |
---|
5750 | 5872 | * contains a given bit. |
---|
5751 | 5873 | */ |
---|
5752 | | -static inline void eb_bitmap_offset(struct extent_buffer *eb, |
---|
| 5874 | +static inline void eb_bitmap_offset(const struct extent_buffer *eb, |
---|
5753 | 5875 | unsigned long start, unsigned long nr, |
---|
5754 | 5876 | unsigned long *page_index, |
---|
5755 | 5877 | size_t *page_offset) |
---|
5756 | 5878 | { |
---|
5757 | | - size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1); |
---|
5758 | 5879 | size_t byte_offset = BIT_BYTE(nr); |
---|
5759 | 5880 | size_t offset; |
---|
5760 | 5881 | |
---|
.. | .. |
---|
5763 | 5884 | * the bitmap item in the extent buffer + the offset of the byte in the |
---|
5764 | 5885 | * bitmap item. |
---|
5765 | 5886 | */ |
---|
5766 | | - offset = start_offset + start + byte_offset; |
---|
| 5887 | + offset = start + byte_offset; |
---|
5767 | 5888 | |
---|
5768 | 5889 | *page_index = offset >> PAGE_SHIFT; |
---|
5769 | | - *page_offset = offset & (PAGE_SIZE - 1); |
---|
| 5890 | + *page_offset = offset_in_page(offset); |
---|
5770 | 5891 | } |
---|
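With extent buffers now starting at a page boundary, locating the byte that holds bit nr of a bitmap item reduces to plain offset arithmetic: compute the byte offset within the buffer, then split it into a page index and an offset within that page. A userspace model of the same arithmetic (assuming 4 KiB pages for the example):

```c
#include <stdio.h>

#define EX_PAGE_SHIFT	12			/* 4 KiB pages assumed */
#define EX_PAGE_SIZE	(1UL << EX_PAGE_SHIFT)

/* Model of eb_bitmap_offset(): which page and byte hold bit @nr? */
static void bitmap_offset(unsigned long start, unsigned long nr,
			  unsigned long *page_index, unsigned long *page_offset)
{
	unsigned long offset = start + nr / 8;		/* BIT_BYTE(nr) */

	*page_index = offset >> EX_PAGE_SHIFT;
	*page_offset = offset & (EX_PAGE_SIZE - 1);	/* offset_in_page() */
}

int main(void)
{
	unsigned long idx, off;

	/* Bitmap item at byte 4000 of the eb, bit 800 -> byte 4100. */
	bitmap_offset(4000, 800, &idx, &off);
	printf("page %lu, byte %lu\n", idx, off);	/* page 1, byte 4 */
	return 0;
}
```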
5771 | 5892 | |
---|
5772 | 5893 | /** |
---|
.. | .. |
---|
5775 | 5896 | * @start: offset of the bitmap item in the extent buffer |
---|
5776 | 5897 | * @nr: bit number to test |
---|
5777 | 5898 | */ |
---|
5778 | | -int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start, |
---|
| 5899 | +int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start, |
---|
5779 | 5900 | unsigned long nr) |
---|
5780 | 5901 | { |
---|
5781 | 5902 | u8 *kaddr; |
---|
.. | .. |
---|
5797 | 5918 | * @pos: bit number of the first bit |
---|
5798 | 5919 | * @len: number of bits to set |
---|
5799 | 5920 | */ |
---|
5800 | | -void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start, |
---|
| 5921 | +void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start, |
---|
5801 | 5922 | unsigned long pos, unsigned long len) |
---|
5802 | 5923 | { |
---|
5803 | 5924 | u8 *kaddr; |
---|
.. | .. |
---|
5839 | 5960 | * @pos: bit number of the first bit |
---|
5840 | 5961 | * @len: number of bits to clear |
---|
5841 | 5962 | */ |
---|
5842 | | -void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start, |
---|
5843 | | - unsigned long pos, unsigned long len) |
---|
| 5963 | +void extent_buffer_bitmap_clear(const struct extent_buffer *eb, |
---|
| 5964 | + unsigned long start, unsigned long pos, |
---|
| 5965 | + unsigned long len) |
---|
5844 | 5966 | { |
---|
5845 | 5967 | u8 *kaddr; |
---|
5846 | 5968 | struct page *page; |
---|
.. | .. |
---|
5901 | 6023 | memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); |
---|
5902 | 6024 | } |
---|
5903 | 6025 | |
---|
5904 | | -void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, |
---|
5905 | | - unsigned long src_offset, unsigned long len) |
---|
| 6026 | +void memcpy_extent_buffer(const struct extent_buffer *dst, |
---|
| 6027 | + unsigned long dst_offset, unsigned long src_offset, |
---|
| 6028 | + unsigned long len) |
---|
5906 | 6029 | { |
---|
5907 | | - struct btrfs_fs_info *fs_info = dst->fs_info; |
---|
5908 | 6030 | size_t cur; |
---|
5909 | 6031 | size_t dst_off_in_page; |
---|
5910 | 6032 | size_t src_off_in_page; |
---|
5911 | | - size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1); |
---|
5912 | 6033 | unsigned long dst_i; |
---|
5913 | 6034 | unsigned long src_i; |
---|
5914 | 6035 | |
---|
5915 | | - if (src_offset + len > dst->len) { |
---|
5916 | | - btrfs_err(fs_info, |
---|
5917 | | - "memmove bogus src_offset %lu move len %lu dst len %lu", |
---|
5918 | | - src_offset, len, dst->len); |
---|
5919 | | - BUG_ON(1); |
---|
5920 | | - } |
---|
5921 | | - if (dst_offset + len > dst->len) { |
---|
5922 | | - btrfs_err(fs_info, |
---|
5923 | | - "memmove bogus dst_offset %lu move len %lu dst len %lu", |
---|
5924 | | - dst_offset, len, dst->len); |
---|
5925 | | - BUG_ON(1); |
---|
5926 | | - } |
---|
| 6036 | + if (check_eb_range(dst, dst_offset, len) || |
---|
| 6037 | + check_eb_range(dst, src_offset, len)) |
---|
| 6038 | + return; |
---|
5927 | 6039 | |
---|
5928 | 6040 | while (len > 0) { |
---|
5929 | | - dst_off_in_page = (start_offset + dst_offset) & |
---|
5930 | | - (PAGE_SIZE - 1); |
---|
5931 | | - src_off_in_page = (start_offset + src_offset) & |
---|
5932 | | - (PAGE_SIZE - 1); |
---|
| 6041 | + dst_off_in_page = offset_in_page(dst_offset); |
---|
| 6042 | + src_off_in_page = offset_in_page(src_offset); |
---|
5933 | 6043 | |
---|
5934 | | - dst_i = (start_offset + dst_offset) >> PAGE_SHIFT; |
---|
5935 | | - src_i = (start_offset + src_offset) >> PAGE_SHIFT; |
---|
| 6044 | + dst_i = dst_offset >> PAGE_SHIFT; |
---|
| 6045 | + src_i = src_offset >> PAGE_SHIFT; |
---|
5936 | 6046 | |
---|
5937 | 6047 | cur = min(len, (unsigned long)(PAGE_SIZE - |
---|
5938 | 6048 | src_off_in_page)); |
---|
.. | .. |
---|
5948 | 6058 | } |
---|
5949 | 6059 | } |
---|
5950 | 6060 | |
---|
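Both copy helpers walk the buffers one page at a time: on every pass the copy length is clamped so the chunk stays inside the current source page and the current destination page, then the offsets advance by that chunk. A userspace model of the chunking over two flat byte arrays standing in for the page arrays:

```c
#include <stdio.h>
#include <string.h>

#define EX_PAGE_SIZE	8UL	/* tiny "pages" so the chunking is visible */

/* Copy @len bytes between two paged buffers, one in-page chunk at a time. */
static void paged_copy(unsigned char *dst, unsigned long dst_off,
		       const unsigned char *src, unsigned long src_off,
		       unsigned long len)
{
	while (len > 0) {
		unsigned long d = dst_off % EX_PAGE_SIZE;
		unsigned long s = src_off % EX_PAGE_SIZE;
		unsigned long cur = len;

		/* Clamp so the chunk crosses neither page boundary. */
		if (cur > EX_PAGE_SIZE - s)
			cur = EX_PAGE_SIZE - s;
		if (cur > EX_PAGE_SIZE - d)
			cur = EX_PAGE_SIZE - d;

		memcpy(dst + dst_off, src + src_off, cur);
		src_off += cur;
		dst_off += cur;
		len -= cur;
	}
}

int main(void)
{
	unsigned char src[32] = "abcdefghijklmnopqrstuvwxyz";
	unsigned char dst[32] = { 0 };

	paged_copy(dst, 3, src, 5, 12);	/* spans page edges on both sides */
	printf("%.12s\n", dst + 3);	/* fghijklmnopq */
	return 0;
}
```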
5951 | | -void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, |
---|
5952 | | - unsigned long src_offset, unsigned long len) |
---|
| 6061 | +void memmove_extent_buffer(const struct extent_buffer *dst, |
---|
| 6062 | + unsigned long dst_offset, unsigned long src_offset, |
---|
| 6063 | + unsigned long len) |
---|
5953 | 6064 | { |
---|
5954 | | - struct btrfs_fs_info *fs_info = dst->fs_info; |
---|
5955 | 6065 | size_t cur; |
---|
5956 | 6066 | size_t dst_off_in_page; |
---|
5957 | 6067 | size_t src_off_in_page; |
---|
5958 | 6068 | unsigned long dst_end = dst_offset + len - 1; |
---|
5959 | 6069 | unsigned long src_end = src_offset + len - 1; |
---|
5960 | | - size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1); |
---|
5961 | 6070 | unsigned long dst_i; |
---|
5962 | 6071 | unsigned long src_i; |
---|
5963 | 6072 | |
---|
5964 | | - if (src_offset + len > dst->len) { |
---|
5965 | | - btrfs_err(fs_info, |
---|
5966 | | - "memmove bogus src_offset %lu move len %lu len %lu", |
---|
5967 | | - src_offset, len, dst->len); |
---|
5968 | | - BUG_ON(1); |
---|
5969 | | - } |
---|
5970 | | - if (dst_offset + len > dst->len) { |
---|
5971 | | - btrfs_err(fs_info, |
---|
5972 | | - "memmove bogus dst_offset %lu move len %lu len %lu", |
---|
5973 | | - dst_offset, len, dst->len); |
---|
5974 | | - BUG_ON(1); |
---|
5975 | | - } |
---|
| 6073 | + if (check_eb_range(dst, dst_offset, len) || |
---|
| 6074 | + check_eb_range(dst, src_offset, len)) |
---|
| 6075 | + return; |
---|
5976 | 6076 | if (dst_offset < src_offset) { |
---|
5977 | 6077 | memcpy_extent_buffer(dst, dst_offset, src_offset, len); |
---|
5978 | 6078 | return; |
---|
5979 | 6079 | } |
---|
5980 | 6080 | while (len > 0) { |
---|
5981 | | - dst_i = (start_offset + dst_end) >> PAGE_SHIFT; |
---|
5982 | | - src_i = (start_offset + src_end) >> PAGE_SHIFT; |
---|
| 6081 | + dst_i = dst_end >> PAGE_SHIFT; |
---|
| 6082 | + src_i = src_end >> PAGE_SHIFT; |
---|
5983 | 6083 | |
---|
5984 | | - dst_off_in_page = (start_offset + dst_end) & |
---|
5985 | | - (PAGE_SIZE - 1); |
---|
5986 | | - src_off_in_page = (start_offset + src_end) & |
---|
5987 | | - (PAGE_SIZE - 1); |
---|
| 6084 | + dst_off_in_page = offset_in_page(dst_end); |
---|
| 6085 | + src_off_in_page = offset_in_page(src_end); |
---|
5988 | 6086 | |
---|
5989 | 6087 | cur = min_t(unsigned long, len, src_off_in_page + 1); |
---|
5990 | 6088 | cur = min(cur, dst_off_in_page + 1); |
---|