.. | .. |
---|
3 | 3 | * Copyright (C) 2007 Oracle. All rights reserved. |
---|
4 | 4 | */ |
---|
5 | 5 | |
---|
| 6 | +#include <crypto/hash.h> |
---|
6 | 7 | #include <linux/kernel.h> |
---|
7 | 8 | #include <linux/bio.h> |
---|
8 | | -#include <linux/buffer_head.h> |
---|
9 | 9 | #include <linux/file.h> |
---|
10 | 10 | #include <linux/fs.h> |
---|
11 | 11 | #include <linux/pagemap.h> |
---|
.. | .. |
---|
27 | 27 | #include <linux/uio.h> |
---|
28 | 28 | #include <linux/magic.h> |
---|
29 | 29 | #include <linux/iversion.h> |
---|
| 30 | +#include <linux/swap.h> |
---|
| 31 | +#include <linux/migrate.h> |
---|
| 32 | +#include <linux/sched/mm.h> |
---|
| 33 | +#include <linux/iomap.h> |
---|
30 | 34 | #include <asm/unaligned.h> |
---|
| 35 | +#include "misc.h" |
---|
31 | 36 | #include "ctree.h" |
---|
32 | 37 | #include "disk-io.h" |
---|
33 | 38 | #include "transaction.h" |
---|
.. | .. |
---|
41 | 46 | #include "locking.h" |
---|
42 | 47 | #include "free-space-cache.h" |
---|
43 | 48 | #include "inode-map.h" |
---|
44 | | -#include "backref.h" |
---|
45 | 49 | #include "props.h" |
---|
46 | 50 | #include "qgroup.h" |
---|
47 | | -#include "dedupe.h" |
---|
| 51 | +#include "delalloc-space.h" |
---|
| 52 | +#include "block-group.h" |
---|
| 53 | +#include "space-info.h" |
---|
48 | 54 | |
---|
49 | 55 | struct btrfs_iget_args { |
---|
50 | | - struct btrfs_key *location; |
---|
| 56 | + u64 ino; |
---|
51 | 57 | struct btrfs_root *root; |
---|
52 | 58 | }; |
---|
53 | 59 | |
---|
54 | 60 | struct btrfs_dio_data { |
---|
55 | 61 | u64 reserve; |
---|
56 | | - u64 unsubmitted_oe_range_start; |
---|
57 | | - u64 unsubmitted_oe_range_end; |
---|
58 | | - int overwrite; |
---|
| 62 | + loff_t length; |
---|
| 63 | + ssize_t submitted; |
---|
| 64 | + struct extent_changeset *data_reserved; |
---|
| 65 | + bool sync; |
---|
59 | 66 | }; |
---|
60 | 67 | |
---|
61 | 68 | static const struct inode_operations btrfs_dir_inode_operations; |
---|
62 | 69 | static const struct inode_operations btrfs_symlink_inode_operations; |
---|
63 | | -static const struct inode_operations btrfs_dir_ro_inode_operations; |
---|
64 | 70 | static const struct inode_operations btrfs_special_inode_operations; |
---|
65 | 71 | static const struct inode_operations btrfs_file_inode_operations; |
---|
66 | 72 | static const struct address_space_operations btrfs_aops; |
---|
67 | | -static const struct address_space_operations btrfs_symlink_aops; |
---|
68 | 73 | static const struct file_operations btrfs_dir_file_operations; |
---|
69 | | -static const struct extent_io_ops btrfs_extent_io_ops; |
---|
70 | 74 | |
---|
71 | 75 | static struct kmem_cache *btrfs_inode_cachep; |
---|
72 | 76 | struct kmem_cache *btrfs_trans_handle_cachep; |
---|
.. | .. |
---|
74 | 78 | struct kmem_cache *btrfs_free_space_cachep; |
---|
75 | 79 | struct kmem_cache *btrfs_free_space_bitmap_cachep; |
---|
76 | 80 | |
---|
77 | | -#define S_SHIFT 12 |
---|
78 | | -static const unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { |
---|
79 | | - [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE, |
---|
80 | | - [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR, |
---|
81 | | - [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV, |
---|
82 | | - [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV, |
---|
83 | | - [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO, |
---|
84 | | - [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK, |
---|
85 | | - [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, |
---|
86 | | -}; |
---|
87 | | - |
---|
88 | 81 | static int btrfs_setsize(struct inode *inode, struct iattr *attr); |
---|
89 | 82 | static int btrfs_truncate(struct inode *inode, bool skip_writeback); |
---|
90 | 83 | static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); |
---|
91 | | -static noinline int cow_file_range(struct inode *inode, |
---|
| 84 | +static noinline int cow_file_range(struct btrfs_inode *inode, |
---|
92 | 85 | struct page *locked_page, |
---|
93 | | - u64 start, u64 end, u64 delalloc_end, |
---|
94 | | - int *page_started, unsigned long *nr_written, |
---|
95 | | - int unlock, struct btrfs_dedupe_hash *hash); |
---|
96 | | -static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len, |
---|
97 | | - u64 orig_start, u64 block_start, |
---|
| 86 | + u64 start, u64 end, int *page_started, |
---|
| 87 | + unsigned long *nr_written, int unlock); |
---|
| 88 | +static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, |
---|
| 89 | + u64 len, u64 orig_start, u64 block_start, |
---|
98 | 90 | u64 block_len, u64 orig_block_len, |
---|
99 | 91 | u64 ram_bytes, int compress_type, |
---|
100 | 92 | int type); |
---|
101 | 93 | |
---|
102 | | -static void __endio_write_update_ordered(struct inode *inode, |
---|
| 94 | +static void __endio_write_update_ordered(struct btrfs_inode *inode, |
---|
103 | 95 | const u64 offset, const u64 bytes, |
---|
104 | 96 | const bool uptodate); |
---|
105 | 97 | |
---|
106 | 98 | /* |
---|
107 | 99 | * Cleanup all submitted ordered extents in specified range to handle errors |
---|
108 | | - * from the fill_dellaloc() callback. |
---|
| 100 | + * from the btrfs_run_delalloc_range() callback. |
---|
109 | 101 | * |
---|
110 | 102 | * NOTE: caller must ensure that when an error happens, it can not call |
---|
111 | 103 | * extent_clear_unlock_delalloc() to clear both the bits EXTENT_DO_ACCOUNTING |
---|
.. | .. |
---|
113 | 105 | * to be released, which we want to happen only when finishing the ordered |
---|
114 | 106 | * extent (btrfs_finish_ordered_io()). |
---|
115 | 107 | */ |
---|
116 | | -static inline void btrfs_cleanup_ordered_extents(struct inode *inode, |
---|
| 108 | +static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode, |
---|
117 | 109 | struct page *locked_page, |
---|
118 | 110 | u64 offset, u64 bytes) |
---|
119 | 111 | { |
---|
.. | .. |
---|
125 | 117 | struct page *page; |
---|
126 | 118 | |
---|
127 | 119 | while (index <= end_index) { |
---|
128 | | - page = find_get_page(inode->i_mapping, index); |
---|
| 120 | + page = find_get_page(inode->vfs_inode.i_mapping, index); |
---|
129 | 121 | index++; |
---|
130 | 122 | if (!page) |
---|
131 | 123 | continue; |
---|
.. | .. |
---|
147 | 139 | } |
---|
148 | 140 | |
---|
149 | 141 | static int btrfs_dirty_inode(struct inode *inode); |
---|
150 | | - |
---|
151 | | -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS |
---|
152 | | -void btrfs_test_inode_set_ops(struct inode *inode) |
---|
153 | | -{ |
---|
154 | | - BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
---|
155 | | -} |
---|
156 | | -#endif |
---|
157 | 142 | |
---|
158 | 143 | static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, |
---|
159 | 144 | struct inode *inode, struct inode *dir, |
---|
.. | .. |
---|
187 | 172 | int ret; |
---|
188 | 173 | size_t cur_size = size; |
---|
189 | 174 | unsigned long offset; |
---|
| 175 | + |
---|
| 176 | + ASSERT((compressed_size > 0 && compressed_pages) || |
---|
| 177 | + (compressed_size == 0 && !compressed_pages)); |
---|
190 | 178 | |
---|
191 | 179 | if (compressed_size && compressed_pages) |
---|
192 | 180 | cur_size = compressed_size; |
---|
.. | .. |
---|
241 | 229 | start >> PAGE_SHIFT); |
---|
242 | 230 | btrfs_set_file_extent_compression(leaf, ei, 0); |
---|
243 | 231 | kaddr = kmap_atomic(page); |
---|
244 | | - offset = start & (PAGE_SIZE - 1); |
---|
| 232 | + offset = offset_in_page(start); |
---|
245 | 233 | write_extent_buffer(leaf, kaddr + offset, ptr, size); |
---|
246 | 234 | kunmap_atomic(kaddr); |
---|
247 | 235 | put_page(page); |
---|
248 | 236 | } |
---|
249 | 237 | btrfs_mark_buffer_dirty(leaf); |
---|
250 | 238 | btrfs_release_path(path); |
---|
| 239 | + |
---|
| 240 | + /* |
---|
| 241 | + * We align size to sectorsize for inline extents just for simplicity |
---|
| 242 | + * sake. |
---|
| 243 | + */ |
---|
| 244 | + size = ALIGN(size, root->fs_info->sectorsize); |
---|
| 245 | + ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), start, size); |
---|
| 246 | + if (ret) |
---|
| 247 | + goto fail; |
---|
251 | 248 | |
---|
252 | 249 | /* |
---|
253 | 250 | * we're an inline extent, so nobody can |
---|
.. | .. |
---|
271 | 268 | * does the checks required to make sure the data is small enough |
---|
272 | 269 | * to fit as an inline extent. |
---|
273 | 270 | */ |
---|
274 | | -static noinline int cow_file_range_inline(struct inode *inode, u64 start, |
---|
| 271 | +static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start, |
---|
275 | 272 | u64 end, size_t compressed_size, |
---|
276 | 273 | int compress_type, |
---|
277 | 274 | struct page **compressed_pages) |
---|
278 | 275 | { |
---|
279 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
| 276 | + struct btrfs_root *root = inode->root; |
---|
280 | 277 | struct btrfs_fs_info *fs_info = root->fs_info; |
---|
281 | 278 | struct btrfs_trans_handle *trans; |
---|
282 | | - u64 isize = i_size_read(inode); |
---|
| 279 | + u64 isize = i_size_read(&inode->vfs_inode); |
---|
283 | 280 | u64 actual_end = min(end + 1, isize); |
---|
284 | 281 | u64 inline_len = actual_end - start; |
---|
285 | 282 | u64 aligned_end = ALIGN(end, fs_info->sectorsize); |
---|
.. | .. |
---|
311 | 308 | btrfs_free_path(path); |
---|
312 | 309 | return PTR_ERR(trans); |
---|
313 | 310 | } |
---|
314 | | - trans->block_rsv = &BTRFS_I(inode)->block_rsv; |
---|
| 311 | + trans->block_rsv = &inode->block_rsv; |
---|
315 | 312 | |
---|
316 | 313 | if (compressed_size && compressed_pages) |
---|
317 | 314 | extent_item_size = btrfs_file_extent_calc_inline_size( |
---|
.. | .. |
---|
320 | 317 | extent_item_size = btrfs_file_extent_calc_inline_size( |
---|
321 | 318 | inline_len); |
---|
322 | 319 | |
---|
323 | | - ret = __btrfs_drop_extents(trans, root, inode, path, |
---|
324 | | - start, aligned_end, NULL, |
---|
325 | | - 1, 1, extent_item_size, &extent_inserted); |
---|
| 320 | + ret = __btrfs_drop_extents(trans, root, inode, path, start, aligned_end, |
---|
| 321 | + NULL, 1, 1, extent_item_size, |
---|
| 322 | + &extent_inserted); |
---|
326 | 323 | if (ret) { |
---|
327 | 324 | btrfs_abort_transaction(trans, ret); |
---|
328 | 325 | goto out; |
---|
.. | .. |
---|
331 | 328 | if (isize > actual_end) |
---|
332 | 329 | inline_len = min_t(u64, isize, actual_end); |
---|
333 | 330 | ret = insert_inline_extent(trans, path, extent_inserted, |
---|
334 | | - root, inode, start, |
---|
| 331 | + root, &inode->vfs_inode, start, |
---|
335 | 332 | inline_len, compressed_size, |
---|
336 | 333 | compress_type, compressed_pages); |
---|
337 | 334 | if (ret && ret != -ENOSPC) { |
---|
.. | .. |
---|
342 | 339 | goto out; |
---|
343 | 340 | } |
---|
344 | 341 | |
---|
345 | | - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); |
---|
346 | | - btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0); |
---|
| 342 | + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); |
---|
| 343 | + btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
---|
347 | 344 | out: |
---|
348 | 345 | /* |
---|
349 | 346 | * Don't forget to free the reserved space, as for inlined extent |
---|
.. | .. |
---|
367 | 364 | struct list_head list; |
---|
368 | 365 | }; |
---|
369 | 366 | |
---|
370 | | -struct async_cow { |
---|
| 367 | +struct async_chunk { |
---|
371 | 368 | struct inode *inode; |
---|
372 | | - struct btrfs_root *root; |
---|
373 | 369 | struct page *locked_page; /* may be NULL; users check before touching it */ |
---|
374 | 370 | u64 start; /* first byte of the range handled by compress_file_range() */ |
---|
375 | 371 | u64 end; /* last byte of that range, inclusive */ |
---|
376 | 372 | unsigned int write_flags; /* forwarded to btrfs_submit_compressed_write() */ |
---|
377 | 373 | struct list_head extents; /* struct async_extent entries, drained by submit_compressed_extents() */ |
---|
| 374 | + struct cgroup_subsys_state *blkcg_css; /* forwarded to btrfs_submit_compressed_write() */ |
---|
378 | 375 | struct btrfs_work work; |
---|
| 376 | + atomic_t *pending; /* NOTE(review): presumably points at async_cow.num_chunks; confirm */ |
---|
379 | 377 | }; |
---|
380 | 378 | |
---|
381 | | -static noinline int add_async_extent(struct async_cow *cow, |
---|
| 379 | +struct async_cow { |
---|
| 380 | + /* Number of chunks in flight; must be first in the structure */ |
---|
| 381 | + atomic_t num_chunks; |
---|
| 382 | + struct async_chunk chunks[]; |
---|
| 383 | +}; |
---|
| 384 | + |
---|
| 385 | +static noinline int add_async_extent(struct async_chunk *cow, |
---|
382 | 386 | u64 start, u64 ram_size, |
---|
383 | 387 | u64 compressed_size, |
---|
384 | 388 | struct page **pages, |
---|
.. | .. |
---|
402 | 406 | /* |
---|
403 | 407 | * Check if the inode has flags compatible with compression: returns false when BTRFS_INODE_NODATACOW or BTRFS_INODE_NODATASUM is set, true otherwise |
---|
404 | 408 | */ |
---|
405 | | -static inline bool inode_can_compress(struct inode *inode) |
---|
| 409 | +static inline bool inode_can_compress(struct btrfs_inode *inode) |
---|
406 | 410 | { |
---|
407 | | - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW || |
---|
408 | | - BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) |
---|
| 411 | + if (inode->flags & BTRFS_INODE_NODATACOW || |
---|
| 412 | + inode->flags & BTRFS_INODE_NODATASUM) |
---|
409 | 413 | return false; |
---|
410 | 414 | return true; |
---|
411 | 415 | } |
---|
.. | .. |
---|
414 | 418 | * Check if the inode needs to be submitted to compression, based on mount |
---|
415 | 419 | * options, defragmentation, properties or heuristics. |
---|
416 | 420 | */ |
---|
417 | | -static inline int inode_need_compress(struct inode *inode, u64 start, u64 end) |
---|
| 421 | +static inline int inode_need_compress(struct btrfs_inode *inode, u64 start, |
---|
| 422 | + u64 end) |
---|
418 | 423 | { |
---|
419 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
| 424 | + struct btrfs_fs_info *fs_info = inode->root->fs_info; |
---|
420 | 425 | |
---|
421 | 426 | if (!inode_can_compress(inode)) { |
---|
422 | 427 | WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), |
---|
423 | 428 | KERN_ERR "BTRFS: unexpected compression for ino %llu\n", |
---|
424 | | - btrfs_ino(BTRFS_I(inode))); |
---|
| 429 | + btrfs_ino(inode)); |
---|
425 | 430 | return 0; |
---|
426 | 431 | } |
---|
427 | 432 | /* force compress option is set: always compress, skipping the checks below */ |
---|
428 | 433 | if (btrfs_test_opt(fs_info, FORCE_COMPRESS)) |
---|
429 | 434 | return 1; |
---|
430 | 435 | /* defrag ioctl set defrag_compress on this inode: compress */ |
---|
431 | | - if (BTRFS_I(inode)->defrag_compress) |
---|
| 436 | + if (inode->defrag_compress) |
---|
432 | 437 | return 1; |
---|
433 | 438 | /* inode is flagged NOCOMPRESS (set on bad compression ratios): do not compress */ |
---|
434 | | - if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) |
---|
| 439 | + if (inode->flags & BTRFS_INODE_NOCOMPRESS) |
---|
435 | 440 | return 0; |
---|
436 | 441 | if (btrfs_test_opt(fs_info, COMPRESS) || |
---|
437 | | - BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS || |
---|
438 | | - BTRFS_I(inode)->prop_compress) |
---|
439 | | - return btrfs_compress_heuristic(inode, start, end); |
---|
| 442 | + inode->flags & BTRFS_INODE_COMPRESS || |
---|
| 443 | + inode->prop_compress) |
---|
| 444 | + return btrfs_compress_heuristic(&inode->vfs_inode, start, end); |
---|
440 | 445 | return 0; |
---|
441 | 446 | } |
---|
442 | 447 | |
---|
.. | .. |
---|
466 | 471 | * are written in the same order that the flusher thread sent them |
---|
467 | 472 | * down. |
---|
468 | 473 | */ |
---|
469 | | -static noinline void compress_file_range(struct inode *inode, |
---|
470 | | - struct page *locked_page, |
---|
471 | | - u64 start, u64 end, |
---|
472 | | - struct async_cow *async_cow, |
---|
473 | | - int *num_added) |
---|
| 474 | +static noinline int compress_file_range(struct async_chunk *async_chunk) |
---|
474 | 475 | { |
---|
| 476 | + struct inode *inode = async_chunk->inode; |
---|
475 | 477 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
476 | 478 | u64 blocksize = fs_info->sectorsize; |
---|
| 479 | + u64 start = async_chunk->start; |
---|
| 480 | + u64 end = async_chunk->end; |
---|
477 | 481 | u64 actual_end; |
---|
478 | | - u64 isize = i_size_read(inode); |
---|
| 482 | + u64 i_size; |
---|
479 | 483 | int ret = 0; |
---|
480 | 484 | struct page **pages = NULL; |
---|
481 | 485 | unsigned long nr_pages; |
---|
.. | .. |
---|
484 | 488 | int i; |
---|
485 | 489 | int will_compress; |
---|
486 | 490 | int compress_type = fs_info->compress_type; |
---|
| 491 | + int compressed_extents = 0; |
---|
487 | 492 | int redirty = 0; |
---|
488 | 493 | |
---|
489 | 494 | inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1, |
---|
490 | 495 | SZ_16K); |
---|
491 | 496 | |
---|
492 | | - actual_end = min_t(u64, isize, end + 1); |
---|
| 497 | + /* |
---|
| 498 | + * We need to save i_size before now because it could change in between |
---|
| 499 | + * us evaluating the size and assigning it. This is because we lock and |
---|
| 500 | + * unlock the page in truncate and fallocate, and then modify the i_size |
---|
| 501 | + * later on. |
---|
| 502 | + * |
---|
| 503 | + * The barriers are to emulate READ_ONCE, remove that once i_size_read |
---|
| 504 | + * does that for us. |
---|
| 505 | + */ |
---|
| 506 | + barrier(); |
---|
| 507 | + i_size = i_size_read(inode); |
---|
| 508 | + barrier(); |
---|
| 509 | + actual_end = min_t(u64, i_size, end + 1); |
---|
493 | 510 | again: |
---|
494 | 511 | will_compress = 0; |
---|
495 | 512 | nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1; |
---|
.. | .. |
---|
530 | 547 | * inode has not been flagged as nocompress. This flag can |
---|
531 | 548 | * change at any time if we discover bad compression ratios. |
---|
532 | 549 | */ |
---|
533 | | - if (inode_need_compress(inode, start, end)) { |
---|
| 550 | + if (inode_need_compress(BTRFS_I(inode), start, end)) { |
---|
534 | 551 | WARN_ON(pages); |
---|
535 | 552 | pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); |
---|
536 | 553 | if (!pages) { |
---|
.. | .. |
---|
571 | 588 | &total_compressed); |
---|
572 | 589 | |
---|
573 | 590 | if (!ret) { |
---|
574 | | - unsigned long offset = total_compressed & |
---|
575 | | - (PAGE_SIZE - 1); |
---|
| 591 | + unsigned long offset = offset_in_page(total_compressed); |
---|
576 | 592 | struct page *page = pages[nr_pages - 1]; |
---|
577 | 593 | char *kaddr; |
---|
578 | 594 | |
---|
.. | .. |
---|
595 | 611 | /* we didn't compress the entire range, try |
---|
596 | 612 | * to make an uncompressed inline extent. |
---|
597 | 613 | */ |
---|
598 | | - ret = cow_file_range_inline(inode, start, end, 0, |
---|
599 | | - BTRFS_COMPRESS_NONE, NULL); |
---|
| 614 | + ret = cow_file_range_inline(BTRFS_I(inode), start, end, |
---|
| 615 | + 0, BTRFS_COMPRESS_NONE, |
---|
| 616 | + NULL); |
---|
600 | 617 | } else { |
---|
601 | 618 | /* try making a compressed inline extent */ |
---|
602 | | - ret = cow_file_range_inline(inode, start, end, |
---|
| 619 | + ret = cow_file_range_inline(BTRFS_I(inode), start, end, |
---|
603 | 620 | total_compressed, |
---|
604 | 621 | compress_type, pages); |
---|
605 | 622 | } |
---|
.. | .. |
---|
621 | 638 | * our outstanding extent for clearing delalloc for this |
---|
622 | 639 | * range. |
---|
623 | 640 | */ |
---|
624 | | - extent_clear_unlock_delalloc(inode, start, end, end, |
---|
625 | | - NULL, clear_flags, |
---|
| 641 | + extent_clear_unlock_delalloc(BTRFS_I(inode), start, end, |
---|
| 642 | + NULL, |
---|
| 643 | + clear_flags, |
---|
626 | 644 | PAGE_UNLOCK | |
---|
627 | 645 | PAGE_CLEAR_DIRTY | |
---|
628 | 646 | PAGE_SET_WRITEBACK | |
---|
.. | .. |
---|
641 | 659 | } |
---|
642 | 660 | kfree(pages); |
---|
643 | 661 | } |
---|
644 | | - |
---|
645 | | - return; |
---|
| 662 | + return 0; |
---|
646 | 663 | } |
---|
647 | 664 | } |
---|
648 | 665 | |
---|
.. | .. |
---|
661 | 678 | */ |
---|
662 | 679 | total_in = ALIGN(total_in, PAGE_SIZE); |
---|
663 | 680 | if (total_compressed + blocksize <= total_in) { |
---|
664 | | - *num_added += 1; |
---|
| 681 | + compressed_extents++; |
---|
665 | 682 | |
---|
666 | 683 | /* |
---|
667 | 684 | * The async work queues will take care of doing actual |
---|
668 | 685 | * allocation on disk for these compressed pages, and |
---|
669 | 686 | * will submit them to the elevator. |
---|
670 | 687 | */ |
---|
671 | | - add_async_extent(async_cow, start, total_in, |
---|
| 688 | + add_async_extent(async_chunk, start, total_in, |
---|
672 | 689 | total_compressed, pages, nr_pages, |
---|
673 | 690 | compress_type); |
---|
674 | 691 | |
---|
.. | .. |
---|
678 | 695 | cond_resched(); |
---|
679 | 696 | goto again; |
---|
680 | 697 | } |
---|
681 | | - return; |
---|
| 698 | + return compressed_extents; |
---|
682 | 699 | } |
---|
683 | 700 | } |
---|
684 | 701 | if (pages) { |
---|
.. | .. |
---|
708 | 725 | * to our extent and set things up for the async work queue to run |
---|
709 | 726 | * cow_file_range to do the normal delalloc dance. |
---|
710 | 727 | */ |
---|
711 | | - if (page_offset(locked_page) >= start && |
---|
712 | | - page_offset(locked_page) <= end) |
---|
713 | | - __set_page_dirty_nobuffers(locked_page); |
---|
| 728 | + if (async_chunk->locked_page && |
---|
| 729 | + (page_offset(async_chunk->locked_page) >= start && |
---|
| 730 | + page_offset(async_chunk->locked_page) <= end)) { |
---|
| 731 | + __set_page_dirty_nobuffers(async_chunk->locked_page); |
---|
714 | 732 | /* locked page lies within [start, end]; it is unlocked later on in the async handlers */ |
---|
| 733 | + } |
---|
715 | 734 | |
---|
716 | 735 | if (redirty) |
---|
717 | 736 | extent_range_redirty_for_io(inode, start, end); |
---|
718 | | - add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0, |
---|
| 737 | + add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0, |
---|
719 | 738 | BTRFS_COMPRESS_NONE); |
---|
720 | | - *num_added += 1; |
---|
| 739 | + compressed_extents++; |
---|
721 | 740 | |
---|
722 | | - return; |
---|
| 741 | + return compressed_extents; |
---|
723 | 742 | } |
---|
724 | 743 | |
---|
725 | 744 | static void free_async_extent_pages(struct async_extent *async_extent) |
---|
.. | .. |
---|
744 | 763 | * queued. We walk all the async extents created by compress_file_range |
---|
745 | 764 | * and send them down to the disk. |
---|
746 | 765 | */ |
---|
747 | | -static noinline void submit_compressed_extents(struct inode *inode, |
---|
748 | | - struct async_cow *async_cow) |
---|
| 766 | +static noinline void submit_compressed_extents(struct async_chunk *async_chunk) |
---|
749 | 767 | { |
---|
750 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
| 768 | + struct btrfs_inode *inode = BTRFS_I(async_chunk->inode); |
---|
| 769 | + struct btrfs_fs_info *fs_info = inode->root->fs_info; |
---|
751 | 770 | struct async_extent *async_extent; |
---|
752 | 771 | u64 alloc_hint = 0; |
---|
753 | 772 | struct btrfs_key ins; |
---|
754 | 773 | struct extent_map *em; |
---|
755 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
756 | | - struct extent_io_tree *io_tree; |
---|
| 774 | + struct btrfs_root *root = inode->root; |
---|
| 775 | + struct extent_io_tree *io_tree = &inode->io_tree; |
---|
757 | 776 | int ret = 0; |
---|
758 | 777 | |
---|
759 | 778 | again: |
---|
760 | | - while (!list_empty(&async_cow->extents)) { |
---|
761 | | - async_extent = list_entry(async_cow->extents.next, |
---|
| 779 | + while (!list_empty(&async_chunk->extents)) { |
---|
| 780 | + async_extent = list_entry(async_chunk->extents.next, |
---|
762 | 781 | struct async_extent, list); |
---|
763 | 782 | list_del(&async_extent->list); |
---|
764 | 783 | |
---|
765 | | - io_tree = &BTRFS_I(inode)->io_tree; |
---|
766 | | - |
---|
767 | 784 | retry: |
---|
| 785 | + lock_extent(io_tree, async_extent->start, |
---|
| 786 | + async_extent->start + async_extent->ram_size - 1); |
---|
768 | 787 | /* did the compression code fall back to uncompressed IO? */ |
---|
769 | 788 | if (!async_extent->pages) { |
---|
770 | 789 | int page_started = 0; |
---|
771 | 790 | unsigned long nr_written = 0; |
---|
772 | 791 | |
---|
773 | | - lock_extent(io_tree, async_extent->start, |
---|
774 | | - async_extent->start + |
---|
775 | | - async_extent->ram_size - 1); |
---|
776 | | - |
---|
777 | 792 | /* allocate blocks */ |
---|
778 | | - ret = cow_file_range(inode, async_cow->locked_page, |
---|
| 793 | + ret = cow_file_range(inode, async_chunk->locked_page, |
---|
779 | 794 | async_extent->start, |
---|
780 | 795 | async_extent->start + |
---|
781 | 796 | async_extent->ram_size - 1, |
---|
782 | | - async_extent->start + |
---|
783 | | - async_extent->ram_size - 1, |
---|
784 | | - &page_started, &nr_written, 0, |
---|
785 | | - NULL); |
---|
| 797 | + &page_started, &nr_written, 0); |
---|
786 | 798 | |
---|
787 | 799 | /* JDM XXX */ |
---|
788 | 800 | |
---|
.. | .. |
---|
793 | 805 | * all those pages down to the drive. |
---|
794 | 806 | */ |
---|
795 | 807 | if (!page_started && !ret) |
---|
796 | | - extent_write_locked_range(inode, |
---|
| 808 | + extent_write_locked_range(&inode->vfs_inode, |
---|
797 | 809 | async_extent->start, |
---|
798 | 810 | async_extent->start + |
---|
799 | 811 | async_extent->ram_size - 1, |
---|
800 | 812 | WB_SYNC_ALL); |
---|
801 | | - else if (ret) |
---|
802 | | - unlock_page(async_cow->locked_page); |
---|
| 813 | + else if (ret && async_chunk->locked_page) |
---|
| 814 | + unlock_page(async_chunk->locked_page); |
---|
803 | 815 | kfree(async_extent); |
---|
804 | 816 | cond_resched(); |
---|
805 | 817 | continue; |
---|
806 | 818 | } |
---|
807 | | - |
---|
808 | | - lock_extent(io_tree, async_extent->start, |
---|
809 | | - async_extent->start + async_extent->ram_size - 1); |
---|
810 | 819 | |
---|
811 | 820 | ret = btrfs_reserve_extent(root, async_extent->ram_size, |
---|
812 | 821 | async_extent->compressed_size, |
---|
.. | .. |
---|
826 | 835 | * will not submit these pages down to lower |
---|
827 | 836 | * layers. |
---|
828 | 837 | */ |
---|
829 | | - extent_range_redirty_for_io(inode, |
---|
| 838 | + extent_range_redirty_for_io(&inode->vfs_inode, |
---|
830 | 839 | async_extent->start, |
---|
831 | 840 | async_extent->start + |
---|
832 | 841 | async_extent->ram_size - 1); |
---|
.. | .. |
---|
861 | 870 | BTRFS_ORDERED_COMPRESSED, |
---|
862 | 871 | async_extent->compress_type); |
---|
863 | 872 | if (ret) { |
---|
864 | | - btrfs_drop_extent_cache(BTRFS_I(inode), |
---|
865 | | - async_extent->start, |
---|
| 873 | + btrfs_drop_extent_cache(inode, async_extent->start, |
---|
866 | 874 | async_extent->start + |
---|
867 | 875 | async_extent->ram_size - 1, 0); |
---|
868 | 876 | goto out_free_reserve; |
---|
.. | .. |
---|
875 | 883 | extent_clear_unlock_delalloc(inode, async_extent->start, |
---|
876 | 884 | async_extent->start + |
---|
877 | 885 | async_extent->ram_size - 1, |
---|
878 | | - async_extent->start + |
---|
879 | | - async_extent->ram_size - 1, |
---|
880 | 886 | NULL, EXTENT_LOCKED | EXTENT_DELALLOC, |
---|
881 | 887 | PAGE_UNLOCK | PAGE_CLEAR_DIRTY | |
---|
882 | 888 | PAGE_SET_WRITEBACK); |
---|
883 | | - if (btrfs_submit_compressed_write(inode, |
---|
884 | | - async_extent->start, |
---|
| 889 | + if (btrfs_submit_compressed_write(inode, async_extent->start, |
---|
885 | 890 | async_extent->ram_size, |
---|
886 | 891 | ins.objectid, |
---|
887 | 892 | ins.offset, async_extent->pages, |
---|
888 | 893 | async_extent->nr_pages, |
---|
889 | | - async_cow->write_flags)) { |
---|
890 | | - struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; |
---|
| 894 | + async_chunk->write_flags, |
---|
| 895 | + async_chunk->blkcg_css)) { |
---|
891 | 896 | struct page *p = async_extent->pages[0]; |
---|
892 | 897 | const u64 start = async_extent->start; |
---|
893 | 898 | const u64 end = start + async_extent->ram_size - 1; |
---|
894 | 899 | |
---|
895 | | - p->mapping = inode->i_mapping; |
---|
896 | | - tree->ops->writepage_end_io_hook(p, start, end, |
---|
897 | | - NULL, 0); |
---|
| 900 | + p->mapping = inode->vfs_inode.i_mapping; |
---|
| 901 | + btrfs_writepage_endio_finish_ordered(p, start, end, 0); |
---|
| 902 | + |
---|
898 | 903 | p->mapping = NULL; |
---|
899 | | - extent_clear_unlock_delalloc(inode, start, end, end, |
---|
900 | | - NULL, 0, |
---|
| 904 | + extent_clear_unlock_delalloc(inode, start, end, NULL, 0, |
---|
901 | 905 | PAGE_END_WRITEBACK | |
---|
902 | 906 | PAGE_SET_ERROR); |
---|
903 | 907 | free_async_extent_pages(async_extent); |
---|
.. | .. |
---|
914 | 918 | extent_clear_unlock_delalloc(inode, async_extent->start, |
---|
915 | 919 | async_extent->start + |
---|
916 | 920 | async_extent->ram_size - 1, |
---|
917 | | - async_extent->start + |
---|
918 | | - async_extent->ram_size - 1, |
---|
919 | 921 | NULL, EXTENT_LOCKED | EXTENT_DELALLOC | |
---|
920 | 922 | EXTENT_DELALLOC_NEW | |
---|
921 | 923 | EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, |
---|
.. | .. |
---|
927 | 929 | goto again; |
---|
928 | 930 | } |
---|
929 | 931 | |
---|
930 | | -static u64 get_extent_allocation_hint(struct inode *inode, u64 start, |
---|
| 932 | +static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start, |
---|
931 | 933 | u64 num_bytes) |
---|
932 | 934 | { |
---|
933 | | - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
---|
| 935 | + struct extent_map_tree *em_tree = &inode->extent_tree; |
---|
934 | 936 | struct extent_map *em; |
---|
935 | 937 | u64 alloc_hint = 0; |
---|
936 | 938 | |
---|
.. | .. |
---|
972 | 974 | * required to start IO on it. It may be clean and already done with |
---|
973 | 975 | * IO when we return. |
---|
974 | 976 | */ |
---|
975 | | -static noinline int cow_file_range(struct inode *inode, |
---|
| 977 | +static noinline int cow_file_range(struct btrfs_inode *inode, |
---|
976 | 978 | struct page *locked_page, |
---|
977 | | - u64 start, u64 end, u64 delalloc_end, |
---|
978 | | - int *page_started, unsigned long *nr_written, |
---|
979 | | - int unlock, struct btrfs_dedupe_hash *hash) |
---|
| 979 | + u64 start, u64 end, int *page_started, |
---|
| 980 | + unsigned long *nr_written, int unlock) |
---|
980 | 981 | { |
---|
981 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
982 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
| 982 | + struct btrfs_root *root = inode->root; |
---|
| 983 | + struct btrfs_fs_info *fs_info = root->fs_info; |
---|
983 | 984 | u64 alloc_hint = 0; |
---|
984 | 985 | u64 num_bytes; |
---|
985 | 986 | unsigned long ram_size; |
---|
.. | .. |
---|
993 | 994 | bool extent_reserved = false; |
---|
994 | 995 | int ret = 0; |
---|
995 | 996 | |
---|
996 | | - if (btrfs_is_free_space_inode(BTRFS_I(inode))) { |
---|
997 | | - WARN_ON_ONCE(1); |
---|
| 997 | + if (btrfs_is_free_space_inode(inode)) { |
---|
998 | 998 | ret = -EINVAL; |
---|
999 | 999 | goto out_unlock; |
---|
1000 | 1000 | } |
---|
.. | .. |
---|
1003 | 1003 | num_bytes = max(blocksize, num_bytes); |
---|
1004 | 1004 | ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy)); |
---|
1005 | 1005 | |
---|
1006 | | - inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K); |
---|
| 1006 | + inode_should_defrag(inode, start, end, num_bytes, SZ_64K); |
---|
1007 | 1007 | |
---|
1008 | 1008 | if (start == 0) { |
---|
1009 | 1009 | /* lets try to make an inline extent */ |
---|
.. | .. |
---|
1016 | 1016 | * our outstanding extent for clearing delalloc for this |
---|
1017 | 1017 | * range. |
---|
1018 | 1018 | */ |
---|
1019 | | - extent_clear_unlock_delalloc(inode, start, end, |
---|
1020 | | - delalloc_end, NULL, |
---|
| 1019 | + extent_clear_unlock_delalloc(inode, start, end, NULL, |
---|
1021 | 1020 | EXTENT_LOCKED | EXTENT_DELALLOC | |
---|
1022 | 1021 | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | |
---|
1023 | 1022 | EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | |
---|
.. | .. |
---|
1033 | 1032 | } |
---|
1034 | 1033 | |
---|
1035 | 1034 | alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); |
---|
1036 | | - btrfs_drop_extent_cache(BTRFS_I(inode), start, |
---|
1037 | | - start + num_bytes - 1, 0); |
---|
| 1035 | + btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
---|
1038 | 1036 | |
---|
1039 | 1037 | /* |
---|
1040 | 1038 | * Relocation relies on the relocated extents to have exactly the same |
---|
.. | .. |
---|
1098 | 1096 | * skip current ordered extent. |
---|
1099 | 1097 | */ |
---|
1100 | 1098 | if (ret) |
---|
1101 | | - btrfs_drop_extent_cache(BTRFS_I(inode), start, |
---|
| 1099 | + btrfs_drop_extent_cache(inode, start, |
---|
1102 | 1100 | start + ram_size - 1, 0); |
---|
1103 | 1101 | } |
---|
1104 | 1102 | |
---|
.. | .. |
---|
1114 | 1112 | page_ops = unlock ? PAGE_UNLOCK : 0; |
---|
1115 | 1113 | page_ops |= PAGE_SET_PRIVATE2; |
---|
1116 | 1114 | |
---|
1117 | | - extent_clear_unlock_delalloc(inode, start, |
---|
1118 | | - start + ram_size - 1, |
---|
1119 | | - delalloc_end, locked_page, |
---|
| 1115 | + extent_clear_unlock_delalloc(inode, start, start + ram_size - 1, |
---|
| 1116 | + locked_page, |
---|
1120 | 1117 | EXTENT_LOCKED | EXTENT_DELALLOC, |
---|
1121 | 1118 | page_ops); |
---|
1122 | 1119 | if (num_bytes < cur_alloc_size) |
---|
.. | .. |
---|
1139 | 1136 | return ret; |
---|
1140 | 1137 | |
---|
1141 | 1138 | out_drop_extent_cache: |
---|
1142 | | - btrfs_drop_extent_cache(BTRFS_I(inode), start, start + ram_size - 1, 0); |
---|
| 1139 | + btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0); |
---|
1143 | 1140 | out_reserve: |
---|
1144 | 1141 | btrfs_dec_block_group_reservations(fs_info, ins.objectid); |
---|
1145 | 1142 | btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1); |
---|
.. | .. |
---|
1161 | 1158 | if (extent_reserved) { |
---|
1162 | 1159 | extent_clear_unlock_delalloc(inode, start, |
---|
1163 | 1160 | start + cur_alloc_size - 1, |
---|
1164 | | - start + cur_alloc_size - 1, |
---|
1165 | 1161 | locked_page, |
---|
1166 | 1162 | clear_bits, |
---|
1167 | 1163 | page_ops); |
---|
.. | .. |
---|
1169 | 1165 | if (start >= end) |
---|
1170 | 1166 | goto out; |
---|
1171 | 1167 | } |
---|
1172 | | - extent_clear_unlock_delalloc(inode, start, end, delalloc_end, |
---|
1173 | | - locked_page, |
---|
| 1168 | + extent_clear_unlock_delalloc(inode, start, end, locked_page, |
---|
1174 | 1169 | clear_bits | EXTENT_CLEAR_DATA_RESV, |
---|
1175 | 1170 | page_ops); |
---|
1176 | 1171 | goto out; |
---|
.. | .. |
---|
1181 | 1176 | */ |
---|
1182 | 1177 | static noinline void async_cow_start(struct btrfs_work *work) |
---|
1183 | 1178 | { |
---|
1184 | | - struct async_cow *async_cow; |
---|
1185 | | - int num_added = 0; |
---|
1186 | | - async_cow = container_of(work, struct async_cow, work); |
---|
| 1179 | + struct async_chunk *async_chunk; |
---|
| 1180 | + int compressed_extents; |
---|
1187 | 1181 | |
---|
1188 | | - compress_file_range(async_cow->inode, async_cow->locked_page, |
---|
1189 | | - async_cow->start, async_cow->end, async_cow, |
---|
1190 | | - &num_added); |
---|
1191 | | - if (num_added == 0) { |
---|
1192 | | - btrfs_add_delayed_iput(async_cow->inode); |
---|
1193 | | - async_cow->inode = NULL; |
---|
| 1182 | + async_chunk = container_of(work, struct async_chunk, work); |
---|
| 1183 | + |
---|
| 1184 | + compressed_extents = compress_file_range(async_chunk); |
---|
| 1185 | + if (compressed_extents == 0) { |
---|
| 1186 | + btrfs_add_delayed_iput(async_chunk->inode); |
---|
| 1187 | + async_chunk->inode = NULL; |
---|
1194 | 1188 | } |
---|
1195 | 1189 | } |
---|
1196 | 1190 | |
---|
.. | .. |
---|
1199 | 1193 | */ |
---|
1200 | 1194 | static noinline void async_cow_submit(struct btrfs_work *work) |
---|
1201 | 1195 | { |
---|
1202 | | - struct btrfs_fs_info *fs_info; |
---|
1203 | | - struct async_cow *async_cow; |
---|
1204 | | - struct btrfs_root *root; |
---|
| 1196 | + struct async_chunk *async_chunk = container_of(work, struct async_chunk, |
---|
| 1197 | + work); |
---|
| 1198 | + struct btrfs_fs_info *fs_info = btrfs_work_owner(work); |
---|
1205 | 1199 | unsigned long nr_pages; |
---|
1206 | 1200 | |
---|
1207 | | - async_cow = container_of(work, struct async_cow, work); |
---|
1208 | | - |
---|
1209 | | - root = async_cow->root; |
---|
1210 | | - fs_info = root->fs_info; |
---|
1211 | | - nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >> |
---|
| 1201 | + nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >> |
---|
1212 | 1202 | PAGE_SHIFT; |
---|
| 1203 | + |
---|
| 1204 | + /* |
---|
| 1205 | + * ->inode could be NULL if async_cow_start has failed to compress, |
---|
| 1206 | + * in which case we don't have anything to submit, yet we need to |
---|
| 1207 | + * always adjust ->async_delalloc_pages as it's paired with the init |
---|
| 1208 | + * happening in cow_file_range_async |
---|
| 1209 | + */ |
---|
| 1210 | + if (async_chunk->inode) |
---|
| 1211 | + submit_compressed_extents(async_chunk); |
---|
1213 | 1212 | |
---|
1214 | 1213 | /* atomic_sub_return implies a barrier */ |
---|
1215 | 1214 | if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) < |
---|
1216 | 1215 | 5 * SZ_1M) |
---|
1217 | 1216 | cond_wake_up_nomb(&fs_info->async_submit_wait); |
---|
1218 | | - |
---|
1219 | | - if (async_cow->inode) |
---|
1220 | | - submit_compressed_extents(async_cow->inode, async_cow); |
---|
1221 | 1217 | } |
---|
1222 | 1218 | |
---|
1223 | 1219 | static noinline void async_cow_free(struct btrfs_work *work) |
---|
1224 | 1220 | { |
---|
1225 | | - struct async_cow *async_cow; |
---|
1226 | | - async_cow = container_of(work, struct async_cow, work); |
---|
1227 | | - if (async_cow->inode) |
---|
1228 | | - btrfs_add_delayed_iput(async_cow->inode); |
---|
1229 | | - kfree(async_cow); |
---|
| 1221 | + struct async_chunk *async_chunk; |
---|
| 1222 | + |
---|
| 1223 | + async_chunk = container_of(work, struct async_chunk, work); |
---|
| 1224 | + if (async_chunk->inode) |
---|
| 1225 | + btrfs_add_delayed_iput(async_chunk->inode); |
---|
| 1226 | + if (async_chunk->blkcg_css) |
---|
| 1227 | + css_put(async_chunk->blkcg_css); |
---|
| 1228 | + /* |
---|
| 1229 | + * Since the pointer to 'pending' is at the beginning of the array of |
---|
| 1230 | + * async_chunk's, freeing it ensures the whole array has been freed. |
---|
| 1231 | + */ |
---|
| 1232 | + if (atomic_dec_and_test(async_chunk->pending)) |
---|
| 1233 | + kvfree(async_chunk->pending); |
---|
1230 | 1234 | } |
---|
1231 | 1235 | |
---|
1232 | | -static int cow_file_range_async(struct inode *inode, struct page *locked_page, |
---|
| 1236 | +static int cow_file_range_async(struct btrfs_inode *inode, |
---|
| 1237 | + struct writeback_control *wbc, |
---|
| 1238 | + struct page *locked_page, |
---|
1233 | 1239 | u64 start, u64 end, int *page_started, |
---|
1234 | | - unsigned long *nr_written, |
---|
1235 | | - unsigned int write_flags) |
---|
| 1240 | + unsigned long *nr_written) |
---|
1236 | 1241 | { |
---|
1237 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
1238 | | - struct async_cow *async_cow; |
---|
1239 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
| 1242 | + struct btrfs_fs_info *fs_info = inode->root->fs_info; |
---|
| 1243 | + struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc); |
---|
| 1244 | + struct async_cow *ctx; |
---|
| 1245 | + struct async_chunk *async_chunk; |
---|
1240 | 1246 | unsigned long nr_pages; |
---|
1241 | 1247 | u64 cur_end; |
---|
| 1248 | + u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K); |
---|
| 1249 | + int i; |
---|
| 1250 | + bool should_compress; |
---|
| 1251 | + unsigned nofs_flag; |
---|
| 1252 | + const unsigned int write_flags = wbc_to_write_flags(wbc); |
---|
1242 | 1253 | |
---|
1243 | | - clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, |
---|
1244 | | - 1, 0, NULL); |
---|
1245 | | - while (start < end) { |
---|
1246 | | - async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); |
---|
1247 | | - BUG_ON(!async_cow); /* -ENOMEM */ |
---|
1248 | | - async_cow->inode = igrab(inode); |
---|
1249 | | - async_cow->root = root; |
---|
1250 | | - async_cow->locked_page = locked_page; |
---|
1251 | | - async_cow->start = start; |
---|
1252 | | - async_cow->write_flags = write_flags; |
---|
| 1254 | + unlock_extent(&inode->io_tree, start, end); |
---|
1253 | 1255 | |
---|
1254 | | - if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS && |
---|
1255 | | - !btrfs_test_opt(fs_info, FORCE_COMPRESS)) |
---|
1256 | | - cur_end = end; |
---|
1257 | | - else |
---|
| 1256 | + if (inode->flags & BTRFS_INODE_NOCOMPRESS && |
---|
| 1257 | + !btrfs_test_opt(fs_info, FORCE_COMPRESS)) { |
---|
| 1258 | + num_chunks = 1; |
---|
| 1259 | + should_compress = false; |
---|
| 1260 | + } else { |
---|
| 1261 | + should_compress = true; |
---|
| 1262 | + } |
---|
| 1263 | + |
---|
| 1264 | + nofs_flag = memalloc_nofs_save(); |
---|
| 1265 | + ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL); |
---|
| 1266 | + memalloc_nofs_restore(nofs_flag); |
---|
| 1267 | + |
---|
| 1268 | + if (!ctx) { |
---|
| 1269 | + unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | |
---|
| 1270 | + EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | |
---|
| 1271 | + EXTENT_DO_ACCOUNTING; |
---|
| 1272 | + unsigned long page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | |
---|
| 1273 | + PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK | |
---|
| 1274 | + PAGE_SET_ERROR; |
---|
| 1275 | + |
---|
| 1276 | + extent_clear_unlock_delalloc(inode, start, end, locked_page, |
---|
| 1277 | + clear_bits, page_ops); |
---|
| 1278 | + return -ENOMEM; |
---|
| 1279 | + } |
---|
| 1280 | + |
---|
| 1281 | + async_chunk = ctx->chunks; |
---|
| 1282 | + atomic_set(&ctx->num_chunks, num_chunks); |
---|
| 1283 | + |
---|
| 1284 | + for (i = 0; i < num_chunks; i++) { |
---|
| 1285 | + if (should_compress) |
---|
1258 | 1286 | cur_end = min(end, start + SZ_512K - 1); |
---|
| 1287 | + else |
---|
| 1288 | + cur_end = end; |
---|
1259 | 1289 | |
---|
1260 | | - async_cow->end = cur_end; |
---|
1261 | | - INIT_LIST_HEAD(&async_cow->extents); |
---|
| 1290 | + /* |
---|
| 1291 | + * igrab is called higher up in the call chain, take only the |
---|
| 1292 | + * lightweight reference for the callback lifetime |
---|
| 1293 | + */ |
---|
| 1294 | + ihold(&inode->vfs_inode); |
---|
| 1295 | + async_chunk[i].pending = &ctx->num_chunks; |
---|
| 1296 | + async_chunk[i].inode = &inode->vfs_inode; |
---|
| 1297 | + async_chunk[i].start = start; |
---|
| 1298 | + async_chunk[i].end = cur_end; |
---|
| 1299 | + async_chunk[i].write_flags = write_flags; |
---|
| 1300 | + INIT_LIST_HEAD(&async_chunk[i].extents); |
---|
1262 | 1301 | |
---|
1263 | | - btrfs_init_work(&async_cow->work, |
---|
1264 | | - btrfs_delalloc_helper, |
---|
1265 | | - async_cow_start, async_cow_submit, |
---|
1266 | | - async_cow_free); |
---|
| 1302 | + /* |
---|
| 1303 | + * The locked_page comes all the way from writepage and it's |
---|
| 1304 | + * the original page we were actually given. As we spread |
---|
| 1305 | + * this large delalloc region across multiple async_chunk |
---|
| 1306 | + * structs, only the first struct needs a pointer to locked_page |
---|
| 1307 | + * |
---|
| 1308 | + * This way we don't need racy decisions about who is supposed |
---|
| 1309 | + * to unlock it. |
---|
| 1310 | + */ |
---|
| 1311 | + if (locked_page) { |
---|
| 1312 | + /* |
---|
| 1313 | + * Depending on the compressibility, the pages might or |
---|
| 1314 | + * might not go through async. We want all of them to |
---|
| 1315 | + * be accounted against wbc once. Let's do it here |
---|
| 1316 | + * before the paths diverge. wbc accounting is used |
---|
| 1317 | + * only for foreign writeback detection and doesn't |
---|
| 1318 | + * need full accuracy. Just account the whole thing |
---|
| 1319 | + * against the first page. |
---|
| 1320 | + */ |
---|
| 1321 | + wbc_account_cgroup_owner(wbc, locked_page, |
---|
| 1322 | + cur_end - start); |
---|
| 1323 | + async_chunk[i].locked_page = locked_page; |
---|
| 1324 | + locked_page = NULL; |
---|
| 1325 | + } else { |
---|
| 1326 | + async_chunk[i].locked_page = NULL; |
---|
| 1327 | + } |
---|
1267 | 1328 | |
---|
1268 | | - nr_pages = (cur_end - start + PAGE_SIZE) >> |
---|
1269 | | - PAGE_SHIFT; |
---|
| 1329 | + if (blkcg_css != blkcg_root_css) { |
---|
| 1330 | + css_get(blkcg_css); |
---|
| 1331 | + async_chunk[i].blkcg_css = blkcg_css; |
---|
| 1332 | + } else { |
---|
| 1333 | + async_chunk[i].blkcg_css = NULL; |
---|
| 1334 | + } |
---|
| 1335 | + |
---|
| 1336 | + btrfs_init_work(&async_chunk[i].work, async_cow_start, |
---|
| 1337 | + async_cow_submit, async_cow_free); |
---|
| 1338 | + |
---|
| 1339 | + nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE); |
---|
1270 | 1340 | atomic_add(nr_pages, &fs_info->async_delalloc_pages); |
---|
1271 | 1341 | |
---|
1272 | | - btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work); |
---|
| 1342 | + btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work); |
---|
1273 | 1343 | |
---|
1274 | 1344 | *nr_written += nr_pages; |
---|
1275 | 1345 | start = cur_end + 1; |
---|
.. | .. |
---|
1300 | 1370 | return 1; |
---|
1301 | 1371 | } |
---|
1302 | 1372 | |
---|
| 1373 | +static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page, |
---|
| 1374 | + const u64 start, const u64 end, |
---|
| 1375 | + int *page_started, unsigned long *nr_written) |
---|
| 1376 | +{ |
---|
| 1377 | + const bool is_space_ino = btrfs_is_free_space_inode(inode); |
---|
| 1378 | + const bool is_reloc_ino = (inode->root->root_key.objectid == |
---|
| 1379 | + BTRFS_DATA_RELOC_TREE_OBJECTID); |
---|
| 1380 | + const u64 range_bytes = end + 1 - start; |
---|
| 1381 | + struct extent_io_tree *io_tree = &inode->io_tree; |
---|
| 1382 | + u64 range_start = start; |
---|
| 1383 | + u64 count; |
---|
| 1384 | + |
---|
| 1385 | + /* |
---|
| 1386 | + * If EXTENT_NORESERVE is set it means that when the buffered write was |
---|
| 1387 | + * made we had not enough available data space and therefore we did not |
---|
| 1388 | + * reserve data space for it, since we thought we could do NOCOW for the |
---|
| 1389 | + * respective file range (either there is prealloc extent or the inode |
---|
| 1390 | + * has the NOCOW bit set). |
---|
| 1391 | + * |
---|
| 1392 | + * However when we need to fall back to COW mode (because for example the |
---|
| 1393 | + * block group for the corresponding extent was turned to RO mode by a |
---|
| 1394 | + * scrub or relocation) we need to do the following: |
---|
| 1395 | + * |
---|
| 1396 | + * 1) We increment the bytes_may_use counter of the data space info. |
---|
| 1397 | + * If COW succeeds, it allocates a new data extent and after doing |
---|
| 1398 | + * that it decrements the space info's bytes_may_use counter and |
---|
| 1399 | + * increments its bytes_reserved counter by the same amount (we do |
---|
| 1400 | + * this at btrfs_add_reserved_bytes()). So we need to increment the |
---|
| 1401 | + * bytes_may_use counter to compensate (when space is reserved at |
---|
| 1402 | + * buffered write time, the bytes_may_use counter is incremented); |
---|
| 1403 | + * |
---|
| 1404 | + * 2) We clear the EXTENT_NORESERVE bit from the range. We do this so |
---|
| 1405 | + * that if the COW path fails for any reason, it decrements (through |
---|
| 1406 | + * extent_clear_unlock_delalloc()) the bytes_may_use counter of the |
---|
| 1407 | + * data space info, which we incremented in the step above. |
---|
| 1408 | + * |
---|
| 1409 | + * If we need to fall back to COW and the inode corresponds to a free |
---|
| 1410 | + * space cache inode or an inode of the data relocation tree, we must |
---|
| 1411 | + * also increment bytes_may_use of the data space_info for the same |
---|
| 1412 | + * reason. Space caches and relocated data extents always get a prealloc |
---|
| 1413 | + * extent for them, however scrub or balance may have set the block |
---|
| 1414 | + * group that contains that extent to RO mode and therefore force COW |
---|
| 1415 | + * when starting writeback. |
---|
| 1416 | + */ |
---|
| 1417 | + count = count_range_bits(io_tree, &range_start, end, range_bytes, |
---|
| 1418 | + EXTENT_NORESERVE, 0); |
---|
| 1419 | + if (count > 0 || is_space_ino || is_reloc_ino) { |
---|
| 1420 | + u64 bytes = count; |
---|
| 1421 | + struct btrfs_fs_info *fs_info = inode->root->fs_info; |
---|
| 1422 | + struct btrfs_space_info *sinfo = fs_info->data_sinfo; |
---|
| 1423 | + |
---|
| 1424 | + if (is_space_ino || is_reloc_ino) |
---|
| 1425 | + bytes = range_bytes; |
---|
| 1426 | + |
---|
| 1427 | + spin_lock(&sinfo->lock); |
---|
| 1428 | + btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes); |
---|
| 1429 | + spin_unlock(&sinfo->lock); |
---|
| 1430 | + |
---|
| 1431 | + if (count > 0) |
---|
| 1432 | + clear_extent_bit(io_tree, start, end, EXTENT_NORESERVE, |
---|
| 1433 | + 0, 0, NULL); |
---|
| 1434 | + } |
---|
| 1435 | + |
---|
| 1436 | + return cow_file_range(inode, locked_page, start, end, page_started, |
---|
| 1437 | + nr_written, 1); |
---|
| 1438 | +} |
---|
| 1439 | + |
---|
1303 | 1440 | /* |
---|
1304 | 1441 | * nocow writeback callback. This checks for snapshots or COW copies |
---|
1305 | 1442 | * of the extents that exist in the file, and COWs the file as required. |
---|
.. | .. |
---|
1307 | 1444 | * If no cow copies or snapshots exist, we write directly to the existing |
---|
1308 | 1445 | * blocks on disk |
---|
1309 | 1446 | */ |
---|
1310 | | -static noinline int run_delalloc_nocow(struct inode *inode, |
---|
| 1447 | +static noinline int run_delalloc_nocow(struct btrfs_inode *inode, |
---|
1311 | 1448 | struct page *locked_page, |
---|
1312 | | - u64 start, u64 end, int *page_started, int force, |
---|
1313 | | - unsigned long *nr_written) |
---|
| 1449 | + const u64 start, const u64 end, |
---|
| 1450 | + int *page_started, int force, |
---|
| 1451 | + unsigned long *nr_written) |
---|
1314 | 1452 | { |
---|
1315 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
1316 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
1317 | | - struct extent_buffer *leaf; |
---|
| 1453 | + struct btrfs_fs_info *fs_info = inode->root->fs_info; |
---|
| 1454 | + struct btrfs_root *root = inode->root; |
---|
1318 | 1455 | struct btrfs_path *path; |
---|
1319 | | - struct btrfs_file_extent_item *fi; |
---|
1320 | | - struct btrfs_key found_key; |
---|
1321 | | - struct extent_map *em; |
---|
1322 | | - u64 cow_start; |
---|
1323 | | - u64 cur_offset; |
---|
1324 | | - u64 extent_end; |
---|
1325 | | - u64 extent_offset; |
---|
1326 | | - u64 disk_bytenr; |
---|
1327 | | - u64 num_bytes; |
---|
1328 | | - u64 disk_num_bytes; |
---|
1329 | | - u64 ram_bytes; |
---|
1330 | | - int extent_type; |
---|
| 1456 | + u64 cow_start = (u64)-1; |
---|
| 1457 | + u64 cur_offset = start; |
---|
1331 | 1458 | int ret; |
---|
1332 | | - int type; |
---|
1333 | | - int nocow; |
---|
1334 | | - int check_prev = 1; |
---|
1335 | | - bool nolock; |
---|
1336 | | - u64 ino = btrfs_ino(BTRFS_I(inode)); |
---|
| 1459 | + bool check_prev = true; |
---|
| 1460 | + const bool freespace_inode = btrfs_is_free_space_inode(inode); |
---|
| 1461 | + u64 ino = btrfs_ino(inode); |
---|
| 1462 | + bool nocow = false; |
---|
| 1463 | + u64 disk_bytenr = 0; |
---|
1337 | 1464 | |
---|
1338 | 1465 | path = btrfs_alloc_path(); |
---|
1339 | 1466 | if (!path) { |
---|
1340 | | - extent_clear_unlock_delalloc(inode, start, end, end, |
---|
1341 | | - locked_page, |
---|
| 1467 | + extent_clear_unlock_delalloc(inode, start, end, locked_page, |
---|
1342 | 1468 | EXTENT_LOCKED | EXTENT_DELALLOC | |
---|
1343 | 1469 | EXTENT_DO_ACCOUNTING | |
---|
1344 | 1470 | EXTENT_DEFRAG, PAGE_UNLOCK | |
---|
.. | .. |
---|
1348 | 1474 | return -ENOMEM; |
---|
1349 | 1475 | } |
---|
1350 | 1476 | |
---|
1351 | | - nolock = btrfs_is_free_space_inode(BTRFS_I(inode)); |
---|
1352 | | - |
---|
1353 | | - cow_start = (u64)-1; |
---|
1354 | | - cur_offset = start; |
---|
1355 | 1477 | while (1) { |
---|
| 1478 | + struct btrfs_key found_key; |
---|
| 1479 | + struct btrfs_file_extent_item *fi; |
---|
| 1480 | + struct extent_buffer *leaf; |
---|
| 1481 | + u64 extent_end; |
---|
| 1482 | + u64 extent_offset; |
---|
| 1483 | + u64 num_bytes = 0; |
---|
| 1484 | + u64 disk_num_bytes; |
---|
| 1485 | + u64 ram_bytes; |
---|
| 1486 | + int extent_type; |
---|
| 1487 | + |
---|
| 1488 | + nocow = false; |
---|
| 1489 | + |
---|
1356 | 1490 | ret = btrfs_lookup_file_extent(NULL, root, path, ino, |
---|
1357 | 1491 | cur_offset, 0); |
---|
1358 | 1492 | if (ret < 0) |
---|
1359 | 1493 | goto error; |
---|
| 1494 | + |
---|
| 1495 | + /* |
---|
| 1496 | + * If there is no extent for our range when doing the initial |
---|
| 1497 | + * search, then go back to the previous slot as it will be the |
---|
| 1498 | + * one containing the search offset |
---|
| 1499 | + */ |
---|
1360 | 1500 | if (ret > 0 && path->slots[0] > 0 && check_prev) { |
---|
1361 | 1501 | leaf = path->nodes[0]; |
---|
1362 | 1502 | btrfs_item_key_to_cpu(leaf, &found_key, |
---|
.. | .. |
---|
1365 | 1505 | found_key.type == BTRFS_EXTENT_DATA_KEY) |
---|
1366 | 1506 | path->slots[0]--; |
---|
1367 | 1507 | } |
---|
1368 | | - check_prev = 0; |
---|
| 1508 | + check_prev = false; |
---|
1369 | 1509 | next_slot: |
---|
| 1510 | + /* Go to next leaf if we have exhausted the current one */ |
---|
1370 | 1511 | leaf = path->nodes[0]; |
---|
1371 | 1512 | if (path->slots[0] >= btrfs_header_nritems(leaf)) { |
---|
1372 | 1513 | ret = btrfs_next_leaf(root, path); |
---|
.. | .. |
---|
1380 | 1521 | leaf = path->nodes[0]; |
---|
1381 | 1522 | } |
---|
1382 | 1523 | |
---|
1383 | | - nocow = 0; |
---|
1384 | | - disk_bytenr = 0; |
---|
1385 | | - num_bytes = 0; |
---|
1386 | 1524 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
---|
1387 | 1525 | |
---|
| 1526 | + /* Didn't find anything for our INO */ |
---|
1388 | 1527 | if (found_key.objectid > ino) |
---|
1389 | 1528 | break; |
---|
| 1529 | + /* |
---|
| 1530 | + * Keep searching until we find an EXTENT_DATA item or there are no |
---|
| 1531 | + * more extents for this inode |
---|
| 1532 | + */ |
---|
1390 | 1533 | if (WARN_ON_ONCE(found_key.objectid < ino) || |
---|
1391 | 1534 | found_key.type < BTRFS_EXTENT_DATA_KEY) { |
---|
1392 | 1535 | path->slots[0]++; |
---|
1393 | 1536 | goto next_slot; |
---|
1394 | 1537 | } |
---|
| 1538 | + |
---|
| 1539 | + /* Found key is not EXTENT_DATA_KEY or starts after req range */ |
---|
1395 | 1540 | if (found_key.type > BTRFS_EXTENT_DATA_KEY || |
---|
1396 | 1541 | found_key.offset > end) |
---|
1397 | 1542 | break; |
---|
1398 | 1543 | |
---|
| 1544 | + /* |
---|
| 1545 | + * If the found extent starts after requested offset, then |
---|
| 1546 | + * adjust extent_end to be right before this extent begins |
---|
| 1547 | + */ |
---|
1399 | 1548 | if (found_key.offset > cur_offset) { |
---|
1400 | 1549 | extent_end = found_key.offset; |
---|
1401 | 1550 | extent_type = 0; |
---|
1402 | 1551 | goto out_check; |
---|
1403 | 1552 | } |
---|
1404 | 1553 | |
---|
| 1554 | + /* |
---|
| 1555 | + * Found extent which begins before our range and potentially |
---|
| 1556 | + * intersects it |
---|
| 1557 | + */ |
---|
1405 | 1558 | fi = btrfs_item_ptr(leaf, path->slots[0], |
---|
1406 | 1559 | struct btrfs_file_extent_item); |
---|
1407 | 1560 | extent_type = btrfs_file_extent_type(leaf, fi); |
---|
.. | .. |
---|
1415 | 1568 | btrfs_file_extent_num_bytes(leaf, fi); |
---|
1416 | 1569 | disk_num_bytes = |
---|
1417 | 1570 | btrfs_file_extent_disk_num_bytes(leaf, fi); |
---|
1418 | | - if (extent_end <= start) { |
---|
| 1571 | + /* |
---|
| 1572 | + * If the extent we got ends before our current offset, |
---|
| 1573 | + * skip to the next extent. |
---|
| 1574 | + */ |
---|
| 1575 | + if (extent_end <= cur_offset) { |
---|
1419 | 1576 | path->slots[0]++; |
---|
1420 | 1577 | goto next_slot; |
---|
1421 | 1578 | } |
---|
| 1579 | + /* Skip holes */ |
---|
1422 | 1580 | if (disk_bytenr == 0) |
---|
1423 | 1581 | goto out_check; |
---|
| 1582 | + /* Skip compressed/encrypted/encoded extents */ |
---|
1424 | 1583 | if (btrfs_file_extent_compression(leaf, fi) || |
---|
1425 | 1584 | btrfs_file_extent_encryption(leaf, fi) || |
---|
1426 | 1585 | btrfs_file_extent_other_encoding(leaf, fi)) |
---|
1427 | 1586 | goto out_check; |
---|
1428 | 1587 | /* |
---|
1429 | | - * Do the same check as in btrfs_cross_ref_exist but |
---|
1430 | | - * without the unnecessary search. |
---|
| 1588 | + * If extent is created before the last volume's snapshot |
---|
| 1589 | + * this implies the extent is shared, hence we can't do |
---|
| 1590 | + * nocow. This is the same check as in |
---|
| 1591 | + * btrfs_cross_ref_exist but without calling |
---|
| 1592 | + * btrfs_search_slot. |
---|
1431 | 1593 | */ |
---|
1432 | | - if (!nolock && |
---|
| 1594 | + if (!freespace_inode && |
---|
1433 | 1595 | btrfs_file_extent_generation(leaf, fi) <= |
---|
1434 | 1596 | btrfs_root_last_snapshot(&root->root_item)) |
---|
1435 | 1597 | goto out_check; |
---|
1436 | 1598 | if (extent_type == BTRFS_FILE_EXTENT_REG && !force) |
---|
1437 | 1599 | goto out_check; |
---|
| 1600 | + /* If extent is RO, we must COW it */ |
---|
1438 | 1601 | if (btrfs_extent_readonly(fs_info, disk_bytenr)) |
---|
1439 | 1602 | goto out_check; |
---|
1440 | 1603 | ret = btrfs_cross_ref_exist(root, ino, |
---|
1441 | 1604 | found_key.offset - |
---|
1442 | | - extent_offset, disk_bytenr); |
---|
| 1605 | + extent_offset, disk_bytenr, false); |
---|
1443 | 1606 | if (ret) { |
---|
1444 | 1607 | /* |
---|
1445 | 1608 | * ret could be -EIO if the above fails to read |
---|
.. | .. |
---|
1451 | 1614 | goto error; |
---|
1452 | 1615 | } |
---|
1453 | 1616 | |
---|
1454 | | - WARN_ON_ONCE(nolock); |
---|
| 1617 | + WARN_ON_ONCE(freespace_inode); |
---|
1455 | 1618 | goto out_check; |
---|
1456 | 1619 | } |
---|
1457 | 1620 | disk_bytenr += extent_offset; |
---|
1458 | 1621 | disk_bytenr += cur_offset - found_key.offset; |
---|
1459 | 1622 | num_bytes = min(end + 1, extent_end) - cur_offset; |
---|
1460 | 1623 | /* |
---|
1461 | | - * if there are pending snapshots for this root, |
---|
1462 | | - * we fall into common COW way. |
---|
| 1624 | + * If there are pending snapshots for this root, we |
---|
| 1625 | + * fall into common COW way |
---|
1463 | 1626 | */ |
---|
1464 | | - if (!nolock && atomic_read(&root->snapshot_force_cow)) |
---|
| 1627 | + if (!freespace_inode && atomic_read(&root->snapshot_force_cow)) |
---|
1465 | 1628 | goto out_check; |
---|
1466 | 1629 | /* |
---|
1467 | 1630 | * force cow if csum exists in the range. |
---|
.. | .. |
---|
1480 | 1643 | cur_offset = cow_start; |
---|
1481 | 1644 | goto error; |
---|
1482 | 1645 | } |
---|
1483 | | - WARN_ON_ONCE(nolock); |
---|
| 1646 | + WARN_ON_ONCE(freespace_inode); |
---|
1484 | 1647 | goto out_check; |
---|
1485 | 1648 | } |
---|
1486 | 1649 | if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) |
---|
1487 | 1650 | goto out_check; |
---|
1488 | | - nocow = 1; |
---|
| 1651 | + nocow = true; |
---|
1489 | 1652 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
---|
1490 | | - extent_end = found_key.offset + |
---|
1491 | | - btrfs_file_extent_ram_bytes(leaf, fi); |
---|
1492 | | - extent_end = ALIGN(extent_end, |
---|
1493 | | - fs_info->sectorsize); |
---|
| 1653 | + extent_end = found_key.offset + ram_bytes; |
---|
| 1654 | + extent_end = ALIGN(extent_end, fs_info->sectorsize); |
---|
| 1655 | + /* Skip extents outside of our requested range */ |
---|
| 1656 | + if (extent_end <= start) { |
---|
| 1657 | + path->slots[0]++; |
---|
| 1658 | + goto next_slot; |
---|
| 1659 | + } |
---|
1494 | 1660 | } else { |
---|
1495 | | - BUG_ON(1); |
---|
| 1661 | + /* If this triggers then we have a memory corruption */ |
---|
| 1662 | + BUG(); |
---|
1496 | 1663 | } |
---|
1497 | 1664 | out_check: |
---|
1498 | | - if (extent_end <= start) { |
---|
1499 | | - path->slots[0]++; |
---|
1500 | | - if (nocow) |
---|
1501 | | - btrfs_dec_nocow_writers(fs_info, disk_bytenr); |
---|
1502 | | - goto next_slot; |
---|
1503 | | - } |
---|
| 1665 | + /* |
---|
| 1666 | + * If nocow is false then record the beginning of the range |
---|
| 1667 | + * that needs to be COWed |
---|
| 1668 | + */ |
---|
1504 | 1669 | if (!nocow) { |
---|
1505 | 1670 | if (cow_start == (u64)-1) |
---|
1506 | 1671 | cow_start = cur_offset; |
---|
.. | .. |
---|
1512 | 1677 | } |
---|
1513 | 1678 | |
---|
1514 | 1679 | btrfs_release_path(path); |
---|
| 1680 | + |
---|
| 1681 | + /* |
---|
| 1682 | + * COW range from cow_start to found_key.offset - 1. As the key |
---|
| 1683 | + * will contain the beginning of the first extent that can be |
---|
| 1684 | + * NOCOW, following one which needs to be COW'ed |
---|
| 1685 | + */ |
---|
1515 | 1686 | if (cow_start != (u64)-1) { |
---|
1516 | | - ret = cow_file_range(inode, locked_page, |
---|
1517 | | - cow_start, found_key.offset - 1, |
---|
1518 | | - end, page_started, nr_written, 1, |
---|
1519 | | - NULL); |
---|
1520 | | - if (ret) { |
---|
1521 | | - if (nocow) |
---|
1522 | | - btrfs_dec_nocow_writers(fs_info, |
---|
1523 | | - disk_bytenr); |
---|
| 1687 | + ret = fallback_to_cow(inode, locked_page, |
---|
| 1688 | + cow_start, found_key.offset - 1, |
---|
| 1689 | + page_started, nr_written); |
---|
| 1690 | + if (ret) |
---|
1524 | 1691 | goto error; |
---|
1525 | | - } |
---|
1526 | 1692 | cow_start = (u64)-1; |
---|
1527 | 1693 | } |
---|
1528 | 1694 | |
---|
1529 | 1695 | if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) { |
---|
1530 | 1696 | u64 orig_start = found_key.offset - extent_offset; |
---|
| 1697 | + struct extent_map *em; |
---|
1531 | 1698 | |
---|
1532 | 1699 | em = create_io_em(inode, cur_offset, num_bytes, |
---|
1533 | 1700 | orig_start, |
---|
.. | .. |
---|
1537 | 1704 | ram_bytes, BTRFS_COMPRESS_NONE, |
---|
1538 | 1705 | BTRFS_ORDERED_PREALLOC); |
---|
1539 | 1706 | if (IS_ERR(em)) { |
---|
1540 | | - if (nocow) |
---|
1541 | | - btrfs_dec_nocow_writers(fs_info, |
---|
1542 | | - disk_bytenr); |
---|
1543 | 1707 | ret = PTR_ERR(em); |
---|
1544 | 1708 | goto error; |
---|
1545 | 1709 | } |
---|
1546 | 1710 | free_extent_map(em); |
---|
1547 | | - } |
---|
1548 | | - |
---|
1549 | | - if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) { |
---|
1550 | | - type = BTRFS_ORDERED_PREALLOC; |
---|
| 1711 | + ret = btrfs_add_ordered_extent(inode, cur_offset, |
---|
| 1712 | + disk_bytenr, num_bytes, |
---|
| 1713 | + num_bytes, |
---|
| 1714 | + BTRFS_ORDERED_PREALLOC); |
---|
| 1715 | + if (ret) { |
---|
| 1716 | + btrfs_drop_extent_cache(inode, cur_offset, |
---|
| 1717 | + cur_offset + num_bytes - 1, |
---|
| 1718 | + 0); |
---|
| 1719 | + goto error; |
---|
| 1720 | + } |
---|
1551 | 1721 | } else { |
---|
1552 | | - type = BTRFS_ORDERED_NOCOW; |
---|
| 1722 | + ret = btrfs_add_ordered_extent(inode, cur_offset, |
---|
| 1723 | + disk_bytenr, num_bytes, |
---|
| 1724 | + num_bytes, |
---|
| 1725 | + BTRFS_ORDERED_NOCOW); |
---|
| 1726 | + if (ret) |
---|
| 1727 | + goto error; |
---|
1553 | 1728 | } |
---|
1554 | 1729 | |
---|
1555 | | - ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr, |
---|
1556 | | - num_bytes, num_bytes, type); |
---|
1557 | 1730 | if (nocow) |
---|
1558 | 1731 | btrfs_dec_nocow_writers(fs_info, disk_bytenr); |
---|
1559 | | - BUG_ON(ret); /* -ENOMEM */ |
---|
| 1732 | + nocow = false; |
---|
1560 | 1733 | |
---|
1561 | 1734 | if (root->root_key.objectid == |
---|
1562 | 1735 | BTRFS_DATA_RELOC_TREE_OBJECTID) |
---|
.. | .. |
---|
1569 | 1742 | num_bytes); |
---|
1570 | 1743 | |
---|
1571 | 1744 | extent_clear_unlock_delalloc(inode, cur_offset, |
---|
1572 | | - cur_offset + num_bytes - 1, end, |
---|
| 1745 | + cur_offset + num_bytes - 1, |
---|
1573 | 1746 | locked_page, EXTENT_LOCKED | |
---|
1574 | 1747 | EXTENT_DELALLOC | |
---|
1575 | 1748 | EXTENT_CLEAR_DATA_RESV, |
---|
.. | .. |
---|
1594 | 1767 | |
---|
1595 | 1768 | if (cow_start != (u64)-1) { |
---|
1596 | 1769 | cur_offset = end; |
---|
1597 | | - ret = cow_file_range(inode, locked_page, cow_start, end, end, |
---|
1598 | | - page_started, nr_written, 1, NULL); |
---|
| 1770 | + ret = fallback_to_cow(inode, locked_page, cow_start, end, |
---|
| 1771 | + page_started, nr_written); |
---|
1599 | 1772 | if (ret) |
---|
1600 | 1773 | goto error; |
---|
1601 | 1774 | } |
---|
1602 | 1775 | |
---|
1603 | 1776 | error: |
---|
| 1777 | + if (nocow) |
---|
| 1778 | + btrfs_dec_nocow_writers(fs_info, disk_bytenr); |
---|
| 1779 | + |
---|
1604 | 1780 | if (ret && cur_offset < end) |
---|
1605 | | - extent_clear_unlock_delalloc(inode, cur_offset, end, end, |
---|
| 1781 | + extent_clear_unlock_delalloc(inode, cur_offset, end, |
---|
1606 | 1782 | locked_page, EXTENT_LOCKED | |
---|
1607 | 1783 | EXTENT_DELALLOC | EXTENT_DEFRAG | |
---|
1608 | 1784 | EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | |
---|
.. | .. |
---|
1613 | 1789 | return ret; |
---|
1614 | 1790 | } |
---|
1615 | 1791 | |
---|
1616 | | -static inline int need_force_cow(struct inode *inode, u64 start, u64 end) |
---|
| 1792 | +static inline int need_force_cow(struct btrfs_inode *inode, u64 start, u64 end) |
---|
1617 | 1793 | { |
---|
1618 | 1794 | |
---|
1619 | | - if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && |
---|
1620 | | - !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) |
---|
| 1795 | + if (!(inode->flags & BTRFS_INODE_NODATACOW) && |
---|
| 1796 | + !(inode->flags & BTRFS_INODE_PREALLOC)) |
---|
1621 | 1797 | return 0; |
---|
1622 | 1798 | |
---|
1623 | 1799 | /* |
---|
.. | .. |
---|
1625 | 1801 | * if it is not zero, it means the file is defragging. |
---|
1626 | 1802 | * Force cow if given extent needs to be defragged. |
---|
1627 | 1803 | */ |
---|
1628 | | - if (BTRFS_I(inode)->defrag_bytes && |
---|
1629 | | - test_range_bit(&BTRFS_I(inode)->io_tree, start, end, |
---|
1630 | | - EXTENT_DEFRAG, 0, NULL)) |
---|
| 1804 | + if (inode->defrag_bytes && |
---|
| 1805 | + test_range_bit(&inode->io_tree, start, end, EXTENT_DEFRAG, 0, NULL)) |
---|
1631 | 1806 | return 1; |
---|
1632 | 1807 | |
---|
1633 | 1808 | return 0; |
---|
.. | .. |
---|
1637 | 1812 | * Function to process delayed allocation (create CoW) for ranges which are |
---|
1638 | 1813 | * being touched for the first time. |
---|
1639 | 1814 | */ |
---|
1640 | | -int btrfs_run_delalloc_range(void *private_data, struct page *locked_page, |
---|
| 1815 | +int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page, |
---|
1641 | 1816 | u64 start, u64 end, int *page_started, unsigned long *nr_written, |
---|
1642 | 1817 | struct writeback_control *wbc) |
---|
1643 | 1818 | { |
---|
1644 | | - struct inode *inode = private_data; |
---|
1645 | 1819 | int ret; |
---|
1646 | 1820 | int force_cow = need_force_cow(inode, start, end); |
---|
1647 | | - unsigned int write_flags = wbc_to_write_flags(wbc); |
---|
1648 | 1821 | |
---|
1649 | | - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) { |
---|
| 1822 | + if (inode->flags & BTRFS_INODE_NODATACOW && !force_cow) { |
---|
1650 | 1823 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
---|
1651 | 1824 | page_started, 1, nr_written); |
---|
1652 | | - } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) { |
---|
| 1825 | + } else if (inode->flags & BTRFS_INODE_PREALLOC && !force_cow) { |
---|
1653 | 1826 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
---|
1654 | 1827 | page_started, 0, nr_written); |
---|
1655 | 1828 | } else if (!inode_can_compress(inode) || |
---|
1656 | 1829 | !inode_need_compress(inode, start, end)) { |
---|
1657 | | - ret = cow_file_range(inode, locked_page, start, end, end, |
---|
1658 | | - page_started, nr_written, 1, NULL); |
---|
| 1830 | + ret = cow_file_range(inode, locked_page, start, end, |
---|
| 1831 | + page_started, nr_written, 1); |
---|
1659 | 1832 | } else { |
---|
1660 | | - set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, |
---|
1661 | | - &BTRFS_I(inode)->runtime_flags); |
---|
1662 | | - ret = cow_file_range_async(inode, locked_page, start, end, |
---|
1663 | | - page_started, nr_written, |
---|
1664 | | - write_flags); |
---|
| 1833 | + set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags); |
---|
| 1834 | + ret = cow_file_range_async(inode, wbc, locked_page, start, end, |
---|
| 1835 | + page_started, nr_written); |
---|
1665 | 1836 | } |
---|
1666 | 1837 | if (ret) |
---|
1667 | 1838 | btrfs_cleanup_ordered_extents(inode, locked_page, start, |
---|
.. | .. |
---|
1669 | 1840 | return ret; |
---|
1670 | 1841 | } |
---|
1671 | 1842 | |
---|
1672 | | -static void btrfs_split_extent_hook(void *private_data, |
---|
1673 | | - struct extent_state *orig, u64 split) |
---|
| 1843 | +void btrfs_split_delalloc_extent(struct inode *inode, |
---|
| 1844 | + struct extent_state *orig, u64 split) |
---|
1674 | 1845 | { |
---|
1675 | | - struct inode *inode = private_data; |
---|
1676 | 1846 | u64 size; |
---|
1677 | 1847 | |
---|
1678 | 1848 | /* not delalloc, ignore it */ |
---|
.. | .. |
---|
1685 | 1855 | u64 new_size; |
---|
1686 | 1856 | |
---|
1687 | 1857 | /* |
---|
1688 | | - * See the explanation in btrfs_merge_extent_hook, the same |
---|
| 1858 | + * See the explanation in btrfs_merge_delalloc_extent, the same |
---|
1689 | 1859 | * applies here, just in reverse. |
---|
1690 | 1860 | */ |
---|
1691 | 1861 | new_size = orig->end - split + 1; |
---|
.. | .. |
---|
1702 | 1872 | } |
---|
1703 | 1873 | |
---|
1704 | 1874 | /* |
---|
1705 | | - * extent_io.c merge_extent_hook, used to track merged delayed allocation |
---|
1706 | | - * extents so we can keep track of new extents that are just merged onto old |
---|
1707 | | - * extents, such as when we are doing sequential writes, so we can properly |
---|
1708 | | - * account for the metadata space we'll need. |
---|
| 1875 | + * Handle merged delayed allocation extents so we can keep track of new extents |
---|
| 1876 | + * that are just merged onto old extents, such as when we are doing sequential |
---|
| 1877 | + * writes, so we can properly account for the metadata space we'll need. |
---|
1709 | 1878 | */ |
---|
1710 | | -static void btrfs_merge_extent_hook(void *private_data, |
---|
1711 | | - struct extent_state *new, |
---|
1712 | | - struct extent_state *other) |
---|
| 1879 | +void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new, |
---|
| 1880 | + struct extent_state *other) |
---|
1713 | 1881 | { |
---|
1714 | | - struct inode *inode = private_data; |
---|
1715 | 1882 | u64 new_size, old_size; |
---|
1716 | 1883 | u32 num_extents; |
---|
1717 | 1884 | |
---|
.. | .. |
---|
1815 | 1982 | } |
---|
1816 | 1983 | |
---|
1817 | 1984 | /* |
---|
1818 | | - * extent_io.c set_bit_hook, used to track delayed allocation |
---|
1819 | | - * bytes in this file, and to maintain the list of inodes that |
---|
1820 | | - * have pending delalloc work to be done. |
---|
| 1985 | + * Properly track delayed allocation bytes in the inode and maintain the |
---|
| 1986 | + * list of inodes that have pending delalloc work to be done. |
---|
1821 | 1987 | */ |
---|
1822 | | -static void btrfs_set_bit_hook(void *private_data, |
---|
1823 | | - struct extent_state *state, unsigned *bits) |
---|
| 1988 | +void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state, |
---|
| 1989 | + unsigned *bits) |
---|
1824 | 1990 | { |
---|
1825 | | - struct inode *inode = private_data; |
---|
1826 | | - |
---|
1827 | 1991 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
1828 | 1992 | |
---|
1829 | 1993 | if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC)) |
---|
.. | .. |
---|
1869 | 2033 | } |
---|
1870 | 2034 | |
---|
1871 | 2035 | /* |
---|
1872 | | - * extent_io.c clear_bit_hook, see set_bit_hook for why |
---|
| 2036 | + * Once a range is no longer delalloc this function ensures that proper |
---|
| 2037 | + * accounting happens. |
---|
1873 | 2038 | */ |
---|
1874 | | -static void btrfs_clear_bit_hook(void *private_data, |
---|
1875 | | - struct extent_state *state, |
---|
1876 | | - unsigned *bits) |
---|
| 2039 | +void btrfs_clear_delalloc_extent(struct inode *vfs_inode, |
---|
| 2040 | + struct extent_state *state, unsigned *bits) |
---|
1877 | 2041 | { |
---|
1878 | | - struct btrfs_inode *inode = BTRFS_I((struct inode *)private_data); |
---|
1879 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); |
---|
| 2042 | + struct btrfs_inode *inode = BTRFS_I(vfs_inode); |
---|
| 2043 | + struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb); |
---|
1880 | 2044 | u64 len = state->end + 1 - state->start; |
---|
1881 | 2045 | u32 num_extents = count_max_extents(len); |
---|
1882 | 2046 | |
---|
.. | .. |
---|
1901 | 2065 | |
---|
1902 | 2066 | /* |
---|
1903 | 2067 | * We don't reserve metadata space for space cache inodes so we |
---|
1904 | | - * don't need to call dellalloc_release_metadata if there is an |
---|
| 2068 | + * don't need to call delalloc_release_metadata if there is an |
---|
1905 | 2069 | * error. |
---|
1906 | 2070 | */ |
---|
1907 | 2071 | if (*bits & EXTENT_CLEAR_META_RESV && |
---|
.. | .. |
---|
1915 | 2079 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID && |
---|
1916 | 2080 | do_list && !(state->state & EXTENT_NORESERVE) && |
---|
1917 | 2081 | (*bits & EXTENT_CLEAR_DATA_RESV)) |
---|
1918 | | - btrfs_free_reserved_data_space_noquota( |
---|
1919 | | - &inode->vfs_inode, |
---|
1920 | | - state->start, len); |
---|
| 2082 | + btrfs_free_reserved_data_space_noquota(fs_info, len); |
---|
1921 | 2083 | |
---|
1922 | 2084 | percpu_counter_add_batch(&fs_info->delalloc_bytes, -len, |
---|
1923 | 2085 | fs_info->delalloc_batch); |
---|
.. | .. |
---|
1940 | 2102 | } |
---|
1941 | 2103 | |
---|
1942 | 2104 | /* |
---|
1943 | | - * Merge bio hook, this must check the chunk tree to make sure we don't create |
---|
1944 | | - * bios that span stripes or chunks |
---|
| 2105 | + * btrfs_bio_fits_in_stripe - Checks whether the size of the given bio will fit |
---|
| 2106 | + * in a chunk's stripe. This function ensures that bios do not span a |
---|
| 2107 | + * stripe/chunk |
---|
1945 | 2108 | * |
---|
1946 | | - * return 1 if page cannot be merged to bio |
---|
1947 | | - * return 0 if page can be merged to bio |
---|
| 2109 | + * @page - The page we are about to add to the bio |
---|
| 2110 | + * @size - size we want to add to the bio |
---|
| 2111 | + * @bio - bio we want to ensure is smaller than a stripe |
---|
| 2112 | + * @bio_flags - flags of the bio |
---|
| 2113 | + * |
---|
| 2114 | + * return 1 if page cannot be added to the bio |
---|
| 2115 | + * return 0 if page can be added to the bio |
---|
1948 | 2116 | * return error otherwise |
---|
1949 | 2117 | */ |
---|
1950 | | -int btrfs_merge_bio_hook(struct page *page, unsigned long offset, |
---|
1951 | | - size_t size, struct bio *bio, |
---|
1952 | | - unsigned long bio_flags) |
---|
| 2118 | +int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio, |
---|
| 2119 | + unsigned long bio_flags) |
---|
1953 | 2120 | { |
---|
1954 | 2121 | struct inode *inode = page->mapping->host; |
---|
1955 | 2122 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
.. | .. |
---|
1957 | 2124 | u64 length = 0; |
---|
1958 | 2125 | u64 map_length; |
---|
1959 | 2126 | int ret; |
---|
| 2127 | + struct btrfs_io_geometry geom; |
---|
1960 | 2128 | |
---|
1961 | 2129 | if (bio_flags & EXTENT_BIO_COMPRESSED) |
---|
1962 | 2130 | return 0; |
---|
1963 | 2131 | |
---|
1964 | 2132 | length = bio->bi_iter.bi_size; |
---|
1965 | 2133 | map_length = length; |
---|
1966 | | - ret = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length, |
---|
1967 | | - NULL, 0); |
---|
| 2134 | + ret = btrfs_get_io_geometry(fs_info, btrfs_op(bio), logical, map_length, |
---|
| 2135 | + &geom); |
---|
1968 | 2136 | if (ret < 0) |
---|
1969 | 2137 | return ret; |
---|
1970 | | - if (map_length < length + size) |
---|
| 2138 | + |
---|
| 2139 | + if (geom.len < length + size) |
---|
1971 | 2140 | return 1; |
---|
1972 | 2141 | return 0; |
---|
1973 | 2142 | } |
---|
.. | .. |
---|
1984 | 2153 | u64 bio_offset) |
---|
1985 | 2154 | { |
---|
1986 | 2155 | struct inode *inode = private_data; |
---|
1987 | | - blk_status_t ret = 0; |
---|
1988 | 2156 | |
---|
1989 | | - ret = btrfs_csum_one_bio(inode, bio, 0, 0); |
---|
1990 | | - BUG_ON(ret); /* -ENOMEM */ |
---|
1991 | | - return 0; |
---|
1992 | | -} |
---|
1993 | | - |
---|
1994 | | -/* |
---|
1995 | | - * in order to insert checksums into the metadata in large chunks, |
---|
1996 | | - * we wait until bio submission time. All the pages in the bio are |
---|
1997 | | - * checksummed and sums are attached onto the ordered extent record. |
---|
1998 | | - * |
---|
1999 | | - * At IO completion time the cums attached on the ordered extent record |
---|
2000 | | - * are inserted into the btree |
---|
2001 | | - */ |
---|
2002 | | -blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio, |
---|
2003 | | - int mirror_num) |
---|
2004 | | -{ |
---|
2005 | | - struct inode *inode = private_data; |
---|
2006 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
2007 | | - blk_status_t ret; |
---|
2008 | | - |
---|
2009 | | - ret = btrfs_map_bio(fs_info, bio, mirror_num, 1); |
---|
2010 | | - if (ret) { |
---|
2011 | | - bio->bi_status = ret; |
---|
2012 | | - bio_endio(bio); |
---|
2013 | | - } |
---|
2014 | | - return ret; |
---|
| 2157 | + return btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0); |
---|
2015 | 2158 | } |
---|
2016 | 2159 | |
---|
2017 | 2160 | /* |
---|
.. | .. |
---|
2032 | 2175 | * |
---|
2033 | 2176 | * c-3) otherwise: async submit |
---|
2034 | 2177 | */ |
---|
2035 | | -static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio, |
---|
2036 | | - int mirror_num, unsigned long bio_flags, |
---|
2037 | | - u64 bio_offset) |
---|
| 2178 | +blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio, |
---|
| 2179 | + int mirror_num, unsigned long bio_flags) |
---|
| 2180 | + |
---|
2038 | 2181 | { |
---|
2039 | | - struct inode *inode = private_data; |
---|
2040 | 2182 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
2041 | 2183 | struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
2042 | 2184 | enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA; |
---|
.. | .. |
---|
2060 | 2202 | bio_flags); |
---|
2061 | 2203 | goto out; |
---|
2062 | 2204 | } else if (!skip_sum) { |
---|
2063 | | - ret = btrfs_lookup_bio_sums(inode, bio, NULL); |
---|
| 2205 | + ret = btrfs_lookup_bio_sums(inode, bio, (u64)-1, NULL); |
---|
2064 | 2206 | if (ret) |
---|
2065 | 2207 | goto out; |
---|
2066 | 2208 | } |
---|
.. | .. |
---|
2071 | 2213 | goto mapit; |
---|
2072 | 2214 | /* we're doing a write, do the async checksumming */ |
---|
2073 | 2215 | ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags, |
---|
2074 | | - bio_offset, inode, |
---|
2075 | | - btrfs_submit_bio_start); |
---|
| 2216 | + 0, inode, btrfs_submit_bio_start); |
---|
2076 | 2217 | goto out; |
---|
2077 | 2218 | } else if (!skip_sum) { |
---|
2078 | | - ret = btrfs_csum_one_bio(inode, bio, 0, 0); |
---|
| 2219 | + ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0); |
---|
2079 | 2220 | if (ret) |
---|
2080 | 2221 | goto out; |
---|
2081 | 2222 | } |
---|
2082 | 2223 | |
---|
2083 | 2224 | mapit: |
---|
2084 | | - ret = btrfs_map_bio(fs_info, bio, mirror_num, 0); |
---|
| 2225 | + ret = btrfs_map_bio(fs_info, bio, mirror_num); |
---|
2085 | 2226 | |
---|
2086 | 2227 | out: |
---|
2087 | 2228 | if (ret) { |
---|
.. | .. |
---|
2095 | 2236 | * given a list of ordered sums record them in the inode. This happens |
---|
2096 | 2237 | * at IO completion time based on sums calculated at bio submission time. |
---|
2097 | 2238 | */ |
---|
2098 | | -static noinline int add_pending_csums(struct btrfs_trans_handle *trans, |
---|
2099 | | - struct inode *inode, struct list_head *list) |
---|
| 2239 | +static int add_pending_csums(struct btrfs_trans_handle *trans, |
---|
| 2240 | + struct list_head *list) |
---|
2100 | 2241 | { |
---|
2101 | 2242 | struct btrfs_ordered_sum *sum; |
---|
2102 | 2243 | int ret; |
---|
2103 | 2244 | |
---|
2104 | 2245 | list_for_each_entry(sum, list, list) { |
---|
2105 | 2246 | trans->adding_csums = true; |
---|
2106 | | - ret = btrfs_csum_file_blocks(trans, |
---|
2107 | | - BTRFS_I(inode)->root->fs_info->csum_root, sum); |
---|
| 2247 | + ret = btrfs_csum_file_blocks(trans, trans->fs_info->csum_root, sum); |
---|
2108 | 2248 | trans->adding_csums = false; |
---|
2109 | 2249 | if (ret) |
---|
2110 | 2250 | return ret; |
---|
.. | .. |
---|
2112 | 2252 | return 0; |
---|
2113 | 2253 | } |
---|
2114 | 2254 | |
---|
2115 | | -int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
---|
2116 | | - unsigned int extra_bits, |
---|
2117 | | - struct extent_state **cached_state, int dedupe) |
---|
| 2255 | +static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode, |
---|
| 2256 | + const u64 start, |
---|
| 2257 | + const u64 len, |
---|
| 2258 | + struct extent_state **cached_state) |
---|
2118 | 2259 | { |
---|
2119 | | - WARN_ON((end & (PAGE_SIZE - 1)) == 0); |
---|
2120 | | - return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, |
---|
2121 | | - extra_bits, cached_state); |
---|
| 2260 | + u64 search_start = start; |
---|
| 2261 | + const u64 end = start + len - 1; |
---|
| 2262 | + |
---|
| 2263 | + while (search_start < end) { |
---|
| 2264 | + const u64 search_len = end - search_start + 1; |
---|
| 2265 | + struct extent_map *em; |
---|
| 2266 | + u64 em_len; |
---|
| 2267 | + int ret = 0; |
---|
| 2268 | + |
---|
| 2269 | + em = btrfs_get_extent(inode, NULL, 0, search_start, search_len); |
---|
| 2270 | + if (IS_ERR(em)) |
---|
| 2271 | + return PTR_ERR(em); |
---|
| 2272 | + |
---|
| 2273 | + if (em->block_start != EXTENT_MAP_HOLE) |
---|
| 2274 | + goto next; |
---|
| 2275 | + |
---|
| 2276 | + em_len = em->len; |
---|
| 2277 | + if (em->start < search_start) |
---|
| 2278 | + em_len -= search_start - em->start; |
---|
| 2279 | + if (em_len > search_len) |
---|
| 2280 | + em_len = search_len; |
---|
| 2281 | + |
---|
| 2282 | + ret = set_extent_bit(&inode->io_tree, search_start, |
---|
| 2283 | + search_start + em_len - 1, |
---|
| 2284 | + EXTENT_DELALLOC_NEW, |
---|
| 2285 | + NULL, cached_state, GFP_NOFS); |
---|
| 2286 | +next: |
---|
| 2287 | + search_start = extent_map_end(em); |
---|
| 2288 | + free_extent_map(em); |
---|
| 2289 | + if (ret) |
---|
| 2290 | + return ret; |
---|
| 2291 | + } |
---|
| 2292 | + return 0; |
---|
| 2293 | +} |
---|
| 2294 | + |
---|
| 2295 | +int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end, |
---|
| 2296 | + unsigned int extra_bits, |
---|
| 2297 | + struct extent_state **cached_state) |
---|
| 2298 | +{ |
---|
| 2299 | + WARN_ON(PAGE_ALIGNED(end)); |
---|
| 2300 | + |
---|
| 2301 | + if (start >= i_size_read(&inode->vfs_inode) && |
---|
| 2302 | + !(inode->flags & BTRFS_INODE_PREALLOC)) { |
---|
| 2303 | + /* |
---|
| 2304 | + * There can't be any extents following eof in this case so just |
---|
| 2305 | + * set the delalloc new bit for the range directly. |
---|
| 2306 | + */ |
---|
| 2307 | + extra_bits |= EXTENT_DELALLOC_NEW; |
---|
| 2308 | + } else { |
---|
| 2309 | + int ret; |
---|
| 2310 | + |
---|
| 2311 | + ret = btrfs_find_new_delalloc_bytes(inode, start, |
---|
| 2312 | + end + 1 - start, |
---|
| 2313 | + cached_state); |
---|
| 2314 | + if (ret) |
---|
| 2315 | + return ret; |
---|
| 2316 | + } |
---|
| 2317 | + |
---|
| 2318 | + return set_extent_delalloc(&inode->io_tree, start, end, extra_bits, |
---|
| 2319 | + cached_state); |
---|
2122 | 2320 | } |
---|
2123 | 2321 | |
---|
2124 | 2322 | /* see btrfs_writepage_cow_fixup for details on why this is required */ |
---|
2125 | 2323 | struct btrfs_writepage_fixup { |
---|
2126 | 2324 | struct page *page; |
---|
| 2325 | + struct inode *inode; |
---|
2127 | 2326 | struct btrfs_work work; |
---|
2128 | 2327 | }; |
---|
2129 | 2328 | |
---|
.. | .. |
---|
2134 | 2333 | struct extent_state *cached_state = NULL; |
---|
2135 | 2334 | struct extent_changeset *data_reserved = NULL; |
---|
2136 | 2335 | struct page *page; |
---|
2137 | | - struct inode *inode; |
---|
| 2336 | + struct btrfs_inode *inode; |
---|
2138 | 2337 | u64 page_start; |
---|
2139 | 2338 | u64 page_end; |
---|
2140 | | - int ret; |
---|
| 2339 | + int ret = 0; |
---|
| 2340 | + bool free_delalloc_space = true; |
---|
2141 | 2341 | |
---|
2142 | 2342 | fixup = container_of(work, struct btrfs_writepage_fixup, work); |
---|
2143 | 2343 | page = fixup->page; |
---|
2144 | | -again: |
---|
2145 | | - lock_page(page); |
---|
2146 | | - if (!page->mapping || !PageDirty(page) || !PageChecked(page)) { |
---|
2147 | | - ClearPageChecked(page); |
---|
2148 | | - goto out_page; |
---|
2149 | | - } |
---|
2150 | | - |
---|
2151 | | - inode = page->mapping->host; |
---|
| 2344 | + inode = BTRFS_I(fixup->inode); |
---|
2152 | 2345 | page_start = page_offset(page); |
---|
2153 | 2346 | page_end = page_offset(page) + PAGE_SIZE - 1; |
---|
2154 | 2347 | |
---|
2155 | | - lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, |
---|
2156 | | - &cached_state); |
---|
| 2348 | + /* |
---|
| 2349 | + * This is similar to page_mkwrite, we need to reserve the space before |
---|
| 2350 | + * we take the page lock. |
---|
| 2351 | + */ |
---|
| 2352 | + ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start, |
---|
| 2353 | + PAGE_SIZE); |
---|
| 2354 | +again: |
---|
| 2355 | + lock_page(page); |
---|
| 2356 | + |
---|
| 2357 | + /* |
---|
| 2358 | + * Before we queued this fixup, we took a reference on the page. |
---|
| 2359 | + * page->mapping may go NULL, but it shouldn't be moved to a different |
---|
| 2360 | + * address space. |
---|
| 2361 | + */ |
---|
| 2362 | + if (!page->mapping || !PageDirty(page) || !PageChecked(page)) { |
---|
| 2363 | + /* |
---|
| 2364 | + * Unfortunately this is a little tricky, either |
---|
| 2365 | + * |
---|
| 2366 | + * 1) We got here and our page had already been dealt with and |
---|
| 2367 | + * we reserved our space, thus ret == 0, so we need to just |
---|
| 2368 | + * drop our space reservation and bail. This can happen the |
---|
| 2369 | + * first time we come into the fixup worker, or could happen |
---|
| 2370 | + * while waiting for the ordered extent. |
---|
| 2371 | + * 2) Our page was already dealt with, but we happened to get an |
---|
| 2372 | + * ENOSPC above from the btrfs_delalloc_reserve_space. In |
---|
| 2373 | + * this case we obviously don't have anything to release, but |
---|
| 2374 | + * because the page was already dealt with we don't want to |
---|
| 2375 | + * mark the page with an error, so make sure we're resetting |
---|
| 2376 | + * ret to 0. This is why we have this check _before_ the ret |
---|
| 2377 | + * check, because we do not want to have a surprise ENOSPC |
---|
| 2378 | + * when the page was already properly dealt with. |
---|
| 2379 | + */ |
---|
| 2380 | + if (!ret) { |
---|
| 2381 | + btrfs_delalloc_release_extents(inode, PAGE_SIZE); |
---|
| 2382 | + btrfs_delalloc_release_space(inode, data_reserved, |
---|
| 2383 | + page_start, PAGE_SIZE, |
---|
| 2384 | + true); |
---|
| 2385 | + } |
---|
| 2386 | + ret = 0; |
---|
| 2387 | + goto out_page; |
---|
| 2388 | + } |
---|
| 2389 | + |
---|
| 2390 | + /* |
---|
| 2391 | + * We can't mess with the page state unless it is locked, so now that |
---|
| 2392 | + * it is locked bail if we failed to make our space reservation. |
---|
| 2393 | + */ |
---|
| 2394 | + if (ret) |
---|
| 2395 | + goto out_page; |
---|
| 2396 | + |
---|
| 2397 | + lock_extent_bits(&inode->io_tree, page_start, page_end, &cached_state); |
---|
2157 | 2398 | |
---|
2158 | 2399 | /* already ordered? We're done */ |
---|
2159 | 2400 | if (PagePrivate2(page)) |
---|
2160 | | - goto out; |
---|
| 2401 | + goto out_reserved; |
---|
2161 | 2402 | |
---|
2162 | | - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start, |
---|
2163 | | - PAGE_SIZE); |
---|
| 2403 | + ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE); |
---|
2164 | 2404 | if (ordered) { |
---|
2165 | | - unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, |
---|
2166 | | - page_end, &cached_state); |
---|
| 2405 | + unlock_extent_cached(&inode->io_tree, page_start, page_end, |
---|
| 2406 | + &cached_state); |
---|
2167 | 2407 | unlock_page(page); |
---|
2168 | | - btrfs_start_ordered_extent(inode, ordered, 1); |
---|
| 2408 | + btrfs_start_ordered_extent(ordered, 1); |
---|
2169 | 2409 | btrfs_put_ordered_extent(ordered); |
---|
2170 | 2410 | goto again; |
---|
2171 | 2411 | } |
---|
2172 | 2412 | |
---|
2173 | | - ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start, |
---|
2174 | | - PAGE_SIZE); |
---|
2175 | | - if (ret) { |
---|
2176 | | - mapping_set_error(page->mapping, ret); |
---|
2177 | | - end_extent_writepage(page, ret, page_start, page_end); |
---|
2178 | | - ClearPageChecked(page); |
---|
2179 | | - goto out; |
---|
2180 | | - } |
---|
2181 | | - |
---|
2182 | 2413 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0, |
---|
2183 | | - &cached_state, 0); |
---|
2184 | | - if (ret) { |
---|
2185 | | - mapping_set_error(page->mapping, ret); |
---|
2186 | | - end_extent_writepage(page, ret, page_start, page_end); |
---|
2187 | | - ClearPageChecked(page); |
---|
2188 | | - goto out_reserved; |
---|
2189 | | - } |
---|
2190 | | - |
---|
2191 | | - ClearPageChecked(page); |
---|
2192 | | - set_page_dirty(page); |
---|
2193 | | -out_reserved: |
---|
2194 | | - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); |
---|
| 2414 | + &cached_state); |
---|
2195 | 2415 | if (ret) |
---|
| 2416 | + goto out_reserved; |
---|
| 2417 | + |
---|
| 2418 | + /* |
---|
| 2419 | + * Everything went as planned, we're now the owner of a dirty page with |
---|
| 2420 | + * delayed allocation bits set and space reserved for our COW |
---|
| 2421 | + * destination. |
---|
| 2422 | + * |
---|
| 2423 | + * The page was dirty when we started, nothing should have cleaned it. |
---|
| 2424 | + */ |
---|
| 2425 | + BUG_ON(!PageDirty(page)); |
---|
| 2426 | + free_delalloc_space = false; |
---|
| 2427 | +out_reserved: |
---|
| 2428 | + btrfs_delalloc_release_extents(inode, PAGE_SIZE); |
---|
| 2429 | + if (free_delalloc_space) |
---|
2196 | 2430 | btrfs_delalloc_release_space(inode, data_reserved, page_start, |
---|
2197 | 2431 | PAGE_SIZE, true); |
---|
2198 | | -out: |
---|
2199 | | - unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, |
---|
| 2432 | + unlock_extent_cached(&inode->io_tree, page_start, page_end, |
---|
2200 | 2433 | &cached_state); |
---|
2201 | 2434 | out_page: |
---|
| 2435 | + if (ret) { |
---|
| 2436 | + /* |
---|
| 2437 | + * We hit ENOSPC or other errors. Update the mapping and page |
---|
| 2438 | + * to reflect the errors and clean the page. |
---|
| 2439 | + */ |
---|
| 2440 | + mapping_set_error(page->mapping, ret); |
---|
| 2441 | + end_extent_writepage(page, ret, page_start, page_end); |
---|
| 2442 | + clear_page_dirty_for_io(page); |
---|
| 2443 | + SetPageError(page); |
---|
| 2444 | + } |
---|
| 2445 | + ClearPageChecked(page); |
---|
2202 | 2446 | unlock_page(page); |
---|
2203 | 2447 | put_page(page); |
---|
2204 | 2448 | kfree(fixup); |
---|
2205 | 2449 | extent_changeset_free(data_reserved); |
---|
| 2450 | + /* |
---|
| 2451 | + * As a precaution, do a delayed iput in case it would be the last iput |
---|
| 2452 | + * that could need flushing space. Recursing back to fixup worker would |
---|
| 2453 | + * deadlock. |
---|
| 2454 | + */ |
---|
| 2455 | + btrfs_add_delayed_iput(&inode->vfs_inode); |
---|
2206 | 2456 | } |
---|
2207 | 2457 | |
---|
2208 | 2458 | /* |
---|
.. | .. |
---|
2216 | 2466 | * to fix it up. The async helper will wait for ordered extents, set |
---|
2217 | 2467 | * the delalloc bit and make it safe to write the page. |
---|
2218 | 2468 | */ |
---|
2219 | | -static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) |
---|
| 2469 | +int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end) |
---|
2220 | 2470 | { |
---|
2221 | 2471 | struct inode *inode = page->mapping->host; |
---|
2222 | 2472 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
.. | .. |
---|
2226 | 2476 | if (TestClearPagePrivate2(page)) |
---|
2227 | 2477 | return 0; |
---|
2228 | 2478 | |
---|
| 2479 | + /* |
---|
| 2480 | + * PageChecked is set below when we create a fixup worker for this page, |
---|
| 2481 | + * don't try to create another one if we're already PageChecked() |
---|
| 2482 | + * |
---|
| 2483 | + * The extent_io writepage code will redirty the page if we send back |
---|
| 2484 | + * EAGAIN. |
---|
| 2485 | + */ |
---|
2229 | 2486 | if (PageChecked(page)) |
---|
2230 | 2487 | return -EAGAIN; |
---|
2231 | 2488 | |
---|
.. | .. |
---|
2233 | 2490 | if (!fixup) |
---|
2234 | 2491 | return -EAGAIN; |
---|
2235 | 2492 | |
---|
| 2493 | + /* |
---|
| 2494 | + * We are already holding a reference to this inode from |
---|
| 2495 | + * write_cache_pages. We need to hold it because the space reservation |
---|
| 2496 | + * takes place outside of the page lock, and we can't trust |
---|
| 2497 | + * page->mapping outside of the page lock. |
---|
| 2498 | + */ |
---|
| 2499 | + ihold(inode); |
---|
2236 | 2500 | SetPageChecked(page); |
---|
2237 | 2501 | get_page(page); |
---|
2238 | | - btrfs_init_work(&fixup->work, btrfs_fixup_helper, |
---|
2239 | | - btrfs_writepage_fixup_worker, NULL, NULL); |
---|
| 2502 | + btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); |
---|
2240 | 2503 | fixup->page = page; |
---|
| 2504 | + fixup->inode = inode; |
---|
2241 | 2505 | btrfs_queue_work(fs_info->fixup_workers, &fixup->work); |
---|
2242 | | - return -EBUSY; |
---|
| 2506 | + |
---|
| 2507 | + return -EAGAIN; |
---|
2243 | 2508 | } |
---|
2244 | 2509 | |
---|
2245 | 2510 | static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, |
---|
2246 | | - struct inode *inode, u64 file_pos, |
---|
2247 | | - u64 disk_bytenr, u64 disk_num_bytes, |
---|
2248 | | - u64 num_bytes, u64 ram_bytes, |
---|
2249 | | - u8 compression, u8 encryption, |
---|
2250 | | - u16 other_encoding, int extent_type) |
---|
| 2511 | + struct btrfs_inode *inode, u64 file_pos, |
---|
| 2512 | + struct btrfs_file_extent_item *stack_fi, |
---|
| 2513 | + u64 qgroup_reserved) |
---|
2251 | 2514 | { |
---|
2252 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
2253 | | - struct btrfs_file_extent_item *fi; |
---|
| 2515 | + struct btrfs_root *root = inode->root; |
---|
2254 | 2516 | struct btrfs_path *path; |
---|
2255 | 2517 | struct extent_buffer *leaf; |
---|
2256 | 2518 | struct btrfs_key ins; |
---|
2257 | | - u64 qg_released; |
---|
| 2519 | + u64 disk_num_bytes = btrfs_stack_file_extent_disk_num_bytes(stack_fi); |
---|
| 2520 | + u64 disk_bytenr = btrfs_stack_file_extent_disk_bytenr(stack_fi); |
---|
| 2521 | + u64 num_bytes = btrfs_stack_file_extent_num_bytes(stack_fi); |
---|
| 2522 | + u64 ram_bytes = btrfs_stack_file_extent_ram_bytes(stack_fi); |
---|
2258 | 2523 | int extent_inserted = 0; |
---|
2259 | 2524 | int ret; |
---|
2260 | 2525 | |
---|
.. | .. |
---|
2273 | 2538 | */ |
---|
2274 | 2539 | ret = __btrfs_drop_extents(trans, root, inode, path, file_pos, |
---|
2275 | 2540 | file_pos + num_bytes, NULL, 0, |
---|
2276 | | - 1, sizeof(*fi), &extent_inserted); |
---|
| 2541 | + 1, sizeof(*stack_fi), &extent_inserted); |
---|
2277 | 2542 | if (ret) |
---|
2278 | 2543 | goto out; |
---|
2279 | 2544 | |
---|
2280 | 2545 | if (!extent_inserted) { |
---|
2281 | | - ins.objectid = btrfs_ino(BTRFS_I(inode)); |
---|
| 2546 | + ins.objectid = btrfs_ino(inode); |
---|
2282 | 2547 | ins.offset = file_pos; |
---|
2283 | 2548 | ins.type = BTRFS_EXTENT_DATA_KEY; |
---|
2284 | 2549 | |
---|
2285 | 2550 | path->leave_spinning = 1; |
---|
2286 | 2551 | ret = btrfs_insert_empty_item(trans, root, path, &ins, |
---|
2287 | | - sizeof(*fi)); |
---|
| 2552 | + sizeof(*stack_fi)); |
---|
2288 | 2553 | if (ret) |
---|
2289 | 2554 | goto out; |
---|
2290 | 2555 | } |
---|
2291 | 2556 | leaf = path->nodes[0]; |
---|
2292 | | - fi = btrfs_item_ptr(leaf, path->slots[0], |
---|
2293 | | - struct btrfs_file_extent_item); |
---|
2294 | | - btrfs_set_file_extent_generation(leaf, fi, trans->transid); |
---|
2295 | | - btrfs_set_file_extent_type(leaf, fi, extent_type); |
---|
2296 | | - btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr); |
---|
2297 | | - btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_num_bytes); |
---|
2298 | | - btrfs_set_file_extent_offset(leaf, fi, 0); |
---|
2299 | | - btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); |
---|
2300 | | - btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes); |
---|
2301 | | - btrfs_set_file_extent_compression(leaf, fi, compression); |
---|
2302 | | - btrfs_set_file_extent_encryption(leaf, fi, encryption); |
---|
2303 | | - btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding); |
---|
| 2557 | + btrfs_set_stack_file_extent_generation(stack_fi, trans->transid); |
---|
| 2558 | + write_extent_buffer(leaf, stack_fi, |
---|
| 2559 | + btrfs_item_ptr_offset(leaf, path->slots[0]), |
---|
| 2560 | + sizeof(struct btrfs_file_extent_item)); |
---|
2304 | 2561 | |
---|
2305 | 2562 | btrfs_mark_buffer_dirty(leaf); |
---|
2306 | 2563 | btrfs_release_path(path); |
---|
2307 | 2564 | |
---|
2308 | | - inode_add_bytes(inode, num_bytes); |
---|
| 2565 | + inode_add_bytes(&inode->vfs_inode, num_bytes); |
---|
2309 | 2566 | |
---|
2310 | 2567 | ins.objectid = disk_bytenr; |
---|
2311 | 2568 | ins.offset = disk_num_bytes; |
---|
2312 | 2569 | ins.type = BTRFS_EXTENT_ITEM_KEY; |
---|
2313 | 2570 | |
---|
2314 | | - /* |
---|
2315 | | - * Release the reserved range from inode dirty range map, as it is |
---|
2316 | | - * already moved into delayed_ref_head |
---|
2317 | | - */ |
---|
2318 | | - ret = btrfs_qgroup_release_data(inode, file_pos, ram_bytes); |
---|
2319 | | - if (ret < 0) |
---|
2320 | | - goto out; |
---|
2321 | | - qg_released = ret; |
---|
2322 | | - ret = btrfs_alloc_reserved_file_extent(trans, root, |
---|
2323 | | - btrfs_ino(BTRFS_I(inode)), |
---|
2324 | | - file_pos, qg_released, &ins); |
---|
2325 | | -out: |
---|
2326 | | - btrfs_free_path(path); |
---|
2327 | | - |
---|
2328 | | - return ret; |
---|
2329 | | -} |
---|
2330 | | - |
---|
2331 | | -/* snapshot-aware defrag */ |
---|
2332 | | -struct sa_defrag_extent_backref { |
---|
2333 | | - struct rb_node node; |
---|
2334 | | - struct old_sa_defrag_extent *old; |
---|
2335 | | - u64 root_id; |
---|
2336 | | - u64 inum; |
---|
2337 | | - u64 file_pos; |
---|
2338 | | - u64 extent_offset; |
---|
2339 | | - u64 num_bytes; |
---|
2340 | | - u64 generation; |
---|
2341 | | -}; |
---|
2342 | | - |
---|
2343 | | -struct old_sa_defrag_extent { |
---|
2344 | | - struct list_head list; |
---|
2345 | | - struct new_sa_defrag_extent *new; |
---|
2346 | | - |
---|
2347 | | - u64 extent_offset; |
---|
2348 | | - u64 bytenr; |
---|
2349 | | - u64 offset; |
---|
2350 | | - u64 len; |
---|
2351 | | - int count; |
---|
2352 | | -}; |
---|
2353 | | - |
---|
2354 | | -struct new_sa_defrag_extent { |
---|
2355 | | - struct rb_root root; |
---|
2356 | | - struct list_head head; |
---|
2357 | | - struct btrfs_path *path; |
---|
2358 | | - struct inode *inode; |
---|
2359 | | - u64 file_pos; |
---|
2360 | | - u64 len; |
---|
2361 | | - u64 bytenr; |
---|
2362 | | - u64 disk_len; |
---|
2363 | | - u8 compress_type; |
---|
2364 | | -}; |
---|
2365 | | - |
---|
2366 | | -static int backref_comp(struct sa_defrag_extent_backref *b1, |
---|
2367 | | - struct sa_defrag_extent_backref *b2) |
---|
2368 | | -{ |
---|
2369 | | - if (b1->root_id < b2->root_id) |
---|
2370 | | - return -1; |
---|
2371 | | - else if (b1->root_id > b2->root_id) |
---|
2372 | | - return 1; |
---|
2373 | | - |
---|
2374 | | - if (b1->inum < b2->inum) |
---|
2375 | | - return -1; |
---|
2376 | | - else if (b1->inum > b2->inum) |
---|
2377 | | - return 1; |
---|
2378 | | - |
---|
2379 | | - if (b1->file_pos < b2->file_pos) |
---|
2380 | | - return -1; |
---|
2381 | | - else if (b1->file_pos > b2->file_pos) |
---|
2382 | | - return 1; |
---|
2383 | | - |
---|
2384 | | - /* |
---|
2385 | | - * [------------------------------] ===> (a range of space) |
---|
2386 | | - * |<--->| |<---->| =============> (fs/file tree A) |
---|
2387 | | - * |<---------------------------->| ===> (fs/file tree B) |
---|
2388 | | - * |
---|
2389 | | - * A range of space can refer to two file extents in one tree while |
---|
2390 | | - * refer to only one file extent in another tree. |
---|
2391 | | - * |
---|
2392 | | - * So we may process a disk offset more than one time(two extents in A) |
---|
2393 | | - * and locate at the same extent(one extent in B), then insert two same |
---|
2394 | | - * backrefs(both refer to the extent in B). |
---|
2395 | | - */ |
---|
2396 | | - return 0; |
---|
2397 | | -} |
---|
2398 | | - |
---|
2399 | | -static void backref_insert(struct rb_root *root, |
---|
2400 | | - struct sa_defrag_extent_backref *backref) |
---|
2401 | | -{ |
---|
2402 | | - struct rb_node **p = &root->rb_node; |
---|
2403 | | - struct rb_node *parent = NULL; |
---|
2404 | | - struct sa_defrag_extent_backref *entry; |
---|
2405 | | - int ret; |
---|
2406 | | - |
---|
2407 | | - while (*p) { |
---|
2408 | | - parent = *p; |
---|
2409 | | - entry = rb_entry(parent, struct sa_defrag_extent_backref, node); |
---|
2410 | | - |
---|
2411 | | - ret = backref_comp(backref, entry); |
---|
2412 | | - if (ret < 0) |
---|
2413 | | - p = &(*p)->rb_left; |
---|
2414 | | - else |
---|
2415 | | - p = &(*p)->rb_right; |
---|
2416 | | - } |
---|
2417 | | - |
---|
2418 | | - rb_link_node(&backref->node, parent, p); |
---|
2419 | | - rb_insert_color(&backref->node, root); |
---|
2420 | | -} |
---|
2421 | | - |
---|
2422 | | -/* |
---|
2423 | | - * Note the backref might has changed, and in this case we just return 0. |
---|
2424 | | - */ |
---|
2425 | | -static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, |
---|
2426 | | - void *ctx) |
---|
2427 | | -{ |
---|
2428 | | - struct btrfs_file_extent_item *extent; |
---|
2429 | | - struct old_sa_defrag_extent *old = ctx; |
---|
2430 | | - struct new_sa_defrag_extent *new = old->new; |
---|
2431 | | - struct btrfs_path *path = new->path; |
---|
2432 | | - struct btrfs_key key; |
---|
2433 | | - struct btrfs_root *root; |
---|
2434 | | - struct sa_defrag_extent_backref *backref; |
---|
2435 | | - struct extent_buffer *leaf; |
---|
2436 | | - struct inode *inode = new->inode; |
---|
2437 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
2438 | | - int slot; |
---|
2439 | | - int ret; |
---|
2440 | | - u64 extent_offset; |
---|
2441 | | - u64 num_bytes; |
---|
2442 | | - |
---|
2443 | | - if (BTRFS_I(inode)->root->root_key.objectid == root_id && |
---|
2444 | | - inum == btrfs_ino(BTRFS_I(inode))) |
---|
2445 | | - return 0; |
---|
2446 | | - |
---|
2447 | | - key.objectid = root_id; |
---|
2448 | | - key.type = BTRFS_ROOT_ITEM_KEY; |
---|
2449 | | - key.offset = (u64)-1; |
---|
2450 | | - |
---|
2451 | | - root = btrfs_read_fs_root_no_name(fs_info, &key); |
---|
2452 | | - if (IS_ERR(root)) { |
---|
2453 | | - if (PTR_ERR(root) == -ENOENT) |
---|
2454 | | - return 0; |
---|
2455 | | - WARN_ON(1); |
---|
2456 | | - btrfs_debug(fs_info, "inum=%llu, offset=%llu, root_id=%llu", |
---|
2457 | | - inum, offset, root_id); |
---|
2458 | | - return PTR_ERR(root); |
---|
2459 | | - } |
---|
2460 | | - |
---|
2461 | | - key.objectid = inum; |
---|
2462 | | - key.type = BTRFS_EXTENT_DATA_KEY; |
---|
2463 | | - if (offset > (u64)-1 << 32) |
---|
2464 | | - key.offset = 0; |
---|
2465 | | - else |
---|
2466 | | - key.offset = offset; |
---|
2467 | | - |
---|
2468 | | - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
---|
2469 | | - if (WARN_ON(ret < 0)) |
---|
2470 | | - return ret; |
---|
2471 | | - ret = 0; |
---|
2472 | | - |
---|
2473 | | - while (1) { |
---|
2474 | | - cond_resched(); |
---|
2475 | | - |
---|
2476 | | - leaf = path->nodes[0]; |
---|
2477 | | - slot = path->slots[0]; |
---|
2478 | | - |
---|
2479 | | - if (slot >= btrfs_header_nritems(leaf)) { |
---|
2480 | | - ret = btrfs_next_leaf(root, path); |
---|
2481 | | - if (ret < 0) { |
---|
2482 | | - goto out; |
---|
2483 | | - } else if (ret > 0) { |
---|
2484 | | - ret = 0; |
---|
2485 | | - goto out; |
---|
2486 | | - } |
---|
2487 | | - continue; |
---|
2488 | | - } |
---|
2489 | | - |
---|
2490 | | - path->slots[0]++; |
---|
2491 | | - |
---|
2492 | | - btrfs_item_key_to_cpu(leaf, &key, slot); |
---|
2493 | | - |
---|
2494 | | - if (key.objectid > inum) |
---|
2495 | | - goto out; |
---|
2496 | | - |
---|
2497 | | - if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY) |
---|
2498 | | - continue; |
---|
2499 | | - |
---|
2500 | | - extent = btrfs_item_ptr(leaf, slot, |
---|
2501 | | - struct btrfs_file_extent_item); |
---|
2502 | | - |
---|
2503 | | - if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr) |
---|
2504 | | - continue; |
---|
2505 | | - |
---|
2506 | | - /* |
---|
2507 | | - * 'offset' refers to the exact key.offset, |
---|
2508 | | - * NOT the 'offset' field in btrfs_extent_data_ref, ie. |
---|
2509 | | - * (key.offset - extent_offset). |
---|
2510 | | - */ |
---|
2511 | | - if (key.offset != offset) |
---|
2512 | | - continue; |
---|
2513 | | - |
---|
2514 | | - extent_offset = btrfs_file_extent_offset(leaf, extent); |
---|
2515 | | - num_bytes = btrfs_file_extent_num_bytes(leaf, extent); |
---|
2516 | | - |
---|
2517 | | - if (extent_offset >= old->extent_offset + old->offset + |
---|
2518 | | - old->len || extent_offset + num_bytes <= |
---|
2519 | | - old->extent_offset + old->offset) |
---|
2520 | | - continue; |
---|
2521 | | - break; |
---|
2522 | | - } |
---|
2523 | | - |
---|
2524 | | - backref = kmalloc(sizeof(*backref), GFP_NOFS); |
---|
2525 | | - if (!backref) { |
---|
2526 | | - ret = -ENOENT; |
---|
2527 | | - goto out; |
---|
2528 | | - } |
---|
2529 | | - |
---|
2530 | | - backref->root_id = root_id; |
---|
2531 | | - backref->inum = inum; |
---|
2532 | | - backref->file_pos = offset; |
---|
2533 | | - backref->num_bytes = num_bytes; |
---|
2534 | | - backref->extent_offset = extent_offset; |
---|
2535 | | - backref->generation = btrfs_file_extent_generation(leaf, extent); |
---|
2536 | | - backref->old = old; |
---|
2537 | | - backref_insert(&new->root, backref); |
---|
2538 | | - old->count++; |
---|
2539 | | -out: |
---|
2540 | | - btrfs_release_path(path); |
---|
2541 | | - WARN_ON(ret); |
---|
2542 | | - return ret; |
---|
2543 | | -} |
---|
2544 | | - |
---|
2545 | | -static noinline bool record_extent_backrefs(struct btrfs_path *path, |
---|
2546 | | - struct new_sa_defrag_extent *new) |
---|
2547 | | -{ |
---|
2548 | | - struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb); |
---|
2549 | | - struct old_sa_defrag_extent *old, *tmp; |
---|
2550 | | - int ret; |
---|
2551 | | - |
---|
2552 | | - new->path = path; |
---|
2553 | | - |
---|
2554 | | - list_for_each_entry_safe(old, tmp, &new->head, list) { |
---|
2555 | | - ret = iterate_inodes_from_logical(old->bytenr + |
---|
2556 | | - old->extent_offset, fs_info, |
---|
2557 | | - path, record_one_backref, |
---|
2558 | | - old, false); |
---|
2559 | | - if (ret < 0 && ret != -ENOENT) |
---|
2560 | | - return false; |
---|
2561 | | - |
---|
2562 | | - /* no backref to be processed for this extent */ |
---|
2563 | | - if (!old->count) { |
---|
2564 | | - list_del(&old->list); |
---|
2565 | | - kfree(old); |
---|
2566 | | - } |
---|
2567 | | - } |
---|
2568 | | - |
---|
2569 | | - if (list_empty(&new->head)) |
---|
2570 | | - return false; |
---|
2571 | | - |
---|
2572 | | - return true; |
---|
2573 | | -} |
---|
2574 | | - |
---|
2575 | | -static int relink_is_mergable(struct extent_buffer *leaf, |
---|
2576 | | - struct btrfs_file_extent_item *fi, |
---|
2577 | | - struct new_sa_defrag_extent *new) |
---|
2578 | | -{ |
---|
2579 | | - if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr) |
---|
2580 | | - return 0; |
---|
2581 | | - |
---|
2582 | | - if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) |
---|
2583 | | - return 0; |
---|
2584 | | - |
---|
2585 | | - if (btrfs_file_extent_compression(leaf, fi) != new->compress_type) |
---|
2586 | | - return 0; |
---|
2587 | | - |
---|
2588 | | - if (btrfs_file_extent_encryption(leaf, fi) || |
---|
2589 | | - btrfs_file_extent_other_encoding(leaf, fi)) |
---|
2590 | | - return 0; |
---|
2591 | | - |
---|
2592 | | - return 1; |
---|
2593 | | -} |
---|
2594 | | - |
---|
2595 | | -/* |
---|
2596 | | - * Note the backref might has changed, and in this case we just return 0. |
---|
2597 | | - */ |
---|
2598 | | -static noinline int relink_extent_backref(struct btrfs_path *path, |
---|
2599 | | - struct sa_defrag_extent_backref *prev, |
---|
2600 | | - struct sa_defrag_extent_backref *backref) |
---|
2601 | | -{ |
---|
2602 | | - struct btrfs_file_extent_item *extent; |
---|
2603 | | - struct btrfs_file_extent_item *item; |
---|
2604 | | - struct btrfs_ordered_extent *ordered; |
---|
2605 | | - struct btrfs_trans_handle *trans; |
---|
2606 | | - struct btrfs_root *root; |
---|
2607 | | - struct btrfs_key key; |
---|
2608 | | - struct extent_buffer *leaf; |
---|
2609 | | - struct old_sa_defrag_extent *old = backref->old; |
---|
2610 | | - struct new_sa_defrag_extent *new = old->new; |
---|
2611 | | - struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb); |
---|
2612 | | - struct inode *inode; |
---|
2613 | | - struct extent_state *cached = NULL; |
---|
2614 | | - int ret = 0; |
---|
2615 | | - u64 start; |
---|
2616 | | - u64 len; |
---|
2617 | | - u64 lock_start; |
---|
2618 | | - u64 lock_end; |
---|
2619 | | - bool merge = false; |
---|
2620 | | - int index; |
---|
2621 | | - |
---|
2622 | | - if (prev && prev->root_id == backref->root_id && |
---|
2623 | | - prev->inum == backref->inum && |
---|
2624 | | - prev->file_pos + prev->num_bytes == backref->file_pos) |
---|
2625 | | - merge = true; |
---|
2626 | | - |
---|
2627 | | - /* step 1: get root */ |
---|
2628 | | - key.objectid = backref->root_id; |
---|
2629 | | - key.type = BTRFS_ROOT_ITEM_KEY; |
---|
2630 | | - key.offset = (u64)-1; |
---|
2631 | | - |
---|
2632 | | - index = srcu_read_lock(&fs_info->subvol_srcu); |
---|
2633 | | - |
---|
2634 | | - root = btrfs_read_fs_root_no_name(fs_info, &key); |
---|
2635 | | - if (IS_ERR(root)) { |
---|
2636 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
---|
2637 | | - if (PTR_ERR(root) == -ENOENT) |
---|
2638 | | - return 0; |
---|
2639 | | - return PTR_ERR(root); |
---|
2640 | | - } |
---|
2641 | | - |
---|
2642 | | - if (btrfs_root_readonly(root)) { |
---|
2643 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
---|
2644 | | - return 0; |
---|
2645 | | - } |
---|
2646 | | - |
---|
2647 | | - /* step 2: get inode */ |
---|
2648 | | - key.objectid = backref->inum; |
---|
2649 | | - key.type = BTRFS_INODE_ITEM_KEY; |
---|
2650 | | - key.offset = 0; |
---|
2651 | | - |
---|
2652 | | - inode = btrfs_iget(fs_info->sb, &key, root, NULL); |
---|
2653 | | - if (IS_ERR(inode)) { |
---|
2654 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
---|
2655 | | - return 0; |
---|
2656 | | - } |
---|
2657 | | - |
---|
2658 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
---|
2659 | | - |
---|
2660 | | - /* step 3: relink backref */ |
---|
2661 | | - lock_start = backref->file_pos; |
---|
2662 | | - lock_end = backref->file_pos + backref->num_bytes - 1; |
---|
2663 | | - lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end, |
---|
2664 | | - &cached); |
---|
2665 | | - |
---|
2666 | | - ordered = btrfs_lookup_first_ordered_extent(inode, lock_end); |
---|
2667 | | - if (ordered) { |
---|
2668 | | - btrfs_put_ordered_extent(ordered); |
---|
2669 | | - goto out_unlock; |
---|
2670 | | - } |
---|
2671 | | - |
---|
2672 | | - trans = btrfs_join_transaction(root); |
---|
2673 | | - if (IS_ERR(trans)) { |
---|
2674 | | - ret = PTR_ERR(trans); |
---|
2675 | | - goto out_unlock; |
---|
2676 | | - } |
---|
2677 | | - |
---|
2678 | | - key.objectid = backref->inum; |
---|
2679 | | - key.type = BTRFS_EXTENT_DATA_KEY; |
---|
2680 | | - key.offset = backref->file_pos; |
---|
2681 | | - |
---|
2682 | | - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
---|
2683 | | - if (ret < 0) { |
---|
2684 | | - goto out_free_path; |
---|
2685 | | - } else if (ret > 0) { |
---|
2686 | | - ret = 0; |
---|
2687 | | - goto out_free_path; |
---|
2688 | | - } |
---|
2689 | | - |
---|
2690 | | - extent = btrfs_item_ptr(path->nodes[0], path->slots[0], |
---|
2691 | | - struct btrfs_file_extent_item); |
---|
2692 | | - |
---|
2693 | | - if (btrfs_file_extent_generation(path->nodes[0], extent) != |
---|
2694 | | - backref->generation) |
---|
2695 | | - goto out_free_path; |
---|
2696 | | - |
---|
2697 | | - btrfs_release_path(path); |
---|
2698 | | - |
---|
2699 | | - start = backref->file_pos; |
---|
2700 | | - if (backref->extent_offset < old->extent_offset + old->offset) |
---|
2701 | | - start += old->extent_offset + old->offset - |
---|
2702 | | - backref->extent_offset; |
---|
2703 | | - |
---|
2704 | | - len = min(backref->extent_offset + backref->num_bytes, |
---|
2705 | | - old->extent_offset + old->offset + old->len); |
---|
2706 | | - len -= max(backref->extent_offset, old->extent_offset + old->offset); |
---|
2707 | | - |
---|
2708 | | - ret = btrfs_drop_extents(trans, root, inode, start, |
---|
2709 | | - start + len, 1); |
---|
| 2571 | + ret = btrfs_inode_set_file_extent_range(inode, file_pos, ram_bytes); |
---|
2710 | 2572 | if (ret) |
---|
2711 | | - goto out_free_path; |
---|
2712 | | -again: |
---|
2713 | | - key.objectid = btrfs_ino(BTRFS_I(inode)); |
---|
2714 | | - key.type = BTRFS_EXTENT_DATA_KEY; |
---|
2715 | | - key.offset = start; |
---|
2716 | | - |
---|
2717 | | - path->leave_spinning = 1; |
---|
2718 | | - if (merge) { |
---|
2719 | | - struct btrfs_file_extent_item *fi; |
---|
2720 | | - u64 extent_len; |
---|
2721 | | - struct btrfs_key found_key; |
---|
2722 | | - |
---|
2723 | | - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); |
---|
2724 | | - if (ret < 0) |
---|
2725 | | - goto out_free_path; |
---|
2726 | | - |
---|
2727 | | - path->slots[0]--; |
---|
2728 | | - leaf = path->nodes[0]; |
---|
2729 | | - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
---|
2730 | | - |
---|
2731 | | - fi = btrfs_item_ptr(leaf, path->slots[0], |
---|
2732 | | - struct btrfs_file_extent_item); |
---|
2733 | | - extent_len = btrfs_file_extent_num_bytes(leaf, fi); |
---|
2734 | | - |
---|
2735 | | - if (extent_len + found_key.offset == start && |
---|
2736 | | - relink_is_mergable(leaf, fi, new)) { |
---|
2737 | | - btrfs_set_file_extent_num_bytes(leaf, fi, |
---|
2738 | | - extent_len + len); |
---|
2739 | | - btrfs_mark_buffer_dirty(leaf); |
---|
2740 | | - inode_add_bytes(inode, len); |
---|
2741 | | - |
---|
2742 | | - ret = 1; |
---|
2743 | | - goto out_free_path; |
---|
2744 | | - } else { |
---|
2745 | | - merge = false; |
---|
2746 | | - btrfs_release_path(path); |
---|
2747 | | - goto again; |
---|
2748 | | - } |
---|
2749 | | - } |
---|
2750 | | - |
---|
2751 | | - ret = btrfs_insert_empty_item(trans, root, path, &key, |
---|
2752 | | - sizeof(*extent)); |
---|
2753 | | - if (ret) { |
---|
2754 | | - btrfs_abort_transaction(trans, ret); |
---|
2755 | | - goto out_free_path; |
---|
2756 | | - } |
---|
2757 | | - |
---|
2758 | | - leaf = path->nodes[0]; |
---|
2759 | | - item = btrfs_item_ptr(leaf, path->slots[0], |
---|
2760 | | - struct btrfs_file_extent_item); |
---|
2761 | | - btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr); |
---|
2762 | | - btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len); |
---|
2763 | | - btrfs_set_file_extent_offset(leaf, item, start - new->file_pos); |
---|
2764 | | - btrfs_set_file_extent_num_bytes(leaf, item, len); |
---|
2765 | | - btrfs_set_file_extent_ram_bytes(leaf, item, new->len); |
---|
2766 | | - btrfs_set_file_extent_generation(leaf, item, trans->transid); |
---|
2767 | | - btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); |
---|
2768 | | - btrfs_set_file_extent_compression(leaf, item, new->compress_type); |
---|
2769 | | - btrfs_set_file_extent_encryption(leaf, item, 0); |
---|
2770 | | - btrfs_set_file_extent_other_encoding(leaf, item, 0); |
---|
2771 | | - |
---|
2772 | | - btrfs_mark_buffer_dirty(leaf); |
---|
2773 | | - inode_add_bytes(inode, len); |
---|
2774 | | - btrfs_release_path(path); |
---|
2775 | | - |
---|
2776 | | - ret = btrfs_inc_extent_ref(trans, root, new->bytenr, |
---|
2777 | | - new->disk_len, 0, |
---|
2778 | | - backref->root_id, backref->inum, |
---|
2779 | | - new->file_pos); /* start - extent_offset */ |
---|
2780 | | - if (ret) { |
---|
2781 | | - btrfs_abort_transaction(trans, ret); |
---|
2782 | | - goto out_free_path; |
---|
2783 | | - } |
---|
2784 | | - |
---|
2785 | | - ret = 1; |
---|
2786 | | -out_free_path: |
---|
2787 | | - btrfs_release_path(path); |
---|
2788 | | - path->leave_spinning = 0; |
---|
2789 | | - btrfs_end_transaction(trans); |
---|
2790 | | -out_unlock: |
---|
2791 | | - unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end, |
---|
2792 | | - &cached); |
---|
2793 | | - iput(inode); |
---|
2794 | | - return ret; |
---|
2795 | | -} |
---|
2796 | | - |
---|
2797 | | -static void free_sa_defrag_extent(struct new_sa_defrag_extent *new) |
---|
2798 | | -{ |
---|
2799 | | - struct old_sa_defrag_extent *old, *tmp; |
---|
2800 | | - |
---|
2801 | | - if (!new) |
---|
2802 | | - return; |
---|
2803 | | - |
---|
2804 | | - list_for_each_entry_safe(old, tmp, &new->head, list) { |
---|
2805 | | - kfree(old); |
---|
2806 | | - } |
---|
2807 | | - kfree(new); |
---|
2808 | | -} |
---|
2809 | | - |
---|
2810 | | -static void relink_file_extents(struct new_sa_defrag_extent *new) |
---|
2811 | | -{ |
---|
2812 | | - struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb); |
---|
2813 | | - struct btrfs_path *path; |
---|
2814 | | - struct sa_defrag_extent_backref *backref; |
---|
2815 | | - struct sa_defrag_extent_backref *prev = NULL; |
---|
2816 | | - struct inode *inode; |
---|
2817 | | - struct rb_node *node; |
---|
2818 | | - int ret; |
---|
2819 | | - |
---|
2820 | | - inode = new->inode; |
---|
2821 | | - |
---|
2822 | | - path = btrfs_alloc_path(); |
---|
2823 | | - if (!path) |
---|
2824 | | - return; |
---|
2825 | | - |
---|
2826 | | - if (!record_extent_backrefs(path, new)) { |
---|
2827 | | - btrfs_free_path(path); |
---|
2828 | 2573 | goto out; |
---|
2829 | | - } |
---|
2830 | | - btrfs_release_path(path); |
---|
2831 | 2574 | |
---|
2832 | | - while (1) { |
---|
2833 | | - node = rb_first(&new->root); |
---|
2834 | | - if (!node) |
---|
2835 | | - break; |
---|
2836 | | - rb_erase(node, &new->root); |
---|
2837 | | - |
---|
2838 | | - backref = rb_entry(node, struct sa_defrag_extent_backref, node); |
---|
2839 | | - |
---|
2840 | | - ret = relink_extent_backref(path, prev, backref); |
---|
2841 | | - WARN_ON(ret < 0); |
---|
2842 | | - |
---|
2843 | | - kfree(prev); |
---|
2844 | | - |
---|
2845 | | - if (ret == 1) |
---|
2846 | | - prev = backref; |
---|
2847 | | - else |
---|
2848 | | - prev = NULL; |
---|
2849 | | - cond_resched(); |
---|
2850 | | - } |
---|
2851 | | - kfree(prev); |
---|
2852 | | - |
---|
2853 | | - btrfs_free_path(path); |
---|
| 2575 | + ret = btrfs_alloc_reserved_file_extent(trans, root, btrfs_ino(inode), |
---|
| 2576 | + file_pos, qgroup_reserved, &ins); |
---|
2854 | 2577 | out: |
---|
2855 | | - free_sa_defrag_extent(new); |
---|
2856 | | - |
---|
2857 | | - atomic_dec(&fs_info->defrag_running); |
---|
2858 | | - wake_up(&fs_info->transaction_wait); |
---|
2859 | | -} |
---|
2860 | | - |
---|
2861 | | -static struct new_sa_defrag_extent * |
---|
2862 | | -record_old_file_extents(struct inode *inode, |
---|
2863 | | - struct btrfs_ordered_extent *ordered) |
---|
2864 | | -{ |
---|
2865 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
2866 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
2867 | | - struct btrfs_path *path; |
---|
2868 | | - struct btrfs_key key; |
---|
2869 | | - struct old_sa_defrag_extent *old; |
---|
2870 | | - struct new_sa_defrag_extent *new; |
---|
2871 | | - int ret; |
---|
2872 | | - |
---|
2873 | | - new = kmalloc(sizeof(*new), GFP_NOFS); |
---|
2874 | | - if (!new) |
---|
2875 | | - return NULL; |
---|
2876 | | - |
---|
2877 | | - new->inode = inode; |
---|
2878 | | - new->file_pos = ordered->file_offset; |
---|
2879 | | - new->len = ordered->len; |
---|
2880 | | - new->bytenr = ordered->start; |
---|
2881 | | - new->disk_len = ordered->disk_len; |
---|
2882 | | - new->compress_type = ordered->compress_type; |
---|
2883 | | - new->root = RB_ROOT; |
---|
2884 | | - INIT_LIST_HEAD(&new->head); |
---|
2885 | | - |
---|
2886 | | - path = btrfs_alloc_path(); |
---|
2887 | | - if (!path) |
---|
2888 | | - goto out_kfree; |
---|
2889 | | - |
---|
2890 | | - key.objectid = btrfs_ino(BTRFS_I(inode)); |
---|
2891 | | - key.type = BTRFS_EXTENT_DATA_KEY; |
---|
2892 | | - key.offset = new->file_pos; |
---|
2893 | | - |
---|
2894 | | - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
---|
2895 | | - if (ret < 0) |
---|
2896 | | - goto out_free_path; |
---|
2897 | | - if (ret > 0 && path->slots[0] > 0) |
---|
2898 | | - path->slots[0]--; |
---|
2899 | | - |
---|
2900 | | - /* find out all the old extents for the file range */ |
---|
2901 | | - while (1) { |
---|
2902 | | - struct btrfs_file_extent_item *extent; |
---|
2903 | | - struct extent_buffer *l; |
---|
2904 | | - int slot; |
---|
2905 | | - u64 num_bytes; |
---|
2906 | | - u64 offset; |
---|
2907 | | - u64 end; |
---|
2908 | | - u64 disk_bytenr; |
---|
2909 | | - u64 extent_offset; |
---|
2910 | | - |
---|
2911 | | - l = path->nodes[0]; |
---|
2912 | | - slot = path->slots[0]; |
---|
2913 | | - |
---|
2914 | | - if (slot >= btrfs_header_nritems(l)) { |
---|
2915 | | - ret = btrfs_next_leaf(root, path); |
---|
2916 | | - if (ret < 0) |
---|
2917 | | - goto out_free_path; |
---|
2918 | | - else if (ret > 0) |
---|
2919 | | - break; |
---|
2920 | | - continue; |
---|
2921 | | - } |
---|
2922 | | - |
---|
2923 | | - btrfs_item_key_to_cpu(l, &key, slot); |
---|
2924 | | - |
---|
2925 | | - if (key.objectid != btrfs_ino(BTRFS_I(inode))) |
---|
2926 | | - break; |
---|
2927 | | - if (key.type != BTRFS_EXTENT_DATA_KEY) |
---|
2928 | | - break; |
---|
2929 | | - if (key.offset >= new->file_pos + new->len) |
---|
2930 | | - break; |
---|
2931 | | - |
---|
2932 | | - extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item); |
---|
2933 | | - |
---|
2934 | | - num_bytes = btrfs_file_extent_num_bytes(l, extent); |
---|
2935 | | - if (key.offset + num_bytes < new->file_pos) |
---|
2936 | | - goto next; |
---|
2937 | | - |
---|
2938 | | - disk_bytenr = btrfs_file_extent_disk_bytenr(l, extent); |
---|
2939 | | - if (!disk_bytenr) |
---|
2940 | | - goto next; |
---|
2941 | | - |
---|
2942 | | - extent_offset = btrfs_file_extent_offset(l, extent); |
---|
2943 | | - |
---|
2944 | | - old = kmalloc(sizeof(*old), GFP_NOFS); |
---|
2945 | | - if (!old) |
---|
2946 | | - goto out_free_path; |
---|
2947 | | - |
---|
2948 | | - offset = max(new->file_pos, key.offset); |
---|
2949 | | - end = min(new->file_pos + new->len, key.offset + num_bytes); |
---|
2950 | | - |
---|
2951 | | - old->bytenr = disk_bytenr; |
---|
2952 | | - old->extent_offset = extent_offset; |
---|
2953 | | - old->offset = offset - key.offset; |
---|
2954 | | - old->len = end - offset; |
---|
2955 | | - old->new = new; |
---|
2956 | | - old->count = 0; |
---|
2957 | | - list_add_tail(&old->list, &new->head); |
---|
2958 | | -next: |
---|
2959 | | - path->slots[0]++; |
---|
2960 | | - cond_resched(); |
---|
2961 | | - } |
---|
2962 | | - |
---|
2963 | 2578 | btrfs_free_path(path); |
---|
2964 | | - atomic_inc(&fs_info->defrag_running); |
---|
2965 | 2579 | |
---|
2966 | | - return new; |
---|
2967 | | - |
---|
2968 | | -out_free_path: |
---|
2969 | | - btrfs_free_path(path); |
---|
2970 | | -out_kfree: |
---|
2971 | | - free_sa_defrag_extent(new); |
---|
2972 | | - return NULL; |
---|
| 2580 | + return ret; |
---|
2973 | 2581 | } |
---|
2974 | 2582 | |
---|
2975 | 2583 | static void btrfs_release_delalloc_bytes(struct btrfs_fs_info *fs_info, |
---|
2976 | 2584 | u64 start, u64 len) |
---|
2977 | 2585 | { |
---|
2978 | | - struct btrfs_block_group_cache *cache; |
---|
| 2586 | + struct btrfs_block_group *cache; |
---|
2979 | 2587 | |
---|
2980 | 2588 | cache = btrfs_lookup_block_group(fs_info, start); |
---|
2981 | 2589 | ASSERT(cache); |
---|
.. | .. |
---|
2987 | 2595 | btrfs_put_block_group(cache); |
---|
2988 | 2596 | } |
---|
2989 | 2597 | |
---|
2990 | | -/* as ordered data IO finishes, this gets called so we can finish |
---|
| 2598 | +static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans, |
---|
| 2599 | + struct btrfs_ordered_extent *oe) |
---|
| 2600 | +{ |
---|
| 2601 | + struct btrfs_file_extent_item stack_fi; |
---|
| 2602 | + u64 logical_len; |
---|
| 2603 | + |
---|
| 2604 | + memset(&stack_fi, 0, sizeof(stack_fi)); |
---|
| 2605 | + btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG); |
---|
| 2606 | + btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, oe->disk_bytenr); |
---|
| 2607 | + btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi, |
---|
| 2608 | + oe->disk_num_bytes); |
---|
| 2609 | + if (test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags)) |
---|
| 2610 | + logical_len = oe->truncated_len; |
---|
| 2611 | + else |
---|
| 2612 | + logical_len = oe->num_bytes; |
---|
| 2613 | + btrfs_set_stack_file_extent_num_bytes(&stack_fi, logical_len); |
---|
| 2614 | + btrfs_set_stack_file_extent_ram_bytes(&stack_fi, logical_len); |
---|
| 2615 | + btrfs_set_stack_file_extent_compression(&stack_fi, oe->compress_type); |
---|
| 2616 | + /* Encryption and other encoding is reserved and all 0 */ |
---|
| 2617 | + |
---|
| 2618 | + return insert_reserved_file_extent(trans, BTRFS_I(oe->inode), |
---|
| 2619 | + oe->file_offset, &stack_fi, |
---|
| 2620 | + oe->qgroup_rsv); |
---|
| 2621 | +} |
---|
| 2622 | + |
---|
| 2623 | +/* |
---|
| 2624 | + * As ordered data IO finishes, this gets called so we can finish |
---|
2991 | 2625 | * an ordered extent if the range of bytes in the file it covers are |
---|
2992 | 2626 | * fully written. |
---|
2993 | 2627 | */ |
---|
.. | .. |
---|
2999 | 2633 | struct btrfs_trans_handle *trans = NULL; |
---|
3000 | 2634 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
---|
3001 | 2635 | struct extent_state *cached_state = NULL; |
---|
3002 | | - struct new_sa_defrag_extent *new = NULL; |
---|
| 2636 | + u64 start, end; |
---|
3003 | 2637 | int compress_type = 0; |
---|
3004 | 2638 | int ret = 0; |
---|
3005 | | - u64 logical_len = ordered_extent->len; |
---|
3006 | | - bool nolock; |
---|
| 2639 | + u64 logical_len = ordered_extent->num_bytes; |
---|
| 2640 | + bool freespace_inode; |
---|
3007 | 2641 | bool truncated = false; |
---|
3008 | 2642 | bool range_locked = false; |
---|
3009 | 2643 | bool clear_new_delalloc_bytes = false; |
---|
3010 | 2644 | bool clear_reserved_extent = true; |
---|
| 2645 | + unsigned int clear_bits; |
---|
| 2646 | + |
---|
| 2647 | + start = ordered_extent->file_offset; |
---|
| 2648 | + end = start + ordered_extent->num_bytes - 1; |
---|
3011 | 2649 | |
---|
3012 | 2650 | if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && |
---|
3013 | 2651 | !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) && |
---|
3014 | 2652 | !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags)) |
---|
3015 | 2653 | clear_new_delalloc_bytes = true; |
---|
3016 | 2654 | |
---|
3017 | | - nolock = btrfs_is_free_space_inode(BTRFS_I(inode)); |
---|
| 2655 | + freespace_inode = btrfs_is_free_space_inode(BTRFS_I(inode)); |
---|
3018 | 2656 | |
---|
3019 | 2657 | if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) { |
---|
3020 | 2658 | ret = -EIO; |
---|
3021 | 2659 | goto out; |
---|
3022 | 2660 | } |
---|
3023 | 2661 | |
---|
3024 | | - btrfs_free_io_failure_record(BTRFS_I(inode), |
---|
3025 | | - ordered_extent->file_offset, |
---|
3026 | | - ordered_extent->file_offset + |
---|
3027 | | - ordered_extent->len - 1); |
---|
| 2662 | + btrfs_free_io_failure_record(BTRFS_I(inode), start, end); |
---|
3028 | 2663 | |
---|
3029 | 2664 | if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) { |
---|
3030 | 2665 | truncated = true; |
---|
.. | .. |
---|
3037 | 2672 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { |
---|
3038 | 2673 | BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ |
---|
3039 | 2674 | |
---|
3040 | | - /* |
---|
3041 | | - * For mwrite(mmap + memset to write) case, we still reserve |
---|
3042 | | - * space for NOCOW range. |
---|
3043 | | - * As NOCOW won't cause a new delayed ref, just free the space |
---|
3044 | | - */ |
---|
3045 | | - btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset, |
---|
3046 | | - ordered_extent->len); |
---|
3047 | | - btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
---|
3048 | | - if (nolock) |
---|
3049 | | - trans = btrfs_join_transaction_nolock(root); |
---|
| 2675 | + btrfs_inode_safe_disk_i_size_write(inode, 0); |
---|
| 2676 | + if (freespace_inode) |
---|
| 2677 | + trans = btrfs_join_transaction_spacecache(root); |
---|
3050 | 2678 | else |
---|
3051 | 2679 | trans = btrfs_join_transaction(root); |
---|
3052 | 2680 | if (IS_ERR(trans)) { |
---|
.. | .. |
---|
3062 | 2690 | } |
---|
3063 | 2691 | |
---|
3064 | 2692 | range_locked = true; |
---|
3065 | | - lock_extent_bits(io_tree, ordered_extent->file_offset, |
---|
3066 | | - ordered_extent->file_offset + ordered_extent->len - 1, |
---|
3067 | | - &cached_state); |
---|
| 2693 | + lock_extent_bits(io_tree, start, end, &cached_state); |
---|
3068 | 2694 | |
---|
3069 | | - ret = test_range_bit(io_tree, ordered_extent->file_offset, |
---|
3070 | | - ordered_extent->file_offset + ordered_extent->len - 1, |
---|
3071 | | - EXTENT_DEFRAG, 0, cached_state); |
---|
3072 | | - if (ret) { |
---|
3073 | | - u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item); |
---|
3074 | | - if (0 && last_snapshot >= BTRFS_I(inode)->generation) |
---|
3075 | | - /* the inode is shared */ |
---|
3076 | | - new = record_old_file_extents(inode, ordered_extent); |
---|
3077 | | - |
---|
3078 | | - clear_extent_bit(io_tree, ordered_extent->file_offset, |
---|
3079 | | - ordered_extent->file_offset + ordered_extent->len - 1, |
---|
3080 | | - EXTENT_DEFRAG, 0, 0, &cached_state); |
---|
3081 | | - } |
---|
3082 | | - |
---|
3083 | | - if (nolock) |
---|
3084 | | - trans = btrfs_join_transaction_nolock(root); |
---|
| 2695 | + if (freespace_inode) |
---|
| 2696 | + trans = btrfs_join_transaction_spacecache(root); |
---|
3085 | 2697 | else |
---|
3086 | 2698 | trans = btrfs_join_transaction(root); |
---|
3087 | 2699 | if (IS_ERR(trans)) { |
---|
.. | .. |
---|
3096 | 2708 | compress_type = ordered_extent->compress_type; |
---|
3097 | 2709 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { |
---|
3098 | 2710 | BUG_ON(compress_type); |
---|
3099 | | - btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset, |
---|
3100 | | - ordered_extent->len); |
---|
3101 | 2711 | ret = btrfs_mark_extent_written(trans, BTRFS_I(inode), |
---|
3102 | 2712 | ordered_extent->file_offset, |
---|
3103 | 2713 | ordered_extent->file_offset + |
---|
3104 | 2714 | logical_len); |
---|
3105 | 2715 | } else { |
---|
3106 | 2716 | BUG_ON(root == fs_info->tree_root); |
---|
3107 | | - ret = insert_reserved_file_extent(trans, inode, |
---|
3108 | | - ordered_extent->file_offset, |
---|
3109 | | - ordered_extent->start, |
---|
3110 | | - ordered_extent->disk_len, |
---|
3111 | | - logical_len, logical_len, |
---|
3112 | | - compress_type, 0, 0, |
---|
3113 | | - BTRFS_FILE_EXTENT_REG); |
---|
| 2717 | + ret = insert_ordered_extent_file_extent(trans, ordered_extent); |
---|
3114 | 2718 | if (!ret) { |
---|
3115 | 2719 | clear_reserved_extent = false; |
---|
3116 | 2720 | btrfs_release_delalloc_bytes(fs_info, |
---|
3117 | | - ordered_extent->start, |
---|
3118 | | - ordered_extent->disk_len); |
---|
| 2721 | + ordered_extent->disk_bytenr, |
---|
| 2722 | + ordered_extent->disk_num_bytes); |
---|
3119 | 2723 | } |
---|
3120 | 2724 | } |
---|
3121 | 2725 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, |
---|
3122 | | - ordered_extent->file_offset, ordered_extent->len, |
---|
3123 | | - trans->transid); |
---|
| 2726 | + ordered_extent->file_offset, |
---|
| 2727 | + ordered_extent->num_bytes, trans->transid); |
---|
3124 | 2728 | if (ret < 0) { |
---|
3125 | 2729 | btrfs_abort_transaction(trans, ret); |
---|
3126 | 2730 | goto out; |
---|
3127 | 2731 | } |
---|
3128 | 2732 | |
---|
3129 | | - ret = add_pending_csums(trans, inode, &ordered_extent->list); |
---|
| 2733 | + ret = add_pending_csums(trans, &ordered_extent->list); |
---|
3130 | 2734 | if (ret) { |
---|
3131 | 2735 | btrfs_abort_transaction(trans, ret); |
---|
3132 | 2736 | goto out; |
---|
3133 | 2737 | } |
---|
3134 | 2738 | |
---|
3135 | | - btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
---|
| 2739 | + btrfs_inode_safe_disk_i_size_write(inode, 0); |
---|
3136 | 2740 | ret = btrfs_update_inode_fallback(trans, root, inode); |
---|
3137 | 2741 | if (ret) { /* -ENOMEM or corruption */ |
---|
3138 | 2742 | btrfs_abort_transaction(trans, ret); |
---|
.. | .. |
---|
3140 | 2744 | } |
---|
3141 | 2745 | ret = 0; |
---|
3142 | 2746 | out: |
---|
3143 | | - if (range_locked || clear_new_delalloc_bytes) { |
---|
3144 | | - unsigned int clear_bits = 0; |
---|
3145 | | - |
---|
3146 | | - if (range_locked) |
---|
3147 | | - clear_bits |= EXTENT_LOCKED; |
---|
3148 | | - if (clear_new_delalloc_bytes) |
---|
3149 | | - clear_bits |= EXTENT_DELALLOC_NEW; |
---|
3150 | | - clear_extent_bit(&BTRFS_I(inode)->io_tree, |
---|
3151 | | - ordered_extent->file_offset, |
---|
3152 | | - ordered_extent->file_offset + |
---|
3153 | | - ordered_extent->len - 1, |
---|
3154 | | - clear_bits, |
---|
3155 | | - (clear_bits & EXTENT_LOCKED) ? 1 : 0, |
---|
3156 | | - 0, &cached_state); |
---|
3157 | | - } |
---|
| 2747 | + clear_bits = EXTENT_DEFRAG; |
---|
| 2748 | + if (range_locked) |
---|
| 2749 | + clear_bits |= EXTENT_LOCKED; |
---|
| 2750 | + if (clear_new_delalloc_bytes) |
---|
| 2751 | + clear_bits |= EXTENT_DELALLOC_NEW; |
---|
| 2752 | + clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, |
---|
| 2753 | + (clear_bits & EXTENT_LOCKED) ? 1 : 0, 0, |
---|
| 2754 | + &cached_state); |
---|
3158 | 2755 | |
---|
3159 | 2756 | if (trans) |
---|
3160 | 2757 | btrfs_end_transaction(trans); |
---|
3161 | 2758 | |
---|
3162 | 2759 | if (ret || truncated) { |
---|
3163 | | - u64 start, end; |
---|
| 2760 | + u64 unwritten_start = start; |
---|
3164 | 2761 | |
---|
3165 | 2762 | /* |
---|
3166 | 2763 | * If we failed to finish this ordered extent for any reason we |
---|
.. | .. |
---|
3175 | 2772 | mapping_set_error(ordered_extent->inode->i_mapping, -EIO); |
---|
3176 | 2773 | |
---|
3177 | 2774 | if (truncated) |
---|
3178 | | - start = ordered_extent->file_offset + logical_len; |
---|
3179 | | - else |
---|
3180 | | - start = ordered_extent->file_offset; |
---|
3181 | | - end = ordered_extent->file_offset + ordered_extent->len - 1; |
---|
3182 | | - clear_extent_uptodate(io_tree, start, end, NULL); |
---|
| 2775 | + unwritten_start += logical_len; |
---|
| 2776 | + clear_extent_uptodate(io_tree, unwritten_start, end, NULL); |
---|
3183 | 2777 | |
---|
3184 | 2778 | /* Drop the cache for the part of the extent we didn't write. */ |
---|
3185 | | - btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0); |
---|
| 2779 | + btrfs_drop_extent_cache(BTRFS_I(inode), unwritten_start, end, 0); |
---|
3186 | 2780 | |
---|
3187 | 2781 | /* |
---|
3188 | 2782 | * If the ordered extent had an IOERR or something else went |
---|
.. | .. |
---|
3197 | 2791 | if ((ret || !logical_len) && |
---|
3198 | 2792 | clear_reserved_extent && |
---|
3199 | 2793 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && |
---|
3200 | | - !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) |
---|
| 2794 | + !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { |
---|
| 2795 | + /* |
---|
| 2796 | + * Discard the range before returning it back to the |
---|
| 2797 | + * free space pool |
---|
| 2798 | + */ |
---|
| 2799 | + if (ret && btrfs_test_opt(fs_info, DISCARD_SYNC)) |
---|
| 2800 | + btrfs_discard_extent(fs_info, |
---|
| 2801 | + ordered_extent->disk_bytenr, |
---|
| 2802 | + ordered_extent->disk_num_bytes, |
---|
| 2803 | + NULL); |
---|
3201 | 2804 | btrfs_free_reserved_extent(fs_info, |
---|
3202 | | - ordered_extent->start, |
---|
3203 | | - ordered_extent->disk_len, 1); |
---|
| 2805 | + ordered_extent->disk_bytenr, |
---|
| 2806 | + ordered_extent->disk_num_bytes, 1); |
---|
| 2807 | + } |
---|
3204 | 2808 | } |
---|
3205 | | - |
---|
3206 | 2809 | |
---|
3207 | 2810 | /* |
---|
3208 | 2811 | * This needs to be done to make sure anybody waiting knows we are done |
---|
3209 | 2812 | * updating everything for this ordered extent. |
---|
3210 | 2813 | */ |
---|
3211 | | - btrfs_remove_ordered_extent(inode, ordered_extent); |
---|
3212 | | - |
---|
3213 | | - /* for snapshot-aware defrag */ |
---|
3214 | | - if (new) { |
---|
3215 | | - if (ret) { |
---|
3216 | | - free_sa_defrag_extent(new); |
---|
3217 | | - atomic_dec(&fs_info->defrag_running); |
---|
3218 | | - } else { |
---|
3219 | | - relink_file_extents(new); |
---|
3220 | | - } |
---|
3221 | | - } |
---|
| 2814 | + btrfs_remove_ordered_extent(BTRFS_I(inode), ordered_extent); |
---|
3222 | 2815 | |
---|
3223 | 2816 | /* once for us */ |
---|
3224 | 2817 | btrfs_put_ordered_extent(ordered_extent); |
---|
.. | .. |
---|
3235 | 2828 | btrfs_finish_ordered_io(ordered_extent); |
---|
3236 | 2829 | } |
---|
3237 | 2830 | |
---|
3238 | | -static void btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, |
---|
3239 | | - struct extent_state *state, int uptodate) |
---|
| 2831 | +void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start, |
---|
| 2832 | + u64 end, int uptodate) |
---|
3240 | 2833 | { |
---|
3241 | | - struct inode *inode = page->mapping->host; |
---|
3242 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
| 2834 | + struct btrfs_inode *inode = BTRFS_I(page->mapping->host); |
---|
| 2835 | + struct btrfs_fs_info *fs_info = inode->root->fs_info; |
---|
3243 | 2836 | struct btrfs_ordered_extent *ordered_extent = NULL; |
---|
3244 | 2837 | struct btrfs_workqueue *wq; |
---|
3245 | | - btrfs_work_func_t func; |
---|
3246 | 2838 | |
---|
3247 | 2839 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); |
---|
3248 | 2840 | |
---|
.. | .. |
---|
3251 | 2843 | end - start + 1, uptodate)) |
---|
3252 | 2844 | return; |
---|
3253 | 2845 | |
---|
3254 | | - if (btrfs_is_free_space_inode(BTRFS_I(inode))) { |
---|
| 2846 | + if (btrfs_is_free_space_inode(inode)) |
---|
3255 | 2847 | wq = fs_info->endio_freespace_worker; |
---|
3256 | | - func = btrfs_freespace_write_helper; |
---|
3257 | | - } else { |
---|
| 2848 | + else |
---|
3258 | 2849 | wq = fs_info->endio_write_workers; |
---|
3259 | | - func = btrfs_endio_write_helper; |
---|
3260 | | - } |
---|
3261 | 2850 | |
---|
3262 | | - btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL, |
---|
3263 | | - NULL); |
---|
| 2851 | + btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); |
---|
3264 | 2852 | btrfs_queue_work(wq, &ordered_extent->work); |
---|
3265 | 2853 | } |
---|
3266 | 2854 | |
---|
3267 | | -static int __readpage_endio_check(struct inode *inode, |
---|
3268 | | - struct btrfs_io_bio *io_bio, |
---|
3269 | | - int icsum, struct page *page, |
---|
3270 | | - int pgoff, u64 start, size_t len) |
---|
| 2855 | +static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio, |
---|
| 2856 | + int icsum, struct page *page, int pgoff, u64 start, |
---|
| 2857 | + size_t len) |
---|
3271 | 2858 | { |
---|
| 2859 | + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
| 2860 | + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); |
---|
3272 | 2861 | char *kaddr; |
---|
3273 | | - u32 csum_expected; |
---|
3274 | | - u32 csum = ~(u32)0; |
---|
| 2862 | + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); |
---|
| 2863 | + u8 *csum_expected; |
---|
| 2864 | + u8 csum[BTRFS_CSUM_SIZE]; |
---|
3275 | 2865 | |
---|
3276 | | - csum_expected = *(((u32 *)io_bio->csum) + icsum); |
---|
| 2866 | + csum_expected = ((u8 *)io_bio->csum) + icsum * csum_size; |
---|
3277 | 2867 | |
---|
3278 | 2868 | kaddr = kmap_atomic(page); |
---|
3279 | | - csum = btrfs_csum_data(kaddr + pgoff, csum, len); |
---|
3280 | | - btrfs_csum_final(csum, (u8 *)&csum); |
---|
3281 | | - if (csum != csum_expected) |
---|
| 2869 | + shash->tfm = fs_info->csum_shash; |
---|
| 2870 | + |
---|
| 2871 | + crypto_shash_digest(shash, kaddr + pgoff, len, csum); |
---|
| 2872 | + |
---|
| 2873 | + if (memcmp(csum, csum_expected, csum_size)) |
---|
3282 | 2874 | goto zeroit; |
---|
3283 | 2875 | |
---|
3284 | 2876 | kunmap_atomic(kaddr); |
---|
.. | .. |
---|
3286 | 2878 | zeroit: |
---|
3287 | 2879 | btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected, |
---|
3288 | 2880 | io_bio->mirror_num); |
---|
| 2881 | + if (io_bio->device) |
---|
| 2882 | + btrfs_dev_stat_inc_and_print(io_bio->device, |
---|
| 2883 | + BTRFS_DEV_STAT_CORRUPTION_ERRS); |
---|
3289 | 2884 | memset(kaddr + pgoff, 1, len); |
---|
3290 | 2885 | flush_dcache_page(page); |
---|
3291 | 2886 | kunmap_atomic(kaddr); |
---|
.. | .. |
---|
3297 | 2892 | * if there's a match, we allow the bio to finish. If not, the code in |
---|
3298 | 2893 | * extent_io.c will try to find good copies for us. |
---|
3299 | 2894 | */ |
---|
3300 | | -static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, |
---|
3301 | | - u64 phy_offset, struct page *page, |
---|
3302 | | - u64 start, u64 end, int mirror) |
---|
| 2895 | +int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u64 phy_offset, |
---|
| 2896 | + struct page *page, u64 start, u64 end, int mirror) |
---|
3303 | 2897 | { |
---|
3304 | 2898 | size_t offset = start - page_offset(page); |
---|
3305 | 2899 | struct inode *inode = page->mapping->host; |
---|
.. | .. |
---|
3321 | 2915 | } |
---|
3322 | 2916 | |
---|
3323 | 2917 | phy_offset >>= inode->i_sb->s_blocksize_bits; |
---|
3324 | | - return __readpage_endio_check(inode, io_bio, phy_offset, page, offset, |
---|
3325 | | - start, (size_t)(end - start + 1)); |
---|
| 2918 | + return check_data_csum(inode, io_bio, phy_offset, page, offset, start, |
---|
| 2919 | + (size_t)(end - start + 1)); |
---|
3326 | 2920 | } |
---|
3327 | 2921 | |
---|
3328 | 2922 | /* |
---|
.. | .. |
---|
3343 | 2937 | if (atomic_add_unless(&inode->i_count, -1, 1)) |
---|
3344 | 2938 | return; |
---|
3345 | 2939 | |
---|
| 2940 | + atomic_inc(&fs_info->nr_delayed_iputs); |
---|
3346 | 2941 | spin_lock(&fs_info->delayed_iput_lock); |
---|
3347 | 2942 | ASSERT(list_empty(&binode->delayed_iput)); |
---|
3348 | 2943 | list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs); |
---|
3349 | 2944 | spin_unlock(&fs_info->delayed_iput_lock); |
---|
| 2945 | + if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags)) |
---|
| 2946 | + wake_up_process(fs_info->cleaner_kthread); |
---|
| 2947 | +} |
---|
| 2948 | + |
---|
| 2949 | +static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info, |
---|
| 2950 | + struct btrfs_inode *inode) |
---|
| 2951 | +{ |
---|
| 2952 | + list_del_init(&inode->delayed_iput); |
---|
| 2953 | + spin_unlock(&fs_info->delayed_iput_lock); |
---|
| 2954 | + iput(&inode->vfs_inode); |
---|
| 2955 | + if (atomic_dec_and_test(&fs_info->nr_delayed_iputs)) |
---|
| 2956 | + wake_up(&fs_info->delayed_iputs_wait); |
---|
| 2957 | + spin_lock(&fs_info->delayed_iput_lock); |
---|
| 2958 | +} |
---|
| 2959 | + |
---|
| 2960 | +static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info, |
---|
| 2961 | + struct btrfs_inode *inode) |
---|
| 2962 | +{ |
---|
| 2963 | + if (!list_empty(&inode->delayed_iput)) { |
---|
| 2964 | + spin_lock(&fs_info->delayed_iput_lock); |
---|
| 2965 | + if (!list_empty(&inode->delayed_iput)) |
---|
| 2966 | + run_delayed_iput_locked(fs_info, inode); |
---|
| 2967 | + spin_unlock(&fs_info->delayed_iput_lock); |
---|
| 2968 | + } |
---|
3350 | 2969 | } |
---|
3351 | 2970 | |
---|
3352 | 2971 | void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) |
---|
.. | .. |
---|
3358 | 2977 | |
---|
3359 | 2978 | inode = list_first_entry(&fs_info->delayed_iputs, |
---|
3360 | 2979 | struct btrfs_inode, delayed_iput); |
---|
3361 | | - list_del_init(&inode->delayed_iput); |
---|
3362 | | - spin_unlock(&fs_info->delayed_iput_lock); |
---|
3363 | | - iput(&inode->vfs_inode); |
---|
3364 | | - spin_lock(&fs_info->delayed_iput_lock); |
---|
| 2980 | + run_delayed_iput_locked(fs_info, inode); |
---|
| 2981 | + cond_resched_lock(&fs_info->delayed_iput_lock); |
---|
3365 | 2982 | } |
---|
3366 | 2983 | spin_unlock(&fs_info->delayed_iput_lock); |
---|
| 2984 | +} |
---|
| 2985 | + |
---|
| 2986 | +/** |
---|
| 2987 | + * btrfs_wait_on_delayed_iputs - wait on the delayed iputs to be done running |
---|
| 2988 | + * @fs_info - the fs_info for this fs |
---|
| 2989 | + * @return - EINTR if we were killed, 0 if nothing's pending |
---|
| 2990 | + * |
---|
| 2991 | + * This will wait on any delayed iputs that are currently running with KILLABLE |
---|
| 2992 | + * set. Once they are all done running we will return, unless we are killed in |
---|
| 2993 | + * which case we return EINTR. This helps in user operations like fallocate etc |
---|
| 2994 | + * that might get blocked on the iputs. |
---|
| 2995 | + */ |
---|
| 2996 | +int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info) |
---|
| 2997 | +{ |
---|
| 2998 | + int ret = wait_event_killable(fs_info->delayed_iputs_wait, |
---|
| 2999 | + atomic_read(&fs_info->nr_delayed_iputs) == 0); |
---|
| 3000 | + if (ret) |
---|
| 3001 | + return -EINTR; |
---|
| 3002 | + return 0; |
---|
3367 | 3003 | } |
---|
3368 | 3004 | |
---|
3369 | 3005 | /* |
---|
.. | .. |
---|
3471 | 3107 | found_key.objectid = found_key.offset; |
---|
3472 | 3108 | found_key.type = BTRFS_INODE_ITEM_KEY; |
---|
3473 | 3109 | found_key.offset = 0; |
---|
3474 | | - inode = btrfs_iget(fs_info->sb, &found_key, root, NULL); |
---|
| 3110 | + inode = btrfs_iget(fs_info->sb, last_objectid, root); |
---|
3475 | 3111 | ret = PTR_ERR_OR_ZERO(inode); |
---|
3476 | 3112 | if (ret && ret != -ENOENT) |
---|
3477 | 3113 | goto out; |
---|
3478 | 3114 | |
---|
3479 | 3115 | if (ret == -ENOENT && root == fs_info->tree_root) { |
---|
3480 | 3116 | struct btrfs_root *dead_root; |
---|
3481 | | - struct btrfs_fs_info *fs_info = root->fs_info; |
---|
3482 | 3117 | int is_dead_root = 0; |
---|
3483 | 3118 | |
---|
3484 | 3119 | /* |
---|
.. | .. |
---|
3490 | 3125 | * orphan must not get deleted. |
---|
3491 | 3126 | * find_dead_roots already ran before us, so if this |
---|
3492 | 3127 | * is a snapshot deletion, we should find the root |
---|
3493 | | - * in the dead_roots list |
---|
| 3128 | + * in the fs_roots radix tree. |
---|
3494 | 3129 | */ |
---|
3495 | | - spin_lock(&fs_info->trans_lock); |
---|
3496 | | - list_for_each_entry(dead_root, &fs_info->dead_roots, |
---|
3497 | | - root_list) { |
---|
3498 | | - if (dead_root->root_key.objectid == |
---|
3499 | | - found_key.objectid) { |
---|
3500 | | - is_dead_root = 1; |
---|
3501 | | - break; |
---|
3502 | | - } |
---|
3503 | | - } |
---|
3504 | | - spin_unlock(&fs_info->trans_lock); |
---|
| 3130 | + |
---|
| 3131 | + spin_lock(&fs_info->fs_roots_radix_lock); |
---|
| 3132 | + dead_root = radix_tree_lookup(&fs_info->fs_roots_radix, |
---|
| 3133 | + (unsigned long)found_key.objectid); |
---|
| 3134 | + if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0) |
---|
| 3135 | + is_dead_root = 1; |
---|
| 3136 | + spin_unlock(&fs_info->fs_roots_radix_lock); |
---|
| 3137 | + |
---|
3505 | 3138 | if (is_dead_root) { |
---|
3506 | 3139 | /* prevent this orphan from being found again */ |
---|
3507 | 3140 | key.offset = found_key.objectid - 1; |
---|
.. | .. |
---|
3551 | 3184 | |
---|
3552 | 3185 | /* this will do delete_inode and everything for us */ |
---|
3553 | 3186 | iput(inode); |
---|
3554 | | - if (ret) |
---|
3555 | | - goto out; |
---|
3556 | 3187 | } |
---|
3557 | 3188 | /* release the path since we're done with it */ |
---|
3558 | 3189 | btrfs_release_path(path); |
---|
.. | .. |
---|
3694 | 3325 | i_uid_write(inode, btrfs_inode_uid(leaf, inode_item)); |
---|
3695 | 3326 | i_gid_write(inode, btrfs_inode_gid(leaf, inode_item)); |
---|
3696 | 3327 | btrfs_i_size_write(BTRFS_I(inode), btrfs_inode_size(leaf, inode_item)); |
---|
| 3328 | + btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0, |
---|
| 3329 | + round_up(i_size_read(inode), fs_info->sectorsize)); |
---|
3697 | 3330 | |
---|
3698 | 3331 | inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime); |
---|
3699 | 3332 | inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime); |
---|
.. | .. |
---|
3764 | 3397 | * inode is not a directory, logging its parent unnecessarily. |
---|
3765 | 3398 | */ |
---|
3766 | 3399 | BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans; |
---|
| 3400 | + |
---|
3767 | 3401 | /* |
---|
3768 | | - * Similar reasoning for last_link_trans, needs to be set otherwise |
---|
3769 | | - * for a case like the following: |
---|
3770 | | - * |
---|
3771 | | - * mkdir A |
---|
3772 | | - * touch foo |
---|
3773 | | - * ln foo A/bar |
---|
3774 | | - * echo 2 > /proc/sys/vm/drop_caches |
---|
3775 | | - * fsync foo |
---|
3776 | | - * <power failure> |
---|
3777 | | - * |
---|
3778 | | - * Would result in link bar and directory A not existing after the power |
---|
3779 | | - * failure. |
---|
| 3402 | + * Same logic as for last_unlink_trans. We don't persist the generation |
---|
| 3403 | + * of the last transaction where this inode was used for a reflink |
---|
| 3404 | + * operation, so after eviction and reloading the inode we must be |
---|
| 3405 | + * pessimistic and assume the last transaction that modified the inode. |
---|
3780 | 3406 | */ |
---|
3781 | | - BTRFS_I(inode)->last_link_trans = BTRFS_I(inode)->last_trans; |
---|
| 3407 | + BTRFS_I(inode)->last_reflink_trans = BTRFS_I(inode)->last_trans; |
---|
3782 | 3408 | |
---|
3783 | 3409 | path->slots[0]++; |
---|
3784 | 3410 | if (inode->i_nlink != 1 || |
---|
.. | .. |
---|
3827 | 3453 | switch (inode->i_mode & S_IFMT) { |
---|
3828 | 3454 | case S_IFREG: |
---|
3829 | 3455 | inode->i_mapping->a_ops = &btrfs_aops; |
---|
3830 | | - BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
---|
3831 | 3456 | inode->i_fop = &btrfs_file_operations; |
---|
3832 | 3457 | inode->i_op = &btrfs_file_inode_operations; |
---|
3833 | 3458 | break; |
---|
.. | .. |
---|
3838 | 3463 | case S_IFLNK: |
---|
3839 | 3464 | inode->i_op = &btrfs_symlink_inode_operations; |
---|
3840 | 3465 | inode_nohighmem(inode); |
---|
3841 | | - inode->i_mapping->a_ops = &btrfs_symlink_aops; |
---|
| 3466 | + inode->i_mapping->a_ops = &btrfs_aops; |
---|
3842 | 3467 | break; |
---|
3843 | 3468 | default: |
---|
3844 | 3469 | inode->i_op = &btrfs_special_inode_operations; |
---|
.. | .. |
---|
3860 | 3485 | { |
---|
3861 | 3486 | struct btrfs_map_token token; |
---|
3862 | 3487 | |
---|
3863 | | - btrfs_init_map_token(&token); |
---|
| 3488 | + btrfs_init_map_token(&token, leaf); |
---|
3864 | 3489 | |
---|
3865 | | - btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token); |
---|
3866 | | - btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token); |
---|
3867 | | - btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size, |
---|
3868 | | - &token); |
---|
3869 | | - btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); |
---|
3870 | | - btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); |
---|
| 3490 | + btrfs_set_token_inode_uid(&token, item, i_uid_read(inode)); |
---|
| 3491 | + btrfs_set_token_inode_gid(&token, item, i_gid_read(inode)); |
---|
| 3492 | + btrfs_set_token_inode_size(&token, item, BTRFS_I(inode)->disk_i_size); |
---|
| 3493 | + btrfs_set_token_inode_mode(&token, item, inode->i_mode); |
---|
| 3494 | + btrfs_set_token_inode_nlink(&token, item, inode->i_nlink); |
---|
3871 | 3495 | |
---|
3872 | | - btrfs_set_token_timespec_sec(leaf, &item->atime, |
---|
3873 | | - inode->i_atime.tv_sec, &token); |
---|
3874 | | - btrfs_set_token_timespec_nsec(leaf, &item->atime, |
---|
3875 | | - inode->i_atime.tv_nsec, &token); |
---|
| 3496 | + btrfs_set_token_timespec_sec(&token, &item->atime, |
---|
| 3497 | + inode->i_atime.tv_sec); |
---|
| 3498 | + btrfs_set_token_timespec_nsec(&token, &item->atime, |
---|
| 3499 | + inode->i_atime.tv_nsec); |
---|
3876 | 3500 | |
---|
3877 | | - btrfs_set_token_timespec_sec(leaf, &item->mtime, |
---|
3878 | | - inode->i_mtime.tv_sec, &token); |
---|
3879 | | - btrfs_set_token_timespec_nsec(leaf, &item->mtime, |
---|
3880 | | - inode->i_mtime.tv_nsec, &token); |
---|
| 3501 | + btrfs_set_token_timespec_sec(&token, &item->mtime, |
---|
| 3502 | + inode->i_mtime.tv_sec); |
---|
| 3503 | + btrfs_set_token_timespec_nsec(&token, &item->mtime, |
---|
| 3504 | + inode->i_mtime.tv_nsec); |
---|
3881 | 3505 | |
---|
3882 | | - btrfs_set_token_timespec_sec(leaf, &item->ctime, |
---|
3883 | | - inode->i_ctime.tv_sec, &token); |
---|
3884 | | - btrfs_set_token_timespec_nsec(leaf, &item->ctime, |
---|
3885 | | - inode->i_ctime.tv_nsec, &token); |
---|
| 3506 | + btrfs_set_token_timespec_sec(&token, &item->ctime, |
---|
| 3507 | + inode->i_ctime.tv_sec); |
---|
| 3508 | + btrfs_set_token_timespec_nsec(&token, &item->ctime, |
---|
| 3509 | + inode->i_ctime.tv_nsec); |
---|
3886 | 3510 | |
---|
3887 | | - btrfs_set_token_timespec_sec(leaf, &item->otime, |
---|
3888 | | - BTRFS_I(inode)->i_otime.tv_sec, &token); |
---|
3889 | | - btrfs_set_token_timespec_nsec(leaf, &item->otime, |
---|
3890 | | - BTRFS_I(inode)->i_otime.tv_nsec, &token); |
---|
| 3511 | + btrfs_set_token_timespec_sec(&token, &item->otime, |
---|
| 3512 | + BTRFS_I(inode)->i_otime.tv_sec); |
---|
| 3513 | + btrfs_set_token_timespec_nsec(&token, &item->otime, |
---|
| 3514 | + BTRFS_I(inode)->i_otime.tv_nsec); |
---|
3891 | 3515 | |
---|
3892 | | - btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), |
---|
3893 | | - &token); |
---|
3894 | | - btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation, |
---|
3895 | | - &token); |
---|
3896 | | - btrfs_set_token_inode_sequence(leaf, item, inode_peek_iversion(inode), |
---|
3897 | | - &token); |
---|
3898 | | - btrfs_set_token_inode_transid(leaf, item, trans->transid, &token); |
---|
3899 | | - btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token); |
---|
3900 | | - btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token); |
---|
3901 | | - btrfs_set_token_inode_block_group(leaf, item, 0, &token); |
---|
| 3516 | + btrfs_set_token_inode_nbytes(&token, item, inode_get_bytes(inode)); |
---|
| 3517 | + btrfs_set_token_inode_generation(&token, item, |
---|
| 3518 | + BTRFS_I(inode)->generation); |
---|
| 3519 | + btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode)); |
---|
| 3520 | + btrfs_set_token_inode_transid(&token, item, trans->transid); |
---|
| 3521 | + btrfs_set_token_inode_rdev(&token, item, inode->i_rdev); |
---|
| 3522 | + btrfs_set_token_inode_flags(&token, item, BTRFS_I(inode)->flags); |
---|
| 3523 | + btrfs_set_token_inode_block_group(&token, item, 0); |
---|
3902 | 3524 | } |
---|
3903 | 3525 | |
---|
3904 | 3526 | /* |
---|
.. | .. |
---|
3931 | 3553 | |
---|
3932 | 3554 | fill_inode_item(trans, leaf, inode_item, inode); |
---|
3933 | 3555 | btrfs_mark_buffer_dirty(leaf); |
---|
3934 | | - btrfs_set_inode_last_trans(trans, inode); |
---|
| 3556 | + btrfs_set_inode_last_trans(trans, BTRFS_I(inode)); |
---|
3935 | 3557 | ret = 0; |
---|
3936 | 3558 | failed: |
---|
3937 | 3559 | btrfs_free_path(path); |
---|
.. | .. |
---|
3961 | 3583 | |
---|
3962 | 3584 | ret = btrfs_delayed_update_inode(trans, root, inode); |
---|
3963 | 3585 | if (!ret) |
---|
3964 | | - btrfs_set_inode_last_trans(trans, inode); |
---|
| 3586 | + btrfs_set_inode_last_trans(trans, BTRFS_I(inode)); |
---|
3965 | 3587 | return ret; |
---|
3966 | 3588 | } |
---|
3967 | 3589 | |
---|
.. | .. |
---|
3994 | 3616 | struct btrfs_fs_info *fs_info = root->fs_info; |
---|
3995 | 3617 | struct btrfs_path *path; |
---|
3996 | 3618 | int ret = 0; |
---|
3997 | | - struct extent_buffer *leaf; |
---|
3998 | 3619 | struct btrfs_dir_item *di; |
---|
3999 | | - struct btrfs_key key; |
---|
4000 | 3620 | u64 index; |
---|
4001 | 3621 | u64 ino = btrfs_ino(inode); |
---|
4002 | 3622 | u64 dir_ino = btrfs_ino(dir); |
---|
.. | .. |
---|
4010 | 3630 | path->leave_spinning = 1; |
---|
4011 | 3631 | di = btrfs_lookup_dir_item(trans, root, path, dir_ino, |
---|
4012 | 3632 | name, name_len, -1); |
---|
4013 | | - if (IS_ERR(di)) { |
---|
4014 | | - ret = PTR_ERR(di); |
---|
| 3633 | + if (IS_ERR_OR_NULL(di)) { |
---|
| 3634 | + ret = di ? PTR_ERR(di) : -ENOENT; |
---|
4015 | 3635 | goto err; |
---|
4016 | 3636 | } |
---|
4017 | | - if (!di) { |
---|
4018 | | - ret = -ENOENT; |
---|
4019 | | - goto err; |
---|
4020 | | - } |
---|
4021 | | - leaf = path->nodes[0]; |
---|
4022 | | - btrfs_dir_item_key_to_cpu(leaf, di, &key); |
---|
4023 | 3637 | ret = btrfs_delete_one_dir_name(trans, root, path, di); |
---|
4024 | 3638 | if (ret) |
---|
4025 | 3639 | goto err; |
---|
.. | .. |
---|
4072 | 3686 | ret = 0; |
---|
4073 | 3687 | else if (ret) |
---|
4074 | 3688 | btrfs_abort_transaction(trans, ret); |
---|
| 3689 | + |
---|
| 3690 | + /* |
---|
| 3691 | + * If we have a pending delayed iput we could end up with the final iput |
---|
| 3692 | + * being run in btrfs-cleaner context. If we have enough of these built |
---|
| 3693 | + * up we can end up burning a lot of time in btrfs-cleaner without any |
---|
| 3694 | + * way to throttle the unlinks. Since we're currently holding a ref on |
---|
| 3695 | + * the inode we can run the delayed iput here without any issues as the |
---|
| 3696 | + * final iput won't be done until after we drop the ref we're currently |
---|
| 3697 | + * holding. |
---|
| 3698 | + */ |
---|
| 3699 | + btrfs_run_delayed_iput(fs_info, inode); |
---|
4075 | 3700 | err: |
---|
4076 | 3701 | btrfs_free_path(path); |
---|
4077 | 3702 | if (ret) |
---|
.. | .. |
---|
4120 | 3745 | * 1 for the inode ref |
---|
4121 | 3746 | * 1 for the inode |
---|
4122 | 3747 | */ |
---|
4123 | | - return btrfs_start_transaction_fallback_global_rsv(root, 5, 5); |
---|
| 3748 | + return btrfs_start_transaction_fallback_global_rsv(root, 5); |
---|
4124 | 3749 | } |
---|
4125 | 3750 | |
---|
4126 | 3751 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) |
---|
.. | .. |
---|
4187 | 3812 | di = btrfs_lookup_dir_item(trans, root, path, dir_ino, |
---|
4188 | 3813 | name, name_len, -1); |
---|
4189 | 3814 | if (IS_ERR_OR_NULL(di)) { |
---|
4190 | | - if (!di) |
---|
4191 | | - ret = -ENOENT; |
---|
4192 | | - else |
---|
4193 | | - ret = PTR_ERR(di); |
---|
| 3815 | + ret = di ? PTR_ERR(di) : -ENOENT; |
---|
4194 | 3816 | goto out; |
---|
4195 | 3817 | } |
---|
4196 | 3818 | |
---|
.. | .. |
---|
4393 | 4015 | * again is not run concurrently. |
---|
4394 | 4016 | */ |
---|
4395 | 4017 | spin_lock(&dest->root_item_lock); |
---|
4396 | | - root_flags = btrfs_root_flags(&dest->root_item); |
---|
4397 | | - if (dest->send_in_progress == 0) { |
---|
4398 | | - btrfs_set_root_flags(&dest->root_item, |
---|
4399 | | - root_flags | BTRFS_ROOT_SUBVOL_DEAD); |
---|
4400 | | - spin_unlock(&dest->root_item_lock); |
---|
4401 | | - } else { |
---|
| 4018 | + if (dest->send_in_progress) { |
---|
4402 | 4019 | spin_unlock(&dest->root_item_lock); |
---|
4403 | 4020 | btrfs_warn(fs_info, |
---|
4404 | 4021 | "attempt to delete subvolume %llu during send", |
---|
4405 | 4022 | dest->root_key.objectid); |
---|
4406 | 4023 | return -EPERM; |
---|
4407 | 4024 | } |
---|
| 4025 | + if (atomic_read(&dest->nr_swapfiles)) { |
---|
| 4026 | + spin_unlock(&dest->root_item_lock); |
---|
| 4027 | + btrfs_warn(fs_info, |
---|
| 4028 | + "attempt to delete subvolume %llu with active swapfile", |
---|
| 4029 | + root->root_key.objectid); |
---|
| 4030 | + return -EPERM; |
---|
| 4031 | + } |
---|
| 4032 | + root_flags = btrfs_root_flags(&dest->root_item); |
---|
| 4033 | + btrfs_set_root_flags(&dest->root_item, |
---|
| 4034 | + root_flags | BTRFS_ROOT_SUBVOL_DEAD); |
---|
| 4035 | + spin_unlock(&dest->root_item_lock); |
---|
4408 | 4036 | |
---|
4409 | 4037 | down_write(&fs_info->subvol_sem); |
---|
4410 | 4038 | |
---|
.. | .. |
---|
4487 | 4115 | err = ret; |
---|
4488 | 4116 | inode->i_flags |= S_DEAD; |
---|
4489 | 4117 | out_release: |
---|
4490 | | - btrfs_subvolume_release_metadata(fs_info, &block_rsv); |
---|
| 4118 | + btrfs_subvolume_release_metadata(root, &block_rsv); |
---|
4491 | 4119 | out_up_write: |
---|
4492 | 4120 | up_write(&fs_info->subvol_sem); |
---|
4493 | 4121 | if (err) { |
---|
.. | .. |
---|
4566 | 4194 | return err; |
---|
4567 | 4195 | } |
---|
4568 | 4196 | |
---|
4569 | | -static int truncate_space_check(struct btrfs_trans_handle *trans, |
---|
4570 | | - struct btrfs_root *root, |
---|
4571 | | - u64 bytes_deleted) |
---|
4572 | | -{ |
---|
4573 | | - struct btrfs_fs_info *fs_info = root->fs_info; |
---|
4574 | | - int ret; |
---|
4575 | | - |
---|
4576 | | - /* |
---|
4577 | | - * This is only used to apply pressure to the enospc system, we don't |
---|
4578 | | - * intend to use this reservation at all. |
---|
4579 | | - */ |
---|
4580 | | - bytes_deleted = btrfs_csum_bytes_to_leaves(fs_info, bytes_deleted); |
---|
4581 | | - bytes_deleted *= fs_info->nodesize; |
---|
4582 | | - ret = btrfs_block_rsv_add(root, &fs_info->trans_block_rsv, |
---|
4583 | | - bytes_deleted, BTRFS_RESERVE_NO_FLUSH); |
---|
4584 | | - if (!ret) { |
---|
4585 | | - trace_btrfs_space_reservation(fs_info, "transaction", |
---|
4586 | | - trans->transid, |
---|
4587 | | - bytes_deleted, 1); |
---|
4588 | | - trans->bytes_reserved += bytes_deleted; |
---|
4589 | | - } |
---|
4590 | | - return ret; |
---|
4591 | | - |
---|
4592 | | -} |
---|
4593 | | - |
---|
4594 | 4197 | /* |
---|
4595 | 4198 | * Return this if we need to call truncate_block for the last bit of the |
---|
4596 | 4199 | * truncate. |
---|
.. | .. |
---|
4635 | 4238 | u64 bytes_deleted = 0; |
---|
4636 | 4239 | bool be_nice = false; |
---|
4637 | 4240 | bool should_throttle = false; |
---|
4638 | | - bool should_end = false; |
---|
| 4241 | + const u64 lock_start = ALIGN_DOWN(new_size, fs_info->sectorsize); |
---|
| 4242 | + struct extent_state *cached_state = NULL; |
---|
4639 | 4243 | |
---|
4640 | 4244 | BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); |
---|
4641 | 4245 | |
---|
4642 | 4246 | /* |
---|
4643 | | - * for non-free space inodes and ref cows, we want to back off from |
---|
4644 | | - * time to time |
---|
| 4247 | + * For non-free space inodes in shareable roots, we want to back |
---|
| 4248 | + * off from time to time. This means all inodes in subvolume roots, |
---|
| 4249 | + * reloc roots, and data reloc roots. |
---|
4645 | 4250 | */ |
---|
4646 | 4251 | if (!btrfs_is_free_space_inode(BTRFS_I(inode)) && |
---|
4647 | | - test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
---|
| 4252 | + test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) |
---|
4648 | 4253 | be_nice = true; |
---|
4649 | 4254 | |
---|
4650 | 4255 | path = btrfs_alloc_path(); |
---|
.. | .. |
---|
4652 | 4257 | return -ENOMEM; |
---|
4653 | 4258 | path->reada = READA_BACK; |
---|
4654 | 4259 | |
---|
4655 | | - /* |
---|
4656 | | - * We want to drop from the next block forward in case this new size is |
---|
4657 | | - * not block aligned since we will be keeping the last block of the |
---|
4658 | | - * extent just the way it is. |
---|
4659 | | - */ |
---|
4660 | | - if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || |
---|
4661 | | - root == fs_info->tree_root) |
---|
| 4260 | + if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
---|
| 4261 | + lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1, |
---|
| 4262 | + &cached_state); |
---|
| 4263 | + |
---|
| 4264 | + /* |
---|
| 4265 | + * We want to drop from the next block forward in case this |
---|
| 4266 | + * new size is not block aligned since we will be keeping the |
---|
| 4267 | + * last block of the extent just the way it is. |
---|
| 4268 | + */ |
---|
4662 | 4269 | btrfs_drop_extent_cache(BTRFS_I(inode), ALIGN(new_size, |
---|
4663 | 4270 | fs_info->sectorsize), |
---|
4664 | 4271 | (u64)-1, 0); |
---|
| 4272 | + } |
---|
4665 | 4273 | |
---|
4666 | 4274 | /* |
---|
4667 | 4275 | * This function is also used to drop the items in the log tree before |
---|
4668 | 4276 | * we relog the inode, so if root != BTRFS_I(inode)->root, it means |
---|
4669 | | - * it is used to drop the loged items. So we shouldn't kill the delayed |
---|
| 4277 | + * it is used to drop the logged items. So we shouldn't kill the delayed |
---|
4670 | 4278 | * items. |
---|
4671 | 4279 | */ |
---|
4672 | 4280 | if (min_type == 0 && root == BTRFS_I(inode)->root) |
---|
.. | .. |
---|
4688 | 4296 | goto out; |
---|
4689 | 4297 | } |
---|
4690 | 4298 | |
---|
4691 | | - path->leave_spinning = 1; |
---|
4692 | 4299 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
---|
4693 | 4300 | if (ret < 0) |
---|
4694 | 4301 | goto out; |
---|
.. | .. |
---|
4704 | 4311 | } |
---|
4705 | 4312 | |
---|
4706 | 4313 | while (1) { |
---|
| 4314 | + u64 clear_start = 0, clear_len = 0; |
---|
| 4315 | + |
---|
4707 | 4316 | fi = NULL; |
---|
4708 | 4317 | leaf = path->nodes[0]; |
---|
4709 | 4318 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
---|
.. | .. |
---|
4754 | 4363 | |
---|
4755 | 4364 | if (extent_type != BTRFS_FILE_EXTENT_INLINE) { |
---|
4756 | 4365 | u64 num_dec; |
---|
| 4366 | + |
---|
| 4367 | + clear_start = found_key.offset; |
---|
4757 | 4368 | extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); |
---|
4758 | 4369 | if (!del_item) { |
---|
4759 | 4370 | u64 orig_num_bytes = |
---|
.. | .. |
---|
4761 | 4372 | extent_num_bytes = ALIGN(new_size - |
---|
4762 | 4373 | found_key.offset, |
---|
4763 | 4374 | fs_info->sectorsize); |
---|
| 4375 | + clear_start = ALIGN(new_size, fs_info->sectorsize); |
---|
4764 | 4376 | btrfs_set_file_extent_num_bytes(leaf, fi, |
---|
4765 | 4377 | extent_num_bytes); |
---|
4766 | 4378 | num_dec = (orig_num_bytes - |
---|
4767 | 4379 | extent_num_bytes); |
---|
4768 | | - if (test_bit(BTRFS_ROOT_REF_COWS, |
---|
| 4380 | + if (test_bit(BTRFS_ROOT_SHAREABLE, |
---|
4769 | 4381 | &root->state) && |
---|
4770 | 4382 | extent_start != 0) |
---|
4771 | 4383 | inode_sub_bytes(inode, num_dec); |
---|
.. | .. |
---|
4781 | 4393 | num_dec = btrfs_file_extent_num_bytes(leaf, fi); |
---|
4782 | 4394 | if (extent_start != 0) { |
---|
4783 | 4395 | found_extent = 1; |
---|
4784 | | - if (test_bit(BTRFS_ROOT_REF_COWS, |
---|
| 4396 | + if (test_bit(BTRFS_ROOT_SHAREABLE, |
---|
4785 | 4397 | &root->state)) |
---|
4786 | 4398 | inode_sub_bytes(inode, num_dec); |
---|
4787 | 4399 | } |
---|
4788 | 4400 | } |
---|
| 4401 | + clear_len = num_dec; |
---|
4789 | 4402 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
---|
4790 | 4403 | /* |
---|
4791 | 4404 | * we can't truncate inline items that have had |
---|
.. | .. |
---|
4799 | 4412 | |
---|
4800 | 4413 | btrfs_set_file_extent_ram_bytes(leaf, fi, size); |
---|
4801 | 4414 | size = btrfs_file_extent_calc_inline_size(size); |
---|
4802 | | - btrfs_truncate_item(root->fs_info, path, size, 1); |
---|
| 4415 | + btrfs_truncate_item(path, size, 1); |
---|
4803 | 4416 | } else if (!del_item) { |
---|
4804 | 4417 | /* |
---|
4805 | 4418 | * We have to bail so the last_size is set to |
---|
.. | .. |
---|
4807 | 4420 | */ |
---|
4808 | 4421 | ret = NEED_TRUNCATE_BLOCK; |
---|
4809 | 4422 | break; |
---|
| 4423 | + } else { |
---|
| 4424 | + /* |
---|
| 4425 | + * Inline extents are special, we just treat |
---|
| 4426 | + * them as a full sector worth in the file |
---|
| 4427 | + * extent tree just for simplicity's sake. |
---|
| 4428 | + */ |
---|
| 4429 | + clear_len = fs_info->sectorsize; |
---|
4810 | 4430 | } |
---|
4811 | 4431 | |
---|
4812 | | - if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
---|
| 4432 | + if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) |
---|
4813 | 4433 | inode_sub_bytes(inode, item_end + 1 - new_size); |
---|
4814 | 4434 | } |
---|
4815 | 4435 | delete: |
---|
| 4436 | + /* |
---|
| 4437 | + * We use btrfs_truncate_inode_items() to clean up log trees for |
---|
| 4438 | + * multiple fsyncs, and in this case we don't want to clear the |
---|
| 4439 | + * file extent range because it's just the log. |
---|
| 4440 | + */ |
---|
| 4441 | + if (root == BTRFS_I(inode)->root) { |
---|
| 4442 | + ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode), |
---|
| 4443 | + clear_start, clear_len); |
---|
| 4444 | + if (ret) { |
---|
| 4445 | + btrfs_abort_transaction(trans, ret); |
---|
| 4446 | + break; |
---|
| 4447 | + } |
---|
| 4448 | + } |
---|
| 4449 | + |
---|
4816 | 4450 | if (del_item) |
---|
4817 | 4451 | last_size = found_key.offset; |
---|
4818 | 4452 | else |
---|
.. | .. |
---|
4836 | 4470 | should_throttle = false; |
---|
4837 | 4471 | |
---|
4838 | 4472 | if (found_extent && |
---|
4839 | | - (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || |
---|
4840 | | - root == fs_info->tree_root)) { |
---|
4841 | | - btrfs_set_path_blocking(path); |
---|
| 4473 | + root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
---|
| 4474 | + struct btrfs_ref ref = { 0 }; |
---|
| 4475 | + |
---|
4842 | 4476 | bytes_deleted += extent_num_bytes; |
---|
4843 | | - ret = btrfs_free_extent(trans, root, extent_start, |
---|
4844 | | - extent_num_bytes, 0, |
---|
4845 | | - btrfs_header_owner(leaf), |
---|
4846 | | - ino, extent_offset); |
---|
| 4477 | + |
---|
| 4478 | + btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, |
---|
| 4479 | + extent_start, extent_num_bytes, 0); |
---|
| 4480 | + ref.real_root = root->root_key.objectid; |
---|
| 4481 | + btrfs_init_data_ref(&ref, btrfs_header_owner(leaf), |
---|
| 4482 | + ino, extent_offset); |
---|
| 4483 | + ret = btrfs_free_extent(trans, &ref); |
---|
4847 | 4484 | if (ret) { |
---|
4848 | 4485 | btrfs_abort_transaction(trans, ret); |
---|
4849 | 4486 | break; |
---|
4850 | 4487 | } |
---|
4851 | | - if (btrfs_should_throttle_delayed_refs(trans, fs_info)) |
---|
4852 | | - btrfs_async_run_delayed_refs(fs_info, |
---|
4853 | | - trans->delayed_ref_updates * 2, |
---|
4854 | | - trans->transid, 0); |
---|
4855 | 4488 | if (be_nice) { |
---|
4856 | | - if (truncate_space_check(trans, root, |
---|
4857 | | - extent_num_bytes)) { |
---|
4858 | | - should_end = true; |
---|
4859 | | - } |
---|
4860 | | - if (btrfs_should_throttle_delayed_refs(trans, |
---|
4861 | | - fs_info)) |
---|
| 4489 | + if (btrfs_should_throttle_delayed_refs(trans)) |
---|
4862 | 4490 | should_throttle = true; |
---|
4863 | 4491 | } |
---|
4864 | 4492 | } |
---|
.. | .. |
---|
4868 | 4496 | |
---|
4869 | 4497 | if (path->slots[0] == 0 || |
---|
4870 | 4498 | path->slots[0] != pending_del_slot || |
---|
4871 | | - should_throttle || should_end) { |
---|
| 4499 | + should_throttle) { |
---|
4872 | 4500 | if (pending_del_nr) { |
---|
4873 | 4501 | ret = btrfs_del_items(trans, root, path, |
---|
4874 | 4502 | pending_del_slot, |
---|
.. | .. |
---|
4880 | 4508 | pending_del_nr = 0; |
---|
4881 | 4509 | } |
---|
4882 | 4510 | btrfs_release_path(path); |
---|
4883 | | - if (should_throttle) { |
---|
4884 | | - unsigned long updates = trans->delayed_ref_updates; |
---|
4885 | | - if (updates) { |
---|
4886 | | - trans->delayed_ref_updates = 0; |
---|
4887 | | - ret = btrfs_run_delayed_refs(trans, |
---|
4888 | | - updates * 2); |
---|
4889 | | - if (ret) |
---|
4890 | | - break; |
---|
4891 | | - } |
---|
4892 | | - } |
---|
| 4511 | + |
---|
4893 | 4512 | /* |
---|
4894 | | - * if we failed to refill our space rsv, bail out |
---|
4895 | | - * and let the transaction restart |
---|
| 4513 | + * We can generate a lot of delayed refs, so we need to |
---|
| 4514 | + * throttle every once in a while and make sure we're |
---|
| 4515 | + * adding enough space to keep up with the work we are |
---|
| 4516 | + * generating. Since we hold a transaction here we |
---|
| 4517 | + * can't flush, and we don't want to FLUSH_LIMIT because |
---|
| 4518 | + * we could have generated too many delayed refs to |
---|
| 4519 | + * actually allocate, so just bail if we're short and |
---|
| 4520 | + * let the normal reservation dance happen higher up. |
---|
4896 | 4521 | */ |
---|
4897 | | - if (should_end) { |
---|
4898 | | - ret = -EAGAIN; |
---|
4899 | | - break; |
---|
| 4522 | + if (should_throttle) { |
---|
| 4523 | + ret = btrfs_delayed_refs_rsv_refill(fs_info, |
---|
| 4524 | + BTRFS_RESERVE_NO_FLUSH); |
---|
| 4525 | + if (ret) { |
---|
| 4526 | + ret = -EAGAIN; |
---|
| 4527 | + break; |
---|
| 4528 | + } |
---|
4900 | 4529 | } |
---|
4901 | 4530 | goto search_again; |
---|
4902 | 4531 | } else { |
---|
.. | .. |
---|
4918 | 4547 | ASSERT(last_size >= new_size); |
---|
4919 | 4548 | if (!ret && last_size > new_size) |
---|
4920 | 4549 | last_size = new_size; |
---|
4921 | | - btrfs_ordered_update_i_size(inode, last_size, NULL); |
---|
| 4550 | + btrfs_inode_safe_disk_i_size_write(inode, last_size); |
---|
| 4551 | + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, |
---|
| 4552 | + (u64)-1, &cached_state); |
---|
4922 | 4553 | } |
---|
4923 | 4554 | |
---|
4924 | 4555 | btrfs_free_path(path); |
---|
4925 | | - |
---|
4926 | | - if (be_nice && bytes_deleted > SZ_32M && (ret >= 0 || ret == -EAGAIN)) { |
---|
4927 | | - unsigned long updates = trans->delayed_ref_updates; |
---|
4928 | | - int err; |
---|
4929 | | - |
---|
4930 | | - if (updates) { |
---|
4931 | | - trans->delayed_ref_updates = 0; |
---|
4932 | | - err = btrfs_run_delayed_refs(trans, updates * 2); |
---|
4933 | | - if (err) |
---|
4934 | | - ret = err; |
---|
4935 | | - } |
---|
4936 | | - } |
---|
4937 | 4556 | return ret; |
---|
4938 | 4557 | } |
---|
4939 | 4558 | |
---|
.. | .. |
---|
4958 | 4577 | struct extent_state *cached_state = NULL; |
---|
4959 | 4578 | struct extent_changeset *data_reserved = NULL; |
---|
4960 | 4579 | char *kaddr; |
---|
| 4580 | + bool only_release_metadata = false; |
---|
4961 | 4581 | u32 blocksize = fs_info->sectorsize; |
---|
4962 | 4582 | pgoff_t index = from >> PAGE_SHIFT; |
---|
4963 | 4583 | unsigned offset = from & (blocksize - 1); |
---|
4964 | 4584 | struct page *page; |
---|
4965 | 4585 | gfp_t mask = btrfs_alloc_write_mask(mapping); |
---|
| 4586 | + size_t write_bytes = blocksize; |
---|
4966 | 4587 | int ret = 0; |
---|
4967 | 4588 | u64 block_start; |
---|
4968 | 4589 | u64 block_end; |
---|
.. | .. |
---|
4974 | 4595 | block_start = round_down(from, blocksize); |
---|
4975 | 4596 | block_end = block_start + blocksize - 1; |
---|
4976 | 4597 | |
---|
4977 | | - ret = btrfs_delalloc_reserve_space(inode, &data_reserved, |
---|
4978 | | - block_start, blocksize); |
---|
4979 | | - if (ret) |
---|
| 4598 | + ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, |
---|
| 4599 | + block_start, blocksize); |
---|
| 4600 | + if (ret < 0) { |
---|
| 4601 | + if (btrfs_check_nocow_lock(BTRFS_I(inode), block_start, |
---|
| 4602 | + &write_bytes) > 0) { |
---|
| 4603 | + /* For nocow case, no need to reserve data space */ |
---|
| 4604 | + only_release_metadata = true; |
---|
| 4605 | + } else { |
---|
| 4606 | + goto out; |
---|
| 4607 | + } |
---|
| 4608 | + } |
---|
| 4609 | + ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), blocksize); |
---|
| 4610 | + if (ret < 0) { |
---|
| 4611 | + if (!only_release_metadata) |
---|
| 4612 | + btrfs_free_reserved_data_space(BTRFS_I(inode), |
---|
| 4613 | + data_reserved, block_start, blocksize); |
---|
4980 | 4614 | goto out; |
---|
4981 | | - |
---|
| 4615 | + } |
---|
4982 | 4616 | again: |
---|
4983 | 4617 | page = find_or_create_page(mapping, index, mask); |
---|
4984 | 4618 | if (!page) { |
---|
4985 | | - btrfs_delalloc_release_space(inode, data_reserved, |
---|
| 4619 | + btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, |
---|
4986 | 4620 | block_start, blocksize, true); |
---|
4987 | 4621 | btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize); |
---|
4988 | 4622 | ret = -ENOMEM; |
---|
.. | .. |
---|
5007 | 4641 | lock_extent_bits(io_tree, block_start, block_end, &cached_state); |
---|
5008 | 4642 | set_page_extent_mapped(page); |
---|
5009 | 4643 | |
---|
5010 | | - ordered = btrfs_lookup_ordered_extent(inode, block_start); |
---|
| 4644 | + ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), block_start); |
---|
5011 | 4645 | if (ordered) { |
---|
5012 | 4646 | unlock_extent_cached(io_tree, block_start, block_end, |
---|
5013 | 4647 | &cached_state); |
---|
5014 | 4648 | unlock_page(page); |
---|
5015 | 4649 | put_page(page); |
---|
5016 | | - btrfs_start_ordered_extent(inode, ordered, 1); |
---|
| 4650 | + btrfs_start_ordered_extent(ordered, 1); |
---|
5017 | 4651 | btrfs_put_ordered_extent(ordered); |
---|
5018 | 4652 | goto again; |
---|
5019 | 4653 | } |
---|
5020 | 4654 | |
---|
5021 | 4655 | clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end, |
---|
5022 | | - EXTENT_DIRTY | EXTENT_DELALLOC | |
---|
5023 | | - EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, |
---|
5024 | | - 0, 0, &cached_state); |
---|
| 4656 | + EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, |
---|
| 4657 | + 0, 0, &cached_state); |
---|
5025 | 4658 | |
---|
5026 | | - ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0, |
---|
5027 | | - &cached_state, 0); |
---|
| 4659 | + ret = btrfs_set_extent_delalloc(BTRFS_I(inode), block_start, block_end, 0, |
---|
| 4660 | + &cached_state); |
---|
5028 | 4661 | if (ret) { |
---|
5029 | 4662 | unlock_extent_cached(io_tree, block_start, block_end, |
---|
5030 | 4663 | &cached_state); |
---|
.. | .. |
---|
5048 | 4681 | set_page_dirty(page); |
---|
5049 | 4682 | unlock_extent_cached(io_tree, block_start, block_end, &cached_state); |
---|
5050 | 4683 | |
---|
| 4684 | + if (only_release_metadata) |
---|
| 4685 | + set_extent_bit(&BTRFS_I(inode)->io_tree, block_start, |
---|
| 4686 | + block_end, EXTENT_NORESERVE, NULL, NULL, |
---|
| 4687 | + GFP_NOFS); |
---|
| 4688 | + |
---|
5051 | 4689 | out_unlock: |
---|
5052 | | - if (ret) |
---|
5053 | | - btrfs_delalloc_release_space(inode, data_reserved, block_start, |
---|
5054 | | - blocksize, true); |
---|
| 4690 | + if (ret) { |
---|
| 4691 | + if (only_release_metadata) |
---|
| 4692 | + btrfs_delalloc_release_metadata(BTRFS_I(inode), |
---|
| 4693 | + blocksize, true); |
---|
| 4694 | + else |
---|
| 4695 | + btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, |
---|
| 4696 | + block_start, blocksize, true); |
---|
| 4697 | + } |
---|
5055 | 4698 | btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize); |
---|
5056 | 4699 | unlock_page(page); |
---|
5057 | 4700 | put_page(page); |
---|
5058 | 4701 | out: |
---|
| 4702 | + if (only_release_metadata) |
---|
| 4703 | + btrfs_check_nocow_unlock(BTRFS_I(inode)); |
---|
5059 | 4704 | extent_changeset_free(data_reserved); |
---|
5060 | 4705 | return ret; |
---|
5061 | 4706 | } |
---|
.. | .. |
---|
5137 | 4782 | if (size <= hole_start) |
---|
5138 | 4783 | return 0; |
---|
5139 | 4784 | |
---|
5140 | | - while (1) { |
---|
5141 | | - struct btrfs_ordered_extent *ordered; |
---|
5142 | | - |
---|
5143 | | - lock_extent_bits(io_tree, hole_start, block_end - 1, |
---|
5144 | | - &cached_state); |
---|
5145 | | - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), hole_start, |
---|
5146 | | - block_end - hole_start); |
---|
5147 | | - if (!ordered) |
---|
5148 | | - break; |
---|
5149 | | - unlock_extent_cached(io_tree, hole_start, block_end - 1, |
---|
5150 | | - &cached_state); |
---|
5151 | | - btrfs_start_ordered_extent(inode, ordered, 1); |
---|
5152 | | - btrfs_put_ordered_extent(ordered); |
---|
5153 | | - } |
---|
5154 | | - |
---|
| 4785 | + btrfs_lock_and_flush_ordered_range(BTRFS_I(inode), hole_start, |
---|
| 4786 | + block_end - 1, &cached_state); |
---|
5155 | 4787 | cur_offset = hole_start; |
---|
5156 | 4788 | while (1) { |
---|
5157 | 4789 | em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset, |
---|
5158 | | - block_end - cur_offset, 0); |
---|
| 4790 | + block_end - cur_offset); |
---|
5159 | 4791 | if (IS_ERR(em)) { |
---|
5160 | 4792 | err = PTR_ERR(em); |
---|
5161 | 4793 | em = NULL; |
---|
.. | .. |
---|
5163 | 4795 | } |
---|
5164 | 4796 | last_byte = min(extent_map_end(em), block_end); |
---|
5165 | 4797 | last_byte = ALIGN(last_byte, fs_info->sectorsize); |
---|
| 4798 | + hole_size = last_byte - cur_offset; |
---|
| 4799 | + |
---|
5166 | 4800 | if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { |
---|
5167 | 4801 | struct extent_map *hole_em; |
---|
5168 | | - hole_size = last_byte - cur_offset; |
---|
5169 | 4802 | |
---|
5170 | 4803 | err = maybe_insert_hole(root, inode, cur_offset, |
---|
5171 | 4804 | hole_size); |
---|
5172 | 4805 | if (err) |
---|
5173 | 4806 | break; |
---|
| 4807 | + |
---|
| 4808 | + err = btrfs_inode_set_file_extent_range(BTRFS_I(inode), |
---|
| 4809 | + cur_offset, hole_size); |
---|
| 4810 | + if (err) |
---|
| 4811 | + break; |
---|
| 4812 | + |
---|
5174 | 4813 | btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset, |
---|
5175 | 4814 | cur_offset + hole_size - 1, 0); |
---|
5176 | 4815 | hole_em = alloc_extent_map(); |
---|
.. | .. |
---|
5187 | 4826 | hole_em->block_len = 0; |
---|
5188 | 4827 | hole_em->orig_block_len = 0; |
---|
5189 | 4828 | hole_em->ram_bytes = hole_size; |
---|
5190 | | - hole_em->bdev = fs_info->fs_devices->latest_bdev; |
---|
5191 | 4829 | hole_em->compress_type = BTRFS_COMPRESS_NONE; |
---|
5192 | 4830 | hole_em->generation = fs_info->generation; |
---|
5193 | 4831 | |
---|
.. | .. |
---|
5203 | 4841 | hole_size - 1, 0); |
---|
5204 | 4842 | } |
---|
5205 | 4843 | free_extent_map(hole_em); |
---|
| 4844 | + } else { |
---|
| 4845 | + err = btrfs_inode_set_file_extent_range(BTRFS_I(inode), |
---|
| 4846 | + cur_offset, hole_size); |
---|
| 4847 | + if (err) |
---|
| 4848 | + break; |
---|
5206 | 4849 | } |
---|
5207 | 4850 | next: |
---|
5208 | 4851 | free_extent_map(em); |
---|
.. | .. |
---|
5246 | 4889 | * truncation, it must capture all writes that happened before |
---|
5247 | 4890 | * this truncation. |
---|
5248 | 4891 | */ |
---|
5249 | | - btrfs_wait_for_snapshot_creation(root); |
---|
| 4892 | + btrfs_drew_write_lock(&root->snapshot_lock); |
---|
5250 | 4893 | ret = btrfs_cont_expand(inode, oldsize, newsize); |
---|
5251 | 4894 | if (ret) { |
---|
5252 | | - btrfs_end_write_no_snapshotting(root); |
---|
| 4895 | + btrfs_drew_write_unlock(&root->snapshot_lock); |
---|
5253 | 4896 | return ret; |
---|
5254 | 4897 | } |
---|
5255 | 4898 | |
---|
5256 | 4899 | trans = btrfs_start_transaction(root, 1); |
---|
5257 | 4900 | if (IS_ERR(trans)) { |
---|
5258 | | - btrfs_end_write_no_snapshotting(root); |
---|
| 4901 | + btrfs_drew_write_unlock(&root->snapshot_lock); |
---|
5259 | 4902 | return PTR_ERR(trans); |
---|
5260 | 4903 | } |
---|
5261 | 4904 | |
---|
5262 | 4905 | i_size_write(inode, newsize); |
---|
5263 | | - btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); |
---|
| 4906 | + btrfs_inode_safe_disk_i_size_write(inode, 0); |
---|
5264 | 4907 | pagecache_isize_extended(inode, oldsize, newsize); |
---|
5265 | 4908 | ret = btrfs_update_inode(trans, root, inode); |
---|
5266 | | - btrfs_end_write_no_snapshotting(root); |
---|
| 4909 | + btrfs_drew_write_unlock(&root->snapshot_lock); |
---|
5267 | 4910 | btrfs_end_transaction(trans); |
---|
5268 | 4911 | } else { |
---|
5269 | 4912 | |
---|
5270 | 4913 | /* |
---|
5271 | 4914 | * We're truncating a file that used to have good data down to |
---|
5272 | | - * zero. Make sure it gets into the ordered flush list so that |
---|
5273 | | - * any new writes get down to disk quickly. |
---|
| 4915 | + * zero. Make sure any new writes to the file get on disk |
---|
| 4916 | + * on close. |
---|
5274 | 4917 | */ |
---|
5275 | 4918 | if (newsize == 0) |
---|
5276 | | - set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, |
---|
| 4919 | + set_bit(BTRFS_INODE_FLUSH_ON_CLOSE, |
---|
5277 | 4920 | &BTRFS_I(inode)->runtime_flags); |
---|
5278 | 4921 | |
---|
5279 | 4922 | truncate_setsize(inode, newsize); |
---|
5280 | 4923 | |
---|
5281 | | - /* Disable nonlocked read DIO to avoid the end less truncate */ |
---|
5282 | | - btrfs_inode_block_unlocked_dio(BTRFS_I(inode)); |
---|
5283 | 4924 | inode_dio_wait(inode); |
---|
5284 | | - btrfs_inode_resume_unlocked_dio(BTRFS_I(inode)); |
---|
5285 | 4925 | |
---|
5286 | 4926 | ret = btrfs_truncate(inode, newsize == oldsize); |
---|
5287 | 4927 | if (ret && inode->i_nlink) { |
---|
.. | .. |
---|
5356 | 4996 | truncate_inode_pages_final(&inode->i_data); |
---|
5357 | 4997 | |
---|
5358 | 4998 | write_lock(&map_tree->lock); |
---|
5359 | | - while (!RB_EMPTY_ROOT(&map_tree->map)) { |
---|
| 4999 | + while (!RB_EMPTY_ROOT(&map_tree->map.rb_root)) { |
---|
5360 | 5000 | struct extent_map *em; |
---|
5361 | 5001 | |
---|
5362 | | - node = rb_first(&map_tree->map); |
---|
| 5002 | + node = rb_first_cached(&map_tree->map); |
---|
5363 | 5003 | em = rb_entry(node, struct extent_map, rb_node); |
---|
5364 | 5004 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); |
---|
5365 | 5005 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); |
---|
.. | .. |
---|
5375 | 5015 | |
---|
5376 | 5016 | /* |
---|
5377 | 5017 | * Keep looping until we have no more ranges in the io tree. |
---|
5378 | | - * We can have ongoing bios started by readpages (called from readahead) |
---|
5379 | | - * that have their endio callback (extent_io.c:end_bio_extent_readpage) |
---|
| 5018 | + * We can have ongoing bios started by readahead that have |
---|
| 5019 | + * their endio callback (extent_io.c:end_bio_extent_readpage) |
---|
5380 | 5020 | * still in progress (unlocked the pages in the bio but have not yet |
---|
5381 | 5021 | * unlocked the ranges in the io tree). Therefore this means some |
---|
5382 | 5022 | * ranges can still be locked and eviction started because before |
---|
.. | .. |
---|
5415 | 5055 | * Note, end is the bytenr of last byte, so we need + 1 here. |
---|
5416 | 5056 | */ |
---|
5417 | 5057 | if (state_flags & EXTENT_DELALLOC) |
---|
5418 | | - btrfs_qgroup_free_data(inode, NULL, start, end - start + 1); |
---|
| 5058 | + btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start, |
---|
| 5059 | + end - start + 1); |
---|
5419 | 5060 | |
---|
5420 | 5061 | clear_extent_bit(io_tree, start, end, |
---|
5421 | | - EXTENT_LOCKED | EXTENT_DIRTY | |
---|
5422 | | - EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | |
---|
5423 | | - EXTENT_DEFRAG, 1, 1, &cached_state); |
---|
| 5062 | + EXTENT_LOCKED | EXTENT_DELALLOC | |
---|
| 5063 | + EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1, |
---|
| 5064 | + &cached_state); |
---|
5424 | 5065 | |
---|
5425 | 5066 | cond_resched(); |
---|
5426 | 5067 | spin_lock(&io_tree->lock); |
---|
.. | .. |
---|
5429 | 5070 | } |
---|
5430 | 5071 | |
---|
5431 | 5072 | static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root, |
---|
5432 | | - struct btrfs_block_rsv *rsv, |
---|
5433 | | - u64 min_size) |
---|
| 5073 | + struct btrfs_block_rsv *rsv) |
---|
5434 | 5074 | { |
---|
5435 | 5075 | struct btrfs_fs_info *fs_info = root->fs_info; |
---|
5436 | 5076 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; |
---|
5437 | | - int failures = 0; |
---|
| 5077 | + struct btrfs_trans_handle *trans; |
---|
| 5078 | + u64 delayed_refs_extra = btrfs_calc_insert_metadata_size(fs_info, 1); |
---|
| 5079 | + int ret; |
---|
5438 | 5080 | |
---|
5439 | | - for (;;) { |
---|
5440 | | - struct btrfs_trans_handle *trans; |
---|
5441 | | - int ret; |
---|
5442 | | - |
---|
5443 | | - ret = btrfs_block_rsv_refill(root, rsv, min_size, |
---|
5444 | | - BTRFS_RESERVE_FLUSH_LIMIT); |
---|
5445 | | - |
---|
5446 | | - if (ret && ++failures > 2) { |
---|
5447 | | - btrfs_warn(fs_info, |
---|
5448 | | - "could not allocate space for a delete; will truncate on mount"); |
---|
5449 | | - return ERR_PTR(-ENOSPC); |
---|
5450 | | - } |
---|
5451 | | - |
---|
5452 | | - trans = btrfs_join_transaction(root); |
---|
5453 | | - if (IS_ERR(trans) || !ret) |
---|
5454 | | - return trans; |
---|
5455 | | - |
---|
| 5081 | + /* |
---|
| 5082 | + * Eviction should be taking place at some place safe because of our |
---|
| 5083 | + * delayed iputs. However the normal flushing code will run delayed |
---|
| 5084 | + * iputs, so we cannot use FLUSH_ALL otherwise we'll deadlock. |
---|
| 5085 | + * |
---|
| 5086 | + * We reserve the delayed_refs_extra here again because we can't use |
---|
| 5087 | + * btrfs_start_transaction(root, 0) for the same deadlocky reason as |
---|
| 5088 | + * above. We reserve our extra bit here because we generate a ton of |
---|
| 5089 | + * delayed refs activity by truncating. |
---|
| 5090 | + * |
---|
| 5091 | + * If we cannot make our reservation we'll attempt to steal from the |
---|
| 5092 | + * global reserve, because we really want to be able to free up space. |
---|
| 5093 | + */ |
---|
| 5094 | + ret = btrfs_block_rsv_refill(root, rsv, rsv->size + delayed_refs_extra, |
---|
| 5095 | + BTRFS_RESERVE_FLUSH_EVICT); |
---|
| 5096 | + if (ret) { |
---|
5456 | 5097 | /* |
---|
5457 | 5098 | * Try to steal from the global reserve if there is space for |
---|
5458 | 5099 | * it. |
---|
5459 | 5100 | */ |
---|
5460 | | - if (!btrfs_check_space_for_delayed_refs(trans, fs_info) && |
---|
5461 | | - !btrfs_block_rsv_migrate(global_rsv, rsv, min_size, 0)) |
---|
5462 | | - return trans; |
---|
5463 | | - |
---|
5464 | | - /* If not, commit and try again. */ |
---|
5465 | | - ret = btrfs_commit_transaction(trans); |
---|
5466 | | - if (ret) |
---|
5467 | | - return ERR_PTR(ret); |
---|
| 5101 | + if (btrfs_check_space_for_delayed_refs(fs_info) || |
---|
| 5102 | + btrfs_block_rsv_migrate(global_rsv, rsv, rsv->size, 0)) { |
---|
| 5103 | + btrfs_warn(fs_info, |
---|
| 5104 | + "could not allocate space for delete; will truncate on mount"); |
---|
| 5105 | + return ERR_PTR(-ENOSPC); |
---|
| 5106 | + } |
---|
| 5107 | + delayed_refs_extra = 0; |
---|
5468 | 5108 | } |
---|
| 5109 | + |
---|
| 5110 | + trans = btrfs_join_transaction(root); |
---|
| 5111 | + if (IS_ERR(trans)) |
---|
| 5112 | + return trans; |
---|
| 5113 | + |
---|
| 5114 | + if (delayed_refs_extra) { |
---|
| 5115 | + trans->block_rsv = &fs_info->trans_block_rsv; |
---|
| 5116 | + trans->bytes_reserved = delayed_refs_extra; |
---|
| 5117 | + btrfs_block_rsv_migrate(rsv, trans->block_rsv, |
---|
| 5118 | + delayed_refs_extra, 1); |
---|
| 5119 | + } |
---|
| 5120 | + return trans; |
---|
5469 | 5121 | } |
---|
5470 | 5122 | |
---|
5471 | 5123 | void btrfs_evict_inode(struct inode *inode) |
---|
.. | .. |
---|
5474 | 5126 | struct btrfs_trans_handle *trans; |
---|
5475 | 5127 | struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
5476 | 5128 | struct btrfs_block_rsv *rsv; |
---|
5477 | | - u64 min_size; |
---|
5478 | 5129 | int ret; |
---|
5479 | 5130 | |
---|
5480 | 5131 | trace_btrfs_inode_evict(inode); |
---|
.. | .. |
---|
5483 | 5134 | clear_inode(inode); |
---|
5484 | 5135 | return; |
---|
5485 | 5136 | } |
---|
5486 | | - |
---|
5487 | | - min_size = btrfs_calc_trunc_metadata_size(fs_info, 1); |
---|
5488 | 5137 | |
---|
5489 | 5138 | evict_inode_truncate_pages(inode); |
---|
5490 | 5139 | |
---|
.. | .. |
---|
5496 | 5145 | |
---|
5497 | 5146 | if (is_bad_inode(inode)) |
---|
5498 | 5147 | goto no_delete; |
---|
5499 | | - /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ |
---|
5500 | | - if (!special_file(inode->i_mode)) |
---|
5501 | | - btrfs_wait_ordered_range(inode, 0, (u64)-1); |
---|
5502 | 5148 | |
---|
5503 | 5149 | btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1); |
---|
5504 | 5150 | |
---|
.. | .. |
---|
5518 | 5164 | rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP); |
---|
5519 | 5165 | if (!rsv) |
---|
5520 | 5166 | goto no_delete; |
---|
5521 | | - rsv->size = min_size; |
---|
| 5167 | + rsv->size = btrfs_calc_metadata_size(fs_info, 1); |
---|
5522 | 5168 | rsv->failfast = 1; |
---|
5523 | 5169 | |
---|
5524 | 5170 | btrfs_i_size_write(BTRFS_I(inode), 0); |
---|
5525 | 5171 | |
---|
5526 | 5172 | while (1) { |
---|
5527 | | - trans = evict_refill_and_join(root, rsv, min_size); |
---|
| 5173 | + trans = evict_refill_and_join(root, rsv); |
---|
5528 | 5174 | if (IS_ERR(trans)) |
---|
5529 | 5175 | goto free_rsv; |
---|
5530 | 5176 | |
---|
.. | .. |
---|
5549 | 5195 | * If it turns out that we are dropping too many of these, we might want |
---|
5550 | 5196 | * to add a mechanism for retrying these after a commit. |
---|
5551 | 5197 | */ |
---|
5552 | | - trans = evict_refill_and_join(root, rsv, min_size); |
---|
| 5198 | + trans = evict_refill_and_join(root, rsv); |
---|
5553 | 5199 | if (!IS_ERR(trans)) { |
---|
5554 | 5200 | trans->block_rsv = rsv; |
---|
5555 | 5201 | btrfs_orphan_del(trans, BTRFS_I(inode)); |
---|
.. | .. |
---|
5596 | 5242 | |
---|
5597 | 5243 | di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)), |
---|
5598 | 5244 | name, namelen, 0); |
---|
5599 | | - if (!di) { |
---|
5600 | | - ret = -ENOENT; |
---|
5601 | | - goto out; |
---|
5602 | | - } |
---|
5603 | | - if (IS_ERR(di)) { |
---|
5604 | | - ret = PTR_ERR(di); |
---|
| 5245 | + if (IS_ERR_OR_NULL(di)) { |
---|
| 5246 | + ret = di ? PTR_ERR(di) : -ENOENT; |
---|
5605 | 5247 | goto out; |
---|
5606 | 5248 | } |
---|
5607 | 5249 | |
---|
.. | .. |
---|
5672 | 5314 | |
---|
5673 | 5315 | btrfs_release_path(path); |
---|
5674 | 5316 | |
---|
5675 | | - new_root = btrfs_read_fs_root_no_name(fs_info, location); |
---|
| 5317 | + new_root = btrfs_get_fs_root(fs_info, location->objectid, true); |
---|
5676 | 5318 | if (IS_ERR(new_root)) { |
---|
5677 | 5319 | err = PTR_ERR(new_root); |
---|
5678 | 5320 | goto out; |
---|
.. | .. |
---|
5724 | 5366 | spin_unlock(&root->inode_lock); |
---|
5725 | 5367 | } |
---|
5726 | 5368 | |
---|
5727 | | -static void inode_tree_del(struct inode *inode) |
---|
| 5369 | +static void inode_tree_del(struct btrfs_inode *inode) |
---|
5728 | 5370 | { |
---|
5729 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
| 5371 | + struct btrfs_root *root = inode->root; |
---|
5730 | 5372 | int empty = 0; |
---|
5731 | 5373 | |
---|
5732 | 5374 | spin_lock(&root->inode_lock); |
---|
5733 | | - if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { |
---|
5734 | | - rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); |
---|
5735 | | - RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); |
---|
| 5375 | + if (!RB_EMPTY_NODE(&inode->rb_node)) { |
---|
| 5376 | + rb_erase(&inode->rb_node, &root->inode_tree); |
---|
| 5377 | + RB_CLEAR_NODE(&inode->rb_node); |
---|
5736 | 5378 | empty = RB_EMPTY_ROOT(&root->inode_tree); |
---|
5737 | 5379 | } |
---|
5738 | 5380 | spin_unlock(&root->inode_lock); |
---|
.. | .. |
---|
5750 | 5392 | static int btrfs_init_locked_inode(struct inode *inode, void *p) |
---|
5751 | 5393 | { |
---|
5752 | 5394 | struct btrfs_iget_args *args = p; |
---|
5753 | | - inode->i_ino = args->location->objectid; |
---|
5754 | | - memcpy(&BTRFS_I(inode)->location, args->location, |
---|
5755 | | - sizeof(*args->location)); |
---|
5756 | | - BTRFS_I(inode)->root = args->root; |
---|
| 5395 | + |
---|
| 5396 | + inode->i_ino = args->ino; |
---|
| 5397 | + BTRFS_I(inode)->location.objectid = args->ino; |
---|
| 5398 | + BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; |
---|
| 5399 | + BTRFS_I(inode)->location.offset = 0; |
---|
| 5400 | + BTRFS_I(inode)->root = btrfs_grab_root(args->root); |
---|
| 5401 | + BUG_ON(args->root && !BTRFS_I(inode)->root); |
---|
5757 | 5402 | return 0; |
---|
5758 | 5403 | } |
---|
5759 | 5404 | |
---|
5760 | 5405 | static int btrfs_find_actor(struct inode *inode, void *opaque) |
---|
5761 | 5406 | { |
---|
5762 | 5407 | struct btrfs_iget_args *args = opaque; |
---|
5763 | | - return args->location->objectid == BTRFS_I(inode)->location.objectid && |
---|
| 5408 | + |
---|
| 5409 | + return args->ino == BTRFS_I(inode)->location.objectid && |
---|
5764 | 5410 | args->root == BTRFS_I(inode)->root; |
---|
5765 | 5411 | } |
---|
5766 | 5412 | |
---|
5767 | | -static struct inode *btrfs_iget_locked(struct super_block *s, |
---|
5768 | | - struct btrfs_key *location, |
---|
| 5413 | +static struct inode *btrfs_iget_locked(struct super_block *s, u64 ino, |
---|
5769 | 5414 | struct btrfs_root *root) |
---|
5770 | 5415 | { |
---|
5771 | 5416 | struct inode *inode; |
---|
5772 | 5417 | struct btrfs_iget_args args; |
---|
5773 | | - unsigned long hashval = btrfs_inode_hash(location->objectid, root); |
---|
| 5418 | + unsigned long hashval = btrfs_inode_hash(ino, root); |
---|
5774 | 5419 | |
---|
5775 | | - args.location = location; |
---|
| 5420 | + args.ino = ino; |
---|
5776 | 5421 | args.root = root; |
---|
5777 | 5422 | |
---|
5778 | 5423 | inode = iget5_locked(s, hashval, btrfs_find_actor, |
---|
.. | .. |
---|
5781 | 5426 | return inode; |
---|
5782 | 5427 | } |
---|
5783 | 5428 | |
---|
5784 | | -/* Get an inode object given its location and corresponding root. |
---|
5785 | | - * Returns in *is_new if the inode was read from disk |
---|
| 5429 | +/* |
---|
| 5430 | + * Get an inode object given its inode number and corresponding root. |
---|
| 5431 | + * Path can be preallocated to prevent recursing back to iget through |
---|
| 5432 | + * allocator. NULL is also valid but may require an additional allocation |
---|
| 5433 | + * later. |
---|
5786 | 5434 | */ |
---|
5787 | | -struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location, |
---|
5788 | | - struct btrfs_root *root, int *new, |
---|
5789 | | - struct btrfs_path *path) |
---|
| 5435 | +struct inode *btrfs_iget_path(struct super_block *s, u64 ino, |
---|
| 5436 | + struct btrfs_root *root, struct btrfs_path *path) |
---|
5790 | 5437 | { |
---|
5791 | 5438 | struct inode *inode; |
---|
5792 | 5439 | |
---|
5793 | | - inode = btrfs_iget_locked(s, location, root); |
---|
| 5440 | + inode = btrfs_iget_locked(s, ino, root); |
---|
5794 | 5441 | if (!inode) |
---|
5795 | 5442 | return ERR_PTR(-ENOMEM); |
---|
5796 | 5443 | |
---|
.. | .. |
---|
5801 | 5448 | if (!ret) { |
---|
5802 | 5449 | inode_tree_add(inode); |
---|
5803 | 5450 | unlock_new_inode(inode); |
---|
5804 | | - if (new) |
---|
5805 | | - *new = 1; |
---|
5806 | 5451 | } else { |
---|
5807 | 5452 | iget_failed(inode); |
---|
5808 | 5453 | /* |
---|
.. | .. |
---|
5819 | 5464 | return inode; |
---|
5820 | 5465 | } |
---|
5821 | 5466 | |
---|
5822 | | -struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, |
---|
5823 | | - struct btrfs_root *root, int *new) |
---|
| 5467 | +struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root) |
---|
5824 | 5468 | { |
---|
5825 | | - return btrfs_iget_path(s, location, root, new, NULL); |
---|
| 5469 | + return btrfs_iget_path(s, ino, root, NULL); |
---|
5826 | 5470 | } |
---|
5827 | 5471 | |
---|
5828 | 5472 | static struct inode *new_simple_dir(struct super_block *s, |
---|
.. | .. |
---|
5834 | 5478 | if (!inode) |
---|
5835 | 5479 | return ERR_PTR(-ENOMEM); |
---|
5836 | 5480 | |
---|
5837 | | - BTRFS_I(inode)->root = root; |
---|
| 5481 | + BTRFS_I(inode)->root = btrfs_grab_root(root); |
---|
5838 | 5482 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); |
---|
5839 | 5483 | set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags); |
---|
5840 | 5484 | |
---|
5841 | 5485 | inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; |
---|
5842 | | - inode->i_op = &btrfs_dir_ro_inode_operations; |
---|
| 5486 | + /* |
---|
| 5487 | + * We only need lookup, the rest is read-only and there's no inode |
---|
| 5488 | + * associated with the dentry |
---|
| 5489 | + */ |
---|
| 5490 | + inode->i_op = &simple_dir_inode_operations; |
---|
5843 | 5491 | inode->i_opflags &= ~IOP_XATTR; |
---|
5844 | 5492 | inode->i_fop = &simple_dir_operations; |
---|
5845 | 5493 | inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; |
---|
.. | .. |
---|
5853 | 5501 | |
---|
5854 | 5502 | static inline u8 btrfs_inode_type(struct inode *inode) |
---|
5855 | 5503 | { |
---|
5856 | | - return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; |
---|
| 5504 | + /* |
---|
| 5505 | + * Compile-time asserts that generic FT_* types still match |
---|
| 5506 | + * BTRFS_FT_* types |
---|
| 5507 | + */ |
---|
| 5508 | + BUILD_BUG_ON(BTRFS_FT_UNKNOWN != FT_UNKNOWN); |
---|
| 5509 | + BUILD_BUG_ON(BTRFS_FT_REG_FILE != FT_REG_FILE); |
---|
| 5510 | + BUILD_BUG_ON(BTRFS_FT_DIR != FT_DIR); |
---|
| 5511 | + BUILD_BUG_ON(BTRFS_FT_CHRDEV != FT_CHRDEV); |
---|
| 5512 | + BUILD_BUG_ON(BTRFS_FT_BLKDEV != FT_BLKDEV); |
---|
| 5513 | + BUILD_BUG_ON(BTRFS_FT_FIFO != FT_FIFO); |
---|
| 5514 | + BUILD_BUG_ON(BTRFS_FT_SOCK != FT_SOCK); |
---|
| 5515 | + BUILD_BUG_ON(BTRFS_FT_SYMLINK != FT_SYMLINK); |
---|
| 5516 | + |
---|
| 5517 | + return fs_umode_to_ftype(inode->i_mode); |
---|
5857 | 5518 | } |
---|
5858 | 5519 | |
---|
5859 | 5520 | struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) |
---|
.. | .. |
---|
5864 | 5525 | struct btrfs_root *sub_root = root; |
---|
5865 | 5526 | struct btrfs_key location; |
---|
5866 | 5527 | u8 di_type = 0; |
---|
5867 | | - int index; |
---|
5868 | 5528 | int ret = 0; |
---|
5869 | 5529 | |
---|
5870 | 5530 | if (dentry->d_name.len > BTRFS_NAME_LEN) |
---|
.. | .. |
---|
5875 | 5535 | return ERR_PTR(ret); |
---|
5876 | 5536 | |
---|
5877 | 5537 | if (location.type == BTRFS_INODE_ITEM_KEY) { |
---|
5878 | | - inode = btrfs_iget(dir->i_sb, &location, root, NULL); |
---|
| 5538 | + inode = btrfs_iget(dir->i_sb, location.objectid, root); |
---|
5879 | 5539 | if (IS_ERR(inode)) |
---|
5880 | 5540 | return inode; |
---|
5881 | 5541 | |
---|
.. | .. |
---|
5891 | 5551 | return inode; |
---|
5892 | 5552 | } |
---|
5893 | 5553 | |
---|
5894 | | - index = srcu_read_lock(&fs_info->subvol_srcu); |
---|
5895 | 5554 | ret = fixup_tree_root_location(fs_info, dir, dentry, |
---|
5896 | 5555 | &location, &sub_root); |
---|
5897 | 5556 | if (ret < 0) { |
---|
.. | .. |
---|
5900 | 5559 | else |
---|
5901 | 5560 | inode = new_simple_dir(dir->i_sb, &location, sub_root); |
---|
5902 | 5561 | } else { |
---|
5903 | | - inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL); |
---|
| 5562 | + inode = btrfs_iget(dir->i_sb, location.objectid, sub_root); |
---|
5904 | 5563 | } |
---|
5905 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
---|
| 5564 | + if (root != sub_root) |
---|
| 5565 | + btrfs_put_root(sub_root); |
---|
5906 | 5566 | |
---|
5907 | 5567 | if (!IS_ERR(inode) && root != sub_root) { |
---|
5908 | 5568 | down_read(&fs_info->cleanup_work_sem); |
---|
.. | .. |
---|
5940 | 5600 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, |
---|
5941 | 5601 | unsigned int flags) |
---|
5942 | 5602 | { |
---|
5943 | | - struct inode *inode; |
---|
| 5603 | + struct inode *inode = btrfs_lookup_dentry(dir, dentry); |
---|
5944 | 5604 | |
---|
5945 | | - inode = btrfs_lookup_dentry(dir, dentry); |
---|
5946 | | - if (IS_ERR(inode)) { |
---|
5947 | | - if (PTR_ERR(inode) == -ENOENT) |
---|
5948 | | - inode = NULL; |
---|
5949 | | - else |
---|
5950 | | - return ERR_CAST(inode); |
---|
5951 | | - } |
---|
5952 | | - |
---|
| 5605 | + if (inode == ERR_PTR(-ENOENT)) |
---|
| 5606 | + inode = NULL; |
---|
5953 | 5607 | return d_splice_alias(inode, dentry); |
---|
5954 | 5608 | } |
---|
5955 | | - |
---|
5956 | | -unsigned char btrfs_filetype_table[] = { |
---|
5957 | | - DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK |
---|
5958 | | -}; |
---|
5959 | 5609 | |
---|
5960 | 5610 | /* |
---|
5961 | 5611 | * All this infrastructure exists because dir_emit can fault, and we are holding |
---|
.. | .. |
---|
6095 | 5745 | name_ptr = (char *)(entry + 1); |
---|
6096 | 5746 | read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1), |
---|
6097 | 5747 | name_len); |
---|
6098 | | - put_unaligned(btrfs_filetype_table[btrfs_dir_type(leaf, di)], |
---|
| 5748 | + put_unaligned(fs_ftype_to_dtype(btrfs_dir_type(leaf, di)), |
---|
6099 | 5749 | &entry->type); |
---|
6100 | 5750 | btrfs_dir_item_key_to_cpu(leaf, di, &location); |
---|
6101 | 5751 | put_unaligned(location.objectid, &entry->ino); |
---|
.. | .. |
---|
6167 | 5817 | return PTR_ERR(trans); |
---|
6168 | 5818 | |
---|
6169 | 5819 | ret = btrfs_update_inode(trans, root, inode); |
---|
6170 | | - if (ret && ret == -ENOSPC) { |
---|
| 5820 | + if (ret && (ret == -ENOSPC || ret == -EDQUOT)) { |
---|
6171 | 5821 | /* whoops, lets try again with the full transaction */ |
---|
6172 | 5822 | btrfs_end_transaction(trans); |
---|
6173 | 5823 | trans = btrfs_start_transaction(root, 1); |
---|
.. | .. |
---|
6290 | 5940 | static int btrfs_insert_inode_locked(struct inode *inode) |
---|
6291 | 5941 | { |
---|
6292 | 5942 | struct btrfs_iget_args args; |
---|
6293 | | - args.location = &BTRFS_I(inode)->location; |
---|
| 5943 | + |
---|
| 5944 | + args.ino = BTRFS_I(inode)->location.objectid; |
---|
6294 | 5945 | args.root = BTRFS_I(inode)->root; |
---|
6295 | 5946 | |
---|
6296 | 5947 | return insert_inode_locked4(inode, |
---|
.. | .. |
---|
6346 | 5997 | u32 sizes[2]; |
---|
6347 | 5998 | int nitems = name ? 2 : 1; |
---|
6348 | 5999 | unsigned long ptr; |
---|
| 6000 | + unsigned int nofs_flag; |
---|
6349 | 6001 | int ret; |
---|
6350 | 6002 | |
---|
6351 | 6003 | path = btrfs_alloc_path(); |
---|
6352 | 6004 | if (!path) |
---|
6353 | 6005 | return ERR_PTR(-ENOMEM); |
---|
6354 | 6006 | |
---|
| 6007 | + nofs_flag = memalloc_nofs_save(); |
---|
6355 | 6008 | inode = new_inode(fs_info->sb); |
---|
| 6009 | + memalloc_nofs_restore(nofs_flag); |
---|
6356 | 6010 | if (!inode) { |
---|
6357 | 6011 | btrfs_free_path(path); |
---|
6358 | 6012 | return ERR_PTR(-ENOMEM); |
---|
.. | .. |
---|
6390 | 6044 | */ |
---|
6391 | 6045 | BTRFS_I(inode)->index_cnt = 2; |
---|
6392 | 6046 | BTRFS_I(inode)->dir_index = *index; |
---|
6393 | | - BTRFS_I(inode)->root = root; |
---|
| 6047 | + BTRFS_I(inode)->root = btrfs_grab_root(root); |
---|
6394 | 6048 | BTRFS_I(inode)->generation = trans->transid; |
---|
6395 | 6049 | inode->i_generation = BTRFS_I(inode)->generation; |
---|
6396 | 6050 | |
---|
.. | .. |
---|
6477 | 6131 | inode_tree_add(inode); |
---|
6478 | 6132 | |
---|
6479 | 6133 | trace_btrfs_inode_new(inode); |
---|
6480 | | - btrfs_set_inode_last_trans(trans, inode); |
---|
| 6134 | + btrfs_set_inode_last_trans(trans, BTRFS_I(inode)); |
---|
6481 | 6135 | |
---|
6482 | 6136 | btrfs_update_root_times(trans, root); |
---|
6483 | 6137 | |
---|
.. | .. |
---|
6535 | 6189 | if (ret) |
---|
6536 | 6190 | return ret; |
---|
6537 | 6191 | |
---|
6538 | | - ret = btrfs_insert_dir_item(trans, root, name, name_len, |
---|
6539 | | - parent_inode, &key, |
---|
| 6192 | + ret = btrfs_insert_dir_item(trans, name, name_len, parent_inode, &key, |
---|
6540 | 6193 | btrfs_inode_type(&inode->vfs_inode), index); |
---|
6541 | 6194 | if (ret == -EEXIST || ret == -EOVERFLOW) |
---|
6542 | 6195 | goto fail_dir_item; |
---|
.. | .. |
---|
6620 | 6273 | if (IS_ERR(trans)) |
---|
6621 | 6274 | return PTR_ERR(trans); |
---|
6622 | 6275 | |
---|
6623 | | - err = btrfs_find_free_ino(root, &objectid); |
---|
| 6276 | + err = btrfs_find_free_objectid(root, &objectid); |
---|
6624 | 6277 | if (err) |
---|
6625 | 6278 | goto out_unlock; |
---|
6626 | 6279 | |
---|
.. | .. |
---|
6684 | 6337 | if (IS_ERR(trans)) |
---|
6685 | 6338 | return PTR_ERR(trans); |
---|
6686 | 6339 | |
---|
6687 | | - err = btrfs_find_free_ino(root, &objectid); |
---|
| 6340 | + err = btrfs_find_free_objectid(root, &objectid); |
---|
6688 | 6341 | if (err) |
---|
6689 | 6342 | goto out_unlock; |
---|
6690 | 6343 | |
---|
.. | .. |
---|
6719 | 6372 | if (err) |
---|
6720 | 6373 | goto out_unlock; |
---|
6721 | 6374 | |
---|
6722 | | - BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
---|
6723 | 6375 | d_instantiate_new(dentry, inode); |
---|
6724 | 6376 | |
---|
6725 | 6377 | out_unlock: |
---|
.. | .. |
---|
6744 | 6396 | int drop_inode = 0; |
---|
6745 | 6397 | |
---|
6746 | 6398 | /* do not allow sys_link's with other subvols of the same device */ |
---|
6747 | | - if (root->objectid != BTRFS_I(inode)->root->objectid) |
---|
| 6399 | + if (root->root_key.objectid != BTRFS_I(inode)->root->root_key.objectid) |
---|
6748 | 6400 | return -EXDEV; |
---|
6749 | 6401 | |
---|
6750 | 6402 | if (inode->i_nlink >= BTRFS_LINK_MAX) |
---|
.. | .. |
---|
6782 | 6434 | drop_inode = 1; |
---|
6783 | 6435 | } else { |
---|
6784 | 6436 | struct dentry *parent = dentry->d_parent; |
---|
6785 | | - int ret; |
---|
6786 | 6437 | |
---|
6787 | 6438 | err = btrfs_update_inode(trans, root, inode); |
---|
6788 | 6439 | if (err) |
---|
.. | .. |
---|
6796 | 6447 | if (err) |
---|
6797 | 6448 | goto fail; |
---|
6798 | 6449 | } |
---|
6799 | | - BTRFS_I(inode)->last_link_trans = trans->transid; |
---|
6800 | 6450 | d_instantiate(dentry, inode); |
---|
6801 | | - ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent, |
---|
6802 | | - true, NULL); |
---|
6803 | | - if (ret == BTRFS_NEED_TRANS_COMMIT) { |
---|
6804 | | - err = btrfs_commit_transaction(trans); |
---|
6805 | | - trans = NULL; |
---|
6806 | | - } |
---|
| 6451 | + btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent); |
---|
6807 | 6452 | } |
---|
6808 | 6453 | |
---|
6809 | 6454 | fail: |
---|
.. | .. |
---|
6824 | 6469 | struct btrfs_trans_handle *trans; |
---|
6825 | 6470 | struct btrfs_root *root = BTRFS_I(dir)->root; |
---|
6826 | 6471 | int err = 0; |
---|
6827 | | - int drop_on_err = 0; |
---|
6828 | 6472 | u64 objectid = 0; |
---|
6829 | 6473 | u64 index = 0; |
---|
6830 | 6474 | |
---|
.. | .. |
---|
6837 | 6481 | if (IS_ERR(trans)) |
---|
6838 | 6482 | return PTR_ERR(trans); |
---|
6839 | 6483 | |
---|
6840 | | - err = btrfs_find_free_ino(root, &objectid); |
---|
| 6484 | + err = btrfs_find_free_objectid(root, &objectid); |
---|
6841 | 6485 | if (err) |
---|
6842 | 6486 | goto out_fail; |
---|
6843 | 6487 | |
---|
.. | .. |
---|
6850 | 6494 | goto out_fail; |
---|
6851 | 6495 | } |
---|
6852 | 6496 | |
---|
6853 | | - drop_on_err = 1; |
---|
6854 | 6497 | /* these must be set before we unlock the inode */ |
---|
6855 | 6498 | inode->i_op = &btrfs_dir_inode_operations; |
---|
6856 | 6499 | inode->i_fop = &btrfs_dir_file_operations; |
---|
.. | .. |
---|
6871 | 6514 | goto out_fail; |
---|
6872 | 6515 | |
---|
6873 | 6516 | d_instantiate_new(dentry, inode); |
---|
6874 | | - drop_on_err = 0; |
---|
6875 | 6517 | |
---|
6876 | 6518 | out_fail: |
---|
6877 | 6519 | btrfs_end_transaction(trans); |
---|
.. | .. |
---|
6929 | 6571 | return ret; |
---|
6930 | 6572 | } |
---|
6931 | 6573 | |
---|
6932 | | -/* |
---|
6933 | | - * a bit scary, this does extent mapping from logical file offset to the disk. |
---|
6934 | | - * the ugly parts come from merging extents from the disk with the in-ram |
---|
6935 | | - * representation. This gets more complex because of the data=ordered code, |
---|
6936 | | - * where the in-ram extents might be locked pending data=ordered completion. |
---|
| 6574 | +/** |
---|
| 6575 | + * btrfs_get_extent - Lookup the first extent overlapping a range in a file. |
---|
| 6576 | + * @inode: file to search in |
---|
| 6577 | + * @page: page to read extent data into if the extent is inline |
---|
| 6578 | + * @pg_offset: offset into @page to copy to |
---|
| 6579 | + * @start: file offset |
---|
| 6580 | + * @len: length of range starting at @start |
---|
6937 | 6581 | * |
---|
6938 | | - * This also copies inline extents directly into the page. |
---|
| 6582 | + * This returns the first &struct extent_map which overlaps with the given |
---|
| 6583 | + * range, reading it from the B-tree and caching it if necessary. Note that |
---|
| 6584 | + * there may be more extents which overlap the given range after the returned |
---|
| 6585 | + * extent_map. |
---|
| 6586 | + * |
---|
| 6587 | + * If @page is not NULL and the extent is inline, this also reads the extent |
---|
| 6588 | + * data directly into the page and marks the extent up to date in the io_tree. |
---|
| 6589 | + * |
---|
| 6590 | + * Return: ERR_PTR on error, non-NULL extent_map on success. |
---|
6939 | 6591 | */ |
---|
6940 | 6592 | struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, |
---|
6941 | | - struct page *page, |
---|
6942 | | - size_t pg_offset, u64 start, u64 len, |
---|
6943 | | - int create) |
---|
| 6593 | + struct page *page, size_t pg_offset, |
---|
| 6594 | + u64 start, u64 len) |
---|
6944 | 6595 | { |
---|
6945 | 6596 | struct btrfs_fs_info *fs_info = inode->root->fs_info; |
---|
6946 | | - int ret; |
---|
6947 | | - int err = 0; |
---|
| 6597 | + int ret = 0; |
---|
6948 | 6598 | u64 extent_start = 0; |
---|
6949 | 6599 | u64 extent_end = 0; |
---|
6950 | 6600 | u64 objectid = btrfs_ino(inode); |
---|
6951 | | - u32 found_type; |
---|
| 6601 | + int extent_type = -1; |
---|
6952 | 6602 | struct btrfs_path *path = NULL; |
---|
6953 | 6603 | struct btrfs_root *root = inode->root; |
---|
6954 | 6604 | struct btrfs_file_extent_item *item; |
---|
.. | .. |
---|
6957 | 6607 | struct extent_map *em = NULL; |
---|
6958 | 6608 | struct extent_map_tree *em_tree = &inode->extent_tree; |
---|
6959 | 6609 | struct extent_io_tree *io_tree = &inode->io_tree; |
---|
6960 | | - const bool new_inline = !page || create; |
---|
6961 | 6610 | |
---|
6962 | 6611 | read_lock(&em_tree->lock); |
---|
6963 | 6612 | em = lookup_extent_mapping(em_tree, start, len); |
---|
6964 | | - if (em) |
---|
6965 | | - em->bdev = fs_info->fs_devices->latest_bdev; |
---|
6966 | 6613 | read_unlock(&em_tree->lock); |
---|
6967 | 6614 | |
---|
6968 | 6615 | if (em) { |
---|
.. | .. |
---|
6975 | 6622 | } |
---|
6976 | 6623 | em = alloc_extent_map(); |
---|
6977 | 6624 | if (!em) { |
---|
6978 | | - err = -ENOMEM; |
---|
| 6625 | + ret = -ENOMEM; |
---|
6979 | 6626 | goto out; |
---|
6980 | 6627 | } |
---|
6981 | | - em->bdev = fs_info->fs_devices->latest_bdev; |
---|
6982 | 6628 | em->start = EXTENT_MAP_HOLE; |
---|
6983 | 6629 | em->orig_start = EXTENT_MAP_HOLE; |
---|
6984 | 6630 | em->len = (u64)-1; |
---|
6985 | 6631 | em->block_len = (u64)-1; |
---|
6986 | 6632 | |
---|
| 6633 | + path = btrfs_alloc_path(); |
---|
6987 | 6634 | if (!path) { |
---|
6988 | | - path = btrfs_alloc_path(); |
---|
6989 | | - if (!path) { |
---|
6990 | | - err = -ENOMEM; |
---|
6991 | | - goto out; |
---|
6992 | | - } |
---|
6993 | | - /* |
---|
6994 | | - * Chances are we'll be called again, so go ahead and do |
---|
6995 | | - * readahead |
---|
6996 | | - */ |
---|
6997 | | - path->reada = READA_FORWARD; |
---|
6998 | | - } |
---|
6999 | | - |
---|
7000 | | - ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0); |
---|
7001 | | - if (ret < 0) { |
---|
7002 | | - err = ret; |
---|
| 6635 | + ret = -ENOMEM; |
---|
7003 | 6636 | goto out; |
---|
7004 | 6637 | } |
---|
7005 | 6638 | |
---|
7006 | | - if (ret != 0) { |
---|
| 6639 | + /* Chances are we'll be called again, so go ahead and do readahead */ |
---|
| 6640 | + path->reada = READA_FORWARD; |
---|
| 6641 | + |
---|
| 6642 | + /* |
---|
| 6643 | + * Unless we're going to uncompress the inline extent, no sleep would |
---|
| 6644 | + * happen. |
---|
| 6645 | + */ |
---|
| 6646 | + path->leave_spinning = 1; |
---|
| 6647 | + |
---|
| 6648 | + path->recurse = btrfs_is_free_space_inode(inode); |
---|
| 6649 | + |
---|
| 6650 | + ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0); |
---|
| 6651 | + if (ret < 0) { |
---|
| 6652 | + goto out; |
---|
| 6653 | + } else if (ret > 0) { |
---|
7007 | 6654 | if (path->slots[0] == 0) |
---|
7008 | 6655 | goto not_found; |
---|
7009 | 6656 | path->slots[0]--; |
---|
| 6657 | + ret = 0; |
---|
7010 | 6658 | } |
---|
7011 | 6659 | |
---|
7012 | 6660 | leaf = path->nodes[0]; |
---|
7013 | 6661 | item = btrfs_item_ptr(leaf, path->slots[0], |
---|
7014 | 6662 | struct btrfs_file_extent_item); |
---|
7015 | | - /* are we inside the extent that was found? */ |
---|
7016 | 6663 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
---|
7017 | | - found_type = found_key.type; |
---|
7018 | 6664 | if (found_key.objectid != objectid || |
---|
7019 | | - found_type != BTRFS_EXTENT_DATA_KEY) { |
---|
| 6665 | + found_key.type != BTRFS_EXTENT_DATA_KEY) { |
---|
7020 | 6666 | /* |
---|
7021 | 6667 | * If we backup past the first extent we want to move forward |
---|
7022 | 6668 | * and see if there is an extent in front of us, otherwise we'll |
---|
.. | .. |
---|
7027 | 6673 | goto next; |
---|
7028 | 6674 | } |
---|
7029 | 6675 | |
---|
7030 | | - found_type = btrfs_file_extent_type(leaf, item); |
---|
| 6676 | + extent_type = btrfs_file_extent_type(leaf, item); |
---|
7031 | 6677 | extent_start = found_key.offset; |
---|
7032 | | - if (found_type == BTRFS_FILE_EXTENT_REG || |
---|
7033 | | - found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
---|
| 6678 | + extent_end = btrfs_file_extent_end(path); |
---|
| 6679 | + if (extent_type == BTRFS_FILE_EXTENT_REG || |
---|
| 6680 | + extent_type == BTRFS_FILE_EXTENT_PREALLOC) { |
---|
7034 | 6681 | /* Only regular file could have regular/prealloc extent */ |
---|
7035 | 6682 | if (!S_ISREG(inode->vfs_inode.i_mode)) { |
---|
7036 | | - err = -EUCLEAN; |
---|
| 6683 | + ret = -EUCLEAN; |
---|
7037 | 6684 | btrfs_crit(fs_info, |
---|
7038 | 6685 | "regular/prealloc extent found for non-regular inode %llu", |
---|
7039 | 6686 | btrfs_ino(inode)); |
---|
7040 | 6687 | goto out; |
---|
7041 | 6688 | } |
---|
7042 | | - extent_end = extent_start + |
---|
7043 | | - btrfs_file_extent_num_bytes(leaf, item); |
---|
7044 | | - |
---|
7045 | 6689 | trace_btrfs_get_extent_show_fi_regular(inode, leaf, item, |
---|
7046 | 6690 | extent_start); |
---|
7047 | | - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { |
---|
7048 | | - size_t size; |
---|
7049 | | - |
---|
7050 | | - size = btrfs_file_extent_ram_bytes(leaf, item); |
---|
7051 | | - extent_end = ALIGN(extent_start + size, |
---|
7052 | | - fs_info->sectorsize); |
---|
7053 | | - |
---|
| 6691 | + } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
---|
7054 | 6692 | trace_btrfs_get_extent_show_fi_inline(inode, leaf, item, |
---|
7055 | 6693 | path->slots[0], |
---|
7056 | 6694 | extent_start); |
---|
.. | .. |
---|
7060 | 6698 | path->slots[0]++; |
---|
7061 | 6699 | if (path->slots[0] >= btrfs_header_nritems(leaf)) { |
---|
7062 | 6700 | ret = btrfs_next_leaf(root, path); |
---|
7063 | | - if (ret < 0) { |
---|
7064 | | - err = ret; |
---|
| 6701 | + if (ret < 0) |
---|
7065 | 6702 | goto out; |
---|
7066 | | - } |
---|
7067 | | - if (ret > 0) |
---|
| 6703 | + else if (ret > 0) |
---|
7068 | 6704 | goto not_found; |
---|
| 6705 | + |
---|
7069 | 6706 | leaf = path->nodes[0]; |
---|
7070 | 6707 | } |
---|
7071 | 6708 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
---|
.. | .. |
---|
7076 | 6713 | goto not_found; |
---|
7077 | 6714 | if (start > found_key.offset) |
---|
7078 | 6715 | goto next; |
---|
| 6716 | + |
---|
| 6717 | + /* New extent overlaps with existing one */ |
---|
7079 | 6718 | em->start = start; |
---|
7080 | 6719 | em->orig_start = start; |
---|
7081 | 6720 | em->len = found_key.offset - start; |
---|
7082 | | - goto not_found_em; |
---|
| 6721 | + em->block_start = EXTENT_MAP_HOLE; |
---|
| 6722 | + goto insert; |
---|
7083 | 6723 | } |
---|
7084 | 6724 | |
---|
7085 | | - btrfs_extent_item_to_extent_map(inode, path, item, |
---|
7086 | | - new_inline, em); |
---|
| 6725 | + btrfs_extent_item_to_extent_map(inode, path, item, !page, em); |
---|
7087 | 6726 | |
---|
7088 | | - if (found_type == BTRFS_FILE_EXTENT_REG || |
---|
7089 | | - found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
---|
| 6727 | + if (extent_type == BTRFS_FILE_EXTENT_REG || |
---|
| 6728 | + extent_type == BTRFS_FILE_EXTENT_PREALLOC) { |
---|
7090 | 6729 | goto insert; |
---|
7091 | | - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { |
---|
| 6730 | + } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
---|
7092 | 6731 | unsigned long ptr; |
---|
7093 | 6732 | char *map; |
---|
7094 | 6733 | size_t size; |
---|
7095 | 6734 | size_t extent_offset; |
---|
7096 | 6735 | size_t copy_size; |
---|
7097 | 6736 | |
---|
7098 | | - if (new_inline) |
---|
| 6737 | + if (!page) |
---|
7099 | 6738 | goto out; |
---|
7100 | 6739 | |
---|
7101 | 6740 | size = btrfs_file_extent_ram_bytes(leaf, item); |
---|
.. | .. |
---|
7107 | 6746 | em->orig_block_len = em->len; |
---|
7108 | 6747 | em->orig_start = em->start; |
---|
7109 | 6748 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; |
---|
| 6749 | + |
---|
| 6750 | + btrfs_set_path_blocking(path); |
---|
7110 | 6751 | if (!PageUptodate(page)) { |
---|
7111 | 6752 | if (btrfs_file_extent_compression(leaf, item) != |
---|
7112 | 6753 | BTRFS_COMPRESS_NONE) { |
---|
7113 | 6754 | ret = uncompress_inline(path, page, pg_offset, |
---|
7114 | 6755 | extent_offset, item); |
---|
7115 | | - if (ret) { |
---|
7116 | | - err = ret; |
---|
| 6756 | + if (ret) |
---|
7117 | 6757 | goto out; |
---|
7118 | | - } |
---|
7119 | 6758 | } else { |
---|
7120 | 6759 | map = kmap(page); |
---|
7121 | 6760 | read_extent_buffer(leaf, map + pg_offset, ptr, |
---|
.. | .. |
---|
7137 | 6776 | em->start = start; |
---|
7138 | 6777 | em->orig_start = start; |
---|
7139 | 6778 | em->len = len; |
---|
7140 | | -not_found_em: |
---|
7141 | 6779 | em->block_start = EXTENT_MAP_HOLE; |
---|
7142 | 6780 | insert: |
---|
| 6781 | + ret = 0; |
---|
7143 | 6782 | btrfs_release_path(path); |
---|
7144 | 6783 | if (em->start > start || extent_map_end(em) <= start) { |
---|
7145 | 6784 | btrfs_err(fs_info, |
---|
7146 | 6785 | "bad extent! em: [%llu %llu] passed [%llu %llu]", |
---|
7147 | 6786 | em->start, em->len, start, len); |
---|
7148 | | - err = -EIO; |
---|
| 6787 | + ret = -EIO; |
---|
7149 | 6788 | goto out; |
---|
7150 | 6789 | } |
---|
7151 | 6790 | |
---|
7152 | | - err = 0; |
---|
7153 | 6791 | write_lock(&em_tree->lock); |
---|
7154 | | - err = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len); |
---|
| 6792 | + ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len); |
---|
7155 | 6793 | write_unlock(&em_tree->lock); |
---|
7156 | 6794 | out: |
---|
| 6795 | + btrfs_free_path(path); |
---|
7157 | 6796 | |
---|
7158 | 6797 | trace_btrfs_get_extent(root, inode, em); |
---|
7159 | 6798 | |
---|
7160 | | - btrfs_free_path(path); |
---|
7161 | | - if (err) { |
---|
| 6799 | + if (ret) { |
---|
7162 | 6800 | free_extent_map(em); |
---|
7163 | | - return ERR_PTR(err); |
---|
| 6801 | + return ERR_PTR(ret); |
---|
7164 | 6802 | } |
---|
7165 | | - BUG_ON(!em); /* Error is always set */ |
---|
7166 | 6803 | return em; |
---|
7167 | 6804 | } |
---|
7168 | 6805 | |
---|
7169 | 6806 | struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode, |
---|
7170 | | - struct page *page, |
---|
7171 | | - size_t pg_offset, u64 start, u64 len, |
---|
7172 | | - int create) |
---|
| 6807 | + u64 start, u64 len) |
---|
7173 | 6808 | { |
---|
7174 | 6809 | struct extent_map *em; |
---|
7175 | 6810 | struct extent_map *hole_em = NULL; |
---|
7176 | | - u64 range_start = start; |
---|
| 6811 | + u64 delalloc_start = start; |
---|
7177 | 6812 | u64 end; |
---|
7178 | | - u64 found; |
---|
7179 | | - u64 found_end; |
---|
| 6813 | + u64 delalloc_len; |
---|
| 6814 | + u64 delalloc_end; |
---|
7180 | 6815 | int err = 0; |
---|
7181 | 6816 | |
---|
7182 | | - em = btrfs_get_extent(inode, page, pg_offset, start, len, create); |
---|
| 6817 | + em = btrfs_get_extent(inode, NULL, 0, start, len); |
---|
7183 | 6818 | if (IS_ERR(em)) |
---|
7184 | 6819 | return em; |
---|
7185 | 6820 | /* |
---|
.. | .. |
---|
7204 | 6839 | em = NULL; |
---|
7205 | 6840 | |
---|
7206 | 6841 | /* ok, we didn't find anything, lets look for delalloc */ |
---|
7207 | | - found = count_range_bits(&inode->io_tree, &range_start, |
---|
| 6842 | + delalloc_len = count_range_bits(&inode->io_tree, &delalloc_start, |
---|
7208 | 6843 | end, len, EXTENT_DELALLOC, 1); |
---|
7209 | | - found_end = range_start + found; |
---|
7210 | | - if (found_end < range_start) |
---|
7211 | | - found_end = (u64)-1; |
---|
| 6844 | + delalloc_end = delalloc_start + delalloc_len; |
---|
| 6845 | + if (delalloc_end < delalloc_start) |
---|
| 6846 | + delalloc_end = (u64)-1; |
---|
7212 | 6847 | |
---|
7213 | 6848 | /* |
---|
7214 | | - * we didn't find anything useful, return |
---|
7215 | | - * the original results from get_extent() |
---|
| 6849 | + * We didn't find anything useful, return the original results from |
---|
| 6850 | + * get_extent() |
---|
7216 | 6851 | */ |
---|
7217 | | - if (range_start > end || found_end <= start) { |
---|
| 6852 | + if (delalloc_start > end || delalloc_end <= start) { |
---|
7218 | 6853 | em = hole_em; |
---|
7219 | 6854 | hole_em = NULL; |
---|
7220 | 6855 | goto out; |
---|
7221 | 6856 | } |
---|
7222 | 6857 | |
---|
7223 | | - /* adjust the range_start to make sure it doesn't |
---|
7224 | | - * go backwards from the start they passed in |
---|
| 6858 | + /* |
---|
| 6859 | + * Adjust the delalloc_start to make sure it doesn't go backwards from |
---|
| 6860 | + * the start they passed in |
---|
7225 | 6861 | */ |
---|
7226 | | - range_start = max(start, range_start); |
---|
7227 | | - found = found_end - range_start; |
---|
| 6862 | + delalloc_start = max(start, delalloc_start); |
---|
| 6863 | + delalloc_len = delalloc_end - delalloc_start; |
---|
7228 | 6864 | |
---|
7229 | | - if (found > 0) { |
---|
7230 | | - u64 hole_start = start; |
---|
7231 | | - u64 hole_len = len; |
---|
| 6865 | + if (delalloc_len > 0) { |
---|
| 6866 | + u64 hole_start; |
---|
| 6867 | + u64 hole_len; |
---|
| 6868 | + const u64 hole_end = extent_map_end(hole_em); |
---|
7232 | 6869 | |
---|
7233 | 6870 | em = alloc_extent_map(); |
---|
7234 | 6871 | if (!em) { |
---|
7235 | 6872 | err = -ENOMEM; |
---|
7236 | 6873 | goto out; |
---|
7237 | 6874 | } |
---|
7238 | | - /* |
---|
7239 | | - * when btrfs_get_extent can't find anything it |
---|
7240 | | - * returns one huge hole |
---|
7241 | | - * |
---|
7242 | | - * make sure what it found really fits our range, and |
---|
7243 | | - * adjust to make sure it is based on the start from |
---|
7244 | | - * the caller |
---|
7245 | | - */ |
---|
7246 | | - if (hole_em) { |
---|
7247 | | - u64 calc_end = extent_map_end(hole_em); |
---|
7248 | 6875 | |
---|
7249 | | - if (calc_end <= start || (hole_em->start > end)) { |
---|
7250 | | - free_extent_map(hole_em); |
---|
7251 | | - hole_em = NULL; |
---|
7252 | | - } else { |
---|
7253 | | - hole_start = max(hole_em->start, start); |
---|
7254 | | - hole_len = calc_end - hole_start; |
---|
7255 | | - } |
---|
| 6876 | + ASSERT(hole_em); |
---|
| 6877 | + /* |
---|
| 6878 | + * When btrfs_get_extent can't find anything it returns one |
---|
| 6879 | + * huge hole |
---|
| 6880 | + * |
---|
| 6881 | + * Make sure what it found really fits our range, and adjust to |
---|
| 6882 | + * make sure it is based on the start from the caller |
---|
| 6883 | + */ |
---|
| 6884 | + if (hole_end <= start || hole_em->start > end) { |
---|
| 6885 | + free_extent_map(hole_em); |
---|
| 6886 | + hole_em = NULL; |
---|
| 6887 | + } else { |
---|
| 6888 | + hole_start = max(hole_em->start, start); |
---|
| 6889 | + hole_len = hole_end - hole_start; |
---|
7256 | 6890 | } |
---|
7257 | | - em->bdev = NULL; |
---|
7258 | | - if (hole_em && range_start > hole_start) { |
---|
7259 | | - /* our hole starts before our delalloc, so we |
---|
7260 | | - * have to return just the parts of the hole |
---|
7261 | | - * that go until the delalloc starts |
---|
| 6891 | + |
---|
| 6892 | + if (hole_em && delalloc_start > hole_start) { |
---|
| 6893 | + /* |
---|
| 6894 | + * Our hole starts before our delalloc, so we have to |
---|
| 6895 | + * return just the parts of the hole that go until the |
---|
| 6896 | + * delalloc starts |
---|
7262 | 6897 | */ |
---|
7263 | | - em->len = min(hole_len, |
---|
7264 | | - range_start - hole_start); |
---|
| 6898 | + em->len = min(hole_len, delalloc_start - hole_start); |
---|
7265 | 6899 | em->start = hole_start; |
---|
7266 | 6900 | em->orig_start = hole_start; |
---|
7267 | 6901 | /* |
---|
7268 | | - * don't adjust block start at all, |
---|
7269 | | - * it is fixed at EXTENT_MAP_HOLE |
---|
| 6902 | + * Don't adjust block start at all, it is fixed at |
---|
| 6903 | + * EXTENT_MAP_HOLE |
---|
7270 | 6904 | */ |
---|
7271 | 6905 | em->block_start = hole_em->block_start; |
---|
7272 | 6906 | em->block_len = hole_len; |
---|
7273 | 6907 | if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags)) |
---|
7274 | 6908 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); |
---|
7275 | 6909 | } else { |
---|
7276 | | - em->start = range_start; |
---|
7277 | | - em->len = found; |
---|
7278 | | - em->orig_start = range_start; |
---|
| 6910 | + /* |
---|
| 6911 | + * Hole is out of passed range or it starts after |
---|
| 6912 | + * delalloc range |
---|
| 6913 | + */ |
---|
| 6914 | + em->start = delalloc_start; |
---|
| 6915 | + em->len = delalloc_len; |
---|
| 6916 | + em->orig_start = delalloc_start; |
---|
7279 | 6917 | em->block_start = EXTENT_MAP_DELALLOC; |
---|
7280 | | - em->block_len = found; |
---|
| 6918 | + em->block_len = delalloc_len; |
---|
7281 | 6919 | } |
---|
7282 | 6920 | } else { |
---|
7283 | 6921 | return hole_em; |
---|
.. | .. |
---|
7292 | 6930 | return em; |
---|
7293 | 6931 | } |
---|
7294 | 6932 | |
---|
7295 | | -static struct extent_map *btrfs_create_dio_extent(struct inode *inode, |
---|
| 6933 | +static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode, |
---|
7296 | 6934 | const u64 start, |
---|
7297 | 6935 | const u64 len, |
---|
7298 | 6936 | const u64 orig_start, |
---|
.. | .. |
---|
7306 | 6944 | int ret; |
---|
7307 | 6945 | |
---|
7308 | 6946 | if (type != BTRFS_ORDERED_NOCOW) { |
---|
7309 | | - em = create_io_em(inode, start, len, orig_start, |
---|
7310 | | - block_start, block_len, orig_block_len, |
---|
7311 | | - ram_bytes, |
---|
| 6947 | + em = create_io_em(inode, start, len, orig_start, block_start, |
---|
| 6948 | + block_len, orig_block_len, ram_bytes, |
---|
7312 | 6949 | BTRFS_COMPRESS_NONE, /* compress_type */ |
---|
7313 | 6950 | type); |
---|
7314 | 6951 | if (IS_ERR(em)) |
---|
7315 | 6952 | goto out; |
---|
7316 | 6953 | } |
---|
7317 | | - ret = btrfs_add_ordered_extent_dio(inode, start, block_start, |
---|
7318 | | - len, block_len, type); |
---|
| 6954 | + ret = btrfs_add_ordered_extent_dio(inode, start, block_start, len, |
---|
| 6955 | + block_len, type); |
---|
7319 | 6956 | if (ret) { |
---|
7320 | 6957 | if (em) { |
---|
7321 | 6958 | free_extent_map(em); |
---|
7322 | | - btrfs_drop_extent_cache(BTRFS_I(inode), start, |
---|
7323 | | - start + len - 1, 0); |
---|
| 6959 | + btrfs_drop_extent_cache(inode, start, start + len - 1, 0); |
---|
7324 | 6960 | } |
---|
7325 | 6961 | em = ERR_PTR(ret); |
---|
7326 | 6962 | } |
---|
.. | .. |
---|
7329 | 6965 | return em; |
---|
7330 | 6966 | } |
---|
7331 | 6967 | |
---|
7332 | | -static struct extent_map *btrfs_new_extent_direct(struct inode *inode, |
---|
| 6968 | +static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode, |
---|
7333 | 6969 | u64 start, u64 len) |
---|
7334 | 6970 | { |
---|
7335 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
7336 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
| 6971 | + struct btrfs_root *root = inode->root; |
---|
| 6972 | + struct btrfs_fs_info *fs_info = root->fs_info; |
---|
7337 | 6973 | struct extent_map *em; |
---|
7338 | 6974 | struct btrfs_key ins; |
---|
7339 | 6975 | u64 alloc_hint; |
---|
.. | .. |
---|
7350 | 6986 | ins.offset, BTRFS_ORDERED_REGULAR); |
---|
7351 | 6987 | btrfs_dec_block_group_reservations(fs_info, ins.objectid); |
---|
7352 | 6988 | if (IS_ERR(em)) |
---|
7353 | | - btrfs_free_reserved_extent(fs_info, ins.objectid, |
---|
7354 | | - ins.offset, 1); |
---|
| 6989 | + btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, |
---|
| 6990 | + 1); |
---|
7355 | 6991 | |
---|
7356 | 6992 | return em; |
---|
7357 | 6993 | } |
---|
7358 | 6994 | |
---|
7359 | 6995 | /* |
---|
7360 | | - * returns 1 when the nocow is safe, < 1 on error, 0 if the |
---|
7361 | | - * block must be cow'd |
---|
| 6996 | + * Check if we can do nocow write into the range [@offset, @offset + @len) |
---|
| 6997 | + * |
---|
| 6998 | + * @offset: File offset |
---|
| 6999 | + * @len: The length to write, will be updated to the nocow writeable |
---|
| 7000 | + * range |
---|
| 7001 | + * @orig_start: (optional) Return the original file offset of the file extent |
---|
| 7002 | + * @orig_block_len: (optional) Return the original on-disk length of the file extent |
---|
| 7003 | + * @ram_bytes: (optional) Return the ram_bytes of the file extent |
---|
| 7004 | + * @strict: if true, omit optimizations that might force us into unnecessary |
---|
| 7005 | + * cow. e.g., don't trust generation number. |
---|
| 7006 | + * |
---|
| 7007 | + * This function will flush ordered extents in the range to ensure proper |
---|
| 7008 | + * nocow checks for (nowait == false) case. |
---|
| 7009 | + * |
---|
| 7010 | + * Return: |
---|
| 7011 | + * >0 and update @len if we can do nocow write |
---|
| 7012 | + * 0 if we can't do nocow write |
---|
| 7013 | + * <0 if error happened |
---|
| 7014 | + * |
---|
| 7015 | + * NOTE: This only checks the file extents; the caller is responsible for waiting on |
---|
| 7016 | + * any ordered extents. |
---|
7362 | 7017 | */ |
---|
7363 | 7018 | noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, |
---|
7364 | 7019 | u64 *orig_start, u64 *orig_block_len, |
---|
7365 | | - u64 *ram_bytes) |
---|
| 7020 | + u64 *ram_bytes, bool strict) |
---|
7366 | 7021 | { |
---|
7367 | 7022 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
7368 | 7023 | struct btrfs_path *path; |
---|
.. | .. |
---|
7440 | 7095 | * Do the same check as in btrfs_cross_ref_exist but without the |
---|
7441 | 7096 | * unnecessary search. |
---|
7442 | 7097 | */ |
---|
7443 | | - if (btrfs_file_extent_generation(leaf, fi) <= |
---|
7444 | | - btrfs_root_last_snapshot(&root->root_item)) |
---|
| 7098 | + if (!strict && |
---|
| 7099 | + (btrfs_file_extent_generation(leaf, fi) <= |
---|
| 7100 | + btrfs_root_last_snapshot(&root->root_item))) |
---|
7445 | 7101 | goto out; |
---|
7446 | 7102 | |
---|
7447 | 7103 | backref_offset = btrfs_file_extent_offset(leaf, fi); |
---|
.. | .. |
---|
7477 | 7133 | */ |
---|
7478 | 7134 | |
---|
7479 | 7135 | ret = btrfs_cross_ref_exist(root, btrfs_ino(BTRFS_I(inode)), |
---|
7480 | | - key.offset - backref_offset, disk_bytenr); |
---|
| 7136 | + key.offset - backref_offset, disk_bytenr, |
---|
| 7137 | + strict); |
---|
7481 | 7138 | if (ret) { |
---|
7482 | 7139 | ret = 0; |
---|
7483 | 7140 | goto out; |
---|
.. | .. |
---|
7505 | 7162 | } |
---|
7506 | 7163 | |
---|
7507 | 7164 | static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, |
---|
7508 | | - struct extent_state **cached_state, int writing) |
---|
| 7165 | + struct extent_state **cached_state, bool writing) |
---|
7509 | 7166 | { |
---|
7510 | 7167 | struct btrfs_ordered_extent *ordered; |
---|
7511 | 7168 | int ret = 0; |
---|
.. | .. |
---|
7554 | 7211 | */ |
---|
7555 | 7212 | if (writing || |
---|
7556 | 7213 | test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) |
---|
7557 | | - btrfs_start_ordered_extent(inode, ordered, 1); |
---|
| 7214 | + btrfs_start_ordered_extent(ordered, 1); |
---|
7558 | 7215 | else |
---|
7559 | 7216 | ret = -ENOTBLK; |
---|
7560 | 7217 | btrfs_put_ordered_extent(ordered); |
---|
.. | .. |
---|
7564 | 7221 | * for it to complete) and then invalidate the pages for |
---|
7565 | 7222 | * this range (through invalidate_inode_pages2_range()), |
---|
7566 | 7223 | * but that can lead us to a deadlock with a concurrent |
---|
7567 | | - * call to readpages() (a buffered read or a defrag call |
---|
| 7224 | + * call to readahead (a buffered read or a defrag call |
---|
7568 | 7225 | * triggered a readahead) on a page lock due to an |
---|
7569 | 7226 | * ordered dio extent we created before but did not have |
---|
7570 | 7227 | * yet a corresponding bio submitted (whence it can not |
---|
7571 | | - * complete), which makes readpages() wait for that |
---|
| 7228 | + * complete), which makes readahead wait for that |
---|
7572 | 7229 | * ordered extent to complete while holding a lock on |
---|
7573 | 7230 | * that page. |
---|
7574 | 7231 | */ |
---|
.. | .. |
---|
7585 | 7242 | } |
---|
7586 | 7243 | |
---|
7587 | 7244 | /* The callers of this must take lock_extent() */ |
---|
7588 | | -static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len, |
---|
7589 | | - u64 orig_start, u64 block_start, |
---|
| 7245 | +static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, |
---|
| 7246 | + u64 len, u64 orig_start, u64 block_start, |
---|
7590 | 7247 | u64 block_len, u64 orig_block_len, |
---|
7591 | 7248 | u64 ram_bytes, int compress_type, |
---|
7592 | 7249 | int type) |
---|
7593 | 7250 | { |
---|
7594 | 7251 | struct extent_map_tree *em_tree; |
---|
7595 | 7252 | struct extent_map *em; |
---|
7596 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
7597 | 7253 | int ret; |
---|
7598 | 7254 | |
---|
7599 | 7255 | ASSERT(type == BTRFS_ORDERED_PREALLOC || |
---|
.. | .. |
---|
7601 | 7257 | type == BTRFS_ORDERED_NOCOW || |
---|
7602 | 7258 | type == BTRFS_ORDERED_REGULAR); |
---|
7603 | 7259 | |
---|
7604 | | - em_tree = &BTRFS_I(inode)->extent_tree; |
---|
| 7260 | + em_tree = &inode->extent_tree; |
---|
7605 | 7261 | em = alloc_extent_map(); |
---|
7606 | 7262 | if (!em) |
---|
7607 | 7263 | return ERR_PTR(-ENOMEM); |
---|
.. | .. |
---|
7611 | 7267 | em->len = len; |
---|
7612 | 7268 | em->block_len = block_len; |
---|
7613 | 7269 | em->block_start = block_start; |
---|
7614 | | - em->bdev = root->fs_info->fs_devices->latest_bdev; |
---|
7615 | 7270 | em->orig_block_len = orig_block_len; |
---|
7616 | 7271 | em->ram_bytes = ram_bytes; |
---|
7617 | 7272 | em->generation = -1; |
---|
.. | .. |
---|
7624 | 7279 | } |
---|
7625 | 7280 | |
---|
7626 | 7281 | do { |
---|
7627 | | - btrfs_drop_extent_cache(BTRFS_I(inode), em->start, |
---|
7628 | | - em->start + em->len - 1, 0); |
---|
| 7282 | + btrfs_drop_extent_cache(inode, em->start, |
---|
| 7283 | + em->start + em->len - 1, 0); |
---|
7629 | 7284 | write_lock(&em_tree->lock); |
---|
7630 | 7285 | ret = add_extent_mapping(em_tree, em, 1); |
---|
7631 | 7286 | write_unlock(&em_tree->lock); |
---|
.. | .. |
---|
7645 | 7300 | } |
---|
7646 | 7301 | |
---|
7647 | 7302 | |
---|
7648 | | -static int btrfs_get_blocks_direct_read(struct extent_map *em, |
---|
7649 | | - struct buffer_head *bh_result, |
---|
7650 | | - struct inode *inode, |
---|
7651 | | - u64 start, u64 len) |
---|
7652 | | -{ |
---|
7653 | | - if (em->block_start == EXTENT_MAP_HOLE || |
---|
7654 | | - test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) |
---|
7655 | | - return -ENOENT; |
---|
7656 | | - |
---|
7657 | | - len = min(len, em->len - (start - em->start)); |
---|
7658 | | - |
---|
7659 | | - bh_result->b_blocknr = (em->block_start + (start - em->start)) >> |
---|
7660 | | - inode->i_blkbits; |
---|
7661 | | - bh_result->b_size = len; |
---|
7662 | | - bh_result->b_bdev = em->bdev; |
---|
7663 | | - set_buffer_mapped(bh_result); |
---|
7664 | | - |
---|
7665 | | - return 0; |
---|
7666 | | -} |
---|
7667 | | - |
---|
7668 | 7303 | static int btrfs_get_blocks_direct_write(struct extent_map **map, |
---|
7669 | | - struct buffer_head *bh_result, |
---|
7670 | 7304 | struct inode *inode, |
---|
7671 | 7305 | struct btrfs_dio_data *dio_data, |
---|
7672 | 7306 | u64 start, u64 len) |
---|
.. | .. |
---|
7698 | 7332 | block_start = em->block_start + (start - em->start); |
---|
7699 | 7333 | |
---|
7700 | 7334 | if (can_nocow_extent(inode, start, &len, &orig_start, |
---|
7701 | | - &orig_block_len, &ram_bytes) == 1 && |
---|
| 7335 | + &orig_block_len, &ram_bytes, false) == 1 && |
---|
7702 | 7336 | btrfs_inc_nocow_writers(fs_info, block_start)) { |
---|
7703 | 7337 | struct extent_map *em2; |
---|
7704 | 7338 | |
---|
7705 | | - em2 = btrfs_create_dio_extent(inode, start, len, |
---|
| 7339 | + em2 = btrfs_create_dio_extent(BTRFS_I(inode), start, len, |
---|
7706 | 7340 | orig_start, block_start, |
---|
7707 | 7341 | len, orig_block_len, |
---|
7708 | 7342 | ram_bytes, type); |
---|
.. | .. |
---|
7721 | 7355 | * use the existing or preallocated extent, so does not |
---|
7722 | 7356 | * need to adjust btrfs_space_info's bytes_may_use. |
---|
7723 | 7357 | */ |
---|
7724 | | - btrfs_free_reserved_data_space_noquota(inode, start, |
---|
7725 | | - len); |
---|
| 7358 | + btrfs_free_reserved_data_space_noquota(fs_info, len); |
---|
7726 | 7359 | goto skip_cow; |
---|
7727 | 7360 | } |
---|
7728 | 7361 | } |
---|
7729 | 7362 | |
---|
7730 | 7363 | /* this will cow the extent */ |
---|
7731 | | - len = bh_result->b_size; |
---|
7732 | 7364 | free_extent_map(em); |
---|
7733 | | - *map = em = btrfs_new_extent_direct(inode, start, len); |
---|
| 7365 | + *map = em = btrfs_new_extent_direct(BTRFS_I(inode), start, len); |
---|
7734 | 7366 | if (IS_ERR(em)) { |
---|
7735 | 7367 | ret = PTR_ERR(em); |
---|
7736 | 7368 | goto out; |
---|
.. | .. |
---|
7739 | 7371 | len = min(len, em->len - (start - em->start)); |
---|
7740 | 7372 | |
---|
7741 | 7373 | skip_cow: |
---|
7742 | | - bh_result->b_blocknr = (em->block_start + (start - em->start)) >> |
---|
7743 | | - inode->i_blkbits; |
---|
7744 | | - bh_result->b_size = len; |
---|
7745 | | - bh_result->b_bdev = em->bdev; |
---|
7746 | | - set_buffer_mapped(bh_result); |
---|
7747 | | - |
---|
7748 | | - if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) |
---|
7749 | | - set_buffer_new(bh_result); |
---|
7750 | | - |
---|
7751 | 7374 | /* |
---|
7752 | 7375 | * Need to update the i_size under the extent lock so buffered |
---|
7753 | 7376 | * readers will get the updated i_size when we unlock. |
---|
7754 | 7377 | */ |
---|
7755 | | - if (!dio_data->overwrite && start + len > i_size_read(inode)) |
---|
| 7378 | + if (start + len > i_size_read(inode)) |
---|
7756 | 7379 | i_size_write(inode, start + len); |
---|
7757 | 7380 | |
---|
7758 | | - WARN_ON(dio_data->reserve < len); |
---|
7759 | 7381 | dio_data->reserve -= len; |
---|
7760 | | - dio_data->unsubmitted_oe_range_end = start + len; |
---|
7761 | | - current->journal_info = dio_data; |
---|
7762 | 7382 | out: |
---|
7763 | 7383 | return ret; |
---|
7764 | 7384 | } |
---|
7765 | 7385 | |
---|
7766 | | -static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, |
---|
7767 | | - struct buffer_head *bh_result, int create) |
---|
| 7386 | +static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, |
---|
| 7387 | + loff_t length, unsigned int flags, struct iomap *iomap, |
---|
| 7388 | + struct iomap *srcmap) |
---|
7768 | 7389 | { |
---|
7769 | 7390 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
7770 | 7391 | struct extent_map *em; |
---|
7771 | 7392 | struct extent_state *cached_state = NULL; |
---|
7772 | 7393 | struct btrfs_dio_data *dio_data = NULL; |
---|
7773 | | - u64 start = iblock << inode->i_blkbits; |
---|
7774 | 7394 | u64 lockstart, lockend; |
---|
7775 | | - u64 len = bh_result->b_size; |
---|
7776 | | - int unlock_bits = EXTENT_LOCKED; |
---|
| 7395 | + const bool write = !!(flags & IOMAP_WRITE); |
---|
7777 | 7396 | int ret = 0; |
---|
| 7397 | + u64 len = length; |
---|
| 7398 | + bool unlock_extents = false; |
---|
| 7399 | + bool sync = (current->journal_info == BTRFS_DIO_SYNC_STUB); |
---|
7778 | 7400 | |
---|
7779 | | - if (create) |
---|
7780 | | - unlock_bits |= EXTENT_DIRTY; |
---|
7781 | | - else |
---|
| 7401 | + /* |
---|
| 7402 | + * We used current->journal_info here to see if we were sync, but |
---|
| 7403 | + * there are a lot of tests in the enospc machinery to not do flushing if |
---|
| 7404 | + * we have a journal_info set, so we need to clear this out and re-set |
---|
| 7405 | + * it in iomap_end. |
---|
| 7406 | + */ |
---|
| 7407 | + ASSERT(current->journal_info == NULL || |
---|
| 7408 | + current->journal_info == BTRFS_DIO_SYNC_STUB); |
---|
| 7409 | + current->journal_info = NULL; |
---|
| 7410 | + |
---|
| 7411 | + if (!write) |
---|
7782 | 7412 | len = min_t(u64, len, fs_info->sectorsize); |
---|
7783 | 7413 | |
---|
7784 | 7414 | lockstart = start; |
---|
7785 | 7415 | lockend = start + len - 1; |
---|
7786 | 7416 | |
---|
7787 | | - if (current->journal_info) { |
---|
7788 | | - /* |
---|
7789 | | - * Need to pull our outstanding extents and set journal_info to NULL so |
---|
7790 | | - * that anything that needs to check if there's a transaction doesn't get |
---|
7791 | | - * confused. |
---|
7792 | | - */ |
---|
7793 | | - dio_data = current->journal_info; |
---|
7794 | | - current->journal_info = NULL; |
---|
| 7417 | + /* |
---|
| 7418 | + * The generic stuff only does filemap_write_and_wait_range, which |
---|
| 7419 | + * isn't enough if we've written compressed pages to this area, so we |
---|
| 7420 | + * need to flush the dirty pages again to make absolutely sure that any |
---|
| 7421 | + * outstanding dirty pages are on disk. |
---|
| 7422 | + */ |
---|
| 7423 | + if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, |
---|
| 7424 | + &BTRFS_I(inode)->runtime_flags)) { |
---|
| 7425 | + ret = filemap_fdatawrite_range(inode->i_mapping, start, |
---|
| 7426 | + start + length - 1); |
---|
| 7427 | + if (ret) |
---|
| 7428 | + return ret; |
---|
7795 | 7429 | } |
---|
| 7430 | + |
---|
| 7431 | + dio_data = kzalloc(sizeof(*dio_data), GFP_NOFS); |
---|
| 7432 | + if (!dio_data) |
---|
| 7433 | + return -ENOMEM; |
---|
| 7434 | + |
---|
| 7435 | + dio_data->sync = sync; |
---|
| 7436 | + dio_data->length = length; |
---|
| 7437 | + if (write) { |
---|
| 7438 | + dio_data->reserve = round_up(length, fs_info->sectorsize); |
---|
| 7439 | + ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), |
---|
| 7440 | + &dio_data->data_reserved, |
---|
| 7441 | + start, dio_data->reserve); |
---|
| 7442 | + if (ret) { |
---|
| 7443 | + extent_changeset_free(dio_data->data_reserved); |
---|
| 7444 | + kfree(dio_data); |
---|
| 7445 | + return ret; |
---|
| 7446 | + } |
---|
| 7447 | + } |
---|
| 7448 | + iomap->private = dio_data; |
---|
| 7449 | + |
---|
7796 | 7450 | |
---|
7797 | 7451 | /* |
---|
7798 | 7452 | * If this errors out it's because we couldn't invalidate pagecache for |
---|
7799 | 7453 | * this range and we need to fallback to buffered. |
---|
7800 | 7454 | */ |
---|
7801 | | - if (lock_extent_direct(inode, lockstart, lockend, &cached_state, |
---|
7802 | | - create)) { |
---|
| 7455 | + if (lock_extent_direct(inode, lockstart, lockend, &cached_state, write)) { |
---|
7803 | 7456 | ret = -ENOTBLK; |
---|
7804 | 7457 | goto err; |
---|
7805 | 7458 | } |
---|
7806 | 7459 | |
---|
7807 | | - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0); |
---|
| 7460 | + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len); |
---|
7808 | 7461 | if (IS_ERR(em)) { |
---|
7809 | 7462 | ret = PTR_ERR(em); |
---|
7810 | 7463 | goto unlock_err; |
---|
.. | .. |
---|
7827 | 7480 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || |
---|
7828 | 7481 | em->block_start == EXTENT_MAP_INLINE) { |
---|
7829 | 7482 | free_extent_map(em); |
---|
7830 | | - ret = -ENOTBLK; |
---|
| 7483 | + /* |
---|
| 7484 | + * If we are in a NOWAIT context, return -EAGAIN in order to |
---|
| 7485 | + * fallback to buffered IO. This is not only because we can |
---|
| 7486 | + * block with buffered IO (no support for NOWAIT semantics at |
---|
| 7487 | + * the moment) but also to avoid returning short reads to user |
---|
| 7488 | + * space - this happens if we were able to read some data from |
---|
| 7489 | + * previous non-compressed extents and then when we fallback to |
---|
| 7490 | + * buffered IO, at btrfs_file_read_iter() by calling |
---|
| 7491 | + * filemap_read(), we fail to fault in pages for the read buffer, |
---|
| 7492 | + * in which case filemap_read() returns a short read (the number |
---|
| 7493 | + * of bytes previously read is > 0, so it does not return -EFAULT). |
---|
| 7494 | + */ |
---|
| 7495 | + ret = (flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOTBLK; |
---|
7831 | 7496 | goto unlock_err; |
---|
7832 | 7497 | } |
---|
7833 | 7498 | |
---|
7834 | | - if (create) { |
---|
7835 | | - ret = btrfs_get_blocks_direct_write(&em, bh_result, inode, |
---|
7836 | | - dio_data, start, len); |
---|
| 7499 | + len = min(len, em->len - (start - em->start)); |
---|
| 7500 | + if (write) { |
---|
| 7501 | + ret = btrfs_get_blocks_direct_write(&em, inode, dio_data, |
---|
| 7502 | + start, len); |
---|
7837 | 7503 | if (ret < 0) |
---|
7838 | 7504 | goto unlock_err; |
---|
7839 | | - |
---|
7840 | | - /* clear and unlock the entire range */ |
---|
7841 | | - clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
---|
7842 | | - unlock_bits, 1, 0, &cached_state); |
---|
| 7505 | + unlock_extents = true; |
---|
| 7506 | + /* Recalc len in case the new em is smaller than requested */ |
---|
| 7507 | + len = min(len, em->len - (start - em->start)); |
---|
7843 | 7508 | } else { |
---|
7844 | | - ret = btrfs_get_blocks_direct_read(em, bh_result, inode, |
---|
7845 | | - start, len); |
---|
7846 | | - /* Can be negative only if we read from a hole */ |
---|
7847 | | - if (ret < 0) { |
---|
7848 | | - ret = 0; |
---|
7849 | | - free_extent_map(em); |
---|
7850 | | - goto unlock_err; |
---|
7851 | | - } |
---|
7852 | 7509 | /* |
---|
7853 | 7510 | * We need to unlock only the end area that we aren't using. |
---|
7854 | 7511 | * The rest is going to be unlocked by the endio routine. |
---|
7855 | 7512 | */ |
---|
7856 | | - lockstart = start + bh_result->b_size; |
---|
7857 | | - if (lockstart < lockend) { |
---|
7858 | | - clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, |
---|
7859 | | - lockend, unlock_bits, 1, 0, |
---|
7860 | | - &cached_state); |
---|
7861 | | - } else { |
---|
7862 | | - free_extent_state(cached_state); |
---|
7863 | | - } |
---|
| 7513 | + lockstart = start + len; |
---|
| 7514 | + if (lockstart < lockend) |
---|
| 7515 | + unlock_extents = true; |
---|
7864 | 7516 | } |
---|
| 7517 | + |
---|
| 7518 | + if (unlock_extents) |
---|
| 7519 | + unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
---|
| 7520 | + lockstart, lockend, &cached_state); |
---|
| 7521 | + else |
---|
| 7522 | + free_extent_state(cached_state); |
---|
| 7523 | + |
---|
| 7524 | + /* |
---|
| 7525 | + * Translate extent map information to iomap. |
---|
| 7526 | + * We trim the extents (and move the addr) even though iomap code does |
---|
| 7527 | + * that, since we have locked only the parts we are performing I/O in. |
---|
| 7528 | + */ |
---|
| 7529 | + if ((em->block_start == EXTENT_MAP_HOLE) || |
---|
| 7530 | + (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) && !write)) { |
---|
| 7531 | + iomap->addr = IOMAP_NULL_ADDR; |
---|
| 7532 | + iomap->type = IOMAP_HOLE; |
---|
| 7533 | + } else { |
---|
| 7534 | + iomap->addr = em->block_start + (start - em->start); |
---|
| 7535 | + iomap->type = IOMAP_MAPPED; |
---|
| 7536 | + } |
---|
| 7537 | + iomap->offset = start; |
---|
| 7538 | + iomap->bdev = fs_info->fs_devices->latest_bdev; |
---|
| 7539 | + iomap->length = len; |
---|
7865 | 7540 | |
---|
7866 | 7541 | free_extent_map(em); |
---|
7867 | 7542 | |
---|
7868 | 7543 | return 0; |
---|
7869 | 7544 | |
---|
7870 | 7545 | unlock_err: |
---|
7871 | | - clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
---|
7872 | | - unlock_bits, 1, 0, &cached_state); |
---|
| 7546 | + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
---|
| 7547 | + &cached_state); |
---|
7873 | 7548 | err: |
---|
7874 | | - if (dio_data) |
---|
7875 | | - current->journal_info = dio_data; |
---|
| 7549 | + if (dio_data) { |
---|
| 7550 | + btrfs_delalloc_release_space(BTRFS_I(inode), |
---|
| 7551 | + dio_data->data_reserved, start, |
---|
| 7552 | + dio_data->reserve, true); |
---|
| 7553 | + btrfs_delalloc_release_extents(BTRFS_I(inode), dio_data->reserve); |
---|
| 7554 | + extent_changeset_free(dio_data->data_reserved); |
---|
| 7555 | + kfree(dio_data); |
---|
| 7556 | + } |
---|
7876 | 7557 | return ret; |
---|
7877 | 7558 | } |
---|
7878 | 7559 | |
---|
7879 | | -static inline blk_status_t submit_dio_repair_bio(struct inode *inode, |
---|
7880 | | - struct bio *bio, |
---|
7881 | | - int mirror_num) |
---|
| 7560 | +static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length, |
---|
| 7561 | + ssize_t written, unsigned int flags, struct iomap *iomap) |
---|
7882 | 7562 | { |
---|
| 7563 | + int ret = 0; |
---|
| 7564 | + struct btrfs_dio_data *dio_data = iomap->private; |
---|
| 7565 | + size_t submitted = dio_data->submitted; |
---|
| 7566 | + const bool write = !!(flags & IOMAP_WRITE); |
---|
| 7567 | + |
---|
| 7568 | + if (!write && (iomap->type == IOMAP_HOLE)) { |
---|
| 7569 | + /* If reading from a hole, unlock and return */ |
---|
| 7570 | + unlock_extent(&BTRFS_I(inode)->io_tree, pos, pos + length - 1); |
---|
| 7571 | + goto out; |
---|
| 7572 | + } |
---|
| 7573 | + |
---|
| 7574 | + if (submitted < length) { |
---|
| 7575 | + pos += submitted; |
---|
| 7576 | + length -= submitted; |
---|
| 7577 | + if (write) |
---|
| 7578 | + __endio_write_update_ordered(BTRFS_I(inode), pos, |
---|
| 7579 | + length, false); |
---|
| 7580 | + else |
---|
| 7581 | + unlock_extent(&BTRFS_I(inode)->io_tree, pos, |
---|
| 7582 | + pos + length - 1); |
---|
| 7583 | + ret = -ENOTBLK; |
---|
| 7584 | + } |
---|
| 7585 | + |
---|
| 7586 | + if (write) { |
---|
| 7587 | + if (dio_data->reserve) |
---|
| 7588 | + btrfs_delalloc_release_space(BTRFS_I(inode), |
---|
| 7589 | + dio_data->data_reserved, pos, |
---|
| 7590 | + dio_data->reserve, true); |
---|
| 7591 | + btrfs_delalloc_release_extents(BTRFS_I(inode), dio_data->length); |
---|
| 7592 | + extent_changeset_free(dio_data->data_reserved); |
---|
| 7593 | + } |
---|
| 7594 | +out: |
---|
| 7595 | + /* |
---|
| 7596 | + * We're all done, we can re-set the current->journal_info now safely |
---|
| 7597 | + * for our endio. |
---|
| 7598 | + */ |
---|
| 7599 | + if (dio_data->sync) { |
---|
| 7600 | + ASSERT(current->journal_info == NULL); |
---|
| 7601 | + current->journal_info = BTRFS_DIO_SYNC_STUB; |
---|
| 7602 | + } |
---|
| 7603 | + kfree(dio_data); |
---|
| 7604 | + iomap->private = NULL; |
---|
| 7605 | + |
---|
| 7606 | + return ret; |
---|
| 7607 | +} |
---|
| 7608 | + |
---|
| 7609 | +static void btrfs_dio_private_put(struct btrfs_dio_private *dip) |
---|
| 7610 | +{ |
---|
| 7611 | + /* |
---|
| 7612 | + * This implies a barrier so that stores to dio_bio->bi_status before |
---|
| 7613 | + * this and loads of dio_bio->bi_status after this are fully ordered. |
---|
| 7614 | + */ |
---|
| 7615 | + if (!refcount_dec_and_test(&dip->refs)) |
---|
| 7616 | + return; |
---|
| 7617 | + |
---|
| 7618 | + if (bio_op(dip->dio_bio) == REQ_OP_WRITE) { |
---|
| 7619 | + __endio_write_update_ordered(BTRFS_I(dip->inode), |
---|
| 7620 | + dip->logical_offset, |
---|
| 7621 | + dip->bytes, |
---|
| 7622 | + !dip->dio_bio->bi_status); |
---|
| 7623 | + } else { |
---|
| 7624 | + unlock_extent(&BTRFS_I(dip->inode)->io_tree, |
---|
| 7625 | + dip->logical_offset, |
---|
| 7626 | + dip->logical_offset + dip->bytes - 1); |
---|
| 7627 | + } |
---|
| 7628 | + |
---|
| 7629 | + bio_endio(dip->dio_bio); |
---|
| 7630 | + kfree(dip); |
---|
| 7631 | +} |
---|
| 7632 | + |
---|
| 7633 | +static blk_status_t submit_dio_repair_bio(struct inode *inode, struct bio *bio, |
---|
| 7634 | + int mirror_num, |
---|
| 7635 | + unsigned long bio_flags) |
---|
| 7636 | +{ |
---|
| 7637 | + struct btrfs_dio_private *dip = bio->bi_private; |
---|
7883 | 7638 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
7884 | 7639 | blk_status_t ret; |
---|
7885 | 7640 | |
---|
7886 | 7641 | BUG_ON(bio_op(bio) == REQ_OP_WRITE); |
---|
7887 | 7642 | |
---|
7888 | | - ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DIO_REPAIR); |
---|
| 7643 | + ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA); |
---|
7889 | 7644 | if (ret) |
---|
7890 | 7645 | return ret; |
---|
7891 | 7646 | |
---|
7892 | | - ret = btrfs_map_bio(fs_info, bio, mirror_num, 0); |
---|
7893 | | - |
---|
| 7647 | + refcount_inc(&dip->refs); |
---|
| 7648 | + ret = btrfs_map_bio(fs_info, bio, mirror_num); |
---|
| 7649 | + if (ret) |
---|
| 7650 | + refcount_dec(&dip->refs); |
---|
7894 | 7651 | return ret; |
---|
7895 | 7652 | } |
---|
7896 | 7653 | |
---|
7897 | | -static int btrfs_check_dio_repairable(struct inode *inode, |
---|
7898 | | - struct bio *failed_bio, |
---|
7899 | | - struct io_failure_record *failrec, |
---|
7900 | | - int failed_mirror) |
---|
| 7654 | +static blk_status_t btrfs_check_read_dio_bio(struct inode *inode, |
---|
| 7655 | + struct btrfs_io_bio *io_bio, |
---|
| 7656 | + const bool uptodate) |
---|
7901 | 7657 | { |
---|
7902 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
7903 | | - int num_copies; |
---|
7904 | | - |
---|
7905 | | - num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); |
---|
7906 | | - if (num_copies == 1) { |
---|
7907 | | - /* |
---|
7908 | | - * we only have a single copy of the data, so don't bother with |
---|
7909 | | - * all the retry and error correction code that follows. no |
---|
7910 | | - * matter what the error is, it is very likely to persist. |
---|
7911 | | - */ |
---|
7912 | | - btrfs_debug(fs_info, |
---|
7913 | | - "Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d", |
---|
7914 | | - num_copies, failrec->this_mirror, failed_mirror); |
---|
7915 | | - return 0; |
---|
7916 | | - } |
---|
7917 | | - |
---|
7918 | | - failrec->failed_mirror = failed_mirror; |
---|
7919 | | - failrec->this_mirror++; |
---|
7920 | | - if (failrec->this_mirror == failed_mirror) |
---|
7921 | | - failrec->this_mirror++; |
---|
7922 | | - |
---|
7923 | | - if (failrec->this_mirror > num_copies) { |
---|
7924 | | - btrfs_debug(fs_info, |
---|
7925 | | - "Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d", |
---|
7926 | | - num_copies, failrec->this_mirror, failed_mirror); |
---|
7927 | | - return 0; |
---|
7928 | | - } |
---|
7929 | | - |
---|
7930 | | - return 1; |
---|
7931 | | -} |
---|
7932 | | - |
---|
7933 | | -static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio, |
---|
7934 | | - struct page *page, unsigned int pgoff, |
---|
7935 | | - u64 start, u64 end, int failed_mirror, |
---|
7936 | | - bio_end_io_t *repair_endio, void *repair_arg) |
---|
7937 | | -{ |
---|
7938 | | - struct io_failure_record *failrec; |
---|
7939 | | - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
---|
| 7658 | + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; |
---|
| 7659 | + const u32 sectorsize = fs_info->sectorsize; |
---|
7940 | 7660 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; |
---|
7941 | | - struct bio *bio; |
---|
7942 | | - int isector; |
---|
7943 | | - unsigned int read_mode = 0; |
---|
7944 | | - int segs; |
---|
7945 | | - int ret; |
---|
7946 | | - blk_status_t status; |
---|
7947 | | - struct bio_vec bvec; |
---|
7948 | | - |
---|
7949 | | - BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); |
---|
7950 | | - |
---|
7951 | | - ret = btrfs_get_io_failure_record(inode, start, end, &failrec); |
---|
7952 | | - if (ret) |
---|
7953 | | - return errno_to_blk_status(ret); |
---|
7954 | | - |
---|
7955 | | - ret = btrfs_check_dio_repairable(inode, failed_bio, failrec, |
---|
7956 | | - failed_mirror); |
---|
7957 | | - if (!ret) { |
---|
7958 | | - free_io_failure(failure_tree, io_tree, failrec); |
---|
7959 | | - return BLK_STS_IOERR; |
---|
7960 | | - } |
---|
7961 | | - |
---|
7962 | | - segs = bio_segments(failed_bio); |
---|
7963 | | - bio_get_first_bvec(failed_bio, &bvec); |
---|
7964 | | - if (segs > 1 || |
---|
7965 | | - (bvec.bv_len > btrfs_inode_sectorsize(inode))) |
---|
7966 | | - read_mode |= REQ_FAILFAST_DEV; |
---|
7967 | | - |
---|
7968 | | - isector = start - btrfs_io_bio(failed_bio)->logical; |
---|
7969 | | - isector >>= inode->i_sb->s_blocksize_bits; |
---|
7970 | | - bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, |
---|
7971 | | - pgoff, isector, repair_endio, repair_arg); |
---|
7972 | | - bio->bi_opf = REQ_OP_READ | read_mode; |
---|
7973 | | - |
---|
7974 | | - btrfs_debug(BTRFS_I(inode)->root->fs_info, |
---|
7975 | | - "repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d", |
---|
7976 | | - read_mode, failrec->this_mirror, failrec->in_validation); |
---|
7977 | | - |
---|
7978 | | - status = submit_dio_repair_bio(inode, bio, failrec->this_mirror); |
---|
7979 | | - if (status) { |
---|
7980 | | - free_io_failure(failure_tree, io_tree, failrec); |
---|
7981 | | - bio_put(bio); |
---|
7982 | | - } |
---|
7983 | | - |
---|
7984 | | - return status; |
---|
7985 | | -} |
---|
7986 | | - |
---|
7987 | | -struct btrfs_retry_complete { |
---|
7988 | | - struct completion done; |
---|
7989 | | - struct inode *inode; |
---|
7990 | | - u64 start; |
---|
7991 | | - int uptodate; |
---|
7992 | | -}; |
---|
7993 | | - |
---|
7994 | | -static void btrfs_retry_endio_nocsum(struct bio *bio) |
---|
7995 | | -{ |
---|
7996 | | - struct btrfs_retry_complete *done = bio->bi_private; |
---|
7997 | | - struct inode *inode = done->inode; |
---|
7998 | | - struct bio_vec *bvec; |
---|
7999 | | - struct extent_io_tree *io_tree, *failure_tree; |
---|
8000 | | - int i; |
---|
8001 | | - |
---|
8002 | | - if (bio->bi_status) |
---|
8003 | | - goto end; |
---|
8004 | | - |
---|
8005 | | - ASSERT(bio->bi_vcnt == 1); |
---|
8006 | | - io_tree = &BTRFS_I(inode)->io_tree; |
---|
8007 | | - failure_tree = &BTRFS_I(inode)->io_failure_tree; |
---|
8008 | | - ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(inode)); |
---|
8009 | | - |
---|
8010 | | - done->uptodate = 1; |
---|
8011 | | - ASSERT(!bio_flagged(bio, BIO_CLONED)); |
---|
8012 | | - bio_for_each_segment_all(bvec, bio, i) |
---|
8013 | | - clean_io_failure(BTRFS_I(inode)->root->fs_info, failure_tree, |
---|
8014 | | - io_tree, done->start, bvec->bv_page, |
---|
8015 | | - btrfs_ino(BTRFS_I(inode)), 0); |
---|
8016 | | -end: |
---|
8017 | | - complete(&done->done); |
---|
8018 | | - bio_put(bio); |
---|
8019 | | -} |
---|
8020 | | - |
---|
8021 | | -static blk_status_t __btrfs_correct_data_nocsum(struct inode *inode, |
---|
8022 | | - struct btrfs_io_bio *io_bio) |
---|
8023 | | -{ |
---|
8024 | | - struct btrfs_fs_info *fs_info; |
---|
| 7661 | + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
---|
| 7662 | + const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM); |
---|
8025 | 7663 | struct bio_vec bvec; |
---|
8026 | 7664 | struct bvec_iter iter; |
---|
8027 | | - struct btrfs_retry_complete done; |
---|
8028 | | - u64 start; |
---|
8029 | | - unsigned int pgoff; |
---|
8030 | | - u32 sectorsize; |
---|
8031 | | - int nr_sectors; |
---|
8032 | | - blk_status_t ret; |
---|
| 7665 | + u64 start = io_bio->logical; |
---|
| 7666 | + int icsum = 0; |
---|
8033 | 7667 | blk_status_t err = BLK_STS_OK; |
---|
8034 | 7668 | |
---|
8035 | | - fs_info = BTRFS_I(inode)->root->fs_info; |
---|
8036 | | - sectorsize = fs_info->sectorsize; |
---|
| 7669 | + __bio_for_each_segment(bvec, &io_bio->bio, iter, io_bio->iter) { |
---|
| 7670 | + unsigned int i, nr_sectors, pgoff; |
---|
8037 | 7671 | |
---|
8038 | | - start = io_bio->logical; |
---|
8039 | | - done.inode = inode; |
---|
8040 | | - io_bio->bio.bi_iter = io_bio->iter; |
---|
8041 | | - |
---|
8042 | | - bio_for_each_segment(bvec, &io_bio->bio, iter) { |
---|
8043 | 7672 | nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len); |
---|
8044 | 7673 | pgoff = bvec.bv_offset; |
---|
8045 | | - |
---|
8046 | | -next_block_or_try_again: |
---|
8047 | | - done.uptodate = 0; |
---|
8048 | | - done.start = start; |
---|
8049 | | - init_completion(&done.done); |
---|
8050 | | - |
---|
8051 | | - ret = dio_read_error(inode, &io_bio->bio, bvec.bv_page, |
---|
8052 | | - pgoff, start, start + sectorsize - 1, |
---|
8053 | | - io_bio->mirror_num, |
---|
8054 | | - btrfs_retry_endio_nocsum, &done); |
---|
8055 | | - if (ret) { |
---|
8056 | | - err = ret; |
---|
8057 | | - goto next; |
---|
8058 | | - } |
---|
8059 | | - |
---|
8060 | | - wait_for_completion_io(&done.done); |
---|
8061 | | - |
---|
8062 | | - if (!done.uptodate) { |
---|
8063 | | - /* We might have another mirror, so try again */ |
---|
8064 | | - goto next_block_or_try_again; |
---|
8065 | | - } |
---|
8066 | | - |
---|
8067 | | -next: |
---|
8068 | | - start += sectorsize; |
---|
8069 | | - |
---|
8070 | | - nr_sectors--; |
---|
8071 | | - if (nr_sectors) { |
---|
8072 | | - pgoff += sectorsize; |
---|
| 7674 | + for (i = 0; i < nr_sectors; i++) { |
---|
8073 | 7675 | ASSERT(pgoff < PAGE_SIZE); |
---|
8074 | | - goto next_block_or_try_again; |
---|
| 7676 | + if (uptodate && |
---|
| 7677 | + (!csum || !check_data_csum(inode, io_bio, icsum, |
---|
| 7678 | + bvec.bv_page, pgoff, |
---|
| 7679 | + start, sectorsize))) { |
---|
| 7680 | + clean_io_failure(fs_info, failure_tree, io_tree, |
---|
| 7681 | + start, bvec.bv_page, |
---|
| 7682 | + btrfs_ino(BTRFS_I(inode)), |
---|
| 7683 | + pgoff); |
---|
| 7684 | + } else { |
---|
| 7685 | + blk_status_t status; |
---|
| 7686 | + |
---|
| 7687 | + status = btrfs_submit_read_repair(inode, |
---|
| 7688 | + &io_bio->bio, |
---|
| 7689 | + start - io_bio->logical, |
---|
| 7690 | + bvec.bv_page, pgoff, |
---|
| 7691 | + start, |
---|
| 7692 | + start + sectorsize - 1, |
---|
| 7693 | + io_bio->mirror_num, |
---|
| 7694 | + submit_dio_repair_bio); |
---|
| 7695 | + if (status) |
---|
| 7696 | + err = status; |
---|
| 7697 | + } |
---|
| 7698 | + start += sectorsize; |
---|
| 7699 | + icsum++; |
---|
| 7700 | + pgoff += sectorsize; |
---|
8075 | 7701 | } |
---|
8076 | 7702 | } |
---|
8077 | | - |
---|
8078 | 7703 | return err; |
---|
8079 | 7704 | } |
---|
8080 | 7705 | |
---|
8081 | | -static void btrfs_retry_endio(struct bio *bio) |
---|
8082 | | -{ |
---|
8083 | | - struct btrfs_retry_complete *done = bio->bi_private; |
---|
8084 | | - struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); |
---|
8085 | | - struct extent_io_tree *io_tree, *failure_tree; |
---|
8086 | | - struct inode *inode = done->inode; |
---|
8087 | | - struct bio_vec *bvec; |
---|
8088 | | - int uptodate; |
---|
8089 | | - int ret; |
---|
8090 | | - int i; |
---|
8091 | | - |
---|
8092 | | - if (bio->bi_status) |
---|
8093 | | - goto end; |
---|
8094 | | - |
---|
8095 | | - uptodate = 1; |
---|
8096 | | - |
---|
8097 | | - ASSERT(bio->bi_vcnt == 1); |
---|
8098 | | - ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(done->inode)); |
---|
8099 | | - |
---|
8100 | | - io_tree = &BTRFS_I(inode)->io_tree; |
---|
8101 | | - failure_tree = &BTRFS_I(inode)->io_failure_tree; |
---|
8102 | | - |
---|
8103 | | - ASSERT(!bio_flagged(bio, BIO_CLONED)); |
---|
8104 | | - bio_for_each_segment_all(bvec, bio, i) { |
---|
8105 | | - ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page, |
---|
8106 | | - bvec->bv_offset, done->start, |
---|
8107 | | - bvec->bv_len); |
---|
8108 | | - if (!ret) |
---|
8109 | | - clean_io_failure(BTRFS_I(inode)->root->fs_info, |
---|
8110 | | - failure_tree, io_tree, done->start, |
---|
8111 | | - bvec->bv_page, |
---|
8112 | | - btrfs_ino(BTRFS_I(inode)), |
---|
8113 | | - bvec->bv_offset); |
---|
8114 | | - else |
---|
8115 | | - uptodate = 0; |
---|
8116 | | - } |
---|
8117 | | - |
---|
8118 | | - done->uptodate = uptodate; |
---|
8119 | | -end: |
---|
8120 | | - complete(&done->done); |
---|
8121 | | - bio_put(bio); |
---|
8122 | | -} |
---|
8123 | | - |
---|
8124 | | -static blk_status_t __btrfs_subio_endio_read(struct inode *inode, |
---|
8125 | | - struct btrfs_io_bio *io_bio, blk_status_t err) |
---|
8126 | | -{ |
---|
8127 | | - struct btrfs_fs_info *fs_info; |
---|
8128 | | - struct bio_vec bvec; |
---|
8129 | | - struct bvec_iter iter; |
---|
8130 | | - struct btrfs_retry_complete done; |
---|
8131 | | - u64 start; |
---|
8132 | | - u64 offset = 0; |
---|
8133 | | - u32 sectorsize; |
---|
8134 | | - int nr_sectors; |
---|
8135 | | - unsigned int pgoff; |
---|
8136 | | - int csum_pos; |
---|
8137 | | - bool uptodate = (err == 0); |
---|
8138 | | - int ret; |
---|
8139 | | - blk_status_t status; |
---|
8140 | | - |
---|
8141 | | - fs_info = BTRFS_I(inode)->root->fs_info; |
---|
8142 | | - sectorsize = fs_info->sectorsize; |
---|
8143 | | - |
---|
8144 | | - err = BLK_STS_OK; |
---|
8145 | | - start = io_bio->logical; |
---|
8146 | | - done.inode = inode; |
---|
8147 | | - io_bio->bio.bi_iter = io_bio->iter; |
---|
8148 | | - |
---|
8149 | | - bio_for_each_segment(bvec, &io_bio->bio, iter) { |
---|
8150 | | - nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len); |
---|
8151 | | - |
---|
8152 | | - pgoff = bvec.bv_offset; |
---|
8153 | | -next_block: |
---|
8154 | | - if (uptodate) { |
---|
8155 | | - csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset); |
---|
8156 | | - ret = __readpage_endio_check(inode, io_bio, csum_pos, |
---|
8157 | | - bvec.bv_page, pgoff, start, sectorsize); |
---|
8158 | | - if (likely(!ret)) |
---|
8159 | | - goto next; |
---|
8160 | | - } |
---|
8161 | | -try_again: |
---|
8162 | | - done.uptodate = 0; |
---|
8163 | | - done.start = start; |
---|
8164 | | - init_completion(&done.done); |
---|
8165 | | - |
---|
8166 | | - status = dio_read_error(inode, &io_bio->bio, bvec.bv_page, |
---|
8167 | | - pgoff, start, start + sectorsize - 1, |
---|
8168 | | - io_bio->mirror_num, btrfs_retry_endio, |
---|
8169 | | - &done); |
---|
8170 | | - if (status) { |
---|
8171 | | - err = status; |
---|
8172 | | - goto next; |
---|
8173 | | - } |
---|
8174 | | - |
---|
8175 | | - wait_for_completion_io(&done.done); |
---|
8176 | | - |
---|
8177 | | - if (!done.uptodate) { |
---|
8178 | | - /* We might have another mirror, so try again */ |
---|
8179 | | - goto try_again; |
---|
8180 | | - } |
---|
8181 | | -next: |
---|
8182 | | - offset += sectorsize; |
---|
8183 | | - start += sectorsize; |
---|
8184 | | - |
---|
8185 | | - ASSERT(nr_sectors); |
---|
8186 | | - |
---|
8187 | | - nr_sectors--; |
---|
8188 | | - if (nr_sectors) { |
---|
8189 | | - pgoff += sectorsize; |
---|
8190 | | - ASSERT(pgoff < PAGE_SIZE); |
---|
8191 | | - goto next_block; |
---|
8192 | | - } |
---|
8193 | | - } |
---|
8194 | | - |
---|
8195 | | - return err; |
---|
8196 | | -} |
---|
8197 | | - |
---|
8198 | | -static blk_status_t btrfs_subio_endio_read(struct inode *inode, |
---|
8199 | | - struct btrfs_io_bio *io_bio, blk_status_t err) |
---|
8200 | | -{ |
---|
8201 | | - bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
---|
8202 | | - |
---|
8203 | | - if (skip_csum) { |
---|
8204 | | - if (unlikely(err)) |
---|
8205 | | - return __btrfs_correct_data_nocsum(inode, io_bio); |
---|
8206 | | - else |
---|
8207 | | - return BLK_STS_OK; |
---|
8208 | | - } else { |
---|
8209 | | - return __btrfs_subio_endio_read(inode, io_bio, err); |
---|
8210 | | - } |
---|
8211 | | -} |
---|
8212 | | - |
---|
8213 | | -static void btrfs_endio_direct_read(struct bio *bio) |
---|
8214 | | -{ |
---|
8215 | | - struct btrfs_dio_private *dip = bio->bi_private; |
---|
8216 | | - struct inode *inode = dip->inode; |
---|
8217 | | - struct bio *dio_bio; |
---|
8218 | | - struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); |
---|
8219 | | - blk_status_t err = bio->bi_status; |
---|
8220 | | - |
---|
8221 | | - if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) |
---|
8222 | | - err = btrfs_subio_endio_read(inode, io_bio, err); |
---|
8223 | | - |
---|
8224 | | - unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, |
---|
8225 | | - dip->logical_offset + dip->bytes - 1); |
---|
8226 | | - dio_bio = dip->dio_bio; |
---|
8227 | | - |
---|
8228 | | - kfree(dip); |
---|
8229 | | - |
---|
8230 | | - dio_bio->bi_status = err; |
---|
8231 | | - dio_end_io(dio_bio); |
---|
8232 | | - |
---|
8233 | | - if (io_bio->end_io) |
---|
8234 | | - io_bio->end_io(io_bio, blk_status_to_errno(err)); |
---|
8235 | | - bio_put(bio); |
---|
8236 | | -} |
---|
8237 | | - |
---|
8238 | | -static void __endio_write_update_ordered(struct inode *inode, |
---|
| 7706 | +static void __endio_write_update_ordered(struct btrfs_inode *inode, |
---|
8239 | 7707 | const u64 offset, const u64 bytes, |
---|
8240 | 7708 | const bool uptodate) |
---|
8241 | 7709 | { |
---|
8242 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
| 7710 | + struct btrfs_fs_info *fs_info = inode->root->fs_info; |
---|
8243 | 7711 | struct btrfs_ordered_extent *ordered = NULL; |
---|
8244 | 7712 | struct btrfs_workqueue *wq; |
---|
8245 | | - btrfs_work_func_t func; |
---|
8246 | 7713 | u64 ordered_offset = offset; |
---|
8247 | 7714 | u64 ordered_bytes = bytes; |
---|
8248 | 7715 | u64 last_offset; |
---|
8249 | 7716 | |
---|
8250 | | - if (btrfs_is_free_space_inode(BTRFS_I(inode))) { |
---|
| 7717 | + if (btrfs_is_free_space_inode(inode)) |
---|
8251 | 7718 | wq = fs_info->endio_freespace_worker; |
---|
8252 | | - func = btrfs_freespace_write_helper; |
---|
8253 | | - } else { |
---|
| 7719 | + else |
---|
8254 | 7720 | wq = fs_info->endio_write_workers; |
---|
8255 | | - func = btrfs_endio_write_helper; |
---|
8256 | | - } |
---|
8257 | 7721 | |
---|
8258 | 7722 | while (ordered_offset < offset + bytes) { |
---|
8259 | 7723 | last_offset = ordered_offset; |
---|
8260 | 7724 | if (btrfs_dec_test_first_ordered_pending(inode, &ordered, |
---|
8261 | | - &ordered_offset, |
---|
8262 | | - ordered_bytes, |
---|
8263 | | - uptodate)) { |
---|
8264 | | - btrfs_init_work(&ordered->work, func, |
---|
8265 | | - finish_ordered_fn, |
---|
8266 | | - NULL, NULL); |
---|
| 7725 | + &ordered_offset, |
---|
| 7726 | + ordered_bytes, |
---|
| 7727 | + uptodate)) { |
---|
| 7728 | + btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, |
---|
| 7729 | + NULL); |
---|
8267 | 7730 | btrfs_queue_work(wq, &ordered->work); |
---|
8268 | 7731 | } |
---|
8269 | 7732 | /* |
---|
.. | .. |
---|
8274 | 7737 | return; |
---|
8275 | 7738 | /* |
---|
8276 | 7739 | * Our bio might span multiple ordered extents. In this case |
---|
8277 | | - * we keep goin until we have accounted the whole dio. |
---|
| 7740 | + * we keep going until we have accounted the whole dio. |
---|
8278 | 7741 | */ |
---|
8279 | 7742 | if (ordered_offset < offset + bytes) { |
---|
8280 | 7743 | ordered_bytes = offset + bytes - ordered_offset; |
---|
.. | .. |
---|
8283 | 7746 | } |
---|
8284 | 7747 | } |
---|
8285 | 7748 | |
---|
8286 | | -static void btrfs_endio_direct_write(struct bio *bio) |
---|
8287 | | -{ |
---|
8288 | | - struct btrfs_dio_private *dip = bio->bi_private; |
---|
8289 | | - struct bio *dio_bio = dip->dio_bio; |
---|
8290 | | - |
---|
8291 | | - __endio_write_update_ordered(dip->inode, dip->logical_offset, |
---|
8292 | | - dip->bytes, !bio->bi_status); |
---|
8293 | | - |
---|
8294 | | - kfree(dip); |
---|
8295 | | - |
---|
8296 | | - dio_bio->bi_status = bio->bi_status; |
---|
8297 | | - dio_end_io(dio_bio); |
---|
8298 | | - bio_put(bio); |
---|
8299 | | -} |
---|
8300 | | - |
---|
8301 | 7749 | static blk_status_t btrfs_submit_bio_start_direct_io(void *private_data, |
---|
8302 | 7750 | struct bio *bio, u64 offset) |
---|
8303 | 7751 | { |
---|
8304 | 7752 | struct inode *inode = private_data; |
---|
8305 | | - blk_status_t ret; |
---|
8306 | | - ret = btrfs_csum_one_bio(inode, bio, offset, 1); |
---|
8307 | | - BUG_ON(ret); /* -ENOMEM */ |
---|
8308 | | - return 0; |
---|
| 7753 | + |
---|
| 7754 | + return btrfs_csum_one_bio(BTRFS_I(inode), bio, offset, 1); |
---|
8309 | 7755 | } |
---|
8310 | 7756 | |
---|
8311 | 7757 | static void btrfs_end_dio_bio(struct bio *bio) |
---|
.. | .. |
---|
8321 | 7767 | (unsigned long long)bio->bi_iter.bi_sector, |
---|
8322 | 7768 | bio->bi_iter.bi_size, err); |
---|
8323 | 7769 | |
---|
8324 | | - if (dip->subio_endio) |
---|
8325 | | - err = dip->subio_endio(dip->inode, btrfs_io_bio(bio), err); |
---|
8326 | | - |
---|
8327 | | - if (err) { |
---|
8328 | | - /* |
---|
8329 | | - * We want to perceive the errors flag being set before |
---|
8330 | | - * decrementing the reference count. We don't need a barrier |
---|
8331 | | - * since atomic operations with a return value are fully |
---|
8332 | | - * ordered as per atomic_t.txt |
---|
8333 | | - */ |
---|
8334 | | - dip->errors = 1; |
---|
| 7770 | + if (bio_op(bio) == REQ_OP_READ) { |
---|
| 7771 | + err = btrfs_check_read_dio_bio(dip->inode, btrfs_io_bio(bio), |
---|
| 7772 | + !err); |
---|
8335 | 7773 | } |
---|
8336 | 7774 | |
---|
8337 | | - /* if there are more bios still pending for this dio, just exit */ |
---|
8338 | | - if (!atomic_dec_and_test(&dip->pending_bios)) |
---|
8339 | | - goto out; |
---|
| 7775 | + if (err) |
---|
| 7776 | + dip->dio_bio->bi_status = err; |
---|
8340 | 7777 | |
---|
8341 | | - if (dip->errors) { |
---|
8342 | | - bio_io_error(dip->orig_bio); |
---|
8343 | | - } else { |
---|
8344 | | - dip->dio_bio->bi_status = BLK_STS_OK; |
---|
8345 | | - bio_endio(dip->orig_bio); |
---|
8346 | | - } |
---|
8347 | | -out: |
---|
8348 | 7778 | bio_put(bio); |
---|
8349 | | -} |
---|
8350 | | - |
---|
8351 | | -static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, |
---|
8352 | | - struct btrfs_dio_private *dip, |
---|
8353 | | - struct bio *bio, |
---|
8354 | | - u64 file_offset) |
---|
8355 | | -{ |
---|
8356 | | - struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); |
---|
8357 | | - struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio); |
---|
8358 | | - blk_status_t ret; |
---|
8359 | | - |
---|
8360 | | - /* |
---|
8361 | | - * We load all the csum data we need when we submit |
---|
8362 | | - * the first bio to reduce the csum tree search and |
---|
8363 | | - * contention. |
---|
8364 | | - */ |
---|
8365 | | - if (dip->logical_offset == file_offset) { |
---|
8366 | | - ret = btrfs_lookup_bio_sums_dio(inode, dip->orig_bio, |
---|
8367 | | - file_offset); |
---|
8368 | | - if (ret) |
---|
8369 | | - return ret; |
---|
8370 | | - } |
---|
8371 | | - |
---|
8372 | | - if (bio == dip->orig_bio) |
---|
8373 | | - return 0; |
---|
8374 | | - |
---|
8375 | | - file_offset -= dip->logical_offset; |
---|
8376 | | - file_offset >>= inode->i_sb->s_blocksize_bits; |
---|
8377 | | - io_bio->csum = (u8 *)(((u32 *)orig_io_bio->csum) + file_offset); |
---|
8378 | | - |
---|
8379 | | - return 0; |
---|
| 7779 | + btrfs_dio_private_put(dip); |
---|
8380 | 7780 | } |
---|
8381 | 7781 | |
---|
8382 | 7782 | static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio, |
---|
.. | .. |
---|
8410 | 7810 | * If we aren't doing async submit, calculate the csum of the |
---|
8411 | 7811 | * bio now. |
---|
8412 | 7812 | */ |
---|
8413 | | - ret = btrfs_csum_one_bio(inode, bio, file_offset, 1); |
---|
| 7813 | + ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, 1); |
---|
8414 | 7814 | if (ret) |
---|
8415 | 7815 | goto err; |
---|
8416 | 7816 | } else { |
---|
8417 | | - ret = btrfs_lookup_and_bind_dio_csum(inode, dip, bio, |
---|
8418 | | - file_offset); |
---|
8419 | | - if (ret) |
---|
8420 | | - goto err; |
---|
| 7817 | + u64 csum_offset; |
---|
| 7818 | + |
---|
| 7819 | + csum_offset = file_offset - dip->logical_offset; |
---|
| 7820 | + csum_offset >>= inode->i_sb->s_blocksize_bits; |
---|
| 7821 | + csum_offset *= btrfs_super_csum_size(fs_info->super_copy); |
---|
| 7822 | + btrfs_io_bio(bio)->csum = dip->csums + csum_offset; |
---|
8421 | 7823 | } |
---|
8422 | 7824 | map: |
---|
8423 | | - ret = btrfs_map_bio(fs_info, bio, 0, 0); |
---|
| 7825 | + ret = btrfs_map_bio(fs_info, bio, 0); |
---|
8424 | 7826 | err: |
---|
8425 | 7827 | return ret; |
---|
8426 | 7828 | } |
---|
8427 | 7829 | |
---|
8428 | | -static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip) |
---|
| 7830 | +/* |
---|
| 7831 | + * If this succeeds, the btrfs_dio_private is responsible for cleaning up locked |
---|
| 7832 | + * or ordered extents whether or not we submit any bios. |
---|
| 7833 | + */ |
---|
| 7834 | +static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio, |
---|
| 7835 | + struct inode *inode, |
---|
| 7836 | + loff_t file_offset) |
---|
8429 | 7837 | { |
---|
8430 | | - struct inode *inode = dip->inode; |
---|
| 7838 | + const bool write = (bio_op(dio_bio) == REQ_OP_WRITE); |
---|
| 7839 | + const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM); |
---|
| 7840 | + size_t dip_size; |
---|
| 7841 | + struct btrfs_dio_private *dip; |
---|
| 7842 | + |
---|
| 7843 | + dip_size = sizeof(*dip); |
---|
| 7844 | + if (!write && csum) { |
---|
| 7845 | + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
| 7846 | + const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); |
---|
| 7847 | + size_t nblocks; |
---|
| 7848 | + |
---|
| 7849 | + nblocks = dio_bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits; |
---|
| 7850 | + dip_size += csum_size * nblocks; |
---|
| 7851 | + } |
---|
| 7852 | + |
---|
| 7853 | + dip = kzalloc(dip_size, GFP_NOFS); |
---|
| 7854 | + if (!dip) |
---|
| 7855 | + return NULL; |
---|
| 7856 | + |
---|
| 7857 | + dip->inode = inode; |
---|
| 7858 | + dip->logical_offset = file_offset; |
---|
| 7859 | + dip->bytes = dio_bio->bi_iter.bi_size; |
---|
| 7860 | + dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9; |
---|
| 7861 | + dip->dio_bio = dio_bio; |
---|
| 7862 | + refcount_set(&dip->refs, 1); |
---|
| 7863 | + return dip; |
---|
| 7864 | +} |
---|
| 7865 | + |
---|
| 7866 | +static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap, |
---|
| 7867 | + struct bio *dio_bio, loff_t file_offset) |
---|
| 7868 | +{ |
---|
| 7869 | + const bool write = (bio_op(dio_bio) == REQ_OP_WRITE); |
---|
| 7870 | + const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM); |
---|
8431 | 7871 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
| 7872 | + const bool raid56 = (btrfs_data_alloc_profile(fs_info) & |
---|
| 7873 | + BTRFS_BLOCK_GROUP_RAID56_MASK); |
---|
| 7874 | + struct btrfs_dio_private *dip; |
---|
8432 | 7875 | struct bio *bio; |
---|
8433 | | - struct bio *orig_bio = dip->orig_bio; |
---|
8434 | | - u64 start_sector = orig_bio->bi_iter.bi_sector; |
---|
8435 | | - u64 file_offset = dip->logical_offset; |
---|
8436 | | - u64 map_length; |
---|
| 7876 | + u64 start_sector; |
---|
8437 | 7877 | int async_submit = 0; |
---|
8438 | 7878 | u64 submit_len; |
---|
8439 | 7879 | int clone_offset = 0; |
---|
8440 | 7880 | int clone_len; |
---|
8441 | 7881 | int ret; |
---|
8442 | 7882 | blk_status_t status; |
---|
| 7883 | + struct btrfs_io_geometry geom; |
---|
| 7884 | + struct btrfs_dio_data *dio_data = iomap->private; |
---|
8443 | 7885 | |
---|
8444 | | - map_length = orig_bio->bi_iter.bi_size; |
---|
8445 | | - submit_len = map_length; |
---|
8446 | | - ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), start_sector << 9, |
---|
8447 | | - &map_length, NULL, 0); |
---|
8448 | | - if (ret) |
---|
8449 | | - return -EIO; |
---|
8450 | | - |
---|
8451 | | - if (map_length >= submit_len) { |
---|
8452 | | - bio = orig_bio; |
---|
8453 | | - dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED; |
---|
8454 | | - goto submit; |
---|
| 7886 | + dip = btrfs_create_dio_private(dio_bio, inode, file_offset); |
---|
| 7887 | + if (!dip) { |
---|
| 7888 | + if (!write) { |
---|
| 7889 | + unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, |
---|
| 7890 | + file_offset + dio_bio->bi_iter.bi_size - 1); |
---|
| 7891 | + } |
---|
| 7892 | + dio_bio->bi_status = BLK_STS_RESOURCE; |
---|
| 7893 | + bio_endio(dio_bio); |
---|
| 7894 | + return BLK_QC_T_NONE; |
---|
8455 | 7895 | } |
---|
8456 | 7896 | |
---|
8457 | | - /* async crcs make it difficult to collect full stripe writes. */ |
---|
8458 | | - if (btrfs_data_alloc_profile(fs_info) & BTRFS_BLOCK_GROUP_RAID56_MASK) |
---|
8459 | | - async_submit = 0; |
---|
8460 | | - else |
---|
8461 | | - async_submit = 1; |
---|
| 7897 | + if (!write && csum) { |
---|
| 7898 | + /* |
---|
| 7899 | + * Load the csums up front to reduce csum tree searches and |
---|
| 7900 | + * contention when submitting bios. |
---|
| 7901 | + */ |
---|
| 7902 | + status = btrfs_lookup_bio_sums(inode, dio_bio, file_offset, |
---|
| 7903 | + dip->csums); |
---|
| 7904 | + if (status != BLK_STS_OK) |
---|
| 7905 | + goto out_err; |
---|
| 7906 | + } |
---|
8462 | 7907 | |
---|
8463 | | - /* bio split */ |
---|
8464 | | - ASSERT(map_length <= INT_MAX); |
---|
| 7908 | + start_sector = dio_bio->bi_iter.bi_sector; |
---|
| 7909 | + submit_len = dio_bio->bi_iter.bi_size; |
---|
| 7910 | + |
---|
8465 | 7911 | do { |
---|
8466 | | - clone_len = min_t(int, submit_len, map_length); |
---|
| 7912 | + ret = btrfs_get_io_geometry(fs_info, btrfs_op(dio_bio), |
---|
| 7913 | + start_sector << 9, submit_len, |
---|
| 7914 | + &geom); |
---|
| 7915 | + if (ret) { |
---|
| 7916 | + status = errno_to_blk_status(ret); |
---|
| 7917 | + goto out_err; |
---|
| 7918 | + } |
---|
| 7919 | + ASSERT(geom.len <= INT_MAX); |
---|
| 7920 | + |
---|
| 7921 | + clone_len = min_t(int, submit_len, geom.len); |
---|
8467 | 7922 | |
---|
8468 | 7923 | /* |
---|
8469 | 7924 | * This will never fail as it's passing GPF_NOFS and |
---|
8470 | 7925 | * the allocation is backed by btrfs_bioset. |
---|
8471 | 7926 | */ |
---|
8472 | | - bio = btrfs_bio_clone_partial(orig_bio, clone_offset, |
---|
8473 | | - clone_len); |
---|
| 7927 | + bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len); |
---|
8474 | 7928 | bio->bi_private = dip; |
---|
8475 | 7929 | bio->bi_end_io = btrfs_end_dio_bio; |
---|
8476 | 7930 | btrfs_io_bio(bio)->logical = file_offset; |
---|
8477 | 7931 | |
---|
8478 | 7932 | ASSERT(submit_len >= clone_len); |
---|
8479 | 7933 | submit_len -= clone_len; |
---|
8480 | | - if (submit_len == 0) |
---|
8481 | | - break; |
---|
8482 | 7934 | |
---|
8483 | 7935 | /* |
---|
8484 | 7936 | * Increase the count before we submit the bio so we know |
---|
8485 | 7937 | * the end IO handler won't happen before we increase the |
---|
8486 | 7938 | * count. Otherwise, the dip might get freed before we're |
---|
8487 | 7939 | * done setting it up. |
---|
| 7940 | + * |
---|
| 7941 | + * We transfer the initial reference to the last bio, so we |
---|
| 7942 | + * don't need to increment the reference count for the last one. |
---|
8488 | 7943 | */ |
---|
8489 | | - atomic_inc(&dip->pending_bios); |
---|
| 7944 | + if (submit_len > 0) { |
---|
| 7945 | + refcount_inc(&dip->refs); |
---|
| 7946 | + /* |
---|
| 7947 | + * If we are submitting more than one bio, submit them |
---|
| 7948 | + * all asynchronously. The exception is RAID 5 or 6, as |
---|
| 7949 | + * asynchronous checksums make it difficult to collect |
---|
| 7950 | + * full stripe writes. |
---|
| 7951 | + */ |
---|
| 7952 | + if (!raid56) |
---|
| 7953 | + async_submit = 1; |
---|
| 7954 | + } |
---|
8490 | 7955 | |
---|
8491 | 7956 | status = btrfs_submit_dio_bio(bio, inode, file_offset, |
---|
8492 | 7957 | async_submit); |
---|
8493 | 7958 | if (status) { |
---|
8494 | 7959 | bio_put(bio); |
---|
8495 | | - atomic_dec(&dip->pending_bios); |
---|
| 7960 | + if (submit_len > 0) |
---|
| 7961 | + refcount_dec(&dip->refs); |
---|
8496 | 7962 | goto out_err; |
---|
8497 | 7963 | } |
---|
8498 | 7964 | |
---|
| 7965 | + dio_data->submitted += clone_len; |
---|
8499 | 7966 | clone_offset += clone_len; |
---|
8500 | 7967 | start_sector += clone_len >> 9; |
---|
8501 | 7968 | file_offset += clone_len; |
---|
8502 | | - |
---|
8503 | | - map_length = submit_len; |
---|
8504 | | - ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), |
---|
8505 | | - start_sector << 9, &map_length, NULL, 0); |
---|
8506 | | - if (ret) |
---|
8507 | | - goto out_err; |
---|
8508 | 7969 | } while (submit_len > 0); |
---|
| 7970 | + return BLK_QC_T_NONE; |
---|
8509 | 7971 | |
---|
8510 | | -submit: |
---|
8511 | | - status = btrfs_submit_dio_bio(bio, inode, file_offset, async_submit); |
---|
8512 | | - if (!status) |
---|
8513 | | - return 0; |
---|
8514 | | - |
---|
8515 | | - if (bio != orig_bio) |
---|
8516 | | - bio_put(bio); |
---|
8517 | 7972 | out_err: |
---|
8518 | | - dip->errors = 1; |
---|
8519 | | - /* |
---|
8520 | | - * Before atomic variable goto zero, we must make sure dip->errors is |
---|
8521 | | - * perceived to be set. This ordering is ensured by the fact that an |
---|
8522 | | - * atomic operations with a return value are fully ordered as per |
---|
8523 | | - * atomic_t.txt |
---|
8524 | | - */ |
---|
8525 | | - if (atomic_dec_and_test(&dip->pending_bios)) |
---|
8526 | | - bio_io_error(dip->orig_bio); |
---|
8527 | | - |
---|
8528 | | - /* bio_end_io() will handle error, so we needn't return it */ |
---|
8529 | | - return 0; |
---|
8530 | | -} |
---|
8531 | | - |
---|
8532 | | -static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode, |
---|
8533 | | - loff_t file_offset) |
---|
8534 | | -{ |
---|
8535 | | - struct btrfs_dio_private *dip = NULL; |
---|
8536 | | - struct bio *bio = NULL; |
---|
8537 | | - struct btrfs_io_bio *io_bio; |
---|
8538 | | - bool write = (bio_op(dio_bio) == REQ_OP_WRITE); |
---|
8539 | | - int ret = 0; |
---|
8540 | | - |
---|
8541 | | - bio = btrfs_bio_clone(dio_bio); |
---|
8542 | | - |
---|
8543 | | - dip = kzalloc(sizeof(*dip), GFP_NOFS); |
---|
8544 | | - if (!dip) { |
---|
8545 | | - ret = -ENOMEM; |
---|
8546 | | - goto free_ordered; |
---|
8547 | | - } |
---|
8548 | | - |
---|
8549 | | - dip->private = dio_bio->bi_private; |
---|
8550 | | - dip->inode = inode; |
---|
8551 | | - dip->logical_offset = file_offset; |
---|
8552 | | - dip->bytes = dio_bio->bi_iter.bi_size; |
---|
8553 | | - dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9; |
---|
8554 | | - bio->bi_private = dip; |
---|
8555 | | - dip->orig_bio = bio; |
---|
8556 | | - dip->dio_bio = dio_bio; |
---|
8557 | | - atomic_set(&dip->pending_bios, 1); |
---|
8558 | | - io_bio = btrfs_io_bio(bio); |
---|
8559 | | - io_bio->logical = file_offset; |
---|
8560 | | - |
---|
8561 | | - if (write) { |
---|
8562 | | - bio->bi_end_io = btrfs_endio_direct_write; |
---|
8563 | | - } else { |
---|
8564 | | - bio->bi_end_io = btrfs_endio_direct_read; |
---|
8565 | | - dip->subio_endio = btrfs_subio_endio_read; |
---|
8566 | | - } |
---|
8567 | | - |
---|
8568 | | - /* |
---|
8569 | | - * Reset the range for unsubmitted ordered extents (to a 0 length range) |
---|
8570 | | - * even if we fail to submit a bio, because in such case we do the |
---|
8571 | | - * corresponding error handling below and it must not be done a second |
---|
8572 | | - * time by btrfs_direct_IO(). |
---|
8573 | | - */ |
---|
8574 | | - if (write) { |
---|
8575 | | - struct btrfs_dio_data *dio_data = current->journal_info; |
---|
8576 | | - |
---|
8577 | | - dio_data->unsubmitted_oe_range_end = dip->logical_offset + |
---|
8578 | | - dip->bytes; |
---|
8579 | | - dio_data->unsubmitted_oe_range_start = |
---|
8580 | | - dio_data->unsubmitted_oe_range_end; |
---|
8581 | | - } |
---|
8582 | | - |
---|
8583 | | - ret = btrfs_submit_direct_hook(dip); |
---|
8584 | | - if (!ret) |
---|
8585 | | - return; |
---|
8586 | | - |
---|
8587 | | - if (io_bio->end_io) |
---|
8588 | | - io_bio->end_io(io_bio, ret); |
---|
8589 | | - |
---|
8590 | | -free_ordered: |
---|
8591 | | - /* |
---|
8592 | | - * If we arrived here it means either we failed to submit the dip |
---|
8593 | | - * or we either failed to clone the dio_bio or failed to allocate the |
---|
8594 | | - * dip. If we cloned the dio_bio and allocated the dip, we can just |
---|
8595 | | - * call bio_endio against our io_bio so that we get proper resource |
---|
8596 | | - * cleanup if we fail to submit the dip, otherwise, we must do the |
---|
8597 | | - * same as btrfs_endio_direct_[write|read] because we can't call these |
---|
8598 | | - * callbacks - they require an allocated dip and a clone of dio_bio. |
---|
8599 | | - */ |
---|
8600 | | - if (bio && dip) { |
---|
8601 | | - bio_io_error(bio); |
---|
8602 | | - /* |
---|
8603 | | - * The end io callbacks free our dip, do the final put on bio |
---|
8604 | | - * and all the cleanup and final put for dio_bio (through |
---|
8605 | | - * dio_end_io()). |
---|
8606 | | - */ |
---|
8607 | | - dip = NULL; |
---|
8608 | | - bio = NULL; |
---|
8609 | | - } else { |
---|
8610 | | - if (write) |
---|
8611 | | - __endio_write_update_ordered(inode, |
---|
8612 | | - file_offset, |
---|
8613 | | - dio_bio->bi_iter.bi_size, |
---|
8614 | | - false); |
---|
8615 | | - else |
---|
8616 | | - unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, |
---|
8617 | | - file_offset + dio_bio->bi_iter.bi_size - 1); |
---|
8618 | | - |
---|
8619 | | - dio_bio->bi_status = BLK_STS_IOERR; |
---|
8620 | | - /* |
---|
8621 | | - * Releases and cleans up our dio_bio, no need to bio_put() |
---|
8622 | | - * nor bio_endio()/bio_io_error() against dio_bio. |
---|
8623 | | - */ |
---|
8624 | | - dio_end_io(dio_bio); |
---|
8625 | | - } |
---|
8626 | | - if (bio) |
---|
8627 | | - bio_put(bio); |
---|
8628 | | - kfree(dip); |
---|
| 7973 | + dip->dio_bio->bi_status = status; |
---|
| 7974 | + btrfs_dio_private_put(dip); |
---|
| 7975 | + return BLK_QC_T_NONE; |
---|
8629 | 7976 | } |
---|
8630 | 7977 | |
---|
8631 | 7978 | static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info, |
---|
.. | .. |
---|
8661 | 8008 | return retval; |
---|
8662 | 8009 | } |
---|
8663 | 8010 | |
---|
8664 | | -static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) |
---|
| 8011 | +static inline int btrfs_maybe_fsync_end_io(struct kiocb *iocb, ssize_t size, |
---|
| 8012 | + int error, unsigned flags) |
---|
| 8013 | +{ |
---|
| 8014 | + /* |
---|
| 8015 | + * Now if we're still in the context of our submitter we know we can't |
---|
| 8016 | + * safely run generic_write_sync(), so clear our flag here so that the |
---|
| 8017 | + * caller knows to follow up with a sync. |
---|
| 8018 | + */ |
---|
| 8019 | + if (current->journal_info == BTRFS_DIO_SYNC_STUB) { |
---|
| 8020 | + current->journal_info = NULL; |
---|
| 8021 | + return error; |
---|
| 8022 | + } |
---|
| 8023 | + |
---|
| 8024 | + if (error) |
---|
| 8025 | + return error; |
---|
| 8026 | + |
---|
| 8027 | + if (size) { |
---|
| 8028 | + iocb->ki_flags |= IOCB_DSYNC; |
---|
| 8029 | + return generic_write_sync(iocb, size); |
---|
| 8030 | + } |
---|
| 8031 | + |
---|
| 8032 | + return 0; |
---|
| 8033 | +} |
---|
| 8034 | + |
---|
| 8035 | +static const struct iomap_ops btrfs_dio_iomap_ops = { |
---|
| 8036 | + .iomap_begin = btrfs_dio_iomap_begin, |
---|
| 8037 | + .iomap_end = btrfs_dio_iomap_end, |
---|
| 8038 | +}; |
---|
| 8039 | + |
---|
| 8040 | +static const struct iomap_dio_ops btrfs_dio_ops = { |
---|
| 8041 | + .submit_io = btrfs_submit_direct, |
---|
| 8042 | +}; |
---|
| 8043 | + |
---|
| 8044 | +static const struct iomap_dio_ops btrfs_sync_dops = { |
---|
| 8045 | + .submit_io = btrfs_submit_direct, |
---|
| 8046 | + .end_io = btrfs_maybe_fsync_end_io, |
---|
| 8047 | +}; |
---|
| 8048 | + |
---|
| 8049 | +ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) |
---|
8665 | 8050 | { |
---|
8666 | 8051 | struct file *file = iocb->ki_filp; |
---|
8667 | 8052 | struct inode *inode = file->f_mapping->host; |
---|
8668 | 8053 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
8669 | | - struct btrfs_dio_data dio_data = { 0 }; |
---|
8670 | 8054 | struct extent_changeset *data_reserved = NULL; |
---|
8671 | 8055 | loff_t offset = iocb->ki_pos; |
---|
8672 | 8056 | size_t count = 0; |
---|
8673 | | - int flags = 0; |
---|
8674 | | - bool wakeup = true; |
---|
8675 | 8057 | bool relock = false; |
---|
8676 | 8058 | ssize_t ret; |
---|
8677 | 8059 | |
---|
8678 | | - if (check_direct_IO(fs_info, iter, offset)) |
---|
| 8060 | + if (check_direct_IO(fs_info, iter, offset)) { |
---|
| 8061 | + ASSERT(current->journal_info == NULL || |
---|
| 8062 | + current->journal_info == BTRFS_DIO_SYNC_STUB); |
---|
| 8063 | + current->journal_info = NULL; |
---|
8679 | 8064 | return 0; |
---|
| 8065 | + } |
---|
8680 | 8066 | |
---|
8681 | | - inode_dio_begin(inode); |
---|
8682 | | - |
---|
8683 | | - /* |
---|
8684 | | - * The generic stuff only does filemap_write_and_wait_range, which |
---|
8685 | | - * isn't enough if we've written compressed pages to this area, so |
---|
8686 | | - * we need to flush the dirty pages again to make absolutely sure |
---|
8687 | | - * that any outstanding dirty pages are on disk. |
---|
8688 | | - */ |
---|
8689 | 8067 | count = iov_iter_count(iter); |
---|
8690 | | - if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, |
---|
8691 | | - &BTRFS_I(inode)->runtime_flags)) |
---|
8692 | | - filemap_fdatawrite_range(inode->i_mapping, offset, |
---|
8693 | | - offset + count - 1); |
---|
8694 | | - |
---|
8695 | 8068 | if (iov_iter_rw(iter) == WRITE) { |
---|
8696 | 8069 | /* |
---|
8697 | 8070 | * If the write DIO is beyond the EOF, we need update |
---|
.. | .. |
---|
8699 | 8072 | * not unlock the i_mutex at this case. |
---|
8700 | 8073 | */ |
---|
8701 | 8074 | if (offset + count <= inode->i_size) { |
---|
8702 | | - dio_data.overwrite = 1; |
---|
8703 | 8075 | inode_unlock(inode); |
---|
8704 | 8076 | relock = true; |
---|
8705 | 8077 | } |
---|
8706 | | - ret = btrfs_delalloc_reserve_space(inode, &data_reserved, |
---|
8707 | | - offset, count); |
---|
8708 | | - if (ret) |
---|
8709 | | - goto out; |
---|
8710 | | - |
---|
8711 | | - /* |
---|
8712 | | - * We need to know how many extents we reserved so that we can |
---|
8713 | | - * do the accounting properly if we go over the number we |
---|
8714 | | - * originally calculated. Abuse current->journal_info for this. |
---|
8715 | | - */ |
---|
8716 | | - dio_data.reserve = round_up(count, |
---|
8717 | | - fs_info->sectorsize); |
---|
8718 | | - dio_data.unsubmitted_oe_range_start = (u64)offset; |
---|
8719 | | - dio_data.unsubmitted_oe_range_end = (u64)offset; |
---|
8720 | | - current->journal_info = &dio_data; |
---|
8721 | 8078 | down_read(&BTRFS_I(inode)->dio_sem); |
---|
8722 | | - } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, |
---|
8723 | | - &BTRFS_I(inode)->runtime_flags)) { |
---|
8724 | | - inode_dio_end(inode); |
---|
8725 | | - flags = DIO_LOCKING | DIO_SKIP_HOLES; |
---|
8726 | | - wakeup = false; |
---|
8727 | 8079 | } |
---|
8728 | 8080 | |
---|
8729 | | - ret = __blockdev_direct_IO(iocb, inode, |
---|
8730 | | - fs_info->fs_devices->latest_bdev, |
---|
8731 | | - iter, btrfs_get_blocks_direct, NULL, |
---|
8732 | | - btrfs_submit_direct, flags); |
---|
8733 | | - if (iov_iter_rw(iter) == WRITE) { |
---|
| 8081 | + /* |
---|
| 8082 | + * We are actually a sync iocb, so we need our fancy endio to know |
---|
| 8083 | + * if we need to sync. |
---|
| 8084 | + */ |
---|
| 8085 | + if (current->journal_info) |
---|
| 8086 | + ret = iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, |
---|
| 8087 | + &btrfs_sync_dops, is_sync_kiocb(iocb)); |
---|
| 8088 | + else |
---|
| 8089 | + ret = iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, |
---|
| 8090 | + &btrfs_dio_ops, is_sync_kiocb(iocb)); |
---|
| 8091 | + |
---|
| 8092 | + if (ret == -ENOTBLK) |
---|
| 8093 | + ret = 0; |
---|
| 8094 | + |
---|
| 8095 | + if (iov_iter_rw(iter) == WRITE) |
---|
8734 | 8096 | up_read(&BTRFS_I(inode)->dio_sem); |
---|
8735 | | - current->journal_info = NULL; |
---|
8736 | | - if (ret < 0 && ret != -EIOCBQUEUED) { |
---|
8737 | | - if (dio_data.reserve) |
---|
8738 | | - btrfs_delalloc_release_space(inode, data_reserved, |
---|
8739 | | - offset, dio_data.reserve, true); |
---|
8740 | | - /* |
---|
8741 | | - * On error we might have left some ordered extents |
---|
8742 | | - * without submitting corresponding bios for them, so |
---|
8743 | | - * cleanup them up to avoid other tasks getting them |
---|
8744 | | - * and waiting for them to complete forever. |
---|
8745 | | - */ |
---|
8746 | | - if (dio_data.unsubmitted_oe_range_start < |
---|
8747 | | - dio_data.unsubmitted_oe_range_end) |
---|
8748 | | - __endio_write_update_ordered(inode, |
---|
8749 | | - dio_data.unsubmitted_oe_range_start, |
---|
8750 | | - dio_data.unsubmitted_oe_range_end - |
---|
8751 | | - dio_data.unsubmitted_oe_range_start, |
---|
8752 | | - false); |
---|
8753 | | - } else if (ret >= 0 && (size_t)ret < count) |
---|
8754 | | - btrfs_delalloc_release_space(inode, data_reserved, |
---|
8755 | | - offset, count - (size_t)ret, true); |
---|
8756 | | - btrfs_delalloc_release_extents(BTRFS_I(inode), count); |
---|
8757 | | - } |
---|
8758 | | -out: |
---|
8759 | | - if (wakeup) |
---|
8760 | | - inode_dio_end(inode); |
---|
| 8097 | + |
---|
8761 | 8098 | if (relock) |
---|
8762 | 8099 | inode_lock(inode); |
---|
8763 | 8100 | |
---|
.. | .. |
---|
8765 | 8102 | return ret; |
---|
8766 | 8103 | } |
---|
8767 | 8104 | |
---|
8768 | | -#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) |
---|
8769 | | - |
---|
8770 | 8105 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
---|
8771 | | - __u64 start, __u64 len) |
---|
| 8106 | + u64 start, u64 len) |
---|
8772 | 8107 | { |
---|
8773 | 8108 | int ret; |
---|
8774 | 8109 | |
---|
8775 | | - ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS); |
---|
| 8110 | + ret = fiemap_prep(inode, fieinfo, start, &len, 0); |
---|
8776 | 8111 | if (ret) |
---|
8777 | 8112 | return ret; |
---|
8778 | 8113 | |
---|
8779 | | - return extent_fiemap(inode, fieinfo, start, len); |
---|
| 8114 | + return extent_fiemap(BTRFS_I(inode), fieinfo, start, len); |
---|
8780 | 8115 | } |
---|
8781 | 8116 | |
---|
8782 | 8117 | int btrfs_readpage(struct file *file, struct page *page) |
---|
8783 | 8118 | { |
---|
8784 | | - struct extent_io_tree *tree; |
---|
8785 | | - tree = &BTRFS_I(page->mapping->host)->io_tree; |
---|
8786 | | - return extent_read_full_page(tree, page, btrfs_get_extent, 0); |
---|
| 8119 | + struct btrfs_inode *inode = BTRFS_I(page->mapping->host); |
---|
| 8120 | + u64 start = page_offset(page); |
---|
| 8121 | + u64 end = start + PAGE_SIZE - 1; |
---|
| 8122 | + unsigned long bio_flags = 0; |
---|
| 8123 | + struct bio *bio = NULL; |
---|
| 8124 | + int ret; |
---|
| 8125 | + |
---|
| 8126 | + btrfs_lock_and_flush_ordered_range(inode, start, end, NULL); |
---|
| 8127 | + |
---|
| 8128 | + ret = btrfs_do_readpage(page, NULL, &bio, &bio_flags, 0, NULL); |
---|
| 8129 | + if (bio) |
---|
| 8130 | + ret = submit_one_bio(bio, 0, bio_flags); |
---|
| 8131 | + return ret; |
---|
8787 | 8132 | } |
---|
8788 | 8133 | |
---|
8789 | 8134 | static int btrfs_writepage(struct page *page, struct writeback_control *wbc) |
---|
.. | .. |
---|
8817 | 8162 | return extent_writepages(mapping, wbc); |
---|
8818 | 8163 | } |
---|
8819 | 8164 | |
---|
8820 | | -static int |
---|
8821 | | -btrfs_readpages(struct file *file, struct address_space *mapping, |
---|
8822 | | - struct list_head *pages, unsigned nr_pages) |
---|
| 8165 | +static void btrfs_readahead(struct readahead_control *rac) |
---|
8823 | 8166 | { |
---|
8824 | | - return extent_readpages(mapping, pages, nr_pages); |
---|
| 8167 | + extent_readahead(rac); |
---|
8825 | 8168 | } |
---|
8826 | 8169 | |
---|
8827 | 8170 | static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags) |
---|
8828 | 8171 | { |
---|
8829 | 8172 | int ret = try_release_extent_mapping(page, gfp_flags); |
---|
8830 | | - if (ret == 1) { |
---|
8831 | | - ClearPagePrivate(page); |
---|
8832 | | - set_page_private(page, 0); |
---|
8833 | | - put_page(page); |
---|
8834 | | - } |
---|
| 8173 | + if (ret == 1) |
---|
| 8174 | + detach_page_private(page); |
---|
8835 | 8175 | return ret; |
---|
8836 | 8176 | } |
---|
8837 | 8177 | |
---|
.. | .. |
---|
8842 | 8182 | return __btrfs_releasepage(page, gfp_flags); |
---|
8843 | 8183 | } |
---|
8844 | 8184 | |
---|
| 8185 | +#ifdef CONFIG_MIGRATION |
---|
| 8186 | +static int btrfs_migratepage(struct address_space *mapping, |
---|
| 8187 | + struct page *newpage, struct page *page, |
---|
| 8188 | + enum migrate_mode mode) |
---|
| 8189 | +{ |
---|
| 8190 | + int ret; |
---|
| 8191 | + |
---|
| 8192 | + ret = migrate_page_move_mapping(mapping, newpage, page, 0); |
---|
| 8193 | + if (ret != MIGRATEPAGE_SUCCESS) |
---|
| 8194 | + return ret; |
---|
| 8195 | + |
---|
| 8196 | + if (page_has_private(page)) |
---|
| 8197 | + attach_page_private(newpage, detach_page_private(page)); |
---|
| 8198 | + |
---|
| 8199 | + if (PagePrivate2(page)) { |
---|
| 8200 | + ClearPagePrivate2(page); |
---|
| 8201 | + SetPagePrivate2(newpage); |
---|
| 8202 | + } |
---|
| 8203 | + |
---|
| 8204 | + if (mode != MIGRATE_SYNC_NO_COPY) |
---|
| 8205 | + migrate_page_copy(newpage, page); |
---|
| 8206 | + else |
---|
| 8207 | + migrate_page_states(newpage, page); |
---|
| 8208 | + return MIGRATEPAGE_SUCCESS; |
---|
| 8209 | +} |
---|
| 8210 | +#endif |
---|
| 8211 | + |
---|
8845 | 8212 | static void btrfs_invalidatepage(struct page *page, unsigned int offset, |
---|
8846 | 8213 | unsigned int length) |
---|
8847 | 8214 | { |
---|
8848 | | - struct inode *inode = page->mapping->host; |
---|
8849 | | - struct extent_io_tree *tree; |
---|
| 8215 | + struct btrfs_inode *inode = BTRFS_I(page->mapping->host); |
---|
| 8216 | + struct extent_io_tree *tree = &inode->io_tree; |
---|
8850 | 8217 | struct btrfs_ordered_extent *ordered; |
---|
8851 | 8218 | struct extent_state *cached_state = NULL; |
---|
8852 | 8219 | u64 page_start = page_offset(page); |
---|
8853 | 8220 | u64 page_end = page_start + PAGE_SIZE - 1; |
---|
8854 | 8221 | u64 start; |
---|
8855 | 8222 | u64 end; |
---|
8856 | | - int inode_evicting = inode->i_state & I_FREEING; |
---|
| 8223 | + int inode_evicting = inode->vfs_inode.i_state & I_FREEING; |
---|
8857 | 8224 | |
---|
8858 | 8225 | /* |
---|
8859 | 8226 | * we have the page locked, so new writeback can't start, |
---|
.. | .. |
---|
8864 | 8231 | */ |
---|
8865 | 8232 | wait_on_page_writeback(page); |
---|
8866 | 8233 | |
---|
8867 | | - tree = &BTRFS_I(inode)->io_tree; |
---|
8868 | | - if (offset) { |
---|
| 8234 | + /* |
---|
| 8235 | + * For subpage case, we have call sites like |
---|
| 8236 | + * btrfs_punch_hole_lock_range() which passes range not aligned to |
---|
| 8237 | + * sectorsize. |
---|
| 8238 | + * If the range doesn't cover the full page, we don't need to and |
---|
| 8239 | + * shouldn't clear page extent mapped, as page->private can still |
---|
| 8240 | + * record subpage dirty bits for other part of the range. |
---|
| 8241 | + * |
---|
| 8242 | + * For cases that can invalidate the full page even if the range doesn't |
---|
| 8243 | + * cover the full page, like invalidating the last page, we're |
---|
| 8244 | + * still safe to wait for ordered extent to finish. |
---|
| 8245 | + */ |
---|
| 8246 | + if (!(offset == 0 && length == PAGE_SIZE)) { |
---|
8869 | 8247 | btrfs_releasepage(page, GFP_NOFS); |
---|
8870 | 8248 | return; |
---|
8871 | 8249 | } |
---|
8872 | 8250 | |
---|
8873 | 8251 | if (!inode_evicting) |
---|
8874 | 8252 | lock_extent_bits(tree, page_start, page_end, &cached_state); |
---|
8875 | | -again: |
---|
| 8253 | + |
---|
8876 | 8254 | start = page_start; |
---|
8877 | | - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start, |
---|
8878 | | - page_end - start + 1); |
---|
| 8255 | +again: |
---|
| 8256 | + ordered = btrfs_lookup_ordered_range(inode, start, page_end - start + 1); |
---|
8879 | 8257 | if (ordered) { |
---|
8880 | | - end = min(page_end, ordered->file_offset + ordered->len - 1); |
---|
| 8258 | + end = min(page_end, |
---|
| 8259 | + ordered->file_offset + ordered->num_bytes - 1); |
---|
8881 | 8260 | /* |
---|
8882 | 8261 | * IO on this page will never be started, so we need |
---|
8883 | 8262 | * to account for any ordered extents now |
---|
8884 | 8263 | */ |
---|
8885 | 8264 | if (!inode_evicting) |
---|
8886 | 8265 | clear_extent_bit(tree, start, end, |
---|
8887 | | - EXTENT_DIRTY | EXTENT_DELALLOC | |
---|
8888 | | - EXTENT_DELALLOC_NEW | |
---|
| 8266 | + EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | |
---|
8889 | 8267 | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | |
---|
8890 | 8268 | EXTENT_DEFRAG, 1, 0, &cached_state); |
---|
8891 | 8269 | /* |
---|
.. | .. |
---|
8896 | 8274 | struct btrfs_ordered_inode_tree *tree; |
---|
8897 | 8275 | u64 new_len; |
---|
8898 | 8276 | |
---|
8899 | | - tree = &BTRFS_I(inode)->ordered_tree; |
---|
| 8277 | + tree = &inode->ordered_tree; |
---|
8900 | 8278 | |
---|
8901 | 8279 | spin_lock_irq(&tree->lock); |
---|
8902 | 8280 | set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags); |
---|
.. | .. |
---|
8937 | 8315 | */ |
---|
8938 | 8316 | btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE); |
---|
8939 | 8317 | if (!inode_evicting) { |
---|
8940 | | - clear_extent_bit(tree, page_start, page_end, |
---|
8941 | | - EXTENT_LOCKED | EXTENT_DIRTY | |
---|
| 8318 | + clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED | |
---|
8942 | 8319 | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | |
---|
8943 | 8320 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1, |
---|
8944 | 8321 | &cached_state); |
---|
.. | .. |
---|
8947 | 8324 | } |
---|
8948 | 8325 | |
---|
8949 | 8326 | ClearPageChecked(page); |
---|
8950 | | - if (PagePrivate(page)) { |
---|
8951 | | - ClearPagePrivate(page); |
---|
8952 | | - set_page_private(page, 0); |
---|
8953 | | - put_page(page); |
---|
8954 | | - } |
---|
| 8327 | + detach_page_private(page); |
---|
8955 | 8328 | } |
---|
8956 | 8329 | |
---|
8957 | 8330 | /* |
---|
.. | .. |
---|
9004 | 8377 | * end up waiting indefinitely to get a lock on the page currently |
---|
9005 | 8378 | * being processed by btrfs_page_mkwrite() function. |
---|
9006 | 8379 | */ |
---|
9007 | | - ret2 = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start, |
---|
9008 | | - reserved_space); |
---|
| 8380 | + ret2 = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved, |
---|
| 8381 | + page_start, reserved_space); |
---|
9009 | 8382 | if (!ret2) { |
---|
9010 | 8383 | ret2 = file_update_time(vmf->vma->vm_file); |
---|
9011 | 8384 | reserved = 1; |
---|
.. | .. |
---|
9042 | 8415 | unlock_extent_cached(io_tree, page_start, page_end, |
---|
9043 | 8416 | &cached_state); |
---|
9044 | 8417 | unlock_page(page); |
---|
9045 | | - btrfs_start_ordered_extent(inode, ordered, 1); |
---|
| 8418 | + btrfs_start_ordered_extent(ordered, 1); |
---|
9046 | 8419 | btrfs_put_ordered_extent(ordered); |
---|
9047 | 8420 | goto again; |
---|
9048 | 8421 | } |
---|
.. | .. |
---|
9052 | 8425 | fs_info->sectorsize); |
---|
9053 | 8426 | if (reserved_space < PAGE_SIZE) { |
---|
9054 | 8427 | end = page_start + reserved_space - 1; |
---|
9055 | | - btrfs_delalloc_release_space(inode, data_reserved, |
---|
9056 | | - page_start, PAGE_SIZE - reserved_space, |
---|
9057 | | - true); |
---|
| 8428 | + btrfs_delalloc_release_space(BTRFS_I(inode), |
---|
| 8429 | + data_reserved, page_start, |
---|
| 8430 | + PAGE_SIZE - reserved_space, true); |
---|
9058 | 8431 | } |
---|
9059 | 8432 | } |
---|
9060 | 8433 | |
---|
.. | .. |
---|
9066 | 8439 | * reserve data&meta space before lock_page() (see above comments). |
---|
9067 | 8440 | */ |
---|
9068 | 8441 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end, |
---|
9069 | | - EXTENT_DIRTY | EXTENT_DELALLOC | |
---|
9070 | | - EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, |
---|
9071 | | - 0, 0, &cached_state); |
---|
| 8442 | + EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | |
---|
| 8443 | + EXTENT_DEFRAG, 0, 0, &cached_state); |
---|
9072 | 8444 | |
---|
9073 | | - ret2 = btrfs_set_extent_delalloc(inode, page_start, end, 0, |
---|
9074 | | - &cached_state, 0); |
---|
| 8445 | + ret2 = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0, |
---|
| 8446 | + &cached_state); |
---|
9075 | 8447 | if (ret2) { |
---|
9076 | 8448 | unlock_extent_cached(io_tree, page_start, page_end, |
---|
9077 | 8449 | &cached_state); |
---|
9078 | 8450 | ret = VM_FAULT_SIGBUS; |
---|
9079 | 8451 | goto out_unlock; |
---|
9080 | 8452 | } |
---|
9081 | | - ret2 = 0; |
---|
9082 | 8453 | |
---|
9083 | 8454 | /* page is wholly or partially inside EOF */ |
---|
9084 | 8455 | if (page_start + PAGE_SIZE > size) |
---|
9085 | | - zero_start = size & ~PAGE_MASK; |
---|
| 8456 | + zero_start = offset_in_page(size); |
---|
9086 | 8457 | else |
---|
9087 | 8458 | zero_start = PAGE_SIZE; |
---|
9088 | 8459 | |
---|
.. | .. |
---|
9096 | 8467 | set_page_dirty(page); |
---|
9097 | 8468 | SetPageUptodate(page); |
---|
9098 | 8469 | |
---|
9099 | | - BTRFS_I(inode)->last_trans = fs_info->generation; |
---|
9100 | | - BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; |
---|
9101 | | - BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; |
---|
| 8470 | + btrfs_set_inode_last_sub_trans(BTRFS_I(inode)); |
---|
9102 | 8471 | |
---|
9103 | 8472 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state); |
---|
9104 | 8473 | |
---|
9105 | | - if (!ret2) { |
---|
9106 | | - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); |
---|
9107 | | - sb_end_pagefault(inode->i_sb); |
---|
9108 | | - extent_changeset_free(data_reserved); |
---|
9109 | | - return VM_FAULT_LOCKED; |
---|
9110 | | - } |
---|
| 8474 | + btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); |
---|
| 8475 | + sb_end_pagefault(inode->i_sb); |
---|
| 8476 | + extent_changeset_free(data_reserved); |
---|
| 8477 | + return VM_FAULT_LOCKED; |
---|
9111 | 8478 | |
---|
9112 | 8479 | out_unlock: |
---|
9113 | 8480 | unlock_page(page); |
---|
9114 | 8481 | out: |
---|
9115 | 8482 | btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); |
---|
9116 | | - btrfs_delalloc_release_space(inode, data_reserved, page_start, |
---|
| 8483 | + btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start, |
---|
9117 | 8484 | reserved_space, (ret != 0)); |
---|
9118 | 8485 | out_noreserve: |
---|
9119 | 8486 | sb_end_pagefault(inode->i_sb); |
---|
.. | .. |
---|
9129 | 8496 | int ret; |
---|
9130 | 8497 | struct btrfs_trans_handle *trans; |
---|
9131 | 8498 | u64 mask = fs_info->sectorsize - 1; |
---|
9132 | | - u64 min_size = btrfs_calc_trunc_metadata_size(fs_info, 1); |
---|
| 8499 | + u64 min_size = btrfs_calc_metadata_size(fs_info, 1); |
---|
9133 | 8500 | |
---|
9134 | 8501 | if (!skip_writeback) { |
---|
9135 | 8502 | ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask), |
---|
.. | .. |
---|
9184 | 8551 | |
---|
9185 | 8552 | /* Migrate the slack space for the truncate to our reserve */ |
---|
9186 | 8553 | ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv, |
---|
9187 | | - min_size, 0); |
---|
| 8554 | + min_size, false); |
---|
9188 | 8555 | BUG_ON(ret); |
---|
9189 | 8556 | |
---|
9190 | 8557 | /* |
---|
.. | .. |
---|
9219 | 8586 | break; |
---|
9220 | 8587 | } |
---|
9221 | 8588 | |
---|
9222 | | - btrfs_block_rsv_release(fs_info, rsv, -1); |
---|
| 8589 | + btrfs_block_rsv_release(fs_info, rsv, -1, NULL); |
---|
9223 | 8590 | ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, |
---|
9224 | | - rsv, min_size, 0); |
---|
| 8591 | + rsv, min_size, false); |
---|
9225 | 8592 | BUG_ON(ret); /* shouldn't happen */ |
---|
9226 | 8593 | trans->block_rsv = rsv; |
---|
9227 | 8594 | } |
---|
.. | .. |
---|
9244 | 8611 | ret = PTR_ERR(trans); |
---|
9245 | 8612 | goto out; |
---|
9246 | 8613 | } |
---|
9247 | | - btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
---|
| 8614 | + btrfs_inode_safe_disk_i_size_write(inode, 0); |
---|
9248 | 8615 | } |
---|
9249 | 8616 | |
---|
9250 | 8617 | if (trans) { |
---|
.. | .. |
---|
9327 | 8694 | ei->index_cnt = (u64)-1; |
---|
9328 | 8695 | ei->dir_index = 0; |
---|
9329 | 8696 | ei->last_unlink_trans = 0; |
---|
9330 | | - ei->last_link_trans = 0; |
---|
| 8697 | + ei->last_reflink_trans = 0; |
---|
9331 | 8698 | ei->last_log_commit = 0; |
---|
9332 | 8699 | |
---|
9333 | 8700 | spin_lock_init(&ei->lock); |
---|
.. | .. |
---|
9346 | 8713 | |
---|
9347 | 8714 | inode = &ei->vfs_inode; |
---|
9348 | 8715 | extent_map_tree_init(&ei->extent_tree); |
---|
9349 | | - extent_io_tree_init(&ei->io_tree, inode); |
---|
9350 | | - extent_io_tree_init(&ei->io_failure_tree, inode); |
---|
9351 | | - ei->io_tree.track_uptodate = 1; |
---|
9352 | | - ei->io_failure_tree.track_uptodate = 1; |
---|
| 8716 | + extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode); |
---|
| 8717 | + extent_io_tree_init(fs_info, &ei->io_failure_tree, |
---|
| 8718 | + IO_TREE_INODE_IO_FAILURE, inode); |
---|
| 8719 | + extent_io_tree_init(fs_info, &ei->file_extent_tree, |
---|
| 8720 | + IO_TREE_INODE_FILE_EXTENT, inode); |
---|
| 8721 | + ei->io_tree.track_uptodate = true; |
---|
| 8722 | + ei->io_failure_tree.track_uptodate = true; |
---|
9353 | 8723 | atomic_set(&ei->sync_writers, 0); |
---|
9354 | 8724 | mutex_init(&ei->log_mutex); |
---|
9355 | | - mutex_init(&ei->delalloc_mutex); |
---|
9356 | 8725 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
---|
9357 | 8726 | INIT_LIST_HEAD(&ei->delalloc_inodes); |
---|
9358 | 8727 | INIT_LIST_HEAD(&ei->delayed_iput); |
---|
.. | .. |
---|
9370 | 8739 | } |
---|
9371 | 8740 | #endif |
---|
9372 | 8741 | |
---|
9373 | | -static void btrfs_i_callback(struct rcu_head *head) |
---|
| 8742 | +void btrfs_free_inode(struct inode *inode) |
---|
9374 | 8743 | { |
---|
9375 | | - struct inode *inode = container_of(head, struct inode, i_rcu); |
---|
9376 | 8744 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); |
---|
9377 | 8745 | } |
---|
9378 | 8746 | |
---|
9379 | | -void btrfs_destroy_inode(struct inode *inode) |
---|
| 8747 | +void btrfs_destroy_inode(struct inode *vfs_inode) |
---|
9380 | 8748 | { |
---|
9381 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
9382 | 8749 | struct btrfs_ordered_extent *ordered; |
---|
9383 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
| 8750 | + struct btrfs_inode *inode = BTRFS_I(vfs_inode); |
---|
| 8751 | + struct btrfs_root *root = inode->root; |
---|
9384 | 8752 | |
---|
9385 | | - WARN_ON(!hlist_empty(&inode->i_dentry)); |
---|
9386 | | - WARN_ON(inode->i_data.nrpages); |
---|
9387 | | - WARN_ON(BTRFS_I(inode)->block_rsv.reserved); |
---|
9388 | | - WARN_ON(BTRFS_I(inode)->block_rsv.size); |
---|
9389 | | - WARN_ON(BTRFS_I(inode)->outstanding_extents); |
---|
9390 | | - WARN_ON(BTRFS_I(inode)->delalloc_bytes); |
---|
9391 | | - WARN_ON(BTRFS_I(inode)->new_delalloc_bytes); |
---|
9392 | | - WARN_ON(BTRFS_I(inode)->csum_bytes); |
---|
9393 | | - WARN_ON(BTRFS_I(inode)->defrag_bytes); |
---|
| 8753 | + WARN_ON(!hlist_empty(&vfs_inode->i_dentry)); |
---|
| 8754 | + WARN_ON(vfs_inode->i_data.nrpages); |
---|
| 8755 | + WARN_ON(inode->block_rsv.reserved); |
---|
| 8756 | + WARN_ON(inode->block_rsv.size); |
---|
| 8757 | + WARN_ON(inode->outstanding_extents); |
---|
| 8758 | + WARN_ON(inode->delalloc_bytes); |
---|
| 8759 | + WARN_ON(inode->new_delalloc_bytes); |
---|
| 8760 | + WARN_ON(inode->csum_bytes); |
---|
| 8761 | + WARN_ON(inode->defrag_bytes); |
---|
9394 | 8762 | |
---|
9395 | 8763 | /* |
---|
9396 | 8764 | * This can happen where we create an inode, but somebody else also |
---|
.. | .. |
---|
9398 | 8766 | * created. |
---|
9399 | 8767 | */ |
---|
9400 | 8768 | if (!root) |
---|
9401 | | - goto free; |
---|
| 8769 | + return; |
---|
9402 | 8770 | |
---|
9403 | 8771 | while (1) { |
---|
9404 | 8772 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); |
---|
9405 | 8773 | if (!ordered) |
---|
9406 | 8774 | break; |
---|
9407 | 8775 | else { |
---|
9408 | | - btrfs_err(fs_info, |
---|
| 8776 | + btrfs_err(root->fs_info, |
---|
9409 | 8777 | "found ordered extent %llu %llu on inode cleanup", |
---|
9410 | | - ordered->file_offset, ordered->len); |
---|
| 8778 | + ordered->file_offset, ordered->num_bytes); |
---|
9411 | 8779 | btrfs_remove_ordered_extent(inode, ordered); |
---|
9412 | 8780 | btrfs_put_ordered_extent(ordered); |
---|
9413 | 8781 | btrfs_put_ordered_extent(ordered); |
---|
.. | .. |
---|
9415 | 8783 | } |
---|
9416 | 8784 | btrfs_qgroup_check_reserved_leak(inode); |
---|
9417 | 8785 | inode_tree_del(inode); |
---|
9418 | | - btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0); |
---|
9419 | | -free: |
---|
9420 | | - call_rcu(&inode->i_rcu, btrfs_i_callback); |
---|
| 8786 | + btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); |
---|
| 8787 | + btrfs_inode_clear_file_extent_range(inode, 0, (u64)-1); |
---|
| 8788 | + btrfs_put_root(inode->root); |
---|
9421 | 8789 | } |
---|
9422 | 8790 | |
---|
9423 | 8791 | int btrfs_drop_inode(struct inode *inode) |
---|
.. | .. |
---|
9542 | 8910 | struct inode *new_inode = new_dentry->d_inode; |
---|
9543 | 8911 | struct inode *old_inode = old_dentry->d_inode; |
---|
9544 | 8912 | struct timespec64 ctime = current_time(old_inode); |
---|
9545 | | - struct dentry *parent; |
---|
9546 | 8913 | u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); |
---|
9547 | 8914 | u64 new_ino = btrfs_ino(BTRFS_I(new_inode)); |
---|
9548 | 8915 | u64 old_idx = 0; |
---|
9549 | 8916 | u64 new_idx = 0; |
---|
9550 | 8917 | int ret; |
---|
| 8918 | + int ret2; |
---|
9551 | 8919 | bool root_log_pinned = false; |
---|
9552 | 8920 | bool dest_log_pinned = false; |
---|
9553 | | - struct btrfs_log_ctx ctx_root; |
---|
9554 | | - struct btrfs_log_ctx ctx_dest; |
---|
9555 | | - bool sync_log_root = false; |
---|
9556 | | - bool sync_log_dest = false; |
---|
9557 | | - bool commit_transaction = false; |
---|
| 8921 | + bool need_abort = false; |
---|
9558 | 8922 | |
---|
9559 | 8923 | /* |
---|
9560 | 8924 | * For non-subvolumes allow exchange only within one subvolume, in the |
---|
.. | .. |
---|
9565 | 8929 | (old_ino != BTRFS_FIRST_FREE_OBJECTID || |
---|
9566 | 8930 | new_ino != BTRFS_FIRST_FREE_OBJECTID)) |
---|
9567 | 8931 | return -EXDEV; |
---|
9568 | | - |
---|
9569 | | - btrfs_init_log_ctx(&ctx_root, old_inode); |
---|
9570 | | - btrfs_init_log_ctx(&ctx_dest, new_inode); |
---|
9571 | 8932 | |
---|
9572 | 8933 | /* close the race window with snapshot create/destroy ioctl */ |
---|
9573 | 8934 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID || |
---|
.. | .. |
---|
9608 | 8969 | /* Reference for the source. */ |
---|
9609 | 8970 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) { |
---|
9610 | 8971 | /* force full log commit if subvolume involved. */ |
---|
9611 | | - btrfs_set_log_full_commit(fs_info, trans); |
---|
| 8972 | + btrfs_set_log_full_commit(trans); |
---|
9612 | 8973 | } else { |
---|
9613 | 8974 | btrfs_pin_log_trans(root); |
---|
9614 | 8975 | root_log_pinned = true; |
---|
.. | .. |
---|
9620 | 8981 | old_idx); |
---|
9621 | 8982 | if (ret) |
---|
9622 | 8983 | goto out_fail; |
---|
| 8984 | + need_abort = true; |
---|
9623 | 8985 | } |
---|
9624 | 8986 | |
---|
9625 | 8987 | /* And now for the dest. */ |
---|
9626 | 8988 | if (new_ino == BTRFS_FIRST_FREE_OBJECTID) { |
---|
9627 | 8989 | /* force full log commit if subvolume involved. */ |
---|
9628 | | - btrfs_set_log_full_commit(fs_info, trans); |
---|
| 8990 | + btrfs_set_log_full_commit(trans); |
---|
9629 | 8991 | } else { |
---|
9630 | 8992 | btrfs_pin_log_trans(dest); |
---|
9631 | 8993 | dest_log_pinned = true; |
---|
.. | .. |
---|
9635 | 8997 | new_ino, |
---|
9636 | 8998 | btrfs_ino(BTRFS_I(old_dir)), |
---|
9637 | 8999 | new_idx); |
---|
9638 | | - if (ret) |
---|
| 9000 | + if (ret) { |
---|
| 9001 | + if (need_abort) |
---|
| 9002 | + btrfs_abort_transaction(trans, ret); |
---|
9639 | 9003 | goto out_fail; |
---|
| 9004 | + } |
---|
9640 | 9005 | } |
---|
9641 | 9006 | |
---|
9642 | 9007 | /* Update inode version and ctime/mtime. */ |
---|
.. | .. |
---|
9710 | 9075 | BTRFS_I(new_inode)->dir_index = new_idx; |
---|
9711 | 9076 | |
---|
9712 | 9077 | if (root_log_pinned) { |
---|
9713 | | - parent = new_dentry->d_parent; |
---|
9714 | | - ret = btrfs_log_new_name(trans, BTRFS_I(old_inode), |
---|
9715 | | - BTRFS_I(old_dir), parent, |
---|
9716 | | - false, &ctx_root); |
---|
9717 | | - if (ret == BTRFS_NEED_LOG_SYNC) |
---|
9718 | | - sync_log_root = true; |
---|
9719 | | - else if (ret == BTRFS_NEED_TRANS_COMMIT) |
---|
9720 | | - commit_transaction = true; |
---|
9721 | | - ret = 0; |
---|
| 9078 | + btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir), |
---|
| 9079 | + new_dentry->d_parent); |
---|
9722 | 9080 | btrfs_end_log_trans(root); |
---|
9723 | 9081 | root_log_pinned = false; |
---|
9724 | 9082 | } |
---|
9725 | 9083 | if (dest_log_pinned) { |
---|
9726 | | - if (!commit_transaction) { |
---|
9727 | | - parent = old_dentry->d_parent; |
---|
9728 | | - ret = btrfs_log_new_name(trans, BTRFS_I(new_inode), |
---|
9729 | | - BTRFS_I(new_dir), parent, |
---|
9730 | | - false, &ctx_dest); |
---|
9731 | | - if (ret == BTRFS_NEED_LOG_SYNC) |
---|
9732 | | - sync_log_dest = true; |
---|
9733 | | - else if (ret == BTRFS_NEED_TRANS_COMMIT) |
---|
9734 | | - commit_transaction = true; |
---|
9735 | | - ret = 0; |
---|
9736 | | - } |
---|
| 9084 | + btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir), |
---|
| 9085 | + old_dentry->d_parent); |
---|
9737 | 9086 | btrfs_end_log_trans(dest); |
---|
9738 | 9087 | dest_log_pinned = false; |
---|
9739 | 9088 | } |
---|
.. | .. |
---|
9755 | 9104 | btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) || |
---|
9756 | 9105 | (new_inode && |
---|
9757 | 9106 | btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation))) |
---|
9758 | | - btrfs_set_log_full_commit(fs_info, trans); |
---|
| 9107 | + btrfs_set_log_full_commit(trans); |
---|
9759 | 9108 | |
---|
9760 | 9109 | if (root_log_pinned) { |
---|
9761 | 9110 | btrfs_end_log_trans(root); |
---|
.. | .. |
---|
9766 | 9115 | dest_log_pinned = false; |
---|
9767 | 9116 | } |
---|
9768 | 9117 | } |
---|
9769 | | - if (!ret && sync_log_root && !commit_transaction) { |
---|
9770 | | - ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, |
---|
9771 | | - &ctx_root); |
---|
9772 | | - if (ret) |
---|
9773 | | - commit_transaction = true; |
---|
9774 | | - } |
---|
9775 | | - if (!ret && sync_log_dest && !commit_transaction) { |
---|
9776 | | - ret = btrfs_sync_log(trans, BTRFS_I(new_inode)->root, |
---|
9777 | | - &ctx_dest); |
---|
9778 | | - if (ret) |
---|
9779 | | - commit_transaction = true; |
---|
9780 | | - } |
---|
9781 | | - if (commit_transaction) { |
---|
9782 | | - /* |
---|
9783 | | - * We may have set commit_transaction when logging the new name |
---|
9784 | | - * in the destination root, in which case we left the source |
---|
9785 | | - * root context in the list of log contextes. So make sure we |
---|
9786 | | - * remove it to avoid invalid memory accesses, since the context |
---|
9787 | | - * was allocated in our stack frame. |
---|
9788 | | - */ |
---|
9789 | | - if (sync_log_root) { |
---|
9790 | | - mutex_lock(&root->log_mutex); |
---|
9791 | | - list_del_init(&ctx_root.list); |
---|
9792 | | - mutex_unlock(&root->log_mutex); |
---|
9793 | | - } |
---|
9794 | | - ret = btrfs_commit_transaction(trans); |
---|
9795 | | - } else { |
---|
9796 | | - int ret2; |
---|
9797 | | - |
---|
9798 | | - ret2 = btrfs_end_transaction(trans); |
---|
9799 | | - ret = ret ? ret : ret2; |
---|
9800 | | - } |
---|
| 9118 | + ret2 = btrfs_end_transaction(trans); |
---|
| 9119 | + ret = ret ? ret : ret2; |
---|
9801 | 9120 | out_notrans: |
---|
9802 | 9121 | if (new_ino == BTRFS_FIRST_FREE_OBJECTID || |
---|
9803 | 9122 | old_ino == BTRFS_FIRST_FREE_OBJECTID) |
---|
9804 | 9123 | up_read(&fs_info->subvol_sem); |
---|
9805 | | - |
---|
9806 | | - ASSERT(list_empty(&ctx_root.list)); |
---|
9807 | | - ASSERT(list_empty(&ctx_dest.list)); |
---|
9808 | 9124 | |
---|
9809 | 9125 | return ret; |
---|
9810 | 9126 | } |
---|
.. | .. |
---|
9819 | 9135 | u64 objectid; |
---|
9820 | 9136 | u64 index; |
---|
9821 | 9137 | |
---|
9822 | | - ret = btrfs_find_free_ino(root, &objectid); |
---|
| 9138 | + ret = btrfs_find_free_objectid(root, &objectid); |
---|
9823 | 9139 | if (ret) |
---|
9824 | 9140 | return ret; |
---|
9825 | 9141 | |
---|
.. | .. |
---|
9873 | 9189 | struct inode *old_inode = d_inode(old_dentry); |
---|
9874 | 9190 | u64 index = 0; |
---|
9875 | 9191 | int ret; |
---|
| 9192 | + int ret2; |
---|
9876 | 9193 | u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); |
---|
9877 | 9194 | bool log_pinned = false; |
---|
9878 | | - struct btrfs_log_ctx ctx; |
---|
9879 | | - bool sync_log = false; |
---|
9880 | | - bool commit_transaction = false; |
---|
9881 | 9195 | |
---|
9882 | 9196 | if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) |
---|
9883 | 9197 | return -EPERM; |
---|
.. | .. |
---|
9954 | 9268 | BTRFS_I(old_inode)->dir_index = 0ULL; |
---|
9955 | 9269 | if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { |
---|
9956 | 9270 | /* force full log commit if subvolume involved. */ |
---|
9957 | | - btrfs_set_log_full_commit(fs_info, trans); |
---|
| 9271 | + btrfs_set_log_full_commit(trans); |
---|
9958 | 9272 | } else { |
---|
9959 | 9273 | btrfs_pin_log_trans(root); |
---|
9960 | 9274 | log_pinned = true; |
---|
.. | .. |
---|
10027 | 9341 | BTRFS_I(old_inode)->dir_index = index; |
---|
10028 | 9342 | |
---|
10029 | 9343 | if (log_pinned) { |
---|
10030 | | - struct dentry *parent = new_dentry->d_parent; |
---|
10031 | | - |
---|
10032 | | - btrfs_init_log_ctx(&ctx, old_inode); |
---|
10033 | | - ret = btrfs_log_new_name(trans, BTRFS_I(old_inode), |
---|
10034 | | - BTRFS_I(old_dir), parent, |
---|
10035 | | - false, &ctx); |
---|
10036 | | - if (ret == BTRFS_NEED_LOG_SYNC) |
---|
10037 | | - sync_log = true; |
---|
10038 | | - else if (ret == BTRFS_NEED_TRANS_COMMIT) |
---|
10039 | | - commit_transaction = true; |
---|
10040 | | - ret = 0; |
---|
| 9344 | + btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir), |
---|
| 9345 | + new_dentry->d_parent); |
---|
10041 | 9346 | btrfs_end_log_trans(root); |
---|
10042 | 9347 | log_pinned = false; |
---|
10043 | 9348 | } |
---|
.. | .. |
---|
10069 | 9374 | btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) || |
---|
10070 | 9375 | (new_inode && |
---|
10071 | 9376 | btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation))) |
---|
10072 | | - btrfs_set_log_full_commit(fs_info, trans); |
---|
| 9377 | + btrfs_set_log_full_commit(trans); |
---|
10073 | 9378 | |
---|
10074 | 9379 | btrfs_end_log_trans(root); |
---|
10075 | 9380 | log_pinned = false; |
---|
10076 | 9381 | } |
---|
10077 | | - if (!ret && sync_log) { |
---|
10078 | | - ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx); |
---|
10079 | | - if (ret) |
---|
10080 | | - commit_transaction = true; |
---|
10081 | | - } else if (sync_log) { |
---|
10082 | | - mutex_lock(&root->log_mutex); |
---|
10083 | | - list_del(&ctx.list); |
---|
10084 | | - mutex_unlock(&root->log_mutex); |
---|
10085 | | - } |
---|
10086 | | - if (commit_transaction) { |
---|
10087 | | - ret = btrfs_commit_transaction(trans); |
---|
10088 | | - } else { |
---|
10089 | | - int ret2; |
---|
10090 | | - |
---|
10091 | | - ret2 = btrfs_end_transaction(trans); |
---|
10092 | | - ret = ret ? ret : ret2; |
---|
10093 | | - } |
---|
| 9382 | + ret2 = btrfs_end_transaction(trans); |
---|
| 9383 | + ret = ret ? ret : ret2; |
---|
10094 | 9384 | out_notrans: |
---|
10095 | 9385 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) |
---|
10096 | 9386 | up_read(&fs_info->subvol_sem); |
---|
.. | .. |
---|
10147 | 9437 | init_completion(&work->completion); |
---|
10148 | 9438 | INIT_LIST_HEAD(&work->list); |
---|
10149 | 9439 | work->inode = inode; |
---|
10150 | | - WARN_ON_ONCE(!inode); |
---|
10151 | | - btrfs_init_work(&work->work, btrfs_flush_delalloc_helper, |
---|
10152 | | - btrfs_run_delalloc_work, NULL, NULL); |
---|
| 9440 | + btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); |
---|
10153 | 9441 | |
---|
10154 | 9442 | return work; |
---|
10155 | 9443 | } |
---|
.. | .. |
---|
10158 | 9446 | * some fairly slow code that needs optimization. This walks the list |
---|
10159 | 9447 | * of all the inodes with pending delalloc and forces them to disk. |
---|
10160 | 9448 | */ |
---|
10161 | | -static int start_delalloc_inodes(struct btrfs_root *root, int nr, bool snapshot) |
---|
| 9449 | +static int start_delalloc_inodes(struct btrfs_root *root, |
---|
| 9450 | + struct writeback_control *wbc, bool snapshot, |
---|
| 9451 | + bool in_reclaim_context) |
---|
10162 | 9452 | { |
---|
10163 | 9453 | struct btrfs_inode *binode; |
---|
10164 | 9454 | struct inode *inode; |
---|
.. | .. |
---|
10166 | 9456 | struct list_head works; |
---|
10167 | 9457 | struct list_head splice; |
---|
10168 | 9458 | int ret = 0; |
---|
| 9459 | + bool full_flush = wbc->nr_to_write == LONG_MAX; |
---|
10169 | 9460 | |
---|
10170 | 9461 | INIT_LIST_HEAD(&works); |
---|
10171 | 9462 | INIT_LIST_HEAD(&splice); |
---|
.. | .. |
---|
10179 | 9470 | |
---|
10180 | 9471 | list_move_tail(&binode->delalloc_inodes, |
---|
10181 | 9472 | &root->delalloc_inodes); |
---|
| 9473 | + |
---|
| 9474 | + if (in_reclaim_context && |
---|
| 9475 | + test_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &binode->runtime_flags)) |
---|
| 9476 | + continue; |
---|
| 9477 | + |
---|
10182 | 9478 | inode = igrab(&binode->vfs_inode); |
---|
10183 | 9479 | if (!inode) { |
---|
10184 | 9480 | cond_resched_lock(&root->delalloc_lock); |
---|
.. | .. |
---|
10189 | 9485 | if (snapshot) |
---|
10190 | 9486 | set_bit(BTRFS_INODE_SNAPSHOT_FLUSH, |
---|
10191 | 9487 | &binode->runtime_flags); |
---|
10192 | | - work = btrfs_alloc_delalloc_work(inode); |
---|
10193 | | - if (!work) { |
---|
10194 | | - iput(inode); |
---|
10195 | | - ret = -ENOMEM; |
---|
10196 | | - goto out; |
---|
| 9488 | + if (full_flush) { |
---|
| 9489 | + work = btrfs_alloc_delalloc_work(inode); |
---|
| 9490 | + if (!work) { |
---|
| 9491 | + iput(inode); |
---|
| 9492 | + ret = -ENOMEM; |
---|
| 9493 | + goto out; |
---|
| 9494 | + } |
---|
| 9495 | + list_add_tail(&work->list, &works); |
---|
| 9496 | + btrfs_queue_work(root->fs_info->flush_workers, |
---|
| 9497 | + &work->work); |
---|
| 9498 | + } else { |
---|
| 9499 | + ret = sync_inode(inode, wbc); |
---|
| 9500 | + if (!ret && |
---|
| 9501 | + test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, |
---|
| 9502 | + &BTRFS_I(inode)->runtime_flags)) |
---|
| 9503 | + ret = sync_inode(inode, wbc); |
---|
| 9504 | + btrfs_add_delayed_iput(inode); |
---|
| 9505 | + if (ret || wbc->nr_to_write <= 0) |
---|
| 9506 | + goto out; |
---|
10197 | 9507 | } |
---|
10198 | | - list_add_tail(&work->list, &works); |
---|
10199 | | - btrfs_queue_work(root->fs_info->flush_workers, |
---|
10200 | | - &work->work); |
---|
10201 | | - ret++; |
---|
10202 | | - if (nr != -1 && ret >= nr) |
---|
10203 | | - goto out; |
---|
10204 | 9508 | cond_resched(); |
---|
10205 | 9509 | spin_lock(&root->delalloc_lock); |
---|
10206 | 9510 | } |
---|
.. | .. |
---|
10224 | 9528 | |
---|
10225 | 9529 | int btrfs_start_delalloc_snapshot(struct btrfs_root *root) |
---|
10226 | 9530 | { |
---|
| 9531 | + struct writeback_control wbc = { |
---|
| 9532 | + .nr_to_write = LONG_MAX, |
---|
| 9533 | + .sync_mode = WB_SYNC_NONE, |
---|
| 9534 | + .range_start = 0, |
---|
| 9535 | + .range_end = LLONG_MAX, |
---|
| 9536 | + }; |
---|
10227 | 9537 | struct btrfs_fs_info *fs_info = root->fs_info; |
---|
10228 | | - int ret; |
---|
10229 | 9538 | |
---|
10230 | 9539 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) |
---|
10231 | 9540 | return -EROFS; |
---|
10232 | 9541 | |
---|
10233 | | - ret = start_delalloc_inodes(root, -1, true); |
---|
10234 | | - if (ret > 0) |
---|
10235 | | - ret = 0; |
---|
10236 | | - return ret; |
---|
| 9542 | + return start_delalloc_inodes(root, &wbc, true, false); |
---|
10237 | 9543 | } |
---|
10238 | 9544 | |
---|
10239 | | -int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int nr) |
---|
| 9545 | +int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, |
---|
| 9546 | + bool in_reclaim_context) |
---|
10240 | 9547 | { |
---|
| 9548 | + struct writeback_control wbc = { |
---|
| 9549 | + .nr_to_write = (nr == U64_MAX) ? LONG_MAX : (unsigned long)nr, |
---|
| 9550 | + .sync_mode = WB_SYNC_NONE, |
---|
| 9551 | + .range_start = 0, |
---|
| 9552 | + .range_end = LLONG_MAX, |
---|
| 9553 | + }; |
---|
10241 | 9554 | struct btrfs_root *root; |
---|
10242 | 9555 | struct list_head splice; |
---|
10243 | 9556 | int ret; |
---|
.. | .. |
---|
10251 | 9564 | spin_lock(&fs_info->delalloc_root_lock); |
---|
10252 | 9565 | list_splice_init(&fs_info->delalloc_roots, &splice); |
---|
10253 | 9566 | while (!list_empty(&splice) && nr) { |
---|
| 9567 | + /* |
---|
| 9568 | + * Reset nr_to_write here so we know that we're doing a full |
---|
| 9569 | + * flush. |
---|
| 9570 | + */ |
---|
| 9571 | + if (nr == U64_MAX) |
---|
| 9572 | + wbc.nr_to_write = LONG_MAX; |
---|
| 9573 | + |
---|
10254 | 9574 | root = list_first_entry(&splice, struct btrfs_root, |
---|
10255 | 9575 | delalloc_root); |
---|
10256 | | - root = btrfs_grab_fs_root(root); |
---|
| 9576 | + root = btrfs_grab_root(root); |
---|
10257 | 9577 | BUG_ON(!root); |
---|
10258 | 9578 | list_move_tail(&root->delalloc_root, |
---|
10259 | 9579 | &fs_info->delalloc_roots); |
---|
10260 | 9580 | spin_unlock(&fs_info->delalloc_root_lock); |
---|
10261 | 9581 | |
---|
10262 | | - ret = start_delalloc_inodes(root, nr, false); |
---|
10263 | | - btrfs_put_fs_root(root); |
---|
10264 | | - if (ret < 0) |
---|
| 9582 | + ret = start_delalloc_inodes(root, &wbc, false, in_reclaim_context); |
---|
| 9583 | + btrfs_put_root(root); |
---|
| 9584 | + if (ret < 0 || wbc.nr_to_write <= 0) |
---|
10265 | 9585 | goto out; |
---|
10266 | | - |
---|
10267 | | - if (nr != -1) { |
---|
10268 | | - nr -= ret; |
---|
10269 | | - WARN_ON(nr < 0); |
---|
10270 | | - } |
---|
10271 | 9586 | spin_lock(&fs_info->delalloc_root_lock); |
---|
10272 | 9587 | } |
---|
10273 | 9588 | spin_unlock(&fs_info->delalloc_root_lock); |
---|
.. | .. |
---|
10316 | 9631 | if (IS_ERR(trans)) |
---|
10317 | 9632 | return PTR_ERR(trans); |
---|
10318 | 9633 | |
---|
10319 | | - err = btrfs_find_free_ino(root, &objectid); |
---|
| 9634 | + err = btrfs_find_free_objectid(root, &objectid); |
---|
10320 | 9635 | if (err) |
---|
10321 | 9636 | goto out_unlock; |
---|
10322 | 9637 | |
---|
.. | .. |
---|
10338 | 9653 | inode->i_fop = &btrfs_file_operations; |
---|
10339 | 9654 | inode->i_op = &btrfs_file_inode_operations; |
---|
10340 | 9655 | inode->i_mapping->a_ops = &btrfs_aops; |
---|
10341 | | - BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
---|
10342 | 9656 | |
---|
10343 | 9657 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); |
---|
10344 | 9658 | if (err) |
---|
.. | .. |
---|
10377 | 9691 | |
---|
10378 | 9692 | inode->i_op = &btrfs_symlink_inode_operations; |
---|
10379 | 9693 | inode_nohighmem(inode); |
---|
10380 | | - inode->i_mapping->a_ops = &btrfs_symlink_aops; |
---|
10381 | 9694 | inode_set_bytes(inode, name_len); |
---|
10382 | 9695 | btrfs_i_size_write(BTRFS_I(inode), name_len); |
---|
10383 | 9696 | err = btrfs_update_inode(trans, root, inode); |
---|
.. | .. |
---|
10404 | 9717 | return err; |
---|
10405 | 9718 | } |
---|
10406 | 9719 | |
---|
| 9720 | +static struct btrfs_trans_handle *insert_prealloc_file_extent( |
---|
| 9721 | + struct btrfs_trans_handle *trans_in, |
---|
| 9722 | + struct inode *inode, struct btrfs_key *ins, |
---|
| 9723 | + u64 file_offset) |
---|
| 9724 | +{ |
---|
| 9725 | + struct btrfs_file_extent_item stack_fi; |
---|
| 9726 | + struct btrfs_replace_extent_info extent_info; |
---|
| 9727 | + struct btrfs_trans_handle *trans = trans_in; |
---|
| 9728 | + struct btrfs_path *path; |
---|
| 9729 | + u64 start = ins->objectid; |
---|
| 9730 | + u64 len = ins->offset; |
---|
| 9731 | + int ret; |
---|
| 9732 | + |
---|
| 9733 | + memset(&stack_fi, 0, sizeof(stack_fi)); |
---|
| 9734 | + |
---|
| 9735 | + btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_PREALLOC); |
---|
| 9736 | + btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, start); |
---|
| 9737 | + btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi, len); |
---|
| 9738 | + btrfs_set_stack_file_extent_num_bytes(&stack_fi, len); |
---|
| 9739 | + btrfs_set_stack_file_extent_ram_bytes(&stack_fi, len); |
---|
| 9740 | + btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE); |
---|
| 9741 | + /* Encryption and other encoding is reserved and all 0 */ |
---|
| 9742 | + |
---|
| 9743 | + ret = btrfs_qgroup_release_data(BTRFS_I(inode), file_offset, len); |
---|
| 9744 | + if (ret < 0) |
---|
| 9745 | + return ERR_PTR(ret); |
---|
| 9746 | + |
---|
| 9747 | + if (trans) { |
---|
| 9748 | + ret = insert_reserved_file_extent(trans, BTRFS_I(inode), |
---|
| 9749 | + file_offset, &stack_fi, ret); |
---|
| 9750 | + if (ret) |
---|
| 9751 | + return ERR_PTR(ret); |
---|
| 9752 | + return trans; |
---|
| 9753 | + } |
---|
| 9754 | + |
---|
| 9755 | + extent_info.disk_offset = start; |
---|
| 9756 | + extent_info.disk_len = len; |
---|
| 9757 | + extent_info.data_offset = 0; |
---|
| 9758 | + extent_info.data_len = len; |
---|
| 9759 | + extent_info.file_offset = file_offset; |
---|
| 9760 | + extent_info.extent_buf = (char *)&stack_fi; |
---|
| 9761 | + extent_info.is_new_extent = true; |
---|
| 9762 | + extent_info.qgroup_reserved = ret; |
---|
| 9763 | + extent_info.insertions = 0; |
---|
| 9764 | + |
---|
| 9765 | + path = btrfs_alloc_path(); |
---|
| 9766 | + if (!path) |
---|
| 9767 | + return ERR_PTR(-ENOMEM); |
---|
| 9768 | + |
---|
| 9769 | + ret = btrfs_replace_file_extents(inode, path, file_offset, |
---|
| 9770 | + file_offset + len - 1, &extent_info, |
---|
| 9771 | + &trans); |
---|
| 9772 | + btrfs_free_path(path); |
---|
| 9773 | + if (ret) |
---|
| 9774 | + return ERR_PTR(ret); |
---|
| 9775 | + |
---|
| 9776 | + return trans; |
---|
| 9777 | +} |
---|
| 9778 | + |
---|
10407 | 9779 | static int __btrfs_prealloc_file_range(struct inode *inode, int mode, |
---|
10408 | 9780 | u64 start, u64 num_bytes, u64 min_size, |
---|
10409 | 9781 | loff_t actual_len, u64 *alloc_hint, |
---|
.. | .. |
---|
10426 | 9798 | if (trans) |
---|
10427 | 9799 | own_trans = false; |
---|
10428 | 9800 | while (num_bytes > 0) { |
---|
10429 | | - if (own_trans) { |
---|
10430 | | - trans = btrfs_start_transaction(root, 3); |
---|
10431 | | - if (IS_ERR(trans)) { |
---|
10432 | | - ret = PTR_ERR(trans); |
---|
10433 | | - break; |
---|
10434 | | - } |
---|
10435 | | - } |
---|
10436 | | - |
---|
10437 | 9801 | cur_bytes = min_t(u64, num_bytes, SZ_256M); |
---|
10438 | 9802 | cur_bytes = max(cur_bytes, min_size); |
---|
10439 | 9803 | /* |
---|
.. | .. |
---|
10445 | 9809 | cur_bytes = min(cur_bytes, last_alloc); |
---|
10446 | 9810 | ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes, |
---|
10447 | 9811 | min_size, 0, *alloc_hint, &ins, 1, 0); |
---|
10448 | | - if (ret) { |
---|
10449 | | - if (own_trans) |
---|
10450 | | - btrfs_end_transaction(trans); |
---|
| 9812 | + if (ret) |
---|
10451 | 9813 | break; |
---|
10452 | | - } |
---|
10453 | 9814 | |
---|
10454 | 9815 | /* |
---|
10455 | 9816 | * We've reserved this space, and thus converted it from |
---|
.. | .. |
---|
10459 | 9820 | * clear_offset by our extent size. |
---|
10460 | 9821 | */ |
---|
10461 | 9822 | clear_offset += ins.offset; |
---|
10462 | | - btrfs_dec_block_group_reservations(fs_info, ins.objectid); |
---|
10463 | 9823 | |
---|
10464 | 9824 | last_alloc = ins.offset; |
---|
10465 | | - ret = insert_reserved_file_extent(trans, inode, |
---|
10466 | | - cur_offset, ins.objectid, |
---|
10467 | | - ins.offset, ins.offset, |
---|
10468 | | - ins.offset, 0, 0, 0, |
---|
10469 | | - BTRFS_FILE_EXTENT_PREALLOC); |
---|
10470 | | - if (ret) { |
---|
| 9825 | + trans = insert_prealloc_file_extent(trans, inode, &ins, cur_offset); |
---|
| 9826 | + /* |
---|
| 9827 | + * Now that we inserted the prealloc extent we can finally |
---|
| 9828 | + * decrement the number of reservations in the block group. |
---|
| 9829 | + * If we did it before, we could race with relocation and have |
---|
| 9830 | + * relocation miss the reserved extent, making it fail later. |
---|
| 9831 | + */ |
---|
| 9832 | + btrfs_dec_block_group_reservations(fs_info, ins.objectid); |
---|
| 9833 | + if (IS_ERR(trans)) { |
---|
| 9834 | + ret = PTR_ERR(trans); |
---|
10471 | 9835 | btrfs_free_reserved_extent(fs_info, ins.objectid, |
---|
10472 | 9836 | ins.offset, 0); |
---|
10473 | | - btrfs_abort_transaction(trans, ret); |
---|
10474 | | - if (own_trans) |
---|
10475 | | - btrfs_end_transaction(trans); |
---|
10476 | 9837 | break; |
---|
10477 | 9838 | } |
---|
10478 | 9839 | |
---|
.. | .. |
---|
10493 | 9854 | em->block_len = ins.offset; |
---|
10494 | 9855 | em->orig_block_len = ins.offset; |
---|
10495 | 9856 | em->ram_bytes = ins.offset; |
---|
10496 | | - em->bdev = fs_info->fs_devices->latest_bdev; |
---|
10497 | 9857 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); |
---|
10498 | 9858 | em->generation = trans->transid; |
---|
10499 | 9859 | |
---|
.. | .. |
---|
10524 | 9884 | else |
---|
10525 | 9885 | i_size = cur_offset; |
---|
10526 | 9886 | i_size_write(inode, i_size); |
---|
10527 | | - btrfs_ordered_update_i_size(inode, i_size, NULL); |
---|
| 9887 | + btrfs_inode_safe_disk_i_size_write(inode, 0); |
---|
10528 | 9888 | } |
---|
10529 | 9889 | |
---|
10530 | 9890 | ret = btrfs_update_inode(trans, root, inode); |
---|
.. | .. |
---|
10536 | 9896 | break; |
---|
10537 | 9897 | } |
---|
10538 | 9898 | |
---|
10539 | | - if (own_trans) |
---|
| 9899 | + if (own_trans) { |
---|
10540 | 9900 | btrfs_end_transaction(trans); |
---|
| 9901 | + trans = NULL; |
---|
| 9902 | + } |
---|
10541 | 9903 | } |
---|
10542 | 9904 | if (clear_offset < end) |
---|
10543 | | - btrfs_free_reserved_data_space(inode, NULL, clear_offset, |
---|
| 9905 | + btrfs_free_reserved_data_space(BTRFS_I(inode), NULL, clear_offset, |
---|
10544 | 9906 | end - clear_offset + 1); |
---|
10545 | 9907 | return ret; |
---|
10546 | 9908 | } |
---|
.. | .. |
---|
10600 | 9962 | if (IS_ERR(trans)) |
---|
10601 | 9963 | return PTR_ERR(trans); |
---|
10602 | 9964 | |
---|
10603 | | - ret = btrfs_find_free_ino(root, &objectid); |
---|
| 9965 | + ret = btrfs_find_free_objectid(root, &objectid); |
---|
10604 | 9966 | if (ret) |
---|
10605 | 9967 | goto out; |
---|
10606 | 9968 | |
---|
.. | .. |
---|
10616 | 9978 | inode->i_op = &btrfs_file_inode_operations; |
---|
10617 | 9979 | |
---|
10618 | 9980 | inode->i_mapping->a_ops = &btrfs_aops; |
---|
10619 | | - BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
---|
10620 | 9981 | |
---|
10621 | 9982 | ret = btrfs_init_inode_security(trans, inode, dir, NULL); |
---|
10622 | 9983 | if (ret) |
---|
.. | .. |
---|
10648 | 10009 | return ret; |
---|
10649 | 10010 | } |
---|
10650 | 10011 | |
---|
10651 | | -__attribute__((const)) |
---|
10652 | | -static int btrfs_readpage_io_failed_hook(struct page *page, int failed_mirror) |
---|
10653 | | -{ |
---|
10654 | | - return -EAGAIN; |
---|
10655 | | -} |
---|
10656 | | - |
---|
10657 | | -static void btrfs_check_extent_io_range(void *private_data, const char *caller, |
---|
10658 | | - u64 start, u64 end) |
---|
10659 | | -{ |
---|
10660 | | - struct inode *inode = private_data; |
---|
10661 | | - u64 isize; |
---|
10662 | | - |
---|
10663 | | - isize = i_size_read(inode); |
---|
10664 | | - if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { |
---|
10665 | | - btrfs_debug_rl(BTRFS_I(inode)->root->fs_info, |
---|
10666 | | - "%s: ino %llu isize %llu odd range [%llu,%llu]", |
---|
10667 | | - caller, btrfs_ino(BTRFS_I(inode)), isize, start, end); |
---|
10668 | | - } |
---|
10669 | | -} |
---|
10670 | | - |
---|
10671 | 10012 | void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) |
---|
10672 | 10013 | { |
---|
10673 | 10014 | struct inode *inode = tree->private_data; |
---|
.. | .. |
---|
10683 | 10024 | index++; |
---|
10684 | 10025 | } |
---|
10685 | 10026 | } |
---|
| 10027 | + |
---|
| 10028 | +#ifdef CONFIG_SWAP |
---|
| 10029 | +/* |
---|
| 10030 | + * Add an entry indicating a block group or device which is pinned by a |
---|
| 10031 | + * swapfile. Returns 0 on success, 1 if there is already an entry for it, or a |
---|
| 10032 | + * negative errno on failure. |
---|
| 10033 | + */ |
---|
| 10034 | +static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr, |
---|
| 10035 | + bool is_block_group) |
---|
| 10036 | +{ |
---|
| 10037 | + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; |
---|
| 10038 | + struct btrfs_swapfile_pin *sp, *entry; |
---|
| 10039 | + struct rb_node **p; |
---|
| 10040 | + struct rb_node *parent = NULL; |
---|
| 10041 | + |
---|
| 10042 | + sp = kmalloc(sizeof(*sp), GFP_NOFS); |
---|
| 10043 | + if (!sp) |
---|
| 10044 | + return -ENOMEM; |
---|
| 10045 | + sp->ptr = ptr; |
---|
| 10046 | + sp->inode = inode; |
---|
| 10047 | + sp->is_block_group = is_block_group; |
---|
| 10048 | + sp->bg_extent_count = 1; |
---|
| 10049 | + |
---|
| 10050 | + spin_lock(&fs_info->swapfile_pins_lock); |
---|
| 10051 | + p = &fs_info->swapfile_pins.rb_node; |
---|
| 10052 | + while (*p) { |
---|
| 10053 | + parent = *p; |
---|
| 10054 | + entry = rb_entry(parent, struct btrfs_swapfile_pin, node); |
---|
| 10055 | + if (sp->ptr < entry->ptr || |
---|
| 10056 | + (sp->ptr == entry->ptr && sp->inode < entry->inode)) { |
---|
| 10057 | + p = &(*p)->rb_left; |
---|
| 10058 | + } else if (sp->ptr > entry->ptr || |
---|
| 10059 | + (sp->ptr == entry->ptr && sp->inode > entry->inode)) { |
---|
| 10060 | + p = &(*p)->rb_right; |
---|
| 10061 | + } else { |
---|
| 10062 | + if (is_block_group) |
---|
| 10063 | + entry->bg_extent_count++; |
---|
| 10064 | + spin_unlock(&fs_info->swapfile_pins_lock); |
---|
| 10065 | + kfree(sp); |
---|
| 10066 | + return 1; |
---|
| 10067 | + } |
---|
| 10068 | + } |
---|
| 10069 | + rb_link_node(&sp->node, parent, p); |
---|
| 10070 | + rb_insert_color(&sp->node, &fs_info->swapfile_pins); |
---|
| 10071 | + spin_unlock(&fs_info->swapfile_pins_lock); |
---|
| 10072 | + return 0; |
---|
| 10073 | +} |
---|
| 10074 | + |
---|
| 10075 | +/* Free all of the entries pinned by this swapfile. */ |
---|
| 10076 | +static void btrfs_free_swapfile_pins(struct inode *inode) |
---|
| 10077 | +{ |
---|
| 10078 | + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; |
---|
| 10079 | + struct btrfs_swapfile_pin *sp; |
---|
| 10080 | + struct rb_node *node, *next; |
---|
| 10081 | + |
---|
| 10082 | + spin_lock(&fs_info->swapfile_pins_lock); |
---|
| 10083 | + node = rb_first(&fs_info->swapfile_pins); |
---|
| 10084 | + while (node) { |
---|
| 10085 | + next = rb_next(node); |
---|
| 10086 | + sp = rb_entry(node, struct btrfs_swapfile_pin, node); |
---|
| 10087 | + if (sp->inode == inode) { |
---|
| 10088 | + rb_erase(&sp->node, &fs_info->swapfile_pins); |
---|
| 10089 | + if (sp->is_block_group) { |
---|
| 10090 | + btrfs_dec_block_group_swap_extents(sp->ptr, |
---|
| 10091 | + sp->bg_extent_count); |
---|
| 10092 | + btrfs_put_block_group(sp->ptr); |
---|
| 10093 | + } |
---|
| 10094 | + kfree(sp); |
---|
| 10095 | + } |
---|
| 10096 | + node = next; |
---|
| 10097 | + } |
---|
| 10098 | + spin_unlock(&fs_info->swapfile_pins_lock); |
---|
| 10099 | +} |
---|
| 10100 | + |
---|
| 10101 | +struct btrfs_swap_info { |
---|
| 10102 | + u64 start; |
---|
| 10103 | + u64 block_start; |
---|
| 10104 | + u64 block_len; |
---|
| 10105 | + u64 lowest_ppage; |
---|
| 10106 | + u64 highest_ppage; |
---|
| 10107 | + unsigned long nr_pages; |
---|
| 10108 | + int nr_extents; |
---|
| 10109 | +}; |
---|
| 10110 | + |
---|
| 10111 | +static int btrfs_add_swap_extent(struct swap_info_struct *sis, |
---|
| 10112 | + struct btrfs_swap_info *bsi) |
---|
| 10113 | +{ |
---|
| 10114 | + unsigned long nr_pages; |
---|
| 10115 | + unsigned long max_pages; |
---|
| 10116 | + u64 first_ppage, first_ppage_reported, next_ppage; |
---|
| 10117 | + int ret; |
---|
| 10118 | + |
---|
| 10119 | + /* |
---|
| 10120 | + * Our swapfile may have had its size extended after the swap header was |
---|
| 10121 | + * written. In that case activating the swapfile should not go beyond |
---|
| 10122 | + * the max size set in the swap header. |
---|
| 10123 | + */ |
---|
| 10124 | + if (bsi->nr_pages >= sis->max) |
---|
| 10125 | + return 0; |
---|
| 10126 | + |
---|
| 10127 | + max_pages = sis->max - bsi->nr_pages; |
---|
| 10128 | + first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT; |
---|
| 10129 | + next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len, |
---|
| 10130 | + PAGE_SIZE) >> PAGE_SHIFT; |
---|
| 10131 | + |
---|
| 10132 | + if (first_ppage >= next_ppage) |
---|
| 10133 | + return 0; |
---|
| 10134 | + nr_pages = next_ppage - first_ppage; |
---|
| 10135 | + nr_pages = min(nr_pages, max_pages); |
---|
| 10136 | + |
---|
| 10137 | + first_ppage_reported = first_ppage; |
---|
| 10138 | + if (bsi->start == 0) |
---|
| 10139 | + first_ppage_reported++; |
---|
| 10140 | + if (bsi->lowest_ppage > first_ppage_reported) |
---|
| 10141 | + bsi->lowest_ppage = first_ppage_reported; |
---|
| 10142 | + if (bsi->highest_ppage < (next_ppage - 1)) |
---|
| 10143 | + bsi->highest_ppage = next_ppage - 1; |
---|
| 10144 | + |
---|
| 10145 | + ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage); |
---|
| 10146 | + if (ret < 0) |
---|
| 10147 | + return ret; |
---|
| 10148 | + bsi->nr_extents += ret; |
---|
| 10149 | + bsi->nr_pages += nr_pages; |
---|
| 10150 | + return 0; |
---|
| 10151 | +} |
---|
| 10152 | + |
---|
| 10153 | +static void btrfs_swap_deactivate(struct file *file) |
---|
| 10154 | +{ |
---|
| 10155 | + struct inode *inode = file_inode(file); |
---|
| 10156 | + |
---|
| 10157 | + btrfs_free_swapfile_pins(inode); |
---|
| 10158 | + atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles); |
---|
| 10159 | +} |
---|
| 10160 | + |
---|
| 10161 | +static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, |
---|
| 10162 | + sector_t *span) |
---|
| 10163 | +{ |
---|
| 10164 | + struct inode *inode = file_inode(file); |
---|
| 10165 | + struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
| 10166 | + struct btrfs_fs_info *fs_info = root->fs_info; |
---|
| 10167 | + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
---|
| 10168 | + struct extent_state *cached_state = NULL; |
---|
| 10169 | + struct extent_map *em = NULL; |
---|
| 10170 | + struct btrfs_device *device = NULL; |
---|
| 10171 | + struct btrfs_swap_info bsi = { |
---|
| 10172 | + .lowest_ppage = (sector_t)-1ULL, |
---|
| 10173 | + }; |
---|
| 10174 | + int ret = 0; |
---|
| 10175 | + u64 isize; |
---|
| 10176 | + u64 start; |
---|
| 10177 | + |
---|
| 10178 | + /* |
---|
| 10179 | + * If the swap file was just created, make sure delalloc is done. If the |
---|
| 10180 | + * file changes again after this, the user is doing something stupid and |
---|
| 10181 | + * we don't really care. |
---|
| 10182 | + */ |
---|
| 10183 | + ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); |
---|
| 10184 | + if (ret) |
---|
| 10185 | + return ret; |
---|
| 10186 | + |
---|
| 10187 | + /* |
---|
| 10188 | + * The inode is locked, so these flags won't change after we check them. |
---|
| 10189 | + */ |
---|
| 10190 | + if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) { |
---|
| 10191 | + btrfs_warn(fs_info, "swapfile must not be compressed"); |
---|
| 10192 | + return -EINVAL; |
---|
| 10193 | + } |
---|
| 10194 | + if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) { |
---|
| 10195 | + btrfs_warn(fs_info, "swapfile must not be copy-on-write"); |
---|
| 10196 | + return -EINVAL; |
---|
| 10197 | + } |
---|
| 10198 | + if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { |
---|
| 10199 | + btrfs_warn(fs_info, "swapfile must not be checksummed"); |
---|
| 10200 | + return -EINVAL; |
---|
| 10201 | + } |
---|
| 10202 | + |
---|
| 10203 | + /* |
---|
| 10204 | + * Balance or device remove/replace/resize can move stuff around from |
---|
| 10205 | + * under us. The exclop protection makes sure they aren't running/won't |
---|
| 10206 | + * run concurrently while we are mapping the swap extents, and |
---|
| 10207 | + * fs_info->swapfile_pins prevents them from running while the swap |
---|
| 10208 | + * file is active and moving the extents. Note that this also prevents |
---|
| 10209 | + * a concurrent device add which isn't actually necessary, but it's not |
---|
| 10210 | + * really worth the trouble to allow it. |
---|
| 10211 | + */ |
---|
| 10212 | + if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_SWAP_ACTIVATE)) { |
---|
| 10213 | + btrfs_warn(fs_info, |
---|
| 10214 | + "cannot activate swapfile while exclusive operation is running"); |
---|
| 10215 | + return -EBUSY; |
---|
| 10216 | + } |
---|
| 10217 | + |
---|
| 10218 | + /* |
---|
| 10219 | + * Prevent snapshot creation while we are activating the swap file. |
---|
| 10220 | + * We do not want to race with snapshot creation. If snapshot creation |
---|
| 10221 | + * already started before we bumped nr_swapfiles from 0 to 1 and |
---|
| 10222 | + * completes before the first write into the swap file after it is |
---|
| 10223 | + * activated, than that write would fallback to COW. |
---|
| 10224 | + */ |
---|
| 10225 | + if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) { |
---|
| 10226 | + btrfs_exclop_finish(fs_info); |
---|
| 10227 | + btrfs_warn(fs_info, |
---|
| 10228 | + "cannot activate swapfile because snapshot creation is in progress"); |
---|
| 10229 | + return -EINVAL; |
---|
| 10230 | + } |
---|
| 10231 | + /* |
---|
| 10232 | + * Snapshots can create extents which require COW even if NODATACOW is |
---|
| 10233 | + * set. We use this counter to prevent snapshots. We must increment it |
---|
| 10234 | + * before walking the extents because we don't want a concurrent |
---|
| 10235 | + * snapshot to run after we've already checked the extents. |
---|
| 10236 | + * |
---|
| 10237 | + * It is possible that subvolume is marked for deletion but still not |
---|
| 10238 | + * removed yet. To prevent this race, we check the root status before |
---|
| 10239 | + * activating the swapfile. |
---|
| 10240 | + */ |
---|
| 10241 | + spin_lock(&root->root_item_lock); |
---|
| 10242 | + if (btrfs_root_dead(root)) { |
---|
| 10243 | + spin_unlock(&root->root_item_lock); |
---|
| 10244 | + |
---|
| 10245 | + btrfs_exclop_finish(fs_info); |
---|
| 10246 | + btrfs_warn(fs_info, |
---|
| 10247 | + "cannot activate swapfile because subvolume %llu is being deleted", |
---|
| 10248 | + root->root_key.objectid); |
---|
| 10249 | + return -EPERM; |
---|
| 10250 | + } |
---|
| 10251 | + atomic_inc(&root->nr_swapfiles); |
---|
| 10252 | + spin_unlock(&root->root_item_lock); |
---|
| 10253 | + |
---|
| 10254 | + isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize); |
---|
| 10255 | + |
---|
| 10256 | + lock_extent_bits(io_tree, 0, isize - 1, &cached_state); |
---|
| 10257 | + start = 0; |
---|
| 10258 | + while (start < isize) { |
---|
| 10259 | + u64 logical_block_start, physical_block_start; |
---|
| 10260 | + struct btrfs_block_group *bg; |
---|
| 10261 | + u64 len = isize - start; |
---|
| 10262 | + |
---|
| 10263 | + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len); |
---|
| 10264 | + if (IS_ERR(em)) { |
---|
| 10265 | + ret = PTR_ERR(em); |
---|
| 10266 | + goto out; |
---|
| 10267 | + } |
---|
| 10268 | + |
---|
| 10269 | + if (em->block_start == EXTENT_MAP_HOLE) { |
---|
| 10270 | + btrfs_warn(fs_info, "swapfile must not have holes"); |
---|
| 10271 | + ret = -EINVAL; |
---|
| 10272 | + goto out; |
---|
| 10273 | + } |
---|
| 10274 | + if (em->block_start == EXTENT_MAP_INLINE) { |
---|
| 10275 | + /* |
---|
| 10276 | + * It's unlikely we'll ever actually find ourselves |
---|
| 10277 | + * here, as a file small enough to fit inline won't be |
---|
| 10278 | + * big enough to store more than the swap header, but in |
---|
| 10279 | + * case something changes in the future, let's catch it |
---|
| 10280 | + * here rather than later. |
---|
| 10281 | + */ |
---|
| 10282 | + btrfs_warn(fs_info, "swapfile must not be inline"); |
---|
| 10283 | + ret = -EINVAL; |
---|
| 10284 | + goto out; |
---|
| 10285 | + } |
---|
| 10286 | + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
---|
| 10287 | + btrfs_warn(fs_info, "swapfile must not be compressed"); |
---|
| 10288 | + ret = -EINVAL; |
---|
| 10289 | + goto out; |
---|
| 10290 | + } |
---|
| 10291 | + |
---|
| 10292 | + logical_block_start = em->block_start + (start - em->start); |
---|
| 10293 | + len = min(len, em->len - (start - em->start)); |
---|
| 10294 | + free_extent_map(em); |
---|
| 10295 | + em = NULL; |
---|
| 10296 | + |
---|
| 10297 | + ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL, true); |
---|
| 10298 | + if (ret < 0) { |
---|
| 10299 | + goto out; |
---|
| 10300 | + } else if (ret) { |
---|
| 10301 | + ret = 0; |
---|
| 10302 | + } else { |
---|
| 10303 | + btrfs_warn(fs_info, |
---|
| 10304 | + "swapfile must not be copy-on-write"); |
---|
| 10305 | + ret = -EINVAL; |
---|
| 10306 | + goto out; |
---|
| 10307 | + } |
---|
| 10308 | + |
---|
| 10309 | + em = btrfs_get_chunk_map(fs_info, logical_block_start, len); |
---|
| 10310 | + if (IS_ERR(em)) { |
---|
| 10311 | + ret = PTR_ERR(em); |
---|
| 10312 | + goto out; |
---|
| 10313 | + } |
---|
| 10314 | + |
---|
| 10315 | + if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { |
---|
| 10316 | + btrfs_warn(fs_info, |
---|
| 10317 | + "swapfile must have single data profile"); |
---|
| 10318 | + ret = -EINVAL; |
---|
| 10319 | + goto out; |
---|
| 10320 | + } |
---|
| 10321 | + |
---|
| 10322 | + if (device == NULL) { |
---|
| 10323 | + device = em->map_lookup->stripes[0].dev; |
---|
| 10324 | + ret = btrfs_add_swapfile_pin(inode, device, false); |
---|
| 10325 | + if (ret == 1) |
---|
| 10326 | + ret = 0; |
---|
| 10327 | + else if (ret) |
---|
| 10328 | + goto out; |
---|
| 10329 | + } else if (device != em->map_lookup->stripes[0].dev) { |
---|
| 10330 | + btrfs_warn(fs_info, "swapfile must be on one device"); |
---|
| 10331 | + ret = -EINVAL; |
---|
| 10332 | + goto out; |
---|
| 10333 | + } |
---|
| 10334 | + |
---|
| 10335 | + physical_block_start = (em->map_lookup->stripes[0].physical + |
---|
| 10336 | + (logical_block_start - em->start)); |
---|
| 10337 | + len = min(len, em->len - (logical_block_start - em->start)); |
---|
| 10338 | + free_extent_map(em); |
---|
| 10339 | + em = NULL; |
---|
| 10340 | + |
---|
| 10341 | + bg = btrfs_lookup_block_group(fs_info, logical_block_start); |
---|
| 10342 | + if (!bg) { |
---|
| 10343 | + btrfs_warn(fs_info, |
---|
| 10344 | + "could not find block group containing swapfile"); |
---|
| 10345 | + ret = -EINVAL; |
---|
| 10346 | + goto out; |
---|
| 10347 | + } |
---|
| 10348 | + |
---|
| 10349 | + if (!btrfs_inc_block_group_swap_extents(bg)) { |
---|
| 10350 | + btrfs_warn(fs_info, |
---|
| 10351 | + "block group for swapfile at %llu is read-only%s", |
---|
| 10352 | + bg->start, |
---|
| 10353 | + atomic_read(&fs_info->scrubs_running) ? |
---|
| 10354 | + " (scrub running)" : ""); |
---|
| 10355 | + btrfs_put_block_group(bg); |
---|
| 10356 | + ret = -EINVAL; |
---|
| 10357 | + goto out; |
---|
| 10358 | + } |
---|
| 10359 | + |
---|
| 10360 | + ret = btrfs_add_swapfile_pin(inode, bg, true); |
---|
| 10361 | + if (ret) { |
---|
| 10362 | + btrfs_put_block_group(bg); |
---|
| 10363 | + if (ret == 1) |
---|
| 10364 | + ret = 0; |
---|
| 10365 | + else |
---|
| 10366 | + goto out; |
---|
| 10367 | + } |
---|
| 10368 | + |
---|
| 10369 | + if (bsi.block_len && |
---|
| 10370 | + bsi.block_start + bsi.block_len == physical_block_start) { |
---|
| 10371 | + bsi.block_len += len; |
---|
| 10372 | + } else { |
---|
| 10373 | + if (bsi.block_len) { |
---|
| 10374 | + ret = btrfs_add_swap_extent(sis, &bsi); |
---|
| 10375 | + if (ret) |
---|
| 10376 | + goto out; |
---|
| 10377 | + } |
---|
| 10378 | + bsi.start = start; |
---|
| 10379 | + bsi.block_start = physical_block_start; |
---|
| 10380 | + bsi.block_len = len; |
---|
| 10381 | + } |
---|
| 10382 | + |
---|
| 10383 | + start += len; |
---|
| 10384 | + } |
---|
| 10385 | + |
---|
| 10386 | + if (bsi.block_len) |
---|
| 10387 | + ret = btrfs_add_swap_extent(sis, &bsi); |
---|
| 10388 | + |
---|
| 10389 | +out: |
---|
| 10390 | + if (!IS_ERR_OR_NULL(em)) |
---|
| 10391 | + free_extent_map(em); |
---|
| 10392 | + |
---|
| 10393 | + unlock_extent_cached(io_tree, 0, isize - 1, &cached_state); |
---|
| 10394 | + |
---|
| 10395 | + if (ret) |
---|
| 10396 | + btrfs_swap_deactivate(file); |
---|
| 10397 | + |
---|
| 10398 | + btrfs_drew_write_unlock(&root->snapshot_lock); |
---|
| 10399 | + |
---|
| 10400 | + btrfs_exclop_finish(fs_info); |
---|
| 10401 | + |
---|
| 10402 | + if (ret) |
---|
| 10403 | + return ret; |
---|
| 10404 | + |
---|
| 10405 | + if (device) |
---|
| 10406 | + sis->bdev = device->bdev; |
---|
| 10407 | + *span = bsi.highest_ppage - bsi.lowest_ppage + 1; |
---|
| 10408 | + sis->max = bsi.nr_pages; |
---|
| 10409 | + sis->pages = bsi.nr_pages - 1; |
---|
| 10410 | + sis->highest_bit = bsi.nr_pages - 1; |
---|
| 10411 | + return bsi.nr_extents; |
---|
| 10412 | +} |
---|
| 10413 | +#else |
---|
/* Stub for this build configuration: there is nothing to undo. */
static void btrfs_swap_deactivate(struct file *file)
{
	/* Intentionally empty. */
}
---|
| 10417 | + |
---|
| 10418 | +static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, |
---|
| 10419 | + sector_t *span) |
---|
| 10420 | +{ |
---|
| 10421 | + return -EOPNOTSUPP; |
---|
| 10422 | +} |
---|
| 10423 | +#endif |
---|
10686 | 10424 | |
---|
10687 | 10425 | static const struct inode_operations btrfs_dir_inode_operations = { |
---|
10688 | 10426 | .getattr = btrfs_getattr, |
---|
.. | .. |
---|
10703 | 10441 | .update_time = btrfs_update_time, |
---|
10704 | 10442 | .tmpfile = btrfs_tmpfile, |
---|
10705 | 10443 | }; |
---|
10706 | | -static const struct inode_operations btrfs_dir_ro_inode_operations = { |
---|
10707 | | - .lookup = btrfs_lookup, |
---|
10708 | | - .permission = btrfs_permission, |
---|
10709 | | - .update_time = btrfs_update_time, |
---|
10710 | | -}; |
---|
10711 | 10444 | |
---|
10712 | 10445 | static const struct file_operations btrfs_dir_file_operations = { |
---|
10713 | 10446 | .llseek = generic_file_llseek, |
---|
.. | .. |
---|
10720 | 10453 | #endif |
---|
10721 | 10454 | .release = btrfs_release_file, |
---|
10722 | 10455 | .fsync = btrfs_sync_file, |
---|
10723 | | -}; |
---|
10724 | | - |
---|
10725 | | -static const struct extent_io_ops btrfs_extent_io_ops = { |
---|
10726 | | - /* mandatory callbacks */ |
---|
10727 | | - .submit_bio_hook = btrfs_submit_bio_hook, |
---|
10728 | | - .readpage_end_io_hook = btrfs_readpage_end_io_hook, |
---|
10729 | | - .readpage_io_failed_hook = btrfs_readpage_io_failed_hook, |
---|
10730 | | - |
---|
10731 | | - /* optional callbacks */ |
---|
10732 | | - .writepage_end_io_hook = btrfs_writepage_end_io_hook, |
---|
10733 | | - .writepage_start_hook = btrfs_writepage_start_hook, |
---|
10734 | | - .set_bit_hook = btrfs_set_bit_hook, |
---|
10735 | | - .clear_bit_hook = btrfs_clear_bit_hook, |
---|
10736 | | - .merge_extent_hook = btrfs_merge_extent_hook, |
---|
10737 | | - .split_extent_hook = btrfs_split_extent_hook, |
---|
10738 | | - .check_extent_io_range = btrfs_check_extent_io_range, |
---|
10739 | 10456 | }; |
---|
10740 | 10457 | |
---|
10741 | 10458 | /* |
---|
.. | .. |
---|
10754 | 10471 | .readpage = btrfs_readpage, |
---|
10755 | 10472 | .writepage = btrfs_writepage, |
---|
10756 | 10473 | .writepages = btrfs_writepages, |
---|
10757 | | - .readpages = btrfs_readpages, |
---|
10758 | | - .direct_IO = btrfs_direct_IO, |
---|
| 10474 | + .readahead = btrfs_readahead, |
---|
| 10475 | + .direct_IO = noop_direct_IO, |
---|
10759 | 10476 | .invalidatepage = btrfs_invalidatepage, |
---|
10760 | 10477 | .releasepage = btrfs_releasepage, |
---|
| 10478 | +#ifdef CONFIG_MIGRATION |
---|
| 10479 | + .migratepage = btrfs_migratepage, |
---|
| 10480 | +#endif |
---|
10761 | 10481 | .set_page_dirty = btrfs_set_page_dirty, |
---|
10762 | 10482 | .error_remove_page = generic_error_remove_page, |
---|
10763 | | -}; |
---|
10764 | | - |
---|
10765 | | -static const struct address_space_operations btrfs_symlink_aops = { |
---|
10766 | | - .readpage = btrfs_readpage, |
---|
10767 | | - .writepage = btrfs_writepage, |
---|
10768 | | - .invalidatepage = btrfs_invalidatepage, |
---|
10769 | | - .releasepage = btrfs_releasepage, |
---|
| 10483 | + .swap_activate = btrfs_swap_activate, |
---|
| 10484 | + .swap_deactivate = btrfs_swap_deactivate, |
---|
10770 | 10485 | }; |
---|
10771 | 10486 | |
---|
10772 | 10487 | static const struct inode_operations btrfs_file_inode_operations = { |
---|