| .. | .. |
|---|
| 3 | 3 | * Copyright (C) 2007 Oracle. All rights reserved. |
|---|
| 4 | 4 | */ |
|---|
| 5 | 5 | |
|---|
| 6 | +#include <crypto/hash.h> |
|---|
| 6 | 7 | #include <linux/kernel.h> |
|---|
| 7 | 8 | #include <linux/bio.h> |
|---|
| 8 | | -#include <linux/buffer_head.h> |
|---|
| 9 | 9 | #include <linux/file.h> |
|---|
| 10 | 10 | #include <linux/fs.h> |
|---|
| 11 | 11 | #include <linux/pagemap.h> |
|---|
| .. | .. |
|---|
| 27 | 27 | #include <linux/uio.h> |
|---|
| 28 | 28 | #include <linux/magic.h> |
|---|
| 29 | 29 | #include <linux/iversion.h> |
|---|
| 30 | +#include <linux/swap.h> |
|---|
| 31 | +#include <linux/migrate.h> |
|---|
| 32 | +#include <linux/sched/mm.h> |
|---|
| 33 | +#include <linux/iomap.h> |
|---|
| 30 | 34 | #include <asm/unaligned.h> |
|---|
| 35 | +#include "misc.h" |
|---|
| 31 | 36 | #include "ctree.h" |
|---|
| 32 | 37 | #include "disk-io.h" |
|---|
| 33 | 38 | #include "transaction.h" |
|---|
| .. | .. |
|---|
| 41 | 46 | #include "locking.h" |
|---|
| 42 | 47 | #include "free-space-cache.h" |
|---|
| 43 | 48 | #include "inode-map.h" |
|---|
| 44 | | -#include "backref.h" |
|---|
| 45 | 49 | #include "props.h" |
|---|
| 46 | 50 | #include "qgroup.h" |
|---|
| 47 | | -#include "dedupe.h" |
|---|
| 51 | +#include "delalloc-space.h" |
|---|
| 52 | +#include "block-group.h" |
|---|
| 53 | +#include "space-info.h" |
|---|
| 48 | 54 | |
|---|
| 49 | 55 | struct btrfs_iget_args { |
|---|
| 50 | | - struct btrfs_key *location; |
|---|
| 56 | + u64 ino; |
|---|
| 51 | 57 | struct btrfs_root *root; |
|---|
| 52 | 58 | }; |
|---|
| 53 | 59 | |
|---|
| 54 | 60 | struct btrfs_dio_data { |
|---|
| 55 | 61 | u64 reserve; |
|---|
| 56 | | - u64 unsubmitted_oe_range_start; |
|---|
| 57 | | - u64 unsubmitted_oe_range_end; |
|---|
| 58 | | - int overwrite; |
|---|
| 62 | + loff_t length; |
|---|
| 63 | + ssize_t submitted; |
|---|
| 64 | + struct extent_changeset *data_reserved; |
|---|
| 65 | + bool sync; |
|---|
| 59 | 66 | }; |
|---|
| 60 | 67 | |
|---|
| 61 | 68 | static const struct inode_operations btrfs_dir_inode_operations; |
|---|
| 62 | 69 | static const struct inode_operations btrfs_symlink_inode_operations; |
|---|
| 63 | | -static const struct inode_operations btrfs_dir_ro_inode_operations; |
|---|
| 64 | 70 | static const struct inode_operations btrfs_special_inode_operations; |
|---|
| 65 | 71 | static const struct inode_operations btrfs_file_inode_operations; |
|---|
| 66 | 72 | static const struct address_space_operations btrfs_aops; |
|---|
| 67 | | -static const struct address_space_operations btrfs_symlink_aops; |
|---|
| 68 | 73 | static const struct file_operations btrfs_dir_file_operations; |
|---|
| 69 | | -static const struct extent_io_ops btrfs_extent_io_ops; |
|---|
| 70 | 74 | |
|---|
| 71 | 75 | static struct kmem_cache *btrfs_inode_cachep; |
|---|
| 72 | 76 | struct kmem_cache *btrfs_trans_handle_cachep; |
|---|
| .. | .. |
|---|
| 74 | 78 | struct kmem_cache *btrfs_free_space_cachep; |
|---|
| 75 | 79 | struct kmem_cache *btrfs_free_space_bitmap_cachep; |
|---|
| 76 | 80 | |
|---|
| 77 | | -#define S_SHIFT 12 |
|---|
| 78 | | -static const unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { |
|---|
| 79 | | - [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE, |
|---|
| 80 | | - [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR, |
|---|
| 81 | | - [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV, |
|---|
| 82 | | - [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV, |
|---|
| 83 | | - [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO, |
|---|
| 84 | | - [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK, |
|---|
| 85 | | - [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, |
|---|
| 86 | | -}; |
|---|
| 87 | | - |
|---|
| 88 | 81 | static int btrfs_setsize(struct inode *inode, struct iattr *attr); |
|---|
| 89 | 82 | static int btrfs_truncate(struct inode *inode, bool skip_writeback); |
|---|
| 90 | 83 | static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); |
|---|
| 91 | | -static noinline int cow_file_range(struct inode *inode, |
|---|
| 84 | +static noinline int cow_file_range(struct btrfs_inode *inode, |
|---|
| 92 | 85 | struct page *locked_page, |
|---|
| 93 | | - u64 start, u64 end, u64 delalloc_end, |
|---|
| 94 | | - int *page_started, unsigned long *nr_written, |
|---|
| 95 | | - int unlock, struct btrfs_dedupe_hash *hash); |
|---|
| 96 | | -static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len, |
|---|
| 97 | | - u64 orig_start, u64 block_start, |
|---|
| 86 | + u64 start, u64 end, int *page_started, |
|---|
| 87 | + unsigned long *nr_written, int unlock); |
|---|
| 88 | +static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, |
|---|
| 89 | + u64 len, u64 orig_start, u64 block_start, |
|---|
| 98 | 90 | u64 block_len, u64 orig_block_len, |
|---|
| 99 | 91 | u64 ram_bytes, int compress_type, |
|---|
| 100 | 92 | int type); |
|---|
| 101 | 93 | |
|---|
| 102 | | -static void __endio_write_update_ordered(struct inode *inode, |
|---|
| 94 | +static void __endio_write_update_ordered(struct btrfs_inode *inode, |
|---|
| 103 | 95 | const u64 offset, const u64 bytes, |
|---|
| 104 | 96 | const bool uptodate); |
|---|
| 105 | 97 | |
|---|
| 106 | 98 | /* |
|---|
| 107 | 99 | * Cleanup all submitted ordered extents in specified range to handle errors |
|---|
| 108 | | - * from the fill_dellaloc() callback. |
|---|
| 100 | + * from the btrfs_run_delalloc_range() callback. |
|---|
| 109 | 101 | * |
|---|
| 110 | 102 | * NOTE: caller must ensure that when an error happens, it can not call |
|---|
| 111 | 103 | * extent_clear_unlock_delalloc() to clear both the bits EXTENT_DO_ACCOUNTING |
|---|
| .. | .. |
|---|
| 113 | 105 | * to be released, which we want to happen only when finishing the ordered |
|---|
| 114 | 106 | * extent (btrfs_finish_ordered_io()). |
|---|
| 115 | 107 | */ |
|---|
| 116 | | -static inline void btrfs_cleanup_ordered_extents(struct inode *inode, |
|---|
| 108 | +static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode, |
|---|
| 117 | 109 | struct page *locked_page, |
|---|
| 118 | 110 | u64 offset, u64 bytes) |
|---|
| 119 | 111 | { |
|---|
| .. | .. |
|---|
| 125 | 117 | struct page *page; |
|---|
| 126 | 118 | |
|---|
| 127 | 119 | while (index <= end_index) { |
|---|
| 128 | | - page = find_get_page(inode->i_mapping, index); |
|---|
| 120 | + page = find_get_page(inode->vfs_inode.i_mapping, index); |
|---|
| 129 | 121 | index++; |
|---|
| 130 | 122 | if (!page) |
|---|
| 131 | 123 | continue; |
|---|
| .. | .. |
|---|
| 147 | 139 | } |
|---|
| 148 | 140 | |
|---|
| 149 | 141 | static int btrfs_dirty_inode(struct inode *inode); |
|---|
| 150 | | - |
|---|
| 151 | | -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS |
|---|
| 152 | | -void btrfs_test_inode_set_ops(struct inode *inode) |
|---|
| 153 | | -{ |
|---|
| 154 | | - BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
|---|
| 155 | | -} |
|---|
| 156 | | -#endif |
|---|
| 157 | 142 | |
|---|
| 158 | 143 | static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, |
|---|
| 159 | 144 | struct inode *inode, struct inode *dir, |
|---|
| .. | .. |
|---|
| 187 | 172 | int ret; |
|---|
| 188 | 173 | size_t cur_size = size; |
|---|
| 189 | 174 | unsigned long offset; |
|---|
| 175 | + |
|---|
| 176 | + ASSERT((compressed_size > 0 && compressed_pages) || |
|---|
| 177 | + (compressed_size == 0 && !compressed_pages)); |
|---|
| 190 | 178 | |
|---|
| 191 | 179 | if (compressed_size && compressed_pages) |
|---|
| 192 | 180 | cur_size = compressed_size; |
|---|
| .. | .. |
|---|
| 241 | 229 | start >> PAGE_SHIFT); |
|---|
| 242 | 230 | btrfs_set_file_extent_compression(leaf, ei, 0); |
|---|
| 243 | 231 | kaddr = kmap_atomic(page); |
|---|
| 244 | | - offset = start & (PAGE_SIZE - 1); |
|---|
| 232 | + offset = offset_in_page(start); |
|---|
| 245 | 233 | write_extent_buffer(leaf, kaddr + offset, ptr, size); |
|---|
| 246 | 234 | kunmap_atomic(kaddr); |
|---|
| 247 | 235 | put_page(page); |
|---|
| 248 | 236 | } |
|---|
| 249 | 237 | btrfs_mark_buffer_dirty(leaf); |
|---|
| 250 | 238 | btrfs_release_path(path); |
|---|
| 239 | + |
|---|
| 240 | + /* |
|---|
| 241 | + * We align size to sectorsize for inline extents purely for the sake |
|---|
| 242 | + * of simplicity. |
|---|
| 243 | + */ |
|---|
| 244 | + size = ALIGN(size, root->fs_info->sectorsize); |
|---|
| 245 | + ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), start, size); |
|---|
| 246 | + if (ret) |
|---|
| 247 | + goto fail; |
|---|
| 251 | 248 | |
|---|
| 252 | 249 | /* |
|---|
| 253 | 250 | * we're an inline extent, so nobody can |
|---|
| .. | .. |
|---|
| 271 | 268 | * does the checks required to make sure the data is small enough |
|---|
| 272 | 269 | * to fit as an inline extent. |
|---|
| 273 | 270 | */ |
|---|
| 274 | | -static noinline int cow_file_range_inline(struct inode *inode, u64 start, |
|---|
| 271 | +static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start, |
|---|
| 275 | 272 | u64 end, size_t compressed_size, |
|---|
| 276 | 273 | int compress_type, |
|---|
| 277 | 274 | struct page **compressed_pages) |
|---|
| 278 | 275 | { |
|---|
| 279 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 276 | + struct btrfs_root *root = inode->root; |
|---|
| 280 | 277 | struct btrfs_fs_info *fs_info = root->fs_info; |
|---|
| 281 | 278 | struct btrfs_trans_handle *trans; |
|---|
| 282 | | - u64 isize = i_size_read(inode); |
|---|
| 279 | + u64 isize = i_size_read(&inode->vfs_inode); |
|---|
| 283 | 280 | u64 actual_end = min(end + 1, isize); |
|---|
| 284 | 281 | u64 inline_len = actual_end - start; |
|---|
| 285 | 282 | u64 aligned_end = ALIGN(end, fs_info->sectorsize); |
|---|
| .. | .. |
|---|
| 311 | 308 | btrfs_free_path(path); |
|---|
| 312 | 309 | return PTR_ERR(trans); |
|---|
| 313 | 310 | } |
|---|
| 314 | | - trans->block_rsv = &BTRFS_I(inode)->block_rsv; |
|---|
| 311 | + trans->block_rsv = &inode->block_rsv; |
|---|
| 315 | 312 | |
|---|
| 316 | 313 | if (compressed_size && compressed_pages) |
|---|
| 317 | 314 | extent_item_size = btrfs_file_extent_calc_inline_size( |
|---|
| .. | .. |
|---|
| 320 | 317 | extent_item_size = btrfs_file_extent_calc_inline_size( |
|---|
| 321 | 318 | inline_len); |
|---|
| 322 | 319 | |
|---|
| 323 | | - ret = __btrfs_drop_extents(trans, root, inode, path, |
|---|
| 324 | | - start, aligned_end, NULL, |
|---|
| 325 | | - 1, 1, extent_item_size, &extent_inserted); |
|---|
| 320 | + ret = __btrfs_drop_extents(trans, root, inode, path, start, aligned_end, |
|---|
| 321 | + NULL, 1, 1, extent_item_size, |
|---|
| 322 | + &extent_inserted); |
|---|
| 326 | 323 | if (ret) { |
|---|
| 327 | 324 | btrfs_abort_transaction(trans, ret); |
|---|
| 328 | 325 | goto out; |
|---|
| .. | .. |
|---|
| 331 | 328 | if (isize > actual_end) |
|---|
| 332 | 329 | inline_len = min_t(u64, isize, actual_end); |
|---|
| 333 | 330 | ret = insert_inline_extent(trans, path, extent_inserted, |
|---|
| 334 | | - root, inode, start, |
|---|
| 331 | + root, &inode->vfs_inode, start, |
|---|
| 335 | 332 | inline_len, compressed_size, |
|---|
| 336 | 333 | compress_type, compressed_pages); |
|---|
| 337 | 334 | if (ret && ret != -ENOSPC) { |
|---|
| .. | .. |
|---|
| 342 | 339 | goto out; |
|---|
| 343 | 340 | } |
|---|
| 344 | 341 | |
|---|
| 345 | | - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); |
|---|
| 346 | | - btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0); |
|---|
| 342 | + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); |
|---|
| 343 | + btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
|---|
| 347 | 344 | out: |
|---|
| 348 | 345 | /* |
|---|
| 349 | 346 | * Don't forget to free the reserved space, as for inlined extent |
|---|
| .. | .. |
|---|
| 367 | 364 | struct list_head list; |
|---|
| 368 | 365 | }; |
|---|
| 369 | 366 | |
|---|
| 370 | | -struct async_cow { |
|---|
| 367 | +struct async_chunk { /* one file range handed to compress_file_range() / submit_compressed_extents() */ |
|---|
| 371 | 368 | struct inode *inode; /* inode whose range is being processed */ |
|---|
| 372 | | - struct btrfs_root *root; |
|---|
| 373 | 369 | struct page *locked_page; /* may be NULL: users test it before dirtying or unlocking it */ |
|---|
| 374 | 370 | u64 start; /* first byte of the range */ |
|---|
| 375 | 371 | u64 end; /* last byte of the range, inclusive (users compute end - start + 1) */ |
|---|
| 376 | 372 | unsigned int write_flags; /* forwarded to btrfs_submit_compressed_write() */ |
|---|
| 377 | 373 | struct list_head extents; /* list of struct async_extent, drained by submit_compressed_extents() */ |
|---|
| 374 | + struct cgroup_subsys_state *blkcg_css; /* forwarded to btrfs_submit_compressed_write() */ |
|---|
| 378 | 375 | struct btrfs_work work; /* NOTE(review): not referenced in the visible hunks - presumably the queued work item; confirm */ |
|---|
| 376 | + atomic_t *pending; /* NOTE(review): not referenced in the visible hunks - presumably points at async_cow.num_chunks; confirm */ |
|---|
| 379 | 377 | }; |
|---|
| 380 | 378 | |
|---|
| 381 | | -static noinline int add_async_extent(struct async_cow *cow, |
|---|
| 379 | +struct async_cow { |
|---|
| 380 | + /* Number of chunks in flight; must be first in the structure */ |
|---|
| 381 | + atomic_t num_chunks; |
|---|
| 382 | + struct async_chunk chunks[]; |
|---|
| 383 | +}; |
|---|
| 384 | + |
|---|
| 385 | +static noinline int add_async_extent(struct async_chunk *cow, |
|---|
| 382 | 386 | u64 start, u64 ram_size, |
|---|
| 383 | 387 | u64 compressed_size, |
|---|
| 384 | 388 | struct page **pages, |
|---|
| .. | .. |
|---|
| 402 | 406 | /* |
|---|
| 403 | 407 | * Check if the inode has flags compatible with compression |
|---|
| | | * |
|---|
| | | * Returns false when either BTRFS_INODE_NODATACOW or BTRFS_INODE_NODATASUM |
|---|
| | | * is set in the inode flags, true otherwise. |
|---|
| 404 | 408 | */ |
|---|
| 405 | | -static inline bool inode_can_compress(struct inode *inode) |
|---|
| 409 | +static inline bool inode_can_compress(struct btrfs_inode *inode) |
|---|
| 406 | 410 | { |
|---|
| 407 | | - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW || |
|---|
| 408 | | - BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) |
|---|
| 411 | + if (inode->flags & BTRFS_INODE_NODATACOW || |
|---|
| 412 | + inode->flags & BTRFS_INODE_NODATASUM) |
|---|
| 409 | 413 | return false; |
|---|
| 410 | 414 | return true; |
|---|
| 411 | 415 | } |
|---|
| .. | .. |
|---|
| 414 | 418 | * Check if the inode needs to be submitted to compression, based on mount |
|---|
| 415 | 419 | * options, defragmentation, properties or heuristics. |
|---|
| 416 | 420 | */ |
|---|
| 417 | | -static inline int inode_need_compress(struct inode *inode, u64 start, u64 end) |
|---|
| 421 | +static inline int inode_need_compress(struct btrfs_inode *inode, u64 start, |
|---|
| 422 | + u64 end) |
|---|
| 418 | 423 | { /* returns 0 to skip compression; otherwise 1, or whatever btrfs_compress_heuristic() reports for [start, end] */ |
|---|
| 419 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 424 | + struct btrfs_fs_info *fs_info = inode->root->fs_info; |
|---|
| 420 | 425 | |
|---|
| 421 | 426 | if (!inode_can_compress(inode)) { /* NODATACOW/NODATASUM inode: never compress */ |
|---|
| 422 | 427 | WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), /* the warning fires only when CONFIG_BTRFS_DEBUG is enabled */ |
|---|
| 423 | 428 | KERN_ERR "BTRFS: unexpected compression for ino %llu\n", |
|---|
| 424 | | - btrfs_ino(BTRFS_I(inode))); |
|---|
| 429 | + btrfs_ino(inode)); |
|---|
| 425 | 430 | return 0; |
|---|
| 426 | 431 | } |
|---|
| 427 | 432 | /* force compress: the FORCE_COMPRESS option is honoured before any per-inode check below */ |
|---|
| 428 | 433 | if (btrfs_test_opt(fs_info, FORCE_COMPRESS)) |
|---|
| 429 | 434 | return 1; |
|---|
| 430 | 435 | /* defrag ioctl: defrag_compress set on the inode also bypasses the NOCOMPRESS check */ |
|---|
| 431 | | - if (BTRFS_I(inode)->defrag_compress) |
|---|
| 436 | + if (inode->defrag_compress) |
|---|
| 432 | 437 | return 1; |
|---|
| 433 | 438 | /* bad compression ratios: NOCOMPRESS flag is set, so skip unless forced or defragging (handled above) */ |
|---|
| 434 | | - if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) |
|---|
| 439 | + if (inode->flags & BTRFS_INODE_NOCOMPRESS) |
|---|
| 435 | 440 | return 0; |
|---|
| 436 | 441 | if (btrfs_test_opt(fs_info, COMPRESS) || /* COMPRESS option, inode flag or property: let the heuristic decide */ |
|---|
| 437 | | - BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS || |
|---|
| 438 | | - BTRFS_I(inode)->prop_compress) |
|---|
| 439 | | - return btrfs_compress_heuristic(inode, start, end); |
|---|
| 442 | + inode->flags & BTRFS_INODE_COMPRESS || |
|---|
| 443 | + inode->prop_compress) |
|---|
| 444 | + return btrfs_compress_heuristic(&inode->vfs_inode, start, end); |
|---|
| 440 | 445 | return 0; /* nothing requested compression for this inode */ |
|---|
| 441 | 446 | } |
|---|
| 442 | 447 | |
|---|
| .. | .. |
|---|
| 466 | 471 | * are written in the same order that the flusher thread sent them |
|---|
| 467 | 472 | * down. |
|---|
| 468 | 473 | */ |
|---|
| 469 | | -static noinline void compress_file_range(struct inode *inode, |
|---|
| 470 | | - struct page *locked_page, |
|---|
| 471 | | - u64 start, u64 end, |
|---|
| 472 | | - struct async_cow *async_cow, |
|---|
| 473 | | - int *num_added) |
|---|
| 474 | +static noinline int compress_file_range(struct async_chunk *async_chunk) |
|---|
| 474 | 475 | { |
|---|
| 476 | + struct inode *inode = async_chunk->inode; |
|---|
| 475 | 477 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 476 | 478 | u64 blocksize = fs_info->sectorsize; |
|---|
| 479 | + u64 start = async_chunk->start; |
|---|
| 480 | + u64 end = async_chunk->end; |
|---|
| 477 | 481 | u64 actual_end; |
|---|
| 478 | | - u64 isize = i_size_read(inode); |
|---|
| 482 | + u64 i_size; |
|---|
| 479 | 483 | int ret = 0; |
|---|
| 480 | 484 | struct page **pages = NULL; |
|---|
| 481 | 485 | unsigned long nr_pages; |
|---|
| .. | .. |
|---|
| 484 | 488 | int i; |
|---|
| 485 | 489 | int will_compress; |
|---|
| 486 | 490 | int compress_type = fs_info->compress_type; |
|---|
| 491 | + int compressed_extents = 0; |
|---|
| 487 | 492 | int redirty = 0; |
|---|
| 488 | 493 | |
|---|
| 489 | 494 | inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1, |
|---|
| 490 | 495 | SZ_16K); |
|---|
| 491 | 496 | |
|---|
| 492 | | - actual_end = min_t(u64, isize, end + 1); |
|---|
| 497 | + /* |
|---|
| 498 | + * We need to snapshot i_size here because it could otherwise change |
|---|
| 499 | + * between us evaluating the size and assigning it. This is because we |
|---|
| 500 | + * lock and unlock the page in truncate and fallocate, and only modify |
|---|
| 501 | + * the i_size later on. |
|---|
| 502 | + * |
|---|
| 503 | + * The barriers emulate READ_ONCE; remove them once i_size_read() does |
|---|
| 504 | + * that for us. |
|---|
| 505 | + */ |
|---|
| 506 | + barrier(); |
|---|
| 507 | + i_size = i_size_read(inode); |
|---|
| 508 | + barrier(); |
|---|
| 509 | + actual_end = min_t(u64, i_size, end + 1); |
|---|
| 493 | 510 | again: |
|---|
| 494 | 511 | will_compress = 0; |
|---|
| 495 | 512 | nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1; |
|---|
| .. | .. |
|---|
| 530 | 547 | * inode has not been flagged as nocompress. This flag can |
|---|
| 531 | 548 | * change at any time if we discover bad compression ratios. |
|---|
| 532 | 549 | */ |
|---|
| 533 | | - if (inode_need_compress(inode, start, end)) { |
|---|
| 550 | + if (inode_need_compress(BTRFS_I(inode), start, end)) { |
|---|
| 534 | 551 | WARN_ON(pages); |
|---|
| 535 | 552 | pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); |
|---|
| 536 | 553 | if (!pages) { |
|---|
| .. | .. |
|---|
| 571 | 588 | &total_compressed); |
|---|
| 572 | 589 | |
|---|
| 573 | 590 | if (!ret) { |
|---|
| 574 | | - unsigned long offset = total_compressed & |
|---|
| 575 | | - (PAGE_SIZE - 1); |
|---|
| 591 | + unsigned long offset = offset_in_page(total_compressed); |
|---|
| 576 | 592 | struct page *page = pages[nr_pages - 1]; |
|---|
| 577 | 593 | char *kaddr; |
|---|
| 578 | 594 | |
|---|
| .. | .. |
|---|
| 595 | 611 | /* we didn't compress the entire range, try |
|---|
| 596 | 612 | * to make an uncompressed inline extent. |
|---|
| 597 | 613 | */ |
|---|
| 598 | | - ret = cow_file_range_inline(inode, start, end, 0, |
|---|
| 599 | | - BTRFS_COMPRESS_NONE, NULL); |
|---|
| 614 | + ret = cow_file_range_inline(BTRFS_I(inode), start, end, |
|---|
| 615 | + 0, BTRFS_COMPRESS_NONE, |
|---|
| 616 | + NULL); |
|---|
| 600 | 617 | } else { |
|---|
| 601 | 618 | /* try making a compressed inline extent */ |
|---|
| 602 | | - ret = cow_file_range_inline(inode, start, end, |
|---|
| 619 | + ret = cow_file_range_inline(BTRFS_I(inode), start, end, |
|---|
| 603 | 620 | total_compressed, |
|---|
| 604 | 621 | compress_type, pages); |
|---|
| 605 | 622 | } |
|---|
| .. | .. |
|---|
| 621 | 638 | * our outstanding extent for clearing delalloc for this |
|---|
| 622 | 639 | * range. |
|---|
| 623 | 640 | */ |
|---|
| 624 | | - extent_clear_unlock_delalloc(inode, start, end, end, |
|---|
| 625 | | - NULL, clear_flags, |
|---|
| 641 | + extent_clear_unlock_delalloc(BTRFS_I(inode), start, end, |
|---|
| 642 | + NULL, |
|---|
| 643 | + clear_flags, |
|---|
| 626 | 644 | PAGE_UNLOCK | |
|---|
| 627 | 645 | PAGE_CLEAR_DIRTY | |
|---|
| 628 | 646 | PAGE_SET_WRITEBACK | |
|---|
| .. | .. |
|---|
| 641 | 659 | } |
|---|
| 642 | 660 | kfree(pages); |
|---|
| 643 | 661 | } |
|---|
| 644 | | - |
|---|
| 645 | | - return; |
|---|
| 662 | + return 0; |
|---|
| 646 | 663 | } |
|---|
| 647 | 664 | } |
|---|
| 648 | 665 | |
|---|
| .. | .. |
|---|
| 661 | 678 | */ |
|---|
| 662 | 679 | total_in = ALIGN(total_in, PAGE_SIZE); |
|---|
| 663 | 680 | if (total_compressed + blocksize <= total_in) { |
|---|
| 664 | | - *num_added += 1; |
|---|
| 681 | + compressed_extents++; |
|---|
| 665 | 682 | |
|---|
| 666 | 683 | /* |
|---|
| 667 | 684 | * The async work queues will take care of doing actual |
|---|
| 668 | 685 | * allocation on disk for these compressed pages, and |
|---|
| 669 | 686 | * will submit them to the elevator. |
|---|
| 670 | 687 | */ |
|---|
| 671 | | - add_async_extent(async_cow, start, total_in, |
|---|
| 688 | + add_async_extent(async_chunk, start, total_in, |
|---|
| 672 | 689 | total_compressed, pages, nr_pages, |
|---|
| 673 | 690 | compress_type); |
|---|
| 674 | 691 | |
|---|
| .. | .. |
|---|
| 678 | 695 | cond_resched(); |
|---|
| 679 | 696 | goto again; |
|---|
| 680 | 697 | } |
|---|
| 681 | | - return; |
|---|
| 698 | + return compressed_extents; |
|---|
| 682 | 699 | } |
|---|
| 683 | 700 | } |
|---|
| 684 | 701 | if (pages) { |
|---|
| .. | .. |
|---|
| 708 | 725 | * to our extent and set things up for the async work queue to run |
|---|
| 709 | 726 | * cow_file_range to do the normal delalloc dance. |
|---|
| 710 | 727 | */ |
|---|
| 711 | | - if (page_offset(locked_page) >= start && |
|---|
| 712 | | - page_offset(locked_page) <= end) |
|---|
| 713 | | - __set_page_dirty_nobuffers(locked_page); |
|---|
| 728 | + if (async_chunk->locked_page && /* locked_page may be NULL, so test it before calling page_offset() */ |
|---|
| 729 | + (page_offset(async_chunk->locked_page) >= start && |
|---|
| 730 | + page_offset(async_chunk->locked_page) <= end)) { /* both bounds apply to the page offset: redirty only when the page lies inside [start, end] */ |
|---|
| 731 | + __set_page_dirty_nobuffers(async_chunk->locked_page); |
|---|
| 714 | 732 | /* unlocked later on in the async handlers */ |
|---|
| 733 | + } |
|---|
| 715 | 734 | |
|---|
| 716 | 735 | if (redirty) |
|---|
| 717 | 736 | extent_range_redirty_for_io(inode, start, end); |
|---|
| 718 | | - add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0, |
|---|
| 737 | + add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0, |
|---|
| 719 | 738 | BTRFS_COMPRESS_NONE); |
|---|
| 720 | | - *num_added += 1; |
|---|
| 739 | + compressed_extents++; |
|---|
| 721 | 740 | |
|---|
| 722 | | - return; |
|---|
| 741 | + return compressed_extents; |
|---|
| 723 | 742 | } |
|---|
| 724 | 743 | |
|---|
| 725 | 744 | static void free_async_extent_pages(struct async_extent *async_extent) |
|---|
| .. | .. |
|---|
| 744 | 763 | * queued. We walk all the async extents created by compress_file_range |
|---|
| 745 | 764 | * and send them down to the disk. |
|---|
| 746 | 765 | */ |
|---|
| 747 | | -static noinline void submit_compressed_extents(struct inode *inode, |
|---|
| 748 | | - struct async_cow *async_cow) |
|---|
| 766 | +static noinline void submit_compressed_extents(struct async_chunk *async_chunk) |
|---|
| 749 | 767 | { |
|---|
| 750 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 768 | + struct btrfs_inode *inode = BTRFS_I(async_chunk->inode); |
|---|
| 769 | + struct btrfs_fs_info *fs_info = inode->root->fs_info; |
|---|
| 751 | 770 | struct async_extent *async_extent; |
|---|
| 752 | 771 | u64 alloc_hint = 0; |
|---|
| 753 | 772 | struct btrfs_key ins; |
|---|
| 754 | 773 | struct extent_map *em; |
|---|
| 755 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 756 | | - struct extent_io_tree *io_tree; |
|---|
| 774 | + struct btrfs_root *root = inode->root; |
|---|
| 775 | + struct extent_io_tree *io_tree = &inode->io_tree; |
|---|
| 757 | 776 | int ret = 0; |
|---|
| 758 | 777 | |
|---|
| 759 | 778 | again: |
|---|
| 760 | | - while (!list_empty(&async_cow->extents)) { |
|---|
| 761 | | - async_extent = list_entry(async_cow->extents.next, |
|---|
| 779 | + while (!list_empty(&async_chunk->extents)) { |
|---|
| 780 | + async_extent = list_entry(async_chunk->extents.next, |
|---|
| 762 | 781 | struct async_extent, list); |
|---|
| 763 | 782 | list_del(&async_extent->list); |
|---|
| 764 | 783 | |
|---|
| 765 | | - io_tree = &BTRFS_I(inode)->io_tree; |
|---|
| 766 | | - |
|---|
| 767 | 784 | retry: |
|---|
| 785 | + lock_extent(io_tree, async_extent->start, |
|---|
| 786 | + async_extent->start + async_extent->ram_size - 1); |
|---|
| 768 | 787 | /* did the compression code fall back to uncompressed IO? */ |
|---|
| 769 | 788 | if (!async_extent->pages) { |
|---|
| 770 | 789 | int page_started = 0; |
|---|
| 771 | 790 | unsigned long nr_written = 0; |
|---|
| 772 | 791 | |
|---|
| 773 | | - lock_extent(io_tree, async_extent->start, |
|---|
| 774 | | - async_extent->start + |
|---|
| 775 | | - async_extent->ram_size - 1); |
|---|
| 776 | | - |
|---|
| 777 | 792 | /* allocate blocks */ |
|---|
| 778 | | - ret = cow_file_range(inode, async_cow->locked_page, |
|---|
| 793 | + ret = cow_file_range(inode, async_chunk->locked_page, |
|---|
| 779 | 794 | async_extent->start, |
|---|
| 780 | 795 | async_extent->start + |
|---|
| 781 | 796 | async_extent->ram_size - 1, |
|---|
| 782 | | - async_extent->start + |
|---|
| 783 | | - async_extent->ram_size - 1, |
|---|
| 784 | | - &page_started, &nr_written, 0, |
|---|
| 785 | | - NULL); |
|---|
| 797 | + &page_started, &nr_written, 0); |
|---|
| 786 | 798 | |
|---|
| 787 | 799 | /* JDM XXX */ |
|---|
| 788 | 800 | |
|---|
| .. | .. |
|---|
| 793 | 805 | * all those pages down to the drive. |
|---|
| 794 | 806 | */ |
|---|
| 795 | 807 | if (!page_started && !ret) |
|---|
| 796 | | - extent_write_locked_range(inode, |
|---|
| 808 | + extent_write_locked_range(&inode->vfs_inode, |
|---|
| 797 | 809 | async_extent->start, |
|---|
| 798 | 810 | async_extent->start + |
|---|
| 799 | 811 | async_extent->ram_size - 1, |
|---|
| 800 | 812 | WB_SYNC_ALL); |
|---|
| 801 | | - else if (ret) |
|---|
| 802 | | - unlock_page(async_cow->locked_page); |
|---|
| 813 | + else if (ret && async_chunk->locked_page) |
|---|
| 814 | + unlock_page(async_chunk->locked_page); |
|---|
| 803 | 815 | kfree(async_extent); |
|---|
| 804 | 816 | cond_resched(); |
|---|
| 805 | 817 | continue; |
|---|
| 806 | 818 | } |
|---|
| 807 | | - |
|---|
| 808 | | - lock_extent(io_tree, async_extent->start, |
|---|
| 809 | | - async_extent->start + async_extent->ram_size - 1); |
|---|
| 810 | 819 | |
|---|
| 811 | 820 | ret = btrfs_reserve_extent(root, async_extent->ram_size, |
|---|
| 812 | 821 | async_extent->compressed_size, |
|---|
| .. | .. |
|---|
| 826 | 835 | * will not submit these pages down to lower |
|---|
| 827 | 836 | * layers. |
|---|
| 828 | 837 | */ |
|---|
| 829 | | - extent_range_redirty_for_io(inode, |
|---|
| 838 | + extent_range_redirty_for_io(&inode->vfs_inode, |
|---|
| 830 | 839 | async_extent->start, |
|---|
| 831 | 840 | async_extent->start + |
|---|
| 832 | 841 | async_extent->ram_size - 1); |
|---|
| .. | .. |
|---|
| 861 | 870 | BTRFS_ORDERED_COMPRESSED, |
|---|
| 862 | 871 | async_extent->compress_type); |
|---|
| 863 | 872 | if (ret) { |
|---|
| 864 | | - btrfs_drop_extent_cache(BTRFS_I(inode), |
|---|
| 865 | | - async_extent->start, |
|---|
| 873 | + btrfs_drop_extent_cache(inode, async_extent->start, |
|---|
| 866 | 874 | async_extent->start + |
|---|
| 867 | 875 | async_extent->ram_size - 1, 0); |
|---|
| 868 | 876 | goto out_free_reserve; |
|---|
| .. | .. |
|---|
| 875 | 883 | extent_clear_unlock_delalloc(inode, async_extent->start, |
|---|
| 876 | 884 | async_extent->start + |
|---|
| 877 | 885 | async_extent->ram_size - 1, |
|---|
| 878 | | - async_extent->start + |
|---|
| 879 | | - async_extent->ram_size - 1, |
|---|
| 880 | 886 | NULL, EXTENT_LOCKED | EXTENT_DELALLOC, |
|---|
| 881 | 887 | PAGE_UNLOCK | PAGE_CLEAR_DIRTY | |
|---|
| 882 | 888 | PAGE_SET_WRITEBACK); |
|---|
| 883 | | - if (btrfs_submit_compressed_write(inode, |
|---|
| 884 | | - async_extent->start, |
|---|
| 889 | + if (btrfs_submit_compressed_write(inode, async_extent->start, |
|---|
| 885 | 890 | async_extent->ram_size, |
|---|
| 886 | 891 | ins.objectid, |
|---|
| 887 | 892 | ins.offset, async_extent->pages, |
|---|
| 888 | 893 | async_extent->nr_pages, |
|---|
| 889 | | - async_cow->write_flags)) { |
|---|
| 890 | | - struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; |
|---|
| 894 | + async_chunk->write_flags, |
|---|
| 895 | + async_chunk->blkcg_css)) { |
|---|
| 891 | 896 | struct page *p = async_extent->pages[0]; |
|---|
| 892 | 897 | const u64 start = async_extent->start; |
|---|
| 893 | 898 | const u64 end = start + async_extent->ram_size - 1; |
|---|
| 894 | 899 | |
|---|
| 895 | | - p->mapping = inode->i_mapping; |
|---|
| 896 | | - tree->ops->writepage_end_io_hook(p, start, end, |
|---|
| 897 | | - NULL, 0); |
|---|
| 900 | + p->mapping = inode->vfs_inode.i_mapping; |
|---|
| 901 | + btrfs_writepage_endio_finish_ordered(p, start, end, 0); |
|---|
| 902 | + |
|---|
| 898 | 903 | p->mapping = NULL; |
|---|
| 899 | | - extent_clear_unlock_delalloc(inode, start, end, end, |
|---|
| 900 | | - NULL, 0, |
|---|
| 904 | + extent_clear_unlock_delalloc(inode, start, end, NULL, 0, |
|---|
| 901 | 905 | PAGE_END_WRITEBACK | |
|---|
| 902 | 906 | PAGE_SET_ERROR); |
|---|
| 903 | 907 | free_async_extent_pages(async_extent); |
|---|
| .. | .. |
|---|
| 914 | 918 | extent_clear_unlock_delalloc(inode, async_extent->start, |
|---|
| 915 | 919 | async_extent->start + |
|---|
| 916 | 920 | async_extent->ram_size - 1, |
|---|
| 917 | | - async_extent->start + |
|---|
| 918 | | - async_extent->ram_size - 1, |
|---|
| 919 | 921 | NULL, EXTENT_LOCKED | EXTENT_DELALLOC | |
|---|
| 920 | 922 | EXTENT_DELALLOC_NEW | |
|---|
| 921 | 923 | EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, |
|---|
| .. | .. |
|---|
| 927 | 929 | goto again; |
|---|
| 928 | 930 | } |
|---|
| 929 | 931 | |
|---|
| 930 | | -static u64 get_extent_allocation_hint(struct inode *inode, u64 start, |
|---|
| 932 | +static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start, |
|---|
| 931 | 933 | u64 num_bytes) |
|---|
| 932 | 934 | { |
|---|
| 933 | | - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
|---|
| 935 | + struct extent_map_tree *em_tree = &inode->extent_tree; |
|---|
| 934 | 936 | struct extent_map *em; |
|---|
| 935 | 937 | u64 alloc_hint = 0; |
|---|
| 936 | 938 | |
|---|
| .. | .. |
|---|
| 972 | 974 | * required to start IO on it. It may be clean and already done with |
|---|
| 973 | 975 | * IO when we return. |
|---|
| 974 | 976 | */ |
|---|
| 975 | | -static noinline int cow_file_range(struct inode *inode, |
|---|
| 977 | +static noinline int cow_file_range(struct btrfs_inode *inode, |
|---|
| 976 | 978 | struct page *locked_page, |
|---|
| 977 | | - u64 start, u64 end, u64 delalloc_end, |
|---|
| 978 | | - int *page_started, unsigned long *nr_written, |
|---|
| 979 | | - int unlock, struct btrfs_dedupe_hash *hash) |
|---|
| 979 | + u64 start, u64 end, int *page_started, |
|---|
| 980 | + unsigned long *nr_written, int unlock) |
|---|
| 980 | 981 | { |
|---|
| 981 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 982 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 982 | + struct btrfs_root *root = inode->root; |
|---|
| 983 | + struct btrfs_fs_info *fs_info = root->fs_info; |
|---|
| 983 | 984 | u64 alloc_hint = 0; |
|---|
| 984 | 985 | u64 num_bytes; |
|---|
| 985 | 986 | unsigned long ram_size; |
|---|
| .. | .. |
|---|
| 993 | 994 | bool extent_reserved = false; |
|---|
| 994 | 995 | int ret = 0; |
|---|
| 995 | 996 | |
|---|
| 996 | | - if (btrfs_is_free_space_inode(BTRFS_I(inode))) { |
|---|
| 997 | | - WARN_ON_ONCE(1); |
|---|
| 997 | + if (btrfs_is_free_space_inode(inode)) { |
|---|
| 998 | 998 | ret = -EINVAL; |
|---|
| 999 | 999 | goto out_unlock; |
|---|
| 1000 | 1000 | } |
|---|
| .. | .. |
|---|
| 1003 | 1003 | num_bytes = max(blocksize, num_bytes); |
|---|
| 1004 | 1004 | ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy)); |
|---|
| 1005 | 1005 | |
|---|
| 1006 | | - inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K); |
|---|
| 1006 | + inode_should_defrag(inode, start, end, num_bytes, SZ_64K); |
|---|
| 1007 | 1007 | |
|---|
| 1008 | 1008 | if (start == 0) { |
|---|
| 1009 | 1009 | /* let's try to make an inline extent */ |
|---|
| .. | .. |
|---|
| 1016 | 1016 | * our outstanding extent for clearing delalloc for this |
|---|
| 1017 | 1017 | * range. |
|---|
| 1018 | 1018 | */ |
|---|
| 1019 | | - extent_clear_unlock_delalloc(inode, start, end, |
|---|
| 1020 | | - delalloc_end, NULL, |
|---|
| 1019 | + extent_clear_unlock_delalloc(inode, start, end, NULL, |
|---|
| 1021 | 1020 | EXTENT_LOCKED | EXTENT_DELALLOC | |
|---|
| 1022 | 1021 | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | |
|---|
| 1023 | 1022 | EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | |
|---|
| .. | .. |
|---|
| 1033 | 1032 | } |
|---|
| 1034 | 1033 | |
|---|
| 1035 | 1034 | alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); |
|---|
| 1036 | | - btrfs_drop_extent_cache(BTRFS_I(inode), start, |
|---|
| 1037 | | - start + num_bytes - 1, 0); |
|---|
| 1035 | + btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
|---|
| 1038 | 1036 | |
|---|
| 1039 | 1037 | /* |
|---|
| 1040 | 1038 | * Relocation relies on the relocated extents to have exactly the same |
|---|
| .. | .. |
|---|
| 1098 | 1096 | * skip current ordered extent. |
|---|
| 1099 | 1097 | */ |
|---|
| 1100 | 1098 | if (ret) |
|---|
| 1101 | | - btrfs_drop_extent_cache(BTRFS_I(inode), start, |
|---|
| 1099 | + btrfs_drop_extent_cache(inode, start, |
|---|
| 1102 | 1100 | start + ram_size - 1, 0); |
|---|
| 1103 | 1101 | } |
|---|
| 1104 | 1102 | |
|---|
| .. | .. |
|---|
| 1114 | 1112 | page_ops = unlock ? PAGE_UNLOCK : 0; |
|---|
| 1115 | 1113 | page_ops |= PAGE_SET_PRIVATE2; |
|---|
| 1116 | 1114 | |
|---|
| 1117 | | - extent_clear_unlock_delalloc(inode, start, |
|---|
| 1118 | | - start + ram_size - 1, |
|---|
| 1119 | | - delalloc_end, locked_page, |
|---|
| 1115 | + extent_clear_unlock_delalloc(inode, start, start + ram_size - 1, |
|---|
| 1116 | + locked_page, |
|---|
| 1120 | 1117 | EXTENT_LOCKED | EXTENT_DELALLOC, |
|---|
| 1121 | 1118 | page_ops); |
|---|
| 1122 | 1119 | if (num_bytes < cur_alloc_size) |
|---|
| .. | .. |
|---|
| 1139 | 1136 | return ret; |
|---|
| 1140 | 1137 | |
|---|
| 1141 | 1138 | out_drop_extent_cache: |
|---|
| 1142 | | - btrfs_drop_extent_cache(BTRFS_I(inode), start, start + ram_size - 1, 0); |
|---|
| 1139 | + btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0); |
|---|
| 1143 | 1140 | out_reserve: |
|---|
| 1144 | 1141 | btrfs_dec_block_group_reservations(fs_info, ins.objectid); |
|---|
| 1145 | 1142 | btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1); |
|---|
| .. | .. |
|---|
| 1161 | 1158 | if (extent_reserved) { |
|---|
| 1162 | 1159 | extent_clear_unlock_delalloc(inode, start, |
|---|
| 1163 | 1160 | start + cur_alloc_size - 1, |
|---|
| 1164 | | - start + cur_alloc_size - 1, |
|---|
| 1165 | 1161 | locked_page, |
|---|
| 1166 | 1162 | clear_bits, |
|---|
| 1167 | 1163 | page_ops); |
|---|
| .. | .. |
|---|
| 1169 | 1165 | if (start >= end) |
|---|
| 1170 | 1166 | goto out; |
|---|
| 1171 | 1167 | } |
|---|
| 1172 | | - extent_clear_unlock_delalloc(inode, start, end, delalloc_end, |
|---|
| 1173 | | - locked_page, |
|---|
| 1168 | + extent_clear_unlock_delalloc(inode, start, end, locked_page, |
|---|
| 1174 | 1169 | clear_bits | EXTENT_CLEAR_DATA_RESV, |
|---|
| 1175 | 1170 | page_ops); |
|---|
| 1176 | 1171 | goto out; |
|---|
| .. | .. |
|---|
| 1181 | 1176 | */ |
|---|
| 1182 | 1177 | static noinline void async_cow_start(struct btrfs_work *work) |
|---|
| 1183 | 1178 | { |
|---|
| 1184 | | - struct async_cow *async_cow; |
|---|
| 1185 | | - int num_added = 0; |
|---|
| 1186 | | - async_cow = container_of(work, struct async_cow, work); |
|---|
| 1179 | + struct async_chunk *async_chunk; |
|---|
| 1180 | + int compressed_extents; |
|---|
| 1187 | 1181 | |
|---|
| 1188 | | - compress_file_range(async_cow->inode, async_cow->locked_page, |
|---|
| 1189 | | - async_cow->start, async_cow->end, async_cow, |
|---|
| 1190 | | - &num_added); |
|---|
| 1191 | | - if (num_added == 0) { |
|---|
| 1192 | | - btrfs_add_delayed_iput(async_cow->inode); |
|---|
| 1193 | | - async_cow->inode = NULL; |
|---|
| 1182 | + async_chunk = container_of(work, struct async_chunk, work); |
|---|
| 1183 | + |
|---|
| 1184 | + compressed_extents = compress_file_range(async_chunk); |
|---|
| 1185 | + if (compressed_extents == 0) { |
|---|
| 1186 | + btrfs_add_delayed_iput(async_chunk->inode); |
|---|
| 1187 | + async_chunk->inode = NULL; |
|---|
| 1194 | 1188 | } |
|---|
| 1195 | 1189 | } |
|---|
| 1196 | 1190 | |
|---|
| .. | .. |
|---|
| 1199 | 1193 | */ |
|---|
| 1200 | 1194 | static noinline void async_cow_submit(struct btrfs_work *work) |
|---|
| 1201 | 1195 | { |
|---|
| 1202 | | - struct btrfs_fs_info *fs_info; |
|---|
| 1203 | | - struct async_cow *async_cow; |
|---|
| 1204 | | - struct btrfs_root *root; |
|---|
| 1196 | + struct async_chunk *async_chunk = container_of(work, struct async_chunk, |
|---|
| 1197 | + work); |
|---|
| 1198 | + struct btrfs_fs_info *fs_info = btrfs_work_owner(work); |
|---|
| 1205 | 1199 | unsigned long nr_pages; |
|---|
| 1206 | 1200 | |
|---|
| 1207 | | - async_cow = container_of(work, struct async_cow, work); |
|---|
| 1208 | | - |
|---|
| 1209 | | - root = async_cow->root; |
|---|
| 1210 | | - fs_info = root->fs_info; |
|---|
| 1211 | | - nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >> |
|---|
| 1201 | + nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >> |
|---|
| 1212 | 1202 | PAGE_SHIFT; |
|---|
| 1203 | + |
|---|
| 1204 | + /* |
|---|
| 1205 | + * ->inode could be NULL if async_cow_start has failed to compress, |
|---|
| 1206 | + * in which case we don't have anything to submit, yet we need to |
|---|
| 1207 | + * always adjust ->async_delalloc_pages as it's paired with the init |
|---|
| 1208 | + * happening in cow_file_range_async |
|---|
| 1209 | + */ |
|---|
| 1210 | + if (async_chunk->inode) |
|---|
| 1211 | + submit_compressed_extents(async_chunk); |
|---|
| 1213 | 1212 | |
|---|
| 1214 | 1213 | /* atomic_sub_return implies a barrier */ |
|---|
| 1215 | 1214 | if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) < |
|---|
| 1216 | 1215 | 5 * SZ_1M) |
|---|
| 1217 | 1216 | cond_wake_up_nomb(&fs_info->async_submit_wait); |
|---|
| 1218 | | - |
|---|
| 1219 | | - if (async_cow->inode) |
|---|
| 1220 | | - submit_compressed_extents(async_cow->inode, async_cow); |
|---|
| 1221 | 1217 | } |
|---|
| 1222 | 1218 | |
|---|
| 1223 | 1219 | static noinline void async_cow_free(struct btrfs_work *work) |
|---|
| 1224 | 1220 | { |
|---|
| 1225 | | - struct async_cow *async_cow; |
|---|
| 1226 | | - async_cow = container_of(work, struct async_cow, work); |
|---|
| 1227 | | - if (async_cow->inode) |
|---|
| 1228 | | - btrfs_add_delayed_iput(async_cow->inode); |
|---|
| 1229 | | - kfree(async_cow); |
|---|
| 1221 | + struct async_chunk *async_chunk; |
|---|
| 1222 | + |
|---|
| 1223 | + async_chunk = container_of(work, struct async_chunk, work); |
|---|
| 1224 | + if (async_chunk->inode) |
|---|
| 1225 | + btrfs_add_delayed_iput(async_chunk->inode); |
|---|
| 1226 | + if (async_chunk->blkcg_css) |
|---|
| 1227 | + css_put(async_chunk->blkcg_css); |
|---|
| 1228 | + /* |
|---|
| 1229 | + * Since the pointer to 'pending' is at the beginning of the array of |
|---|
| 1230 | + * async_chunk's, freeing it ensures the whole array has been freed. |
|---|
| 1231 | + */ |
|---|
| 1232 | + if (atomic_dec_and_test(async_chunk->pending)) |
|---|
| 1233 | + kvfree(async_chunk->pending); |
|---|
| 1230 | 1234 | } |
|---|
| 1231 | 1235 | |
|---|
| 1232 | | -static int cow_file_range_async(struct inode *inode, struct page *locked_page, |
|---|
| 1236 | +static int cow_file_range_async(struct btrfs_inode *inode, |
|---|
| 1237 | + struct writeback_control *wbc, |
|---|
| 1238 | + struct page *locked_page, |
|---|
| 1233 | 1239 | u64 start, u64 end, int *page_started, |
|---|
| 1234 | | - unsigned long *nr_written, |
|---|
| 1235 | | - unsigned int write_flags) |
|---|
| 1240 | + unsigned long *nr_written) |
|---|
| 1236 | 1241 | { |
|---|
| 1237 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 1238 | | - struct async_cow *async_cow; |
|---|
| 1239 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 1242 | + struct btrfs_fs_info *fs_info = inode->root->fs_info; |
|---|
| 1243 | + struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc); |
|---|
| 1244 | + struct async_cow *ctx; |
|---|
| 1245 | + struct async_chunk *async_chunk; |
|---|
| 1240 | 1246 | unsigned long nr_pages; |
|---|
| 1241 | 1247 | u64 cur_end; |
|---|
| 1248 | + u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K); |
|---|
| 1249 | + int i; |
|---|
| 1250 | + bool should_compress; |
|---|
| 1251 | + unsigned nofs_flag; |
|---|
| 1252 | + const unsigned int write_flags = wbc_to_write_flags(wbc); |
|---|
| 1242 | 1253 | |
|---|
| 1243 | | - clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, |
|---|
| 1244 | | - 1, 0, NULL); |
|---|
| 1245 | | - while (start < end) { |
|---|
| 1246 | | - async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); |
|---|
| 1247 | | - BUG_ON(!async_cow); /* -ENOMEM */ |
|---|
| 1248 | | - async_cow->inode = igrab(inode); |
|---|
| 1249 | | - async_cow->root = root; |
|---|
| 1250 | | - async_cow->locked_page = locked_page; |
|---|
| 1251 | | - async_cow->start = start; |
|---|
| 1252 | | - async_cow->write_flags = write_flags; |
|---|
| 1254 | + unlock_extent(&inode->io_tree, start, end); |
|---|
| 1253 | 1255 | |
|---|
| 1254 | | - if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS && |
|---|
| 1255 | | - !btrfs_test_opt(fs_info, FORCE_COMPRESS)) |
|---|
| 1256 | | - cur_end = end; |
|---|
| 1257 | | - else |
|---|
| 1256 | + if (inode->flags & BTRFS_INODE_NOCOMPRESS && |
|---|
| 1257 | + !btrfs_test_opt(fs_info, FORCE_COMPRESS)) { |
|---|
| 1258 | + num_chunks = 1; |
|---|
| 1259 | + should_compress = false; |
|---|
| 1260 | + } else { |
|---|
| 1261 | + should_compress = true; |
|---|
| 1262 | + } |
|---|
| 1263 | + |
|---|
| 1264 | + nofs_flag = memalloc_nofs_save(); |
|---|
| 1265 | + ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL); |
|---|
| 1266 | + memalloc_nofs_restore(nofs_flag); |
|---|
| 1267 | + |
|---|
| 1268 | + if (!ctx) { |
|---|
| 1269 | + unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | |
|---|
| 1270 | + EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | |
|---|
| 1271 | + EXTENT_DO_ACCOUNTING; |
|---|
| 1272 | + unsigned long page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | |
|---|
| 1273 | + PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK | |
|---|
| 1274 | + PAGE_SET_ERROR; |
|---|
| 1275 | + |
|---|
| 1276 | + extent_clear_unlock_delalloc(inode, start, end, locked_page, |
|---|
| 1277 | + clear_bits, page_ops); |
|---|
| 1278 | + return -ENOMEM; |
|---|
| 1279 | + } |
|---|
| 1280 | + |
|---|
| 1281 | + async_chunk = ctx->chunks; |
|---|
| 1282 | + atomic_set(&ctx->num_chunks, num_chunks); |
|---|
| 1283 | + |
|---|
| 1284 | + for (i = 0; i < num_chunks; i++) { |
|---|
| 1285 | + if (should_compress) |
|---|
| 1258 | 1286 | cur_end = min(end, start + SZ_512K - 1); |
|---|
| 1287 | + else |
|---|
| 1288 | + cur_end = end; |
|---|
| 1259 | 1289 | |
|---|
| 1260 | | - async_cow->end = cur_end; |
|---|
| 1261 | | - INIT_LIST_HEAD(&async_cow->extents); |
|---|
| 1290 | + /* |
|---|
| 1291 | + * igrab is called higher up in the call chain, take only the |
|---|
| 1292 | + * lightweight reference for the callback lifetime |
|---|
| 1293 | + */ |
|---|
| 1294 | + ihold(&inode->vfs_inode); |
|---|
| 1295 | + async_chunk[i].pending = &ctx->num_chunks; |
|---|
| 1296 | + async_chunk[i].inode = &inode->vfs_inode; |
|---|
| 1297 | + async_chunk[i].start = start; |
|---|
| 1298 | + async_chunk[i].end = cur_end; |
|---|
| 1299 | + async_chunk[i].write_flags = write_flags; |
|---|
| 1300 | + INIT_LIST_HEAD(&async_chunk[i].extents); |
|---|
| 1262 | 1301 | |
|---|
| 1263 | | - btrfs_init_work(&async_cow->work, |
|---|
| 1264 | | - btrfs_delalloc_helper, |
|---|
| 1265 | | - async_cow_start, async_cow_submit, |
|---|
| 1266 | | - async_cow_free); |
|---|
| 1302 | + /* |
|---|
| 1303 | + * The locked_page comes all the way from writepage and it's |
|---|
| 1304 | + * the original page we were actually given. As we spread |
|---|
| 1305 | + * this large delalloc region across multiple async_chunk |
|---|
| 1306 | + * structs, only the first struct needs a pointer to locked_page |
|---|
| 1307 | + * |
|---|
| 1308 | + * This way we don't need racy decisions about who is supposed |
|---|
| 1309 | + * to unlock it. |
|---|
| 1310 | + */ |
|---|
| 1311 | + if (locked_page) { |
|---|
| 1312 | + /* |
|---|
| 1313 | + * Depending on the compressibility, the pages might or |
|---|
| 1314 | + * might not go through async. We want all of them to |
|---|
| 1315 | + * be accounted against wbc once. Let's do it here |
|---|
| 1316 | + * before the paths diverge. wbc accounting is used |
|---|
| 1317 | + * only for foreign writeback detection and doesn't |
|---|
| 1318 | + * need full accuracy. Just account the whole thing |
|---|
| 1319 | + * against the first page. |
|---|
| 1320 | + */ |
|---|
| 1321 | + wbc_account_cgroup_owner(wbc, locked_page, |
|---|
| 1322 | + cur_end - start); |
|---|
| 1323 | + async_chunk[i].locked_page = locked_page; |
|---|
| 1324 | + locked_page = NULL; |
|---|
| 1325 | + } else { |
|---|
| 1326 | + async_chunk[i].locked_page = NULL; |
|---|
| 1327 | + } |
|---|
| 1267 | 1328 | |
|---|
| 1268 | | - nr_pages = (cur_end - start + PAGE_SIZE) >> |
|---|
| 1269 | | - PAGE_SHIFT; |
|---|
| 1329 | + if (blkcg_css != blkcg_root_css) { |
|---|
| 1330 | + css_get(blkcg_css); |
|---|
| 1331 | + async_chunk[i].blkcg_css = blkcg_css; |
|---|
| 1332 | + } else { |
|---|
| 1333 | + async_chunk[i].blkcg_css = NULL; |
|---|
| 1334 | + } |
|---|
| 1335 | + |
|---|
| 1336 | + btrfs_init_work(&async_chunk[i].work, async_cow_start, |
|---|
| 1337 | + async_cow_submit, async_cow_free); |
|---|
| 1338 | + |
|---|
| 1339 | + nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE); |
|---|
| 1270 | 1340 | atomic_add(nr_pages, &fs_info->async_delalloc_pages); |
|---|
| 1271 | 1341 | |
|---|
| 1272 | | - btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work); |
|---|
| 1342 | + btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work); |
|---|
| 1273 | 1343 | |
|---|
| 1274 | 1344 | *nr_written += nr_pages; |
|---|
| 1275 | 1345 | start = cur_end + 1; |
|---|
| .. | .. |
|---|
| 1300 | 1370 | return 1; |
|---|
| 1301 | 1371 | } |
|---|
| 1302 | 1372 | |
|---|
| 1373 | +static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page, |
|---|
| 1374 | + const u64 start, const u64 end, |
|---|
| 1375 | + int *page_started, unsigned long *nr_written) |
|---|
| 1376 | +{ |
|---|
| 1377 | + const bool is_space_ino = btrfs_is_free_space_inode(inode); |
|---|
| 1378 | + const bool is_reloc_ino = (inode->root->root_key.objectid == |
|---|
| 1379 | + BTRFS_DATA_RELOC_TREE_OBJECTID); |
|---|
| 1380 | + const u64 range_bytes = end + 1 - start; |
|---|
| 1381 | + struct extent_io_tree *io_tree = &inode->io_tree; |
|---|
| 1382 | + u64 range_start = start; |
|---|
| 1383 | + u64 count; |
|---|
| 1384 | + |
|---|
| 1385 | + /* |
|---|
| 1386 | + * If EXTENT_NORESERVE is set it means that when the buffered write was |
|---|
| 1387 | + * made we had not enough available data space and therefore we did not |
|---|
| 1388 | + * reserve data space for it, since we thought we could do NOCOW for the |
|---|
| 1389 | + * respective file range (either there is prealloc extent or the inode |
|---|
| 1390 | + * has the NOCOW bit set). |
|---|
| 1391 | + * |
|---|
| 1392 | + * However when we need to fallback to COW mode (because for example the |
|---|
| 1393 | + * block group for the corresponding extent was turned to RO mode by a |
|---|
| 1394 | + * scrub or relocation) we need to do the following: |
|---|
| 1395 | + * |
|---|
| 1396 | + * 1) We increment the bytes_may_use counter of the data space info. |
|---|
| 1397 | + * If COW succeeds, it allocates a new data extent and after doing |
|---|
| 1398 | + * that it decrements the space info's bytes_may_use counter and |
|---|
| 1399 | + * increments its bytes_reserved counter by the same amount (we do |
|---|
| 1400 | + * this at btrfs_add_reserved_bytes()). So we need to increment the |
|---|
| 1401 | + * bytes_may_use counter to compensate (when space is reserved at |
|---|
| 1402 | + * buffered write time, the bytes_may_use counter is incremented); |
|---|
| 1403 | + * |
|---|
| 1404 | + * 2) We clear the EXTENT_NORESERVE bit from the range. We do this so |
|---|
| 1405 | + * that if the COW path fails for any reason, it decrements (through |
|---|
| 1406 | + * extent_clear_unlock_delalloc()) the bytes_may_use counter of the |
|---|
| 1407 | + * data space info, which we incremented in the step above. |
|---|
| 1408 | + * |
|---|
| 1409 | + * If we need to fallback to cow and the inode corresponds to a free |
|---|
| 1410 | + * space cache inode or an inode of the data relocation tree, we must |
|---|
| 1411 | + * also increment bytes_may_use of the data space_info for the same |
|---|
| 1412 | + * reason. Space caches and relocated data extents always get a prealloc |
|---|
| 1413 | + * extent for them, however scrub or balance may have set the block |
|---|
| 1414 | + * group that contains that extent to RO mode and therefore force COW |
|---|
| 1415 | + * when starting writeback. |
|---|
| 1416 | + */ |
|---|
| 1417 | + count = count_range_bits(io_tree, &range_start, end, range_bytes, |
|---|
| 1418 | + EXTENT_NORESERVE, 0); |
|---|
| 1419 | + if (count > 0 || is_space_ino || is_reloc_ino) { |
|---|
| 1420 | + u64 bytes = count; |
|---|
| 1421 | + struct btrfs_fs_info *fs_info = inode->root->fs_info; |
|---|
| 1422 | + struct btrfs_space_info *sinfo = fs_info->data_sinfo; |
|---|
| 1423 | + |
|---|
| 1424 | + if (is_space_ino || is_reloc_ino) |
|---|
| 1425 | + bytes = range_bytes; |
|---|
| 1426 | + |
|---|
| 1427 | + spin_lock(&sinfo->lock); |
|---|
| 1428 | + btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes); |
|---|
| 1429 | + spin_unlock(&sinfo->lock); |
|---|
| 1430 | + |
|---|
| 1431 | + if (count > 0) |
|---|
| 1432 | + clear_extent_bit(io_tree, start, end, EXTENT_NORESERVE, |
|---|
| 1433 | + 0, 0, NULL); |
|---|
| 1434 | + } |
|---|
| 1435 | + |
|---|
| 1436 | + return cow_file_range(inode, locked_page, start, end, page_started, |
|---|
| 1437 | + nr_written, 1); |
|---|
| 1438 | +} |
|---|
| 1439 | + |
|---|
| 1303 | 1440 | /* |
|---|
| 1304 | 1441 | * nocow writeback callback. This checks for snapshots or COW copies |
|---|
| 1305 | 1442 | * of the extents that exist in the file, and COWs the file as required. |
|---|
| .. | .. |
|---|
| 1307 | 1444 | * If no cow copies or snapshots exist, we write directly to the existing |
|---|
| 1308 | 1445 | * blocks on disk |
|---|
| 1309 | 1446 | */ |
|---|
| 1310 | | -static noinline int run_delalloc_nocow(struct inode *inode, |
|---|
| 1447 | +static noinline int run_delalloc_nocow(struct btrfs_inode *inode, |
|---|
| 1311 | 1448 | struct page *locked_page, |
|---|
| 1312 | | - u64 start, u64 end, int *page_started, int force, |
|---|
| 1313 | | - unsigned long *nr_written) |
|---|
| 1449 | + const u64 start, const u64 end, |
|---|
| 1450 | + int *page_started, int force, |
|---|
| 1451 | + unsigned long *nr_written) |
|---|
| 1314 | 1452 | { |
|---|
| 1315 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 1316 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 1317 | | - struct extent_buffer *leaf; |
|---|
| 1453 | + struct btrfs_fs_info *fs_info = inode->root->fs_info; |
|---|
| 1454 | + struct btrfs_root *root = inode->root; |
|---|
| 1318 | 1455 | struct btrfs_path *path; |
|---|
| 1319 | | - struct btrfs_file_extent_item *fi; |
|---|
| 1320 | | - struct btrfs_key found_key; |
|---|
| 1321 | | - struct extent_map *em; |
|---|
| 1322 | | - u64 cow_start; |
|---|
| 1323 | | - u64 cur_offset; |
|---|
| 1324 | | - u64 extent_end; |
|---|
| 1325 | | - u64 extent_offset; |
|---|
| 1326 | | - u64 disk_bytenr; |
|---|
| 1327 | | - u64 num_bytes; |
|---|
| 1328 | | - u64 disk_num_bytes; |
|---|
| 1329 | | - u64 ram_bytes; |
|---|
| 1330 | | - int extent_type; |
|---|
| 1456 | + u64 cow_start = (u64)-1; |
|---|
| 1457 | + u64 cur_offset = start; |
|---|
| 1331 | 1458 | int ret; |
|---|
| 1332 | | - int type; |
|---|
| 1333 | | - int nocow; |
|---|
| 1334 | | - int check_prev = 1; |
|---|
| 1335 | | - bool nolock; |
|---|
| 1336 | | - u64 ino = btrfs_ino(BTRFS_I(inode)); |
|---|
| 1459 | + bool check_prev = true; |
|---|
| 1460 | + const bool freespace_inode = btrfs_is_free_space_inode(inode); |
|---|
| 1461 | + u64 ino = btrfs_ino(inode); |
|---|
| 1462 | + bool nocow = false; |
|---|
| 1463 | + u64 disk_bytenr = 0; |
|---|
| 1337 | 1464 | |
|---|
| 1338 | 1465 | path = btrfs_alloc_path(); |
|---|
| 1339 | 1466 | if (!path) { |
|---|
| 1340 | | - extent_clear_unlock_delalloc(inode, start, end, end, |
|---|
| 1341 | | - locked_page, |
|---|
| 1467 | + extent_clear_unlock_delalloc(inode, start, end, locked_page, |
|---|
| 1342 | 1468 | EXTENT_LOCKED | EXTENT_DELALLOC | |
|---|
| 1343 | 1469 | EXTENT_DO_ACCOUNTING | |
|---|
| 1344 | 1470 | EXTENT_DEFRAG, PAGE_UNLOCK | |
|---|
| .. | .. |
|---|
| 1348 | 1474 | return -ENOMEM; |
|---|
| 1349 | 1475 | } |
|---|
| 1350 | 1476 | |
|---|
| 1351 | | - nolock = btrfs_is_free_space_inode(BTRFS_I(inode)); |
|---|
| 1352 | | - |
|---|
| 1353 | | - cow_start = (u64)-1; |
|---|
| 1354 | | - cur_offset = start; |
|---|
| 1355 | 1477 | while (1) { |
|---|
| 1478 | + struct btrfs_key found_key; |
|---|
| 1479 | + struct btrfs_file_extent_item *fi; |
|---|
| 1480 | + struct extent_buffer *leaf; |
|---|
| 1481 | + u64 extent_end; |
|---|
| 1482 | + u64 extent_offset; |
|---|
| 1483 | + u64 num_bytes = 0; |
|---|
| 1484 | + u64 disk_num_bytes; |
|---|
| 1485 | + u64 ram_bytes; |
|---|
| 1486 | + int extent_type; |
|---|
| 1487 | + |
|---|
| 1488 | + nocow = false; |
|---|
| 1489 | + |
|---|
| 1356 | 1490 | ret = btrfs_lookup_file_extent(NULL, root, path, ino, |
|---|
| 1357 | 1491 | cur_offset, 0); |
|---|
| 1358 | 1492 | if (ret < 0) |
|---|
| 1359 | 1493 | goto error; |
|---|
| 1494 | + |
|---|
| 1495 | + /* |
|---|
| 1496 | + * If there is no extent for our range when doing the initial |
|---|
| 1497 | + * search, then go back to the previous slot as it will be the |
|---|
| 1498 | + * one containing the search offset |
|---|
| 1499 | + */ |
|---|
| 1360 | 1500 | if (ret > 0 && path->slots[0] > 0 && check_prev) { |
|---|
| 1361 | 1501 | leaf = path->nodes[0]; |
|---|
| 1362 | 1502 | btrfs_item_key_to_cpu(leaf, &found_key, |
|---|
| .. | .. |
|---|
| 1365 | 1505 | found_key.type == BTRFS_EXTENT_DATA_KEY) |
|---|
| 1366 | 1506 | path->slots[0]--; |
|---|
| 1367 | 1507 | } |
|---|
| 1368 | | - check_prev = 0; |
|---|
| 1508 | + check_prev = false; |
|---|
| 1369 | 1509 | next_slot: |
|---|
| 1510 | + /* Go to next leaf if we have exhausted the current one */ |
|---|
| 1370 | 1511 | leaf = path->nodes[0]; |
|---|
| 1371 | 1512 | if (path->slots[0] >= btrfs_header_nritems(leaf)) { |
|---|
| 1372 | 1513 | ret = btrfs_next_leaf(root, path); |
|---|
| .. | .. |
|---|
| 1380 | 1521 | leaf = path->nodes[0]; |
|---|
| 1381 | 1522 | } |
|---|
| 1382 | 1523 | |
|---|
| 1383 | | - nocow = 0; |
|---|
| 1384 | | - disk_bytenr = 0; |
|---|
| 1385 | | - num_bytes = 0; |
|---|
| 1386 | 1524 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
|---|
| 1387 | 1525 | |
|---|
| 1526 | + /* Didn't find anything for our INO */ |
|---|
| 1388 | 1527 | if (found_key.objectid > ino) |
|---|
| 1389 | 1528 | break; |
|---|
| 1529 | + /* |
|---|
| 1530 | + * Keep searching until we find an EXTENT_DATA item or there are no |
|---|
| 1531 | + * more extents for this inode |
|---|
| 1532 | + */ |
|---|
| 1390 | 1533 | if (WARN_ON_ONCE(found_key.objectid < ino) || |
|---|
| 1391 | 1534 | found_key.type < BTRFS_EXTENT_DATA_KEY) { |
|---|
| 1392 | 1535 | path->slots[0]++; |
|---|
| 1393 | 1536 | goto next_slot; |
|---|
| 1394 | 1537 | } |
|---|
| 1538 | + |
|---|
| 1539 | + /* Found key is not EXTENT_DATA_KEY or starts after req range */ |
|---|
| 1395 | 1540 | if (found_key.type > BTRFS_EXTENT_DATA_KEY || |
|---|
| 1396 | 1541 | found_key.offset > end) |
|---|
| 1397 | 1542 | break; |
|---|
| 1398 | 1543 | |
|---|
| 1544 | + /* |
|---|
| 1545 | + * If the found extent starts after requested offset, then |
|---|
| 1546 | + * adjust extent_end to be right before this extent begins |
|---|
| 1547 | + */ |
|---|
| 1399 | 1548 | if (found_key.offset > cur_offset) { |
|---|
| 1400 | 1549 | extent_end = found_key.offset; |
|---|
| 1401 | 1550 | extent_type = 0; |
|---|
| 1402 | 1551 | goto out_check; |
|---|
| 1403 | 1552 | } |
|---|
| 1404 | 1553 | |
|---|
| 1554 | + /* |
|---|
| 1555 | + * Found extent which begins before our range and potentially |
|---|
| 1556 | + * intersect it |
|---|
| 1557 | + */ |
|---|
| 1405 | 1558 | fi = btrfs_item_ptr(leaf, path->slots[0], |
|---|
| 1406 | 1559 | struct btrfs_file_extent_item); |
|---|
| 1407 | 1560 | extent_type = btrfs_file_extent_type(leaf, fi); |
|---|
| .. | .. |
|---|
| 1415 | 1568 | btrfs_file_extent_num_bytes(leaf, fi); |
|---|
| 1416 | 1569 | disk_num_bytes = |
|---|
| 1417 | 1570 | btrfs_file_extent_disk_num_bytes(leaf, fi); |
|---|
| 1418 | | - if (extent_end <= start) { |
|---|
| 1571 | + /* |
|---|
| 1572 | + * If the extent we got ends before our current offset, |
|---|
| 1573 | + * skip to the next extent. |
|---|
| 1574 | + */ |
|---|
| 1575 | + if (extent_end <= cur_offset) { |
|---|
| 1419 | 1576 | path->slots[0]++; |
|---|
| 1420 | 1577 | goto next_slot; |
|---|
| 1421 | 1578 | } |
|---|
| 1579 | + /* Skip holes */ |
|---|
| 1422 | 1580 | if (disk_bytenr == 0) |
|---|
| 1423 | 1581 | goto out_check; |
|---|
| 1582 | + /* Skip compressed/encrypted/encoded extents */ |
|---|
| 1424 | 1583 | if (btrfs_file_extent_compression(leaf, fi) || |
|---|
| 1425 | 1584 | btrfs_file_extent_encryption(leaf, fi) || |
|---|
| 1426 | 1585 | btrfs_file_extent_other_encoding(leaf, fi)) |
|---|
| 1427 | 1586 | goto out_check; |
|---|
| 1428 | 1587 | /* |
|---|
| 1429 | | - * Do the same check as in btrfs_cross_ref_exist but |
|---|
| 1430 | | - * without the unnecessary search. |
|---|
| 1588 | + * If extent is created before the last volume's snapshot |
|---|
| 1589 | + * this implies the extent is shared, hence we can't do |
|---|
| 1590 | + * nocow. This is the same check as in |
|---|
| 1591 | + * btrfs_cross_ref_exist but without calling |
|---|
| 1592 | + * btrfs_search_slot. |
|---|
| 1431 | 1593 | */ |
|---|
| 1432 | | - if (!nolock && |
|---|
| 1594 | + if (!freespace_inode && |
|---|
| 1433 | 1595 | btrfs_file_extent_generation(leaf, fi) <= |
|---|
| 1434 | 1596 | btrfs_root_last_snapshot(&root->root_item)) |
|---|
| 1435 | 1597 | goto out_check; |
|---|
| 1436 | 1598 | if (extent_type == BTRFS_FILE_EXTENT_REG && !force) |
|---|
| 1437 | 1599 | goto out_check; |
|---|
| 1600 | + /* If extent is RO, we must COW it */ |
|---|
| 1438 | 1601 | if (btrfs_extent_readonly(fs_info, disk_bytenr)) |
|---|
| 1439 | 1602 | goto out_check; |
|---|
| 1440 | 1603 | ret = btrfs_cross_ref_exist(root, ino, |
|---|
| 1441 | 1604 | found_key.offset - |
|---|
| 1442 | | - extent_offset, disk_bytenr); |
|---|
| 1605 | + extent_offset, disk_bytenr, false); |
|---|
| 1443 | 1606 | if (ret) { |
|---|
| 1444 | 1607 | /* |
|---|
| 1445 | 1608 | * ret could be -EIO if the above fails to read |
|---|
| .. | .. |
|---|
| 1451 | 1614 | goto error; |
|---|
| 1452 | 1615 | } |
|---|
| 1453 | 1616 | |
|---|
| 1454 | | - WARN_ON_ONCE(nolock); |
|---|
| 1617 | + WARN_ON_ONCE(freespace_inode); |
|---|
| 1455 | 1618 | goto out_check; |
|---|
| 1456 | 1619 | } |
|---|
| 1457 | 1620 | disk_bytenr += extent_offset; |
|---|
| 1458 | 1621 | disk_bytenr += cur_offset - found_key.offset; |
|---|
| 1459 | 1622 | num_bytes = min(end + 1, extent_end) - cur_offset; |
|---|
| 1460 | 1623 | /* |
|---|
| 1461 | | - * if there are pending snapshots for this root, |
|---|
| 1462 | | - * we fall into common COW way. |
|---|
| 1624 | + * If there are pending snapshots for this root, we |
|---|
| 1625 | + * fall into common COW way |
|---|
| 1463 | 1626 | */ |
|---|
| 1464 | | - if (!nolock && atomic_read(&root->snapshot_force_cow)) |
|---|
| 1627 | + if (!freespace_inode && atomic_read(&root->snapshot_force_cow)) |
|---|
| 1465 | 1628 | goto out_check; |
|---|
| 1466 | 1629 | /* |
|---|
| 1467 | 1630 | * force cow if csum exists in the range. |
|---|
| .. | .. |
|---|
| 1480 | 1643 | cur_offset = cow_start; |
|---|
| 1481 | 1644 | goto error; |
|---|
| 1482 | 1645 | } |
|---|
| 1483 | | - WARN_ON_ONCE(nolock); |
|---|
| 1646 | + WARN_ON_ONCE(freespace_inode); |
|---|
| 1484 | 1647 | goto out_check; |
|---|
| 1485 | 1648 | } |
|---|
| 1486 | 1649 | if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) |
|---|
| 1487 | 1650 | goto out_check; |
|---|
| 1488 | | - nocow = 1; |
|---|
| 1651 | + nocow = true; |
|---|
| 1489 | 1652 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
|---|
| 1490 | | - extent_end = found_key.offset + |
|---|
| 1491 | | - btrfs_file_extent_ram_bytes(leaf, fi); |
|---|
| 1492 | | - extent_end = ALIGN(extent_end, |
|---|
| 1493 | | - fs_info->sectorsize); |
|---|
| 1653 | + extent_end = found_key.offset + ram_bytes; |
|---|
| 1654 | + extent_end = ALIGN(extent_end, fs_info->sectorsize); |
|---|
| 1655 | + /* Skip extents outside of our requested range */ |
|---|
| 1656 | + if (extent_end <= start) { |
|---|
| 1657 | + path->slots[0]++; |
|---|
| 1658 | + goto next_slot; |
|---|
| 1659 | + } |
|---|
| 1494 | 1660 | } else { |
|---|
| 1495 | | - BUG_ON(1); |
|---|
| 1661 | + /* If this triggers then we have a memory corruption */ |
|---|
| 1662 | + BUG(); |
|---|
| 1496 | 1663 | } |
|---|
| 1497 | 1664 | out_check: |
|---|
| 1498 | | - if (extent_end <= start) { |
|---|
| 1499 | | - path->slots[0]++; |
|---|
| 1500 | | - if (nocow) |
|---|
| 1501 | | - btrfs_dec_nocow_writers(fs_info, disk_bytenr); |
|---|
| 1502 | | - goto next_slot; |
|---|
| 1503 | | - } |
|---|
| 1665 | + /* |
|---|
| 1666 | + * If nocow is false then record the beginning of the range |
|---|
| 1667 | + * that needs to be COWed |
|---|
| 1668 | + */ |
|---|
| 1504 | 1669 | if (!nocow) { |
|---|
| 1505 | 1670 | if (cow_start == (u64)-1) |
|---|
| 1506 | 1671 | cow_start = cur_offset; |
|---|
| .. | .. |
|---|
| 1512 | 1677 | } |
|---|
| 1513 | 1678 | |
|---|
| 1514 | 1679 | btrfs_release_path(path); |
|---|
| 1680 | + |
|---|
| 1681 | + /* |
|---|
| 1682 | + * COW range from cow_start to found_key.offset - 1. As the key |
|---|
| 1683 | + * will contain the beginning of the first extent that can be |
|---|
| 1684 | + * NOCOW, following one which needs to be COW'ed |
|---|
| 1685 | + */ |
|---|
| 1515 | 1686 | if (cow_start != (u64)-1) { |
|---|
| 1516 | | - ret = cow_file_range(inode, locked_page, |
|---|
| 1517 | | - cow_start, found_key.offset - 1, |
|---|
| 1518 | | - end, page_started, nr_written, 1, |
|---|
| 1519 | | - NULL); |
|---|
| 1520 | | - if (ret) { |
|---|
| 1521 | | - if (nocow) |
|---|
| 1522 | | - btrfs_dec_nocow_writers(fs_info, |
|---|
| 1523 | | - disk_bytenr); |
|---|
| 1687 | + ret = fallback_to_cow(inode, locked_page, |
|---|
| 1688 | + cow_start, found_key.offset - 1, |
|---|
| 1689 | + page_started, nr_written); |
|---|
| 1690 | + if (ret) |
|---|
| 1524 | 1691 | goto error; |
|---|
| 1525 | | - } |
|---|
| 1526 | 1692 | cow_start = (u64)-1; |
|---|
| 1527 | 1693 | } |
|---|
| 1528 | 1694 | |
|---|
| 1529 | 1695 | if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) { |
|---|
| 1530 | 1696 | u64 orig_start = found_key.offset - extent_offset; |
|---|
| 1697 | + struct extent_map *em; |
|---|
| 1531 | 1698 | |
|---|
| 1532 | 1699 | em = create_io_em(inode, cur_offset, num_bytes, |
|---|
| 1533 | 1700 | orig_start, |
|---|
| .. | .. |
|---|
| 1537 | 1704 | ram_bytes, BTRFS_COMPRESS_NONE, |
|---|
| 1538 | 1705 | BTRFS_ORDERED_PREALLOC); |
|---|
| 1539 | 1706 | if (IS_ERR(em)) { |
|---|
| 1540 | | - if (nocow) |
|---|
| 1541 | | - btrfs_dec_nocow_writers(fs_info, |
|---|
| 1542 | | - disk_bytenr); |
|---|
| 1543 | 1707 | ret = PTR_ERR(em); |
|---|
| 1544 | 1708 | goto error; |
|---|
| 1545 | 1709 | } |
|---|
| 1546 | 1710 | free_extent_map(em); |
|---|
| 1547 | | - } |
|---|
| 1548 | | - |
|---|
| 1549 | | - if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) { |
|---|
| 1550 | | - type = BTRFS_ORDERED_PREALLOC; |
|---|
| 1711 | + ret = btrfs_add_ordered_extent(inode, cur_offset, |
|---|
| 1712 | + disk_bytenr, num_bytes, |
|---|
| 1713 | + num_bytes, |
|---|
| 1714 | + BTRFS_ORDERED_PREALLOC); |
|---|
| 1715 | + if (ret) { |
|---|
| 1716 | + btrfs_drop_extent_cache(inode, cur_offset, |
|---|
| 1717 | + cur_offset + num_bytes - 1, |
|---|
| 1718 | + 0); |
|---|
| 1719 | + goto error; |
|---|
| 1720 | + } |
|---|
| 1551 | 1721 | } else { |
|---|
| 1552 | | - type = BTRFS_ORDERED_NOCOW; |
|---|
| 1722 | + ret = btrfs_add_ordered_extent(inode, cur_offset, |
|---|
| 1723 | + disk_bytenr, num_bytes, |
|---|
| 1724 | + num_bytes, |
|---|
| 1725 | + BTRFS_ORDERED_NOCOW); |
|---|
| 1726 | + if (ret) |
|---|
| 1727 | + goto error; |
|---|
| 1553 | 1728 | } |
|---|
| 1554 | 1729 | |
|---|
| 1555 | | - ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr, |
|---|
| 1556 | | - num_bytes, num_bytes, type); |
|---|
| 1557 | 1730 | if (nocow) |
|---|
| 1558 | 1731 | btrfs_dec_nocow_writers(fs_info, disk_bytenr); |
|---|
| 1559 | | - BUG_ON(ret); /* -ENOMEM */ |
|---|
| 1732 | + nocow = false; |
|---|
| 1560 | 1733 | |
|---|
| 1561 | 1734 | if (root->root_key.objectid == |
|---|
| 1562 | 1735 | BTRFS_DATA_RELOC_TREE_OBJECTID) |
|---|
| .. | .. |
|---|
| 1569 | 1742 | num_bytes); |
|---|
| 1570 | 1743 | |
|---|
| 1571 | 1744 | extent_clear_unlock_delalloc(inode, cur_offset, |
|---|
| 1572 | | - cur_offset + num_bytes - 1, end, |
|---|
| 1745 | + cur_offset + num_bytes - 1, |
|---|
| 1573 | 1746 | locked_page, EXTENT_LOCKED | |
|---|
| 1574 | 1747 | EXTENT_DELALLOC | |
|---|
| 1575 | 1748 | EXTENT_CLEAR_DATA_RESV, |
|---|
| .. | .. |
|---|
| 1594 | 1767 | |
|---|
| 1595 | 1768 | if (cow_start != (u64)-1) { |
|---|
| 1596 | 1769 | cur_offset = end; |
|---|
| 1597 | | - ret = cow_file_range(inode, locked_page, cow_start, end, end, |
|---|
| 1598 | | - page_started, nr_written, 1, NULL); |
|---|
| 1770 | + ret = fallback_to_cow(inode, locked_page, cow_start, end, |
|---|
| 1771 | + page_started, nr_written); |
|---|
| 1599 | 1772 | if (ret) |
|---|
| 1600 | 1773 | goto error; |
|---|
| 1601 | 1774 | } |
|---|
| 1602 | 1775 | |
|---|
| 1603 | 1776 | error: |
|---|
| 1777 | + if (nocow) |
|---|
| 1778 | + btrfs_dec_nocow_writers(fs_info, disk_bytenr); |
|---|
| 1779 | + |
|---|
| 1604 | 1780 | if (ret && cur_offset < end) |
|---|
| 1605 | | - extent_clear_unlock_delalloc(inode, cur_offset, end, end, |
|---|
| 1781 | + extent_clear_unlock_delalloc(inode, cur_offset, end, |
|---|
| 1606 | 1782 | locked_page, EXTENT_LOCKED | |
|---|
| 1607 | 1783 | EXTENT_DELALLOC | EXTENT_DEFRAG | |
|---|
| 1608 | 1784 | EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | |
|---|
| .. | .. |
|---|
| 1613 | 1789 | return ret; |
|---|
| 1614 | 1790 | } |
|---|
| 1615 | 1791 | |
|---|
| 1616 | | -static inline int need_force_cow(struct inode *inode, u64 start, u64 end) |
|---|
| 1792 | +static inline int need_force_cow(struct btrfs_inode *inode, u64 start, u64 end) |
|---|
| 1617 | 1793 | { |
|---|
| 1618 | 1794 | |
|---|
| 1619 | | - if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && |
|---|
| 1620 | | - !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) |
|---|
| 1795 | + if (!(inode->flags & BTRFS_INODE_NODATACOW) && |
|---|
| 1796 | + !(inode->flags & BTRFS_INODE_PREALLOC)) |
|---|
| 1621 | 1797 | return 0; |
|---|
| 1622 | 1798 | |
|---|
| 1623 | 1799 | /* |
|---|
| .. | .. |
|---|
| 1625 | 1801 | * if is not zero, it means the file is defragging. |
|---|
| 1626 | 1802 | * Force cow if given extent needs to be defragged. |
|---|
| 1627 | 1803 | */ |
|---|
| 1628 | | - if (BTRFS_I(inode)->defrag_bytes && |
|---|
| 1629 | | - test_range_bit(&BTRFS_I(inode)->io_tree, start, end, |
|---|
| 1630 | | - EXTENT_DEFRAG, 0, NULL)) |
|---|
| 1804 | + if (inode->defrag_bytes && |
|---|
| 1805 | + test_range_bit(&inode->io_tree, start, end, EXTENT_DEFRAG, 0, NULL)) |
|---|
| 1631 | 1806 | return 1; |
|---|
| 1632 | 1807 | |
|---|
| 1633 | 1808 | return 0; |
|---|
| .. | .. |
|---|
| 1637 | 1812 | * Function to process delayed allocation (create CoW) for ranges which are |
|---|
| 1638 | 1813 | * being touched for the first time. |
|---|
| 1639 | 1814 | */ |
|---|
| 1640 | | -int btrfs_run_delalloc_range(void *private_data, struct page *locked_page, |
|---|
| 1815 | +int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page, |
|---|
| 1641 | 1816 | u64 start, u64 end, int *page_started, unsigned long *nr_written, |
|---|
| 1642 | 1817 | struct writeback_control *wbc) |
|---|
| 1643 | 1818 | { |
|---|
| 1644 | | - struct inode *inode = private_data; |
|---|
| 1645 | 1819 | int ret; |
|---|
| 1646 | 1820 | int force_cow = need_force_cow(inode, start, end); |
|---|
| 1647 | | - unsigned int write_flags = wbc_to_write_flags(wbc); |
|---|
| 1648 | 1821 | |
|---|
| 1649 | | - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) { |
|---|
| 1822 | + if (inode->flags & BTRFS_INODE_NODATACOW && !force_cow) { |
|---|
| 1650 | 1823 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
|---|
| 1651 | 1824 | page_started, 1, nr_written); |
|---|
| 1652 | | - } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) { |
|---|
| 1825 | + } else if (inode->flags & BTRFS_INODE_PREALLOC && !force_cow) { |
|---|
| 1653 | 1826 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
|---|
| 1654 | 1827 | page_started, 0, nr_written); |
|---|
| 1655 | 1828 | } else if (!inode_can_compress(inode) || |
|---|
| 1656 | 1829 | !inode_need_compress(inode, start, end)) { |
|---|
| 1657 | | - ret = cow_file_range(inode, locked_page, start, end, end, |
|---|
| 1658 | | - page_started, nr_written, 1, NULL); |
|---|
| 1830 | + ret = cow_file_range(inode, locked_page, start, end, |
|---|
| 1831 | + page_started, nr_written, 1); |
|---|
| 1659 | 1832 | } else { |
|---|
| 1660 | | - set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, |
|---|
| 1661 | | - &BTRFS_I(inode)->runtime_flags); |
|---|
| 1662 | | - ret = cow_file_range_async(inode, locked_page, start, end, |
|---|
| 1663 | | - page_started, nr_written, |
|---|
| 1664 | | - write_flags); |
|---|
| 1833 | + set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags); |
|---|
| 1834 | + ret = cow_file_range_async(inode, wbc, locked_page, start, end, |
|---|
| 1835 | + page_started, nr_written); |
|---|
| 1665 | 1836 | } |
|---|
| 1666 | 1837 | if (ret) |
|---|
| 1667 | 1838 | btrfs_cleanup_ordered_extents(inode, locked_page, start, |
|---|
| .. | .. |
|---|
| 1669 | 1840 | return ret; |
|---|
| 1670 | 1841 | } |
|---|
| 1671 | 1842 | |
|---|
| 1672 | | -static void btrfs_split_extent_hook(void *private_data, |
|---|
| 1673 | | - struct extent_state *orig, u64 split) |
|---|
| 1843 | +void btrfs_split_delalloc_extent(struct inode *inode, |
|---|
| 1844 | + struct extent_state *orig, u64 split) |
|---|
| 1674 | 1845 | { |
|---|
| 1675 | | - struct inode *inode = private_data; |
|---|
| 1676 | 1846 | u64 size; |
|---|
| 1677 | 1847 | |
|---|
| 1678 | 1848 | /* not delalloc, ignore it */ |
|---|
| .. | .. |
|---|
| 1685 | 1855 | u64 new_size; |
|---|
| 1686 | 1856 | |
|---|
| 1687 | 1857 | /* |
|---|
| 1688 | | - * See the explanation in btrfs_merge_extent_hook, the same |
|---|
| 1858 | + * See the explanation in btrfs_merge_delalloc_extent, the same |
|---|
| 1689 | 1859 | * applies here, just in reverse. |
|---|
| 1690 | 1860 | */ |
|---|
| 1691 | 1861 | new_size = orig->end - split + 1; |
|---|
| .. | .. |
|---|
| 1702 | 1872 | } |
|---|
| 1703 | 1873 | |
|---|
| 1704 | 1874 | /* |
|---|
| 1705 | | - * extent_io.c merge_extent_hook, used to track merged delayed allocation |
|---|
| 1706 | | - * extents so we can keep track of new extents that are just merged onto old |
|---|
| 1707 | | - * extents, such as when we are doing sequential writes, so we can properly |
|---|
| 1708 | | - * account for the metadata space we'll need. |
|---|
| 1875 | + * Handle merged delayed allocation extents so we can keep track of new extents |
|---|
| 1876 | + * that are just merged onto old extents, such as when we are doing sequential |
|---|
| 1877 | + * writes, so we can properly account for the metadata space we'll need. |
|---|
| 1709 | 1878 | */ |
|---|
| 1710 | | -static void btrfs_merge_extent_hook(void *private_data, |
|---|
| 1711 | | - struct extent_state *new, |
|---|
| 1712 | | - struct extent_state *other) |
|---|
| 1879 | +void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new, |
|---|
| 1880 | + struct extent_state *other) |
|---|
| 1713 | 1881 | { |
|---|
| 1714 | | - struct inode *inode = private_data; |
|---|
| 1715 | 1882 | u64 new_size, old_size; |
|---|
| 1716 | 1883 | u32 num_extents; |
|---|
| 1717 | 1884 | |
|---|
| .. | .. |
|---|
| 1815 | 1982 | } |
|---|
| 1816 | 1983 | |
|---|
| 1817 | 1984 | /* |
|---|
| 1818 | | - * extent_io.c set_bit_hook, used to track delayed allocation |
|---|
| 1819 | | - * bytes in this file, and to maintain the list of inodes that |
|---|
| 1820 | | - * have pending delalloc work to be done. |
|---|
| 1985 | + * Properly track delayed allocation bytes in the inode and maintain the |
|---|
| 1986 | + * list of inodes that have pending delalloc work to be done. |
|---|
| 1821 | 1987 | */ |
|---|
| 1822 | | -static void btrfs_set_bit_hook(void *private_data, |
|---|
| 1823 | | - struct extent_state *state, unsigned *bits) |
|---|
| 1988 | +void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state, |
|---|
| 1989 | + unsigned *bits) |
|---|
| 1824 | 1990 | { |
|---|
| 1825 | | - struct inode *inode = private_data; |
|---|
| 1826 | | - |
|---|
| 1827 | 1991 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 1828 | 1992 | |
|---|
| 1829 | 1993 | if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC)) |
|---|
| .. | .. |
|---|
| 1869 | 2033 | } |
|---|
| 1870 | 2034 | |
|---|
| 1871 | 2035 | /* |
|---|
| 1872 | | - * extent_io.c clear_bit_hook, see set_bit_hook for why |
|---|
| 2036 | + * Once a range is no longer delalloc this function ensures that proper |
|---|
| 2037 | + * accounting happens. |
|---|
| 1873 | 2038 | */ |
|---|
| 1874 | | -static void btrfs_clear_bit_hook(void *private_data, |
|---|
| 1875 | | - struct extent_state *state, |
|---|
| 1876 | | - unsigned *bits) |
|---|
| 2039 | +void btrfs_clear_delalloc_extent(struct inode *vfs_inode, |
|---|
| 2040 | + struct extent_state *state, unsigned *bits) |
|---|
| 1877 | 2041 | { |
|---|
| 1878 | | - struct btrfs_inode *inode = BTRFS_I((struct inode *)private_data); |
|---|
| 1879 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); |
|---|
| 2042 | + struct btrfs_inode *inode = BTRFS_I(vfs_inode); |
|---|
| 2043 | + struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb); |
|---|
| 1880 | 2044 | u64 len = state->end + 1 - state->start; |
|---|
| 1881 | 2045 | u32 num_extents = count_max_extents(len); |
|---|
| 1882 | 2046 | |
|---|
| .. | .. |
|---|
| 1901 | 2065 | |
|---|
| 1902 | 2066 | /* |
|---|
| 1903 | 2067 | * We don't reserve metadata space for space cache inodes so we |
|---|
| 1904 | | - * don't need to call dellalloc_release_metadata if there is an |
|---|
| 2068 | + * don't need to call delalloc_release_metadata if there is an |
|---|
| 1905 | 2069 | * error. |
|---|
| 1906 | 2070 | */ |
|---|
| 1907 | 2071 | if (*bits & EXTENT_CLEAR_META_RESV && |
|---|
| .. | .. |
|---|
| 1915 | 2079 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID && |
|---|
| 1916 | 2080 | do_list && !(state->state & EXTENT_NORESERVE) && |
|---|
| 1917 | 2081 | (*bits & EXTENT_CLEAR_DATA_RESV)) |
|---|
| 1918 | | - btrfs_free_reserved_data_space_noquota( |
|---|
| 1919 | | - &inode->vfs_inode, |
|---|
| 1920 | | - state->start, len); |
|---|
| 2082 | + btrfs_free_reserved_data_space_noquota(fs_info, len); |
|---|
| 1921 | 2083 | |
|---|
| 1922 | 2084 | percpu_counter_add_batch(&fs_info->delalloc_bytes, -len, |
|---|
| 1923 | 2085 | fs_info->delalloc_batch); |
|---|
| .. | .. |
|---|
| 1940 | 2102 | } |
|---|
| 1941 | 2103 | |
|---|
| 1942 | 2104 | /* |
|---|
| 1943 | | - * Merge bio hook, this must check the chunk tree to make sure we don't create |
|---|
| 1944 | | - * bios that span stripes or chunks |
|---|
| 2105 | + * btrfs_bio_fits_in_stripe - Checks whether the size of the given bio will fit |
|---|
| 2106 | + * in a chunk's stripe. This function ensures that bios do not span a |
|---|
| 2107 | + * stripe/chunk |
|---|
| 1945 | 2108 | * |
|---|
| 1946 | | - * return 1 if page cannot be merged to bio |
|---|
| 1947 | | - * return 0 if page can be merged to bio |
|---|
| 2109 | + * @page - The page we are about to add to the bio |
|---|
| 2110 | + * @size - size we want to add to the bio |
|---|
| 2111 | + * @bio - bio we want to ensure is smaller than a stripe |
|---|
| 2112 | + * @bio_flags - flags of the bio |
|---|
| 2113 | + * |
|---|
| 2114 | + * return 1 if page cannot be added to the bio |
|---|
| 2115 | + * return 0 if page can be added to the bio |
|---|
| 1948 | 2116 | * return error otherwise |
|---|
| 1949 | 2117 | */ |
|---|
| 1950 | | -int btrfs_merge_bio_hook(struct page *page, unsigned long offset, |
|---|
| 1951 | | - size_t size, struct bio *bio, |
|---|
| 1952 | | - unsigned long bio_flags) |
|---|
| 2118 | +int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio, |
|---|
| 2119 | + unsigned long bio_flags) |
|---|
| 1953 | 2120 | { |
|---|
| 1954 | 2121 | struct inode *inode = page->mapping->host; |
|---|
| 1955 | 2122 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| .. | .. |
|---|
| 1957 | 2124 | u64 length = 0; |
|---|
| 1958 | 2125 | u64 map_length; |
|---|
| 1959 | 2126 | int ret; |
|---|
| 2127 | + struct btrfs_io_geometry geom; |
|---|
| 1960 | 2128 | |
|---|
| 1961 | 2129 | if (bio_flags & EXTENT_BIO_COMPRESSED) |
|---|
| 1962 | 2130 | return 0; |
|---|
| 1963 | 2131 | |
|---|
| 1964 | 2132 | length = bio->bi_iter.bi_size; |
|---|
| 1965 | 2133 | map_length = length; |
|---|
| 1966 | | - ret = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length, |
|---|
| 1967 | | - NULL, 0); |
|---|
| 2134 | + ret = btrfs_get_io_geometry(fs_info, btrfs_op(bio), logical, map_length, |
|---|
| 2135 | + &geom); |
|---|
| 1968 | 2136 | if (ret < 0) |
|---|
| 1969 | 2137 | return ret; |
|---|
| 1970 | | - if (map_length < length + size) |
|---|
| 2138 | + |
|---|
| 2139 | + if (geom.len < length + size) |
|---|
| 1971 | 2140 | return 1; |
|---|
| 1972 | 2141 | return 0; |
|---|
| 1973 | 2142 | } |
|---|
| .. | .. |
|---|
| 1984 | 2153 | u64 bio_offset) |
|---|
| 1985 | 2154 | { |
|---|
| 1986 | 2155 | struct inode *inode = private_data; |
|---|
| 1987 | | - blk_status_t ret = 0; |
|---|
| 1988 | 2156 | |
|---|
| 1989 | | - ret = btrfs_csum_one_bio(inode, bio, 0, 0); |
|---|
| 1990 | | - BUG_ON(ret); /* -ENOMEM */ |
|---|
| 1991 | | - return 0; |
|---|
| 1992 | | -} |
|---|
| 1993 | | - |
|---|
| 1994 | | -/* |
|---|
| 1995 | | - * in order to insert checksums into the metadata in large chunks, |
|---|
| 1996 | | - * we wait until bio submission time. All the pages in the bio are |
|---|
| 1997 | | - * checksummed and sums are attached onto the ordered extent record. |
|---|
| 1998 | | - * |
|---|
| 1999 | | - * At IO completion time the cums attached on the ordered extent record |
|---|
| 2000 | | - * are inserted into the btree |
|---|
| 2001 | | - */ |
|---|
| 2002 | | -blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio, |
|---|
| 2003 | | - int mirror_num) |
|---|
| 2004 | | -{ |
|---|
| 2005 | | - struct inode *inode = private_data; |
|---|
| 2006 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 2007 | | - blk_status_t ret; |
|---|
| 2008 | | - |
|---|
| 2009 | | - ret = btrfs_map_bio(fs_info, bio, mirror_num, 1); |
|---|
| 2010 | | - if (ret) { |
|---|
| 2011 | | - bio->bi_status = ret; |
|---|
| 2012 | | - bio_endio(bio); |
|---|
| 2013 | | - } |
|---|
| 2014 | | - return ret; |
|---|
| 2157 | + return btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0); |
|---|
| 2015 | 2158 | } |
|---|
| 2016 | 2159 | |
|---|
| 2017 | 2160 | /* |
|---|
| .. | .. |
|---|
| 2032 | 2175 | * |
|---|
| 2033 | 2176 | * c-3) otherwise: async submit |
|---|
| 2034 | 2177 | */ |
|---|
| 2035 | | -static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio, |
|---|
| 2036 | | - int mirror_num, unsigned long bio_flags, |
|---|
| 2037 | | - u64 bio_offset) |
|---|
| 2178 | +blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio, |
|---|
| 2179 | + int mirror_num, unsigned long bio_flags) |
|---|
| 2180 | + |
|---|
| 2038 | 2181 | { |
|---|
| 2039 | | - struct inode *inode = private_data; |
|---|
| 2040 | 2182 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 2041 | 2183 | struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 2042 | 2184 | enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA; |
|---|
| .. | .. |
|---|
| 2060 | 2202 | bio_flags); |
|---|
| 2061 | 2203 | goto out; |
|---|
| 2062 | 2204 | } else if (!skip_sum) { |
|---|
| 2063 | | - ret = btrfs_lookup_bio_sums(inode, bio, NULL); |
|---|
| 2205 | + ret = btrfs_lookup_bio_sums(inode, bio, (u64)-1, NULL); |
|---|
| 2064 | 2206 | if (ret) |
|---|
| 2065 | 2207 | goto out; |
|---|
| 2066 | 2208 | } |
|---|
| .. | .. |
|---|
| 2071 | 2213 | goto mapit; |
|---|
| 2072 | 2214 | /* we're doing a write, do the async checksumming */ |
|---|
| 2073 | 2215 | ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags, |
|---|
| 2074 | | - bio_offset, inode, |
|---|
| 2075 | | - btrfs_submit_bio_start); |
|---|
| 2216 | + 0, inode, btrfs_submit_bio_start); |
|---|
| 2076 | 2217 | goto out; |
|---|
| 2077 | 2218 | } else if (!skip_sum) { |
|---|
| 2078 | | - ret = btrfs_csum_one_bio(inode, bio, 0, 0); |
|---|
| 2219 | + ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0); |
|---|
| 2079 | 2220 | if (ret) |
|---|
| 2080 | 2221 | goto out; |
|---|
| 2081 | 2222 | } |
|---|
| 2082 | 2223 | |
|---|
| 2083 | 2224 | mapit: |
|---|
| 2084 | | - ret = btrfs_map_bio(fs_info, bio, mirror_num, 0); |
|---|
| 2225 | + ret = btrfs_map_bio(fs_info, bio, mirror_num); |
|---|
| 2085 | 2226 | |
|---|
| 2086 | 2227 | out: |
|---|
| 2087 | 2228 | if (ret) { |
|---|
| .. | .. |
|---|
| 2095 | 2236 | * given a list of ordered sums record them in the inode. This happens |
|---|
| 2096 | 2237 | * at IO completion time based on sums calculated at bio submission time. |
|---|
| 2097 | 2238 | */ |
|---|
| 2098 | | -static noinline int add_pending_csums(struct btrfs_trans_handle *trans, |
|---|
| 2099 | | - struct inode *inode, struct list_head *list) |
|---|
| 2239 | +static int add_pending_csums(struct btrfs_trans_handle *trans, |
|---|
| 2240 | + struct list_head *list) |
|---|
| 2100 | 2241 | { |
|---|
| 2101 | 2242 | struct btrfs_ordered_sum *sum; |
|---|
| 2102 | 2243 | int ret; |
|---|
| 2103 | 2244 | |
|---|
| 2104 | 2245 | list_for_each_entry(sum, list, list) { |
|---|
| 2105 | 2246 | trans->adding_csums = true; |
|---|
| 2106 | | - ret = btrfs_csum_file_blocks(trans, |
|---|
| 2107 | | - BTRFS_I(inode)->root->fs_info->csum_root, sum); |
|---|
| 2247 | + ret = btrfs_csum_file_blocks(trans, trans->fs_info->csum_root, sum); |
|---|
| 2108 | 2248 | trans->adding_csums = false; |
|---|
| 2109 | 2249 | if (ret) |
|---|
| 2110 | 2250 | return ret; |
|---|
| .. | .. |
|---|
| 2112 | 2252 | return 0; |
|---|
| 2113 | 2253 | } |
|---|
| 2114 | 2254 | |
|---|
| 2115 | | -int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
|---|
| 2116 | | - unsigned int extra_bits, |
|---|
| 2117 | | - struct extent_state **cached_state, int dedupe) |
|---|
| 2255 | +static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode, |
|---|
| 2256 | + const u64 start, |
|---|
| 2257 | + const u64 len, |
|---|
| 2258 | + struct extent_state **cached_state) |
|---|
| 2118 | 2259 | { |
|---|
| 2119 | | - WARN_ON((end & (PAGE_SIZE - 1)) == 0); |
|---|
| 2120 | | - return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, |
|---|
| 2121 | | - extra_bits, cached_state); |
|---|
| 2260 | + u64 search_start = start; |
|---|
| 2261 | + const u64 end = start + len - 1; |
|---|
| 2262 | + |
|---|
| 2263 | + while (search_start < end) { |
|---|
| 2264 | + const u64 search_len = end - search_start + 1; |
|---|
| 2265 | + struct extent_map *em; |
|---|
| 2266 | + u64 em_len; |
|---|
| 2267 | + int ret = 0; |
|---|
| 2268 | + |
|---|
| 2269 | + em = btrfs_get_extent(inode, NULL, 0, search_start, search_len); |
|---|
| 2270 | + if (IS_ERR(em)) |
|---|
| 2271 | + return PTR_ERR(em); |
|---|
| 2272 | + |
|---|
| 2273 | + if (em->block_start != EXTENT_MAP_HOLE) |
|---|
| 2274 | + goto next; |
|---|
| 2275 | + |
|---|
| 2276 | + em_len = em->len; |
|---|
| 2277 | + if (em->start < search_start) |
|---|
| 2278 | + em_len -= search_start - em->start; |
|---|
| 2279 | + if (em_len > search_len) |
|---|
| 2280 | + em_len = search_len; |
|---|
| 2281 | + |
|---|
| 2282 | + ret = set_extent_bit(&inode->io_tree, search_start, |
|---|
| 2283 | + search_start + em_len - 1, |
|---|
| 2284 | + EXTENT_DELALLOC_NEW, |
|---|
| 2285 | + NULL, cached_state, GFP_NOFS); |
|---|
| 2286 | +next: |
|---|
| 2287 | + search_start = extent_map_end(em); |
|---|
| 2288 | + free_extent_map(em); |
|---|
| 2289 | + if (ret) |
|---|
| 2290 | + return ret; |
|---|
| 2291 | + } |
|---|
| 2292 | + return 0; |
|---|
| 2293 | +} |
|---|
| 2294 | + |
|---|
| 2295 | +int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end, |
|---|
| 2296 | + unsigned int extra_bits, |
|---|
| 2297 | + struct extent_state **cached_state) |
|---|
| 2298 | +{ |
|---|
| 2299 | + WARN_ON(PAGE_ALIGNED(end)); |
|---|
| 2300 | + |
|---|
| 2301 | + if (start >= i_size_read(&inode->vfs_inode) && |
|---|
| 2302 | + !(inode->flags & BTRFS_INODE_PREALLOC)) { |
|---|
| 2303 | + /* |
|---|
| 2304 | + * There can't be any extents following eof in this case so just |
|---|
| 2305 | + * set the delalloc new bit for the range directly. |
|---|
| 2306 | + */ |
|---|
| 2307 | + extra_bits |= EXTENT_DELALLOC_NEW; |
|---|
| 2308 | + } else { |
|---|
| 2309 | + int ret; |
|---|
| 2310 | + |
|---|
| 2311 | + ret = btrfs_find_new_delalloc_bytes(inode, start, |
|---|
| 2312 | + end + 1 - start, |
|---|
| 2313 | + cached_state); |
|---|
| 2314 | + if (ret) |
|---|
| 2315 | + return ret; |
|---|
| 2316 | + } |
|---|
| 2317 | + |
|---|
| 2318 | + return set_extent_delalloc(&inode->io_tree, start, end, extra_bits, |
|---|
| 2319 | + cached_state); |
|---|
| 2122 | 2320 | } |
|---|
| 2123 | 2321 | |
|---|
| 2124 | 2322 | /* see btrfs_writepage_cow_fixup for details on why this is required */ |
|---|
| 2125 | 2323 | struct btrfs_writepage_fixup { |
|---|
| 2126 | 2324 | struct page *page; |
|---|
| 2325 | + struct inode *inode; |
|---|
| 2127 | 2326 | struct btrfs_work work; |
|---|
| 2128 | 2327 | }; |
|---|
| 2129 | 2328 | |
|---|
| .. | .. |
|---|
| 2134 | 2333 | struct extent_state *cached_state = NULL; |
|---|
| 2135 | 2334 | struct extent_changeset *data_reserved = NULL; |
|---|
| 2136 | 2335 | struct page *page; |
|---|
| 2137 | | - struct inode *inode; |
|---|
| 2336 | + struct btrfs_inode *inode; |
|---|
| 2138 | 2337 | u64 page_start; |
|---|
| 2139 | 2338 | u64 page_end; |
|---|
| 2140 | | - int ret; |
|---|
| 2339 | + int ret = 0; |
|---|
| 2340 | + bool free_delalloc_space = true; |
|---|
| 2141 | 2341 | |
|---|
| 2142 | 2342 | fixup = container_of(work, struct btrfs_writepage_fixup, work); |
|---|
| 2143 | 2343 | page = fixup->page; |
|---|
| 2144 | | -again: |
|---|
| 2145 | | - lock_page(page); |
|---|
| 2146 | | - if (!page->mapping || !PageDirty(page) || !PageChecked(page)) { |
|---|
| 2147 | | - ClearPageChecked(page); |
|---|
| 2148 | | - goto out_page; |
|---|
| 2149 | | - } |
|---|
| 2150 | | - |
|---|
| 2151 | | - inode = page->mapping->host; |
|---|
| 2344 | + inode = BTRFS_I(fixup->inode); |
|---|
| 2152 | 2345 | page_start = page_offset(page); |
|---|
| 2153 | 2346 | page_end = page_offset(page) + PAGE_SIZE - 1; |
|---|
| 2154 | 2347 | |
|---|
| 2155 | | - lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, |
|---|
| 2156 | | - &cached_state); |
|---|
| 2348 | + /* |
|---|
| 2349 | + * This is similar to page_mkwrite, we need to reserve the space before |
|---|
| 2350 | + * we take the page lock. |
|---|
| 2351 | + */ |
|---|
| 2352 | + ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start, |
|---|
| 2353 | + PAGE_SIZE); |
|---|
| 2354 | +again: |
|---|
| 2355 | + lock_page(page); |
|---|
| 2356 | + |
|---|
| 2357 | + /* |
|---|
| 2358 | + * Before we queued this fixup, we took a reference on the page. |
|---|
| 2359 | + * page->mapping may go NULL, but it shouldn't be moved to a different |
|---|
| 2360 | + * address space. |
|---|
| 2361 | + */ |
|---|
| 2362 | + if (!page->mapping || !PageDirty(page) || !PageChecked(page)) { |
|---|
| 2363 | + /* |
|---|
| 2364 | + * Unfortunately this is a little tricky, either |
|---|
| 2365 | + * |
|---|
| 2366 | + * 1) We got here and our page had already been dealt with and |
|---|
| 2367 | + * we reserved our space, thus ret == 0, so we need to just |
|---|
| 2368 | + * drop our space reservation and bail. This can happen the |
|---|
| 2369 | + * first time we come into the fixup worker, or could happen |
|---|
| 2370 | + * while waiting for the ordered extent. |
|---|
| 2371 | + * 2) Our page was already dealt with, but we happened to get an |
|---|
| 2372 | + * ENOSPC above from the btrfs_delalloc_reserve_space. In |
|---|
| 2373 | + * this case we obviously don't have anything to release, but |
|---|
| 2374 | + * because the page was already dealt with we don't want to |
|---|
| 2375 | + * mark the page with an error, so make sure we're resetting |
|---|
| 2376 | + * ret to 0. This is why we have this check _before_ the ret |
|---|
| 2377 | + * check, because we do not want to have a surprise ENOSPC |
|---|
| 2378 | + * when the page was already properly dealt with. |
|---|
| 2379 | + */ |
|---|
| 2380 | + if (!ret) { |
|---|
| 2381 | + btrfs_delalloc_release_extents(inode, PAGE_SIZE); |
|---|
| 2382 | + btrfs_delalloc_release_space(inode, data_reserved, |
|---|
| 2383 | + page_start, PAGE_SIZE, |
|---|
| 2384 | + true); |
|---|
| 2385 | + } |
|---|
| 2386 | + ret = 0; |
|---|
| 2387 | + goto out_page; |
|---|
| 2388 | + } |
|---|
| 2389 | + |
|---|
| 2390 | + /* |
|---|
| 2391 | + * We can't mess with the page state unless it is locked, so now that |
|---|
| 2392 | + * it is locked bail if we failed to make our space reservation. |
|---|
| 2393 | + */ |
|---|
| 2394 | + if (ret) |
|---|
| 2395 | + goto out_page; |
|---|
| 2396 | + |
|---|
| 2397 | + lock_extent_bits(&inode->io_tree, page_start, page_end, &cached_state); |
|---|
| 2157 | 2398 | |
|---|
| 2158 | 2399 | /* already ordered? We're done */ |
|---|
| 2159 | 2400 | if (PagePrivate2(page)) |
|---|
| 2160 | | - goto out; |
|---|
| 2401 | + goto out_reserved; |
|---|
| 2161 | 2402 | |
|---|
| 2162 | | - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start, |
|---|
| 2163 | | - PAGE_SIZE); |
|---|
| 2403 | + ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE); |
|---|
| 2164 | 2404 | if (ordered) { |
|---|
| 2165 | | - unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, |
|---|
| 2166 | | - page_end, &cached_state); |
|---|
| 2405 | + unlock_extent_cached(&inode->io_tree, page_start, page_end, |
|---|
| 2406 | + &cached_state); |
|---|
| 2167 | 2407 | unlock_page(page); |
|---|
| 2168 | | - btrfs_start_ordered_extent(inode, ordered, 1); |
|---|
| 2408 | + btrfs_start_ordered_extent(ordered, 1); |
|---|
| 2169 | 2409 | btrfs_put_ordered_extent(ordered); |
|---|
| 2170 | 2410 | goto again; |
|---|
| 2171 | 2411 | } |
|---|
| 2172 | 2412 | |
|---|
| 2173 | | - ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start, |
|---|
| 2174 | | - PAGE_SIZE); |
|---|
| 2175 | | - if (ret) { |
|---|
| 2176 | | - mapping_set_error(page->mapping, ret); |
|---|
| 2177 | | - end_extent_writepage(page, ret, page_start, page_end); |
|---|
| 2178 | | - ClearPageChecked(page); |
|---|
| 2179 | | - goto out; |
|---|
| 2180 | | - } |
|---|
| 2181 | | - |
|---|
| 2182 | 2413 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0, |
|---|
| 2183 | | - &cached_state, 0); |
|---|
| 2184 | | - if (ret) { |
|---|
| 2185 | | - mapping_set_error(page->mapping, ret); |
|---|
| 2186 | | - end_extent_writepage(page, ret, page_start, page_end); |
|---|
| 2187 | | - ClearPageChecked(page); |
|---|
| 2188 | | - goto out_reserved; |
|---|
| 2189 | | - } |
|---|
| 2190 | | - |
|---|
| 2191 | | - ClearPageChecked(page); |
|---|
| 2192 | | - set_page_dirty(page); |
|---|
| 2193 | | -out_reserved: |
|---|
| 2194 | | - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); |
|---|
| 2414 | + &cached_state); |
|---|
| 2195 | 2415 | if (ret) |
|---|
| 2416 | + goto out_reserved; |
|---|
| 2417 | + |
|---|
| 2418 | + /* |
|---|
| 2419 | + * Everything went as planned, we're now the owner of a dirty page with |
|---|
| 2420 | + * delayed allocation bits set and space reserved for our COW |
|---|
| 2421 | + * destination. |
|---|
| 2422 | + * |
|---|
| 2423 | + * The page was dirty when we started, nothing should have cleaned it. |
|---|
| 2424 | + */ |
|---|
| 2425 | + BUG_ON(!PageDirty(page)); |
|---|
| 2426 | + free_delalloc_space = false; |
|---|
| 2427 | +out_reserved: |
|---|
| 2428 | + btrfs_delalloc_release_extents(inode, PAGE_SIZE); |
|---|
| 2429 | + if (free_delalloc_space) |
|---|
| 2196 | 2430 | btrfs_delalloc_release_space(inode, data_reserved, page_start, |
|---|
| 2197 | 2431 | PAGE_SIZE, true); |
|---|
| 2198 | | -out: |
|---|
| 2199 | | - unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, |
|---|
| 2432 | + unlock_extent_cached(&inode->io_tree, page_start, page_end, |
|---|
| 2200 | 2433 | &cached_state); |
|---|
| 2201 | 2434 | out_page: |
|---|
| 2435 | + if (ret) { |
|---|
| 2436 | + /* |
|---|
| 2437 | + * We hit ENOSPC or other errors. Update the mapping and page |
|---|
| 2438 | + * to reflect the errors and clean the page. |
|---|
| 2439 | + */ |
|---|
| 2440 | + mapping_set_error(page->mapping, ret); |
|---|
| 2441 | + end_extent_writepage(page, ret, page_start, page_end); |
|---|
| 2442 | + clear_page_dirty_for_io(page); |
|---|
| 2443 | + SetPageError(page); |
|---|
| 2444 | + } |
|---|
| 2445 | + ClearPageChecked(page); |
|---|
| 2202 | 2446 | unlock_page(page); |
|---|
| 2203 | 2447 | put_page(page); |
|---|
| 2204 | 2448 | kfree(fixup); |
|---|
| 2205 | 2449 | extent_changeset_free(data_reserved); |
|---|
| 2450 | + /* |
|---|
| 2451 | + * As a precaution, do a delayed iput in case it would be the last iput |
|---|
| 2452 | + * that could need flushing space. Recursing back to fixup worker would |
|---|
| 2453 | + * deadlock. |
|---|
| 2454 | + */ |
|---|
| 2455 | + btrfs_add_delayed_iput(&inode->vfs_inode); |
|---|
| 2206 | 2456 | } |
|---|
| 2207 | 2457 | |
|---|
| 2208 | 2458 | /* |
|---|
| .. | .. |
|---|
| 2216 | 2466 | * to fix it up. The async helper will wait for ordered extents, set |
|---|
| 2217 | 2467 | * the delalloc bit and make it safe to write the page. |
|---|
| 2218 | 2468 | */ |
|---|
| 2219 | | -static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) |
|---|
| 2469 | +int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end) |
|---|
| 2220 | 2470 | { |
|---|
| 2221 | 2471 | struct inode *inode = page->mapping->host; |
|---|
| 2222 | 2472 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| .. | .. |
|---|
| 2226 | 2476 | if (TestClearPagePrivate2(page)) |
|---|
| 2227 | 2477 | return 0; |
|---|
| 2228 | 2478 | |
|---|
| 2479 | + /* |
|---|
| 2480 | + * PageChecked is set below when we create a fixup worker for this page, |
|---|
| 2481 | + * don't try to create another one if we're already PageChecked() |
|---|
| 2482 | + * |
|---|
| 2483 | + * The extent_io writepage code will redirty the page if we send back |
|---|
| 2484 | + * EAGAIN. |
|---|
| 2485 | + */ |
|---|
| 2229 | 2486 | if (PageChecked(page)) |
|---|
| 2230 | 2487 | return -EAGAIN; |
|---|
| 2231 | 2488 | |
|---|
| .. | .. |
|---|
| 2233 | 2490 | if (!fixup) |
|---|
| 2234 | 2491 | return -EAGAIN; |
|---|
| 2235 | 2492 | |
|---|
| 2493 | + /* |
|---|
| 2494 | + * We are already holding a reference to this inode from |
|---|
| 2495 | + * write_cache_pages. We need to hold it because the space reservation |
|---|
| 2496 | + * takes place outside of the page lock, and we can't trust |
|---|
| 2497 | + * page->mapping outside of the page lock. |
|---|
| 2498 | + */ |
|---|
| 2499 | + ihold(inode); |
|---|
| 2236 | 2500 | SetPageChecked(page); |
|---|
| 2237 | 2501 | get_page(page); |
|---|
| 2238 | | - btrfs_init_work(&fixup->work, btrfs_fixup_helper, |
|---|
| 2239 | | - btrfs_writepage_fixup_worker, NULL, NULL); |
|---|
| 2502 | + btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); |
|---|
| 2240 | 2503 | fixup->page = page; |
|---|
| 2504 | + fixup->inode = inode; |
|---|
| 2241 | 2505 | btrfs_queue_work(fs_info->fixup_workers, &fixup->work); |
|---|
| 2242 | | - return -EBUSY; |
|---|
| 2506 | + |
|---|
| 2507 | + return -EAGAIN; |
|---|
| 2243 | 2508 | } |
|---|
| 2244 | 2509 | |
|---|
| 2245 | 2510 | static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, |
|---|
| 2246 | | - struct inode *inode, u64 file_pos, |
|---|
| 2247 | | - u64 disk_bytenr, u64 disk_num_bytes, |
|---|
| 2248 | | - u64 num_bytes, u64 ram_bytes, |
|---|
| 2249 | | - u8 compression, u8 encryption, |
|---|
| 2250 | | - u16 other_encoding, int extent_type) |
|---|
| 2511 | + struct btrfs_inode *inode, u64 file_pos, |
|---|
| 2512 | + struct btrfs_file_extent_item *stack_fi, |
|---|
| 2513 | + u64 qgroup_reserved) |
|---|
| 2251 | 2514 | { |
|---|
| 2252 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 2253 | | - struct btrfs_file_extent_item *fi; |
|---|
| 2515 | + struct btrfs_root *root = inode->root; |
|---|
| 2254 | 2516 | struct btrfs_path *path; |
|---|
| 2255 | 2517 | struct extent_buffer *leaf; |
|---|
| 2256 | 2518 | struct btrfs_key ins; |
|---|
| 2257 | | - u64 qg_released; |
|---|
| 2519 | + u64 disk_num_bytes = btrfs_stack_file_extent_disk_num_bytes(stack_fi); |
|---|
| 2520 | + u64 disk_bytenr = btrfs_stack_file_extent_disk_bytenr(stack_fi); |
|---|
| 2521 | + u64 num_bytes = btrfs_stack_file_extent_num_bytes(stack_fi); |
|---|
| 2522 | + u64 ram_bytes = btrfs_stack_file_extent_ram_bytes(stack_fi); |
|---|
| 2258 | 2523 | int extent_inserted = 0; |
|---|
| 2259 | 2524 | int ret; |
|---|
| 2260 | 2525 | |
|---|
| .. | .. |
|---|
| 2273 | 2538 | */ |
|---|
| 2274 | 2539 | ret = __btrfs_drop_extents(trans, root, inode, path, file_pos, |
|---|
| 2275 | 2540 | file_pos + num_bytes, NULL, 0, |
|---|
| 2276 | | - 1, sizeof(*fi), &extent_inserted); |
|---|
| 2541 | + 1, sizeof(*stack_fi), &extent_inserted); |
|---|
| 2277 | 2542 | if (ret) |
|---|
| 2278 | 2543 | goto out; |
|---|
| 2279 | 2544 | |
|---|
| 2280 | 2545 | if (!extent_inserted) { |
|---|
| 2281 | | - ins.objectid = btrfs_ino(BTRFS_I(inode)); |
|---|
| 2546 | + ins.objectid = btrfs_ino(inode); |
|---|
| 2282 | 2547 | ins.offset = file_pos; |
|---|
| 2283 | 2548 | ins.type = BTRFS_EXTENT_DATA_KEY; |
|---|
| 2284 | 2549 | |
|---|
| 2285 | 2550 | path->leave_spinning = 1; |
|---|
| 2286 | 2551 | ret = btrfs_insert_empty_item(trans, root, path, &ins, |
|---|
| 2287 | | - sizeof(*fi)); |
|---|
| 2552 | + sizeof(*stack_fi)); |
|---|
| 2288 | 2553 | if (ret) |
|---|
| 2289 | 2554 | goto out; |
|---|
| 2290 | 2555 | } |
|---|
| 2291 | 2556 | leaf = path->nodes[0]; |
|---|
| 2292 | | - fi = btrfs_item_ptr(leaf, path->slots[0], |
|---|
| 2293 | | - struct btrfs_file_extent_item); |
|---|
| 2294 | | - btrfs_set_file_extent_generation(leaf, fi, trans->transid); |
|---|
| 2295 | | - btrfs_set_file_extent_type(leaf, fi, extent_type); |
|---|
| 2296 | | - btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr); |
|---|
| 2297 | | - btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_num_bytes); |
|---|
| 2298 | | - btrfs_set_file_extent_offset(leaf, fi, 0); |
|---|
| 2299 | | - btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); |
|---|
| 2300 | | - btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes); |
|---|
| 2301 | | - btrfs_set_file_extent_compression(leaf, fi, compression); |
|---|
| 2302 | | - btrfs_set_file_extent_encryption(leaf, fi, encryption); |
|---|
| 2303 | | - btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding); |
|---|
| 2557 | + btrfs_set_stack_file_extent_generation(stack_fi, trans->transid); |
|---|
| 2558 | + write_extent_buffer(leaf, stack_fi, |
|---|
| 2559 | + btrfs_item_ptr_offset(leaf, path->slots[0]), |
|---|
| 2560 | + sizeof(struct btrfs_file_extent_item)); |
|---|
| 2304 | 2561 | |
|---|
| 2305 | 2562 | btrfs_mark_buffer_dirty(leaf); |
|---|
| 2306 | 2563 | btrfs_release_path(path); |
|---|
| 2307 | 2564 | |
|---|
| 2308 | | - inode_add_bytes(inode, num_bytes); |
|---|
| 2565 | + inode_add_bytes(&inode->vfs_inode, num_bytes); |
|---|
| 2309 | 2566 | |
|---|
| 2310 | 2567 | ins.objectid = disk_bytenr; |
|---|
| 2311 | 2568 | ins.offset = disk_num_bytes; |
|---|
| 2312 | 2569 | ins.type = BTRFS_EXTENT_ITEM_KEY; |
|---|
| 2313 | 2570 | |
|---|
| 2314 | | - /* |
|---|
| 2315 | | - * Release the reserved range from inode dirty range map, as it is |
|---|
| 2316 | | - * already moved into delayed_ref_head |
|---|
| 2317 | | - */ |
|---|
| 2318 | | - ret = btrfs_qgroup_release_data(inode, file_pos, ram_bytes); |
|---|
| 2319 | | - if (ret < 0) |
|---|
| 2320 | | - goto out; |
|---|
| 2321 | | - qg_released = ret; |
|---|
| 2322 | | - ret = btrfs_alloc_reserved_file_extent(trans, root, |
|---|
| 2323 | | - btrfs_ino(BTRFS_I(inode)), |
|---|
| 2324 | | - file_pos, qg_released, &ins); |
|---|
| 2325 | | -out: |
|---|
| 2326 | | - btrfs_free_path(path); |
|---|
| 2327 | | - |
|---|
| 2328 | | - return ret; |
|---|
| 2329 | | -} |
|---|
| 2330 | | - |
|---|
| 2331 | | -/* snapshot-aware defrag */ |
|---|
| 2332 | | -struct sa_defrag_extent_backref { |
|---|
| 2333 | | - struct rb_node node; |
|---|
| 2334 | | - struct old_sa_defrag_extent *old; |
|---|
| 2335 | | - u64 root_id; |
|---|
| 2336 | | - u64 inum; |
|---|
| 2337 | | - u64 file_pos; |
|---|
| 2338 | | - u64 extent_offset; |
|---|
| 2339 | | - u64 num_bytes; |
|---|
| 2340 | | - u64 generation; |
|---|
| 2341 | | -}; |
|---|
| 2342 | | - |
|---|
| 2343 | | -struct old_sa_defrag_extent { |
|---|
| 2344 | | - struct list_head list; |
|---|
| 2345 | | - struct new_sa_defrag_extent *new; |
|---|
| 2346 | | - |
|---|
| 2347 | | - u64 extent_offset; |
|---|
| 2348 | | - u64 bytenr; |
|---|
| 2349 | | - u64 offset; |
|---|
| 2350 | | - u64 len; |
|---|
| 2351 | | - int count; |
|---|
| 2352 | | -}; |
|---|
| 2353 | | - |
|---|
| 2354 | | -struct new_sa_defrag_extent { |
|---|
| 2355 | | - struct rb_root root; |
|---|
| 2356 | | - struct list_head head; |
|---|
| 2357 | | - struct btrfs_path *path; |
|---|
| 2358 | | - struct inode *inode; |
|---|
| 2359 | | - u64 file_pos; |
|---|
| 2360 | | - u64 len; |
|---|
| 2361 | | - u64 bytenr; |
|---|
| 2362 | | - u64 disk_len; |
|---|
| 2363 | | - u8 compress_type; |
|---|
| 2364 | | -}; |
|---|
| 2365 | | - |
|---|
| 2366 | | -static int backref_comp(struct sa_defrag_extent_backref *b1, |
|---|
| 2367 | | - struct sa_defrag_extent_backref *b2) |
|---|
| 2368 | | -{ |
|---|
| 2369 | | - if (b1->root_id < b2->root_id) |
|---|
| 2370 | | - return -1; |
|---|
| 2371 | | - else if (b1->root_id > b2->root_id) |
|---|
| 2372 | | - return 1; |
|---|
| 2373 | | - |
|---|
| 2374 | | - if (b1->inum < b2->inum) |
|---|
| 2375 | | - return -1; |
|---|
| 2376 | | - else if (b1->inum > b2->inum) |
|---|
| 2377 | | - return 1; |
|---|
| 2378 | | - |
|---|
| 2379 | | - if (b1->file_pos < b2->file_pos) |
|---|
| 2380 | | - return -1; |
|---|
| 2381 | | - else if (b1->file_pos > b2->file_pos) |
|---|
| 2382 | | - return 1; |
|---|
| 2383 | | - |
|---|
| 2384 | | - /* |
|---|
| 2385 | | - * [------------------------------] ===> (a range of space) |
|---|
| 2386 | | - * |<--->| |<---->| =============> (fs/file tree A) |
|---|
| 2387 | | - * |<---------------------------->| ===> (fs/file tree B) |
|---|
| 2388 | | - * |
|---|
| 2389 | | - * A range of space can refer to two file extents in one tree while |
|---|
| 2390 | | - * refer to only one file extent in another tree. |
|---|
| 2391 | | - * |
|---|
| 2392 | | - * So we may process a disk offset more than one time(two extents in A) |
|---|
| 2393 | | - * and locate at the same extent(one extent in B), then insert two same |
|---|
| 2394 | | - * backrefs(both refer to the extent in B). |
|---|
| 2395 | | - */ |
|---|
| 2396 | | - return 0; |
|---|
| 2397 | | -} |
|---|
| 2398 | | - |
|---|
| 2399 | | -static void backref_insert(struct rb_root *root, |
|---|
| 2400 | | - struct sa_defrag_extent_backref *backref) |
|---|
| 2401 | | -{ |
|---|
| 2402 | | - struct rb_node **p = &root->rb_node; |
|---|
| 2403 | | - struct rb_node *parent = NULL; |
|---|
| 2404 | | - struct sa_defrag_extent_backref *entry; |
|---|
| 2405 | | - int ret; |
|---|
| 2406 | | - |
|---|
| 2407 | | - while (*p) { |
|---|
| 2408 | | - parent = *p; |
|---|
| 2409 | | - entry = rb_entry(parent, struct sa_defrag_extent_backref, node); |
|---|
| 2410 | | - |
|---|
| 2411 | | - ret = backref_comp(backref, entry); |
|---|
| 2412 | | - if (ret < 0) |
|---|
| 2413 | | - p = &(*p)->rb_left; |
|---|
| 2414 | | - else |
|---|
| 2415 | | - p = &(*p)->rb_right; |
|---|
| 2416 | | - } |
|---|
| 2417 | | - |
|---|
| 2418 | | - rb_link_node(&backref->node, parent, p); |
|---|
| 2419 | | - rb_insert_color(&backref->node, root); |
|---|
| 2420 | | -} |
|---|
| 2421 | | - |
|---|
| 2422 | | -/* |
|---|
| 2423 | | - * Note the backref might has changed, and in this case we just return 0. |
|---|
| 2424 | | - */ |
|---|
| 2425 | | -static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, |
|---|
| 2426 | | - void *ctx) |
|---|
| 2427 | | -{ |
|---|
| 2428 | | - struct btrfs_file_extent_item *extent; |
|---|
| 2429 | | - struct old_sa_defrag_extent *old = ctx; |
|---|
| 2430 | | - struct new_sa_defrag_extent *new = old->new; |
|---|
| 2431 | | - struct btrfs_path *path = new->path; |
|---|
| 2432 | | - struct btrfs_key key; |
|---|
| 2433 | | - struct btrfs_root *root; |
|---|
| 2434 | | - struct sa_defrag_extent_backref *backref; |
|---|
| 2435 | | - struct extent_buffer *leaf; |
|---|
| 2436 | | - struct inode *inode = new->inode; |
|---|
| 2437 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 2438 | | - int slot; |
|---|
| 2439 | | - int ret; |
|---|
| 2440 | | - u64 extent_offset; |
|---|
| 2441 | | - u64 num_bytes; |
|---|
| 2442 | | - |
|---|
| 2443 | | - if (BTRFS_I(inode)->root->root_key.objectid == root_id && |
|---|
| 2444 | | - inum == btrfs_ino(BTRFS_I(inode))) |
|---|
| 2445 | | - return 0; |
|---|
| 2446 | | - |
|---|
| 2447 | | - key.objectid = root_id; |
|---|
| 2448 | | - key.type = BTRFS_ROOT_ITEM_KEY; |
|---|
| 2449 | | - key.offset = (u64)-1; |
|---|
| 2450 | | - |
|---|
| 2451 | | - root = btrfs_read_fs_root_no_name(fs_info, &key); |
|---|
| 2452 | | - if (IS_ERR(root)) { |
|---|
| 2453 | | - if (PTR_ERR(root) == -ENOENT) |
|---|
| 2454 | | - return 0; |
|---|
| 2455 | | - WARN_ON(1); |
|---|
| 2456 | | - btrfs_debug(fs_info, "inum=%llu, offset=%llu, root_id=%llu", |
|---|
| 2457 | | - inum, offset, root_id); |
|---|
| 2458 | | - return PTR_ERR(root); |
|---|
| 2459 | | - } |
|---|
| 2460 | | - |
|---|
| 2461 | | - key.objectid = inum; |
|---|
| 2462 | | - key.type = BTRFS_EXTENT_DATA_KEY; |
|---|
| 2463 | | - if (offset > (u64)-1 << 32) |
|---|
| 2464 | | - key.offset = 0; |
|---|
| 2465 | | - else |
|---|
| 2466 | | - key.offset = offset; |
|---|
| 2467 | | - |
|---|
| 2468 | | - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
|---|
| 2469 | | - if (WARN_ON(ret < 0)) |
|---|
| 2470 | | - return ret; |
|---|
| 2471 | | - ret = 0; |
|---|
| 2472 | | - |
|---|
| 2473 | | - while (1) { |
|---|
| 2474 | | - cond_resched(); |
|---|
| 2475 | | - |
|---|
| 2476 | | - leaf = path->nodes[0]; |
|---|
| 2477 | | - slot = path->slots[0]; |
|---|
| 2478 | | - |
|---|
| 2479 | | - if (slot >= btrfs_header_nritems(leaf)) { |
|---|
| 2480 | | - ret = btrfs_next_leaf(root, path); |
|---|
| 2481 | | - if (ret < 0) { |
|---|
| 2482 | | - goto out; |
|---|
| 2483 | | - } else if (ret > 0) { |
|---|
| 2484 | | - ret = 0; |
|---|
| 2485 | | - goto out; |
|---|
| 2486 | | - } |
|---|
| 2487 | | - continue; |
|---|
| 2488 | | - } |
|---|
| 2489 | | - |
|---|
| 2490 | | - path->slots[0]++; |
|---|
| 2491 | | - |
|---|
| 2492 | | - btrfs_item_key_to_cpu(leaf, &key, slot); |
|---|
| 2493 | | - |
|---|
| 2494 | | - if (key.objectid > inum) |
|---|
| 2495 | | - goto out; |
|---|
| 2496 | | - |
|---|
| 2497 | | - if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY) |
|---|
| 2498 | | - continue; |
|---|
| 2499 | | - |
|---|
| 2500 | | - extent = btrfs_item_ptr(leaf, slot, |
|---|
| 2501 | | - struct btrfs_file_extent_item); |
|---|
| 2502 | | - |
|---|
| 2503 | | - if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr) |
|---|
| 2504 | | - continue; |
|---|
| 2505 | | - |
|---|
| 2506 | | - /* |
|---|
| 2507 | | - * 'offset' refers to the exact key.offset, |
|---|
| 2508 | | - * NOT the 'offset' field in btrfs_extent_data_ref, ie. |
|---|
| 2509 | | - * (key.offset - extent_offset). |
|---|
| 2510 | | - */ |
|---|
| 2511 | | - if (key.offset != offset) |
|---|
| 2512 | | - continue; |
|---|
| 2513 | | - |
|---|
| 2514 | | - extent_offset = btrfs_file_extent_offset(leaf, extent); |
|---|
| 2515 | | - num_bytes = btrfs_file_extent_num_bytes(leaf, extent); |
|---|
| 2516 | | - |
|---|
| 2517 | | - if (extent_offset >= old->extent_offset + old->offset + |
|---|
| 2518 | | - old->len || extent_offset + num_bytes <= |
|---|
| 2519 | | - old->extent_offset + old->offset) |
|---|
| 2520 | | - continue; |
|---|
| 2521 | | - break; |
|---|
| 2522 | | - } |
|---|
| 2523 | | - |
|---|
| 2524 | | - backref = kmalloc(sizeof(*backref), GFP_NOFS); |
|---|
| 2525 | | - if (!backref) { |
|---|
| 2526 | | - ret = -ENOENT; |
|---|
| 2527 | | - goto out; |
|---|
| 2528 | | - } |
|---|
| 2529 | | - |
|---|
| 2530 | | - backref->root_id = root_id; |
|---|
| 2531 | | - backref->inum = inum; |
|---|
| 2532 | | - backref->file_pos = offset; |
|---|
| 2533 | | - backref->num_bytes = num_bytes; |
|---|
| 2534 | | - backref->extent_offset = extent_offset; |
|---|
| 2535 | | - backref->generation = btrfs_file_extent_generation(leaf, extent); |
|---|
| 2536 | | - backref->old = old; |
|---|
| 2537 | | - backref_insert(&new->root, backref); |
|---|
| 2538 | | - old->count++; |
|---|
| 2539 | | -out: |
|---|
| 2540 | | - btrfs_release_path(path); |
|---|
| 2541 | | - WARN_ON(ret); |
|---|
| 2542 | | - return ret; |
|---|
| 2543 | | -} |
|---|
| 2544 | | - |
|---|
| 2545 | | -static noinline bool record_extent_backrefs(struct btrfs_path *path, |
|---|
| 2546 | | - struct new_sa_defrag_extent *new) |
|---|
| 2547 | | -{ |
|---|
| 2548 | | - struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb); |
|---|
| 2549 | | - struct old_sa_defrag_extent *old, *tmp; |
|---|
| 2550 | | - int ret; |
|---|
| 2551 | | - |
|---|
| 2552 | | - new->path = path; |
|---|
| 2553 | | - |
|---|
| 2554 | | - list_for_each_entry_safe(old, tmp, &new->head, list) { |
|---|
| 2555 | | - ret = iterate_inodes_from_logical(old->bytenr + |
|---|
| 2556 | | - old->extent_offset, fs_info, |
|---|
| 2557 | | - path, record_one_backref, |
|---|
| 2558 | | - old, false); |
|---|
| 2559 | | - if (ret < 0 && ret != -ENOENT) |
|---|
| 2560 | | - return false; |
|---|
| 2561 | | - |
|---|
| 2562 | | - /* no backref to be processed for this extent */ |
|---|
| 2563 | | - if (!old->count) { |
|---|
| 2564 | | - list_del(&old->list); |
|---|
| 2565 | | - kfree(old); |
|---|
| 2566 | | - } |
|---|
| 2567 | | - } |
|---|
| 2568 | | - |
|---|
| 2569 | | - if (list_empty(&new->head)) |
|---|
| 2570 | | - return false; |
|---|
| 2571 | | - |
|---|
| 2572 | | - return true; |
|---|
| 2573 | | -} |
|---|
| 2574 | | - |
|---|
| 2575 | | -static int relink_is_mergable(struct extent_buffer *leaf, |
|---|
| 2576 | | - struct btrfs_file_extent_item *fi, |
|---|
| 2577 | | - struct new_sa_defrag_extent *new) |
|---|
| 2578 | | -{ |
|---|
| 2579 | | - if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr) |
|---|
| 2580 | | - return 0; |
|---|
| 2581 | | - |
|---|
| 2582 | | - if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) |
|---|
| 2583 | | - return 0; |
|---|
| 2584 | | - |
|---|
| 2585 | | - if (btrfs_file_extent_compression(leaf, fi) != new->compress_type) |
|---|
| 2586 | | - return 0; |
|---|
| 2587 | | - |
|---|
| 2588 | | - if (btrfs_file_extent_encryption(leaf, fi) || |
|---|
| 2589 | | - btrfs_file_extent_other_encoding(leaf, fi)) |
|---|
| 2590 | | - return 0; |
|---|
| 2591 | | - |
|---|
| 2592 | | - return 1; |
|---|
| 2593 | | -} |
|---|
| 2594 | | - |
|---|
| 2595 | | -/* |
|---|
| 2596 | | - * Note the backref might has changed, and in this case we just return 0. |
|---|
| 2597 | | - */ |
|---|
| 2598 | | -static noinline int relink_extent_backref(struct btrfs_path *path, |
|---|
| 2599 | | - struct sa_defrag_extent_backref *prev, |
|---|
| 2600 | | - struct sa_defrag_extent_backref *backref) |
|---|
| 2601 | | -{ |
|---|
| 2602 | | - struct btrfs_file_extent_item *extent; |
|---|
| 2603 | | - struct btrfs_file_extent_item *item; |
|---|
| 2604 | | - struct btrfs_ordered_extent *ordered; |
|---|
| 2605 | | - struct btrfs_trans_handle *trans; |
|---|
| 2606 | | - struct btrfs_root *root; |
|---|
| 2607 | | - struct btrfs_key key; |
|---|
| 2608 | | - struct extent_buffer *leaf; |
|---|
| 2609 | | - struct old_sa_defrag_extent *old = backref->old; |
|---|
| 2610 | | - struct new_sa_defrag_extent *new = old->new; |
|---|
| 2611 | | - struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb); |
|---|
| 2612 | | - struct inode *inode; |
|---|
| 2613 | | - struct extent_state *cached = NULL; |
|---|
| 2614 | | - int ret = 0; |
|---|
| 2615 | | - u64 start; |
|---|
| 2616 | | - u64 len; |
|---|
| 2617 | | - u64 lock_start; |
|---|
| 2618 | | - u64 lock_end; |
|---|
| 2619 | | - bool merge = false; |
|---|
| 2620 | | - int index; |
|---|
| 2621 | | - |
|---|
| 2622 | | - if (prev && prev->root_id == backref->root_id && |
|---|
| 2623 | | - prev->inum == backref->inum && |
|---|
| 2624 | | - prev->file_pos + prev->num_bytes == backref->file_pos) |
|---|
| 2625 | | - merge = true; |
|---|
| 2626 | | - |
|---|
| 2627 | | - /* step 1: get root */ |
|---|
| 2628 | | - key.objectid = backref->root_id; |
|---|
| 2629 | | - key.type = BTRFS_ROOT_ITEM_KEY; |
|---|
| 2630 | | - key.offset = (u64)-1; |
|---|
| 2631 | | - |
|---|
| 2632 | | - index = srcu_read_lock(&fs_info->subvol_srcu); |
|---|
| 2633 | | - |
|---|
| 2634 | | - root = btrfs_read_fs_root_no_name(fs_info, &key); |
|---|
| 2635 | | - if (IS_ERR(root)) { |
|---|
| 2636 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
|---|
| 2637 | | - if (PTR_ERR(root) == -ENOENT) |
|---|
| 2638 | | - return 0; |
|---|
| 2639 | | - return PTR_ERR(root); |
|---|
| 2640 | | - } |
|---|
| 2641 | | - |
|---|
| 2642 | | - if (btrfs_root_readonly(root)) { |
|---|
| 2643 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
|---|
| 2644 | | - return 0; |
|---|
| 2645 | | - } |
|---|
| 2646 | | - |
|---|
| 2647 | | - /* step 2: get inode */ |
|---|
| 2648 | | - key.objectid = backref->inum; |
|---|
| 2649 | | - key.type = BTRFS_INODE_ITEM_KEY; |
|---|
| 2650 | | - key.offset = 0; |
|---|
| 2651 | | - |
|---|
| 2652 | | - inode = btrfs_iget(fs_info->sb, &key, root, NULL); |
|---|
| 2653 | | - if (IS_ERR(inode)) { |
|---|
| 2654 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
|---|
| 2655 | | - return 0; |
|---|
| 2656 | | - } |
|---|
| 2657 | | - |
|---|
| 2658 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
|---|
| 2659 | | - |
|---|
| 2660 | | - /* step 3: relink backref */ |
|---|
| 2661 | | - lock_start = backref->file_pos; |
|---|
| 2662 | | - lock_end = backref->file_pos + backref->num_bytes - 1; |
|---|
| 2663 | | - lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end, |
|---|
| 2664 | | - &cached); |
|---|
| 2665 | | - |
|---|
| 2666 | | - ordered = btrfs_lookup_first_ordered_extent(inode, lock_end); |
|---|
| 2667 | | - if (ordered) { |
|---|
| 2668 | | - btrfs_put_ordered_extent(ordered); |
|---|
| 2669 | | - goto out_unlock; |
|---|
| 2670 | | - } |
|---|
| 2671 | | - |
|---|
| 2672 | | - trans = btrfs_join_transaction(root); |
|---|
| 2673 | | - if (IS_ERR(trans)) { |
|---|
| 2674 | | - ret = PTR_ERR(trans); |
|---|
| 2675 | | - goto out_unlock; |
|---|
| 2676 | | - } |
|---|
| 2677 | | - |
|---|
| 2678 | | - key.objectid = backref->inum; |
|---|
| 2679 | | - key.type = BTRFS_EXTENT_DATA_KEY; |
|---|
| 2680 | | - key.offset = backref->file_pos; |
|---|
| 2681 | | - |
|---|
| 2682 | | - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
|---|
| 2683 | | - if (ret < 0) { |
|---|
| 2684 | | - goto out_free_path; |
|---|
| 2685 | | - } else if (ret > 0) { |
|---|
| 2686 | | - ret = 0; |
|---|
| 2687 | | - goto out_free_path; |
|---|
| 2688 | | - } |
|---|
| 2689 | | - |
|---|
| 2690 | | - extent = btrfs_item_ptr(path->nodes[0], path->slots[0], |
|---|
| 2691 | | - struct btrfs_file_extent_item); |
|---|
| 2692 | | - |
|---|
| 2693 | | - if (btrfs_file_extent_generation(path->nodes[0], extent) != |
|---|
| 2694 | | - backref->generation) |
|---|
| 2695 | | - goto out_free_path; |
|---|
| 2696 | | - |
|---|
| 2697 | | - btrfs_release_path(path); |
|---|
| 2698 | | - |
|---|
| 2699 | | - start = backref->file_pos; |
|---|
| 2700 | | - if (backref->extent_offset < old->extent_offset + old->offset) |
|---|
| 2701 | | - start += old->extent_offset + old->offset - |
|---|
| 2702 | | - backref->extent_offset; |
|---|
| 2703 | | - |
|---|
| 2704 | | - len = min(backref->extent_offset + backref->num_bytes, |
|---|
| 2705 | | - old->extent_offset + old->offset + old->len); |
|---|
| 2706 | | - len -= max(backref->extent_offset, old->extent_offset + old->offset); |
|---|
| 2707 | | - |
|---|
| 2708 | | - ret = btrfs_drop_extents(trans, root, inode, start, |
|---|
| 2709 | | - start + len, 1); |
|---|
| 2571 | + ret = btrfs_inode_set_file_extent_range(inode, file_pos, ram_bytes); |
|---|
| 2710 | 2572 | if (ret) |
|---|
| 2711 | | - goto out_free_path; |
|---|
| 2712 | | -again: |
|---|
| 2713 | | - key.objectid = btrfs_ino(BTRFS_I(inode)); |
|---|
| 2714 | | - key.type = BTRFS_EXTENT_DATA_KEY; |
|---|
| 2715 | | - key.offset = start; |
|---|
| 2716 | | - |
|---|
| 2717 | | - path->leave_spinning = 1; |
|---|
| 2718 | | - if (merge) { |
|---|
| 2719 | | - struct btrfs_file_extent_item *fi; |
|---|
| 2720 | | - u64 extent_len; |
|---|
| 2721 | | - struct btrfs_key found_key; |
|---|
| 2722 | | - |
|---|
| 2723 | | - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); |
|---|
| 2724 | | - if (ret < 0) |
|---|
| 2725 | | - goto out_free_path; |
|---|
| 2726 | | - |
|---|
| 2727 | | - path->slots[0]--; |
|---|
| 2728 | | - leaf = path->nodes[0]; |
|---|
| 2729 | | - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
|---|
| 2730 | | - |
|---|
| 2731 | | - fi = btrfs_item_ptr(leaf, path->slots[0], |
|---|
| 2732 | | - struct btrfs_file_extent_item); |
|---|
| 2733 | | - extent_len = btrfs_file_extent_num_bytes(leaf, fi); |
|---|
| 2734 | | - |
|---|
| 2735 | | - if (extent_len + found_key.offset == start && |
|---|
| 2736 | | - relink_is_mergable(leaf, fi, new)) { |
|---|
| 2737 | | - btrfs_set_file_extent_num_bytes(leaf, fi, |
|---|
| 2738 | | - extent_len + len); |
|---|
| 2739 | | - btrfs_mark_buffer_dirty(leaf); |
|---|
| 2740 | | - inode_add_bytes(inode, len); |
|---|
| 2741 | | - |
|---|
| 2742 | | - ret = 1; |
|---|
| 2743 | | - goto out_free_path; |
|---|
| 2744 | | - } else { |
|---|
| 2745 | | - merge = false; |
|---|
| 2746 | | - btrfs_release_path(path); |
|---|
| 2747 | | - goto again; |
|---|
| 2748 | | - } |
|---|
| 2749 | | - } |
|---|
| 2750 | | - |
|---|
| 2751 | | - ret = btrfs_insert_empty_item(trans, root, path, &key, |
|---|
| 2752 | | - sizeof(*extent)); |
|---|
| 2753 | | - if (ret) { |
|---|
| 2754 | | - btrfs_abort_transaction(trans, ret); |
|---|
| 2755 | | - goto out_free_path; |
|---|
| 2756 | | - } |
|---|
| 2757 | | - |
|---|
| 2758 | | - leaf = path->nodes[0]; |
|---|
| 2759 | | - item = btrfs_item_ptr(leaf, path->slots[0], |
|---|
| 2760 | | - struct btrfs_file_extent_item); |
|---|
| 2761 | | - btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr); |
|---|
| 2762 | | - btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len); |
|---|
| 2763 | | - btrfs_set_file_extent_offset(leaf, item, start - new->file_pos); |
|---|
| 2764 | | - btrfs_set_file_extent_num_bytes(leaf, item, len); |
|---|
| 2765 | | - btrfs_set_file_extent_ram_bytes(leaf, item, new->len); |
|---|
| 2766 | | - btrfs_set_file_extent_generation(leaf, item, trans->transid); |
|---|
| 2767 | | - btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); |
|---|
| 2768 | | - btrfs_set_file_extent_compression(leaf, item, new->compress_type); |
|---|
| 2769 | | - btrfs_set_file_extent_encryption(leaf, item, 0); |
|---|
| 2770 | | - btrfs_set_file_extent_other_encoding(leaf, item, 0); |
|---|
| 2771 | | - |
|---|
| 2772 | | - btrfs_mark_buffer_dirty(leaf); |
|---|
| 2773 | | - inode_add_bytes(inode, len); |
|---|
| 2774 | | - btrfs_release_path(path); |
|---|
| 2775 | | - |
|---|
| 2776 | | - ret = btrfs_inc_extent_ref(trans, root, new->bytenr, |
|---|
| 2777 | | - new->disk_len, 0, |
|---|
| 2778 | | - backref->root_id, backref->inum, |
|---|
| 2779 | | - new->file_pos); /* start - extent_offset */ |
|---|
| 2780 | | - if (ret) { |
|---|
| 2781 | | - btrfs_abort_transaction(trans, ret); |
|---|
| 2782 | | - goto out_free_path; |
|---|
| 2783 | | - } |
|---|
| 2784 | | - |
|---|
| 2785 | | - ret = 1; |
|---|
| 2786 | | -out_free_path: |
|---|
| 2787 | | - btrfs_release_path(path); |
|---|
| 2788 | | - path->leave_spinning = 0; |
|---|
| 2789 | | - btrfs_end_transaction(trans); |
|---|
| 2790 | | -out_unlock: |
|---|
| 2791 | | - unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end, |
|---|
| 2792 | | - &cached); |
|---|
| 2793 | | - iput(inode); |
|---|
| 2794 | | - return ret; |
|---|
| 2795 | | -} |
|---|
| 2796 | | - |
|---|
| 2797 | | -static void free_sa_defrag_extent(struct new_sa_defrag_extent *new) |
|---|
| 2798 | | -{ |
|---|
| 2799 | | - struct old_sa_defrag_extent *old, *tmp; |
|---|
| 2800 | | - |
|---|
| 2801 | | - if (!new) |
|---|
| 2802 | | - return; |
|---|
| 2803 | | - |
|---|
| 2804 | | - list_for_each_entry_safe(old, tmp, &new->head, list) { |
|---|
| 2805 | | - kfree(old); |
|---|
| 2806 | | - } |
|---|
| 2807 | | - kfree(new); |
|---|
| 2808 | | -} |
|---|
| 2809 | | - |
|---|
| 2810 | | -static void relink_file_extents(struct new_sa_defrag_extent *new) |
|---|
| 2811 | | -{ |
|---|
| 2812 | | - struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb); |
|---|
| 2813 | | - struct btrfs_path *path; |
|---|
| 2814 | | - struct sa_defrag_extent_backref *backref; |
|---|
| 2815 | | - struct sa_defrag_extent_backref *prev = NULL; |
|---|
| 2816 | | - struct inode *inode; |
|---|
| 2817 | | - struct rb_node *node; |
|---|
| 2818 | | - int ret; |
|---|
| 2819 | | - |
|---|
| 2820 | | - inode = new->inode; |
|---|
| 2821 | | - |
|---|
| 2822 | | - path = btrfs_alloc_path(); |
|---|
| 2823 | | - if (!path) |
|---|
| 2824 | | - return; |
|---|
| 2825 | | - |
|---|
| 2826 | | - if (!record_extent_backrefs(path, new)) { |
|---|
| 2827 | | - btrfs_free_path(path); |
|---|
| 2828 | 2573 | goto out; |
|---|
| 2829 | | - } |
|---|
| 2830 | | - btrfs_release_path(path); |
|---|
| 2831 | 2574 | |
|---|
| 2832 | | - while (1) { |
|---|
| 2833 | | - node = rb_first(&new->root); |
|---|
| 2834 | | - if (!node) |
|---|
| 2835 | | - break; |
|---|
| 2836 | | - rb_erase(node, &new->root); |
|---|
| 2837 | | - |
|---|
| 2838 | | - backref = rb_entry(node, struct sa_defrag_extent_backref, node); |
|---|
| 2839 | | - |
|---|
| 2840 | | - ret = relink_extent_backref(path, prev, backref); |
|---|
| 2841 | | - WARN_ON(ret < 0); |
|---|
| 2842 | | - |
|---|
| 2843 | | - kfree(prev); |
|---|
| 2844 | | - |
|---|
| 2845 | | - if (ret == 1) |
|---|
| 2846 | | - prev = backref; |
|---|
| 2847 | | - else |
|---|
| 2848 | | - prev = NULL; |
|---|
| 2849 | | - cond_resched(); |
|---|
| 2850 | | - } |
|---|
| 2851 | | - kfree(prev); |
|---|
| 2852 | | - |
|---|
| 2853 | | - btrfs_free_path(path); |
|---|
| 2575 | + ret = btrfs_alloc_reserved_file_extent(trans, root, btrfs_ino(inode), |
|---|
| 2576 | + file_pos, qgroup_reserved, &ins); |
|---|
| 2854 | 2577 | out: |
|---|
| 2855 | | - free_sa_defrag_extent(new); |
|---|
| 2856 | | - |
|---|
| 2857 | | - atomic_dec(&fs_info->defrag_running); |
|---|
| 2858 | | - wake_up(&fs_info->transaction_wait); |
|---|
| 2859 | | -} |
|---|
| 2860 | | - |
|---|
| 2861 | | -static struct new_sa_defrag_extent * |
|---|
| 2862 | | -record_old_file_extents(struct inode *inode, |
|---|
| 2863 | | - struct btrfs_ordered_extent *ordered) |
|---|
| 2864 | | -{ |
|---|
| 2865 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 2866 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 2867 | | - struct btrfs_path *path; |
|---|
| 2868 | | - struct btrfs_key key; |
|---|
| 2869 | | - struct old_sa_defrag_extent *old; |
|---|
| 2870 | | - struct new_sa_defrag_extent *new; |
|---|
| 2871 | | - int ret; |
|---|
| 2872 | | - |
|---|
| 2873 | | - new = kmalloc(sizeof(*new), GFP_NOFS); |
|---|
| 2874 | | - if (!new) |
|---|
| 2875 | | - return NULL; |
|---|
| 2876 | | - |
|---|
| 2877 | | - new->inode = inode; |
|---|
| 2878 | | - new->file_pos = ordered->file_offset; |
|---|
| 2879 | | - new->len = ordered->len; |
|---|
| 2880 | | - new->bytenr = ordered->start; |
|---|
| 2881 | | - new->disk_len = ordered->disk_len; |
|---|
| 2882 | | - new->compress_type = ordered->compress_type; |
|---|
| 2883 | | - new->root = RB_ROOT; |
|---|
| 2884 | | - INIT_LIST_HEAD(&new->head); |
|---|
| 2885 | | - |
|---|
| 2886 | | - path = btrfs_alloc_path(); |
|---|
| 2887 | | - if (!path) |
|---|
| 2888 | | - goto out_kfree; |
|---|
| 2889 | | - |
|---|
| 2890 | | - key.objectid = btrfs_ino(BTRFS_I(inode)); |
|---|
| 2891 | | - key.type = BTRFS_EXTENT_DATA_KEY; |
|---|
| 2892 | | - key.offset = new->file_pos; |
|---|
| 2893 | | - |
|---|
| 2894 | | - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
|---|
| 2895 | | - if (ret < 0) |
|---|
| 2896 | | - goto out_free_path; |
|---|
| 2897 | | - if (ret > 0 && path->slots[0] > 0) |
|---|
| 2898 | | - path->slots[0]--; |
|---|
| 2899 | | - |
|---|
| 2900 | | - /* find out all the old extents for the file range */ |
|---|
| 2901 | | - while (1) { |
|---|
| 2902 | | - struct btrfs_file_extent_item *extent; |
|---|
| 2903 | | - struct extent_buffer *l; |
|---|
| 2904 | | - int slot; |
|---|
| 2905 | | - u64 num_bytes; |
|---|
| 2906 | | - u64 offset; |
|---|
| 2907 | | - u64 end; |
|---|
| 2908 | | - u64 disk_bytenr; |
|---|
| 2909 | | - u64 extent_offset; |
|---|
| 2910 | | - |
|---|
| 2911 | | - l = path->nodes[0]; |
|---|
| 2912 | | - slot = path->slots[0]; |
|---|
| 2913 | | - |
|---|
| 2914 | | - if (slot >= btrfs_header_nritems(l)) { |
|---|
| 2915 | | - ret = btrfs_next_leaf(root, path); |
|---|
| 2916 | | - if (ret < 0) |
|---|
| 2917 | | - goto out_free_path; |
|---|
| 2918 | | - else if (ret > 0) |
|---|
| 2919 | | - break; |
|---|
| 2920 | | - continue; |
|---|
| 2921 | | - } |
|---|
| 2922 | | - |
|---|
| 2923 | | - btrfs_item_key_to_cpu(l, &key, slot); |
|---|
| 2924 | | - |
|---|
| 2925 | | - if (key.objectid != btrfs_ino(BTRFS_I(inode))) |
|---|
| 2926 | | - break; |
|---|
| 2927 | | - if (key.type != BTRFS_EXTENT_DATA_KEY) |
|---|
| 2928 | | - break; |
|---|
| 2929 | | - if (key.offset >= new->file_pos + new->len) |
|---|
| 2930 | | - break; |
|---|
| 2931 | | - |
|---|
| 2932 | | - extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item); |
|---|
| 2933 | | - |
|---|
| 2934 | | - num_bytes = btrfs_file_extent_num_bytes(l, extent); |
|---|
| 2935 | | - if (key.offset + num_bytes < new->file_pos) |
|---|
| 2936 | | - goto next; |
|---|
| 2937 | | - |
|---|
| 2938 | | - disk_bytenr = btrfs_file_extent_disk_bytenr(l, extent); |
|---|
| 2939 | | - if (!disk_bytenr) |
|---|
| 2940 | | - goto next; |
|---|
| 2941 | | - |
|---|
| 2942 | | - extent_offset = btrfs_file_extent_offset(l, extent); |
|---|
| 2943 | | - |
|---|
| 2944 | | - old = kmalloc(sizeof(*old), GFP_NOFS); |
|---|
| 2945 | | - if (!old) |
|---|
| 2946 | | - goto out_free_path; |
|---|
| 2947 | | - |
|---|
| 2948 | | - offset = max(new->file_pos, key.offset); |
|---|
| 2949 | | - end = min(new->file_pos + new->len, key.offset + num_bytes); |
|---|
| 2950 | | - |
|---|
| 2951 | | - old->bytenr = disk_bytenr; |
|---|
| 2952 | | - old->extent_offset = extent_offset; |
|---|
| 2953 | | - old->offset = offset - key.offset; |
|---|
| 2954 | | - old->len = end - offset; |
|---|
| 2955 | | - old->new = new; |
|---|
| 2956 | | - old->count = 0; |
|---|
| 2957 | | - list_add_tail(&old->list, &new->head); |
|---|
| 2958 | | -next: |
|---|
| 2959 | | - path->slots[0]++; |
|---|
| 2960 | | - cond_resched(); |
|---|
| 2961 | | - } |
|---|
| 2962 | | - |
|---|
| 2963 | 2578 | btrfs_free_path(path); |
|---|
| 2964 | | - atomic_inc(&fs_info->defrag_running); |
|---|
| 2965 | 2579 | |
|---|
| 2966 | | - return new; |
|---|
| 2967 | | - |
|---|
| 2968 | | -out_free_path: |
|---|
| 2969 | | - btrfs_free_path(path); |
|---|
| 2970 | | -out_kfree: |
|---|
| 2971 | | - free_sa_defrag_extent(new); |
|---|
| 2972 | | - return NULL; |
|---|
| 2580 | + return ret; |
|---|
| 2973 | 2581 | } |
|---|
| 2974 | 2582 | |
|---|
| 2975 | 2583 | static void btrfs_release_delalloc_bytes(struct btrfs_fs_info *fs_info, |
|---|
| 2976 | 2584 | u64 start, u64 len) |
|---|
| 2977 | 2585 | { |
|---|
| 2978 | | - struct btrfs_block_group_cache *cache; |
|---|
| 2586 | + struct btrfs_block_group *cache; |
|---|
| 2979 | 2587 | |
|---|
| 2980 | 2588 | cache = btrfs_lookup_block_group(fs_info, start); |
|---|
| 2981 | 2589 | ASSERT(cache); |
|---|
| .. | .. |
|---|
| 2987 | 2595 | btrfs_put_block_group(cache); |
|---|
| 2988 | 2596 | } |
|---|
| 2989 | 2597 | |
|---|
| 2990 | | -/* as ordered data IO finishes, this gets called so we can finish |
|---|
| 2598 | +static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans, |
|---|
| 2599 | + struct btrfs_ordered_extent *oe) |
|---|
| 2600 | +{ |
|---|
| 2601 | + struct btrfs_file_extent_item stack_fi; |
|---|
| 2602 | + u64 logical_len; |
|---|
| 2603 | + |
|---|
| 2604 | + memset(&stack_fi, 0, sizeof(stack_fi)); |
|---|
| 2605 | + btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG); |
|---|
| 2606 | + btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, oe->disk_bytenr); |
|---|
| 2607 | + btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi, |
|---|
| 2608 | + oe->disk_num_bytes); |
|---|
| 2609 | + if (test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags)) |
|---|
| 2610 | + logical_len = oe->truncated_len; |
|---|
| 2611 | + else |
|---|
| 2612 | + logical_len = oe->num_bytes; |
|---|
| 2613 | + btrfs_set_stack_file_extent_num_bytes(&stack_fi, logical_len); |
|---|
| 2614 | + btrfs_set_stack_file_extent_ram_bytes(&stack_fi, logical_len); |
|---|
| 2615 | + btrfs_set_stack_file_extent_compression(&stack_fi, oe->compress_type); |
|---|
| 2616 | + /* Encryption and other encoding is reserved and all 0 */ |
|---|
| 2617 | + |
|---|
| 2618 | + return insert_reserved_file_extent(trans, BTRFS_I(oe->inode), |
|---|
| 2619 | + oe->file_offset, &stack_fi, |
|---|
| 2620 | + oe->qgroup_rsv); |
|---|
| 2621 | +} |
|---|
| 2622 | + |
|---|
| 2623 | +/* |
|---|
| 2624 | + * As ordered data IO finishes, this gets called so we can finish |
|---|
| 2991 | 2625 | * an ordered extent if the range of bytes in the file it covers is |
|---|
| 2992 | 2626 | * fully written. |
|---|
| 2993 | 2627 | */ |
|---|
| .. | .. |
|---|
| 2999 | 2633 | struct btrfs_trans_handle *trans = NULL; |
|---|
| 3000 | 2634 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
|---|
| 3001 | 2635 | struct extent_state *cached_state = NULL; |
|---|
| 3002 | | - struct new_sa_defrag_extent *new = NULL; |
|---|
| 2636 | + u64 start, end; |
|---|
| 3003 | 2637 | int compress_type = 0; |
|---|
| 3004 | 2638 | int ret = 0; |
|---|
| 3005 | | - u64 logical_len = ordered_extent->len; |
|---|
| 3006 | | - bool nolock; |
|---|
| 2639 | + u64 logical_len = ordered_extent->num_bytes; |
|---|
| 2640 | + bool freespace_inode; |
|---|
| 3007 | 2641 | bool truncated = false; |
|---|
| 3008 | 2642 | bool range_locked = false; |
|---|
| 3009 | 2643 | bool clear_new_delalloc_bytes = false; |
|---|
| 3010 | 2644 | bool clear_reserved_extent = true; |
|---|
| 2645 | + unsigned int clear_bits; |
|---|
| 2646 | + |
|---|
| 2647 | + start = ordered_extent->file_offset; |
|---|
| 2648 | + end = start + ordered_extent->num_bytes - 1; |
|---|
| 3011 | 2649 | |
|---|
| 3012 | 2650 | if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && |
|---|
| 3013 | 2651 | !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) && |
|---|
| 3014 | 2652 | !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags)) |
|---|
| 3015 | 2653 | clear_new_delalloc_bytes = true; |
|---|
| 3016 | 2654 | |
|---|
| 3017 | | - nolock = btrfs_is_free_space_inode(BTRFS_I(inode)); |
|---|
| 2655 | + freespace_inode = btrfs_is_free_space_inode(BTRFS_I(inode)); |
|---|
| 3018 | 2656 | |
|---|
| 3019 | 2657 | if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) { |
|---|
| 3020 | 2658 | ret = -EIO; |
|---|
| 3021 | 2659 | goto out; |
|---|
| 3022 | 2660 | } |
|---|
| 3023 | 2661 | |
|---|
| 3024 | | - btrfs_free_io_failure_record(BTRFS_I(inode), |
|---|
| 3025 | | - ordered_extent->file_offset, |
|---|
| 3026 | | - ordered_extent->file_offset + |
|---|
| 3027 | | - ordered_extent->len - 1); |
|---|
| 2662 | + btrfs_free_io_failure_record(BTRFS_I(inode), start, end); |
|---|
| 3028 | 2663 | |
|---|
| 3029 | 2664 | if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) { |
|---|
| 3030 | 2665 | truncated = true; |
|---|
| .. | .. |
|---|
| 3037 | 2672 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { |
|---|
| 3038 | 2673 | BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ |
|---|
| 3039 | 2674 | |
|---|
| 3040 | | - /* |
|---|
| 3041 | | - * For mwrite(mmap + memset to write) case, we still reserve |
|---|
| 3042 | | - * space for NOCOW range. |
|---|
| 3043 | | - * As NOCOW won't cause a new delayed ref, just free the space |
|---|
| 3044 | | - */ |
|---|
| 3045 | | - btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset, |
|---|
| 3046 | | - ordered_extent->len); |
|---|
| 3047 | | - btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
|---|
| 3048 | | - if (nolock) |
|---|
| 3049 | | - trans = btrfs_join_transaction_nolock(root); |
|---|
| 2675 | + btrfs_inode_safe_disk_i_size_write(inode, 0); |
|---|
| 2676 | + if (freespace_inode) |
|---|
| 2677 | + trans = btrfs_join_transaction_spacecache(root); |
|---|
| 3050 | 2678 | else |
|---|
| 3051 | 2679 | trans = btrfs_join_transaction(root); |
|---|
| 3052 | 2680 | if (IS_ERR(trans)) { |
|---|
| .. | .. |
|---|
| 3062 | 2690 | } |
|---|
| 3063 | 2691 | |
|---|
| 3064 | 2692 | range_locked = true; |
|---|
| 3065 | | - lock_extent_bits(io_tree, ordered_extent->file_offset, |
|---|
| 3066 | | - ordered_extent->file_offset + ordered_extent->len - 1, |
|---|
| 3067 | | - &cached_state); |
|---|
| 2693 | + lock_extent_bits(io_tree, start, end, &cached_state); |
|---|
| 3068 | 2694 | |
|---|
| 3069 | | - ret = test_range_bit(io_tree, ordered_extent->file_offset, |
|---|
| 3070 | | - ordered_extent->file_offset + ordered_extent->len - 1, |
|---|
| 3071 | | - EXTENT_DEFRAG, 0, cached_state); |
|---|
| 3072 | | - if (ret) { |
|---|
| 3073 | | - u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item); |
|---|
| 3074 | | - if (0 && last_snapshot >= BTRFS_I(inode)->generation) |
|---|
| 3075 | | - /* the inode is shared */ |
|---|
| 3076 | | - new = record_old_file_extents(inode, ordered_extent); |
|---|
| 3077 | | - |
|---|
| 3078 | | - clear_extent_bit(io_tree, ordered_extent->file_offset, |
|---|
| 3079 | | - ordered_extent->file_offset + ordered_extent->len - 1, |
|---|
| 3080 | | - EXTENT_DEFRAG, 0, 0, &cached_state); |
|---|
| 3081 | | - } |
|---|
| 3082 | | - |
|---|
| 3083 | | - if (nolock) |
|---|
| 3084 | | - trans = btrfs_join_transaction_nolock(root); |
|---|
| 2695 | + if (freespace_inode) |
|---|
| 2696 | + trans = btrfs_join_transaction_spacecache(root); |
|---|
| 3085 | 2697 | else |
|---|
| 3086 | 2698 | trans = btrfs_join_transaction(root); |
|---|
| 3087 | 2699 | if (IS_ERR(trans)) { |
|---|
| .. | .. |
|---|
| 3096 | 2708 | compress_type = ordered_extent->compress_type; |
|---|
| 3097 | 2709 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { |
|---|
| 3098 | 2710 | BUG_ON(compress_type); |
|---|
| 3099 | | - btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset, |
|---|
| 3100 | | - ordered_extent->len); |
|---|
| 3101 | 2711 | ret = btrfs_mark_extent_written(trans, BTRFS_I(inode), |
|---|
| 3102 | 2712 | ordered_extent->file_offset, |
|---|
| 3103 | 2713 | ordered_extent->file_offset + |
|---|
| 3104 | 2714 | logical_len); |
|---|
| 3105 | 2715 | } else { |
|---|
| 3106 | 2716 | BUG_ON(root == fs_info->tree_root); |
|---|
| 3107 | | - ret = insert_reserved_file_extent(trans, inode, |
|---|
| 3108 | | - ordered_extent->file_offset, |
|---|
| 3109 | | - ordered_extent->start, |
|---|
| 3110 | | - ordered_extent->disk_len, |
|---|
| 3111 | | - logical_len, logical_len, |
|---|
| 3112 | | - compress_type, 0, 0, |
|---|
| 3113 | | - BTRFS_FILE_EXTENT_REG); |
|---|
| 2717 | + ret = insert_ordered_extent_file_extent(trans, ordered_extent); |
|---|
| 3114 | 2718 | if (!ret) { |
|---|
| 3115 | 2719 | clear_reserved_extent = false; |
|---|
| 3116 | 2720 | btrfs_release_delalloc_bytes(fs_info, |
|---|
| 3117 | | - ordered_extent->start, |
|---|
| 3118 | | - ordered_extent->disk_len); |
|---|
| 2721 | + ordered_extent->disk_bytenr, |
|---|
| 2722 | + ordered_extent->disk_num_bytes); |
|---|
| 3119 | 2723 | } |
|---|
| 3120 | 2724 | } |
|---|
| 3121 | 2725 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, |
|---|
| 3122 | | - ordered_extent->file_offset, ordered_extent->len, |
|---|
| 3123 | | - trans->transid); |
|---|
| 2726 | + ordered_extent->file_offset, |
|---|
| 2727 | + ordered_extent->num_bytes, trans->transid); |
|---|
| 3124 | 2728 | if (ret < 0) { |
|---|
| 3125 | 2729 | btrfs_abort_transaction(trans, ret); |
|---|
| 3126 | 2730 | goto out; |
|---|
| 3127 | 2731 | } |
|---|
| 3128 | 2732 | |
|---|
| 3129 | | - ret = add_pending_csums(trans, inode, &ordered_extent->list); |
|---|
| 2733 | + ret = add_pending_csums(trans, &ordered_extent->list); |
|---|
| 3130 | 2734 | if (ret) { |
|---|
| 3131 | 2735 | btrfs_abort_transaction(trans, ret); |
|---|
| 3132 | 2736 | goto out; |
|---|
| 3133 | 2737 | } |
|---|
| 3134 | 2738 | |
|---|
| 3135 | | - btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
|---|
| 2739 | + btrfs_inode_safe_disk_i_size_write(inode, 0); |
|---|
| 3136 | 2740 | ret = btrfs_update_inode_fallback(trans, root, inode); |
|---|
| 3137 | 2741 | if (ret) { /* -ENOMEM or corruption */ |
|---|
| 3138 | 2742 | btrfs_abort_transaction(trans, ret); |
|---|
| .. | .. |
|---|
| 3140 | 2744 | } |
|---|
| 3141 | 2745 | ret = 0; |
|---|
| 3142 | 2746 | out: |
|---|
| 3143 | | - if (range_locked || clear_new_delalloc_bytes) { |
|---|
| 3144 | | - unsigned int clear_bits = 0; |
|---|
| 3145 | | - |
|---|
| 3146 | | - if (range_locked) |
|---|
| 3147 | | - clear_bits |= EXTENT_LOCKED; |
|---|
| 3148 | | - if (clear_new_delalloc_bytes) |
|---|
| 3149 | | - clear_bits |= EXTENT_DELALLOC_NEW; |
|---|
| 3150 | | - clear_extent_bit(&BTRFS_I(inode)->io_tree, |
|---|
| 3151 | | - ordered_extent->file_offset, |
|---|
| 3152 | | - ordered_extent->file_offset + |
|---|
| 3153 | | - ordered_extent->len - 1, |
|---|
| 3154 | | - clear_bits, |
|---|
| 3155 | | - (clear_bits & EXTENT_LOCKED) ? 1 : 0, |
|---|
| 3156 | | - 0, &cached_state); |
|---|
| 3157 | | - } |
|---|
| 2747 | + clear_bits = EXTENT_DEFRAG; |
|---|
| 2748 | + if (range_locked) |
|---|
| 2749 | + clear_bits |= EXTENT_LOCKED; |
|---|
| 2750 | + if (clear_new_delalloc_bytes) |
|---|
| 2751 | + clear_bits |= EXTENT_DELALLOC_NEW; |
|---|
| 2752 | + clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, |
|---|
| 2753 | + (clear_bits & EXTENT_LOCKED) ? 1 : 0, 0, |
|---|
| 2754 | + &cached_state); |
|---|
| 3158 | 2755 | |
|---|
| 3159 | 2756 | if (trans) |
|---|
| 3160 | 2757 | btrfs_end_transaction(trans); |
|---|
| 3161 | 2758 | |
|---|
| 3162 | 2759 | if (ret || truncated) { |
|---|
| 3163 | | - u64 start, end; |
|---|
| 2760 | + u64 unwritten_start = start; |
|---|
| 3164 | 2761 | |
|---|
| 3165 | 2762 | /* |
|---|
| 3166 | 2763 | * If we failed to finish this ordered extent for any reason we |
|---|
| .. | .. |
|---|
| 3175 | 2772 | mapping_set_error(ordered_extent->inode->i_mapping, -EIO); |
|---|
| 3176 | 2773 | |
|---|
| 3177 | 2774 | if (truncated) |
|---|
| 3178 | | - start = ordered_extent->file_offset + logical_len; |
|---|
| 3179 | | - else |
|---|
| 3180 | | - start = ordered_extent->file_offset; |
|---|
| 3181 | | - end = ordered_extent->file_offset + ordered_extent->len - 1; |
|---|
| 3182 | | - clear_extent_uptodate(io_tree, start, end, NULL); |
|---|
| 2775 | + unwritten_start += logical_len; |
|---|
| 2776 | + clear_extent_uptodate(io_tree, unwritten_start, end, NULL); |
|---|
| 3183 | 2777 | |
|---|
| 3184 | 2778 | /* Drop the cache for the part of the extent we didn't write. */ |
|---|
| 3185 | | - btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0); |
|---|
| 2779 | + btrfs_drop_extent_cache(BTRFS_I(inode), unwritten_start, end, 0); |
|---|
| 3186 | 2780 | |
|---|
| 3187 | 2781 | /* |
|---|
| 3188 | 2782 | * If the ordered extent had an IOERR or something else went |
|---|
| .. | .. |
|---|
| 3197 | 2791 | if ((ret || !logical_len) && |
|---|
| 3198 | 2792 | clear_reserved_extent && |
|---|
| 3199 | 2793 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && |
|---|
| 3200 | | - !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) |
|---|
| 2794 | + !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { |
|---|
| 2795 | + /* |
|---|
| 2796 | + * Discard the range before returning it back to the |
|---|
| 2797 | + * free space pool |
|---|
| 2798 | + */ |
|---|
| 2799 | + if (ret && btrfs_test_opt(fs_info, DISCARD_SYNC)) |
|---|
| 2800 | + btrfs_discard_extent(fs_info, |
|---|
| 2801 | + ordered_extent->disk_bytenr, |
|---|
| 2802 | + ordered_extent->disk_num_bytes, |
|---|
| 2803 | + NULL); |
|---|
| 3201 | 2804 | btrfs_free_reserved_extent(fs_info, |
|---|
| 3202 | | - ordered_extent->start, |
|---|
| 3203 | | - ordered_extent->disk_len, 1); |
|---|
| 2805 | + ordered_extent->disk_bytenr, |
|---|
| 2806 | + ordered_extent->disk_num_bytes, 1); |
|---|
| 2807 | + } |
|---|
| 3204 | 2808 | } |
|---|
| 3205 | | - |
|---|
| 3206 | 2809 | |
|---|
| 3207 | 2810 | /* |
|---|
| 3208 | 2811 | * This needs to be done to make sure anybody waiting knows we are done |
|---|
| 3209 | 2812 | * updating everything for this ordered extent. |
|---|
| 3210 | 2813 | */ |
|---|
| 3211 | | - btrfs_remove_ordered_extent(inode, ordered_extent); |
|---|
| 3212 | | - |
|---|
| 3213 | | - /* for snapshot-aware defrag */ |
|---|
| 3214 | | - if (new) { |
|---|
| 3215 | | - if (ret) { |
|---|
| 3216 | | - free_sa_defrag_extent(new); |
|---|
| 3217 | | - atomic_dec(&fs_info->defrag_running); |
|---|
| 3218 | | - } else { |
|---|
| 3219 | | - relink_file_extents(new); |
|---|
| 3220 | | - } |
|---|
| 3221 | | - } |
|---|
| 2814 | + btrfs_remove_ordered_extent(BTRFS_I(inode), ordered_extent); |
|---|
| 3222 | 2815 | |
|---|
| 3223 | 2816 | /* once for us */ |
|---|
| 3224 | 2817 | btrfs_put_ordered_extent(ordered_extent); |
|---|
| .. | .. |
|---|
| 3235 | 2828 | btrfs_finish_ordered_io(ordered_extent); |
|---|
| 3236 | 2829 | } |
|---|
| 3237 | 2830 | |
|---|
| 3238 | | -static void btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, |
|---|
| 3239 | | - struct extent_state *state, int uptodate) |
|---|
| 2831 | +void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start, |
|---|
| 2832 | + u64 end, int uptodate) |
|---|
| 3240 | 2833 | { |
|---|
| 3241 | | - struct inode *inode = page->mapping->host; |
|---|
| 3242 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 2834 | + struct btrfs_inode *inode = BTRFS_I(page->mapping->host); |
|---|
| 2835 | + struct btrfs_fs_info *fs_info = inode->root->fs_info; |
|---|
| 3243 | 2836 | struct btrfs_ordered_extent *ordered_extent = NULL; |
|---|
| 3244 | 2837 | struct btrfs_workqueue *wq; |
|---|
| 3245 | | - btrfs_work_func_t func; |
|---|
| 3246 | 2838 | |
|---|
| 3247 | 2839 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); |
|---|
| 3248 | 2840 | |
|---|
| .. | .. |
|---|
| 3251 | 2843 | end - start + 1, uptodate)) |
|---|
| 3252 | 2844 | return; |
|---|
| 3253 | 2845 | |
|---|
| 3254 | | - if (btrfs_is_free_space_inode(BTRFS_I(inode))) { |
|---|
| 2846 | + if (btrfs_is_free_space_inode(inode)) |
|---|
| 3255 | 2847 | wq = fs_info->endio_freespace_worker; |
|---|
| 3256 | | - func = btrfs_freespace_write_helper; |
|---|
| 3257 | | - } else { |
|---|
| 2848 | + else |
|---|
| 3258 | 2849 | wq = fs_info->endio_write_workers; |
|---|
| 3259 | | - func = btrfs_endio_write_helper; |
|---|
| 3260 | | - } |
|---|
| 3261 | 2850 | |
|---|
| 3262 | | - btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL, |
|---|
| 3263 | | - NULL); |
|---|
| 2851 | + btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); |
|---|
| 3264 | 2852 | btrfs_queue_work(wq, &ordered_extent->work); |
|---|
| 3265 | 2853 | } |
|---|
| 3266 | 2854 | |
|---|
| 3267 | | -static int __readpage_endio_check(struct inode *inode, |
|---|
| 3268 | | - struct btrfs_io_bio *io_bio, |
|---|
| 3269 | | - int icsum, struct page *page, |
|---|
| 3270 | | - int pgoff, u64 start, size_t len) |
|---|
| 2855 | +static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio, |
|---|
| 2856 | + int icsum, struct page *page, int pgoff, u64 start, |
|---|
| 2857 | + size_t len) |
|---|
| 3271 | 2858 | { |
|---|
| 2859 | + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 2860 | + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); |
|---|
| 3272 | 2861 | char *kaddr; |
|---|
| 3273 | | - u32 csum_expected; |
|---|
| 3274 | | - u32 csum = ~(u32)0; |
|---|
| 2862 | + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); |
|---|
| 2863 | + u8 *csum_expected; |
|---|
| 2864 | + u8 csum[BTRFS_CSUM_SIZE]; |
|---|
| 3275 | 2865 | |
|---|
| 3276 | | - csum_expected = *(((u32 *)io_bio->csum) + icsum); |
|---|
| 2866 | + csum_expected = ((u8 *)io_bio->csum) + icsum * csum_size; |
|---|
| 3277 | 2867 | |
|---|
| 3278 | 2868 | kaddr = kmap_atomic(page); |
|---|
| 3279 | | - csum = btrfs_csum_data(kaddr + pgoff, csum, len); |
|---|
| 3280 | | - btrfs_csum_final(csum, (u8 *)&csum); |
|---|
| 3281 | | - if (csum != csum_expected) |
|---|
| 2869 | + shash->tfm = fs_info->csum_shash; |
|---|
| 2870 | + |
|---|
| 2871 | + crypto_shash_digest(shash, kaddr + pgoff, len, csum); |
|---|
| 2872 | + |
|---|
| 2873 | + if (memcmp(csum, csum_expected, csum_size)) |
|---|
| 3282 | 2874 | goto zeroit; |
|---|
| 3283 | 2875 | |
|---|
| 3284 | 2876 | kunmap_atomic(kaddr); |
|---|
| .. | .. |
|---|
| 3286 | 2878 | zeroit: |
|---|
| 3287 | 2879 | btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected, |
|---|
| 3288 | 2880 | io_bio->mirror_num); |
|---|
| 2881 | + if (io_bio->device) |
|---|
| 2882 | + btrfs_dev_stat_inc_and_print(io_bio->device, |
|---|
| 2883 | + BTRFS_DEV_STAT_CORRUPTION_ERRS); |
|---|
| 3289 | 2884 | memset(kaddr + pgoff, 1, len); |
|---|
| 3290 | 2885 | flush_dcache_page(page); |
|---|
| 3291 | 2886 | kunmap_atomic(kaddr); |
|---|
| .. | .. |
|---|
| 3297 | 2892 | * if there's a match, we allow the bio to finish. If not, the code in |
|---|
| 3298 | 2893 | * extent_io.c will try to find good copies for us. |
|---|
| 3299 | 2894 | */ |
|---|
| 3300 | | -static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, |
|---|
| 3301 | | - u64 phy_offset, struct page *page, |
|---|
| 3302 | | - u64 start, u64 end, int mirror) |
|---|
| 2895 | +int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u64 phy_offset, |
|---|
| 2896 | + struct page *page, u64 start, u64 end, int mirror) |
|---|
| 3303 | 2897 | { |
|---|
| 3304 | 2898 | size_t offset = start - page_offset(page); |
|---|
| 3305 | 2899 | struct inode *inode = page->mapping->host; |
|---|
| .. | .. |
|---|
| 3321 | 2915 | } |
|---|
| 3322 | 2916 | |
|---|
| 3323 | 2917 | phy_offset >>= inode->i_sb->s_blocksize_bits; |
|---|
| 3324 | | - return __readpage_endio_check(inode, io_bio, phy_offset, page, offset, |
|---|
| 3325 | | - start, (size_t)(end - start + 1)); |
|---|
| 2918 | + return check_data_csum(inode, io_bio, phy_offset, page, offset, start, |
|---|
| 2919 | + (size_t)(end - start + 1)); |
|---|
| 3326 | 2920 | } |
|---|
| 3327 | 2921 | |
|---|
| 3328 | 2922 | /* |
|---|
| .. | .. |
|---|
| 3343 | 2937 | if (atomic_add_unless(&inode->i_count, -1, 1)) |
|---|
| 3344 | 2938 | return; |
|---|
| 3345 | 2939 | |
|---|
| 2940 | + atomic_inc(&fs_info->nr_delayed_iputs); |
|---|
| 3346 | 2941 | spin_lock(&fs_info->delayed_iput_lock); |
|---|
| 3347 | 2942 | ASSERT(list_empty(&binode->delayed_iput)); |
|---|
| 3348 | 2943 | list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs); |
|---|
| 3349 | 2944 | spin_unlock(&fs_info->delayed_iput_lock); |
|---|
| 2945 | + if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags)) |
|---|
| 2946 | + wake_up_process(fs_info->cleaner_kthread); |
|---|
| 2947 | +} |
|---|
| 2948 | + |
|---|
| 2949 | +static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info, |
|---|
| 2950 | + struct btrfs_inode *inode) |
|---|
| 2951 | +{ |
|---|
| 2952 | + list_del_init(&inode->delayed_iput); |
|---|
| 2953 | + spin_unlock(&fs_info->delayed_iput_lock); |
|---|
| 2954 | + iput(&inode->vfs_inode); |
|---|
| 2955 | + if (atomic_dec_and_test(&fs_info->nr_delayed_iputs)) |
|---|
| 2956 | + wake_up(&fs_info->delayed_iputs_wait); |
|---|
| 2957 | + spin_lock(&fs_info->delayed_iput_lock); |
|---|
| 2958 | +} |
|---|
| 2959 | + |
|---|
| 2960 | +static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info, |
|---|
| 2961 | + struct btrfs_inode *inode) |
|---|
| 2962 | +{ |
|---|
| 2963 | + if (!list_empty(&inode->delayed_iput)) { |
|---|
| 2964 | + spin_lock(&fs_info->delayed_iput_lock); |
|---|
| 2965 | + if (!list_empty(&inode->delayed_iput)) |
|---|
| 2966 | + run_delayed_iput_locked(fs_info, inode); |
|---|
| 2967 | + spin_unlock(&fs_info->delayed_iput_lock); |
|---|
| 2968 | + } |
|---|
| 3350 | 2969 | } |
|---|
| 3351 | 2970 | |
|---|
| 3352 | 2971 | void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) |
|---|
| .. | .. |
|---|
| 3358 | 2977 | |
|---|
| 3359 | 2978 | inode = list_first_entry(&fs_info->delayed_iputs, |
|---|
| 3360 | 2979 | struct btrfs_inode, delayed_iput); |
|---|
| 3361 | | - list_del_init(&inode->delayed_iput); |
|---|
| 3362 | | - spin_unlock(&fs_info->delayed_iput_lock); |
|---|
| 3363 | | - iput(&inode->vfs_inode); |
|---|
| 3364 | | - spin_lock(&fs_info->delayed_iput_lock); |
|---|
| 2980 | + run_delayed_iput_locked(fs_info, inode); |
|---|
| 2981 | + cond_resched_lock(&fs_info->delayed_iput_lock); |
|---|
| 3365 | 2982 | } |
|---|
| 3366 | 2983 | spin_unlock(&fs_info->delayed_iput_lock); |
|---|
| 2984 | +} |
|---|
| 2985 | + |
|---|
| 2986 | +/** |
|---|
| 2987 | + * btrfs_wait_on_delayed_iputs - wait on the delayed iputs to be done running |
|---|
| 2988 | + * @fs_info - the fs_info for this fs |
|---|
| 2989 | + * @return - -EINTR if we were killed, 0 once no delayed iputs are pending |
|---|
| 2990 | + * |
|---|
| 2991 | + * This will wait, in a KILLABLE state, until all pending delayed iputs have |
|---|
| 2992 | + * finished running. Once they are all done we return 0, unless we are killed, |
|---|
| 2993 | + * in which case we return -EINTR. This helps in user operations like |
|---|
| 2994 | + * fallocate etc. that might get blocked on the iputs. |
|---|
| 2995 | + */ |
|---|
| 2996 | +int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info) |
|---|
| 2997 | +{ |
|---|
| 2998 | + int ret = wait_event_killable(fs_info->delayed_iputs_wait, |
|---|
| 2999 | + atomic_read(&fs_info->nr_delayed_iputs) == 0); |
|---|
| 3000 | + if (ret) |
|---|
| 3001 | + return -EINTR; |
|---|
| 3002 | + return 0; |
|---|
| 3367 | 3003 | } |
|---|
| 3368 | 3004 | |
|---|
| 3369 | 3005 | /* |
|---|
| .. | .. |
|---|
| 3471 | 3107 | found_key.objectid = found_key.offset; |
|---|
| 3472 | 3108 | found_key.type = BTRFS_INODE_ITEM_KEY; |
|---|
| 3473 | 3109 | found_key.offset = 0; |
|---|
| 3474 | | - inode = btrfs_iget(fs_info->sb, &found_key, root, NULL); |
|---|
| 3110 | + inode = btrfs_iget(fs_info->sb, last_objectid, root); |
|---|
| 3475 | 3111 | ret = PTR_ERR_OR_ZERO(inode); |
|---|
| 3476 | 3112 | if (ret && ret != -ENOENT) |
|---|
| 3477 | 3113 | goto out; |
|---|
| 3478 | 3114 | |
|---|
| 3479 | 3115 | if (ret == -ENOENT && root == fs_info->tree_root) { |
|---|
| 3480 | 3116 | struct btrfs_root *dead_root; |
|---|
| 3481 | | - struct btrfs_fs_info *fs_info = root->fs_info; |
|---|
| 3482 | 3117 | int is_dead_root = 0; |
|---|
| 3483 | 3118 | |
|---|
| 3484 | 3119 | /* |
|---|
| .. | .. |
|---|
| 3490 | 3125 | * orphan must not get deleted. |
|---|
| 3491 | 3126 | * find_dead_roots already ran before us, so if this |
|---|
| 3492 | 3127 | * is a snapshot deletion, we should find the root |
|---|
| 3493 | | - * in the dead_roots list |
|---|
| 3128 | + * in the fs_roots radix tree. |
|---|
| 3494 | 3129 | */ |
|---|
| 3495 | | - spin_lock(&fs_info->trans_lock); |
|---|
| 3496 | | - list_for_each_entry(dead_root, &fs_info->dead_roots, |
|---|
| 3497 | | - root_list) { |
|---|
| 3498 | | - if (dead_root->root_key.objectid == |
|---|
| 3499 | | - found_key.objectid) { |
|---|
| 3500 | | - is_dead_root = 1; |
|---|
| 3501 | | - break; |
|---|
| 3502 | | - } |
|---|
| 3503 | | - } |
|---|
| 3504 | | - spin_unlock(&fs_info->trans_lock); |
|---|
| 3130 | + |
|---|
| 3131 | + spin_lock(&fs_info->fs_roots_radix_lock); |
|---|
| 3132 | + dead_root = radix_tree_lookup(&fs_info->fs_roots_radix, |
|---|
| 3133 | + (unsigned long)found_key.objectid); |
|---|
| 3134 | + if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0) |
|---|
| 3135 | + is_dead_root = 1; |
|---|
| 3136 | + spin_unlock(&fs_info->fs_roots_radix_lock); |
|---|
| 3137 | + |
|---|
| 3505 | 3138 | if (is_dead_root) { |
|---|
| 3506 | 3139 | /* prevent this orphan from being found again */ |
|---|
| 3507 | 3140 | key.offset = found_key.objectid - 1; |
|---|
| .. | .. |
|---|
| 3551 | 3184 | |
|---|
| 3552 | 3185 | /* this will do delete_inode and everything for us */ |
|---|
| 3553 | 3186 | iput(inode); |
|---|
| 3554 | | - if (ret) |
|---|
| 3555 | | - goto out; |
|---|
| 3556 | 3187 | } |
|---|
| 3557 | 3188 | /* release the path since we're done with it */ |
|---|
| 3558 | 3189 | btrfs_release_path(path); |
|---|
| .. | .. |
|---|
| 3694 | 3325 | i_uid_write(inode, btrfs_inode_uid(leaf, inode_item)); |
|---|
| 3695 | 3326 | i_gid_write(inode, btrfs_inode_gid(leaf, inode_item)); |
|---|
| 3696 | 3327 | btrfs_i_size_write(BTRFS_I(inode), btrfs_inode_size(leaf, inode_item)); |
|---|
| 3328 | + btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0, |
|---|
| 3329 | + round_up(i_size_read(inode), fs_info->sectorsize)); |
|---|
| 3697 | 3330 | |
|---|
| 3698 | 3331 | inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime); |
|---|
| 3699 | 3332 | inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime); |
|---|
| .. | .. |
|---|
| 3764 | 3397 | * inode is not a directory, logging its parent unnecessarily. |
|---|
| 3765 | 3398 | */ |
|---|
| 3766 | 3399 | BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans; |
|---|
| 3400 | + |
|---|
| 3767 | 3401 | /* |
|---|
| 3768 | | - * Similar reasoning for last_link_trans, needs to be set otherwise |
|---|
| 3769 | | - * for a case like the following: |
|---|
| 3770 | | - * |
|---|
| 3771 | | - * mkdir A |
|---|
| 3772 | | - * touch foo |
|---|
| 3773 | | - * ln foo A/bar |
|---|
| 3774 | | - * echo 2 > /proc/sys/vm/drop_caches |
|---|
| 3775 | | - * fsync foo |
|---|
| 3776 | | - * <power failure> |
|---|
| 3777 | | - * |
|---|
| 3778 | | - * Would result in link bar and directory A not existing after the power |
|---|
| 3779 | | - * failure. |
|---|
| 3402 | + * Same logic as for last_unlink_trans. We don't persist the generation of |
|---|
| 3403 | + * the last transaction where this inode was used for a reflink operation, |
|---|
| 3404 | + * so after evicting and reloading the inode we must be pessimistic and |
|---|
| 3405 | + * assume it was the last transaction that modified the inode. |
|---|
| 3780 | 3406 | */ |
|---|
| 3781 | | - BTRFS_I(inode)->last_link_trans = BTRFS_I(inode)->last_trans; |
|---|
| 3407 | + BTRFS_I(inode)->last_reflink_trans = BTRFS_I(inode)->last_trans; |
|---|
| 3782 | 3408 | |
|---|
| 3783 | 3409 | path->slots[0]++; |
|---|
| 3784 | 3410 | if (inode->i_nlink != 1 || |
|---|
| .. | .. |
|---|
| 3827 | 3453 | switch (inode->i_mode & S_IFMT) { |
|---|
| 3828 | 3454 | case S_IFREG: |
|---|
| 3829 | 3455 | inode->i_mapping->a_ops = &btrfs_aops; |
|---|
| 3830 | | - BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
|---|
| 3831 | 3456 | inode->i_fop = &btrfs_file_operations; |
|---|
| 3832 | 3457 | inode->i_op = &btrfs_file_inode_operations; |
|---|
| 3833 | 3458 | break; |
|---|
| .. | .. |
|---|
| 3838 | 3463 | case S_IFLNK: |
|---|
| 3839 | 3464 | inode->i_op = &btrfs_symlink_inode_operations; |
|---|
| 3840 | 3465 | inode_nohighmem(inode); |
|---|
| 3841 | | - inode->i_mapping->a_ops = &btrfs_symlink_aops; |
|---|
| 3466 | + inode->i_mapping->a_ops = &btrfs_aops; |
|---|
| 3842 | 3467 | break; |
|---|
| 3843 | 3468 | default: |
|---|
| 3844 | 3469 | inode->i_op = &btrfs_special_inode_operations; |
|---|
| .. | .. |
|---|
| 3860 | 3485 | { |
|---|
| 3861 | 3486 | struct btrfs_map_token token; |
|---|
| 3862 | 3487 | |
|---|
| 3863 | | - btrfs_init_map_token(&token); |
|---|
| 3488 | + btrfs_init_map_token(&token, leaf); |
|---|
| 3864 | 3489 | |
|---|
| 3865 | | - btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token); |
|---|
| 3866 | | - btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token); |
|---|
| 3867 | | - btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size, |
|---|
| 3868 | | - &token); |
|---|
| 3869 | | - btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); |
|---|
| 3870 | | - btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); |
|---|
| 3490 | + btrfs_set_token_inode_uid(&token, item, i_uid_read(inode)); |
|---|
| 3491 | + btrfs_set_token_inode_gid(&token, item, i_gid_read(inode)); |
|---|
| 3492 | + btrfs_set_token_inode_size(&token, item, BTRFS_I(inode)->disk_i_size); |
|---|
| 3493 | + btrfs_set_token_inode_mode(&token, item, inode->i_mode); |
|---|
| 3494 | + btrfs_set_token_inode_nlink(&token, item, inode->i_nlink); |
|---|
| 3871 | 3495 | |
|---|
| 3872 | | - btrfs_set_token_timespec_sec(leaf, &item->atime, |
|---|
| 3873 | | - inode->i_atime.tv_sec, &token); |
|---|
| 3874 | | - btrfs_set_token_timespec_nsec(leaf, &item->atime, |
|---|
| 3875 | | - inode->i_atime.tv_nsec, &token); |
|---|
| 3496 | + btrfs_set_token_timespec_sec(&token, &item->atime, |
|---|
| 3497 | + inode->i_atime.tv_sec); |
|---|
| 3498 | + btrfs_set_token_timespec_nsec(&token, &item->atime, |
|---|
| 3499 | + inode->i_atime.tv_nsec); |
|---|
| 3876 | 3500 | |
|---|
| 3877 | | - btrfs_set_token_timespec_sec(leaf, &item->mtime, |
|---|
| 3878 | | - inode->i_mtime.tv_sec, &token); |
|---|
| 3879 | | - btrfs_set_token_timespec_nsec(leaf, &item->mtime, |
|---|
| 3880 | | - inode->i_mtime.tv_nsec, &token); |
|---|
| 3501 | + btrfs_set_token_timespec_sec(&token, &item->mtime, |
|---|
| 3502 | + inode->i_mtime.tv_sec); |
|---|
| 3503 | + btrfs_set_token_timespec_nsec(&token, &item->mtime, |
|---|
| 3504 | + inode->i_mtime.tv_nsec); |
|---|
| 3881 | 3505 | |
|---|
| 3882 | | - btrfs_set_token_timespec_sec(leaf, &item->ctime, |
|---|
| 3883 | | - inode->i_ctime.tv_sec, &token); |
|---|
| 3884 | | - btrfs_set_token_timespec_nsec(leaf, &item->ctime, |
|---|
| 3885 | | - inode->i_ctime.tv_nsec, &token); |
|---|
| 3506 | + btrfs_set_token_timespec_sec(&token, &item->ctime, |
|---|
| 3507 | + inode->i_ctime.tv_sec); |
|---|
| 3508 | + btrfs_set_token_timespec_nsec(&token, &item->ctime, |
|---|
| 3509 | + inode->i_ctime.tv_nsec); |
|---|
| 3886 | 3510 | |
|---|
| 3887 | | - btrfs_set_token_timespec_sec(leaf, &item->otime, |
|---|
| 3888 | | - BTRFS_I(inode)->i_otime.tv_sec, &token); |
|---|
| 3889 | | - btrfs_set_token_timespec_nsec(leaf, &item->otime, |
|---|
| 3890 | | - BTRFS_I(inode)->i_otime.tv_nsec, &token); |
|---|
| 3511 | + btrfs_set_token_timespec_sec(&token, &item->otime, |
|---|
| 3512 | + BTRFS_I(inode)->i_otime.tv_sec); |
|---|
| 3513 | + btrfs_set_token_timespec_nsec(&token, &item->otime, |
|---|
| 3514 | + BTRFS_I(inode)->i_otime.tv_nsec); |
|---|
| 3891 | 3515 | |
|---|
| 3892 | | - btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), |
|---|
| 3893 | | - &token); |
|---|
| 3894 | | - btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation, |
|---|
| 3895 | | - &token); |
|---|
| 3896 | | - btrfs_set_token_inode_sequence(leaf, item, inode_peek_iversion(inode), |
|---|
| 3897 | | - &token); |
|---|
| 3898 | | - btrfs_set_token_inode_transid(leaf, item, trans->transid, &token); |
|---|
| 3899 | | - btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token); |
|---|
| 3900 | | - btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token); |
|---|
| 3901 | | - btrfs_set_token_inode_block_group(leaf, item, 0, &token); |
|---|
| 3516 | + btrfs_set_token_inode_nbytes(&token, item, inode_get_bytes(inode)); |
|---|
| 3517 | + btrfs_set_token_inode_generation(&token, item, |
|---|
| 3518 | + BTRFS_I(inode)->generation); |
|---|
| 3519 | + btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode)); |
|---|
| 3520 | + btrfs_set_token_inode_transid(&token, item, trans->transid); |
|---|
| 3521 | + btrfs_set_token_inode_rdev(&token, item, inode->i_rdev); |
|---|
| 3522 | + btrfs_set_token_inode_flags(&token, item, BTRFS_I(inode)->flags); |
|---|
| 3523 | + btrfs_set_token_inode_block_group(&token, item, 0); |
|---|
| 3902 | 3524 | } |
|---|
| 3903 | 3525 | |
|---|
| 3904 | 3526 | /* |
|---|
| .. | .. |
|---|
| 3931 | 3553 | |
|---|
| 3932 | 3554 | fill_inode_item(trans, leaf, inode_item, inode); |
|---|
| 3933 | 3555 | btrfs_mark_buffer_dirty(leaf); |
|---|
| 3934 | | - btrfs_set_inode_last_trans(trans, inode); |
|---|
| 3556 | + btrfs_set_inode_last_trans(trans, BTRFS_I(inode)); |
|---|
| 3935 | 3557 | ret = 0; |
|---|
| 3936 | 3558 | failed: |
|---|
| 3937 | 3559 | btrfs_free_path(path); |
|---|
| .. | .. |
|---|
| 3961 | 3583 | |
|---|
| 3962 | 3584 | ret = btrfs_delayed_update_inode(trans, root, inode); |
|---|
| 3963 | 3585 | if (!ret) |
|---|
| 3964 | | - btrfs_set_inode_last_trans(trans, inode); |
|---|
| 3586 | + btrfs_set_inode_last_trans(trans, BTRFS_I(inode)); |
|---|
| 3965 | 3587 | return ret; |
|---|
| 3966 | 3588 | } |
|---|
| 3967 | 3589 | |
|---|
| .. | .. |
|---|
| 3994 | 3616 | struct btrfs_fs_info *fs_info = root->fs_info; |
|---|
| 3995 | 3617 | struct btrfs_path *path; |
|---|
| 3996 | 3618 | int ret = 0; |
|---|
| 3997 | | - struct extent_buffer *leaf; |
|---|
| 3998 | 3619 | struct btrfs_dir_item *di; |
|---|
| 3999 | | - struct btrfs_key key; |
|---|
| 4000 | 3620 | u64 index; |
|---|
| 4001 | 3621 | u64 ino = btrfs_ino(inode); |
|---|
| 4002 | 3622 | u64 dir_ino = btrfs_ino(dir); |
|---|
| .. | .. |
|---|
| 4010 | 3630 | path->leave_spinning = 1; |
|---|
| 4011 | 3631 | di = btrfs_lookup_dir_item(trans, root, path, dir_ino, |
|---|
| 4012 | 3632 | name, name_len, -1); |
|---|
| 4013 | | - if (IS_ERR(di)) { |
|---|
| 4014 | | - ret = PTR_ERR(di); |
|---|
| 3633 | + if (IS_ERR_OR_NULL(di)) { |
|---|
| 3634 | + ret = di ? PTR_ERR(di) : -ENOENT; |
|---|
| 4015 | 3635 | goto err; |
|---|
| 4016 | 3636 | } |
|---|
| 4017 | | - if (!di) { |
|---|
| 4018 | | - ret = -ENOENT; |
|---|
| 4019 | | - goto err; |
|---|
| 4020 | | - } |
|---|
| 4021 | | - leaf = path->nodes[0]; |
|---|
| 4022 | | - btrfs_dir_item_key_to_cpu(leaf, di, &key); |
|---|
| 4023 | 3637 | ret = btrfs_delete_one_dir_name(trans, root, path, di); |
|---|
| 4024 | 3638 | if (ret) |
|---|
| 4025 | 3639 | goto err; |
|---|
| .. | .. |
|---|
| 4072 | 3686 | ret = 0; |
|---|
| 4073 | 3687 | else if (ret) |
|---|
| 4074 | 3688 | btrfs_abort_transaction(trans, ret); |
|---|
| 3689 | + |
|---|
| 3690 | + /* |
|---|
| 3691 | + * If we have a pending delayed iput we could end up with the final iput |
|---|
| 3692 | + * being run in btrfs-cleaner context. If we have enough of these built |
|---|
| 3693 | + * up we can end up burning a lot of time in btrfs-cleaner without any |
|---|
| 3694 | + * way to throttle the unlinks. Since we're currently holding a ref on |
|---|
| 3695 | + * the inode we can run the delayed iput here without any issues as the |
|---|
| 3696 | + * final iput won't be done until after we drop the ref we're currently |
|---|
| 3697 | + * holding. |
|---|
| 3698 | + */ |
|---|
| 3699 | + btrfs_run_delayed_iput(fs_info, inode); |
|---|
| 4075 | 3700 | err: |
|---|
| 4076 | 3701 | btrfs_free_path(path); |
|---|
| 4077 | 3702 | if (ret) |
|---|
| .. | .. |
|---|
| 4120 | 3745 | * 1 for the inode ref |
|---|
| 4121 | 3746 | * 1 for the inode |
|---|
| 4122 | 3747 | */ |
|---|
| 4123 | | - return btrfs_start_transaction_fallback_global_rsv(root, 5, 5); |
|---|
| 3748 | + return btrfs_start_transaction_fallback_global_rsv(root, 5); |
|---|
| 4124 | 3749 | } |
|---|
| 4125 | 3750 | |
|---|
| 4126 | 3751 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) |
|---|
| .. | .. |
|---|
| 4187 | 3812 | di = btrfs_lookup_dir_item(trans, root, path, dir_ino, |
|---|
| 4188 | 3813 | name, name_len, -1); |
|---|
| 4189 | 3814 | if (IS_ERR_OR_NULL(di)) { |
|---|
| 4190 | | - if (!di) |
|---|
| 4191 | | - ret = -ENOENT; |
|---|
| 4192 | | - else |
|---|
| 4193 | | - ret = PTR_ERR(di); |
|---|
| 3815 | + ret = di ? PTR_ERR(di) : -ENOENT; |
|---|
| 4194 | 3816 | goto out; |
|---|
| 4195 | 3817 | } |
|---|
| 4196 | 3818 | |
|---|
| .. | .. |
|---|
| 4393 | 4015 | * again is not run concurrently. |
|---|
| 4394 | 4016 | */ |
|---|
| 4395 | 4017 | spin_lock(&dest->root_item_lock); |
|---|
| 4396 | | - root_flags = btrfs_root_flags(&dest->root_item); |
|---|
| 4397 | | - if (dest->send_in_progress == 0) { |
|---|
| 4398 | | - btrfs_set_root_flags(&dest->root_item, |
|---|
| 4399 | | - root_flags | BTRFS_ROOT_SUBVOL_DEAD); |
|---|
| 4400 | | - spin_unlock(&dest->root_item_lock); |
|---|
| 4401 | | - } else { |
|---|
| 4018 | + if (dest->send_in_progress) { |
|---|
| 4402 | 4019 | spin_unlock(&dest->root_item_lock); |
|---|
| 4403 | 4020 | btrfs_warn(fs_info, |
|---|
| 4404 | 4021 | "attempt to delete subvolume %llu during send", |
|---|
| 4405 | 4022 | dest->root_key.objectid); |
|---|
| 4406 | 4023 | return -EPERM; |
|---|
| 4407 | 4024 | } |
|---|
| 4025 | + if (atomic_read(&dest->nr_swapfiles)) { |
|---|
| 4026 | + spin_unlock(&dest->root_item_lock); |
|---|
| 4027 | + btrfs_warn(fs_info, |
|---|
| 4028 | + "attempt to delete subvolume %llu with active swapfile", |
|---|
| 4029 | + root->root_key.objectid); |
|---|
| 4030 | + return -EPERM; |
|---|
| 4031 | + } |
|---|
| 4032 | + root_flags = btrfs_root_flags(&dest->root_item); |
|---|
| 4033 | + btrfs_set_root_flags(&dest->root_item, |
|---|
| 4034 | + root_flags | BTRFS_ROOT_SUBVOL_DEAD); |
|---|
| 4035 | + spin_unlock(&dest->root_item_lock); |
|---|
| 4408 | 4036 | |
|---|
| 4409 | 4037 | down_write(&fs_info->subvol_sem); |
|---|
| 4410 | 4038 | |
|---|
| .. | .. |
|---|
| 4487 | 4115 | err = ret; |
|---|
| 4488 | 4116 | inode->i_flags |= S_DEAD; |
|---|
| 4489 | 4117 | out_release: |
|---|
| 4490 | | - btrfs_subvolume_release_metadata(fs_info, &block_rsv); |
|---|
| 4118 | + btrfs_subvolume_release_metadata(root, &block_rsv); |
|---|
| 4491 | 4119 | out_up_write: |
|---|
| 4492 | 4120 | up_write(&fs_info->subvol_sem); |
|---|
| 4493 | 4121 | if (err) { |
|---|
| .. | .. |
|---|
| 4566 | 4194 | return err; |
|---|
| 4567 | 4195 | } |
|---|
| 4568 | 4196 | |
|---|
| 4569 | | -static int truncate_space_check(struct btrfs_trans_handle *trans, |
|---|
| 4570 | | - struct btrfs_root *root, |
|---|
| 4571 | | - u64 bytes_deleted) |
|---|
| 4572 | | -{ |
|---|
| 4573 | | - struct btrfs_fs_info *fs_info = root->fs_info; |
|---|
| 4574 | | - int ret; |
|---|
| 4575 | | - |
|---|
| 4576 | | - /* |
|---|
| 4577 | | - * This is only used to apply pressure to the enospc system, we don't |
|---|
| 4578 | | - * intend to use this reservation at all. |
|---|
| 4579 | | - */ |
|---|
| 4580 | | - bytes_deleted = btrfs_csum_bytes_to_leaves(fs_info, bytes_deleted); |
|---|
| 4581 | | - bytes_deleted *= fs_info->nodesize; |
|---|
| 4582 | | - ret = btrfs_block_rsv_add(root, &fs_info->trans_block_rsv, |
|---|
| 4583 | | - bytes_deleted, BTRFS_RESERVE_NO_FLUSH); |
|---|
| 4584 | | - if (!ret) { |
|---|
| 4585 | | - trace_btrfs_space_reservation(fs_info, "transaction", |
|---|
| 4586 | | - trans->transid, |
|---|
| 4587 | | - bytes_deleted, 1); |
|---|
| 4588 | | - trans->bytes_reserved += bytes_deleted; |
|---|
| 4589 | | - } |
|---|
| 4590 | | - return ret; |
|---|
| 4591 | | - |
|---|
| 4592 | | -} |
|---|
| 4593 | | - |
|---|
| 4594 | 4197 | /* |
|---|
| 4595 | 4198 | * Return this if we need to call truncate_block for the last bit of the |
|---|
| 4596 | 4199 | * truncate. |
|---|
| .. | .. |
|---|
| 4635 | 4238 | u64 bytes_deleted = 0; |
|---|
| 4636 | 4239 | bool be_nice = false; |
|---|
| 4637 | 4240 | bool should_throttle = false; |
|---|
| 4638 | | - bool should_end = false; |
|---|
| 4241 | + const u64 lock_start = ALIGN_DOWN(new_size, fs_info->sectorsize); |
|---|
| 4242 | + struct extent_state *cached_state = NULL; |
|---|
| 4639 | 4243 | |
|---|
| 4640 | 4244 | BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); |
|---|
| 4641 | 4245 | |
|---|
| 4642 | 4246 | /* |
|---|
| 4643 | | - * for non-free space inodes and ref cows, we want to back off from |
|---|
| 4644 | | - * time to time |
|---|
| 4247 | + * For inodes that are not free space inodes and that belong to shareable |
|---|
| 4248 | + * roots, we want to back off from time to time. This means all inodes in |
|---|
| 4249 | + * subvolume roots, reloc roots, and data reloc roots. |
|---|
| 4645 | 4250 | */ |
|---|
| 4646 | 4251 | if (!btrfs_is_free_space_inode(BTRFS_I(inode)) && |
|---|
| 4647 | | - test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
|---|
| 4252 | + test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) |
|---|
| 4648 | 4253 | be_nice = true; |
|---|
| 4649 | 4254 | |
|---|
| 4650 | 4255 | path = btrfs_alloc_path(); |
|---|
| .. | .. |
|---|
| 4652 | 4257 | return -ENOMEM; |
|---|
| 4653 | 4258 | path->reada = READA_BACK; |
|---|
| 4654 | 4259 | |
|---|
| 4655 | | - /* |
|---|
| 4656 | | - * We want to drop from the next block forward in case this new size is |
|---|
| 4657 | | - * not block aligned since we will be keeping the last block of the |
|---|
| 4658 | | - * extent just the way it is. |
|---|
| 4659 | | - */ |
|---|
| 4660 | | - if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || |
|---|
| 4661 | | - root == fs_info->tree_root) |
|---|
| 4260 | + if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
|---|
| 4261 | + lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1, |
|---|
| 4262 | + &cached_state); |
|---|
| 4263 | + |
|---|
| 4264 | + /* |
|---|
| 4265 | + * We want to drop from the next block forward in case this |
|---|
| 4266 | + * new size is not block aligned since we will be keeping the |
|---|
| 4267 | + * last block of the extent just the way it is. |
|---|
| 4268 | + */ |
|---|
| 4662 | 4269 | btrfs_drop_extent_cache(BTRFS_I(inode), ALIGN(new_size, |
|---|
| 4663 | 4270 | fs_info->sectorsize), |
|---|
| 4664 | 4271 | (u64)-1, 0); |
|---|
| 4272 | + } |
|---|
| 4665 | 4273 | |
|---|
| 4666 | 4274 | /* |
|---|
| 4667 | 4275 | * This function is also used to drop the items in the log tree before |
|---|
| 4668 | 4276 | * we relog the inode, so if root != BTRFS_I(inode)->root, it means |
|---|
| 4669 | | - * it is used to drop the loged items. So we shouldn't kill the delayed |
|---|
| 4277 | + * it is used to drop the logged items. So we shouldn't kill the delayed |
|---|
| 4670 | 4278 | * items. |
|---|
| 4671 | 4279 | */ |
|---|
| 4672 | 4280 | if (min_type == 0 && root == BTRFS_I(inode)->root) |
|---|
| .. | .. |
|---|
| 4688 | 4296 | goto out; |
|---|
| 4689 | 4297 | } |
|---|
| 4690 | 4298 | |
|---|
| 4691 | | - path->leave_spinning = 1; |
|---|
| 4692 | 4299 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
|---|
| 4693 | 4300 | if (ret < 0) |
|---|
| 4694 | 4301 | goto out; |
|---|
| .. | .. |
|---|
| 4704 | 4311 | } |
|---|
| 4705 | 4312 | |
|---|
| 4706 | 4313 | while (1) { |
|---|
| 4314 | + u64 clear_start = 0, clear_len = 0; |
|---|
| 4315 | + |
|---|
| 4707 | 4316 | fi = NULL; |
|---|
| 4708 | 4317 | leaf = path->nodes[0]; |
|---|
| 4709 | 4318 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
|---|
| .. | .. |
|---|
| 4754 | 4363 | |
|---|
| 4755 | 4364 | if (extent_type != BTRFS_FILE_EXTENT_INLINE) { |
|---|
| 4756 | 4365 | u64 num_dec; |
|---|
| 4366 | + |
|---|
| 4367 | + clear_start = found_key.offset; |
|---|
| 4757 | 4368 | extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); |
|---|
| 4758 | 4369 | if (!del_item) { |
|---|
| 4759 | 4370 | u64 orig_num_bytes = |
|---|
| .. | .. |
|---|
| 4761 | 4372 | extent_num_bytes = ALIGN(new_size - |
|---|
| 4762 | 4373 | found_key.offset, |
|---|
| 4763 | 4374 | fs_info->sectorsize); |
|---|
| 4375 | + clear_start = ALIGN(new_size, fs_info->sectorsize); |
|---|
| 4764 | 4376 | btrfs_set_file_extent_num_bytes(leaf, fi, |
|---|
| 4765 | 4377 | extent_num_bytes); |
|---|
| 4766 | 4378 | num_dec = (orig_num_bytes - |
|---|
| 4767 | 4379 | extent_num_bytes); |
|---|
| 4768 | | - if (test_bit(BTRFS_ROOT_REF_COWS, |
|---|
| 4380 | + if (test_bit(BTRFS_ROOT_SHAREABLE, |
|---|
| 4769 | 4381 | &root->state) && |
|---|
| 4770 | 4382 | extent_start != 0) |
|---|
| 4771 | 4383 | inode_sub_bytes(inode, num_dec); |
|---|
| .. | .. |
|---|
| 4781 | 4393 | num_dec = btrfs_file_extent_num_bytes(leaf, fi); |
|---|
| 4782 | 4394 | if (extent_start != 0) { |
|---|
| 4783 | 4395 | found_extent = 1; |
|---|
| 4784 | | - if (test_bit(BTRFS_ROOT_REF_COWS, |
|---|
| 4396 | + if (test_bit(BTRFS_ROOT_SHAREABLE, |
|---|
| 4785 | 4397 | &root->state)) |
|---|
| 4786 | 4398 | inode_sub_bytes(inode, num_dec); |
|---|
| 4787 | 4399 | } |
|---|
| 4788 | 4400 | } |
|---|
| 4401 | + clear_len = num_dec; |
|---|
| 4789 | 4402 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
|---|
| 4790 | 4403 | /* |
|---|
| 4791 | 4404 | * we can't truncate inline items that have had |
|---|
| .. | .. |
|---|
| 4799 | 4412 | |
|---|
| 4800 | 4413 | btrfs_set_file_extent_ram_bytes(leaf, fi, size); |
|---|
| 4801 | 4414 | size = btrfs_file_extent_calc_inline_size(size); |
|---|
| 4802 | | - btrfs_truncate_item(root->fs_info, path, size, 1); |
|---|
| 4415 | + btrfs_truncate_item(path, size, 1); |
|---|
| 4803 | 4416 | } else if (!del_item) { |
|---|
| 4804 | 4417 | /* |
|---|
| 4805 | 4418 | * We have to bail so the last_size is set to |
|---|
| .. | .. |
|---|
| 4807 | 4420 | */ |
|---|
| 4808 | 4421 | ret = NEED_TRUNCATE_BLOCK; |
|---|
| 4809 | 4422 | break; |
|---|
| 4423 | + } else { |
|---|
| 4424 | + /* |
|---|
| 4425 | + * Inline extents are special, we just treat |
|---|
| 4426 | + * them as a full sector worth in the file |
|---|
| 4427 | + * extent tree just for simplicity's sake. |
|---|
| 4428 | + */ |
|---|
| 4429 | + clear_len = fs_info->sectorsize; |
|---|
| 4810 | 4430 | } |
|---|
| 4811 | 4431 | |
|---|
| 4812 | | - if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
|---|
| 4432 | + if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) |
|---|
| 4813 | 4433 | inode_sub_bytes(inode, item_end + 1 - new_size); |
|---|
| 4814 | 4434 | } |
|---|
| 4815 | 4435 | delete: |
|---|
| 4436 | + /* |
|---|
| 4437 | + * We use btrfs_truncate_inode_items() to clean up log trees for |
|---|
| 4438 | + * multiple fsyncs, and in this case we don't want to clear the |
|---|
| 4439 | + * file extent range because it's just the log. |
|---|
| 4440 | + */ |
|---|
| 4441 | + if (root == BTRFS_I(inode)->root) { |
|---|
| 4442 | + ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode), |
|---|
| 4443 | + clear_start, clear_len); |
|---|
| 4444 | + if (ret) { |
|---|
| 4445 | + btrfs_abort_transaction(trans, ret); |
|---|
| 4446 | + break; |
|---|
| 4447 | + } |
|---|
| 4448 | + } |
|---|
| 4449 | + |
|---|
| 4816 | 4450 | if (del_item) |
|---|
| 4817 | 4451 | last_size = found_key.offset; |
|---|
| 4818 | 4452 | else |
|---|
| .. | .. |
|---|
| 4836 | 4470 | should_throttle = false; |
|---|
| 4837 | 4471 | |
|---|
| 4838 | 4472 | if (found_extent && |
|---|
| 4839 | | - (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || |
|---|
| 4840 | | - root == fs_info->tree_root)) { |
|---|
| 4841 | | - btrfs_set_path_blocking(path); |
|---|
| 4473 | + root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
|---|
| 4474 | + struct btrfs_ref ref = { 0 }; |
|---|
| 4475 | + |
|---|
| 4842 | 4476 | bytes_deleted += extent_num_bytes; |
|---|
| 4843 | | - ret = btrfs_free_extent(trans, root, extent_start, |
|---|
| 4844 | | - extent_num_bytes, 0, |
|---|
| 4845 | | - btrfs_header_owner(leaf), |
|---|
| 4846 | | - ino, extent_offset); |
|---|
| 4477 | + |
|---|
| 4478 | + btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, |
|---|
| 4479 | + extent_start, extent_num_bytes, 0); |
|---|
| 4480 | + ref.real_root = root->root_key.objectid; |
|---|
| 4481 | + btrfs_init_data_ref(&ref, btrfs_header_owner(leaf), |
|---|
| 4482 | + ino, extent_offset); |
|---|
| 4483 | + ret = btrfs_free_extent(trans, &ref); |
|---|
| 4847 | 4484 | if (ret) { |
|---|
| 4848 | 4485 | btrfs_abort_transaction(trans, ret); |
|---|
| 4849 | 4486 | break; |
|---|
| 4850 | 4487 | } |
|---|
| 4851 | | - if (btrfs_should_throttle_delayed_refs(trans, fs_info)) |
|---|
| 4852 | | - btrfs_async_run_delayed_refs(fs_info, |
|---|
| 4853 | | - trans->delayed_ref_updates * 2, |
|---|
| 4854 | | - trans->transid, 0); |
|---|
| 4855 | 4488 | if (be_nice) { |
|---|
| 4856 | | - if (truncate_space_check(trans, root, |
|---|
| 4857 | | - extent_num_bytes)) { |
|---|
| 4858 | | - should_end = true; |
|---|
| 4859 | | - } |
|---|
| 4860 | | - if (btrfs_should_throttle_delayed_refs(trans, |
|---|
| 4861 | | - fs_info)) |
|---|
| 4489 | + if (btrfs_should_throttle_delayed_refs(trans)) |
|---|
| 4862 | 4490 | should_throttle = true; |
|---|
| 4863 | 4491 | } |
|---|
| 4864 | 4492 | } |
|---|
| .. | .. |
|---|
| 4868 | 4496 | |
|---|
| 4869 | 4497 | if (path->slots[0] == 0 || |
|---|
| 4870 | 4498 | path->slots[0] != pending_del_slot || |
|---|
| 4871 | | - should_throttle || should_end) { |
|---|
| 4499 | + should_throttle) { |
|---|
| 4872 | 4500 | if (pending_del_nr) { |
|---|
| 4873 | 4501 | ret = btrfs_del_items(trans, root, path, |
|---|
| 4874 | 4502 | pending_del_slot, |
|---|
| .. | .. |
|---|
| 4880 | 4508 | pending_del_nr = 0; |
|---|
| 4881 | 4509 | } |
|---|
| 4882 | 4510 | btrfs_release_path(path); |
|---|
| 4883 | | - if (should_throttle) { |
|---|
| 4884 | | - unsigned long updates = trans->delayed_ref_updates; |
|---|
| 4885 | | - if (updates) { |
|---|
| 4886 | | - trans->delayed_ref_updates = 0; |
|---|
| 4887 | | - ret = btrfs_run_delayed_refs(trans, |
|---|
| 4888 | | - updates * 2); |
|---|
| 4889 | | - if (ret) |
|---|
| 4890 | | - break; |
|---|
| 4891 | | - } |
|---|
| 4892 | | - } |
|---|
| 4511 | + |
|---|
| 4893 | 4512 | /* |
|---|
| 4894 | | - * if we failed to refill our space rsv, bail out |
|---|
| 4895 | | - * and let the transaction restart |
|---|
| 4513 | + * We can generate a lot of delayed refs, so we need to |
|---|
| 4514 | + * throttle every once in a while and make sure we're |
|---|
| 4515 | + * adding enough space to keep up with the work we are |
|---|
| 4516 | + * generating. Since we hold a transaction here we |
|---|
| 4517 | + * can't flush, and we don't want to FLUSH_LIMIT because |
|---|
| 4518 | + * we could have generated too many delayed refs to |
|---|
| 4519 | + * actually allocate, so just bail if we're short and |
|---|
| 4520 | + * let the normal reservation dance happen higher up. |
|---|
| 4896 | 4521 | */ |
|---|
| 4897 | | - if (should_end) { |
|---|
| 4898 | | - ret = -EAGAIN; |
|---|
| 4899 | | - break; |
|---|
| 4522 | + if (should_throttle) { |
|---|
| 4523 | + ret = btrfs_delayed_refs_rsv_refill(fs_info, |
|---|
| 4524 | + BTRFS_RESERVE_NO_FLUSH); |
|---|
| 4525 | + if (ret) { |
|---|
| 4526 | + ret = -EAGAIN; |
|---|
| 4527 | + break; |
|---|
| 4528 | + } |
|---|
| 4900 | 4529 | } |
|---|
| 4901 | 4530 | goto search_again; |
|---|
| 4902 | 4531 | } else { |
|---|
| .. | .. |
|---|
| 4918 | 4547 | ASSERT(last_size >= new_size); |
|---|
| 4919 | 4548 | if (!ret && last_size > new_size) |
|---|
| 4920 | 4549 | last_size = new_size; |
|---|
| 4921 | | - btrfs_ordered_update_i_size(inode, last_size, NULL); |
|---|
| 4550 | + btrfs_inode_safe_disk_i_size_write(inode, last_size); |
|---|
| 4551 | + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, |
|---|
| 4552 | + (u64)-1, &cached_state); |
|---|
| 4922 | 4553 | } |
|---|
| 4923 | 4554 | |
|---|
| 4924 | 4555 | btrfs_free_path(path); |
|---|
| 4925 | | - |
|---|
| 4926 | | - if (be_nice && bytes_deleted > SZ_32M && (ret >= 0 || ret == -EAGAIN)) { |
|---|
| 4927 | | - unsigned long updates = trans->delayed_ref_updates; |
|---|
| 4928 | | - int err; |
|---|
| 4929 | | - |
|---|
| 4930 | | - if (updates) { |
|---|
| 4931 | | - trans->delayed_ref_updates = 0; |
|---|
| 4932 | | - err = btrfs_run_delayed_refs(trans, updates * 2); |
|---|
| 4933 | | - if (err) |
|---|
| 4934 | | - ret = err; |
|---|
| 4935 | | - } |
|---|
| 4936 | | - } |
|---|
| 4937 | 4556 | return ret; |
|---|
| 4938 | 4557 | } |
|---|
| 4939 | 4558 | |
|---|
| .. | .. |
|---|
| 4958 | 4577 | struct extent_state *cached_state = NULL; |
|---|
| 4959 | 4578 | struct extent_changeset *data_reserved = NULL; |
|---|
| 4960 | 4579 | char *kaddr; |
|---|
| 4580 | + bool only_release_metadata = false; |
|---|
| 4961 | 4581 | u32 blocksize = fs_info->sectorsize; |
|---|
| 4962 | 4582 | pgoff_t index = from >> PAGE_SHIFT; |
|---|
| 4963 | 4583 | unsigned offset = from & (blocksize - 1); |
|---|
| 4964 | 4584 | struct page *page; |
|---|
| 4965 | 4585 | gfp_t mask = btrfs_alloc_write_mask(mapping); |
|---|
| 4586 | + size_t write_bytes = blocksize; |
|---|
| 4966 | 4587 | int ret = 0; |
|---|
| 4967 | 4588 | u64 block_start; |
|---|
| 4968 | 4589 | u64 block_end; |
|---|
| .. | .. |
|---|
| 4974 | 4595 | block_start = round_down(from, blocksize); |
|---|
| 4975 | 4596 | block_end = block_start + blocksize - 1; |
|---|
| 4976 | 4597 | |
|---|
| 4977 | | - ret = btrfs_delalloc_reserve_space(inode, &data_reserved, |
|---|
| 4978 | | - block_start, blocksize); |
|---|
| 4979 | | - if (ret) |
|---|
| 4598 | + ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, |
|---|
| 4599 | + block_start, blocksize); |
|---|
| 4600 | + if (ret < 0) { |
|---|
| 4601 | + if (btrfs_check_nocow_lock(BTRFS_I(inode), block_start, |
|---|
| 4602 | + &write_bytes) > 0) { |
|---|
| 4603 | + /* For nocow case, no need to reserve data space */ |
|---|
| 4604 | + only_release_metadata = true; |
|---|
| 4605 | + } else { |
|---|
| 4606 | + goto out; |
|---|
| 4607 | + } |
|---|
| 4608 | + } |
|---|
| 4609 | + ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), blocksize); |
|---|
| 4610 | + if (ret < 0) { |
|---|
| 4611 | + if (!only_release_metadata) |
|---|
| 4612 | + btrfs_free_reserved_data_space(BTRFS_I(inode), |
|---|
| 4613 | + data_reserved, block_start, blocksize); |
|---|
| 4980 | 4614 | goto out; |
|---|
| 4981 | | - |
|---|
| 4615 | + } |
|---|
| 4982 | 4616 | again: |
|---|
| 4983 | 4617 | page = find_or_create_page(mapping, index, mask); |
|---|
| 4984 | 4618 | if (!page) { |
|---|
| 4985 | | - btrfs_delalloc_release_space(inode, data_reserved, |
|---|
| 4619 | + btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, |
|---|
| 4986 | 4620 | block_start, blocksize, true); |
|---|
| 4987 | 4621 | btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize); |
|---|
| 4988 | 4622 | ret = -ENOMEM; |
|---|
| .. | .. |
|---|
| 5007 | 4641 | lock_extent_bits(io_tree, block_start, block_end, &cached_state); |
|---|
| 5008 | 4642 | set_page_extent_mapped(page); |
|---|
| 5009 | 4643 | |
|---|
| 5010 | | - ordered = btrfs_lookup_ordered_extent(inode, block_start); |
|---|
| 4644 | + ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), block_start); |
|---|
| 5011 | 4645 | if (ordered) { |
|---|
| 5012 | 4646 | unlock_extent_cached(io_tree, block_start, block_end, |
|---|
| 5013 | 4647 | &cached_state); |
|---|
| 5014 | 4648 | unlock_page(page); |
|---|
| 5015 | 4649 | put_page(page); |
|---|
| 5016 | | - btrfs_start_ordered_extent(inode, ordered, 1); |
|---|
| 4650 | + btrfs_start_ordered_extent(ordered, 1); |
|---|
| 5017 | 4651 | btrfs_put_ordered_extent(ordered); |
|---|
| 5018 | 4652 | goto again; |
|---|
| 5019 | 4653 | } |
|---|
| 5020 | 4654 | |
|---|
| 5021 | 4655 | clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end, |
|---|
| 5022 | | - EXTENT_DIRTY | EXTENT_DELALLOC | |
|---|
| 5023 | | - EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, |
|---|
| 5024 | | - 0, 0, &cached_state); |
|---|
| 4656 | + EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, |
|---|
| 4657 | + 0, 0, &cached_state); |
|---|
| 5025 | 4658 | |
|---|
| 5026 | | - ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0, |
|---|
| 5027 | | - &cached_state, 0); |
|---|
| 4659 | + ret = btrfs_set_extent_delalloc(BTRFS_I(inode), block_start, block_end, 0, |
|---|
| 4660 | + &cached_state); |
|---|
| 5028 | 4661 | if (ret) { |
|---|
| 5029 | 4662 | unlock_extent_cached(io_tree, block_start, block_end, |
|---|
| 5030 | 4663 | &cached_state); |
|---|
| .. | .. |
|---|
| 5048 | 4681 | set_page_dirty(page); |
|---|
| 5049 | 4682 | unlock_extent_cached(io_tree, block_start, block_end, &cached_state); |
|---|
| 5050 | 4683 | |
|---|
| 4684 | + if (only_release_metadata) |
|---|
| 4685 | + set_extent_bit(&BTRFS_I(inode)->io_tree, block_start, |
|---|
| 4686 | + block_end, EXTENT_NORESERVE, NULL, NULL, |
|---|
| 4687 | + GFP_NOFS); |
|---|
| 4688 | + |
|---|
| 5051 | 4689 | out_unlock: |
|---|
| 5052 | | - if (ret) |
|---|
| 5053 | | - btrfs_delalloc_release_space(inode, data_reserved, block_start, |
|---|
| 5054 | | - blocksize, true); |
|---|
| 4690 | + if (ret) { |
|---|
| 4691 | + if (only_release_metadata) |
|---|
| 4692 | + btrfs_delalloc_release_metadata(BTRFS_I(inode), |
|---|
| 4693 | + blocksize, true); |
|---|
| 4694 | + else |
|---|
| 4695 | + btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, |
|---|
| 4696 | + block_start, blocksize, true); |
|---|
| 4697 | + } |
|---|
| 5055 | 4698 | btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize); |
|---|
| 5056 | 4699 | unlock_page(page); |
|---|
| 5057 | 4700 | put_page(page); |
|---|
| 5058 | 4701 | out: |
|---|
| 4702 | + if (only_release_metadata) |
|---|
| 4703 | + btrfs_check_nocow_unlock(BTRFS_I(inode)); |
|---|
| 5059 | 4704 | extent_changeset_free(data_reserved); |
|---|
| 5060 | 4705 | return ret; |
|---|
| 5061 | 4706 | } |
|---|
| .. | .. |
|---|
| 5137 | 4782 | if (size <= hole_start) |
|---|
| 5138 | 4783 | return 0; |
|---|
| 5139 | 4784 | |
|---|
| 5140 | | - while (1) { |
|---|
| 5141 | | - struct btrfs_ordered_extent *ordered; |
|---|
| 5142 | | - |
|---|
| 5143 | | - lock_extent_bits(io_tree, hole_start, block_end - 1, |
|---|
| 5144 | | - &cached_state); |
|---|
| 5145 | | - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), hole_start, |
|---|
| 5146 | | - block_end - hole_start); |
|---|
| 5147 | | - if (!ordered) |
|---|
| 5148 | | - break; |
|---|
| 5149 | | - unlock_extent_cached(io_tree, hole_start, block_end - 1, |
|---|
| 5150 | | - &cached_state); |
|---|
| 5151 | | - btrfs_start_ordered_extent(inode, ordered, 1); |
|---|
| 5152 | | - btrfs_put_ordered_extent(ordered); |
|---|
| 5153 | | - } |
|---|
| 5154 | | - |
|---|
| 4785 | + btrfs_lock_and_flush_ordered_range(BTRFS_I(inode), hole_start, |
|---|
| 4786 | + block_end - 1, &cached_state); |
|---|
| 5155 | 4787 | cur_offset = hole_start; |
|---|
| 5156 | 4788 | while (1) { |
|---|
| 5157 | 4789 | em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset, |
|---|
| 5158 | | - block_end - cur_offset, 0); |
|---|
| 4790 | + block_end - cur_offset); |
|---|
| 5159 | 4791 | if (IS_ERR(em)) { |
|---|
| 5160 | 4792 | err = PTR_ERR(em); |
|---|
| 5161 | 4793 | em = NULL; |
|---|
| .. | .. |
|---|
| 5163 | 4795 | } |
|---|
| 5164 | 4796 | last_byte = min(extent_map_end(em), block_end); |
|---|
| 5165 | 4797 | last_byte = ALIGN(last_byte, fs_info->sectorsize); |
|---|
| 4798 | + hole_size = last_byte - cur_offset; |
|---|
| 4799 | + |
|---|
| 5166 | 4800 | if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { |
|---|
| 5167 | 4801 | struct extent_map *hole_em; |
|---|
| 5168 | | - hole_size = last_byte - cur_offset; |
|---|
| 5169 | 4802 | |
|---|
| 5170 | 4803 | err = maybe_insert_hole(root, inode, cur_offset, |
|---|
| 5171 | 4804 | hole_size); |
|---|
| 5172 | 4805 | if (err) |
|---|
| 5173 | 4806 | break; |
|---|
| 4807 | + |
|---|
| 4808 | + err = btrfs_inode_set_file_extent_range(BTRFS_I(inode), |
|---|
| 4809 | + cur_offset, hole_size); |
|---|
| 4810 | + if (err) |
|---|
| 4811 | + break; |
|---|
| 4812 | + |
|---|
| 5174 | 4813 | btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset, |
|---|
| 5175 | 4814 | cur_offset + hole_size - 1, 0); |
|---|
| 5176 | 4815 | hole_em = alloc_extent_map(); |
|---|
| .. | .. |
|---|
| 5187 | 4826 | hole_em->block_len = 0; |
|---|
| 5188 | 4827 | hole_em->orig_block_len = 0; |
|---|
| 5189 | 4828 | hole_em->ram_bytes = hole_size; |
|---|
| 5190 | | - hole_em->bdev = fs_info->fs_devices->latest_bdev; |
|---|
| 5191 | 4829 | hole_em->compress_type = BTRFS_COMPRESS_NONE; |
|---|
| 5192 | 4830 | hole_em->generation = fs_info->generation; |
|---|
| 5193 | 4831 | |
|---|
| .. | .. |
|---|
| 5203 | 4841 | hole_size - 1, 0); |
|---|
| 5204 | 4842 | } |
|---|
| 5205 | 4843 | free_extent_map(hole_em); |
|---|
| 4844 | + } else { |
|---|
| 4845 | + err = btrfs_inode_set_file_extent_range(BTRFS_I(inode), |
|---|
| 4846 | + cur_offset, hole_size); |
|---|
| 4847 | + if (err) |
|---|
| 4848 | + break; |
|---|
| 5206 | 4849 | } |
|---|
| 5207 | 4850 | next: |
|---|
| 5208 | 4851 | free_extent_map(em); |
|---|
| .. | .. |
|---|
| 5246 | 4889 | * truncation, it must capture all writes that happened before |
|---|
| 5247 | 4890 | * this truncation. |
|---|
| 5248 | 4891 | */ |
|---|
| 5249 | | - btrfs_wait_for_snapshot_creation(root); |
|---|
| 4892 | + btrfs_drew_write_lock(&root->snapshot_lock); |
|---|
| 5250 | 4893 | ret = btrfs_cont_expand(inode, oldsize, newsize); |
|---|
| 5251 | 4894 | if (ret) { |
|---|
| 5252 | | - btrfs_end_write_no_snapshotting(root); |
|---|
| 4895 | + btrfs_drew_write_unlock(&root->snapshot_lock); |
|---|
| 5253 | 4896 | return ret; |
|---|
| 5254 | 4897 | } |
|---|
| 5255 | 4898 | |
|---|
| 5256 | 4899 | trans = btrfs_start_transaction(root, 1); |
|---|
| 5257 | 4900 | if (IS_ERR(trans)) { |
|---|
| 5258 | | - btrfs_end_write_no_snapshotting(root); |
|---|
| 4901 | + btrfs_drew_write_unlock(&root->snapshot_lock); |
|---|
| 5259 | 4902 | return PTR_ERR(trans); |
|---|
| 5260 | 4903 | } |
|---|
| 5261 | 4904 | |
|---|
| 5262 | 4905 | i_size_write(inode, newsize); |
|---|
| 5263 | | - btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); |
|---|
| 4906 | + btrfs_inode_safe_disk_i_size_write(inode, 0); |
|---|
| 5264 | 4907 | pagecache_isize_extended(inode, oldsize, newsize); |
|---|
| 5265 | 4908 | ret = btrfs_update_inode(trans, root, inode); |
|---|
| 5266 | | - btrfs_end_write_no_snapshotting(root); |
|---|
| 4909 | + btrfs_drew_write_unlock(&root->snapshot_lock); |
|---|
| 5267 | 4910 | btrfs_end_transaction(trans); |
|---|
| 5268 | 4911 | } else { |
|---|
| 5269 | 4912 | |
|---|
| 5270 | 4913 | /* |
|---|
| 5271 | 4914 | * We're truncating a file that used to have good data down to |
|---|
| 5272 | | - * zero. Make sure it gets into the ordered flush list so that |
|---|
| 5273 | | - * any new writes get down to disk quickly. |
|---|
| 4915 | + * zero. Make sure any new writes to the file get on disk |
|---|
| 4916 | + * on close. |
|---|
| 5274 | 4917 | */ |
|---|
| 5275 | 4918 | if (newsize == 0) |
|---|
| 5276 | | - set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, |
|---|
| 4919 | + set_bit(BTRFS_INODE_FLUSH_ON_CLOSE, |
|---|
| 5277 | 4920 | &BTRFS_I(inode)->runtime_flags); |
|---|
| 5278 | 4921 | |
|---|
| 5279 | 4922 | truncate_setsize(inode, newsize); |
|---|
| 5280 | 4923 | |
|---|
| 5281 | | - /* Disable nonlocked read DIO to avoid the end less truncate */ |
|---|
| 5282 | | - btrfs_inode_block_unlocked_dio(BTRFS_I(inode)); |
|---|
| 5283 | 4924 | inode_dio_wait(inode); |
|---|
| 5284 | | - btrfs_inode_resume_unlocked_dio(BTRFS_I(inode)); |
|---|
| 5285 | 4925 | |
|---|
| 5286 | 4926 | ret = btrfs_truncate(inode, newsize == oldsize); |
|---|
| 5287 | 4927 | if (ret && inode->i_nlink) { |
|---|
| .. | .. |
|---|
| 5356 | 4996 | truncate_inode_pages_final(&inode->i_data); |
|---|
| 5357 | 4997 | |
|---|
| 5358 | 4998 | write_lock(&map_tree->lock); |
|---|
| 5359 | | - while (!RB_EMPTY_ROOT(&map_tree->map)) { |
|---|
| 4999 | + while (!RB_EMPTY_ROOT(&map_tree->map.rb_root)) { |
|---|
| 5360 | 5000 | struct extent_map *em; |
|---|
| 5361 | 5001 | |
|---|
| 5362 | | - node = rb_first(&map_tree->map); |
|---|
| 5002 | + node = rb_first_cached(&map_tree->map); |
|---|
| 5363 | 5003 | em = rb_entry(node, struct extent_map, rb_node); |
|---|
| 5364 | 5004 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); |
|---|
| 5365 | 5005 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); |
|---|
| .. | .. |
|---|
| 5375 | 5015 | |
|---|
| 5376 | 5016 | /* |
|---|
| 5377 | 5017 | * Keep looping until we have no more ranges in the io tree. |
|---|
| 5378 | | - * We can have ongoing bios started by readpages (called from readahead) |
|---|
| 5379 | | - * that have their endio callback (extent_io.c:end_bio_extent_readpage) |
|---|
| 5018 | + * We can have ongoing bios started by readahead that have |
|---|
| 5019 | + * their endio callback (extent_io.c:end_bio_extent_readpage) |
|---|
| 5380 | 5020 | * still in progress (unlocked the pages in the bio but did not yet |
|---|
| 5381 | 5021 | * unlocked the ranges in the io tree). Therefore this means some |
|---|
| 5382 | 5022 | * ranges can still be locked and eviction started because before |
|---|
| .. | .. |
|---|
| 5415 | 5055 | * Note, end is the bytenr of last byte, so we need + 1 here. |
|---|
| 5416 | 5056 | */ |
|---|
| 5417 | 5057 | if (state_flags & EXTENT_DELALLOC) |
|---|
| 5418 | | - btrfs_qgroup_free_data(inode, NULL, start, end - start + 1); |
|---|
| 5058 | + btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start, |
|---|
| 5059 | + end - start + 1); |
|---|
| 5419 | 5060 | |
|---|
| 5420 | 5061 | clear_extent_bit(io_tree, start, end, |
|---|
| 5421 | | - EXTENT_LOCKED | EXTENT_DIRTY | |
|---|
| 5422 | | - EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | |
|---|
| 5423 | | - EXTENT_DEFRAG, 1, 1, &cached_state); |
|---|
| 5062 | + EXTENT_LOCKED | EXTENT_DELALLOC | |
|---|
| 5063 | + EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1, |
|---|
| 5064 | + &cached_state); |
|---|
| 5424 | 5065 | |
|---|
| 5425 | 5066 | cond_resched(); |
|---|
| 5426 | 5067 | spin_lock(&io_tree->lock); |
|---|
| .. | .. |
|---|
| 5429 | 5070 | } |
|---|
| 5430 | 5071 | |
|---|
| 5431 | 5072 | static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root, |
|---|
| 5432 | | - struct btrfs_block_rsv *rsv, |
|---|
| 5433 | | - u64 min_size) |
|---|
| 5073 | + struct btrfs_block_rsv *rsv) |
|---|
| 5434 | 5074 | { |
|---|
| 5435 | 5075 | struct btrfs_fs_info *fs_info = root->fs_info; |
|---|
| 5436 | 5076 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; |
|---|
| 5437 | | - int failures = 0; |
|---|
| 5077 | + struct btrfs_trans_handle *trans; |
|---|
| 5078 | + u64 delayed_refs_extra = btrfs_calc_insert_metadata_size(fs_info, 1); |
|---|
| 5079 | + int ret; |
|---|
| 5438 | 5080 | |
|---|
| 5439 | | - for (;;) { |
|---|
| 5440 | | - struct btrfs_trans_handle *trans; |
|---|
| 5441 | | - int ret; |
|---|
| 5442 | | - |
|---|
| 5443 | | - ret = btrfs_block_rsv_refill(root, rsv, min_size, |
|---|
| 5444 | | - BTRFS_RESERVE_FLUSH_LIMIT); |
|---|
| 5445 | | - |
|---|
| 5446 | | - if (ret && ++failures > 2) { |
|---|
| 5447 | | - btrfs_warn(fs_info, |
|---|
| 5448 | | - "could not allocate space for a delete; will truncate on mount"); |
|---|
| 5449 | | - return ERR_PTR(-ENOSPC); |
|---|
| 5450 | | - } |
|---|
| 5451 | | - |
|---|
| 5452 | | - trans = btrfs_join_transaction(root); |
|---|
| 5453 | | - if (IS_ERR(trans) || !ret) |
|---|
| 5454 | | - return trans; |
|---|
| 5455 | | - |
|---|
| 5081 | + /* |
|---|
| 5082 | + * Eviction should be taking place at some place safe because of our |
|---|
| 5083 | + * delayed iputs. However the normal flushing code will run delayed |
|---|
| 5084 | + * iputs, so we cannot use FLUSH_ALL otherwise we'll deadlock. |
|---|
| 5085 | + * |
|---|
| 5086 | + * We reserve the delayed_refs_extra here again because we can't use |
|---|
| 5087 | + * btrfs_start_transaction(root, 0) for the same deadlocky reason as |
|---|
| 5088 | + * above. We reserve our extra bit here because we generate a ton of |
|---|
| 5089 | + * delayed refs activity by truncating. |
|---|
| 5090 | + * |
|---|
| 5091 | + * If we cannot make our reservation we'll attempt to steal from the |
|---|
| 5092 | + * global reserve, because we really want to be able to free up space. |
|---|
| 5093 | + */ |
|---|
| 5094 | + ret = btrfs_block_rsv_refill(root, rsv, rsv->size + delayed_refs_extra, |
|---|
| 5095 | + BTRFS_RESERVE_FLUSH_EVICT); |
|---|
| 5096 | + if (ret) { |
|---|
| 5456 | 5097 | /* |
|---|
| 5457 | 5098 | * Try to steal from the global reserve if there is space for |
|---|
| 5458 | 5099 | * it. |
|---|
| 5459 | 5100 | */ |
|---|
| 5460 | | - if (!btrfs_check_space_for_delayed_refs(trans, fs_info) && |
|---|
| 5461 | | - !btrfs_block_rsv_migrate(global_rsv, rsv, min_size, 0)) |
|---|
| 5462 | | - return trans; |
|---|
| 5463 | | - |
|---|
| 5464 | | - /* If not, commit and try again. */ |
|---|
| 5465 | | - ret = btrfs_commit_transaction(trans); |
|---|
| 5466 | | - if (ret) |
|---|
| 5467 | | - return ERR_PTR(ret); |
|---|
| 5101 | + if (btrfs_check_space_for_delayed_refs(fs_info) || |
|---|
| 5102 | + btrfs_block_rsv_migrate(global_rsv, rsv, rsv->size, 0)) { |
|---|
| 5103 | + btrfs_warn(fs_info, |
|---|
| 5104 | + "could not allocate space for delete; will truncate on mount"); |
|---|
| 5105 | + return ERR_PTR(-ENOSPC); |
|---|
| 5106 | + } |
|---|
| 5107 | + delayed_refs_extra = 0; |
|---|
| 5468 | 5108 | } |
|---|
| 5109 | + |
|---|
| 5110 | + trans = btrfs_join_transaction(root); |
|---|
| 5111 | + if (IS_ERR(trans)) |
|---|
| 5112 | + return trans; |
|---|
| 5113 | + |
|---|
| 5114 | + if (delayed_refs_extra) { |
|---|
| 5115 | + trans->block_rsv = &fs_info->trans_block_rsv; |
|---|
| 5116 | + trans->bytes_reserved = delayed_refs_extra; |
|---|
| 5117 | + btrfs_block_rsv_migrate(rsv, trans->block_rsv, |
|---|
| 5118 | + delayed_refs_extra, 1); |
|---|
| 5119 | + } |
|---|
| 5120 | + return trans; |
|---|
| 5469 | 5121 | } |
|---|
| 5470 | 5122 | |
|---|
| 5471 | 5123 | void btrfs_evict_inode(struct inode *inode) |
|---|
| .. | .. |
|---|
| 5474 | 5126 | struct btrfs_trans_handle *trans; |
|---|
| 5475 | 5127 | struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 5476 | 5128 | struct btrfs_block_rsv *rsv; |
|---|
| 5477 | | - u64 min_size; |
|---|
| 5478 | 5129 | int ret; |
|---|
| 5479 | 5130 | |
|---|
| 5480 | 5131 | trace_btrfs_inode_evict(inode); |
|---|
| .. | .. |
|---|
| 5483 | 5134 | clear_inode(inode); |
|---|
| 5484 | 5135 | return; |
|---|
| 5485 | 5136 | } |
|---|
| 5486 | | - |
|---|
| 5487 | | - min_size = btrfs_calc_trunc_metadata_size(fs_info, 1); |
|---|
| 5488 | 5137 | |
|---|
| 5489 | 5138 | evict_inode_truncate_pages(inode); |
|---|
| 5490 | 5139 | |
|---|
| .. | .. |
|---|
| 5496 | 5145 | |
|---|
| 5497 | 5146 | if (is_bad_inode(inode)) |
|---|
| 5498 | 5147 | goto no_delete; |
|---|
| 5499 | | - /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ |
|---|
| 5500 | | - if (!special_file(inode->i_mode)) |
|---|
| 5501 | | - btrfs_wait_ordered_range(inode, 0, (u64)-1); |
|---|
| 5502 | 5148 | |
|---|
| 5503 | 5149 | btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1); |
|---|
| 5504 | 5150 | |
|---|
| .. | .. |
|---|
| 5518 | 5164 | rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP); |
|---|
| 5519 | 5165 | if (!rsv) |
|---|
| 5520 | 5166 | goto no_delete; |
|---|
| 5521 | | - rsv->size = min_size; |
|---|
| 5167 | + rsv->size = btrfs_calc_metadata_size(fs_info, 1); |
|---|
| 5522 | 5168 | rsv->failfast = 1; |
|---|
| 5523 | 5169 | |
|---|
| 5524 | 5170 | btrfs_i_size_write(BTRFS_I(inode), 0); |
|---|
| 5525 | 5171 | |
|---|
| 5526 | 5172 | while (1) { |
|---|
| 5527 | | - trans = evict_refill_and_join(root, rsv, min_size); |
|---|
| 5173 | + trans = evict_refill_and_join(root, rsv); |
|---|
| 5528 | 5174 | if (IS_ERR(trans)) |
|---|
| 5529 | 5175 | goto free_rsv; |
|---|
| 5530 | 5176 | |
|---|
| .. | .. |
|---|
| 5549 | 5195 | * If it turns out that we are dropping too many of these, we might want |
|---|
| 5550 | 5196 | * to add a mechanism for retrying these after a commit. |
|---|
| 5551 | 5197 | */ |
|---|
| 5552 | | - trans = evict_refill_and_join(root, rsv, min_size); |
|---|
| 5198 | + trans = evict_refill_and_join(root, rsv); |
|---|
| 5553 | 5199 | if (!IS_ERR(trans)) { |
|---|
| 5554 | 5200 | trans->block_rsv = rsv; |
|---|
| 5555 | 5201 | btrfs_orphan_del(trans, BTRFS_I(inode)); |
|---|
| .. | .. |
|---|
| 5596 | 5242 | |
|---|
| 5597 | 5243 | di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)), |
|---|
| 5598 | 5244 | name, namelen, 0); |
|---|
| 5599 | | - if (!di) { |
|---|
| 5600 | | - ret = -ENOENT; |
|---|
| 5601 | | - goto out; |
|---|
| 5602 | | - } |
|---|
| 5603 | | - if (IS_ERR(di)) { |
|---|
| 5604 | | - ret = PTR_ERR(di); |
|---|
| 5245 | + if (IS_ERR_OR_NULL(di)) { |
|---|
| 5246 | + ret = di ? PTR_ERR(di) : -ENOENT; |
|---|
| 5605 | 5247 | goto out; |
|---|
| 5606 | 5248 | } |
|---|
| 5607 | 5249 | |
|---|
| .. | .. |
|---|
| 5672 | 5314 | |
|---|
| 5673 | 5315 | btrfs_release_path(path); |
|---|
| 5674 | 5316 | |
|---|
| 5675 | | - new_root = btrfs_read_fs_root_no_name(fs_info, location); |
|---|
| 5317 | + new_root = btrfs_get_fs_root(fs_info, location->objectid, true); |
|---|
| 5676 | 5318 | if (IS_ERR(new_root)) { |
|---|
| 5677 | 5319 | err = PTR_ERR(new_root); |
|---|
| 5678 | 5320 | goto out; |
|---|
| .. | .. |
|---|
| 5724 | 5366 | spin_unlock(&root->inode_lock); |
|---|
| 5725 | 5367 | } |
|---|
| 5726 | 5368 | |
|---|
| 5727 | | -static void inode_tree_del(struct inode *inode) |
|---|
| 5369 | +static void inode_tree_del(struct btrfs_inode *inode) |
|---|
| 5728 | 5370 | { |
|---|
| 5729 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 5371 | + struct btrfs_root *root = inode->root; |
|---|
| 5730 | 5372 | int empty = 0; |
|---|
| 5731 | 5373 | |
|---|
| 5732 | 5374 | spin_lock(&root->inode_lock); |
|---|
| 5733 | | - if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { |
|---|
| 5734 | | - rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); |
|---|
| 5735 | | - RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); |
|---|
| 5375 | + if (!RB_EMPTY_NODE(&inode->rb_node)) { |
|---|
| 5376 | + rb_erase(&inode->rb_node, &root->inode_tree); |
|---|
| 5377 | + RB_CLEAR_NODE(&inode->rb_node); |
|---|
| 5736 | 5378 | empty = RB_EMPTY_ROOT(&root->inode_tree); |
|---|
| 5737 | 5379 | } |
|---|
| 5738 | 5380 | spin_unlock(&root->inode_lock); |
|---|
| .. | .. |
|---|
| 5750 | 5392 | static int btrfs_init_locked_inode(struct inode *inode, void *p) |
|---|
| 5751 | 5393 | { |
|---|
| 5752 | 5394 | struct btrfs_iget_args *args = p; |
|---|
| 5753 | | - inode->i_ino = args->location->objectid; |
|---|
| 5754 | | - memcpy(&BTRFS_I(inode)->location, args->location, |
|---|
| 5755 | | - sizeof(*args->location)); |
|---|
| 5756 | | - BTRFS_I(inode)->root = args->root; |
|---|
| 5395 | + |
|---|
| 5396 | + inode->i_ino = args->ino; |
|---|
| 5397 | + BTRFS_I(inode)->location.objectid = args->ino; |
|---|
| 5398 | + BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; |
|---|
| 5399 | + BTRFS_I(inode)->location.offset = 0; |
|---|
| 5400 | + BTRFS_I(inode)->root = btrfs_grab_root(args->root); |
|---|
| 5401 | + BUG_ON(args->root && !BTRFS_I(inode)->root); |
|---|
| 5757 | 5402 | return 0; |
|---|
| 5758 | 5403 | } |
|---|
| 5759 | 5404 | |
|---|
| 5760 | 5405 | static int btrfs_find_actor(struct inode *inode, void *opaque) |
|---|
| 5761 | 5406 | { |
|---|
| 5762 | 5407 | struct btrfs_iget_args *args = opaque; |
|---|
| 5763 | | - return args->location->objectid == BTRFS_I(inode)->location.objectid && |
|---|
| 5408 | + |
|---|
| 5409 | + return args->ino == BTRFS_I(inode)->location.objectid && |
|---|
| 5764 | 5410 | args->root == BTRFS_I(inode)->root; |
|---|
| 5765 | 5411 | } |
|---|
| 5766 | 5412 | |
|---|
| 5767 | | -static struct inode *btrfs_iget_locked(struct super_block *s, |
|---|
| 5768 | | - struct btrfs_key *location, |
|---|
| 5413 | +static struct inode *btrfs_iget_locked(struct super_block *s, u64 ino, |
|---|
| 5769 | 5414 | struct btrfs_root *root) |
|---|
| 5770 | 5415 | { |
|---|
| 5771 | 5416 | struct inode *inode; |
|---|
| 5772 | 5417 | struct btrfs_iget_args args; |
|---|
| 5773 | | - unsigned long hashval = btrfs_inode_hash(location->objectid, root); |
|---|
| 5418 | + unsigned long hashval = btrfs_inode_hash(ino, root); |
|---|
| 5774 | 5419 | |
|---|
| 5775 | | - args.location = location; |
|---|
| 5420 | + args.ino = ino; |
|---|
| 5776 | 5421 | args.root = root; |
|---|
| 5777 | 5422 | |
|---|
| 5778 | 5423 | inode = iget5_locked(s, hashval, btrfs_find_actor, |
|---|
| .. | .. |
|---|
| 5781 | 5426 | return inode; |
|---|
| 5782 | 5427 | } |
|---|
| 5783 | 5428 | |
|---|
| 5784 | | -/* Get an inode object given its location and corresponding root. |
|---|
| 5785 | | - * Returns in *is_new if the inode was read from disk |
|---|
| 5429 | +/* |
|---|
| 5430 | + * Get an inode object given its inode number and corresponding root. |
|---|
| 5431 | + * Path can be preallocated to prevent recursing back to iget through |
|---|
| 5432 | + * allocator. NULL is also valid but may require an additional allocation |
|---|
| 5433 | + * later. |
|---|
| 5786 | 5434 | */ |
|---|
| 5787 | | -struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location, |
|---|
| 5788 | | - struct btrfs_root *root, int *new, |
|---|
| 5789 | | - struct btrfs_path *path) |
|---|
| 5435 | +struct inode *btrfs_iget_path(struct super_block *s, u64 ino, |
|---|
| 5436 | + struct btrfs_root *root, struct btrfs_path *path) |
|---|
| 5790 | 5437 | { |
|---|
| 5791 | 5438 | struct inode *inode; |
|---|
| 5792 | 5439 | |
|---|
| 5793 | | - inode = btrfs_iget_locked(s, location, root); |
|---|
| 5440 | + inode = btrfs_iget_locked(s, ino, root); |
|---|
| 5794 | 5441 | if (!inode) |
|---|
| 5795 | 5442 | return ERR_PTR(-ENOMEM); |
|---|
| 5796 | 5443 | |
|---|
| .. | .. |
|---|
| 5801 | 5448 | if (!ret) { |
|---|
| 5802 | 5449 | inode_tree_add(inode); |
|---|
| 5803 | 5450 | unlock_new_inode(inode); |
|---|
| 5804 | | - if (new) |
|---|
| 5805 | | - *new = 1; |
|---|
| 5806 | 5451 | } else { |
|---|
| 5807 | 5452 | iget_failed(inode); |
|---|
| 5808 | 5453 | /* |
|---|
| .. | .. |
|---|
| 5819 | 5464 | return inode; |
|---|
| 5820 | 5465 | } |
|---|
| 5821 | 5466 | |
|---|
| 5822 | | -struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, |
|---|
| 5823 | | - struct btrfs_root *root, int *new) |
|---|
| 5467 | +struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root) |
|---|
| 5824 | 5468 | { |
|---|
| 5825 | | - return btrfs_iget_path(s, location, root, new, NULL); |
|---|
| 5469 | + return btrfs_iget_path(s, ino, root, NULL); |
|---|
| 5826 | 5470 | } |
|---|
| 5827 | 5471 | |
|---|
| 5828 | 5472 | static struct inode *new_simple_dir(struct super_block *s, |
|---|
| .. | .. |
|---|
| 5834 | 5478 | if (!inode) |
|---|
| 5835 | 5479 | return ERR_PTR(-ENOMEM); |
|---|
| 5836 | 5480 | |
|---|
| 5837 | | - BTRFS_I(inode)->root = root; |
|---|
| 5481 | + BTRFS_I(inode)->root = btrfs_grab_root(root); |
|---|
| 5838 | 5482 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); |
|---|
| 5839 | 5483 | set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags); |
|---|
| 5840 | 5484 | |
|---|
| 5841 | 5485 | inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; |
|---|
| 5842 | | - inode->i_op = &btrfs_dir_ro_inode_operations; |
|---|
| 5486 | + /* |
|---|
| 5487 | + * We only need lookup, the rest is read-only and there's no inode |
|---|
| 5488 | + * associated with the dentry |
|---|
| 5489 | + */ |
|---|
| 5490 | + inode->i_op = &simple_dir_inode_operations; |
|---|
| 5843 | 5491 | inode->i_opflags &= ~IOP_XATTR; |
|---|
| 5844 | 5492 | inode->i_fop = &simple_dir_operations; |
|---|
| 5845 | 5493 | inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; |
|---|
| .. | .. |
|---|
| 5853 | 5501 | |
|---|
| 5854 | 5502 | static inline u8 btrfs_inode_type(struct inode *inode) |
|---|
| 5855 | 5503 | { |
|---|
| 5856 | | - return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; |
|---|
| 5504 | + /* |
|---|
| 5505 | + * Compile-time asserts that generic FT_* types still match |
|---|
| 5506 | + * BTRFS_FT_* types |
|---|
| 5507 | + */ |
|---|
| 5508 | + BUILD_BUG_ON(BTRFS_FT_UNKNOWN != FT_UNKNOWN); |
|---|
| 5509 | + BUILD_BUG_ON(BTRFS_FT_REG_FILE != FT_REG_FILE); |
|---|
| 5510 | + BUILD_BUG_ON(BTRFS_FT_DIR != FT_DIR); |
|---|
| 5511 | + BUILD_BUG_ON(BTRFS_FT_CHRDEV != FT_CHRDEV); |
|---|
| 5512 | + BUILD_BUG_ON(BTRFS_FT_BLKDEV != FT_BLKDEV); |
|---|
| 5513 | + BUILD_BUG_ON(BTRFS_FT_FIFO != FT_FIFO); |
|---|
| 5514 | + BUILD_BUG_ON(BTRFS_FT_SOCK != FT_SOCK); |
|---|
| 5515 | + BUILD_BUG_ON(BTRFS_FT_SYMLINK != FT_SYMLINK); |
|---|
| 5516 | + |
|---|
| 5517 | + return fs_umode_to_ftype(inode->i_mode); |
|---|
| 5857 | 5518 | } |
|---|
| 5858 | 5519 | |
|---|
| 5859 | 5520 | struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) |
|---|
| .. | .. |
|---|
| 5864 | 5525 | struct btrfs_root *sub_root = root; |
|---|
| 5865 | 5526 | struct btrfs_key location; |
|---|
| 5866 | 5527 | u8 di_type = 0; |
|---|
| 5867 | | - int index; |
|---|
| 5868 | 5528 | int ret = 0; |
|---|
| 5869 | 5529 | |
|---|
| 5870 | 5530 | if (dentry->d_name.len > BTRFS_NAME_LEN) |
|---|
| .. | .. |
|---|
| 5875 | 5535 | return ERR_PTR(ret); |
|---|
| 5876 | 5536 | |
|---|
| 5877 | 5537 | if (location.type == BTRFS_INODE_ITEM_KEY) { |
|---|
| 5878 | | - inode = btrfs_iget(dir->i_sb, &location, root, NULL); |
|---|
| 5538 | + inode = btrfs_iget(dir->i_sb, location.objectid, root); |
|---|
| 5879 | 5539 | if (IS_ERR(inode)) |
|---|
| 5880 | 5540 | return inode; |
|---|
| 5881 | 5541 | |
|---|
| .. | .. |
|---|
| 5891 | 5551 | return inode; |
|---|
| 5892 | 5552 | } |
|---|
| 5893 | 5553 | |
|---|
| 5894 | | - index = srcu_read_lock(&fs_info->subvol_srcu); |
|---|
| 5895 | 5554 | ret = fixup_tree_root_location(fs_info, dir, dentry, |
|---|
| 5896 | 5555 | &location, &sub_root); |
|---|
| 5897 | 5556 | if (ret < 0) { |
|---|
| .. | .. |
|---|
| 5900 | 5559 | else |
|---|
| 5901 | 5560 | inode = new_simple_dir(dir->i_sb, &location, sub_root); |
|---|
| 5902 | 5561 | } else { |
|---|
| 5903 | | - inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL); |
|---|
| 5562 | + inode = btrfs_iget(dir->i_sb, location.objectid, sub_root); |
|---|
| 5904 | 5563 | } |
|---|
| 5905 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
|---|
| 5564 | + if (root != sub_root) |
|---|
| 5565 | + btrfs_put_root(sub_root); |
|---|
| 5906 | 5566 | |
|---|
| 5907 | 5567 | if (!IS_ERR(inode) && root != sub_root) { |
|---|
| 5908 | 5568 | down_read(&fs_info->cleanup_work_sem); |
|---|
| .. | .. |
|---|
| 5940 | 5600 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, |
|---|
| 5941 | 5601 | unsigned int flags) |
|---|
| 5942 | 5602 | { |
|---|
| 5943 | | - struct inode *inode; |
|---|
| 5603 | + struct inode *inode = btrfs_lookup_dentry(dir, dentry); |
|---|
| 5944 | 5604 | |
|---|
| 5945 | | - inode = btrfs_lookup_dentry(dir, dentry); |
|---|
| 5946 | | - if (IS_ERR(inode)) { |
|---|
| 5947 | | - if (PTR_ERR(inode) == -ENOENT) |
|---|
| 5948 | | - inode = NULL; |
|---|
| 5949 | | - else |
|---|
| 5950 | | - return ERR_CAST(inode); |
|---|
| 5951 | | - } |
|---|
| 5952 | | - |
|---|
| 5605 | + if (inode == ERR_PTR(-ENOENT)) |
|---|
| 5606 | + inode = NULL; |
|---|
| 5953 | 5607 | return d_splice_alias(inode, dentry); |
|---|
| 5954 | 5608 | } |
|---|
| 5955 | | - |
|---|
| 5956 | | -unsigned char btrfs_filetype_table[] = { |
|---|
| 5957 | | - DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK |
|---|
| 5958 | | -}; |
|---|
| 5959 | 5609 | |
|---|
| 5960 | 5610 | /* |
|---|
| 5961 | 5611 | * All this infrastructure exists because dir_emit can fault, and we are holding |
|---|
| .. | .. |
|---|
| 6095 | 5745 | name_ptr = (char *)(entry + 1); |
|---|
| 6096 | 5746 | read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1), |
|---|
| 6097 | 5747 | name_len); |
|---|
| 6098 | | - put_unaligned(btrfs_filetype_table[btrfs_dir_type(leaf, di)], |
|---|
| 5748 | + put_unaligned(fs_ftype_to_dtype(btrfs_dir_type(leaf, di)), |
|---|
| 6099 | 5749 | &entry->type); |
|---|
| 6100 | 5750 | btrfs_dir_item_key_to_cpu(leaf, di, &location); |
|---|
| 6101 | 5751 | put_unaligned(location.objectid, &entry->ino); |
|---|
| .. | .. |
|---|
| 6167 | 5817 | return PTR_ERR(trans); |
|---|
| 6168 | 5818 | |
|---|
| 6169 | 5819 | ret = btrfs_update_inode(trans, root, inode); |
|---|
| 6170 | | - if (ret && ret == -ENOSPC) { |
|---|
| 5820 | + if (ret && (ret == -ENOSPC || ret == -EDQUOT)) { |
|---|
| 6171 | 5821 | /* whoops, lets try again with the full transaction */ |
|---|
| 6172 | 5822 | btrfs_end_transaction(trans); |
|---|
| 6173 | 5823 | trans = btrfs_start_transaction(root, 1); |
|---|
| .. | .. |
|---|
| 6290 | 5940 | static int btrfs_insert_inode_locked(struct inode *inode) |
|---|
| 6291 | 5941 | { |
|---|
| 6292 | 5942 | struct btrfs_iget_args args; |
|---|
| 6293 | | - args.location = &BTRFS_I(inode)->location; |
|---|
| 5943 | + |
|---|
| 5944 | + args.ino = BTRFS_I(inode)->location.objectid; |
|---|
| 6294 | 5945 | args.root = BTRFS_I(inode)->root; |
|---|
| 6295 | 5946 | |
|---|
| 6296 | 5947 | return insert_inode_locked4(inode, |
|---|
| .. | .. |
|---|
| 6346 | 5997 | u32 sizes[2]; |
|---|
| 6347 | 5998 | int nitems = name ? 2 : 1; |
|---|
| 6348 | 5999 | unsigned long ptr; |
|---|
| 6000 | + unsigned int nofs_flag; |
|---|
| 6349 | 6001 | int ret; |
|---|
| 6350 | 6002 | |
|---|
| 6351 | 6003 | path = btrfs_alloc_path(); |
|---|
| 6352 | 6004 | if (!path) |
|---|
| 6353 | 6005 | return ERR_PTR(-ENOMEM); |
|---|
| 6354 | 6006 | |
|---|
| 6007 | + nofs_flag = memalloc_nofs_save(); |
|---|
| 6355 | 6008 | inode = new_inode(fs_info->sb); |
|---|
| 6009 | + memalloc_nofs_restore(nofs_flag); |
|---|
| 6356 | 6010 | if (!inode) { |
|---|
| 6357 | 6011 | btrfs_free_path(path); |
|---|
| 6358 | 6012 | return ERR_PTR(-ENOMEM); |
|---|
| .. | .. |
|---|
| 6390 | 6044 | */ |
|---|
| 6391 | 6045 | BTRFS_I(inode)->index_cnt = 2; |
|---|
| 6392 | 6046 | BTRFS_I(inode)->dir_index = *index; |
|---|
| 6393 | | - BTRFS_I(inode)->root = root; |
|---|
| 6047 | + BTRFS_I(inode)->root = btrfs_grab_root(root); |
|---|
| 6394 | 6048 | BTRFS_I(inode)->generation = trans->transid; |
|---|
| 6395 | 6049 | inode->i_generation = BTRFS_I(inode)->generation; |
|---|
| 6396 | 6050 | |
|---|
| .. | .. |
|---|
| 6477 | 6131 | inode_tree_add(inode); |
|---|
| 6478 | 6132 | |
|---|
| 6479 | 6133 | trace_btrfs_inode_new(inode); |
|---|
| 6480 | | - btrfs_set_inode_last_trans(trans, inode); |
|---|
| 6134 | + btrfs_set_inode_last_trans(trans, BTRFS_I(inode)); |
|---|
| 6481 | 6135 | |
|---|
| 6482 | 6136 | btrfs_update_root_times(trans, root); |
|---|
| 6483 | 6137 | |
|---|
| .. | .. |
|---|
| 6535 | 6189 | if (ret) |
|---|
| 6536 | 6190 | return ret; |
|---|
| 6537 | 6191 | |
|---|
| 6538 | | - ret = btrfs_insert_dir_item(trans, root, name, name_len, |
|---|
| 6539 | | - parent_inode, &key, |
|---|
| 6192 | + ret = btrfs_insert_dir_item(trans, name, name_len, parent_inode, &key, |
|---|
| 6540 | 6193 | btrfs_inode_type(&inode->vfs_inode), index); |
|---|
| 6541 | 6194 | if (ret == -EEXIST || ret == -EOVERFLOW) |
|---|
| 6542 | 6195 | goto fail_dir_item; |
|---|
| .. | .. |
|---|
| 6719 | 6372 | if (err) |
|---|
| 6720 | 6373 | goto out_unlock; |
|---|
| 6721 | 6374 | |
|---|
| 6722 | | - BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
|---|
| 6723 | 6375 | d_instantiate_new(dentry, inode); |
|---|
| 6724 | 6376 | |
|---|
| 6725 | 6377 | out_unlock: |
|---|
| .. | .. |
|---|
| 6744 | 6396 | int drop_inode = 0; |
|---|
| 6745 | 6397 | |
|---|
| 6746 | 6398 | /* do not allow sys_link's with other subvols of the same device */ |
|---|
| 6747 | | - if (root->objectid != BTRFS_I(inode)->root->objectid) |
|---|
| 6399 | + if (root->root_key.objectid != BTRFS_I(inode)->root->root_key.objectid) |
|---|
| 6748 | 6400 | return -EXDEV; |
|---|
| 6749 | 6401 | |
|---|
| 6750 | 6402 | if (inode->i_nlink >= BTRFS_LINK_MAX) |
|---|
| .. | .. |
|---|
| 6782 | 6434 | drop_inode = 1; |
|---|
| 6783 | 6435 | } else { |
|---|
| 6784 | 6436 | struct dentry *parent = dentry->d_parent; |
|---|
| 6785 | | - int ret; |
|---|
| 6786 | 6437 | |
|---|
| 6787 | 6438 | err = btrfs_update_inode(trans, root, inode); |
|---|
| 6788 | 6439 | if (err) |
|---|
| .. | .. |
|---|
| 6796 | 6447 | if (err) |
|---|
| 6797 | 6448 | goto fail; |
|---|
| 6798 | 6449 | } |
|---|
| 6799 | | - BTRFS_I(inode)->last_link_trans = trans->transid; |
|---|
| 6800 | 6450 | d_instantiate(dentry, inode); |
|---|
| 6801 | | - ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent, |
|---|
| 6802 | | - true, NULL); |
|---|
| 6803 | | - if (ret == BTRFS_NEED_TRANS_COMMIT) { |
|---|
| 6804 | | - err = btrfs_commit_transaction(trans); |
|---|
| 6805 | | - trans = NULL; |
|---|
| 6806 | | - } |
|---|
| 6451 | + btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent); |
|---|
| 6807 | 6452 | } |
|---|
| 6808 | 6453 | |
|---|
| 6809 | 6454 | fail: |
|---|
| .. | .. |
|---|
| 6824 | 6469 | struct btrfs_trans_handle *trans; |
|---|
| 6825 | 6470 | struct btrfs_root *root = BTRFS_I(dir)->root; |
|---|
| 6826 | 6471 | int err = 0; |
|---|
| 6827 | | - int drop_on_err = 0; |
|---|
| 6828 | 6472 | u64 objectid = 0; |
|---|
| 6829 | 6473 | u64 index = 0; |
|---|
| 6830 | 6474 | |
|---|
| .. | .. |
|---|
| 6850 | 6494 | goto out_fail; |
|---|
| 6851 | 6495 | } |
|---|
| 6852 | 6496 | |
|---|
| 6853 | | - drop_on_err = 1; |
|---|
| 6854 | 6497 | /* these must be set before we unlock the inode */ |
|---|
| 6855 | 6498 | inode->i_op = &btrfs_dir_inode_operations; |
|---|
| 6856 | 6499 | inode->i_fop = &btrfs_dir_file_operations; |
|---|
| .. | .. |
|---|
| 6871 | 6514 | goto out_fail; |
|---|
| 6872 | 6515 | |
|---|
| 6873 | 6516 | d_instantiate_new(dentry, inode); |
|---|
| 6874 | | - drop_on_err = 0; |
|---|
| 6875 | 6517 | |
|---|
| 6876 | 6518 | out_fail: |
|---|
| 6877 | 6519 | btrfs_end_transaction(trans); |
|---|
| .. | .. |
|---|
| 6929 | 6571 | return ret; |
|---|
| 6930 | 6572 | } |
|---|
| 6931 | 6573 | |
|---|
| 6932 | | -/* |
|---|
| 6933 | | - * a bit scary, this does extent mapping from logical file offset to the disk. |
|---|
| 6934 | | - * the ugly parts come from merging extents from the disk with the in-ram |
|---|
| 6935 | | - * representation. This gets more complex because of the data=ordered code, |
|---|
| 6936 | | - * where the in-ram extents might be locked pending data=ordered completion. |
|---|
| 6574 | +/** |
|---|
| 6575 | + * btrfs_get_extent - Lookup the first extent overlapping a range in a file. |
|---|
| 6576 | + * @inode: file to search in |
|---|
| 6577 | + * @page: page to read extent data into if the extent is inline |
|---|
| 6578 | + * @pg_offset: offset into @page to copy to |
|---|
| 6579 | + * @start: file offset |
|---|
| 6580 | + * @len: length of range starting at @start |
|---|
| 6937 | 6581 | * |
|---|
| 6938 | | - * This also copies inline extents directly into the page. |
|---|
| 6582 | + * This returns the first &struct extent_map which overlaps with the given |
|---|
| 6583 | + * range, reading it from the B-tree and caching it if necessary. Note that |
|---|
| 6584 | + * there may be more extents which overlap the given range after the returned |
|---|
| 6585 | + * extent_map. |
|---|
| 6586 | + * |
|---|
| 6587 | + * If @page is not NULL and the extent is inline, this also reads the extent |
|---|
| 6588 | + * data directly into the page and marks the extent up to date in the io_tree. |
|---|
| 6589 | + * |
|---|
| 6590 | + * Return: ERR_PTR on error, non-NULL extent_map on success. |
|---|
| 6939 | 6591 | */ |
|---|
| 6940 | 6592 | struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, |
|---|
| 6941 | | - struct page *page, |
|---|
| 6942 | | - size_t pg_offset, u64 start, u64 len, |
|---|
| 6943 | | - int create) |
|---|
| 6593 | + struct page *page, size_t pg_offset, |
|---|
| 6594 | + u64 start, u64 len) |
|---|
| 6944 | 6595 | { |
|---|
| 6945 | 6596 | struct btrfs_fs_info *fs_info = inode->root->fs_info; |
|---|
| 6946 | | - int ret; |
|---|
| 6947 | | - int err = 0; |
|---|
| 6597 | + int ret = 0; |
|---|
| 6948 | 6598 | u64 extent_start = 0; |
|---|
| 6949 | 6599 | u64 extent_end = 0; |
|---|
| 6950 | 6600 | u64 objectid = btrfs_ino(inode); |
|---|
| 6951 | | - u32 found_type; |
|---|
| 6601 | + int extent_type = -1; |
|---|
| 6952 | 6602 | struct btrfs_path *path = NULL; |
|---|
| 6953 | 6603 | struct btrfs_root *root = inode->root; |
|---|
| 6954 | 6604 | struct btrfs_file_extent_item *item; |
|---|
| .. | .. |
|---|
| 6957 | 6607 | struct extent_map *em = NULL; |
|---|
| 6958 | 6608 | struct extent_map_tree *em_tree = &inode->extent_tree; |
|---|
| 6959 | 6609 | struct extent_io_tree *io_tree = &inode->io_tree; |
|---|
| 6960 | | - const bool new_inline = !page || create; |
|---|
| 6961 | 6610 | |
|---|
| 6962 | 6611 | read_lock(&em_tree->lock); |
|---|
| 6963 | 6612 | em = lookup_extent_mapping(em_tree, start, len); |
|---|
| 6964 | | - if (em) |
|---|
| 6965 | | - em->bdev = fs_info->fs_devices->latest_bdev; |
|---|
| 6966 | 6613 | read_unlock(&em_tree->lock); |
|---|
| 6967 | 6614 | |
|---|
| 6968 | 6615 | if (em) { |
|---|
| .. | .. |
|---|
| 6975 | 6622 | } |
|---|
| 6976 | 6623 | em = alloc_extent_map(); |
|---|
| 6977 | 6624 | if (!em) { |
|---|
| 6978 | | - err = -ENOMEM; |
|---|
| 6625 | + ret = -ENOMEM; |
|---|
| 6979 | 6626 | goto out; |
|---|
| 6980 | 6627 | } |
|---|
| 6981 | | - em->bdev = fs_info->fs_devices->latest_bdev; |
|---|
| 6982 | 6628 | em->start = EXTENT_MAP_HOLE; |
|---|
| 6983 | 6629 | em->orig_start = EXTENT_MAP_HOLE; |
|---|
| 6984 | 6630 | em->len = (u64)-1; |
|---|
| 6985 | 6631 | em->block_len = (u64)-1; |
|---|
| 6986 | 6632 | |
|---|
| 6633 | + path = btrfs_alloc_path(); |
|---|
| 6987 | 6634 | if (!path) { |
|---|
| 6988 | | - path = btrfs_alloc_path(); |
|---|
| 6989 | | - if (!path) { |
|---|
| 6990 | | - err = -ENOMEM; |
|---|
| 6991 | | - goto out; |
|---|
| 6992 | | - } |
|---|
| 6993 | | - /* |
|---|
| 6994 | | - * Chances are we'll be called again, so go ahead and do |
|---|
| 6995 | | - * readahead |
|---|
| 6996 | | - */ |
|---|
| 6997 | | - path->reada = READA_FORWARD; |
|---|
| 6998 | | - } |
|---|
| 6999 | | - |
|---|
| 7000 | | - ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0); |
|---|
| 7001 | | - if (ret < 0) { |
|---|
| 7002 | | - err = ret; |
|---|
| 6635 | + ret = -ENOMEM; |
|---|
| 7003 | 6636 | goto out; |
|---|
| 7004 | 6637 | } |
|---|
| 7005 | 6638 | |
|---|
| 7006 | | - if (ret != 0) { |
|---|
| 6639 | + /* Chances are we'll be called again, so go ahead and do readahead */ |
|---|
| 6640 | + path->reada = READA_FORWARD; |
|---|
| 6641 | + |
|---|
| 6642 | + /* |
|---|
| 6643 | + * Unless we're going to uncompress the inline extent, no sleep would |
|---|
| 6644 | + * happen. |
|---|
| 6645 | + */ |
|---|
| 6646 | + path->leave_spinning = 1; |
|---|
| 6647 | + |
|---|
| 6648 | + path->recurse = btrfs_is_free_space_inode(inode); |
|---|
| 6649 | + |
|---|
| 6650 | + ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0); |
|---|
| 6651 | + if (ret < 0) { |
|---|
| 6652 | + goto out; |
|---|
| 6653 | + } else if (ret > 0) { |
|---|
| 7007 | 6654 | if (path->slots[0] == 0) |
|---|
| 7008 | 6655 | goto not_found; |
|---|
| 7009 | 6656 | path->slots[0]--; |
|---|
| 6657 | + ret = 0; |
|---|
| 7010 | 6658 | } |
|---|
| 7011 | 6659 | |
|---|
| 7012 | 6660 | leaf = path->nodes[0]; |
|---|
| 7013 | 6661 | item = btrfs_item_ptr(leaf, path->slots[0], |
|---|
| 7014 | 6662 | struct btrfs_file_extent_item); |
|---|
| 7015 | | - /* are we inside the extent that was found? */ |
|---|
| 7016 | 6663 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
|---|
| 7017 | | - found_type = found_key.type; |
|---|
| 7018 | 6664 | if (found_key.objectid != objectid || |
|---|
| 7019 | | - found_type != BTRFS_EXTENT_DATA_KEY) { |
|---|
| 6665 | + found_key.type != BTRFS_EXTENT_DATA_KEY) { |
|---|
| 7020 | 6666 | /* |
|---|
| 7021 | 6667 | * If we backup past the first extent we want to move forward |
|---|
| 7022 | 6668 | * and see if there is an extent in front of us, otherwise we'll |
|---|
| .. | .. |
|---|
| 7027 | 6673 | goto next; |
|---|
| 7028 | 6674 | } |
|---|
| 7029 | 6675 | |
|---|
| 7030 | | - found_type = btrfs_file_extent_type(leaf, item); |
|---|
| 6676 | + extent_type = btrfs_file_extent_type(leaf, item); |
|---|
| 7031 | 6677 | extent_start = found_key.offset; |
|---|
| 7032 | | - if (found_type == BTRFS_FILE_EXTENT_REG || |
|---|
| 7033 | | - found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
|---|
| 6678 | + extent_end = btrfs_file_extent_end(path); |
|---|
| 6679 | + if (extent_type == BTRFS_FILE_EXTENT_REG || |
|---|
| 6680 | + extent_type == BTRFS_FILE_EXTENT_PREALLOC) { |
|---|
| 7034 | 6681 | /* Only regular file could have regular/prealloc extent */ |
|---|
| 7035 | 6682 | if (!S_ISREG(inode->vfs_inode.i_mode)) { |
|---|
| 7036 | | - err = -EUCLEAN; |
|---|
| 6683 | + ret = -EUCLEAN; |
|---|
| 7037 | 6684 | btrfs_crit(fs_info, |
|---|
| 7038 | 6685 | "regular/prealloc extent found for non-regular inode %llu", |
|---|
| 7039 | 6686 | btrfs_ino(inode)); |
|---|
| 7040 | 6687 | goto out; |
|---|
| 7041 | 6688 | } |
|---|
| 7042 | | - extent_end = extent_start + |
|---|
| 7043 | | - btrfs_file_extent_num_bytes(leaf, item); |
|---|
| 7044 | | - |
|---|
| 7045 | 6689 | trace_btrfs_get_extent_show_fi_regular(inode, leaf, item, |
|---|
| 7046 | 6690 | extent_start); |
|---|
| 7047 | | - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { |
|---|
| 7048 | | - size_t size; |
|---|
| 7049 | | - |
|---|
| 7050 | | - size = btrfs_file_extent_ram_bytes(leaf, item); |
|---|
| 7051 | | - extent_end = ALIGN(extent_start + size, |
|---|
| 7052 | | - fs_info->sectorsize); |
|---|
| 7053 | | - |
|---|
| 6691 | + } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
|---|
| 7054 | 6692 | trace_btrfs_get_extent_show_fi_inline(inode, leaf, item, |
|---|
| 7055 | 6693 | path->slots[0], |
|---|
| 7056 | 6694 | extent_start); |
|---|
| .. | .. |
|---|
| 7060 | 6698 | path->slots[0]++; |
|---|
| 7061 | 6699 | if (path->slots[0] >= btrfs_header_nritems(leaf)) { |
|---|
| 7062 | 6700 | ret = btrfs_next_leaf(root, path); |
|---|
| 7063 | | - if (ret < 0) { |
|---|
| 7064 | | - err = ret; |
|---|
| 6701 | + if (ret < 0) |
|---|
| 7065 | 6702 | goto out; |
|---|
| 7066 | | - } |
|---|
| 7067 | | - if (ret > 0) |
|---|
| 6703 | + else if (ret > 0) |
|---|
| 7068 | 6704 | goto not_found; |
|---|
| 6705 | + |
|---|
| 7069 | 6706 | leaf = path->nodes[0]; |
|---|
| 7070 | 6707 | } |
|---|
| 7071 | 6708 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
|---|
| .. | .. |
|---|
| 7076 | 6713 | goto not_found; |
|---|
| 7077 | 6714 | if (start > found_key.offset) |
|---|
| 7078 | 6715 | goto next; |
|---|
| 6716 | + |
|---|
| 6717 | + /* New extent overlaps with existing one */ |
|---|
| 7079 | 6718 | em->start = start; |
|---|
| 7080 | 6719 | em->orig_start = start; |
|---|
| 7081 | 6720 | em->len = found_key.offset - start; |
|---|
| 7082 | | - goto not_found_em; |
|---|
| 6721 | + em->block_start = EXTENT_MAP_HOLE; |
|---|
| 6722 | + goto insert; |
|---|
| 7083 | 6723 | } |
|---|
| 7084 | 6724 | |
|---|
| 7085 | | - btrfs_extent_item_to_extent_map(inode, path, item, |
|---|
| 7086 | | - new_inline, em); |
|---|
| 6725 | + btrfs_extent_item_to_extent_map(inode, path, item, !page, em); |
|---|
| 7087 | 6726 | |
|---|
| 7088 | | - if (found_type == BTRFS_FILE_EXTENT_REG || |
|---|
| 7089 | | - found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
|---|
| 6727 | + if (extent_type == BTRFS_FILE_EXTENT_REG || |
|---|
| 6728 | + extent_type == BTRFS_FILE_EXTENT_PREALLOC) { |
|---|
| 7090 | 6729 | goto insert; |
|---|
| 7091 | | - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { |
|---|
| 6730 | + } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
|---|
| 7092 | 6731 | unsigned long ptr; |
|---|
| 7093 | 6732 | char *map; |
|---|
| 7094 | 6733 | size_t size; |
|---|
| 7095 | 6734 | size_t extent_offset; |
|---|
| 7096 | 6735 | size_t copy_size; |
|---|
| 7097 | 6736 | |
|---|
| 7098 | | - if (new_inline) |
|---|
| 6737 | + if (!page) |
|---|
| 7099 | 6738 | goto out; |
|---|
| 7100 | 6739 | |
|---|
| 7101 | 6740 | size = btrfs_file_extent_ram_bytes(leaf, item); |
|---|
| .. | .. |
|---|
| 7107 | 6746 | em->orig_block_len = em->len; |
|---|
| 7108 | 6747 | em->orig_start = em->start; |
|---|
| 7109 | 6748 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; |
|---|
| 6749 | + |
|---|
| 6750 | + btrfs_set_path_blocking(path); |
|---|
| 7110 | 6751 | if (!PageUptodate(page)) { |
|---|
| 7111 | 6752 | if (btrfs_file_extent_compression(leaf, item) != |
|---|
| 7112 | 6753 | BTRFS_COMPRESS_NONE) { |
|---|
| 7113 | 6754 | ret = uncompress_inline(path, page, pg_offset, |
|---|
| 7114 | 6755 | extent_offset, item); |
|---|
| 7115 | | - if (ret) { |
|---|
| 7116 | | - err = ret; |
|---|
| 6756 | + if (ret) |
|---|
| 7117 | 6757 | goto out; |
|---|
| 7118 | | - } |
|---|
| 7119 | 6758 | } else { |
|---|
| 7120 | 6759 | map = kmap(page); |
|---|
| 7121 | 6760 | read_extent_buffer(leaf, map + pg_offset, ptr, |
|---|
| .. | .. |
|---|
| 7137 | 6776 | em->start = start; |
|---|
| 7138 | 6777 | em->orig_start = start; |
|---|
| 7139 | 6778 | em->len = len; |
|---|
| 7140 | | -not_found_em: |
|---|
| 7141 | 6779 | em->block_start = EXTENT_MAP_HOLE; |
|---|
| 7142 | 6780 | insert: |
|---|
| 6781 | + ret = 0; |
|---|
| 7143 | 6782 | btrfs_release_path(path); |
|---|
| 7144 | 6783 | if (em->start > start || extent_map_end(em) <= start) { |
|---|
| 7145 | 6784 | btrfs_err(fs_info, |
|---|
| 7146 | 6785 | "bad extent! em: [%llu %llu] passed [%llu %llu]", |
|---|
| 7147 | 6786 | em->start, em->len, start, len); |
|---|
| 7148 | | - err = -EIO; |
|---|
| 6787 | + ret = -EIO; |
|---|
| 7149 | 6788 | goto out; |
|---|
| 7150 | 6789 | } |
|---|
| 7151 | 6790 | |
|---|
| 7152 | | - err = 0; |
|---|
| 7153 | 6791 | write_lock(&em_tree->lock); |
|---|
| 7154 | | - err = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len); |
|---|
| 6792 | + ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len); |
|---|
| 7155 | 6793 | write_unlock(&em_tree->lock); |
|---|
| 7156 | 6794 | out: |
|---|
| 6795 | + btrfs_free_path(path); |
|---|
| 7157 | 6796 | |
|---|
| 7158 | 6797 | trace_btrfs_get_extent(root, inode, em); |
|---|
| 7159 | 6798 | |
|---|
| 7160 | | - btrfs_free_path(path); |
|---|
| 7161 | | - if (err) { |
|---|
| 6799 | + if (ret) { |
|---|
| 7162 | 6800 | free_extent_map(em); |
|---|
| 7163 | | - return ERR_PTR(err); |
|---|
| 6801 | + return ERR_PTR(ret); |
|---|
| 7164 | 6802 | } |
|---|
| 7165 | | - BUG_ON(!em); /* Error is always set */ |
|---|
| 7166 | 6803 | return em; |
|---|
| 7167 | 6804 | } |
|---|
| 7168 | 6805 | |
|---|
| 7169 | 6806 | struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode, |
|---|
| 7170 | | - struct page *page, |
|---|
| 7171 | | - size_t pg_offset, u64 start, u64 len, |
|---|
| 7172 | | - int create) |
|---|
| 6807 | + u64 start, u64 len) |
|---|
| 7173 | 6808 | { |
|---|
| 7174 | 6809 | struct extent_map *em; |
|---|
| 7175 | 6810 | struct extent_map *hole_em = NULL; |
|---|
| 7176 | | - u64 range_start = start; |
|---|
| 6811 | + u64 delalloc_start = start; |
|---|
| 7177 | 6812 | u64 end; |
|---|
| 7178 | | - u64 found; |
|---|
| 7179 | | - u64 found_end; |
|---|
| 6813 | + u64 delalloc_len; |
|---|
| 6814 | + u64 delalloc_end; |
|---|
| 7180 | 6815 | int err = 0; |
|---|
| 7181 | 6816 | |
|---|
| 7182 | | - em = btrfs_get_extent(inode, page, pg_offset, start, len, create); |
|---|
| 6817 | + em = btrfs_get_extent(inode, NULL, 0, start, len); |
|---|
| 7183 | 6818 | if (IS_ERR(em)) |
|---|
| 7184 | 6819 | return em; |
|---|
| 7185 | 6820 | /* |
|---|
| .. | .. |
|---|
| 7204 | 6839 | em = NULL; |
|---|
| 7205 | 6840 | |
|---|
| 7206 | 6841 | /* ok, we didn't find anything, lets look for delalloc */ |
|---|
| 7207 | | - found = count_range_bits(&inode->io_tree, &range_start, |
|---|
| 6842 | + delalloc_len = count_range_bits(&inode->io_tree, &delalloc_start, |
|---|
| 7208 | 6843 | end, len, EXTENT_DELALLOC, 1); |
|---|
| 7209 | | - found_end = range_start + found; |
|---|
| 7210 | | - if (found_end < range_start) |
|---|
| 7211 | | - found_end = (u64)-1; |
|---|
| 6844 | + delalloc_end = delalloc_start + delalloc_len; |
|---|
| 6845 | + if (delalloc_end < delalloc_start) |
|---|
| 6846 | + delalloc_end = (u64)-1; |
|---|
| 7212 | 6847 | |
|---|
| 7213 | 6848 | /* |
|---|
| 7214 | | - * we didn't find anything useful, return |
|---|
| 7215 | | - * the original results from get_extent() |
|---|
| 6849 | + * We didn't find anything useful, return the original results from |
|---|
| 6850 | + * btrfs_get_extent() |
|---|
| 7216 | 6851 | */ |
|---|
| 7217 | | - if (range_start > end || found_end <= start) { |
|---|
| 6852 | + if (delalloc_start > end || delalloc_end <= start) { |
|---|
| 7218 | 6853 | em = hole_em; |
|---|
| 7219 | 6854 | hole_em = NULL; |
|---|
| 7220 | 6855 | goto out; |
|---|
| 7221 | 6856 | } |
|---|
| 7222 | 6857 | |
|---|
| 7223 | | - /* adjust the range_start to make sure it doesn't |
|---|
| 7224 | | - * go backwards from the start they passed in |
|---|
| 6858 | + /* |
|---|
| 6859 | + * Adjust the delalloc_start to make sure it doesn't go backwards from |
|---|
| 6860 | + * the start they passed in |
|---|
| 7225 | 6861 | */ |
|---|
| 7226 | | - range_start = max(start, range_start); |
|---|
| 7227 | | - found = found_end - range_start; |
|---|
| 6862 | + delalloc_start = max(start, delalloc_start); |
|---|
| 6863 | + delalloc_len = delalloc_end - delalloc_start; |
|---|
| 7228 | 6864 | |
|---|
| 7229 | | - if (found > 0) { |
|---|
| 7230 | | - u64 hole_start = start; |
|---|
| 7231 | | - u64 hole_len = len; |
|---|
| 6865 | + if (delalloc_len > 0) { |
|---|
| 6866 | + u64 hole_start; |
|---|
| 6867 | + u64 hole_len; |
|---|
| 6868 | + const u64 hole_end = extent_map_end(hole_em); |
|---|
| 7232 | 6869 | |
|---|
| 7233 | 6870 | em = alloc_extent_map(); |
|---|
| 7234 | 6871 | if (!em) { |
|---|
| 7235 | 6872 | err = -ENOMEM; |
|---|
| 7236 | 6873 | goto out; |
|---|
| 7237 | 6874 | } |
|---|
| 7238 | | - /* |
|---|
| 7239 | | - * when btrfs_get_extent can't find anything it |
|---|
| 7240 | | - * returns one huge hole |
|---|
| 7241 | | - * |
|---|
| 7242 | | - * make sure what it found really fits our range, and |
|---|
| 7243 | | - * adjust to make sure it is based on the start from |
|---|
| 7244 | | - * the caller |
|---|
| 7245 | | - */ |
|---|
| 7246 | | - if (hole_em) { |
|---|
| 7247 | | - u64 calc_end = extent_map_end(hole_em); |
|---|
| 7248 | 6875 | |
|---|
| 7249 | | - if (calc_end <= start || (hole_em->start > end)) { |
|---|
| 7250 | | - free_extent_map(hole_em); |
|---|
| 7251 | | - hole_em = NULL; |
|---|
| 7252 | | - } else { |
|---|
| 7253 | | - hole_start = max(hole_em->start, start); |
|---|
| 7254 | | - hole_len = calc_end - hole_start; |
|---|
| 7255 | | - } |
|---|
| 6876 | + ASSERT(hole_em); |
|---|
| 6877 | + /* |
|---|
| 6878 | + * When btrfs_get_extent can't find anything it returns one |
|---|
| 6879 | + * huge hole |
|---|
| 6880 | + * |
|---|
| 6881 | + * Make sure what it found really fits our range, and adjust to |
|---|
| 6882 | + * make sure it is based on the start from the caller |
|---|
| 6883 | + */ |
|---|
| 6884 | + if (hole_end <= start || hole_em->start > end) { |
|---|
| 6885 | + free_extent_map(hole_em); |
|---|
| 6886 | + hole_em = NULL; |
|---|
| 6887 | + } else { |
|---|
| 6888 | + hole_start = max(hole_em->start, start); |
|---|
| 6889 | + hole_len = hole_end - hole_start; |
|---|
| 7256 | 6890 | } |
|---|
| 7257 | | - em->bdev = NULL; |
|---|
| 7258 | | - if (hole_em && range_start > hole_start) { |
|---|
| 7259 | | - /* our hole starts before our delalloc, so we |
|---|
| 7260 | | - * have to return just the parts of the hole |
|---|
| 7261 | | - * that go until the delalloc starts |
|---|
| 6891 | + |
|---|
| 6892 | + if (hole_em && delalloc_start > hole_start) { |
|---|
| 6893 | + /* |
|---|
| 6894 | + * Our hole starts before our delalloc, so we have to |
|---|
| 6895 | + * return just the parts of the hole that go until the |
|---|
| 6896 | + * delalloc starts |
|---|
| 7262 | 6897 | */ |
|---|
| 7263 | | - em->len = min(hole_len, |
|---|
| 7264 | | - range_start - hole_start); |
|---|
| 6898 | + em->len = min(hole_len, delalloc_start - hole_start); |
|---|
| 7265 | 6899 | em->start = hole_start; |
|---|
| 7266 | 6900 | em->orig_start = hole_start; |
|---|
| 7267 | 6901 | /* |
|---|
| 7268 | | - * don't adjust block start at all, |
|---|
| 7269 | | - * it is fixed at EXTENT_MAP_HOLE |
|---|
| 6902 | + * Don't adjust block start at all, it is fixed at |
|---|
| 6903 | + * EXTENT_MAP_HOLE |
|---|
| 7270 | 6904 | */ |
|---|
| 7271 | 6905 | em->block_start = hole_em->block_start; |
|---|
| 7272 | 6906 | em->block_len = hole_len; |
|---|
| 7273 | 6907 | if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags)) |
|---|
| 7274 | 6908 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); |
|---|
| 7275 | 6909 | } else { |
|---|
| 7276 | | - em->start = range_start; |
|---|
| 7277 | | - em->len = found; |
|---|
| 7278 | | - em->orig_start = range_start; |
|---|
| 6910 | + /* |
|---|
| 6911 | + * Hole is out of passed range or it starts after |
|---|
| 6912 | + * delalloc range |
|---|
| 6913 | + */ |
|---|
| 6914 | + em->start = delalloc_start; |
|---|
| 6915 | + em->len = delalloc_len; |
|---|
| 6916 | + em->orig_start = delalloc_start; |
|---|
| 7279 | 6917 | em->block_start = EXTENT_MAP_DELALLOC; |
|---|
| 7280 | | - em->block_len = found; |
|---|
| 6918 | + em->block_len = delalloc_len; |
|---|
| 7281 | 6919 | } |
|---|
| 7282 | 6920 | } else { |
|---|
| 7283 | 6921 | return hole_em; |
|---|
| .. | .. |
|---|
| 7292 | 6930 | return em; |
|---|
| 7293 | 6931 | } |
|---|
| 7294 | 6932 | |
|---|
| 7295 | | -static struct extent_map *btrfs_create_dio_extent(struct inode *inode, |
|---|
| 6933 | +static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode, |
|---|
| 7296 | 6934 | const u64 start, |
|---|
| 7297 | 6935 | const u64 len, |
|---|
| 7298 | 6936 | const u64 orig_start, |
|---|
| .. | .. |
|---|
| 7306 | 6944 | int ret; |
|---|
| 7307 | 6945 | |
|---|
| 7308 | 6946 | if (type != BTRFS_ORDERED_NOCOW) { |
|---|
| 7309 | | - em = create_io_em(inode, start, len, orig_start, |
|---|
| 7310 | | - block_start, block_len, orig_block_len, |
|---|
| 7311 | | - ram_bytes, |
|---|
| 6947 | + em = create_io_em(inode, start, len, orig_start, block_start, |
|---|
| 6948 | + block_len, orig_block_len, ram_bytes, |
|---|
| 7312 | 6949 | BTRFS_COMPRESS_NONE, /* compress_type */ |
|---|
| 7313 | 6950 | type); |
|---|
| 7314 | 6951 | if (IS_ERR(em)) |
|---|
| 7315 | 6952 | goto out; |
|---|
| 7316 | 6953 | } |
|---|
| 7317 | | - ret = btrfs_add_ordered_extent_dio(inode, start, block_start, |
|---|
| 7318 | | - len, block_len, type); |
|---|
| 6954 | + ret = btrfs_add_ordered_extent_dio(inode, start, block_start, len, |
|---|
| 6955 | + block_len, type); |
|---|
| 7319 | 6956 | if (ret) { |
|---|
| 7320 | 6957 | if (em) { |
|---|
| 7321 | 6958 | free_extent_map(em); |
|---|
| 7322 | | - btrfs_drop_extent_cache(BTRFS_I(inode), start, |
|---|
| 7323 | | - start + len - 1, 0); |
|---|
| 6959 | + btrfs_drop_extent_cache(inode, start, start + len - 1, 0); |
|---|
| 7324 | 6960 | } |
|---|
| 7325 | 6961 | em = ERR_PTR(ret); |
|---|
| 7326 | 6962 | } |
|---|
| .. | .. |
|---|
| 7329 | 6965 | return em; |
|---|
| 7330 | 6966 | } |
|---|
| 7331 | 6967 | |
|---|
| 7332 | | -static struct extent_map *btrfs_new_extent_direct(struct inode *inode, |
|---|
| 6968 | +static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode, |
|---|
| 7333 | 6969 | u64 start, u64 len) |
|---|
| 7334 | 6970 | { |
|---|
| 7335 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 7336 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 6971 | + struct btrfs_root *root = inode->root; |
|---|
| 6972 | + struct btrfs_fs_info *fs_info = root->fs_info; |
|---|
| 7337 | 6973 | struct extent_map *em; |
|---|
| 7338 | 6974 | struct btrfs_key ins; |
|---|
| 7339 | 6975 | u64 alloc_hint; |
|---|
| .. | .. |
|---|
| 7350 | 6986 | ins.offset, BTRFS_ORDERED_REGULAR); |
|---|
| 7351 | 6987 | btrfs_dec_block_group_reservations(fs_info, ins.objectid); |
|---|
| 7352 | 6988 | if (IS_ERR(em)) |
|---|
| 7353 | | - btrfs_free_reserved_extent(fs_info, ins.objectid, |
|---|
| 7354 | | - ins.offset, 1); |
|---|
| 6989 | + btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, |
|---|
| 6990 | + 1); |
|---|
| 7355 | 6991 | |
|---|
| 7356 | 6992 | return em; |
|---|
| 7357 | 6993 | } |
|---|
| 7358 | 6994 | |
|---|
| 7359 | 6995 | /* |
|---|
| 7360 | | - * returns 1 when the nocow is safe, < 1 on error, 0 if the |
|---|
| 7361 | | - * block must be cow'd |
|---|
| 6996 | + * Check if we can do nocow write into the range [@offset, @offset + @len) |
|---|
| 6997 | + * |
|---|
| 6998 | + * @offset: File offset |
|---|
| 6999 | + * @len: The length to write, will be updated to the nocow writeable |
|---|
| 7000 | + * range |
|---|
| 7001 | + * @orig_start: (optional) Return the original file offset of the file extent |
|---|
| 7002 | + * @orig_block_len: (optional) Return the original on-disk length of the file extent |
|---|
| 7003 | + * @ram_bytes: (optional) Return the ram_bytes of the file extent |
|---|
| 7004 | + * @strict: if true, omit optimizations that might force us into unnecessary |
|---|
| 7005 | + * cow. e.g., don't trust generation number. |
|---|
| 7006 | + * |
|---|
| 7007 | + * This function will flush ordered extents in the range to ensure proper |
|---|
| 7008 | + * nocow checks for (nowait == false) case. |
|---|
| 7009 | + * |
|---|
| 7010 | + * Return: |
|---|
| 7011 | + * >0 and update @len if we can do nocow write |
|---|
| 7012 | + * 0 if we can't do nocow write |
|---|
| 7013 | + * <0 if error happened |
|---|
| 7014 | + * |
|---|
| 7015 | + * NOTE: This only checks the file extents, caller is responsible to wait for |
|---|
| 7016 | + * any ordered extents. |
|---|
| 7362 | 7017 | */ |
|---|
| 7363 | 7018 | noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, |
|---|
| 7364 | 7019 | u64 *orig_start, u64 *orig_block_len, |
|---|
| 7365 | | - u64 *ram_bytes) |
|---|
| 7020 | + u64 *ram_bytes, bool strict) |
|---|
| 7366 | 7021 | { |
|---|
| 7367 | 7022 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 7368 | 7023 | struct btrfs_path *path; |
|---|
| .. | .. |
|---|
| 7440 | 7095 | * Do the same check as in btrfs_cross_ref_exist but without the |
|---|
| 7441 | 7096 | * unnecessary search. |
|---|
| 7442 | 7097 | */ |
|---|
| 7443 | | - if (btrfs_file_extent_generation(leaf, fi) <= |
|---|
| 7444 | | - btrfs_root_last_snapshot(&root->root_item)) |
|---|
| 7098 | + if (!strict && |
|---|
| 7099 | + (btrfs_file_extent_generation(leaf, fi) <= |
|---|
| 7100 | + btrfs_root_last_snapshot(&root->root_item))) |
|---|
| 7445 | 7101 | goto out; |
|---|
| 7446 | 7102 | |
|---|
| 7447 | 7103 | backref_offset = btrfs_file_extent_offset(leaf, fi); |
|---|
| .. | .. |
|---|
| 7477 | 7133 | */ |
|---|
| 7478 | 7134 | |
|---|
| 7479 | 7135 | ret = btrfs_cross_ref_exist(root, btrfs_ino(BTRFS_I(inode)), |
|---|
| 7480 | | - key.offset - backref_offset, disk_bytenr); |
|---|
| 7136 | + key.offset - backref_offset, disk_bytenr, |
|---|
| 7137 | + strict); |
|---|
| 7481 | 7138 | if (ret) { |
|---|
| 7482 | 7139 | ret = 0; |
|---|
| 7483 | 7140 | goto out; |
|---|
| .. | .. |
|---|
| 7505 | 7162 | } |
|---|
| 7506 | 7163 | |
|---|
| 7507 | 7164 | static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, |
|---|
| 7508 | | - struct extent_state **cached_state, int writing) |
|---|
| 7165 | + struct extent_state **cached_state, bool writing) |
|---|
| 7509 | 7166 | { |
|---|
| 7510 | 7167 | struct btrfs_ordered_extent *ordered; |
|---|
| 7511 | 7168 | int ret = 0; |
|---|
| .. | .. |
|---|
| 7554 | 7211 | */ |
|---|
| 7555 | 7212 | if (writing || |
|---|
| 7556 | 7213 | test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) |
|---|
| 7557 | | - btrfs_start_ordered_extent(inode, ordered, 1); |
|---|
| 7214 | + btrfs_start_ordered_extent(ordered, 1); |
|---|
| 7558 | 7215 | else |
|---|
| 7559 | 7216 | ret = -ENOTBLK; |
|---|
| 7560 | 7217 | btrfs_put_ordered_extent(ordered); |
|---|
| .. | .. |
|---|
| 7564 | 7221 | * for it to complete) and then invalidate the pages for |
|---|
| 7565 | 7222 | * this range (through invalidate_inode_pages2_range()), |
|---|
| 7566 | 7223 | * but that can lead us to a deadlock with a concurrent |
|---|
| 7567 | | - * call to readpages() (a buffered read or a defrag call |
|---|
| 7224 | + * call to readahead (a buffered read or a defrag call |
|---|
| 7568 | 7225 | * triggered a readahead) on a page lock due to an |
|---|
| 7569 | 7226 | * ordered dio extent we created before but did not have |
|---|
| 7570 | 7227 | * yet a corresponding bio submitted (whence it can not |
|---|
| 7571 | | - * complete), which makes readpages() wait for that |
|---|
| 7228 | + * complete), which makes readahead wait for that |
|---|
| 7572 | 7229 | * ordered extent to complete while holding a lock on |
|---|
| 7573 | 7230 | * that page. |
|---|
| 7574 | 7231 | */ |
|---|
| .. | .. |
|---|
| 7585 | 7242 | } |
|---|
| 7586 | 7243 | |
|---|
| 7587 | 7244 | /* The callers of this must take lock_extent() */ |
|---|
| 7588 | | -static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len, |
|---|
| 7589 | | - u64 orig_start, u64 block_start, |
|---|
| 7245 | +static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, |
|---|
| 7246 | + u64 len, u64 orig_start, u64 block_start, |
|---|
| 7590 | 7247 | u64 block_len, u64 orig_block_len, |
|---|
| 7591 | 7248 | u64 ram_bytes, int compress_type, |
|---|
| 7592 | 7249 | int type) |
|---|
| 7593 | 7250 | { |
|---|
| 7594 | 7251 | struct extent_map_tree *em_tree; |
|---|
| 7595 | 7252 | struct extent_map *em; |
|---|
| 7596 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 7597 | 7253 | int ret; |
|---|
| 7598 | 7254 | |
|---|
| 7599 | 7255 | ASSERT(type == BTRFS_ORDERED_PREALLOC || |
|---|
| .. | .. |
|---|
| 7601 | 7257 | type == BTRFS_ORDERED_NOCOW || |
|---|
| 7602 | 7258 | type == BTRFS_ORDERED_REGULAR); |
|---|
| 7603 | 7259 | |
|---|
| 7604 | | - em_tree = &BTRFS_I(inode)->extent_tree; |
|---|
| 7260 | + em_tree = &inode->extent_tree; |
|---|
| 7605 | 7261 | em = alloc_extent_map(); |
|---|
| 7606 | 7262 | if (!em) |
|---|
| 7607 | 7263 | return ERR_PTR(-ENOMEM); |
|---|
| .. | .. |
|---|
| 7611 | 7267 | em->len = len; |
|---|
| 7612 | 7268 | em->block_len = block_len; |
|---|
| 7613 | 7269 | em->block_start = block_start; |
|---|
| 7614 | | - em->bdev = root->fs_info->fs_devices->latest_bdev; |
|---|
| 7615 | 7270 | em->orig_block_len = orig_block_len; |
|---|
| 7616 | 7271 | em->ram_bytes = ram_bytes; |
|---|
| 7617 | 7272 | em->generation = -1; |
|---|
| .. | .. |
|---|
| 7624 | 7279 | } |
|---|
| 7625 | 7280 | |
|---|
| 7626 | 7281 | do { |
|---|
| 7627 | | - btrfs_drop_extent_cache(BTRFS_I(inode), em->start, |
|---|
| 7628 | | - em->start + em->len - 1, 0); |
|---|
| 7282 | + btrfs_drop_extent_cache(inode, em->start, |
|---|
| 7283 | + em->start + em->len - 1, 0); |
|---|
| 7629 | 7284 | write_lock(&em_tree->lock); |
|---|
| 7630 | 7285 | ret = add_extent_mapping(em_tree, em, 1); |
|---|
| 7631 | 7286 | write_unlock(&em_tree->lock); |
|---|
| .. | .. |
|---|
| 7645 | 7300 | } |
|---|
| 7646 | 7301 | |
|---|
| 7647 | 7302 | |
|---|
| 7648 | | -static int btrfs_get_blocks_direct_read(struct extent_map *em, |
|---|
| 7649 | | - struct buffer_head *bh_result, |
|---|
| 7650 | | - struct inode *inode, |
|---|
| 7651 | | - u64 start, u64 len) |
|---|
| 7652 | | -{ |
|---|
| 7653 | | - if (em->block_start == EXTENT_MAP_HOLE || |
|---|
| 7654 | | - test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) |
|---|
| 7655 | | - return -ENOENT; |
|---|
| 7656 | | - |
|---|
| 7657 | | - len = min(len, em->len - (start - em->start)); |
|---|
| 7658 | | - |
|---|
| 7659 | | - bh_result->b_blocknr = (em->block_start + (start - em->start)) >> |
|---|
| 7660 | | - inode->i_blkbits; |
|---|
| 7661 | | - bh_result->b_size = len; |
|---|
| 7662 | | - bh_result->b_bdev = em->bdev; |
|---|
| 7663 | | - set_buffer_mapped(bh_result); |
|---|
| 7664 | | - |
|---|
| 7665 | | - return 0; |
|---|
| 7666 | | -} |
|---|
| 7667 | | - |
|---|
| 7668 | 7303 | static int btrfs_get_blocks_direct_write(struct extent_map **map, |
|---|
| 7669 | | - struct buffer_head *bh_result, |
|---|
| 7670 | 7304 | struct inode *inode, |
|---|
| 7671 | 7305 | struct btrfs_dio_data *dio_data, |
|---|
| 7672 | 7306 | u64 start, u64 len) |
|---|
| .. | .. |
|---|
| 7698 | 7332 | block_start = em->block_start + (start - em->start); |
|---|
| 7699 | 7333 | |
|---|
| 7700 | 7334 | if (can_nocow_extent(inode, start, &len, &orig_start, |
|---|
| 7701 | | - &orig_block_len, &ram_bytes) == 1 && |
|---|
| 7335 | + &orig_block_len, &ram_bytes, false) == 1 && |
|---|
| 7702 | 7336 | btrfs_inc_nocow_writers(fs_info, block_start)) { |
|---|
| 7703 | 7337 | struct extent_map *em2; |
|---|
| 7704 | 7338 | |
|---|
| 7705 | | - em2 = btrfs_create_dio_extent(inode, start, len, |
|---|
| 7339 | + em2 = btrfs_create_dio_extent(BTRFS_I(inode), start, len, |
|---|
| 7706 | 7340 | orig_start, block_start, |
|---|
| 7707 | 7341 | len, orig_block_len, |
|---|
| 7708 | 7342 | ram_bytes, type); |
|---|
| .. | .. |
|---|
| 7721 | 7355 | * use the existing or preallocated extent, so does not |
|---|
| 7722 | 7356 | * need to adjust btrfs_space_info's bytes_may_use. |
|---|
| 7723 | 7357 | */ |
|---|
| 7724 | | - btrfs_free_reserved_data_space_noquota(inode, start, |
|---|
| 7725 | | - len); |
|---|
| 7358 | + btrfs_free_reserved_data_space_noquota(fs_info, len); |
|---|
| 7726 | 7359 | goto skip_cow; |
|---|
| 7727 | 7360 | } |
|---|
| 7728 | 7361 | } |
|---|
| 7729 | 7362 | |
|---|
| 7730 | 7363 | /* this will cow the extent */ |
|---|
| 7731 | | - len = bh_result->b_size; |
|---|
| 7732 | 7364 | free_extent_map(em); |
|---|
| 7733 | | - *map = em = btrfs_new_extent_direct(inode, start, len); |
|---|
| 7365 | + *map = em = btrfs_new_extent_direct(BTRFS_I(inode), start, len); |
|---|
| 7734 | 7366 | if (IS_ERR(em)) { |
|---|
| 7735 | 7367 | ret = PTR_ERR(em); |
|---|
| 7736 | 7368 | goto out; |
|---|
| .. | .. |
|---|
| 7739 | 7371 | len = min(len, em->len - (start - em->start)); |
|---|
| 7740 | 7372 | |
|---|
| 7741 | 7373 | skip_cow: |
|---|
| 7742 | | - bh_result->b_blocknr = (em->block_start + (start - em->start)) >> |
|---|
| 7743 | | - inode->i_blkbits; |
|---|
| 7744 | | - bh_result->b_size = len; |
|---|
| 7745 | | - bh_result->b_bdev = em->bdev; |
|---|
| 7746 | | - set_buffer_mapped(bh_result); |
|---|
| 7747 | | - |
|---|
| 7748 | | - if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) |
|---|
| 7749 | | - set_buffer_new(bh_result); |
|---|
| 7750 | | - |
|---|
| 7751 | 7374 | /* |
|---|
| 7752 | 7375 | * Need to update the i_size under the extent lock so buffered |
|---|
| 7753 | 7376 | * readers will get the updated i_size when we unlock. |
|---|
| 7754 | 7377 | */ |
|---|
| 7755 | | - if (!dio_data->overwrite && start + len > i_size_read(inode)) |
|---|
| 7378 | + if (start + len > i_size_read(inode)) |
|---|
| 7756 | 7379 | i_size_write(inode, start + len); |
|---|
| 7757 | 7380 | |
|---|
| 7758 | | - WARN_ON(dio_data->reserve < len); |
|---|
| 7759 | 7381 | dio_data->reserve -= len; |
|---|
| 7760 | | - dio_data->unsubmitted_oe_range_end = start + len; |
|---|
| 7761 | | - current->journal_info = dio_data; |
|---|
| 7762 | 7382 | out: |
|---|
| 7763 | 7383 | return ret; |
|---|
| 7764 | 7384 | } |
|---|
| 7765 | 7385 | |
|---|
| 7766 | | -static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, |
|---|
| 7767 | | - struct buffer_head *bh_result, int create) |
|---|
| 7386 | +static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, |
|---|
| 7387 | + loff_t length, unsigned int flags, struct iomap *iomap, |
|---|
| 7388 | + struct iomap *srcmap) |
|---|
| 7768 | 7389 | { |
|---|
| 7769 | 7390 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 7770 | 7391 | struct extent_map *em; |
|---|
| 7771 | 7392 | struct extent_state *cached_state = NULL; |
|---|
| 7772 | 7393 | struct btrfs_dio_data *dio_data = NULL; |
|---|
| 7773 | | - u64 start = iblock << inode->i_blkbits; |
|---|
| 7774 | 7394 | u64 lockstart, lockend; |
|---|
| 7775 | | - u64 len = bh_result->b_size; |
|---|
| 7776 | | - int unlock_bits = EXTENT_LOCKED; |
|---|
| 7395 | + const bool write = !!(flags & IOMAP_WRITE); |
|---|
| 7777 | 7396 | int ret = 0; |
|---|
| 7397 | + u64 len = length; |
|---|
| 7398 | + bool unlock_extents = false; |
|---|
| 7399 | + bool sync = (current->journal_info == BTRFS_DIO_SYNC_STUB); |
|---|
| 7778 | 7400 | |
|---|
| 7779 | | - if (create) |
|---|
| 7780 | | - unlock_bits |= EXTENT_DIRTY; |
|---|
| 7781 | | - else |
|---|
| 7401 | + /* |
|---|
| 7402 | + * We used current->journal_info here to see if we were sync, but |
|---|
| 7403 | + * there's a lot of tests in the enospc machinery to not do flushing if |
|---|
| 7404 | + * we have a journal_info set, so we need to clear this out and re-set |
|---|
| 7405 | + * it in iomap_end. |
|---|
| 7406 | + */ |
|---|
| 7407 | + ASSERT(current->journal_info == NULL || |
|---|
| 7408 | + current->journal_info == BTRFS_DIO_SYNC_STUB); |
|---|
| 7409 | + current->journal_info = NULL; |
|---|
| 7410 | + |
|---|
| 7411 | + if (!write) |
|---|
| 7782 | 7412 | len = min_t(u64, len, fs_info->sectorsize); |
|---|
| 7783 | 7413 | |
|---|
| 7784 | 7414 | lockstart = start; |
|---|
| 7785 | 7415 | lockend = start + len - 1; |
|---|
| 7786 | 7416 | |
|---|
| 7787 | | - if (current->journal_info) { |
|---|
| 7788 | | - /* |
|---|
| 7789 | | - * Need to pull our outstanding extents and set journal_info to NULL so |
|---|
| 7790 | | - * that anything that needs to check if there's a transaction doesn't get |
|---|
| 7791 | | - * confused. |
|---|
| 7792 | | - */ |
|---|
| 7793 | | - dio_data = current->journal_info; |
|---|
| 7794 | | - current->journal_info = NULL; |
|---|
| 7417 | + /* |
|---|
| 7418 | + * The generic stuff only does filemap_write_and_wait_range, which |
|---|
| 7419 | + * isn't enough if we've written compressed pages to this area, so we |
|---|
| 7420 | + * need to flush the dirty pages again to make absolutely sure that any |
|---|
| 7421 | + * outstanding dirty pages are on disk. |
|---|
| 7422 | + */ |
|---|
| 7423 | + if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, |
|---|
| 7424 | + &BTRFS_I(inode)->runtime_flags)) { |
|---|
| 7425 | + ret = filemap_fdatawrite_range(inode->i_mapping, start, |
|---|
| 7426 | + start + length - 1); |
|---|
| 7427 | + if (ret) |
|---|
| 7428 | + return ret; |
|---|
| 7795 | 7429 | } |
|---|
| 7430 | + |
|---|
| 7431 | + dio_data = kzalloc(sizeof(*dio_data), GFP_NOFS); |
|---|
| 7432 | + if (!dio_data) |
|---|
| 7433 | + return -ENOMEM; |
|---|
| 7434 | + |
|---|
| 7435 | + dio_data->sync = sync; |
|---|
| 7436 | + dio_data->length = length; |
|---|
| 7437 | + if (write) { |
|---|
| 7438 | + dio_data->reserve = round_up(length, fs_info->sectorsize); |
|---|
| 7439 | + ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), |
|---|
| 7440 | + &dio_data->data_reserved, |
|---|
| 7441 | + start, dio_data->reserve); |
|---|
| 7442 | + if (ret) { |
|---|
| 7443 | + extent_changeset_free(dio_data->data_reserved); |
|---|
| 7444 | + kfree(dio_data); |
|---|
| 7445 | + return ret; |
|---|
| 7446 | + } |
|---|
| 7447 | + } |
|---|
| 7448 | + iomap->private = dio_data; |
|---|
| 7449 | + |
|---|
| 7796 | 7450 | |
|---|
| 7797 | 7451 | /* |
|---|
| 7798 | 7452 | * If this errors out it's because we couldn't invalidate pagecache for |
|---|
| 7799 | 7453 | * this range and we need to fallback to buffered. |
|---|
| 7800 | 7454 | */ |
|---|
| 7801 | | - if (lock_extent_direct(inode, lockstart, lockend, &cached_state, |
|---|
| 7802 | | - create)) { |
|---|
| 7455 | + if (lock_extent_direct(inode, lockstart, lockend, &cached_state, write)) { |
|---|
| 7803 | 7456 | ret = -ENOTBLK; |
|---|
| 7804 | 7457 | goto err; |
|---|
| 7805 | 7458 | } |
|---|
| 7806 | 7459 | |
|---|
| 7807 | | - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0); |
|---|
| 7460 | + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len); |
|---|
| 7808 | 7461 | if (IS_ERR(em)) { |
|---|
| 7809 | 7462 | ret = PTR_ERR(em); |
|---|
| 7810 | 7463 | goto unlock_err; |
|---|
| .. | .. |
|---|
| 7827 | 7480 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || |
|---|
| 7828 | 7481 | em->block_start == EXTENT_MAP_INLINE) { |
|---|
| 7829 | 7482 | free_extent_map(em); |
|---|
| 7830 | | - ret = -ENOTBLK; |
|---|
| 7483 | + /* |
|---|
| 7484 | + * If we are in a NOWAIT context, return -EAGAIN in order to |
|---|
| 7485 | + * fallback to buffered IO. This is not only because we can |
|---|
| 7486 | + * block with buffered IO (no support for NOWAIT semantics at |
|---|
| 7487 | + * the moment) but also to avoid returning short reads to user |
|---|
| 7488 | + * space - this happens if we were able to read some data from |
|---|
| 7489 | + * previous non-compressed extents and then when we fallback to |
|---|
| 7490 | + * buffered IO, at btrfs_file_read_iter() by calling |
|---|
| 7491 | + * filemap_read(), we fail to fault in pages for the read buffer, |
|---|
| 7492 | + * in which case filemap_read() returns a short read (the number |
|---|
| 7493 | + * of bytes previously read is > 0, so it does not return -EFAULT). |
|---|
| 7494 | + */ |
|---|
| 7495 | + ret = (flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOTBLK; |
|---|
| 7831 | 7496 | goto unlock_err; |
|---|
| 7832 | 7497 | } |
|---|
| 7833 | 7498 | |
|---|
| 7834 | | - if (create) { |
|---|
| 7835 | | - ret = btrfs_get_blocks_direct_write(&em, bh_result, inode, |
|---|
| 7836 | | - dio_data, start, len); |
|---|
| 7499 | + len = min(len, em->len - (start - em->start)); |
|---|
| 7500 | + if (write) { |
|---|
| 7501 | + ret = btrfs_get_blocks_direct_write(&em, inode, dio_data, |
|---|
| 7502 | + start, len); |
|---|
| 7837 | 7503 | if (ret < 0) |
|---|
| 7838 | 7504 | goto unlock_err; |
|---|
| 7839 | | - |
|---|
| 7840 | | - /* clear and unlock the entire range */ |
|---|
| 7841 | | - clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
|---|
| 7842 | | - unlock_bits, 1, 0, &cached_state); |
|---|
| 7505 | + unlock_extents = true; |
|---|
| 7506 | + /* Recalc len in case the new em is smaller than requested */ |
|---|
| 7507 | + len = min(len, em->len - (start - em->start)); |
|---|
| 7843 | 7508 | } else { |
|---|
| 7844 | | - ret = btrfs_get_blocks_direct_read(em, bh_result, inode, |
|---|
| 7845 | | - start, len); |
|---|
| 7846 | | - /* Can be negative only if we read from a hole */ |
|---|
| 7847 | | - if (ret < 0) { |
|---|
| 7848 | | - ret = 0; |
|---|
| 7849 | | - free_extent_map(em); |
|---|
| 7850 | | - goto unlock_err; |
|---|
| 7851 | | - } |
|---|
| 7852 | 7509 | /* |
|---|
| 7853 | 7510 | * We need to unlock only the end area that we aren't using. |
|---|
| 7854 | 7511 | * The rest is going to be unlocked by the endio routine. |
|---|
| 7855 | 7512 | */ |
|---|
| 7856 | | - lockstart = start + bh_result->b_size; |
|---|
| 7857 | | - if (lockstart < lockend) { |
|---|
| 7858 | | - clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, |
|---|
| 7859 | | - lockend, unlock_bits, 1, 0, |
|---|
| 7860 | | - &cached_state); |
|---|
| 7861 | | - } else { |
|---|
| 7862 | | - free_extent_state(cached_state); |
|---|
| 7863 | | - } |
|---|
| 7513 | + lockstart = start + len; |
|---|
| 7514 | + if (lockstart < lockend) |
|---|
| 7515 | + unlock_extents = true; |
|---|
| 7864 | 7516 | } |
|---|
| 7517 | + |
|---|
| 7518 | + if (unlock_extents) |
|---|
| 7519 | + unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
|---|
| 7520 | + lockstart, lockend, &cached_state); |
|---|
| 7521 | + else |
|---|
| 7522 | + free_extent_state(cached_state); |
|---|
| 7523 | + |
|---|
| 7524 | + /* |
|---|
| 7525 | + * Translate extent map information to iomap. |
|---|
| 7526 | + * We trim the extents (and move the addr) even though iomap code does |
|---|
| 7527 | + * that, since we have locked only the parts we are performing I/O in. |
|---|
| 7528 | + */ |
|---|
| 7529 | + if ((em->block_start == EXTENT_MAP_HOLE) || |
|---|
| 7530 | + (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) && !write)) { |
|---|
| 7531 | + iomap->addr = IOMAP_NULL_ADDR; |
|---|
| 7532 | + iomap->type = IOMAP_HOLE; |
|---|
| 7533 | + } else { |
|---|
| 7534 | + iomap->addr = em->block_start + (start - em->start); |
|---|
| 7535 | + iomap->type = IOMAP_MAPPED; |
|---|
| 7536 | + } |
|---|
| 7537 | + iomap->offset = start; |
|---|
| 7538 | + iomap->bdev = fs_info->fs_devices->latest_bdev; |
|---|
| 7539 | + iomap->length = len; |
|---|
| 7865 | 7540 | |
|---|
| 7866 | 7541 | free_extent_map(em); |
|---|
| 7867 | 7542 | |
|---|
| 7868 | 7543 | return 0; |
|---|
| 7869 | 7544 | |
|---|
| 7870 | 7545 | unlock_err: |
|---|
| 7871 | | - clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
|---|
| 7872 | | - unlock_bits, 1, 0, &cached_state); |
|---|
| 7546 | + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
|---|
| 7547 | + &cached_state); |
|---|
| 7873 | 7548 | err: |
|---|
| 7874 | | - if (dio_data) |
|---|
| 7875 | | - current->journal_info = dio_data; |
|---|
| 7549 | + if (dio_data) { |
|---|
| 7550 | + btrfs_delalloc_release_space(BTRFS_I(inode), |
|---|
| 7551 | + dio_data->data_reserved, start, |
|---|
| 7552 | + dio_data->reserve, true); |
|---|
| 7553 | + btrfs_delalloc_release_extents(BTRFS_I(inode), dio_data->reserve); |
|---|
| 7554 | + extent_changeset_free(dio_data->data_reserved); |
|---|
| 7555 | + kfree(dio_data); |
|---|
| 7556 | + } |
|---|
| 7876 | 7557 | return ret; |
|---|
| 7877 | 7558 | } |
|---|
| 7878 | 7559 | |
|---|
| 7879 | | -static inline blk_status_t submit_dio_repair_bio(struct inode *inode, |
|---|
| 7880 | | - struct bio *bio, |
|---|
| 7881 | | - int mirror_num) |
|---|
| 7560 | +static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length, |
|---|
| 7561 | + ssize_t written, unsigned int flags, struct iomap *iomap) |
|---|
| 7882 | 7562 | { |
|---|
| 7563 | + int ret = 0; |
|---|
| 7564 | + struct btrfs_dio_data *dio_data = iomap->private; |
|---|
| 7565 | + size_t submitted = dio_data->submitted; |
|---|
| 7566 | + const bool write = !!(flags & IOMAP_WRITE); |
|---|
| 7567 | + |
|---|
| 7568 | + if (!write && (iomap->type == IOMAP_HOLE)) { |
|---|
| 7569 | + /* If reading from a hole, unlock and return */ |
|---|
| 7570 | + unlock_extent(&BTRFS_I(inode)->io_tree, pos, pos + length - 1); |
|---|
| 7571 | + goto out; |
|---|
| 7572 | + } |
|---|
| 7573 | + |
|---|
| 7574 | + if (submitted < length) { |
|---|
| 7575 | + pos += submitted; |
|---|
| 7576 | + length -= submitted; |
|---|
| 7577 | + if (write) |
|---|
| 7578 | + __endio_write_update_ordered(BTRFS_I(inode), pos, |
|---|
| 7579 | + length, false); |
|---|
| 7580 | + else |
|---|
| 7581 | + unlock_extent(&BTRFS_I(inode)->io_tree, pos, |
|---|
| 7582 | + pos + length - 1); |
|---|
| 7583 | + ret = -ENOTBLK; |
|---|
| 7584 | + } |
|---|
| 7585 | + |
|---|
| 7586 | + if (write) { |
|---|
| 7587 | + if (dio_data->reserve) |
|---|
| 7588 | + btrfs_delalloc_release_space(BTRFS_I(inode), |
|---|
| 7589 | + dio_data->data_reserved, pos, |
|---|
| 7590 | + dio_data->reserve, true); |
|---|
| 7591 | + btrfs_delalloc_release_extents(BTRFS_I(inode), dio_data->length); |
|---|
| 7592 | + extent_changeset_free(dio_data->data_reserved); |
|---|
| 7593 | + } |
|---|
| 7594 | +out: |
|---|
| 7595 | + /* |
|---|
| 7596 | + * We're all done, we can re-set the current->journal_info now safely |
|---|
| 7597 | + * for our endio. |
|---|
| 7598 | + */ |
|---|
| 7599 | + if (dio_data->sync) { |
|---|
| 7600 | + ASSERT(current->journal_info == NULL); |
|---|
| 7601 | + current->journal_info = BTRFS_DIO_SYNC_STUB; |
|---|
| 7602 | + } |
|---|
| 7603 | + kfree(dio_data); |
|---|
| 7604 | + iomap->private = NULL; |
|---|
| 7605 | + |
|---|
| 7606 | + return ret; |
|---|
| 7607 | +} |
|---|
| 7608 | + |
|---|
| 7609 | +static void btrfs_dio_private_put(struct btrfs_dio_private *dip) |
|---|
| 7610 | +{ |
|---|
| 7611 | + /* |
|---|
| 7612 | + * This implies a barrier so that stores to dio_bio->bi_status before |
|---|
| 7613 | + * this and loads of dio_bio->bi_status after this are fully ordered. |
|---|
| 7614 | + */ |
|---|
| 7615 | + if (!refcount_dec_and_test(&dip->refs)) |
|---|
| 7616 | + return; |
|---|
| 7617 | + |
|---|
| 7618 | + if (bio_op(dip->dio_bio) == REQ_OP_WRITE) { |
|---|
| 7619 | + __endio_write_update_ordered(BTRFS_I(dip->inode), |
|---|
| 7620 | + dip->logical_offset, |
|---|
| 7621 | + dip->bytes, |
|---|
| 7622 | + !dip->dio_bio->bi_status); |
|---|
| 7623 | + } else { |
|---|
| 7624 | + unlock_extent(&BTRFS_I(dip->inode)->io_tree, |
|---|
| 7625 | + dip->logical_offset, |
|---|
| 7626 | + dip->logical_offset + dip->bytes - 1); |
|---|
| 7627 | + } |
|---|
| 7628 | + |
|---|
| 7629 | + bio_endio(dip->dio_bio); |
|---|
| 7630 | + kfree(dip); |
|---|
| 7631 | +} |
|---|
| 7632 | + |
|---|
| 7633 | +static blk_status_t submit_dio_repair_bio(struct inode *inode, struct bio *bio, |
|---|
| 7634 | + int mirror_num, |
|---|
| 7635 | + unsigned long bio_flags) |
|---|
| 7636 | +{ |
|---|
| 7637 | + struct btrfs_dio_private *dip = bio->bi_private; |
|---|
| 7883 | 7638 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 7884 | 7639 | blk_status_t ret; |
|---|
| 7885 | 7640 | |
|---|
| 7886 | 7641 | BUG_ON(bio_op(bio) == REQ_OP_WRITE); |
|---|
| 7887 | 7642 | |
|---|
| 7888 | | - ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DIO_REPAIR); |
|---|
| 7643 | + ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA); |
|---|
| 7889 | 7644 | if (ret) |
|---|
| 7890 | 7645 | return ret; |
|---|
| 7891 | 7646 | |
|---|
| 7892 | | - ret = btrfs_map_bio(fs_info, bio, mirror_num, 0); |
|---|
| 7893 | | - |
|---|
| 7647 | + refcount_inc(&dip->refs); |
|---|
| 7648 | + ret = btrfs_map_bio(fs_info, bio, mirror_num); |
|---|
| 7649 | + if (ret) |
|---|
| 7650 | + refcount_dec(&dip->refs); |
|---|
| 7894 | 7651 | return ret; |
|---|
| 7895 | 7652 | } |
|---|
| 7896 | 7653 | |
|---|
| 7897 | | -static int btrfs_check_dio_repairable(struct inode *inode, |
|---|
| 7898 | | - struct bio *failed_bio, |
|---|
| 7899 | | - struct io_failure_record *failrec, |
|---|
| 7900 | | - int failed_mirror) |
|---|
| 7654 | +static blk_status_t btrfs_check_read_dio_bio(struct inode *inode, |
|---|
| 7655 | + struct btrfs_io_bio *io_bio, |
|---|
| 7656 | + const bool uptodate) |
|---|
| 7901 | 7657 | { |
|---|
| 7902 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 7903 | | - int num_copies; |
|---|
| 7904 | | - |
|---|
| 7905 | | - num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); |
|---|
| 7906 | | - if (num_copies == 1) { |
|---|
| 7907 | | - /* |
|---|
| 7908 | | - * we only have a single copy of the data, so don't bother with |
|---|
| 7909 | | - * all the retry and error correction code that follows. no |
|---|
| 7910 | | - * matter what the error is, it is very likely to persist. |
|---|
| 7911 | | - */ |
|---|
| 7912 | | - btrfs_debug(fs_info, |
|---|
| 7913 | | - "Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d", |
|---|
| 7914 | | - num_copies, failrec->this_mirror, failed_mirror); |
|---|
| 7915 | | - return 0; |
|---|
| 7916 | | - } |
|---|
| 7917 | | - |
|---|
| 7918 | | - failrec->failed_mirror = failed_mirror; |
|---|
| 7919 | | - failrec->this_mirror++; |
|---|
| 7920 | | - if (failrec->this_mirror == failed_mirror) |
|---|
| 7921 | | - failrec->this_mirror++; |
|---|
| 7922 | | - |
|---|
| 7923 | | - if (failrec->this_mirror > num_copies) { |
|---|
| 7924 | | - btrfs_debug(fs_info, |
|---|
| 7925 | | - "Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d", |
|---|
| 7926 | | - num_copies, failrec->this_mirror, failed_mirror); |
|---|
| 7927 | | - return 0; |
|---|
| 7928 | | - } |
|---|
| 7929 | | - |
|---|
| 7930 | | - return 1; |
|---|
| 7931 | | -} |
|---|
| 7932 | | - |
|---|
| 7933 | | -static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio, |
|---|
| 7934 | | - struct page *page, unsigned int pgoff, |
|---|
| 7935 | | - u64 start, u64 end, int failed_mirror, |
|---|
| 7936 | | - bio_end_io_t *repair_endio, void *repair_arg) |
|---|
| 7937 | | -{ |
|---|
| 7938 | | - struct io_failure_record *failrec; |
|---|
| 7939 | | - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
|---|
| 7658 | + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; |
|---|
| 7659 | + const u32 sectorsize = fs_info->sectorsize; |
|---|
| 7940 | 7660 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; |
|---|
| 7941 | | - struct bio *bio; |
|---|
| 7942 | | - int isector; |
|---|
| 7943 | | - unsigned int read_mode = 0; |
|---|
| 7944 | | - int segs; |
|---|
| 7945 | | - int ret; |
|---|
| 7946 | | - blk_status_t status; |
|---|
| 7947 | | - struct bio_vec bvec; |
|---|
| 7948 | | - |
|---|
| 7949 | | - BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); |
|---|
| 7950 | | - |
|---|
| 7951 | | - ret = btrfs_get_io_failure_record(inode, start, end, &failrec); |
|---|
| 7952 | | - if (ret) |
|---|
| 7953 | | - return errno_to_blk_status(ret); |
|---|
| 7954 | | - |
|---|
| 7955 | | - ret = btrfs_check_dio_repairable(inode, failed_bio, failrec, |
|---|
| 7956 | | - failed_mirror); |
|---|
| 7957 | | - if (!ret) { |
|---|
| 7958 | | - free_io_failure(failure_tree, io_tree, failrec); |
|---|
| 7959 | | - return BLK_STS_IOERR; |
|---|
| 7960 | | - } |
|---|
| 7961 | | - |
|---|
| 7962 | | - segs = bio_segments(failed_bio); |
|---|
| 7963 | | - bio_get_first_bvec(failed_bio, &bvec); |
|---|
| 7964 | | - if (segs > 1 || |
|---|
| 7965 | | - (bvec.bv_len > btrfs_inode_sectorsize(inode))) |
|---|
| 7966 | | - read_mode |= REQ_FAILFAST_DEV; |
|---|
| 7967 | | - |
|---|
| 7968 | | - isector = start - btrfs_io_bio(failed_bio)->logical; |
|---|
| 7969 | | - isector >>= inode->i_sb->s_blocksize_bits; |
|---|
| 7970 | | - bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, |
|---|
| 7971 | | - pgoff, isector, repair_endio, repair_arg); |
|---|
| 7972 | | - bio->bi_opf = REQ_OP_READ | read_mode; |
|---|
| 7973 | | - |
|---|
| 7974 | | - btrfs_debug(BTRFS_I(inode)->root->fs_info, |
|---|
| 7975 | | - "repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d", |
|---|
| 7976 | | - read_mode, failrec->this_mirror, failrec->in_validation); |
|---|
| 7977 | | - |
|---|
| 7978 | | - status = submit_dio_repair_bio(inode, bio, failrec->this_mirror); |
|---|
| 7979 | | - if (status) { |
|---|
| 7980 | | - free_io_failure(failure_tree, io_tree, failrec); |
|---|
| 7981 | | - bio_put(bio); |
|---|
| 7982 | | - } |
|---|
| 7983 | | - |
|---|
| 7984 | | - return status; |
|---|
| 7985 | | -} |
|---|
| 7986 | | - |
|---|
| 7987 | | -struct btrfs_retry_complete { |
|---|
| 7988 | | - struct completion done; |
|---|
| 7989 | | - struct inode *inode; |
|---|
| 7990 | | - u64 start; |
|---|
| 7991 | | - int uptodate; |
|---|
| 7992 | | -}; |
|---|
| 7993 | | - |
|---|
| 7994 | | -static void btrfs_retry_endio_nocsum(struct bio *bio) |
|---|
| 7995 | | -{ |
|---|
| 7996 | | - struct btrfs_retry_complete *done = bio->bi_private; |
|---|
| 7997 | | - struct inode *inode = done->inode; |
|---|
| 7998 | | - struct bio_vec *bvec; |
|---|
| 7999 | | - struct extent_io_tree *io_tree, *failure_tree; |
|---|
| 8000 | | - int i; |
|---|
| 8001 | | - |
|---|
| 8002 | | - if (bio->bi_status) |
|---|
| 8003 | | - goto end; |
|---|
| 8004 | | - |
|---|
| 8005 | | - ASSERT(bio->bi_vcnt == 1); |
|---|
| 8006 | | - io_tree = &BTRFS_I(inode)->io_tree; |
|---|
| 8007 | | - failure_tree = &BTRFS_I(inode)->io_failure_tree; |
|---|
| 8008 | | - ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(inode)); |
|---|
| 8009 | | - |
|---|
| 8010 | | - done->uptodate = 1; |
|---|
| 8011 | | - ASSERT(!bio_flagged(bio, BIO_CLONED)); |
|---|
| 8012 | | - bio_for_each_segment_all(bvec, bio, i) |
|---|
| 8013 | | - clean_io_failure(BTRFS_I(inode)->root->fs_info, failure_tree, |
|---|
| 8014 | | - io_tree, done->start, bvec->bv_page, |
|---|
| 8015 | | - btrfs_ino(BTRFS_I(inode)), 0); |
|---|
| 8016 | | -end: |
|---|
| 8017 | | - complete(&done->done); |
|---|
| 8018 | | - bio_put(bio); |
|---|
| 8019 | | -} |
|---|
| 8020 | | - |
|---|
| 8021 | | -static blk_status_t __btrfs_correct_data_nocsum(struct inode *inode, |
|---|
| 8022 | | - struct btrfs_io_bio *io_bio) |
|---|
| 8023 | | -{ |
|---|
| 8024 | | - struct btrfs_fs_info *fs_info; |
|---|
| 7661 | + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
|---|
| 7662 | + const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM); |
|---|
| 8025 | 7663 | struct bio_vec bvec; |
|---|
| 8026 | 7664 | struct bvec_iter iter; |
|---|
| 8027 | | - struct btrfs_retry_complete done; |
|---|
| 8028 | | - u64 start; |
|---|
| 8029 | | - unsigned int pgoff; |
|---|
| 8030 | | - u32 sectorsize; |
|---|
| 8031 | | - int nr_sectors; |
|---|
| 8032 | | - blk_status_t ret; |
|---|
| 7665 | + u64 start = io_bio->logical; |
|---|
| 7666 | + int icsum = 0; |
|---|
| 8033 | 7667 | blk_status_t err = BLK_STS_OK; |
|---|
| 8034 | 7668 | |
|---|
| 8035 | | - fs_info = BTRFS_I(inode)->root->fs_info; |
|---|
| 8036 | | - sectorsize = fs_info->sectorsize; |
|---|
| 7669 | + __bio_for_each_segment(bvec, &io_bio->bio, iter, io_bio->iter) { |
|---|
| 7670 | + unsigned int i, nr_sectors, pgoff; |
|---|
| 8037 | 7671 | |
|---|
| 8038 | | - start = io_bio->logical; |
|---|
| 8039 | | - done.inode = inode; |
|---|
| 8040 | | - io_bio->bio.bi_iter = io_bio->iter; |
|---|
| 8041 | | - |
|---|
| 8042 | | - bio_for_each_segment(bvec, &io_bio->bio, iter) { |
|---|
| 8043 | 7672 | nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len); |
|---|
| 8044 | 7673 | pgoff = bvec.bv_offset; |
|---|
| 8045 | | - |
|---|
| 8046 | | -next_block_or_try_again: |
|---|
| 8047 | | - done.uptodate = 0; |
|---|
| 8048 | | - done.start = start; |
|---|
| 8049 | | - init_completion(&done.done); |
|---|
| 8050 | | - |
|---|
| 8051 | | - ret = dio_read_error(inode, &io_bio->bio, bvec.bv_page, |
|---|
| 8052 | | - pgoff, start, start + sectorsize - 1, |
|---|
| 8053 | | - io_bio->mirror_num, |
|---|
| 8054 | | - btrfs_retry_endio_nocsum, &done); |
|---|
| 8055 | | - if (ret) { |
|---|
| 8056 | | - err = ret; |
|---|
| 8057 | | - goto next; |
|---|
| 8058 | | - } |
|---|
| 8059 | | - |
|---|
| 8060 | | - wait_for_completion_io(&done.done); |
|---|
| 8061 | | - |
|---|
| 8062 | | - if (!done.uptodate) { |
|---|
| 8063 | | - /* We might have another mirror, so try again */ |
|---|
| 8064 | | - goto next_block_or_try_again; |
|---|
| 8065 | | - } |
|---|
| 8066 | | - |
|---|
| 8067 | | -next: |
|---|
| 8068 | | - start += sectorsize; |
|---|
| 8069 | | - |
|---|
| 8070 | | - nr_sectors--; |
|---|
| 8071 | | - if (nr_sectors) { |
|---|
| 8072 | | - pgoff += sectorsize; |
|---|
| 7674 | + for (i = 0; i < nr_sectors; i++) { |
|---|
| 8073 | 7675 | ASSERT(pgoff < PAGE_SIZE); |
|---|
| 8074 | | - goto next_block_or_try_again; |
|---|
| 7676 | + if (uptodate && |
|---|
| 7677 | + (!csum || !check_data_csum(inode, io_bio, icsum, |
|---|
| 7678 | + bvec.bv_page, pgoff, |
|---|
| 7679 | + start, sectorsize))) { |
|---|
| 7680 | + clean_io_failure(fs_info, failure_tree, io_tree, |
|---|
| 7681 | + start, bvec.bv_page, |
|---|
| 7682 | + btrfs_ino(BTRFS_I(inode)), |
|---|
| 7683 | + pgoff); |
|---|
| 7684 | + } else { |
|---|
| 7685 | + blk_status_t status; |
|---|
| 7686 | + |
|---|
| 7687 | + status = btrfs_submit_read_repair(inode, |
|---|
| 7688 | + &io_bio->bio, |
|---|
| 7689 | + start - io_bio->logical, |
|---|
| 7690 | + bvec.bv_page, pgoff, |
|---|
| 7691 | + start, |
|---|
| 7692 | + start + sectorsize - 1, |
|---|
| 7693 | + io_bio->mirror_num, |
|---|
| 7694 | + submit_dio_repair_bio); |
|---|
| 7695 | + if (status) |
|---|
| 7696 | + err = status; |
|---|
| 7697 | + } |
|---|
| 7698 | + start += sectorsize; |
|---|
| 7699 | + icsum++; |
|---|
| 7700 | + pgoff += sectorsize; |
|---|
| 8075 | 7701 | } |
|---|
| 8076 | 7702 | } |
|---|
| 8077 | | - |
|---|
| 8078 | 7703 | return err; |
|---|
| 8079 | 7704 | } |
|---|
| 8080 | 7705 | |
|---|
| 8081 | | -static void btrfs_retry_endio(struct bio *bio) |
|---|
| 8082 | | -{ |
|---|
| 8083 | | - struct btrfs_retry_complete *done = bio->bi_private; |
|---|
| 8084 | | - struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); |
|---|
| 8085 | | - struct extent_io_tree *io_tree, *failure_tree; |
|---|
| 8086 | | - struct inode *inode = done->inode; |
|---|
| 8087 | | - struct bio_vec *bvec; |
|---|
| 8088 | | - int uptodate; |
|---|
| 8089 | | - int ret; |
|---|
| 8090 | | - int i; |
|---|
| 8091 | | - |
|---|
| 8092 | | - if (bio->bi_status) |
|---|
| 8093 | | - goto end; |
|---|
| 8094 | | - |
|---|
| 8095 | | - uptodate = 1; |
|---|
| 8096 | | - |
|---|
| 8097 | | - ASSERT(bio->bi_vcnt == 1); |
|---|
| 8098 | | - ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(done->inode)); |
|---|
| 8099 | | - |
|---|
| 8100 | | - io_tree = &BTRFS_I(inode)->io_tree; |
|---|
| 8101 | | - failure_tree = &BTRFS_I(inode)->io_failure_tree; |
|---|
| 8102 | | - |
|---|
| 8103 | | - ASSERT(!bio_flagged(bio, BIO_CLONED)); |
|---|
| 8104 | | - bio_for_each_segment_all(bvec, bio, i) { |
|---|
| 8105 | | - ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page, |
|---|
| 8106 | | - bvec->bv_offset, done->start, |
|---|
| 8107 | | - bvec->bv_len); |
|---|
| 8108 | | - if (!ret) |
|---|
| 8109 | | - clean_io_failure(BTRFS_I(inode)->root->fs_info, |
|---|
| 8110 | | - failure_tree, io_tree, done->start, |
|---|
| 8111 | | - bvec->bv_page, |
|---|
| 8112 | | - btrfs_ino(BTRFS_I(inode)), |
|---|
| 8113 | | - bvec->bv_offset); |
|---|
| 8114 | | - else |
|---|
| 8115 | | - uptodate = 0; |
|---|
| 8116 | | - } |
|---|
| 8117 | | - |
|---|
| 8118 | | - done->uptodate = uptodate; |
|---|
| 8119 | | -end: |
|---|
| 8120 | | - complete(&done->done); |
|---|
| 8121 | | - bio_put(bio); |
|---|
| 8122 | | -} |
|---|
| 8123 | | - |
|---|
| 8124 | | -static blk_status_t __btrfs_subio_endio_read(struct inode *inode, |
|---|
| 8125 | | - struct btrfs_io_bio *io_bio, blk_status_t err) |
|---|
| 8126 | | -{ |
|---|
| 8127 | | - struct btrfs_fs_info *fs_info; |
|---|
| 8128 | | - struct bio_vec bvec; |
|---|
| 8129 | | - struct bvec_iter iter; |
|---|
| 8130 | | - struct btrfs_retry_complete done; |
|---|
| 8131 | | - u64 start; |
|---|
| 8132 | | - u64 offset = 0; |
|---|
| 8133 | | - u32 sectorsize; |
|---|
| 8134 | | - int nr_sectors; |
|---|
| 8135 | | - unsigned int pgoff; |
|---|
| 8136 | | - int csum_pos; |
|---|
| 8137 | | - bool uptodate = (err == 0); |
|---|
| 8138 | | - int ret; |
|---|
| 8139 | | - blk_status_t status; |
|---|
| 8140 | | - |
|---|
| 8141 | | - fs_info = BTRFS_I(inode)->root->fs_info; |
|---|
| 8142 | | - sectorsize = fs_info->sectorsize; |
|---|
| 8143 | | - |
|---|
| 8144 | | - err = BLK_STS_OK; |
|---|
| 8145 | | - start = io_bio->logical; |
|---|
| 8146 | | - done.inode = inode; |
|---|
| 8147 | | - io_bio->bio.bi_iter = io_bio->iter; |
|---|
| 8148 | | - |
|---|
| 8149 | | - bio_for_each_segment(bvec, &io_bio->bio, iter) { |
|---|
| 8150 | | - nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len); |
|---|
| 8151 | | - |
|---|
| 8152 | | - pgoff = bvec.bv_offset; |
|---|
| 8153 | | -next_block: |
|---|
| 8154 | | - if (uptodate) { |
|---|
| 8155 | | - csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset); |
|---|
| 8156 | | - ret = __readpage_endio_check(inode, io_bio, csum_pos, |
|---|
| 8157 | | - bvec.bv_page, pgoff, start, sectorsize); |
|---|
| 8158 | | - if (likely(!ret)) |
|---|
| 8159 | | - goto next; |
|---|
| 8160 | | - } |
|---|
| 8161 | | -try_again: |
|---|
| 8162 | | - done.uptodate = 0; |
|---|
| 8163 | | - done.start = start; |
|---|
| 8164 | | - init_completion(&done.done); |
|---|
| 8165 | | - |
|---|
| 8166 | | - status = dio_read_error(inode, &io_bio->bio, bvec.bv_page, |
|---|
| 8167 | | - pgoff, start, start + sectorsize - 1, |
|---|
| 8168 | | - io_bio->mirror_num, btrfs_retry_endio, |
|---|
| 8169 | | - &done); |
|---|
| 8170 | | - if (status) { |
|---|
| 8171 | | - err = status; |
|---|
| 8172 | | - goto next; |
|---|
| 8173 | | - } |
|---|
| 8174 | | - |
|---|
| 8175 | | - wait_for_completion_io(&done.done); |
|---|
| 8176 | | - |
|---|
| 8177 | | - if (!done.uptodate) { |
|---|
| 8178 | | - /* We might have another mirror, so try again */ |
|---|
| 8179 | | - goto try_again; |
|---|
| 8180 | | - } |
|---|
| 8181 | | -next: |
|---|
| 8182 | | - offset += sectorsize; |
|---|
| 8183 | | - start += sectorsize; |
|---|
| 8184 | | - |
|---|
| 8185 | | - ASSERT(nr_sectors); |
|---|
| 8186 | | - |
|---|
| 8187 | | - nr_sectors--; |
|---|
| 8188 | | - if (nr_sectors) { |
|---|
| 8189 | | - pgoff += sectorsize; |
|---|
| 8190 | | - ASSERT(pgoff < PAGE_SIZE); |
|---|
| 8191 | | - goto next_block; |
|---|
| 8192 | | - } |
|---|
| 8193 | | - } |
|---|
| 8194 | | - |
|---|
| 8195 | | - return err; |
|---|
| 8196 | | -} |
|---|
| 8197 | | - |
|---|
| 8198 | | -static blk_status_t btrfs_subio_endio_read(struct inode *inode, |
|---|
| 8199 | | - struct btrfs_io_bio *io_bio, blk_status_t err) |
|---|
| 8200 | | -{ |
|---|
| 8201 | | - bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
|---|
| 8202 | | - |
|---|
| 8203 | | - if (skip_csum) { |
|---|
| 8204 | | - if (unlikely(err)) |
|---|
| 8205 | | - return __btrfs_correct_data_nocsum(inode, io_bio); |
|---|
| 8206 | | - else |
|---|
| 8207 | | - return BLK_STS_OK; |
|---|
| 8208 | | - } else { |
|---|
| 8209 | | - return __btrfs_subio_endio_read(inode, io_bio, err); |
|---|
| 8210 | | - } |
|---|
| 8211 | | -} |
|---|
| 8212 | | - |
|---|
| 8213 | | -static void btrfs_endio_direct_read(struct bio *bio) |
|---|
| 8214 | | -{ |
|---|
| 8215 | | - struct btrfs_dio_private *dip = bio->bi_private; |
|---|
| 8216 | | - struct inode *inode = dip->inode; |
|---|
| 8217 | | - struct bio *dio_bio; |
|---|
| 8218 | | - struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); |
|---|
| 8219 | | - blk_status_t err = bio->bi_status; |
|---|
| 8220 | | - |
|---|
| 8221 | | - if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) |
|---|
| 8222 | | - err = btrfs_subio_endio_read(inode, io_bio, err); |
|---|
| 8223 | | - |
|---|
| 8224 | | - unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, |
|---|
| 8225 | | - dip->logical_offset + dip->bytes - 1); |
|---|
| 8226 | | - dio_bio = dip->dio_bio; |
|---|
| 8227 | | - |
|---|
| 8228 | | - kfree(dip); |
|---|
| 8229 | | - |
|---|
| 8230 | | - dio_bio->bi_status = err; |
|---|
| 8231 | | - dio_end_io(dio_bio); |
|---|
| 8232 | | - |
|---|
| 8233 | | - if (io_bio->end_io) |
|---|
| 8234 | | - io_bio->end_io(io_bio, blk_status_to_errno(err)); |
|---|
| 8235 | | - bio_put(bio); |
|---|
| 8236 | | -} |
|---|
| 8237 | | - |
|---|
| 8238 | | -static void __endio_write_update_ordered(struct inode *inode, |
|---|
| 7706 | +static void __endio_write_update_ordered(struct btrfs_inode *inode, |
|---|
| 8239 | 7707 | const u64 offset, const u64 bytes, |
|---|
| 8240 | 7708 | const bool uptodate) |
|---|
| 8241 | 7709 | { |
|---|
| 8242 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 7710 | + struct btrfs_fs_info *fs_info = inode->root->fs_info; |
|---|
| 8243 | 7711 | struct btrfs_ordered_extent *ordered = NULL; |
|---|
| 8244 | 7712 | struct btrfs_workqueue *wq; |
|---|
| 8245 | | - btrfs_work_func_t func; |
|---|
| 8246 | 7713 | u64 ordered_offset = offset; |
|---|
| 8247 | 7714 | u64 ordered_bytes = bytes; |
|---|
| 8248 | 7715 | u64 last_offset; |
|---|
| 8249 | 7716 | |
|---|
| 8250 | | - if (btrfs_is_free_space_inode(BTRFS_I(inode))) { |
|---|
| 7717 | + if (btrfs_is_free_space_inode(inode)) |
|---|
| 8251 | 7718 | wq = fs_info->endio_freespace_worker; |
|---|
| 8252 | | - func = btrfs_freespace_write_helper; |
|---|
| 8253 | | - } else { |
|---|
| 7719 | + else |
|---|
| 8254 | 7720 | wq = fs_info->endio_write_workers; |
|---|
| 8255 | | - func = btrfs_endio_write_helper; |
|---|
| 8256 | | - } |
|---|
| 8257 | 7721 | |
|---|
| 8258 | 7722 | while (ordered_offset < offset + bytes) { |
|---|
| 8259 | 7723 | last_offset = ordered_offset; |
|---|
| 8260 | 7724 | if (btrfs_dec_test_first_ordered_pending(inode, &ordered, |
|---|
| 8261 | | - &ordered_offset, |
|---|
| 8262 | | - ordered_bytes, |
|---|
| 8263 | | - uptodate)) { |
|---|
| 8264 | | - btrfs_init_work(&ordered->work, func, |
|---|
| 8265 | | - finish_ordered_fn, |
|---|
| 8266 | | - NULL, NULL); |
|---|
| 7725 | + &ordered_offset, |
|---|
| 7726 | + ordered_bytes, |
|---|
| 7727 | + uptodate)) { |
|---|
| 7728 | + btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, |
|---|
| 7729 | + NULL); |
|---|
| 8267 | 7730 | btrfs_queue_work(wq, &ordered->work); |
|---|
| 8268 | 7731 | } |
|---|
| 8269 | 7732 | /* |
|---|
| .. | .. |
|---|
| 8274 | 7737 | return; |
|---|
| 8275 | 7738 | /* |
|---|
| 8276 | 7739 | * Our bio might span multiple ordered extents. In this case |
|---|
| 8277 | | - * we keep goin until we have accounted the whole dio. |
|---|
| 7740 | + * we keep going until we have accounted the whole dio. |
|---|
| 8278 | 7741 | */ |
|---|
| 8279 | 7742 | if (ordered_offset < offset + bytes) { |
|---|
| 8280 | 7743 | ordered_bytes = offset + bytes - ordered_offset; |
|---|
| .. | .. |
|---|
| 8283 | 7746 | } |
|---|
| 8284 | 7747 | } |
|---|
| 8285 | 7748 | |
|---|
| 8286 | | -static void btrfs_endio_direct_write(struct bio *bio) |
|---|
| 8287 | | -{ |
|---|
| 8288 | | - struct btrfs_dio_private *dip = bio->bi_private; |
|---|
| 8289 | | - struct bio *dio_bio = dip->dio_bio; |
|---|
| 8290 | | - |
|---|
| 8291 | | - __endio_write_update_ordered(dip->inode, dip->logical_offset, |
|---|
| 8292 | | - dip->bytes, !bio->bi_status); |
|---|
| 8293 | | - |
|---|
| 8294 | | - kfree(dip); |
|---|
| 8295 | | - |
|---|
| 8296 | | - dio_bio->bi_status = bio->bi_status; |
|---|
| 8297 | | - dio_end_io(dio_bio); |
|---|
| 8298 | | - bio_put(bio); |
|---|
| 8299 | | -} |
|---|
| 8300 | | - |
|---|
| 8301 | 7749 | static blk_status_t btrfs_submit_bio_start_direct_io(void *private_data, |
|---|
| 8302 | 7750 | struct bio *bio, u64 offset) |
|---|
| 8303 | 7751 | { |
|---|
| 8304 | 7752 | struct inode *inode = private_data; |
|---|
| 8305 | | - blk_status_t ret; |
|---|
| 8306 | | - ret = btrfs_csum_one_bio(inode, bio, offset, 1); |
|---|
| 8307 | | - BUG_ON(ret); /* -ENOMEM */ |
|---|
| 8308 | | - return 0; |
|---|
| 7753 | + |
|---|
| 7754 | + return btrfs_csum_one_bio(BTRFS_I(inode), bio, offset, 1); |
|---|
| 8309 | 7755 | } |
|---|
| 8310 | 7756 | |
|---|
| 8311 | 7757 | static void btrfs_end_dio_bio(struct bio *bio) |
|---|
| .. | .. |
|---|
| 8321 | 7767 | (unsigned long long)bio->bi_iter.bi_sector, |
|---|
| 8322 | 7768 | bio->bi_iter.bi_size, err); |
|---|
| 8323 | 7769 | |
|---|
| 8324 | | - if (dip->subio_endio) |
|---|
| 8325 | | - err = dip->subio_endio(dip->inode, btrfs_io_bio(bio), err); |
|---|
| 8326 | | - |
|---|
| 8327 | | - if (err) { |
|---|
| 8328 | | - /* |
|---|
| 8329 | | - * We want to perceive the errors flag being set before |
|---|
| 8330 | | - * decrementing the reference count. We don't need a barrier |
|---|
| 8331 | | - * since atomic operations with a return value are fully |
|---|
| 8332 | | - * ordered as per atomic_t.txt |
|---|
| 8333 | | - */ |
|---|
| 8334 | | - dip->errors = 1; |
|---|
| 7770 | + if (bio_op(bio) == REQ_OP_READ) { |
|---|
| 7771 | + err = btrfs_check_read_dio_bio(dip->inode, btrfs_io_bio(bio), |
|---|
| 7772 | + !err); |
|---|
| 8335 | 7773 | } |
|---|
| 8336 | 7774 | |
|---|
| 8337 | | - /* if there are more bios still pending for this dio, just exit */ |
|---|
| 8338 | | - if (!atomic_dec_and_test(&dip->pending_bios)) |
|---|
| 8339 | | - goto out; |
|---|
| 7775 | + if (err) |
|---|
| 7776 | + dip->dio_bio->bi_status = err; |
|---|
| 8340 | 7777 | |
|---|
| 8341 | | - if (dip->errors) { |
|---|
| 8342 | | - bio_io_error(dip->orig_bio); |
|---|
| 8343 | | - } else { |
|---|
| 8344 | | - dip->dio_bio->bi_status = BLK_STS_OK; |
|---|
| 8345 | | - bio_endio(dip->orig_bio); |
|---|
| 8346 | | - } |
|---|
| 8347 | | -out: |
|---|
| 8348 | 7778 | bio_put(bio); |
|---|
| 8349 | | -} |
|---|
| 8350 | | - |
|---|
| 8351 | | -static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, |
|---|
| 8352 | | - struct btrfs_dio_private *dip, |
|---|
| 8353 | | - struct bio *bio, |
|---|
| 8354 | | - u64 file_offset) |
|---|
| 8355 | | -{ |
|---|
| 8356 | | - struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); |
|---|
| 8357 | | - struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio); |
|---|
| 8358 | | - blk_status_t ret; |
|---|
| 8359 | | - |
|---|
| 8360 | | - /* |
|---|
| 8361 | | - * We load all the csum data we need when we submit |
|---|
| 8362 | | - * the first bio to reduce the csum tree search and |
|---|
| 8363 | | - * contention. |
|---|
| 8364 | | - */ |
|---|
| 8365 | | - if (dip->logical_offset == file_offset) { |
|---|
| 8366 | | - ret = btrfs_lookup_bio_sums_dio(inode, dip->orig_bio, |
|---|
| 8367 | | - file_offset); |
|---|
| 8368 | | - if (ret) |
|---|
| 8369 | | - return ret; |
|---|
| 8370 | | - } |
|---|
| 8371 | | - |
|---|
| 8372 | | - if (bio == dip->orig_bio) |
|---|
| 8373 | | - return 0; |
|---|
| 8374 | | - |
|---|
| 8375 | | - file_offset -= dip->logical_offset; |
|---|
| 8376 | | - file_offset >>= inode->i_sb->s_blocksize_bits; |
|---|
| 8377 | | - io_bio->csum = (u8 *)(((u32 *)orig_io_bio->csum) + file_offset); |
|---|
| 8378 | | - |
|---|
| 8379 | | - return 0; |
|---|
| 7779 | + btrfs_dio_private_put(dip); |
|---|
| 8380 | 7780 | } |
|---|
| 8381 | 7781 | |
|---|
| 8382 | 7782 | static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio, |
|---|
| .. | .. |
|---|
| 8410 | 7810 | * If we aren't doing async submit, calculate the csum of the |
|---|
| 8411 | 7811 | * bio now. |
|---|
| 8412 | 7812 | */ |
|---|
| 8413 | | - ret = btrfs_csum_one_bio(inode, bio, file_offset, 1); |
|---|
| 7813 | + ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, 1); |
|---|
| 8414 | 7814 | if (ret) |
|---|
| 8415 | 7815 | goto err; |
|---|
| 8416 | 7816 | } else { |
|---|
| 8417 | | - ret = btrfs_lookup_and_bind_dio_csum(inode, dip, bio, |
|---|
| 8418 | | - file_offset); |
|---|
| 8419 | | - if (ret) |
|---|
| 8420 | | - goto err; |
|---|
| 7817 | + u64 csum_offset; |
|---|
| 7818 | + |
|---|
| 7819 | + csum_offset = file_offset - dip->logical_offset; |
|---|
| 7820 | + csum_offset >>= inode->i_sb->s_blocksize_bits; |
|---|
| 7821 | + csum_offset *= btrfs_super_csum_size(fs_info->super_copy); |
|---|
| 7822 | + btrfs_io_bio(bio)->csum = dip->csums + csum_offset; |
|---|
| 8421 | 7823 | } |
|---|
| 8422 | 7824 | map: |
|---|
| 8423 | | - ret = btrfs_map_bio(fs_info, bio, 0, 0); |
|---|
| 7825 | + ret = btrfs_map_bio(fs_info, bio, 0); |
|---|
| 8424 | 7826 | err: |
|---|
| 8425 | 7827 | return ret; |
|---|
| 8426 | 7828 | } |
|---|
| 8427 | 7829 | |
|---|
| 8428 | | -static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip) |
|---|
| 7830 | +/* |
|---|
| 7831 | + * If this succeeds, the btrfs_dio_private is responsible for cleaning up locked |
|---|
| 7832 | + * or ordered extents whether or not we submit any bios. |
|---|
| 7833 | + */ |
|---|
| 7834 | +static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio, |
|---|
| 7835 | + struct inode *inode, |
|---|
| 7836 | + loff_t file_offset) |
|---|
| 8429 | 7837 | { |
|---|
| 8430 | | - struct inode *inode = dip->inode; |
|---|
| 7838 | + const bool write = (bio_op(dio_bio) == REQ_OP_WRITE); |
|---|
| 7839 | + const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM); |
|---|
| 7840 | + size_t dip_size; |
|---|
| 7841 | + struct btrfs_dio_private *dip; |
|---|
| 7842 | + |
|---|
| 7843 | + dip_size = sizeof(*dip); |
|---|
| 7844 | + if (!write && csum) { |
|---|
| 7845 | + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 7846 | + const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); |
|---|
| 7847 | + size_t nblocks; |
|---|
| 7848 | + |
|---|
| 7849 | + nblocks = dio_bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits; |
|---|
| 7850 | + dip_size += csum_size * nblocks; |
|---|
| 7851 | + } |
|---|
| 7852 | + |
|---|
| 7853 | + dip = kzalloc(dip_size, GFP_NOFS); |
|---|
| 7854 | + if (!dip) |
|---|
| 7855 | + return NULL; |
|---|
| 7856 | + |
|---|
| 7857 | + dip->inode = inode; |
|---|
| 7858 | + dip->logical_offset = file_offset; |
|---|
| 7859 | + dip->bytes = dio_bio->bi_iter.bi_size; |
|---|
| 7860 | + dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9; |
|---|
| 7861 | + dip->dio_bio = dio_bio; |
|---|
| 7862 | + refcount_set(&dip->refs, 1); |
|---|
| 7863 | + return dip; |
|---|
| 7864 | +} |
|---|
| 7865 | + |
|---|
| 7866 | +static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap, |
|---|
| 7867 | + struct bio *dio_bio, loff_t file_offset) |
|---|
| 7868 | +{ |
|---|
| 7869 | + const bool write = (bio_op(dio_bio) == REQ_OP_WRITE); |
|---|
| 7870 | + const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM); |
|---|
| 8431 | 7871 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 7872 | + const bool raid56 = (btrfs_data_alloc_profile(fs_info) & |
|---|
| 7873 | + BTRFS_BLOCK_GROUP_RAID56_MASK); |
|---|
| 7874 | + struct btrfs_dio_private *dip; |
|---|
| 8432 | 7875 | struct bio *bio; |
|---|
| 8433 | | - struct bio *orig_bio = dip->orig_bio; |
|---|
| 8434 | | - u64 start_sector = orig_bio->bi_iter.bi_sector; |
|---|
| 8435 | | - u64 file_offset = dip->logical_offset; |
|---|
| 8436 | | - u64 map_length; |
|---|
| 7876 | + u64 start_sector; |
|---|
| 8437 | 7877 | int async_submit = 0; |
|---|
| 8438 | 7878 | u64 submit_len; |
|---|
| 8439 | 7879 | int clone_offset = 0; |
|---|
| 8440 | 7880 | int clone_len; |
|---|
| 8441 | 7881 | int ret; |
|---|
| 8442 | 7882 | blk_status_t status; |
|---|
| 7883 | + struct btrfs_io_geometry geom; |
|---|
| 7884 | + struct btrfs_dio_data *dio_data = iomap->private; |
|---|
| 8443 | 7885 | |
|---|
| 8444 | | - map_length = orig_bio->bi_iter.bi_size; |
|---|
| 8445 | | - submit_len = map_length; |
|---|
| 8446 | | - ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), start_sector << 9, |
|---|
| 8447 | | - &map_length, NULL, 0); |
|---|
| 8448 | | - if (ret) |
|---|
| 8449 | | - return -EIO; |
|---|
| 8450 | | - |
|---|
| 8451 | | - if (map_length >= submit_len) { |
|---|
| 8452 | | - bio = orig_bio; |
|---|
| 8453 | | - dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED; |
|---|
| 8454 | | - goto submit; |
|---|
| 7886 | + dip = btrfs_create_dio_private(dio_bio, inode, file_offset); |
|---|
| 7887 | + if (!dip) { |
|---|
| 7888 | + if (!write) { |
|---|
| 7889 | + unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, |
|---|
| 7890 | + file_offset + dio_bio->bi_iter.bi_size - 1); |
|---|
| 7891 | + } |
|---|
| 7892 | + dio_bio->bi_status = BLK_STS_RESOURCE; |
|---|
| 7893 | + bio_endio(dio_bio); |
|---|
| 7894 | + return BLK_QC_T_NONE; |
|---|
| 8455 | 7895 | } |
|---|
| 8456 | 7896 | |
|---|
| 8457 | | - /* async crcs make it difficult to collect full stripe writes. */ |
|---|
| 8458 | | - if (btrfs_data_alloc_profile(fs_info) & BTRFS_BLOCK_GROUP_RAID56_MASK) |
|---|
| 8459 | | - async_submit = 0; |
|---|
| 8460 | | - else |
|---|
| 8461 | | - async_submit = 1; |
|---|
| 7897 | + if (!write && csum) { |
|---|
| 7898 | + /* |
|---|
| 7899 | + * Load the csums up front to reduce csum tree searches and |
|---|
| 7900 | + * contention when submitting bios. |
|---|
| 7901 | + */ |
|---|
| 7902 | + status = btrfs_lookup_bio_sums(inode, dio_bio, file_offset, |
|---|
| 7903 | + dip->csums); |
|---|
| 7904 | + if (status != BLK_STS_OK) |
|---|
| 7905 | + goto out_err; |
|---|
| 7906 | + } |
|---|
| 8462 | 7907 | |
|---|
| 8463 | | - /* bio split */ |
|---|
| 8464 | | - ASSERT(map_length <= INT_MAX); |
|---|
| 7908 | + start_sector = dio_bio->bi_iter.bi_sector; |
|---|
| 7909 | + submit_len = dio_bio->bi_iter.bi_size; |
|---|
| 7910 | + |
|---|
| 8465 | 7911 | do { |
|---|
| 8466 | | - clone_len = min_t(int, submit_len, map_length); |
|---|
| 7912 | + ret = btrfs_get_io_geometry(fs_info, btrfs_op(dio_bio), |
|---|
| 7913 | + start_sector << 9, submit_len, |
|---|
| 7914 | + &geom); |
|---|
| 7915 | + if (ret) { |
|---|
| 7916 | + status = errno_to_blk_status(ret); |
|---|
| 7917 | + goto out_err; |
|---|
| 7918 | + } |
|---|
| 7919 | + ASSERT(geom.len <= INT_MAX); |
|---|
| 7920 | + |
|---|
| 7921 | + clone_len = min_t(int, submit_len, geom.len); |
|---|
| 8467 | 7922 | |
|---|
| 8468 | 7923 | /* |
|---|
| 8469 | 7924 | * This will never fail as it's passing GFP_NOFS and |
|---|
| 8470 | 7925 | * the allocation is backed by btrfs_bioset. |
|---|
| 8471 | 7926 | */ |
|---|
| 8472 | | - bio = btrfs_bio_clone_partial(orig_bio, clone_offset, |
|---|
| 8473 | | - clone_len); |
|---|
| 7927 | + bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len); |
|---|
| 8474 | 7928 | bio->bi_private = dip; |
|---|
| 8475 | 7929 | bio->bi_end_io = btrfs_end_dio_bio; |
|---|
| 8476 | 7930 | btrfs_io_bio(bio)->logical = file_offset; |
|---|
| 8477 | 7931 | |
|---|
| 8478 | 7932 | ASSERT(submit_len >= clone_len); |
|---|
| 8479 | 7933 | submit_len -= clone_len; |
|---|
| 8480 | | - if (submit_len == 0) |
|---|
| 8481 | | - break; |
|---|
| 8482 | 7934 | |
|---|
| 8483 | 7935 | /* |
|---|
| 8484 | 7936 | * Increase the count before we submit the bio so we know |
|---|
| 8485 | 7937 | * the end IO handler won't happen before we increase the |
|---|
| 8486 | 7938 | * count. Otherwise, the dip might get freed before we're |
|---|
| 8487 | 7939 | * done setting it up. |
|---|
| 7940 | + * |
|---|
| 7941 | + * We transfer the initial reference to the last bio, so we |
|---|
| 7942 | + * don't need to increment the reference count for the last one. |
|---|
| 8488 | 7943 | */ |
|---|
| 8489 | | - atomic_inc(&dip->pending_bios); |
|---|
| 7944 | + if (submit_len > 0) { |
|---|
| 7945 | + refcount_inc(&dip->refs); |
|---|
| 7946 | + /* |
|---|
| 7947 | + * If we are submitting more than one bio, submit them |
|---|
| 7948 | + * all asynchronously. The exception is RAID 5 or 6, as |
|---|
| 7949 | + * asynchronous checksums make it difficult to collect |
|---|
| 7950 | + * full stripe writes. |
|---|
| 7951 | + */ |
|---|
| 7952 | + if (!raid56) |
|---|
| 7953 | + async_submit = 1; |
|---|
| 7954 | + } |
|---|
| 8490 | 7955 | |
|---|
| 8491 | 7956 | status = btrfs_submit_dio_bio(bio, inode, file_offset, |
|---|
| 8492 | 7957 | async_submit); |
|---|
| 8493 | 7958 | if (status) { |
|---|
| 8494 | 7959 | bio_put(bio); |
|---|
| 8495 | | - atomic_dec(&dip->pending_bios); |
|---|
| 7960 | + if (submit_len > 0) |
|---|
| 7961 | + refcount_dec(&dip->refs); |
|---|
| 8496 | 7962 | goto out_err; |
|---|
| 8497 | 7963 | } |
|---|
| 8498 | 7964 | |
|---|
| 7965 | + dio_data->submitted += clone_len; |
|---|
| 8499 | 7966 | clone_offset += clone_len; |
|---|
| 8500 | 7967 | start_sector += clone_len >> 9; |
|---|
| 8501 | 7968 | file_offset += clone_len; |
|---|
| 8502 | | - |
|---|
| 8503 | | - map_length = submit_len; |
|---|
| 8504 | | - ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), |
|---|
| 8505 | | - start_sector << 9, &map_length, NULL, 0); |
|---|
| 8506 | | - if (ret) |
|---|
| 8507 | | - goto out_err; |
|---|
| 8508 | 7969 | } while (submit_len > 0); |
|---|
| 7970 | + return BLK_QC_T_NONE; |
|---|
| 8509 | 7971 | |
|---|
| 8510 | | -submit: |
|---|
| 8511 | | - status = btrfs_submit_dio_bio(bio, inode, file_offset, async_submit); |
|---|
| 8512 | | - if (!status) |
|---|
| 8513 | | - return 0; |
|---|
| 8514 | | - |
|---|
| 8515 | | - if (bio != orig_bio) |
|---|
| 8516 | | - bio_put(bio); |
|---|
| 8517 | 7972 | out_err: |
|---|
| 8518 | | - dip->errors = 1; |
|---|
| 8519 | | - /* |
|---|
| 8520 | | - * Before atomic variable goto zero, we must make sure dip->errors is |
|---|
| 8521 | | - * perceived to be set. This ordering is ensured by the fact that an |
|---|
| 8522 | | - * atomic operations with a return value are fully ordered as per |
|---|
| 8523 | | - * atomic_t.txt |
|---|
| 8524 | | - */ |
|---|
| 8525 | | - if (atomic_dec_and_test(&dip->pending_bios)) |
|---|
| 8526 | | - bio_io_error(dip->orig_bio); |
|---|
| 8527 | | - |
|---|
| 8528 | | - /* bio_end_io() will handle error, so we needn't return it */ |
|---|
| 8529 | | - return 0; |
|---|
| 8530 | | -} |
|---|
| 8531 | | - |
|---|
| 8532 | | -static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode, |
|---|
| 8533 | | - loff_t file_offset) |
|---|
| 8534 | | -{ |
|---|
| 8535 | | - struct btrfs_dio_private *dip = NULL; |
|---|
| 8536 | | - struct bio *bio = NULL; |
|---|
| 8537 | | - struct btrfs_io_bio *io_bio; |
|---|
| 8538 | | - bool write = (bio_op(dio_bio) == REQ_OP_WRITE); |
|---|
| 8539 | | - int ret = 0; |
|---|
| 8540 | | - |
|---|
| 8541 | | - bio = btrfs_bio_clone(dio_bio); |
|---|
| 8542 | | - |
|---|
| 8543 | | - dip = kzalloc(sizeof(*dip), GFP_NOFS); |
|---|
| 8544 | | - if (!dip) { |
|---|
| 8545 | | - ret = -ENOMEM; |
|---|
| 8546 | | - goto free_ordered; |
|---|
| 8547 | | - } |
|---|
| 8548 | | - |
|---|
| 8549 | | - dip->private = dio_bio->bi_private; |
|---|
| 8550 | | - dip->inode = inode; |
|---|
| 8551 | | - dip->logical_offset = file_offset; |
|---|
| 8552 | | - dip->bytes = dio_bio->bi_iter.bi_size; |
|---|
| 8553 | | - dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9; |
|---|
| 8554 | | - bio->bi_private = dip; |
|---|
| 8555 | | - dip->orig_bio = bio; |
|---|
| 8556 | | - dip->dio_bio = dio_bio; |
|---|
| 8557 | | - atomic_set(&dip->pending_bios, 1); |
|---|
| 8558 | | - io_bio = btrfs_io_bio(bio); |
|---|
| 8559 | | - io_bio->logical = file_offset; |
|---|
| 8560 | | - |
|---|
| 8561 | | - if (write) { |
|---|
| 8562 | | - bio->bi_end_io = btrfs_endio_direct_write; |
|---|
| 8563 | | - } else { |
|---|
| 8564 | | - bio->bi_end_io = btrfs_endio_direct_read; |
|---|
| 8565 | | - dip->subio_endio = btrfs_subio_endio_read; |
|---|
| 8566 | | - } |
|---|
| 8567 | | - |
|---|
| 8568 | | - /* |
|---|
| 8569 | | - * Reset the range for unsubmitted ordered extents (to a 0 length range) |
|---|
| 8570 | | - * even if we fail to submit a bio, because in such case we do the |
|---|
| 8571 | | - * corresponding error handling below and it must not be done a second |
|---|
| 8572 | | - * time by btrfs_direct_IO(). |
|---|
| 8573 | | - */ |
|---|
| 8574 | | - if (write) { |
|---|
| 8575 | | - struct btrfs_dio_data *dio_data = current->journal_info; |
|---|
| 8576 | | - |
|---|
| 8577 | | - dio_data->unsubmitted_oe_range_end = dip->logical_offset + |
|---|
| 8578 | | - dip->bytes; |
|---|
| 8579 | | - dio_data->unsubmitted_oe_range_start = |
|---|
| 8580 | | - dio_data->unsubmitted_oe_range_end; |
|---|
| 8581 | | - } |
|---|
| 8582 | | - |
|---|
| 8583 | | - ret = btrfs_submit_direct_hook(dip); |
|---|
| 8584 | | - if (!ret) |
|---|
| 8585 | | - return; |
|---|
| 8586 | | - |
|---|
| 8587 | | - if (io_bio->end_io) |
|---|
| 8588 | | - io_bio->end_io(io_bio, ret); |
|---|
| 8589 | | - |
|---|
| 8590 | | -free_ordered: |
|---|
| 8591 | | - /* |
|---|
| 8592 | | - * If we arrived here it means either we failed to submit the dip |
|---|
| 8593 | | - * or we either failed to clone the dio_bio or failed to allocate the |
|---|
| 8594 | | - * dip. If we cloned the dio_bio and allocated the dip, we can just |
|---|
| 8595 | | - * call bio_endio against our io_bio so that we get proper resource |
|---|
| 8596 | | - * cleanup if we fail to submit the dip, otherwise, we must do the |
|---|
| 8597 | | - * same as btrfs_endio_direct_[write|read] because we can't call these |
|---|
| 8598 | | - * callbacks - they require an allocated dip and a clone of dio_bio. |
|---|
| 8599 | | - */ |
|---|
| 8600 | | - if (bio && dip) { |
|---|
| 8601 | | - bio_io_error(bio); |
|---|
| 8602 | | - /* |
|---|
| 8603 | | - * The end io callbacks free our dip, do the final put on bio |
|---|
| 8604 | | - * and all the cleanup and final put for dio_bio (through |
|---|
| 8605 | | - * dio_end_io()). |
|---|
| 8606 | | - */ |
|---|
| 8607 | | - dip = NULL; |
|---|
| 8608 | | - bio = NULL; |
|---|
| 8609 | | - } else { |
|---|
| 8610 | | - if (write) |
|---|
| 8611 | | - __endio_write_update_ordered(inode, |
|---|
| 8612 | | - file_offset, |
|---|
| 8613 | | - dio_bio->bi_iter.bi_size, |
|---|
| 8614 | | - false); |
|---|
| 8615 | | - else |
|---|
| 8616 | | - unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, |
|---|
| 8617 | | - file_offset + dio_bio->bi_iter.bi_size - 1); |
|---|
| 8618 | | - |
|---|
| 8619 | | - dio_bio->bi_status = BLK_STS_IOERR; |
|---|
| 8620 | | - /* |
|---|
| 8621 | | - * Releases and cleans up our dio_bio, no need to bio_put() |
|---|
| 8622 | | - * nor bio_endio()/bio_io_error() against dio_bio. |
|---|
| 8623 | | - */ |
|---|
| 8624 | | - dio_end_io(dio_bio); |
|---|
| 8625 | | - } |
|---|
| 8626 | | - if (bio) |
|---|
| 8627 | | - bio_put(bio); |
|---|
| 8628 | | - kfree(dip); |
|---|
| 7973 | + dip->dio_bio->bi_status = status; |
|---|
| 7974 | + btrfs_dio_private_put(dip); |
|---|
| 7975 | + return BLK_QC_T_NONE; |
|---|
| 8629 | 7976 | } |
|---|
| 8630 | 7977 | |
|---|
| 8631 | 7978 | static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info, |
|---|
| .. | .. |
|---|
| 8661 | 8008 | return retval; |
|---|
| 8662 | 8009 | } |
|---|
| 8663 | 8010 | |
|---|
| 8664 | | -static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) |
|---|
| 8011 | +static inline int btrfs_maybe_fsync_end_io(struct kiocb *iocb, ssize_t size, |
|---|
| 8012 | + int error, unsigned flags) |
|---|
| 8013 | +{ |
|---|
| 8014 | + /* |
|---|
| 8015 | + * Now if we're still in the context of our submitter we know we can't |
|---|
| 8016 | + * safely run generic_write_sync(), so clear our flag here so that the |
|---|
| 8017 | + * caller knows to follow up with a sync. |
|---|
| 8018 | + */ |
|---|
| 8019 | + if (current->journal_info == BTRFS_DIO_SYNC_STUB) { |
|---|
| 8020 | + current->journal_info = NULL; |
|---|
| 8021 | + return error; |
|---|
| 8022 | + } |
|---|
| 8023 | + |
|---|
| 8024 | + if (error) |
|---|
| 8025 | + return error; |
|---|
| 8026 | + |
|---|
| 8027 | + if (size) { |
|---|
| 8028 | + iocb->ki_flags |= IOCB_DSYNC; |
|---|
| 8029 | + return generic_write_sync(iocb, size); |
|---|
| 8030 | + } |
|---|
| 8031 | + |
|---|
| 8032 | + return 0; |
|---|
| 8033 | +} |
|---|
| 8034 | + |
|---|
| 8035 | +static const struct iomap_ops btrfs_dio_iomap_ops = { |
|---|
| 8036 | + .iomap_begin = btrfs_dio_iomap_begin, |
|---|
| 8037 | + .iomap_end = btrfs_dio_iomap_end, |
|---|
| 8038 | +}; |
|---|
| 8039 | + |
|---|
| 8040 | +static const struct iomap_dio_ops btrfs_dio_ops = { |
|---|
| 8041 | + .submit_io = btrfs_submit_direct, |
|---|
| 8042 | +}; |
|---|
| 8043 | + |
|---|
| 8044 | +static const struct iomap_dio_ops btrfs_sync_dops = { |
|---|
| 8045 | + .submit_io = btrfs_submit_direct, |
|---|
| 8046 | + .end_io = btrfs_maybe_fsync_end_io, |
|---|
| 8047 | +}; |
|---|
| 8048 | + |
|---|
| 8049 | +ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) |
|---|
| 8665 | 8050 | { |
|---|
| 8666 | 8051 | struct file *file = iocb->ki_filp; |
|---|
| 8667 | 8052 | struct inode *inode = file->f_mapping->host; |
|---|
| 8668 | 8053 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 8669 | | - struct btrfs_dio_data dio_data = { 0 }; |
|---|
| 8670 | 8054 | struct extent_changeset *data_reserved = NULL; |
|---|
| 8671 | 8055 | loff_t offset = iocb->ki_pos; |
|---|
| 8672 | 8056 | size_t count = 0; |
|---|
| 8673 | | - int flags = 0; |
|---|
| 8674 | | - bool wakeup = true; |
|---|
| 8675 | 8057 | bool relock = false; |
|---|
| 8676 | 8058 | ssize_t ret; |
|---|
| 8677 | 8059 | |
|---|
| 8678 | | - if (check_direct_IO(fs_info, iter, offset)) |
|---|
| 8060 | + if (check_direct_IO(fs_info, iter, offset)) { |
|---|
| 8061 | + ASSERT(current->journal_info == NULL || |
|---|
| 8062 | + current->journal_info == BTRFS_DIO_SYNC_STUB); |
|---|
| 8063 | + current->journal_info = NULL; |
|---|
| 8679 | 8064 | return 0; |
|---|
| 8065 | + } |
|---|
| 8680 | 8066 | |
|---|
| 8681 | | - inode_dio_begin(inode); |
|---|
| 8682 | | - |
|---|
| 8683 | | - /* |
|---|
| 8684 | | - * The generic stuff only does filemap_write_and_wait_range, which |
|---|
| 8685 | | - * isn't enough if we've written compressed pages to this area, so |
|---|
| 8686 | | - * we need to flush the dirty pages again to make absolutely sure |
|---|
| 8687 | | - * that any outstanding dirty pages are on disk. |
|---|
| 8688 | | - */ |
|---|
| 8689 | 8067 | count = iov_iter_count(iter); |
|---|
| 8690 | | - if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, |
|---|
| 8691 | | - &BTRFS_I(inode)->runtime_flags)) |
|---|
| 8692 | | - filemap_fdatawrite_range(inode->i_mapping, offset, |
|---|
| 8693 | | - offset + count - 1); |
|---|
| 8694 | | - |
|---|
| 8695 | 8068 | if (iov_iter_rw(iter) == WRITE) { |
|---|
| 8696 | 8069 | /* |
|---|
| 8697 | 8070 | * If the write DIO is beyond the EOF, we need to update |
|---|
| .. | .. |
|---|
| 8699 | 8072 | * not unlock the i_mutex at this case. |
|---|
| 8700 | 8073 | */ |
|---|
| 8701 | 8074 | if (offset + count <= inode->i_size) { |
|---|
| 8702 | | - dio_data.overwrite = 1; |
|---|
| 8703 | 8075 | inode_unlock(inode); |
|---|
| 8704 | 8076 | relock = true; |
|---|
| 8705 | 8077 | } |
|---|
| 8706 | | - ret = btrfs_delalloc_reserve_space(inode, &data_reserved, |
|---|
| 8707 | | - offset, count); |
|---|
| 8708 | | - if (ret) |
|---|
| 8709 | | - goto out; |
|---|
| 8710 | | - |
|---|
| 8711 | | - /* |
|---|
| 8712 | | - * We need to know how many extents we reserved so that we can |
|---|
| 8713 | | - * do the accounting properly if we go over the number we |
|---|
| 8714 | | - * originally calculated. Abuse current->journal_info for this. |
|---|
| 8715 | | - */ |
|---|
| 8716 | | - dio_data.reserve = round_up(count, |
|---|
| 8717 | | - fs_info->sectorsize); |
|---|
| 8718 | | - dio_data.unsubmitted_oe_range_start = (u64)offset; |
|---|
| 8719 | | - dio_data.unsubmitted_oe_range_end = (u64)offset; |
|---|
| 8720 | | - current->journal_info = &dio_data; |
|---|
| 8721 | 8078 | down_read(&BTRFS_I(inode)->dio_sem); |
|---|
| 8722 | | - } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, |
|---|
| 8723 | | - &BTRFS_I(inode)->runtime_flags)) { |
|---|
| 8724 | | - inode_dio_end(inode); |
|---|
| 8725 | | - flags = DIO_LOCKING | DIO_SKIP_HOLES; |
|---|
| 8726 | | - wakeup = false; |
|---|
| 8727 | 8079 | } |
|---|
| 8728 | 8080 | |
|---|
| 8729 | | - ret = __blockdev_direct_IO(iocb, inode, |
|---|
| 8730 | | - fs_info->fs_devices->latest_bdev, |
|---|
| 8731 | | - iter, btrfs_get_blocks_direct, NULL, |
|---|
| 8732 | | - btrfs_submit_direct, flags); |
|---|
| 8733 | | - if (iov_iter_rw(iter) == WRITE) { |
|---|
| 8081 | + /* |
|---|
| 8082 | + * We are actually a sync iocb, so we need our fancy endio to know |
|---|
| 8083 | + * if we need to sync. |
|---|
| 8084 | + */ |
|---|
| 8085 | + if (current->journal_info) |
|---|
| 8086 | + ret = iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, |
|---|
| 8087 | + &btrfs_sync_dops, is_sync_kiocb(iocb)); |
|---|
| 8088 | + else |
|---|
| 8089 | + ret = iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, |
|---|
| 8090 | + &btrfs_dio_ops, is_sync_kiocb(iocb)); |
|---|
| 8091 | + |
|---|
| 8092 | + if (ret == -ENOTBLK) |
|---|
| 8093 | + ret = 0; |
|---|
| 8094 | + |
|---|
| 8095 | + if (iov_iter_rw(iter) == WRITE) |
|---|
| 8734 | 8096 | up_read(&BTRFS_I(inode)->dio_sem); |
|---|
| 8735 | | - current->journal_info = NULL; |
|---|
| 8736 | | - if (ret < 0 && ret != -EIOCBQUEUED) { |
|---|
| 8737 | | - if (dio_data.reserve) |
|---|
| 8738 | | - btrfs_delalloc_release_space(inode, data_reserved, |
|---|
| 8739 | | - offset, dio_data.reserve, true); |
|---|
| 8740 | | - /* |
|---|
| 8741 | | - * On error we might have left some ordered extents |
|---|
| 8742 | | - * without submitting corresponding bios for them, so |
|---|
| 8743 | | - * cleanup them up to avoid other tasks getting them |
|---|
| 8744 | | - * and waiting for them to complete forever. |
|---|
| 8745 | | - */ |
|---|
| 8746 | | - if (dio_data.unsubmitted_oe_range_start < |
|---|
| 8747 | | - dio_data.unsubmitted_oe_range_end) |
|---|
| 8748 | | - __endio_write_update_ordered(inode, |
|---|
| 8749 | | - dio_data.unsubmitted_oe_range_start, |
|---|
| 8750 | | - dio_data.unsubmitted_oe_range_end - |
|---|
| 8751 | | - dio_data.unsubmitted_oe_range_start, |
|---|
| 8752 | | - false); |
|---|
| 8753 | | - } else if (ret >= 0 && (size_t)ret < count) |
|---|
| 8754 | | - btrfs_delalloc_release_space(inode, data_reserved, |
|---|
| 8755 | | - offset, count - (size_t)ret, true); |
|---|
| 8756 | | - btrfs_delalloc_release_extents(BTRFS_I(inode), count); |
|---|
| 8757 | | - } |
|---|
| 8758 | | -out: |
|---|
| 8759 | | - if (wakeup) |
|---|
| 8760 | | - inode_dio_end(inode); |
|---|
| 8097 | + |
|---|
| 8761 | 8098 | if (relock) |
|---|
| 8762 | 8099 | inode_lock(inode); |
|---|
| 8763 | 8100 | |
|---|
| .. | .. |
|---|
| 8765 | 8102 | return ret; |
|---|
| 8766 | 8103 | } |
|---|
| 8767 | 8104 | |
|---|
| 8768 | | -#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) |
|---|
| 8769 | | - |
|---|
| 8770 | 8105 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
|---|
| 8771 | | - __u64 start, __u64 len) |
|---|
| 8106 | + u64 start, u64 len) |
|---|
| 8772 | 8107 | { |
|---|
| 8773 | 8108 | int ret; |
|---|
| 8774 | 8109 | |
|---|
| 8775 | | - ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS); |
|---|
| 8110 | + ret = fiemap_prep(inode, fieinfo, start, &len, 0); |
|---|
| 8776 | 8111 | if (ret) |
|---|
| 8777 | 8112 | return ret; |
|---|
| 8778 | 8113 | |
|---|
| 8779 | | - return extent_fiemap(inode, fieinfo, start, len); |
|---|
| 8114 | + return extent_fiemap(BTRFS_I(inode), fieinfo, start, len); |
|---|
| 8780 | 8115 | } |
|---|
| 8781 | 8116 | |
|---|
| 8782 | 8117 | int btrfs_readpage(struct file *file, struct page *page) |
|---|
| 8783 | 8118 | { |
|---|
| 8784 | | - struct extent_io_tree *tree; |
|---|
| 8785 | | - tree = &BTRFS_I(page->mapping->host)->io_tree; |
|---|
| 8786 | | - return extent_read_full_page(tree, page, btrfs_get_extent, 0); |
|---|
| 8119 | + struct btrfs_inode *inode = BTRFS_I(page->mapping->host); |
|---|
| 8120 | + u64 start = page_offset(page); |
|---|
| 8121 | + u64 end = start + PAGE_SIZE - 1; |
|---|
| 8122 | + unsigned long bio_flags = 0; |
|---|
| 8123 | + struct bio *bio = NULL; |
|---|
| 8124 | + int ret; |
|---|
| 8125 | + |
|---|
| 8126 | + btrfs_lock_and_flush_ordered_range(inode, start, end, NULL); |
|---|
| 8127 | + |
|---|
| 8128 | + ret = btrfs_do_readpage(page, NULL, &bio, &bio_flags, 0, NULL); |
|---|
| 8129 | + if (bio) |
|---|
| 8130 | + ret = submit_one_bio(bio, 0, bio_flags); |
|---|
| 8131 | + return ret; |
|---|
| 8787 | 8132 | } |
|---|
| 8788 | 8133 | |
|---|
| 8789 | 8134 | static int btrfs_writepage(struct page *page, struct writeback_control *wbc) |
|---|
| .. | .. |
|---|
| 8817 | 8162 | return extent_writepages(mapping, wbc); |
|---|
| 8818 | 8163 | } |
|---|
| 8819 | 8164 | |
|---|
| 8820 | | -static int |
|---|
| 8821 | | -btrfs_readpages(struct file *file, struct address_space *mapping, |
|---|
| 8822 | | - struct list_head *pages, unsigned nr_pages) |
|---|
| 8165 | +static void btrfs_readahead(struct readahead_control *rac) |
|---|
| 8823 | 8166 | { |
|---|
| 8824 | | - return extent_readpages(mapping, pages, nr_pages); |
|---|
| 8167 | + extent_readahead(rac); |
|---|
| 8825 | 8168 | } |
|---|
| 8826 | 8169 | |
|---|
| 8827 | 8170 | static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags) |
|---|
| 8828 | 8171 | { |
|---|
| 8829 | 8172 | int ret = try_release_extent_mapping(page, gfp_flags); |
|---|
| 8830 | | - if (ret == 1) { |
|---|
| 8831 | | - ClearPagePrivate(page); |
|---|
| 8832 | | - set_page_private(page, 0); |
|---|
| 8833 | | - put_page(page); |
|---|
| 8834 | | - } |
|---|
| 8173 | + if (ret == 1) |
|---|
| 8174 | + detach_page_private(page); |
|---|
| 8835 | 8175 | return ret; |
|---|
| 8836 | 8176 | } |
|---|
| 8837 | 8177 | |
|---|
| .. | .. |
|---|
| 8842 | 8182 | return __btrfs_releasepage(page, gfp_flags); |
|---|
| 8843 | 8183 | } |
|---|
| 8844 | 8184 | |
|---|
| 8185 | +#ifdef CONFIG_MIGRATION |
|---|
| 8186 | +static int btrfs_migratepage(struct address_space *mapping, |
|---|
| 8187 | + struct page *newpage, struct page *page, |
|---|
| 8188 | + enum migrate_mode mode) |
|---|
| 8189 | +{ |
|---|
| 8190 | + int ret; |
|---|
| 8191 | + |
|---|
| 8192 | + ret = migrate_page_move_mapping(mapping, newpage, page, 0); |
|---|
| 8193 | + if (ret != MIGRATEPAGE_SUCCESS) |
|---|
| 8194 | + return ret; |
|---|
| 8195 | + |
|---|
| 8196 | + if (page_has_private(page)) |
|---|
| 8197 | + attach_page_private(newpage, detach_page_private(page)); |
|---|
| 8198 | + |
|---|
| 8199 | + if (PagePrivate2(page)) { |
|---|
| 8200 | + ClearPagePrivate2(page); |
|---|
| 8201 | + SetPagePrivate2(newpage); |
|---|
| 8202 | + } |
|---|
| 8203 | + |
|---|
| 8204 | + if (mode != MIGRATE_SYNC_NO_COPY) |
|---|
| 8205 | + migrate_page_copy(newpage, page); |
|---|
| 8206 | + else |
|---|
| 8207 | + migrate_page_states(newpage, page); |
|---|
| 8208 | + return MIGRATEPAGE_SUCCESS; |
|---|
| 8209 | +} |
|---|
| 8210 | +#endif |
|---|
| 8211 | + |
|---|
| 8845 | 8212 | static void btrfs_invalidatepage(struct page *page, unsigned int offset, |
|---|
| 8846 | 8213 | unsigned int length) |
|---|
| 8847 | 8214 | { |
|---|
| 8848 | | - struct inode *inode = page->mapping->host; |
|---|
| 8849 | | - struct extent_io_tree *tree; |
|---|
| 8215 | + struct btrfs_inode *inode = BTRFS_I(page->mapping->host); |
|---|
| 8216 | + struct extent_io_tree *tree = &inode->io_tree; |
|---|
| 8850 | 8217 | struct btrfs_ordered_extent *ordered; |
|---|
| 8851 | 8218 | struct extent_state *cached_state = NULL; |
|---|
| 8852 | 8219 | u64 page_start = page_offset(page); |
|---|
| 8853 | 8220 | u64 page_end = page_start + PAGE_SIZE - 1; |
|---|
| 8854 | 8221 | u64 start; |
|---|
| 8855 | 8222 | u64 end; |
|---|
| 8856 | | - int inode_evicting = inode->i_state & I_FREEING; |
|---|
| 8223 | + int inode_evicting = inode->vfs_inode.i_state & I_FREEING; |
|---|
| 8857 | 8224 | |
|---|
| 8858 | 8225 | /* |
|---|
| 8859 | 8226 | * we have the page locked, so new writeback can't start, |
|---|
| .. | .. |
|---|
| 8864 | 8231 | */ |
|---|
| 8865 | 8232 | wait_on_page_writeback(page); |
|---|
| 8866 | 8233 | |
|---|
| 8867 | | - tree = &BTRFS_I(inode)->io_tree; |
|---|
| 8868 | | - if (offset) { |
|---|
| 8234 | + /* |
|---|
| 8235 | + * For subpage case, we have call sites like |
|---|
| 8236 | + * btrfs_punch_hole_lock_range() which pass a range not aligned to |
|---|
| 8237 | + * sectorsize. |
|---|
| 8238 | + * If the range doesn't cover the full page, we don't need to and |
|---|
| 8239 | + * shouldn't clear page extent mapped, as page->private can still |
|---|
| 8240 | + * record subpage dirty bits for other parts of the range. |
|---|
| 8241 | + * |
|---|
| 8242 | + * For cases that can invalidate the full page even if the range doesn't |
|---|
| 8243 | + * cover the full page, like invalidating the last page, we're |
|---|
| 8244 | + * still safe to wait for ordered extent to finish. |
|---|
| 8245 | + */ |
|---|
| 8246 | + if (!(offset == 0 && length == PAGE_SIZE)) { |
|---|
| 8869 | 8247 | btrfs_releasepage(page, GFP_NOFS); |
|---|
| 8870 | 8248 | return; |
|---|
| 8871 | 8249 | } |
|---|
| 8872 | 8250 | |
|---|
| 8873 | 8251 | if (!inode_evicting) |
|---|
| 8874 | 8252 | lock_extent_bits(tree, page_start, page_end, &cached_state); |
|---|
| 8875 | | -again: |
|---|
| 8253 | + |
|---|
| 8876 | 8254 | start = page_start; |
|---|
| 8877 | | - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start, |
|---|
| 8878 | | - page_end - start + 1); |
|---|
| 8255 | +again: |
|---|
| 8256 | + ordered = btrfs_lookup_ordered_range(inode, start, page_end - start + 1); |
|---|
| 8879 | 8257 | if (ordered) { |
|---|
| 8880 | | - end = min(page_end, ordered->file_offset + ordered->len - 1); |
|---|
| 8258 | + end = min(page_end, |
|---|
| 8259 | + ordered->file_offset + ordered->num_bytes - 1); |
|---|
| 8881 | 8260 | /* |
|---|
| 8882 | 8261 | * IO on this page will never be started, so we need |
|---|
| 8883 | 8262 | * to account for any ordered extents now |
|---|
| 8884 | 8263 | */ |
|---|
| 8885 | 8264 | if (!inode_evicting) |
|---|
| 8886 | 8265 | clear_extent_bit(tree, start, end, |
|---|
| 8887 | | - EXTENT_DIRTY | EXTENT_DELALLOC | |
|---|
| 8888 | | - EXTENT_DELALLOC_NEW | |
|---|
| 8266 | + EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | |
|---|
| 8889 | 8267 | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | |
|---|
| 8890 | 8268 | EXTENT_DEFRAG, 1, 0, &cached_state); |
|---|
| 8891 | 8269 | /* |
|---|
| .. | .. |
|---|
| 8896 | 8274 | struct btrfs_ordered_inode_tree *tree; |
|---|
| 8897 | 8275 | u64 new_len; |
|---|
| 8898 | 8276 | |
|---|
| 8899 | | - tree = &BTRFS_I(inode)->ordered_tree; |
|---|
| 8277 | + tree = &inode->ordered_tree; |
|---|
| 8900 | 8278 | |
|---|
| 8901 | 8279 | spin_lock_irq(&tree->lock); |
|---|
| 8902 | 8280 | set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags); |
|---|
| .. | .. |
|---|
| 8937 | 8315 | */ |
|---|
| 8938 | 8316 | btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE); |
|---|
| 8939 | 8317 | if (!inode_evicting) { |
|---|
| 8940 | | - clear_extent_bit(tree, page_start, page_end, |
|---|
| 8941 | | - EXTENT_LOCKED | EXTENT_DIRTY | |
|---|
| 8318 | + clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED | |
|---|
| 8942 | 8319 | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | |
|---|
| 8943 | 8320 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1, |
|---|
| 8944 | 8321 | &cached_state); |
|---|
| .. | .. |
|---|
| 8947 | 8324 | } |
|---|
| 8948 | 8325 | |
|---|
| 8949 | 8326 | ClearPageChecked(page); |
|---|
| 8950 | | - if (PagePrivate(page)) { |
|---|
| 8951 | | - ClearPagePrivate(page); |
|---|
| 8952 | | - set_page_private(page, 0); |
|---|
| 8953 | | - put_page(page); |
|---|
| 8954 | | - } |
|---|
| 8327 | + detach_page_private(page); |
|---|
| 8955 | 8328 | } |
|---|
| 8956 | 8329 | |
|---|
| 8957 | 8330 | /* |
|---|
| .. | .. |
|---|
| 9004 | 8377 | * end up waiting indefinitely to get a lock on the page currently |
|---|
| 9005 | 8378 | * being processed by btrfs_page_mkwrite() function. |
|---|
| 9006 | 8379 | */ |
|---|
| 9007 | | - ret2 = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start, |
|---|
| 9008 | | - reserved_space); |
|---|
| 8380 | + ret2 = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved, |
|---|
| 8381 | + page_start, reserved_space); |
|---|
| 9009 | 8382 | if (!ret2) { |
|---|
| 9010 | 8383 | ret2 = file_update_time(vmf->vma->vm_file); |
|---|
| 9011 | 8384 | reserved = 1; |
|---|
| .. | .. |
|---|
| 9042 | 8415 | unlock_extent_cached(io_tree, page_start, page_end, |
|---|
| 9043 | 8416 | &cached_state); |
|---|
| 9044 | 8417 | unlock_page(page); |
|---|
| 9045 | | - btrfs_start_ordered_extent(inode, ordered, 1); |
|---|
| 8418 | + btrfs_start_ordered_extent(ordered, 1); |
|---|
| 9046 | 8419 | btrfs_put_ordered_extent(ordered); |
|---|
| 9047 | 8420 | goto again; |
|---|
| 9048 | 8421 | } |
|---|
| .. | .. |
|---|
| 9052 | 8425 | fs_info->sectorsize); |
|---|
| 9053 | 8426 | if (reserved_space < PAGE_SIZE) { |
|---|
| 9054 | 8427 | end = page_start + reserved_space - 1; |
|---|
| 9055 | | - btrfs_delalloc_release_space(inode, data_reserved, |
|---|
| 9056 | | - page_start, PAGE_SIZE - reserved_space, |
|---|
| 9057 | | - true); |
|---|
| 8428 | + btrfs_delalloc_release_space(BTRFS_I(inode), |
|---|
| 8429 | + data_reserved, page_start, |
|---|
| 8430 | + PAGE_SIZE - reserved_space, true); |
|---|
| 9058 | 8431 | } |
|---|
| 9059 | 8432 | } |
|---|
| 9060 | 8433 | |
|---|
| .. | .. |
|---|
| 9066 | 8439 | * reserve data&meta space before lock_page() (see above comments). |
|---|
| 9067 | 8440 | */ |
|---|
| 9068 | 8441 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end, |
|---|
| 9069 | | - EXTENT_DIRTY | EXTENT_DELALLOC | |
|---|
| 9070 | | - EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, |
|---|
| 9071 | | - 0, 0, &cached_state); |
|---|
| 8442 | + EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | |
|---|
| 8443 | + EXTENT_DEFRAG, 0, 0, &cached_state); |
|---|
| 9072 | 8444 | |
|---|
| 9073 | | - ret2 = btrfs_set_extent_delalloc(inode, page_start, end, 0, |
|---|
| 9074 | | - &cached_state, 0); |
|---|
| 8445 | + ret2 = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0, |
|---|
| 8446 | + &cached_state); |
|---|
| 9075 | 8447 | if (ret2) { |
|---|
| 9076 | 8448 | unlock_extent_cached(io_tree, page_start, page_end, |
|---|
| 9077 | 8449 | &cached_state); |
|---|
| 9078 | 8450 | ret = VM_FAULT_SIGBUS; |
|---|
| 9079 | 8451 | goto out_unlock; |
|---|
| 9080 | 8452 | } |
|---|
| 9081 | | - ret2 = 0; |
|---|
| 9082 | 8453 | |
|---|
| 9083 | 8454 | /* page is wholly or partially inside EOF */ |
|---|
| 9084 | 8455 | if (page_start + PAGE_SIZE > size) |
|---|
| 9085 | | - zero_start = size & ~PAGE_MASK; |
|---|
| 8456 | + zero_start = offset_in_page(size); |
|---|
| 9086 | 8457 | else |
|---|
| 9087 | 8458 | zero_start = PAGE_SIZE; |
|---|
| 9088 | 8459 | |
|---|
| .. | .. |
|---|
| 9096 | 8467 | set_page_dirty(page); |
|---|
| 9097 | 8468 | SetPageUptodate(page); |
|---|
| 9098 | 8469 | |
|---|
| 9099 | | - BTRFS_I(inode)->last_trans = fs_info->generation; |
|---|
| 9100 | | - BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; |
|---|
| 9101 | | - BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; |
|---|
| 8470 | + btrfs_set_inode_last_sub_trans(BTRFS_I(inode)); |
|---|
| 9102 | 8471 | |
|---|
| 9103 | 8472 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state); |
|---|
| 9104 | 8473 | |
|---|
| 9105 | | - if (!ret2) { |
|---|
| 9106 | | - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); |
|---|
| 9107 | | - sb_end_pagefault(inode->i_sb); |
|---|
| 9108 | | - extent_changeset_free(data_reserved); |
|---|
| 9109 | | - return VM_FAULT_LOCKED; |
|---|
| 9110 | | - } |
|---|
| 8474 | + btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); |
|---|
| 8475 | + sb_end_pagefault(inode->i_sb); |
|---|
| 8476 | + extent_changeset_free(data_reserved); |
|---|
| 8477 | + return VM_FAULT_LOCKED; |
|---|
| 9111 | 8478 | |
|---|
| 9112 | 8479 | out_unlock: |
|---|
| 9113 | 8480 | unlock_page(page); |
|---|
| 9114 | 8481 | out: |
|---|
| 9115 | 8482 | btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); |
|---|
| 9116 | | - btrfs_delalloc_release_space(inode, data_reserved, page_start, |
|---|
| 8483 | + btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start, |
|---|
| 9117 | 8484 | reserved_space, (ret != 0)); |
|---|
| 9118 | 8485 | out_noreserve: |
|---|
| 9119 | 8486 | sb_end_pagefault(inode->i_sb); |
|---|
| .. | .. |
|---|
| 9129 | 8496 | int ret; |
|---|
| 9130 | 8497 | struct btrfs_trans_handle *trans; |
|---|
| 9131 | 8498 | u64 mask = fs_info->sectorsize - 1; |
|---|
| 9132 | | - u64 min_size = btrfs_calc_trunc_metadata_size(fs_info, 1); |
|---|
| 8499 | + u64 min_size = btrfs_calc_metadata_size(fs_info, 1); |
|---|
| 9133 | 8500 | |
|---|
| 9134 | 8501 | if (!skip_writeback) { |
|---|
| 9135 | 8502 | ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask), |
|---|
| .. | .. |
|---|
| 9184 | 8551 | |
|---|
| 9185 | 8552 | /* Migrate the slack space for the truncate to our reserve */ |
|---|
| 9186 | 8553 | ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv, |
|---|
| 9187 | | - min_size, 0); |
|---|
| 8554 | + min_size, false); |
|---|
| 9188 | 8555 | BUG_ON(ret); |
|---|
| 9189 | 8556 | |
|---|
| 9190 | 8557 | /* |
|---|
| .. | .. |
|---|
| 9219 | 8586 | break; |
|---|
| 9220 | 8587 | } |
|---|
| 9221 | 8588 | |
|---|
| 9222 | | - btrfs_block_rsv_release(fs_info, rsv, -1); |
|---|
| 8589 | + btrfs_block_rsv_release(fs_info, rsv, -1, NULL); |
|---|
| 9223 | 8590 | ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, |
|---|
| 9224 | | - rsv, min_size, 0); |
|---|
| 8591 | + rsv, min_size, false); |
|---|
| 9225 | 8592 | BUG_ON(ret); /* shouldn't happen */ |
|---|
| 9226 | 8593 | trans->block_rsv = rsv; |
|---|
| 9227 | 8594 | } |
|---|
| .. | .. |
|---|
| 9244 | 8611 | ret = PTR_ERR(trans); |
|---|
| 9245 | 8612 | goto out; |
|---|
| 9246 | 8613 | } |
|---|
| 9247 | | - btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
|---|
| 8614 | + btrfs_inode_safe_disk_i_size_write(inode, 0); |
|---|
| 9248 | 8615 | } |
|---|
| 9249 | 8616 | |
|---|
| 9250 | 8617 | if (trans) { |
|---|
| .. | .. |
|---|
| 9327 | 8694 | ei->index_cnt = (u64)-1; |
|---|
| 9328 | 8695 | ei->dir_index = 0; |
|---|
| 9329 | 8696 | ei->last_unlink_trans = 0; |
|---|
| 9330 | | - ei->last_link_trans = 0; |
|---|
| 8697 | + ei->last_reflink_trans = 0; |
|---|
| 9331 | 8698 | ei->last_log_commit = 0; |
|---|
| 9332 | 8699 | |
|---|
| 9333 | 8700 | spin_lock_init(&ei->lock); |
|---|
| .. | .. |
|---|
| 9346 | 8713 | |
|---|
| 9347 | 8714 | inode = &ei->vfs_inode; |
|---|
| 9348 | 8715 | extent_map_tree_init(&ei->extent_tree); |
|---|
| 9349 | | - extent_io_tree_init(&ei->io_tree, inode); |
|---|
| 9350 | | - extent_io_tree_init(&ei->io_failure_tree, inode); |
|---|
| 9351 | | - ei->io_tree.track_uptodate = 1; |
|---|
| 9352 | | - ei->io_failure_tree.track_uptodate = 1; |
|---|
| 8716 | + extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode); |
|---|
| 8717 | + extent_io_tree_init(fs_info, &ei->io_failure_tree, |
|---|
| 8718 | + IO_TREE_INODE_IO_FAILURE, inode); |
|---|
| 8719 | + extent_io_tree_init(fs_info, &ei->file_extent_tree, |
|---|
| 8720 | + IO_TREE_INODE_FILE_EXTENT, inode); |
|---|
| 8721 | + ei->io_tree.track_uptodate = true; |
|---|
| 8722 | + ei->io_failure_tree.track_uptodate = true; |
|---|
| 9353 | 8723 | atomic_set(&ei->sync_writers, 0); |
|---|
| 9354 | 8724 | mutex_init(&ei->log_mutex); |
|---|
| 9355 | | - mutex_init(&ei->delalloc_mutex); |
|---|
| 9356 | 8725 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
|---|
| 9357 | 8726 | INIT_LIST_HEAD(&ei->delalloc_inodes); |
|---|
| 9358 | 8727 | INIT_LIST_HEAD(&ei->delayed_iput); |
|---|
| .. | .. |
|---|
| 9370 | 8739 | } |
|---|
| 9371 | 8740 | #endif |
|---|
| 9372 | 8741 | |
|---|
| 9373 | | -static void btrfs_i_callback(struct rcu_head *head) |
|---|
| 8742 | +void btrfs_free_inode(struct inode *inode) |
|---|
| 9374 | 8743 | { |
|---|
| 9375 | | - struct inode *inode = container_of(head, struct inode, i_rcu); |
|---|
| 9376 | 8744 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); |
|---|
| 9377 | 8745 | } |
|---|
| 9378 | 8746 | |
|---|
| 9379 | | -void btrfs_destroy_inode(struct inode *inode) |
|---|
| 8747 | +void btrfs_destroy_inode(struct inode *vfs_inode) |
|---|
| 9380 | 8748 | { |
|---|
| 9381 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
|---|
| 9382 | 8749 | struct btrfs_ordered_extent *ordered; |
|---|
| 9383 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 8750 | + struct btrfs_inode *inode = BTRFS_I(vfs_inode); |
|---|
| 8751 | + struct btrfs_root *root = inode->root; |
|---|
| 9384 | 8752 | |
|---|
| 9385 | | - WARN_ON(!hlist_empty(&inode->i_dentry)); |
|---|
| 9386 | | - WARN_ON(inode->i_data.nrpages); |
|---|
| 9387 | | - WARN_ON(BTRFS_I(inode)->block_rsv.reserved); |
|---|
| 9388 | | - WARN_ON(BTRFS_I(inode)->block_rsv.size); |
|---|
| 9389 | | - WARN_ON(BTRFS_I(inode)->outstanding_extents); |
|---|
| 9390 | | - WARN_ON(BTRFS_I(inode)->delalloc_bytes); |
|---|
| 9391 | | - WARN_ON(BTRFS_I(inode)->new_delalloc_bytes); |
|---|
| 9392 | | - WARN_ON(BTRFS_I(inode)->csum_bytes); |
|---|
| 9393 | | - WARN_ON(BTRFS_I(inode)->defrag_bytes); |
|---|
| 8753 | + WARN_ON(!hlist_empty(&vfs_inode->i_dentry)); |
|---|
| 8754 | + WARN_ON(vfs_inode->i_data.nrpages); |
|---|
| 8755 | + WARN_ON(inode->block_rsv.reserved); |
|---|
| 8756 | + WARN_ON(inode->block_rsv.size); |
|---|
| 8757 | + WARN_ON(inode->outstanding_extents); |
|---|
| 8758 | + WARN_ON(inode->delalloc_bytes); |
|---|
| 8759 | + WARN_ON(inode->new_delalloc_bytes); |
|---|
| 8760 | + WARN_ON(inode->csum_bytes); |
|---|
| 8761 | + WARN_ON(inode->defrag_bytes); |
|---|
| 9394 | 8762 | |
|---|
| 9395 | 8763 | /* |
|---|
| 9396 | 8764 | * This can happen where we create an inode, but somebody else also |
|---|
| .. | .. |
|---|
| 9398 | 8766 | * created. |
|---|
| 9399 | 8767 | */ |
|---|
| 9400 | 8768 | if (!root) |
|---|
| 9401 | | - goto free; |
|---|
| 8769 | + return; |
|---|
| 9402 | 8770 | |
|---|
| 9403 | 8771 | while (1) { |
|---|
| 9404 | 8772 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); |
|---|
| 9405 | 8773 | if (!ordered) |
|---|
| 9406 | 8774 | break; |
|---|
| 9407 | 8775 | else { |
|---|
| 9408 | | - btrfs_err(fs_info, |
|---|
| 8776 | + btrfs_err(root->fs_info, |
|---|
| 9409 | 8777 | "found ordered extent %llu %llu on inode cleanup", |
|---|
| 9410 | | - ordered->file_offset, ordered->len); |
|---|
| 8778 | + ordered->file_offset, ordered->num_bytes); |
|---|
| 9411 | 8779 | btrfs_remove_ordered_extent(inode, ordered); |
|---|
| 9412 | 8780 | btrfs_put_ordered_extent(ordered); |
|---|
| 9413 | 8781 | btrfs_put_ordered_extent(ordered); |
|---|
| .. | .. |
|---|
| 9415 | 8783 | } |
|---|
| 9416 | 8784 | btrfs_qgroup_check_reserved_leak(inode); |
|---|
| 9417 | 8785 | inode_tree_del(inode); |
|---|
| 9418 | | - btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0); |
|---|
| 9419 | | -free: |
|---|
| 9420 | | - call_rcu(&inode->i_rcu, btrfs_i_callback); |
|---|
| 8786 | + btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); |
|---|
| 8787 | + btrfs_inode_clear_file_extent_range(inode, 0, (u64)-1); |
|---|
| 8788 | + btrfs_put_root(inode->root); |
|---|
| 9421 | 8789 | } |
|---|
| 9422 | 8790 | |
|---|
| 9423 | 8791 | int btrfs_drop_inode(struct inode *inode) |
|---|
| .. | .. |
|---|
| 9542 | 8910 | struct inode *new_inode = new_dentry->d_inode; |
|---|
| 9543 | 8911 | struct inode *old_inode = old_dentry->d_inode; |
|---|
| 9544 | 8912 | struct timespec64 ctime = current_time(old_inode); |
|---|
| 9545 | | - struct dentry *parent; |
|---|
| 9546 | 8913 | u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); |
|---|
| 9547 | 8914 | u64 new_ino = btrfs_ino(BTRFS_I(new_inode)); |
|---|
| 9548 | 8915 | u64 old_idx = 0; |
|---|
| 9549 | 8916 | u64 new_idx = 0; |
|---|
| 9550 | 8917 | int ret; |
|---|
| 8918 | + int ret2; |
|---|
| 9551 | 8919 | bool root_log_pinned = false; |
|---|
| 9552 | 8920 | bool dest_log_pinned = false; |
|---|
| 9553 | | - struct btrfs_log_ctx ctx_root; |
|---|
| 9554 | | - struct btrfs_log_ctx ctx_dest; |
|---|
| 9555 | | - bool sync_log_root = false; |
|---|
| 9556 | | - bool sync_log_dest = false; |
|---|
| 9557 | | - bool commit_transaction = false; |
|---|
| 8921 | + bool need_abort = false; |
|---|
| 9558 | 8922 | |
|---|
| 9559 | 8923 | /* |
|---|
| 9560 | 8924 | * For non-subvolumes allow exchange only within one subvolume, in the |
|---|
| .. | .. |
|---|
| 9565 | 8929 | (old_ino != BTRFS_FIRST_FREE_OBJECTID || |
|---|
| 9566 | 8930 | new_ino != BTRFS_FIRST_FREE_OBJECTID)) |
|---|
| 9567 | 8931 | return -EXDEV; |
|---|
| 9568 | | - |
|---|
| 9569 | | - btrfs_init_log_ctx(&ctx_root, old_inode); |
|---|
| 9570 | | - btrfs_init_log_ctx(&ctx_dest, new_inode); |
|---|
| 9571 | 8932 | |
|---|
| 9572 | 8933 | /* close the race window with snapshot create/destroy ioctl */ |
|---|
| 9573 | 8934 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID || |
|---|
| .. | .. |
|---|
| 9608 | 8969 | /* Reference for the source. */ |
|---|
| 9609 | 8970 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) { |
|---|
| 9610 | 8971 | /* force full log commit if subvolume involved. */ |
|---|
| 9611 | | - btrfs_set_log_full_commit(fs_info, trans); |
|---|
| 8972 | + btrfs_set_log_full_commit(trans); |
|---|
| 9612 | 8973 | } else { |
|---|
| 9613 | 8974 | btrfs_pin_log_trans(root); |
|---|
| 9614 | 8975 | root_log_pinned = true; |
|---|
| .. | .. |
|---|
| 9620 | 8981 | old_idx); |
|---|
| 9621 | 8982 | if (ret) |
|---|
| 9622 | 8983 | goto out_fail; |
|---|
| 8984 | + need_abort = true; |
|---|
| 9623 | 8985 | } |
|---|
| 9624 | 8986 | |
|---|
| 9625 | 8987 | /* And now for the dest. */ |
|---|
| 9626 | 8988 | if (new_ino == BTRFS_FIRST_FREE_OBJECTID) { |
|---|
| 9627 | 8989 | /* force full log commit if subvolume involved. */ |
|---|
| 9628 | | - btrfs_set_log_full_commit(fs_info, trans); |
|---|
| 8990 | + btrfs_set_log_full_commit(trans); |
|---|
| 9629 | 8991 | } else { |
|---|
| 9630 | 8992 | btrfs_pin_log_trans(dest); |
|---|
| 9631 | 8993 | dest_log_pinned = true; |
|---|
| .. | .. |
|---|
| 9635 | 8997 | new_ino, |
|---|
| 9636 | 8998 | btrfs_ino(BTRFS_I(old_dir)), |
|---|
| 9637 | 8999 | new_idx); |
|---|
| 9638 | | - if (ret) |
|---|
| 9000 | + if (ret) { |
|---|
| 9001 | + if (need_abort) |
|---|
| 9002 | + btrfs_abort_transaction(trans, ret); |
|---|
| 9639 | 9003 | goto out_fail; |
|---|
| 9004 | + } |
|---|
| 9640 | 9005 | } |
|---|
| 9641 | 9006 | |
|---|
| 9642 | 9007 | /* Update inode version and ctime/mtime. */ |
|---|
| .. | .. |
|---|
| 9710 | 9075 | BTRFS_I(new_inode)->dir_index = new_idx; |
|---|
| 9711 | 9076 | |
|---|
| 9712 | 9077 | if (root_log_pinned) { |
|---|
| 9713 | | - parent = new_dentry->d_parent; |
|---|
| 9714 | | - ret = btrfs_log_new_name(trans, BTRFS_I(old_inode), |
|---|
| 9715 | | - BTRFS_I(old_dir), parent, |
|---|
| 9716 | | - false, &ctx_root); |
|---|
| 9717 | | - if (ret == BTRFS_NEED_LOG_SYNC) |
|---|
| 9718 | | - sync_log_root = true; |
|---|
| 9719 | | - else if (ret == BTRFS_NEED_TRANS_COMMIT) |
|---|
| 9720 | | - commit_transaction = true; |
|---|
| 9721 | | - ret = 0; |
|---|
| 9078 | + btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir), |
|---|
| 9079 | + new_dentry->d_parent); |
|---|
| 9722 | 9080 | btrfs_end_log_trans(root); |
|---|
| 9723 | 9081 | root_log_pinned = false; |
|---|
| 9724 | 9082 | } |
|---|
| 9725 | 9083 | if (dest_log_pinned) { |
|---|
| 9726 | | - if (!commit_transaction) { |
|---|
| 9727 | | - parent = old_dentry->d_parent; |
|---|
| 9728 | | - ret = btrfs_log_new_name(trans, BTRFS_I(new_inode), |
|---|
| 9729 | | - BTRFS_I(new_dir), parent, |
|---|
| 9730 | | - false, &ctx_dest); |
|---|
| 9731 | | - if (ret == BTRFS_NEED_LOG_SYNC) |
|---|
| 9732 | | - sync_log_dest = true; |
|---|
| 9733 | | - else if (ret == BTRFS_NEED_TRANS_COMMIT) |
|---|
| 9734 | | - commit_transaction = true; |
|---|
| 9735 | | - ret = 0; |
|---|
| 9736 | | - } |
|---|
| 9084 | + btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir), |
|---|
| 9085 | + old_dentry->d_parent); |
|---|
| 9737 | 9086 | btrfs_end_log_trans(dest); |
|---|
| 9738 | 9087 | dest_log_pinned = false; |
|---|
| 9739 | 9088 | } |
|---|
| .. | .. |
|---|
| 9755 | 9104 | btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) || |
|---|
| 9756 | 9105 | (new_inode && |
|---|
| 9757 | 9106 | btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation))) |
|---|
| 9758 | | - btrfs_set_log_full_commit(fs_info, trans); |
|---|
| 9107 | + btrfs_set_log_full_commit(trans); |
|---|
| 9759 | 9108 | |
|---|
| 9760 | 9109 | if (root_log_pinned) { |
|---|
| 9761 | 9110 | btrfs_end_log_trans(root); |
|---|
| .. | .. |
|---|
| 9766 | 9115 | dest_log_pinned = false; |
|---|
| 9767 | 9116 | } |
|---|
| 9768 | 9117 | } |
|---|
| 9769 | | - if (!ret && sync_log_root && !commit_transaction) { |
|---|
| 9770 | | - ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, |
|---|
| 9771 | | - &ctx_root); |
|---|
| 9772 | | - if (ret) |
|---|
| 9773 | | - commit_transaction = true; |
|---|
| 9774 | | - } |
|---|
| 9775 | | - if (!ret && sync_log_dest && !commit_transaction) { |
|---|
| 9776 | | - ret = btrfs_sync_log(trans, BTRFS_I(new_inode)->root, |
|---|
| 9777 | | - &ctx_dest); |
|---|
| 9778 | | - if (ret) |
|---|
| 9779 | | - commit_transaction = true; |
|---|
| 9780 | | - } |
|---|
| 9781 | | - if (commit_transaction) { |
|---|
| 9782 | | - /* |
|---|
| 9783 | | - * We may have set commit_transaction when logging the new name |
|---|
| 9784 | | - * in the destination root, in which case we left the source |
|---|
| 9785 | | - * root context in the list of log contextes. So make sure we |
|---|
| 9786 | | - * remove it to avoid invalid memory accesses, since the context |
|---|
| 9787 | | - * was allocated in our stack frame. |
|---|
| 9788 | | - */ |
|---|
| 9789 | | - if (sync_log_root) { |
|---|
| 9790 | | - mutex_lock(&root->log_mutex); |
|---|
| 9791 | | - list_del_init(&ctx_root.list); |
|---|
| 9792 | | - mutex_unlock(&root->log_mutex); |
|---|
| 9793 | | - } |
|---|
| 9794 | | - ret = btrfs_commit_transaction(trans); |
|---|
| 9795 | | - } else { |
|---|
| 9796 | | - int ret2; |
|---|
| 9797 | | - |
|---|
| 9798 | | - ret2 = btrfs_end_transaction(trans); |
|---|
| 9799 | | - ret = ret ? ret : ret2; |
|---|
| 9800 | | - } |
|---|
| 9118 | + ret2 = btrfs_end_transaction(trans); |
|---|
| 9119 | + ret = ret ? ret : ret2; |
|---|
| 9801 | 9120 | out_notrans: |
|---|
| 9802 | 9121 | if (new_ino == BTRFS_FIRST_FREE_OBJECTID || |
|---|
| 9803 | 9122 | old_ino == BTRFS_FIRST_FREE_OBJECTID) |
|---|
| 9804 | 9123 | up_read(&fs_info->subvol_sem); |
|---|
| 9805 | | - |
|---|
| 9806 | | - ASSERT(list_empty(&ctx_root.list)); |
|---|
| 9807 | | - ASSERT(list_empty(&ctx_dest.list)); |
|---|
| 9808 | 9124 | |
|---|
| 9809 | 9125 | return ret; |
|---|
| 9810 | 9126 | } |
|---|
| .. | .. |
|---|
| 9873 | 9189 | struct inode *old_inode = d_inode(old_dentry); |
|---|
| 9874 | 9190 | u64 index = 0; |
|---|
| 9875 | 9191 | int ret; |
|---|
| 9192 | + int ret2; |
|---|
| 9876 | 9193 | u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); |
|---|
| 9877 | 9194 | bool log_pinned = false; |
|---|
| 9878 | | - struct btrfs_log_ctx ctx; |
|---|
| 9879 | | - bool sync_log = false; |
|---|
| 9880 | | - bool commit_transaction = false; |
|---|
| 9881 | 9195 | |
|---|
| 9882 | 9196 | if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) |
|---|
| 9883 | 9197 | return -EPERM; |
|---|
| .. | .. |
|---|
| 9954 | 9268 | BTRFS_I(old_inode)->dir_index = 0ULL; |
|---|
| 9955 | 9269 | if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { |
|---|
| 9956 | 9270 | /* force full log commit if subvolume involved. */ |
|---|
| 9957 | | - btrfs_set_log_full_commit(fs_info, trans); |
|---|
| 9271 | + btrfs_set_log_full_commit(trans); |
|---|
| 9958 | 9272 | } else { |
|---|
| 9959 | 9273 | btrfs_pin_log_trans(root); |
|---|
| 9960 | 9274 | log_pinned = true; |
|---|
| .. | .. |
|---|
| 10027 | 9341 | BTRFS_I(old_inode)->dir_index = index; |
|---|
| 10028 | 9342 | |
|---|
| 10029 | 9343 | if (log_pinned) { |
|---|
| 10030 | | - struct dentry *parent = new_dentry->d_parent; |
|---|
| 10031 | | - |
|---|
| 10032 | | - btrfs_init_log_ctx(&ctx, old_inode); |
|---|
| 10033 | | - ret = btrfs_log_new_name(trans, BTRFS_I(old_inode), |
|---|
| 10034 | | - BTRFS_I(old_dir), parent, |
|---|
| 10035 | | - false, &ctx); |
|---|
| 10036 | | - if (ret == BTRFS_NEED_LOG_SYNC) |
|---|
| 10037 | | - sync_log = true; |
|---|
| 10038 | | - else if (ret == BTRFS_NEED_TRANS_COMMIT) |
|---|
| 10039 | | - commit_transaction = true; |
|---|
| 10040 | | - ret = 0; |
|---|
| 9344 | + btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir), |
|---|
| 9345 | + new_dentry->d_parent); |
|---|
| 10041 | 9346 | btrfs_end_log_trans(root); |
|---|
| 10042 | 9347 | log_pinned = false; |
|---|
| 10043 | 9348 | } |
|---|
| .. | .. |
|---|
| 10069 | 9374 | btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) || |
|---|
| 10070 | 9375 | (new_inode && |
|---|
| 10071 | 9376 | btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation))) |
|---|
| 10072 | | - btrfs_set_log_full_commit(fs_info, trans); |
|---|
| 9377 | + btrfs_set_log_full_commit(trans); |
|---|
| 10073 | 9378 | |
|---|
| 10074 | 9379 | btrfs_end_log_trans(root); |
|---|
| 10075 | 9380 | log_pinned = false; |
|---|
| 10076 | 9381 | } |
|---|
| 10077 | | - if (!ret && sync_log) { |
|---|
| 10078 | | - ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx); |
|---|
| 10079 | | - if (ret) |
|---|
| 10080 | | - commit_transaction = true; |
|---|
| 10081 | | - } else if (sync_log) { |
|---|
| 10082 | | - mutex_lock(&root->log_mutex); |
|---|
| 10083 | | - list_del(&ctx.list); |
|---|
| 10084 | | - mutex_unlock(&root->log_mutex); |
|---|
| 10085 | | - } |
|---|
| 10086 | | - if (commit_transaction) { |
|---|
| 10087 | | - ret = btrfs_commit_transaction(trans); |
|---|
| 10088 | | - } else { |
|---|
| 10089 | | - int ret2; |
|---|
| 10090 | | - |
|---|
| 10091 | | - ret2 = btrfs_end_transaction(trans); |
|---|
| 10092 | | - ret = ret ? ret : ret2; |
|---|
| 10093 | | - } |
|---|
| 9382 | + ret2 = btrfs_end_transaction(trans); |
|---|
| 9383 | + ret = ret ? ret : ret2; |
|---|
| 10094 | 9384 | out_notrans: |
|---|
| 10095 | 9385 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) |
|---|
| 10096 | 9386 | up_read(&fs_info->subvol_sem); |
|---|
| .. | .. |
|---|
| 10147 | 9437 | init_completion(&work->completion); |
|---|
| 10148 | 9438 | INIT_LIST_HEAD(&work->list); |
|---|
| 10149 | 9439 | work->inode = inode; |
|---|
| 10150 | | - WARN_ON_ONCE(!inode); |
|---|
| 10151 | | - btrfs_init_work(&work->work, btrfs_flush_delalloc_helper, |
|---|
| 10152 | | - btrfs_run_delalloc_work, NULL, NULL); |
|---|
| 9440 | + btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); |
|---|
| 10153 | 9441 | |
|---|
| 10154 | 9442 | return work; |
|---|
| 10155 | 9443 | } |
|---|
| .. | .. |
|---|
| 10158 | 9446 | * some fairly slow code that needs optimization. This walks the list |
|---|
| 10159 | 9447 | * of all the inodes with pending delalloc and forces them to disk. |
|---|
| 10160 | 9448 | */ |
|---|
| 10161 | | -static int start_delalloc_inodes(struct btrfs_root *root, int nr, bool snapshot) |
|---|
| 9449 | +static int start_delalloc_inodes(struct btrfs_root *root, |
|---|
| 9450 | + struct writeback_control *wbc, bool snapshot, |
|---|
| 9451 | + bool in_reclaim_context) |
|---|
| 10162 | 9452 | { |
|---|
| 10163 | 9453 | struct btrfs_inode *binode; |
|---|
| 10164 | 9454 | struct inode *inode; |
|---|
| .. | .. |
|---|
| 10166 | 9456 | struct list_head works; |
|---|
| 10167 | 9457 | struct list_head splice; |
|---|
| 10168 | 9458 | int ret = 0; |
|---|
| 9459 | + bool full_flush = wbc->nr_to_write == LONG_MAX; |
|---|
| 10169 | 9460 | |
|---|
| 10170 | 9461 | INIT_LIST_HEAD(&works); |
|---|
| 10171 | 9462 | INIT_LIST_HEAD(&splice); |
|---|
| .. | .. |
|---|
| 10179 | 9470 | |
|---|
| 10180 | 9471 | list_move_tail(&binode->delalloc_inodes, |
|---|
| 10181 | 9472 | &root->delalloc_inodes); |
|---|
| 9473 | + |
|---|
| 9474 | + if (in_reclaim_context && |
|---|
| 9475 | + test_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &binode->runtime_flags)) |
|---|
| 9476 | + continue; |
|---|
| 9477 | + |
|---|
| 10182 | 9478 | inode = igrab(&binode->vfs_inode); |
|---|
| 10183 | 9479 | if (!inode) { |
|---|
| 10184 | 9480 | cond_resched_lock(&root->delalloc_lock); |
|---|
| .. | .. |
|---|
| 10189 | 9485 | if (snapshot) |
|---|
| 10190 | 9486 | set_bit(BTRFS_INODE_SNAPSHOT_FLUSH, |
|---|
| 10191 | 9487 | &binode->runtime_flags); |
|---|
| 10192 | | - work = btrfs_alloc_delalloc_work(inode); |
|---|
| 10193 | | - if (!work) { |
|---|
| 10194 | | - iput(inode); |
|---|
| 10195 | | - ret = -ENOMEM; |
|---|
| 10196 | | - goto out; |
|---|
| 9488 | + if (full_flush) { |
|---|
| 9489 | + work = btrfs_alloc_delalloc_work(inode); |
|---|
| 9490 | + if (!work) { |
|---|
| 9491 | + iput(inode); |
|---|
| 9492 | + ret = -ENOMEM; |
|---|
| 9493 | + goto out; |
|---|
| 9494 | + } |
|---|
| 9495 | + list_add_tail(&work->list, &works); |
|---|
| 9496 | + btrfs_queue_work(root->fs_info->flush_workers, |
|---|
| 9497 | + &work->work); |
|---|
| 9498 | + } else { |
|---|
| 9499 | + ret = sync_inode(inode, wbc); |
|---|
| 9500 | + if (!ret && |
|---|
| 9501 | + test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, |
|---|
| 9502 | + &BTRFS_I(inode)->runtime_flags)) |
|---|
| 9503 | + ret = sync_inode(inode, wbc); |
|---|
| 9504 | + btrfs_add_delayed_iput(inode); |
|---|
| 9505 | + if (ret || wbc->nr_to_write <= 0) |
|---|
| 9506 | + goto out; |
|---|
| 10197 | 9507 | } |
|---|
| 10198 | | - list_add_tail(&work->list, &works); |
|---|
| 10199 | | - btrfs_queue_work(root->fs_info->flush_workers, |
|---|
| 10200 | | - &work->work); |
|---|
| 10201 | | - ret++; |
|---|
| 10202 | | - if (nr != -1 && ret >= nr) |
|---|
| 10203 | | - goto out; |
|---|
| 10204 | 9508 | cond_resched(); |
|---|
| 10205 | 9509 | spin_lock(&root->delalloc_lock); |
|---|
| 10206 | 9510 | } |
|---|
| .. | .. |
|---|
| 10224 | 9528 | |
|---|
| 10225 | 9529 | int btrfs_start_delalloc_snapshot(struct btrfs_root *root) |
|---|
| 10226 | 9530 | { |
|---|
| 9531 | + struct writeback_control wbc = { |
|---|
| 9532 | + .nr_to_write = LONG_MAX, |
|---|
| 9533 | + .sync_mode = WB_SYNC_NONE, |
|---|
| 9534 | + .range_start = 0, |
|---|
| 9535 | + .range_end = LLONG_MAX, |
|---|
| 9536 | + }; |
|---|
| 10227 | 9537 | struct btrfs_fs_info *fs_info = root->fs_info; |
|---|
| 10228 | | - int ret; |
|---|
| 10229 | 9538 | |
|---|
| 10230 | 9539 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) |
|---|
| 10231 | 9540 | return -EROFS; |
|---|
| 10232 | 9541 | |
|---|
| 10233 | | - ret = start_delalloc_inodes(root, -1, true); |
|---|
| 10234 | | - if (ret > 0) |
|---|
| 10235 | | - ret = 0; |
|---|
| 10236 | | - return ret; |
|---|
| 9542 | + return start_delalloc_inodes(root, &wbc, true, false); |
|---|
| 10237 | 9543 | } |
|---|
| 10238 | 9544 | |
|---|
| 10239 | | -int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int nr) |
|---|
| 9545 | +int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, |
|---|
| 9546 | + bool in_reclaim_context) |
|---|
| 10240 | 9547 | { |
|---|
| 9548 | + struct writeback_control wbc = { |
|---|
| 9549 | + .nr_to_write = (nr == U64_MAX) ? LONG_MAX : (unsigned long)nr, |
|---|
| 9550 | + .sync_mode = WB_SYNC_NONE, |
|---|
| 9551 | + .range_start = 0, |
|---|
| 9552 | + .range_end = LLONG_MAX, |
|---|
| 9553 | + }; |
|---|
| 10241 | 9554 | struct btrfs_root *root; |
|---|
| 10242 | 9555 | struct list_head splice; |
|---|
| 10243 | 9556 | int ret; |
|---|
| .. | .. |
|---|
| 10251 | 9564 | spin_lock(&fs_info->delalloc_root_lock); |
|---|
| 10252 | 9565 | list_splice_init(&fs_info->delalloc_roots, &splice); |
|---|
| 10253 | 9566 | while (!list_empty(&splice) && nr) { |
|---|
| 9567 | + /* |
|---|
| 9568 | + * Reset nr_to_write here so we know that we're doing a full |
|---|
| 9569 | + * flush. |
|---|
| 9570 | + */ |
|---|
| 9571 | + if (nr == U64_MAX) |
|---|
| 9572 | + wbc.nr_to_write = LONG_MAX; |
|---|
| 9573 | + |
|---|
| 10254 | 9574 | root = list_first_entry(&splice, struct btrfs_root, |
|---|
| 10255 | 9575 | delalloc_root); |
|---|
| 10256 | | - root = btrfs_grab_fs_root(root); |
|---|
| 9576 | + root = btrfs_grab_root(root); |
|---|
| 10257 | 9577 | BUG_ON(!root); |
|---|
| 10258 | 9578 | list_move_tail(&root->delalloc_root, |
|---|
| 10259 | 9579 | &fs_info->delalloc_roots); |
|---|
| 10260 | 9580 | spin_unlock(&fs_info->delalloc_root_lock); |
|---|
| 10261 | 9581 | |
|---|
| 10262 | | - ret = start_delalloc_inodes(root, nr, false); |
|---|
| 10263 | | - btrfs_put_fs_root(root); |
|---|
| 10264 | | - if (ret < 0) |
|---|
| 9582 | + ret = start_delalloc_inodes(root, &wbc, false, in_reclaim_context); |
|---|
| 9583 | + btrfs_put_root(root); |
|---|
| 9584 | + if (ret < 0 || wbc.nr_to_write <= 0) |
|---|
| 10265 | 9585 | goto out; |
|---|
| 10266 | | - |
|---|
| 10267 | | - if (nr != -1) { |
|---|
| 10268 | | - nr -= ret; |
|---|
| 10269 | | - WARN_ON(nr < 0); |
|---|
| 10270 | | - } |
|---|
| 10271 | 9586 | spin_lock(&fs_info->delalloc_root_lock); |
|---|
| 10272 | 9587 | } |
|---|
| 10273 | 9588 | spin_unlock(&fs_info->delalloc_root_lock); |
|---|
| .. | .. |
|---|
| 10338 | 9653 | inode->i_fop = &btrfs_file_operations; |
|---|
| 10339 | 9654 | inode->i_op = &btrfs_file_inode_operations; |
|---|
| 10340 | 9655 | inode->i_mapping->a_ops = &btrfs_aops; |
|---|
| 10341 | | - BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
|---|
| 10342 | 9656 | |
|---|
| 10343 | 9657 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); |
|---|
| 10344 | 9658 | if (err) |
|---|
| .. | .. |
|---|
| 10377 | 9691 | |
|---|
| 10378 | 9692 | inode->i_op = &btrfs_symlink_inode_operations; |
|---|
| 10379 | 9693 | inode_nohighmem(inode); |
|---|
| 10380 | | - inode->i_mapping->a_ops = &btrfs_symlink_aops; |
|---|
| 10381 | 9694 | inode_set_bytes(inode, name_len); |
|---|
| 10382 | 9695 | btrfs_i_size_write(BTRFS_I(inode), name_len); |
|---|
| 10383 | 9696 | err = btrfs_update_inode(trans, root, inode); |
|---|
| .. | .. |
|---|
| 10404 | 9717 | return err; |
|---|
| 10405 | 9718 | } |
|---|
| 10406 | 9719 | |
|---|
| 9720 | +static struct btrfs_trans_handle *insert_prealloc_file_extent( |
|---|
| 9721 | + struct btrfs_trans_handle *trans_in, |
|---|
| 9722 | + struct inode *inode, struct btrfs_key *ins, |
|---|
| 9723 | + u64 file_offset) |
|---|
| 9724 | +{ |
|---|
| 9725 | + struct btrfs_file_extent_item stack_fi; |
|---|
| 9726 | + struct btrfs_replace_extent_info extent_info; |
|---|
| 9727 | + struct btrfs_trans_handle *trans = trans_in; |
|---|
| 9728 | + struct btrfs_path *path; |
|---|
| 9729 | + u64 start = ins->objectid; |
|---|
| 9730 | + u64 len = ins->offset; |
|---|
| 9731 | + int ret; |
|---|
| 9732 | + |
|---|
| 9733 | + memset(&stack_fi, 0, sizeof(stack_fi)); |
|---|
| 9734 | + |
|---|
| 9735 | + btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_PREALLOC); |
|---|
| 9736 | + btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, start); |
|---|
| 9737 | + btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi, len); |
|---|
| 9738 | + btrfs_set_stack_file_extent_num_bytes(&stack_fi, len); |
|---|
| 9739 | + btrfs_set_stack_file_extent_ram_bytes(&stack_fi, len); |
|---|
| 9740 | + btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE); |
|---|
| 9741 | + /* Encryption and other encoding are reserved and all 0 */ |
|---|
| 9742 | + |
|---|
| 9743 | + ret = btrfs_qgroup_release_data(BTRFS_I(inode), file_offset, len); |
|---|
| 9744 | + if (ret < 0) |
|---|
| 9745 | + return ERR_PTR(ret); |
|---|
| 9746 | + |
|---|
| 9747 | + if (trans) { |
|---|
| 9748 | + ret = insert_reserved_file_extent(trans, BTRFS_I(inode), |
|---|
| 9749 | + file_offset, &stack_fi, ret); |
|---|
| 9750 | + if (ret) |
|---|
| 9751 | + return ERR_PTR(ret); |
|---|
| 9752 | + return trans; |
|---|
| 9753 | + } |
|---|
| 9754 | + |
|---|
| 9755 | + extent_info.disk_offset = start; |
|---|
| 9756 | + extent_info.disk_len = len; |
|---|
| 9757 | + extent_info.data_offset = 0; |
|---|
| 9758 | + extent_info.data_len = len; |
|---|
| 9759 | + extent_info.file_offset = file_offset; |
|---|
| 9760 | + extent_info.extent_buf = (char *)&stack_fi; |
|---|
| 9761 | + extent_info.is_new_extent = true; |
|---|
| 9762 | + extent_info.qgroup_reserved = ret; |
|---|
| 9763 | + extent_info.insertions = 0; |
|---|
| 9764 | + |
|---|
| 9765 | + path = btrfs_alloc_path(); |
|---|
| 9766 | + if (!path) |
|---|
| 9767 | + return ERR_PTR(-ENOMEM); |
|---|
| 9768 | + |
|---|
| 9769 | + ret = btrfs_replace_file_extents(inode, path, file_offset, |
|---|
| 9770 | + file_offset + len - 1, &extent_info, |
|---|
| 9771 | + &trans); |
|---|
| 9772 | + btrfs_free_path(path); |
|---|
| 9773 | + if (ret) |
|---|
| 9774 | + return ERR_PTR(ret); |
|---|
| 9775 | + |
|---|
| 9776 | + return trans; |
|---|
| 9777 | +} |
|---|
| 9778 | + |
|---|
| 10407 | 9779 | static int __btrfs_prealloc_file_range(struct inode *inode, int mode, |
|---|
| 10408 | 9780 | u64 start, u64 num_bytes, u64 min_size, |
|---|
| 10409 | 9781 | loff_t actual_len, u64 *alloc_hint, |
|---|
| .. | .. |
|---|
| 10426 | 9798 | if (trans) |
|---|
| 10427 | 9799 | own_trans = false; |
|---|
| 10428 | 9800 | while (num_bytes > 0) { |
|---|
| 10429 | | - if (own_trans) { |
|---|
| 10430 | | - trans = btrfs_start_transaction(root, 3); |
|---|
| 10431 | | - if (IS_ERR(trans)) { |
|---|
| 10432 | | - ret = PTR_ERR(trans); |
|---|
| 10433 | | - break; |
|---|
| 10434 | | - } |
|---|
| 10435 | | - } |
|---|
| 10436 | | - |
|---|
| 10437 | 9801 | cur_bytes = min_t(u64, num_bytes, SZ_256M); |
|---|
| 10438 | 9802 | cur_bytes = max(cur_bytes, min_size); |
|---|
| 10439 | 9803 | /* |
|---|
| .. | .. |
|---|
| 10445 | 9809 | cur_bytes = min(cur_bytes, last_alloc); |
|---|
| 10446 | 9810 | ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes, |
|---|
| 10447 | 9811 | min_size, 0, *alloc_hint, &ins, 1, 0); |
|---|
| 10448 | | - if (ret) { |
|---|
| 10449 | | - if (own_trans) |
|---|
| 10450 | | - btrfs_end_transaction(trans); |
|---|
| 9812 | + if (ret) |
|---|
| 10451 | 9813 | break; |
|---|
| 10452 | | - } |
|---|
| 10453 | 9814 | |
|---|
| 10454 | 9815 | /* |
|---|
| 10455 | 9816 | * We've reserved this space, and thus converted it from |
|---|
| .. | .. |
|---|
| 10459 | 9820 | * clear_offset by our extent size. |
|---|
| 10460 | 9821 | */ |
|---|
| 10461 | 9822 | clear_offset += ins.offset; |
|---|
| 10462 | | - btrfs_dec_block_group_reservations(fs_info, ins.objectid); |
|---|
| 10463 | 9823 | |
|---|
| 10464 | 9824 | last_alloc = ins.offset; |
|---|
| 10465 | | - ret = insert_reserved_file_extent(trans, inode, |
|---|
| 10466 | | - cur_offset, ins.objectid, |
|---|
| 10467 | | - ins.offset, ins.offset, |
|---|
| 10468 | | - ins.offset, 0, 0, 0, |
|---|
| 10469 | | - BTRFS_FILE_EXTENT_PREALLOC); |
|---|
| 10470 | | - if (ret) { |
|---|
| 9825 | + trans = insert_prealloc_file_extent(trans, inode, &ins, cur_offset); |
|---|
| 9826 | + /* |
|---|
| 9827 | + * Now that we inserted the prealloc extent we can finally |
|---|
| 9828 | + * decrement the number of reservations in the block group. |
|---|
| 9829 | + * If we did it before, we could race with relocation and have |
|---|
| 9830 | + * relocation miss the reserved extent, making it fail later. |
|---|
| 9831 | + */ |
|---|
| 9832 | + btrfs_dec_block_group_reservations(fs_info, ins.objectid); |
|---|
| 9833 | + if (IS_ERR(trans)) { |
|---|
| 9834 | + ret = PTR_ERR(trans); |
|---|
| 10471 | 9835 | btrfs_free_reserved_extent(fs_info, ins.objectid, |
|---|
| 10472 | 9836 | ins.offset, 0); |
|---|
| 10473 | | - btrfs_abort_transaction(trans, ret); |
|---|
| 10474 | | - if (own_trans) |
|---|
| 10475 | | - btrfs_end_transaction(trans); |
|---|
| 10476 | 9837 | break; |
|---|
| 10477 | 9838 | } |
|---|
| 10478 | 9839 | |
|---|
| .. | .. |
|---|
| 10493 | 9854 | em->block_len = ins.offset; |
|---|
| 10494 | 9855 | em->orig_block_len = ins.offset; |
|---|
| 10495 | 9856 | em->ram_bytes = ins.offset; |
|---|
| 10496 | | - em->bdev = fs_info->fs_devices->latest_bdev; |
|---|
| 10497 | 9857 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); |
|---|
| 10498 | 9858 | em->generation = trans->transid; |
|---|
| 10499 | 9859 | |
|---|
| .. | .. |
|---|
| 10524 | 9884 | else |
|---|
| 10525 | 9885 | i_size = cur_offset; |
|---|
| 10526 | 9886 | i_size_write(inode, i_size); |
|---|
| 10527 | | - btrfs_ordered_update_i_size(inode, i_size, NULL); |
|---|
| 9887 | + btrfs_inode_safe_disk_i_size_write(inode, 0); |
|---|
| 10528 | 9888 | } |
|---|
| 10529 | 9889 | |
|---|
| 10530 | 9890 | ret = btrfs_update_inode(trans, root, inode); |
|---|
| .. | .. |
|---|
| 10536 | 9896 | break; |
|---|
| 10537 | 9897 | } |
|---|
| 10538 | 9898 | |
|---|
| 10539 | | - if (own_trans) |
|---|
| 9899 | + if (own_trans) { |
|---|
| 10540 | 9900 | btrfs_end_transaction(trans); |
|---|
| 9901 | + trans = NULL; |
|---|
| 9902 | + } |
|---|
| 10541 | 9903 | } |
|---|
| 10542 | 9904 | if (clear_offset < end) |
|---|
| 10543 | | - btrfs_free_reserved_data_space(inode, NULL, clear_offset, |
|---|
| 9905 | + btrfs_free_reserved_data_space(BTRFS_I(inode), NULL, clear_offset, |
|---|
| 10544 | 9906 | end - clear_offset + 1); |
|---|
| 10545 | 9907 | return ret; |
|---|
| 10546 | 9908 | } |
|---|
| .. | .. |
|---|
| 10616 | 9978 | inode->i_op = &btrfs_file_inode_operations; |
|---|
| 10617 | 9979 | |
|---|
| 10618 | 9980 | inode->i_mapping->a_ops = &btrfs_aops; |
|---|
| 10619 | | - BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
|---|
| 10620 | 9981 | |
|---|
| 10621 | 9982 | ret = btrfs_init_inode_security(trans, inode, dir, NULL); |
|---|
| 10622 | 9983 | if (ret) |
|---|
| .. | .. |
|---|
| 10648 | 10009 | return ret; |
|---|
| 10649 | 10010 | } |
|---|
| 10650 | 10011 | |
|---|
| 10651 | | -__attribute__((const)) |
|---|
| 10652 | | -static int btrfs_readpage_io_failed_hook(struct page *page, int failed_mirror) |
|---|
| 10653 | | -{ |
|---|
| 10654 | | - return -EAGAIN; |
|---|
| 10655 | | -} |
|---|
| 10656 | | - |
|---|
| 10657 | | -static void btrfs_check_extent_io_range(void *private_data, const char *caller, |
|---|
| 10658 | | - u64 start, u64 end) |
|---|
| 10659 | | -{ |
|---|
| 10660 | | - struct inode *inode = private_data; |
|---|
| 10661 | | - u64 isize; |
|---|
| 10662 | | - |
|---|
| 10663 | | - isize = i_size_read(inode); |
|---|
| 10664 | | - if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { |
|---|
| 10665 | | - btrfs_debug_rl(BTRFS_I(inode)->root->fs_info, |
|---|
| 10666 | | - "%s: ino %llu isize %llu odd range [%llu,%llu]", |
|---|
| 10667 | | - caller, btrfs_ino(BTRFS_I(inode)), isize, start, end); |
|---|
| 10668 | | - } |
|---|
| 10669 | | -} |
|---|
| 10670 | | - |
|---|
| 10671 | 10012 | void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) |
|---|
| 10672 | 10013 | { |
|---|
| 10673 | 10014 | struct inode *inode = tree->private_data; |
|---|
| .. | .. |
|---|
| 10683 | 10024 | index++; |
|---|
| 10684 | 10025 | } |
|---|
| 10685 | 10026 | } |
|---|
| 10027 | + |
|---|
| 10028 | +#ifdef CONFIG_SWAP |
|---|
| 10029 | +/* |
|---|
| 10030 | + * Add an entry indicating a block group or device which is pinned by a |
|---|
| 10031 | + * swapfile. Returns 0 on success, 1 if there is already an entry for it, or a |
|---|
| 10032 | + * negative errno on failure. |
|---|
| 10033 | + */ |
|---|
| 10034 | +static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr, |
|---|
| 10035 | + bool is_block_group) |
|---|
| 10036 | +{ |
|---|
| 10037 | + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; |
|---|
| 10038 | + struct btrfs_swapfile_pin *sp, *entry; |
|---|
| 10039 | + struct rb_node **p; |
|---|
| 10040 | + struct rb_node *parent = NULL; |
|---|
| 10041 | + |
|---|
| 10042 | + sp = kmalloc(sizeof(*sp), GFP_NOFS); |
|---|
| 10043 | + if (!sp) |
|---|
| 10044 | + return -ENOMEM; |
|---|
| 10045 | + sp->ptr = ptr; |
|---|
| 10046 | + sp->inode = inode; |
|---|
| 10047 | + sp->is_block_group = is_block_group; |
|---|
| 10048 | + sp->bg_extent_count = 1; |
|---|
| 10049 | + |
|---|
| 10050 | + spin_lock(&fs_info->swapfile_pins_lock); |
|---|
| 10051 | + p = &fs_info->swapfile_pins.rb_node; |
|---|
| 10052 | + while (*p) { |
|---|
| 10053 | + parent = *p; |
|---|
| 10054 | + entry = rb_entry(parent, struct btrfs_swapfile_pin, node); |
|---|
| 10055 | + if (sp->ptr < entry->ptr || |
|---|
| 10056 | + (sp->ptr == entry->ptr && sp->inode < entry->inode)) { |
|---|
| 10057 | + p = &(*p)->rb_left; |
|---|
| 10058 | + } else if (sp->ptr > entry->ptr || |
|---|
| 10059 | + (sp->ptr == entry->ptr && sp->inode > entry->inode)) { |
|---|
| 10060 | + p = &(*p)->rb_right; |
|---|
| 10061 | + } else { |
|---|
| 10062 | + if (is_block_group) |
|---|
| 10063 | + entry->bg_extent_count++; |
|---|
| 10064 | + spin_unlock(&fs_info->swapfile_pins_lock); |
|---|
| 10065 | + kfree(sp); |
|---|
| 10066 | + return 1; |
|---|
| 10067 | + } |
|---|
| 10068 | + } |
|---|
| 10069 | + rb_link_node(&sp->node, parent, p); |
|---|
| 10070 | + rb_insert_color(&sp->node, &fs_info->swapfile_pins); |
|---|
| 10071 | + spin_unlock(&fs_info->swapfile_pins_lock); |
|---|
| 10072 | + return 0; |
|---|
| 10073 | +} |
|---|
| 10074 | + |
|---|
| 10075 | +/* Free all of the entries pinned by this swapfile. */ |
|---|
| 10076 | +static void btrfs_free_swapfile_pins(struct inode *inode) |
|---|
| 10077 | +{ |
|---|
| 10078 | + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; |
|---|
| 10079 | + struct btrfs_swapfile_pin *sp; |
|---|
| 10080 | + struct rb_node *node, *next; |
|---|
| 10081 | + |
|---|
| 10082 | + spin_lock(&fs_info->swapfile_pins_lock); |
|---|
| 10083 | + node = rb_first(&fs_info->swapfile_pins); |
|---|
| 10084 | + while (node) { |
|---|
| 10085 | + next = rb_next(node); |
|---|
| 10086 | + sp = rb_entry(node, struct btrfs_swapfile_pin, node); |
|---|
| 10087 | + if (sp->inode == inode) { |
|---|
| 10088 | + rb_erase(&sp->node, &fs_info->swapfile_pins); |
|---|
| 10089 | + if (sp->is_block_group) { |
|---|
| 10090 | + btrfs_dec_block_group_swap_extents(sp->ptr, |
|---|
| 10091 | + sp->bg_extent_count); |
|---|
| 10092 | + btrfs_put_block_group(sp->ptr); |
|---|
| 10093 | + } |
|---|
| 10094 | + kfree(sp); |
|---|
| 10095 | + } |
|---|
| 10096 | + node = next; |
|---|
| 10097 | + } |
|---|
| 10098 | + spin_unlock(&fs_info->swapfile_pins_lock); |
|---|
| 10099 | +} |
|---|
| 10100 | + |
|---|
| 10101 | +struct btrfs_swap_info { |
|---|
| 10102 | + u64 start; |
|---|
| 10103 | + u64 block_start; |
|---|
| 10104 | + u64 block_len; |
|---|
| 10105 | + u64 lowest_ppage; |
|---|
| 10106 | + u64 highest_ppage; |
|---|
| 10107 | + unsigned long nr_pages; |
|---|
| 10108 | + int nr_extents; |
|---|
| 10109 | +}; |
|---|
| 10110 | + |
|---|
| 10111 | +static int btrfs_add_swap_extent(struct swap_info_struct *sis, |
|---|
| 10112 | + struct btrfs_swap_info *bsi) |
|---|
| 10113 | +{ |
|---|
| 10114 | + unsigned long nr_pages; |
|---|
| 10115 | + unsigned long max_pages; |
|---|
| 10116 | + u64 first_ppage, first_ppage_reported, next_ppage; |
|---|
| 10117 | + int ret; |
|---|
| 10118 | + |
|---|
| 10119 | + /* |
|---|
| 10120 | + * Our swapfile may have had its size extended after the swap header was |
|---|
| 10121 | + * written. In that case activating the swapfile should not go beyond |
|---|
| 10122 | + * the max size set in the swap header. |
|---|
| 10123 | + */ |
|---|
| 10124 | + if (bsi->nr_pages >= sis->max) |
|---|
| 10125 | + return 0; |
|---|
| 10126 | + |
|---|
| 10127 | + max_pages = sis->max - bsi->nr_pages; |
|---|
| 10128 | + first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT; |
|---|
| 10129 | + next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len, |
|---|
| 10130 | + PAGE_SIZE) >> PAGE_SHIFT; |
|---|
| 10131 | + |
|---|
| 10132 | + if (first_ppage >= next_ppage) |
|---|
| 10133 | + return 0; |
|---|
| 10134 | + nr_pages = next_ppage - first_ppage; |
|---|
| 10135 | + nr_pages = min(nr_pages, max_pages); |
|---|
| 10136 | + |
|---|
| 10137 | + first_ppage_reported = first_ppage; |
|---|
| 10138 | + if (bsi->start == 0) |
|---|
| 10139 | + first_ppage_reported++; |
|---|
| 10140 | + if (bsi->lowest_ppage > first_ppage_reported) |
|---|
| 10141 | + bsi->lowest_ppage = first_ppage_reported; |
|---|
| 10142 | + if (bsi->highest_ppage < (next_ppage - 1)) |
|---|
| 10143 | + bsi->highest_ppage = next_ppage - 1; |
|---|
| 10144 | + |
|---|
| 10145 | + ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage); |
|---|
| 10146 | + if (ret < 0) |
|---|
| 10147 | + return ret; |
|---|
| 10148 | + bsi->nr_extents += ret; |
|---|
| 10149 | + bsi->nr_pages += nr_pages; |
|---|
| 10150 | + return 0; |
|---|
| 10151 | +} |
|---|
| 10152 | + |
|---|
| 10153 | +static void btrfs_swap_deactivate(struct file *file) |
|---|
| 10154 | +{ |
|---|
| 10155 | + struct inode *inode = file_inode(file); |
|---|
| 10156 | + |
|---|
| 10157 | + btrfs_free_swapfile_pins(inode); |
|---|
| 10158 | + atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles); |
|---|
| 10159 | +} |
|---|
| 10160 | + |
|---|
| 10161 | +static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, |
|---|
| 10162 | + sector_t *span) |
|---|
| 10163 | +{ |
|---|
| 10164 | + struct inode *inode = file_inode(file); |
|---|
| 10165 | + struct btrfs_root *root = BTRFS_I(inode)->root; |
|---|
| 10166 | + struct btrfs_fs_info *fs_info = root->fs_info; |
|---|
| 10167 | + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
|---|
| 10168 | + struct extent_state *cached_state = NULL; |
|---|
| 10169 | + struct extent_map *em = NULL; |
|---|
| 10170 | + struct btrfs_device *device = NULL; |
|---|
| 10171 | + struct btrfs_swap_info bsi = { |
|---|
| 10172 | + .lowest_ppage = (sector_t)-1ULL, |
|---|
| 10173 | + }; |
|---|
| 10174 | + int ret = 0; |
|---|
| 10175 | + u64 isize; |
|---|
| 10176 | + u64 start; |
|---|
| 10177 | + |
|---|
| 10178 | + /* |
|---|
| 10179 | + * If the swap file was just created, make sure delalloc is done. If the |
|---|
| 10180 | + * file changes again after this, the user is doing something stupid and |
|---|
| 10181 | + * we don't really care. |
|---|
| 10182 | + */ |
|---|
| 10183 | + ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); |
|---|
| 10184 | + if (ret) |
|---|
| 10185 | + return ret; |
|---|
| 10186 | + |
|---|
| 10187 | + /* |
|---|
| 10188 | + * The inode is locked, so these flags won't change after we check them. |
|---|
| 10189 | + */ |
|---|
| 10190 | + if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) { |
|---|
| 10191 | + btrfs_warn(fs_info, "swapfile must not be compressed"); |
|---|
| 10192 | + return -EINVAL; |
|---|
| 10193 | + } |
|---|
| 10194 | + if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) { |
|---|
| 10195 | + btrfs_warn(fs_info, "swapfile must not be copy-on-write"); |
|---|
| 10196 | + return -EINVAL; |
|---|
| 10197 | + } |
|---|
| 10198 | + if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { |
|---|
| 10199 | + btrfs_warn(fs_info, "swapfile must not be checksummed"); |
|---|
| 10200 | + return -EINVAL; |
|---|
| 10201 | + } |
|---|
| 10202 | + |
|---|
| 10203 | + /* |
|---|
| 10204 | + * Balance or device remove/replace/resize can move stuff around from |
|---|
| 10205 | + * under us. The exclop protection makes sure they aren't running/won't |
|---|
| 10206 | + * run concurrently while we are mapping the swap extents, and |
|---|
| 10207 | + * fs_info->swapfile_pins prevents them from running while the swap |
|---|
| 10208 | + * file is active and moving the extents. Note that this also prevents |
|---|
| 10209 | + * a concurrent device add which isn't actually necessary, but it's not |
|---|
| 10210 | + * really worth the trouble to allow it. |
|---|
| 10211 | + */ |
|---|
| 10212 | + if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_SWAP_ACTIVATE)) { |
|---|
| 10213 | + btrfs_warn(fs_info, |
|---|
| 10214 | + "cannot activate swapfile while exclusive operation is running"); |
|---|
| 10215 | + return -EBUSY; |
|---|
| 10216 | + } |
|---|
| 10217 | + |
|---|
| 10218 | + /* |
|---|
| 10219 | + * Prevent snapshot creation while we are activating the swap file. |
|---|
| 10220 | + * We do not want to race with snapshot creation. If snapshot creation |
|---|
| 10221 | + * already started before we bumped nr_swapfiles from 0 to 1 and |
|---|
| 10222 | + * completes before the first write into the swap file after it is |
|---|
| 10223 | + * activated, then that write would fall back to COW. |
|---|
| 10224 | + */ |
|---|
| 10225 | + if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) { |
|---|
| 10226 | + btrfs_exclop_finish(fs_info); |
|---|
| 10227 | + btrfs_warn(fs_info, |
|---|
| 10228 | + "cannot activate swapfile because snapshot creation is in progress"); |
|---|
| 10229 | + return -EINVAL; |
|---|
| 10230 | + } |
|---|
| 10231 | + /* |
|---|
| 10232 | + * Snapshots can create extents which require COW even if NODATACOW is |
|---|
| 10233 | + * set. We use this counter to prevent snapshots. We must increment it |
|---|
| 10234 | + * before walking the extents because we don't want a concurrent |
|---|
| 10235 | + * snapshot to run after we've already checked the extents. |
|---|
| 10236 | + * |
|---|
| 10237 | + * It is possible that the subvolume is marked for deletion but still not |
|---|
| 10238 | + * removed yet. To prevent this race, we check the root status before |
|---|
| 10239 | + * activating the swapfile. |
|---|
| 10240 | + */ |
|---|
| 10241 | + spin_lock(&root->root_item_lock); |
|---|
| 10242 | + if (btrfs_root_dead(root)) { |
|---|
| 10243 | + spin_unlock(&root->root_item_lock); |
|---|
| 10244 | + |
|---|
| 10245 | + btrfs_exclop_finish(fs_info); |
|---|
| 10246 | + btrfs_warn(fs_info, |
|---|
| 10247 | + "cannot activate swapfile because subvolume %llu is being deleted", |
|---|
| 10248 | + root->root_key.objectid); |
|---|
| 10249 | + return -EPERM; |
|---|
| 10250 | + } |
|---|
| 10251 | + atomic_inc(&root->nr_swapfiles); |
|---|
| 10252 | + spin_unlock(&root->root_item_lock); |
|---|
| 10253 | + |
|---|
| 10254 | + isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize); |
|---|
| 10255 | + |
|---|
| 10256 | + lock_extent_bits(io_tree, 0, isize - 1, &cached_state); |
|---|
| 10257 | + start = 0; |
|---|
| 10258 | + while (start < isize) { |
|---|
| 10259 | + u64 logical_block_start, physical_block_start; |
|---|
| 10260 | + struct btrfs_block_group *bg; |
|---|
| 10261 | + u64 len = isize - start; |
|---|
| 10262 | + |
|---|
| 10263 | + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len); |
|---|
| 10264 | + if (IS_ERR(em)) { |
|---|
| 10265 | + ret = PTR_ERR(em); |
|---|
| 10266 | + goto out; |
|---|
| 10267 | + } |
|---|
| 10268 | + |
|---|
| 10269 | + if (em->block_start == EXTENT_MAP_HOLE) { |
|---|
| 10270 | + btrfs_warn(fs_info, "swapfile must not have holes"); |
|---|
| 10271 | + ret = -EINVAL; |
|---|
| 10272 | + goto out; |
|---|
| 10273 | + } |
|---|
| 10274 | + if (em->block_start == EXTENT_MAP_INLINE) { |
|---|
| 10275 | + /* |
|---|
| 10276 | + * It's unlikely we'll ever actually find ourselves |
|---|
| 10277 | + * here, as a file small enough to fit inline won't be |
|---|
| 10278 | + * big enough to store more than the swap header, but in |
|---|
| 10279 | + * case something changes in the future, let's catch it |
|---|
| 10280 | + * here rather than later. |
|---|
| 10281 | + */ |
|---|
| 10282 | + btrfs_warn(fs_info, "swapfile must not be inline"); |
|---|
| 10283 | + ret = -EINVAL; |
|---|
| 10284 | + goto out; |
|---|
| 10285 | + } |
|---|
| 10286 | + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
|---|
| 10287 | + btrfs_warn(fs_info, "swapfile must not be compressed"); |
|---|
| 10288 | + ret = -EINVAL; |
|---|
| 10289 | + goto out; |
|---|
| 10290 | + } |
|---|
| 10291 | + |
|---|
| 10292 | + logical_block_start = em->block_start + (start - em->start); |
|---|
| 10293 | + len = min(len, em->len - (start - em->start)); |
|---|
| 10294 | + free_extent_map(em); |
|---|
| 10295 | + em = NULL; |
|---|
| 10296 | + |
|---|
| 10297 | + ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL, true); |
|---|
| 10298 | + if (ret < 0) { |
|---|
| 10299 | + goto out; |
|---|
| 10300 | + } else if (ret) { |
|---|
| 10301 | + ret = 0; |
|---|
| 10302 | + } else { |
|---|
| 10303 | + btrfs_warn(fs_info, |
|---|
| 10304 | + "swapfile must not be copy-on-write"); |
|---|
| 10305 | + ret = -EINVAL; |
|---|
| 10306 | + goto out; |
|---|
| 10307 | + } |
|---|
| 10308 | + |
|---|
| 10309 | + em = btrfs_get_chunk_map(fs_info, logical_block_start, len); |
|---|
| 10310 | + if (IS_ERR(em)) { |
|---|
| 10311 | + ret = PTR_ERR(em); |
|---|
| 10312 | + goto out; |
|---|
| 10313 | + } |
|---|
| 10314 | + |
|---|
| 10315 | + if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { |
|---|
| 10316 | + btrfs_warn(fs_info, |
|---|
| 10317 | + "swapfile must have single data profile"); |
|---|
| 10318 | + ret = -EINVAL; |
|---|
| 10319 | + goto out; |
|---|
| 10320 | + } |
|---|
| 10321 | + |
|---|
| 10322 | + if (device == NULL) { |
|---|
| 10323 | + device = em->map_lookup->stripes[0].dev; |
|---|
| 10324 | + ret = btrfs_add_swapfile_pin(inode, device, false); |
|---|
| 10325 | + if (ret == 1) |
|---|
| 10326 | + ret = 0; |
|---|
| 10327 | + else if (ret) |
|---|
| 10328 | + goto out; |
|---|
| 10329 | + } else if (device != em->map_lookup->stripes[0].dev) { |
|---|
| 10330 | + btrfs_warn(fs_info, "swapfile must be on one device"); |
|---|
| 10331 | + ret = -EINVAL; |
|---|
| 10332 | + goto out; |
|---|
| 10333 | + } |
|---|
| 10334 | + |
|---|
| 10335 | + physical_block_start = (em->map_lookup->stripes[0].physical + |
|---|
| 10336 | + (logical_block_start - em->start)); |
|---|
| 10337 | + len = min(len, em->len - (logical_block_start - em->start)); |
|---|
| 10338 | + free_extent_map(em); |
|---|
| 10339 | + em = NULL; |
|---|
| 10340 | + |
|---|
| 10341 | + bg = btrfs_lookup_block_group(fs_info, logical_block_start); |
|---|
| 10342 | + if (!bg) { |
|---|
| 10343 | + btrfs_warn(fs_info, |
|---|
| 10344 | + "could not find block group containing swapfile"); |
|---|
| 10345 | + ret = -EINVAL; |
|---|
| 10346 | + goto out; |
|---|
| 10347 | + } |
|---|
| 10348 | + |
|---|
| 10349 | + if (!btrfs_inc_block_group_swap_extents(bg)) { |
|---|
| 10350 | + btrfs_warn(fs_info, |
|---|
| 10351 | + "block group for swapfile at %llu is read-only%s", |
|---|
| 10352 | + bg->start, |
|---|
| 10353 | + atomic_read(&fs_info->scrubs_running) ? |
|---|
| 10354 | + " (scrub running)" : ""); |
|---|
| 10355 | + btrfs_put_block_group(bg); |
|---|
| 10356 | + ret = -EINVAL; |
|---|
| 10357 | + goto out; |
|---|
| 10358 | + } |
|---|
| 10359 | + |
|---|
| 10360 | + ret = btrfs_add_swapfile_pin(inode, bg, true); |
|---|
| 10361 | + if (ret) { |
|---|
| 10362 | + btrfs_put_block_group(bg); |
|---|
| 10363 | + if (ret == 1) |
|---|
| 10364 | + ret = 0; |
|---|
| 10365 | + else |
|---|
| 10366 | + goto out; |
|---|
| 10367 | + } |
|---|
| 10368 | + |
|---|
| 10369 | + if (bsi.block_len && |
|---|
| 10370 | + bsi.block_start + bsi.block_len == physical_block_start) { |
|---|
| 10371 | + bsi.block_len += len; |
|---|
| 10372 | + } else { |
|---|
| 10373 | + if (bsi.block_len) { |
|---|
| 10374 | + ret = btrfs_add_swap_extent(sis, &bsi); |
|---|
| 10375 | + if (ret) |
|---|
| 10376 | + goto out; |
|---|
| 10377 | + } |
|---|
| 10378 | + bsi.start = start; |
|---|
| 10379 | + bsi.block_start = physical_block_start; |
|---|
| 10380 | + bsi.block_len = len; |
|---|
| 10381 | + } |
|---|
| 10382 | + |
|---|
| 10383 | + start += len; |
|---|
| 10384 | + } |
|---|
| 10385 | + |
|---|
| 10386 | + if (bsi.block_len) |
|---|
| 10387 | + ret = btrfs_add_swap_extent(sis, &bsi); |
|---|
| 10388 | + |
|---|
| 10389 | +out: |
|---|
| 10390 | + if (!IS_ERR_OR_NULL(em)) |
|---|
| 10391 | + free_extent_map(em); |
|---|
| 10392 | + |
|---|
| 10393 | + unlock_extent_cached(io_tree, 0, isize - 1, &cached_state); |
|---|
| 10394 | + |
|---|
| 10395 | + if (ret) |
|---|
| 10396 | + btrfs_swap_deactivate(file); |
|---|
| 10397 | + |
|---|
| 10398 | + btrfs_drew_write_unlock(&root->snapshot_lock); |
|---|
| 10399 | + |
|---|
| 10400 | + btrfs_exclop_finish(fs_info); |
|---|
| 10401 | + |
|---|
| 10402 | + if (ret) |
|---|
| 10403 | + return ret; |
|---|
| 10404 | + |
|---|
| 10405 | + if (device) |
|---|
| 10406 | + sis->bdev = device->bdev; |
|---|
| 10407 | + *span = bsi.highest_ppage - bsi.lowest_ppage + 1; |
|---|
| 10408 | + sis->max = bsi.nr_pages; |
|---|
| 10409 | + sis->pages = bsi.nr_pages - 1; |
|---|
| 10410 | + sis->highest_bit = bsi.nr_pages - 1; |
|---|
| 10411 | + return bsi.nr_extents; |
|---|
| 10412 | +} |
|---|
| 10413 | +#else |
|---|
| 10414 | +static void btrfs_swap_deactivate(struct file *file) |
|---|
| 10415 | +{ |
|---|
| 10416 | +} |
|---|
| 10417 | + |
|---|
| 10418 | +static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, |
|---|
| 10419 | + sector_t *span) |
|---|
| 10420 | +{ |
|---|
| 10421 | + return -EOPNOTSUPP; |
|---|
| 10422 | +} |
|---|
| 10423 | +#endif |
|---|
| 10686 | 10424 | |
|---|
| 10687 | 10425 | static const struct inode_operations btrfs_dir_inode_operations = { |
|---|
| 10688 | 10426 | .getattr = btrfs_getattr, |
|---|
| .. | .. |
|---|
| 10703 | 10441 | .update_time = btrfs_update_time, |
|---|
| 10704 | 10442 | .tmpfile = btrfs_tmpfile, |
|---|
| 10705 | 10443 | }; |
|---|
| 10706 | | -static const struct inode_operations btrfs_dir_ro_inode_operations = { |
|---|
| 10707 | | - .lookup = btrfs_lookup, |
|---|
| 10708 | | - .permission = btrfs_permission, |
|---|
| 10709 | | - .update_time = btrfs_update_time, |
|---|
| 10710 | | -}; |
|---|
| 10711 | 10444 | |
|---|
| 10712 | 10445 | static const struct file_operations btrfs_dir_file_operations = { |
|---|
| 10713 | 10446 | .llseek = generic_file_llseek, |
|---|
| .. | .. |
|---|
| 10720 | 10453 | #endif |
|---|
| 10721 | 10454 | .release = btrfs_release_file, |
|---|
| 10722 | 10455 | .fsync = btrfs_sync_file, |
|---|
| 10723 | | -}; |
|---|
| 10724 | | - |
|---|
| 10725 | | -static const struct extent_io_ops btrfs_extent_io_ops = { |
|---|
| 10726 | | - /* mandatory callbacks */ |
|---|
| 10727 | | - .submit_bio_hook = btrfs_submit_bio_hook, |
|---|
| 10728 | | - .readpage_end_io_hook = btrfs_readpage_end_io_hook, |
|---|
| 10729 | | - .readpage_io_failed_hook = btrfs_readpage_io_failed_hook, |
|---|
| 10730 | | - |
|---|
| 10731 | | - /* optional callbacks */ |
|---|
| 10732 | | - .writepage_end_io_hook = btrfs_writepage_end_io_hook, |
|---|
| 10733 | | - .writepage_start_hook = btrfs_writepage_start_hook, |
|---|
| 10734 | | - .set_bit_hook = btrfs_set_bit_hook, |
|---|
| 10735 | | - .clear_bit_hook = btrfs_clear_bit_hook, |
|---|
| 10736 | | - .merge_extent_hook = btrfs_merge_extent_hook, |
|---|
| 10737 | | - .split_extent_hook = btrfs_split_extent_hook, |
|---|
| 10738 | | - .check_extent_io_range = btrfs_check_extent_io_range, |
|---|
| 10739 | 10456 | }; |
|---|
| 10740 | 10457 | |
|---|
| 10741 | 10458 | /* |
|---|
| .. | .. |
|---|
| 10754 | 10471 | .readpage = btrfs_readpage, |
|---|
| 10755 | 10472 | .writepage = btrfs_writepage, |
|---|
| 10756 | 10473 | .writepages = btrfs_writepages, |
|---|
| 10757 | | - .readpages = btrfs_readpages, |
|---|
| 10758 | | - .direct_IO = btrfs_direct_IO, |
|---|
| 10474 | + .readahead = btrfs_readahead, |
|---|
| 10475 | + .direct_IO = noop_direct_IO, |
|---|
| 10759 | 10476 | .invalidatepage = btrfs_invalidatepage, |
|---|
| 10760 | 10477 | .releasepage = btrfs_releasepage, |
|---|
| 10478 | +#ifdef CONFIG_MIGRATION |
|---|
| 10479 | + .migratepage = btrfs_migratepage, |
|---|
| 10480 | +#endif |
|---|
| 10761 | 10481 | .set_page_dirty = btrfs_set_page_dirty, |
|---|
| 10762 | 10482 | .error_remove_page = generic_error_remove_page, |
|---|
| 10763 | | -}; |
|---|
| 10764 | | - |
|---|
| 10765 | | -static const struct address_space_operations btrfs_symlink_aops = { |
|---|
| 10766 | | - .readpage = btrfs_readpage, |
|---|
| 10767 | | - .writepage = btrfs_writepage, |
|---|
| 10768 | | - .invalidatepage = btrfs_invalidatepage, |
|---|
| 10769 | | - .releasepage = btrfs_releasepage, |
|---|
| 10483 | + .swap_activate = btrfs_swap_activate, |
|---|
| 10484 | + .swap_deactivate = btrfs_swap_deactivate, |
|---|
| 10770 | 10485 | }; |
|---|
| 10771 | 10486 | |
|---|
| 10772 | 10487 | static const struct inode_operations btrfs_file_inode_operations = { |
|---|