.. | .. |
---|
26 | 26 | #include "volumes.h" |
---|
27 | 27 | #include "qgroup.h" |
---|
28 | 28 | #include "compression.h" |
---|
| 29 | +#include "delalloc-space.h" |
---|
| 30 | +#include "reflink.h" |
---|
29 | 31 | |
---|
30 | 32 | static struct kmem_cache *btrfs_inode_defrag_cachep; |
---|
31 | 33 | /* |
---|
.. | .. |
---|
273 | 275 | { |
---|
274 | 276 | struct btrfs_root *inode_root; |
---|
275 | 277 | struct inode *inode; |
---|
276 | | - struct btrfs_key key; |
---|
277 | 278 | struct btrfs_ioctl_defrag_range_args range; |
---|
278 | 279 | int num_defrag; |
---|
279 | | - int index; |
---|
280 | 280 | int ret; |
---|
281 | 281 | |
---|
282 | 282 | /* get the inode */ |
---|
283 | | - key.objectid = defrag->root; |
---|
284 | | - key.type = BTRFS_ROOT_ITEM_KEY; |
---|
285 | | - key.offset = (u64)-1; |
---|
286 | | - |
---|
287 | | - index = srcu_read_lock(&fs_info->subvol_srcu); |
---|
288 | | - |
---|
289 | | - inode_root = btrfs_read_fs_root_no_name(fs_info, &key); |
---|
| 283 | + inode_root = btrfs_get_fs_root(fs_info, defrag->root, true); |
---|
290 | 284 | if (IS_ERR(inode_root)) { |
---|
291 | 285 | ret = PTR_ERR(inode_root); |
---|
292 | 286 | goto cleanup; |
---|
293 | 287 | } |
---|
294 | 288 | |
---|
295 | | - key.objectid = defrag->ino; |
---|
296 | | - key.type = BTRFS_INODE_ITEM_KEY; |
---|
297 | | - key.offset = 0; |
---|
298 | | - inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); |
---|
| 289 | + inode = btrfs_iget(fs_info->sb, defrag->ino, inode_root); |
---|
| 290 | + btrfs_put_root(inode_root); |
---|
299 | 291 | if (IS_ERR(inode)) { |
---|
300 | 292 | ret = PTR_ERR(inode); |
---|
301 | 293 | goto cleanup; |
---|
302 | 294 | } |
---|
303 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
---|
304 | 295 | |
---|
305 | 296 | /* do a chunk of defrag */ |
---|
306 | 297 | clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); |
---|
.. | .. |
---|
336 | 327 | iput(inode); |
---|
337 | 328 | return 0; |
---|
338 | 329 | cleanup: |
---|
339 | | - srcu_read_unlock(&fs_info->subvol_srcu, index); |
---|
340 | 330 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); |
---|
341 | 331 | return ret; |
---|
342 | 332 | } |
---|
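The defrag hunks above replace the hand-built root-item key and the subvol SRCU section with direct lookups: btrfs_get_fs_root() takes the root objectid (the boolean argument is taken verbatim from the hunk), and btrfs_iget() now takes the inode number instead of a key. The returned root is reference-counted and dropped with btrfs_put_root(), as the hunk shows. A minimal sketch of the resulting lookup pattern, with error handling trimmed and fs_info/defrag as in the hunk:

```c
struct btrfs_root *inode_root;
struct inode *inode;

/* Look up the subvolume root directly by objectid. */
inode_root = btrfs_get_fs_root(fs_info, defrag->root, true);
if (IS_ERR(inode_root))
	return PTR_ERR(inode_root);

/* Inodes are now looked up by inode number, no btrfs_key needed. */
inode = btrfs_iget(fs_info->sb, defrag->ino, inode_root);
btrfs_put_root(inode_root);	/* drop the root reference taken above */
if (IS_ERR(inode))
	return PTR_ERR(inode);
```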
.. | .. |
---|
399 | 389 | size_t copied = 0; |
---|
400 | 390 | size_t total_copied = 0; |
---|
401 | 391 | int pg = 0; |
---|
402 | | - int offset = pos & (PAGE_SIZE - 1); |
---|
| 392 | + int offset = offset_in_page(pos); |
---|
403 | 393 | |
---|
404 | 394 | while (write_bytes > 0) { |
---|
405 | 395 | size_t count = min_t(size_t, |
---|
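offset_in_page() replaces the open-coded mask; the helper (from include/linux/mm.h) is just:

```c
#define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
```

which is equivalent to `pos & (PAGE_SIZE - 1)` and reads more clearly at call sites.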
.. | .. |
---|
462 | 452 | } |
---|
463 | 453 | } |
---|
464 | 454 | |
---|
465 | | -static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode, |
---|
466 | | - const u64 start, |
---|
467 | | - const u64 len, |
---|
468 | | - struct extent_state **cached_state) |
---|
469 | | -{ |
---|
470 | | - u64 search_start = start; |
---|
471 | | - const u64 end = start + len - 1; |
---|
472 | | - |
---|
473 | | - while (search_start < end) { |
---|
474 | | - const u64 search_len = end - search_start + 1; |
---|
475 | | - struct extent_map *em; |
---|
476 | | - u64 em_len; |
---|
477 | | - int ret = 0; |
---|
478 | | - |
---|
479 | | - em = btrfs_get_extent(inode, NULL, 0, search_start, |
---|
480 | | - search_len, 0); |
---|
481 | | - if (IS_ERR(em)) |
---|
482 | | - return PTR_ERR(em); |
---|
483 | | - |
---|
484 | | - if (em->block_start != EXTENT_MAP_HOLE) |
---|
485 | | - goto next; |
---|
486 | | - |
---|
487 | | - em_len = em->len; |
---|
488 | | - if (em->start < search_start) |
---|
489 | | - em_len -= search_start - em->start; |
---|
490 | | - if (em_len > search_len) |
---|
491 | | - em_len = search_len; |
---|
492 | | - |
---|
493 | | - ret = set_extent_bit(&inode->io_tree, search_start, |
---|
494 | | - search_start + em_len - 1, |
---|
495 | | - EXTENT_DELALLOC_NEW, |
---|
496 | | - NULL, cached_state, GFP_NOFS); |
---|
497 | | -next: |
---|
498 | | - search_start = extent_map_end(em); |
---|
499 | | - free_extent_map(em); |
---|
500 | | - if (ret) |
---|
501 | | - return ret; |
---|
502 | | - } |
---|
503 | | - return 0; |
---|
504 | | -} |
---|
505 | | - |
---|
506 | 455 | /* |
---|
507 | 456 | * after copy_from_user, pages need to be dirtied and we need to make |
---|
508 | 457 | * sure holes are created between the current EOF and the start of |
---|
.. | .. |
---|
511 | 460 | * this also makes the decision about creating an inline extent vs |
---|
512 | 461 | * doing real data extents, marking pages dirty and delalloc as required. |
---|
513 | 462 | */ |
---|
514 | | -int btrfs_dirty_pages(struct inode *inode, struct page **pages, |
---|
| 463 | +int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages, |
---|
515 | 464 | size_t num_pages, loff_t pos, size_t write_bytes, |
---|
516 | 465 | struct extent_state **cached) |
---|
517 | 466 | { |
---|
518 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
| 467 | + struct btrfs_fs_info *fs_info = inode->root->fs_info; |
---|
519 | 468 | int err = 0; |
---|
520 | 469 | int i; |
---|
521 | 470 | u64 num_bytes; |
---|
522 | 471 | u64 start_pos; |
---|
523 | 472 | u64 end_of_last_block; |
---|
524 | 473 | u64 end_pos = pos + write_bytes; |
---|
525 | | - loff_t isize = i_size_read(inode); |
---|
| 474 | + loff_t isize = i_size_read(&inode->vfs_inode); |
---|
526 | 475 | unsigned int extra_bits = 0; |
---|
527 | 476 | |
---|
528 | 477 | start_pos = pos & ~((u64) fs_info->sectorsize - 1); |
---|
.. | .. |
---|
535 | 484 | * The pages may have already been dirty, clear out old accounting so |
---|
536 | 485 | * we can set things up properly |
---|
537 | 486 | */ |
---|
538 | | - clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, end_of_last_block, |
---|
539 | | - EXTENT_DIRTY | EXTENT_DELALLOC | |
---|
540 | | - EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, cached); |
---|
541 | | - |
---|
542 | | - if (!btrfs_is_free_space_inode(BTRFS_I(inode))) { |
---|
543 | | - if (start_pos >= isize && |
---|
544 | | - !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) { |
---|
545 | | - /* |
---|
546 | | - * There can't be any extents following eof in this case |
---|
547 | | - * so just set the delalloc new bit for the range |
---|
548 | | - * directly. |
---|
549 | | - */ |
---|
550 | | - extra_bits |= EXTENT_DELALLOC_NEW; |
---|
551 | | - } else { |
---|
552 | | - err = btrfs_find_new_delalloc_bytes(BTRFS_I(inode), |
---|
553 | | - start_pos, |
---|
554 | | - num_bytes, cached); |
---|
555 | | - if (err) |
---|
556 | | - return err; |
---|
557 | | - } |
---|
558 | | - } |
---|
| 487 | + clear_extent_bit(&inode->io_tree, start_pos, end_of_last_block, |
---|
| 488 | + EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, |
---|
| 489 | + 0, 0, cached); |
---|
559 | 490 | |
---|
560 | 491 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, |
---|
561 | | - extra_bits, cached, 0); |
---|
| 492 | + extra_bits, cached); |
---|
562 | 493 | if (err) |
---|
563 | 494 | return err; |
---|
564 | 495 | |
---|
.. | .. |
---|
575 | 506 | * at this time. |
---|
576 | 507 | */ |
---|
577 | 508 | if (end_pos > isize) |
---|
578 | | - i_size_write(inode, end_pos); |
---|
| 509 | + i_size_write(&inode->vfs_inode, end_pos); |
---|
579 | 510 | return 0; |
---|
580 | 511 | } |
---|
581 | 512 | |
---|
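btrfs_dirty_pages(), like several other helpers touched further down, now takes a `struct btrfs_inode *` rather than the VFS inode, so the function reaches VFS state through `vfs_inode` and callers convert with BTRFS_I(). The accessor is the usual container_of wrapper, roughly:

```c
static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
{
	return container_of(inode, struct btrfs_inode, vfs_inode);
}
```

A caller that previously passed `inode` now passes `BTRFS_I(inode)`, as the btrfs_buffered_write hunks below show.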
.. | .. |
---|
666 | 597 | } |
---|
667 | 598 | |
---|
668 | 599 | split->generation = gen; |
---|
669 | | - split->bdev = em->bdev; |
---|
670 | 600 | split->flags = flags; |
---|
671 | 601 | split->compress_type = em->compress_type; |
---|
672 | 602 | replace_extent_mapping(em_tree, em, split, modified); |
---|
.. | .. |
---|
679 | 609 | |
---|
680 | 610 | split->start = start + len; |
---|
681 | 611 | split->len = em->start + em->len - (start + len); |
---|
682 | | - split->bdev = em->bdev; |
---|
683 | 612 | split->flags = flags; |
---|
684 | 613 | split->compress_type = em->compress_type; |
---|
685 | 614 | split->generation = gen; |
---|
.. | .. |
---|
744 | 673 | * is deleted from the tree. |
---|
745 | 674 | */ |
---|
746 | 675 | int __btrfs_drop_extents(struct btrfs_trans_handle *trans, |
---|
747 | | - struct btrfs_root *root, struct inode *inode, |
---|
| 676 | + struct btrfs_root *root, struct btrfs_inode *inode, |
---|
748 | 677 | struct btrfs_path *path, u64 start, u64 end, |
---|
749 | 678 | u64 *drop_end, int drop_cache, |
---|
750 | 679 | int replace_extent, |
---|
.. | .. |
---|
754 | 683 | struct btrfs_fs_info *fs_info = root->fs_info; |
---|
755 | 684 | struct extent_buffer *leaf; |
---|
756 | 685 | struct btrfs_file_extent_item *fi; |
---|
| 686 | + struct btrfs_ref ref = { 0 }; |
---|
757 | 687 | struct btrfs_key key; |
---|
758 | 688 | struct btrfs_key new_key; |
---|
759 | | - u64 ino = btrfs_ino(BTRFS_I(inode)); |
---|
| 689 | + struct inode *vfs_inode = &inode->vfs_inode; |
---|
| 690 | + u64 ino = btrfs_ino(inode); |
---|
760 | 691 | u64 search_start = start; |
---|
761 | 692 | u64 disk_bytenr = 0; |
---|
762 | 693 | u64 num_bytes = 0; |
---|
.. | .. |
---|
774 | 705 | int leafs_visited = 0; |
---|
775 | 706 | |
---|
776 | 707 | if (drop_cache) |
---|
777 | | - btrfs_drop_extent_cache(BTRFS_I(inode), start, end - 1, 0); |
---|
| 708 | + btrfs_drop_extent_cache(inode, start, end - 1, 0); |
---|
778 | 709 | |
---|
779 | | - if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent) |
---|
| 710 | + if (start >= inode->disk_i_size && !replace_extent) |
---|
780 | 711 | modify_tree = 0; |
---|
781 | 712 | |
---|
782 | | - update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || |
---|
783 | | - root == fs_info->tree_root); |
---|
| 713 | + update_refs = (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID); |
---|
784 | 714 | while (1) { |
---|
785 | 715 | recow = 0; |
---|
786 | 716 | ret = btrfs_lookup_file_extent(trans, root, path, ino, |
---|
.. | .. |
---|
909 | 839 | btrfs_mark_buffer_dirty(leaf); |
---|
910 | 840 | |
---|
911 | 841 | if (update_refs && disk_bytenr > 0) { |
---|
912 | | - ret = btrfs_inc_extent_ref(trans, root, |
---|
913 | | - disk_bytenr, num_bytes, 0, |
---|
| 842 | + btrfs_init_generic_ref(&ref, |
---|
| 843 | + BTRFS_ADD_DELAYED_REF, |
---|
| 844 | + disk_bytenr, num_bytes, 0); |
---|
| 845 | + btrfs_init_data_ref(&ref, |
---|
914 | 846 | root->root_key.objectid, |
---|
915 | 847 | new_key.objectid, |
---|
916 | 848 | start - extent_offset); |
---|
| 849 | + ret = btrfs_inc_extent_ref(trans, &ref); |
---|
917 | 850 | BUG_ON(ret); /* -ENOMEM */ |
---|
918 | 851 | } |
---|
919 | 852 | key.offset = start; |
---|
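The extent reference bookkeeping switches from long positional argument lists to a `struct btrfs_ref` that is initialized in two steps before being handed to btrfs_inc_extent_ref() or btrfs_free_extent(); the drop side only differs in using BTRFS_DROP_DELAYED_REF. Condensed from the hunk above:

```c
struct btrfs_ref ref = { 0 };

btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, disk_bytenr, num_bytes, 0);
btrfs_init_data_ref(&ref, root->root_key.objectid, new_key.objectid,
		    start - extent_offset);
ret = btrfs_inc_extent_ref(trans, &ref);
```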
.. | .. |
---|
944 | 877 | extent_end - end); |
---|
945 | 878 | btrfs_mark_buffer_dirty(leaf); |
---|
946 | 879 | if (update_refs && disk_bytenr > 0) |
---|
947 | | - inode_sub_bytes(inode, end - key.offset); |
---|
| 880 | + inode_sub_bytes(vfs_inode, end - key.offset); |
---|
948 | 881 | break; |
---|
949 | 882 | } |
---|
950 | 883 | |
---|
.. | .. |
---|
964 | 897 | start - key.offset); |
---|
965 | 898 | btrfs_mark_buffer_dirty(leaf); |
---|
966 | 899 | if (update_refs && disk_bytenr > 0) |
---|
967 | | - inode_sub_bytes(inode, extent_end - start); |
---|
| 900 | + inode_sub_bytes(vfs_inode, extent_end - start); |
---|
968 | 901 | if (end == extent_end) |
---|
969 | 902 | break; |
---|
970 | 903 | |
---|
.. | .. |
---|
988 | 921 | |
---|
989 | 922 | if (update_refs && |
---|
990 | 923 | extent_type == BTRFS_FILE_EXTENT_INLINE) { |
---|
991 | | - inode_sub_bytes(inode, |
---|
| 924 | + inode_sub_bytes(vfs_inode, |
---|
992 | 925 | extent_end - key.offset); |
---|
993 | 926 | extent_end = ALIGN(extent_end, |
---|
994 | 927 | fs_info->sectorsize); |
---|
995 | 928 | } else if (update_refs && disk_bytenr > 0) { |
---|
996 | | - ret = btrfs_free_extent(trans, root, |
---|
997 | | - disk_bytenr, num_bytes, 0, |
---|
| 929 | + btrfs_init_generic_ref(&ref, |
---|
| 930 | + BTRFS_DROP_DELAYED_REF, |
---|
| 931 | + disk_bytenr, num_bytes, 0); |
---|
| 932 | + btrfs_init_data_ref(&ref, |
---|
998 | 933 | root->root_key.objectid, |
---|
999 | | - key.objectid, key.offset - |
---|
1000 | | - extent_offset); |
---|
| 934 | + key.objectid, |
---|
| 935 | + key.offset - extent_offset); |
---|
| 936 | + ret = btrfs_free_extent(trans, &ref); |
---|
1001 | 937 | BUG_ON(ret); /* -ENOMEM */ |
---|
1002 | | - inode_sub_bytes(inode, |
---|
| 938 | + inode_sub_bytes(vfs_inode, |
---|
1003 | 939 | extent_end - key.offset); |
---|
1004 | 940 | } |
---|
1005 | 941 | |
---|
.. | .. |
---|
1025 | 961 | continue; |
---|
1026 | 962 | } |
---|
1027 | 963 | |
---|
1028 | | - BUG_ON(1); |
---|
| 964 | + BUG(); |
---|
1029 | 965 | } |
---|
1030 | 966 | |
---|
1031 | 967 | if (!ret && del_nr > 0) { |
---|
.. | .. |
---|
1050 | 986 | if (!ret && replace_extent && leafs_visited == 1 && |
---|
1051 | 987 | (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING || |
---|
1052 | 988 | path->locks[0] == BTRFS_WRITE_LOCK) && |
---|
1053 | | - btrfs_leaf_free_space(fs_info, leaf) >= |
---|
| 989 | + btrfs_leaf_free_space(leaf) >= |
---|
1054 | 990 | sizeof(struct btrfs_item) + extent_item_size) { |
---|
1055 | 991 | |
---|
1056 | 992 | key.objectid = ino; |
---|
.. | .. |
---|
1063 | 999 | if (btrfs_comp_cpu_keys(&key, &slot_key) > 0) |
---|
1064 | 1000 | path->slots[0]++; |
---|
1065 | 1001 | } |
---|
1066 | | - setup_items_for_insert(root, path, &key, |
---|
1067 | | - &extent_item_size, |
---|
1068 | | - extent_item_size, |
---|
1069 | | - sizeof(struct btrfs_item) + |
---|
1070 | | - extent_item_size, 1); |
---|
| 1002 | + setup_items_for_insert(root, path, &key, &extent_item_size, 1); |
---|
1071 | 1003 | *key_inserted = 1; |
---|
1072 | 1004 | } |
---|
1073 | 1005 | |
---|
.. | .. |
---|
1088 | 1020 | path = btrfs_alloc_path(); |
---|
1089 | 1021 | if (!path) |
---|
1090 | 1022 | return -ENOMEM; |
---|
1091 | | - ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL, |
---|
1092 | | - drop_cache, 0, 0, NULL); |
---|
| 1023 | + ret = __btrfs_drop_extents(trans, root, BTRFS_I(inode), path, start, |
---|
| 1024 | + end, NULL, drop_cache, 0, 0, NULL); |
---|
1093 | 1025 | btrfs_free_path(path); |
---|
1094 | 1026 | return ret; |
---|
1095 | 1027 | } |
---|
.. | .. |
---|
1142 | 1074 | struct extent_buffer *leaf; |
---|
1143 | 1075 | struct btrfs_path *path; |
---|
1144 | 1076 | struct btrfs_file_extent_item *fi; |
---|
| 1077 | + struct btrfs_ref ref = { 0 }; |
---|
1145 | 1078 | struct btrfs_key key; |
---|
1146 | 1079 | struct btrfs_key new_key; |
---|
1147 | 1080 | u64 bytenr; |
---|
.. | .. |
---|
1287 | 1220 | extent_end - split); |
---|
1288 | 1221 | btrfs_mark_buffer_dirty(leaf); |
---|
1289 | 1222 | |
---|
1290 | | - ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, |
---|
1291 | | - 0, root->root_key.objectid, |
---|
1292 | | - ino, orig_offset); |
---|
| 1223 | + btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr, |
---|
| 1224 | + num_bytes, 0); |
---|
| 1225 | + btrfs_init_data_ref(&ref, root->root_key.objectid, ino, |
---|
| 1226 | + orig_offset); |
---|
| 1227 | + ret = btrfs_inc_extent_ref(trans, &ref); |
---|
1293 | 1228 | if (ret) { |
---|
1294 | 1229 | btrfs_abort_transaction(trans, ret); |
---|
1295 | 1230 | goto out; |
---|
.. | .. |
---|
1311 | 1246 | |
---|
1312 | 1247 | other_start = end; |
---|
1313 | 1248 | other_end = 0; |
---|
| 1249 | + btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr, |
---|
| 1250 | + num_bytes, 0); |
---|
| 1251 | + btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset); |
---|
1314 | 1252 | if (extent_mergeable(leaf, path->slots[0] + 1, |
---|
1315 | 1253 | ino, bytenr, orig_offset, |
---|
1316 | 1254 | &other_start, &other_end)) { |
---|
.. | .. |
---|
1321 | 1259 | extent_end = other_end; |
---|
1322 | 1260 | del_slot = path->slots[0] + 1; |
---|
1323 | 1261 | del_nr++; |
---|
1324 | | - ret = btrfs_free_extent(trans, root, bytenr, num_bytes, |
---|
1325 | | - 0, root->root_key.objectid, |
---|
1326 | | - ino, orig_offset); |
---|
| 1262 | + ret = btrfs_free_extent(trans, &ref); |
---|
1327 | 1263 | if (ret) { |
---|
1328 | 1264 | btrfs_abort_transaction(trans, ret); |
---|
1329 | 1265 | goto out; |
---|
.. | .. |
---|
1341 | 1277 | key.offset = other_start; |
---|
1342 | 1278 | del_slot = path->slots[0]; |
---|
1343 | 1279 | del_nr++; |
---|
1344 | | - ret = btrfs_free_extent(trans, root, bytenr, num_bytes, |
---|
1345 | | - 0, root->root_key.objectid, |
---|
1346 | | - ino, orig_offset); |
---|
| 1280 | + ret = btrfs_free_extent(trans, &ref); |
---|
1347 | 1281 | if (ret) { |
---|
1348 | 1282 | btrfs_abort_transaction(trans, ret); |
---|
1349 | 1283 | goto out; |
---|
.. | .. |
---|
1481 | 1415 | int ret = 0; |
---|
1482 | 1416 | |
---|
1483 | 1417 | start_pos = round_down(pos, fs_info->sectorsize); |
---|
1484 | | - last_pos = start_pos |
---|
1485 | | - + round_up(pos + write_bytes - start_pos, |
---|
1486 | | - fs_info->sectorsize) - 1; |
---|
| 1418 | + last_pos = round_up(pos + write_bytes, fs_info->sectorsize) - 1; |
---|
1487 | 1419 | |
---|
1488 | 1420 | if (start_pos < inode->vfs_inode.i_size) { |
---|
1489 | 1421 | struct btrfs_ordered_extent *ordered; |
---|
.. | .. |
---|
1493 | 1425 | ordered = btrfs_lookup_ordered_range(inode, start_pos, |
---|
1494 | 1426 | last_pos - start_pos + 1); |
---|
1495 | 1427 | if (ordered && |
---|
1496 | | - ordered->file_offset + ordered->len > start_pos && |
---|
| 1428 | + ordered->file_offset + ordered->num_bytes > start_pos && |
---|
1497 | 1429 | ordered->file_offset <= last_pos) { |
---|
1498 | 1430 | unlock_extent_cached(&inode->io_tree, start_pos, |
---|
1499 | 1431 | last_pos, cached_state); |
---|
.. | .. |
---|
1501 | 1433 | unlock_page(pages[i]); |
---|
1502 | 1434 | put_page(pages[i]); |
---|
1503 | 1435 | } |
---|
1504 | | - btrfs_start_ordered_extent(&inode->vfs_inode, |
---|
1505 | | - ordered, 1); |
---|
| 1436 | + btrfs_start_ordered_extent(ordered, 1); |
---|
1506 | 1437 | btrfs_put_ordered_extent(ordered); |
---|
1507 | 1438 | return -EAGAIN; |
---|
1508 | 1439 | } |
---|
.. | .. |
---|
1536 | 1467 | return ret; |
---|
1537 | 1468 | } |
---|
1538 | 1469 | |
---|
1539 | | -static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, |
---|
1540 | | - size_t *write_bytes) |
---|
| 1470 | +static int check_can_nocow(struct btrfs_inode *inode, loff_t pos, |
---|
| 1471 | + size_t *write_bytes, bool nowait) |
---|
1541 | 1472 | { |
---|
1542 | 1473 | struct btrfs_fs_info *fs_info = inode->root->fs_info; |
---|
1543 | 1474 | struct btrfs_root *root = inode->root; |
---|
1544 | | - struct btrfs_ordered_extent *ordered; |
---|
1545 | 1475 | u64 lockstart, lockend; |
---|
1546 | 1476 | u64 num_bytes; |
---|
1547 | 1477 | int ret; |
---|
1548 | 1478 | |
---|
1549 | | - ret = btrfs_start_write_no_snapshotting(root); |
---|
1550 | | - if (!ret) |
---|
1551 | | - return -ENOSPC; |
---|
| 1479 | + if (!(inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC))) |
---|
| 1480 | + return 0; |
---|
| 1481 | + |
---|
| 1482 | + if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock)) |
---|
| 1483 | + return -EAGAIN; |
---|
1552 | 1484 | |
---|
1553 | 1485 | lockstart = round_down(pos, fs_info->sectorsize); |
---|
1554 | 1486 | lockend = round_up(pos + *write_bytes, |
---|
1555 | 1487 | fs_info->sectorsize) - 1; |
---|
| 1488 | + num_bytes = lockend - lockstart + 1; |
---|
1556 | 1489 | |
---|
1557 | | - while (1) { |
---|
1558 | | - lock_extent(&inode->io_tree, lockstart, lockend); |
---|
| 1490 | + if (nowait) { |
---|
| 1491 | + struct btrfs_ordered_extent *ordered; |
---|
| 1492 | + |
---|
| 1493 | + if (!try_lock_extent(&inode->io_tree, lockstart, lockend)) |
---|
| 1494 | + return -EAGAIN; |
---|
| 1495 | + |
---|
1559 | 1496 | ordered = btrfs_lookup_ordered_range(inode, lockstart, |
---|
1560 | | - lockend - lockstart + 1); |
---|
1561 | | - if (!ordered) { |
---|
1562 | | - break; |
---|
| 1497 | + num_bytes); |
---|
| 1498 | + if (ordered) { |
---|
| 1499 | + btrfs_put_ordered_extent(ordered); |
---|
| 1500 | + ret = -EAGAIN; |
---|
| 1501 | + goto out_unlock; |
---|
1563 | 1502 | } |
---|
1564 | | - unlock_extent(&inode->io_tree, lockstart, lockend); |
---|
1565 | | - btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1); |
---|
1566 | | - btrfs_put_ordered_extent(ordered); |
---|
| 1503 | + } else { |
---|
| 1504 | + btrfs_lock_and_flush_ordered_range(inode, lockstart, |
---|
| 1505 | + lockend, NULL); |
---|
1567 | 1506 | } |
---|
1568 | 1507 | |
---|
1569 | | - num_bytes = lockend - lockstart + 1; |
---|
1570 | 1508 | ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes, |
---|
1571 | | - NULL, NULL, NULL); |
---|
| 1509 | + NULL, NULL, NULL, false); |
---|
1572 | 1510 | if (ret <= 0) { |
---|
1573 | 1511 | ret = 0; |
---|
1574 | | - btrfs_end_write_no_snapshotting(root); |
---|
| 1512 | + if (!nowait) |
---|
| 1513 | + btrfs_drew_write_unlock(&root->snapshot_lock); |
---|
1575 | 1514 | } else { |
---|
1576 | 1515 | *write_bytes = min_t(size_t, *write_bytes , |
---|
1577 | 1516 | num_bytes - pos + lockstart); |
---|
1578 | 1517 | } |
---|
1579 | | - |
---|
| 1518 | +out_unlock: |
---|
1580 | 1519 | unlock_extent(&inode->io_tree, lockstart, lockend); |
---|
1581 | 1520 | |
---|
1582 | 1521 | return ret; |
---|
| 1522 | +} |
---|
| 1523 | + |
---|
| 1524 | +static int check_nocow_nolock(struct btrfs_inode *inode, loff_t pos, |
---|
| 1525 | + size_t *write_bytes) |
---|
| 1526 | +{ |
---|
| 1527 | + return check_can_nocow(inode, pos, write_bytes, true); |
---|
| 1528 | +} |
---|
| 1529 | + |
---|
| 1530 | +/* |
---|
| 1531 | + * Check if we can do nocow write into the range [@pos, @pos + @write_bytes) |
---|
| 1532 | + * |
---|
| 1533 | + * @pos: File offset |
---|
| 1534 | + * @write_bytes: The length to write, will be updated to the nocow writeable |
---|
| 1535 | + * range |
---|
| 1536 | + * |
---|
| 1537 | + * This function will flush ordered extents in the range to ensure proper |
---|
| 1538 | + * nocow checks. |
---|
| 1539 | + * |
---|
| 1540 | + * Return: |
---|
| 1541 | + * >0 and update @write_bytes if we can do nocow write |
---|
| 1542 | + * 0 if we can't do nocow write |
---|
| 1543 | + * -EAGAIN if we can't get the needed lock or there are ordered extents |
---|
| 1544 | + * for * (nowait == true) case |
---|
| 1545 | + * <0 if other error happened |
---|
| 1546 | + * |
---|
| 1547 | + * NOTE: Callers need to release the lock by btrfs_check_nocow_unlock(). |
---|
| 1548 | + */ |
---|
| 1549 | +int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, |
---|
| 1550 | + size_t *write_bytes) |
---|
| 1551 | +{ |
---|
| 1552 | + return check_can_nocow(inode, pos, write_bytes, false); |
---|
| 1553 | +} |
---|
| 1554 | + |
---|
| 1555 | +void btrfs_check_nocow_unlock(struct btrfs_inode *inode) |
---|
| 1556 | +{ |
---|
| 1557 | + btrfs_drew_write_unlock(&inode->root->snapshot_lock); |
---|
1583 | 1558 | } |
---|
1584 | 1559 | |
---|
1585 | 1560 | static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, |
---|
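check_can_nocow() gains a nowait flavour and two wrappers: btrfs_check_nocow_lock() (blocking) and check_nocow_nolock() (for IOCB_NOWAIT). Per the comment block above, a return value > 0 means the first *write_bytes bytes can be written without COW and the root's snapshot drew write lock is held, so the caller must pair it with btrfs_check_nocow_unlock(); 0 means COW is required (nothing to unlock), and negative values are errors. A hedged usage sketch, where do_nocow_write() is a hypothetical placeholder for whatever the caller does with the range:

```c
size_t write_bytes = len;
int ret;

ret = btrfs_check_nocow_lock(BTRFS_I(inode), pos, &write_bytes);
if (ret > 0) {
	/* write_bytes may have been clamped to the NOCOW-able prefix. */
	do_nocow_write(inode, pos, write_bytes);	/* hypothetical helper */
	btrfs_check_nocow_unlock(BTRFS_I(inode));
} else if (ret == 0) {
	/* Must COW: reserve data space and take the normal write path. */
}
```

The real caller is visible in the btrfs_buffered_write hunks below, where the NOCOW path skips btrfs_check_data_free_space() and releases the lock once the metadata reservation has been dealt with.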
.. | .. |
---|
1589 | 1564 | loff_t pos = iocb->ki_pos; |
---|
1590 | 1565 | struct inode *inode = file_inode(file); |
---|
1591 | 1566 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
1592 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
1593 | 1567 | struct page **pages = NULL; |
---|
1594 | 1568 | struct extent_changeset *data_reserved = NULL; |
---|
1595 | 1569 | u64 release_bytes = 0; |
---|
.. | .. |
---|
1610 | 1584 | return -ENOMEM; |
---|
1611 | 1585 | |
---|
1612 | 1586 | while (iov_iter_count(i) > 0) { |
---|
1613 | | - size_t offset = pos & (PAGE_SIZE - 1); |
---|
1614 | 1587 | struct extent_state *cached_state = NULL; |
---|
| 1588 | + size_t offset = offset_in_page(pos); |
---|
1615 | 1589 | size_t sector_offset; |
---|
1616 | 1590 | size_t write_bytes = min(iov_iter_count(i), |
---|
1617 | 1591 | nrptrs * (size_t)PAGE_SIZE - |
---|
.. | .. |
---|
1642 | 1616 | fs_info->sectorsize); |
---|
1643 | 1617 | |
---|
1644 | 1618 | extent_changeset_release(data_reserved); |
---|
1645 | | - ret = btrfs_check_data_free_space(inode, &data_reserved, pos, |
---|
| 1619 | + ret = btrfs_check_data_free_space(BTRFS_I(inode), |
---|
| 1620 | + &data_reserved, pos, |
---|
1646 | 1621 | write_bytes); |
---|
1647 | 1622 | if (ret < 0) { |
---|
1648 | | - if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | |
---|
1649 | | - BTRFS_INODE_PREALLOC)) && |
---|
1650 | | - check_can_nocow(BTRFS_I(inode), pos, |
---|
1651 | | - &write_bytes) > 0) { |
---|
| 1623 | + if (btrfs_check_nocow_lock(BTRFS_I(inode), pos, |
---|
| 1624 | + &write_bytes) > 0) { |
---|
1652 | 1625 | /* |
---|
1653 | 1626 | * For nodata cow case, no need to reserve |
---|
1654 | 1627 | * data space. |
---|
.. | .. |
---|
1673 | 1646 | reserve_bytes); |
---|
1674 | 1647 | if (ret) { |
---|
1675 | 1648 | if (!only_release_metadata) |
---|
1676 | | - btrfs_free_reserved_data_space(inode, |
---|
| 1649 | + btrfs_free_reserved_data_space(BTRFS_I(inode), |
---|
1677 | 1650 | data_reserved, pos, |
---|
1678 | 1651 | write_bytes); |
---|
1679 | 1652 | else |
---|
1680 | | - btrfs_end_write_no_snapshotting(root); |
---|
| 1653 | + btrfs_check_nocow_unlock(BTRFS_I(inode)); |
---|
1681 | 1654 | break; |
---|
1682 | 1655 | } |
---|
1683 | 1656 | |
---|
.. | .. |
---|
1747 | 1720 | __pos = round_down(pos, |
---|
1748 | 1721 | fs_info->sectorsize) + |
---|
1749 | 1722 | (dirty_pages << PAGE_SHIFT); |
---|
1750 | | - btrfs_delalloc_release_space(inode, |
---|
| 1723 | + btrfs_delalloc_release_space(BTRFS_I(inode), |
---|
1751 | 1724 | data_reserved, __pos, |
---|
1752 | 1725 | release_bytes, true); |
---|
1753 | 1726 | } |
---|
.. | .. |
---|
1757 | 1730 | fs_info->sectorsize); |
---|
1758 | 1731 | |
---|
1759 | 1732 | if (copied > 0) |
---|
1760 | | - ret = btrfs_dirty_pages(inode, pages, dirty_pages, |
---|
1761 | | - pos, copied, &cached_state); |
---|
| 1733 | + ret = btrfs_dirty_pages(BTRFS_I(inode), pages, |
---|
| 1734 | + dirty_pages, pos, copied, |
---|
| 1735 | + &cached_state); |
---|
1762 | 1736 | |
---|
1763 | 1737 | /* |
---|
1764 | 1738 | * If we have not locked the extent range, because the range's |
---|
.. | .. |
---|
1781 | 1755 | |
---|
1782 | 1756 | release_bytes = 0; |
---|
1783 | 1757 | if (only_release_metadata) |
---|
1784 | | - btrfs_end_write_no_snapshotting(root); |
---|
| 1758 | + btrfs_check_nocow_unlock(BTRFS_I(inode)); |
---|
1785 | 1759 | |
---|
1786 | 1760 | if (only_release_metadata && copied > 0) { |
---|
1787 | 1761 | lockstart = round_down(pos, |
---|
.. | .. |
---|
1799 | 1773 | cond_resched(); |
---|
1800 | 1774 | |
---|
1801 | 1775 | balance_dirty_pages_ratelimited(inode->i_mapping); |
---|
1802 | | - if (dirty_pages < (fs_info->nodesize >> PAGE_SHIFT) + 1) |
---|
1803 | | - btrfs_btree_balance_dirty(fs_info); |
---|
1804 | 1776 | |
---|
1805 | 1777 | pos += copied; |
---|
1806 | 1778 | num_written += copied; |
---|
.. | .. |
---|
1810 | 1782 | |
---|
1811 | 1783 | if (release_bytes) { |
---|
1812 | 1784 | if (only_release_metadata) { |
---|
1813 | | - btrfs_end_write_no_snapshotting(root); |
---|
| 1785 | + btrfs_check_nocow_unlock(BTRFS_I(inode)); |
---|
1814 | 1786 | btrfs_delalloc_release_metadata(BTRFS_I(inode), |
---|
1815 | 1787 | release_bytes, true); |
---|
1816 | 1788 | } else { |
---|
1817 | | - btrfs_delalloc_release_space(inode, data_reserved, |
---|
| 1789 | + btrfs_delalloc_release_space(BTRFS_I(inode), |
---|
| 1790 | + data_reserved, |
---|
1818 | 1791 | round_down(pos, fs_info->sectorsize), |
---|
1819 | 1792 | release_bytes, true); |
---|
1820 | 1793 | } |
---|
.. | .. |
---|
1834 | 1807 | loff_t endbyte; |
---|
1835 | 1808 | int err; |
---|
1836 | 1809 | |
---|
1837 | | - written = generic_file_direct_write(iocb, from); |
---|
| 1810 | + written = btrfs_direct_IO(iocb, from); |
---|
1838 | 1811 | |
---|
1839 | 1812 | if (written < 0 || !iov_iter_count(from)) |
---|
1840 | 1813 | return written; |
---|
.. | .. |
---|
1888 | 1861 | struct file *file = iocb->ki_filp; |
---|
1889 | 1862 | struct inode *inode = file_inode(file); |
---|
1890 | 1863 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
1891 | | - struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
1892 | 1864 | u64 start_pos; |
---|
1893 | 1865 | u64 end_pos; |
---|
1894 | 1866 | ssize_t num_written = 0; |
---|
1895 | | - bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); |
---|
| 1867 | + const bool sync = iocb->ki_flags & IOCB_DSYNC; |
---|
1896 | 1868 | ssize_t err; |
---|
1897 | 1869 | loff_t pos; |
---|
1898 | 1870 | size_t count; |
---|
.. | .. |
---|
1919 | 1891 | pos = iocb->ki_pos; |
---|
1920 | 1892 | count = iov_iter_count(from); |
---|
1921 | 1893 | if (iocb->ki_flags & IOCB_NOWAIT) { |
---|
| 1894 | + size_t nocow_bytes = count; |
---|
| 1895 | + |
---|
1922 | 1896 | /* |
---|
1923 | 1897 | * We will allocate space in case nodatacow is not set, |
---|
1924 | 1898 | * so bail |
---|
1925 | 1899 | */ |
---|
1926 | | - if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | |
---|
1927 | | - BTRFS_INODE_PREALLOC)) || |
---|
1928 | | - check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) { |
---|
| 1900 | + if (check_nocow_nolock(BTRFS_I(inode), pos, &nocow_bytes) |
---|
| 1901 | + <= 0) { |
---|
| 1902 | + inode_unlock(inode); |
---|
| 1903 | + return -EAGAIN; |
---|
| 1904 | + } |
---|
| 1905 | + /* |
---|
| 1906 | + * There are holes in the range or parts of the range that must |
---|
| 1907 | + * be COWed (shared extents, RO block groups, etc), so just bail |
---|
| 1908 | + * out. |
---|
| 1909 | + */ |
---|
| 1910 | + if (nocow_bytes < count) { |
---|
1929 | 1911 | inode_unlock(inode); |
---|
1930 | 1912 | return -EAGAIN; |
---|
1931 | 1913 | } |
---|
.. | .. |
---|
1977 | 1959 | atomic_inc(&BTRFS_I(inode)->sync_writers); |
---|
1978 | 1960 | |
---|
1979 | 1961 | if (iocb->ki_flags & IOCB_DIRECT) { |
---|
| 1962 | + /* |
---|
| 1963 | + * 1. We must always clear IOCB_DSYNC in order to not deadlock |
---|
| 1964 | + * in iomap, as it calls generic_write_sync() in this case. |
---|
| 1965 | + * 2. If we are async, we can call iomap_dio_complete() either |
---|
| 1966 | + * in |
---|
| 1967 | + * |
---|
| 1968 | + * 2.1. A worker thread from the last bio completed. In this |
---|
| 1969 | + * case we need to mark the btrfs_dio_data that it is |
---|
| 1970 | + * async in order to call generic_write_sync() properly. |
---|
| 1971 | + * This is handled by setting BTRFS_DIO_SYNC_STUB in the |
---|
| 1972 | + * current->journal_info. |
---|
| 1973 | + * 2.2 The submitter context, because all IO completed |
---|
| 1974 | + * before we exited iomap_dio_rw(). In this case we can |
---|
| 1975 | + * just re-set the IOCB_DSYNC on the iocb and we'll do |
---|
| 1976 | + * the sync below. If our ->end_io() gets called and |
---|
| 1977 | + * current->journal_info is set, then we know we're in |
---|
| 1978 | + * our current context and we will clear |
---|
| 1979 | + * current->journal_info to indicate that we need to |
---|
| 1980 | + * sync below. |
---|
| 1981 | + */ |
---|
| 1982 | + if (sync) { |
---|
| 1983 | + ASSERT(current->journal_info == NULL); |
---|
| 1984 | + iocb->ki_flags &= ~IOCB_DSYNC; |
---|
| 1985 | + current->journal_info = BTRFS_DIO_SYNC_STUB; |
---|
| 1986 | + } |
---|
1980 | 1987 | num_written = __btrfs_direct_write(iocb, from); |
---|
| 1988 | + |
---|
| 1989 | + /* |
---|
| 1990 | + * As stated above, we cleared journal_info, so we need to do |
---|
| 1991 | + * the sync ourselves. |
---|
| 1992 | + */ |
---|
| 1993 | + if (sync && current->journal_info == NULL) |
---|
| 1994 | + iocb->ki_flags |= IOCB_DSYNC; |
---|
| 1995 | + current->journal_info = NULL; |
---|
1981 | 1996 | } else { |
---|
1982 | 1997 | num_written = btrfs_buffered_write(iocb, from); |
---|
1983 | 1998 | if (num_written > 0) |
---|
.. | .. |
---|
1989 | 2004 | |
---|
1990 | 2005 | inode_unlock(inode); |
---|
1991 | 2006 | |
---|
1992 | | - /* |
---|
1993 | | - * We also have to set last_sub_trans to the current log transid, |
---|
1994 | | - * otherwise subsequent syncs to a file that's been synced in this |
---|
1995 | | - * transaction will appear to have already occurred. |
---|
1996 | | - */ |
---|
1997 | | - spin_lock(&BTRFS_I(inode)->lock); |
---|
1998 | | - BTRFS_I(inode)->last_sub_trans = root->log_transid; |
---|
1999 | | - spin_unlock(&BTRFS_I(inode)->lock); |
---|
| 2007 | + btrfs_set_inode_last_sub_trans(BTRFS_I(inode)); |
---|
| 2008 | + |
---|
2000 | 2009 | if (num_written > 0) |
---|
2001 | 2010 | num_written = generic_write_sync(iocb, num_written); |
---|
2002 | 2011 | |
---|
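The open-coded last_sub_trans update (and its explanatory comment) is folded into a helper. Presumably btrfs_set_inode_last_sub_trans(), defined in btrfs_inode.h, does essentially what the removed lines did, i.e. something along the lines of:

```c
static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode)
{
	spin_lock(&inode->lock);
	inode->last_sub_trans = inode->root->log_transid;
	spin_unlock(&inode->lock);
}
```

keeping subsequent fsyncs from wrongly concluding the file was already synced in this log transaction.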
.. | .. |
---|
2017 | 2026 | filp->private_data = NULL; |
---|
2018 | 2027 | |
---|
2019 | 2028 | /* |
---|
2020 | | - * ordered_data_close is set by settattr when we are about to truncate |
---|
2021 | | - * a file from a non-zero size to a zero size. This tries to |
---|
2022 | | - * flush down new bytes that may have been written if the |
---|
2023 | | - * application were using truncate to replace a file in place. |
---|
| 2029 | + * Set by setattr when we are about to truncate a file from a non-zero |
---|
| 2030 | + * size to a zero size. This tries to flush down new bytes that may |
---|
| 2031 | + * have been written if the application were using truncate to replace |
---|
| 2032 | + * a file in place. |
---|
2024 | 2033 | */ |
---|
2025 | | - if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, |
---|
| 2034 | + if (test_and_clear_bit(BTRFS_INODE_FLUSH_ON_CLOSE, |
---|
2026 | 2035 | &BTRFS_I(inode)->runtime_flags)) |
---|
2027 | 2036 | filemap_flush(inode->i_mapping); |
---|
2028 | 2037 | return 0; |
---|
.. | .. |
---|
2048 | 2057 | return ret; |
---|
2049 | 2058 | } |
---|
2050 | 2059 | |
---|
| 2060 | +static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx) |
---|
| 2061 | +{ |
---|
| 2062 | + struct btrfs_inode *inode = BTRFS_I(ctx->inode); |
---|
| 2063 | + struct btrfs_fs_info *fs_info = inode->root->fs_info; |
---|
| 2064 | + |
---|
| 2065 | + if (btrfs_inode_in_log(inode, fs_info->generation) && |
---|
| 2066 | + list_empty(&ctx->ordered_extents)) |
---|
| 2067 | + return true; |
---|
| 2068 | + |
---|
| 2069 | + /* |
---|
| 2070 | + * If we are doing a fast fsync we can not bail out if the inode's |
---|
| 2071 | + * last_trans is <= then the last committed transaction, because we only |
---|
| 2072 | + * update the last_trans of the inode during ordered extent completion, |
---|
| 2073 | + * and for a fast fsync we don't wait for that, we only wait for the |
---|
| 2074 | + * writeback to complete. |
---|
| 2075 | + */ |
---|
| 2076 | + if (inode->last_trans <= fs_info->last_trans_committed && |
---|
| 2077 | + (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) || |
---|
| 2078 | + list_empty(&ctx->ordered_extents))) |
---|
| 2079 | + return true; |
---|
| 2080 | + |
---|
| 2081 | + return false; |
---|
| 2082 | +} |
---|
| 2083 | + |
---|
2051 | 2084 | /* |
---|
2052 | 2085 | * fsync call for both files and directories. This logs the inode into |
---|
2053 | 2086 | * the tree log instead of forcing full commits whenever possible. |
---|
.. | .. |
---|
2063 | 2096 | { |
---|
2064 | 2097 | struct dentry *dentry = file_dentry(file); |
---|
2065 | 2098 | struct inode *inode = d_inode(dentry); |
---|
2066 | | - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
2067 | 2099 | struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
2068 | 2100 | struct btrfs_trans_handle *trans; |
---|
2069 | 2101 | struct btrfs_log_ctx ctx; |
---|
2070 | 2102 | int ret = 0, err; |
---|
| 2103 | + u64 len; |
---|
| 2104 | + bool full_sync; |
---|
2071 | 2105 | |
---|
2072 | 2106 | trace_btrfs_sync_file(file, datasync); |
---|
2073 | 2107 | |
---|
2074 | 2108 | btrfs_init_log_ctx(&ctx, inode); |
---|
2075 | 2109 | |
---|
2076 | 2110 | /* |
---|
2077 | | - * Set the range to full if the NO_HOLES feature is not enabled. |
---|
2078 | | - * This is to avoid missing file extent items representing holes after |
---|
2079 | | - * replaying the log. |
---|
| 2111 | + * Always set the range to a full range, otherwise we can get into |
---|
| 2112 | + * several problems, from missing file extent items to represent holes |
---|
| 2113 | + * when not using the NO_HOLES feature, to log tree corruption due to |
---|
| 2114 | + * races between hole detection during logging and completion of ordered |
---|
| 2115 | + * extents outside the range, to missing checksums due to ordered extents |
---|
| 2116 | + * for which we flushed only a subset of their pages. |
---|
2080 | 2117 | */ |
---|
2081 | | - if (!btrfs_fs_incompat(fs_info, NO_HOLES)) { |
---|
2082 | | - start = 0; |
---|
2083 | | - end = LLONG_MAX; |
---|
2084 | | - } |
---|
| 2118 | + start = 0; |
---|
| 2119 | + end = LLONG_MAX; |
---|
| 2120 | + len = (u64)LLONG_MAX + 1; |
---|
2085 | 2121 | |
---|
2086 | 2122 | /* |
---|
2087 | 2123 | * We write the dirty pages in the range and wait until they complete |
---|
.. | .. |
---|
2105 | 2141 | atomic_inc(&root->log_batch); |
---|
2106 | 2142 | |
---|
2107 | 2143 | /* |
---|
2108 | | - * If the inode needs a full sync, make sure we use a full range to |
---|
2109 | | - * avoid log tree corruption, due to hole detection racing with ordered |
---|
2110 | | - * extent completion for adjacent ranges, and assertion failures during |
---|
2111 | | - * hole detection. Do this while holding the inode lock, to avoid races |
---|
2112 | | - * with other tasks. |
---|
| 2144 | + * Always check for the full sync flag while holding the inode's lock, |
---|
| 2145 | + * to avoid races with other tasks. The flag must be either set all the |
---|
| 2146 | + * time during logging or always off all the time while logging. |
---|
2113 | 2147 | */ |
---|
2114 | | - if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
---|
2115 | | - &BTRFS_I(inode)->runtime_flags)) { |
---|
2116 | | - start = 0; |
---|
2117 | | - end = LLONG_MAX; |
---|
2118 | | - } |
---|
| 2148 | + full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
---|
| 2149 | + &BTRFS_I(inode)->runtime_flags); |
---|
2119 | 2150 | |
---|
2120 | 2151 | /* |
---|
2121 | 2152 | * Before we acquired the inode's lock, someone may have dirtied more |
---|
.. | .. |
---|
2144 | 2175 | |
---|
2145 | 2176 | /* |
---|
2146 | 2177 | * We have to do this here to avoid the priority inversion of waiting on |
---|
2147 | | - * IO of a lower priority task while holding a transaciton open. |
---|
| 2178 | + * IO of a lower priority task while holding a transaction open. |
---|
2148 | 2179 | * |
---|
2149 | | - * Also, the range length can be represented by u64, we have to do the |
---|
2150 | | - * typecasts to avoid signed overflow if it's [0, LLONG_MAX]. |
---|
| 2180 | + * For a full fsync we wait for the ordered extents to complete while |
---|
| 2181 | + * for a fast fsync we wait just for writeback to complete, and then |
---|
| 2182 | + * attach the ordered extents to the transaction so that a transaction |
---|
| 2183 | + * commit waits for their completion, to avoid data loss if we fsync, |
---|
| 2184 | + * the current transaction commits before the ordered extents complete |
---|
| 2185 | + * and a power failure happens right after that. |
---|
2151 | 2186 | */ |
---|
2152 | | - ret = btrfs_wait_ordered_range(inode, start, (u64)end - (u64)start + 1); |
---|
2153 | | - if (ret) { |
---|
2154 | | - up_write(&BTRFS_I(inode)->dio_sem); |
---|
2155 | | - inode_unlock(inode); |
---|
2156 | | - goto out; |
---|
| 2187 | + if (full_sync) { |
---|
| 2188 | + ret = btrfs_wait_ordered_range(inode, start, len); |
---|
| 2189 | + } else { |
---|
| 2190 | + /* |
---|
| 2191 | + * Get our ordered extents as soon as possible to avoid doing |
---|
| 2192 | + * checksum lookups in the csum tree, and use instead the |
---|
| 2193 | + * checksums attached to the ordered extents. |
---|
| 2194 | + */ |
---|
| 2195 | + btrfs_get_ordered_extents_for_logging(BTRFS_I(inode), |
---|
| 2196 | + &ctx.ordered_extents); |
---|
| 2197 | + ret = filemap_fdatawait_range(inode->i_mapping, start, end); |
---|
2157 | 2198 | } |
---|
| 2199 | + |
---|
| 2200 | + if (ret) |
---|
| 2201 | + goto out_release_extents; |
---|
| 2202 | + |
---|
2158 | 2203 | atomic_inc(&root->log_batch); |
---|
2159 | 2204 | |
---|
2160 | 2205 | smp_mb(); |
---|
2161 | | - if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) || |
---|
2162 | | - BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed) { |
---|
| 2206 | + if (skip_inode_logging(&ctx)) { |
---|
2163 | 2207 | /* |
---|
2164 | 2208 | * We've had everything committed since the last time we were |
---|
2165 | 2209 | * modified so clear this flag in case it was set for whatever |
---|
.. | .. |
---|
2175 | 2219 | * checked called fsync. |
---|
2176 | 2220 | */ |
---|
2177 | 2221 | ret = filemap_check_wb_err(inode->i_mapping, file->f_wb_err); |
---|
2178 | | - up_write(&BTRFS_I(inode)->dio_sem); |
---|
2179 | | - inode_unlock(inode); |
---|
2180 | | - goto out; |
---|
| 2222 | + goto out_release_extents; |
---|
2181 | 2223 | } |
---|
2182 | 2224 | |
---|
2183 | 2225 | /* |
---|
.. | .. |
---|
2187 | 2229 | * here we could get into a situation where we're waiting on IO to |
---|
2188 | 2230 | * happen that is blocked on a transaction trying to commit. With start |
---|
2189 | 2231 | * we inc the extwriter counter, so we wait for all extwriters to exit |
---|
2190 | | - * before we start blocking join'ers. This comment is to keep somebody |
---|
| 2232 | + * before we start blocking joiners. This comment is to keep somebody |
---|
2191 | 2233 | * from thinking they are super smart and changing this to |
---|
2192 | 2234 | * btrfs_join_transaction *cough*Josef*cough*. |
---|
2193 | 2235 | */ |
---|
2194 | 2236 | trans = btrfs_start_transaction(root, 0); |
---|
2195 | 2237 | if (IS_ERR(trans)) { |
---|
2196 | 2238 | ret = PTR_ERR(trans); |
---|
2197 | | - up_write(&BTRFS_I(inode)->dio_sem); |
---|
2198 | | - inode_unlock(inode); |
---|
2199 | | - goto out; |
---|
| 2239 | + goto out_release_extents; |
---|
2200 | 2240 | } |
---|
2201 | | - trans->sync = true; |
---|
2202 | 2241 | |
---|
2203 | | - ret = btrfs_log_dentry_safe(trans, dentry, start, end, &ctx); |
---|
| 2242 | + ret = btrfs_log_dentry_safe(trans, dentry, &ctx); |
---|
| 2243 | + btrfs_release_log_ctx_extents(&ctx); |
---|
2204 | 2244 | if (ret < 0) { |
---|
2205 | 2245 | /* Fallthrough and commit/free transaction. */ |
---|
2206 | 2246 | ret = 1; |
---|
.. | .. |
---|
2219 | 2259 | up_write(&BTRFS_I(inode)->dio_sem); |
---|
2220 | 2260 | inode_unlock(inode); |
---|
2221 | 2261 | |
---|
2222 | | - /* |
---|
2223 | | - * If any of the ordered extents had an error, just return it to user |
---|
2224 | | - * space, so that the application knows some writes didn't succeed and |
---|
2225 | | - * can take proper action (retry for e.g.). Blindly committing the |
---|
2226 | | - * transaction in this case, would fool userspace that everything was |
---|
2227 | | - * successful. And we also want to make sure our log doesn't contain |
---|
2228 | | - * file extent items pointing to extents that weren't fully written to - |
---|
2229 | | - * just like in the non fast fsync path, where we check for the ordered |
---|
2230 | | - * operation's error flag before writing to the log tree and return -EIO |
---|
2231 | | - * if any of them had this flag set (btrfs_wait_ordered_range) - |
---|
2232 | | - * therefore we need to check for errors in the ordered operations, |
---|
2233 | | - * which are indicated by ctx.io_err. |
---|
2234 | | - */ |
---|
2235 | | - if (ctx.io_err) { |
---|
2236 | | - btrfs_end_transaction(trans); |
---|
2237 | | - ret = ctx.io_err; |
---|
2238 | | - goto out; |
---|
2239 | | - } |
---|
2240 | | - |
---|
2241 | 2262 | if (ret != BTRFS_NO_LOG_SYNC) { |
---|
2242 | 2263 | if (!ret) { |
---|
2243 | 2264 | ret = btrfs_sync_log(trans, root, &ctx); |
---|
2244 | 2265 | if (!ret) { |
---|
2245 | 2266 | ret = btrfs_end_transaction(trans); |
---|
| 2267 | + goto out; |
---|
| 2268 | + } |
---|
| 2269 | + } |
---|
| 2270 | + if (!full_sync) { |
---|
| 2271 | + ret = btrfs_wait_ordered_range(inode, start, len); |
---|
| 2272 | + if (ret) { |
---|
| 2273 | + btrfs_end_transaction(trans); |
---|
2246 | 2274 | goto out; |
---|
2247 | 2275 | } |
---|
2248 | 2276 | } |
---|
.. | .. |
---|
2256 | 2284 | if (!ret) |
---|
2257 | 2285 | ret = err; |
---|
2258 | 2286 | return ret > 0 ? -EIO : ret; |
---|
| 2287 | + |
---|
| 2288 | +out_release_extents: |
---|
| 2289 | + btrfs_release_log_ctx_extents(&ctx); |
---|
| 2290 | + up_write(&BTRFS_I(inode)->dio_sem); |
---|
| 2291 | + inode_unlock(inode); |
---|
| 2292 | + goto out; |
---|
2259 | 2293 | } |
---|
2260 | 2294 | |
---|
2261 | 2295 | static const struct vm_operations_struct btrfs_file_vm_ops = { |
---|
.. | .. |
---|
2391 | 2425 | hole_em->block_start = EXTENT_MAP_HOLE; |
---|
2392 | 2426 | hole_em->block_len = 0; |
---|
2393 | 2427 | hole_em->orig_block_len = 0; |
---|
2394 | | - hole_em->bdev = fs_info->fs_devices->latest_bdev; |
---|
2395 | 2428 | hole_em->compress_type = BTRFS_COMPRESS_NONE; |
---|
2396 | 2429 | hole_em->generation = trans->transid; |
---|
2397 | 2430 | |
---|
.. | .. |
---|
2424 | 2457 | |
---|
2425 | 2458 | em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, |
---|
2426 | 2459 | round_down(*start, fs_info->sectorsize), |
---|
2427 | | - round_up(*len, fs_info->sectorsize), 0); |
---|
| 2460 | + round_up(*len, fs_info->sectorsize)); |
---|
2428 | 2461 | if (IS_ERR(em)) |
---|
2429 | 2462 | return PTR_ERR(em); |
---|
2430 | 2463 | |
---|
.. | .. |
---|
2452 | 2485 | |
---|
2453 | 2486 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
---|
2454 | 2487 | cached_state); |
---|
2455 | | - ordered = btrfs_lookup_first_ordered_extent(inode, lockend); |
---|
| 2488 | + ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode), |
---|
| 2489 | + lockend); |
---|
2456 | 2490 | |
---|
2457 | 2491 | /* |
---|
2458 | 2492 | * We need to make sure we have no ordered extents in this range |
---|
.. | .. |
---|
2460 | 2494 | * we need to try again. |
---|
2461 | 2495 | */ |
---|
2462 | 2496 | if ((!ordered || |
---|
2463 | | - (ordered->file_offset + ordered->len <= lockstart || |
---|
| 2497 | + (ordered->file_offset + ordered->num_bytes <= lockstart || |
---|
2464 | 2498 | ordered->file_offset > lockend)) && |
---|
2465 | 2499 | !filemap_range_has_page(inode->i_mapping, |
---|
2466 | 2500 | lockstart, lockend)) { |
---|
.. | .. |
---|
2480 | 2514 | return 0; |
---|
2481 | 2515 | } |
---|
2482 | 2516 | |
---|
2483 | | -static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) |
---|
| 2517 | +static int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans, |
---|
| 2518 | + struct inode *inode, |
---|
| 2519 | + struct btrfs_path *path, |
---|
| 2520 | + struct btrfs_replace_extent_info *extent_info, |
---|
| 2521 | + const u64 replace_len) |
---|
2484 | 2522 | { |
---|
| 2523 | + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
| 2524 | + struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
| 2525 | + struct btrfs_file_extent_item *extent; |
---|
| 2526 | + struct extent_buffer *leaf; |
---|
| 2527 | + struct btrfs_key key; |
---|
| 2528 | + int slot; |
---|
| 2529 | + struct btrfs_ref ref = { 0 }; |
---|
| 2530 | + int ret; |
---|
| 2531 | + |
---|
| 2532 | + if (replace_len == 0) |
---|
| 2533 | + return 0; |
---|
| 2534 | + |
---|
| 2535 | + if (extent_info->disk_offset == 0 && |
---|
| 2536 | + btrfs_fs_incompat(fs_info, NO_HOLES)) |
---|
| 2537 | + return 0; |
---|
| 2538 | + |
---|
| 2539 | + key.objectid = btrfs_ino(BTRFS_I(inode)); |
---|
| 2540 | + key.type = BTRFS_EXTENT_DATA_KEY; |
---|
| 2541 | + key.offset = extent_info->file_offset; |
---|
| 2542 | + ret = btrfs_insert_empty_item(trans, root, path, &key, |
---|
| 2543 | + sizeof(struct btrfs_file_extent_item)); |
---|
| 2544 | + if (ret) |
---|
| 2545 | + return ret; |
---|
| 2546 | + leaf = path->nodes[0]; |
---|
| 2547 | + slot = path->slots[0]; |
---|
| 2548 | + write_extent_buffer(leaf, extent_info->extent_buf, |
---|
| 2549 | + btrfs_item_ptr_offset(leaf, slot), |
---|
| 2550 | + sizeof(struct btrfs_file_extent_item)); |
---|
| 2551 | + extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); |
---|
| 2552 | + ASSERT(btrfs_file_extent_type(leaf, extent) != BTRFS_FILE_EXTENT_INLINE); |
---|
| 2553 | + btrfs_set_file_extent_offset(leaf, extent, extent_info->data_offset); |
---|
| 2554 | + btrfs_set_file_extent_num_bytes(leaf, extent, replace_len); |
---|
| 2555 | + if (extent_info->is_new_extent) |
---|
| 2556 | + btrfs_set_file_extent_generation(leaf, extent, trans->transid); |
---|
| 2557 | + btrfs_mark_buffer_dirty(leaf); |
---|
| 2558 | + btrfs_release_path(path); |
---|
| 2559 | + |
---|
| 2560 | + ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), |
---|
| 2561 | + extent_info->file_offset, replace_len); |
---|
| 2562 | + if (ret) |
---|
| 2563 | + return ret; |
---|
| 2564 | + |
---|
| 2565 | + /* If it's a hole, nothing more needs to be done. */ |
---|
| 2566 | + if (extent_info->disk_offset == 0) |
---|
| 2567 | + return 0; |
---|
| 2568 | + |
---|
| 2569 | + inode_add_bytes(inode, replace_len); |
---|
| 2570 | + |
---|
| 2571 | + if (extent_info->is_new_extent && extent_info->insertions == 0) { |
---|
| 2572 | + key.objectid = extent_info->disk_offset; |
---|
| 2573 | + key.type = BTRFS_EXTENT_ITEM_KEY; |
---|
| 2574 | + key.offset = extent_info->disk_len; |
---|
| 2575 | + ret = btrfs_alloc_reserved_file_extent(trans, root, |
---|
| 2576 | + btrfs_ino(BTRFS_I(inode)), |
---|
| 2577 | + extent_info->file_offset, |
---|
| 2578 | + extent_info->qgroup_reserved, |
---|
| 2579 | + &key); |
---|
| 2580 | + } else { |
---|
| 2581 | + u64 ref_offset; |
---|
| 2582 | + |
---|
| 2583 | + btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, |
---|
| 2584 | + extent_info->disk_offset, |
---|
| 2585 | + extent_info->disk_len, 0); |
---|
| 2586 | + ref_offset = extent_info->file_offset - extent_info->data_offset; |
---|
| 2587 | + btrfs_init_data_ref(&ref, root->root_key.objectid, |
---|
| 2588 | + btrfs_ino(BTRFS_I(inode)), ref_offset); |
---|
| 2589 | + ret = btrfs_inc_extent_ref(trans, &ref); |
---|
| 2590 | + } |
---|
| 2591 | + |
---|
| 2592 | + extent_info->insertions++; |
---|
| 2593 | + |
---|
| 2594 | + return ret; |
---|
| 2595 | +} |
---|
| 2596 | + |
---|
| 2597 | +/* |
---|
| 2598 | + * The respective range must have been previously locked, as well as the inode. |
---|
| 2599 | + * The end offset is inclusive (last byte of the range). |
---|
| 2600 | + * @extent_info is NULL for fallocate's hole punching and non-NULL when replacing |
---|
| 2601 | + * the file range with an extent. |
---|
| 2602 | + * When not punching a hole, we don't want to end up in a state where we dropped |
---|
| 2603 | + * extents without inserting a new one, so we must abort the transaction to avoid |
---|
| 2604 | + * a corruption. |
---|
| 2605 | + */ |
---|
| 2606 | +int btrfs_replace_file_extents(struct inode *inode, struct btrfs_path *path, |
---|
| 2607 | + const u64 start, const u64 end, |
---|
| 2608 | + struct btrfs_replace_extent_info *extent_info, |
---|
| 2609 | + struct btrfs_trans_handle **trans_out) |
---|
| 2610 | +{ |
---|
| 2611 | + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
| 2612 | + u64 min_size = btrfs_calc_insert_metadata_size(fs_info, 1); |
---|
| 2613 | + u64 ino_size = round_up(inode->i_size, fs_info->sectorsize); |
---|
| 2614 | + struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
| 2615 | + struct btrfs_trans_handle *trans = NULL; |
---|
| 2616 | + struct btrfs_block_rsv *rsv; |
---|
| 2617 | + unsigned int rsv_count; |
---|
| 2618 | + u64 cur_offset; |
---|
| 2619 | + u64 drop_end; |
---|
| 2620 | + u64 len = end - start; |
---|
| 2621 | + int ret = 0; |
---|
| 2622 | + |
---|
| 2623 | + if (end <= start) |
---|
| 2624 | + return -EINVAL; |
---|
| 2625 | + |
---|
| 2626 | + rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP); |
---|
| 2627 | + if (!rsv) { |
---|
| 2628 | + ret = -ENOMEM; |
---|
| 2629 | + goto out; |
---|
| 2630 | + } |
---|
| 2631 | + rsv->size = btrfs_calc_insert_metadata_size(fs_info, 1); |
---|
| 2632 | + rsv->failfast = 1; |
---|
| 2633 | + |
---|
| 2634 | + /* |
---|
| 2635 | + * 1 - update the inode |
---|
| 2636 | + * 1 - removing the extents in the range |
---|
| 2637 | + * 1 - adding the hole extent if no_holes isn't set or if we are |
---|
| 2638 | + * replacing the range with a new extent |
---|
| 2639 | + */ |
---|
| 2640 | + if (!btrfs_fs_incompat(fs_info, NO_HOLES) || extent_info) |
---|
| 2641 | + rsv_count = 3; |
---|
| 2642 | + else |
---|
| 2643 | + rsv_count = 2; |
---|
| 2644 | + |
---|
| 2645 | + trans = btrfs_start_transaction(root, rsv_count); |
---|
| 2646 | + if (IS_ERR(trans)) { |
---|
| 2647 | + ret = PTR_ERR(trans); |
---|
| 2648 | + trans = NULL; |
---|
| 2649 | + goto out_free; |
---|
| 2650 | + } |
---|
| 2651 | + |
---|
| 2652 | + ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv, |
---|
| 2653 | + min_size, false); |
---|
| 2654 | + BUG_ON(ret); |
---|
| 2655 | + trans->block_rsv = rsv; |
---|
| 2656 | + |
---|
| 2657 | + cur_offset = start; |
---|
| 2658 | + while (cur_offset < end) { |
---|
| 2659 | + ret = __btrfs_drop_extents(trans, root, BTRFS_I(inode), path, |
---|
| 2660 | + cur_offset, end + 1, &drop_end, |
---|
| 2661 | + 1, 0, 0, NULL); |
---|
| 2662 | + if (ret != -ENOSPC) { |
---|
| 2663 | + /* |
---|
| 2664 | + * The only time we don't want to abort is if we are |
---|
| 2665 | + * attempting to clone a partial inline extent, in which |
---|
| 2666 | + * case we'll get EOPNOTSUPP. However if we aren't |
---|
| 2667 | + * clone we need to abort no matter what, because if we |
---|
| 2668 | + * got EOPNOTSUPP via prealloc then we messed up and |
---|
| 2669 | + * need to abort. |
---|
| 2670 | + */ |
---|
| 2671 | + if (ret && |
---|
| 2672 | + (ret != -EOPNOTSUPP || |
---|
| 2673 | + (extent_info && extent_info->is_new_extent))) |
---|
| 2674 | + btrfs_abort_transaction(trans, ret); |
---|
| 2675 | + break; |
---|
| 2676 | + } |
---|
| 2677 | + |
---|
| 2678 | + trans->block_rsv = &fs_info->trans_block_rsv; |
---|
| 2679 | + |
---|
| 2680 | + if (!extent_info && cur_offset < drop_end && |
---|
| 2681 | + cur_offset < ino_size) { |
---|
| 2682 | + ret = fill_holes(trans, BTRFS_I(inode), path, |
---|
| 2683 | + cur_offset, drop_end); |
---|
| 2684 | + if (ret) { |
---|
| 2685 | + /* |
---|
| 2686 | + * If we failed then we didn't insert our hole |
---|
| 2687 | + * entries for the area we dropped, so now the |
---|
| 2688 | + * fs is corrupted, so we must abort the |
---|
| 2689 | + * transaction. |
---|
| 2690 | + */ |
---|
| 2691 | + btrfs_abort_transaction(trans, ret); |
---|
| 2692 | + break; |
---|
| 2693 | + } |
---|
| 2694 | + } else if (!extent_info && cur_offset < drop_end) { |
---|
| 2695 | + /* |
---|
| 2696 | + * We are past the i_size here, but since we didn't |
---|
| 2697 | + * insert holes we need to clear the mapped area so we |
---|
| 2698 | + * know to not set disk_i_size in this area until a new |
---|
| 2699 | + * file extent is inserted here. |
---|
| 2700 | + */ |
---|
| 2701 | + ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode), |
---|
| 2702 | + cur_offset, drop_end - cur_offset); |
---|
| 2703 | + if (ret) { |
---|
| 2704 | + /* |
---|
| 2705 | + * We couldn't clear our area, so we could |
---|
| 2706 | + * presumably adjust up and corrupt the fs, so |
---|
| 2707 | + * we need to abort. |
---|
| 2708 | + */ |
---|
| 2709 | + btrfs_abort_transaction(trans, ret); |
---|
| 2710 | + break; |
---|
| 2711 | + } |
---|
| 2712 | + } |
---|
| 2713 | + |
---|
| 2714 | + if (extent_info && drop_end > extent_info->file_offset) { |
---|
| 2715 | + u64 replace_len = drop_end - extent_info->file_offset; |
---|
| 2716 | + |
---|
| 2717 | + ret = btrfs_insert_replace_extent(trans, inode, path, |
---|
| 2718 | + extent_info, replace_len); |
---|
| 2719 | + if (ret) { |
---|
| 2720 | + btrfs_abort_transaction(trans, ret); |
---|
| 2721 | + break; |
---|
| 2722 | + } |
---|
| 2723 | + extent_info->data_len -= replace_len; |
---|
| 2724 | + extent_info->data_offset += replace_len; |
---|
| 2725 | + extent_info->file_offset += replace_len; |
---|
| 2726 | + } |
---|
| 2727 | + |
---|
| 2728 | + cur_offset = drop_end; |
---|
| 2729 | + |
---|
| 2730 | + ret = btrfs_update_inode(trans, root, inode); |
---|
| 2731 | + if (ret) |
---|
| 2732 | + break; |
---|
| 2733 | + |
---|
| 2734 | + btrfs_end_transaction(trans); |
---|
| 2735 | + btrfs_btree_balance_dirty(fs_info); |
---|
| 2736 | + |
---|
| 2737 | + trans = btrfs_start_transaction(root, rsv_count); |
---|
| 2738 | + if (IS_ERR(trans)) { |
---|
| 2739 | + ret = PTR_ERR(trans); |
---|
| 2740 | + trans = NULL; |
---|
| 2741 | + break; |
---|
| 2742 | + } |
---|
| 2743 | + |
---|
| 2744 | + ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, |
---|
| 2745 | + rsv, min_size, false); |
---|
| 2746 | + BUG_ON(ret); /* shouldn't happen */ |
---|
| 2747 | + trans->block_rsv = rsv; |
---|
| 2748 | + |
---|
| 2749 | + if (!extent_info) { |
---|
| 2750 | + ret = find_first_non_hole(inode, &cur_offset, &len); |
---|
| 2751 | + if (unlikely(ret < 0)) |
---|
| 2752 | + break; |
---|
| 2753 | + if (ret && !len) { |
---|
| 2754 | + ret = 0; |
---|
| 2755 | + break; |
---|
| 2756 | + } |
---|
| 2757 | + } |
---|
| 2758 | + } |
---|
| 2759 | + |
---|
| 2760 | + /* |
---|
| 2761 | + * If we were cloning, force the next fsync to be a full one since we |
---|
| 2762 | + * replaced (or just dropped in the case of cloning holes when |
---|
| 2763 | + * NO_HOLES is enabled) extents and extent maps. |
---|
| 2764 | + * This is for the sake of simplicity, and cloning into files larger |
---|
| 2765 | + * than 16Mb would force the full fsync anyway (when |
---|
| 2766 | + * try_release_extent_mapping() is invoked during page cache truncation). |
---|
| 2767 | + */ |
---|
| 2768 | + if (extent_info && !extent_info->is_new_extent) |
---|
| 2769 | + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
---|
| 2770 | + &BTRFS_I(inode)->runtime_flags); |
---|
| 2771 | + |
---|
| 2772 | + if (ret) |
---|
| 2773 | + goto out_trans; |
---|
| 2774 | + |
---|
| 2775 | + trans->block_rsv = &fs_info->trans_block_rsv; |
---|
| 2776 | + /* |
---|
| 2777 | + * If we are using the NO_HOLES feature we might already have had a |
---|
| 2778 | + * hole that overlaps a part of the region [lockstart, lockend] and |
---|
| 2779 | + * ends at (or beyond) lockend. Since we have no file extent items to |
---|
| 2780 | + * represent holes, drop_end can be less than lockend and so we must |
---|
| 2781 | + * make sure we have an extent map representing the existing hole (the |
---|
| 2782 | + * call to __btrfs_drop_extents() might have dropped the existing extent |
---|
| 2783 | + * map representing the existing hole), otherwise the fast fsync path |
---|
| 2784 | + * will not record the existence of the hole region |
---|
| 2785 | + * [existing_hole_start, lockend]. |
---|
| 2786 | + */ |
---|
| 2787 | + if (drop_end <= end) |
---|
| 2788 | + drop_end = end + 1; |
---|
| 2789 | + /* |
---|
| 2790 | + * Don't insert file hole extent item if it's for a range beyond eof |
---|
| 2791 | + * (because it's useless) or if it represents a 0-byte range (when |
---|
| 2792 | + * cur_offset == drop_end). |
---|
| 2793 | + */ |
---|
| 2794 | + if (!extent_info && cur_offset < ino_size && cur_offset < drop_end) { |
---|
| 2795 | + ret = fill_holes(trans, BTRFS_I(inode), path, |
---|
| 2796 | + cur_offset, drop_end); |
---|
| 2797 | + if (ret) { |
---|
| 2798 | + /* Same comment as above. */ |
---|
| 2799 | + btrfs_abort_transaction(trans, ret); |
---|
| 2800 | + goto out_trans; |
---|
| 2801 | + } |
---|
| 2802 | + } else if (!extent_info && cur_offset < drop_end) { |
---|
| 2803 | + /* See the comment in the loop above for the reasoning here. */ |
---|
| 2804 | + ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode), |
---|
| 2805 | + cur_offset, drop_end - cur_offset); |
---|
| 2806 | + if (ret) { |
---|
| 2807 | + btrfs_abort_transaction(trans, ret); |
---|
| 2808 | + goto out_trans; |
---|
| 2809 | + } |
---|
| 2810 | + |
---|
| 2811 | + } |
---|
| 2812 | + if (extent_info) { |
---|
| 2813 | + ret = btrfs_insert_replace_extent(trans, inode, path, extent_info, |
---|
| 2814 | + extent_info->data_len); |
---|
| 2815 | + if (ret) { |
---|
| 2816 | + btrfs_abort_transaction(trans, ret); |
---|
| 2817 | + goto out_trans; |
---|
| 2818 | + } |
---|
| 2819 | + } |
---|
| 2820 | + |
---|
| 2821 | +out_trans: |
---|
| 2822 | + if (!trans) |
---|
| 2823 | + goto out_free; |
---|
| 2824 | + |
---|
| 2825 | + trans->block_rsv = &fs_info->trans_block_rsv; |
---|
| 2826 | + if (ret) |
---|
| 2827 | + btrfs_end_transaction(trans); |
---|
| 2828 | + else |
---|
| 2829 | + *trans_out = trans; |
---|
| 2830 | +out_free: |
---|
| 2831 | + btrfs_free_block_rsv(fs_info, rsv); |
---|
| 2832 | +out: |
---|
| 2833 | + return ret; |
---|
| 2834 | +} |
---|
| 2835 | + |
---|
| 2836 | +static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len) |
---|
| 2837 | +{ |
---|
| 2838 | + struct inode *inode = file_inode(file); |
---|
2485 | 2839 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
2486 | 2840 | struct btrfs_root *root = BTRFS_I(inode)->root; |
---|
2487 | 2841 | struct extent_state *cached_state = NULL; |
---|
2488 | 2842 | struct btrfs_path *path; |
---|
2489 | | - struct btrfs_block_rsv *rsv; |
---|
2490 | | - struct btrfs_trans_handle *trans; |
---|
| 2843 | + struct btrfs_trans_handle *trans = NULL; |
---|
2491 | 2844 | u64 lockstart; |
---|
2492 | 2845 | u64 lockend; |
---|
2493 | 2846 | u64 tail_start; |
---|
2494 | 2847 | u64 tail_len; |
---|
2495 | 2848 | u64 orig_start = offset; |
---|
2496 | | - u64 cur_offset; |
---|
2497 | | - u64 min_size = btrfs_calc_trans_metadata_size(fs_info, 1); |
---|
2498 | | - u64 drop_end; |
---|
2499 | 2849 | int ret = 0; |
---|
2500 | | - int err = 0; |
---|
2501 | | - unsigned int rsv_count; |
---|
2502 | 2850 | bool same_block; |
---|
2503 | | - bool no_holes = btrfs_fs_incompat(fs_info, NO_HOLES); |
---|
2504 | 2851 | u64 ino_size; |
---|
2505 | 2852 | bool truncated_block = false; |
---|
2506 | 2853 | bool updated_inode = false; |
---|
.. | .. |
---|
2520 | 2867 | goto out_only_mutex; |
---|
2521 | 2868 | } |
---|
2522 | 2869 | |
---|
2523 | | - lockstart = round_up(offset, btrfs_inode_sectorsize(inode)); |
---|
| 2870 | + ret = file_modified(file); |
---|
| 2871 | + if (ret) |
---|
| 2872 | + goto out_only_mutex; |
---|
| 2873 | + |
---|
| 2874 | + lockstart = round_up(offset, btrfs_inode_sectorsize(BTRFS_I(inode))); |
---|
2524 | 2875 | lockend = round_down(offset + len, |
---|
2525 | | - btrfs_inode_sectorsize(inode)) - 1; |
---|
| 2876 | + btrfs_inode_sectorsize(BTRFS_I(inode))) - 1; |
---|
2526 | 2877 | same_block = (BTRFS_BYTES_TO_BLKS(fs_info, offset)) |
---|
2527 | 2878 | == (BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)); |
---|
2528 | 2879 | /* |
---|
.. | .. |
---|
2607 | 2958 | goto out; |
---|
2608 | 2959 | } |
---|
2609 | 2960 | |
---|
2610 | | - rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP); |
---|
2611 | | - if (!rsv) { |
---|
2612 | | - ret = -ENOMEM; |
---|
2613 | | - goto out_free; |
---|
2614 | | - } |
---|
2615 | | - rsv->size = btrfs_calc_trans_metadata_size(fs_info, 1); |
---|
2616 | | - rsv->failfast = 1; |
---|
| 2961 | + ret = btrfs_replace_file_extents(inode, path, lockstart, lockend, NULL, |
---|
| 2962 | + &trans); |
---|
| 2963 | + btrfs_free_path(path); |
---|
| 2964 | + if (ret) |
---|
| 2965 | + goto out; |
---|
2617 | 2966 | |
---|
2618 | | - /* |
---|
2619 | | - * 1 - update the inode |
---|
2620 | | - * 1 - removing the extents in the range |
---|
2621 | | - * 1 - adding the hole extent if no_holes isn't set |
---|
2622 | | - */ |
---|
2623 | | - rsv_count = no_holes ? 2 : 3; |
---|
2624 | | - trans = btrfs_start_transaction(root, rsv_count); |
---|
2625 | | - if (IS_ERR(trans)) { |
---|
2626 | | - err = PTR_ERR(trans); |
---|
2627 | | - goto out_free; |
---|
2628 | | - } |
---|
2629 | | - |
---|
2630 | | - ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv, |
---|
2631 | | - min_size, 0); |
---|
2632 | | - BUG_ON(ret); |
---|
2633 | | - trans->block_rsv = rsv; |
---|
2634 | | - |
---|
2635 | | - cur_offset = lockstart; |
---|
2636 | | - len = lockend - cur_offset; |
---|
2637 | | - while (cur_offset < lockend) { |
---|
2638 | | - ret = __btrfs_drop_extents(trans, root, inode, path, |
---|
2639 | | - cur_offset, lockend + 1, |
---|
2640 | | - &drop_end, 1, 0, 0, NULL); |
---|
2641 | | - if (ret != -ENOSPC) |
---|
2642 | | - break; |
---|
2643 | | - |
---|
2644 | | - trans->block_rsv = &fs_info->trans_block_rsv; |
---|
2645 | | - |
---|
2646 | | - if (cur_offset < drop_end && cur_offset < ino_size) { |
---|
2647 | | - ret = fill_holes(trans, BTRFS_I(inode), path, |
---|
2648 | | - cur_offset, drop_end); |
---|
2649 | | - if (ret) { |
---|
2650 | | - /* |
---|
2651 | | - * If we failed then we didn't insert our hole |
---|
2652 | | - * entries for the area we dropped, so now the |
---|
2653 | | - * fs is corrupted, so we must abort the |
---|
2654 | | - * transaction. |
---|
2655 | | - */ |
---|
2656 | | - btrfs_abort_transaction(trans, ret); |
---|
2657 | | - err = ret; |
---|
2658 | | - break; |
---|
2659 | | - } |
---|
2660 | | - } |
---|
2661 | | - |
---|
2662 | | - cur_offset = drop_end; |
---|
2663 | | - |
---|
2664 | | - ret = btrfs_update_inode(trans, root, inode); |
---|
2665 | | - if (ret) { |
---|
2666 | | - err = ret; |
---|
2667 | | - break; |
---|
2668 | | - } |
---|
2669 | | - |
---|
2670 | | - btrfs_end_transaction(trans); |
---|
2671 | | - btrfs_btree_balance_dirty(fs_info); |
---|
2672 | | - |
---|
2673 | | - trans = btrfs_start_transaction(root, rsv_count); |
---|
2674 | | - if (IS_ERR(trans)) { |
---|
2675 | | - ret = PTR_ERR(trans); |
---|
2676 | | - trans = NULL; |
---|
2677 | | - break; |
---|
2678 | | - } |
---|
2679 | | - |
---|
2680 | | - ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, |
---|
2681 | | - rsv, min_size, 0); |
---|
2682 | | - BUG_ON(ret); /* shouldn't happen */ |
---|
2683 | | - trans->block_rsv = rsv; |
---|
2684 | | - |
---|
2685 | | - ret = find_first_non_hole(inode, &cur_offset, &len); |
---|
2686 | | - if (unlikely(ret < 0)) |
---|
2687 | | - break; |
---|
2688 | | - if (ret && !len) { |
---|
2689 | | - ret = 0; |
---|
2690 | | - break; |
---|
2691 | | - } |
---|
2692 | | - } |
---|
2693 | | - |
---|
2694 | | - if (ret) { |
---|
2695 | | - err = ret; |
---|
2696 | | - goto out_trans; |
---|
2697 | | - } |
---|
2698 | | - |
---|
2699 | | - trans->block_rsv = &fs_info->trans_block_rsv; |
---|
2700 | | - /* |
---|
2701 | | - * If we are using the NO_HOLES feature we might have had already an |
---|
2702 | | - * hole that overlaps a part of the region [lockstart, lockend] and |
---|
2703 | | - * ends at (or beyond) lockend. Since we have no file extent items to |
---|
2704 | | - * represent holes, drop_end can be less than lockend and so we must |
---|
2705 | | - * make sure we have an extent map representing the existing hole (the |
---|
2706 | | - * call to __btrfs_drop_extents() might have dropped the existing extent |
---|
2707 | | - * map representing the existing hole), otherwise the fast fsync path |
---|
2708 | | - * will not record the existence of the hole region |
---|
2709 | | - * [existing_hole_start, lockend]. |
---|
2710 | | - */ |
---|
2711 | | - if (drop_end <= lockend) |
---|
2712 | | - drop_end = lockend + 1; |
---|
2713 | | - /* |
---|
2714 | | - * Don't insert file hole extent item if it's for a range beyond eof |
---|
2715 | | - * (because it's useless) or if it represents a 0 bytes range (when |
---|
2716 | | - * cur_offset == drop_end). |
---|
2717 | | - */ |
---|
2718 | | - if (cur_offset < ino_size && cur_offset < drop_end) { |
---|
2719 | | - ret = fill_holes(trans, BTRFS_I(inode), path, |
---|
2720 | | - cur_offset, drop_end); |
---|
2721 | | - if (ret) { |
---|
2722 | | - /* Same comment as above. */ |
---|
2723 | | - btrfs_abort_transaction(trans, ret); |
---|
2724 | | - err = ret; |
---|
2725 | | - goto out_trans; |
---|
2726 | | - } |
---|
2727 | | - } |
---|
2728 | | - |
---|
2729 | | -out_trans: |
---|
2730 | | - if (!trans) |
---|
2731 | | - goto out_free; |
---|
2732 | | - |
---|
| 2967 | + ASSERT(trans != NULL); |
---|
2733 | 2968 | inode_inc_iversion(inode); |
---|
2734 | 2969 | inode->i_mtime = inode->i_ctime = current_time(inode); |
---|
2735 | | - |
---|
2736 | | - trans->block_rsv = &fs_info->trans_block_rsv; |
---|
2737 | 2970 | ret = btrfs_update_inode(trans, root, inode); |
---|
2738 | 2971 | updated_inode = true; |
---|
2739 | 2972 | btrfs_end_transaction(trans); |
---|
2740 | 2973 | btrfs_btree_balance_dirty(fs_info); |
---|
2741 | | -out_free: |
---|
2742 | | - btrfs_free_path(path); |
---|
2743 | | - btrfs_free_block_rsv(fs_info, rsv); |
---|
2744 | 2974 | out: |
---|
2745 | 2975 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
---|
2746 | 2976 | &cached_state); |
---|
2747 | 2977 | out_only_mutex: |
---|
2748 | | - if (!updated_inode && truncated_block && !ret && !err) { |
---|
| 2978 | + if (!updated_inode && truncated_block && !ret) { |
---|
2749 | 2979 | /* |
---|
2750 | 2980 | * If we only end up zeroing part of a page, we still need to |
---|
2751 | 2981 | * update the inode item, so that all the time fields are |
---|
.. | .. |
---|
2760 | 2990 | inode->i_ctime = now; |
---|
2761 | 2991 | trans = btrfs_start_transaction(root, 1); |
---|
2762 | 2992 | if (IS_ERR(trans)) { |
---|
2763 | | - err = PTR_ERR(trans); |
---|
| 2993 | + ret = PTR_ERR(trans); |
---|
2764 | 2994 | } else { |
---|
2765 | | - err = btrfs_update_inode(trans, root, inode); |
---|
2766 | | - ret = btrfs_end_transaction(trans); |
---|
| 2995 | + int ret2; |
---|
| 2996 | + |
---|
| 2997 | + ret = btrfs_update_inode(trans, root, inode); |
---|
| 2998 | + ret2 = btrfs_end_transaction(trans); |
---|
| 2999 | + if (!ret) |
---|
| 3000 | + ret = ret2; |
---|
2767 | 3001 | } |
---|
2768 | 3002 | } |
---|
2769 | 3003 | inode_unlock(inode); |
---|
2770 | | - if (ret && !err) |
---|
2771 | | - err = ret; |
---|
2772 | | - return err; |
---|
| 3004 | + return ret; |
---|
2773 | 3005 | } |
---|
2774 | 3006 | |
---|
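For context, this is the operation the rewritten btrfs_punch_hole() above serves: user space requests it through fallocate(2) with FALLOC_FL_PUNCH_HOLE, and btrfs_fallocate() further down in this diff dispatches to btrfs_punch_hole(). The following is a hypothetical user-space sketch, not part of the patch; the file argument, offset and length are made up.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    int fd;

    if (argc != 2) {
        fprintf(stderr, "usage: %s <file>\n", argv[0]);
        return 1;
    }
    fd = open(argv[1], O_RDWR);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    /* Punch a 1 MiB hole at offset 4 KiB; KEEP_SIZE leaves i_size alone. */
    if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                  4096, 1024 * 1024) < 0)
        perror("fallocate");
    close(fd);
    return 0;
}

The punched range keeps the file size but drops the underlying extents, which is the region btrfs_replace_file_extents() is asked to drop (with extent_info == NULL) in the hunk above.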
2775 | 3007 | /* Helper structure to record which range is already reserved */ |
---|
.. | .. |
---|
2830 | 3062 | |
---|
2831 | 3063 | inode->i_ctime = current_time(inode); |
---|
2832 | 3064 | i_size_write(inode, end); |
---|
2833 | | - btrfs_ordered_update_i_size(inode, end, NULL); |
---|
| 3065 | + btrfs_inode_safe_disk_i_size_write(inode, 0); |
---|
2834 | 3066 | ret = btrfs_update_inode(trans, root, inode); |
---|
2835 | 3067 | ret2 = btrfs_end_transaction(trans); |
---|
2836 | 3068 | |
---|
.. | .. |
---|
2838 | 3070 | } |
---|
2839 | 3071 | |
---|
2840 | 3072 | enum { |
---|
2841 | | - RANGE_BOUNDARY_WRITTEN_EXTENT = 0, |
---|
2842 | | - RANGE_BOUNDARY_PREALLOC_EXTENT = 1, |
---|
2843 | | - RANGE_BOUNDARY_HOLE = 2, |
---|
| 3073 | + RANGE_BOUNDARY_WRITTEN_EXTENT, |
---|
| 3074 | + RANGE_BOUNDARY_PREALLOC_EXTENT, |
---|
| 3075 | + RANGE_BOUNDARY_HOLE, |
---|
2844 | 3076 | }; |
---|
2845 | 3077 | |
---|
2846 | | -static int btrfs_zero_range_check_range_boundary(struct inode *inode, |
---|
| 3078 | +static int btrfs_zero_range_check_range_boundary(struct btrfs_inode *inode, |
---|
2847 | 3079 | u64 offset) |
---|
2848 | 3080 | { |
---|
2849 | 3081 | const u64 sectorsize = btrfs_inode_sectorsize(inode); |
---|
.. | .. |
---|
2851 | 3083 | int ret; |
---|
2852 | 3084 | |
---|
2853 | 3085 | offset = round_down(offset, sectorsize); |
---|
2854 | | - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); |
---|
| 3086 | + em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize); |
---|
2855 | 3087 | if (IS_ERR(em)) |
---|
2856 | 3088 | return PTR_ERR(em); |
---|
2857 | 3089 | |
---|
.. | .. |
---|
2876 | 3108 | struct extent_changeset *data_reserved = NULL; |
---|
2877 | 3109 | int ret; |
---|
2878 | 3110 | u64 alloc_hint = 0; |
---|
2879 | | - const u64 sectorsize = btrfs_inode_sectorsize(inode); |
---|
| 3111 | + const u64 sectorsize = btrfs_inode_sectorsize(BTRFS_I(inode)); |
---|
2880 | 3112 | u64 alloc_start = round_down(offset, sectorsize); |
---|
2881 | 3113 | u64 alloc_end = round_up(offset + len, sectorsize); |
---|
2882 | 3114 | u64 bytes_to_reserve = 0; |
---|
.. | .. |
---|
2884 | 3116 | |
---|
2885 | 3117 | inode_dio_wait(inode); |
---|
2886 | 3118 | |
---|
2887 | | - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, |
---|
2888 | | - alloc_start, alloc_end - alloc_start, 0); |
---|
| 3119 | + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start, |
---|
| 3120 | + alloc_end - alloc_start); |
---|
2889 | 3121 | if (IS_ERR(em)) { |
---|
2890 | 3122 | ret = PTR_ERR(em); |
---|
2891 | 3123 | goto out; |
---|
.. | .. |
---|
2928 | 3160 | |
---|
2929 | 3161 | if (BTRFS_BYTES_TO_BLKS(fs_info, offset) == |
---|
2930 | 3162 | BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)) { |
---|
2931 | | - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, |
---|
2932 | | - alloc_start, sectorsize, 0); |
---|
| 3163 | + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start, |
---|
| 3164 | + sectorsize); |
---|
2933 | 3165 | if (IS_ERR(em)) { |
---|
2934 | 3166 | ret = PTR_ERR(em); |
---|
2935 | 3167 | goto out; |
---|
.. | .. |
---|
2966 | 3198 | * to cover them. |
---|
2967 | 3199 | */ |
---|
2968 | 3200 | if (!IS_ALIGNED(offset, sectorsize)) { |
---|
2969 | | - ret = btrfs_zero_range_check_range_boundary(inode, offset); |
---|
| 3201 | + ret = btrfs_zero_range_check_range_boundary(BTRFS_I(inode), |
---|
| 3202 | + offset); |
---|
2970 | 3203 | if (ret < 0) |
---|
2971 | 3204 | goto out; |
---|
2972 | 3205 | if (ret == RANGE_BOUNDARY_HOLE) { |
---|
.. | .. |
---|
2982 | 3215 | } |
---|
2983 | 3216 | |
---|
2984 | 3217 | if (!IS_ALIGNED(offset + len, sectorsize)) { |
---|
2985 | | - ret = btrfs_zero_range_check_range_boundary(inode, |
---|
| 3218 | + ret = btrfs_zero_range_check_range_boundary(BTRFS_I(inode), |
---|
2986 | 3219 | offset + len); |
---|
2987 | 3220 | if (ret < 0) |
---|
2988 | 3221 | goto out; |
---|
.. | .. |
---|
3014 | 3247 | &cached_state); |
---|
3015 | 3248 | if (ret) |
---|
3016 | 3249 | goto out; |
---|
3017 | | - ret = btrfs_qgroup_reserve_data(inode, &data_reserved, |
---|
| 3250 | + ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved, |
---|
3018 | 3251 | alloc_start, bytes_to_reserve); |
---|
3019 | 3252 | if (ret) { |
---|
3020 | 3253 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, |
---|
.. | .. |
---|
3036 | 3269 | ret = btrfs_fallocate_update_isize(inode, offset + len, mode); |
---|
3037 | 3270 | out: |
---|
3038 | 3271 | if (ret && space_reserved) |
---|
3039 | | - btrfs_free_reserved_data_space(inode, data_reserved, |
---|
| 3272 | + btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved, |
---|
3040 | 3273 | alloc_start, bytes_to_reserve); |
---|
3041 | 3274 | extent_changeset_free(data_reserved); |
---|
3042 | 3275 | |
---|
.. | .. |
---|
3060 | 3293 | u64 locked_end; |
---|
3061 | 3294 | u64 actual_end = 0; |
---|
3062 | 3295 | struct extent_map *em; |
---|
3063 | | - int blocksize = btrfs_inode_sectorsize(inode); |
---|
| 3296 | + int blocksize = btrfs_inode_sectorsize(BTRFS_I(inode)); |
---|
3064 | 3297 | int ret; |
---|
3065 | 3298 | |
---|
3066 | 3299 | alloc_start = round_down(offset, blocksize); |
---|
.. | .. |
---|
3073 | 3306 | return -EOPNOTSUPP; |
---|
3074 | 3307 | |
---|
3075 | 3308 | if (mode & FALLOC_FL_PUNCH_HOLE) |
---|
3076 | | - return btrfs_punch_hole(inode, offset, len); |
---|
| 3309 | + return btrfs_punch_hole(file, offset, len); |
---|
3077 | 3310 | |
---|
3078 | 3311 | /* |
---|
3079 | 3312 | * Only trigger disk allocation, don't trigger qgroup reserve |
---|
.. | .. |
---|
3094 | 3327 | if (ret) |
---|
3095 | 3328 | goto out; |
---|
3096 | 3329 | } |
---|
| 3330 | + |
---|
| 3331 | + ret = file_modified(file); |
---|
| 3332 | + if (ret) |
---|
| 3333 | + goto out; |
---|
3097 | 3334 | |
---|
3098 | 3335 | /* |
---|
3099 | 3336 | * TODO: Move these two operations after we have checked |
---|
.. | .. |
---|
3142 | 3379 | */ |
---|
3143 | 3380 | lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, |
---|
3144 | 3381 | locked_end, &cached_state); |
---|
3145 | | - ordered = btrfs_lookup_first_ordered_extent(inode, locked_end); |
---|
| 3382 | + ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode), |
---|
| 3383 | + locked_end); |
---|
3146 | 3384 | |
---|
3147 | 3385 | if (ordered && |
---|
3148 | | - ordered->file_offset + ordered->len > alloc_start && |
---|
| 3386 | + ordered->file_offset + ordered->num_bytes > alloc_start && |
---|
3149 | 3387 | ordered->file_offset < alloc_end) { |
---|
3150 | 3388 | btrfs_put_ordered_extent(ordered); |
---|
3151 | 3389 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
---|
.. | .. |
---|
3170 | 3408 | INIT_LIST_HEAD(&reserve_list); |
---|
3171 | 3409 | while (cur_offset < alloc_end) { |
---|
3172 | 3410 | em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset, |
---|
3173 | | - alloc_end - cur_offset, 0); |
---|
| 3411 | + alloc_end - cur_offset); |
---|
3174 | 3412 | if (IS_ERR(em)) { |
---|
3175 | 3413 | ret = PTR_ERR(em); |
---|
3176 | 3414 | break; |
---|
.. | .. |
---|
3187 | 3425 | free_extent_map(em); |
---|
3188 | 3426 | break; |
---|
3189 | 3427 | } |
---|
3190 | | - ret = btrfs_qgroup_reserve_data(inode, &data_reserved, |
---|
3191 | | - cur_offset, last_byte - cur_offset); |
---|
| 3428 | + ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), |
---|
| 3429 | + &data_reserved, cur_offset, |
---|
| 3430 | + last_byte - cur_offset); |
---|
3192 | 3431 | if (ret < 0) { |
---|
3193 | 3432 | cur_offset = last_byte; |
---|
3194 | 3433 | free_extent_map(em); |
---|
.. | .. |
---|
3200 | 3439 | * range, free reserved data space first, otherwise |
---|
3201 | 3440 | * it'll result in false ENOSPC error. |
---|
3202 | 3441 | */ |
---|
3203 | | - btrfs_free_reserved_data_space(inode, data_reserved, |
---|
3204 | | - cur_offset, last_byte - cur_offset); |
---|
| 3442 | + btrfs_free_reserved_data_space(BTRFS_I(inode), |
---|
| 3443 | + data_reserved, cur_offset, |
---|
| 3444 | + last_byte - cur_offset); |
---|
3205 | 3445 | } |
---|
3206 | 3446 | free_extent_map(em); |
---|
3207 | 3447 | cur_offset = last_byte; |
---|
.. | .. |
---|
3218 | 3458 | range->len, i_blocksize(inode), |
---|
3219 | 3459 | offset + len, &alloc_hint); |
---|
3220 | 3460 | else |
---|
3221 | | - btrfs_free_reserved_data_space(inode, |
---|
| 3461 | + btrfs_free_reserved_data_space(BTRFS_I(inode), |
---|
3222 | 3462 | data_reserved, range->start, |
---|
3223 | 3463 | range->len); |
---|
3224 | 3464 | list_del(&range->list); |
---|
.. | .. |
---|
3239 | 3479 | inode_unlock(inode); |
---|
3240 | 3480 | /* Let go of our reservation. */ |
---|
3241 | 3481 | if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE)) |
---|
3242 | | - btrfs_free_reserved_data_space(inode, data_reserved, |
---|
| 3482 | + btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved, |
---|
3243 | 3483 | cur_offset, alloc_end - cur_offset); |
---|
3244 | 3484 | extent_changeset_free(data_reserved); |
---|
3245 | 3485 | return ret; |
---|
3246 | 3486 | } |
---|
3247 | 3487 | |
---|
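As a rough illustration of the fallocate modes handled by btrfs_fallocate() above, the hedged user-space sketch below performs a plain preallocation (mode 0) and then a FALLOC_FL_ZERO_RANGE call, the case handled by btrfs_zero_range() above; it is not part of the patch and the file name and sizes are arbitrary.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    int fd = open("testfile", O_RDWR | O_CREAT, 0644);

    if (fd < 0) {
        perror("open");
        return 1;
    }
    /* Preallocate 8 MiB from offset 0; mode 0 also extends i_size. */
    if (fallocate(fd, 0, 0, 8 * 1024 * 1024) < 0)
        perror("fallocate prealloc");
    /* Convert 1 MiB in the middle of the file to a zeroed range. */
    if (fallocate(fd, FALLOC_FL_ZERO_RANGE, 1024 * 1024, 1024 * 1024) < 0)
        perror("fallocate zero range");
    close(fd);
    return 0;
}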
3248 | | -static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) |
---|
| 3488 | +static loff_t find_desired_extent(struct inode *inode, loff_t offset, |
---|
| 3489 | + int whence) |
---|
3249 | 3490 | { |
---|
3250 | 3491 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
---|
3251 | 3492 | struct extent_map *em = NULL; |
---|
3252 | 3493 | struct extent_state *cached_state = NULL; |
---|
| 3494 | + loff_t i_size = inode->i_size; |
---|
3253 | 3495 | u64 lockstart; |
---|
3254 | 3496 | u64 lockend; |
---|
3255 | 3497 | u64 start; |
---|
3256 | 3498 | u64 len; |
---|
3257 | 3499 | int ret = 0; |
---|
3258 | 3500 | |
---|
3259 | | - if (inode->i_size == 0) |
---|
| 3501 | + if (i_size == 0 || offset >= i_size) |
---|
3260 | 3502 | return -ENXIO; |
---|
3261 | 3503 | |
---|
3262 | 3504 | /* |
---|
3263 | | - * *offset can be negative, in this case we start finding DATA/HOLE from |
---|
| 3505 | + * offset can be negative, in this case we start finding DATA/HOLE from |
---|
3264 | 3506 | * the very start of the file. |
---|
3265 | 3507 | */ |
---|
3266 | | - start = max_t(loff_t, 0, *offset); |
---|
| 3508 | + start = max_t(loff_t, 0, offset); |
---|
3267 | 3509 | |
---|
3268 | 3510 | lockstart = round_down(start, fs_info->sectorsize); |
---|
3269 | | - lockend = round_up(i_size_read(inode), |
---|
3270 | | - fs_info->sectorsize); |
---|
| 3511 | + lockend = round_up(i_size, fs_info->sectorsize); |
---|
3271 | 3512 | if (lockend <= lockstart) |
---|
3272 | 3513 | lockend = lockstart + fs_info->sectorsize; |
---|
3273 | 3514 | lockend--; |
---|
.. | .. |
---|
3276 | 3517 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
---|
3277 | 3518 | &cached_state); |
---|
3278 | 3519 | |
---|
3279 | | - while (start < inode->i_size) { |
---|
3280 | | - em = btrfs_get_extent_fiemap(BTRFS_I(inode), NULL, 0, |
---|
3281 | | - start, len, 0); |
---|
| 3520 | + while (start < i_size) { |
---|
| 3521 | + em = btrfs_get_extent_fiemap(BTRFS_I(inode), start, len); |
---|
3282 | 3522 | if (IS_ERR(em)) { |
---|
3283 | 3523 | ret = PTR_ERR(em); |
---|
3284 | 3524 | em = NULL; |
---|
.. | .. |
---|
3300 | 3540 | cond_resched(); |
---|
3301 | 3541 | } |
---|
3302 | 3542 | free_extent_map(em); |
---|
3303 | | - if (!ret) { |
---|
3304 | | - if (whence == SEEK_DATA && start >= inode->i_size) |
---|
3305 | | - ret = -ENXIO; |
---|
3306 | | - else |
---|
3307 | | - *offset = min_t(loff_t, start, inode->i_size); |
---|
3308 | | - } |
---|
3309 | 3543 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
---|
3310 | 3544 | &cached_state); |
---|
3311 | | - return ret; |
---|
| 3545 | + if (ret) { |
---|
| 3546 | + offset = ret; |
---|
| 3547 | + } else { |
---|
| 3548 | + if (whence == SEEK_DATA && start >= i_size) |
---|
| 3549 | + offset = -ENXIO; |
---|
| 3550 | + else |
---|
| 3551 | + offset = min_t(loff_t, start, i_size); |
---|
| 3552 | + } |
---|
| 3553 | + |
---|
| 3554 | + return offset; |
---|
3312 | 3555 | } |
---|
3313 | 3556 | |
---|
3314 | 3557 | static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence) |
---|
3315 | 3558 | { |
---|
3316 | 3559 | struct inode *inode = file->f_mapping->host; |
---|
3317 | | - int ret; |
---|
3318 | 3560 | |
---|
3319 | | - inode_lock(inode); |
---|
3320 | 3561 | switch (whence) { |
---|
3321 | | - case SEEK_END: |
---|
3322 | | - case SEEK_CUR: |
---|
3323 | | - offset = generic_file_llseek(file, offset, whence); |
---|
3324 | | - goto out; |
---|
| 3562 | + default: |
---|
| 3563 | + return generic_file_llseek(file, offset, whence); |
---|
3325 | 3564 | case SEEK_DATA: |
---|
3326 | 3565 | case SEEK_HOLE: |
---|
3327 | | - if (offset >= i_size_read(inode)) { |
---|
3328 | | - inode_unlock(inode); |
---|
3329 | | - return -ENXIO; |
---|
3330 | | - } |
---|
3331 | | - |
---|
3332 | | - ret = find_desired_extent(inode, &offset, whence); |
---|
3333 | | - if (ret) { |
---|
3334 | | - inode_unlock(inode); |
---|
3335 | | - return ret; |
---|
3336 | | - } |
---|
| 3566 | + inode_lock_shared(inode); |
---|
| 3567 | + offset = find_desired_extent(inode, offset, whence); |
---|
| 3568 | + inode_unlock_shared(inode); |
---|
| 3569 | + break; |
---|
3337 | 3570 | } |
---|
3338 | 3571 | |
---|
3339 | | - offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); |
---|
3340 | | -out: |
---|
3341 | | - inode_unlock(inode); |
---|
3342 | | - return offset; |
---|
| 3572 | + if (offset < 0) |
---|
| 3573 | + return offset; |
---|
| 3574 | + |
---|
| 3575 | + return vfs_setpos(file, offset, inode->i_sb->s_maxbytes); |
---|
3343 | 3576 | } |
---|
3344 | 3577 | |
---|
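The reworked find_desired_extent() above now takes the offset by value and returns either the resolved offset or a negative errno, and btrfs_file_llseek() only takes the inode lock shared for SEEK_DATA/SEEK_HOLE. Below is a hypothetical user-space sketch of the interface being served (not part of the patch; error handling kept minimal):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    off_t data, hole;
    int fd;

    if (argc != 2) {
        fprintf(stderr, "usage: %s <file>\n", argv[0]);
        return 1;
    }
    fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    data = lseek(fd, 0, SEEK_DATA);  /* first data at or after offset 0 */
    hole = lseek(fd, 0, SEEK_HOLE);  /* first hole at or after offset 0 */
    printf("first data: %lld, first hole: %lld\n",
           (long long)data, (long long)hole);
    close(fd);
    return 0;
}

A fully sparse file makes the SEEK_DATA call fail with ENXIO, matching the -ENXIO returns in the hunk above.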
3345 | 3578 | static int btrfs_file_open(struct inode *inode, struct file *filp) |
---|
3346 | 3579 | { |
---|
3347 | | - filp->f_mode |= FMODE_NOWAIT; |
---|
| 3580 | + filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; |
---|
3348 | 3581 | return generic_file_open(inode, filp); |
---|
| 3582 | +} |
---|
| 3583 | + |
---|
| 3584 | +static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) |
---|
| 3585 | +{ |
---|
| 3586 | + ssize_t ret = 0; |
---|
| 3587 | + |
---|
| 3588 | + if (iocb->ki_flags & IOCB_DIRECT) { |
---|
| 3589 | + struct inode *inode = file_inode(iocb->ki_filp); |
---|
| 3590 | + |
---|
| 3591 | + inode_lock_shared(inode); |
---|
| 3592 | + ret = btrfs_direct_IO(iocb, to); |
---|
| 3593 | + inode_unlock_shared(inode); |
---|
| 3594 | + if (ret < 0 || !iov_iter_count(to) || |
---|
| 3595 | + iocb->ki_pos >= i_size_read(file_inode(iocb->ki_filp))) |
---|
| 3596 | + return ret; |
---|
| 3597 | + } |
---|
| 3598 | + |
---|
| 3599 | + return generic_file_buffered_read(iocb, to, ret); |
---|
3349 | 3600 | } |
---|
3350 | 3601 | |
---|
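btrfs_file_read_iter() above first tries the direct I/O path under a shared inode lock and, when that read did not fail, did not consume the whole iterator, and the position is still below i_size, completes the remainder through the buffered path. A hypothetical user-space sketch that exercises the O_DIRECT branch (the 4096-byte alignment and 64 KiB read size are assumptions, not taken from the patch):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    void *buf;
    ssize_t n;
    int fd;

    if (argc != 2) {
        fprintf(stderr, "usage: %s <file>\n", argv[0]);
        return 1;
    }
    fd = open(argv[1], O_RDONLY | O_DIRECT);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    /* O_DIRECT needs an aligned buffer; 4096 covers common sector sizes. */
    if (posix_memalign(&buf, 4096, 64 * 1024)) {
        close(fd);
        return 1;
    }
    n = read(fd, buf, 64 * 1024);
    if (n < 0)
        perror("read");
    else
        printf("read %zd bytes\n", n);
    free(buf);
    close(fd);
    return 0;
}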
3351 | 3602 | const struct file_operations btrfs_file_operations = { |
---|
3352 | 3603 | .llseek = btrfs_file_llseek, |
---|
3353 | | - .read_iter = generic_file_read_iter, |
---|
| 3604 | + .read_iter = btrfs_file_read_iter, |
---|
3354 | 3605 | .splice_read = generic_file_splice_read, |
---|
3355 | 3606 | .write_iter = btrfs_file_write_iter, |
---|
| 3607 | + .splice_write = iter_file_splice_write, |
---|
3356 | 3608 | .mmap = btrfs_file_mmap, |
---|
3357 | 3609 | .open = btrfs_file_open, |
---|
3358 | 3610 | .release = btrfs_release_file, |
---|
.. | .. |
---|
3362 | 3614 | #ifdef CONFIG_COMPAT |
---|
3363 | 3615 | .compat_ioctl = btrfs_compat_ioctl, |
---|
3364 | 3616 | #endif |
---|
3365 | | - .clone_file_range = btrfs_clone_file_range, |
---|
3366 | | - .dedupe_file_range = btrfs_dedupe_file_range, |
---|
| 3617 | + .remap_file_range = btrfs_remap_file_range, |
---|
3367 | 3618 | }; |
---|
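With .clone_file_range and .dedupe_file_range replaced by .remap_file_range above, reflink clones and dedupe requests both enter btrfs through btrfs_remap_file_range(). A hypothetical user-space sketch using the FICLONE ioctl (file names are placeholders; not part of the patch):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
    int src, dst;

    if (argc != 3) {
        fprintf(stderr, "usage: %s <src> <dst>\n", argv[0]);
        return 1;
    }
    src = open(argv[1], O_RDONLY);
    dst = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (src < 0 || dst < 0) {
        perror("open");
        return 1;
    }
    /* Share the source's extents with the destination (reflink copy). */
    if (ioctl(dst, FICLONE, src) < 0)
        perror("ioctl FICLONE");
    close(src);
    close(dst);
    return 0;
}

Clones like this end up in the btrfs_replace_file_extents() helper shown earlier in this diff, with extent_info describing the extent being cloned in.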
3368 | 3619 | |
---|
3369 | 3620 | void __cold btrfs_auto_defrag_exit(void) |
---|