.. | ..
18 | 18 | #include "extent_io.h"
19 | 19 | #include "inode-map.h"
20 | 20 | #include "volumes.h"
| 21 | +#include "space-info.h"
| 22 | +#include "delalloc-space.h"
| 23 | +#include "block-group.h"
| 24 | +#include "discard.h"
21 | 25 |
22 | 26 | #define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
23 | | -#define MAX_CACHE_BYTES_PER_GIG SZ_32K
| 27 | +#define MAX_CACHE_BYTES_PER_GIG SZ_64K
| 28 | +#define FORCE_EXTENT_THRESHOLD SZ_1M
24 | 29 |
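The bump from SZ_32K to SZ_64K and the new FORCE_EXTENT_THRESHOLD work together: extents of 1MiB and up are now kept out of bitmaps (see the use_bitmap() hunk further down), so the per-GiB memory budget has to leave room for more extent entries. A back-of-the-envelope check of what one bitmap page covers, as a standalone sketch (the constants mirror the kernel's for 4KiB pages and a 4KiB sectorsize; nothing below is from the patch itself):

```c
#include <stdio.h>

#define PAGE_SIZE       4096UL
#define SECTORSIZE      4096UL                 /* ctl->unit on a typical fs */
#define BITS_PER_BITMAP (PAGE_SIZE * 8UL)      /* 32768 bits per bitmap page */
#define SZ_1G           (1024UL * 1024 * 1024)

int main(void)
{
	/* One bit tracks one sector, so one bitmap page covers 128MiB. */
	unsigned long covered = BITS_PER_BITMAP * SECTORSIZE;
	/* Covering 1GiB therefore takes 8 bitmap pages = 32KiB of memory. */
	unsigned long per_gig = (SZ_1G / covered) * PAGE_SIZE;

	printf("one bitmap covers %lu MiB, 1GiB needs %lu KiB of bitmaps\n",
	       covered >> 20, per_gig >> 10);
	return 0;
}
```

With bitmaps alone already consuming 32KiB per GiB, the old 32KiB cap left no headroom for extent entries; doubling it to 64KiB makes the extents threshold meaningful again.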
25 | 30 | struct btrfs_trim_range {
26 | 31 | u64 start;
.. | ..
28 | 33 | struct list_head list;
29 | 34 | };
30 | 35 |
| 36 | +static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
| 37 | + struct btrfs_free_space *bitmap_info);
31 | 38 | static int link_free_space(struct btrfs_free_space_ctl *ctl,
32 | 39 | struct btrfs_free_space *info);
33 | 40 | static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
.. | ..
75 | 82 | * sure NOFS is set to keep us from deadlocking.
76 | 83 | */
77 | 84 | nofs_flag = memalloc_nofs_save();
78 | | - inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path);
| 85 | + inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path);
79 | 86 | btrfs_release_path(path);
80 | 87 | memalloc_nofs_restore(nofs_flag);
81 | 88 | if (IS_ERR(inode))
.. | ..
88 | 95 | return inode;
89 | 96 | }
90 | 97 |
91 | | -struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info,
92 | | - struct btrfs_block_group_cache
93 | | - *block_group, struct btrfs_path *path)
| 98 | +struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
| 99 | + struct btrfs_path *path)
94 | 100 | {
| 101 | + struct btrfs_fs_info *fs_info = block_group->fs_info;
95 | 102 | struct inode *inode = NULL;
96 | 103 | u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
97 | 104 |
.. | ..
103 | 110 | return inode;
104 | 111 |
105 | 112 | inode = __lookup_free_space_inode(fs_info->tree_root, path,
106 | | - block_group->key.objectid);
| 113 | + block_group->start);
107 | 114 | if (IS_ERR(inode))
108 | 115 | return inode;
109 | 116 |
.. | ..
185 | 192 | return 0;
186 | 193 | }
187 | 194 |
188 | | -int create_free_space_inode(struct btrfs_fs_info *fs_info,
189 | | - struct btrfs_trans_handle *trans,
190 | | - struct btrfs_block_group_cache *block_group,
| 195 | +int create_free_space_inode(struct btrfs_trans_handle *trans,
| 196 | + struct btrfs_block_group *block_group,
191 | 197 | struct btrfs_path *path)
192 | 198 | {
193 | 199 | int ret;
194 | 200 | u64 ino;
195 | 201 |
196 | | - ret = btrfs_find_free_objectid(fs_info->tree_root, &ino);
| 202 | + ret = btrfs_find_free_objectid(trans->fs_info->tree_root, &ino);
197 | 203 | if (ret < 0)
198 | 204 | return ret;
199 | 205 |
200 | | - return __create_free_space_inode(fs_info->tree_root, trans, path, ino,
201 | | - block_group->key.objectid);
| 206 | + return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
| 207 | + ino, block_group->start);
202 | 208 | }
203 | 209 |
204 | 210 | int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
.. | ..
208 | 214 | int ret;
209 | 215 |
210 | 216 | /* 1 for slack space, 1 for updating the inode */
211 | | - needed_bytes = btrfs_calc_trunc_metadata_size(fs_info, 1) +
212 | | - btrfs_calc_trans_metadata_size(fs_info, 1);
| 217 | + needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
| 218 | + btrfs_calc_metadata_size(fs_info, 1);
213 | 219 |
214 | 220 | spin_lock(&rsv->lock);
215 | 221 | if (rsv->reserved < needed_bytes)
.. | ..
221 | 227 | }
222 | 228 |
223 | 229 | int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
224 | | - struct btrfs_block_group_cache *block_group,
| 230 | + struct btrfs_block_group *block_group,
225 | 231 | struct inode *inode)
226 | 232 | {
227 | 233 | struct btrfs_root *root = BTRFS_I(inode)->root;
.. | ..
365 | 371 | }
366 | 372 | }
367 | 373 |
368 | | -static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode,
369 | | - int uptodate)
| 374 | +static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
370 | 375 | {
371 | 376 | struct page *page;
| 377 | + struct inode *inode = io_ctl->inode;
372 | 378 | gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
373 | 379 | int i;
374 | 380 |
.. | ..
407 | 413 |
408 | 414 | static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
409 | 415 | {
410 | | - __le64 *val;
411 | | -
412 | 416 | io_ctl_map_page(io_ctl, 1);
413 | 417 |
414 | 418 | /*
.. | ..
423 | 427 | io_ctl->size -= sizeof(u64) * 2;
424 | 428 | }
425 | 429 |
426 | | - val = io_ctl->cur;
427 | | - *val = cpu_to_le64(generation);
| 430 | + put_unaligned_le64(generation, io_ctl->cur);
428 | 431 | io_ctl->cur += sizeof(u64);
429 | 432 | }
430 | 433 |
431 | 434 | static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
432 | 435 | {
433 | | - __le64 *gen;
| 436 | + u64 cache_gen;
434 | 437 |
435 | 438 | /*
436 | 439 | * Skip the crc area. If we don't check crcs then we just have a 64bit
.. | ..
445 | 448 | io_ctl->size -= sizeof(u64) * 2;
446 | 449 | }
447 | 450 |
448 | | - gen = io_ctl->cur;
449 | | - if (le64_to_cpu(*gen) != generation) {
| 451 | + cache_gen = get_unaligned_le64(io_ctl->cur);
| 452 | + if (cache_gen != generation) {
450 | 453 | btrfs_err_rl(io_ctl->fs_info,
451 | 454 | "space cache generation (%llu) does not match inode (%llu)",
452 | | - *gen, generation);
| 455 | + cache_gen, generation);
453 | 456 | io_ctl_unmap_page(io_ctl);
454 | 457 | return -EIO;
455 | 458 | }
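The switch from dereferencing a `__le64 *` to `get_unaligned_le64()`/`put_unaligned_le64()` matters because `io_ctl->cur` walks the page in mixed-size steps and may land on an address that is not 8-byte aligned; on architectures without hardware unaligned access, a plain load or store there can fault. A minimal model of what the accessors do (illustrative only: byte-by-byte little-endian assembly/disassembly, not the kernel's optimized implementation):

```c
#include <stdint.h>
#include <stddef.h>

/* Read a little-endian u64 from an arbitrarily aligned address. */
static uint64_t my_get_unaligned_le64(const void *p)
{
	const uint8_t *b = p;
	uint64_t v = 0;

	for (size_t i = 0; i < 8; i++)
		v |= (uint64_t)b[i] << (8 * i);	/* byte loads never fault */
	return v;
}

static void my_put_unaligned_le64(uint64_t v, void *p)
{
	uint8_t *b = p;

	for (size_t i = 0; i < 8; i++)
		b[i] = v >> (8 * i);
}
```

The accessors also fold in the endian conversion, which is why the local `__le64 *val`/`*gen` temporaries disappear from these functions.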
.. | ..
471 | 474 | if (index == 0)
472 | 475 | offset = sizeof(u32) * io_ctl->num_pages;
473 | 476 |
474 | | - crc = btrfs_csum_data(io_ctl->orig + offset, crc,
475 | | - PAGE_SIZE - offset);
476 | | - btrfs_csum_final(crc, (u8 *)&crc);
| 477 | + crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
| 478 | + btrfs_crc32c_final(crc, (u8 *)&crc);
477 | 479 | io_ctl_unmap_page(io_ctl);
478 | 480 | tmp = page_address(io_ctl->pages[0]);
479 | 481 | tmp += index;
.. | ..
499 | 501 | val = *tmp;
500 | 502 |
501 | 503 | io_ctl_map_page(io_ctl, 0);
502 | | - crc = btrfs_csum_data(io_ctl->orig + offset, crc,
503 | | - PAGE_SIZE - offset);
504 | | - btrfs_csum_final(crc, (u8 *)&crc);
| 504 | + crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
| 505 | + btrfs_crc32c_final(crc, (u8 *)&crc);
505 | 506 | if (val != crc) {
506 | 507 | btrfs_err_rl(io_ctl->fs_info,
507 | 508 | "csum mismatch on free space cache");
.. | ..
521 | 522 | return -ENOSPC;
522 | 523 |
523 | 524 | entry = io_ctl->cur;
524 | | - entry->offset = cpu_to_le64(offset);
525 | | - entry->bytes = cpu_to_le64(bytes);
| 525 | + put_unaligned_le64(offset, &entry->offset);
| 526 | + put_unaligned_le64(bytes, &entry->bytes);
526 | 527 | entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
527 | 528 | BTRFS_FREE_SPACE_EXTENT;
528 | 529 | io_ctl->cur += sizeof(struct btrfs_free_space_entry);
.. | ..
595 | 596 | }
596 | 597 |
597 | 598 | e = io_ctl->cur;
598 | | - entry->offset = le64_to_cpu(e->offset);
599 | | - entry->bytes = le64_to_cpu(e->bytes);
| 599 | + entry->offset = get_unaligned_le64(&e->offset);
| 600 | + entry->bytes = get_unaligned_le64(&e->bytes);
600 | 601 | *type = e->type;
601 | 602 | io_ctl->cur += sizeof(struct btrfs_free_space_entry);
602 | 603 | io_ctl->size -= sizeof(struct btrfs_free_space_entry);
.. | ..
728 | 729 |
729 | 730 | readahead_cache(inode);
730 | 731 |
731 | | - ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
| 732 | + ret = io_ctl_prepare_pages(&io_ctl, true);
732 | 733 | if (ret)
733 | 734 | goto out;
734 | 735 |
.. | ..
753 | 754 | kmem_cache_free(btrfs_free_space_cachep, e);
754 | 755 | goto free_cache;
755 | 756 | }
| 757 | +
| 758 | + /*
| 759 | + * Sync discard ensures that the free space cache is always
| 760 | + * trimmed. So when reading this in, the state should reflect
| 761 | + * that. We also do this for async as a stop gap for lack of
| 762 | + * persistence.
| 763 | + */
| 764 | + if (btrfs_test_opt(fs_info, DISCARD_SYNC) ||
| 765 | + btrfs_test_opt(fs_info, DISCARD_ASYNC))
| 766 | + e->trim_state = BTRFS_TRIM_STATE_TRIMMED;
756 | 767 |
757 | 768 | if (!e->bytes) {
758 | 769 | ret = -1;
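The hunks from here on lean heavily on `btrfs_free_space_trimmed()`, which is not visible in this file. In this series it lives in free-space-cache.h as a trivial accessor over the new `trim_state` field; a sketch of the expected shape (reconstructed for context — check the header in the same series for the authoritative definition):

```c
enum btrfs_trim_state {
	BTRFS_TRIM_STATE_UNTRIMMED,
	BTRFS_TRIM_STATE_TRIMMED,
	BTRFS_TRIM_STATE_TRIMMING,
};

static inline bool btrfs_free_space_trimmed(struct btrfs_free_space *info)
{
	/* Only fully trimmed entries count; TRIMMING is still in flight. */
	return (info->trim_state == BTRFS_TRIM_STATE_TRIMMED);
}
```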
.. | ..
783 | 794 | }
784 | 795 | spin_lock(&ctl->tree_lock);
785 | 796 | ret = link_free_space(ctl, e);
786 | | - ctl->total_bitmaps++;
787 | | - ctl->op->recalc_thresholds(ctl);
788 | | - spin_unlock(&ctl->tree_lock);
789 | 797 | if (ret) {
| 798 | + spin_unlock(&ctl->tree_lock);
790 | 799 | btrfs_err(fs_info,
791 | 800 | "Duplicate entries in free space cache, dumping");
792 | 801 | kmem_cache_free(btrfs_free_space_cachep, e);
793 | 802 | goto free_cache;
794 | 803 | }
| 804 | + ctl->total_bitmaps++;
| 805 | + ctl->op->recalc_thresholds(ctl);
| 806 | + spin_unlock(&ctl->tree_lock);
795 | 807 | list_add_tail(&e->list, &bitmaps);
796 | 808 | }
797 | 809 |
.. | ..
809 | 821 | ret = io_ctl_read_bitmap(&io_ctl, e);
810 | 822 | if (ret)
811 | 823 | goto free_cache;
| 824 | + e->bitmap_extents = count_bitmap_extents(ctl, e);
| 825 | + if (!btrfs_free_space_trimmed(e)) {
| 826 | + ctl->discardable_extents[BTRFS_STAT_CURR] +=
| 827 | + e->bitmap_extents;
| 828 | + ctl->discardable_bytes[BTRFS_STAT_CURR] += e->bytes;
| 829 | + }
812 | 830 | }
813 | 831 |
814 | 832 | io_ctl_drop_pages(&io_ctl);
815 | 833 | merge_space_tree(ctl);
816 | 834 | ret = 1;
817 | 835 | out:
| 836 | + btrfs_discard_update_discardable(ctl->private, ctl);
818 | 837 | io_ctl_free(&io_ctl);
819 | 838 | return ret;
820 | 839 | free_cache:
.. | ..
823 | 842 | goto out;
824 | 843 | }
825 | 844 |
826 | | -int load_free_space_cache(struct btrfs_fs_info *fs_info,
827 | | - struct btrfs_block_group_cache *block_group)
| 845 | +int load_free_space_cache(struct btrfs_block_group *block_group)
828 | 846 | {
| 847 | + struct btrfs_fs_info *fs_info = block_group->fs_info;
829 | 848 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
830 | 849 | struct inode *inode;
831 | 850 | struct btrfs_path *path;
832 | 851 | int ret = 0;
833 | 852 | bool matched;
834 | | - u64 used = btrfs_block_group_used(&block_group->item);
| 853 | + u64 used = block_group->used;
835 | 854 |
836 | 855 | /*
837 | 856 | * If this block group has been marked to be cleared for one reason or
.. | ..
869 | 888 | * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
870 | 889 | * we will never try to read their inode item while the fs is mounted.
871 | 890 | */
872 | | - inode = lookup_free_space_inode(fs_info, block_group, path);
| 891 | + inode = lookup_free_space_inode(block_group, path);
873 | 892 | if (IS_ERR(inode)) {
874 | 893 | btrfs_free_path(path);
875 | 894 | return 0;
.. | ..
885 | 904 | spin_unlock(&block_group->lock);
886 | 905 |
887 | 906 | ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
888 | | - path, block_group->key.objectid);
| 907 | + path, block_group->start);
889 | 908 | btrfs_free_path(path);
890 | 909 | if (ret <= 0)
891 | 910 | goto out;
892 | 911 |
893 | 912 | spin_lock(&ctl->tree_lock);
894 | | - matched = (ctl->free_space == (block_group->key.offset - used -
| 913 | + matched = (ctl->free_space == (block_group->length - used -
895 | 914 | block_group->bytes_super));
896 | 915 | spin_unlock(&ctl->tree_lock);
897 | 916 |
.. | ..
899 | 918 | __btrfs_remove_free_space_cache(ctl);
900 | 919 | btrfs_warn(fs_info,
901 | 920 | "block group %llu has wrong amount of free space",
902 | | - block_group->key.objectid);
| 921 | + block_group->start);
903 | 922 | ret = -1;
904 | 923 | }
905 | 924 | out:
.. | ..
912 | 931 |
913 | 932 | btrfs_warn(fs_info,
914 | 933 | "failed to load free space cache for block group %llu, rebuilding it now",
915 | | - block_group->key.objectid);
| 934 | + block_group->start);
916 | 935 | }
917 | 936 |
918 | 937 | iput(inode);
.. | ..
922 | 941 | static noinline_for_stack
923 | 942 | int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
924 | 943 | struct btrfs_free_space_ctl *ctl,
925 | | - struct btrfs_block_group_cache *block_group,
| 944 | + struct btrfs_block_group *block_group,
926 | 945 | int *entries, int *bitmaps,
927 | 946 | struct list_head *bitmap_list)
928 | 947 | {
.. | ..
1015 | 1034 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1016 | 1035 | if (ret < 0) {
1017 | 1036 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
1018 | | - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
| 1037 | + EXTENT_DELALLOC, 0, 0, NULL);
1019 | 1038 | goto fail;
1020 | 1039 | }
1021 | 1040 | leaf = path->nodes[0];
.. | ..
1027 | 1046 | if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
1028 | 1047 | found_key.offset != offset) {
1029 | 1048 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
1030 | | - inode->i_size - 1,
1031 | | - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
1032 | | - NULL);
| 1049 | + inode->i_size - 1, EXTENT_DELALLOC, 0,
| 1050 | + 0, NULL);
1033 | 1051 | btrfs_release_path(path);
1034 | 1052 | goto fail;
1035 | 1053 | }
.. | ..
1050 | 1068 | return -1;
1051 | 1069 | }
1052 | 1070 |
1053 | | -static noinline_for_stack int
1054 | | -write_pinned_extent_entries(struct btrfs_fs_info *fs_info,
1055 | | - struct btrfs_block_group_cache *block_group,
| 1071 | +static noinline_for_stack int write_pinned_extent_entries(
| 1072 | + struct btrfs_trans_handle *trans,
| 1073 | + struct btrfs_block_group *block_group,
1056 | 1074 | struct btrfs_io_ctl *io_ctl,
1057 | 1075 | int *entries)
1058 | 1076 | {
.. | ..
1070 | 1088 | * We shouldn't have switched the pinned extents yet so this is the
1071 | 1089 | * right one
1072 | 1090 | */
1073 | | - unpin = fs_info->pinned_extents;
| 1091 | + unpin = &trans->transaction->pinned_extents;
1074 | 1092 |
1075 | | - start = block_group->key.objectid;
| 1093 | + start = block_group->start;
1076 | 1094 |
1077 | | - while (start < block_group->key.objectid + block_group->key.offset) {
| 1095 | + while (start < block_group->start + block_group->length) {
1078 | 1096 | ret = find_first_extent_bit(unpin, start,
1079 | 1097 | &extent_start, &extent_end,
1080 | 1098 | EXTENT_DIRTY, NULL);
.. | ..
1082 | 1100 | return 0;
1083 | 1101 |
1084 | 1102 | /* This pinned extent is out of our range */
1085 | | - if (extent_start >= block_group->key.objectid +
1086 | | - block_group->key.offset)
| 1103 | + if (extent_start >= block_group->start + block_group->length)
1087 | 1104 | return 0;
1088 | 1105 |
1089 | 1106 | extent_start = max(extent_start, start);
1090 | | - extent_end = min(block_group->key.objectid +
1091 | | - block_group->key.offset, extent_end + 1);
| 1107 | + extent_end = min(block_group->start + block_group->length,
| 1108 | + extent_end + 1);
1092 | 1109 | len = extent_end - extent_start;
1093 | 1110 |
1094 | 1111 | *entries += 1;
.. | ..
1126 | 1143 | ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
1127 | 1144 | if (ret)
1128 | 1145 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
1129 | | - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
| 1146 | + EXTENT_DELALLOC, 0, 0, NULL);
1130 | 1147 |
1131 | 1148 | return ret;
1132 | 1149 | }
.. | ..
1152 | 1169 |
1153 | 1170 | static int __btrfs_wait_cache_io(struct btrfs_root *root,
1154 | 1171 | struct btrfs_trans_handle *trans,
1155 | | - struct btrfs_block_group_cache *block_group,
| 1172 | + struct btrfs_block_group *block_group,
1156 | 1173 | struct btrfs_io_ctl *io_ctl,
1157 | 1174 | struct btrfs_path *path, u64 offset)
1158 | 1175 | {
.. | ..
1174 | 1191 | if (ret) {
1175 | 1192 | invalidate_inode_pages2(inode->i_mapping);
1176 | 1193 | BTRFS_I(inode)->generation = 0;
1177 | | - if (block_group) {
1178 | | -#ifdef DEBUG
1179 | | - btrfs_err(root->fs_info,
1180 | | - "failed to write free space cache for block group %llu",
1181 | | - block_group->key.objectid);
1182 | | -#endif
1183 | | - }
| 1194 | + if (block_group)
| 1195 | + btrfs_debug(root->fs_info,
| 1196 | + "failed to write free space cache for block group %llu error %d",
| 1197 | + block_group->start, ret);
1184 | 1198 | }
1185 | 1199 | btrfs_update_inode(trans, root, inode);
1186 | 1200 |
.. | ..
1220 | 1234 | }
1221 | 1235 |
1222 | 1236 | int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
1223 | | - struct btrfs_block_group_cache *block_group,
| 1237 | + struct btrfs_block_group *block_group,
1224 | 1238 | struct btrfs_path *path)
1225 | 1239 | {
1226 | 1240 | return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans,
1227 | 1241 | block_group, &block_group->io_ctl,
1228 | | - path, block_group->key.objectid);
| 1242 | + path, block_group->start);
1229 | 1243 | }
1230 | 1244 |
1231 | 1245 | /**
.. | ..
1241 | 1255 | */
1242 | 1256 | static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1243 | 1257 | struct btrfs_free_space_ctl *ctl,
1244 | | - struct btrfs_block_group_cache *block_group,
| 1258 | + struct btrfs_block_group *block_group,
1245 | 1259 | struct btrfs_io_ctl *io_ctl,
1246 | 1260 | struct btrfs_trans_handle *trans)
1247 | 1261 | {
1248 | | - struct btrfs_fs_info *fs_info = root->fs_info;
1249 | 1262 | struct extent_state *cached_state = NULL;
1250 | 1263 | LIST_HEAD(bitmap_list);
1251 | 1264 | int entries = 0;
.. | ..
1277 | 1290 | }
1278 | 1291 |
1279 | 1292 | /* Lock all pages first so we can lock the extent safely. */
1280 | | - ret = io_ctl_prepare_pages(io_ctl, inode, 0);
| 1293 | + ret = io_ctl_prepare_pages(io_ctl, false);
1281 | 1294 | if (ret)
1282 | 1295 | goto out_unlock;
1283 | 1296 |
.. | ..
1303 | 1316 | * If this changes while we are working we'll get added back to
1304 | 1317 | * the dirty list and redo it. No locking needed
1305 | 1318 | */
1306 | | - ret = write_pinned_extent_entries(fs_info, block_group,
1307 | | - io_ctl, &entries);
| 1319 | + ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries);
1308 | 1320 | if (ret)
1309 | 1321 | goto out_nospc_locked;
1310 | 1322 |
.. | ..
1323 | 1335 | io_ctl_zero_remaining_pages(io_ctl);
1324 | 1336 |
1325 | 1337 | /* Everything is written out, now we dirty the pages in the file. */
1326 | | - ret = btrfs_dirty_pages(inode, io_ctl->pages, io_ctl->num_pages, 0,
1327 | | - i_size_read(inode), &cached_state);
| 1338 | + ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
| 1339 | + io_ctl->num_pages, 0, i_size_read(inode),
| 1340 | + &cached_state);
1328 | 1341 | if (ret)
1329 | 1342 | goto out_nospc;
1330 | 1343 |
.. | ..
1342 | 1355 |
1343 | 1356 | /*
1344 | 1357 | * at this point the pages are under IO and we're happy,
1345 | | - * The caller is responsible for waiting on them and updating the
| 1358 | + * The caller is responsible for waiting on them and updating
1346 | 1359 | * the cache and the inode
1347 | 1360 | */
1348 | 1361 | io_ctl->entries = entries;
.. | ..
1353 | 1366 | goto out;
1354 | 1367 |
1355 | 1368 | return 0;
1356 | | -
1357 | | -out:
1358 | | - io_ctl->inode = NULL;
1359 | | - io_ctl_free(io_ctl);
1360 | | - if (ret) {
1361 | | - invalidate_inode_pages2(inode->i_mapping);
1362 | | - BTRFS_I(inode)->generation = 0;
1363 | | - }
1364 | | - btrfs_update_inode(trans, root, inode);
1365 | | - if (must_iput)
1366 | | - iput(inode);
1367 | | - return ret;
1368 | 1369 |
1369 | 1370 | out_nospc_locked:
1370 | 1371 | cleanup_bitmap_list(&bitmap_list);
.. | ..
1378 | 1379 | if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1379 | 1380 | up_write(&block_group->data_rwsem);
1380 | 1381 |
1381 | | - goto out;
| 1382 | +out:
| 1383 | + io_ctl->inode = NULL;
| 1384 | + io_ctl_free(io_ctl);
| 1385 | + if (ret) {
| 1386 | + invalidate_inode_pages2(inode->i_mapping);
| 1387 | + BTRFS_I(inode)->generation = 0;
| 1388 | + }
| 1389 | + btrfs_update_inode(trans, root, inode);
| 1390 | + if (must_iput)
| 1391 | + iput(inode);
| 1392 | + return ret;
1382 | 1393 | }
1383 | 1394 |
1384 | | -int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
1385 | | - struct btrfs_trans_handle *trans,
1386 | | - struct btrfs_block_group_cache *block_group,
| 1395 | +int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
| 1396 | + struct btrfs_block_group *block_group,
1387 | 1397 | struct btrfs_path *path)
1388 | 1398 | {
| 1399 | + struct btrfs_fs_info *fs_info = trans->fs_info;
1389 | 1400 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1390 | 1401 | struct inode *inode;
1391 | 1402 | int ret = 0;
.. | ..
1397 | 1408 | }
1398 | 1409 | spin_unlock(&block_group->lock);
1399 | 1410 |
1400 | | - inode = lookup_free_space_inode(fs_info, block_group, path);
| 1411 | + inode = lookup_free_space_inode(block_group, path);
1401 | 1412 | if (IS_ERR(inode))
1402 | 1413 | return 0;
1403 | 1414 |
1404 | 1415 | ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
1405 | 1416 | block_group, &block_group->io_ctl, trans);
1406 | 1417 | if (ret) {
1407 | | -#ifdef DEBUG
1408 | | - btrfs_err(fs_info,
1409 | | - "failed to write free space cache for block group %llu",
1410 | | - block_group->key.objectid);
1411 | | -#endif
| 1418 | + btrfs_debug(fs_info,
| 1419 | + "failed to write free space cache for block group %llu error %d",
| 1420 | + block_group->start, ret);
1412 | 1421 | spin_lock(&block_group->lock);
1413 | 1422 | block_group->disk_cache_state = BTRFS_DC_ERROR;
1414 | 1423 | spin_unlock(&block_group->lock);
.. | ..
1633 | 1642 | {
1634 | 1643 | rb_erase(&info->offset_index, &ctl->free_space_offset);
1635 | 1644 | ctl->free_extents--;
| 1645 | +
| 1646 | + if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
| 1647 | + ctl->discardable_extents[BTRFS_STAT_CURR]--;
| 1648 | + ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes;
| 1649 | + }
1636 | 1650 | }
1637 | 1651 |
1638 | 1652 | static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
.. | ..
1653 | 1667 | if (ret)
1654 | 1668 | return ret;
1655 | 1669 |
| 1670 | + if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
| 1671 | + ctl->discardable_extents[BTRFS_STAT_CURR]++;
| 1672 | + ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
| 1673 | + }
| 1674 | +
1656 | 1675 | ctl->free_space += info->bytes;
1657 | 1676 | ctl->free_extents++;
1658 | 1677 | return ret;
.. | ..
1660 | 1679 |
1661 | 1680 | static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1662 | 1681 | {
1663 | | - struct btrfs_block_group_cache *block_group = ctl->private;
| 1682 | + struct btrfs_block_group *block_group = ctl->private;
1664 | 1683 | u64 max_bytes;
1665 | 1684 | u64 bitmap_bytes;
1666 | 1685 | u64 extent_bytes;
1667 | | - u64 size = block_group->key.offset;
| 1686 | + u64 size = block_group->length;
1668 | 1687 | u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
1669 | 1688 | u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1670 | 1689 |
.. | ..
1673 | 1692 | ASSERT(ctl->total_bitmaps <= max_bitmaps);
1674 | 1693 |
1675 | 1694 | /*
1676 | | - * The goal is to keep the total amount of memory used per 1gb of space
1677 | | - * at or below 32k, so we need to adjust how much memory we allow to be
1678 | | - * used by extent based free space tracking
| 1695 | + * We are trying to keep the total amount of memory used per 1GiB of
| 1696 | + * space to be MAX_CACHE_BYTES_PER_GIG. However, with a reclamation
| 1697 | + * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of
| 1698 | + * bitmaps, we may end up using more memory than this.
1679 | 1699 | */
1680 | 1700 | if (size < SZ_1G)
1681 | 1701 | max_bytes = MAX_CACHE_BYTES_PER_GIG;
1682 | 1702 | else
1683 | 1703 | max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);
1684 | 1704 |
1685 | | - /*
1686 | | - * we want to account for 1 more bitmap than what we have so we can make
1687 | | - * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1688 | | - * we add more bitmaps.
1689 | | - */
1690 | | - bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit;
1691 | | -
1692 | | - if (bitmap_bytes >= max_bytes) {
1693 | | - ctl->extents_thresh = 0;
1694 | | - return;
1695 | | - }
| 1705 | + bitmap_bytes = ctl->total_bitmaps * ctl->unit;
1696 | 1706 |
1697 | 1707 | /*
1698 | 1708 | * we want the extent entry threshold to always be at most 1/2 the max
.. | ..
1709 | 1719 | struct btrfs_free_space *info,
1710 | 1720 | u64 offset, u64 bytes)
1711 | 1721 | {
1712 | | - unsigned long start, count;
| 1722 | + unsigned long start, count, end;
| 1723 | + int extent_delta = -1;
1713 | 1724 |
1714 | 1725 | start = offset_to_bit(info->offset, ctl->unit, offset);
1715 | 1726 | count = bytes_to_bits(bytes, ctl->unit);
1716 | | - ASSERT(start + count <= BITS_PER_BITMAP);
| 1727 | + end = start + count;
| 1728 | + ASSERT(end <= BITS_PER_BITMAP);
1717 | 1729 |
1718 | 1730 | bitmap_clear(info->bitmap, start, count);
1719 | 1731 |
1720 | 1732 | info->bytes -= bytes;
1721 | 1733 | if (info->max_extent_size > ctl->unit)
1722 | 1734 | info->max_extent_size = 0;
| 1735 | +
| 1736 | + if (start && test_bit(start - 1, info->bitmap))
| 1737 | + extent_delta++;
| 1738 | +
| 1739 | + if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
| 1740 | + extent_delta++;
| 1741 | +
| 1742 | + info->bitmap_extents += extent_delta;
| 1743 | + if (!btrfs_free_space_trimmed(info)) {
| 1744 | + ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
| 1745 | + ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
| 1746 | + }
1723 | 1747 | }
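The `extent_delta` bookkeeping starts from the common case and corrects it by looking at the two neighboring bits. For a clear: removing a run usually deletes one contiguous free region (delta starts at -1), but each still-set neighbor means the run was carved out of a larger region, leaving a piece behind. A worked example (illustrative only):

```c
/*
 * Bitmap before:  1 1 1 1 1 1   one free region, bitmap_extents == 1
 * clear bits 2-3: 1 1 0 0 1 1   delta starts at -1;
 *                               bit 1 set -> delta++ (piece survives left)
 *                               bit 4 set -> delta++ (piece survives right)
 *                               delta == +1, bitmap_extents becomes 2
 *
 * Clearing at the edge of the region instead:
 * before: 1 1 1 1 0 0           clear bits 2-3 -> 1 1 0 0 0 0
 *                               bit 1 set -> delta++, bit 4 clear -> no-op
 *                               delta == 0, still exactly one region
 */
```

The set path in `__bitmap_set_bits()` below is the mirror image: delta starts at +1 for a new region and each set neighbor decrements it, so filling a hole between two regions yields delta == -1.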
1724 | 1748 |
1725 | 1749 | static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
.. | ..
1734 | 1758 | struct btrfs_free_space *info, u64 offset,
1735 | 1759 | u64 bytes)
1736 | 1760 | {
1737 | | - unsigned long start, count;
| 1761 | + unsigned long start, count, end;
| 1762 | + int extent_delta = 1;
1738 | 1763 |
1739 | 1764 | start = offset_to_bit(info->offset, ctl->unit, offset);
1740 | 1765 | count = bytes_to_bits(bytes, ctl->unit);
1741 | | - ASSERT(start + count <= BITS_PER_BITMAP);
| 1766 | + end = start + count;
| 1767 | + ASSERT(end <= BITS_PER_BITMAP);
1742 | 1768 |
1743 | 1769 | bitmap_set(info->bitmap, start, count);
1744 | 1770 |
1745 | 1771 | info->bytes += bytes;
1746 | 1772 | ctl->free_space += bytes;
| 1773 | +
| 1774 | + if (start && test_bit(start - 1, info->bitmap))
| 1775 | + extent_delta--;
| 1776 | +
| 1777 | + if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
| 1778 | + extent_delta--;
| 1779 | +
| 1780 | + info->bitmap_extents += extent_delta;
| 1781 | + if (!btrfs_free_space_trimmed(info)) {
| 1782 | + ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
| 1783 | + ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes;
| 1784 | + }
1747 | 1785 | }
1748 | 1786 |
1749 | 1787 | /*
.. | ..
1879 | 1917 | return NULL;
1880 | 1918 | }
1881 | 1919 |
| 1920 | +static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
| 1921 | + struct btrfs_free_space *bitmap_info)
| 1922 | +{
| 1923 | + struct btrfs_block_group *block_group = ctl->private;
| 1924 | + u64 bytes = bitmap_info->bytes;
| 1925 | + unsigned int rs, re;
| 1926 | + int count = 0;
| 1927 | +
| 1928 | + if (!block_group || !bytes)
| 1929 | + return count;
| 1930 | +
| 1931 | + bitmap_for_each_set_region(bitmap_info->bitmap, rs, re, 0,
| 1932 | + BITS_PER_BITMAP) {
| 1933 | + bytes -= (re - rs) * ctl->unit;
| 1934 | + count++;
| 1935 | +
| 1936 | + if (!bytes)
| 1937 | + break;
| 1938 | + }
| 1939 | +
| 1940 | + return count;
| 1941 | +}
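`bitmap_for_each_set_region()` hands back each maximal run of set bits as a half-open range [rs, re), so a region's size in bytes is `(re - rs) * ctl->unit`; subtracting that from the entry's known byte count lets the loop stop early once every free byte has been attributed to a region. A self-contained model of the same walk (plain C stand-in, not the kernel macro):

```c
#include <stdbool.h>
#include <stdio.h>

/* Count maximal runs of set bits, stopping once all set bits are seen. */
static int count_regions(const bool *bits, unsigned long nbits,
			 unsigned long nset)
{
	int count = 0;
	unsigned long i = 0;

	while (i < nbits && nset) {
		while (i < nbits && !bits[i])	/* skip the clear gap */
			i++;
		if (i == nbits)
			break;
		count++;			/* start of one region */
		while (i < nbits && bits[i]) {	/* consume the region */
			i++;
			nset--;
		}
	}
	return count;
}

int main(void)
{
	bool bits[] = { 1, 1, 0, 1, 0, 0, 1, 1, 1 };

	printf("%d regions\n", count_regions(bits, 9, 6)); /* prints 3 */
	return 0;
}
```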
| 1942 | +
1882 | 1943 | static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1883 | 1944 | struct btrfs_free_space *info, u64 offset)
1884 | 1945 | {
1885 | 1946 | info->offset = offset_to_bitmap(ctl, offset);
1886 | 1947 | info->bytes = 0;
| 1948 | + info->bitmap_extents = 0;
1887 | 1949 | INIT_LIST_HEAD(&info->list);
1888 | 1950 | link_free_space(ctl, info);
1889 | 1951 | ctl->total_bitmaps++;
.. | ..
1894 | 1956 | static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1895 | 1957 | struct btrfs_free_space *bitmap_info)
1896 | 1958 | {
| 1959 | + /*
| 1960 | + * Normally when this is called, the bitmap is completely empty. However,
| 1961 | + * if we are blowing up the free space cache for one reason or another
| 1962 | + * via __btrfs_remove_free_space_cache(), then it may not be freed and
| 1963 | + * we may leave stats on the table.
| 1964 | + */
| 1965 | + if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
| 1966 | + ctl->discardable_extents[BTRFS_STAT_CURR] -=
| 1967 | + bitmap_info->bitmap_extents;
| 1968 | + ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;
| 1969 | +
| 1970 | + }
1897 | 1971 | unlink_free_space(ctl, bitmap_info);
1898 | 1972 | kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
1899 | 1973 | kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
.. | ..
1980 | 2054 |
1981 | 2055 | static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1982 | 2056 | struct btrfs_free_space *info, u64 offset,
1983 | | - u64 bytes)
| 2057 | + u64 bytes, enum btrfs_trim_state trim_state)
1984 | 2058 | {
1985 | 2059 | u64 bytes_to_set = 0;
1986 | 2060 | u64 end;
| 2061 | +
| 2062 | + /*
| 2063 | + * This is a tradeoff to make bitmap trim state minimal. We mark the
| 2064 | + * whole bitmap untrimmed if at any point we add untrimmed regions.
| 2065 | + */
| 2066 | + if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) {
| 2067 | + if (btrfs_free_space_trimmed(info)) {
| 2068 | + ctl->discardable_extents[BTRFS_STAT_CURR] +=
| 2069 | + info->bitmap_extents;
| 2070 | + ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
| 2071 | + }
| 2072 | + info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
| 2073 | + }
1987 | 2074 |
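Keeping a single `trim_state` per bitmap is deliberately coarse: one untrimmed byte landing in a trimmed bitmap flips the whole bitmap (and all of its `bitmap_extents`) back into the discardable counters, so the async discard worker may re-trim regions that were already trimmed. The alternative, a per-bit shadow bitmap of trim state, would roughly double the memory cost; this tradeoff accepts occasional re-trimming instead. A state transition, with illustrative numbers:

```c
/*
 * Illustrative only: one bitmap entry absorbing an untrimmed add.
 *
 *   before: bytes = 60MiB, bitmap_extents = 5, trim_state = TRIMMED
 *           (contributes nothing to the discardable counters)
 *
 *   add 4KiB with trim_state == UNTRIMMED:
 *           discardable_extents += 5      (the whole bitmap re-enters)
 *           discardable_bytes   += 60MiB
 *           trim_state = UNTRIMMED        (then the 4KiB is set as usual,
 *                                          via __bitmap_set_bits())
 */
```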
1988 | 2075 | end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1989 | 2076 |
.. | ..
2004 | 2091 | static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
2005 | 2092 | struct btrfs_free_space *info)
2006 | 2093 | {
2007 | | - struct btrfs_block_group_cache *block_group = ctl->private;
| 2094 | + struct btrfs_block_group *block_group = ctl->private;
2008 | 2095 | struct btrfs_fs_info *fs_info = block_group->fs_info;
2009 | 2096 | bool forced = false;
2010 | 2097 |
.. | ..
2012 | 2099 | if (btrfs_should_fragment_free_space(block_group))
2013 | 2100 | forced = true;
2014 | 2101 | #endif
| 2102 | +
| 2103 | + /* This is a way to reclaim large regions from the bitmaps. */
| 2104 | + if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD)
| 2105 | + return false;
2015 | 2106 |
2016 | 2107 | /*
2017 | 2108 | * If we are below the extents threshold then we can add this as an
.. | ..
2025 | 2116 | * of cache left then go ahead an dadd them, no sense in adding
2026 | 2117 | * the overhead of a bitmap if we don't have to.
2027 | 2118 | */
2028 | | - if (info->bytes <= fs_info->sectorsize * 4) {
2029 | | - if (ctl->free_extents * 2 <= ctl->extents_thresh)
| 2119 | + if (info->bytes <= fs_info->sectorsize * 8) {
| 2120 | + if (ctl->free_extents * 3 <= ctl->extents_thresh)
2030 | 2121 | return false;
2031 | 2122 | } else {
2032 | 2123 | return false;
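Two knobs change here. First, any free extent of `FORCE_EXTENT_THRESHOLD` (1MiB) or more now refuses the bitmap path outright; this is the reclamation mechanism the rewritten recalculate_thresholds() comment refers to. Second, the small-extent heuristic moves from "≤ 4 sectors stay extents while under half the threshold" to "≤ 8 sectors stay extents while under a third of the threshold", steering small fragments into bitmaps sooner now that large extents never land there. Condensed as a standalone predicate (a sketch of the logic above, not a verbatim extract):

```c
#define SZ_1M (1024 * 1024)	/* FORCE_EXTENT_THRESHOLD */

static bool use_bitmap_sketch(unsigned long long bytes,
			      unsigned long long sectorsize,
			      int free_extents, int extents_thresh)
{
	/* Large extents never go into bitmaps anymore. */
	if (bytes >= SZ_1M)
		return false;

	if (free_extents < extents_thresh) {
		/* Below the extent-entry budget: keep it as an extent... */
		if (bytes > sectorsize * 8)
			return false;
		/* ...and keep even tiny pieces as extents while less than
		 * a third of the budget is spent. */
		if (free_extents * 3 <= extents_thresh)
			return false;
	}
	return true;	/* otherwise fold it into a bitmap */
}
```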
.. | ..
2041 | 2132 | * so allow those block groups to still be allowed to have a bitmap
2042 | 2133 | * entry.
2043 | 2134 | */
2044 | | - if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset)
| 2135 | + if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length)
2045 | 2136 | return false;
2046 | 2137 |
2047 | 2138 | return true;
.. | ..
2056 | 2147 | struct btrfs_free_space *info)
2057 | 2148 | {
2058 | 2149 | struct btrfs_free_space *bitmap_info;
2059 | | - struct btrfs_block_group_cache *block_group = NULL;
| 2150 | + struct btrfs_block_group *block_group = NULL;
2060 | 2151 | int added = 0;
2061 | 2152 | u64 bytes, offset, bytes_added;
| 2153 | + enum btrfs_trim_state trim_state;
2062 | 2154 | int ret;
2063 | 2155 |
2064 | 2156 | bytes = info->bytes;
2065 | 2157 | offset = info->offset;
| 2158 | + trim_state = info->trim_state;
2066 | 2159 |
2067 | 2160 | if (!ctl->op->use_bitmap(ctl, info))
2068 | 2161 | return 0;
.. | ..
2097 | 2190 | }
2098 | 2191 |
2099 | 2192 | if (entry->offset == offset_to_bitmap(ctl, offset)) {
2100 | | - bytes_added = add_bytes_to_bitmap(ctl, entry,
2101 | | - offset, bytes);
| 2193 | + bytes_added = add_bytes_to_bitmap(ctl, entry, offset,
| 2194 | + bytes, trim_state);
2102 | 2195 | bytes -= bytes_added;
2103 | 2196 | offset += bytes_added;
2104 | 2197 | }
.. | ..
2117 | 2210 | goto new_bitmap;
2118 | 2211 | }
2119 | 2212 |
2120 | | - bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
| 2213 | + bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
| 2214 | + trim_state);
2121 | 2215 | bytes -= bytes_added;
2122 | 2216 | offset += bytes_added;
2123 | 2217 | added = 0;
.. | ..
2151 | 2245 | /* allocate the bitmap */
2152 | 2246 | info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep,
2153 | 2247 | GFP_NOFS);
| 2248 | + info->trim_state = BTRFS_TRIM_STATE_TRIMMED;
2154 | 2249 | spin_lock(&ctl->tree_lock);
2155 | 2250 | if (!info->bitmap) {
2156 | 2251 | ret = -ENOMEM;
.. | ..
2170 | 2265 | return ret;
2171 | 2266 | }
2172 | 2267 |
| 2268 | +/*
| 2269 | + * Free space merging rules:
| 2270 | + * 1) Merge trimmed areas together
| 2271 | + * 2) Let untrimmed areas coalesce with trimmed areas
| 2272 | + * 3) Always pull neighboring regions from bitmaps
| 2273 | + *
| 2274 | + * The above rules are for when we merge free space based on btrfs_trim_state.
| 2275 | + * Rules 2 and 3 are subtle because they are suboptimal, but are done for the
| 2276 | + * same reason: to promote larger extent regions which makes life easier for
| 2277 | + * find_free_extent(). Rule 2 enables coalescing based on the common path
| 2278 | + * being returning free space from btrfs_finish_extent_commit(). So when free
| 2279 | + * space is trimmed, it will prevent aggregating trimmed new region and
| 2280 | + * untrimmed regions in the rb_tree. Rule 3 is purely to obtain larger extents
| 2281 | + * and provide find_free_extent() with the largest extents possible hoping for
| 2282 | + * the reuse path.
| 2283 | + */
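Rule 2's asymmetry is the key subtlety in the checks that follow: an *untrimmed* incoming region may swallow trimmed neighbors (the merged extent simply stays untrimmed and will be re-trimmed as a whole), but a *trimmed* incoming region will not merge with untrimmed neighbors, since the merged result would have to drop the trimmed state it already paid for. Pulled out as a standalone predicate (a sketch mirroring the `(!is_trimmed || btrfs_free_space_trimmed(...))` tests below, not kernel code):

```c
/*
 * May the incoming free-space region merge with a neighbor?
 * (sketch of the condition used in try_merge_free_space())
 */
static bool may_merge(bool incoming_trimmed, bool neighbor_trimmed)
{
	/* An untrimmed region merges with anything: the result stays
	 * untrimmed, which is always safe (it just gets trimmed again). */
	if (!incoming_trimmed)
		return true;
	/* A trimmed region only merges with trimmed neighbors, so
	 * already-done trim work is never discarded. */
	return neighbor_trimmed;
}
```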
2173 | 2284 | static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
2174 | 2285 | struct btrfs_free_space *info, bool update_stat)
2175 | 2286 | {
.. | ..
2178 | 2289 | bool merged = false;
2179 | 2290 | u64 offset = info->offset;
2180 | 2291 | u64 bytes = info->bytes;
| 2292 | + const bool is_trimmed = btrfs_free_space_trimmed(info);
2181 | 2293 |
2182 | 2294 | /*
2183 | 2295 | * first we want to see if there is free space adjacent to the range we
.. | ..
2191 | 2303 | else if (!right_info)
2192 | 2304 | left_info = tree_search_offset(ctl, offset - 1, 0, 0);
2193 | 2305 |
2194 | | - if (right_info && !right_info->bitmap) {
| 2306 | + /* See try_merge_free_space() comment. */
| 2307 | + if (right_info && !right_info->bitmap &&
| 2308 | + (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
2195 | 2309 | if (update_stat)
2196 | 2310 | unlink_free_space(ctl, right_info);
2197 | 2311 | else
.. | ..
2201 | 2315 | merged = true;
2202 | 2316 | }
2203 | 2317 |
| 2318 | + /* See try_merge_free_space() comment. */
2204 | 2319 | if (left_info && !left_info->bitmap &&
2205 | | - left_info->offset + left_info->bytes == offset) {
| 2320 | + left_info->offset + left_info->bytes == offset &&
| 2321 | + (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
2206 | 2322 | if (update_stat)
2207 | 2323 | unlink_free_space(ctl, left_info);
2208 | 2324 | else
.. | ..
2237 | 2353 | return false;
2238 | 2354 | bytes = (j - i) * ctl->unit;
2239 | 2355 | info->bytes += bytes;
| 2356 | +
| 2357 | + /* See try_merge_free_space() comment. */
| 2358 | + if (!btrfs_free_space_trimmed(bitmap))
| 2359 | + info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2240 | 2360 |
2241 | 2361 | if (update_stat)
2242 | 2362 | bitmap_clear_bits(ctl, bitmap, end, bytes);
.. | ..
2291 | 2411 | info->offset -= bytes;
2292 | 2412 | info->bytes += bytes;
2293 | 2413 |
| 2414 | + /* See try_merge_free_space() comment. */
| 2415 | + if (!btrfs_free_space_trimmed(bitmap))
| 2416 | + info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
| 2417 | +
2294 | 2418 | if (update_stat)
2295 | 2419 | bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
2296 | 2420 | else
.. | ..
2340 | 2464 |
2341 | 2465 | int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
2342 | 2466 | struct btrfs_free_space_ctl *ctl,
2343 | | - u64 offset, u64 bytes)
| 2467 | + u64 offset, u64 bytes,
| 2468 | + enum btrfs_trim_state trim_state)
2344 | 2469 | {
| 2470 | + struct btrfs_block_group *block_group = ctl->private;
2345 | 2471 | struct btrfs_free_space *info;
2346 | 2472 | int ret = 0;
| 2473 | + u64 filter_bytes = bytes;
2347 | 2474 |
2348 | 2475 | info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
2349 | 2476 | if (!info)
.. | ..
2351 | 2478 |
2352 | 2479 | info->offset = offset;
2353 | 2480 | info->bytes = bytes;
| 2481 | + info->trim_state = trim_state;
2354 | 2482 | RB_CLEAR_NODE(&info->offset_index);
2355 | 2483 |
2356 | 2484 | spin_lock(&ctl->tree_lock);
.. | ..
2379 | 2507 | */
2380 | 2508 | steal_from_bitmap(ctl, info, true);
2381 | 2509 |
| 2510 | + filter_bytes = max(filter_bytes, info->bytes);
| 2511 | +
2382 | 2512 | ret = link_free_space(ctl, info);
2383 | 2513 | if (ret)
2384 | 2514 | kmem_cache_free(btrfs_free_space_cachep, info);
2385 | 2515 | out:
| 2516 | + btrfs_discard_update_discardable(block_group, ctl);
2386 | 2517 | spin_unlock(&ctl->tree_lock);
2387 | 2518 |
2388 | 2519 | if (ret) {
.. | ..
2390 | 2521 | ASSERT(ret != -EEXIST);
2391 | 2522 | }
2392 | 2523 |
| 2524 | + if (trim_state != BTRFS_TRIM_STATE_TRIMMED) {
| 2525 | + btrfs_discard_check_filter(block_group, filter_bytes);
| 2526 | + btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
| 2527 | + }
| 2528 | +
2393 | 2529 | return ret;
2394 | 2530 | }
2395 | 2531 |
2396 | | -int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
| 2532 | +int btrfs_add_free_space(struct btrfs_block_group *block_group,
| 2533 | + u64 bytenr, u64 size)
| 2534 | +{
| 2535 | + enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
| 2536 | +
| 2537 | + if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC))
| 2538 | + trim_state = BTRFS_TRIM_STATE_TRIMMED;
| 2539 | +
| 2540 | + return __btrfs_add_free_space(block_group->fs_info,
| 2541 | + block_group->free_space_ctl,
| 2542 | + bytenr, size, trim_state);
| 2543 | +}
| 2544 | +
| 2545 | +/*
| 2546 | + * This is a subtle distinction because when adding free space back in general,
| 2547 | + * we want it to be added as untrimmed for async. But in the case where we add
| 2548 | + * it on loading of a block group, we want to consider it trimmed.
| 2549 | + */
| 2550 | +int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
| 2551 | + u64 bytenr, u64 size)
| 2552 | +{
| 2553 | + enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
| 2554 | +
| 2555 | + if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) ||
| 2556 | + btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
| 2557 | + trim_state = BTRFS_TRIM_STATE_TRIMMED;
| 2558 | +
| 2559 | + return __btrfs_add_free_space(block_group->fs_info,
| 2560 | + block_group->free_space_ctl,
| 2561 | + bytenr, size, trim_state);
| 2562 | +}
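The two wrappers encode where the free space comes from. `btrfs_add_free_space()` is the general path (e.g. extents freed back at transaction commit): with sync discard the caller discards inline, so the space arrives trimmed; otherwise it arrives untrimmed and, under async discard, gets queued. `btrfs_add_free_space_async_trimmed()` is for populating a block group at load time, where async mode may also treat the space as trimmed, per the stop-gap comment in the load path above. A compact model of the mapping (illustrative; the local enum stands in for `btrfs_trim_state`):

```c
enum trim_state { UNTRIMMED, TRIMMED };	/* stand-in for btrfs_trim_state */

static enum trim_state initial_trim_state(int discard_sync,
					  int discard_async,
					  int loading_block_group)
{
	if (discard_sync)
		return TRIMMED;		/* space was discarded inline */
	if (loading_block_group && discard_async)
		return TRIMMED;		/* loaded space treated as trimmed */
	return UNTRIMMED;		/* will be queued for async discard */
}
```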
---|
| 2563 | + |
---|
| 2564 | +int btrfs_remove_free_space(struct btrfs_block_group *block_group, |
---|
2397 | 2565 | u64 offset, u64 bytes) |
---|
2398 | 2566 | { |
---|
2399 | 2567 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
---|
.. | .. |
---|
2465 | 2633 | } |
---|
2466 | 2634 | spin_unlock(&ctl->tree_lock); |
---|
2467 | 2635 | |
---|
2468 | | - ret = btrfs_add_free_space(block_group, offset + bytes, |
---|
2469 | | - old_end - (offset + bytes)); |
---|
| 2636 | + ret = __btrfs_add_free_space(block_group->fs_info, ctl, |
---|
| 2637 | + offset + bytes, |
---|
| 2638 | + old_end - (offset + bytes), |
---|
| 2639 | + info->trim_state); |
---|
2470 | 2640 | WARN_ON(ret); |
---|
2471 | 2641 | goto out; |
---|
2472 | 2642 | } |
---|
.. | .. |
---|
2478 | 2648 | goto again; |
---|
2479 | 2649 | } |
---|
2480 | 2650 | out_lock: |
---|
| 2651 | + btrfs_discard_update_discardable(block_group, ctl); |
---|
2481 | 2652 | spin_unlock(&ctl->tree_lock); |
---|
2482 | 2653 | out: |
---|
2483 | 2654 | return ret; |
---|
2484 | 2655 | } |
---|
2485 | 2656 | |
---|
2486 | | -void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, |
---|
| 2657 | +void btrfs_dump_free_space(struct btrfs_block_group *block_group, |
---|
2487 | 2658 | u64 bytes) |
---|
2488 | 2659 | { |
---|
2489 | 2660 | struct btrfs_fs_info *fs_info = block_group->fs_info; |
---|
.. | .. |
---|
2508 | 2679 | "%d blocks of free space at or bigger than bytes is", count); |
---|
2509 | 2680 | } |
---|
2510 | 2681 | |
---|
2511 | | -void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) |
---|
| 2682 | +void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group) |
---|
2512 | 2683 | { |
---|
2513 | 2684 | struct btrfs_fs_info *fs_info = block_group->fs_info; |
---|
2514 | 2685 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
---|
2515 | 2686 | |
---|
2516 | 2687 | spin_lock_init(&ctl->tree_lock); |
---|
2517 | 2688 | ctl->unit = fs_info->sectorsize; |
---|
2518 | | - ctl->start = block_group->key.objectid; |
---|
| 2689 | + ctl->start = block_group->start; |
---|
2519 | 2690 | ctl->private = block_group; |
---|
2520 | 2691 | ctl->op = &free_space_op; |
---|
2521 | 2692 | INIT_LIST_HEAD(&ctl->trimming_ranges); |
---|
.. | .. |
---|
2535 | 2706 | * pointed to by the cluster, someone else raced in and freed the |
---|
2536 | 2707 | * cluster already. In that case, we just return without changing anything |
---|
2537 | 2708 | */ |
---|
2538 | | -static int |
---|
2539 | | -__btrfs_return_cluster_to_free_space( |
---|
2540 | | - struct btrfs_block_group_cache *block_group, |
---|
| 2709 | +static void __btrfs_return_cluster_to_free_space( |
---|
| 2710 | + struct btrfs_block_group *block_group, |
---|
2541 | 2711 | struct btrfs_free_cluster *cluster) |
---|
2542 | 2712 | { |
---|
2543 | 2713 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
---|
.. | .. |
---|
2545 | 2715 | struct rb_node *node; |
---|
2546 | 2716 | |
---|
2547 | 2717 | spin_lock(&cluster->lock); |
---|
2548 | | - if (cluster->block_group != block_group) |
---|
2549 | | - goto out; |
---|
| 2718 | + if (cluster->block_group != block_group) { |
---|
| 2719 | + spin_unlock(&cluster->lock); |
---|
| 2720 | + return; |
---|
| 2721 | + } |
---|
2550 | 2722 | |
---|
2551 | 2723 | cluster->block_group = NULL; |
---|
2552 | 2724 | cluster->window_start = 0; |
---|
.. | .. |
---|
2563 | 2735 | |
---|
2564 | 2736 | bitmap = (entry->bitmap != NULL); |
---|
2565 | 2737 | if (!bitmap) { |
---|
| 2738 | + /* Merging treats extents as if they were new */ |
---|
| 2739 | + if (!btrfs_free_space_trimmed(entry)) { |
---|
| 2740 | + ctl->discardable_extents[BTRFS_STAT_CURR]--; |
---|
| 2741 | + ctl->discardable_bytes[BTRFS_STAT_CURR] -= |
---|
| 2742 | + entry->bytes; |
---|
| 2743 | + } |
---|
| 2744 | + |
---|
2566 | 2745 | try_merge_free_space(ctl, entry, false); |
---|
2567 | 2746 | steal_from_bitmap(ctl, entry, false); |
---|
| 2747 | + |
---|
| 2748 | + /* As we insert directly, update these statistics */ |
---|
| 2749 | + if (!btrfs_free_space_trimmed(entry)) { |
---|
| 2750 | + ctl->discardable_extents[BTRFS_STAT_CURR]++; |
---|
| 2751 | + ctl->discardable_bytes[BTRFS_STAT_CURR] += |
---|
| 2752 | + entry->bytes; |
---|
| 2753 | + } |
---|
2568 | 2754 | } |
---|
2569 | 2755 | tree_insert_offset(&ctl->free_space_offset, |
---|
2570 | 2756 | entry->offset, &entry->offset_index, bitmap); |
---|
2571 | 2757 | } |
---|
2572 | 2758 | cluster->root = RB_ROOT; |
---|
2573 | | - |
---|
2574 | | -out: |
---|
2575 | 2759 | spin_unlock(&cluster->lock); |
---|
2576 | 2760 | btrfs_put_block_group(block_group); |
---|
2577 | | - return 0; |
---|
2578 | 2761 | } |
---|
2579 | 2762 | |
---|
2580 | 2763 | static void __btrfs_remove_free_space_cache_locked( |
---|
.. | .. |
---|
2600 | 2783 | { |
---|
2601 | 2784 | spin_lock(&ctl->tree_lock); |
---|
2602 | 2785 | __btrfs_remove_free_space_cache_locked(ctl); |
---|
| 2786 | + if (ctl->private) |
---|
| 2787 | + btrfs_discard_update_discardable(ctl->private, ctl); |
---|
2603 | 2788 | spin_unlock(&ctl->tree_lock); |
---|
2604 | 2789 | } |
---|
2605 | 2790 | |
---|
2606 | | -void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) |
---|
| 2791 | +void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group) |
---|
2607 | 2792 | { |
---|
2608 | 2793 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
---|
2609 | 2794 | struct btrfs_free_cluster *cluster; |
---|
.. | .. |
---|
2621 | 2806 | cond_resched_lock(&ctl->tree_lock); |
---|
2622 | 2807 | } |
---|
2623 | 2808 | __btrfs_remove_free_space_cache_locked(ctl); |
---|
| 2809 | + btrfs_discard_update_discardable(block_group, ctl); |
---|
2624 | 2810 | spin_unlock(&ctl->tree_lock); |
---|
2625 | 2811 | |
---|
2626 | 2812 | } |
---|
2627 | 2813 | |
---|
2628 | | -u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, |
---|
| 2814 | +/** |
---|
| 2815 | + * btrfs_is_free_space_trimmed - see if everything is trimmed |
---|
| 2816 | + * @block_group: block_group of interest |
---|
| 2817 | + * |
---|
| 2818 | + * Walk @block_group's free space rb_tree to determine if everything is trimmed. |
---|
| 2819 | + */ |
---|
| 2820 | +bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group) |
---|
| 2821 | +{ |
---|
| 2822 | + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
---|
| 2823 | + struct btrfs_free_space *info; |
---|
| 2824 | + struct rb_node *node; |
---|
| 2825 | + bool ret = true; |
---|
| 2826 | + |
---|
| 2827 | + spin_lock(&ctl->tree_lock); |
---|
| 2828 | + node = rb_first(&ctl->free_space_offset); |
---|
| 2829 | + |
---|
| 2830 | + while (node) { |
---|
| 2831 | + info = rb_entry(node, struct btrfs_free_space, offset_index); |
---|
| 2832 | + |
---|
| 2833 | + if (!btrfs_free_space_trimmed(info)) { |
---|
| 2834 | + ret = false; |
---|
| 2835 | + break; |
---|
| 2836 | + } |
---|
| 2837 | + |
---|
| 2838 | + node = rb_next(node); |
---|
| 2839 | + } |
---|
| 2840 | + |
---|
| 2841 | + spin_unlock(&ctl->tree_lock); |
---|
| 2842 | + return ret; |
---|
| 2843 | +} |
---|
| 2844 | + |
---|
| 2845 | +u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group, |
---|
2629 | 2846 | u64 offset, u64 bytes, u64 empty_size, |
---|
2630 | 2847 | u64 *max_extent_size) |
---|
2631 | 2848 | { |
---|
2632 | 2849 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
---|
| 2850 | + struct btrfs_discard_ctl *discard_ctl = |
---|
| 2851 | + &block_group->fs_info->discard_ctl; |
---|
2633 | 2852 | struct btrfs_free_space *entry = NULL; |
---|
2634 | 2853 | u64 bytes_search = bytes + empty_size; |
---|
2635 | 2854 | u64 ret = 0; |
---|
2636 | 2855 | u64 align_gap = 0; |
---|
2637 | 2856 | u64 align_gap_len = 0; |
---|
| 2857 | + enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED; |
---|
2638 | 2858 | |
---|
2639 | 2859 | spin_lock(&ctl->tree_lock); |
---|
2640 | 2860 | entry = find_free_space(ctl, &offset, &bytes_search, |
---|
.. | .. |
---|
2645 | 2865 | ret = offset; |
---|
2646 | 2866 | if (entry->bitmap) { |
---|
2647 | 2867 | bitmap_clear_bits(ctl, entry, offset, bytes); |
---|
| 2868 | + |
---|
| 2869 | + if (!btrfs_free_space_trimmed(entry)) |
---|
| 2870 | + atomic64_add(bytes, &discard_ctl->discard_bytes_saved); |
---|
| 2871 | + |
---|
2648 | 2872 | if (!entry->bytes) |
---|
2649 | 2873 | free_bitmap(ctl, entry); |
---|
2650 | 2874 | } else { |
---|
2651 | 2875 | unlink_free_space(ctl, entry); |
---|
2652 | 2876 | align_gap_len = offset - entry->offset; |
---|
2653 | 2877 | align_gap = entry->offset; |
---|
| 2878 | + align_gap_trim_state = entry->trim_state; |
---|
| 2879 | + |
---|
| 2880 | + if (!btrfs_free_space_trimmed(entry)) |
---|
| 2881 | + atomic64_add(bytes, &discard_ctl->discard_bytes_saved); |
---|
2654 | 2882 | |
---|
2655 | 2883 | entry->offset = offset + bytes; |
---|
2656 | 2884 | WARN_ON(entry->bytes < bytes + align_gap_len); |
---|
.. | .. |
---|
2662 | 2890 | link_free_space(ctl, entry); |
---|
2663 | 2891 | } |
---|
2664 | 2892 | out: |
---|
| 2893 | + btrfs_discard_update_discardable(block_group, ctl); |
---|
2665 | 2894 | spin_unlock(&ctl->tree_lock); |
---|
2666 | 2895 | |
---|
2667 | 2896 | if (align_gap_len) |
---|
2668 | 2897 | __btrfs_add_free_space(block_group->fs_info, ctl, |
---|
2669 | | - align_gap, align_gap_len); |
---|
| 2898 | + align_gap, align_gap_len, |
---|
| 2899 | + align_gap_trim_state); |
---|
2670 | 2900 | return ret; |
---|
2671 | 2901 | } |
---|
2672 | 2902 | |
---|
.. | .. |
---|
2678 | 2908 | * Otherwise, it'll get a reference on the block group pointed to by the |
---|
2679 | 2909 | * cluster and remove the cluster from it. |
---|
2680 | 2910 | */ |
---|
2681 | | -int btrfs_return_cluster_to_free_space( |
---|
2682 | | - struct btrfs_block_group_cache *block_group, |
---|
| 2911 | +void btrfs_return_cluster_to_free_space( |
---|
| 2912 | + struct btrfs_block_group *block_group, |
---|
2683 | 2913 | struct btrfs_free_cluster *cluster) |
---|
2684 | 2914 | { |
---|
2685 | 2915 | struct btrfs_free_space_ctl *ctl; |
---|
2686 | | - int ret; |
---|
2687 | 2916 | |
---|
2688 | 2917 | /* first, get a safe pointer to the block group */ |
---|
2689 | 2918 | spin_lock(&cluster->lock); |
---|
.. | .. |
---|
2691 | 2920 | block_group = cluster->block_group; |
---|
2692 | 2921 | if (!block_group) { |
---|
2693 | 2922 | spin_unlock(&cluster->lock); |
---|
2694 | | - return 0; |
---|
| 2923 | + return; |
---|
2695 | 2924 | } |
---|
2696 | 2925 | } else if (cluster->block_group != block_group) { |
---|
2697 | 2926 | /* someone else has already freed it don't redo their work */ |
---|
2698 | 2927 | spin_unlock(&cluster->lock); |
---|
2699 | | - return 0; |
---|
| 2928 | + return; |
---|
2700 | 2929 | } |
---|
2701 | | - atomic_inc(&block_group->count); |
---|
| 2930 | + btrfs_get_block_group(block_group); |
---|
2702 | 2931 | spin_unlock(&cluster->lock); |
---|
2703 | 2932 | |
---|
2704 | 2933 | ctl = block_group->free_space_ctl; |
---|
2705 | 2934 | |
---|
2706 | 2935 | /* now return any extents the cluster had on it */ |
---|
2707 | 2936 | spin_lock(&ctl->tree_lock); |
---|
2708 | | - ret = __btrfs_return_cluster_to_free_space(block_group, cluster); |
---|
| 2937 | + __btrfs_return_cluster_to_free_space(block_group, cluster); |
---|
2709 | 2938 | spin_unlock(&ctl->tree_lock); |
---|
| 2939 | + |
---|
| 2940 | + btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group); |
---|
2710 | 2941 | |
---|
2711 | 2942 | /* finally drop our ref */ |
---|
2712 | 2943 | btrfs_put_block_group(block_group); |
---|
2713 | | - return ret; |
---|
2714 | 2944 | } |
---|
2715 | 2945 | |
---|
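The conversion to `void` above is safe because the return value carried no information the callers used; the function's real contract is the pin-then-work pattern: validate the back pointer under `cluster->lock`, take a block group reference before dropping the lock, then drain the cluster and queue discard work. A compile-and-run sketch of that pattern with invented userspace types (build with `-pthread`):

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct group { atomic_int refs; };
struct cluster { pthread_mutex_t lock; struct group *group; };

/* pin the group while the lock guards the pointer, then work outside it */
static void return_cluster(struct cluster *c)
{
	struct group *g;

	pthread_mutex_lock(&c->lock);
	g = c->group;
	if (!g) {				/* already detached elsewhere */
		pthread_mutex_unlock(&c->lock);
		return;
	}
	atomic_fetch_add(&g->refs, 1);		/* take a ref before unlocking */
	pthread_mutex_unlock(&c->lock);

	/* ... return extents to free space, queue discard work ... */

	atomic_fetch_sub(&g->refs, 1);		/* finally drop our ref */
}

int main(void)
{
	struct group g = { .refs = 1 };
	struct cluster c = { PTHREAD_MUTEX_INITIALIZER, &g };

	return_cluster(&c);
	printf("refs back to %d\n", atomic_load(&g.refs));
	return 0;
}
```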
2716 | | -static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, |
---|
| 2946 | +static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group, |
---|
2717 | 2947 | struct btrfs_free_cluster *cluster, |
---|
2718 | 2948 | struct btrfs_free_space *entry, |
---|
2719 | 2949 | u64 bytes, u64 min_start, |
---|
.. | .. |
---|
2746 | 2976 | * if it couldn't find anything suitably large, or a logical disk offset |
---|
2747 | 2977 | * if things worked out |
---|
2748 | 2978 | */ |
---|
2749 | | -u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, |
---|
| 2979 | +u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group, |
---|
2750 | 2980 | struct btrfs_free_cluster *cluster, u64 bytes, |
---|
2751 | 2981 | u64 min_start, u64 *max_extent_size) |
---|
2752 | 2982 | { |
---|
2753 | 2983 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
---|
| 2984 | + struct btrfs_discard_ctl *discard_ctl = |
---|
| 2985 | + &block_group->fs_info->discard_ctl; |
---|
2754 | 2986 | struct btrfs_free_space *entry = NULL; |
---|
2755 | 2987 | struct rb_node *node; |
---|
2756 | 2988 | u64 ret = 0; |
---|
.. | .. |
---|
2803 | 3035 | entry->bytes -= bytes; |
---|
2804 | 3036 | } |
---|
2805 | 3037 | |
---|
2806 | | - if (entry->bytes == 0) |
---|
2807 | | - rb_erase(&entry->offset_index, &cluster->root); |
---|
2808 | 3038 | break; |
---|
2809 | 3039 | } |
---|
2810 | 3040 | out: |
---|
.. | .. |
---|
2815 | 3045 | |
---|
2816 | 3046 | spin_lock(&ctl->tree_lock); |
---|
2817 | 3047 | |
---|
| 3048 | + if (!btrfs_free_space_trimmed(entry)) |
---|
| 3049 | + atomic64_add(bytes, &discard_ctl->discard_bytes_saved); |
---|
| 3050 | + |
---|
2818 | 3051 | ctl->free_space -= bytes; |
---|
| 3052 | + if (!entry->bitmap && !btrfs_free_space_trimmed(entry)) |
---|
| 3053 | + ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; |
---|
| 3054 | + |
---|
| 3055 | + spin_lock(&cluster->lock); |
---|
2819 | 3056 | if (entry->bytes == 0) { |
---|
| 3057 | + rb_erase(&entry->offset_index, &cluster->root); |
---|
2820 | 3058 | ctl->free_extents--; |
---|
2821 | 3059 | if (entry->bitmap) { |
---|
2822 | 3060 | kmem_cache_free(btrfs_free_space_bitmap_cachep, |
---|
2823 | 3061 | entry->bitmap); |
---|
2824 | 3062 | ctl->total_bitmaps--; |
---|
2825 | 3063 | ctl->op->recalc_thresholds(ctl); |
---|
| 3064 | + } else if (!btrfs_free_space_trimmed(entry)) { |
---|
| 3065 | + ctl->discardable_extents[BTRFS_STAT_CURR]--; |
---|
2826 | 3066 | } |
---|
2827 | 3067 | kmem_cache_free(btrfs_free_space_cachep, entry); |
---|
2828 | 3068 | } |
---|
2829 | 3069 | |
---|
| 3070 | + spin_unlock(&cluster->lock); |
---|
2830 | 3071 | spin_unlock(&ctl->tree_lock); |
---|
2831 | 3072 | |
---|
2832 | 3073 | return ret; |
---|
2833 | 3074 | } |
---|
2834 | 3075 | |
---|
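Two details in the hunk above deserve a gloss: the `rb_erase()` now happens under `cluster->lock` rather than in the search loop, and the discardable counters shrink only for untrimmed, non-bitmap entries, since already-trimmed space needs no further discard pass. A small runnable model of that accounting rule, with made-up names:

```c
#include <stdbool.h>
#include <stdio.h>

enum trim_state { UNTRIMMED, TRIMMED, TRIMMING };

struct stats { long long discardable_bytes, discardable_extents; };

/* only not-yet-trimmed (or mid-trim) extent entries count as discardable */
static void account_cluster_alloc(struct stats *s, bool is_bitmap,
				  enum trim_state ts,
				  unsigned long long bytes, bool now_empty)
{
	if (!is_bitmap && ts != TRIMMED)
		s->discardable_bytes -= (long long)bytes;
	if (now_empty && !is_bitmap && ts != TRIMMED)
		s->discardable_extents--;
}

int main(void)
{
	struct stats s = { 1 << 20, 1 };

	account_cluster_alloc(&s, false, UNTRIMMED, 1 << 20, true);
	printf("%lld bytes / %lld extents left\n",
	       s.discardable_bytes, s.discardable_extents);
	return 0;
}
```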
2835 | | -static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, |
---|
| 3076 | +static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group, |
---|
2836 | 3077 | struct btrfs_free_space *entry, |
---|
2837 | 3078 | struct btrfs_free_cluster *cluster, |
---|
2838 | 3079 | u64 offset, u64 bytes, |
---|
.. | .. |
---|
2914 | 3155 | * extent of cont1_bytes, and other clusters of at least min_bytes. |
---|
2915 | 3156 | */ |
---|
2916 | 3157 | static noinline int |
---|
2917 | | -setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, |
---|
| 3158 | +setup_cluster_no_bitmap(struct btrfs_block_group *block_group, |
---|
2918 | 3159 | struct btrfs_free_cluster *cluster, |
---|
2919 | 3160 | struct list_head *bitmaps, u64 offset, u64 bytes, |
---|
2920 | 3161 | u64 cont1_bytes, u64 min_bytes) |
---|
.. | .. |
---|
3005 | 3246 | * that we have already failed to find extents that will work. |
---|
3006 | 3247 | */ |
---|
3007 | 3248 | static noinline int |
---|
3008 | | -setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, |
---|
| 3249 | +setup_cluster_bitmap(struct btrfs_block_group *block_group, |
---|
3009 | 3250 | struct btrfs_free_cluster *cluster, |
---|
3010 | 3251 | struct list_head *bitmaps, u64 offset, u64 bytes, |
---|
3011 | 3252 | u64 cont1_bytes, u64 min_bytes) |
---|
.. | .. |
---|
3055 | 3296 | * returns zero and sets up cluster if things worked out, otherwise |
---|
3056 | 3297 | * it returns -ENOSPC |
---|
3057 | 3298 | */ |
---|
3058 | | -int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info, |
---|
3059 | | - struct btrfs_block_group_cache *block_group, |
---|
| 3299 | +int btrfs_find_space_cluster(struct btrfs_block_group *block_group, |
---|
3060 | 3300 | struct btrfs_free_cluster *cluster, |
---|
3061 | 3301 | u64 offset, u64 bytes, u64 empty_size) |
---|
3062 | 3302 | { |
---|
| 3303 | + struct btrfs_fs_info *fs_info = block_group->fs_info; |
---|
3063 | 3304 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
---|
3064 | 3305 | struct btrfs_free_space *entry, *tmp; |
---|
3065 | 3306 | LIST_HEAD(bitmaps); |
---|
.. | .. |
---|
3118 | 3359 | list_del_init(&entry->list); |
---|
3119 | 3360 | |
---|
3120 | 3361 | if (!ret) { |
---|
3121 | | - atomic_inc(&block_group->count); |
---|
| 3362 | + btrfs_get_block_group(block_group); |
---|
3122 | 3363 | list_add_tail(&cluster->block_group_list, |
---|
3123 | 3364 | &block_group->cluster_list); |
---|
3124 | 3365 | cluster->block_group = block_group; |
---|
.. | .. |
---|
3146 | 3387 | cluster->block_group = NULL; |
---|
3147 | 3388 | } |
---|
3148 | 3389 | |
---|
3149 | | -static int do_trimming(struct btrfs_block_group_cache *block_group, |
---|
| 3390 | +static int do_trimming(struct btrfs_block_group *block_group, |
---|
3150 | 3391 | u64 *total_trimmed, u64 start, u64 bytes, |
---|
3151 | 3392 | u64 reserved_start, u64 reserved_bytes, |
---|
| 3393 | + enum btrfs_trim_state reserved_trim_state, |
---|
3152 | 3394 | struct btrfs_trim_range *trim_entry) |
---|
3153 | 3395 | { |
---|
3154 | 3396 | struct btrfs_space_info *space_info = block_group->space_info; |
---|
.. | .. |
---|
3156 | 3398 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
---|
3157 | 3399 | int ret; |
---|
3158 | 3400 | int update = 0; |
---|
| 3401 | + const u64 end = start + bytes; |
---|
| 3402 | + const u64 reserved_end = reserved_start + reserved_bytes; |
---|
| 3403 | + enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; |
---|
3159 | 3404 | u64 trimmed = 0; |
---|
3160 | 3405 | |
---|
3161 | 3406 | spin_lock(&space_info->lock); |
---|
.. | .. |
---|
3169 | 3414 | spin_unlock(&space_info->lock); |
---|
3170 | 3415 | |
---|
3171 | 3416 | ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); |
---|
3172 | | - if (!ret) |
---|
| 3417 | + if (!ret) { |
---|
3173 | 3418 | *total_trimmed += trimmed; |
---|
| 3419 | + trim_state = BTRFS_TRIM_STATE_TRIMMED; |
---|
| 3420 | + } |
---|
3174 | 3421 | |
---|
3175 | 3422 | mutex_lock(&ctl->cache_writeout_mutex); |
---|
3176 | | - btrfs_add_free_space(block_group, reserved_start, reserved_bytes); |
---|
| 3423 | + if (reserved_start < start) |
---|
| 3424 | + __btrfs_add_free_space(fs_info, ctl, reserved_start, |
---|
| 3425 | + start - reserved_start, |
---|
| 3426 | + reserved_trim_state); |
---|
| 3427 | + if (start + bytes < reserved_start + reserved_bytes) |
---|
| 3428 | + __btrfs_add_free_space(fs_info, ctl, end, reserved_end - end, |
---|
| 3429 | + reserved_trim_state); |
---|
| 3430 | + __btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state); |
---|
3177 | 3431 | list_del(&trim_entry->list); |
---|
3178 | 3432 | mutex_unlock(&ctl->cache_writeout_mutex); |
---|
3179 | 3433 | |
---|
.. | .. |
---|
3184 | 3438 | space_info->bytes_readonly += reserved_bytes; |
---|
3185 | 3439 | block_group->reserved -= reserved_bytes; |
---|
3186 | 3440 | space_info->bytes_reserved -= reserved_bytes; |
---|
3187 | | - spin_unlock(&space_info->lock); |
---|
3188 | 3441 | spin_unlock(&block_group->lock); |
---|
| 3442 | + spin_unlock(&space_info->lock); |
---|
3189 | 3443 | } |
---|
3190 | 3444 | |
---|
3191 | 3445 | return ret; |
---|
3192 | 3446 | } |
---|
3193 | 3447 | |
---|
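The new `reserved_trim_state` plumbing above exists because the reserved range `[reserved_start, reserved_end)` can be wider than the range actually trimmed, `[start, end)`: the leading and trailing remainders go back with the reserved range's original trim state, while the middle is re-added marked trimmed or untrimmed depending on whether `btrfs_discard_extent()` succeeded. A standalone worked example of the three-way split (values invented):

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t reserved_start = 0, reserved_bytes = 1 << 20;	/* [0, 1M) */
	uint64_t start = 4096, bytes = (1 << 20) - 8192;	/* trimmed middle */
	uint64_t end = start + bytes;
	uint64_t reserved_end = reserved_start + reserved_bytes;

	if (reserved_start < start)
		printf("head  : [%llu, +%llu) keeps the reserved trim state\n",
		       (unsigned long long)reserved_start,
		       (unsigned long long)(start - reserved_start));
	if (end < reserved_end)
		printf("tail  : [%llu, +%llu) keeps the reserved trim state\n",
		       (unsigned long long)end,
		       (unsigned long long)(reserved_end - end));
	printf("middle: [%llu, +%llu) marked per the discard result\n",
	       (unsigned long long)start, (unsigned long long)bytes);
	return 0;
}
```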
3194 | | -static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, |
---|
3195 | | - u64 *total_trimmed, u64 start, u64 end, u64 minlen) |
---|
| 3448 | +/* |
---|
| 3449 | + * If @async is set, then we will trim 1 region and return. |
---|
| 3450 | + */ |
---|
| 3451 | +static int trim_no_bitmap(struct btrfs_block_group *block_group, |
---|
| 3452 | + u64 *total_trimmed, u64 start, u64 end, u64 minlen, |
---|
| 3453 | + bool async) |
---|
3196 | 3454 | { |
---|
| 3455 | + struct btrfs_discard_ctl *discard_ctl = |
---|
| 3456 | + &block_group->fs_info->discard_ctl; |
---|
3197 | 3457 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
---|
3198 | 3458 | struct btrfs_free_space *entry; |
---|
3199 | 3459 | struct rb_node *node; |
---|
3200 | 3460 | int ret = 0; |
---|
3201 | 3461 | u64 extent_start; |
---|
3202 | 3462 | u64 extent_bytes; |
---|
| 3463 | + enum btrfs_trim_state extent_trim_state; |
---|
3203 | 3464 | u64 bytes; |
---|
| 3465 | + const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size); |
---|
3204 | 3466 | |
---|
3205 | 3467 | while (start < end) { |
---|
3206 | 3468 | struct btrfs_trim_range trim_entry; |
---|
.. | .. |
---|
3208 | 3470 | mutex_lock(&ctl->cache_writeout_mutex); |
---|
3209 | 3471 | spin_lock(&ctl->tree_lock); |
---|
3210 | 3472 | |
---|
3211 | | - if (ctl->free_space < minlen) { |
---|
3212 | | - spin_unlock(&ctl->tree_lock); |
---|
3213 | | - mutex_unlock(&ctl->cache_writeout_mutex); |
---|
3214 | | - break; |
---|
3215 | | - } |
---|
| 3473 | + if (ctl->free_space < minlen) |
---|
| 3474 | + goto out_unlock; |
---|
3216 | 3475 | |
---|
3217 | 3476 | entry = tree_search_offset(ctl, start, 0, 1); |
---|
3218 | | - if (!entry) { |
---|
3219 | | - spin_unlock(&ctl->tree_lock); |
---|
3220 | | - mutex_unlock(&ctl->cache_writeout_mutex); |
---|
3221 | | - break; |
---|
3222 | | - } |
---|
| 3477 | + if (!entry) |
---|
| 3478 | + goto out_unlock; |
---|
3223 | 3479 | |
---|
3224 | | - /* skip bitmaps */ |
---|
3225 | | - while (entry->bitmap) { |
---|
| 3480 | + /* Skip bitmaps and if async, already trimmed entries */ |
---|
| 3481 | + while (entry->bitmap || |
---|
| 3482 | + (async && btrfs_free_space_trimmed(entry))) { |
---|
3226 | 3483 | node = rb_next(&entry->offset_index); |
---|
3227 | | - if (!node) { |
---|
3228 | | - spin_unlock(&ctl->tree_lock); |
---|
3229 | | - mutex_unlock(&ctl->cache_writeout_mutex); |
---|
3230 | | - goto out; |
---|
3231 | | - } |
---|
| 3484 | + if (!node) |
---|
| 3485 | + goto out_unlock; |
---|
3232 | 3486 | entry = rb_entry(node, struct btrfs_free_space, |
---|
3233 | 3487 | offset_index); |
---|
3234 | 3488 | } |
---|
3235 | 3489 | |
---|
3236 | | - if (entry->offset >= end) { |
---|
3237 | | - spin_unlock(&ctl->tree_lock); |
---|
3238 | | - mutex_unlock(&ctl->cache_writeout_mutex); |
---|
3239 | | - break; |
---|
3240 | | - } |
---|
| 3490 | + if (entry->offset >= end) |
---|
| 3491 | + goto out_unlock; |
---|
3241 | 3492 | |
---|
3242 | 3493 | extent_start = entry->offset; |
---|
3243 | 3494 | extent_bytes = entry->bytes; |
---|
3244 | | - start = max(start, extent_start); |
---|
3245 | | - bytes = min(extent_start + extent_bytes, end) - start; |
---|
3246 | | - if (bytes < minlen) { |
---|
3247 | | - spin_unlock(&ctl->tree_lock); |
---|
3248 | | - mutex_unlock(&ctl->cache_writeout_mutex); |
---|
3249 | | - goto next; |
---|
3250 | | - } |
---|
| 3495 | + extent_trim_state = entry->trim_state; |
---|
| 3496 | + if (async) { |
---|
| 3497 | + start = entry->offset; |
---|
| 3498 | + bytes = entry->bytes; |
---|
| 3499 | + if (bytes < minlen) { |
---|
| 3500 | + spin_unlock(&ctl->tree_lock); |
---|
| 3501 | + mutex_unlock(&ctl->cache_writeout_mutex); |
---|
| 3502 | + goto next; |
---|
| 3503 | + } |
---|
| 3504 | + unlink_free_space(ctl, entry); |
---|
| 3505 | + /* |
---|
| 3506 | + * Let bytes = max_discard_size + X. |
---|
| 3507 | + * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim |
---|
| 3508 | + * X when we come back around. So trim it now. |
---|
| 3509 | + */ |
---|
| 3510 | + if (max_discard_size && |
---|
| 3511 | + bytes >= (max_discard_size + |
---|
| 3512 | + BTRFS_ASYNC_DISCARD_MIN_FILTER)) { |
---|
| 3513 | + bytes = max_discard_size; |
---|
| 3514 | + extent_bytes = max_discard_size; |
---|
| 3515 | + entry->offset += max_discard_size; |
---|
| 3516 | + entry->bytes -= max_discard_size; |
---|
| 3517 | + link_free_space(ctl, entry); |
---|
| 3518 | + } else { |
---|
| 3519 | + kmem_cache_free(btrfs_free_space_cachep, entry); |
---|
| 3520 | + } |
---|
| 3521 | + } else { |
---|
| 3522 | + start = max(start, extent_start); |
---|
| 3523 | + bytes = min(extent_start + extent_bytes, end) - start; |
---|
| 3524 | + if (bytes < minlen) { |
---|
| 3525 | + spin_unlock(&ctl->tree_lock); |
---|
| 3526 | + mutex_unlock(&ctl->cache_writeout_mutex); |
---|
| 3527 | + goto next; |
---|
| 3528 | + } |
---|
3251 | 3529 | |
---|
3252 | | - unlink_free_space(ctl, entry); |
---|
3253 | | - kmem_cache_free(btrfs_free_space_cachep, entry); |
---|
| 3530 | + unlink_free_space(ctl, entry); |
---|
| 3531 | + kmem_cache_free(btrfs_free_space_cachep, entry); |
---|
| 3532 | + } |
---|
3254 | 3533 | |
---|
3255 | 3534 | spin_unlock(&ctl->tree_lock); |
---|
3256 | 3535 | trim_entry.start = extent_start; |
---|
.. | .. |
---|
3259 | 3538 | mutex_unlock(&ctl->cache_writeout_mutex); |
---|
3260 | 3539 | |
---|
3261 | 3540 | ret = do_trimming(block_group, total_trimmed, start, bytes, |
---|
3262 | | - extent_start, extent_bytes, &trim_entry); |
---|
3263 | | - if (ret) |
---|
| 3541 | + extent_start, extent_bytes, extent_trim_state, |
---|
| 3542 | + &trim_entry); |
---|
| 3543 | + if (ret) { |
---|
| 3544 | + block_group->discard_cursor = start + bytes; |
---|
3264 | 3545 | break; |
---|
| 3546 | + } |
---|
3265 | 3547 | next: |
---|
3266 | 3548 | start += bytes; |
---|
| 3549 | + block_group->discard_cursor = start; |
---|
| 3550 | + if (async && *total_trimmed) |
---|
| 3551 | + break; |
---|
3267 | 3552 | |
---|
3268 | 3553 | if (fatal_signal_pending(current)) { |
---|
3269 | 3554 | ret = -ERESTARTSYS; |
---|
.. | .. |
---|
3272 | 3557 | |
---|
3273 | 3558 | cond_resched(); |
---|
3274 | 3559 | } |
---|
3275 | | -out: |
---|
| 3560 | + |
---|
| 3561 | + return ret; |
---|
| 3562 | + |
---|
| 3563 | +out_unlock: |
---|
| 3564 | + block_group->discard_cursor = btrfs_block_group_end(block_group); |
---|
| 3565 | + spin_unlock(&ctl->tree_lock); |
---|
| 3566 | + mutex_unlock(&ctl->cache_writeout_mutex); |
---|
| 3567 | + |
---|
3276 | 3568 | return ret; |
---|
3277 | 3569 | } |
---|
3278 | 3570 | |
---|
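The async path above caps each discard at `max_discard_size`, but deliberately over-trims when the remainder would be too small to ever come back for: if the tail `X` is below `BTRFS_ASYNC_DISCARD_MIN_FILTER`, splitting would strand it, so the whole extent is trimmed in one go. A runnable model of that decision (constants invented for the example):

```c
#include <stdio.h>
#include <stdint.h>

#define MAX_DISCARD	(64ULL << 20)	/* e.g. 64M per async discard */
#define MIN_FILTER	(32ULL << 10)	/* tails below this never get trimmed */

int main(void)
{
	uint64_t extent = MAX_DISCARD + (16ULL << 10);	/* tail X = 16K */
	uint64_t now, later;

	if (extent >= MAX_DISCARD + MIN_FILTER) {
		now = MAX_DISCARD;		/* split: remainder is relinked */
		later = extent - MAX_DISCARD;
	} else {
		now = extent;			/* X < MIN_FILTER: trim it all now */
		later = 0;
	}
	printf("trim now: %llu, relink for later: %llu\n",
	       (unsigned long long)now, (unsigned long long)later);
	return 0;
}
```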
3279 | | -static int trim_bitmaps(struct btrfs_block_group_cache *block_group, |
---|
3280 | | - u64 *total_trimmed, u64 start, u64 end, u64 minlen) |
---|
| 3571 | +/* |
---|
| 3572 | + * If we break out of trimming a bitmap prematurely, we should reset the |
---|
| 3573 | + * trimming bit. In a rather contrived case, it's possible to race here, so |
---|
| 3574 | + * reset the state to BTRFS_TRIM_STATE_UNTRIMMED. |
---|
| 3575 | + * |
---|
| 3576 | + * start = start of bitmap |
---|
| 3577 | + * end = near end of bitmap |
---|
| 3578 | + * |
---|
| 3579 | + * Thread 1: Thread 2: |
---|
| 3580 | + * trim_bitmaps(start) |
---|
| 3581 | + * trim_bitmaps(end) |
---|
| 3582 | + * end_trimming_bitmap() |
---|
| 3583 | + * reset_trimming_bitmap() |
---|
| 3584 | + */ |
---|
| 3585 | +static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset) |
---|
3281 | 3586 | { |
---|
| 3587 | + struct btrfs_free_space *entry; |
---|
| 3588 | + |
---|
| 3589 | + spin_lock(&ctl->tree_lock); |
---|
| 3590 | + entry = tree_search_offset(ctl, offset, 1, 0); |
---|
| 3591 | + if (entry) { |
---|
| 3592 | + if (btrfs_free_space_trimmed(entry)) { |
---|
| 3593 | + ctl->discardable_extents[BTRFS_STAT_CURR] += |
---|
| 3594 | + entry->bitmap_extents; |
---|
| 3595 | + ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes; |
---|
| 3596 | + } |
---|
| 3597 | + entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; |
---|
| 3598 | + } |
---|
| 3599 | + |
---|
| 3600 | + spin_unlock(&ctl->tree_lock); |
---|
| 3601 | +} |
---|
| 3602 | + |
---|
| 3603 | +static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl, |
---|
| 3604 | + struct btrfs_free_space *entry) |
---|
| 3605 | +{ |
---|
| 3606 | + if (btrfs_free_space_trimming_bitmap(entry)) { |
---|
| 3607 | + entry->trim_state = BTRFS_TRIM_STATE_TRIMMED; |
---|
| 3608 | + ctl->discardable_extents[BTRFS_STAT_CURR] -= |
---|
| 3609 | + entry->bitmap_extents; |
---|
| 3610 | + ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes; |
---|
| 3611 | + } |
---|
| 3612 | +} |
---|
| 3613 | + |
---|
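The two helpers above are intentionally inverse: end_trimming_bitmap() removes a fully scanned bitmap's extents and bytes from the discardable counters, and reset_trimming_bitmap() adds them back when a scan is abandoned (or loses the race in the diagram; note it only re-adds if the entry had already been marked trimmed). A trivial runnable model of that symmetry, names invented:

```c
#include <stdio.h>

struct ctl { long long disc_extents, disc_bytes; };

/* leaving the discardable pool when a full bitmap scan completes ... */
static void end_trimming(struct ctl *c, long long extents, long long bytes)
{
	c->disc_extents -= extents;
	c->disc_bytes -= bytes;
}

/* ... and re-entering it when the trim state is reset afterwards */
static void reset_trimming(struct ctl *c, long long extents, long long bytes)
{
	c->disc_extents += extents;
	c->disc_bytes += bytes;
}

int main(void)
{
	struct ctl c = { 8, 1 << 20 };

	end_trimming(&c, 8, 1 << 20);
	reset_trimming(&c, 8, 1 << 20);	/* exact inverse */
	printf("%lld extents, %lld bytes\n", c.disc_extents, c.disc_bytes);
	return 0;
}
```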
| 3614 | +/* |
---|
| 3615 | + * If @async is set, then we will trim 1 region and return. |
---|
| 3616 | + */ |
---|
| 3617 | +static int trim_bitmaps(struct btrfs_block_group *block_group, |
---|
| 3618 | + u64 *total_trimmed, u64 start, u64 end, u64 minlen, |
---|
| 3619 | + u64 maxlen, bool async) |
---|
| 3620 | +{ |
---|
| 3621 | + struct btrfs_discard_ctl *discard_ctl = |
---|
| 3622 | + &block_group->fs_info->discard_ctl; |
---|
3282 | 3623 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
---|
3283 | 3624 | struct btrfs_free_space *entry; |
---|
3284 | 3625 | int ret = 0; |
---|
3285 | 3626 | int ret2; |
---|
3286 | 3627 | u64 bytes; |
---|
3287 | 3628 | u64 offset = offset_to_bitmap(ctl, start); |
---|
| 3629 | + const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size); |
---|
3288 | 3630 | |
---|
3289 | 3631 | while (offset < end) { |
---|
3290 | 3632 | bool next_bitmap = false; |
---|
.. | .. |
---|
3294 | 3636 | spin_lock(&ctl->tree_lock); |
---|
3295 | 3637 | |
---|
3296 | 3638 | if (ctl->free_space < minlen) { |
---|
| 3639 | + block_group->discard_cursor = |
---|
| 3640 | + btrfs_block_group_end(block_group); |
---|
3297 | 3641 | spin_unlock(&ctl->tree_lock); |
---|
3298 | 3642 | mutex_unlock(&ctl->cache_writeout_mutex); |
---|
3299 | 3643 | break; |
---|
3300 | 3644 | } |
---|
3301 | 3645 | |
---|
3302 | 3646 | entry = tree_search_offset(ctl, offset, 1, 0); |
---|
3303 | | - if (!entry) { |
---|
| 3647 | + /* |
---|
| 3648 | + * Bitmaps are marked trimmed lossily now to prevent constant |
---|
| 3649 | + * discarding of the same bitmap (the reason why we are bound |
---|
| 3650 | + * by the filters). So, retrim the block group bitmaps when we |
---|
| 3651 | + * are preparing to punt to the unused_bgs list. This uses |
---|
| 3652 | + * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED |
---|
| 3653 | + * which is the only discard index that sets @minlen to 0. |
---|
| 3654 | + */ |
---|
| 3655 | + if (!entry || (async && minlen && start == offset && |
---|
| 3656 | + btrfs_free_space_trimmed(entry))) { |
---|
3304 | 3657 | spin_unlock(&ctl->tree_lock); |
---|
3305 | 3658 | mutex_unlock(&ctl->cache_writeout_mutex); |
---|
3306 | 3659 | next_bitmap = true; |
---|
3307 | 3660 | goto next; |
---|
3308 | 3661 | } |
---|
| 3662 | + |
---|
| 3663 | + /* |
---|
| 3664 | + * Async discard bitmap trimming begins by setting the start |
---|
| 3665 | + * to the block group's start offset, and offset_to_bitmap() aligns to the |
---|
| 3666 | + * start of the bitmap. This lets us know we are fully |
---|
| 3667 | + * scanning the bitmap rather than only some portion of it. |
---|
| 3668 | + */ |
---|
| 3669 | + if (start == offset) |
---|
| 3670 | + entry->trim_state = BTRFS_TRIM_STATE_TRIMMING; |
---|
3309 | 3671 | |
---|
3310 | 3672 | bytes = minlen; |
---|
3311 | 3673 | ret2 = search_bitmap(ctl, entry, &start, &bytes, false); |
---|
3312 | 3674 | if (ret2 || start >= end) { |
---|
| 3675 | + /* |
---|
| 3676 | + * We lossily consider a bitmap trimmed if we only skip |
---|
| 3677 | + * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER. |
---|
| 3678 | + */ |
---|
| 3679 | + if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER) |
---|
| 3680 | + end_trimming_bitmap(ctl, entry); |
---|
| 3681 | + else |
---|
| 3682 | + entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; |
---|
3313 | 3683 | spin_unlock(&ctl->tree_lock); |
---|
3314 | 3684 | mutex_unlock(&ctl->cache_writeout_mutex); |
---|
3315 | 3685 | next_bitmap = true; |
---|
3316 | 3686 | goto next; |
---|
3317 | 3687 | } |
---|
3318 | 3688 | |
---|
| 3689 | + /* |
---|
| 3690 | + * We already trimmed a region, but are using the locking above |
---|
| 3691 | + * to reset the trim_state. |
---|
| 3692 | + */ |
---|
| 3693 | + if (async && *total_trimmed) { |
---|
| 3694 | + spin_unlock(&ctl->tree_lock); |
---|
| 3695 | + mutex_unlock(&ctl->cache_writeout_mutex); |
---|
| 3696 | + goto out; |
---|
| 3697 | + } |
---|
| 3698 | + |
---|
3319 | 3699 | bytes = min(bytes, end - start); |
---|
3320 | | - if (bytes < minlen) { |
---|
| 3700 | + if (bytes < minlen || (async && maxlen && bytes > maxlen)) { |
---|
3321 | 3701 | spin_unlock(&ctl->tree_lock); |
---|
3322 | 3702 | mutex_unlock(&ctl->cache_writeout_mutex); |
---|
3323 | 3703 | goto next; |
---|
3324 | 3704 | } |
---|
| 3705 | + |
---|
| 3706 | + /* |
---|
| 3707 | + * Let bytes = max_discard_size + X. |
---|
| 3708 | + * If X < @minlen, we won't trim X when we come back around. |
---|
| 3709 | + * So trim it now. We differ here from trimming extents as we |
---|
| 3710 | + * don't keep individual state per bit. |
---|
| 3711 | + */ |
---|
| 3712 | + if (async && |
---|
| 3713 | + max_discard_size && |
---|
| 3714 | + bytes > (max_discard_size + minlen)) |
---|
| 3715 | + bytes = max_discard_size; |
---|
3325 | 3716 | |
---|
3326 | 3717 | bitmap_clear_bits(ctl, entry, start, bytes); |
---|
3327 | 3718 | if (entry->bytes == 0) |
---|
.. | .. |
---|
3334 | 3725 | mutex_unlock(&ctl->cache_writeout_mutex); |
---|
3335 | 3726 | |
---|
3336 | 3727 | ret = do_trimming(block_group, total_trimmed, start, bytes, |
---|
3337 | | - start, bytes, &trim_entry); |
---|
3338 | | - if (ret) |
---|
| 3728 | + start, bytes, 0, &trim_entry); |
---|
| 3729 | + if (ret) { |
---|
| 3730 | + reset_trimming_bitmap(ctl, offset); |
---|
| 3731 | + block_group->discard_cursor = |
---|
| 3732 | + btrfs_block_group_end(block_group); |
---|
3339 | 3733 | break; |
---|
| 3734 | + } |
---|
3340 | 3735 | next: |
---|
3341 | 3736 | if (next_bitmap) { |
---|
3342 | 3737 | offset += BITS_PER_BITMAP * ctl->unit; |
---|
| 3738 | + start = offset; |
---|
3343 | 3739 | } else { |
---|
3344 | 3740 | start += bytes; |
---|
3345 | | - if (start >= offset + BITS_PER_BITMAP * ctl->unit) |
---|
3346 | | - offset += BITS_PER_BITMAP * ctl->unit; |
---|
3347 | 3741 | } |
---|
| 3742 | + block_group->discard_cursor = start; |
---|
3348 | 3743 | |
---|
3349 | 3744 | if (fatal_signal_pending(current)) { |
---|
| 3745 | + if (start != offset) |
---|
| 3746 | + reset_trimming_bitmap(ctl, offset); |
---|
3350 | 3747 | ret = -ERESTARTSYS; |
---|
3351 | 3748 | break; |
---|
3352 | 3749 | } |
---|
.. | .. |
---|
3354 | 3751 | cond_resched(); |
---|
3355 | 3752 | } |
---|
3356 | 3753 | |
---|
| 3754 | + if (offset >= end) |
---|
| 3755 | + block_group->discard_cursor = end; |
---|
| 3756 | + |
---|
| 3757 | +out: |
---|
3357 | 3758 | return ret; |
---|
3358 | 3759 | } |
---|
3359 | 3760 | |
---|
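trim_bitmaps() leans on the alignment described in its comments: a scan whose `start` equals the bitmap-aligned `offset` is a full scan of that bitmap, so only then may the entry be marked `BTRFS_TRIM_STATE_TRIMMING`. A simplified model of that check, assuming the free space starts at offset 0 (the kernel version aligns relative to `ctl->start`):

```c
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE	4096ULL
#define BITS_PER_BITMAP	(PAGE_SIZE * 8)

/* round an offset down to the start of the bitmap that covers it */
static uint64_t offset_to_bitmap(uint64_t unit, uint64_t offset)
{
	uint64_t bytes_per_bitmap = BITS_PER_BITMAP * unit;

	return offset - (offset % bytes_per_bitmap);
}

int main(void)
{
	uint64_t unit = 4096;			/* sectorsize */
	uint64_t start = 300ULL << 20;		/* arbitrary trim start */
	uint64_t offset = offset_to_bitmap(unit, start);

	printf("bitmap at %llu, full scan: %s\n",
	       (unsigned long long)offset, start == offset ? "yes" : "no");
	return 0;
}
```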
3360 | | -void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache) |
---|
| 3761 | +int btrfs_trim_block_group(struct btrfs_block_group *block_group, |
---|
| 3762 | + u64 *trimmed, u64 start, u64 end, u64 minlen) |
---|
3361 | 3763 | { |
---|
3362 | | - atomic_inc(&cache->trimming); |
---|
3363 | | -} |
---|
| 3764 | + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
---|
| 3765 | + int ret; |
---|
| 3766 | + u64 rem = 0; |
---|
3364 | 3767 | |
---|
3365 | | -void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group) |
---|
3366 | | -{ |
---|
3367 | | - struct btrfs_fs_info *fs_info = block_group->fs_info; |
---|
3368 | | - struct extent_map_tree *em_tree; |
---|
3369 | | - struct extent_map *em; |
---|
3370 | | - bool cleanup; |
---|
| 3768 | + *trimmed = 0; |
---|
3371 | 3769 | |
---|
3372 | 3770 | spin_lock(&block_group->lock); |
---|
3373 | | - cleanup = (atomic_dec_and_test(&block_group->trimming) && |
---|
3374 | | - block_group->removed); |
---|
| 3771 | + if (block_group->removed) { |
---|
| 3772 | + spin_unlock(&block_group->lock); |
---|
| 3773 | + return 0; |
---|
| 3774 | + } |
---|
| 3775 | + btrfs_freeze_block_group(block_group); |
---|
3375 | 3776 | spin_unlock(&block_group->lock); |
---|
3376 | 3777 | |
---|
3377 | | - if (cleanup) { |
---|
3378 | | - mutex_lock(&fs_info->chunk_mutex); |
---|
3379 | | - em_tree = &fs_info->mapping_tree.map_tree; |
---|
3380 | | - write_lock(&em_tree->lock); |
---|
3381 | | - em = lookup_extent_mapping(em_tree, block_group->key.objectid, |
---|
3382 | | - 1); |
---|
3383 | | - BUG_ON(!em); /* logic error, can't happen */ |
---|
3384 | | - /* |
---|
3385 | | - * remove_extent_mapping() will delete us from the pinned_chunks |
---|
3386 | | - * list, which is protected by the chunk mutex. |
---|
3387 | | - */ |
---|
3388 | | - remove_extent_mapping(em_tree, em); |
---|
3389 | | - write_unlock(&em_tree->lock); |
---|
3390 | | - mutex_unlock(&fs_info->chunk_mutex); |
---|
| 3778 | + ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false); |
---|
| 3779 | + if (ret) |
---|
| 3780 | + goto out; |
---|
3391 | 3781 | |
---|
3392 | | - /* once for us and once for the tree */ |
---|
3393 | | - free_extent_map(em); |
---|
3394 | | - free_extent_map(em); |
---|
3395 | | - |
---|
3396 | | - /* |
---|
3397 | | - * We've left one free space entry and other tasks trimming |
---|
3398 | | - * this block group have left 1 entry each one. Free them. |
---|
3399 | | - */ |
---|
3400 | | - __btrfs_remove_free_space_cache(block_group->free_space_ctl); |
---|
3401 | | - } |
---|
| 3782 | + ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false); |
---|
| 3783 | + div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem); |
---|
| 3784 | + /* If we ended in the middle of a bitmap, reset the trimming flag */ |
---|
| 3785 | + if (rem) |
---|
| 3786 | + reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end)); |
---|
| 3787 | +out: |
---|
| 3788 | + btrfs_unfreeze_block_group(block_group); |
---|
| 3789 | + return ret; |
---|
3402 | 3790 | } |
---|
3403 | 3791 | |
---|
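The `div64_u64_rem()` call above detects whether the requested range ends mid-bitmap: if `end` is not a multiple of the bytes one bitmap covers, the final bitmap was only partially scanned and its trimming flag must be cleared. A worked userspace check with the same arithmetic (4K sector size assumed):

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t unit = 4096, bits_per_bitmap = 4096 * 8;
	uint64_t bytes_per_bitmap = bits_per_bitmap * unit;	/* 128M */
	uint64_t end = 200ULL << 20;				/* 200M */
	uint64_t rem = end % bytes_per_bitmap;			/* 72M here */

	printf("rem = %llu -> %s\n", (unsigned long long)rem,
	       rem ? "reset trimming flag on the last bitmap"
		   : "aligned, nothing to reset");
	return 0;
}
```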
3404 | | -int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, |
---|
3405 | | - u64 *trimmed, u64 start, u64 end, u64 minlen) |
---|
| 3792 | +int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group, |
---|
| 3793 | + u64 *trimmed, u64 start, u64 end, u64 minlen, |
---|
| 3794 | + bool async) |
---|
3406 | 3795 | { |
---|
3407 | 3796 | int ret; |
---|
3408 | 3797 | |
---|
.. | .. |
---|
3413 | 3802 | spin_unlock(&block_group->lock); |
---|
3414 | 3803 | return 0; |
---|
3415 | 3804 | } |
---|
3416 | | - btrfs_get_block_group_trimming(block_group); |
---|
| 3805 | + btrfs_freeze_block_group(block_group); |
---|
3417 | 3806 | spin_unlock(&block_group->lock); |
---|
3418 | 3807 | |
---|
3419 | | - ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); |
---|
3420 | | - if (ret) |
---|
3421 | | - goto out; |
---|
| 3808 | + ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async); |
---|
| 3809 | + btrfs_unfreeze_block_group(block_group); |
---|
3422 | 3810 | |
---|
3423 | | - ret = trim_bitmaps(block_group, trimmed, start, end, minlen); |
---|
3424 | | -out: |
---|
3425 | | - btrfs_put_block_group_trimming(block_group); |
---|
| 3811 | + return ret; |
---|
| 3812 | +} |
---|
| 3813 | + |
---|
| 3814 | +int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group, |
---|
| 3815 | + u64 *trimmed, u64 start, u64 end, u64 minlen, |
---|
| 3816 | + u64 maxlen, bool async) |
---|
| 3817 | +{ |
---|
| 3818 | + int ret; |
---|
| 3819 | + |
---|
| 3820 | + *trimmed = 0; |
---|
| 3821 | + |
---|
| 3822 | + spin_lock(&block_group->lock); |
---|
| 3823 | + if (block_group->removed) { |
---|
| 3824 | + spin_unlock(&block_group->lock); |
---|
| 3825 | + return 0; |
---|
| 3826 | + } |
---|
| 3827 | + btrfs_freeze_block_group(block_group); |
---|
| 3828 | + spin_unlock(&block_group->lock); |
---|
| 3829 | + |
---|
| 3830 | + ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen, |
---|
| 3831 | + async); |
---|
| 3832 | + |
---|
| 3833 | + btrfs_unfreeze_block_group(block_group); |
---|
| 3834 | + |
---|
3426 | 3835 | return ret; |
---|
3427 | 3836 | } |
---|
3428 | 3837 | |
---|
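btrfs_trim_block_group(), btrfs_trim_block_group_extents() and btrfs_trim_block_group_bitmaps() now share one shape: return early if the group was already removed, freeze it so it cannot be deleted while trimming, trim, unfreeze. A hedged userspace sketch of that wrapper (all names invented; the kernel side is btrfs_freeze_block_group()/btrfs_unfreeze_block_group(); build with `-pthread`):

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct group {
	pthread_mutex_t lock;
	bool removed;
	int frozen;	/* non-zero blocks concurrent removal */
};

static int trim_frozen(struct group *g, int (*trim)(struct group *))
{
	int ret;

	pthread_mutex_lock(&g->lock);
	if (g->removed) {
		pthread_mutex_unlock(&g->lock);
		return 0;	/* nothing left to trim */
	}
	g->frozen++;
	pthread_mutex_unlock(&g->lock);

	ret = trim(g);		/* safe: the group cannot vanish while frozen */

	pthread_mutex_lock(&g->lock);
	g->frozen--;
	pthread_mutex_unlock(&g->lock);
	return ret;
}

static int noop_trim(struct group *g) { (void)g; return 0; }

int main(void)
{
	struct group g = { PTHREAD_MUTEX_INITIALIZER, false, 0 };

	printf("ret = %d\n", trim_frozen(&g, noop_trim));
	return 0;
}
```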
.. | .. |
---|
3582 | 3991 | if (release_metadata) |
---|
3583 | 3992 | btrfs_delalloc_release_metadata(BTRFS_I(inode), |
---|
3584 | 3993 | inode->i_size, true); |
---|
3585 | | -#ifdef DEBUG |
---|
3586 | | - btrfs_err(fs_info, |
---|
3587 | | - "failed to write free ino cache for root %llu", |
---|
3588 | | - root->root_key.objectid); |
---|
3589 | | -#endif |
---|
| 3994 | + btrfs_debug(fs_info, |
---|
| 3995 | + "failed to write free ino cache for root %llu error %d", |
---|
| 3996 | + root->root_key.objectid, ret); |
---|
3590 | 3997 | } |
---|
3591 | 3998 | |
---|
3592 | 3999 | return ret; |
---|
.. | .. |
---|
3599 | 4006 | * how the free space cache loading stuff works, so you can get really weird |
---|
3600 | 4007 | * configurations. |
---|
3601 | 4008 | */ |
---|
3602 | | -int test_add_free_space_entry(struct btrfs_block_group_cache *cache, |
---|
| 4009 | +int test_add_free_space_entry(struct btrfs_block_group *cache, |
---|
3603 | 4010 | u64 offset, u64 bytes, bool bitmap) |
---|
3604 | 4011 | { |
---|
3605 | 4012 | struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; |
---|
3606 | 4013 | struct btrfs_free_space *info = NULL, *bitmap_info; |
---|
3607 | 4014 | void *map = NULL; |
---|
| 4015 | + enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED; |
---|
3608 | 4016 | u64 bytes_added; |
---|
3609 | 4017 | int ret; |
---|
3610 | 4018 | |
---|
.. | .. |
---|
3646 | 4054 | info = NULL; |
---|
3647 | 4055 | } |
---|
3648 | 4056 | |
---|
3649 | | - bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); |
---|
| 4057 | + bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes, |
---|
| 4058 | + trim_state); |
---|
3650 | 4059 | |
---|
3651 | 4060 | bytes -= bytes_added; |
---|
3652 | 4061 | offset += bytes_added; |
---|
.. | .. |
---|
3667 | 4076 | * just used to check the absence of space, so if there is free space in the |
---|
3668 | 4077 | * range at all we will return 1. |
---|
3669 | 4078 | */ |
---|
3670 | | -int test_check_exists(struct btrfs_block_group_cache *cache, |
---|
| 4079 | +int test_check_exists(struct btrfs_block_group *cache, |
---|
3671 | 4080 | u64 offset, u64 bytes) |
---|
3672 | 4081 | { |
---|
3673 | 4082 | struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; |
---|