forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-09 95099d4622f8cb224d94e314c7a8e0df60b13f87
kernel/fs/btrfs/free-space-cache.c
@@ -18,9 +18,14 @@
 #include "extent_io.h"
 #include "inode-map.h"
 #include "volumes.h"
+#include "space-info.h"
+#include "delalloc-space.h"
+#include "block-group.h"
+#include "discard.h"
 
 #define BITS_PER_BITMAP		(PAGE_SIZE * 8UL)
-#define MAX_CACHE_BYTES_PER_GIG	SZ_32K
+#define MAX_CACHE_BYTES_PER_GIG	SZ_64K
+#define FORCE_EXTENT_THRESHOLD	SZ_1M
 
 struct btrfs_trim_range {
 	u64 start;
@@ -28,6 +33,8 @@
 	struct list_head list;
 };
 
+static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
+				struct btrfs_free_space *bitmap_info);
 static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info);
 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
@@ -75,7 +82,7 @@
 	 * sure NOFS is set to keep us from deadlocking.
 	 */
 	nofs_flag = memalloc_nofs_save();
-	inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path);
+	inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path);
 	btrfs_release_path(path);
 	memalloc_nofs_restore(nofs_flag);
 	if (IS_ERR(inode))
@@ -88,10 +95,10 @@
 	return inode;
 }
 
-struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info,
-				      struct btrfs_block_group_cache
-				      *block_group, struct btrfs_path *path)
+struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
+				      struct btrfs_path *path)
 {
+	struct btrfs_fs_info *fs_info = block_group->fs_info;
 	struct inode *inode = NULL;
 	u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
 
@@ -103,7 +110,7 @@
 		return inode;
 
 	inode = __lookup_free_space_inode(fs_info->tree_root, path,
-					  block_group->key.objectid);
+					  block_group->start);
 	if (IS_ERR(inode))
 		return inode;
 
@@ -185,20 +192,19 @@
 	return 0;
 }
 
-int create_free_space_inode(struct btrfs_fs_info *fs_info,
-			    struct btrfs_trans_handle *trans,
-			    struct btrfs_block_group_cache *block_group,
+int create_free_space_inode(struct btrfs_trans_handle *trans,
+			    struct btrfs_block_group *block_group,
 			    struct btrfs_path *path)
 {
 	int ret;
 	u64 ino;
 
-	ret = btrfs_find_free_objectid(fs_info->tree_root, &ino);
+	ret = btrfs_find_free_objectid(trans->fs_info->tree_root, &ino);
 	if (ret < 0)
 		return ret;
 
-	return __create_free_space_inode(fs_info->tree_root, trans, path, ino,
-					 block_group->key.objectid);
+	return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
+					 ino, block_group->start);
 }
 
 int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
@@ -208,8 +214,8 @@
 	int ret;
 
 	/* 1 for slack space, 1 for updating the inode */
-	needed_bytes = btrfs_calc_trunc_metadata_size(fs_info, 1) +
-		btrfs_calc_trans_metadata_size(fs_info, 1);
+	needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
+		btrfs_calc_metadata_size(fs_info, 1);
 
 	spin_lock(&rsv->lock);
 	if (rsv->reserved < needed_bytes)
@@ -221,7 +227,7 @@
 }
 
 int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
-				    struct btrfs_block_group_cache *block_group,
+				    struct btrfs_block_group *block_group,
 				    struct inode *inode)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -365,10 +371,10 @@
 	}
 }
 
-static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode,
-				int uptodate)
+static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
 {
 	struct page *page;
+	struct inode *inode = io_ctl->inode;
 	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
 	int i;
 
@@ -407,8 +413,6 @@
 
 static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
 {
-	__le64 *val;
-
 	io_ctl_map_page(io_ctl, 1);
 
 	/*
@@ -423,14 +427,13 @@
 		io_ctl->size -= sizeof(u64) * 2;
 	}
 
-	val = io_ctl->cur;
-	*val = cpu_to_le64(generation);
+	put_unaligned_le64(generation, io_ctl->cur);
 	io_ctl->cur += sizeof(u64);
 }
 
 static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
 {
-	__le64 *gen;
+	u64 cache_gen;
 
 	/*
 	 * Skip the crc area. If we don't check crcs then we just have a 64bit
@@ -445,11 +448,11 @@
 		io_ctl->size -= sizeof(u64) * 2;
 	}
 
-	gen = io_ctl->cur;
-	if (le64_to_cpu(*gen) != generation) {
+	cache_gen = get_unaligned_le64(io_ctl->cur);
+	if (cache_gen != generation) {
 		btrfs_err_rl(io_ctl->fs_info,
			"space cache generation (%llu) does not match inode (%llu)",
-			*gen, generation);
+			cache_gen, generation);
 		io_ctl_unmap_page(io_ctl);
 		return -EIO;
 	}
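
An aside on the two hunks above: io_ctl->cur walks a mapped page in byte-sized steps, so the u64 it points at need not be 8-byte aligned, and dereferencing a typed __le64 pointer there is undefined behaviour on strict-alignment architectures. The sketch below is illustrative only (the in-kernel put_unaligned_le64()/get_unaligned_le64() helpers are arch-optimized); it shows the byte-wise semantics the patch relies on:

	#include <stdint.h>
	#include <string.h>

	/* Store a 64-bit value little-endian at any byte address;
	 * memcpy() carries no alignment requirement. */
	static inline void demo_put_unaligned_le64(uint64_t val, void *p)
	{
		uint8_t b[8];
		int i;

		for (i = 0; i < 8; i++)
			b[i] = (uint8_t)(val >> (8 * i));	/* LSB first */
		memcpy(p, b, 8);
	}

	static inline uint64_t demo_get_unaligned_le64(const void *p)
	{
		uint8_t b[8];
		uint64_t val = 0;
		int i;

		memcpy(b, p, 8);
		for (i = 0; i < 8; i++)
			val |= (uint64_t)b[i] << (8 * i);
		return val;
	}

A round trip demo_get_unaligned_le64(buf) == generation after demo_put_unaligned_le64(generation, buf) holds at any byte offset, which is exactly the property the generation stamp in the space cache header needs.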
@@ -471,9 +474,8 @@
 	if (index == 0)
 		offset = sizeof(u32) * io_ctl->num_pages;
 
-	crc = btrfs_csum_data(io_ctl->orig + offset, crc,
-			      PAGE_SIZE - offset);
-	btrfs_csum_final(crc, (u8 *)&crc);
+	crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
+	btrfs_crc32c_final(crc, (u8 *)&crc);
 	io_ctl_unmap_page(io_ctl);
 	tmp = page_address(io_ctl->pages[0]);
 	tmp += index;
@@ -499,9 +501,8 @@
 	val = *tmp;
 
 	io_ctl_map_page(io_ctl, 0);
-	crc = btrfs_csum_data(io_ctl->orig + offset, crc,
-			      PAGE_SIZE - offset);
-	btrfs_csum_final(crc, (u8 *)&crc);
+	crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
+	btrfs_crc32c_final(crc, (u8 *)&crc);
 	if (val != crc) {
 		btrfs_err_rl(io_ctl->fs_info,
			"csum mismatch on free space cache");
@@ -521,8 +522,8 @@
 		return -ENOSPC;
 
 	entry = io_ctl->cur;
-	entry->offset = cpu_to_le64(offset);
-	entry->bytes = cpu_to_le64(bytes);
+	put_unaligned_le64(offset, &entry->offset);
+	put_unaligned_le64(bytes, &entry->bytes);
 	entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
 		BTRFS_FREE_SPACE_EXTENT;
 	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
@@ -595,8 +596,8 @@
 	}
 
 	e = io_ctl->cur;
-	entry->offset = le64_to_cpu(e->offset);
-	entry->bytes = le64_to_cpu(e->bytes);
+	entry->offset = get_unaligned_le64(&e->offset);
+	entry->bytes = get_unaligned_le64(&e->bytes);
 	*type = e->type;
 	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
 	io_ctl->size -= sizeof(struct btrfs_free_space_entry);
@@ -728,7 +729,7 @@
 
 	readahead_cache(inode);
 
-	ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
+	ret = io_ctl_prepare_pages(&io_ctl, true);
 	if (ret)
 		goto out;
 
@@ -753,6 +754,16 @@
 			kmem_cache_free(btrfs_free_space_cachep, e);
 			goto free_cache;
 		}
+
+		/*
+		 * Sync discard ensures that the free space cache is always
+		 * trimmed.  So when reading this in, the state should reflect
+		 * that.  We also do this for async as a stop gap for lack of
+		 * persistence.
+		 */
+		if (btrfs_test_opt(fs_info, DISCARD_SYNC) ||
+		    btrfs_test_opt(fs_info, DISCARD_ASYNC))
+			e->trim_state = BTRFS_TRIM_STATE_TRIMMED;
 
 		if (!e->bytes) {
 			ret = -1;
@@ -809,12 +820,19 @@
 			ret = io_ctl_read_bitmap(&io_ctl, e);
 			if (ret)
 				goto free_cache;
+			e->bitmap_extents = count_bitmap_extents(ctl, e);
+			if (!btrfs_free_space_trimmed(e)) {
+				ctl->discardable_extents[BTRFS_STAT_CURR] +=
+					e->bitmap_extents;
+				ctl->discardable_bytes[BTRFS_STAT_CURR] += e->bytes;
+			}
 		}
 
 	io_ctl_drop_pages(&io_ctl);
 	merge_space_tree(ctl);
 	ret = 1;
out:
+	btrfs_discard_update_discardable(ctl->private, ctl);
 	io_ctl_free(&io_ctl);
 	return ret;
free_cache:
@@ -823,15 +841,15 @@
 	goto out;
 }
 
-int load_free_space_cache(struct btrfs_fs_info *fs_info,
-			  struct btrfs_block_group_cache *block_group)
+int load_free_space_cache(struct btrfs_block_group *block_group)
 {
+	struct btrfs_fs_info *fs_info = block_group->fs_info;
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 	struct inode *inode;
 	struct btrfs_path *path;
 	int ret = 0;
 	bool matched;
-	u64 used = btrfs_block_group_used(&block_group->item);
+	u64 used = block_group->used;
 
 	/*
 	 * If this block group has been marked to be cleared for one reason or
@@ -869,7 +887,7 @@
 	 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
 	 * we will never try to read their inode item while the fs is mounted.
 	 */
-	inode = lookup_free_space_inode(fs_info, block_group, path);
+	inode = lookup_free_space_inode(block_group, path);
 	if (IS_ERR(inode)) {
 		btrfs_free_path(path);
 		return 0;
@@ -885,13 +903,13 @@
 	spin_unlock(&block_group->lock);
 
 	ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
-				      path, block_group->key.objectid);
+				      path, block_group->start);
 	btrfs_free_path(path);
 	if (ret <= 0)
 		goto out;
 
 	spin_lock(&ctl->tree_lock);
-	matched = (ctl->free_space == (block_group->key.offset - used -
+	matched = (ctl->free_space == (block_group->length - used -
				       block_group->bytes_super));
 	spin_unlock(&ctl->tree_lock);
 
@@ -899,7 +917,7 @@
 		__btrfs_remove_free_space_cache(ctl);
 		btrfs_warn(fs_info,
			   "block group %llu has wrong amount of free space",
-			   block_group->key.objectid);
+			   block_group->start);
 		ret = -1;
 	}
out:
@@ -912,7 +930,7 @@
 
 		btrfs_warn(fs_info,
			   "failed to load free space cache for block group %llu, rebuilding it now",
-			   block_group->key.objectid);
+			   block_group->start);
 	}
 
 	iput(inode);
@@ -922,7 +940,7 @@
 static noinline_for_stack
 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
			      struct btrfs_free_space_ctl *ctl,
-			      struct btrfs_block_group_cache *block_group,
+			      struct btrfs_block_group *block_group,
			      int *entries, int *bitmaps,
			      struct list_head *bitmap_list)
 {
@@ -1015,7 +1033,7 @@
 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 	if (ret < 0) {
 		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
-				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
+				 EXTENT_DELALLOC, 0, 0, NULL);
 		goto fail;
 	}
 	leaf = path->nodes[0];
@@ -1027,9 +1045,8 @@
 	if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
 	    found_key.offset != offset) {
 		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
-				 inode->i_size - 1,
-				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
-				 NULL);
+				 inode->i_size - 1, EXTENT_DELALLOC, 0,
+				 0, NULL);
 		btrfs_release_path(path);
 		goto fail;
 	}
@@ -1050,9 +1067,9 @@
 	return -1;
 }
 
-static noinline_for_stack int
-write_pinned_extent_entries(struct btrfs_fs_info *fs_info,
-			    struct btrfs_block_group_cache *block_group,
+static noinline_for_stack int write_pinned_extent_entries(
+			    struct btrfs_trans_handle *trans,
+			    struct btrfs_block_group *block_group,
			    struct btrfs_io_ctl *io_ctl,
			    int *entries)
 {
@@ -1070,11 +1087,11 @@
 	 * We shouldn't have switched the pinned extents yet so this is the
 	 * right one
 	 */
-	unpin = fs_info->pinned_extents;
+	unpin = &trans->transaction->pinned_extents;
 
-	start = block_group->key.objectid;
+	start = block_group->start;
 
-	while (start < block_group->key.objectid + block_group->key.offset) {
+	while (start < block_group->start + block_group->length) {
 		ret = find_first_extent_bit(unpin, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY, NULL);
@@ -1082,13 +1099,12 @@
 			return 0;
 
 		/* This pinned extent is out of our range */
-		if (extent_start >= block_group->key.objectid +
-		    block_group->key.offset)
+		if (extent_start >= block_group->start + block_group->length)
 			return 0;
 
 		extent_start = max(extent_start, start);
-		extent_end = min(block_group->key.objectid +
-				 block_group->key.offset, extent_end + 1);
+		extent_end = min(block_group->start + block_group->length,
+				 extent_end + 1);
 		len = extent_end - extent_start;
 
 		*entries += 1;
@@ -1126,7 +1142,7 @@
 	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
 	if (ret)
 		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
-				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
+				 EXTENT_DELALLOC, 0, 0, NULL);
 
 	return ret;
 }
@@ -1152,7 +1168,7 @@
 
 static int __btrfs_wait_cache_io(struct btrfs_root *root,
				 struct btrfs_trans_handle *trans,
-				 struct btrfs_block_group_cache *block_group,
+				 struct btrfs_block_group *block_group,
				 struct btrfs_io_ctl *io_ctl,
				 struct btrfs_path *path, u64 offset)
 {
@@ -1174,13 +1190,10 @@
 	if (ret) {
 		invalidate_inode_pages2(inode->i_mapping);
 		BTRFS_I(inode)->generation = 0;
-		if (block_group) {
-#ifdef DEBUG
-			btrfs_err(root->fs_info,
-				  "failed to write free space cache for block group %llu",
-				  block_group->key.objectid);
-#endif
-		}
+		if (block_group)
+			btrfs_debug(root->fs_info,
+				    "failed to write free space cache for block group %llu error %d",
+				    block_group->start, ret);
 	}
 	btrfs_update_inode(trans, root, inode);
 
@@ -1220,12 +1233,12 @@
 }
 
 int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
-			struct btrfs_block_group_cache *block_group,
+			struct btrfs_block_group *block_group,
			struct btrfs_path *path)
 {
 	return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans,
				     block_group, &block_group->io_ctl,
-				     path, block_group->key.objectid);
+				     path, block_group->start);
 }
 
 /**
@@ -1241,11 +1254,10 @@
 */
 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
				   struct btrfs_free_space_ctl *ctl,
-				   struct btrfs_block_group_cache *block_group,
+				   struct btrfs_block_group *block_group,
				   struct btrfs_io_ctl *io_ctl,
				   struct btrfs_trans_handle *trans)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct extent_state *cached_state = NULL;
 	LIST_HEAD(bitmap_list);
 	int entries = 0;
@@ -1277,7 +1289,7 @@
 	}
 
 	/* Lock all pages first so we can lock the extent safely. */
-	ret = io_ctl_prepare_pages(io_ctl, inode, 0);
+	ret = io_ctl_prepare_pages(io_ctl, false);
 	if (ret)
 		goto out_unlock;
 
@@ -1303,8 +1315,7 @@
 	 * If this changes while we are working we'll get added back to
 	 * the dirty list and redo it. No locking needed
 	 */
-	ret = write_pinned_extent_entries(fs_info, block_group,
-					  io_ctl, &entries);
+	ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries);
 	if (ret)
 		goto out_nospc_locked;
 
@@ -1323,8 +1334,9 @@
 	io_ctl_zero_remaining_pages(io_ctl);
 
 	/* Everything is written out, now we dirty the pages in the file. */
-	ret = btrfs_dirty_pages(inode, io_ctl->pages, io_ctl->num_pages, 0,
-				i_size_read(inode), &cached_state);
+	ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
+				io_ctl->num_pages, 0, i_size_read(inode),
+				&cached_state);
 	if (ret)
 		goto out_nospc;
 
@@ -1342,7 +1354,7 @@
 
 	/*
 	 * at this point the pages are under IO and we're happy,
-	 * The caller is responsible for waiting on them and updating the
+	 * The caller is responsible for waiting on them and updating
 	 * the cache and the inode
 	 */
 	io_ctl->entries = entries;
@@ -1353,18 +1365,6 @@
 		goto out;
 
 	return 0;
-
-out:
-	io_ctl->inode = NULL;
-	io_ctl_free(io_ctl);
-	if (ret) {
-		invalidate_inode_pages2(inode->i_mapping);
-		BTRFS_I(inode)->generation = 0;
-	}
-	btrfs_update_inode(trans, root, inode);
-	if (must_iput)
-		iput(inode);
-	return ret;
 
out_nospc_locked:
 	cleanup_bitmap_list(&bitmap_list);
@@ -1378,14 +1378,24 @@
 	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
 		up_write(&block_group->data_rwsem);
 
-	goto out;
+out:
+	io_ctl->inode = NULL;
+	io_ctl_free(io_ctl);
+	if (ret) {
+		invalidate_inode_pages2(inode->i_mapping);
+		BTRFS_I(inode)->generation = 0;
+	}
+	btrfs_update_inode(trans, root, inode);
+	if (must_iput)
+		iput(inode);
+	return ret;
 }
 
-int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
-			  struct btrfs_trans_handle *trans,
-			  struct btrfs_block_group_cache *block_group,
+int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
+			  struct btrfs_block_group *block_group,
			  struct btrfs_path *path)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 	struct inode *inode;
 	int ret = 0;
@@ -1397,18 +1407,16 @@
 	}
 	spin_unlock(&block_group->lock);
 
-	inode = lookup_free_space_inode(fs_info, block_group, path);
+	inode = lookup_free_space_inode(block_group, path);
 	if (IS_ERR(inode))
 		return 0;
 
 	ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
				      block_group, &block_group->io_ctl, trans);
 	if (ret) {
-#ifdef DEBUG
-		btrfs_err(fs_info,
-			  "failed to write free space cache for block group %llu",
-			  block_group->key.objectid);
-#endif
+		btrfs_debug(fs_info,
+			    "failed to write free space cache for block group %llu error %d",
+			    block_group->start, ret);
 		spin_lock(&block_group->lock);
 		block_group->disk_cache_state = BTRFS_DC_ERROR;
 		spin_unlock(&block_group->lock);
@@ -1633,6 +1641,11 @@
 {
 	rb_erase(&info->offset_index, &ctl->free_space_offset);
 	ctl->free_extents--;
+
+	if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
+		ctl->discardable_extents[BTRFS_STAT_CURR]--;
+		ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes;
+	}
 }
 
 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
@@ -1653,6 +1666,11 @@
 	if (ret)
 		return ret;
 
+	if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
+		ctl->discardable_extents[BTRFS_STAT_CURR]++;
+		ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
+	}
+
 	ctl->free_space += info->bytes;
 	ctl->free_extents++;
 	return ret;
@@ -1660,11 +1678,11 @@
 
 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
 {
-	struct btrfs_block_group_cache *block_group = ctl->private;
+	struct btrfs_block_group *block_group = ctl->private;
 	u64 max_bytes;
 	u64 bitmap_bytes;
 	u64 extent_bytes;
-	u64 size = block_group->key.offset;
+	u64 size = block_group->length;
 	u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
 	u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
 
@@ -1673,26 +1691,17 @@
 	ASSERT(ctl->total_bitmaps <= max_bitmaps);
 
 	/*
-	 * The goal is to keep the total amount of memory used per 1gb of space
-	 * at or below 32k, so we need to adjust how much memory we allow to be
-	 * used by extent based free space tracking
+	 * We are trying to keep the total amount of memory used per 1GiB of
+	 * space to be MAX_CACHE_BYTES_PER_GIG.  However, with a reclamation
+	 * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of
+	 * bitmaps, we may end up using more memory than this.
	 */
 	if (size < SZ_1G)
 		max_bytes = MAX_CACHE_BYTES_PER_GIG;
 	else
 		max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);
 
-	/*
-	 * we want to account for 1 more bitmap than what we have so we can make
-	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
-	 * we add more bitmaps.
-	 */
-	bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit;
-
-	if (bitmap_bytes >= max_bytes) {
-		ctl->extents_thresh = 0;
-		return;
-	}
+	bitmap_bytes = ctl->total_bitmaps * ctl->unit;
 
 	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
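
To put numbers on the threshold logic above (a worked example, assuming 4 KiB pages and a 4 KiB sectorsize): one bitmap spans BITS_PER_BITMAP * unit = 32768 * 4 KiB = 128 MiB of the block group while costing a single page of memory, and for a 1 GiB block group the budget is now MAX_CACHE_BYTES_PER_GIG = 64 KiB. The sketch below mirrors the calculation; the sizeof(struct btrfs_free_space) value is an assumption for illustration:

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SZ			4096ULL
	#define BITS_PER_BITMAP		(PAGE_SZ * 8)		/* 32768 */
	#define SZ_1G			(1024ULL * 1024 * 1024)
	#define MAX_CACHE_BYTES_PER_GIG	(64ULL * 1024)		/* SZ_64K */

	int main(void)
	{
		uint64_t unit = 4096;		/* sectorsize */
		uint64_t size = SZ_1G;		/* block group length */
		uint64_t total_bitmaps = 2;	/* bitmaps currently in use */
		uint64_t entry_size = 48;	/* assumed sizeof(struct btrfs_free_space) */
		uint64_t max_bytes, bitmap_bytes, extent_bytes;

		max_bytes = (size < SZ_1G) ? MAX_CACHE_BYTES_PER_GIG :
			    MAX_CACHE_BYTES_PER_GIG * (size / SZ_1G);
		bitmap_bytes = total_bitmaps * unit;	/* as in the hunk above */

		/* keep the extent-entry share at most half of the budget */
		extent_bytes = max_bytes - bitmap_bytes;
		if (extent_bytes > max_bytes / 2)
			extent_bytes = max_bytes / 2;

		printf("one bitmap covers %llu MiB; extents_thresh ~ %llu entries\n",
		       (unsigned long long)((BITS_PER_BITMAP * unit) >> 20),
		       (unsigned long long)(extent_bytes / entry_size));
		return 0;
	}

With these inputs the extent budget is min(64 KiB - 8 KiB, 32 KiB) = 32 KiB, i.e. roughly 682 tracked extent entries before the ctl starts preferring bitmaps.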
@@ -1709,17 +1718,31 @@
			      struct btrfs_free_space *info,
			      u64 offset, u64 bytes)
 {
-	unsigned long start, count;
+	unsigned long start, count, end;
+	int extent_delta = -1;
 
 	start = offset_to_bit(info->offset, ctl->unit, offset);
 	count = bytes_to_bits(bytes, ctl->unit);
-	ASSERT(start + count <= BITS_PER_BITMAP);
+	end = start + count;
+	ASSERT(end <= BITS_PER_BITMAP);
 
 	bitmap_clear(info->bitmap, start, count);
 
 	info->bytes -= bytes;
 	if (info->max_extent_size > ctl->unit)
 		info->max_extent_size = 0;
+
+	if (start && test_bit(start - 1, info->bitmap))
+		extent_delta++;
+
+	if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
+		extent_delta++;
+
+	info->bitmap_extents += extent_delta;
+	if (!btrfs_free_space_trimmed(info)) {
+		ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
+		ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
+	}
 }
 
 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
@@ -1734,16 +1757,30 @@
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
 {
-	unsigned long start, count;
+	unsigned long start, count, end;
+	int extent_delta = 1;
 
 	start = offset_to_bit(info->offset, ctl->unit, offset);
 	count = bytes_to_bits(bytes, ctl->unit);
-	ASSERT(start + count <= BITS_PER_BITMAP);
+	end = start + count;
+	ASSERT(end <= BITS_PER_BITMAP);
 
 	bitmap_set(info->bitmap, start, count);
 
 	info->bytes += bytes;
 	ctl->free_space += bytes;
+
+	if (start && test_bit(start - 1, info->bitmap))
+		extent_delta--;
+
+	if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
+		extent_delta--;
+
+	info->bitmap_extents += extent_delta;
+	if (!btrfs_free_space_trimmed(info)) {
+		ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
+		ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes;
+	}
 }
 
 /*
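
The extent_delta bookkeeping added in the two bitmap hunks above keeps a contiguous-region count up to date without rescanning the bitmap: flipping a run of bits changes the number of free regions by -1, 0, or +1, depending only on whether the bits bordering the run are set. A standalone sketch of the same neighbor test (illustrative only):

	#include <stdbool.h>

	/* Change in the number of contiguous set-bit regions when the run
	 * [start, end) is set; left_set/right_set are the bits just outside
	 * the run (false at the bitmap edges). */
	static int delta_on_set(bool left_set, bool right_set)
	{
		int delta = 1;		/* a fresh region appears... */

		if (left_set)
			delta--;	/* ...unless we extend the left region */
		if (right_set)
			delta--;	/* ...or the right one (bridging both: -1) */
		return delta;
	}

	/* Mirror image for clearing the run [start, end). */
	static int delta_on_clear(bool left_set, bool right_set)
	{
		int delta = -1;		/* a region disappears... */

		if (left_set)
			delta++;	/* ...unless a left remainder survives */
		if (right_set)
			delta++;	/* ...splitting one region in two gives +1 */
		return delta;
	}

Note the kernel tests the neighbors after bitmap_set()/bitmap_clear() has run, which is safe because those helpers never touch bits outside [start, end).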
@@ -1879,11 +1916,35 @@
 	return NULL;
 }
 
+static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
+				struct btrfs_free_space *bitmap_info)
+{
+	struct btrfs_block_group *block_group = ctl->private;
+	u64 bytes = bitmap_info->bytes;
+	unsigned int rs, re;
+	int count = 0;
+
+	if (!block_group || !bytes)
+		return count;
+
+	bitmap_for_each_set_region(bitmap_info->bitmap, rs, re, 0,
+				   BITS_PER_BITMAP) {
+		bytes -= (rs - re) * ctl->unit;
+		count++;
+
+		if (!bytes)
+			break;
+	}
+
+	return count;
+}
+
 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info, u64 offset)
 {
 	info->offset = offset_to_bitmap(ctl, offset);
 	info->bytes = 0;
+	info->bitmap_extents = 0;
 	INIT_LIST_HEAD(&info->list);
 	link_free_space(ctl, info);
 	ctl->total_bitmaps++;
@@ -1894,6 +1955,18 @@
 static void free_bitmap(struct btrfs_free_space_ctl *ctl,
			struct btrfs_free_space *bitmap_info)
 {
+	/*
+	 * Normally when this is called, the bitmap is completely empty. However,
+	 * if we are blowing up the free space cache for one reason or another
+	 * via __btrfs_remove_free_space_cache(), then it may not be freed and
+	 * we may leave stats on the table.
+	 */
+	if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
+		ctl->discardable_extents[BTRFS_STAT_CURR] -=
+			bitmap_info->bitmap_extents;
+		ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;
+
+	}
 	unlink_free_space(ctl, bitmap_info);
 	kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
 	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
@@ -1980,10 +2053,23 @@
 
 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
-			       u64 bytes)
+			       u64 bytes, enum btrfs_trim_state trim_state)
 {
 	u64 bytes_to_set = 0;
 	u64 end;
+
+	/*
+	 * This is a tradeoff to make bitmap trim state minimal.  We mark the
+	 * whole bitmap untrimmed if at any point we add untrimmed regions.
+	 */
+	if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) {
+		if (btrfs_free_space_trimmed(info)) {
+			ctl->discardable_extents[BTRFS_STAT_CURR] +=
+				info->bitmap_extents;
+			ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
+		}
+		info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
+	}
 
 	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
 
@@ -2004,7 +2090,7 @@
 static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		      struct btrfs_free_space *info)
 {
-	struct btrfs_block_group_cache *block_group = ctl->private;
+	struct btrfs_block_group *block_group = ctl->private;
 	struct btrfs_fs_info *fs_info = block_group->fs_info;
 	bool forced = false;
 
@@ -2012,6 +2098,10 @@
 	if (btrfs_should_fragment_free_space(block_group))
 		forced = true;
 #endif
+
+	/* This is a way to reclaim large regions from the bitmaps. */
+	if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD)
+		return false;
 
 	/*
 	 * If we are below the extents threshold then we can add this as an
@@ -2025,8 +2115,8 @@
 	 * of cache left then go ahead an dadd them, no sense in adding
 	 * the overhead of a bitmap if we don't have to.
 	 */
-	if (info->bytes <= fs_info->sectorsize * 4) {
-		if (ctl->free_extents * 2 <= ctl->extents_thresh)
+	if (info->bytes <= fs_info->sectorsize * 8) {
+		if (ctl->free_extents * 3 <= ctl->extents_thresh)
 			return false;
 	} else {
 		return false;
@@ -2041,7 +2131,7 @@
 	 * so allow those block groups to still be allowed to have a bitmap
 	 * entry.
 	 */
-	if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset)
+	if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length)
 		return false;
 
 	return true;
@@ -2056,13 +2146,15 @@
		      struct btrfs_free_space *info)
 {
 	struct btrfs_free_space *bitmap_info;
-	struct btrfs_block_group_cache *block_group = NULL;
+	struct btrfs_block_group *block_group = NULL;
 	int added = 0;
 	u64 bytes, offset, bytes_added;
+	enum btrfs_trim_state trim_state;
 	int ret;
 
 	bytes = info->bytes;
 	offset = info->offset;
+	trim_state = info->trim_state;
 
 	if (!ctl->op->use_bitmap(ctl, info))
 		return 0;
@@ -2097,8 +2189,8 @@
 	}
 
 	if (entry->offset == offset_to_bitmap(ctl, offset)) {
-		bytes_added = add_bytes_to_bitmap(ctl, entry,
-						  offset, bytes);
+		bytes_added = add_bytes_to_bitmap(ctl, entry, offset,
+						  bytes, trim_state);
 		bytes -= bytes_added;
 		offset += bytes_added;
 	}
@@ -2117,7 +2209,8 @@
 		goto new_bitmap;
 	}
 
-	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
+	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
+					  trim_state);
 	bytes -= bytes_added;
 	offset += bytes_added;
 	added = 0;
@@ -2151,6 +2244,7 @@
 	/* allocate the bitmap */
 	info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep,
					 GFP_NOFS);
+	info->trim_state = BTRFS_TRIM_STATE_TRIMMED;
 	spin_lock(&ctl->tree_lock);
 	if (!info->bitmap) {
 		ret = -ENOMEM;
@@ -2170,6 +2264,22 @@
 	return ret;
 }
 
+/*
+ * Free space merging rules:
+ *  1) Merge trimmed areas together
+ *  2) Let untrimmed areas coalesce with trimmed areas
+ *  3) Always pull neighboring regions from bitmaps
+ *
+ * The above rules are for when we merge free space based on btrfs_trim_state.
+ * Rules 2 and 3 are subtle because they are suboptimal, but are done for the
+ * same reason: to promote larger extent regions which makes life easier for
+ * find_free_extent().  Rule 2 enables coalescing based on the common path
+ * being returning free space from btrfs_finish_extent_commit().  So when free
+ * space is trimmed, it will prevent aggregating trimmed new region and
+ * untrimmed regions in the rb_tree.  Rule 3 is purely to obtain larger extents
+ * and provide find_free_extent() with the largest extents possible hoping for
+ * the reuse path.
+ */
 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
			  struct btrfs_free_space *info, bool update_stat)
 {
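
A compact way to read rules 1 and 2 above: a trimmed region must never absorb an untrimmed neighbor, since that would silently mark untrimmed space as trimmed, while an untrimmed region may absorb anything because the merged result simply stays untrimmed and is discarded again later. A sketch of the predicate (illustrative; it mirrors the "!is_trimmed || btrfs_free_space_trimmed(...)" checks in the hunks that follow):

	#include <stdbool.h>

	enum trim_state { UNTRIMMED, TRIMMED };

	/* May the region being inserted swallow this neighbor? */
	static bool can_merge(enum trim_state inserted, enum trim_state neighbor)
	{
		/* Untrimmed absorbs anything; trimmed only absorbs trimmed. */
		return inserted == UNTRIMMED || neighbor == TRIMMED;
	}

Rule 3 deliberately ignores the trim state when pulling regions out of bitmaps; the two bitmap-stealing hunks further below instead force the merged extent to untrimmed whenever the source bitmap was not trimmed.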
@@ -2178,6 +2288,7 @@
 	bool merged = false;
 	u64 offset = info->offset;
 	u64 bytes = info->bytes;
+	const bool is_trimmed = btrfs_free_space_trimmed(info);
 
 	/*
 	 * first we want to see if there is free space adjacent to the range we
@@ -2191,7 +2302,9 @@
 	else if (!right_info)
 		left_info = tree_search_offset(ctl, offset - 1, 0, 0);
 
-	if (right_info && !right_info->bitmap) {
+	/* See try_merge_free_space() comment. */
+	if (right_info && !right_info->bitmap &&
+	    (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
 		if (update_stat)
 			unlink_free_space(ctl, right_info);
 		else
@@ -2201,8 +2314,10 @@
 		merged = true;
 	}
 
+	/* See try_merge_free_space() comment. */
 	if (left_info && !left_info->bitmap &&
-	    left_info->offset + left_info->bytes == offset) {
+	    left_info->offset + left_info->bytes == offset &&
+	    (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
 		if (update_stat)
 			unlink_free_space(ctl, left_info);
 		else
@@ -2237,6 +2352,10 @@
 		return false;
 	bytes = (j - i) * ctl->unit;
 	info->bytes += bytes;
+
+	/* See try_merge_free_space() comment. */
+	if (!btrfs_free_space_trimmed(bitmap))
+		info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
 
 	if (update_stat)
 		bitmap_clear_bits(ctl, bitmap, end, bytes);
@@ -2291,6 +2410,10 @@
 	info->offset -= bytes;
 	info->bytes += bytes;
 
+	/* See try_merge_free_space() comment. */
+	if (!btrfs_free_space_trimmed(bitmap))
+		info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
+
 	if (update_stat)
 		bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
 	else
@@ -2340,10 +2463,13 @@
 
 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
			   struct btrfs_free_space_ctl *ctl,
-			   u64 offset, u64 bytes)
+			   u64 offset, u64 bytes,
+			   enum btrfs_trim_state trim_state)
 {
+	struct btrfs_block_group *block_group = ctl->private;
 	struct btrfs_free_space *info;
 	int ret = 0;
+	u64 filter_bytes = bytes;
 
 	info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
 	if (!info)
@@ -2351,6 +2477,7 @@
 
 	info->offset = offset;
 	info->bytes = bytes;
+	info->trim_state = trim_state;
 	RB_CLEAR_NODE(&info->offset_index);
 
 	spin_lock(&ctl->tree_lock);
@@ -2379,10 +2506,13 @@
 	 */
 	steal_from_bitmap(ctl, info, true);
 
+	filter_bytes = max(filter_bytes, info->bytes);
+
 	ret = link_free_space(ctl, info);
 	if (ret)
 		kmem_cache_free(btrfs_free_space_cachep, info);
out:
+	btrfs_discard_update_discardable(block_group, ctl);
 	spin_unlock(&ctl->tree_lock);
 
 	if (ret) {
@@ -2390,10 +2520,47 @@
 		ASSERT(ret != -EEXIST);
 	}
 
+	if (trim_state != BTRFS_TRIM_STATE_TRIMMED) {
+		btrfs_discard_check_filter(block_group, filter_bytes);
+		btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
+	}
+
 	return ret;
 }
 
-int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
+int btrfs_add_free_space(struct btrfs_block_group *block_group,
+			 u64 bytenr, u64 size)
+{
+	enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
+
+	if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC))
+		trim_state = BTRFS_TRIM_STATE_TRIMMED;
+
+	return __btrfs_add_free_space(block_group->fs_info,
+				      block_group->free_space_ctl,
+				      bytenr, size, trim_state);
+}
+
+/*
+ * This is a subtle distinction because when adding free space back in general,
+ * we want it to be added as untrimmed for async.  But in the case where we add
+ * it on loading of a block group, we want to consider it trimmed.
+ */
+int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
+				       u64 bytenr, u64 size)
+{
+	enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
+
+	if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) ||
+	    btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
+		trim_state = BTRFS_TRIM_STATE_TRIMMED;
+
+	return __btrfs_add_free_space(block_group->fs_info,
+				      block_group->free_space_ctl,
+				      bytenr, size, trim_state);
+}
+
+int btrfs_remove_free_space(struct btrfs_block_group *block_group,
			    u64 offset, u64 bytes)
 {
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
@@ -2465,8 +2632,10 @@
 	}
 	spin_unlock(&ctl->tree_lock);
 
-	ret = btrfs_add_free_space(block_group, offset + bytes,
-				   old_end - (offset + bytes));
+	ret = __btrfs_add_free_space(block_group->fs_info, ctl,
+				     offset + bytes,
+				     old_end - (offset + bytes),
+				     info->trim_state);
 	WARN_ON(ret);
 	goto out;
 }
@@ -2478,12 +2647,13 @@
 		goto again;
 	}
out_lock:
+	btrfs_discard_update_discardable(block_group, ctl);
 	spin_unlock(&ctl->tree_lock);
out:
 	return ret;
 }
 
-void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
+void btrfs_dump_free_space(struct btrfs_block_group *block_group,
			   u64 bytes)
 {
 	struct btrfs_fs_info *fs_info = block_group->fs_info;
@@ -2508,14 +2678,14 @@
		"%d blocks of free space at or bigger than bytes is", count);
 }
 
-void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
+void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group)
 {
 	struct btrfs_fs_info *fs_info = block_group->fs_info;
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 
 	spin_lock_init(&ctl->tree_lock);
 	ctl->unit = fs_info->sectorsize;
-	ctl->start = block_group->key.objectid;
+	ctl->start = block_group->start;
 	ctl->private = block_group;
 	ctl->op = &free_space_op;
 	INIT_LIST_HEAD(&ctl->trimming_ranges);
@@ -2535,9 +2705,8 @@
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
-static int
-__btrfs_return_cluster_to_free_space(
-			     struct btrfs_block_group_cache *block_group,
+static void __btrfs_return_cluster_to_free_space(
+			     struct btrfs_block_group *block_group,
			     struct btrfs_free_cluster *cluster)
 {
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
@@ -2545,8 +2714,10 @@
 	struct rb_node *node;
 
 	spin_lock(&cluster->lock);
-	if (cluster->block_group != block_group)
-		goto out;
+	if (cluster->block_group != block_group) {
+		spin_unlock(&cluster->lock);
+		return;
+	}
 
 	cluster->block_group = NULL;
 	cluster->window_start = 0;
@@ -2563,18 +2734,29 @@
 
 		bitmap = (entry->bitmap != NULL);
 		if (!bitmap) {
+			/* Merging treats extents as if they were new */
+			if (!btrfs_free_space_trimmed(entry)) {
+				ctl->discardable_extents[BTRFS_STAT_CURR]--;
+				ctl->discardable_bytes[BTRFS_STAT_CURR] -=
+					entry->bytes;
+			}
+
 			try_merge_free_space(ctl, entry, false);
 			steal_from_bitmap(ctl, entry, false);
+
+			/* As we insert directly, update these statistics */
+			if (!btrfs_free_space_trimmed(entry)) {
+				ctl->discardable_extents[BTRFS_STAT_CURR]++;
+				ctl->discardable_bytes[BTRFS_STAT_CURR] +=
+					entry->bytes;
+			}
 		}
 		tree_insert_offset(&ctl->free_space_offset,
				   entry->offset, &entry->offset_index, bitmap);
 	}
 	cluster->root = RB_ROOT;
-
-out:
 	spin_unlock(&cluster->lock);
 	btrfs_put_block_group(block_group);
-	return 0;
 }
 
 static void __btrfs_remove_free_space_cache_locked(
@@ -2600,10 +2782,12 @@
 {
 	spin_lock(&ctl->tree_lock);
 	__btrfs_remove_free_space_cache_locked(ctl);
+	if (ctl->private)
+		btrfs_discard_update_discardable(ctl->private, ctl);
 	spin_unlock(&ctl->tree_lock);
 }
 
-void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
+void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
 {
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 	struct btrfs_free_cluster *cluster;
@@ -2621,20 +2805,55 @@
 		cond_resched_lock(&ctl->tree_lock);
 	}
 	__btrfs_remove_free_space_cache_locked(ctl);
+	btrfs_discard_update_discardable(block_group, ctl);
 	spin_unlock(&ctl->tree_lock);
 
 }
 
-u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
+/**
+ * btrfs_is_free_space_trimmed - see if everything is trimmed
+ * @block_group: block_group of interest
+ *
+ * Walk @block_group's free space rb_tree to determine if everything is trimmed.
+ */
+bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group)
+{
+	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+	struct btrfs_free_space *info;
+	struct rb_node *node;
+	bool ret = true;
+
+	spin_lock(&ctl->tree_lock);
+	node = rb_first(&ctl->free_space_offset);
+
+	while (node) {
+		info = rb_entry(node, struct btrfs_free_space, offset_index);
+
+		if (!btrfs_free_space_trimmed(info)) {
+			ret = false;
+			break;
+		}
+
+		node = rb_next(node);
+	}
+
+	spin_unlock(&ctl->tree_lock);
+	return ret;
+}
+
+u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
			       u64 offset, u64 bytes, u64 empty_size,
			       u64 *max_extent_size)
 {
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+	struct btrfs_discard_ctl *discard_ctl =
+					&block_group->fs_info->discard_ctl;
 	struct btrfs_free_space *entry = NULL;
 	u64 bytes_search = bytes + empty_size;
 	u64 ret = 0;
 	u64 align_gap = 0;
 	u64 align_gap_len = 0;
+	enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
 
 	spin_lock(&ctl->tree_lock);
 	entry = find_free_space(ctl, &offset, &bytes_search,
@@ -2645,12 +2864,20 @@
 	ret = offset;
 	if (entry->bitmap) {
 		bitmap_clear_bits(ctl, entry, offset, bytes);
+
+		if (!btrfs_free_space_trimmed(entry))
+			atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
+
 		if (!entry->bytes)
 			free_bitmap(ctl, entry);
 	} else {
 		unlink_free_space(ctl, entry);
 		align_gap_len = offset - entry->offset;
 		align_gap = entry->offset;
+		align_gap_trim_state = entry->trim_state;
+
+		if (!btrfs_free_space_trimmed(entry))
+			atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
 
 		entry->offset = offset + bytes;
 		WARN_ON(entry->bytes < bytes + align_gap_len);
@@ -2662,11 +2889,13 @@
 		link_free_space(ctl, entry);
 	}
out:
+	btrfs_discard_update_discardable(block_group, ctl);
 	spin_unlock(&ctl->tree_lock);
 
 	if (align_gap_len)
 		__btrfs_add_free_space(block_group->fs_info, ctl,
-				       align_gap, align_gap_len);
+				       align_gap, align_gap_len,
+				       align_gap_trim_state);
 	return ret;
 }
 
@@ -2678,12 +2907,11 @@
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
-int btrfs_return_cluster_to_free_space(
-			       struct btrfs_block_group_cache *block_group,
+void btrfs_return_cluster_to_free_space(
+			       struct btrfs_block_group *block_group,
			       struct btrfs_free_cluster *cluster)
 {
 	struct btrfs_free_space_ctl *ctl;
-	int ret;
 
 	/* first, get a safe pointer to the block group */
 	spin_lock(&cluster->lock);
@@ -2691,29 +2919,30 @@
 		block_group = cluster->block_group;
 		if (!block_group) {
 			spin_unlock(&cluster->lock);
-			return 0;
+			return;
 		}
 	} else if (cluster->block_group != block_group) {
 		/* someone else has already freed it don't redo their work */
 		spin_unlock(&cluster->lock);
-		return 0;
+		return;
 	}
-	atomic_inc(&block_group->count);
+	btrfs_get_block_group(block_group);
 	spin_unlock(&cluster->lock);
 
 	ctl = block_group->free_space_ctl;
 
 	/* now return any extents the cluster had on it */
 	spin_lock(&ctl->tree_lock);
-	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
+	__btrfs_return_cluster_to_free_space(block_group, cluster);
 	spin_unlock(&ctl->tree_lock);
+
+	btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group);
 
 	/* finally drop our ref */
 	btrfs_put_block_group(block_group);
-	return ret;
 }
 
-static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
+static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group,
				   struct btrfs_free_cluster *cluster,
				   struct btrfs_free_space *entry,
				   u64 bytes, u64 min_start,
@@ -2746,11 +2975,13 @@
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
-u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
+u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start, u64 *max_extent_size)
 {
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+	struct btrfs_discard_ctl *discard_ctl =
+					&block_group->fs_info->discard_ctl;
 	struct btrfs_free_space *entry = NULL;
 	struct rb_node *node;
 	u64 ret = 0;
@@ -2803,8 +3034,6 @@
 			entry->bytes -= bytes;
 		}
 
-		if (entry->bytes == 0)
-			rb_erase(&entry->offset_index, &cluster->root);
 		break;
 	}
out:
@@ -2815,24 +3044,35 @@
 
 	spin_lock(&ctl->tree_lock);
 
+	if (!btrfs_free_space_trimmed(entry))
+		atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
+
 	ctl->free_space -= bytes;
+	if (!entry->bitmap && !btrfs_free_space_trimmed(entry))
+		ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
+
+	spin_lock(&cluster->lock);
 	if (entry->bytes == 0) {
+		rb_erase(&entry->offset_index, &cluster->root);
 		ctl->free_extents--;
 		if (entry->bitmap) {
 			kmem_cache_free(btrfs_free_space_bitmap_cachep,
					entry->bitmap);
 			ctl->total_bitmaps--;
 			ctl->op->recalc_thresholds(ctl);
+		} else if (!btrfs_free_space_trimmed(entry)) {
+			ctl->discardable_extents[BTRFS_STAT_CURR]--;
 		}
 		kmem_cache_free(btrfs_free_space_cachep, entry);
 	}
 
+	spin_unlock(&cluster->lock);
 	spin_unlock(&ctl->tree_lock);
 
 	return ret;
 }
 
-static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
+static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes,
@@ -2914,7 +3154,7 @@
 * extent of cont1_bytes, and other clusters of at least min_bytes.
 */
 static noinline int
-setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
+setup_cluster_no_bitmap(struct btrfs_block_group *block_group,
			struct btrfs_free_cluster *cluster,
			struct list_head *bitmaps, u64 offset, u64 bytes,
			u64 cont1_bytes, u64 min_bytes)
@@ -3005,7 +3245,7 @@
 * that we have already failed to find extents that will work.
 */
 static noinline int
-setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
+setup_cluster_bitmap(struct btrfs_block_group *block_group,
		     struct btrfs_free_cluster *cluster,
		     struct list_head *bitmaps, u64 offset, u64 bytes,
		     u64 cont1_bytes, u64 min_bytes)
@@ -3055,11 +3295,11 @@
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -enospc
 */
-int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info,
-			     struct btrfs_block_group_cache *block_group,
+int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
 {
+	struct btrfs_fs_info *fs_info = block_group->fs_info;
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 	struct btrfs_free_space *entry, *tmp;
 	LIST_HEAD(bitmaps);
@@ -3118,7 +3358,7 @@
 	list_del_init(&entry->list);
 
 	if (!ret) {
-		atomic_inc(&block_group->count);
+		btrfs_get_block_group(block_group);
 		list_add_tail(&cluster->block_group_list,
			      &block_group->cluster_list);
 		cluster->block_group = block_group;
@@ -3146,9 +3386,10 @@
 	cluster->block_group = NULL;
 }
 
-static int do_trimming(struct btrfs_block_group_cache *block_group,
+static int do_trimming(struct btrfs_block_group *block_group,
		       u64 *total_trimmed, u64 start, u64 bytes,
		       u64 reserved_start, u64 reserved_bytes,
+		       enum btrfs_trim_state reserved_trim_state,
		       struct btrfs_trim_range *trim_entry)
 {
 	struct btrfs_space_info *space_info = block_group->space_info;
@@ -3156,6 +3397,9 @@
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 	int ret;
 	int update = 0;
+	const u64 end = start + bytes;
+	const u64 reserved_end = reserved_start + reserved_bytes;
+	enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
 	u64 trimmed = 0;
 
 	spin_lock(&space_info->lock);
@@ -3169,11 +3413,20 @@
 	spin_unlock(&space_info->lock);
 
 	ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed);
-	if (!ret)
+	if (!ret) {
 		*total_trimmed += trimmed;
+		trim_state = BTRFS_TRIM_STATE_TRIMMED;
+	}
 
 	mutex_lock(&ctl->cache_writeout_mutex);
-	btrfs_add_free_space(block_group, reserved_start, reserved_bytes);
+	if (reserved_start < start)
+		__btrfs_add_free_space(fs_info, ctl, reserved_start,
+				       start - reserved_start,
+				       reserved_trim_state);
+	if (start + bytes < reserved_start + reserved_bytes)
+		__btrfs_add_free_space(fs_info, ctl, end, reserved_end - end,
+				       reserved_trim_state);
+	__btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state);
 	list_del(&trim_entry->list);
 	mutex_unlock(&ctl->cache_writeout_mutex);
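
The re-add logic above splits the reserved range into at most three pieces: an untouched head [reserved_start, start) and tail [end, reserved_end) are returned with their previous trim state, while the middle [start, end) comes back as trimmed only if btrfs_discard_extent() succeeded. A hedged sketch of the interval arithmetic (hypothetical helper, not kernel code):

	#include <stdint.h>

	enum trim_state { UNTRIMMED, TRIMMED };

	struct range {
		uint64_t start;
		uint64_t len;
		enum trim_state state;
	};

	/* Split reserved [rs, rs + rlen) around the trimmed middle
	 * [ts, ts + tlen), which must lie inside the reservation.
	 * Returns how many ranges were written to out[]. */
	static int split_reservation(uint64_t rs, uint64_t rlen,
				     uint64_t ts, uint64_t tlen,
				     enum trim_state old_state, int trim_ok,
				     struct range out[3])
	{
		uint64_t re = rs + rlen, te = ts + tlen;
		int n = 0;

		if (rs < ts)		/* head keeps its old trim state */
			out[n++] = (struct range){ rs, ts - rs, old_state };
		out[n++] = (struct range){ ts, tlen,
					   trim_ok ? TRIMMED : UNTRIMMED };
		if (te < re)		/* tail keeps its old trim state */
			out[n++] = (struct range){ te, re - te, old_state };
		return n;
	}

When the trimmed middle covers the whole reservation, the head and tail conditions are false and only one range is emitted, matching the final __btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state) call above.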
@@ -3184,23 +3437,31 @@
 		space_info->bytes_readonly += reserved_bytes;
 		block_group->reserved -= reserved_bytes;
 		space_info->bytes_reserved -= reserved_bytes;
-		spin_unlock(&space_info->lock);
 		spin_unlock(&block_group->lock);
+		spin_unlock(&space_info->lock);
 	}
 
 	return ret;
 }
 
-static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
-			  u64 *total_trimmed, u64 start, u64 end, u64 minlen)
+/*
+ * If @async is set, then we will trim 1 region and return.
+ */
+static int trim_no_bitmap(struct btrfs_block_group *block_group,
+			  u64 *total_trimmed, u64 start, u64 end, u64 minlen,
+			  bool async)
 {
+	struct btrfs_discard_ctl *discard_ctl =
+					&block_group->fs_info->discard_ctl;
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 	struct btrfs_free_space *entry;
 	struct rb_node *node;
 	int ret = 0;
 	u64 extent_start;
 	u64 extent_bytes;
+	enum btrfs_trim_state extent_trim_state;
 	u64 bytes;
+	const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
 
 	while (start < end) {
 		struct btrfs_trim_range trim_entry;
@@ -3208,49 +3469,66 @@
 		mutex_lock(&ctl->cache_writeout_mutex);
 		spin_lock(&ctl->tree_lock);
 
-		if (ctl->free_space < minlen) {
-			spin_unlock(&ctl->tree_lock);
-			mutex_unlock(&ctl->cache_writeout_mutex);
-			break;
-		}
+		if (ctl->free_space < minlen)
+			goto out_unlock;
 
 		entry = tree_search_offset(ctl, start, 0, 1);
-		if (!entry) {
-			spin_unlock(&ctl->tree_lock);
-			mutex_unlock(&ctl->cache_writeout_mutex);
-			break;
-		}
+		if (!entry)
+			goto out_unlock;
 
-		/* skip bitmaps */
-		while (entry->bitmap) {
+		/* Skip bitmaps and if async, already trimmed entries */
+		while (entry->bitmap ||
+		       (async && btrfs_free_space_trimmed(entry))) {
 			node = rb_next(&entry->offset_index);
-			if (!node) {
-				spin_unlock(&ctl->tree_lock);
-				mutex_unlock(&ctl->cache_writeout_mutex);
-				goto out;
-			}
+			if (!node)
+				goto out_unlock;
 			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
 		}
 
-		if (entry->offset >= end) {
-			spin_unlock(&ctl->tree_lock);
-			mutex_unlock(&ctl->cache_writeout_mutex);
-			break;
-		}
+		if (entry->offset >= end)
+			goto out_unlock;
 
 		extent_start = entry->offset;
 		extent_bytes = entry->bytes;
-		start = max(start, extent_start);
-		bytes = min(extent_start + extent_bytes, end) - start;
-		if (bytes < minlen) {
-			spin_unlock(&ctl->tree_lock);
-			mutex_unlock(&ctl->cache_writeout_mutex);
-			goto next;
-		}
+		extent_trim_state = entry->trim_state;
+		if (async) {
+			start = entry->offset;
+			bytes = entry->bytes;
+			if (bytes < minlen) {
+				spin_unlock(&ctl->tree_lock);
+				mutex_unlock(&ctl->cache_writeout_mutex);
+				goto next;
+			}
+			unlink_free_space(ctl, entry);
+			/*
+			 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X.
+			 * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim
+			 * X when we come back around.  So trim it now.
+			 */
+			if (max_discard_size &&
+			    bytes >= (max_discard_size +
+				      BTRFS_ASYNC_DISCARD_MIN_FILTER)) {
+				bytes = max_discard_size;
+				extent_bytes = max_discard_size;
+				entry->offset += max_discard_size;
+				entry->bytes -= max_discard_size;
+				link_free_space(ctl, entry);
+			} else {
+				kmem_cache_free(btrfs_free_space_cachep, entry);
+			}
+		} else {
+			start = max(start, extent_start);
+			bytes = min(extent_start + extent_bytes, end) - start;
+			if (bytes < minlen) {
+				spin_unlock(&ctl->tree_lock);
+				mutex_unlock(&ctl->cache_writeout_mutex);
+				goto next;
+			}
 
-		unlink_free_space(ctl, entry);
-		kmem_cache_free(btrfs_free_space_cachep, entry);
+			unlink_free_space(ctl, entry);
+			kmem_cache_free(btrfs_free_space_cachep, entry);
+		}
 
 		spin_unlock(&ctl->tree_lock);
 		trim_entry.start = extent_start;
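
One detail worth pulling out of the async branch above: if an extent is only slightly larger than max_discard_size, clamping it would leave a remainder smaller than BTRFS_ASYNC_DISCARD_MIN_FILTER, which later passes would never pick up, so the whole extent is trimmed at once instead. A small sketch of the decision (illustrative only):

	#include <stdint.h>

	/* How much of an extent of 'bytes' the async pass trims now. */
	static uint64_t async_trim_len(uint64_t bytes, uint64_t max_discard_size,
				       uint64_t min_filter)
	{
		if (!max_discard_size)		/* no clamp configured */
			return bytes;
		if (bytes >= max_discard_size + min_filter)
			return max_discard_size;	/* leftover is re-linked */
		return bytes;	/* leftover would be unfilterable: trim whole */
	}

With hypothetical values max_discard_size = 64 MiB and min_filter = 256 KiB, a 64.1 MiB extent is trimmed in one go, while a 70 MiB extent is clamped to 64 MiB and the remaining 6 MiB is re-linked for a later pass.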
@@ -3259,11 +3537,17 @@
 		mutex_unlock(&ctl->cache_writeout_mutex);
 
 		ret = do_trimming(block_group, total_trimmed, start, bytes,
-				  extent_start, extent_bytes, &trim_entry);
-		if (ret)
+				  extent_start, extent_bytes, extent_trim_state,
+				  &trim_entry);
+		if (ret) {
+			block_group->discard_cursor = start + bytes;
 			break;
+		}
next:
 		start += bytes;
+		block_group->discard_cursor = start;
+		if (async && *total_trimmed)
+			break;
 
 		if (fatal_signal_pending(current)) {
 			ret = -ERESTARTSYS;
....@@ -3272,19 +3556,76 @@
32723556
32733557 cond_resched();
32743558 }
3275
-out:
3559
+
3560
+ return ret;
3561
+
3562
+out_unlock:
3563
+ block_group->discard_cursor = btrfs_block_group_end(block_group);
3564
+ spin_unlock(&ctl->tree_lock);
3565
+ mutex_unlock(&ctl->cache_writeout_mutex);
3566
+
32763567 return ret;
32773568 }
32783569
3279
-static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
3280
- u64 *total_trimmed, u64 start, u64 end, u64 minlen)
3570
+/*
3571
+ * If we break out of trimming a bitmap prematurely, we should reset the
3572
+ * trimming bit. In a rather contrieved case, it's possible to race here so
3573
+ * reset the state to BTRFS_TRIM_STATE_UNTRIMMED.
3574
+ *
3575
+ * start = start of bitmap
3576
+ * end = near end of bitmap
3577
+ *
3578
+ * Thread 1: Thread 2:
3579
+ * trim_bitmaps(start)
3580
+ * trim_bitmaps(end)
3581
+ * end_trimming_bitmap()
3582
+ * reset_trimming_bitmap()
3583
+ */
3584
+static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset)
32813585 {
3586
+ struct btrfs_free_space *entry;
3587
+
3588
+ spin_lock(&ctl->tree_lock);
3589
+ entry = tree_search_offset(ctl, offset, 1, 0);
3590
+ if (entry) {
3591
+ if (btrfs_free_space_trimmed(entry)) {
3592
+ ctl->discardable_extents[BTRFS_STAT_CURR] +=
3593
+ entry->bitmap_extents;
3594
+ ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes;
3595
+ }
3596
+ entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3597
+ }
3598
+
3599
+ spin_unlock(&ctl->tree_lock);
3600
+}
3601
+
3602
+static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl,
3603
+ struct btrfs_free_space *entry)
3604
+{
3605
+ if (btrfs_free_space_trimming_bitmap(entry)) {
3606
+ entry->trim_state = BTRFS_TRIM_STATE_TRIMMED;
3607
+ ctl->discardable_extents[BTRFS_STAT_CURR] -=
3608
+ entry->bitmap_extents;
3609
+ ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes;
3610
+ }
3611
+}
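Together, reset_trimming_bitmap() and end_trimming_bitmap() manage the round trip of a bitmap's trim_state. A summary sketch (state names from this patch; the transitions are inferred from the surrounding code):

/*
 * UNTRIMMED/TRIMMED --(full scan starts, start == offset)--> TRIMMING
 * TRIMMING --(scan completes, end_trimming_bitmap())-------> TRIMMED
 * any state --(abort or race, reset_trimming_bitmap())-----> UNTRIMMED
 *
 * Reaching TRIMMED subtracts the entry's extents/bytes from the
 * discardable counters; leaving TRIMMED adds them back.
 */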
3612
+
3613
+/*
3614
+ * If @async is set, then we will trim 1 region and return.
3615
+ */
3616
+static int trim_bitmaps(struct btrfs_block_group *block_group,
3617
+ u64 *total_trimmed, u64 start, u64 end, u64 minlen,
3618
+ u64 maxlen, bool async)
3619
+{
3620
+ struct btrfs_discard_ctl *discard_ctl =
3621
+ &block_group->fs_info->discard_ctl;
32823622 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
32833623 struct btrfs_free_space *entry;
32843624 int ret = 0;
32853625 int ret2;
32863626 u64 bytes;
32873627 u64 offset = offset_to_bitmap(ctl, start);
3628
+ const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
32883629
32893630 while (offset < end) {
32903631 bool next_bitmap = false;
....@@ -3294,34 +3635,83 @@
32943635 spin_lock(&ctl->tree_lock);
32953636
32963637 if (ctl->free_space < minlen) {
3638
+ block_group->discard_cursor =
3639
+ btrfs_block_group_end(block_group);
32973640 spin_unlock(&ctl->tree_lock);
32983641 mutex_unlock(&ctl->cache_writeout_mutex);
32993642 break;
33003643 }
33013644
33023645 entry = tree_search_offset(ctl, offset, 1, 0);
3303
- if (!entry) {
3646
+ /*
3647
+ * Bitmaps are marked trimmed lossily now to prevent constant
3648
+ * discarding of the same bitmap (the reason why we are bound
3649
+ * by the filters). So, retrim the block group bitmaps when we
3650
+ * are preparing to punt to the unused_bgs list. This uses
3651
+ * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED
3652
+ * which is the only discard index which sets minlen to 0.
3653
+ */
3654
+ if (!entry || (async && minlen && start == offset &&
3655
+ btrfs_free_space_trimmed(entry))) {
33043656 spin_unlock(&ctl->tree_lock);
33053657 mutex_unlock(&ctl->cache_writeout_mutex);
33063658 next_bitmap = true;
33073659 goto next;
33083660 }
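/*
 * Illustration of the skip above (a sketch, values assumed): in the
 * unused block group pass minlen == 0, so an already-trimmed bitmap is
 * rescanned rather than skipped. In the size-bucketed passes
 * (minlen > 0), a bitmap that starts a full scan (start == offset) and
 * is already marked trimmed is skipped to avoid re-discarding it.
 */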
3661
+
3662
+ /*
3663
+ * Async discard bitmap trimming begins by setting the start
3664
+ * to be block_group->start, and offset_to_bitmap() aligns it to the
3665
+ * start of the bitmap. This lets us know we are fully
3666
+ * scanning the bitmap rather than only some portion of it.
3667
+ */
3668
+ if (start == offset)
3669
+ entry->trim_state = BTRFS_TRIM_STATE_TRIMMING;
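/*
 * Example of the full-scan test above (assuming 4K pages and a 4K
 * ctl->unit, one bitmap spans 32768 * 4K = 128M): a caller passing
 * start at a 128M bitmap boundary gets offset == start, so the whole
 * bitmap will be scanned and can be marked TRIMMING; a start 1M into
 * the bitmap yields offset < start, the scan is partial, and the
 * trim_state is left alone.
 */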
33093670
33103671 bytes = minlen;
33113672 ret2 = search_bitmap(ctl, entry, &start, &bytes, false);
33123673 if (ret2 || start >= end) {
3674
+ /*
3675
+ * We lossily consider a bitmap trimmed if we only skip
3676
+ * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER.
3677
+ */
3678
+ if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER)
3679
+ end_trimming_bitmap(ctl, entry);
3680
+ else
3681
+ entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
33133682 spin_unlock(&ctl->tree_lock);
33143683 mutex_unlock(&ctl->cache_writeout_mutex);
33153684 next_bitmap = true;
33163685 goto next;
33173686 }
33183687
3688
+ /*
3689
+ * We already trimmed a region, but are using the locking above
3690
+ * to reset the trim_state.
3691
+ */
3692
+ if (async && *total_trimmed) {
3693
+ spin_unlock(&ctl->tree_lock);
3694
+ mutex_unlock(&ctl->cache_writeout_mutex);
3695
+ goto out;
3696
+ }
3697
+
33193698 bytes = min(bytes, end - start);
3320
- if (bytes < minlen) {
3699
+ if (bytes < minlen || (async && maxlen && bytes > maxlen)) {
33213700 spin_unlock(&ctl->tree_lock);
33223701 mutex_unlock(&ctl->cache_writeout_mutex);
33233702 goto next;
33243703 }
3704
+
3705
+ /*
3706
+ * Let bytes = max_discard_size + X.
3707
+ * If X < @minlen, we won't trim X when we come back around.
3708
+ * So trim it now. We differ here from trimming extents as we
3709
+ * don't keep individual state per bit.
3710
+ */
3711
+ if (async &&
3712
+ max_discard_size &&
3713
+ bytes > (max_discard_size + minlen))
3714
+ bytes = max_discard_size;
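/*
 * Sketch of the clamp above (sizes assumed): with minlen == 4K and
 * max_discard_size == 64M, a 64M + 8K run is clamped to 64M; the 8K
 * tail stays set in the bitmap and, being >= minlen, is trimmed on a
 * later pass. A 64M + 2K run is not clamped: its 2K tail (X < minlen)
 * would be filtered out later, so the whole run is trimmed now.
 */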
33253715
33263716 bitmap_clear_bits(ctl, entry, start, bytes);
33273717 if (entry->bytes == 0)
....@@ -3334,19 +3724,25 @@
33343724 mutex_unlock(&ctl->cache_writeout_mutex);
33353725
33363726 ret = do_trimming(block_group, total_trimmed, start, bytes,
3337
- start, bytes, &trim_entry);
3338
- if (ret)
3727
+ start, bytes, 0, &trim_entry);
3728
+ if (ret) {
3729
+ reset_trimming_bitmap(ctl, offset);
3730
+ block_group->discard_cursor =
3731
+ btrfs_block_group_end(block_group);
33393732 break;
3733
+ }
33403734 next:
33413735 if (next_bitmap) {
33423736 offset += BITS_PER_BITMAP * ctl->unit;
3737
+ start = offset;
33433738 } else {
33443739 start += bytes;
3345
- if (start >= offset + BITS_PER_BITMAP * ctl->unit)
3346
- offset += BITS_PER_BITMAP * ctl->unit;
33473740 }
3741
+ block_group->discard_cursor = start;
33483742
33493743 if (fatal_signal_pending(current)) {
3744
+ if (start != offset)
3745
+ reset_trimming_bitmap(ctl, offset);
33503746 ret = -ERESTARTSYS;
33513747 break;
33523748 }
....@@ -3354,55 +3750,47 @@
33543750 cond_resched();
33553751 }
33563752
3753
+ if (offset >= end)
3754
+ block_group->discard_cursor = end;
3755
+
3756
+out:
33573757 return ret;
33583758 }
33593759
3360
-void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache)
3760
+int btrfs_trim_block_group(struct btrfs_block_group *block_group,
3761
+ u64 *trimmed, u64 start, u64 end, u64 minlen)
33613762 {
3362
- atomic_inc(&cache->trimming);
3363
-}
3763
+ struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3764
+ int ret;
3765
+ u64 rem = 0;
33643766
3365
-void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group)
3366
-{
3367
- struct btrfs_fs_info *fs_info = block_group->fs_info;
3368
- struct extent_map_tree *em_tree;
3369
- struct extent_map *em;
3370
- bool cleanup;
3767
+ *trimmed = 0;
33713768
33723769 spin_lock(&block_group->lock);
3373
- cleanup = (atomic_dec_and_test(&block_group->trimming) &&
3374
- block_group->removed);
3770
+ if (block_group->removed) {
3771
+ spin_unlock(&block_group->lock);
3772
+ return 0;
3773
+ }
3774
+ btrfs_freeze_block_group(block_group);
33753775 spin_unlock(&block_group->lock);
33763776
3377
- if (cleanup) {
3378
- mutex_lock(&fs_info->chunk_mutex);
3379
- em_tree = &fs_info->mapping_tree.map_tree;
3380
- write_lock(&em_tree->lock);
3381
- em = lookup_extent_mapping(em_tree, block_group->key.objectid,
3382
- 1);
3383
- BUG_ON(!em); /* logic error, can't happen */
3384
- /*
3385
- * remove_extent_mapping() will delete us from the pinned_chunks
3386
- * list, which is protected by the chunk mutex.
3387
- */
3388
- remove_extent_mapping(em_tree, em);
3389
- write_unlock(&em_tree->lock);
3390
- mutex_unlock(&fs_info->chunk_mutex);
3777
+ ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false);
3778
+ if (ret)
3779
+ goto out;
33913780
3392
- /* once for us and once for the tree */
3393
- free_extent_map(em);
3394
- free_extent_map(em);
3395
-
3396
- /*
3397
- * We've left one free space entry and other tasks trimming
3398
- * this block group have left 1 entry each one. Free them.
3399
- */
3400
- __btrfs_remove_free_space_cache(block_group->free_space_ctl);
3401
- }
3781
+ ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false);
3782
+ div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem);
3783
+ /* If we ended in the middle of a bitmap, reset the trimming flag */
3784
+ if (rem)
3785
+ reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end));
3786
+out:
3787
+ btrfs_unfreeze_block_group(block_group);
3788
+ return ret;
34023789 }
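A concrete reading of the remainder check above (assuming 4K pages and a 4K ctl->unit, so one bitmap spans 32768 * 4K = 128M):

/*
 * end == 384M: 384M % 128M == 0 -> the pass ended exactly on a bitmap
 *              boundary, so nothing was left mid-scan.
 * end == 385M: 385M % 128M == 1M -> the final bitmap was only partially
 *              scanned; reset_trimming_bitmap() flips it back to
 *              UNTRIMMED instead of leaving it stuck in TRIMMING.
 */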
34033790
3404
-int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
3405
- u64 *trimmed, u64 start, u64 end, u64 minlen)
3791
+int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
3792
+ u64 *trimmed, u64 start, u64 end, u64 minlen,
3793
+ bool async)
34063794 {
34073795 int ret;
34083796
....@@ -3413,16 +3801,36 @@
34133801 spin_unlock(&block_group->lock);
34143802 return 0;
34153803 }
3416
- btrfs_get_block_group_trimming(block_group);
3804
+ btrfs_freeze_block_group(block_group);
34173805 spin_unlock(&block_group->lock);
34183806
3419
- ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
3420
- if (ret)
3421
- goto out;
3807
+ ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async);
3808
+ btrfs_unfreeze_block_group(block_group);
34223809
3423
- ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
3424
-out:
3425
- btrfs_put_block_group_trimming(block_group);
3810
+ return ret;
3811
+}
3812
+
3813
+int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
3814
+ u64 *trimmed, u64 start, u64 end, u64 minlen,
3815
+ u64 maxlen, bool async)
3816
+{
3817
+ int ret;
3818
+
3819
+ *trimmed = 0;
3820
+
3821
+ spin_lock(&block_group->lock);
3822
+ if (block_group->removed) {
3823
+ spin_unlock(&block_group->lock);
3824
+ return 0;
3825
+ }
3826
+ btrfs_freeze_block_group(block_group);
3827
+ spin_unlock(&block_group->lock);
3828
+
3829
+ ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen,
3830
+ async);
3831
+
3832
+ btrfs_unfreeze_block_group(block_group);
3833
+
34263834 return ret;
34273835 }
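Both wrappers freeze the block group around the trim, mirroring btrfs_trim_block_group() above; their consumer is the discard machinery in discard.c (see the new include). A hypothetical caller sketch, not the real discard logic, with SZ_4K as an assumed minlen:

/* Each async call trims at most one region and advances the cursor. */
u64 trimmed = 0;
u64 start = block_group->discard_cursor;
const u64 end = btrfs_block_group_end(block_group);
int ret = 0;

while (!ret && start < end) {
	ret = btrfs_trim_block_group_extents(block_group, &trimmed,
					     start, end, SZ_4K, true);
	if (block_group->discard_cursor <= start)
		break;		/* no forward progress, bail out */
	start = block_group->discard_cursor;
}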
34283836
....@@ -3582,11 +3990,9 @@
35823990 if (release_metadata)
35833991 btrfs_delalloc_release_metadata(BTRFS_I(inode),
35843992 inode->i_size, true);
3585
-#ifdef DEBUG
3586
- btrfs_err(fs_info,
3587
- "failed to write free ino cache for root %llu",
3588
- root->root_key.objectid);
3589
-#endif
3993
+ btrfs_debug(fs_info,
3994
+ "failed to write free ino cache for root %llu error %d",
3995
+ root->root_key.objectid, ret);
35903996 }
35913997
35923998 return ret;
....@@ -3599,12 +4005,13 @@
35994005 * how the free space cache loading stuff works, so you can get really weird
36004006 * configurations.
36014007 */
3602
-int test_add_free_space_entry(struct btrfs_block_group_cache *cache,
4008
+int test_add_free_space_entry(struct btrfs_block_group *cache,
36034009 u64 offset, u64 bytes, bool bitmap)
36044010 {
36054011 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
36064012 struct btrfs_free_space *info = NULL, *bitmap_info;
36074013 void *map = NULL;
4014
+ enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED;
36084015 u64 bytes_added;
36094016 int ret;
36104017
....@@ -3646,7 +4053,8 @@
36464053 info = NULL;
36474054 }
36484055
3649
- bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
4056
+ bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
4057
+ trim_state);
36504058
36514059 bytes -= bytes_added;
36524060 offset += bytes_added;
....@@ -3667,7 +4075,7 @@
36674075 * just used to check the absence of space, so if there is free space in the
36684076 * range at all we will return 1.
36694077 */
3670
-int test_check_exists(struct btrfs_block_group_cache *cache,
4078
+int test_check_exists(struct btrfs_block_group *cache,
36714079 u64 offset, u64 bytes)
36724080 {
36734081 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;