2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/fs/btrfs/free-space-cache.c
....@@ -18,9 +18,14 @@
1818 #include "extent_io.h"
1919 #include "inode-map.h"
2020 #include "volumes.h"
21
+#include "space-info.h"
22
+#include "delalloc-space.h"
23
+#include "block-group.h"
24
+#include "discard.h"
2125
2226 #define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
23
-#define MAX_CACHE_BYTES_PER_GIG SZ_32K
27
+#define MAX_CACHE_BYTES_PER_GIG SZ_64K
28
+#define FORCE_EXTENT_THRESHOLD SZ_1M
2429
2530 struct btrfs_trim_range {
2631 u64 start;
....@@ -28,6 +33,8 @@
2833 struct list_head list;
2934 };
3035
36
+static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
37
+ struct btrfs_free_space *bitmap_info);
3138 static int link_free_space(struct btrfs_free_space_ctl *ctl,
3239 struct btrfs_free_space *info);
3340 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
....@@ -75,7 +82,7 @@
7582 * sure NOFS is set to keep us from deadlocking.
7683 */
7784 nofs_flag = memalloc_nofs_save();
78
- inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path);
85
+ inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path);
7986 btrfs_release_path(path);
8087 memalloc_nofs_restore(nofs_flag);
8188 if (IS_ERR(inode))
....@@ -88,10 +95,10 @@
8895 return inode;
8996 }
9097
91
-struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info,
92
- struct btrfs_block_group_cache
93
- *block_group, struct btrfs_path *path)
98
+struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
99
+ struct btrfs_path *path)
94100 {
101
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
95102 struct inode *inode = NULL;
96103 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
97104
....@@ -103,7 +110,7 @@
103110 return inode;
104111
105112 inode = __lookup_free_space_inode(fs_info->tree_root, path,
106
- block_group->key.objectid);
113
+ block_group->start);
107114 if (IS_ERR(inode))
108115 return inode;
109116
....@@ -185,20 +192,19 @@
185192 return 0;
186193 }
187194
188
-int create_free_space_inode(struct btrfs_fs_info *fs_info,
189
- struct btrfs_trans_handle *trans,
190
- struct btrfs_block_group_cache *block_group,
195
+int create_free_space_inode(struct btrfs_trans_handle *trans,
196
+ struct btrfs_block_group *block_group,
191197 struct btrfs_path *path)
192198 {
193199 int ret;
194200 u64 ino;
195201
196
- ret = btrfs_find_free_objectid(fs_info->tree_root, &ino);
202
+ ret = btrfs_find_free_objectid(trans->fs_info->tree_root, &ino);
197203 if (ret < 0)
198204 return ret;
199205
200
- return __create_free_space_inode(fs_info->tree_root, trans, path, ino,
201
- block_group->key.objectid);
206
+ return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
207
+ ino, block_group->start);
202208 }
203209
204210 int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
....@@ -208,8 +214,8 @@
208214 int ret;
209215
210216 /* 1 for slack space, 1 for updating the inode */
211
- needed_bytes = btrfs_calc_trunc_metadata_size(fs_info, 1) +
212
- btrfs_calc_trans_metadata_size(fs_info, 1);
217
+ needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
218
+ btrfs_calc_metadata_size(fs_info, 1);
213219
214220 spin_lock(&rsv->lock);
215221 if (rsv->reserved < needed_bytes)
....@@ -221,7 +227,7 @@
221227 }
222228
223229 int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
224
- struct btrfs_block_group_cache *block_group,
230
+ struct btrfs_block_group *block_group,
225231 struct inode *inode)
226232 {
227233 struct btrfs_root *root = BTRFS_I(inode)->root;
....@@ -365,10 +371,10 @@
365371 }
366372 }
367373
368
-static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode,
369
- int uptodate)
374
+static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
370375 {
371376 struct page *page;
377
+ struct inode *inode = io_ctl->inode;
372378 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
373379 int i;
374380
....@@ -407,8 +413,6 @@
407413
408414 static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
409415 {
410
- __le64 *val;
411
-
412416 io_ctl_map_page(io_ctl, 1);
413417
414418 /*
....@@ -423,14 +427,13 @@
423427 io_ctl->size -= sizeof(u64) * 2;
424428 }
425429
426
- val = io_ctl->cur;
427
- *val = cpu_to_le64(generation);
430
+ put_unaligned_le64(generation, io_ctl->cur);
428431 io_ctl->cur += sizeof(u64);
429432 }
430433
431434 static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
432435 {
433
- __le64 *gen;
436
+ u64 cache_gen;
434437
435438 /*
436439 * Skip the crc area. If we don't check crcs then we just have a 64bit
....@@ -445,11 +448,11 @@
445448 io_ctl->size -= sizeof(u64) * 2;
446449 }
447450
448
- gen = io_ctl->cur;
449
- if (le64_to_cpu(*gen) != generation) {
451
+ cache_gen = get_unaligned_le64(io_ctl->cur);
452
+ if (cache_gen != generation) {
450453 btrfs_err_rl(io_ctl->fs_info,
451454 "space cache generation (%llu) does not match inode (%llu)",
452
- *gen, generation);
455
+ cache_gen, generation);
453456 io_ctl_unmap_page(io_ctl);
454457 return -EIO;
455458 }
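
The generation field is now written and read with put_unaligned_le64()/get_unaligned_le64() instead of dereferencing a __le64 pointer, which avoids an unaligned access when the cursor into the mapped page is not naturally aligned for a u64. A minimal userspace sketch of what such accessors do (illustrative only, not the kernel's <asm/unaligned.h> implementation):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Write a 64-bit value as little endian to a possibly unaligned address. */
static void put_unaligned_le64_sketch(uint64_t val, void *p)
{
	uint8_t buf[8];

	for (int i = 0; i < 8; i++)
		buf[i] = (uint8_t)(val >> (8 * i));
	memcpy(p, buf, 8);	/* byte copy: no alignment requirement */
}

/* Read a little-endian 64-bit value from a possibly unaligned address. */
static uint64_t get_unaligned_le64_sketch(const void *p)
{
	uint8_t buf[8];
	uint64_t val = 0;

	memcpy(buf, p, 8);
	for (int i = 0; i < 8; i++)
		val |= (uint64_t)buf[i] << (8 * i);
	return val;
}

int main(void)
{
	/* Simulate io_ctl->cur pointing a few bytes into a page. */
	uint8_t page[32];
	uint8_t *cur = page + 3;	/* deliberately misaligned */
	uint64_t generation = 123456789ULL;

	put_unaligned_le64_sketch(generation, cur);
	printf("read back: %llu\n",
	       (unsigned long long)get_unaligned_le64_sketch(cur));
	return 0;
}

A plain *(__le64 *)cur store on a misaligned pointer is undefined behaviour and can fault on strict-alignment architectures, which is the point of routing these accesses through the helpers.
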
....@@ -471,9 +474,8 @@
471474 if (index == 0)
472475 offset = sizeof(u32) * io_ctl->num_pages;
473476
474
- crc = btrfs_csum_data(io_ctl->orig + offset, crc,
475
- PAGE_SIZE - offset);
476
- btrfs_csum_final(crc, (u8 *)&crc);
477
+ crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
478
+ btrfs_crc32c_final(crc, (u8 *)&crc);
477479 io_ctl_unmap_page(io_ctl);
478480 tmp = page_address(io_ctl->pages[0]);
479481 tmp += index;
....@@ -499,9 +501,8 @@
499501 val = *tmp;
500502
501503 io_ctl_map_page(io_ctl, 0);
502
- crc = btrfs_csum_data(io_ctl->orig + offset, crc,
503
- PAGE_SIZE - offset);
504
- btrfs_csum_final(crc, (u8 *)&crc);
504
+ crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
505
+ btrfs_crc32c_final(crc, (u8 *)&crc);
505506 if (val != crc) {
506507 btrfs_err_rl(io_ctl->fs_info,
507508 "csum mismatch on free space cache");
....@@ -521,8 +522,8 @@
521522 return -ENOSPC;
522523
523524 entry = io_ctl->cur;
524
- entry->offset = cpu_to_le64(offset);
525
- entry->bytes = cpu_to_le64(bytes);
525
+ put_unaligned_le64(offset, &entry->offset);
526
+ put_unaligned_le64(bytes, &entry->bytes);
526527 entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
527528 BTRFS_FREE_SPACE_EXTENT;
528529 io_ctl->cur += sizeof(struct btrfs_free_space_entry);
....@@ -595,8 +596,8 @@
595596 }
596597
597598 e = io_ctl->cur;
598
- entry->offset = le64_to_cpu(e->offset);
599
- entry->bytes = le64_to_cpu(e->bytes);
599
+ entry->offset = get_unaligned_le64(&e->offset);
600
+ entry->bytes = get_unaligned_le64(&e->bytes);
600601 *type = e->type;
601602 io_ctl->cur += sizeof(struct btrfs_free_space_entry);
602603 io_ctl->size -= sizeof(struct btrfs_free_space_entry);
....@@ -728,7 +729,7 @@
728729
729730 readahead_cache(inode);
730731
731
- ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
732
+ ret = io_ctl_prepare_pages(&io_ctl, true);
732733 if (ret)
733734 goto out;
734735
....@@ -753,6 +754,16 @@
753754 kmem_cache_free(btrfs_free_space_cachep, e);
754755 goto free_cache;
755756 }
757
+
758
+ /*
759
+ * Sync discard ensures that the free space cache is always
760
+ * trimmed. So when reading this in, the state should reflect
761
+ * that. We also do this for async as a stop gap for lack of
762
+ * persistence.
763
+ */
764
+ if (btrfs_test_opt(fs_info, DISCARD_SYNC) ||
765
+ btrfs_test_opt(fs_info, DISCARD_ASYNC))
766
+ e->trim_state = BTRFS_TRIM_STATE_TRIMMED;
756767
757768 if (!e->bytes) {
758769 ret = -1;
....@@ -783,15 +794,16 @@
783794 }
784795 spin_lock(&ctl->tree_lock);
785796 ret = link_free_space(ctl, e);
786
- ctl->total_bitmaps++;
787
- ctl->op->recalc_thresholds(ctl);
788
- spin_unlock(&ctl->tree_lock);
789797 if (ret) {
798
+ spin_unlock(&ctl->tree_lock);
790799 btrfs_err(fs_info,
791800 "Duplicate entries in free space cache, dumping");
792801 kmem_cache_free(btrfs_free_space_cachep, e);
793802 goto free_cache;
794803 }
804
+ ctl->total_bitmaps++;
805
+ ctl->op->recalc_thresholds(ctl);
806
+ spin_unlock(&ctl->tree_lock);
795807 list_add_tail(&e->list, &bitmaps);
796808 }
797809
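
The comment in the hunk above explains why freshly loaded entries are marked trimmed whenever either discard mode is enabled. A tiny sketch of that decision, with illustrative names rather than the kernel's mount-option helpers:

#include <stdbool.h>
#include <stdio.h>

enum trim_state_sketch {
	TRIM_STATE_UNTRIMMED,
	TRIM_STATE_TRIMMED,
};

/*
 * With synchronous discard every freed extent was discarded before it
 * reached the cache, so loaded entries really are trimmed.  For async
 * discard the same assumption is made as a stop-gap, because trim state
 * is not persisted in the space cache.
 */
static enum trim_state_sketch initial_trim_state(bool discard_sync,
						 bool discard_async)
{
	if (discard_sync || discard_async)
		return TRIM_STATE_TRIMMED;
	return TRIM_STATE_UNTRIMMED;
}

int main(void)
{
	printf("sync discard mount: %d\n", initial_trim_state(true, false));
	printf("no discard mount:   %d\n", initial_trim_state(false, false));
	return 0;
}
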
....@@ -809,12 +821,19 @@
809821 ret = io_ctl_read_bitmap(&io_ctl, e);
810822 if (ret)
811823 goto free_cache;
824
+ e->bitmap_extents = count_bitmap_extents(ctl, e);
825
+ if (!btrfs_free_space_trimmed(e)) {
826
+ ctl->discardable_extents[BTRFS_STAT_CURR] +=
827
+ e->bitmap_extents;
828
+ ctl->discardable_bytes[BTRFS_STAT_CURR] += e->bytes;
829
+ }
812830 }
813831
814832 io_ctl_drop_pages(&io_ctl);
815833 merge_space_tree(ctl);
816834 ret = 1;
817835 out:
836
+ btrfs_discard_update_discardable(ctl->private, ctl);
818837 io_ctl_free(&io_ctl);
819838 return ret;
820839 free_cache:
....@@ -823,15 +842,15 @@
823842 goto out;
824843 }
825844
826
-int load_free_space_cache(struct btrfs_fs_info *fs_info,
827
- struct btrfs_block_group_cache *block_group)
845
+int load_free_space_cache(struct btrfs_block_group *block_group)
828846 {
847
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
829848 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
830849 struct inode *inode;
831850 struct btrfs_path *path;
832851 int ret = 0;
833852 bool matched;
834
- u64 used = btrfs_block_group_used(&block_group->item);
853
+ u64 used = block_group->used;
835854
836855 /*
837856 * If this block group has been marked to be cleared for one reason or
....@@ -869,7 +888,7 @@
869888 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
870889 * we will never try to read their inode item while the fs is mounted.
871890 */
872
- inode = lookup_free_space_inode(fs_info, block_group, path);
891
+ inode = lookup_free_space_inode(block_group, path);
873892 if (IS_ERR(inode)) {
874893 btrfs_free_path(path);
875894 return 0;
....@@ -885,13 +904,13 @@
885904 spin_unlock(&block_group->lock);
886905
887906 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
888
- path, block_group->key.objectid);
907
+ path, block_group->start);
889908 btrfs_free_path(path);
890909 if (ret <= 0)
891910 goto out;
892911
893912 spin_lock(&ctl->tree_lock);
894
- matched = (ctl->free_space == (block_group->key.offset - used -
913
+ matched = (ctl->free_space == (block_group->length - used -
895914 block_group->bytes_super));
896915 spin_unlock(&ctl->tree_lock);
897916
....@@ -899,7 +918,7 @@
899918 __btrfs_remove_free_space_cache(ctl);
900919 btrfs_warn(fs_info,
901920 "block group %llu has wrong amount of free space",
902
- block_group->key.objectid);
921
+ block_group->start);
903922 ret = -1;
904923 }
905924 out:
....@@ -912,7 +931,7 @@
912931
913932 btrfs_warn(fs_info,
914933 "failed to load free space cache for block group %llu, rebuilding it now",
915
- block_group->key.objectid);
934
+ block_group->start);
916935 }
917936
918937 iput(inode);
....@@ -922,7 +941,7 @@
922941 static noinline_for_stack
923942 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
924943 struct btrfs_free_space_ctl *ctl,
925
- struct btrfs_block_group_cache *block_group,
944
+ struct btrfs_block_group *block_group,
926945 int *entries, int *bitmaps,
927946 struct list_head *bitmap_list)
928947 {
....@@ -1015,7 +1034,7 @@
10151034 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10161035 if (ret < 0) {
10171036 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
1018
- EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
1037
+ EXTENT_DELALLOC, 0, 0, NULL);
10191038 goto fail;
10201039 }
10211040 leaf = path->nodes[0];
....@@ -1027,9 +1046,8 @@
10271046 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
10281047 found_key.offset != offset) {
10291048 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
1030
- inode->i_size - 1,
1031
- EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
1032
- NULL);
1049
+ inode->i_size - 1, EXTENT_DELALLOC, 0,
1050
+ 0, NULL);
10331051 btrfs_release_path(path);
10341052 goto fail;
10351053 }
....@@ -1050,9 +1068,9 @@
10501068 return -1;
10511069 }
10521070
1053
-static noinline_for_stack int
1054
-write_pinned_extent_entries(struct btrfs_fs_info *fs_info,
1055
- struct btrfs_block_group_cache *block_group,
1071
+static noinline_for_stack int write_pinned_extent_entries(
1072
+ struct btrfs_trans_handle *trans,
1073
+ struct btrfs_block_group *block_group,
10561074 struct btrfs_io_ctl *io_ctl,
10571075 int *entries)
10581076 {
....@@ -1070,11 +1088,11 @@
10701088 * We shouldn't have switched the pinned extents yet so this is the
10711089 * right one
10721090 */
1073
- unpin = fs_info->pinned_extents;
1091
+ unpin = &trans->transaction->pinned_extents;
10741092
1075
- start = block_group->key.objectid;
1093
+ start = block_group->start;
10761094
1077
- while (start < block_group->key.objectid + block_group->key.offset) {
1095
+ while (start < block_group->start + block_group->length) {
10781096 ret = find_first_extent_bit(unpin, start,
10791097 &extent_start, &extent_end,
10801098 EXTENT_DIRTY, NULL);
....@@ -1082,13 +1100,12 @@
10821100 return 0;
10831101
10841102 /* This pinned extent is out of our range */
1085
- if (extent_start >= block_group->key.objectid +
1086
- block_group->key.offset)
1103
+ if (extent_start >= block_group->start + block_group->length)
10871104 return 0;
10881105
10891106 extent_start = max(extent_start, start);
1090
- extent_end = min(block_group->key.objectid +
1091
- block_group->key.offset, extent_end + 1);
1107
+ extent_end = min(block_group->start + block_group->length,
1108
+ extent_end + 1);
10921109 len = extent_end - extent_start;
10931110
10941111 *entries += 1;
....@@ -1126,7 +1143,7 @@
11261143 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
11271144 if (ret)
11281145 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
1129
- EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
1146
+ EXTENT_DELALLOC, 0, 0, NULL);
11301147
11311148 return ret;
11321149 }
....@@ -1152,7 +1169,7 @@
11521169
11531170 static int __btrfs_wait_cache_io(struct btrfs_root *root,
11541171 struct btrfs_trans_handle *trans,
1155
- struct btrfs_block_group_cache *block_group,
1172
+ struct btrfs_block_group *block_group,
11561173 struct btrfs_io_ctl *io_ctl,
11571174 struct btrfs_path *path, u64 offset)
11581175 {
....@@ -1174,13 +1191,10 @@
11741191 if (ret) {
11751192 invalidate_inode_pages2(inode->i_mapping);
11761193 BTRFS_I(inode)->generation = 0;
1177
- if (block_group) {
1178
-#ifdef DEBUG
1179
- btrfs_err(root->fs_info,
1180
- "failed to write free space cache for block group %llu",
1181
- block_group->key.objectid);
1182
-#endif
1183
- }
1194
+ if (block_group)
1195
+ btrfs_debug(root->fs_info,
1196
+ "failed to write free space cache for block group %llu error %d",
1197
+ block_group->start, ret);
11841198 }
11851199 btrfs_update_inode(trans, root, inode);
11861200
....@@ -1220,12 +1234,12 @@
12201234 }
12211235
12221236 int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
1223
- struct btrfs_block_group_cache *block_group,
1237
+ struct btrfs_block_group *block_group,
12241238 struct btrfs_path *path)
12251239 {
12261240 return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans,
12271241 block_group, &block_group->io_ctl,
1228
- path, block_group->key.objectid);
1242
+ path, block_group->start);
12291243 }
12301244
12311245 /**
....@@ -1241,11 +1255,10 @@
12411255 */
12421256 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
12431257 struct btrfs_free_space_ctl *ctl,
1244
- struct btrfs_block_group_cache *block_group,
1258
+ struct btrfs_block_group *block_group,
12451259 struct btrfs_io_ctl *io_ctl,
12461260 struct btrfs_trans_handle *trans)
12471261 {
1248
- struct btrfs_fs_info *fs_info = root->fs_info;
12491262 struct extent_state *cached_state = NULL;
12501263 LIST_HEAD(bitmap_list);
12511264 int entries = 0;
....@@ -1277,7 +1290,7 @@
12771290 }
12781291
12791292 /* Lock all pages first so we can lock the extent safely. */
1280
- ret = io_ctl_prepare_pages(io_ctl, inode, 0);
1293
+ ret = io_ctl_prepare_pages(io_ctl, false);
12811294 if (ret)
12821295 goto out_unlock;
12831296
....@@ -1303,8 +1316,7 @@
13031316 * If this changes while we are working we'll get added back to
13041317 * the dirty list and redo it. No locking needed
13051318 */
1306
- ret = write_pinned_extent_entries(fs_info, block_group,
1307
- io_ctl, &entries);
1319
+ ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries);
13081320 if (ret)
13091321 goto out_nospc_locked;
13101322
....@@ -1323,8 +1335,9 @@
13231335 io_ctl_zero_remaining_pages(io_ctl);
13241336
13251337 /* Everything is written out, now we dirty the pages in the file. */
1326
- ret = btrfs_dirty_pages(inode, io_ctl->pages, io_ctl->num_pages, 0,
1327
- i_size_read(inode), &cached_state);
1338
+ ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
1339
+ io_ctl->num_pages, 0, i_size_read(inode),
1340
+ &cached_state);
13281341 if (ret)
13291342 goto out_nospc;
13301343
....@@ -1342,7 +1355,7 @@
13421355
13431356 /*
13441357 * at this point the pages are under IO and we're happy,
1345
- * The caller is responsible for waiting on them and updating the
1358
+ * The caller is responsible for waiting on them and updating
13461359 * the cache and the inode
13471360 */
13481361 io_ctl->entries = entries;
....@@ -1353,18 +1366,6 @@
13531366 goto out;
13541367
13551368 return 0;
1356
-
1357
-out:
1358
- io_ctl->inode = NULL;
1359
- io_ctl_free(io_ctl);
1360
- if (ret) {
1361
- invalidate_inode_pages2(inode->i_mapping);
1362
- BTRFS_I(inode)->generation = 0;
1363
- }
1364
- btrfs_update_inode(trans, root, inode);
1365
- if (must_iput)
1366
- iput(inode);
1367
- return ret;
13681369
13691370 out_nospc_locked:
13701371 cleanup_bitmap_list(&bitmap_list);
....@@ -1378,14 +1379,24 @@
13781379 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
13791380 up_write(&block_group->data_rwsem);
13801381
1381
- goto out;
1382
+out:
1383
+ io_ctl->inode = NULL;
1384
+ io_ctl_free(io_ctl);
1385
+ if (ret) {
1386
+ invalidate_inode_pages2(inode->i_mapping);
1387
+ BTRFS_I(inode)->generation = 0;
1388
+ }
1389
+ btrfs_update_inode(trans, root, inode);
1390
+ if (must_iput)
1391
+ iput(inode);
1392
+ return ret;
13821393 }
13831394
1384
-int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
1385
- struct btrfs_trans_handle *trans,
1386
- struct btrfs_block_group_cache *block_group,
1395
+int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
1396
+ struct btrfs_block_group *block_group,
13871397 struct btrfs_path *path)
13881398 {
1399
+ struct btrfs_fs_info *fs_info = trans->fs_info;
13891400 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
13901401 struct inode *inode;
13911402 int ret = 0;
....@@ -1397,18 +1408,16 @@
13971408 }
13981409 spin_unlock(&block_group->lock);
13991410
1400
- inode = lookup_free_space_inode(fs_info, block_group, path);
1411
+ inode = lookup_free_space_inode(block_group, path);
14011412 if (IS_ERR(inode))
14021413 return 0;
14031414
14041415 ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
14051416 block_group, &block_group->io_ctl, trans);
14061417 if (ret) {
1407
-#ifdef DEBUG
1408
- btrfs_err(fs_info,
1409
- "failed to write free space cache for block group %llu",
1410
- block_group->key.objectid);
1411
-#endif
1418
+ btrfs_debug(fs_info,
1419
+ "failed to write free space cache for block group %llu error %d",
1420
+ block_group->start, ret);
14121421 spin_lock(&block_group->lock);
14131422 block_group->disk_cache_state = BTRFS_DC_ERROR;
14141423 spin_unlock(&block_group->lock);
....@@ -1633,6 +1642,11 @@
16331642 {
16341643 rb_erase(&info->offset_index, &ctl->free_space_offset);
16351644 ctl->free_extents--;
1645
+
1646
+ if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
1647
+ ctl->discardable_extents[BTRFS_STAT_CURR]--;
1648
+ ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes;
1649
+ }
16361650 }
16371651
16381652 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
....@@ -1653,6 +1667,11 @@
16531667 if (ret)
16541668 return ret;
16551669
1670
+ if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
1671
+ ctl->discardable_extents[BTRFS_STAT_CURR]++;
1672
+ ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
1673
+ }
1674
+
16561675 ctl->free_space += info->bytes;
16571676 ctl->free_extents++;
16581677 return ret;
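
The new discardable_extents/discardable_bytes counters are kept in lockstep with the rb-tree: linking an untrimmed, non-bitmap entry bumps them and the unlink side drops them. A stripped-down sketch of that bookkeeping, using simplified types rather than the kernel structs:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct entry_sketch {
	uint64_t bytes;
	bool is_bitmap;
	bool trimmed;
};

struct ctl_sketch {
	int64_t discardable_extents;
	int64_t discardable_bytes;
};

/* Mirror of the link/unlink accounting: only untrimmed extents count. */
static void account(struct ctl_sketch *ctl, const struct entry_sketch *e,
		    int dir /* +1 on link, -1 on unlink */)
{
	if (!e->is_bitmap && !e->trimmed) {
		ctl->discardable_extents += dir;
		ctl->discardable_bytes += dir * (int64_t)e->bytes;
	}
}

int main(void)
{
	struct ctl_sketch ctl = { 0 };
	struct entry_sketch e = { .bytes = 1 << 20, .is_bitmap = false,
				  .trimmed = false };

	account(&ctl, &e, +1);		/* link path */
	printf("after link:   %lld extents, %lld bytes\n",
	       (long long)ctl.discardable_extents,
	       (long long)ctl.discardable_bytes);

	account(&ctl, &e, -1);		/* unlink path */
	printf("after unlink: %lld extents, %lld bytes\n",
	       (long long)ctl.discardable_extents,
	       (long long)ctl.discardable_bytes);
	return 0;
}
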
....@@ -1660,11 +1679,11 @@
16601679
16611680 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
16621681 {
1663
- struct btrfs_block_group_cache *block_group = ctl->private;
1682
+ struct btrfs_block_group *block_group = ctl->private;
16641683 u64 max_bytes;
16651684 u64 bitmap_bytes;
16661685 u64 extent_bytes;
1667
- u64 size = block_group->key.offset;
1686
+ u64 size = block_group->length;
16681687 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
16691688 u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
16701689
....@@ -1673,26 +1692,17 @@
16731692 ASSERT(ctl->total_bitmaps <= max_bitmaps);
16741693
16751694 /*
1676
- * The goal is to keep the total amount of memory used per 1gb of space
1677
- * at or below 32k, so we need to adjust how much memory we allow to be
1678
- * used by extent based free space tracking
1695
+ * We are trying to keep the total amount of memory used per 1GiB of
1696
+ * space to be MAX_CACHE_BYTES_PER_GIG. However, with a reclamation
1697
+ * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of
1698
+ * bitmaps, we may end up using more memory than this.
16791699 */
16801700 if (size < SZ_1G)
16811701 max_bytes = MAX_CACHE_BYTES_PER_GIG;
16821702 else
16831703 max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);
16841704
1685
- /*
1686
- * we want to account for 1 more bitmap than what we have so we can make
1687
- * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1688
- * we add more bitmaps.
1689
- */
1690
- bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit;
1691
-
1692
- if (bitmap_bytes >= max_bytes) {
1693
- ctl->extents_thresh = 0;
1694
- return;
1695
- }
1705
+ bitmap_bytes = ctl->total_bitmaps * ctl->unit;
16961706
16971707 /*
16981708 * we want the extent entry threshold to always be at most 1/2 the max
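
The hunk above doubles MAX_CACHE_BYTES_PER_GIG to 64 KiB and stops reserving space for one extra bitmap. As a rough worked example of the resulting budget (assumed 4 KiB sectorsize and a made-up bitmap count; the tail of the function, which converts the byte budget into an entry count, lies outside this hunk):

#include <stdint.h>
#include <stdio.h>

#define SZ_1G			(1024ull * 1024 * 1024)
#define SZ_64K			(64ull * 1024)
#define MAX_CACHE_BYTES_PER_GIG	SZ_64K		/* new value in this patch */

int main(void)
{
	uint64_t size = SZ_1G;		/* hypothetical block group length */
	uint64_t unit = 4096;		/* sectorsize == bitmap granularity */
	uint64_t total_bitmaps = 4;	/* hypothetical current bitmap count */
	uint64_t max_bytes, bitmap_bytes, extent_budget;

	/* Memory budget scales with block group size, 64 KiB per 1 GiB. */
	if (size < SZ_1G)
		max_bytes = MAX_CACHE_BYTES_PER_GIG;
	else
		max_bytes = MAX_CACHE_BYTES_PER_GIG * (size / SZ_1G);

	/* No "+1 bitmap" slack any more: only existing bitmaps count. */
	bitmap_bytes = total_bitmaps * unit;

	/* What is left, capped at half the budget, can go to extent entries. */
	extent_budget = max_bytes - bitmap_bytes;
	if (extent_budget > max_bytes / 2)
		extent_budget = max_bytes / 2;

	printf("max_bytes=%llu bitmap_bytes=%llu extent budget=%llu\n",
	       (unsigned long long)max_bytes,
	       (unsigned long long)bitmap_bytes,
	       (unsigned long long)extent_budget);
	return 0;
}
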
....@@ -1709,17 +1719,31 @@
17091719 struct btrfs_free_space *info,
17101720 u64 offset, u64 bytes)
17111721 {
1712
- unsigned long start, count;
1722
+ unsigned long start, count, end;
1723
+ int extent_delta = -1;
17131724
17141725 start = offset_to_bit(info->offset, ctl->unit, offset);
17151726 count = bytes_to_bits(bytes, ctl->unit);
1716
- ASSERT(start + count <= BITS_PER_BITMAP);
1727
+ end = start + count;
1728
+ ASSERT(end <= BITS_PER_BITMAP);
17171729
17181730 bitmap_clear(info->bitmap, start, count);
17191731
17201732 info->bytes -= bytes;
17211733 if (info->max_extent_size > ctl->unit)
17221734 info->max_extent_size = 0;
1735
+
1736
+ if (start && test_bit(start - 1, info->bitmap))
1737
+ extent_delta++;
1738
+
1739
+ if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
1740
+ extent_delta++;
1741
+
1742
+ info->bitmap_extents += extent_delta;
1743
+ if (!btrfs_free_space_trimmed(info)) {
1744
+ ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
1745
+ ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
1746
+ }
17231747 }
17241748
17251749 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
....@@ -1734,16 +1758,30 @@
17341758 struct btrfs_free_space *info, u64 offset,
17351759 u64 bytes)
17361760 {
1737
- unsigned long start, count;
1761
+ unsigned long start, count, end;
1762
+ int extent_delta = 1;
17381763
17391764 start = offset_to_bit(info->offset, ctl->unit, offset);
17401765 count = bytes_to_bits(bytes, ctl->unit);
1741
- ASSERT(start + count <= BITS_PER_BITMAP);
1766
+ end = start + count;
1767
+ ASSERT(end <= BITS_PER_BITMAP);
17421768
17431769 bitmap_set(info->bitmap, start, count);
17441770
17451771 info->bytes += bytes;
17461772 ctl->free_space += bytes;
1773
+
1774
+ if (start && test_bit(start - 1, info->bitmap))
1775
+ extent_delta--;
1776
+
1777
+ if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
1778
+ extent_delta--;
1779
+
1780
+ info->bitmap_extents += extent_delta;
1781
+ if (!btrfs_free_space_trimmed(info)) {
1782
+ ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
1783
+ ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes;
1784
+ }
17471785 }
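
Both bitmap helpers now track how the number of contiguous free regions inside the bitmap changes by looking only at the two neighbouring bits of the run being modified. A self-contained sketch of that delta calculation for the set case, with hypothetical helper names:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define NBITS 64

static bool test_bit_sketch(const unsigned char *bm, unsigned int nr)
{
	return (bm[nr / 8] >> (nr % 8)) & 1;
}

static void set_bits_sketch(unsigned char *bm, unsigned int start,
			    unsigned int count)
{
	for (unsigned int i = start; i < start + count; i++)
		bm[i / 8] |= 1u << (i % 8);
}

/*
 * Change in the number of set-regions when setting [start, start + count):
 * begin at +1 (a new region) and subtract one for each already-set
 * neighbour the new run merges with.  The clearing case in the patch is
 * symmetric with the signs flipped.
 */
static int set_extent_delta(const unsigned char *bm, unsigned int start,
			    unsigned int count)
{
	unsigned int end = start + count;
	int delta = 1;

	if (start && test_bit_sketch(bm, start - 1))
		delta--;
	if (end < NBITS && test_bit_sketch(bm, end))
		delta--;
	return delta;
}

int main(void)
{
	unsigned char bm[NBITS / 8];

	memset(bm, 0, sizeof(bm));
	set_bits_sketch(bm, 10, 4);	/* one region: [10,14) */
	set_bits_sketch(bm, 20, 4);	/* second region: [20,24) */

	/* Filling the gap merges both neighbours: 2 regions become 1. */
	printf("delta joining the gap: %d\n", set_extent_delta(bm, 14, 6));
	/* An isolated run adds one region. */
	printf("delta isolated run:    %d\n", set_extent_delta(bm, 40, 4));
	return 0;
}
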
17481786
17491787 /*
....@@ -1879,11 +1917,35 @@
18791917 return NULL;
18801918 }
18811919
1920
+static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
1921
+ struct btrfs_free_space *bitmap_info)
1922
+{
1923
+ struct btrfs_block_group *block_group = ctl->private;
1924
+ u64 bytes = bitmap_info->bytes;
1925
+ unsigned int rs, re;
1926
+ int count = 0;
1927
+
1928
+ if (!block_group || !bytes)
1929
+ return count;
1930
+
1931
+ bitmap_for_each_set_region(bitmap_info->bitmap, rs, re, 0,
1932
+ BITS_PER_BITMAP) {
1933
+ bytes -= (rs - re) * ctl->unit;
1934
+ count++;
1935
+
1936
+ if (!bytes)
1937
+ break;
1938
+ }
1939
+
1940
+ return count;
1941
+}
1942
+
18821943 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
18831944 struct btrfs_free_space *info, u64 offset)
18841945 {
18851946 info->offset = offset_to_bitmap(ctl, offset);
18861947 info->bytes = 0;
1948
+ info->bitmap_extents = 0;
18871949 INIT_LIST_HEAD(&info->list);
18881950 link_free_space(ctl, info);
18891951 ctl->total_bitmaps++;
....@@ -1894,6 +1956,18 @@
18941956 static void free_bitmap(struct btrfs_free_space_ctl *ctl,
18951957 struct btrfs_free_space *bitmap_info)
18961958 {
1959
+ /*
1960
+ * Normally when this is called, the bitmap is completely empty. However,
1961
+ * if we are blowing up the free space cache for one reason or another
1962
+ * via __btrfs_remove_free_space_cache(), then it may not be freed and
1963
+ * we may leave stats on the table.
1964
+ */
1965
+ if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
1966
+ ctl->discardable_extents[BTRFS_STAT_CURR] -=
1967
+ bitmap_info->bitmap_extents;
1968
+ ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;
1969
+
1970
+ }
18971971 unlink_free_space(ctl, bitmap_info);
18981972 kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
18991973 kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
....@@ -1980,10 +2054,23 @@
19802054
19812055 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
19822056 struct btrfs_free_space *info, u64 offset,
1983
- u64 bytes)
2057
+ u64 bytes, enum btrfs_trim_state trim_state)
19842058 {
19852059 u64 bytes_to_set = 0;
19862060 u64 end;
2061
+
2062
+ /*
2063
+ * This is a tradeoff to make bitmap trim state minimal. We mark the
2064
+ * whole bitmap untrimmed if at any point we add untrimmed regions.
2065
+ */
2066
+ if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) {
2067
+ if (btrfs_free_space_trimmed(info)) {
2068
+ ctl->discardable_extents[BTRFS_STAT_CURR] +=
2069
+ info->bitmap_extents;
2070
+ ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
2071
+ }
2072
+ info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2073
+ }
19872074
19882075 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
19892076
....@@ -2004,7 +2091,7 @@
20042091 static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
20052092 struct btrfs_free_space *info)
20062093 {
2007
- struct btrfs_block_group_cache *block_group = ctl->private;
2094
+ struct btrfs_block_group *block_group = ctl->private;
20082095 struct btrfs_fs_info *fs_info = block_group->fs_info;
20092096 bool forced = false;
20102097
....@@ -2012,6 +2099,10 @@
20122099 if (btrfs_should_fragment_free_space(block_group))
20132100 forced = true;
20142101 #endif
2102
+
2103
+ /* This is a way to reclaim large regions from the bitmaps. */
2104
+ if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD)
2105
+ return false;
20152106
20162107 /*
20172108 * If we are below the extents threshold then we can add this as an
....@@ -2025,8 +2116,8 @@
20252116 * of cache left then go ahead an dadd them, no sense in adding
20262117 * the overhead of a bitmap if we don't have to.
20272118 */
2028
- if (info->bytes <= fs_info->sectorsize * 4) {
2029
- if (ctl->free_extents * 2 <= ctl->extents_thresh)
2119
+ if (info->bytes <= fs_info->sectorsize * 8) {
2120
+ if (ctl->free_extents * 3 <= ctl->extents_thresh)
20302121 return false;
20312122 } else {
20322123 return false;
....@@ -2041,7 +2132,7 @@
20412132 * so allow those block groups to still be allowed to have a bitmap
20422133 * entry.
20432134 */
2044
- if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset)
2135
+ if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length)
20452136 return false;
20462137
20472138 return true;
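
The thresholds in use_bitmap() are loosened by this hunk: "small" now means up to eight sectors instead of four, small pieces stay extents until the tree holds three times as many entries as before, and anything of FORCE_EXTENT_THRESHOLD or larger is always kept as an extent so big regions can be reclaimed from bitmaps. A small sketch of the resulting decision with made-up numbers (the debug fragmentation override and the tiny-block-group check are omitted):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SZ_1M			(1024u * 1024)
#define FORCE_EXTENT_THRESHOLD	SZ_1M

/* Simplified version of the use_bitmap() policy after this patch. */
static bool use_bitmap_sketch(uint64_t bytes, uint64_t sectorsize,
			      unsigned int free_extents,
			      unsigned int extents_thresh)
{
	/* Large regions are always kept as extent entries. */
	if (bytes >= FORCE_EXTENT_THRESHOLD)
		return false;

	/* Below the extent threshold there is little pressure for bitmaps. */
	if (free_extents < extents_thresh) {
		/* Small pieces go to bitmaps only once the tree is crowded. */
		if (bytes <= sectorsize * 8) {
			if (free_extents * 3 <= extents_thresh)
				return false;
		} else {
			return false;
		}
	}
	return true;
}

int main(void)
{
	uint64_t sectorsize = 4096;

	/* 16 KiB piece, lightly loaded tree: stays an extent entry. */
	printf("%d\n", use_bitmap_sketch(16384, sectorsize, 100, 1000));
	/* Same piece once the tree is fairly full: goes into a bitmap. */
	printf("%d\n", use_bitmap_sketch(16384, sectorsize, 400, 1000));
	/* 2 MiB piece: never a bitmap, regardless of load. */
	printf("%d\n", use_bitmap_sketch(2 * SZ_1M, sectorsize, 4000, 1000));
	return 0;
}
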
....@@ -2056,13 +2147,15 @@
20562147 struct btrfs_free_space *info)
20572148 {
20582149 struct btrfs_free_space *bitmap_info;
2059
- struct btrfs_block_group_cache *block_group = NULL;
2150
+ struct btrfs_block_group *block_group = NULL;
20602151 int added = 0;
20612152 u64 bytes, offset, bytes_added;
2153
+ enum btrfs_trim_state trim_state;
20622154 int ret;
20632155
20642156 bytes = info->bytes;
20652157 offset = info->offset;
2158
+ trim_state = info->trim_state;
20662159
20672160 if (!ctl->op->use_bitmap(ctl, info))
20682161 return 0;
....@@ -2097,8 +2190,8 @@
20972190 }
20982191
20992192 if (entry->offset == offset_to_bitmap(ctl, offset)) {
2100
- bytes_added = add_bytes_to_bitmap(ctl, entry,
2101
- offset, bytes);
2193
+ bytes_added = add_bytes_to_bitmap(ctl, entry, offset,
2194
+ bytes, trim_state);
21022195 bytes -= bytes_added;
21032196 offset += bytes_added;
21042197 }
....@@ -2117,7 +2210,8 @@
21172210 goto new_bitmap;
21182211 }
21192212
2120
- bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
2213
+ bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
2214
+ trim_state);
21212215 bytes -= bytes_added;
21222216 offset += bytes_added;
21232217 added = 0;
....@@ -2151,6 +2245,7 @@
21512245 /* allocate the bitmap */
21522246 info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep,
21532247 GFP_NOFS);
2248
+ info->trim_state = BTRFS_TRIM_STATE_TRIMMED;
21542249 spin_lock(&ctl->tree_lock);
21552250 if (!info->bitmap) {
21562251 ret = -ENOMEM;
....@@ -2170,6 +2265,22 @@
21702265 return ret;
21712266 }
21722267
2268
+/*
2269
+ * Free space merging rules:
2270
+ * 1) Merge trimmed areas together
2271
+ * 2) Let untrimmed areas coalesce with trimmed areas
2272
+ * 3) Always pull neighboring regions from bitmaps
2273
+ *
2274
+ * The above rules are for when we merge free space based on btrfs_trim_state.
2275
+ * Rules 2 and 3 are subtle because they are suboptimal, but are done for the
2276
+ * same reason: to promote larger extent regions which makes life easier for
2277
+ * find_free_extent(). Rule 2 enables coalescing based on the common path
2278
+ * being returning free space from btrfs_finish_extent_commit(). So when free
2279
+ * space is trimmed, it will prevent aggregating trimmed new region and
2280
+ * untrimmed regions in the rb_tree. Rule 3 is purely to obtain larger extents
2281
+ * and provide find_free_extent() with the largest extents possible hoping for
2282
+ * the reuse path.
2283
+ */
21732284 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
21742285 struct btrfs_free_space *info, bool update_stat)
21752286 {
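
The merging rules spelled out in the comment above boil down to one predicate on the entry being inserted and its neighbour: merging is refused only when a trimmed entry would absorb an untrimmed neighbour, since the combined entry would otherwise misreport its trim state. A compact sketch of that predicate (hypothetical helper name, not the kernel function):

#include <stdbool.h>
#include <stdio.h>

/*
 * Mirror of the trim-state check added to try_merge_free_space(): an
 * untrimmed entry may absorb anything (the result stays untrimmed), a
 * trimmed entry may only absorb trimmed neighbours.
 */
static bool can_merge_sketch(bool new_is_trimmed, bool neighbor_is_trimmed)
{
	return !new_is_trimmed || neighbor_is_trimmed;
}

int main(void)
{
	printf("trimmed   + trimmed   neighbour -> %d\n",
	       can_merge_sketch(true, true));	/* rule 1: merge */
	printf("untrimmed + trimmed   neighbour -> %d\n",
	       can_merge_sketch(false, true));	/* rule 2: merge */
	printf("untrimmed + untrimmed neighbour -> %d\n",
	       can_merge_sketch(false, false));	/* merge, stays untrimmed */
	printf("trimmed   + untrimmed neighbour -> %d\n",
	       can_merge_sketch(true, false));	/* kept separate */
	return 0;
}

Rule 3 (stealing neighbouring bits out of bitmaps) is the reason the steal_from_bitmap paths above downgrade the result to untrimmed whenever the bitmap they pulled from was not trimmed.
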
....@@ -2178,6 +2289,7 @@
21782289 bool merged = false;
21792290 u64 offset = info->offset;
21802291 u64 bytes = info->bytes;
2292
+ const bool is_trimmed = btrfs_free_space_trimmed(info);
21812293
21822294 /*
21832295 * first we want to see if there is free space adjacent to the range we
....@@ -2191,7 +2303,9 @@
21912303 else if (!right_info)
21922304 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
21932305
2194
- if (right_info && !right_info->bitmap) {
2306
+ /* See try_merge_free_space() comment. */
2307
+ if (right_info && !right_info->bitmap &&
2308
+ (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
21952309 if (update_stat)
21962310 unlink_free_space(ctl, right_info);
21972311 else
....@@ -2201,8 +2315,10 @@
22012315 merged = true;
22022316 }
22032317
2318
+ /* See try_merge_free_space() comment. */
22042319 if (left_info && !left_info->bitmap &&
2205
- left_info->offset + left_info->bytes == offset) {
2320
+ left_info->offset + left_info->bytes == offset &&
2321
+ (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
22062322 if (update_stat)
22072323 unlink_free_space(ctl, left_info);
22082324 else
....@@ -2237,6 +2353,10 @@
22372353 return false;
22382354 bytes = (j - i) * ctl->unit;
22392355 info->bytes += bytes;
2356
+
2357
+ /* See try_merge_free_space() comment. */
2358
+ if (!btrfs_free_space_trimmed(bitmap))
2359
+ info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
22402360
22412361 if (update_stat)
22422362 bitmap_clear_bits(ctl, bitmap, end, bytes);
....@@ -2291,6 +2411,10 @@
22912411 info->offset -= bytes;
22922412 info->bytes += bytes;
22932413
2414
+ /* See try_merge_free_space() comment. */
2415
+ if (!btrfs_free_space_trimmed(bitmap))
2416
+ info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2417
+
22942418 if (update_stat)
22952419 bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
22962420 else
....@@ -2340,10 +2464,13 @@
23402464
23412465 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
23422466 struct btrfs_free_space_ctl *ctl,
2343
- u64 offset, u64 bytes)
2467
+ u64 offset, u64 bytes,
2468
+ enum btrfs_trim_state trim_state)
23442469 {
2470
+ struct btrfs_block_group *block_group = ctl->private;
23452471 struct btrfs_free_space *info;
23462472 int ret = 0;
2473
+ u64 filter_bytes = bytes;
23472474
23482475 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
23492476 if (!info)
....@@ -2351,6 +2478,7 @@
23512478
23522479 info->offset = offset;
23532480 info->bytes = bytes;
2481
+ info->trim_state = trim_state;
23542482 RB_CLEAR_NODE(&info->offset_index);
23552483
23562484 spin_lock(&ctl->tree_lock);
....@@ -2379,10 +2507,13 @@
23792507 */
23802508 steal_from_bitmap(ctl, info, true);
23812509
2510
+ filter_bytes = max(filter_bytes, info->bytes);
2511
+
23822512 ret = link_free_space(ctl, info);
23832513 if (ret)
23842514 kmem_cache_free(btrfs_free_space_cachep, info);
23852515 out:
2516
+ btrfs_discard_update_discardable(block_group, ctl);
23862517 spin_unlock(&ctl->tree_lock);
23872518
23882519 if (ret) {
....@@ -2390,10 +2521,47 @@
23902521 ASSERT(ret != -EEXIST);
23912522 }
23922523
2524
+ if (trim_state != BTRFS_TRIM_STATE_TRIMMED) {
2525
+ btrfs_discard_check_filter(block_group, filter_bytes);
2526
+ btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
2527
+ }
2528
+
23932529 return ret;
23942530 }
23952531
2396
-int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
2532
+int btrfs_add_free_space(struct btrfs_block_group *block_group,
2533
+ u64 bytenr, u64 size)
2534
+{
2535
+ enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2536
+
2537
+ if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC))
2538
+ trim_state = BTRFS_TRIM_STATE_TRIMMED;
2539
+
2540
+ return __btrfs_add_free_space(block_group->fs_info,
2541
+ block_group->free_space_ctl,
2542
+ bytenr, size, trim_state);
2543
+}
2544
+
2545
+/*
2546
+ * This is a subtle distinction because when adding free space back in general,
2547
+ * we want it to be added as untrimmed for async. But in the case where we add
2548
+ * it on loading of a block group, we want to consider it trimmed.
2549
+ */
2550
+int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
2551
+ u64 bytenr, u64 size)
2552
+{
2553
+ enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2554
+
2555
+ if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) ||
2556
+ btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
2557
+ trim_state = BTRFS_TRIM_STATE_TRIMMED;
2558
+
2559
+ return __btrfs_add_free_space(block_group->fs_info,
2560
+ block_group->free_space_ctl,
2561
+ bytenr, size, trim_state);
2562
+}
2563
+
2564
+int btrfs_remove_free_space(struct btrfs_block_group *block_group,
23972565 u64 offset, u64 bytes)
23982566 {
23992567 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
....@@ -2465,8 +2633,10 @@
24652633 }
24662634 spin_unlock(&ctl->tree_lock);
24672635
2468
- ret = btrfs_add_free_space(block_group, offset + bytes,
2469
- old_end - (offset + bytes));
2636
+ ret = __btrfs_add_free_space(block_group->fs_info, ctl,
2637
+ offset + bytes,
2638
+ old_end - (offset + bytes),
2639
+ info->trim_state);
24702640 WARN_ON(ret);
24712641 goto out;
24722642 }
....@@ -2478,12 +2648,13 @@
24782648 goto again;
24792649 }
24802650 out_lock:
2651
+ btrfs_discard_update_discardable(block_group, ctl);
24812652 spin_unlock(&ctl->tree_lock);
24822653 out:
24832654 return ret;
24842655 }
24852656
2486
-void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
2657
+void btrfs_dump_free_space(struct btrfs_block_group *block_group,
24872658 u64 bytes)
24882659 {
24892660 struct btrfs_fs_info *fs_info = block_group->fs_info;
....@@ -2508,14 +2679,14 @@
25082679 "%d blocks of free space at or bigger than bytes is", count);
25092680 }
25102681
2511
-void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
2682
+void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group)
25122683 {
25132684 struct btrfs_fs_info *fs_info = block_group->fs_info;
25142685 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
25152686
25162687 spin_lock_init(&ctl->tree_lock);
25172688 ctl->unit = fs_info->sectorsize;
2518
- ctl->start = block_group->key.objectid;
2689
+ ctl->start = block_group->start;
25192690 ctl->private = block_group;
25202691 ctl->op = &free_space_op;
25212692 INIT_LIST_HEAD(&ctl->trimming_ranges);
....@@ -2535,9 +2706,8 @@
25352706 * pointed to by the cluster, someone else raced in and freed the
25362707 * cluster already. In that case, we just return without changing anything
25372708 */
2538
-static int
2539
-__btrfs_return_cluster_to_free_space(
2540
- struct btrfs_block_group_cache *block_group,
2709
+static void __btrfs_return_cluster_to_free_space(
2710
+ struct btrfs_block_group *block_group,
25412711 struct btrfs_free_cluster *cluster)
25422712 {
25432713 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
....@@ -2545,8 +2715,10 @@
25452715 struct rb_node *node;
25462716
25472717 spin_lock(&cluster->lock);
2548
- if (cluster->block_group != block_group)
2549
- goto out;
2718
+ if (cluster->block_group != block_group) {
2719
+ spin_unlock(&cluster->lock);
2720
+ return;
2721
+ }
25502722
25512723 cluster->block_group = NULL;
25522724 cluster->window_start = 0;
....@@ -2563,18 +2735,29 @@
25632735
25642736 bitmap = (entry->bitmap != NULL);
25652737 if (!bitmap) {
2738
+ /* Merging treats extents as if they were new */
2739
+ if (!btrfs_free_space_trimmed(entry)) {
2740
+ ctl->discardable_extents[BTRFS_STAT_CURR]--;
2741
+ ctl->discardable_bytes[BTRFS_STAT_CURR] -=
2742
+ entry->bytes;
2743
+ }
2744
+
25662745 try_merge_free_space(ctl, entry, false);
25672746 steal_from_bitmap(ctl, entry, false);
2747
+
2748
+ /* As we insert directly, update these statistics */
2749
+ if (!btrfs_free_space_trimmed(entry)) {
2750
+ ctl->discardable_extents[BTRFS_STAT_CURR]++;
2751
+ ctl->discardable_bytes[BTRFS_STAT_CURR] +=
2752
+ entry->bytes;
2753
+ }
25682754 }
25692755 tree_insert_offset(&ctl->free_space_offset,
25702756 entry->offset, &entry->offset_index, bitmap);
25712757 }
25722758 cluster->root = RB_ROOT;
2573
-
2574
-out:
25752759 spin_unlock(&cluster->lock);
25762760 btrfs_put_block_group(block_group);
2577
- return 0;
25782761 }
25792762
25802763 static void __btrfs_remove_free_space_cache_locked(
....@@ -2600,10 +2783,12 @@
26002783 {
26012784 spin_lock(&ctl->tree_lock);
26022785 __btrfs_remove_free_space_cache_locked(ctl);
2786
+ if (ctl->private)
2787
+ btrfs_discard_update_discardable(ctl->private, ctl);
26032788 spin_unlock(&ctl->tree_lock);
26042789 }
26052790
2606
-void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
2791
+void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
26072792 {
26082793 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
26092794 struct btrfs_free_cluster *cluster;
....@@ -2621,20 +2806,55 @@
26212806 cond_resched_lock(&ctl->tree_lock);
26222807 }
26232808 __btrfs_remove_free_space_cache_locked(ctl);
2809
+ btrfs_discard_update_discardable(block_group, ctl);
26242810 spin_unlock(&ctl->tree_lock);
26252811
26262812 }
26272813
2628
-u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2814
+/**
2815
+ * btrfs_is_free_space_trimmed - see if everything is trimmed
2816
+ * @block_group: block_group of interest
2817
+ *
2818
+ * Walk @block_group's free space rb_tree to determine if everything is trimmed.
2819
+ */
2820
+bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group)
2821
+{
2822
+ struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2823
+ struct btrfs_free_space *info;
2824
+ struct rb_node *node;
2825
+ bool ret = true;
2826
+
2827
+ spin_lock(&ctl->tree_lock);
2828
+ node = rb_first(&ctl->free_space_offset);
2829
+
2830
+ while (node) {
2831
+ info = rb_entry(node, struct btrfs_free_space, offset_index);
2832
+
2833
+ if (!btrfs_free_space_trimmed(info)) {
2834
+ ret = false;
2835
+ break;
2836
+ }
2837
+
2838
+ node = rb_next(node);
2839
+ }
2840
+
2841
+ spin_unlock(&ctl->tree_lock);
2842
+ return ret;
2843
+}
2844
+
2845
+u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
26292846 u64 offset, u64 bytes, u64 empty_size,
26302847 u64 *max_extent_size)
26312848 {
26322849 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2850
+ struct btrfs_discard_ctl *discard_ctl =
2851
+ &block_group->fs_info->discard_ctl;
26332852 struct btrfs_free_space *entry = NULL;
26342853 u64 bytes_search = bytes + empty_size;
26352854 u64 ret = 0;
26362855 u64 align_gap = 0;
26372856 u64 align_gap_len = 0;
2857
+ enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
26382858
26392859 spin_lock(&ctl->tree_lock);
26402860 entry = find_free_space(ctl, &offset, &bytes_search,
....@@ -2645,12 +2865,20 @@
26452865 ret = offset;
26462866 if (entry->bitmap) {
26472867 bitmap_clear_bits(ctl, entry, offset, bytes);
2868
+
2869
+ if (!btrfs_free_space_trimmed(entry))
2870
+ atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
2871
+
26482872 if (!entry->bytes)
26492873 free_bitmap(ctl, entry);
26502874 } else {
26512875 unlink_free_space(ctl, entry);
26522876 align_gap_len = offset - entry->offset;
26532877 align_gap = entry->offset;
2878
+ align_gap_trim_state = entry->trim_state;
2879
+
2880
+ if (!btrfs_free_space_trimmed(entry))
2881
+ atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
26542882
26552883 entry->offset = offset + bytes;
26562884 WARN_ON(entry->bytes < bytes + align_gap_len);
....@@ -2662,11 +2890,13 @@
26622890 link_free_space(ctl, entry);
26632891 }
26642892 out:
2893
+ btrfs_discard_update_discardable(block_group, ctl);
26652894 spin_unlock(&ctl->tree_lock);
26662895
26672896 if (align_gap_len)
26682897 __btrfs_add_free_space(block_group->fs_info, ctl,
2669
- align_gap, align_gap_len);
2898
+ align_gap, align_gap_len,
2899
+ align_gap_trim_state);
26702900 return ret;
26712901 }
26722902
....@@ -2678,12 +2908,11 @@
26782908 * Otherwise, it'll get a reference on the block group pointed to by the
26792909 * cluster and remove the cluster from it.
26802910 */
2681
-int btrfs_return_cluster_to_free_space(
2682
- struct btrfs_block_group_cache *block_group,
2911
+void btrfs_return_cluster_to_free_space(
2912
+ struct btrfs_block_group *block_group,
26832913 struct btrfs_free_cluster *cluster)
26842914 {
26852915 struct btrfs_free_space_ctl *ctl;
2686
- int ret;
26872916
26882917 /* first, get a safe pointer to the block group */
26892918 spin_lock(&cluster->lock);
....@@ -2691,29 +2920,30 @@
26912920 block_group = cluster->block_group;
26922921 if (!block_group) {
26932922 spin_unlock(&cluster->lock);
2694
- return 0;
2923
+ return;
26952924 }
26962925 } else if (cluster->block_group != block_group) {
26972926 /* someone else has already freed it don't redo their work */
26982927 spin_unlock(&cluster->lock);
2699
- return 0;
2928
+ return;
27002929 }
2701
- atomic_inc(&block_group->count);
2930
+ btrfs_get_block_group(block_group);
27022931 spin_unlock(&cluster->lock);
27032932
27042933 ctl = block_group->free_space_ctl;
27052934
27062935 /* now return any extents the cluster had on it */
27072936 spin_lock(&ctl->tree_lock);
2708
- ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
2937
+ __btrfs_return_cluster_to_free_space(block_group, cluster);
27092938 spin_unlock(&ctl->tree_lock);
2939
+
2940
+ btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group);
27102941
27112942 /* finally drop our ref */
27122943 btrfs_put_block_group(block_group);
2713
- return ret;
27142944 }
27152945
2716
-static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2946
+static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group,
27172947 struct btrfs_free_cluster *cluster,
27182948 struct btrfs_free_space *entry,
27192949 u64 bytes, u64 min_start,
....@@ -2746,11 +2976,13 @@
27462976 * if it couldn't find anything suitably large, or a logical disk offset
27472977 * if things worked out
27482978 */
2749
-u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2979
+u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
27502980 struct btrfs_free_cluster *cluster, u64 bytes,
27512981 u64 min_start, u64 *max_extent_size)
27522982 {
27532983 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2984
+ struct btrfs_discard_ctl *discard_ctl =
2985
+ &block_group->fs_info->discard_ctl;
27542986 struct btrfs_free_space *entry = NULL;
27552987 struct rb_node *node;
27562988 u64 ret = 0;
....@@ -2803,8 +3035,6 @@
28033035 entry->bytes -= bytes;
28043036 }
28053037
2806
- if (entry->bytes == 0)
2807
- rb_erase(&entry->offset_index, &cluster->root);
28083038 break;
28093039 }
28103040 out:
....@@ -2815,24 +3045,35 @@
28153045
28163046 spin_lock(&ctl->tree_lock);
28173047
3048
+ if (!btrfs_free_space_trimmed(entry))
3049
+ atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
3050
+
28183051 ctl->free_space -= bytes;
3052
+ if (!entry->bitmap && !btrfs_free_space_trimmed(entry))
3053
+ ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
3054
+
3055
+ spin_lock(&cluster->lock);
28193056 if (entry->bytes == 0) {
3057
+ rb_erase(&entry->offset_index, &cluster->root);
28203058 ctl->free_extents--;
28213059 if (entry->bitmap) {
28223060 kmem_cache_free(btrfs_free_space_bitmap_cachep,
28233061 entry->bitmap);
28243062 ctl->total_bitmaps--;
28253063 ctl->op->recalc_thresholds(ctl);
3064
+ } else if (!btrfs_free_space_trimmed(entry)) {
3065
+ ctl->discardable_extents[BTRFS_STAT_CURR]--;
28263066 }
28273067 kmem_cache_free(btrfs_free_space_cachep, entry);
28283068 }
28293069
3070
+ spin_unlock(&cluster->lock);
28303071 spin_unlock(&ctl->tree_lock);
28313072
28323073 return ret;
28333074 }
28343075
2835
-static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
3076
+static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group,
28363077 struct btrfs_free_space *entry,
28373078 struct btrfs_free_cluster *cluster,
28383079 u64 offset, u64 bytes,
....@@ -2914,7 +3155,7 @@
29143155 * extent of cont1_bytes, and other clusters of at least min_bytes.
29153156 */
29163157 static noinline int
2917
-setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
3158
+setup_cluster_no_bitmap(struct btrfs_block_group *block_group,
29183159 struct btrfs_free_cluster *cluster,
29193160 struct list_head *bitmaps, u64 offset, u64 bytes,
29203161 u64 cont1_bytes, u64 min_bytes)
....@@ -3005,7 +3246,7 @@
30053246 * that we have already failed to find extents that will work.
30063247 */
30073248 static noinline int
3008
-setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
3249
+setup_cluster_bitmap(struct btrfs_block_group *block_group,
30093250 struct btrfs_free_cluster *cluster,
30103251 struct list_head *bitmaps, u64 offset, u64 bytes,
30113252 u64 cont1_bytes, u64 min_bytes)
....@@ -3055,11 +3296,11 @@
30553296 * returns zero and sets up cluster if things worked out, otherwise
30563297 * it returns -enospc
30573298 */
3058
-int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info,
3059
- struct btrfs_block_group_cache *block_group,
3299
+int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
30603300 struct btrfs_free_cluster *cluster,
30613301 u64 offset, u64 bytes, u64 empty_size)
30623302 {
3303
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
30633304 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
30643305 struct btrfs_free_space *entry, *tmp;
30653306 LIST_HEAD(bitmaps);
....@@ -3118,7 +3359,7 @@
31183359 list_del_init(&entry->list);
31193360
31203361 if (!ret) {
3121
- atomic_inc(&block_group->count);
3362
+ btrfs_get_block_group(block_group);
31223363 list_add_tail(&cluster->block_group_list,
31233364 &block_group->cluster_list);
31243365 cluster->block_group = block_group;
....@@ -3146,9 +3387,10 @@
31463387 cluster->block_group = NULL;
31473388 }
31483389
3149
-static int do_trimming(struct btrfs_block_group_cache *block_group,
3390
+static int do_trimming(struct btrfs_block_group *block_group,
31503391 u64 *total_trimmed, u64 start, u64 bytes,
31513392 u64 reserved_start, u64 reserved_bytes,
3393
+ enum btrfs_trim_state reserved_trim_state,
31523394 struct btrfs_trim_range *trim_entry)
31533395 {
31543396 struct btrfs_space_info *space_info = block_group->space_info;
....@@ -3156,6 +3398,9 @@
31563398 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
31573399 int ret;
31583400 int update = 0;
3401
+ const u64 end = start + bytes;
3402
+ const u64 reserved_end = reserved_start + reserved_bytes;
3403
+ enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
31593404 u64 trimmed = 0;
31603405
31613406 spin_lock(&space_info->lock);
....@@ -3169,11 +3414,20 @@
31693414 spin_unlock(&space_info->lock);
31703415
31713416 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed);
3172
- if (!ret)
3417
+ if (!ret) {
31733418 *total_trimmed += trimmed;
3419
+ trim_state = BTRFS_TRIM_STATE_TRIMMED;
3420
+ }
31743421
31753422 mutex_lock(&ctl->cache_writeout_mutex);
3176
- btrfs_add_free_space(block_group, reserved_start, reserved_bytes);
3423
+ if (reserved_start < start)
3424
+ __btrfs_add_free_space(fs_info, ctl, reserved_start,
3425
+ start - reserved_start,
3426
+ reserved_trim_state);
3427
+ if (start + bytes < reserved_start + reserved_bytes)
3428
+ __btrfs_add_free_space(fs_info, ctl, end, reserved_end - end,
3429
+ reserved_trim_state);
3430
+ __btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state);
31773431 list_del(&trim_entry->list);
31783432 mutex_unlock(&ctl->cache_writeout_mutex);
31793433
....@@ -3184,23 +3438,31 @@
31843438 space_info->bytes_readonly += reserved_bytes;
31853439 block_group->reserved -= reserved_bytes;
31863440 space_info->bytes_reserved -= reserved_bytes;
3187
- spin_unlock(&space_info->lock);
31883441 spin_unlock(&block_group->lock);
3442
+ spin_unlock(&space_info->lock);
31893443 }
31903444
31913445 return ret;
31923446 }
31933447
3194
-static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
3195
- u64 *total_trimmed, u64 start, u64 end, u64 minlen)
3448
+/*
3449
+ * If @async is set, then we will trim 1 region and return.
3450
+ */
3451
+static int trim_no_bitmap(struct btrfs_block_group *block_group,
3452
+ u64 *total_trimmed, u64 start, u64 end, u64 minlen,
3453
+ bool async)
31963454 {
3455
+ struct btrfs_discard_ctl *discard_ctl =
3456
+ &block_group->fs_info->discard_ctl;
31973457 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
31983458 struct btrfs_free_space *entry;
31993459 struct rb_node *node;
32003460 int ret = 0;
32013461 u64 extent_start;
32023462 u64 extent_bytes;
3463
+ enum btrfs_trim_state extent_trim_state;
32033464 u64 bytes;
3465
+ const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
32043466
32053467 while (start < end) {
32063468 struct btrfs_trim_range trim_entry;
....@@ -3208,49 +3470,66 @@
32083470 mutex_lock(&ctl->cache_writeout_mutex);
32093471 spin_lock(&ctl->tree_lock);
32103472
3211
- if (ctl->free_space < minlen) {
3212
- spin_unlock(&ctl->tree_lock);
3213
- mutex_unlock(&ctl->cache_writeout_mutex);
3214
- break;
3215
- }
3473
+ if (ctl->free_space < minlen)
3474
+ goto out_unlock;
32163475
32173476 entry = tree_search_offset(ctl, start, 0, 1);
3218
- if (!entry) {
3219
- spin_unlock(&ctl->tree_lock);
3220
- mutex_unlock(&ctl->cache_writeout_mutex);
3221
- break;
3222
- }
3477
+ if (!entry)
3478
+ goto out_unlock;
32233479
3224
- /* skip bitmaps */
3225
- while (entry->bitmap) {
3480
+ /* Skip bitmaps and if async, already trimmed entries */
3481
+ while (entry->bitmap ||
3482
+ (async && btrfs_free_space_trimmed(entry))) {
32263483 node = rb_next(&entry->offset_index);
3227
- if (!node) {
3228
- spin_unlock(&ctl->tree_lock);
3229
- mutex_unlock(&ctl->cache_writeout_mutex);
3230
- goto out;
3231
- }
3484
+ if (!node)
3485
+ goto out_unlock;
32323486 entry = rb_entry(node, struct btrfs_free_space,
32333487 offset_index);
32343488 }
32353489
3236
- if (entry->offset >= end) {
3237
- spin_unlock(&ctl->tree_lock);
3238
- mutex_unlock(&ctl->cache_writeout_mutex);
3239
- break;
3240
- }
3490
+ if (entry->offset >= end)
3491
+ goto out_unlock;
32413492
32423493 extent_start = entry->offset;
32433494 extent_bytes = entry->bytes;
3244
- start = max(start, extent_start);
3245
- bytes = min(extent_start + extent_bytes, end) - start;
3246
- if (bytes < minlen) {
3247
- spin_unlock(&ctl->tree_lock);
3248
- mutex_unlock(&ctl->cache_writeout_mutex);
3249
- goto next;
3250
- }
3495
+ extent_trim_state = entry->trim_state;
3496
+ if (async) {
3497
+ start = entry->offset;
3498
+ bytes = entry->bytes;
3499
+ if (bytes < minlen) {
3500
+ spin_unlock(&ctl->tree_lock);
3501
+ mutex_unlock(&ctl->cache_writeout_mutex);
3502
+ goto next;
3503
+ }
3504
+ unlink_free_space(ctl, entry);
3505
+ /*
3506
+ * Let bytes = BTRFS_MAX_DISCARD_SIZE + X.
3507
+ * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim
3508
+ * X when we come back around. So trim it now.
3509
+ */
3510
+ if (max_discard_size &&
3511
+ bytes >= (max_discard_size +
3512
+ BTRFS_ASYNC_DISCARD_MIN_FILTER)) {
3513
+ bytes = max_discard_size;
3514
+ extent_bytes = max_discard_size;
3515
+ entry->offset += max_discard_size;
3516
+ entry->bytes -= max_discard_size;
3517
+ link_free_space(ctl, entry);
3518
+ } else {
3519
+ kmem_cache_free(btrfs_free_space_cachep, entry);
3520
+ }
3521
+ } else {
3522
+ start = max(start, extent_start);
3523
+ bytes = min(extent_start + extent_bytes, end) - start;
3524
+ if (bytes < minlen) {
3525
+ spin_unlock(&ctl->tree_lock);
3526
+ mutex_unlock(&ctl->cache_writeout_mutex);
3527
+ goto next;
3528
+ }
32513529
3252
- unlink_free_space(ctl, entry);
3253
- kmem_cache_free(btrfs_free_space_cachep, entry);
3530
+ unlink_free_space(ctl, entry);
3531
+ kmem_cache_free(btrfs_free_space_cachep, entry);
3532
+ }
32543533
32553534 spin_unlock(&ctl->tree_lock);
32563535 trim_entry.start = extent_start;
....@@ -3259,11 +3538,17 @@
32593538 mutex_unlock(&ctl->cache_writeout_mutex);
32603539
32613540 ret = do_trimming(block_group, total_trimmed, start, bytes,
3262
- extent_start, extent_bytes, &trim_entry);
3263
- if (ret)
3541
+ extent_start, extent_bytes, extent_trim_state,
3542
+ &trim_entry);
3543
+ if (ret) {
3544
+ block_group->discard_cursor = start + bytes;
32643545 break;
3546
+ }
32653547 next:
32663548 start += bytes;
3549
+ block_group->discard_cursor = start;
3550
+ if (async && *total_trimmed)
3551
+ break;
32673552
32683553 if (fatal_signal_pending(current)) {
32693554 ret = -ERESTARTSYS;
....@@ -3272,19 +3557,76 @@
32723557
32733558 cond_resched();
32743559 }
3275
-out:
3560
+
3561
+ return ret;
3562
+
3563
+out_unlock:
3564
+ block_group->discard_cursor = btrfs_block_group_end(block_group);
3565
+ spin_unlock(&ctl->tree_lock);
3566
+ mutex_unlock(&ctl->cache_writeout_mutex);
3567
+
32763568 return ret;
32773569 }
32783570
3279
-static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
3280
- u64 *total_trimmed, u64 start, u64 end, u64 minlen)
3571
+/*
3572
+ * If we break out of trimming a bitmap prematurely, we should reset the
3573
+ * trimming bit. In a rather contrieved case, it's possible to race here so
3574
+ * reset the state to BTRFS_TRIM_STATE_UNTRIMMED.
3575
+ *
3576
+ * start = start of bitmap
3577
+ * end = near end of bitmap
3578
+ *
3579
+ * Thread 1: Thread 2:
3580
+ * trim_bitmaps(start)
3581
+ * trim_bitmaps(end)
3582
+ * end_trimming_bitmap()
3583
+ * reset_trimming_bitmap()
3584
+ */
3585
+static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset)
32813586 {
3587
+ struct btrfs_free_space *entry;
3588
+
3589
+ spin_lock(&ctl->tree_lock);
3590
+ entry = tree_search_offset(ctl, offset, 1, 0);
3591
+ if (entry) {
3592
+ if (btrfs_free_space_trimmed(entry)) {
3593
+ ctl->discardable_extents[BTRFS_STAT_CURR] +=
3594
+ entry->bitmap_extents;
3595
+ ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes;
3596
+ }
3597
+ entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3598
+ }
3599
+
3600
+ spin_unlock(&ctl->tree_lock);
3601
+}
3602
+
3603
+static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl,
3604
+ struct btrfs_free_space *entry)
3605
+{
3606
+ if (btrfs_free_space_trimming_bitmap(entry)) {
3607
+ entry->trim_state = BTRFS_TRIM_STATE_TRIMMED;
3608
+ ctl->discardable_extents[BTRFS_STAT_CURR] -=
3609
+ entry->bitmap_extents;
3610
+ ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes;
3611
+ }
3612
+}
3613
+
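
Taken together, end_trimming_bitmap() and reset_trimming_bitmap() implement a small per-bitmap state machine: UNTRIMMED -> TRIMMING while a full scan is in flight, TRIMMING -> TRIMMED on completion (removing the bitmap from the discardable counters), and back to UNTRIMMED when trimming is abandoned (adding a TRIMMED bitmap back). The sketch below mirrors that bookkeeping with simplified types; the struct and counter names are stand-ins, not the kernel's.

#include <stdint.h>

enum trim_state { UNTRIMMED, TRIMMING, TRIMMED };

struct bitmap_entry {
	enum trim_state trim_state;
	uint64_t bytes;     /* free bytes covered by this bitmap */
	uint64_t extents;   /* free extents covered by this bitmap */
};

struct discard_counters {
	int64_t discardable_extents;
	int64_t discardable_bytes;
};

/* Mirrors end_trimming_bitmap(): only a bitmap we marked TRIMMING completes. */
static void end_trimming(struct discard_counters *c, struct bitmap_entry *e)
{
	if (e->trim_state == TRIMMING) {
		e->trim_state = TRIMMED;
		c->discardable_extents -= e->extents;
		c->discardable_bytes -= e->bytes;
	}
}

/* Mirrors reset_trimming_bitmap(): a TRIMMED bitmap becomes discardable again. */
static void reset_trimming(struct discard_counters *c, struct bitmap_entry *e)
{
	if (e->trim_state == TRIMMED) {
		c->discardable_extents += e->extents;
		c->discardable_bytes += e->bytes;
	}
	e->trim_state = UNTRIMMED;
}

int main(void)
{
	struct discard_counters c = { 1, 4096 };
	struct bitmap_entry e = { TRIMMING, 4096, 1 };

	end_trimming(&c, &e);    /* full scan finished: no longer discardable */
	reset_trimming(&c, &e);  /* e.g. do_trimming() failed: make it discardable again */
	return !(c.discardable_extents == 1 && c.discardable_bytes == 4096);
}
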
3614
+/*
3615
+ * If @async is set, then we will trim 1 region and return.
3616
+ */
3617
+static int trim_bitmaps(struct btrfs_block_group *block_group,
3618
+ u64 *total_trimmed, u64 start, u64 end, u64 minlen,
3619
+ u64 maxlen, bool async)
3620
+{
3621
+ struct btrfs_discard_ctl *discard_ctl =
3622
+ &block_group->fs_info->discard_ctl;
32823623 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
32833624 struct btrfs_free_space *entry;
32843625 int ret = 0;
32853626 int ret2;
32863627 u64 bytes;
32873628 u64 offset = offset_to_bitmap(ctl, start);
3629
+ const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
32883630
32893631 while (offset < end) {
32903632 bool next_bitmap = false;
....@@ -3294,34 +3636,83 @@
32943636 spin_lock(&ctl->tree_lock);
32953637
32963638 if (ctl->free_space < minlen) {
3639
+ block_group->discard_cursor =
3640
+ btrfs_block_group_end(block_group);
32973641 spin_unlock(&ctl->tree_lock);
32983642 mutex_unlock(&ctl->cache_writeout_mutex);
32993643 break;
33003644 }
33013645
33023646 entry = tree_search_offset(ctl, offset, 1, 0);
3303
- if (!entry) {
3647
+ /*
3648
+ * Bitmaps are marked trimmed lossily now to prevent constant
3649
+ * discarding of the same bitmap (the reason why we are bound
3650
+ * by the filters). So, retrim the block group bitmaps when we
3651
+ * are preparing to punt to the unused_bgs list. This uses
3652
+ * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED
3653
+ * which is the only discard index which sets minlen to 0.
3654
+ */
3655
+ if (!entry || (async && minlen && start == offset &&
3656
+ btrfs_free_space_trimmed(entry))) {
33043657 spin_unlock(&ctl->tree_lock);
33053658 mutex_unlock(&ctl->cache_writeout_mutex);
33063659 next_bitmap = true;
33073660 goto next;
33083661 }
3662
+
3663
+ /*
3664
+ * Async discard bitmap trimming begins by setting the start
3665
+ * to be key.objectid and the offset_to_bitmap() aligns to the
3666
+ * start of the bitmap. This lets us know we are fully
3667
+ * scanning the bitmap rather than only some portion of it.
3668
+ */
3669
+ if (start == offset)
3670
+ entry->trim_state = BTRFS_TRIM_STATE_TRIMMING;
33093671
33103672 bytes = minlen;
33113673 ret2 = search_bitmap(ctl, entry, &start, &bytes, false);
33123674 if (ret2 || start >= end) {
3675
+ /*
3676
+ * We lossily consider a bitmap trimmed if we only skip
3677
+ * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER.
3678
+ */
3679
+ if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER)
3680
+ end_trimming_bitmap(ctl, entry);
3681
+ else
3682
+ entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
33133683 spin_unlock(&ctl->tree_lock);
33143684 mutex_unlock(&ctl->cache_writeout_mutex);
33153685 next_bitmap = true;
33163686 goto next;
33173687 }
33183688
3689
+ /*
3690
+ * We already trimmed a region, but are using the locking above
3691
+ * to reset the trim_state.
3692
+ */
3693
+ if (async && *total_trimmed) {
3694
+ spin_unlock(&ctl->tree_lock);
3695
+ mutex_unlock(&ctl->cache_writeout_mutex);
3696
+ goto out;
3697
+ }
3698
+
33193699 bytes = min(bytes, end - start);
3320
- if (bytes < minlen) {
3700
+ if (bytes < minlen || (async && maxlen && bytes > maxlen)) {
33213701 spin_unlock(&ctl->tree_lock);
33223702 mutex_unlock(&ctl->cache_writeout_mutex);
33233703 goto next;
33243704 }
3705
+
3706
+ /*
3707
+ * Let bytes = BTRFS_MAX_DISCARD_SIZE + X.
3708
+ * If X < @minlen, we won't trim X when we come back around.
3709
+ * So trim it now. We differ here from trimming extents as we
3710
+ * don't keep individual state per bit.
3711
+ */
3712
+ if (async &&
3713
+ max_discard_size &&
3714
+ bytes > (max_discard_size + minlen))
3715
+ bytes = max_discard_size;
33253716
33263717 bitmap_clear_bits(ctl, entry, start, bytes);
33273718 if (entry->bytes == 0)
....@@ -3334,19 +3725,25 @@
33343725 mutex_unlock(&ctl->cache_writeout_mutex);
33353726
33363727 ret = do_trimming(block_group, total_trimmed, start, bytes,
3337
- start, bytes, &trim_entry);
3338
- if (ret)
3728
+ start, bytes, 0, &trim_entry);
3729
+ if (ret) {
3730
+ reset_trimming_bitmap(ctl, offset);
3731
+ block_group->discard_cursor =
3732
+ btrfs_block_group_end(block_group);
33393733 break;
3734
+ }
33403735 next:
33413736 if (next_bitmap) {
33423737 offset += BITS_PER_BITMAP * ctl->unit;
3738
+ start = offset;
33433739 } else {
33443740 start += bytes;
3345
- if (start >= offset + BITS_PER_BITMAP * ctl->unit)
3346
- offset += BITS_PER_BITMAP * ctl->unit;
33473741 }
3742
+ block_group->discard_cursor = start;
33483743
33493744 if (fatal_signal_pending(current)) {
3745
+ if (start != offset)
3746
+ reset_trimming_bitmap(ctl, offset);
33503747 ret = -ERESTARTSYS;
33513748 break;
33523749 }
....@@ -3354,55 +3751,47 @@
33543751 cond_resched();
33553752 }
33563753
3754
+ if (offset >= end)
3755
+ block_group->discard_cursor = end;
3756
+
3757
+out:
33573758 return ret;
33583759 }
33593760
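
trim_bitmaps() relies on offset_to_bitmap() rounding the requested start down to the bitmap that covers it, so start == offset is how it detects that a whole bitmap is about to be scanned (and only then may the bitmap end up marked trimmed). A rough userspace version of that alignment check follows; the rounding formula is a simplification of offset_to_bitmap() and the unit/start values are made up for the example.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE        4096ULL
#define BITS_PER_BITMAP  (PAGE_SIZE * 8)   /* same definition as in this file */

/* Round an offset down to the start of the bitmap covering it. */
static uint64_t bitmap_start_of(uint64_t offset, uint64_t ctl_start, uint64_t unit)
{
	uint64_t bytes_per_bitmap = BITS_PER_BITMAP * unit;

	return ctl_start + ((offset - ctl_start) / bytes_per_bitmap) * bytes_per_bitmap;
}

int main(void)
{
	uint64_t unit = 4096;            /* one bitmap bit per 4K */
	uint64_t bg_start = 1ULL << 30;  /* hypothetical block group start */

	/* Async discard passes the block group start, so the two match. */
	uint64_t start = bg_start;
	uint64_t offset = bitmap_start_of(start, bg_start, unit);

	printf("%s\n", start == offset ?
	       "full bitmap scan: may set TRIMMING/TRIMMED" :
	       "partial scan: leave the trim state alone");
	return 0;
}
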
3360
-void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache)
3761
+int btrfs_trim_block_group(struct btrfs_block_group *block_group,
3762
+ u64 *trimmed, u64 start, u64 end, u64 minlen)
33613763 {
3362
- atomic_inc(&cache->trimming);
3363
-}
3764
+ struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3765
+ int ret;
3766
+ u64 rem = 0;
33643767
3365
-void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group)
3366
-{
3367
- struct btrfs_fs_info *fs_info = block_group->fs_info;
3368
- struct extent_map_tree *em_tree;
3369
- struct extent_map *em;
3370
- bool cleanup;
3768
+ *trimmed = 0;
33713769
33723770 spin_lock(&block_group->lock);
3373
- cleanup = (atomic_dec_and_test(&block_group->trimming) &&
3374
- block_group->removed);
3771
+ if (block_group->removed) {
3772
+ spin_unlock(&block_group->lock);
3773
+ return 0;
3774
+ }
3775
+ btrfs_freeze_block_group(block_group);
33753776 spin_unlock(&block_group->lock);
33763777
3377
- if (cleanup) {
3378
- mutex_lock(&fs_info->chunk_mutex);
3379
- em_tree = &fs_info->mapping_tree.map_tree;
3380
- write_lock(&em_tree->lock);
3381
- em = lookup_extent_mapping(em_tree, block_group->key.objectid,
3382
- 1);
3383
- BUG_ON(!em); /* logic error, can't happen */
3384
- /*
3385
- * remove_extent_mapping() will delete us from the pinned_chunks
3386
- * list, which is protected by the chunk mutex.
3387
- */
3388
- remove_extent_mapping(em_tree, em);
3389
- write_unlock(&em_tree->lock);
3390
- mutex_unlock(&fs_info->chunk_mutex);
3778
+ ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false);
3779
+ if (ret)
3780
+ goto out;
33913781
3392
- /* once for us and once for the tree */
3393
- free_extent_map(em);
3394
- free_extent_map(em);
3395
-
3396
- /*
3397
- * We've left one free space entry and other tasks trimming
3398
- * this block group have left 1 entry each one. Free them.
3399
- */
3400
- __btrfs_remove_free_space_cache(block_group->free_space_ctl);
3401
- }
3782
+ ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false);
3783
+ div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem);
3784
+ /* If we ended in the middle of a bitmap, reset the trimming flag */
3785
+ if (rem)
3786
+ reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end));
3787
+out:
3788
+ btrfs_unfreeze_block_group(block_group);
3789
+ return ret;
34023790 }
34033791
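
The div64_u64_rem() check at the end of btrfs_trim_block_group() asks whether the trim range stopped on a bitmap boundary; a non-zero remainder means the last bitmap was only partially scanned, so its trim state is reset rather than left looking trimmed. A small sketch of that remainder test, with an illustrative unit size:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE        4096ULL
#define BITS_PER_BITMAP  (PAGE_SIZE * 8)

int main(void)
{
	uint64_t unit = 4096;                                 /* bytes per bitmap bit */
	uint64_t bytes_per_bitmap = BITS_PER_BITMAP * unit;   /* 128M with these numbers */
	uint64_t end = 3 * bytes_per_bitmap + (1ULL << 20);   /* range ends 1M into a bitmap */
	uint64_t rem = end % bytes_per_bitmap;                /* what div64_u64_rem() computes */

	if (rem)
		printf("ended mid-bitmap: reset its trim state\n");
	else
		printf("ended on a bitmap boundary: nothing to reset\n");
	return 0;
}
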
3404
-int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
3405
- u64 *trimmed, u64 start, u64 end, u64 minlen)
3792
+int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
3793
+ u64 *trimmed, u64 start, u64 end, u64 minlen,
3794
+ bool async)
34063795 {
34073796 int ret;
34083797
....@@ -3413,16 +3802,36 @@
34133802 spin_unlock(&block_group->lock);
34143803 return 0;
34153804 }
3416
- btrfs_get_block_group_trimming(block_group);
3805
+ btrfs_freeze_block_group(block_group);
34173806 spin_unlock(&block_group->lock);
34183807
3419
- ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
3420
- if (ret)
3421
- goto out;
3808
+ ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async);
3809
+ btrfs_unfreeze_block_group(block_group);
34223810
3423
- ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
3424
-out:
3425
- btrfs_put_block_group_trimming(block_group);
3811
+ return ret;
3812
+}
3813
+
3814
+int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
3815
+ u64 *trimmed, u64 start, u64 end, u64 minlen,
3816
+ u64 maxlen, bool async)
3817
+{
3818
+ int ret;
3819
+
3820
+ *trimmed = 0;
3821
+
3822
+ spin_lock(&block_group->lock);
3823
+ if (block_group->removed) {
3824
+ spin_unlock(&block_group->lock);
3825
+ return 0;
3826
+ }
3827
+ btrfs_freeze_block_group(block_group);
3828
+ spin_unlock(&block_group->lock);
3829
+
3830
+ ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen,
3831
+ async);
3832
+
3833
+ btrfs_unfreeze_block_group(block_group);
3834
+
34263835 return ret;
34273836 }
34283837
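
btrfs_trim_block_group(), btrfs_trim_block_group_extents() and btrfs_trim_block_group_bitmaps() all wrap the actual trimming in the same protocol: check removed under the block group lock, freeze the group so it cannot be fully deleted while discards are in flight, drop the lock, trim, then unfreeze. The outline below restates that shape with stub types and no-op stand-ins for the locking and freeze primitives; it is not the kernel code.

#include <stddef.h>

/* Stand-in type and no-op stubs for spin_lock()/spin_unlock() and
 * btrfs_freeze_block_group()/btrfs_unfreeze_block_group(). */
struct block_group {
	int removed;
	int frozen;
};

static void bg_lock(struct block_group *bg)     { (void)bg; }
static void bg_unlock(struct block_group *bg)   { (void)bg; }
static void bg_freeze(struct block_group *bg)   { bg->frozen++; }
static void bg_unfreeze(struct block_group *bg) { bg->frozen--; }

/* The prologue/epilogue shared by the three trim entry points. */
static int trim_one_block_group(struct block_group *bg,
				int (*do_trim)(struct block_group *))
{
	int ret;

	bg_lock(bg);
	if (bg->removed) {       /* group already removed: nothing to trim */
		bg_unlock(bg);
		return 0;
	}
	bg_freeze(bg);           /* keep the group's mapping alive while trimming */
	bg_unlock(bg);

	ret = do_trim(bg);       /* trim_no_bitmap() and/or trim_bitmaps() */

	bg_unfreeze(bg);
	return ret;
}

static int fake_trim(struct block_group *bg) { (void)bg; return 0; }

int main(void)
{
	struct block_group bg = { 0, 0 };

	return trim_one_block_group(&bg, fake_trim);
}

Freezing takes over the role of the old trimming refcount (btrfs_get_block_group_trimming()/btrfs_put_block_group_trimming()) removed earlier in this patch.
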
....@@ -3582,11 +3991,9 @@
35823991 if (release_metadata)
35833992 btrfs_delalloc_release_metadata(BTRFS_I(inode),
35843993 inode->i_size, true);
3585
-#ifdef DEBUG
3586
- btrfs_err(fs_info,
3587
- "failed to write free ino cache for root %llu",
3588
- root->root_key.objectid);
3589
-#endif
3994
+ btrfs_debug(fs_info,
3995
+ "failed to write free ino cache for root %llu error %d",
3996
+ root->root_key.objectid, ret);
35903997 }
35913998
35923999 return ret;
....@@ -3599,12 +4006,13 @@
35994006 * how the free space cache loading stuff works, so you can get really weird
36004007 * configurations.
36014008 */
3602
-int test_add_free_space_entry(struct btrfs_block_group_cache *cache,
4009
+int test_add_free_space_entry(struct btrfs_block_group *cache,
36034010 u64 offset, u64 bytes, bool bitmap)
36044011 {
36054012 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
36064013 struct btrfs_free_space *info = NULL, *bitmap_info;
36074014 void *map = NULL;
4015
+ enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED;
36084016 u64 bytes_added;
36094017 int ret;
36104018
....@@ -3646,7 +4054,8 @@
36464054 info = NULL;
36474055 }
36484056
3649
- bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
4057
+ bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
4058
+ trim_state);
36504059
36514060 bytes -= bytes_added;
36524061 offset += bytes_added;
....@@ -3667,7 +4076,7 @@
36674076 * just used to check the absence of space, so if there is free space in the
36684077 * range at all we will return 1.
36694078 */
3670
-int test_check_exists(struct btrfs_block_group_cache *cache,
4079
+int test_check_exists(struct btrfs_block_group *cache,
36714080 u64 offset, u64 bytes)
36724081 {
36734082 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;