2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/fs/btrfs/compression.c
@@ -17,6 +17,8 @@
 #include <linux/slab.h>
 #include <linux/sched/mm.h>
 #include <linux/log2.h>
+#include <crypto/hash.h>
+#include "misc.h"
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -37,6 +39,8 @@
 	case BTRFS_COMPRESS_ZSTD:
 	case BTRFS_COMPRESS_NONE:
 		return btrfs_compress_types[type];
+	default:
+		break;
 	}
 
 	return NULL;
@@ -58,6 +62,75 @@
 	return false;
 }
 
+static int compression_compress_pages(int type, struct list_head *ws,
+		struct address_space *mapping, u64 start, struct page **pages,
+		unsigned long *out_pages, unsigned long *total_in,
+		unsigned long *total_out)
+{
+	switch (type) {
+	case BTRFS_COMPRESS_ZLIB:
+		return zlib_compress_pages(ws, mapping, start, pages,
+				out_pages, total_in, total_out);
+	case BTRFS_COMPRESS_LZO:
+		return lzo_compress_pages(ws, mapping, start, pages,
+				out_pages, total_in, total_out);
+	case BTRFS_COMPRESS_ZSTD:
+		return zstd_compress_pages(ws, mapping, start, pages,
+				out_pages, total_in, total_out);
+	case BTRFS_COMPRESS_NONE:
+	default:
+		/*
+		 * This can happen when compression races with remount setting
+		 * it to 'no compress', while caller doesn't call
+		 * inode_need_compress() to check if we really need to
+		 * compress.
+		 *
+		 * Not a big deal, just need to inform caller that we
+		 * haven't allocated any pages yet.
+		 */
+		*out_pages = 0;
+		return -E2BIG;
+	}
+}
+
+static int compression_decompress_bio(int type, struct list_head *ws,
+		struct compressed_bio *cb)
+{
+	switch (type) {
+	case BTRFS_COMPRESS_ZLIB: return zlib_decompress_bio(ws, cb);
+	case BTRFS_COMPRESS_LZO:  return lzo_decompress_bio(ws, cb);
+	case BTRFS_COMPRESS_ZSTD: return zstd_decompress_bio(ws, cb);
+	case BTRFS_COMPRESS_NONE:
+	default:
+		/*
+		 * This can't happen, the type is validated several times
+		 * before we get here.
+		 */
+		BUG();
+	}
+}
+
+static int compression_decompress(int type, struct list_head *ws,
+		unsigned char *data_in, struct page *dest_page,
+		unsigned long start_byte, size_t srclen, size_t destlen)
+{
+	switch (type) {
+	case BTRFS_COMPRESS_ZLIB: return zlib_decompress(ws, data_in, dest_page,
+				start_byte, srclen, destlen);
+	case BTRFS_COMPRESS_LZO:  return lzo_decompress(ws, data_in, dest_page,
+				start_byte, srclen, destlen);
+	case BTRFS_COMPRESS_ZSTD: return zstd_decompress(ws, data_in, dest_page,
+				start_byte, srclen, destlen);
+	case BTRFS_COMPRESS_NONE:
+	default:
+		/*
+		 * This can't happen, the type is validated several times
+		 * before we get here.
+		 */
+		BUG();
+	}
+}
+
 static int btrfs_decompress_bio(struct compressed_bio *cb);
 
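The three compression_* helpers above replace the old pattern of indexing btrfs_compress_op[type - 1] with explicit switch dispatch, which gives BTRFS_COMPRESS_NONE a sane error path instead of an out-of-bounds table lookup. A minimal userspace sketch of the before/after shape (toy codec stubs and names, not the btrfs API):

#include <stdio.h>

enum comp_type { COMP_NONE = 0, COMP_ZLIB, COMP_LZO, COMP_ZSTD };

static int zlib_compress(void) { return 1; }
static int lzo_compress(void)  { return 2; }
static int zstd_compress(void) { return 3; }

/* Old style: type - 1 indexes an ops array; NONE (0) underflows it. */
static int (*const ops[])(void) = { zlib_compress, lzo_compress, zstd_compress };
static int compress_old(enum comp_type t) { return ops[t - 1](); }

/* New style: every case is named, and NONE gets an explicit error
 * return instead of out-of-bounds indexing. */
static int compress_new(enum comp_type t)
{
	switch (t) {
	case COMP_ZLIB: return zlib_compress();
	case COMP_LZO:  return lzo_compress();
	case COMP_ZSTD: return zstd_compress();
	case COMP_NONE:
	default:        return -1; /* caller asked for no compression */
	}
}

int main(void)
{
	printf("zlib via table:  %d\n", compress_old(COMP_ZLIB));
	printf("zstd via switch: %d\n", compress_new(COMP_ZSTD));
	printf("none via switch: %d\n", compress_new(COMP_NONE));
	/* compress_old(COMP_NONE) would read ops[-1]: the bug class
	 * the switch-based helpers close off. */
	return 0;
}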
 static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
@@ -69,41 +142,43 @@
 		(DIV_ROUND_UP(disk_size, fs_info->sectorsize)) * csum_size;
 }
 
-static int check_compressed_csum(struct btrfs_inode *inode,
-				 struct compressed_bio *cb,
+static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
 				 u64 disk_start)
 {
-	int ret;
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+	const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
 	struct page *page;
 	unsigned long i;
 	char *kaddr;
-	u32 csum;
-	u32 *cb_sum = &cb->sums;
+	u8 csum[BTRFS_CSUM_SIZE];
+	struct compressed_bio *cb = bio->bi_private;
+	u8 *cb_sum = cb->sums;
 
 	if (inode->flags & BTRFS_INODE_NODATASUM)
 		return 0;
 
+	shash->tfm = fs_info->csum_shash;
+
 	for (i = 0; i < cb->nr_pages; i++) {
 		page = cb->compressed_pages[i];
-		csum = ~(u32)0;
 
 		kaddr = kmap_atomic(page);
-		csum = btrfs_csum_data(kaddr, csum, PAGE_SIZE);
-		btrfs_csum_final(csum, (u8 *)&csum);
+		crypto_shash_digest(shash, kaddr, PAGE_SIZE, csum);
 		kunmap_atomic(kaddr);
 
-		if (csum != *cb_sum) {
-			btrfs_print_data_csum_error(inode, disk_start, csum,
-						    *cb_sum, cb->mirror_num);
-			ret = -EIO;
-			goto fail;
+		if (memcmp(&csum, cb_sum, csum_size)) {
+			btrfs_print_data_csum_error(inode, disk_start,
+					csum, cb_sum, cb->mirror_num);
+			if (btrfs_io_bio(bio)->device)
+				btrfs_dev_stat_inc_and_print(
+					btrfs_io_bio(bio)->device,
+					BTRFS_DEV_STAT_CORRUPTION_ERRS);
+			return -EIO;
 		}
-		cb_sum++;
-
+		cb_sum += csum_size;
 	}
-	ret = 0;
-fail:
-	return ret;
+	return 0;
 }
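check_compressed_csum now digests each page into a BTRFS_CSUM_SIZE scratch buffer and advances through cb->sums in csum_size strides, because with pluggable checksums (crypto_shash) a sum is no longer a single u32. A userspace sketch of that verification walk, with a toy digest standing in for the fs_info->csum_shash handle:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096
#define MAX_CSUM_SIZE 32 /* stand-in for BTRFS_CSUM_SIZE */

/* Toy digest: XOR-fold the page into csum_size bytes. Stand-in for
 * crypto_shash_digest() only; not a real checksum. */
static void toy_digest(const uint8_t *data, size_t len,
		       uint8_t *out, size_t csum_size)
{
	memset(out, 0, csum_size);
	for (size_t i = 0; i < len; i++)
		out[i % csum_size] ^= data[i];
}

/* Verify nr_pages pages against checksums packed back to back,
 * csum_size bytes each: the stride the patch switches to with
 * "cb_sum += csum_size". */
static int check_sums(uint8_t pages[][PAGE_SIZE], size_t nr_pages,
		      const uint8_t *sums, size_t csum_size)
{
	uint8_t csum[MAX_CSUM_SIZE];

	for (size_t i = 0; i < nr_pages; i++) {
		toy_digest(pages[i], PAGE_SIZE, csum, csum_size);
		if (memcmp(csum, sums, csum_size))
			return -1;     /* -EIO in the kernel code */
		sums += csum_size;     /* was: cb_sum++ (u32 stride) */
	}
	return 0;
}

int main(void)
{
	static uint8_t pages[2][PAGE_SIZE];
	uint8_t sums[2 * 8];

	pages[0][0] = 0xab;
	pages[1][7] = 0xcd;
	toy_digest(pages[0], PAGE_SIZE, sums, 8);
	toy_digest(pages[1], PAGE_SIZE, sums + 8, 8);
	printf("verify: %d\n", check_sums(pages, 2, sums, 8));
	return 0;
}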
 
 /* when we finish reading compressed pages from the disk, we
@@ -138,7 +213,6 @@
 	 * Record the correct mirror_num in cb->orig_bio so that
 	 * read-repair can work properly.
 	 */
-	ASSERT(btrfs_io_bio(cb->orig_bio));
 	btrfs_io_bio(cb->orig_bio)->mirror_num = mirror;
 	cb->mirror_num = mirror;
 
@@ -150,7 +224,7 @@
 		goto csum_failed;
 
 	inode = cb->inode;
-	ret = check_compressed_csum(BTRFS_I(inode), cb,
+	ret = check_compressed_csum(BTRFS_I(inode), bio,
 				    (u64)bio->bi_iter.bi_sector << 9);
 	if (ret)
 		goto csum_failed;
@@ -176,15 +250,15 @@
 	if (cb->errors) {
 		bio_io_error(cb->orig_bio);
 	} else {
-		int i;
 		struct bio_vec *bvec;
+		struct bvec_iter_all iter_all;
 
 		/*
 		 * we have verified the checksum already, set page
 		 * checked so the end_io handlers know about it
 		 */
 		ASSERT(!bio_flagged(bio, BIO_CLONED));
-		bio_for_each_segment_all(bvec, cb->orig_bio, i)
+		bio_for_each_segment_all(bvec, cb->orig_bio, iter_all)
 			SetPageChecked(bvec->bv_page);
 
 		bio_endio(cb->orig_bio);
@@ -245,7 +319,6 @@
  */
 static void end_compressed_bio_write(struct bio *bio)
 {
-	struct extent_io_tree *tree;
 	struct compressed_bio *cb = bio->bi_private;
 	struct inode *inode;
 	struct page *page;
@@ -264,13 +337,10 @@
 	 * call back into the FS and do all the end_io operations
 	 */
 	inode = cb->inode;
-	tree = &BTRFS_I(inode)->io_tree;
 	cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
-	tree->ops->writepage_end_io_hook(cb->compressed_pages[0],
-					 cb->start,
-					 cb->start + cb->len - 1,
-					 NULL,
-					 !cb->errors);
+	btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0],
+			cb->start, cb->start + cb->len - 1,
+			!cb->errors);
 	cb->compressed_pages[0]->mapping = NULL;
 
 	end_compressed_writeback(inode, cb);
@@ -303,31 +373,31 @@
  * This also checksums the file bytes and gets things ready for
  * the end io hooks.
  */
-blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 				 unsigned long len, u64 disk_start,
 				 unsigned long compressed_len,
 				 struct page **compressed_pages,
 				 unsigned long nr_pages,
-				 unsigned int write_flags)
+				 unsigned int write_flags,
+				 struct cgroup_subsys_state *blkcg_css)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct bio *bio = NULL;
 	struct compressed_bio *cb;
 	unsigned long bytes_left;
 	int pg_index = 0;
 	struct page *page;
 	u64 first_byte = disk_start;
-	struct block_device *bdev;
 	blk_status_t ret;
-	int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+	int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
 
-	WARN_ON(start & ((u64)PAGE_SIZE - 1));
+	WARN_ON(!PAGE_ALIGNED(start));
 	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
 	if (!cb)
 		return BLK_STS_RESOURCE;
 	refcount_set(&cb->pending_bios, 0);
 	cb->errors = 0;
-	cb->inode = inode;
+	cb->inode = &inode->vfs_inode;
 	cb->start = start;
 	cb->len = len;
 	cb->mirror_num = 0;
@@ -336,12 +406,15 @@
 	cb->orig_bio = NULL;
 	cb->nr_pages = nr_pages;
 
-	bdev = fs_info->fs_devices->latest_bdev;
-
-	bio = btrfs_bio_alloc(bdev, first_byte);
+	bio = btrfs_bio_alloc(first_byte);
 	bio->bi_opf = REQ_OP_WRITE | write_flags;
 	bio->bi_private = cb;
 	bio->bi_end_io = end_compressed_bio_write;
+
+	if (blkcg_css) {
+		bio->bi_opf |= REQ_CGROUP_PUNT;
+		kthread_associate_blkcg(blkcg_css);
+	}
 	refcount_set(&cb->pending_bios, 1);
 
 	/* create and submit bios for the compressed pages */
@@ -350,9 +423,10 @@
 		int submit = 0;
 
 		page = compressed_pages[pg_index];
-		page->mapping = inode->i_mapping;
+		page->mapping = inode->vfs_inode.i_mapping;
 		if (bio->bi_iter.bi_size)
-			submit = btrfs_merge_bio_hook(page, 0, PAGE_SIZE, bio, 0);
+			submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
+							  0);
 
 		page->mapping = NULL;
 		if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
@@ -373,16 +447,18 @@
 			BUG_ON(ret); /* -ENOMEM */
 		}
 
-		ret = btrfs_map_bio(fs_info, bio, 0, 1);
+		ret = btrfs_map_bio(fs_info, bio, 0);
 		if (ret) {
 			bio->bi_status = ret;
 			bio_endio(bio);
 		}
 
-		bio = btrfs_bio_alloc(bdev, first_byte);
+		bio = btrfs_bio_alloc(first_byte);
 		bio->bi_opf = REQ_OP_WRITE | write_flags;
 		bio->bi_private = cb;
 		bio->bi_end_io = end_compressed_bio_write;
+		if (blkcg_css)
+			bio->bi_opf |= REQ_CGROUP_PUNT;
 		bio_add_page(bio, page, PAGE_SIZE, 0);
 	}
 	if (bytes_left < PAGE_SIZE) {
@@ -403,11 +479,14 @@
 		BUG_ON(ret); /* -ENOMEM */
 	}
 
-	ret = btrfs_map_bio(fs_info, bio, 0, 1);
+	ret = btrfs_map_bio(fs_info, bio, 0);
 	if (ret) {
 		bio->bi_status = ret;
 		bio_endio(bio);
 	}
+
+	if (blkcg_css)
+		kthread_associate_blkcg(NULL);
 
 	return 0;
 }
@@ -452,10 +531,8 @@
 		if (pg_index > end_index)
 			break;
 
-		rcu_read_lock();
-		page = radix_tree_lookup(&mapping->i_pages, pg_index);
-		rcu_read_unlock();
-		if (page && !radix_tree_exceptional_entry(page)) {
+		page = xa_load(&mapping->i_pages, pg_index);
+		if (page && !xa_is_value(page)) {
 			misses++;
 			if (misses > 4)
 				break;
@@ -498,7 +575,7 @@
 
 		if (page->index == end_index) {
 			char *userpage;
-			size_t zero_offset = isize & (PAGE_SIZE - 1);
+			size_t zero_offset = offset_in_page(isize);
 
 			if (zero_offset) {
 				int zeros;
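offset_in_page(v) is just v & (PAGE_SIZE - 1), and PAGE_ALIGNED(v) tests that the same low bits are zero, so the conversions in this hunk and in btrfs_submit_compressed_write above are purely cosmetic. A quick userspace check of the equivalence (local macro definitions, assuming 4 KiB pages):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Userspace mirrors of the kernel macros the patch switches to. */
#define offset_in_page(p) ((uint64_t)(p) & (PAGE_SIZE - 1))
#define PAGE_ALIGNED(p)   (offset_in_page(p) == 0)

int main(void)
{
	uint64_t isize = 3 * PAGE_SIZE + 123;

	/* New spellings equal the old open-coded masks. */
	assert(offset_in_page(isize) == (isize & (PAGE_SIZE - 1)));
	assert(!PAGE_ALIGNED(isize));
	assert(PAGE_ALIGNED(4 * PAGE_SIZE));

	printf("zero_offset = %llu\n", (unsigned long long)offset_in_page(isize));
	return 0;
}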
@@ -543,14 +620,12 @@
 				 int mirror_num, unsigned long bio_flags)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	struct extent_io_tree *tree;
 	struct extent_map_tree *em_tree;
 	struct compressed_bio *cb;
 	unsigned long compressed_len;
 	unsigned long nr_pages;
 	unsigned long pg_index;
 	struct page *page;
-	struct block_device *bdev;
 	struct bio *comp_bio;
 	u64 cur_disk_byte = (u64)bio->bi_iter.bi_sector << 9;
 	u64 em_len;
@@ -558,9 +633,9 @@
 	struct extent_map *em;
 	blk_status_t ret = BLK_STS_RESOURCE;
 	int faili = 0;
-	u32 *sums;
+	const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+	u8 *sums;
 
-	tree = &BTRFS_I(inode)->io_tree;
 	em_tree = &BTRFS_I(inode)->extent_tree;
 
 	/* we need the actual starting offset of this extent in the file */
@@ -581,7 +656,7 @@
 	cb->errors = 0;
 	cb->inode = inode;
 	cb->mirror_num = mirror_num;
-	sums = &cb->sums;
+	sums = cb->sums;
 
 	cb->start = em->orig_start;
 	em_len = em->len;
@@ -601,8 +676,6 @@
 	if (!cb->compressed_pages)
 		goto fail1;
 
-	bdev = fs_info->fs_devices->latest_bdev;
-
 	for (pg_index = 0; pg_index < nr_pages; pg_index++) {
 		cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS |
 							    __GFP_HIGHMEM);
@@ -620,7 +693,7 @@
 	/* include any pages we added in add_ra-bio_pages */
 	cb->len = bio->bi_iter.bi_size;
 
-	comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
+	comp_bio = btrfs_bio_alloc(cur_disk_byte);
 	comp_bio->bi_opf = REQ_OP_READ;
 	comp_bio->bi_private = cb;
 	comp_bio->bi_end_io = end_compressed_bio_read;
@@ -634,12 +707,14 @@
 		page->index = em_start >> PAGE_SHIFT;
 
 		if (comp_bio->bi_iter.bi_size)
-			submit = btrfs_merge_bio_hook(page, 0, PAGE_SIZE,
-						      comp_bio, 0);
+			submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE,
+							  comp_bio, 0);
 
 		page->mapping = NULL;
 		if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
 		    PAGE_SIZE) {
+			unsigned int nr_sectors;
+
 			ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
 						  BTRFS_WQ_ENDIO_DATA);
 			BUG_ON(ret); /* -ENOMEM */
@@ -654,19 +729,21 @@
 
 			if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
 				ret = btrfs_lookup_bio_sums(inode, comp_bio,
-							    sums);
+							    (u64)-1, sums);
 				BUG_ON(ret); /* -ENOMEM */
 			}
-			sums += DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
-					     fs_info->sectorsize);
 
-			ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
+			nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
+						  fs_info->sectorsize);
+			sums += csum_size * nr_sectors;
+
+			ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
 			if (ret) {
 				comp_bio->bi_status = ret;
 				bio_endio(comp_bio);
 			}
 
-			comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
+			comp_bio = btrfs_bio_alloc(cur_disk_byte);
 			comp_bio->bi_opf = REQ_OP_READ;
 			comp_bio->bi_private = cb;
 			comp_bio->bi_end_io = end_compressed_bio_read;
@@ -680,11 +757,11 @@
 	BUG_ON(ret); /* -ENOMEM */
 
 	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
-		ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
+		ret = btrfs_lookup_bio_sums(inode, comp_bio, (u64)-1, sums);
 		BUG_ON(ret); /* -ENOMEM */
 	}
 
-	ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
+	ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
 	if (ret) {
 		comp_bio->bi_status = ret;
 		bio_endio(comp_bio);
@@ -753,6 +830,8 @@
 	struct list_head list;
 };
 
+static struct workspace_manager heuristic_wsm;
+
 static void free_heuristic_ws(struct list_head *ws)
 {
 	struct heuristic_ws *workspace;
@@ -765,7 +844,7 @@
 	kfree(workspace);
 }
 
-static struct list_head *alloc_heuristic_ws(void)
+static struct list_head *alloc_heuristic_ws(unsigned int level)
 {
 	struct heuristic_ws *ws;
 
@@ -792,65 +871,87 @@
 	return ERR_PTR(-ENOMEM);
 }
 
-struct workspaces_list {
-	struct list_head idle_ws;
-	spinlock_t ws_lock;
-	/* Number of free workspaces */
-	int free_ws;
-	/* Total number of allocated workspaces */
-	atomic_t total_ws;
-	/* Waiters for a free workspace */
-	wait_queue_head_t ws_wait;
+const struct btrfs_compress_op btrfs_heuristic_compress = {
+	.workspace_manager = &heuristic_wsm,
 };
 
-static struct workspaces_list btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
-
-static struct workspaces_list btrfs_heuristic_ws;
-
 static const struct btrfs_compress_op * const btrfs_compress_op[] = {
+	/* The heuristic is represented as compression type 0 */
+	&btrfs_heuristic_compress,
 	&btrfs_zlib_compress,
 	&btrfs_lzo_compress,
 	&btrfs_zstd_compress,
 };
 
-void __init btrfs_init_compress(void)
+static struct list_head *alloc_workspace(int type, unsigned int level)
 {
+	switch (type) {
+	case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws(level);
+	case BTRFS_COMPRESS_ZLIB: return zlib_alloc_workspace(level);
+	case BTRFS_COMPRESS_LZO:  return lzo_alloc_workspace(level);
+	case BTRFS_COMPRESS_ZSTD: return zstd_alloc_workspace(level);
+	default:
+		/*
+		 * This can't happen, the type is validated several times
+		 * before we get here.
+		 */
+		BUG();
+	}
+}
+
+static void free_workspace(int type, struct list_head *ws)
+{
+	switch (type) {
+	case BTRFS_COMPRESS_NONE: return free_heuristic_ws(ws);
+	case BTRFS_COMPRESS_ZLIB: return zlib_free_workspace(ws);
+	case BTRFS_COMPRESS_LZO:  return lzo_free_workspace(ws);
+	case BTRFS_COMPRESS_ZSTD: return zstd_free_workspace(ws);
+	default:
+		/*
+		 * This can't happen, the type is validated several times
+		 * before we get here.
+		 */
+		BUG();
+	}
+}
+
+static void btrfs_init_workspace_manager(int type)
+{
+	struct workspace_manager *wsm;
 	struct list_head *workspace;
-	int i;
 
-	INIT_LIST_HEAD(&btrfs_heuristic_ws.idle_ws);
-	spin_lock_init(&btrfs_heuristic_ws.ws_lock);
-	atomic_set(&btrfs_heuristic_ws.total_ws, 0);
-	init_waitqueue_head(&btrfs_heuristic_ws.ws_wait);
+	wsm = btrfs_compress_op[type]->workspace_manager;
+	INIT_LIST_HEAD(&wsm->idle_ws);
+	spin_lock_init(&wsm->ws_lock);
+	atomic_set(&wsm->total_ws, 0);
+	init_waitqueue_head(&wsm->ws_wait);
 
-	workspace = alloc_heuristic_ws();
+	/*
+	 * Preallocate one workspace for each compression type so we can
+	 * guarantee forward progress in the worst case
+	 */
+	workspace = alloc_workspace(type, 0);
 	if (IS_ERR(workspace)) {
 		pr_warn(
-	"BTRFS: cannot preallocate heuristic workspace, will try later\n");
+	"BTRFS: cannot preallocate compression workspace, will try later\n");
 	} else {
-		atomic_set(&btrfs_heuristic_ws.total_ws, 1);
-		btrfs_heuristic_ws.free_ws = 1;
-		list_add(workspace, &btrfs_heuristic_ws.idle_ws);
+		atomic_set(&wsm->total_ws, 1);
+		wsm->free_ws = 1;
+		list_add(workspace, &wsm->idle_ws);
 	}
+}
 
-	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
-		INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
-		spin_lock_init(&btrfs_comp_ws[i].ws_lock);
-		atomic_set(&btrfs_comp_ws[i].total_ws, 0);
-		init_waitqueue_head(&btrfs_comp_ws[i].ws_wait);
+static void btrfs_cleanup_workspace_manager(int type)
+{
+	struct workspace_manager *wsman;
+	struct list_head *ws;
 
-		/*
-		 * Preallocate one workspace for each compression type so
-		 * we can guarantee forward progress in the worst case
-		 */
-		workspace = btrfs_compress_op[i]->alloc_workspace();
-		if (IS_ERR(workspace)) {
-			pr_warn("BTRFS: cannot preallocate compression workspace, will try later\n");
-		} else {
-			atomic_set(&btrfs_comp_ws[i].total_ws, 1);
-			btrfs_comp_ws[i].free_ws = 1;
-			list_add(workspace, &btrfs_comp_ws[i].idle_ws);
-		}
+	wsman = btrfs_compress_op[type]->workspace_manager;
+	while (!list_empty(&wsman->idle_ws)) {
+		ws = wsman->idle_ws.next;
+		list_del(ws);
+		free_workspace(type, ws);
+		atomic_dec(&wsman->total_ws);
 	}
 }
@@ -860,11 +961,11 @@
 * Preallocation makes a forward progress guarantees and we do not return
 * errors.
 */
-static struct list_head *__find_workspace(int type, bool heuristic)
+struct list_head *btrfs_get_workspace(int type, unsigned int level)
 {
+	struct workspace_manager *wsm;
 	struct list_head *workspace;
 	int cpus = num_online_cpus();
-	int idx = type - 1;
 	unsigned nofs_flag;
 	struct list_head *idle_ws;
 	spinlock_t *ws_lock;
@@ -872,19 +973,12 @@
 	wait_queue_head_t *ws_wait;
 	int *free_ws;
 
-	if (heuristic) {
-		idle_ws = &btrfs_heuristic_ws.idle_ws;
-		ws_lock = &btrfs_heuristic_ws.ws_lock;
-		total_ws = &btrfs_heuristic_ws.total_ws;
-		ws_wait = &btrfs_heuristic_ws.ws_wait;
-		free_ws = &btrfs_heuristic_ws.free_ws;
-	} else {
-		idle_ws = &btrfs_comp_ws[idx].idle_ws;
-		ws_lock = &btrfs_comp_ws[idx].ws_lock;
-		total_ws = &btrfs_comp_ws[idx].total_ws;
-		ws_wait = &btrfs_comp_ws[idx].ws_wait;
-		free_ws = &btrfs_comp_ws[idx].free_ws;
-	}
+	wsm = btrfs_compress_op[type]->workspace_manager;
+	idle_ws = &wsm->idle_ws;
+	ws_lock = &wsm->ws_lock;
+	total_ws = &wsm->total_ws;
+	ws_wait = &wsm->ws_wait;
+	free_ws = &wsm->free_ws;
 
 again:
 	spin_lock(ws_lock);
@@ -915,10 +1009,7 @@
 	 * context of btrfs_compress_bio/btrfs_compress_pages
 	 */
 	nofs_flag = memalloc_nofs_save();
-	if (heuristic)
-		workspace = alloc_heuristic_ws();
-	else
-		workspace = btrfs_compress_op[idx]->alloc_workspace();
+	workspace = alloc_workspace(type, level);
 	memalloc_nofs_restore(nofs_flag);
 
 	if (IS_ERR(workspace)) {
@@ -949,85 +1040,87 @@
 	return workspace;
 }
 
-static struct list_head *find_workspace(int type)
+static struct list_head *get_workspace(int type, int level)
 {
-	return __find_workspace(type, false);
+	switch (type) {
+	case BTRFS_COMPRESS_NONE: return btrfs_get_workspace(type, level);
+	case BTRFS_COMPRESS_ZLIB: return zlib_get_workspace(level);
+	case BTRFS_COMPRESS_LZO:  return btrfs_get_workspace(type, level);
+	case BTRFS_COMPRESS_ZSTD: return zstd_get_workspace(level);
+	default:
+		/*
+		 * This can't happen, the type is validated several times
+		 * before we get here.
+		 */
+		BUG();
+	}
 }
 
 /*
  * put a workspace struct back on the list or free it if we have enough
  * idle ones sitting around
  */
-static void __free_workspace(int type, struct list_head *workspace,
-			     bool heuristic)
+void btrfs_put_workspace(int type, struct list_head *ws)
 {
-	int idx = type - 1;
+	struct workspace_manager *wsm;
 	struct list_head *idle_ws;
 	spinlock_t *ws_lock;
 	atomic_t *total_ws;
 	wait_queue_head_t *ws_wait;
 	int *free_ws;
 
-	if (heuristic) {
-		idle_ws = &btrfs_heuristic_ws.idle_ws;
-		ws_lock = &btrfs_heuristic_ws.ws_lock;
-		total_ws = &btrfs_heuristic_ws.total_ws;
-		ws_wait = &btrfs_heuristic_ws.ws_wait;
-		free_ws = &btrfs_heuristic_ws.free_ws;
-	} else {
-		idle_ws = &btrfs_comp_ws[idx].idle_ws;
-		ws_lock = &btrfs_comp_ws[idx].ws_lock;
-		total_ws = &btrfs_comp_ws[idx].total_ws;
-		ws_wait = &btrfs_comp_ws[idx].ws_wait;
-		free_ws = &btrfs_comp_ws[idx].free_ws;
-	}
+	wsm = btrfs_compress_op[type]->workspace_manager;
+	idle_ws = &wsm->idle_ws;
+	ws_lock = &wsm->ws_lock;
+	total_ws = &wsm->total_ws;
+	ws_wait = &wsm->ws_wait;
+	free_ws = &wsm->free_ws;
 
 	spin_lock(ws_lock);
 	if (*free_ws <= num_online_cpus()) {
-		list_add(workspace, idle_ws);
+		list_add(ws, idle_ws);
 		(*free_ws)++;
 		spin_unlock(ws_lock);
 		goto wake;
 	}
 	spin_unlock(ws_lock);
 
-	if (heuristic)
-		free_heuristic_ws(workspace);
-	else
-		btrfs_compress_op[idx]->free_workspace(workspace);
+	free_workspace(type, ws);
 	atomic_dec(total_ws);
 wake:
 	cond_wake_up(ws_wait);
 }
 
-static void free_workspace(int type, struct list_head *ws)
+static void put_workspace(int type, struct list_head *ws)
 {
-	return __free_workspace(type, ws, false);
+	switch (type) {
+	case BTRFS_COMPRESS_NONE: return btrfs_put_workspace(type, ws);
+	case BTRFS_COMPRESS_ZLIB: return btrfs_put_workspace(type, ws);
+	case BTRFS_COMPRESS_LZO:  return btrfs_put_workspace(type, ws);
+	case BTRFS_COMPRESS_ZSTD: return zstd_put_workspace(ws);
+	default:
+		/*
+		 * This can't happen, the type is validated several times
+		 * before we get here.
+		 */
+		BUG();
+	}
 }
 
 /*
- * cleanup function for module exit
+ * Adjust @level according to the limits of the compression algorithm or
+ * fallback to default
 */
-static void free_workspaces(void)
+static unsigned int btrfs_compress_set_level(int type, unsigned level)
 {
-	struct list_head *workspace;
-	int i;
+	const struct btrfs_compress_op *ops = btrfs_compress_op[type];
 
-	while (!list_empty(&btrfs_heuristic_ws.idle_ws)) {
-		workspace = btrfs_heuristic_ws.idle_ws.next;
-		list_del(workspace);
-		free_heuristic_ws(workspace);
-		atomic_dec(&btrfs_heuristic_ws.total_ws);
-	}
+	if (level == 0)
+		level = ops->default_level;
+	else
+		level = min(level, ops->max_level);
 
-	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
-		while (!list_empty(&btrfs_comp_ws[i].idle_ws)) {
-			workspace = btrfs_comp_ws[i].idle_ws.next;
-			list_del(workspace);
-			btrfs_compress_op[i]->free_workspace(workspace);
-			atomic_dec(&btrfs_comp_ws[i].total_ws);
-		}
-	}
+	return level;
 }
 
 /*
@@ -1059,18 +1152,16 @@
 			 unsigned long *total_in,
 			 unsigned long *total_out)
 {
+	int type = btrfs_compress_type(type_level);
+	int level = btrfs_compress_level(type_level);
 	struct list_head *workspace;
 	int ret;
-	int type = type_level & 0xF;
 
-	workspace = find_workspace(type);
-
-	btrfs_compress_op[type - 1]->set_level(workspace, type_level);
-	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
-						      start, pages,
-						      out_pages,
-						      total_in, total_out);
-	free_workspace(type, workspace);
+	level = btrfs_compress_set_level(type, level);
+	workspace = get_workspace(type, level);
+	ret = compression_compress_pages(type, workspace, mapping, start, pages,
+					 out_pages, total_in, total_out);
+	put_workspace(type, workspace);
 	return ret;
 }
 
@@ -1094,9 +1185,9 @@
 	int ret;
 	int type = cb->compress_type;
 
-	workspace = find_workspace(type);
-	ret = btrfs_compress_op[type - 1]->decompress_bio(workspace, cb);
-	free_workspace(type, workspace);
+	workspace = get_workspace(type, 0);
+	ret = compression_decompress_bio(type, workspace, cb);
+	put_workspace(type, workspace);
 
 	return ret;
 }
@@ -1112,19 +1203,28 @@
 	struct list_head *workspace;
 	int ret;
 
-	workspace = find_workspace(type);
+	workspace = get_workspace(type, 0);
+	ret = compression_decompress(type, workspace, data_in, dest_page,
+				     start_byte, srclen, destlen);
+	put_workspace(type, workspace);
 
-	ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
-						  dest_page, start_byte,
-						  srclen, destlen);
-
-	free_workspace(type, workspace);
 	return ret;
+}
+
+void __init btrfs_init_compress(void)
+{
+	btrfs_init_workspace_manager(BTRFS_COMPRESS_NONE);
+	btrfs_init_workspace_manager(BTRFS_COMPRESS_ZLIB);
+	btrfs_init_workspace_manager(BTRFS_COMPRESS_LZO);
+	zstd_init_workspace_manager();
 }
 
 void __cold btrfs_exit_compress(void)
 {
-	free_workspaces();
+	btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_NONE);
+	btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_ZLIB);
+	btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_LZO);
+	zstd_cleanup_workspace_manager();
 }
 
 /*
@@ -1172,7 +1272,7 @@
 	/* copy bytes from the working buffer into the pages */
 	while (working_bytes > 0) {
 		bytes = min_t(unsigned long, bvec.bv_len,
-			      PAGE_SIZE - buf_offset);
+			      PAGE_SIZE - (buf_offset % PAGE_SIZE));
 		bytes = min(bytes, working_bytes);
 
 		kaddr = kmap_atomic(bvec.bv_page);
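The one-character fix above matters once buf_offset can exceed PAGE_SIZE: the per-iteration copy must be capped by the distance to the next page boundary, PAGE_SIZE - (buf_offset % PAGE_SIZE), whereas PAGE_SIZE - buf_offset wraps around as an unsigned value when the offset has advanced past the first page. A small self-contained sketch of the corrected chunking:

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Walk a copy of `len` bytes starting at absolute buffer offset
 * `buf_offset`, chunked so no chunk crosses a page boundary, which is
 * the same capping the patched min_t() performs. */
static void copy_chunks(unsigned long buf_offset, unsigned long len)
{
	while (len > 0) {
		unsigned long room = PAGE_SIZE - (buf_offset % PAGE_SIZE);
		unsigned long bytes = len < room ? len : room;

		printf("copy %5lu bytes at offset %lu\n", bytes, buf_offset);
		buf_offset += bytes;
		len -= bytes;
	}
}

int main(void)
{
	/* Offset already inside the second page: the old
	 * "PAGE_SIZE - buf_offset" would wrap around here. */
	copy_chunks(PAGE_SIZE + 100, 8000);
	return 0;
}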
@@ -1226,7 +1326,7 @@
 /*
  * Shannon Entropy calculation
  *
- * Pure byte distribution analysis fails to determine compressiability of data.
+ * Pure byte distribution analysis fails to determine compressibility of data.
  * Try calculating entropy to estimate the average minimum number of bits
  * needed to encode the sampled data.
  *
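For reference, the estimate this comment describes is the Shannon entropy H = -sum(p(x) * log2 p(x)) over the sampled byte distribution; data needing close to 8 bits per byte is a poor compression candidate. A self-contained sketch of the calculation (floating point for clarity; the kernel implementation uses an integer log2 approximation instead):

#include <math.h>
#include <stdio.h>
#include <string.h>

/* Shannon entropy of the sample, in bits per byte. */
static double shannon_entropy(const unsigned char *buf, size_t len)
{
	size_t count[256] = { 0 };
	double h = 0.0;

	for (size_t i = 0; i < len; i++)
		count[buf[i]]++;

	for (int b = 0; b < 256; b++) {
		if (!count[b])
			continue;
		double p = (double)count[b] / len;
		h -= p * log2(p);
	}
	return h; /* 0 (constant data) .. 8 (uniform random) */
}

int main(void)
{
	unsigned char text[4096], flat[4096];

	for (size_t i = 0; i < sizeof(text); i++)
		text[i] = "abcdabcdefgh"[i % 12];
	memset(flat, 0x5a, sizeof(flat));

	printf("repetitive: %.2f bits/byte\n",
	       shannon_entropy(text, sizeof(text)));
	printf("constant:   %.2f bits/byte\n",
	       shannon_entropy(flat, sizeof(flat)));
	return 0; /* build with -lm */
}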
@@ -1290,7 +1390,7 @@
 
 /*
  * Use 4 bits as radix base
- * Use 16 u32 counters for calculating new possition in buf array
+ * Use 16 u32 counters for calculating new position in buf array
  *
  * @array     - array that will be sorted
  * @array_buf - buffer array to store sorting results
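The comment above describes a radix sort with base 16: each pass counts the 16 possible values of one nibble, prefix-sums the counters into positions, and scatters into the buffer array. A minimal sketch over plain u32 keys (the kernel version sorts the heuristic's bucket structs by their count field, same mechanics):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* 4 bits per pass means 16 counters, 8 passes for a 32-bit key. */
static void radix_sort_u32(uint32_t *array, uint32_t *array_buf, size_t n)
{
	for (int shift = 0; shift < 32; shift += 4) {
		uint32_t count[16] = { 0 };

		for (size_t i = 0; i < n; i++)
			count[(array[i] >> shift) & 0xf]++;
		/* prefix sums: count[d] becomes the end of digit d's slot */
		for (int d = 1; d < 16; d++)
			count[d] += count[d - 1];
		/* stable scatter, walking backwards */
		for (size_t i = n; i-- > 0; ) {
			uint32_t d = (array[i] >> shift) & 0xf;
			array_buf[--count[d]] = array[i];
		}
		memcpy(array, array_buf, n * sizeof(*array));
	}
}

int main(void)
{
	uint32_t a[] = { 170, 45, 75, 90, 802, 24, 2, 66 };
	uint32_t buf[8];

	radix_sort_u32(a, buf, 8);
	for (int i = 0; i < 8; i++)
		printf("%u ", a[i]);
	printf("\n");
	return 0;
}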
@@ -1535,7 +1635,7 @@
  */
 int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
 {
-	struct list_head *ws_list = __find_workspace(0, true);
+	struct list_head *ws_list = get_workspace(0, 0);
 	struct heuristic_ws *ws;
 	u32 i;
 	u8 byte;
@@ -1604,18 +1704,29 @@
 	}
 
 out:
-	__free_workspace(0, ws_list, true);
+	put_workspace(0, ws_list);
 	return ret;
 }
 
-unsigned int btrfs_compress_str2level(const char *str)
+/*
+ * Convert the compression suffix (eg. after "zlib" starting with ":") to
+ * level, unrecognized string will set the default level
+ */
+unsigned int btrfs_compress_str2level(unsigned int type, const char *str)
 {
-	if (strncmp(str, "zlib", 4) != 0)
+	unsigned int level = 0;
+	int ret;
+
+	if (!type)
 		return 0;
 
-	/* Accepted form: zlib:1 up to zlib:9 and nothing left after the number */
-	if (str[4] == ':' && '1' <= str[5] && str[5] <= '9' && str[6] == 0)
-		return str[5] - '0';
+	if (str[0] == ':') {
+		ret = kstrtouint(str + 1, 10, &level);
+		if (ret)
+			level = 0;
+	}
 
-	return BTRFS_ZLIB_DEFAULT_LEVEL;
+	level = btrfs_compress_set_level(type, level);
+
+	return level;
 }
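The new parser accepts any ":N" suffix and defers range policy to btrfs_compress_set_level(): 0 (or a parse error) selects the algorithm's default, anything larger is clamped to its maximum. A userspace model of the whole path (strtoul standing in for kstrtouint; the default/max numbers below are illustrative, not the kernel's actual limits):

#include <stdio.h>
#include <stdlib.h>

struct compress_op { unsigned def_level, max_level; };

/* Illustrative per-type limits only. */
static const struct compress_op ops[] = {
	[1] = { .def_level = 3, .max_level = 9 },  /* "zlib" */
	[3] = { .def_level = 3, .max_level = 15 }, /* "zstd" */
};

/* 0 means "use the default"; larger values are clamped to the max. */
static unsigned set_level(unsigned type, unsigned level)
{
	if (level == 0)
		return ops[type].def_level;
	return level < ops[type].max_level ? level : ops[type].max_level;
}

/* Parse the ":N" suffix after the algorithm name, as in "zlib:7". */
static unsigned str2level(unsigned type, const char *str)
{
	unsigned level = 0;

	if (!type)
		return 0;
	if (str[0] == ':') {
		char *end;
		unsigned long v = strtoul(str + 1, &end, 10);

		/* like kstrtouint: any trailing junk means failure */
		level = (*end == '\0') ? (unsigned)v : 0;
	}
	return set_level(type, level);
}

int main(void)
{
	printf("zlib default: %u\n", str2level(1, ""));    /* 3 */
	printf("zlib:7      : %u\n", str2level(1, ":7"));  /* 7 */
	printf("zlib:42     : %u\n", str2level(1, ":42")); /* clamped to 9 */
	printf("zstd:x      : %u\n", str2level(3, ":x"));  /* default 3 */
	return 0;
}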