hc
2024-05-10 9999e48639b3cecb08ffb37358bcba3b48161b29
kernel/fs/btrfs/zstd.c
....@@ -6,25 +6,32 @@
66 */
77
88 #include <linux/bio.h>
9
+#include <linux/bitmap.h>
910 #include <linux/err.h>
1011 #include <linux/init.h>
1112 #include <linux/kernel.h>
1213 #include <linux/mm.h>
14
+#include <linux/sched/mm.h>
1315 #include <linux/pagemap.h>
1416 #include <linux/refcount.h>
1517 #include <linux/sched.h>
1618 #include <linux/slab.h>
1719 #include <linux/zstd.h>
20
+#include "misc.h"
1821 #include "compression.h"
22
+#include "ctree.h"
1923
2024 #define ZSTD_BTRFS_MAX_WINDOWLOG 17
2125 #define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)
2226 #define ZSTD_BTRFS_DEFAULT_LEVEL 3
27
+#define ZSTD_BTRFS_MAX_LEVEL 15
28
+/* 307s to avoid pathologically clashing with transaction commit */
29
+#define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ)
2330
24
-static ZSTD_parameters zstd_get_btrfs_parameters(size_t src_len)
31
+static ZSTD_parameters zstd_get_btrfs_parameters(unsigned int level,
32
+ size_t src_len)
2533 {
26
- ZSTD_parameters params = ZSTD_getParams(ZSTD_BTRFS_DEFAULT_LEVEL,
27
- src_len, 0);
34
+ ZSTD_parameters params = ZSTD_getParams(level, src_len, 0);
2835
2936 if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
3037 params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;
....@@ -36,12 +43,295 @@
3643 void *mem;
3744 size_t size;
3845 char *buf;
46
+ unsigned int level;
47
+ unsigned int req_level;
48
+ unsigned long last_used; /* jiffies */
3949 struct list_head list;
50
+ struct list_head lru_list;
4051 ZSTD_inBuffer in_buf;
4152 ZSTD_outBuffer out_buf;
4253 };
4354
44
-static void zstd_free_workspace(struct list_head *ws)
55
+/*
56
+ * Zstd Workspace Management
57
+ *
58
+ * Zstd workspaces have different memory requirements depending on the level.
59
+ * The zstd workspaces are managed by having individual lists for each level
60
+ * and a global lru. Forward progress is maintained by protecting a max level
61
+ * workspace.
62
+ *
63
+ * Getting a workspace is done by using the bitmap to identify the levels that
64
+ * have available workspaces and scans up. This lets us recycle higher level
65
+ * workspaces because of the monotonic memory guarantee. A workspace's
66
+ * last_used is only updated if it is being used by the corresponding memory
67
+ * level. Putting a workspace involves adding it back to the appropriate places
68
+ * and adding it back to the lru if necessary.
69
+ *
70
+ * A timer is used to reclaim workspaces if they have not been used for
71
+ * ZSTD_BTRFS_RECLAIM_JIFFIES. This helps keep only active workspaces around.
72
+ * The upper bound is provided by the workqueue limit which is 2 (percpu limit).
73
+ */
74
+
75
+struct zstd_workspace_manager {
76
+ const struct btrfs_compress_op *ops;
77
+ spinlock_t lock;
78
+ struct list_head lru_list;
79
+ struct list_head idle_ws[ZSTD_BTRFS_MAX_LEVEL];
80
+ unsigned long active_map;
81
+ wait_queue_head_t wait;
82
+ struct timer_list timer;
83
+};
84
+
85
+static struct zstd_workspace_manager wsm;
86
+
87
+static size_t zstd_ws_mem_sizes[ZSTD_BTRFS_MAX_LEVEL];
88
+
89
+static inline struct workspace *list_to_workspace(struct list_head *list)
90
+{
91
+ return container_of(list, struct workspace, list);
92
+}
93
+
94
+void zstd_free_workspace(struct list_head *ws);
95
+struct list_head *zstd_alloc_workspace(unsigned int level);
96
+/*
97
+ * zstd_reclaim_timer_fn - reclaim timer
98
+ * @t: timer
99
+ *
100
+ * This scans the lru_list and attempts to reclaim any workspace that hasn't
101
+ * been used for ZSTD_BTRFS_RECLAIM_JIFFIES.
102
+ */
103
+static void zstd_reclaim_timer_fn(struct timer_list *timer)
104
+{
105
+ unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
106
+ struct list_head *pos, *next;
107
+
108
+ spin_lock_bh(&wsm.lock);
109
+
110
+ if (list_empty(&wsm.lru_list)) {
111
+ spin_unlock_bh(&wsm.lock);
112
+ return;
113
+ }
114
+
115
+ list_for_each_prev_safe(pos, next, &wsm.lru_list) {
116
+ struct workspace *victim = container_of(pos, struct workspace,
117
+ lru_list);
118
+ unsigned int level;
119
+
120
+ if (time_after(victim->last_used, reclaim_threshold))
121
+ break;
122
+
123
+ /* workspace is in use */
124
+ if (victim->req_level)
125
+ continue;
126
+
127
+ level = victim->level;
128
+ list_del(&victim->lru_list);
129
+ list_del(&victim->list);
130
+ zstd_free_workspace(&victim->list);
131
+
132
+ if (list_empty(&wsm.idle_ws[level - 1]))
133
+ clear_bit(level - 1, &wsm.active_map);
134
+
135
+ }
136
+
137
+ if (!list_empty(&wsm.lru_list))
138
+ mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
139
+
140
+ spin_unlock_bh(&wsm.lock);
141
+}
142
+
143
+/*
144
+ * zstd_calc_ws_mem_sizes - calculate monotonic memory bounds
145
+ *
146
+ * It is possible based on the level configurations that a higher level
147
+ * workspace uses less memory than a lower level workspace. In order to reuse
148
+ * workspaces, this must be made a monotonic relationship. This precomputes
149
+ * the required memory for each level and enforces the monotonicity between
150
+ * level and memory required.
151
+ */
152
+static void zstd_calc_ws_mem_sizes(void)
153
+{
154
+ size_t max_size = 0;
155
+ unsigned int level;
156
+
157
+ for (level = 1; level <= ZSTD_BTRFS_MAX_LEVEL; level++) {
158
+ ZSTD_parameters params =
159
+ zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT);
160
+ size_t level_size =
161
+ max_t(size_t,
162
+ ZSTD_CStreamWorkspaceBound(params.cParams),
163
+ ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT));
164
+
165
+ max_size = max_t(size_t, max_size, level_size);
166
+ zstd_ws_mem_sizes[level - 1] = max_size;
167
+ }
168
+}
169
+
170
+void zstd_init_workspace_manager(void)
171
+{
172
+ struct list_head *ws;
173
+ int i;
174
+
175
+ zstd_calc_ws_mem_sizes();
176
+
177
+ wsm.ops = &btrfs_zstd_compress;
178
+ spin_lock_init(&wsm.lock);
179
+ init_waitqueue_head(&wsm.wait);
180
+ timer_setup(&wsm.timer, zstd_reclaim_timer_fn, 0);
181
+
182
+ INIT_LIST_HEAD(&wsm.lru_list);
183
+ for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
184
+ INIT_LIST_HEAD(&wsm.idle_ws[i]);
185
+
186
+ ws = zstd_alloc_workspace(ZSTD_BTRFS_MAX_LEVEL);
187
+ if (IS_ERR(ws)) {
188
+ pr_warn(
189
+ "BTRFS: cannot preallocate zstd compression workspace\n");
190
+ } else {
191
+ set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &wsm.active_map);
192
+ list_add(ws, &wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]);
193
+ }
194
+}
195
+
196
+void zstd_cleanup_workspace_manager(void)
197
+{
198
+ struct workspace *workspace;
199
+ int i;
200
+
201
+ spin_lock_bh(&wsm.lock);
202
+ for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
203
+ while (!list_empty(&wsm.idle_ws[i])) {
204
+ workspace = container_of(wsm.idle_ws[i].next,
205
+ struct workspace, list);
206
+ list_del(&workspace->list);
207
+ list_del(&workspace->lru_list);
208
+ zstd_free_workspace(&workspace->list);
209
+ }
210
+ }
211
+ spin_unlock_bh(&wsm.lock);
212
+
213
+ del_timer_sync(&wsm.timer);
214
+}
215
+
216
+/*
217
+ * zstd_find_workspace - find workspace
218
+ * @level: compression level
219
+ *
220
+ * This iterates over the set bits in the active_map beginning at the requested
221
+ * compression level. This lets us utilize already allocated workspaces before
222
+ * allocating a new one. If the workspace is of a larger size, it is used, but
223
+ * the place in the lru_list and last_used times are not updated. This is to
224
+ * offer the opportunity to reclaim the workspace in favor of allocating an
225
+ * appropriately sized one in the future.
226
+ */
227
+static struct list_head *zstd_find_workspace(unsigned int level)
228
+{
229
+ struct list_head *ws;
230
+ struct workspace *workspace;
231
+ int i = level - 1;
232
+
233
+ spin_lock_bh(&wsm.lock);
234
+ for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) {
235
+ if (!list_empty(&wsm.idle_ws[i])) {
236
+ ws = wsm.idle_ws[i].next;
237
+ workspace = list_to_workspace(ws);
238
+ list_del_init(ws);
239
+ /* keep its place if it's a lower level using this */
240
+ workspace->req_level = level;
241
+ if (level == workspace->level)
242
+ list_del(&workspace->lru_list);
243
+ if (list_empty(&wsm.idle_ws[i]))
244
+ clear_bit(i, &wsm.active_map);
245
+ spin_unlock_bh(&wsm.lock);
246
+ return ws;
247
+ }
248
+ }
249
+ spin_unlock_bh(&wsm.lock);
250
+
251
+ return NULL;
252
+}
253
+
254
+/*
255
+ * zstd_get_workspace - zstd's get_workspace
256
+ * @level: compression level
257
+ *
258
+ * If @level is 0, then any compression level can be used. Therefore, we begin
259
+ * scanning from 1. We first scan through possible workspaces and then after
260
+ * attempt to allocate a new workspace. If we fail to allocate one due to
261
+ * memory pressure, go to sleep waiting for the max level workspace to free up.
262
+ */
263
+struct list_head *zstd_get_workspace(unsigned int level)
264
+{
265
+ struct list_head *ws;
266
+ unsigned int nofs_flag;
267
+
268
+ /* level == 0 means we can use any workspace */
269
+ if (!level)
270
+ level = 1;
271
+
272
+again:
273
+ ws = zstd_find_workspace(level);
274
+ if (ws)
275
+ return ws;
276
+
277
+ nofs_flag = memalloc_nofs_save();
278
+ ws = zstd_alloc_workspace(level);
279
+ memalloc_nofs_restore(nofs_flag);
280
+
281
+ if (IS_ERR(ws)) {
282
+ DEFINE_WAIT(wait);
283
+
284
+ prepare_to_wait(&wsm.wait, &wait, TASK_UNINTERRUPTIBLE);
285
+ schedule();
286
+ finish_wait(&wsm.wait, &wait);
287
+
288
+ goto again;
289
+ }
290
+
291
+ return ws;
292
+}
293
+
294
+/*
295
+ * zstd_put_workspace - zstd put_workspace
296
+ * @ws: list_head for the workspace
297
+ *
298
+ * When putting back a workspace, we only need to update the LRU if we are of
299
+ * the requested compression level. Here is where we continue to protect the
300
+ * max level workspace or update last_used accordingly. If the reclaim timer
301
+ * isn't set, it is also set here. Only the max level workspace tries and wakes
302
+ * up waiting workspaces.
303
+ */
304
+void zstd_put_workspace(struct list_head *ws)
305
+{
306
+ struct workspace *workspace = list_to_workspace(ws);
307
+
308
+ spin_lock_bh(&wsm.lock);
309
+
310
+ /* A node is only taken off the lru if we are the corresponding level */
311
+ if (workspace->req_level == workspace->level) {
312
+ /* Hide a max level workspace from reclaim */
313
+ if (list_empty(&wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) {
314
+ INIT_LIST_HEAD(&workspace->lru_list);
315
+ } else {
316
+ workspace->last_used = jiffies;
317
+ list_add(&workspace->lru_list, &wsm.lru_list);
318
+ if (!timer_pending(&wsm.timer))
319
+ mod_timer(&wsm.timer,
320
+ jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
321
+ }
322
+ }
323
+
324
+ set_bit(workspace->level - 1, &wsm.active_map);
325
+ list_add(&workspace->list, &wsm.idle_ws[workspace->level - 1]);
326
+ workspace->req_level = 0;
327
+
328
+ spin_unlock_bh(&wsm.lock);
329
+
330
+ if (workspace->level == ZSTD_BTRFS_MAX_LEVEL)
331
+ cond_wake_up(&wsm.wait);
332
+}
333
+
334
+void zstd_free_workspace(struct list_head *ws)
45335 {
46336 struct workspace *workspace = list_entry(ws, struct workspace, list);
47337
....@@ -50,25 +340,25 @@
50340 kfree(workspace);
51341 }
52342
53
-static struct list_head *zstd_alloc_workspace(void)
343
+struct list_head *zstd_alloc_workspace(unsigned int level)
54344 {
55
- ZSTD_parameters params =
56
- zstd_get_btrfs_parameters(ZSTD_BTRFS_MAX_INPUT);
57345 struct workspace *workspace;
58346
59347 workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
60348 if (!workspace)
61349 return ERR_PTR(-ENOMEM);
62350
63
- workspace->size = max_t(size_t,
64
- ZSTD_CStreamWorkspaceBound(params.cParams),
65
- ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT));
351
+ workspace->size = zstd_ws_mem_sizes[level - 1];
352
+ workspace->level = level;
353
+ workspace->req_level = level;
354
+ workspace->last_used = jiffies;
66355 workspace->mem = kvmalloc(workspace->size, GFP_KERNEL);
67356 workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
68357 if (!workspace->mem || !workspace->buf)
69358 goto fail;
70359
71360 INIT_LIST_HEAD(&workspace->list);
361
+ INIT_LIST_HEAD(&workspace->lru_list);
72362
73363 return &workspace->list;
74364 fail:
....@@ -76,13 +366,9 @@
76366 return ERR_PTR(-ENOMEM);
77367 }
78368
79
-static int zstd_compress_pages(struct list_head *ws,
80
- struct address_space *mapping,
81
- u64 start,
82
- struct page **pages,
83
- unsigned long *out_pages,
84
- unsigned long *total_in,
85
- unsigned long *total_out)
369
+int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
370
+ u64 start, struct page **pages, unsigned long *out_pages,
371
+ unsigned long *total_in, unsigned long *total_out)
86372 {
87373 struct workspace *workspace = list_entry(ws, struct workspace, list);
88374 ZSTD_CStream *stream;
....@@ -95,7 +381,8 @@
95381 unsigned long len = *total_out;
96382 const unsigned long nr_dest_pages = *out_pages;
97383 unsigned long max_out = nr_dest_pages * PAGE_SIZE;
98
- ZSTD_parameters params = zstd_get_btrfs_parameters(len);
384
+ ZSTD_parameters params = zstd_get_btrfs_parameters(workspace->req_level,
385
+ len);
99386
100387 *out_pages = 0;
101388 *total_out = 0;
....@@ -256,7 +543,7 @@
256543 return ret;
257544 }
258545
259
-static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
546
+int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
260547 {
261548 struct workspace *workspace = list_entry(ws, struct workspace, list);
262549 struct page **pages_in = cb->compressed_pages;
....@@ -334,10 +621,9 @@
334621 return ret;
335622 }
336623
337
-static int zstd_decompress(struct list_head *ws, unsigned char *data_in,
338
- struct page *dest_page,
339
- unsigned long start_byte,
340
- size_t srclen, size_t destlen)
624
+int zstd_decompress(struct list_head *ws, unsigned char *data_in,
625
+ struct page *dest_page, unsigned long start_byte, size_t srclen,
626
+ size_t destlen)
341627 {
342628 struct workspace *workspace = list_entry(ws, struct workspace, list);
343629 ZSTD_DStream *stream;
....@@ -419,15 +705,9 @@
419705 return ret;
420706 }
421707
422
-static void zstd_set_level(struct list_head *ws, unsigned int type)
423
-{
424
-}
425
-
426708 const struct btrfs_compress_op btrfs_zstd_compress = {
427
- .alloc_workspace = zstd_alloc_workspace,
428
- .free_workspace = zstd_free_workspace,
429
- .compress_pages = zstd_compress_pages,
430
- .decompress_bio = zstd_decompress_bio,
431
- .decompress = zstd_decompress,
432
- .set_level = zstd_set_level,
709
+ /* ZSTD uses own workspace manager */
710
+ .workspace_manager = NULL,
711
+ .max_level = ZSTD_BTRFS_MAX_LEVEL,
712
+ .default_level = ZSTD_BTRFS_DEFAULT_LEVEL,
433713 };