hc
2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/mm/zswap.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * zswap.c - zswap driver file
34 *
....@@ -8,16 +9,6 @@
89 * than reading from the swap device, can also improve workload performance.
910 *
1011 * Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com>
11
- *
12
- * This program is free software; you can redistribute it and/or
13
- * modify it under the terms of the GNU General Public License
14
- * as published by the Free Software Foundation; either version 2
15
- * of the License, or (at your option) any later version.
16
- *
17
- * This program is distributed in the hope that it will be useful,
18
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
- * GNU General Public License for more details.
2112 */
2213
2314 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
....@@ -41,6 +32,7 @@
4132 #include <linux/swapops.h>
4233 #include <linux/writeback.h>
4334 #include <linux/pagemap.h>
35
+#include <linux/workqueue.h>
4436
4537 /*********************************
4638 * statistics
....@@ -74,14 +66,19 @@
7466 /* Duplicate store was encountered (rare) */
7567 static u64 zswap_duplicate_entry;
7668
69
+/* Shrinker work queue */
70
+static struct workqueue_struct *shrink_wq;
71
+/* Pool limit was hit, we need to calm down */
72
+static bool zswap_pool_reached_full;
73
+
7774 /*********************************
7875 * tunables
7976 **********************************/
8077
8178 #define ZSWAP_PARAM_UNSET ""
8279
83
-/* Enable/disable zswap (disabled by default) */
84
-static bool zswap_enabled;
80
+/* Enable/disable zswap */
81
+static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
8582 static int zswap_enabled_param_set(const char *,
8683 const struct kernel_param *);
8784 static struct kernel_param_ops zswap_enabled_param_ops = {
....@@ -91,8 +88,7 @@
9188 module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);
9289
9390 /* Crypto compressor to use */
94
-#define ZSWAP_COMPRESSOR_DEFAULT "lzo"
95
-static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
91
+static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
9692 static int zswap_compressor_param_set(const char *,
9793 const struct kernel_param *);
9894 static struct kernel_param_ops zswap_compressor_param_ops = {
....@@ -104,8 +100,7 @@
104100 &zswap_compressor, 0644);
105101
106102 /* Compressed storage zpool to use */
107
-#define ZSWAP_ZPOOL_DEFAULT "zbud"
108
-static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
103
+static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
109104 static int zswap_zpool_param_set(const char *, const struct kernel_param *);
110105 static struct kernel_param_ops zswap_zpool_param_ops = {
111106 .set = zswap_zpool_param_set,
....@@ -117,6 +112,11 @@
117112 /* The maximum percentage of memory that the compressed pool can occupy */
118113 static unsigned int zswap_max_pool_percent = 20;
119114 module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);
115
+
116
+/* The threshold for accepting new pages after the max_pool_percent was hit */
117
+static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
118
+module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
119
+ uint, 0644);
120120
121121 /* Enable/disable handling same-value filled pages (enabled by default) */
122122 static bool zswap_same_filled_pages_enabled = true;
....@@ -132,7 +132,8 @@
132132 struct crypto_comp * __percpu *tfm;
133133 struct kref kref;
134134 struct list_head list;
135
- struct work_struct work;
135
+ struct work_struct release_work;
136
+ struct work_struct shrink_work;
136137 struct hlist_node node;
137138 char tfm_name[CRYPTO_MAX_ALG_NAME];
138139 };
....@@ -219,8 +220,15 @@
219220
220221 static bool zswap_is_full(void)
221222 {
222
- return totalram_pages * zswap_max_pool_percent / 100 <
223
- DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
223
+ return totalram_pages() * zswap_max_pool_percent / 100 <
224
+ DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
225
+}
226
+
227
+static bool zswap_can_accept(void)
228
+{
229
+ return totalram_pages() * zswap_accept_thr_percent / 100 *
230
+ zswap_max_pool_percent / 100 >
231
+ DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
224232 }
225233
226234 static void zswap_update_total_size(void)
....@@ -510,6 +518,16 @@
510518 return NULL;
511519 }
512520
521
+static void shrink_worker(struct work_struct *w)
522
+{
523
+ struct zswap_pool *pool = container_of(w, typeof(*pool),
524
+ shrink_work);
525
+
526
+ if (zpool_shrink(pool->zpool, 1, NULL))
527
+ zswap_reject_reclaim_fail++;
528
+ zswap_pool_put(pool);
529
+}
530
+
513531 static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
514532 {
515533 struct zswap_pool *pool;
....@@ -560,6 +578,7 @@
560578 */
561579 kref_init(&pool->kref);
562580 INIT_LIST_HEAD(&pool->list);
581
+ INIT_WORK(&pool->shrink_work, shrink_worker);
563582
564583 zswap_pool_debug("created", pool);
565584
....@@ -578,11 +597,12 @@
578597 bool has_comp, has_zpool;
579598
580599 has_comp = crypto_has_comp(zswap_compressor, 0, 0);
581
- if (!has_comp && strcmp(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT)) {
600
+ if (!has_comp && strcmp(zswap_compressor,
601
+ CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
582602 pr_err("compressor %s not available, using default %s\n",
583
- zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
603
+ zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
584604 param_free_charp(&zswap_compressor);
585
- zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
605
+ zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
586606 has_comp = crypto_has_comp(zswap_compressor, 0, 0);
587607 }
588608 if (!has_comp) {
....@@ -593,11 +613,12 @@
593613 }
594614
595615 has_zpool = zpool_has_pool(zswap_zpool_type);
596
- if (!has_zpool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
616
+ if (!has_zpool && strcmp(zswap_zpool_type,
617
+ CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
597618 pr_err("zpool %s not available, using default %s\n",
598
- zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
619
+ zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
599620 param_free_charp(&zswap_zpool_type);
600
- zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
621
+ zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
601622 has_zpool = zpool_has_pool(zswap_zpool_type);
602623 }
603624 if (!has_zpool) {
....@@ -633,7 +654,8 @@
633654
634655 static void __zswap_pool_release(struct work_struct *work)
635656 {
636
- struct zswap_pool *pool = container_of(work, typeof(*pool), work);
657
+ struct zswap_pool *pool = container_of(work, typeof(*pool),
658
+ release_work);
637659
638660 synchronize_rcu();
639661
....@@ -656,8 +678,8 @@
656678
657679 list_del_rcu(&pool->list);
658680
659
- INIT_WORK(&pool->work, __zswap_pool_release);
660
- schedule_work(&pool->work);
681
+ INIT_WORK(&pool->release_work, __zswap_pool_release);
682
+ schedule_work(&pool->release_work);
661683
662684 spin_unlock(&zswap_pools_lock);
663685 }
....@@ -865,7 +887,6 @@
865887 /* extract swpentry from data */
866888 zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
867889 swpentry = zhdr->swpentry; /* here */
868
- zpool_unmap_handle(pool, handle);
869890 tree = zswap_trees[swp_type(swpentry)];
870891 offset = swp_offset(swpentry);
871892
....@@ -875,6 +896,7 @@
875896 if (!entry) {
876897 /* entry was invalidated */
877898 spin_unlock(&tree->lock);
899
+ zpool_unmap_handle(pool, handle);
878900 return 0;
879901 }
880902 spin_unlock(&tree->lock);
....@@ -895,15 +917,13 @@
895917 case ZSWAP_SWAPCACHE_NEW: /* page is locked */
896918 /* decompress */
897919 dlen = PAGE_SIZE;
898
- src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
899
- ZPOOL_MM_RO) + sizeof(struct zswap_header);
920
+ src = (u8 *)zhdr + sizeof(struct zswap_header);
900921 dst = kmap_atomic(page);
901922 tfm = *get_cpu_ptr(entry->pool->tfm);
902923 ret = crypto_comp_decompress(tfm, src, entry->length,
903924 dst, &dlen);
904925 put_cpu_ptr(entry->pool->tfm);
905926 kunmap_atomic(dst);
906
- zpool_unmap_handle(entry->pool->zpool, entry->handle);
907927 BUG_ON(ret);
908928 BUG_ON(dlen != PAGE_SIZE);
909929
....@@ -949,22 +969,7 @@
949969 spin_unlock(&tree->lock);
950970
951971 end:
952
- return ret;
953
-}
954
-
955
-static int zswap_shrink(void)
956
-{
957
- struct zswap_pool *pool;
958
- int ret;
959
-
960
- pool = zswap_pool_last_get();
961
- if (!pool)
962
- return -ENOENT;
963
-
964
- ret = zpool_shrink(pool->zpool, 1, NULL);
965
-
966
- zswap_pool_put(pool);
967
-
972
+ zpool_unmap_handle(pool, handle);
968973 return ret;
969974 }
970975
....@@ -1006,6 +1011,7 @@
10061011 char *buf;
10071012 u8 *src, *dst;
10081013 struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
1014
+ gfp_t gfp;
10091015
10101016 /* THP isn't supported */
10111017 if (PageTransHuge(page)) {
....@@ -1020,21 +1026,23 @@
10201026
10211027 /* reclaim space if needed */
10221028 if (zswap_is_full()) {
1023
- zswap_pool_limit_hit++;
1024
- if (zswap_shrink()) {
1025
- zswap_reject_reclaim_fail++;
1026
- ret = -ENOMEM;
1027
- goto reject;
1028
- }
1029
+ struct zswap_pool *pool;
10291030
1030
- /* A second zswap_is_full() check after
1031
- * zswap_shrink() to make sure it's now
1032
- * under the max_pool_percent
1033
- */
1034
- if (zswap_is_full()) {
1031
+ zswap_pool_limit_hit++;
1032
+ zswap_pool_reached_full = true;
1033
+ pool = zswap_pool_last_get();
1034
+ if (pool)
1035
+ queue_work(shrink_wq, &pool->shrink_work);
1036
+ ret = -ENOMEM;
1037
+ goto reject;
1038
+ }
1039
+
1040
+ if (zswap_pool_reached_full) {
1041
+ if (!zswap_can_accept()) {
10351042 ret = -ENOMEM;
10361043 goto reject;
1037
- }
1044
+ } else
1045
+ zswap_pool_reached_full = false;
10381046 }
10391047
10401048 /* allocate entry */
....@@ -1079,9 +1087,10 @@
10791087
10801088 /* store */
10811089 hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
1082
- ret = zpool_malloc(entry->pool->zpool, hlen + dlen,
1083
- __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM,
1084
- &handle);
1090
+ gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
1091
+ if (zpool_malloc_support_movable(entry->pool->zpool))
1092
+ gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
1093
+ ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle);
10851094 if (ret == -ENOSPC) {
10861095 zswap_reject_compress_poor++;
10871096 goto put_dstmem;
....@@ -1262,8 +1271,6 @@
12621271 return -ENODEV;
12631272
12641273 zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
1265
- if (!zswap_debugfs_root)
1266
- return -ENOMEM;
12671274
12681275 debugfs_create_u64("pool_limit_hit", 0444,
12691276 zswap_debugfs_root, &zswap_pool_limit_hit);
....@@ -1342,11 +1349,18 @@
13421349 zswap_enabled = false;
13431350 }
13441351
1352
+ shrink_wq = create_workqueue("zswap-shrink");
1353
+ if (!shrink_wq)
1354
+ goto fallback_fail;
1355
+
13451356 frontswap_register_ops(&zswap_frontswap_ops);
13461357 if (zswap_debugfs_init())
13471358 pr_warn("debugfs initialization failed\n");
13481359 return 0;
13491360
1361
+fallback_fail:
1362
+ if (pool)
1363
+ zswap_pool_destroy(pool);
13501364 hp_fail:
13511365 cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
13521366 dstmem_fail: