hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/mm/zswap.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * zswap.c - zswap driver file
34 *
....@@ -8,16 +9,6 @@
89 * than reading from the swap device, can also improve workload performance.
910 *
1011 * Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com>
11
- *
12
- * This program is free software; you can redistribute it and/or
13
- * modify it under the terms of the GNU General Public License
14
- * as published by the Free Software Foundation; either version 2
15
- * of the License, or (at your option) any later version.
16
- *
17
- * This program is distributed in the hope that it will be useful,
18
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
- * GNU General Public License for more details.
2112 */
2213
2314 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
....@@ -27,6 +18,7 @@
2718 #include <linux/highmem.h>
2819 #include <linux/slab.h>
2920 #include <linux/spinlock.h>
21
+#include <linux/local_lock.h>
3022 #include <linux/types.h>
3123 #include <linux/atomic.h>
3224 #include <linux/frontswap.h>
....@@ -41,6 +33,7 @@
4133 #include <linux/swapops.h>
4234 #include <linux/writeback.h>
4335 #include <linux/pagemap.h>
36
+#include <linux/workqueue.h>
4437
4538 /*********************************
4639 * statistics
....@@ -74,14 +67,19 @@
7467 /* Duplicate store was encountered (rare) */
7568 static u64 zswap_duplicate_entry;
7669
70
+/* Shrinker work queue */
71
+static struct workqueue_struct *shrink_wq;
72
+/* Pool limit was hit; reject new stores until below the accept threshold */
73
+static bool zswap_pool_reached_full;
74
+
7775 /*********************************
7876 * tunables
7977 **********************************/
8078
8179 #define ZSWAP_PARAM_UNSET ""
8280
83
-/* Enable/disable zswap (disabled by default) */
84
-static bool zswap_enabled;
81
+/* Enable/disable zswap */
82
+static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
8583 static int zswap_enabled_param_set(const char *,
8684 const struct kernel_param *);
8785 static struct kernel_param_ops zswap_enabled_param_ops = {
....@@ -91,8 +89,7 @@
9189 module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);
9290
9391 /* Crypto compressor to use */
94
-#define ZSWAP_COMPRESSOR_DEFAULT "lzo"
95
-static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
92
+static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
9693 static int zswap_compressor_param_set(const char *,
9794 const struct kernel_param *);
9895 static struct kernel_param_ops zswap_compressor_param_ops = {
....@@ -104,8 +101,7 @@
104101 &zswap_compressor, 0644);
105102
106103 /* Compressed storage zpool to use */
107
-#define ZSWAP_ZPOOL_DEFAULT "zbud"
108
-static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
104
+static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
109105 static int zswap_zpool_param_set(const char *, const struct kernel_param *);
110106 static struct kernel_param_ops zswap_zpool_param_ops = {
111107 .set = zswap_zpool_param_set,
....@@ -117,6 +113,11 @@
117113 /* The maximum percentage of memory that the compressed pool can occupy */
118114 static unsigned int zswap_max_pool_percent = 20;
119115 module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);
116
+
117
+/* The threshold for accepting new pages after the max_pool_percent was hit */
118
+static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
119
+module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
120
+ uint, 0644);
120121
121122 /* Enable/disable handling same-value filled pages (enabled by default) */
122123 static bool zswap_same_filled_pages_enabled = true;
....@@ -132,7 +133,8 @@
132133 struct crypto_comp * __percpu *tfm;
133134 struct kref kref;
134135 struct list_head list;
135
- struct work_struct work;
136
+ struct work_struct release_work;
137
+ struct work_struct shrink_work;
136138 struct hlist_node node;
137139 char tfm_name[CRYPTO_MAX_ALG_NAME];
138140 };
....@@ -219,8 +221,15 @@
219221
220222 static bool zswap_is_full(void)
221223 {
222
- return totalram_pages * zswap_max_pool_percent / 100 <
223
- DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
224
+ return totalram_pages() * zswap_max_pool_percent / 100 <
225
+ DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
226
+}
227
+
228
+static bool zswap_can_accept(void)
229
+{
230
+ return totalram_pages() * zswap_accept_thr_percent / 100 *
231
+ zswap_max_pool_percent / 100 >
232
+ DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
224233 }
225234
226235 static void zswap_update_total_size(void)
....@@ -379,27 +388,37 @@
379388 /*********************************
380389 * per-cpu code
381390 **********************************/
382
-static DEFINE_PER_CPU(u8 *, zswap_dstmem);
391
+struct zswap_comp {
392
+ /* Protects the per-CPU dstmem buffer and per-CPU tfm while in use */
393
+ local_lock_t lock;
394
+ u8 *dstmem;
395
+};
396
+
397
+static DEFINE_PER_CPU(struct zswap_comp, zswap_comp) = {
398
+ .lock = INIT_LOCAL_LOCK(lock),
399
+};
383400
384401 static int zswap_dstmem_prepare(unsigned int cpu)
385402 {
403
+ struct zswap_comp *zcomp;
386404 u8 *dst;
387405
388406 dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
389407 if (!dst)
390408 return -ENOMEM;
391409
392
- per_cpu(zswap_dstmem, cpu) = dst;
410
+ zcomp = per_cpu_ptr(&zswap_comp, cpu);
411
+ zcomp->dstmem = dst;
393412 return 0;
394413 }
395414
396415 static int zswap_dstmem_dead(unsigned int cpu)
397416 {
398
- u8 *dst;
417
+ struct zswap_comp *zcomp;
399418
400
- dst = per_cpu(zswap_dstmem, cpu);
401
- kfree(dst);
402
- per_cpu(zswap_dstmem, cpu) = NULL;
419
+ zcomp = per_cpu_ptr(&zswap_comp, cpu);
420
+ kfree(zcomp->dstmem);
421
+ zcomp->dstmem = NULL;
403422
404423 return 0;
405424 }
....@@ -510,6 +529,16 @@
510529 return NULL;
511530 }
512531
532
+static void shrink_worker(struct work_struct *w)
533
+{
534
+ struct zswap_pool *pool = container_of(w, typeof(*pool),
535
+ shrink_work);
536
+
537
+ if (zpool_shrink(pool->zpool, 1, NULL))
538
+ zswap_reject_reclaim_fail++;
539
+ zswap_pool_put(pool);
540
+}
541
+
513542 static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
514543 {
515544 struct zswap_pool *pool;
....@@ -560,6 +589,7 @@
560589 */
561590 kref_init(&pool->kref);
562591 INIT_LIST_HEAD(&pool->list);
592
+ INIT_WORK(&pool->shrink_work, shrink_worker);
563593
564594 zswap_pool_debug("created", pool);
565595
....@@ -578,11 +608,12 @@
578608 bool has_comp, has_zpool;
579609
580610 has_comp = crypto_has_comp(zswap_compressor, 0, 0);
581
- if (!has_comp && strcmp(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT)) {
611
+ if (!has_comp && strcmp(zswap_compressor,
612
+ CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
582613 pr_err("compressor %s not available, using default %s\n",
583
- zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
614
+ zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
584615 param_free_charp(&zswap_compressor);
585
- zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
616
+ zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
586617 has_comp = crypto_has_comp(zswap_compressor, 0, 0);
587618 }
588619 if (!has_comp) {
....@@ -593,11 +624,12 @@
593624 }
594625
595626 has_zpool = zpool_has_pool(zswap_zpool_type);
596
- if (!has_zpool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
627
+ if (!has_zpool && strcmp(zswap_zpool_type,
628
+ CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
597629 pr_err("zpool %s not available, using default %s\n",
598
- zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
630
+ zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
599631 param_free_charp(&zswap_zpool_type);
600
- zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
632
+ zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
601633 has_zpool = zpool_has_pool(zswap_zpool_type);
602634 }
603635 if (!has_zpool) {
....@@ -633,7 +665,8 @@
633665
634666 static void __zswap_pool_release(struct work_struct *work)
635667 {
636
- struct zswap_pool *pool = container_of(work, typeof(*pool), work);
668
+ struct zswap_pool *pool = container_of(work, typeof(*pool),
669
+ release_work);
637670
638671 synchronize_rcu();
639672
....@@ -656,8 +689,8 @@
656689
657690 list_del_rcu(&pool->list);
658691
659
- INIT_WORK(&pool->work, __zswap_pool_release);
660
- schedule_work(&pool->work);
692
+ INIT_WORK(&pool->release_work, __zswap_pool_release);
693
+ schedule_work(&pool->release_work);
661694
662695 spin_unlock(&zswap_pools_lock);
663696 }
....@@ -865,7 +898,6 @@
865898 /* extract swpentry from data */
866899 zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
867900 swpentry = zhdr->swpentry; /* here */
868
- zpool_unmap_handle(pool, handle);
869901 tree = zswap_trees[swp_type(swpentry)];
870902 offset = swp_offset(swpentry);
871903
....@@ -875,6 +907,7 @@
875907 if (!entry) {
876908 /* entry was invalidated */
877909 spin_unlock(&tree->lock);
910
+ zpool_unmap_handle(pool, handle);
878911 return 0;
879912 }
880913 spin_unlock(&tree->lock);
....@@ -895,15 +928,14 @@
895928 case ZSWAP_SWAPCACHE_NEW: /* page is locked */
896929 /* decompress */
897930 dlen = PAGE_SIZE;
898
- src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
899
- ZPOOL_MM_RO) + sizeof(struct zswap_header);
931
+ src = (u8 *)zhdr + sizeof(struct zswap_header);
900932 dst = kmap_atomic(page);
901
- tfm = *get_cpu_ptr(entry->pool->tfm);
933
+ local_lock(&zswap_comp.lock);
934
+ tfm = *this_cpu_ptr(entry->pool->tfm);
902935 ret = crypto_comp_decompress(tfm, src, entry->length,
903936 dst, &dlen);
904
- put_cpu_ptr(entry->pool->tfm);
937
+ local_unlock(&zswap_comp.lock);
905938 kunmap_atomic(dst);
906
- zpool_unmap_handle(entry->pool->zpool, entry->handle);
907939 BUG_ON(ret);
908940 BUG_ON(dlen != PAGE_SIZE);
909941
....@@ -949,22 +981,7 @@
949981 spin_unlock(&tree->lock);
950982
951983 end:
952
- return ret;
953
-}
954
-
955
-static int zswap_shrink(void)
956
-{
957
- struct zswap_pool *pool;
958
- int ret;
959
-
960
- pool = zswap_pool_last_get();
961
- if (!pool)
962
- return -ENOENT;
963
-
964
- ret = zpool_shrink(pool->zpool, 1, NULL);
965
-
966
- zswap_pool_put(pool);
967
-
984
+ zpool_unmap_handle(pool, handle);
968985 return ret;
969986 }
970987
....@@ -1006,6 +1023,7 @@
10061023 char *buf;
10071024 u8 *src, *dst;
10081025 struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
1026
+ gfp_t gfp;
10091027
10101028 /* THP isn't supported */
10111029 if (PageTransHuge(page)) {
....@@ -1020,21 +1038,23 @@
10201038
10211039 /* reclaim space if needed */
10221040 if (zswap_is_full()) {
1023
- zswap_pool_limit_hit++;
1024
- if (zswap_shrink()) {
1025
- zswap_reject_reclaim_fail++;
1026
- ret = -ENOMEM;
1027
- goto reject;
1028
- }
1041
+ struct zswap_pool *pool;
10291042
1030
- /* A second zswap_is_full() check after
1031
- * zswap_shrink() to make sure it's now
1032
- * under the max_pool_percent
1033
- */
1034
- if (zswap_is_full()) {
1043
+ zswap_pool_limit_hit++;
1044
+ zswap_pool_reached_full = true;
1045
+ pool = zswap_pool_last_get();
1046
+ if (pool)
1047
+ queue_work(shrink_wq, &pool->shrink_work);
1048
+ ret = -ENOMEM;
1049
+ goto reject;
1050
+ }
1051
+
1052
+ if (zswap_pool_reached_full) {
1053
+ if (!zswap_can_accept()) {
10351054 ret = -ENOMEM;
10361055 goto reject;
1037
- }
1056
+ } else
1057
+ zswap_pool_reached_full = false;
10381058 }
10391059
10401060 /* allocate entry */
....@@ -1066,12 +1086,12 @@
10661086 }
10671087
10681088 /* compress */
1069
- dst = get_cpu_var(zswap_dstmem);
1070
- tfm = *get_cpu_ptr(entry->pool->tfm);
1089
+ local_lock(&zswap_comp.lock);
1090
+ dst = *this_cpu_ptr(&zswap_comp.dstmem);
1091
+ tfm = *this_cpu_ptr(entry->pool->tfm);
10711092 src = kmap_atomic(page);
10721093 ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen);
10731094 kunmap_atomic(src);
1074
- put_cpu_ptr(entry->pool->tfm);
10751095 if (ret) {
10761096 ret = -EINVAL;
10771097 goto put_dstmem;
....@@ -1079,9 +1099,10 @@
10791099
10801100 /* store */
10811101 hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
1082
- ret = zpool_malloc(entry->pool->zpool, hlen + dlen,
1083
- __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM,
1084
- &handle);
1102
+ gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
1103
+ if (zpool_malloc_support_movable(entry->pool->zpool))
1104
+ gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
1105
+ ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle);
10851106 if (ret == -ENOSPC) {
10861107 zswap_reject_compress_poor++;
10871108 goto put_dstmem;
....@@ -1094,7 +1115,7 @@
10941115 memcpy(buf, &zhdr, hlen);
10951116 memcpy(buf + hlen, dst, dlen);
10961117 zpool_unmap_handle(entry->pool->zpool, handle);
1097
- put_cpu_var(zswap_dstmem);
1118
+ local_unlock(&zswap_comp.lock);
10981119
10991120 /* populate entry */
11001121 entry->offset = offset;
....@@ -1122,7 +1143,7 @@
11221143 return 0;
11231144
11241145 put_dstmem:
1125
- put_cpu_var(zswap_dstmem);
1146
+ local_unlock(&zswap_comp.lock);
11261147 zswap_pool_put(entry->pool);
11271148 freepage:
11281149 zswap_entry_cache_free(entry);
....@@ -1167,9 +1188,10 @@
11671188 if (zpool_evictable(entry->pool->zpool))
11681189 src += sizeof(struct zswap_header);
11691190 dst = kmap_atomic(page);
1170
- tfm = *get_cpu_ptr(entry->pool->tfm);
1191
+ local_lock(&zswap_comp.lock);
1192
+ tfm = *this_cpu_ptr(entry->pool->tfm);
11711193 ret = crypto_comp_decompress(tfm, src, entry->length, dst, &dlen);
1172
- put_cpu_ptr(entry->pool->tfm);
1194
+ local_unlock(&zswap_comp.lock);
11731195 kunmap_atomic(dst);
11741196 zpool_unmap_handle(entry->pool->zpool, entry->handle);
11751197 BUG_ON(ret);
....@@ -1262,8 +1284,6 @@
12621284 return -ENODEV;
12631285
12641286 zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
1265
- if (!zswap_debugfs_root)
1266
- return -ENOMEM;
12671287
12681288 debugfs_create_u64("pool_limit_hit", 0444,
12691289 zswap_debugfs_root, &zswap_pool_limit_hit);
....@@ -1342,11 +1362,18 @@
13421362 zswap_enabled = false;
13431363 }
13441364
1365
+ shrink_wq = create_workqueue("zswap-shrink");
1366
+ if (!shrink_wq)
1367
+ goto fallback_fail;
1368
+
13451369 frontswap_register_ops(&zswap_frontswap_ops);
13461370 if (zswap_debugfs_init())
13471371 pr_warn("debugfs initialization failed\n");
13481372 return 0;
13491373
1374
+fallback_fail:
1375
+ if (pool)
1376
+ zswap_pool_destroy(pool);
13501377 hp_fail:
13511378 cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
13521379 dstmem_fail: