hc
2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/mm/zswap.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * zswap.c - zswap driver file
34 *
....@@ -8,16 +9,6 @@
89 * than reading from the swap device, can also improve workload performance.
910 *
1011 * Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com>
11
- *
12
- * This program is free software; you can redistribute it and/or
13
- * modify it under the terms of the GNU General Public License
14
- * as published by the Free Software Foundation; either version 2
15
- * of the License, or (at your option) any later version.
16
- *
17
- * This program is distributed in the hope that it will be useful,
18
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
- * GNU General Public License for more details.
2112 */
2213
2314 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
....@@ -27,7 +18,6 @@
2718 #include <linux/highmem.h>
2819 #include <linux/slab.h>
2920 #include <linux/spinlock.h>
30
-#include <linux/locallock.h>
3121 #include <linux/types.h>
3222 #include <linux/atomic.h>
3323 #include <linux/frontswap.h>
....@@ -42,6 +32,7 @@
4232 #include <linux/swapops.h>
4333 #include <linux/writeback.h>
4434 #include <linux/pagemap.h>
35
+#include <linux/workqueue.h>
4536
4637 /*********************************
4738 * statistics
....@@ -75,14 +66,19 @@
7566 /* Duplicate store was encountered (rare) */
7667 static u64 zswap_duplicate_entry;
7768
69
+/* Shrinker work queue */
70
+static struct workqueue_struct *shrink_wq;
71
+/* Pool limit was hit, we need to calm down */
72
+static bool zswap_pool_reached_full;
73
+
7874 /*********************************
7975 * tunables
8076 **********************************/
8177
8278 #define ZSWAP_PARAM_UNSET ""
8379
84
-/* Enable/disable zswap (disabled by default) */
85
-static bool zswap_enabled;
80
+/* Enable/disable zswap */
81
+static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
8682 static int zswap_enabled_param_set(const char *,
8783 const struct kernel_param *);
8884 static struct kernel_param_ops zswap_enabled_param_ops = {
....@@ -92,8 +88,7 @@
9288 module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);
9389
9490 /* Crypto compressor to use */
95
-#define ZSWAP_COMPRESSOR_DEFAULT "lzo"
96
-static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
91
+static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
9792 static int zswap_compressor_param_set(const char *,
9893 const struct kernel_param *);
9994 static struct kernel_param_ops zswap_compressor_param_ops = {
....@@ -105,8 +100,7 @@
105100 &zswap_compressor, 0644);
106101
107102 /* Compressed storage zpool to use */
108
-#define ZSWAP_ZPOOL_DEFAULT "zbud"
109
-static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
103
+static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
110104 static int zswap_zpool_param_set(const char *, const struct kernel_param *);
111105 static struct kernel_param_ops zswap_zpool_param_ops = {
112106 .set = zswap_zpool_param_set,
....@@ -118,6 +112,11 @@
118112 /* The maximum percentage of memory that the compressed pool can occupy */
119113 static unsigned int zswap_max_pool_percent = 20;
120114 module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);
115
+
116
+/* The threshold for accepting new pages after the max_pool_percent was hit */
117
+static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
118
+module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
119
+ uint, 0644);
121120
122121 /* Enable/disable handling same-value filled pages (enabled by default) */
123122 static bool zswap_same_filled_pages_enabled = true;
....@@ -133,7 +132,8 @@
133132 struct crypto_comp * __percpu *tfm;
134133 struct kref kref;
135134 struct list_head list;
136
- struct work_struct work;
135
+ struct work_struct release_work;
136
+ struct work_struct shrink_work;
137137 struct hlist_node node;
138138 char tfm_name[CRYPTO_MAX_ALG_NAME];
139139 };
....@@ -220,8 +220,15 @@
220220
221221 static bool zswap_is_full(void)
222222 {
223
- return totalram_pages * zswap_max_pool_percent / 100 <
224
- DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
223
+ return totalram_pages() * zswap_max_pool_percent / 100 <
224
+ DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
225
+}
226
+
227
+static bool zswap_can_accept(void)
228
+{
229
+ return totalram_pages() * zswap_accept_thr_percent / 100 *
230
+ zswap_max_pool_percent / 100 >
231
+ DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
225232 }
226233
227234 static void zswap_update_total_size(void)
....@@ -511,6 +518,16 @@
511518 return NULL;
512519 }
513520
521
+static void shrink_worker(struct work_struct *w)
522
+{
523
+ struct zswap_pool *pool = container_of(w, typeof(*pool),
524
+ shrink_work);
525
+
526
+ if (zpool_shrink(pool->zpool, 1, NULL))
527
+ zswap_reject_reclaim_fail++;
528
+ zswap_pool_put(pool);
529
+}
530
+
514531 static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
515532 {
516533 struct zswap_pool *pool;
....@@ -561,6 +578,7 @@
561578 */
562579 kref_init(&pool->kref);
563580 INIT_LIST_HEAD(&pool->list);
581
+ INIT_WORK(&pool->shrink_work, shrink_worker);
564582
565583 zswap_pool_debug("created", pool);
566584
....@@ -579,11 +597,12 @@
579597 bool has_comp, has_zpool;
580598
581599 has_comp = crypto_has_comp(zswap_compressor, 0, 0);
582
- if (!has_comp && strcmp(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT)) {
600
+ if (!has_comp && strcmp(zswap_compressor,
601
+ CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
583602 pr_err("compressor %s not available, using default %s\n",
584
- zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
603
+ zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
585604 param_free_charp(&zswap_compressor);
586
- zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
605
+ zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
587606 has_comp = crypto_has_comp(zswap_compressor, 0, 0);
588607 }
589608 if (!has_comp) {
....@@ -594,11 +613,12 @@
594613 }
595614
596615 has_zpool = zpool_has_pool(zswap_zpool_type);
597
- if (!has_zpool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
616
+ if (!has_zpool && strcmp(zswap_zpool_type,
617
+ CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
598618 pr_err("zpool %s not available, using default %s\n",
599
- zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
619
+ zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
600620 param_free_charp(&zswap_zpool_type);
601
- zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
621
+ zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
602622 has_zpool = zpool_has_pool(zswap_zpool_type);
603623 }
604624 if (!has_zpool) {
....@@ -634,7 +654,8 @@
634654
635655 static void __zswap_pool_release(struct work_struct *work)
636656 {
637
- struct zswap_pool *pool = container_of(work, typeof(*pool), work);
657
+ struct zswap_pool *pool = container_of(work, typeof(*pool),
658
+ release_work);
638659
639660 synchronize_rcu();
640661
....@@ -657,8 +678,8 @@
657678
658679 list_del_rcu(&pool->list);
659680
660
- INIT_WORK(&pool->work, __zswap_pool_release);
661
- schedule_work(&pool->work);
681
+ INIT_WORK(&pool->release_work, __zswap_pool_release);
682
+ schedule_work(&pool->release_work);
662683
663684 spin_unlock(&zswap_pools_lock);
664685 }
....@@ -866,7 +887,6 @@
866887 /* extract swpentry from data */
867888 zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
868889 swpentry = zhdr->swpentry; /* here */
869
- zpool_unmap_handle(pool, handle);
870890 tree = zswap_trees[swp_type(swpentry)];
871891 offset = swp_offset(swpentry);
872892
....@@ -876,6 +896,7 @@
876896 if (!entry) {
877897 /* entry was invalidated */
878898 spin_unlock(&tree->lock);
899
+ zpool_unmap_handle(pool, handle);
879900 return 0;
880901 }
881902 spin_unlock(&tree->lock);
....@@ -896,15 +917,13 @@
896917 case ZSWAP_SWAPCACHE_NEW: /* page is locked */
897918 /* decompress */
898919 dlen = PAGE_SIZE;
899
- src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
900
- ZPOOL_MM_RO) + sizeof(struct zswap_header);
920
+ src = (u8 *)zhdr + sizeof(struct zswap_header);
901921 dst = kmap_atomic(page);
902922 tfm = *get_cpu_ptr(entry->pool->tfm);
903923 ret = crypto_comp_decompress(tfm, src, entry->length,
904924 dst, &dlen);
905925 put_cpu_ptr(entry->pool->tfm);
906926 kunmap_atomic(dst);
907
- zpool_unmap_handle(entry->pool->zpool, entry->handle);
908927 BUG_ON(ret);
909928 BUG_ON(dlen != PAGE_SIZE);
910929
....@@ -950,22 +969,7 @@
950969 spin_unlock(&tree->lock);
951970
952971 end:
953
- return ret;
954
-}
955
-
956
-static int zswap_shrink(void)
957
-{
958
- struct zswap_pool *pool;
959
- int ret;
960
-
961
- pool = zswap_pool_last_get();
962
- if (!pool)
963
- return -ENOENT;
964
-
965
- ret = zpool_shrink(pool->zpool, 1, NULL);
966
-
967
- zswap_pool_put(pool);
968
-
972
+ zpool_unmap_handle(pool, handle);
969973 return ret;
970974 }
971975
....@@ -991,8 +995,6 @@
991995 memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
992996 }
993997
994
-/* protect zswap_dstmem from concurrency */
995
-static DEFINE_LOCAL_IRQ_LOCK(zswap_dstmem_lock);
996998 /*********************************
997999 * frontswap hooks
9981000 **********************************/
....@@ -1009,6 +1011,7 @@
10091011 char *buf;
10101012 u8 *src, *dst;
10111013 struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
1014
+ gfp_t gfp;
10121015
10131016 /* THP isn't supported */
10141017 if (PageTransHuge(page)) {
....@@ -1023,21 +1026,23 @@
10231026
10241027 /* reclaim space if needed */
10251028 if (zswap_is_full()) {
1026
- zswap_pool_limit_hit++;
1027
- if (zswap_shrink()) {
1028
- zswap_reject_reclaim_fail++;
1029
- ret = -ENOMEM;
1030
- goto reject;
1031
- }
1029
+ struct zswap_pool *pool;
10321030
1033
- /* A second zswap_is_full() check after
1034
- * zswap_shrink() to make sure it's now
1035
- * under the max_pool_percent
1036
- */
1037
- if (zswap_is_full()) {
1031
+ zswap_pool_limit_hit++;
1032
+ zswap_pool_reached_full = true;
1033
+ pool = zswap_pool_last_get();
1034
+ if (pool)
1035
+ queue_work(shrink_wq, &pool->shrink_work);
1036
+ ret = -ENOMEM;
1037
+ goto reject;
1038
+ }
1039
+
1040
+ if (zswap_pool_reached_full) {
1041
+ if (!zswap_can_accept()) {
10381042 ret = -ENOMEM;
10391043 goto reject;
1040
- }
1044
+ } else
1045
+ zswap_pool_reached_full = false;
10411046 }
10421047
10431048 /* allocate entry */
....@@ -1069,11 +1074,12 @@
10691074 }
10701075
10711076 /* compress */
1072
- dst = get_locked_var(zswap_dstmem_lock, zswap_dstmem);
1073
- tfm = *this_cpu_ptr(entry->pool->tfm);
1077
+ dst = get_cpu_var(zswap_dstmem);
1078
+ tfm = *get_cpu_ptr(entry->pool->tfm);
10741079 src = kmap_atomic(page);
10751080 ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen);
10761081 kunmap_atomic(src);
1082
+ put_cpu_ptr(entry->pool->tfm);
10771083 if (ret) {
10781084 ret = -EINVAL;
10791085 goto put_dstmem;
....@@ -1081,9 +1087,10 @@
10811087
10821088 /* store */
10831089 hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
1084
- ret = zpool_malloc(entry->pool->zpool, hlen + dlen,
1085
- __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM,
1086
- &handle);
1090
+ gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
1091
+ if (zpool_malloc_support_movable(entry->pool->zpool))
1092
+ gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
1093
+ ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle);
10871094 if (ret == -ENOSPC) {
10881095 zswap_reject_compress_poor++;
10891096 goto put_dstmem;
....@@ -1096,7 +1103,7 @@
10961103 memcpy(buf, &zhdr, hlen);
10971104 memcpy(buf + hlen, dst, dlen);
10981105 zpool_unmap_handle(entry->pool->zpool, handle);
1099
- put_locked_var(zswap_dstmem_lock, zswap_dstmem);
1106
+ put_cpu_var(zswap_dstmem);
11001107
11011108 /* populate entry */
11021109 entry->offset = offset;
....@@ -1124,7 +1131,7 @@
11241131 return 0;
11251132
11261133 put_dstmem:
1127
- put_locked_var(zswap_dstmem_lock, zswap_dstmem);
1134
+ put_cpu_var(zswap_dstmem);
11281135 zswap_pool_put(entry->pool);
11291136 freepage:
11301137 zswap_entry_cache_free(entry);
....@@ -1264,8 +1271,6 @@
12641271 return -ENODEV;
12651272
12661273 zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
1267
- if (!zswap_debugfs_root)
1268
- return -ENOMEM;
12691274
12701275 debugfs_create_u64("pool_limit_hit", 0444,
12711276 zswap_debugfs_root, &zswap_pool_limit_hit);
....@@ -1344,11 +1349,18 @@
13441349 zswap_enabled = false;
13451350 }
13461351
1352
+ shrink_wq = create_workqueue("zswap-shrink");
1353
+ if (!shrink_wq)
1354
+ goto fallback_fail;
1355
+
13471356 frontswap_register_ops(&zswap_frontswap_ops);
13481357 if (zswap_debugfs_init())
13491358 pr_warn("debugfs initialization failed\n");
13501359 return 0;
13511360
1361
+fallback_fail:
1362
+ if (pool)
1363
+ zswap_pool_destroy(pool);
13521364 hp_fail:
13531365 cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
13541366 dstmem_fail: