hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/mm/zswap.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * zswap.c - zswap driver file
34 *
....@@ -8,16 +9,6 @@
89 * than reading from the swap device, can also improve workload performance.
910 *
1011 * Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com>
11
- *
12
- * This program is free software; you can redistribute it and/or
13
- * modify it under the terms of the GNU General Public License
14
- * as published by the Free Software Foundation; either version 2
15
- * of the License, or (at your option) any later version.
16
- *
17
- * This program is distributed in the hope that it will be useful,
18
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
- * GNU General Public License for more details.
2112 */
2213
2314 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
....@@ -27,7 +18,7 @@
2718 #include <linux/highmem.h>
2819 #include <linux/slab.h>
2920 #include <linux/spinlock.h>
30
-#include <linux/locallock.h>
21
+#include <linux/local_lock.h>
3122 #include <linux/types.h>
3223 #include <linux/atomic.h>
3324 #include <linux/frontswap.h>
....@@ -42,6 +33,7 @@
4233 #include <linux/swapops.h>
4334 #include <linux/writeback.h>
4435 #include <linux/pagemap.h>
36
+#include <linux/workqueue.h>
4537
4638 /*********************************
4739 * statistics
....@@ -75,14 +67,19 @@
7567 /* Duplicate store was encountered (rare) */
7668 static u64 zswap_duplicate_entry;
7769
70
+/* Shrinker work queue */
71
+static struct workqueue_struct *shrink_wq;
72
+/* Pool limit was hit, we need to calm down */
73
+static bool zswap_pool_reached_full;
74
+
7875 /*********************************
7976 * tunables
8077 **********************************/
8178
8279 #define ZSWAP_PARAM_UNSET ""
8380
84
-/* Enable/disable zswap (disabled by default) */
85
-static bool zswap_enabled;
81
+/* Enable/disable zswap */
82
+static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
8683 static int zswap_enabled_param_set(const char *,
8784 const struct kernel_param *);
8885 static struct kernel_param_ops zswap_enabled_param_ops = {
....@@ -92,8 +89,7 @@
9289 module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);
9390
9491 /* Crypto compressor to use */
95
-#define ZSWAP_COMPRESSOR_DEFAULT "lzo"
96
-static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
92
+static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
9793 static int zswap_compressor_param_set(const char *,
9894 const struct kernel_param *);
9995 static struct kernel_param_ops zswap_compressor_param_ops = {
....@@ -105,8 +101,7 @@
105101 &zswap_compressor, 0644);
106102
107103 /* Compressed storage zpool to use */
108
-#define ZSWAP_ZPOOL_DEFAULT "zbud"
109
-static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
104
+static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
110105 static int zswap_zpool_param_set(const char *, const struct kernel_param *);
111106 static struct kernel_param_ops zswap_zpool_param_ops = {
112107 .set = zswap_zpool_param_set,
....@@ -118,6 +113,11 @@
118113 /* The maximum percentage of memory that the compressed pool can occupy */
119114 static unsigned int zswap_max_pool_percent = 20;
120115 module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);
116
+
117
+/* The threshold for accepting new pages after the max_pool_percent was hit */
118
+static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
119
+module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
120
+ uint, 0644);
121121
122122 /* Enable/disable handling same-value filled pages (enabled by default) */
123123 static bool zswap_same_filled_pages_enabled = true;
....@@ -133,7 +133,8 @@
133133 struct crypto_comp * __percpu *tfm;
134134 struct kref kref;
135135 struct list_head list;
136
- struct work_struct work;
136
+ struct work_struct release_work;
137
+ struct work_struct shrink_work;
137138 struct hlist_node node;
138139 char tfm_name[CRYPTO_MAX_ALG_NAME];
139140 };
....@@ -220,8 +221,15 @@
220221
221222 static bool zswap_is_full(void)
222223 {
223
- return totalram_pages * zswap_max_pool_percent / 100 <
224
- DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
224
+ return totalram_pages() * zswap_max_pool_percent / 100 <
225
+ DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
226
+}
227
+
228
+static bool zswap_can_accept(void)
229
+{
230
+ return totalram_pages() * zswap_accept_thr_percent / 100 *
231
+ zswap_max_pool_percent / 100 >
232
+ DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
225233 }
226234
227235 static void zswap_update_total_size(void)
....@@ -380,27 +388,37 @@
380388 /*********************************
381389 * per-cpu code
382390 **********************************/
383
-static DEFINE_PER_CPU(u8 *, zswap_dstmem);
391
+struct zswap_comp {
392
+ /* Used for per-CPU dstmem and tfm */
393
+ local_lock_t lock;
394
+ u8 *dstmem;
395
+};
396
+
397
+static DEFINE_PER_CPU(struct zswap_comp, zswap_comp) = {
398
+ .lock = INIT_LOCAL_LOCK(lock),
399
+};
384400
385401 static int zswap_dstmem_prepare(unsigned int cpu)
386402 {
403
+ struct zswap_comp *zcomp;
387404 u8 *dst;
388405
389406 dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
390407 if (!dst)
391408 return -ENOMEM;
392409
393
- per_cpu(zswap_dstmem, cpu) = dst;
410
+ zcomp = per_cpu_ptr(&zswap_comp, cpu);
411
+ zcomp->dstmem = dst;
394412 return 0;
395413 }
396414
397415 static int zswap_dstmem_dead(unsigned int cpu)
398416 {
399
- u8 *dst;
417
+ struct zswap_comp *zcomp;
400418
401
- dst = per_cpu(zswap_dstmem, cpu);
402
- kfree(dst);
403
- per_cpu(zswap_dstmem, cpu) = NULL;
419
+ zcomp = per_cpu_ptr(&zswap_comp, cpu);
420
+ kfree(zcomp->dstmem);
421
+ zcomp->dstmem = NULL;
404422
405423 return 0;
406424 }
....@@ -511,6 +529,16 @@
511529 return NULL;
512530 }
513531
532
+static void shrink_worker(struct work_struct *w)
533
+{
534
+ struct zswap_pool *pool = container_of(w, typeof(*pool),
535
+ shrink_work);
536
+
537
+ if (zpool_shrink(pool->zpool, 1, NULL))
538
+ zswap_reject_reclaim_fail++;
539
+ zswap_pool_put(pool);
540
+}
541
+
514542 static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
515543 {
516544 struct zswap_pool *pool;
....@@ -561,6 +589,7 @@
561589 */
562590 kref_init(&pool->kref);
563591 INIT_LIST_HEAD(&pool->list);
592
+ INIT_WORK(&pool->shrink_work, shrink_worker);
564593
565594 zswap_pool_debug("created", pool);
566595
....@@ -579,11 +608,12 @@
579608 bool has_comp, has_zpool;
580609
581610 has_comp = crypto_has_comp(zswap_compressor, 0, 0);
582
- if (!has_comp && strcmp(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT)) {
611
+ if (!has_comp && strcmp(zswap_compressor,
612
+ CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
583613 pr_err("compressor %s not available, using default %s\n",
584
- zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
614
+ zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
585615 param_free_charp(&zswap_compressor);
586
- zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
616
+ zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
587617 has_comp = crypto_has_comp(zswap_compressor, 0, 0);
588618 }
589619 if (!has_comp) {
....@@ -594,11 +624,12 @@
594624 }
595625
596626 has_zpool = zpool_has_pool(zswap_zpool_type);
597
- if (!has_zpool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
627
+ if (!has_zpool && strcmp(zswap_zpool_type,
628
+ CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
598629 pr_err("zpool %s not available, using default %s\n",
599
- zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
630
+ zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
600631 param_free_charp(&zswap_zpool_type);
601
- zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
632
+ zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
602633 has_zpool = zpool_has_pool(zswap_zpool_type);
603634 }
604635 if (!has_zpool) {
....@@ -634,7 +665,8 @@
634665
635666 static void __zswap_pool_release(struct work_struct *work)
636667 {
637
- struct zswap_pool *pool = container_of(work, typeof(*pool), work);
668
+ struct zswap_pool *pool = container_of(work, typeof(*pool),
669
+ release_work);
638670
639671 synchronize_rcu();
640672
....@@ -657,8 +689,8 @@
657689
658690 list_del_rcu(&pool->list);
659691
660
- INIT_WORK(&pool->work, __zswap_pool_release);
661
- schedule_work(&pool->work);
692
+ INIT_WORK(&pool->release_work, __zswap_pool_release);
693
+ schedule_work(&pool->release_work);
662694
663695 spin_unlock(&zswap_pools_lock);
664696 }
....@@ -866,7 +898,6 @@
866898 /* extract swpentry from data */
867899 zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
868900 swpentry = zhdr->swpentry; /* here */
869
- zpool_unmap_handle(pool, handle);
870901 tree = zswap_trees[swp_type(swpentry)];
871902 offset = swp_offset(swpentry);
872903
....@@ -876,6 +907,7 @@
876907 if (!entry) {
877908 /* entry was invalidated */
878909 spin_unlock(&tree->lock);
910
+ zpool_unmap_handle(pool, handle);
879911 return 0;
880912 }
881913 spin_unlock(&tree->lock);
....@@ -896,15 +928,14 @@
896928 case ZSWAP_SWAPCACHE_NEW: /* page is locked */
897929 /* decompress */
898930 dlen = PAGE_SIZE;
899
- src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
900
- ZPOOL_MM_RO) + sizeof(struct zswap_header);
931
+ src = (u8 *)zhdr + sizeof(struct zswap_header);
901932 dst = kmap_atomic(page);
902
- tfm = *get_cpu_ptr(entry->pool->tfm);
933
+ local_lock(&zswap_comp.lock);
934
+ tfm = *this_cpu_ptr(entry->pool->tfm);
903935 ret = crypto_comp_decompress(tfm, src, entry->length,
904936 dst, &dlen);
905
- put_cpu_ptr(entry->pool->tfm);
937
+ local_unlock(&zswap_comp.lock);
906938 kunmap_atomic(dst);
907
- zpool_unmap_handle(entry->pool->zpool, entry->handle);
908939 BUG_ON(ret);
909940 BUG_ON(dlen != PAGE_SIZE);
910941
....@@ -950,22 +981,7 @@
950981 spin_unlock(&tree->lock);
951982
952983 end:
953
- return ret;
954
-}
955
-
956
-static int zswap_shrink(void)
957
-{
958
- struct zswap_pool *pool;
959
- int ret;
960
-
961
- pool = zswap_pool_last_get();
962
- if (!pool)
963
- return -ENOENT;
964
-
965
- ret = zpool_shrink(pool->zpool, 1, NULL);
966
-
967
- zswap_pool_put(pool);
968
-
984
+ zpool_unmap_handle(pool, handle);
969985 return ret;
970986 }
971987
....@@ -991,8 +1007,6 @@
9911007 memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
9921008 }
9931009
994
-/* protect zswap_dstmem from concurrency */
995
-static DEFINE_LOCAL_IRQ_LOCK(zswap_dstmem_lock);
9961010 /*********************************
9971011 * frontswap hooks
9981012 **********************************/
....@@ -1009,6 +1023,7 @@
10091023 char *buf;
10101024 u8 *src, *dst;
10111025 struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
1026
+ gfp_t gfp;
10121027
10131028 /* THP isn't supported */
10141029 if (PageTransHuge(page)) {
....@@ -1023,21 +1038,23 @@
10231038
10241039 /* reclaim space if needed */
10251040 if (zswap_is_full()) {
1026
- zswap_pool_limit_hit++;
1027
- if (zswap_shrink()) {
1028
- zswap_reject_reclaim_fail++;
1029
- ret = -ENOMEM;
1030
- goto reject;
1031
- }
1041
+ struct zswap_pool *pool;
10321042
1033
- /* A second zswap_is_full() check after
1034
- * zswap_shrink() to make sure it's now
1035
- * under the max_pool_percent
1036
- */
1037
- if (zswap_is_full()) {
1043
+ zswap_pool_limit_hit++;
1044
+ zswap_pool_reached_full = true;
1045
+ pool = zswap_pool_last_get();
1046
+ if (pool)
1047
+ queue_work(shrink_wq, &pool->shrink_work);
1048
+ ret = -ENOMEM;
1049
+ goto reject;
1050
+ }
1051
+
1052
+ if (zswap_pool_reached_full) {
1053
+ if (!zswap_can_accept()) {
10381054 ret = -ENOMEM;
10391055 goto reject;
1040
- }
1056
+ } else
1057
+ zswap_pool_reached_full = false;
10411058 }
10421059
10431060 /* allocate entry */
....@@ -1069,7 +1086,8 @@
10691086 }
10701087
10711088 /* compress */
1072
- dst = get_locked_var(zswap_dstmem_lock, zswap_dstmem);
1089
+ local_lock(&zswap_comp.lock);
1090
+ dst = *this_cpu_ptr(&zswap_comp.dstmem);
10731091 tfm = *this_cpu_ptr(entry->pool->tfm);
10741092 src = kmap_atomic(page);
10751093 ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen);
....@@ -1081,9 +1099,10 @@
10811099
10821100 /* store */
10831101 hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
1084
- ret = zpool_malloc(entry->pool->zpool, hlen + dlen,
1085
- __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM,
1086
- &handle);
1102
+ gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
1103
+ if (zpool_malloc_support_movable(entry->pool->zpool))
1104
+ gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
1105
+ ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle);
10871106 if (ret == -ENOSPC) {
10881107 zswap_reject_compress_poor++;
10891108 goto put_dstmem;
....@@ -1096,7 +1115,7 @@
10961115 memcpy(buf, &zhdr, hlen);
10971116 memcpy(buf + hlen, dst, dlen);
10981117 zpool_unmap_handle(entry->pool->zpool, handle);
1099
- put_locked_var(zswap_dstmem_lock, zswap_dstmem);
1118
+ local_unlock(&zswap_comp.lock);
11001119
11011120 /* populate entry */
11021121 entry->offset = offset;
....@@ -1124,7 +1143,7 @@
11241143 return 0;
11251144
11261145 put_dstmem:
1127
- put_locked_var(zswap_dstmem_lock, zswap_dstmem);
1146
+ local_unlock(&zswap_comp.lock);
11281147 zswap_pool_put(entry->pool);
11291148 freepage:
11301149 zswap_entry_cache_free(entry);
....@@ -1169,9 +1188,10 @@
11691188 if (zpool_evictable(entry->pool->zpool))
11701189 src += sizeof(struct zswap_header);
11711190 dst = kmap_atomic(page);
1172
- tfm = *get_cpu_ptr(entry->pool->tfm);
1191
+ local_lock(&zswap_comp.lock);
1192
+ tfm = *this_cpu_ptr(entry->pool->tfm);
11731193 ret = crypto_comp_decompress(tfm, src, entry->length, dst, &dlen);
1174
- put_cpu_ptr(entry->pool->tfm);
1194
+ local_unlock(&zswap_comp.lock);
11751195 kunmap_atomic(dst);
11761196 zpool_unmap_handle(entry->pool->zpool, entry->handle);
11771197 BUG_ON(ret);
....@@ -1264,8 +1284,6 @@
12641284 return -ENODEV;
12651285
12661286 zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
1267
- if (!zswap_debugfs_root)
1268
- return -ENOMEM;
12691287
12701288 debugfs_create_u64("pool_limit_hit", 0444,
12711289 zswap_debugfs_root, &zswap_pool_limit_hit);
....@@ -1344,11 +1362,18 @@
13441362 zswap_enabled = false;
13451363 }
13461364
1365
+ shrink_wq = create_workqueue("zswap-shrink");
1366
+ if (!shrink_wq)
1367
+ goto fallback_fail;
1368
+
13471369 frontswap_register_ops(&zswap_frontswap_ops);
13481370 if (zswap_debugfs_init())
13491371 pr_warn("debugfs initialization failed\n");
13501372 return 0;
13511373
1374
+fallback_fail:
1375
+ if (pool)
1376
+ zswap_pool_destroy(pool);
13521377 hp_fail:
13531378 cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
13541379 dstmem_fail: