.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
---|
1 | 2 | /* |
---|
2 | 3 | * zswap.c - zswap driver file |
---|
3 | 4 | * |
---|
.. | .. |
---|
8 | 9 | * than reading from the swap device, can also improve workload performance. |
---|
9 | 10 | * |
---|
10 | 11 | * Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com> |
---|
11 | | - * |
---|
12 | | - * This program is free software; you can redistribute it and/or |
---|
13 | | - * modify it under the terms of the GNU General Public License |
---|
14 | | - * as published by the Free Software Foundation; either version 2 |
---|
15 | | - * of the License, or (at your option) any later version. |
---|
16 | | - * |
---|
17 | | - * This program is distributed in the hope that it will be useful, |
---|
18 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
19 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
20 | | - * GNU General Public License for more details. |
---|
21 | 12 | */ |
---|
22 | 13 | |
---|
23 | 14 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
---|
.. | .. |
---|
27 | 18 | #include <linux/highmem.h> |
---|
28 | 19 | #include <linux/slab.h> |
---|
29 | 20 | #include <linux/spinlock.h> |
---|
30 | | -#include <linux/locallock.h> |
---|
31 | 21 | #include <linux/types.h> |
---|
32 | 22 | #include <linux/atomic.h> |
---|
33 | 23 | #include <linux/frontswap.h> |
---|
.. | .. |
---|
42 | 32 | #include <linux/swapops.h> |
---|
43 | 33 | #include <linux/writeback.h> |
---|
44 | 34 | #include <linux/pagemap.h> |
---|
| 35 | +#include <linux/workqueue.h> |
---|
45 | 36 | |
---|
46 | 37 | /********************************* |
---|
47 | 38 | * statistics |
---|
.. | .. |
---|
75 | 66 | /* Duplicate store was encountered (rare) */ |
---|
76 | 67 | static u64 zswap_duplicate_entry; |
---|
77 | 68 | |
---|
| 69 | +/* Shrinker work queue */ |
---|
| 70 | +static struct workqueue_struct *shrink_wq; |
---|
| 71 | +/* Pool limit was hit, we need to calm down */ |
---|
| 72 | +static bool zswap_pool_reached_full; |
---|
| 73 | + |
---|
78 | 74 | /********************************* |
---|
79 | 75 | * tunables |
---|
80 | 76 | **********************************/ |
---|
81 | 77 | |
---|
82 | 78 | #define ZSWAP_PARAM_UNSET "" |
---|
83 | 79 | |
---|
84 | | -/* Enable/disable zswap (disabled by default) */ |
---|
85 | | -static bool zswap_enabled; |
---|
| 80 | +/* Enable/disable zswap */ |
---|
| 81 | +static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON); |
---|
86 | 82 | static int zswap_enabled_param_set(const char *, |
---|
87 | 83 | const struct kernel_param *); |
---|
88 | 84 | static struct kernel_param_ops zswap_enabled_param_ops = { |
---|
.. | .. |
---|
92 | 88 | module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644); |
---|
93 | 89 | |
---|
94 | 90 | /* Crypto compressor to use */ |
---|
95 | | -#define ZSWAP_COMPRESSOR_DEFAULT "lzo" |
---|
96 | | -static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT; |
---|
| 91 | +static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT; |
---|
97 | 92 | static int zswap_compressor_param_set(const char *, |
---|
98 | 93 | const struct kernel_param *); |
---|
99 | 94 | static struct kernel_param_ops zswap_compressor_param_ops = { |
---|
.. | .. |
---|
105 | 100 | &zswap_compressor, 0644); |
---|
106 | 101 | |
---|
107 | 102 | /* Compressed storage zpool to use */ |
---|
108 | | -#define ZSWAP_ZPOOL_DEFAULT "zbud" |
---|
109 | | -static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT; |
---|
| 103 | +static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT; |
---|
110 | 104 | static int zswap_zpool_param_set(const char *, const struct kernel_param *); |
---|
111 | 105 | static struct kernel_param_ops zswap_zpool_param_ops = { |
---|
112 | 106 | .set = zswap_zpool_param_set, |
---|
.. | .. |
---|
118 | 112 | /* The maximum percentage of memory that the compressed pool can occupy */ |
---|
119 | 113 | static unsigned int zswap_max_pool_percent = 20; |
---|
120 | 114 | module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644); |
---|
| 115 | + |
---|
| 116 | +/* The threshold for accepting new pages after the max_pool_percent was hit */ |
---|
| 117 | +static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */ |
---|
| 118 | +module_param_named(accept_threshold_percent, zswap_accept_thr_percent, |
---|
| 119 | + uint, 0644); |
---|
121 | 120 | |
---|
122 | 121 | /* Enable/disable handling same-value filled pages (enabled by default) */ |
---|
123 | 122 | static bool zswap_same_filled_pages_enabled = true; |
---|
.. | .. |
---|
133 | 132 | struct crypto_comp * __percpu *tfm; |
---|
134 | 133 | struct kref kref; |
---|
135 | 134 | struct list_head list; |
---|
136 | | - struct work_struct work; |
---|
| 135 | + struct work_struct release_work; |
---|
| 136 | + struct work_struct shrink_work; |
---|
137 | 137 | struct hlist_node node; |
---|
138 | 138 | char tfm_name[CRYPTO_MAX_ALG_NAME]; |
---|
139 | 139 | }; |
---|
.. | .. |
---|
220 | 220 | |
---|
221 | 221 | static bool zswap_is_full(void) |
---|
222 | 222 | { |
---|
223 | | - return totalram_pages * zswap_max_pool_percent / 100 < |
---|
224 | | - DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); |
---|
| 223 | + return totalram_pages() * zswap_max_pool_percent / 100 < |
---|
| 224 | + DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); |
---|
| 225 | +} |
---|
| 226 | + |
---|
| 227 | +static bool zswap_can_accept(void) |
---|
| 228 | +{ |
---|
| 229 | + return totalram_pages() * zswap_accept_thr_percent / 100 * |
---|
| 230 | + zswap_max_pool_percent / 100 > |
---|
| 231 | + DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); |
---|
225 | 232 | } |
---|
226 | 233 | |
---|
227 | 234 | static void zswap_update_total_size(void) |
---|
.. | .. |
---|
511 | 518 | return NULL; |
---|
512 | 519 | } |
---|
513 | 520 | |
---|
| 521 | +static void shrink_worker(struct work_struct *w) |
---|
| 522 | +{ |
---|
| 523 | + struct zswap_pool *pool = container_of(w, typeof(*pool), |
---|
| 524 | + shrink_work); |
---|
| 525 | + |
---|
| 526 | + if (zpool_shrink(pool->zpool, 1, NULL)) |
---|
| 527 | + zswap_reject_reclaim_fail++; |
---|
| 528 | + zswap_pool_put(pool); |
---|
| 529 | +} |
---|
| 530 | + |
---|
514 | 531 | static struct zswap_pool *zswap_pool_create(char *type, char *compressor) |
---|
515 | 532 | { |
---|
516 | 533 | struct zswap_pool *pool; |
---|
.. | .. |
---|
561 | 578 | */ |
---|
562 | 579 | kref_init(&pool->kref); |
---|
563 | 580 | INIT_LIST_HEAD(&pool->list); |
---|
| 581 | + INIT_WORK(&pool->shrink_work, shrink_worker); |
---|
564 | 582 | |
---|
565 | 583 | zswap_pool_debug("created", pool); |
---|
566 | 584 | |
---|
.. | .. |
---|
579 | 597 | bool has_comp, has_zpool; |
---|
580 | 598 | |
---|
581 | 599 | has_comp = crypto_has_comp(zswap_compressor, 0, 0); |
---|
582 | | - if (!has_comp && strcmp(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT)) { |
---|
| 600 | + if (!has_comp && strcmp(zswap_compressor, |
---|
| 601 | + CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) { |
---|
583 | 602 | pr_err("compressor %s not available, using default %s\n", |
---|
584 | | - zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT); |
---|
| 603 | + zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT); |
---|
585 | 604 | param_free_charp(&zswap_compressor); |
---|
586 | | - zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT; |
---|
| 605 | + zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT; |
---|
587 | 606 | has_comp = crypto_has_comp(zswap_compressor, 0, 0); |
---|
588 | 607 | } |
---|
589 | 608 | if (!has_comp) { |
---|
.. | .. |
---|
594 | 613 | } |
---|
595 | 614 | |
---|
596 | 615 | has_zpool = zpool_has_pool(zswap_zpool_type); |
---|
597 | | - if (!has_zpool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) { |
---|
| 616 | + if (!has_zpool && strcmp(zswap_zpool_type, |
---|
| 617 | + CONFIG_ZSWAP_ZPOOL_DEFAULT)) { |
---|
598 | 618 | pr_err("zpool %s not available, using default %s\n", |
---|
599 | | - zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT); |
---|
| 619 | + zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT); |
---|
600 | 620 | param_free_charp(&zswap_zpool_type); |
---|
601 | | - zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT; |
---|
| 621 | + zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT; |
---|
602 | 622 | has_zpool = zpool_has_pool(zswap_zpool_type); |
---|
603 | 623 | } |
---|
604 | 624 | if (!has_zpool) { |
---|
.. | .. |
---|
634 | 654 | |
---|
635 | 655 | static void __zswap_pool_release(struct work_struct *work) |
---|
636 | 656 | { |
---|
637 | | - struct zswap_pool *pool = container_of(work, typeof(*pool), work); |
---|
| 657 | + struct zswap_pool *pool = container_of(work, typeof(*pool), |
---|
| 658 | + release_work); |
---|
638 | 659 | |
---|
639 | 660 | synchronize_rcu(); |
---|
640 | 661 | |
---|
.. | .. |
---|
657 | 678 | |
---|
658 | 679 | list_del_rcu(&pool->list); |
---|
659 | 680 | |
---|
660 | | - INIT_WORK(&pool->work, __zswap_pool_release); |
---|
661 | | - schedule_work(&pool->work); |
---|
| 681 | + INIT_WORK(&pool->release_work, __zswap_pool_release); |
---|
| 682 | + schedule_work(&pool->release_work); |
---|
662 | 683 | |
---|
663 | 684 | spin_unlock(&zswap_pools_lock); |
---|
664 | 685 | } |
---|
.. | .. |
---|
866 | 887 | /* extract swpentry from data */ |
---|
867 | 888 | zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO); |
---|
868 | 889 | swpentry = zhdr->swpentry; /* here */ |
---|
869 | | - zpool_unmap_handle(pool, handle); |
---|
870 | 890 | tree = zswap_trees[swp_type(swpentry)]; |
---|
871 | 891 | offset = swp_offset(swpentry); |
---|
872 | 892 | |
---|
.. | .. |
---|
876 | 896 | if (!entry) { |
---|
877 | 897 | /* entry was invalidated */ |
---|
878 | 898 | spin_unlock(&tree->lock); |
---|
| 899 | + zpool_unmap_handle(pool, handle); |
---|
879 | 900 | return 0; |
---|
880 | 901 | } |
---|
881 | 902 | spin_unlock(&tree->lock); |
---|
.. | .. |
---|
896 | 917 | case ZSWAP_SWAPCACHE_NEW: /* page is locked */ |
---|
897 | 918 | /* decompress */ |
---|
898 | 919 | dlen = PAGE_SIZE; |
---|
899 | | - src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle, |
---|
900 | | - ZPOOL_MM_RO) + sizeof(struct zswap_header); |
---|
| 920 | + src = (u8 *)zhdr + sizeof(struct zswap_header); |
---|
901 | 921 | dst = kmap_atomic(page); |
---|
902 | 922 | tfm = *get_cpu_ptr(entry->pool->tfm); |
---|
903 | 923 | ret = crypto_comp_decompress(tfm, src, entry->length, |
---|
904 | 924 | dst, &dlen); |
---|
905 | 925 | put_cpu_ptr(entry->pool->tfm); |
---|
906 | 926 | kunmap_atomic(dst); |
---|
907 | | - zpool_unmap_handle(entry->pool->zpool, entry->handle); |
---|
908 | 927 | BUG_ON(ret); |
---|
909 | 928 | BUG_ON(dlen != PAGE_SIZE); |
---|
910 | 929 | |
---|
.. | .. |
---|
950 | 969 | spin_unlock(&tree->lock); |
---|
951 | 970 | |
---|
952 | 971 | end: |
---|
953 | | - return ret; |
---|
954 | | -} |
---|
955 | | - |
---|
956 | | -static int zswap_shrink(void) |
---|
957 | | -{ |
---|
958 | | - struct zswap_pool *pool; |
---|
959 | | - int ret; |
---|
960 | | - |
---|
961 | | - pool = zswap_pool_last_get(); |
---|
962 | | - if (!pool) |
---|
963 | | - return -ENOENT; |
---|
964 | | - |
---|
965 | | - ret = zpool_shrink(pool->zpool, 1, NULL); |
---|
966 | | - |
---|
967 | | - zswap_pool_put(pool); |
---|
968 | | - |
---|
| 972 | + zpool_unmap_handle(pool, handle); |
---|
969 | 973 | return ret; |
---|
970 | 974 | } |
---|
971 | 975 | |
---|
.. | .. |
---|
991 | 995 | memset_l(page, value, PAGE_SIZE / sizeof(unsigned long)); |
---|
992 | 996 | } |
---|
993 | 997 | |
---|
994 | | -/* protect zswap_dstmem from concurrency */ |
---|
995 | | -static DEFINE_LOCAL_IRQ_LOCK(zswap_dstmem_lock); |
---|
996 | 998 | /********************************* |
---|
997 | 999 | * frontswap hooks |
---|
998 | 1000 | **********************************/ |
---|
.. | .. |
---|
1009 | 1011 | char *buf; |
---|
1010 | 1012 | u8 *src, *dst; |
---|
1011 | 1013 | struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) }; |
---|
| 1014 | + gfp_t gfp; |
---|
1012 | 1015 | |
---|
1013 | 1016 | /* THP isn't supported */ |
---|
1014 | 1017 | if (PageTransHuge(page)) { |
---|
.. | .. |
---|
1023 | 1026 | |
---|
1024 | 1027 | /* reclaim space if needed */ |
---|
1025 | 1028 | if (zswap_is_full()) { |
---|
1026 | | - zswap_pool_limit_hit++; |
---|
1027 | | - if (zswap_shrink()) { |
---|
1028 | | - zswap_reject_reclaim_fail++; |
---|
1029 | | - ret = -ENOMEM; |
---|
1030 | | - goto reject; |
---|
1031 | | - } |
---|
| 1029 | + struct zswap_pool *pool; |
---|
1032 | 1030 | |
---|
1033 | | - /* A second zswap_is_full() check after |
---|
1034 | | - * zswap_shrink() to make sure it's now |
---|
1035 | | - * under the max_pool_percent |
---|
1036 | | - */ |
---|
1037 | | - if (zswap_is_full()) { |
---|
| 1031 | + zswap_pool_limit_hit++; |
---|
| 1032 | + zswap_pool_reached_full = true; |
---|
| 1033 | + pool = zswap_pool_last_get(); |
---|
| 1034 | + if (pool) |
---|
| 1035 | + queue_work(shrink_wq, &pool->shrink_work); |
---|
| 1036 | + ret = -ENOMEM; |
---|
| 1037 | + goto reject; |
---|
| 1038 | + } |
---|
| 1039 | + |
---|
| 1040 | + if (zswap_pool_reached_full) { |
---|
| 1041 | + if (!zswap_can_accept()) { |
---|
1038 | 1042 | ret = -ENOMEM; |
---|
1039 | 1043 | goto reject; |
---|
1040 | | - } |
---|
| 1044 | + } else |
---|
| 1045 | + zswap_pool_reached_full = false; |
---|
1041 | 1046 | } |
---|
1042 | 1047 | |
---|
1043 | 1048 | /* allocate entry */ |
---|
.. | .. |
---|
1069 | 1074 | } |
---|
1070 | 1075 | |
---|
1071 | 1076 | /* compress */ |
---|
1072 | | - dst = get_locked_var(zswap_dstmem_lock, zswap_dstmem); |
---|
1073 | | - tfm = *this_cpu_ptr(entry->pool->tfm); |
---|
| 1077 | + dst = get_cpu_var(zswap_dstmem); |
---|
| 1078 | + tfm = *get_cpu_ptr(entry->pool->tfm); |
---|
1074 | 1079 | src = kmap_atomic(page); |
---|
1075 | 1080 | ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen); |
---|
1076 | 1081 | kunmap_atomic(src); |
---|
| 1082 | + put_cpu_ptr(entry->pool->tfm); |
---|
1077 | 1083 | if (ret) { |
---|
1078 | 1084 | ret = -EINVAL; |
---|
1079 | 1085 | goto put_dstmem; |
---|
.. | .. |
---|
1081 | 1087 | |
---|
1082 | 1088 | /* store */ |
---|
1083 | 1089 | hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0; |
---|
1084 | | - ret = zpool_malloc(entry->pool->zpool, hlen + dlen, |
---|
1085 | | - __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM, |
---|
1086 | | - &handle); |
---|
| 1090 | + gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; |
---|
| 1091 | + if (zpool_malloc_support_movable(entry->pool->zpool)) |
---|
| 1092 | + gfp |= __GFP_HIGHMEM | __GFP_MOVABLE; |
---|
| 1093 | + ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle); |
---|
1087 | 1094 | if (ret == -ENOSPC) { |
---|
1088 | 1095 | zswap_reject_compress_poor++; |
---|
1089 | 1096 | goto put_dstmem; |
---|
.. | .. |
---|
1096 | 1103 | memcpy(buf, &zhdr, hlen); |
---|
1097 | 1104 | memcpy(buf + hlen, dst, dlen); |
---|
1098 | 1105 | zpool_unmap_handle(entry->pool->zpool, handle); |
---|
1099 | | - put_locked_var(zswap_dstmem_lock, zswap_dstmem); |
---|
| 1106 | + put_cpu_var(zswap_dstmem); |
---|
1100 | 1107 | |
---|
1101 | 1108 | /* populate entry */ |
---|
1102 | 1109 | entry->offset = offset; |
---|
.. | .. |
---|
1124 | 1131 | return 0; |
---|
1125 | 1132 | |
---|
1126 | 1133 | put_dstmem: |
---|
1127 | | - put_locked_var(zswap_dstmem_lock, zswap_dstmem); |
---|
| 1134 | + put_cpu_var(zswap_dstmem); |
---|
1128 | 1135 | zswap_pool_put(entry->pool); |
---|
1129 | 1136 | freepage: |
---|
1130 | 1137 | zswap_entry_cache_free(entry); |
---|
.. | .. |
---|
1264 | 1271 | return -ENODEV; |
---|
1265 | 1272 | |
---|
1266 | 1273 | zswap_debugfs_root = debugfs_create_dir("zswap", NULL); |
---|
1267 | | - if (!zswap_debugfs_root) |
---|
1268 | | - return -ENOMEM; |
---|
1269 | 1274 | |
---|
1270 | 1275 | debugfs_create_u64("pool_limit_hit", 0444, |
---|
1271 | 1276 | zswap_debugfs_root, &zswap_pool_limit_hit); |
---|
.. | .. |
---|
1344 | 1349 | zswap_enabled = false; |
---|
1345 | 1350 | } |
---|
1346 | 1351 | |
---|
| 1352 | + shrink_wq = create_workqueue("zswap-shrink"); |
---|
| 1353 | + if (!shrink_wq) |
---|
| 1354 | + goto fallback_fail; |
---|
| 1355 | + |
---|
1347 | 1356 | frontswap_register_ops(&zswap_frontswap_ops); |
---|
1348 | 1357 | if (zswap_debugfs_init()) |
---|
1349 | 1358 | pr_warn("debugfs initialization failed\n"); |
---|
1350 | 1359 | return 0; |
---|
1351 | 1360 | |
---|
| 1361 | +fallback_fail: |
---|
| 1362 | + if (pool) |
---|
| 1363 | + zswap_pool_destroy(pool); |
---|
1352 | 1364 | hp_fail: |
---|
1353 | 1365 | cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE); |
---|
1354 | 1366 | dstmem_fail: |
---|