| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * zswap.c - zswap driver file |
|---|
| 3 | 4 | * |
|---|
| .. | .. |
|---|
| 8 | 9 | * than reading from the swap device, can also improve workload performance. |
|---|
| 9 | 10 | * |
|---|
| 10 | 11 | * Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com> |
|---|
| 11 | | - * |
|---|
| 12 | | - * This program is free software; you can redistribute it and/or |
|---|
| 13 | | - * modify it under the terms of the GNU General Public License |
|---|
| 14 | | - * as published by the Free Software Foundation; either version 2 |
|---|
| 15 | | - * of the License, or (at your option) any later version. |
|---|
| 16 | | - * |
|---|
| 17 | | - * This program is distributed in the hope that it will be useful, |
|---|
| 18 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 19 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 20 | | - * GNU General Public License for more details. |
|---|
| 21 | 12 | */ |
|---|
| 22 | 13 | |
|---|
| 23 | 14 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
|---|
| .. | .. |
|---|
| 41 | 32 | #include <linux/swapops.h> |
|---|
| 42 | 33 | #include <linux/writeback.h> |
|---|
| 43 | 34 | #include <linux/pagemap.h> |
|---|
| 35 | +#include <linux/workqueue.h> |
|---|
| 44 | 36 | |
|---|
| 45 | 37 | /********************************* |
|---|
| 46 | 38 | * statistics |
|---|
| .. | .. |
|---|
| 74 | 66 | /* Duplicate store was encountered (rare) */ |
|---|
| 75 | 67 | static u64 zswap_duplicate_entry; |
|---|
| 76 | 68 | |
|---|
| 69 | +/* Shrinker work queue */ |
|---|
| 70 | +static struct workqueue_struct *shrink_wq; |
|---|
| 71 | +/* Pool limit was hit, we need to calm down */ |
|---|
| 72 | +static bool zswap_pool_reached_full; |
|---|
| 73 | + |
|---|
| 77 | 74 | /********************************* |
|---|
| 78 | 75 | * tunables |
|---|
| 79 | 76 | **********************************/ |
|---|
| 80 | 77 | |
|---|
| 81 | 78 | #define ZSWAP_PARAM_UNSET "" |
|---|
| 82 | 79 | |
|---|
| 83 | | -/* Enable/disable zswap (disabled by default) */ |
|---|
| 84 | | -static bool zswap_enabled; |
|---|
| 80 | +/* Enable/disable zswap */ |
|---|
| 81 | +static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON); |
|---|
| 85 | 82 | static int zswap_enabled_param_set(const char *, |
|---|
| 86 | 83 | const struct kernel_param *); |
|---|
| 87 | 84 | static struct kernel_param_ops zswap_enabled_param_ops = { |
|---|
| .. | .. |
|---|
| 91 | 88 | module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644); |
|---|
| 92 | 89 | |
|---|
| 93 | 90 | /* Crypto compressor to use */ |
|---|
| 94 | | -#define ZSWAP_COMPRESSOR_DEFAULT "lzo" |
|---|
| 95 | | -static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT; |
|---|
| 91 | +static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT; |
|---|
| 96 | 92 | static int zswap_compressor_param_set(const char *, |
|---|
| 97 | 93 | const struct kernel_param *); |
|---|
| 98 | 94 | static struct kernel_param_ops zswap_compressor_param_ops = { |
|---|
| .. | .. |
|---|
| 104 | 100 | &zswap_compressor, 0644); |
|---|
| 105 | 101 | |
|---|
| 106 | 102 | /* Compressed storage zpool to use */ |
|---|
| 107 | | -#define ZSWAP_ZPOOL_DEFAULT "zbud" |
|---|
| 108 | | -static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT; |
|---|
| 103 | +static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT; |
|---|
| 109 | 104 | static int zswap_zpool_param_set(const char *, const struct kernel_param *); |
|---|
| 110 | 105 | static struct kernel_param_ops zswap_zpool_param_ops = { |
|---|
| 111 | 106 | .set = zswap_zpool_param_set, |
|---|
| .. | .. |
|---|
| 117 | 112 | /* The maximum percentage of memory that the compressed pool can occupy */ |
|---|
| 118 | 113 | static unsigned int zswap_max_pool_percent = 20; |
|---|
| 119 | 114 | module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644); |
|---|
| 115 | + |
|---|
| 116 | +/* The threshold for accepting new pages after the max_pool_percent was hit */ |
|---|
| 117 | +static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */ |
|---|
| 118 | +module_param_named(accept_threshold_percent, zswap_accept_thr_percent, |
|---|
| 119 | + uint, 0644); |
|---|
| 120 | 120 | |
|---|
| 121 | 121 | /* Enable/disable handling same-value filled pages (enabled by default) */ |
|---|
| 122 | 122 | static bool zswap_same_filled_pages_enabled = true; |
|---|
| .. | .. |
|---|
| 132 | 132 | struct crypto_comp * __percpu *tfm; |
|---|
| 133 | 133 | struct kref kref; |
|---|
| 134 | 134 | struct list_head list; |
|---|
| 135 | | - struct work_struct work; |
|---|
| 135 | + struct work_struct release_work; |
|---|
| 136 | + struct work_struct shrink_work; |
|---|
| 136 | 137 | struct hlist_node node; |
|---|
| 137 | 138 | char tfm_name[CRYPTO_MAX_ALG_NAME]; |
|---|
| 138 | 139 | }; |
|---|
| .. | .. |
|---|
| 219 | 220 | |
|---|
| 220 | 221 | static bool zswap_is_full(void) |
|---|
| 221 | 222 | { |
|---|
| 222 | | - return totalram_pages * zswap_max_pool_percent / 100 < |
|---|
| 223 | | - DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); |
|---|
| 223 | + return totalram_pages() * zswap_max_pool_percent / 100 < |
|---|
| 224 | + DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); |
|---|
| 225 | +} |
|---|
| 226 | + |
|---|
| 227 | +static bool zswap_can_accept(void) |
|---|
| 228 | +{ |
|---|
| 229 | + return totalram_pages() * zswap_accept_thr_percent / 100 * |
|---|
| 230 | + zswap_max_pool_percent / 100 > |
|---|
| 231 | + DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); |
|---|
| 224 | 232 | } |
|---|
| 225 | 233 | |
|---|
| 226 | 234 | static void zswap_update_total_size(void) |
|---|
| .. | .. |
|---|
| 510 | 518 | return NULL; |
|---|
| 511 | 519 | } |
|---|
| 512 | 520 | |
|---|
| 521 | +static void shrink_worker(struct work_struct *w) |
|---|
| 522 | +{ |
|---|
| 523 | + struct zswap_pool *pool = container_of(w, typeof(*pool), |
|---|
| 524 | + shrink_work); |
|---|
| 525 | + |
|---|
| 526 | + if (zpool_shrink(pool->zpool, 1, NULL)) |
|---|
| 527 | + zswap_reject_reclaim_fail++; |
|---|
| 528 | + zswap_pool_put(pool); |
|---|
| 529 | +} |
|---|
| 530 | + |
|---|
| 513 | 531 | static struct zswap_pool *zswap_pool_create(char *type, char *compressor) |
|---|
| 514 | 532 | { |
|---|
| 515 | 533 | struct zswap_pool *pool; |
|---|
| .. | .. |
|---|
| 560 | 578 | */ |
|---|
| 561 | 579 | kref_init(&pool->kref); |
|---|
| 562 | 580 | INIT_LIST_HEAD(&pool->list); |
|---|
| 581 | + INIT_WORK(&pool->shrink_work, shrink_worker); |
|---|
| 563 | 582 | |
|---|
| 564 | 583 | zswap_pool_debug("created", pool); |
|---|
| 565 | 584 | |
|---|
| .. | .. |
|---|
| 578 | 597 | bool has_comp, has_zpool; |
|---|
| 579 | 598 | |
|---|
| 580 | 599 | has_comp = crypto_has_comp(zswap_compressor, 0, 0); |
|---|
| 581 | | - if (!has_comp && strcmp(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT)) { |
|---|
| 600 | + if (!has_comp && strcmp(zswap_compressor, |
|---|
| 601 | + CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) { |
|---|
| 582 | 602 | pr_err("compressor %s not available, using default %s\n", |
|---|
| 583 | | - zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT); |
|---|
| 603 | + zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT); |
|---|
| 584 | 604 | param_free_charp(&zswap_compressor); |
|---|
| 585 | | - zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT; |
|---|
| 605 | + zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT; |
|---|
| 586 | 606 | has_comp = crypto_has_comp(zswap_compressor, 0, 0); |
|---|
| 587 | 607 | } |
|---|
| 588 | 608 | if (!has_comp) { |
|---|
| .. | .. |
|---|
| 593 | 613 | } |
|---|
| 594 | 614 | |
|---|
| 595 | 615 | has_zpool = zpool_has_pool(zswap_zpool_type); |
|---|
| 596 | | - if (!has_zpool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) { |
|---|
| 616 | + if (!has_zpool && strcmp(zswap_zpool_type, |
|---|
| 617 | + CONFIG_ZSWAP_ZPOOL_DEFAULT)) { |
|---|
| 597 | 618 | pr_err("zpool %s not available, using default %s\n", |
|---|
| 598 | | - zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT); |
|---|
| 619 | + zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT); |
|---|
| 599 | 620 | param_free_charp(&zswap_zpool_type); |
|---|
| 600 | | - zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT; |
|---|
| 621 | + zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT; |
|---|
| 601 | 622 | has_zpool = zpool_has_pool(zswap_zpool_type); |
|---|
| 602 | 623 | } |
|---|
| 603 | 624 | if (!has_zpool) { |
|---|
| .. | .. |
|---|
| 633 | 654 | |
|---|
| 634 | 655 | static void __zswap_pool_release(struct work_struct *work) |
|---|
| 635 | 656 | { |
|---|
| 636 | | - struct zswap_pool *pool = container_of(work, typeof(*pool), work); |
|---|
| 657 | + struct zswap_pool *pool = container_of(work, typeof(*pool), |
|---|
| 658 | + release_work); |
|---|
| 637 | 659 | |
|---|
| 638 | 660 | synchronize_rcu(); |
|---|
| 639 | 661 | |
|---|
| .. | .. |
|---|
| 656 | 678 | |
|---|
| 657 | 679 | list_del_rcu(&pool->list); |
|---|
| 658 | 680 | |
|---|
| 659 | | - INIT_WORK(&pool->work, __zswap_pool_release); |
|---|
| 660 | | - schedule_work(&pool->work); |
|---|
| 681 | + INIT_WORK(&pool->release_work, __zswap_pool_release); |
|---|
| 682 | + schedule_work(&pool->release_work); |
|---|
| 661 | 683 | |
|---|
| 662 | 684 | spin_unlock(&zswap_pools_lock); |
|---|
| 663 | 685 | } |
|---|
| .. | .. |
|---|
| 865 | 887 | /* extract swpentry from data */ |
|---|
| 866 | 888 | zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO); |
|---|
| 867 | 889 | swpentry = zhdr->swpentry; /* here */ |
|---|
| 868 | | - zpool_unmap_handle(pool, handle); |
|---|
| 869 | 890 | tree = zswap_trees[swp_type(swpentry)]; |
|---|
| 870 | 891 | offset = swp_offset(swpentry); |
|---|
| 871 | 892 | |
|---|
| .. | .. |
|---|
| 875 | 896 | if (!entry) { |
|---|
| 876 | 897 | /* entry was invalidated */ |
|---|
| 877 | 898 | spin_unlock(&tree->lock); |
|---|
| 899 | + zpool_unmap_handle(pool, handle); |
|---|
| 878 | 900 | return 0; |
|---|
| 879 | 901 | } |
|---|
| 880 | 902 | spin_unlock(&tree->lock); |
|---|
| .. | .. |
|---|
| 895 | 917 | case ZSWAP_SWAPCACHE_NEW: /* page is locked */ |
|---|
| 896 | 918 | /* decompress */ |
|---|
| 897 | 919 | dlen = PAGE_SIZE; |
|---|
| 898 | | - src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle, |
|---|
| 899 | | - ZPOOL_MM_RO) + sizeof(struct zswap_header); |
|---|
| 920 | + src = (u8 *)zhdr + sizeof(struct zswap_header); |
|---|
| 900 | 921 | dst = kmap_atomic(page); |
|---|
| 901 | 922 | tfm = *get_cpu_ptr(entry->pool->tfm); |
|---|
| 902 | 923 | ret = crypto_comp_decompress(tfm, src, entry->length, |
|---|
| 903 | 924 | dst, &dlen); |
|---|
| 904 | 925 | put_cpu_ptr(entry->pool->tfm); |
|---|
| 905 | 926 | kunmap_atomic(dst); |
|---|
| 906 | | - zpool_unmap_handle(entry->pool->zpool, entry->handle); |
|---|
| 907 | 927 | BUG_ON(ret); |
|---|
| 908 | 928 | BUG_ON(dlen != PAGE_SIZE); |
|---|
| 909 | 929 | |
|---|
| .. | .. |
|---|
| 949 | 969 | spin_unlock(&tree->lock); |
|---|
| 950 | 970 | |
|---|
| 951 | 971 | end: |
|---|
| 952 | | - return ret; |
|---|
| 953 | | -} |
|---|
| 954 | | - |
|---|
| 955 | | -static int zswap_shrink(void) |
|---|
| 956 | | -{ |
|---|
| 957 | | - struct zswap_pool *pool; |
|---|
| 958 | | - int ret; |
|---|
| 959 | | - |
|---|
| 960 | | - pool = zswap_pool_last_get(); |
|---|
| 961 | | - if (!pool) |
|---|
| 962 | | - return -ENOENT; |
|---|
| 963 | | - |
|---|
| 964 | | - ret = zpool_shrink(pool->zpool, 1, NULL); |
|---|
| 965 | | - |
|---|
| 966 | | - zswap_pool_put(pool); |
|---|
| 967 | | - |
|---|
| 972 | + zpool_unmap_handle(pool, handle); |
|---|
| 968 | 973 | return ret; |
|---|
| 969 | 974 | } |
|---|
| 970 | 975 | |
|---|
| .. | .. |
|---|
| 1006 | 1011 | char *buf; |
|---|
| 1007 | 1012 | u8 *src, *dst; |
|---|
| 1008 | 1013 | struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) }; |
|---|
| 1014 | + gfp_t gfp; |
|---|
| 1009 | 1015 | |
|---|
| 1010 | 1016 | /* THP isn't supported */ |
|---|
| 1011 | 1017 | if (PageTransHuge(page)) { |
|---|
| .. | .. |
|---|
| 1020 | 1026 | |
|---|
| 1021 | 1027 | /* reclaim space if needed */ |
|---|
| 1022 | 1028 | if (zswap_is_full()) { |
|---|
| 1023 | | - zswap_pool_limit_hit++; |
|---|
| 1024 | | - if (zswap_shrink()) { |
|---|
| 1025 | | - zswap_reject_reclaim_fail++; |
|---|
| 1026 | | - ret = -ENOMEM; |
|---|
| 1027 | | - goto reject; |
|---|
| 1028 | | - } |
|---|
| 1029 | + struct zswap_pool *pool; |
|---|
| 1029 | 1030 | |
|---|
| 1030 | | - /* A second zswap_is_full() check after |
|---|
| 1031 | | - * zswap_shrink() to make sure it's now |
|---|
| 1032 | | - * under the max_pool_percent |
|---|
| 1033 | | - */ |
|---|
| 1034 | | - if (zswap_is_full()) { |
|---|
| 1031 | + zswap_pool_limit_hit++; |
|---|
| 1032 | + zswap_pool_reached_full = true; |
|---|
| 1033 | + pool = zswap_pool_last_get(); |
|---|
| 1034 | + if (pool) |
|---|
| 1035 | + queue_work(shrink_wq, &pool->shrink_work); |
|---|
| 1036 | + ret = -ENOMEM; |
|---|
| 1037 | + goto reject; |
|---|
| 1038 | + } |
|---|
| 1039 | + |
|---|
| 1040 | + if (zswap_pool_reached_full) { |
|---|
| 1041 | + if (!zswap_can_accept()) { |
|---|
| 1035 | 1042 | ret = -ENOMEM; |
|---|
| 1036 | 1043 | goto reject; |
|---|
| 1037 | | - } |
|---|
| 1044 | + } else |
|---|
| 1045 | + zswap_pool_reached_full = false; |
|---|
| 1038 | 1046 | } |
|---|
| 1039 | 1047 | |
|---|
| 1040 | 1048 | /* allocate entry */ |
|---|
| .. | .. |
|---|
| 1079 | 1087 | |
|---|
| 1080 | 1088 | /* store */ |
|---|
| 1081 | 1089 | hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0; |
|---|
| 1082 | | - ret = zpool_malloc(entry->pool->zpool, hlen + dlen, |
|---|
| 1083 | | - __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM, |
|---|
| 1084 | | - &handle); |
|---|
| 1090 | + gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; |
|---|
| 1091 | + if (zpool_malloc_support_movable(entry->pool->zpool)) |
|---|
| 1092 | + gfp |= __GFP_HIGHMEM | __GFP_MOVABLE; |
|---|
| 1093 | + ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle); |
|---|
| 1085 | 1094 | if (ret == -ENOSPC) { |
|---|
| 1086 | 1095 | zswap_reject_compress_poor++; |
|---|
| 1087 | 1096 | goto put_dstmem; |
|---|
| .. | .. |
|---|
| 1262 | 1271 | return -ENODEV; |
|---|
| 1263 | 1272 | |
|---|
| 1264 | 1273 | zswap_debugfs_root = debugfs_create_dir("zswap", NULL); |
|---|
| 1265 | | - if (!zswap_debugfs_root) |
|---|
| 1266 | | - return -ENOMEM; |
|---|
| 1267 | 1274 | |
|---|
| 1268 | 1275 | debugfs_create_u64("pool_limit_hit", 0444, |
|---|
| 1269 | 1276 | zswap_debugfs_root, &zswap_pool_limit_hit); |
|---|
| .. | .. |
|---|
| 1342 | 1349 | zswap_enabled = false; |
|---|
| 1343 | 1350 | } |
|---|
| 1344 | 1351 | |
|---|
| 1352 | + shrink_wq = create_workqueue("zswap-shrink"); |
|---|
| 1353 | + if (!shrink_wq) |
|---|
| 1354 | + goto fallback_fail; |
|---|
| 1355 | + |
|---|
| 1345 | 1356 | frontswap_register_ops(&zswap_frontswap_ops); |
|---|
| 1346 | 1357 | if (zswap_debugfs_init()) |
|---|
| 1347 | 1358 | pr_warn("debugfs initialization failed\n"); |
|---|
| 1348 | 1359 | return 0; |
|---|
| 1349 | 1360 | |
|---|
| 1361 | +fallback_fail: |
|---|
| 1362 | + if (pool) |
|---|
| 1363 | + zswap_pool_destroy(pool); |
|---|
| 1350 | 1364 | hp_fail: |
|---|
| 1351 | 1365 | cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE); |
|---|
| 1352 | 1366 | dstmem_fail: |
|---|