From 6778948f9de86c3cfaf36725a7c87dcff9ba247f Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Mon, 11 Dec 2023 08:20:59 +0000
Subject: [PATCH] kernel_5.10: mm/zsmalloc: drop PREEMPT_RT-specific code (non-RT build)
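
Bring kernel/mm/zsmalloc.c in line with the non-RT 5.10 code by removing
the PREEMPT_RT_FULL-specific paths and picking up the surrounding non-RT
changes:

- Drop the CONFIG_PREEMPT_RT_FULL zsmalloc_handle wrapper (mutex-based
  handle pinning) and the zs_map_area local_irq_lock. Handles are plain
  unsigned longs again, pinned with bit_spin_lock(), and zs_map_area is
  accessed with get_cpu_var()/put_cpu_var().
- Drop the CONFIG_PGTABLE_MAPPING mapping path (alloc_vm_area() /
  map_vm_area()); only the copy-buffer implementation remains.
- Mask __GFP_CMA as well as __GFP_HIGHMEM/__GFP_MOVABLE when allocating
  handles and zspages from the kmem caches.
- Include <linux/pgtable.h> instead of <asm/pgtable.h>, convert the
  zsmalloc mount to the pseudo_fs init_fs_context API, and advertise
  malloc_support_movable in the zpool driver.
- Remove the debugfs return-value checks (debugfs_create_*() failures are
  not treated as errors), drop the unused set_zspage_inuse() helper, and
  add __acquires/__releases annotations to the pin/unpin and migrate-lock
  helpers.
- Make lock_zspage() migration-aware: each page is only trylock'ed under
  migrate_read_lock(); on contention we take a page reference, drop the
  migrate lock, wait for the page and retry.

In outline, condensed from the new lock_zspage() in the hunk below (the
full version keeps restarting from the first page while nothing is locked
yet):

	while (1) {
		migrate_read_lock(zspage);
		page = get_first_page(zspage);
		if (trylock_page(page))
			break;
		get_page(page);
		migrate_read_unlock(zspage);
		wait_on_page_locked(page);
		put_page(page);
	}
	/* then walk get_next_page(); on trylock failure take a ref,
	 * drop the migrate lock, wait, put the ref and retake the lock */
	migrate_read_unlock(zspage);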
---
kernel/mm/zsmalloc.c | 236 ++++++++++++++++-------------------------------------------
1 file changed, 64 insertions(+), 172 deletions(-)
diff --git a/kernel/mm/zsmalloc.c b/kernel/mm/zsmalloc.c
index 59d97e3..1b309c6 100644
--- a/kernel/mm/zsmalloc.c
+++ b/kernel/mm/zsmalloc.c
@@ -39,8 +39,8 @@
#include <linux/highmem.h>
#include <linux/string.h>
#include <linux/slab.h>
+#include <linux/pgtable.h>
#include <asm/tlbflush.h>
-#include <asm/pgtable.h>
#include <linux/cpumask.h>
#include <linux/cpu.h>
#include <linux/vmalloc.h>
@@ -52,11 +52,11 @@
#include <linux/zsmalloc.h>
#include <linux/zpool.h>
#include <linux/mount.h>
+#include <linux/pseudo_fs.h>
#include <linux/migrate.h>
#include <linux/wait.h>
#include <linux/pagemap.h>
#include <linux/fs.h>
-#include <linux/locallock.h>
#define ZSPAGE_MAGIC 0x58
@@ -74,25 +74,12 @@
*/
#define ZS_MAX_ZSPAGE_ORDER 2
#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
+
#define ZS_HANDLE_SIZE (sizeof(unsigned long))
-
-#ifdef CONFIG_PREEMPT_RT_FULL
-
-struct zsmalloc_handle {
- unsigned long addr;
- struct mutex lock;
-};
-
-#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
-
-#else
-
-#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long))
-#endif
/*
* Object location (<PFN>, <obj_idx>) is encoded as
- * as single (unsigned long) handle value.
+ * a single (unsigned long) handle value.
*
* Note that object index <obj_idx> starts from 0.
*
@@ -306,11 +293,7 @@
};
struct mapping_area {
-#ifdef CONFIG_PGTABLE_MAPPING
- struct vm_struct *vm; /* vm area for mapping object that span pages */
-#else
char *vm_buf; /* copy buffer for objects that span pages */
-#endif
char *vm_addr; /* address of kmap_atomic()'ed pages */
enum zs_mapmode vm_mm; /* mapping mode */
};
@@ -339,7 +322,7 @@
static int create_cache(struct zs_pool *pool)
{
- pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE,
+ pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
0, 0, NULL);
if (!pool->handle_cachep)
return 1;
@@ -363,26 +346,9 @@
static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
{
- void *p;
-
- p = kmem_cache_alloc(pool->handle_cachep,
- gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
-#ifdef CONFIG_PREEMPT_RT_FULL
- if (p) {
- struct zsmalloc_handle *zh = p;
-
- mutex_init(&zh->lock);
- }
-#endif
- return (unsigned long)p;
+ return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
+ gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA));
}
-
-#ifdef CONFIG_PREEMPT_RT_FULL
-static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
-{
- return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1));
-}
-#endif
static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
{
@@ -392,7 +358,7 @@
static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
{
return kmem_cache_alloc(pool->zspage_cachep,
- flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+ flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA));
}
static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
@@ -402,18 +368,12 @@
static void record_obj(unsigned long handle, unsigned long obj)
{
-#ifdef CONFIG_PREEMPT_RT_FULL
- struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-
- WRITE_ONCE(zh->addr, obj);
-#else
/*
* lsb of @obj represents handle lock while other bits
* represent object value the handle is pointing so
* updating shouldn't do store tearing.
*/
WRITE_ONCE(*(unsigned long *)handle, obj);
-#endif
}
/* zpool driver */
@@ -460,7 +420,7 @@
case ZPOOL_MM_WO:
zs_mm = ZS_MM_WO;
break;
- case ZPOOL_MM_RW: /* fallthru */
+ case ZPOOL_MM_RW:
default:
zs_mm = ZS_MM_RW;
break;
@@ -479,15 +439,16 @@
}
static struct zpool_driver zs_zpool_driver = {
- .type = "zsmalloc",
- .owner = THIS_MODULE,
- .create = zs_zpool_create,
- .destroy = zs_zpool_destroy,
- .malloc = zs_zpool_malloc,
- .free = zs_zpool_free,
- .map = zs_zpool_map,
- .unmap = zs_zpool_unmap,
- .total_size = zs_zpool_total_size,
+ .type = "zsmalloc",
+ .owner = THIS_MODULE,
+ .create = zs_zpool_create,
+ .destroy = zs_zpool_destroy,
+ .malloc_support_movable = true,
+ .malloc = zs_zpool_malloc,
+ .free = zs_zpool_free,
+ .map = zs_zpool_map,
+ .unmap = zs_zpool_unmap,
+ .total_size = zs_zpool_total_size,
};
MODULE_ALIAS("zpool-zsmalloc");
@@ -495,7 +456,6 @@
/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
-static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock);
static bool is_zspage_isolated(struct zspage *zspage)
{
@@ -513,10 +473,6 @@
return zspage->inuse;
}
-static inline void set_zspage_inuse(struct zspage *zspage, int val)
-{
- zspage->inuse = val;
-}
static inline void mod_zspage_inuse(struct zspage *zspage, int val)
{
@@ -618,8 +574,6 @@
}
zs_stat_root = debugfs_create_dir("zsmalloc", NULL);
- if (!zs_stat_root)
- pr_warn("debugfs 'zsmalloc' stat dir creation failed\n");
}
static void __exit zs_stat_exit(void)
@@ -690,29 +644,15 @@
static void zs_pool_stat_create(struct zs_pool *pool, const char *name)
{
- struct dentry *entry;
-
if (!zs_stat_root) {
pr_warn("no root stat dir, not creating <%s> stat dir\n", name);
return;
}
- entry = debugfs_create_dir(name, zs_stat_root);
- if (!entry) {
- pr_warn("debugfs dir <%s> creation failed\n", name);
- return;
- }
- pool->stat_dentry = entry;
+ pool->stat_dentry = debugfs_create_dir(name, zs_stat_root);
- entry = debugfs_create_file("classes", S_IFREG | 0444,
- pool->stat_dentry, pool,
- &zs_stats_size_fops);
- if (!entry) {
- pr_warn("%s: debugfs file entry <%s> creation failed\n",
- name, "classes");
- debugfs_remove_recursive(pool->stat_dentry);
- pool->stat_dentry = NULL;
- }
+ debugfs_create_file("classes", S_IFREG | 0444, pool->stat_dentry, pool,
+ &zs_stats_size_fops);
}
static void zs_pool_stat_destroy(struct zs_pool *pool)
@@ -925,13 +865,7 @@
static unsigned long handle_to_obj(unsigned long handle)
{
-#ifdef CONFIG_PREEMPT_RT_FULL
- struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-
- return zh->addr;
-#else
return *(unsigned long *)handle;
-#endif
}
static unsigned long obj_to_head(struct page *page, void *obj)
@@ -945,46 +879,22 @@
static inline int testpin_tag(unsigned long handle)
{
-#ifdef CONFIG_PREEMPT_RT_FULL
- struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-
- return mutex_is_locked(&zh->lock);
-#else
return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
-#endif
}
static inline int trypin_tag(unsigned long handle)
{
-#ifdef CONFIG_PREEMPT_RT_FULL
- struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-
- return mutex_trylock(&zh->lock);
-#else
return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
-#endif
}
-static void pin_tag(unsigned long handle)
+static void pin_tag(unsigned long handle) __acquires(bitlock)
{
-#ifdef CONFIG_PREEMPT_RT_FULL
- struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-
- return mutex_lock(&zh->lock);
-#else
bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
-#endif
}
-static void unpin_tag(unsigned long handle)
+static void unpin_tag(unsigned long handle) __releases(bitlock)
{
-#ifdef CONFIG_PREEMPT_RT_FULL
- struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-
- return mutex_unlock(&zh->lock);
-#else
bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
-#endif
}
static void reset_page(struct page *page)
@@ -1199,46 +1109,6 @@
return zspage;
}
-#ifdef CONFIG_PGTABLE_MAPPING
-static inline int __zs_cpu_up(struct mapping_area *area)
-{
- /*
- * Make sure we don't leak memory if a cpu UP notification
- * and zs_init() race and both call zs_cpu_up() on the same cpu
- */
- if (area->vm)
- return 0;
- area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
- if (!area->vm)
- return -ENOMEM;
- return 0;
-}
-
-static inline void __zs_cpu_down(struct mapping_area *area)
-{
- if (area->vm)
- free_vm_area(area->vm);
- area->vm = NULL;
-}
-
-static inline void *__zs_map_object(struct mapping_area *area,
- struct page *pages[2], int off, int size)
-{
- BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages));
- area->vm_addr = area->vm->addr;
- return area->vm_addr + off;
-}
-
-static inline void __zs_unmap_object(struct mapping_area *area,
- struct page *pages[2], int off, int size)
-{
- unsigned long addr = (unsigned long)area->vm_addr;
-
- unmap_kernel_range(addr, PAGE_SIZE * 2);
-}
-
-#else /* CONFIG_PGTABLE_MAPPING */
-
static inline int __zs_cpu_up(struct mapping_area *area)
{
/*
@@ -1318,8 +1188,6 @@
/* enable page faults to match kunmap_atomic() return conditions */
pagefault_enable();
}
-
-#endif /* CONFIG_PGTABLE_MAPPING */
static int zs_cpu_prepare(unsigned int cpu)
{
@@ -1410,7 +1278,7 @@
class = pool->size_class[class_idx];
off = (class->size * obj_idx) & ~PAGE_MASK;
- area = &get_locked_var(zs_map_area_lock, zs_map_area);
+ area = &get_cpu_var(zs_map_area);
area->vm_mm = mm;
if (off + class->size <= PAGE_SIZE) {
/* this object is contained entirely within a page */
@@ -1464,7 +1332,7 @@
__zs_unmap_object(area, pages, off, class->size);
}
- put_locked_var(zs_map_area_lock, zs_map_area);
+ put_cpu_var(zs_map_area);
migrate_read_unlock(zspage);
unpin_tag(handle);
@@ -1880,26 +1748,50 @@
*/
static void lock_zspage(struct zspage *zspage)
{
- struct page *page = get_first_page(zspage);
+ struct page *curr_page, *page;
- do {
- lock_page(page);
- } while ((page = get_next_page(page)) != NULL);
+ /*
+ * Pages we haven't locked yet can be migrated off the list while we're
+ * trying to lock them, so we need to be careful and only attempt to
+ * lock each page under migrate_read_lock(). Otherwise, the page we lock
+ * may no longer belong to the zspage. This means that we may wait for
+ * the wrong page to unlock, so we must take a reference to the page
+ * prior to waiting for it to unlock outside migrate_read_lock().
+ */
+ while (1) {
+ migrate_read_lock(zspage);
+ page = get_first_page(zspage);
+ if (trylock_page(page))
+ break;
+ get_page(page);
+ migrate_read_unlock(zspage);
+ wait_on_page_locked(page);
+ put_page(page);
+ }
+
+ curr_page = page;
+ while ((page = get_next_page(curr_page))) {
+ if (trylock_page(page)) {
+ curr_page = page;
+ } else {
+ get_page(page);
+ migrate_read_unlock(zspage);
+ wait_on_page_locked(page);
+ put_page(page);
+ migrate_read_lock(zspage);
+ }
+ }
+ migrate_read_unlock(zspage);
}
-static struct dentry *zs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int zs_init_fs_context(struct fs_context *fc)
{
- static const struct dentry_operations ops = {
- .d_dname = simple_dname,
- };
-
- return mount_pseudo(fs_type, "zsmalloc:", NULL, &ops, ZSMALLOC_MAGIC);
+ return init_pseudo(fc, ZSMALLOC_MAGIC) ? 0 : -ENOMEM;
}
static struct file_system_type zsmalloc_fs = {
.name = "zsmalloc",
- .mount = zs_mount,
+ .init_fs_context = zs_init_fs_context,
.kill_sb = kill_anon_super,
};
@@ -1924,12 +1816,12 @@
rwlock_init(&zspage->lock);
}
-static void migrate_read_lock(struct zspage *zspage)
+static void migrate_read_lock(struct zspage *zspage) __acquires(&zspage->lock)
{
read_lock(&zspage->lock);
}
-static void migrate_read_unlock(struct zspage *zspage)
+static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock)
{
read_unlock(&zspage->lock);
}
--
Gitblit v1.6.2