From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/mm/zsmalloc.c | 236 ++++++++++++++++------------------------------------------- 1 files changed, 64 insertions(+), 172 deletions(-) diff --git a/kernel/mm/zsmalloc.c b/kernel/mm/zsmalloc.c index 59d97e3..1b309c6 100644 --- a/kernel/mm/zsmalloc.c +++ b/kernel/mm/zsmalloc.c @@ -39,8 +39,8 @@ #include <linux/highmem.h> #include <linux/string.h> #include <linux/slab.h> +#include <linux/pgtable.h> #include <asm/tlbflush.h> -#include <asm/pgtable.h> #include <linux/cpumask.h> #include <linux/cpu.h> #include <linux/vmalloc.h> @@ -52,11 +52,11 @@ #include <linux/zsmalloc.h> #include <linux/zpool.h> #include <linux/mount.h> +#include <linux/pseudo_fs.h> #include <linux/migrate.h> #include <linux/wait.h> #include <linux/pagemap.h> #include <linux/fs.h> -#include <linux/locallock.h> #define ZSPAGE_MAGIC 0x58 @@ -74,25 +74,12 @@ */ #define ZS_MAX_ZSPAGE_ORDER 2 #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER) + #define ZS_HANDLE_SIZE (sizeof(unsigned long)) - -#ifdef CONFIG_PREEMPT_RT_FULL - -struct zsmalloc_handle { - unsigned long addr; - struct mutex lock; -}; - -#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle)) - -#else - -#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long)) -#endif /* * Object location (<PFN>, <obj_idx>) is encoded as - * as single (unsigned long) handle value. + * a single (unsigned long) handle value. * * Note that object index <obj_idx> starts from 0. * @@ -306,11 +293,7 @@ }; struct mapping_area { -#ifdef CONFIG_PGTABLE_MAPPING - struct vm_struct *vm; /* vm area for mapping object that span pages */ -#else char *vm_buf; /* copy buffer for objects that span pages */ -#endif char *vm_addr; /* address of kmap_atomic()'ed pages */ enum zs_mapmode vm_mm; /* mapping mode */ }; @@ -339,7 +322,7 @@ static int create_cache(struct zs_pool *pool) { - pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE, + pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE, 0, 0, NULL); if (!pool->handle_cachep) return 1; @@ -363,26 +346,9 @@ static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp) { - void *p; - - p = kmem_cache_alloc(pool->handle_cachep, - gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); -#ifdef CONFIG_PREEMPT_RT_FULL - if (p) { - struct zsmalloc_handle *zh = p; - - mutex_init(&zh->lock); - } -#endif - return (unsigned long)p; + return (unsigned long)kmem_cache_alloc(pool->handle_cachep, + gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA)); } - -#ifdef CONFIG_PREEMPT_RT_FULL -static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle) -{ - return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1)); -} -#endif static void cache_free_handle(struct zs_pool *pool, unsigned long handle) { @@ -392,7 +358,7 @@ static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags) { return kmem_cache_alloc(pool->zspage_cachep, - flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); + flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA)); } static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) @@ -402,18 +368,12 @@ static void record_obj(unsigned long handle, unsigned long obj) { -#ifdef CONFIG_PREEMPT_RT_FULL - struct zsmalloc_handle *zh = zs_get_pure_handle(handle); - - WRITE_ONCE(zh->addr, obj); -#else /* * lsb of @obj represents handle lock while other bits * represent object value the handle is pointing so * updating shouldn't 
do store tearing. */ WRITE_ONCE(*(unsigned long *)handle, obj); -#endif } /* zpool driver */ @@ -460,7 +420,7 @@ case ZPOOL_MM_WO: zs_mm = ZS_MM_WO; break; - case ZPOOL_MM_RW: /* fallthru */ + case ZPOOL_MM_RW: default: zs_mm = ZS_MM_RW; break; @@ -479,15 +439,16 @@ } static struct zpool_driver zs_zpool_driver = { - .type = "zsmalloc", - .owner = THIS_MODULE, - .create = zs_zpool_create, - .destroy = zs_zpool_destroy, - .malloc = zs_zpool_malloc, - .free = zs_zpool_free, - .map = zs_zpool_map, - .unmap = zs_zpool_unmap, - .total_size = zs_zpool_total_size, + .type = "zsmalloc", + .owner = THIS_MODULE, + .create = zs_zpool_create, + .destroy = zs_zpool_destroy, + .malloc_support_movable = true, + .malloc = zs_zpool_malloc, + .free = zs_zpool_free, + .map = zs_zpool_map, + .unmap = zs_zpool_unmap, + .total_size = zs_zpool_total_size, }; MODULE_ALIAS("zpool-zsmalloc"); @@ -495,7 +456,6 @@ /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ static DEFINE_PER_CPU(struct mapping_area, zs_map_area); -static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock); static bool is_zspage_isolated(struct zspage *zspage) { @@ -513,10 +473,6 @@ return zspage->inuse; } -static inline void set_zspage_inuse(struct zspage *zspage, int val) -{ - zspage->inuse = val; -} static inline void mod_zspage_inuse(struct zspage *zspage, int val) { @@ -618,8 +574,6 @@ } zs_stat_root = debugfs_create_dir("zsmalloc", NULL); - if (!zs_stat_root) - pr_warn("debugfs 'zsmalloc' stat dir creation failed\n"); } static void __exit zs_stat_exit(void) @@ -690,29 +644,15 @@ static void zs_pool_stat_create(struct zs_pool *pool, const char *name) { - struct dentry *entry; - if (!zs_stat_root) { pr_warn("no root stat dir, not creating <%s> stat dir\n", name); return; } - entry = debugfs_create_dir(name, zs_stat_root); - if (!entry) { - pr_warn("debugfs dir <%s> creation failed\n", name); - return; - } - pool->stat_dentry = entry; + pool->stat_dentry = debugfs_create_dir(name, zs_stat_root); - entry = debugfs_create_file("classes", S_IFREG | 0444, - pool->stat_dentry, pool, - &zs_stats_size_fops); - if (!entry) { - pr_warn("%s: debugfs file entry <%s> creation failed\n", - name, "classes"); - debugfs_remove_recursive(pool->stat_dentry); - pool->stat_dentry = NULL; - } + debugfs_create_file("classes", S_IFREG | 0444, pool->stat_dentry, pool, + &zs_stats_size_fops); } static void zs_pool_stat_destroy(struct zs_pool *pool) @@ -925,13 +865,7 @@ static unsigned long handle_to_obj(unsigned long handle) { -#ifdef CONFIG_PREEMPT_RT_FULL - struct zsmalloc_handle *zh = zs_get_pure_handle(handle); - - return zh->addr; -#else return *(unsigned long *)handle; -#endif } static unsigned long obj_to_head(struct page *page, void *obj) @@ -945,46 +879,22 @@ static inline int testpin_tag(unsigned long handle) { -#ifdef CONFIG_PREEMPT_RT_FULL - struct zsmalloc_handle *zh = zs_get_pure_handle(handle); - - return mutex_is_locked(&zh->lock); -#else return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle); -#endif } static inline int trypin_tag(unsigned long handle) { -#ifdef CONFIG_PREEMPT_RT_FULL - struct zsmalloc_handle *zh = zs_get_pure_handle(handle); - - return mutex_trylock(&zh->lock); -#else return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle); -#endif } -static void pin_tag(unsigned long handle) +static void pin_tag(unsigned long handle) __acquires(bitlock) { -#ifdef CONFIG_PREEMPT_RT_FULL - struct zsmalloc_handle *zh = zs_get_pure_handle(handle); - - return mutex_lock(&zh->lock); -#else bit_spin_lock(HANDLE_PIN_BIT, 
(unsigned long *)handle); -#endif } -static void unpin_tag(unsigned long handle) +static void unpin_tag(unsigned long handle) __releases(bitlock) { -#ifdef CONFIG_PREEMPT_RT_FULL - struct zsmalloc_handle *zh = zs_get_pure_handle(handle); - - return mutex_unlock(&zh->lock); -#else bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle); -#endif } static void reset_page(struct page *page) @@ -1199,46 +1109,6 @@ return zspage; } -#ifdef CONFIG_PGTABLE_MAPPING -static inline int __zs_cpu_up(struct mapping_area *area) -{ - /* - * Make sure we don't leak memory if a cpu UP notification - * and zs_init() race and both call zs_cpu_up() on the same cpu - */ - if (area->vm) - return 0; - area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL); - if (!area->vm) - return -ENOMEM; - return 0; -} - -static inline void __zs_cpu_down(struct mapping_area *area) -{ - if (area->vm) - free_vm_area(area->vm); - area->vm = NULL; -} - -static inline void *__zs_map_object(struct mapping_area *area, - struct page *pages[2], int off, int size) -{ - BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages)); - area->vm_addr = area->vm->addr; - return area->vm_addr + off; -} - -static inline void __zs_unmap_object(struct mapping_area *area, - struct page *pages[2], int off, int size) -{ - unsigned long addr = (unsigned long)area->vm_addr; - - unmap_kernel_range(addr, PAGE_SIZE * 2); -} - -#else /* CONFIG_PGTABLE_MAPPING */ - static inline int __zs_cpu_up(struct mapping_area *area) { /* @@ -1318,8 +1188,6 @@ /* enable page faults to match kunmap_atomic() return conditions */ pagefault_enable(); } - -#endif /* CONFIG_PGTABLE_MAPPING */ static int zs_cpu_prepare(unsigned int cpu) { @@ -1410,7 +1278,7 @@ class = pool->size_class[class_idx]; off = (class->size * obj_idx) & ~PAGE_MASK; - area = &get_locked_var(zs_map_area_lock, zs_map_area); + area = &get_cpu_var(zs_map_area); area->vm_mm = mm; if (off + class->size <= PAGE_SIZE) { /* this object is contained entirely within a page */ @@ -1464,7 +1332,7 @@ __zs_unmap_object(area, pages, off, class->size); } - put_locked_var(zs_map_area_lock, zs_map_area); + put_cpu_var(zs_map_area); migrate_read_unlock(zspage); unpin_tag(handle); @@ -1880,26 +1748,50 @@ */ static void lock_zspage(struct zspage *zspage) { - struct page *page = get_first_page(zspage); + struct page *curr_page, *page; - do { - lock_page(page); - } while ((page = get_next_page(page)) != NULL); + /* + * Pages we haven't locked yet can be migrated off the list while we're + * trying to lock them, so we need to be careful and only attempt to + * lock each page under migrate_read_lock(). Otherwise, the page we lock + * may no longer belong to the zspage. This means that we may wait for + * the wrong page to unlock, so we must take a reference to the page + * prior to waiting for it to unlock outside migrate_read_lock(). 
+ */ + while (1) { + migrate_read_lock(zspage); + page = get_first_page(zspage); + if (trylock_page(page)) + break; + get_page(page); + migrate_read_unlock(zspage); + wait_on_page_locked(page); + put_page(page); + } + + curr_page = page; + while ((page = get_next_page(curr_page))) { + if (trylock_page(page)) { + curr_page = page; + } else { + get_page(page); + migrate_read_unlock(zspage); + wait_on_page_locked(page); + put_page(page); + migrate_read_lock(zspage); + } + } + migrate_read_unlock(zspage); } -static struct dentry *zs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int zs_init_fs_context(struct fs_context *fc) { - static const struct dentry_operations ops = { - .d_dname = simple_dname, - }; - - return mount_pseudo(fs_type, "zsmalloc:", NULL, &ops, ZSMALLOC_MAGIC); + return init_pseudo(fc, ZSMALLOC_MAGIC) ? 0 : -ENOMEM; } static struct file_system_type zsmalloc_fs = { .name = "zsmalloc", - .mount = zs_mount, + .init_fs_context = zs_init_fs_context, .kill_sb = kill_anon_super, }; @@ -1924,12 +1816,12 @@ rwlock_init(&zspage->lock); } -static void migrate_read_lock(struct zspage *zspage) +static void migrate_read_lock(struct zspage *zspage) __acquires(&zspage->lock) { read_lock(&zspage->lock); } -static void migrate_read_unlock(struct zspage *zspage) +static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock) { read_unlock(&zspage->lock); } -- Gitblit v1.6.2
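
The comment corrected above describes how an object location (<PFN>, <obj_idx>) is packed into a single unsigned long handle value. A minimal userspace sketch of that kind of packing is shown below; the bit widths, tag width, and helper names are illustrative assumptions for demonstration only, not the kernel's actual definitions (the real index width is derived from the platform's PFN width rather than hard-coded).

#include <assert.h>
#include <stdio.h>

/* Illustrative layout only: widths are assumed, not taken from the kernel. */
#define OBJ_TAG_BITS	1UL				/* lsb reserved for the pin tag */
#define OBJ_INDEX_BITS	10UL				/* assumed width of obj_idx */
#define OBJ_INDEX_MASK	((1UL << OBJ_INDEX_BITS) - 1)

/* Pack a (pfn, obj_idx) pair into one unsigned long value. */
static unsigned long pack_location(unsigned long pfn, unsigned long obj_idx)
{
	unsigned long obj;

	obj = pfn << OBJ_INDEX_BITS;			/* PFN in the upper bits */
	obj |= obj_idx & OBJ_INDEX_MASK;		/* object index below it */
	obj <<= OBJ_TAG_BITS;				/* leave the lsb free for the tag */
	return obj;
}

/* Recover the (pfn, obj_idx) pair from a packed value. */
static void unpack_location(unsigned long obj, unsigned long *pfn,
			    unsigned long *obj_idx)
{
	obj >>= OBJ_TAG_BITS;
	*pfn = obj >> OBJ_INDEX_BITS;
	*obj_idx = obj & OBJ_INDEX_MASK;
}

int main(void)
{
	unsigned long pfn, idx;
	unsigned long obj = pack_location(0x12345, 7);

	unpack_location(obj, &pfn, &idx);
	assert(pfn == 0x12345 && idx == 7);
	printf("obj=%#lx pfn=%#lx obj_idx=%lu\n", obj, pfn, idx);
	return 0;
}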
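
The rewritten lock_zspage() above follows a trylock-then-retry pattern: pages are walked and trylocked only while migrate_read_lock() is held, and when a trylock fails the migrate lock is dropped before blocking so that migration can make progress, after which the walk resumes from the last page already locked. The sketch below is a userspace analogue of that pattern using pthreads; the types are hypothetical stand-ins, a lock/unlock pair substitutes for wait_on_page_locked() (pthreads has no wait-until-unlocked primitive), and the kernel's get_page()/put_page() reference dance is omitted because nothing is freed in this toy model.

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-ins for struct page and struct zspage. */
struct fake_page {
	pthread_mutex_t lock;			/* plays the role of the page lock */
	struct fake_page *next;
};

struct fake_zspage {
	pthread_rwlock_t migrate_lock;		/* plays the role of zspage->lock */
	struct fake_page *first;
};

/*
 * Same shape as the patched lock_zspage(): only walk and trylock pages under
 * the migrate (read) lock; on contention, drop the migrate lock before
 * blocking, then re-take it and continue from the page we already own.
 */
static void lock_all_pages(struct fake_zspage *zs)
{
	struct fake_page *curr, *page;

	while (1) {
		pthread_rwlock_rdlock(&zs->migrate_lock);
		page = zs->first;
		if (pthread_mutex_trylock(&page->lock) == 0)
			break;
		pthread_rwlock_unlock(&zs->migrate_lock);
		pthread_mutex_lock(&page->lock);	/* wait for the holder */
		pthread_mutex_unlock(&page->lock);
	}

	curr = page;
	while ((page = curr->next) != NULL) {
		if (pthread_mutex_trylock(&page->lock) == 0) {
			curr = page;
		} else {
			pthread_rwlock_unlock(&zs->migrate_lock);
			pthread_mutex_lock(&page->lock);
			pthread_mutex_unlock(&page->lock);
			pthread_rwlock_rdlock(&zs->migrate_lock);
		}
	}
	pthread_rwlock_unlock(&zs->migrate_lock);
}

int main(void)
{
	struct fake_page p1 = { PTHREAD_MUTEX_INITIALIZER, NULL };
	struct fake_page p0 = { PTHREAD_MUTEX_INITIALIZER, &p1 };
	struct fake_zspage zs = { PTHREAD_RWLOCK_INITIALIZER, &p0 };

	lock_all_pages(&zs);
	printf("all pages locked\n");
	pthread_mutex_unlock(&p0.lock);
	pthread_mutex_unlock(&p1.lock);
	return 0;
}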