From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/mm/slab_common.c | 877 ++++++++++++++++------------------------------------------ 1 files changed, 247 insertions(+), 630 deletions(-) diff --git a/kernel/mm/slab_common.c b/kernel/mm/slab_common.c index 0d8d00b..05135eb 100644 --- a/kernel/mm/slab_common.c +++ b/kernel/mm/slab_common.c @@ -12,11 +12,14 @@ #include <linux/memory.h> #include <linux/cache.h> #include <linux/compiler.h> +#include <linux/kfence.h> #include <linux/module.h> #include <linux/cpu.h> #include <linux/uaccess.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> +#include <linux/debugfs.h> +#include <linux/kasan.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/page.h> @@ -24,6 +27,9 @@ #define CREATE_TRACE_POINTS #include <trace/events/kmem.h> +#undef CREATE_TRACE_POINTS +#include <trace/hooks/mm.h> +#include "internal.h" #include "slab.h" @@ -50,7 +56,7 @@ */ #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \ - SLAB_FAILSLAB | SLAB_KASAN) + SLAB_FAILSLAB | kasan_never_merge()) #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \ SLAB_CACHE_DMA32 | SLAB_ACCOUNT) @@ -84,8 +90,7 @@ #ifdef CONFIG_DEBUG_VM static int kmem_cache_sanity_check(const char *name, unsigned int size) { - if (!name || in_interrupt() || size < sizeof(void *) || - size > KMALLOC_MAX_SIZE) { + if (!name || in_interrupt() || size > KMALLOC_MAX_SIZE) { pr_err("kmem_cache_create(%s) integrity check failed\n", name); return -EINVAL; } @@ -127,138 +132,6 @@ return i; } -#ifdef CONFIG_MEMCG_KMEM - -LIST_HEAD(slab_root_caches); -static DEFINE_SPINLOCK(memcg_kmem_wq_lock); - -void slab_init_memcg_params(struct kmem_cache *s) -{ - s->memcg_params.root_cache = NULL; - RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL); - INIT_LIST_HEAD(&s->memcg_params.children); - s->memcg_params.dying = false; -} - -static int init_memcg_params(struct kmem_cache *s, - struct mem_cgroup *memcg, struct kmem_cache *root_cache) -{ - struct memcg_cache_array *arr; - - if (root_cache) { - s->memcg_params.root_cache = root_cache; - s->memcg_params.memcg = memcg; - INIT_LIST_HEAD(&s->memcg_params.children_node); - INIT_LIST_HEAD(&s->memcg_params.kmem_caches_node); - return 0; - } - - slab_init_memcg_params(s); - - if (!memcg_nr_cache_ids) - return 0; - - arr = kvzalloc(sizeof(struct memcg_cache_array) + - memcg_nr_cache_ids * sizeof(void *), - GFP_KERNEL); - if (!arr) - return -ENOMEM; - - RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr); - return 0; -} - -static void destroy_memcg_params(struct kmem_cache *s) -{ - if (is_root_cache(s)) - kvfree(rcu_access_pointer(s->memcg_params.memcg_caches)); -} - -static void free_memcg_params(struct rcu_head *rcu) -{ - struct memcg_cache_array *old; - - old = container_of(rcu, struct memcg_cache_array, rcu); - kvfree(old); -} - -static int update_memcg_params(struct kmem_cache *s, int new_array_size) -{ - struct memcg_cache_array *old, *new; - - new = kvzalloc(sizeof(struct memcg_cache_array) + - new_array_size * sizeof(void *), GFP_KERNEL); - if (!new) - return -ENOMEM; - - old = rcu_dereference_protected(s->memcg_params.memcg_caches, - lockdep_is_held(&slab_mutex)); - if (old) - memcpy(new->entries, old->entries, - memcg_nr_cache_ids * sizeof(void *)); - - rcu_assign_pointer(s->memcg_params.memcg_caches, new); - if (old) - call_rcu(&old->rcu, free_memcg_params); - return 0; -} - -int memcg_update_all_caches(int num_memcgs) -{ - struct kmem_cache *s; - int ret = 0; - - mutex_lock(&slab_mutex); - list_for_each_entry(s, &slab_root_caches, root_caches_node) { - ret = update_memcg_params(s, num_memcgs); - /* - * Instead of freeing the memory, we'll just leave the caches - * up to this point in an updated state. - */ - if (ret) - break; - } - mutex_unlock(&slab_mutex); - return ret; -} - -void memcg_link_cache(struct kmem_cache *s) -{ - if (is_root_cache(s)) { - list_add(&s->root_caches_node, &slab_root_caches); - } else { - list_add(&s->memcg_params.children_node, - &s->memcg_params.root_cache->memcg_params.children); - list_add(&s->memcg_params.kmem_caches_node, - &s->memcg_params.memcg->kmem_caches); - } -} - -static void memcg_unlink_cache(struct kmem_cache *s) -{ - if (is_root_cache(s)) { - list_del(&s->root_caches_node); - } else { - list_del(&s->memcg_params.children_node); - list_del(&s->memcg_params.kmem_caches_node); - } -} -#else -static inline int init_memcg_params(struct kmem_cache *s, - struct mem_cgroup *memcg, struct kmem_cache *root_cache) -{ - return 0; -} - -static inline void destroy_memcg_params(struct kmem_cache *s) -{ -} - -static inline void memcg_unlink_cache(struct kmem_cache *s) -{ -} -#endif /* CONFIG_MEMCG_KMEM */ - /* * Figure out what the alignment of the objects will be given a set of * flags, a user specified alignment and the size of the objects. @@ -282,8 +155,7 @@ align = max(align, ralign); } - if (align < ARCH_SLAB_MINALIGN) - align = ARCH_SLAB_MINALIGN; + align = max(align, arch_slab_minalign()); return ALIGN(align, sizeof(void *)); } @@ -294,9 +166,6 @@ int slab_unmergeable(struct kmem_cache *s) { if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE)) - return 1; - - if (!is_root_cache(s)) return 1; if (s->ctor) @@ -328,12 +197,12 @@ size = ALIGN(size, sizeof(void *)); align = calculate_alignment(flags, align, size); size = ALIGN(size, align); - flags = kmem_cache_flags(size, flags, name, NULL); + flags = kmem_cache_flags(size, flags, name); if (flags & SLAB_NEVER_MERGE) return NULL; - list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) { + list_for_each_entry_reverse(s, &slab_caches, list) { if (slab_unmergeable(s)) continue; @@ -365,7 +234,7 @@ unsigned int object_size, unsigned int align, slab_flags_t flags, unsigned int useroffset, unsigned int usersize, void (*ctor)(void *), - struct mem_cgroup *memcg, struct kmem_cache *root_cache) + struct kmem_cache *root_cache) { struct kmem_cache *s; int err; @@ -385,30 +254,25 @@ s->useroffset = useroffset; s->usersize = usersize; - err = init_memcg_params(s, memcg, root_cache); - if (err) - goto out_free_cache; - err = __kmem_cache_create(s, flags); if (err) goto out_free_cache; s->refcount = 1; list_add(&s->list, &slab_caches); - memcg_link_cache(s); out: if (err) return ERR_PTR(err); return s; out_free_cache: - destroy_memcg_params(s); kmem_cache_free(kmem_cache, s); goto out; } -/* - * kmem_cache_create_usercopy - Create a cache. +/** + * kmem_cache_create_usercopy - Create a cache with a region suitable + * for copying to userspace * @name: A string which is used in /proc/slabinfo to identify this cache. * @size: The size of objects to be created in this cache. * @align: The required alignment for the objects. @@ -417,7 +281,6 @@ * @usersize: Usercopy region size * @ctor: A constructor for the objects. * - * Returns a ptr to the cache on success, NULL on failure. * Cannot be called within a interrupt, but can be interrupted. * The @ctor is run when new pages are allocated by the cache. * @@ -426,12 +289,14 @@ * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) * to catch references to uninitialised memory. * - * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check + * %SLAB_RED_ZONE - Insert `Red` zones around the allocated memory to check * for buffer overruns. * * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware * cacheline. This can be beneficial if you're counting cycles as closely * as davem. + * + * Return: a pointer to the cache on success, NULL on failure. */ struct kmem_cache * kmem_cache_create_usercopy(const char *name, @@ -446,7 +311,16 @@ get_online_cpus(); get_online_mems(); - memcg_get_cache_ids(); + +#ifdef CONFIG_SLUB_DEBUG + /* + * If no slub_debug was enabled globally, the static key is not yet + * enabled by setup_slub_debug(). Enable it if the cache is being + * created with any of the debugging flags passed explicitly. + */ + if (flags & SLAB_DEBUG_FLAGS) + static_branch_enable(&slub_debug_enabled); +#endif mutex_lock(&slab_mutex); @@ -487,7 +361,7 @@ s = create_cache(cache_name, size, calculate_alignment(flags, align, size), - flags, useroffset, usersize, ctor, NULL, NULL); + flags, useroffset, usersize, ctor, NULL); if (IS_ERR(s)) { err = PTR_ERR(s); kfree_const(cache_name); @@ -496,7 +370,6 @@ out_unlock: mutex_unlock(&slab_mutex); - memcg_put_cache_ids(); put_online_mems(); put_online_cpus(); @@ -515,6 +388,31 @@ } EXPORT_SYMBOL(kmem_cache_create_usercopy); +/** + * kmem_cache_create - Create a cache. + * @name: A string which is used in /proc/slabinfo to identify this cache. + * @size: The size of objects to be created in this cache. + * @align: The required alignment for the objects. + * @flags: SLAB flags + * @ctor: A constructor for the objects. + * + * Cannot be called within a interrupt, but can be interrupted. + * The @ctor is run when new pages are allocated by the cache. + * + * The flags are + * + * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) + * to catch references to uninitialised memory. + * + * %SLAB_RED_ZONE - Insert `Red` zones around the allocated memory to check + * for buffer overruns. + * + * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware + * cacheline. This can be beneficial if you're counting cycles as closely + * as davem. + * + * Return: a pointer to the cache on success, NULL on failure. + */ struct kmem_cache * kmem_cache_create(const char *name, unsigned int size, unsigned int align, slab_flags_t flags, void (*ctor)(void *)) @@ -532,7 +430,7 @@ /* * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the * @slab_caches_to_rcu_destroy list. The slab pages are freed - * through RCU and and the associated kmem_cache are dereferenced + * through RCU and the associated kmem_cache are dereferenced * while freeing the pages, so the kmem_caches should be freed only * after the pending RCU operations are finished. As rcu_barrier() * is a pretty slow operation, we batch all pending destructions @@ -548,6 +446,8 @@ rcu_barrier(); list_for_each_entry_safe(s, s2, &to_destroy, list) { + debugfs_slab_release(s); + kfence_shutdown_cache(s); #ifdef SLAB_SUPPORTS_SYSFS sysfs_slab_release(s); #else @@ -564,7 +464,6 @@ if (__kmem_cache_shutdown(s) != 0) return -EBUSY; - memcg_unlink_cache(s); list_del(&s->list); if (s->flags & SLAB_TYPESAFE_BY_RCU) { @@ -574,6 +473,8 @@ list_add_tail(&s->list, &slab_caches_to_rcu_destroy); schedule_work(&slab_caches_to_rcu_destroy_work); } else { + kfence_shutdown_cache(s); + debugfs_slab_release(s); #ifdef SLAB_SUPPORTS_SYSFS sysfs_slab_unlink(s); sysfs_slab_release(s); @@ -585,297 +486,9 @@ return 0; } -#ifdef CONFIG_MEMCG_KMEM -/* - * memcg_create_kmem_cache - Create a cache for a memory cgroup. - * @memcg: The memory cgroup the new cache is for. - * @root_cache: The parent of the new cache. - * - * This function attempts to create a kmem cache that will serve allocation - * requests going from @memcg to @root_cache. The new cache inherits properties - * from its parent. - */ -void memcg_create_kmem_cache(struct mem_cgroup *memcg, - struct kmem_cache *root_cache) -{ - static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */ - struct cgroup_subsys_state *css = &memcg->css; - struct memcg_cache_array *arr; - struct kmem_cache *s = NULL; - char *cache_name; - int idx; - - get_online_cpus(); - get_online_mems(); - - mutex_lock(&slab_mutex); - - /* - * The memory cgroup could have been offlined while the cache - * creation work was pending. - */ - if (memcg->kmem_state != KMEM_ONLINE || root_cache->memcg_params.dying) - goto out_unlock; - - idx = memcg_cache_id(memcg); - arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches, - lockdep_is_held(&slab_mutex)); - - /* - * Since per-memcg caches are created asynchronously on first - * allocation (see memcg_kmem_get_cache()), several threads can try to - * create the same cache, but only one of them may succeed. - */ - if (arr->entries[idx]) - goto out_unlock; - - cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf)); - cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name, - css->serial_nr, memcg_name_buf); - if (!cache_name) - goto out_unlock; - - s = create_cache(cache_name, root_cache->object_size, - root_cache->align, - root_cache->flags & CACHE_CREATE_MASK, - root_cache->useroffset, root_cache->usersize, - root_cache->ctor, memcg, root_cache); - /* - * If we could not create a memcg cache, do not complain, because - * that's not critical at all as we can always proceed with the root - * cache. - */ - if (IS_ERR(s)) { - kfree(cache_name); - goto out_unlock; - } - - /* - * Since readers won't lock (see cache_from_memcg_idx()), we need a - * barrier here to ensure nobody will see the kmem_cache partially - * initialized. - */ - smp_wmb(); - arr->entries[idx] = s; - -out_unlock: - mutex_unlock(&slab_mutex); - - put_online_mems(); - put_online_cpus(); -} - -static void kmemcg_deactivate_workfn(struct work_struct *work) -{ - struct kmem_cache *s = container_of(work, struct kmem_cache, - memcg_params.deact_work); - - get_online_cpus(); - get_online_mems(); - - mutex_lock(&slab_mutex); - - s->memcg_params.deact_fn(s); - - mutex_unlock(&slab_mutex); - - put_online_mems(); - put_online_cpus(); - - /* done, put the ref from slab_deactivate_memcg_cache_rcu_sched() */ - css_put(&s->memcg_params.memcg->css); -} - -static void kmemcg_deactivate_rcufn(struct rcu_head *head) -{ - struct kmem_cache *s = container_of(head, struct kmem_cache, - memcg_params.deact_rcu_head); - - /* - * We need to grab blocking locks. Bounce to ->deact_work. The - * work item shares the space with the RCU head and can't be - * initialized eariler. - */ - INIT_WORK(&s->memcg_params.deact_work, kmemcg_deactivate_workfn); - queue_work(memcg_kmem_cache_wq, &s->memcg_params.deact_work); -} - -/** - * slab_deactivate_memcg_cache_rcu_sched - schedule deactivation after a - * sched RCU grace period - * @s: target kmem_cache - * @deact_fn: deactivation function to call - * - * Schedule @deact_fn to be invoked with online cpus, mems and slab_mutex - * held after a sched RCU grace period. The slab is guaranteed to stay - * alive until @deact_fn is finished. This is to be used from - * __kmemcg_cache_deactivate(). - */ -void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s, - void (*deact_fn)(struct kmem_cache *)) -{ - if (WARN_ON_ONCE(is_root_cache(s)) || - WARN_ON_ONCE(s->memcg_params.deact_fn)) - return; - - /* - * memcg_kmem_wq_lock is used to synchronize memcg_params.dying - * flag and make sure that no new kmem_cache deactivation tasks - * are queued (see flush_memcg_workqueue() ). - */ - spin_lock_irq(&memcg_kmem_wq_lock); - if (s->memcg_params.root_cache->memcg_params.dying) - goto unlock; - - /* pin memcg so that @s doesn't get destroyed in the middle */ - css_get(&s->memcg_params.memcg->css); - - s->memcg_params.deact_fn = deact_fn; - call_rcu_sched(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn); -unlock: - spin_unlock_irq(&memcg_kmem_wq_lock); -} - -void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg) -{ - int idx; - struct memcg_cache_array *arr; - struct kmem_cache *s, *c; - - idx = memcg_cache_id(memcg); - - get_online_cpus(); - get_online_mems(); - - mutex_lock(&slab_mutex); - list_for_each_entry(s, &slab_root_caches, root_caches_node) { - arr = rcu_dereference_protected(s->memcg_params.memcg_caches, - lockdep_is_held(&slab_mutex)); - c = arr->entries[idx]; - if (!c) - continue; - - __kmemcg_cache_deactivate(c); - arr->entries[idx] = NULL; - } - mutex_unlock(&slab_mutex); - - put_online_mems(); - put_online_cpus(); -} - -void memcg_destroy_kmem_caches(struct mem_cgroup *memcg) -{ - struct kmem_cache *s, *s2; - - get_online_cpus(); - get_online_mems(); - - mutex_lock(&slab_mutex); - list_for_each_entry_safe(s, s2, &memcg->kmem_caches, - memcg_params.kmem_caches_node) { - /* - * The cgroup is about to be freed and therefore has no charges - * left. Hence, all its caches must be empty by now. - */ - BUG_ON(shutdown_cache(s)); - } - mutex_unlock(&slab_mutex); - - put_online_mems(); - put_online_cpus(); -} - -static int shutdown_memcg_caches(struct kmem_cache *s) -{ - struct memcg_cache_array *arr; - struct kmem_cache *c, *c2; - LIST_HEAD(busy); - int i; - - BUG_ON(!is_root_cache(s)); - - /* - * First, shutdown active caches, i.e. caches that belong to online - * memory cgroups. - */ - arr = rcu_dereference_protected(s->memcg_params.memcg_caches, - lockdep_is_held(&slab_mutex)); - for_each_memcg_cache_index(i) { - c = arr->entries[i]; - if (!c) - continue; - if (shutdown_cache(c)) - /* - * The cache still has objects. Move it to a temporary - * list so as not to try to destroy it for a second - * time while iterating over inactive caches below. - */ - list_move(&c->memcg_params.children_node, &busy); - else - /* - * The cache is empty and will be destroyed soon. Clear - * the pointer to it in the memcg_caches array so that - * it will never be accessed even if the root cache - * stays alive. - */ - arr->entries[i] = NULL; - } - - /* - * Second, shutdown all caches left from memory cgroups that are now - * offline. - */ - list_for_each_entry_safe(c, c2, &s->memcg_params.children, - memcg_params.children_node) - shutdown_cache(c); - - list_splice(&busy, &s->memcg_params.children); - - /* - * A cache being destroyed must be empty. In particular, this means - * that all per memcg caches attached to it must be empty too. - */ - if (!list_empty(&s->memcg_params.children)) - return -EBUSY; - return 0; -} - -static void memcg_set_kmem_cache_dying(struct kmem_cache *s) -{ - spin_lock_irq(&memcg_kmem_wq_lock); - s->memcg_params.dying = true; - spin_unlock_irq(&memcg_kmem_wq_lock); -} - -static void flush_memcg_workqueue(struct kmem_cache *s) -{ - /* - * SLUB deactivates the kmem_caches through call_rcu_sched. Make - * sure all registered rcu callbacks have been invoked. - */ - if (IS_ENABLED(CONFIG_SLUB)) - rcu_barrier_sched(); - - /* - * SLAB and SLUB create memcg kmem_caches through workqueue and SLUB - * deactivates the memcg kmem_caches through workqueue. Make sure all - * previous workitems on workqueue are processed. - */ - if (likely(memcg_kmem_cache_wq)) - flush_workqueue(memcg_kmem_cache_wq); -} -#else -static inline int shutdown_memcg_caches(struct kmem_cache *s) -{ - return 0; -} -#endif /* CONFIG_MEMCG_KMEM */ - void slab_kmem_cache_release(struct kmem_cache *s) { __kmem_cache_release(s); - destroy_memcg_params(s); kfree_const(s->name); kmem_cache_free(kmem_cache, s); } @@ -896,36 +509,7 @@ if (s->refcount) goto out_unlock; -#ifdef CONFIG_MEMCG_KMEM - memcg_set_kmem_cache_dying(s); - - mutex_unlock(&slab_mutex); - - put_online_mems(); - put_online_cpus(); - - flush_memcg_workqueue(s); - - get_online_cpus(); - get_online_mems(); - - mutex_lock(&slab_mutex); - - /* - * Another thread referenced it again - */ - if (READ_ONCE(s->refcount)) { - spin_lock_irq(&memcg_kmem_wq_lock); - s->memcg_params.dying = false; - spin_unlock_irq(&memcg_kmem_wq_lock); - goto out_unlock; - } -#endif - - err = shutdown_memcg_caches(s); - if (!err) - err = shutdown_cache(s); - + err = shutdown_cache(s); if (err) { pr_err("kmem_cache_destroy %s: Slab cache still has objects\n", s->name); @@ -945,6 +529,8 @@ * * Releases as many slabs as possible for a cache. * To help debugging, a zero exit status indicates all slabs were released. + * + * Return: %0 if all slabs were released, non-zero otherwise */ int kmem_cache_shrink(struct kmem_cache *cachep) { @@ -972,14 +558,21 @@ unsigned int useroffset, unsigned int usersize) { int err; + unsigned int align = ARCH_KMALLOC_MINALIGN; s->name = name; s->size = s->object_size = size; - s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size); + + /* + * For power of two sizes, guarantee natural alignment for kmalloc + * caches, regardless of SL*B debugging options. + */ + if (is_power_of_2(size)) + align = max(align, size); + s->align = calculate_alignment(flags, align, size); + s->useroffset = useroffset; s->usersize = usersize; - - slab_init_memcg_params(s); err = __kmem_cache_create(s, flags); @@ -1000,14 +593,15 @@ panic("Out of memory when creating slab %s\n", name); create_boot_cache(s, name, size, flags, useroffset, usersize); + kasan_cache_create_kmalloc(s); list_add(&s->list, &slab_caches); - memcg_link_cache(s); s->refcount = 1; return s; } struct kmem_cache * -kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init; +kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init = +{ /* initialization for https://bugs.llvm.org/show_bug.cgi?id=42570 */ }; EXPORT_SYMBOL(kmalloc_caches); /* @@ -1055,6 +649,7 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) { unsigned int index; + struct kmem_cache *s = NULL; if (size <= 192) { if (!size) @@ -1062,15 +657,34 @@ index = size_index[size_index_elem(size)]; } else { - if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { - WARN_ON(1); + if (WARN_ON_ONCE(size > KMALLOC_MAX_CACHE_SIZE)) return NULL; - } index = fls(size - 1); } + trace_android_vh_kmalloc_slab(index, flags, &s); + if (s) + return s; + return kmalloc_caches[kmalloc_type(flags)][index]; } + +#ifdef CONFIG_ZONE_DMA +#define INIT_KMALLOC_INFO(__size, __short_size) \ +{ \ + .name[KMALLOC_NORMAL] = "kmalloc-" #__short_size, \ + .name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size, \ + .name[KMALLOC_DMA] = "dma-kmalloc-" #__short_size, \ + .size = __size, \ +} +#else +#define INIT_KMALLOC_INFO(__size, __short_size) \ +{ \ + .name[KMALLOC_NORMAL] = "kmalloc-" #__short_size, \ + .name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size, \ + .size = __size, \ +} +#endif /* * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time. @@ -1078,20 +692,33 @@ * kmalloc-67108864. */ const struct kmalloc_info_struct kmalloc_info[] __initconst = { - {NULL, 0}, {"kmalloc-96", 96}, - {"kmalloc-192", 192}, {"kmalloc-8", 8}, - {"kmalloc-16", 16}, {"kmalloc-32", 32}, - {"kmalloc-64", 64}, {"kmalloc-128", 128}, - {"kmalloc-256", 256}, {"kmalloc-512", 512}, - {"kmalloc-1k", 1024}, {"kmalloc-2k", 2048}, - {"kmalloc-4k", 4096}, {"kmalloc-8k", 8192}, - {"kmalloc-16k", 16384}, {"kmalloc-32k", 32768}, - {"kmalloc-64k", 65536}, {"kmalloc-128k", 131072}, - {"kmalloc-256k", 262144}, {"kmalloc-512k", 524288}, - {"kmalloc-1M", 1048576}, {"kmalloc-2M", 2097152}, - {"kmalloc-4M", 4194304}, {"kmalloc-8M", 8388608}, - {"kmalloc-16M", 16777216}, {"kmalloc-32M", 33554432}, - {"kmalloc-64M", 67108864} + INIT_KMALLOC_INFO(0, 0), + INIT_KMALLOC_INFO(96, 96), + INIT_KMALLOC_INFO(192, 192), + INIT_KMALLOC_INFO(8, 8), + INIT_KMALLOC_INFO(16, 16), + INIT_KMALLOC_INFO(32, 32), + INIT_KMALLOC_INFO(64, 64), + INIT_KMALLOC_INFO(128, 128), + INIT_KMALLOC_INFO(256, 256), + INIT_KMALLOC_INFO(512, 512), + INIT_KMALLOC_INFO(1024, 1k), + INIT_KMALLOC_INFO(2048, 2k), + INIT_KMALLOC_INFO(4096, 4k), + INIT_KMALLOC_INFO(8192, 8k), + INIT_KMALLOC_INFO(16384, 16k), + INIT_KMALLOC_INFO(32768, 32k), + INIT_KMALLOC_INFO(65536, 64k), + INIT_KMALLOC_INFO(131072, 128k), + INIT_KMALLOC_INFO(262144, 256k), + INIT_KMALLOC_INFO(524288, 512k), + INIT_KMALLOC_INFO(1048576, 1M), + INIT_KMALLOC_INFO(2097152, 2M), + INIT_KMALLOC_INFO(4194304, 4M), + INIT_KMALLOC_INFO(8388608, 8M), + INIT_KMALLOC_INFO(16777216, 16M), + INIT_KMALLOC_INFO(33554432, 32M), + INIT_KMALLOC_INFO(67108864, 64M) }; /* @@ -1141,36 +768,14 @@ } } -static const char * -kmalloc_cache_name(const char *prefix, unsigned int size) -{ - - static const char units[3] = "\0kM"; - int idx = 0; - - while (size >= 1024 && (size % 1024 == 0)) { - size /= 1024; - idx++; - } - - return kasprintf(GFP_NOWAIT, "%s-%u%c", prefix, size, units[idx]); -} - static void __init -new_kmalloc_cache(int idx, int type, slab_flags_t flags) +new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags) { - const char *name; - - if (type == KMALLOC_RECLAIM) { + if (type == KMALLOC_RECLAIM) flags |= SLAB_RECLAIM_ACCOUNT; - name = kmalloc_cache_name("kmalloc-rcl", - kmalloc_info[idx].size); - BUG_ON(!name); - } else { - name = kmalloc_info[idx].name; - } - kmalloc_caches[type][idx] = create_kmalloc_cache(name, + kmalloc_caches[type][idx] = create_kmalloc_cache( + kmalloc_info[idx].name[type], kmalloc_info[idx].size, flags, 0, kmalloc_info[idx].size); } @@ -1182,7 +787,8 @@ */ void __init create_kmalloc_caches(slab_flags_t flags) { - int i, type; + int i; + enum kmalloc_cache_type type; for (type = KMALLOC_NORMAL; type <= KMALLOC_RECLAIM; type++) { for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { @@ -1211,17 +817,28 @@ struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i]; if (s) { - unsigned int size = kmalloc_size(i); - const char *n = kmalloc_cache_name("dma-kmalloc", size); - - BUG_ON(!n); kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache( - n, size, SLAB_CACHE_DMA | flags, 0, 0); + kmalloc_info[i].name[KMALLOC_DMA], + kmalloc_info[i].size, + SLAB_CACHE_DMA | flags, 0, + kmalloc_info[i].size); } } #endif } #endif /* !CONFIG_SLOB */ + +gfp_t kmalloc_fix_flags(gfp_t flags) +{ + gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK; + + flags &= ~GFP_SLAB_BUG_MASK; + pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n", + invalid_mask, &invalid_mask, flags, &flags); + dump_stack(); + + return flags; +} /* * To avoid unnecessary overhead, we pass through large allocation requests @@ -1230,13 +847,21 @@ */ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) { - void *ret; + void *ret = NULL; struct page *page; + + if (unlikely(flags & GFP_SLAB_BUG_MASK)) + flags = kmalloc_fix_flags(flags); flags |= __GFP_COMP; page = alloc_pages(flags, order); - ret = page ? page_address(page) : NULL; + if (likely(page)) { + ret = page_address(page); + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, + PAGE_SIZE << order); + } ret = kasan_kmalloc_large(ret, size, flags); + /* As ret might get tagged, call kmemleak hook after KASAN. */ kmemleak_alloc(ret, size, 1, flags); return ret; } @@ -1330,38 +955,17 @@ void *slab_start(struct seq_file *m, loff_t *pos) { mutex_lock(&slab_mutex); - return seq_list_start(&slab_root_caches, *pos); + return seq_list_start(&slab_caches, *pos); } void *slab_next(struct seq_file *m, void *p, loff_t *pos) { - return seq_list_next(p, &slab_root_caches, pos); + return seq_list_next(p, &slab_caches, pos); } void slab_stop(struct seq_file *m, void *p) { mutex_unlock(&slab_mutex); -} - -static void -memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info) -{ - struct kmem_cache *c; - struct slabinfo sinfo; - - if (!is_root_cache(s)) - return; - - for_each_memcg_cache(c, s) { - memset(&sinfo, 0, sizeof(sinfo)); - get_slabinfo(c, &sinfo); - - info->active_slabs += sinfo.active_slabs; - info->num_slabs += sinfo.num_slabs; - info->shared_avail += sinfo.shared_avail; - info->active_objs += sinfo.active_objs; - info->num_objs += sinfo.num_objs; - } } static void cache_show(struct kmem_cache *s, struct seq_file *m) @@ -1371,10 +975,8 @@ memset(&sinfo, 0, sizeof(sinfo)); get_slabinfo(s, &sinfo); - memcg_accumulate_slabinfo(s, &sinfo); - seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", - cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size, + s->name, sinfo.active_objs, sinfo.num_objs, s->size, sinfo.objects_per_slab, (1 << sinfo.cache_order)); seq_printf(m, " : tunables %4u %4u %4u", @@ -1387,9 +989,9 @@ static int slab_show(struct seq_file *m, void *p) { - struct kmem_cache *s = list_entry(p, struct kmem_cache, root_caches_node); + struct kmem_cache *s = list_entry(p, struct kmem_cache, list); - if (p == slab_root_caches.next) + if (p == slab_caches.next) print_slabinfo_header(m); cache_show(s, m); return 0; @@ -1416,49 +1018,26 @@ pr_info("Name Used Total\n"); list_for_each_entry_safe(s, s2, &slab_caches, list) { - if (!is_root_cache(s) || (s->flags & SLAB_RECLAIM_ACCOUNT)) + if (s->flags & SLAB_RECLAIM_ACCOUNT) continue; get_slabinfo(s, &sinfo); if (sinfo.num_objs > 0) - pr_info("%-17s %10luKB %10luKB\n", cache_name(s), + pr_info("%-17s %10luKB %10luKB\n", s->name, (sinfo.active_objs * s->size) / 1024, (sinfo.num_objs * s->size) / 1024); } mutex_unlock(&slab_mutex); } -#if defined(CONFIG_MEMCG) -void *memcg_slab_start(struct seq_file *m, loff_t *pos) -{ - struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); - - mutex_lock(&slab_mutex); - return seq_list_start(&memcg->kmem_caches, *pos); -} - -void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos) -{ - struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); - - return seq_list_next(p, &memcg->kmem_caches, pos); -} - -void memcg_slab_stop(struct seq_file *m, void *p) -{ - mutex_unlock(&slab_mutex); -} - +#if defined(CONFIG_MEMCG_KMEM) int memcg_slab_show(struct seq_file *m, void *p) { - struct kmem_cache *s = list_entry(p, struct kmem_cache, - memcg_params.kmem_caches_node); - struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); - - if (p == memcg->kmem_caches.next) - print_slabinfo_header(m); - cache_show(s, m); + /* + * Deprecated. + * Please, take a look at tools/cgroup/slabinfo.py . + */ return 0; } #endif @@ -1488,63 +1067,54 @@ return seq_open(file, &slabinfo_op); } -static const struct file_operations proc_slabinfo_operations = { - .open = slabinfo_open, - .read = seq_read, - .write = slabinfo_write, - .llseek = seq_lseek, - .release = seq_release, +static const struct proc_ops slabinfo_proc_ops = { + .proc_flags = PROC_ENTRY_PERMANENT, + .proc_open = slabinfo_open, + .proc_read = seq_read, + .proc_write = slabinfo_write, + .proc_lseek = seq_lseek, + .proc_release = seq_release, }; static int __init slab_proc_init(void) { - proc_create("slabinfo", SLABINFO_RIGHTS, NULL, - &proc_slabinfo_operations); + proc_create("slabinfo", SLABINFO_RIGHTS, NULL, &slabinfo_proc_ops); return 0; } module_init(slab_proc_init); + #endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */ static __always_inline void *__do_krealloc(const void *p, size_t new_size, gfp_t flags) { void *ret; - size_t ks = 0; + size_t ks; - if (p) - ks = ksize(p); + /* Don't use instrumented ksize to allow precise KASAN poisoning. */ + if (likely(!ZERO_OR_NULL_PTR(p))) { + if (!kasan_check_byte(p)) + return NULL; + ks = kfence_ksize(p) ?: __ksize(p); + } else + ks = 0; + /* If the object still fits, repoison it precisely. */ if (ks >= new_size) { p = kasan_krealloc((void *)p, new_size, flags); return (void *)p; } ret = kmalloc_track_caller(new_size, flags); - if (ret && p) - memcpy(ret, p, ks); + if (ret && p) { + /* Disable KASAN checks as the object's redzone is accessed. */ + kasan_disable_current(); + memcpy(ret, kasan_reset_tag(p), ks); + kasan_enable_current(); + } return ret; } - -/** - * __krealloc - like krealloc() but don't free @p. - * @p: object to reallocate memory for. - * @new_size: how many bytes of memory are required. - * @flags: the type of memory to allocate. - * - * This function is like krealloc() except it never frees the originally - * allocated buffer. Use this if you don't want to free the buffer immediately - * like, for example, with RCU. - */ -void *__krealloc(const void *p, size_t new_size, gfp_t flags) -{ - if (unlikely(!new_size)) - return ZERO_SIZE_PTR; - - return __do_krealloc(p, new_size, flags); - -} -EXPORT_SYMBOL(__krealloc); /** * krealloc - reallocate memory. The contents will remain unchanged. @@ -1556,6 +1126,8 @@ * lesser of the new and old sizes. If @p is %NULL, krealloc() * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a * %NULL pointer, the object pointed to is freed. + * + * Return: pointer to the allocated memory or %NULL in case of error */ void *krealloc(const void *p, size_t new_size, gfp_t flags) { @@ -1575,28 +1147,73 @@ EXPORT_SYMBOL(krealloc); /** - * kzfree - like kfree but zero memory + * kfree_sensitive - Clear sensitive information in memory before freeing * @p: object to free memory of * * The memory of the object @p points to is zeroed before freed. - * If @p is %NULL, kzfree() does nothing. + * If @p is %NULL, kfree_sensitive() does nothing. * * Note: this function zeroes the whole allocated buffer which can be a good * deal bigger than the requested buffer size passed to kmalloc(). So be * careful when using this function in performance sensitive code. */ -void kzfree(const void *p) +void kfree_sensitive(const void *p) { size_t ks; void *mem = (void *)p; - if (unlikely(ZERO_OR_NULL_PTR(mem))) - return; ks = ksize(mem); - memzero_explicit(mem, ks); + if (ks) + memzero_explicit(mem, ks); kfree(mem); } -EXPORT_SYMBOL(kzfree); +EXPORT_SYMBOL(kfree_sensitive); + +/** + * ksize - get the actual amount of memory allocated for a given object + * @objp: Pointer to the object + * + * kmalloc may internally round up allocations and return more memory + * than requested. ksize() can be used to determine the actual amount of + * memory allocated. The caller may use this additional memory, even though + * a smaller amount of memory was initially specified with the kmalloc call. + * The caller must guarantee that objp points to a valid object previously + * allocated with either kmalloc() or kmem_cache_alloc(). The object + * must not be freed during the duration of the call. + * + * Return: size of the actual memory used by @objp in bytes + */ +size_t ksize(const void *objp) +{ + size_t size; + + /* + * We need to first check that the pointer to the object is valid, and + * only then unpoison the memory. The report printed from ksize() is + * more useful, then when it's printed later when the behaviour could + * be undefined due to a potential use-after-free or double-free. + * + * We use kasan_check_byte(), which is supported for the hardware + * tag-based KASAN mode, unlike kasan_check_read/write(). + * + * If the pointed to memory is invalid, we return 0 to avoid users of + * ksize() writing to and potentially corrupting the memory region. + * + * We want to perform the check before __ksize(), to avoid potentially + * crashing in __ksize() due to accessing invalid metadata. + */ + if (unlikely(ZERO_OR_NULL_PTR(objp)) || !kasan_check_byte(objp)) + return 0; + + size = kfence_ksize(objp) ?: __ksize(objp); + /* + * We assume that ksize callers could use whole allocated area, + * so we need to unpoison this area. + */ + kasan_unpoison_range(objp, size); + return size; +} +EXPORT_SYMBOL(ksize); /* Tracepoints definitions. */ EXPORT_TRACEPOINT_SYMBOL(kmalloc); -- Gitblit v1.6.2