From ea08eeccae9297f7aabd2ef7f0c2517ac4549acc Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 20 Feb 2024 01:18:26 +0000
Subject: [PATCH] write in 30M
---
kernel/mm/slab_common.c | 877 ++++++++++++++++------------------------------------------
1 files changed, 247 insertions(+), 630 deletions(-)
diff --git a/kernel/mm/slab_common.c b/kernel/mm/slab_common.c
index 0d8d00b..05135eb 100644
--- a/kernel/mm/slab_common.c
+++ b/kernel/mm/slab_common.c
@@ -12,11 +12,14 @@
#include <linux/memory.h>
#include <linux/cache.h>
#include <linux/compiler.h>
+#include <linux/kfence.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
+#include <linux/debugfs.h>
+#include <linux/kasan.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
@@ -24,6 +27,9 @@
#define CREATE_TRACE_POINTS
#include <trace/events/kmem.h>
+#undef CREATE_TRACE_POINTS
+#include <trace/hooks/mm.h>
+#include "internal.h"
#include "slab.h"
@@ -50,7 +56,7 @@
*/
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
- SLAB_FAILSLAB | SLAB_KASAN)
+ SLAB_FAILSLAB | kasan_never_merge())
#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
@@ -84,8 +90,7 @@
#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(const char *name, unsigned int size)
{
- if (!name || in_interrupt() || size < sizeof(void *) ||
- size > KMALLOC_MAX_SIZE) {
+ if (!name || in_interrupt() || size > KMALLOC_MAX_SIZE) {
pr_err("kmem_cache_create(%s) integrity check failed\n", name);
return -EINVAL;
}
@@ -127,138 +132,6 @@
return i;
}
-#ifdef CONFIG_MEMCG_KMEM
-
-LIST_HEAD(slab_root_caches);
-static DEFINE_SPINLOCK(memcg_kmem_wq_lock);
-
-void slab_init_memcg_params(struct kmem_cache *s)
-{
- s->memcg_params.root_cache = NULL;
- RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
- INIT_LIST_HEAD(&s->memcg_params.children);
- s->memcg_params.dying = false;
-}
-
-static int init_memcg_params(struct kmem_cache *s,
- struct mem_cgroup *memcg, struct kmem_cache *root_cache)
-{
- struct memcg_cache_array *arr;
-
- if (root_cache) {
- s->memcg_params.root_cache = root_cache;
- s->memcg_params.memcg = memcg;
- INIT_LIST_HEAD(&s->memcg_params.children_node);
- INIT_LIST_HEAD(&s->memcg_params.kmem_caches_node);
- return 0;
- }
-
- slab_init_memcg_params(s);
-
- if (!memcg_nr_cache_ids)
- return 0;
-
- arr = kvzalloc(sizeof(struct memcg_cache_array) +
- memcg_nr_cache_ids * sizeof(void *),
- GFP_KERNEL);
- if (!arr)
- return -ENOMEM;
-
- RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr);
- return 0;
-}
-
-static void destroy_memcg_params(struct kmem_cache *s)
-{
- if (is_root_cache(s))
- kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
-}
-
-static void free_memcg_params(struct rcu_head *rcu)
-{
- struct memcg_cache_array *old;
-
- old = container_of(rcu, struct memcg_cache_array, rcu);
- kvfree(old);
-}
-
-static int update_memcg_params(struct kmem_cache *s, int new_array_size)
-{
- struct memcg_cache_array *old, *new;
-
- new = kvzalloc(sizeof(struct memcg_cache_array) +
- new_array_size * sizeof(void *), GFP_KERNEL);
- if (!new)
- return -ENOMEM;
-
- old = rcu_dereference_protected(s->memcg_params.memcg_caches,
- lockdep_is_held(&slab_mutex));
- if (old)
- memcpy(new->entries, old->entries,
- memcg_nr_cache_ids * sizeof(void *));
-
- rcu_assign_pointer(s->memcg_params.memcg_caches, new);
- if (old)
- call_rcu(&old->rcu, free_memcg_params);
- return 0;
-}
-
-int memcg_update_all_caches(int num_memcgs)
-{
- struct kmem_cache *s;
- int ret = 0;
-
- mutex_lock(&slab_mutex);
- list_for_each_entry(s, &slab_root_caches, root_caches_node) {
- ret = update_memcg_params(s, num_memcgs);
- /*
- * Instead of freeing the memory, we'll just leave the caches
- * up to this point in an updated state.
- */
- if (ret)
- break;
- }
- mutex_unlock(&slab_mutex);
- return ret;
-}
-
-void memcg_link_cache(struct kmem_cache *s)
-{
- if (is_root_cache(s)) {
- list_add(&s->root_caches_node, &slab_root_caches);
- } else {
- list_add(&s->memcg_params.children_node,
- &s->memcg_params.root_cache->memcg_params.children);
- list_add(&s->memcg_params.kmem_caches_node,
- &s->memcg_params.memcg->kmem_caches);
- }
-}
-
-static void memcg_unlink_cache(struct kmem_cache *s)
-{
- if (is_root_cache(s)) {
- list_del(&s->root_caches_node);
- } else {
- list_del(&s->memcg_params.children_node);
- list_del(&s->memcg_params.kmem_caches_node);
- }
-}
-#else
-static inline int init_memcg_params(struct kmem_cache *s,
- struct mem_cgroup *memcg, struct kmem_cache *root_cache)
-{
- return 0;
-}
-
-static inline void destroy_memcg_params(struct kmem_cache *s)
-{
-}
-
-static inline void memcg_unlink_cache(struct kmem_cache *s)
-{
-}
-#endif /* CONFIG_MEMCG_KMEM */
-
/*
* Figure out what the alignment of the objects will be given a set of
* flags, a user specified alignment and the size of the objects.
@@ -282,8 +155,7 @@
align = max(align, ralign);
}
- if (align < ARCH_SLAB_MINALIGN)
- align = ARCH_SLAB_MINALIGN;
+ align = max(align, arch_slab_minalign());
return ALIGN(align, sizeof(void *));
}
@@ -294,9 +166,6 @@
int slab_unmergeable(struct kmem_cache *s)
{
if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
- return 1;
-
- if (!is_root_cache(s))
return 1;
if (s->ctor)
@@ -328,12 +197,12 @@
size = ALIGN(size, sizeof(void *));
align = calculate_alignment(flags, align, size);
size = ALIGN(size, align);
- flags = kmem_cache_flags(size, flags, name, NULL);
+ flags = kmem_cache_flags(size, flags, name);
if (flags & SLAB_NEVER_MERGE)
return NULL;
- list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) {
+ list_for_each_entry_reverse(s, &slab_caches, list) {
if (slab_unmergeable(s))
continue;
@@ -365,7 +234,7 @@
unsigned int object_size, unsigned int align,
slab_flags_t flags, unsigned int useroffset,
unsigned int usersize, void (*ctor)(void *),
- struct mem_cgroup *memcg, struct kmem_cache *root_cache)
+ struct kmem_cache *root_cache)
{
struct kmem_cache *s;
int err;
@@ -385,30 +254,25 @@
s->useroffset = useroffset;
s->usersize = usersize;
- err = init_memcg_params(s, memcg, root_cache);
- if (err)
- goto out_free_cache;
-
err = __kmem_cache_create(s, flags);
if (err)
goto out_free_cache;
s->refcount = 1;
list_add(&s->list, &slab_caches);
- memcg_link_cache(s);
out:
if (err)
return ERR_PTR(err);
return s;
out_free_cache:
- destroy_memcg_params(s);
kmem_cache_free(kmem_cache, s);
goto out;
}
-/*
- * kmem_cache_create_usercopy - Create a cache.
+/**
+ * kmem_cache_create_usercopy - Create a cache with a region suitable
+ * for copying to userspace
* @name: A string which is used in /proc/slabinfo to identify this cache.
* @size: The size of objects to be created in this cache.
* @align: The required alignment for the objects.
@@ -417,7 +281,6 @@
* @usersize: Usercopy region size
* @ctor: A constructor for the objects.
*
- * Returns a ptr to the cache on success, NULL on failure.
* Cannot be called within a interrupt, but can be interrupted.
* The @ctor is run when new pages are allocated by the cache.
*
@@ -426,12 +289,14 @@
* %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
* to catch references to uninitialised memory.
*
- * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
+ * %SLAB_RED_ZONE - Insert `Red` zones around the allocated memory to check
* for buffer overruns.
*
* %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
* cacheline. This can be beneficial if you're counting cycles as closely
* as davem.
+ *
+ * Return: a pointer to the cache on success, NULL on failure.
*/
struct kmem_cache *
kmem_cache_create_usercopy(const char *name,
@@ -446,7 +311,16 @@
get_online_cpus();
get_online_mems();
- memcg_get_cache_ids();
+
+#ifdef CONFIG_SLUB_DEBUG
+ /*
+ * If no slub_debug was enabled globally, the static key is not yet
+ * enabled by setup_slub_debug(). Enable it if the cache is being
+ * created with any of the debugging flags passed explicitly.
+ */
+ if (flags & SLAB_DEBUG_FLAGS)
+ static_branch_enable(&slub_debug_enabled);
+#endif
mutex_lock(&slab_mutex);
@@ -487,7 +361,7 @@
s = create_cache(cache_name, size,
calculate_alignment(flags, align, size),
- flags, useroffset, usersize, ctor, NULL, NULL);
+ flags, useroffset, usersize, ctor, NULL);
if (IS_ERR(s)) {
err = PTR_ERR(s);
kfree_const(cache_name);
@@ -496,7 +370,6 @@
out_unlock:
mutex_unlock(&slab_mutex);
- memcg_put_cache_ids();
put_online_mems();
put_online_cpus();
@@ -515,6 +388,31 @@
}
EXPORT_SYMBOL(kmem_cache_create_usercopy);
+/**
+ * kmem_cache_create - Create a cache.
+ * @name: A string which is used in /proc/slabinfo to identify this cache.
+ * @size: The size of objects to be created in this cache.
+ * @align: The required alignment for the objects.
+ * @flags: SLAB flags
+ * @ctor: A constructor for the objects.
+ *
+ * Cannot be called within an interrupt, but can be interrupted.
+ * The @ctor is run when new pages are allocated by the cache.
+ *
+ * The flags are
+ *
+ * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
+ * to catch references to uninitialised memory.
+ *
+ * %SLAB_RED_ZONE - Insert `Red` zones around the allocated memory to check
+ * for buffer overruns.
+ *
+ * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
+ * cacheline. This can be beneficial if you're counting cycles as closely
+ * as davem.
+ *
+ * Return: a pointer to the cache on success, NULL on failure.
+ */
struct kmem_cache *
kmem_cache_create(const char *name, unsigned int size, unsigned int align,
slab_flags_t flags, void (*ctor)(void *))
@@ -532,7 +430,7 @@
/*
* On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
* @slab_caches_to_rcu_destroy list. The slab pages are freed
- * through RCU and and the associated kmem_cache are dereferenced
+ * through RCU and the associated kmem_cache are dereferenced
* while freeing the pages, so the kmem_caches should be freed only
* after the pending RCU operations are finished. As rcu_barrier()
* is a pretty slow operation, we batch all pending destructions
@@ -548,6 +446,8 @@
rcu_barrier();
list_for_each_entry_safe(s, s2, &to_destroy, list) {
+ debugfs_slab_release(s);
+ kfence_shutdown_cache(s);
#ifdef SLAB_SUPPORTS_SYSFS
sysfs_slab_release(s);
#else
@@ -564,7 +464,6 @@
if (__kmem_cache_shutdown(s) != 0)
return -EBUSY;
- memcg_unlink_cache(s);
list_del(&s->list);
if (s->flags & SLAB_TYPESAFE_BY_RCU) {
@@ -574,6 +473,8 @@
list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
schedule_work(&slab_caches_to_rcu_destroy_work);
} else {
+ kfence_shutdown_cache(s);
+ debugfs_slab_release(s);
#ifdef SLAB_SUPPORTS_SYSFS
sysfs_slab_unlink(s);
sysfs_slab_release(s);
@@ -585,297 +486,9 @@
return 0;
}
-#ifdef CONFIG_MEMCG_KMEM
-/*
- * memcg_create_kmem_cache - Create a cache for a memory cgroup.
- * @memcg: The memory cgroup the new cache is for.
- * @root_cache: The parent of the new cache.
- *
- * This function attempts to create a kmem cache that will serve allocation
- * requests going from @memcg to @root_cache. The new cache inherits properties
- * from its parent.
- */
-void memcg_create_kmem_cache(struct mem_cgroup *memcg,
- struct kmem_cache *root_cache)
-{
- static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
- struct cgroup_subsys_state *css = &memcg->css;
- struct memcg_cache_array *arr;
- struct kmem_cache *s = NULL;
- char *cache_name;
- int idx;
-
- get_online_cpus();
- get_online_mems();
-
- mutex_lock(&slab_mutex);
-
- /*
- * The memory cgroup could have been offlined while the cache
- * creation work was pending.
- */
- if (memcg->kmem_state != KMEM_ONLINE || root_cache->memcg_params.dying)
- goto out_unlock;
-
- idx = memcg_cache_id(memcg);
- arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
- lockdep_is_held(&slab_mutex));
-
- /*
- * Since per-memcg caches are created asynchronously on first
- * allocation (see memcg_kmem_get_cache()), several threads can try to
- * create the same cache, but only one of them may succeed.
- */
- if (arr->entries[idx])
- goto out_unlock;
-
- cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
- cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name,
- css->serial_nr, memcg_name_buf);
- if (!cache_name)
- goto out_unlock;
-
- s = create_cache(cache_name, root_cache->object_size,
- root_cache->align,
- root_cache->flags & CACHE_CREATE_MASK,
- root_cache->useroffset, root_cache->usersize,
- root_cache->ctor, memcg, root_cache);
- /*
- * If we could not create a memcg cache, do not complain, because
- * that's not critical at all as we can always proceed with the root
- * cache.
- */
- if (IS_ERR(s)) {
- kfree(cache_name);
- goto out_unlock;
- }
-
- /*
- * Since readers won't lock (see cache_from_memcg_idx()), we need a
- * barrier here to ensure nobody will see the kmem_cache partially
- * initialized.
- */
- smp_wmb();
- arr->entries[idx] = s;
-
-out_unlock:
- mutex_unlock(&slab_mutex);
-
- put_online_mems();
- put_online_cpus();
-}
-
-static void kmemcg_deactivate_workfn(struct work_struct *work)
-{
- struct kmem_cache *s = container_of(work, struct kmem_cache,
- memcg_params.deact_work);
-
- get_online_cpus();
- get_online_mems();
-
- mutex_lock(&slab_mutex);
-
- s->memcg_params.deact_fn(s);
-
- mutex_unlock(&slab_mutex);
-
- put_online_mems();
- put_online_cpus();
-
- /* done, put the ref from slab_deactivate_memcg_cache_rcu_sched() */
- css_put(&s->memcg_params.memcg->css);
-}
-
-static void kmemcg_deactivate_rcufn(struct rcu_head *head)
-{
- struct kmem_cache *s = container_of(head, struct kmem_cache,
- memcg_params.deact_rcu_head);
-
- /*
- * We need to grab blocking locks. Bounce to ->deact_work. The
- * work item shares the space with the RCU head and can't be
- * initialized eariler.
- */
- INIT_WORK(&s->memcg_params.deact_work, kmemcg_deactivate_workfn);
- queue_work(memcg_kmem_cache_wq, &s->memcg_params.deact_work);
-}
-
-/**
- * slab_deactivate_memcg_cache_rcu_sched - schedule deactivation after a
- * sched RCU grace period
- * @s: target kmem_cache
- * @deact_fn: deactivation function to call
- *
- * Schedule @deact_fn to be invoked with online cpus, mems and slab_mutex
- * held after a sched RCU grace period. The slab is guaranteed to stay
- * alive until @deact_fn is finished. This is to be used from
- * __kmemcg_cache_deactivate().
- */
-void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
- void (*deact_fn)(struct kmem_cache *))
-{
- if (WARN_ON_ONCE(is_root_cache(s)) ||
- WARN_ON_ONCE(s->memcg_params.deact_fn))
- return;
-
- /*
- * memcg_kmem_wq_lock is used to synchronize memcg_params.dying
- * flag and make sure that no new kmem_cache deactivation tasks
- * are queued (see flush_memcg_workqueue() ).
- */
- spin_lock_irq(&memcg_kmem_wq_lock);
- if (s->memcg_params.root_cache->memcg_params.dying)
- goto unlock;
-
- /* pin memcg so that @s doesn't get destroyed in the middle */
- css_get(&s->memcg_params.memcg->css);
-
- s->memcg_params.deact_fn = deact_fn;
- call_rcu_sched(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn);
-unlock:
- spin_unlock_irq(&memcg_kmem_wq_lock);
-}
-
-void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
-{
- int idx;
- struct memcg_cache_array *arr;
- struct kmem_cache *s, *c;
-
- idx = memcg_cache_id(memcg);
-
- get_online_cpus();
- get_online_mems();
-
- mutex_lock(&slab_mutex);
- list_for_each_entry(s, &slab_root_caches, root_caches_node) {
- arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
- lockdep_is_held(&slab_mutex));
- c = arr->entries[idx];
- if (!c)
- continue;
-
- __kmemcg_cache_deactivate(c);
- arr->entries[idx] = NULL;
- }
- mutex_unlock(&slab_mutex);
-
- put_online_mems();
- put_online_cpus();
-}
-
-void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
-{
- struct kmem_cache *s, *s2;
-
- get_online_cpus();
- get_online_mems();
-
- mutex_lock(&slab_mutex);
- list_for_each_entry_safe(s, s2, &memcg->kmem_caches,
- memcg_params.kmem_caches_node) {
- /*
- * The cgroup is about to be freed and therefore has no charges
- * left. Hence, all its caches must be empty by now.
- */
- BUG_ON(shutdown_cache(s));
- }
- mutex_unlock(&slab_mutex);
-
- put_online_mems();
- put_online_cpus();
-}
-
-static int shutdown_memcg_caches(struct kmem_cache *s)
-{
- struct memcg_cache_array *arr;
- struct kmem_cache *c, *c2;
- LIST_HEAD(busy);
- int i;
-
- BUG_ON(!is_root_cache(s));
-
- /*
- * First, shutdown active caches, i.e. caches that belong to online
- * memory cgroups.
- */
- arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
- lockdep_is_held(&slab_mutex));
- for_each_memcg_cache_index(i) {
- c = arr->entries[i];
- if (!c)
- continue;
- if (shutdown_cache(c))
- /*
- * The cache still has objects. Move it to a temporary
- * list so as not to try to destroy it for a second
- * time while iterating over inactive caches below.
- */
- list_move(&c->memcg_params.children_node, &busy);
- else
- /*
- * The cache is empty and will be destroyed soon. Clear
- * the pointer to it in the memcg_caches array so that
- * it will never be accessed even if the root cache
- * stays alive.
- */
- arr->entries[i] = NULL;
- }
-
- /*
- * Second, shutdown all caches left from memory cgroups that are now
- * offline.
- */
- list_for_each_entry_safe(c, c2, &s->memcg_params.children,
- memcg_params.children_node)
- shutdown_cache(c);
-
- list_splice(&busy, &s->memcg_params.children);
-
- /*
- * A cache being destroyed must be empty. In particular, this means
- * that all per memcg caches attached to it must be empty too.
- */
- if (!list_empty(&s->memcg_params.children))
- return -EBUSY;
- return 0;
-}
-
-static void memcg_set_kmem_cache_dying(struct kmem_cache *s)
-{
- spin_lock_irq(&memcg_kmem_wq_lock);
- s->memcg_params.dying = true;
- spin_unlock_irq(&memcg_kmem_wq_lock);
-}
-
-static void flush_memcg_workqueue(struct kmem_cache *s)
-{
- /*
- * SLUB deactivates the kmem_caches through call_rcu_sched. Make
- * sure all registered rcu callbacks have been invoked.
- */
- if (IS_ENABLED(CONFIG_SLUB))
- rcu_barrier_sched();
-
- /*
- * SLAB and SLUB create memcg kmem_caches through workqueue and SLUB
- * deactivates the memcg kmem_caches through workqueue. Make sure all
- * previous workitems on workqueue are processed.
- */
- if (likely(memcg_kmem_cache_wq))
- flush_workqueue(memcg_kmem_cache_wq);
-}
-#else
-static inline int shutdown_memcg_caches(struct kmem_cache *s)
-{
- return 0;
-}
-#endif /* CONFIG_MEMCG_KMEM */
-
void slab_kmem_cache_release(struct kmem_cache *s)
{
__kmem_cache_release(s);
- destroy_memcg_params(s);
kfree_const(s->name);
kmem_cache_free(kmem_cache, s);
}
@@ -896,36 +509,7 @@
if (s->refcount)
goto out_unlock;
-#ifdef CONFIG_MEMCG_KMEM
- memcg_set_kmem_cache_dying(s);
-
- mutex_unlock(&slab_mutex);
-
- put_online_mems();
- put_online_cpus();
-
- flush_memcg_workqueue(s);
-
- get_online_cpus();
- get_online_mems();
-
- mutex_lock(&slab_mutex);
-
- /*
- * Another thread referenced it again
- */
- if (READ_ONCE(s->refcount)) {
- spin_lock_irq(&memcg_kmem_wq_lock);
- s->memcg_params.dying = false;
- spin_unlock_irq(&memcg_kmem_wq_lock);
- goto out_unlock;
- }
-#endif
-
- err = shutdown_memcg_caches(s);
- if (!err)
- err = shutdown_cache(s);
-
+ err = shutdown_cache(s);
if (err) {
pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
s->name);
@@ -945,6 +529,8 @@
*
* Releases as many slabs as possible for a cache.
* To help debugging, a zero exit status indicates all slabs were released.
+ *
+ * Return: %0 if all slabs were released, non-zero otherwise
*/
int kmem_cache_shrink(struct kmem_cache *cachep)
{
@@ -972,14 +558,21 @@
unsigned int useroffset, unsigned int usersize)
{
int err;
+ unsigned int align = ARCH_KMALLOC_MINALIGN;
s->name = name;
s->size = s->object_size = size;
- s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
+
+ /*
+ * For power of two sizes, guarantee natural alignment for kmalloc
+ * caches, regardless of SL*B debugging options.
+ */
+ if (is_power_of_2(size))
+ align = max(align, size);
+ s->align = calculate_alignment(flags, align, size);
+
s->useroffset = useroffset;
s->usersize = usersize;
-
- slab_init_memcg_params(s);
err = __kmem_cache_create(s, flags);
@@ -1000,14 +593,15 @@
panic("Out of memory when creating slab %s\n", name);
create_boot_cache(s, name, size, flags, useroffset, usersize);
+ kasan_cache_create_kmalloc(s);
list_add(&s->list, &slab_caches);
- memcg_link_cache(s);
s->refcount = 1;
return s;
}
struct kmem_cache *
-kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
+kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init =
+{ /* initialization for https://bugs.llvm.org/show_bug.cgi?id=42570 */ };
EXPORT_SYMBOL(kmalloc_caches);
/*
@@ -1055,6 +649,7 @@
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
unsigned int index;
+ struct kmem_cache *s = NULL;
if (size <= 192) {
if (!size)
@@ -1062,15 +657,34 @@
index = size_index[size_index_elem(size)];
} else {
- if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
- WARN_ON(1);
+ if (WARN_ON_ONCE(size > KMALLOC_MAX_CACHE_SIZE))
return NULL;
- }
index = fls(size - 1);
}
+ trace_android_vh_kmalloc_slab(index, flags, &s);
+ if (s)
+ return s;
+
return kmalloc_caches[kmalloc_type(flags)][index];
}
+
+#ifdef CONFIG_ZONE_DMA
+#define INIT_KMALLOC_INFO(__size, __short_size) \
+{ \
+ .name[KMALLOC_NORMAL] = "kmalloc-" #__short_size, \
+ .name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size, \
+ .name[KMALLOC_DMA] = "dma-kmalloc-" #__short_size, \
+ .size = __size, \
+}
+#else
+#define INIT_KMALLOC_INFO(__size, __short_size) \
+{ \
+ .name[KMALLOC_NORMAL] = "kmalloc-" #__short_size, \
+ .name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size, \
+ .size = __size, \
+}
+#endif
/*
* kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
@@ -1078,20 +692,33 @@
* kmalloc-67108864.
*/
const struct kmalloc_info_struct kmalloc_info[] __initconst = {
- {NULL, 0}, {"kmalloc-96", 96},
- {"kmalloc-192", 192}, {"kmalloc-8", 8},
- {"kmalloc-16", 16}, {"kmalloc-32", 32},
- {"kmalloc-64", 64}, {"kmalloc-128", 128},
- {"kmalloc-256", 256}, {"kmalloc-512", 512},
- {"kmalloc-1k", 1024}, {"kmalloc-2k", 2048},
- {"kmalloc-4k", 4096}, {"kmalloc-8k", 8192},
- {"kmalloc-16k", 16384}, {"kmalloc-32k", 32768},
- {"kmalloc-64k", 65536}, {"kmalloc-128k", 131072},
- {"kmalloc-256k", 262144}, {"kmalloc-512k", 524288},
- {"kmalloc-1M", 1048576}, {"kmalloc-2M", 2097152},
- {"kmalloc-4M", 4194304}, {"kmalloc-8M", 8388608},
- {"kmalloc-16M", 16777216}, {"kmalloc-32M", 33554432},
- {"kmalloc-64M", 67108864}
+ INIT_KMALLOC_INFO(0, 0),
+ INIT_KMALLOC_INFO(96, 96),
+ INIT_KMALLOC_INFO(192, 192),
+ INIT_KMALLOC_INFO(8, 8),
+ INIT_KMALLOC_INFO(16, 16),
+ INIT_KMALLOC_INFO(32, 32),
+ INIT_KMALLOC_INFO(64, 64),
+ INIT_KMALLOC_INFO(128, 128),
+ INIT_KMALLOC_INFO(256, 256),
+ INIT_KMALLOC_INFO(512, 512),
+ INIT_KMALLOC_INFO(1024, 1k),
+ INIT_KMALLOC_INFO(2048, 2k),
+ INIT_KMALLOC_INFO(4096, 4k),
+ INIT_KMALLOC_INFO(8192, 8k),
+ INIT_KMALLOC_INFO(16384, 16k),
+ INIT_KMALLOC_INFO(32768, 32k),
+ INIT_KMALLOC_INFO(65536, 64k),
+ INIT_KMALLOC_INFO(131072, 128k),
+ INIT_KMALLOC_INFO(262144, 256k),
+ INIT_KMALLOC_INFO(524288, 512k),
+ INIT_KMALLOC_INFO(1048576, 1M),
+ INIT_KMALLOC_INFO(2097152, 2M),
+ INIT_KMALLOC_INFO(4194304, 4M),
+ INIT_KMALLOC_INFO(8388608, 8M),
+ INIT_KMALLOC_INFO(16777216, 16M),
+ INIT_KMALLOC_INFO(33554432, 32M),
+ INIT_KMALLOC_INFO(67108864, 64M)
};
/*
@@ -1141,36 +768,14 @@
}
}
-static const char *
-kmalloc_cache_name(const char *prefix, unsigned int size)
-{
-
- static const char units[3] = "\0kM";
- int idx = 0;
-
- while (size >= 1024 && (size % 1024 == 0)) {
- size /= 1024;
- idx++;
- }
-
- return kasprintf(GFP_NOWAIT, "%s-%u%c", prefix, size, units[idx]);
-}
-
static void __init
-new_kmalloc_cache(int idx, int type, slab_flags_t flags)
+new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
{
- const char *name;
-
- if (type == KMALLOC_RECLAIM) {
+ if (type == KMALLOC_RECLAIM)
flags |= SLAB_RECLAIM_ACCOUNT;
- name = kmalloc_cache_name("kmalloc-rcl",
- kmalloc_info[idx].size);
- BUG_ON(!name);
- } else {
- name = kmalloc_info[idx].name;
- }
- kmalloc_caches[type][idx] = create_kmalloc_cache(name,
+ kmalloc_caches[type][idx] = create_kmalloc_cache(
+ kmalloc_info[idx].name[type],
kmalloc_info[idx].size, flags, 0,
kmalloc_info[idx].size);
}
@@ -1182,7 +787,8 @@
*/
void __init create_kmalloc_caches(slab_flags_t flags)
{
- int i, type;
+ int i;
+ enum kmalloc_cache_type type;
for (type = KMALLOC_NORMAL; type <= KMALLOC_RECLAIM; type++) {
for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
@@ -1211,17 +817,28 @@
struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i];
if (s) {
- unsigned int size = kmalloc_size(i);
- const char *n = kmalloc_cache_name("dma-kmalloc", size);
-
- BUG_ON(!n);
kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache(
- n, size, SLAB_CACHE_DMA | flags, 0, 0);
+ kmalloc_info[i].name[KMALLOC_DMA],
+ kmalloc_info[i].size,
+ SLAB_CACHE_DMA | flags, 0,
+ kmalloc_info[i].size);
}
}
#endif
}
#endif /* !CONFIG_SLOB */
+
+gfp_t kmalloc_fix_flags(gfp_t flags)
+{
+ gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
+
+ flags &= ~GFP_SLAB_BUG_MASK;
+ pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
+ invalid_mask, &invalid_mask, flags, &flags);
+ dump_stack();
+
+ return flags;
+}
/*
* To avoid unnecessary overhead, we pass through large allocation requests
@@ -1230,13 +847,21 @@
*/
void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
- void *ret;
+ void *ret = NULL;
struct page *page;
+
+ if (unlikely(flags & GFP_SLAB_BUG_MASK))
+ flags = kmalloc_fix_flags(flags);
flags |= __GFP_COMP;
page = alloc_pages(flags, order);
- ret = page ? page_address(page) : NULL;
+ if (likely(page)) {
+ ret = page_address(page);
+ mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
+ PAGE_SIZE << order);
+ }
ret = kasan_kmalloc_large(ret, size, flags);
+ /* As ret might get tagged, call kmemleak hook after KASAN. */
kmemleak_alloc(ret, size, 1, flags);
return ret;
}
@@ -1330,38 +955,17 @@
void *slab_start(struct seq_file *m, loff_t *pos)
{
mutex_lock(&slab_mutex);
- return seq_list_start(&slab_root_caches, *pos);
+ return seq_list_start(&slab_caches, *pos);
}
void *slab_next(struct seq_file *m, void *p, loff_t *pos)
{
- return seq_list_next(p, &slab_root_caches, pos);
+ return seq_list_next(p, &slab_caches, pos);
}
void slab_stop(struct seq_file *m, void *p)
{
mutex_unlock(&slab_mutex);
-}
-
-static void
-memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
-{
- struct kmem_cache *c;
- struct slabinfo sinfo;
-
- if (!is_root_cache(s))
- return;
-
- for_each_memcg_cache(c, s) {
- memset(&sinfo, 0, sizeof(sinfo));
- get_slabinfo(c, &sinfo);
-
- info->active_slabs += sinfo.active_slabs;
- info->num_slabs += sinfo.num_slabs;
- info->shared_avail += sinfo.shared_avail;
- info->active_objs += sinfo.active_objs;
- info->num_objs += sinfo.num_objs;
- }
}
static void cache_show(struct kmem_cache *s, struct seq_file *m)
@@ -1371,10 +975,8 @@
memset(&sinfo, 0, sizeof(sinfo));
get_slabinfo(s, &sinfo);
- memcg_accumulate_slabinfo(s, &sinfo);
-
seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
- cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
+ s->name, sinfo.active_objs, sinfo.num_objs, s->size,
sinfo.objects_per_slab, (1 << sinfo.cache_order));
seq_printf(m, " : tunables %4u %4u %4u",
@@ -1387,9 +989,9 @@
static int slab_show(struct seq_file *m, void *p)
{
- struct kmem_cache *s = list_entry(p, struct kmem_cache, root_caches_node);
+ struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
- if (p == slab_root_caches.next)
+ if (p == slab_caches.next)
print_slabinfo_header(m);
cache_show(s, m);
return 0;
@@ -1416,49 +1018,26 @@
pr_info("Name Used Total\n");
list_for_each_entry_safe(s, s2, &slab_caches, list) {
- if (!is_root_cache(s) || (s->flags & SLAB_RECLAIM_ACCOUNT))
+ if (s->flags & SLAB_RECLAIM_ACCOUNT)
continue;
get_slabinfo(s, &sinfo);
if (sinfo.num_objs > 0)
- pr_info("%-17s %10luKB %10luKB\n", cache_name(s),
+ pr_info("%-17s %10luKB %10luKB\n", s->name,
(sinfo.active_objs * s->size) / 1024,
(sinfo.num_objs * s->size) / 1024);
}
mutex_unlock(&slab_mutex);
}
-#if defined(CONFIG_MEMCG)
-void *memcg_slab_start(struct seq_file *m, loff_t *pos)
-{
- struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
-
- mutex_lock(&slab_mutex);
- return seq_list_start(&memcg->kmem_caches, *pos);
-}
-
-void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos)
-{
- struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
-
- return seq_list_next(p, &memcg->kmem_caches, pos);
-}
-
-void memcg_slab_stop(struct seq_file *m, void *p)
-{
- mutex_unlock(&slab_mutex);
-}
-
+#if defined(CONFIG_MEMCG_KMEM)
int memcg_slab_show(struct seq_file *m, void *p)
{
- struct kmem_cache *s = list_entry(p, struct kmem_cache,
- memcg_params.kmem_caches_node);
- struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
-
- if (p == memcg->kmem_caches.next)
- print_slabinfo_header(m);
- cache_show(s, m);
+ /*
+ * Deprecated.
+ * Please, take a look at tools/cgroup/slabinfo.py .
+ */
return 0;
}
#endif
@@ -1488,63 +1067,54 @@
return seq_open(file, &slabinfo_op);
}
-static const struct file_operations proc_slabinfo_operations = {
- .open = slabinfo_open,
- .read = seq_read,
- .write = slabinfo_write,
- .llseek = seq_lseek,
- .release = seq_release,
+static const struct proc_ops slabinfo_proc_ops = {
+ .proc_flags = PROC_ENTRY_PERMANENT,
+ .proc_open = slabinfo_open,
+ .proc_read = seq_read,
+ .proc_write = slabinfo_write,
+ .proc_lseek = seq_lseek,
+ .proc_release = seq_release,
};
static int __init slab_proc_init(void)
{
- proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
- &proc_slabinfo_operations);
+ proc_create("slabinfo", SLABINFO_RIGHTS, NULL, &slabinfo_proc_ops);
return 0;
}
module_init(slab_proc_init);
+
#endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */
static __always_inline void *__do_krealloc(const void *p, size_t new_size,
gfp_t flags)
{
void *ret;
- size_t ks = 0;
+ size_t ks;
- if (p)
- ks = ksize(p);
+ /* Don't use instrumented ksize to allow precise KASAN poisoning. */
+ if (likely(!ZERO_OR_NULL_PTR(p))) {
+ if (!kasan_check_byte(p))
+ return NULL;
+ ks = kfence_ksize(p) ?: __ksize(p);
+ } else
+ ks = 0;
+ /* If the object still fits, repoison it precisely. */
if (ks >= new_size) {
p = kasan_krealloc((void *)p, new_size, flags);
return (void *)p;
}
ret = kmalloc_track_caller(new_size, flags);
- if (ret && p)
- memcpy(ret, p, ks);
+ if (ret && p) {
+ /* Disable KASAN checks as the object's redzone is accessed. */
+ kasan_disable_current();
+ memcpy(ret, kasan_reset_tag(p), ks);
+ kasan_enable_current();
+ }
return ret;
}
-
-/**
- * __krealloc - like krealloc() but don't free @p.
- * @p: object to reallocate memory for.
- * @new_size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- *
- * This function is like krealloc() except it never frees the originally
- * allocated buffer. Use this if you don't want to free the buffer immediately
- * like, for example, with RCU.
- */
-void *__krealloc(const void *p, size_t new_size, gfp_t flags)
-{
- if (unlikely(!new_size))
- return ZERO_SIZE_PTR;
-
- return __do_krealloc(p, new_size, flags);
-
-}
-EXPORT_SYMBOL(__krealloc);
/**
* krealloc - reallocate memory. The contents will remain unchanged.
@@ -1556,6 +1126,8 @@
* lesser of the new and old sizes. If @p is %NULL, krealloc()
* behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
* %NULL pointer, the object pointed to is freed.
+ *
+ * Return: pointer to the allocated memory or %NULL in case of error
*/
void *krealloc(const void *p, size_t new_size, gfp_t flags)
{
@@ -1575,28 +1147,73 @@
EXPORT_SYMBOL(krealloc);
/**
- * kzfree - like kfree but zero memory
+ * kfree_sensitive - Clear sensitive information in memory before freeing
* @p: object to free memory of
*
* The memory of the object @p points to is zeroed before freed.
- * If @p is %NULL, kzfree() does nothing.
+ * If @p is %NULL, kfree_sensitive() does nothing.
*
* Note: this function zeroes the whole allocated buffer which can be a good
* deal bigger than the requested buffer size passed to kmalloc(). So be
* careful when using this function in performance sensitive code.
*/
-void kzfree(const void *p)
+void kfree_sensitive(const void *p)
{
size_t ks;
void *mem = (void *)p;
- if (unlikely(ZERO_OR_NULL_PTR(mem)))
- return;
ks = ksize(mem);
- memzero_explicit(mem, ks);
+ if (ks)
+ memzero_explicit(mem, ks);
kfree(mem);
}
-EXPORT_SYMBOL(kzfree);
+EXPORT_SYMBOL(kfree_sensitive);
+
+/**
+ * ksize - get the actual amount of memory allocated for a given object
+ * @objp: Pointer to the object
+ *
+ * kmalloc may internally round up allocations and return more memory
+ * than requested. ksize() can be used to determine the actual amount of
+ * memory allocated. The caller may use this additional memory, even though
+ * a smaller amount of memory was initially specified with the kmalloc call.
+ * The caller must guarantee that objp points to a valid object previously
+ * allocated with either kmalloc() or kmem_cache_alloc(). The object
+ * must not be freed during the duration of the call.
+ *
+ * Return: size of the actual memory used by @objp in bytes
+ */
+size_t ksize(const void *objp)
+{
+	size_t size;
+
+	/*
+	 * We need to first check that the pointer to the object is valid, and
+	 * only then unpoison the memory. The report printed from ksize() is
+	 * more useful than one printed later, when the behaviour could
+	 * be undefined due to a potential use-after-free or double-free.
+	 *
+	 * We use kasan_check_byte(), which is supported for the hardware
+	 * tag-based KASAN mode, unlike kasan_check_read/write().
+	 *
+	 * If the pointed-to memory is invalid, we return 0 to avoid users of
+	 * ksize() writing to and potentially corrupting the memory region.
+	 *
+	 * We want to perform the check before __ksize(), to avoid potentially
+	 * crashing in __ksize() due to accessing invalid metadata.
+	 */
+	if (unlikely(ZERO_OR_NULL_PTR(objp)) || !kasan_check_byte(objp))
+		return 0;
+
+	size = kfence_ksize(objp) ?: __ksize(objp);
+	/*
+	 * We assume that ksize() callers could use the whole allocated area,
+	 * so we need to unpoison this area.
+	 */
+	kasan_unpoison_range(objp, size);
+	return size;
+}
+EXPORT_SYMBOL(ksize);
/* Tracepoints definitions. */
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
--
Gitblit v1.6.2