From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/mm/slab.c | 614 +++++++++++++++---------------------------------------- 1 files changed, 174 insertions(+), 440 deletions(-) diff --git a/kernel/mm/slab.c b/kernel/mm/slab.c index 4e2be12..aa4ef18 100644 --- a/kernel/mm/slab.c +++ b/kernel/mm/slab.c @@ -100,6 +100,7 @@ #include <linux/seq_file.h> #include <linux/notifier.h> #include <linux/kallsyms.h> +#include <linux/kfence.h> #include <linux/cpu.h> #include <linux/sysctl.h> #include <linux/module.h> @@ -362,29 +363,6 @@ #endif -#ifdef CONFIG_DEBUG_SLAB_LEAK - -static inline bool is_store_user_clean(struct kmem_cache *cachep) -{ - return atomic_read(&cachep->store_user_clean) == 1; -} - -static inline void set_store_user_clean(struct kmem_cache *cachep) -{ - atomic_set(&cachep->store_user_clean, 1); -} - -static inline void set_store_user_dirty(struct kmem_cache *cachep) -{ - if (is_store_user_clean(cachep)) - atomic_set(&cachep->store_user_clean, 0); -} - -#else -static inline void set_store_user_dirty(struct kmem_cache *cachep) {} - -#endif - /* * Do not go above this order unless 0 objects fit into the slab or * overridden on the command line. @@ -393,12 +371,6 @@ #define SLAB_MAX_ORDER_LO 0 static int slab_max_order = SLAB_MAX_ORDER_LO; static bool slab_max_order_set __initdata; - -static inline struct kmem_cache *virt_to_cache(const void *obj) -{ - struct page *page = virt_to_head_page(obj); - return page->slab_cache; -} static inline void *index_to_obj(struct kmem_cache *cache, struct page *page, unsigned int idx) @@ -617,6 +589,16 @@ return nr; } +/* &alien->lock must be held by alien callers. */ +static __always_inline void __free_one(struct array_cache *ac, void *objp) +{ + /* Avoid trivial double-free. */ + if (IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) && + WARN_ON_ONCE(ac->avail > 0 && ac->entry[ac->avail - 1] == objp)) + return; + ac->entry[ac->avail++] = objp; +} + #ifndef CONFIG_NUMA #define drain_alien_cache(cachep, alien) do { } while (0) @@ -677,12 +659,11 @@ static struct alien_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) { struct alien_cache **alc_ptr; - size_t memsize = sizeof(void *) * nr_node_ids; int i; if (limit > 1) limit = 12; - alc_ptr = kzalloc_node(memsize, gfp, node); + alc_ptr = kcalloc_node(nr_node_ids, sizeof(void *), gfp, node); if (!alc_ptr) return NULL; @@ -797,7 +778,7 @@ STATS_INC_ACOVERFLOW(cachep); __drain_alien_cache(cachep, ac, page_node, &list); } - ac->entry[ac->avail++] = objp; + __free_one(ac, objp); spin_unlock(&alien->lock); slabs_destroy(cachep, &list); } else { @@ -952,10 +933,10 @@ * To protect lockless access to n->shared during irq disabled context. * If n->shared isn't NULL in irq disabled context, accessing to it is * guaranteed to be valid until irq is re-enabled, because it will be - * freed after synchronize_sched(). + * freed after synchronize_rcu(). */ if (old_shared && force_change) - synchronize_sched(); + synchronize_rcu(); fail: kfree(old_shared); @@ -991,10 +972,8 @@ /* cpu is dead; no one can alloc from it. */ nc = per_cpu_ptr(cachep->cpu_cache, cpu); - if (nc) { - free_block(cachep, nc->entry, nc->avail, node, &list); - nc->avail = 0; - } + free_block(cachep, nc->entry, nc->avail, node, &list); + nc->avail = 0; if (!cpumask_empty(mask)) { spin_unlock_irq(&n->list_lock); @@ -1082,9 +1061,9 @@ * offline. * * Even if all the cpus of a node are down, we don't free the - * kmem_list3 of any cache. This to avoid a race between cpu_down, and + * kmem_cache_node of any cache. This to avoid a race between cpu_down, and * a kmalloc allocation from another cpu for memory from the node of - * the cpu going down. The list3 structure is usually allocated from + * the cpu going down. The kmem_cache_node structure is usually allocated from * kmem_cache_create() and gets destroyed at kmem_cache_destroy(). */ int slab_dead_cpu(unsigned int cpu) @@ -1238,7 +1217,7 @@ * page orders on machines with more than 32MB of memory if * not overridden on the command line. */ - if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT) + if (!slab_max_order_set && totalram_pages() > (32 << 20) >> PAGE_SHIFT) slab_max_order = SLAB_MAX_ORDER_HI; /* Bootstrap is tricky, because several objects are allocated @@ -1271,7 +1250,6 @@ nr_node_ids * sizeof(struct kmem_cache_node *), SLAB_HWCACHE_ALIGN, 0, 0); list_add(&kmem_cache->list, &slab_caches); - memcg_link_cache(kmem_cache); slab_state = PARTIAL; /* @@ -1279,9 +1257,10 @@ * structures first. Without this, further allocations will bug. */ kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE] = create_kmalloc_cache( - kmalloc_info[INDEX_NODE].name, - kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS, - 0, kmalloc_size(INDEX_NODE)); + kmalloc_info[INDEX_NODE].name[KMALLOC_NORMAL], + kmalloc_info[INDEX_NODE].size, + ARCH_KMALLOC_FLAGS, 0, + kmalloc_info[INDEX_NODE].size); slab_state = PARTIAL_NODE; setup_kmalloc_cache_index_table(); @@ -1392,7 +1371,6 @@ int nodeid) { struct page *page; - int nr_pages; flags |= cachep->allocflags; @@ -1402,17 +1380,7 @@ return NULL; } - if (memcg_charge_slab(page, flags, cachep->gfporder, cachep)) { - __free_pages(page, cachep->gfporder); - return NULL; - } - - nr_pages = (1 << cachep->gfporder); - if (cachep->flags & SLAB_RECLAIM_ACCOUNT) - mod_lruvec_page_state(page, NR_SLAB_RECLAIMABLE, nr_pages); - else - mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE, nr_pages); - + account_slab_page(page, cachep->gfporder, cachep); __SetPageSlab(page); /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */ if (sk_memalloc_socks() && page_is_pfmemalloc(page)) @@ -1427,12 +1395,6 @@ static void kmem_freepages(struct kmem_cache *cachep, struct page *page) { int order = cachep->gfporder; - unsigned long nr_freed = (1 << order); - - if (cachep->flags & SLAB_RECLAIM_ACCOUNT) - mod_lruvec_page_state(page, NR_SLAB_RECLAIMABLE, -nr_freed); - else - mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE, -nr_freed); BUG_ON(!PageSlab(page)); __ClearPageSlabPfmemalloc(page); @@ -1441,8 +1403,8 @@ page->mapping = NULL; if (current->reclaim_state) - current->reclaim_state->reclaimed_slab += nr_freed; - memcg_uncharge_slab(page, order, cachep); + current->reclaim_state->reclaimed_slab += 1 << order; + unaccount_slab_page(page, order, cachep); __free_pages(page, order); } @@ -1460,7 +1422,7 @@ #if DEBUG static bool is_debug_pagealloc_cache(struct kmem_cache *cachep) { - if (debug_pagealloc_enabled() && OFF_SLAB(cachep) && + if (debug_pagealloc_enabled_static() && OFF_SLAB(cachep) && (cachep->size % PAGE_SIZE) == 0) return true; @@ -1468,53 +1430,17 @@ } #ifdef CONFIG_DEBUG_PAGEALLOC -static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, - unsigned long caller) -{ - int size = cachep->object_size; - - addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)]; - - if (size < 5 * sizeof(unsigned long)) - return; - - *addr++ = 0x12345678; - *addr++ = caller; - *addr++ = smp_processor_id(); - size -= 3 * sizeof(unsigned long); - { - unsigned long *sptr = &caller; - unsigned long svalue; - - while (!kstack_end(sptr)) { - svalue = *sptr++; - if (kernel_text_address(svalue)) { - *addr++ = svalue; - size -= sizeof(unsigned long); - if (size <= sizeof(unsigned long)) - break; - } - } - - } - *addr++ = 0x87654321; -} - -static void slab_kernel_map(struct kmem_cache *cachep, void *objp, - int map, unsigned long caller) +static void slab_kernel_map(struct kmem_cache *cachep, void *objp, int map) { if (!is_debug_pagealloc_cache(cachep)) return; - if (caller) - store_stackinfo(cachep, objp, caller); - - kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map); + __kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map); } #else static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp, - int map, unsigned long caller) {} + int map) {} #endif @@ -1662,7 +1588,7 @@ if (cachep->flags & SLAB_POISON) { check_poison_obj(cachep, objp); - slab_kernel_map(cachep, objp, 1, 0); + slab_kernel_map(cachep, objp, 1); } if (cachep->flags & SLAB_RED_ZONE) { if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) @@ -1707,12 +1633,16 @@ kmem_cache_free(cachep->freelist_cache, freelist); } +/* + * Update the size of the caches before calling slabs_destroy as it may + * recursively call kfree. + */ static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list) { struct page *page, *n; - list_for_each_entry_safe(page, n, list, lru) { - list_del(&page->lru); + list_for_each_entry_safe(page, n, list, slab_list) { + list_del(&page->slab_list); slab_destroy(cachep, page); } } @@ -1728,6 +1658,8 @@ * This could be made much more intelligent. For now, try to avoid using * high order pages for slabs. When the gfp() functions are more friendly * towards high-order requests, this should be changed. + * + * Return: number of left-over bytes in a slab */ static size_t calculate_slab_order(struct kmem_cache *cachep, size_t size, slab_flags_t flags) @@ -1858,8 +1790,7 @@ } slab_flags_t kmem_cache_flags(unsigned int object_size, - slab_flags_t flags, const char *name, - void (*ctor)(void *)) + slab_flags_t flags, const char *name) { return flags; } @@ -1984,6 +1915,8 @@ * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware * cacheline. This can be beneficial if you're counting cycles as closely * as davem. + * + * Return: a pointer to the created cache or %NULL in case of error */ int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags) { @@ -2084,7 +2017,7 @@ * to check size >= 256. It guarantees that all necessary small * sized slab is initialized in current slab initialization sequence. */ - if (debug_pagealloc_enabled() && (flags & SLAB_POISON) && + if (debug_pagealloc_enabled_static() && (flags & SLAB_POISON) && size >= 256 && cachep->object_size > cache_line_size()) { if (size < PAGE_SIZE || size % PAGE_SIZE == 0) { size_t tmp_size = ALIGN(size, PAGE_SIZE); @@ -2224,8 +2157,8 @@ spin_lock(&n->list_lock); free_block(cachep, ac->entry, ac->avail, node, &list); spin_unlock(&n->list_lock); - slabs_destroy(cachep, &list); ac->avail = 0; + slabs_destroy(cachep, &list); } static void drain_cpu_caches(struct kmem_cache *cachep) @@ -2272,8 +2205,8 @@ goto out; } - page = list_entry(p, struct page, lru); - list_del(&page->lru); + page = list_entry(p, struct page, slab_list); + list_del(&page->slab_list); n->free_slabs--; n->total_slabs--; /* @@ -2318,13 +2251,6 @@ } return (ret ? 1 : 0); } - -#ifdef CONFIG_MEMCG -void __kmemcg_cache_deactivate(struct kmem_cache *cachep) -{ - __kmem_cache_shrink(cachep); -} -#endif int __kmem_cache_shutdown(struct kmem_cache *cachep) { @@ -2379,8 +2305,6 @@ /* Slab management obj is off-slab. */ freelist = kmem_cache_alloc_node(cachep->freelist_cache, local_flags, nodeid); - if (!freelist) - return NULL; } else { /* We will use last bytes at the slab for freelist */ freelist = addr + (PAGE_SIZE << cachep->gfporder) - @@ -2438,7 +2362,7 @@ /* need to poison the objs? */ if (cachep->flags & SLAB_POISON) { poison_obj(cachep, objp, POISON_FREE); - slab_kernel_map(cachep, objp, 0, 0); + slab_kernel_map(cachep, objp, 0); } } #endif @@ -2595,11 +2519,6 @@ objp = index_to_obj(cachep, page, get_free_obj(page, page->active)); page->active++; -#if DEBUG - if (cachep->flags & SLAB_STORE_USER) - set_store_user_dirty(cachep); -#endif - return objp; } @@ -2656,13 +2575,9 @@ * Be lazy and only check for valid flags here, keeping it out of the * critical path in kmem_cache_alloc(). */ - if (unlikely(flags & GFP_SLAB_BUG_MASK)) { - gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK; - flags &= ~GFP_SLAB_BUG_MASK; - pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n", - invalid_mask, &invalid_mask, flags, &flags); - dump_stack(); - } + if (unlikely(flags & GFP_SLAB_BUG_MASK)) + flags = kmalloc_fix_flags(flags); + WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO)); local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK); @@ -2732,13 +2647,13 @@ if (!page) return; - INIT_LIST_HEAD(&page->lru); + INIT_LIST_HEAD(&page->slab_list); n = get_node(cachep, page_to_nid(page)); spin_lock(&n->list_lock); n->total_slabs++; if (!page->active) { - list_add_tail(&page->lru, &(n->slabs_free)); + list_add_tail(&page->slab_list, &n->slabs_free); n->free_slabs++; } else fixup_slab_list(cachep, n, page, &list); @@ -2805,10 +2720,8 @@ *dbg_redzone1(cachep, objp) = RED_INACTIVE; *dbg_redzone2(cachep, objp) = RED_INACTIVE; } - if (cachep->flags & SLAB_STORE_USER) { - set_store_user_dirty(cachep); + if (cachep->flags & SLAB_STORE_USER) *dbg_userword(cachep, objp) = (void *)caller; - } objnr = obj_to_index(cachep, page, objp); @@ -2817,7 +2730,7 @@ if (cachep->flags & SLAB_POISON) { poison_obj(cachep, objp, POISON_FREE); - slab_kernel_map(cachep, objp, 0, caller); + slab_kernel_map(cachep, objp, 0); } return objp; } @@ -2847,9 +2760,9 @@ void **list) { /* move slabp to correct slabp list: */ - list_del(&page->lru); + list_del(&page->slab_list); if (page->active == cachep->num) { - list_add(&page->lru, &n->slabs_full); + list_add(&page->slab_list, &n->slabs_full); if (OBJFREELIST_SLAB(cachep)) { #if DEBUG /* Poisoning will be done without holding the lock */ @@ -2863,7 +2776,7 @@ page->freelist = NULL; } } else - list_add(&page->lru, &n->slabs_partial); + list_add(&page->slab_list, &n->slabs_partial); } /* Try to find non-pfmemalloc slab if needed */ @@ -2886,20 +2799,20 @@ } /* Move pfmemalloc slab to the end of list to speed up next search */ - list_del(&page->lru); + list_del(&page->slab_list); if (!page->active) { - list_add_tail(&page->lru, &n->slabs_free); + list_add_tail(&page->slab_list, &n->slabs_free); n->free_slabs++; } else - list_add_tail(&page->lru, &n->slabs_partial); + list_add_tail(&page->slab_list, &n->slabs_partial); - list_for_each_entry(page, &n->slabs_partial, lru) { + list_for_each_entry(page, &n->slabs_partial, slab_list) { if (!PageSlabPfmemalloc(page)) return page; } n->free_touched = 1; - list_for_each_entry(page, &n->slabs_free, lru) { + list_for_each_entry(page, &n->slabs_free, slab_list) { if (!PageSlabPfmemalloc(page)) { n->free_slabs--; return page; @@ -2914,11 +2827,12 @@ struct page *page; assert_spin_locked(&n->list_lock); - page = list_first_entry_or_null(&n->slabs_partial, struct page, lru); + page = list_first_entry_or_null(&n->slabs_partial, struct page, + slab_list); if (!page) { n->free_touched = 1; page = list_first_entry_or_null(&n->slabs_free, struct page, - lru); + slab_list); if (page) n->free_slabs--; } @@ -3081,7 +2995,7 @@ return objp; if (cachep->flags & SLAB_POISON) { check_poison_obj(cachep, objp); - slab_kernel_map(cachep, objp, 1, 0); + slab_kernel_map(cachep, objp, 1); poison_obj(cachep, objp, POISON_INUSE); } if (cachep->flags & SLAB_STORE_USER) @@ -3102,10 +3016,9 @@ objp += obj_offset(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) cachep->ctor(objp); - if (ARCH_SLAB_MINALIGN && - ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) { - pr_err("0x%px: not aligned to ARCH_SLAB_MINALIGN=%d\n", - objp, (int)ARCH_SLAB_MINALIGN); + if ((unsigned long)objp & (arch_slab_minalign() - 1)) { + pr_err("0x%px: not aligned to arch_slab_minalign()=%u\n", objp, + arch_slab_minalign()); } return objp; } @@ -3184,7 +3097,7 @@ struct zonelist *zonelist; struct zoneref *z; struct zone *zone; - enum zone_type high_zoneidx = gfp_zone(flags); + enum zone_type highest_zoneidx = gfp_zone(flags); void *obj = NULL; struct page *page; int nid; @@ -3202,7 +3115,7 @@ * Look through allowed nodes for objects available * from existing per node queues. */ - for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { + for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) { nid = zone_to_nid(zone); if (cpuset_zone_allowed(zone, flags) && @@ -3294,17 +3207,23 @@ } static __always_inline void * -slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, +slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, size_t orig_size, unsigned long caller) { unsigned long save_flags; void *ptr; int slab_node = numa_mem_id(); + struct obj_cgroup *objcg = NULL; + bool init = false; flags &= gfp_allowed_mask; - cachep = slab_pre_alloc_hook(cachep, flags); + cachep = slab_pre_alloc_hook(cachep, &objcg, 1, flags); if (unlikely(!cachep)) return NULL; + + ptr = kfence_alloc(cachep, orig_size, flags); + if (unlikely(ptr)) + goto out_hooks; cache_alloc_debugcheck_before(cachep, flags); local_irq_save(save_flags); @@ -3334,11 +3253,10 @@ out: local_irq_restore(save_flags); ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); + init = slab_want_init_on_alloc(flags, cachep); - if (unlikely(slab_want_init_on_alloc(flags, cachep)) && ptr) - memset(ptr, 0, cachep->object_size); - - slab_post_alloc_hook(cachep, flags, 1, &ptr); +out_hooks: + slab_post_alloc_hook(cachep, objcg, flags, 1, &ptr, init); return ptr; } @@ -3375,15 +3293,21 @@ #endif /* CONFIG_NUMA */ static __always_inline void * -slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller) +slab_alloc(struct kmem_cache *cachep, gfp_t flags, size_t orig_size, unsigned long caller) { unsigned long save_flags; void *objp; + struct obj_cgroup *objcg = NULL; + bool init = false; flags &= gfp_allowed_mask; - cachep = slab_pre_alloc_hook(cachep, flags); + cachep = slab_pre_alloc_hook(cachep, &objcg, 1, flags); if (unlikely(!cachep)) return NULL; + + objp = kfence_alloc(cachep, orig_size, flags); + if (unlikely(objp)) + goto out; cache_alloc_debugcheck_before(cachep, flags); local_irq_save(save_flags); @@ -3391,11 +3315,10 @@ local_irq_restore(save_flags); objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); prefetchw(objp); + init = slab_want_init_on_alloc(flags, cachep); - if (unlikely(slab_want_init_on_alloc(flags, cachep)) && objp) - memset(objp, 0, cachep->object_size); - - slab_post_alloc_hook(cachep, flags, 1, &objp); +out: + slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init); return objp; } @@ -3419,29 +3342,29 @@ objp = objpp[i]; page = virt_to_head_page(objp); - list_del(&page->lru); + list_del(&page->slab_list); check_spinlock_acquired_node(cachep, node); slab_put_obj(cachep, page, objp); STATS_DEC_ACTIVE(cachep); /* fixup slab chains */ if (page->active == 0) { - list_add(&page->lru, &n->slabs_free); + list_add(&page->slab_list, &n->slabs_free); n->free_slabs++; } else { /* Unconditionally move a slab to the end of the * partial list on free - maximum time for the * other objects to be freed, too. */ - list_add_tail(&page->lru, &n->slabs_partial); + list_add_tail(&page->slab_list, &n->slabs_partial); } } while (n->free_objects > n->free_limit && !list_empty(&n->slabs_free)) { n->free_objects -= cachep->num; - page = list_last_entry(&n->slabs_free, struct page, lru); - list_move(&page->lru, list); + page = list_last_entry(&n->slabs_free, struct page, slab_list); + list_move(&page->slab_list, list); n->free_slabs--; n->total_slabs--; } @@ -3479,7 +3402,7 @@ int i = 0; struct page *page; - list_for_each_entry(page, &n->slabs_free, lru) { + list_for_each_entry(page, &n->slabs_free, slab_list) { BUG_ON(page->active); i++; @@ -3488,9 +3411,9 @@ } #endif spin_unlock(&n->list_lock); - slabs_destroy(cachep, &list); ac->avail -= batchcount; memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); + slabs_destroy(cachep, &list); } /* @@ -3500,9 +3423,31 @@ static __always_inline void __cache_free(struct kmem_cache *cachep, void *objp, unsigned long caller) { - /* Put the object into the quarantine, don't touch it for now. */ - if (kasan_slab_free(cachep, objp, _RET_IP_)) + bool init; + + if (is_kfence_address(objp)) { + kmemleak_free_recursive(objp, cachep->flags); + memcg_slab_free_hook(cachep, &objp, 1); + __kfence_free(objp); return; + } + + /* + * As memory initialization might be integrated into KASAN, + * kasan_slab_free and initialization memset must be + * kept together to avoid discrepancies in behavior. + */ + init = slab_want_init_on_free(cachep); + if (init && !kasan_has_integrated_init()) + memset(objp, 0, cachep->object_size); + /* KASAN might put objp into memory quarantine, delaying its reuse. */ + if (kasan_slab_free(cachep, objp, init)) + return; + + /* Use KCSAN to help debug racy use-after-free. */ + if (!(cachep->flags & SLAB_TYPESAFE_BY_RCU)) + __kcsan_check_access(objp, cachep->object_size, + KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT); ___cache_free(cachep, objp, caller); } @@ -3513,10 +3458,9 @@ struct array_cache *ac = cpu_cache_get(cachep); check_irq_off(); - if (unlikely(slab_want_init_on_free(cachep))) - memset(objp, 0, cachep->object_size); kmemleak_free_recursive(objp, cachep->flags); objp = cache_free_debugcheck(cachep, objp, caller); + memcg_slab_free_hook(cachep, &objp, 1); /* * Skip calling cache_free_alien() when the platform is not numa. @@ -3544,7 +3488,7 @@ } } - ac->entry[ac->avail++] = objp; + __free_one(ac, objp); } /** @@ -3554,10 +3498,12 @@ * * Allocate an object from this cache. The flags are only relevant * if the cache has no available objects. + * + * Return: pointer to the new object or %NULL in case of error */ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) { - void *ret = slab_alloc(cachep, flags, _RET_IP_); + void *ret = slab_alloc(cachep, flags, cachep->object_size, _RET_IP_); trace_kmem_cache_alloc(_RET_IP_, ret, cachep->object_size, cachep->size, flags); @@ -3580,8 +3526,9 @@ void **p) { size_t i; + struct obj_cgroup *objcg = NULL; - s = slab_pre_alloc_hook(s, flags); + s = slab_pre_alloc_hook(s, &objcg, size, flags); if (!s) return 0; @@ -3589,7 +3536,7 @@ local_irq_disable(); for (i = 0; i < size; i++) { - void *objp = __do_cache_alloc(s, flags); + void *objp = kfence_alloc(s, s->object_size, flags) ?: __do_cache_alloc(s, flags); if (unlikely(!objp)) goto error; @@ -3599,18 +3546,18 @@ cache_alloc_debugcheck_after_bulk(s, flags, size, p, _RET_IP_); - /* Clear memory outside IRQ disabled section */ - if (unlikely(slab_want_init_on_alloc(flags, s))) - for (i = 0; i < size; i++) - memset(p[i], 0, s->object_size); - - slab_post_alloc_hook(s, flags, size, p); + /* + * memcg and kmem_cache debug support and memory initialization. + * Done outside of the IRQ disabled section. + */ + slab_post_alloc_hook(s, objcg, flags, size, p, + slab_want_init_on_alloc(flags, s)); /* FIXME: Trace call missing. Christoph would like a bulk variant */ return size; error: local_irq_enable(); cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_); - slab_post_alloc_hook(s, flags, i, p); + slab_post_alloc_hook(s, objcg, flags, i, p, false); __kmem_cache_free_bulk(s, i, p); return 0; } @@ -3622,7 +3569,7 @@ { void *ret; - ret = slab_alloc(cachep, flags, _RET_IP_); + ret = slab_alloc(cachep, flags, size, _RET_IP_); ret = kasan_kmalloc(cachep, ret, size, flags); trace_kmalloc(_RET_IP_, ret, @@ -3643,10 +3590,12 @@ * node, which can improve the performance for cpu bound structures. * * Fallback to other node is possible if __GFP_THISNODE is not set. + * + * Return: pointer to the new object or %NULL in case of error */ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { - void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_); + void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_); trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep->object_size, cachep->size, @@ -3664,7 +3613,7 @@ { void *ret; - ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_); + ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_); ret = kasan_kmalloc(cachep, ret, size, flags); trace_kmalloc_node(_RET_IP_, ret, @@ -3711,6 +3660,8 @@ * @size: how many bytes of memory are required. * @flags: the type of memory to allocate (see kmalloc). * @caller: function caller for debug tracking of the caller + * + * Return: pointer to the allocated memory or %NULL in case of error */ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, unsigned long caller) @@ -3723,7 +3674,7 @@ cachep = kmalloc_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(cachep))) return cachep; - ret = slab_alloc(cachep, flags, caller); + ret = slab_alloc(cachep, flags, size, caller); ret = kasan_kmalloc(cachep, ret, size, flags); trace_kmalloc(caller, ret, @@ -3783,6 +3734,8 @@ s = virt_to_cache(objp); else s = cache_from_obj(orig_s, objp); + if (!s) + continue; debug_check_no_locks_freed(objp, s->object_size); if (!(s->flags & SLAB_DEBUG_OBJECTS)) @@ -3817,6 +3770,10 @@ local_irq_save(flags); kfree_debugcheck(objp); c = virt_to_cache(objp); + if (!c) { + local_irq_restore(flags); + return; + } debug_check_no_locks_freed(objp, c->object_size); debug_check_no_obj_freed(objp, c->object_size); @@ -3862,8 +3819,8 @@ } /* Always called with the slab_mutex held */ -static int __do_tune_cpucache(struct kmem_cache *cachep, int limit, - int batchcount, int shared, gfp_t gfp) +static int do_tune_cpucache(struct kmem_cache *cachep, int limit, + int batchcount, int shared, gfp_t gfp) { struct array_cache __percpu *cpu_cache, *prev; int cpu; @@ -3908,29 +3865,6 @@ return setup_kmem_cache_nodes(cachep, gfp); } -static int do_tune_cpucache(struct kmem_cache *cachep, int limit, - int batchcount, int shared, gfp_t gfp) -{ - int ret; - struct kmem_cache *c; - - ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp); - - if (slab_state < FULL) - return ret; - - if ((ret < 0) || !is_root_cache(cachep)) - return ret; - - lockdep_assert_held(&slab_mutex); - for_each_memcg_cache(c, cachep) { - /* return value determined by the root cache only */ - __do_tune_cpucache(c, limit, batchcount, shared, gfp); - } - - return ret; -} - /* Called with slab_mutex held always */ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) { @@ -3942,13 +3876,6 @@ err = cache_random_seq_create(cachep, cachep->num, gfp); if (err) goto end; - - if (!is_root_cache(cachep)) { - struct kmem_cache *root = memcg_root_cache(cachep); - limit = root->limit; - shared = root->shared; - batchcount = root->batchcount; - } if (limit && shared && batchcount) goto skip_setup; @@ -4136,6 +4063,7 @@ sinfo->objects_per_slab = cachep->num; sinfo->cache_order = cachep->gfporder; } +EXPORT_SYMBOL_GPL(get_slabinfo); void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *cachep) { @@ -4176,6 +4104,8 @@ * @buffer: user buffer * @count: data length * @ppos: unused + * + * Return: %0 on success, negative error code otherwise. */ ssize_t slabinfo_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) @@ -4220,200 +4150,6 @@ return res; } -#ifdef CONFIG_DEBUG_SLAB_LEAK - -static inline int add_caller(unsigned long *n, unsigned long v) -{ - unsigned long *p; - int l; - if (!v) - return 1; - l = n[1]; - p = n + 2; - while (l) { - int i = l/2; - unsigned long *q = p + 2 * i; - if (*q == v) { - q[1]++; - return 1; - } - if (*q > v) { - l = i; - } else { - p = q + 2; - l -= i + 1; - } - } - if (++n[1] == n[0]) - return 0; - memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n)); - p[0] = v; - p[1] = 1; - return 1; -} - -static void handle_slab(unsigned long *n, struct kmem_cache *c, - struct page *page) -{ - void *p; - int i, j; - unsigned long v; - - if (n[0] == n[1]) - return; - for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) { - bool active = true; - - for (j = page->active; j < c->num; j++) { - if (get_free_obj(page, j) == i) { - active = false; - break; - } - } - - if (!active) - continue; - - /* - * probe_kernel_read() is used for DEBUG_PAGEALLOC. page table - * mapping is established when actual object allocation and - * we could mistakenly access the unmapped object in the cpu - * cache. - */ - if (probe_kernel_read(&v, dbg_userword(c, p), sizeof(v))) - continue; - - if (!add_caller(n, v)) - return; - } -} - -static void show_symbol(struct seq_file *m, unsigned long address) -{ -#ifdef CONFIG_KALLSYMS - unsigned long offset, size; - char modname[MODULE_NAME_LEN], name[KSYM_NAME_LEN]; - - if (lookup_symbol_attrs(address, &size, &offset, modname, name) == 0) { - seq_printf(m, "%s+%#lx/%#lx", name, offset, size); - if (modname[0]) - seq_printf(m, " [%s]", modname); - return; - } -#endif - seq_printf(m, "%px", (void *)address); -} - -static int leaks_show(struct seq_file *m, void *p) -{ - struct kmem_cache *cachep = list_entry(p, struct kmem_cache, - root_caches_node); - struct page *page; - struct kmem_cache_node *n; - const char *name; - unsigned long *x = m->private; - int node; - int i; - - if (!(cachep->flags & SLAB_STORE_USER)) - return 0; - if (!(cachep->flags & SLAB_RED_ZONE)) - return 0; - - /* - * Set store_user_clean and start to grab stored user information - * for all objects on this cache. If some alloc/free requests comes - * during the processing, information would be wrong so restart - * whole processing. - */ - do { - drain_cpu_caches(cachep); - /* - * drain_cpu_caches() could make kmemleak_object and - * debug_objects_cache dirty, so reset afterwards. - */ - set_store_user_clean(cachep); - - x[1] = 0; - - for_each_kmem_cache_node(cachep, node, n) { - - check_irq_on(); - spin_lock_irq(&n->list_lock); - - list_for_each_entry(page, &n->slabs_full, lru) - handle_slab(x, cachep, page); - list_for_each_entry(page, &n->slabs_partial, lru) - handle_slab(x, cachep, page); - spin_unlock_irq(&n->list_lock); - } - } while (!is_store_user_clean(cachep)); - - name = cachep->name; - if (x[0] == x[1]) { - /* Increase the buffer size */ - mutex_unlock(&slab_mutex); - m->private = kcalloc(x[0] * 4, sizeof(unsigned long), - GFP_KERNEL); - if (!m->private) { - /* Too bad, we are really out */ - m->private = x; - mutex_lock(&slab_mutex); - return -ENOMEM; - } - *(unsigned long *)m->private = x[0] * 2; - kfree(x); - mutex_lock(&slab_mutex); - /* Now make sure this entry will be retried */ - m->count = m->size; - return 0; - } - for (i = 0; i < x[1]; i++) { - seq_printf(m, "%s: %lu ", name, x[2*i+3]); - show_symbol(m, x[2*i+2]); - seq_putc(m, '\n'); - } - - return 0; -} - -static const struct seq_operations slabstats_op = { - .start = slab_start, - .next = slab_next, - .stop = slab_stop, - .show = leaks_show, -}; - -static int slabstats_open(struct inode *inode, struct file *file) -{ - unsigned long *n; - - n = __seq_open_private(file, &slabstats_op, PAGE_SIZE); - if (!n) - return -ENOMEM; - - *n = PAGE_SIZE / (2 * sizeof(unsigned long)); - - return 0; -} - -static const struct file_operations proc_slabstats_operations = { - .open = slabstats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; -#endif - -static int __init slab_proc_init(void) -{ -#ifdef CONFIG_DEBUG_SLAB_LEAK - proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations); -#endif - return 0; -} -module_init(slab_proc_init); - #ifdef CONFIG_HARDENED_USERCOPY /* * Rejects incorrectly sized objects and objects that are to be copied @@ -4438,7 +4174,10 @@ BUG_ON(objnr >= cachep->num); /* Find offset within object. */ - offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep); + if (is_kfence_address(ptr)) + offset = ptr - kfence_object_start(ptr); + else + offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep); /* Allow address range falling entirely within usercopy region. */ if (offset >= cachep->useroffset && @@ -4464,31 +4203,26 @@ #endif /* CONFIG_HARDENED_USERCOPY */ /** - * ksize - get the actual amount of memory allocated for a given object - * @objp: Pointer to the object + * __ksize -- Uninstrumented ksize. + * @objp: pointer to the object * - * kmalloc may internally round up allocations and return more memory - * than requested. ksize() can be used to determine the actual amount of - * memory allocated. The caller may use this additional memory, even though - * a smaller amount of memory was initially specified with the kmalloc call. - * The caller must guarantee that objp points to a valid object previously - * allocated with either kmalloc() or kmem_cache_alloc(). The object - * must not be freed during the duration of the call. + * Unlike ksize(), __ksize() is uninstrumented, and does not provide the same + * safety checks as ksize() with KASAN instrumentation enabled. + * + * Return: size of the actual memory used by @objp in bytes */ -size_t ksize(const void *objp) +size_t __ksize(const void *objp) { + struct kmem_cache *c; size_t size; BUG_ON(!objp); if (unlikely(objp == ZERO_SIZE_PTR)) return 0; - size = virt_to_cache(objp)->object_size; - /* We assume that ksize callers could use the whole allocated area, - * so we need to unpoison this area. - */ - kasan_unpoison_shadow(objp, size); + c = virt_to_cache(objp); + size = c ? c->object_size : 0; return size; } -EXPORT_SYMBOL(ksize); +EXPORT_SYMBOL(__ksize); -- Gitblit v1.6.2