| .. | .. |
|---|
| 28 | 28 | #include <linux/ctype.h> |
|---|
| 29 | 29 | #include <linux/debugobjects.h> |
|---|
| 30 | 30 | #include <linux/kallsyms.h> |
|---|
| 31 | +#include <linux/kfence.h> |
|---|
| 31 | 32 | #include <linux/memory.h> |
|---|
| 32 | 33 | #include <linux/math64.h> |
|---|
| 33 | 34 | #include <linux/fault-inject.h> |
|---|
| .. | .. |
|---|
| 36 | 37 | #include <linux/memcontrol.h> |
|---|
| 37 | 38 | #include <linux/random.h> |
|---|
| 38 | 39 | |
|---|
| 40 | +#include <linux/debugfs.h> |
|---|
| 39 | 41 | #include <trace/events/kmem.h> |
|---|
| 42 | +#include <trace/hooks/mm.h> |
|---|
| 40 | 43 | |
|---|
| 41 | 44 | #include "internal.h" |
|---|
| 42 | 45 | |
|---|
| .. | .. |
|---|
| 59 | 62 | * D. page->frozen -> frozen state |
|---|
| 60 | 63 | * |
|---|
| 61 | 64 | * If a slab is frozen then it is exempt from list management. It is not |
|---|
| 62 | | - * on any list. The processor that froze the slab is the one who can |
|---|
| 63 | | - * perform list operations on the page. Other processors may put objects |
|---|
| 64 | | - * onto the freelist but the processor that froze the slab is the only |
|---|
| 65 | | - * one that can retrieve the objects from the page's freelist. |
|---|
| 65 | + * on any list except per cpu partial list. The processor that froze the |
|---|
| 66 | + * slab is the one who can perform list operations on the page. Other |
|---|
| 67 | + * processors may put objects onto the freelist but the processor that |
|---|
| 68 | + * froze the slab is the only one that can retrieve the objects from the |
|---|
| 69 | + * page's freelist. |
|---|
| 66 | 70 | * |
|---|
| 67 | 71 | * The list_lock protects the partial and full list on each node and |
|---|
| 68 | 72 | * the partial slab counter. If taken then no new slabs may be added or |
|---|
| .. | .. |
|---|
| 93 | 97 | * minimal so we rely on the page allocators per cpu caches for |
|---|
| 94 | 98 | * fast frees and allocs. |
|---|
| 95 | 99 | * |
|---|
| 96 | | - * Overloading of page flags that are otherwise used for LRU management. |
|---|
| 97 | | - * |
|---|
| 98 | | - * PageActive The slab is frozen and exempt from list processing. |
|---|
| 100 | + * page->frozen The slab is frozen and exempt from list processing. |
|---|
| 99 | 101 | * This means that the slab is dedicated to a purpose |
|---|
| 100 | 102 | * such as satisfying allocations for a specific |
|---|
| 101 | 103 | * processor. Objects may be freed in the slab while |
|---|
| .. | .. |
|---|
| 111 | 113 | * free objects in addition to the regular freelist |
|---|
| 112 | 114 | * that requires the slab lock. |
|---|
| 113 | 115 | * |
|---|
| 114 | | - * PageError Slab requires special handling due to debug |
|---|
| 116 | + * SLAB_DEBUG_FLAGS Slab requires special handling due to debug |
|---|
| 115 | 117 | * options set. This moves slab handling out of |
|---|
| 116 | 118 | * the fast path and disables lockless freelists. |
|---|
| 117 | 119 | */ |
|---|
| 118 | 120 | |
|---|
| 119 | | -static inline int kmem_cache_debug(struct kmem_cache *s) |
|---|
| 120 | | -{ |
|---|
| 121 | 121 | #ifdef CONFIG_SLUB_DEBUG |
|---|
| 122 | | - return unlikely(s->flags & SLAB_DEBUG_FLAGS); |
|---|
| 122 | +#ifdef CONFIG_SLUB_DEBUG_ON |
|---|
| 123 | +DEFINE_STATIC_KEY_TRUE(slub_debug_enabled); |
|---|
| 123 | 124 | #else |
|---|
| 124 | | - return 0; |
|---|
| 125 | +DEFINE_STATIC_KEY_FALSE(slub_debug_enabled); |
|---|
| 125 | 126 | #endif |
|---|
| 127 | +#endif |
|---|
| 128 | + |
|---|
| 129 | +static inline bool kmem_cache_debug(struct kmem_cache *s) |
|---|
| 130 | +{ |
|---|
| 131 | + return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS); |
|---|
| 126 | 132 | } |
|---|
| 127 | 133 | |
|---|
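The new kmem_cache_debug() above is a thin wrapper around kmem_cache_debug_flags(), which this series keeps in mm/slab.h together with the slub_debug_enabled static key. For readers without that companion hunk, a rough sketch of the helper, assuming the mainline shape (the exact body, including the VM_WARN_ON_ONCE(), may differ in this tree):

```c
/* Sketch of kmem_cache_debug_flags() as defined in mm/slab.h (not part of this hunk). */
static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t flags)
{
#ifdef CONFIG_SLUB_DEBUG
	VM_WARN_ON_ONCE(!(flags & SLAB_DEBUG_FLAGS));
	return s->flags & flags;
#else
	return false;
#endif
}
```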
| 128 | 134 | void *fixup_red_left(struct kmem_cache *s, void *p) |
|---|
| 129 | 135 | { |
|---|
| 130 | | - if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) |
|---|
| 136 | + if (kmem_cache_debug_flags(s, SLAB_RED_ZONE)) |
|---|
| 131 | 137 | p += s->red_left_pad; |
|---|
| 132 | 138 | |
|---|
| 133 | 139 | return p; |
|---|
| .. | .. |
|---|
| 197 | 203 | /* Use cmpxchg_double */ |
|---|
| 198 | 204 | #define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U) |
|---|
| 199 | 205 | |
|---|
| 200 | | -/* |
|---|
| 201 | | - * Tracking user of a slab. |
|---|
| 202 | | - */ |
|---|
| 203 | | -#define TRACK_ADDRS_COUNT 16 |
|---|
| 204 | | -struct track { |
|---|
| 205 | | - unsigned long addr; /* Called from address */ |
|---|
| 206 | | -#ifdef CONFIG_STACKTRACE |
|---|
| 207 | | - unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */ |
|---|
| 208 | | -#endif |
|---|
| 209 | | - int cpu; /* Was running on cpu */ |
|---|
| 210 | | - int pid; /* Pid context */ |
|---|
| 211 | | - unsigned long when; /* When did the operation occur */ |
|---|
| 212 | | -}; |
|---|
| 213 | | - |
|---|
| 214 | | -enum track_item { TRACK_ALLOC, TRACK_FREE }; |
|---|
| 215 | | - |
|---|
| 216 | 206 | #ifdef CONFIG_SLUB_SYSFS |
|---|
| 217 | 207 | static int sysfs_slab_add(struct kmem_cache *); |
|---|
| 218 | 208 | static int sysfs_slab_alias(struct kmem_cache *, const char *); |
|---|
| 219 | | -static void memcg_propagate_slab_attrs(struct kmem_cache *s); |
|---|
| 220 | | -static void sysfs_slab_remove(struct kmem_cache *s); |
|---|
| 221 | 209 | #else |
|---|
| 222 | 210 | static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } |
|---|
| 223 | 211 | static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) |
|---|
| 224 | 212 | { return 0; } |
|---|
| 225 | | -static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { } |
|---|
| 226 | | -static inline void sysfs_slab_remove(struct kmem_cache *s) { } |
|---|
| 213 | +#endif |
|---|
| 214 | + |
|---|
| 215 | +#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG) |
|---|
| 216 | +static void debugfs_slab_add(struct kmem_cache *); |
|---|
| 217 | +#else |
|---|
| 218 | +static inline void debugfs_slab_add(struct kmem_cache *s) { } |
|---|
| 227 | 219 | #endif |
|---|
| 228 | 220 | |
|---|
| 229 | 221 | static inline void stat(const struct kmem_cache *s, enum stat_item si) |
|---|
| .. | .. |
|---|
| 251 | 243 | { |
|---|
| 252 | 244 | #ifdef CONFIG_SLAB_FREELIST_HARDENED |
|---|
| 253 | 245 | /* |
|---|
| 254 | | - * When CONFIG_KASAN_SW_TAGS is enabled, ptr_addr might be tagged. |
|---|
| 246 | + * When CONFIG_KASAN_SW/HW_TAGS is enabled, ptr_addr might be tagged. |
|---|
| 255 | 247 | * Normally, this doesn't cause any issues, as both set_freepointer() |
|---|
| 256 | 248 | * and get_freepointer() are called with a pointer with the same tag. |
|---|
| 257 | 249 | * However, there are some issues with CONFIG_SLUB_DEBUG code. For |
|---|
| .. | .. |
|---|
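The tagging issue described in the comment above matters because, with CONFIG_SLAB_FREELIST_HARDENED, the stored free pointer is mixed with the address it lives at. A rough sketch of freelist_ptr(), the helper that freelist_dereference() relies on; the exact expression (the swab() and kasan_reset_tag() details) is an assumption based on kernels of this vintage:

```c
static inline void *freelist_ptr(struct kmem_cache *s, void *ptr,
				 unsigned long ptr_addr)
{
#ifdef CONFIG_SLAB_FREELIST_HARDENED
	/*
	 * Obfuscate the stored pointer with a per-cache random value and the
	 * (byte-swapped, tag-stripped) address it is stored at, so a leaked
	 * freelist entry is not directly usable.
	 */
	return (void *)((unsigned long)ptr ^ s->random ^
			swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
#else
	return ptr;
#endif
}
```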
| 277 | 269 | |
|---|
| 278 | 270 | static inline void *get_freepointer(struct kmem_cache *s, void *object) |
|---|
| 279 | 271 | { |
|---|
| 272 | + object = kasan_reset_tag(object); |
|---|
| 280 | 273 | return freelist_dereference(s, object + s->offset); |
|---|
| 281 | 274 | } |
|---|
| 282 | 275 | |
|---|
| .. | .. |
|---|
| 290 | 283 | unsigned long freepointer_addr; |
|---|
| 291 | 284 | void *p; |
|---|
| 292 | 285 | |
|---|
| 293 | | - if (!debug_pagealloc_enabled()) |
|---|
| 286 | + if (!debug_pagealloc_enabled_static()) |
|---|
| 294 | 287 | return get_freepointer(s, object); |
|---|
| 295 | 288 | |
|---|
| 289 | + object = kasan_reset_tag(object); |
|---|
| 296 | 290 | freepointer_addr = (unsigned long)object + s->offset; |
|---|
| 297 | | - probe_kernel_read(&p, (void **)freepointer_addr, sizeof(p)); |
|---|
| 291 | + copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p)); |
|---|
| 298 | 292 | return freelist_ptr(s, p, freepointer_addr); |
|---|
| 299 | 293 | } |
|---|
| 300 | 294 | |
|---|
| .. | .. |
|---|
| 306 | 300 | BUG_ON(object == fp); /* naive detection of double free or corruption */ |
|---|
| 307 | 301 | #endif |
|---|
| 308 | 302 | |
|---|
| 303 | + freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr); |
|---|
| 309 | 304 | *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr); |
|---|
| 310 | 305 | } |
|---|
| 311 | 306 | |
|---|
| .. | .. |
|---|
| 314 | 309 | for (__p = fixup_red_left(__s, __addr); \ |
|---|
| 315 | 310 | __p < (__addr) + (__objects) * (__s)->size; \ |
|---|
| 316 | 311 | __p += (__s)->size) |
|---|
| 317 | | - |
|---|
| 318 | | -/* Determine object index from a given position */ |
|---|
| 319 | | -static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr) |
|---|
| 320 | | -{ |
|---|
| 321 | | - return (kasan_reset_tag(p) - addr) / s->size; |
|---|
| 322 | | -} |
|---|
| 323 | 312 | |
|---|
| 324 | 313 | static inline unsigned int order_objects(unsigned int order, unsigned int size) |
|---|
| 325 | 314 | { |
|---|
| .. | .. |
|---|
| 441 | 430 | } |
|---|
| 442 | 431 | |
|---|
| 443 | 432 | #ifdef CONFIG_SLUB_DEBUG |
|---|
| 433 | +static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)]; |
|---|
| 434 | +static DEFINE_SPINLOCK(object_map_lock); |
|---|
| 435 | + |
|---|
| 436 | +static void __fill_map(unsigned long *obj_map, struct kmem_cache *s, |
|---|
| 437 | + struct page *page) |
|---|
| 438 | +{ |
|---|
| 439 | + void *addr = page_address(page); |
|---|
| 440 | + void *p; |
|---|
| 441 | + |
|---|
| 442 | + bitmap_zero(obj_map, page->objects); |
|---|
| 443 | + |
|---|
| 444 | + for (p = page->freelist; p; p = get_freepointer(s, p)) |
|---|
| 445 | + set_bit(__obj_to_index(s, addr, p), obj_map); |
|---|
| 446 | +} |
|---|
| 447 | + |
|---|
| 444 | 448 | /* |
|---|
| 445 | 449 | * Determine a map of object in use on a page. |
|---|
| 446 | 450 | * |
|---|
| 447 | 451 | * Node listlock must be held to guarantee that the page does |
|---|
| 448 | 452 | * not vanish from under us. |
|---|
| 449 | 453 | */ |
|---|
| 450 | | -static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map) |
|---|
| 454 | +static unsigned long *get_map(struct kmem_cache *s, struct page *page) |
|---|
| 455 | + __acquires(&object_map_lock) |
|---|
| 451 | 456 | { |
|---|
| 452 | | - void *p; |
|---|
| 453 | | - void *addr = page_address(page); |
|---|
| 457 | + VM_BUG_ON(!irqs_disabled()); |
|---|
| 454 | 458 | |
|---|
| 455 | | - for (p = page->freelist; p; p = get_freepointer(s, p)) |
|---|
| 456 | | - set_bit(slab_index(p, s, addr), map); |
|---|
| 459 | + spin_lock(&object_map_lock); |
|---|
| 460 | + |
|---|
| 461 | + __fill_map(object_map, s, page); |
|---|
| 462 | + |
|---|
| 463 | + return object_map; |
|---|
| 464 | +} |
|---|
| 465 | + |
|---|
| 466 | +static void put_map(unsigned long *map) __releases(&object_map_lock) |
|---|
| 467 | +{ |
|---|
| 468 | + VM_BUG_ON(map != object_map); |
|---|
| 469 | + spin_unlock(&object_map_lock); |
|---|
| 457 | 470 | } |
|---|
| 458 | 471 | |
|---|
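The bitmap filled by __fill_map()/get_map() above has a bit set for every object sitting on the page's freelist, so "in use" means the bit is clear. A minimal usage sketch modelled on callers such as validate_slab(); the function name is illustrative:

```c
/* Caller must hold the node's list_lock with IRQs disabled. */
static unsigned long count_inuse_objects(struct kmem_cache *s, struct page *page)
{
	void *addr = page_address(page);
	unsigned long *map;
	unsigned long inuse = 0;
	void *p;

	map = get_map(s, page);			/* takes object_map_lock */
	for_each_object(p, s, addr, page->objects)
		if (!test_bit(__obj_to_index(s, addr, p), map))
			inuse++;		/* allocated, not on the freelist */
	put_map(map);				/* releases object_map_lock */

	return inuse;
}
```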
| 459 | 472 | static inline unsigned int size_from_object(struct kmem_cache *s) |
|---|
| .. | .. |
|---|
| 476 | 489 | * Debug settings: |
|---|
| 477 | 490 | */ |
|---|
| 478 | 491 | #if defined(CONFIG_SLUB_DEBUG_ON) |
|---|
| 479 | | -static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS; |
|---|
| 492 | +slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS; |
|---|
| 480 | 493 | #else |
|---|
| 481 | | -static slab_flags_t slub_debug; |
|---|
| 494 | +slab_flags_t slub_debug; |
|---|
| 482 | 495 | #endif |
|---|
| 483 | 496 | |
|---|
| 484 | | -static char *slub_debug_slabs; |
|---|
| 497 | +static char *slub_debug_string; |
|---|
| 485 | 498 | static int disable_higher_order_debug; |
|---|
| 486 | 499 | |
|---|
| 487 | 500 | /* |
|---|
| .. | .. |
|---|
| 528 | 541 | unsigned int length) |
|---|
| 529 | 542 | { |
|---|
| 530 | 543 | metadata_access_enable(); |
|---|
| 531 | | - print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr, |
|---|
| 532 | | - length, 1); |
|---|
| 544 | + print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, |
|---|
| 545 | + 16, 1, kasan_reset_tag((void *)addr), length, 1); |
|---|
| 533 | 546 | metadata_access_disable(); |
|---|
| 547 | +} |
|---|
| 548 | + |
|---|
| 549 | +/* |
|---|
| 550 | + * See comment in calculate_sizes(). |
|---|
| 551 | + */ |
|---|
| 552 | +static inline bool freeptr_outside_object(struct kmem_cache *s) |
|---|
| 553 | +{ |
|---|
| 554 | + return s->offset >= s->inuse; |
|---|
| 555 | +} |
|---|
| 556 | + |
|---|
| 557 | +/* |
|---|
| 558 | + * Return offset of the end of info block which is inuse + free pointer if |
|---|
| 559 | + * not overlapping with object. |
|---|
| 560 | + */ |
|---|
| 561 | +static inline unsigned int get_info_end(struct kmem_cache *s) |
|---|
| 562 | +{ |
|---|
| 563 | + if (freeptr_outside_object(s)) |
|---|
| 564 | + return s->inuse + sizeof(void *); |
|---|
| 565 | + else |
|---|
| 566 | + return s->inuse; |
|---|
| 534 | 567 | } |
|---|
| 535 | 568 | |
|---|
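get_info_end() fixes the offset where the debug metadata starts; get_track() below simply adds it to the object address. A worked example with illustrative numbers:

```c
/*
 * Illustrative layout for s->object_size = 56, s->inuse = 64,
 * sizeof(void *) = 8 (numbers are examples, not taken from this patch):
 *
 * Free pointer outside the object (s->offset == 64 >= s->inuse):
 *   [ object 0..55 ][ pad 56..63 ][ free pointer 64..71 ][ track[2] ... ]
 *   get_info_end() == s->inuse + sizeof(void *) == 72
 *
 * Free pointer inside the object (e.g. s->offset == 32 < s->inuse):
 *   [ object 0..55, free pointer overlaps bytes 32..39 ][ pad 56..63 ][ track[2] ... ]
 *   get_info_end() == s->inuse == 64
 */
```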
| 536 | 569 | static struct track *get_track(struct kmem_cache *s, void *object, |
|---|
| .. | .. |
|---|
| 538 | 571 | { |
|---|
| 539 | 572 | struct track *p; |
|---|
| 540 | 573 | |
|---|
| 541 | | - if (s->offset) |
|---|
| 542 | | - p = object + s->offset + sizeof(void *); |
|---|
| 543 | | - else |
|---|
| 544 | | - p = object + s->inuse; |
|---|
| 574 | + p = object + get_info_end(s); |
|---|
| 545 | 575 | |
|---|
| 546 | | - return p + alloc; |
|---|
| 576 | + return kasan_reset_tag(p + alloc); |
|---|
| 547 | 577 | } |
|---|
| 578 | + |
|---|
| 579 | +/* |
|---|
| 580 | + * Loop through all slab objects of a page and hand each object's track |
|---|
| 581 | + * structure to the callback fn. The callback extracts whatever it needs |
|---|
| 582 | + * into its private data; a negative return value from fn stops the walk. |
|---|
| 583 | + * The return value is the number of track structures that were |
|---|
| 584 | + * processed. |
|---|
| 585 | + */ |
|---|
| 586 | +unsigned long get_each_object_track(struct kmem_cache *s, |
|---|
| 587 | + struct page *page, enum track_item alloc, |
|---|
| 588 | + int (*fn)(const struct kmem_cache *, const void *, |
|---|
| 589 | + const struct track *, void *), void *private) |
|---|
| 590 | +{ |
|---|
| 591 | + void *p; |
|---|
| 592 | + struct track *t; |
|---|
| 593 | + int ret; |
|---|
| 594 | + unsigned long num_track = 0; |
|---|
| 595 | + |
|---|
| 596 | + if (!slub_debug || !(s->flags & SLAB_STORE_USER)) |
|---|
| 597 | + return 0; |
|---|
| 598 | + |
|---|
| 599 | + slab_lock(page); |
|---|
| 600 | + for_each_object(p, s, page_address(page), page->objects) { |
|---|
| 601 | + t = get_track(s, p, alloc); |
|---|
| 602 | + metadata_access_enable(); |
|---|
| 603 | + ret = fn(s, p, t, private); |
|---|
| 604 | + metadata_access_disable(); |
|---|
| 605 | + if (ret < 0) |
|---|
| 606 | + break; |
|---|
| 607 | + num_track += 1; |
|---|
| 608 | + } |
|---|
| 609 | + slab_unlock(page); |
|---|
| 610 | + return num_track; |
|---|
| 611 | +} |
|---|
| 612 | +EXPORT_SYMBOL_GPL(get_each_object_track); |
|---|
| 548 | 613 | |
|---|
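get_each_object_track() above is exported for out-of-tree/vendor debug consumers. A hypothetical callback (the struct and function names are made up) showing the expected shape of fn and how the walk is invoked:

```c
/* Hypothetical: count objects whose last allocation came from a given address. */
struct track_addr_count {
	unsigned long addr;
	unsigned long count;
};

static int count_track_addr(const struct kmem_cache *s, const void *object,
			    const struct track *t, void *private)
{
	struct track_addr_count *data = private;

	if (t->addr == data->addr)
		data->count++;
	return 0;	/* a negative return value stops the walk */
}

/*
 * Usage sketch:
 *	struct track_addr_count data = { .addr = target, .count = 0 };
 *	get_each_object_track(s, page, TRACK_ALLOC, count_track_addr, &data);
 */
```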
| 549 | 614 | static void set_track(struct kmem_cache *s, void *object, |
|---|
| 550 | 615 | enum track_item alloc, unsigned long addr) |
|---|
| .. | .. |
|---|
| 553 | 618 | |
|---|
| 554 | 619 | if (addr) { |
|---|
| 555 | 620 | #ifdef CONFIG_STACKTRACE |
|---|
| 556 | | - struct stack_trace trace; |
|---|
| 557 | | - int i; |
|---|
| 621 | + unsigned int nr_entries; |
|---|
| 558 | 622 | |
|---|
| 559 | | - trace.nr_entries = 0; |
|---|
| 560 | | - trace.max_entries = TRACK_ADDRS_COUNT; |
|---|
| 561 | | - trace.entries = p->addrs; |
|---|
| 562 | | - trace.skip = 3; |
|---|
| 563 | 623 | metadata_access_enable(); |
|---|
| 564 | | - save_stack_trace(&trace); |
|---|
| 624 | + nr_entries = stack_trace_save(kasan_reset_tag(p->addrs), |
|---|
| 625 | + TRACK_ADDRS_COUNT, 3); |
|---|
| 565 | 626 | metadata_access_disable(); |
|---|
| 566 | 627 | |
|---|
| 567 | | - /* See rant in lockdep.c */ |
|---|
| 568 | | - if (trace.nr_entries != 0 && |
|---|
| 569 | | - trace.entries[trace.nr_entries - 1] == ULONG_MAX) |
|---|
| 570 | | - trace.nr_entries--; |
|---|
| 571 | | - |
|---|
| 572 | | - for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++) |
|---|
| 573 | | - p->addrs[i] = 0; |
|---|
| 628 | + if (nr_entries < TRACK_ADDRS_COUNT) |
|---|
| 629 | + p->addrs[nr_entries] = 0; |
|---|
| 630 | + trace_android_vh_save_track_hash(alloc == TRACK_ALLOC, |
|---|
| 631 | + (unsigned long)p); |
|---|
| 574 | 632 | #endif |
|---|
| 575 | 633 | p->addr = addr; |
|---|
| 576 | 634 | p->cpu = smp_processor_id(); |
|---|
| 577 | 635 | p->pid = current->pid; |
|---|
| 578 | 636 | p->when = jiffies; |
|---|
| 579 | | - } else |
|---|
| 637 | + } else { |
|---|
| 580 | 638 | memset(p, 0, sizeof(struct track)); |
|---|
| 639 | + } |
|---|
| 581 | 640 | } |
|---|
| 582 | 641 | |
|---|
| 583 | 642 | static void init_tracking(struct kmem_cache *s, void *object) |
|---|
| .. | .. |
|---|
| 608 | 667 | #endif |
|---|
| 609 | 668 | } |
|---|
| 610 | 669 | |
|---|
| 611 | | -static void print_tracking(struct kmem_cache *s, void *object) |
|---|
| 670 | +void print_tracking(struct kmem_cache *s, void *object) |
|---|
| 612 | 671 | { |
|---|
| 613 | 672 | unsigned long pr_time = jiffies; |
|---|
| 614 | 673 | if (!(s->flags & SLAB_STORE_USER)) |
|---|
| .. | .. |
|---|
| 636 | 695 | pr_err("=============================================================================\n"); |
|---|
| 637 | 696 | pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf); |
|---|
| 638 | 697 | pr_err("-----------------------------------------------------------------------------\n\n"); |
|---|
| 639 | | - |
|---|
| 640 | | - add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); |
|---|
| 641 | 698 | va_end(args); |
|---|
| 642 | 699 | } |
|---|
| 643 | 700 | |
|---|
| .. | .. |
|---|
| 691 | 748 | print_section(KERN_ERR, "Redzone ", p + s->object_size, |
|---|
| 692 | 749 | s->inuse - s->object_size); |
|---|
| 693 | 750 | |
|---|
| 694 | | - if (s->offset) |
|---|
| 695 | | - off = s->offset + sizeof(void *); |
|---|
| 696 | | - else |
|---|
| 697 | | - off = s->inuse; |
|---|
| 751 | + off = get_info_end(s); |
|---|
| 698 | 752 | |
|---|
| 699 | 753 | if (s->flags & SLAB_STORE_USER) |
|---|
| 700 | 754 | off += 2 * sizeof(struct track); |
|---|
| .. | .. |
|---|
| 714 | 768 | { |
|---|
| 715 | 769 | slab_bug(s, "%s", reason); |
|---|
| 716 | 770 | print_trailer(s, page, object); |
|---|
| 771 | + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); |
|---|
| 717 | 772 | } |
|---|
| 718 | 773 | |
|---|
| 719 | 774 | static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page, |
|---|
| .. | .. |
|---|
| 728 | 783 | slab_bug(s, "%s", buf); |
|---|
| 729 | 784 | print_page_info(page); |
|---|
| 730 | 785 | dump_stack(); |
|---|
| 786 | + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); |
|---|
| 731 | 787 | } |
|---|
| 732 | 788 | |
|---|
| 733 | 789 | static void init_object(struct kmem_cache *s, void *object, u8 val) |
|---|
| 734 | 790 | { |
|---|
| 735 | | - u8 *p = object; |
|---|
| 791 | + u8 *p = kasan_reset_tag(object); |
|---|
| 736 | 792 | |
|---|
| 737 | 793 | if (s->flags & SLAB_RED_ZONE) |
|---|
| 738 | 794 | memset(p - s->red_left_pad, val, s->red_left_pad); |
|---|
| .. | .. |
|---|
| 759 | 815 | { |
|---|
| 760 | 816 | u8 *fault; |
|---|
| 761 | 817 | u8 *end; |
|---|
| 818 | + u8 *addr = page_address(page); |
|---|
| 762 | 819 | |
|---|
| 763 | 820 | metadata_access_enable(); |
|---|
| 764 | | - fault = memchr_inv(start, value, bytes); |
|---|
| 821 | + fault = memchr_inv(kasan_reset_tag(start), value, bytes); |
|---|
| 765 | 822 | metadata_access_disable(); |
|---|
| 766 | 823 | if (!fault) |
|---|
| 767 | 824 | return 1; |
|---|
| .. | .. |
|---|
| 771 | 828 | end--; |
|---|
| 772 | 829 | |
|---|
| 773 | 830 | slab_bug(s, "%s overwritten", what); |
|---|
| 774 | | - pr_err("INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n", |
|---|
| 775 | | - fault, end - 1, fault[0], value); |
|---|
| 831 | + pr_err("INFO: 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n", |
|---|
| 832 | + fault, end - 1, fault - addr, |
|---|
| 833 | + fault[0], value); |
|---|
| 776 | 834 | print_trailer(s, page, object); |
|---|
| 835 | + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); |
|---|
| 777 | 836 | |
|---|
| 778 | 837 | restore_bytes(s, what, value, fault, end); |
|---|
| 779 | 838 | return 0; |
|---|
| .. | .. |
|---|
| 785 | 844 | * object address |
|---|
| 786 | 845 | * Bytes of the object to be managed. |
|---|
| 787 | 846 | * If the freepointer may overlay the object then the free |
|---|
| 788 | | - * pointer is the first word of the object. |
|---|
| 847 | + * pointer is at the middle of the object. |
|---|
| 789 | 848 | * |
|---|
| 790 | 849 | * Poisoning uses 0x6b (POISON_FREE) and the last byte is |
|---|
| 791 | 850 | * 0xa5 (POISON_END) |
|---|
| .. | .. |
|---|
| 819 | 878 | |
|---|
| 820 | 879 | static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) |
|---|
| 821 | 880 | { |
|---|
| 822 | | - unsigned long off = s->inuse; /* The end of info */ |
|---|
| 823 | | - |
|---|
| 824 | | - if (s->offset) |
|---|
| 825 | | - /* Freepointer is placed after the object. */ |
|---|
| 826 | | - off += sizeof(void *); |
|---|
| 881 | + unsigned long off = get_info_end(s); /* The end of info */ |
|---|
| 827 | 882 | |
|---|
| 828 | 883 | if (s->flags & SLAB_STORE_USER) |
|---|
| 829 | 884 | /* We also have user information there */ |
|---|
| .. | .. |
|---|
| 852 | 907 | return 1; |
|---|
| 853 | 908 | |
|---|
| 854 | 909 | start = page_address(page); |
|---|
| 855 | | - length = PAGE_SIZE << compound_order(page); |
|---|
| 910 | + length = page_size(page); |
|---|
| 856 | 911 | end = start + length; |
|---|
| 857 | 912 | remainder = length % s->size; |
|---|
| 858 | 913 | if (!remainder) |
|---|
| .. | .. |
|---|
| 860 | 915 | |
|---|
| 861 | 916 | pad = end - remainder; |
|---|
| 862 | 917 | metadata_access_enable(); |
|---|
| 863 | | - fault = memchr_inv(pad, POISON_INUSE, remainder); |
|---|
| 918 | + fault = memchr_inv(kasan_reset_tag(pad), POISON_INUSE, remainder); |
|---|
| 864 | 919 | metadata_access_disable(); |
|---|
| 865 | 920 | if (!fault) |
|---|
| 866 | 921 | return 1; |
|---|
| 867 | 922 | while (end > fault && end[-1] == POISON_INUSE) |
|---|
| 868 | 923 | end--; |
|---|
| 869 | 924 | |
|---|
| 870 | | - slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); |
|---|
| 925 | + slab_err(s, page, "Padding overwritten. 0x%p-0x%p @offset=%tu", |
|---|
| 926 | + fault, end - 1, fault - start); |
|---|
| 871 | 927 | print_section(KERN_ERR, "Padding ", pad, remainder); |
|---|
| 872 | 928 | |
|---|
| 873 | 929 | restore_bytes(s, "slab padding", POISON_INUSE, fault, end); |
|---|
| .. | .. |
|---|
| 909 | 965 | check_pad_bytes(s, page, p); |
|---|
| 910 | 966 | } |
|---|
| 911 | 967 | |
|---|
| 912 | | - if (!s->offset && val == SLUB_RED_ACTIVE) |
|---|
| 968 | + if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE) |
|---|
| 913 | 969 | /* |
|---|
| 914 | 970 | * Object and freepointer overlap. Cannot check |
|---|
| 915 | 971 | * freepointer while object is allocated. |
|---|
| .. | .. |
|---|
| 1038 | 1094 | return; |
|---|
| 1039 | 1095 | |
|---|
| 1040 | 1096 | lockdep_assert_held(&n->list_lock); |
|---|
| 1041 | | - list_add(&page->lru, &n->full); |
|---|
| 1097 | + list_add(&page->slab_list, &n->full); |
|---|
| 1042 | 1098 | } |
|---|
| 1043 | 1099 | |
|---|
| 1044 | 1100 | static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page) |
|---|
| .. | .. |
|---|
| 1047 | 1103 | return; |
|---|
| 1048 | 1104 | |
|---|
| 1049 | 1105 | lockdep_assert_held(&n->list_lock); |
|---|
| 1050 | | - list_del(&page->lru); |
|---|
| 1106 | + list_del(&page->slab_list); |
|---|
| 1051 | 1107 | } |
|---|
| 1052 | 1108 | |
|---|
| 1053 | 1109 | /* Tracking of the number of slabs for debugging purposes */ |
|---|
| .. | .. |
|---|
| 1090 | 1146 | static void setup_object_debug(struct kmem_cache *s, struct page *page, |
|---|
| 1091 | 1147 | void *object) |
|---|
| 1092 | 1148 | { |
|---|
| 1093 | | - if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))) |
|---|
| 1149 | + if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)) |
|---|
| 1094 | 1150 | return; |
|---|
| 1095 | 1151 | |
|---|
| 1096 | 1152 | init_object(s, object, SLUB_RED_INACTIVE); |
|---|
| 1097 | 1153 | init_tracking(s, object); |
|---|
| 1098 | 1154 | } |
|---|
| 1099 | 1155 | |
|---|
| 1100 | | -static void setup_page_debug(struct kmem_cache *s, void *addr, int order) |
|---|
| 1156 | +static |
|---|
| 1157 | +void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) |
|---|
| 1101 | 1158 | { |
|---|
| 1102 | | - if (!(s->flags & SLAB_POISON)) |
|---|
| 1159 | + if (!kmem_cache_debug_flags(s, SLAB_POISON)) |
|---|
| 1103 | 1160 | return; |
|---|
| 1104 | 1161 | |
|---|
| 1105 | 1162 | metadata_access_enable(); |
|---|
| 1106 | | - memset(addr, POISON_INUSE, PAGE_SIZE << order); |
|---|
| 1163 | + memset(kasan_reset_tag(addr), POISON_INUSE, page_size(page)); |
|---|
| 1107 | 1164 | metadata_access_disable(); |
|---|
| 1108 | 1165 | } |
|---|
| 1109 | 1166 | |
|---|
| 1110 | 1167 | static inline int alloc_consistency_checks(struct kmem_cache *s, |
|---|
| 1111 | | - struct page *page, |
|---|
| 1112 | | - void *object, unsigned long addr) |
|---|
| 1168 | + struct page *page, void *object) |
|---|
| 1113 | 1169 | { |
|---|
| 1114 | 1170 | if (!check_slab(s, page)) |
|---|
| 1115 | 1171 | return 0; |
|---|
| .. | .. |
|---|
| 1130 | 1186 | void *object, unsigned long addr) |
|---|
| 1131 | 1187 | { |
|---|
| 1132 | 1188 | if (s->flags & SLAB_CONSISTENCY_CHECKS) { |
|---|
| 1133 | | - if (!alloc_consistency_checks(s, page, object, addr)) |
|---|
| 1189 | + if (!alloc_consistency_checks(s, page, object)) |
|---|
| 1134 | 1190 | goto bad; |
|---|
| 1135 | 1191 | } |
|---|
| 1136 | 1192 | |
|---|
| .. | .. |
|---|
| 1196 | 1252 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); |
|---|
| 1197 | 1253 | void *object = head; |
|---|
| 1198 | 1254 | int cnt = 0; |
|---|
| 1199 | | - unsigned long uninitialized_var(flags); |
|---|
| 1255 | + unsigned long flags; |
|---|
| 1200 | 1256 | int ret = 0; |
|---|
| 1201 | 1257 | |
|---|
| 1202 | 1258 | spin_lock_irqsave(&n->list_lock, flags); |
|---|
| .. | .. |
|---|
| 1240 | 1296 | return ret; |
|---|
| 1241 | 1297 | } |
|---|
| 1242 | 1298 | |
|---|
| 1243 | | -static int __init setup_slub_debug(char *str) |
|---|
| 1299 | +/* |
|---|
| 1300 | + * Parse a block of slub_debug options. Blocks are delimited by ';' |
|---|
| 1301 | + * |
|---|
| 1302 | + * @str: start of block |
|---|
| 1303 | + * @flags: returns parsed flags, or DEBUG_DEFAULT_FLAGS if none specified |
|---|
| 1304 | + * @slabs: return start of list of slabs, or NULL when there's no list |
|---|
| 1305 | + * @init: assume this is initial parsing and not per-kmem-create parsing |
|---|
| 1306 | + * |
|---|
| 1307 | + * returns the start of next block if there's any, or NULL |
|---|
| 1308 | + */ |
|---|
| 1309 | +static char * |
|---|
| 1310 | +parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init) |
|---|
| 1244 | 1311 | { |
|---|
| 1245 | | - slub_debug = DEBUG_DEFAULT_FLAGS; |
|---|
| 1246 | | - if (*str++ != '=' || !*str) |
|---|
| 1247 | | - /* |
|---|
| 1248 | | - * No options specified. Switch on full debugging. |
|---|
| 1249 | | - */ |
|---|
| 1250 | | - goto out; |
|---|
| 1312 | + bool higher_order_disable = false; |
|---|
| 1251 | 1313 | |
|---|
| 1252 | | - if (*str == ',') |
|---|
| 1314 | + /* Skip any completely empty blocks */ |
|---|
| 1315 | + while (*str && *str == ';') |
|---|
| 1316 | + str++; |
|---|
| 1317 | + |
|---|
| 1318 | + if (*str == ',') { |
|---|
| 1253 | 1319 | /* |
|---|
| 1254 | 1320 | * No options but restriction on slabs. This means full |
|---|
| 1255 | 1321 | * debugging for slabs matching a pattern. |
|---|
| 1256 | 1322 | */ |
|---|
| 1323 | + *flags = DEBUG_DEFAULT_FLAGS; |
|---|
| 1257 | 1324 | goto check_slabs; |
|---|
| 1325 | + } |
|---|
| 1326 | + *flags = 0; |
|---|
| 1258 | 1327 | |
|---|
| 1259 | | - slub_debug = 0; |
|---|
| 1260 | | - if (*str == '-') |
|---|
| 1261 | | - /* |
|---|
| 1262 | | - * Switch off all debugging measures. |
|---|
| 1263 | | - */ |
|---|
| 1264 | | - goto out; |
|---|
| 1265 | | - |
|---|
| 1266 | | - /* |
|---|
| 1267 | | - * Determine which debug features should be switched on |
|---|
| 1268 | | - */ |
|---|
| 1269 | | - for (; *str && *str != ','; str++) { |
|---|
| 1328 | + /* Determine which debug features should be switched on */ |
|---|
| 1329 | + for (; *str && *str != ',' && *str != ';'; str++) { |
|---|
| 1270 | 1330 | switch (tolower(*str)) { |
|---|
| 1331 | + case '-': |
|---|
| 1332 | + *flags = 0; |
|---|
| 1333 | + break; |
|---|
| 1271 | 1334 | case 'f': |
|---|
| 1272 | | - slub_debug |= SLAB_CONSISTENCY_CHECKS; |
|---|
| 1335 | + *flags |= SLAB_CONSISTENCY_CHECKS; |
|---|
| 1273 | 1336 | break; |
|---|
| 1274 | 1337 | case 'z': |
|---|
| 1275 | | - slub_debug |= SLAB_RED_ZONE; |
|---|
| 1338 | + *flags |= SLAB_RED_ZONE; |
|---|
| 1276 | 1339 | break; |
|---|
| 1277 | 1340 | case 'p': |
|---|
| 1278 | | - slub_debug |= SLAB_POISON; |
|---|
| 1341 | + *flags |= SLAB_POISON; |
|---|
| 1279 | 1342 | break; |
|---|
| 1280 | 1343 | case 'u': |
|---|
| 1281 | | - slub_debug |= SLAB_STORE_USER; |
|---|
| 1344 | + *flags |= SLAB_STORE_USER; |
|---|
| 1282 | 1345 | break; |
|---|
| 1283 | 1346 | case 't': |
|---|
| 1284 | | - slub_debug |= SLAB_TRACE; |
|---|
| 1347 | + *flags |= SLAB_TRACE; |
|---|
| 1285 | 1348 | break; |
|---|
| 1286 | 1349 | case 'a': |
|---|
| 1287 | | - slub_debug |= SLAB_FAILSLAB; |
|---|
| 1350 | + *flags |= SLAB_FAILSLAB; |
|---|
| 1288 | 1351 | break; |
|---|
| 1289 | 1352 | case 'o': |
|---|
| 1290 | 1353 | /* |
|---|
| 1291 | 1354 | * Avoid enabling debugging on caches if its minimum |
|---|
| 1292 | 1355 | * order would increase as a result. |
|---|
| 1293 | 1356 | */ |
|---|
| 1294 | | - disable_higher_order_debug = 1; |
|---|
| 1357 | + higher_order_disable = true; |
|---|
| 1295 | 1358 | break; |
|---|
| 1296 | 1359 | default: |
|---|
| 1297 | | - pr_err("slub_debug option '%c' unknown. skipped\n", |
|---|
| 1298 | | - *str); |
|---|
| 1360 | + if (init) |
|---|
| 1361 | + pr_err("slub_debug option '%c' unknown. skipped\n", *str); |
|---|
| 1362 | + } |
|---|
| 1363 | + } |
|---|
| 1364 | +check_slabs: |
|---|
| 1365 | + if (*str == ',') |
|---|
| 1366 | + *slabs = ++str; |
|---|
| 1367 | + else |
|---|
| 1368 | + *slabs = NULL; |
|---|
| 1369 | + |
|---|
| 1370 | + /* Skip over the slab list */ |
|---|
| 1371 | + while (*str && *str != ';') |
|---|
| 1372 | + str++; |
|---|
| 1373 | + |
|---|
| 1374 | + /* Skip any completely empty blocks */ |
|---|
| 1375 | + while (*str && *str == ';') |
|---|
| 1376 | + str++; |
|---|
| 1377 | + |
|---|
| 1378 | + if (init && higher_order_disable) |
|---|
| 1379 | + disable_higher_order_debug = 1; |
|---|
| 1380 | + |
|---|
| 1381 | + if (*str) |
|---|
| 1382 | + return str; |
|---|
| 1383 | + else |
|---|
| 1384 | + return NULL; |
|---|
| 1385 | +} |
|---|
| 1386 | + |
|---|
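setup_slub_debug() below consumes the option string one block at a time via the parser above. A hedged walk-through with an illustrative string (the helper name and the string are examples only):

```c
/* Illustrative only: how "F,dentry;Z;U,kmalloc-64" would be parsed. */
static void __init slub_debug_parse_example(char *str)
{
	slab_flags_t flags;
	char *slab_list;
	char *next = str;

	while (next) {
		next = parse_slub_debug_flags(next, &flags, &slab_list, true);
		/*
		 * 1st block: flags == SLAB_CONSISTENCY_CHECKS, slab_list -> "dentry"
		 * 2nd block: flags == SLAB_RED_ZONE,           slab_list == NULL (global)
		 * 3rd block: flags == SLAB_STORE_USER,         slab_list -> "kmalloc-64"
		 */
	}
}
```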
| 1387 | +static int __init setup_slub_debug(char *str) |
|---|
| 1388 | +{ |
|---|
| 1389 | + slab_flags_t flags; |
|---|
| 1390 | + slab_flags_t global_flags; |
|---|
| 1391 | + char *saved_str; |
|---|
| 1392 | + char *slab_list; |
|---|
| 1393 | + bool global_slub_debug_changed = false; |
|---|
| 1394 | + bool slab_list_specified = false; |
|---|
| 1395 | + |
|---|
| 1396 | + global_flags = DEBUG_DEFAULT_FLAGS; |
|---|
| 1397 | + if (*str++ != '=' || !*str) |
|---|
| 1398 | + /* |
|---|
| 1399 | + * No options specified. Switch on full debugging. |
|---|
| 1400 | + */ |
|---|
| 1401 | + goto out; |
|---|
| 1402 | + |
|---|
| 1403 | + saved_str = str; |
|---|
| 1404 | + while (str) { |
|---|
| 1405 | + str = parse_slub_debug_flags(str, &flags, &slab_list, true); |
|---|
| 1406 | + |
|---|
| 1407 | + if (!slab_list) { |
|---|
| 1408 | + global_flags = flags; |
|---|
| 1409 | + global_slub_debug_changed = true; |
|---|
| 1410 | + } else { |
|---|
| 1411 | + slab_list_specified = true; |
|---|
| 1299 | 1412 | } |
|---|
| 1300 | 1413 | } |
|---|
| 1301 | 1414 | |
|---|
| 1302 | | -check_slabs: |
|---|
| 1303 | | - if (*str == ',') |
|---|
| 1304 | | - slub_debug_slabs = str + 1; |
|---|
| 1415 | + /* |
|---|
| 1416 | + * For backwards compatibility, a single list of flags with list of |
|---|
| 1417 | + * slabs means debugging is only changed for those slabs, so the global |
|---|
| 1418 | + * slub_debug should be unchanged (0 or DEBUG_DEFAULT_FLAGS, depending |
|---|
| 1419 | + * on CONFIG_SLUB_DEBUG_ON). We can extend that to multiple lists as |
|---|
| 1420 | + * long as there is no option specifying flags without a slab list. |
|---|
| 1421 | + */ |
|---|
| 1422 | + if (slab_list_specified) { |
|---|
| 1423 | + if (!global_slub_debug_changed) |
|---|
| 1424 | + global_flags = slub_debug; |
|---|
| 1425 | + slub_debug_string = saved_str; |
|---|
| 1426 | + } |
|---|
| 1305 | 1427 | out: |
|---|
| 1428 | + slub_debug = global_flags; |
|---|
| 1429 | + if (slub_debug != 0 || slub_debug_string) |
|---|
| 1430 | + static_branch_enable(&slub_debug_enabled); |
|---|
| 1306 | 1431 | if ((static_branch_unlikely(&init_on_alloc) || |
|---|
| 1307 | 1432 | static_branch_unlikely(&init_on_free)) && |
|---|
| 1308 | 1433 | (slub_debug & SLAB_POISON)) |
|---|
| .. | .. |
|---|
| 1312 | 1437 | |
|---|
| 1313 | 1438 | __setup("slub_debug", setup_slub_debug); |
|---|
| 1314 | 1439 | |
|---|
| 1440 | +/* |
|---|
| 1441 | + * kmem_cache_flags - apply debugging options to the cache |
|---|
| 1442 | + * @object_size: the size of an object without meta data |
|---|
| 1443 | + * @flags: flags to set |
|---|
| 1444 | + * @name: name of the cache |
|---|
| 1445 | + * |
|---|
| 1446 | + * Debug option(s) are applied to @flags. In addition to the debug |
|---|
| 1447 | + * option(s), if a slab name (or multiple) is specified i.e. |
|---|
| 1448 | + * slub_debug=<Debug-Options>,<slab name1>,<slab name2> ... |
|---|
| 1449 | + * then only the select slabs will receive the debug option(s). |
|---|
| 1450 | + */ |
|---|
| 1315 | 1451 | slab_flags_t kmem_cache_flags(unsigned int object_size, |
|---|
| 1316 | | - slab_flags_t flags, const char *name, |
|---|
| 1317 | | - void (*ctor)(void *)) |
|---|
| 1452 | + slab_flags_t flags, const char *name) |
|---|
| 1318 | 1453 | { |
|---|
| 1319 | | - /* |
|---|
| 1320 | | - * Enable debugging if selected on the kernel commandline. |
|---|
| 1321 | | - */ |
|---|
| 1322 | | - if (slub_debug && (!slub_debug_slabs || (name && |
|---|
| 1323 | | - !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))) |
|---|
| 1324 | | - flags |= slub_debug; |
|---|
| 1454 | + char *iter; |
|---|
| 1455 | + size_t len; |
|---|
| 1456 | + char *next_block; |
|---|
| 1457 | + slab_flags_t block_flags; |
|---|
| 1325 | 1458 | |
|---|
| 1326 | | - return flags; |
|---|
| 1459 | + len = strlen(name); |
|---|
| 1460 | + next_block = slub_debug_string; |
|---|
| 1461 | + /* Go through all blocks of debug options, see if any matches our slab's name */ |
|---|
| 1462 | + while (next_block) { |
|---|
| 1463 | + next_block = parse_slub_debug_flags(next_block, &block_flags, &iter, false); |
|---|
| 1464 | + if (!iter) |
|---|
| 1465 | + continue; |
|---|
| 1466 | + /* Found a block that has a slab list, search it */ |
|---|
| 1467 | + while (*iter) { |
|---|
| 1468 | + char *end, *glob; |
|---|
| 1469 | + size_t cmplen; |
|---|
| 1470 | + |
|---|
| 1471 | + end = strchrnul(iter, ','); |
|---|
| 1472 | + if (next_block && next_block < end) |
|---|
| 1473 | + end = next_block - 1; |
|---|
| 1474 | + |
|---|
| 1475 | + glob = strnchr(iter, end - iter, '*'); |
|---|
| 1476 | + if (glob) |
|---|
| 1477 | + cmplen = glob - iter; |
|---|
| 1478 | + else |
|---|
| 1479 | + cmplen = max_t(size_t, len, (end - iter)); |
|---|
| 1480 | + |
|---|
| 1481 | + if (!strncmp(name, iter, cmplen)) { |
|---|
| 1482 | + flags |= block_flags; |
|---|
| 1483 | + return flags; |
|---|
| 1484 | + } |
|---|
| 1485 | + |
|---|
| 1486 | + if (!*end || *end == ';') |
|---|
| 1487 | + break; |
|---|
| 1488 | + iter = end + 1; |
|---|
| 1489 | + } |
|---|
| 1490 | + } |
|---|
| 1491 | + |
|---|
| 1492 | + return flags | slub_debug; |
|---|
| 1327 | 1493 | } |
|---|
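With the block parser, a command line such as slub_debug=Z,kmalloc-*;F,dentry red-zones every cache whose name matches the kmalloc-* glob and enables consistency checks only for dentry; caches named in no block fall back to the global slub_debug flags. Illustrative matches (assumed cache names):

```c
/*
 * slub_debug=Z,kmalloc-*;F,dentry
 *
 *   cache name      block matched   flags added by kmem_cache_flags()
 *   "kmalloc-64"    "kmalloc-*"     SLAB_RED_ZONE
 *   "kmalloc-1k"    "kmalloc-*"     SLAB_RED_ZONE
 *   "dentry"        "dentry"        SLAB_CONSISTENCY_CHECKS
 *   "task_struct"   (no match)      global slub_debug only
 */
```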
| 1328 | 1494 | #else /* !CONFIG_SLUB_DEBUG */ |
|---|
| 1329 | 1495 | static inline void setup_object_debug(struct kmem_cache *s, |
|---|
| 1330 | 1496 | struct page *page, void *object) {} |
|---|
| 1331 | | -static inline void setup_page_debug(struct kmem_cache *s, |
|---|
| 1332 | | - void *addr, int order) {} |
|---|
| 1497 | +static inline |
|---|
| 1498 | +void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {} |
|---|
| 1333 | 1499 | |
|---|
| 1334 | 1500 | static inline int alloc_debug_processing(struct kmem_cache *s, |
|---|
| 1335 | 1501 | struct page *page, void *object, unsigned long addr) { return 0; } |
|---|
| .. | .. |
|---|
| 1348 | 1514 | static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, |
|---|
| 1349 | 1515 | struct page *page) {} |
|---|
| 1350 | 1516 | slab_flags_t kmem_cache_flags(unsigned int object_size, |
|---|
| 1351 | | - slab_flags_t flags, const char *name, |
|---|
| 1352 | | - void (*ctor)(void *)) |
|---|
| 1517 | + slab_flags_t flags, const char *name) |
|---|
| 1353 | 1518 | { |
|---|
| 1354 | 1519 | return flags; |
|---|
| 1355 | 1520 | } |
|---|
| .. | .. |
|---|
| 1380 | 1545 | static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) |
|---|
| 1381 | 1546 | { |
|---|
| 1382 | 1547 | ptr = kasan_kmalloc_large(ptr, size, flags); |
|---|
| 1548 | + /* As ptr might get tagged, call kmemleak hook after KASAN. */ |
|---|
| 1383 | 1549 | kmemleak_alloc(ptr, size, 1, flags); |
|---|
| 1384 | 1550 | return ptr; |
|---|
| 1385 | 1551 | } |
|---|
| .. | .. |
|---|
| 1387 | 1553 | static __always_inline void kfree_hook(void *x) |
|---|
| 1388 | 1554 | { |
|---|
| 1389 | 1555 | kmemleak_free(x); |
|---|
| 1390 | | - kasan_kfree_large(x, _RET_IP_); |
|---|
| 1556 | + kasan_kfree_large(x); |
|---|
| 1391 | 1557 | } |
|---|
| 1392 | 1558 | |
|---|
| 1393 | | -static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x) |
|---|
| 1559 | +static __always_inline bool slab_free_hook(struct kmem_cache *s, |
|---|
| 1560 | + void *x, bool init) |
|---|
| 1394 | 1561 | { |
|---|
| 1395 | 1562 | kmemleak_free_recursive(x, s->flags); |
|---|
| 1396 | 1563 | |
|---|
| .. | .. |
|---|
| 1411 | 1578 | if (!(s->flags & SLAB_DEBUG_OBJECTS)) |
|---|
| 1412 | 1579 | debug_check_no_obj_freed(x, s->object_size); |
|---|
| 1413 | 1580 | |
|---|
| 1414 | | - /* KASAN might put x into memory quarantine, delaying its reuse */ |
|---|
| 1415 | | - return kasan_slab_free(s, x, _RET_IP_); |
|---|
| 1581 | + /* Use KCSAN to help debug racy use-after-free. */ |
|---|
| 1582 | + if (!(s->flags & SLAB_TYPESAFE_BY_RCU)) |
|---|
| 1583 | + __kcsan_check_access(x, s->object_size, |
|---|
| 1584 | + KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT); |
|---|
| 1585 | + |
|---|
| 1586 | + /* |
|---|
| 1587 | + * As memory initialization might be integrated into KASAN, |
|---|
| 1588 | + * kasan_slab_free and initialization memset's must be |
|---|
| 1589 | + * kept together to avoid discrepancies in behavior. |
|---|
| 1590 | + * |
|---|
| 1591 | + * The initialization memset's clear the object and the metadata, |
|---|
| 1592 | + * but don't touch the SLAB redzone. |
|---|
| 1593 | + */ |
|---|
| 1594 | + if (init) { |
|---|
| 1595 | + int rsize; |
|---|
| 1596 | + |
|---|
| 1597 | + if (!kasan_has_integrated_init()) |
|---|
| 1598 | + memset(kasan_reset_tag(x), 0, s->object_size); |
|---|
| 1599 | + rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad : 0; |
|---|
| 1600 | + memset((char *)kasan_reset_tag(x) + s->inuse, 0, |
|---|
| 1601 | + s->size - s->inuse - rsize); |
|---|
| 1602 | + } |
|---|
| 1603 | + /* KASAN might put x into memory quarantine, delaying its reuse. */ |
|---|
| 1604 | + return kasan_slab_free(s, x, init); |
|---|
| 1416 | 1605 | } |
|---|
| 1417 | 1606 | |
|---|
| 1418 | 1607 | static inline bool slab_free_freelist_hook(struct kmem_cache *s, |
|---|
| .. | .. |
|---|
| 1423 | 1612 | void *object; |
|---|
| 1424 | 1613 | void *next = *head; |
|---|
| 1425 | 1614 | void *old_tail = *tail ? *tail : *head; |
|---|
| 1426 | | - int rsize; |
|---|
| 1615 | + |
|---|
| 1616 | + if (is_kfence_address(next)) { |
|---|
| 1617 | + slab_free_hook(s, next, false); |
|---|
| 1618 | + return true; |
|---|
| 1619 | + } |
|---|
| 1427 | 1620 | |
|---|
| 1428 | 1621 | /* Head and tail of the reconstructed freelist */ |
|---|
| 1429 | 1622 | *head = NULL; |
|---|
| .. | .. |
|---|
| 1433 | 1626 | object = next; |
|---|
| 1434 | 1627 | next = get_freepointer(s, object); |
|---|
| 1435 | 1628 | |
|---|
| 1436 | | - if (slab_want_init_on_free(s)) { |
|---|
| 1437 | | - /* |
|---|
| 1438 | | - * Clear the object and the metadata, but don't touch |
|---|
| 1439 | | - * the redzone. |
|---|
| 1440 | | - */ |
|---|
| 1441 | | - memset(object, 0, s->object_size); |
|---|
| 1442 | | - rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad |
|---|
| 1443 | | - : 0; |
|---|
| 1444 | | - memset((char *)object + s->inuse, 0, |
|---|
| 1445 | | - s->size - s->inuse - rsize); |
|---|
| 1446 | | - |
|---|
| 1447 | | - } |
|---|
| 1448 | 1629 | /* If object's reuse doesn't have to be delayed */ |
|---|
| 1449 | | - if (!slab_free_hook(s, object)) { |
|---|
| 1630 | + if (!slab_free_hook(s, object, slab_want_init_on_free(s))) { |
|---|
| 1450 | 1631 | /* Move object to the new freelist */ |
|---|
| 1451 | 1632 | set_freepointer(s, object, *head); |
|---|
| 1452 | 1633 | *head = object; |
|---|
| .. | .. |
|---|
| 1494 | 1675 | else |
|---|
| 1495 | 1676 | page = __alloc_pages_node(node, flags, order); |
|---|
| 1496 | 1677 | |
|---|
| 1497 | | - if (page && memcg_charge_slab(page, flags, order, s)) { |
|---|
| 1498 | | - __free_pages(page, order); |
|---|
| 1499 | | - page = NULL; |
|---|
| 1500 | | - } |
|---|
| 1678 | + if (page) |
|---|
| 1679 | + account_slab_page(page, order, s); |
|---|
| 1501 | 1680 | |
|---|
| 1502 | 1681 | return page; |
|---|
| 1503 | 1682 | } |
|---|
| .. | .. |
|---|
| 1617 | 1796 | struct kmem_cache_order_objects oo = s->oo; |
|---|
| 1618 | 1797 | gfp_t alloc_gfp; |
|---|
| 1619 | 1798 | void *start, *p, *next; |
|---|
| 1620 | | - int idx, order; |
|---|
| 1799 | + int idx; |
|---|
| 1621 | 1800 | bool shuffle; |
|---|
| 1622 | 1801 | |
|---|
| 1623 | 1802 | flags &= gfp_allowed_mask; |
|---|
| .. | .. |
|---|
| 1651 | 1830 | |
|---|
| 1652 | 1831 | page->objects = oo_objects(oo); |
|---|
| 1653 | 1832 | |
|---|
| 1654 | | - order = compound_order(page); |
|---|
| 1655 | 1833 | page->slab_cache = s; |
|---|
| 1656 | 1834 | __SetPageSlab(page); |
|---|
| 1657 | 1835 | if (page_is_pfmemalloc(page)) |
|---|
| .. | .. |
|---|
| 1661 | 1839 | |
|---|
| 1662 | 1840 | start = page_address(page); |
|---|
| 1663 | 1841 | |
|---|
| 1664 | | - setup_page_debug(s, start, order); |
|---|
| 1842 | + setup_page_debug(s, page, start); |
|---|
| 1665 | 1843 | |
|---|
| 1666 | 1844 | shuffle = shuffle_freelist(s, page); |
|---|
| 1667 | 1845 | |
|---|
| .. | .. |
|---|
| 1687 | 1865 | if (!page) |
|---|
| 1688 | 1866 | return NULL; |
|---|
| 1689 | 1867 | |
|---|
| 1690 | | - mod_lruvec_page_state(page, |
|---|
| 1691 | | - (s->flags & SLAB_RECLAIM_ACCOUNT) ? |
|---|
| 1692 | | - NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, |
|---|
| 1693 | | - 1 << oo_order(oo)); |
|---|
| 1694 | | - |
|---|
| 1695 | 1868 | inc_slabs_node(s, page_to_nid(page), page->objects); |
|---|
| 1696 | 1869 | |
|---|
| 1697 | 1870 | return page; |
|---|
| .. | .. |
|---|
| 1699 | 1872 | |
|---|
| 1700 | 1873 | static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) |
|---|
| 1701 | 1874 | { |
|---|
| 1702 | | - if (unlikely(flags & GFP_SLAB_BUG_MASK)) { |
|---|
| 1703 | | - gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK; |
|---|
| 1704 | | - flags &= ~GFP_SLAB_BUG_MASK; |
|---|
| 1705 | | - pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n", |
|---|
| 1706 | | - invalid_mask, &invalid_mask, flags, &flags); |
|---|
| 1707 | | - dump_stack(); |
|---|
| 1708 | | - } |
|---|
| 1875 | + if (unlikely(flags & GFP_SLAB_BUG_MASK)) |
|---|
| 1876 | + flags = kmalloc_fix_flags(flags); |
|---|
| 1709 | 1877 | |
|---|
| 1710 | 1878 | return allocate_slab(s, |
|---|
| 1711 | 1879 | flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node); |
|---|
| .. | .. |
|---|
| 1716 | 1884 | int order = compound_order(page); |
|---|
| 1717 | 1885 | int pages = 1 << order; |
|---|
| 1718 | 1886 | |
|---|
| 1719 | | - if (s->flags & SLAB_CONSISTENCY_CHECKS) { |
|---|
| 1887 | + if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) { |
|---|
| 1720 | 1888 | void *p; |
|---|
| 1721 | 1889 | |
|---|
| 1722 | 1890 | slab_pad_check(s, page); |
|---|
| .. | .. |
|---|
| 1725 | 1893 | check_object(s, page, p, SLUB_RED_INACTIVE); |
|---|
| 1726 | 1894 | } |
|---|
| 1727 | 1895 | |
|---|
| 1728 | | - mod_lruvec_page_state(page, |
|---|
| 1729 | | - (s->flags & SLAB_RECLAIM_ACCOUNT) ? |
|---|
| 1730 | | - NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, |
|---|
| 1731 | | - -pages); |
|---|
| 1732 | | - |
|---|
| 1733 | 1896 | __ClearPageSlabPfmemalloc(page); |
|---|
| 1734 | 1897 | __ClearPageSlab(page); |
|---|
| 1735 | 1898 | |
|---|
| 1736 | 1899 | page->mapping = NULL; |
|---|
| 1737 | 1900 | if (current->reclaim_state) |
|---|
| 1738 | 1901 | current->reclaim_state->reclaimed_slab += pages; |
|---|
| 1739 | | - memcg_uncharge_slab(page, order, s); |
|---|
| 1902 | + unaccount_slab_page(page, order, s); |
|---|
| 1740 | 1903 | __free_pages(page, order); |
|---|
| 1741 | 1904 | } |
|---|
| 1742 | 1905 | |
|---|
| .. | .. |
|---|
| 1769 | 1932 | { |
|---|
| 1770 | 1933 | n->nr_partial++; |
|---|
| 1771 | 1934 | if (tail == DEACTIVATE_TO_TAIL) |
|---|
| 1772 | | - list_add_tail(&page->lru, &n->partial); |
|---|
| 1935 | + list_add_tail(&page->slab_list, &n->partial); |
|---|
| 1773 | 1936 | else |
|---|
| 1774 | | - list_add(&page->lru, &n->partial); |
|---|
| 1937 | + list_add(&page->slab_list, &n->partial); |
|---|
| 1775 | 1938 | } |
|---|
| 1776 | 1939 | |
|---|
| 1777 | 1940 | static inline void add_partial(struct kmem_cache_node *n, |
|---|
| .. | .. |
|---|
| 1785 | 1948 | struct page *page) |
|---|
| 1786 | 1949 | { |
|---|
| 1787 | 1950 | lockdep_assert_held(&n->list_lock); |
|---|
| 1788 | | - list_del(&page->lru); |
|---|
| 1951 | + list_del(&page->slab_list); |
|---|
| 1789 | 1952 | n->nr_partial--; |
|---|
| 1790 | 1953 | } |
|---|
| 1791 | 1954 | |
|---|
| .. | .. |
|---|
| 1852 | 2015 | /* |
|---|
| 1853 | 2016 | * Racy check. If we mistakenly see no partial slabs then we |
|---|
| 1854 | 2017 | * just allocate an empty slab. If we mistakenly try to get a |
|---|
| 1855 | | - * partial slab and there is none available then get_partials() |
|---|
| 2018 | + * partial slab and there is none available then get_partial() |
|---|
| 1856 | 2019 | * will return NULL. |
|---|
| 1857 | 2020 | */ |
|---|
| 1858 | 2021 | if (!n || !n->nr_partial) |
|---|
| 1859 | 2022 | return NULL; |
|---|
| 1860 | 2023 | |
|---|
| 1861 | 2024 | spin_lock(&n->list_lock); |
|---|
| 1862 | | - list_for_each_entry_safe(page, page2, &n->partial, lru) { |
|---|
| 2025 | + list_for_each_entry_safe(page, page2, &n->partial, slab_list) { |
|---|
| 1863 | 2026 | void *t; |
|---|
| 1864 | 2027 | |
|---|
| 1865 | 2028 | if (!pfmemalloc_match(page, flags)) |
|---|
| .. | .. |
|---|
| 1897 | 2060 | struct zonelist *zonelist; |
|---|
| 1898 | 2061 | struct zoneref *z; |
|---|
| 1899 | 2062 | struct zone *zone; |
|---|
| 1900 | | - enum zone_type high_zoneidx = gfp_zone(flags); |
|---|
| 2063 | + enum zone_type highest_zoneidx = gfp_zone(flags); |
|---|
| 1901 | 2064 | void *object; |
|---|
| 1902 | 2065 | unsigned int cpuset_mems_cookie; |
|---|
| 1903 | 2066 | |
|---|
| .. | .. |
|---|
| 1926 | 2089 | do { |
|---|
| 1927 | 2090 | cpuset_mems_cookie = read_mems_allowed_begin(); |
|---|
| 1928 | 2091 | zonelist = node_zonelist(mempolicy_slab_node(), flags); |
|---|
| 1929 | | - for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { |
|---|
| 2092 | + for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) { |
|---|
| 1930 | 2093 | struct kmem_cache_node *n; |
|---|
| 1931 | 2094 | |
|---|
| 1932 | 2095 | n = get_node(s, zone_to_nid(zone)); |
|---|
| .. | .. |
|---|
| 1947 | 2110 | } |
|---|
| 1948 | 2111 | } |
|---|
| 1949 | 2112 | } while (read_mems_allowed_retry(cpuset_mems_cookie)); |
|---|
| 1950 | | -#endif |
|---|
| 2113 | +#endif /* CONFIG_NUMA */ |
|---|
| 1951 | 2114 | return NULL; |
|---|
| 1952 | 2115 | } |
|---|
| 1953 | 2116 | |
|---|
| .. | .. |
|---|
| 1970 | 2133 | return get_any_partial(s, flags, c); |
|---|
| 1971 | 2134 | } |
|---|
| 1972 | 2135 | |
|---|
| 1973 | | -#ifdef CONFIG_PREEMPT |
|---|
| 2136 | +#ifdef CONFIG_PREEMPTION |
|---|
| 1974 | 2137 | /* |
|---|
| 1975 | | - * Calculate the next globally unique transaction for disambiguiation |
|---|
| 2138 | + * Calculate the next globally unique transaction for disambiguation |
|---|
| 1976 | 2139 | * during cmpxchg. The transactions start with the cpu number and are then |
|---|
| 1977 | 2140 | * incremented by CONFIG_NR_CPUS. |
|---|
| 1978 | 2141 | */ |
|---|
| .. | .. |
|---|
| 1990 | 2153 | return tid + TID_STEP; |
|---|
| 1991 | 2154 | } |
|---|
| 1992 | 2155 | |
|---|
| 2156 | +#ifdef SLUB_DEBUG_CMPXCHG |
|---|
| 1993 | 2157 | static inline unsigned int tid_to_cpu(unsigned long tid) |
|---|
| 1994 | 2158 | { |
|---|
| 1995 | 2159 | return tid % TID_STEP; |
|---|
| .. | .. |
|---|
| 1999 | 2163 | { |
|---|
| 2000 | 2164 | return tid / TID_STEP; |
|---|
| 2001 | 2165 | } |
|---|
| 2166 | +#endif |
|---|
| 2002 | 2167 | |
|---|
| 2003 | 2168 | static inline unsigned int init_tid(int cpu) |
|---|
| 2004 | 2169 | { |
|---|
| .. | .. |
|---|
| 2013 | 2178 | |
|---|
| 2014 | 2179 | pr_info("%s %s: cmpxchg redo ", n, s->name); |
|---|
| 2015 | 2180 | |
|---|
| 2016 | | -#ifdef CONFIG_PREEMPT |
|---|
| 2181 | +#ifdef CONFIG_PREEMPTION |
|---|
| 2017 | 2182 | if (tid_to_cpu(tid) != tid_to_cpu(actual_tid)) |
|---|
| 2018 | 2183 | pr_warn("due to cpu change %d -> %d\n", |
|---|
| 2019 | 2184 | tid_to_cpu(tid), tid_to_cpu(actual_tid)); |
|---|
| .. | .. |
|---|
| 2131 | 2296 | if (!lock) { |
|---|
| 2132 | 2297 | lock = 1; |
|---|
| 2133 | 2298 | /* |
|---|
| 2134 | | - * Taking the spinlock removes the possiblity |
|---|
| 2299 | + * Taking the spinlock removes the possibility |
|---|
| 2135 | 2300 | * that acquire_slab() will see a slab page that |
|---|
| 2136 | 2301 | * is frozen |
|---|
| 2137 | 2302 | */ |
|---|
| .. | .. |
|---|
| 2139 | 2304 | } |
|---|
| 2140 | 2305 | } else { |
|---|
| 2141 | 2306 | m = M_FULL; |
|---|
| 2142 | | - if (kmem_cache_debug(s) && !lock) { |
|---|
| 2307 | +#ifdef CONFIG_SLUB_DEBUG |
|---|
| 2308 | + if ((s->flags & SLAB_STORE_USER) && !lock) { |
|---|
| 2143 | 2309 | lock = 1; |
|---|
| 2144 | 2310 | /* |
|---|
| 2145 | 2311 | * This also ensures that the scanning of full |
|---|
| .. | .. |
|---|
| 2148 | 2314 | */ |
|---|
| 2149 | 2315 | spin_lock(&n->list_lock); |
|---|
| 2150 | 2316 | } |
|---|
| 2317 | +#endif |
|---|
| 2151 | 2318 | } |
|---|
| 2152 | 2319 | |
|---|
| 2153 | 2320 | if (l != m) { |
|---|
| 2154 | | - |
|---|
| 2155 | 2321 | if (l == M_PARTIAL) |
|---|
| 2156 | | - |
|---|
| 2157 | 2322 | remove_partial(n, page); |
|---|
| 2158 | | - |
|---|
| 2159 | 2323 | else if (l == M_FULL) |
|---|
| 2160 | | - |
|---|
| 2161 | 2324 | remove_full(s, n, page); |
|---|
| 2162 | 2325 | |
|---|
| 2163 | | - if (m == M_PARTIAL) { |
|---|
| 2164 | | - |
|---|
| 2326 | + if (m == M_PARTIAL) |
|---|
| 2165 | 2327 | add_partial(n, page, tail); |
|---|
| 2166 | | - stat(s, tail); |
|---|
| 2167 | | - |
|---|
| 2168 | | - } else if (m == M_FULL) { |
|---|
| 2169 | | - |
|---|
| 2170 | | - stat(s, DEACTIVATE_FULL); |
|---|
| 2328 | + else if (m == M_FULL) |
|---|
| 2171 | 2329 | add_full(s, n, page); |
|---|
| 2172 | | - |
|---|
| 2173 | | - } |
|---|
| 2174 | 2330 | } |
|---|
| 2175 | 2331 | |
|---|
| 2176 | 2332 | l = m; |
|---|
| .. | .. |
|---|
| 2183 | 2339 | if (lock) |
|---|
| 2184 | 2340 | spin_unlock(&n->list_lock); |
|---|
| 2185 | 2341 | |
|---|
| 2186 | | - if (m == M_FREE) { |
|---|
| 2342 | + if (m == M_PARTIAL) |
|---|
| 2343 | + stat(s, tail); |
|---|
| 2344 | + else if (m == M_FULL) |
|---|
| 2345 | + stat(s, DEACTIVATE_FULL); |
|---|
| 2346 | + else if (m == M_FREE) { |
|---|
| 2187 | 2347 | stat(s, DEACTIVATE_EMPTY); |
|---|
| 2188 | 2348 | discard_slab(s, page); |
|---|
| 2189 | 2349 | stat(s, FREE_SLAB); |
|---|
| .. | .. |
|---|
| 2191 | 2351 | |
|---|
| 2192 | 2352 | c->page = NULL; |
|---|
| 2193 | 2353 | c->freelist = NULL; |
|---|
| 2354 | + c->tid = next_tid(c->tid); |
|---|
| 2194 | 2355 | } |
|---|
| 2195 | 2356 | |
|---|
| 2196 | 2357 | /* |
|---|
| .. | .. |
|---|
| 2207 | 2368 | struct kmem_cache_node *n = NULL, *n2 = NULL; |
|---|
| 2208 | 2369 | struct page *page, *discard_page = NULL; |
|---|
| 2209 | 2370 | |
|---|
| 2210 | | - while ((page = c->partial)) { |
|---|
| 2371 | + while ((page = slub_percpu_partial(c))) { |
|---|
| 2211 | 2372 | struct page new; |
|---|
| 2212 | 2373 | struct page old; |
|---|
| 2213 | 2374 | |
|---|
| 2214 | | - c->partial = page->next; |
|---|
| 2375 | + slub_set_percpu_partial(c, page); |
|---|
| 2215 | 2376 | |
|---|
| 2216 | 2377 | n2 = get_node(s, page_to_nid(page)); |
|---|
| 2217 | 2378 | if (n != n2) { |
|---|
| .. | .. |
|---|
| 2258 | 2419 | discard_slab(s, page); |
|---|
| 2259 | 2420 | stat(s, FREE_SLAB); |
|---|
| 2260 | 2421 | } |
|---|
| 2261 | | -#endif |
|---|
| 2422 | +#endif /* CONFIG_SLUB_CPU_PARTIAL */ |
|---|
| 2262 | 2423 | } |
|---|
| 2263 | 2424 | |
|---|
| 2264 | 2425 | /* |
|---|
| 2265 | | - * Put a page that was just frozen (in __slab_free) into a partial page |
|---|
| 2266 | | - * slot if available. |
|---|
| 2426 | + * Put a page that was just frozen (in __slab_free|get_partial_node) into a |
|---|
| 2427 | + * partial page slot if available. |
|---|
| 2267 | 2428 | * |
|---|
| 2268 | 2429 | * If we did not find a slot then simply move all the partials to the |
|---|
| 2269 | 2430 | * per node partial list. |
|---|
| .. | .. |
|---|
| 2284 | 2445 | if (oldpage) { |
|---|
| 2285 | 2446 | pobjects = oldpage->pobjects; |
|---|
| 2286 | 2447 | pages = oldpage->pages; |
|---|
| 2287 | | - if (drain && pobjects > s->cpu_partial) { |
|---|
| 2448 | + if (drain && pobjects > slub_cpu_partial(s)) { |
|---|
| 2288 | 2449 | unsigned long flags; |
|---|
| 2289 | 2450 | /* |
|---|
| 2290 | 2451 | * partial array is full. Move the existing |
|---|
| .. | .. |
|---|
| 2309 | 2470 | |
|---|
| 2310 | 2471 | } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) |
|---|
| 2311 | 2472 | != oldpage); |
|---|
| 2312 | | - if (unlikely(!s->cpu_partial)) { |
|---|
| 2473 | + if (unlikely(!slub_cpu_partial(s))) { |
|---|
| 2313 | 2474 | unsigned long flags; |
|---|
| 2314 | 2475 | |
|---|
| 2315 | 2476 | local_irq_save(flags); |
|---|
| .. | .. |
|---|
| 2317 | 2478 | local_irq_restore(flags); |
|---|
| 2318 | 2479 | } |
|---|
| 2319 | 2480 | preempt_enable(); |
|---|
| 2320 | | -#endif |
|---|
| 2481 | +#endif /* CONFIG_SLUB_CPU_PARTIAL */ |
|---|
| 2321 | 2482 | } |
|---|
| 2322 | 2483 | |
|---|
| 2323 | 2484 | static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) |
|---|
| 2324 | 2485 | { |
|---|
| 2325 | 2486 | stat(s, CPUSLAB_FLUSH); |
|---|
| 2326 | 2487 | deactivate_slab(s, c->page, c->freelist, c); |
|---|
| 2327 | | - |
|---|
| 2328 | | - c->tid = next_tid(c->tid); |
|---|
| 2329 | 2488 | } |
|---|
| 2330 | 2489 | |
|---|
| 2331 | 2490 | /* |
|---|
| .. | .. |
|---|
| 2337 | 2496 | { |
|---|
| 2338 | 2497 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); |
|---|
| 2339 | 2498 | |
|---|
| 2340 | | - if (likely(c)) { |
|---|
| 2341 | | - if (c->page) |
|---|
| 2342 | | - flush_slab(s, c); |
|---|
| 2499 | + if (c->page) |
|---|
| 2500 | + flush_slab(s, c); |
|---|
| 2343 | 2501 | |
|---|
| 2344 | | - unfreeze_partials(s, c); |
|---|
| 2345 | | - } |
|---|
| 2502 | + unfreeze_partials(s, c); |
|---|
| 2346 | 2503 | } |
|---|
| 2347 | 2504 | |
|---|
| 2348 | 2505 | static void flush_cpu_slab(void *d) |
|---|
| .. | .. |
|---|
| 2362 | 2519 | |
|---|
| 2363 | 2520 | static void flush_all(struct kmem_cache *s) |
|---|
| 2364 | 2521 | { |
|---|
| 2365 | | - on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC); |
|---|
| 2522 | + on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1); |
|---|
| 2366 | 2523 | } |
|---|
| 2367 | 2524 | |
|---|
| 2368 | 2525 | /* |
|---|
| .. | .. |
|---|
| 2391 | 2548 | static inline int node_match(struct page *page, int node) |
|---|
| 2392 | 2549 | { |
|---|
| 2393 | 2550 | #ifdef CONFIG_NUMA |
|---|
| 2394 | | - if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node)) |
|---|
| 2551 | + if (node != NUMA_NO_NODE && page_to_nid(page) != node) |
|---|
| 2395 | 2552 | return 0; |
|---|
| 2396 | 2553 | #endif |
|---|
| 2397 | 2554 | return 1; |
|---|
| .. | .. |
|---|
| 2418 | 2575 | struct page *page; |
|---|
| 2419 | 2576 | |
|---|
| 2420 | 2577 | spin_lock_irqsave(&n->list_lock, flags); |
|---|
| 2421 | | - list_for_each_entry(page, &n->partial, lru) |
|---|
| 2578 | + list_for_each_entry(page, &n->partial, slab_list) |
|---|
| 2422 | 2579 | x += get_count(page); |
|---|
| 2423 | 2580 | spin_unlock_irqrestore(&n->list_lock, flags); |
|---|
| 2424 | 2581 | return x; |
|---|
| .. | .. |
|---|
| 2492 | 2649 | stat(s, ALLOC_SLAB); |
|---|
| 2493 | 2650 | c->page = page; |
|---|
| 2494 | 2651 | *pc = c; |
|---|
| 2495 | | - } else |
|---|
| 2496 | | - freelist = NULL; |
|---|
| 2652 | + } |
|---|
| 2497 | 2653 | |
|---|
| 2498 | 2654 | return freelist; |
|---|
| 2499 | 2655 | } |
|---|
| .. | .. |
|---|
| 2565 | 2721 | void *freelist; |
|---|
| 2566 | 2722 | struct page *page; |
|---|
| 2567 | 2723 | |
|---|
| 2724 | + stat(s, ALLOC_SLOWPATH); |
|---|
| 2725 | + |
|---|
| 2568 | 2726 | page = c->page; |
|---|
| 2569 | 2727 | if (!page) { |
|---|
| 2570 | 2728 | /* |
|---|
| .. | .. |
|---|
| 2612 | 2770 | |
|---|
| 2613 | 2771 | if (!freelist) { |
|---|
| 2614 | 2772 | c->page = NULL; |
|---|
| 2773 | + c->tid = next_tid(c->tid); |
|---|
| 2615 | 2774 | stat(s, DEACTIVATE_BYPASS); |
|---|
| 2616 | 2775 | goto new_slab; |
|---|
| 2617 | 2776 | } |
|---|
| .. | .. |
|---|
| 2669 | 2828 | unsigned long flags; |
|---|
| 2670 | 2829 | |
|---|
| 2671 | 2830 | local_irq_save(flags); |
|---|
| 2672 | | -#ifdef CONFIG_PREEMPT |
|---|
| 2831 | +#ifdef CONFIG_PREEMPTION |
|---|
| 2673 | 2832 | /* |
|---|
| 2674 | 2833 | * We may have been preempted and rescheduled on a different |
|---|
| 2675 | 2834 | * cpu before disabling interrupts. Need to reload cpu area |
|---|
| .. | .. |
|---|
| 2691 | 2850 | void *obj) |
|---|
| 2692 | 2851 | { |
|---|
| 2693 | 2852 | if (unlikely(slab_want_init_on_free(s)) && obj) |
|---|
| 2694 | | - memset((void *)((char *)obj + s->offset), 0, sizeof(void *)); |
|---|
| 2853 | + memset((void *)((char *)kasan_reset_tag(obj) + s->offset), |
|---|
| 2854 | + 0, sizeof(void *)); |
|---|
| 2695 | 2855 | } |
|---|
| 2696 | 2856 | |
|---|
| 2697 | 2857 | /* |
|---|
| .. | .. |
|---|
| 2705 | 2865 | * Otherwise we can simply pick the next object from the lockless free list. |
|---|
| 2706 | 2866 | */ |
|---|
| 2707 | 2867 | static __always_inline void *slab_alloc_node(struct kmem_cache *s, |
|---|
| 2708 | | - gfp_t gfpflags, int node, unsigned long addr) |
|---|
| 2868 | + gfp_t gfpflags, int node, unsigned long addr, size_t orig_size) |
|---|
| 2709 | 2869 | { |
|---|
| 2710 | 2870 | void *object; |
|---|
| 2711 | 2871 | struct kmem_cache_cpu *c; |
|---|
| 2712 | 2872 | struct page *page; |
|---|
| 2713 | 2873 | unsigned long tid; |
|---|
| 2874 | + struct obj_cgroup *objcg = NULL; |
|---|
| 2875 | + bool init = false; |
|---|
| 2714 | 2876 | |
|---|
| 2715 | | - s = slab_pre_alloc_hook(s, gfpflags); |
|---|
| 2877 | + s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags); |
|---|
| 2716 | 2878 | if (!s) |
|---|
| 2717 | 2879 | return NULL; |
|---|
| 2880 | + |
|---|
| 2881 | + object = kfence_alloc(s, orig_size, gfpflags); |
|---|
| 2882 | + if (unlikely(object)) |
|---|
| 2883 | + goto out; |
|---|
| 2884 | + |
|---|
| 2718 | 2885 | redo: |
|---|
| 2719 | 2886 | /* |
|---|
| 2720 | 2887 | * Must read kmem_cache cpu data via this cpu ptr. Preemption is |
|---|
| .. | .. |
|---|
| 2723 | 2890 | * as we end up on the original cpu again when doing the cmpxchg. |
|---|
| 2724 | 2891 | * |
|---|
| 2725 | 2892 | * We should guarantee that tid and kmem_cache are retrieved on |
|---|
| 2726 | | - * the same cpu. It could be different if CONFIG_PREEMPT so we need |
|---|
| 2893 | + * the same cpu. It could be different if CONFIG_PREEMPTION so we need |
|---|
| 2727 | 2894 | * to check if it is matched or not. |
|---|
| 2728 | 2895 | */ |
|---|
| 2729 | 2896 | do { |
|---|
| 2730 | 2897 | tid = this_cpu_read(s->cpu_slab->tid); |
|---|
| 2731 | 2898 | c = raw_cpu_ptr(s->cpu_slab); |
|---|
| 2732 | | - } while (IS_ENABLED(CONFIG_PREEMPT) && |
|---|
| 2899 | + } while (IS_ENABLED(CONFIG_PREEMPTION) && |
|---|
| 2733 | 2900 | unlikely(tid != READ_ONCE(c->tid))); |
|---|
| 2734 | 2901 | |
|---|
| 2735 | 2902 | /* |
|---|
| .. | .. |
|---|
| 2751 | 2918 | |
|---|
| 2752 | 2919 | object = c->freelist; |
|---|
| 2753 | 2920 | page = c->page; |
|---|
| 2754 | | - if (unlikely(!object || !node_match(page, node))) { |
|---|
| 2921 | + if (unlikely(!object || !page || !node_match(page, node))) { |
|---|
| 2755 | 2922 | object = __slab_alloc(s, gfpflags, node, addr, c); |
|---|
| 2756 | | - stat(s, ALLOC_SLOWPATH); |
|---|
| 2757 | 2923 | } else { |
|---|
| 2758 | 2924 | void *next_object = get_freepointer_safe(s, object); |
|---|
| 2759 | 2925 | |
|---|
| .. | .. |
|---|
| 2784 | 2950 | } |
|---|
| 2785 | 2951 | |
|---|
| 2786 | 2952 | maybe_wipe_obj_freeptr(s, object); |
|---|
| 2953 | + init = slab_want_init_on_alloc(gfpflags, s); |
|---|
| 2787 | 2954 | |
|---|
| 2788 | | - if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) |
|---|
| 2789 | | - memset(object, 0, s->object_size); |
|---|
| 2790 | | - |
|---|
| 2791 | | - slab_post_alloc_hook(s, gfpflags, 1, &object); |
|---|
| 2955 | +out: |
|---|
| 2956 | + slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init); |
|---|
| 2792 | 2957 | |
|---|
| 2793 | 2958 | return object; |
|---|
| 2794 | 2959 | } |
|---|
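
Two changes meet at the end of slab_alloc_node(): KFENCE gets first shot at the allocation (the new `out:` label lets a KFENCE-sampled object bypass the slab fastpath entirely), and the zero-initialisation that used to be open-coded here moves behind the new `init` argument of slab_post_alloc_hook(), so the single and bulk paths share it. A simplified sketch of what the hook is assumed to do with `init` (the real helper in mm/slab.h also handles kmemleak and memcg/objcg accounting):

```c
/* Simplified sketch only; not the verbatim mm/slab.h helper. */
static inline void slab_post_alloc_hook(struct kmem_cache *s,
		struct obj_cgroup *objcg, gfp_t flags,
		size_t size, void **p, bool init)
{
	size_t i;

	for (i = 0; i < size; i++) {
		/* KASAN may retag/unpoison the object; init may be folded in. */
		p[i] = kasan_slab_alloc(s, p[i], flags, init);
		if (p[i] && init)
			memset(p[i], 0, s->object_size);
	}
	/* ... kmemleak and memcg accounting elided ... */
}
```
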
| 2795 | 2960 | |
|---|
| 2796 | 2961 | static __always_inline void *slab_alloc(struct kmem_cache *s, |
|---|
| 2797 | | - gfp_t gfpflags, unsigned long addr) |
|---|
| 2962 | + gfp_t gfpflags, unsigned long addr, size_t orig_size) |
|---|
| 2798 | 2963 | { |
|---|
| 2799 | | - return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr); |
|---|
| 2964 | + return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr, orig_size); |
|---|
| 2800 | 2965 | } |
|---|
| 2801 | 2966 | |
|---|
| 2802 | 2967 | void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) |
|---|
| 2803 | 2968 | { |
|---|
| 2804 | | - void *ret = slab_alloc(s, gfpflags, _RET_IP_); |
|---|
| 2969 | + void *ret = slab_alloc(s, gfpflags, _RET_IP_, s->object_size); |
|---|
| 2805 | 2970 | |
|---|
| 2806 | 2971 | trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, |
|---|
| 2807 | 2972 | s->size, gfpflags); |
|---|
| .. | .. |
|---|
| 2813 | 2978 | #ifdef CONFIG_TRACING |
|---|
| 2814 | 2979 | void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) |
|---|
| 2815 | 2980 | { |
|---|
| 2816 | | - void *ret = slab_alloc(s, gfpflags, _RET_IP_); |
|---|
| 2981 | + void *ret = slab_alloc(s, gfpflags, _RET_IP_, size); |
|---|
| 2817 | 2982 | trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags); |
|---|
| 2818 | 2983 | ret = kasan_kmalloc(s, ret, size, gfpflags); |
|---|
| 2819 | 2984 | return ret; |
|---|
| .. | .. |
|---|
| 2824 | 2989 | #ifdef CONFIG_NUMA |
|---|
| 2825 | 2990 | void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) |
|---|
| 2826 | 2991 | { |
|---|
| 2827 | | - void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_); |
|---|
| 2992 | + void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, s->object_size); |
|---|
| 2828 | 2993 | |
|---|
| 2829 | 2994 | trace_kmem_cache_alloc_node(_RET_IP_, ret, |
|---|
| 2830 | 2995 | s->object_size, s->size, gfpflags, node); |
|---|
| .. | .. |
|---|
| 2838 | 3003 | gfp_t gfpflags, |
|---|
| 2839 | 3004 | int node, size_t size) |
|---|
| 2840 | 3005 | { |
|---|
| 2841 | | - void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_); |
|---|
| 3006 | + void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, size); |
|---|
| 2842 | 3007 | |
|---|
| 2843 | 3008 | trace_kmalloc_node(_RET_IP_, ret, |
|---|
| 2844 | 3009 | size, s->size, gfpflags, node); |
|---|
| .. | .. |
|---|
| 2848 | 3013 | } |
|---|
| 2849 | 3014 | EXPORT_SYMBOL(kmem_cache_alloc_node_trace); |
|---|
| 2850 | 3015 | #endif |
|---|
| 2851 | | -#endif |
|---|
| 3016 | +#endif /* CONFIG_NUMA */ |
|---|
| 2852 | 3017 | |
|---|
| 2853 | 3018 | /* |
|---|
| 2854 | 3019 | * Slow path handling. This may still be called frequently since objects |
|---|
| .. | .. |
|---|
| 2868 | 3033 | struct page new; |
|---|
| 2869 | 3034 | unsigned long counters; |
|---|
| 2870 | 3035 | struct kmem_cache_node *n = NULL; |
|---|
| 2871 | | - unsigned long uninitialized_var(flags); |
|---|
| 3036 | + unsigned long flags; |
|---|
| 2872 | 3037 | |
|---|
| 2873 | 3038 | stat(s, FREE_SLOWPATH); |
|---|
| 3039 | + |
|---|
| 3040 | + if (kfence_free(head)) |
|---|
| 3041 | + return; |
|---|
| 2874 | 3042 | |
|---|
| 2875 | 3043 | if (kmem_cache_debug(s) && |
|---|
| 2876 | 3044 | !free_debug_processing(s, page, head, tail, cnt, addr)) |
|---|
| .. | .. |
|---|
| 2922 | 3090 | |
|---|
| 2923 | 3091 | if (likely(!n)) { |
|---|
| 2924 | 3092 | |
|---|
| 2925 | | - /* |
|---|
| 2926 | | - * If we just froze the page then put it onto the |
|---|
| 2927 | | - * per cpu partial list. |
|---|
| 2928 | | - */ |
|---|
| 2929 | | - if (new.frozen && !was_frozen) { |
|---|
| 3093 | + if (likely(was_frozen)) { |
|---|
| 3094 | + /* |
|---|
| 3095 | + * The list lock was not taken therefore no list |
|---|
| 3096 | + * activity can be necessary. |
|---|
| 3097 | + */ |
|---|
| 3098 | + stat(s, FREE_FROZEN); |
|---|
| 3099 | + } else if (new.frozen) { |
|---|
| 3100 | + /* |
|---|
| 3101 | + * If we just froze the page then put it onto the |
|---|
| 3102 | + * per cpu partial list. |
|---|
| 3103 | + */ |
|---|
| 2930 | 3104 | put_cpu_partial(s, page, 1); |
|---|
| 2931 | 3105 | stat(s, CPU_PARTIAL_FREE); |
|---|
| 2932 | 3106 | } |
|---|
| 2933 | | - /* |
|---|
| 2934 | | - * The list lock was not taken therefore no list |
|---|
| 2935 | | - * activity can be necessary. |
|---|
| 2936 | | - */ |
|---|
| 2937 | | - if (was_frozen) |
|---|
| 2938 | | - stat(s, FREE_FROZEN); |
|---|
| 3107 | + |
|---|
| 2939 | 3108 | return; |
|---|
| 2940 | 3109 | } |
|---|
| 2941 | 3110 | |
|---|
| .. | .. |
|---|
| 2947 | 3116 | * then add it. |
|---|
| 2948 | 3117 | */ |
|---|
| 2949 | 3118 | if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) { |
|---|
| 2950 | | - if (kmem_cache_debug(s)) |
|---|
| 2951 | | - remove_full(s, n, page); |
|---|
| 3119 | + remove_full(s, n, page); |
|---|
| 2952 | 3120 | add_partial(n, page, DEACTIVATE_TO_TAIL); |
|---|
| 2953 | 3121 | stat(s, FREE_ADD_PARTIAL); |
|---|
| 2954 | 3122 | } |
|---|
| .. | .. |
|---|
| 2994 | 3162 | void *tail_obj = tail ? : head; |
|---|
| 2995 | 3163 | struct kmem_cache_cpu *c; |
|---|
| 2996 | 3164 | unsigned long tid; |
|---|
| 3165 | + |
|---|
| 3166 | + /* memcg_slab_free_hook() is already called for bulk free. */ |
|---|
| 3167 | + if (!tail) |
|---|
| 3168 | + memcg_slab_free_hook(s, &head, 1); |
|---|
| 2997 | 3169 | redo: |
|---|
| 2998 | 3170 | /* |
|---|
| 2999 | 3171 | * Determine the currently cpus per cpu slab. |
| 2999 | 3171 | * Determine the current cpu's per cpu slab. |
|---|
| .. | .. |
|---|
| 3004 | 3176 | do { |
|---|
| 3005 | 3177 | tid = this_cpu_read(s->cpu_slab->tid); |
|---|
| 3006 | 3178 | c = raw_cpu_ptr(s->cpu_slab); |
|---|
| 3007 | | - } while (IS_ENABLED(CONFIG_PREEMPT) && |
|---|
| 3179 | + } while (IS_ENABLED(CONFIG_PREEMPTION) && |
|---|
| 3008 | 3180 | unlikely(tid != READ_ONCE(c->tid))); |
|---|
| 3009 | 3181 | |
|---|
| 3010 | 3182 | /* Same with comment on barrier() in slab_alloc_node() */ |
|---|
| .. | .. |
|---|
| 3114 | 3286 | df->s = cache_from_obj(s, object); /* Support for memcg */ |
|---|
| 3115 | 3287 | } |
|---|
| 3116 | 3288 | |
|---|
| 3289 | + if (is_kfence_address(object)) { |
|---|
| 3290 | + slab_free_hook(df->s, object, false); |
|---|
| 3291 | + __kfence_free(object); |
|---|
| 3292 | + p[size] = NULL; /* mark object processed */ |
|---|
| 3293 | + return size; |
|---|
| 3294 | + } |
|---|
| 3295 | + |
|---|
| 3117 | 3296 | /* Start new detached freelist */ |
|---|
| 3118 | 3297 | df->page = page; |
|---|
| 3119 | 3298 | set_freepointer(df->s, object, NULL); |
|---|
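
build_detached_freelist() now diverts KFENCE objects before it starts building a freelist, since those objects do not live on a regular slab page. The check is designed to be a single cheap comparison; roughly how it is implemented in include/linux/kfence.h (exact form may differ):

```c
/* Sketch of the KFENCE fast-path address check. */
static __always_inline bool is_kfence_address(const void *addr)
{
	/*
	 * One unsigned comparison against the dedicated pool: the subtraction
	 * wraps for addresses below the pool, and "&& __kfence_pool" covers
	 * the pool-not-initialised case.
	 */
	return unlikely((unsigned long)((char *)addr - __kfence_pool) <
				KFENCE_POOL_SIZE && __kfence_pool);
}
```
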
| .. | .. |
|---|
| 3155 | 3334 | if (WARN_ON(!size)) |
|---|
| 3156 | 3335 | return; |
|---|
| 3157 | 3336 | |
|---|
| 3337 | + memcg_slab_free_hook(s, p, size); |
|---|
| 3158 | 3338 | do { |
|---|
| 3159 | 3339 | struct detached_freelist df; |
|---|
| 3160 | 3340 | |
|---|
| .. | .. |
|---|
| 3173 | 3353 | { |
|---|
| 3174 | 3354 | struct kmem_cache_cpu *c; |
|---|
| 3175 | 3355 | int i; |
|---|
| 3356 | + struct obj_cgroup *objcg = NULL; |
|---|
| 3176 | 3357 | |
|---|
| 3177 | 3358 | /* memcg and kmem_cache debug support */ |
|---|
| 3178 | | - s = slab_pre_alloc_hook(s, flags); |
|---|
| 3359 | + s = slab_pre_alloc_hook(s, &objcg, size, flags); |
|---|
| 3179 | 3360 | if (unlikely(!s)) |
|---|
| 3180 | 3361 | return false; |
|---|
| 3181 | 3362 | /* |
|---|
| .. | .. |
|---|
| 3187 | 3368 | c = this_cpu_ptr(s->cpu_slab); |
|---|
| 3188 | 3369 | |
|---|
| 3189 | 3370 | for (i = 0; i < size; i++) { |
|---|
| 3190 | | - void *object = c->freelist; |
|---|
| 3371 | + void *object = kfence_alloc(s, s->object_size, flags); |
|---|
| 3191 | 3372 | |
|---|
| 3373 | + if (unlikely(object)) { |
|---|
| 3374 | + p[i] = object; |
|---|
| 3375 | + continue; |
|---|
| 3376 | + } |
|---|
| 3377 | + |
|---|
| 3378 | + object = c->freelist; |
|---|
| 3192 | 3379 | if (unlikely(!object)) { |
|---|
| 3193 | 3380 | /* |
|---|
| 3194 | 3381 | * We may have removed an object from c->freelist using |
|---|
| .. | .. |
|---|
| 3220 | 3407 | c->tid = next_tid(c->tid); |
|---|
| 3221 | 3408 | local_irq_enable(); |
|---|
| 3222 | 3409 | |
|---|
| 3223 | | - /* Clear memory outside IRQ disabled fastpath loop */ |
|---|
| 3224 | | - if (unlikely(slab_want_init_on_alloc(flags, s))) { |
|---|
| 3225 | | - int j; |
|---|
| 3226 | | - |
|---|
| 3227 | | - for (j = 0; j < i; j++) |
|---|
| 3228 | | - memset(p[j], 0, s->object_size); |
|---|
| 3229 | | - } |
|---|
| 3230 | | - |
|---|
| 3231 | | - /* memcg and kmem_cache debug support */ |
|---|
| 3232 | | - slab_post_alloc_hook(s, flags, size, p); |
|---|
| 3410 | + /* |
|---|
| 3411 | + * memcg and kmem_cache debug support and memory initialization. |
|---|
| 3412 | + * Done outside of the IRQ disabled fastpath loop. |
|---|
| 3413 | + */ |
|---|
| 3414 | + slab_post_alloc_hook(s, objcg, flags, size, p, |
|---|
| 3415 | + slab_want_init_on_alloc(flags, s)); |
|---|
| 3233 | 3416 | return i; |
|---|
| 3234 | 3417 | error: |
|---|
| 3235 | 3418 | local_irq_enable(); |
|---|
| 3236 | | - slab_post_alloc_hook(s, flags, i, p); |
|---|
| 3419 | + slab_post_alloc_hook(s, objcg, flags, i, p, false); |
|---|
| 3237 | 3420 | __kmem_cache_free_bulk(s, i, p); |
|---|
| 3238 | 3421 | return 0; |
|---|
| 3239 | 3422 | } |
|---|
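
With KFENCE interposed per object and initialisation moved into the shared post-alloc hook, kmem_cache_alloc_bulk() keeps its all-or-nothing contract. A hypothetical caller sketch (`my_cache` and `fill_batch` are illustrative names, not from the source):

```c
/* Hypothetical usage sketch of the bulk alloc/free API. */
static int fill_batch(struct kmem_cache *my_cache)
{
	void *objs[16];
	int got;

	got = kmem_cache_alloc_bulk(my_cache, GFP_KERNEL, ARRAY_SIZE(objs), objs);
	if (!got)
		return -ENOMEM;	/* all-or-nothing: 0 means nothing was allocated */

	/* ... use objs[0..got-1] ... */

	kmem_cache_free_bulk(my_cache, got, objs);
	return 0;
}
```
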
| .. | .. |
|---|
| 3429 | 3612 | init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); |
|---|
| 3430 | 3613 | init_tracking(kmem_cache_node, n); |
|---|
| 3431 | 3614 | #endif |
|---|
| 3432 | | - n = kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node), |
|---|
| 3433 | | - GFP_KERNEL); |
|---|
| 3615 | + n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false); |
|---|
| 3434 | 3616 | page->freelist = get_freepointer(kmem_cache_node, n); |
|---|
| 3435 | 3617 | page->inuse = 1; |
|---|
| 3436 | 3618 | page->frozen = 0; |
|---|
| .. | .. |
|---|
| 3518 | 3700 | * 50% to keep some capacity around for frees. |
|---|
| 3519 | 3701 | */ |
|---|
| 3520 | 3702 | if (!kmem_cache_has_cpu_partial(s)) |
|---|
| 3521 | | - s->cpu_partial = 0; |
|---|
| 3703 | + slub_set_cpu_partial(s, 0); |
|---|
| 3522 | 3704 | else if (s->size >= PAGE_SIZE) |
|---|
| 3523 | | - s->cpu_partial = 2; |
|---|
| 3705 | + slub_set_cpu_partial(s, 2); |
|---|
| 3524 | 3706 | else if (s->size >= 1024) |
|---|
| 3525 | | - s->cpu_partial = 6; |
|---|
| 3707 | + slub_set_cpu_partial(s, 6); |
|---|
| 3526 | 3708 | else if (s->size >= 256) |
|---|
| 3527 | | - s->cpu_partial = 13; |
|---|
| 3709 | + slub_set_cpu_partial(s, 13); |
|---|
| 3528 | 3710 | else |
|---|
| 3529 | | - s->cpu_partial = 30; |
|---|
| 3711 | + slub_set_cpu_partial(s, 30); |
|---|
| 3530 | 3712 | #endif |
|---|
| 3531 | 3713 | } |
|---|
| 3532 | 3714 | |
|---|
| .. | .. |
|---|
| 3571 | 3753 | |
|---|
| 3572 | 3754 | /* |
|---|
| 3573 | 3755 | * With that we have determined the number of bytes in actual use |
|---|
| 3574 | | - * by the object. This is the potential offset to the free pointer. |
|---|
| 3756 | + * by the object and redzoning. |
|---|
| 3575 | 3757 | */ |
|---|
| 3576 | 3758 | s->inuse = size; |
|---|
| 3577 | 3759 | |
|---|
| 3578 | | - if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || |
|---|
| 3579 | | - s->ctor)) { |
|---|
| 3760 | + if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || |
|---|
| 3761 | + ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) || |
|---|
| 3762 | + s->ctor) { |
|---|
| 3580 | 3763 | /* |
|---|
| 3581 | 3764 | * Relocate free pointer after the object if it is not |
|---|
| 3582 | 3765 | * permitted to overwrite the first word of the object on |
|---|
| 3583 | 3766 | * kmem_cache_free. |
|---|
| 3584 | 3767 | * |
|---|
| 3585 | 3768 | * This is the case if we do RCU, have a constructor or |
|---|
| 3586 | | - * destructor or are poisoning the objects. |
|---|
| 3769 | + * destructor, are poisoning the objects, or are |
|---|
| 3770 | + * redzoning an object smaller than sizeof(void *). |
|---|
| 3771 | + * |
|---|
| 3772 | + * The assumption that s->offset >= s->inuse means free |
|---|
| 3773 | + * pointer is outside of the object is used in the |
|---|
| 3774 | + * freeptr_outside_object() function. If that is no |
|---|
| 3775 | + * longer true, the function needs to be modified. |
|---|
| 3587 | 3776 | */ |
|---|
| 3588 | 3777 | s->offset = size; |
|---|
| 3589 | 3778 | size += sizeof(void *); |
|---|
| 3779 | + } else { |
|---|
| 3780 | + /* |
|---|
| 3781 | + * Store freelist pointer near middle of object to keep |
|---|
| 3782 | + * it away from the edges of the object to avoid small |
|---|
| 3783 | + * sized over/underflows from neighboring allocations. |
|---|
| 3784 | + */ |
|---|
| 3785 | + s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *)); |
|---|
| 3590 | 3786 | } |
|---|
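
The rewritten block encodes two rules: caches that cannot tolerate the freelist pointer overlapping the object (RCU type-safety, poisoning, a constructor, or red-zoned objects smaller than a word) keep the pointer just past the object, and everything else now stores it near the middle of the object to blunt small off-by-a-few overflows. The assumption spelled out in the comment is captured by a small helper; roughly:

```c
/* Helper relied on by the comment above (mm/slub.c); sketch, may differ slightly. */
static inline bool freeptr_outside_object(struct kmem_cache *s)
{
	return s->offset >= s->inuse;
}

/*
 * Worked example of the "middle of object" case: for a 96-byte object on a
 * 64-bit build, s->offset = ALIGN_DOWN(96 / 2, sizeof(void *)) = 48.
 */
```
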
| 3591 | 3787 | |
|---|
| 3592 | 3788 | #ifdef CONFIG_SLUB_DEBUG |
|---|
| .. | .. |
|---|
| 3623 | 3819 | */ |
|---|
| 3624 | 3820 | size = ALIGN(size, s->align); |
|---|
| 3625 | 3821 | s->size = size; |
|---|
| 3822 | + s->reciprocal_size = reciprocal_value(size); |
|---|
| 3626 | 3823 | if (forced_order >= 0) |
|---|
| 3627 | 3824 | order = forced_order; |
|---|
| 3628 | 3825 | else |
|---|
| .. | .. |
|---|
| 3657 | 3854 | |
|---|
| 3658 | 3855 | static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) |
|---|
| 3659 | 3856 | { |
|---|
| 3660 | | - s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor); |
|---|
| 3857 | + s->flags = kmem_cache_flags(s->size, flags, s->name); |
|---|
| 3661 | 3858 | #ifdef CONFIG_SLAB_FREELIST_HARDENED |
|---|
| 3662 | 3859 | s->random = get_random_long(); |
|---|
| 3663 | 3860 | #endif |
|---|
| .. | .. |
|---|
| 3708 | 3905 | if (alloc_kmem_cache_cpus(s)) |
|---|
| 3709 | 3906 | return 0; |
|---|
| 3710 | 3907 | |
|---|
| 3711 | | - free_kmem_cache_nodes(s); |
|---|
| 3712 | 3908 | error: |
|---|
| 3713 | | - if (flags & SLAB_PANIC) |
|---|
| 3714 | | - panic("Cannot create slab %s size=%u realsize=%u order=%u offset=%u flags=%lx\n", |
|---|
| 3715 | | - s->name, s->size, s->size, |
|---|
| 3716 | | - oo_order(s->oo), s->offset, (unsigned long)flags); |
|---|
| 3909 | + __kmem_cache_release(s); |
|---|
| 3717 | 3910 | return -EINVAL; |
|---|
| 3718 | 3911 | } |
|---|
| 3719 | 3912 | |
|---|
| 3720 | 3913 | static void list_slab_objects(struct kmem_cache *s, struct page *page, |
|---|
| 3721 | | - const char *text) |
|---|
| 3914 | + const char *text) |
|---|
| 3722 | 3915 | { |
|---|
| 3723 | 3916 | #ifdef CONFIG_SLUB_DEBUG |
|---|
| 3724 | 3917 | void *addr = page_address(page); |
|---|
| 3918 | + unsigned long *map; |
|---|
| 3725 | 3919 | void *p; |
|---|
| 3726 | | - unsigned long *map = kcalloc(BITS_TO_LONGS(page->objects), |
|---|
| 3727 | | - sizeof(long), |
|---|
| 3728 | | - GFP_ATOMIC); |
|---|
| 3729 | | - if (!map) |
|---|
| 3730 | | - return; |
|---|
| 3920 | + |
|---|
| 3731 | 3921 | slab_err(s, page, text, s->name); |
|---|
| 3732 | 3922 | slab_lock(page); |
|---|
| 3733 | 3923 | |
|---|
| 3734 | | - get_map(s, page, map); |
|---|
| 3924 | + map = get_map(s, page); |
|---|
| 3735 | 3925 | for_each_object(p, s, addr, page->objects) { |
|---|
| 3736 | 3926 | |
|---|
| 3737 | | - if (!test_bit(slab_index(p, s, addr), map)) { |
|---|
| 3927 | + if (!test_bit(__obj_to_index(s, addr, p), map)) { |
|---|
| 3738 | 3928 | pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr); |
|---|
| 3739 | 3929 | print_tracking(s, p); |
|---|
| 3740 | 3930 | } |
|---|
| 3741 | 3931 | } |
|---|
| 3932 | + put_map(map); |
|---|
| 3742 | 3933 | slab_unlock(page); |
|---|
| 3743 | | - kfree(map); |
|---|
| 3744 | 3934 | #endif |
|---|
| 3745 | 3935 | } |
|---|
| 3746 | 3936 | |
|---|
| .. | .. |
|---|
| 3756 | 3946 | |
|---|
| 3757 | 3947 | BUG_ON(irqs_disabled()); |
|---|
| 3758 | 3948 | spin_lock_irq(&n->list_lock); |
|---|
| 3759 | | - list_for_each_entry_safe(page, h, &n->partial, lru) { |
|---|
| 3949 | + list_for_each_entry_safe(page, h, &n->partial, slab_list) { |
|---|
| 3760 | 3950 | if (!page->inuse) { |
|---|
| 3761 | 3951 | remove_partial(n, page); |
|---|
| 3762 | | - list_add(&page->lru, &discard); |
|---|
| 3952 | + list_add(&page->slab_list, &discard); |
|---|
| 3763 | 3953 | } else { |
|---|
| 3764 | 3954 | list_slab_objects(s, page, |
|---|
| 3765 | | - "Objects remaining in %s on __kmem_cache_shutdown()"); |
|---|
| 3955 | + "Objects remaining in %s on __kmem_cache_shutdown()"); |
|---|
| 3766 | 3956 | } |
|---|
| 3767 | 3957 | } |
|---|
| 3768 | 3958 | spin_unlock_irq(&n->list_lock); |
|---|
| 3769 | 3959 | |
|---|
| 3770 | | - list_for_each_entry_safe(page, h, &discard, lru) |
|---|
| 3960 | + list_for_each_entry_safe(page, h, &discard, slab_list) |
|---|
| 3771 | 3961 | discard_slab(s, page); |
|---|
| 3772 | 3962 | } |
|---|
| 3773 | 3963 | |
|---|
| .. | .. |
|---|
| 3797 | 3987 | if (n->nr_partial || slabs_node(s, node)) |
|---|
| 3798 | 3988 | return 1; |
|---|
| 3799 | 3989 | } |
|---|
| 3800 | | - sysfs_slab_remove(s); |
|---|
| 3801 | 3990 | return 0; |
|---|
| 3802 | 3991 | } |
|---|
| 3803 | 3992 | |
|---|
| .. | .. |
|---|
| 3846 | 4035 | if (unlikely(ZERO_OR_NULL_PTR(s))) |
|---|
| 3847 | 4036 | return s; |
|---|
| 3848 | 4037 | |
|---|
| 3849 | | - ret = slab_alloc(s, flags, _RET_IP_); |
|---|
| 4038 | + ret = slab_alloc(s, flags, _RET_IP_, size); |
|---|
| 3850 | 4039 | |
|---|
| 3851 | 4040 | trace_kmalloc(_RET_IP_, ret, size, s->size, flags); |
|---|
| 3852 | 4041 | |
|---|
| .. | .. |
|---|
| 3861 | 4050 | { |
|---|
| 3862 | 4051 | struct page *page; |
|---|
| 3863 | 4052 | void *ptr = NULL; |
|---|
| 4053 | + unsigned int order = get_order(size); |
|---|
| 3864 | 4054 | |
|---|
| 3865 | 4055 | flags |= __GFP_COMP; |
|---|
| 3866 | | - page = alloc_pages_node(node, flags, get_order(size)); |
|---|
| 3867 | | - if (page) |
|---|
| 4056 | + page = alloc_pages_node(node, flags, order); |
|---|
| 4057 | + if (page) { |
|---|
| 3868 | 4058 | ptr = page_address(page); |
|---|
| 4059 | + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, |
|---|
| 4060 | + PAGE_SIZE << order); |
|---|
| 4061 | + } |
|---|
| 3869 | 4062 | |
|---|
| 3870 | 4063 | return kmalloc_large_node_hook(ptr, size, flags); |
|---|
| 3871 | 4064 | } |
|---|
| .. | .. |
|---|
| 3890 | 4083 | if (unlikely(ZERO_OR_NULL_PTR(s))) |
|---|
| 3891 | 4084 | return s; |
|---|
| 3892 | 4085 | |
|---|
| 3893 | | - ret = slab_alloc_node(s, flags, node, _RET_IP_); |
|---|
| 4086 | + ret = slab_alloc_node(s, flags, node, _RET_IP_, size); |
|---|
| 3894 | 4087 | |
|---|
| 3895 | 4088 | trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node); |
|---|
| 3896 | 4089 | |
|---|
| .. | .. |
|---|
| 3899 | 4092 | return ret; |
|---|
| 3900 | 4093 | } |
|---|
| 3901 | 4094 | EXPORT_SYMBOL(__kmalloc_node); |
|---|
| 3902 | | -#endif |
|---|
| 4095 | +#endif /* CONFIG_NUMA */ |
|---|
| 3903 | 4096 | |
|---|
| 3904 | 4097 | #ifdef CONFIG_HARDENED_USERCOPY |
|---|
| 3905 | 4098 | /* |
|---|
| .. | .. |
|---|
| 3916 | 4109 | struct kmem_cache *s; |
|---|
| 3917 | 4110 | unsigned int offset; |
|---|
| 3918 | 4111 | size_t object_size; |
|---|
| 4112 | + bool is_kfence = is_kfence_address(ptr); |
|---|
| 3919 | 4113 | |
|---|
| 3920 | 4114 | ptr = kasan_reset_tag(ptr); |
|---|
| 3921 | 4115 | |
|---|
| .. | .. |
|---|
| 3928 | 4122 | to_user, 0, n); |
|---|
| 3929 | 4123 | |
|---|
| 3930 | 4124 | /* Find offset within object. */ |
|---|
| 3931 | | - offset = (ptr - page_address(page)) % s->size; |
|---|
| 4125 | + if (is_kfence) |
|---|
| 4126 | + offset = ptr - kfence_object_start(ptr); |
|---|
| 4127 | + else |
|---|
| 4128 | + offset = (ptr - page_address(page)) % s->size; |
|---|
| 3932 | 4129 | |
|---|
| 3933 | 4130 | /* Adjust for redzone and reject if within the redzone. */ |
|---|
| 3934 | | - if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) { |
|---|
| 4131 | + if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) { |
|---|
| 3935 | 4132 | if (offset < s->red_left_pad) |
|---|
| 3936 | 4133 | usercopy_abort("SLUB object in left red zone", |
|---|
| 3937 | 4134 | s->name, to_user, offset, n); |
|---|
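
The red-zone adjustment is skipped for KFENCE objects (their layout is not the slab layout) and the flag test now goes through kmem_cache_debug_flags(), which checks a specific debug flag group behind the slub_debug static branch. Roughly (mm/slub.c; exact form is version-dependent):

```c
/* Sketch of kmem_cache_debug_flags(). */
static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t flags)
{
	VM_WARN_ON_ONCE(!(flags & SLAB_DEBUG_FLAGS));

#ifdef CONFIG_SLUB_DEBUG
	if (static_branch_unlikely(&slub_debug_enabled))
		return s->flags & flags;
#endif
	return false;
}
```
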
| .. | .. |
|---|
| 3961 | 4158 | } |
|---|
| 3962 | 4159 | #endif /* CONFIG_HARDENED_USERCOPY */ |
|---|
| 3963 | 4160 | |
|---|
| 3964 | | -static size_t __ksize(const void *object) |
|---|
| 4161 | +size_t __ksize(const void *object) |
|---|
| 3965 | 4162 | { |
|---|
| 3966 | 4163 | struct page *page; |
|---|
| 3967 | 4164 | |
|---|
| .. | .. |
|---|
| 3972 | 4169 | |
|---|
| 3973 | 4170 | if (unlikely(!PageSlab(page))) { |
|---|
| 3974 | 4171 | WARN_ON(!PageCompound(page)); |
|---|
| 3975 | | - return PAGE_SIZE << compound_order(page); |
|---|
| 4172 | + return page_size(page); |
|---|
| 3976 | 4173 | } |
|---|
| 3977 | 4174 | |
|---|
| 3978 | 4175 | return slab_ksize(page->slab_cache); |
|---|
| 3979 | 4176 | } |
|---|
| 3980 | | - |
|---|
| 3981 | | -size_t ksize(const void *object) |
|---|
| 3982 | | -{ |
|---|
| 3983 | | - size_t size = __ksize(object); |
|---|
| 3984 | | - /* We assume that ksize callers could use whole allocated area, |
|---|
| 3985 | | - * so we need to unpoison this area. |
|---|
| 3986 | | - */ |
|---|
| 3987 | | - kasan_unpoison_shadow(object, size); |
|---|
| 3988 | | - return size; |
|---|
| 3989 | | -} |
|---|
| 3990 | | -EXPORT_SYMBOL(ksize); |
|---|
| 4177 | +EXPORT_SYMBOL(__ksize); |
|---|
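
SLUB's private ksize() wrapper is dropped; __ksize() is exported and the generic ksize(), assumed to live in mm/slab_common.c, takes over the KASAN unpoisoning that the removed copy did. A hedged sketch mirroring the deleted code (the real wrapper adds extra sanity checks and its KASAN call name varies across releases):

```c
/* Sketch of the common ksize() wrapper, assuming it mirrors the removed SLUB copy. */
size_t ksize(const void *object)
{
	size_t size;

	if (unlikely(ZERO_OR_NULL_PTR(object)))
		return 0;

	size = __ksize(object);
	/* Callers may use the whole allocation, so unpoison the full size. */
	kasan_unpoison_shadow(object, size);
	return size;
}
```
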
| 3991 | 4178 | |
|---|
| 3992 | 4179 | void kfree(const void *x) |
|---|
| 3993 | 4180 | { |
|---|
| .. | .. |
|---|
| 4001 | 4188 | |
|---|
| 4002 | 4189 | page = virt_to_head_page(x); |
|---|
| 4003 | 4190 | if (unlikely(!PageSlab(page))) { |
|---|
| 4191 | + unsigned int order = compound_order(page); |
|---|
| 4192 | + |
|---|
| 4004 | 4193 | BUG_ON(!PageCompound(page)); |
|---|
| 4005 | 4194 | kfree_hook(object); |
|---|
| 4006 | | - __free_pages(page, compound_order(page)); |
|---|
| 4195 | + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, |
|---|
| 4196 | + -(PAGE_SIZE << order)); |
|---|
| 4197 | + __free_pages(page, order); |
|---|
| 4007 | 4198 | return; |
|---|
| 4008 | 4199 | } |
|---|
| 4009 | 4200 | slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_); |
|---|
| .. | .. |
|---|
| 4047 | 4238 | * Note that concurrent frees may occur while we hold the |
|---|
| 4048 | 4239 | * list_lock. page->inuse here is the upper limit. |
|---|
| 4049 | 4240 | */ |
|---|
| 4050 | | - list_for_each_entry_safe(page, t, &n->partial, lru) { |
|---|
| 4241 | + list_for_each_entry_safe(page, t, &n->partial, slab_list) { |
|---|
| 4051 | 4242 | int free = page->objects - page->inuse; |
|---|
| 4052 | 4243 | |
|---|
| 4053 | 4244 | /* Do not reread page->inuse */ |
|---|
| .. | .. |
|---|
| 4057 | 4248 | BUG_ON(free <= 0); |
|---|
| 4058 | 4249 | |
|---|
| 4059 | 4250 | if (free == page->objects) { |
|---|
| 4060 | | - list_move(&page->lru, &discard); |
|---|
| 4251 | + list_move(&page->slab_list, &discard); |
|---|
| 4061 | 4252 | n->nr_partial--; |
|---|
| 4062 | 4253 | } else if (free <= SHRINK_PROMOTE_MAX) |
|---|
| 4063 | | - list_move(&page->lru, promote + free - 1); |
|---|
| 4254 | + list_move(&page->slab_list, promote + free - 1); |
|---|
| 4064 | 4255 | } |
|---|
| 4065 | 4256 | |
|---|
| 4066 | 4257 | /* |
|---|
| .. | .. |
|---|
| 4073 | 4264 | spin_unlock_irqrestore(&n->list_lock, flags); |
|---|
| 4074 | 4265 | |
|---|
| 4075 | 4266 | /* Release empty slabs */ |
|---|
| 4076 | | - list_for_each_entry_safe(page, t, &discard, lru) |
|---|
| 4267 | + list_for_each_entry_safe(page, t, &discard, slab_list) |
|---|
| 4077 | 4268 | discard_slab(s, page); |
|---|
| 4078 | 4269 | |
|---|
| 4079 | 4270 | if (slabs_node(s, node)) |
|---|
| .. | .. |
|---|
| 4082 | 4273 | |
|---|
| 4083 | 4274 | return ret; |
|---|
| 4084 | 4275 | } |
|---|
| 4085 | | - |
|---|
| 4086 | | -#ifdef CONFIG_MEMCG |
|---|
| 4087 | | -static void kmemcg_cache_deact_after_rcu(struct kmem_cache *s) |
|---|
| 4088 | | -{ |
|---|
| 4089 | | - /* |
|---|
| 4090 | | - * Called with all the locks held after a sched RCU grace period. |
|---|
| 4091 | | - * Even if @s becomes empty after shrinking, we can't know that @s |
|---|
| 4092 | | - * doesn't have allocations already in-flight and thus can't |
|---|
| 4093 | | - * destroy @s until the associated memcg is released. |
|---|
| 4094 | | - * |
|---|
| 4095 | | - * However, let's remove the sysfs files for empty caches here. |
|---|
| 4096 | | - * Each cache has a lot of interface files which aren't |
|---|
| 4097 | | - * particularly useful for empty draining caches; otherwise, we can |
|---|
| 4098 | | - * easily end up with millions of unnecessary sysfs files on |
|---|
| 4099 | | - * systems which have a lot of memory and transient cgroups. |
|---|
| 4100 | | - */ |
|---|
| 4101 | | - if (!__kmem_cache_shrink(s)) |
|---|
| 4102 | | - sysfs_slab_remove(s); |
|---|
| 4103 | | -} |
|---|
| 4104 | | - |
|---|
| 4105 | | -void __kmemcg_cache_deactivate(struct kmem_cache *s) |
|---|
| 4106 | | -{ |
|---|
| 4107 | | - /* |
|---|
| 4108 | | - * Disable empty slabs caching. Used to avoid pinning offline |
|---|
| 4109 | | - * memory cgroups by kmem pages that can be freed. |
|---|
| 4110 | | - */ |
|---|
| 4111 | | - slub_set_cpu_partial(s, 0); |
|---|
| 4112 | | - s->min_partial = 0; |
|---|
| 4113 | | - |
|---|
| 4114 | | - /* |
|---|
| 4115 | | - * s->cpu_partial is checked locklessly (see put_cpu_partial), so |
|---|
| 4116 | | - * we have to make sure the change is visible before shrinking. |
|---|
| 4117 | | - */ |
|---|
| 4118 | | - slab_deactivate_memcg_cache_rcu_sched(s, kmemcg_cache_deact_after_rcu); |
|---|
| 4119 | | -} |
|---|
| 4120 | | -#endif |
|---|
| 4121 | 4276 | |
|---|
| 4122 | 4277 | static int slab_mem_going_offline_callback(void *arg) |
|---|
| 4123 | 4278 | { |
|---|
| .. | .. |
|---|
| 4265 | 4420 | for_each_kmem_cache_node(s, node, n) { |
|---|
| 4266 | 4421 | struct page *p; |
|---|
| 4267 | 4422 | |
|---|
| 4268 | | - list_for_each_entry(p, &n->partial, lru) |
|---|
| 4423 | + list_for_each_entry(p, &n->partial, slab_list) |
|---|
| 4269 | 4424 | p->slab_cache = s; |
|---|
| 4270 | 4425 | |
|---|
| 4271 | 4426 | #ifdef CONFIG_SLUB_DEBUG |
|---|
| 4272 | | - list_for_each_entry(p, &n->full, lru) |
|---|
| 4427 | + list_for_each_entry(p, &n->full, slab_list) |
|---|
| 4273 | 4428 | p->slab_cache = s; |
|---|
| 4274 | 4429 | #endif |
|---|
| 4275 | 4430 | } |
|---|
| 4276 | | - slab_init_memcg_params(s); |
|---|
| 4277 | 4431 | list_add(&s->list, &slab_caches); |
|---|
| 4278 | | - memcg_link_cache(s); |
|---|
| 4279 | 4432 | return s; |
|---|
| 4280 | 4433 | } |
|---|
| 4281 | 4434 | |
|---|
| .. | .. |
|---|
| 4316 | 4469 | cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL, |
|---|
| 4317 | 4470 | slub_cpu_dead); |
|---|
| 4318 | 4471 | |
|---|
| 4319 | | - pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%d\n", |
|---|
| 4472 | + pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n", |
|---|
| 4320 | 4473 | cache_line_size(), |
|---|
| 4321 | 4474 | slub_min_order, slub_max_order, slub_min_objects, |
|---|
| 4322 | 4475 | nr_cpu_ids, nr_node_ids); |
|---|
| .. | .. |
|---|
| 4330 | 4483 | __kmem_cache_alias(const char *name, unsigned int size, unsigned int align, |
|---|
| 4331 | 4484 | slab_flags_t flags, void (*ctor)(void *)) |
|---|
| 4332 | 4485 | { |
|---|
| 4333 | | - struct kmem_cache *s, *c; |
|---|
| 4486 | + struct kmem_cache *s; |
|---|
| 4334 | 4487 | |
|---|
| 4335 | 4488 | s = find_mergeable(size, align, flags, name, ctor); |
|---|
| 4336 | 4489 | if (s) { |
|---|
| .. | .. |
|---|
| 4342 | 4495 | */ |
|---|
| 4343 | 4496 | s->object_size = max(s->object_size, size); |
|---|
| 4344 | 4497 | s->inuse = max(s->inuse, ALIGN(size, sizeof(void *))); |
|---|
| 4345 | | - |
|---|
| 4346 | | - for_each_memcg_cache(c, s) { |
|---|
| 4347 | | - c->object_size = s->object_size; |
|---|
| 4348 | | - c->inuse = max(c->inuse, ALIGN(size, sizeof(void *))); |
|---|
| 4349 | | - } |
|---|
| 4350 | 4498 | |
|---|
| 4351 | 4499 | if (sysfs_slab_alias(s, name)) { |
|---|
| 4352 | 4500 | s->refcount--; |
|---|
| .. | .. |
|---|
| 4369 | 4517 | if (slab_state <= UP) |
|---|
| 4370 | 4518 | return 0; |
|---|
| 4371 | 4519 | |
|---|
| 4372 | | - memcg_propagate_slab_attrs(s); |
|---|
| 4373 | 4520 | err = sysfs_slab_add(s); |
|---|
| 4374 | | - if (err) |
|---|
| 4521 | + if (err) { |
|---|
| 4375 | 4522 | __kmem_cache_release(s); |
|---|
| 4523 | + return err; |
|---|
| 4524 | + } |
|---|
| 4376 | 4525 | |
|---|
| 4377 | | - return err; |
|---|
| 4526 | + if (s->flags & SLAB_STORE_USER) |
|---|
| 4527 | + debugfs_slab_add(s); |
|---|
| 4528 | + |
|---|
| 4529 | + return 0; |
|---|
| 4378 | 4530 | } |
|---|
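
Caches created with SLAB_STORE_USER additionally get a debugfs directory; the per-callsite listings created there replace the alloc_calls/free_calls sysfs attributes removed later in this patch. A sketch of what debugfs_slab_add() is assumed to set up (file names and `slab_debugfs_fops` follow the upstream layout; the `slab_debugfs_root` dentry is declared in this patch):

```c
/* Sketch, assuming the upstream debugfs layout: one directory per cache. */
static void debugfs_slab_add(struct kmem_cache *s)
{
	struct dentry *slab_cache_dir;

	if (unlikely(!slab_debugfs_root))
		return;

	slab_cache_dir = debugfs_create_dir(s->name, slab_debugfs_root);

	debugfs_create_file("alloc_traces", 0400, slab_cache_dir, s,
			    &slab_debugfs_fops);
	debugfs_create_file("free_traces", 0400, slab_cache_dir, s,
			    &slab_debugfs_fops);
}
```
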
| 4379 | 4531 | |
|---|
| 4380 | 4532 | void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) |
|---|
| .. | .. |
|---|
| 4390 | 4542 | if (unlikely(ZERO_OR_NULL_PTR(s))) |
|---|
| 4391 | 4543 | return s; |
|---|
| 4392 | 4544 | |
|---|
| 4393 | | - ret = slab_alloc(s, gfpflags, caller); |
|---|
| 4545 | + ret = slab_alloc(s, gfpflags, caller, size); |
|---|
| 4394 | 4546 | |
|---|
| 4395 | 4547 | /* Honor the call site pointer we received. */ |
|---|
| 4396 | 4548 | trace_kmalloc(caller, ret, size, s->size, gfpflags); |
|---|
| .. | .. |
|---|
| 4421 | 4573 | if (unlikely(ZERO_OR_NULL_PTR(s))) |
|---|
| 4422 | 4574 | return s; |
|---|
| 4423 | 4575 | |
|---|
| 4424 | | - ret = slab_alloc_node(s, gfpflags, node, caller); |
|---|
| 4576 | + ret = slab_alloc_node(s, gfpflags, node, caller, size); |
|---|
| 4425 | 4577 | |
|---|
| 4426 | 4578 | /* Honor the call site pointer we received. */ |
|---|
| 4427 | 4579 | trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); |
|---|
| .. | .. |
|---|
| 4444 | 4596 | #endif |
|---|
| 4445 | 4597 | |
|---|
| 4446 | 4598 | #ifdef CONFIG_SLUB_DEBUG |
|---|
| 4447 | | -static int validate_slab(struct kmem_cache *s, struct page *page, |
|---|
| 4448 | | - unsigned long *map) |
|---|
| 4599 | +static void validate_slab(struct kmem_cache *s, struct page *page) |
|---|
| 4449 | 4600 | { |
|---|
| 4450 | 4601 | void *p; |
|---|
| 4451 | 4602 | void *addr = page_address(page); |
|---|
| 4603 | + unsigned long *map; |
|---|
| 4452 | 4604 | |
|---|
| 4453 | | - if (!check_slab(s, page) || |
|---|
| 4454 | | - !on_freelist(s, page, NULL)) |
|---|
| 4455 | | - return 0; |
|---|
| 4605 | + slab_lock(page); |
|---|
| 4606 | + |
|---|
| 4607 | + if (!check_slab(s, page) || !on_freelist(s, page, NULL)) |
|---|
| 4608 | + goto unlock; |
|---|
| 4456 | 4609 | |
|---|
| 4457 | 4610 | /* Now we know that a valid freelist exists */ |
|---|
| 4458 | | - bitmap_zero(map, page->objects); |
|---|
| 4459 | | - |
|---|
| 4460 | | - get_map(s, page, map); |
|---|
| 4611 | + map = get_map(s, page); |
|---|
| 4461 | 4612 | for_each_object(p, s, addr, page->objects) { |
|---|
| 4462 | | - if (test_bit(slab_index(p, s, addr), map)) |
|---|
| 4463 | | - if (!check_object(s, page, p, SLUB_RED_INACTIVE)) |
|---|
| 4464 | | - return 0; |
|---|
| 4613 | + u8 val = test_bit(__obj_to_index(s, addr, p), map) ? |
|---|
| 4614 | + SLUB_RED_INACTIVE : SLUB_RED_ACTIVE; |
|---|
| 4615 | + |
|---|
| 4616 | + if (!check_object(s, page, p, val)) |
|---|
| 4617 | + break; |
|---|
| 4465 | 4618 | } |
|---|
| 4466 | | - |
|---|
| 4467 | | - for_each_object(p, s, addr, page->objects) |
|---|
| 4468 | | - if (!test_bit(slab_index(p, s, addr), map)) |
|---|
| 4469 | | - if (!check_object(s, page, p, SLUB_RED_ACTIVE)) |
|---|
| 4470 | | - return 0; |
|---|
| 4471 | | - return 1; |
|---|
| 4472 | | -} |
|---|
| 4473 | | - |
|---|
| 4474 | | -static void validate_slab_slab(struct kmem_cache *s, struct page *page, |
|---|
| 4475 | | - unsigned long *map) |
|---|
| 4476 | | -{ |
|---|
| 4477 | | - slab_lock(page); |
|---|
| 4478 | | - validate_slab(s, page, map); |
|---|
| 4619 | + put_map(map); |
|---|
| 4620 | +unlock: |
|---|
| 4479 | 4621 | slab_unlock(page); |
|---|
| 4480 | 4622 | } |
|---|
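
validate_slab() now takes the slab lock itself and borrows a preallocated object bitmap via get_map()/put_map() instead of making every caller allocate one. Object indexing goes through __obj_to_index(), which uses the reciprocal value stored on the cache earlier in this patch so that no division is needed per object. Roughly (mm/slab.h):

```c
/* Sketch of the index helper; relies on s->reciprocal_size set in calculate_sizes(). */
static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
					  void *addr, void *obj)
{
	return reciprocal_divide(kasan_reset_tag(obj) - addr,
				 cache->reciprocal_size);
}
```
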
| 4481 | 4623 | |
|---|
| 4482 | 4624 | static int validate_slab_node(struct kmem_cache *s, |
|---|
| 4483 | | - struct kmem_cache_node *n, unsigned long *map) |
|---|
| 4625 | + struct kmem_cache_node *n) |
|---|
| 4484 | 4626 | { |
|---|
| 4485 | 4627 | unsigned long count = 0; |
|---|
| 4486 | 4628 | struct page *page; |
|---|
| .. | .. |
|---|
| 4488 | 4630 | |
|---|
| 4489 | 4631 | spin_lock_irqsave(&n->list_lock, flags); |
|---|
| 4490 | 4632 | |
|---|
| 4491 | | - list_for_each_entry(page, &n->partial, lru) { |
|---|
| 4492 | | - validate_slab_slab(s, page, map); |
|---|
| 4633 | + list_for_each_entry(page, &n->partial, slab_list) { |
|---|
| 4634 | + validate_slab(s, page); |
|---|
| 4493 | 4635 | count++; |
|---|
| 4494 | 4636 | } |
|---|
| 4495 | 4637 | if (count != n->nr_partial) |
|---|
| .. | .. |
|---|
| 4499 | 4641 | if (!(s->flags & SLAB_STORE_USER)) |
|---|
| 4500 | 4642 | goto out; |
|---|
| 4501 | 4643 | |
|---|
| 4502 | | - list_for_each_entry(page, &n->full, lru) { |
|---|
| 4503 | | - validate_slab_slab(s, page, map); |
|---|
| 4644 | + list_for_each_entry(page, &n->full, slab_list) { |
|---|
| 4645 | + validate_slab(s, page); |
|---|
| 4504 | 4646 | count++; |
|---|
| 4505 | 4647 | } |
|---|
| 4506 | 4648 | if (count != atomic_long_read(&n->nr_slabs)) |
|---|
| .. | .. |
|---|
| 4516 | 4658 | { |
|---|
| 4517 | 4659 | int node; |
|---|
| 4518 | 4660 | unsigned long count = 0; |
|---|
| 4519 | | - unsigned long *map = kmalloc_array(BITS_TO_LONGS(oo_objects(s->max)), |
|---|
| 4520 | | - sizeof(unsigned long), |
|---|
| 4521 | | - GFP_KERNEL); |
|---|
| 4522 | 4661 | struct kmem_cache_node *n; |
|---|
| 4523 | | - |
|---|
| 4524 | | - if (!map) |
|---|
| 4525 | | - return -ENOMEM; |
|---|
| 4526 | 4662 | |
|---|
| 4527 | 4663 | flush_all(s); |
|---|
| 4528 | 4664 | for_each_kmem_cache_node(s, node, n) |
|---|
| 4529 | | - count += validate_slab_node(s, n, map); |
|---|
| 4530 | | - kfree(map); |
|---|
| 4665 | + count += validate_slab_node(s, n); |
|---|
| 4666 | + |
|---|
| 4531 | 4667 | return count; |
|---|
| 4532 | 4668 | } |
|---|
| 4669 | + |
|---|
| 4670 | +#ifdef CONFIG_DEBUG_FS |
|---|
| 4533 | 4671 | /* |
|---|
| 4534 | 4672 | * Generate lists of code addresses where slabcache objects are allocated |
|---|
| 4535 | 4673 | * and freed. |
|---|
| .. | .. |
|---|
| 4551 | 4689 | unsigned long max; |
|---|
| 4552 | 4690 | unsigned long count; |
|---|
| 4553 | 4691 | struct location *loc; |
|---|
| 4692 | + loff_t idx; |
|---|
| 4554 | 4693 | }; |
|---|
| 4694 | + |
|---|
| 4695 | +static struct dentry *slab_debugfs_root; |
|---|
| 4555 | 4696 | |
|---|
| 4556 | 4697 | static void free_loc_track(struct loc_track *t) |
|---|
| 4557 | 4698 | { |
|---|
| .. | .. |
|---|
| 4658 | 4799 | |
|---|
| 4659 | 4800 | static void process_slab(struct loc_track *t, struct kmem_cache *s, |
|---|
| 4660 | 4801 | struct page *page, enum track_item alloc, |
|---|
| 4661 | | - unsigned long *map) |
|---|
| 4802 | + unsigned long *obj_map) |
|---|
| 4662 | 4803 | { |
|---|
| 4663 | 4804 | void *addr = page_address(page); |
|---|
| 4664 | 4805 | void *p; |
|---|
| 4665 | 4806 | |
|---|
| 4666 | | - bitmap_zero(map, page->objects); |
|---|
| 4667 | | - get_map(s, page, map); |
|---|
| 4807 | + __fill_map(obj_map, s, page); |
|---|
| 4668 | 4808 | |
|---|
| 4669 | 4809 | for_each_object(p, s, addr, page->objects) |
|---|
| 4670 | | - if (!test_bit(slab_index(p, s, addr), map)) |
|---|
| 4810 | + if (!test_bit(__obj_to_index(s, addr, p), obj_map)) |
|---|
| 4671 | 4811 | add_location(t, s, get_track(s, p, alloc)); |
|---|
| 4672 | 4812 | } |
|---|
| 4673 | | - |
|---|
| 4674 | | -static int list_locations(struct kmem_cache *s, char *buf, |
|---|
| 4675 | | - enum track_item alloc) |
|---|
| 4676 | | -{ |
|---|
| 4677 | | - int len = 0; |
|---|
| 4678 | | - unsigned long i; |
|---|
| 4679 | | - struct loc_track t = { 0, 0, NULL }; |
|---|
| 4680 | | - int node; |
|---|
| 4681 | | - unsigned long *map = kmalloc_array(BITS_TO_LONGS(oo_objects(s->max)), |
|---|
| 4682 | | - sizeof(unsigned long), |
|---|
| 4683 | | - GFP_KERNEL); |
|---|
| 4684 | | - struct kmem_cache_node *n; |
|---|
| 4685 | | - |
|---|
| 4686 | | - if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), |
|---|
| 4687 | | - GFP_KERNEL)) { |
|---|
| 4688 | | - kfree(map); |
|---|
| 4689 | | - return sprintf(buf, "Out of memory\n"); |
|---|
| 4690 | | - } |
|---|
| 4691 | | - /* Push back cpu slabs */ |
|---|
| 4692 | | - flush_all(s); |
|---|
| 4693 | | - |
|---|
| 4694 | | - for_each_kmem_cache_node(s, node, n) { |
|---|
| 4695 | | - unsigned long flags; |
|---|
| 4696 | | - struct page *page; |
|---|
| 4697 | | - |
|---|
| 4698 | | - if (!atomic_long_read(&n->nr_slabs)) |
|---|
| 4699 | | - continue; |
|---|
| 4700 | | - |
|---|
| 4701 | | - spin_lock_irqsave(&n->list_lock, flags); |
|---|
| 4702 | | - list_for_each_entry(page, &n->partial, lru) |
|---|
| 4703 | | - process_slab(&t, s, page, alloc, map); |
|---|
| 4704 | | - list_for_each_entry(page, &n->full, lru) |
|---|
| 4705 | | - process_slab(&t, s, page, alloc, map); |
|---|
| 4706 | | - spin_unlock_irqrestore(&n->list_lock, flags); |
|---|
| 4707 | | - } |
|---|
| 4708 | | - |
|---|
| 4709 | | - for (i = 0; i < t.count; i++) { |
|---|
| 4710 | | - struct location *l = &t.loc[i]; |
|---|
| 4711 | | - |
|---|
| 4712 | | - if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100) |
|---|
| 4713 | | - break; |
|---|
| 4714 | | - len += sprintf(buf + len, "%7ld ", l->count); |
|---|
| 4715 | | - |
|---|
| 4716 | | - if (l->addr) |
|---|
| 4717 | | - len += sprintf(buf + len, "%pS", (void *)l->addr); |
|---|
| 4718 | | - else |
|---|
| 4719 | | - len += sprintf(buf + len, "<not-available>"); |
|---|
| 4720 | | - |
|---|
| 4721 | | - if (l->sum_time != l->min_time) { |
|---|
| 4722 | | - len += sprintf(buf + len, " age=%ld/%ld/%ld", |
|---|
| 4723 | | - l->min_time, |
|---|
| 4724 | | - (long)div_u64(l->sum_time, l->count), |
|---|
| 4725 | | - l->max_time); |
|---|
| 4726 | | - } else |
|---|
| 4727 | | - len += sprintf(buf + len, " age=%ld", |
|---|
| 4728 | | - l->min_time); |
|---|
| 4729 | | - |
|---|
| 4730 | | - if (l->min_pid != l->max_pid) |
|---|
| 4731 | | - len += sprintf(buf + len, " pid=%ld-%ld", |
|---|
| 4732 | | - l->min_pid, l->max_pid); |
|---|
| 4733 | | - else |
|---|
| 4734 | | - len += sprintf(buf + len, " pid=%ld", |
|---|
| 4735 | | - l->min_pid); |
|---|
| 4736 | | - |
|---|
| 4737 | | - if (num_online_cpus() > 1 && |
|---|
| 4738 | | - !cpumask_empty(to_cpumask(l->cpus)) && |
|---|
| 4739 | | - len < PAGE_SIZE - 60) |
|---|
| 4740 | | - len += scnprintf(buf + len, PAGE_SIZE - len - 50, |
|---|
| 4741 | | - " cpus=%*pbl", |
|---|
| 4742 | | - cpumask_pr_args(to_cpumask(l->cpus))); |
|---|
| 4743 | | - |
|---|
| 4744 | | - if (nr_online_nodes > 1 && !nodes_empty(l->nodes) && |
|---|
| 4745 | | - len < PAGE_SIZE - 60) |
|---|
| 4746 | | - len += scnprintf(buf + len, PAGE_SIZE - len - 50, |
|---|
| 4747 | | - " nodes=%*pbl", |
|---|
| 4748 | | - nodemask_pr_args(&l->nodes)); |
|---|
| 4749 | | - |
|---|
| 4750 | | - len += sprintf(buf + len, "\n"); |
|---|
| 4751 | | - } |
|---|
| 4752 | | - |
|---|
| 4753 | | - free_loc_track(&t); |
|---|
| 4754 | | - kfree(map); |
|---|
| 4755 | | - if (!t.count) |
|---|
| 4756 | | - len += sprintf(buf, "No data\n"); |
|---|
| 4757 | | - return len; |
|---|
| 4758 | | -} |
|---|
| 4759 | | -#endif |
|---|
| 4813 | +#endif /* CONFIG_DEBUG_FS */ |
|---|
| 4814 | +#endif /* CONFIG_SLUB_DEBUG */ |
|---|
| 4760 | 4815 | |
|---|
| 4761 | 4816 | #ifdef SLUB_RESILIENCY_TEST |
|---|
| 4762 | 4817 | static void __init resiliency_test(void) |
|---|
| .. | .. |
|---|
| 4816 | 4871 | #ifdef CONFIG_SLUB_SYSFS |
|---|
| 4817 | 4872 | static void resiliency_test(void) {}; |
|---|
| 4818 | 4873 | #endif |
|---|
| 4819 | | -#endif |
|---|
| 4874 | +#endif /* SLUB_RESILIENCY_TEST */ |
|---|
| 4820 | 4875 | |
|---|
| 4821 | 4876 | #ifdef CONFIG_SLUB_SYSFS |
|---|
| 4822 | 4877 | enum slab_stat_type { |
|---|
| .. | .. |
|---|
| 4955 | 5010 | return x + sprintf(buf + x, "\n"); |
|---|
| 4956 | 5011 | } |
|---|
| 4957 | 5012 | |
|---|
| 4958 | | -#ifdef CONFIG_SLUB_DEBUG |
|---|
| 4959 | | -static int any_slab_objects(struct kmem_cache *s) |
|---|
| 4960 | | -{ |
|---|
| 4961 | | - int node; |
|---|
| 4962 | | - struct kmem_cache_node *n; |
|---|
| 4963 | | - |
|---|
| 4964 | | - for_each_kmem_cache_node(s, node, n) |
|---|
| 4965 | | - if (atomic_long_read(&n->total_objects)) |
|---|
| 4966 | | - return 1; |
|---|
| 4967 | | - |
|---|
| 4968 | | - return 0; |
|---|
| 4969 | | -} |
|---|
| 4970 | | -#endif |
|---|
| 4971 | | - |
|---|
| 4972 | 5013 | #define to_slab_attr(n) container_of(n, struct slab_attribute, attr) |
|---|
| 4973 | 5014 | #define to_slab(n) container_of(n, struct kmem_cache, kobj) |
|---|
| 4974 | 5015 | |
|---|
| .. | .. |
|---|
| 5010 | 5051 | } |
|---|
| 5011 | 5052 | SLAB_ATTR_RO(objs_per_slab); |
|---|
| 5012 | 5053 | |
|---|
| 5013 | | -static ssize_t order_store(struct kmem_cache *s, |
|---|
| 5014 | | - const char *buf, size_t length) |
|---|
| 5015 | | -{ |
|---|
| 5016 | | - unsigned int order; |
|---|
| 5017 | | - int err; |
|---|
| 5018 | | - |
|---|
| 5019 | | - err = kstrtouint(buf, 10, &order); |
|---|
| 5020 | | - if (err) |
|---|
| 5021 | | - return err; |
|---|
| 5022 | | - |
|---|
| 5023 | | - if (order > slub_max_order || order < slub_min_order) |
|---|
| 5024 | | - return -EINVAL; |
|---|
| 5025 | | - |
|---|
| 5026 | | - calculate_sizes(s, order); |
|---|
| 5027 | | - return length; |
|---|
| 5028 | | -} |
|---|
| 5029 | | - |
|---|
| 5030 | 5054 | static ssize_t order_show(struct kmem_cache *s, char *buf) |
|---|
| 5031 | 5055 | { |
|---|
| 5032 | 5056 | return sprintf(buf, "%u\n", oo_order(s->oo)); |
|---|
| 5033 | 5057 | } |
|---|
| 5034 | | -SLAB_ATTR(order); |
|---|
| 5058 | +SLAB_ATTR_RO(order); |
|---|
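
Several sysfs attributes that previously accepted writes (order, reclaim_account, sanity_checks, trace, red_zone, poison, store_user, failslab) are converted to read-only here, since toggling them on a live cache was unreliable; the boot-time slub_debug interface remains the supported way to enable them. The two attribute macros differ only in file mode and the presence of a store method; roughly (mm/slub.c, exact modes may differ):

```c
/* Sketch of the attribute macros used above. */
#define SLAB_ATTR_RO(_name) \
	static struct slab_attribute _name##_attr = \
		__ATTR(_name, 0400, _name##_show, NULL)

#define SLAB_ATTR(_name) \
	static struct slab_attribute _name##_attr = \
		__ATTR(_name, 0600, _name##_show, _name##_store)
```
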
| 5035 | 5059 | |
|---|
| 5036 | 5060 | static ssize_t min_partial_show(struct kmem_cache *s, char *buf) |
|---|
| 5037 | 5061 | { |
|---|
| .. | .. |
|---|
| 5153 | 5177 | { |
|---|
| 5154 | 5178 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); |
|---|
| 5155 | 5179 | } |
|---|
| 5156 | | - |
|---|
| 5157 | | -static ssize_t reclaim_account_store(struct kmem_cache *s, |
|---|
| 5158 | | - const char *buf, size_t length) |
|---|
| 5159 | | -{ |
|---|
| 5160 | | - s->flags &= ~SLAB_RECLAIM_ACCOUNT; |
|---|
| 5161 | | - if (buf[0] == '1') |
|---|
| 5162 | | - s->flags |= SLAB_RECLAIM_ACCOUNT; |
|---|
| 5163 | | - return length; |
|---|
| 5164 | | -} |
|---|
| 5165 | | -SLAB_ATTR(reclaim_account); |
|---|
| 5180 | +SLAB_ATTR_RO(reclaim_account); |
|---|
| 5166 | 5181 | |
|---|
| 5167 | 5182 | static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf) |
|---|
| 5168 | 5183 | { |
|---|
| .. | .. |
|---|
| 5207 | 5222 | { |
|---|
| 5208 | 5223 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS)); |
|---|
| 5209 | 5224 | } |
|---|
| 5210 | | - |
|---|
| 5211 | | -static ssize_t sanity_checks_store(struct kmem_cache *s, |
|---|
| 5212 | | - const char *buf, size_t length) |
|---|
| 5213 | | -{ |
|---|
| 5214 | | - s->flags &= ~SLAB_CONSISTENCY_CHECKS; |
|---|
| 5215 | | - if (buf[0] == '1') { |
|---|
| 5216 | | - s->flags &= ~__CMPXCHG_DOUBLE; |
|---|
| 5217 | | - s->flags |= SLAB_CONSISTENCY_CHECKS; |
|---|
| 5218 | | - } |
|---|
| 5219 | | - return length; |
|---|
| 5220 | | -} |
|---|
| 5221 | | -SLAB_ATTR(sanity_checks); |
|---|
| 5225 | +SLAB_ATTR_RO(sanity_checks); |
|---|
| 5222 | 5226 | |
|---|
| 5223 | 5227 | static ssize_t trace_show(struct kmem_cache *s, char *buf) |
|---|
| 5224 | 5228 | { |
|---|
| 5225 | 5229 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE)); |
|---|
| 5226 | 5230 | } |
|---|
| 5227 | | - |
|---|
| 5228 | | -static ssize_t trace_store(struct kmem_cache *s, const char *buf, |
|---|
| 5229 | | - size_t length) |
|---|
| 5230 | | -{ |
|---|
| 5231 | | - /* |
|---|
| 5232 | | - * Tracing a merged cache is going to give confusing results |
|---|
| 5233 | | - * as well as cause other issues like converting a mergeable |
|---|
| 5234 | | - * cache into an umergeable one. |
|---|
| 5235 | | - */ |
|---|
| 5236 | | - if (s->refcount > 1) |
|---|
| 5237 | | - return -EINVAL; |
|---|
| 5238 | | - |
|---|
| 5239 | | - s->flags &= ~SLAB_TRACE; |
|---|
| 5240 | | - if (buf[0] == '1') { |
|---|
| 5241 | | - s->flags &= ~__CMPXCHG_DOUBLE; |
|---|
| 5242 | | - s->flags |= SLAB_TRACE; |
|---|
| 5243 | | - } |
|---|
| 5244 | | - return length; |
|---|
| 5245 | | -} |
|---|
| 5246 | | -SLAB_ATTR(trace); |
|---|
| 5231 | +SLAB_ATTR_RO(trace); |
|---|
| 5247 | 5232 | |
|---|
| 5248 | 5233 | static ssize_t red_zone_show(struct kmem_cache *s, char *buf) |
|---|
| 5249 | 5234 | { |
|---|
| 5250 | 5235 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE)); |
|---|
| 5251 | 5236 | } |
|---|
| 5252 | 5237 | |
|---|
| 5253 | | -static ssize_t red_zone_store(struct kmem_cache *s, |
|---|
| 5254 | | - const char *buf, size_t length) |
|---|
| 5255 | | -{ |
|---|
| 5256 | | - if (any_slab_objects(s)) |
|---|
| 5257 | | - return -EBUSY; |
|---|
| 5258 | | - |
|---|
| 5259 | | - s->flags &= ~SLAB_RED_ZONE; |
|---|
| 5260 | | - if (buf[0] == '1') { |
|---|
| 5261 | | - s->flags |= SLAB_RED_ZONE; |
|---|
| 5262 | | - } |
|---|
| 5263 | | - calculate_sizes(s, -1); |
|---|
| 5264 | | - return length; |
|---|
| 5265 | | -} |
|---|
| 5266 | | -SLAB_ATTR(red_zone); |
|---|
| 5238 | +SLAB_ATTR_RO(red_zone); |
|---|
| 5267 | 5239 | |
|---|
| 5268 | 5240 | static ssize_t poison_show(struct kmem_cache *s, char *buf) |
|---|
| 5269 | 5241 | { |
|---|
| 5270 | 5242 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON)); |
|---|
| 5271 | 5243 | } |
|---|
| 5272 | 5244 | |
|---|
| 5273 | | -static ssize_t poison_store(struct kmem_cache *s, |
|---|
| 5274 | | - const char *buf, size_t length) |
|---|
| 5275 | | -{ |
|---|
| 5276 | | - if (any_slab_objects(s)) |
|---|
| 5277 | | - return -EBUSY; |
|---|
| 5278 | | - |
|---|
| 5279 | | - s->flags &= ~SLAB_POISON; |
|---|
| 5280 | | - if (buf[0] == '1') { |
|---|
| 5281 | | - s->flags |= SLAB_POISON; |
|---|
| 5282 | | - } |
|---|
| 5283 | | - calculate_sizes(s, -1); |
|---|
| 5284 | | - return length; |
|---|
| 5285 | | -} |
|---|
| 5286 | | -SLAB_ATTR(poison); |
|---|
| 5245 | +SLAB_ATTR_RO(poison); |
|---|
| 5287 | 5246 | |
|---|
| 5288 | 5247 | static ssize_t store_user_show(struct kmem_cache *s, char *buf) |
|---|
| 5289 | 5248 | { |
|---|
| 5290 | 5249 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER)); |
|---|
| 5291 | 5250 | } |
|---|
| 5292 | 5251 | |
|---|
| 5293 | | -static ssize_t store_user_store(struct kmem_cache *s, |
|---|
| 5294 | | - const char *buf, size_t length) |
|---|
| 5295 | | -{ |
|---|
| 5296 | | - if (any_slab_objects(s)) |
|---|
| 5297 | | - return -EBUSY; |
|---|
| 5298 | | - |
|---|
| 5299 | | - s->flags &= ~SLAB_STORE_USER; |
|---|
| 5300 | | - if (buf[0] == '1') { |
|---|
| 5301 | | - s->flags &= ~__CMPXCHG_DOUBLE; |
|---|
| 5302 | | - s->flags |= SLAB_STORE_USER; |
|---|
| 5303 | | - } |
|---|
| 5304 | | - calculate_sizes(s, -1); |
|---|
| 5305 | | - return length; |
|---|
| 5306 | | -} |
|---|
| 5307 | | -SLAB_ATTR(store_user); |
|---|
| 5252 | +SLAB_ATTR_RO(store_user); |
|---|
| 5308 | 5253 | |
|---|
| 5309 | 5254 | static ssize_t validate_show(struct kmem_cache *s, char *buf) |
|---|
| 5310 | 5255 | { |
|---|
| .. | .. |
|---|
| 5325 | 5270 | } |
|---|
| 5326 | 5271 | SLAB_ATTR(validate); |
|---|
| 5327 | 5272 | |
|---|
| 5328 | | -static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf) |
|---|
| 5329 | | -{ |
|---|
| 5330 | | - if (!(s->flags & SLAB_STORE_USER)) |
|---|
| 5331 | | - return -ENOSYS; |
|---|
| 5332 | | - return list_locations(s, buf, TRACK_ALLOC); |
|---|
| 5333 | | -} |
|---|
| 5334 | | -SLAB_ATTR_RO(alloc_calls); |
|---|
| 5335 | | - |
|---|
| 5336 | | -static ssize_t free_calls_show(struct kmem_cache *s, char *buf) |
|---|
| 5337 | | -{ |
|---|
| 5338 | | - if (!(s->flags & SLAB_STORE_USER)) |
|---|
| 5339 | | - return -ENOSYS; |
|---|
| 5340 | | - return list_locations(s, buf, TRACK_FREE); |
|---|
| 5341 | | -} |
|---|
| 5342 | | -SLAB_ATTR_RO(free_calls); |
|---|
| 5343 | 5273 | #endif /* CONFIG_SLUB_DEBUG */ |
|---|
| 5344 | 5274 | |
|---|
| 5345 | 5275 | #ifdef CONFIG_FAILSLAB |
|---|
| .. | .. |
|---|
| 5347 | 5277 | { |
|---|
| 5348 | 5278 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); |
|---|
| 5349 | 5279 | } |
|---|
| 5350 | | - |
|---|
| 5351 | | -static ssize_t failslab_store(struct kmem_cache *s, const char *buf, |
|---|
| 5352 | | - size_t length) |
|---|
| 5353 | | -{ |
|---|
| 5354 | | - if (s->refcount > 1) |
|---|
| 5355 | | - return -EINVAL; |
|---|
| 5356 | | - |
|---|
| 5357 | | - s->flags &= ~SLAB_FAILSLAB; |
|---|
| 5358 | | - if (buf[0] == '1') |
|---|
| 5359 | | - s->flags |= SLAB_FAILSLAB; |
|---|
| 5360 | | - return length; |
|---|
| 5361 | | -} |
|---|
| 5362 | | -SLAB_ATTR(failslab); |
|---|
| 5280 | +SLAB_ATTR_RO(failslab); |
|---|
| 5363 | 5281 | #endif |
|---|
| 5364 | 5282 | |
|---|
| 5365 | 5283 | static ssize_t shrink_show(struct kmem_cache *s, char *buf) |
|---|
| .. | .. |
|---|
| 5482 | 5400 | STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free); |
|---|
| 5483 | 5401 | STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node); |
|---|
| 5484 | 5402 | STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain); |
|---|
| 5485 | | -#endif |
|---|
| 5403 | +#endif /* CONFIG_SLUB_STATS */ |
|---|
| 5486 | 5404 | |
|---|
| 5487 | 5405 | static struct attribute *slab_attrs[] = { |
|---|
| 5488 | 5406 | &slab_size_attr.attr, |
|---|
| .. | .. |
|---|
| 5512 | 5430 | &poison_attr.attr, |
|---|
| 5513 | 5431 | &store_user_attr.attr, |
|---|
| 5514 | 5432 | &validate_attr.attr, |
|---|
| 5515 | | - &alloc_calls_attr.attr, |
|---|
| 5516 | | - &free_calls_attr.attr, |
|---|
| 5517 | 5433 | #endif |
|---|
| 5518 | 5434 | #ifdef CONFIG_ZONE_DMA |
|---|
| 5519 | 5435 | &cache_dma_attr.attr, |
|---|
| .. | .. |
|---|
| 5595 | 5511 | return -EIO; |
|---|
| 5596 | 5512 | |
|---|
| 5597 | 5513 | err = attribute->store(s, buf, len); |
|---|
| 5598 | | -#ifdef CONFIG_MEMCG |
|---|
| 5599 | | - if (slab_state >= FULL && err >= 0 && is_root_cache(s)) { |
|---|
| 5600 | | - struct kmem_cache *c; |
|---|
| 5601 | | - |
|---|
| 5602 | | - mutex_lock(&slab_mutex); |
|---|
| 5603 | | - if (s->max_attr_size < len) |
|---|
| 5604 | | - s->max_attr_size = len; |
|---|
| 5605 | | - |
|---|
| 5606 | | - /* |
|---|
| 5607 | | - * This is a best effort propagation, so this function's return |
|---|
| 5608 | | - * value will be determined by the parent cache only. This is |
|---|
| 5609 | | - * basically because not all attributes will have a well |
|---|
| 5610 | | - * defined semantics for rollbacks - most of the actions will |
|---|
| 5611 | | - * have permanent effects. |
|---|
| 5612 | | - * |
|---|
| 5613 | | - * Returning the error value of any of the children that fail |
|---|
| 5614 | | - * is not 100 % defined, in the sense that users seeing the |
|---|
| 5615 | | - * error code won't be able to know anything about the state of |
|---|
| 5616 | | - * the cache. |
|---|
| 5617 | | - * |
|---|
| 5618 | | - * Only returning the error code for the parent cache at least |
|---|
| 5619 | | - * has well defined semantics. The cache being written to |
|---|
| 5620 | | - * directly either failed or succeeded, in which case we loop |
|---|
| 5621 | | - * through the descendants with best-effort propagation. |
|---|
| 5622 | | - */ |
|---|
| 5623 | | - for_each_memcg_cache(c, s) |
|---|
| 5624 | | - attribute->store(c, buf, len); |
|---|
| 5625 | | - mutex_unlock(&slab_mutex); |
|---|
| 5626 | | - } |
|---|
| 5627 | | -#endif |
|---|
| 5628 | 5514 | return err; |
|---|
| 5629 | | -} |
|---|
| 5630 | | - |
|---|
| 5631 | | -static void memcg_propagate_slab_attrs(struct kmem_cache *s) |
|---|
| 5632 | | -{ |
|---|
| 5633 | | -#ifdef CONFIG_MEMCG |
|---|
| 5634 | | - int i; |
|---|
| 5635 | | - char *buffer = NULL; |
|---|
| 5636 | | - struct kmem_cache *root_cache; |
|---|
| 5637 | | - |
|---|
| 5638 | | - if (is_root_cache(s)) |
|---|
| 5639 | | - return; |
|---|
| 5640 | | - |
|---|
| 5641 | | - root_cache = s->memcg_params.root_cache; |
|---|
| 5642 | | - |
|---|
| 5643 | | - /* |
|---|
| 5644 | | - * This mean this cache had no attribute written. Therefore, no point |
|---|
| 5645 | | - * in copying default values around |
|---|
| 5646 | | - */ |
|---|
| 5647 | | - if (!root_cache->max_attr_size) |
|---|
| 5648 | | - return; |
|---|
| 5649 | | - |
|---|
| 5650 | | - for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) { |
|---|
| 5651 | | - char mbuf[64]; |
|---|
| 5652 | | - char *buf; |
|---|
| 5653 | | - struct slab_attribute *attr = to_slab_attr(slab_attrs[i]); |
|---|
| 5654 | | - ssize_t len; |
|---|
| 5655 | | - |
|---|
| 5656 | | - if (!attr || !attr->store || !attr->show) |
|---|
| 5657 | | - continue; |
|---|
| 5658 | | - |
|---|
| 5659 | | - /* |
|---|
| 5660 | | - * It is really bad that we have to allocate here, so we will |
|---|
| 5661 | | - * do it only as a fallback. If we actually allocate, though, |
|---|
| 5662 | | - * we can just use the allocated buffer until the end. |
|---|
| 5663 | | - * |
|---|
| 5664 | | - * Most of the slub attributes will tend to be very small in |
|---|
| 5665 | | - * size, but sysfs allows buffers up to a page, so they can |
|---|
| 5666 | | - * theoretically happen. |
|---|
| 5667 | | - */ |
|---|
| 5668 | | - if (buffer) |
|---|
| 5669 | | - buf = buffer; |
|---|
| 5670 | | - else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf) && |
|---|
| 5671 | | - !IS_ENABLED(CONFIG_SLUB_STATS)) |
|---|
| 5672 | | - buf = mbuf; |
|---|
| 5673 | | - else { |
|---|
| 5674 | | - buffer = (char *) get_zeroed_page(GFP_KERNEL); |
|---|
| 5675 | | - if (WARN_ON(!buffer)) |
|---|
| 5676 | | - continue; |
|---|
| 5677 | | - buf = buffer; |
|---|
| 5678 | | - } |
|---|
| 5679 | | - |
|---|
| 5680 | | - len = attr->show(root_cache, buf); |
|---|
| 5681 | | - if (len > 0) |
|---|
| 5682 | | - attr->store(s, buf, len); |
|---|
| 5683 | | - } |
|---|
| 5684 | | - |
|---|
| 5685 | | - if (buffer) |
|---|
| 5686 | | - free_page((unsigned long)buffer); |
|---|
| 5687 | | -#endif |
|---|
| 5688 | 5515 | } |
|---|
| 5689 | 5516 | |
|---|
| 5690 | 5517 | static void kmem_cache_release(struct kobject *k) |
|---|
| .. | .. |
|---|
| 5702 | 5529 | .release = kmem_cache_release, |
|---|
| 5703 | 5530 | }; |
|---|
| 5704 | 5531 | |
|---|
| 5705 | | -static int uevent_filter(struct kset *kset, struct kobject *kobj) |
|---|
| 5706 | | -{ |
|---|
| 5707 | | - struct kobj_type *ktype = get_ktype(kobj); |
|---|
| 5708 | | - |
|---|
| 5709 | | - if (ktype == &slab_ktype) |
|---|
| 5710 | | - return 1; |
|---|
| 5711 | | - return 0; |
|---|
| 5712 | | -} |
|---|
| 5713 | | - |
|---|
| 5714 | | -static const struct kset_uevent_ops slab_uevent_ops = { |
|---|
| 5715 | | - .filter = uevent_filter, |
|---|
| 5716 | | -}; |
|---|
| 5717 | | - |
|---|
| 5718 | 5532 | static struct kset *slab_kset; |
|---|
| 5719 | 5533 | |
|---|
| 5720 | 5534 | static inline struct kset *cache_kset(struct kmem_cache *s) |
|---|
| 5721 | 5535 | { |
|---|
| 5722 | | -#ifdef CONFIG_MEMCG |
|---|
| 5723 | | - if (!is_root_cache(s)) |
|---|
| 5724 | | - return s->memcg_params.root_cache->memcg_kset; |
|---|
| 5725 | | -#endif |
|---|
| 5726 | 5536 | return slab_kset; |
|---|
| 5727 | 5537 | } |
|---|
| 5728 | 5538 | |
|---|
| .. | .. |
|---|
| 5737 | 5547 | char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL); |
|---|
| 5738 | 5548 | char *p = name; |
|---|
| 5739 | 5549 | |
|---|
| 5740 | | - BUG_ON(!name); |
|---|
| 5550 | + if (!name) |
|---|
| 5551 | + return ERR_PTR(-ENOMEM); |
|---|
| 5741 | 5552 | |
|---|
| 5742 | 5553 | *p++ = ':'; |
|---|
| 5743 | 5554 | /* |
|---|
| .. | .. |
|---|
| 5765 | 5576 | return name; |
|---|
| 5766 | 5577 | } |
|---|
| 5767 | 5578 | |
|---|
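The hunk above replaces the `BUG_ON(!name)` in `create_unique_id()` with `return ERR_PTR(-ENOMEM)`, and a later hunk in `sysfs_slab_add()` checks the result with `IS_ERR()`/`PTR_ERR()`. A minimal sketch of that error-pointer convention, using the hypothetical helpers `make_id()` and `use_id()` as stand-ins (they are not part of the patch):

```c
#include <linux/err.h>
#include <linux/slab.h>

/* Illustrative only: make_id()/use_id() stand in for create_unique_id()
 * and its caller; they are not functions from this patch. */
static char *make_id(gfp_t gfp)
{
	char *id = kmalloc(32, gfp);

	if (!id)
		return ERR_PTR(-ENOMEM);	/* encode the errno in the pointer */

	/* ... build the id string ... */
	return id;
}

static int use_id(void)
{
	char *id = make_id(GFP_KERNEL);

	if (IS_ERR(id))
		return PTR_ERR(id);		/* recover -ENOMEM instead of BUG_ON() */

	/* ... register something under the id ... */
	kfree(id);
	return 0;
}
```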
| 5768 | | -static void sysfs_slab_remove_workfn(struct work_struct *work) |
|---|
| 5769 | | -{ |
|---|
| 5770 | | - struct kmem_cache *s = |
|---|
| 5771 | | - container_of(work, struct kmem_cache, kobj_remove_work); |
|---|
| 5772 | | - |
|---|
| 5773 | | - if (!s->kobj.state_in_sysfs) |
|---|
| 5774 | | - /* |
|---|
| 5775 | | - * For a memcg cache, this may be called during |
|---|
| 5776 | | - * deactivation and again on shutdown. Remove only once. |
|---|
| 5777 | | - * A cache is never shut down before deactivation is |
|---|
| 5778 | | - * complete, so no need to worry about synchronization. |
|---|
| 5779 | | - */ |
|---|
| 5780 | | - goto out; |
|---|
| 5781 | | - |
|---|
| 5782 | | -#ifdef CONFIG_MEMCG |
|---|
| 5783 | | - kset_unregister(s->memcg_kset); |
|---|
| 5784 | | -#endif |
|---|
| 5785 | | - kobject_uevent(&s->kobj, KOBJ_REMOVE); |
|---|
| 5786 | | -out: |
|---|
| 5787 | | - kobject_put(&s->kobj); |
|---|
| 5788 | | -} |
|---|
| 5789 | | - |
|---|
| 5790 | 5579 | static int sysfs_slab_add(struct kmem_cache *s) |
|---|
| 5791 | 5580 | { |
|---|
| 5792 | 5581 | int err; |
|---|
| 5793 | 5582 | const char *name; |
|---|
| 5794 | 5583 | struct kset *kset = cache_kset(s); |
|---|
| 5795 | 5584 | int unmergeable = slab_unmergeable(s); |
|---|
| 5796 | | - |
|---|
| 5797 | | - INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn); |
|---|
| 5798 | 5585 | |
|---|
| 5799 | 5586 | if (!kset) { |
|---|
| 5800 | 5587 | kobject_init(&s->kobj, &slab_ktype); |
|---|
| .. | .. |
|---|
| 5819 | 5606 | * for the symlinks. |
|---|
| 5820 | 5607 | */ |
|---|
| 5821 | 5608 | name = create_unique_id(s); |
|---|
| 5609 | + if (IS_ERR(name)) |
|---|
| 5610 | + return PTR_ERR(name); |
|---|
| 5822 | 5611 | } |
|---|
| 5823 | 5612 | |
|---|
| 5824 | 5613 | s->kobj.kset = kset; |
|---|
| .. | .. |
|---|
| 5830 | 5619 | if (err) |
|---|
| 5831 | 5620 | goto out_del_kobj; |
|---|
| 5832 | 5621 | |
|---|
| 5833 | | -#ifdef CONFIG_MEMCG |
|---|
| 5834 | | - if (is_root_cache(s) && memcg_sysfs_enabled) { |
|---|
| 5835 | | - s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj); |
|---|
| 5836 | | - if (!s->memcg_kset) { |
|---|
| 5837 | | - err = -ENOMEM; |
|---|
| 5838 | | - goto out_del_kobj; |
|---|
| 5839 | | - } |
|---|
| 5840 | | - } |
|---|
| 5841 | | -#endif |
|---|
| 5842 | | - |
|---|
| 5843 | | - kobject_uevent(&s->kobj, KOBJ_ADD); |
|---|
| 5844 | 5622 | if (!unmergeable) { |
|---|
| 5845 | 5623 | /* Setup first alias */ |
|---|
| 5846 | 5624 | sysfs_slab_alias(s, s->name); |
|---|
| .. | .. |
|---|
| 5852 | 5630 | out_del_kobj: |
|---|
| 5853 | 5631 | kobject_del(&s->kobj); |
|---|
| 5854 | 5632 | goto out; |
|---|
| 5855 | | -} |
|---|
| 5856 | | - |
|---|
| 5857 | | -static void sysfs_slab_remove(struct kmem_cache *s) |
|---|
| 5858 | | -{ |
|---|
| 5859 | | - if (slab_state < FULL) |
|---|
| 5860 | | - /* |
|---|
| 5861 | | - * Sysfs has not been setup yet so no need to remove the |
|---|
| 5862 | | - * cache from sysfs. |
|---|
| 5863 | | - */ |
|---|
| 5864 | | - return; |
|---|
| 5865 | | - |
|---|
| 5866 | | - kobject_get(&s->kobj); |
|---|
| 5867 | | - schedule_work(&s->kobj_remove_work); |
|---|
| 5868 | 5633 | } |
|---|
| 5869 | 5634 | |
|---|
| 5870 | 5635 | void sysfs_slab_unlink(struct kmem_cache *s) |
|---|
| .. | .. |
|---|
| 5921 | 5686 | |
|---|
| 5922 | 5687 | mutex_lock(&slab_mutex); |
|---|
| 5923 | 5688 | |
|---|
| 5924 | | - slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj); |
|---|
| 5689 | + slab_kset = kset_create_and_add("slab", NULL, kernel_kobj); |
|---|
| 5925 | 5690 | if (!slab_kset) { |
|---|
| 5926 | 5691 | mutex_unlock(&slab_mutex); |
|---|
| 5927 | 5692 | pr_err("Cannot register slab subsystem.\n"); |
|---|
| .. | .. |
|---|
| 5956 | 5721 | __initcall(slab_sysfs_init); |
|---|
| 5957 | 5722 | #endif /* CONFIG_SLUB_SYSFS */ |
|---|
| 5958 | 5723 | |
|---|
| 5724 | +#if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS) |
|---|
| 5725 | +static int slab_debugfs_show(struct seq_file *seq, void *v) |
|---|
| 5726 | +{ |
|---|
| 5727 | + struct loc_track *t = seq->private; |
|---|
| 5728 | + struct location *l; |
|---|
| 5729 | + unsigned long idx; |
|---|
| 5730 | + |
|---|
| 5731 | + idx = (unsigned long) t->idx; |
|---|
| 5732 | + if (idx < t->count) { |
|---|
| 5733 | + l = &t->loc[idx]; |
|---|
| 5734 | + |
|---|
| 5735 | + seq_printf(seq, "%7ld ", l->count); |
|---|
| 5736 | + |
|---|
| 5737 | + if (l->addr) |
|---|
| 5738 | + seq_printf(seq, "%pS", (void *)l->addr); |
|---|
| 5739 | + else |
|---|
| 5740 | + seq_puts(seq, "<not-available>"); |
|---|
| 5741 | + |
|---|
| 5742 | + if (l->sum_time != l->min_time) { |
|---|
| 5743 | + seq_printf(seq, " age=%ld/%llu/%ld", |
|---|
| 5744 | + l->min_time, div_u64(l->sum_time, l->count), |
|---|
| 5745 | + l->max_time); |
|---|
| 5746 | + } else |
|---|
| 5747 | + seq_printf(seq, " age=%ld", l->min_time); |
|---|
| 5748 | + |
|---|
| 5749 | + if (l->min_pid != l->max_pid) |
|---|
| 5750 | + seq_printf(seq, " pid=%ld-%ld", l->min_pid, l->max_pid); |
|---|
| 5751 | + else |
|---|
| 5752 | + seq_printf(seq, " pid=%ld", |
|---|
| 5753 | + l->min_pid); |
|---|
| 5754 | + |
|---|
| 5755 | + if (num_online_cpus() > 1 && !cpumask_empty(to_cpumask(l->cpus))) |
|---|
| 5756 | + seq_printf(seq, " cpus=%*pbl", |
|---|
| 5757 | + cpumask_pr_args(to_cpumask(l->cpus))); |
|---|
| 5758 | + |
|---|
| 5759 | + if (nr_online_nodes > 1 && !nodes_empty(l->nodes)) |
|---|
| 5760 | + seq_printf(seq, " nodes=%*pbl", |
|---|
| 5761 | + nodemask_pr_args(&l->nodes)); |
|---|
| 5762 | + |
|---|
| 5763 | + seq_puts(seq, "\n"); |
|---|
| 5764 | + } |
|---|
| 5765 | + |
|---|
| 5766 | + if (!idx && !t->count) |
|---|
| 5767 | + seq_puts(seq, "No data\n"); |
|---|
| 5768 | + |
|---|
| 5769 | + return 0; |
|---|
| 5770 | +} |
|---|
| 5771 | + |
|---|
| 5772 | +static void slab_debugfs_stop(struct seq_file *seq, void *v) |
|---|
| 5773 | +{ |
|---|
| 5774 | +} |
|---|
| 5775 | + |
|---|
| 5776 | +static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos) |
|---|
| 5777 | +{ |
|---|
| 5778 | + struct loc_track *t = seq->private; |
|---|
| 5779 | + |
|---|
| 5780 | + t->idx = ++(*ppos); |
|---|
| 5781 | + if (*ppos <= t->count) |
|---|
| 5782 | + return ppos; |
|---|
| 5783 | + |
|---|
| 5784 | + return NULL; |
|---|
| 5785 | +} |
|---|
| 5786 | + |
|---|
| 5787 | +static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos) |
|---|
| 5788 | +{ |
|---|
| 5789 | + struct loc_track *t = seq->private; |
|---|
| 5790 | + |
|---|
| 5791 | + t->idx = *ppos; |
|---|
| 5792 | + return ppos; |
|---|
| 5793 | +} |
|---|
| 5794 | + |
|---|
| 5795 | +static const struct seq_operations slab_debugfs_sops = { |
|---|
| 5796 | + .start = slab_debugfs_start, |
|---|
| 5797 | + .next = slab_debugfs_next, |
|---|
| 5798 | + .stop = slab_debugfs_stop, |
|---|
| 5799 | + .show = slab_debugfs_show, |
|---|
| 5800 | +}; |
|---|
| 5801 | + |
|---|
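Before `slab_debug_trace_open()` below, a quick sketch of the `seq_file` iterator contract that `slab_debugfs_sops` follows: `->start` and `->next` only have to return some non-NULL cursor to keep the walk alive (here `ppos` itself), the real position lives in private state (`t->idx` above), and a NULL return from `->next` ends the sequence. The `demo_*` names are illustrative, not part of the patch:

```c
#include <linux/seq_file.h>

/* Illustrative only: a tiny array-backed iterator built on the same
 * pattern as slab_debugfs_sops above. */
struct demo_iter {
	unsigned long idx;	/* current position, like loc_track::idx */
	unsigned long count;	/* number of valid entries */
};

static void *demo_start(struct seq_file *seq, loff_t *ppos)
{
	struct demo_iter *it = seq->private;

	it->idx = *ppos;
	return ppos;				/* non-NULL: ->show runs next */
}

static void *demo_next(struct seq_file *seq, void *v, loff_t *ppos)
{
	struct demo_iter *it = seq->private;

	it->idx = ++(*ppos);
	return it->idx < it->count ? ppos : NULL;	/* NULL ends the walk */
}

static void demo_stop(struct seq_file *seq, void *v)
{
}

static int demo_show(struct seq_file *seq, void *v)
{
	struct demo_iter *it = seq->private;

	if (it->idx >= it->count)	/* mirrors the idx < t->count check above */
		return 0;
	seq_printf(seq, "entry %lu of %lu\n", it->idx, it->count);
	return 0;
}

static const struct seq_operations demo_sops = {
	.start	= demo_start,
	.next	= demo_next,
	.stop	= demo_stop,
	.show	= demo_show,
};
```

Note that `slab_debugfs_next()` above uses `<=` rather than `<`, so the walk takes one extra step past the end; that extra `->show` call is a no-op thanks to the `idx < t->count` guard, while the empty case is handled by the first `->show` printing "No data".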
| 5802 | +static int slab_debug_trace_open(struct inode *inode, struct file *filep) |
|---|
| 5803 | +{ |
|---|
| 5804 | + |
|---|
| 5805 | + struct kmem_cache_node *n; |
|---|
| 5806 | + enum track_item alloc; |
|---|
| 5807 | + int node; |
|---|
| 5808 | + struct loc_track *t = __seq_open_private(filep, &slab_debugfs_sops, |
|---|
| 5809 | + sizeof(struct loc_track)); |
|---|
| 5810 | + struct kmem_cache *s = file_inode(filep)->i_private; |
|---|
| 5811 | + unsigned long *obj_map; |
|---|
| 5812 | + |
|---|
| 5813 | + if (!t) |
|---|
| 5814 | + return -ENOMEM; |
|---|
| 5815 | + |
|---|
| 5816 | + obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL); |
|---|
| 5817 | + if (!obj_map) { |
|---|
| 5818 | + seq_release_private(inode, filep); |
|---|
| 5819 | + return -ENOMEM; |
|---|
| 5820 | + } |
|---|
| 5821 | + |
|---|
| 5822 | + if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0) |
|---|
| 5823 | + alloc = TRACK_ALLOC; |
|---|
| 5824 | + else |
|---|
| 5825 | + alloc = TRACK_FREE; |
|---|
| 5826 | + |
|---|
| 5827 | + if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) { |
|---|
| 5828 | + bitmap_free(obj_map); |
|---|
| 5829 | + seq_release_private(inode, filep); |
|---|
| 5830 | + return -ENOMEM; |
|---|
| 5831 | + } |
|---|
| 5832 | + |
|---|
| 5833 | + /* Push back cpu slabs */ |
|---|
| 5834 | + flush_all(s); |
|---|
| 5835 | + |
|---|
| 5836 | + for_each_kmem_cache_node(s, node, n) { |
|---|
| 5837 | + unsigned long flags; |
|---|
| 5838 | + struct page *page; |
|---|
| 5839 | + |
|---|
| 5840 | + if (!atomic_long_read(&n->nr_slabs)) |
|---|
| 5841 | + continue; |
|---|
| 5842 | + |
|---|
| 5843 | + spin_lock_irqsave(&n->list_lock, flags); |
|---|
| 5844 | + list_for_each_entry(page, &n->partial, slab_list) |
|---|
| 5845 | + process_slab(t, s, page, alloc, obj_map); |
|---|
| 5846 | + list_for_each_entry(page, &n->full, slab_list) |
|---|
| 5847 | + process_slab(t, s, page, alloc, obj_map); |
|---|
| 5848 | + spin_unlock_irqrestore(&n->list_lock, flags); |
|---|
| 5849 | + } |
|---|
| 5850 | + |
|---|
| 5851 | + bitmap_free(obj_map); |
|---|
| 5852 | + return 0; |
|---|
| 5853 | +} |
|---|
| 5854 | + |
|---|
| 5855 | +static int slab_debug_trace_release(struct inode *inode, struct file *file) |
|---|
| 5856 | +{ |
|---|
| 5857 | + struct seq_file *seq = file->private_data; |
|---|
| 5858 | + struct loc_track *t = seq->private; |
|---|
| 5859 | + |
|---|
| 5860 | + free_loc_track(t); |
|---|
| 5861 | + return seq_release_private(inode, file); |
|---|
| 5862 | +} |
|---|
| 5863 | + |
|---|
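One detail worth calling out in the open/release pair above: `__seq_open_private()` allocates the `struct loc_track` that the `seq_file` core owns, `alloc_loc_track()` then attaches the location array to it, and `slab_debug_trace_release()` only has to free that array with `free_loc_track()` before `seq_release_private()` tears down the rest. A hedged sketch of the same ownership split, continuing the hypothetical `demo_*` example shown before `slab_debug_trace_open()` (so `demo_iter` and `demo_sops` come from that sketch):

```c
#include <linux/seq_file.h>
#include <linux/slab.h>

/* Illustrative only: demo_priv embeds demo_iter as its first member so the
 * demo_sops handlers can keep treating seq->private as a demo_iter, much
 * like loc_track carries both the cursor and the location array above. */
struct demo_priv {
	struct demo_iter it;	/* cursor/count used by demo_sops */
	void *extra;		/* stands in for loc_track's location array */
};

static int demo_trace_open(struct inode *inode, struct file *file)
{
	struct demo_priv *p = __seq_open_private(file, &demo_sops, sizeof(*p));

	if (!p)
		return -ENOMEM;

	p->extra = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!p->extra) {
		seq_release_private(inode, file);	/* undo __seq_open_private() */
		return -ENOMEM;
	}
	return 0;
}

static int demo_trace_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct demo_priv *p = seq->private;

	kfree(p->extra);				/* free what ->open attached... */
	return seq_release_private(inode, file);	/* ...then the seq_file side */
}
```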
| 5864 | +static const struct file_operations slab_debugfs_fops = { |
|---|
| 5865 | + .open = slab_debug_trace_open, |
|---|
| 5866 | + .read = seq_read, |
|---|
| 5867 | + .llseek = seq_lseek, |
|---|
| 5868 | + .release = slab_debug_trace_release, |
|---|
| 5869 | +}; |
|---|
| 5870 | + |
|---|
| 5871 | +static void debugfs_slab_add(struct kmem_cache *s) |
|---|
| 5872 | +{ |
|---|
| 5873 | + struct dentry *slab_cache_dir; |
|---|
| 5874 | + |
|---|
| 5875 | + if (unlikely(!slab_debugfs_root)) |
|---|
| 5876 | + return; |
|---|
| 5877 | + |
|---|
| 5878 | + slab_cache_dir = debugfs_create_dir(s->name, slab_debugfs_root); |
|---|
| 5879 | + |
|---|
| 5880 | + debugfs_create_file("alloc_traces", 0400, |
|---|
| 5881 | + slab_cache_dir, s, &slab_debugfs_fops); |
|---|
| 5882 | + |
|---|
| 5883 | + debugfs_create_file("free_traces", 0400, |
|---|
| 5884 | + slab_cache_dir, s, &slab_debugfs_fops); |
|---|
| 5885 | +} |
|---|
| 5886 | + |
|---|
| 5887 | +void debugfs_slab_release(struct kmem_cache *s) |
|---|
| 5888 | +{ |
|---|
| 5889 | + debugfs_remove_recursive(debugfs_lookup(s->name, slab_debugfs_root)); |
|---|
| 5890 | +} |
|---|
| 5891 | + |
|---|
| 5892 | +static int __init slab_debugfs_init(void) |
|---|
| 5893 | +{ |
|---|
| 5894 | + struct kmem_cache *s; |
|---|
| 5895 | + |
|---|
| 5896 | + slab_debugfs_root = debugfs_create_dir("slab", NULL); |
|---|
| 5897 | + |
|---|
| 5898 | + list_for_each_entry(s, &slab_caches, list) |
|---|
| 5899 | + if (s->flags & SLAB_STORE_USER) |
|---|
| 5900 | + debugfs_slab_add(s); |
|---|
| 5901 | + |
|---|
| 5902 | + return 0; |
|---|
| 5903 | + |
|---|
| 5904 | +} |
|---|
| 5905 | +__initcall(slab_debugfs_init); |
|---|
| 5906 | +#endif |
|---|
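With the debugfs hooks above in place, caches flagged with `SLAB_STORE_USER` get `alloc_traces` and `free_traces` files under `/sys/kernel/debug/slab/<cache>/`, replacing the `alloc_calls`/`free_calls` sysfs attributes removed earlier in this diff. A userspace sketch for dumping one of them; it assumes debugfs is mounted at `/sys/kernel/debug`, root privileges (the files are created with mode 0400), and a cache that actually has `SLAB_STORE_USER` set (e.g. booted with `slub_debug=U`). The cache name is only an example:

```c
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* Path and cache name are illustrative; pick a cache with
	 * SLAB_STORE_USER enabled on the running kernel. */
	FILE *f = fopen("/sys/kernel/debug/slab/kmalloc-64/alloc_traces", "r");
	char line[512];

	if (!f) {
		perror("alloc_traces");
		return EXIT_FAILURE;
	}

	/* One record per line: "<count> <call site> age=... pid=... cpus=... nodes=..." */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);

	fclose(f);
	return 0;
}
```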
| 5959 | 5907 | /* |
|---|
| 5960 | 5908 | * The /proc/slabinfo ABI |
|---|
| 5961 | 5909 | */ |
|---|
| .. | .. |
|---|
| 5981 | 5929 | sinfo->objects_per_slab = oo_objects(s->oo); |
|---|
| 5982 | 5930 | sinfo->cache_order = oo_order(s->oo); |
|---|
| 5983 | 5931 | } |
|---|
| 5932 | +EXPORT_SYMBOL_GPL(get_slabinfo); |
|---|
| 5984 | 5933 | |
|---|
| 5985 | 5934 | void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s) |
|---|
| 5986 | 5935 | { |
|---|