.. | .. |
---|
28 | 28 | #include <linux/ctype.h> |
---|
29 | 29 | #include <linux/debugobjects.h> |
---|
30 | 30 | #include <linux/kallsyms.h> |
---|
| 31 | +#include <linux/kfence.h> |
---|
31 | 32 | #include <linux/memory.h> |
---|
32 | 33 | #include <linux/math64.h> |
---|
33 | 34 | #include <linux/fault-inject.h> |
---|
.. | .. |
---|
36 | 37 | #include <linux/memcontrol.h> |
---|
37 | 38 | #include <linux/random.h> |
---|
38 | 39 | |
---|
| 40 | +#include <linux/debugfs.h> |
---|
39 | 41 | #include <trace/events/kmem.h> |
---|
| 42 | +#include <trace/hooks/mm.h> |
---|
40 | 43 | |
---|
41 | 44 | #include "internal.h" |
---|
42 | 45 | |
---|
.. | .. |
---|
59 | 62 | * D. page->frozen -> frozen state |
---|
60 | 63 | * |
---|
61 | 64 | * If a slab is frozen then it is exempt from list management. It is not |
---|
62 | | - * on any list. The processor that froze the slab is the one who can |
---|
63 | | - * perform list operations on the page. Other processors may put objects |
---|
64 | | - * onto the freelist but the processor that froze the slab is the only |
---|
65 | | - * one that can retrieve the objects from the page's freelist. |
---|
| 65 | + * on any list except per cpu partial list. The processor that froze the |
---|
| 66 | + * slab is the one who can perform list operations on the page. Other |
---|
| 67 | + * processors may put objects onto the freelist but the processor that |
---|
| 68 | + * froze the slab is the only one that can retrieve the objects from the |
---|
| 69 | + * page's freelist. |
---|
66 | 70 | * |
---|
67 | 71 | * The list_lock protects the partial and full list on each node and |
---|
68 | 72 | * the partial slab counter. If taken then no new slabs may be added or |
---|
.. | .. |
---|
93 | 97 | * minimal so we rely on the page allocators per cpu caches for |
---|
94 | 98 | * fast frees and allocs. |
---|
95 | 99 | * |
---|
96 | | - * Overloading of page flags that are otherwise used for LRU management. |
---|
97 | | - * |
---|
98 | | - * PageActive The slab is frozen and exempt from list processing. |
---|
| 100 | + * page->frozen The slab is frozen and exempt from list processing. |
---|
99 | 101 | * This means that the slab is dedicated to a purpose |
---|
100 | 102 | * such as satisfying allocations for a specific |
---|
101 | 103 | * processor. Objects may be freed in the slab while |
---|
.. | .. |
---|
111 | 113 | * free objects in addition to the regular freelist |
---|
112 | 114 | * that requires the slab lock. |
---|
113 | 115 | * |
---|
114 | | - * PageError Slab requires special handling due to debug |
---|
| 116 | + * SLAB_DEBUG_FLAGS Slab requires special handling due to debug |
---|
115 | 117 | * options set. This moves slab handling out of |
---|
116 | 118 | * the fast path and disables lockless freelists. |
---|
117 | 119 | */ |
---|
118 | 120 | |
---|
119 | | -static inline int kmem_cache_debug(struct kmem_cache *s) |
---|
120 | | -{ |
---|
121 | 121 | #ifdef CONFIG_SLUB_DEBUG |
---|
122 | | - return unlikely(s->flags & SLAB_DEBUG_FLAGS); |
---|
| 122 | +#ifdef CONFIG_SLUB_DEBUG_ON |
---|
| 123 | +DEFINE_STATIC_KEY_TRUE(slub_debug_enabled); |
---|
123 | 124 | #else |
---|
124 | | - return 0; |
---|
| 125 | +DEFINE_STATIC_KEY_FALSE(slub_debug_enabled); |
---|
125 | 126 | #endif |
---|
| 127 | +#endif |
---|
| 128 | + |
---|
| 129 | +static inline bool kmem_cache_debug(struct kmem_cache *s) |
---|
| 130 | +{ |
---|
| 131 | + return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS); |
---|
126 | 132 | } |
---|
127 | 133 | |
---|
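The new kmem_cache_debug() body delegates to kmem_cache_debug_flags(), which is not visible in this hunk. As a rough sketch of the idea (the real helper is defined elsewhere in slub and may differ in detail), it hides the per-cache flag test behind the new slub_debug_enabled static key, so the debug-disabled fast path costs only a patched-out branch:

```c
/* Sketch only, assuming the helper's shape; not taken from this patch. */
static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t flags)
{
#ifdef CONFIG_SLUB_DEBUG
	if (static_branch_unlikely(&slub_debug_enabled))
		return s->flags & flags;	/* slow path: test the cache's flags */
#endif
	return false;				/* debugging compiled out or disabled */
}
```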
128 | 134 | void *fixup_red_left(struct kmem_cache *s, void *p) |
---|
129 | 135 | { |
---|
130 | | - if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) |
---|
| 136 | + if (kmem_cache_debug_flags(s, SLAB_RED_ZONE)) |
---|
131 | 137 | p += s->red_left_pad; |
---|
132 | 138 | |
---|
133 | 139 | return p; |
---|
.. | .. |
---|
197 | 203 | /* Use cmpxchg_double */ |
---|
198 | 204 | #define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U) |
---|
199 | 205 | |
---|
200 | | -/* |
---|
201 | | - * Tracking user of a slab. |
---|
202 | | - */ |
---|
203 | | -#define TRACK_ADDRS_COUNT 16 |
---|
204 | | -struct track { |
---|
205 | | - unsigned long addr; /* Called from address */ |
---|
206 | | -#ifdef CONFIG_STACKTRACE |
---|
207 | | - unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */ |
---|
208 | | -#endif |
---|
209 | | - int cpu; /* Was running on cpu */ |
---|
210 | | - int pid; /* Pid context */ |
---|
211 | | - unsigned long when; /* When did the operation occur */ |
---|
212 | | -}; |
---|
213 | | - |
---|
214 | | -enum track_item { TRACK_ALLOC, TRACK_FREE }; |
---|
215 | | - |
---|
216 | 206 | #ifdef CONFIG_SLUB_SYSFS |
---|
217 | 207 | static int sysfs_slab_add(struct kmem_cache *); |
---|
218 | 208 | static int sysfs_slab_alias(struct kmem_cache *, const char *); |
---|
219 | | -static void memcg_propagate_slab_attrs(struct kmem_cache *s); |
---|
220 | | -static void sysfs_slab_remove(struct kmem_cache *s); |
---|
221 | 209 | #else |
---|
222 | 210 | static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } |
---|
223 | 211 | static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) |
---|
224 | 212 | { return 0; } |
---|
225 | | -static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { } |
---|
226 | | -static inline void sysfs_slab_remove(struct kmem_cache *s) { } |
---|
| 213 | +#endif |
---|
| 214 | + |
---|
| 215 | +#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG) |
---|
| 216 | +static void debugfs_slab_add(struct kmem_cache *); |
---|
| 217 | +#else |
---|
| 218 | +static inline void debugfs_slab_add(struct kmem_cache *s) { } |
---|
227 | 219 | #endif |
---|
228 | 220 | |
---|
229 | 221 | static inline void stat(const struct kmem_cache *s, enum stat_item si) |
---|
.. | .. |
---|
251 | 243 | { |
---|
252 | 244 | #ifdef CONFIG_SLAB_FREELIST_HARDENED |
---|
253 | 245 | /* |
---|
254 | | - * When CONFIG_KASAN_SW_TAGS is enabled, ptr_addr might be tagged. |
---|
| 246 | + * When CONFIG_KASAN_SW/HW_TAGS is enabled, ptr_addr might be tagged. |
---|
255 | 247 | * Normally, this doesn't cause any issues, as both set_freepointer() |
---|
256 | 248 | * and get_freepointer() are called with a pointer with the same tag. |
---|
257 | 249 | * However, there are some issues with CONFIG_SLUB_DEBUG code. For |
---|
.. | .. |
---|
277 | 269 | |
---|
278 | 270 | static inline void *get_freepointer(struct kmem_cache *s, void *object) |
---|
279 | 271 | { |
---|
| 272 | + object = kasan_reset_tag(object); |
---|
280 | 273 | return freelist_dereference(s, object + s->offset); |
---|
281 | 274 | } |
---|
282 | 275 | |
---|
.. | .. |
---|
290 | 283 | unsigned long freepointer_addr; |
---|
291 | 284 | void *p; |
---|
292 | 285 | |
---|
293 | | - if (!debug_pagealloc_enabled()) |
---|
| 286 | + if (!debug_pagealloc_enabled_static()) |
---|
294 | 287 | return get_freepointer(s, object); |
---|
295 | 288 | |
---|
| 289 | + object = kasan_reset_tag(object); |
---|
296 | 290 | freepointer_addr = (unsigned long)object + s->offset; |
---|
297 | | - probe_kernel_read(&p, (void **)freepointer_addr, sizeof(p)); |
---|
| 291 | + copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p)); |
---|
298 | 292 | return freelist_ptr(s, p, freepointer_addr); |
---|
299 | 293 | } |
---|
300 | 294 | |
---|
.. | .. |
---|
306 | 300 | BUG_ON(object == fp); /* naive detection of double free or corruption */ |
---|
307 | 301 | #endif |
---|
308 | 302 | |
---|
| 303 | + freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr); |
---|
309 | 304 | *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr); |
---|
310 | 305 | } |
---|
311 | 306 | |
---|
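For context on why the kasan_reset_tag() calls above are needed: set_freepointer() and freelist_dereference() both encode and decode freelist pointers through freelist_ptr(), whose internal comment is updated a few hunks above. A hedged sketch of that helper (the swab()/tag-handling details may vary by tree) shows that the storage address ptr_addr participates in the obfuscation, so it must be the untagged address on both the store and the load side:

```c
static inline void *freelist_ptr(struct kmem_cache *s, void *ptr,
				 unsigned long ptr_addr)
{
#ifdef CONFIG_SLAB_FREELIST_HARDENED
	/*
	 * XOR with a per-cache secret and the byte-swapped, tag-stripped
	 * storage address: the same pointer value encodes differently in
	 * every slot, which frustrates freelist forgery and replay.
	 */
	return (void *)((unsigned long)ptr ^ s->random ^
			swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
#else
	return ptr;
#endif
}
```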
.. | .. |
---|
314 | 309 | for (__p = fixup_red_left(__s, __addr); \ |
---|
315 | 310 | __p < (__addr) + (__objects) * (__s)->size; \ |
---|
316 | 311 | __p += (__s)->size) |
---|
317 | | - |
---|
318 | | -/* Determine object index from a given position */ |
---|
319 | | -static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr) |
---|
320 | | -{ |
---|
321 | | - return (kasan_reset_tag(p) - addr) / s->size; |
---|
322 | | -} |
---|
323 | 312 | |
---|
324 | 313 | static inline unsigned int order_objects(unsigned int order, unsigned int size) |
---|
325 | 314 | { |
---|
.. | .. |
---|
441 | 430 | } |
---|
442 | 431 | |
---|
443 | 432 | #ifdef CONFIG_SLUB_DEBUG |
---|
| 433 | +static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)]; |
---|
| 434 | +static DEFINE_SPINLOCK(object_map_lock); |
---|
| 435 | + |
---|
| 436 | +static void __fill_map(unsigned long *obj_map, struct kmem_cache *s, |
---|
| 437 | + struct page *page) |
---|
| 438 | +{ |
---|
| 439 | + void *addr = page_address(page); |
---|
| 440 | + void *p; |
---|
| 441 | + |
---|
| 442 | + bitmap_zero(obj_map, page->objects); |
---|
| 443 | + |
---|
| 444 | + for (p = page->freelist; p; p = get_freepointer(s, p)) |
---|
| 445 | + set_bit(__obj_to_index(s, addr, p), obj_map); |
---|
| 446 | +} |
---|
| 447 | + |
---|
444 | 448 | /* |
---|
445 | 449 | * Determine a map of object in use on a page. |
---|
446 | 450 | * |
---|
447 | 451 | * Node listlock must be held to guarantee that the page does |
---|
448 | 452 | * not vanish from under us. |
---|
449 | 453 | */ |
---|
450 | | -static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map) |
---|
| 454 | +static unsigned long *get_map(struct kmem_cache *s, struct page *page) |
---|
| 455 | + __acquires(&object_map_lock) |
---|
451 | 456 | { |
---|
452 | | - void *p; |
---|
453 | | - void *addr = page_address(page); |
---|
| 457 | + VM_BUG_ON(!irqs_disabled()); |
---|
454 | 458 | |
---|
455 | | - for (p = page->freelist; p; p = get_freepointer(s, p)) |
---|
456 | | - set_bit(slab_index(p, s, addr), map); |
---|
| 459 | + spin_lock(&object_map_lock); |
---|
| 460 | + |
---|
| 461 | + __fill_map(object_map, s, page); |
---|
| 462 | + |
---|
| 463 | + return object_map; |
---|
| 464 | +} |
---|
| 465 | + |
---|
| 466 | +static void put_map(unsigned long *map) __releases(&object_map_lock) |
---|
| 467 | +{ |
---|
| 468 | + VM_BUG_ON(map != object_map); |
---|
| 469 | + spin_unlock(&object_map_lock); |
---|
457 | 470 | } |
---|
458 | 471 | |
---|
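A usage sketch for the new get_map()/put_map() pair; walk_objects_in_use() below is a hypothetical caller, not part of the patch. The caller holds the node's list_lock with interrupts disabled, get_map() serializes on object_map_lock while filling the shared bitmap, and a clear bit then means the object is not on the freelist, i.e. currently allocated:

```c
static void walk_objects_in_use(struct kmem_cache *s, struct page *page)
{
	void *addr = page_address(page);
	unsigned long *map = get_map(s, page);	/* takes object_map_lock */
	void *p;

	for_each_object(p, s, addr, page->objects) {
		if (!test_bit(__obj_to_index(s, addr, p), map)) {
			/* bit clear: object p is in use (not on the freelist) */
		}
	}
	put_map(map);				/* releases object_map_lock */
}
```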
459 | 472 | static inline unsigned int size_from_object(struct kmem_cache *s) |
---|
.. | .. |
---|
476 | 489 | * Debug settings: |
---|
477 | 490 | */ |
---|
478 | 491 | #if defined(CONFIG_SLUB_DEBUG_ON) |
---|
479 | | -static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS; |
---|
| 492 | +slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS; |
---|
480 | 493 | #else |
---|
481 | | -static slab_flags_t slub_debug; |
---|
| 494 | +slab_flags_t slub_debug; |
---|
482 | 495 | #endif |
---|
483 | 496 | |
---|
484 | | -static char *slub_debug_slabs; |
---|
| 497 | +static char *slub_debug_string; |
---|
485 | 498 | static int disable_higher_order_debug; |
---|
486 | 499 | |
---|
487 | 500 | /* |
---|
.. | .. |
---|
528 | 541 | unsigned int length) |
---|
529 | 542 | { |
---|
530 | 543 | metadata_access_enable(); |
---|
531 | | - print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr, |
---|
532 | | - length, 1); |
---|
| 544 | + print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, |
---|
| 545 | + 16, 1, kasan_reset_tag((void *)addr), length, 1); |
---|
533 | 546 | metadata_access_disable(); |
---|
| 547 | +} |
---|
| 548 | + |
---|
| 549 | +/* |
---|
| 550 | + * See comment in calculate_sizes(). |
---|
| 551 | + */ |
---|
| 552 | +static inline bool freeptr_outside_object(struct kmem_cache *s) |
---|
| 553 | +{ |
---|
| 554 | + return s->offset >= s->inuse; |
---|
| 555 | +} |
---|
| 556 | + |
---|
| 557 | +/* |
---|
| 558 | + * Return offset of the end of info block which is inuse + free pointer if |
---|
| 559 | + * not overlapping with object. |
---|
| 560 | + */ |
---|
| 561 | +static inline unsigned int get_info_end(struct kmem_cache *s) |
---|
| 562 | +{ |
---|
| 563 | + if (freeptr_outside_object(s)) |
---|
| 564 | + return s->inuse + sizeof(void *); |
---|
| 565 | + else |
---|
| 566 | + return s->inuse; |
---|
534 | 567 | } |
---|
535 | 568 | |
---|
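An illustration of the two new helpers, with a made-up layout rather than anything taken from the patch: when the free pointer lives outside the object, get_info_end() marks where the pair of struct track records consumed by get_track() begins.

```c
/*
 * Illustrative per-object layout when freeptr_outside_object() is true:
 *
 *   object + 0              object data (s->object_size bytes)
 *   object + object_size    right red zone / padding up to s->inuse
 *   object + s->inuse       free pointer (sizeof(void *))
 *   get_info_end(s)         struct track[2]  (TRACK_ALLOC, TRACK_FREE)
 *
 * When the free pointer overlaps the object (s->offset < s->inuse),
 * get_info_end() is simply s->inuse and no extra word is reserved.
 */
```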
536 | 569 | static struct track *get_track(struct kmem_cache *s, void *object, |
---|
.. | .. |
---|
538 | 571 | { |
---|
539 | 572 | struct track *p; |
---|
540 | 573 | |
---|
541 | | - if (s->offset) |
---|
542 | | - p = object + s->offset + sizeof(void *); |
---|
543 | | - else |
---|
544 | | - p = object + s->inuse; |
---|
| 574 | + p = object + get_info_end(s); |
---|
545 | 575 | |
---|
546 | | - return p + alloc; |
---|
| 576 | + return kasan_reset_tag(p + alloc); |
---|
547 | 577 | } |
---|
| 578 | + |
---|
| 579 | +/* |
---|
| 580 | + * Loop through all the slab objects in a page and hand each object's |
---|
| 581 | + * track structure to the callback fn, which extracts whatever info it |
---|
| 582 | + * needs into its private data. A negative return from fn stops the walk |
---|
| 583 | + * early; the return value is the number of track structures that were |
---|
| 584 | + * processed. |
---|
| 585 | + */ |
---|
| 586 | +unsigned long get_each_object_track(struct kmem_cache *s, |
---|
| 587 | + struct page *page, enum track_item alloc, |
---|
| 588 | + int (*fn)(const struct kmem_cache *, const void *, |
---|
| 589 | + const struct track *, void *), void *private) |
---|
| 590 | +{ |
---|
| 591 | + void *p; |
---|
| 592 | + struct track *t; |
---|
| 593 | + int ret; |
---|
| 594 | + unsigned long num_track = 0; |
---|
| 595 | + |
---|
| 596 | + if (!slub_debug || !(s->flags & SLAB_STORE_USER)) |
---|
| 597 | + return 0; |
---|
| 598 | + |
---|
| 599 | + slab_lock(page); |
---|
| 600 | + for_each_object(p, s, page_address(page), page->objects) { |
---|
| 601 | + t = get_track(s, p, alloc); |
---|
| 602 | + metadata_access_enable(); |
---|
| 603 | + ret = fn(s, p, t, private); |
---|
| 604 | + metadata_access_disable(); |
---|
| 605 | + if (ret < 0) |
---|
| 606 | + break; |
---|
| 607 | + num_track += 1; |
---|
| 608 | + } |
---|
| 609 | + slab_unlock(page); |
---|
| 610 | + return num_track; |
---|
| 611 | +} |
---|
| 612 | +EXPORT_SYMBOL_GPL(get_each_object_track); |
---|
548 | 613 | |
---|
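A hypothetical consumer of the newly exported get_each_object_track(); the names below are illustrative only. It counts how many objects in a page were allocated from one particular call site by matching the stored track->addr:

```c
struct site_counter {
	unsigned long addr;	/* allocation pc to look for */
	unsigned long hits;
};

static int count_alloc_site(const struct kmem_cache *s, const void *object,
			    const struct track *t, void *private)
{
	struct site_counter *sc = private;

	if (t->addr == sc->addr)
		sc->hits++;
	return 0;	/* a negative return would stop the walk early */
}

/*
 * struct site_counter sc = { .addr = target_pc };
 * get_each_object_track(s, page, TRACK_ALLOC, count_alloc_site, &sc);
 */
```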
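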
549 | 614 | static void set_track(struct kmem_cache *s, void *object, |
---|
550 | 615 | enum track_item alloc, unsigned long addr) |
---|
.. | .. |
---|
553 | 618 | |
---|
554 | 619 | if (addr) { |
---|
555 | 620 | #ifdef CONFIG_STACKTRACE |
---|
556 | | - struct stack_trace trace; |
---|
557 | | - int i; |
---|
| 621 | + unsigned int nr_entries; |
---|
558 | 622 | |
---|
559 | | - trace.nr_entries = 0; |
---|
560 | | - trace.max_entries = TRACK_ADDRS_COUNT; |
---|
561 | | - trace.entries = p->addrs; |
---|
562 | | - trace.skip = 3; |
---|
563 | 623 | metadata_access_enable(); |
---|
564 | | - save_stack_trace(&trace); |
---|
| 624 | + nr_entries = stack_trace_save(kasan_reset_tag(p->addrs), |
---|
| 625 | + TRACK_ADDRS_COUNT, 3); |
---|
565 | 626 | metadata_access_disable(); |
---|
566 | 627 | |
---|
567 | | - /* See rant in lockdep.c */ |
---|
568 | | - if (trace.nr_entries != 0 && |
---|
569 | | - trace.entries[trace.nr_entries - 1] == ULONG_MAX) |
---|
570 | | - trace.nr_entries--; |
---|
571 | | - |
---|
572 | | - for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++) |
---|
573 | | - p->addrs[i] = 0; |
---|
| 628 | + if (nr_entries < TRACK_ADDRS_COUNT) |
---|
| 629 | + p->addrs[nr_entries] = 0; |
---|
| 630 | + trace_android_vh_save_track_hash(alloc == TRACK_ALLOC, |
---|
| 631 | + (unsigned long)p); |
---|
574 | 632 | #endif |
---|
575 | 633 | p->addr = addr; |
---|
576 | 634 | p->cpu = smp_processor_id(); |
---|
577 | 635 | p->pid = current->pid; |
---|
578 | 636 | p->when = jiffies; |
---|
579 | | - } else |
---|
| 637 | + } else { |
---|
580 | 638 | memset(p, 0, sizeof(struct track)); |
---|
| 639 | + } |
---|
581 | 640 | } |
---|
582 | 641 | |
---|
583 | 642 | static void init_tracking(struct kmem_cache *s, void *object) |
---|
.. | .. |
---|
608 | 667 | #endif |
---|
609 | 668 | } |
---|
610 | 669 | |
---|
611 | | -static void print_tracking(struct kmem_cache *s, void *object) |
---|
| 670 | +void print_tracking(struct kmem_cache *s, void *object) |
---|
612 | 671 | { |
---|
613 | 672 | unsigned long pr_time = jiffies; |
---|
614 | 673 | if (!(s->flags & SLAB_STORE_USER)) |
---|
.. | .. |
---|
636 | 695 | pr_err("=============================================================================\n"); |
---|
637 | 696 | pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf); |
---|
638 | 697 | pr_err("-----------------------------------------------------------------------------\n\n"); |
---|
639 | | - |
---|
640 | | - add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); |
---|
641 | 698 | va_end(args); |
---|
642 | 699 | } |
---|
643 | 700 | |
---|
.. | .. |
---|
691 | 748 | print_section(KERN_ERR, "Redzone ", p + s->object_size, |
---|
692 | 749 | s->inuse - s->object_size); |
---|
693 | 750 | |
---|
694 | | - if (s->offset) |
---|
695 | | - off = s->offset + sizeof(void *); |
---|
696 | | - else |
---|
697 | | - off = s->inuse; |
---|
| 751 | + off = get_info_end(s); |
---|
698 | 752 | |
---|
699 | 753 | if (s->flags & SLAB_STORE_USER) |
---|
700 | 754 | off += 2 * sizeof(struct track); |
---|
.. | .. |
---|
714 | 768 | { |
---|
715 | 769 | slab_bug(s, "%s", reason); |
---|
716 | 770 | print_trailer(s, page, object); |
---|
| 771 | + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); |
---|
717 | 772 | } |
---|
718 | 773 | |
---|
719 | 774 | static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page, |
---|
.. | .. |
---|
728 | 783 | slab_bug(s, "%s", buf); |
---|
729 | 784 | print_page_info(page); |
---|
730 | 785 | dump_stack(); |
---|
| 786 | + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); |
---|
731 | 787 | } |
---|
732 | 788 | |
---|
733 | 789 | static void init_object(struct kmem_cache *s, void *object, u8 val) |
---|
734 | 790 | { |
---|
735 | | - u8 *p = object; |
---|
| 791 | + u8 *p = kasan_reset_tag(object); |
---|
736 | 792 | |
---|
737 | 793 | if (s->flags & SLAB_RED_ZONE) |
---|
738 | 794 | memset(p - s->red_left_pad, val, s->red_left_pad); |
---|
.. | .. |
---|
759 | 815 | { |
---|
760 | 816 | u8 *fault; |
---|
761 | 817 | u8 *end; |
---|
| 818 | + u8 *addr = page_address(page); |
---|
762 | 819 | |
---|
763 | 820 | metadata_access_enable(); |
---|
764 | | - fault = memchr_inv(start, value, bytes); |
---|
| 821 | + fault = memchr_inv(kasan_reset_tag(start), value, bytes); |
---|
765 | 822 | metadata_access_disable(); |
---|
766 | 823 | if (!fault) |
---|
767 | 824 | return 1; |
---|
.. | .. |
---|
771 | 828 | end--; |
---|
772 | 829 | |
---|
773 | 830 | slab_bug(s, "%s overwritten", what); |
---|
774 | | - pr_err("INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n", |
---|
775 | | - fault, end - 1, fault[0], value); |
---|
| 831 | + pr_err("INFO: 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n", |
---|
| 832 | + fault, end - 1, fault - addr, |
---|
| 833 | + fault[0], value); |
---|
776 | 834 | print_trailer(s, page, object); |
---|
| 835 | + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); |
---|
777 | 836 | |
---|
778 | 837 | restore_bytes(s, what, value, fault, end); |
---|
779 | 838 | return 0; |
---|
.. | .. |
---|
785 | 844 | * object address |
---|
786 | 845 | * Bytes of the object to be managed. |
---|
787 | 846 | * If the freepointer may overlay the object then the free |
---|
788 | | - * pointer is the first word of the object. |
---|
| 847 | + * pointer is at the middle of the object. |
---|
789 | 848 | * |
---|
790 | 849 | * Poisoning uses 0x6b (POISON_FREE) and the last byte is |
---|
791 | 850 | * 0xa5 (POISON_END) |
---|
.. | .. |
---|
819 | 878 | |
---|
820 | 879 | static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) |
---|
821 | 880 | { |
---|
822 | | - unsigned long off = s->inuse; /* The end of info */ |
---|
823 | | - |
---|
824 | | - if (s->offset) |
---|
825 | | - /* Freepointer is placed after the object. */ |
---|
826 | | - off += sizeof(void *); |
---|
| 881 | + unsigned long off = get_info_end(s); /* The end of info */ |
---|
827 | 882 | |
---|
828 | 883 | if (s->flags & SLAB_STORE_USER) |
---|
829 | 884 | /* We also have user information there */ |
---|
.. | .. |
---|
852 | 907 | return 1; |
---|
853 | 908 | |
---|
854 | 909 | start = page_address(page); |
---|
855 | | - length = PAGE_SIZE << compound_order(page); |
---|
| 910 | + length = page_size(page); |
---|
856 | 911 | end = start + length; |
---|
857 | 912 | remainder = length % s->size; |
---|
858 | 913 | if (!remainder) |
---|
.. | .. |
---|
860 | 915 | |
---|
861 | 916 | pad = end - remainder; |
---|
862 | 917 | metadata_access_enable(); |
---|
863 | | - fault = memchr_inv(pad, POISON_INUSE, remainder); |
---|
| 918 | + fault = memchr_inv(kasan_reset_tag(pad), POISON_INUSE, remainder); |
---|
864 | 919 | metadata_access_disable(); |
---|
865 | 920 | if (!fault) |
---|
866 | 921 | return 1; |
---|
867 | 922 | while (end > fault && end[-1] == POISON_INUSE) |
---|
868 | 923 | end--; |
---|
869 | 924 | |
---|
870 | | - slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); |
---|
| 925 | + slab_err(s, page, "Padding overwritten. 0x%p-0x%p @offset=%tu", |
---|
| 926 | + fault, end - 1, fault - start); |
---|
871 | 927 | print_section(KERN_ERR, "Padding ", pad, remainder); |
---|
872 | 928 | |
---|
873 | 929 | restore_bytes(s, "slab padding", POISON_INUSE, fault, end); |
---|
.. | .. |
---|
909 | 965 | check_pad_bytes(s, page, p); |
---|
910 | 966 | } |
---|
911 | 967 | |
---|
912 | | - if (!s->offset && val == SLUB_RED_ACTIVE) |
---|
| 968 | + if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE) |
---|
913 | 969 | /* |
---|
914 | 970 | * Object and freepointer overlap. Cannot check |
---|
915 | 971 | * freepointer while object is allocated. |
---|
.. | .. |
---|
1038 | 1094 | return; |
---|
1039 | 1095 | |
---|
1040 | 1096 | lockdep_assert_held(&n->list_lock); |
---|
1041 | | - list_add(&page->lru, &n->full); |
---|
| 1097 | + list_add(&page->slab_list, &n->full); |
---|
1042 | 1098 | } |
---|
1043 | 1099 | |
---|
1044 | 1100 | static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page) |
---|
.. | .. |
---|
1047 | 1103 | return; |
---|
1048 | 1104 | |
---|
1049 | 1105 | lockdep_assert_held(&n->list_lock); |
---|
1050 | | - list_del(&page->lru); |
---|
| 1106 | + list_del(&page->slab_list); |
---|
1051 | 1107 | } |
---|
1052 | 1108 | |
---|
1053 | 1109 | /* Tracking of the number of slabs for debugging purposes */ |
---|
.. | .. |
---|
1090 | 1146 | static void setup_object_debug(struct kmem_cache *s, struct page *page, |
---|
1091 | 1147 | void *object) |
---|
1092 | 1148 | { |
---|
1093 | | - if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))) |
---|
| 1149 | + if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)) |
---|
1094 | 1150 | return; |
---|
1095 | 1151 | |
---|
1096 | 1152 | init_object(s, object, SLUB_RED_INACTIVE); |
---|
1097 | 1153 | init_tracking(s, object); |
---|
1098 | 1154 | } |
---|
1099 | 1155 | |
---|
1100 | | -static void setup_page_debug(struct kmem_cache *s, void *addr, int order) |
---|
| 1156 | +static |
---|
| 1157 | +void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) |
---|
1101 | 1158 | { |
---|
1102 | | - if (!(s->flags & SLAB_POISON)) |
---|
| 1159 | + if (!kmem_cache_debug_flags(s, SLAB_POISON)) |
---|
1103 | 1160 | return; |
---|
1104 | 1161 | |
---|
1105 | 1162 | metadata_access_enable(); |
---|
1106 | | - memset(addr, POISON_INUSE, PAGE_SIZE << order); |
---|
| 1163 | + memset(kasan_reset_tag(addr), POISON_INUSE, page_size(page)); |
---|
1107 | 1164 | metadata_access_disable(); |
---|
1108 | 1165 | } |
---|
1109 | 1166 | |
---|
1110 | 1167 | static inline int alloc_consistency_checks(struct kmem_cache *s, |
---|
1111 | | - struct page *page, |
---|
1112 | | - void *object, unsigned long addr) |
---|
| 1168 | + struct page *page, void *object) |
---|
1113 | 1169 | { |
---|
1114 | 1170 | if (!check_slab(s, page)) |
---|
1115 | 1171 | return 0; |
---|
.. | .. |
---|
1130 | 1186 | void *object, unsigned long addr) |
---|
1131 | 1187 | { |
---|
1132 | 1188 | if (s->flags & SLAB_CONSISTENCY_CHECKS) { |
---|
1133 | | - if (!alloc_consistency_checks(s, page, object, addr)) |
---|
| 1189 | + if (!alloc_consistency_checks(s, page, object)) |
---|
1134 | 1190 | goto bad; |
---|
1135 | 1191 | } |
---|
1136 | 1192 | |
---|
.. | .. |
---|
1196 | 1252 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); |
---|
1197 | 1253 | void *object = head; |
---|
1198 | 1254 | int cnt = 0; |
---|
1199 | | - unsigned long uninitialized_var(flags); |
---|
| 1255 | + unsigned long flags; |
---|
1200 | 1256 | int ret = 0; |
---|
1201 | 1257 | |
---|
1202 | | - raw_spin_lock_irqsave(&n->list_lock, flags); |
---|
| 1258 | + spin_lock_irqsave(&n->list_lock, flags); |
---|
1203 | 1259 | slab_lock(page); |
---|
1204 | 1260 | |
---|
1205 | 1261 | if (s->flags & SLAB_CONSISTENCY_CHECKS) { |
---|
.. | .. |
---|
1234 | 1290 | bulk_cnt, cnt); |
---|
1235 | 1291 | |
---|
1236 | 1292 | slab_unlock(page); |
---|
1237 | | - raw_spin_unlock_irqrestore(&n->list_lock, flags); |
---|
| 1293 | + spin_unlock_irqrestore(&n->list_lock, flags); |
---|
1238 | 1294 | if (!ret) |
---|
1239 | 1295 | slab_fix(s, "Object at 0x%p not freed", object); |
---|
1240 | 1296 | return ret; |
---|
1241 | 1297 | } |
---|
1242 | 1298 | |
---|
1243 | | -static int __init setup_slub_debug(char *str) |
---|
| 1299 | +/* |
---|
| 1300 | + * Parse a block of slub_debug options. Blocks are delimited by ';' |
---|
| 1301 | + * |
---|
| 1302 | + * @str: start of block |
---|
| 1303 | + * @flags: returns parsed flags, or DEBUG_DEFAULT_FLAGS if none specified |
---|
| 1304 | + * @slabs: return start of list of slabs, or NULL when there's no list |
---|
| 1305 | + * @init: assume this is initial parsing and not per-kmem-create parsing |
---|
| 1306 | + * |
---|
| 1307 | + * returns the start of next block if there's any, or NULL |
---|
| 1308 | + */ |
---|
| 1309 | +static char * |
---|
| 1310 | +parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init) |
---|
1244 | 1311 | { |
---|
1245 | | - slub_debug = DEBUG_DEFAULT_FLAGS; |
---|
1246 | | - if (*str++ != '=' || !*str) |
---|
1247 | | - /* |
---|
1248 | | - * No options specified. Switch on full debugging. |
---|
1249 | | - */ |
---|
1250 | | - goto out; |
---|
| 1312 | + bool higher_order_disable = false; |
---|
1251 | 1313 | |
---|
1252 | | - if (*str == ',') |
---|
| 1314 | + /* Skip any completely empty blocks */ |
---|
| 1315 | + while (*str && *str == ';') |
---|
| 1316 | + str++; |
---|
| 1317 | + |
---|
| 1318 | + if (*str == ',') { |
---|
1253 | 1319 | /* |
---|
1254 | 1320 | * No options but restriction on slabs. This means full |
---|
1255 | 1321 | * debugging for slabs matching a pattern. |
---|
1256 | 1322 | */ |
---|
| 1323 | + *flags = DEBUG_DEFAULT_FLAGS; |
---|
1257 | 1324 | goto check_slabs; |
---|
| 1325 | + } |
---|
| 1326 | + *flags = 0; |
---|
1258 | 1327 | |
---|
1259 | | - slub_debug = 0; |
---|
1260 | | - if (*str == '-') |
---|
1261 | | - /* |
---|
1262 | | - * Switch off all debugging measures. |
---|
1263 | | - */ |
---|
1264 | | - goto out; |
---|
1265 | | - |
---|
1266 | | - /* |
---|
1267 | | - * Determine which debug features should be switched on |
---|
1268 | | - */ |
---|
1269 | | - for (; *str && *str != ','; str++) { |
---|
| 1328 | + /* Determine which debug features should be switched on */ |
---|
| 1329 | + for (; *str && *str != ',' && *str != ';'; str++) { |
---|
1270 | 1330 | switch (tolower(*str)) { |
---|
| 1331 | + case '-': |
---|
| 1332 | + *flags = 0; |
---|
| 1333 | + break; |
---|
1271 | 1334 | case 'f': |
---|
1272 | | - slub_debug |= SLAB_CONSISTENCY_CHECKS; |
---|
| 1335 | + *flags |= SLAB_CONSISTENCY_CHECKS; |
---|
1273 | 1336 | break; |
---|
1274 | 1337 | case 'z': |
---|
1275 | | - slub_debug |= SLAB_RED_ZONE; |
---|
| 1338 | + *flags |= SLAB_RED_ZONE; |
---|
1276 | 1339 | break; |
---|
1277 | 1340 | case 'p': |
---|
1278 | | - slub_debug |= SLAB_POISON; |
---|
| 1341 | + *flags |= SLAB_POISON; |
---|
1279 | 1342 | break; |
---|
1280 | 1343 | case 'u': |
---|
1281 | | - slub_debug |= SLAB_STORE_USER; |
---|
| 1344 | + *flags |= SLAB_STORE_USER; |
---|
1282 | 1345 | break; |
---|
1283 | 1346 | case 't': |
---|
1284 | | - slub_debug |= SLAB_TRACE; |
---|
| 1347 | + *flags |= SLAB_TRACE; |
---|
1285 | 1348 | break; |
---|
1286 | 1349 | case 'a': |
---|
1287 | | - slub_debug |= SLAB_FAILSLAB; |
---|
| 1350 | + *flags |= SLAB_FAILSLAB; |
---|
1288 | 1351 | break; |
---|
1289 | 1352 | case 'o': |
---|
1290 | 1353 | /* |
---|
1291 | 1354 | * Avoid enabling debugging on caches if its minimum |
---|
1292 | 1355 | * order would increase as a result. |
---|
1293 | 1356 | */ |
---|
1294 | | - disable_higher_order_debug = 1; |
---|
| 1357 | + higher_order_disable = true; |
---|
1295 | 1358 | break; |
---|
1296 | 1359 | default: |
---|
1297 | | - pr_err("slub_debug option '%c' unknown. skipped\n", |
---|
1298 | | - *str); |
---|
| 1360 | + if (init) |
---|
| 1361 | + pr_err("slub_debug option '%c' unknown. skipped\n", *str); |
---|
| 1362 | + } |
---|
| 1363 | + } |
---|
| 1364 | +check_slabs: |
---|
| 1365 | + if (*str == ',') |
---|
| 1366 | + *slabs = ++str; |
---|
| 1367 | + else |
---|
| 1368 | + *slabs = NULL; |
---|
| 1369 | + |
---|
| 1370 | + /* Skip over the slab list */ |
---|
| 1371 | + while (*str && *str != ';') |
---|
| 1372 | + str++; |
---|
| 1373 | + |
---|
| 1374 | + /* Skip any completely empty blocks */ |
---|
| 1375 | + while (*str && *str == ';') |
---|
| 1376 | + str++; |
---|
| 1377 | + |
---|
| 1378 | + if (init && higher_order_disable) |
---|
| 1379 | + disable_higher_order_debug = 1; |
---|
| 1380 | + |
---|
| 1381 | + if (*str) |
---|
| 1382 | + return str; |
---|
| 1383 | + else |
---|
| 1384 | + return NULL; |
---|
| 1385 | +} |
---|
| 1386 | + |
---|
| 1387 | +static int __init setup_slub_debug(char *str) |
---|
| 1388 | +{ |
---|
| 1389 | + slab_flags_t flags; |
---|
| 1390 | + slab_flags_t global_flags; |
---|
| 1391 | + char *saved_str; |
---|
| 1392 | + char *slab_list; |
---|
| 1393 | + bool global_slub_debug_changed = false; |
---|
| 1394 | + bool slab_list_specified = false; |
---|
| 1395 | + |
---|
| 1396 | + global_flags = DEBUG_DEFAULT_FLAGS; |
---|
| 1397 | + if (*str++ != '=' || !*str) |
---|
| 1398 | + /* |
---|
| 1399 | + * No options specified. Switch on full debugging. |
---|
| 1400 | + */ |
---|
| 1401 | + goto out; |
---|
| 1402 | + |
---|
| 1403 | + saved_str = str; |
---|
| 1404 | + while (str) { |
---|
| 1405 | + str = parse_slub_debug_flags(str, &flags, &slab_list, true); |
---|
| 1406 | + |
---|
| 1407 | + if (!slab_list) { |
---|
| 1408 | + global_flags = flags; |
---|
| 1409 | + global_slub_debug_changed = true; |
---|
| 1410 | + } else { |
---|
| 1411 | + slab_list_specified = true; |
---|
1299 | 1412 | } |
---|
1300 | 1413 | } |
---|
1301 | 1414 | |
---|
1302 | | -check_slabs: |
---|
1303 | | - if (*str == ',') |
---|
1304 | | - slub_debug_slabs = str + 1; |
---|
| 1415 | + /* |
---|
| 1416 | + * For backwards compatibility, a single list of flags with list of |
---|
| 1417 | + * slabs means debugging is only changed for those slabs, so the global |
---|
| 1418 | + * slub_debug should be unchanged (0 or DEBUG_DEFAULT_FLAGS, depending |
---|
| 1419 | + * on CONFIG_SLUB_DEBUG_ON). We can extend that to multiple lists as |
---|
| 1420 | + * long as there is no option specifying flags without a slab list. |
---|
| 1421 | + */ |
---|
| 1422 | + if (slab_list_specified) { |
---|
| 1423 | + if (!global_slub_debug_changed) |
---|
| 1424 | + global_flags = slub_debug; |
---|
| 1425 | + slub_debug_string = saved_str; |
---|
| 1426 | + } |
---|
1305 | 1427 | out: |
---|
| 1428 | + slub_debug = global_flags; |
---|
| 1429 | + if (slub_debug != 0 || slub_debug_string) |
---|
| 1430 | + static_branch_enable(&slub_debug_enabled); |
---|
1306 | 1431 | if ((static_branch_unlikely(&init_on_alloc) || |
---|
1307 | 1432 | static_branch_unlikely(&init_on_free)) && |
---|
1308 | 1433 | (slub_debug & SLAB_POISON)) |
---|
.. | .. |
---|
1312 | 1437 | |
---|
1313 | 1438 | __setup("slub_debug", setup_slub_debug); |
---|
1314 | 1439 | |
---|
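Some illustrative command lines the new parser accepts, following the semantics of the code above (';' separates blocks, ',' introduces a slab-name list, and '*' acts as a prefix glob in kmem_cache_flags() below); these examples are explanatory and not part of the patch:

```c
/*
 *   slub_debug=FZ              consistency checks + red zoning for all slabs
 *   slub_debug=,kmalloc-*      default (full) debug flags, but only for
 *                              caches whose names start with "kmalloc-"
 *   slub_debug=FZ;-,zs_handle  FZ globally, while the zs_handle cache gets
 *                              an empty flag set, i.e. debugging disabled
 */
```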
| 1440 | +/* |
---|
| 1441 | + * kmem_cache_flags - apply debugging options to the cache |
---|
| 1442 | + * @object_size: the size of an object without meta data |
---|
| 1443 | + * @flags: flags to set |
---|
| 1444 | + * @name: name of the cache |
---|
| 1445 | + * |
---|
| 1446 | + * Debug option(s) are applied to @flags. In addition to the debug |
---|
| 1447 | + * option(s), if a slab name (or multiple) is specified i.e. |
---|
| 1448 | + * slub_debug=<Debug-Options>,<slab name1>,<slab name2> ... |
---|
| 1449 | + * then only the select slabs will receive the debug option(s). |
---|
| 1450 | + */ |
---|
1315 | 1451 | slab_flags_t kmem_cache_flags(unsigned int object_size, |
---|
1316 | | - slab_flags_t flags, const char *name, |
---|
1317 | | - void (*ctor)(void *)) |
---|
| 1452 | + slab_flags_t flags, const char *name) |
---|
1318 | 1453 | { |
---|
1319 | | - /* |
---|
1320 | | - * Enable debugging if selected on the kernel commandline. |
---|
1321 | | - */ |
---|
1322 | | - if (slub_debug && (!slub_debug_slabs || (name && |
---|
1323 | | - !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))) |
---|
1324 | | - flags |= slub_debug; |
---|
| 1454 | + char *iter; |
---|
| 1455 | + size_t len; |
---|
| 1456 | + char *next_block; |
---|
| 1457 | + slab_flags_t block_flags; |
---|
1325 | 1458 | |
---|
1326 | | - return flags; |
---|
| 1459 | + len = strlen(name); |
---|
| 1460 | + next_block = slub_debug_string; |
---|
| 1461 | + /* Go through all blocks of debug options, see if any matches our slab's name */ |
---|
| 1462 | + while (next_block) { |
---|
| 1463 | + next_block = parse_slub_debug_flags(next_block, &block_flags, &iter, false); |
---|
| 1464 | + if (!iter) |
---|
| 1465 | + continue; |
---|
| 1466 | + /* Found a block that has a slab list, search it */ |
---|
| 1467 | + while (*iter) { |
---|
| 1468 | + char *end, *glob; |
---|
| 1469 | + size_t cmplen; |
---|
| 1470 | + |
---|
| 1471 | + end = strchrnul(iter, ','); |
---|
| 1472 | + if (next_block && next_block < end) |
---|
| 1473 | + end = next_block - 1; |
---|
| 1474 | + |
---|
| 1475 | + glob = strnchr(iter, end - iter, '*'); |
---|
| 1476 | + if (glob) |
---|
| 1477 | + cmplen = glob - iter; |
---|
| 1478 | + else |
---|
| 1479 | + cmplen = max_t(size_t, len, (end - iter)); |
---|
| 1480 | + |
---|
| 1481 | + if (!strncmp(name, iter, cmplen)) { |
---|
| 1482 | + flags |= block_flags; |
---|
| 1483 | + return flags; |
---|
| 1484 | + } |
---|
| 1485 | + |
---|
| 1486 | + if (!*end || *end == ';') |
---|
| 1487 | + break; |
---|
| 1488 | + iter = end + 1; |
---|
| 1489 | + } |
---|
| 1490 | + } |
---|
| 1491 | + |
---|
| 1492 | + return flags | slub_debug; |
---|
1327 | 1493 | } |
---|
1328 | 1494 | #else /* !CONFIG_SLUB_DEBUG */ |
---|
1329 | 1495 | static inline void setup_object_debug(struct kmem_cache *s, |
---|
1330 | 1496 | struct page *page, void *object) {} |
---|
1331 | | -static inline void setup_page_debug(struct kmem_cache *s, |
---|
1332 | | - void *addr, int order) {} |
---|
| 1497 | +static inline |
---|
| 1498 | +void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {} |
---|
1333 | 1499 | |
---|
1334 | 1500 | static inline int alloc_debug_processing(struct kmem_cache *s, |
---|
1335 | 1501 | struct page *page, void *object, unsigned long addr) { return 0; } |
---|
.. | .. |
---|
1348 | 1514 | static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, |
---|
1349 | 1515 | struct page *page) {} |
---|
1350 | 1516 | slab_flags_t kmem_cache_flags(unsigned int object_size, |
---|
1351 | | - slab_flags_t flags, const char *name, |
---|
1352 | | - void (*ctor)(void *)) |
---|
| 1517 | + slab_flags_t flags, const char *name) |
---|
1353 | 1518 | { |
---|
1354 | 1519 | return flags; |
---|
1355 | 1520 | } |
---|
.. | .. |
---|
1373 | 1538 | } |
---|
1374 | 1539 | #endif /* CONFIG_SLUB_DEBUG */ |
---|
1375 | 1540 | |
---|
1376 | | -struct slub_free_list { |
---|
1377 | | - raw_spinlock_t lock; |
---|
1378 | | - struct list_head list; |
---|
1379 | | -}; |
---|
1380 | | -static DEFINE_PER_CPU(struct slub_free_list, slub_free_list); |
---|
1381 | | - |
---|
1382 | 1541 | /* |
---|
1383 | 1542 | * Hooks for other subsystems that check memory allocations. In a typical |
---|
1384 | 1543 | * production configuration these hooks all should produce no code at all. |
---|
.. | .. |
---|
1386 | 1545 | static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) |
---|
1387 | 1546 | { |
---|
1388 | 1547 | ptr = kasan_kmalloc_large(ptr, size, flags); |
---|
| 1548 | + /* As ptr might get tagged, call kmemleak hook after KASAN. */ |
---|
1389 | 1549 | kmemleak_alloc(ptr, size, 1, flags); |
---|
1390 | 1550 | return ptr; |
---|
1391 | 1551 | } |
---|
.. | .. |
---|
1393 | 1553 | static __always_inline void kfree_hook(void *x) |
---|
1394 | 1554 | { |
---|
1395 | 1555 | kmemleak_free(x); |
---|
1396 | | - kasan_kfree_large(x, _RET_IP_); |
---|
| 1556 | + kasan_kfree_large(x); |
---|
1397 | 1557 | } |
---|
1398 | 1558 | |
---|
1399 | | -static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x) |
---|
| 1559 | +static __always_inline bool slab_free_hook(struct kmem_cache *s, |
---|
| 1560 | + void *x, bool init) |
---|
1400 | 1561 | { |
---|
1401 | 1562 | kmemleak_free_recursive(x, s->flags); |
---|
1402 | 1563 | |
---|
.. | .. |
---|
1417 | 1578 | if (!(s->flags & SLAB_DEBUG_OBJECTS)) |
---|
1418 | 1579 | debug_check_no_obj_freed(x, s->object_size); |
---|
1419 | 1580 | |
---|
1420 | | - /* KASAN might put x into memory quarantine, delaying its reuse */ |
---|
1421 | | - return kasan_slab_free(s, x, _RET_IP_); |
---|
| 1581 | + /* Use KCSAN to help debug racy use-after-free. */ |
---|
| 1582 | + if (!(s->flags & SLAB_TYPESAFE_BY_RCU)) |
---|
| 1583 | + __kcsan_check_access(x, s->object_size, |
---|
| 1584 | + KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT); |
---|
| 1585 | + |
---|
| 1586 | + /* |
---|
| 1587 | + * As memory initialization might be integrated into KASAN, |
---|
| 1588 | + * kasan_slab_free and initialization memset's must be |
---|
| 1589 | + * kept together to avoid discrepancies in behavior. |
---|
| 1590 | + * |
---|
| 1591 | + * The initialization memset's clear the object and the metadata, |
---|
| 1592 | + * but don't touch the SLAB redzone. |
---|
| 1593 | + */ |
---|
| 1594 | + if (init) { |
---|
| 1595 | + int rsize; |
---|
| 1596 | + |
---|
| 1597 | + if (!kasan_has_integrated_init()) |
---|
| 1598 | + memset(kasan_reset_tag(x), 0, s->object_size); |
---|
| 1599 | + rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad : 0; |
---|
| 1600 | + memset((char *)kasan_reset_tag(x) + s->inuse, 0, |
---|
| 1601 | + s->size - s->inuse - rsize); |
---|
| 1602 | + } |
---|
| 1603 | + /* KASAN might put x into memory quarantine, delaying its reuse. */ |
---|
| 1604 | + return kasan_slab_free(s, x, init); |
---|
1422 | 1605 | } |
---|
1423 | 1606 | |
---|
1424 | 1607 | static inline bool slab_free_freelist_hook(struct kmem_cache *s, |
---|
.. | .. |
---|
1429 | 1612 | void *object; |
---|
1430 | 1613 | void *next = *head; |
---|
1431 | 1614 | void *old_tail = *tail ? *tail : *head; |
---|
1432 | | - int rsize; |
---|
| 1615 | + |
---|
| 1616 | + if (is_kfence_address(next)) { |
---|
| 1617 | + slab_free_hook(s, next, false); |
---|
| 1618 | + return true; |
---|
| 1619 | + } |
---|
1433 | 1620 | |
---|
1434 | 1621 | /* Head and tail of the reconstructed freelist */ |
---|
1435 | 1622 | *head = NULL; |
---|
.. | .. |
---|
1439 | 1626 | object = next; |
---|
1440 | 1627 | next = get_freepointer(s, object); |
---|
1441 | 1628 | |
---|
1442 | | - if (slab_want_init_on_free(s)) { |
---|
1443 | | - /* |
---|
1444 | | - * Clear the object and the metadata, but don't touch |
---|
1445 | | - * the redzone. |
---|
1446 | | - */ |
---|
1447 | | - memset(object, 0, s->object_size); |
---|
1448 | | - rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad |
---|
1449 | | - : 0; |
---|
1450 | | - memset((char *)object + s->inuse, 0, |
---|
1451 | | - s->size - s->inuse - rsize); |
---|
1452 | | - |
---|
1453 | | - } |
---|
1454 | 1629 | /* If object's reuse doesn't have to be delayed */ |
---|
1455 | | - if (!slab_free_hook(s, object)) { |
---|
| 1630 | + if (!slab_free_hook(s, object, slab_want_init_on_free(s))) { |
---|
1456 | 1631 | /* Move object to the new freelist */ |
---|
1457 | 1632 | set_freepointer(s, object, *head); |
---|
1458 | 1633 | *head = object; |
---|
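The init argument threaded into slab_free_hook() above comes from slab_want_init_on_free(). A rough sketch of that helper (it lives in mm/slab.h; the exact condition may differ by tree) shows why the old open-coded memset in this loop could move into slab_free_hook(): the decision is per cache, honouring the init_on_free static key while skipping caches whose contents must survive a free:

```c
static inline bool slab_want_init_on_free(struct kmem_cache *c)
{
	if (static_branch_unlikely(&init_on_free))
		/* constructed, RCU-typesafe or poisoned caches keep their bytes */
		return !(c->ctor ||
			 (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)));
	return false;
}
```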
.. | .. |
---|
1500 | 1675 | else |
---|
1501 | 1676 | page = __alloc_pages_node(node, flags, order); |
---|
1502 | 1677 | |
---|
1503 | | - if (page && memcg_charge_slab(page, flags, order, s)) { |
---|
1504 | | - __free_pages(page, order); |
---|
1505 | | - page = NULL; |
---|
1506 | | - } |
---|
| 1678 | + if (page) |
---|
| 1679 | + account_slab_page(page, order, s); |
---|
1507 | 1680 | |
---|
1508 | 1681 | return page; |
---|
1509 | 1682 | } |
---|
.. | .. |
---|
1623 | 1796 | struct kmem_cache_order_objects oo = s->oo; |
---|
1624 | 1797 | gfp_t alloc_gfp; |
---|
1625 | 1798 | void *start, *p, *next; |
---|
1626 | | - int idx, order; |
---|
| 1799 | + int idx; |
---|
1627 | 1800 | bool shuffle; |
---|
1628 | | - bool enableirqs = false; |
---|
1629 | 1801 | |
---|
1630 | 1802 | flags &= gfp_allowed_mask; |
---|
1631 | 1803 | |
---|
1632 | 1804 | if (gfpflags_allow_blocking(flags)) |
---|
1633 | | - enableirqs = true; |
---|
1634 | | -#ifdef CONFIG_PREEMPT_RT_FULL |
---|
1635 | | - if (system_state > SYSTEM_BOOTING) |
---|
1636 | | - enableirqs = true; |
---|
1637 | | -#endif |
---|
1638 | | - if (enableirqs) |
---|
1639 | 1805 | local_irq_enable(); |
---|
1640 | 1806 | |
---|
1641 | 1807 | flags |= s->allocflags; |
---|
.. | .. |
---|
1664 | 1830 | |
---|
1665 | 1831 | page->objects = oo_objects(oo); |
---|
1666 | 1832 | |
---|
1667 | | - order = compound_order(page); |
---|
1668 | 1833 | page->slab_cache = s; |
---|
1669 | 1834 | __SetPageSlab(page); |
---|
1670 | 1835 | if (page_is_pfmemalloc(page)) |
---|
.. | .. |
---|
1674 | 1839 | |
---|
1675 | 1840 | start = page_address(page); |
---|
1676 | 1841 | |
---|
1677 | | - setup_page_debug(s, start, order); |
---|
| 1842 | + setup_page_debug(s, page, start); |
---|
1678 | 1843 | |
---|
1679 | 1844 | shuffle = shuffle_freelist(s, page); |
---|
1680 | 1845 | |
---|
.. | .. |
---|
1695 | 1860 | page->frozen = 1; |
---|
1696 | 1861 | |
---|
1697 | 1862 | out: |
---|
1698 | | - if (enableirqs) |
---|
| 1863 | + if (gfpflags_allow_blocking(flags)) |
---|
1699 | 1864 | local_irq_disable(); |
---|
1700 | 1865 | if (!page) |
---|
1701 | 1866 | return NULL; |
---|
1702 | | - |
---|
1703 | | - mod_lruvec_page_state(page, |
---|
1704 | | - (s->flags & SLAB_RECLAIM_ACCOUNT) ? |
---|
1705 | | - NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, |
---|
1706 | | - 1 << oo_order(oo)); |
---|
1707 | 1867 | |
---|
1708 | 1868 | inc_slabs_node(s, page_to_nid(page), page->objects); |
---|
1709 | 1869 | |
---|
.. | .. |
---|
1712 | 1872 | |
---|
1713 | 1873 | static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) |
---|
1714 | 1874 | { |
---|
1715 | | - if (unlikely(flags & GFP_SLAB_BUG_MASK)) { |
---|
1716 | | - gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK; |
---|
1717 | | - flags &= ~GFP_SLAB_BUG_MASK; |
---|
1718 | | - pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n", |
---|
1719 | | - invalid_mask, &invalid_mask, flags, &flags); |
---|
1720 | | - dump_stack(); |
---|
1721 | | - } |
---|
| 1875 | + if (unlikely(flags & GFP_SLAB_BUG_MASK)) |
---|
| 1876 | + flags = kmalloc_fix_flags(flags); |
---|
1722 | 1877 | |
---|
1723 | 1878 | return allocate_slab(s, |
---|
1724 | 1879 | flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node); |
---|
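The GFP_SLAB_BUG_MASK warning removed above is centralized in kmalloc_fix_flags(), shared with the other slab allocators. It is defined in mm/slab_common.c, outside this diff, and presumably mirrors the block it replaces, roughly:

```c
gfp_t kmalloc_fix_flags(gfp_t flags)
{
	gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;

	flags &= ~GFP_SLAB_BUG_MASK;
	pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
		invalid_mask, &invalid_mask, flags, &flags);
	dump_stack();

	return flags;
}
```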
.. | .. |
---|
1729 | 1884 | int order = compound_order(page); |
---|
1730 | 1885 | int pages = 1 << order; |
---|
1731 | 1886 | |
---|
1732 | | - if (s->flags & SLAB_CONSISTENCY_CHECKS) { |
---|
| 1887 | + if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) { |
---|
1733 | 1888 | void *p; |
---|
1734 | 1889 | |
---|
1735 | 1890 | slab_pad_check(s, page); |
---|
.. | .. |
---|
1738 | 1893 | check_object(s, page, p, SLUB_RED_INACTIVE); |
---|
1739 | 1894 | } |
---|
1740 | 1895 | |
---|
1741 | | - mod_lruvec_page_state(page, |
---|
1742 | | - (s->flags & SLAB_RECLAIM_ACCOUNT) ? |
---|
1743 | | - NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, |
---|
1744 | | - -pages); |
---|
1745 | | - |
---|
1746 | 1896 | __ClearPageSlabPfmemalloc(page); |
---|
1747 | 1897 | __ClearPageSlab(page); |
---|
1748 | 1898 | |
---|
1749 | 1899 | page->mapping = NULL; |
---|
1750 | 1900 | if (current->reclaim_state) |
---|
1751 | 1901 | current->reclaim_state->reclaimed_slab += pages; |
---|
1752 | | - memcg_uncharge_slab(page, order, s); |
---|
| 1902 | + unaccount_slab_page(page, order, s); |
---|
1753 | 1903 | __free_pages(page, order); |
---|
1754 | | -} |
---|
1755 | | - |
---|
1756 | | -static void free_delayed(struct list_head *h) |
---|
1757 | | -{ |
---|
1758 | | - while (!list_empty(h)) { |
---|
1759 | | - struct page *page = list_first_entry(h, struct page, lru); |
---|
1760 | | - |
---|
1761 | | - list_del(&page->lru); |
---|
1762 | | - __free_slab(page->slab_cache, page); |
---|
1763 | | - } |
---|
1764 | 1904 | } |
---|
1765 | 1905 | |
---|
1766 | 1906 | static void rcu_free_slab(struct rcu_head *h) |
---|
.. | .. |
---|
1774 | 1914 | { |
---|
1775 | 1915 | if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) { |
---|
1776 | 1916 | call_rcu(&page->rcu_head, rcu_free_slab); |
---|
1777 | | - } else if (irqs_disabled()) { |
---|
1778 | | - struct slub_free_list *f = this_cpu_ptr(&slub_free_list); |
---|
1779 | | - |
---|
1780 | | - raw_spin_lock(&f->lock); |
---|
1781 | | - list_add(&page->lru, &f->list); |
---|
1782 | | - raw_spin_unlock(&f->lock); |
---|
1783 | 1917 | } else |
---|
1784 | 1918 | __free_slab(s, page); |
---|
1785 | 1919 | } |
---|
.. | .. |
---|
1798 | 1932 | { |
---|
1799 | 1933 | n->nr_partial++; |
---|
1800 | 1934 | if (tail == DEACTIVATE_TO_TAIL) |
---|
1801 | | - list_add_tail(&page->lru, &n->partial); |
---|
| 1935 | + list_add_tail(&page->slab_list, &n->partial); |
---|
1802 | 1936 | else |
---|
1803 | | - list_add(&page->lru, &n->partial); |
---|
| 1937 | + list_add(&page->slab_list, &n->partial); |
---|
1804 | 1938 | } |
---|
1805 | 1939 | |
---|
1806 | 1940 | static inline void add_partial(struct kmem_cache_node *n, |
---|
.. | .. |
---|
1814 | 1948 | struct page *page) |
---|
1815 | 1949 | { |
---|
1816 | 1950 | lockdep_assert_held(&n->list_lock); |
---|
1817 | | - list_del(&page->lru); |
---|
| 1951 | + list_del(&page->slab_list); |
---|
1818 | 1952 | n->nr_partial--; |
---|
1819 | 1953 | } |
---|
1820 | 1954 | |
---|
.. | .. |
---|
1881 | 2015 | /* |
---|
1882 | 2016 | * Racy check. If we mistakenly see no partial slabs then we |
---|
1883 | 2017 | * just allocate an empty slab. If we mistakenly try to get a |
---|
1884 | | - * partial slab and there is none available then get_partials() |
---|
| 2018 | + * partial slab and there is none available then get_partial() |
---|
1885 | 2019 | * will return NULL. |
---|
1886 | 2020 | */ |
---|
1887 | 2021 | if (!n || !n->nr_partial) |
---|
1888 | 2022 | return NULL; |
---|
1889 | 2023 | |
---|
1890 | | - raw_spin_lock(&n->list_lock); |
---|
1891 | | - list_for_each_entry_safe(page, page2, &n->partial, lru) { |
---|
| 2024 | + spin_lock(&n->list_lock); |
---|
| 2025 | + list_for_each_entry_safe(page, page2, &n->partial, slab_list) { |
---|
1892 | 2026 | void *t; |
---|
1893 | 2027 | |
---|
1894 | 2028 | if (!pfmemalloc_match(page, flags)) |
---|
.. | .. |
---|
1912 | 2046 | break; |
---|
1913 | 2047 | |
---|
1914 | 2048 | } |
---|
1915 | | - raw_spin_unlock(&n->list_lock); |
---|
| 2049 | + spin_unlock(&n->list_lock); |
---|
1916 | 2050 | return object; |
---|
1917 | 2051 | } |
---|
1918 | 2052 | |
---|
.. | .. |
---|
1926 | 2060 | struct zonelist *zonelist; |
---|
1927 | 2061 | struct zoneref *z; |
---|
1928 | 2062 | struct zone *zone; |
---|
1929 | | - enum zone_type high_zoneidx = gfp_zone(flags); |
---|
| 2063 | + enum zone_type highest_zoneidx = gfp_zone(flags); |
---|
1930 | 2064 | void *object; |
---|
1931 | 2065 | unsigned int cpuset_mems_cookie; |
---|
1932 | 2066 | |
---|
.. | .. |
---|
1955 | 2089 | do { |
---|
1956 | 2090 | cpuset_mems_cookie = read_mems_allowed_begin(); |
---|
1957 | 2091 | zonelist = node_zonelist(mempolicy_slab_node(), flags); |
---|
1958 | | - for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { |
---|
| 2092 | + for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) { |
---|
1959 | 2093 | struct kmem_cache_node *n; |
---|
1960 | 2094 | |
---|
1961 | 2095 | n = get_node(s, zone_to_nid(zone)); |
---|
.. | .. |
---|
1976 | 2110 | } |
---|
1977 | 2111 | } |
---|
1978 | 2112 | } while (read_mems_allowed_retry(cpuset_mems_cookie)); |
---|
1979 | | -#endif |
---|
| 2113 | +#endif /* CONFIG_NUMA */ |
---|
1980 | 2114 | return NULL; |
---|
1981 | 2115 | } |
---|
1982 | 2116 | |
---|
.. | .. |
---|
1999 | 2133 | return get_any_partial(s, flags, c); |
---|
2000 | 2134 | } |
---|
2001 | 2135 | |
---|
2002 | | -#ifdef CONFIG_PREEMPT |
---|
| 2136 | +#ifdef CONFIG_PREEMPTION |
---|
2003 | 2137 | /* |
---|
2004 | | - * Calculate the next globally unique transaction for disambiguiation |
---|
| 2138 | + * Calculate the next globally unique transaction for disambiguation |
---|
2005 | 2139 | * during cmpxchg. The transactions start with the cpu number and are then |
---|
2006 | 2140 | * incremented by CONFIG_NR_CPUS. |
---|
2007 | 2141 | */ |
---|
.. | .. |
---|
2019 | 2153 | return tid + TID_STEP; |
---|
2020 | 2154 | } |
---|
2021 | 2155 | |
---|
| 2156 | +#ifdef SLUB_DEBUG_CMPXCHG |
---|
2022 | 2157 | static inline unsigned int tid_to_cpu(unsigned long tid) |
---|
2023 | 2158 | { |
---|
2024 | 2159 | return tid % TID_STEP; |
---|
.. | .. |
---|
2028 | 2163 | { |
---|
2029 | 2164 | return tid / TID_STEP; |
---|
2030 | 2165 | } |
---|
| 2166 | +#endif |
---|
2031 | 2167 | |
---|
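A worked example of the tid arithmetic, assuming CONFIG_PREEMPTION and TID_STEP == 4 (CONFIG_NR_CPUS rounded up to a power of two); init_tid() is shown just below:

```c
/*
 *   init_tid(2) == 2, and next_tid() then yields 6, 10, 14, ...
 *   tid_to_cpu(10)   == 10 % 4 == 2    -> still CPU 2
 *   tid_to_event(10) == 10 / 4 == 2    -> third transaction on that CPU
 *
 * When the cmpxchg fast path sees a tid mismatch, the cmpxchg-failure
 * debug path further below can therefore tell whether the task migrated
 * to another CPU or merely lost a race on the same one.
 */
```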
2032 | 2168 | static inline unsigned int init_tid(int cpu) |
---|
2033 | 2169 | { |
---|
.. | .. |
---|
2042 | 2178 | |
---|
2043 | 2179 | pr_info("%s %s: cmpxchg redo ", n, s->name); |
---|
2044 | 2180 | |
---|
2045 | | -#ifdef CONFIG_PREEMPT |
---|
| 2181 | +#ifdef CONFIG_PREEMPTION |
---|
2046 | 2182 | if (tid_to_cpu(tid) != tid_to_cpu(actual_tid)) |
---|
2047 | 2183 | pr_warn("due to cpu change %d -> %d\n", |
---|
2048 | 2184 | tid_to_cpu(tid), tid_to_cpu(actual_tid)); |
---|
.. | .. |
---|
2160 | 2296 | if (!lock) { |
---|
2161 | 2297 | lock = 1; |
---|
2162 | 2298 | /* |
---|
2163 | | - * Taking the spinlock removes the possiblity |
---|
| 2299 | + * Taking the spinlock removes the possibility |
---|
2164 | 2300 | * that acquire_slab() will see a slab page that |
---|
2165 | 2301 | * is frozen |
---|
2166 | 2302 | */ |
---|
2167 | | - raw_spin_lock(&n->list_lock); |
---|
| 2303 | + spin_lock(&n->list_lock); |
---|
2168 | 2304 | } |
---|
2169 | 2305 | } else { |
---|
2170 | 2306 | m = M_FULL; |
---|
2171 | | - if (kmem_cache_debug(s) && !lock) { |
---|
| 2307 | +#ifdef CONFIG_SLUB_DEBUG |
---|
| 2308 | + if ((s->flags & SLAB_STORE_USER) && !lock) { |
---|
2172 | 2309 | lock = 1; |
---|
2173 | 2310 | /* |
---|
2174 | 2311 | * This also ensures that the scanning of full |
---|
2175 | 2312 | * slabs from diagnostic functions will not see |
---|
2176 | 2313 | * any frozen slabs. |
---|
2177 | 2314 | */ |
---|
2178 | | - raw_spin_lock(&n->list_lock); |
---|
| 2315 | + spin_lock(&n->list_lock); |
---|
2179 | 2316 | } |
---|
| 2317 | +#endif |
---|
2180 | 2318 | } |
---|
2181 | 2319 | |
---|
2182 | 2320 | if (l != m) { |
---|
2183 | | - |
---|
2184 | 2321 | if (l == M_PARTIAL) |
---|
2185 | | - |
---|
2186 | 2322 | remove_partial(n, page); |
---|
2187 | | - |
---|
2188 | 2323 | else if (l == M_FULL) |
---|
2189 | | - |
---|
2190 | 2324 | remove_full(s, n, page); |
---|
2191 | 2325 | |
---|
2192 | | - if (m == M_PARTIAL) { |
---|
2193 | | - |
---|
| 2326 | + if (m == M_PARTIAL) |
---|
2194 | 2327 | add_partial(n, page, tail); |
---|
2195 | | - stat(s, tail); |
---|
2196 | | - |
---|
2197 | | - } else if (m == M_FULL) { |
---|
2198 | | - |
---|
2199 | | - stat(s, DEACTIVATE_FULL); |
---|
| 2328 | + else if (m == M_FULL) |
---|
2200 | 2329 | add_full(s, n, page); |
---|
2201 | | - |
---|
2202 | | - } |
---|
2203 | 2330 | } |
---|
2204 | 2331 | |
---|
2205 | 2332 | l = m; |
---|
.. | .. |
---|
2210 | 2337 | goto redo; |
---|
2211 | 2338 | |
---|
2212 | 2339 | if (lock) |
---|
2213 | | - raw_spin_unlock(&n->list_lock); |
---|
| 2340 | + spin_unlock(&n->list_lock); |
---|
2214 | 2341 | |
---|
2215 | | - if (m == M_FREE) { |
---|
| 2342 | + if (m == M_PARTIAL) |
---|
| 2343 | + stat(s, tail); |
---|
| 2344 | + else if (m == M_FULL) |
---|
| 2345 | + stat(s, DEACTIVATE_FULL); |
---|
| 2346 | + else if (m == M_FREE) { |
---|
2216 | 2347 | stat(s, DEACTIVATE_EMPTY); |
---|
2217 | 2348 | discard_slab(s, page); |
---|
2218 | 2349 | stat(s, FREE_SLAB); |
---|
.. | .. |
---|
2220 | 2351 | |
---|
2221 | 2352 | c->page = NULL; |
---|
2222 | 2353 | c->freelist = NULL; |
---|
| 2354 | + c->tid = next_tid(c->tid); |
---|
2223 | 2355 | } |
---|
2224 | 2356 | |
---|
2225 | 2357 | /* |
---|
.. | .. |
---|
2236 | 2368 | struct kmem_cache_node *n = NULL, *n2 = NULL; |
---|
2237 | 2369 | struct page *page, *discard_page = NULL; |
---|
2238 | 2370 | |
---|
2239 | | - while ((page = c->partial)) { |
---|
| 2371 | + while ((page = slub_percpu_partial(c))) { |
---|
2240 | 2372 | struct page new; |
---|
2241 | 2373 | struct page old; |
---|
2242 | 2374 | |
---|
2243 | | - c->partial = page->next; |
---|
| 2375 | + slub_set_percpu_partial(c, page); |
---|
2244 | 2376 | |
---|
2245 | 2377 | n2 = get_node(s, page_to_nid(page)); |
---|
2246 | 2378 | if (n != n2) { |
---|
2247 | 2379 | if (n) |
---|
2248 | | - raw_spin_unlock(&n->list_lock); |
---|
| 2380 | + spin_unlock(&n->list_lock); |
---|
2249 | 2381 | |
---|
2250 | 2382 | n = n2; |
---|
2251 | | - raw_spin_lock(&n->list_lock); |
---|
| 2383 | + spin_lock(&n->list_lock); |
---|
2252 | 2384 | } |
---|
2253 | 2385 | |
---|
2254 | 2386 | do { |
---|
.. | .. |
---|
2277 | 2409 | } |
---|
2278 | 2410 | |
---|
2279 | 2411 | if (n) |
---|
2280 | | - raw_spin_unlock(&n->list_lock); |
---|
| 2412 | + spin_unlock(&n->list_lock); |
---|
2281 | 2413 | |
---|
2282 | 2414 | while (discard_page) { |
---|
2283 | 2415 | page = discard_page; |
---|
.. | .. |
---|
2287 | 2419 | discard_slab(s, page); |
---|
2288 | 2420 | stat(s, FREE_SLAB); |
---|
2289 | 2421 | } |
---|
2290 | | -#endif |
---|
| 2422 | +#endif /* CONFIG_SLUB_CPU_PARTIAL */ |
---|
2291 | 2423 | } |
---|
2292 | 2424 | |
---|
2293 | 2425 | /* |
---|
2294 | | - * Put a page that was just frozen (in __slab_free) into a partial page |
---|
2295 | | - * slot if available. |
---|
| 2426 | + * Put a page that was just frozen (in __slab_free|get_partial_node) into a |
---|
| 2427 | + * partial page slot if available. |
---|
2296 | 2428 | * |
---|
2297 | 2429 | * If we did not find a slot then simply move all the partials to the |
---|
2298 | 2430 | * per node partial list. |
---|
.. | .. |
---|
2313 | 2445 | if (oldpage) { |
---|
2314 | 2446 | pobjects = oldpage->pobjects; |
---|
2315 | 2447 | pages = oldpage->pages; |
---|
2316 | | - if (drain && pobjects > s->cpu_partial) { |
---|
2317 | | - struct slub_free_list *f; |
---|
| 2448 | + if (drain && pobjects > slub_cpu_partial(s)) { |
---|
2318 | 2449 | unsigned long flags; |
---|
2319 | | - LIST_HEAD(tofree); |
---|
2320 | 2450 | /* |
---|
2321 | 2451 | * partial array is full. Move the existing |
---|
2322 | 2452 | * set to the per node partial list. |
---|
2323 | 2453 | */ |
---|
2324 | 2454 | local_irq_save(flags); |
---|
2325 | 2455 | unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); |
---|
2326 | | - f = this_cpu_ptr(&slub_free_list); |
---|
2327 | | - raw_spin_lock(&f->lock); |
---|
2328 | | - list_splice_init(&f->list, &tofree); |
---|
2329 | | - raw_spin_unlock(&f->lock); |
---|
2330 | 2456 | local_irq_restore(flags); |
---|
2331 | | - free_delayed(&tofree); |
---|
2332 | 2457 | oldpage = NULL; |
---|
2333 | 2458 | pobjects = 0; |
---|
2334 | 2459 | pages = 0; |
---|
.. | .. |
---|
2345 | 2470 | |
---|
2346 | 2471 | } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) |
---|
2347 | 2472 | != oldpage); |
---|
2348 | | - if (unlikely(!s->cpu_partial)) { |
---|
| 2473 | + if (unlikely(!slub_cpu_partial(s))) { |
---|
2349 | 2474 | unsigned long flags; |
---|
2350 | 2475 | |
---|
2351 | 2476 | local_irq_save(flags); |
---|
.. | .. |
---|
2353 | 2478 | local_irq_restore(flags); |
---|
2354 | 2479 | } |
---|
2355 | 2480 | preempt_enable(); |
---|
2356 | | -#endif |
---|
| 2481 | +#endif /* CONFIG_SLUB_CPU_PARTIAL */ |
---|
2357 | 2482 | } |
---|
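put_cpu_partial() publishes the freshly frozen page at the head of the per-cpu partial list without taking a lock: it samples the current head, links it behind the new page, and retries the this_cpu_cmpxchg() until no interrupt or migration has changed c->partial in between. A minimal userspace analogy of that retry loop, using C11 atomics purely for illustration (the kernel uses a per-cpu cmpxchg, not a shared atomic), is:

	#include <stdatomic.h>
	#include <stddef.h>

	struct page_model { struct page_model *next; };

	/* Illustrative push: read the head, chain it behind the new node,
	 * publish with compare-and-swap, retry if someone else won the race. */
	static void push_head(_Atomic(struct page_model *) *head,
			      struct page_model *page)
	{
		struct page_model *old = atomic_load(head);

		do {
			page->next = old;
		} while (!atomic_compare_exchange_weak(head, &old, page));
	}

The drain branch above is the safety valve: once pobjects crosses slub_cpu_partial(s), the whole chain is handed back to the node lists under local_irq_save() rather than growing without bound.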
2358 | 2483 | |
---|
2359 | 2484 | static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) |
---|
2360 | 2485 | { |
---|
2361 | 2486 | stat(s, CPUSLAB_FLUSH); |
---|
2362 | 2487 | deactivate_slab(s, c->page, c->freelist, c); |
---|
2363 | | - |
---|
2364 | | - c->tid = next_tid(c->tid); |
---|
2365 | 2488 | } |
---|
2366 | 2489 | |
---|
2367 | 2490 | /* |
---|
.. | .. |
---|
2373 | 2496 | { |
---|
2374 | 2497 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); |
---|
2375 | 2498 | |
---|
2376 | | - if (likely(c)) { |
---|
2377 | | - if (c->page) |
---|
2378 | | - flush_slab(s, c); |
---|
| 2499 | + if (c->page) |
---|
| 2500 | + flush_slab(s, c); |
---|
2379 | 2501 | |
---|
2380 | | - unfreeze_partials(s, c); |
---|
2381 | | - } |
---|
| 2502 | + unfreeze_partials(s, c); |
---|
2382 | 2503 | } |
---|
2383 | 2504 | |
---|
2384 | 2505 | static void flush_cpu_slab(void *d) |
---|
.. | .. |
---|
2398 | 2519 | |
---|
2399 | 2520 | static void flush_all(struct kmem_cache *s) |
---|
2400 | 2521 | { |
---|
2401 | | - LIST_HEAD(tofree); |
---|
2402 | | - int cpu; |
---|
2403 | | - |
---|
2404 | | - on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC); |
---|
2405 | | - for_each_online_cpu(cpu) { |
---|
2406 | | - struct slub_free_list *f; |
---|
2407 | | - |
---|
2408 | | - f = &per_cpu(slub_free_list, cpu); |
---|
2409 | | - raw_spin_lock_irq(&f->lock); |
---|
2410 | | - list_splice_init(&f->list, &tofree); |
---|
2411 | | - raw_spin_unlock_irq(&f->lock); |
---|
2412 | | - free_delayed(&tofree); |
---|
2413 | | - } |
---|
| 2522 | + on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1); |
---|
2414 | 2523 | } |
---|
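With the deferred-free list machinery gone, flush_all() is again a single conditional cross-call: on_each_cpu_cond() runs the predicate on every CPU and only sends the IPI where it returns true. The two callbacks named here are assumed to look roughly like the following (simplified sketch, not quoted from the file):

	/* Simplified sketch of the callback pair used by flush_all(). */
	static bool has_cpu_slab(int cpu, void *info)
	{
		struct kmem_cache *s = info;
		struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);

		/* Worth an IPI only if this CPU caches a slab or partial pages. */
		return c->page || slub_percpu_partial(c);
	}

	static void flush_cpu_slab(void *d)
	{
		struct kmem_cache *s = d;

		/* Runs on the target CPU, in IPI context. */
		__flush_cpu_slab(s, smp_processor_id());
	}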
2415 | 2524 | |
---|
2416 | 2525 | /* |
---|
.. | .. |
---|
2439 | 2548 | static inline int node_match(struct page *page, int node) |
---|
2440 | 2549 | { |
---|
2441 | 2550 | #ifdef CONFIG_NUMA |
---|
2442 | | - if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node)) |
---|
| 2551 | + if (node != NUMA_NO_NODE && page_to_nid(page) != node) |
---|
2443 | 2552 | return 0; |
---|
2444 | 2553 | #endif |
---|
2445 | 2554 | return 1; |
---|
.. | .. |
---|
2465 | 2574 | unsigned long x = 0; |
---|
2466 | 2575 | struct page *page; |
---|
2467 | 2576 | |
---|
2468 | | - raw_spin_lock_irqsave(&n->list_lock, flags); |
---|
2469 | | - list_for_each_entry(page, &n->partial, lru) |
---|
| 2577 | + spin_lock_irqsave(&n->list_lock, flags); |
---|
| 2578 | + list_for_each_entry(page, &n->partial, slab_list) |
---|
2470 | 2579 | x += get_count(page); |
---|
2471 | | - raw_spin_unlock_irqrestore(&n->list_lock, flags); |
---|
| 2580 | + spin_unlock_irqrestore(&n->list_lock, flags); |
---|
2472 | 2581 | return x; |
---|
2473 | 2582 | } |
---|
2474 | 2583 | #endif /* CONFIG_SLUB_DEBUG || CONFIG_SLUB_SYSFS */ |
---|
.. | .. |
---|
2540 | 2649 | stat(s, ALLOC_SLAB); |
---|
2541 | 2650 | c->page = page; |
---|
2542 | 2651 | *pc = c; |
---|
2543 | | - } else |
---|
2544 | | - freelist = NULL; |
---|
| 2652 | + } |
---|
2545 | 2653 | |
---|
2546 | 2654 | return freelist; |
---|
2547 | 2655 | } |
---|
.. | .. |
---|
2608 | 2716 | * already disabled (which is the case for bulk allocation). |
---|
2609 | 2717 | */ |
---|
2610 | 2718 | static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, |
---|
2611 | | - unsigned long addr, struct kmem_cache_cpu *c, |
---|
2612 | | - struct list_head *to_free) |
---|
| 2719 | + unsigned long addr, struct kmem_cache_cpu *c) |
---|
2613 | 2720 | { |
---|
2614 | | - struct slub_free_list *f; |
---|
2615 | 2721 | void *freelist; |
---|
2616 | 2722 | struct page *page; |
---|
| 2723 | + |
---|
| 2724 | + stat(s, ALLOC_SLOWPATH); |
---|
2617 | 2725 | |
---|
2618 | 2726 | page = c->page; |
---|
2619 | 2727 | if (!page) { |
---|
.. | .. |
---|
2662 | 2770 | |
---|
2663 | 2771 | if (!freelist) { |
---|
2664 | 2772 | c->page = NULL; |
---|
| 2773 | + c->tid = next_tid(c->tid); |
---|
2665 | 2774 | stat(s, DEACTIVATE_BYPASS); |
---|
2666 | 2775 | goto new_slab; |
---|
2667 | 2776 | } |
---|
.. | .. |
---|
2677 | 2786 | VM_BUG_ON(!c->page->frozen); |
---|
2678 | 2787 | c->freelist = get_freepointer(s, freelist); |
---|
2679 | 2788 | c->tid = next_tid(c->tid); |
---|
2680 | | - |
---|
2681 | | -out: |
---|
2682 | | - f = this_cpu_ptr(&slub_free_list); |
---|
2683 | | - raw_spin_lock(&f->lock); |
---|
2684 | | - list_splice_init(&f->list, to_free); |
---|
2685 | | - raw_spin_unlock(&f->lock); |
---|
2686 | | - |
---|
2687 | 2789 | return freelist; |
---|
2688 | 2790 | |
---|
2689 | 2791 | new_slab: |
---|
.. | .. |
---|
2699 | 2801 | |
---|
2700 | 2802 | if (unlikely(!freelist)) { |
---|
2701 | 2803 | slab_out_of_memory(s, gfpflags, node); |
---|
2702 | | - goto out; |
---|
| 2804 | + return NULL; |
---|
2703 | 2805 | } |
---|
2704 | 2806 | |
---|
2705 | 2807 | page = c->page; |
---|
.. | .. |
---|
2712 | 2814 | goto new_slab; /* Slab failed checks. Next slab needed */ |
---|
2713 | 2815 | |
---|
2714 | 2816 | deactivate_slab(s, page, get_freepointer(s, freelist), c); |
---|
2715 | | - goto out; |
---|
| 2817 | + return freelist; |
---|
2716 | 2818 | } |
---|
2717 | 2819 | |
---|
2718 | 2820 | /* |
---|
.. | .. |
---|
2724 | 2826 | { |
---|
2725 | 2827 | void *p; |
---|
2726 | 2828 | unsigned long flags; |
---|
2727 | | - LIST_HEAD(tofree); |
---|
2728 | 2829 | |
---|
2729 | 2830 | local_irq_save(flags); |
---|
2730 | | -#ifdef CONFIG_PREEMPT |
---|
| 2831 | +#ifdef CONFIG_PREEMPTION |
---|
2731 | 2832 | /* |
---|
2732 | 2833 | * We may have been preempted and rescheduled on a different |
---|
2733 | 2834 | * cpu before disabling interrupts. Need to reload cpu area |
---|
.. | .. |
---|
2736 | 2837 | c = this_cpu_ptr(s->cpu_slab); |
---|
2737 | 2838 | #endif |
---|
2738 | 2839 | |
---|
2739 | | - p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree); |
---|
| 2840 | + p = ___slab_alloc(s, gfpflags, node, addr, c); |
---|
2740 | 2841 | local_irq_restore(flags); |
---|
2741 | | - free_delayed(&tofree); |
---|
2742 | 2842 | return p; |
---|
2743 | 2843 | } |
---|
2744 | 2844 | |
---|
.. | .. |
---|
2750 | 2850 | void *obj) |
---|
2751 | 2851 | { |
---|
2752 | 2852 | if (unlikely(slab_want_init_on_free(s)) && obj) |
---|
2753 | | - memset((void *)((char *)obj + s->offset), 0, sizeof(void *)); |
---|
| 2853 | + memset((void *)((char *)kasan_reset_tag(obj) + s->offset), |
---|
| 2854 | + 0, sizeof(void *)); |
---|
2754 | 2855 | } |
---|
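maybe_wipe_obj_freeptr() now strips the KASAN tag before doing the offset arithmetic. The free-pointer word is allocator metadata, and touching it through a tagged pointer can trip tag-based KASAN (the granule may be retagged or lie outside the object's valid range), so the tag is normalised first. Purely as a conceptual model, assuming an arm64-style top-byte tag; this is not the kernel implementation:

	/*
	 * Conceptual model of "reset the tag": replace whatever tag sits in
	 * the top byte of the pointer with the fixed match-all value.
	 * Illustration only; the real helper is arch- and mode-specific.
	 */
	static inline void *reset_tag_model(const void *ptr)
	{
		return (void *)(((unsigned long)ptr & ~(0xffUL << 56)) |
				(0xffUL << 56));
	}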
2755 | 2856 | |
---|
2756 | 2857 | /* |
---|
.. | .. |
---|
2764 | 2865 | * Otherwise we can simply pick the next object from the lockless free list. |
---|
2765 | 2866 | */ |
---|
2766 | 2867 | static __always_inline void *slab_alloc_node(struct kmem_cache *s, |
---|
2767 | | - gfp_t gfpflags, int node, unsigned long addr) |
---|
| 2868 | + gfp_t gfpflags, int node, unsigned long addr, size_t orig_size) |
---|
2768 | 2869 | { |
---|
2769 | 2870 | void *object; |
---|
2770 | 2871 | struct kmem_cache_cpu *c; |
---|
2771 | 2872 | struct page *page; |
---|
2772 | 2873 | unsigned long tid; |
---|
| 2874 | + struct obj_cgroup *objcg = NULL; |
---|
| 2875 | + bool init = false; |
---|
2773 | 2876 | |
---|
2774 | | - s = slab_pre_alloc_hook(s, gfpflags); |
---|
| 2877 | + s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags); |
---|
2775 | 2878 | if (!s) |
---|
2776 | 2879 | return NULL; |
---|
| 2880 | + |
---|
| 2881 | + object = kfence_alloc(s, orig_size, gfpflags); |
---|
| 2882 | + if (unlikely(object)) |
---|
| 2883 | + goto out; |
---|
| 2884 | + |
---|
2777 | 2885 | redo: |
---|
2778 | 2886 | /* |
---|
2779 | 2887 | * Must read kmem_cache cpu data via this cpu ptr. Preemption is |
---|
.. | .. |
---|
2782 | 2890 | * as we end up on the original cpu again when doing the cmpxchg. |
---|
2783 | 2891 | * |
---|
2784 | 2892 | * We should guarantee that tid and kmem_cache are retrieved on |
---|
2785 | | - * the same cpu. It could be different if CONFIG_PREEMPT so we need |
---|
| 2893 | + * the same cpu. It could be different if CONFIG_PREEMPTION so we need |
---|
2786 | 2894 | * to check if it is matched or not. |
---|
2787 | 2895 | */ |
---|
2788 | 2896 | do { |
---|
2789 | 2897 | tid = this_cpu_read(s->cpu_slab->tid); |
---|
2790 | 2898 | c = raw_cpu_ptr(s->cpu_slab); |
---|
2791 | | - } while (IS_ENABLED(CONFIG_PREEMPT) && |
---|
| 2899 | + } while (IS_ENABLED(CONFIG_PREEMPTION) && |
---|
2792 | 2900 | unlikely(tid != READ_ONCE(c->tid))); |
---|
2793 | 2901 | |
---|
2794 | 2902 | /* |
---|
.. | .. |
---|
2810 | 2918 | |
---|
2811 | 2919 | object = c->freelist; |
---|
2812 | 2920 | page = c->page; |
---|
2813 | | - if (unlikely(!object || !node_match(page, node))) { |
---|
| 2921 | + if (unlikely(!object || !page || !node_match(page, node))) { |
---|
2814 | 2922 | object = __slab_alloc(s, gfpflags, node, addr, c); |
---|
2815 | | - stat(s, ALLOC_SLOWPATH); |
---|
2816 | 2923 | } else { |
---|
2817 | 2924 | void *next_object = get_freepointer_safe(s, object); |
---|
2818 | 2925 | |
---|
.. | .. |
---|
2843 | 2950 | } |
---|
2844 | 2951 | |
---|
2845 | 2952 | maybe_wipe_obj_freeptr(s, object); |
---|
| 2953 | + init = slab_want_init_on_alloc(gfpflags, s); |
---|
2846 | 2954 | |
---|
2847 | | - if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) |
---|
2848 | | - memset(object, 0, s->object_size); |
---|
2849 | | - |
---|
2850 | | - slab_post_alloc_hook(s, gfpflags, 1, &object); |
---|
| 2955 | +out: |
---|
| 2956 | + slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init); |
---|
2851 | 2957 | |
---|
2852 | 2958 | return object; |
---|
2853 | 2959 | } |
---|
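The fastpath in slab_alloc_node() stays lockless because of the transaction id: tid and the kmem_cache_cpu pointer must be sampled on the same CPU, and the final this_cpu_cmpxchg_double() only succeeds if neither the freelist nor the tid moved in the meantime. A simplified sketch of the tid scheme, assumed from the surrounding code rather than quoted from it:

	/*
	 * Each CPU starts its tid at its own CPU number and always advances
	 * by TID_STEP, a power of two >= the number of CPUs, so tids from
	 * different CPUs can never collide.
	 */
	#ifdef CONFIG_PREEMPTION
	#define TID_STEP	roundup_pow_of_two(CONFIG_NR_CPUS)
	#else
	#define TID_STEP	1
	#endif

	static inline unsigned long next_tid(unsigned long tid)
	{
		return tid + TID_STEP;	/* bumped on every per-cpu state change */
	}

This also appears to be the point of the c->tid = next_tid(c->tid) lines added above wherever c->page is torn down: a fastpath transaction that sampled the old state will fail its cmpxchg and retry instead of handing out an object from a detached page.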
2854 | 2960 | |
---|
2855 | 2961 | static __always_inline void *slab_alloc(struct kmem_cache *s, |
---|
2856 | | - gfp_t gfpflags, unsigned long addr) |
---|
| 2962 | + gfp_t gfpflags, unsigned long addr, size_t orig_size) |
---|
2857 | 2963 | { |
---|
2858 | | - return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr); |
---|
| 2964 | + return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr, orig_size); |
---|
2859 | 2965 | } |
---|
2860 | 2966 | |
---|
2861 | 2967 | void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) |
---|
2862 | 2968 | { |
---|
2863 | | - void *ret = slab_alloc(s, gfpflags, _RET_IP_); |
---|
| 2969 | + void *ret = slab_alloc(s, gfpflags, _RET_IP_, s->object_size); |
---|
2864 | 2970 | |
---|
2865 | 2971 | trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, |
---|
2866 | 2972 | s->size, gfpflags); |
---|
.. | .. |
---|
2872 | 2978 | #ifdef CONFIG_TRACING |
---|
2873 | 2979 | void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) |
---|
2874 | 2980 | { |
---|
2875 | | - void *ret = slab_alloc(s, gfpflags, _RET_IP_); |
---|
| 2981 | + void *ret = slab_alloc(s, gfpflags, _RET_IP_, size); |
---|
2876 | 2982 | trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags); |
---|
2877 | 2983 | ret = kasan_kmalloc(s, ret, size, gfpflags); |
---|
2878 | 2984 | return ret; |
---|
.. | .. |
---|
2883 | 2989 | #ifdef CONFIG_NUMA |
---|
2884 | 2990 | void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) |
---|
2885 | 2991 | { |
---|
2886 | | - void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_); |
---|
| 2992 | + void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, s->object_size); |
---|
2887 | 2993 | |
---|
2888 | 2994 | trace_kmem_cache_alloc_node(_RET_IP_, ret, |
---|
2889 | 2995 | s->object_size, s->size, gfpflags, node); |
---|
.. | .. |
---|
2897 | 3003 | gfp_t gfpflags, |
---|
2898 | 3004 | int node, size_t size) |
---|
2899 | 3005 | { |
---|
2900 | | - void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_); |
---|
| 3006 | + void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, size); |
---|
2901 | 3007 | |
---|
2902 | 3008 | trace_kmalloc_node(_RET_IP_, ret, |
---|
2903 | 3009 | size, s->size, gfpflags, node); |
---|
.. | .. |
---|
2907 | 3013 | } |
---|
2908 | 3014 | EXPORT_SYMBOL(kmem_cache_alloc_node_trace); |
---|
2909 | 3015 | #endif |
---|
2910 | | -#endif |
---|
| 3016 | +#endif /* CONFIG_NUMA */ |
---|
2911 | 3017 | |
---|
2912 | 3018 | /* |
---|
2913 | 3019 | * Slow path handling. This may still be called frequently since objects |
---|
.. | .. |
---|
2927 | 3033 | struct page new; |
---|
2928 | 3034 | unsigned long counters; |
---|
2929 | 3035 | struct kmem_cache_node *n = NULL; |
---|
2930 | | - unsigned long uninitialized_var(flags); |
---|
| 3036 | + unsigned long flags; |
---|
2931 | 3037 | |
---|
2932 | 3038 | stat(s, FREE_SLOWPATH); |
---|
| 3039 | + |
---|
| 3040 | + if (kfence_free(head)) |
---|
| 3041 | + return; |
---|
2933 | 3042 | |
---|
2934 | 3043 | if (kmem_cache_debug(s) && |
---|
2935 | 3044 | !free_debug_processing(s, page, head, tail, cnt, addr)) |
---|
.. | .. |
---|
2937 | 3046 | |
---|
2938 | 3047 | do { |
---|
2939 | 3048 | if (unlikely(n)) { |
---|
2940 | | - raw_spin_unlock_irqrestore(&n->list_lock, flags); |
---|
| 3049 | + spin_unlock_irqrestore(&n->list_lock, flags); |
---|
2941 | 3050 | n = NULL; |
---|
2942 | 3051 | } |
---|
2943 | 3052 | prior = page->freelist; |
---|
.. | .. |
---|
2969 | 3078 | * Otherwise the list_lock will synchronize with |
---|
2970 | 3079 | * other processors updating the list of slabs. |
---|
2971 | 3080 | */ |
---|
2972 | | - raw_spin_lock_irqsave(&n->list_lock, flags); |
---|
| 3081 | + spin_lock_irqsave(&n->list_lock, flags); |
---|
2973 | 3082 | |
---|
2974 | 3083 | } |
---|
2975 | 3084 | } |
---|
.. | .. |
---|
2981 | 3090 | |
---|
2982 | 3091 | if (likely(!n)) { |
---|
2983 | 3092 | |
---|
2984 | | - /* |
---|
2985 | | - * If we just froze the page then put it onto the |
---|
2986 | | - * per cpu partial list. |
---|
2987 | | - */ |
---|
2988 | | - if (new.frozen && !was_frozen) { |
---|
| 3093 | + if (likely(was_frozen)) { |
---|
| 3094 | + /* |
---|
| 3095 | + * The list lock was not taken therefore no list |
---|
| 3096 | + * activity can be necessary. |
---|
| 3097 | + */ |
---|
| 3098 | + stat(s, FREE_FROZEN); |
---|
| 3099 | + } else if (new.frozen) { |
---|
| 3100 | + /* |
---|
| 3101 | + * If we just froze the page then put it onto the |
---|
| 3102 | + * per cpu partial list. |
---|
| 3103 | + */ |
---|
2989 | 3104 | put_cpu_partial(s, page, 1); |
---|
2990 | 3105 | stat(s, CPU_PARTIAL_FREE); |
---|
2991 | 3106 | } |
---|
2992 | | - /* |
---|
2993 | | - * The list lock was not taken therefore no list |
---|
2994 | | - * activity can be necessary. |
---|
2995 | | - */ |
---|
2996 | | - if (was_frozen) |
---|
2997 | | - stat(s, FREE_FROZEN); |
---|
| 3107 | + |
---|
2998 | 3108 | return; |
---|
2999 | 3109 | } |
---|
3000 | 3110 | |
---|
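The rewritten tail of __slab_free() reads best as a three-way decision on the page state observed when the cmpxchg committed the free. A toy standalone model of that decision (illustration only, not kernel code):

	#include <stdio.h>
	#include <stdbool.h>

	/* Toy model of the branch structure in __slab_free(). */
	static const char *free_outcome(bool was_frozen, bool now_frozen)
	{
		if (was_frozen)
			return "page is some CPU's slab: count FREE_FROZEN, no list work";
		if (now_frozen)
			return "we froze it ourselves: park it on the per-cpu partial list";
		return "page stays unfrozen: take n->list_lock and fix up the node lists";
	}

	int main(void)
	{
		printf("%s\n", free_outcome(true, true));
		printf("%s\n", free_outcome(false, true));
		printf("%s\n", free_outcome(false, false));
		return 0;
	}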
.. | .. |
---|
3006 | 3116 | * then add it. |
---|
3007 | 3117 | */ |
---|
3008 | 3118 | if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) { |
---|
3009 | | - if (kmem_cache_debug(s)) |
---|
3010 | | - remove_full(s, n, page); |
---|
| 3119 | + remove_full(s, n, page); |
---|
3011 | 3120 | add_partial(n, page, DEACTIVATE_TO_TAIL); |
---|
3012 | 3121 | stat(s, FREE_ADD_PARTIAL); |
---|
3013 | 3122 | } |
---|
3014 | | - raw_spin_unlock_irqrestore(&n->list_lock, flags); |
---|
| 3123 | + spin_unlock_irqrestore(&n->list_lock, flags); |
---|
3015 | 3124 | return; |
---|
3016 | 3125 | |
---|
3017 | 3126 | slab_empty: |
---|
.. | .. |
---|
3026 | 3135 | remove_full(s, n, page); |
---|
3027 | 3136 | } |
---|
3028 | 3137 | |
---|
3029 | | - raw_spin_unlock_irqrestore(&n->list_lock, flags); |
---|
| 3138 | + spin_unlock_irqrestore(&n->list_lock, flags); |
---|
3030 | 3139 | stat(s, FREE_SLAB); |
---|
3031 | 3140 | discard_slab(s, page); |
---|
3032 | 3141 | } |
---|
.. | .. |
---|
3053 | 3162 | void *tail_obj = tail ? : head; |
---|
3054 | 3163 | struct kmem_cache_cpu *c; |
---|
3055 | 3164 | unsigned long tid; |
---|
| 3165 | + |
---|
| 3166 | + /* memcg_slab_free_hook() is already called for bulk free. */ |
---|
| 3167 | + if (!tail) |
---|
| 3168 | + memcg_slab_free_hook(s, &head, 1); |
---|
3056 | 3169 | redo: |
---|
3057 | 3170 | /* |
---|
3058 | 3171 | * Determine the current cpu's per cpu slab. |
---|
.. | .. |
---|
3063 | 3176 | do { |
---|
3064 | 3177 | tid = this_cpu_read(s->cpu_slab->tid); |
---|
3065 | 3178 | c = raw_cpu_ptr(s->cpu_slab); |
---|
3066 | | - } while (IS_ENABLED(CONFIG_PREEMPT) && |
---|
| 3179 | + } while (IS_ENABLED(CONFIG_PREEMPTION) && |
---|
3067 | 3180 | unlikely(tid != READ_ONCE(c->tid))); |
---|
3068 | 3181 | |
---|
3069 | 3182 | /* Same with comment on barrier() in slab_alloc_node() */ |
---|
.. | .. |
---|
3173 | 3286 | df->s = cache_from_obj(s, object); /* Support for memcg */ |
---|
3174 | 3287 | } |
---|
3175 | 3288 | |
---|
| 3289 | + if (is_kfence_address(object)) { |
---|
| 3290 | + slab_free_hook(df->s, object, false); |
---|
| 3291 | + __kfence_free(object); |
---|
| 3292 | + p[size] = NULL; /* mark object processed */ |
---|
| 3293 | + return size; |
---|
| 3294 | + } |
---|
| 3295 | + |
---|
3176 | 3296 | /* Start new detached freelist */ |
---|
3177 | 3297 | df->page = page; |
---|
3178 | 3298 | set_freepointer(df->s, object, NULL); |
---|
.. | .. |
---|
3214 | 3334 | if (WARN_ON(!size)) |
---|
3215 | 3335 | return; |
---|
3216 | 3336 | |
---|
| 3337 | + memcg_slab_free_hook(s, p, size); |
---|
3217 | 3338 | do { |
---|
3218 | 3339 | struct detached_freelist df; |
---|
3219 | 3340 | |
---|
.. | .. |
---|
3231 | 3352 | void **p) |
---|
3232 | 3353 | { |
---|
3233 | 3354 | struct kmem_cache_cpu *c; |
---|
3234 | | - LIST_HEAD(to_free); |
---|
3235 | 3355 | int i; |
---|
| 3356 | + struct obj_cgroup *objcg = NULL; |
---|
3236 | 3357 | |
---|
3237 | 3358 | /* memcg and kmem_cache debug support */ |
---|
3238 | | - s = slab_pre_alloc_hook(s, flags); |
---|
| 3359 | + s = slab_pre_alloc_hook(s, &objcg, size, flags); |
---|
3239 | 3360 | if (unlikely(!s)) |
---|
3240 | 3361 | return false; |
---|
3241 | 3362 | /* |
---|
.. | .. |
---|
3247 | 3368 | c = this_cpu_ptr(s->cpu_slab); |
---|
3248 | 3369 | |
---|
3249 | 3370 | for (i = 0; i < size; i++) { |
---|
3250 | | - void *object = c->freelist; |
---|
| 3371 | + void *object = kfence_alloc(s, s->object_size, flags); |
---|
3251 | 3372 | |
---|
| 3373 | + if (unlikely(object)) { |
---|
| 3374 | + p[i] = object; |
---|
| 3375 | + continue; |
---|
| 3376 | + } |
---|
| 3377 | + |
---|
| 3378 | + object = c->freelist; |
---|
3252 | 3379 | if (unlikely(!object)) { |
---|
3253 | 3380 | /* |
---|
3254 | 3381 | * We may have removed an object from c->freelist using |
---|
.. | .. |
---|
3264 | 3391 | * of re-populating per CPU c->freelist |
---|
3265 | 3392 | */ |
---|
3266 | 3393 | p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, |
---|
3267 | | - _RET_IP_, c, &to_free); |
---|
| 3394 | + _RET_IP_, c); |
---|
3268 | 3395 | if (unlikely(!p[i])) |
---|
3269 | 3396 | goto error; |
---|
3270 | 3397 | |
---|
.. | .. |
---|
3279 | 3406 | } |
---|
3280 | 3407 | c->tid = next_tid(c->tid); |
---|
3281 | 3408 | local_irq_enable(); |
---|
3282 | | - free_delayed(&to_free); |
---|
3283 | 3409 | |
---|
3284 | | - /* Clear memory outside IRQ disabled fastpath loop */ |
---|
3285 | | - if (unlikely(slab_want_init_on_alloc(flags, s))) { |
---|
3286 | | - int j; |
---|
3287 | | - |
---|
3288 | | - for (j = 0; j < i; j++) |
---|
3289 | | - memset(p[j], 0, s->object_size); |
---|
3290 | | - } |
---|
3291 | | - |
---|
3292 | | - /* memcg and kmem_cache debug support */ |
---|
3293 | | - slab_post_alloc_hook(s, flags, size, p); |
---|
| 3410 | + /* |
---|
| 3411 | + * memcg and kmem_cache debug support and memory initialization. |
---|
| 3412 | + * Done outside of the IRQ disabled fastpath loop. |
---|
| 3413 | + */ |
---|
| 3414 | + slab_post_alloc_hook(s, objcg, flags, size, p, |
---|
| 3415 | + slab_want_init_on_alloc(flags, s)); |
---|
3294 | 3416 | return i; |
---|
3295 | 3417 | error: |
---|
3296 | 3418 | local_irq_enable(); |
---|
3297 | | - free_delayed(&to_free); |
---|
3298 | | - slab_post_alloc_hook(s, flags, i, p); |
---|
| 3419 | + slab_post_alloc_hook(s, objcg, flags, i, p, false); |
---|
3299 | 3420 | __kmem_cache_free_bulk(s, i, p); |
---|
3300 | 3421 | return 0; |
---|
3301 | 3422 | } |
---|
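The bulk allocation path above pairs with kmem_cache_free_bulk(). A minimal, hypothetical caller (my_cache is an assumed, already-created cache, not something defined in this file) might look like:

	/* Hypothetical user of the bulk API. */
	static int grab_sixteen(struct kmem_cache *my_cache)
	{
		void *objs[16];

		/* Either fills the whole array or, as the error path above
		 * shows, frees any partial progress and returns 0. */
		if (!kmem_cache_alloc_bulk(my_cache, GFP_KERNEL,
					   ARRAY_SIZE(objs), objs))
			return -ENOMEM;

		/* ... use objs[0..15] ... */

		kmem_cache_free_bulk(my_cache, ARRAY_SIZE(objs), objs);
		return 0;
	}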
.. | .. |
---|
3430 | 3551 | init_kmem_cache_node(struct kmem_cache_node *n) |
---|
3431 | 3552 | { |
---|
3432 | 3553 | n->nr_partial = 0; |
---|
3433 | | - raw_spin_lock_init(&n->list_lock); |
---|
| 3554 | + spin_lock_init(&n->list_lock); |
---|
3434 | 3555 | INIT_LIST_HEAD(&n->partial); |
---|
3435 | 3556 | #ifdef CONFIG_SLUB_DEBUG |
---|
3436 | 3557 | atomic_long_set(&n->nr_slabs, 0); |
---|
.. | .. |
---|
3491 | 3612 | init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); |
---|
3492 | 3613 | init_tracking(kmem_cache_node, n); |
---|
3493 | 3614 | #endif |
---|
3494 | | - n = kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node), |
---|
3495 | | - GFP_KERNEL); |
---|
| 3615 | + n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false); |
---|
3496 | 3616 | page->freelist = get_freepointer(kmem_cache_node, n); |
---|
3497 | 3617 | page->inuse = 1; |
---|
3498 | 3618 | page->frozen = 0; |
---|
.. | .. |
---|
3580 | 3700 | * 50% to keep some capacity around for frees. |
---|
3581 | 3701 | */ |
---|
3582 | 3702 | if (!kmem_cache_has_cpu_partial(s)) |
---|
3583 | | - s->cpu_partial = 0; |
---|
| 3703 | + slub_set_cpu_partial(s, 0); |
---|
3584 | 3704 | else if (s->size >= PAGE_SIZE) |
---|
3585 | | - s->cpu_partial = 2; |
---|
| 3705 | + slub_set_cpu_partial(s, 2); |
---|
3586 | 3706 | else if (s->size >= 1024) |
---|
3587 | | - s->cpu_partial = 6; |
---|
| 3707 | + slub_set_cpu_partial(s, 6); |
---|
3588 | 3708 | else if (s->size >= 256) |
---|
3589 | | - s->cpu_partial = 13; |
---|
| 3709 | + slub_set_cpu_partial(s, 13); |
---|
3590 | 3710 | else |
---|
3591 | | - s->cpu_partial = 30; |
---|
| 3711 | + slub_set_cpu_partial(s, 30); |
---|
3592 | 3712 | #endif |
---|
3593 | 3713 | } |
---|
3594 | 3714 | |
---|
.. | .. |
---|
3633 | 3753 | |
---|
3634 | 3754 | /* |
---|
3635 | 3755 | * With that we have determined the number of bytes in actual use |
---|
3636 | | - * by the object. This is the potential offset to the free pointer. |
---|
| 3756 | + * by the object and redzoning. |
---|
3637 | 3757 | */ |
---|
3638 | 3758 | s->inuse = size; |
---|
3639 | 3759 | |
---|
3640 | | - if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || |
---|
3641 | | - s->ctor)) { |
---|
| 3760 | + if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || |
---|
| 3761 | + ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) || |
---|
| 3762 | + s->ctor) { |
---|
3642 | 3763 | /* |
---|
3643 | 3764 | * Relocate free pointer after the object if it is not |
---|
3644 | 3765 | * permitted to overwrite the first word of the object on |
---|
3645 | 3766 | * kmem_cache_free. |
---|
3646 | 3767 | * |
---|
3647 | 3768 | * This is the case if we do RCU, have a constructor or |
---|
3648 | | - * destructor or are poisoning the objects. |
---|
| 3769 | + * destructor, are poisoning the objects, or are |
---|
| 3770 | + * redzoning an object smaller than sizeof(void *). |
---|
| 3771 | + * |
---|
| 3772 | + * The assumption that s->offset >= s->inuse means free |
---|
| 3773 | + * pointer is outside of the object is used in the |
---|
| 3774 | + * freeptr_outside_object() function. If that is no |
---|
| 3775 | + * longer true, the function needs to be modified. |
---|
3649 | 3776 | */ |
---|
3650 | 3777 | s->offset = size; |
---|
3651 | 3778 | size += sizeof(void *); |
---|
| 3779 | + } else { |
---|
| 3780 | + /* |
---|
| 3781 | + * Store freelist pointer near middle of object to keep |
---|
| 3782 | + * it away from the edges of the object to avoid small |
---|
| 3783 | + * sized over/underflows from neighboring allocations. |
---|
| 3784 | + */ |
---|
| 3785 | + s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *)); |
---|
3652 | 3786 | } |
---|
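The midpoint placement in the else branch is easy to sanity-check by hand: ALIGN_DOWN(object_size / 2, sizeof(void *)) rounds the halfway point down to a pointer boundary, so on 64-bit a 96-byte object keeps its freelist pointer at offset 48 and a 24-byte object at offset 8, never at offset 0 or at the very end where a one-byte overflow from a neighbour would land. A throwaway userspace check of that arithmetic:

	#include <stdio.h>

	/* Same rounding the kernel macro performs, for power-of-two 'a'. */
	static unsigned long align_down(unsigned long x, unsigned long a)
	{
		return x & ~(a - 1);
	}

	int main(void)
	{
		unsigned long sizes[] = { 24, 64, 96, 192, 1000 };

		for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
			printf("object_size=%4lu -> free pointer offset %lu\n",
			       sizes[i],
			       align_down(sizes[i] / 2, sizeof(void *)));
		return 0;
	}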
3653 | 3787 | |
---|
3654 | 3788 | #ifdef CONFIG_SLUB_DEBUG |
---|
.. | .. |
---|
3685 | 3819 | */ |
---|
3686 | 3820 | size = ALIGN(size, s->align); |
---|
3687 | 3821 | s->size = size; |
---|
| 3822 | + s->reciprocal_size = reciprocal_value(size); |
---|
3688 | 3823 | if (forced_order >= 0) |
---|
3689 | 3824 | order = forced_order; |
---|
3690 | 3825 | else |
---|
.. | .. |
---|
3719 | 3854 | |
---|
3720 | 3855 | static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) |
---|
3721 | 3856 | { |
---|
3722 | | - s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor); |
---|
| 3857 | + s->flags = kmem_cache_flags(s->size, flags, s->name); |
---|
3723 | 3858 | #ifdef CONFIG_SLAB_FREELIST_HARDENED |
---|
3724 | 3859 | s->random = get_random_long(); |
---|
3725 | 3860 | #endif |
---|
.. | .. |
---|
3770 | 3905 | if (alloc_kmem_cache_cpus(s)) |
---|
3771 | 3906 | return 0; |
---|
3772 | 3907 | |
---|
3773 | | - free_kmem_cache_nodes(s); |
---|
3774 | 3908 | error: |
---|
3775 | | - if (flags & SLAB_PANIC) |
---|
3776 | | - panic("Cannot create slab %s size=%u realsize=%u order=%u offset=%u flags=%lx\n", |
---|
3777 | | - s->name, s->size, s->size, |
---|
3778 | | - oo_order(s->oo), s->offset, (unsigned long)flags); |
---|
| 3909 | + __kmem_cache_release(s); |
---|
3779 | 3910 | return -EINVAL; |
---|
3780 | 3911 | } |
---|
3781 | 3912 | |
---|
3782 | 3913 | static void list_slab_objects(struct kmem_cache *s, struct page *page, |
---|
3783 | | - const char *text) |
---|
| 3914 | + const char *text) |
---|
3784 | 3915 | { |
---|
3785 | 3916 | #ifdef CONFIG_SLUB_DEBUG |
---|
3786 | | -#ifdef CONFIG_PREEMPT_RT_BASE |
---|
3787 | | - /* XXX move out of irq-off section */ |
---|
3788 | | - slab_err(s, page, text, s->name); |
---|
3789 | | -#else |
---|
3790 | | - |
---|
3791 | 3917 | void *addr = page_address(page); |
---|
| 3918 | + unsigned long *map; |
---|
3792 | 3919 | void *p; |
---|
3793 | | - unsigned long *map = kcalloc(BITS_TO_LONGS(page->objects), |
---|
3794 | | - sizeof(long), |
---|
3795 | | - GFP_ATOMIC); |
---|
3796 | | - if (!map) |
---|
3797 | | - return; |
---|
| 3920 | + |
---|
3798 | 3921 | slab_err(s, page, text, s->name); |
---|
3799 | 3922 | slab_lock(page); |
---|
3800 | 3923 | |
---|
3801 | | - get_map(s, page, map); |
---|
| 3924 | + map = get_map(s, page); |
---|
3802 | 3925 | for_each_object(p, s, addr, page->objects) { |
---|
3803 | 3926 | |
---|
3804 | | - if (!test_bit(slab_index(p, s, addr), map)) { |
---|
| 3927 | + if (!test_bit(__obj_to_index(s, addr, p), map)) { |
---|
3805 | 3928 | pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr); |
---|
3806 | 3929 | print_tracking(s, p); |
---|
3807 | 3930 | } |
---|
3808 | 3931 | } |
---|
| 3932 | + put_map(map); |
---|
3809 | 3933 | slab_unlock(page); |
---|
3810 | | - kfree(map); |
---|
3811 | | -#endif |
---|
3812 | 3934 | #endif |
---|
3813 | 3935 | } |
---|
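__obj_to_index(), used here and in the validation and tracking code below, turns an object address into its slot number; judging by the reciprocal_size field added in calculate_sizes() above, it does the offset / s->size division as a multiply-and-shift by a precomputed reciprocal rather than a hardware divide. A rough userspace illustration of the idea (the kernel's reciprocal_value()/reciprocal_divide() helpers are more careful than this; the fixed 32-bit shift is an assumption for the demo):

	#include <stdio.h>
	#include <stdint.h>

	/* Toy reciprocal division, good enough for slab-sized offsets. */
	static uint32_t recip(uint32_t size)
	{
		return (uint32_t)(((1ULL << 32) + size - 1) / size);
	}

	static uint32_t obj_index(uint32_t offset, uint32_t m)
	{
		return (uint32_t)(((uint64_t)offset * m) >> 32);
	}

	int main(void)
	{
		uint32_t size = 712;	/* an odd slot size, e.g. with debug padding */
		uint32_t m = recip(size);

		for (uint32_t off = 0; off < 8 * size; off += size)
			printf("offset %5u -> index %u (expect %u)\n",
			       off, obj_index(off, m), off / size);
		return 0;
	}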
3814 | 3936 | |
---|
.. | .. |
---|
3823 | 3945 | struct page *page, *h; |
---|
3824 | 3946 | |
---|
3825 | 3947 | BUG_ON(irqs_disabled()); |
---|
3826 | | - raw_spin_lock_irq(&n->list_lock); |
---|
3827 | | - list_for_each_entry_safe(page, h, &n->partial, lru) { |
---|
| 3948 | + spin_lock_irq(&n->list_lock); |
---|
| 3949 | + list_for_each_entry_safe(page, h, &n->partial, slab_list) { |
---|
3828 | 3950 | if (!page->inuse) { |
---|
3829 | 3951 | remove_partial(n, page); |
---|
3830 | | - list_add(&page->lru, &discard); |
---|
| 3952 | + list_add(&page->slab_list, &discard); |
---|
3831 | 3953 | } else { |
---|
3832 | 3954 | list_slab_objects(s, page, |
---|
3833 | | - "Objects remaining in %s on __kmem_cache_shutdown()"); |
---|
| 3955 | + "Objects remaining in %s on __kmem_cache_shutdown()"); |
---|
3834 | 3956 | } |
---|
3835 | 3957 | } |
---|
3836 | | - raw_spin_unlock_irq(&n->list_lock); |
---|
| 3958 | + spin_unlock_irq(&n->list_lock); |
---|
3837 | 3959 | |
---|
3838 | | - list_for_each_entry_safe(page, h, &discard, lru) |
---|
| 3960 | + list_for_each_entry_safe(page, h, &discard, slab_list) |
---|
3839 | 3961 | discard_slab(s, page); |
---|
3840 | 3962 | } |
---|
3841 | 3963 | |
---|
.. | .. |
---|
3865 | 3987 | if (n->nr_partial || slabs_node(s, node)) |
---|
3866 | 3988 | return 1; |
---|
3867 | 3989 | } |
---|
3868 | | - sysfs_slab_remove(s); |
---|
3869 | 3990 | return 0; |
---|
3870 | 3991 | } |
---|
3871 | 3992 | |
---|
.. | .. |
---|
3914 | 4035 | if (unlikely(ZERO_OR_NULL_PTR(s))) |
---|
3915 | 4036 | return s; |
---|
3916 | 4037 | |
---|
3917 | | - ret = slab_alloc(s, flags, _RET_IP_); |
---|
| 4038 | + ret = slab_alloc(s, flags, _RET_IP_, size); |
---|
3918 | 4039 | |
---|
3919 | 4040 | trace_kmalloc(_RET_IP_, ret, size, s->size, flags); |
---|
3920 | 4041 | |
---|
.. | .. |
---|
3929 | 4050 | { |
---|
3930 | 4051 | struct page *page; |
---|
3931 | 4052 | void *ptr = NULL; |
---|
| 4053 | + unsigned int order = get_order(size); |
---|
3932 | 4054 | |
---|
3933 | 4055 | flags |= __GFP_COMP; |
---|
3934 | | - page = alloc_pages_node(node, flags, get_order(size)); |
---|
3935 | | - if (page) |
---|
| 4056 | + page = alloc_pages_node(node, flags, order); |
---|
| 4057 | + if (page) { |
---|
3936 | 4058 | ptr = page_address(page); |
---|
| 4059 | + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, |
---|
| 4060 | + PAGE_SIZE << order); |
---|
| 4061 | + } |
---|
3937 | 4062 | |
---|
3938 | 4063 | return kmalloc_large_node_hook(ptr, size, flags); |
---|
3939 | 4064 | } |
---|
.. | .. |
---|
3958 | 4083 | if (unlikely(ZERO_OR_NULL_PTR(s))) |
---|
3959 | 4084 | return s; |
---|
3960 | 4085 | |
---|
3961 | | - ret = slab_alloc_node(s, flags, node, _RET_IP_); |
---|
| 4086 | + ret = slab_alloc_node(s, flags, node, _RET_IP_, size); |
---|
3962 | 4087 | |
---|
3963 | 4088 | trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node); |
---|
3964 | 4089 | |
---|
.. | .. |
---|
3967 | 4092 | return ret; |
---|
3968 | 4093 | } |
---|
3969 | 4094 | EXPORT_SYMBOL(__kmalloc_node); |
---|
3970 | | -#endif |
---|
| 4095 | +#endif /* CONFIG_NUMA */ |
---|
3971 | 4096 | |
---|
3972 | 4097 | #ifdef CONFIG_HARDENED_USERCOPY |
---|
3973 | 4098 | /* |
---|
.. | .. |
---|
3984 | 4109 | struct kmem_cache *s; |
---|
3985 | 4110 | unsigned int offset; |
---|
3986 | 4111 | size_t object_size; |
---|
| 4112 | + bool is_kfence = is_kfence_address(ptr); |
---|
3987 | 4113 | |
---|
3988 | 4114 | ptr = kasan_reset_tag(ptr); |
---|
3989 | 4115 | |
---|
.. | .. |
---|
3996 | 4122 | to_user, 0, n); |
---|
3997 | 4123 | |
---|
3998 | 4124 | /* Find offset within object. */ |
---|
3999 | | - offset = (ptr - page_address(page)) % s->size; |
---|
| 4125 | + if (is_kfence) |
---|
| 4126 | + offset = ptr - kfence_object_start(ptr); |
---|
| 4127 | + else |
---|
| 4128 | + offset = (ptr - page_address(page)) % s->size; |
---|
4000 | 4129 | |
---|
4001 | 4130 | /* Adjust for redzone and reject if within the redzone. */ |
---|
4002 | | - if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) { |
---|
| 4131 | + if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) { |
---|
4003 | 4132 | if (offset < s->red_left_pad) |
---|
4004 | 4133 | usercopy_abort("SLUB object in left red zone", |
---|
4005 | 4134 | s->name, to_user, offset, n); |
---|
.. | .. |
---|
4029 | 4158 | } |
---|
4030 | 4159 | #endif /* CONFIG_HARDENED_USERCOPY */ |
---|
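The usercopy check boils down to locating the copy window inside a single object. For a KFENCE address the offset comes from kfence_object_start(); for a regular slab object it is just a modulo over the slot size, followed by rejection if the window starts inside the left red zone. A throwaway check of that arithmetic, with made-up numbers:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		/* Made-up values: 256-byte slots with a 16-byte left red zone. */
		unsigned int slot_size = 256, red_left_pad = 16;
		uintptr_t page_start = 0x10000, ptr = 0x11234;

		unsigned int offset = (ptr - page_start) % slot_size;

		printf("offset within object: %u\n", offset);
		if (offset < red_left_pad)
			printf("rejected: inside the left red zone\n");
		else
			printf("ok: %u bytes past the red zone\n",
			       offset - red_left_pad);
		return 0;
	}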
4031 | 4160 | |
---|
4032 | | -static size_t __ksize(const void *object) |
---|
| 4161 | +size_t __ksize(const void *object) |
---|
4033 | 4162 | { |
---|
4034 | 4163 | struct page *page; |
---|
4035 | 4164 | |
---|
.. | .. |
---|
4040 | 4169 | |
---|
4041 | 4170 | if (unlikely(!PageSlab(page))) { |
---|
4042 | 4171 | WARN_ON(!PageCompound(page)); |
---|
4043 | | - return PAGE_SIZE << compound_order(page); |
---|
| 4172 | + return page_size(page); |
---|
4044 | 4173 | } |
---|
4045 | 4174 | |
---|
4046 | 4175 | return slab_ksize(page->slab_cache); |
---|
4047 | 4176 | } |
---|
4048 | | - |
---|
4049 | | -size_t ksize(const void *object) |
---|
4050 | | -{ |
---|
4051 | | - size_t size = __ksize(object); |
---|
4052 | | - /* We assume that ksize callers could use whole allocated area, |
---|
4053 | | - * so we need to unpoison this area. |
---|
4054 | | - */ |
---|
4055 | | - kasan_unpoison_shadow(object, size); |
---|
4056 | | - return size; |
---|
4057 | | -} |
---|
4058 | | -EXPORT_SYMBOL(ksize); |
---|
| 4177 | +EXPORT_SYMBOL(__ksize); |
---|
4059 | 4178 | |
---|
4060 | 4179 | void kfree(const void *x) |
---|
4061 | 4180 | { |
---|
.. | .. |
---|
4069 | 4188 | |
---|
4070 | 4189 | page = virt_to_head_page(x); |
---|
4071 | 4190 | if (unlikely(!PageSlab(page))) { |
---|
| 4191 | + unsigned int order = compound_order(page); |
---|
| 4192 | + |
---|
4072 | 4193 | BUG_ON(!PageCompound(page)); |
---|
4073 | 4194 | kfree_hook(object); |
---|
4074 | | - __free_pages(page, compound_order(page)); |
---|
| 4195 | + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, |
---|
| 4196 | + -(PAGE_SIZE << order)); |
---|
| 4197 | + __free_pages(page, order); |
---|
4075 | 4198 | return; |
---|
4076 | 4199 | } |
---|
4077 | 4200 | slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_); |
---|
.. | .. |
---|
4107 | 4230 | for (i = 0; i < SHRINK_PROMOTE_MAX; i++) |
---|
4108 | 4231 | INIT_LIST_HEAD(promote + i); |
---|
4109 | 4232 | |
---|
4110 | | - raw_spin_lock_irqsave(&n->list_lock, flags); |
---|
| 4233 | + spin_lock_irqsave(&n->list_lock, flags); |
---|
4111 | 4234 | |
---|
4112 | 4235 | /* |
---|
4113 | 4236 | * Build lists of slabs to discard or promote. |
---|
.. | .. |
---|
4115 | 4238 | * Note that concurrent frees may occur while we hold the |
---|
4116 | 4239 | * list_lock. page->inuse here is the upper limit. |
---|
4117 | 4240 | */ |
---|
4118 | | - list_for_each_entry_safe(page, t, &n->partial, lru) { |
---|
| 4241 | + list_for_each_entry_safe(page, t, &n->partial, slab_list) { |
---|
4119 | 4242 | int free = page->objects - page->inuse; |
---|
4120 | 4243 | |
---|
4121 | 4244 | /* Do not reread page->inuse */ |
---|
.. | .. |
---|
4125 | 4248 | BUG_ON(free <= 0); |
---|
4126 | 4249 | |
---|
4127 | 4250 | if (free == page->objects) { |
---|
4128 | | - list_move(&page->lru, &discard); |
---|
| 4251 | + list_move(&page->slab_list, &discard); |
---|
4129 | 4252 | n->nr_partial--; |
---|
4130 | 4253 | } else if (free <= SHRINK_PROMOTE_MAX) |
---|
4131 | | - list_move(&page->lru, promote + free - 1); |
---|
| 4254 | + list_move(&page->slab_list, promote + free - 1); |
---|
4132 | 4255 | } |
---|
4133 | 4256 | |
---|
4134 | 4257 | /* |
---|
.. | .. |
---|
4138 | 4261 | for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--) |
---|
4139 | 4262 | list_splice(promote + i, &n->partial); |
---|
4140 | 4263 | |
---|
4141 | | - raw_spin_unlock_irqrestore(&n->list_lock, flags); |
---|
| 4264 | + spin_unlock_irqrestore(&n->list_lock, flags); |
---|
4142 | 4265 | |
---|
4143 | 4266 | /* Release empty slabs */ |
---|
4144 | | - list_for_each_entry_safe(page, t, &discard, lru) |
---|
| 4267 | + list_for_each_entry_safe(page, t, &discard, slab_list) |
---|
4145 | 4268 | discard_slab(s, page); |
---|
4146 | 4269 | |
---|
4147 | 4270 | if (slabs_node(s, node)) |
---|
.. | .. |
---|
4150 | 4273 | |
---|
4151 | 4274 | return ret; |
---|
4152 | 4275 | } |
---|
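The promote buckets in __kmem_cache_shrink() are worth a small model: a page with "free" free objects lands in promote[free - 1], fully free pages go to the discard list, and splicing the buckets back from the highest index down leaves the fullest pages at the head of n->partial, which is where allocation looks first. An illustrative stand-in with plain arrays (not the kernel's list handling):

	#include <stdio.h>

	#define PROMOTE_MAX 4	/* shrunk stand-in for SHRINK_PROMOTE_MAX */

	int main(void)
	{
		/* free-object counts of six partial pages, current list order */
		int pages[] = { 3, 1, 4, 2, 1, 3 };
		int order[sizeof(pages) / sizeof(pages[0])];
		int n = 0;

		/* bucket by free count, emit fullest-first, mirroring the
		 * splice loop that rebuilds n->partial */
		for (int want = 1; want <= PROMOTE_MAX; want++)
			for (unsigned i = 0; i < sizeof(pages) / sizeof(pages[0]); i++)
				if (pages[i] == want)
					order[n++] = pages[i];

		printf("rebuilt partial list (free counts, head first):");
		for (int i = 0; i < n; i++)
			printf(" %d", order[i]);
		printf("\n");	/* prints: 1 1 2 3 3 4 */
		return 0;
	}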
4153 | | - |
---|
4154 | | -#ifdef CONFIG_MEMCG |
---|
4155 | | -static void kmemcg_cache_deact_after_rcu(struct kmem_cache *s) |
---|
4156 | | -{ |
---|
4157 | | - /* |
---|
4158 | | - * Called with all the locks held after a sched RCU grace period. |
---|
4159 | | - * Even if @s becomes empty after shrinking, we can't know that @s |
---|
4160 | | - * doesn't have allocations already in-flight and thus can't |
---|
4161 | | - * destroy @s until the associated memcg is released. |
---|
4162 | | - * |
---|
4163 | | - * However, let's remove the sysfs files for empty caches here. |
---|
4164 | | - * Each cache has a lot of interface files which aren't |
---|
4165 | | - * particularly useful for empty draining caches; otherwise, we can |
---|
4166 | | - * easily end up with millions of unnecessary sysfs files on |
---|
4167 | | - * systems which have a lot of memory and transient cgroups. |
---|
4168 | | - */ |
---|
4169 | | - if (!__kmem_cache_shrink(s)) |
---|
4170 | | - sysfs_slab_remove(s); |
---|
4171 | | -} |
---|
4172 | | - |
---|
4173 | | -void __kmemcg_cache_deactivate(struct kmem_cache *s) |
---|
4174 | | -{ |
---|
4175 | | - /* |
---|
4176 | | - * Disable empty slabs caching. Used to avoid pinning offline |
---|
4177 | | - * memory cgroups by kmem pages that can be freed. |
---|
4178 | | - */ |
---|
4179 | | - slub_set_cpu_partial(s, 0); |
---|
4180 | | - s->min_partial = 0; |
---|
4181 | | - |
---|
4182 | | - /* |
---|
4183 | | - * s->cpu_partial is checked locklessly (see put_cpu_partial), so |
---|
4184 | | - * we have to make sure the change is visible before shrinking. |
---|
4185 | | - */ |
---|
4186 | | - slab_deactivate_memcg_cache_rcu_sched(s, kmemcg_cache_deact_after_rcu); |
---|
4187 | | -} |
---|
4188 | | -#endif |
---|
4189 | 4276 | |
---|
4190 | 4277 | static int slab_mem_going_offline_callback(void *arg) |
---|
4191 | 4278 | { |
---|
.. | .. |
---|
4333 | 4420 | for_each_kmem_cache_node(s, node, n) { |
---|
4334 | 4421 | struct page *p; |
---|
4335 | 4422 | |
---|
4336 | | - list_for_each_entry(p, &n->partial, lru) |
---|
| 4423 | + list_for_each_entry(p, &n->partial, slab_list) |
---|
4337 | 4424 | p->slab_cache = s; |
---|
4338 | 4425 | |
---|
4339 | 4426 | #ifdef CONFIG_SLUB_DEBUG |
---|
4340 | | - list_for_each_entry(p, &n->full, lru) |
---|
| 4427 | + list_for_each_entry(p, &n->full, slab_list) |
---|
4341 | 4428 | p->slab_cache = s; |
---|
4342 | 4429 | #endif |
---|
4343 | 4430 | } |
---|
4344 | | - slab_init_memcg_params(s); |
---|
4345 | 4431 | list_add(&s->list, &slab_caches); |
---|
4346 | | - memcg_link_cache(s); |
---|
4347 | 4432 | return s; |
---|
4348 | 4433 | } |
---|
4349 | 4434 | |
---|
.. | .. |
---|
4351 | 4436 | { |
---|
4352 | 4437 | static __initdata struct kmem_cache boot_kmem_cache, |
---|
4353 | 4438 | boot_kmem_cache_node; |
---|
4354 | | - int cpu; |
---|
4355 | | - |
---|
4356 | | - for_each_possible_cpu(cpu) { |
---|
4357 | | - raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock); |
---|
4358 | | - INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list); |
---|
4359 | | - } |
---|
4360 | 4439 | |
---|
4361 | 4440 | if (debug_guardpage_minorder()) |
---|
4362 | 4441 | slub_max_order = 0; |
---|
.. | .. |
---|
4390 | 4469 | cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL, |
---|
4391 | 4470 | slub_cpu_dead); |
---|
4392 | 4471 | |
---|
4393 | | - pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%d\n", |
---|
| 4472 | + pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n", |
---|
4394 | 4473 | cache_line_size(), |
---|
4395 | 4474 | slub_min_order, slub_max_order, slub_min_objects, |
---|
4396 | 4475 | nr_cpu_ids, nr_node_ids); |
---|
.. | .. |
---|
4404 | 4483 | __kmem_cache_alias(const char *name, unsigned int size, unsigned int align, |
---|
4405 | 4484 | slab_flags_t flags, void (*ctor)(void *)) |
---|
4406 | 4485 | { |
---|
4407 | | - struct kmem_cache *s, *c; |
---|
| 4486 | + struct kmem_cache *s; |
---|
4408 | 4487 | |
---|
4409 | 4488 | s = find_mergeable(size, align, flags, name, ctor); |
---|
4410 | 4489 | if (s) { |
---|
.. | .. |
---|
4416 | 4495 | */ |
---|
4417 | 4496 | s->object_size = max(s->object_size, size); |
---|
4418 | 4497 | s->inuse = max(s->inuse, ALIGN(size, sizeof(void *))); |
---|
4419 | | - |
---|
4420 | | - for_each_memcg_cache(c, s) { |
---|
4421 | | - c->object_size = s->object_size; |
---|
4422 | | - c->inuse = max(c->inuse, ALIGN(size, sizeof(void *))); |
---|
4423 | | - } |
---|
4424 | 4498 | |
---|
4425 | 4499 | if (sysfs_slab_alias(s, name)) { |
---|
4426 | 4500 | s->refcount--; |
---|
.. | .. |
---|
4443 | 4517 | if (slab_state <= UP) |
---|
4444 | 4518 | return 0; |
---|
4445 | 4519 | |
---|
4446 | | - memcg_propagate_slab_attrs(s); |
---|
4447 | 4520 | err = sysfs_slab_add(s); |
---|
4448 | | - if (err) |
---|
| 4521 | + if (err) { |
---|
4449 | 4522 | __kmem_cache_release(s); |
---|
| 4523 | + return err; |
---|
| 4524 | + } |
---|
4450 | 4525 | |
---|
4451 | | - return err; |
---|
| 4526 | + if (s->flags & SLAB_STORE_USER) |
---|
| 4527 | + debugfs_slab_add(s); |
---|
| 4528 | + |
---|
| 4529 | + return 0; |
---|
4452 | 4530 | } |
---|
4453 | 4531 | |
---|
4454 | 4532 | void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) |
---|
.. | .. |
---|
4464 | 4542 | if (unlikely(ZERO_OR_NULL_PTR(s))) |
---|
4465 | 4543 | return s; |
---|
4466 | 4544 | |
---|
4467 | | - ret = slab_alloc(s, gfpflags, caller); |
---|
| 4545 | + ret = slab_alloc(s, gfpflags, caller, size); |
---|
4468 | 4546 | |
---|
4469 | 4547 | /* Honor the call site pointer we received. */ |
---|
4470 | 4548 | trace_kmalloc(caller, ret, size, s->size, gfpflags); |
---|
.. | .. |
---|
4495 | 4573 | if (unlikely(ZERO_OR_NULL_PTR(s))) |
---|
4496 | 4574 | return s; |
---|
4497 | 4575 | |
---|
4498 | | - ret = slab_alloc_node(s, gfpflags, node, caller); |
---|
| 4576 | + ret = slab_alloc_node(s, gfpflags, node, caller, size); |
---|
4499 | 4577 | |
---|
4500 | 4578 | /* Honor the call site pointer we received. */ |
---|
4501 | 4579 | trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); |
---|
.. | .. |
---|
4518 | 4596 | #endif |
---|
4519 | 4597 | |
---|
4520 | 4598 | #ifdef CONFIG_SLUB_DEBUG |
---|
4521 | | -static int validate_slab(struct kmem_cache *s, struct page *page, |
---|
4522 | | - unsigned long *map) |
---|
| 4599 | +static void validate_slab(struct kmem_cache *s, struct page *page) |
---|
4523 | 4600 | { |
---|
4524 | 4601 | void *p; |
---|
4525 | 4602 | void *addr = page_address(page); |
---|
| 4603 | + unsigned long *map; |
---|
4526 | 4604 | |
---|
4527 | | - if (!check_slab(s, page) || |
---|
4528 | | - !on_freelist(s, page, NULL)) |
---|
4529 | | - return 0; |
---|
| 4605 | + slab_lock(page); |
---|
| 4606 | + |
---|
| 4607 | + if (!check_slab(s, page) || !on_freelist(s, page, NULL)) |
---|
| 4608 | + goto unlock; |
---|
4530 | 4609 | |
---|
4531 | 4610 | /* Now we know that a valid freelist exists */ |
---|
4532 | | - bitmap_zero(map, page->objects); |
---|
4533 | | - |
---|
4534 | | - get_map(s, page, map); |
---|
| 4611 | + map = get_map(s, page); |
---|
4535 | 4612 | for_each_object(p, s, addr, page->objects) { |
---|
4536 | | - if (test_bit(slab_index(p, s, addr), map)) |
---|
4537 | | - if (!check_object(s, page, p, SLUB_RED_INACTIVE)) |
---|
4538 | | - return 0; |
---|
| 4613 | + u8 val = test_bit(__obj_to_index(s, addr, p), map) ? |
---|
| 4614 | + SLUB_RED_INACTIVE : SLUB_RED_ACTIVE; |
---|
| 4615 | + |
---|
| 4616 | + if (!check_object(s, page, p, val)) |
---|
| 4617 | + break; |
---|
4539 | 4618 | } |
---|
4540 | | - |
---|
4541 | | - for_each_object(p, s, addr, page->objects) |
---|
4542 | | - if (!test_bit(slab_index(p, s, addr), map)) |
---|
4543 | | - if (!check_object(s, page, p, SLUB_RED_ACTIVE)) |
---|
4544 | | - return 0; |
---|
4545 | | - return 1; |
---|
4546 | | -} |
---|
4547 | | - |
---|
4548 | | -static void validate_slab_slab(struct kmem_cache *s, struct page *page, |
---|
4549 | | - unsigned long *map) |
---|
4550 | | -{ |
---|
4551 | | - slab_lock(page); |
---|
4552 | | - validate_slab(s, page, map); |
---|
| 4619 | + put_map(map); |
---|
| 4620 | +unlock: |
---|
4553 | 4621 | slab_unlock(page); |
---|
4554 | 4622 | } |
---|
4555 | 4623 | |
---|
4556 | 4624 | static int validate_slab_node(struct kmem_cache *s, |
---|
4557 | | - struct kmem_cache_node *n, unsigned long *map) |
---|
| 4625 | + struct kmem_cache_node *n) |
---|
4558 | 4626 | { |
---|
4559 | 4627 | unsigned long count = 0; |
---|
4560 | 4628 | struct page *page; |
---|
4561 | 4629 | unsigned long flags; |
---|
4562 | 4630 | |
---|
4563 | | - raw_spin_lock_irqsave(&n->list_lock, flags); |
---|
| 4631 | + spin_lock_irqsave(&n->list_lock, flags); |
---|
4564 | 4632 | |
---|
4565 | | - list_for_each_entry(page, &n->partial, lru) { |
---|
4566 | | - validate_slab_slab(s, page, map); |
---|
| 4633 | + list_for_each_entry(page, &n->partial, slab_list) { |
---|
| 4634 | + validate_slab(s, page); |
---|
4567 | 4635 | count++; |
---|
4568 | 4636 | } |
---|
4569 | 4637 | if (count != n->nr_partial) |
---|
.. | .. |
---|
4573 | 4641 | if (!(s->flags & SLAB_STORE_USER)) |
---|
4574 | 4642 | goto out; |
---|
4575 | 4643 | |
---|
4576 | | - list_for_each_entry(page, &n->full, lru) { |
---|
4577 | | - validate_slab_slab(s, page, map); |
---|
| 4644 | + list_for_each_entry(page, &n->full, slab_list) { |
---|
| 4645 | + validate_slab(s, page); |
---|
4578 | 4646 | count++; |
---|
4579 | 4647 | } |
---|
4580 | 4648 | if (count != atomic_long_read(&n->nr_slabs)) |
---|
.. | .. |
---|
4582 | 4650 | s->name, count, atomic_long_read(&n->nr_slabs)); |
---|
4583 | 4651 | |
---|
4584 | 4652 | out: |
---|
4585 | | - raw_spin_unlock_irqrestore(&n->list_lock, flags); |
---|
| 4653 | + spin_unlock_irqrestore(&n->list_lock, flags); |
---|
4586 | 4654 | return count; |
---|
4587 | 4655 | } |
---|
4588 | 4656 | |
---|
.. | .. |
---|
4590 | 4658 | { |
---|
4591 | 4659 | int node; |
---|
4592 | 4660 | unsigned long count = 0; |
---|
4593 | | - unsigned long *map = kmalloc_array(BITS_TO_LONGS(oo_objects(s->max)), |
---|
4594 | | - sizeof(unsigned long), |
---|
4595 | | - GFP_KERNEL); |
---|
4596 | 4661 | struct kmem_cache_node *n; |
---|
4597 | | - |
---|
4598 | | - if (!map) |
---|
4599 | | - return -ENOMEM; |
---|
4600 | 4662 | |
---|
4601 | 4663 | flush_all(s); |
---|
4602 | 4664 | for_each_kmem_cache_node(s, node, n) |
---|
4603 | | - count += validate_slab_node(s, n, map); |
---|
4604 | | - kfree(map); |
---|
| 4665 | + count += validate_slab_node(s, n); |
---|
| 4666 | + |
---|
4605 | 4667 | return count; |
---|
4606 | 4668 | } |
---|
| 4669 | + |
---|
| 4670 | +#ifdef CONFIG_DEBUG_FS |
---|
4607 | 4671 | /* |
---|
4608 | 4672 | * Generate lists of code addresses where slabcache objects are allocated |
---|
4609 | 4673 | * and freed. |
---|
.. | .. |
---|
4625 | 4689 | unsigned long max; |
---|
4626 | 4690 | unsigned long count; |
---|
4627 | 4691 | struct location *loc; |
---|
| 4692 | + loff_t idx; |
---|
4628 | 4693 | }; |
---|
| 4694 | + |
---|
| 4695 | +static struct dentry *slab_debugfs_root; |
---|
4629 | 4696 | |
---|
4630 | 4697 | static void free_loc_track(struct loc_track *t) |
---|
4631 | 4698 | { |
---|
.. | .. |
---|
4638 | 4705 | { |
---|
4639 | 4706 | struct location *l; |
---|
4640 | 4707 | int order; |
---|
4641 | | - |
---|
4642 | | - if (IS_ENABLED(CONFIG_PREEMPT_RT) && flags == GFP_ATOMIC) |
---|
4643 | | - return 0; |
---|
4644 | 4708 | |
---|
4645 | 4709 | order = get_order(sizeof(struct location) * max); |
---|
4646 | 4710 | |
---|
.. | .. |
---|
4735 | 4799 | |
---|
4736 | 4800 | static void process_slab(struct loc_track *t, struct kmem_cache *s, |
---|
4737 | 4801 | struct page *page, enum track_item alloc, |
---|
4738 | | - unsigned long *map) |
---|
| 4802 | + unsigned long *obj_map) |
---|
4739 | 4803 | { |
---|
4740 | 4804 | void *addr = page_address(page); |
---|
4741 | 4805 | void *p; |
---|
4742 | 4806 | |
---|
4743 | | - bitmap_zero(map, page->objects); |
---|
4744 | | - get_map(s, page, map); |
---|
| 4807 | + __fill_map(obj_map, s, page); |
---|
4745 | 4808 | |
---|
4746 | 4809 | for_each_object(p, s, addr, page->objects) |
---|
4747 | | - if (!test_bit(slab_index(p, s, addr), map)) |
---|
| 4810 | + if (!test_bit(__obj_to_index(s, addr, p), obj_map)) |
---|
4748 | 4811 | add_location(t, s, get_track(s, p, alloc)); |
---|
4749 | 4812 | } |
---|
4750 | | - |
---|
4751 | | -static int list_locations(struct kmem_cache *s, char *buf, |
---|
4752 | | - enum track_item alloc) |
---|
4753 | | -{ |
---|
4754 | | - int len = 0; |
---|
4755 | | - unsigned long i; |
---|
4756 | | - struct loc_track t = { 0, 0, NULL }; |
---|
4757 | | - int node; |
---|
4758 | | - unsigned long *map = kmalloc_array(BITS_TO_LONGS(oo_objects(s->max)), |
---|
4759 | | - sizeof(unsigned long), |
---|
4760 | | - GFP_KERNEL); |
---|
4761 | | - struct kmem_cache_node *n; |
---|
4762 | | - |
---|
4763 | | - if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), |
---|
4764 | | - GFP_KERNEL)) { |
---|
4765 | | - kfree(map); |
---|
4766 | | - return sprintf(buf, "Out of memory\n"); |
---|
4767 | | - } |
---|
4768 | | - /* Push back cpu slabs */ |
---|
4769 | | - flush_all(s); |
---|
4770 | | - |
---|
4771 | | - for_each_kmem_cache_node(s, node, n) { |
---|
4772 | | - unsigned long flags; |
---|
4773 | | - struct page *page; |
---|
4774 | | - |
---|
4775 | | - if (!atomic_long_read(&n->nr_slabs)) |
---|
4776 | | - continue; |
---|
4777 | | - |
---|
4778 | | - raw_spin_lock_irqsave(&n->list_lock, flags); |
---|
4779 | | - list_for_each_entry(page, &n->partial, lru) |
---|
4780 | | - process_slab(&t, s, page, alloc, map); |
---|
4781 | | - list_for_each_entry(page, &n->full, lru) |
---|
4782 | | - process_slab(&t, s, page, alloc, map); |
---|
4783 | | - raw_spin_unlock_irqrestore(&n->list_lock, flags); |
---|
4784 | | - } |
---|
4785 | | - |
---|
4786 | | - for (i = 0; i < t.count; i++) { |
---|
4787 | | - struct location *l = &t.loc[i]; |
---|
4788 | | - |
---|
4789 | | - if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100) |
---|
4790 | | - break; |
---|
4791 | | - len += sprintf(buf + len, "%7ld ", l->count); |
---|
4792 | | - |
---|
4793 | | - if (l->addr) |
---|
4794 | | - len += sprintf(buf + len, "%pS", (void *)l->addr); |
---|
4795 | | - else |
---|
4796 | | - len += sprintf(buf + len, "<not-available>"); |
---|
4797 | | - |
---|
4798 | | - if (l->sum_time != l->min_time) { |
---|
4799 | | - len += sprintf(buf + len, " age=%ld/%ld/%ld", |
---|
4800 | | - l->min_time, |
---|
4801 | | - (long)div_u64(l->sum_time, l->count), |
---|
4802 | | - l->max_time); |
---|
4803 | | - } else |
---|
4804 | | - len += sprintf(buf + len, " age=%ld", |
---|
4805 | | - l->min_time); |
---|
4806 | | - |
---|
4807 | | - if (l->min_pid != l->max_pid) |
---|
4808 | | - len += sprintf(buf + len, " pid=%ld-%ld", |
---|
4809 | | - l->min_pid, l->max_pid); |
---|
4810 | | - else |
---|
4811 | | - len += sprintf(buf + len, " pid=%ld", |
---|
4812 | | - l->min_pid); |
---|
4813 | | - |
---|
4814 | | - if (num_online_cpus() > 1 && |
---|
4815 | | - !cpumask_empty(to_cpumask(l->cpus)) && |
---|
4816 | | - len < PAGE_SIZE - 60) |
---|
4817 | | - len += scnprintf(buf + len, PAGE_SIZE - len - 50, |
---|
4818 | | - " cpus=%*pbl", |
---|
4819 | | - cpumask_pr_args(to_cpumask(l->cpus))); |
---|
4820 | | - |
---|
4821 | | - if (nr_online_nodes > 1 && !nodes_empty(l->nodes) && |
---|
4822 | | - len < PAGE_SIZE - 60) |
---|
4823 | | - len += scnprintf(buf + len, PAGE_SIZE - len - 50, |
---|
4824 | | - " nodes=%*pbl", |
---|
4825 | | - nodemask_pr_args(&l->nodes)); |
---|
4826 | | - |
---|
4827 | | - len += sprintf(buf + len, "\n"); |
---|
4828 | | - } |
---|
4829 | | - |
---|
4830 | | - free_loc_track(&t); |
---|
4831 | | - kfree(map); |
---|
4832 | | - if (!t.count) |
---|
4833 | | - len += sprintf(buf, "No data\n"); |
---|
4834 | | - return len; |
---|
4835 | | -} |
---|
4836 | | -#endif |
---|
| 4813 | +#endif /* CONFIG_DEBUG_FS */ |
---|
| 4814 | +#endif /* CONFIG_SLUB_DEBUG */ |
---|
4837 | 4815 | |
---|
4838 | 4816 | #ifdef SLUB_RESILIENCY_TEST |
---|
4839 | 4817 | static void __init resiliency_test(void) |
---|
.. | .. |
---|
4893 | 4871 | #ifdef CONFIG_SLUB_SYSFS |
---|
4894 | 4872 | static void resiliency_test(void) {}; |
---|
4895 | 4873 | #endif |
---|
4896 | | -#endif |
---|
| 4874 | +#endif /* SLUB_RESILIENCY_TEST */ |
---|
4897 | 4875 | |
---|
4898 | 4876 | #ifdef CONFIG_SLUB_SYSFS |
---|
4899 | 4877 | enum slab_stat_type { |
---|
.. | .. |
---|
5032 | 5010 | return x + sprintf(buf + x, "\n"); |
---|
5033 | 5011 | } |
---|
5034 | 5012 | |
---|
5035 | | -#ifdef CONFIG_SLUB_DEBUG |
---|
5036 | | -static int any_slab_objects(struct kmem_cache *s) |
---|
5037 | | -{ |
---|
5038 | | - int node; |
---|
5039 | | - struct kmem_cache_node *n; |
---|
5040 | | - |
---|
5041 | | - for_each_kmem_cache_node(s, node, n) |
---|
5042 | | - if (atomic_long_read(&n->total_objects)) |
---|
5043 | | - return 1; |
---|
5044 | | - |
---|
5045 | | - return 0; |
---|
5046 | | -} |
---|
5047 | | -#endif |
---|
5048 | | - |
---|
5049 | 5013 | #define to_slab_attr(n) container_of(n, struct slab_attribute, attr) |
---|
5050 | 5014 | #define to_slab(n) container_of(n, struct kmem_cache, kobj) |
---|
5051 | 5015 | |
---|
.. | .. |
---|
5087 | 5051 | } |
---|
5088 | 5052 | SLAB_ATTR_RO(objs_per_slab); |
---|
5089 | 5053 | |
---|
5090 | | -static ssize_t order_store(struct kmem_cache *s, |
---|
5091 | | - const char *buf, size_t length) |
---|
5092 | | -{ |
---|
5093 | | - unsigned int order; |
---|
5094 | | - int err; |
---|
5095 | | - |
---|
5096 | | - err = kstrtouint(buf, 10, &order); |
---|
5097 | | - if (err) |
---|
5098 | | - return err; |
---|
5099 | | - |
---|
5100 | | - if (order > slub_max_order || order < slub_min_order) |
---|
5101 | | - return -EINVAL; |
---|
5102 | | - |
---|
5103 | | - calculate_sizes(s, order); |
---|
5104 | | - return length; |
---|
5105 | | -} |
---|
5106 | | - |
---|
5107 | 5054 | static ssize_t order_show(struct kmem_cache *s, char *buf) |
---|
5108 | 5055 | { |
---|
5109 | 5056 | return sprintf(buf, "%u\n", oo_order(s->oo)); |
---|
5110 | 5057 | } |
---|
5111 | | -SLAB_ATTR(order); |
---|
| 5058 | +SLAB_ATTR_RO(order); |
---|
5112 | 5059 | |
---|
5113 | 5060 | static ssize_t min_partial_show(struct kmem_cache *s, char *buf) |
---|
5114 | 5061 | { |
---|
.. | .. |
---|
5230 | 5177 | { |
---|
5231 | 5178 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); |
---|
5232 | 5179 | } |
---|
5233 | | - |
---|
5234 | | -static ssize_t reclaim_account_store(struct kmem_cache *s, |
---|
5235 | | - const char *buf, size_t length) |
---|
5236 | | -{ |
---|
5237 | | - s->flags &= ~SLAB_RECLAIM_ACCOUNT; |
---|
5238 | | - if (buf[0] == '1') |
---|
5239 | | - s->flags |= SLAB_RECLAIM_ACCOUNT; |
---|
5240 | | - return length; |
---|
5241 | | -} |
---|
5242 | | -SLAB_ATTR(reclaim_account); |
---|
| 5180 | +SLAB_ATTR_RO(reclaim_account); |
---|
5243 | 5181 | |
---|
5244 | 5182 | static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf) |
---|
5245 | 5183 | { |
---|
.. | .. |
---|
5284 | 5222 | { |
---|
5285 | 5223 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS)); |
---|
5286 | 5224 | } |
---|
5287 | | - |
---|
5288 | | -static ssize_t sanity_checks_store(struct kmem_cache *s, |
---|
5289 | | - const char *buf, size_t length) |
---|
5290 | | -{ |
---|
5291 | | - s->flags &= ~SLAB_CONSISTENCY_CHECKS; |
---|
5292 | | - if (buf[0] == '1') { |
---|
5293 | | - s->flags &= ~__CMPXCHG_DOUBLE; |
---|
5294 | | - s->flags |= SLAB_CONSISTENCY_CHECKS; |
---|
5295 | | - } |
---|
5296 | | - return length; |
---|
5297 | | -} |
---|
5298 | | -SLAB_ATTR(sanity_checks); |
---|
| 5225 | +SLAB_ATTR_RO(sanity_checks); |
---|
5299 | 5226 | |
---|
5300 | 5227 | static ssize_t trace_show(struct kmem_cache *s, char *buf) |
---|
5301 | 5228 | { |
---|
5302 | 5229 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE)); |
---|
5303 | 5230 | } |
---|
5304 | | - |
---|
5305 | | -static ssize_t trace_store(struct kmem_cache *s, const char *buf, |
---|
5306 | | - size_t length) |
---|
5307 | | -{ |
---|
5308 | | - /* |
---|
5309 | | - * Tracing a merged cache is going to give confusing results |
---|
5310 | | - * as well as cause other issues like converting a mergeable |
---|
5311 | | - * cache into an umergeable one. |
---|
5312 | | - */ |
---|
5313 | | - if (s->refcount > 1) |
---|
5314 | | - return -EINVAL; |
---|
5315 | | - |
---|
5316 | | - s->flags &= ~SLAB_TRACE; |
---|
5317 | | - if (buf[0] == '1') { |
---|
5318 | | - s->flags &= ~__CMPXCHG_DOUBLE; |
---|
5319 | | - s->flags |= SLAB_TRACE; |
---|
5320 | | - } |
---|
5321 | | - return length; |
---|
5322 | | -} |
---|
5323 | | -SLAB_ATTR(trace); |
---|
| 5231 | +SLAB_ATTR_RO(trace); |
---|
5324 | 5232 | |
---|
5325 | 5233 | static ssize_t red_zone_show(struct kmem_cache *s, char *buf) |
---|
5326 | 5234 | { |
---|
5327 | 5235 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE)); |
---|
5328 | 5236 | } |
---|
5329 | 5237 | |
---|
5330 | | -static ssize_t red_zone_store(struct kmem_cache *s, |
---|
5331 | | - const char *buf, size_t length) |
---|
5332 | | -{ |
---|
5333 | | - if (any_slab_objects(s)) |
---|
5334 | | - return -EBUSY; |
---|
5335 | | - |
---|
5336 | | - s->flags &= ~SLAB_RED_ZONE; |
---|
5337 | | - if (buf[0] == '1') { |
---|
5338 | | - s->flags |= SLAB_RED_ZONE; |
---|
5339 | | - } |
---|
5340 | | - calculate_sizes(s, -1); |
---|
5341 | | - return length; |
---|
5342 | | -} |
---|
5343 | | -SLAB_ATTR(red_zone); |
---|
| 5238 | +SLAB_ATTR_RO(red_zone); |
---|
5344 | 5239 | |
---|
5345 | 5240 | static ssize_t poison_show(struct kmem_cache *s, char *buf) |
---|
5346 | 5241 | { |
---|
5347 | 5242 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON)); |
---|
5348 | 5243 | } |
---|
5349 | 5244 | |
---|
5350 | | -static ssize_t poison_store(struct kmem_cache *s, |
---|
5351 | | - const char *buf, size_t length) |
---|
5352 | | -{ |
---|
5353 | | - if (any_slab_objects(s)) |
---|
5354 | | - return -EBUSY; |
---|
5355 | | - |
---|
5356 | | - s->flags &= ~SLAB_POISON; |
---|
5357 | | - if (buf[0] == '1') { |
---|
5358 | | - s->flags |= SLAB_POISON; |
---|
5359 | | - } |
---|
5360 | | - calculate_sizes(s, -1); |
---|
5361 | | - return length; |
---|
5362 | | -} |
---|
5363 | | -SLAB_ATTR(poison); |
---|
| 5245 | +SLAB_ATTR_RO(poison); |
---|
5364 | 5246 | |
---|
5365 | 5247 | static ssize_t store_user_show(struct kmem_cache *s, char *buf) |
---|
5366 | 5248 | { |
---|
5367 | 5249 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER)); |
---|
5368 | 5250 | } |
---|
5369 | 5251 | |
---|
5370 | | -static ssize_t store_user_store(struct kmem_cache *s, |
---|
5371 | | - const char *buf, size_t length) |
---|
5372 | | -{ |
---|
5373 | | - if (any_slab_objects(s)) |
---|
5374 | | - return -EBUSY; |
---|
5375 | | - |
---|
5376 | | - s->flags &= ~SLAB_STORE_USER; |
---|
5377 | | - if (buf[0] == '1') { |
---|
5378 | | - s->flags &= ~__CMPXCHG_DOUBLE; |
---|
5379 | | - s->flags |= SLAB_STORE_USER; |
---|
5380 | | - } |
---|
5381 | | - calculate_sizes(s, -1); |
---|
5382 | | - return length; |
---|
5383 | | -} |
---|
5384 | | -SLAB_ATTR(store_user); |
---|
| 5252 | +SLAB_ATTR_RO(store_user); |
---|
5385 | 5253 | |
---|
5386 | 5254 | static ssize_t validate_show(struct kmem_cache *s, char *buf) |
---|
5387 | 5255 | { |
---|
.. | .. |
---|
5402 | 5270 | } |
---|
5403 | 5271 | SLAB_ATTR(validate); |
---|
5404 | 5272 | |
---|
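With the conversions above, the slub_debug-related attributes (sanity_checks, trace, red_zone, poison, store_user, reclaim_account, and failslab further down) lose their store handlers and become read-only in sysfs; the corresponding debug options are presumably meant to be selected at boot via the slub_debug= parameter rather than toggled at run time. Attributes that keep a store handler, such as validate just above (and shrink), still accept writes. A minimal userspace sketch of the resulting behaviour, assuming a kmalloc-64 cache exists, the process runs as root, and writes to the read-only files are rejected:

/* Illustrative only: exercise the SLUB sysfs attributes touched above. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char val[4] = "";
	int fd;

	/* The debug flags can still be read... */
	fd = open("/sys/kernel/slab/kmalloc-64/poison", O_RDONLY);
	if (fd >= 0) {
		if (read(fd, val, sizeof(val) - 1) > 0)
			printf("poison: %s", val);	/* "0\n" or "1\n" */
		close(fd);
	}

	/* ...but writes should now be rejected, since the store handlers are gone. */
	if (open("/sys/kernel/slab/kmalloc-64/poison", O_WRONLY) < 0)
		perror("poison (write expected to fail)");

	/* validate keeps its store handler; writing "1" should trigger a
	 * consistency check of the cache. */
	fd = open("/sys/kernel/slab/kmalloc-64/validate", O_WRONLY);
	if (fd >= 0) {
		if (write(fd, "1", 1) < 0)
			perror("validate");
		close(fd);
	}
	return 0;
}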
5405 | | -static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf) |
---|
5406 | | -{ |
---|
5407 | | - if (!(s->flags & SLAB_STORE_USER)) |
---|
5408 | | - return -ENOSYS; |
---|
5409 | | - return list_locations(s, buf, TRACK_ALLOC); |
---|
5410 | | -} |
---|
5411 | | -SLAB_ATTR_RO(alloc_calls); |
---|
5412 | | - |
---|
5413 | | -static ssize_t free_calls_show(struct kmem_cache *s, char *buf) |
---|
5414 | | -{ |
---|
5415 | | - if (!(s->flags & SLAB_STORE_USER)) |
---|
5416 | | - return -ENOSYS; |
---|
5417 | | - return list_locations(s, buf, TRACK_FREE); |
---|
5418 | | -} |
---|
5419 | | -SLAB_ATTR_RO(free_calls); |
---|
5420 | 5273 | #endif /* CONFIG_SLUB_DEBUG */ |
---|
5421 | 5274 | |
---|
5422 | 5275 | #ifdef CONFIG_FAILSLAB |
---|
.. | .. |
---|
5424 | 5277 | { |
---|
5425 | 5278 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); |
---|
5426 | 5279 | } |
---|
5427 | | - |
---|
5428 | | -static ssize_t failslab_store(struct kmem_cache *s, const char *buf, |
---|
5429 | | - size_t length) |
---|
5430 | | -{ |
---|
5431 | | - if (s->refcount > 1) |
---|
5432 | | - return -EINVAL; |
---|
5433 | | - |
---|
5434 | | - s->flags &= ~SLAB_FAILSLAB; |
---|
5435 | | - if (buf[0] == '1') |
---|
5436 | | - s->flags |= SLAB_FAILSLAB; |
---|
5437 | | - return length; |
---|
5438 | | -} |
---|
5439 | | -SLAB_ATTR(failslab); |
---|
| 5280 | +SLAB_ATTR_RO(failslab); |
---|
5440 | 5281 | #endif |
---|
5441 | 5282 | |
---|
5442 | 5283 | static ssize_t shrink_show(struct kmem_cache *s, char *buf) |
---|
.. | .. |
---|
5559 | 5400 | STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free); |
---|
5560 | 5401 | STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node); |
---|
5561 | 5402 | STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain); |
---|
5562 | | -#endif |
---|
| 5403 | +#endif /* CONFIG_SLUB_STATS */ |
---|
5563 | 5404 | |
---|
5564 | 5405 | static struct attribute *slab_attrs[] = { |
---|
5565 | 5406 | &slab_size_attr.attr, |
---|
.. | .. |
---|
5589 | 5430 | &poison_attr.attr, |
---|
5590 | 5431 | &store_user_attr.attr, |
---|
5591 | 5432 | &validate_attr.attr, |
---|
5592 | | - &alloc_calls_attr.attr, |
---|
5593 | | - &free_calls_attr.attr, |
---|
5594 | 5433 | #endif |
---|
5595 | 5434 | #ifdef CONFIG_ZONE_DMA |
---|
5596 | 5435 | &cache_dma_attr.attr, |
---|
.. | .. |
---|
5672 | 5511 | return -EIO; |
---|
5673 | 5512 | |
---|
5674 | 5513 | err = attribute->store(s, buf, len); |
---|
5675 | | -#ifdef CONFIG_MEMCG |
---|
5676 | | - if (slab_state >= FULL && err >= 0 && is_root_cache(s)) { |
---|
5677 | | - struct kmem_cache *c; |
---|
5678 | | - |
---|
5679 | | - mutex_lock(&slab_mutex); |
---|
5680 | | - if (s->max_attr_size < len) |
---|
5681 | | - s->max_attr_size = len; |
---|
5682 | | - |
---|
5683 | | - /* |
---|
5684 | | - * This is a best effort propagation, so this function's return |
---|
5685 | | - * value will be determined by the parent cache only. This is |
---|
5686 | | - * basically because not all attributes will have a well |
---|
5687 | | - * defined semantics for rollbacks - most of the actions will |
---|
5688 | | - * have permanent effects. |
---|
5689 | | - * |
---|
5690 | | - * Returning the error value of any of the children that fail |
---|
5691 | | - * is not 100 % defined, in the sense that users seeing the |
---|
5692 | | - * error code won't be able to know anything about the state of |
---|
5693 | | - * the cache. |
---|
5694 | | - * |
---|
5695 | | - * Only returning the error code for the parent cache at least |
---|
5696 | | - * has well defined semantics. The cache being written to |
---|
5697 | | - * directly either failed or succeeded, in which case we loop |
---|
5698 | | - * through the descendants with best-effort propagation. |
---|
5699 | | - */ |
---|
5700 | | - for_each_memcg_cache(c, s) |
---|
5701 | | - attribute->store(c, buf, len); |
---|
5702 | | - mutex_unlock(&slab_mutex); |
---|
5703 | | - } |
---|
5704 | | -#endif |
---|
5705 | 5514 | return err; |
---|
5706 | | -} |
---|
5707 | | - |
---|
5708 | | -static void memcg_propagate_slab_attrs(struct kmem_cache *s) |
---|
5709 | | -{ |
---|
5710 | | -#ifdef CONFIG_MEMCG |
---|
5711 | | - int i; |
---|
5712 | | - char *buffer = NULL; |
---|
5713 | | - struct kmem_cache *root_cache; |
---|
5714 | | - |
---|
5715 | | - if (is_root_cache(s)) |
---|
5716 | | - return; |
---|
5717 | | - |
---|
5718 | | - root_cache = s->memcg_params.root_cache; |
---|
5719 | | - |
---|
5720 | | - /* |
---|
5721 | | - * This mean this cache had no attribute written. Therefore, no point |
---|
5722 | | - * in copying default values around |
---|
5723 | | - */ |
---|
5724 | | - if (!root_cache->max_attr_size) |
---|
5725 | | - return; |
---|
5726 | | - |
---|
5727 | | - for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) { |
---|
5728 | | - char mbuf[64]; |
---|
5729 | | - char *buf; |
---|
5730 | | - struct slab_attribute *attr = to_slab_attr(slab_attrs[i]); |
---|
5731 | | - ssize_t len; |
---|
5732 | | - |
---|
5733 | | - if (!attr || !attr->store || !attr->show) |
---|
5734 | | - continue; |
---|
5735 | | - |
---|
5736 | | - /* |
---|
5737 | | - * It is really bad that we have to allocate here, so we will |
---|
5738 | | - * do it only as a fallback. If we actually allocate, though, |
---|
5739 | | - * we can just use the allocated buffer until the end. |
---|
5740 | | - * |
---|
5741 | | - * Most of the slub attributes will tend to be very small in |
---|
5742 | | - * size, but sysfs allows buffers up to a page, so they can |
---|
5743 | | - * theoretically happen. |
---|
5744 | | - */ |
---|
5745 | | - if (buffer) |
---|
5746 | | - buf = buffer; |
---|
5747 | | - else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf) && |
---|
5748 | | - !IS_ENABLED(CONFIG_SLUB_STATS)) |
---|
5749 | | - buf = mbuf; |
---|
5750 | | - else { |
---|
5751 | | - buffer = (char *) get_zeroed_page(GFP_KERNEL); |
---|
5752 | | - if (WARN_ON(!buffer)) |
---|
5753 | | - continue; |
---|
5754 | | - buf = buffer; |
---|
5755 | | - } |
---|
5756 | | - |
---|
5757 | | - len = attr->show(root_cache, buf); |
---|
5758 | | - if (len > 0) |
---|
5759 | | - attr->store(s, buf, len); |
---|
5760 | | - } |
---|
5761 | | - |
---|
5762 | | - if (buffer) |
---|
5763 | | - free_page((unsigned long)buffer); |
---|
5764 | | -#endif |
---|
5765 | 5515 | } |
---|
5766 | 5516 | |
---|
5767 | 5517 | static void kmem_cache_release(struct kobject *k) |
---|
.. | .. |
---|
5779 | 5529 | .release = kmem_cache_release, |
---|
5780 | 5530 | }; |
---|
5781 | 5531 | |
---|
5782 | | -static int uevent_filter(struct kset *kset, struct kobject *kobj) |
---|
5783 | | -{ |
---|
5784 | | - struct kobj_type *ktype = get_ktype(kobj); |
---|
5785 | | - |
---|
5786 | | - if (ktype == &slab_ktype) |
---|
5787 | | - return 1; |
---|
5788 | | - return 0; |
---|
5789 | | -} |
---|
5790 | | - |
---|
5791 | | -static const struct kset_uevent_ops slab_uevent_ops = { |
---|
5792 | | - .filter = uevent_filter, |
---|
5793 | | -}; |
---|
5794 | | - |
---|
5795 | 5532 | static struct kset *slab_kset; |
---|
5796 | 5533 | |
---|
5797 | 5534 | static inline struct kset *cache_kset(struct kmem_cache *s) |
---|
5798 | 5535 | { |
---|
5799 | | -#ifdef CONFIG_MEMCG |
---|
5800 | | - if (!is_root_cache(s)) |
---|
5801 | | - return s->memcg_params.root_cache->memcg_kset; |
---|
5802 | | -#endif |
---|
5803 | 5536 | return slab_kset; |
---|
5804 | 5537 | } |
---|
5805 | 5538 | |
---|
.. | .. |
---|
5814 | 5547 | char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL); |
---|
5815 | 5548 | char *p = name; |
---|
5816 | 5549 | |
---|
5817 | | - BUG_ON(!name); |
---|
| 5550 | + if (!name) |
---|
| 5551 | + return ERR_PTR(-ENOMEM); |
---|
5818 | 5552 | |
---|
5819 | 5553 | *p++ = ':'; |
---|
5820 | 5554 | /* |
---|
.. | .. |
---|
5842 | 5576 | return name; |
---|
5843 | 5577 | } |
---|
5844 | 5578 | |
---|
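create_unique_id() now reports allocation failure with ERR_PTR(-ENOMEM) instead of crashing on BUG_ON(), and its caller sysfs_slab_add() (further down in this patch) bails out via IS_ERR()/PTR_ERR(). For reference, a minimal userspace sketch of that idiom; the ERR_PTR/IS_ERR/PTR_ERR helpers below are simplified stand-ins for the kernel's include/linux/err.h, and make_name() is a hypothetical caller, not code from this patch:

/* Sketch of the ERR_PTR error-return idiom adopted above (userspace stand-ins). */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO	4095

static inline void *ERR_PTR(long error)    { return (void *)error; }
static inline long  PTR_ERR(const void *p) { return (long)p; }
static inline int   IS_ERR(const void *p)
{
	/* Error codes are encoded in the last page of the address space. */
	return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

/* Hypothetical analogue of create_unique_id(): may fail with -ENOMEM. */
static char *make_name(void)
{
	char *name = malloc(32);

	if (!name)
		return ERR_PTR(-ENOMEM);
	snprintf(name, 32, "example-%d", 64);
	return name;
}

int main(void)
{
	char *name = make_name();

	if (IS_ERR(name)) {	/* caller-side check, as in sysfs_slab_add() */
		fprintf(stderr, "make_name failed: %ld\n", PTR_ERR(name));
		return 1;
	}
	printf("%s\n", name);
	free(name);
	return 0;
}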
5845 | | -static void sysfs_slab_remove_workfn(struct work_struct *work) |
---|
5846 | | -{ |
---|
5847 | | - struct kmem_cache *s = |
---|
5848 | | - container_of(work, struct kmem_cache, kobj_remove_work); |
---|
5849 | | - |
---|
5850 | | - if (!s->kobj.state_in_sysfs) |
---|
5851 | | - /* |
---|
5852 | | - * For a memcg cache, this may be called during |
---|
5853 | | - * deactivation and again on shutdown. Remove only once. |
---|
5854 | | - * A cache is never shut down before deactivation is |
---|
5855 | | - * complete, so no need to worry about synchronization. |
---|
5856 | | - */ |
---|
5857 | | - goto out; |
---|
5858 | | - |
---|
5859 | | -#ifdef CONFIG_MEMCG |
---|
5860 | | - kset_unregister(s->memcg_kset); |
---|
5861 | | -#endif |
---|
5862 | | - kobject_uevent(&s->kobj, KOBJ_REMOVE); |
---|
5863 | | -out: |
---|
5864 | | - kobject_put(&s->kobj); |
---|
5865 | | -} |
---|
5866 | | - |
---|
5867 | 5579 | static int sysfs_slab_add(struct kmem_cache *s) |
---|
5868 | 5580 | { |
---|
5869 | 5581 | int err; |
---|
5870 | 5582 | const char *name; |
---|
5871 | 5583 | struct kset *kset = cache_kset(s); |
---|
5872 | 5584 | int unmergeable = slab_unmergeable(s); |
---|
5873 | | - |
---|
5874 | | - INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn); |
---|
5875 | 5585 | |
---|
5876 | 5586 | if (!kset) { |
---|
5877 | 5587 | kobject_init(&s->kobj, &slab_ktype); |
---|
.. | .. |
---|
5896 | 5606 | * for the symlinks. |
---|
5897 | 5607 | */ |
---|
5898 | 5608 | name = create_unique_id(s); |
---|
| 5609 | + if (IS_ERR(name)) |
---|
| 5610 | + return PTR_ERR(name); |
---|
5899 | 5611 | } |
---|
5900 | 5612 | |
---|
5901 | 5613 | s->kobj.kset = kset; |
---|
.. | .. |
---|
5907 | 5619 | if (err) |
---|
5908 | 5620 | goto out_del_kobj; |
---|
5909 | 5621 | |
---|
5910 | | -#ifdef CONFIG_MEMCG |
---|
5911 | | - if (is_root_cache(s) && memcg_sysfs_enabled) { |
---|
5912 | | - s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj); |
---|
5913 | | - if (!s->memcg_kset) { |
---|
5914 | | - err = -ENOMEM; |
---|
5915 | | - goto out_del_kobj; |
---|
5916 | | - } |
---|
5917 | | - } |
---|
5918 | | -#endif |
---|
5919 | | - |
---|
5920 | | - kobject_uevent(&s->kobj, KOBJ_ADD); |
---|
5921 | 5622 | if (!unmergeable) { |
---|
5922 | 5623 | /* Setup first alias */ |
---|
5923 | 5624 | sysfs_slab_alias(s, s->name); |
---|
.. | .. |
---|
5929 | 5630 | out_del_kobj: |
---|
5930 | 5631 | kobject_del(&s->kobj); |
---|
5931 | 5632 | goto out; |
---|
5932 | | -} |
---|
5933 | | - |
---|
5934 | | -static void sysfs_slab_remove(struct kmem_cache *s) |
---|
5935 | | -{ |
---|
5936 | | - if (slab_state < FULL) |
---|
5937 | | - /* |
---|
5938 | | - * Sysfs has not been setup yet so no need to remove the |
---|
5939 | | - * cache from sysfs. |
---|
5940 | | - */ |
---|
5941 | | - return; |
---|
5942 | | - |
---|
5943 | | - kobject_get(&s->kobj); |
---|
5944 | | - schedule_work(&s->kobj_remove_work); |
---|
5945 | 5633 | } |
---|
5946 | 5634 | |
---|
5947 | 5635 | void sysfs_slab_unlink(struct kmem_cache *s) |
---|
.. | .. |
---|
5998 | 5686 | |
---|
5999 | 5687 | mutex_lock(&slab_mutex); |
---|
6000 | 5688 | |
---|
6001 | | - slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj); |
---|
| 5689 | + slab_kset = kset_create_and_add("slab", NULL, kernel_kobj); |
---|
6002 | 5690 | if (!slab_kset) { |
---|
6003 | 5691 | mutex_unlock(&slab_mutex); |
---|
6004 | 5692 | pr_err("Cannot register slab subsystem.\n"); |
---|
.. | .. |
---|
6033 | 5721 | __initcall(slab_sysfs_init); |
---|
6034 | 5722 | #endif /* CONFIG_SLUB_SYSFS */ |
---|
6035 | 5723 | |
---|
| 5724 | +#if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS) |
---|
| 5725 | +static int slab_debugfs_show(struct seq_file *seq, void *v) |
---|
| 5726 | +{ |
---|
| 5727 | + struct loc_track *t = seq->private; |
---|
| 5728 | + struct location *l; |
---|
| 5729 | + unsigned long idx; |
---|
| 5730 | + |
---|
| 5731 | + idx = (unsigned long) t->idx; |
---|
| 5732 | + if (idx < t->count) { |
---|
| 5733 | + l = &t->loc[idx]; |
---|
| 5734 | + |
---|
| 5735 | + seq_printf(seq, "%7ld ", l->count); |
---|
| 5736 | + |
---|
| 5737 | + if (l->addr) |
---|
| 5738 | + seq_printf(seq, "%pS", (void *)l->addr); |
---|
| 5739 | + else |
---|
| 5740 | + seq_puts(seq, "<not-available>"); |
---|
| 5741 | + |
---|
| 5742 | + if (l->sum_time != l->min_time) { |
---|
| 5743 | + seq_printf(seq, " age=%ld/%llu/%ld", |
---|
| 5744 | + l->min_time, div_u64(l->sum_time, l->count), |
---|
| 5745 | + l->max_time); |
---|
| 5746 | + } else |
---|
| 5747 | + seq_printf(seq, " age=%ld", l->min_time); |
---|
| 5748 | + |
---|
| 5749 | + if (l->min_pid != l->max_pid) |
---|
| 5750 | + seq_printf(seq, " pid=%ld-%ld", l->min_pid, l->max_pid); |
---|
| 5751 | + else |
---|
| 5752 | + seq_printf(seq, " pid=%ld", |
---|
| 5753 | + l->min_pid); |
---|
| 5754 | + |
---|
| 5755 | + if (num_online_cpus() > 1 && !cpumask_empty(to_cpumask(l->cpus))) |
---|
| 5756 | + seq_printf(seq, " cpus=%*pbl", |
---|
| 5757 | + cpumask_pr_args(to_cpumask(l->cpus))); |
---|
| 5758 | + |
---|
| 5759 | + if (nr_online_nodes > 1 && !nodes_empty(l->nodes)) |
---|
| 5760 | + seq_printf(seq, " nodes=%*pbl", |
---|
| 5761 | + nodemask_pr_args(&l->nodes)); |
---|
| 5762 | + |
---|
| 5763 | + seq_puts(seq, "\n"); |
---|
| 5764 | + } |
---|
| 5765 | + |
---|
| 5766 | + if (!idx && !t->count) |
---|
| 5767 | + seq_puts(seq, "No data\n"); |
---|
| 5768 | + |
---|
| 5769 | + return 0; |
---|
| 5770 | +} |
---|
| 5771 | + |
---|
| 5772 | +static void slab_debugfs_stop(struct seq_file *seq, void *v) |
---|
| 5773 | +{ |
---|
| 5774 | +} |
---|
| 5775 | + |
---|
| 5776 | +static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos) |
---|
| 5777 | +{ |
---|
| 5778 | + struct loc_track *t = seq->private; |
---|
| 5779 | + |
---|
| 5780 | + t->idx = ++(*ppos); |
---|
| 5781 | + if (*ppos <= t->count) |
---|
| 5782 | + return ppos; |
---|
| 5783 | + |
---|
| 5784 | + return NULL; |
---|
| 5785 | +} |
---|
| 5786 | + |
---|
| 5787 | +static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos) |
---|
| 5788 | +{ |
---|
| 5789 | + struct loc_track *t = seq->private; |
---|
| 5790 | + |
---|
| 5791 | + t->idx = *ppos; |
---|
| 5792 | + return ppos; |
---|
| 5793 | +} |
---|
| 5794 | + |
---|
| 5795 | +static const struct seq_operations slab_debugfs_sops = { |
---|
| 5796 | + .start = slab_debugfs_start, |
---|
| 5797 | + .next = slab_debugfs_next, |
---|
| 5798 | + .stop = slab_debugfs_stop, |
---|
| 5799 | + .show = slab_debugfs_show, |
---|
| 5800 | +}; |
---|
| 5801 | + |
---|
| 5802 | +static int slab_debug_trace_open(struct inode *inode, struct file *filep) |
---|
| 5803 | +{ |
---|
| 5804 | + |
---|
| 5805 | + struct kmem_cache_node *n; |
---|
| 5806 | + enum track_item alloc; |
---|
| 5807 | + int node; |
---|
| 5808 | + struct loc_track *t = __seq_open_private(filep, &slab_debugfs_sops, |
---|
| 5809 | + sizeof(struct loc_track)); |
---|
| 5810 | + struct kmem_cache *s = file_inode(filep)->i_private; |
---|
| 5811 | + unsigned long *obj_map; |
---|
| 5812 | + |
---|
| 5813 | + if (!t) |
---|
| 5814 | + return -ENOMEM; |
---|
| 5815 | + |
---|
| 5816 | + obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL); |
---|
| 5817 | + if (!obj_map) { |
---|
| 5818 | + seq_release_private(inode, filep); |
---|
| 5819 | + return -ENOMEM; |
---|
| 5820 | + } |
---|
| 5821 | + |
---|
| 5822 | + if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0) |
---|
| 5823 | + alloc = TRACK_ALLOC; |
---|
| 5824 | + else |
---|
| 5825 | + alloc = TRACK_FREE; |
---|
| 5826 | + |
---|
| 5827 | + if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) { |
---|
| 5828 | + bitmap_free(obj_map); |
---|
| 5829 | + seq_release_private(inode, filep); |
---|
| 5830 | + return -ENOMEM; |
---|
| 5831 | + } |
---|
| 5832 | + |
---|
| 5833 | + /* Push back cpu slabs */ |
---|
| 5834 | + flush_all(s); |
---|
| 5835 | + |
---|
| 5836 | + for_each_kmem_cache_node(s, node, n) { |
---|
| 5837 | + unsigned long flags; |
---|
| 5838 | + struct page *page; |
---|
| 5839 | + |
---|
| 5840 | + if (!atomic_long_read(&n->nr_slabs)) |
---|
| 5841 | + continue; |
---|
| 5842 | + |
---|
| 5843 | + spin_lock_irqsave(&n->list_lock, flags); |
---|
| 5844 | + list_for_each_entry(page, &n->partial, slab_list) |
---|
| 5845 | + process_slab(t, s, page, alloc, obj_map); |
---|
| 5846 | + list_for_each_entry(page, &n->full, slab_list) |
---|
| 5847 | + process_slab(t, s, page, alloc, obj_map); |
---|
| 5848 | + spin_unlock_irqrestore(&n->list_lock, flags); |
---|
| 5849 | + } |
---|
| 5850 | + |
---|
| 5851 | + bitmap_free(obj_map); |
---|
| 5852 | + return 0; |
---|
| 5853 | +} |
---|
| 5854 | + |
---|
| 5855 | +static int slab_debug_trace_release(struct inode *inode, struct file *file) |
---|
| 5856 | +{ |
---|
| 5857 | + struct seq_file *seq = file->private_data; |
---|
| 5858 | + struct loc_track *t = seq->private; |
---|
| 5859 | + |
---|
| 5860 | + free_loc_track(t); |
---|
| 5861 | + return seq_release_private(inode, file); |
---|
| 5862 | +} |
---|
| 5863 | + |
---|
| 5864 | +static const struct file_operations slab_debugfs_fops = { |
---|
| 5865 | + .open = slab_debug_trace_open, |
---|
| 5866 | + .read = seq_read, |
---|
| 5867 | + .llseek = seq_lseek, |
---|
| 5868 | + .release = slab_debug_trace_release, |
---|
| 5869 | +}; |
---|
| 5870 | + |
---|
| 5871 | +static void debugfs_slab_add(struct kmem_cache *s) |
---|
| 5872 | +{ |
---|
| 5873 | + struct dentry *slab_cache_dir; |
---|
| 5874 | + |
---|
| 5875 | + if (unlikely(!slab_debugfs_root)) |
---|
| 5876 | + return; |
---|
| 5877 | + |
---|
| 5878 | + slab_cache_dir = debugfs_create_dir(s->name, slab_debugfs_root); |
---|
| 5879 | + |
---|
| 5880 | + debugfs_create_file("alloc_traces", 0400, |
---|
| 5881 | + slab_cache_dir, s, &slab_debugfs_fops); |
---|
| 5882 | + |
---|
| 5883 | + debugfs_create_file("free_traces", 0400, |
---|
| 5884 | + slab_cache_dir, s, &slab_debugfs_fops); |
---|
| 5885 | +} |
---|
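debugfs_slab_add() above follows the standard debugfs pattern: create a per-cache directory under the "slab" debugfs root and populate it with seq_file-backed, root-readable files. The self-contained module below sketches that same pattern in miniature, using DEFINE_SHOW_ATTRIBUTE() (which wraps single_open()) instead of the hand-rolled seq_operations used for the trace files; every identifier in it (demo_dir, demo_show, and so on) is hypothetical and not part of this patch:

/* Minimal sketch of the debugfs + seq_file pattern used by debugfs_slab_add(). */
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/seq_file.h>

static struct dentry *demo_dir;

static int demo_show(struct seq_file *m, void *v)
{
	seq_puts(m, "hello from debugfs\n");
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(demo);	/* generates demo_fops around single_open() */

static int __init demo_init(void)
{
	demo_dir = debugfs_create_dir("demo", NULL);
	debugfs_create_file("state", 0400, demo_dir, NULL, &demo_fops);
	return 0;
}

static void __exit demo_exit(void)
{
	/* Same teardown idea as debugfs_slab_release() just below. */
	debugfs_remove_recursive(demo_dir);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");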
| 5886 | + |
---|
| 5887 | +void debugfs_slab_release(struct kmem_cache *s) |
---|
| 5888 | +{ |
---|
| 5889 | + debugfs_remove_recursive(debugfs_lookup(s->name, slab_debugfs_root)); |
---|
| 5890 | +} |
---|
| 5891 | + |
---|
| 5892 | +static int __init slab_debugfs_init(void) |
---|
| 5893 | +{ |
---|
| 5894 | + struct kmem_cache *s; |
---|
| 5895 | + |
---|
| 5896 | + slab_debugfs_root = debugfs_create_dir("slab", NULL); |
---|
| 5897 | + |
---|
| 5898 | + list_for_each_entry(s, &slab_caches, list) |
---|
| 5899 | + if (s->flags & SLAB_STORE_USER) |
---|
| 5900 | + debugfs_slab_add(s); |
---|
| 5901 | + |
---|
| 5902 | + return 0; |
---|
| 5903 | + |
---|
| 5904 | +} |
---|
| 5905 | +__initcall(slab_debugfs_init); |
---|
| 5906 | +#endif |
---|
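The new alloc_traces and free_traces files take over from the alloc_calls/free_calls sysfs attributes removed earlier in this patch. They sit under the debugfs "slab" directory, are created only for caches with SLAB_STORE_USER enabled (see slab_debugfs_init() above), and are root-readable only (mode 0400). A small userspace sketch for consuming one of them; the kmalloc-64 cache name and the /sys/kernel/debug mount point are assumptions:

/* Dump one cache's recorded allocation call sites from the new debugfs files.
 * Assumes debugfs is mounted at /sys/kernel/debug, the cache exists, and the
 * kernel booted with slub_debug=U so SLAB_STORE_USER tracking is active. */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/slab/kmalloc-64/alloc_traces";
	char line[512];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	/* Each record is one line, formatted by slab_debugfs_show() above:
	 * "<count> <call site> age=<min/avg/max> pid=... cpus=... nodes=..." */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}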
6036 | 5907 | /* |
---|
6037 | 5908 | * The /proc/slabinfo ABI |
---|
6038 | 5909 | */ |
---|
.. | .. |
---|
6058 | 5929 | sinfo->objects_per_slab = oo_objects(s->oo); |
---|
6059 | 5930 | sinfo->cache_order = oo_order(s->oo); |
---|
6060 | 5931 | } |
---|
| 5932 | +EXPORT_SYMBOL_GPL(get_slabinfo); |
---|
6061 | 5933 | |
---|
6062 | 5934 | void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s) |
---|
6063 | 5935 | { |
---|