2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/mm/slub.c
....@@ -28,6 +28,7 @@
2828 #include <linux/ctype.h>
2929 #include <linux/debugobjects.h>
3030 #include <linux/kallsyms.h>
31
+#include <linux/kfence.h>
3132 #include <linux/memory.h>
3233 #include <linux/math64.h>
3334 #include <linux/fault-inject.h>
....@@ -36,7 +37,9 @@
3637 #include <linux/memcontrol.h>
3738 #include <linux/random.h>
3839
40
+#include <linux/debugfs.h>
3941 #include <trace/events/kmem.h>
42
+#include <trace/hooks/mm.h>
4043
4144 #include "internal.h"
4245
....@@ -59,10 +62,11 @@
5962 * D. page->frozen -> frozen state
6063 *
6164 * If a slab is frozen then it is exempt from list management. It is not
62
- * on any list. The processor that froze the slab is the one who can
63
- * perform list operations on the page. Other processors may put objects
64
- * onto the freelist but the processor that froze the slab is the only
65
- * one that can retrieve the objects from the page's freelist.
65
+ * on any list except per cpu partial list. The processor that froze the
66
+ * slab is the one who can perform list operations on the page. Other
67
+ * processors may put objects onto the freelist but the processor that
68
+ * froze the slab is the only one that can retrieve the objects from the
69
+ * page's freelist.
6670 *
6771 * The list_lock protects the partial and full list on each node and
6872 * the partial slab counter. If taken then no new slabs may be added or
....@@ -93,9 +97,7 @@
9397 * minimal so we rely on the page allocators per cpu caches for
9498 * fast frees and allocs.
9599 *
96
- * Overloading of page flags that are otherwise used for LRU management.
97
- *
98
- * PageActive The slab is frozen and exempt from list processing.
100
+ * page->frozen The slab is frozen and exempt from list processing.
99101 * This means that the slab is dedicated to a purpose
100102 * such as satisfying allocations for a specific
101103 * processor. Objects may be freed in the slab while
....@@ -111,23 +113,27 @@
111113 * free objects in addition to the regular freelist
112114 * that requires the slab lock.
113115 *
114
- * PageError Slab requires special handling due to debug
116
+ * SLAB_DEBUG_FLAGS Slab requires special handling due to debug
115117 * options set. This moves slab handling out of
116118 * the fast path and disables lockless freelists.
117119 */
118120
119
-static inline int kmem_cache_debug(struct kmem_cache *s)
120
-{
121121 #ifdef CONFIG_SLUB_DEBUG
122
- return unlikely(s->flags & SLAB_DEBUG_FLAGS);
122
+#ifdef CONFIG_SLUB_DEBUG_ON
123
+DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
123124 #else
124
- return 0;
125
+DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
125126 #endif
127
+#endif
128
+
129
+static inline bool kmem_cache_debug(struct kmem_cache *s)
130
+{
131
+ return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
126132 }
127133
128134 void *fixup_red_left(struct kmem_cache *s, void *p)
129135 {
130
- if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
136
+ if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
131137 p += s->red_left_pad;
132138
133139 return p;
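Not part of the diff: kmem_cache_debug() now routes through kmem_cache_debug_flags(), whose definition lives outside the hunks shown here (in mm/slab.h or earlier in mm/slub.c, depending on the tree). As a rough sketch of what that helper does with the static key introduced above:

static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t flags)
{
	/* Sketch only; the real definition is outside this diff. */
	VM_WARN_ON_ONCE(!(flags & SLAB_DEBUG_FLAGS));
#ifdef CONFIG_SLUB_DEBUG
	if (static_branch_unlikely(&slub_debug_enabled))
		return s->flags & flags;
#endif
	return false;
}

The practical effect is that, when slub_debug stays disabled at boot, the debug checks in the fast paths collapse to a statically patched branch instead of a load and test of s->flags.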
....@@ -197,33 +203,19 @@
197203 /* Use cmpxchg_double */
198204 #define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U)
199205
200
-/*
201
- * Tracking user of a slab.
202
- */
203
-#define TRACK_ADDRS_COUNT 16
204
-struct track {
205
- unsigned long addr; /* Called from address */
206
-#ifdef CONFIG_STACKTRACE
207
- unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */
208
-#endif
209
- int cpu; /* Was running on cpu */
210
- int pid; /* Pid context */
211
- unsigned long when; /* When did the operation occur */
212
-};
213
-
214
-enum track_item { TRACK_ALLOC, TRACK_FREE };
215
-
216206 #ifdef CONFIG_SLUB_SYSFS
217207 static int sysfs_slab_add(struct kmem_cache *);
218208 static int sysfs_slab_alias(struct kmem_cache *, const char *);
219
-static void memcg_propagate_slab_attrs(struct kmem_cache *s);
220
-static void sysfs_slab_remove(struct kmem_cache *s);
221209 #else
222210 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
223211 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
224212 { return 0; }
225
-static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
226
-static inline void sysfs_slab_remove(struct kmem_cache *s) { }
213
+#endif
214
+
215
+#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
216
+static void debugfs_slab_add(struct kmem_cache *);
217
+#else
218
+static inline void debugfs_slab_add(struct kmem_cache *s) { }
227219 #endif
228220
229221 static inline void stat(const struct kmem_cache *s, enum stat_item si)
....@@ -251,7 +243,7 @@
251243 {
252244 #ifdef CONFIG_SLAB_FREELIST_HARDENED
253245 /*
254
- * When CONFIG_KASAN_SW_TAGS is enabled, ptr_addr might be tagged.
246
+ * When CONFIG_KASAN_SW/HW_TAGS is enabled, ptr_addr might be tagged.
255247 * Normally, this doesn't cause any issues, as both set_freepointer()
256248 * and get_freepointer() are called with a pointer with the same tag.
257249 * However, there are some issues with CONFIG_SLUB_DEBUG code. For
....@@ -277,6 +269,7 @@
277269
278270 static inline void *get_freepointer(struct kmem_cache *s, void *object)
279271 {
272
+ object = kasan_reset_tag(object);
280273 return freelist_dereference(s, object + s->offset);
281274 }
282275
....@@ -290,11 +283,12 @@
290283 unsigned long freepointer_addr;
291284 void *p;
292285
293
- if (!debug_pagealloc_enabled())
286
+ if (!debug_pagealloc_enabled_static())
294287 return get_freepointer(s, object);
295288
289
+ object = kasan_reset_tag(object);
296290 freepointer_addr = (unsigned long)object + s->offset;
297
- probe_kernel_read(&p, (void **)freepointer_addr, sizeof(p));
291
+ copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p));
298292 return freelist_ptr(s, p, freepointer_addr);
299293 }
300294
....@@ -306,6 +300,7 @@
306300 BUG_ON(object == fp); /* naive detection of double free or corruption */
307301 #endif
308302
303
+ freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr);
309304 *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
310305 }
311306
....@@ -314,12 +309,6 @@
314309 for (__p = fixup_red_left(__s, __addr); \
315310 __p < (__addr) + (__objects) * (__s)->size; \
316311 __p += (__s)->size)
317
-
318
-/* Determine object index from a given position */
319
-static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr)
320
-{
321
- return (kasan_reset_tag(p) - addr) / s->size;
322
-}
323312
324313 static inline unsigned int order_objects(unsigned int order, unsigned int size)
325314 {
....@@ -441,19 +430,43 @@
441430 }
442431
443432 #ifdef CONFIG_SLUB_DEBUG
433
+static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
434
+static DEFINE_SPINLOCK(object_map_lock);
435
+
436
+static void __fill_map(unsigned long *obj_map, struct kmem_cache *s,
437
+ struct page *page)
438
+{
439
+ void *addr = page_address(page);
440
+ void *p;
441
+
442
+ bitmap_zero(obj_map, page->objects);
443
+
444
+ for (p = page->freelist; p; p = get_freepointer(s, p))
445
+ set_bit(__obj_to_index(s, addr, p), obj_map);
446
+}
447
+
444448 /*
445449 * Determine a map of object in use on a page.
446450 *
447451 * Node listlock must be held to guarantee that the page does
448452 * not vanish from under us.
449453 */
450
-static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
454
+static unsigned long *get_map(struct kmem_cache *s, struct page *page)
455
+ __acquires(&object_map_lock)
451456 {
452
- void *p;
453
- void *addr = page_address(page);
457
+ VM_BUG_ON(!irqs_disabled());
454458
455
- for (p = page->freelist; p; p = get_freepointer(s, p))
456
- set_bit(slab_index(p, s, addr), map);
459
+ spin_lock(&object_map_lock);
460
+
461
+ __fill_map(object_map, s, page);
462
+
463
+ return object_map;
464
+}
465
+
466
+static void put_map(unsigned long *map) __releases(&object_map_lock)
467
+{
468
+ VM_BUG_ON(map != object_map);
469
+ spin_unlock(&object_map_lock);
457470 }
458471
459472 static inline unsigned int size_from_object(struct kmem_cache *s)
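A sketch, not part of the diff, of the calling convention the new get_map()/put_map() pair implies. The walker below is hypothetical; the real users are the validation and list walkers elsewhere in the file, which already hold the node's list_lock with interrupts disabled:

/* Hypothetical caller; n->list_lock must already be held with IRQs off. */
static void walk_free_objects(struct kmem_cache *s, struct page *page)
{
	void *addr = page_address(page);
	unsigned long *map;
	void *p;

	map = get_map(s, page);			/* takes object_map_lock */
	for_each_object(p, s, addr, page->objects) {
		if (test_bit(__obj_to_index(s, addr, p), map)) {
			/* bit set: object p is currently on the freelist */
		}
	}
	put_map(map);				/* releases object_map_lock */
}

Because object_map is a single shared bitmap serialized by object_map_lock, only one such walk can be in flight at a time, which is what the VM_BUG_ON(map != object_map) in put_map() asserts.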
....@@ -476,12 +489,12 @@
476489 * Debug settings:
477490 */
478491 #if defined(CONFIG_SLUB_DEBUG_ON)
479
-static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
492
+slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
480493 #else
481
-static slab_flags_t slub_debug;
494
+slab_flags_t slub_debug;
482495 #endif
483496
484
-static char *slub_debug_slabs;
497
+static char *slub_debug_string;
485498 static int disable_higher_order_debug;
486499
487500 /*
....@@ -528,9 +541,29 @@
528541 unsigned int length)
529542 {
530543 metadata_access_enable();
531
- print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
532
- length, 1);
544
+ print_hex_dump(level, text, DUMP_PREFIX_ADDRESS,
545
+ 16, 1, kasan_reset_tag((void *)addr), length, 1);
533546 metadata_access_disable();
547
+}
548
+
549
+/*
550
+ * See comment in calculate_sizes().
551
+ */
552
+static inline bool freeptr_outside_object(struct kmem_cache *s)
553
+{
554
+ return s->offset >= s->inuse;
555
+}
556
+
557
+/*
558
+ * Return offset of the end of info block which is inuse + free pointer if
559
+ * not overlapping with object.
560
+ */
561
+static inline unsigned int get_info_end(struct kmem_cache *s)
562
+{
563
+ if (freeptr_outside_object(s))
564
+ return s->inuse + sizeof(void *);
565
+ else
566
+ return s->inuse;
534567 }
535568
536569 static struct track *get_track(struct kmem_cache *s, void *object,
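A worked illustration, not taken from the patch and with made-up sizes, of the layout get_info_end() and freeptr_outside_object() describe. It assumes SLAB_RED_ZONE and SLAB_STORE_USER are set and the free pointer is stored outside the object:

/*
 * Example only: object_size = 40, inuse = 48, offset = 48.
 * Offsets are relative to the object pointer.
 *
 *   [  0 .. 39 ]   object payload
 *   [ 40 .. 47 ]   right red zone (inuse - object_size bytes)
 *   [ 48 .. 55 ]   free pointer (s->offset)
 *   [ 56 .. .. ]   alloc/free struct track pair, then padding
 *
 * freeptr_outside_object() is true here (offset >= inuse), so
 * get_info_end() returns inuse + sizeof(void *) = 56, the start of the
 * tracking records.  If the free pointer instead overlaps the object,
 * get_info_end() is simply inuse.
 */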
....@@ -538,13 +571,45 @@
538571 {
539572 struct track *p;
540573
541
- if (s->offset)
542
- p = object + s->offset + sizeof(void *);
543
- else
544
- p = object + s->inuse;
574
+ p = object + get_info_end(s);
545575
546
- return p + alloc;
576
+ return kasan_reset_tag(p + alloc);
547577 }
578
+
579
+/*
580
+ * This function will be used to loop through all the slab objects in
581
+ * a page to give track structure for each object, the function fn will
582
+ * be using this track structure and extract required info into its private
583
+ * data, the return value will be the number of track structures that are
584
+ * processed.
585
+ */
586
+unsigned long get_each_object_track(struct kmem_cache *s,
587
+ struct page *page, enum track_item alloc,
588
+ int (*fn)(const struct kmem_cache *, const void *,
589
+ const struct track *, void *), void *private)
590
+{
591
+ void *p;
592
+ struct track *t;
593
+ int ret;
594
+ unsigned long num_track = 0;
595
+
596
+ if (!slub_debug || !(s->flags & SLAB_STORE_USER))
597
+ return 0;
598
+
599
+ slab_lock(page);
600
+ for_each_object(p, s, page_address(page), page->objects) {
601
+ t = get_track(s, p, alloc);
602
+ metadata_access_enable();
603
+ ret = fn(s, p, t, private);
604
+ metadata_access_disable();
605
+ if (ret < 0)
606
+ break;
607
+ num_track += 1;
608
+ }
609
+ slab_unlock(page);
610
+ return num_track;
611
+}
612
+EXPORT_SYMBOL_GPL(get_each_object_track);
548613
549614 static void set_track(struct kmem_cache *s, void *object,
550615 enum track_item alloc, unsigned long addr)
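A minimal sketch, not part of the diff, of how a module might use the newly exported get_each_object_track(). The callback and the bookkeeping struct are invented for illustration; only the iterator's signature comes from the patch:

/* Count objects in a slab page whose allocation was recorded at 'ip'. */
struct track_counter {
	unsigned long ip;
	unsigned long hits;
};

static int count_alloc_site(const struct kmem_cache *s, const void *object,
			    const struct track *t, void *private)
{
	struct track_counter *tc = private;

	if (t->addr == tc->ip)
		tc->hits++;
	return 0;	/* a negative return stops the walk early */
}

/*
 * Caller side, for a page known to belong to cache 's':
 *	struct track_counter tc = { .ip = ip_of_interest };
 *	get_each_object_track(s, page, TRACK_ALLOC, count_alloc_site, &tc);
 * The return value is the number of track records the callback saw.
 */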
....@@ -553,31 +618,25 @@
553618
554619 if (addr) {
555620 #ifdef CONFIG_STACKTRACE
556
- struct stack_trace trace;
557
- int i;
621
+ unsigned int nr_entries;
558622
559
- trace.nr_entries = 0;
560
- trace.max_entries = TRACK_ADDRS_COUNT;
561
- trace.entries = p->addrs;
562
- trace.skip = 3;
563623 metadata_access_enable();
564
- save_stack_trace(&trace);
624
+ nr_entries = stack_trace_save(kasan_reset_tag(p->addrs),
625
+ TRACK_ADDRS_COUNT, 3);
565626 metadata_access_disable();
566627
567
- /* See rant in lockdep.c */
568
- if (trace.nr_entries != 0 &&
569
- trace.entries[trace.nr_entries - 1] == ULONG_MAX)
570
- trace.nr_entries--;
571
-
572
- for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
573
- p->addrs[i] = 0;
628
+ if (nr_entries < TRACK_ADDRS_COUNT)
629
+ p->addrs[nr_entries] = 0;
630
+ trace_android_vh_save_track_hash(alloc == TRACK_ALLOC,
631
+ (unsigned long)p);
574632 #endif
575633 p->addr = addr;
576634 p->cpu = smp_processor_id();
577635 p->pid = current->pid;
578636 p->when = jiffies;
579
- } else
637
+ } else {
580638 memset(p, 0, sizeof(struct track));
639
+ }
581640 }
582641
583642 static void init_tracking(struct kmem_cache *s, void *object)
....@@ -608,7 +667,7 @@
608667 #endif
609668 }
610669
611
-static void print_tracking(struct kmem_cache *s, void *object)
670
+void print_tracking(struct kmem_cache *s, void *object)
612671 {
613672 unsigned long pr_time = jiffies;
614673 if (!(s->flags & SLAB_STORE_USER))
....@@ -636,8 +695,6 @@
636695 pr_err("=============================================================================\n");
637696 pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
638697 pr_err("-----------------------------------------------------------------------------\n\n");
639
-
640
- add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
641698 va_end(args);
642699 }
643700
....@@ -691,10 +748,7 @@
691748 print_section(KERN_ERR, "Redzone ", p + s->object_size,
692749 s->inuse - s->object_size);
693750
694
- if (s->offset)
695
- off = s->offset + sizeof(void *);
696
- else
697
- off = s->inuse;
751
+ off = get_info_end(s);
698752
699753 if (s->flags & SLAB_STORE_USER)
700754 off += 2 * sizeof(struct track);
....@@ -714,6 +768,7 @@
714768 {
715769 slab_bug(s, "%s", reason);
716770 print_trailer(s, page, object);
771
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
717772 }
718773
719774 static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
....@@ -728,11 +783,12 @@
728783 slab_bug(s, "%s", buf);
729784 print_page_info(page);
730785 dump_stack();
786
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
731787 }
732788
733789 static void init_object(struct kmem_cache *s, void *object, u8 val)
734790 {
735
- u8 *p = object;
791
+ u8 *p = kasan_reset_tag(object);
736792
737793 if (s->flags & SLAB_RED_ZONE)
738794 memset(p - s->red_left_pad, val, s->red_left_pad);
....@@ -759,9 +815,10 @@
759815 {
760816 u8 *fault;
761817 u8 *end;
818
+ u8 *addr = page_address(page);
762819
763820 metadata_access_enable();
764
- fault = memchr_inv(start, value, bytes);
821
+ fault = memchr_inv(kasan_reset_tag(start), value, bytes);
765822 metadata_access_disable();
766823 if (!fault)
767824 return 1;
....@@ -771,9 +828,11 @@
771828 end--;
772829
773830 slab_bug(s, "%s overwritten", what);
774
- pr_err("INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
775
- fault, end - 1, fault[0], value);
831
+ pr_err("INFO: 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
832
+ fault, end - 1, fault - addr,
833
+ fault[0], value);
776834 print_trailer(s, page, object);
835
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
777836
778837 restore_bytes(s, what, value, fault, end);
779838 return 0;
....@@ -785,7 +844,7 @@
785844 * object address
786845 * Bytes of the object to be managed.
787846 * If the freepointer may overlay the object then the free
788
- * pointer is the first word of the object.
847
+ * pointer is at the middle of the object.
789848 *
790849 * Poisoning uses 0x6b (POISON_FREE) and the last byte is
791850 * 0xa5 (POISON_END)
....@@ -819,11 +878,7 @@
819878
820879 static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
821880 {
822
- unsigned long off = s->inuse; /* The end of info */
823
-
824
- if (s->offset)
825
- /* Freepointer is placed after the object. */
826
- off += sizeof(void *);
881
+ unsigned long off = get_info_end(s); /* The end of info */
827882
828883 if (s->flags & SLAB_STORE_USER)
829884 /* We also have user information there */
....@@ -852,7 +907,7 @@
852907 return 1;
853908
854909 start = page_address(page);
855
- length = PAGE_SIZE << compound_order(page);
910
+ length = page_size(page);
856911 end = start + length;
857912 remainder = length % s->size;
858913 if (!remainder)
....@@ -860,14 +915,15 @@
860915
861916 pad = end - remainder;
862917 metadata_access_enable();
863
- fault = memchr_inv(pad, POISON_INUSE, remainder);
918
+ fault = memchr_inv(kasan_reset_tag(pad), POISON_INUSE, remainder);
864919 metadata_access_disable();
865920 if (!fault)
866921 return 1;
867922 while (end > fault && end[-1] == POISON_INUSE)
868923 end--;
869924
870
- slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
925
+ slab_err(s, page, "Padding overwritten. 0x%p-0x%p @offset=%tu",
926
+ fault, end - 1, fault - start);
871927 print_section(KERN_ERR, "Padding ", pad, remainder);
872928
873929 restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
....@@ -909,7 +965,7 @@
909965 check_pad_bytes(s, page, p);
910966 }
911967
912
- if (!s->offset && val == SLUB_RED_ACTIVE)
968
+ if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
913969 /*
914970 * Object and freepointer overlap. Cannot check
915971 * freepointer while object is allocated.
....@@ -1038,7 +1094,7 @@
10381094 return;
10391095
10401096 lockdep_assert_held(&n->list_lock);
1041
- list_add(&page->lru, &n->full);
1097
+ list_add(&page->slab_list, &n->full);
10421098 }
10431099
10441100 static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
....@@ -1047,7 +1103,7 @@
10471103 return;
10481104
10491105 lockdep_assert_held(&n->list_lock);
1050
- list_del(&page->lru);
1106
+ list_del(&page->slab_list);
10511107 }
10521108
10531109 /* Tracking of the number of slabs for debugging purposes */
....@@ -1090,26 +1146,26 @@
10901146 static void setup_object_debug(struct kmem_cache *s, struct page *page,
10911147 void *object)
10921148 {
1093
- if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1149
+ if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))
10941150 return;
10951151
10961152 init_object(s, object, SLUB_RED_INACTIVE);
10971153 init_tracking(s, object);
10981154 }
10991155
1100
-static void setup_page_debug(struct kmem_cache *s, void *addr, int order)
1156
+static
1157
+void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
11011158 {
1102
- if (!(s->flags & SLAB_POISON))
1159
+ if (!kmem_cache_debug_flags(s, SLAB_POISON))
11031160 return;
11041161
11051162 metadata_access_enable();
1106
- memset(addr, POISON_INUSE, PAGE_SIZE << order);
1163
+ memset(kasan_reset_tag(addr), POISON_INUSE, page_size(page));
11071164 metadata_access_disable();
11081165 }
11091166
11101167 static inline int alloc_consistency_checks(struct kmem_cache *s,
1111
- struct page *page,
1112
- void *object, unsigned long addr)
1168
+ struct page *page, void *object)
11131169 {
11141170 if (!check_slab(s, page))
11151171 return 0;
....@@ -1130,7 +1186,7 @@
11301186 void *object, unsigned long addr)
11311187 {
11321188 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1133
- if (!alloc_consistency_checks(s, page, object, addr))
1189
+ if (!alloc_consistency_checks(s, page, object))
11341190 goto bad;
11351191 }
11361192
....@@ -1196,10 +1252,10 @@
11961252 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
11971253 void *object = head;
11981254 int cnt = 0;
1199
- unsigned long uninitialized_var(flags);
1255
+ unsigned long flags;
12001256 int ret = 0;
12011257
1202
- raw_spin_lock_irqsave(&n->list_lock, flags);
1258
+ spin_lock_irqsave(&n->list_lock, flags);
12031259 slab_lock(page);
12041260
12051261 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
....@@ -1234,75 +1290,144 @@
12341290 bulk_cnt, cnt);
12351291
12361292 slab_unlock(page);
1237
- raw_spin_unlock_irqrestore(&n->list_lock, flags);
1293
+ spin_unlock_irqrestore(&n->list_lock, flags);
12381294 if (!ret)
12391295 slab_fix(s, "Object at 0x%p not freed", object);
12401296 return ret;
12411297 }
12421298
1243
-static int __init setup_slub_debug(char *str)
1299
+/*
1300
+ * Parse a block of slub_debug options. Blocks are delimited by ';'
1301
+ *
1302
+ * @str: start of block
1303
+ * @flags: returns parsed flags, or DEBUG_DEFAULT_FLAGS if none specified
1304
+ * @slabs: return start of list of slabs, or NULL when there's no list
1305
+ * @init: assume this is initial parsing and not per-kmem-create parsing
1306
+ *
1307
+ * returns the start of next block if there's any, or NULL
1308
+ */
1309
+static char *
1310
+parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init)
12441311 {
1245
- slub_debug = DEBUG_DEFAULT_FLAGS;
1246
- if (*str++ != '=' || !*str)
1247
- /*
1248
- * No options specified. Switch on full debugging.
1249
- */
1250
- goto out;
1312
+ bool higher_order_disable = false;
12511313
1252
- if (*str == ',')
1314
+ /* Skip any completely empty blocks */
1315
+ while (*str && *str == ';')
1316
+ str++;
1317
+
1318
+ if (*str == ',') {
12531319 /*
12541320 * No options but restriction on slabs. This means full
12551321 * debugging for slabs matching a pattern.
12561322 */
1323
+ *flags = DEBUG_DEFAULT_FLAGS;
12571324 goto check_slabs;
1325
+ }
1326
+ *flags = 0;
12581327
1259
- slub_debug = 0;
1260
- if (*str == '-')
1261
- /*
1262
- * Switch off all debugging measures.
1263
- */
1264
- goto out;
1265
-
1266
- /*
1267
- * Determine which debug features should be switched on
1268
- */
1269
- for (; *str && *str != ','; str++) {
1328
+ /* Determine which debug features should be switched on */
1329
+ for (; *str && *str != ',' && *str != ';'; str++) {
12701330 switch (tolower(*str)) {
1331
+ case '-':
1332
+ *flags = 0;
1333
+ break;
12711334 case 'f':
1272
- slub_debug |= SLAB_CONSISTENCY_CHECKS;
1335
+ *flags |= SLAB_CONSISTENCY_CHECKS;
12731336 break;
12741337 case 'z':
1275
- slub_debug |= SLAB_RED_ZONE;
1338
+ *flags |= SLAB_RED_ZONE;
12761339 break;
12771340 case 'p':
1278
- slub_debug |= SLAB_POISON;
1341
+ *flags |= SLAB_POISON;
12791342 break;
12801343 case 'u':
1281
- slub_debug |= SLAB_STORE_USER;
1344
+ *flags |= SLAB_STORE_USER;
12821345 break;
12831346 case 't':
1284
- slub_debug |= SLAB_TRACE;
1347
+ *flags |= SLAB_TRACE;
12851348 break;
12861349 case 'a':
1287
- slub_debug |= SLAB_FAILSLAB;
1350
+ *flags |= SLAB_FAILSLAB;
12881351 break;
12891352 case 'o':
12901353 /*
12911354 * Avoid enabling debugging on caches if its minimum
12921355 * order would increase as a result.
12931356 */
1294
- disable_higher_order_debug = 1;
1357
+ higher_order_disable = true;
12951358 break;
12961359 default:
1297
- pr_err("slub_debug option '%c' unknown. skipped\n",
1298
- *str);
1360
+ if (init)
1361
+ pr_err("slub_debug option '%c' unknown. skipped\n", *str);
1362
+ }
1363
+ }
1364
+check_slabs:
1365
+ if (*str == ',')
1366
+ *slabs = ++str;
1367
+ else
1368
+ *slabs = NULL;
1369
+
1370
+ /* Skip over the slab list */
1371
+ while (*str && *str != ';')
1372
+ str++;
1373
+
1374
+ /* Skip any completely empty blocks */
1375
+ while (*str && *str == ';')
1376
+ str++;
1377
+
1378
+ if (init && higher_order_disable)
1379
+ disable_higher_order_debug = 1;
1380
+
1381
+ if (*str)
1382
+ return str;
1383
+ else
1384
+ return NULL;
1385
+}
1386
+
1387
+static int __init setup_slub_debug(char *str)
1388
+{
1389
+ slab_flags_t flags;
1390
+ slab_flags_t global_flags;
1391
+ char *saved_str;
1392
+ char *slab_list;
1393
+ bool global_slub_debug_changed = false;
1394
+ bool slab_list_specified = false;
1395
+
1396
+ global_flags = DEBUG_DEFAULT_FLAGS;
1397
+ if (*str++ != '=' || !*str)
1398
+ /*
1399
+ * No options specified. Switch on full debugging.
1400
+ */
1401
+ goto out;
1402
+
1403
+ saved_str = str;
1404
+ while (str) {
1405
+ str = parse_slub_debug_flags(str, &flags, &slab_list, true);
1406
+
1407
+ if (!slab_list) {
1408
+ global_flags = flags;
1409
+ global_slub_debug_changed = true;
1410
+ } else {
1411
+ slab_list_specified = true;
12991412 }
13001413 }
13011414
1302
-check_slabs:
1303
- if (*str == ',')
1304
- slub_debug_slabs = str + 1;
1415
+ /*
1416
+ * For backwards compatibility, a single list of flags with list of
1417
+ * slabs means debugging is only changed for those slabs, so the global
1418
+ * slub_debug should be unchanged (0 or DEBUG_DEFAULT_FLAGS, depending
1419
+ * on CONFIG_SLUB_DEBUG_ON). We can extended that to multiple lists as
1420
+ * long as there is no option specifying flags without a slab list.
1421
+ */
1422
+ if (slab_list_specified) {
1423
+ if (!global_slub_debug_changed)
1424
+ global_flags = slub_debug;
1425
+ slub_debug_string = saved_str;
1426
+ }
13051427 out:
1428
+ slub_debug = global_flags;
1429
+ if (slub_debug != 0 || slub_debug_string)
1430
+ static_branch_enable(&slub_debug_enabled);
13061431 if ((static_branch_unlikely(&init_on_alloc) ||
13071432 static_branch_unlikely(&init_on_free)) &&
13081433 (slub_debug & SLAB_POISON))
....@@ -1312,24 +1437,65 @@
13121437
13131438 __setup("slub_debug", setup_slub_debug);
13141439
1440
+/*
1441
+ * kmem_cache_flags - apply debugging options to the cache
1442
+ * @object_size: the size of an object without meta data
1443
+ * @flags: flags to set
1444
+ * @name: name of the cache
1445
+ *
1446
+ * Debug option(s) are applied to @flags. In addition to the debug
1447
+ * option(s), if a slab name (or multiple) is specified i.e.
1448
+ * slub_debug=<Debug-Options>,<slab name1>,<slab name2> ...
1449
+ * then only the select slabs will receive the debug option(s).
1450
+ */
13151451 slab_flags_t kmem_cache_flags(unsigned int object_size,
1316
- slab_flags_t flags, const char *name,
1317
- void (*ctor)(void *))
1452
+ slab_flags_t flags, const char *name)
13181453 {
1319
- /*
1320
- * Enable debugging if selected on the kernel commandline.
1321
- */
1322
- if (slub_debug && (!slub_debug_slabs || (name &&
1323
- !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))))
1324
- flags |= slub_debug;
1454
+ char *iter;
1455
+ size_t len;
1456
+ char *next_block;
1457
+ slab_flags_t block_flags;
13251458
1326
- return flags;
1459
+ len = strlen(name);
1460
+ next_block = slub_debug_string;
1461
+ /* Go through all blocks of debug options, see if any matches our slab's name */
1462
+ while (next_block) {
1463
+ next_block = parse_slub_debug_flags(next_block, &block_flags, &iter, false);
1464
+ if (!iter)
1465
+ continue;
1466
+ /* Found a block that has a slab list, search it */
1467
+ while (*iter) {
1468
+ char *end, *glob;
1469
+ size_t cmplen;
1470
+
1471
+ end = strchrnul(iter, ',');
1472
+ if (next_block && next_block < end)
1473
+ end = next_block - 1;
1474
+
1475
+ glob = strnchr(iter, end - iter, '*');
1476
+ if (glob)
1477
+ cmplen = glob - iter;
1478
+ else
1479
+ cmplen = max_t(size_t, len, (end - iter));
1480
+
1481
+ if (!strncmp(name, iter, cmplen)) {
1482
+ flags |= block_flags;
1483
+ return flags;
1484
+ }
1485
+
1486
+ if (!*end || *end == ';')
1487
+ break;
1488
+ iter = end + 1;
1489
+ }
1490
+ }
1491
+
1492
+ return flags | slub_debug;
13271493 }
13281494 #else /* !CONFIG_SLUB_DEBUG */
13291495 static inline void setup_object_debug(struct kmem_cache *s,
13301496 struct page *page, void *object) {}
1331
-static inline void setup_page_debug(struct kmem_cache *s,
1332
- void *addr, int order) {}
1497
+static inline
1498
+void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {}
13331499
13341500 static inline int alloc_debug_processing(struct kmem_cache *s,
13351501 struct page *page, void *object, unsigned long addr) { return 0; }
....@@ -1348,8 +1514,7 @@
13481514 static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
13491515 struct page *page) {}
13501516 slab_flags_t kmem_cache_flags(unsigned int object_size,
1351
- slab_flags_t flags, const char *name,
1352
- void (*ctor)(void *))
1517
+ slab_flags_t flags, const char *name)
13531518 {
13541519 return flags;
13551520 }
....@@ -1373,12 +1538,6 @@
13731538 }
13741539 #endif /* CONFIG_SLUB_DEBUG */
13751540
1376
-struct slub_free_list {
1377
- raw_spinlock_t lock;
1378
- struct list_head list;
1379
-};
1380
-static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
1381
-
13821541 /*
13831542 * Hooks for other subsystems that check memory allocations. In a typical
13841543 * production configuration these hooks all should produce no code at all.
....@@ -1386,6 +1545,7 @@
13861545 static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
13871546 {
13881547 ptr = kasan_kmalloc_large(ptr, size, flags);
1548
+ /* As ptr might get tagged, call kmemleak hook after KASAN. */
13891549 kmemleak_alloc(ptr, size, 1, flags);
13901550 return ptr;
13911551 }
....@@ -1393,10 +1553,11 @@
13931553 static __always_inline void kfree_hook(void *x)
13941554 {
13951555 kmemleak_free(x);
1396
- kasan_kfree_large(x, _RET_IP_);
1556
+ kasan_kfree_large(x);
13971557 }
13981558
1399
-static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
1559
+static __always_inline bool slab_free_hook(struct kmem_cache *s,
1560
+ void *x, bool init)
14001561 {
14011562 kmemleak_free_recursive(x, s->flags);
14021563
....@@ -1417,8 +1578,30 @@
14171578 if (!(s->flags & SLAB_DEBUG_OBJECTS))
14181579 debug_check_no_obj_freed(x, s->object_size);
14191580
1420
- /* KASAN might put x into memory quarantine, delaying its reuse */
1421
- return kasan_slab_free(s, x, _RET_IP_);
1581
+ /* Use KCSAN to help debug racy use-after-free. */
1582
+ if (!(s->flags & SLAB_TYPESAFE_BY_RCU))
1583
+ __kcsan_check_access(x, s->object_size,
1584
+ KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
1585
+
1586
+ /*
1587
+ * As memory initialization might be integrated into KASAN,
1588
+ * kasan_slab_free and initialization memset's must be
1589
+ * kept together to avoid discrepancies in behavior.
1590
+ *
1591
+ * The initialization memset's clear the object and the metadata,
1592
+ * but don't touch the SLAB redzone.
1593
+ */
1594
+ if (init) {
1595
+ int rsize;
1596
+
1597
+ if (!kasan_has_integrated_init())
1598
+ memset(kasan_reset_tag(x), 0, s->object_size);
1599
+ rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad : 0;
1600
+ memset((char *)kasan_reset_tag(x) + s->inuse, 0,
1601
+ s->size - s->inuse - rsize);
1602
+ }
1603
+ /* KASAN might put x into memory quarantine, delaying its reuse. */
1604
+ return kasan_slab_free(s, x, init);
14221605 }
14231606
14241607 static inline bool slab_free_freelist_hook(struct kmem_cache *s,
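Not from the patch: a small illustration of which byte ranges the init memsets above cover, reusing the same made-up layout as the earlier get_info_end() example:

/*
 * Illustration only (object_size = 40, inuse = 48, SLAB_RED_ZONE set,
 * no integrated KASAN init).  With init == true the hook does:
 *
 *	memset(x, 0, 40);                             clear the payload
 *	memset(x + 48, 0, size - 48 - red_left_pad);  clear free pointer,
 *	                                              tracks and padding
 *
 * The right red zone at [40, 48) keeps its poison pattern, and the
 * second memset stops short of the left red zone that begins the next
 * object's slot.
 */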
....@@ -1429,7 +1612,11 @@
14291612 void *object;
14301613 void *next = *head;
14311614 void *old_tail = *tail ? *tail : *head;
1432
- int rsize;
1615
+
1616
+ if (is_kfence_address(next)) {
1617
+ slab_free_hook(s, next, false);
1618
+ return true;
1619
+ }
14331620
14341621 /* Head and tail of the reconstructed freelist */
14351622 *head = NULL;
....@@ -1439,20 +1626,8 @@
14391626 object = next;
14401627 next = get_freepointer(s, object);
14411628
1442
- if (slab_want_init_on_free(s)) {
1443
- /*
1444
- * Clear the object and the metadata, but don't touch
1445
- * the redzone.
1446
- */
1447
- memset(object, 0, s->object_size);
1448
- rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad
1449
- : 0;
1450
- memset((char *)object + s->inuse, 0,
1451
- s->size - s->inuse - rsize);
1452
-
1453
- }
14541629 /* If object's reuse doesn't have to be delayed */
1455
- if (!slab_free_hook(s, object)) {
1630
+ if (!slab_free_hook(s, object, slab_want_init_on_free(s))) {
14561631 /* Move object to the new freelist */
14571632 set_freepointer(s, object, *head);
14581633 *head = object;
....@@ -1500,10 +1675,8 @@
15001675 else
15011676 page = __alloc_pages_node(node, flags, order);
15021677
1503
- if (page && memcg_charge_slab(page, flags, order, s)) {
1504
- __free_pages(page, order);
1505
- page = NULL;
1506
- }
1678
+ if (page)
1679
+ account_slab_page(page, order, s);
15071680
15081681 return page;
15091682 }
....@@ -1623,19 +1796,12 @@
16231796 struct kmem_cache_order_objects oo = s->oo;
16241797 gfp_t alloc_gfp;
16251798 void *start, *p, *next;
1626
- int idx, order;
1799
+ int idx;
16271800 bool shuffle;
1628
- bool enableirqs = false;
16291801
16301802 flags &= gfp_allowed_mask;
16311803
16321804 if (gfpflags_allow_blocking(flags))
1633
- enableirqs = true;
1634
-#ifdef CONFIG_PREEMPT_RT_FULL
1635
- if (system_state > SYSTEM_BOOTING)
1636
- enableirqs = true;
1637
-#endif
1638
- if (enableirqs)
16391805 local_irq_enable();
16401806
16411807 flags |= s->allocflags;
....@@ -1664,7 +1830,6 @@
16641830
16651831 page->objects = oo_objects(oo);
16661832
1667
- order = compound_order(page);
16681833 page->slab_cache = s;
16691834 __SetPageSlab(page);
16701835 if (page_is_pfmemalloc(page))
....@@ -1674,7 +1839,7 @@
16741839
16751840 start = page_address(page);
16761841
1677
- setup_page_debug(s, start, order);
1842
+ setup_page_debug(s, page, start);
16781843
16791844 shuffle = shuffle_freelist(s, page);
16801845
....@@ -1695,15 +1860,10 @@
16951860 page->frozen = 1;
16961861
16971862 out:
1698
- if (enableirqs)
1863
+ if (gfpflags_allow_blocking(flags))
16991864 local_irq_disable();
17001865 if (!page)
17011866 return NULL;
1702
-
1703
- mod_lruvec_page_state(page,
1704
- (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1705
- NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1706
- 1 << oo_order(oo));
17071867
17081868 inc_slabs_node(s, page_to_nid(page), page->objects);
17091869
....@@ -1712,13 +1872,8 @@
17121872
17131873 static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
17141874 {
1715
- if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
1716
- gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
1717
- flags &= ~GFP_SLAB_BUG_MASK;
1718
- pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
1719
- invalid_mask, &invalid_mask, flags, &flags);
1720
- dump_stack();
1721
- }
1875
+ if (unlikely(flags & GFP_SLAB_BUG_MASK))
1876
+ flags = kmalloc_fix_flags(flags);
17221877
17231878 return allocate_slab(s,
17241879 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
....@@ -1729,7 +1884,7 @@
17291884 int order = compound_order(page);
17301885 int pages = 1 << order;
17311886
1732
- if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1887
+ if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
17331888 void *p;
17341889
17351890 slab_pad_check(s, page);
....@@ -1738,29 +1893,14 @@
17381893 check_object(s, page, p, SLUB_RED_INACTIVE);
17391894 }
17401895
1741
- mod_lruvec_page_state(page,
1742
- (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1743
- NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1744
- -pages);
1745
-
17461896 __ClearPageSlabPfmemalloc(page);
17471897 __ClearPageSlab(page);
17481898
17491899 page->mapping = NULL;
17501900 if (current->reclaim_state)
17511901 current->reclaim_state->reclaimed_slab += pages;
1752
- memcg_uncharge_slab(page, order, s);
1902
+ unaccount_slab_page(page, order, s);
17531903 __free_pages(page, order);
1754
-}
1755
-
1756
-static void free_delayed(struct list_head *h)
1757
-{
1758
- while (!list_empty(h)) {
1759
- struct page *page = list_first_entry(h, struct page, lru);
1760
-
1761
- list_del(&page->lru);
1762
- __free_slab(page->slab_cache, page);
1763
- }
17641904 }
17651905
17661906 static void rcu_free_slab(struct rcu_head *h)
....@@ -1774,12 +1914,6 @@
17741914 {
17751915 if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
17761916 call_rcu(&page->rcu_head, rcu_free_slab);
1777
- } else if (irqs_disabled()) {
1778
- struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
1779
-
1780
- raw_spin_lock(&f->lock);
1781
- list_add(&page->lru, &f->list);
1782
- raw_spin_unlock(&f->lock);
17831917 } else
17841918 __free_slab(s, page);
17851919 }
....@@ -1798,9 +1932,9 @@
17981932 {
17991933 n->nr_partial++;
18001934 if (tail == DEACTIVATE_TO_TAIL)
1801
- list_add_tail(&page->lru, &n->partial);
1935
+ list_add_tail(&page->slab_list, &n->partial);
18021936 else
1803
- list_add(&page->lru, &n->partial);
1937
+ list_add(&page->slab_list, &n->partial);
18041938 }
18051939
18061940 static inline void add_partial(struct kmem_cache_node *n,
....@@ -1814,7 +1948,7 @@
18141948 struct page *page)
18151949 {
18161950 lockdep_assert_held(&n->list_lock);
1817
- list_del(&page->lru);
1951
+ list_del(&page->slab_list);
18181952 n->nr_partial--;
18191953 }
18201954
....@@ -1881,14 +2015,14 @@
18812015 /*
18822016 * Racy check. If we mistakenly see no partial slabs then we
18832017 * just allocate an empty slab. If we mistakenly try to get a
1884
- * partial slab and there is none available then get_partials()
2018
+ * partial slab and there is none available then get_partial()
18852019 * will return NULL.
18862020 */
18872021 if (!n || !n->nr_partial)
18882022 return NULL;
18892023
1890
- raw_spin_lock(&n->list_lock);
1891
- list_for_each_entry_safe(page, page2, &n->partial, lru) {
2024
+ spin_lock(&n->list_lock);
2025
+ list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
18922026 void *t;
18932027
18942028 if (!pfmemalloc_match(page, flags))
....@@ -1912,7 +2046,7 @@
19122046 break;
19132047
19142048 }
1915
- raw_spin_unlock(&n->list_lock);
2049
+ spin_unlock(&n->list_lock);
19162050 return object;
19172051 }
19182052
....@@ -1926,7 +2060,7 @@
19262060 struct zonelist *zonelist;
19272061 struct zoneref *z;
19282062 struct zone *zone;
1929
- enum zone_type high_zoneidx = gfp_zone(flags);
2063
+ enum zone_type highest_zoneidx = gfp_zone(flags);
19302064 void *object;
19312065 unsigned int cpuset_mems_cookie;
19322066
....@@ -1955,7 +2089,7 @@
19552089 do {
19562090 cpuset_mems_cookie = read_mems_allowed_begin();
19572091 zonelist = node_zonelist(mempolicy_slab_node(), flags);
1958
- for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
2092
+ for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
19592093 struct kmem_cache_node *n;
19602094
19612095 n = get_node(s, zone_to_nid(zone));
....@@ -1976,7 +2110,7 @@
19762110 }
19772111 }
19782112 } while (read_mems_allowed_retry(cpuset_mems_cookie));
1979
-#endif
2113
+#endif /* CONFIG_NUMA */
19802114 return NULL;
19812115 }
19822116
....@@ -1999,9 +2133,9 @@
19992133 return get_any_partial(s, flags, c);
20002134 }
20012135
2002
-#ifdef CONFIG_PREEMPT
2136
+#ifdef CONFIG_PREEMPTION
20032137 /*
2004
- * Calculate the next globally unique transaction for disambiguiation
2138
+ * Calculate the next globally unique transaction for disambiguation
20052139 * during cmpxchg. The transactions start with the cpu number and are then
20062140 * incremented by CONFIG_NR_CPUS.
20072141 */
....@@ -2019,6 +2153,7 @@
20192153 return tid + TID_STEP;
20202154 }
20212155
2156
+#ifdef SLUB_DEBUG_CMPXCHG
20222157 static inline unsigned int tid_to_cpu(unsigned long tid)
20232158 {
20242159 return tid % TID_STEP;
....@@ -2028,6 +2163,7 @@
20282163 {
20292164 return tid / TID_STEP;
20302165 }
2166
+#endif
20312167
20322168 static inline unsigned int init_tid(int cpu)
20332169 {
....@@ -2042,7 +2178,7 @@
20422178
20432179 pr_info("%s %s: cmpxchg redo ", n, s->name);
20442180
2045
-#ifdef CONFIG_PREEMPT
2181
+#ifdef CONFIG_PREEMPTION
20462182 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
20472183 pr_warn("due to cpu change %d -> %d\n",
20482184 tid_to_cpu(tid), tid_to_cpu(actual_tid));
....@@ -2160,46 +2296,37 @@
21602296 if (!lock) {
21612297 lock = 1;
21622298 /*
2163
- * Taking the spinlock removes the possiblity
2299
+ * Taking the spinlock removes the possibility
21642300 * that acquire_slab() will see a slab page that
21652301 * is frozen
21662302 */
2167
- raw_spin_lock(&n->list_lock);
2303
+ spin_lock(&n->list_lock);
21682304 }
21692305 } else {
21702306 m = M_FULL;
2171
- if (kmem_cache_debug(s) && !lock) {
2307
+#ifdef CONFIG_SLUB_DEBUG
2308
+ if ((s->flags & SLAB_STORE_USER) && !lock) {
21722309 lock = 1;
21732310 /*
21742311 * This also ensures that the scanning of full
21752312 * slabs from diagnostic functions will not see
21762313 * any frozen slabs.
21772314 */
2178
- raw_spin_lock(&n->list_lock);
2315
+ spin_lock(&n->list_lock);
21792316 }
2317
+#endif
21802318 }
21812319
21822320 if (l != m) {
2183
-
21842321 if (l == M_PARTIAL)
2185
-
21862322 remove_partial(n, page);
2187
-
21882323 else if (l == M_FULL)
2189
-
21902324 remove_full(s, n, page);
21912325
2192
- if (m == M_PARTIAL) {
2193
-
2326
+ if (m == M_PARTIAL)
21942327 add_partial(n, page, tail);
2195
- stat(s, tail);
2196
-
2197
- } else if (m == M_FULL) {
2198
-
2199
- stat(s, DEACTIVATE_FULL);
2328
+ else if (m == M_FULL)
22002329 add_full(s, n, page);
2201
-
2202
- }
22032330 }
22042331
22052332 l = m;
....@@ -2210,9 +2337,13 @@
22102337 goto redo;
22112338
22122339 if (lock)
2213
- raw_spin_unlock(&n->list_lock);
2340
+ spin_unlock(&n->list_lock);
22142341
2215
- if (m == M_FREE) {
2342
+ if (m == M_PARTIAL)
2343
+ stat(s, tail);
2344
+ else if (m == M_FULL)
2345
+ stat(s, DEACTIVATE_FULL);
2346
+ else if (m == M_FREE) {
22162347 stat(s, DEACTIVATE_EMPTY);
22172348 discard_slab(s, page);
22182349 stat(s, FREE_SLAB);
....@@ -2220,6 +2351,7 @@
22202351
22212352 c->page = NULL;
22222353 c->freelist = NULL;
2354
+ c->tid = next_tid(c->tid);
22232355 }
22242356
22252357 /*
....@@ -2236,19 +2368,19 @@
22362368 struct kmem_cache_node *n = NULL, *n2 = NULL;
22372369 struct page *page, *discard_page = NULL;
22382370
2239
- while ((page = c->partial)) {
2371
+ while ((page = slub_percpu_partial(c))) {
22402372 struct page new;
22412373 struct page old;
22422374
2243
- c->partial = page->next;
2375
+ slub_set_percpu_partial(c, page);
22442376
22452377 n2 = get_node(s, page_to_nid(page));
22462378 if (n != n2) {
22472379 if (n)
2248
- raw_spin_unlock(&n->list_lock);
2380
+ spin_unlock(&n->list_lock);
22492381
22502382 n = n2;
2251
- raw_spin_lock(&n->list_lock);
2383
+ spin_lock(&n->list_lock);
22522384 }
22532385
22542386 do {
....@@ -2277,7 +2409,7 @@
22772409 }
22782410
22792411 if (n)
2280
- raw_spin_unlock(&n->list_lock);
2412
+ spin_unlock(&n->list_lock);
22812413
22822414 while (discard_page) {
22832415 page = discard_page;
....@@ -2287,12 +2419,12 @@
22872419 discard_slab(s, page);
22882420 stat(s, FREE_SLAB);
22892421 }
2290
-#endif
2422
+#endif /* CONFIG_SLUB_CPU_PARTIAL */
22912423 }
22922424
22932425 /*
2294
- * Put a page that was just frozen (in __slab_free) into a partial page
2295
- * slot if available.
2426
+ * Put a page that was just frozen (in __slab_free|get_partial_node) into a
2427
+ * partial page slot if available.
22962428 *
22972429 * If we did not find a slot then simply move all the partials to the
22982430 * per node partial list.
....@@ -2313,22 +2445,15 @@
23132445 if (oldpage) {
23142446 pobjects = oldpage->pobjects;
23152447 pages = oldpage->pages;
2316
- if (drain && pobjects > s->cpu_partial) {
2317
- struct slub_free_list *f;
2448
+ if (drain && pobjects > slub_cpu_partial(s)) {
23182449 unsigned long flags;
2319
- LIST_HEAD(tofree);
23202450 /*
23212451 * partial array is full. Move the existing
23222452 * set to the per node partial list.
23232453 */
23242454 local_irq_save(flags);
23252455 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2326
- f = this_cpu_ptr(&slub_free_list);
2327
- raw_spin_lock(&f->lock);
2328
- list_splice_init(&f->list, &tofree);
2329
- raw_spin_unlock(&f->lock);
23302456 local_irq_restore(flags);
2331
- free_delayed(&tofree);
23322457 oldpage = NULL;
23332458 pobjects = 0;
23342459 pages = 0;
....@@ -2345,7 +2470,7 @@
23452470
23462471 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
23472472 != oldpage);
2348
- if (unlikely(!s->cpu_partial)) {
2473
+ if (unlikely(!slub_cpu_partial(s))) {
23492474 unsigned long flags;
23502475
23512476 local_irq_save(flags);
....@@ -2353,15 +2478,13 @@
23532478 local_irq_restore(flags);
23542479 }
23552480 preempt_enable();
2356
-#endif
2481
+#endif /* CONFIG_SLUB_CPU_PARTIAL */
23572482 }
23582483
23592484 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
23602485 {
23612486 stat(s, CPUSLAB_FLUSH);
23622487 deactivate_slab(s, c->page, c->freelist, c);
2363
-
2364
- c->tid = next_tid(c->tid);
23652488 }
23662489
23672490 /*
....@@ -2373,12 +2496,10 @@
23732496 {
23742497 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
23752498
2376
- if (likely(c)) {
2377
- if (c->page)
2378
- flush_slab(s, c);
2499
+ if (c->page)
2500
+ flush_slab(s, c);
23792501
2380
- unfreeze_partials(s, c);
2381
- }
2502
+ unfreeze_partials(s, c);
23822503 }
23832504
23842505 static void flush_cpu_slab(void *d)
....@@ -2398,19 +2519,7 @@
23982519
23992520 static void flush_all(struct kmem_cache *s)
24002521 {
2401
- LIST_HEAD(tofree);
2402
- int cpu;
2403
-
2404
- on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
2405
- for_each_online_cpu(cpu) {
2406
- struct slub_free_list *f;
2407
-
2408
- f = &per_cpu(slub_free_list, cpu);
2409
- raw_spin_lock_irq(&f->lock);
2410
- list_splice_init(&f->list, &tofree);
2411
- raw_spin_unlock_irq(&f->lock);
2412
- free_delayed(&tofree);
2413
- }
2522
+ on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
24142523 }
24152524
24162525 /*
....@@ -2439,7 +2548,7 @@
24392548 static inline int node_match(struct page *page, int node)
24402549 {
24412550 #ifdef CONFIG_NUMA
2442
- if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
2551
+ if (node != NUMA_NO_NODE && page_to_nid(page) != node)
24432552 return 0;
24442553 #endif
24452554 return 1;
....@@ -2465,10 +2574,10 @@
24652574 unsigned long x = 0;
24662575 struct page *page;
24672576
2468
- raw_spin_lock_irqsave(&n->list_lock, flags);
2469
- list_for_each_entry(page, &n->partial, lru)
2577
+ spin_lock_irqsave(&n->list_lock, flags);
2578
+ list_for_each_entry(page, &n->partial, slab_list)
24702579 x += get_count(page);
2471
- raw_spin_unlock_irqrestore(&n->list_lock, flags);
2580
+ spin_unlock_irqrestore(&n->list_lock, flags);
24722581 return x;
24732582 }
24742583 #endif /* CONFIG_SLUB_DEBUG || CONFIG_SLUB_SYSFS */
....@@ -2540,8 +2649,7 @@
25402649 stat(s, ALLOC_SLAB);
25412650 c->page = page;
25422651 *pc = c;
2543
- } else
2544
- freelist = NULL;
2652
+ }
25452653
25462654 return freelist;
25472655 }
....@@ -2608,12 +2716,12 @@
26082716 * already disabled (which is the case for bulk allocation).
26092717 */
26102718 static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2611
- unsigned long addr, struct kmem_cache_cpu *c,
2612
- struct list_head *to_free)
2719
+ unsigned long addr, struct kmem_cache_cpu *c)
26132720 {
2614
- struct slub_free_list *f;
26152721 void *freelist;
26162722 struct page *page;
2723
+
2724
+ stat(s, ALLOC_SLOWPATH);
26172725
26182726 page = c->page;
26192727 if (!page) {
....@@ -2662,6 +2770,7 @@
26622770
26632771 if (!freelist) {
26642772 c->page = NULL;
2773
+ c->tid = next_tid(c->tid);
26652774 stat(s, DEACTIVATE_BYPASS);
26662775 goto new_slab;
26672776 }
....@@ -2677,13 +2786,6 @@
26772786 VM_BUG_ON(!c->page->frozen);
26782787 c->freelist = get_freepointer(s, freelist);
26792788 c->tid = next_tid(c->tid);
2680
-
2681
-out:
2682
- f = this_cpu_ptr(&slub_free_list);
2683
- raw_spin_lock(&f->lock);
2684
- list_splice_init(&f->list, to_free);
2685
- raw_spin_unlock(&f->lock);
2686
-
26872789 return freelist;
26882790
26892791 new_slab:
....@@ -2699,7 +2801,7 @@
26992801
27002802 if (unlikely(!freelist)) {
27012803 slab_out_of_memory(s, gfpflags, node);
2702
- goto out;
2804
+ return NULL;
27032805 }
27042806
27052807 page = c->page;
....@@ -2712,7 +2814,7 @@
27122814 goto new_slab; /* Slab failed checks. Next slab needed */
27132815
27142816 deactivate_slab(s, page, get_freepointer(s, freelist), c);
2715
- goto out;
2817
+ return freelist;
27162818 }
27172819
27182820 /*
....@@ -2724,10 +2826,9 @@
27242826 {
27252827 void *p;
27262828 unsigned long flags;
2727
- LIST_HEAD(tofree);
27282829
27292830 local_irq_save(flags);
2730
-#ifdef CONFIG_PREEMPT
2831
+#ifdef CONFIG_PREEMPTION
27312832 /*
27322833 * We may have been preempted and rescheduled on a different
27332834 * cpu before disabling interrupts. Need to reload cpu area
....@@ -2736,9 +2837,8 @@
27362837 c = this_cpu_ptr(s->cpu_slab);
27372838 #endif
27382839
2739
- p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree);
2840
+ p = ___slab_alloc(s, gfpflags, node, addr, c);
27402841 local_irq_restore(flags);
2741
- free_delayed(&tofree);
27422842 return p;
27432843 }
27442844
....@@ -2750,7 +2850,8 @@
27502850 void *obj)
27512851 {
27522852 if (unlikely(slab_want_init_on_free(s)) && obj)
2753
- memset((void *)((char *)obj + s->offset), 0, sizeof(void *));
2853
+ memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
2854
+ 0, sizeof(void *));
27542855 }
27552856
27562857 /*
....@@ -2764,16 +2865,23 @@
27642865 * Otherwise we can simply pick the next object from the lockless free list.
27652866 */
27662867 static __always_inline void *slab_alloc_node(struct kmem_cache *s,
2767
- gfp_t gfpflags, int node, unsigned long addr)
2868
+ gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
27682869 {
27692870 void *object;
27702871 struct kmem_cache_cpu *c;
27712872 struct page *page;
27722873 unsigned long tid;
2874
+ struct obj_cgroup *objcg = NULL;
2875
+ bool init = false;
27732876
2774
- s = slab_pre_alloc_hook(s, gfpflags);
2877
+ s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
27752878 if (!s)
27762879 return NULL;
2880
+
2881
+ object = kfence_alloc(s, orig_size, gfpflags);
2882
+ if (unlikely(object))
2883
+ goto out;
2884
+
27772885 redo:
27782886 /*
27792887 * Must read kmem_cache cpu data via this cpu ptr. Preemption is
....@@ -2782,13 +2890,13 @@
27822890 * as we end up on the original cpu again when doing the cmpxchg.
27832891 *
27842892 * We should guarantee that tid and kmem_cache are retrieved on
2785
- * the same cpu. It could be different if CONFIG_PREEMPT so we need
2893
+ * the same cpu. It could be different if CONFIG_PREEMPTION so we need
27862894 * to check if it is matched or not.
27872895 */
27882896 do {
27892897 tid = this_cpu_read(s->cpu_slab->tid);
27902898 c = raw_cpu_ptr(s->cpu_slab);
2791
- } while (IS_ENABLED(CONFIG_PREEMPT) &&
2899
+ } while (IS_ENABLED(CONFIG_PREEMPTION) &&
27922900 unlikely(tid != READ_ONCE(c->tid)));
27932901
27942902 /*
....@@ -2810,9 +2918,8 @@
28102918
28112919 object = c->freelist;
28122920 page = c->page;
2813
- if (unlikely(!object || !node_match(page, node))) {
2921
+ if (unlikely(!object || !page || !node_match(page, node))) {
28142922 object = __slab_alloc(s, gfpflags, node, addr, c);
2815
- stat(s, ALLOC_SLOWPATH);
28162923 } else {
28172924 void *next_object = get_freepointer_safe(s, object);
28182925
....@@ -2843,24 +2950,23 @@
28432950 }
28442951
28452952 maybe_wipe_obj_freeptr(s, object);
2953
+ init = slab_want_init_on_alloc(gfpflags, s);
28462954
2847
- if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
2848
- memset(object, 0, s->object_size);
2849
-
2850
- slab_post_alloc_hook(s, gfpflags, 1, &object);
2955
+out:
2956
+ slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);
28512957
28522958 return object;
28532959 }
28542960
28552961 static __always_inline void *slab_alloc(struct kmem_cache *s,
2856
- gfp_t gfpflags, unsigned long addr)
2962
+ gfp_t gfpflags, unsigned long addr, size_t orig_size)
28572963 {
2858
- return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
2964
+ return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr, orig_size);
28592965 }
28602966
28612967 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
28622968 {
2863
- void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2969
+ void *ret = slab_alloc(s, gfpflags, _RET_IP_, s->object_size);
28642970
28652971 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
28662972 s->size, gfpflags);
....@@ -2872,7 +2978,7 @@
28722978 #ifdef CONFIG_TRACING
28732979 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
28742980 {
2875
- void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2981
+ void *ret = slab_alloc(s, gfpflags, _RET_IP_, size);
28762982 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
28772983 ret = kasan_kmalloc(s, ret, size, gfpflags);
28782984 return ret;
....@@ -2883,7 +2989,7 @@
28832989 #ifdef CONFIG_NUMA
28842990 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
28852991 {
2886
- void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2992
+ void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, s->object_size);
28872993
28882994 trace_kmem_cache_alloc_node(_RET_IP_, ret,
28892995 s->object_size, s->size, gfpflags, node);
....@@ -2897,7 +3003,7 @@
28973003 gfp_t gfpflags,
28983004 int node, size_t size)
28993005 {
2900
- void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
3006
+ void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, size);
29013007
29023008 trace_kmalloc_node(_RET_IP_, ret,
29033009 size, s->size, gfpflags, node);
....@@ -2907,7 +3013,7 @@
29073013 }
29083014 EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
29093015 #endif
2910
-#endif
3016
+#endif /* CONFIG_NUMA */
29113017
29123018 /*
29133019 * Slow path handling. This may still be called frequently since objects
....@@ -2927,9 +3033,12 @@
29273033 struct page new;
29283034 unsigned long counters;
29293035 struct kmem_cache_node *n = NULL;
2930
- unsigned long uninitialized_var(flags);
3036
+ unsigned long flags;
29313037
29323038 stat(s, FREE_SLOWPATH);
3039
+
3040
+ if (kfence_free(head))
3041
+ return;
29333042
29343043 if (kmem_cache_debug(s) &&
29353044 !free_debug_processing(s, page, head, tail, cnt, addr))
....@@ -2937,7 +3046,7 @@
29373046
29383047 do {
29393048 if (unlikely(n)) {
2940
- raw_spin_unlock_irqrestore(&n->list_lock, flags);
3049
+ spin_unlock_irqrestore(&n->list_lock, flags);
29413050 n = NULL;
29423051 }
29433052 prior = page->freelist;
....@@ -2969,7 +3078,7 @@
29693078 * Otherwise the list_lock will synchronize with
29703079 * other processors updating the list of slabs.
29713080 */
2972
- raw_spin_lock_irqsave(&n->list_lock, flags);
3081
+ spin_lock_irqsave(&n->list_lock, flags);
29733082
29743083 }
29753084 }
....@@ -2981,20 +3090,21 @@
29813090
29823091 if (likely(!n)) {
29833092
2984
- /*
2985
- * If we just froze the page then put it onto the
2986
- * per cpu partial list.
2987
- */
2988
- if (new.frozen && !was_frozen) {
3093
+ if (likely(was_frozen)) {
3094
+ /*
3095
+ * The list lock was not taken therefore no list
3096
+ * activity can be necessary.
3097
+ */
3098
+ stat(s, FREE_FROZEN);
3099
+ } else if (new.frozen) {
3100
+ /*
3101
+ * If we just froze the page then put it onto the
3102
+ * per cpu partial list.
3103
+ */
29893104 put_cpu_partial(s, page, 1);
29903105 stat(s, CPU_PARTIAL_FREE);
29913106 }
2992
- /*
2993
- * The list lock was not taken therefore no list
2994
- * activity can be necessary.
2995
- */
2996
- if (was_frozen)
2997
- stat(s, FREE_FROZEN);
3107
+
29983108 return;
29993109 }
30003110
....@@ -3006,12 +3116,11 @@
30063116 * then add it.
30073117 */
30083118 if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
3009
- if (kmem_cache_debug(s))
3010
- remove_full(s, n, page);
3119
+ remove_full(s, n, page);
30113120 add_partial(n, page, DEACTIVATE_TO_TAIL);
30123121 stat(s, FREE_ADD_PARTIAL);
30133122 }
3014
- raw_spin_unlock_irqrestore(&n->list_lock, flags);
3123
+ spin_unlock_irqrestore(&n->list_lock, flags);
30153124 return;
30163125
30173126 slab_empty:
....@@ -3026,7 +3135,7 @@
30263135 remove_full(s, n, page);
30273136 }
30283137
3029
- raw_spin_unlock_irqrestore(&n->list_lock, flags);
3138
+ spin_unlock_irqrestore(&n->list_lock, flags);
30303139 stat(s, FREE_SLAB);
30313140 discard_slab(s, page);
30323141 }
....@@ -3053,6 +3162,10 @@
30533162 void *tail_obj = tail ? : head;
30543163 struct kmem_cache_cpu *c;
30553164 unsigned long tid;
3165
+
3166
+ /* memcg_slab_free_hook() is already called for bulk free. */
3167
+ if (!tail)
3168
+ memcg_slab_free_hook(s, &head, 1);
30563169 redo:
30573170 /*
30583171 * Determine the currently cpus per cpu slab.
....@@ -3063,7 +3176,7 @@
30633176 do {
30643177 tid = this_cpu_read(s->cpu_slab->tid);
30653178 c = raw_cpu_ptr(s->cpu_slab);
3066
- } while (IS_ENABLED(CONFIG_PREEMPT) &&
3179
+ } while (IS_ENABLED(CONFIG_PREEMPTION) &&
30673180 unlikely(tid != READ_ONCE(c->tid)));
30683181
30693182 /* Same with comment on barrier() in slab_alloc_node() */
....@@ -3173,6 +3286,13 @@
31733286 df->s = cache_from_obj(s, object); /* Support for memcg */
31743287 }
31753288
3289
+ if (is_kfence_address(object)) {
3290
+ slab_free_hook(df->s, object, false);
3291
+ __kfence_free(object);
3292
+ p[size] = NULL; /* mark object processed */
3293
+ return size;
3294
+ }
3295
+
31763296 /* Start new detached freelist */
31773297 df->page = page;
31783298 set_freepointer(df->s, object, NULL);
....@@ -3214,6 +3334,7 @@
32143334 if (WARN_ON(!size))
32153335 return;
32163336
3337
+ memcg_slab_free_hook(s, p, size);
32173338 do {
32183339 struct detached_freelist df;
32193340
....@@ -3231,11 +3352,11 @@
32313352 void **p)
32323353 {
32333354 struct kmem_cache_cpu *c;
3234
- LIST_HEAD(to_free);
32353355 int i;
3356
+ struct obj_cgroup *objcg = NULL;
32363357
32373358 /* memcg and kmem_cache debug support */
3238
- s = slab_pre_alloc_hook(s, flags);
3359
+ s = slab_pre_alloc_hook(s, &objcg, size, flags);
32393360 if (unlikely(!s))
32403361 return false;
32413362 /*
....@@ -3247,8 +3368,14 @@
32473368 c = this_cpu_ptr(s->cpu_slab);
32483369
32493370 for (i = 0; i < size; i++) {
3250
- void *object = c->freelist;
3371
+ void *object = kfence_alloc(s, s->object_size, flags);
32513372
3373
+ if (unlikely(object)) {
3374
+ p[i] = object;
3375
+ continue;
3376
+ }
3377
+
3378
+ object = c->freelist;
32523379 if (unlikely(!object)) {
32533380 /*
32543381 * We may have removed an object from c->freelist using
....@@ -3264,7 +3391,7 @@
32643391 * of re-populating per CPU c->freelist
32653392 */
32663393 p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
3267
- _RET_IP_, c, &to_free);
3394
+ _RET_IP_, c);
32683395 if (unlikely(!p[i]))
32693396 goto error;
32703397
....@@ -3279,23 +3406,17 @@
32793406 }
32803407 c->tid = next_tid(c->tid);
32813408 local_irq_enable();
3282
- free_delayed(&to_free);
32833409
3284
- /* Clear memory outside IRQ disabled fastpath loop */
3285
- if (unlikely(slab_want_init_on_alloc(flags, s))) {
3286
- int j;
3287
-
3288
- for (j = 0; j < i; j++)
3289
- memset(p[j], 0, s->object_size);
3290
- }
3291
-
3292
- /* memcg and kmem_cache debug support */
3293
- slab_post_alloc_hook(s, flags, size, p);
3410
+ /*
3411
+ * memcg and kmem_cache debug support and memory initialization.
3412
+ * Done outside of the IRQ disabled fastpath loop.
3413
+ */
3414
+ slab_post_alloc_hook(s, objcg, flags, size, p,
3415
+ slab_want_init_on_alloc(flags, s));
32943416 return i;
32953417 error:
32963418 local_irq_enable();
3297
- free_delayed(&to_free);
3298
- slab_post_alloc_hook(s, flags, i, p);
3419
+ slab_post_alloc_hook(s, objcg, flags, i, p, false);
32993420 __kmem_cache_free_bulk(s, i, p);
33003421 return 0;
33013422 }
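[Editor's note] For context, a sketch of how a caller might drive the bulk fast path patched above. This is an illustrative fragment, not part of the patch: my_cache is a hypothetical cache created elsewhere with kmem_cache_create(), and the API is all-or-nothing, returning the requested count on success or 0 after cleaning up.

/* requires <linux/slab.h>; illustrative only */
static int demo_bulk(struct kmem_cache *my_cache)
{
	void *objs[16];

	if (!kmem_cache_alloc_bulk(my_cache, GFP_KERNEL, 16, objs))
		return -ENOMEM;

	/* ... use all sixteen objects ... */

	kmem_cache_free_bulk(my_cache, 16, objs);
	return 0;
}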
....@@ -3430,7 +3551,7 @@
34303551 init_kmem_cache_node(struct kmem_cache_node *n)
34313552 {
34323553 n->nr_partial = 0;
3433
- raw_spin_lock_init(&n->list_lock);
3554
+ spin_lock_init(&n->list_lock);
34343555 INIT_LIST_HEAD(&n->partial);
34353556 #ifdef CONFIG_SLUB_DEBUG
34363557 atomic_long_set(&n->nr_slabs, 0);
....@@ -3491,8 +3612,7 @@
34913612 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
34923613 init_tracking(kmem_cache_node, n);
34933614 #endif
3494
- n = kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node),
3495
- GFP_KERNEL);
3615
+ n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
34963616 page->freelist = get_freepointer(kmem_cache_node, n);
34973617 page->inuse = 1;
34983618 page->frozen = 0;
....@@ -3580,15 +3700,15 @@
35803700 * 50% to keep some capacity around for frees.
35813701 */
35823702 if (!kmem_cache_has_cpu_partial(s))
3583
- s->cpu_partial = 0;
3703
+ slub_set_cpu_partial(s, 0);
35843704 else if (s->size >= PAGE_SIZE)
3585
- s->cpu_partial = 2;
3705
+ slub_set_cpu_partial(s, 2);
35863706 else if (s->size >= 1024)
3587
- s->cpu_partial = 6;
3707
+ slub_set_cpu_partial(s, 6);
35883708 else if (s->size >= 256)
3589
- s->cpu_partial = 13;
3709
+ slub_set_cpu_partial(s, 13);
35903710 else
3591
- s->cpu_partial = 30;
3711
+ slub_set_cpu_partial(s, 30);
35923712 #endif
35933713 }
35943714
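[Editor's note] The slub_set_cpu_partial() conversion keeps the existing thresholds: larger objects get smaller per-cpu partial budgets, and caches without per-cpu partial support get 0. A standalone user-space sketch of the same heuristic (PAGE_SIZE assumed to be 4096 here):

#include <stdio.h>

#define PAGE_SIZE 4096u		/* assumption for this sketch */

static unsigned int cpu_partial_for(unsigned int size)
{
	if (size >= PAGE_SIZE)
		return 2;
	if (size >= 1024)
		return 6;
	if (size >= 256)
		return 13;
	return 30;
}

int main(void)
{
	unsigned int sizes[] = { 64, 256, 1024, 8192 };

	for (int i = 0; i < 4; i++)
		printf("size=%u -> cpu_partial=%u\n",
		       sizes[i], cpu_partial_for(sizes[i]));
	return 0;
}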
....@@ -3633,22 +3753,36 @@
36333753
36343754 /*
36353755 * With that we have determined the number of bytes in actual use
3636
- * by the object. This is the potential offset to the free pointer.
3756
+ * by the object and redzoning.
36373757 */
36383758 s->inuse = size;
36393759
3640
- if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
3641
- s->ctor)) {
3760
+ if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
3761
+ ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
3762
+ s->ctor) {
36423763 /*
36433764 * Relocate free pointer after the object if it is not
36443765 * permitted to overwrite the first word of the object on
36453766 * kmem_cache_free.
36463767 *
36473768 * This is the case if we do RCU, have a constructor or
3648
- * destructor or are poisoning the objects.
3769
+ * destructor, are poisoning the objects, or are
3770
+ * redzoning an object smaller than sizeof(void *).
3771
+ *
3772
+ * The assumption that s->offset >= s->inuse means free
3773
+ * pointer is outside of the object is used in the
3774
+ * freeptr_outside_object() function. If that is no
3775
+ * longer true, the function needs to be modified.
36493776 */
36503777 s->offset = size;
36513778 size += sizeof(void *);
3779
+ } else {
3780
+ /*
3781
+ * Store freelist pointer near middle of object to keep
3782
+ * it away from the edges of the object to avoid small
3783
+ * sized over/underflows from neighboring allocations.
3784
+ */
3785
+ s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *));
36523786 }
36533787
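[Editor's note] A worked example of the new free-pointer placement. Caches that need the pointer outside the object (RCU, poisoning, a constructor, or a red-zoned object smaller than a pointer) keep the old behaviour; everyone else now stores it near the middle of the object. The sketch below is user-space C with the checks collapsed into one boolean and ALIGN_DOWN re-declared locally; the numbers are purely illustrative.

#include <stdio.h>
#include <stdbool.h>

#define ALIGN_DOWN(x, a)	((x) / (a) * (a))	/* stand-in for the kernel macro */

static unsigned long freeptr_offset(unsigned long object_size,
				    unsigned long inuse,
				    bool needs_outside_freeptr)
{
	if (needs_outside_freeptr)
		return inuse;	/* appended after the object, as before */
	return ALIGN_DOWN(object_size / 2, sizeof(void *));
}

int main(void)
{
	/* plain 100-byte object on a 64-bit build: offset 48, mid-object */
	printf("offset=%lu\n", freeptr_offset(100, 104, false));
	/* RCU/poisoned/ctor cache: offset equals inuse, i.e. 104 */
	printf("offset=%lu\n", freeptr_offset(100, 104, true));
	return 0;
}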
36543788 #ifdef CONFIG_SLUB_DEBUG
....@@ -3685,6 +3819,7 @@
36853819 */
36863820 size = ALIGN(size, s->align);
36873821 s->size = size;
3822
+ s->reciprocal_size = reciprocal_value(size);
36883823 if (forced_order >= 0)
36893824 order = forced_order;
36903825 else
....@@ -3719,7 +3854,7 @@
37193854
37203855 static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
37213856 {
3722
- s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
3857
+ s->flags = kmem_cache_flags(s->size, flags, s->name);
37233858 #ifdef CONFIG_SLAB_FREELIST_HARDENED
37243859 s->random = get_random_long();
37253860 #endif
....@@ -3770,45 +3905,32 @@
37703905 if (alloc_kmem_cache_cpus(s))
37713906 return 0;
37723907
3773
- free_kmem_cache_nodes(s);
37743908 error:
3775
- if (flags & SLAB_PANIC)
3776
- panic("Cannot create slab %s size=%u realsize=%u order=%u offset=%u flags=%lx\n",
3777
- s->name, s->size, s->size,
3778
- oo_order(s->oo), s->offset, (unsigned long)flags);
3909
+ __kmem_cache_release(s);
37793910 return -EINVAL;
37803911 }
37813912
37823913 static void list_slab_objects(struct kmem_cache *s, struct page *page,
3783
- const char *text)
3914
+ const char *text)
37843915 {
37853916 #ifdef CONFIG_SLUB_DEBUG
3786
-#ifdef CONFIG_PREEMPT_RT_BASE
3787
- /* XXX move out of irq-off section */
3788
- slab_err(s, page, text, s->name);
3789
-#else
3790
-
37913917 void *addr = page_address(page);
3918
+ unsigned long *map;
37923919 void *p;
3793
- unsigned long *map = kcalloc(BITS_TO_LONGS(page->objects),
3794
- sizeof(long),
3795
- GFP_ATOMIC);
3796
- if (!map)
3797
- return;
3920
+
37983921 slab_err(s, page, text, s->name);
37993922 slab_lock(page);
38003923
3801
- get_map(s, page, map);
3924
+ map = get_map(s, page);
38023925 for_each_object(p, s, addr, page->objects) {
38033926
3804
- if (!test_bit(slab_index(p, s, addr), map)) {
3927
+ if (!test_bit(__obj_to_index(s, addr, p), map)) {
38053928 pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
38063929 print_tracking(s, p);
38073930 }
38083931 }
3932
+ put_map(map);
38093933 slab_unlock(page);
3810
- kfree(map);
3811
-#endif
38123934 #endif
38133935 }
38143936
....@@ -3823,19 +3945,19 @@
38233945 struct page *page, *h;
38243946
38253947 BUG_ON(irqs_disabled());
3826
- raw_spin_lock_irq(&n->list_lock);
3827
- list_for_each_entry_safe(page, h, &n->partial, lru) {
3948
+ spin_lock_irq(&n->list_lock);
3949
+ list_for_each_entry_safe(page, h, &n->partial, slab_list) {
38283950 if (!page->inuse) {
38293951 remove_partial(n, page);
3830
- list_add(&page->lru, &discard);
3952
+ list_add(&page->slab_list, &discard);
38313953 } else {
38323954 list_slab_objects(s, page,
3833
- "Objects remaining in %s on __kmem_cache_shutdown()");
3955
+ "Objects remaining in %s on __kmem_cache_shutdown()");
38343956 }
38353957 }
3836
- raw_spin_unlock_irq(&n->list_lock);
3958
+ spin_unlock_irq(&n->list_lock);
38373959
3838
- list_for_each_entry_safe(page, h, &discard, lru)
3960
+ list_for_each_entry_safe(page, h, &discard, slab_list)
38393961 discard_slab(s, page);
38403962 }
38413963
....@@ -3865,7 +3987,6 @@
38653987 if (n->nr_partial || slabs_node(s, node))
38663988 return 1;
38673989 }
3868
- sysfs_slab_remove(s);
38693990 return 0;
38703991 }
38713992
....@@ -3914,7 +4035,7 @@
39144035 if (unlikely(ZERO_OR_NULL_PTR(s)))
39154036 return s;
39164037
3917
- ret = slab_alloc(s, flags, _RET_IP_);
4038
+ ret = slab_alloc(s, flags, _RET_IP_, size);
39184039
39194040 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
39204041
....@@ -3929,11 +4050,15 @@
39294050 {
39304051 struct page *page;
39314052 void *ptr = NULL;
4053
+ unsigned int order = get_order(size);
39324054
39334055 flags |= __GFP_COMP;
3934
- page = alloc_pages_node(node, flags, get_order(size));
3935
- if (page)
4056
+ page = alloc_pages_node(node, flags, order);
4057
+ if (page) {
39364058 ptr = page_address(page);
4059
+ mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
4060
+ PAGE_SIZE << order);
4061
+ }
39374062
39384063 return kmalloc_large_node_hook(ptr, size, flags);
39394064 }
....@@ -3958,7 +4083,7 @@
39584083 if (unlikely(ZERO_OR_NULL_PTR(s)))
39594084 return s;
39604085
3961
- ret = slab_alloc_node(s, flags, node, _RET_IP_);
4086
+ ret = slab_alloc_node(s, flags, node, _RET_IP_, size);
39624087
39634088 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
39644089
....@@ -3967,7 +4092,7 @@
39674092 return ret;
39684093 }
39694094 EXPORT_SYMBOL(__kmalloc_node);
3970
-#endif
4095
+#endif /* CONFIG_NUMA */
39714096
39724097 #ifdef CONFIG_HARDENED_USERCOPY
39734098 /*
....@@ -3984,6 +4109,7 @@
39844109 struct kmem_cache *s;
39854110 unsigned int offset;
39864111 size_t object_size;
4112
+ bool is_kfence = is_kfence_address(ptr);
39874113
39884114 ptr = kasan_reset_tag(ptr);
39894115
....@@ -3996,10 +4122,13 @@
39964122 to_user, 0, n);
39974123
39984124 /* Find offset within object. */
3999
- offset = (ptr - page_address(page)) % s->size;
4125
+ if (is_kfence)
4126
+ offset = ptr - kfence_object_start(ptr);
4127
+ else
4128
+ offset = (ptr - page_address(page)) % s->size;
40004129
40014130 /* Adjust for redzone and reject if within the redzone. */
4002
- if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
4131
+ if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
40034132 if (offset < s->red_left_pad)
40044133 usercopy_abort("SLUB object in left red zone",
40054134 s->name, to_user, offset, n);
....@@ -4029,7 +4158,7 @@
40294158 }
40304159 #endif /* CONFIG_HARDENED_USERCOPY */
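[Editor's note] The usercopy change above computes the copy offset differently for KFENCE objects (relative to kfence_object_start()) and regular slab objects (modulo the object stride), and only the latter are checked against the left red zone. A small user-space model of the non-KFENCE arithmetic, with made-up stride and padding values:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static bool in_left_red_zone(uintptr_t ptr, uintptr_t page_base,
			     unsigned int s_size, unsigned int red_left_pad)
{
	unsigned int offset = (ptr - page_base) % s_size;

	return offset < red_left_pad;
}

int main(void)
{
	/* object stride 128 bytes, 16-byte left red zone */
	printf("offset 8  -> %s\n",
	       in_left_red_zone(0x1008, 0x1000, 128, 16) ? "reject" : "ok");
	printf("offset 32 -> %s\n",
	       in_left_red_zone(0x1020, 0x1000, 128, 16) ? "reject" : "ok");
	return 0;
}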
40314160
4032
-static size_t __ksize(const void *object)
4161
+size_t __ksize(const void *object)
40334162 {
40344163 struct page *page;
40354164
....@@ -4040,22 +4169,12 @@
40404169
40414170 if (unlikely(!PageSlab(page))) {
40424171 WARN_ON(!PageCompound(page));
4043
- return PAGE_SIZE << compound_order(page);
4172
+ return page_size(page);
40444173 }
40454174
40464175 return slab_ksize(page->slab_cache);
40474176 }
4048
-
4049
-size_t ksize(const void *object)
4050
-{
4051
- size_t size = __ksize(object);
4052
- /* We assume that ksize callers could use whole allocated area,
4053
- * so we need to unpoison this area.
4054
- */
4055
- kasan_unpoison_shadow(object, size);
4056
- return size;
4057
-}
4058
-EXPORT_SYMBOL(ksize);
4177
+EXPORT_SYMBOL(__ksize);
40594178
40604179 void kfree(const void *x)
40614180 {
....@@ -4069,9 +4188,13 @@
40694188
40704189 page = virt_to_head_page(x);
40714190 if (unlikely(!PageSlab(page))) {
4191
+ unsigned int order = compound_order(page);
4192
+
40724193 BUG_ON(!PageCompound(page));
40734194 kfree_hook(object);
4074
- __free_pages(page, compound_order(page));
4195
+ mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
4196
+ -(PAGE_SIZE << order));
4197
+ __free_pages(page, order);
40754198 return;
40764199 }
40774200 slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
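[Editor's note] The kfree() hunk above is the counterpart of the kmalloc_large_node() hunk earlier: the large-allocation path charges PAGE_SIZE << order to NR_SLAB_UNRECLAIMABLE_B and this path subtracts exactly the same amount before freeing the compound page. A tiny arithmetic sketch of the charged size (get_order() modelled for 4 KiB pages, an assumption of the sketch):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

/* minimal stand-in for get_order(): smallest order whose span covers size */
static unsigned int order_for(unsigned long size)
{
	unsigned int order = 0;

	while ((PAGE_SIZE << order) < size)
		order++;
	return order;
}

int main(void)
{
	unsigned long sizes[] = { 8192, 20000, 70000 };

	for (int i = 0; i < 3; i++) {
		unsigned int order = order_for(sizes[i]);

		printf("kmalloc(%lu): order=%u, charged=%lu bytes\n",
		       sizes[i], order, PAGE_SIZE << order);
	}
	return 0;
}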
....@@ -4107,7 +4230,7 @@
41074230 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
41084231 INIT_LIST_HEAD(promote + i);
41094232
4110
- raw_spin_lock_irqsave(&n->list_lock, flags);
4233
+ spin_lock_irqsave(&n->list_lock, flags);
41114234
41124235 /*
41134236 * Build lists of slabs to discard or promote.
....@@ -4115,7 +4238,7 @@
41154238 * Note that concurrent frees may occur while we hold the
41164239 * list_lock. page->inuse here is the upper limit.
41174240 */
4118
- list_for_each_entry_safe(page, t, &n->partial, lru) {
4241
+ list_for_each_entry_safe(page, t, &n->partial, slab_list) {
41194242 int free = page->objects - page->inuse;
41204243
41214244 /* Do not reread page->inuse */
....@@ -4125,10 +4248,10 @@
41254248 BUG_ON(free <= 0);
41264249
41274250 if (free == page->objects) {
4128
- list_move(&page->lru, &discard);
4251
+ list_move(&page->slab_list, &discard);
41294252 n->nr_partial--;
41304253 } else if (free <= SHRINK_PROMOTE_MAX)
4131
- list_move(&page->lru, promote + free - 1);
4254
+ list_move(&page->slab_list, promote + free - 1);
41324255 }
41334256
41344257 /*
....@@ -4138,10 +4261,10 @@
41384261 for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
41394262 list_splice(promote + i, &n->partial);
41404263
4141
- raw_spin_unlock_irqrestore(&n->list_lock, flags);
4264
+ spin_unlock_irqrestore(&n->list_lock, flags);
41424265
41434266 /* Release empty slabs */
4144
- list_for_each_entry_safe(page, t, &discard, lru)
4267
+ list_for_each_entry_safe(page, t, &discard, slab_list)
41454268 discard_slab(s, page);
41464269
41474270 if (slabs_node(s, node))
....@@ -4150,42 +4273,6 @@
41504273
41514274 return ret;
41524275 }
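[Editor's note] __kmem_cache_shrink() above sorts each node's partial slabs into buckets keyed by their free-object count, discards slabs that are completely free, and splices the buckets back most-full-first. A user-space model of the per-slab decision (SHRINK_PROMOTE_MAX taken to be 32, which I believe matches the kernel constant):

#include <stdio.h>

#define SHRINK_PROMOTE_MAX 32

static const char *shrink_target(int objects, int inuse, int *bucket)
{
	int free = objects - inuse;

	*bucket = -1;
	if (free == objects)
		return "discard list (slab is empty)";
	if (free <= SHRINK_PROMOTE_MAX) {
		*bucket = free - 1;	/* promote + free - 1 in the code above */
		return "promote bucket";
	}
	return "left on n->partial";
}

int main(void)
{
	int cases[][2] = { { 64, 0 }, { 64, 60 }, { 64, 10 } };

	for (int i = 0; i < 3; i++) {
		int bucket;
		const char *where = shrink_target(cases[i][0], cases[i][1], &bucket);

		printf("objects=%d inuse=%d -> %s", cases[i][0], cases[i][1], where);
		if (bucket >= 0)
			printf(" [%d]", bucket);
		printf("\n");
	}
	return 0;
}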
4153
-
4154
-#ifdef CONFIG_MEMCG
4155
-static void kmemcg_cache_deact_after_rcu(struct kmem_cache *s)
4156
-{
4157
- /*
4158
- * Called with all the locks held after a sched RCU grace period.
4159
- * Even if @s becomes empty after shrinking, we can't know that @s
4160
- * doesn't have allocations already in-flight and thus can't
4161
- * destroy @s until the associated memcg is released.
4162
- *
4163
- * However, let's remove the sysfs files for empty caches here.
4164
- * Each cache has a lot of interface files which aren't
4165
- * particularly useful for empty draining caches; otherwise, we can
4166
- * easily end up with millions of unnecessary sysfs files on
4167
- * systems which have a lot of memory and transient cgroups.
4168
- */
4169
- if (!__kmem_cache_shrink(s))
4170
- sysfs_slab_remove(s);
4171
-}
4172
-
4173
-void __kmemcg_cache_deactivate(struct kmem_cache *s)
4174
-{
4175
- /*
4176
- * Disable empty slabs caching. Used to avoid pinning offline
4177
- * memory cgroups by kmem pages that can be freed.
4178
- */
4179
- slub_set_cpu_partial(s, 0);
4180
- s->min_partial = 0;
4181
-
4182
- /*
4183
- * s->cpu_partial is checked locklessly (see put_cpu_partial), so
4184
- * we have to make sure the change is visible before shrinking.
4185
- */
4186
- slab_deactivate_memcg_cache_rcu_sched(s, kmemcg_cache_deact_after_rcu);
4187
-}
4188
-#endif
41894276
41904277 static int slab_mem_going_offline_callback(void *arg)
41914278 {
....@@ -4333,17 +4420,15 @@
43334420 for_each_kmem_cache_node(s, node, n) {
43344421 struct page *p;
43354422
4336
- list_for_each_entry(p, &n->partial, lru)
4423
+ list_for_each_entry(p, &n->partial, slab_list)
43374424 p->slab_cache = s;
43384425
43394426 #ifdef CONFIG_SLUB_DEBUG
4340
- list_for_each_entry(p, &n->full, lru)
4427
+ list_for_each_entry(p, &n->full, slab_list)
43414428 p->slab_cache = s;
43424429 #endif
43434430 }
4344
- slab_init_memcg_params(s);
43454431 list_add(&s->list, &slab_caches);
4346
- memcg_link_cache(s);
43474432 return s;
43484433 }
43494434
....@@ -4351,12 +4436,6 @@
43514436 {
43524437 static __initdata struct kmem_cache boot_kmem_cache,
43534438 boot_kmem_cache_node;
4354
- int cpu;
4355
-
4356
- for_each_possible_cpu(cpu) {
4357
- raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
4358
- INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
4359
- }
43604439
43614440 if (debug_guardpage_minorder())
43624441 slub_max_order = 0;
....@@ -4390,7 +4469,7 @@
43904469 cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
43914470 slub_cpu_dead);
43924471
4393
- pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%d\n",
4472
+ pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
43944473 cache_line_size(),
43954474 slub_min_order, slub_max_order, slub_min_objects,
43964475 nr_cpu_ids, nr_node_ids);
....@@ -4404,7 +4483,7 @@
44044483 __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
44054484 slab_flags_t flags, void (*ctor)(void *))
44064485 {
4407
- struct kmem_cache *s, *c;
4486
+ struct kmem_cache *s;
44084487
44094488 s = find_mergeable(size, align, flags, name, ctor);
44104489 if (s) {
....@@ -4416,11 +4495,6 @@
44164495 */
44174496 s->object_size = max(s->object_size, size);
44184497 s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
4419
-
4420
- for_each_memcg_cache(c, s) {
4421
- c->object_size = s->object_size;
4422
- c->inuse = max(c->inuse, ALIGN(size, sizeof(void *)));
4423
- }
44244498
44254499 if (sysfs_slab_alias(s, name)) {
44264500 s->refcount--;
....@@ -4443,12 +4517,16 @@
44434517 if (slab_state <= UP)
44444518 return 0;
44454519
4446
- memcg_propagate_slab_attrs(s);
44474520 err = sysfs_slab_add(s);
4448
- if (err)
4521
+ if (err) {
44494522 __kmem_cache_release(s);
4523
+ return err;
4524
+ }
44504525
4451
- return err;
4526
+ if (s->flags & SLAB_STORE_USER)
4527
+ debugfs_slab_add(s);
4528
+
4529
+ return 0;
44524530 }
44534531
44544532 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
....@@ -4464,7 +4542,7 @@
44644542 if (unlikely(ZERO_OR_NULL_PTR(s)))
44654543 return s;
44664544
4467
- ret = slab_alloc(s, gfpflags, caller);
4545
+ ret = slab_alloc(s, gfpflags, caller, size);
44684546
44694547 /* Honor the call site pointer we received. */
44704548 trace_kmalloc(caller, ret, size, s->size, gfpflags);
....@@ -4495,7 +4573,7 @@
44954573 if (unlikely(ZERO_OR_NULL_PTR(s)))
44964574 return s;
44974575
4498
- ret = slab_alloc_node(s, gfpflags, node, caller);
4576
+ ret = slab_alloc_node(s, gfpflags, node, caller, size);
44994577
45004578 /* Honor the call site pointer we received. */
45014579 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
....@@ -4518,52 +4596,42 @@
45184596 #endif
45194597
45204598 #ifdef CONFIG_SLUB_DEBUG
4521
-static int validate_slab(struct kmem_cache *s, struct page *page,
4522
- unsigned long *map)
4599
+static void validate_slab(struct kmem_cache *s, struct page *page)
45234600 {
45244601 void *p;
45254602 void *addr = page_address(page);
4603
+ unsigned long *map;
45264604
4527
- if (!check_slab(s, page) ||
4528
- !on_freelist(s, page, NULL))
4529
- return 0;
4605
+ slab_lock(page);
4606
+
4607
+ if (!check_slab(s, page) || !on_freelist(s, page, NULL))
4608
+ goto unlock;
45304609
45314610 /* Now we know that a valid freelist exists */
4532
- bitmap_zero(map, page->objects);
4533
-
4534
- get_map(s, page, map);
4611
+ map = get_map(s, page);
45354612 for_each_object(p, s, addr, page->objects) {
4536
- if (test_bit(slab_index(p, s, addr), map))
4537
- if (!check_object(s, page, p, SLUB_RED_INACTIVE))
4538
- return 0;
4613
+ u8 val = test_bit(__obj_to_index(s, addr, p), map) ?
4614
+ SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
4615
+
4616
+ if (!check_object(s, page, p, val))
4617
+ break;
45394618 }
4540
-
4541
- for_each_object(p, s, addr, page->objects)
4542
- if (!test_bit(slab_index(p, s, addr), map))
4543
- if (!check_object(s, page, p, SLUB_RED_ACTIVE))
4544
- return 0;
4545
- return 1;
4546
-}
4547
-
4548
-static void validate_slab_slab(struct kmem_cache *s, struct page *page,
4549
- unsigned long *map)
4550
-{
4551
- slab_lock(page);
4552
- validate_slab(s, page, map);
4619
+ put_map(map);
4620
+unlock:
45534621 slab_unlock(page);
45544622 }
45554623
45564624 static int validate_slab_node(struct kmem_cache *s,
4557
- struct kmem_cache_node *n, unsigned long *map)
4625
+ struct kmem_cache_node *n)
45584626 {
45594627 unsigned long count = 0;
45604628 struct page *page;
45614629 unsigned long flags;
45624630
4563
- raw_spin_lock_irqsave(&n->list_lock, flags);
4631
+ spin_lock_irqsave(&n->list_lock, flags);
45644632
4565
- list_for_each_entry(page, &n->partial, lru) {
4566
- validate_slab_slab(s, page, map);
4633
+ list_for_each_entry(page, &n->partial, slab_list) {
4634
+ validate_slab(s, page);
45674635 count++;
45684636 }
45694637 if (count != n->nr_partial)
....@@ -4573,8 +4641,8 @@
45734641 if (!(s->flags & SLAB_STORE_USER))
45744642 goto out;
45754643
4576
- list_for_each_entry(page, &n->full, lru) {
4577
- validate_slab_slab(s, page, map);
4644
+ list_for_each_entry(page, &n->full, slab_list) {
4645
+ validate_slab(s, page);
45784646 count++;
45794647 }
45804648 if (count != atomic_long_read(&n->nr_slabs))
....@@ -4582,7 +4650,7 @@
45824650 s->name, count, atomic_long_read(&n->nr_slabs));
45834651
45844652 out:
4585
- raw_spin_unlock_irqrestore(&n->list_lock, flags);
4653
+ spin_unlock_irqrestore(&n->list_lock, flags);
45864654 return count;
45874655 }
45884656
....@@ -4590,20 +4658,16 @@
45904658 {
45914659 int node;
45924660 unsigned long count = 0;
4593
- unsigned long *map = kmalloc_array(BITS_TO_LONGS(oo_objects(s->max)),
4594
- sizeof(unsigned long),
4595
- GFP_KERNEL);
45964661 struct kmem_cache_node *n;
4597
-
4598
- if (!map)
4599
- return -ENOMEM;
46004662
46014663 flush_all(s);
46024664 for_each_kmem_cache_node(s, node, n)
4603
- count += validate_slab_node(s, n, map);
4604
- kfree(map);
4665
+ count += validate_slab_node(s, n);
4666
+
46054667 return count;
46064668 }
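[Editor's note] The rewritten validate_slab() derives every object's expected state directly from the free-object bitmap: a set bit means the object sits on a freelist and must carry the inactive red-zone pattern, a clear bit means it is allocated and must carry the active one. A user-space sketch of that mapping (the enum is a stand-in; in the kernel these names are poison byte values):

#include <stdio.h>
#include <stdbool.h>

enum redzone_state { SLUB_RED_ACTIVE, SLUB_RED_INACTIVE };

static enum redzone_state expected_state(bool free_bit_set)
{
	return free_bit_set ? SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
}

int main(void)
{
	printf("free object      -> %s\n",
	       expected_state(true) == SLUB_RED_INACTIVE ? "SLUB_RED_INACTIVE" : "?");
	printf("allocated object -> %s\n",
	       expected_state(false) == SLUB_RED_ACTIVE ? "SLUB_RED_ACTIVE" : "?");
	return 0;
}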
4669
+
4670
+#ifdef CONFIG_DEBUG_FS
46074671 /*
46084672 * Generate lists of code addresses where slabcache objects are allocated
46094673 * and freed.
....@@ -4625,7 +4689,10 @@
46254689 unsigned long max;
46264690 unsigned long count;
46274691 struct location *loc;
4692
+ loff_t idx;
46284693 };
4694
+
4695
+static struct dentry *slab_debugfs_root;
46294696
46304697 static void free_loc_track(struct loc_track *t)
46314698 {
....@@ -4638,9 +4705,6 @@
46384705 {
46394706 struct location *l;
46404707 int order;
4641
-
4642
- if (IS_ENABLED(CONFIG_PREEMPT_RT) && flags == GFP_ATOMIC)
4643
- return 0;
46444708
46454709 order = get_order(sizeof(struct location) * max);
46464710
....@@ -4735,105 +4799,19 @@
47354799
47364800 static void process_slab(struct loc_track *t, struct kmem_cache *s,
47374801 struct page *page, enum track_item alloc,
4738
- unsigned long *map)
4802
+ unsigned long *obj_map)
47394803 {
47404804 void *addr = page_address(page);
47414805 void *p;
47424806
4743
- bitmap_zero(map, page->objects);
4744
- get_map(s, page, map);
4807
+ __fill_map(obj_map, s, page);
47454808
47464809 for_each_object(p, s, addr, page->objects)
4747
- if (!test_bit(slab_index(p, s, addr), map))
4810
+ if (!test_bit(__obj_to_index(s, addr, p), obj_map))
47484811 add_location(t, s, get_track(s, p, alloc));
47494812 }
4750
-
4751
-static int list_locations(struct kmem_cache *s, char *buf,
4752
- enum track_item alloc)
4753
-{
4754
- int len = 0;
4755
- unsigned long i;
4756
- struct loc_track t = { 0, 0, NULL };
4757
- int node;
4758
- unsigned long *map = kmalloc_array(BITS_TO_LONGS(oo_objects(s->max)),
4759
- sizeof(unsigned long),
4760
- GFP_KERNEL);
4761
- struct kmem_cache_node *n;
4762
-
4763
- if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4764
- GFP_KERNEL)) {
4765
- kfree(map);
4766
- return sprintf(buf, "Out of memory\n");
4767
- }
4768
- /* Push back cpu slabs */
4769
- flush_all(s);
4770
-
4771
- for_each_kmem_cache_node(s, node, n) {
4772
- unsigned long flags;
4773
- struct page *page;
4774
-
4775
- if (!atomic_long_read(&n->nr_slabs))
4776
- continue;
4777
-
4778
- raw_spin_lock_irqsave(&n->list_lock, flags);
4779
- list_for_each_entry(page, &n->partial, lru)
4780
- process_slab(&t, s, page, alloc, map);
4781
- list_for_each_entry(page, &n->full, lru)
4782
- process_slab(&t, s, page, alloc, map);
4783
- raw_spin_unlock_irqrestore(&n->list_lock, flags);
4784
- }
4785
-
4786
- for (i = 0; i < t.count; i++) {
4787
- struct location *l = &t.loc[i];
4788
-
4789
- if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4790
- break;
4791
- len += sprintf(buf + len, "%7ld ", l->count);
4792
-
4793
- if (l->addr)
4794
- len += sprintf(buf + len, "%pS", (void *)l->addr);
4795
- else
4796
- len += sprintf(buf + len, "<not-available>");
4797
-
4798
- if (l->sum_time != l->min_time) {
4799
- len += sprintf(buf + len, " age=%ld/%ld/%ld",
4800
- l->min_time,
4801
- (long)div_u64(l->sum_time, l->count),
4802
- l->max_time);
4803
- } else
4804
- len += sprintf(buf + len, " age=%ld",
4805
- l->min_time);
4806
-
4807
- if (l->min_pid != l->max_pid)
4808
- len += sprintf(buf + len, " pid=%ld-%ld",
4809
- l->min_pid, l->max_pid);
4810
- else
4811
- len += sprintf(buf + len, " pid=%ld",
4812
- l->min_pid);
4813
-
4814
- if (num_online_cpus() > 1 &&
4815
- !cpumask_empty(to_cpumask(l->cpus)) &&
4816
- len < PAGE_SIZE - 60)
4817
- len += scnprintf(buf + len, PAGE_SIZE - len - 50,
4818
- " cpus=%*pbl",
4819
- cpumask_pr_args(to_cpumask(l->cpus)));
4820
-
4821
- if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4822
- len < PAGE_SIZE - 60)
4823
- len += scnprintf(buf + len, PAGE_SIZE - len - 50,
4824
- " nodes=%*pbl",
4825
- nodemask_pr_args(&l->nodes));
4826
-
4827
- len += sprintf(buf + len, "\n");
4828
- }
4829
-
4830
- free_loc_track(&t);
4831
- kfree(map);
4832
- if (!t.count)
4833
- len += sprintf(buf, "No data\n");
4834
- return len;
4835
-}
4836
-#endif
4813
+#endif /* CONFIG_DEBUG_FS */
4814
+#endif /* CONFIG_SLUB_DEBUG */
48374815
48384816 #ifdef SLUB_RESILIENCY_TEST
48394817 static void __init resiliency_test(void)
....@@ -4893,7 +4871,7 @@
48934871 #ifdef CONFIG_SLUB_SYSFS
48944872 static void resiliency_test(void) {};
48954873 #endif
4896
-#endif
4874
+#endif /* SLUB_RESILIENCY_TEST */
48974875
48984876 #ifdef CONFIG_SLUB_SYSFS
48994877 enum slab_stat_type {
....@@ -5032,20 +5010,6 @@
50325010 return x + sprintf(buf + x, "\n");
50335011 }
50345012
5035
-#ifdef CONFIG_SLUB_DEBUG
5036
-static int any_slab_objects(struct kmem_cache *s)
5037
-{
5038
- int node;
5039
- struct kmem_cache_node *n;
5040
-
5041
- for_each_kmem_cache_node(s, node, n)
5042
- if (atomic_long_read(&n->total_objects))
5043
- return 1;
5044
-
5045
- return 0;
5046
-}
5047
-#endif
5048
-
50495013 #define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
50505014 #define to_slab(n) container_of(n, struct kmem_cache, kobj)
50515015
....@@ -5087,28 +5051,11 @@
50875051 }
50885052 SLAB_ATTR_RO(objs_per_slab);
50895053
5090
-static ssize_t order_store(struct kmem_cache *s,
5091
- const char *buf, size_t length)
5092
-{
5093
- unsigned int order;
5094
- int err;
5095
-
5096
- err = kstrtouint(buf, 10, &order);
5097
- if (err)
5098
- return err;
5099
-
5100
- if (order > slub_max_order || order < slub_min_order)
5101
- return -EINVAL;
5102
-
5103
- calculate_sizes(s, order);
5104
- return length;
5105
-}
5106
-
51075054 static ssize_t order_show(struct kmem_cache *s, char *buf)
51085055 {
51095056 return sprintf(buf, "%u\n", oo_order(s->oo));
51105057 }
5111
-SLAB_ATTR(order);
5058
+SLAB_ATTR_RO(order);
51125059
51135060 static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
51145061 {
....@@ -5230,16 +5177,7 @@
52305177 {
52315178 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
52325179 }
5233
-
5234
-static ssize_t reclaim_account_store(struct kmem_cache *s,
5235
- const char *buf, size_t length)
5236
-{
5237
- s->flags &= ~SLAB_RECLAIM_ACCOUNT;
5238
- if (buf[0] == '1')
5239
- s->flags |= SLAB_RECLAIM_ACCOUNT;
5240
- return length;
5241
-}
5242
-SLAB_ATTR(reclaim_account);
5180
+SLAB_ATTR_RO(reclaim_account);
52435181
52445182 static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
52455183 {
....@@ -5284,104 +5222,34 @@
52845222 {
52855223 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
52865224 }
5287
-
5288
-static ssize_t sanity_checks_store(struct kmem_cache *s,
5289
- const char *buf, size_t length)
5290
-{
5291
- s->flags &= ~SLAB_CONSISTENCY_CHECKS;
5292
- if (buf[0] == '1') {
5293
- s->flags &= ~__CMPXCHG_DOUBLE;
5294
- s->flags |= SLAB_CONSISTENCY_CHECKS;
5295
- }
5296
- return length;
5297
-}
5298
-SLAB_ATTR(sanity_checks);
5225
+SLAB_ATTR_RO(sanity_checks);
52995226
53005227 static ssize_t trace_show(struct kmem_cache *s, char *buf)
53015228 {
53025229 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
53035230 }
5304
-
5305
-static ssize_t trace_store(struct kmem_cache *s, const char *buf,
5306
- size_t length)
5307
-{
5308
- /*
5309
- * Tracing a merged cache is going to give confusing results
5310
- * as well as cause other issues like converting a mergeable
5311
- * cache into an umergeable one.
5312
- */
5313
- if (s->refcount > 1)
5314
- return -EINVAL;
5315
-
5316
- s->flags &= ~SLAB_TRACE;
5317
- if (buf[0] == '1') {
5318
- s->flags &= ~__CMPXCHG_DOUBLE;
5319
- s->flags |= SLAB_TRACE;
5320
- }
5321
- return length;
5322
-}
5323
-SLAB_ATTR(trace);
5231
+SLAB_ATTR_RO(trace);
53245232
53255233 static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
53265234 {
53275235 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
53285236 }
53295237
5330
-static ssize_t red_zone_store(struct kmem_cache *s,
5331
- const char *buf, size_t length)
5332
-{
5333
- if (any_slab_objects(s))
5334
- return -EBUSY;
5335
-
5336
- s->flags &= ~SLAB_RED_ZONE;
5337
- if (buf[0] == '1') {
5338
- s->flags |= SLAB_RED_ZONE;
5339
- }
5340
- calculate_sizes(s, -1);
5341
- return length;
5342
-}
5343
-SLAB_ATTR(red_zone);
5238
+SLAB_ATTR_RO(red_zone);
53445239
53455240 static ssize_t poison_show(struct kmem_cache *s, char *buf)
53465241 {
53475242 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
53485243 }
53495244
5350
-static ssize_t poison_store(struct kmem_cache *s,
5351
- const char *buf, size_t length)
5352
-{
5353
- if (any_slab_objects(s))
5354
- return -EBUSY;
5355
-
5356
- s->flags &= ~SLAB_POISON;
5357
- if (buf[0] == '1') {
5358
- s->flags |= SLAB_POISON;
5359
- }
5360
- calculate_sizes(s, -1);
5361
- return length;
5362
-}
5363
-SLAB_ATTR(poison);
5245
+SLAB_ATTR_RO(poison);
53645246
53655247 static ssize_t store_user_show(struct kmem_cache *s, char *buf)
53665248 {
53675249 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
53685250 }
53695251
5370
-static ssize_t store_user_store(struct kmem_cache *s,
5371
- const char *buf, size_t length)
5372
-{
5373
- if (any_slab_objects(s))
5374
- return -EBUSY;
5375
-
5376
- s->flags &= ~SLAB_STORE_USER;
5377
- if (buf[0] == '1') {
5378
- s->flags &= ~__CMPXCHG_DOUBLE;
5379
- s->flags |= SLAB_STORE_USER;
5380
- }
5381
- calculate_sizes(s, -1);
5382
- return length;
5383
-}
5384
-SLAB_ATTR(store_user);
5252
+SLAB_ATTR_RO(store_user);
53855253
53865254 static ssize_t validate_show(struct kmem_cache *s, char *buf)
53875255 {
....@@ -5402,21 +5270,6 @@
54025270 }
54035271 SLAB_ATTR(validate);
54045272
5405
-static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
5406
-{
5407
- if (!(s->flags & SLAB_STORE_USER))
5408
- return -ENOSYS;
5409
- return list_locations(s, buf, TRACK_ALLOC);
5410
-}
5411
-SLAB_ATTR_RO(alloc_calls);
5412
-
5413
-static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
5414
-{
5415
- if (!(s->flags & SLAB_STORE_USER))
5416
- return -ENOSYS;
5417
- return list_locations(s, buf, TRACK_FREE);
5418
-}
5419
-SLAB_ATTR_RO(free_calls);
54205273 #endif /* CONFIG_SLUB_DEBUG */
54215274
54225275 #ifdef CONFIG_FAILSLAB
....@@ -5424,19 +5277,7 @@
54245277 {
54255278 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
54265279 }
5427
-
5428
-static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
5429
- size_t length)
5430
-{
5431
- if (s->refcount > 1)
5432
- return -EINVAL;
5433
-
5434
- s->flags &= ~SLAB_FAILSLAB;
5435
- if (buf[0] == '1')
5436
- s->flags |= SLAB_FAILSLAB;
5437
- return length;
5438
-}
5439
-SLAB_ATTR(failslab);
5280
+SLAB_ATTR_RO(failslab);
54405281 #endif
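[Editor's note] With the writable variants of order, reclaim_account, sanity_checks, trace, red_zone, poison, store_user and failslab gone, these debug options can no longer be toggled through sysfs on a live cache; they have to be selected before the cache is populated, typically via the slub_debug boot parameter (for example, slub_debug=FZPU,kmalloc-64 enables consistency checks, red zoning, poisoning and user tracking for the kmalloc-64 cache).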
54415282
54425283 static ssize_t shrink_show(struct kmem_cache *s, char *buf)
....@@ -5559,7 +5400,7 @@
55595400 STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
55605401 STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
55615402 STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
5562
-#endif
5403
+#endif /* CONFIG_SLUB_STATS */
55635404
55645405 static struct attribute *slab_attrs[] = {
55655406 &slab_size_attr.attr,
....@@ -5589,8 +5430,6 @@
55895430 &poison_attr.attr,
55905431 &store_user_attr.attr,
55915432 &validate_attr.attr,
5592
- &alloc_calls_attr.attr,
5593
- &free_calls_attr.attr,
55945433 #endif
55955434 #ifdef CONFIG_ZONE_DMA
55965435 &cache_dma_attr.attr,
....@@ -5672,96 +5511,7 @@
56725511 return -EIO;
56735512
56745513 err = attribute->store(s, buf, len);
5675
-#ifdef CONFIG_MEMCG
5676
- if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
5677
- struct kmem_cache *c;
5678
-
5679
- mutex_lock(&slab_mutex);
5680
- if (s->max_attr_size < len)
5681
- s->max_attr_size = len;
5682
-
5683
- /*
5684
- * This is a best effort propagation, so this function's return
5685
- * value will be determined by the parent cache only. This is
5686
- * basically because not all attributes will have a well
5687
- * defined semantics for rollbacks - most of the actions will
5688
- * have permanent effects.
5689
- *
5690
- * Returning the error value of any of the children that fail
5691
- * is not 100 % defined, in the sense that users seeing the
5692
- * error code won't be able to know anything about the state of
5693
- * the cache.
5694
- *
5695
- * Only returning the error code for the parent cache at least
5696
- * has well defined semantics. The cache being written to
5697
- * directly either failed or succeeded, in which case we loop
5698
- * through the descendants with best-effort propagation.
5699
- */
5700
- for_each_memcg_cache(c, s)
5701
- attribute->store(c, buf, len);
5702
- mutex_unlock(&slab_mutex);
5703
- }
5704
-#endif
57055514 return err;
5706
-}
5707
-
5708
-static void memcg_propagate_slab_attrs(struct kmem_cache *s)
5709
-{
5710
-#ifdef CONFIG_MEMCG
5711
- int i;
5712
- char *buffer = NULL;
5713
- struct kmem_cache *root_cache;
5714
-
5715
- if (is_root_cache(s))
5716
- return;
5717
-
5718
- root_cache = s->memcg_params.root_cache;
5719
-
5720
- /*
5721
- * This mean this cache had no attribute written. Therefore, no point
5722
- * in copying default values around
5723
- */
5724
- if (!root_cache->max_attr_size)
5725
- return;
5726
-
5727
- for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
5728
- char mbuf[64];
5729
- char *buf;
5730
- struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
5731
- ssize_t len;
5732
-
5733
- if (!attr || !attr->store || !attr->show)
5734
- continue;
5735
-
5736
- /*
5737
- * It is really bad that we have to allocate here, so we will
5738
- * do it only as a fallback. If we actually allocate, though,
5739
- * we can just use the allocated buffer until the end.
5740
- *
5741
- * Most of the slub attributes will tend to be very small in
5742
- * size, but sysfs allows buffers up to a page, so they can
5743
- * theoretically happen.
5744
- */
5745
- if (buffer)
5746
- buf = buffer;
5747
- else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf) &&
5748
- !IS_ENABLED(CONFIG_SLUB_STATS))
5749
- buf = mbuf;
5750
- else {
5751
- buffer = (char *) get_zeroed_page(GFP_KERNEL);
5752
- if (WARN_ON(!buffer))
5753
- continue;
5754
- buf = buffer;
5755
- }
5756
-
5757
- len = attr->show(root_cache, buf);
5758
- if (len > 0)
5759
- attr->store(s, buf, len);
5760
- }
5761
-
5762
- if (buffer)
5763
- free_page((unsigned long)buffer);
5764
-#endif
57655515 }
57665516
57675517 static void kmem_cache_release(struct kobject *k)
....@@ -5779,27 +5529,10 @@
57795529 .release = kmem_cache_release,
57805530 };
57815531
5782
-static int uevent_filter(struct kset *kset, struct kobject *kobj)
5783
-{
5784
- struct kobj_type *ktype = get_ktype(kobj);
5785
-
5786
- if (ktype == &slab_ktype)
5787
- return 1;
5788
- return 0;
5789
-}
5790
-
5791
-static const struct kset_uevent_ops slab_uevent_ops = {
5792
- .filter = uevent_filter,
5793
-};
5794
-
57955532 static struct kset *slab_kset;
57965533
57975534 static inline struct kset *cache_kset(struct kmem_cache *s)
57985535 {
5799
-#ifdef CONFIG_MEMCG
5800
- if (!is_root_cache(s))
5801
- return s->memcg_params.root_cache->memcg_kset;
5802
-#endif
58035536 return slab_kset;
58045537 }
58055538
....@@ -5814,7 +5547,8 @@
58145547 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
58155548 char *p = name;
58165549
5817
- BUG_ON(!name);
5550
+ if (!name)
5551
+ return ERR_PTR(-ENOMEM);
58185552
58195553 *p++ = ':';
58205554 /*
....@@ -5842,36 +5576,12 @@
58425576 return name;
58435577 }
58445578
5845
-static void sysfs_slab_remove_workfn(struct work_struct *work)
5846
-{
5847
- struct kmem_cache *s =
5848
- container_of(work, struct kmem_cache, kobj_remove_work);
5849
-
5850
- if (!s->kobj.state_in_sysfs)
5851
- /*
5852
- * For a memcg cache, this may be called during
5853
- * deactivation and again on shutdown. Remove only once.
5854
- * A cache is never shut down before deactivation is
5855
- * complete, so no need to worry about synchronization.
5856
- */
5857
- goto out;
5858
-
5859
-#ifdef CONFIG_MEMCG
5860
- kset_unregister(s->memcg_kset);
5861
-#endif
5862
- kobject_uevent(&s->kobj, KOBJ_REMOVE);
5863
-out:
5864
- kobject_put(&s->kobj);
5865
-}
5866
-
58675579 static int sysfs_slab_add(struct kmem_cache *s)
58685580 {
58695581 int err;
58705582 const char *name;
58715583 struct kset *kset = cache_kset(s);
58725584 int unmergeable = slab_unmergeable(s);
5873
-
5874
- INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
58755585
58765586 if (!kset) {
58775587 kobject_init(&s->kobj, &slab_ktype);
....@@ -5896,6 +5606,8 @@
58965606 * for the symlinks.
58975607 */
58985608 name = create_unique_id(s);
5609
+ if (IS_ERR(name))
5610
+ return PTR_ERR(name);
58995611 }
59005612
59015613 s->kobj.kset = kset;
....@@ -5907,17 +5619,6 @@
59075619 if (err)
59085620 goto out_del_kobj;
59095621
5910
-#ifdef CONFIG_MEMCG
5911
- if (is_root_cache(s) && memcg_sysfs_enabled) {
5912
- s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
5913
- if (!s->memcg_kset) {
5914
- err = -ENOMEM;
5915
- goto out_del_kobj;
5916
- }
5917
- }
5918
-#endif
5919
-
5920
- kobject_uevent(&s->kobj, KOBJ_ADD);
59215622 if (!unmergeable) {
59225623 /* Setup first alias */
59235624 sysfs_slab_alias(s, s->name);
....@@ -5929,19 +5630,6 @@
59295630 out_del_kobj:
59305631 kobject_del(&s->kobj);
59315632 goto out;
5932
-}
5933
-
5934
-static void sysfs_slab_remove(struct kmem_cache *s)
5935
-{
5936
- if (slab_state < FULL)
5937
- /*
5938
- * Sysfs has not been setup yet so no need to remove the
5939
- * cache from sysfs.
5940
- */
5941
- return;
5942
-
5943
- kobject_get(&s->kobj);
5944
- schedule_work(&s->kobj_remove_work);
59455633 }
59465634
59475635 void sysfs_slab_unlink(struct kmem_cache *s)
....@@ -5998,7 +5686,7 @@
59985686
59995687 mutex_lock(&slab_mutex);
60005688
6001
- slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
5689
+ slab_kset = kset_create_and_add("slab", NULL, kernel_kobj);
60025690 if (!slab_kset) {
60035691 mutex_unlock(&slab_mutex);
60045692 pr_err("Cannot register slab subsystem.\n");
....@@ -6033,6 +5721,189 @@
60335721 __initcall(slab_sysfs_init);
60345722 #endif /* CONFIG_SLUB_SYSFS */
60355723
5724
+#if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
5725
+static int slab_debugfs_show(struct seq_file *seq, void *v)
5726
+{
5727
+ struct loc_track *t = seq->private;
5728
+ struct location *l;
5729
+ unsigned long idx;
5730
+
5731
+ idx = (unsigned long) t->idx;
5732
+ if (idx < t->count) {
5733
+ l = &t->loc[idx];
5734
+
5735
+ seq_printf(seq, "%7ld ", l->count);
5736
+
5737
+ if (l->addr)
5738
+ seq_printf(seq, "%pS", (void *)l->addr);
5739
+ else
5740
+ seq_puts(seq, "<not-available>");
5741
+
5742
+ if (l->sum_time != l->min_time) {
5743
+ seq_printf(seq, " age=%ld/%llu/%ld",
5744
+ l->min_time, div_u64(l->sum_time, l->count),
5745
+ l->max_time);
5746
+ } else
5747
+ seq_printf(seq, " age=%ld", l->min_time);
5748
+
5749
+ if (l->min_pid != l->max_pid)
5750
+ seq_printf(seq, " pid=%ld-%ld", l->min_pid, l->max_pid);
5751
+ else
5752
+ seq_printf(seq, " pid=%ld",
5753
+ l->min_pid);
5754
+
5755
+ if (num_online_cpus() > 1 && !cpumask_empty(to_cpumask(l->cpus)))
5756
+ seq_printf(seq, " cpus=%*pbl",
5757
+ cpumask_pr_args(to_cpumask(l->cpus)));
5758
+
5759
+ if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
5760
+ seq_printf(seq, " nodes=%*pbl",
5761
+ nodemask_pr_args(&l->nodes));
5762
+
5763
+ seq_puts(seq, "\n");
5764
+ }
5765
+
5766
+ if (!idx && !t->count)
5767
+ seq_puts(seq, "No data\n");
5768
+
5769
+ return 0;
5770
+}
5771
+
5772
+static void slab_debugfs_stop(struct seq_file *seq, void *v)
5773
+{
5774
+}
5775
+
5776
+static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos)
5777
+{
5778
+ struct loc_track *t = seq->private;
5779
+
5780
+ t->idx = ++(*ppos);
5781
+ if (*ppos <= t->count)
5782
+ return ppos;
5783
+
5784
+ return NULL;
5785
+}
5786
+
5787
+static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos)
5788
+{
5789
+ struct loc_track *t = seq->private;
5790
+
5791
+ t->idx = *ppos;
5792
+ return ppos;
5793
+}
5794
+
5795
+static const struct seq_operations slab_debugfs_sops = {
5796
+ .start = slab_debugfs_start,
5797
+ .next = slab_debugfs_next,
5798
+ .stop = slab_debugfs_stop,
5799
+ .show = slab_debugfs_show,
5800
+};
5801
+
5802
+static int slab_debug_trace_open(struct inode *inode, struct file *filep)
5803
+{
5804
+
5805
+ struct kmem_cache_node *n;
5806
+ enum track_item alloc;
5807
+ int node;
5808
+ struct loc_track *t = __seq_open_private(filep, &slab_debugfs_sops,
5809
+ sizeof(struct loc_track));
5810
+ struct kmem_cache *s = file_inode(filep)->i_private;
5811
+ unsigned long *obj_map;
5812
+
5813
+ if (!t)
5814
+ return -ENOMEM;
5815
+
5816
+ obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL);
5817
+ if (!obj_map) {
5818
+ seq_release_private(inode, filep);
5819
+ return -ENOMEM;
5820
+ }
5821
+
5822
+ if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0)
5823
+ alloc = TRACK_ALLOC;
5824
+ else
5825
+ alloc = TRACK_FREE;
5826
+
5827
+ if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) {
5828
+ bitmap_free(obj_map);
5829
+ seq_release_private(inode, filep);
5830
+ return -ENOMEM;
5831
+ }
5832
+
5833
+ /* Push back cpu slabs */
5834
+ flush_all(s);
5835
+
5836
+ for_each_kmem_cache_node(s, node, n) {
5837
+ unsigned long flags;
5838
+ struct page *page;
5839
+
5840
+ if (!atomic_long_read(&n->nr_slabs))
5841
+ continue;
5842
+
5843
+ spin_lock_irqsave(&n->list_lock, flags);
5844
+ list_for_each_entry(page, &n->partial, slab_list)
5845
+ process_slab(t, s, page, alloc, obj_map);
5846
+ list_for_each_entry(page, &n->full, slab_list)
5847
+ process_slab(t, s, page, alloc, obj_map);
5848
+ spin_unlock_irqrestore(&n->list_lock, flags);
5849
+ }
5850
+
5851
+ bitmap_free(obj_map);
5852
+ return 0;
5853
+}
5854
+
5855
+static int slab_debug_trace_release(struct inode *inode, struct file *file)
5856
+{
5857
+ struct seq_file *seq = file->private_data;
5858
+ struct loc_track *t = seq->private;
5859
+
5860
+ free_loc_track(t);
5861
+ return seq_release_private(inode, file);
5862
+}
5863
+
5864
+static const struct file_operations slab_debugfs_fops = {
5865
+ .open = slab_debug_trace_open,
5866
+ .read = seq_read,
5867
+ .llseek = seq_lseek,
5868
+ .release = slab_debug_trace_release,
5869
+};
5870
+
5871
+static void debugfs_slab_add(struct kmem_cache *s)
5872
+{
5873
+ struct dentry *slab_cache_dir;
5874
+
5875
+ if (unlikely(!slab_debugfs_root))
5876
+ return;
5877
+
5878
+ slab_cache_dir = debugfs_create_dir(s->name, slab_debugfs_root);
5879
+
5880
+ debugfs_create_file("alloc_traces", 0400,
5881
+ slab_cache_dir, s, &slab_debugfs_fops);
5882
+
5883
+ debugfs_create_file("free_traces", 0400,
5884
+ slab_cache_dir, s, &slab_debugfs_fops);
5885
+}
5886
+
5887
+void debugfs_slab_release(struct kmem_cache *s)
5888
+{
5889
+ debugfs_remove_recursive(debugfs_lookup(s->name, slab_debugfs_root));
5890
+}
5891
+
5892
+static int __init slab_debugfs_init(void)
5893
+{
5894
+ struct kmem_cache *s;
5895
+
5896
+ slab_debugfs_root = debugfs_create_dir("slab", NULL);
5897
+
5898
+ list_for_each_entry(s, &slab_caches, list)
5899
+ if (s->flags & SLAB_STORE_USER)
5900
+ debugfs_slab_add(s);
5901
+
5902
+ return 0;
5903
+
5904
+}
5905
+__initcall(slab_debugfs_init);
5906
+#endif
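[Editor's note] The new alloc_traces and free_traces files are created under the slab directory in debugfs for every cache built with SLAB_STORE_USER. A small user-space reader as a usage sketch; the mount point and the kmalloc-64 cache name are assumptions, and the files are root-only (mode 0400):

#include <stdio.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/slab/kmalloc-64/alloc_traces";
	char line[512];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}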
60365907 /*
60375908 * The /proc/slabinfo ABI
60385909 */
....@@ -6058,6 +5929,7 @@
60585929 sinfo->objects_per_slab = oo_objects(s->oo);
60595930 sinfo->cache_order = oo_order(s->oo);
60605931 }
5932
+EXPORT_SYMBOL_GPL(get_slabinfo);
60615933
60625934 void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
60635935 {