2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/mm/slub.c
....@@ -28,6 +28,7 @@
2828 #include <linux/ctype.h>
2929 #include <linux/debugobjects.h>
3030 #include <linux/kallsyms.h>
31
+#include <linux/kfence.h>
3132 #include <linux/memory.h>
3233 #include <linux/math64.h>
3334 #include <linux/fault-inject.h>
....@@ -36,7 +37,9 @@
3637 #include <linux/memcontrol.h>
3738 #include <linux/random.h>
3839
40
+#include <linux/debugfs.h>
3941 #include <trace/events/kmem.h>
42
+#include <trace/hooks/mm.h>
4043
4144 #include "internal.h"
4245
....@@ -59,10 +62,11 @@
5962 * D. page->frozen -> frozen state
6063 *
6164 * If a slab is frozen then it is exempt from list management. It is not
62
- * on any list. The processor that froze the slab is the one who can
63
- * perform list operations on the page. Other processors may put objects
64
- * onto the freelist but the processor that froze the slab is the only
65
- * one that can retrieve the objects from the page's freelist.
65
+ * on any list except per cpu partial list. The processor that froze the
66
+ * slab is the one who can perform list operations on the page. Other
67
+ * processors may put objects onto the freelist but the processor that
68
+ * froze the slab is the only one that can retrieve the objects from the
69
+ * page's freelist.
6670 *
6771 * The list_lock protects the partial and full list on each node and
6872 * the partial slab counter. If taken then no new slabs may be added or
....@@ -93,9 +97,7 @@
9397 * minimal so we rely on the page allocators per cpu caches for
9498 * fast frees and allocs.
9599 *
96
- * Overloading of page flags that are otherwise used for LRU management.
97
- *
98
- * PageActive The slab is frozen and exempt from list processing.
100
+ * page->frozen The slab is frozen and exempt from list processing.
99101 * This means that the slab is dedicated to a purpose
100102 * such as satisfying allocations for a specific
101103 * processor. Objects may be freed in the slab while
....@@ -111,23 +113,27 @@
111113 * free objects in addition to the regular freelist
112114 * that requires the slab lock.
113115 *
114
- * PageError Slab requires special handling due to debug
116
+ * SLAB_DEBUG_FLAGS Slab requires special handling due to debug
115117 * options set. This moves slab handling out of
116118 * the fast path and disables lockless freelists.
117119 */
118120
119
-static inline int kmem_cache_debug(struct kmem_cache *s)
120
-{
121121 #ifdef CONFIG_SLUB_DEBUG
122
- return unlikely(s->flags & SLAB_DEBUG_FLAGS);
122
+#ifdef CONFIG_SLUB_DEBUG_ON
123
+DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
123124 #else
124
- return 0;
125
+DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
125126 #endif
127
+#endif
128
+
129
+static inline bool kmem_cache_debug(struct kmem_cache *s)
130
+{
131
+ return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
126132 }
127133
128134 void *fixup_red_left(struct kmem_cache *s, void *p)
129135 {
130
- if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
136
+ if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
131137 p += s->red_left_pad;
132138
133139 return p;
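Not part of the diff: kmem_cache_debug() now routes through kmem_cache_debug_flags(), whose definition lives outside the hunks shown here (in mm/slab.h or earlier in mm/slub.c, depending on the tree). As a rough sketch of what that helper does with the static key introduced above:

static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t flags)
{
	/* Sketch only; the real definition is outside this diff. */
	VM_WARN_ON_ONCE(!(flags & SLAB_DEBUG_FLAGS));
#ifdef CONFIG_SLUB_DEBUG
	if (static_branch_unlikely(&slub_debug_enabled))
		return s->flags & flags;
#endif
	return false;
}

The practical effect is that, when slub_debug stays disabled at boot, the debug checks in the fast paths collapse to a statically patched branch instead of a load and test of s->flags.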
....@@ -197,33 +203,19 @@
197203 /* Use cmpxchg_double */
198204 #define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U)
199205
200
-/*
201
- * Tracking user of a slab.
202
- */
203
-#define TRACK_ADDRS_COUNT 16
204
-struct track {
205
- unsigned long addr; /* Called from address */
206
-#ifdef CONFIG_STACKTRACE
207
- unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */
208
-#endif
209
- int cpu; /* Was running on cpu */
210
- int pid; /* Pid context */
211
- unsigned long when; /* When did the operation occur */
212
-};
213
-
214
-enum track_item { TRACK_ALLOC, TRACK_FREE };
215
-
216206 #ifdef CONFIG_SLUB_SYSFS
217207 static int sysfs_slab_add(struct kmem_cache *);
218208 static int sysfs_slab_alias(struct kmem_cache *, const char *);
219
-static void memcg_propagate_slab_attrs(struct kmem_cache *s);
220
-static void sysfs_slab_remove(struct kmem_cache *s);
221209 #else
222210 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
223211 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
224212 { return 0; }
225
-static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
226
-static inline void sysfs_slab_remove(struct kmem_cache *s) { }
213
+#endif
214
+
215
+#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
216
+static void debugfs_slab_add(struct kmem_cache *);
217
+#else
218
+static inline void debugfs_slab_add(struct kmem_cache *s) { }
227219 #endif
228220
229221 static inline void stat(const struct kmem_cache *s, enum stat_item si)
....@@ -251,7 +243,7 @@
251243 {
252244 #ifdef CONFIG_SLAB_FREELIST_HARDENED
253245 /*
254
- * When CONFIG_KASAN_SW_TAGS is enabled, ptr_addr might be tagged.
246
+ * When CONFIG_KASAN_SW/HW_TAGS is enabled, ptr_addr might be tagged.
255247 * Normally, this doesn't cause any issues, as both set_freepointer()
256248 * and get_freepointer() are called with a pointer with the same tag.
257249 * However, there are some issues with CONFIG_SLUB_DEBUG code. For
....@@ -277,6 +269,7 @@
277269
278270 static inline void *get_freepointer(struct kmem_cache *s, void *object)
279271 {
272
+ object = kasan_reset_tag(object);
280273 return freelist_dereference(s, object + s->offset);
281274 }
282275
....@@ -290,11 +283,12 @@
290283 unsigned long freepointer_addr;
291284 void *p;
292285
293
- if (!debug_pagealloc_enabled())
286
+ if (!debug_pagealloc_enabled_static())
294287 return get_freepointer(s, object);
295288
289
+ object = kasan_reset_tag(object);
296290 freepointer_addr = (unsigned long)object + s->offset;
297
- probe_kernel_read(&p, (void **)freepointer_addr, sizeof(p));
291
+ copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p));
298292 return freelist_ptr(s, p, freepointer_addr);
299293 }
300294
....@@ -306,6 +300,7 @@
306300 BUG_ON(object == fp); /* naive detection of double free or corruption */
307301 #endif
308302
303
+ freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr);
309304 *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
310305 }
311306
....@@ -314,12 +309,6 @@
314309 for (__p = fixup_red_left(__s, __addr); \
315310 __p < (__addr) + (__objects) * (__s)->size; \
316311 __p += (__s)->size)
317
-
318
-/* Determine object index from a given position */
319
-static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr)
320
-{
321
- return (kasan_reset_tag(p) - addr) / s->size;
322
-}
323312
324313 static inline unsigned int order_objects(unsigned int order, unsigned int size)
325314 {
....@@ -441,19 +430,43 @@
441430 }
442431
443432 #ifdef CONFIG_SLUB_DEBUG
433
+static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
434
+static DEFINE_SPINLOCK(object_map_lock);
435
+
436
+static void __fill_map(unsigned long *obj_map, struct kmem_cache *s,
437
+ struct page *page)
438
+{
439
+ void *addr = page_address(page);
440
+ void *p;
441
+
442
+ bitmap_zero(obj_map, page->objects);
443
+
444
+ for (p = page->freelist; p; p = get_freepointer(s, p))
445
+ set_bit(__obj_to_index(s, addr, p), obj_map);
446
+}
447
+
444448 /*
445449 * Determine a map of object in use on a page.
446450 *
447451 * Node listlock must be held to guarantee that the page does
448452 * not vanish from under us.
449453 */
450
-static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
454
+static unsigned long *get_map(struct kmem_cache *s, struct page *page)
455
+ __acquires(&object_map_lock)
451456 {
452
- void *p;
453
- void *addr = page_address(page);
457
+ VM_BUG_ON(!irqs_disabled());
454458
455
- for (p = page->freelist; p; p = get_freepointer(s, p))
456
- set_bit(slab_index(p, s, addr), map);
459
+ spin_lock(&object_map_lock);
460
+
461
+ __fill_map(object_map, s, page);
462
+
463
+ return object_map;
464
+}
465
+
466
+static void put_map(unsigned long *map) __releases(&object_map_lock)
467
+{
468
+ VM_BUG_ON(map != object_map);
469
+ spin_unlock(&object_map_lock);
457470 }
458471
459472 static inline unsigned int size_from_object(struct kmem_cache *s)
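A sketch, not part of the diff, of the calling convention the new get_map()/put_map() pair implies. The walker below is hypothetical; the real users are the validation and list walkers elsewhere in the file, which already hold the node's list_lock with interrupts disabled:

/* Hypothetical caller; n->list_lock must already be held with IRQs off. */
static void walk_free_objects(struct kmem_cache *s, struct page *page)
{
	void *addr = page_address(page);
	unsigned long *map;
	void *p;

	map = get_map(s, page);			/* takes object_map_lock */
	for_each_object(p, s, addr, page->objects) {
		if (test_bit(__obj_to_index(s, addr, p), map)) {
			/* bit set: object p is currently on the freelist */
		}
	}
	put_map(map);				/* releases object_map_lock */
}

Because object_map is a single shared bitmap serialized by object_map_lock, only one such walk can be in flight at a time, which is what the VM_BUG_ON(map != object_map) in put_map() asserts.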
....@@ -476,12 +489,12 @@
476489 * Debug settings:
477490 */
478491 #if defined(CONFIG_SLUB_DEBUG_ON)
479
-static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
492
+slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
480493 #else
481
-static slab_flags_t slub_debug;
494
+slab_flags_t slub_debug;
482495 #endif
483496
484
-static char *slub_debug_slabs;
497
+static char *slub_debug_string;
485498 static int disable_higher_order_debug;
486499
487500 /*
....@@ -528,9 +541,29 @@
528541 unsigned int length)
529542 {
530543 metadata_access_enable();
531
- print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
532
- length, 1);
544
+ print_hex_dump(level, text, DUMP_PREFIX_ADDRESS,
545
+ 16, 1, kasan_reset_tag((void *)addr), length, 1);
533546 metadata_access_disable();
547
+}
548
+
549
+/*
550
+ * See comment in calculate_sizes().
551
+ */
552
+static inline bool freeptr_outside_object(struct kmem_cache *s)
553
+{
554
+ return s->offset >= s->inuse;
555
+}
556
+
557
+/*
558
+ * Return offset of the end of info block which is inuse + free pointer if
559
+ * not overlapping with object.
560
+ */
561
+static inline unsigned int get_info_end(struct kmem_cache *s)
562
+{
563
+ if (freeptr_outside_object(s))
564
+ return s->inuse + sizeof(void *);
565
+ else
566
+ return s->inuse;
534567 }
535568
536569 static struct track *get_track(struct kmem_cache *s, void *object,
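A worked illustration, not taken from the patch and with made-up sizes, of the layout get_info_end() and freeptr_outside_object() describe. It assumes SLAB_RED_ZONE and SLAB_STORE_USER are set and the free pointer is stored outside the object:

/*
 * Example only: object_size = 40, inuse = 48, offset = 48.
 * Offsets are relative to the object pointer.
 *
 *   [  0 .. 39 ]   object payload
 *   [ 40 .. 47 ]   right red zone (inuse - object_size bytes)
 *   [ 48 .. 55 ]   free pointer (s->offset)
 *   [ 56 .. .. ]   alloc/free struct track pair, then padding
 *
 * freeptr_outside_object() is true here (offset >= inuse), so
 * get_info_end() returns inuse + sizeof(void *) = 56, the start of the
 * tracking records.  If the free pointer instead overlaps the object,
 * get_info_end() is simply inuse.
 */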
....@@ -538,13 +571,45 @@
538571 {
539572 struct track *p;
540573
541
- if (s->offset)
542
- p = object + s->offset + sizeof(void *);
543
- else
544
- p = object + s->inuse;
574
+ p = object + get_info_end(s);
545575
546
- return p + alloc;
576
+ return kasan_reset_tag(p + alloc);
547577 }
578
+
579
+/*
580
+ * This function will be used to loop through all the slab objects in
581
+ * a page to give track structure for each object, the function fn will
582
+ * be using this track structure and extract required info into its private
583
+ * data, the return value will be the number of track structures that are
584
+ * processed.
585
+ */
586
+unsigned long get_each_object_track(struct kmem_cache *s,
587
+ struct page *page, enum track_item alloc,
588
+ int (*fn)(const struct kmem_cache *, const void *,
589
+ const struct track *, void *), void *private)
590
+{
591
+ void *p;
592
+ struct track *t;
593
+ int ret;
594
+ unsigned long num_track = 0;
595
+
596
+ if (!slub_debug || !(s->flags & SLAB_STORE_USER))
597
+ return 0;
598
+
599
+ slab_lock(page);
600
+ for_each_object(p, s, page_address(page), page->objects) {
601
+ t = get_track(s, p, alloc);
602
+ metadata_access_enable();
603
+ ret = fn(s, p, t, private);
604
+ metadata_access_disable();
605
+ if (ret < 0)
606
+ break;
607
+ num_track += 1;
608
+ }
609
+ slab_unlock(page);
610
+ return num_track;
611
+}
612
+EXPORT_SYMBOL_GPL(get_each_object_track);
548613
549614 static void set_track(struct kmem_cache *s, void *object,
550615 enum track_item alloc, unsigned long addr)
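A minimal sketch, not part of the diff, of how a module might use the newly exported get_each_object_track(). The callback and the bookkeeping struct are invented for illustration; only the iterator's signature comes from the patch:

/* Count objects in a slab page whose allocation was recorded at 'ip'. */
struct track_counter {
	unsigned long ip;
	unsigned long hits;
};

static int count_alloc_site(const struct kmem_cache *s, const void *object,
			    const struct track *t, void *private)
{
	struct track_counter *tc = private;

	if (t->addr == tc->ip)
		tc->hits++;
	return 0;	/* a negative return stops the walk early */
}

/*
 * Caller side, for a page known to belong to cache 's':
 *	struct track_counter tc = { .ip = ip_of_interest };
 *	get_each_object_track(s, page, TRACK_ALLOC, count_alloc_site, &tc);
 * The return value is the number of track records the callback saw.
 */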
....@@ -553,31 +618,25 @@
553618
554619 if (addr) {
555620 #ifdef CONFIG_STACKTRACE
556
- struct stack_trace trace;
557
- int i;
621
+ unsigned int nr_entries;
558622
559
- trace.nr_entries = 0;
560
- trace.max_entries = TRACK_ADDRS_COUNT;
561
- trace.entries = p->addrs;
562
- trace.skip = 3;
563623 metadata_access_enable();
564
- save_stack_trace(&trace);
624
+ nr_entries = stack_trace_save(kasan_reset_tag(p->addrs),
625
+ TRACK_ADDRS_COUNT, 3);
565626 metadata_access_disable();
566627
567
- /* See rant in lockdep.c */
568
- if (trace.nr_entries != 0 &&
569
- trace.entries[trace.nr_entries - 1] == ULONG_MAX)
570
- trace.nr_entries--;
571
-
572
- for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
573
- p->addrs[i] = 0;
628
+ if (nr_entries < TRACK_ADDRS_COUNT)
629
+ p->addrs[nr_entries] = 0;
630
+ trace_android_vh_save_track_hash(alloc == TRACK_ALLOC,
631
+ (unsigned long)p);
574632 #endif
575633 p->addr = addr;
576634 p->cpu = smp_processor_id();
577635 p->pid = current->pid;
578636 p->when = jiffies;
579
- } else
637
+ } else {
580638 memset(p, 0, sizeof(struct track));
639
+ }
581640 }
582641
583642 static void init_tracking(struct kmem_cache *s, void *object)
....@@ -608,7 +667,7 @@
608667 #endif
609668 }
610669
611
-static void print_tracking(struct kmem_cache *s, void *object)
670
+void print_tracking(struct kmem_cache *s, void *object)
612671 {
613672 unsigned long pr_time = jiffies;
614673 if (!(s->flags & SLAB_STORE_USER))
....@@ -636,8 +695,6 @@
636695 pr_err("=============================================================================\n");
637696 pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
638697 pr_err("-----------------------------------------------------------------------------\n\n");
639
-
640
- add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
641698 va_end(args);
642699 }
643700
....@@ -691,10 +748,7 @@
691748 print_section(KERN_ERR, "Redzone ", p + s->object_size,
692749 s->inuse - s->object_size);
693750
694
- if (s->offset)
695
- off = s->offset + sizeof(void *);
696
- else
697
- off = s->inuse;
751
+ off = get_info_end(s);
698752
699753 if (s->flags & SLAB_STORE_USER)
700754 off += 2 * sizeof(struct track);
....@@ -714,6 +768,7 @@
714768 {
715769 slab_bug(s, "%s", reason);
716770 print_trailer(s, page, object);
771
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
717772 }
718773
719774 static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
....@@ -728,11 +783,12 @@
728783 slab_bug(s, "%s", buf);
729784 print_page_info(page);
730785 dump_stack();
786
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
731787 }
732788
733789 static void init_object(struct kmem_cache *s, void *object, u8 val)
734790 {
735
- u8 *p = object;
791
+ u8 *p = kasan_reset_tag(object);
736792
737793 if (s->flags & SLAB_RED_ZONE)
738794 memset(p - s->red_left_pad, val, s->red_left_pad);
....@@ -759,9 +815,10 @@
759815 {
760816 u8 *fault;
761817 u8 *end;
818
+ u8 *addr = page_address(page);
762819
763820 metadata_access_enable();
764
- fault = memchr_inv(start, value, bytes);
821
+ fault = memchr_inv(kasan_reset_tag(start), value, bytes);
765822 metadata_access_disable();
766823 if (!fault)
767824 return 1;
....@@ -771,9 +828,11 @@
771828 end--;
772829
773830 slab_bug(s, "%s overwritten", what);
774
- pr_err("INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
775
- fault, end - 1, fault[0], value);
831
+ pr_err("INFO: 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
832
+ fault, end - 1, fault - addr,
833
+ fault[0], value);
776834 print_trailer(s, page, object);
835
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
777836
778837 restore_bytes(s, what, value, fault, end);
779838 return 0;
....@@ -785,7 +844,7 @@
785844 * object address
786845 * Bytes of the object to be managed.
787846 * If the freepointer may overlay the object then the free
788
- * pointer is the first word of the object.
847
+ * pointer is at the middle of the object.
789848 *
790849 * Poisoning uses 0x6b (POISON_FREE) and the last byte is
791850 * 0xa5 (POISON_END)
....@@ -819,11 +878,7 @@
819878
820879 static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
821880 {
822
- unsigned long off = s->inuse; /* The end of info */
823
-
824
- if (s->offset)
825
- /* Freepointer is placed after the object. */
826
- off += sizeof(void *);
881
+ unsigned long off = get_info_end(s); /* The end of info */
827882
828883 if (s->flags & SLAB_STORE_USER)
829884 /* We also have user information there */
....@@ -852,7 +907,7 @@
852907 return 1;
853908
854909 start = page_address(page);
855
- length = PAGE_SIZE << compound_order(page);
910
+ length = page_size(page);
856911 end = start + length;
857912 remainder = length % s->size;
858913 if (!remainder)
....@@ -860,14 +915,15 @@
860915
861916 pad = end - remainder;
862917 metadata_access_enable();
863
- fault = memchr_inv(pad, POISON_INUSE, remainder);
918
+ fault = memchr_inv(kasan_reset_tag(pad), POISON_INUSE, remainder);
864919 metadata_access_disable();
865920 if (!fault)
866921 return 1;
867922 while (end > fault && end[-1] == POISON_INUSE)
868923 end--;
869924
870
- slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
925
+ slab_err(s, page, "Padding overwritten. 0x%p-0x%p @offset=%tu",
926
+ fault, end - 1, fault - start);
871927 print_section(KERN_ERR, "Padding ", pad, remainder);
872928
873929 restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
....@@ -909,7 +965,7 @@
909965 check_pad_bytes(s, page, p);
910966 }
911967
912
- if (!s->offset && val == SLUB_RED_ACTIVE)
968
+ if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
913969 /*
914970 * Object and freepointer overlap. Cannot check
915971 * freepointer while object is allocated.
....@@ -1038,7 +1094,7 @@
10381094 return;
10391095
10401096 lockdep_assert_held(&n->list_lock);
1041
- list_add(&page->lru, &n->full);
1097
+ list_add(&page->slab_list, &n->full);
10421098 }
10431099
10441100 static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
....@@ -1047,7 +1103,7 @@
10471103 return;
10481104
10491105 lockdep_assert_held(&n->list_lock);
1050
- list_del(&page->lru);
1106
+ list_del(&page->slab_list);
10511107 }
10521108
10531109 /* Tracking of the number of slabs for debugging purposes */
....@@ -1090,26 +1146,26 @@
10901146 static void setup_object_debug(struct kmem_cache *s, struct page *page,
10911147 void *object)
10921148 {
1093
- if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1149
+ if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))
10941150 return;
10951151
10961152 init_object(s, object, SLUB_RED_INACTIVE);
10971153 init_tracking(s, object);
10981154 }
10991155
1100
-static void setup_page_debug(struct kmem_cache *s, void *addr, int order)
1156
+static
1157
+void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
11011158 {
1102
- if (!(s->flags & SLAB_POISON))
1159
+ if (!kmem_cache_debug_flags(s, SLAB_POISON))
11031160 return;
11041161
11051162 metadata_access_enable();
1106
- memset(addr, POISON_INUSE, PAGE_SIZE << order);
1163
+ memset(kasan_reset_tag(addr), POISON_INUSE, page_size(page));
11071164 metadata_access_disable();
11081165 }
11091166
11101167 static inline int alloc_consistency_checks(struct kmem_cache *s,
1111
- struct page *page,
1112
- void *object, unsigned long addr)
1168
+ struct page *page, void *object)
11131169 {
11141170 if (!check_slab(s, page))
11151171 return 0;
....@@ -1130,7 +1186,7 @@
11301186 void *object, unsigned long addr)
11311187 {
11321188 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1133
- if (!alloc_consistency_checks(s, page, object, addr))
1189
+ if (!alloc_consistency_checks(s, page, object))
11341190 goto bad;
11351191 }
11361192
....@@ -1196,10 +1252,10 @@
11961252 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
11971253 void *object = head;
11981254 int cnt = 0;
1199
- unsigned long uninitialized_var(flags);
1255
+ unsigned long flags;
12001256 int ret = 0;
12011257
1202
- raw_spin_lock_irqsave(&n->list_lock, flags);
1258
+ spin_lock_irqsave(&n->list_lock, flags);
12031259 slab_lock(page);
12041260
12051261 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
....@@ -1234,75 +1290,144 @@
12341290 bulk_cnt, cnt);
12351291
12361292 slab_unlock(page);
1237
- raw_spin_unlock_irqrestore(&n->list_lock, flags);
1293
+ spin_unlock_irqrestore(&n->list_lock, flags);
12381294 if (!ret)
12391295 slab_fix(s, "Object at 0x%p not freed", object);
12401296 return ret;
12411297 }
12421298
1243
-static int __init setup_slub_debug(char *str)
1299
+/*
1300
+ * Parse a block of slub_debug options. Blocks are delimited by ';'
1301
+ *
1302
+ * @str: start of block
1303
+ * @flags: returns parsed flags, or DEBUG_DEFAULT_FLAGS if none specified
1304
+ * @slabs: return start of list of slabs, or NULL when there's no list
1305
+ * @init: assume this is initial parsing and not per-kmem-create parsing
1306
+ *
1307
+ * returns the start of next block if there's any, or NULL
1308
+ */
1309
+static char *
1310
+parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init)
12441311 {
1245
- slub_debug = DEBUG_DEFAULT_FLAGS;
1246
- if (*str++ != '=' || !*str)
1247
- /*
1248
- * No options specified. Switch on full debugging.
1249
- */
1250
- goto out;
1312
+ bool higher_order_disable = false;
12511313
1252
- if (*str == ',')
1314
+ /* Skip any completely empty blocks */
1315
+ while (*str && *str == ';')
1316
+ str++;
1317
+
1318
+ if (*str == ',') {
12531319 /*
12541320 * No options but restriction on slabs. This means full
12551321 * debugging for slabs matching a pattern.
12561322 */
1323
+ *flags = DEBUG_DEFAULT_FLAGS;
12571324 goto check_slabs;
1325
+ }
1326
+ *flags = 0;
12581327
1259
- slub_debug = 0;
1260
- if (*str == '-')
1261
- /*
1262
- * Switch off all debugging measures.
1263
- */
1264
- goto out;
1265
-
1266
- /*
1267
- * Determine which debug features should be switched on
1268
- */
1269
- for (; *str && *str != ','; str++) {
1328
+ /* Determine which debug features should be switched on */
1329
+ for (; *str && *str != ',' && *str != ';'; str++) {
12701330 switch (tolower(*str)) {
1331
+ case '-':
1332
+ *flags = 0;
1333
+ break;
12711334 case 'f':
1272
- slub_debug |= SLAB_CONSISTENCY_CHECKS;
1335
+ *flags |= SLAB_CONSISTENCY_CHECKS;
12731336 break;
12741337 case 'z':
1275
- slub_debug |= SLAB_RED_ZONE;
1338
+ *flags |= SLAB_RED_ZONE;
12761339 break;
12771340 case 'p':
1278
- slub_debug |= SLAB_POISON;
1341
+ *flags |= SLAB_POISON;
12791342 break;
12801343 case 'u':
1281
- slub_debug |= SLAB_STORE_USER;
1344
+ *flags |= SLAB_STORE_USER;
12821345 break;
12831346 case 't':
1284
- slub_debug |= SLAB_TRACE;
1347
+ *flags |= SLAB_TRACE;
12851348 break;
12861349 case 'a':
1287
- slub_debug |= SLAB_FAILSLAB;
1350
+ *flags |= SLAB_FAILSLAB;
12881351 break;
12891352 case 'o':
12901353 /*
12911354 * Avoid enabling debugging on caches if its minimum
12921355 * order would increase as a result.
12931356 */
1294
- disable_higher_order_debug = 1;
1357
+ higher_order_disable = true;
12951358 break;
12961359 default:
1297
- pr_err("slub_debug option '%c' unknown. skipped\n",
1298
- *str);
1360
+ if (init)
1361
+ pr_err("slub_debug option '%c' unknown. skipped\n", *str);
1362
+ }
1363
+ }
1364
+check_slabs:
1365
+ if (*str == ',')
1366
+ *slabs = ++str;
1367
+ else
1368
+ *slabs = NULL;
1369
+
1370
+ /* Skip over the slab list */
1371
+ while (*str && *str != ';')
1372
+ str++;
1373
+
1374
+ /* Skip any completely empty blocks */
1375
+ while (*str && *str == ';')
1376
+ str++;
1377
+
1378
+ if (init && higher_order_disable)
1379
+ disable_higher_order_debug = 1;
1380
+
1381
+ if (*str)
1382
+ return str;
1383
+ else
1384
+ return NULL;
1385
+}
1386
+
1387
+static int __init setup_slub_debug(char *str)
1388
+{
1389
+ slab_flags_t flags;
1390
+ slab_flags_t global_flags;
1391
+ char *saved_str;
1392
+ char *slab_list;
1393
+ bool global_slub_debug_changed = false;
1394
+ bool slab_list_specified = false;
1395
+
1396
+ global_flags = DEBUG_DEFAULT_FLAGS;
1397
+ if (*str++ != '=' || !*str)
1398
+ /*
1399
+ * No options specified. Switch on full debugging.
1400
+ */
1401
+ goto out;
1402
+
1403
+ saved_str = str;
1404
+ while (str) {
1405
+ str = parse_slub_debug_flags(str, &flags, &slab_list, true);
1406
+
1407
+ if (!slab_list) {
1408
+ global_flags = flags;
1409
+ global_slub_debug_changed = true;
1410
+ } else {
1411
+ slab_list_specified = true;
12991412 }
13001413 }
13011414
1302
-check_slabs:
1303
- if (*str == ',')
1304
- slub_debug_slabs = str + 1;
1415
+ /*
1416
+ * For backwards compatibility, a single list of flags with list of
1417
+ * slabs means debugging is only changed for those slabs, so the global
1418
+ * slub_debug should be unchanged (0 or DEBUG_DEFAULT_FLAGS, depending
1419
+ * on CONFIG_SLUB_DEBUG_ON). We can extended that to multiple lists as
1420
+ * long as there is no option specifying flags without a slab list.
1421
+ */
1422
+ if (slab_list_specified) {
1423
+ if (!global_slub_debug_changed)
1424
+ global_flags = slub_debug;
1425
+ slub_debug_string = saved_str;
1426
+ }
13051427 out:
1428
+ slub_debug = global_flags;
1429
+ if (slub_debug != 0 || slub_debug_string)
1430
+ static_branch_enable(&slub_debug_enabled);
13061431 if ((static_branch_unlikely(&init_on_alloc) ||
13071432 static_branch_unlikely(&init_on_free)) &&
13081433 (slub_debug & SLAB_POISON))
....@@ -1312,24 +1437,65 @@
13121437
13131438 __setup("slub_debug", setup_slub_debug);
13141439
1440
+/*
1441
+ * kmem_cache_flags - apply debugging options to the cache
1442
+ * @object_size: the size of an object without meta data
1443
+ * @flags: flags to set
1444
+ * @name: name of the cache
1445
+ *
1446
+ * Debug option(s) are applied to @flags. In addition to the debug
1447
+ * option(s), if a slab name (or multiple) is specified i.e.
1448
+ * slub_debug=<Debug-Options>,<slab name1>,<slab name2> ...
1449
+ * then only the select slabs will receive the debug option(s).
1450
+ */
13151451 slab_flags_t kmem_cache_flags(unsigned int object_size,
1316
- slab_flags_t flags, const char *name,
1317
- void (*ctor)(void *))
1452
+ slab_flags_t flags, const char *name)
13181453 {
1319
- /*
1320
- * Enable debugging if selected on the kernel commandline.
1321
- */
1322
- if (slub_debug && (!slub_debug_slabs || (name &&
1323
- !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))))
1324
- flags |= slub_debug;
1454
+ char *iter;
1455
+ size_t len;
1456
+ char *next_block;
1457
+ slab_flags_t block_flags;
13251458
1326
- return flags;
1459
+ len = strlen(name);
1460
+ next_block = slub_debug_string;
1461
+ /* Go through all blocks of debug options, see if any matches our slab's name */
1462
+ while (next_block) {
1463
+ next_block = parse_slub_debug_flags(next_block, &block_flags, &iter, false);
1464
+ if (!iter)
1465
+ continue;
1466
+ /* Found a block that has a slab list, search it */
1467
+ while (*iter) {
1468
+ char *end, *glob;
1469
+ size_t cmplen;
1470
+
1471
+ end = strchrnul(iter, ',');
1472
+ if (next_block && next_block < end)
1473
+ end = next_block - 1;
1474
+
1475
+ glob = strnchr(iter, end - iter, '*');
1476
+ if (glob)
1477
+ cmplen = glob - iter;
1478
+ else
1479
+ cmplen = max_t(size_t, len, (end - iter));
1480
+
1481
+ if (!strncmp(name, iter, cmplen)) {
1482
+ flags |= block_flags;
1483
+ return flags;
1484
+ }
1485
+
1486
+ if (!*end || *end == ';')
1487
+ break;
1488
+ iter = end + 1;
1489
+ }
1490
+ }
1491
+
1492
+ return flags | slub_debug;
13271493 }
13281494 #else /* !CONFIG_SLUB_DEBUG */
13291495 static inline void setup_object_debug(struct kmem_cache *s,
13301496 struct page *page, void *object) {}
1331
-static inline void setup_page_debug(struct kmem_cache *s,
1332
- void *addr, int order) {}
1497
+static inline
1498
+void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {}
13331499
13341500 static inline int alloc_debug_processing(struct kmem_cache *s,
13351501 struct page *page, void *object, unsigned long addr) { return 0; }
....@@ -1348,8 +1514,7 @@
13481514 static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
13491515 struct page *page) {}
13501516 slab_flags_t kmem_cache_flags(unsigned int object_size,
1351
- slab_flags_t flags, const char *name,
1352
- void (*ctor)(void *))
1517
+ slab_flags_t flags, const char *name)
13531518 {
13541519 return flags;
13551520 }
....@@ -1373,12 +1538,6 @@
13731538 }
13741539 #endif /* CONFIG_SLUB_DEBUG */
13751540
1376
-struct slub_free_list {
1377
- raw_spinlock_t lock;
1378
- struct list_head list;
1379
-};
1380
-static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
1381
-
13821541 /*
13831542 * Hooks for other subsystems that check memory allocations. In a typical
13841543 * production configuration these hooks all should produce no code at all.
....@@ -1386,6 +1545,7 @@
13861545 static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
13871546 {
13881547 ptr = kasan_kmalloc_large(ptr, size, flags);
1548
+ /* As ptr might get tagged, call kmemleak hook after KASAN. */
13891549 kmemleak_alloc(ptr, size, 1, flags);
13901550 return ptr;
13911551 }
....@@ -1393,10 +1553,11 @@
13931553 static __always_inline void kfree_hook(void *x)
13941554 {
13951555 kmemleak_free(x);
1396
- kasan_kfree_large(x, _RET_IP_);
1556
+ kasan_kfree_large(x);
13971557 }
13981558
1399
-static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
1559
+static __always_inline bool slab_free_hook(struct kmem_cache *s,
1560
+ void *x, bool init)
14001561 {
14011562 kmemleak_free_recursive(x, s->flags);
14021563
....@@ -1417,8 +1578,30 @@
14171578 if (!(s->flags & SLAB_DEBUG_OBJECTS))
14181579 debug_check_no_obj_freed(x, s->object_size);
14191580
1420
- /* KASAN might put x into memory quarantine, delaying its reuse */
1421
- return kasan_slab_free(s, x, _RET_IP_);
1581
+ /* Use KCSAN to help debug racy use-after-free. */
1582
+ if (!(s->flags & SLAB_TYPESAFE_BY_RCU))
1583
+ __kcsan_check_access(x, s->object_size,
1584
+ KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
1585
+
1586
+ /*
1587
+ * As memory initialization might be integrated into KASAN,
1588
+ * kasan_slab_free and initialization memset's must be
1589
+ * kept together to avoid discrepancies in behavior.
1590
+ *
1591
+ * The initialization memset's clear the object and the metadata,
1592
+ * but don't touch the SLAB redzone.
1593
+ */
1594
+ if (init) {
1595
+ int rsize;
1596
+
1597
+ if (!kasan_has_integrated_init())
1598
+ memset(kasan_reset_tag(x), 0, s->object_size);
1599
+ rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad : 0;
1600
+ memset((char *)kasan_reset_tag(x) + s->inuse, 0,
1601
+ s->size - s->inuse - rsize);
1602
+ }
1603
+ /* KASAN might put x into memory quarantine, delaying its reuse. */
1604
+ return kasan_slab_free(s, x, init);
14221605 }
14231606
14241607 static inline bool slab_free_freelist_hook(struct kmem_cache *s,
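Not from the patch: a small illustration of which byte ranges the init memsets above cover, reusing the same made-up layout as the earlier get_info_end() example:

/*
 * Illustration only (object_size = 40, inuse = 48, SLAB_RED_ZONE set,
 * no integrated KASAN init).  With init == true the hook does:
 *
 *	memset(x, 0, 40);                             clear the payload
 *	memset(x + 48, 0, size - 48 - red_left_pad);  clear free pointer,
 *	                                              tracks and padding
 *
 * The right red zone at [40, 48) keeps its poison pattern, and the
 * second memset stops short of the left red zone that begins the next
 * object's slot.
 */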
....@@ -1429,7 +1612,11 @@
14291612 void *object;
14301613 void *next = *head;
14311614 void *old_tail = *tail ? *tail : *head;
1432
- int rsize;
1615
+
1616
+ if (is_kfence_address(next)) {
1617
+ slab_free_hook(s, next, false);
1618
+ return true;
1619
+ }
14331620
14341621 /* Head and tail of the reconstructed freelist */
14351622 *head = NULL;
....@@ -1439,20 +1626,8 @@
14391626 object = next;
14401627 next = get_freepointer(s, object);
14411628
1442
- if (slab_want_init_on_free(s)) {
1443
- /*
1444
- * Clear the object and the metadata, but don't touch
1445
- * the redzone.
1446
- */
1447
- memset(object, 0, s->object_size);
1448
- rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad
1449
- : 0;
1450
- memset((char *)object + s->inuse, 0,
1451
- s->size - s->inuse - rsize);
1452
-
1453
- }
14541629 /* If object's reuse doesn't have to be delayed */
1455
- if (!slab_free_hook(s, object)) {
1630
+ if (!slab_free_hook(s, object, slab_want_init_on_free(s))) {
14561631 /* Move object to the new freelist */
14571632 set_freepointer(s, object, *head);
14581633 *head = object;
....@@ -1500,10 +1675,8 @@
15001675 else
15011676 page = __alloc_pages_node(node, flags, order);
15021677
1503
- if (page && memcg_charge_slab(page, flags, order, s)) {
1504
- __free_pages(page, order);
1505
- page = NULL;
1506
- }
1678
+ if (page)
1679
+ account_slab_page(page, order, s);
15071680
15081681 return page;
15091682 }
....@@ -1623,19 +1796,12 @@
16231796 struct kmem_cache_order_objects oo = s->oo;
16241797 gfp_t alloc_gfp;
16251798 void *start, *p, *next;
1626
- int idx, order;
1799
+ int idx;
16271800 bool shuffle;
1628
- bool enableirqs = false;
16291801
16301802 flags &= gfp_allowed_mask;
16311803
16321804 if (gfpflags_allow_blocking(flags))
1633
- enableirqs = true;
1634
-#ifdef CONFIG_PREEMPT_RT_FULL
1635
- if (system_state > SYSTEM_BOOTING)
1636
- enableirqs = true;
1637
-#endif
1638
- if (enableirqs)
16391805 local_irq_enable();
16401806
16411807 flags |= s->allocflags;
....@@ -1664,7 +1830,6 @@
16641830
16651831 page->objects = oo_objects(oo);
16661832
1667
- order = compound_order(page);
16681833 page->slab_cache = s;
16691834 __SetPageSlab(page);
16701835 if (page_is_pfmemalloc(page))
....@@ -1674,7 +1839,7 @@
16741839
16751840 start = page_address(page);
16761841
1677
- setup_page_debug(s, start, order);
1842
+ setup_page_debug(s, page, start);
16781843
16791844 shuffle = shuffle_freelist(s, page);
16801845
....@@ -1695,15 +1860,10 @@
16951860 page->frozen = 1;
16961861
16971862 out:
1698
- if (enableirqs)
1863
+ if (gfpflags_allow_blocking(flags))
16991864 local_irq_disable();
17001865 if (!page)
17011866 return NULL;
1702
-
1703
- mod_lruvec_page_state(page,
1704
- (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1705
- NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1706
- 1 << oo_order(oo));
17071867
17081868 inc_slabs_node(s, page_to_nid(page), page->objects);
17091869
....@@ -1712,13 +1872,8 @@
17121872
17131873 static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
17141874 {
1715
- if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
1716
- gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
1717
- flags &= ~GFP_SLAB_BUG_MASK;
1718
- pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
1719
- invalid_mask, &invalid_mask, flags, &flags);
1720
- dump_stack();
1721
- }
1875
+ if (unlikely(flags & GFP_SLAB_BUG_MASK))
1876
+ flags = kmalloc_fix_flags(flags);
17221877
17231878 return allocate_slab(s,
17241879 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
....@@ -1729,7 +1884,7 @@
17291884 int order = compound_order(page);
17301885 int pages = 1 << order;
17311886
1732
- if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1887
+ if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
17331888 void *p;
17341889
17351890 slab_pad_check(s, page);
....@@ -1738,29 +1893,14 @@
17381893 check_object(s, page, p, SLUB_RED_INACTIVE);
17391894 }
17401895
1741
- mod_lruvec_page_state(page,
1742
- (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1743
- NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1744
- -pages);
1745
-
17461896 __ClearPageSlabPfmemalloc(page);
17471897 __ClearPageSlab(page);
17481898
17491899 page->mapping = NULL;
17501900 if (current->reclaim_state)
17511901 current->reclaim_state->reclaimed_slab += pages;
1752
- memcg_uncharge_slab(page, order, s);
1902
+ unaccount_slab_page(page, order, s);
17531903 __free_pages(page, order);
1754
-}
1755
-
1756
-static void free_delayed(struct list_head *h)
1757
-{
1758
- while (!list_empty(h)) {
1759
- struct page *page = list_first_entry(h, struct page, lru);
1760
-
1761
- list_del(&page->lru);
1762
- __free_slab(page->slab_cache, page);
1763
- }
17641904 }
17651905
17661906 static void rcu_free_slab(struct rcu_head *h)
....@@ -1774,12 +1914,6 @@
17741914 {
17751915 if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
17761916 call_rcu(&page->rcu_head, rcu_free_slab);
1777
- } else if (irqs_disabled()) {
1778
- struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
1779
-
1780
- raw_spin_lock(&f->lock);
1781
- list_add(&page->lru, &f->list);
1782
- raw_spin_unlock(&f->lock);
17831917 } else
17841918 __free_slab(s, page);
17851919 }
....@@ -1798,9 +1932,9 @@
17981932 {
17991933 n->nr_partial++;
18001934 if (tail == DEACTIVATE_TO_TAIL)
1801
- list_add_tail(&page->lru, &n->partial);
1935
+ list_add_tail(&page->slab_list, &n->partial);
18021936 else
1803
- list_add(&page->lru, &n->partial);
1937
+ list_add(&page->slab_list, &n->partial);
18041938 }
18051939
18061940 static inline void add_partial(struct kmem_cache_node *n,
....@@ -1814,7 +1948,7 @@
18141948 struct page *page)
18151949 {
18161950 lockdep_assert_held(&n->list_lock);
1817
- list_del(&page->lru);
1951
+ list_del(&page->slab_list);
18181952 n->nr_partial--;
18191953 }
18201954
....@@ -1881,14 +2015,14 @@
18812015 /*
18822016 * Racy check. If we mistakenly see no partial slabs then we
18832017 * just allocate an empty slab. If we mistakenly try to get a
1884
- * partial slab and there is none available then get_partials()
2018
+ * partial slab and there is none available then get_partial()
18852019 * will return NULL.
18862020 */
18872021 if (!n || !n->nr_partial)
18882022 return NULL;
18892023
1890
- raw_spin_lock(&n->list_lock);
1891
- list_for_each_entry_safe(page, page2, &n->partial, lru) {
2024
+ spin_lock(&n->list_lock);
2025
+ list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
18922026 void *t;
18932027
18942028 if (!pfmemalloc_match(page, flags))
....@@ -1912,7 +2046,7 @@
19122046 break;
19132047
19142048 }
1915
- raw_spin_unlock(&n->list_lock);
2049
+ spin_unlock(&n->list_lock);
19162050 return object;
19172051 }
19182052
....@@ -1926,7 +2060,7 @@
19262060 struct zonelist *zonelist;
19272061 struct zoneref *z;
19282062 struct zone *zone;
1929
- enum zone_type high_zoneidx = gfp_zone(flags);
2063
+ enum zone_type highest_zoneidx = gfp_zone(flags);
19302064 void *object;
19312065 unsigned int cpuset_mems_cookie;
19322066
....@@ -1955,7 +2089,7 @@
19552089 do {
19562090 cpuset_mems_cookie = read_mems_allowed_begin();
19572091 zonelist = node_zonelist(mempolicy_slab_node(), flags);
1958
- for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
2092
+ for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
19592093 struct kmem_cache_node *n;
19602094
19612095 n = get_node(s, zone_to_nid(zone));
....@@ -1976,7 +2110,7 @@
19762110 }
19772111 }
19782112 } while (read_mems_allowed_retry(cpuset_mems_cookie));
1979
-#endif
2113
+#endif /* CONFIG_NUMA */
19802114 return NULL;
19812115 }
19822116
....@@ -1999,9 +2133,9 @@
19992133 return get_any_partial(s, flags, c);
20002134 }
20012135
2002
-#ifdef CONFIG_PREEMPT
2136
+#ifdef CONFIG_PREEMPTION
20032137 /*
2004
- * Calculate the next globally unique transaction for disambiguiation
2138
+ * Calculate the next globally unique transaction for disambiguation
20052139 * during cmpxchg. The transactions start with the cpu number and are then
20062140 * incremented by CONFIG_NR_CPUS.
20072141 */
....@@ -2019,6 +2153,7 @@
20192153 return tid + TID_STEP;
20202154 }
20212155
2156
+#ifdef SLUB_DEBUG_CMPXCHG
20222157 static inline unsigned int tid_to_cpu(unsigned long tid)
20232158 {
20242159 return tid % TID_STEP;
....@@ -2028,6 +2163,7 @@
20282163 {
20292164 return tid / TID_STEP;
20302165 }
2166
+#endif
20312167
20322168 static inline unsigned int init_tid(int cpu)
20332169 {
....@@ -2042,7 +2178,7 @@
20422178
20432179 pr_info("%s %s: cmpxchg redo ", n, s->name);
20442180
2045
-#ifdef CONFIG_PREEMPT
2181
+#ifdef CONFIG_PREEMPTION
20462182 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
20472183 pr_warn("due to cpu change %d -> %d\n",
20482184 tid_to_cpu(tid), tid_to_cpu(actual_tid));
....@@ -2160,46 +2296,37 @@
21602296 if (!lock) {
21612297 lock = 1;
21622298 /*
2163
- * Taking the spinlock removes the possiblity
2299
+ * Taking the spinlock removes the possibility
21642300 * that acquire_slab() will see a slab page that
21652301 * is frozen
21662302 */
2167
- raw_spin_lock(&n->list_lock);
2303
+ spin_lock(&n->list_lock);
21682304 }
21692305 } else {
21702306 m = M_FULL;
2171
- if (kmem_cache_debug(s) && !lock) {
2307
+#ifdef CONFIG_SLUB_DEBUG
2308
+ if ((s->flags & SLAB_STORE_USER) && !lock) {
21722309 lock = 1;
21732310 /*
21742311 * This also ensures that the scanning of full
21752312 * slabs from diagnostic functions will not see
21762313 * any frozen slabs.
21772314 */
2178
- raw_spin_lock(&n->list_lock);
2315
+ spin_lock(&n->list_lock);
21792316 }
2317
+#endif
21802318 }
21812319
21822320 if (l != m) {
2183
-
21842321 if (l == M_PARTIAL)
2185
-
21862322 remove_partial(n, page);
2187
-
21882323 else if (l == M_FULL)
2189
-
21902324 remove_full(s, n, page);
21912325
2192
- if (m == M_PARTIAL) {
2193
-
2326
+ if (m == M_PARTIAL)
21942327 add_partial(n, page, tail);
2195
- stat(s, tail);
2196
-
2197
- } else if (m == M_FULL) {
2198
-
2199
- stat(s, DEACTIVATE_FULL);
2328
+ else if (m == M_FULL)
22002329 add_full(s, n, page);
2201
-
2202
- }
22032330 }
22042331
22052332 l = m;
....@@ -2210,9 +2337,13 @@
22102337 goto redo;
22112338
22122339 if (lock)
2213
- raw_spin_unlock(&n->list_lock);
2340
+ spin_unlock(&n->list_lock);
22142341
2215
- if (m == M_FREE) {
2342
+ if (m == M_PARTIAL)
2343
+ stat(s, tail);
2344
+ else if (m == M_FULL)
2345
+ stat(s, DEACTIVATE_FULL);
2346
+ else if (m == M_FREE) {
22162347 stat(s, DEACTIVATE_EMPTY);
22172348 discard_slab(s, page);
22182349 stat(s, FREE_SLAB);
....@@ -2220,6 +2351,7 @@
22202351
22212352 c->page = NULL;
22222353 c->freelist = NULL;
2354
+ c->tid = next_tid(c->tid);
22232355 }
22242356
22252357 /*
....@@ -2236,19 +2368,19 @@
22362368 struct kmem_cache_node *n = NULL, *n2 = NULL;
22372369 struct page *page, *discard_page = NULL;
22382370
2239
- while ((page = c->partial)) {
2371
+ while ((page = slub_percpu_partial(c))) {
22402372 struct page new;
22412373 struct page old;
22422374
2243
- c->partial = page->next;
2375
+ slub_set_percpu_partial(c, page);
22442376
22452377 n2 = get_node(s, page_to_nid(page));
22462378 if (n != n2) {
22472379 if (n)
2248
- raw_spin_unlock(&n->list_lock);
2380
+ spin_unlock(&n->list_lock);
22492381
22502382 n = n2;
2251
- raw_spin_lock(&n->list_lock);
2383
+ spin_lock(&n->list_lock);
22522384 }
22532385
22542386 do {
....@@ -2277,7 +2409,7 @@
22772409 }
22782410
22792411 if (n)
2280
- raw_spin_unlock(&n->list_lock);
2412
+ spin_unlock(&n->list_lock);
22812413
22822414 while (discard_page) {
22832415 page = discard_page;
....@@ -2287,12 +2419,12 @@
22872419 discard_slab(s, page);
22882420 stat(s, FREE_SLAB);
22892421 }
2290
-#endif
2422
+#endif /* CONFIG_SLUB_CPU_PARTIAL */
22912423 }
22922424
22932425 /*
2294
- * Put a page that was just frozen (in __slab_free) into a partial page
2295
- * slot if available.
2426
+ * Put a page that was just frozen (in __slab_free|get_partial_node) into a
2427
+ * partial page slot if available.
22962428 *
22972429 * If we did not find a slot then simply move all the partials to the
22982430 * per node partial list.
....@@ -2313,22 +2445,15 @@
23132445 if (oldpage) {
23142446 pobjects = oldpage->pobjects;
23152447 pages = oldpage->pages;
2316
- if (drain && pobjects > s->cpu_partial) {
2317
- struct slub_free_list *f;
2448
+ if (drain && pobjects > slub_cpu_partial(s)) {
23182449 unsigned long flags;
2319
- LIST_HEAD(tofree);
23202450 /*
23212451 * partial array is full. Move the existing
23222452 * set to the per node partial list.
23232453 */
23242454 local_irq_save(flags);
23252455 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2326
- f = this_cpu_ptr(&slub_free_list);
2327
- raw_spin_lock(&f->lock);
2328
- list_splice_init(&f->list, &tofree);
2329
- raw_spin_unlock(&f->lock);
23302456 local_irq_restore(flags);
2331
- free_delayed(&tofree);
23322457 oldpage = NULL;
23332458 pobjects = 0;
23342459 pages = 0;
....@@ -2345,7 +2470,7 @@
23452470
23462471 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
23472472 != oldpage);
2348
- if (unlikely(!s->cpu_partial)) {
2473
+ if (unlikely(!slub_cpu_partial(s))) {
23492474 unsigned long flags;
23502475
23512476 local_irq_save(flags);
....@@ -2353,15 +2478,13 @@
23532478 local_irq_restore(flags);
23542479 }
23552480 preempt_enable();
2356
-#endif
2481
+#endif /* CONFIG_SLUB_CPU_PARTIAL */
23572482 }
23582483
23592484 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
23602485 {
23612486 stat(s, CPUSLAB_FLUSH);
23622487 deactivate_slab(s, c->page, c->freelist, c);
2363
-
2364
- c->tid = next_tid(c->tid);
23652488 }
23662489
23672490 /*
....@@ -2373,12 +2496,10 @@
23732496 {
23742497 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
23752498
2376
- if (likely(c)) {
2377
- if (c->page)
2378
- flush_slab(s, c);
2499
+ if (c->page)
2500
+ flush_slab(s, c);
23792501
2380
- unfreeze_partials(s, c);
2381
- }
2502
+ unfreeze_partials(s, c);
23822503 }
23832504
23842505 static void flush_cpu_slab(void *d)
....@@ -2398,19 +2519,7 @@
23982519
23992520 static void flush_all(struct kmem_cache *s)
24002521 {
2401
- LIST_HEAD(tofree);
2402
- int cpu;
2403
-
2404
- on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
2405
- for_each_online_cpu(cpu) {
2406
- struct slub_free_list *f;
2407
-
2408
- f = &per_cpu(slub_free_list, cpu);
2409
- raw_spin_lock_irq(&f->lock);
2410
- list_splice_init(&f->list, &tofree);
2411
- raw_spin_unlock_irq(&f->lock);
2412
- free_delayed(&tofree);
2413
- }
2522
+ on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
24142523 }
24152524
24162525 /*
....@@ -2439,7 +2548,7 @@
24392548 static inline int node_match(struct page *page, int node)
24402549 {
24412550 #ifdef CONFIG_NUMA
2442
- if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
2551
+ if (node != NUMA_NO_NODE && page_to_nid(page) != node)
24432552 return 0;
24442553 #endif
24452554 return 1;
....@@ -2465,10 +2574,10 @@
24652574 unsigned long x = 0;
24662575 struct page *page;
24672576
2468
- raw_spin_lock_irqsave(&n->list_lock, flags);
2469
- list_for_each_entry(page, &n->partial, lru)
2577
+ spin_lock_irqsave(&n->list_lock, flags);
2578
+ list_for_each_entry(page, &n->partial, slab_list)
24702579 x += get_count(page);
2471
- raw_spin_unlock_irqrestore(&n->list_lock, flags);
2580
+ spin_unlock_irqrestore(&n->list_lock, flags);
24722581 return x;
24732582 }
24742583 #endif /* CONFIG_SLUB_DEBUG || CONFIG_SLUB_SYSFS */
....@@ -2540,8 +2649,7 @@
25402649 stat(s, ALLOC_SLAB);
25412650 c->page = page;
25422651 *pc = c;
2543
- } else
2544
- freelist = NULL;
2652
+ }
25452653
25462654 return freelist;
25472655 }
....@@ -2608,12 +2716,12 @@
26082716 * already disabled (which is the case for bulk allocation).
26092717 */
26102718 static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2611
- unsigned long addr, struct kmem_cache_cpu *c,
2612
- struct list_head *to_free)
2719
+ unsigned long addr, struct kmem_cache_cpu *c)
26132720 {
2614
- struct slub_free_list *f;
26152721 void *freelist;
26162722 struct page *page;
2723
+
2724
+ stat(s, ALLOC_SLOWPATH);
26172725
26182726 page = c->page;
26192727 if (!page) {
....@@ -2662,6 +2770,7 @@
26622770
26632771 if (!freelist) {
26642772 c->page = NULL;
2773
+ c->tid = next_tid(c->tid);
26652774 stat(s, DEACTIVATE_BYPASS);
26662775 goto new_slab;
26672776 }
....@@ -2677,13 +2786,6 @@
26772786 VM_BUG_ON(!c->page->frozen);
26782787 c->freelist = get_freepointer(s, freelist);
26792788 c->tid = next_tid(c->tid);
2680
-
2681
-out:
2682
- f = this_cpu_ptr(&slub_free_list);
2683
- raw_spin_lock(&f->lock);
2684
- list_splice_init(&f->list, to_free);
2685
- raw_spin_unlock(&f->lock);
2686
-
26872789 return freelist;
26882790
26892791 new_slab:
....@@ -2699,7 +2801,7 @@
26992801
27002802 if (unlikely(!freelist)) {
27012803 slab_out_of_memory(s, gfpflags, node);
2702
- goto out;
2804
+ return NULL;
27032805 }
27042806
27052807 page = c->page;
....@@ -2712,7 +2814,7 @@
27122814 goto new_slab; /* Slab failed checks. Next slab needed */
27132815
27142816 deactivate_slab(s, page, get_freepointer(s, freelist), c);
2715
- goto out;
2817
+ return freelist;
27162818 }
27172819
27182820 /*
....@@ -2724,10 +2826,9 @@
27242826 {
27252827 void *p;
27262828 unsigned long flags;
2727
- LIST_HEAD(tofree);
27282829
27292830 local_irq_save(flags);
2730
-#ifdef CONFIG_PREEMPT
2831
+#ifdef CONFIG_PREEMPTION
27312832 /*
27322833 * We may have been preempted and rescheduled on a different
27332834 * cpu before disabling interrupts. Need to reload cpu area
....@@ -2736,9 +2837,8 @@
27362837 c = this_cpu_ptr(s->cpu_slab);
27372838 #endif
27382839
2739
- p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree);
2840
+ p = ___slab_alloc(s, gfpflags, node, addr, c);
27402841 local_irq_restore(flags);
2741
- free_delayed(&tofree);
27422842 return p;
27432843 }
27442844
....@@ -2750,7 +2850,8 @@
27502850 void *obj)
27512851 {
27522852 if (unlikely(slab_want_init_on_free(s)) && obj)
2753
- memset((void *)((char *)obj + s->offset), 0, sizeof(void *));
2853
+ memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
2854
+ 0, sizeof(void *));
27542855 }
27552856
27562857 /*
....@@ -2764,16 +2865,23 @@
27642865 * Otherwise we can simply pick the next object from the lockless free list.
27652866 */
27662867 static __always_inline void *slab_alloc_node(struct kmem_cache *s,
2767
- gfp_t gfpflags, int node, unsigned long addr)
2868
+ gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
27682869 {
27692870 void *object;
27702871 struct kmem_cache_cpu *c;
27712872 struct page *page;
27722873 unsigned long tid;
2874
+ struct obj_cgroup *objcg = NULL;
2875
+ bool init = false;
27732876
2774
- s = slab_pre_alloc_hook(s, gfpflags);
2877
+ s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
27752878 if (!s)
27762879 return NULL;
2880
+
2881
+ object = kfence_alloc(s, orig_size, gfpflags);
2882
+ if (unlikely(object))
2883
+ goto out;
2884
+
27772885 redo:
27782886 /*
27792887 * Must read kmem_cache cpu data via this cpu ptr. Preemption is
....@@ -2782,13 +2890,13 @@
27822890 * as we end up on the original cpu again when doing the cmpxchg.
27832891 *
27842892 * We should guarantee that tid and kmem_cache are retrieved on
2785
- * the same cpu. It could be different if CONFIG_PREEMPT so we need
2893
+ * the same cpu. It could be different if CONFIG_PREEMPTION so we need
27862894 * to check if it is matched or not.
27872895 */
27882896 do {
27892897 tid = this_cpu_read(s->cpu_slab->tid);
27902898 c = raw_cpu_ptr(s->cpu_slab);
2791
- } while (IS_ENABLED(CONFIG_PREEMPT) &&
2899
+ } while (IS_ENABLED(CONFIG_PREEMPTION) &&
27922900 unlikely(tid != READ_ONCE(c->tid)));
27932901
27942902 /*
....@@ -2810,9 +2918,8 @@
28102918
28112919 object = c->freelist;
28122920 page = c->page;
2813
- if (unlikely(!object || !node_match(page, node))) {
2921
+ if (unlikely(!object || !page || !node_match(page, node))) {
28142922 object = __slab_alloc(s, gfpflags, node, addr, c);
2815
- stat(s, ALLOC_SLOWPATH);
28162923 } else {
28172924 void *next_object = get_freepointer_safe(s, object);
28182925
....@@ -2843,24 +2950,23 @@
28432950 }
28442951
28452952 maybe_wipe_obj_freeptr(s, object);
2953
+ init = slab_want_init_on_alloc(gfpflags, s);
28462954
2847
- if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
2848
- memset(object, 0, s->object_size);
2849
-
2850
- slab_post_alloc_hook(s, gfpflags, 1, &object);
2955
+out:
2956
+ slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);
28512957
28522958 return object;
28532959 }
28542960
28552961 static __always_inline void *slab_alloc(struct kmem_cache *s,
2856
- gfp_t gfpflags, unsigned long addr)
2962
+ gfp_t gfpflags, unsigned long addr, size_t orig_size)
28572963 {
2858
- return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
2964
+ return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr, orig_size);
28592965 }
28602966
28612967 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
28622968 {
2863
- void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2969
+ void *ret = slab_alloc(s, gfpflags, _RET_IP_, s->object_size);
28642970
28652971 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
28662972 s->size, gfpflags);
....@@ -2872,7 +2978,7 @@
28722978 #ifdef CONFIG_TRACING
28732979 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
28742980 {
2875
- void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2981
+ void *ret = slab_alloc(s, gfpflags, _RET_IP_, size);
28762982 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
28772983 ret = kasan_kmalloc(s, ret, size, gfpflags);
28782984 return ret;
....@@ -2883,7 +2989,7 @@
28832989 #ifdef CONFIG_NUMA
28842990 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
28852991 {
2886
- void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2992
+ void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, s->object_size);
28872993
28882994 trace_kmem_cache_alloc_node(_RET_IP_, ret,
28892995 s->object_size, s->size, gfpflags, node);
....@@ -2897,7 +3003,7 @@
28973003 gfp_t gfpflags,
28983004 int node, size_t size)
28993005 {
2900
- void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
3006
+ void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, size);
29013007
29023008 trace_kmalloc_node(_RET_IP_, ret,
29033009 size, s->size, gfpflags, node);
....@@ -2907,7 +3013,7 @@
29073013 }
29083014 EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
29093015 #endif
2910
-#endif
3016
+#endif /* CONFIG_NUMA */
29113017
29123018 /*
29133019 * Slow path handling. This may still be called frequently since objects
....@@ -2927,9 +3033,12 @@
29273033 struct page new;
29283034 unsigned long counters;
29293035 struct kmem_cache_node *n = NULL;
2930
- unsigned long uninitialized_var(flags);
3036
+ unsigned long flags;
29313037
29323038 stat(s, FREE_SLOWPATH);
3039
+
3040
+ if (kfence_free(head))
3041
+ return;
29333042
29343043 if (kmem_cache_debug(s) &&
29353044 !free_debug_processing(s, page, head, tail, cnt, addr))
....@@ -2937,7 +3046,7 @@
29373046
29383047 do {
29393048 if (unlikely(n)) {
2940
- raw_spin_unlock_irqrestore(&n->list_lock, flags);
3049
+ spin_unlock_irqrestore(&n->list_lock, flags);
29413050 n = NULL;
29423051 }
29433052 prior = page->freelist;
....@@ -2969,7 +3078,7 @@
29693078 * Otherwise the list_lock will synchronize with
29703079 * other processors updating the list of slabs.
29713080 */
2972
- raw_spin_lock_irqsave(&n->list_lock, flags);
3081
+ spin_lock_irqsave(&n->list_lock, flags);
29733082
29743083 }
29753084 }
....@@ -2981,20 +3090,21 @@
29813090
29823091 if (likely(!n)) {
29833092
2984
- /*
2985
- * If we just froze the page then put it onto the
2986
- * per cpu partial list.
2987
- */
2988
- if (new.frozen && !was_frozen) {
3093
+ if (likely(was_frozen)) {
3094
+ /*
3095
+ * The list lock was not taken therefore no list
3096
+ * activity can be necessary.
3097
+ */
3098
+ stat(s, FREE_FROZEN);
3099
+ } else if (new.frozen) {
3100
+ /*
3101
+ * If we just froze the page then put it onto the
3102
+ * per cpu partial list.
3103
+ */
29893104 put_cpu_partial(s, page, 1);
29903105 stat(s, CPU_PARTIAL_FREE);
29913106 }
2992
- /*
2993
- * The list lock was not taken therefore no list
2994
- * activity can be necessary.
2995
- */
2996
- if (was_frozen)
2997
- stat(s, FREE_FROZEN);
3107
+
29983108 return;
29993109 }
30003110
....@@ -3006,12 +3116,11 @@
30063116 * then add it.
30073117 */
30083118 if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
3009
- if (kmem_cache_debug(s))
3010
- remove_full(s, n, page);
3119
+ remove_full(s, n, page);
30113120 add_partial(n, page, DEACTIVATE_TO_TAIL);
30123121 stat(s, FREE_ADD_PARTIAL);
30133122 }
3014
- raw_spin_unlock_irqrestore(&n->list_lock, flags);
3123
+ spin_unlock_irqrestore(&n->list_lock, flags);
30153124 return;
30163125
30173126 slab_empty:
....@@ -3026,7 +3135,7 @@
30263135 remove_full(s, n, page);
30273136 }
30283137
3029
- raw_spin_unlock_irqrestore(&n->list_lock, flags);
3138
+ spin_unlock_irqrestore(&n->list_lock, flags);
30303139 stat(s, FREE_SLAB);
30313140 discard_slab(s, page);
30323141 }
....@@ -3053,6 +3162,10 @@
30533162 void *tail_obj = tail ? : head;
30543163 struct kmem_cache_cpu *c;
30553164 unsigned long tid;
3165
+
3166
+ /* memcg_slab_free_hook() is already called for bulk free. */
3167
+ if (!tail)
3168
+ memcg_slab_free_hook(s, &head, 1);
30563169 redo:
30573170 /*
30583171 * Determine the currently cpus per cpu slab.
....@@ -3063,7 +3176,7 @@
30633176 do {
30643177 tid = this_cpu_read(s->cpu_slab->tid);
30653178 c = raw_cpu_ptr(s->cpu_slab);
3066
- } while (IS_ENABLED(CONFIG_PREEMPT) &&
3179
+ } while (IS_ENABLED(CONFIG_PREEMPTION) &&
30673180 unlikely(tid != READ_ONCE(c->tid)));
30683181
30693182 /* Same with comment on barrier() in slab_alloc_node() */
....@@ -3173,6 +3286,13 @@
31733286 df->s = cache_from_obj(s, object); /* Support for memcg */
31743287 }
31753288
3289
+ if (is_kfence_address(object)) {
3290
+ slab_free_hook(df->s, object, false);
3291
+ __kfence_free(object);
3292
+ p[size] = NULL; /* mark object processed */
3293
+ return size;
3294
+ }
3295
+
31763296 /* Start new detached freelist */
31773297 df->page = page;
31783298 set_freepointer(df->s, object, NULL);
....@@ -3214,6 +3334,7 @@
32143334 if (WARN_ON(!size))
32153335 return;
32163336
3337
+ memcg_slab_free_hook(s, p, size);
32173338 do {
32183339 struct detached_freelist df;
32193340
....@@ -3231,11 +3352,11 @@
32313352 void **p)
32323353 {
32333354 struct kmem_cache_cpu *c;
3234
- LIST_HEAD(to_free);
32353355 int i;
3356
+ struct obj_cgroup *objcg = NULL;
32363357
32373358 /* memcg and kmem_cache debug support */
3238
- s = slab_pre_alloc_hook(s, flags);
3359
+ s = slab_pre_alloc_hook(s, &objcg, size, flags);
32393360 if (unlikely(!s))
32403361 return false;
32413362 /*
....@@ -3247,8 +3368,14 @@
32473368 c = this_cpu_ptr(s->cpu_slab);
32483369
32493370 for (i = 0; i < size; i++) {
3250
- void *object = c->freelist;
3371
+ void *object = kfence_alloc(s, s->object_size, flags);
32513372
3373
+ if (unlikely(object)) {
3374
+ p[i] = object;
3375
+ continue;
3376
+ }
3377
+
3378
+ object = c->freelist;
32523379 if (unlikely(!object)) {
32533380 /*
32543381 * We may have removed an object from c->freelist using
....@@ -3264,7 +3391,7 @@
32643391 * of re-populating per CPU c->freelist
32653392 */
32663393 p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
3267
- _RET_IP_, c, &to_free);
3394
+ _RET_IP_, c);
32683395 if (unlikely(!p[i]))
32693396 goto error;
32703397
....@@ -3279,23 +3406,17 @@
32793406 }
32803407 c->tid = next_tid(c->tid);
32813408 local_irq_enable();
3282
- free_delayed(&to_free);
32833409
3284
- /* Clear memory outside IRQ disabled fastpath loop */
3285
- if (unlikely(slab_want_init_on_alloc(flags, s))) {
3286
- int j;
3287
-
3288
- for (j = 0; j < i; j++)
3289
- memset(p[j], 0, s->object_size);
3290
- }
3291
-
3292
- /* memcg and kmem_cache debug support */
3293
- slab_post_alloc_hook(s, flags, size, p);
3410
+ /*
3411
+ * memcg and kmem_cache debug support and memory initialization.
3412
+ * Done outside of the IRQ disabled fastpath loop.
3413
+ */
3414
+ slab_post_alloc_hook(s, objcg, flags, size, p,
3415
+ slab_want_init_on_alloc(flags, s));
32943416 return i;
32953417 error:
32963418 local_irq_enable();
3297
- free_delayed(&to_free);
3298
- slab_post_alloc_hook(s, flags, i, p);
3419
+ slab_post_alloc_hook(s, objcg, flags, i, p, false);
32993420 __kmem_cache_free_bulk(s, i, p);
33003421 return 0;
33013422 }
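[Editor's note] For context, a sketch of how a caller might drive the bulk fast path patched above. This is an illustrative fragment, not part of the patch: my_cache is a hypothetical cache created elsewhere with kmem_cache_create(), and the API is all-or-nothing, returning the requested count on success or 0 after cleaning up.

/* requires <linux/slab.h>; illustrative only */
static int demo_bulk(struct kmem_cache *my_cache)
{
	void *objs[16];

	if (!kmem_cache_alloc_bulk(my_cache, GFP_KERNEL, 16, objs))
		return -ENOMEM;

	/* ... use all sixteen objects ... */

	kmem_cache_free_bulk(my_cache, 16, objs);
	return 0;
}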
....@@ -3430,7 +3551,7 @@
34303551 init_kmem_cache_node(struct kmem_cache_node *n)
34313552 {
34323553 n->nr_partial = 0;
3433
- raw_spin_lock_init(&n->list_lock);
3554
+ spin_lock_init(&n->list_lock);
34343555 INIT_LIST_HEAD(&n->partial);
34353556 #ifdef CONFIG_SLUB_DEBUG
34363557 atomic_long_set(&n->nr_slabs, 0);
....@@ -3491,8 +3612,7 @@
34913612 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
34923613 init_tracking(kmem_cache_node, n);
34933614 #endif
3494
- n = kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node),
3495
- GFP_KERNEL);
3615
+ n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
34963616 page->freelist = get_freepointer(kmem_cache_node, n);
34973617 page->inuse = 1;
34983618 page->frozen = 0;
....@@ -3580,15 +3700,15 @@
35803700 * 50% to keep some capacity around for frees.
35813701 */
35823702 if (!kmem_cache_has_cpu_partial(s))
3583
- s->cpu_partial = 0;
3703
+ slub_set_cpu_partial(s, 0);
35843704 else if (s->size >= PAGE_SIZE)
3585
- s->cpu_partial = 2;
3705
+ slub_set_cpu_partial(s, 2);
35863706 else if (s->size >= 1024)
3587
- s->cpu_partial = 6;
3707
+ slub_set_cpu_partial(s, 6);
35883708 else if (s->size >= 256)
3589
- s->cpu_partial = 13;
3709
+ slub_set_cpu_partial(s, 13);
35903710 else
3591
- s->cpu_partial = 30;
3711
+ slub_set_cpu_partial(s, 30);
35923712 #endif
35933713 }
35943714
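[Editor's note] The slub_set_cpu_partial() conversion keeps the existing thresholds: larger objects get smaller per-cpu partial budgets, and caches without per-cpu partial support get 0. A standalone user-space sketch of the same heuristic (PAGE_SIZE assumed to be 4096 here):

#include <stdio.h>

#define PAGE_SIZE 4096u		/* assumption for this sketch */

static unsigned int cpu_partial_for(unsigned int size)
{
	if (size >= PAGE_SIZE)
		return 2;
	if (size >= 1024)
		return 6;
	if (size >= 256)
		return 13;
	return 30;
}

int main(void)
{
	unsigned int sizes[] = { 64, 256, 1024, 8192 };

	for (int i = 0; i < 4; i++)
		printf("size=%u -> cpu_partial=%u\n",
		       sizes[i], cpu_partial_for(sizes[i]));
	return 0;
}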
....@@ -3633,22 +3753,36 @@
36333753
36343754 /*
36353755 * With that we have determined the number of bytes in actual use
3636
- * by the object. This is the potential offset to the free pointer.
3756
+ * by the object and redzoning.
36373757 */
36383758 s->inuse = size;
36393759
3640
- if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
3641
- s->ctor)) {
3760
+ if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
3761
+ ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
3762
+ s->ctor) {
36423763 /*
36433764 * Relocate free pointer after the object if it is not
36443765 * permitted to overwrite the first word of the object on
36453766 * kmem_cache_free.
36463767 *
36473768 * This is the case if we do RCU, have a constructor or
3648
- * destructor or are poisoning the objects.
3769
+ * destructor, are poisoning the objects, or are
3770
+ * redzoning an object smaller than sizeof(void *).
3771
+ *
3772
+ * The assumption that s->offset >= s->inuse means free
3773
+ * pointer is outside of the object is used in the
3774
+ * freeptr_outside_object() function. If that is no
3775
+ * longer true, the function needs to be modified.
36493776 */
36503777 s->offset = size;
36513778 size += sizeof(void *);
3779
+ } else {
3780
+ /*
3781
+ * Store freelist pointer near middle of object to keep
3782
+ * it away from the edges of the object to avoid small
3783
+ * sized over/underflows from neighboring allocations.
3784
+ */
3785
+ s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *));
36523786 }
36533787
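[Editor's note] A worked example of the new free-pointer placement. Caches that need the pointer outside the object (RCU, poisoning, a constructor, or a red-zoned object smaller than a pointer) keep the old behaviour; everyone else now stores it near the middle of the object. The sketch below is user-space C with the checks collapsed into one boolean and ALIGN_DOWN re-declared locally; the numbers are purely illustrative.

#include <stdio.h>
#include <stdbool.h>

#define ALIGN_DOWN(x, a)	((x) / (a) * (a))	/* stand-in for the kernel macro */

static unsigned long freeptr_offset(unsigned long object_size,
				    unsigned long inuse,
				    bool needs_outside_freeptr)
{
	if (needs_outside_freeptr)
		return inuse;	/* appended after the object, as before */
	return ALIGN_DOWN(object_size / 2, sizeof(void *));
}

int main(void)
{
	/* plain 100-byte object on a 64-bit build: offset 48, mid-object */
	printf("offset=%lu\n", freeptr_offset(100, 104, false));
	/* RCU/poisoned/ctor cache: offset equals inuse, i.e. 104 */
	printf("offset=%lu\n", freeptr_offset(100, 104, true));
	return 0;
}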
36543788 #ifdef CONFIG_SLUB_DEBUG
....@@ -3685,6 +3819,7 @@
36853819 */
36863820 size = ALIGN(size, s->align);
36873821 s->size = size;
3822
+ s->reciprocal_size = reciprocal_value(size);
36883823 if (forced_order >= 0)
36893824 order = forced_order;
36903825 else
....@@ -3719,7 +3854,7 @@
37193854
37203855 static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
37213856 {
3722
- s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
3857
+ s->flags = kmem_cache_flags(s->size, flags, s->name);
37233858 #ifdef CONFIG_SLAB_FREELIST_HARDENED
37243859 s->random = get_random_long();
37253860 #endif
....@@ -3770,45 +3905,32 @@
37703905 if (alloc_kmem_cache_cpus(s))
37713906 return 0;
37723907
3773
- free_kmem_cache_nodes(s);
37743908 error:
3775
- if (flags & SLAB_PANIC)
3776
- panic("Cannot create slab %s size=%u realsize=%u order=%u offset=%u flags=%lx\n",
3777
- s->name, s->size, s->size,
3778
- oo_order(s->oo), s->offset, (unsigned long)flags);
3909
+ __kmem_cache_release(s);
37793910 return -EINVAL;
37803911 }
37813912
37823913 static void list_slab_objects(struct kmem_cache *s, struct page *page,
3783
- const char *text)
3914
+ const char *text)
37843915 {
37853916 #ifdef CONFIG_SLUB_DEBUG
3786
-#ifdef CONFIG_PREEMPT_RT_BASE
3787
- /* XXX move out of irq-off section */
3788
- slab_err(s, page, text, s->name);
3789
-#else
3790
-
37913917 void *addr = page_address(page);
3918
+ unsigned long *map;
37923919 void *p;
3793
- unsigned long *map = kcalloc(BITS_TO_LONGS(page->objects),
3794
- sizeof(long),
3795
- GFP_ATOMIC);
3796
- if (!map)
3797
- return;
3920
+
37983921 slab_err(s, page, text, s->name);
37993922 slab_lock(page);
38003923
3801
- get_map(s, page, map);
3924
+ map = get_map(s, page);
38023925 for_each_object(p, s, addr, page->objects) {
38033926
3804
- if (!test_bit(slab_index(p, s, addr), map)) {
3927
+ if (!test_bit(__obj_to_index(s, addr, p), map)) {
38053928 pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
38063929 print_tracking(s, p);
38073930 }
38083931 }
3932
+ put_map(map);
38093933 slab_unlock(page);
3810
- kfree(map);
3811
-#endif
38123934 #endif
38133935 }
38143936
....@@ -3823,19 +3945,19 @@
38233945 struct page *page, *h;
38243946
38253947 BUG_ON(irqs_disabled());
3826
- raw_spin_lock_irq(&n->list_lock);
3827
- list_for_each_entry_safe(page, h, &n->partial, lru) {
3948
+ spin_lock_irq(&n->list_lock);
3949
+ list_for_each_entry_safe(page, h, &n->partial, slab_list) {
38283950 if (!page->inuse) {
38293951 remove_partial(n, page);
3830
- list_add(&page->lru, &discard);
3952
+ list_add(&page->slab_list, &discard);
38313953 } else {
38323954 list_slab_objects(s, page,
3833
- "Objects remaining in %s on __kmem_cache_shutdown()");
3955
+ "Objects remaining in %s on __kmem_cache_shutdown()");
38343956 }
38353957 }
3836
- raw_spin_unlock_irq(&n->list_lock);
3958
+ spin_unlock_irq(&n->list_lock);
38373959
3838
- list_for_each_entry_safe(page, h, &discard, lru)
3960
+ list_for_each_entry_safe(page, h, &discard, slab_list)
38393961 discard_slab(s, page);
38403962 }
38413963
....@@ -3865,7 +3987,6 @@
38653987 if (n->nr_partial || slabs_node(s, node))
38663988 return 1;
38673989 }
3868
- sysfs_slab_remove(s);
38693990 return 0;
38703991 }
38713992
....@@ -3914,7 +4035,7 @@
39144035 if (unlikely(ZERO_OR_NULL_PTR(s)))
39154036 return s;
39164037
3917
- ret = slab_alloc(s, flags, _RET_IP_);
4038
+ ret = slab_alloc(s, flags, _RET_IP_, size);
39184039
39194040 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
39204041
....@@ -3929,11 +4050,15 @@
39294050 {
39304051 struct page *page;
39314052 void *ptr = NULL;
4053
+ unsigned int order = get_order(size);
39324054
39334055 flags |= __GFP_COMP;
3934
- page = alloc_pages_node(node, flags, get_order(size));
3935
- if (page)
4056
+ page = alloc_pages_node(node, flags, order);
4057
+ if (page) {
39364058 ptr = page_address(page);
4059
+ mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
4060
+ PAGE_SIZE << order);
4061
+ }
39374062
39384063 return kmalloc_large_node_hook(ptr, size, flags);
39394064 }
....@@ -3958,7 +4083,7 @@
39584083 if (unlikely(ZERO_OR_NULL_PTR(s)))
39594084 return s;
39604085
3961
- ret = slab_alloc_node(s, flags, node, _RET_IP_);
4086
+ ret = slab_alloc_node(s, flags, node, _RET_IP_, size);
39624087
39634088 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
39644089
....@@ -3967,7 +4092,7 @@
39674092 return ret;
39684093 }
39694094 EXPORT_SYMBOL(__kmalloc_node);
3970
-#endif
4095
+#endif /* CONFIG_NUMA */
39714096
39724097 #ifdef CONFIG_HARDENED_USERCOPY
39734098 /*
....@@ -3984,6 +4109,7 @@
39844109 struct kmem_cache *s;
39854110 unsigned int offset;
39864111 size_t object_size;
4112
+ bool is_kfence = is_kfence_address(ptr);
39874113
39884114 ptr = kasan_reset_tag(ptr);
39894115
....@@ -3996,10 +4122,13 @@
39964122 to_user, 0, n);
39974123
39984124 /* Find offset within object. */
3999
- offset = (ptr - page_address(page)) % s->size;
4125
+ if (is_kfence)
4126
+ offset = ptr - kfence_object_start(ptr);
4127
+ else
4128
+ offset = (ptr - page_address(page)) % s->size;
40004129
40014130 /* Adjust for redzone and reject if within the redzone. */
4002
- if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
4131
+ if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
40034132 if (offset < s->red_left_pad)
40044133 usercopy_abort("SLUB object in left red zone",
40054134 s->name, to_user, offset, n);
....@@ -4029,7 +4158,7 @@
40294158 }
40304159 #endif /* CONFIG_HARDENED_USERCOPY */
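[Editor's note] The usercopy change above computes the copy offset differently for KFENCE objects (relative to kfence_object_start()) and regular slab objects (modulo the object stride), and only the latter are checked against the left red zone. A small user-space model of the non-KFENCE arithmetic, with made-up stride and padding values:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static bool in_left_red_zone(uintptr_t ptr, uintptr_t page_base,
			     unsigned int s_size, unsigned int red_left_pad)
{
	unsigned int offset = (ptr - page_base) % s_size;

	return offset < red_left_pad;
}

int main(void)
{
	/* object stride 128 bytes, 16-byte left red zone */
	printf("offset 8  -> %s\n",
	       in_left_red_zone(0x1008, 0x1000, 128, 16) ? "reject" : "ok");
	printf("offset 32 -> %s\n",
	       in_left_red_zone(0x1020, 0x1000, 128, 16) ? "reject" : "ok");
	return 0;
}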
40314160
4032
-static size_t __ksize(const void *object)
4161
+size_t __ksize(const void *object)
40334162 {
40344163 struct page *page;
40354164
....@@ -4040,22 +4169,12 @@
40404169
40414170 if (unlikely(!PageSlab(page))) {
40424171 WARN_ON(!PageCompound(page));
4043
- return PAGE_SIZE << compound_order(page);
4172
+ return page_size(page);
40444173 }
40454174
40464175 return slab_ksize(page->slab_cache);
40474176 }
4048
-
4049
-size_t ksize(const void *object)
4050
-{
4051
- size_t size = __ksize(object);
4052
- /* We assume that ksize callers could use whole allocated area,
4053
- * so we need to unpoison this area.
4054
- */
4055
- kasan_unpoison_shadow(object, size);
4056
- return size;
4057
-}
4058
-EXPORT_SYMBOL(ksize);
4177
+EXPORT_SYMBOL(__ksize);
40594178
40604179 void kfree(const void *x)
40614180 {
....@@ -4069,9 +4188,13 @@
40694188
40704189 page = virt_to_head_page(x);
40714190 if (unlikely(!PageSlab(page))) {
4191
+ unsigned int order = compound_order(page);
4192
+
40724193 BUG_ON(!PageCompound(page));
40734194 kfree_hook(object);
4074
- __free_pages(page, compound_order(page));
4195
+ mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
4196
+ -(PAGE_SIZE << order));
4197
+ __free_pages(page, order);
40754198 return;
40764199 }
40774200 slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
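[Editor's note] The kfree() hunk above is the counterpart of the kmalloc_large_node() hunk earlier: the large-allocation path charges PAGE_SIZE << order to NR_SLAB_UNRECLAIMABLE_B and this path subtracts exactly the same amount before freeing the compound page. A tiny arithmetic sketch of the charged size (get_order() modelled for 4 KiB pages, an assumption of the sketch):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

/* minimal stand-in for get_order(): smallest order whose span covers size */
static unsigned int order_for(unsigned long size)
{
	unsigned int order = 0;

	while ((PAGE_SIZE << order) < size)
		order++;
	return order;
}

int main(void)
{
	unsigned long sizes[] = { 8192, 20000, 70000 };

	for (int i = 0; i < 3; i++) {
		unsigned int order = order_for(sizes[i]);

		printf("kmalloc(%lu): order=%u, charged=%lu bytes\n",
		       sizes[i], order, PAGE_SIZE << order);
	}
	return 0;
}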
....@@ -4107,7 +4230,7 @@
41074230 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
41084231 INIT_LIST_HEAD(promote + i);
41094232
4110
- raw_spin_lock_irqsave(&n->list_lock, flags);
4233
+ spin_lock_irqsave(&n->list_lock, flags);
41114234
41124235 /*
41134236 * Build lists of slabs to discard or promote.
....@@ -4115,7 +4238,7 @@
41154238 * Note that concurrent frees may occur while we hold the
41164239 * list_lock. page->inuse here is the upper limit.
41174240 */
4118
- list_for_each_entry_safe(page, t, &n->partial, lru) {
4241
+ list_for_each_entry_safe(page, t, &n->partial, slab_list) {
41194242 int free = page->objects - page->inuse;
41204243
41214244 /* Do not reread page->inuse */
....@@ -4125,10 +4248,10 @@
41254248 BUG_ON(free <= 0);
41264249
41274250 if (free == page->objects) {
4128
- list_move(&page->lru, &discard);
4251
+ list_move(&page->slab_list, &discard);
41294252 n->nr_partial--;
41304253 } else if (free <= SHRINK_PROMOTE_MAX)
4131
- list_move(&page->lru, promote + free - 1);
4254
+ list_move(&page->slab_list, promote + free - 1);
41324255 }
41334256
41344257 /*
....@@ -4138,10 +4261,10 @@
41384261 for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
41394262 list_splice(promote + i, &n->partial);
41404263
4141
- raw_spin_unlock_irqrestore(&n->list_lock, flags);
4264
+ spin_unlock_irqrestore(&n->list_lock, flags);
41424265
41434266 /* Release empty slabs */
4144
- list_for_each_entry_safe(page, t, &discard, lru)
4267
+ list_for_each_entry_safe(page, t, &discard, slab_list)
41454268 discard_slab(s, page);
41464269
41474270 if (slabs_node(s, node))
....@@ -4150,42 +4273,6 @@
41504273
41514274 return ret;
41524275 }
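[Editor's note] __kmem_cache_shrink() above sorts each node's partial slabs into buckets keyed by their free-object count, discards slabs that are completely free, and splices the buckets back most-full-first. A user-space model of the per-slab decision (SHRINK_PROMOTE_MAX taken to be 32, which I believe matches the kernel constant):

#include <stdio.h>

#define SHRINK_PROMOTE_MAX 32

static const char *shrink_target(int objects, int inuse, int *bucket)
{
	int free = objects - inuse;

	*bucket = -1;
	if (free == objects)
		return "discard list (slab is empty)";
	if (free <= SHRINK_PROMOTE_MAX) {
		*bucket = free - 1;	/* promote + free - 1 in the code above */
		return "promote bucket";
	}
	return "left on n->partial";
}

int main(void)
{
	int cases[][2] = { { 64, 0 }, { 64, 60 }, { 64, 10 } };

	for (int i = 0; i < 3; i++) {
		int bucket;
		const char *where = shrink_target(cases[i][0], cases[i][1], &bucket);

		printf("objects=%d inuse=%d -> %s", cases[i][0], cases[i][1], where);
		if (bucket >= 0)
			printf(" [%d]", bucket);
		printf("\n");
	}
	return 0;
}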
4153
-
4154
-#ifdef CONFIG_MEMCG
4155
-static void kmemcg_cache_deact_after_rcu(struct kmem_cache *s)
4156
-{
4157
- /*
4158
- * Called with all the locks held after a sched RCU grace period.
4159
- * Even if @s becomes empty after shrinking, we can't know that @s
4160
- * doesn't have allocations already in-flight and thus can't
4161
- * destroy @s until the associated memcg is released.
4162
- *
4163
- * However, let's remove the sysfs files for empty caches here.
4164
- * Each cache has a lot of interface files which aren't
4165
- * particularly useful for empty draining caches; otherwise, we can
4166
- * easily end up with millions of unnecessary sysfs files on
4167
- * systems which have a lot of memory and transient cgroups.
4168
- */
4169
- if (!__kmem_cache_shrink(s))
4170
- sysfs_slab_remove(s);
4171
-}
4172
-
4173
-void __kmemcg_cache_deactivate(struct kmem_cache *s)
4174
-{
4175
- /*
4176
- * Disable empty slabs caching. Used to avoid pinning offline
4177
- * memory cgroups by kmem pages that can be freed.
4178
- */
4179
- slub_set_cpu_partial(s, 0);
4180
- s->min_partial = 0;
4181
-
4182
- /*
4183
- * s->cpu_partial is checked locklessly (see put_cpu_partial), so
4184
- * we have to make sure the change is visible before shrinking.
4185
- */
4186
- slab_deactivate_memcg_cache_rcu_sched(s, kmemcg_cache_deact_after_rcu);
4187
-}
4188
-#endif
41894276
41904277 static int slab_mem_going_offline_callback(void *arg)
41914278 {
....@@ -4333,17 +4420,15 @@
43334420 for_each_kmem_cache_node(s, node, n) {
43344421 struct page *p;
43354422
4336
- list_for_each_entry(p, &n->partial, lru)
4423
+ list_for_each_entry(p, &n->partial, slab_list)
43374424 p->slab_cache = s;
43384425
43394426 #ifdef CONFIG_SLUB_DEBUG
4340
- list_for_each_entry(p, &n->full, lru)
4427
+ list_for_each_entry(p, &n->full, slab_list)
43414428 p->slab_cache = s;
43424429 #endif
43434430 }
4344
- slab_init_memcg_params(s);
43454431 list_add(&s->list, &slab_caches);
4346
- memcg_link_cache(s);
43474432 return s;
43484433 }
43494434
....@@ -4351,12 +4436,6 @@
43514436 {
43524437 static __initdata struct kmem_cache boot_kmem_cache,
43534438 boot_kmem_cache_node;
4354
- int cpu;
4355
-
4356
- for_each_possible_cpu(cpu) {
4357
- raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
4358
- INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
4359
- }
43604439
43614440 if (debug_guardpage_minorder())
43624441 slub_max_order = 0;
....@@ -4390,7 +4469,7 @@
43904469 cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
43914470 slub_cpu_dead);
43924471
4393
- pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%d\n",
4472
+ pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
43944473 cache_line_size(),
43954474 slub_min_order, slub_max_order, slub_min_objects,
43964475 nr_cpu_ids, nr_node_ids);
....@@ -4404,7 +4483,7 @@
44044483 __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
44054484 slab_flags_t flags, void (*ctor)(void *))
44064485 {
4407
- struct kmem_cache *s, *c;
4486
+ struct kmem_cache *s;
44084487
44094488 s = find_mergeable(size, align, flags, name, ctor);
44104489 if (s) {
....@@ -4416,11 +4495,6 @@
44164495 */
44174496 s->object_size = max(s->object_size, size);
44184497 s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
4419
-
4420
- for_each_memcg_cache(c, s) {
4421
- c->object_size = s->object_size;
4422
- c->inuse = max(c->inuse, ALIGN(size, sizeof(void *)));
4423
- }
44244498
44254499 if (sysfs_slab_alias(s, name)) {
44264500 s->refcount--;
....@@ -4443,12 +4517,16 @@
44434517 if (slab_state <= UP)
44444518 return 0;
44454519
4446
- memcg_propagate_slab_attrs(s);
44474520 err = sysfs_slab_add(s);
4448
- if (err)
4521
+ if (err) {
44494522 __kmem_cache_release(s);
4523
+ return err;
4524
+ }
44504525
4451
- return err;
4526
+ if (s->flags & SLAB_STORE_USER)
4527
+ debugfs_slab_add(s);
4528
+
4529
+ return 0;
44524530 }
44534531
44544532 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
....@@ -4464,7 +4542,7 @@
44644542 if (unlikely(ZERO_OR_NULL_PTR(s)))
44654543 return s;
44664544
4467
- ret = slab_alloc(s, gfpflags, caller);
4545
+ ret = slab_alloc(s, gfpflags, caller, size);
44684546
44694547 /* Honor the call site pointer we received. */
44704548 trace_kmalloc(caller, ret, size, s->size, gfpflags);
....@@ -4495,7 +4573,7 @@
44954573 if (unlikely(ZERO_OR_NULL_PTR(s)))
44964574 return s;
44974575
4498
- ret = slab_alloc_node(s, gfpflags, node, caller);
4576
+ ret = slab_alloc_node(s, gfpflags, node, caller, size);
44994577
45004578 /* Honor the call site pointer we received. */
45014579 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
....@@ -4518,52 +4596,42 @@
45184596 #endif
45194597
45204598 #ifdef CONFIG_SLUB_DEBUG
4521
-static int validate_slab(struct kmem_cache *s, struct page *page,
4522
- unsigned long *map)
4599
+static void validate_slab(struct kmem_cache *s, struct page *page)
45234600 {
45244601 void *p;
45254602 void *addr = page_address(page);
4603
+ unsigned long *map;
45264604
4527
- if (!check_slab(s, page) ||
4528
- !on_freelist(s, page, NULL))
4529
- return 0;
4605
+ slab_lock(page);
4606
+
4607
+ if (!check_slab(s, page) || !on_freelist(s, page, NULL))
4608
+ goto unlock;
45304609
45314610 /* Now we know that a valid freelist exists */
4532
- bitmap_zero(map, page->objects);
4533
-
4534
- get_map(s, page, map);
4611
+ map = get_map(s, page);
45354612 for_each_object(p, s, addr, page->objects) {
4536
- if (test_bit(slab_index(p, s, addr), map))
4537
- if (!check_object(s, page, p, SLUB_RED_INACTIVE))
4538
- return 0;
4613
+ u8 val = test_bit(__obj_to_index(s, addr, p), map) ?
4614
+ SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
4615
+
4616
+ if (!check_object(s, page, p, val))
4617
+ break;
45394618 }
4540
-
4541
- for_each_object(p, s, addr, page->objects)
4542
- if (!test_bit(slab_index(p, s, addr), map))
4543
- if (!check_object(s, page, p, SLUB_RED_ACTIVE))
4544
- return 0;
4545
- return 1;
4546
-}
4547
-
4548
-static void validate_slab_slab(struct kmem_cache *s, struct page *page,
4549
- unsigned long *map)
4550
-{
4551
- slab_lock(page);
4552
- validate_slab(s, page, map);
4619
+ put_map(map);
4620
+unlock:
45534621 slab_unlock(page);
45544622 }
45554623
45564624 static int validate_slab_node(struct kmem_cache *s,
4557
- struct kmem_cache_node *n, unsigned long *map)
4625
+ struct kmem_cache_node *n)
45584626 {
45594627 unsigned long count = 0;
45604628 struct page *page;
45614629 unsigned long flags;
45624630
4563
- raw_spin_lock_irqsave(&n->list_lock, flags);
4631
+ spin_lock_irqsave(&n->list_lock, flags);
45644632
4565
- list_for_each_entry(page, &n->partial, lru) {
4566
- validate_slab_slab(s, page, map);
4633
+ list_for_each_entry(page, &n->partial, slab_list) {
4634
+ validate_slab(s, page);
45674635 count++;
45684636 }
45694637 if (count != n->nr_partial)
....@@ -4573,8 +4641,8 @@
45734641 if (!(s->flags & SLAB_STORE_USER))
45744642 goto out;
45754643
4576
- list_for_each_entry(page, &n->full, lru) {
4577
- validate_slab_slab(s, page, map);
4644
+ list_for_each_entry(page, &n->full, slab_list) {
4645
+ validate_slab(s, page);
45784646 count++;
45794647 }
45804648 if (count != atomic_long_read(&n->nr_slabs))
....@@ -4582,7 +4650,7 @@
45824650 s->name, count, atomic_long_read(&n->nr_slabs));
45834651
45844652 out:
4585
- raw_spin_unlock_irqrestore(&n->list_lock, flags);
4653
+ spin_unlock_irqrestore(&n->list_lock, flags);
45864654 return count;
45874655 }
45884656
....@@ -4590,20 +4658,16 @@
45904658 {
45914659 int node;
45924660 unsigned long count = 0;
4593
- unsigned long *map = kmalloc_array(BITS_TO_LONGS(oo_objects(s->max)),
4594
- sizeof(unsigned long),
4595
- GFP_KERNEL);
45964661 struct kmem_cache_node *n;
4597
-
4598
- if (!map)
4599
- return -ENOMEM;
46004662
46014663 flush_all(s);
46024664 for_each_kmem_cache_node(s, node, n)
4603
- count += validate_slab_node(s, n, map);
4604
- kfree(map);
4665
+ count += validate_slab_node(s, n);
4666
+
46054667 return count;
46064668 }
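[Editor's note] The rewritten validate_slab() derives every object's expected state directly from the free-object bitmap: a set bit means the object sits on a freelist and must carry the inactive red-zone pattern, a clear bit means it is allocated and must carry the active one. A user-space sketch of that mapping (the enum is a stand-in; in the kernel these names are poison byte values):

#include <stdio.h>
#include <stdbool.h>

enum redzone_state { SLUB_RED_ACTIVE, SLUB_RED_INACTIVE };

static enum redzone_state expected_state(bool free_bit_set)
{
	return free_bit_set ? SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
}

int main(void)
{
	printf("free object      -> %s\n",
	       expected_state(true) == SLUB_RED_INACTIVE ? "SLUB_RED_INACTIVE" : "?");
	printf("allocated object -> %s\n",
	       expected_state(false) == SLUB_RED_ACTIVE ? "SLUB_RED_ACTIVE" : "?");
	return 0;
}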
4669
+
4670
+#ifdef CONFIG_DEBUG_FS
46074671 /*
46084672 * Generate lists of code addresses where slabcache objects are allocated
46094673 * and freed.
....@@ -4625,7 +4689,10 @@
46254689 unsigned long max;
46264690 unsigned long count;
46274691 struct location *loc;
4692
+ loff_t idx;
46284693 };
4694
+
4695
+static struct dentry *slab_debugfs_root;
46294696
46304697 static void free_loc_track(struct loc_track *t)
46314698 {
....@@ -4638,9 +4705,6 @@
46384705 {
46394706 struct location *l;
46404707 int order;
4641
-
4642
- if (IS_ENABLED(CONFIG_PREEMPT_RT) && flags == GFP_ATOMIC)
4643
- return 0;
46444708
46454709 order = get_order(sizeof(struct location) * max);
46464710
....@@ -4735,105 +4799,19 @@
47354799
47364800 static void process_slab(struct loc_track *t, struct kmem_cache *s,
47374801 struct page *page, enum track_item alloc,
4738
- unsigned long *map)
4802
+ unsigned long *obj_map)
47394803 {
47404804 void *addr = page_address(page);
47414805 void *p;
47424806
4743
- bitmap_zero(map, page->objects);
4744
- get_map(s, page, map);
4807
+ __fill_map(obj_map, s, page);
47454808
47464809 for_each_object(p, s, addr, page->objects)
4747
- if (!test_bit(slab_index(p, s, addr), map))
4810
+ if (!test_bit(__obj_to_index(s, addr, p), obj_map))
47484811 add_location(t, s, get_track(s, p, alloc));
47494812 }
4750
-
4751
-static int list_locations(struct kmem_cache *s, char *buf,
4752
- enum track_item alloc)
4753
-{
4754
- int len = 0;
4755
- unsigned long i;
4756
- struct loc_track t = { 0, 0, NULL };
4757
- int node;
4758
- unsigned long *map = kmalloc_array(BITS_TO_LONGS(oo_objects(s->max)),
4759
- sizeof(unsigned long),
4760
- GFP_KERNEL);
4761
- struct kmem_cache_node *n;
4762
-
4763
- if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4764
- GFP_KERNEL)) {
4765
- kfree(map);
4766
- return sprintf(buf, "Out of memory\n");
4767
- }
4768
- /* Push back cpu slabs */
4769
- flush_all(s);
4770
-
4771
- for_each_kmem_cache_node(s, node, n) {
4772
- unsigned long flags;
4773
- struct page *page;
4774
-
4775
- if (!atomic_long_read(&n->nr_slabs))
4776
- continue;
4777
-
4778
- raw_spin_lock_irqsave(&n->list_lock, flags);
4779
- list_for_each_entry(page, &n->partial, lru)
4780
- process_slab(&t, s, page, alloc, map);
4781
- list_for_each_entry(page, &n->full, lru)
4782
- process_slab(&t, s, page, alloc, map);
4783
- raw_spin_unlock_irqrestore(&n->list_lock, flags);
4784
- }
4785
-
4786
- for (i = 0; i < t.count; i++) {
4787
- struct location *l = &t.loc[i];
4788
-
4789
- if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4790
- break;
4791
- len += sprintf(buf + len, "%7ld ", l->count);
4792
-
4793
- if (l->addr)
4794
- len += sprintf(buf + len, "%pS", (void *)l->addr);
4795
- else
4796
- len += sprintf(buf + len, "<not-available>");
4797
-
4798
- if (l->sum_time != l->min_time) {
4799
- len += sprintf(buf + len, " age=%ld/%ld/%ld",
4800
- l->min_time,
4801
- (long)div_u64(l->sum_time, l->count),
4802
- l->max_time);
4803
- } else
4804
- len += sprintf(buf + len, " age=%ld",
4805
- l->min_time);
4806
-
4807
- if (l->min_pid != l->max_pid)
4808
- len += sprintf(buf + len, " pid=%ld-%ld",
4809
- l->min_pid, l->max_pid);
4810
- else
4811
- len += sprintf(buf + len, " pid=%ld",
4812
- l->min_pid);
4813
-
4814
- if (num_online_cpus() > 1 &&
4815
- !cpumask_empty(to_cpumask(l->cpus)) &&
4816
- len < PAGE_SIZE - 60)
4817
- len += scnprintf(buf + len, PAGE_SIZE - len - 50,
4818
- " cpus=%*pbl",
4819
- cpumask_pr_args(to_cpumask(l->cpus)));
4820
-
4821
- if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4822
- len < PAGE_SIZE - 60)
4823
- len += scnprintf(buf + len, PAGE_SIZE - len - 50,
4824
- " nodes=%*pbl",
4825
- nodemask_pr_args(&l->nodes));
4826
-
4827
- len += sprintf(buf + len, "\n");
4828
- }
4829
-
4830
- free_loc_track(&t);
4831
- kfree(map);
4832
- if (!t.count)
4833
- len += sprintf(buf, "No data\n");
4834
- return len;
4835
-}
4836
-#endif
4813
+#endif /* CONFIG_DEBUG_FS */
4814
+#endif /* CONFIG_SLUB_DEBUG */
48374815
48384816 #ifdef SLUB_RESILIENCY_TEST
48394817 static void __init resiliency_test(void)
....@@ -4893,7 +4871,7 @@
48934871 #ifdef CONFIG_SLUB_SYSFS
48944872 static void resiliency_test(void) {};
48954873 #endif
4896
-#endif
4874
+#endif /* SLUB_RESILIENCY_TEST */
48974875
48984876 #ifdef CONFIG_SLUB_SYSFS
48994877 enum slab_stat_type {
....@@ -5032,20 +5010,6 @@
50325010 return x + sprintf(buf + x, "\n");
50335011 }
50345012
5035
-#ifdef CONFIG_SLUB_DEBUG
5036
-static int any_slab_objects(struct kmem_cache *s)
5037
-{
5038
- int node;
5039
- struct kmem_cache_node *n;
5040
-
5041
- for_each_kmem_cache_node(s, node, n)
5042
- if (atomic_long_read(&n->total_objects))
5043
- return 1;
5044
-
5045
- return 0;
5046
-}
5047
-#endif
5048
-
50495013 #define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
50505014 #define to_slab(n) container_of(n, struct kmem_cache, kobj)
50515015
....@@ -5087,28 +5051,11 @@
50875051 }
50885052 SLAB_ATTR_RO(objs_per_slab);
50895053
5090
-static ssize_t order_store(struct kmem_cache *s,
5091
- const char *buf, size_t length)
5092
-{
5093
- unsigned int order;
5094
- int err;
5095
-
5096
- err = kstrtouint(buf, 10, &order);
5097
- if (err)
5098
- return err;
5099
-
5100
- if (order > slub_max_order || order < slub_min_order)
5101
- return -EINVAL;
5102
-
5103
- calculate_sizes(s, order);
5104
- return length;
5105
-}
5106
-
51075054 static ssize_t order_show(struct kmem_cache *s, char *buf)
51085055 {
51095056 return sprintf(buf, "%u\n", oo_order(s->oo));
51105057 }
5111
-SLAB_ATTR(order);
5058
+SLAB_ATTR_RO(order);
51125059
51135060 static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
51145061 {
....@@ -5230,16 +5177,7 @@
52305177 {
52315178 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
52325179 }
5233
-
5234
-static ssize_t reclaim_account_store(struct kmem_cache *s,
5235
- const char *buf, size_t length)
5236
-{
5237
- s->flags &= ~SLAB_RECLAIM_ACCOUNT;
5238
- if (buf[0] == '1')
5239
- s->flags |= SLAB_RECLAIM_ACCOUNT;
5240
- return length;
5241
-}
5242
-SLAB_ATTR(reclaim_account);
5180
+SLAB_ATTR_RO(reclaim_account);
52435181
52445182 static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
52455183 {
....@@ -5284,104 +5222,34 @@
52845222 {
52855223 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
52865224 }
5287
-
5288
-static ssize_t sanity_checks_store(struct kmem_cache *s,
5289
- const char *buf, size_t length)
5290
-{
5291
- s->flags &= ~SLAB_CONSISTENCY_CHECKS;
5292
- if (buf[0] == '1') {
5293
- s->flags &= ~__CMPXCHG_DOUBLE;
5294
- s->flags |= SLAB_CONSISTENCY_CHECKS;
5295
- }
5296
- return length;
5297
-}
5298
-SLAB_ATTR(sanity_checks);
5225
+SLAB_ATTR_RO(sanity_checks);
52995226
53005227 static ssize_t trace_show(struct kmem_cache *s, char *buf)
53015228 {
53025229 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
53035230 }
5304
-
5305
-static ssize_t trace_store(struct kmem_cache *s, const char *buf,
5306
- size_t length)
5307
-{
5308
- /*
5309
- * Tracing a merged cache is going to give confusing results
5310
- * as well as cause other issues like converting a mergeable
5311
- * cache into an umergeable one.
5312
- */
5313
- if (s->refcount > 1)
5314
- return -EINVAL;
5315
-
5316
- s->flags &= ~SLAB_TRACE;
5317
- if (buf[0] == '1') {
5318
- s->flags &= ~__CMPXCHG_DOUBLE;
5319
- s->flags |= SLAB_TRACE;
5320
- }
5321
- return length;
5322
-}
5323
-SLAB_ATTR(trace);
5231
+SLAB_ATTR_RO(trace);
53245232
53255233 static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
53265234 {
53275235 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
53285236 }
53295237
5330
-static ssize_t red_zone_store(struct kmem_cache *s,
5331
- const char *buf, size_t length)
5332
-{
5333
- if (any_slab_objects(s))
5334
- return -EBUSY;
5335
-
5336
- s->flags &= ~SLAB_RED_ZONE;
5337
- if (buf[0] == '1') {
5338
- s->flags |= SLAB_RED_ZONE;
5339
- }
5340
- calculate_sizes(s, -1);
5341
- return length;
5342
-}
5343
-SLAB_ATTR(red_zone);
5238
+SLAB_ATTR_RO(red_zone);
53445239
53455240 static ssize_t poison_show(struct kmem_cache *s, char *buf)
53465241 {
53475242 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
53485243 }
53495244
5350
-static ssize_t poison_store(struct kmem_cache *s,
5351
- const char *buf, size_t length)
5352
-{
5353
- if (any_slab_objects(s))
5354
- return -EBUSY;
5355
-
5356
- s->flags &= ~SLAB_POISON;
5357
- if (buf[0] == '1') {
5358
- s->flags |= SLAB_POISON;
5359
- }
5360
- calculate_sizes(s, -1);
5361
- return length;
5362
-}
5363
-SLAB_ATTR(poison);
5245
+SLAB_ATTR_RO(poison);
53645246
53655247 static ssize_t store_user_show(struct kmem_cache *s, char *buf)
53665248 {
53675249 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
53685250 }
53695251
5370
-static ssize_t store_user_store(struct kmem_cache *s,
5371
- const char *buf, size_t length)
5372
-{
5373
- if (any_slab_objects(s))
5374
- return -EBUSY;
5375
-
5376
- s->flags &= ~SLAB_STORE_USER;
5377
- if (buf[0] == '1') {
5378
- s->flags &= ~__CMPXCHG_DOUBLE;
5379
- s->flags |= SLAB_STORE_USER;
5380
- }
5381
- calculate_sizes(s, -1);
5382
- return length;
5383
-}
5384
-SLAB_ATTR(store_user);
5252
+SLAB_ATTR_RO(store_user);
53855253
53865254 static ssize_t validate_show(struct kmem_cache *s, char *buf)
53875255 {
....@@ -5402,21 +5270,6 @@
54025270 }
54035271 SLAB_ATTR(validate);
54045272
5405
-static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
5406
-{
5407
- if (!(s->flags & SLAB_STORE_USER))
5408
- return -ENOSYS;
5409
- return list_locations(s, buf, TRACK_ALLOC);
5410
-}
5411
-SLAB_ATTR_RO(alloc_calls);
5412
-
5413
-static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
5414
-{
5415
- if (!(s->flags & SLAB_STORE_USER))
5416
- return -ENOSYS;
5417
- return list_locations(s, buf, TRACK_FREE);
5418
-}
5419
-SLAB_ATTR_RO(free_calls);
54205273 #endif /* CONFIG_SLUB_DEBUG */
54215274
54225275 #ifdef CONFIG_FAILSLAB
....@@ -5424,19 +5277,7 @@
54245277 {
54255278 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
54265279 }
5427
-
5428
-static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
5429
- size_t length)
5430
-{
5431
- if (s->refcount > 1)
5432
- return -EINVAL;
5433
-
5434
- s->flags &= ~SLAB_FAILSLAB;
5435
- if (buf[0] == '1')
5436
- s->flags |= SLAB_FAILSLAB;
5437
- return length;
5438
-}
5439
-SLAB_ATTR(failslab);
5280
+SLAB_ATTR_RO(failslab);
54405281 #endif
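[Editor's note] With the writable variants of order, reclaim_account, sanity_checks, trace, red_zone, poison, store_user and failslab gone, these debug options can no longer be toggled through sysfs on a live cache; they have to be selected before the cache is populated, typically via the slub_debug boot parameter (for example, slub_debug=FZPU,kmalloc-64 enables consistency checks, red zoning, poisoning and user tracking for the kmalloc-64 cache).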
54415282
54425283 static ssize_t shrink_show(struct kmem_cache *s, char *buf)
....@@ -5559,7 +5400,7 @@
55595400 STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
55605401 STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
55615402 STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
5562
-#endif
5403
+#endif /* CONFIG_SLUB_STATS */
55635404
55645405 static struct attribute *slab_attrs[] = {
55655406 &slab_size_attr.attr,
....@@ -5589,8 +5430,6 @@
55895430 &poison_attr.attr,
55905431 &store_user_attr.attr,
55915432 &validate_attr.attr,
5592
- &alloc_calls_attr.attr,
5593
- &free_calls_attr.attr,
55945433 #endif
55955434 #ifdef CONFIG_ZONE_DMA
55965435 &cache_dma_attr.attr,
....@@ -5672,96 +5511,7 @@
56725511 return -EIO;
56735512
56745513 err = attribute->store(s, buf, len);
5675
-#ifdef CONFIG_MEMCG
5676
- if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
5677
- struct kmem_cache *c;
5678
-
5679
- mutex_lock(&slab_mutex);
5680
- if (s->max_attr_size < len)
5681
- s->max_attr_size = len;
5682
-
5683
- /*
5684
- * This is a best effort propagation, so this function's return
5685
- * value will be determined by the parent cache only. This is
5686
- * basically because not all attributes will have a well
5687
- * defined semantics for rollbacks - most of the actions will
5688
- * have permanent effects.
5689
- *
5690
- * Returning the error value of any of the children that fail
5691
- * is not 100 % defined, in the sense that users seeing the
5692
- * error code won't be able to know anything about the state of
5693
- * the cache.
5694
- *
5695
- * Only returning the error code for the parent cache at least
5696
- * has well defined semantics. The cache being written to
5697
- * directly either failed or succeeded, in which case we loop
5698
- * through the descendants with best-effort propagation.
5699
- */
5700
- for_each_memcg_cache(c, s)
5701
- attribute->store(c, buf, len);
5702
- mutex_unlock(&slab_mutex);
5703
- }
5704
-#endif
57055514 return err;
5706
-}
5707
-
5708
-static void memcg_propagate_slab_attrs(struct kmem_cache *s)
5709
-{
5710
-#ifdef CONFIG_MEMCG
5711
- int i;
5712
- char *buffer = NULL;
5713
- struct kmem_cache *root_cache;
5714
-
5715
- if (is_root_cache(s))
5716
- return;
5717
-
5718
- root_cache = s->memcg_params.root_cache;
5719
-
5720
- /*
5721
- * This mean this cache had no attribute written. Therefore, no point
5722
- * in copying default values around
5723
- */
5724
- if (!root_cache->max_attr_size)
5725
- return;
5726
-
5727
- for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
5728
- char mbuf[64];
5729
- char *buf;
5730
- struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
5731
- ssize_t len;
5732
-
5733
- if (!attr || !attr->store || !attr->show)
5734
- continue;
5735
-
5736
- /*
5737
- * It is really bad that we have to allocate here, so we will
5738
- * do it only as a fallback. If we actually allocate, though,
5739
- * we can just use the allocated buffer until the end.
5740
- *
5741
- * Most of the slub attributes will tend to be very small in
5742
- * size, but sysfs allows buffers up to a page, so they can
5743
- * theoretically happen.
5744
- */
5745
- if (buffer)
5746
- buf = buffer;
5747
- else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf) &&
5748
- !IS_ENABLED(CONFIG_SLUB_STATS))
5749
- buf = mbuf;
5750
- else {
5751
- buffer = (char *) get_zeroed_page(GFP_KERNEL);
5752
- if (WARN_ON(!buffer))
5753
- continue;
5754
- buf = buffer;
5755
- }
5756
-
5757
- len = attr->show(root_cache, buf);
5758
- if (len > 0)
5759
- attr->store(s, buf, len);
5760
- }
5761
-
5762
- if (buffer)
5763
- free_page((unsigned long)buffer);
5764
-#endif
57655515 }
57665516
57675517 static void kmem_cache_release(struct kobject *k)
....@@ -5779,27 +5529,10 @@
57795529 .release = kmem_cache_release,
57805530 };
57815531
5782
-static int uevent_filter(struct kset *kset, struct kobject *kobj)
5783
-{
5784
- struct kobj_type *ktype = get_ktype(kobj);
5785
-
5786
- if (ktype == &slab_ktype)
5787
- return 1;
5788
- return 0;
5789
-}
5790
-
5791
-static const struct kset_uevent_ops slab_uevent_ops = {
5792
- .filter = uevent_filter,
5793
-};
5794
-
57955532 static struct kset *slab_kset;
57965533
57975534 static inline struct kset *cache_kset(struct kmem_cache *s)
57985535 {
5799
-#ifdef CONFIG_MEMCG
5800
- if (!is_root_cache(s))
5801
- return s->memcg_params.root_cache->memcg_kset;
5802
-#endif
58035536 return slab_kset;
58045537 }
58055538
....@@ -5814,7 +5547,8 @@
58145547 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
58155548 char *p = name;
58165549
5817
- BUG_ON(!name);
5550
+ if (!name)
5551
+ return ERR_PTR(-ENOMEM);
58185552
58195553 *p++ = ':';
58205554 /*
....@@ -5842,36 +5576,12 @@
58425576 return name;
58435577 }
58445578
5845
-static void sysfs_slab_remove_workfn(struct work_struct *work)
5846
-{
5847
- struct kmem_cache *s =
5848
- container_of(work, struct kmem_cache, kobj_remove_work);
5849
-
5850
- if (!s->kobj.state_in_sysfs)
5851
- /*
5852
- * For a memcg cache, this may be called during
5853
- * deactivation and again on shutdown. Remove only once.
5854
- * A cache is never shut down before deactivation is
5855
- * complete, so no need to worry about synchronization.
5856
- */
5857
- goto out;
5858
-
5859
-#ifdef CONFIG_MEMCG
5860
- kset_unregister(s->memcg_kset);
5861
-#endif
5862
- kobject_uevent(&s->kobj, KOBJ_REMOVE);
5863
-out:
5864
- kobject_put(&s->kobj);
5865
-}
5866
-
58675579 static int sysfs_slab_add(struct kmem_cache *s)
58685580 {
58695581 int err;
58705582 const char *name;
58715583 struct kset *kset = cache_kset(s);
58725584 int unmergeable = slab_unmergeable(s);
5873
-
5874
- INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
58755585
58765586 if (!kset) {
58775587 kobject_init(&s->kobj, &slab_ktype);
....@@ -5896,6 +5606,8 @@
58965606 * for the symlinks.
58975607 */
58985608 name = create_unique_id(s);
5609
+ if (IS_ERR(name))
5610
+ return PTR_ERR(name);
58995611 }
59005612
59015613 s->kobj.kset = kset;
....@@ -5907,17 +5619,6 @@
59075619 if (err)
59085620 goto out_del_kobj;
59095621
5910
-#ifdef CONFIG_MEMCG
5911
- if (is_root_cache(s) && memcg_sysfs_enabled) {
5912
- s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
5913
- if (!s->memcg_kset) {
5914
- err = -ENOMEM;
5915
- goto out_del_kobj;
5916
- }
5917
- }
5918
-#endif
5919
-
5920
- kobject_uevent(&s->kobj, KOBJ_ADD);
59215622 if (!unmergeable) {
59225623 /* Setup first alias */
59235624 sysfs_slab_alias(s, s->name);
....@@ -5929,19 +5630,6 @@
59295630 out_del_kobj:
59305631 kobject_del(&s->kobj);
59315632 goto out;
5932
-}
5933
-
5934
-static void sysfs_slab_remove(struct kmem_cache *s)
5935
-{
5936
- if (slab_state < FULL)
5937
- /*
5938
- * Sysfs has not been setup yet so no need to remove the
5939
- * cache from sysfs.
5940
- */
5941
- return;
5942
-
5943
- kobject_get(&s->kobj);
5944
- schedule_work(&s->kobj_remove_work);
59455633 }
59465634
59475635 void sysfs_slab_unlink(struct kmem_cache *s)
....@@ -5998,7 +5686,7 @@
59985686
59995687 mutex_lock(&slab_mutex);
60005688
6001
- slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
5689
+ slab_kset = kset_create_and_add("slab", NULL, kernel_kobj);
60025690 if (!slab_kset) {
60035691 mutex_unlock(&slab_mutex);
60045692 pr_err("Cannot register slab subsystem.\n");
....@@ -6033,6 +5721,189 @@
60335721 __initcall(slab_sysfs_init);
60345722 #endif /* CONFIG_SLUB_SYSFS */
60355723
5724
+#if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
5725
+static int slab_debugfs_show(struct seq_file *seq, void *v)
5726
+{
5727
+ struct loc_track *t = seq->private;
5728
+ struct location *l;
5729
+ unsigned long idx;
5730
+
5731
+ idx = (unsigned long) t->idx;
5732
+ if (idx < t->count) {
5733
+ l = &t->loc[idx];
5734
+
5735
+ seq_printf(seq, "%7ld ", l->count);
5736
+
5737
+ if (l->addr)
5738
+ seq_printf(seq, "%pS", (void *)l->addr);
5739
+ else
5740
+ seq_puts(seq, "<not-available>");
5741
+
5742
+ if (l->sum_time != l->min_time) {
5743
+ seq_printf(seq, " age=%ld/%llu/%ld",
5744
+ l->min_time, div_u64(l->sum_time, l->count),
5745
+ l->max_time);
5746
+ } else
5747
+ seq_printf(seq, " age=%ld", l->min_time);
5748
+
5749
+ if (l->min_pid != l->max_pid)
5750
+ seq_printf(seq, " pid=%ld-%ld", l->min_pid, l->max_pid);
5751
+ else
5752
+ seq_printf(seq, " pid=%ld",
5753
+ l->min_pid);
5754
+
5755
+ if (num_online_cpus() > 1 && !cpumask_empty(to_cpumask(l->cpus)))
5756
+ seq_printf(seq, " cpus=%*pbl",
5757
+ cpumask_pr_args(to_cpumask(l->cpus)));
5758
+
5759
+ if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
5760
+ seq_printf(seq, " nodes=%*pbl",
5761
+ nodemask_pr_args(&l->nodes));
5762
+
5763
+ seq_puts(seq, "\n");
5764
+ }
5765
+
5766
+ if (!idx && !t->count)
5767
+ seq_puts(seq, "No data\n");
5768
+
5769
+ return 0;
5770
+}
5771
+
5772
+static void slab_debugfs_stop(struct seq_file *seq, void *v)
5773
+{
5774
+}
5775
+
5776
+static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos)
5777
+{
5778
+ struct loc_track *t = seq->private;
5779
+
5780
+ t->idx = ++(*ppos);
5781
+ if (*ppos <= t->count)
5782
+ return ppos;
5783
+
5784
+ return NULL;
5785
+}
5786
+
5787
+static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos)
5788
+{
5789
+ struct loc_track *t = seq->private;
5790
+
5791
+ t->idx = *ppos;
5792
+ return ppos;
5793
+}
5794
+
5795
+static const struct seq_operations slab_debugfs_sops = {
5796
+ .start = slab_debugfs_start,
5797
+ .next = slab_debugfs_next,
5798
+ .stop = slab_debugfs_stop,
5799
+ .show = slab_debugfs_show,
5800
+};
5801
+
5802
+static int slab_debug_trace_open(struct inode *inode, struct file *filep)
5803
+{
5804
+
5805
+ struct kmem_cache_node *n;
5806
+ enum track_item alloc;
5807
+ int node;
5808
+ struct loc_track *t = __seq_open_private(filep, &slab_debugfs_sops,
5809
+ sizeof(struct loc_track));
5810
+ struct kmem_cache *s = file_inode(filep)->i_private;
5811
+ unsigned long *obj_map;
5812
+
5813
+ if (!t)
5814
+ return -ENOMEM;
5815
+
5816
+ obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL);
5817
+ if (!obj_map) {
5818
+ seq_release_private(inode, filep);
5819
+ return -ENOMEM;
5820
+ }
5821
+
5822
+ if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0)
5823
+ alloc = TRACK_ALLOC;
5824
+ else
5825
+ alloc = TRACK_FREE;
5826
+
5827
+ if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) {
5828
+ bitmap_free(obj_map);
5829
+ seq_release_private(inode, filep);
5830
+ return -ENOMEM;
5831
+ }
5832
+
5833
+ /* Push back cpu slabs */
5834
+ flush_all(s);
5835
+
5836
+ for_each_kmem_cache_node(s, node, n) {
5837
+ unsigned long flags;
5838
+ struct page *page;
5839
+
5840
+ if (!atomic_long_read(&n->nr_slabs))
5841
+ continue;
5842
+
5843
+ spin_lock_irqsave(&n->list_lock, flags);
5844
+ list_for_each_entry(page, &n->partial, slab_list)
5845
+ process_slab(t, s, page, alloc, obj_map);
5846
+ list_for_each_entry(page, &n->full, slab_list)
5847
+ process_slab(t, s, page, alloc, obj_map);
5848
+ spin_unlock_irqrestore(&n->list_lock, flags);
5849
+ }
5850
+
5851
+ bitmap_free(obj_map);
5852
+ return 0;
5853
+}
5854
+
5855
+static int slab_debug_trace_release(struct inode *inode, struct file *file)
5856
+{
5857
+ struct seq_file *seq = file->private_data;
5858
+ struct loc_track *t = seq->private;
5859
+
5860
+ free_loc_track(t);
5861
+ return seq_release_private(inode, file);
5862
+}
5863
+
5864
+static const struct file_operations slab_debugfs_fops = {
5865
+ .open = slab_debug_trace_open,
5866
+ .read = seq_read,
5867
+ .llseek = seq_lseek,
5868
+ .release = slab_debug_trace_release,
5869
+};
5870
+
5871
+static void debugfs_slab_add(struct kmem_cache *s)
5872
+{
5873
+ struct dentry *slab_cache_dir;
5874
+
5875
+ if (unlikely(!slab_debugfs_root))
5876
+ return;
5877
+
5878
+ slab_cache_dir = debugfs_create_dir(s->name, slab_debugfs_root);
5879
+
5880
+ debugfs_create_file("alloc_traces", 0400,
5881
+ slab_cache_dir, s, &slab_debugfs_fops);
5882
+
5883
+ debugfs_create_file("free_traces", 0400,
5884
+ slab_cache_dir, s, &slab_debugfs_fops);
5885
+}
5886
+
5887
+void debugfs_slab_release(struct kmem_cache *s)
5888
+{
5889
+ debugfs_remove_recursive(debugfs_lookup(s->name, slab_debugfs_root));
5890
+}
5891
+
5892
+static int __init slab_debugfs_init(void)
5893
+{
5894
+ struct kmem_cache *s;
5895
+
5896
+ slab_debugfs_root = debugfs_create_dir("slab", NULL);
5897
+
5898
+ list_for_each_entry(s, &slab_caches, list)
5899
+ if (s->flags & SLAB_STORE_USER)
5900
+ debugfs_slab_add(s);
5901
+
5902
+ return 0;
5903
+
5904
+}
5905
+__initcall(slab_debugfs_init);
5906
+#endif
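[Editor's note] The new alloc_traces and free_traces files are created under the slab directory in debugfs for every cache built with SLAB_STORE_USER. A small user-space reader as a usage sketch; the mount point and the kmalloc-64 cache name are assumptions, and the files are root-only (mode 0400):

#include <stdio.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/slab/kmalloc-64/alloc_traces";
	char line[512];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}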
60365907 /*
60375908 * The /proc/slabinfo ABI
60385909 */
....@@ -6058,6 +5929,7 @@
60585929 sinfo->objects_per_slab = oo_objects(s->oo);
60595930 sinfo->cache_order = oo_order(s->oo);
60605931 }
5932
+EXPORT_SYMBOL_GPL(get_slabinfo);
60615933
60625934 void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
60635935 {