2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/mm/slab_common.c
....@@ -12,11 +12,14 @@
1212 #include <linux/memory.h>
1313 #include <linux/cache.h>
1414 #include <linux/compiler.h>
15
+#include <linux/kfence.h>
1516 #include <linux/module.h>
1617 #include <linux/cpu.h>
1718 #include <linux/uaccess.h>
1819 #include <linux/seq_file.h>
1920 #include <linux/proc_fs.h>
21
+#include <linux/debugfs.h>
22
+#include <linux/kasan.h>
2023 #include <asm/cacheflush.h>
2124 #include <asm/tlbflush.h>
2225 #include <asm/page.h>
....@@ -24,6 +27,9 @@
2427
2528 #define CREATE_TRACE_POINTS
2629 #include <trace/events/kmem.h>
30
+#undef CREATE_TRACE_POINTS
31
+#include <trace/hooks/mm.h>
32
+#include "internal.h"
2733
2834 #include "slab.h"
2935
....@@ -50,7 +56,7 @@
5056 */
5157 #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
5258 SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
53
- SLAB_FAILSLAB | SLAB_KASAN)
59
+ SLAB_FAILSLAB | kasan_never_merge())
5460
5561 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
5662 SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
....@@ -84,8 +90,7 @@
8490 #ifdef CONFIG_DEBUG_VM
8591 static int kmem_cache_sanity_check(const char *name, unsigned int size)
8692 {
87
- if (!name || in_interrupt() || size < sizeof(void *) ||
88
- size > KMALLOC_MAX_SIZE) {
93
+ if (!name || in_interrupt() || size > KMALLOC_MAX_SIZE) {
8994 pr_err("kmem_cache_create(%s) integrity check failed\n", name);
9095 return -EINVAL;
9196 }
....@@ -127,138 +132,6 @@
127132 return i;
128133 }
129134
130
-#ifdef CONFIG_MEMCG_KMEM
131
-
132
-LIST_HEAD(slab_root_caches);
133
-static DEFINE_SPINLOCK(memcg_kmem_wq_lock);
134
-
135
-void slab_init_memcg_params(struct kmem_cache *s)
136
-{
137
- s->memcg_params.root_cache = NULL;
138
- RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
139
- INIT_LIST_HEAD(&s->memcg_params.children);
140
- s->memcg_params.dying = false;
141
-}
142
-
143
-static int init_memcg_params(struct kmem_cache *s,
144
- struct mem_cgroup *memcg, struct kmem_cache *root_cache)
145
-{
146
- struct memcg_cache_array *arr;
147
-
148
- if (root_cache) {
149
- s->memcg_params.root_cache = root_cache;
150
- s->memcg_params.memcg = memcg;
151
- INIT_LIST_HEAD(&s->memcg_params.children_node);
152
- INIT_LIST_HEAD(&s->memcg_params.kmem_caches_node);
153
- return 0;
154
- }
155
-
156
- slab_init_memcg_params(s);
157
-
158
- if (!memcg_nr_cache_ids)
159
- return 0;
160
-
161
- arr = kvzalloc(sizeof(struct memcg_cache_array) +
162
- memcg_nr_cache_ids * sizeof(void *),
163
- GFP_KERNEL);
164
- if (!arr)
165
- return -ENOMEM;
166
-
167
- RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr);
168
- return 0;
169
-}
170
-
171
-static void destroy_memcg_params(struct kmem_cache *s)
172
-{
173
- if (is_root_cache(s))
174
- kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
175
-}
176
-
177
-static void free_memcg_params(struct rcu_head *rcu)
178
-{
179
- struct memcg_cache_array *old;
180
-
181
- old = container_of(rcu, struct memcg_cache_array, rcu);
182
- kvfree(old);
183
-}
184
-
185
-static int update_memcg_params(struct kmem_cache *s, int new_array_size)
186
-{
187
- struct memcg_cache_array *old, *new;
188
-
189
- new = kvzalloc(sizeof(struct memcg_cache_array) +
190
- new_array_size * sizeof(void *), GFP_KERNEL);
191
- if (!new)
192
- return -ENOMEM;
193
-
194
- old = rcu_dereference_protected(s->memcg_params.memcg_caches,
195
- lockdep_is_held(&slab_mutex));
196
- if (old)
197
- memcpy(new->entries, old->entries,
198
- memcg_nr_cache_ids * sizeof(void *));
199
-
200
- rcu_assign_pointer(s->memcg_params.memcg_caches, new);
201
- if (old)
202
- call_rcu(&old->rcu, free_memcg_params);
203
- return 0;
204
-}
205
-
206
-int memcg_update_all_caches(int num_memcgs)
207
-{
208
- struct kmem_cache *s;
209
- int ret = 0;
210
-
211
- mutex_lock(&slab_mutex);
212
- list_for_each_entry(s, &slab_root_caches, root_caches_node) {
213
- ret = update_memcg_params(s, num_memcgs);
214
- /*
215
- * Instead of freeing the memory, we'll just leave the caches
216
- * up to this point in an updated state.
217
- */
218
- if (ret)
219
- break;
220
- }
221
- mutex_unlock(&slab_mutex);
222
- return ret;
223
-}
224
-
225
-void memcg_link_cache(struct kmem_cache *s)
226
-{
227
- if (is_root_cache(s)) {
228
- list_add(&s->root_caches_node, &slab_root_caches);
229
- } else {
230
- list_add(&s->memcg_params.children_node,
231
- &s->memcg_params.root_cache->memcg_params.children);
232
- list_add(&s->memcg_params.kmem_caches_node,
233
- &s->memcg_params.memcg->kmem_caches);
234
- }
235
-}
236
-
237
-static void memcg_unlink_cache(struct kmem_cache *s)
238
-{
239
- if (is_root_cache(s)) {
240
- list_del(&s->root_caches_node);
241
- } else {
242
- list_del(&s->memcg_params.children_node);
243
- list_del(&s->memcg_params.kmem_caches_node);
244
- }
245
-}
246
-#else
247
-static inline int init_memcg_params(struct kmem_cache *s,
248
- struct mem_cgroup *memcg, struct kmem_cache *root_cache)
249
-{
250
- return 0;
251
-}
252
-
253
-static inline void destroy_memcg_params(struct kmem_cache *s)
254
-{
255
-}
256
-
257
-static inline void memcg_unlink_cache(struct kmem_cache *s)
258
-{
259
-}
260
-#endif /* CONFIG_MEMCG_KMEM */
261
-
262135 /*
263136 * Figure out what the alignment of the objects will be given a set of
264137 * flags, a user specified alignment and the size of the objects.
....@@ -282,8 +155,7 @@
282155 align = max(align, ralign);
283156 }
284157
285
- if (align < ARCH_SLAB_MINALIGN)
286
- align = ARCH_SLAB_MINALIGN;
158
+ align = max(align, arch_slab_minalign());
287159
288160 return ALIGN(align, sizeof(void *));
289161 }
....@@ -294,9 +166,6 @@
294166 int slab_unmergeable(struct kmem_cache *s)
295167 {
296168 if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
297
- return 1;
298
-
299
- if (!is_root_cache(s))
300169 return 1;
301170
302171 if (s->ctor)
....@@ -328,12 +197,12 @@
328197 size = ALIGN(size, sizeof(void *));
329198 align = calculate_alignment(flags, align, size);
330199 size = ALIGN(size, align);
331
- flags = kmem_cache_flags(size, flags, name, NULL);
200
+ flags = kmem_cache_flags(size, flags, name);
332201
333202 if (flags & SLAB_NEVER_MERGE)
334203 return NULL;
335204
336
- list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) {
205
+ list_for_each_entry_reverse(s, &slab_caches, list) {
337206 if (slab_unmergeable(s))
338207 continue;
339208
....@@ -365,7 +234,7 @@
365234 unsigned int object_size, unsigned int align,
366235 slab_flags_t flags, unsigned int useroffset,
367236 unsigned int usersize, void (*ctor)(void *),
368
- struct mem_cgroup *memcg, struct kmem_cache *root_cache)
237
+ struct kmem_cache *root_cache)
369238 {
370239 struct kmem_cache *s;
371240 int err;
....@@ -385,30 +254,25 @@
385254 s->useroffset = useroffset;
386255 s->usersize = usersize;
387256
388
- err = init_memcg_params(s, memcg, root_cache);
389
- if (err)
390
- goto out_free_cache;
391
-
392257 err = __kmem_cache_create(s, flags);
393258 if (err)
394259 goto out_free_cache;
395260
396261 s->refcount = 1;
397262 list_add(&s->list, &slab_caches);
398
- memcg_link_cache(s);
399263 out:
400264 if (err)
401265 return ERR_PTR(err);
402266 return s;
403267
404268 out_free_cache:
405
- destroy_memcg_params(s);
406269 kmem_cache_free(kmem_cache, s);
407270 goto out;
408271 }
409272
410
-/*
411
- * kmem_cache_create_usercopy - Create a cache.
273
+/**
274
+ * kmem_cache_create_usercopy - Create a cache with a region suitable
275
+ * for copying to userspace
412276 * @name: A string which is used in /proc/slabinfo to identify this cache.
413277 * @size: The size of objects to be created in this cache.
414278 * @align: The required alignment for the objects.
....@@ -417,7 +281,6 @@
417281 * @usersize: Usercopy region size
418282 * @ctor: A constructor for the objects.
419283 *
420
- * Returns a ptr to the cache on success, NULL on failure.
421284 * Cannot be called within a interrupt, but can be interrupted.
422285 * The @ctor is run when new pages are allocated by the cache.
423286 *
....@@ -426,12 +289,14 @@
426289 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
427290 * to catch references to uninitialised memory.
428291 *
429
- * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
292
+ * %SLAB_RED_ZONE - Insert `Red` zones around the allocated memory to check
430293 * for buffer overruns.
431294 *
432295 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
433296 * cacheline. This can be beneficial if you're counting cycles as closely
434297 * as davem.
298
+ *
299
+ * Return: a pointer to the cache on success, NULL on failure.
435300 */
436301 struct kmem_cache *
437302 kmem_cache_create_usercopy(const char *name,
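
The kernel-doc above spells out the usercopy whitelist parameters; a minimal sketch of how a caller typically uses them (the struct, cache name and init helper are illustrative assumptions, not part of this patch):

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

/* Hypothetical object: only 'inline_data' is ever copied to/from user space. */
struct blob {
	spinlock_t lock;
	u32 len;
	u8 inline_data[64];
};

static struct kmem_cache *blob_cachep;

static int __init blob_cache_init(void)
{
	/* Whitelist just the inline_data region for hardened usercopy checks. */
	blob_cachep = kmem_cache_create_usercopy("blob", sizeof(struct blob),
			0, SLAB_HWCACHE_ALIGN,
			offsetof(struct blob, inline_data),
			sizeof_field(struct blob, inline_data),
			NULL);
	return blob_cachep ? 0 : -ENOMEM;
}

Copies that stay inside the whitelisted region pass the hardened usercopy checks; a copy_to_user() overlapping 'lock' or 'len' would trip them when CONFIG_HARDENED_USERCOPY is enabled.
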
....@@ -446,7 +311,16 @@
446311
447312 get_online_cpus();
448313 get_online_mems();
449
- memcg_get_cache_ids();
314
+
315
+#ifdef CONFIG_SLUB_DEBUG
316
+ /*
317
+ * If no slub_debug was enabled globally, the static key is not yet
318
+ * enabled by setup_slub_debug(). Enable it if the cache is being
319
+ * created with any of the debugging flags passed explicitly.
320
+ */
321
+ if (flags & SLAB_DEBUG_FLAGS)
322
+ static_branch_enable(&slub_debug_enabled);
323
+#endif
450324
451325 mutex_lock(&slab_mutex);
452326
....@@ -487,7 +361,7 @@
487361
488362 s = create_cache(cache_name, size,
489363 calculate_alignment(flags, align, size),
490
- flags, useroffset, usersize, ctor, NULL, NULL);
364
+ flags, useroffset, usersize, ctor, NULL);
491365 if (IS_ERR(s)) {
492366 err = PTR_ERR(s);
493367 kfree_const(cache_name);
....@@ -496,7 +370,6 @@
496370 out_unlock:
497371 mutex_unlock(&slab_mutex);
498372
499
- memcg_put_cache_ids();
500373 put_online_mems();
501374 put_online_cpus();
502375
....@@ -515,6 +388,31 @@
515388 }
516389 EXPORT_SYMBOL(kmem_cache_create_usercopy);
517390
391
+/**
392
+ * kmem_cache_create - Create a cache.
393
+ * @name: A string which is used in /proc/slabinfo to identify this cache.
394
+ * @size: The size of objects to be created in this cache.
395
+ * @align: The required alignment for the objects.
396
+ * @flags: SLAB flags
397
+ * @ctor: A constructor for the objects.
398
+ *
399
+ * Cannot be called within a interrupt, but can be interrupted.
400
+ * The @ctor is run when new pages are allocated by the cache.
401
+ *
402
+ * The flags are
403
+ *
404
+ * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
405
+ * to catch references to uninitialised memory.
406
+ *
407
+ * %SLAB_RED_ZONE - Insert `Red` zones around the allocated memory to check
408
+ * for buffer overruns.
409
+ *
410
+ * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
411
+ * cacheline. This can be beneficial if you're counting cycles as closely
412
+ * as davem.
413
+ *
414
+ * Return: a pointer to the cache on success, NULL on failure.
415
+ */
518416 struct kmem_cache *
519417 kmem_cache_create(const char *name, unsigned int size, unsigned int align,
520418 slab_flags_t flags, void (*ctor)(void *))
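
The @ctor contract documented above ("run when new pages are allocated by the cache") is easy to misread; a short sketch, again under hypothetical names:

#include <linux/init.h>
#include <linux/list.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct session {
	struct list_head node;
	refcount_t refs;
};

static struct kmem_cache *session_cachep;

/* Runs once per object when a fresh slab page is populated,
 * not on every kmem_cache_alloc(). */
static void session_ctor(void *obj)
{
	struct session *s = obj;

	INIT_LIST_HEAD(&s->node);
	refcount_set(&s->refs, 0);
}

static int __init session_cache_init(void)
{
	session_cachep = kmem_cache_create("session", sizeof(struct session),
					   0, SLAB_HWCACHE_ALIGN, session_ctor);
	return session_cachep ? 0 : -ENOMEM;
}

Because the constructor is not re-run for recycled objects, callers must free objects back in a state the constructor would have produced.
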
....@@ -532,7 +430,7 @@
532430 /*
533431 * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
534432 * @slab_caches_to_rcu_destroy list. The slab pages are freed
535
- * through RCU and and the associated kmem_cache are dereferenced
433
+ * through RCU and the associated kmem_cache are dereferenced
536434 * while freeing the pages, so the kmem_caches should be freed only
537435 * after the pending RCU operations are finished. As rcu_barrier()
538436 * is a pretty slow operation, we batch all pending destructions
....@@ -548,6 +446,8 @@
548446 rcu_barrier();
549447
550448 list_for_each_entry_safe(s, s2, &to_destroy, list) {
449
+ debugfs_slab_release(s);
450
+ kfence_shutdown_cache(s);
551451 #ifdef SLAB_SUPPORTS_SYSFS
552452 sysfs_slab_release(s);
553453 #else
....@@ -564,7 +464,6 @@
564464 if (__kmem_cache_shutdown(s) != 0)
565465 return -EBUSY;
566466
567
- memcg_unlink_cache(s);
568467 list_del(&s->list);
569468
570469 if (s->flags & SLAB_TYPESAFE_BY_RCU) {
....@@ -574,6 +473,8 @@
574473 list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
575474 schedule_work(&slab_caches_to_rcu_destroy_work);
576475 } else {
476
+ kfence_shutdown_cache(s);
477
+ debugfs_slab_release(s);
577478 #ifdef SLAB_SUPPORTS_SYSFS
578479 sysfs_slab_unlink(s);
579480 sysfs_slab_release(s);
....@@ -585,297 +486,9 @@
585486 return 0;
586487 }
587488
588
-#ifdef CONFIG_MEMCG_KMEM
589
-/*
590
- * memcg_create_kmem_cache - Create a cache for a memory cgroup.
591
- * @memcg: The memory cgroup the new cache is for.
592
- * @root_cache: The parent of the new cache.
593
- *
594
- * This function attempts to create a kmem cache that will serve allocation
595
- * requests going from @memcg to @root_cache. The new cache inherits properties
596
- * from its parent.
597
- */
598
-void memcg_create_kmem_cache(struct mem_cgroup *memcg,
599
- struct kmem_cache *root_cache)
600
-{
601
- static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
602
- struct cgroup_subsys_state *css = &memcg->css;
603
- struct memcg_cache_array *arr;
604
- struct kmem_cache *s = NULL;
605
- char *cache_name;
606
- int idx;
607
-
608
- get_online_cpus();
609
- get_online_mems();
610
-
611
- mutex_lock(&slab_mutex);
612
-
613
- /*
614
- * The memory cgroup could have been offlined while the cache
615
- * creation work was pending.
616
- */
617
- if (memcg->kmem_state != KMEM_ONLINE || root_cache->memcg_params.dying)
618
- goto out_unlock;
619
-
620
- idx = memcg_cache_id(memcg);
621
- arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
622
- lockdep_is_held(&slab_mutex));
623
-
624
- /*
625
- * Since per-memcg caches are created asynchronously on first
626
- * allocation (see memcg_kmem_get_cache()), several threads can try to
627
- * create the same cache, but only one of them may succeed.
628
- */
629
- if (arr->entries[idx])
630
- goto out_unlock;
631
-
632
- cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
633
- cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name,
634
- css->serial_nr, memcg_name_buf);
635
- if (!cache_name)
636
- goto out_unlock;
637
-
638
- s = create_cache(cache_name, root_cache->object_size,
639
- root_cache->align,
640
- root_cache->flags & CACHE_CREATE_MASK,
641
- root_cache->useroffset, root_cache->usersize,
642
- root_cache->ctor, memcg, root_cache);
643
- /*
644
- * If we could not create a memcg cache, do not complain, because
645
- * that's not critical at all as we can always proceed with the root
646
- * cache.
647
- */
648
- if (IS_ERR(s)) {
649
- kfree(cache_name);
650
- goto out_unlock;
651
- }
652
-
653
- /*
654
- * Since readers won't lock (see cache_from_memcg_idx()), we need a
655
- * barrier here to ensure nobody will see the kmem_cache partially
656
- * initialized.
657
- */
658
- smp_wmb();
659
- arr->entries[idx] = s;
660
-
661
-out_unlock:
662
- mutex_unlock(&slab_mutex);
663
-
664
- put_online_mems();
665
- put_online_cpus();
666
-}
667
-
668
-static void kmemcg_deactivate_workfn(struct work_struct *work)
669
-{
670
- struct kmem_cache *s = container_of(work, struct kmem_cache,
671
- memcg_params.deact_work);
672
-
673
- get_online_cpus();
674
- get_online_mems();
675
-
676
- mutex_lock(&slab_mutex);
677
-
678
- s->memcg_params.deact_fn(s);
679
-
680
- mutex_unlock(&slab_mutex);
681
-
682
- put_online_mems();
683
- put_online_cpus();
684
-
685
- /* done, put the ref from slab_deactivate_memcg_cache_rcu_sched() */
686
- css_put(&s->memcg_params.memcg->css);
687
-}
688
-
689
-static void kmemcg_deactivate_rcufn(struct rcu_head *head)
690
-{
691
- struct kmem_cache *s = container_of(head, struct kmem_cache,
692
- memcg_params.deact_rcu_head);
693
-
694
- /*
695
- * We need to grab blocking locks. Bounce to ->deact_work. The
696
- * work item shares the space with the RCU head and can't be
697
- * initialized eariler.
698
- */
699
- INIT_WORK(&s->memcg_params.deact_work, kmemcg_deactivate_workfn);
700
- queue_work(memcg_kmem_cache_wq, &s->memcg_params.deact_work);
701
-}
702
-
703
-/**
704
- * slab_deactivate_memcg_cache_rcu_sched - schedule deactivation after a
705
- * sched RCU grace period
706
- * @s: target kmem_cache
707
- * @deact_fn: deactivation function to call
708
- *
709
- * Schedule @deact_fn to be invoked with online cpus, mems and slab_mutex
710
- * held after a sched RCU grace period. The slab is guaranteed to stay
711
- * alive until @deact_fn is finished. This is to be used from
712
- * __kmemcg_cache_deactivate().
713
- */
714
-void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
715
- void (*deact_fn)(struct kmem_cache *))
716
-{
717
- if (WARN_ON_ONCE(is_root_cache(s)) ||
718
- WARN_ON_ONCE(s->memcg_params.deact_fn))
719
- return;
720
-
721
- /*
722
- * memcg_kmem_wq_lock is used to synchronize memcg_params.dying
723
- * flag and make sure that no new kmem_cache deactivation tasks
724
- * are queued (see flush_memcg_workqueue() ).
725
- */
726
- spin_lock_irq(&memcg_kmem_wq_lock);
727
- if (s->memcg_params.root_cache->memcg_params.dying)
728
- goto unlock;
729
-
730
- /* pin memcg so that @s doesn't get destroyed in the middle */
731
- css_get(&s->memcg_params.memcg->css);
732
-
733
- s->memcg_params.deact_fn = deact_fn;
734
- call_rcu_sched(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn);
735
-unlock:
736
- spin_unlock_irq(&memcg_kmem_wq_lock);
737
-}
738
-
739
-void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
740
-{
741
- int idx;
742
- struct memcg_cache_array *arr;
743
- struct kmem_cache *s, *c;
744
-
745
- idx = memcg_cache_id(memcg);
746
-
747
- get_online_cpus();
748
- get_online_mems();
749
-
750
- mutex_lock(&slab_mutex);
751
- list_for_each_entry(s, &slab_root_caches, root_caches_node) {
752
- arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
753
- lockdep_is_held(&slab_mutex));
754
- c = arr->entries[idx];
755
- if (!c)
756
- continue;
757
-
758
- __kmemcg_cache_deactivate(c);
759
- arr->entries[idx] = NULL;
760
- }
761
- mutex_unlock(&slab_mutex);
762
-
763
- put_online_mems();
764
- put_online_cpus();
765
-}
766
-
767
-void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
768
-{
769
- struct kmem_cache *s, *s2;
770
-
771
- get_online_cpus();
772
- get_online_mems();
773
-
774
- mutex_lock(&slab_mutex);
775
- list_for_each_entry_safe(s, s2, &memcg->kmem_caches,
776
- memcg_params.kmem_caches_node) {
777
- /*
778
- * The cgroup is about to be freed and therefore has no charges
779
- * left. Hence, all its caches must be empty by now.
780
- */
781
- BUG_ON(shutdown_cache(s));
782
- }
783
- mutex_unlock(&slab_mutex);
784
-
785
- put_online_mems();
786
- put_online_cpus();
787
-}
788
-
789
-static int shutdown_memcg_caches(struct kmem_cache *s)
790
-{
791
- struct memcg_cache_array *arr;
792
- struct kmem_cache *c, *c2;
793
- LIST_HEAD(busy);
794
- int i;
795
-
796
- BUG_ON(!is_root_cache(s));
797
-
798
- /*
799
- * First, shutdown active caches, i.e. caches that belong to online
800
- * memory cgroups.
801
- */
802
- arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
803
- lockdep_is_held(&slab_mutex));
804
- for_each_memcg_cache_index(i) {
805
- c = arr->entries[i];
806
- if (!c)
807
- continue;
808
- if (shutdown_cache(c))
809
- /*
810
- * The cache still has objects. Move it to a temporary
811
- * list so as not to try to destroy it for a second
812
- * time while iterating over inactive caches below.
813
- */
814
- list_move(&c->memcg_params.children_node, &busy);
815
- else
816
- /*
817
- * The cache is empty and will be destroyed soon. Clear
818
- * the pointer to it in the memcg_caches array so that
819
- * it will never be accessed even if the root cache
820
- * stays alive.
821
- */
822
- arr->entries[i] = NULL;
823
- }
824
-
825
- /*
826
- * Second, shutdown all caches left from memory cgroups that are now
827
- * offline.
828
- */
829
- list_for_each_entry_safe(c, c2, &s->memcg_params.children,
830
- memcg_params.children_node)
831
- shutdown_cache(c);
832
-
833
- list_splice(&busy, &s->memcg_params.children);
834
-
835
- /*
836
- * A cache being destroyed must be empty. In particular, this means
837
- * that all per memcg caches attached to it must be empty too.
838
- */
839
- if (!list_empty(&s->memcg_params.children))
840
- return -EBUSY;
841
- return 0;
842
-}
843
-
844
-static void memcg_set_kmem_cache_dying(struct kmem_cache *s)
845
-{
846
- spin_lock_irq(&memcg_kmem_wq_lock);
847
- s->memcg_params.dying = true;
848
- spin_unlock_irq(&memcg_kmem_wq_lock);
849
-}
850
-
851
-static void flush_memcg_workqueue(struct kmem_cache *s)
852
-{
853
- /*
854
- * SLUB deactivates the kmem_caches through call_rcu_sched. Make
855
- * sure all registered rcu callbacks have been invoked.
856
- */
857
- if (IS_ENABLED(CONFIG_SLUB))
858
- rcu_barrier_sched();
859
-
860
- /*
861
- * SLAB and SLUB create memcg kmem_caches through workqueue and SLUB
862
- * deactivates the memcg kmem_caches through workqueue. Make sure all
863
- * previous workitems on workqueue are processed.
864
- */
865
- if (likely(memcg_kmem_cache_wq))
866
- flush_workqueue(memcg_kmem_cache_wq);
867
-}
868
-#else
869
-static inline int shutdown_memcg_caches(struct kmem_cache *s)
870
-{
871
- return 0;
872
-}
873
-#endif /* CONFIG_MEMCG_KMEM */
874
-
875489 void slab_kmem_cache_release(struct kmem_cache *s)
876490 {
877491 __kmem_cache_release(s);
878
- destroy_memcg_params(s);
879492 kfree_const(s->name);
880493 kmem_cache_free(kmem_cache, s);
881494 }
....@@ -896,36 +509,7 @@
896509 if (s->refcount)
897510 goto out_unlock;
898511
899
-#ifdef CONFIG_MEMCG_KMEM
900
- memcg_set_kmem_cache_dying(s);
901
-
902
- mutex_unlock(&slab_mutex);
903
-
904
- put_online_mems();
905
- put_online_cpus();
906
-
907
- flush_memcg_workqueue(s);
908
-
909
- get_online_cpus();
910
- get_online_mems();
911
-
912
- mutex_lock(&slab_mutex);
913
-
914
- /*
915
- * Another thread referenced it again
916
- */
917
- if (READ_ONCE(s->refcount)) {
918
- spin_lock_irq(&memcg_kmem_wq_lock);
919
- s->memcg_params.dying = false;
920
- spin_unlock_irq(&memcg_kmem_wq_lock);
921
- goto out_unlock;
922
- }
923
-#endif
924
-
925
- err = shutdown_memcg_caches(s);
926
- if (!err)
927
- err = shutdown_cache(s);
928
-
512
+ err = shutdown_cache(s);
929513 if (err) {
930514 pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
931515 s->name);
....@@ -945,6 +529,8 @@
945529 *
946530 * Releases as many slabs as possible for a cache.
947531 * To help debugging, a zero exit status indicates all slabs were released.
532
+ *
533
+ * Return: %0 if all slabs were released, non-zero otherwise
948534 */
949535 int kmem_cache_shrink(struct kmem_cache *cachep)
950536 {
....@@ -972,14 +558,21 @@
972558 unsigned int useroffset, unsigned int usersize)
973559 {
974560 int err;
561
+ unsigned int align = ARCH_KMALLOC_MINALIGN;
975562
976563 s->name = name;
977564 s->size = s->object_size = size;
978
- s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
565
+
566
+ /*
567
+ * For power of two sizes, guarantee natural alignment for kmalloc
568
+ * caches, regardless of SL*B debugging options.
569
+ */
570
+ if (is_power_of_2(size))
571
+ align = max(align, size);
572
+ s->align = calculate_alignment(flags, align, size);
573
+
979574 s->useroffset = useroffset;
980575 s->usersize = usersize;
981
-
982
- slab_init_memcg_params(s);
983576
984577 err = __kmem_cache_create(s, flags);
985578
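
The is_power_of_2() branch above is what gives kmalloc caches natural alignment; the effective rule, assuming ARCH_KMALLOC_MINALIGN is 8 on the target architecture:

	unsigned int align = ARCH_KMALLOC_MINALIGN;	/* 8 in this sketch */

	if (is_power_of_2(size))
		align = max(align, size);
	/*
	 * size = 96   -> not a power of two, align stays 8
	 * size = 64   -> align = max(8, 64)   = 64
	 * size = 4096 -> align = max(8, 4096) = 4096
	 */

so a kmalloc(4096, ...) object can be assumed page aligned, and, per the comment in the hunk, this holds regardless of SL*B debugging options.
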
....@@ -1000,14 +593,15 @@
1000593 panic("Out of memory when creating slab %s\n", name);
1001594
1002595 create_boot_cache(s, name, size, flags, useroffset, usersize);
596
+ kasan_cache_create_kmalloc(s);
1003597 list_add(&s->list, &slab_caches);
1004
- memcg_link_cache(s);
1005598 s->refcount = 1;
1006599 return s;
1007600 }
1008601
1009602 struct kmem_cache *
1010
-kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
603
+kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init =
604
+{ /* initialization for https://bugs.llvm.org/show_bug.cgi?id=42570 */ };
1011605 EXPORT_SYMBOL(kmalloc_caches);
1012606
1013607 /*
....@@ -1055,6 +649,7 @@
1055649 struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
1056650 {
1057651 unsigned int index;
652
+ struct kmem_cache *s = NULL;
1058653
1059654 if (size <= 192) {
1060655 if (!size)
....@@ -1062,15 +657,34 @@
1062657
1063658 index = size_index[size_index_elem(size)];
1064659 } else {
1065
- if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
1066
- WARN_ON(1);
660
+ if (WARN_ON_ONCE(size > KMALLOC_MAX_CACHE_SIZE))
1067661 return NULL;
1068
- }
1069662 index = fls(size - 1);
1070663 }
1071664
665
+ trace_android_vh_kmalloc_slab(index, flags, &s);
666
+ if (s)
667
+ return s;
668
+
1072669 return kmalloc_caches[kmalloc_type(flags)][index];
1073670 }
671
+
672
+#ifdef CONFIG_ZONE_DMA
673
+#define INIT_KMALLOC_INFO(__size, __short_size) \
674
+{ \
675
+ .name[KMALLOC_NORMAL] = "kmalloc-" #__short_size, \
676
+ .name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size, \
677
+ .name[KMALLOC_DMA] = "dma-kmalloc-" #__short_size, \
678
+ .size = __size, \
679
+}
680
+#else
681
+#define INIT_KMALLOC_INFO(__size, __short_size) \
682
+{ \
683
+ .name[KMALLOC_NORMAL] = "kmalloc-" #__short_size, \
684
+ .name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size, \
685
+ .size = __size, \
686
+}
687
+#endif
1074688
1075689 /*
1076690 * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
....@@ -1078,20 +692,33 @@
1078692 * kmalloc-67108864.
1079693 */
1080694 const struct kmalloc_info_struct kmalloc_info[] __initconst = {
1081
- {NULL, 0}, {"kmalloc-96", 96},
1082
- {"kmalloc-192", 192}, {"kmalloc-8", 8},
1083
- {"kmalloc-16", 16}, {"kmalloc-32", 32},
1084
- {"kmalloc-64", 64}, {"kmalloc-128", 128},
1085
- {"kmalloc-256", 256}, {"kmalloc-512", 512},
1086
- {"kmalloc-1k", 1024}, {"kmalloc-2k", 2048},
1087
- {"kmalloc-4k", 4096}, {"kmalloc-8k", 8192},
1088
- {"kmalloc-16k", 16384}, {"kmalloc-32k", 32768},
1089
- {"kmalloc-64k", 65536}, {"kmalloc-128k", 131072},
1090
- {"kmalloc-256k", 262144}, {"kmalloc-512k", 524288},
1091
- {"kmalloc-1M", 1048576}, {"kmalloc-2M", 2097152},
1092
- {"kmalloc-4M", 4194304}, {"kmalloc-8M", 8388608},
1093
- {"kmalloc-16M", 16777216}, {"kmalloc-32M", 33554432},
1094
- {"kmalloc-64M", 67108864}
695
+ INIT_KMALLOC_INFO(0, 0),
696
+ INIT_KMALLOC_INFO(96, 96),
697
+ INIT_KMALLOC_INFO(192, 192),
698
+ INIT_KMALLOC_INFO(8, 8),
699
+ INIT_KMALLOC_INFO(16, 16),
700
+ INIT_KMALLOC_INFO(32, 32),
701
+ INIT_KMALLOC_INFO(64, 64),
702
+ INIT_KMALLOC_INFO(128, 128),
703
+ INIT_KMALLOC_INFO(256, 256),
704
+ INIT_KMALLOC_INFO(512, 512),
705
+ INIT_KMALLOC_INFO(1024, 1k),
706
+ INIT_KMALLOC_INFO(2048, 2k),
707
+ INIT_KMALLOC_INFO(4096, 4k),
708
+ INIT_KMALLOC_INFO(8192, 8k),
709
+ INIT_KMALLOC_INFO(16384, 16k),
710
+ INIT_KMALLOC_INFO(32768, 32k),
711
+ INIT_KMALLOC_INFO(65536, 64k),
712
+ INIT_KMALLOC_INFO(131072, 128k),
713
+ INIT_KMALLOC_INFO(262144, 256k),
714
+ INIT_KMALLOC_INFO(524288, 512k),
715
+ INIT_KMALLOC_INFO(1048576, 1M),
716
+ INIT_KMALLOC_INFO(2097152, 2M),
717
+ INIT_KMALLOC_INFO(4194304, 4M),
718
+ INIT_KMALLOC_INFO(8388608, 8M),
719
+ INIT_KMALLOC_INFO(16777216, 16M),
720
+ INIT_KMALLOC_INFO(33554432, 32M),
721
+ INIT_KMALLOC_INFO(67108864, 64M)
1095722 };
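
Each entry above is generated by the INIT_KMALLOC_INFO() macro added earlier in this patch; with CONFIG_ZONE_DMA enabled, INIT_KMALLOC_INFO(1024, 1k) expands to roughly:

	{
		.name[KMALLOC_NORMAL]	= "kmalloc-1k",
		.name[KMALLOC_RECLAIM]	= "kmalloc-rcl-1k",
		.name[KMALLOC_DMA]	= "dma-kmalloc-1k",
		.size = 1024,
	},

which is why the later hunks can delete kmalloc_cache_name() and the kasprintf()-built "kmalloc-rcl-*" / "dma-kmalloc-*" names: every cache name is now a compile-time string literal.
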
1096723
1097724 /*
....@@ -1141,36 +768,14 @@
1141768 }
1142769 }
1143770
1144
-static const char *
1145
-kmalloc_cache_name(const char *prefix, unsigned int size)
1146
-{
1147
-
1148
- static const char units[3] = "\0kM";
1149
- int idx = 0;
1150
-
1151
- while (size >= 1024 && (size % 1024 == 0)) {
1152
- size /= 1024;
1153
- idx++;
1154
- }
1155
-
1156
- return kasprintf(GFP_NOWAIT, "%s-%u%c", prefix, size, units[idx]);
1157
-}
1158
-
1159771 static void __init
1160
-new_kmalloc_cache(int idx, int type, slab_flags_t flags)
772
+new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
1161773 {
1162
- const char *name;
1163
-
1164
- if (type == KMALLOC_RECLAIM) {
774
+ if (type == KMALLOC_RECLAIM)
1165775 flags |= SLAB_RECLAIM_ACCOUNT;
1166
- name = kmalloc_cache_name("kmalloc-rcl",
1167
- kmalloc_info[idx].size);
1168
- BUG_ON(!name);
1169
- } else {
1170
- name = kmalloc_info[idx].name;
1171
- }
1172776
1173
- kmalloc_caches[type][idx] = create_kmalloc_cache(name,
777
+ kmalloc_caches[type][idx] = create_kmalloc_cache(
778
+ kmalloc_info[idx].name[type],
1174779 kmalloc_info[idx].size, flags, 0,
1175780 kmalloc_info[idx].size);
1176781 }
....@@ -1182,7 +787,8 @@
1182787 */
1183788 void __init create_kmalloc_caches(slab_flags_t flags)
1184789 {
1185
- int i, type;
790
+ int i;
791
+ enum kmalloc_cache_type type;
1186792
1187793 for (type = KMALLOC_NORMAL; type <= KMALLOC_RECLAIM; type++) {
1188794 for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
....@@ -1211,17 +817,28 @@
1211817 struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i];
1212818
1213819 if (s) {
1214
- unsigned int size = kmalloc_size(i);
1215
- const char *n = kmalloc_cache_name("dma-kmalloc", size);
1216
-
1217
- BUG_ON(!n);
1218820 kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache(
1219
- n, size, SLAB_CACHE_DMA | flags, 0, 0);
821
+ kmalloc_info[i].name[KMALLOC_DMA],
822
+ kmalloc_info[i].size,
823
+ SLAB_CACHE_DMA | flags, 0,
824
+ kmalloc_info[i].size);
1220825 }
1221826 }
1222827 #endif
1223828 }
1224829 #endif /* !CONFIG_SLOB */
830
+
831
+gfp_t kmalloc_fix_flags(gfp_t flags)
832
+{
833
+ gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
834
+
835
+ flags &= ~GFP_SLAB_BUG_MASK;
836
+ pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
837
+ invalid_mask, &invalid_mask, flags, &flags);
838
+ dump_stack();
839
+
840
+ return flags;
841
+}
1225842
1226843 /*
1227844 * To avoid unnecessary overhead, we pass through large allocation requests
....@@ -1230,13 +847,21 @@
1230847 */
1231848 void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
1232849 {
1233
- void *ret;
850
+ void *ret = NULL;
1234851 struct page *page;
852
+
853
+ if (unlikely(flags & GFP_SLAB_BUG_MASK))
854
+ flags = kmalloc_fix_flags(flags);
1235855
1236856 flags |= __GFP_COMP;
1237857 page = alloc_pages(flags, order);
1238
- ret = page ? page_address(page) : NULL;
858
+ if (likely(page)) {
859
+ ret = page_address(page);
860
+ mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
861
+ PAGE_SIZE << order);
862
+ }
1239863 ret = kasan_kmalloc_large(ret, size, flags);
864
+ /* As ret might get tagged, call kmemleak hook after KASAN. */
1240865 kmemleak_alloc(ret, size, 1, flags);
1241866 return ret;
1242867 }
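
kmalloc_fix_flags(), added in the previous hunk and called here on the large-allocation path, strips bogus GFP bits before they reach the page allocator; a sketch of the observable behaviour (GFP_SLAB_BUG_MASK is assumed to cover __GFP_HIGHMEM, __GFP_DMA32 and bits outside __GFP_BITS_MASK):

	/* e.g. a caller mistakenly passing a highmem flag to a large kmalloc: */
	void *p = kmalloc(16384, GFP_KERNEL | __GFP_HIGHMEM);
	/*
	 * kmalloc_fix_flags() prints the "Unexpected gfp: ... Fix your code!"
	 * warning with a stack dump, masks the offending bits out, and the
	 * allocation then proceeds with the remaining, valid flags.
	 */
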
....@@ -1330,38 +955,17 @@
1330955 void *slab_start(struct seq_file *m, loff_t *pos)
1331956 {
1332957 mutex_lock(&slab_mutex);
1333
- return seq_list_start(&slab_root_caches, *pos);
958
+ return seq_list_start(&slab_caches, *pos);
1334959 }
1335960
1336961 void *slab_next(struct seq_file *m, void *p, loff_t *pos)
1337962 {
1338
- return seq_list_next(p, &slab_root_caches, pos);
963
+ return seq_list_next(p, &slab_caches, pos);
1339964 }
1340965
1341966 void slab_stop(struct seq_file *m, void *p)
1342967 {
1343968 mutex_unlock(&slab_mutex);
1344
-}
1345
-
1346
-static void
1347
-memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
1348
-{
1349
- struct kmem_cache *c;
1350
- struct slabinfo sinfo;
1351
-
1352
- if (!is_root_cache(s))
1353
- return;
1354
-
1355
- for_each_memcg_cache(c, s) {
1356
- memset(&sinfo, 0, sizeof(sinfo));
1357
- get_slabinfo(c, &sinfo);
1358
-
1359
- info->active_slabs += sinfo.active_slabs;
1360
- info->num_slabs += sinfo.num_slabs;
1361
- info->shared_avail += sinfo.shared_avail;
1362
- info->active_objs += sinfo.active_objs;
1363
- info->num_objs += sinfo.num_objs;
1364
- }
1365969 }
1366970
1367971 static void cache_show(struct kmem_cache *s, struct seq_file *m)
....@@ -1371,10 +975,8 @@
1371975 memset(&sinfo, 0, sizeof(sinfo));
1372976 get_slabinfo(s, &sinfo);
1373977
1374
- memcg_accumulate_slabinfo(s, &sinfo);
1375
-
1376978 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
1377
- cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
979
+ s->name, sinfo.active_objs, sinfo.num_objs, s->size,
1378980 sinfo.objects_per_slab, (1 << sinfo.cache_order));
1379981
1380982 seq_printf(m, " : tunables %4u %4u %4u",
....@@ -1387,9 +989,9 @@
1387989
1388990 static int slab_show(struct seq_file *m, void *p)
1389991 {
1390
- struct kmem_cache *s = list_entry(p, struct kmem_cache, root_caches_node);
992
+ struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
1391993
1392
- if (p == slab_root_caches.next)
994
+ if (p == slab_caches.next)
1393995 print_slabinfo_header(m);
1394996 cache_show(s, m);
1395997 return 0;
....@@ -1416,49 +1018,26 @@
14161018 pr_info("Name Used Total\n");
14171019
14181020 list_for_each_entry_safe(s, s2, &slab_caches, list) {
1419
- if (!is_root_cache(s) || (s->flags & SLAB_RECLAIM_ACCOUNT))
1021
+ if (s->flags & SLAB_RECLAIM_ACCOUNT)
14201022 continue;
14211023
14221024 get_slabinfo(s, &sinfo);
14231025
14241026 if (sinfo.num_objs > 0)
1425
- pr_info("%-17s %10luKB %10luKB\n", cache_name(s),
1027
+ pr_info("%-17s %10luKB %10luKB\n", s->name,
14261028 (sinfo.active_objs * s->size) / 1024,
14271029 (sinfo.num_objs * s->size) / 1024);
14281030 }
14291031 mutex_unlock(&slab_mutex);
14301032 }
14311033
1432
-#if defined(CONFIG_MEMCG)
1433
-void *memcg_slab_start(struct seq_file *m, loff_t *pos)
1434
-{
1435
- struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
1436
-
1437
- mutex_lock(&slab_mutex);
1438
- return seq_list_start(&memcg->kmem_caches, *pos);
1439
-}
1440
-
1441
-void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos)
1442
-{
1443
- struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
1444
-
1445
- return seq_list_next(p, &memcg->kmem_caches, pos);
1446
-}
1447
-
1448
-void memcg_slab_stop(struct seq_file *m, void *p)
1449
-{
1450
- mutex_unlock(&slab_mutex);
1451
-}
1452
-
1034
+#if defined(CONFIG_MEMCG_KMEM)
14531035 int memcg_slab_show(struct seq_file *m, void *p)
14541036 {
1455
- struct kmem_cache *s = list_entry(p, struct kmem_cache,
1456
- memcg_params.kmem_caches_node);
1457
- struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
1458
-
1459
- if (p == memcg->kmem_caches.next)
1460
- print_slabinfo_header(m);
1461
- cache_show(s, m);
1037
+ /*
1038
+ * Deprecated.
1039
+ * Please, take a look at tools/cgroup/slabinfo.py .
1040
+ */
14621041 return 0;
14631042 }
14641043 #endif
....@@ -1488,63 +1067,54 @@
14881067 return seq_open(file, &slabinfo_op);
14891068 }
14901069
1491
-static const struct file_operations proc_slabinfo_operations = {
1492
- .open = slabinfo_open,
1493
- .read = seq_read,
1494
- .write = slabinfo_write,
1495
- .llseek = seq_lseek,
1496
- .release = seq_release,
1070
+static const struct proc_ops slabinfo_proc_ops = {
1071
+ .proc_flags = PROC_ENTRY_PERMANENT,
1072
+ .proc_open = slabinfo_open,
1073
+ .proc_read = seq_read,
1074
+ .proc_write = slabinfo_write,
1075
+ .proc_lseek = seq_lseek,
1076
+ .proc_release = seq_release,
14971077 };
14981078
14991079 static int __init slab_proc_init(void)
15001080 {
1501
- proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
1502
- &proc_slabinfo_operations);
1081
+ proc_create("slabinfo", SLABINFO_RIGHTS, NULL, &slabinfo_proc_ops);
15031082 return 0;
15041083 }
15051084 module_init(slab_proc_init);
1085
+
15061086 #endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */
15071087
15081088 static __always_inline void *__do_krealloc(const void *p, size_t new_size,
15091089 gfp_t flags)
15101090 {
15111091 void *ret;
1512
- size_t ks = 0;
1092
+ size_t ks;
15131093
1514
- if (p)
1515
- ks = ksize(p);
1094
+ /* Don't use instrumented ksize to allow precise KASAN poisoning. */
1095
+ if (likely(!ZERO_OR_NULL_PTR(p))) {
1096
+ if (!kasan_check_byte(p))
1097
+ return NULL;
1098
+ ks = kfence_ksize(p) ?: __ksize(p);
1099
+ } else
1100
+ ks = 0;
15161101
1102
+ /* If the object still fits, repoison it precisely. */
15171103 if (ks >= new_size) {
15181104 p = kasan_krealloc((void *)p, new_size, flags);
15191105 return (void *)p;
15201106 }
15211107
15221108 ret = kmalloc_track_caller(new_size, flags);
1523
- if (ret && p)
1524
- memcpy(ret, p, ks);
1109
+ if (ret && p) {
1110
+ /* Disable KASAN checks as the object's redzone is accessed. */
1111
+ kasan_disable_current();
1112
+ memcpy(ret, kasan_reset_tag(p), ks);
1113
+ kasan_enable_current();
1114
+ }
15251115
15261116 return ret;
15271117 }
1528
-
1529
-/**
1530
- * __krealloc - like krealloc() but don't free @p.
1531
- * @p: object to reallocate memory for.
1532
- * @new_size: how many bytes of memory are required.
1533
- * @flags: the type of memory to allocate.
1534
- *
1535
- * This function is like krealloc() except it never frees the originally
1536
- * allocated buffer. Use this if you don't want to free the buffer immediately
1537
- * like, for example, with RCU.
1538
- */
1539
-void *__krealloc(const void *p, size_t new_size, gfp_t flags)
1540
-{
1541
- if (unlikely(!new_size))
1542
- return ZERO_SIZE_PTR;
1543
-
1544
- return __do_krealloc(p, new_size, flags);
1545
-
1546
-}
1547
-EXPORT_SYMBOL(__krealloc);
15481118
15491119 /**
15501120 * krealloc - reallocate memory. The contents will remain unchanged.
....@@ -1556,6 +1126,8 @@
15561126 * lesser of the new and old sizes. If @p is %NULL, krealloc()
15571127 * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
15581128 * %NULL pointer, the object pointed to is freed.
1129
+ *
1130
+ * Return: pointer to the allocated memory or %NULL in case of error
15591131 */
15601132 void *krealloc(const void *p, size_t new_size, gfp_t flags)
15611133 {
....@@ -1575,28 +1147,73 @@
15751147 EXPORT_SYMBOL(krealloc);
15761148
15771149 /**
1578
- * kzfree - like kfree but zero memory
1150
+ * kfree_sensitive - Clear sensitive information in memory before freeing
15791151 * @p: object to free memory of
15801152 *
15811153 * The memory of the object @p points to is zeroed before freed.
1582
- * If @p is %NULL, kzfree() does nothing.
1154
+ * If @p is %NULL, kfree_sensitive() does nothing.
15831155 *
15841156 * Note: this function zeroes the whole allocated buffer which can be a good
15851157 * deal bigger than the requested buffer size passed to kmalloc(). So be
15861158 * careful when using this function in performance sensitive code.
15871159 */
1588
-void kzfree(const void *p)
1160
+void kfree_sensitive(const void *p)
15891161 {
15901162 size_t ks;
15911163 void *mem = (void *)p;
15921164
1593
- if (unlikely(ZERO_OR_NULL_PTR(mem)))
1594
- return;
15951165 ks = ksize(mem);
1596
- memzero_explicit(mem, ks);
1166
+ if (ks)
1167
+ memzero_explicit(mem, ks);
15971168 kfree(mem);
15981169 }
1599
-EXPORT_SYMBOL(kzfree);
1170
+EXPORT_SYMBOL(kfree_sensitive);
1171
+
1172
+/**
1173
+ * ksize - get the actual amount of memory allocated for a given object
1174
+ * @objp: Pointer to the object
1175
+ *
1176
+ * kmalloc may internally round up allocations and return more memory
1177
+ * than requested. ksize() can be used to determine the actual amount of
1178
+ * memory allocated. The caller may use this additional memory, even though
1179
+ * a smaller amount of memory was initially specified with the kmalloc call.
1180
+ * The caller must guarantee that objp points to a valid object previously
1181
+ * allocated with either kmalloc() or kmem_cache_alloc(). The object
1182
+ * must not be freed during the duration of the call.
1183
+ *
1184
+ * Return: size of the actual memory used by @objp in bytes
1185
+ */
1186
+size_t ksize(const void *objp)
1187
+{
1188
+ size_t size;
1189
+
1190
+ /*
1191
+ * We need to first check that the pointer to the object is valid, and
1192
+ * only then unpoison the memory. The report printed from ksize() is
1193
+ * more useful, then when it's printed later when the behaviour could
1194
+ * be undefined due to a potential use-after-free or double-free.
1195
+ *
1196
+ * We use kasan_check_byte(), which is supported for the hardware
1197
+ * tag-based KASAN mode, unlike kasan_check_read/write().
1198
+ *
1199
+ * If the pointed to memory is invalid, we return 0 to avoid users of
1200
+ * ksize() writing to and potentially corrupting the memory region.
1201
+ *
1202
+ * We want to perform the check before __ksize(), to avoid potentially
1203
+ * crashing in __ksize() due to accessing invalid metadata.
1204
+ */
1205
+ if (unlikely(ZERO_OR_NULL_PTR(objp)) || !kasan_check_byte(objp))
1206
+ return 0;
1207
+
1208
+ size = kfence_ksize(objp) ?: __ksize(objp);
1209
+ /*
1210
+ * We assume that ksize callers could use whole allocated area,
1211
+ * so we need to unpoison this area.
1212
+ */
1213
+ kasan_unpoison_range(objp, size);
1214
+ return size;
1215
+}
1216
+EXPORT_SYMBOL(ksize);
16001217
16011218 /* Tracepoints definitions. */
16021219 EXPORT_TRACEPOINT_SYMBOL(kmalloc);
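
Two of the helpers touched above pair naturally in driver code; a brief usage sketch with illustrative names:

#include <linux/slab.h>

/*
 * Key material: kfree_sensitive() zeroes the whole allocation
 * (memzero_explicit() over ksize() bytes) before freeing, so the slack
 * that kmalloc rounded the request up to is wiped as well.
 */
static void drop_key(u8 *key)
{
	kfree_sensitive(key);	/* also safe for NULL / ZERO_SIZE_PTR */
}

/*
 * ksize() reports the usable size of an allocation; per the new
 * kernel-doc the caller may use that slack, and KASAN unpoisons it.
 */
static bool fits_in_place(const void *buf, size_t want)
{
	return want <= ksize(buf);
}
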