.. | .. |
---|
12 | 12 | #include <linux/memory.h> |
---|
13 | 13 | #include <linux/cache.h> |
---|
14 | 14 | #include <linux/compiler.h> |
---|
| 15 | +#include <linux/kfence.h> |
---|
15 | 16 | #include <linux/module.h> |
---|
16 | 17 | #include <linux/cpu.h> |
---|
17 | 18 | #include <linux/uaccess.h> |
---|
18 | 19 | #include <linux/seq_file.h> |
---|
19 | 20 | #include <linux/proc_fs.h> |
---|
| 21 | +#include <linux/debugfs.h> |
---|
| 22 | +#include <linux/kasan.h> |
---|
20 | 23 | #include <asm/cacheflush.h> |
---|
21 | 24 | #include <asm/tlbflush.h> |
---|
22 | 25 | #include <asm/page.h> |
---|
.. | .. |
---|
24 | 27 | |
---|
25 | 28 | #define CREATE_TRACE_POINTS |
---|
26 | 29 | #include <trace/events/kmem.h> |
---|
| 30 | +#undef CREATE_TRACE_POINTS |
---|
| 31 | +#include <trace/hooks/mm.h> |
---|
| 32 | +#include "internal.h" |
---|
27 | 33 | |
---|
28 | 34 | #include "slab.h" |
---|
29 | 35 | |
---|
.. | .. |
---|
50 | 56 | */ |
---|
51 | 57 | #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ |
---|
52 | 58 | SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \ |
---|
53 | | - SLAB_FAILSLAB | SLAB_KASAN) |
---|
| 59 | + SLAB_FAILSLAB | kasan_never_merge()) |
---|
54 | 60 | |
---|
55 | 61 | #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \ |
---|
56 | 62 | SLAB_CACHE_DMA32 | SLAB_ACCOUNT) |
---|
.. | .. |
---|
84 | 90 | #ifdef CONFIG_DEBUG_VM |
---|
85 | 91 | static int kmem_cache_sanity_check(const char *name, unsigned int size) |
---|
86 | 92 | { |
---|
87 | | - if (!name || in_interrupt() || size < sizeof(void *) || |
---|
88 | | - size > KMALLOC_MAX_SIZE) { |
---|
| 93 | + if (!name || in_interrupt() || size > KMALLOC_MAX_SIZE) { |
---|
89 | 94 | pr_err("kmem_cache_create(%s) integrity check failed\n", name); |
---|
90 | 95 | return -EINVAL; |
---|
91 | 96 | } |
---|
.. | .. |
---|
127 | 132 | return i; |
---|
128 | 133 | } |
---|
129 | 134 | |
---|
130 | | -#ifdef CONFIG_MEMCG_KMEM |
---|
131 | | - |
---|
132 | | -LIST_HEAD(slab_root_caches); |
---|
133 | | -static DEFINE_SPINLOCK(memcg_kmem_wq_lock); |
---|
134 | | - |
---|
135 | | -void slab_init_memcg_params(struct kmem_cache *s) |
---|
136 | | -{ |
---|
137 | | - s->memcg_params.root_cache = NULL; |
---|
138 | | - RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL); |
---|
139 | | - INIT_LIST_HEAD(&s->memcg_params.children); |
---|
140 | | - s->memcg_params.dying = false; |
---|
141 | | -} |
---|
142 | | - |
---|
143 | | -static int init_memcg_params(struct kmem_cache *s, |
---|
144 | | - struct mem_cgroup *memcg, struct kmem_cache *root_cache) |
---|
145 | | -{ |
---|
146 | | - struct memcg_cache_array *arr; |
---|
147 | | - |
---|
148 | | - if (root_cache) { |
---|
149 | | - s->memcg_params.root_cache = root_cache; |
---|
150 | | - s->memcg_params.memcg = memcg; |
---|
151 | | - INIT_LIST_HEAD(&s->memcg_params.children_node); |
---|
152 | | - INIT_LIST_HEAD(&s->memcg_params.kmem_caches_node); |
---|
153 | | - return 0; |
---|
154 | | - } |
---|
155 | | - |
---|
156 | | - slab_init_memcg_params(s); |
---|
157 | | - |
---|
158 | | - if (!memcg_nr_cache_ids) |
---|
159 | | - return 0; |
---|
160 | | - |
---|
161 | | - arr = kvzalloc(sizeof(struct memcg_cache_array) + |
---|
162 | | - memcg_nr_cache_ids * sizeof(void *), |
---|
163 | | - GFP_KERNEL); |
---|
164 | | - if (!arr) |
---|
165 | | - return -ENOMEM; |
---|
166 | | - |
---|
167 | | - RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr); |
---|
168 | | - return 0; |
---|
169 | | -} |
---|
170 | | - |
---|
171 | | -static void destroy_memcg_params(struct kmem_cache *s) |
---|
172 | | -{ |
---|
173 | | - if (is_root_cache(s)) |
---|
174 | | - kvfree(rcu_access_pointer(s->memcg_params.memcg_caches)); |
---|
175 | | -} |
---|
176 | | - |
---|
177 | | -static void free_memcg_params(struct rcu_head *rcu) |
---|
178 | | -{ |
---|
179 | | - struct memcg_cache_array *old; |
---|
180 | | - |
---|
181 | | - old = container_of(rcu, struct memcg_cache_array, rcu); |
---|
182 | | - kvfree(old); |
---|
183 | | -} |
---|
184 | | - |
---|
185 | | -static int update_memcg_params(struct kmem_cache *s, int new_array_size) |
---|
186 | | -{ |
---|
187 | | - struct memcg_cache_array *old, *new; |
---|
188 | | - |
---|
189 | | - new = kvzalloc(sizeof(struct memcg_cache_array) + |
---|
190 | | - new_array_size * sizeof(void *), GFP_KERNEL); |
---|
191 | | - if (!new) |
---|
192 | | - return -ENOMEM; |
---|
193 | | - |
---|
194 | | - old = rcu_dereference_protected(s->memcg_params.memcg_caches, |
---|
195 | | - lockdep_is_held(&slab_mutex)); |
---|
196 | | - if (old) |
---|
197 | | - memcpy(new->entries, old->entries, |
---|
198 | | - memcg_nr_cache_ids * sizeof(void *)); |
---|
199 | | - |
---|
200 | | - rcu_assign_pointer(s->memcg_params.memcg_caches, new); |
---|
201 | | - if (old) |
---|
202 | | - call_rcu(&old->rcu, free_memcg_params); |
---|
203 | | - return 0; |
---|
204 | | -} |
---|
205 | | - |
---|
206 | | -int memcg_update_all_caches(int num_memcgs) |
---|
207 | | -{ |
---|
208 | | - struct kmem_cache *s; |
---|
209 | | - int ret = 0; |
---|
210 | | - |
---|
211 | | - mutex_lock(&slab_mutex); |
---|
212 | | - list_for_each_entry(s, &slab_root_caches, root_caches_node) { |
---|
213 | | - ret = update_memcg_params(s, num_memcgs); |
---|
214 | | - /* |
---|
215 | | - * Instead of freeing the memory, we'll just leave the caches |
---|
216 | | - * up to this point in an updated state. |
---|
217 | | - */ |
---|
218 | | - if (ret) |
---|
219 | | - break; |
---|
220 | | - } |
---|
221 | | - mutex_unlock(&slab_mutex); |
---|
222 | | - return ret; |
---|
223 | | -} |
---|
224 | | - |
---|
225 | | -void memcg_link_cache(struct kmem_cache *s) |
---|
226 | | -{ |
---|
227 | | - if (is_root_cache(s)) { |
---|
228 | | - list_add(&s->root_caches_node, &slab_root_caches); |
---|
229 | | - } else { |
---|
230 | | - list_add(&s->memcg_params.children_node, |
---|
231 | | - &s->memcg_params.root_cache->memcg_params.children); |
---|
232 | | - list_add(&s->memcg_params.kmem_caches_node, |
---|
233 | | - &s->memcg_params.memcg->kmem_caches); |
---|
234 | | - } |
---|
235 | | -} |
---|
236 | | - |
---|
237 | | -static void memcg_unlink_cache(struct kmem_cache *s) |
---|
238 | | -{ |
---|
239 | | - if (is_root_cache(s)) { |
---|
240 | | - list_del(&s->root_caches_node); |
---|
241 | | - } else { |
---|
242 | | - list_del(&s->memcg_params.children_node); |
---|
243 | | - list_del(&s->memcg_params.kmem_caches_node); |
---|
244 | | - } |
---|
245 | | -} |
---|
246 | | -#else |
---|
247 | | -static inline int init_memcg_params(struct kmem_cache *s, |
---|
248 | | - struct mem_cgroup *memcg, struct kmem_cache *root_cache) |
---|
249 | | -{ |
---|
250 | | - return 0; |
---|
251 | | -} |
---|
252 | | - |
---|
253 | | -static inline void destroy_memcg_params(struct kmem_cache *s) |
---|
254 | | -{ |
---|
255 | | -} |
---|
256 | | - |
---|
257 | | -static inline void memcg_unlink_cache(struct kmem_cache *s) |
---|
258 | | -{ |
---|
259 | | -} |
---|
260 | | -#endif /* CONFIG_MEMCG_KMEM */ |
---|
261 | | - |
---|
262 | 135 | /* |
---|
263 | 136 | * Figure out what the alignment of the objects will be given a set of |
---|
264 | 137 | * flags, a user specified alignment and the size of the objects. |
---|
.. | .. |
---|
282 | 155 | align = max(align, ralign); |
---|
283 | 156 | } |
---|
284 | 157 | |
---|
285 | | - if (align < ARCH_SLAB_MINALIGN) |
---|
286 | | - align = ARCH_SLAB_MINALIGN; |
---|
| 158 | + align = max(align, arch_slab_minalign()); |
---|
287 | 159 | |
---|
288 | 160 | return ALIGN(align, sizeof(void *)); |
---|
289 | 161 | } |
---|
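The hunk above is the tail of calculate_alignment(): the caller-supplied alignment is raised to the (now runtime-queried) architecture minimum and rounded to a pointer-sized multiple. A minimal sketch of the effect, assuming a hypothetical architecture whose arch_slab_minalign() returns 16:

```c
/* Illustrative only: mirrors the two statements above with the
 * architecture minimum hard-coded to 16 for the sake of the example. */
static unsigned int example_slab_align(unsigned int requested)
{
	unsigned int align = max(requested, 16u);	/* arch_slab_minalign() == 16, hypothetically */

	return ALIGN(align, sizeof(void *));		/* example_slab_align(8) == 16 */
}
```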
.. | .. |
---|
294 | 166 | int slab_unmergeable(struct kmem_cache *s) |
---|
295 | 167 | { |
---|
296 | 168 | if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE)) |
---|
297 | | - return 1; |
---|
298 | | - |
---|
299 | | - if (!is_root_cache(s)) |
---|
300 | 169 | return 1; |
---|
301 | 170 | |
---|
302 | 171 | if (s->ctor) |
---|
.. | .. |
---|
328 | 197 | size = ALIGN(size, sizeof(void *)); |
---|
329 | 198 | align = calculate_alignment(flags, align, size); |
---|
330 | 199 | size = ALIGN(size, align); |
---|
331 | | - flags = kmem_cache_flags(size, flags, name, NULL); |
---|
| 200 | + flags = kmem_cache_flags(size, flags, name); |
---|
332 | 201 | |
---|
333 | 202 | if (flags & SLAB_NEVER_MERGE) |
---|
334 | 203 | return NULL; |
---|
335 | 204 | |
---|
336 | | - list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) { |
---|
| 205 | + list_for_each_entry_reverse(s, &slab_caches, list) { |
---|
337 | 206 | if (slab_unmergeable(s)) |
---|
338 | 207 | continue; |
---|
339 | 208 | |
---|
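find_mergeable(), shown above, now walks the single slab_caches list; a new cache is aliased to an existing one only when none of the SLAB_NEVER_MERGE flags are set and the SLAB_MERGE_SAME flags agree. A hedged illustration from the creator's side (cache names are invented):

```c
static int __init example_merge_init(void)
{
	/* May be aliased to an existing 128-byte cache ... */
	struct kmem_cache *plain = kmem_cache_create("example_plain", 128, 0, 0, NULL);
	/* ... while SLAB_POISON is part of SLAB_NEVER_MERGE, so this one never is. */
	struct kmem_cache *poisoned = kmem_cache_create("example_poison", 128, 0,
							 SLAB_POISON, NULL);

	return (plain && poisoned) ? 0 : -ENOMEM;
}
```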
.. | .. |
---|
365 | 234 | unsigned int object_size, unsigned int align, |
---|
366 | 235 | slab_flags_t flags, unsigned int useroffset, |
---|
367 | 236 | unsigned int usersize, void (*ctor)(void *), |
---|
368 | | - struct mem_cgroup *memcg, struct kmem_cache *root_cache) |
---|
| 237 | + struct kmem_cache *root_cache) |
---|
369 | 238 | { |
---|
370 | 239 | struct kmem_cache *s; |
---|
371 | 240 | int err; |
---|
.. | .. |
---|
385 | 254 | s->useroffset = useroffset; |
---|
386 | 255 | s->usersize = usersize; |
---|
387 | 256 | |
---|
388 | | - err = init_memcg_params(s, memcg, root_cache); |
---|
389 | | - if (err) |
---|
390 | | - goto out_free_cache; |
---|
391 | | - |
---|
392 | 257 | err = __kmem_cache_create(s, flags); |
---|
393 | 258 | if (err) |
---|
394 | 259 | goto out_free_cache; |
---|
395 | 260 | |
---|
396 | 261 | s->refcount = 1; |
---|
397 | 262 | list_add(&s->list, &slab_caches); |
---|
398 | | - memcg_link_cache(s); |
---|
399 | 263 | out: |
---|
400 | 264 | if (err) |
---|
401 | 265 | return ERR_PTR(err); |
---|
402 | 266 | return s; |
---|
403 | 267 | |
---|
404 | 268 | out_free_cache: |
---|
405 | | - destroy_memcg_params(s); |
---|
406 | 269 | kmem_cache_free(kmem_cache, s); |
---|
407 | 270 | goto out; |
---|
408 | 271 | } |
---|
409 | 272 | |
---|
410 | | -/* |
---|
411 | | - * kmem_cache_create_usercopy - Create a cache. |
---|
| 273 | +/** |
---|
| 274 | + * kmem_cache_create_usercopy - Create a cache with a region suitable |
---|
| 275 | + * for copying to userspace |
---|
412 | 276 | * @name: A string which is used in /proc/slabinfo to identify this cache. |
---|
413 | 277 | * @size: The size of objects to be created in this cache. |
---|
414 | 278 | * @align: The required alignment for the objects. |
---|
.. | .. |
---|
417 | 281 | * @usersize: Usercopy region size |
---|
418 | 282 | * @ctor: A constructor for the objects. |
---|
419 | 283 | * |
---|
420 | | - * Returns a ptr to the cache on success, NULL on failure. |
---|
421 | 284 | * Cannot be called within a interrupt, but can be interrupted. |
---|
422 | 285 | * The @ctor is run when new pages are allocated by the cache. |
---|
423 | 286 | * |
---|
.. | .. |
---|
426 | 289 | * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) |
---|
427 | 290 | * to catch references to uninitialised memory. |
---|
428 | 291 | * |
---|
429 | | - * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check |
---|
| 292 | + * %SLAB_RED_ZONE - Insert `Red` zones around the allocated memory to check |
---|
430 | 293 | * for buffer overruns. |
---|
431 | 294 | * |
---|
432 | 295 | * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware |
---|
433 | 296 | * cacheline. This can be beneficial if you're counting cycles as closely |
---|
434 | 297 | * as davem. |
---|
| 298 | + * |
---|
| 299 | + * Return: a pointer to the cache on success, NULL on failure. |
---|
435 | 300 | */ |
---|
436 | 301 | struct kmem_cache * |
---|
437 | 302 | kmem_cache_create_usercopy(const char *name, |
---|
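A hedged usage sketch of the API documented above (all type, field and cache names here are invented): a cache whose objects expose only a bounded window to copy_to_user()/copy_from_user() when hardened usercopy checking is enabled.

```c
struct example_req {
	char payload[64];	/* the only part ever copied to/from user space */
	struct list_head link;	/* kernel-internal, outside the usercopy window */
};

static struct kmem_cache *example_req_cache;

static int __init example_usercopy_init(void)
{
	example_req_cache = kmem_cache_create_usercopy("example_req",
			sizeof(struct example_req), 0, SLAB_HWCACHE_ALIGN,
			offsetof(struct example_req, payload),
			sizeof_field(struct example_req, payload),
			NULL);
	return example_req_cache ? 0 : -ENOMEM;
}
```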
.. | .. |
---|
446 | 311 | |
---|
447 | 312 | get_online_cpus(); |
---|
448 | 313 | get_online_mems(); |
---|
449 | | - memcg_get_cache_ids(); |
---|
| 314 | + |
---|
| 315 | +#ifdef CONFIG_SLUB_DEBUG |
---|
| 316 | + /* |
---|
| 317 | + * If no slub_debug was enabled globally, the static key is not yet |
---|
| 318 | + * enabled by setup_slub_debug(). Enable it if the cache is being |
---|
| 319 | + * created with any of the debugging flags passed explicitly. |
---|
| 320 | + */ |
---|
| 321 | + if (flags & SLAB_DEBUG_FLAGS) |
---|
| 322 | + static_branch_enable(&slub_debug_enabled); |
---|
| 323 | +#endif |
---|
450 | 324 | |
---|
451 | 325 | mutex_lock(&slab_mutex); |
---|
452 | 326 | |
---|
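The new CONFIG_SLUB_DEBUG hunk flips slub_debug_enabled so that caches created with explicit debug flags are honoured even when no global slub_debug= option was given. The static-key pattern it relies on, in a hedged, self-contained sketch (the key name here is illustrative, not the real slub_debug_enabled):

```c
#include <linux/jump_label.h>

/* False by default: the fast path costs a single patched-out branch. */
static DEFINE_STATIC_KEY_FALSE(example_debug_key);

static inline bool example_debug_on(void)
{
	return static_branch_unlikely(&example_debug_key);
}

static void example_enable_debug(void)
{
	/* Patches the branch sites; from now on example_debug_on() is true. */
	static_branch_enable(&example_debug_key);
}
```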
.. | .. |
---|
487 | 361 | |
---|
488 | 362 | s = create_cache(cache_name, size, |
---|
489 | 363 | calculate_alignment(flags, align, size), |
---|
490 | | - flags, useroffset, usersize, ctor, NULL, NULL); |
---|
| 364 | + flags, useroffset, usersize, ctor, NULL); |
---|
491 | 365 | if (IS_ERR(s)) { |
---|
492 | 366 | err = PTR_ERR(s); |
---|
493 | 367 | kfree_const(cache_name); |
---|
.. | .. |
---|
496 | 370 | out_unlock: |
---|
497 | 371 | mutex_unlock(&slab_mutex); |
---|
498 | 372 | |
---|
499 | | - memcg_put_cache_ids(); |
---|
500 | 373 | put_online_mems(); |
---|
501 | 374 | put_online_cpus(); |
---|
502 | 375 | |
---|
.. | .. |
---|
515 | 388 | } |
---|
516 | 389 | EXPORT_SYMBOL(kmem_cache_create_usercopy); |
---|
517 | 390 | |
---|
| 391 | +/** |
---|
| 392 | + * kmem_cache_create - Create a cache. |
---|
| 393 | + * @name: A string which is used in /proc/slabinfo to identify this cache. |
---|
| 394 | + * @size: The size of objects to be created in this cache. |
---|
| 395 | + * @align: The required alignment for the objects. |
---|
| 396 | + * @flags: SLAB flags |
---|
| 397 | + * @ctor: A constructor for the objects. |
---|
| 398 | + * |
---|
| 399 | + * Cannot be called within a interrupt, but can be interrupted. |
---|
| 400 | + * The @ctor is run when new pages are allocated by the cache. |
---|
| 401 | + * |
---|
| 402 | + * The flags are |
---|
| 403 | + * |
---|
| 404 | + * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) |
---|
| 405 | + * to catch references to uninitialised memory. |
---|
| 406 | + * |
---|
| 407 | + * %SLAB_RED_ZONE - Insert `Red` zones around the allocated memory to check |
---|
| 408 | + * for buffer overruns. |
---|
| 409 | + * |
---|
| 410 | + * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware |
---|
| 411 | + * cacheline. This can be beneficial if you're counting cycles as closely |
---|
| 412 | + * as davem. |
---|
| 413 | + * |
---|
| 414 | + * Return: a pointer to the cache on success, NULL on failure. |
---|
| 415 | + */ |
---|
518 | 416 | struct kmem_cache * |
---|
519 | 417 | kmem_cache_create(const char *name, unsigned int size, unsigned int align, |
---|
520 | 418 | slab_flags_t flags, void (*ctor)(void *)) |
---|
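A hedged usage sketch to go with the kerneldoc added above (type and names are invented). Note that the constructor runs when the cache populates a new slab, not on every allocation, so freed objects should be returned in a ctor-consistent state.

```c
struct example_node {
	struct list_head link;
	unsigned long generation;
};

static struct kmem_cache *example_node_cache;

static void example_node_ctor(void *obj)
{
	struct example_node *n = obj;

	INIT_LIST_HEAD(&n->link);
	n->generation = 0;
}

static int __init example_node_init(void)
{
	example_node_cache = kmem_cache_create("example_node",
			sizeof(struct example_node), 0,
			SLAB_HWCACHE_ALIGN, example_node_ctor);
	return example_node_cache ? 0 : -ENOMEM;
}
```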
.. | .. |
---|
532 | 430 | /* |
---|
533 | 431 | * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the |
---|
534 | 432 | * @slab_caches_to_rcu_destroy list. The slab pages are freed |
---|
535 | | - * through RCU and and the associated kmem_cache are dereferenced |
---|
| 433 | + * through RCU and the associated kmem_cache are dereferenced |
---|
536 | 434 | * while freeing the pages, so the kmem_caches should be freed only |
---|
537 | 435 | * after the pending RCU operations are finished. As rcu_barrier() |
---|
538 | 436 | * is a pretty slow operation, we batch all pending destructions |
---|
.. | .. |
---|
548 | 446 | rcu_barrier(); |
---|
549 | 447 | |
---|
550 | 448 | list_for_each_entry_safe(s, s2, &to_destroy, list) { |
---|
| 449 | + debugfs_slab_release(s); |
---|
| 450 | + kfence_shutdown_cache(s); |
---|
551 | 451 | #ifdef SLAB_SUPPORTS_SYSFS |
---|
552 | 452 | sysfs_slab_release(s); |
---|
553 | 453 | #else |
---|
.. | .. |
---|
564 | 464 | if (__kmem_cache_shutdown(s) != 0) |
---|
565 | 465 | return -EBUSY; |
---|
566 | 466 | |
---|
567 | | - memcg_unlink_cache(s); |
---|
568 | 467 | list_del(&s->list); |
---|
569 | 468 | |
---|
570 | 469 | if (s->flags & SLAB_TYPESAFE_BY_RCU) { |
---|
.. | .. |
---|
574 | 473 | list_add_tail(&s->list, &slab_caches_to_rcu_destroy); |
---|
575 | 474 | schedule_work(&slab_caches_to_rcu_destroy_work); |
---|
576 | 475 | } else { |
---|
| 476 | + kfence_shutdown_cache(s); |
---|
| 477 | + debugfs_slab_release(s); |
---|
577 | 478 | #ifdef SLAB_SUPPORTS_SYSFS |
---|
578 | 479 | sysfs_slab_unlink(s); |
---|
579 | 480 | sysfs_slab_release(s); |
---|
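The deferred release above exists because SLAB_TYPESAFE_BY_RCU only guarantees that the *type* of the memory stays stable across an RCU grace period; the object itself may be freed and immediately recycled. A hedged sketch of the lookup pattern such caches are meant for (all names invented):

```c
struct example_conn {
	atomic_t refcnt;	/* zero once the object is being freed */
	u32 key;
};

static struct example_conn __rcu *example_current;

/* Reader side: even if the object was just freed back to its
 * SLAB_TYPESAFE_BY_RCU cache, the memory is still a struct example_conn
 * until a grace period passes, so dereferencing it is safe as long as
 * the result is revalidated afterwards, here via the refcount. */
static struct example_conn *example_conn_get(void)
{
	struct example_conn *c;

	rcu_read_lock();
	c = rcu_dereference(example_current);
	if (c && !atomic_inc_not_zero(&c->refcnt))
		c = NULL;
	rcu_read_unlock();
	return c;
}
```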
.. | .. |
---|
585 | 486 | return 0; |
---|
586 | 487 | } |
---|
587 | 488 | |
---|
588 | | -#ifdef CONFIG_MEMCG_KMEM |
---|
589 | | -/* |
---|
590 | | - * memcg_create_kmem_cache - Create a cache for a memory cgroup. |
---|
591 | | - * @memcg: The memory cgroup the new cache is for. |
---|
592 | | - * @root_cache: The parent of the new cache. |
---|
593 | | - * |
---|
594 | | - * This function attempts to create a kmem cache that will serve allocation |
---|
595 | | - * requests going from @memcg to @root_cache. The new cache inherits properties |
---|
596 | | - * from its parent. |
---|
597 | | - */ |
---|
598 | | -void memcg_create_kmem_cache(struct mem_cgroup *memcg, |
---|
599 | | - struct kmem_cache *root_cache) |
---|
600 | | -{ |
---|
601 | | - static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */ |
---|
602 | | - struct cgroup_subsys_state *css = &memcg->css; |
---|
603 | | - struct memcg_cache_array *arr; |
---|
604 | | - struct kmem_cache *s = NULL; |
---|
605 | | - char *cache_name; |
---|
606 | | - int idx; |
---|
607 | | - |
---|
608 | | - get_online_cpus(); |
---|
609 | | - get_online_mems(); |
---|
610 | | - |
---|
611 | | - mutex_lock(&slab_mutex); |
---|
612 | | - |
---|
613 | | - /* |
---|
614 | | - * The memory cgroup could have been offlined while the cache |
---|
615 | | - * creation work was pending. |
---|
616 | | - */ |
---|
617 | | - if (memcg->kmem_state != KMEM_ONLINE || root_cache->memcg_params.dying) |
---|
618 | | - goto out_unlock; |
---|
619 | | - |
---|
620 | | - idx = memcg_cache_id(memcg); |
---|
621 | | - arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches, |
---|
622 | | - lockdep_is_held(&slab_mutex)); |
---|
623 | | - |
---|
624 | | - /* |
---|
625 | | - * Since per-memcg caches are created asynchronously on first |
---|
626 | | - * allocation (see memcg_kmem_get_cache()), several threads can try to |
---|
627 | | - * create the same cache, but only one of them may succeed. |
---|
628 | | - */ |
---|
629 | | - if (arr->entries[idx]) |
---|
630 | | - goto out_unlock; |
---|
631 | | - |
---|
632 | | - cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf)); |
---|
633 | | - cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name, |
---|
634 | | - css->serial_nr, memcg_name_buf); |
---|
635 | | - if (!cache_name) |
---|
636 | | - goto out_unlock; |
---|
637 | | - |
---|
638 | | - s = create_cache(cache_name, root_cache->object_size, |
---|
639 | | - root_cache->align, |
---|
640 | | - root_cache->flags & CACHE_CREATE_MASK, |
---|
641 | | - root_cache->useroffset, root_cache->usersize, |
---|
642 | | - root_cache->ctor, memcg, root_cache); |
---|
643 | | - /* |
---|
644 | | - * If we could not create a memcg cache, do not complain, because |
---|
645 | | - * that's not critical at all as we can always proceed with the root |
---|
646 | | - * cache. |
---|
647 | | - */ |
---|
648 | | - if (IS_ERR(s)) { |
---|
649 | | - kfree(cache_name); |
---|
650 | | - goto out_unlock; |
---|
651 | | - } |
---|
652 | | - |
---|
653 | | - /* |
---|
654 | | - * Since readers won't lock (see cache_from_memcg_idx()), we need a |
---|
655 | | - * barrier here to ensure nobody will see the kmem_cache partially |
---|
656 | | - * initialized. |
---|
657 | | - */ |
---|
658 | | - smp_wmb(); |
---|
659 | | - arr->entries[idx] = s; |
---|
660 | | - |
---|
661 | | -out_unlock: |
---|
662 | | - mutex_unlock(&slab_mutex); |
---|
663 | | - |
---|
664 | | - put_online_mems(); |
---|
665 | | - put_online_cpus(); |
---|
666 | | -} |
---|
667 | | - |
---|
668 | | -static void kmemcg_deactivate_workfn(struct work_struct *work) |
---|
669 | | -{ |
---|
670 | | - struct kmem_cache *s = container_of(work, struct kmem_cache, |
---|
671 | | - memcg_params.deact_work); |
---|
672 | | - |
---|
673 | | - get_online_cpus(); |
---|
674 | | - get_online_mems(); |
---|
675 | | - |
---|
676 | | - mutex_lock(&slab_mutex); |
---|
677 | | - |
---|
678 | | - s->memcg_params.deact_fn(s); |
---|
679 | | - |
---|
680 | | - mutex_unlock(&slab_mutex); |
---|
681 | | - |
---|
682 | | - put_online_mems(); |
---|
683 | | - put_online_cpus(); |
---|
684 | | - |
---|
685 | | - /* done, put the ref from slab_deactivate_memcg_cache_rcu_sched() */ |
---|
686 | | - css_put(&s->memcg_params.memcg->css); |
---|
687 | | -} |
---|
688 | | - |
---|
689 | | -static void kmemcg_deactivate_rcufn(struct rcu_head *head) |
---|
690 | | -{ |
---|
691 | | - struct kmem_cache *s = container_of(head, struct kmem_cache, |
---|
692 | | - memcg_params.deact_rcu_head); |
---|
693 | | - |
---|
694 | | - /* |
---|
695 | | - * We need to grab blocking locks. Bounce to ->deact_work. The |
---|
696 | | - * work item shares the space with the RCU head and can't be |
---|
697 | | - * initialized eariler. |
---|
698 | | - */ |
---|
699 | | - INIT_WORK(&s->memcg_params.deact_work, kmemcg_deactivate_workfn); |
---|
700 | | - queue_work(memcg_kmem_cache_wq, &s->memcg_params.deact_work); |
---|
701 | | -} |
---|
702 | | - |
---|
703 | | -/** |
---|
704 | | - * slab_deactivate_memcg_cache_rcu_sched - schedule deactivation after a |
---|
705 | | - * sched RCU grace period |
---|
706 | | - * @s: target kmem_cache |
---|
707 | | - * @deact_fn: deactivation function to call |
---|
708 | | - * |
---|
709 | | - * Schedule @deact_fn to be invoked with online cpus, mems and slab_mutex |
---|
710 | | - * held after a sched RCU grace period. The slab is guaranteed to stay |
---|
711 | | - * alive until @deact_fn is finished. This is to be used from |
---|
712 | | - * __kmemcg_cache_deactivate(). |
---|
713 | | - */ |
---|
714 | | -void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s, |
---|
715 | | - void (*deact_fn)(struct kmem_cache *)) |
---|
716 | | -{ |
---|
717 | | - if (WARN_ON_ONCE(is_root_cache(s)) || |
---|
718 | | - WARN_ON_ONCE(s->memcg_params.deact_fn)) |
---|
719 | | - return; |
---|
720 | | - |
---|
721 | | - /* |
---|
722 | | - * memcg_kmem_wq_lock is used to synchronize memcg_params.dying |
---|
723 | | - * flag and make sure that no new kmem_cache deactivation tasks |
---|
724 | | - * are queued (see flush_memcg_workqueue() ). |
---|
725 | | - */ |
---|
726 | | - spin_lock_irq(&memcg_kmem_wq_lock); |
---|
727 | | - if (s->memcg_params.root_cache->memcg_params.dying) |
---|
728 | | - goto unlock; |
---|
729 | | - |
---|
730 | | - /* pin memcg so that @s doesn't get destroyed in the middle */ |
---|
731 | | - css_get(&s->memcg_params.memcg->css); |
---|
732 | | - |
---|
733 | | - s->memcg_params.deact_fn = deact_fn; |
---|
734 | | - call_rcu_sched(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn); |
---|
735 | | -unlock: |
---|
736 | | - spin_unlock_irq(&memcg_kmem_wq_lock); |
---|
737 | | -} |
---|
738 | | - |
---|
739 | | -void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg) |
---|
740 | | -{ |
---|
741 | | - int idx; |
---|
742 | | - struct memcg_cache_array *arr; |
---|
743 | | - struct kmem_cache *s, *c; |
---|
744 | | - |
---|
745 | | - idx = memcg_cache_id(memcg); |
---|
746 | | - |
---|
747 | | - get_online_cpus(); |
---|
748 | | - get_online_mems(); |
---|
749 | | - |
---|
750 | | - mutex_lock(&slab_mutex); |
---|
751 | | - list_for_each_entry(s, &slab_root_caches, root_caches_node) { |
---|
752 | | - arr = rcu_dereference_protected(s->memcg_params.memcg_caches, |
---|
753 | | - lockdep_is_held(&slab_mutex)); |
---|
754 | | - c = arr->entries[idx]; |
---|
755 | | - if (!c) |
---|
756 | | - continue; |
---|
757 | | - |
---|
758 | | - __kmemcg_cache_deactivate(c); |
---|
759 | | - arr->entries[idx] = NULL; |
---|
760 | | - } |
---|
761 | | - mutex_unlock(&slab_mutex); |
---|
762 | | - |
---|
763 | | - put_online_mems(); |
---|
764 | | - put_online_cpus(); |
---|
765 | | -} |
---|
766 | | - |
---|
767 | | -void memcg_destroy_kmem_caches(struct mem_cgroup *memcg) |
---|
768 | | -{ |
---|
769 | | - struct kmem_cache *s, *s2; |
---|
770 | | - |
---|
771 | | - get_online_cpus(); |
---|
772 | | - get_online_mems(); |
---|
773 | | - |
---|
774 | | - mutex_lock(&slab_mutex); |
---|
775 | | - list_for_each_entry_safe(s, s2, &memcg->kmem_caches, |
---|
776 | | - memcg_params.kmem_caches_node) { |
---|
777 | | - /* |
---|
778 | | - * The cgroup is about to be freed and therefore has no charges |
---|
779 | | - * left. Hence, all its caches must be empty by now. |
---|
780 | | - */ |
---|
781 | | - BUG_ON(shutdown_cache(s)); |
---|
782 | | - } |
---|
783 | | - mutex_unlock(&slab_mutex); |
---|
784 | | - |
---|
785 | | - put_online_mems(); |
---|
786 | | - put_online_cpus(); |
---|
787 | | -} |
---|
788 | | - |
---|
789 | | -static int shutdown_memcg_caches(struct kmem_cache *s) |
---|
790 | | -{ |
---|
791 | | - struct memcg_cache_array *arr; |
---|
792 | | - struct kmem_cache *c, *c2; |
---|
793 | | - LIST_HEAD(busy); |
---|
794 | | - int i; |
---|
795 | | - |
---|
796 | | - BUG_ON(!is_root_cache(s)); |
---|
797 | | - |
---|
798 | | - /* |
---|
799 | | - * First, shutdown active caches, i.e. caches that belong to online |
---|
800 | | - * memory cgroups. |
---|
801 | | - */ |
---|
802 | | - arr = rcu_dereference_protected(s->memcg_params.memcg_caches, |
---|
803 | | - lockdep_is_held(&slab_mutex)); |
---|
804 | | - for_each_memcg_cache_index(i) { |
---|
805 | | - c = arr->entries[i]; |
---|
806 | | - if (!c) |
---|
807 | | - continue; |
---|
808 | | - if (shutdown_cache(c)) |
---|
809 | | - /* |
---|
810 | | - * The cache still has objects. Move it to a temporary |
---|
811 | | - * list so as not to try to destroy it for a second |
---|
812 | | - * time while iterating over inactive caches below. |
---|
813 | | - */ |
---|
814 | | - list_move(&c->memcg_params.children_node, &busy); |
---|
815 | | - else |
---|
816 | | - /* |
---|
817 | | - * The cache is empty and will be destroyed soon. Clear |
---|
818 | | - * the pointer to it in the memcg_caches array so that |
---|
819 | | - * it will never be accessed even if the root cache |
---|
820 | | - * stays alive. |
---|
821 | | - */ |
---|
822 | | - arr->entries[i] = NULL; |
---|
823 | | - } |
---|
824 | | - |
---|
825 | | - /* |
---|
826 | | - * Second, shutdown all caches left from memory cgroups that are now |
---|
827 | | - * offline. |
---|
828 | | - */ |
---|
829 | | - list_for_each_entry_safe(c, c2, &s->memcg_params.children, |
---|
830 | | - memcg_params.children_node) |
---|
831 | | - shutdown_cache(c); |
---|
832 | | - |
---|
833 | | - list_splice(&busy, &s->memcg_params.children); |
---|
834 | | - |
---|
835 | | - /* |
---|
836 | | - * A cache being destroyed must be empty. In particular, this means |
---|
837 | | - * that all per memcg caches attached to it must be empty too. |
---|
838 | | - */ |
---|
839 | | - if (!list_empty(&s->memcg_params.children)) |
---|
840 | | - return -EBUSY; |
---|
841 | | - return 0; |
---|
842 | | -} |
---|
843 | | - |
---|
844 | | -static void memcg_set_kmem_cache_dying(struct kmem_cache *s) |
---|
845 | | -{ |
---|
846 | | - spin_lock_irq(&memcg_kmem_wq_lock); |
---|
847 | | - s->memcg_params.dying = true; |
---|
848 | | - spin_unlock_irq(&memcg_kmem_wq_lock); |
---|
849 | | -} |
---|
850 | | - |
---|
851 | | -static void flush_memcg_workqueue(struct kmem_cache *s) |
---|
852 | | -{ |
---|
853 | | - /* |
---|
854 | | - * SLUB deactivates the kmem_caches through call_rcu_sched. Make |
---|
855 | | - * sure all registered rcu callbacks have been invoked. |
---|
856 | | - */ |
---|
857 | | - if (IS_ENABLED(CONFIG_SLUB)) |
---|
858 | | - rcu_barrier_sched(); |
---|
859 | | - |
---|
860 | | - /* |
---|
861 | | - * SLAB and SLUB create memcg kmem_caches through workqueue and SLUB |
---|
862 | | - * deactivates the memcg kmem_caches through workqueue. Make sure all |
---|
863 | | - * previous workitems on workqueue are processed. |
---|
864 | | - */ |
---|
865 | | - if (likely(memcg_kmem_cache_wq)) |
---|
866 | | - flush_workqueue(memcg_kmem_cache_wq); |
---|
867 | | -} |
---|
868 | | -#else |
---|
869 | | -static inline int shutdown_memcg_caches(struct kmem_cache *s) |
---|
870 | | -{ |
---|
871 | | - return 0; |
---|
872 | | -} |
---|
873 | | -#endif /* CONFIG_MEMCG_KMEM */ |
---|
874 | | - |
---|
875 | 489 | void slab_kmem_cache_release(struct kmem_cache *s) |
---|
876 | 490 | { |
---|
877 | 491 | __kmem_cache_release(s); |
---|
878 | | - destroy_memcg_params(s); |
---|
879 | 492 | kfree_const(s->name); |
---|
880 | 493 | kmem_cache_free(kmem_cache, s); |
---|
881 | 494 | } |
---|
.. | .. |
---|
896 | 509 | if (s->refcount) |
---|
897 | 510 | goto out_unlock; |
---|
898 | 511 | |
---|
899 | | -#ifdef CONFIG_MEMCG_KMEM |
---|
900 | | - memcg_set_kmem_cache_dying(s); |
---|
901 | | - |
---|
902 | | - mutex_unlock(&slab_mutex); |
---|
903 | | - |
---|
904 | | - put_online_mems(); |
---|
905 | | - put_online_cpus(); |
---|
906 | | - |
---|
907 | | - flush_memcg_workqueue(s); |
---|
908 | | - |
---|
909 | | - get_online_cpus(); |
---|
910 | | - get_online_mems(); |
---|
911 | | - |
---|
912 | | - mutex_lock(&slab_mutex); |
---|
913 | | - |
---|
914 | | - /* |
---|
915 | | - * Another thread referenced it again |
---|
916 | | - */ |
---|
917 | | - if (READ_ONCE(s->refcount)) { |
---|
918 | | - spin_lock_irq(&memcg_kmem_wq_lock); |
---|
919 | | - s->memcg_params.dying = false; |
---|
920 | | - spin_unlock_irq(&memcg_kmem_wq_lock); |
---|
921 | | - goto out_unlock; |
---|
922 | | - } |
---|
923 | | -#endif |
---|
924 | | - |
---|
925 | | - err = shutdown_memcg_caches(s); |
---|
926 | | - if (!err) |
---|
927 | | - err = shutdown_cache(s); |
---|
928 | | - |
---|
| 512 | + err = shutdown_cache(s); |
---|
929 | 513 | if (err) { |
---|
930 | 514 | pr_err("kmem_cache_destroy %s: Slab cache still has objects\n", |
---|
931 | 515 | s->name); |
---|
.. | .. |
---|
945 | 529 | * |
---|
946 | 530 | * Releases as many slabs as possible for a cache. |
---|
947 | 531 | * To help debugging, a zero exit status indicates all slabs were released. |
---|
| 532 | + * |
---|
| 533 | + * Return: %0 if all slabs were released, non-zero otherwise |
---|
948 | 534 | */ |
---|
949 | 535 | int kmem_cache_shrink(struct kmem_cache *cachep) |
---|
950 | 536 | { |
---|
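A hedged usage sketch (the wrapper name is invented): callers such as shrinkers use this to hand empty slabs back to the page allocator.

```c
static void example_trim_cache(struct kmem_cache *cachep)
{
	/* Non-zero means some slabs still hold live objects. */
	if (kmem_cache_shrink(cachep))
		pr_debug("example: cache not fully empty after shrink\n");
}
```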
.. | .. |
---|
972 | 558 | unsigned int useroffset, unsigned int usersize) |
---|
973 | 559 | { |
---|
974 | 560 | int err; |
---|
| 561 | + unsigned int align = ARCH_KMALLOC_MINALIGN; |
---|
975 | 562 | |
---|
976 | 563 | s->name = name; |
---|
977 | 564 | s->size = s->object_size = size; |
---|
978 | | - s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size); |
---|
| 565 | + |
---|
| 566 | + /* |
---|
| 567 | + * For power of two sizes, guarantee natural alignment for kmalloc |
---|
| 568 | + * caches, regardless of SL*B debugging options. |
---|
| 569 | + */ |
---|
| 570 | + if (is_power_of_2(size)) |
---|
| 571 | + align = max(align, size); |
---|
| 572 | + s->align = calculate_alignment(flags, align, size); |
---|
| 573 | + |
---|
979 | 574 | s->useroffset = useroffset; |
---|
980 | 575 | s->usersize = usersize; |
---|
981 | | - |
---|
982 | | - slab_init_memcg_params(s); |
---|
983 | 576 | |
---|
984 | 577 | err = __kmem_cache_create(s, flags); |
---|
985 | 578 | |
---|
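The new power-of-two branch above means kmalloc buffers of power-of-two size are naturally aligned regardless of debugging options, which callers doing DMA or similar address arithmetic rely on. A hedged illustration:

```c
static void example_natural_alignment(void)
{
	void *a = kmalloc(512, GFP_KERNEL);	/* guaranteed 512-byte aligned */
	void *b = kmalloc(96, GFP_KERNEL);	/* only ARCH_KMALLOC_MINALIGN-aligned */

	WARN_ON((unsigned long)a & 511);	/* never fires for a power-of-two size */

	kfree(a);
	kfree(b);
}
```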
.. | .. |
---|
1000 | 593 | panic("Out of memory when creating slab %s\n", name); |
---|
1001 | 594 | |
---|
1002 | 595 | create_boot_cache(s, name, size, flags, useroffset, usersize); |
---|
| 596 | + kasan_cache_create_kmalloc(s); |
---|
1003 | 597 | list_add(&s->list, &slab_caches); |
---|
1004 | | - memcg_link_cache(s); |
---|
1005 | 598 | s->refcount = 1; |
---|
1006 | 599 | return s; |
---|
1007 | 600 | } |
---|
1008 | 601 | |
---|
1009 | 602 | struct kmem_cache * |
---|
1010 | | -kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init; |
---|
| 603 | +kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init = |
---|
| 604 | +{ /* initialization for https://bugs.llvm.org/show_bug.cgi?id=42570 */ }; |
---|
1011 | 605 | EXPORT_SYMBOL(kmalloc_caches); |
---|
1012 | 606 | |
---|
1013 | 607 | /* |
---|
.. | .. |
---|
1055 | 649 | struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) |
---|
1056 | 650 | { |
---|
1057 | 651 | unsigned int index; |
---|
| 652 | + struct kmem_cache *s = NULL; |
---|
1058 | 653 | |
---|
1059 | 654 | if (size <= 192) { |
---|
1060 | 655 | if (!size) |
---|
.. | .. |
---|
1062 | 657 | |
---|
1063 | 658 | index = size_index[size_index_elem(size)]; |
---|
1064 | 659 | } else { |
---|
1065 | | - if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { |
---|
1066 | | - WARN_ON(1); |
---|
| 660 | + if (WARN_ON_ONCE(size > KMALLOC_MAX_CACHE_SIZE)) |
---|
1067 | 661 | return NULL; |
---|
1068 | | - } |
---|
1069 | 662 | index = fls(size - 1); |
---|
1070 | 663 | } |
---|
1071 | 664 | |
---|
| 665 | + trace_android_vh_kmalloc_slab(index, flags, &s); |
---|
| 666 | + if (s) |
---|
| 667 | + return s; |
---|
| 668 | + |
---|
1072 | 669 | return kmalloc_caches[kmalloc_type(flags)][index]; |
---|
1073 | 670 | } |
---|
| 671 | + |
---|
| 672 | +#ifdef CONFIG_ZONE_DMA |
---|
| 673 | +#define INIT_KMALLOC_INFO(__size, __short_size) \ |
---|
| 674 | +{ \ |
---|
| 675 | + .name[KMALLOC_NORMAL] = "kmalloc-" #__short_size, \ |
---|
| 676 | + .name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size, \ |
---|
| 677 | + .name[KMALLOC_DMA] = "dma-kmalloc-" #__short_size, \ |
---|
| 678 | + .size = __size, \ |
---|
| 679 | +} |
---|
| 680 | +#else |
---|
| 681 | +#define INIT_KMALLOC_INFO(__size, __short_size) \ |
---|
| 682 | +{ \ |
---|
| 683 | + .name[KMALLOC_NORMAL] = "kmalloc-" #__short_size, \ |
---|
| 684 | + .name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size, \ |
---|
| 685 | + .size = __size, \ |
---|
| 686 | +} |
---|
| 687 | +#endif |
---|
1074 | 688 | |
---|
1075 | 689 | /* |
---|
1076 | 690 | * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time. |
---|
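kmalloc_slab() above indexes a two-dimensional array: the size class (index) and the cache type whose names are now generated by INIT_KMALLOC_INFO(). A hedged sketch of how the gfp flags choose the type (the real kmalloc_type() in slab.h is written more compactly):

```c
static enum kmalloc_cache_type example_kmalloc_type(gfp_t flags)
{
	if (IS_ENABLED(CONFIG_ZONE_DMA) && (flags & __GFP_DMA))
		return KMALLOC_DMA;		/* "dma-kmalloc-*" caches */
	if (flags & __GFP_RECLAIMABLE)
		return KMALLOC_RECLAIM;		/* "kmalloc-rcl-*" caches */
	return KMALLOC_NORMAL;			/* plain "kmalloc-*" caches */
}
```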
.. | .. |
---|
1078 | 692 | * kmalloc-67108864. |
---|
1079 | 693 | */ |
---|
1080 | 694 | const struct kmalloc_info_struct kmalloc_info[] __initconst = { |
---|
1081 | | - {NULL, 0}, {"kmalloc-96", 96}, |
---|
1082 | | - {"kmalloc-192", 192}, {"kmalloc-8", 8}, |
---|
1083 | | - {"kmalloc-16", 16}, {"kmalloc-32", 32}, |
---|
1084 | | - {"kmalloc-64", 64}, {"kmalloc-128", 128}, |
---|
1085 | | - {"kmalloc-256", 256}, {"kmalloc-512", 512}, |
---|
1086 | | - {"kmalloc-1k", 1024}, {"kmalloc-2k", 2048}, |
---|
1087 | | - {"kmalloc-4k", 4096}, {"kmalloc-8k", 8192}, |
---|
1088 | | - {"kmalloc-16k", 16384}, {"kmalloc-32k", 32768}, |
---|
1089 | | - {"kmalloc-64k", 65536}, {"kmalloc-128k", 131072}, |
---|
1090 | | - {"kmalloc-256k", 262144}, {"kmalloc-512k", 524288}, |
---|
1091 | | - {"kmalloc-1M", 1048576}, {"kmalloc-2M", 2097152}, |
---|
1092 | | - {"kmalloc-4M", 4194304}, {"kmalloc-8M", 8388608}, |
---|
1093 | | - {"kmalloc-16M", 16777216}, {"kmalloc-32M", 33554432}, |
---|
1094 | | - {"kmalloc-64M", 67108864} |
---|
| 695 | + INIT_KMALLOC_INFO(0, 0), |
---|
| 696 | + INIT_KMALLOC_INFO(96, 96), |
---|
| 697 | + INIT_KMALLOC_INFO(192, 192), |
---|
| 698 | + INIT_KMALLOC_INFO(8, 8), |
---|
| 699 | + INIT_KMALLOC_INFO(16, 16), |
---|
| 700 | + INIT_KMALLOC_INFO(32, 32), |
---|
| 701 | + INIT_KMALLOC_INFO(64, 64), |
---|
| 702 | + INIT_KMALLOC_INFO(128, 128), |
---|
| 703 | + INIT_KMALLOC_INFO(256, 256), |
---|
| 704 | + INIT_KMALLOC_INFO(512, 512), |
---|
| 705 | + INIT_KMALLOC_INFO(1024, 1k), |
---|
| 706 | + INIT_KMALLOC_INFO(2048, 2k), |
---|
| 707 | + INIT_KMALLOC_INFO(4096, 4k), |
---|
| 708 | + INIT_KMALLOC_INFO(8192, 8k), |
---|
| 709 | + INIT_KMALLOC_INFO(16384, 16k), |
---|
| 710 | + INIT_KMALLOC_INFO(32768, 32k), |
---|
| 711 | + INIT_KMALLOC_INFO(65536, 64k), |
---|
| 712 | + INIT_KMALLOC_INFO(131072, 128k), |
---|
| 713 | + INIT_KMALLOC_INFO(262144, 256k), |
---|
| 714 | + INIT_KMALLOC_INFO(524288, 512k), |
---|
| 715 | + INIT_KMALLOC_INFO(1048576, 1M), |
---|
| 716 | + INIT_KMALLOC_INFO(2097152, 2M), |
---|
| 717 | + INIT_KMALLOC_INFO(4194304, 4M), |
---|
| 718 | + INIT_KMALLOC_INFO(8388608, 8M), |
---|
| 719 | + INIT_KMALLOC_INFO(16777216, 16M), |
---|
| 720 | + INIT_KMALLOC_INFO(33554432, 32M), |
---|
| 721 | + INIT_KMALLOC_INFO(67108864, 64M) |
---|
1095 | 722 | }; |
---|
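As a worked example of how a request maps onto this table: a 300-byte allocation is above the 192-byte special case, so the index is fls(300 - 1) = 9 and the object comes from the 512-byte entry, with the gfp flags selecting which of the per-type names applies.

```c
/* Illustrative only: both allocations are served by 512-byte objects. */
static void example_size_to_cache(void)
{
	void *p = kmalloc(300, GFP_KERNEL);				/* from kmalloc-512 */
	void *q = kmalloc(300, GFP_KERNEL | __GFP_RECLAIMABLE);	/* from kmalloc-rcl-512 */

	kfree(p);
	kfree(q);
}
```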
1096 | 723 | |
---|
1097 | 724 | /* |
---|
.. | .. |
---|
1141 | 768 | } |
---|
1142 | 769 | } |
---|
1143 | 770 | |
---|
1144 | | -static const char * |
---|
1145 | | -kmalloc_cache_name(const char *prefix, unsigned int size) |
---|
1146 | | -{ |
---|
1147 | | - |
---|
1148 | | - static const char units[3] = "\0kM"; |
---|
1149 | | - int idx = 0; |
---|
1150 | | - |
---|
1151 | | - while (size >= 1024 && (size % 1024 == 0)) { |
---|
1152 | | - size /= 1024; |
---|
1153 | | - idx++; |
---|
1154 | | - } |
---|
1155 | | - |
---|
1156 | | - return kasprintf(GFP_NOWAIT, "%s-%u%c", prefix, size, units[idx]); |
---|
1157 | | -} |
---|
1158 | | - |
---|
1159 | 771 | static void __init |
---|
1160 | | -new_kmalloc_cache(int idx, int type, slab_flags_t flags) |
---|
| 772 | +new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags) |
---|
1161 | 773 | { |
---|
1162 | | - const char *name; |
---|
1163 | | - |
---|
1164 | | - if (type == KMALLOC_RECLAIM) { |
---|
| 774 | + if (type == KMALLOC_RECLAIM) |
---|
1165 | 775 | flags |= SLAB_RECLAIM_ACCOUNT; |
---|
1166 | | - name = kmalloc_cache_name("kmalloc-rcl", |
---|
1167 | | - kmalloc_info[idx].size); |
---|
1168 | | - BUG_ON(!name); |
---|
1169 | | - } else { |
---|
1170 | | - name = kmalloc_info[idx].name; |
---|
1171 | | - } |
---|
1172 | 776 | |
---|
1173 | | - kmalloc_caches[type][idx] = create_kmalloc_cache(name, |
---|
| 777 | + kmalloc_caches[type][idx] = create_kmalloc_cache( |
---|
| 778 | + kmalloc_info[idx].name[type], |
---|
1174 | 779 | kmalloc_info[idx].size, flags, 0, |
---|
1175 | 780 | kmalloc_info[idx].size); |
---|
1176 | 781 | } |
---|
.. | .. |
---|
1182 | 787 | */ |
---|
1183 | 788 | void __init create_kmalloc_caches(slab_flags_t flags) |
---|
1184 | 789 | { |
---|
1185 | | - int i, type; |
---|
| 790 | + int i; |
---|
| 791 | + enum kmalloc_cache_type type; |
---|
1186 | 792 | |
---|
1187 | 793 | for (type = KMALLOC_NORMAL; type <= KMALLOC_RECLAIM; type++) { |
---|
1188 | 794 | for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { |
---|
.. | .. |
---|
1211 | 817 | struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i]; |
---|
1212 | 818 | |
---|
1213 | 819 | if (s) { |
---|
1214 | | - unsigned int size = kmalloc_size(i); |
---|
1215 | | - const char *n = kmalloc_cache_name("dma-kmalloc", size); |
---|
1216 | | - |
---|
1217 | | - BUG_ON(!n); |
---|
1218 | 820 | kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache( |
---|
1219 | | - n, size, SLAB_CACHE_DMA | flags, 0, 0); |
---|
| 821 | + kmalloc_info[i].name[KMALLOC_DMA], |
---|
| 822 | + kmalloc_info[i].size, |
---|
| 823 | + SLAB_CACHE_DMA | flags, 0, |
---|
| 824 | + kmalloc_info[i].size); |
---|
1220 | 825 | } |
---|
1221 | 826 | } |
---|
1222 | 827 | #endif |
---|
1223 | 828 | } |
---|
1224 | 829 | #endif /* !CONFIG_SLOB */ |
---|
| 830 | + |
---|
| 831 | +gfp_t kmalloc_fix_flags(gfp_t flags) |
---|
| 832 | +{ |
---|
| 833 | + gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK; |
---|
| 834 | + |
---|
| 835 | + flags &= ~GFP_SLAB_BUG_MASK; |
---|
| 836 | + pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n", |
---|
| 837 | + invalid_mask, &invalid_mask, flags, &flags); |
---|
| 838 | + dump_stack(); |
---|
| 839 | + |
---|
| 840 | + return flags; |
---|
| 841 | +} |
---|
1225 | 842 | |
---|
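kmalloc_fix_flags(), added above, centralises the handling of page-allocator-only GFP bits (those in GFP_SLAB_BUG_MASK) passed to slab allocations: the offending bits are stripped and a warning plus stack dump is emitted, as the large-allocation path below now does. A hedged illustration of the kind of call it catches:

```c
static void *example_bad_gfp(size_t len)
{
	/* __GFP_HIGHMEM is meaningless for slab memory; the bad bit is
	 * dropped and a warning with a stack trace points at the caller. */
	return kmalloc(len, GFP_KERNEL | __GFP_HIGHMEM);
}
```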
1226 | 843 | /* |
---|
1227 | 844 | * To avoid unnecessary overhead, we pass through large allocation requests |
---|
.. | .. |
---|
1230 | 847 | */ |
---|
1231 | 848 | void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) |
---|
1232 | 849 | { |
---|
1233 | | - void *ret; |
---|
| 850 | + void *ret = NULL; |
---|
1234 | 851 | struct page *page; |
---|
| 852 | + |
---|
| 853 | + if (unlikely(flags & GFP_SLAB_BUG_MASK)) |
---|
| 854 | + flags = kmalloc_fix_flags(flags); |
---|
1235 | 855 | |
---|
1236 | 856 | flags |= __GFP_COMP; |
---|
1237 | 857 | page = alloc_pages(flags, order); |
---|
1238 | | - ret = page ? page_address(page) : NULL; |
---|
| 858 | + if (likely(page)) { |
---|
| 859 | + ret = page_address(page); |
---|
| 860 | + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, |
---|
| 861 | + PAGE_SIZE << order); |
---|
| 862 | + } |
---|
1239 | 863 | ret = kasan_kmalloc_large(ret, size, flags); |
---|
| 864 | + /* As ret might get tagged, call kmemleak hook after KASAN. */ |
---|
1240 | 865 | kmemleak_alloc(ret, size, 1, flags); |
---|
1241 | 866 | return ret; |
---|
1242 | 867 | } |
---|
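kmalloc_order() serves requests too large for any kmalloc cache straight from the page allocator; the hunk above additionally charges such pages to NR_SLAB_UNRECLAIMABLE_B so they show up in the slab counters. A hedged illustration (whether a given size takes this path depends on KMALLOC_MAX_CACHE_SIZE, e.g. two pages under SLUB):

```c
static void *example_large_buf(void)
{
	/* On a SLUB kernel with 4 KiB pages this exceeds
	 * KMALLOC_MAX_CACHE_SIZE and is served by alloc_pages(order = 4). */
	return kmalloc(64 * 1024, GFP_KERNEL);
}
```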
.. | .. |
---|
1330 | 955 | void *slab_start(struct seq_file *m, loff_t *pos) |
---|
1331 | 956 | { |
---|
1332 | 957 | mutex_lock(&slab_mutex); |
---|
1333 | | - return seq_list_start(&slab_root_caches, *pos); |
---|
| 958 | + return seq_list_start(&slab_caches, *pos); |
---|
1334 | 959 | } |
---|
1335 | 960 | |
---|
1336 | 961 | void *slab_next(struct seq_file *m, void *p, loff_t *pos) |
---|
1337 | 962 | { |
---|
1338 | | - return seq_list_next(p, &slab_root_caches, pos); |
---|
| 963 | + return seq_list_next(p, &slab_caches, pos); |
---|
1339 | 964 | } |
---|
1340 | 965 | |
---|
1341 | 966 | void slab_stop(struct seq_file *m, void *p) |
---|
1342 | 967 | { |
---|
1343 | 968 | mutex_unlock(&slab_mutex); |
---|
1344 | | -} |
---|
1345 | | - |
---|
1346 | | -static void |
---|
1347 | | -memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info) |
---|
1348 | | -{ |
---|
1349 | | - struct kmem_cache *c; |
---|
1350 | | - struct slabinfo sinfo; |
---|
1351 | | - |
---|
1352 | | - if (!is_root_cache(s)) |
---|
1353 | | - return; |
---|
1354 | | - |
---|
1355 | | - for_each_memcg_cache(c, s) { |
---|
1356 | | - memset(&sinfo, 0, sizeof(sinfo)); |
---|
1357 | | - get_slabinfo(c, &sinfo); |
---|
1358 | | - |
---|
1359 | | - info->active_slabs += sinfo.active_slabs; |
---|
1360 | | - info->num_slabs += sinfo.num_slabs; |
---|
1361 | | - info->shared_avail += sinfo.shared_avail; |
---|
1362 | | - info->active_objs += sinfo.active_objs; |
---|
1363 | | - info->num_objs += sinfo.num_objs; |
---|
1364 | | - } |
---|
1365 | 969 | } |
---|
1366 | 970 | |
---|
1367 | 971 | static void cache_show(struct kmem_cache *s, struct seq_file *m) |
---|
.. | .. |
---|
1371 | 975 | memset(&sinfo, 0, sizeof(sinfo)); |
---|
1372 | 976 | get_slabinfo(s, &sinfo); |
---|
1373 | 977 | |
---|
1374 | | - memcg_accumulate_slabinfo(s, &sinfo); |
---|
1375 | | - |
---|
1376 | 978 | seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", |
---|
1377 | | - cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size, |
---|
| 979 | + s->name, sinfo.active_objs, sinfo.num_objs, s->size, |
---|
1378 | 980 | sinfo.objects_per_slab, (1 << sinfo.cache_order)); |
---|
1379 | 981 | |
---|
1380 | 982 | seq_printf(m, " : tunables %4u %4u %4u", |
---|
.. | .. |
---|
1387 | 989 | |
---|
1388 | 990 | static int slab_show(struct seq_file *m, void *p) |
---|
1389 | 991 | { |
---|
1390 | | - struct kmem_cache *s = list_entry(p, struct kmem_cache, root_caches_node); |
---|
| 992 | + struct kmem_cache *s = list_entry(p, struct kmem_cache, list); |
---|
1391 | 993 | |
---|
1392 | | - if (p == slab_root_caches.next) |
---|
| 994 | + if (p == slab_caches.next) |
---|
1393 | 995 | print_slabinfo_header(m); |
---|
1394 | 996 | cache_show(s, m); |
---|
1395 | 997 | return 0; |
---|
.. | .. |
---|
1416 | 1018 | pr_info("Name Used Total\n"); |
---|
1417 | 1019 | |
---|
1418 | 1020 | list_for_each_entry_safe(s, s2, &slab_caches, list) { |
---|
1419 | | - if (!is_root_cache(s) || (s->flags & SLAB_RECLAIM_ACCOUNT)) |
---|
| 1021 | + if (s->flags & SLAB_RECLAIM_ACCOUNT) |
---|
1420 | 1022 | continue; |
---|
1421 | 1023 | |
---|
1422 | 1024 | get_slabinfo(s, &sinfo); |
---|
1423 | 1025 | |
---|
1424 | 1026 | if (sinfo.num_objs > 0) |
---|
1425 | | - pr_info("%-17s %10luKB %10luKB\n", cache_name(s), |
---|
| 1027 | + pr_info("%-17s %10luKB %10luKB\n", s->name, |
---|
1426 | 1028 | (sinfo.active_objs * s->size) / 1024, |
---|
1427 | 1029 | (sinfo.num_objs * s->size) / 1024); |
---|
1428 | 1030 | } |
---|
1429 | 1031 | mutex_unlock(&slab_mutex); |
---|
1430 | 1032 | } |
---|
1431 | 1033 | |
---|
1432 | | -#if defined(CONFIG_MEMCG) |
---|
1433 | | -void *memcg_slab_start(struct seq_file *m, loff_t *pos) |
---|
1434 | | -{ |
---|
1435 | | - struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
---|
1436 | | - |
---|
1437 | | - mutex_lock(&slab_mutex); |
---|
1438 | | - return seq_list_start(&memcg->kmem_caches, *pos); |
---|
1439 | | -} |
---|
1440 | | - |
---|
1441 | | -void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos) |
---|
1442 | | -{ |
---|
1443 | | - struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
---|
1444 | | - |
---|
1445 | | - return seq_list_next(p, &memcg->kmem_caches, pos); |
---|
1446 | | -} |
---|
1447 | | - |
---|
1448 | | -void memcg_slab_stop(struct seq_file *m, void *p) |
---|
1449 | | -{ |
---|
1450 | | - mutex_unlock(&slab_mutex); |
---|
1451 | | -} |
---|
1452 | | - |
---|
| 1034 | +#if defined(CONFIG_MEMCG_KMEM) |
---|
1453 | 1035 | int memcg_slab_show(struct seq_file *m, void *p) |
---|
1454 | 1036 | { |
---|
1455 | | - struct kmem_cache *s = list_entry(p, struct kmem_cache, |
---|
1456 | | - memcg_params.kmem_caches_node); |
---|
1457 | | - struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
---|
1458 | | - |
---|
1459 | | - if (p == memcg->kmem_caches.next) |
---|
1460 | | - print_slabinfo_header(m); |
---|
1461 | | - cache_show(s, m); |
---|
| 1037 | + /* |
---|
| 1038 | + * Deprecated. |
---|
| 1039 | + * Please, take a look at tools/cgroup/slabinfo.py . |
---|
| 1040 | + */ |
---|
1462 | 1041 | return 0; |
---|
1463 | 1042 | } |
---|
1464 | 1043 | #endif |
---|
.. | .. |
---|
1488 | 1067 | return seq_open(file, &slabinfo_op); |
---|
1489 | 1068 | } |
---|
1490 | 1069 | |
---|
1491 | | -static const struct file_operations proc_slabinfo_operations = { |
---|
1492 | | - .open = slabinfo_open, |
---|
1493 | | - .read = seq_read, |
---|
1494 | | - .write = slabinfo_write, |
---|
1495 | | - .llseek = seq_lseek, |
---|
1496 | | - .release = seq_release, |
---|
| 1070 | +static const struct proc_ops slabinfo_proc_ops = { |
---|
| 1071 | + .proc_flags = PROC_ENTRY_PERMANENT, |
---|
| 1072 | + .proc_open = slabinfo_open, |
---|
| 1073 | + .proc_read = seq_read, |
---|
| 1074 | + .proc_write = slabinfo_write, |
---|
| 1075 | + .proc_lseek = seq_lseek, |
---|
| 1076 | + .proc_release = seq_release, |
---|
1497 | 1077 | }; |
---|
1498 | 1078 | |
---|
1499 | 1079 | static int __init slab_proc_init(void) |
---|
1500 | 1080 | { |
---|
1501 | | - proc_create("slabinfo", SLABINFO_RIGHTS, NULL, |
---|
1502 | | - &proc_slabinfo_operations); |
---|
| 1081 | + proc_create("slabinfo", SLABINFO_RIGHTS, NULL, &slabinfo_proc_ops); |
---|
1503 | 1082 | return 0; |
---|
1504 | 1083 | } |
---|
1505 | 1084 | module_init(slab_proc_init); |
---|
| 1085 | + |
---|
1506 | 1086 | #endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */ |
---|
1507 | 1087 | |
---|
1508 | 1088 | static __always_inline void *__do_krealloc(const void *p, size_t new_size, |
---|
1509 | 1089 | gfp_t flags) |
---|
1510 | 1090 | { |
---|
1511 | 1091 | void *ret; |
---|
1512 | | - size_t ks = 0; |
---|
| 1092 | + size_t ks; |
---|
1513 | 1093 | |
---|
1514 | | - if (p) |
---|
1515 | | - ks = ksize(p); |
---|
| 1094 | + /* Don't use instrumented ksize to allow precise KASAN poisoning. */ |
---|
| 1095 | + if (likely(!ZERO_OR_NULL_PTR(p))) { |
---|
| 1096 | + if (!kasan_check_byte(p)) |
---|
| 1097 | + return NULL; |
---|
| 1098 | + ks = kfence_ksize(p) ?: __ksize(p); |
---|
| 1099 | + } else |
---|
| 1100 | + ks = 0; |
---|
1516 | 1101 | |
---|
| 1102 | + /* If the object still fits, repoison it precisely. */ |
---|
1517 | 1103 | if (ks >= new_size) { |
---|
1518 | 1104 | p = kasan_krealloc((void *)p, new_size, flags); |
---|
1519 | 1105 | return (void *)p; |
---|
1520 | 1106 | } |
---|
1521 | 1107 | |
---|
1522 | 1108 | ret = kmalloc_track_caller(new_size, flags); |
---|
1523 | | - if (ret && p) |
---|
1524 | | - memcpy(ret, p, ks); |
---|
| 1109 | + if (ret && p) { |
---|
| 1110 | + /* Disable KASAN checks as the object's redzone is accessed. */ |
---|
| 1111 | + kasan_disable_current(); |
---|
| 1112 | + memcpy(ret, kasan_reset_tag(p), ks); |
---|
| 1113 | + kasan_enable_current(); |
---|
| 1114 | + } |
---|
1525 | 1115 | |
---|
1526 | 1116 | return ret; |
---|
1527 | 1117 | } |
---|
1528 | | - |
---|
1529 | | -/** |
---|
1530 | | - * __krealloc - like krealloc() but don't free @p. |
---|
1531 | | - * @p: object to reallocate memory for. |
---|
1532 | | - * @new_size: how many bytes of memory are required. |
---|
1533 | | - * @flags: the type of memory to allocate. |
---|
1534 | | - * |
---|
1535 | | - * This function is like krealloc() except it never frees the originally |
---|
1536 | | - * allocated buffer. Use this if you don't want to free the buffer immediately |
---|
1537 | | - * like, for example, with RCU. |
---|
1538 | | - */ |
---|
1539 | | -void *__krealloc(const void *p, size_t new_size, gfp_t flags) |
---|
1540 | | -{ |
---|
1541 | | - if (unlikely(!new_size)) |
---|
1542 | | - return ZERO_SIZE_PTR; |
---|
1543 | | - |
---|
1544 | | - return __do_krealloc(p, new_size, flags); |
---|
1545 | | - |
---|
1546 | | -} |
---|
1547 | | -EXPORT_SYMBOL(__krealloc); |
---|
1548 | 1118 | |
---|
1549 | 1119 | /** |
---|
1550 | 1120 | * krealloc - reallocate memory. The contents will remain unchanged. |
---|
.. | .. |
---|
1556 | 1126 | * lesser of the new and old sizes. If @p is %NULL, krealloc() |
---|
1557 | 1127 | * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a |
---|
1558 | 1128 | * %NULL pointer, the object pointed to is freed. |
---|
| 1129 | + * |
---|
| 1130 | + * Return: pointer to the allocated memory or %NULL in case of error |
---|
1559 | 1131 | */ |
---|
1560 | 1132 | void *krealloc(const void *p, size_t new_size, gfp_t flags) |
---|
1561 | 1133 | { |
---|
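A hedged usage sketch for krealloc() as documented above (names invented): on failure the original buffer is left untouched, so the caller must not overwrite its only pointer with the return value.

```c
static int example_grow(char **bufp, size_t new_len)
{
	char *tmp = krealloc(*bufp, new_len, GFP_KERNEL);

	if (!tmp)
		return -ENOMEM;	/* *bufp is still valid and unchanged */

	*bufp = tmp;		/* old buffer already released by krealloc() if it moved */
	return 0;
}
```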
.. | .. |
---|
1575 | 1147 | EXPORT_SYMBOL(krealloc); |
---|
1576 | 1148 | |
---|
1577 | 1149 | /** |
---|
1578 | | - * kzfree - like kfree but zero memory |
---|
| 1150 | + * kfree_sensitive - Clear sensitive information in memory before freeing |
---|
1579 | 1151 | * @p: object to free memory of |
---|
1580 | 1152 | * |
---|
1581 | 1153 | * The memory of the object @p points to is zeroed before freed. |
---|
1582 | | - * If @p is %NULL, kzfree() does nothing. |
---|
| 1154 | + * If @p is %NULL, kfree_sensitive() does nothing. |
---|
1583 | 1155 | * |
---|
1584 | 1156 | * Note: this function zeroes the whole allocated buffer which can be a good |
---|
1585 | 1157 | * deal bigger than the requested buffer size passed to kmalloc(). So be |
---|
1586 | 1158 | * careful when using this function in performance sensitive code. |
---|
1587 | 1159 | */ |
---|
1588 | | -void kzfree(const void *p) |
---|
| 1160 | +void kfree_sensitive(const void *p) |
---|
1589 | 1161 | { |
---|
1590 | 1162 | size_t ks; |
---|
1591 | 1163 | void *mem = (void *)p; |
---|
1592 | 1164 | |
---|
1593 | | - if (unlikely(ZERO_OR_NULL_PTR(mem))) |
---|
1594 | | - return; |
---|
1595 | 1165 | ks = ksize(mem); |
---|
1596 | | - memzero_explicit(mem, ks); |
---|
| 1166 | + if (ks) |
---|
| 1167 | + memzero_explicit(mem, ks); |
---|
1597 | 1168 | kfree(mem); |
---|
1598 | 1169 | } |
---|
1599 | | -EXPORT_SYMBOL(kzfree); |
---|
| 1170 | +EXPORT_SYMBOL(kfree_sensitive); |
---|
| 1171 | + |
---|
| 1172 | +/** |
---|
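A hedged usage sketch for the renamed helper (struct and function names invented): sensitive material is wiped before the memory is handed back, at the cost of touching the whole, possibly rounded-up, allocation.

```c
struct example_key {
	u8 secret[32];
};

static void example_destroy_key(struct example_key *key)
{
	/* memzero_explicit() over ksize(key) bytes, then kfree(). */
	kfree_sensitive(key);
}
```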
| 1173 | + * ksize - get the actual amount of memory allocated for a given object |
---|
| 1174 | + * @objp: Pointer to the object |
---|
| 1175 | + * |
---|
| 1176 | + * kmalloc may internally round up allocations and return more memory |
---|
| 1177 | + * than requested. ksize() can be used to determine the actual amount of |
---|
| 1178 | + * memory allocated. The caller may use this additional memory, even though |
---|
| 1179 | + * a smaller amount of memory was initially specified with the kmalloc call. |
---|
| 1180 | + * The caller must guarantee that objp points to a valid object previously |
---|
| 1181 | + * allocated with either kmalloc() or kmem_cache_alloc(). The object |
---|
| 1182 | + * must not be freed during the duration of the call. |
---|
| 1183 | + * |
---|
| 1184 | + * Return: size of the actual memory used by @objp in bytes |
---|
| 1185 | + */ |
---|
| 1186 | +size_t ksize(const void *objp) |
---|
| 1187 | +{ |
---|
| 1188 | + size_t size; |
---|
| 1189 | + |
---|
| 1190 | + /* |
---|
| 1191 | + * We need to first check that the pointer to the object is valid, and |
---|
| 1192 | + * only then unpoison the memory. The report printed from ksize() is |
---|
| 1193 | + * more useful, then when it's printed later when the behaviour could |
---|
| 1194 | + * be undefined due to a potential use-after-free or double-free. |
---|
| 1195 | + * |
---|
| 1196 | + * We use kasan_check_byte(), which is supported for the hardware |
---|
| 1197 | + * tag-based KASAN mode, unlike kasan_check_read/write(). |
---|
| 1198 | + * |
---|
| 1199 | + * If the pointed to memory is invalid, we return 0 to avoid users of |
---|
| 1200 | + * ksize() writing to and potentially corrupting the memory region. |
---|
| 1201 | + * |
---|
| 1202 | + * We want to perform the check before __ksize(), to avoid potentially |
---|
| 1203 | + * crashing in __ksize() due to accessing invalid metadata. |
---|
| 1204 | + */ |
---|
| 1205 | + if (unlikely(ZERO_OR_NULL_PTR(objp)) || !kasan_check_byte(objp)) |
---|
| 1206 | + return 0; |
---|
| 1207 | + |
---|
| 1208 | + size = kfence_ksize(objp) ?: __ksize(objp); |
---|
| 1209 | + /* |
---|
| 1210 | + * We assume that ksize callers could use whole allocated area, |
---|
| 1211 | + * so we need to unpoison this area. |
---|
| 1212 | + */ |
---|
| 1213 | + kasan_unpoison_range(objp, size); |
---|
| 1214 | + return size; |
---|
| 1215 | +} |
---|
| 1216 | +EXPORT_SYMBOL(ksize); |
---|
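A hedged illustration of the behaviour documented above (names invented): because kmalloc() rounds requests up to a size class, ksize() usually reports more usable space than was asked for, and the whole reported range may legitimately be used by the caller.

```c
static void example_ksize_demo(void)
{
	char *p = kmalloc(100, GFP_KERNEL);

	if (p)
		/* Typically prints 128 on a stock configuration. */
		pr_info("requested 100 bytes, ksize() reports %zu\n", ksize(p));
	kfree(p);
}
```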
1600 | 1217 | |
---|
1601 | 1218 | /* Tracepoints definitions. */ |
---|
1602 | 1219 | EXPORT_TRACEPOINT_SYMBOL(kmalloc); |
---|