.. | ..
 | 1 | +// SPDX-License-Identifier: GPL-2.0-only
1 | 2 | /*
2 | 3 |  * mm/percpu.c - percpu memory allocator
3 | 4 |  *
.. | ..
5 | 6 |  * Copyright (C) 2009 Tejun Heo <tj@kernel.org>
6 | 7 |  *
7 | 8 |  * Copyright (C) 2017 Facebook Inc.
8 |  | - * Copyright (C) 2017 Dennis Zhou <dennisszhou@gmail.com>
9 |  | - *
10 |  | - * This file is released under the GPLv2 license.
 | 9 | + * Copyright (C) 2017 Dennis Zhou <dennis@kernel.org>
11 | 10 |  *
12 | 11 |  * The percpu allocator handles both static and dynamic areas. Percpu
13 | 12 |  * areas are allocated in chunks which are divided into units. There is
.. | ..
38 | 37 |  * takes care of normal allocations.
39 | 38 |  *
40 | 39 |  * The allocator organizes chunks into lists according to free size and
41 |  | - * tries to allocate from the fullest chunk first. Each chunk is managed
42 |  | - * by a bitmap with metadata blocks. The allocation map is updated on
43 |  | - * every allocation and free to reflect the current state while the boundary
 | 40 | + * memcg-awareness. To make a percpu allocation memcg-aware the __GFP_ACCOUNT
 | 41 | + * flag should be passed. All memcg-aware allocations are sharing one set
 | 42 | + * of chunks and all unaccounted allocations and allocations performed
 | 43 | + * by processes belonging to the root memory cgroup are using the second set.
 | 44 | + *
 | 45 | + * The allocator tries to allocate from the fullest chunk first. Each chunk
 | 46 | + * is managed by a bitmap with metadata blocks. The allocation map is updated
 | 47 | + * on every allocation and free to reflect the current state while the boundary
44 | 48 |  * map is only updated on allocation. Each metadata block contains
45 | 49 |  * information to help mitigate the need to iterate over large portions
46 | 50 |  * of the bitmap. The reverse mapping from page to chunk is stored in
.. | ..
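For context, a minimal sketch of how a caller opts into the memcg-aware chunk set described in the comment above. alloc_percpu_gfp() is the existing percpu API; the wrapper function itself is purely illustrative:

    #include <linux/percpu.h>
    #include <linux/gfp.h>

    /* Illustrative caller: passing __GFP_ACCOUNT routes the allocation to
     * the memcg-aware chunk set; plain GFP_KERNEL uses the root set. */
    static u64 __percpu *alloc_accounted_counter(void)
    {
            return alloc_percpu_gfp(u64, GFP_KERNEL | __GFP_ACCOUNT);
    }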
65 | 69 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
66 | 70 |
67 | 71 | #include <linux/bitmap.h>
68 |  | -#include <linux/bootmem.h>
 | 72 | +#include <linux/memblock.h>
69 | 73 | #include <linux/err.h>
70 | 74 | #include <linux/lcm.h>
71 | 75 | #include <linux/list.h>
.. | ..
81 | 85 | #include <linux/workqueue.h>
82 | 86 | #include <linux/kmemleak.h>
83 | 87 | #include <linux/sched.h>
 | 88 | +#include <linux/sched/mm.h>
 | 89 | +#include <linux/memcontrol.h>
84 | 90 |
85 | 91 | #include <asm/cacheflush.h>
86 | 92 | #include <asm/sections.h>
.. | ..
94 | 100 |
95 | 101 | /* the slots are sorted by free bytes left, 1-31 bytes share the same slot */
96 | 102 | #define PCPU_SLOT_BASE_SHIFT 5
 | 103 | +/* chunks in slots below this are subject to being sidelined on failed alloc */
 | 104 | +#define PCPU_SLOT_FAIL_THRESHOLD 3
97 | 105 |
98 | 106 | #define PCPU_EMPTY_POP_PAGES_LOW 2
99 | 107 | #define PCPU_EMPTY_POP_PAGES_HIGH 4
.. | ..
158 | 166 | DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */
159 | 167 | static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop, map ext */
160 | 168 |
161 |  | -struct list_head *pcpu_slot __ro_after_init; /* chunk list slots */
 | 169 | +struct list_head *pcpu_chunk_lists __ro_after_init; /* chunk list slots */
162 | 170 |
163 | 171 | /* chunks which need their map areas extended, protected by pcpu_lock */
164 | 172 | static LIST_HEAD(pcpu_map_extend_chunks);
165 | 173 |
166 | 174 | /*
167 |  | - * The number of empty populated pages, protected by pcpu_lock. The
168 |  | - * reserved chunk doesn't contribute to the count.
 | 175 | + * The number of empty populated pages by chunk type, protected by pcpu_lock.
 | 176 | + * The reserved chunk doesn't contribute to the count.
169 | 177 |  */
170 |  | -int pcpu_nr_empty_pop_pages;
 | 178 | +int pcpu_nr_empty_pop_pages[PCPU_NR_CHUNK_TYPES];
171 | 179 |
172 | 180 | /*
173 | 181 |  * The number of populated pages in use by the allocator, protected by
.. | ..
231 | 239 |
232 | 240 | static int pcpu_chunk_slot(const struct pcpu_chunk *chunk)
233 | 241 | {
234 |  | - 	if (chunk->free_bytes < PCPU_MIN_ALLOC_SIZE || chunk->contig_bits == 0)
 | 242 | +	const struct pcpu_block_md *chunk_md = &chunk->chunk_md;
 | 243 | +
 | 244 | +	if (chunk->free_bytes < PCPU_MIN_ALLOC_SIZE ||
 | 245 | +	    chunk_md->contig_hint == 0)
235 | 246 | 		return 0;
236 | 247 |
237 |  | -	return pcpu_size_to_slot(chunk->free_bytes);
 | 248 | +	return pcpu_size_to_slot(chunk_md->contig_hint * PCPU_MIN_ALLOC_SIZE);
238 | 249 | }
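A quick worked example of what this change does (values illustrative; PCPU_MIN_ALLOC_SIZE is 4 bytes and pcpu_size_to_slot() is unchanged elsewhere in this file): a fragmented chunk with 2048 free bytes whose largest contiguous run is only contig_hint = 24 bits now sorts by 24 * 4 = 96 bytes rather than by 2048, so it lands in a much lower slot and no longer advertises space it cannot serve in one piece.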
239 | 250 |
240 | 251 | /* set the pointer to a chunk in a page struct */
.. | ..
266 | 277 | 		pcpu_unit_page_offset(cpu, page_idx);
267 | 278 | }
268 | 279 |
269 |  | -static void pcpu_next_unpop(unsigned long *bitmap, int *rs, int *re, int end)
270 |  | -{
271 |  | -	*rs = find_next_zero_bit(bitmap, end, *rs);
272 |  | -	*re = find_next_bit(bitmap, end, *rs + 1);
273 |  | -}
274 |  | -
275 |  | -static void pcpu_next_pop(unsigned long *bitmap, int *rs, int *re, int end)
276 |  | -{
277 |  | -	*rs = find_next_bit(bitmap, end, *rs);
278 |  | -	*re = find_next_zero_bit(bitmap, end, *rs + 1);
279 |  | -}
280 |  | -
281 |  | -/*
282 |  | - * Bitmap region iterators. Iterates over the bitmap between
283 |  | - * [@start, @end) in @chunk. @rs and @re should be integer variables
284 |  | - * and will be set to start and end index of the current free region.
285 |  | - */
286 |  | -#define pcpu_for_each_unpop_region(bitmap, rs, re, start, end) \
287 |  | -	for ((rs) = (start), pcpu_next_unpop((bitmap), &(rs), &(re), (end)); \
288 |  | -	     (rs) < (re); \
289 |  | -	     (rs) = (re) + 1, pcpu_next_unpop((bitmap), &(rs), &(re), (end)))
290 |  | -
291 |  | -#define pcpu_for_each_pop_region(bitmap, rs, re, start, end) \
292 |  | -	for ((rs) = (start), pcpu_next_pop((bitmap), &(rs), &(re), (end)); \
293 |  | -	     (rs) < (re); \
294 |  | -	     (rs) = (re) + 1, pcpu_next_pop((bitmap), &(rs), &(re), (end)))
295 |  | -
296 | 280 | /*
297 | 281 |  * The following are helper functions to help access bitmaps and convert
298 | 282 |  * between bitmap offsets to address offsets.
.. | ..
316 | 300 | static unsigned long pcpu_block_off_to_off(int index, int off)
317 | 301 | {
318 | 302 | 	return index * PCPU_BITMAP_BLOCK_BITS + off;
 | 303 | +}
 | 304 | +
 | 305 | +/*
 | 306 | + * pcpu_next_hint - determine which hint to use
 | 307 | + * @block: block of interest
 | 308 | + * @alloc_bits: size of allocation
 | 309 | + *
 | 310 | + * This determines if we should scan based on the scan_hint or first_free.
 | 311 | + * In general, we want to scan from first_free to fulfill allocations by
 | 312 | + * first fit. However, if we know a scan_hint at position scan_hint_start
 | 313 | + * cannot fulfill an allocation, we can begin scanning from there knowing
 | 314 | + * the contig_hint will be our fallback.
 | 315 | + */
 | 316 | +static int pcpu_next_hint(struct pcpu_block_md *block, int alloc_bits)
 | 317 | +{
 | 318 | +	/*
 | 319 | +	 * The three conditions below determine if we can skip past the
 | 320 | +	 * scan_hint. First, does the scan hint exist. Second, is the
 | 321 | +	 * contig_hint after the scan_hint (possibly not true iff
 | 322 | +	 * contig_hint == scan_hint). Third, is the allocation request
 | 323 | +	 * larger than the scan_hint.
 | 324 | +	 */
 | 325 | +	if (block->scan_hint &&
 | 326 | +	    block->contig_hint_start > block->scan_hint_start &&
 | 327 | +	    alloc_bits > block->scan_hint)
 | 328 | +		return block->scan_hint_start + block->scan_hint;
 | 329 | +
 | 330 | +	return block->first_free;
319 | 331 | }
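A self-contained, user-space sketch of the decision above; the struct is a pared-down stand-in for the kernel's struct pcpu_block_md and the values are illustrative:

    #include <assert.h>

    struct block_md {
            int first_free;
            int scan_hint;
            int scan_hint_start;
            int contig_hint;
            int contig_hint_start;
    };

    /* mirrors the three-condition test in pcpu_next_hint() */
    static int next_hint(const struct block_md *b, int alloc_bits)
    {
            if (b->scan_hint &&
                b->contig_hint_start > b->scan_hint_start &&
                alloc_bits > b->scan_hint)
                    return b->scan_hint_start + b->scan_hint;
            return b->first_free;
    }

    int main(void)
    {
            struct block_md b = {
                    .first_free = 0,
                    .scan_hint = 2, .scan_hint_start = 4,
                    .contig_hint = 8, .contig_hint_start = 10,
            };

            assert(next_hint(&b, 3) == 6); /* 3 bits can't fit the 2-bit hole */
            assert(next_hint(&b, 2) == 0); /* 2 bits might, so use first_free */
            return 0;
    }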
320 | 332 |
321 | 333 | /**
.. | ..
413 | 425 | 	if (block->contig_hint &&
414 | 426 | 	    block->contig_hint_start >= block_off &&
415 | 427 | 	    block->contig_hint >= *bits + alloc_bits) {
 | 428 | +		int start = pcpu_next_hint(block, alloc_bits);
 | 429 | +
416 | 430 | 		*bits += alloc_bits + block->contig_hint_start -
417 |  | -			 block->first_free;
418 |  | -		*bit_off = pcpu_block_off_to_off(i, block->first_free);
 | 431 | +			 start;
 | 432 | +		*bit_off = pcpu_block_off_to_off(i, start);
419 | 433 | 		return;
420 | 434 | 	}
421 | 435 | 	/* reset to satisfy the second predicate above */
.. | ..
474 | 488 | 	if (size <= PAGE_SIZE)
475 | 489 | 		return kzalloc(size, gfp);
476 | 490 | 	else
477 |  | -		return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL);
 | 491 | +		return __vmalloc(size, gfp | __GFP_ZERO);
478 | 492 | }
479 | 493 |
480 | 494 | /**
.. | ..
486 | 500 | static void pcpu_mem_free(void *ptr)
487 | 501 | {
488 | 502 | 	kvfree(ptr);
 | 503 | +}
 | 504 | +
 | 505 | +static void __pcpu_chunk_move(struct pcpu_chunk *chunk, int slot,
 | 506 | +			      bool move_front)
 | 507 | +{
 | 508 | +	if (chunk != pcpu_reserved_chunk) {
 | 509 | +		struct list_head *pcpu_slot;
 | 510 | +
 | 511 | +		pcpu_slot = pcpu_chunk_list(pcpu_chunk_type(chunk));
 | 512 | +		if (move_front)
 | 513 | +			list_move(&chunk->list, &pcpu_slot[slot]);
 | 514 | +		else
 | 515 | +			list_move_tail(&chunk->list, &pcpu_slot[slot]);
 | 516 | +	}
 | 517 | +}
 | 518 | +
 | 519 | +static void pcpu_chunk_move(struct pcpu_chunk *chunk, int slot)
 | 520 | +{
 | 521 | +	__pcpu_chunk_move(chunk, slot, true);
489 | 522 | }
490 | 523 |
491 | 524 | /**
.. | ..
505 | 538 | {
506 | 539 | 	int nslot = pcpu_chunk_slot(chunk);
507 | 540 |
508 |  | -	if (chunk != pcpu_reserved_chunk && oslot != nslot) {
509 |  | -		if (oslot < nslot)
510 |  | -			list_move(&chunk->list, &pcpu_slot[nslot]);
511 |  | -		else
512 |  | -			list_move_tail(&chunk->list, &pcpu_slot[nslot]);
513 |  | -	}
 | 541 | +	if (oslot != nslot)
 | 542 | +		__pcpu_chunk_move(chunk, nslot, oslot < nslot);
514 | 543 | }
515 | 544 |
516 |  | -/**
517 |  | - * pcpu_cnt_pop_pages- counts populated backing pages in range
 | 545 | +/*
 | 546 | + * pcpu_update_empty_pages - update empty page counters
518 | 547 |  * @chunk: chunk of interest
519 |  | - * @bit_off: start offset
520 |  | - * @bits: size of area to check
 | 548 | + * @nr: nr of empty pages
521 | 549 |  *
522 |  | - * Calculates the number of populated pages in the region
523 |  | - * [page_start, page_end). This keeps track of how many empty populated
524 |  | - * pages are available and decide if async work should be scheduled.
525 |  | - *
526 |  | - * RETURNS:
527 |  | - * The nr of populated pages.
 | 550 | + * This is used to keep track of the empty pages now based on the premise
 | 551 | + * a md_block covers a page. The hint update functions recognize if a block
 | 552 | + * is made full or broken to calculate deltas for keeping track of free pages.
528 | 553 |  */
529 |  | -static inline int pcpu_cnt_pop_pages(struct pcpu_chunk *chunk, int bit_off,
530 |  | -				     int bits)
 | 554 | +static inline void pcpu_update_empty_pages(struct pcpu_chunk *chunk, int nr)
531 | 555 | {
532 |  | -	int page_start = PFN_UP(bit_off * PCPU_MIN_ALLOC_SIZE);
533 |  | -	int page_end = PFN_DOWN((bit_off + bits) * PCPU_MIN_ALLOC_SIZE);
534 |  | -
535 |  | -	if (page_start >= page_end)
536 |  | -		return 0;
537 |  | -
538 |  | -	/*
539 |  | -	 * bitmap_weight counts the number of bits set in a bitmap up to
540 |  | -	 * the specified number of bits. This is counting the populated
541 |  | -	 * pages up to page_end and then subtracting the populated pages
542 |  | -	 * up to page_start to count the populated pages in
543 |  | -	 * [page_start, page_end).
544 |  | -	 */
545 |  | -	return bitmap_weight(chunk->populated, page_end) -
546 |  | -	       bitmap_weight(chunk->populated, page_start);
547 |  | -}
548 |  | -
549 |  | -/**
550 |  | - * pcpu_chunk_update - updates the chunk metadata given a free area
551 |  | - * @chunk: chunk of interest
552 |  | - * @bit_off: chunk offset
553 |  | - * @bits: size of free area
554 |  | - *
555 |  | - * This updates the chunk's contig hint and starting offset given a free area.
556 |  | - * Choose the best starting offset if the contig hint is equal.
557 |  | - */
558 |  | -static void pcpu_chunk_update(struct pcpu_chunk *chunk, int bit_off, int bits)
559 |  | -{
560 |  | -	if (bits > chunk->contig_bits) {
561 |  | -		chunk->contig_bits_start = bit_off;
562 |  | -		chunk->contig_bits = bits;
563 |  | -	} else if (bits == chunk->contig_bits && chunk->contig_bits_start &&
564 |  | -		   (!bit_off ||
565 |  | -		    __ffs(bit_off) > __ffs(chunk->contig_bits_start))) {
566 |  | -		/* use the start with the best alignment */
567 |  | -		chunk->contig_bits_start = bit_off;
568 |  | -	}
569 |  | -}
570 |  | -
571 |  | -/**
572 |  | - * pcpu_chunk_refresh_hint - updates metadata about a chunk
573 |  | - * @chunk: chunk of interest
574 |  | - *
575 |  | - * Iterates over the metadata blocks to find the largest contig area.
576 |  | - * It also counts the populated pages and uses the delta to update the
577 |  | - * global count.
578 |  | - *
579 |  | - * Updates:
580 |  | - *	chunk->contig_bits
581 |  | - *	chunk->contig_bits_start
582 |  | - *	nr_empty_pop_pages (chunk and global)
583 |  | - */
584 |  | -static void pcpu_chunk_refresh_hint(struct pcpu_chunk *chunk)
585 |  | -{
586 |  | -	int bit_off, bits, nr_empty_pop_pages;
587 |  | -
588 |  | -	/* clear metadata */
589 |  | -	chunk->contig_bits = 0;
590 |  | -
591 |  | -	bit_off = chunk->first_bit;
592 |  | -	bits = nr_empty_pop_pages = 0;
593 |  | -	pcpu_for_each_md_free_region(chunk, bit_off, bits) {
594 |  | -		pcpu_chunk_update(chunk, bit_off, bits);
595 |  | -
596 |  | -		nr_empty_pop_pages += pcpu_cnt_pop_pages(chunk, bit_off, bits);
597 |  | -	}
598 |  | -
599 |  | -	/*
600 |  | -	 * Keep track of nr_empty_pop_pages.
601 |  | -	 *
602 |  | -	 * The chunk maintains the previous number of free pages it held,
603 |  | -	 * so the delta is used to update the global counter. The reserved
604 |  | -	 * chunk is not part of the free page count as they are populated
605 |  | -	 * at init and are special to serving reserved allocations.
606 |  | -	 */
 | 556 | +	chunk->nr_empty_pop_pages += nr;
607 | 557 | 	if (chunk != pcpu_reserved_chunk)
608 |  | -		pcpu_nr_empty_pop_pages +=
609 |  | -			(nr_empty_pop_pages - chunk->nr_empty_pop_pages);
 | 558 | +		pcpu_nr_empty_pop_pages[pcpu_chunk_type(chunk)] += nr;
 | 559 | +}
610 | 560 |
611 |  | -	chunk->nr_empty_pop_pages = nr_empty_pop_pages;
 | 561 | +/*
 | 562 | + * pcpu_region_overlap - determines if two regions overlap
 | 563 | + * @a: start of first region, inclusive
 | 564 | + * @b: end of first region, exclusive
 | 565 | + * @x: start of second region, inclusive
 | 566 | + * @y: end of second region, exclusive
 | 567 | + *
 | 568 | + * This is used to determine if the hint region [a, b) overlaps with the
 | 569 | + * allocated region [x, y).
 | 570 | + */
 | 571 | +static inline bool pcpu_region_overlap(int a, int b, int x, int y)
 | 572 | +{
 | 573 | +	return (a < y) && (x < b);
612 | 574 | }
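The test is the standard half-open interval overlap check. A user-space sketch with illustrative values:

    #include <assert.h>
    #include <stdbool.h>

    /* same predicate as pcpu_region_overlap(): [a, b) intersects [x, y) */
    static bool region_overlap(int a, int b, int x, int y)
    {
            return (a < y) && (x < b);
    }

    int main(void)
    {
            assert(region_overlap(0, 4, 2, 6));  /* [0,4) and [2,6) share [2,4) */
            assert(!region_overlap(0, 4, 4, 8)); /* touching at 4 is no overlap */
            return 0;
    }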
613 | 575 |
614 | 576 | /**
.. | ..
629 | 591 | 	if (start == 0)
630 | 592 | 		block->left_free = contig;
631 | 593 |
632 |  | -	if (end == PCPU_BITMAP_BLOCK_BITS)
 | 594 | +	if (end == block->nr_bits)
633 | 595 | 		block->right_free = contig;
634 | 596 |
635 | 597 | 	if (contig > block->contig_hint) {
 | 598 | +		/* promote the old contig_hint to be the new scan_hint */
 | 599 | +		if (start > block->contig_hint_start) {
 | 600 | +			if (block->contig_hint > block->scan_hint) {
 | 601 | +				block->scan_hint_start =
 | 602 | +					block->contig_hint_start;
 | 603 | +				block->scan_hint = block->contig_hint;
 | 604 | +			} else if (start < block->scan_hint_start) {
 | 605 | +				/*
 | 606 | +				 * The old contig_hint == scan_hint. But, the
 | 607 | +				 * new contig is larger so hold the invariant
 | 608 | +				 * scan_hint_start < contig_hint_start.
 | 609 | +				 */
 | 610 | +				block->scan_hint = 0;
 | 611 | +			}
 | 612 | +		} else {
 | 613 | +			block->scan_hint = 0;
 | 614 | +		}
636 | 615 | 		block->contig_hint_start = start;
637 | 616 | 		block->contig_hint = contig;
638 |  | -	} else if (block->contig_hint_start && contig == block->contig_hint &&
639 |  | -		   (!start || __ffs(start) > __ffs(block->contig_hint_start))) {
640 |  | -		/* use the start with the best alignment */
641 |  | -		block->contig_hint_start = start;
 | 617 | +	} else if (contig == block->contig_hint) {
 | 618 | +		if (block->contig_hint_start &&
 | 619 | +		    (!start ||
 | 620 | +		     __ffs(start) > __ffs(block->contig_hint_start))) {
 | 621 | +			/* start has a better alignment so use it */
 | 622 | +			block->contig_hint_start = start;
 | 623 | +			if (start < block->scan_hint_start &&
 | 624 | +			    block->contig_hint > block->scan_hint)
 | 625 | +				block->scan_hint = 0;
 | 626 | +		} else if (start > block->scan_hint_start ||
 | 627 | +			   block->contig_hint > block->scan_hint) {
 | 628 | +			/*
 | 629 | +			 * Knowing contig == contig_hint, update the scan_hint
 | 630 | +			 * if it is farther than or larger than the current
 | 631 | +			 * scan_hint.
 | 632 | +			 */
 | 633 | +			block->scan_hint_start = start;
 | 634 | +			block->scan_hint = contig;
 | 635 | +		}
 | 636 | +	} else {
 | 637 | +		/*
 | 638 | +		 * The region is smaller than the contig_hint. So only update
 | 639 | +		 * the scan_hint if it is larger than or equal and farther than
 | 640 | +		 * the current scan_hint.
 | 641 | +		 */
 | 642 | +		if ((start < block->contig_hint_start &&
 | 643 | +		     (contig > block->scan_hint ||
 | 644 | +		      (contig == block->scan_hint &&
 | 645 | +		       start > block->scan_hint_start)))) {
 | 646 | +			block->scan_hint_start = start;
 | 647 | +			block->scan_hint = contig;
 | 648 | +		}
642 | 649 | 	}
 | 650 | +}
 | 651 | +
 | 652 | +/*
 | 653 | + * pcpu_block_update_scan - update a block given a free area from a scan
 | 654 | + * @chunk: chunk of interest
 | 655 | + * @bit_off: chunk offset
 | 656 | + * @bits: size of free area
 | 657 | + *
 | 658 | + * Finding the final allocation spot first goes through pcpu_find_block_fit()
 | 659 | + * to find a block that can hold the allocation and then pcpu_alloc_area()
 | 660 | + * where a scan is used. When allocations require specific alignments,
 | 661 | + * we can inadvertently create holes which will not be seen in the alloc
 | 662 | + * or free paths.
 | 663 | + *
 | 664 | + * This takes a given free area hole and updates a block as it may change the
 | 665 | + * scan_hint. We need to scan backwards to ensure we don't miss free bits
 | 666 | + * from alignment.
 | 667 | + */
 | 668 | +static void pcpu_block_update_scan(struct pcpu_chunk *chunk, int bit_off,
 | 669 | +				   int bits)
 | 670 | +{
 | 671 | +	int s_off = pcpu_off_to_block_off(bit_off);
 | 672 | +	int e_off = s_off + bits;
 | 673 | +	int s_index, l_bit;
 | 674 | +	struct pcpu_block_md *block;
 | 675 | +
 | 676 | +	if (e_off > PCPU_BITMAP_BLOCK_BITS)
 | 677 | +		return;
 | 678 | +
 | 679 | +	s_index = pcpu_off_to_block_index(bit_off);
 | 680 | +	block = chunk->md_blocks + s_index;
 | 681 | +
 | 682 | +	/* scan backwards in case of alignment skipping free bits */
 | 683 | +	l_bit = find_last_bit(pcpu_index_alloc_map(chunk, s_index), s_off);
 | 684 | +	s_off = (s_off == l_bit) ? 0 : l_bit + 1;
 | 685 | +
 | 686 | +	pcpu_block_update(block, s_off, e_off);
 | 687 | +}
 | 688 | +
 | 689 | +/**
 | 690 | + * pcpu_chunk_refresh_hint - updates metadata about a chunk
 | 691 | + * @chunk: chunk of interest
 | 692 | + * @full_scan: if we should scan from the beginning
 | 693 | + *
 | 694 | + * Iterates over the metadata blocks to find the largest contig area.
 | 695 | + * A full scan can be avoided on the allocation path as this is triggered
 | 696 | + * if we broke the contig_hint. In doing so, the scan_hint will be before
 | 697 | + * the contig_hint or after if the scan_hint == contig_hint. This cannot
 | 698 | + * be prevented on freeing as we want to find the largest area possibly
 | 699 | + * spanning blocks.
 | 700 | + */
 | 701 | +static void pcpu_chunk_refresh_hint(struct pcpu_chunk *chunk, bool full_scan)
 | 702 | +{
 | 703 | +	struct pcpu_block_md *chunk_md = &chunk->chunk_md;
 | 704 | +	int bit_off, bits;
 | 705 | +
 | 706 | +	/* promote scan_hint to contig_hint */
 | 707 | +	if (!full_scan && chunk_md->scan_hint) {
 | 708 | +		bit_off = chunk_md->scan_hint_start + chunk_md->scan_hint;
 | 709 | +		chunk_md->contig_hint_start = chunk_md->scan_hint_start;
 | 710 | +		chunk_md->contig_hint = chunk_md->scan_hint;
 | 711 | +		chunk_md->scan_hint = 0;
 | 712 | +	} else {
 | 713 | +		bit_off = chunk_md->first_free;
 | 714 | +		chunk_md->contig_hint = 0;
 | 715 | +	}
 | 716 | +
 | 717 | +	bits = 0;
 | 718 | +	pcpu_for_each_md_free_region(chunk, bit_off, bits)
 | 719 | +		pcpu_block_update(chunk_md, bit_off, bit_off + bits);
643 | 720 | }
644 | 721 |
645 | 722 | /**
.. | ..
654 | 731 | {
655 | 732 | 	struct pcpu_block_md *block = chunk->md_blocks + index;
656 | 733 | 	unsigned long *alloc_map = pcpu_index_alloc_map(chunk, index);
657 |  | -	int rs, re;	/* region start, region end */
 | 734 | +	unsigned int rs, re, start;	/* region start, region end */
658 | 735 |
659 |  | -	/* clear hints */
660 |  | -	block->contig_hint = 0;
661 |  | -	block->left_free = block->right_free = 0;
 | 736 | +	/* promote scan_hint to contig_hint */
 | 737 | +	if (block->scan_hint) {
 | 738 | +		start = block->scan_hint_start + block->scan_hint;
 | 739 | +		block->contig_hint_start = block->scan_hint_start;
 | 740 | +		block->contig_hint = block->scan_hint;
 | 741 | +		block->scan_hint = 0;
 | 742 | +	} else {
 | 743 | +		start = block->first_free;
 | 744 | +		block->contig_hint = 0;
 | 745 | +	}
 | 746 | +
 | 747 | +	block->right_free = 0;
662 | 748 |
663 | 749 | 	/* iterate over free areas and update the contig hints */
664 |  | -	pcpu_for_each_unpop_region(alloc_map, rs, re, block->first_free,
665 |  | -				   PCPU_BITMAP_BLOCK_BITS) {
 | 750 | +	bitmap_for_each_clear_region(alloc_map, rs, re, start,
 | 751 | +				     PCPU_BITMAP_BLOCK_BITS)
666 | 752 | 		pcpu_block_update(block, rs, re);
667 |  | -	}
668 | 753 | }
669 | 754 |
670 | 755 | /**
.. | ..
680 | 765 | static void pcpu_block_update_hint_alloc(struct pcpu_chunk *chunk, int bit_off,
681 | 766 | 					 int bits)
682 | 767 | {
 | 768 | +	struct pcpu_block_md *chunk_md = &chunk->chunk_md;
 | 769 | +	int nr_empty_pages = 0;
683 | 770 | 	struct pcpu_block_md *s_block, *e_block, *block;
684 | 771 | 	int s_index, e_index;	/* block indexes of the freed allocation */
685 | 772 | 	int s_off, e_off;	/* block offsets of the freed allocation */
.. | ..
704 | 791 | 	 * If the allocation breaks the contig_hint, a scan is required to
705 | 792 | 	 * restore this hint.
706 | 793 | 	 */
 | 794 | +	if (s_block->contig_hint == PCPU_BITMAP_BLOCK_BITS)
 | 795 | +		nr_empty_pages++;
 | 796 | +
707 | 797 | 	if (s_off == s_block->first_free)
708 | 798 | 		s_block->first_free = find_next_zero_bit(
709 | 799 | 					pcpu_index_alloc_map(chunk, s_index),
710 | 800 | 					PCPU_BITMAP_BLOCK_BITS,
711 | 801 | 					s_off + bits);
712 | 802 |
713 |  | -	if (s_off >= s_block->contig_hint_start &&
714 |  | -	    s_off < s_block->contig_hint_start + s_block->contig_hint) {
 | 803 | +	if (pcpu_region_overlap(s_block->scan_hint_start,
 | 804 | +				s_block->scan_hint_start + s_block->scan_hint,
 | 805 | +				s_off,
 | 806 | +				s_off + bits))
 | 807 | +		s_block->scan_hint = 0;
 | 808 | +
 | 809 | +	if (pcpu_region_overlap(s_block->contig_hint_start,
 | 810 | +				s_block->contig_hint_start +
 | 811 | +				s_block->contig_hint,
 | 812 | +				s_off,
 | 813 | +				s_off + bits)) {
715 | 814 | 		/* block contig hint is broken - scan to fix it */
 | 815 | +		if (!s_off)
 | 816 | +			s_block->left_free = 0;
716 | 817 | 		pcpu_block_refresh_hint(chunk, s_index);
717 | 818 | 	} else {
718 | 819 | 		/* update left and right contig manually */
.. | ..
728 | 829 | 	 * Update e_block.
729 | 830 | 	 */
730 | 831 | 	if (s_index != e_index) {
 | 832 | +		if (e_block->contig_hint == PCPU_BITMAP_BLOCK_BITS)
 | 833 | +			nr_empty_pages++;
 | 834 | +
731 | 835 | 		/*
732 | 836 | 		 * When the allocation is across blocks, the end is along
733 | 837 | 		 * the left part of the e_block.
.. | ..
740 | 844 | 			/* reset the block */
741 | 845 | 			e_block++;
742 | 846 | 		} else {
 | 847 | +			if (e_off > e_block->scan_hint_start)
 | 848 | +				e_block->scan_hint = 0;
 | 849 | +
 | 850 | +			e_block->left_free = 0;
743 | 851 | 			if (e_off > e_block->contig_hint_start) {
744 | 852 | 				/* contig hint is broken - scan to fix it */
745 | 853 | 				pcpu_block_refresh_hint(chunk, e_index);
746 | 854 | 			} else {
747 |  | -				e_block->left_free = 0;
748 | 855 | 				e_block->right_free =
749 | 856 | 					min_t(int, e_block->right_free,
750 | 857 | 					      PCPU_BITMAP_BLOCK_BITS - e_off);
.. | ..
752 | 859 | 		}
753 | 860 |
754 | 861 | 		/* update in-between md_blocks */
 | 862 | +		nr_empty_pages += (e_index - s_index - 1);
755 | 863 | 		for (block = s_block + 1; block < e_block; block++) {
 | 864 | +			block->scan_hint = 0;
756 | 865 | 			block->contig_hint = 0;
757 | 866 | 			block->left_free = 0;
758 | 867 | 			block->right_free = 0;
759 | 868 | 		}
760 | 869 | 	}
761 | 870 |
 | 871 | +	if (nr_empty_pages)
 | 872 | +		pcpu_update_empty_pages(chunk, -nr_empty_pages);
 | 873 | +
 | 874 | +	if (pcpu_region_overlap(chunk_md->scan_hint_start,
 | 875 | +				chunk_md->scan_hint_start +
 | 876 | +				chunk_md->scan_hint,
 | 877 | +				bit_off,
 | 878 | +				bit_off + bits))
 | 879 | +		chunk_md->scan_hint = 0;
 | 880 | +
762 | 881 | 	/*
763 | 882 | 	 * The only time a full chunk scan is required is if the chunk
764 | 883 | 	 * contig hint is broken. Otherwise, it means a smaller space
765 | 884 | 	 * was used and therefore the chunk contig hint is still correct.
766 | 885 | 	 */
767 |  | -	if (bit_off >= chunk->contig_bits_start &&
768 |  | -	    bit_off < chunk->contig_bits_start + chunk->contig_bits)
769 |  | -		pcpu_chunk_refresh_hint(chunk);
 | 886 | +	if (pcpu_region_overlap(chunk_md->contig_hint_start,
 | 887 | +				chunk_md->contig_hint_start +
 | 888 | +				chunk_md->contig_hint,
 | 889 | +				bit_off,
 | 890 | +				bit_off + bits))
 | 891 | +		pcpu_chunk_refresh_hint(chunk, false);
770 | 892 | }
771 | 893 |
772 | 894 | /**
.. | ..
782 | 904 |  *
783 | 905 |  * A chunk update is triggered if a page becomes free, a block becomes free,
784 | 906 |  * or the free spans across blocks. This tradeoff is to minimize iterating
785 |  | - * over the block metadata to update chunk->contig_bits. chunk->contig_bits
786 |  | - * may be off by up to a page, but it will never be more than the available
787 |  | - * space. If the contig hint is contained in one block, it will be accurate.
 | 907 | + * over the block metadata to update chunk_md->contig_hint.
 | 908 | + * chunk_md->contig_hint may be off by up to a page, but it will never be more
 | 909 | + * than the available space. If the contig hint is contained in one block, it
 | 910 | + * will be accurate.
788 | 911 |  */
789 | 912 | static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off,
790 | 913 | 					int bits)
791 | 914 | {
 | 915 | +	int nr_empty_pages = 0;
792 | 916 | 	struct pcpu_block_md *s_block, *e_block, *block;
793 | 917 | 	int s_index, e_index;	/* block indexes of the freed allocation */
794 | 918 | 	int s_off, e_off;	/* block offsets of the freed allocation */
.. | ..
842 | 966 |
843 | 967 | 	/* update s_block */
844 | 968 | 	e_off = (s_index == e_index) ? end : PCPU_BITMAP_BLOCK_BITS;
 | 969 | +	if (!start && e_off == PCPU_BITMAP_BLOCK_BITS)
 | 970 | +		nr_empty_pages++;
845 | 971 | 	pcpu_block_update(s_block, start, e_off);
846 | 972 |
847 | 973 | 	/* freeing in the same block */
848 | 974 | 	if (s_index != e_index) {
849 | 975 | 		/* update e_block */
 | 976 | +		if (end == PCPU_BITMAP_BLOCK_BITS)
 | 977 | +			nr_empty_pages++;
850 | 978 | 		pcpu_block_update(e_block, 0, end);
851 | 979 |
852 | 980 | 		/* reset md_blocks in the middle */
 | 981 | +		nr_empty_pages += (e_index - s_index - 1);
853 | 982 | 		for (block = s_block + 1; block < e_block; block++) {
854 | 983 | 			block->first_free = 0;
 | 984 | +			block->scan_hint = 0;
855 | 985 | 			block->contig_hint_start = 0;
856 | 986 | 			block->contig_hint = PCPU_BITMAP_BLOCK_BITS;
857 | 987 | 			block->left_free = PCPU_BITMAP_BLOCK_BITS;
.. | ..
859 | 989 | 		}
860 | 990 | 	}
861 | 991 |
 | 992 | +	if (nr_empty_pages)
 | 993 | +		pcpu_update_empty_pages(chunk, nr_empty_pages);
 | 994 | +
862 | 995 | 	/*
863 |  | -	 * Refresh chunk metadata when the free makes a page free, a block
864 |  | -	 * free, or spans across blocks. The contig hint may be off by up to
865 |  | -	 * a page, but if the hint is contained in a block, it will be accurate
866 |  | -	 * with the else condition below.
 | 996 | +	 * Refresh chunk metadata when the free makes a block free or spans
 | 997 | +	 * across blocks. The contig_hint may be off by up to a page, but if
 | 998 | +	 * the contig_hint is contained in a block, it will be accurate with
 | 999 | +	 * the else condition below.
867 | 1000 | 	 */
868 |  | -	if ((ALIGN_DOWN(end, min(PCPU_BITS_PER_PAGE, PCPU_BITMAP_BLOCK_BITS)) >
869 |  | -	     ALIGN(start, min(PCPU_BITS_PER_PAGE, PCPU_BITMAP_BLOCK_BITS))) ||
870 |  | -	    s_index != e_index)
871 |  | -		pcpu_chunk_refresh_hint(chunk);
 | 1001 | +	if (((end - start) >= PCPU_BITMAP_BLOCK_BITS) || s_index != e_index)
 | 1002 | +		pcpu_chunk_refresh_hint(chunk, true);
872 | 1003 | 	else
873 |  | -		pcpu_chunk_update(chunk, pcpu_block_off_to_off(s_index, start),
874 |  | -				  s_block->contig_hint);
 | 1004 | +		pcpu_block_update(&chunk->chunk_md,
 | 1005 | +				  pcpu_block_off_to_off(s_index, start),
 | 1006 | +				  end);
875 | 1007 | }
876 | 1008 |
877 | 1009 | /**
.. | ..
890 | 1022 | static bool pcpu_is_populated(struct pcpu_chunk *chunk, int bit_off, int bits,
891 | 1023 | 			      int *next_off)
892 | 1024 | {
893 |  | -	int page_start, page_end, rs, re;
 | 1025 | +	unsigned int page_start, page_end, rs, re;
894 | 1026 |
895 | 1027 | 	page_start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE);
896 | 1028 | 	page_end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE);
897 | 1029 |
898 | 1030 | 	rs = page_start;
899 |  | -	pcpu_next_unpop(chunk->populated, &rs, &re, page_end);
 | 1031 | +	bitmap_next_clear_region(chunk->populated, &rs, &re, page_end);
900 | 1032 | 	if (rs >= page_end)
901 | 1033 | 		return true;
902 | 1034 |
.. | ..
926 | 1058 | static int pcpu_find_block_fit(struct pcpu_chunk *chunk, int alloc_bits,
927 | 1059 | 			       size_t align, bool pop_only)
928 | 1060 | {
 | 1061 | +	struct pcpu_block_md *chunk_md = &chunk->chunk_md;
929 | 1062 | 	int bit_off, bits, next_off;
930 | 1063 |
931 | 1064 | 	/*
.. | ..
934 | 1067 | 	 * cannot fit in the global hint, there is memory pressure and creating
935 | 1068 | 	 * a new chunk would happen soon.
936 | 1069 | 	 */
937 |  | -	bit_off = ALIGN(chunk->contig_bits_start, align) -
938 |  | -		  chunk->contig_bits_start;
939 |  | -	if (bit_off + alloc_bits > chunk->contig_bits)
 | 1070 | +	bit_off = ALIGN(chunk_md->contig_hint_start, align) -
 | 1071 | +		  chunk_md->contig_hint_start;
 | 1072 | +	if (bit_off + alloc_bits > chunk_md->contig_hint)
940 | 1073 | 		return -1;
941 | 1074 |
942 |  | -	bit_off = chunk->first_bit;
 | 1075 | +	bit_off = pcpu_next_hint(chunk_md, alloc_bits);
943 | 1076 | 	bits = 0;
944 | 1077 | 	pcpu_for_each_fit_region(chunk, alloc_bits, align, bit_off, bits) {
945 | 1078 | 		if (!pop_only || pcpu_is_populated(chunk, bit_off, bits,
.. | ..
954 | 1087 | 		return -1;
955 | 1088 |
956 | 1089 | 	return bit_off;
 | 1090 | +}
 | 1091 | +
 | 1092 | +/*
 | 1093 | + * pcpu_find_zero_area - modified from bitmap_find_next_zero_area_off()
 | 1094 | + * @map: the address to base the search on
 | 1095 | + * @size: the bitmap size in bits
 | 1096 | + * @start: the bitnumber to start searching at
 | 1097 | + * @nr: the number of zeroed bits we're looking for
 | 1098 | + * @align_mask: alignment mask for zero area
 | 1099 | + * @largest_off: offset of the largest area skipped
 | 1100 | + * @largest_bits: size of the largest area skipped
 | 1101 | + *
 | 1102 | + * The @align_mask should be one less than a power of 2.
 | 1103 | + *
 | 1104 | + * This is a modified version of bitmap_find_next_zero_area_off() to remember
 | 1105 | + * the largest area that was skipped. This is imperfect, but in general is
 | 1106 | + * good enough. The largest remembered region is the largest failed region
 | 1107 | + * seen. This does not include anything we possibly skipped due to alignment.
 | 1108 | + * pcpu_block_update_scan() does scan backwards to try and recover what was
 | 1109 | + * lost to alignment. While this can cause scanning to miss earlier possible
 | 1110 | + * free areas, smaller allocations will eventually fill those holes.
 | 1111 | + */
 | 1112 | +static unsigned long pcpu_find_zero_area(unsigned long *map,
 | 1113 | +					 unsigned long size,
 | 1114 | +					 unsigned long start,
 | 1115 | +					 unsigned long nr,
 | 1116 | +					 unsigned long align_mask,
 | 1117 | +					 unsigned long *largest_off,
 | 1118 | +					 unsigned long *largest_bits)
 | 1119 | +{
 | 1120 | +	unsigned long index, end, i, area_off, area_bits;
 | 1121 | +again:
 | 1122 | +	index = find_next_zero_bit(map, size, start);
 | 1123 | +
 | 1124 | +	/* Align allocation */
 | 1125 | +	index = __ALIGN_MASK(index, align_mask);
 | 1126 | +	area_off = index;
 | 1127 | +
 | 1128 | +	end = index + nr;
 | 1129 | +	if (end > size)
 | 1130 | +		return end;
 | 1131 | +	i = find_next_bit(map, end, index);
 | 1132 | +	if (i < end) {
 | 1133 | +		area_bits = i - area_off;
 | 1134 | +		/* remember largest unused area with best alignment */
 | 1135 | +		if (area_bits > *largest_bits ||
 | 1136 | +		    (area_bits == *largest_bits && *largest_off &&
 | 1137 | +		     (!area_off || __ffs(area_off) > __ffs(*largest_off)))) {
 | 1138 | +			*largest_off = area_off;
 | 1139 | +			*largest_bits = area_bits;
 | 1140 | +		}
 | 1141 | +
 | 1142 | +		start = i + 1;
 | 1143 | +		goto again;
 | 1144 | +	}
 | 1145 | +	return index;
957 | 1146 | }
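A user-space sketch of the "remember the largest skipped hole" idea above, using a char-per-bit map in place of the kernel bitops and omitting the alignment handling that the real function layers on top:

    #include <stdio.h>

    /* map[i] != 0 means bit i is allocated. Returns the first index of a
     * run of nr zero bits at or after start, or size on failure. Holes
     * scanned over but too small are remembered via largest_off/bits. */
    static unsigned long find_zero_area(const char *map, unsigned long size,
                                        unsigned long start, unsigned long nr,
                                        unsigned long *largest_off,
                                        unsigned long *largest_bits)
    {
            unsigned long index, i, area_bits;

    again:
            for (index = start; index < size && map[index]; index++)
                    ;
            if (index + nr > size)
                    return size;
            for (i = index; i < index + nr && !map[i]; i++)
                    ;
            if (i < index + nr) {           /* hole too small: remember it */
                    area_bits = i - index;
                    if (area_bits > *largest_bits) {
                            *largest_off = index;
                            *largest_bits = area_bits;
                    }
                    start = i + 1;
                    goto again;
            }
            return index;
    }

    int main(void)
    {
            /* bits 2-3 form a 2-bit hole, bits 6-9 a 4-bit run */
            char map[12] = { 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1 };
            unsigned long off = 0, bits = 0;
            unsigned long idx = find_zero_area(map, 12, 0, 4, &off, &bits);

            /* prints: found at 6, skipped hole at 2 size 2 */
            printf("found at %lu, skipped hole at %lu size %lu\n",
                   idx, off, bits);
            return 0;
    }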
958 | 1147 |
959 | 1148 | /**
.. | ..
978 | 1167 | static int pcpu_alloc_area(struct pcpu_chunk *chunk, int alloc_bits,
979 | 1168 | 			   size_t align, int start)
980 | 1169 | {
 | 1170 | +	struct pcpu_block_md *chunk_md = &chunk->chunk_md;
981 | 1171 | 	size_t align_mask = (align) ? (align - 1) : 0;
 | 1172 | +	unsigned long area_off = 0, area_bits = 0;
982 | 1173 | 	int bit_off, end, oslot;
983 | 1174 |
984 | 1175 | 	lockdep_assert_held(&pcpu_lock);
.. | ..
990 | 1181 | 	 */
991 | 1182 | 	end = min_t(int, start + alloc_bits + PCPU_BITMAP_BLOCK_BITS,
992 | 1183 | 		    pcpu_chunk_map_bits(chunk));
993 |  | -	bit_off = bitmap_find_next_zero_area(chunk->alloc_map, end, start,
994 |  | -					     alloc_bits, align_mask);
 | 1184 | +	bit_off = pcpu_find_zero_area(chunk->alloc_map, end, start, alloc_bits,
 | 1185 | +				      align_mask, &area_off, &area_bits);
995 | 1186 | 	if (bit_off >= end)
996 | 1187 | 		return -1;
 | 1188 | +
 | 1189 | +	if (area_bits)
 | 1190 | +		pcpu_block_update_scan(chunk, area_off, area_bits);
997 | 1191 |
998 | 1192 | 	/* update alloc map */
999 | 1193 | 	bitmap_set(chunk->alloc_map, bit_off, alloc_bits);
.. | ..
1006 | 1200 | 	chunk->free_bytes -= alloc_bits * PCPU_MIN_ALLOC_SIZE;
1007 | 1201 |
1008 | 1202 | 	/* update first free bit */
1009 |  | -	if (bit_off == chunk->first_bit)
1010 |  | -		chunk->first_bit = find_next_zero_bit(
 | 1203 | +	if (bit_off == chunk_md->first_free)
 | 1204 | +		chunk_md->first_free = find_next_zero_bit(
1011 | 1205 | 					chunk->alloc_map,
1012 | 1206 | 					pcpu_chunk_map_bits(chunk),
1013 | 1207 | 					bit_off + alloc_bits);
.. | ..
1026 | 1220 |  *
1027 | 1221 |  * This function determines the size of an allocation to free using
1028 | 1222 |  * the boundary bitmap and clears the allocation map.
 | 1223 | + *
 | 1224 | + * RETURNS:
 | 1225 | + * Number of freed bytes.
1029 | 1226 |  */
1030 |  | -static void pcpu_free_area(struct pcpu_chunk *chunk, int off)
 | 1227 | +static int pcpu_free_area(struct pcpu_chunk *chunk, int off)
1031 | 1228 | {
1032 |  | -	int bit_off, bits, end, oslot;
 | 1229 | +	struct pcpu_block_md *chunk_md = &chunk->chunk_md;
 | 1230 | +	int bit_off, bits, end, oslot, freed;
1033 | 1231 |
1034 | 1232 | 	lockdep_assert_held(&pcpu_lock);
1035 | 1233 | 	pcpu_stats_area_dealloc(chunk);
.. | ..
1044 | 1242 | 	bits = end - bit_off;
1045 | 1243 | 	bitmap_clear(chunk->alloc_map, bit_off, bits);
1046 | 1244 |
 | 1245 | +	freed = bits * PCPU_MIN_ALLOC_SIZE;
 | 1246 | +
1047 | 1247 | 	/* update metadata */
1048 |  | -	chunk->free_bytes += bits * PCPU_MIN_ALLOC_SIZE;
 | 1248 | +	chunk->free_bytes += freed;
1049 | 1249 |
1050 | 1250 | 	/* update first free bit */
1051 |  | -	chunk->first_bit = min(chunk->first_bit, bit_off);
 | 1251 | +	chunk_md->first_free = min(chunk_md->first_free, bit_off);
1052 | 1252 |
1053 | 1253 | 	pcpu_block_update_hint_free(chunk, bit_off, bits);
1054 | 1254 |
1055 | 1255 | 	pcpu_chunk_relocate(chunk, oslot);
 | 1256 | +
 | 1257 | +	return freed;
 | 1258 | +}
 | 1259 | +
 | 1260 | +static void pcpu_init_md_block(struct pcpu_block_md *block, int nr_bits)
 | 1261 | +{
 | 1262 | +	block->scan_hint = 0;
 | 1263 | +	block->contig_hint = nr_bits;
 | 1264 | +	block->left_free = nr_bits;
 | 1265 | +	block->right_free = nr_bits;
 | 1266 | +	block->first_free = 0;
 | 1267 | +	block->nr_bits = nr_bits;
1056 | 1268 | }
1057 | 1269 |
1058 | 1270 | static void pcpu_init_md_blocks(struct pcpu_chunk *chunk)
1059 | 1271 | {
1060 | 1272 | 	struct pcpu_block_md *md_block;
1061 | 1273 |
 | 1274 | +	/* init the chunk's block */
 | 1275 | +	pcpu_init_md_block(&chunk->chunk_md, pcpu_chunk_map_bits(chunk));
 | 1276 | +
1062 | 1277 | 	for (md_block = chunk->md_blocks;
1063 | 1278 | 	     md_block != chunk->md_blocks + pcpu_chunk_nr_blocks(chunk);
1064 |  | -	     md_block++) {
1065 |  | -		md_block->contig_hint = PCPU_BITMAP_BLOCK_BITS;
1066 |  | -		md_block->left_free = PCPU_BITMAP_BLOCK_BITS;
1067 |  | -		md_block->right_free = PCPU_BITMAP_BLOCK_BITS;
1068 |  | -	}
 | 1279 | +	     md_block++)
 | 1280 | +		pcpu_init_md_block(md_block, PCPU_BITMAP_BLOCK_BITS);
1069 | 1281 | }
1070 | 1282 |
1071 | 1283 | /**
.. | ..
1087 | 1299 | 	struct pcpu_chunk *chunk;
1088 | 1300 | 	unsigned long aligned_addr, lcm_align;
1089 | 1301 | 	int start_offset, offset_bits, region_size, region_bits;
 | 1302 | +	size_t alloc_size;
1090 | 1303 |
1091 | 1304 | 	/* region calculations */
1092 | 1305 | 	aligned_addr = tmp_addr & PAGE_MASK;
.. | ..
1102 | 1315 | 	region_size = ALIGN(start_offset + map_size, lcm_align);
1103 | 1316 |
1104 | 1317 | 	/* allocate chunk */
1105 |  | -	chunk = memblock_virt_alloc(sizeof(struct pcpu_chunk) +
1106 |  | -				    BITS_TO_LONGS(region_size >> PAGE_SHIFT) * sizeof(unsigned long),
1107 |  | -				    0);
 | 1318 | +	alloc_size = struct_size(chunk, populated,
 | 1319 | +				 BITS_TO_LONGS(region_size >> PAGE_SHIFT));
 | 1320 | +	chunk = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
 | 1321 | +	if (!chunk)
 | 1322 | +		panic("%s: Failed to allocate %zu bytes\n", __func__,
 | 1323 | +		      alloc_size);
1108 | 1324 |
1109 | 1325 | 	INIT_LIST_HEAD(&chunk->list);
1110 | 1326 |
.. | ..
1115 | 1331 | 	chunk->nr_pages = region_size >> PAGE_SHIFT;
1116 | 1332 | 	region_bits = pcpu_chunk_map_bits(chunk);
1117 | 1333 |
1118 |  | -	chunk->alloc_map = memblock_virt_alloc(BITS_TO_LONGS(region_bits) *
1119 |  | -					       sizeof(chunk->alloc_map[0]), 0);
1120 |  | -	chunk->bound_map = memblock_virt_alloc(BITS_TO_LONGS(region_bits + 1) *
1121 |  | -					       sizeof(chunk->bound_map[0]), 0);
1122 |  | -	chunk->md_blocks = memblock_virt_alloc(pcpu_chunk_nr_blocks(chunk) *
1123 |  | -					       sizeof(chunk->md_blocks[0]), 0);
 | 1334 | +	alloc_size = BITS_TO_LONGS(region_bits) * sizeof(chunk->alloc_map[0]);
 | 1335 | +	chunk->alloc_map = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
 | 1336 | +	if (!chunk->alloc_map)
 | 1337 | +		panic("%s: Failed to allocate %zu bytes\n", __func__,
 | 1338 | +		      alloc_size);
 | 1339 | +
 | 1340 | +	alloc_size =
 | 1341 | +		BITS_TO_LONGS(region_bits + 1) * sizeof(chunk->bound_map[0]);
 | 1342 | +	chunk->bound_map = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
 | 1343 | +	if (!chunk->bound_map)
 | 1344 | +		panic("%s: Failed to allocate %zu bytes\n", __func__,
 | 1345 | +		      alloc_size);
 | 1346 | +
 | 1347 | +	alloc_size = pcpu_chunk_nr_blocks(chunk) * sizeof(chunk->md_blocks[0]);
 | 1348 | +	chunk->md_blocks = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
 | 1349 | +	if (!chunk->md_blocks)
 | 1350 | +		panic("%s: Failed to allocate %zu bytes\n", __func__,
 | 1351 | +		      alloc_size);
 | 1352 | +
 | 1353 | +#ifdef CONFIG_MEMCG_KMEM
 | 1354 | +	/* first chunk isn't memcg-aware */
 | 1355 | +	chunk->obj_cgroups = NULL;
 | 1356 | +#endif
1124 | 1357 | 	pcpu_init_md_blocks(chunk);
1125 | 1358 |
1126 | 1359 | 	/* manage populated page bitmap */
1127 | 1360 | 	chunk->immutable = true;
1128 | 1361 | 	bitmap_fill(chunk->populated, chunk->nr_pages);
1129 | 1362 | 	chunk->nr_populated = chunk->nr_pages;
1130 |  | -	chunk->nr_empty_pop_pages =
1131 |  | -		pcpu_cnt_pop_pages(chunk, start_offset / PCPU_MIN_ALLOC_SIZE,
1132 |  | -				   map_size / PCPU_MIN_ALLOC_SIZE);
 | 1363 | +	chunk->nr_empty_pop_pages = chunk->nr_pages;
1133 | 1364 |
1134 |  | -	chunk->contig_bits = map_size / PCPU_MIN_ALLOC_SIZE;
1135 | 1365 | 	chunk->free_bytes = map_size;
1136 | 1366 |
1137 | 1367 | 	if (chunk->start_offset) {
.. | ..
1141 | 1371 | 		set_bit(0, chunk->bound_map);
1142 | 1372 | 		set_bit(offset_bits, chunk->bound_map);
1143 | 1373 |
1144 |  | -		chunk->first_bit = offset_bits;
 | 1374 | +		chunk->chunk_md.first_free = offset_bits;
1145 | 1375 |
1146 | 1376 | 		pcpu_block_update_hint_alloc(chunk, 0, offset_bits);
1147 | 1377 | 	}
.. | ..
1163 | 1393 | 	return chunk;
1164 | 1394 | }
1165 | 1395 |
1166 |  | -static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
 | 1396 | +static struct pcpu_chunk *pcpu_alloc_chunk(enum pcpu_chunk_type type, gfp_t gfp)
1167 | 1397 | {
1168 | 1398 | 	struct pcpu_chunk *chunk;
1169 | 1399 | 	int region_bits;
.. | ..
1191 | 1421 | 	if (!chunk->md_blocks)
1192 | 1422 | 		goto md_blocks_fail;
1193 | 1423 |
 | 1424 | +#ifdef CONFIG_MEMCG_KMEM
 | 1425 | +	if (pcpu_is_memcg_chunk(type)) {
 | 1426 | +		chunk->obj_cgroups =
 | 1427 | +			pcpu_mem_zalloc(pcpu_chunk_map_bits(chunk) *
 | 1428 | +					sizeof(struct obj_cgroup *), gfp);
 | 1429 | +		if (!chunk->obj_cgroups)
 | 1430 | +			goto objcg_fail;
 | 1431 | +	}
 | 1432 | +#endif
 | 1433 | +
1194 | 1434 | 	pcpu_init_md_blocks(chunk);
1195 | 1435 |
1196 | 1436 | 	/* init metadata */
1197 |  | -	chunk->contig_bits = region_bits;
1198 | 1437 | 	chunk->free_bytes = chunk->nr_pages * PAGE_SIZE;
1199 | 1438 |
1200 | 1439 | 	return chunk;
1201 | 1440 |
 | 1441 | +#ifdef CONFIG_MEMCG_KMEM
 | 1442 | +objcg_fail:
 | 1443 | +	pcpu_mem_free(chunk->md_blocks);
 | 1444 | +#endif
1202 | 1445 | md_blocks_fail:
1203 | 1446 | 	pcpu_mem_free(chunk->bound_map);
1204 | 1447 | bound_map_fail:
.. | ..
1213 | 1456 | {
1214 | 1457 | 	if (!chunk)
1215 | 1458 | 		return;
 | 1459 | +#ifdef CONFIG_MEMCG_KMEM
 | 1460 | +	pcpu_mem_free(chunk->obj_cgroups);
 | 1461 | +#endif
1216 | 1462 | 	pcpu_mem_free(chunk->md_blocks);
1217 | 1463 | 	pcpu_mem_free(chunk->bound_map);
1218 | 1464 | 	pcpu_mem_free(chunk->alloc_map);
.. | ..
1224 | 1470 |  * @chunk: pcpu_chunk which got populated
1225 | 1471 |  * @page_start: the start page
1226 | 1472 |  * @page_end: the end page
1227 |  | - * @for_alloc: if this is to populate for allocation
1228 | 1473 |  *
1229 | 1474 |  * Pages in [@page_start,@page_end) have been populated to @chunk. Update
1230 | 1475 |  * the bookkeeping information accordingly. Must be called after each
.. | ..
1234 | 1479 |  * is to serve an allocation in that area.
1235 | 1480 |  */
1236 | 1481 | static void pcpu_chunk_populated(struct pcpu_chunk *chunk, int page_start,
1237 |  | -				 int page_end, bool for_alloc)
 | 1482 | +				 int page_end)
1238 | 1483 | {
1239 | 1484 | 	int nr = page_end - page_start;
1240 | 1485 |
.. | ..
1244 | 1489 | 	chunk->nr_populated += nr;
1245 | 1490 | 	pcpu_nr_populated += nr;
1246 | 1491 |
1247 |  | -	if (!for_alloc) {
1248 |  | -		chunk->nr_empty_pop_pages += nr;
1249 |  | -		pcpu_nr_empty_pop_pages += nr;
1250 |  | -	}
 | 1492 | +	pcpu_update_empty_pages(chunk, nr);
1251 | 1493 | }
1252 | 1494 |
1253 | 1495 | /**
.. | ..
1269 | 1511 |
1270 | 1512 | 	bitmap_clear(chunk->populated, page_start, nr);
1271 | 1513 | 	chunk->nr_populated -= nr;
1272 |  | -	chunk->nr_empty_pop_pages -= nr;
1273 |  | -	pcpu_nr_empty_pop_pages -= nr;
1274 | 1514 | 	pcpu_nr_populated -= nr;
 | 1515 | +
 | 1516 | +	pcpu_update_empty_pages(chunk, -nr);
1275 | 1517 | }
1276 | 1518 |
1277 | 1519 | /*
.. | ..
1293 | 1535 | 				 int page_start, int page_end, gfp_t gfp);
1294 | 1536 | static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
1295 | 1537 | 				  int page_start, int page_end);
1296 |  | -static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
 | 1538 | +static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type,
 | 1539 | +					    gfp_t gfp);
1297 | 1540 | static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
1298 | 1541 | static struct page *pcpu_addr_to_page(void *addr);
1299 | 1542 | static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
.. | ..
1335 | 1578 | 	return pcpu_get_page_chunk(pcpu_addr_to_page(addr));
1336 | 1579 | }
1337 | 1580 |
 | 1581 | +#ifdef CONFIG_MEMCG_KMEM
 | 1582 | +static enum pcpu_chunk_type pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
 | 1583 | +						      struct obj_cgroup **objcgp)
 | 1584 | +{
 | 1585 | +	struct obj_cgroup *objcg;
 | 1586 | +
 | 1587 | +	if (!memcg_kmem_enabled() || !(gfp & __GFP_ACCOUNT))
 | 1588 | +		return PCPU_CHUNK_ROOT;
 | 1589 | +
 | 1590 | +	objcg = get_obj_cgroup_from_current();
 | 1591 | +	if (!objcg)
 | 1592 | +		return PCPU_CHUNK_ROOT;
 | 1593 | +
 | 1594 | +	if (obj_cgroup_charge(objcg, gfp, size * num_possible_cpus())) {
 | 1595 | +		obj_cgroup_put(objcg);
 | 1596 | +		return PCPU_FAIL_ALLOC;
 | 1597 | +	}
 | 1598 | +
 | 1599 | +	*objcgp = objcg;
 | 1600 | +	return PCPU_CHUNK_MEMCG;
 | 1601 | +}
 | 1602 | +
 | 1603 | +static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
 | 1604 | +				       struct pcpu_chunk *chunk, int off,
 | 1605 | +				       size_t size)
 | 1606 | +{
 | 1607 | +	if (!objcg)
 | 1608 | +		return;
 | 1609 | +
 | 1610 | +	if (chunk) {
 | 1611 | +		chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = objcg;
 | 1612 | +
 | 1613 | +		rcu_read_lock();
 | 1614 | +		mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
 | 1615 | +				size * num_possible_cpus());
 | 1616 | +		rcu_read_unlock();
 | 1617 | +	} else {
 | 1618 | +		obj_cgroup_uncharge(objcg, size * num_possible_cpus());
 | 1619 | +		obj_cgroup_put(objcg);
 | 1620 | +	}
 | 1621 | +}
 | 1622 | +
 | 1623 | +static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
 | 1624 | +{
 | 1625 | +	struct obj_cgroup *objcg;
 | 1626 | +
 | 1627 | +	if (!pcpu_is_memcg_chunk(pcpu_chunk_type(chunk)))
 | 1628 | +		return;
 | 1629 | +
 | 1630 | +	objcg = chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT];
 | 1631 | +	chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL;
 | 1632 | +
 | 1633 | +	obj_cgroup_uncharge(objcg, size * num_possible_cpus());
 | 1634 | +
 | 1635 | +	rcu_read_lock();
 | 1636 | +	mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
 | 1637 | +			-(size * num_possible_cpus()));
 | 1638 | +	rcu_read_unlock();
 | 1639 | +
 | 1640 | +	obj_cgroup_put(objcg);
 | 1641 | +}
 | 1642 | +
 | 1643 | +#else /* CONFIG_MEMCG_KMEM */
 | 1644 | +static enum pcpu_chunk_type
 | 1645 | +pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp, struct obj_cgroup **objcgp)
 | 1646 | +{
 | 1647 | +	return PCPU_CHUNK_ROOT;
 | 1648 | +}
 | 1649 | +
 | 1650 | +static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
 | 1651 | +				       struct pcpu_chunk *chunk, int off,
 | 1652 | +				       size_t size)
 | 1653 | +{
 | 1654 | +}
 | 1655 | +
 | 1656 | +static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
 | 1657 | +{
 | 1658 | +}
 | 1659 | +#endif /* CONFIG_MEMCG_KMEM */
 | 1660 | +
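Note the charging unit throughout these hooks: a percpu allocation consumes size bytes on every possible CPU, so one accounted request is charged size * num_possible_cpus(). As a worked example with illustrative numbers, a 64-byte allocation on a machine with 16 possible CPUs charges 1024 bytes to the cgroup at allocation time, and pcpu_memcg_free_hook() uncharges the same amount on free.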
1338 | 1661 | /**
1339 | 1662 |  * pcpu_alloc - the percpu allocator
1340 | 1663 |  * @size: size of area to allocate in bytes
.. | ..
1353 | 1676 | static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
1354 | 1677 | 				 gfp_t gfp)
1355 | 1678 | {
1356 |  | -	/* whitelisted flags that can be passed to the backing allocators */
1357 |  | -	gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
1358 |  | -	bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
1359 |  | -	bool do_warn = !(gfp & __GFP_NOWARN);
 | 1679 | +	gfp_t pcpu_gfp;
 | 1680 | +	bool is_atomic;
 | 1681 | +	bool do_warn;
 | 1682 | +	enum pcpu_chunk_type type;
 | 1683 | +	struct list_head *pcpu_slot;
 | 1684 | +	struct obj_cgroup *objcg = NULL;
1360 | 1685 | 	static int warn_limit = 10;
1361 |  | -	struct pcpu_chunk *chunk;
 | 1686 | +	struct pcpu_chunk *chunk, *next;
1362 | 1687 | 	const char *err;
1363 | 1688 | 	int slot, off, cpu, ret;
1364 | 1689 | 	unsigned long flags;
1365 | 1690 | 	void __percpu *ptr;
1366 | 1691 | 	size_t bits, bit_align;
 | 1692 | +
 | 1693 | +	gfp = current_gfp_context(gfp);
 | 1694 | +	/* whitelisted flags that can be passed to the backing allocators */
 | 1695 | +	pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
 | 1696 | +	is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
 | 1697 | +	do_warn = !(gfp & __GFP_NOWARN);
1367 | 1698 |
1368 | 1699 | 	/*
1369 | 1700 | 	 * There is now a minimum allocation size of PCPU_MIN_ALLOC_SIZE,
.. | ..
1385 | 1716 | 		return NULL;
1386 | 1717 | 	}
1387 | 1718 |
 | 1719 | +	type = pcpu_memcg_pre_alloc_hook(size, gfp, &objcg);
 | 1720 | +	if (unlikely(type == PCPU_FAIL_ALLOC))
 | 1721 | +		return NULL;
 | 1722 | +	pcpu_slot = pcpu_chunk_list(type);
 | 1723 | +
1388 | 1724 | 	if (!is_atomic) {
1389 | 1725 | 		/*
1390 | 1726 | 		 * pcpu_balance_workfn() allocates memory under this mutex,
1391 | 1727 | 		 * and it may wait for memory reclaim. Allow current task
1392 | 1728 | 		 * to become OOM victim, in case of memory pressure.
1393 | 1729 | 		 */
1394 |  | -		if (gfp & __GFP_NOFAIL)
 | 1730 | +		if (gfp & __GFP_NOFAIL) {
1395 | 1731 | 			mutex_lock(&pcpu_alloc_mutex);
1396 |  | -		else if (mutex_lock_killable(&pcpu_alloc_mutex))
 | 1732 | +		} else if (mutex_lock_killable(&pcpu_alloc_mutex)) {
 | 1733 | +			pcpu_memcg_post_alloc_hook(objcg, NULL, 0, size);
1397 | 1734 | 			return NULL;
 | 1735 | +		}
1398 | 1736 | 	}
1399 | 1737 |
1400 | 1738 | 	spin_lock_irqsave(&pcpu_lock, flags);
.. | ..
1420 | 1758 | restart:
1421 | 1759 | 	/* search through normal chunks */
1422 | 1760 | 	for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) {
1423 |  | -		list_for_each_entry(chunk, &pcpu_slot[slot], list) {
 | 1761 | +		list_for_each_entry_safe(chunk, next, &pcpu_slot[slot], list) {
1424 | 1762 | 			off = pcpu_find_block_fit(chunk, bits, bit_align,
1425 | 1763 | 						  is_atomic);
1426 |  | -			if (off < 0)
---|
| 1764 | + if (off < 0) { |
---|
| 1765 | + if (slot < PCPU_SLOT_FAIL_THRESHOLD) |
---|
| 1766 | + pcpu_chunk_move(chunk, 0); |
---|
1427 | 1767 | continue; |
---|
| 1768 | + } |
---|
1428 | 1769 | |
---|
1429 | 1770 | off = pcpu_alloc_area(chunk, bits, bit_align, off); |
---|
1430 | 1771 | if (off >= 0) |
---|
.. | .. |
---|
1446 | 1787 | } |
---|
1447 | 1788 | |
---|
1448 | 1789 | if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) { |
---|
1449 | | - chunk = pcpu_create_chunk(pcpu_gfp); |
---|
| 1790 | + chunk = pcpu_create_chunk(type, pcpu_gfp); |
---|
1450 | 1791 | if (!chunk) { |
---|
1451 | 1792 | err = "failed to allocate new chunk"; |
---|
1452 | 1793 | goto fail; |
---|
.. | .. |
---|
1466 | 1807 | |
---|
1467 | 1808 | /* populate if not all pages are already there */ |
---|
1468 | 1809 | if (!is_atomic) { |
---|
1469 | | - int page_start, page_end, rs, re; |
---|
| 1810 | + unsigned int page_start, page_end, rs, re; |
---|
1470 | 1811 | |
---|
1471 | 1812 | page_start = PFN_DOWN(off); |
---|
1472 | 1813 | page_end = PFN_UP(off + size); |
---|
1473 | 1814 | |
---|
1474 | | - pcpu_for_each_unpop_region(chunk->populated, rs, re, |
---|
1475 | | - page_start, page_end) { |
---|
| 1815 | + bitmap_for_each_clear_region(chunk->populated, rs, re, |
---|
| 1816 | + page_start, page_end) { |
---|
1476 | 1817 | WARN_ON(chunk->immutable); |
---|
1477 | 1818 | |
---|
1478 | 1819 | ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp); |
---|
.. | .. |
---|
1483 | 1824 | err = "failed to populate"; |
---|
1484 | 1825 | goto fail_unlock; |
---|
1485 | 1826 | } |
---|
1486 | | - pcpu_chunk_populated(chunk, rs, re, true); |
---|
| 1827 | + pcpu_chunk_populated(chunk, rs, re); |
---|
1487 | 1828 | spin_unlock_irqrestore(&pcpu_lock, flags); |
---|
1488 | 1829 | } |
---|
1489 | 1830 | |
---|
1490 | 1831 | mutex_unlock(&pcpu_alloc_mutex); |
---|
1491 | 1832 | } |
---|
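To clarify the iterator used in the populate loop above, a toy illustration (the bitmap value is invented; the macro's semantics are those this code relies on):

	unsigned long populated = 0b00111100;	/* pages 2-5 populated */
	unsigned int rs, re;

	/* Yields maximal runs of clear bits: (rs, re) = (0, 2), then (6, 8) --
	 * exactly the unpopulated page ranges that still need backing pages. */
	bitmap_for_each_clear_region(&populated, rs, re, 0, 8)
		pr_debug("unpopulated pages [%u, %u)\n", rs, re);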
1492 | 1833 | |
---|
1493 | | - if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW) |
---|
| 1834 | + if (pcpu_nr_empty_pop_pages[type] < PCPU_EMPTY_POP_PAGES_LOW) |
---|
1494 | 1835 | pcpu_schedule_balance_work(); |
---|
1495 | 1836 | |
---|
1496 | 1837 | /* clear the areas and return address relative to base address */ |
---|
.. | .. |
---|
1502 | 1843 | |
---|
1503 | 1844 | trace_percpu_alloc_percpu(reserved, is_atomic, size, align, |
---|
1504 | 1845 | chunk->base_addr, off, ptr); |
---|
| 1846 | + |
---|
| 1847 | + pcpu_memcg_post_alloc_hook(objcg, chunk, off, size); |
---|
1505 | 1848 | |
---|
1506 | 1849 | return ptr; |
---|
1507 | 1850 | |
---|
.. | .. |
---|
1524 | 1867 | } else { |
---|
1525 | 1868 | mutex_unlock(&pcpu_alloc_mutex); |
---|
1526 | 1869 | } |
---|
| 1870 | + |
---|
| 1871 | + pcpu_memcg_post_alloc_hook(objcg, NULL, 0, size); |
---|
| 1872 | + |
---|
1527 | 1873 | return NULL; |
---|
1528 | 1874 | } |
---|
1529 | 1875 | |
---|
.. | .. |
---|
1583 | 1929 | } |
---|
1584 | 1930 | |
---|
1585 | 1931 | /** |
---|
1586 | | - * pcpu_balance_workfn - manage the amount of free chunks and populated pages |
---|
1587 | | - * @work: unused |
---|
| 1932 | + * __pcpu_balance_workfn - manage the amount of free chunks and populated pages |
---|
| 1933 | + * @type: chunk type |
---|
1588 | 1934 | * |
---|
1589 | 1935 | * Reclaim all fully free chunks except for the first one. This is also |
---|
1590 | 1936 | * responsible for maintaining the pool of empty populated pages. However, |
---|
.. | .. |
---|
1593 | 1939 | * allocation causes the failure as it is possible that requests can be |
---|
1594 | 1940 | * serviced from already backed regions. |
---|
1595 | 1941 | */ |
---|
1596 | | -static void pcpu_balance_workfn(struct work_struct *work) |
---|
| 1942 | +static void __pcpu_balance_workfn(enum pcpu_chunk_type type) |
---|
1597 | 1943 | { |
---|
1598 | 1944 | /* gfp flags passed to underlying allocators */ |
---|
1599 | 1945 | const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; |
---|
1600 | 1946 | LIST_HEAD(to_free); |
---|
| 1947 | + struct list_head *pcpu_slot = pcpu_chunk_list(type); |
---|
1601 | 1948 | struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1]; |
---|
1602 | 1949 | struct pcpu_chunk *chunk, *next; |
---|
1603 | 1950 | int slot, nr_to_pop, ret; |
---|
.. | .. |
---|
1622 | 1969 | spin_unlock_irq(&pcpu_lock); |
---|
1623 | 1970 | |
---|
1624 | 1971 | list_for_each_entry_safe(chunk, next, &to_free, list) { |
---|
1625 | | - int rs, re; |
---|
| 1972 | + unsigned int rs, re; |
---|
1626 | 1973 | |
---|
1627 | | - pcpu_for_each_pop_region(chunk->populated, rs, re, 0, |
---|
1628 | | - chunk->nr_pages) { |
---|
| 1974 | + bitmap_for_each_set_region(chunk->populated, rs, re, 0, |
---|
| 1975 | + chunk->nr_pages) { |
---|
1629 | 1976 | pcpu_depopulate_chunk(chunk, rs, re); |
---|
1630 | 1977 | spin_lock_irq(&pcpu_lock); |
---|
1631 | 1978 | pcpu_chunk_depopulated(chunk, rs, re); |
---|
.. | .. |
---|
1652 | 1999 | pcpu_atomic_alloc_failed = false; |
---|
1653 | 2000 | } else { |
---|
1654 | 2001 | nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH - |
---|
1655 | | - pcpu_nr_empty_pop_pages, |
---|
| 2002 | + pcpu_nr_empty_pop_pages[type], |
---|
1656 | 2003 | 0, PCPU_EMPTY_POP_PAGES_HIGH); |
---|
1657 | 2004 | } |
---|
1658 | 2005 | |
---|
1659 | 2006 | for (slot = pcpu_size_to_slot(PAGE_SIZE); slot < pcpu_nr_slots; slot++) { |
---|
1660 | | - int nr_unpop = 0, rs, re; |
---|
| 2007 | + unsigned int nr_unpop = 0, rs, re; |
---|
1661 | 2008 | |
---|
1662 | 2009 | if (!nr_to_pop) |
---|
1663 | 2010 | break; |
---|
.. | .. |
---|
1674 | 2021 | continue; |
---|
1675 | 2022 | |
---|
1676 | 2023 | /* @chunk can't go away while pcpu_alloc_mutex is held */ |
---|
1677 | | - pcpu_for_each_unpop_region(chunk->populated, rs, re, 0, |
---|
1678 | | - chunk->nr_pages) { |
---|
1679 | | - int nr = min(re - rs, nr_to_pop); |
---|
| 2024 | + bitmap_for_each_clear_region(chunk->populated, rs, re, 0, |
---|
| 2025 | + chunk->nr_pages) { |
---|
| 2026 | + int nr = min_t(int, re - rs, nr_to_pop); |
---|
1680 | 2027 | |
---|
1681 | 2028 | ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp); |
---|
1682 | 2029 | if (!ret) { |
---|
1683 | 2030 | nr_to_pop -= nr; |
---|
1684 | 2031 | spin_lock_irq(&pcpu_lock); |
---|
1685 | | - pcpu_chunk_populated(chunk, rs, rs + nr, false); |
---|
| 2032 | + pcpu_chunk_populated(chunk, rs, rs + nr); |
---|
1686 | 2033 | spin_unlock_irq(&pcpu_lock); |
---|
1687 | 2034 | } else { |
---|
1688 | 2035 | nr_to_pop = 0; |
---|
.. | .. |
---|
1695 | 2042 | |
---|
1696 | 2043 | if (nr_to_pop) { |
---|
1697 | 2044 | /* ran out of chunks to populate, create a new one and retry */ |
---|
1698 | | - chunk = pcpu_create_chunk(gfp); |
---|
| 2045 | + chunk = pcpu_create_chunk(type, gfp); |
---|
1699 | 2046 | if (chunk) { |
---|
1700 | 2047 | spin_lock_irq(&pcpu_lock); |
---|
1701 | 2048 | pcpu_chunk_relocate(chunk, -1); |
---|
.. | .. |
---|
1705 | 2052 | } |
---|
1706 | 2053 | |
---|
1707 | 2054 | mutex_unlock(&pcpu_alloc_mutex); |
---|
| 2055 | +} |
---|
| 2056 | + |
---|
| 2057 | +/** |
---|
| 2058 | + * pcpu_balance_workfn - manage the amount of free chunks and populated pages |
---|
| 2059 | + * @work: unused |
---|
| 2060 | + * |
---|
| 2061 | + * Call __pcpu_balance_workfn() for each chunk type. |
---|
| 2062 | + */ |
---|
| 2063 | +static void pcpu_balance_workfn(struct work_struct *work) |
---|
| 2064 | +{ |
---|
| 2065 | + enum pcpu_chunk_type type; |
---|
| 2066 | + |
---|
| 2067 | + for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) |
---|
| 2068 | + __pcpu_balance_workfn(type); |
---|
1708 | 2069 | } |
---|
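For orientation, the work item this handler serves, as it is wired up earlier in the file (an approximate sketch; the pcpu_async_enabled gating is an assumption from the surrounding code, not shown in this hunk):

	static void pcpu_balance_workfn(struct work_struct *work);
	static DECLARE_WORK(pcpu_balance_work, pcpu_balance_workfn);

	static void pcpu_schedule_balance_work(void)
	{
		if (pcpu_async_enabled)	/* assumed: set once workqueues are up */
			schedule_work(&pcpu_balance_work);
	}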
1709 | 2070 | |
---|
1710 | 2071 | /** |
---|
.. | .. |
---|
1721 | 2082 | void *addr; |
---|
1722 | 2083 | struct pcpu_chunk *chunk; |
---|
1723 | 2084 | unsigned long flags; |
---|
1724 | | - int off; |
---|
| 2085 | + int size, off; |
---|
1725 | 2086 | bool need_balance = false; |
---|
| 2087 | + struct list_head *pcpu_slot; |
---|
1726 | 2088 | |
---|
1727 | 2089 | if (!ptr) |
---|
1728 | 2090 | return; |
---|
.. | .. |
---|
1736 | 2098 | chunk = pcpu_chunk_addr_search(addr); |
---|
1737 | 2099 | off = addr - chunk->base_addr; |
---|
1738 | 2100 | |
---|
1739 | | - pcpu_free_area(chunk, off); |
---|
| 2101 | + size = pcpu_free_area(chunk, off); |
---|
| 2102 | + |
---|
| 2103 | + pcpu_slot = pcpu_chunk_list(pcpu_chunk_type(chunk)); |
---|
| 2104 | + |
---|
| 2105 | + pcpu_memcg_free_hook(chunk, off, size); |
---|
1740 | 2106 | |
---|
1741 | 2107 | /* if there is more than one fully free chunk, wake up the grim reaper */ |
---|
1742 | 2108 | if (chunk->free_bytes == pcpu_unit_size) { |
---|
.. | .. |
---|
1890 | 2256 | void *ptr; |
---|
1891 | 2257 | int unit; |
---|
1892 | 2258 | |
---|
1893 | | - base_size = ALIGN(sizeof(*ai) + nr_groups * sizeof(ai->groups[0]), |
---|
| 2259 | + base_size = ALIGN(struct_size(ai, groups, nr_groups), |
---|
1894 | 2260 | __alignof__(ai->groups[0].cpu_map[0])); |
---|
1895 | 2261 | ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]); |
---|
1896 | 2262 | |
---|
1897 | | - ptr = memblock_virt_alloc_nopanic(PFN_ALIGN(ai_size), PAGE_SIZE); |
---|
| 2263 | + ptr = memblock_alloc(PFN_ALIGN(ai_size), PAGE_SIZE); |
---|
1898 | 2264 | if (!ptr) |
---|
1899 | 2265 | return NULL; |
---|
1900 | 2266 | ai = ptr; |
---|
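The struct_size() conversion above is behavior-preserving with one improvement; a short note (sketch based on the linux/overflow.h helper):

	/*
	 * struct_size(ai, groups, nr_groups) evaluates to
	 *	sizeof(*ai) + nr_groups * sizeof(ai->groups[0])
	 * for the trailing flexible array, but saturates to SIZE_MAX if the
	 * multiply or add would overflow, so the allocation fails cleanly
	 * instead of being undersized.
	 */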
.. | .. |
---|
1985 | 2351 | * @base_addr: mapped address |
---|
1986 | 2352 | * |
---|
1987 | 2353 | * Initialize the first percpu chunk which contains the kernel static |
---|
1988 | | - * perpcu area. This function is to be called from arch percpu area |
---|
| 2354 | + * percpu area. This function is to be called from arch percpu area |
---|
1989 | 2355 | * setup path. |
---|
1990 | 2356 | * |
---|
1991 | 2357 | * @ai contains all information necessary to initialize the first |
---|
.. | .. |
---|
2032 | 2398 | * share the same vm, but use offset regions in the area allocation map. |
---|
2033 | 2399 | * The chunk serving the dynamic region is circulated in the chunk slots |
---|
2034 | 2400 | * and available for dynamic allocation like any other chunk. |
---|
2035 | | - * |
---|
2036 | | - * RETURNS: |
---|
2037 | | - * 0 on success, -errno on failure. |
---|
2038 | 2401 | */ |
---|
2039 | | -int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, |
---|
2040 | | - void *base_addr) |
---|
| 2402 | +void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, |
---|
| 2403 | + void *base_addr) |
---|
2041 | 2404 | { |
---|
2042 | 2405 | size_t size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; |
---|
2043 | 2406 | size_t static_size, dyn_size; |
---|
.. | .. |
---|
2050 | 2413 | int group, unit, i; |
---|
2051 | 2414 | int map_size; |
---|
2052 | 2415 | unsigned long tmp_addr; |
---|
| 2416 | + size_t alloc_size; |
---|
| 2417 | + enum pcpu_chunk_type type; |
---|
2053 | 2418 | |
---|
2054 | 2419 | #define PCPU_SETUP_BUG_ON(cond) do { \ |
---|
2055 | 2420 | if (unlikely(cond)) { \ |
---|
.. | .. |
---|
2081 | 2446 | PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0); |
---|
2082 | 2447 | |
---|
2083 | 2448 | /* process group information and build config tables accordingly */ |
---|
2084 | | - group_offsets = memblock_virt_alloc(ai->nr_groups * |
---|
2085 | | - sizeof(group_offsets[0]), 0); |
---|
2086 | | - group_sizes = memblock_virt_alloc(ai->nr_groups * |
---|
2087 | | - sizeof(group_sizes[0]), 0); |
---|
2088 | | - unit_map = memblock_virt_alloc(nr_cpu_ids * sizeof(unit_map[0]), 0); |
---|
2089 | | - unit_off = memblock_virt_alloc(nr_cpu_ids * sizeof(unit_off[0]), 0); |
---|
| 2449 | + alloc_size = ai->nr_groups * sizeof(group_offsets[0]); |
---|
| 2450 | + group_offsets = memblock_alloc(alloc_size, SMP_CACHE_BYTES); |
---|
| 2451 | + if (!group_offsets) |
---|
| 2452 | + panic("%s: Failed to allocate %zu bytes\n", __func__, |
---|
| 2453 | + alloc_size); |
---|
| 2454 | + |
---|
| 2455 | + alloc_size = ai->nr_groups * sizeof(group_sizes[0]); |
---|
| 2456 | + group_sizes = memblock_alloc(alloc_size, SMP_CACHE_BYTES); |
---|
| 2457 | + if (!group_sizes) |
---|
| 2458 | + panic("%s: Failed to allocate %zu bytes\n", __func__, |
---|
| 2459 | + alloc_size); |
---|
| 2460 | + |
---|
| 2461 | + alloc_size = nr_cpu_ids * sizeof(unit_map[0]); |
---|
| 2462 | + unit_map = memblock_alloc(alloc_size, SMP_CACHE_BYTES); |
---|
| 2463 | + if (!unit_map) |
---|
| 2464 | + panic("%s: Failed to allocate %zu bytes\n", __func__, |
---|
| 2465 | + alloc_size); |
---|
| 2466 | + |
---|
| 2467 | + alloc_size = nr_cpu_ids * sizeof(unit_off[0]); |
---|
| 2468 | + unit_off = memblock_alloc(alloc_size, SMP_CACHE_BYTES); |
---|
| 2469 | + if (!unit_off) |
---|
| 2470 | + panic("%s: Failed to allocate %zu bytes\n", __func__, |
---|
| 2471 | + alloc_size); |
---|
2090 | 2472 | |
---|
2091 | 2473 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) |
---|
2092 | 2474 | unit_map[cpu] = UINT_MAX; |
---|
.. | .. |
---|
2140 | 2522 | pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT; |
---|
2141 | 2523 | pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; |
---|
2142 | 2524 | pcpu_atom_size = ai->atom_size; |
---|
2143 | | - pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + |
---|
2144 | | - BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); |
---|
| 2525 | + pcpu_chunk_struct_size = struct_size(chunk, populated, |
---|
| 2526 | + BITS_TO_LONGS(pcpu_unit_pages)); |
---|
2145 | 2527 | |
---|
2146 | 2528 | pcpu_stats_save_ai(ai); |
---|
2147 | 2529 | |
---|
.. | .. |
---|
2150 | 2532 | * empty chunks. |
---|
2151 | 2533 | */ |
---|
2152 | 2534 | pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2; |
---|
2153 | | - pcpu_slot = memblock_virt_alloc( |
---|
2154 | | - pcpu_nr_slots * sizeof(pcpu_slot[0]), 0); |
---|
2155 | | - for (i = 0; i < pcpu_nr_slots; i++) |
---|
2156 | | - INIT_LIST_HEAD(&pcpu_slot[i]); |
---|
| 2535 | + pcpu_chunk_lists = memblock_alloc(pcpu_nr_slots * |
---|
| 2536 | + sizeof(pcpu_chunk_lists[0]) * |
---|
| 2537 | + PCPU_NR_CHUNK_TYPES, |
---|
| 2538 | + SMP_CACHE_BYTES); |
---|
| 2539 | + if (!pcpu_chunk_lists) |
---|
| 2540 | + panic("%s: Failed to allocate %zu bytes\n", __func__, |
---|
| 2541 | + pcpu_nr_slots * sizeof(pcpu_chunk_lists[0]) * |
---|
| 2542 | + PCPU_NR_CHUNK_TYPES); |
---|
| 2543 | + |
---|
| 2544 | + for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) |
---|
| 2545 | + for (i = 0; i < pcpu_nr_slots; i++) |
---|
| 2546 | + INIT_LIST_HEAD(&pcpu_chunk_list(type)[i]); |
---|
2157 | 2547 | |
---|
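The double loop indexes the flat array through pcpu_chunk_list(); a sketch of that helper (defined earlier in the file, not shown in this hunk -- the contiguous-run layout is the assumption this initialization relies on):

	static struct list_head *pcpu_chunk_list(enum pcpu_chunk_type type)
	{
		/* each chunk type owns a contiguous run of pcpu_nr_slots heads */
		return &pcpu_chunk_lists[pcpu_nr_slots * type];
	}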
2158 | 2548 | /* |
---|
2159 | 2549 | * The end of the static region needs to be aligned with the |
---|
.. | .. |
---|
2190 | 2580 | |
---|
2191 | 2581 | /* link the first chunk in */ |
---|
2192 | 2582 | pcpu_first_chunk = chunk; |
---|
2193 | | - pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages; |
---|
| 2583 | + pcpu_nr_empty_pop_pages[PCPU_CHUNK_ROOT] = pcpu_first_chunk->nr_empty_pop_pages; |
---|
2194 | 2584 | pcpu_chunk_relocate(pcpu_first_chunk, -1); |
---|
2195 | 2585 | |
---|
2196 | 2586 | /* include all regions of the first chunk */ |
---|
.. | .. |
---|
2201 | 2591 | |
---|
2202 | 2592 | /* we're done */ |
---|
2203 | 2593 | pcpu_base_addr = base_addr; |
---|
2204 | | - return 0; |
---|
2205 | 2594 | } |
---|
2206 | 2595 | |
---|
2207 | 2596 | #ifdef CONFIG_SMP |
---|
.. | .. |
---|
2284 | 2673 | const size_t static_size = __per_cpu_end - __per_cpu_start; |
---|
2285 | 2674 | int nr_groups = 1, nr_units = 0; |
---|
2286 | 2675 | size_t size_sum, min_unit_size, alloc_size; |
---|
2287 | | - int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ |
---|
| 2676 | + int upa, max_upa, best_upa; /* units_per_alloc */ |
---|
2288 | 2677 | int last_allocs, group, unit; |
---|
2289 | 2678 | unsigned int cpu, tcpu; |
---|
2290 | 2679 | struct pcpu_alloc_info *ai; |
---|
.. | .. |
---|
2388 | 2777 | ai->atom_size = atom_size; |
---|
2389 | 2778 | ai->alloc_size = alloc_size; |
---|
2390 | 2779 | |
---|
2391 | | - for (group = 0, unit = 0; group_cnt[group]; group++) { |
---|
| 2780 | + for (group = 0, unit = 0; group < nr_groups; group++) { |
---|
2392 | 2781 | struct pcpu_group_info *gi = &ai->groups[group]; |
---|
2393 | 2782 | |
---|
2394 | 2783 | /* |
---|
.. | .. |
---|
2454 | 2843 | struct pcpu_alloc_info *ai; |
---|
2455 | 2844 | size_t size_sum, areas_size; |
---|
2456 | 2845 | unsigned long max_distance; |
---|
2457 | | - int group, i, highest_group, rc; |
---|
| 2846 | + int group, i, highest_group, rc = 0; |
---|
2458 | 2847 | |
---|
2459 | 2848 | ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size, |
---|
2460 | 2849 | cpu_distance_fn); |
---|
.. | .. |
---|
2464 | 2853 | size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; |
---|
2465 | 2854 | areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *)); |
---|
2466 | 2855 | |
---|
2467 | | - areas = memblock_virt_alloc_nopanic(areas_size, 0); |
---|
| 2856 | + areas = memblock_alloc(areas_size, SMP_CACHE_BYTES); |
---|
2468 | 2857 | if (!areas) { |
---|
2469 | 2858 | rc = -ENOMEM; |
---|
2470 | 2859 | goto out_free; |
---|
.. | .. |
---|
2539 | 2928 | PFN_DOWN(size_sum), ai->static_size, ai->reserved_size, |
---|
2540 | 2929 | ai->dyn_size, ai->unit_size); |
---|
2541 | 2930 | |
---|
2542 | | - rc = pcpu_setup_first_chunk(ai, base); |
---|
| 2931 | + pcpu_setup_first_chunk(ai, base); |
---|
2543 | 2932 | goto out_free; |
---|
2544 | 2933 | |
---|
2545 | 2934 | out_free_areas: |
---|
.. | .. |
---|
2583 | 2972 | int unit_pages; |
---|
2584 | 2973 | size_t pages_size; |
---|
2585 | 2974 | struct page **pages; |
---|
2586 | | - int unit, i, j, rc; |
---|
| 2975 | + int unit, i, j, rc = 0; |
---|
2587 | 2976 | int upa; |
---|
2588 | 2977 | int nr_g0_units; |
---|
2589 | 2978 | |
---|
.. | .. |
---|
2595 | 2984 | BUG_ON(ai->nr_groups != 1); |
---|
2596 | 2985 | upa = ai->alloc_size/ai->unit_size; |
---|
2597 | 2986 | nr_g0_units = roundup(num_possible_cpus(), upa); |
---|
2598 | | - if (unlikely(WARN_ON(ai->groups[0].nr_units != nr_g0_units))) { |
---|
| 2987 | + if (WARN_ON(ai->groups[0].nr_units != nr_g0_units)) { |
---|
2599 | 2988 | pcpu_free_alloc_info(ai); |
---|
2600 | 2989 | return -EINVAL; |
---|
2601 | 2990 | } |
---|
.. | .. |
---|
2605 | 2994 | /* unaligned allocations can't be freed, round up to page size */ |
---|
2606 | 2995 | pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() * |
---|
2607 | 2996 | sizeof(pages[0])); |
---|
2608 | | - pages = memblock_virt_alloc(pages_size, 0); |
---|
| 2997 | + pages = memblock_alloc(pages_size, SMP_CACHE_BYTES); |
---|
| 2998 | + if (!pages) |
---|
| 2999 | + panic("%s: Failed to allocate %zu bytes\n", __func__, |
---|
| 3000 | + pages_size); |
---|
2609 | 3001 | |
---|
2610 | 3002 | /* allocate pages */ |
---|
2611 | 3003 | j = 0; |
---|
.. | .. |
---|
2661 | 3053 | unit_pages, psize_str, ai->static_size, |
---|
2662 | 3054 | ai->reserved_size, ai->dyn_size); |
---|
2663 | 3055 | |
---|
2664 | | - rc = pcpu_setup_first_chunk(ai, vm.addr); |
---|
| 3056 | + pcpu_setup_first_chunk(ai, vm.addr); |
---|
2665 | 3057 | goto out_free_ar; |
---|
2666 | 3058 | |
---|
2667 | 3059 | enomem: |
---|
.. | .. |
---|
2694 | 3086 | static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, |
---|
2695 | 3087 | size_t align) |
---|
2696 | 3088 | { |
---|
2697 | | - return memblock_virt_alloc_from_nopanic( |
---|
2698 | | - size, align, __pa(MAX_DMA_ADDRESS)); |
---|
| 3089 | + return memblock_alloc_from(size, align, __pa(MAX_DMA_ADDRESS)); |
---|
2699 | 3090 | } |
---|
2700 | 3091 | |
---|
2701 | 3092 | static void __init pcpu_dfl_fc_free(void *ptr, size_t size) |
---|
.. | .. |
---|
2743 | 3134 | void *fc; |
---|
2744 | 3135 | |
---|
2745 | 3136 | ai = pcpu_alloc_alloc_info(1, 1); |
---|
2746 | | - fc = memblock_virt_alloc_from_nopanic(unit_size, |
---|
2747 | | - PAGE_SIZE, |
---|
2748 | | - __pa(MAX_DMA_ADDRESS)); |
---|
| 3137 | + fc = memblock_alloc_from(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); |
---|
2749 | 3138 | if (!ai || !fc) |
---|
2750 | 3139 | panic("Failed to allocate memory for percpu areas."); |
---|
2751 | 3140 | /* kmemleak tracks the percpu allocations separately */ |
---|
.. | .. |
---|
2758 | 3147 | ai->groups[0].nr_units = 1; |
---|
2759 | 3148 | ai->groups[0].cpu_map[0] = 0; |
---|
2760 | 3149 | |
---|
2761 | | - if (pcpu_setup_first_chunk(ai, fc) < 0) |
---|
2762 | | - panic("Failed to initialize percpu areas."); |
---|
| 3150 | + pcpu_setup_first_chunk(ai, fc); |
---|
2763 | 3151 | pcpu_free_alloc_info(ai); |
---|
2764 | 3152 | } |
---|
2765 | 3153 | |
---|
.. | .. |
---|
2780 | 3168 | { |
---|
2781 | 3169 | return pcpu_nr_populated * pcpu_nr_units; |
---|
2782 | 3170 | } |
---|
| 3171 | +EXPORT_SYMBOL_GPL(pcpu_nr_pages); |
---|
2783 | 3172 | |
---|
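With the export added above, modular code can observe the allocator's footprint; a hedged usage sketch (the caller is hypothetical):

	/* Report the populated percpu backing store in bytes. */
	unsigned long pcpu_bytes = pcpu_nr_pages() << PAGE_SHIFT;

	pr_info("percpu: %lu bytes of backing pages populated\n", pcpu_bytes);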
2784 | 3173 | /* |
---|
2785 | 3174 | * Percpu allocator is initialized early during boot when neither slab or |
---|