.. | .. |
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
1 | 2 | /* |
2 | 3 | * linux/arch/x86_64/mm/init.c |
3 | 4 | * |
.. | .. |
20 | 21 | #include <linux/init.h> |
21 | 22 | #include <linux/initrd.h> |
22 | 23 | #include <linux/pagemap.h> |
23 | | -#include <linux/bootmem.h> |
24 | 24 | #include <linux/memblock.h> |
25 | 25 | #include <linux/proc_fs.h> |
26 | 26 | #include <linux/pci.h> |
.. | .. |
37 | 37 | #include <asm/processor.h> |
38 | 38 | #include <asm/bios_ebda.h> |
39 | 39 | #include <linux/uaccess.h> |
40 | | -#include <asm/pgtable.h> |
41 | 40 | #include <asm/pgalloc.h> |
42 | 41 | #include <asm/dma.h> |
43 | 42 | #include <asm/fixmap.h> |
.. | .. |
54 | 53 | #include <asm/init.h> |
55 | 54 | #include <asm/uv/uv.h> |
56 | 55 | #include <asm/setup.h> |
| 56 | +#include <asm/ftrace.h> |
57 | 57 | |
58 | 58 | #include "mm_internal.h" |
59 | 59 | |
60 | 60 | #include "ident_map.c" |
| 61 | + |
| 62 | +#define DEFINE_POPULATE(fname, type1, type2, init) \ |
| 63 | +static inline void fname##_init(struct mm_struct *mm, \ |
| 64 | + type1##_t *arg1, type2##_t *arg2, bool init) \ |
| 65 | +{ \ |
| 66 | + if (init) \ |
| 67 | + fname##_safe(mm, arg1, arg2); \ |
| 68 | + else \ |
| 69 | + fname(mm, arg1, arg2); \ |
| 70 | +} |
| 71 | + |
| 72 | +DEFINE_POPULATE(p4d_populate, p4d, pud, init) |
| 73 | +DEFINE_POPULATE(pgd_populate, pgd, p4d, init) |
| 74 | +DEFINE_POPULATE(pud_populate, pud, pmd, init) |
| 75 | +DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init) |
| 76 | + |
| 77 | +#define DEFINE_ENTRY(type1, type2, init) \ |
| 78 | +static inline void set_##type1##_init(type1##_t *arg1, \ |
| 79 | + type2##_t arg2, bool init) \ |
| 80 | +{ \ |
| 81 | + if (init) \ |
| 82 | + set_##type1##_safe(arg1, arg2); \ |
| 83 | + else \ |
| 84 | + set_##type1(arg1, arg2); \ |
| 85 | +} |
| 86 | + |
| 87 | +DEFINE_ENTRY(p4d, p4d, init) |
| 88 | +DEFINE_ENTRY(pud, pud, init) |
| 89 | +DEFINE_ENTRY(pmd, pmd, init) |
| 90 | +DEFINE_ENTRY(pte, pte, init) |
| 91 | + |
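
For readability: the two helper macros above only generate small wrappers that choose between the checked (_safe) and plain page-table setters based on the init argument. As an illustration (this is just the preprocessor expansion of the patch lines above, not extra code), DEFINE_POPULATE(pud_populate, pud, pmd, init) and DEFINE_ENTRY(pte, pte, init) expand to roughly:

    static inline void pud_populate_init(struct mm_struct *mm,
                                         pud_t *arg1, pmd_t *arg2, bool init)
    {
            if (init)
                    pud_populate_safe(mm, arg1, arg2);
            else
                    pud_populate(mm, arg1, arg2);
    }

    static inline void set_pte_init(pte_t *arg1, pte_t arg2, bool init)
    {
            if (init)
                    set_pte_safe(arg1, arg2);
            else
                    set_pte(arg1, arg2);
    }

The _safe variants are the ones that are meant to complain if an already-present entry would be overwritten, so init == true keeps that extra checking for first-time table construction, while init == false allows re-population of entries that already exist.
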
61 | 92 | |
62 | 93 | /* |
63 | 94 | * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the |
.. | .. |
162 | 193 | spin_lock(pgt_lock); |
163 | 194 | |
164 | 195 | if (!p4d_none(*p4d_ref) && !p4d_none(*p4d)) |
165 | | - BUG_ON(p4d_page_vaddr(*p4d) |
166 | | - != p4d_page_vaddr(*p4d_ref)); |
| 196 | + BUG_ON(p4d_pgtable(*p4d) |
| 197 | + != p4d_pgtable(*p4d_ref)); |
167 | 198 | |
168 | 199 | if (p4d_none(*p4d)) |
169 | 200 | set_p4d(p4d, *p4d_ref); |
.. | .. |
178 | 209 | * When memory was added make sure all the processes MM have |
179 | 210 | * suitable PGD entries in the local PGD level page. |
180 | 211 | */ |
181 | | -void sync_global_pgds(unsigned long start, unsigned long end) |
| 212 | +static void sync_global_pgds(unsigned long start, unsigned long end) |
182 | 213 | { |
183 | 214 | if (pgtable_l5_enabled()) |
184 | 215 | sync_global_pgds_l5(start, end); |
.. | .. |
197 | 228 | if (after_bootmem) |
198 | 229 | ptr = (void *) get_zeroed_page(GFP_ATOMIC); |
199 | 230 | else |
200 | | - ptr = alloc_bootmem_pages(PAGE_SIZE); |
| 231 | + ptr = memblock_alloc(PAGE_SIZE, PAGE_SIZE); |
201 | 232 | |
202 | 233 | if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) { |
203 | 234 | panic("set_pte_phys: cannot allocate page data %s\n", |
.. | .. |
267 | 298 | * It's enough to flush this one mapping. |
268 | 299 | * (PGE mappings get flushed as well) |
269 | 300 | */ |
270 | | - __flush_tlb_one_kernel(vaddr); |
| 301 | + flush_tlb_one_kernel(vaddr); |
271 | 302 | } |
272 | 303 | |
273 | 304 | void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte) |
.. | .. |
336 | 367 | pgprot_t prot; |
337 | 368 | |
338 | 369 | pgprot_val(prot) = pgprot_val(PAGE_KERNEL_LARGE) | |
339 | | - pgprot_val(pgprot_4k_2_large(cachemode2pgprot(cache))); |
| 370 | + protval_4k_2_large(cachemode2protval(cache)); |
340 | 371 | BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK)); |
341 | 372 | for (; size; phys += PMD_SIZE, size -= PMD_SIZE) { |
342 | 373 | pgd = pgd_offset_k((unsigned long)__va(phys)); |
.. | .. |
415 | 446 | */ |
416 | 447 | static unsigned long __meminit |
417 | 448 | phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end, |
418 | | - pgprot_t prot) |
| 449 | + pgprot_t prot, bool init) |
419 | 450 | { |
420 | 451 | unsigned long pages = 0, paddr_next; |
421 | 452 | unsigned long paddr_last = paddr_end; |
.. | .. |
433 | 464 | E820_TYPE_RAM) && |
434 | 465 | !e820__mapped_any(paddr & PAGE_MASK, paddr_next, |
435 | 466 | E820_TYPE_RESERVED_KERN)) |
436 | | - set_pte(pte, __pte(0)); |
| 467 | + set_pte_init(pte, __pte(0), init); |
437 | 468 | continue; |
438 | 469 | } |
439 | 470 | |
.. | .. |
453 | 484 | pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr, |
454 | 485 | pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte); |
455 | 486 | pages++; |
456 | | - set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot)); |
| 487 | + set_pte_init(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), init); |
457 | 488 | paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE; |
458 | 489 | } |
459 | 490 | |
.. | .. |
469 | 500 | */ |
470 | 501 | static unsigned long __meminit |
471 | 502 | phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, |
472 | | - unsigned long page_size_mask, pgprot_t prot) |
| 503 | + unsigned long page_size_mask, pgprot_t prot, bool init) |
473 | 504 | { |
474 | 505 | unsigned long pages = 0, paddr_next; |
475 | 506 | unsigned long paddr_last = paddr_end; |
.. | .. |
488 | 519 | E820_TYPE_RAM) && |
489 | 520 | !e820__mapped_any(paddr & PMD_MASK, paddr_next, |
490 | 521 | E820_TYPE_RESERVED_KERN)) |
491 | | - set_pmd(pmd, __pmd(0)); |
| 522 | + set_pmd_init(pmd, __pmd(0), init); |
492 | 523 | continue; |
493 | 524 | } |
494 | 525 | |
.. | .. |
497 | 528 | spin_lock(&init_mm.page_table_lock); |
498 | 529 | pte = (pte_t *)pmd_page_vaddr(*pmd); |
499 | 530 | paddr_last = phys_pte_init(pte, paddr, |
500 | | - paddr_end, prot); |
| 531 | + paddr_end, prot, |
| 532 | + init); |
501 | 533 | spin_unlock(&init_mm.page_table_lock); |
502 | 534 | continue; |
503 | 535 | } |
.. | .. |
525 | 557 | if (page_size_mask & (1<<PG_LEVEL_2M)) { |
526 | 558 | pages++; |
527 | 559 | spin_lock(&init_mm.page_table_lock); |
528 | | - set_pte((pte_t *)pmd, |
529 | | - pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT, |
530 | | - __pgprot(pgprot_val(prot) | _PAGE_PSE))); |
| 560 | + set_pte_init((pte_t *)pmd, |
| 561 | + pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT, |
| 562 | + __pgprot(pgprot_val(prot) | _PAGE_PSE)), |
| 563 | + init); |
531 | 564 | spin_unlock(&init_mm.page_table_lock); |
532 | 565 | paddr_last = paddr_next; |
533 | 566 | continue; |
534 | 567 | } |
535 | 568 | |
536 | 569 | pte = alloc_low_page(); |
537 | | - paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot); |
| 570 | + paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init); |
538 | 571 | |
539 | 572 | spin_lock(&init_mm.page_table_lock); |
540 | | - pmd_populate_kernel(&init_mm, pmd, pte); |
| 573 | + pmd_populate_kernel_init(&init_mm, pmd, pte, init); |
541 | 574 | spin_unlock(&init_mm.page_table_lock); |
542 | 575 | } |
543 | 576 | update_page_count(PG_LEVEL_2M, pages); |
.. | .. |
552 | 585 | */ |
553 | 586 | static unsigned long __meminit |
554 | 587 | phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, |
555 | | - unsigned long page_size_mask) |
| 588 | + unsigned long page_size_mask, pgprot_t _prot, bool init) |
556 | 589 | { |
557 | 590 | unsigned long pages = 0, paddr_next; |
558 | 591 | unsigned long paddr_last = paddr_end; |
.. | .. |
562 | 595 | for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) { |
563 | 596 | pud_t *pud; |
564 | 597 | pmd_t *pmd; |
565 | | - pgprot_t prot = PAGE_KERNEL; |
| 598 | + pgprot_t prot = _prot; |
566 | 599 | |
567 | 600 | vaddr = (unsigned long)__va(paddr); |
568 | 601 | pud = pud_page + pud_index(vaddr); |
.. | .. |
574 | 607 | E820_TYPE_RAM) && |
575 | 608 | !e820__mapped_any(paddr & PUD_MASK, paddr_next, |
576 | 609 | E820_TYPE_RESERVED_KERN)) |
577 | | - set_pud(pud, __pud(0)); |
| 610 | + set_pud_init(pud, __pud(0), init); |
578 | 611 | continue; |
579 | 612 | } |
580 | 613 | |
.. | .. |
584 | 617 | paddr_last = phys_pmd_init(pmd, paddr, |
585 | 618 | paddr_end, |
586 | 619 | page_size_mask, |
587 | | - prot); |
| 620 | + prot, init); |
588 | 621 | continue; |
589 | 622 | } |
590 | 623 | /* |
.. | .. |
611 | 644 | if (page_size_mask & (1<<PG_LEVEL_1G)) { |
612 | 645 | pages++; |
613 | 646 | spin_lock(&init_mm.page_table_lock); |
614 | | - set_pte((pte_t *)pud, |
615 | | - pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT, |
616 | | - PAGE_KERNEL_LARGE)); |
| 647 | + |
| 648 | + prot = __pgprot(pgprot_val(prot) | _PAGE_PSE); |
| 649 | + |
| 650 | + set_pte_init((pte_t *)pud, |
| 651 | + pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT, |
| 652 | + prot), |
| 653 | + init); |
617 | 654 | spin_unlock(&init_mm.page_table_lock); |
618 | 655 | paddr_last = paddr_next; |
619 | 656 | continue; |
.. | .. |
621 | 658 | |
622 | 659 | pmd = alloc_low_page(); |
623 | 660 | paddr_last = phys_pmd_init(pmd, paddr, paddr_end, |
624 | | - page_size_mask, prot); |
| 661 | + page_size_mask, prot, init); |
625 | 662 | |
626 | 663 | spin_lock(&init_mm.page_table_lock); |
627 | | - pud_populate(&init_mm, pud, pmd); |
| 664 | + pud_populate_init(&init_mm, pud, pmd, init); |
628 | 665 | spin_unlock(&init_mm.page_table_lock); |
629 | 666 | } |
630 | 667 | |
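
Two notes on the phys_pud_init() changes above: the new _prot/init parameters come from the callers further down (ultimately from the pgprot handed to kernel_physical_mapping_init() and, for hotplug, from arch_add_memory()), and the 1 GiB branch now ORs _PAGE_PSE into the caller-supplied protection instead of hard-coding PAGE_KERNEL_LARGE. Assuming the usual x86 definitions, that is equivalent for the default protection; a sketch of the assumed equivalence, not patch code:

    /* Assumed: PAGE_KERNEL_LARGE is PAGE_KERNEL with the PSE bit set. */
    pgprot_t large = __pgprot(pgprot_val(PAGE_KERNEL) | _PAGE_PSE);
    /* pgprot_val(large) should equal pgprot_val(PAGE_KERNEL_LARGE). */
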
.. | .. |
635 | 672 | |
636 | 673 | static unsigned long __meminit |
637 | 674 | phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, |
638 | | - unsigned long page_size_mask) |
| 675 | + unsigned long page_size_mask, pgprot_t prot, bool init) |
639 | 676 | { |
640 | | - unsigned long paddr_next, paddr_last = paddr_end; |
641 | | - unsigned long vaddr = (unsigned long)__va(paddr); |
642 | | - int i = p4d_index(vaddr); |
| 677 | + unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last; |
| 678 | + |
| 679 | + paddr_last = paddr_end; |
| 680 | + vaddr = (unsigned long)__va(paddr); |
| 681 | + vaddr_end = (unsigned long)__va(paddr_end); |
643 | 682 | |
644 | 683 | if (!pgtable_l5_enabled()) |
645 | | - return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask); |
| 684 | + return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, |
| 685 | + page_size_mask, prot, init); |
646 | 686 | |
647 | | - for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) { |
648 | | - p4d_t *p4d; |
| 687 | + for (; vaddr < vaddr_end; vaddr = vaddr_next) { |
| 688 | + p4d_t *p4d = p4d_page + p4d_index(vaddr); |
649 | 689 | pud_t *pud; |
650 | 690 | |
651 | | - vaddr = (unsigned long)__va(paddr); |
652 | | - p4d = p4d_page + p4d_index(vaddr); |
653 | | - paddr_next = (paddr & P4D_MASK) + P4D_SIZE; |
| 691 | + vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE; |
| 692 | + paddr = __pa(vaddr); |
654 | 693 | |
655 | 694 | if (paddr >= paddr_end) { |
| 695 | + paddr_next = __pa(vaddr_next); |
656 | 696 | if (!after_bootmem && |
657 | 697 | !e820__mapped_any(paddr & P4D_MASK, paddr_next, |
658 | 698 | E820_TYPE_RAM) && |
659 | 699 | !e820__mapped_any(paddr & P4D_MASK, paddr_next, |
660 | 700 | E820_TYPE_RESERVED_KERN)) |
661 | | - set_p4d(p4d, __p4d(0)); |
| 701 | + set_p4d_init(p4d, __p4d(0), init); |
662 | 702 | continue; |
663 | 703 | } |
664 | 704 | |
665 | 705 | if (!p4d_none(*p4d)) { |
666 | 706 | pud = pud_offset(p4d, 0); |
667 | | - paddr_last = phys_pud_init(pud, paddr, |
668 | | - paddr_end, |
669 | | - page_size_mask); |
| 707 | + paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), |
| 708 | + page_size_mask, prot, init); |
670 | 709 | continue; |
671 | 710 | } |
672 | 711 | |
673 | 712 | pud = alloc_low_page(); |
674 | | - paddr_last = phys_pud_init(pud, paddr, paddr_end, |
675 | | - page_size_mask); |
| 713 | + paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), |
| 714 | + page_size_mask, prot, init); |
676 | 715 | |
677 | 716 | spin_lock(&init_mm.page_table_lock); |
678 | | - p4d_populate(&init_mm, p4d, pud); |
| 717 | + p4d_populate_init(&init_mm, p4d, pud, init); |
679 | 718 | spin_unlock(&init_mm.page_table_lock); |
680 | 719 | } |
681 | 720 | |
682 | 721 | return paddr_last; |
683 | 722 | } |
684 | 723 | |
685 | | -/* |
686 | | - * Create page table mapping for the physical memory for specific physical |
687 | | - * addresses. The virtual and physical addresses have to be aligned on PMD level |
688 | | - * down. It returns the last physical address mapped. |
689 | | - */ |
690 | | -unsigned long __meminit |
691 | | -kernel_physical_mapping_init(unsigned long paddr_start, |
692 | | - unsigned long paddr_end, |
693 | | - unsigned long page_size_mask) |
| 724 | +static unsigned long __meminit |
| 725 | +__kernel_physical_mapping_init(unsigned long paddr_start, |
| 726 | + unsigned long paddr_end, |
| 727 | + unsigned long page_size_mask, |
| 728 | + pgprot_t prot, bool init) |
694 | 729 | { |
695 | 730 | bool pgd_changed = false; |
696 | 731 | unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last; |
.. | .. |
710 | 745 | p4d = (p4d_t *)pgd_page_vaddr(*pgd); |
711 | 746 | paddr_last = phys_p4d_init(p4d, __pa(vaddr), |
712 | 747 | __pa(vaddr_end), |
713 | | - page_size_mask); |
| 748 | + page_size_mask, |
| 749 | + prot, init); |
714 | 750 | continue; |
715 | 751 | } |
716 | 752 | |
717 | 753 | p4d = alloc_low_page(); |
718 | 754 | paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end), |
719 | | - page_size_mask); |
| 755 | + page_size_mask, prot, init); |
720 | 756 | |
721 | 757 | spin_lock(&init_mm.page_table_lock); |
722 | 758 | if (pgtable_l5_enabled()) |
723 | | - pgd_populate(&init_mm, pgd, p4d); |
| 759 | + pgd_populate_init(&init_mm, pgd, p4d, init); |
724 | 760 | else |
725 | | - p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d); |
| 761 | + p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr), |
| 762 | + (pud_t *) p4d, init); |
| 763 | + |
726 | 764 | spin_unlock(&init_mm.page_table_lock); |
727 | 765 | pgd_changed = true; |
728 | 766 | } |
.. | .. |
731 | 769 | sync_global_pgds(vaddr_start, vaddr_end - 1); |
732 | 770 | |
733 | 771 | return paddr_last; |
| 772 | +} |
| 773 | + |
| 774 | + |
| 775 | +/* |
| 776 | + * Create page table mapping for the physical memory for specific physical |
| 777 | + * addresses. Note that it can only be used to populate non-present entries. |
| 778 | + * The virtual and physical addresses have to be aligned on PMD level |
| 779 | + * down. It returns the last physical address mapped. |
| 780 | + */ |
| 781 | +unsigned long __meminit |
| 782 | +kernel_physical_mapping_init(unsigned long paddr_start, |
| 783 | + unsigned long paddr_end, |
| 784 | + unsigned long page_size_mask, pgprot_t prot) |
| 785 | +{ |
| 786 | + return __kernel_physical_mapping_init(paddr_start, paddr_end, |
| 787 | + page_size_mask, prot, true); |
| 788 | +} |
| 789 | + |
| 790 | +/* |
| 791 | + * This function is similar to kernel_physical_mapping_init() above with the |
| 792 | + * exception that it uses set_{pud,pmd}() instead of the set_{pud,pte}_safe() |
| 793 | + * when updating the mapping. The caller is responsible to flush the TLBs after |
| 794 | + * the function returns. |
| 795 | + */ |
| 796 | +unsigned long __meminit |
| 797 | +kernel_physical_mapping_change(unsigned long paddr_start, |
| 798 | + unsigned long paddr_end, |
| 799 | + unsigned long page_size_mask) |
| 800 | +{ |
| 801 | + return __kernel_physical_mapping_init(paddr_start, paddr_end, |
| 802 | + page_size_mask, PAGE_KERNEL, |
| 803 | + false); |
734 | 804 | } |
735 | 805 | |
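
The split above keeps the old behaviour for boot-time construction (init == true routes through the *_safe setters) and adds kernel_physical_mapping_change() for rewriting mappings that already exist, where the plain setters are used and the TLB flush is deliberately left to the caller. A minimal usage sketch with hypothetical names, not taken from the patch:

    /* Hypothetical caller: rewrite an existing PMD-aligned range and flush. */
    static void example_remap(unsigned long start_paddr, unsigned long end_paddr)
    {
            kernel_physical_mapping_change(start_paddr, end_paddr,
                                           1 << PG_LEVEL_2M);
            flush_tlb_all();        /* the helper itself does not flush */
    }
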
736 | 806 | #ifndef CONFIG_NUMA |
.. | .. |
742 | 812 | |
743 | 813 | void __init paging_init(void) |
744 | 814 | { |
745 | | - sparse_memory_present_with_active_regions(MAX_NUMNODES); |
746 | 815 | sparse_init(); |
747 | 816 | |
748 | 817 | /* |
.. | .. |
752 | 821 | * will not set it back. |
753 | 822 | */ |
754 | 823 | node_clear_state(0, N_MEMORY); |
755 | | - if (N_MEMORY != N_NORMAL_MEMORY) |
756 | | - node_clear_state(0, N_NORMAL_MEMORY); |
| 824 | + node_clear_state(0, N_NORMAL_MEMORY); |
757 | 825 | |
758 | 826 | zone_sizes_init(); |
759 | 827 | } |
.. | .. |
778 | 846 | } |
779 | 847 | |
780 | 848 | int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, |
781 | | - struct vmem_altmap *altmap, bool want_memblock) |
| 849 | + struct mhp_params *params) |
782 | 850 | { |
783 | 851 | int ret; |
784 | 852 | |
785 | | - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); |
| 853 | + ret = __add_pages(nid, start_pfn, nr_pages, params); |
786 | 854 | WARN_ON_ONCE(ret); |
787 | 855 | |
788 | 856 | /* update max_pfn, max_low_pfn and high_memory */ |
.. | .. |
792 | 860 | return ret; |
793 | 861 | } |
794 | 862 | |
795 | | -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, |
796 | | - bool want_memblock) |
| 863 | +int arch_add_memory(int nid, u64 start, u64 size, |
| 864 | + struct mhp_params *params) |
797 | 865 | { |
798 | 866 | unsigned long start_pfn = start >> PAGE_SHIFT; |
799 | 867 | unsigned long nr_pages = size >> PAGE_SHIFT; |
800 | 868 | |
801 | | - init_memory_mapping(start, start + size); |
| 869 | + init_memory_mapping(start, start + size, params->pgprot); |
802 | 870 | |
803 | | - return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); |
| 871 | + return add_pages(nid, start_pfn, nr_pages, params); |
804 | 872 | } |
805 | 873 | |
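
add_pages() and arch_add_memory() now take a single struct mhp_params instead of the separate altmap/want_memblock arguments; arch_add_memory() reads params->pgprot for the new linear mapping and forwards the whole structure to __add_pages(). For orientation, the structure (declared in include/linux/memory_hotplug.h) is assumed to look roughly like this; treat the sketch as an assumption rather than a copy from this patch:

    /* Assumed shape, for illustration only: */
    struct mhp_params {
            struct vmem_altmap *altmap;     /* optional vmemmap backing store */
            pgprot_t pgprot;                /* protection for the direct mapping */
    };
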
806 | 874 | #define PAGE_INUSE 0xFD |
.. | .. |
1164 | 1232 | #endif |
1165 | 1233 | } |
1166 | 1234 | |
| 1235 | +/* |
| 1236 | + * Pre-allocates page-table pages for the vmalloc area in the kernel page-table. |
| 1237 | + * Only the level which needs to be synchronized between all page-tables is |
| 1238 | + * allocated because the synchronization can be expensive. |
| 1239 | + */ |
| 1240 | +static void __init preallocate_vmalloc_pages(void) |
| 1241 | +{ |
| 1242 | + unsigned long addr; |
| 1243 | + const char *lvl; |
| 1244 | + |
| 1245 | + for (addr = VMALLOC_START; addr <= VMALLOC_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) { |
| 1246 | + pgd_t *pgd = pgd_offset_k(addr); |
| 1247 | + p4d_t *p4d; |
| 1248 | + pud_t *pud; |
| 1249 | + |
| 1250 | + lvl = "p4d"; |
| 1251 | + p4d = p4d_alloc(&init_mm, pgd, addr); |
| 1252 | + if (!p4d) |
| 1253 | + goto failed; |
| 1254 | + |
| 1255 | + if (pgtable_l5_enabled()) |
| 1256 | + continue; |
| 1257 | + |
| 1258 | + /* |
| 1259 | + * The goal here is to allocate all possibly required |
| 1260 | + * hardware page tables pointed to by the top hardware |
| 1261 | + * level. |
| 1262 | + * |
| 1263 | + * On 4-level systems, the P4D layer is folded away and |
| 1264 | + * the above code does no preallocation. Below, go down |
| 1265 | + * to the pud _software_ level to ensure the second |
| 1266 | + * hardware level is allocated on 4-level systems too. |
| 1267 | + */ |
| 1268 | + lvl = "pud"; |
| 1269 | + pud = pud_alloc(&init_mm, p4d, addr); |
| 1270 | + if (!pud) |
| 1271 | + goto failed; |
| 1272 | + } |
| 1273 | + |
| 1274 | + return; |
| 1275 | + |
| 1276 | +failed: |
| 1277 | + |
| 1278 | + /* |
| 1279 | + * The pages have to be there now or they will be missing in |
| 1280 | + * process page-tables later. |
| 1281 | + */ |
| 1282 | + panic("Failed to pre-allocate %s pages for vmalloc area\n", lvl); |
| 1283 | +} |
| 1284 | + |
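
A rough sense of what the pre-allocation above costs, assuming the usual 4-level layout (512 GiB per top-level entry, a 32 TiB vmalloc area): the loop visits on the order of 64 top-level slots and allocates at most one page-table page per slot. The helper below is hypothetical and only spells out that arithmetic:

    /* Hypothetical, not in the patch: top-level slots covered by the loop. */
    static unsigned long vmalloc_top_level_slots(void)
    {
            return (VMALLOC_END >> PGDIR_SHIFT) -
                   (VMALLOC_START >> PGDIR_SHIFT) + 1;
    }
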
1167 | 1285 | void __init mem_init(void) |
1168 | 1286 | { |
1169 | 1287 | pci_iommu_alloc(); |
.. | .. |
1171 | 1289 | /* clear_bss() already clear the empty_zero_page */ |
1172 | 1290 | |
1173 | 1291 | /* this will put all memory onto the freelists */ |
1174 | | - free_all_bootmem(); |
| 1292 | + memblock_free_all(); |
1175 | 1293 | after_bootmem = 1; |
1176 | 1294 | x86_init.hyper.init_after_bootmem(); |
1177 | 1295 | |
1178 | 1296 | /* |
1179 | 1297 | * Must be done after boot memory is put on freelist, because here we |
1180 | 1298 | * might set fields in deferred struct pages that have not yet been |
1181 | | - * initialized, and free_all_bootmem() initializes all the reserved |
| 1299 | + * initialized, and memblock_free_all() initializes all the reserved |
1182 | 1300 | * deferred pages for us. |
1183 | 1301 | */ |
1184 | 1302 | register_page_bootmem_info(); |
.. | .. |
1187 | 1305 | if (get_gate_vma(&init_mm)) |
1188 | 1306 | kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER); |
1189 | 1307 | |
| 1308 | + preallocate_vmalloc_pages(); |
| 1309 | + |
1190 | 1310 | mem_init_print_info(NULL); |
1191 | 1311 | } |
1192 | 1312 | |
| 1313 | +#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
| 1314 | +int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask) |
| 1315 | +{ |
| 1316 | + /* |
| 1317 | + * More CPUs always led to greater speedups on tested systems, up to |
| 1318 | + * all the nodes' CPUs. Use all since the system is otherwise idle |
| 1319 | + * now. |
| 1320 | + */ |
| 1321 | + return max_t(int, cpumask_weight(node_cpumask), 1); |
| 1322 | +} |
| 1323 | +#endif |
| 1324 | + |
1193 | 1325 | int kernel_set_to_readonly; |
1194 | | - |
1195 | | -void set_kernel_text_rw(void) |
1196 | | -{ |
1197 | | - unsigned long start = PFN_ALIGN(_text); |
1198 | | - unsigned long end = PFN_ALIGN(_etext); |
1199 | | - |
1200 | | - if (!kernel_set_to_readonly) |
1201 | | - return; |
1202 | | - |
1203 | | - pr_debug("Set kernel text: %lx - %lx for read write\n", |
1204 | | - start, end); |
1205 | | - |
1206 | | - /* |
1207 | | - * Make the kernel identity mapping for text RW. Kernel text |
1208 | | - * mapping will always be RO. Refer to the comment in |
1209 | | - * static_protections() in pageattr.c |
1210 | | - */ |
1211 | | - set_memory_rw(start, (end - start) >> PAGE_SHIFT); |
1212 | | -} |
1213 | | - |
1214 | | -void set_kernel_text_ro(void) |
1215 | | -{ |
1216 | | - unsigned long start = PFN_ALIGN(_text); |
1217 | | - unsigned long end = PFN_ALIGN(_etext); |
1218 | | - |
1219 | | - if (!kernel_set_to_readonly) |
1220 | | - return; |
1221 | | - |
1222 | | - pr_debug("Set kernel text: %lx - %lx for read only\n", |
1223 | | - start, end); |
1224 | | - |
1225 | | - /* |
1226 | | - * Set the kernel identity mapping for text RO. |
1227 | | - */ |
1228 | | - set_memory_ro(start, (end - start) >> PAGE_SHIFT); |
1229 | | -} |
1230 | 1326 | |
1231 | 1327 | void mark_rodata_ro(void) |
1232 | 1328 | { |
1233 | 1329 | unsigned long start = PFN_ALIGN(_text); |
1234 | 1330 | unsigned long rodata_start = PFN_ALIGN(__start_rodata); |
1235 | | - unsigned long end = (unsigned long) &__end_rodata_hpage_align; |
1236 | | - unsigned long text_end = PFN_ALIGN(&_etext); |
1237 | | - unsigned long rodata_end = PFN_ALIGN(&__end_rodata); |
| 1331 | + unsigned long end = (unsigned long)__end_rodata_hpage_align; |
| 1332 | + unsigned long text_end = PFN_ALIGN(_etext); |
| 1333 | + unsigned long rodata_end = PFN_ALIGN(__end_rodata); |
1238 | 1334 | unsigned long all_end; |
1239 | 1335 | |
1240 | 1336 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", |
.. | .. |
1258 | 1354 | all_end = roundup((unsigned long)_brk_end, PMD_SIZE); |
1259 | 1355 | set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT); |
1260 | 1356 | |
| 1357 | + set_ftrace_ops_ro(); |
| 1358 | + |
1261 | 1359 | #ifdef CONFIG_CPA_DEBUG |
1262 | 1360 | printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end); |
1263 | 1361 | set_memory_rw(start, (end-start) >> PAGE_SHIFT); |
.. | .. |
1266 | 1364 | set_memory_ro(start, (end-start) >> PAGE_SHIFT); |
1267 | 1365 | #endif |
1268 | 1366 | |
1269 | | - free_kernel_image_pages((void *)text_end, (void *)rodata_start); |
1270 | | - free_kernel_image_pages((void *)rodata_end, (void *)_sdata); |
| 1367 | + free_kernel_image_pages("unused kernel image (text/rodata gap)", |
| 1368 | + (void *)text_end, (void *)rodata_start); |
| 1369 | + free_kernel_image_pages("unused kernel image (rodata/data gap)", |
| 1370 | + (void *)rodata_end, (void *)_sdata); |
1271 | 1371 | |
1272 | 1372 | debug_checkwx(); |
1273 | 1373 | } |
.. | .. |
1352 | 1452 | goto done; |
1353 | 1453 | } |
1354 | 1454 | |
| 1455 | + /* |
| 1456 | + * Use max block size to minimize overhead on bare metal, where |
| 1457 | + * alignment for memory hotplug isn't a concern. |
| 1458 | + */ |
| 1459 | + if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) { |
| 1460 | + bz = MAX_BLOCK_SIZE; |
| 1461 | + goto done; |
| 1462 | + } |
| 1463 | + |
1355 | 1464 | /* Find the largest allowed block size that aligns to memory end */ |
1356 | 1465 | for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) { |
1357 | 1466 | if (IS_ALIGNED(boot_mem_end, bz)) |
.. | .. |
1409 | 1518 | if (pmd_none(*pmd)) { |
1410 | 1519 | void *p; |
1411 | 1520 | |
1412 | | - if (altmap) |
1413 | | - p = altmap_alloc_block_buf(PMD_SIZE, altmap); |
1414 | | - else |
1415 | | - p = vmemmap_alloc_block_buf(PMD_SIZE, node); |
| 1521 | + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); |
1416 | 1522 | if (p) { |
1417 | 1523 | pte_t entry; |
1418 | 1524 | |
.. | .. |
1439 | 1545 | vmemmap_verify((pte_t *)pmd, node, addr, next); |
1440 | 1546 | continue; |
1441 | 1547 | } |
1442 | | - if (vmemmap_populate_basepages(addr, next, node)) |
| 1548 | + if (vmemmap_populate_basepages(addr, next, node, NULL)) |
1443 | 1549 | return -ENOMEM; |
1444 | 1550 | } |
1445 | 1551 | return 0; |
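
The altmap special case disappears because vmemmap_alloc_block_buf() now takes the altmap itself and presumably falls back to a normal node-local allocation when it is NULL. The prototype below reflects my assumption of the consolidated generic interface and is shown only to make the call above easier to read:

    /* Assumed consolidated prototype in the generic sparse-vmemmap code: */
    void *vmemmap_alloc_block_buf(unsigned long size, int node,
                                  struct vmem_altmap *altmap);
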
.. | .. |
1450 | 1556 | { |
1451 | 1557 | int err; |
1452 | 1558 | |
1453 | | - if (boot_cpu_has(X86_FEATURE_PSE)) |
| 1559 | + if (end - start < PAGES_PER_SECTION * sizeof(struct page)) |
| 1560 | + err = vmemmap_populate_basepages(start, end, node, NULL); |
| 1561 | + else if (boot_cpu_has(X86_FEATURE_PSE)) |
1454 | 1562 | err = vmemmap_populate_hugepages(start, end, node, altmap); |
1455 | 1563 | else if (altmap) { |
1456 | 1564 | pr_err_once("%s: no cpu support for altmap allocations\n", |
1457 | 1565 | __func__); |
1458 | 1566 | err = -ENOMEM; |
1459 | 1567 | } else |
1460 | | - err = vmemmap_populate_basepages(start, end, node); |
| 1568 | + err = vmemmap_populate_basepages(start, end, node, NULL); |
1461 | 1569 | if (!err) |
1462 | 1570 | sync_global_pgds(start, end - 1); |
1463 | 1571 | return err; |
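
The new first branch maps the vmemmap with base pages whenever the range covers less than one full section's worth of struct pages. With the constants I'd expect on x86-64 that threshold is exactly one PMD; worked out below under those assumptions, not taken from the patch:

    /* Worked example (assumed constants):
     * PAGES_PER_SECTION   = 128 MiB / 4 KiB   = 32768
     * sizeof(struct page) = 64 bytes (typical)
     * threshold           = 32768 * 64        = 2 MiB = one PMD of vmemmap
     * So only sub-section ranges (e.g. some ZONE_DEVICE hot-adds) take the
     * base-page path here.
     */
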
---|