2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
--- a/kernel/arch/x86/mm/init_64.c
+++ b/kernel/arch/x86/mm/init_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * linux/arch/x86_64/mm/init.c
  *
@@ -20,7 +21,6 @@
 #include <linux/init.h>
 #include <linux/initrd.h>
 #include <linux/pagemap.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/proc_fs.h>
 #include <linux/pci.h>
@@ -37,7 +37,6 @@
 #include <asm/processor.h>
 #include <asm/bios_ebda.h>
 #include <linux/uaccess.h>
-#include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/dma.h>
 #include <asm/fixmap.h>
@@ -54,10 +53,42 @@
 #include <asm/init.h>
 #include <asm/uv/uv.h>
 #include <asm/setup.h>
+#include <asm/ftrace.h>
 
 #include "mm_internal.h"
 
 #include "ident_map.c"
+
+#define DEFINE_POPULATE(fname, type1, type2, init)		\
+static inline void fname##_init(struct mm_struct *mm,		\
+		type1##_t *arg1, type2##_t *arg2, bool init)	\
+{								\
+	if (init)						\
+		fname##_safe(mm, arg1, arg2);			\
+	else							\
+		fname(mm, arg1, arg2);				\
+}
+
+DEFINE_POPULATE(p4d_populate, p4d, pud, init)
+DEFINE_POPULATE(pgd_populate, pgd, p4d, init)
+DEFINE_POPULATE(pud_populate, pud, pmd, init)
+DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init)
+
+#define DEFINE_ENTRY(type1, type2, init)			\
+static inline void set_##type1##_init(type1##_t *arg1,		\
+			type2##_t arg2, bool init)		\
+{								\
+	if (init)						\
+		set_##type1##_safe(arg1, arg2);			\
+	else							\
+		set_##type1(arg1, arg2);			\
+}
+
+DEFINE_ENTRY(p4d, p4d, init)
+DEFINE_ENTRY(pud, pud, init)
+DEFINE_ENTRY(pmd, pmd, init)
+DEFINE_ENTRY(pte, pte, init)
+
 
 /*
  * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
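
For readers following the helpers above: each DEFINE_POPULATE/DEFINE_ENTRY instantiation is just a thin wrapper that picks the _safe variant (which complains if it would overwrite an already-present entry) when init is true, and the plain setter otherwise. A sketch of what one instantiation expands to, derived mechanically from the macro in the hunk above rather than carried in the patch itself:

	/* Expansion sketch of DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init) */
	static inline void pmd_populate_kernel_init(struct mm_struct *mm,
			pmd_t *arg1, pte_t *arg2, bool init)
	{
		if (init)
			/* _safe variant: warns if the entry was already present */
			pmd_populate_kernel_safe(mm, arg1, arg2);
		else
			/* plain setter: may overwrite an existing entry */
			pmd_populate_kernel(mm, arg1, arg2);
	}
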
@@ -162,8 +193,8 @@
 			spin_lock(pgt_lock);
 
 			if (!p4d_none(*p4d_ref) && !p4d_none(*p4d))
-				BUG_ON(p4d_page_vaddr(*p4d)
-				       != p4d_page_vaddr(*p4d_ref));
+				BUG_ON(p4d_pgtable(*p4d)
+				       != p4d_pgtable(*p4d_ref));
 
 			if (p4d_none(*p4d))
 				set_p4d(p4d, *p4d_ref);
@@ -178,7 +209,7 @@
  * When memory was added make sure all the processes MM have
  * suitable PGD entries in the local PGD level page.
  */
-void sync_global_pgds(unsigned long start, unsigned long end)
+static void sync_global_pgds(unsigned long start, unsigned long end)
 {
 	if (pgtable_l5_enabled())
 		sync_global_pgds_l5(start, end);
@@ -197,7 +228,7 @@
 	if (after_bootmem)
 		ptr = (void *) get_zeroed_page(GFP_ATOMIC);
 	else
-		ptr = alloc_bootmem_pages(PAGE_SIZE);
+		ptr = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 
 	if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) {
 		panic("set_pte_phys: cannot allocate page data %s\n",
@@ -267,7 +298,7 @@
 	 * It's enough to flush this one mapping.
 	 * (PGE mappings get flushed as well)
 	 */
-	__flush_tlb_one_kernel(vaddr);
+	flush_tlb_one_kernel(vaddr);
 }
 
 void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte)
@@ -336,7 +367,7 @@
 	pgprot_t prot;
 
 	pgprot_val(prot) = pgprot_val(PAGE_KERNEL_LARGE) |
-		pgprot_val(pgprot_4k_2_large(cachemode2pgprot(cache)));
+		protval_4k_2_large(cachemode2protval(cache));
 	BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK));
 	for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
 		pgd = pgd_offset_k((unsigned long)__va(phys));
@@ -415,7 +446,7 @@
  */
 static unsigned long __meminit
 phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
-	      pgprot_t prot)
+	      pgprot_t prot, bool init)
 {
 	unsigned long pages = 0, paddr_next;
 	unsigned long paddr_last = paddr_end;
@@ -433,7 +464,7 @@
 					     E820_TYPE_RAM) &&
 			    !e820__mapped_any(paddr & PAGE_MASK, paddr_next,
 					     E820_TYPE_RESERVED_KERN))
-				set_pte(pte, __pte(0));
+				set_pte_init(pte, __pte(0), init);
 			continue;
 		}
 
@@ -453,7 +484,7 @@
 			pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr,
 				pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
 		pages++;
-		set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
+		set_pte_init(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), init);
 		paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
 	}
 
@@ -469,7 +500,7 @@
  */
 static unsigned long __meminit
 phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
-	      unsigned long page_size_mask, pgprot_t prot)
+	      unsigned long page_size_mask, pgprot_t prot, bool init)
 {
 	unsigned long pages = 0, paddr_next;
 	unsigned long paddr_last = paddr_end;
@@ -488,7 +519,7 @@
 					     E820_TYPE_RAM) &&
 			    !e820__mapped_any(paddr & PMD_MASK, paddr_next,
 					     E820_TYPE_RESERVED_KERN))
-				set_pmd(pmd, __pmd(0));
+				set_pmd_init(pmd, __pmd(0), init);
 			continue;
 		}
 
@@ -497,7 +528,8 @@
 			spin_lock(&init_mm.page_table_lock);
 			pte = (pte_t *)pmd_page_vaddr(*pmd);
 			paddr_last = phys_pte_init(pte, paddr,
-						   paddr_end, prot);
+						   paddr_end, prot,
+						   init);
 			spin_unlock(&init_mm.page_table_lock);
 			continue;
 		}
@@ -525,19 +557,20 @@
 		if (page_size_mask & (1<<PG_LEVEL_2M)) {
 			pages++;
 			spin_lock(&init_mm.page_table_lock);
-			set_pte((pte_t *)pmd,
-				pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
-					__pgprot(pgprot_val(prot) | _PAGE_PSE)));
+			set_pte_init((pte_t *)pmd,
+				     pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
+					     __pgprot(pgprot_val(prot) | _PAGE_PSE)),
+				     init);
 			spin_unlock(&init_mm.page_table_lock);
 			paddr_last = paddr_next;
 			continue;
 		}
 
 		pte = alloc_low_page();
-		paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot);
+		paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init);
 
 		spin_lock(&init_mm.page_table_lock);
-		pmd_populate_kernel(&init_mm, pmd, pte);
+		pmd_populate_kernel_init(&init_mm, pmd, pte, init);
 		spin_unlock(&init_mm.page_table_lock);
 	}
 	update_page_count(PG_LEVEL_2M, pages);
@@ -552,7 +585,7 @@
  */
 static unsigned long __meminit
 phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
-	      unsigned long page_size_mask)
+	      unsigned long page_size_mask, pgprot_t _prot, bool init)
 {
 	unsigned long pages = 0, paddr_next;
 	unsigned long paddr_last = paddr_end;
@@ -562,7 +595,7 @@
 	for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) {
 		pud_t *pud;
 		pmd_t *pmd;
-		pgprot_t prot = PAGE_KERNEL;
+		pgprot_t prot = _prot;
 
 		vaddr = (unsigned long)__va(paddr);
 		pud = pud_page + pud_index(vaddr);
@@ -574,7 +607,7 @@
 					     E820_TYPE_RAM) &&
 			    !e820__mapped_any(paddr & PUD_MASK, paddr_next,
 					     E820_TYPE_RESERVED_KERN))
-				set_pud(pud, __pud(0));
+				set_pud_init(pud, __pud(0), init);
 			continue;
 		}
 
@@ -584,7 +617,7 @@
 			paddr_last = phys_pmd_init(pmd, paddr,
 						   paddr_end,
 						   page_size_mask,
-						   prot);
+						   prot, init);
 			continue;
 		}
 		/*
@@ -611,9 +644,13 @@
 		if (page_size_mask & (1<<PG_LEVEL_1G)) {
 			pages++;
 			spin_lock(&init_mm.page_table_lock);
-			set_pte((pte_t *)pud,
-				pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
-					PAGE_KERNEL_LARGE));
+
+			prot = __pgprot(pgprot_val(prot) | _PAGE_PSE);
+
+			set_pte_init((pte_t *)pud,
+				     pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
+					     prot),
+				     init);
 			spin_unlock(&init_mm.page_table_lock);
 			paddr_last = paddr_next;
 			continue;
@@ -621,10 +658,10 @@
 
 		pmd = alloc_low_page();
 		paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
-					   page_size_mask, prot);
+					   page_size_mask, prot, init);
 
 		spin_lock(&init_mm.page_table_lock);
-		pud_populate(&init_mm, pud, pmd);
+		pud_populate_init(&init_mm, pud, pmd, init);
 		spin_unlock(&init_mm.page_table_lock);
 	}
 
@@ -635,62 +672,60 @@
 
 static unsigned long __meminit
 phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
-	      unsigned long page_size_mask)
+	      unsigned long page_size_mask, pgprot_t prot, bool init)
 {
-	unsigned long paddr_next, paddr_last = paddr_end;
-	unsigned long vaddr = (unsigned long)__va(paddr);
-	int i = p4d_index(vaddr);
+	unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last;
+
+	paddr_last = paddr_end;
+	vaddr = (unsigned long)__va(paddr);
+	vaddr_end = (unsigned long)__va(paddr_end);
 
 	if (!pgtable_l5_enabled())
-		return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
+		return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
+				     page_size_mask, prot, init);
 
-	for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
-		p4d_t *p4d;
+	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
+		p4d_t *p4d = p4d_page + p4d_index(vaddr);
 		pud_t *pud;
 
-		vaddr = (unsigned long)__va(paddr);
-		p4d = p4d_page + p4d_index(vaddr);
-		paddr_next = (paddr & P4D_MASK) + P4D_SIZE;
+		vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE;
+		paddr = __pa(vaddr);
 
 		if (paddr >= paddr_end) {
+			paddr_next = __pa(vaddr_next);
 			if (!after_bootmem &&
 			    !e820__mapped_any(paddr & P4D_MASK, paddr_next,
 					     E820_TYPE_RAM) &&
 			    !e820__mapped_any(paddr & P4D_MASK, paddr_next,
 					     E820_TYPE_RESERVED_KERN))
-				set_p4d(p4d, __p4d(0));
+				set_p4d_init(p4d, __p4d(0), init);
 			continue;
 		}
 
 		if (!p4d_none(*p4d)) {
 			pud = pud_offset(p4d, 0);
-			paddr_last = phys_pud_init(pud, paddr,
-						   paddr_end,
-						   page_size_mask);
+			paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
+						   page_size_mask, prot, init);
 			continue;
 		}
 
 		pud = alloc_low_page();
-		paddr_last = phys_pud_init(pud, paddr, paddr_end,
-					   page_size_mask);
+		paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
+					   page_size_mask, prot, init);
 
 		spin_lock(&init_mm.page_table_lock);
-		p4d_populate(&init_mm, p4d, pud);
+		p4d_populate_init(&init_mm, p4d, pud, init);
 		spin_unlock(&init_mm.page_table_lock);
 	}
 
 	return paddr_last;
 }
 
-/*
- * Create page table mapping for the physical memory for specific physical
- * addresses. The virtual and physical addresses have to be aligned on PMD level
- * down. It returns the last physical address mapped.
- */
-unsigned long __meminit
-kernel_physical_mapping_init(unsigned long paddr_start,
-			     unsigned long paddr_end,
-			     unsigned long page_size_mask)
+static unsigned long __meminit
+__kernel_physical_mapping_init(unsigned long paddr_start,
+			       unsigned long paddr_end,
+			       unsigned long page_size_mask,
			       pgprot_t prot, bool init)
 {
 	bool pgd_changed = false;
 	unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
@@ -710,19 +745,22 @@
 			p4d = (p4d_t *)pgd_page_vaddr(*pgd);
 			paddr_last = phys_p4d_init(p4d, __pa(vaddr),
 						   __pa(vaddr_end),
-						   page_size_mask);
+						   page_size_mask,
+						   prot, init);
 			continue;
 		}
 
 		p4d = alloc_low_page();
 		paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
-					   page_size_mask);
+					   page_size_mask, prot, init);
 
 		spin_lock(&init_mm.page_table_lock);
 		if (pgtable_l5_enabled())
-			pgd_populate(&init_mm, pgd, p4d);
+			pgd_populate_init(&init_mm, pgd, p4d, init);
 		else
-			p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
+			p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr),
+					  (pud_t *) p4d, init);
+
 		spin_unlock(&init_mm.page_table_lock);
 		pgd_changed = true;
 	}
@@ -731,6 +769,38 @@
 	sync_global_pgds(vaddr_start, vaddr_end - 1);
 
 	return paddr_last;
+}
+
+
+/*
+ * Create page table mapping for the physical memory for specific physical
+ * addresses. Note that it can only be used to populate non-present entries.
+ * The virtual and physical addresses have to be aligned on PMD level
+ * down. It returns the last physical address mapped.
+ */
+unsigned long __meminit
+kernel_physical_mapping_init(unsigned long paddr_start,
+			     unsigned long paddr_end,
+			     unsigned long page_size_mask, pgprot_t prot)
+{
+	return __kernel_physical_mapping_init(paddr_start, paddr_end,
+					      page_size_mask, prot, true);
+}
+
+/*
+ * This function is similar to kernel_physical_mapping_init() above with the
+ * exception that it uses set_{pud,pmd}() instead of the set_{pud,pte}_safe()
+ * when updating the mapping. The caller is responsible to flush the TLBs after
+ * the function returns.
+ */
+unsigned long __meminit
+kernel_physical_mapping_change(unsigned long paddr_start,
+			       unsigned long paddr_end,
+			       unsigned long page_size_mask)
+{
+	return __kernel_physical_mapping_init(paddr_start, paddr_end,
+					      page_size_mask, PAGE_KERNEL,
+					      false);
 }
 
 #ifndef CONFIG_NUMA
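
A usage sketch for the new split (a hypothetical caller, not part of this patch): kernel_physical_mapping_change() rewrites already-present entries with the plain setters, so, per the comment above, whoever calls it has to flush the TLB afterwards, for example with flush_tlb_all():

	/* Hypothetical example: rewrite an existing direct-map range in place.
	 * kernel_physical_mapping_change() deliberately leaves the TLB flush
	 * to its caller.
	 */
	static void example_change_mapping(unsigned long start_pa, unsigned long end_pa,
					   unsigned long page_size_mask)
	{
		kernel_physical_mapping_change(start_pa, end_pa, page_size_mask);
		flush_tlb_all();
	}
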
@@ -742,7 +812,6 @@
 
 void __init paging_init(void)
 {
-	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
 
 	/*
@@ -752,8 +821,7 @@
 	 * will not set it back.
 	 */
 	node_clear_state(0, N_MEMORY);
-	if (N_MEMORY != N_NORMAL_MEMORY)
-		node_clear_state(0, N_NORMAL_MEMORY);
+	node_clear_state(0, N_NORMAL_MEMORY);
 
 	zone_sizes_init();
 }
@@ -778,11 +846,11 @@
 }
 
 int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
-	      struct vmem_altmap *altmap, bool want_memblock)
+	      struct mhp_params *params)
 {
 	int ret;
 
-	ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
+	ret = __add_pages(nid, start_pfn, nr_pages, params);
 	WARN_ON_ONCE(ret);
 
 	/* update max_pfn, max_low_pfn and high_memory */
@@ -792,15 +860,15 @@
 	return ret;
 }
 
-int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
-		    bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size,
+		    struct mhp_params *params)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 
-	init_memory_mapping(start, start + size);
+	init_memory_mapping(start, start + size, params->pgprot);
 
-	return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
+	return add_pages(nid, start_pfn, nr_pages, params);
 }
 
 #define PAGE_INUSE 0xFD
@@ -1164,6 +1232,56 @@
 #endif
 }
 
+/*
+ * Pre-allocates page-table pages for the vmalloc area in the kernel page-table.
+ * Only the level which needs to be synchronized between all page-tables is
+ * allocated because the synchronization can be expensive.
+ */
+static void __init preallocate_vmalloc_pages(void)
+{
+	unsigned long addr;
+	const char *lvl;
+
+	for (addr = VMALLOC_START; addr <= VMALLOC_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
+		pgd_t *pgd = pgd_offset_k(addr);
+		p4d_t *p4d;
+		pud_t *pud;
+
+		lvl = "p4d";
+		p4d = p4d_alloc(&init_mm, pgd, addr);
+		if (!p4d)
+			goto failed;
+
+		if (pgtable_l5_enabled())
+			continue;
+
+		/*
+		 * The goal here is to allocate all possibly required
+		 * hardware page tables pointed to by the top hardware
+		 * level.
+		 *
+		 * On 4-level systems, the P4D layer is folded away and
+		 * the above code does no preallocation. Below, go down
+		 * to the pud _software_ level to ensure the second
+		 * hardware level is allocated on 4-level systems too.
+		 */
+		lvl = "pud";
+		pud = pud_alloc(&init_mm, p4d, addr);
+		if (!pud)
+			goto failed;
+	}
+
+	return;
+
+failed:
+
+	/*
+	 * The pages have to be there now or they will be missing in
+	 * process page-tables later.
+	 */
+	panic("Failed to pre-allocate %s pages for vmalloc area\n", lvl);
+}
+
 void __init mem_init(void)
 {
 	pci_iommu_alloc();
@@ -1171,14 +1289,14 @@
 	/* clear_bss() already clear the empty_zero_page */
 
 	/* this will put all memory onto the freelists */
-	free_all_bootmem();
+	memblock_free_all();
 	after_bootmem = 1;
 	x86_init.hyper.init_after_bootmem();
 
 	/*
 	 * Must be done after boot memory is put on freelist, because here we
 	 * might set fields in deferred struct pages that have not yet been
-	 * initialized, and free_all_bootmem() initializes all the reserved
+	 * initialized, and memblock_free_all() initializes all the reserved
 	 * deferred pages for us.
 	 */
 	register_page_bootmem_info();
@@ -1187,54 +1305,32 @@
 	if (get_gate_vma(&init_mm))
 		kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);
 
+	preallocate_vmalloc_pages();
+
 	mem_init_print_info(NULL);
 }
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask)
+{
+	/*
+	 * More CPUs always led to greater speedups on tested systems, up to
+	 * all the nodes' CPUs. Use all since the system is otherwise idle
+	 * now.
+	 */
+	return max_t(int, cpumask_weight(node_cpumask), 1);
+}
+#endif
+
 int kernel_set_to_readonly;
-
-void set_kernel_text_rw(void)
-{
-	unsigned long start = PFN_ALIGN(_text);
-	unsigned long end = PFN_ALIGN(_etext);
-
-	if (!kernel_set_to_readonly)
-		return;
-
-	pr_debug("Set kernel text: %lx - %lx for read write\n",
-		 start, end);
-
-	/*
-	 * Make the kernel identity mapping for text RW. Kernel text
-	 * mapping will always be RO. Refer to the comment in
-	 * static_protections() in pageattr.c
-	 */
-	set_memory_rw(start, (end - start) >> PAGE_SHIFT);
-}
-
-void set_kernel_text_ro(void)
-{
-	unsigned long start = PFN_ALIGN(_text);
-	unsigned long end = PFN_ALIGN(_etext);
-
-	if (!kernel_set_to_readonly)
-		return;
-
-	pr_debug("Set kernel text: %lx - %lx for read only\n",
-		 start, end);
-
-	/*
-	 * Set the kernel identity mapping for text RO.
-	 */
-	set_memory_ro(start, (end - start) >> PAGE_SHIFT);
-}
 
 void mark_rodata_ro(void)
 {
 	unsigned long start = PFN_ALIGN(_text);
 	unsigned long rodata_start = PFN_ALIGN(__start_rodata);
-	unsigned long end = (unsigned long) &__end_rodata_hpage_align;
-	unsigned long text_end = PFN_ALIGN(&_etext);
-	unsigned long rodata_end = PFN_ALIGN(&__end_rodata);
+	unsigned long end = (unsigned long)__end_rodata_hpage_align;
+	unsigned long text_end = PFN_ALIGN(_etext);
+	unsigned long rodata_end = PFN_ALIGN(__end_rodata);
 	unsigned long all_end;
 
 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
@@ -1258,6 +1354,8 @@
 	all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
 	set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT);
 
+	set_ftrace_ops_ro();
+
 #ifdef CONFIG_CPA_DEBUG
 	printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
 	set_memory_rw(start, (end-start) >> PAGE_SHIFT);
@@ -1266,8 +1364,10 @@
 	set_memory_ro(start, (end-start) >> PAGE_SHIFT);
 #endif
 
-	free_kernel_image_pages((void *)text_end, (void *)rodata_start);
-	free_kernel_image_pages((void *)rodata_end, (void *)_sdata);
+	free_kernel_image_pages("unused kernel image (text/rodata gap)",
+				(void *)text_end, (void *)rodata_start);
+	free_kernel_image_pages("unused kernel image (rodata/data gap)",
+				(void *)rodata_end, (void *)_sdata);
 
 	debug_checkwx();
 }
@@ -1352,6 +1452,15 @@
 		goto done;
 	}
 
+	/*
+	 * Use max block size to minimize overhead on bare metal, where
+	 * alignment for memory hotplug isn't a concern.
+	 */
+	if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+		bz = MAX_BLOCK_SIZE;
+		goto done;
+	}
+
 	/* Find the largest allowed block size that aligns to memory end */
 	for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) {
 		if (IS_ALIGNED(boot_mem_end, bz))
@@ -1409,10 +1518,7 @@
 		if (pmd_none(*pmd)) {
 			void *p;
 
-			if (altmap)
-				p = altmap_alloc_block_buf(PMD_SIZE, altmap);
-			else
-				p = vmemmap_alloc_block_buf(PMD_SIZE, node);
+			p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
 			if (p) {
 				pte_t entry;
 
@@ -1439,7 +1545,7 @@
 			vmemmap_verify((pte_t *)pmd, node, addr, next);
 			continue;
 		}
-		if (vmemmap_populate_basepages(addr, next, node))
+		if (vmemmap_populate_basepages(addr, next, node, NULL))
 			return -ENOMEM;
 	}
 	return 0;
@@ -1450,14 +1556,16 @@
 {
 	int err;
 
-	if (boot_cpu_has(X86_FEATURE_PSE))
+	if (end - start < PAGES_PER_SECTION * sizeof(struct page))
+		err = vmemmap_populate_basepages(start, end, node, NULL);
+	else if (boot_cpu_has(X86_FEATURE_PSE))
 		err = vmemmap_populate_hugepages(start, end, node, altmap);
 	else if (altmap) {
 		pr_err_once("%s: no cpu support for altmap allocations\n",
 			    __func__);
 		err = -ENOMEM;
 	} else
-		err = vmemmap_populate_basepages(start, end, node);
+		err = vmemmap_populate_basepages(start, end, node, NULL);
 	if (!err)
 		sync_global_pgds(start, end - 1);
 	return err;
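
As a rough sanity check on the new threshold in the last hunk (assuming the usual x86_64 values of 128 MB memory sections, i.e. PAGES_PER_SECTION = 32768, and sizeof(struct page) = 64 bytes): PAGES_PER_SECTION * sizeof(struct page) = 2 MB, which is exactly one PMD worth of vmemmap. So only sub-section populations smaller than that take the basepage path, while full-section ranges keep using 2 MB mappings as before.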