forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-11 072de836f53be56a70cecf70b43ae43b7ce17376
kernel/arch/x86/mm/init.c
@@ -3,10 +3,10 @@
 #include <linux/ioport.h>
 #include <linux/swap.h>
 #include <linux/memblock.h>
-#include <linux/bootmem.h>	/* for max_low_pfn */
 #include <linux/swapfile.h>
 #include <linux/swapops.h>
 #include <linux/kmemleak.h>
+#include <linux/sched/task.h>
 
 #include <asm/set_memory.h>
 #include <asm/e820/api.h>
@@ -24,6 +24,8 @@
 #include <asm/hypervisor.h>
 #include <asm/cpufeature.h>
 #include <asm/pti.h>
+#include <asm/text-patching.h>
+#include <asm/memtype.h>
 
 /*
  * We need to define the tracepoints somewhere, and tlb.c
@@ -48,7 +50,7 @@
  * Index into __pte2cachemode_tbl[] are the caching attribute bits of the pte
  * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
  */
-uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
+static uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
         [_PAGE_CACHE_MODE_WB      ] = 0         | 0        ,
         [_PAGE_CACHE_MODE_WC      ] = 0         | _PAGE_PCD,
         [_PAGE_CACHE_MODE_UC_MINUS] = 0         | _PAGE_PCD,
@@ -56,9 +58,16 @@
         [_PAGE_CACHE_MODE_WT      ] = 0         | _PAGE_PCD,
         [_PAGE_CACHE_MODE_WP      ] = 0         | _PAGE_PCD,
 };
-EXPORT_SYMBOL(__cachemode2pte_tbl);
 
-uint8_t __pte2cachemode_tbl[8] = {
+unsigned long cachemode2protval(enum page_cache_mode pcm)
+{
+        if (likely(pcm == 0))
+                return 0;
+        return __cachemode2pte_tbl[pcm];
+}
+EXPORT_SYMBOL(cachemode2protval);
+
+static uint8_t __pte2cachemode_tbl[8] = {
         [__pte2cm_idx( 0        | 0         | 0        )] = _PAGE_CACHE_MODE_WB,
         [__pte2cm_idx(_PAGE_PWT | 0         | 0        )] = _PAGE_CACHE_MODE_UC_MINUS,
         [__pte2cm_idx( 0        | _PAGE_PCD | 0        )] = _PAGE_CACHE_MODE_UC_MINUS,
@@ -68,7 +77,32 @@
         [__pte2cm_idx(0         | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
         [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
 };
-EXPORT_SYMBOL(__pte2cachemode_tbl);
+
+/*
+ * Check that the write-protect PAT entry is set for write-protect.
+ * To do this without making assumptions how PAT has been set up (Xen has
+ * another layout than the kernel), translate the _PAGE_CACHE_MODE_WP cache
+ * mode via the __cachemode2pte_tbl[] into protection bits (those protection
+ * bits will select a cache mode of WP or better), and then translate the
+ * protection bits back into the cache mode using __pte2cm_idx() and the
+ * __pte2cachemode_tbl[] array. This will return the really used cache mode.
+ */
+bool x86_has_pat_wp(void)
+{
+        uint16_t prot = __cachemode2pte_tbl[_PAGE_CACHE_MODE_WP];
+
+        return __pte2cachemode_tbl[__pte2cm_idx(prot)] == _PAGE_CACHE_MODE_WP;
+}
+
+enum page_cache_mode pgprot2cachemode(pgprot_t pgprot)
+{
+        unsigned long masked;
+
+        masked = pgprot_val(pgprot) & _PAGE_CACHE_MASK;
+        if (likely(masked == 0))
+                return 0;
+        return __pte2cachemode_tbl[__pte2cm_idx(masked)];
+}
 
 static unsigned long __initdata pgt_buf_start;
 static unsigned long __initdata pgt_buf_end;
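The new x86_has_pat_wp() helper decides whether the effective PAT configuration really provides a write-protect cache mode by pushing _PAGE_CACHE_MODE_WP through both lookup tables. For illustration only (this is not part of the patch), the small user-space sketch below models that round trip with the boot-time table contents shown above. The PTE bit values are the standard x86 ones (_PAGE_PWT = bit 3, _PAGE_PCD = bit 4, _PAGE_PAT = bit 7); all names here are local stand-ins for the kernel definitions. With the default tables, WP decodes back to UC_MINUS, so x86_has_pat_wp() reports false until PAT initialization updates the tables (see update_cache_mode_entry() at the end of this file).

/* Illustrative sketch only; names and values are stand-ins, not kernel code. */
#include <stdint.h>
#include <stdio.h>

#define _PAGE_PWT 0x008u
#define _PAGE_PCD 0x010u
#define _PAGE_PAT 0x080u

/* PWT -> index bit 0, PCD -> index bit 1, PAT -> index bit 2 */
#define PTE2CM_IDX(p) ((((p) & (_PAGE_PWT | _PAGE_PCD)) >> 3) | (((p) & _PAGE_PAT) >> 5))

enum page_cache_mode { WB, WC, UC_MINUS, UC, WT, WP, CM_NUM };

static const char *const cm_name[CM_NUM] = {
        "WB", "WC", "UC_MINUS", "UC", "WT", "WP",
};

/* Boot-time defaults of __cachemode2pte_tbl[], as in the hunk above. */
static const uint16_t cachemode2pte_tbl[CM_NUM] = {
        [WB]       = 0,
        [WC]       = _PAGE_PCD,
        [UC_MINUS] = _PAGE_PCD,
        [UC]       = _PAGE_PWT | _PAGE_PCD,
        [WT]       = _PAGE_PCD,
        [WP]       = _PAGE_PCD,
};

/* Boot-time defaults of __pte2cachemode_tbl[]. */
static const uint8_t pte2cachemode_tbl[8] = {
        [PTE2CM_IDX(0)]                                 = WB,
        [PTE2CM_IDX(_PAGE_PWT)]                         = UC_MINUS,
        [PTE2CM_IDX(_PAGE_PCD)]                         = UC_MINUS,
        [PTE2CM_IDX(_PAGE_PWT | _PAGE_PCD)]             = UC,
        [PTE2CM_IDX(_PAGE_PAT)]                         = WB,
        [PTE2CM_IDX(_PAGE_PWT | _PAGE_PAT)]             = WC,
        [PTE2CM_IDX(_PAGE_PCD | _PAGE_PAT)]             = UC_MINUS,
        [PTE2CM_IDX(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = UC,
};

int main(void)
{
        /* Same round trip as x86_has_pat_wp(): WP -> prot bits -> cache mode. */
        uint16_t prot = cachemode2pte_tbl[WP];
        enum page_cache_mode effective = pte2cachemode_tbl[PTE2CM_IDX(prot)];

        printf("WP encodes as prot 0x%02x, which decodes to %s\n",
               (unsigned int)prot, cm_name[effective]);
        printf("x86_has_pat_wp() would return %s with these tables\n",
               effective == WP ? "true" : "false");
        return 0;
}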
@@ -77,6 +111,12 @@
 static unsigned long min_pfn_mapped;
 
 static bool __initdata can_use_brk_pgt = true;
+
+/*
+ * Provide a run-time mean of disabling ZONE_DMA32 if it is enabled via
+ * CONFIG_ZONE_DMA32.
+ */
+static bool disable_dma32 __ro_after_init;
 
 /*
  * Pages returned are already directly mapped.
@@ -168,6 +208,19 @@
 };
 
 static int page_size_mask;
+
+/*
+ * Save some of cr4 feature set we're using (e.g. Pentium 4MB
+ * enable and PPro Global page enable), so that any CPU's that boot
+ * up after us can get the correct flags. Invoked on the boot CPU.
+ */
+static inline void cr4_set_bits_and_update_boot(unsigned long mask)
+{
+        mmu_cr4_features |= mask;
+        if (trampoline_cr4_features)
+                *trampoline_cr4_features = mmu_cr4_features;
+        cr4_set_bits(mask);
+}
 
 static void __init probe_page_size_mask(void)
 {
@@ -464,7 +517,7 @@
  * the physical memory. To access them they are temporarily mapped.
  */
 unsigned long __ref init_memory_mapping(unsigned long start,
-                                        unsigned long end)
+                                        unsigned long end, pgprot_t prot)
 {
         struct map_range mr[NR_RANGE_MR];
         unsigned long ret = 0;
@@ -478,7 +531,8 @@
 
         for (i = 0; i < nr_range; i++)
                 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
-                                                   mr[i].page_size_mask);
+                                                   mr[i].page_size_mask,
+                                                   prot);
 
         add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT);
 
@@ -518,7 +572,7 @@
                  */
                 can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >=
                                   min(end, (u64)pgt_buf_top<<PAGE_SHIFT);
-                init_memory_mapping(start, end);
+                init_memory_mapping(start, end, PAGE_KERNEL);
                 mapped_ram_size += end - start;
                 can_use_brk_pgt = true;
         }
@@ -643,6 +697,28 @@
         }
 }
 
+/*
+ * The real mode trampoline, which is required for bootstrapping CPUs
+ * occupies only a small area under the low 1MB. See reserve_real_mode()
+ * for details.
+ *
+ * If KASLR is disabled the first PGD entry of the direct mapping is copied
+ * to map the real mode trampoline.
+ *
+ * If KASLR is enabled, copy only the PUD which covers the low 1MB
+ * area. This limits the randomization granularity to 1GB for both 4-level
+ * and 5-level paging.
+ */
+static void __init init_trampoline(void)
+{
+#ifdef CONFIG_X86_64
+        if (!kaslr_memory_enabled())
+                trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
+        else
+                init_trampoline_kaslr();
+#endif
+}
+
 void __init init_mem_mapping(void)
 {
         unsigned long end;
@@ -658,7 +734,7 @@
 #endif
 
         /* the ISA range is always mapped regardless of memory holes */
-        init_memory_mapping(0, ISA_END_ADDRESS);
+        init_memory_mapping(0, ISA_END_ADDRESS, PAGE_KERNEL);
 
         /* Init the trampoline, possibly with KASLR memory offset */
         init_trampoline();
@@ -698,6 +774,41 @@
         x86_init.hyper.init_mem_mapping();
 
         early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
+}
+
+/*
+ * Initialize an mm_struct to be used during poking and a pointer to be used
+ * during patching.
+ */
+void __init poking_init(void)
+{
+        spinlock_t *ptl;
+        pte_t *ptep;
+
+        poking_mm = copy_init_mm();
+        BUG_ON(!poking_mm);
+
+        /*
+         * Randomize the poking address, but make sure that the following page
+         * will be mapped at the same PMD. We need 2 pages, so find space for 3,
+         * and adjust the address if the PMD ends after the first one.
+         */
+        poking_addr = TASK_UNMAPPED_BASE;
+        if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+                poking_addr += (kaslr_get_random_long("Poking") & PAGE_MASK) %
+                        (TASK_SIZE - TASK_UNMAPPED_BASE - 3 * PAGE_SIZE);
+
+        if (((poking_addr + PAGE_SIZE) & ~PMD_MASK) == 0)
+                poking_addr += PAGE_SIZE;
+
+        /*
+         * We need to trigger the allocation of the page-tables that will be
+         * needed for poking now. Later, poking may be performed in an atomic
+         * section, which might cause allocation to fail.
+         */
+        ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
+        BUG_ON(!ptep);
+        pte_unmap_unlock(ptep, ptl);
 }
 
 /*
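poking_init() above wants its two text-poking pages to sit under the same PMD so that one PMD-level mapping covers both; it therefore draws the random offset from a range that leaves room for three pages and shifts the address forward by one page whenever the second page would start a new PMD. The user-space sketch below is illustrative only: 4 KiB pages and 2 MiB PMDs are assumed, rand() stands in for kaslr_get_random_long(), and every name is a local stand-in rather than the kernel's.

/* Illustrative sketch only; constants and names are assumptions, not kernel code. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL
#define PMD_SIZE  (2UL * 1024 * 1024)
#define PMD_MASK  (~(PMD_SIZE - 1))

int main(void)
{
        unsigned long base = 0x7f0000000000UL;  /* stand-in for TASK_UNMAPPED_BASE */
        unsigned long span = 1UL << 30;         /* stand-in for the randomization window */

        for (int i = 0; i < 100000; i++) {
                /* Random page-aligned offset, leaving slack for 3 pages. */
                unsigned long off = ((unsigned long)rand() %
                                     (span / PAGE_SIZE - 3)) * PAGE_SIZE;
                unsigned long addr = base + off;

                /*
                 * Same test as poking_init(): if the second page would start a
                 * new PMD, move both pages forward into that PMD.
                 */
                if (((addr + PAGE_SIZE) & ~PMD_MASK) == 0)
                        addr += PAGE_SIZE;

                /* Both pages now fall within one PMD-sized region. */
                assert((addr & PMD_MASK) == ((addr + 2 * PAGE_SIZE - 1) & PMD_MASK));
        }
        printf("both poking pages always land in the same 2 MiB PMD\n");
        return 0;
}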
@@ -742,7 +853,7 @@
         return 1;
 }
 
-void free_init_pages(char *what, unsigned long begin, unsigned long end)
+void free_init_pages(const char *what, unsigned long begin, unsigned long end)
 {
         unsigned long begin_aligned, end_aligned;
 
@@ -791,14 +902,13 @@
  * used for the kernel image only. free_init_pages() will do the
  * right thing for either kind of address.
  */
-void free_kernel_image_pages(void *begin, void *end)
+void free_kernel_image_pages(const char *what, void *begin, void *end)
 {
         unsigned long begin_ul = (unsigned long)begin;
         unsigned long end_ul = (unsigned long)end;
         unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT;
 
-
-        free_init_pages("unused kernel image", begin_ul, end_ul);
+        free_init_pages(what, begin_ul, end_ul);
 
         /*
          * PTI maps some of the kernel into userspace. For performance,
@@ -819,15 +929,14 @@
         set_memory_np_noalias(begin_ul, len_pages);
 }
 
-void __weak mem_encrypt_free_decrypted_mem(void) { }
-
 void __ref free_initmem(void)
 {
         e820__reallocate_tables();
 
         mem_encrypt_free_decrypted_mem();
 
-        free_kernel_image_pages(&__init_begin, &__init_end);
+        free_kernel_image_pages("unused kernel image (initmem)",
+                                &__init_begin, &__init_end);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -903,22 +1012,33 @@
         max_zone_pfns[ZONE_DMA] = min(MAX_DMA_PFN, max_low_pfn);
 #endif
 #ifdef CONFIG_ZONE_DMA32
-        max_zone_pfns[ZONE_DMA32] = min(MAX_DMA32_PFN, max_low_pfn);
+        max_zone_pfns[ZONE_DMA32] = disable_dma32 ? 0 : min(MAX_DMA32_PFN, max_low_pfn);
 #endif
         max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 #ifdef CONFIG_HIGHMEM
         max_zone_pfns[ZONE_HIGHMEM] = max_pfn;
 #endif
 
-        free_area_init_nodes(max_zone_pfns);
+        free_area_init(max_zone_pfns);
 }
+
+static int __init early_disable_dma32(char *buf)
+{
+        if (!buf)
+                return -EINVAL;
+
+        if (!strcmp(buf, "on"))
+                disable_dma32 = true;
+
+        return 0;
+}
+early_param("disable_dma32", early_disable_dma32);
 
 __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
         .loaded_mm = &init_mm,
         .next_asid = 1,
         .cr4 = ~0UL,   /* fail hard if we screw up cr4 shadow initialization */
 };
-EXPORT_PER_CPU_SYMBOL(cpu_tlbstate);
 
 void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
 {