From 9999e48639b3cecb08ffb37358bcba3b48161b29 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Fri, 10 May 2024 08:50:17 +0000
Subject: [PATCH] x86/mm: update init.c (PAT cache-mode helpers, PCID INVLPG
 erratum, poking_init, disable_dma32)

---
 kernel/arch/x86/mm/init.c | 188 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 169 insertions(+), 19 deletions(-)

diff --git a/kernel/arch/x86/mm/init.c b/kernel/arch/x86/mm/init.c
index b1dba09..8f67fed 100644
--- a/kernel/arch/x86/mm/init.c
+++ b/kernel/arch/x86/mm/init.c
@@ -3,12 +3,14 @@
 #include <linux/ioport.h>
 #include <linux/swap.h>
 #include <linux/memblock.h>
-#include <linux/bootmem.h>	/* for max_low_pfn */
 #include <linux/swapfile.h>
 #include <linux/swapops.h>
 #include <linux/kmemleak.h>
+#include <linux/sched/task.h>
+#include <linux/sched/mm.h>
 
 #include <asm/set_memory.h>
+#include <asm/cpu_device_id.h>
 #include <asm/e820/api.h>
 #include <asm/init.h>
 #include <asm/page.h>
@@ -24,6 +26,9 @@
 #include <asm/hypervisor.h>
 #include <asm/cpufeature.h>
 #include <asm/pti.h>
+#include <asm/text-patching.h>
+#include <asm/memtype.h>
+#include <asm/paravirt.h>
 
 /*
  * We need to define the tracepoints somewhere, and tlb.c
@@ -48,7 +53,7 @@
  * Index into __pte2cachemode_tbl[] are the caching attribute bits of the pte
  * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
  */
-uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
+static uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
 	[_PAGE_CACHE_MODE_WB      ]	= 0         | 0        ,
 	[_PAGE_CACHE_MODE_WC      ]	= 0         | _PAGE_PCD,
 	[_PAGE_CACHE_MODE_UC_MINUS]	= 0         | _PAGE_PCD,
@@ -56,9 +61,16 @@
 	[_PAGE_CACHE_MODE_WT      ]	= 0         | _PAGE_PCD,
 	[_PAGE_CACHE_MODE_WP      ]	= 0         | _PAGE_PCD,
 };
-EXPORT_SYMBOL(__cachemode2pte_tbl);
 
-uint8_t __pte2cachemode_tbl[8] = {
+unsigned long cachemode2protval(enum page_cache_mode pcm)
+{
+	if (likely(pcm == 0))
+		return 0;
+	return __cachemode2pte_tbl[pcm];
+}
+EXPORT_SYMBOL(cachemode2protval);
+
+static uint8_t __pte2cachemode_tbl[8] = {
 	[__pte2cm_idx( 0        | 0         | 0        )] = _PAGE_CACHE_MODE_WB,
 	[__pte2cm_idx(_PAGE_PWT | 0         | 0        )] = _PAGE_CACHE_MODE_UC_MINUS,
 	[__pte2cm_idx( 0        | _PAGE_PCD | 0        )] = _PAGE_CACHE_MODE_UC_MINUS,
@@ -68,7 +80,32 @@
 	[__pte2cm_idx(0         | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
 	[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
 };
-EXPORT_SYMBOL(__pte2cachemode_tbl);
+
+/*
+ * Check that the write-protect PAT entry is set for write-protect.
+ * To do this without making assumptions how PAT has been set up (Xen has
+ * another layout than the kernel), translate the _PAGE_CACHE_MODE_WP cache
+ * mode via the __cachemode2pte_tbl[] into protection bits (those protection
+ * bits will select a cache mode of WP or better), and then translate the
+ * protection bits back into the cache mode using __pte2cm_idx() and the
+ * __pte2cachemode_tbl[] array. This will return the really used cache mode.
+ */
+bool x86_has_pat_wp(void)
+{
+	uint16_t prot = __cachemode2pte_tbl[_PAGE_CACHE_MODE_WP];
+
+	return __pte2cachemode_tbl[__pte2cm_idx(prot)] == _PAGE_CACHE_MODE_WP;
+}
+
+enum page_cache_mode pgprot2cachemode(pgprot_t pgprot)
+{
+	unsigned long masked;
+
+	masked = pgprot_val(pgprot) & _PAGE_CACHE_MASK;
+	if (likely(masked == 0))
+		return 0;
+	return __pte2cachemode_tbl[__pte2cm_idx(masked)];
+}
 
 static unsigned long __initdata pgt_buf_start;
 static unsigned long __initdata pgt_buf_end;
@@ -77,6 +114,12 @@
 static unsigned long min_pfn_mapped;
 
 static bool __initdata can_use_brk_pgt = true;
+
+/*
+ * Provide a run-time means of disabling ZONE_DMA32 if it is enabled via
+ * CONFIG_ZONE_DMA32.
+ */
+static bool disable_dma32 __ro_after_init;
 
 /*
  * Pages returned are already directly mapped.
@@ -169,6 +212,19 @@
 
 static int page_size_mask;
 
+/*
+ * Save some of cr4 feature set we're using (e.g. Pentium 4MB
+ * enable and PPro Global page enable), so that any CPU's that boot
+ * up after us can get the correct flags. Invoked on the boot CPU.
+ */
+static inline void cr4_set_bits_and_update_boot(unsigned long mask)
+{
+	mmu_cr4_features |= mask;
+	if (trampoline_cr4_features)
+		*trampoline_cr4_features = mmu_cr4_features;
+	cr4_set_bits(mask);
+}
+
 static void __init probe_page_size_mask(void)
 {
 	/*
@@ -207,6 +263,24 @@
 	}
 }
 
+#define INTEL_MATCH(_model) { .vendor  = X86_VENDOR_INTEL,	\
+			      .family  = 6,			\
+			      .model = _model,			\
+	}
+/*
+ * INVLPG may not properly flush Global entries
+ * on these CPUs when PCIDs are enabled.
+ */
+static const struct x86_cpu_id invlpg_miss_ids[] = {
+	INTEL_MATCH(INTEL_FAM6_ALDERLAKE   ),
+	INTEL_MATCH(INTEL_FAM6_ALDERLAKE_L ),
+	INTEL_MATCH(INTEL_FAM6_ALDERLAKE_N ),
+	INTEL_MATCH(INTEL_FAM6_RAPTORLAKE  ),
+	INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_P),
+	INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_S),
+	{}
+};
+
 static void setup_pcid(void)
 {
 	if (!IS_ENABLED(CONFIG_X86_64))
@@ -214,6 +288,12 @@
 
 	if (!boot_cpu_has(X86_FEATURE_PCID))
 		return;
+
+	if (x86_match_cpu(invlpg_miss_ids)) {
+		pr_info("Incomplete global flushes, disabling PCID");
+		setup_clear_cpu_cap(X86_FEATURE_PCID);
+		return;
+	}
 
 	if (boot_cpu_has(X86_FEATURE_PGE)) {
 		/*
@@ -464,7 +544,7 @@
  * the physical memory. To access them they are temporarily mapped.
  */
 unsigned long __ref init_memory_mapping(unsigned long start,
-					unsigned long end)
+					unsigned long end, pgprot_t prot)
 {
 	struct map_range mr[NR_RANGE_MR];
 	unsigned long ret = 0;
@@ -478,7 +558,8 @@
 
 	for (i = 0; i < nr_range; i++)
 		ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
-						   mr[i].page_size_mask);
+						   mr[i].page_size_mask,
+						   prot);
 
 	add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT);
 
@@ -518,7 +599,7 @@
 		 */
 		can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >=
 				    min(end, (u64)pgt_buf_top<<PAGE_SHIFT);
-		init_memory_mapping(start, end);
+		init_memory_mapping(start, end, PAGE_KERNEL);
 		mapped_ram_size += end - start;
 		can_use_brk_pgt = true;
 	}
@@ -643,6 +724,28 @@
 	}
 }
 
+/*
+ * The real mode trampoline, which is required for bootstrapping CPUs
+ * occupies only a small area under the low 1MB. See reserve_real_mode()
+ * for details.
+ *
+ * If KASLR is disabled the first PGD entry of the direct mapping is copied
+ * to map the real mode trampoline.
+ *
+ * If KASLR is enabled, copy only the PUD which covers the low 1MB
+ * area. This limits the randomization granularity to 1GB for both 4-level
+ * and 5-level paging.
+ */
+static void __init init_trampoline(void)
+{
+#ifdef CONFIG_X86_64
+	if (!kaslr_memory_enabled())
+		trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
+	else
+		init_trampoline_kaslr();
+#endif
+}
+
 void __init init_mem_mapping(void)
 {
 	unsigned long end;
@@ -658,7 +761,7 @@
 #endif
 
 	/* the ISA range is always mapped regardless of memory holes */
-	init_memory_mapping(0, ISA_END_ADDRESS);
+	init_memory_mapping(0, ISA_END_ADDRESS, PAGE_KERNEL);
 
 	/* Init the trampoline, possibly with KASLR memory offset */
 	init_trampoline();
@@ -698,6 +801,44 @@
 	x86_init.hyper.init_mem_mapping();
 
 	early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
+}
+
+/*
+ * Initialize an mm_struct to be used during poking and a pointer to be used
+ * during patching.
+ */
+void __init poking_init(void)
+{
+	spinlock_t *ptl;
+	pte_t *ptep;
+
+	poking_mm = mm_alloc();
+	BUG_ON(!poking_mm);
+
+	/* Xen PV guests need the PGD to be pinned. */
+	paravirt_arch_dup_mmap(NULL, poking_mm);
+
+	/*
+	 * Randomize the poking address, but make sure that the following page
+	 * will be mapped at the same PMD. We need 2 pages, so find space for 3,
+	 * and adjust the address if the PMD ends after the first one.
+	 */
+	poking_addr = TASK_UNMAPPED_BASE;
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+		poking_addr += (kaslr_get_random_long("Poking") & PAGE_MASK) %
+			(TASK_SIZE - TASK_UNMAPPED_BASE - 3 * PAGE_SIZE);
+
+	if (((poking_addr + PAGE_SIZE) & ~PMD_MASK) == 0)
+		poking_addr += PAGE_SIZE;
+
+	/*
+	 * We need to trigger the allocation of the page-tables that will be
+	 * needed for poking now. Later, poking may be performed in an atomic
+	 * section, which might cause allocation to fail.
+	 */
+	ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
+	BUG_ON(!ptep);
+	pte_unmap_unlock(ptep, ptl);
 }
 
 /*
@@ -742,7 +883,7 @@
 	return 1;
 }
 
-void free_init_pages(char *what, unsigned long begin, unsigned long end)
+void free_init_pages(const char *what, unsigned long begin, unsigned long end)
 {
 	unsigned long begin_aligned, end_aligned;
 
@@ -791,14 +932,13 @@
  * used for the kernel image only. free_init_pages() will do the
  * right thing for either kind of address.
  */
-void free_kernel_image_pages(void *begin, void *end)
+void free_kernel_image_pages(const char *what, void *begin, void *end)
 {
 	unsigned long begin_ul = (unsigned long)begin;
 	unsigned long end_ul = (unsigned long)end;
 	unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT;
 
-
-	free_init_pages("unused kernel image", begin_ul, end_ul);
+	free_init_pages(what, begin_ul, end_ul);
 
 	/*
 	 * PTI maps some of the kernel into userspace. For performance,
@@ -819,15 +959,14 @@
 		set_memory_np_noalias(begin_ul, len_pages);
 }
 
-void __weak mem_encrypt_free_decrypted_mem(void) { }
-
 void __ref free_initmem(void)
 {
 	e820__reallocate_tables();
 
 	mem_encrypt_free_decrypted_mem();
 
-	free_kernel_image_pages(&__init_begin, &__init_end);
+	free_kernel_image_pages("unused kernel image (initmem)",
+				&__init_begin, &__init_end);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -903,22 +1042,33 @@
 	max_zone_pfns[ZONE_DMA]		= min(MAX_DMA_PFN, max_low_pfn);
 #endif
 #ifdef CONFIG_ZONE_DMA32
-	max_zone_pfns[ZONE_DMA32]	= min(MAX_DMA32_PFN, max_low_pfn);
+	max_zone_pfns[ZONE_DMA32]	= disable_dma32 ? 0 : min(MAX_DMA32_PFN, max_low_pfn);
 #endif
 	max_zone_pfns[ZONE_NORMAL]	= max_low_pfn;
 #ifdef CONFIG_HIGHMEM
 	max_zone_pfns[ZONE_HIGHMEM]	= max_pfn;
 #endif
 
-	free_area_init_nodes(max_zone_pfns);
+	free_area_init(max_zone_pfns);
 }
+
+static int __init early_disable_dma32(char *buf)
+{
+	if (!buf)
+		return -EINVAL;
+
+	if (!strcmp(buf, "on"))
+		disable_dma32 = true;
+
+	return 0;
+}
+early_param("disable_dma32", early_disable_dma32);
 
 __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
 	.loaded_mm = &init_mm,
 	.next_asid = 1,
 	.cr4 = ~0UL,	/* fail hard if we screw up cr4 shadow initialization */
 };
-EXPORT_PER_CPU_SYMBOL(cpu_tlbstate);
 
 void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
 {
-- 
Gitblit v1.6.2