From 2f7c68cb55ecb7331f2381deb497c27155f32faf Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Wed, 03 Jan 2024 09:43:39 +0000 Subject: [PATCH] update kernel to 5.10.198 --- kernel/arch/x86/kernel/cpu/common.c | 888 +++++++++++++++++++++++++++++++++++++--------------------- 1 files changed, 565 insertions(+), 323 deletions(-) diff --git a/kernel/arch/x86/kernel/cpu/common.c b/kernel/arch/x86/kernel/cpu/common.c index 4665afa..4ecc607 100644 --- a/kernel/arch/x86/kernel/cpu/common.c +++ b/kernel/arch/x86/kernel/cpu/common.c @@ -1,7 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* cpu_feature_enabled() cannot be used this early */ #define USE_EARLY_PGTABLE_L5 -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/linkage.h> #include <linux/bitops.h> #include <linux/kernel.h> @@ -13,16 +14,24 @@ #include <linux/sched/mm.h> #include <linux/sched/clock.h> #include <linux/sched/task.h> +#include <linux/sched/smt.h> #include <linux/init.h> #include <linux/kprobes.h> #include <linux/kgdb.h> +#include <linux/mem_encrypt.h> #include <linux/smp.h> +#include <linux/cpu.h> #include <linux/io.h> #include <linux/syscore_ops.h> +#include <linux/pgtable.h> +#include <linux/utsname.h> +#include <asm/alternative.h> +#include <asm/cmdline.h> #include <asm/stackprotector.h> #include <asm/perf_event.h> #include <asm/mmu_context.h> +#include <asm/doublefault.h> #include <asm/archrandom.h> #include <asm/hypervisor.h> #include <asm/processor.h> @@ -32,7 +41,6 @@ #include <asm/vsyscall.h> #include <linux/topology.h> #include <linux/cpumask.h> -#include <asm/pgtable.h> #include <linux/atomic.h> #include <asm/proto.h> #include <asm/setup.h> @@ -42,20 +50,19 @@ #include <asm/mtrr.h> #include <asm/hwcap2.h> #include <linux/numa.h> +#include <asm/numa.h> #include <asm/asm.h> #include <asm/bugs.h> #include <asm/cpu.h> #include <asm/mce.h> #include <asm/msr.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/microcode.h> #include <asm/microcode_intel.h> #include <asm/intel-family.h> #include <asm/cpu_device_id.h> - -#ifdef CONFIG_X86_LOCAL_APIC #include <asm/uv/uv.h> -#endif +#include <asm/set_memory.h> #include "cpu.h" @@ -163,22 +170,6 @@ #endif } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); - -static int __init x86_mpx_setup(char *s) -{ - /* require an exact match without trailing characters */ - if (strlen(s)) - return 0; - - /* do not emit a message if the feature is not present */ - if (!boot_cpu_has(X86_FEATURE_MPX)) - return 1; - - setup_clear_cpu_cap(X86_FEATURE_MPX); - pr_info("nompx: Intel Memory Protection Extensions (MPX) disabled\n"); - return 1; -} -__setup("nompx", x86_mpx_setup); #ifdef CONFIG_X86_64 static int __init x86_nopcid_setup(char *s) @@ -306,8 +297,6 @@ static __init int setup_disable_smep(char *arg) { setup_clear_cpu_cap(X86_FEATURE_SMEP); - /* Check for things that depend on SMEP being enabled: */ - check_mpx_erratum(&boot_cpu_data); return 1; } __setup("nosmep", setup_disable_smep); @@ -336,6 +325,7 @@ #ifdef CONFIG_X86_SMAP cr4_set_bits(X86_CR4_SMAP); #else + clear_cpu_cap(c, X86_FEATURE_SMAP); cr4_clear_bits(X86_CR4_SMAP); #endif } @@ -353,7 +343,7 @@ cr4_set_bits(X86_CR4_UMIP); - pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n"); + pr_info_once("x86/cpu: User Mode Instruction Prevention (UMIP) activated\n"); return; @@ -364,6 +354,116 @@ */ cr4_clear_bits(X86_CR4_UMIP); } + +/* These bits should not change their value after CPU init is finished. */ +static const unsigned long cr4_pinned_mask = + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE; +static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); +static unsigned long cr4_pinned_bits __ro_after_init; + +void native_write_cr0(unsigned long val) +{ + unsigned long bits_missing = 0; + +set_register: + asm volatile("mov %0,%%cr0": "+r" (val) : : "memory"); + + if (static_branch_likely(&cr_pinning)) { + if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) { + bits_missing = X86_CR0_WP; + val |= bits_missing; + goto set_register; + } + /* Warn after we've set the missing bits. */ + WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n"); + } +} +EXPORT_SYMBOL(native_write_cr0); + +void native_write_cr4(unsigned long val) +{ + unsigned long bits_changed = 0; + +set_register: + asm volatile("mov %0,%%cr4": "+r" (val) : : "memory"); + + if (static_branch_likely(&cr_pinning)) { + if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) { + bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits; + val = (val & ~cr4_pinned_mask) | cr4_pinned_bits; + goto set_register; + } + /* Warn after we've corrected the changed bits. */ + WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n", + bits_changed); + } +} +#if IS_MODULE(CONFIG_LKDTM) +EXPORT_SYMBOL_GPL(native_write_cr4); +#endif + +void cr4_update_irqsoff(unsigned long set, unsigned long clear) +{ + unsigned long newval, cr4 = this_cpu_read(cpu_tlbstate.cr4); + + lockdep_assert_irqs_disabled(); + + newval = (cr4 & ~clear) | set; + if (newval != cr4) { + this_cpu_write(cpu_tlbstate.cr4, newval); + __write_cr4(newval); + } +} +EXPORT_SYMBOL(cr4_update_irqsoff); + +/* Read the CR4 shadow. */ +unsigned long cr4_read_shadow(void) +{ + return this_cpu_read(cpu_tlbstate.cr4); +} +EXPORT_SYMBOL_GPL(cr4_read_shadow); + +void cr4_init(void) +{ + unsigned long cr4 = __read_cr4(); + + if (boot_cpu_has(X86_FEATURE_PCID)) + cr4 |= X86_CR4_PCIDE; + if (static_branch_likely(&cr_pinning)) + cr4 = (cr4 & ~cr4_pinned_mask) | cr4_pinned_bits; + + __write_cr4(cr4); + + /* Initialize cr4 shadow for this CPU. */ + this_cpu_write(cpu_tlbstate.cr4, cr4); +} + +/* + * Once CPU feature detection is finished (and boot params have been + * parsed), record any of the sensitive CR bits that are set, and + * enable CR pinning. + */ +static void __init setup_cr_pinning(void) +{ + cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & cr4_pinned_mask; + static_key_enable(&cr_pinning.key); +} + +static __init int x86_nofsgsbase_setup(char *arg) +{ + /* Require an exact match without trailing characters. */ + if (strlen(arg)) + return 0; + + /* Do not emit a message if the feature is not present. */ + if (!boot_cpu_has(X86_FEATURE_FSGSBASE)) + return 1; + + setup_clear_cpu_cap(X86_FEATURE_FSGSBASE); + pr_info("FSGSBASE disabled via kernel command line\n"); + return 1; +} +__setup("nofsgsbase", x86_nofsgsbase_setup); /* * Protection Keys are not available in 32-bit mode. @@ -488,8 +588,9 @@ return NULL; /* Not found */ } -__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; -__u32 cpu_caps_set[NCAPINTS + NBUGINTS]; +/* Aligned to unsigned long to avoid split lock in atomic bitmap ops */ +__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long)); +__u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long)); void load_percpu_segment(int cpu) { @@ -505,19 +606,6 @@ #ifdef CONFIG_X86_32 /* The 32-bit entry code needs to find cpu_entry_area. */ DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); -#endif - -#ifdef CONFIG_X86_64 -/* - * Special IST stacks which the CPU switches to when it calls - * an IST-marked descriptor entry. Up to 7 stacks (hardware - * limit), all of them are 4K, except the debug stack which - * is 8K. - */ -static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, - [DEBUG_STACK - 1] = DEBUG_STKSZ -}; #endif /* Load the original GDT from the per-cpu structure */ @@ -808,30 +896,6 @@ } } -static void init_cqm(struct cpuinfo_x86 *c) -{ - if (!cpu_has(c, X86_FEATURE_CQM_LLC)) { - c->x86_cache_max_rmid = -1; - c->x86_cache_occ_scale = -1; - return; - } - - /* will be overridden if occupancy monitoring exists */ - c->x86_cache_max_rmid = cpuid_ebx(0xf); - - if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) || - cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) || - cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) { - u32 eax, ebx, ecx, edx; - - /* QoS sub-leaf, EAX=0Fh, ECX=1 */ - cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx); - - c->x86_cache_max_rmid = ecx; - c->x86_cache_occ_scale = ebx; - } -} - void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -854,6 +918,12 @@ c->x86_capability[CPUID_7_0_EBX] = ebx; c->x86_capability[CPUID_7_ECX] = ecx; c->x86_capability[CPUID_7_EDX] = edx; + + /* Check valid sub-leaf index before accessing it */ + if (eax >= 1) { + cpuid_count(0x00000007, 1, &eax, &ebx, &ecx, &edx); + c->x86_capability[CPUID_7_1_EAX] = eax; + } } /* Extended state features: level 0x0000000d */ @@ -891,9 +961,14 @@ if (c->extended_cpuid_level >= 0x8000000a) c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); + if (c->extended_cpuid_level >= 0x8000001f) + c->x86_capability[CPUID_8000_001F_EAX] = cpuid_eax(0x8000001f); + + if (c->extended_cpuid_level >= 0x80000021) + c->x86_capability[CPUID_8000_0021_EAX] = cpuid_eax(0x80000021); + init_scattered_cpuid_features(c); init_speculation_control(c); - init_cqm(c); /* * Clear/Set all flags overridden by options, after probe. @@ -954,15 +1029,21 @@ #define MSBDS_ONLY BIT(5) #define NO_SWAPGS BIT(6) #define NO_ITLB_MULTIHIT BIT(7) +#define NO_SPECTRE_V2 BIT(8) +#define NO_MMIO BIT(9) +#define NO_EIBRS_PBRSB BIT(10) -#define VULNWL(_vendor, _family, _model, _whitelist) \ - { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } +#define VULNWL(vendor, family, model, whitelist) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) #define VULNWL_INTEL(model, whitelist) \ VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist) #define VULNWL_AMD(family, whitelist) \ VULNWL(AMD, family, X86_MODEL_ANY, whitelist) + +#define VULNWL_HYGON(family, whitelist) \ + VULNWL(HYGON, family, X86_MODEL_ANY, whitelist) static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION), @@ -971,6 +1052,11 @@ VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), /* Intel Family 6 */ + VULNWL_INTEL(TIGERLAKE, NO_MMIO), + VULNWL_INTEL(TIGERLAKE_L, NO_MMIO), + VULNWL_INTEL(ALDERLAKE, NO_MMIO), + VULNWL_INTEL(ALDERLAKE_L, NO_MMIO), + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), @@ -978,7 +1064,7 @@ VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SILVERMONT_D, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), @@ -987,10 +1073,11 @@ VULNWL_INTEL(CORE_YONAH, NO_SSB), VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), /* * Technically, swapgs isn't serializing on AMD (despite it previously @@ -1000,37 +1087,89 @@ * good enough for our purposes. */ - VULNWL_INTEL(ATOM_TREMONT_X, NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_TREMONT, NO_EIBRS_PBRSB), + VULNWL_INTEL(ATOM_TREMONT_L, NO_EIBRS_PBRSB), + VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + + /* Zhaoxin Family 7 */ + VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), + VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), {} }; + +#define VULNBL(vendor, family, model, blacklist) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist) #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ INTEL_FAM6_##model, steppings, \ X86_FEATURE_ANY, issues) +#define VULNBL_AMD(family, blacklist) \ + VULNBL(AMD, family, X86_MODEL_ANY, blacklist) + +#define VULNBL_HYGON(family, blacklist) \ + VULNBL(HYGON, family, X86_MODEL_ANY, blacklist) + #define SRBDS BIT(0) +/* CPU is affected by X86_BUG_MMIO_STALE_DATA */ +#define MMIO BIT(1) +/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */ +#define MMIO_SBDS BIT(2) +/* CPU is affected by RETbleed, speculating where you would not expect it */ +#define RETBLEED BIT(3) +/* CPU is affected by SMT (cross-thread) return predictions */ +#define SMT_RSB BIT(4) +/* CPU is affected by SRSO */ +#define SRSO BIT(5) +/* CPU is affected by GDS */ +#define GDS BIT(6) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(HASWELL_CORE, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(HASWELL_ULT, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(HASWELL_GT3E, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(BROADWELL_GT3E, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(BROADWELL_CORE, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_DESKTOP, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPINGS(0x0, 0xC), SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPINGS(0x0, 0xD), SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS), + VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS), + VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS), + VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + + VULNBL_AMD(0x15, RETBLEED), + VULNBL_AMD(0x16, RETBLEED), + VULNBL_AMD(0x17, RETBLEED | SRSO), + VULNBL_HYGON(0x18, RETBLEED), + VULNBL_AMD(0x19, SRSO), {} }; @@ -1051,6 +1190,13 @@ return ia32_cap; } +static bool arch_cap_mmio_immune(u64 ia32_cap) +{ + return (ia32_cap & ARCH_CAP_FBSDP_NO && + ia32_cap & ARCH_CAP_PSDP_NO && + ia32_cap & ARCH_CAP_SBDR_SSDP_NO); +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = x86_read_arch_cap_msr(); @@ -1064,7 +1210,9 @@ return; setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + + if (!cpu_matches(cpu_vuln_whitelist, NO_SPECTRE_V2)) + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && @@ -1102,11 +1250,57 @@ /* * SRBDS affects CPUs which support RDRAND or RDSEED and are listed * in the vulnerability blacklist. + * + * Some of the implications and mitigation of Shared Buffers Data + * Sampling (SBDS) are similar to SRBDS. Give SBDS same treatment as + * SRBDS. */ if ((cpu_has(c, X86_FEATURE_RDRAND) || cpu_has(c, X86_FEATURE_RDSEED)) && - cpu_matches(cpu_vuln_blacklist, SRBDS)) + cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS)) setup_force_cpu_bug(X86_BUG_SRBDS); + + /* + * Processor MMIO Stale Data bug enumeration + * + * Affected CPU list is generally enough to enumerate the vulnerability, + * but for virtualization case check for ARCH_CAP MSR bits also, VMM may + * not want the guest to enumerate the bug. + * + * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, + * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. + */ + if (!arch_cap_mmio_immune(ia32_cap)) { + if (cpu_matches(cpu_vuln_blacklist, MMIO)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) + setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN); + } + + if (!cpu_has(c, X86_FEATURE_BTC_NO)) { + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) + setup_force_cpu_bug(X86_BUG_RETBLEED); + } + + if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) && + !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && + !(ia32_cap & ARCH_CAP_PBRSB_NO)) + setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); + + /* + * Check if CPU is vulnerable to GDS. If running in a virtual machine on + * an affected processor, the VMM may have disabled the use of GATHER by + * disabling AVX2. The only way to do this in HW is to clear XCR0[2], + * which means that AVX will be disabled. + */ + if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) && + boot_cpu_has(X86_FEATURE_AVX)) + setup_force_cpu_bug(X86_BUG_GDS); + + if (!cpu_has(c, X86_FEATURE_SRSO_NO)) { + if (cpu_matches(cpu_vuln_blacklist, SRSO)) + setup_force_cpu_bug(X86_BUG_SRSO); + } if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; @@ -1142,6 +1336,59 @@ } /* + * We parse cpu parameters early because fpu__init_system() is executed + * before parse_early_param(). + */ +static void __init cpu_parse_early_param(void) +{ + char arg[128]; + char *argptr = arg; + int arglen, res, bit; + +#ifdef CONFIG_X86_32 + if (cmdline_find_option_bool(boot_command_line, "no387")) +#ifdef CONFIG_MATH_EMULATION + setup_clear_cpu_cap(X86_FEATURE_FPU); +#else + pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n"); +#endif + + if (cmdline_find_option_bool(boot_command_line, "nofxsr")) + setup_clear_cpu_cap(X86_FEATURE_FXSR); +#endif + + if (cmdline_find_option_bool(boot_command_line, "noxsave")) + setup_clear_cpu_cap(X86_FEATURE_XSAVE); + + if (cmdline_find_option_bool(boot_command_line, "noxsaveopt")) + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + + if (cmdline_find_option_bool(boot_command_line, "noxsaves")) + setup_clear_cpu_cap(X86_FEATURE_XSAVES); + + arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg)); + if (arglen <= 0) + return; + + pr_info("Clearing CPUID bits:"); + do { + res = get_option(&argptr, &bit); + if (res == 0 || res == 3) + break; + + /* If the argument was too long, the last bit may be cut off */ + if (res == 1 && arglen >= sizeof(arg)) + break; + + if (bit >= 0 && bit < NCAPINTS * 32) { + pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit)); + setup_clear_cpu_cap(bit); + } + } while (res == 2); + pr_cont("\n"); +} + +/* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, * cache alignment. @@ -1163,7 +1410,7 @@ #endif c->x86_cache_alignment = c->x86_clflush_size; - memset(&c->x86_capability, 0, sizeof c->x86_capability); + memset(&c->x86_capability, 0, sizeof(c->x86_capability)); c->extended_cpuid_level = 0; if (!have_cpuid_p()) @@ -1176,6 +1423,7 @@ get_cpu_cap(c); get_cpu_address_sizes(c); setup_force_cpu_cap(X86_FEATURE_CPUID); + cpu_parse_early_param(); if (this_cpu->c_early_init) this_cpu->c_early_init(c); @@ -1193,7 +1441,7 @@ cpu_set_bug_bits(c); - fpu__init_system(c); + cpu_set_core_cap_bits(c); #ifdef CONFIG_X86_32 /* @@ -1362,30 +1610,8 @@ * ESPFIX issue, we can change this. */ #ifdef CONFIG_X86_32 -# ifdef CONFIG_PARAVIRT - do { - extern void native_iret(void); - if (pv_cpu_ops.iret == native_iret) - set_cpu_bug(c, X86_BUG_ESPFIX); - } while (0); -# else set_cpu_bug(c, X86_BUG_ESPFIX); -# endif #endif -} - -static void x86_init_cache_qos(struct cpuinfo_x86 *c) -{ - /* - * The heavy lifting of max_rmid and cache_occ_scale are handled - * in get_cpu_cap(). Here we just set the max_rmid for the boot_cpu - * in case CQM bits really aren't there in this CPU. - */ - if (c != &boot_cpu_data) { - boot_cpu_data.x86_cache_max_rmid = - min(boot_cpu_data.x86_cache_max_rmid, - c->x86_cache_max_rmid); - } } /* @@ -1404,6 +1630,7 @@ cpu, apicid, c->initial_apicid); } BUG_ON(topology_update_package_map(c->phys_proc_id, cpu)); + BUG_ON(topology_update_die_map(c->cpu_die_id, cpu)); #else c->logical_proc_id = 0; #endif @@ -1436,7 +1663,10 @@ c->x86_virt_bits = 32; #endif c->x86_cache_alignment = c->x86_clflush_size; - memset(&c->x86_capability, 0, sizeof c->x86_capability); + memset(&c->x86_capability, 0, sizeof(c->x86_capability)); +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + memset(&c->vmx_capability, 0, sizeof(c->vmx_capability)); +#endif generic_identify(c); @@ -1471,6 +1701,12 @@ setup_smap(c); setup_umip(c); + /* Enable FSGSBASE instructions if available. */ + if (cpu_has(c, X86_FEATURE_FSGSBASE)) { + cr4_set_bits(X86_CR4_FSGSBASE); + elf_hwcap2 |= HWCAP2_FSGSBASE; + } + /* * The vendor-specific functions might have changed features. * Now we do "generic changes." @@ -1496,7 +1732,6 @@ #endif x86_init_rdrand(c); - x86_init_cache_qos(c); setup_pku(c); /* @@ -1569,6 +1804,8 @@ enable_sep_cpu(); #endif cpu_detect_tlb(&boot_cpu_data); + setup_cr_pinning(); + tsx_init(); } @@ -1583,6 +1820,8 @@ validate_apic_and_package_id(c); x86_spec_ctrl_setup_ap(); update_srbds_msr(); + if (boot_cpu_has_bug(X86_BUG_GDS)) + update_gds_msr(); } static __init int setup_noclflush(char *arg) @@ -1632,9 +1871,9 @@ __setup("clearcpuid=", setup_clearcpuid); #ifdef CONFIG_X86_64 -DEFINE_PER_CPU_FIRST(union irq_stack_union, - irq_stack_union) __aligned(PAGE_SIZE) __visible; -EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union); +DEFINE_PER_CPU_FIRST(struct fixed_percpu_data, + fixed_percpu_data) __aligned(PAGE_SIZE) __visible; +EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data); /* * The following percpu variables are hot. Align current_task to @@ -1644,9 +1883,7 @@ &init_task; EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(char *, irq_stack_ptr) = - init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE; - +DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; @@ -1655,19 +1892,8 @@ /* May not be marked __init: used by software suspend */ void syscall_init(void) { - extern char _entry_trampoline[]; - extern char entry_SYSCALL_64_trampoline[]; - - int cpu = smp_processor_id(); - unsigned long SYSCALL64_entry_trampoline = - (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline + - (entry_SYSCALL_64_trampoline - _entry_trampoline); - wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); - if (static_cpu_has(X86_FEATURE_PTI)) - wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline); - else - wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); + wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); #ifdef CONFIG_IA32_EMULATION wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); @@ -1678,7 +1904,8 @@ * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). */ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1)); + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, + (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1)); wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); #else wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); @@ -1692,41 +1919,6 @@ X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF| X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT); } - -/* - * Copies of the original ist values from the tss are only accessed during - * debugging, no special alignment required. - */ -DEFINE_PER_CPU(struct orig_ist, orig_ist); - -static DEFINE_PER_CPU(unsigned long, debug_stack_addr); -DEFINE_PER_CPU(int, debug_stack_usage); - -int is_debug_stack(unsigned long addr) -{ - return __this_cpu_read(debug_stack_usage) || - (addr <= __this_cpu_read(debug_stack_addr) && - addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ)); -} -NOKPROBE_SYMBOL(is_debug_stack); - -DEFINE_PER_CPU(u32, debug_idt_ctr); - -void debug_stack_set_zero(void) -{ - this_cpu_inc(debug_idt_ctr); - load_current_idt(); -} -NOKPROBE_SYMBOL(debug_stack_set_zero); - -void debug_stack_reset(void) -{ - if (WARN_ON(!this_cpu_read(debug_idt_ctr))) - return; - if (this_cpu_dec_return(debug_idt_ctr) == 0) - load_current_idt(); -} -NOKPROBE_SYMBOL(debug_stack_reset); #else /* CONFIG_X86_64 */ @@ -1794,12 +1986,12 @@ } #ifdef CONFIG_X86_64 -static void setup_getcpu(int cpu) +static inline void setup_getcpu(int cpu) { unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu)); struct desc_struct d = { }; - if (boot_cpu_has(X86_FEATURE_RDTSCP)) + if (boot_cpu_has(X86_FEATURE_RDTSCP) || boot_cpu_has(X86_FEATURE_RDPID)) write_rdtscp_aux(cpudata); /* Store CPU and node number in limit. */ @@ -1814,109 +2006,130 @@ write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_CPUNODE, &d, DESCTYPE_S); } + +static inline void ucode_cpu_init(int cpu) +{ + if (cpu) + load_ucode_ap(); +} + +static inline void tss_setup_ist(struct tss_struct *tss) +{ + /* Set up the per-CPU TSS IST stacks */ + tss->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF); + tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI); + tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB); + tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE); + /* Only mapped when SEV-ES is active */ + tss->x86_tss.ist[IST_INDEX_VC] = __this_cpu_ist_top_va(VC); +} + +#else /* CONFIG_X86_64 */ + +static inline void setup_getcpu(int cpu) { } + +static inline void ucode_cpu_init(int cpu) +{ + show_ucode_info_early(); +} + +static inline void tss_setup_ist(struct tss_struct *tss) { } + +#endif /* !CONFIG_X86_64 */ + +static inline void tss_setup_io_bitmap(struct tss_struct *tss) +{ + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; + +#ifdef CONFIG_X86_IOPL_IOPERM + tss->io_bitmap.prev_max = 0; + tss->io_bitmap.prev_sequence = 0; + memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap)); + /* + * Invalidate the extra array entry past the end of the all + * permission bitmap as required by the hardware. + */ + tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL; #endif +} + +/* + * Setup everything needed to handle exceptions from the IDT, including the IST + * exceptions which use paranoid_entry(). + */ +void cpu_init_exception_handling(void) +{ + struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); + int cpu = raw_smp_processor_id(); + + /* paranoid_entry() gets the CPU number from the GDT */ + setup_getcpu(cpu); + + /* IST vectors need TSS to be set up. */ + tss_setup_ist(tss); + tss_setup_io_bitmap(tss); + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); + + load_TR_desc(); + + /* Finally load the IDT */ + load_current_idt(); +} /* * cpu_init() initializes state that is per-CPU. Some data is already - * initialized (naturally) in the bootstrap process, such as the GDT - * and IDT. We reload them nevertheless, this function acts as a - * 'CPU state barrier', nothing should get across. - * A lot of state is already set up in PDA init for 64 bit + * initialized (naturally) in the bootstrap process, such as the GDT. We + * reload it nevertheless, this function acts as a 'CPU state barrier', + * nothing should get across. */ -#ifdef CONFIG_X86_64 - void cpu_init(void) { - struct orig_ist *oist; - struct task_struct *me; - struct tss_struct *t; - unsigned long v; + struct task_struct *cur = current; int cpu = raw_smp_processor_id(); - int i; wait_for_master_cpu(cpu); - /* - * Initialize the CR4 shadow before doing anything that could - * try to read it. - */ - cr4_init_shadow(); - - if (cpu) - load_ucode_ap(); - - t = &per_cpu(cpu_tss_rw, cpu); - oist = &per_cpu(orig_ist, cpu); + ucode_cpu_init(cpu); #ifdef CONFIG_NUMA if (this_cpu_read(numa_node) == 0 && early_cpu_to_node(cpu) != NUMA_NO_NODE) set_numa_node(early_cpu_to_node(cpu)); #endif - setup_getcpu(cpu); - - me = current; - pr_debug("Initializing CPU#%d\n", cpu); - cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + if (IS_ENABLED(CONFIG_X86_64) || cpu_feature_enabled(X86_FEATURE_VME) || + boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE)) + cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); /* * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ - switch_to_new_gdt(cpu); - loadsegment(fs, 0); - load_current_idt(); + if (IS_ENABLED(CONFIG_X86_64)) { + loadsegment(fs, 0); + memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); + syscall_init(); - memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); - syscall_init(); + wrmsrl(MSR_FS_BASE, 0); + wrmsrl(MSR_KERNEL_GS_BASE, 0); + barrier(); - wrmsrl(MSR_FS_BASE, 0); - wrmsrl(MSR_KERNEL_GS_BASE, 0); - barrier(); - - x86_configure_nx(); - x2apic_setup(); - - /* - * set up and load the per-CPU TSS - */ - if (!oist->ist[0]) { - char *estacks = get_cpu_entry_area(cpu)->exception_stacks; - - for (v = 0; v < N_EXCEPTION_STACKS; v++) { - estacks += exception_stack_sizes[v]; - oist->ist[v] = t->x86_tss.ist[v] = - (unsigned long)estacks; - if (v == DEBUG_STACK-1) - per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks; - } + x2apic_setup(); } - t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; - - /* - * <= is required because the CPU will access up to - * 8 bits beyond the end of the IO permission bitmap. - */ - for (i = 0; i <= IO_BITMAP_LONGS; i++) - t->io_bitmap[i] = ~0UL; - mmgrab(&init_mm); - me->active_mm = &init_mm; - BUG_ON(me->mm); + cur->active_mm = &init_mm; + BUG_ON(cur->mm); initialize_tlbstate_and_flush(); - enter_lazy_tlb(&init_mm, me); + enter_lazy_tlb(&init_mm, cur); /* - * Initialize the TSS. sp0 points to the entry trampoline stack - * regardless of what task is running. + * sp0 points to the entry trampoline stack regardless of what task + * is running. */ - set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); - load_TR_desc(); load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1)); load_mm_ldt(&init_mm); @@ -1924,7 +2137,7 @@ clear_all_debug_regs(); dbg_restore_debug_regs(); - fpu__init_cpu(); + doublefault_init_cpu_tss(); if (is_uv_system()) uv_cpu_init(); @@ -1932,112 +2145,141 @@ load_fixmap_gdt(cpu); } -#else - -void cpu_init(void) +#ifdef CONFIG_SMP +void cpu_init_secondary(void) { - int cpu = smp_processor_id(); - struct task_struct *curr = current; - struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu); - - wait_for_master_cpu(cpu); - /* - * Initialize the CR4 shadow before doing anything that could - * try to read it. + * Relies on the BP having set-up the IDT tables, which are loaded + * on this CPU in cpu_init_exception_handling(). */ - cr4_init_shadow(); - - show_ucode_info_early(); - - pr_info("Initializing CPU#%d\n", cpu); - - if (cpu_feature_enabled(X86_FEATURE_VME) || - boot_cpu_has(X86_FEATURE_TSC) || - boot_cpu_has(X86_FEATURE_DE)) - cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - - load_current_idt(); - switch_to_new_gdt(cpu); - - /* - * Set up and load the per-CPU TSS and LDT - */ - mmgrab(&init_mm); - curr->active_mm = &init_mm; - BUG_ON(curr->mm); - initialize_tlbstate_and_flush(); - enter_lazy_tlb(&init_mm, curr); - - /* - * Initialize the TSS. sp0 points to the entry trampoline stack - * regardless of what task is running. - */ - set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); - load_TR_desc(); - load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1)); - - load_mm_ldt(&init_mm); - - t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; - -#ifdef CONFIG_DOUBLEFAULT - /* Set up doublefault TSS pointer in the GDT */ - __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); -#endif - - clear_all_debug_regs(); - dbg_restore_debug_regs(); - + cpu_init_exception_handling(); + cpu_init(); fpu__init_cpu(); - - load_fixmap_gdt(cpu); } #endif -static void bsp_resume(void) +#ifdef CONFIG_MICROCODE_LATE_LOADING +/** + * store_cpu_caps() - Store a snapshot of CPU capabilities + * @curr_info: Pointer where to store it + * + * Returns: None + */ +void store_cpu_caps(struct cpuinfo_x86 *curr_info) { - if (this_cpu->c_bsp_resume) - this_cpu->c_bsp_resume(&boot_cpu_data); + /* Reload CPUID max function as it might've changed. */ + curr_info->cpuid_level = cpuid_eax(0); + + /* Copy all capability leafs and pick up the synthetic ones. */ + memcpy(&curr_info->x86_capability, &boot_cpu_data.x86_capability, + sizeof(curr_info->x86_capability)); + + /* Get the hardware CPUID leafs */ + get_cpu_cap(curr_info); } -static struct syscore_ops cpu_syscore_ops = { - .resume = bsp_resume, -}; - -static int __init init_cpu_syscore(void) -{ - register_syscore_ops(&cpu_syscore_ops); - return 0; -} -core_initcall(init_cpu_syscore); - -/* +/** + * microcode_check() - Check if any CPU capabilities changed after an update. + * @prev_info: CPU capabilities stored before an update. + * * The microcode loader calls this upon late microcode load to recheck features, * only when microcode has been updated. Caller holds microcode_mutex and CPU * hotplug lock. + * + * Return: None */ -void microcode_check(void) +void microcode_check(struct cpuinfo_x86 *prev_info) { - struct cpuinfo_x86 info; + struct cpuinfo_x86 curr_info; perf_check_microcode(); - /* Reload CPUID max function as it might've changed. */ - info.cpuid_level = cpuid_eax(0); + amd_check_microcode(); - /* - * Copy all capability leafs to pick up the synthetic ones so that - * memcmp() below doesn't fail on that. The ones coming from CPUID will - * get overwritten in get_cpu_cap(). - */ - memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)); + store_cpu_caps(&curr_info); - get_cpu_cap(&info); - - if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability))) + if (!memcmp(&prev_info->x86_capability, &curr_info.x86_capability, + sizeof(prev_info->x86_capability))) return; pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n"); pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); } +#endif + +/* + * Invoked from core CPU hotplug code after hotplug operations + */ +void arch_smt_update(void) +{ + /* Handle the speculative execution misfeatures */ + cpu_bugs_smt_update(); + /* Check whether IPI broadcasting can be enabled */ + apic_smt_update(); +} + +void __init arch_cpu_finalize_init(void) +{ + identify_boot_cpu(); + + /* + * identify_boot_cpu() initialized SMT support information, let the + * core code know. + */ + cpu_smt_check_topology(); + + if (!IS_ENABLED(CONFIG_SMP)) { + pr_info("CPU: "); + print_cpu_info(&boot_cpu_data); + } + + cpu_select_mitigations(); + + arch_smt_update(); + + if (IS_ENABLED(CONFIG_X86_32)) { + /* + * Check whether this is a real i386 which is not longer + * supported and fixup the utsname. + */ + if (boot_cpu_data.x86 < 4) + panic("Kernel requires i486+ for 'invlpg' and other features"); + + init_utsname()->machine[1] = + '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); + } + + /* + * Must be before alternatives because it might set or clear + * feature bits. + */ + fpu__init_system(); + fpu__init_cpu(); + + alternative_instructions(); + + if (IS_ENABLED(CONFIG_X86_64)) { + /* + * Make sure the first 2MB area is not mapped by huge pages + * There are typically fixed size MTRRs in there and overlapping + * MTRRs into large pages causes slow downs. + * + * Right now we don't do that with gbpages because there seems + * very little benefit for that case. + */ + if (!direct_gbpages) + set_memory_4k((unsigned long)__va(0), 1); + } else { + fpu__init_check_bugs(); + } + + /* + * This needs to be called before any devices perform DMA + * operations that might use the SWIOTLB bounce buffers. It will + * mark the bounce buffers as decrypted so that their usage will + * not cause "plain-text" data to be decrypted when accessed. It + * must be called after late_time_init() so that Hyper-V x86/x64 + * hypercalls work when the SWIOTLB bounce buffers are decrypted. + */ + mem_encrypt_init(); +} -- Gitblit v1.6.2