forked from ~ljy/RK356X_SDK_RELEASE

Author: hc
Date:   2023-12-08
Commit: 01573e231f18eb2d99162747186f59511f56b64d
File:   kernel/arch/x86/kernel/cpu/common.c
@@ -1,7 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* cpu_feature_enabled() cannot be used this early */
 #define USE_EARLY_PGTABLE_L5
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/linkage.h>
 #include <linux/bitops.h>
 #include <linux/kernel.h>
@@ -13,16 +14,20 @@
 #include <linux/sched/mm.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/task.h>
+#include <linux/sched/smt.h>
 #include <linux/init.h>
 #include <linux/kprobes.h>
 #include <linux/kgdb.h>
 #include <linux/smp.h>
 #include <linux/io.h>
 #include <linux/syscore_ops.h>
+#include <linux/pgtable.h>
 
+#include <asm/cmdline.h>
 #include <asm/stackprotector.h>
 #include <asm/perf_event.h>
 #include <asm/mmu_context.h>
+#include <asm/doublefault.h>
 #include <asm/archrandom.h>
 #include <asm/hypervisor.h>
 #include <asm/processor.h>
@@ -32,7 +37,6 @@
 #include <asm/vsyscall.h>
 #include <linux/topology.h>
 #include <linux/cpumask.h>
-#include <asm/pgtable.h>
 #include <linux/atomic.h>
 #include <asm/proto.h>
 #include <asm/setup.h>
@@ -42,20 +46,18 @@
 #include <asm/mtrr.h>
 #include <asm/hwcap2.h>
 #include <linux/numa.h>
+#include <asm/numa.h>
 #include <asm/asm.h>
 #include <asm/bugs.h>
 #include <asm/cpu.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
 #include <asm/microcode.h>
 #include <asm/microcode_intel.h>
 #include <asm/intel-family.h>
 #include <asm/cpu_device_id.h>
-
-#ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/uv/uv.h>
-#endif
 
 #include "cpu.h"
 
@@ -163,22 +165,6 @@
 #endif
 } };
 EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
-
-static int __init x86_mpx_setup(char *s)
-{
-	/* require an exact match without trailing characters */
-	if (strlen(s))
-		return 0;
-
-	/* do not emit a message if the feature is not present */
-	if (!boot_cpu_has(X86_FEATURE_MPX))
-		return 1;
-
-	setup_clear_cpu_cap(X86_FEATURE_MPX);
-	pr_info("nompx: Intel Memory Protection Extensions (MPX) disabled\n");
-	return 1;
-}
-__setup("nompx", x86_mpx_setup);
 
 #ifdef CONFIG_X86_64
 static int __init x86_nopcid_setup(char *s)
@@ -306,8 +292,6 @@
 static __init int setup_disable_smep(char *arg)
 {
 	setup_clear_cpu_cap(X86_FEATURE_SMEP);
-	/* Check for things that depend on SMEP being enabled: */
-	check_mpx_erratum(&boot_cpu_data);
 	return 1;
 }
 __setup("nosmep", setup_disable_smep);
@@ -336,6 +320,7 @@
 #ifdef CONFIG_X86_SMAP
 	cr4_set_bits(X86_CR4_SMAP);
 #else
+	clear_cpu_cap(c, X86_FEATURE_SMAP);
 	cr4_clear_bits(X86_CR4_SMAP);
 #endif
 }
@@ -353,7 +338,7 @@
 
 	cr4_set_bits(X86_CR4_UMIP);
 
-	pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n");
+	pr_info_once("x86/cpu: User Mode Instruction Prevention (UMIP) activated\n");
 
 	return;
 
@@ -365,6 +350,116 @@
 	cr4_clear_bits(X86_CR4_UMIP);
 }
 
+/* These bits should not change their value after CPU init is finished. */
+static const unsigned long cr4_pinned_mask =
+	X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE;
+static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
+static unsigned long cr4_pinned_bits __ro_after_init;
+
+void native_write_cr0(unsigned long val)
+{
+	unsigned long bits_missing = 0;
+
+set_register:
+	asm volatile("mov %0,%%cr0": "+r" (val) : : "memory");
+
+	if (static_branch_likely(&cr_pinning)) {
+		if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
+			bits_missing = X86_CR0_WP;
+			val |= bits_missing;
+			goto set_register;
+		}
+		/* Warn after we've set the missing bits. */
+		WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
+	}
+}
+EXPORT_SYMBOL(native_write_cr0);
+
+void native_write_cr4(unsigned long val)
+{
+	unsigned long bits_changed = 0;
+
+set_register:
+	asm volatile("mov %0,%%cr4": "+r" (val) : : "memory");
+
+	if (static_branch_likely(&cr_pinning)) {
+		if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) {
+			bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits;
+			val = (val & ~cr4_pinned_mask) | cr4_pinned_bits;
+			goto set_register;
+		}
+		/* Warn after we've corrected the changed bits. */
+		WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n",
+			  bits_changed);
+	}
+}
+#if IS_MODULE(CONFIG_LKDTM)
+EXPORT_SYMBOL_GPL(native_write_cr4);
+#endif
+
+void cr4_update_irqsoff(unsigned long set, unsigned long clear)
+{
+	unsigned long newval, cr4 = this_cpu_read(cpu_tlbstate.cr4);
+
+	lockdep_assert_irqs_disabled();
+
+	newval = (cr4 & ~clear) | set;
+	if (newval != cr4) {
+		this_cpu_write(cpu_tlbstate.cr4, newval);
+		__write_cr4(newval);
+	}
+}
+EXPORT_SYMBOL(cr4_update_irqsoff);
+
+/* Read the CR4 shadow. */
+unsigned long cr4_read_shadow(void)
+{
+	return this_cpu_read(cpu_tlbstate.cr4);
+}
+EXPORT_SYMBOL_GPL(cr4_read_shadow);
+
+void cr4_init(void)
+{
+	unsigned long cr4 = __read_cr4();
+
+	if (boot_cpu_has(X86_FEATURE_PCID))
+		cr4 |= X86_CR4_PCIDE;
+	if (static_branch_likely(&cr_pinning))
+		cr4 = (cr4 & ~cr4_pinned_mask) | cr4_pinned_bits;
+
+	__write_cr4(cr4);
+
+	/* Initialize cr4 shadow for this CPU. */
+	this_cpu_write(cpu_tlbstate.cr4, cr4);
+}
+
+/*
+ * Once CPU feature detection is finished (and boot params have been
+ * parsed), record any of the sensitive CR bits that are set, and
+ * enable CR pinning.
+ */
+static void __init setup_cr_pinning(void)
+{
+	cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & cr4_pinned_mask;
+	static_key_enable(&cr_pinning.key);
+}
+
+static __init int x86_nofsgsbase_setup(char *arg)
+{
+	/* Require an exact match without trailing characters. */
+	if (strlen(arg))
+		return 0;
+
+	/* Do not emit a message if the feature is not present. */
+	if (!boot_cpu_has(X86_FEATURE_FSGSBASE))
+		return 1;
+
+	setup_clear_cpu_cap(X86_FEATURE_FSGSBASE);
+	pr_info("FSGSBASE disabled via kernel command line\n");
+	return 1;
+}
+__setup("nofsgsbase", x86_nofsgsbase_setup);
+
 /*
  * Protection Keys are not available in 32-bit mode.
  */
@@ -372,6 +467,8 @@
 
 static __always_inline void setup_pku(struct cpuinfo_x86 *c)
 {
+	struct pkru_state *pk;
+
 	/* check the boot processor, plus compile options for PKU: */
 	if (!cpu_feature_enabled(X86_FEATURE_PKU))
 		return;
@@ -382,6 +479,9 @@
 		return;
 
 	cr4_set_bits(X86_CR4_PKE);
+	pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU);
+	if (pk)
+		pk->pkru = init_pkru_value;
 	/*
 	 * Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
 	 * cpuid bit to be set. We need to ensure that we
@@ -488,8 +588,9 @@
 	return NULL;	/* Not found */
 }
 
-__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
-__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
+/* Aligned to unsigned long to avoid split lock in atomic bitmap ops */
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
 
 void load_percpu_segment(int cpu)
 {
@@ -505,19 +606,6 @@
 #ifdef CONFIG_X86_32
 /* The 32-bit entry code needs to find cpu_entry_area. */
 DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
-#endif
-
-#ifdef CONFIG_X86_64
-/*
- * Special IST stacks which the CPU switches to when it calls
- * an IST-marked descriptor entry. Up to 7 stacks (hardware
- * limit), all of them are 4K, except the debug stack which
- * is 8K.
- */
-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
-	[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
-	[DEBUG_STACK - 1] = DEBUG_STKSZ
-};
 #endif
 
 /* Load the original GDT from the per-cpu structure */
@@ -808,30 +896,6 @@
 	}
 }
 
-static void init_cqm(struct cpuinfo_x86 *c)
-{
-	if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
-		c->x86_cache_max_rmid = -1;
-		c->x86_cache_occ_scale = -1;
-		return;
-	}
-
-	/* will be overridden if occupancy monitoring exists */
-	c->x86_cache_max_rmid = cpuid_ebx(0xf);
-
-	if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
-	    cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
-	    cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
-		u32 eax, ebx, ecx, edx;
-
-		/* QoS sub-leaf, EAX=0Fh, ECX=1 */
-		cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
-
-		c->x86_cache_max_rmid = ecx;
-		c->x86_cache_occ_scale = ebx;
-	}
-}
-
 void get_cpu_cap(struct cpuinfo_x86 *c)
 {
 	u32 eax, ebx, ecx, edx;
@@ -854,6 +918,12 @@
 		c->x86_capability[CPUID_7_0_EBX] = ebx;
 		c->x86_capability[CPUID_7_ECX] = ecx;
 		c->x86_capability[CPUID_7_EDX] = edx;
+
+		/* Check valid sub-leaf index before accessing it */
+		if (eax >= 1) {
+			cpuid_count(0x00000007, 1, &eax, &ebx, &ecx, &edx);
+			c->x86_capability[CPUID_7_1_EAX] = eax;
+		}
 	}
 
 	/* Extended state features: level 0x0000000d */
@@ -893,7 +963,6 @@
 
 	init_scattered_cpuid_features(c);
 	init_speculation_control(c);
-	init_cqm(c);
 
 	/*
 	 * Clear/Set all flags overridden by options, after probe.
....@@ -954,15 +1023,21 @@
9541023 #define MSBDS_ONLY BIT(5)
9551024 #define NO_SWAPGS BIT(6)
9561025 #define NO_ITLB_MULTIHIT BIT(7)
1026
+#define NO_SPECTRE_V2 BIT(8)
1027
+#define NO_MMIO BIT(9)
1028
+#define NO_EIBRS_PBRSB BIT(10)
9571029
958
-#define VULNWL(_vendor, _family, _model, _whitelist) \
959
- { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
1030
+#define VULNWL(vendor, family, model, whitelist) \
1031
+ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
9601032
9611033 #define VULNWL_INTEL(model, whitelist) \
9621034 VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist)
9631035
9641036 #define VULNWL_AMD(family, whitelist) \
9651037 VULNWL(AMD, family, X86_MODEL_ANY, whitelist)
1038
+
1039
+#define VULNWL_HYGON(family, whitelist) \
1040
+ VULNWL(HYGON, family, X86_MODEL_ANY, whitelist)
9661041
9671042 static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
9681043 VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION),
....@@ -971,6 +1046,11 @@
9711046 VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION),
9721047
9731048 /* Intel Family 6 */
1049
+ VULNWL_INTEL(TIGERLAKE, NO_MMIO),
1050
+ VULNWL_INTEL(TIGERLAKE_L, NO_MMIO),
1051
+ VULNWL_INTEL(ALDERLAKE, NO_MMIO),
1052
+ VULNWL_INTEL(ALDERLAKE_L, NO_MMIO),
1053
+
9741054 VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
9751055 VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT),
9761056 VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
....@@ -978,7 +1058,7 @@
9781058 VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
9791059
9801060 VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
981
- VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
1061
+ VULNWL_INTEL(ATOM_SILVERMONT_D, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
9821062 VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
9831063 VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
9841064 VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
....@@ -987,10 +1067,11 @@
9871067 VULNWL_INTEL(CORE_YONAH, NO_SSB),
9881068
9891069 VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
1070
+ VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
9901071
991
- VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
992
- VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
993
- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
1072
+ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
1073
+ VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
1074
+ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
9941075
9951076 /*
9961077 * Technically, swapgs isn't serializing on AMD (despite it previously
....@@ -1000,37 +1081,80 @@
10001081 * good enough for our purposes.
10011082 */
10021083
1003
- VULNWL_INTEL(ATOM_TREMONT_X, NO_ITLB_MULTIHIT),
1084
+ VULNWL_INTEL(ATOM_TREMONT, NO_EIBRS_PBRSB),
1085
+ VULNWL_INTEL(ATOM_TREMONT_L, NO_EIBRS_PBRSB),
1086
+ VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
10041087
10051088 /* AMD Family 0xf - 0x12 */
1006
- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
1007
- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
1008
- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
1009
- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
1089
+ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
1090
+ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
1091
+ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
1092
+ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
10101093
10111094 /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
1012
- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
1095
+ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
1096
+ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
1097
+
1098
+ /* Zhaoxin Family 7 */
1099
+ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
1100
+ VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
10131101 {}
10141102 };
1103
+
1104
+#define VULNBL(vendor, family, model, blacklist) \
1105
+ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist)
10151106
10161107 #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \
10171108 X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \
10181109 INTEL_FAM6_##model, steppings, \
10191110 X86_FEATURE_ANY, issues)
10201111
1112
+#define VULNBL_AMD(family, blacklist) \
1113
+ VULNBL(AMD, family, X86_MODEL_ANY, blacklist)
1114
+
1115
+#define VULNBL_HYGON(family, blacklist) \
1116
+ VULNBL(HYGON, family, X86_MODEL_ANY, blacklist)
1117
+
10211118 #define SRBDS BIT(0)
1119
+/* CPU is affected by X86_BUG_MMIO_STALE_DATA */
1120
+#define MMIO BIT(1)
1121
+/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */
1122
+#define MMIO_SBDS BIT(2)
1123
+/* CPU is affected by RETbleed, speculating where you would not expect it */
1124
+#define RETBLEED BIT(3)
10221125
10231126 static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
10241127 VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
1025
- VULNBL_INTEL_STEPPINGS(HASWELL_CORE, X86_STEPPING_ANY, SRBDS),
1026
- VULNBL_INTEL_STEPPINGS(HASWELL_ULT, X86_STEPPING_ANY, SRBDS),
1027
- VULNBL_INTEL_STEPPINGS(HASWELL_GT3E, X86_STEPPING_ANY, SRBDS),
1028
- VULNBL_INTEL_STEPPINGS(BROADWELL_GT3E, X86_STEPPING_ANY, SRBDS),
1029
- VULNBL_INTEL_STEPPINGS(BROADWELL_CORE, X86_STEPPING_ANY, SRBDS),
1030
- VULNBL_INTEL_STEPPINGS(SKYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS),
1031
- VULNBL_INTEL_STEPPINGS(SKYLAKE_DESKTOP, X86_STEPPING_ANY, SRBDS),
1032
- VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPINGS(0x0, 0xC), SRBDS),
1033
- VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPINGS(0x0, 0xD), SRBDS),
1128
+ VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS),
1129
+ VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS),
1130
+ VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS),
1131
+ VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO),
1132
+ VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO),
1133
+ VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS),
1134
+ VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
1135
+ VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS),
1136
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
1137
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED),
1138
+ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
1139
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
1140
+ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
1141
+ VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED),
1142
+ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
1143
+ VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO),
1144
+ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO),
1145
+ VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
1146
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED),
1147
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
1148
+ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
1149
+ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED),
1150
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
1151
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO),
1152
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
1153
+
1154
+ VULNBL_AMD(0x15, RETBLEED),
1155
+ VULNBL_AMD(0x16, RETBLEED),
1156
+ VULNBL_AMD(0x17, RETBLEED),
1157
+ VULNBL_HYGON(0x18, RETBLEED),
10341158 {}
10351159 };
10361160
....@@ -1051,6 +1175,13 @@
10511175 return ia32_cap;
10521176 }
10531177
1178
+static bool arch_cap_mmio_immune(u64 ia32_cap)
1179
+{
1180
+ return (ia32_cap & ARCH_CAP_FBSDP_NO &&
1181
+ ia32_cap & ARCH_CAP_PSDP_NO &&
1182
+ ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
1183
+}
1184
+
10541185 static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
10551186 {
10561187 u64 ia32_cap = x86_read_arch_cap_msr();
....@@ -1064,7 +1195,9 @@
10641195 return;
10651196
10661197 setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
1067
- setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
1198
+
1199
+ if (!cpu_matches(cpu_vuln_whitelist, NO_SPECTRE_V2))
1200
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
10681201
10691202 if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) &&
10701203 !(ia32_cap & ARCH_CAP_SSB_NO) &&
....@@ -1102,11 +1235,42 @@
11021235 /*
11031236 * SRBDS affects CPUs which support RDRAND or RDSEED and are listed
11041237 * in the vulnerability blacklist.
1238
+ *
1239
+ * Some of the implications and mitigation of Shared Buffers Data
1240
+ * Sampling (SBDS) are similar to SRBDS. Give SBDS same treatment as
1241
+ * SRBDS.
11051242 */
11061243 if ((cpu_has(c, X86_FEATURE_RDRAND) ||
11071244 cpu_has(c, X86_FEATURE_RDSEED)) &&
1108
- cpu_matches(cpu_vuln_blacklist, SRBDS))
1245
+ cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS))
11091246 setup_force_cpu_bug(X86_BUG_SRBDS);
1247
+
1248
+ /*
1249
+ * Processor MMIO Stale Data bug enumeration
1250
+ *
1251
+ * Affected CPU list is generally enough to enumerate the vulnerability,
1252
+ * but for virtualization case check for ARCH_CAP MSR bits also, VMM may
1253
+ * not want the guest to enumerate the bug.
1254
+ *
1255
+ * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
1256
+ * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
1257
+ */
1258
+ if (!arch_cap_mmio_immune(ia32_cap)) {
1259
+ if (cpu_matches(cpu_vuln_blacklist, MMIO))
1260
+ setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
1261
+ else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
1262
+ setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
1263
+ }
1264
+
1265
+ if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
1266
+ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
1267
+ setup_force_cpu_bug(X86_BUG_RETBLEED);
1268
+ }
1269
+
1270
+ if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) &&
1271
+ !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
1272
+ !(ia32_cap & ARCH_CAP_PBRSB_NO))
1273
+ setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
11101274
11111275 if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
11121276 return;
....@@ -1142,6 +1306,59 @@
11421306 }
11431307
11441308 /*
1309
+ * We parse cpu parameters early because fpu__init_system() is executed
1310
+ * before parse_early_param().
1311
+ */
1312
+static void __init cpu_parse_early_param(void)
1313
+{
1314
+ char arg[128];
1315
+ char *argptr = arg;
1316
+ int arglen, res, bit;
1317
+
1318
+#ifdef CONFIG_X86_32
1319
+ if (cmdline_find_option_bool(boot_command_line, "no387"))
1320
+#ifdef CONFIG_MATH_EMULATION
1321
+ setup_clear_cpu_cap(X86_FEATURE_FPU);
1322
+#else
1323
+ pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n");
1324
+#endif
1325
+
1326
+ if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
1327
+ setup_clear_cpu_cap(X86_FEATURE_FXSR);
1328
+#endif
1329
+
1330
+ if (cmdline_find_option_bool(boot_command_line, "noxsave"))
1331
+ setup_clear_cpu_cap(X86_FEATURE_XSAVE);
1332
+
1333
+ if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
1334
+ setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
1335
+
1336
+ if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
1337
+ setup_clear_cpu_cap(X86_FEATURE_XSAVES);
1338
+
1339
+ arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
1340
+ if (arglen <= 0)
1341
+ return;
1342
+
1343
+ pr_info("Clearing CPUID bits:");
1344
+ do {
1345
+ res = get_option(&argptr, &bit);
1346
+ if (res == 0 || res == 3)
1347
+ break;
1348
+
1349
+ /* If the argument was too long, the last bit may be cut off */
1350
+ if (res == 1 && arglen >= sizeof(arg))
1351
+ break;
1352
+
1353
+ if (bit >= 0 && bit < NCAPINTS * 32) {
1354
+ pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
1355
+ setup_clear_cpu_cap(bit);
1356
+ }
1357
+ } while (res == 2);
1358
+ pr_cont("\n");
1359
+}
1360
+
1361
+/*
11451362 * Do minimum CPU detection early.
11461363 * Fields really needed: vendor, cpuid_level, family, model, mask,
11471364 * cache alignment.
....@@ -1163,7 +1380,7 @@
11631380 #endif
11641381 c->x86_cache_alignment = c->x86_clflush_size;
11651382
1166
- memset(&c->x86_capability, 0, sizeof c->x86_capability);
1383
+ memset(&c->x86_capability, 0, sizeof(c->x86_capability));
11671384 c->extended_cpuid_level = 0;
11681385
11691386 if (!have_cpuid_p())
....@@ -1176,6 +1393,7 @@
11761393 get_cpu_cap(c);
11771394 get_cpu_address_sizes(c);
11781395 setup_force_cpu_cap(X86_FEATURE_CPUID);
1396
+ cpu_parse_early_param();
11791397
11801398 if (this_cpu->c_early_init)
11811399 this_cpu->c_early_init(c);
....@@ -1192,6 +1410,8 @@
11921410 setup_force_cpu_cap(X86_FEATURE_ALWAYS);
11931411
11941412 cpu_set_bug_bits(c);
1413
+
1414
+ cpu_set_core_cap_bits(c);
11951415
11961416 fpu__init_system(c);
11971417
....@@ -1362,30 +1582,8 @@
13621582 * ESPFIX issue, we can change this.
13631583 */
13641584 #ifdef CONFIG_X86_32
1365
-# ifdef CONFIG_PARAVIRT
1366
- do {
1367
- extern void native_iret(void);
1368
- if (pv_cpu_ops.iret == native_iret)
1369
- set_cpu_bug(c, X86_BUG_ESPFIX);
1370
- } while (0);
1371
-# else
13721585 set_cpu_bug(c, X86_BUG_ESPFIX);
1373
-# endif
13741586 #endif
1375
-}
1376
-
1377
-static void x86_init_cache_qos(struct cpuinfo_x86 *c)
1378
-{
1379
- /*
1380
- * The heavy lifting of max_rmid and cache_occ_scale are handled
1381
- * in get_cpu_cap(). Here we just set the max_rmid for the boot_cpu
1382
- * in case CQM bits really aren't there in this CPU.
1383
- */
1384
- if (c != &boot_cpu_data) {
1385
- boot_cpu_data.x86_cache_max_rmid =
1386
- min(boot_cpu_data.x86_cache_max_rmid,
1387
- c->x86_cache_max_rmid);
1388
- }
13891587 }
13901588
13911589 /*
....@@ -1404,6 +1602,7 @@
14041602 cpu, apicid, c->initial_apicid);
14051603 }
14061604 BUG_ON(topology_update_package_map(c->phys_proc_id, cpu));
1605
+ BUG_ON(topology_update_die_map(c->cpu_die_id, cpu));
14071606 #else
14081607 c->logical_proc_id = 0;
14091608 #endif
....@@ -1436,7 +1635,10 @@
14361635 c->x86_virt_bits = 32;
14371636 #endif
14381637 c->x86_cache_alignment = c->x86_clflush_size;
1439
- memset(&c->x86_capability, 0, sizeof c->x86_capability);
1638
+ memset(&c->x86_capability, 0, sizeof(c->x86_capability));
1639
+#ifdef CONFIG_X86_VMX_FEATURE_NAMES
1640
+ memset(&c->vmx_capability, 0, sizeof(c->vmx_capability));
1641
+#endif
14401642
14411643 generic_identify(c);
14421644
....@@ -1471,6 +1673,12 @@
14711673 setup_smap(c);
14721674 setup_umip(c);
14731675
1676
+ /* Enable FSGSBASE instructions if available. */
1677
+ if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
1678
+ cr4_set_bits(X86_CR4_FSGSBASE);
1679
+ elf_hwcap2 |= HWCAP2_FSGSBASE;
1680
+ }
1681
+
14741682 /*
14751683 * The vendor-specific functions might have changed features.
14761684 * Now we do "generic changes."
....@@ -1496,7 +1704,6 @@
14961704 #endif
14971705
14981706 x86_init_rdrand(c);
1499
- x86_init_cache_qos(c);
15001707 setup_pku(c);
15011708
15021709 /*
....@@ -1569,6 +1776,8 @@
15691776 enable_sep_cpu();
15701777 #endif
15711778 cpu_detect_tlb(&boot_cpu_data);
1779
+ setup_cr_pinning();
1780
+
15721781 tsx_init();
15731782 }
15741783
....@@ -1632,9 +1841,9 @@
16321841 __setup("clearcpuid=", setup_clearcpuid);
16331842
16341843 #ifdef CONFIG_X86_64
1635
-DEFINE_PER_CPU_FIRST(union irq_stack_union,
1636
- irq_stack_union) __aligned(PAGE_SIZE) __visible;
1637
-EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union);
1844
+DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
1845
+ fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
1846
+EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
16381847
16391848 /*
16401849 * The following percpu variables are hot. Align current_task to
....@@ -1644,9 +1853,7 @@
16441853 &init_task;
16451854 EXPORT_PER_CPU_SYMBOL(current_task);
16461855
1647
-DEFINE_PER_CPU(char *, irq_stack_ptr) =
1648
- init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE;
1649
-
1856
+DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
16501857 DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
16511858
16521859 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
....@@ -1655,19 +1862,8 @@
16551862 /* May not be marked __init: used by software suspend */
16561863 void syscall_init(void)
16571864 {
1658
- extern char _entry_trampoline[];
1659
- extern char entry_SYSCALL_64_trampoline[];
1660
-
1661
- int cpu = smp_processor_id();
1662
- unsigned long SYSCALL64_entry_trampoline =
1663
- (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
1664
- (entry_SYSCALL_64_trampoline - _entry_trampoline);
1665
-
16661865 wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
1667
- if (static_cpu_has(X86_FEATURE_PTI))
1668
- wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
1669
- else
1670
- wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
1866
+ wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
16711867
16721868 #ifdef CONFIG_IA32_EMULATION
16731869 wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
....@@ -1678,7 +1874,8 @@
16781874 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
16791875 */
16801876 wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
1681
- wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
1877
+ wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
1878
+ (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
16821879 wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
16831880 #else
16841881 wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
....@@ -1692,41 +1889,6 @@
16921889 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|
16931890 X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT);
16941891 }
1695
-
1696
-/*
1697
- * Copies of the original ist values from the tss are only accessed during
1698
- * debugging, no special alignment required.
1699
- */
1700
-DEFINE_PER_CPU(struct orig_ist, orig_ist);
1701
-
1702
-static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
1703
-DEFINE_PER_CPU(int, debug_stack_usage);
1704
-
1705
-int is_debug_stack(unsigned long addr)
1706
-{
1707
- return __this_cpu_read(debug_stack_usage) ||
1708
- (addr <= __this_cpu_read(debug_stack_addr) &&
1709
- addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ));
1710
-}
1711
-NOKPROBE_SYMBOL(is_debug_stack);
1712
-
1713
-DEFINE_PER_CPU(u32, debug_idt_ctr);
1714
-
1715
-void debug_stack_set_zero(void)
1716
-{
1717
- this_cpu_inc(debug_idt_ctr);
1718
- load_current_idt();
1719
-}
1720
-NOKPROBE_SYMBOL(debug_stack_set_zero);
1721
-
1722
-void debug_stack_reset(void)
1723
-{
1724
- if (WARN_ON(!this_cpu_read(debug_idt_ctr)))
1725
- return;
1726
- if (this_cpu_dec_return(debug_idt_ctr) == 0)
1727
- load_current_idt();
1728
-}
1729
-NOKPROBE_SYMBOL(debug_stack_reset);
17301892
17311893 #else /* CONFIG_X86_64 */
17321894
....@@ -1794,12 +1956,12 @@
17941956 }
17951957
17961958 #ifdef CONFIG_X86_64
1797
-static void setup_getcpu(int cpu)
1959
+static inline void setup_getcpu(int cpu)
17981960 {
17991961 unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
18001962 struct desc_struct d = { };
18011963
1802
- if (boot_cpu_has(X86_FEATURE_RDTSCP))
1964
+ if (boot_cpu_has(X86_FEATURE_RDTSCP) || boot_cpu_has(X86_FEATURE_RDPID))
18031965 write_rdtscp_aux(cpudata);
18041966
18051967 /* Store CPU and node number in limit. */
....@@ -1814,39 +1976,91 @@
18141976
18151977 write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_CPUNODE, &d, DESCTYPE_S);
18161978 }
1979
+
1980
+static inline void ucode_cpu_init(int cpu)
1981
+{
1982
+ if (cpu)
1983
+ load_ucode_ap();
1984
+}
1985
+
1986
+static inline void tss_setup_ist(struct tss_struct *tss)
1987
+{
1988
+ /* Set up the per-CPU TSS IST stacks */
1989
+ tss->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF);
1990
+ tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
1991
+ tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
1992
+ tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
1993
+ /* Only mapped when SEV-ES is active */
1994
+ tss->x86_tss.ist[IST_INDEX_VC] = __this_cpu_ist_top_va(VC);
1995
+}
1996
+
1997
+#else /* CONFIG_X86_64 */
1998
+
1999
+static inline void setup_getcpu(int cpu) { }
2000
+
2001
+static inline void ucode_cpu_init(int cpu)
2002
+{
2003
+ show_ucode_info_early();
2004
+}
2005
+
2006
+static inline void tss_setup_ist(struct tss_struct *tss) { }
2007
+
2008
+#endif /* !CONFIG_X86_64 */
2009
+
2010
+static inline void tss_setup_io_bitmap(struct tss_struct *tss)
2011
+{
2012
+ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
2013
+
2014
+#ifdef CONFIG_X86_IOPL_IOPERM
2015
+ tss->io_bitmap.prev_max = 0;
2016
+ tss->io_bitmap.prev_sequence = 0;
2017
+ memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap));
2018
+ /*
2019
+ * Invalidate the extra array entry past the end of the all
2020
+ * permission bitmap as required by the hardware.
2021
+ */
2022
+ tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL;
18172023 #endif
2024
+}
2025
+
2026
+/*
2027
+ * Setup everything needed to handle exceptions from the IDT, including the IST
2028
+ * exceptions which use paranoid_entry().
2029
+ */
2030
+void cpu_init_exception_handling(void)
2031
+{
2032
+ struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
2033
+ int cpu = raw_smp_processor_id();
2034
+
2035
+ /* paranoid_entry() gets the CPU number from the GDT */
2036
+ setup_getcpu(cpu);
2037
+
2038
+ /* IST vectors need TSS to be set up. */
2039
+ tss_setup_ist(tss);
2040
+ tss_setup_io_bitmap(tss);
2041
+ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
2042
+
2043
+ load_TR_desc();
2044
+
2045
+ /* Finally load the IDT */
2046
+ load_current_idt();
2047
+}
18182048
18192049 /*
18202050 * cpu_init() initializes state that is per-CPU. Some data is already
18212051 * initialized (naturally) in the bootstrap process, such as the GDT
18222052 * and IDT. We reload them nevertheless, this function acts as a
18232053 * 'CPU state barrier', nothing should get across.
1824
- * A lot of state is already set up in PDA init for 64 bit
18252054 */
1826
-#ifdef CONFIG_X86_64
1827
-
18282055 void cpu_init(void)
18292056 {
1830
- struct orig_ist *oist;
1831
- struct task_struct *me;
1832
- struct tss_struct *t;
1833
- unsigned long v;
2057
+ struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
2058
+ struct task_struct *cur = current;
18342059 int cpu = raw_smp_processor_id();
1835
- int i;
18362060
18372061 wait_for_master_cpu(cpu);
18382062
1839
- /*
1840
- * Initialize the CR4 shadow before doing anything that could
1841
- * try to read it.
1842
- */
1843
- cr4_init_shadow();
1844
-
1845
- if (cpu)
1846
- load_ucode_ap();
1847
-
1848
- t = &per_cpu(cpu_tss_rw, cpu);
1849
- oist = &per_cpu(orig_ist, cpu);
2063
+ ucode_cpu_init(cpu);
18502064
18512065 #ifdef CONFIG_NUMA
18522066 if (this_cpu_read(numa_node) == 0 &&
....@@ -1855,74 +2069,55 @@
18552069 #endif
18562070 setup_getcpu(cpu);
18572071
1858
- me = current;
1859
-
18602072 pr_debug("Initializing CPU#%d\n", cpu);
18612073
1862
- cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
2074
+ if (IS_ENABLED(CONFIG_X86_64) || cpu_feature_enabled(X86_FEATURE_VME) ||
2075
+ boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE))
2076
+ cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
18632077
18642078 /*
18652079 * Initialize the per-CPU GDT with the boot GDT,
18662080 * and set up the GDT descriptor:
18672081 */
1868
-
18692082 switch_to_new_gdt(cpu);
1870
- loadsegment(fs, 0);
1871
-
18722083 load_current_idt();
18732084
1874
- memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
1875
- syscall_init();
2085
+ if (IS_ENABLED(CONFIG_X86_64)) {
2086
+ loadsegment(fs, 0);
2087
+ memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
2088
+ syscall_init();
18762089
1877
- wrmsrl(MSR_FS_BASE, 0);
1878
- wrmsrl(MSR_KERNEL_GS_BASE, 0);
1879
- barrier();
2090
+ wrmsrl(MSR_FS_BASE, 0);
2091
+ wrmsrl(MSR_KERNEL_GS_BASE, 0);
2092
+ barrier();
18802093
1881
- x86_configure_nx();
1882
- x2apic_setup();
1883
-
1884
- /*
1885
- * set up and load the per-CPU TSS
1886
- */
1887
- if (!oist->ist[0]) {
1888
- char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
1889
-
1890
- for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1891
- estacks += exception_stack_sizes[v];
1892
- oist->ist[v] = t->x86_tss.ist[v] =
1893
- (unsigned long)estacks;
1894
- if (v == DEBUG_STACK-1)
1895
- per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
1896
- }
2094
+ x2apic_setup();
18972095 }
18982096
1899
- t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
1900
-
1901
- /*
1902
- * <= is required because the CPU will access up to
1903
- * 8 bits beyond the end of the IO permission bitmap.
1904
- */
1905
- for (i = 0; i <= IO_BITMAP_LONGS; i++)
1906
- t->io_bitmap[i] = ~0UL;
1907
-
19082097 mmgrab(&init_mm);
1909
- me->active_mm = &init_mm;
1910
- BUG_ON(me->mm);
2098
+ cur->active_mm = &init_mm;
2099
+ BUG_ON(cur->mm);
19112100 initialize_tlbstate_and_flush();
1912
- enter_lazy_tlb(&init_mm, me);
2101
+ enter_lazy_tlb(&init_mm, cur);
19132102
1914
- /*
1915
- * Initialize the TSS. sp0 points to the entry trampoline stack
1916
- * regardless of what task is running.
1917
- */
2103
+ /* Initialize the TSS. */
2104
+ tss_setup_ist(tss);
2105
+ tss_setup_io_bitmap(tss);
19182106 set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
2107
+
19192108 load_TR_desc();
2109
+ /*
2110
+ * sp0 points to the entry trampoline stack regardless of what task
2111
+ * is running.
2112
+ */
19202113 load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
19212114
19222115 load_mm_ldt(&init_mm);
19232116
19242117 clear_all_debug_regs();
19252118 dbg_restore_debug_regs();
2119
+
2120
+ doublefault_init_cpu_tss();
19262121
19272122 fpu__init_cpu();
19282123
....@@ -1931,86 +2126,6 @@
19312126
19322127 load_fixmap_gdt(cpu);
19332128 }
1934
-
1935
-#else
1936
-
1937
-void cpu_init(void)
1938
-{
1939
- int cpu = smp_processor_id();
1940
- struct task_struct *curr = current;
1941
- struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
1942
-
1943
- wait_for_master_cpu(cpu);
1944
-
1945
- /*
1946
- * Initialize the CR4 shadow before doing anything that could
1947
- * try to read it.
1948
- */
1949
- cr4_init_shadow();
1950
-
1951
- show_ucode_info_early();
1952
-
1953
- pr_info("Initializing CPU#%d\n", cpu);
1954
-
1955
- if (cpu_feature_enabled(X86_FEATURE_VME) ||
1956
- boot_cpu_has(X86_FEATURE_TSC) ||
1957
- boot_cpu_has(X86_FEATURE_DE))
1958
- cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
1959
-
1960
- load_current_idt();
1961
- switch_to_new_gdt(cpu);
1962
-
1963
- /*
1964
- * Set up and load the per-CPU TSS and LDT
1965
- */
1966
- mmgrab(&init_mm);
1967
- curr->active_mm = &init_mm;
1968
- BUG_ON(curr->mm);
1969
- initialize_tlbstate_and_flush();
1970
- enter_lazy_tlb(&init_mm, curr);
1971
-
1972
- /*
1973
- * Initialize the TSS. sp0 points to the entry trampoline stack
1974
- * regardless of what task is running.
1975
- */
1976
- set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
1977
- load_TR_desc();
1978
- load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
1979
-
1980
- load_mm_ldt(&init_mm);
1981
-
1982
- t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
1983
-
1984
-#ifdef CONFIG_DOUBLEFAULT
1985
- /* Set up doublefault TSS pointer in the GDT */
1986
- __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
1987
-#endif
1988
-
1989
- clear_all_debug_regs();
1990
- dbg_restore_debug_regs();
1991
-
1992
- fpu__init_cpu();
1993
-
1994
- load_fixmap_gdt(cpu);
1995
-}
1996
-#endif
1997
-
1998
-static void bsp_resume(void)
1999
-{
2000
- if (this_cpu->c_bsp_resume)
2001
- this_cpu->c_bsp_resume(&boot_cpu_data);
2002
-}
2003
-
2004
-static struct syscore_ops cpu_syscore_ops = {
2005
- .resume = bsp_resume,
2006
-};
2007
-
2008
-static int __init init_cpu_syscore(void)
2009
-{
2010
- register_syscore_ops(&cpu_syscore_ops);
2011
- return 0;
2012
-}
2013
-core_initcall(init_cpu_syscore);
20142129
20152130 /*
20162131 * The microcode loader calls this upon late microcode load to recheck features,
....@@ -2041,3 +2156,14 @@
20412156 pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n");
20422157 pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
20432158 }
2159
+
2160
+/*
2161
+ * Invoked from core CPU hotplug code after hotplug operations
2162
+ */
2163
+void arch_smt_update(void)
2164
+{
2165
+ /* Handle the speculative execution misfeatures */
2166
+ cpu_bugs_smt_update();
2167
+ /* Check whether IPI broadcasting can be enabled */
2168
+ apic_smt_update();
2169
+}