2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/arch/x86/kernel/cpu/common.c
@@ -1,7 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* cpu_feature_enabled() cannot be used this early */
 #define USE_EARLY_PGTABLE_L5
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/linkage.h>
 #include <linux/bitops.h>
 #include <linux/kernel.h>
@@ -13,16 +14,24 @@
 #include <linux/sched/mm.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/task.h>
+#include <linux/sched/smt.h>
 #include <linux/init.h>
 #include <linux/kprobes.h>
 #include <linux/kgdb.h>
+#include <linux/mem_encrypt.h>
 #include <linux/smp.h>
+#include <linux/cpu.h>
 #include <linux/io.h>
 #include <linux/syscore_ops.h>
+#include <linux/pgtable.h>
+#include <linux/utsname.h>
 
+#include <asm/alternative.h>
+#include <asm/cmdline.h>
 #include <asm/stackprotector.h>
 #include <asm/perf_event.h>
 #include <asm/mmu_context.h>
+#include <asm/doublefault.h>
 #include <asm/archrandom.h>
 #include <asm/hypervisor.h>
 #include <asm/processor.h>
@@ -32,7 +41,6 @@
 #include <asm/vsyscall.h>
 #include <linux/topology.h>
 #include <linux/cpumask.h>
-#include <asm/pgtable.h>
 #include <linux/atomic.h>
 #include <asm/proto.h>
 #include <asm/setup.h>
@@ -42,20 +50,19 @@
 #include <asm/mtrr.h>
 #include <asm/hwcap2.h>
 #include <linux/numa.h>
+#include <asm/numa.h>
 #include <asm/asm.h>
 #include <asm/bugs.h>
 #include <asm/cpu.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
-#include <asm/pat.h>
+#include <asm/memtype.h>
 #include <asm/microcode.h>
 #include <asm/microcode_intel.h>
 #include <asm/intel-family.h>
 #include <asm/cpu_device_id.h>
-
-#ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/uv/uv.h>
-#endif
+#include <asm/set_memory.h>
 
 #include "cpu.h"
 
@@ -163,22 +170,6 @@
 #endif
 } };
 EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
-
-static int __init x86_mpx_setup(char *s)
-{
-	/* require an exact match without trailing characters */
-	if (strlen(s))
-		return 0;
-
-	/* do not emit a message if the feature is not present */
-	if (!boot_cpu_has(X86_FEATURE_MPX))
-		return 1;
-
-	setup_clear_cpu_cap(X86_FEATURE_MPX);
-	pr_info("nompx: Intel Memory Protection Extensions (MPX) disabled\n");
-	return 1;
-}
-__setup("nompx", x86_mpx_setup);
 
 #ifdef CONFIG_X86_64
 static int __init x86_nopcid_setup(char *s)
@@ -306,8 +297,6 @@
 static __init int setup_disable_smep(char *arg)
 {
 	setup_clear_cpu_cap(X86_FEATURE_SMEP);
-	/* Check for things that depend on SMEP being enabled: */
-	check_mpx_erratum(&boot_cpu_data);
 	return 1;
 }
 __setup("nosmep", setup_disable_smep);
@@ -336,6 +325,7 @@
 #ifdef CONFIG_X86_SMAP
 		cr4_set_bits(X86_CR4_SMAP);
 #else
+		clear_cpu_cap(c, X86_FEATURE_SMAP);
 		cr4_clear_bits(X86_CR4_SMAP);
 #endif
 	}
@@ -353,7 +343,7 @@
 
 	cr4_set_bits(X86_CR4_UMIP);
 
-	pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n");
+	pr_info_once("x86/cpu: User Mode Instruction Prevention (UMIP) activated\n");
 
 	return;
 
@@ -364,6 +354,116 @@
 	 */
 	cr4_clear_bits(X86_CR4_UMIP);
 }
+
+/* These bits should not change their value after CPU init is finished. */
+static const unsigned long cr4_pinned_mask =
+	X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE;
+static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
+static unsigned long cr4_pinned_bits __ro_after_init;
+
+void native_write_cr0(unsigned long val)
+{
+	unsigned long bits_missing = 0;
+
+set_register:
+	asm volatile("mov %0,%%cr0": "+r" (val) : : "memory");
+
+	if (static_branch_likely(&cr_pinning)) {
+		if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
+			bits_missing = X86_CR0_WP;
+			val |= bits_missing;
+			goto set_register;
+		}
+		/* Warn after we've set the missing bits. */
+		WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
+	}
+}
+EXPORT_SYMBOL(native_write_cr0);
+
+void native_write_cr4(unsigned long val)
+{
+	unsigned long bits_changed = 0;
+
+set_register:
+	asm volatile("mov %0,%%cr4": "+r" (val) : : "memory");
+
+	if (static_branch_likely(&cr_pinning)) {
+		if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) {
+			bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits;
+			val = (val & ~cr4_pinned_mask) | cr4_pinned_bits;
+			goto set_register;
+		}
+		/* Warn after we've corrected the changed bits. */
+		WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n",
+			  bits_changed);
+	}
+}
+#if IS_MODULE(CONFIG_LKDTM)
+EXPORT_SYMBOL_GPL(native_write_cr4);
+#endif
+
+void cr4_update_irqsoff(unsigned long set, unsigned long clear)
+{
+	unsigned long newval, cr4 = this_cpu_read(cpu_tlbstate.cr4);
+
+	lockdep_assert_irqs_disabled();
+
+	newval = (cr4 & ~clear) | set;
+	if (newval != cr4) {
+		this_cpu_write(cpu_tlbstate.cr4, newval);
+		__write_cr4(newval);
+	}
+}
+EXPORT_SYMBOL(cr4_update_irqsoff);
+
+/* Read the CR4 shadow. */
+unsigned long cr4_read_shadow(void)
+{
+	return this_cpu_read(cpu_tlbstate.cr4);
+}
+EXPORT_SYMBOL_GPL(cr4_read_shadow);
+
+void cr4_init(void)
+{
+	unsigned long cr4 = __read_cr4();
+
+	if (boot_cpu_has(X86_FEATURE_PCID))
+		cr4 |= X86_CR4_PCIDE;
+	if (static_branch_likely(&cr_pinning))
+		cr4 = (cr4 & ~cr4_pinned_mask) | cr4_pinned_bits;
+
+	__write_cr4(cr4);
+
+	/* Initialize cr4 shadow for this CPU. */
+	this_cpu_write(cpu_tlbstate.cr4, cr4);
+}
+
+/*
+ * Once CPU feature detection is finished (and boot params have been
+ * parsed), record any of the sensitive CR bits that are set, and
+ * enable CR pinning.
+ */
+static void __init setup_cr_pinning(void)
+{
+	cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & cr4_pinned_mask;
+	static_key_enable(&cr_pinning.key);
+}
+
+static __init int x86_nofsgsbase_setup(char *arg)
+{
+	/* Require an exact match without trailing characters. */
+	if (strlen(arg))
+		return 0;
+
+	/* Do not emit a message if the feature is not present. */
+	if (!boot_cpu_has(X86_FEATURE_FSGSBASE))
+		return 1;
+
+	setup_clear_cpu_cap(X86_FEATURE_FSGSBASE);
+	pr_info("FSGSBASE disabled via kernel command line\n");
+	return 1;
+}
+__setup("nofsgsbase", x86_nofsgsbase_setup);
 
 /*
  * Protection Keys are not available in 32-bit mode.
@@ -488,8 +588,9 @@
 	return NULL; /* Not found */
 }
 
-__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
-__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
+/* Aligned to unsigned long to avoid split lock in atomic bitmap ops */
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
 
 void load_percpu_segment(int cpu)
 {
@@ -505,19 +606,6 @@
 #ifdef CONFIG_X86_32
 /* The 32-bit entry code needs to find cpu_entry_area. */
 DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
-#endif
-
-#ifdef CONFIG_X86_64
-/*
- * Special IST stacks which the CPU switches to when it calls
- * an IST-marked descriptor entry. Up to 7 stacks (hardware
- * limit), all of them are 4K, except the debug stack which
- * is 8K.
- */
-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
-	[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
-	[DEBUG_STACK - 1] = DEBUG_STKSZ
-};
 #endif
 
 /* Load the original GDT from the per-cpu structure */
@@ -808,30 +896,6 @@
 	}
 }
 
-static void init_cqm(struct cpuinfo_x86 *c)
-{
-	if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
-		c->x86_cache_max_rmid = -1;
-		c->x86_cache_occ_scale = -1;
-		return;
-	}
-
-	/* will be overridden if occupancy monitoring exists */
-	c->x86_cache_max_rmid = cpuid_ebx(0xf);
-
-	if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
-	    cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
-	    cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
-		u32 eax, ebx, ecx, edx;
-
-		/* QoS sub-leaf, EAX=0Fh, ECX=1 */
-		cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
-
-		c->x86_cache_max_rmid = ecx;
-		c->x86_cache_occ_scale = ebx;
-	}
-}
-
 void get_cpu_cap(struct cpuinfo_x86 *c)
 {
 	u32 eax, ebx, ecx, edx;
@@ -854,6 +918,12 @@
 		c->x86_capability[CPUID_7_0_EBX] = ebx;
 		c->x86_capability[CPUID_7_ECX] = ecx;
 		c->x86_capability[CPUID_7_EDX] = edx;
+
+		/* Check valid sub-leaf index before accessing it */
+		if (eax >= 1) {
+			cpuid_count(0x00000007, 1, &eax, &ebx, &ecx, &edx);
+			c->x86_capability[CPUID_7_1_EAX] = eax;
+		}
 	}
 
 	/* Extended state features: level 0x0000000d */
@@ -891,9 +961,14 @@
 	if (c->extended_cpuid_level >= 0x8000000a)
 		c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
 
+	if (c->extended_cpuid_level >= 0x8000001f)
+		c->x86_capability[CPUID_8000_001F_EAX] = cpuid_eax(0x8000001f);
+
+	if (c->extended_cpuid_level >= 0x80000021)
+		c->x86_capability[CPUID_8000_0021_EAX] = cpuid_eax(0x80000021);
+
 	init_scattered_cpuid_features(c);
 	init_speculation_control(c);
-	init_cqm(c);
 
 	/*
 	 * Clear/Set all flags overridden by options, after probe.
@@ -954,15 +1029,21 @@
 #define MSBDS_ONLY	BIT(5)
 #define NO_SWAPGS	BIT(6)
 #define NO_ITLB_MULTIHIT	BIT(7)
+#define NO_SPECTRE_V2	BIT(8)
+#define NO_MMIO		BIT(9)
+#define NO_EIBRS_PBRSB	BIT(10)
 
-#define VULNWL(_vendor, _family, _model, _whitelist)	\
-	{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
+#define VULNWL(vendor, family, model, whitelist)	\
+	X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
 
 #define VULNWL_INTEL(model, whitelist)	\
 	VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist)
 
 #define VULNWL_AMD(family, whitelist)	\
 	VULNWL(AMD, family, X86_MODEL_ANY, whitelist)
+
+#define VULNWL_HYGON(family, whitelist)	\
+	VULNWL(HYGON, family, X86_MODEL_ANY, whitelist)
 
 static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
 	VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION),
@@ -971,6 +1052,11 @@
 	VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION),
 
 	/* Intel Family 6 */
+	VULNWL_INTEL(TIGERLAKE, NO_MMIO),
+	VULNWL_INTEL(TIGERLAKE_L, NO_MMIO),
+	VULNWL_INTEL(ALDERLAKE, NO_MMIO),
+	VULNWL_INTEL(ALDERLAKE_L, NO_MMIO),
+
 	VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
 	VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT),
 	VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
@@ -978,7 +1064,7 @@
 	VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
 
 	VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
-	VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(ATOM_SILVERMONT_D, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
 	VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
 	VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
 	VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
@@ -987,10 +1073,11 @@
 	VULNWL_INTEL(CORE_YONAH, NO_SSB),
 
 	VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
 
-	VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
-	VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
-	VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+	VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+	VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
 
 	/*
 	 * Technically, swapgs isn't serializing on AMD (despite it previously
@@ -1000,37 +1087,89 @@
 	 * good enough for our purposes.
 	 */
 
-	VULNWL_INTEL(ATOM_TREMONT_X, NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(ATOM_TREMONT, NO_EIBRS_PBRSB),
+	VULNWL_INTEL(ATOM_TREMONT_L, NO_EIBRS_PBRSB),
+	VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
 
 	/* AMD Family 0xf - 0x12 */
-	VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
-	VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
-	VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
-	VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+	VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+	VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+	VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
 
 	/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
-	VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+	VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+
+	/* Zhaoxin Family 7 */
+	VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+	VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
 	{}
 };
+
+#define VULNBL(vendor, family, model, blacklist)	\
+	X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist)
 
 #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \
 	X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \
 					    INTEL_FAM6_##model, steppings, \
 					    X86_FEATURE_ANY, issues)
 
+#define VULNBL_AMD(family, blacklist)	\
+	VULNBL(AMD, family, X86_MODEL_ANY, blacklist)
+
+#define VULNBL_HYGON(family, blacklist)	\
+	VULNBL(HYGON, family, X86_MODEL_ANY, blacklist)
+
 #define SRBDS	BIT(0)
+/* CPU is affected by X86_BUG_MMIO_STALE_DATA */
+#define MMIO	BIT(1)
+/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */
+#define MMIO_SBDS	BIT(2)
+/* CPU is affected by RETbleed, speculating where you would not expect it */
+#define RETBLEED	BIT(3)
+/* CPU is affected by SMT (cross-thread) return predictions */
+#define SMT_RSB	BIT(4)
+/* CPU is affected by SRSO */
+#define SRSO	BIT(5)
+/* CPU is affected by GDS */
+#define GDS	BIT(6)
 
 static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
 	VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
-	VULNBL_INTEL_STEPPINGS(HASWELL_CORE, X86_STEPPING_ANY, SRBDS),
-	VULNBL_INTEL_STEPPINGS(HASWELL_ULT, X86_STEPPING_ANY, SRBDS),
-	VULNBL_INTEL_STEPPINGS(HASWELL_GT3E, X86_STEPPING_ANY, SRBDS),
-	VULNBL_INTEL_STEPPINGS(BROADWELL_GT3E, X86_STEPPING_ANY, SRBDS),
-	VULNBL_INTEL_STEPPINGS(BROADWELL_CORE, X86_STEPPING_ANY, SRBDS),
-	VULNBL_INTEL_STEPPINGS(SKYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS),
-	VULNBL_INTEL_STEPPINGS(SKYLAKE_DESKTOP, X86_STEPPING_ANY, SRBDS),
-	VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPINGS(0x0, 0xC), SRBDS),
-	VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP, X86_STEPPINGS(0x0, 0xD), SRBDS),
+	VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS),
+	VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS),
+	VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS),
+	VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO),
+	VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO),
+	VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS),
+	VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
+	VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS),
+	VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS),
+	VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS),
+	VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS),
+	VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS),
+	VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS),
+	VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED),
+	VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
+	VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS),
+	VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS),
+	VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
+	VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED),
+	VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
+	VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS),
+	VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS),
+	VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+	VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS),
+	VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
+	VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO),
+	VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
+
+	VULNBL_AMD(0x15, RETBLEED),
+	VULNBL_AMD(0x16, RETBLEED),
+	VULNBL_AMD(0x17, RETBLEED | SRSO),
+	VULNBL_HYGON(0x18, RETBLEED),
+	VULNBL_AMD(0x19, SRSO),
 	{}
 };
 
@@ -1051,6 +1190,13 @@
 	return ia32_cap;
 }
 
+static bool arch_cap_mmio_immune(u64 ia32_cap)
+{
+	return (ia32_cap & ARCH_CAP_FBSDP_NO &&
+		ia32_cap & ARCH_CAP_PSDP_NO &&
+		ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
+}
+
 static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 {
 	u64 ia32_cap = x86_read_arch_cap_msr();
@@ -1064,7 +1210,9 @@
 		return;
 
 	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
-	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
+	if (!cpu_matches(cpu_vuln_whitelist, NO_SPECTRE_V2))
+		setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
 
 	if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) &&
 	    !(ia32_cap & ARCH_CAP_SSB_NO) &&
@@ -1102,11 +1250,57 @@
 	/*
 	 * SRBDS affects CPUs which support RDRAND or RDSEED and are listed
 	 * in the vulnerability blacklist.
+	 *
+	 * Some of the implications and mitigation of Shared Buffers Data
+	 * Sampling (SBDS) are similar to SRBDS. Give SBDS same treatment as
+	 * SRBDS.
 	 */
 	if ((cpu_has(c, X86_FEATURE_RDRAND) ||
 	     cpu_has(c, X86_FEATURE_RDSEED)) &&
-	    cpu_matches(cpu_vuln_blacklist, SRBDS))
+	    cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS))
 		setup_force_cpu_bug(X86_BUG_SRBDS);
+
+	/*
+	 * Processor MMIO Stale Data bug enumeration
+	 *
+	 * Affected CPU list is generally enough to enumerate the vulnerability,
+	 * but for virtualization case check for ARCH_CAP MSR bits also, VMM may
+	 * not want the guest to enumerate the bug.
+	 *
+	 * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
+	 * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
+	 */
+	if (!arch_cap_mmio_immune(ia32_cap)) {
+		if (cpu_matches(cpu_vuln_blacklist, MMIO))
+			setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+		else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
+			setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
+	}
+
+	if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
+		if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
+			setup_force_cpu_bug(X86_BUG_RETBLEED);
+	}
+
+	if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) &&
+	    !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
+	    !(ia32_cap & ARCH_CAP_PBRSB_NO))
+		setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
+
+	/*
+	 * Check if CPU is vulnerable to GDS. If running in a virtual machine on
+	 * an affected processor, the VMM may have disabled the use of GATHER by
+	 * disabling AVX2. The only way to do this in HW is to clear XCR0[2],
+	 * which means that AVX will be disabled.
+	 */
+	if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
+	    boot_cpu_has(X86_FEATURE_AVX))
+		setup_force_cpu_bug(X86_BUG_GDS);
+
+	if (!cpu_has(c, X86_FEATURE_SRSO_NO)) {
+		if (cpu_matches(cpu_vuln_blacklist, SRSO))
+			setup_force_cpu_bug(X86_BUG_SRSO);
+	}
 
 	if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
 		return;
@@ -1142,6 +1336,59 @@
 }
 
 /*
+ * We parse cpu parameters early because fpu__init_system() is executed
+ * before parse_early_param().
+ */
+static void __init cpu_parse_early_param(void)
+{
+	char arg[128];
+	char *argptr = arg;
+	int arglen, res, bit;
+
+#ifdef CONFIG_X86_32
+	if (cmdline_find_option_bool(boot_command_line, "no387"))
+#ifdef CONFIG_MATH_EMULATION
+		setup_clear_cpu_cap(X86_FEATURE_FPU);
+#else
+		pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n");
+#endif
+
+	if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
+		setup_clear_cpu_cap(X86_FEATURE_FXSR);
+#endif
+
+	if (cmdline_find_option_bool(boot_command_line, "noxsave"))
+		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+
+	if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
+		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+
+	if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
+		setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+
+	arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
+	if (arglen <= 0)
+		return;
+
+	pr_info("Clearing CPUID bits:");
+	do {
+		res = get_option(&argptr, &bit);
+		if (res == 0 || res == 3)
+			break;
+
+		/* If the argument was too long, the last bit may be cut off */
+		if (res == 1 && arglen >= sizeof(arg))
+			break;
+
+		if (bit >= 0 && bit < NCAPINTS * 32) {
+			pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
+			setup_clear_cpu_cap(bit);
+		}
+	} while (res == 2);
+	pr_cont("\n");
+}
+
+/*
  * Do minimum CPU detection early.
  * Fields really needed: vendor, cpuid_level, family, model, mask,
  * cache alignment.
@@ -1163,7 +1410,7 @@
 #endif
 	c->x86_cache_alignment = c->x86_clflush_size;
 
-	memset(&c->x86_capability, 0, sizeof c->x86_capability);
+	memset(&c->x86_capability, 0, sizeof(c->x86_capability));
 	c->extended_cpuid_level = 0;
 
 	if (!have_cpuid_p())
@@ -1176,6 +1423,7 @@
 		get_cpu_cap(c);
 		get_cpu_address_sizes(c);
 		setup_force_cpu_cap(X86_FEATURE_CPUID);
+		cpu_parse_early_param();
 
 		if (this_cpu->c_early_init)
 			this_cpu->c_early_init(c);
@@ -1193,7 +1441,7 @@
 
 	cpu_set_bug_bits(c);
 
-	fpu__init_system(c);
+	cpu_set_core_cap_bits(c);
 
 #ifdef CONFIG_X86_32
 	/*
@@ -1362,30 +1610,8 @@
 	 * ESPFIX issue, we can change this.
 	 */
 #ifdef CONFIG_X86_32
-# ifdef CONFIG_PARAVIRT
-	do {
-		extern void native_iret(void);
-		if (pv_cpu_ops.iret == native_iret)
-			set_cpu_bug(c, X86_BUG_ESPFIX);
-	} while (0);
-# else
 	set_cpu_bug(c, X86_BUG_ESPFIX);
-# endif
 #endif
-}
-
-static void x86_init_cache_qos(struct cpuinfo_x86 *c)
-{
-	/*
-	 * The heavy lifting of max_rmid and cache_occ_scale are handled
-	 * in get_cpu_cap(). Here we just set the max_rmid for the boot_cpu
-	 * in case CQM bits really aren't there in this CPU.
-	 */
-	if (c != &boot_cpu_data) {
-		boot_cpu_data.x86_cache_max_rmid =
-			min(boot_cpu_data.x86_cache_max_rmid,
-			    c->x86_cache_max_rmid);
-	}
 }
 
 /*
@@ -1404,6 +1630,7 @@
 		       cpu, apicid, c->initial_apicid);
 	}
 	BUG_ON(topology_update_package_map(c->phys_proc_id, cpu));
+	BUG_ON(topology_update_die_map(c->cpu_die_id, cpu));
 #else
 	c->logical_proc_id = 0;
 #endif
@@ -1436,7 +1663,10 @@
 	c->x86_virt_bits = 32;
 #endif
 	c->x86_cache_alignment = c->x86_clflush_size;
-	memset(&c->x86_capability, 0, sizeof c->x86_capability);
+	memset(&c->x86_capability, 0, sizeof(c->x86_capability));
+#ifdef CONFIG_X86_VMX_FEATURE_NAMES
+	memset(&c->vmx_capability, 0, sizeof(c->vmx_capability));
+#endif
 
 	generic_identify(c);
 
@@ -1471,6 +1701,12 @@
 	setup_smap(c);
 	setup_umip(c);
 
+	/* Enable FSGSBASE instructions if available. */
+	if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
+		cr4_set_bits(X86_CR4_FSGSBASE);
+		elf_hwcap2 |= HWCAP2_FSGSBASE;
+	}
+
 	/*
 	 * The vendor-specific functions might have changed features.
 	 * Now we do "generic changes."
@@ -1496,7 +1732,6 @@
 #endif
 
 	x86_init_rdrand(c);
-	x86_init_cache_qos(c);
 	setup_pku(c);
 
 	/*
@@ -1569,6 +1804,8 @@
 	enable_sep_cpu();
 #endif
 	cpu_detect_tlb(&boot_cpu_data);
+	setup_cr_pinning();
+
 	tsx_init();
 }
 
@@ -1583,6 +1820,8 @@
 	validate_apic_and_package_id(c);
 	x86_spec_ctrl_setup_ap();
 	update_srbds_msr();
+	if (boot_cpu_has_bug(X86_BUG_GDS))
+		update_gds_msr();
 }
 
 static __init int setup_noclflush(char *arg)
@@ -1632,9 +1871,9 @@
 __setup("clearcpuid=", setup_clearcpuid);
 
 #ifdef CONFIG_X86_64
-DEFINE_PER_CPU_FIRST(union irq_stack_union,
-		     irq_stack_union) __aligned(PAGE_SIZE) __visible;
-EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union);
+DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
+		     fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
+EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
 
 /*
  * The following percpu variables are hot. Align current_task to
@@ -1644,9 +1883,7 @@
 	&init_task;
 EXPORT_PER_CPU_SYMBOL(current_task);
 
-DEFINE_PER_CPU(char *, irq_stack_ptr) =
-	init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE;
-
+DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
 DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
@@ -1655,19 +1892,8 @@
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
-	extern char _entry_trampoline[];
-	extern char entry_SYSCALL_64_trampoline[];
-
-	int cpu = smp_processor_id();
-	unsigned long SYSCALL64_entry_trampoline =
-		(unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
-		(entry_SYSCALL_64_trampoline - _entry_trampoline);
-
 	wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
-	if (static_cpu_has(X86_FEATURE_PTI))
-		wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
-	else
-		wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+	wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
 
 #ifdef CONFIG_IA32_EMULATION
 	wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1678,7 +1904,8 @@
 	 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
 	 */
 	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
+	wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
+		    (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
 	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
 #else
 	wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@ -1692,41 +1919,6 @@
 	       X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|
 	       X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT);
 }
-
-/*
- * Copies of the original ist values from the tss are only accessed during
- * debugging, no special alignment required.
- */
-DEFINE_PER_CPU(struct orig_ist, orig_ist);
-
-static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
-DEFINE_PER_CPU(int, debug_stack_usage);
-
-int is_debug_stack(unsigned long addr)
-{
-	return __this_cpu_read(debug_stack_usage) ||
-		(addr <= __this_cpu_read(debug_stack_addr) &&
-		 addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ));
-}
-NOKPROBE_SYMBOL(is_debug_stack);
-
-DEFINE_PER_CPU(u32, debug_idt_ctr);
-
-void debug_stack_set_zero(void)
-{
-	this_cpu_inc(debug_idt_ctr);
-	load_current_idt();
-}
-NOKPROBE_SYMBOL(debug_stack_set_zero);
-
-void debug_stack_reset(void)
-{
-	if (WARN_ON(!this_cpu_read(debug_idt_ctr)))
-		return;
-	if (this_cpu_dec_return(debug_idt_ctr) == 0)
-		load_current_idt();
-}
-NOKPROBE_SYMBOL(debug_stack_reset);
 
 #else /* CONFIG_X86_64 */
 
@@ -1794,12 +1986,12 @@
 }
 
 #ifdef CONFIG_X86_64
-static void setup_getcpu(int cpu)
+static inline void setup_getcpu(int cpu)
 {
 	unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
 	struct desc_struct d = { };
 
-	if (boot_cpu_has(X86_FEATURE_RDTSCP))
+	if (boot_cpu_has(X86_FEATURE_RDTSCP) || boot_cpu_has(X86_FEATURE_RDPID))
 		write_rdtscp_aux(cpudata);
 
 	/* Store CPU and node number in limit. */
@@ -1814,109 +2006,130 @@
 
 	write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_CPUNODE, &d, DESCTYPE_S);
 }
+
+static inline void ucode_cpu_init(int cpu)
+{
+	if (cpu)
+		load_ucode_ap();
+}
+
+static inline void tss_setup_ist(struct tss_struct *tss)
+{
+	/* Set up the per-CPU TSS IST stacks */
+	tss->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF);
+	tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
+	tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
+	tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
+	/* Only mapped when SEV-ES is active */
+	tss->x86_tss.ist[IST_INDEX_VC] = __this_cpu_ist_top_va(VC);
+}
+
+#else /* CONFIG_X86_64 */
+
+static inline void setup_getcpu(int cpu) { }
+
+static inline void ucode_cpu_init(int cpu)
+{
+	show_ucode_info_early();
+}
+
+static inline void tss_setup_ist(struct tss_struct *tss) { }
+
+#endif /* !CONFIG_X86_64 */
+
+static inline void tss_setup_io_bitmap(struct tss_struct *tss)
+{
+	tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+	tss->io_bitmap.prev_max = 0;
+	tss->io_bitmap.prev_sequence = 0;
+	memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap));
+	/*
+	 * Invalidate the extra array entry past the end of the all
+	 * permission bitmap as required by the hardware.
+	 */
+	tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL;
 #endif
+}
+
+/*
+ * Setup everything needed to handle exceptions from the IDT, including the IST
+ * exceptions which use paranoid_entry().
+ */
+void cpu_init_exception_handling(void)
+{
+	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+	int cpu = raw_smp_processor_id();
+
+	/* paranoid_entry() gets the CPU number from the GDT */
+	setup_getcpu(cpu);
+
+	/* IST vectors need TSS to be set up. */
+	tss_setup_ist(tss);
+	tss_setup_io_bitmap(tss);
+	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+
+	load_TR_desc();
+
+	/* Finally load the IDT */
+	load_current_idt();
+}
 
 /*
  * cpu_init() initializes state that is per-CPU. Some data is already
- * initialized (naturally) in the bootstrap process, such as the GDT
- * and IDT. We reload them nevertheless, this function acts as a
- * 'CPU state barrier', nothing should get across.
- * A lot of state is already set up in PDA init for 64 bit
+ * initialized (naturally) in the bootstrap process, such as the GDT. We
+ * reload it nevertheless, this function acts as a 'CPU state barrier',
+ * nothing should get across.
  */
-#ifdef CONFIG_X86_64
-
 void cpu_init(void)
 {
-	struct orig_ist *oist;
-	struct task_struct *me;
-	struct tss_struct *t;
-	unsigned long v;
+	struct task_struct *cur = current;
 	int cpu = raw_smp_processor_id();
-	int i;
 
 	wait_for_master_cpu(cpu);
 
-	/*
-	 * Initialize the CR4 shadow before doing anything that could
-	 * try to read it.
-	 */
-	cr4_init_shadow();
-
-	if (cpu)
-		load_ucode_ap();
-
-	t = &per_cpu(cpu_tss_rw, cpu);
-	oist = &per_cpu(orig_ist, cpu);
+	ucode_cpu_init(cpu);
 
 #ifdef CONFIG_NUMA
 	if (this_cpu_read(numa_node) == 0 &&
 	    early_cpu_to_node(cpu) != NUMA_NO_NODE)
 		set_numa_node(early_cpu_to_node(cpu));
 #endif
-	setup_getcpu(cpu);
-
-	me = current;
-
 	pr_debug("Initializing CPU#%d\n", cpu);
 
-	cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+	if (IS_ENABLED(CONFIG_X86_64) || cpu_feature_enabled(X86_FEATURE_VME) ||
+	    boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE))
+		cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 
 	/*
 	 * Initialize the per-CPU GDT with the boot GDT,
 	 * and set up the GDT descriptor:
 	 */
-
 	switch_to_new_gdt(cpu);
-	loadsegment(fs, 0);
 
-	load_current_idt();
+	if (IS_ENABLED(CONFIG_X86_64)) {
+		loadsegment(fs, 0);
+		memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
+		syscall_init();
 
-	memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
-	syscall_init();
+		wrmsrl(MSR_FS_BASE, 0);
+		wrmsrl(MSR_KERNEL_GS_BASE, 0);
+		barrier();
 
-	wrmsrl(MSR_FS_BASE, 0);
-	wrmsrl(MSR_KERNEL_GS_BASE, 0);
-	barrier();
-
-	x86_configure_nx();
-	x2apic_setup();
-
-	/*
-	 * set up and load the per-CPU TSS
-	 */
-	if (!oist->ist[0]) {
-		char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
-
-		for (v = 0; v < N_EXCEPTION_STACKS; v++) {
-			estacks += exception_stack_sizes[v];
-			oist->ist[v] = t->x86_tss.ist[v] =
-					(unsigned long)estacks;
-			if (v == DEBUG_STACK-1)
-				per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
-		}
+		x2apic_setup();
 	}
 
-	t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
-
-	/*
-	 * <= is required because the CPU will access up to
-	 * 8 bits beyond the end of the IO permission bitmap.
-	 */
-	for (i = 0; i <= IO_BITMAP_LONGS; i++)
-		t->io_bitmap[i] = ~0UL;
-
 	mmgrab(&init_mm);
-	me->active_mm = &init_mm;
-	BUG_ON(me->mm);
+	cur->active_mm = &init_mm;
+	BUG_ON(cur->mm);
 	initialize_tlbstate_and_flush();
-	enter_lazy_tlb(&init_mm, me);
+	enter_lazy_tlb(&init_mm, cur);
 
 	/*
-	 * Initialize the TSS. sp0 points to the entry trampoline stack
-	 * regardless of what task is running.
+	 * sp0 points to the entry trampoline stack regardless of what task
+	 * is running.
 	 */
-	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
-	load_TR_desc();
 	load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
 
 	load_mm_ldt(&init_mm);
@@ -1924,7 +2137,7 @@
 	clear_all_debug_regs();
 	dbg_restore_debug_regs();
 
-	fpu__init_cpu();
+	doublefault_init_cpu_tss();
 
 	if (is_uv_system())
 		uv_cpu_init();
@@ -1932,112 +2145,141 @@
 	load_fixmap_gdt(cpu);
 }
 
-#else
-
-void cpu_init(void)
+#ifdef CONFIG_SMP
+void cpu_init_secondary(void)
 {
-	int cpu = smp_processor_id();
-	struct task_struct *curr = current;
-	struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
-
-	wait_for_master_cpu(cpu);
-
 	/*
-	 * Initialize the CR4 shadow before doing anything that could
-	 * try to read it.
+	 * Relies on the BP having set-up the IDT tables, which are loaded
+	 * on this CPU in cpu_init_exception_handling().
 	 */
-	cr4_init_shadow();
-
-	show_ucode_info_early();
-
-	pr_info("Initializing CPU#%d\n", cpu);
-
-	if (cpu_feature_enabled(X86_FEATURE_VME) ||
-	    boot_cpu_has(X86_FEATURE_TSC) ||
-	    boot_cpu_has(X86_FEATURE_DE))
-		cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
-
-	load_current_idt();
-	switch_to_new_gdt(cpu);
-
-	/*
-	 * Set up and load the per-CPU TSS and LDT
-	 */
-	mmgrab(&init_mm);
-	curr->active_mm = &init_mm;
-	BUG_ON(curr->mm);
-	initialize_tlbstate_and_flush();
-	enter_lazy_tlb(&init_mm, curr);
-
-	/*
-	 * Initialize the TSS. sp0 points to the entry trampoline stack
-	 * regardless of what task is running.
-	 */
-	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
-	load_TR_desc();
-	load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
-
-	load_mm_ldt(&init_mm);
-
-	t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
-
-#ifdef CONFIG_DOUBLEFAULT
-	/* Set up doublefault TSS pointer in the GDT */
-	__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
-#endif
-
-	clear_all_debug_regs();
-	dbg_restore_debug_regs();
-
+	cpu_init_exception_handling();
+	cpu_init();
 	fpu__init_cpu();
-
-	load_fixmap_gdt(cpu);
 }
 #endif
 
-static void bsp_resume(void)
+#ifdef CONFIG_MICROCODE_LATE_LOADING
+/**
+ * store_cpu_caps() - Store a snapshot of CPU capabilities
+ * @curr_info: Pointer where to store it
+ *
+ * Returns: None
+ */
+void store_cpu_caps(struct cpuinfo_x86 *curr_info)
 {
-	if (this_cpu->c_bsp_resume)
-		this_cpu->c_bsp_resume(&boot_cpu_data);
+	/* Reload CPUID max function as it might've changed. */
+	curr_info->cpuid_level = cpuid_eax(0);
+
+	/* Copy all capability leafs and pick up the synthetic ones. */
+	memcpy(&curr_info->x86_capability, &boot_cpu_data.x86_capability,
+	       sizeof(curr_info->x86_capability));
+
+	/* Get the hardware CPUID leafs */
+	get_cpu_cap(curr_info);
 }
 
-static struct syscore_ops cpu_syscore_ops = {
-	.resume = bsp_resume,
-};
-
-static int __init init_cpu_syscore(void)
-{
-	register_syscore_ops(&cpu_syscore_ops);
-	return 0;
-}
-core_initcall(init_cpu_syscore);
-
-/*
+/**
+ * microcode_check() - Check if any CPU capabilities changed after an update.
+ * @prev_info: CPU capabilities stored before an update.
+ *
  * The microcode loader calls this upon late microcode load to recheck features,
  * only when microcode has been updated. Caller holds microcode_mutex and CPU
  * hotplug lock.
+ *
+ * Return: None
  */
-void microcode_check(void)
+void microcode_check(struct cpuinfo_x86 *prev_info)
 {
-	struct cpuinfo_x86 info;
+	struct cpuinfo_x86 curr_info;
 
 	perf_check_microcode();
 
-	/* Reload CPUID max function as it might've changed. */
-	info.cpuid_level = cpuid_eax(0);
+	amd_check_microcode();
 
-	/*
-	 * Copy all capability leafs to pick up the synthetic ones so that
-	 * memcmp() below doesn't fail on that. The ones coming from CPUID will
-	 * get overwritten in get_cpu_cap().
-	 */
-	memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability));
+	store_cpu_caps(&curr_info);
 
-	get_cpu_cap(&info);
-
-	if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)))
+	if (!memcmp(&prev_info->x86_capability, &curr_info.x86_capability,
+		    sizeof(prev_info->x86_capability)))
 		return;
 
 	pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n");
 	pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
 }
+#endif
+
+/*
+ * Invoked from core CPU hotplug code after hotplug operations
+ */
+void arch_smt_update(void)
+{
+	/* Handle the speculative execution misfeatures */
+	cpu_bugs_smt_update();
+	/* Check whether IPI broadcasting can be enabled */
+	apic_smt_update();
+}
+
+void __init arch_cpu_finalize_init(void)
+{
+	identify_boot_cpu();
+
+	/*
+	 * identify_boot_cpu() initialized SMT support information, let the
+	 * core code know.
+	 */
+	cpu_smt_check_topology();
+
+	if (!IS_ENABLED(CONFIG_SMP)) {
+		pr_info("CPU: ");
+		print_cpu_info(&boot_cpu_data);
+	}
+
+	cpu_select_mitigations();
+
+	arch_smt_update();
+
+	if (IS_ENABLED(CONFIG_X86_32)) {
+		/*
+		 * Check whether this is a real i386 which is not longer
+		 * supported and fixup the utsname.
+		 */
+		if (boot_cpu_data.x86 < 4)
+			panic("Kernel requires i486+ for 'invlpg' and other features");
+
+		init_utsname()->machine[1] =
+			'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
+	}
+
+	/*
+	 * Must be before alternatives because it might set or clear
+	 * feature bits.
+	 */
+	fpu__init_system();
+	fpu__init_cpu();
+
+	alternative_instructions();
+
+	if (IS_ENABLED(CONFIG_X86_64)) {
+		/*
+		 * Make sure the first 2MB area is not mapped by huge pages
+		 * There are typically fixed size MTRRs in there and overlapping
+		 * MTRRs into large pages causes slow downs.
+		 *
+		 * Right now we don't do that with gbpages because there seems
+		 * very little benefit for that case.
+		 */
+		if (!direct_gbpages)
+			set_memory_4k((unsigned long)__va(0), 1);
+	} else {
+		fpu__init_check_bugs();
+	}
+
+	/*
+	 * This needs to be called before any devices perform DMA
+	 * operations that might use the SWIOTLB bounce buffers. It will
+	 * mark the bounce buffers as decrypted so that their usage will
+	 * not cause "plain-text" data to be decrypted when accessed. It
+	 * must be called after late_time_init() so that Hyper-V x86/x64
+	 * hypercalls work when the SWIOTLB bounce buffers are decrypted.
+	 */
+	mem_encrypt_init();
+}