2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/arch/x86/kernel/cpu/common.c
@@ -18,11 +18,15 @@
 #include <linux/init.h>
 #include <linux/kprobes.h>
 #include <linux/kgdb.h>
+#include <linux/mem_encrypt.h>
 #include <linux/smp.h>
+#include <linux/cpu.h>
 #include <linux/io.h>
 #include <linux/syscore_ops.h>
 #include <linux/pgtable.h>
+#include <linux/utsname.h>
 
+#include <asm/alternative.h>
 #include <asm/cmdline.h>
 #include <asm/stackprotector.h>
 #include <asm/perf_event.h>
@@ -58,6 +62,7 @@
 #include <asm/intel-family.h>
 #include <asm/cpu_device_id.h>
 #include <asm/uv/uv.h>
+#include <asm/set_memory.h>
 
 #include "cpu.h"
 
@@ -467,8 +472,6 @@
 
 static __always_inline void setup_pku(struct cpuinfo_x86 *c)
 {
-	struct pkru_state *pk;
-
 	/* check the boot processor, plus compile options for PKU: */
 	if (!cpu_feature_enabled(X86_FEATURE_PKU))
 		return;
@@ -479,9 +482,6 @@
 		return;
 
 	cr4_set_bits(X86_CR4_PKE);
-	pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU);
-	if (pk)
-		pk->pkru = init_pkru_value;
 	/*
 	 * Setting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
 	 * cpuid bit to be set. We need to ensure that we
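Aside (not part of the patch): once the kernel has set CR4.PKE as above, protection keys become usable from user space through the glibc pkey wrappers. A minimal, self-contained sketch, assuming a PKU-capable CPU and kernel; none of these calls are taken from the diff itself:

/* Minimal protection-keys demo (user space only, illustrative). */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	int pkey = pkey_alloc(0, 0);

	if (pkey < 0) {
		perror("pkey_alloc");		/* CPU or kernel lacks PKU */
		return 1;
	}

	char *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			  MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (page == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	pkey_mprotect(page, 4096, PROT_READ | PROT_WRITE, pkey);

	page[0] = 'x';				/* allowed: PKRU still grants access */
	pkey_set(pkey, PKEY_DISABLE_ACCESS);	/* any further access would fault */

	pkey_free(pkey);
	return 0;
}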
@@ -961,6 +961,12 @@
 	if (c->extended_cpuid_level >= 0x8000000a)
 		c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
 
+	if (c->extended_cpuid_level >= 0x8000001f)
+		c->x86_capability[CPUID_8000_001F_EAX] = cpuid_eax(0x8000001f);
+
+	if (c->extended_cpuid_level >= 0x80000021)
+		c->x86_capability[CPUID_8000_0021_EAX] = cpuid_eax(0x80000021);
+
 	init_scattered_cpuid_features(c);
 	init_speculation_control(c);
 
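The two additions follow the existing pattern: each extended leaf is read only if leaf 0x80000000 reports it as supported. The same probe can be reproduced from user space with a short sketch like the one below (illustration only, not kernel code); leaf 0x8000001f EAX carries the SEV/SME feature word and 0x80000021 EAX the extended feature word consumed elsewhere:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Highest supported extended leaf, as reported by CPUID 0x80000000. */
	unsigned int max_ext = __get_cpuid_max(0x80000000, NULL);

	if (max_ext >= 0x8000001f && __get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx))
		printf("CPUID 0x8000001f EAX = 0x%08x\n", eax);

	if (max_ext >= 0x80000021 && __get_cpuid(0x80000021, &eax, &ebx, &ecx, &edx))
		printf("CPUID 0x80000021 EAX = 0x%08x\n", eax);

	return 0;
}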
@@ -1122,6 +1128,12 @@
 #define MMIO_SBDS	BIT(2)
 /* CPU is affected by RETbleed, speculating where you would not expect it */
 #define RETBLEED	BIT(3)
+/* CPU is affected by SMT (cross-thread) return predictions */
+#define SMT_RSB		BIT(4)
+/* CPU is affected by SRSO */
+#define SRSO		BIT(5)
+/* CPU is affected by GDS */
+#define GDS		BIT(6)
 
 static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
 	VULNBL_INTEL_STEPPINGS(IVYBRIDGE,	X86_STEPPING_ANY,		SRBDS),
@@ -1133,28 +1145,31 @@
 	VULNBL_INTEL_STEPPINGS(BROADWELL_G,	X86_STEPPING_ANY,		SRBDS),
 	VULNBL_INTEL_STEPPINGS(BROADWELL_X,	X86_STEPPING_ANY,		MMIO),
 	VULNBL_INTEL_STEPPINGS(BROADWELL,	X86_STEPPING_ANY,		SRBDS),
-	VULNBL_INTEL_STEPPINGS(SKYLAKE_L,	X86_STEPPING_ANY,		SRBDS | MMIO | RETBLEED),
-	VULNBL_INTEL_STEPPINGS(SKYLAKE_X,	X86_STEPPING_ANY,		MMIO | RETBLEED),
-	VULNBL_INTEL_STEPPINGS(SKYLAKE,		X86_STEPPING_ANY,		SRBDS | MMIO | RETBLEED),
-	VULNBL_INTEL_STEPPINGS(KABYLAKE_L,	X86_STEPPING_ANY,		SRBDS | MMIO | RETBLEED),
-	VULNBL_INTEL_STEPPINGS(KABYLAKE,	X86_STEPPING_ANY,		SRBDS | MMIO | RETBLEED),
+	VULNBL_INTEL_STEPPINGS(SKYLAKE_X,	X86_STEPPING_ANY,		MMIO | RETBLEED | GDS),
+	VULNBL_INTEL_STEPPINGS(SKYLAKE_L,	X86_STEPPING_ANY,		MMIO | RETBLEED | GDS | SRBDS),
+	VULNBL_INTEL_STEPPINGS(SKYLAKE,		X86_STEPPING_ANY,		MMIO | RETBLEED | GDS | SRBDS),
+	VULNBL_INTEL_STEPPINGS(KABYLAKE_L,	X86_STEPPING_ANY,		MMIO | RETBLEED | GDS | SRBDS),
+	VULNBL_INTEL_STEPPINGS(KABYLAKE,	X86_STEPPING_ANY,		MMIO | RETBLEED | GDS | SRBDS),
 	VULNBL_INTEL_STEPPINGS(CANNONLAKE_L,	X86_STEPPING_ANY,		RETBLEED),
-	VULNBL_INTEL_STEPPINGS(ICELAKE_L,	X86_STEPPING_ANY,		MMIO | MMIO_SBDS | RETBLEED),
-	VULNBL_INTEL_STEPPINGS(ICELAKE_D,	X86_STEPPING_ANY,		MMIO),
-	VULNBL_INTEL_STEPPINGS(ICELAKE_X,	X86_STEPPING_ANY,		MMIO),
-	VULNBL_INTEL_STEPPINGS(COMETLAKE,	X86_STEPPING_ANY,		MMIO | MMIO_SBDS | RETBLEED),
+	VULNBL_INTEL_STEPPINGS(ICELAKE_L,	X86_STEPPING_ANY,		MMIO | MMIO_SBDS | RETBLEED | GDS),
+	VULNBL_INTEL_STEPPINGS(ICELAKE_D,	X86_STEPPING_ANY,		MMIO | GDS),
+	VULNBL_INTEL_STEPPINGS(ICELAKE_X,	X86_STEPPING_ANY,		MMIO | GDS),
+	VULNBL_INTEL_STEPPINGS(COMETLAKE,	X86_STEPPING_ANY,		MMIO | MMIO_SBDS | RETBLEED | GDS),
 	VULNBL_INTEL_STEPPINGS(COMETLAKE_L,	X86_STEPPINGS(0x0, 0x0),	MMIO | RETBLEED),
-	VULNBL_INTEL_STEPPINGS(COMETLAKE_L,	X86_STEPPING_ANY,		MMIO | MMIO_SBDS | RETBLEED),
+	VULNBL_INTEL_STEPPINGS(COMETLAKE_L,	X86_STEPPING_ANY,		MMIO | MMIO_SBDS | RETBLEED | GDS),
+	VULNBL_INTEL_STEPPINGS(TIGERLAKE_L,	X86_STEPPING_ANY,		GDS),
+	VULNBL_INTEL_STEPPINGS(TIGERLAKE,	X86_STEPPING_ANY,		GDS),
 	VULNBL_INTEL_STEPPINGS(LAKEFIELD,	X86_STEPPING_ANY,		MMIO | MMIO_SBDS | RETBLEED),
-	VULNBL_INTEL_STEPPINGS(ROCKETLAKE,	X86_STEPPING_ANY,		MMIO | RETBLEED),
+	VULNBL_INTEL_STEPPINGS(ROCKETLAKE,	X86_STEPPING_ANY,		MMIO | RETBLEED | GDS),
 	VULNBL_INTEL_STEPPINGS(ATOM_TREMONT,	X86_STEPPING_ANY,		MMIO | MMIO_SBDS),
 	VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D,	X86_STEPPING_ANY,		MMIO),
 	VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L,	X86_STEPPING_ANY,		MMIO | MMIO_SBDS),
 
 	VULNBL_AMD(0x15, RETBLEED),
 	VULNBL_AMD(0x16, RETBLEED),
-	VULNBL_AMD(0x17, RETBLEED),
+	VULNBL_AMD(0x17, RETBLEED | SRSO),
 	VULNBL_HYGON(0x18, RETBLEED),
+	VULNBL_AMD(0x19, SRSO),
 	{}
 };
 
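The table is essentially a family/model match feeding a bitmask of vulnerability flags into cpu_matches(). A simplified, self-contained sketch of that table-plus-bitmask pattern follows; the types and helper here are illustrative stand-ins, not the kernel's x86_cpu_id/x86_match_cpu machinery:

#include <stdio.h>

#define BIT(n)		(1u << (n))
#define RETBLEED	BIT(3)
#define SRSO		BIT(5)
#define GDS		BIT(6)

struct vuln_entry {
	unsigned char vendor;		/* 0 = Intel, 1 = AMD (simplified) */
	unsigned char family;
	unsigned int flags;		/* OR of the vulnerability bits above */
};

static const struct vuln_entry blacklist[] = {
	{ 1, 0x17, RETBLEED | SRSO },
	{ 1, 0x19, SRSO },
	{ 0, 0, 0 }			/* terminator */
};

/* Rough analogue of cpu_matches(): does a matching entry carry 'which'? */
static int cpu_matches(unsigned char vendor, unsigned char family, unsigned int which)
{
	for (const struct vuln_entry *e = blacklist; e->flags; e++)
		if (e->vendor == vendor && e->family == family && (e->flags & which))
			return 1;
	return 0;
}

int main(void)
{
	printf("AMD family 0x19 affected by SRSO: %d\n", cpu_matches(1, 0x19, SRSO));
	printf("AMD family 0x19 affected by GDS:  %d\n", cpu_matches(1, 0x19, GDS));
	return 0;
}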
@@ -1271,6 +1286,21 @@
 	    !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
 	    !(ia32_cap & ARCH_CAP_PBRSB_NO))
 		setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
+
+	/*
+	 * Check if CPU is vulnerable to GDS. If running in a virtual machine on
+	 * an affected processor, the VMM may have disabled the use of GATHER by
+	 * disabling AVX2. The only way to do this in HW is to clear XCR0[2],
+	 * which means that AVX will be disabled.
+	 */
+	if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
+	    boot_cpu_has(X86_FEATURE_AVX))
+		setup_force_cpu_bug(X86_BUG_GDS);
+
+	if (!cpu_has(c, X86_FEATURE_SRSO_NO)) {
+		if (cpu_matches(cpu_vuln_blacklist, SRSO))
+			setup_force_cpu_bug(X86_BUG_SRSO);
+	}
 
 	if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
 		return;
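The X86_FEATURE_AVX check above stands in for "the VMM has not cleared XCR0[2]". From user space the same state can be observed with CPUID and XGETBV; a small sketch for illustration only (it checks OSXSAVE before executing xgetbv so the instruction cannot fault):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 1;

	if (!(ecx & (1u << 27))) {		/* CPUID.1:ECX.OSXSAVE */
		puts("XSAVE not enabled by the OS; XGETBV unavailable");
		return 1;
	}

	unsigned int xcr0_lo, xcr0_hi;
	__asm__ volatile("xgetbv" : "=a"(xcr0_lo), "=d"(xcr0_hi) : "c"(0));

	/* XCR0[2] (AVX state) cleared => AVX/AVX2, and thus GATHER, unusable. */
	printf("XCR0[2] (AVX state) enabled: %s\n",
	       (xcr0_lo & (1u << 2)) ? "yes" : "no");
	return 0;
}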
@@ -1412,8 +1442,6 @@
 	cpu_set_bug_bits(c);
 
 	cpu_set_core_cap_bits(c);
-
-	fpu__init_system(c);
 
 #ifdef CONFIG_X86_32
 	/*
@@ -1792,6 +1820,8 @@
 	validate_apic_and_package_id(c);
 	x86_spec_ctrl_setup_ap();
 	update_srbds_msr();
+	if (boot_cpu_has_bug(X86_BUG_GDS))
+		update_gds_msr();
 }
 
 static __init int setup_noclflush(char *arg)
@@ -2048,13 +2078,12 @@
 
 /*
  * cpu_init() initializes state that is per-CPU. Some data is already
- * initialized (naturally) in the bootstrap process, such as the GDT
- * and IDT. We reload them nevertheless, this function acts as a
- * 'CPU state barrier', nothing should get across.
+ * initialized (naturally) in the bootstrap process, such as the GDT. We
+ * reload it nevertheless, this function acts as a 'CPU state barrier',
+ * nothing should get across.
  */
 void cpu_init(void)
 {
-	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
 	struct task_struct *cur = current;
 	int cpu = raw_smp_processor_id();
 
@@ -2067,8 +2096,6 @@
 	    early_cpu_to_node(cpu) != NUMA_NO_NODE)
 		set_numa_node(early_cpu_to_node(cpu));
 #endif
-	setup_getcpu(cpu);
-
 	pr_debug("Initializing CPU#%d\n", cpu);
 
 	if (IS_ENABLED(CONFIG_X86_64) || cpu_feature_enabled(X86_FEATURE_VME) ||
@@ -2080,7 +2107,6 @@
 	 * and set up the GDT descriptor:
 	 */
 	switch_to_new_gdt(cpu);
-	load_current_idt();
 
 	if (IS_ENABLED(CONFIG_X86_64)) {
 		loadsegment(fs, 0);
@@ -2100,12 +2126,6 @@
 	initialize_tlbstate_and_flush();
 	enter_lazy_tlb(&init_mm, cur);
 
-	/* Initialize the TSS. */
-	tss_setup_ist(tss);
-	tss_setup_io_bitmap(tss);
-	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
-
-	load_TR_desc();
 	/*
 	 * sp0 points to the entry trampoline stack regardless of what task
 	 * is running.
@@ -2119,43 +2139,73 @@
 
 	doublefault_init_cpu_tss();
 
-	fpu__init_cpu();
-
 	if (is_uv_system())
 		uv_cpu_init();
 
 	load_fixmap_gdt(cpu);
 }
 
-/*
+#ifdef CONFIG_SMP
+void cpu_init_secondary(void)
+{
+	/*
+	 * Relies on the BP having set-up the IDT tables, which are loaded
+	 * on this CPU in cpu_init_exception_handling().
+	 */
+	cpu_init_exception_handling();
+	cpu_init();
+	fpu__init_cpu();
+}
+#endif
+
+#ifdef CONFIG_MICROCODE_LATE_LOADING
+/**
+ * store_cpu_caps() - Store a snapshot of CPU capabilities
+ * @curr_info: Pointer where to store it
+ *
+ * Returns: None
+ */
+void store_cpu_caps(struct cpuinfo_x86 *curr_info)
+{
+	/* Reload CPUID max function as it might've changed. */
+	curr_info->cpuid_level = cpuid_eax(0);
+
+	/* Copy all capability leafs and pick up the synthetic ones. */
+	memcpy(&curr_info->x86_capability, &boot_cpu_data.x86_capability,
+	       sizeof(curr_info->x86_capability));
+
+	/* Get the hardware CPUID leafs */
+	get_cpu_cap(curr_info);
+}
+
+/**
+ * microcode_check() - Check if any CPU capabilities changed after an update.
+ * @prev_info: CPU capabilities stored before an update.
+ *
  * The microcode loader calls this upon late microcode load to recheck features,
  * only when microcode has been updated. Caller holds microcode_mutex and CPU
  * hotplug lock.
+ *
+ * Return: None
  */
-void microcode_check(void)
+void microcode_check(struct cpuinfo_x86 *prev_info)
 {
-	struct cpuinfo_x86 info;
+	struct cpuinfo_x86 curr_info;
 
 	perf_check_microcode();
 
-	/* Reload CPUID max function as it might've changed. */
-	info.cpuid_level = cpuid_eax(0);
+	amd_check_microcode();
 
-	/*
-	 * Copy all capability leafs to pick up the synthetic ones so that
-	 * memcmp() below doesn't fail on that. The ones coming from CPUID will
-	 * get overwritten in get_cpu_cap().
-	 */
-	memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability));
+	store_cpu_caps(&curr_info);
 
-	get_cpu_cap(&info);
-
-	if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)))
+	if (!memcmp(&prev_info->x86_capability, &curr_info.x86_capability,
+		    sizeof(prev_info->x86_capability)))
 		return;
 
 	pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n");
 	pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
 }
+#endif
 
 /*
  * Invoked from core CPU hotplug code after hotplug operations
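The refactor splits the old microcode_check() into a snapshot step (store_cpu_caps(), taken before the update) and a compare step that receives that snapshot as @prev_info. A user-space analogue of the same snapshot-and-memcmp pattern, for illustration only (the struct and helpers below are made up, not kernel APIs):

#include <cpuid.h>
#include <stdio.h>
#include <string.h>

struct cpu_caps {
	unsigned int leaf1_ecx, leaf1_edx;	/* CPUID(1) feature words */
	unsigned int leaf7_ebx, leaf7_ecx;	/* CPUID(7,0) feature words */
};

static void snapshot_caps(struct cpu_caps *c)
{
	unsigned int eax, ebx, ecx, edx;

	__get_cpuid(1, &eax, &ebx, &c->leaf1_ecx, &c->leaf1_edx);
	__get_cpuid_count(7, 0, &eax, &c->leaf7_ebx, &c->leaf7_ecx, &edx);
}

int main(void)
{
	struct cpu_caps prev, curr;

	snapshot_caps(&prev);
	/* ... a microcode update would happen here ... */
	snapshot_caps(&curr);

	if (memcmp(&prev, &curr, sizeof(prev)))
		puts("CPU feature words changed");
	else
		puts("CPU feature words unchanged");
	return 0;
}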
@@ -2167,3 +2217,69 @@
 	/* Check whether IPI broadcasting can be enabled */
 	apic_smt_update();
 }
+
+void __init arch_cpu_finalize_init(void)
+{
+	identify_boot_cpu();
+
+	/*
+	 * identify_boot_cpu() initialized SMT support information, let the
+	 * core code know.
+	 */
+	cpu_smt_check_topology();
+
+	if (!IS_ENABLED(CONFIG_SMP)) {
+		pr_info("CPU: ");
+		print_cpu_info(&boot_cpu_data);
+	}
+
+	cpu_select_mitigations();
+
+	arch_smt_update();
+
+	if (IS_ENABLED(CONFIG_X86_32)) {
+		/*
+		 * Check whether this is a real i386 which is no longer
+		 * supported and fix up the utsname.
+		 */
+		if (boot_cpu_data.x86 < 4)
+			panic("Kernel requires i486+ for 'invlpg' and other features");
+
+		init_utsname()->machine[1] =
+			'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
+	}
+
+	/*
+	 * Must be before alternatives because it might set or clear
+	 * feature bits.
+	 */
+	fpu__init_system();
+	fpu__init_cpu();
+
+	alternative_instructions();
+
+	if (IS_ENABLED(CONFIG_X86_64)) {
+		/*
+		 * Make sure the first 2MB area is not mapped by huge pages.
+		 * There are typically fixed size MTRRs in there and overlapping
+		 * MTRRs into large pages causes slow downs.
+		 *
+		 * Right now we don't do that with gbpages because there seems
+		 * very little benefit for that case.
+		 */
+		if (!direct_gbpages)
+			set_memory_4k((unsigned long)__va(0), 1);
+	} else {
+		fpu__init_check_bugs();
+	}
+
+	/*
+	 * This needs to be called before any devices perform DMA
+	 * operations that might use the SWIOTLB bounce buffers. It will
+	 * mark the bounce buffers as decrypted so that their usage will
+	 * not cause "plain-text" data to be decrypted when accessed. It
+	 * must be called after late_time_init() so that Hyper-V x86/x64
+	 * hypercalls work when the SWIOTLB bounce buffers are decrypted.
+	 */
+	mem_encrypt_init();
+}
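The 32-bit branch of arch_cpu_finalize_init() clamps the reported machine string to the i486..i686 range by rewriting the second character of the utsname machine field. A standalone illustration of that computation (user space, hypothetical family values):

#include <stdio.h>

int main(void)
{
	/* Family 4 -> "i486", 5 -> "i586", 6 and above -> "i686" (clamped). */
	for (int family = 4; family <= 8; family++) {
		char machine[5] = "i386";

		machine[1] = '0' + (family > 6 ? 6 : family);
		printf("family %d -> %s\n", family, machine);
	}
	return 0;
}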