2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
kernel/arch/x86/include/asm/nospec-branch.h
@@ -4,11 +4,16 @@
 #define _ASM_X86_NOSPEC_BRANCH_H_
 
 #include <linux/static_key.h>
+#include <linux/objtool.h>
+#include <linux/linkage.h>
 
 #include <asm/alternative.h>
-#include <asm/alternative-asm.h>
 #include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
+#include <asm/unwind_hints.h>
+#include <asm/percpu.h>
+
+#define RETPOLINE_THUNK_SIZE	32
 
 /*
  * Fill the CPU return stack buffer.
@@ -28,46 +33,61 @@
  */
 
 #define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */
-#define RSB_FILL_LOOPS		16	/* To avoid underflow */
 
 /*
+ * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
+ */
+#define __FILL_RETURN_SLOT			\
+	ANNOTATE_INTRA_FUNCTION_CALL;		\
+	call	772f;				\
+	int3;					\
+772:
+
+/*
+ * Stuff the entire RSB.
+ *
  * Google experimented with loop-unrolling and this turned out to be
  * the optimal version - two calls, each with their own speculation
  * trap should their return address end up getting used, in a loop.
  */
-#define __FILL_RETURN_BUFFER(reg, nr, sp)	\
-	mov	$(nr/2), reg;			\
-771:						\
-	call	772f;				\
-773:	/* speculation trap */			\
-	pause;					\
-	lfence;					\
-	jmp	773b;				\
-772:						\
-	call	774f;				\
-775:	/* speculation trap */			\
-	pause;					\
-	lfence;					\
-	jmp	775b;				\
-774:						\
-	dec	reg;				\
-	jnz	771b;				\
-	add	$(BITS_PER_LONG/8) * nr, sp;
-
-#ifdef __ASSEMBLY__
+#ifdef CONFIG_X86_64
+#define __FILL_RETURN_BUFFER(reg, nr)			\
+	mov	$(nr/2), reg;				\
+771:							\
+	__FILL_RETURN_SLOT				\
+	__FILL_RETURN_SLOT				\
+	add	$(BITS_PER_LONG/8) * 2, %_ASM_SP;	\
+	dec	reg;					\
+	jnz	771b;					\
+	/* barrier for jnz misprediction */		\
+	lfence;
+#else
+/*
+ * i386 doesn't unconditionally have LFENCE, as such it can't
+ * do a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr)			\
+	.rept nr;					\
+	__FILL_RETURN_SLOT;				\
+	.endr;						\
+	add	$(BITS_PER_LONG/8) * nr, %_ASM_SP;
+#endif
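For illustration, a sketch of what one iteration of the 64-bit
__FILL_RETURN_BUFFER(reg, nr) above expands to once __FILL_RETURN_SLOT is
substituted (the ANNOTATE_INTRA_FUNCTION_CALL directives are omitted here, and
the repeated 772 labels are GAS local numeric labels, so each "call 772f"
targets its own following label):

	mov	$(nr/2), reg
771:
	call	772f		/* pushes a return address: one RSB entry */
	int3			/* speculation trap if that entry is consumed */
772:
	call	772f		/* second slot */
	int3
772:
	add	$(BITS_PER_LONG/8) * 2, %_ASM_SP	/* discard both return addresses */
	dec	reg
	jnz	771b
	lfence			/* fence a mispredicted loop exit */

Each retired CALL plants one RSB entry whose target is a trapping instruction,
and the stack adjust throws the architectural return addresses away.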
 
 /*
- * This should be used immediately before a retpoline alternative. It tells
- * objtool where the retpolines are so that it can make sense of the control
- * flow by just reading the original instruction(s) and ignoring the
- * alternatives.
+ * Stuff a single RSB slot.
+ *
+ * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
+ * forced to retire before letting a RET instruction execute.
+ *
+ * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
+ * before this point.
  */
-.macro ANNOTATE_NOSPEC_ALTERNATIVE
-	.Lannotate_\@:
-	.pushsection .discard.nospec
-	.long .Lannotate_\@ - .
-	.popsection
-.endm
+#define __FILL_ONE_RETURN				\
+	__FILL_RETURN_SLOT				\
+	add	$(BITS_PER_LONG/8), %_ASM_SP;		\
+	lfence;
+
+#ifdef __ASSEMBLY__
 
 /*
  * This should be used immediately before an indirect jump/call. It tells
@@ -82,31 +102,20 @@
 .endm
 
 /*
- * These are the bare retpoline primitives for indirect jmp and call.
- * Do not use these directly; they only exist to make the ALTERNATIVE
- * invocation below less ugly.
+ * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
+ * vs RETBleed validation.
  */
-.macro RETPOLINE_JMP reg:req
-	call	.Ldo_rop_\@
-.Lspec_trap_\@:
-	pause
-	lfence
-	jmp	.Lspec_trap_\@
-.Ldo_rop_\@:
-	mov	\reg, (%_ASM_SP)
-	ret
-.endm
+#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE
 
 /*
- * This is a wrapper around RETPOLINE_JMP so the called function in reg
- * returns to the instruction after the macro.
+ * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
+ * eventually turn into its own annotation.
  */
-.macro RETPOLINE_CALL reg:req
-	jmp	.Ldo_call_\@
-.Ldo_retpoline_jmp_\@:
-	RETPOLINE_JMP	\reg
-.Ldo_call_\@:
-	call	.Ldo_retpoline_jmp_\@
+.macro ANNOTATE_UNRET_END
+#ifdef CONFIG_DEBUG_ENTRY
+	ANNOTATE_RETPOLINE_SAFE
+	nop
+#endif
 .endm
 
 /*
@@ -116,23 +125,21 @@
  */
 .macro JMP_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
-	ANNOTATE_NOSPEC_ALTERNATIVE
-	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \
-		__stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
-		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+		__stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
+		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE
 #else
-	jmp	*\reg
+	jmp	*%\reg
 #endif
 .endm
 
 .macro CALL_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
-	ANNOTATE_NOSPEC_ALTERNATIVE
-	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \
-		__stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE, \
-		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
+	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
+		__stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
+		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_LFENCE
 #else
-	call	*\reg
+	call	*%\reg
 #endif
 .endm
 
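A minimal usage sketch for the two macros above, as they would appear in a .S
file (register names are passed without the '%' prefix, which the macros add
themselves):

	/* instead of: call *%rdi */
	CALL_NOSPEC	rdi

	/* instead of: jmp *%rax */
	JMP_NOSPEC	rax

Depending on which alternative gets patched in, these become a plain
(annotated) indirect branch, a branch through __x86_indirect_thunk_<reg>, or
an LFENCE followed by the indirect branch.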
@@ -140,23 +147,41 @@
  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
  * monstrosity above, manually.
  */
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
-#ifdef CONFIG_RETPOLINE
-	ANNOTATE_NOSPEC_ALTERNATIVE
-	ALTERNATIVE "jmp .Lskip_rsb_\@", \
-		__stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
-		\ftr
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
+	ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
+		__stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
+		__stringify(__FILL_ONE_RETURN), \ftr2
+
 .Lskip_rsb_\@:
+.endm
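Hedged usage sketch: callers select the full RSB stuff via \ftr and, optionally,
the lighter single-slot __FILL_ONE_RETURN via \ftr2. The feature flags and
scratch registers below are illustrative (recalled from in-tree call sites,
not defined by this header):

	/* context switch: refill the whole RSB when the feature bit is set */
	FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW

	/* VM exit: full stuff on ftr, else a single PBRSB slot on ftr2 */
	FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT, X86_FEATURE_RSB_VMEXIT_LITE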
+
+#ifdef CONFIG_CPU_UNRET_ENTRY
+#define CALL_ZEN_UNTRAIN_RET	"call zen_untrain_ret"
+#else
+#define CALL_ZEN_UNTRAIN_RET	""
+#endif
+
+/*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
+ * return thunk isn't mapped into the userspace tables (then again, AMD
+ * typically has NO_MELTDOWN).
+ *
+ * While zen_untrain_ret() doesn't clobber anything but requires a stack,
+ * entry_ibpb() will clobber AX, CX, DX.
+ *
+ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
+ * where we have a stack but before any RET instruction.
+ */
+.macro UNTRAIN_RET
+#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY)
+	ANNOTATE_UNRET_END
+	ALTERNATIVE_2 "", \
+		CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \
+		"call entry_ibpb", X86_FEATURE_ENTRY_IBPB
 #endif
 .endm
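Placement sketch based on the comment above (the fragment is illustrative; the
authoritative call sites live in the entry code, and the scratch register for
SWITCH_TO_KERNEL_CR3 varies by path):

	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi	/* kernel page tables first */
	/* ... normal entry work; a usable stack exists here ... */
	UNTRAIN_RET				/* must run before any RET */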
 
 #else /* __ASSEMBLY__ */
-
-#define ANNOTATE_NOSPEC_ALTERNATIVE		\
-	"999:\n\t"				\
-	".pushsection .discard.nospec\n\t"	\
-	".long 999b - .\n\t"			\
-	".popsection\n\t"
 
 #define ANNOTATE_RETPOLINE_SAFE			\
 	"999:\n\t"				\
@@ -164,7 +189,21 @@
 	_ASM_PTR " 999b\n\t"			\
 	".popsection\n\t"
 
+extern void __x86_return_thunk(void);
+extern void zen_untrain_ret(void);
+extern void entry_ibpb(void);
+
 #ifdef CONFIG_RETPOLINE
+
+typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
+
+#define GEN(reg) \
+	extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
+extern retpoline_thunk_t __x86_indirect_thunk_array[];
+
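For clarity: each pass of GEN() over <asm/GEN-for-each-reg.h> declares one
thunk per general-purpose register, roughly (a sketch of the expansion, not
literal header contents):

	extern retpoline_thunk_t __x86_indirect_thunk_rax;
	extern retpoline_thunk_t __x86_indirect_thunk_rbx;
	/* ... and so on, with __x86_indirect_thunk_array covering them as one
	 * contiguous block of RETPOLINE_THUNK_SIZE-spaced entries. */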
 #ifdef CONFIG_X86_64
 
 /*
@@ -172,7 +211,6 @@
  * which is ensured when CONFIG_RETPOLINE is defined.
  */
 # define CALL_NOSPEC					\
-	ANNOTATE_NOSPEC_ALTERNATIVE			\
 	ALTERNATIVE_2(					\
 	ANNOTATE_RETPOLINE_SAFE				\
 	"call *%[thunk_target]\n",			\
@@ -181,7 +219,8 @@
 	"lfence;\n"					\
 	ANNOTATE_RETPOLINE_SAFE				\
 	"call *%[thunk_target]\n",			\
-	X86_FEATURE_RETPOLINE_AMD)
+	X86_FEATURE_RETPOLINE_LFENCE)
+
 # define THUNK_TARGET(addr) [thunk_target] "r" (addr)
 
 #else /* CONFIG_X86_32 */
@@ -191,7 +230,6 @@
  * here, anyway.
  */
 # define CALL_NOSPEC					\
-	ANNOTATE_NOSPEC_ALTERNATIVE			\
 	ALTERNATIVE_2(					\
 	ANNOTATE_RETPOLINE_SAFE				\
 	"call *%[thunk_target]\n",			\
@@ -211,7 +249,7 @@
 	"lfence;\n"					\
 	ANNOTATE_RETPOLINE_SAFE				\
 	"call *%[thunk_target]\n",			\
-	X86_FEATURE_RETPOLINE_AMD)
+	X86_FEATURE_RETPOLINE_LFENCE)
 
 # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
 #endif
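A hedged sketch of the C-side pattern for CALL_NOSPEC/THUNK_TARGET (the wrapper
function, output register and clobber list are illustrative only; a real call
site must also account for every register the called function may clobber under
the C calling convention):

	static inline unsigned long nospec_call_demo(unsigned long (*fn)(void))
	{
		unsigned long ret;

		/* Emit whichever retpoline-aware indirect call was patched in. */
		asm volatile (CALL_NOSPEC
			      : "=a" (ret), ASM_CALL_CONSTRAINT
			      : THUNK_TARGET(fn)
			      : "memory", "cc");
		return ret;
	}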
@@ -223,9 +261,12 @@
 /* The Spectre V2 mitigation variants */
 enum spectre_v2_mitigation {
 	SPECTRE_V2_NONE,
-	SPECTRE_V2_RETPOLINE_GENERIC,
-	SPECTRE_V2_RETPOLINE_AMD,
-	SPECTRE_V2_IBRS_ENHANCED,
+	SPECTRE_V2_RETPOLINE,
+	SPECTRE_V2_LFENCE,
+	SPECTRE_V2_EIBRS,
+	SPECTRE_V2_EIBRS_RETPOLINE,
+	SPECTRE_V2_EIBRS_LFENCE,
+	SPECTRE_V2_IBRS,
 };
 
 /* The indirect branch speculation control variants */
@@ -248,27 +289,6 @@
 extern char __indirect_thunk_start[];
 extern char __indirect_thunk_end[];
 
-/*
- * On VMEXIT we must ensure that no RSB predictions learned in the guest
- * can be followed in the host, by overwriting the RSB completely. Both
- * retpoline and IBRS mitigations for Spectre v2 need this; only on future
- * CPUs with IBRS_ALL *might* it be avoided.
- */
-static inline void vmexit_fill_RSB(void)
-{
-#ifdef CONFIG_RETPOLINE
-	unsigned long loops;
-
-	asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
-		      ALTERNATIVE("jmp 910f",
-				  __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
-				  X86_FEATURE_RETPOLINE)
-		      "910:"
-		      : "=r" (loops), ASM_CALL_CONSTRAINT
-		      : : "memory" );
-#endif
-}
-
 static __always_inline
 void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
 {
@@ -289,6 +309,9 @@
 
 /* The Intel SPEC CTRL MSR base value cache */
 extern u64 x86_spec_ctrl_base;
+DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
+extern void update_spec_ctrl_cond(u64 val);
+extern u64 spec_ctrl_current(void);
 
 /*
  * With retpoline, we must use IBRS to restrict branch prediction
@@ -298,18 +321,18 @@
  */
 #define firmware_restrict_branch_speculation_start()	\
 do {							\
-	u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS;	\
-							\
 	preempt_disable();				\
-	alternative_msr_write(MSR_IA32_SPEC_CTRL, val,	\
+	alternative_msr_write(MSR_IA32_SPEC_CTRL,	\
+			      spec_ctrl_current() | SPEC_CTRL_IBRS, \
 			      X86_FEATURE_USE_IBRS_FW);	\
+	alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \
+			      X86_FEATURE_USE_IBPB_FW);	\
 } while (0)
 
 #define firmware_restrict_branch_speculation_end()	\
 do {							\
-	u64 val = x86_spec_ctrl_base;			\
-							\
-	alternative_msr_write(MSR_IA32_SPEC_CTRL, val,	\
+	alternative_msr_write(MSR_IA32_SPEC_CTRL,	\
+			      spec_ctrl_current(),	\
 			      X86_FEATURE_USE_IBRS_FW);	\
 	preempt_enable();				\
 } while (0)
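Usage sketch (the wrapper name is illustrative; in-tree users bracket firmware
calls this way so that IBRS, and now an IBPB, cover the duration of the call):

	static inline void firmware_call_demo(void (*fw_fn)(void))
	{
		firmware_restrict_branch_speculation_start();	/* disables preemption */
		fw_fn();
		firmware_restrict_branch_speculation_end();	/* re-enables preemption */
	}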
@@ -320,6 +343,8 @@
 
 DECLARE_STATIC_KEY_FALSE(mds_user_clear);
 DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
+
+DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
 
 #include <asm/segment.h>
 
@@ -369,64 +394,5 @@
 }
 
 #endif /* __ASSEMBLY__ */
-
-/*
- * Below is used in the eBPF JIT compiler and emits the byte sequence
- * for the following assembly:
- *
- * With retpolines configured:
- *
- *    callq do_rop
- *  spec_trap:
- *    pause
- *    lfence
- *    jmp spec_trap
- *  do_rop:
- *    mov %rax,(%rsp) for x86_64
- *    mov %edx,(%esp) for x86_32
- *    retq
- *
- * Without retpolines configured:
- *
- *    jmp *%rax for x86_64
- *    jmp *%edx for x86_32
- */
-#ifdef CONFIG_RETPOLINE
-# ifdef CONFIG_X86_64
-#  define RETPOLINE_RAX_BPF_JIT_SIZE	17
-#  define RETPOLINE_RAX_BPF_JIT()				\
-do {								\
-	EMIT1_off32(0xE8, 7);	 /* callq do_rop */		\
-	/* spec_trap: */					\
-	EMIT2(0xF3, 0x90);       /* pause */			\
-	EMIT3(0x0F, 0xAE, 0xE8); /* lfence */			\
-	EMIT2(0xEB, 0xF9);       /* jmp spec_trap */		\
-	/* do_rop: */						\
-	EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */	\
-	EMIT1(0xC3);             /* retq */			\
-} while (0)
-# else /* !CONFIG_X86_64 */
-#  define RETPOLINE_EDX_BPF_JIT()				\
-do {								\
-	EMIT1_off32(0xE8, 7);	 /* call do_rop */		\
-	/* spec_trap: */					\
-	EMIT2(0xF3, 0x90);       /* pause */			\
-	EMIT3(0x0F, 0xAE, 0xE8); /* lfence */			\
-	EMIT2(0xEB, 0xF9);       /* jmp spec_trap */		\
-	/* do_rop: */						\
-	EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */		\
-	EMIT1(0xC3);             /* ret */			\
-} while (0)
-# endif
-#else /* !CONFIG_RETPOLINE */
-# ifdef CONFIG_X86_64
-#  define RETPOLINE_RAX_BPF_JIT_SIZE	2
-#  define RETPOLINE_RAX_BPF_JIT()				\
-	EMIT2(0xFF, 0xE0);       /* jmp *%rax */
-# else /* !CONFIG_X86_64 */
-#  define RETPOLINE_EDX_BPF_JIT()				\
-	EMIT2(0xFF, 0xE2)        /* jmp *%edx */
-# endif
-#endif
 
 #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */