2024-10-12 a5969cabbb4660eab42b6ef0412cbbd1200cf14d
kernel/arch/x86/include/asm/nospec-branch.h
@@ -4,11 +4,16 @@
 #define _ASM_X86_NOSPEC_BRANCH_H_
 
 #include <linux/static_key.h>
+#include <linux/objtool.h>
+#include <linux/linkage.h>
 
 #include <asm/alternative.h>
-#include <asm/alternative-asm.h>
 #include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
+#include <asm/unwind_hints.h>
+#include <asm/percpu.h>
+
+#define RETPOLINE_THUNK_SIZE	32
 
 /*
  * Fill the CPU return stack buffer.
@@ -28,46 +33,61 @@
  */
 
 #define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */
-#define RSB_FILL_LOOPS		16	/* To avoid underflow */
 
 /*
+ * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
+ */
+#define __FILL_RETURN_SLOT				\
+	ANNOTATE_INTRA_FUNCTION_CALL;			\
+	call	772f;					\
+	int3;						\
+772:
+
+/*
+ * Stuff the entire RSB.
+ *
  * Google experimented with loop-unrolling and this turned out to be
  * the optimal version - two calls, each with their own speculation
  * trap should their return address end up getting used, in a loop.
  */
-#define __FILL_RETURN_BUFFER(reg, nr, sp)	\
-	mov	$(nr/2), reg;			\
-771:						\
-	call	772f;				\
-773:	/* speculation trap */			\
-	pause;					\
-	lfence;					\
-	jmp	773b;				\
-772:						\
-	call	774f;				\
-775:	/* speculation trap */			\
-	pause;					\
-	lfence;					\
-	jmp	775b;				\
-774:						\
-	dec	reg;				\
-	jnz	771b;				\
-	add	$(BITS_PER_LONG/8) * nr, sp;
-
-#ifdef __ASSEMBLY__
+#ifdef CONFIG_X86_64
+#define __FILL_RETURN_BUFFER(reg, nr)			\
+	mov	$(nr/2), reg;				\
+771:							\
+	__FILL_RETURN_SLOT				\
+	__FILL_RETURN_SLOT				\
+	add	$(BITS_PER_LONG/8) * 2, %_ASM_SP;	\
+	dec	reg;					\
+	jnz	771b;					\
+	/* barrier for jnz misprediction */		\
+	lfence;
+#else
+/*
+ * i386 doesn't unconditionally have LFENCE, as such it can't
+ * do a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr)			\
+	.rept nr;					\
+	__FILL_RETURN_SLOT;				\
+	.endr;						\
+	add	$(BITS_PER_LONG/8) * nr, %_ASM_SP;
+#endif
 
 /*
- * This should be used immediately before a retpoline alternative. It tells
- * objtool where the retpolines are so that it can make sense of the control
- * flow by just reading the original instruction(s) and ignoring the
- * alternatives.
+ * Stuff a single RSB slot.
+ *
+ * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
+ * forced to retire before letting a RET instruction execute.
+ *
+ * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
+ * before this point.
  */
-.macro ANNOTATE_NOSPEC_ALTERNATIVE
-	.Lannotate_\@:
-	.pushsection .discard.nospec
-	.long .Lannotate_\@ - .
-	.popsection
-.endm
+#define __FILL_ONE_RETURN				\
+	__FILL_RETURN_SLOT				\
+	add	$(BITS_PER_LONG/8), %_ASM_SP;		\
+	lfence;
+
+#ifdef __ASSEMBLY__
 
 /*
  * This should be used immediately before an indirect jump/call. It tells
@@ -82,31 +102,20 @@
 .endm
 
 /*
- * These are the bare retpoline primitives for indirect jmp and call.
- * Do not use these directly; they only exist to make the ALTERNATIVE
- * invocation below less ugly.
+ * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
+ * vs RETBleed validation.
  */
-.macro RETPOLINE_JMP reg:req
-	call	.Ldo_rop_\@
-.Lspec_trap_\@:
-	pause
-	lfence
-	jmp	.Lspec_trap_\@
-.Ldo_rop_\@:
-	mov	\reg, (%_ASM_SP)
-	ret
-.endm
+#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE
 
 /*
- * This is a wrapper around RETPOLINE_JMP so the called function in reg
- * returns to the instruction after the macro.
+ * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
+ * eventually turn into its own annotation.
  */
-.macro RETPOLINE_CALL reg:req
-	jmp	.Ldo_call_\@
-.Ldo_retpoline_jmp_\@:
-	RETPOLINE_JMP \reg
-.Ldo_call_\@:
-	call	.Ldo_retpoline_jmp_\@
+.macro ANNOTATE_UNRET_END
+#if (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO))
+	ANNOTATE_RETPOLINE_SAFE
+	nop
+#endif
 .endm
 
 /*
@@ -116,23 +125,21 @@
  */
 .macro JMP_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
-	ANNOTATE_NOSPEC_ALTERNATIVE
-	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg),	\
-		__stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE,	\
-		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg),	\
+		      __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
+		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE
 #else
-	jmp	*\reg
+	jmp	*%\reg
 #endif
 .endm
 
 .macro CALL_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
-	ANNOTATE_NOSPEC_ALTERNATIVE
-	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg),	\
-		__stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
-		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
+	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg),	\
+		      __stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
+		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_LFENCE
 #else
-	call	*\reg
+	call	*%\reg
 #endif
 .endm
 
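The three-way choice above is driven by feature bits that get forced during mitigation selection. A hedged sketch of that boot-time side, loosely modelled on spectre_v2_select_mitigation() in arch/x86/kernel/cpu/bugs.c (the wrapper name is illustrative, not an in-tree function):

#include <asm/cpufeature.h>
#include <asm/nospec-branch.h>

/* Illustrative only: force the CPU caps that make the ALTERNATIVE_2 in
 * JMP_NOSPEC/CALL_NOSPEC patch in the retpoline or LFENCE variant. */
static void example_enable_retpoline(enum spectre_v2_mitigation mode)
{
	if (mode == SPECTRE_V2_LFENCE)
		setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE);

	setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
}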
@@ -140,23 +147,42 @@
  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
  * monstrosity above, manually.
  */
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
-#ifdef CONFIG_RETPOLINE
-	ANNOTATE_NOSPEC_ALTERNATIVE
-	ALTERNATIVE "jmp .Lskip_rsb_\@",				\
-		__stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))	\
-		\ftr
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
+	ALTERNATIVE_2 "jmp .Lskip_rsb_\@",				\
+		__stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr,	\
+		__stringify(__FILL_ONE_RETURN), \ftr2
+
 .Lskip_rsb_\@:
+.endm
+
+#ifdef CONFIG_CPU_UNRET_ENTRY
+#define CALL_UNTRAIN_RET	"call entry_untrain_ret"
+#else
+#define CALL_UNTRAIN_RET	""
+#endif
+
+/*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
+ * return thunk isn't mapped into the userspace tables (then again, AMD
+ * typically has NO_MELTDOWN).
+ *
+ * While retbleed_untrain_ret() doesn't clobber anything but requires stack,
+ * entry_ibpb() will clobber AX, CX, DX.
+ *
+ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
+ * where we have a stack but before any RET instruction.
+ */
+.macro UNTRAIN_RET
+#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
+	defined(CONFIG_CPU_SRSO)
+	ANNOTATE_UNRET_END
+	ALTERNATIVE_2 "",						\
+		      CALL_UNTRAIN_RET, X86_FEATURE_UNRET,		\
+		      "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
 #endif
 .endm
 
 #else /* __ASSEMBLY__ */
-
-#define ANNOTATE_NOSPEC_ALTERNATIVE				\
-	"999:\n\t"						\
-	".pushsection .discard.nospec\n\t"			\
-	".long 999b - .\n\t"					\
-	".popsection\n\t"
 
 #define ANNOTATE_RETPOLINE_SAFE					\
 	"999:\n\t"						\
@@ -164,7 +190,34 @@
 	_ASM_PTR " 999b\n\t"					\
 	".popsection\n\t"
 
+#ifdef CONFIG_RETHUNK
+extern void __x86_return_thunk(void);
+#else
+static inline void __x86_return_thunk(void) {}
+#endif
+
+extern void retbleed_return_thunk(void);
+extern void srso_return_thunk(void);
+extern void srso_alias_return_thunk(void);
+
+extern void retbleed_untrain_ret(void);
+extern void srso_untrain_ret(void);
+extern void srso_alias_untrain_ret(void);
+
+extern void entry_untrain_ret(void);
+extern void entry_ibpb(void);
+
 #ifdef CONFIG_RETPOLINE
+
+typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
+
+#define GEN(reg) \
+	extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
+extern retpoline_thunk_t __x86_indirect_thunk_array[];
+
 #ifdef CONFIG_X86_64
 
 /*
@@ -172,7 +225,6 @@
  * which is ensured when CONFIG_RETPOLINE is defined.
  */
 # define CALL_NOSPEC						\
-	ANNOTATE_NOSPEC_ALTERNATIVE				\
 	ALTERNATIVE_2(						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
@@ -181,7 +233,8 @@
 	"lfence;\n"						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
-	X86_FEATURE_RETPOLINE_AMD)
+	X86_FEATURE_RETPOLINE_LFENCE)
+
 # define THUNK_TARGET(addr) [thunk_target] "r" (addr)
 
 #else /* CONFIG_X86_32 */
@@ -191,7 +244,6 @@
  * here, anyway.
  */
 # define CALL_NOSPEC						\
-	ANNOTATE_NOSPEC_ALTERNATIVE				\
 	ALTERNATIVE_2(						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
@@ -211,7 +263,7 @@
 	"lfence;\n"						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
-	X86_FEATURE_RETPOLINE_AMD)
+	X86_FEATURE_RETPOLINE_LFENCE)
 
 # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
 #endif
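On the C side, callers hand a function pointer in through THUNK_TARGET() and use CALL_NOSPEC as the asm template; the Hyper-V hypercall wrappers in arch/x86/include/asm/mshyperv.h are one in-tree example. A minimal, x86_64-flavoured sketch; the dispatcher is hypothetical and the clobber list assumes the SysV calling convention:

#include <asm/asm.h>
#include <asm/nospec-branch.h>

/* Hypothetical helper: indirectly call a no-argument function without
 * leaving an attacker-trainable indirect branch behind. */
static unsigned long example_call_nospec(unsigned long (*fn)(void))
{
	unsigned long ret;

	asm volatile(CALL_NOSPEC
		     : "=a" (ret), ASM_CALL_CONSTRAINT
		     : THUNK_TARGET(fn)
		     : "memory", "cc", "rcx", "rdx", "rsi", "rdi",
		       "r8", "r9", "r10", "r11");

	return ret;
}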
@@ -223,9 +275,12 @@
 /* The Spectre V2 mitigation variants */
 enum spectre_v2_mitigation {
 	SPECTRE_V2_NONE,
-	SPECTRE_V2_RETPOLINE_GENERIC,
-	SPECTRE_V2_RETPOLINE_AMD,
-	SPECTRE_V2_IBRS_ENHANCED,
+	SPECTRE_V2_RETPOLINE,
+	SPECTRE_V2_LFENCE,
+	SPECTRE_V2_EIBRS,
+	SPECTRE_V2_EIBRS_RETPOLINE,
+	SPECTRE_V2_EIBRS_LFENCE,
+	SPECTRE_V2_IBRS,
 };
 
 /* The indirect branch speculation control variants */
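The widened enum is what the sysfs vulnerability reporting keys off. A hedged sketch of the kind of string table it maps to; the exact wording lives in arch/x86/kernel/cpu/bugs.c and these values are approximate:

/* Illustrative mapping only. */
static const char * const example_spectre_v2_strings[] = {
	[SPECTRE_V2_NONE]		= "Vulnerable",
	[SPECTRE_V2_RETPOLINE]		= "Mitigation: Retpolines",
	[SPECTRE_V2_LFENCE]		= "Mitigation: LFENCE",
	[SPECTRE_V2_EIBRS]		= "Mitigation: Enhanced IBRS",
	[SPECTRE_V2_EIBRS_RETPOLINE]	= "Mitigation: Enhanced IBRS + Retpolines",
	[SPECTRE_V2_EIBRS_LFENCE]	= "Mitigation: Enhanced IBRS + LFENCE",
	[SPECTRE_V2_IBRS]		= "Mitigation: IBRS",
};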
@@ -248,27 +303,6 @@
 extern char __indirect_thunk_start[];
 extern char __indirect_thunk_end[];
 
-/*
- * On VMEXIT we must ensure that no RSB predictions learned in the guest
- * can be followed in the host, by overwriting the RSB completely. Both
- * retpoline and IBRS mitigations for Spectre v2 need this; only on future
- * CPUs with IBRS_ALL *might* it be avoided.
- */
-static inline void vmexit_fill_RSB(void)
-{
-#ifdef CONFIG_RETPOLINE
-	unsigned long loops;
-
-	asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
-		      ALTERNATIVE("jmp 910f",
-				  __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
-				  X86_FEATURE_RETPOLINE)
-		      "910:"
-		      : "=r" (loops), ASM_CALL_CONSTRAINT
-		      : : "memory" );
-#endif
-}
-
 static __always_inline
 void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
 {
@@ -280,15 +314,18 @@
 		: "memory");
 }
 
+extern u64 x86_pred_cmd;
+
 static inline void indirect_branch_prediction_barrier(void)
 {
-	u64 val = PRED_CMD_IBPB;
-
-	alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
+	alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB);
 }
 
 /* The Intel SPEC CTRL MSR base value cache */
 extern u64 x86_spec_ctrl_base;
+DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
+extern void update_spec_ctrl_cond(u64 val);
+extern u64 spec_ctrl_current(void);
 
 /*
  * With retpoline, we must use IBRS to restrict branch prediction
@@ -298,18 +335,18 @@
  */
 #define firmware_restrict_branch_speculation_start()			\
 do {									\
-	u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS;			\
-									\
 	preempt_disable();						\
-	alternative_msr_write(MSR_IA32_SPEC_CTRL, val,			\
+	alternative_msr_write(MSR_IA32_SPEC_CTRL,			\
+			      spec_ctrl_current() | SPEC_CTRL_IBRS,	\
 			      X86_FEATURE_USE_IBRS_FW);			\
+	alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,		\
+			      X86_FEATURE_USE_IBPB_FW);			\
 } while (0)
 
 #define firmware_restrict_branch_speculation_end()			\
 do {									\
-	u64 val = x86_spec_ctrl_base;					\
-									\
-	alternative_msr_write(MSR_IA32_SPEC_CTRL, val,			\
+	alternative_msr_write(MSR_IA32_SPEC_CTRL,			\
+			      spec_ctrl_current(),			\
 			      X86_FEATURE_USE_IBRS_FW);			\
 	preempt_enable();						\
 } while (0)
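Usage note, hedged: this bracket is meant to wrap firmware entry points, as the EFI runtime call wrappers do (cf. arch_efi_call_virt_setup()/teardown() in arch/x86/include/asm/efi.h). A simplified sketch with a placeholder callee:

#include <asm/nospec-branch.h>

/* Illustrative wrapper: raise IBRS (and, with the change above, optionally
 * issue an IBPB) around a call into firmware.  fw_call is a stand-in for a
 * real EFI/BIOS entry point. */
static unsigned long example_firmware_call(unsigned long (*fw_call)(void))
{
	unsigned long ret;

	firmware_restrict_branch_speculation_start();
	ret = fw_call();
	firmware_restrict_branch_speculation_end();

	return ret;
}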
@@ -320,6 +357,8 @@
 
 DECLARE_STATIC_KEY_FALSE(mds_user_clear);
 DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
+
+DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
 
 #include <asm/segment.h>
 
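The new key follows the same pattern as mds_user_clear/mds_idle_clear. A hedged sketch of how a guest-entry path gates the buffer clear on it (simplified; the in-tree VMX code additionally checks for assigned devices before doing this):

#include <asm/nospec-branch.h>

/* Simplified sketch: flush CPU buffers before entering a guest when the
 * MMIO Stale Data mitigation is active. */
static inline void example_mmio_stale_data_flush(void)
{
	if (static_branch_unlikely(&mmio_stale_data_clear))
		mds_clear_cpu_buffers();
}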
@@ -369,64 +408,5 @@
 }
 
 #endif /* __ASSEMBLY__ */
-
-/*
- * Below is used in the eBPF JIT compiler and emits the byte sequence
- * for the following assembly:
- *
- * With retpolines configured:
- *
- *    callq do_rop
- *  spec_trap:
- *    pause
- *    lfence
- *    jmp spec_trap
- *  do_rop:
- *    mov %rax,(%rsp) for x86_64
- *    mov %edx,(%esp) for x86_32
- *    retq
- *
- * Without retpolines configured:
- *
- *    jmp *%rax for x86_64
- *    jmp *%edx for x86_32
- */
-#ifdef CONFIG_RETPOLINE
-# ifdef CONFIG_X86_64
-#  define RETPOLINE_RAX_BPF_JIT_SIZE	17
-#  define RETPOLINE_RAX_BPF_JIT()				\
-do {								\
-	EMIT1_off32(0xE8, 7);	 /* callq do_rop */		\
-	/* spec_trap: */					\
-	EMIT2(0xF3, 0x90);	 /* pause */			\
-	EMIT3(0x0F, 0xAE, 0xE8); /* lfence */			\
-	EMIT2(0xEB, 0xF9);	 /* jmp spec_trap */		\
-	/* do_rop: */						\
-	EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */	\
-	EMIT1(0xC3);		 /* retq */			\
-} while (0)
-# else /* !CONFIG_X86_64 */
-#  define RETPOLINE_EDX_BPF_JIT()				\
-do {								\
-	EMIT1_off32(0xE8, 7);	 /* call do_rop */		\
-	/* spec_trap: */					\
-	EMIT2(0xF3, 0x90);	 /* pause */			\
-	EMIT3(0x0F, 0xAE, 0xE8); /* lfence */			\
-	EMIT2(0xEB, 0xF9);	 /* jmp spec_trap */		\
-	/* do_rop: */						\
-	EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */		\
-	EMIT1(0xC3);		 /* ret */			\
-} while (0)
-# endif
-#else /* !CONFIG_RETPOLINE */
-# ifdef CONFIG_X86_64
-#  define RETPOLINE_RAX_BPF_JIT_SIZE	2
-#  define RETPOLINE_RAX_BPF_JIT()				\
-	EMIT2(0xFF, 0xE0);	 /* jmp *%rax */
-# else /* !CONFIG_X86_64 */
-#  define RETPOLINE_EDX_BPF_JIT()				\
-	EMIT2(0xFF, 0xE2)	 /* jmp *%edx */
-# endif
-#endif
 
 #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
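The RETPOLINE_*_BPF_JIT() byte emitters removed above were superseded on the JIT side by emitting a branch to the per-register thunks declared earlier in this header (cf. emit_indirect_jump() in arch/x86/net/bpf_jit_comp.c). A rough sketch of that shape, with the low-level emit helpers left as labelled stand-ins and the LFENCE flavour omitted:

#include <linux/types.h>
#include <asm/cpufeature.h>
#include <asm/nospec-branch.h>

/* Stand-ins for the JIT's real instruction emitters (bodies elided). */
static void example_emit_jump(u8 **pprog, void *target, void *ip) { }
static void example_emit_jmp_reg(u8 **pprog, int reg) { }

/* Rough sketch: indirect jump through register 'reg', routed through the
 * per-register retpoline thunk when retpolines are enabled. */
static void example_emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
{
	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE))
		example_emit_jump(pprog, &__x86_indirect_thunk_array[reg], ip);
	else
		example_emit_jmp_reg(pprog, reg);	/* plain jmp *%reg */
}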