~hc/RK356X_SDK_RELEASE.git

..	..	@@ -9,8 +9,9 @@
9	9	#include <asm/nospec-branch.h>
10	10	#include <asm/unwind_hints.h>
11	11	#include <asm/frame.h>
	12	+#include <asm/nops.h>
12	13
13		- .section .text.__x86.indirect_thunk
	14	+ .section .text..__x86.indirect_thunk
14	15
15	16	.macro RETPOLINE reg
16	17	ANNOTATE_INTRA_FUNCTION_CALL
..	..	@@ -73,36 +74,106 @@
73	74	*/
74	75	#ifdef CONFIG_RETHUNK
75	76
76		- .section .text.__x86.return_thunk
	77	+/*
	78	+ * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at
	79	+ * special addresses:
	80	+ *
	81	+ * - srso_alias_untrain_ret() is 2M aligned
	82	+ * - srso_alias_safe_ret() is also in the same 2M page but bits 2, 8, 14
	83	+ * and 20 in its virtual address are set (while those bits in the
	84	+ * srso_alias_untrain_ret() function are cleared).
	85	+ *
	86	+ * This guarantees that those two addresses will alias in the branch
	87	+ * target buffer of Zen3/4 generations, causing any potentially
	88	+ * poisoned entries at that BTB slot to be evicted.
	89	+ *
	90	+ * As a result, srso_alias_safe_ret() becomes a safe return.
	91	+ */
	92	+#ifdef CONFIG_CPU_SRSO
	93	+ .section .text..__x86.rethunk_untrain
	94	+
	95	+SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
	96	+ UNWIND_HINT_FUNC
	97	+ ASM_NOP2
	98	+ lfence
	99	+ jmp srso_alias_return_thunk
	100	+SYM_FUNC_END(srso_alias_untrain_ret)
	101	+__EXPORT_THUNK(srso_alias_untrain_ret)
	102	+
	103	+ .section .text..__x86.rethunk_safe
	104	+#else
	105	+/* dummy definition for alternatives */
	106	+SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
	107	+ ANNOTATE_UNRET_SAFE
	108	+ ret
	109	+ int3
	110	+SYM_FUNC_END(srso_alias_untrain_ret)
	111	+#endif
	112	+
	113	+SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
	114	+ lea 8(%_ASM_SP), %_ASM_SP
	115	+ UNWIND_HINT_FUNC
	116	+ ANNOTATE_UNRET_SAFE
	117	+ ret
	118	+ int3
	119	+SYM_FUNC_END(srso_alias_safe_ret)
	120	+
	121	+ .section .text..__x86.return_thunk
	122	+
	123	+SYM_CODE_START(srso_alias_return_thunk)
	124	+ UNWIND_HINT_FUNC
	125	+ ANNOTATE_NOENDBR
	126	+ call srso_alias_safe_ret
	127	+ ud2
	128	+SYM_CODE_END(srso_alias_return_thunk)
	129	+
	130	+/*
	131	+ * Some generic notes on the untraining sequences:
	132	+ *
	133	+ * They are interchangeable when it comes to flushing potentially wrong
	134	+ * RET predictions from the BTB.
	135	+ *
	136	+ * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the
	137	+ * Retbleed sequence because the return sequence done there
	138	+ * (srso_safe_ret()) is longer and the return sequence must fully nest
	139	+ * (end before) the untraining sequence. Therefore, the untraining
	140	+ * sequence must fully overlap the return sequence.
	141	+ *
	142	+ * Regarding alignment - the instructions which need to be untrained
	143	+ * must all start at a cacheline boundary for Zen1/2 generations. That
	144	+ * is, instruction sequences starting at srso_safe_ret() and
	145	+ * the respective instruction sequences at retbleed_return_thunk()
	146	+ * must start at a cacheline boundary.
	147	+ */
77	148
78	149	/*
79	150	* Safety details here pertain to the AMD Zen{1,2} microarchitecture:
80		- * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
	151	+ * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for
81	152	* alignment within the BTB.
82		- * 2) The instruction at zen_untrain_ret must contain, and not
	153	+ * 2) The instruction at retbleed_untrain_ret must contain, and not
83	154	* end with, the 0xc3 byte of the RET.
84	155	* 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
85	156	* from re-poisoning the BTB prediction.
86	157	*/
87	158	.align 64
88		- .skip 63, 0xcc
89		-SYM_FUNC_START_NOALIGN(zen_untrain_ret);
	159	+ .skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc
	160	+SYM_FUNC_START_NOALIGN(retbleed_untrain_ret);
90	161
91	162	/*
92		- * As executed from zen_untrain_ret, this is:
	163	+ * As executed from retbleed_untrain_ret, this is:
93	164	*
94	165	* TEST $0xcc, %bl
95	166	* LFENCE
96		- * JMP __x86_return_thunk
	167	+ * JMP retbleed_return_thunk
97	168	*
98	169	* Executing the TEST instruction has a side effect of evicting any BTB
99	170	* prediction (potentially attacker controlled) attached to the RET, as
100		- * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
	171	+ * retbleed_return_thunk + 1 isn't an instruction boundary at the moment.
101	172	*/
102	173	.byte 0xf6
103	174
104	175	/*
105		- * As executed from __x86_return_thunk, this is a plain RET.
	176	+ * As executed from retbleed_return_thunk, this is a plain RET.
106	177	*
107	178	* As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
108	179	*
..	..	@@ -114,13 +185,13 @@
114	185	* With SMT enabled and STIBP active, a sibling thread cannot poison
115	186	* RET's prediction to a type of its choice, but can evict the
116	187	* prediction due to competitive sharing. If the prediction is
117		- * evicted, __x86_return_thunk will suffer Straight Line Speculation
	188	+ * evicted, retbleed_return_thunk will suffer Straight Line Speculation
118	189	* which will be contained safely by the INT3.
119	190	*/
120		-SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
	191	+SYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL)
121	192	ret
122	193	int3
123		-SYM_CODE_END(__x86_return_thunk)
	194	+SYM_CODE_END(retbleed_return_thunk)
124	195
125	196	/*
126	197	* Ensure the TEST decoding / BTB invalidation is complete.
..	..	@@ -131,11 +202,66 @@
131	202	* Jump back and execute the RET in the middle of the TEST instruction.
132	203	* INT3 is for SLS protection.
133	204	*/
134		- jmp __x86_return_thunk
	205	+ jmp retbleed_return_thunk
135	206	int3
136		-SYM_FUNC_END(zen_untrain_ret)
137		-__EXPORT_THUNK(zen_untrain_ret)
	207	+SYM_FUNC_END(retbleed_untrain_ret)
	208	+__EXPORT_THUNK(retbleed_untrain_ret)
138	209
	210	+/*
	211	+ * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
	212	+ * above. On kernel entry, srso_untrain_ret() is executed which is a
	213	+ *
	214	+ * movabs $0xccccc30824648d48,%rax
	215	+ *
	216	+ * and when the return thunk executes the inner label srso_safe_ret()
	217	+ * later, it is a stack manipulation and a RET which is mispredicted and
	218	+ * thus a "safe" one to use.
	219	+ */
	220	+ .align 64
	221	+ .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
	222	+SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
	223	+ .byte 0x48, 0xb8
	224	+
	225	+/*
	226	+ * This forces the function return instruction to speculate into a trap
	227	+ * (UD2 in srso_return_thunk() below). This RET will then mispredict
	228	+ * and execution will continue at the return site read from the top of
	229	+ * the stack.
	230	+ */
	231	+SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
	232	+ lea 8(%_ASM_SP), %_ASM_SP
	233	+ ret
	234	+ int3
	235	+ int3
	236	+ /* end of movabs */
	237	+ lfence
	238	+ call srso_safe_ret
	239	+ ud2
	240	+SYM_CODE_END(srso_safe_ret)
	241	+SYM_FUNC_END(srso_untrain_ret)
	242	+__EXPORT_THUNK(srso_untrain_ret)
	243	+
	244	+SYM_CODE_START(srso_return_thunk)
	245	+ UNWIND_HINT_FUNC
	246	+ ANNOTATE_NOENDBR
	247	+ call srso_safe_ret
	248	+ ud2
	249	+SYM_CODE_END(srso_return_thunk)
	250	+
	251	+SYM_FUNC_START(entry_untrain_ret)
	252	+ ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
	253	+ "jmp srso_untrain_ret", X86_FEATURE_SRSO, \
	254	+ "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
	255	+SYM_FUNC_END(entry_untrain_ret)
	256	+__EXPORT_THUNK(entry_untrain_ret)
	257	+
	258	+SYM_CODE_START(__x86_return_thunk)
	259	+ UNWIND_HINT_FUNC
	260	+ ANNOTATE_NOENDBR
	261	+ ANNOTATE_UNRET_SAFE
	262	+ ret
	263	+ int3
	264	+SYM_CODE_END(__x86_return_thunk)
139	265	EXPORT_SYMBOL(__x86_return_thunk)
140	266
141	267	#endif /* CONFIG_RETHUNK */