2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/arch/x86/kernel/traps.c
@@ -37,34 +37,32 @@
3737 #include <linux/mm.h>
3838 #include <linux/smp.h>
3939 #include <linux/io.h>
40
-
41
-#if defined(CONFIG_EDAC)
42
-#include <linux/edac.h>
43
-#endif
40
+#include <linux/hardirq.h>
41
+#include <linux/atomic.h>
4442
4543 #include <asm/stacktrace.h>
4644 #include <asm/processor.h>
4745 #include <asm/debugreg.h>
48
-#include <linux/atomic.h>
46
+#include <asm/realmode.h>
4947 #include <asm/text-patching.h>
5048 #include <asm/ftrace.h>
5149 #include <asm/traps.h>
5250 #include <asm/desc.h>
5351 #include <asm/fpu/internal.h>
52
+#include <asm/cpu.h>
5453 #include <asm/cpu_entry_area.h>
5554 #include <asm/mce.h>
5655 #include <asm/fixmap.h>
5756 #include <asm/mach_traps.h>
5857 #include <asm/alternative.h>
5958 #include <asm/fpu/xstate.h>
60
-#include <asm/trace/mpx.h>
61
-#include <asm/mpx.h>
6259 #include <asm/vm86.h>
6360 #include <asm/umip.h>
61
+#include <asm/insn.h>
62
+#include <asm/insn-eval.h>
6463
6564 #ifdef CONFIG_X86_64
6665 #include <asm/x86_init.h>
67
-#include <asm/pgalloc.h>
6866 #include <asm/proto.h>
6967 #else
7068 #include <asm/processor-flags.h>
@@ -86,110 +84,20 @@
8684 local_irq_disable();
8785 }
8886
89
-/*
90
- * In IST context, we explicitly disable preemption. This serves two
91
- * purposes: it makes it much less likely that we would accidentally
92
- * schedule in IST context and it will force a warning if we somehow
93
- * manage to schedule by accident.
94
- */
95
-void ist_enter(struct pt_regs *regs)
87
+__always_inline int is_valid_bugaddr(unsigned long addr)
9688 {
97
- if (user_mode(regs)) {
98
- RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
99
- } else {
100
- /*
101
- * We might have interrupted pretty much anything. In
102
- * fact, if we're a machine check, we can even interrupt
103
- * NMI processing. We don't want in_nmi() to return true,
104
- * but we need to notify RCU.
105
- */
106
- rcu_nmi_enter();
107
- }
108
-
109
- preempt_disable();
110
-
111
- /* This code is a bit fragile. Test it. */
112
- RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work");
113
-}
114
-
115
-void ist_exit(struct pt_regs *regs)
116
-{
117
- preempt_enable_no_resched();
118
-
119
- if (!user_mode(regs))
120
- rcu_nmi_exit();
121
-}
122
-
123
-/**
124
- * ist_begin_non_atomic() - begin a non-atomic section in an IST exception
125
- * @regs: regs passed to the IST exception handler
126
- *
127
- * IST exception handlers normally cannot schedule. As a special
128
- * exception, if the exception interrupted userspace code (i.e.
129
- * user_mode(regs) would return true) and the exception was not
130
- * a double fault, it can be safe to schedule. ist_begin_non_atomic()
131
- * begins a non-atomic section within an ist_enter()/ist_exit() region.
132
- * Callers are responsible for enabling interrupts themselves inside
133
- * the non-atomic section, and callers must call ist_end_non_atomic()
134
- * before ist_exit().
135
- */
136
-void ist_begin_non_atomic(struct pt_regs *regs)
137
-{
138
- BUG_ON(!user_mode(regs));
139
-
140
- /*
141
- * Sanity check: we need to be on the normal thread stack. This
142
- * will catch asm bugs and any attempt to use ist_preempt_enable
143
- * from double_fault.
144
- */
145
- BUG_ON(!on_thread_stack());
146
-
147
- preempt_enable_no_resched();
148
-}
149
-
150
-/**
151
- * ist_end_non_atomic() - begin a non-atomic section in an IST exception
152
- *
153
- * Ends a non-atomic section started with ist_begin_non_atomic().
154
- */
155
-void ist_end_non_atomic(void)
156
-{
157
- preempt_disable();
158
-}
159
-
160
-int is_valid_bugaddr(unsigned long addr)
161
-{
162
- unsigned short ud;
163
-
16489 if (addr < TASK_SIZE_MAX)
16590 return 0;
16691
167
- if (probe_kernel_address((unsigned short *)addr, ud))
168
- return 0;
169
-
170
- return ud == INSN_UD0 || ud == INSN_UD2;
171
-}
172
-
173
-int fixup_bug(struct pt_regs *regs, int trapnr)
174
-{
175
- if (trapnr != X86_TRAP_UD)
176
- return 0;
177
-
178
- switch (report_bug(regs->ip, regs)) {
179
- case BUG_TRAP_TYPE_NONE:
180
- case BUG_TRAP_TYPE_BUG:
181
- break;
182
-
183
- case BUG_TRAP_TYPE_WARN:
184
- regs->ip += LEN_UD2;
185
- return 1;
186
- }
187
-
188
- return 0;
92
+ /*
93
+ * We got #UD, if the text isn't readable we'd have gotten
94
+ * a different exception.
95
+ */
96
+ return *(unsigned short *)addr == INSN_UD2;
18997 }
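Note on the simplified is_valid_bugaddr() above: since the CPU already raised #UD at this address, the text must have been mapped and readable, so the old probe_kernel_address() step is dropped and the bytes are compared directly against INSN_UD2. A minimal stand-alone sketch of that compare (user-space, little-endian x86 assumed; INSN_UD2 is 0x0b0f in the kernel headers as far as I recall):

/* Stand-alone sketch: the UD2 opcode bytes (0x0F 0x0B) read as a
 * little-endian 16-bit value match INSN_UD2 (0x0b0f). */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
        const unsigned char text[] = { 0x0f, 0x0b };    /* a UD2 instruction */
        uint16_t opcode;

        memcpy(&opcode, text, sizeof(opcode));          /* what *(unsigned short *)addr does */
        printf("opcode=%#06x -> %s\n", opcode,
               opcode == 0x0b0f ? "UD2" : "not UD2");
        return 0;
}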
19098
19199 static nokprobe_inline int
192
-do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
100
+do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str,
193101 struct pt_regs *regs, long error_code)
194102 {
195103 if (v8086_mode(regs)) {
@@ -202,11 +110,8 @@
202110 error_code, trapnr))
203111 return 0;
204112 }
205
- return -1;
206
- }
207
-
208
- if (!user_mode(regs)) {
209
- if (fixup_exception(regs, trapnr))
113
+ } else if (!user_mode(regs)) {
114
+ if (fixup_exception(regs, trapnr, error_code, 0))
210115 return 0;
211116
212117 tsk->thread.error_code = error_code;
@@ -214,49 +119,6 @@
214119 die(str, regs, error_code);
215120 }
216121
217
- return -1;
218
-}
219
-
220
-static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
221
- siginfo_t *info)
222
-{
223
- unsigned long siaddr;
224
- int sicode;
225
-
226
- switch (trapnr) {
227
- default:
228
- return SEND_SIG_PRIV;
229
-
230
- case X86_TRAP_DE:
231
- sicode = FPE_INTDIV;
232
- siaddr = uprobe_get_trap_addr(regs);
233
- break;
234
- case X86_TRAP_UD:
235
- sicode = ILL_ILLOPN;
236
- siaddr = uprobe_get_trap_addr(regs);
237
- break;
238
- case X86_TRAP_AC:
239
- sicode = BUS_ADRALN;
240
- siaddr = 0;
241
- break;
242
- }
243
-
244
- info->si_signo = signr;
245
- info->si_errno = 0;
246
- info->si_code = sicode;
247
- info->si_addr = (void __user *)siaddr;
248
- return info;
249
-}
250
-
251
-static void
252
-do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
253
- long error_code, siginfo_t *info)
254
-{
255
- struct task_struct *tsk = current;
256
-
257
-
258
- if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
259
- return;
260122 /*
261123 * We want error_code and trap_nr set for userspace faults and
262124 * kernelspace faults which result in die(), but not
@@ -264,61 +126,187 @@
264126 * process no chance to handle the signal and notice the
265127 * kernel fault information, so that won't result in polluting
266128 * the information about previously queued, but not yet
267
- * delivered, faults. See also do_general_protection below.
129
+ * delivered, faults. See also exc_general_protection below.
268130 */
269131 tsk->thread.error_code = error_code;
270132 tsk->thread.trap_nr = trapnr;
271133
134
+ return -1;
135
+}
136
+
137
+static void show_signal(struct task_struct *tsk, int signr,
138
+ const char *type, const char *desc,
139
+ struct pt_regs *regs, long error_code)
140
+{
272141 if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
273142 printk_ratelimit()) {
274
- pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
275
- tsk->comm, tsk->pid, str,
143
+ pr_info("%s[%d] %s%s ip:%lx sp:%lx error:%lx",
144
+ tsk->comm, task_pid_nr(tsk), type, desc,
276145 regs->ip, regs->sp, error_code);
277146 print_vma_addr(KERN_CONT " in ", regs->ip);
278147 pr_cont("\n");
279148 }
149
+}
280150
281
- force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
151
+static void
152
+do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
153
+ long error_code, int sicode, void __user *addr)
154
+{
155
+ struct task_struct *tsk = current;
156
+
157
+ if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
158
+ return;
159
+
160
+ show_signal(tsk, signr, "trap ", str, regs, error_code);
161
+
162
+ if (!sicode)
163
+ force_sig(signr);
164
+ else
165
+ force_sig_fault(signr, sicode, addr);
282166 }
283167 NOKPROBE_SYMBOL(do_trap);
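The reworked do_trap() above replaces the old fill_trap_info()/siginfo_t plumbing: it either sends a bare signal via force_sig() or a fault signal with an explicit si_code and address via force_sig_fault(). What user space observes from the force_sig_fault(SIGFPE, FPE_INTDIV, ip) leg can be seen with this hedged stand-alone demo (x86 Linux; the division traps with #DE and comes back as SIGFPE):

/* Stand-alone demo: the SIGFPE/FPE_INTDIV delivered for a division by zero,
 * as seen from a SA_SIGINFO handler. */
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void fpe_handler(int sig, siginfo_t *info, void *ctx)
{
        (void)ctx;
        printf("sig=%d si_code=%d (FPE_INTDIV=%d) si_addr=%p\n",
               sig, info->si_code, FPE_INTDIV, info->si_addr);
        _exit(0);                       /* don't re-run the faulting instruction */
}

int main(void)
{
        struct sigaction sa = {
                .sa_sigaction = fpe_handler,
                .sa_flags = SA_SIGINFO,
        };
        volatile int zero = 0;

        sigaction(SIGFPE, &sa, NULL);
        return 1 / zero;                /* #DE -> SIGFPE with si_addr = trapping ip */
}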
284168
285169 static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
286
- unsigned long trapnr, int signr)
170
+ unsigned long trapnr, int signr, int sicode, void __user *addr)
287171 {
288
- siginfo_t info;
289
-
290172 RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
291
-
292
- /*
293
- * WARN*()s end up here; fix them up before we call the
294
- * notifier chain.
295
- */
296
- if (!user_mode(regs) && fixup_bug(regs, trapnr))
297
- return;
298173
299174 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
300175 NOTIFY_STOP) {
301176 cond_local_irq_enable(regs);
302
- clear_siginfo(&info);
303
- do_trap(trapnr, signr, str, regs, error_code,
304
- fill_trap_info(regs, signr, trapnr, &info));
177
+ do_trap(trapnr, signr, str, regs, error_code, sicode, addr);
178
+ cond_local_irq_disable(regs);
305179 }
306180 }
307181
308
-#define DO_ERROR(trapnr, signr, str, name) \
309
-dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
310
-{ \
311
- do_error_trap(regs, error_code, str, trapnr, signr); \
182
+/*
183
+ * Posix requires to provide the address of the faulting instruction for
184
+ * SIGILL (#UD) and SIGFPE (#DE) in the si_addr member of siginfo_t.
185
+ *
186
+ * This address is usually regs->ip, but when an uprobe moved the code out
187
+ * of line then regs->ip points to the XOL code which would confuse
188
+ * anything which analyzes the fault address vs. the unmodified binary. If
189
+ * a trap happened in XOL code then uprobe maps regs->ip back to the
190
+ * original instruction address.
191
+ */
192
+static __always_inline void __user *error_get_trap_addr(struct pt_regs *regs)
193
+{
194
+ return (void __user *)uprobe_get_trap_addr(regs);
312195 }
313196
314
-DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error)
315
-DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow)
316
-DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op)
317
-DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun)
318
-DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS)
319
-DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present)
320
-DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment)
321
-DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check)
197
+DEFINE_IDTENTRY(exc_divide_error)
198
+{
199
+ do_error_trap(regs, 0, "divide error", X86_TRAP_DE, SIGFPE,
200
+ FPE_INTDIV, error_get_trap_addr(regs));
201
+}
202
+
203
+DEFINE_IDTENTRY(exc_overflow)
204
+{
205
+ do_error_trap(regs, 0, "overflow", X86_TRAP_OF, SIGSEGV, 0, NULL);
206
+}
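The DEFINE_IDTENTRY() conversions above and below replace the old dotraplinkage do_*() handlers: the macro emits the asm-callable entry wrapper (irqentry_enter(), instrumentation_begin()/end(), irqentry_exit()) and leaves only the handler body to be written, which is the same sequence DEFINE_IDTENTRY_RAW(exc_invalid_op) spells out by hand further down. The following is a rough user-space mock of that pattern with stubbed types, not the real macro (which lives in arch/x86/include/asm/idtentry.h and has errorcode/RAW/IST variants):

/* Simplified, stand-alone mock of the DEFINE_IDTENTRY shape. */
#include <stdio.h>

struct pt_regs { unsigned long ip; };
typedef struct { int exit_rcu; } irqentry_state_t;

static irqentry_state_t irqentry_enter(struct pt_regs *regs)
{
        (void)regs;
        puts("irqentry_enter");         /* RCU/context-tracking entry work */
        return (irqentry_state_t){ 0 };
}

static void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
{
        (void)regs; (void)state;
        puts("irqentry_exit");          /* exit work: preemption, signals, RCU */
}

static void instrumentation_begin(void) { }
static void instrumentation_end(void) { }

#define DEFINE_IDTENTRY(func)                                           \
static void __##func(struct pt_regs *regs);                             \
                                                                        \
void func(struct pt_regs *regs)                                         \
{                                                                       \
        irqentry_state_t state = irqentry_enter(regs);                  \
                                                                        \
        instrumentation_begin();                                        \
        __##func(regs);                                                 \
        instrumentation_end();                                          \
        irqentry_exit(regs, state);                                     \
}                                                                       \
                                                                        \
static void __##func(struct pt_regs *regs)

DEFINE_IDTENTRY(exc_example)
{
        printf("handler body, ip=%#lx\n", regs->ip);
}

int main(void)
{
        struct pt_regs regs = { .ip = 0xffffffff81000000UL };

        exc_example(&regs);
        return 0;
}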
207
+
208
+#ifdef CONFIG_X86_F00F_BUG
209
+void handle_invalid_op(struct pt_regs *regs)
210
+#else
211
+static inline void handle_invalid_op(struct pt_regs *regs)
212
+#endif
213
+{
214
+ do_error_trap(regs, 0, "invalid opcode", X86_TRAP_UD, SIGILL,
215
+ ILL_ILLOPN, error_get_trap_addr(regs));
216
+}
217
+
218
+static noinstr bool handle_bug(struct pt_regs *regs)
219
+{
220
+ bool handled = false;
221
+
222
+ if (!is_valid_bugaddr(regs->ip))
223
+ return handled;
224
+
225
+ /*
226
+ * All lies, just get the WARN/BUG out.
227
+ */
228
+ instrumentation_begin();
229
+ /*
230
+ * Since we're emulating a CALL with exceptions, restore the interrupt
231
+ * state to what it was at the exception site.
232
+ */
233
+ if (regs->flags & X86_EFLAGS_IF)
234
+ raw_local_irq_enable();
235
+ if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) {
236
+ regs->ip += LEN_UD2;
237
+ handled = true;
238
+ }
239
+ if (regs->flags & X86_EFLAGS_IF)
240
+ raw_local_irq_disable();
241
+ instrumentation_end();
242
+
243
+ return handled;
244
+}
245
+
246
+DEFINE_IDTENTRY_RAW(exc_invalid_op)
247
+{
248
+ irqentry_state_t state;
249
+
250
+ /*
251
+ * We use UD2 as a short encoding for 'CALL __WARN', as such
252
+ * handle it before exception entry to avoid recursive WARN
253
+ * in case exception entry is the one triggering WARNs.
254
+ */
255
+ if (!user_mode(regs) && handle_bug(regs))
256
+ return;
257
+
258
+ state = irqentry_enter(regs);
259
+ instrumentation_begin();
260
+ handle_invalid_op(regs);
261
+ instrumentation_end();
262
+ irqentry_exit(regs, state);
263
+}
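handle_bug() above implements the "UD2 is a short encoding for 'CALL __WARN'" idea: for a WARN the trapping UD2 is simply skipped (regs->ip += LEN_UD2, i.e. two bytes) and execution resumes. The same skip-the-UD2 trick can be demonstrated from user space with a SIGILL handler (hedged analogue, x86-64 Linux/glibc only; REG_RIP needs _GNU_SOURCE):

/* Stand-alone x86-64 demo: treat a UD2 hit as a WARN-style trap and resume
 * two bytes further on, mirroring the regs->ip += LEN_UD2 fixup above. */
#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <ucontext.h>

static void ill_handler(int sig, siginfo_t *info, void *uc_void)
{
        ucontext_t *uc = uc_void;
        uint8_t *ip = (uint8_t *)uc->uc_mcontext.gregs[REG_RIP];

        (void)sig; (void)info;
        if (ip[0] == 0x0f && ip[1] == 0x0b) {           /* UD2 */
                fprintf(stderr, "WARN-like trap at %p, skipping UD2\n", (void *)ip);
                uc->uc_mcontext.gregs[REG_RIP] += 2;    /* LEN_UD2 */
                return;
        }
        _exit(1);                                       /* a real BUG: do not continue */
}

int main(void)
{
        struct sigaction sa = {
                .sa_sigaction = ill_handler,
                .sa_flags = SA_SIGINFO,
        };

        sigaction(SIGILL, &sa, NULL);
        asm volatile("ud2");                            /* stand-in for WARN() */
        puts("resumed after the UD2");
        return 0;
}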
264
+
265
+DEFINE_IDTENTRY(exc_coproc_segment_overrun)
266
+{
267
+ do_error_trap(regs, 0, "coprocessor segment overrun",
268
+ X86_TRAP_OLD_MF, SIGFPE, 0, NULL);
269
+}
270
+
271
+DEFINE_IDTENTRY_ERRORCODE(exc_invalid_tss)
272
+{
273
+ do_error_trap(regs, error_code, "invalid TSS", X86_TRAP_TS, SIGSEGV,
274
+ 0, NULL);
275
+}
276
+
277
+DEFINE_IDTENTRY_ERRORCODE(exc_segment_not_present)
278
+{
279
+ do_error_trap(regs, error_code, "segment not present", X86_TRAP_NP,
280
+ SIGBUS, 0, NULL);
281
+}
282
+
283
+DEFINE_IDTENTRY_ERRORCODE(exc_stack_segment)
284
+{
285
+ do_error_trap(regs, error_code, "stack segment", X86_TRAP_SS, SIGBUS,
286
+ 0, NULL);
287
+}
288
+
289
+DEFINE_IDTENTRY_ERRORCODE(exc_alignment_check)
290
+{
291
+ char *str = "alignment check";
292
+
293
+ if (notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_AC, SIGBUS) == NOTIFY_STOP)
294
+ return;
295
+
296
+ if (!user_mode(regs))
297
+ die("Split lock detected\n", regs, error_code);
298
+
299
+ local_irq_enable();
300
+
301
+ if (handle_user_split_lock(regs, error_code))
302
+ goto out;
303
+
304
+ do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs,
305
+ error_code, BUS_ADRALN, NULL);
306
+
307
+out:
308
+ local_irq_disable();
309
+}
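exc_alignment_check() above now serves double duty: a kernel-mode #AC is reported as a split-lock detection event, while a user-mode #AC that handle_user_split_lock() does not consume still becomes SIGBUS/BUS_ADRALN. The classic user-mode #AC can be provoked by setting EFLAGS.AC and doing a misaligned access (hedged demo; it relies on the kernel keeping CR0.AM set, which Linux normally does on x86, and the misaligned pointer is deliberate):

/* Stand-alone x86-64 demo of the SIGBUS/BUS_ADRALN path: with EFLAGS.AC set
 * in user mode, a misaligned store raises #AC. */
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

static void bus_handler(int sig, siginfo_t *info, void *ctx)
{
        (void)ctx;
        printf("sig=%d si_code=%d (BUS_ADRALN=%d) addr=%p\n",
               sig, info->si_code, BUS_ADRALN, info->si_addr);
        _exit(0);
}

int main(void)
{
        struct sigaction sa = {
                .sa_sigaction = bus_handler,
                .sa_flags = SA_SIGINFO,
        };
        static char buf[16];
        volatile int *misaligned = (volatile int *)(buf + 1);

        sigaction(SIGBUS, &sa, NULL);

        /* Set the Alignment Check flag (bit 18) in EFLAGS. */
        asm volatile("pushfq; orq $0x40000, (%rsp); popfq");

        *misaligned = 1;        /* #AC -> exc_alignment_check() -> SIGBUS */
        return 1;
}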
322310
323311 #ifdef CONFIG_VMAP_STACK
324312 __visible void __noreturn handle_stack_overflow(const char *message,
@@ -331,18 +319,36 @@
331319 die(message, regs, 0);
332320
333321 /* Be absolutely certain we don't return. */
334
- panic(message);
322
+ panic("%s", message);
335323 }
336324 #endif
337325
338
-#ifdef CONFIG_X86_64
339
-/* Runs on IST stack */
340
-dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
326
+/*
327
+ * Runs on an IST stack for x86_64 and on a special task stack for x86_32.
328
+ *
329
+ * On x86_64, this is more or less a normal kernel entry. Notwithstanding the
330
+ * SDM's warnings about double faults being unrecoverable, returning works as
331
+ * expected. Presumably what the SDM actually means is that the CPU may get
332
+ * the register state wrong on entry, so returning could be a bad idea.
333
+ *
334
+ * Various CPU engineers have promised that double faults due to an IRET fault
335
+ * while the stack is read-only are, in fact, recoverable.
336
+ *
337
+ * On x86_32, this is entered through a task gate, and regs are synthesized
338
+ * from the TSS. Returning is, in principle, okay, but changes to regs will
339
+ * be lost. If, for some reason, we need to return to a context with modified
340
+ * regs, the shim code could be adjusted to synchronize the registers.
341
+ *
342
+ * The 32bit #DF shim provides CR2 already as an argument. On 64bit it needs
343
+ * to be read before doing anything else.
344
+ */
345
+DEFINE_IDTENTRY_DF(exc_double_fault)
341346 {
342347 static const char str[] = "double fault";
343348 struct task_struct *tsk = current;
349
+
344350 #ifdef CONFIG_VMAP_STACK
345
- unsigned long cr2;
351
+ unsigned long address = read_cr2();
346352 #endif
347353
348354 #ifdef CONFIG_X86_ESPFIX64
@@ -360,13 +366,14 @@
360366 * The net result is that our #GP handler will think that we
361367 * entered from usermode with the bad user context.
362368 *
363
- * No need for ist_enter here because we don't use RCU.
369
+ * No need for nmi_enter() here because we don't use RCU.
364370 */
365371 if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY &&
366372 regs->cs == __KERNEL_CS &&
367373 regs->ip == (unsigned long)native_irq_return_iret)
368374 {
369375 struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
376
+ unsigned long *p = (unsigned long *)regs->sp;
370377
371378 /*
372379 * regs->sp points to the failing IRET frame on the
@@ -374,7 +381,11 @@
374381 * in gpregs->ss through gpregs->ip.
375382 *
376383 */
377
- memmove(&gpregs->ip, (void *)regs->sp, 5*8);
384
+ gpregs->ip = p[0];
385
+ gpregs->cs = p[1];
386
+ gpregs->flags = p[2];
387
+ gpregs->sp = p[3];
388
+ gpregs->ss = p[4];
378389 gpregs->orig_ax = 0; /* Missing (lost) #GP error code */
379390
380391 /*
@@ -383,15 +394,20 @@
383394 * we won't enable interupts or schedule before we invoke
384395 * general_protection, so nothing will clobber the stack
385396 * frame we just set up.
397
+ *
398
+ * We will enter general_protection with kernel GSBASE,
399
+ * which is what the stub expects, given that the faulting
400
+ * RIP will be the IRET instruction.
386401 */
387
- regs->ip = (unsigned long)general_protection;
402
+ regs->ip = (unsigned long)asm_exc_general_protection;
388403 regs->sp = (unsigned long)&gpregs->orig_ax;
389404
390405 return;
391406 }
392407 #endif
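The ESPFIX fixup above carves a pt_regs frame just below the stack top taken from cpu_tss_rw.x86_tss.sp0 ("(struct pt_regs *)sp0 - 1") and fills its hardware IRET tail field by field from the five 8-byte words the failed IRET was consuming, replacing the earlier memmove(). A stand-alone sketch of that pointer arithmetic and frame layout (hedged; the struct here is abbreviated, not the kernel's pt_regs):

/* Stand-alone sketch: carve a frame below a stack top and fill the five-word
 * hardware IRET tail (ip, cs, flags, sp, ss) the way the fixup above does. */
#include <stdio.h>
#include <stdint.h>

struct fake_pt_regs {
        /* general purpose registers elided */
        uint64_t orig_ax;
        uint64_t ip, cs, flags, sp, ss; /* hardware frame, in push order */
};

int main(void)
{
        uint64_t stack[64];
        void *sp0 = &stack[64];                                 /* top of stack */
        struct fake_pt_regs *gpregs = (struct fake_pt_regs *)sp0 - 1;
        const uint64_t p[5] = { 0x401000, 0x10, 0x246, 0x7ffd0000, 0x18 };

        gpregs->ip      = p[0];
        gpregs->cs      = p[1];
        gpregs->flags   = p[2];
        gpregs->sp      = p[3];
        gpregs->ss      = p[4];
        gpregs->orig_ax = 0;                                    /* lost #GP error code */

        printf("frame at %p: ip=%#llx cs=%#llx ss=%#llx\n", (void *)gpregs,
               (unsigned long long)gpregs->ip,
               (unsigned long long)gpregs->cs,
               (unsigned long long)gpregs->ss);
        return 0;
}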
393408
394
- ist_enter(regs);
409
+ irqentry_nmi_enter(regs);
410
+ instrumentation_begin();
395411 notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
396412
397413 tsk->thread.error_code = error_code;
@@ -435,195 +451,251 @@
435451 * stack even if the actual trigger for the double fault was
436452 * something else.
437453 */
438
- cr2 = read_cr2();
439
- if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
440
- handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
454
+ if ((unsigned long)task_stack_page(tsk) - 1 - address < PAGE_SIZE) {
455
+ handle_stack_overflow("kernel stack overflow (double-fault)",
456
+ regs, address);
457
+ }
441458 #endif
442459
443
-#ifdef CONFIG_DOUBLEFAULT
444
- df_debug(regs, error_code);
445
-#endif
446
- /*
447
- * This is always a kernel trap and never fixable (and thus must
448
- * never return).
449
- */
450
- for (;;)
451
- die(str, regs, error_code);
460
+ pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code);
461
+ die("double fault", regs, error_code);
462
+ panic("Machine halted.");
463
+ instrumentation_end();
452464 }
453
-#endif
454465
455
-dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
466
+DEFINE_IDTENTRY(exc_bounds)
456467 {
457
- const struct mpx_bndcsr *bndcsr;
458
- siginfo_t *info;
459
-
460
- RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
461
- if (notify_die(DIE_TRAP, "bounds", regs, error_code,
468
+ if (notify_die(DIE_TRAP, "bounds", regs, 0,
462469 X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
463470 return;
464471 cond_local_irq_enable(regs);
465472
466473 if (!user_mode(regs))
467
- die("bounds", regs, error_code);
474
+ die("bounds", regs, 0);
468475
469
- if (!cpu_feature_enabled(X86_FEATURE_MPX)) {
470
- /* The exception is not from Intel MPX */
471
- goto exit_trap;
472
- }
476
+ do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, 0, 0, NULL);
473477
474
- /*
475
- * We need to look at BNDSTATUS to resolve this exception.
476
- * A NULL here might mean that it is in its 'init state',
477
- * which is all zeros which indicates MPX was not
478
- * responsible for the exception.
479
- */
480
- bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
481
- if (!bndcsr)
482
- goto exit_trap;
483
-
484
- trace_bounds_exception_mpx(bndcsr);
485
- /*
486
- * The error code field of the BNDSTATUS register communicates status
487
- * information of a bound range exception #BR or operation involving
488
- * bound directory.
489
- */
490
- switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) {
491
- case 2: /* Bound directory has invalid entry. */
492
- if (mpx_handle_bd_fault())
493
- goto exit_trap;
494
- break; /* Success, it was handled */
495
- case 1: /* Bound violation. */
496
- info = mpx_generate_siginfo(regs);
497
- if (IS_ERR(info)) {
498
- /*
499
- * We failed to decode the MPX instruction. Act as if
500
- * the exception was not caused by MPX.
501
- */
502
- goto exit_trap;
503
- }
504
- /*
505
- * Success, we decoded the instruction and retrieved
506
- * an 'info' containing the address being accessed
507
- * which caused the exception. This information
508
- * allows and application to possibly handle the
509
- * #BR exception itself.
510
- */
511
- do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, info);
512
- kfree(info);
513
- break;
514
- case 0: /* No exception caused by Intel MPX operations. */
515
- goto exit_trap;
516
- default:
517
- die("bounds", regs, error_code);
518
- }
519
-
520
- return;
521
-
522
-exit_trap:
523
- /*
524
- * This path out is for all the cases where we could not
525
- * handle the exception in some way (like allocating a
526
- * table or telling userspace about it. We will also end
527
- * up here if the kernel has MPX turned off at compile
528
- * time..
529
- */
530
- do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL);
478
+ cond_local_irq_disable(regs);
531479 }
532480
533
-dotraplinkage void
534
-do_general_protection(struct pt_regs *regs, long error_code)
535
-{
536
- struct task_struct *tsk;
481
+enum kernel_gp_hint {
482
+ GP_NO_HINT,
483
+ GP_NON_CANONICAL,
484
+ GP_CANONICAL
485
+};
537486
538
- RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
487
+/*
488
+ * When an uncaught #GP occurs, try to determine the memory address accessed by
489
+ * the instruction and return that address to the caller. Also, try to figure
490
+ * out whether any part of the access to that address was non-canonical.
491
+ */
492
+static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
493
+ unsigned long *addr)
494
+{
495
+ u8 insn_buf[MAX_INSN_SIZE];
496
+ struct insn insn;
497
+
498
+ if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip,
499
+ MAX_INSN_SIZE))
500
+ return GP_NO_HINT;
501
+
502
+ kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
503
+ insn_get_modrm(&insn);
504
+ insn_get_sib(&insn);
505
+
506
+ *addr = (unsigned long)insn_get_addr_ref(&insn, regs);
507
+ if (*addr == -1UL)
508
+ return GP_NO_HINT;
509
+
510
+#ifdef CONFIG_X86_64
511
+ /*
512
+ * Check that:
513
+ * - the operand is not in the kernel half
514
+ * - the last byte of the operand is not in the user canonical half
515
+ */
516
+ if (*addr < ~__VIRTUAL_MASK &&
517
+ *addr + insn.opnd_bytes - 1 > __VIRTUAL_MASK)
518
+ return GP_NON_CANONICAL;
519
+#endif
520
+
521
+ return GP_CANONICAL;
522
+}
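get_kernel_gp_address() above decodes the faulting instruction with the in-kernel x86 decoder and then flags the access as likely non-canonical when the operand starts below the kernel half but its last byte crosses the user canonical limit. The range test on its own, as a hedged stand-alone sketch (assuming 4-level paging where __VIRTUAL_MASK is (1UL << 47) - 1; LA57 kernels use a wider mask):

/* Stand-alone sketch of the non-canonical hint test used above. */
#include <stdio.h>
#include <stdint.h>

#define VIRTUAL_MASK    (((uint64_t)1 << 47) - 1)

/* Operand starts below the kernel half but its last byte lies above the user
 * canonical limit, i.e. the access reaches into the canonical hole. */
static int is_non_canonical(uint64_t addr, unsigned int opnd_bytes)
{
        return addr < ~VIRTUAL_MASK && addr + opnd_bytes - 1 > VIRTUAL_MASK;
}

int main(void)
{
        printf("%d\n", is_non_canonical(0x00007ffffffffff8ULL, 16)); /* 1: crosses the hole */
        printf("%d\n", is_non_canonical(0x0000100000000000ULL, 8));  /* 0: canonical user address */
        printf("%d\n", is_non_canonical(0xffff888000000000ULL, 8));  /* 0: kernel half */
        return 0;
}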
523
+
524
+#define GPFSTR "general protection fault"
525
+
526
+static bool fixup_iopl_exception(struct pt_regs *regs)
527
+{
528
+ struct thread_struct *t = &current->thread;
529
+ unsigned char byte;
530
+ unsigned long ip;
531
+
532
+ if (!IS_ENABLED(CONFIG_X86_IOPL_IOPERM) || t->iopl_emul != 3)
533
+ return false;
534
+
535
+ ip = insn_get_effective_ip(regs);
536
+ if (!ip)
537
+ return false;
538
+
539
+ if (get_user(byte, (const char __user *)ip))
540
+ return false;
541
+
542
+ if (byte != 0xfa && byte != 0xfb)
543
+ return false;
544
+
545
+ if (!t->iopl_warn && printk_ratelimit()) {
546
+ pr_err("%s[%d] attempts to use CLI/STI, pretending it's a NOP, ip:%lx",
547
+ current->comm, task_pid_nr(current), ip);
548
+ print_vma_addr(KERN_CONT " in ", ip);
549
+ pr_cont("\n");
550
+ t->iopl_warn = 1;
551
+ }
552
+
553
+ regs->ip += 1;
554
+ return true;
555
+}
556
+
557
+DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
558
+{
559
+ char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
560
+ enum kernel_gp_hint hint = GP_NO_HINT;
561
+ struct task_struct *tsk;
562
+ unsigned long gp_addr;
563
+ int ret;
564
+
539565 cond_local_irq_enable(regs);
540566
541567 if (static_cpu_has(X86_FEATURE_UMIP)) {
542568 if (user_mode(regs) && fixup_umip_exception(regs))
543
- return;
569
+ goto exit;
544570 }
545571
546572 if (v8086_mode(regs)) {
547573 local_irq_enable();
548574 handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
575
+ local_irq_disable();
549576 return;
550577 }
551578
552579 tsk = current;
553
- if (!user_mode(regs)) {
554
- if (fixup_exception(regs, X86_TRAP_GP))
555
- return;
580
+
581
+ if (user_mode(regs)) {
582
+ if (fixup_iopl_exception(regs))
583
+ goto exit;
556584
557585 tsk->thread.error_code = error_code;
558586 tsk->thread.trap_nr = X86_TRAP_GP;
559
- if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
560
- X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
561
- die("general protection fault", regs, error_code);
562
- return;
587
+
588
+ show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
589
+ force_sig(SIGSEGV);
590
+ goto exit;
563591 }
592
+
593
+ if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
594
+ goto exit;
564595
565596 tsk->thread.error_code = error_code;
566597 tsk->thread.trap_nr = X86_TRAP_GP;
567598
568
- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
569
- printk_ratelimit()) {
570
- pr_info("%s[%d] general protection ip:%lx sp:%lx error:%lx",
571
- tsk->comm, task_pid_nr(tsk),
572
- regs->ip, regs->sp, error_code);
573
- print_vma_addr(KERN_CONT " in ", regs->ip);
574
- pr_cont("\n");
575
- }
576
-
577
- force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
578
-}
579
-NOKPROBE_SYMBOL(do_general_protection);
580
-
581
-dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
582
-{
583
-#ifdef CONFIG_DYNAMIC_FTRACE
584599 /*
585
- * ftrace must be first, everything else may cause a recursive crash.
586
- * See note by declaration of modifying_ftrace_code in ftrace.c
600
+ * To be potentially processing a kprobe fault and to trust the result
601
+ * from kprobe_running(), we have to be non-preemptible.
587602 */
588
- if (unlikely(atomic_read(&modifying_ftrace_code)) &&
589
- ftrace_int3_handler(regs))
590
- return;
591
-#endif
592
- if (poke_int3_handler(regs))
593
- return;
594
-
595
- /*
596
- * Use ist_enter despite the fact that we don't use an IST stack.
597
- * We can be called from a kprobe in non-CONTEXT_KERNEL kernel
598
- * mode or even during context tracking state changes.
599
- *
600
- * This means that we can't schedule. That's okay.
601
- */
602
- ist_enter(regs);
603
- RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
604
-#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
605
- if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
606
- SIGTRAP) == NOTIFY_STOP)
603
+ if (!preemptible() &&
604
+ kprobe_running() &&
605
+ kprobe_fault_handler(regs, X86_TRAP_GP))
607606 goto exit;
607
+
608
+ ret = notify_die(DIE_GPF, desc, regs, error_code, X86_TRAP_GP, SIGSEGV);
609
+ if (ret == NOTIFY_STOP)
610
+ goto exit;
611
+
612
+ if (error_code)
613
+ snprintf(desc, sizeof(desc), "segment-related " GPFSTR);
614
+ else
615
+ hint = get_kernel_gp_address(regs, &gp_addr);
616
+
617
+ if (hint != GP_NO_HINT)
618
+ snprintf(desc, sizeof(desc), GPFSTR ", %s 0x%lx",
619
+ (hint == GP_NON_CANONICAL) ? "probably for non-canonical address"
620
+ : "maybe for address",
621
+ gp_addr);
622
+
623
+ /*
624
+ * KASAN is interested only in the non-canonical case, clear it
625
+ * otherwise.
626
+ */
627
+ if (hint != GP_NON_CANONICAL)
628
+ gp_addr = 0;
629
+
630
+ die_addr(desc, regs, error_code, gp_addr);
631
+
632
+exit:
633
+ cond_local_irq_disable(regs);
634
+}
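For user-mode faults the handler above no longer goes through notify_die()/die(); it records the trap and sends a plain SIGSEGV. One visible consequence (hedged, from my understanding of force_sig()): a #GP raised from user space, for example by touching a non-canonical address, which never reaches the page-fault path, arrives as SIGSEGV with si_code SI_KERNEL and no fault address, unlike an ordinary page fault with SEGV_MAPERR/SEGV_ACCERR. A stand-alone x86-64 demo:

/* Stand-alone x86-64 demo: a non-canonical load raises #GP (not #PF). */
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

static void segv_handler(int sig, siginfo_t *info, void *ctx)
{
        (void)ctx;
        printf("sig=%d si_code=%d (SI_KERNEL=%d, SEGV_MAPERR=%d) addr=%p\n",
               sig, info->si_code, SI_KERNEL, SEGV_MAPERR, info->si_addr);
        _exit(0);
}

int main(void)
{
        struct sigaction sa = {
                .sa_sigaction = segv_handler,
                .sa_flags = SA_SIGINFO,
        };
        /* Bit 60 set, upper bits not sign-extended: non-canonical. */
        volatile long *bad = (volatile long *)0x1000000000000000UL;

        sigaction(SIGSEGV, &sa, NULL);
        return (int)*bad;       /* #GP -> exc_general_protection() -> SIGSEGV */
}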
635
+
636
+static bool do_int3(struct pt_regs *regs)
637
+{
638
+ int res;
639
+
640
+#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
641
+ if (kgdb_ll_trap(DIE_INT3, "int3", regs, 0, X86_TRAP_BP,
642
+ SIGTRAP) == NOTIFY_STOP)
643
+ return true;
608644 #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
609645
610646 #ifdef CONFIG_KPROBES
611647 if (kprobe_int3_handler(regs))
612
- goto exit;
648
+ return true;
613649 #endif
650
+ res = notify_die(DIE_INT3, "int3", regs, 0, X86_TRAP_BP, SIGTRAP);
614651
615
- if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
616
- SIGTRAP) == NOTIFY_STOP)
617
- goto exit;
618
-
619
- cond_local_irq_enable(regs);
620
- do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
621
- cond_local_irq_disable(regs);
622
-
623
-exit:
624
- ist_exit(regs);
652
+ return res == NOTIFY_STOP;
625653 }
626654 NOKPROBE_SYMBOL(do_int3);
655
+
656
+static void do_int3_user(struct pt_regs *regs)
657
+{
658
+ if (do_int3(regs))
659
+ return;
660
+
661
+ cond_local_irq_enable(regs);
662
+ do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, 0, 0, NULL);
663
+ cond_local_irq_disable(regs);
664
+}
665
+
666
+DEFINE_IDTENTRY_RAW(exc_int3)
667
+{
668
+ /*
669
+ * poke_int3_handler() is completely self contained code; it does (and
670
+ * must) *NOT* call out to anything, lest it hits upon yet another
671
+ * INT3.
672
+ */
673
+ if (poke_int3_handler(regs))
674
+ return;
675
+
676
+ /*
677
+ * irqentry_enter_from_user_mode() uses static_branch_{,un}likely()
678
+ * and therefore can trigger INT3, hence poke_int3_handler() must
679
+ * be done before. If the entry came from kernel mode, then use
680
+ * nmi_enter() because the INT3 could have been hit in any context
681
+ * including NMI.
682
+ */
683
+ if (user_mode(regs)) {
684
+ irqentry_enter_from_user_mode(regs);
685
+ instrumentation_begin();
686
+ do_int3_user(regs);
687
+ instrumentation_end();
688
+ irqentry_exit_to_user_mode(regs);
689
+ } else {
690
+ irqentry_state_t irq_state = irqentry_nmi_enter(regs);
691
+
692
+ instrumentation_begin();
693
+ if (!do_int3(regs))
694
+ die("int3", regs, 0);
695
+ instrumentation_end();
696
+ irqentry_nmi_exit(regs, irq_state);
697
+ }
698
+}
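exc_int3() above keeps poke_int3_handler() strictly first, because even the entry work can hit a freshly patched INT3, and only a user-mode INT3 that no notifier consumes ends up as SIGTRAP via do_int3_user()/do_trap(). That final leg is easy to observe from user space (hedged demo; run it outside a debugger, since a ptracer would intercept the SIGTRAP):

/* Stand-alone demo: an int3 executed in user mode comes back as SIGTRAP. */
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

static void trap_handler(int sig, siginfo_t *info, void *ctx)
{
        (void)ctx;
        printf("sig=%d si_code=%d si_addr=%p\n", sig, info->si_code, info->si_addr);
}

int main(void)
{
        struct sigaction sa = {
                .sa_sigaction = trap_handler,
                .sa_flags = SA_SIGINFO,
        };

        sigaction(SIGTRAP, &sa, NULL);
        asm volatile("int3");           /* #BP */
        puts("back from the breakpoint");
        return 0;
}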
627699
628700 #ifdef CONFIG_X86_64
629701 /*
@@ -631,21 +703,63 @@
631703 * to switch to the normal thread stack if the interrupted code was in
632704 * user mode. The actual stack switch is done in entry_64.S
633705 */
634
-asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
706
+asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs)
635707 {
636708 struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
637709 if (regs != eregs)
638710 *regs = *eregs;
639711 return regs;
640712 }
641
-NOKPROBE_SYMBOL(sync_regs);
713
+
714
+#ifdef CONFIG_AMD_MEM_ENCRYPT
715
+asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *regs)
716
+{
717
+ unsigned long sp, *stack;
718
+ struct stack_info info;
719
+ struct pt_regs *regs_ret;
720
+
721
+ /*
722
+ * In the SYSCALL entry path the RSP value comes from user-space - don't
723
+ * trust it and switch to the current kernel stack
724
+ */
725
+ if (ip_within_syscall_gap(regs)) {
726
+ sp = this_cpu_read(cpu_current_top_of_stack);
727
+ goto sync;
728
+ }
729
+
730
+ /*
731
+ * From here on the RSP value is trusted. Now check whether entry
732
+ * happened from a safe stack. Not safe are the entry or unknown stacks,
733
+ * use the fall-back stack instead in this case.
734
+ */
735
+ sp = regs->sp;
736
+ stack = (unsigned long *)sp;
737
+
738
+ if (!get_stack_info_noinstr(stack, current, &info) || info.type == STACK_TYPE_ENTRY ||
739
+ info.type > STACK_TYPE_EXCEPTION_LAST)
740
+ sp = __this_cpu_ist_top_va(VC2);
741
+
742
+sync:
743
+ /*
744
+ * Found a safe stack - switch to it as if the entry didn't happen via
745
+ * IST stack. The code below only copies pt_regs, the real switch happens
746
+ * in assembly code.
747
+ */
748
+ sp = ALIGN_DOWN(sp, 8) - sizeof(*regs_ret);
749
+
750
+ regs_ret = (struct pt_regs *)sp;
751
+ *regs_ret = *regs;
752
+
753
+ return regs_ret;
754
+}
755
+#endif
642756
643757 struct bad_iret_stack {
644758 void *error_entry_ret;
645759 struct pt_regs regs;
646760 };
647761
648
-asmlinkage __visible notrace
762
+asmlinkage __visible noinstr
649763 struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
650764 {
651765 /*
@@ -656,19 +770,21 @@
656770 * just below the IRET frame) and we want to pretend that the
657771 * exception came from the IRET target.
658772 */
659
- struct bad_iret_stack *new_stack =
660
- (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
773
+ struct bad_iret_stack tmp, *new_stack =
774
+ (struct bad_iret_stack *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
661775
662
- /* Copy the IRET target to the new stack. */
663
- memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
776
+ /* Copy the IRET target to the temporary storage. */
777
+ __memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8);
664778
665779 /* Copy the remainder of the stack from the current stack. */
666
- memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
780
+ __memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip));
781
+
782
+ /* Update the entry stack */
783
+ __memcpy(new_stack, &tmp, sizeof(tmp));
667784
668785 BUG_ON(!user_mode(&new_stack->regs));
669786 return new_stack;
670787 }
671
-NOKPROBE_SYMBOL(fixup_bad_iret);
672788 #endif
673789
674790 static bool is_sysenter_singlestep(struct pt_regs *regs)
@@ -692,6 +808,28 @@
692808 #else
693809 return false;
694810 #endif
811
+}
812
+
813
+static __always_inline unsigned long debug_read_clear_dr6(void)
814
+{
815
+ unsigned long dr6;
816
+
817
+ /*
818
+ * The Intel SDM says:
819
+ *
820
+ * Certain debug exceptions may clear bits 0-3. The remaining
821
+ * contents of the DR6 register are never cleared by the
822
+ * processor. To avoid confusion in identifying debug
823
+ * exceptions, debug handlers should clear the register before
824
+ * returning to the interrupted task.
825
+ *
826
+ * Keep it simple: clear DR6 immediately.
827
+ */
828
+ get_debugreg(dr6, 6);
829
+ set_debugreg(DR6_RESERVED, 6);
830
+ dr6 ^= DR6_RESERVED; /* Flip to positive polarity */
831
+
832
+ return dr6;
695833 }
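debug_read_clear_dr6() above resets DR6 to DR6_RESERVED rather than to 0 and XORs the value it read with the same mask, so that reserved bits which read back as 1 drop out and every remaining bit means "this condition actually happened" (the "positive polarity" in the comment). A quick stand-alone illustration of the XOR (hedged; 0xFFFF0FF0 is the DR6_RESERVED value I recall from the kernel headers):

/* Stand-alone sketch of the polarity flip: a raw DR6 of 0xFFFF0FF1 (DR0 hit,
 * reserved bits reading as 1) becomes just 0x1 after XOR with DR6_RESERVED. */
#include <stdio.h>
#include <stdint.h>

#define DR6_RESERVED    0xFFFF0FF0u

int main(void)
{
        uint32_t raw = 0xFFFF0FF1;              /* as read with "mov %dr6, ..." */
        uint32_t dr6 = raw ^ DR6_RESERVED;      /* -> 0x00000001: only real cause bits */

        printf("raw=%#010x cooked=%#010x\n", raw, dr6);
        return 0;
}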
696834
697835 /*
@@ -718,136 +856,216 @@
718856 *
719857 * May run on IST stack.
720858 */
721
-dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
859
+
860
+static bool notify_debug(struct pt_regs *regs, unsigned long *dr6)
722861 {
723
- struct task_struct *tsk = current;
724
- int user_icebp = 0;
725
- unsigned long dr6;
726
- int si_code;
727
-
728
- ist_enter(regs);
729
-
730
- get_debugreg(dr6, 6);
731862 /*
732
- * The Intel SDM says:
863
+ * Notifiers will clear bits in @dr6 to indicate the event has been
864
+ * consumed - hw_breakpoint_handler(), single_stop_cont().
733865 *
734
- * Certain debug exceptions may clear bits 0-3. The remaining
735
- * contents of the DR6 register are never cleared by the
736
- * processor. To avoid confusion in identifying debug
737
- * exceptions, debug handlers should clear the register before
738
- * returning to the interrupted task.
739
- *
740
- * Keep it simple: clear DR6 immediately.
866
+ * Notifiers will set bits in @virtual_dr6 to indicate the desire
867
+ * for signals - ptrace_triggered(), kgdb_hw_overflow_handler().
741868 */
742
- set_debugreg(0, 6);
869
+ if (notify_die(DIE_DEBUG, "debug", regs, (long)dr6, 0, SIGTRAP) == NOTIFY_STOP)
870
+ return true;
743871
744
- /* Filter out all the reserved bits which are preset to 1 */
745
- dr6 &= ~DR6_RESERVED;
872
+ return false;
873
+}
874
+
875
+static __always_inline void exc_debug_kernel(struct pt_regs *regs,
876
+ unsigned long dr6)
877
+{
878
+ /*
879
+ * Disable breakpoints during exception handling; recursive exceptions
880
+ * are exceedingly 'fun'.
881
+ *
882
+ * Since this function is NOKPROBE, and that also applies to
883
+ * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
884
+ * HW_BREAKPOINT_W on our stack)
885
+ *
886
+ * Entry text is excluded for HW_BP_X and cpu_entry_area, which
887
+ * includes the entry stack is excluded for everything.
888
+ */
889
+ unsigned long dr7 = local_db_save();
890
+ irqentry_state_t irq_state = irqentry_nmi_enter(regs);
891
+ instrumentation_begin();
892
+
893
+ /*
894
+ * If something gets miswired and we end up here for a user mode
895
+ * #DB, we will malfunction.
896
+ */
897
+ WARN_ON_ONCE(user_mode(regs));
898
+
899
+ if (test_thread_flag(TIF_BLOCKSTEP)) {
900
+ /*
901
+ * The SDM says "The processor clears the BTF flag when it
902
+ * generates a debug exception." but PTRACE_BLOCKSTEP requested
903
+ * it for userspace, but we just took a kernel #DB, so re-set
904
+ * BTF.
905
+ */
906
+ unsigned long debugctl;
907
+
908
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
909
+ debugctl |= DEBUGCTLMSR_BTF;
910
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
911
+ }
912
+
913
+ /*
914
+ * Catch SYSENTER with TF set and clear DR_STEP. If this hit a
915
+ * watchpoint at the same time then that will still be handled.
916
+ */
917
+ if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs))
918
+ dr6 &= ~DR_STEP;
919
+
920
+ /*
921
+ * The kernel doesn't use INT1
922
+ */
923
+ if (!dr6)
924
+ goto out;
925
+
926
+ if (notify_debug(regs, &dr6))
927
+ goto out;
928
+
929
+ /*
930
+ * The kernel doesn't use TF single-step outside of:
931
+ *
932
+ * - Kprobes, consumed through kprobe_debug_handler()
933
+ * - KGDB, consumed through notify_debug()
934
+ *
935
+ * So if we get here with DR_STEP set, something is wonky.
936
+ *
937
+ * A known way to trigger this is through QEMU's GDB stub,
938
+ * which leaks #DB into the guest and causes IST recursion.
939
+ */
940
+ if (WARN_ON_ONCE(dr6 & DR_STEP))
941
+ regs->flags &= ~X86_EFLAGS_TF;
942
+out:
943
+ instrumentation_end();
944
+ irqentry_nmi_exit(regs, irq_state);
945
+
946
+ local_db_restore(dr7);
947
+}
948
+
949
+static __always_inline void exc_debug_user(struct pt_regs *regs,
950
+ unsigned long dr6)
951
+{
952
+ bool icebp;
953
+
954
+ /*
955
+ * If something gets miswired and we end up here for a kernel mode
956
+ * #DB, we will malfunction.
957
+ */
958
+ WARN_ON_ONCE(!user_mode(regs));
959
+
960
+ /*
961
+ * NB: We can't easily clear DR7 here because
962
+ * irqentry_exit_to_usermode() can invoke ptrace, schedule, access
963
+ * user memory, etc. This means that a recursive #DB is possible. If
964
+ * this happens, that #DB will hit exc_debug_kernel() and clear DR7.
965
+ * Since we're not on the IST stack right now, everything will be
966
+ * fine.
967
+ */
968
+
969
+ irqentry_enter_from_user_mode(regs);
970
+ instrumentation_begin();
971
+
972
+ /*
973
+ * Start the virtual/ptrace DR6 value with just the DR_STEP mask
974
+ * of the real DR6. ptrace_triggered() will set the DR_TRAPn bits.
975
+ *
976
+ * Userspace expects DR_STEP to be visible in ptrace_get_debugreg(6)
977
+ * even if it is not the result of PTRACE_SINGLESTEP.
978
+ */
979
+ current->thread.virtual_dr6 = (dr6 & DR_STEP);
746980
747981 /*
748982 * The SDM says "The processor clears the BTF flag when it
749983 * generates a debug exception." Clear TIF_BLOCKSTEP to keep
750984 * TIF_BLOCKSTEP in sync with the hardware BTF flag.
751985 */
752
- clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
753
-
754
- if (unlikely(!user_mode(regs) && (dr6 & DR_STEP) &&
755
- is_sysenter_singlestep(regs))) {
756
- dr6 &= ~DR_STEP;
757
- if (!dr6)
758
- goto exit;
759
- /*
760
- * else we might have gotten a single-step trap and hit a
761
- * watchpoint at the same time, in which case we should fall
762
- * through and handle the watchpoint.
763
- */
764
- }
986
+ clear_thread_flag(TIF_BLOCKSTEP);
765987
766988 /*
767989 * If dr6 has no reason to give us about the origin of this trap,
768990 * then it's very likely the result of an icebp/int01 trap.
769991 * User wants a sigtrap for that.
770992 */
771
- if (!dr6 && user_mode(regs))
772
- user_icebp = 1;
993
+ icebp = !dr6;
773994
774
- /* Store the virtualized DR6 value */
775
- tsk->thread.debugreg6 = dr6;
776
-
777
-#ifdef CONFIG_KPROBES
778
- if (kprobe_debug_handler(regs))
779
- goto exit;
780
-#endif
781
-
782
- if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code,
783
- SIGTRAP) == NOTIFY_STOP)
784
- goto exit;
785
-
786
- /*
787
- * Let others (NMI) know that the debug stack is in use
788
- * as we may switch to the interrupt stack.
789
- */
790
- debug_stack_usage_inc();
995
+ if (notify_debug(regs, &dr6))
996
+ goto out;
791997
792998 /* It's safe to allow irq's after DR6 has been saved */
793
- cond_local_irq_enable(regs);
999
+ local_irq_enable();
7941000
7951001 if (v8086_mode(regs)) {
796
- handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
797
- X86_TRAP_DB);
798
- cond_local_irq_disable(regs);
799
- debug_stack_usage_dec();
800
- goto exit;
1002
+ handle_vm86_trap((struct kernel_vm86_regs *)regs, 0, X86_TRAP_DB);
1003
+ goto out_irq;
8011004 }
8021005
803
- if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
804
- /*
805
- * Historical junk that used to handle SYSENTER single-stepping.
806
- * This should be unreachable now. If we survive for a while
807
- * without anyone hitting this warning, we'll turn this into
808
- * an oops.
809
- */
810
- tsk->thread.debugreg6 &= ~DR_STEP;
811
- set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
812
- regs->flags &= ~X86_EFLAGS_TF;
813
- }
814
- si_code = get_si_code(tsk->thread.debugreg6);
815
- if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
816
- send_sigtrap(tsk, regs, error_code, si_code);
817
- cond_local_irq_disable(regs);
818
- debug_stack_usage_dec();
1006
+ /* Add the virtual_dr6 bits for signals. */
1007
+ dr6 |= current->thread.virtual_dr6;
1008
+ if (dr6 & (DR_STEP | DR_TRAP_BITS) || icebp)
1009
+ send_sigtrap(regs, 0, get_si_code(dr6));
8191010
820
-exit:
821
- ist_exit(regs);
1011
+out_irq:
1012
+ local_irq_disable();
1013
+out:
1014
+ instrumentation_end();
1015
+ irqentry_exit_to_user_mode(regs);
8221016 }
823
-NOKPROBE_SYMBOL(do_debug);
1017
+
1018
+#ifdef CONFIG_X86_64
1019
+/* IST stack entry */
1020
+DEFINE_IDTENTRY_DEBUG(exc_debug)
1021
+{
1022
+ exc_debug_kernel(regs, debug_read_clear_dr6());
1023
+}
1024
+
1025
+/* User entry, runs on regular task stack */
1026
+DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
1027
+{
1028
+ exc_debug_user(regs, debug_read_clear_dr6());
1029
+}
1030
+#else
1031
+/* 32 bit does not have separate entry points. */
1032
+DEFINE_IDTENTRY_RAW(exc_debug)
1033
+{
1034
+ unsigned long dr6 = debug_read_clear_dr6();
1035
+
1036
+ if (user_mode(regs))
1037
+ exc_debug_user(regs, dr6);
1038
+ else
1039
+ exc_debug_kernel(regs, dr6);
1040
+}
1041
+#endif
8241042
8251043 /*
8261044 * Note that we play around with the 'TS' bit in an attempt to get
8271045 * the correct behaviour even in the presence of the asynchronous
8281046 * IRQ13 behaviour
8291047 */
830
-static void math_error(struct pt_regs *regs, int error_code, int trapnr)
1048
+static void math_error(struct pt_regs *regs, int trapnr)
8311049 {
8321050 struct task_struct *task = current;
8331051 struct fpu *fpu = &task->thread.fpu;
834
- siginfo_t info;
1052
+ int si_code;
8351053 char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" :
8361054 "simd exception";
8371055
8381056 cond_local_irq_enable(regs);
8391057
8401058 if (!user_mode(regs)) {
841
- if (fixup_exception(regs, trapnr))
842
- return;
1059
+ if (fixup_exception(regs, trapnr, 0, 0))
1060
+ goto exit;
8431061
844
- task->thread.error_code = error_code;
1062
+ task->thread.error_code = 0;
8451063 task->thread.trap_nr = trapnr;
8461064
847
- if (notify_die(DIE_TRAP, str, regs, error_code,
848
- trapnr, SIGFPE) != NOTIFY_STOP)
849
- die(str, regs, error_code);
850
- return;
1065
+ if (notify_die(DIE_TRAP, str, regs, 0, trapnr,
1066
+ SIGFPE) != NOTIFY_STOP)
1067
+ die(str, regs, 0);
1068
+ goto exit;
8511069 }
8521070
8531071 /*
@@ -856,61 +1074,78 @@
8561074 fpu__save(fpu);
8571075
8581076 task->thread.trap_nr = trapnr;
859
- task->thread.error_code = error_code;
860
- clear_siginfo(&info);
861
- info.si_signo = SIGFPE;
862
- info.si_errno = 0;
863
- info.si_addr = (void __user *)uprobe_get_trap_addr(regs);
1077
+ task->thread.error_code = 0;
8641078
865
- info.si_code = fpu__exception_code(fpu, trapnr);
866
-
1079
+ si_code = fpu__exception_code(fpu, trapnr);
8671080 /* Retry when we get spurious exceptions: */
868
- if (!info.si_code)
869
- return;
1081
+ if (!si_code)
1082
+ goto exit;
8701083
871
- force_sig_info(SIGFPE, &info, task);
1084
+ force_sig_fault(SIGFPE, si_code,
1085
+ (void __user *)uprobe_get_trap_addr(regs));
1086
+exit:
1087
+ cond_local_irq_disable(regs);
8721088 }
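math_error() above now derives the si_code from fpu__exception_code() and delivers it with force_sig_fault() directly, instead of building a siginfo_t by hand. The usual way to see this path from user space is to unmask an FPU exception and trigger it (hedged demo; feenableexcept() is a glibc extension, compile with -lm; the division raises #XF with SSE math or #MF with x87, both funnelling into math_error()):

/* Stand-alone demo: unmasked divide-by-zero -> SIGFPE with FPE_FLTDIV. */
#define _GNU_SOURCE
#include <fenv.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

static void fpe_handler(int sig, siginfo_t *info, void *ctx)
{
        (void)sig; (void)ctx;
        printf("SIGFPE si_code=%d (FPE_FLTDIV=%d) at %p\n",
               info->si_code, FPE_FLTDIV, info->si_addr);
        _exit(0);               /* the faulting instruction would otherwise re-trap */
}

int main(void)
{
        struct sigaction sa = {
                .sa_sigaction = fpe_handler,
                .sa_flags = SA_SIGINFO,
        };
        volatile double zero = 0.0;
        volatile double r;

        sigaction(SIGFPE, &sa, NULL);
        feenableexcept(FE_DIVBYZERO);   /* unmask the divide-by-zero exception */

        r = 1.0 / zero;                 /* #XF/#MF -> math_error() -> SIGFPE */
        (void)r;
        return 1;
}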
8731089
874
-dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
1090
+DEFINE_IDTENTRY(exc_coprocessor_error)
8751091 {
876
- RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
877
- math_error(regs, error_code, X86_TRAP_MF);
1092
+ math_error(regs, X86_TRAP_MF);
8781093 }
8791094
880
-dotraplinkage void
881
-do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
1095
+DEFINE_IDTENTRY(exc_simd_coprocessor_error)
8821096 {
883
- RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
884
- math_error(regs, error_code, X86_TRAP_XF);
1097
+ if (IS_ENABLED(CONFIG_X86_INVD_BUG)) {
1098
+ /* AMD 486 bug: INVD in CPL 0 raises #XF instead of #GP */
1099
+ if (!static_cpu_has(X86_FEATURE_XMM)) {
1100
+ __exc_general_protection(regs, 0);
1101
+ return;
1102
+ }
1103
+ }
1104
+ math_error(regs, X86_TRAP_XF);
8851105 }
8861106
887
-dotraplinkage void
888
-do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
1107
+DEFINE_IDTENTRY(exc_spurious_interrupt_bug)
8891108 {
890
- cond_local_irq_enable(regs);
1109
+ /*
1110
+ * This addresses a Pentium Pro Erratum:
1111
+ *
1112
+ * PROBLEM: If the APIC subsystem is configured in mixed mode with
1113
+ * Virtual Wire mode implemented through the local APIC, an
1114
+ * interrupt vector of 0Fh (Intel reserved encoding) may be
1115
+ * generated by the local APIC (Int 15). This vector may be
1116
+ * generated upon receipt of a spurious interrupt (an interrupt
1117
+ * which is removed before the system receives the INTA sequence)
1118
+ * instead of the programmed 8259 spurious interrupt vector.
1119
+ *
1120
+ * IMPLICATION: The spurious interrupt vector programmed in the
1121
+ * 8259 is normally handled by an operating system's spurious
1122
+ * interrupt handler. However, a vector of 0Fh is unknown to some
1123
+ * operating systems, which would crash if this erratum occurred.
1124
+ *
1125
+ * In theory this could be limited to 32bit, but the handler is not
1126
+ * hurting and who knows which other CPUs suffer from this.
1127
+ */
8911128 }
8921129
893
-dotraplinkage void
894
-do_device_not_available(struct pt_regs *regs, long error_code)
1130
+DEFINE_IDTENTRY(exc_device_not_available)
8951131 {
896
- unsigned long cr0;
897
-
898
- RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
1132
+ unsigned long cr0 = read_cr0();
8991133
9001134 #ifdef CONFIG_MATH_EMULATION
901
- if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) {
1135
+ if (!boot_cpu_has(X86_FEATURE_FPU) && (cr0 & X86_CR0_EM)) {
9021136 struct math_emu_info info = { };
9031137
9041138 cond_local_irq_enable(regs);
9051139
9061140 info.regs = regs;
9071141 math_emulate(&info);
1142
+
1143
+ cond_local_irq_disable(regs);
9081144 return;
9091145 }
9101146 #endif
9111147
9121148 /* This should not happen. */
913
- cr0 = read_cr0();
9141149 if (WARN(cr0 & X86_CR0_TS, "CR0.TS was set")) {
9151150 /* Try to fix it up and carry on. */
9161151 write_cr0(cr0 & ~X86_CR0_TS);
@@ -920,29 +1155,20 @@
9201155 * to kill the task than getting stuck in a never-ending
9211156 * loop of #NM faults.
9221157 */
923
- die("unexpected #NM exception", regs, error_code);
1158
+ die("unexpected #NM exception", regs, 0);
9241159 }
9251160 }
926
-NOKPROBE_SYMBOL(do_device_not_available);
9271161
9281162 #ifdef CONFIG_X86_32
929
-dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
1163
+DEFINE_IDTENTRY_SW(iret_error)
9301164 {
931
- siginfo_t info;
932
-
933
- RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
9341165 local_irq_enable();
935
-
936
- clear_siginfo(&info);
937
- info.si_signo = SIGILL;
938
- info.si_errno = 0;
939
- info.si_code = ILL_BADSTK;
940
- info.si_addr = NULL;
941
- if (notify_die(DIE_TRAP, "iret exception", regs, error_code,
1166
+ if (notify_die(DIE_TRAP, "iret exception", regs, 0,
9421167 X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) {
943
- do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
944
- &info);
1168
+ do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, 0,
1169
+ ILL_BADSTK, (void __user *)NULL);
9451170 }
1171
+ local_irq_disable();
9461172 }
9471173 #endif
9481174
@@ -951,25 +1177,13 @@
9511177 /* Init cpu_entry_area before IST entries are set up */
9521178 setup_cpu_entry_areas();
9531179
1180
+ /* Init GHCB memory pages when running as an SEV-ES guest */
1181
+ sev_es_init_vc_handling();
1182
+
9541183 idt_setup_traps();
9551184
956
- /*
957
- * Set the IDT descriptor to a fixed read-only location, so that the
958
- * "sidt" instruction will not leak the location of the kernel, and
959
- * to defend the IDT against arbitrary memory write vulnerabilities.
960
- * It will be reloaded in cpu_init() */
961
- cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
962
- PAGE_KERNEL_RO);
963
- idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
964
-
965
- /*
966
- * Should be a barrier for any external CPU state:
967
- */
1185
+ cpu_init_exception_handling();
9681186 cpu_init();
9691187
9701188 idt_setup_ist_traps();
971
-
972
- x86_init.irqs.trap_init();
973
-
974
- idt_setup_debugidt_traps();
9751189 }