2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
kernel/arch/x86/entry/entry_32.S
....@@ -40,42 +40,16 @@
4040 #include <asm/processor-flags.h>
4141 #include <asm/irq_vectors.h>
4242 #include <asm/cpufeatures.h>
43
-#include <asm/alternative-asm.h>
43
+#include <asm/alternative.h>
4444 #include <asm/asm.h>
4545 #include <asm/smap.h>
4646 #include <asm/frame.h>
47
+#include <asm/trapnr.h>
4748 #include <asm/nospec-branch.h>
4849
50
+#include "calling.h"
51
+
4952 .section .entry.text, "ax"
50
-
51
-/*
52
- * We use macros for low-level operations which need to be overridden
53
- * for paravirtualization. The following will never clobber any registers:
54
- * INTERRUPT_RETURN (aka. "iret")
55
- * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
56
- * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
57
- *
58
- * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
59
- * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
60
- * Allowing a register to be clobbered can shrink the paravirt replacement
61
- * enough to patch inline, increasing performance.
62
- */
63
-
64
-#ifdef CONFIG_PREEMPT
65
-# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
66
-#else
67
-# define preempt_stop(clobbers)
68
-# define resume_kernel restore_all_kernel
69
-#endif
70
-
71
-.macro TRACE_IRQS_IRET
72
-#ifdef CONFIG_TRACE_IRQFLAGS
73
- testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off?
74
- jz 1f
75
- TRACE_IRQS_ON
76
-1:
77
-#endif
78
-.endm
7953
8054 #define PTI_SWITCH_MASK (1 << PAGE_SHIFT)
8155
....@@ -171,7 +145,7 @@
171145 ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
172146 .if \no_user_check == 0
173147 /* coming from usermode? */
174
- testl $SEGMENT_RPL_MASK, PT_CS(%esp)
148
+ testl $USER_SEGMENT_RPL_MASK, PT_CS(%esp)
175149 jz .Lend_\@
176150 .endif
177151 /* On user-cr3? */
....@@ -201,10 +175,126 @@
201175 .Lend_\@:
202176 .endm
203177
204
-.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
178
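+/*
+ * Flag bits stashed in the unused high half of the saved CS slot
+ * (the hardware only uses the low 16 bits); see FIXUP_FRAME below.
+ */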
+#define CS_FROM_ENTRY_STACK (1 << 31)
179
+#define CS_FROM_USER_CR3 (1 << 30)
180
+#define CS_FROM_KERNEL (1 << 29)
181
+#define CS_FROM_ESPFIX (1 << 28)
182
+
183
+.macro FIXUP_FRAME
184
+ /*
185
+ * The high bits of the CS dword (__csh) are used for CS_FROM_*.
186
+ * Clear them in case hardware didn't do this for us.
187
+ */
188
+ andl $0x0000ffff, 4*4(%esp)
189
+
190
+#ifdef CONFIG_VM86
191
+ testl $X86_EFLAGS_VM, 5*4(%esp)
192
+ jnz .Lfrom_usermode_no_fixup_\@
193
+#endif
194
+ testl $USER_SEGMENT_RPL_MASK, 4*4(%esp)
195
+ jnz .Lfrom_usermode_no_fixup_\@
196
+
197
+ orl $CS_FROM_KERNEL, 4*4(%esp)
198
+
199
+ /*
200
+ * When we're here from kernel mode, the (exception) stack looks like:
201
+ *
202
+ * 6*4(%esp) - <previous context>
203
+ * 5*4(%esp) - flags
204
+ * 4*4(%esp) - cs
205
+ * 3*4(%esp) - ip
206
+ * 2*4(%esp) - orig_eax
207
+ * 1*4(%esp) - gs / function
208
+ * 0*4(%esp) - fs
209
+ *
210
+ * Let's build a 5 entry IRET frame after that, such that struct pt_regs
211
+ * is complete and in particular regs->sp is correct. This gives us
212
+ * the original 6 entries as gap:
213
+ *
214
+ * 14*4(%esp) - <previous context>
215
+ * 13*4(%esp) - gap / flags
216
+ * 12*4(%esp) - gap / cs
217
+ * 11*4(%esp) - gap / ip
218
+ * 10*4(%esp) - gap / orig_eax
219
+ * 9*4(%esp) - gap / gs / function
220
+ * 8*4(%esp) - gap / fs
221
+ * 7*4(%esp) - ss
222
+ * 6*4(%esp) - sp
223
+ * 5*4(%esp) - flags
224
+ * 4*4(%esp) - cs
225
+ * 3*4(%esp) - ip
226
+ * 2*4(%esp) - orig_eax
227
+ * 1*4(%esp) - gs / function
228
+ * 0*4(%esp) - fs
229
+ */
230
+
231
+ pushl %ss # ss
232
+ pushl %esp # sp (points at ss)
233
+ addl $7*4, (%esp) # point sp back at the previous context
234
+ pushl 7*4(%esp) # flags
235
+ pushl 7*4(%esp) # cs
236
+ pushl 7*4(%esp) # ip
237
+ pushl 7*4(%esp) # orig_eax
238
+ pushl 7*4(%esp) # gs / function
239
+ pushl 7*4(%esp) # fs
240
+.Lfrom_usermode_no_fixup_\@:
241
+.endm
242
+
243
+.macro IRET_FRAME
244
+ /*
245
+ * We're called with %ds, %es, %fs, and %gs from the interrupted
246
+ * frame, so we shouldn't use them. Also, we may be in ESPFIX
247
+ * mode and therefore have a nonzero SS base and an offset ESP,
248
+ * so any attempt to access the stack needs to use SS. (except for
249
+ * accesses through %esp, which automatically use SS.)
250
+ */
251
+ testl $CS_FROM_KERNEL, 1*4(%esp)
252
+ jz .Lfinished_frame_\@
253
+
254
+ /*
255
+ * Reconstruct the 3 entry IRET frame right after the (modified)
256
+ * regs->sp without lowering %esp in between, such that an NMI in the
257
+ * middle doesn't scribble our stack.
258
+ */
259
+ pushl %eax
260
+ pushl %ecx
261
+ movl 5*4(%esp), %eax # (modified) regs->sp
262
+
263
+ movl 4*4(%esp), %ecx # flags
264
+ movl %ecx, %ss:-1*4(%eax)
265
+
266
+ movl 3*4(%esp), %ecx # cs
267
+ andl $0x0000ffff, %ecx
268
+ movl %ecx, %ss:-2*4(%eax)
269
+
270
+ movl 2*4(%esp), %ecx # ip
271
+ movl %ecx, %ss:-3*4(%eax)
272
+
273
+ movl 1*4(%esp), %ecx # eax
274
+ movl %ecx, %ss:-4*4(%eax)
275
+
276
+ popl %ecx
277
+ lea -4*4(%eax), %esp
278
+ popl %eax
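+ /* %esp now points at the rebuilt 3-entry IRET frame just below regs->sp */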
279
+.Lfinished_frame_\@:
280
+.endm
281
+
282
+.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 unwind_espfix=0
205283 cld
284
+.if \skip_gs == 0
206285 PUSH_GS
286
+.endif
207287 pushl %fs
288
+
289
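+ /*
+ * Preserve %eax: it is used as scratch for the __KERNEL_PERCPU %fs load
+ * and may also be clobbered by the optional ESPFIX unwind below.
+ */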
+ pushl %eax
290
+ movl $(__KERNEL_PERCPU), %eax
291
+ movl %eax, %fs
292
+.if \unwind_espfix > 0
293
+ UNWIND_ESPFIX_STACK
294
+.endif
295
+ popl %eax
296
+
297
+ FIXUP_FRAME
208298 pushl %es
209299 pushl %ds
210300 pushl \pt_regs_ax
....@@ -217,19 +307,17 @@
217307 movl $(__USER_DS), %edx
218308 movl %edx, %ds
219309 movl %edx, %es
220
- movl $(__KERNEL_PERCPU), %edx
221
- movl %edx, %fs
310
+.if \skip_gs == 0
222311 SET_KERNEL_GS %edx
223
-
312
+.endif
224313 /* Switch to kernel stack if necessary */
225314 .if \switch_stacks > 0
226315 SWITCH_TO_KERNEL_STACK
227316 .endif
228
-
229317 .endm
230318
231
-.macro SAVE_ALL_NMI cr3_reg:req
232
- SAVE_ALL
319
+.macro SAVE_ALL_NMI cr3_reg:req unwind_espfix=0
320
+ SAVE_ALL unwind_espfix=\unwind_espfix
233321
234322 BUG_IF_WRONG_CR3
235323
....@@ -261,6 +349,7 @@
261349 2: popl %es
262350 3: popl %fs
263351 POP_GS \pop
352
+ IRET_FRAME
264353 .pushsection .fixup, "ax"
265354 4: movl $0, (%esp)
266355 jmp 1b
....@@ -299,7 +388,8 @@
299388
300389 .macro CHECK_AND_APPLY_ESPFIX
301390 #ifdef CONFIG_X86_ESPFIX32
302
-#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
391
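+/*
+ * Byte offset of the ESPFIX SS descriptor within the GDT;
+ * FIXUP_ESPFIX_STACK reads the high segment base bytes from this slot.
+ */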
+#define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8)
392
+#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + GDT_ESPFIX_OFFSET
303393
304394 ALTERNATIVE "jmp .Lend_\@", "", X86_BUG_ESPFIX
305395
....@@ -357,12 +447,7 @@
357447 * switch to it before we do any copying.
358448 */
359449
360
-#define CS_FROM_ENTRY_STACK (1 << 31)
361
-#define CS_FROM_USER_CR3 (1 << 30)
362
-
363450 .macro SWITCH_TO_KERNEL_STACK
364
-
365
- ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
366451
367452 BUG_IF_WRONG_CR3
368453
....@@ -372,13 +457,6 @@
372457 * %eax now contains the entry cr3 and we carry it forward in
373458 * that register for the time this macro runs
374459 */
375
-
376
- /*
377
- * The high bits of the CS dword (__csh) are used for
378
- * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case
379
- * hardware didn't do this for us.
380
- */
381
- andl $(0x0000ffff), PT_CS(%esp)
382460
383461 /* Are we on the entry stack? Bail out if not! */
384462 movl PER_CPU_VAR(cpu_entry_area), %ecx
....@@ -519,8 +597,6 @@
519597 */
520598 .macro SWITCH_TO_ENTRY_STACK
521599
522
- ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
523
-
524600 /* Bytes to copy */
525601 movl $PTREGS_SIZE, %ecx
526602
....@@ -619,11 +695,69 @@
619695
620696 .Lend_\@:
621697 .endm
698
+
699
+/**
700
+ * idtentry - Macro to generate entry stubs for simple IDT entries
701
+ * @vector: Vector number
702
+ * @asmsym: ASM symbol for the entry point
703
+ * @cfunc: C function to be called
704
+ * @has_error_code: Hardware pushed error code on stack
705
+ */
706
+.macro idtentry vector asmsym cfunc has_error_code:req
707
+SYM_CODE_START(\asmsym)
708
+ ASM_CLAC
709
+ cld
710
+
711
+ .if \has_error_code == 0
712
+ pushl $0 /* Clear the error code */
713
+ .endif
714
+
715
+ /* Push the C-function address into the GS slot */
716
+ pushl $\cfunc
717
+ /* Invoke the common exception entry */
718
+ jmp handle_exception
719
+SYM_CODE_END(\asmsym)
720
+.endm
721
+
722
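+/**
+ * idtentry_irq - Macro to generate entry stubs for device interrupts
+ * @vector: Vector number
+ * @cfunc: C function to be called
+ *
+ * The vector number is picked up from the orig_eax slot on the stack and
+ * handed to @cfunc along with the pt_regs pointer.
+ */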
+.macro idtentry_irq vector cfunc
723
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
724
+SYM_CODE_START_LOCAL(asm_\cfunc)
725
+ ASM_CLAC
726
+ SAVE_ALL switch_stacks=1
727
+ ENCODE_FRAME_POINTER
728
+ movl %esp, %eax
729
+ movl PT_ORIG_EAX(%esp), %edx /* get the vector from stack */
730
+ movl $-1, PT_ORIG_EAX(%esp) /* no syscall to restart */
731
+ call \cfunc
732
+ jmp handle_exception_return
733
+SYM_CODE_END(asm_\cfunc)
734
+.endm
735
+
736
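+/**
+ * idtentry_sysvec - Macro to generate entry stubs for system vectors
+ * @vector: Vector number
+ * @cfunc: C function to be called
+ *
+ * On 32-bit these are handled like simple IDT entries without an error code.
+ */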
+.macro idtentry_sysvec vector cfunc
737
+ idtentry \vector asm_\cfunc \cfunc has_error_code=0
738
+.endm
739
+
740
+/*
741
+ * Include the defines which emit the idt entries which are shared
742
+ * between 32 and 64 bit and emit the __irqentry_text_* markers
743
+ * so the stacktrace boundary checks work.
744
+ */
745
+ .align 16
746
+ .globl __irqentry_text_start
747
+__irqentry_text_start:
748
+
749
+#include <asm/idtentry.h>
750
+
751
+ .align 16
752
+ .globl __irqentry_text_end
753
+__irqentry_text_end:
754
+
622755 /*
623756 * %eax: prev task
624757 * %edx: next task
625758 */
626
-ENTRY(__switch_to_asm)
759
+.pushsection .text, "ax"
760
+SYM_CODE_START(__switch_to_asm)
627761 /*
628762 * Save callee-saved registers
629763 * This must match the order in struct inactive_task_frame
....@@ -632,6 +766,11 @@
632766 pushl %ebx
633767 pushl %edi
634768 pushl %esi
769
+ /*
770
+ * Flags are saved to prevent AC leakage. This could go
771
+ * away if objtool had 32-bit support to verify
772
+ * the STAC/CLAC correctness.
773
+ */
635774 pushfl
636775
637776 /* switch stack */
....@@ -643,7 +782,6 @@
643782 movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
644783 #endif
645784
646
-#ifdef CONFIG_RETPOLINE
647785 /*
648786 * When switching from a shallower to a deeper call stack
649787 * the RSB may either underflow or use entries populated
....@@ -652,17 +790,18 @@
652790 * speculative execution to prevent attack.
653791 */
654792 FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
655
-#endif
656793
657
- /* restore callee-saved registers */
794
+ /* Restore flags of the incoming task to restore AC state. */
658795 popfl
796
+ /* restore callee-saved registers */
659797 popl %esi
660798 popl %edi
661799 popl %ebx
662800 popl %ebp
663801
664802 jmp __switch_to
665
-END(__switch_to_asm)
803
+SYM_CODE_END(__switch_to_asm)
804
+.popsection
666805
667806 /*
668807 * The unwinder expects the last frame on the stack to always be at the same
....@@ -671,7 +810,8 @@
671810 * asmlinkage function so its argument has to be pushed on the stack. This
672811 * wrapper creates a proper "end of stack" frame header before the call.
673812 */
674
-ENTRY(schedule_tail_wrapper)
813
+.pushsection .text, "ax"
814
+SYM_FUNC_START(schedule_tail_wrapper)
675815 FRAME_BEGIN
676816
677817 pushl %eax
....@@ -679,8 +819,10 @@
679819 popl %eax
680820
681821 FRAME_END
682
- ret
683
-ENDPROC(schedule_tail_wrapper)
822
+ RET
823
+SYM_FUNC_END(schedule_tail_wrapper)
824
+.popsection
825
+
684826 /*
685827 * A newly forked process directly context switches into this address.
686828 *
....@@ -688,7 +830,8 @@
688830 * ebx: kernel thread func (NULL for user thread)
689831 * edi: kernel thread arg
690832 */
691
-ENTRY(ret_from_fork)
833
+.pushsection .text, "ax"
834
+SYM_CODE_START(ret_from_fork)
692835 call schedule_tail_wrapper
693836
694837 testl %ebx, %ebx
....@@ -697,69 +840,23 @@
697840 2:
698841 /* When we fork, we trace the syscall return in the child, too. */
699842 movl %esp, %eax
700
- call syscall_return_slowpath
701
- jmp restore_all
843
+ call syscall_exit_to_user_mode
844
+ jmp .Lsyscall_32_done
702845
703846 /* kernel thread */
704847 1: movl %edi, %eax
705
- CALL_NOSPEC %ebx
848
+ CALL_NOSPEC ebx
706849 /*
707850 * A kernel thread is allowed to return here after successfully
708
- * calling do_execve(). Exit to userspace to complete the execve()
851
+ * calling kernel_execve(). Exit to userspace to complete the execve()
709852 * syscall.
710853 */
711854 movl $0, PT_EAX(%esp)
712855 jmp 2b
713
-END(ret_from_fork)
856
+SYM_CODE_END(ret_from_fork)
857
+.popsection
714858
715
-/*
716
- * Return to user mode is not as complex as all this looks,
717
- * but we want the default path for a system call return to
718
- * go as quickly as possible which is why some of this is
719
- * less clear than it otherwise should be.
720
- */
721
-
722
- # userspace resumption stub bypassing syscall exit tracing
723
- ALIGN
724
-ret_from_exception:
725
- preempt_stop(CLBR_ANY)
726
-ret_from_intr:
727
-#ifdef CONFIG_VM86
728
- movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
729
- movb PT_CS(%esp), %al
730
- andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
731
-#else
732
- /*
733
- * We can be coming here from child spawned by kernel_thread().
734
- */
735
- movl PT_CS(%esp), %eax
736
- andl $SEGMENT_RPL_MASK, %eax
737
-#endif
738
- cmpl $USER_RPL, %eax
739
- jb resume_kernel # not returning to v8086 or userspace
740
-
741
-ENTRY(resume_userspace)
742
- DISABLE_INTERRUPTS(CLBR_ANY)
743
- TRACE_IRQS_OFF
744
- movl %esp, %eax
745
- call prepare_exit_to_usermode
746
- jmp restore_all
747
-END(ret_from_exception)
748
-
749
-#ifdef CONFIG_PREEMPT
750
-ENTRY(resume_kernel)
751
- DISABLE_INTERRUPTS(CLBR_ANY)
752
-.Lneed_resched:
753
- cmpl $0, PER_CPU_VAR(__preempt_count)
754
- jnz restore_all_kernel
755
- testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
756
- jz restore_all_kernel
757
- call preempt_schedule_irq
758
- jmp .Lneed_resched
759
-END(resume_kernel)
760
-#endif
761
-
762
-GLOBAL(__begin_SYSENTER_singlestep_region)
859
+SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
763860 /*
764861 * All code from here through __end_SYSENTER_singlestep_region is subject
765862 * to being single-stepped if a user program sets TF and executes SYSENTER.
....@@ -768,16 +865,6 @@
768865 * possible, we handle TF just like AC and NT, except that our #DB handler
769866 * will ignore all of the single-step traps generated in this range.
770867 */
771
-
772
-#ifdef CONFIG_XEN
773
-/*
774
- * Xen doesn't set %esp to be precisely what the normal SYSENTER
775
- * entry point expects, so fix it up before using the normal path.
776
- */
777
-ENTRY(xen_sysenter_target)
778
- addl $5*4, %esp /* remove xen-provided frame */
779
- jmp .Lsysenter_past_esp
780
-#endif
781868
782869 /*
783870 * 32-bit SYSENTER entry.
....@@ -811,7 +898,7 @@
811898 * ebp user stack
812899 * 0(%ebp) arg6
813900 */
814
-ENTRY(entry_SYSENTER_32)
901
+SYM_FUNC_START(entry_SYSENTER_32)
815902 /*
816903 * On entry-stack with all userspace-regs live - save and
817904 * restore eflags and %eax to use it as scratch-reg for the cr3
....@@ -829,9 +916,8 @@
829916
830917 .Lsysenter_past_esp:
831918 pushl $__USER_DS /* pt_regs->ss */
832
- pushl %ebp /* pt_regs->sp (stashed in bp) */
919
+ pushl $0 /* pt_regs->sp (placeholder) */
833920 pushfl /* pt_regs->flags (except IF = 0) */
834
- orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
835921 pushl $__USER_CS /* pt_regs->cs */
836922 pushl $0 /* pt_regs->ip = 0 (placeholder) */
837923 pushl %eax /* pt_regs->orig_ax */
....@@ -860,20 +946,14 @@
860946 jnz .Lsysenter_fix_flags
861947 .Lsysenter_flags_fixed:
862948
863
- /*
864
- * User mode is traced as though IRQs are on, and SYSENTER
865
- * turned them off.
866
- */
867
- TRACE_IRQS_OFF
868
-
869949 movl %esp, %eax
870
- call do_fast_syscall_32
871
- /* XEN PV guests always use IRET path */
872
- ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
873
- "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
950
+ call do_SYSENTER_32
951
+ testl %eax, %eax
952
+ jz .Lsyscall_32_done
874953
875
-/* Opportunistic SYSEXIT */
876
- TRACE_IRQS_ON /* User mode traces as IRQs on. */
954
+ STACKLEAK_ERASE
955
+
956
+ /* Opportunistic SYSEXIT */
877957
878958 /*
879959 * Setup entry stack - we keep the pointer in %eax and do the
....@@ -936,8 +1016,8 @@
9361016 pushl $X86_EFLAGS_FIXED
9371017 popfl
9381018 jmp .Lsysenter_flags_fixed
939
-GLOBAL(__end_SYSENTER_singlestep_region)
940
-ENDPROC(entry_SYSENTER_32)
1019
+SYM_ENTRY(__end_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
1020
+SYM_FUNC_END(entry_SYSENTER_32)
9411021
9421022 /*
9431023 * 32-bit legacy system call entry.
....@@ -967,28 +1047,21 @@
9671047 * edi arg5
9681048 * ebp arg6
9691049 */
970
-ENTRY(entry_INT80_32)
1050
+SYM_FUNC_START(entry_INT80_32)
9711051 ASM_CLAC
9721052 pushl %eax /* pt_regs->orig_ax */
9731053
9741054 SAVE_ALL pt_regs_ax=$-ENOSYS switch_stacks=1 /* save rest */
9751055
976
- /*
977
- * User mode is traced as though IRQs are on, and the interrupt gate
978
- * turned them off.
979
- */
980
- TRACE_IRQS_OFF
981
-
9821056 movl %esp, %eax
9831057 call do_int80_syscall_32
9841058 .Lsyscall_32_done:
1059
+ STACKLEAK_ERASE
9851060
986
-restore_all:
987
- TRACE_IRQS_IRET
1061
+restore_all_switch_stack:
9881062 SWITCH_TO_ENTRY_STACK
989
-.Lrestore_all_notrace:
9901063 CHECK_AND_APPLY_ESPFIX
991
-.Lrestore_nocheck:
1064
+
9921065 /* Switch back to user CR3 */
9931066 SWITCH_TO_USER_CR3 scratch_reg=%eax
9941067
....@@ -1004,17 +1077,10 @@
10041077 */
10051078 INTERRUPT_RETURN
10061079
1007
-restore_all_kernel:
1008
- TRACE_IRQS_IRET
1009
- PARANOID_EXIT_TO_KERNEL_MODE
1010
- BUG_IF_WRONG_CR3
1011
- RESTORE_REGS 4
1012
- jmp .Lirq_return
1013
-
10141080 .section .fixup, "ax"
1015
-ENTRY(iret_exc )
1081
+SYM_CODE_START(asm_iret_error)
10161082 pushl $0 # no error code
1017
- pushl $do_iret_error
1083
+ pushl $iret_error
10181084
10191085 #ifdef CONFIG_DEBUG_ENTRY
10201086 /*
....@@ -1028,10 +1094,11 @@
10281094 popl %eax
10291095 #endif
10301096
1031
- jmp common_exception
1097
+ jmp handle_exception
1098
+SYM_CODE_END(asm_iret_error)
10321099 .previous
1033
- _ASM_EXTABLE(.Lirq_return, iret_exc)
1034
-ENDPROC(entry_INT80_32)
1100
+ _ASM_EXTABLE(.Lirq_return, asm_iret_error)
1101
+SYM_FUNC_END(entry_INT80_32)
10351102
10361103 .macro FIXUP_ESPFIX_STACK
10371104 /*
....@@ -1040,366 +1107,128 @@
10401107 * We can't call C functions using the ESPFIX stack. This code reads
10411108 * the high word of the segment base from the GDT and switches to the
10421109 * normal stack and adjusts ESP with the matching offset.
1110
+ *
1111
+ * We might be on user CR3 here, so percpu data is not mapped and we can't
1112
+ * access the GDT through the percpu segment. Instead, use SGDT to find
1113
+ * the cpu_entry_area alias of the GDT.
10431114 */
10441115 #ifdef CONFIG_X86_ESPFIX32
10451116 /* fixup the stack */
1046
- mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
1047
- mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
1117
+ pushl %ecx
1118
+ subl $2*4, %esp
1119
+ sgdt (%esp)
1120
+ movl 2(%esp), %ecx /* GDT address */
1121
+ /*
1122
+ * Careful: ECX is a linear pointer, so we need to force base
1123
+ * zero. %cs is the only known-linear segment we have right now.
1124
+ */
1125
+ mov %cs:GDT_ESPFIX_OFFSET + 4(%ecx), %al /* bits 16..23 */
1126
+ mov %cs:GDT_ESPFIX_OFFSET + 7(%ecx), %ah /* bits 24..31 */
10481127 shl $16, %eax
1128
+ addl $2*4, %esp
1129
+ popl %ecx
10491130 addl %esp, %eax /* the adjusted stack pointer */
10501131 pushl $__KERNEL_DS
10511132 pushl %eax
10521133 lss (%esp), %esp /* switch to the normal stack segment */
10531134 #endif
10541135 .endm
1136
+
10551137 .macro UNWIND_ESPFIX_STACK
1138
+ /* It's safe to clobber %eax, all other regs need to be preserved */
10561139 #ifdef CONFIG_X86_ESPFIX32
10571140 movl %ss, %eax
10581141 /* see if on espfix stack */
10591142 cmpw $__ESPFIX_SS, %ax
1060
- jne 27f
1061
- movl $__KERNEL_DS, %eax
1062
- movl %eax, %ds
1063
- movl %eax, %es
1143
+ jne .Lno_fixup_\@
10641144 /* switch to normal stack */
10651145 FIXUP_ESPFIX_STACK
1066
-27:
1146
+.Lno_fixup_\@:
10671147 #endif
10681148 .endm
10691149
1070
-/*
1071
- * Build the entry stubs with some assembler magic.
1072
- * We pack 1 stub into every 8-byte block.
1073
- */
1074
- .align 8
1075
-ENTRY(irq_entries_start)
1076
- vector=FIRST_EXTERNAL_VECTOR
1077
- .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
1078
- pushl $(~vector+0x80) /* Note: always in signed byte range */
1079
- vector=vector+1
1080
- jmp common_interrupt
1081
- .align 8
1082
- .endr
1083
-END(irq_entries_start)
1084
-
1085
-#ifdef CONFIG_X86_LOCAL_APIC
1086
- .align 8
1087
-ENTRY(spurious_entries_start)
1088
- vector=FIRST_SYSTEM_VECTOR
1089
- .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
1090
- pushl $(~vector+0x80) /* Note: always in signed byte range */
1091
- vector=vector+1
1092
- jmp common_spurious
1093
- .align 8
1094
- .endr
1095
-END(spurious_entries_start)
1096
-
1097
-common_spurious:
1098
- ASM_CLAC
1099
- addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
1100
- SAVE_ALL switch_stacks=1
1101
- ENCODE_FRAME_POINTER
1102
- TRACE_IRQS_OFF
1103
- movl %esp, %eax
1104
- call smp_spurious_interrupt
1105
- jmp ret_from_intr
1106
-ENDPROC(common_spurious)
1107
-#endif
1108
-
1109
-/*
1110
- * the CPU automatically disables interrupts when executing an IRQ vector,
1111
- * so IRQ-flags tracing has to follow that:
1112
- */
1113
- .p2align CONFIG_X86_L1_CACHE_SHIFT
1114
-common_interrupt:
1115
- ASM_CLAC
1116
- addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
1117
-
1118
- SAVE_ALL switch_stacks=1
1119
- ENCODE_FRAME_POINTER
1120
- TRACE_IRQS_OFF
1121
- movl %esp, %eax
1122
- call do_IRQ
1123
- jmp ret_from_intr
1124
-ENDPROC(common_interrupt)
1125
-
1126
-#define BUILD_INTERRUPT3(name, nr, fn) \
1127
-ENTRY(name) \
1128
- ASM_CLAC; \
1129
- pushl $~(nr); \
1130
- SAVE_ALL switch_stacks=1; \
1131
- ENCODE_FRAME_POINTER; \
1132
- TRACE_IRQS_OFF \
1133
- movl %esp, %eax; \
1134
- call fn; \
1135
- jmp ret_from_intr; \
1136
-ENDPROC(name)
1137
-
1138
-#define BUILD_INTERRUPT(name, nr) \
1139
- BUILD_INTERRUPT3(name, nr, smp_##name); \
1140
-
1141
-/* The include is where all of the SMP etc. interrupts come from */
1142
-#include <asm/entry_arch.h>
1143
-
1144
-ENTRY(coprocessor_error)
1145
- ASM_CLAC
1146
- pushl $0
1147
- pushl $do_coprocessor_error
1148
- jmp common_exception
1149
-END(coprocessor_error)
1150
-
1151
-ENTRY(simd_coprocessor_error)
1152
- ASM_CLAC
1153
- pushl $0
1154
-#ifdef CONFIG_X86_INVD_BUG
1155
- /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
1156
- ALTERNATIVE "pushl $do_general_protection", \
1157
- "pushl $do_simd_coprocessor_error", \
1158
- X86_FEATURE_XMM
1159
-#else
1160
- pushl $do_simd_coprocessor_error
1161
-#endif
1162
- jmp common_exception
1163
-END(simd_coprocessor_error)
1164
-
1165
-ENTRY(device_not_available)
1166
- ASM_CLAC
1167
- pushl $-1 # mark this as an int
1168
- pushl $do_device_not_available
1169
- jmp common_exception
1170
-END(device_not_available)
1171
-
1172
-#ifdef CONFIG_PARAVIRT
1173
-ENTRY(native_iret)
1174
- iret
1175
- _ASM_EXTABLE(native_iret, iret_exc)
1176
-END(native_iret)
1177
-#endif
1178
-
1179
-ENTRY(overflow)
1180
- ASM_CLAC
1181
- pushl $0
1182
- pushl $do_overflow
1183
- jmp common_exception
1184
-END(overflow)
1185
-
1186
-ENTRY(bounds)
1187
- ASM_CLAC
1188
- pushl $0
1189
- pushl $do_bounds
1190
- jmp common_exception
1191
-END(bounds)
1192
-
1193
-ENTRY(invalid_op)
1194
- ASM_CLAC
1195
- pushl $0
1196
- pushl $do_invalid_op
1197
- jmp common_exception
1198
-END(invalid_op)
1199
-
1200
-ENTRY(coprocessor_segment_overrun)
1201
- ASM_CLAC
1202
- pushl $0
1203
- pushl $do_coprocessor_segment_overrun
1204
- jmp common_exception
1205
-END(coprocessor_segment_overrun)
1206
-
1207
-ENTRY(invalid_TSS)
1208
- ASM_CLAC
1209
- pushl $do_invalid_TSS
1210
- jmp common_exception
1211
-END(invalid_TSS)
1212
-
1213
-ENTRY(segment_not_present)
1214
- ASM_CLAC
1215
- pushl $do_segment_not_present
1216
- jmp common_exception
1217
-END(segment_not_present)
1218
-
1219
-ENTRY(stack_segment)
1220
- ASM_CLAC
1221
- pushl $do_stack_segment
1222
- jmp common_exception
1223
-END(stack_segment)
1224
-
1225
-ENTRY(alignment_check)
1226
- ASM_CLAC
1227
- pushl $do_alignment_check
1228
- jmp common_exception
1229
-END(alignment_check)
1230
-
1231
-ENTRY(divide_error)
1232
- ASM_CLAC
1233
- pushl $0 # no error code
1234
- pushl $do_divide_error
1235
- jmp common_exception
1236
-END(divide_error)
1237
-
1238
-#ifdef CONFIG_X86_MCE
1239
-ENTRY(machine_check)
1240
- ASM_CLAC
1241
- pushl $0
1242
- pushl machine_check_vector
1243
- jmp common_exception
1244
-END(machine_check)
1245
-#endif
1246
-
1247
-ENTRY(spurious_interrupt_bug)
1248
- ASM_CLAC
1249
- pushl $0
1250
- pushl $do_spurious_interrupt_bug
1251
- jmp common_exception
1252
-END(spurious_interrupt_bug)
1253
-
1254
-#ifdef CONFIG_XEN
1255
-ENTRY(xen_hypervisor_callback)
1256
- pushl $-1 /* orig_ax = -1 => not a system call */
1257
- SAVE_ALL
1258
- ENCODE_FRAME_POINTER
1259
- TRACE_IRQS_OFF
1260
-
1261
- /*
1262
- * Check to see if we got the event in the critical
1263
- * region in xen_iret_direct, after we've reenabled
1264
- * events and checked for pending events. This simulates
1265
- * iret instruction's behaviour where it delivers a
1266
- * pending interrupt when enabling interrupts:
1267
- */
1268
- movl PT_EIP(%esp), %eax
1269
- cmpl $xen_iret_start_crit, %eax
1270
- jb 1f
1271
- cmpl $xen_iret_end_crit, %eax
1272
- jae 1f
1273
-
1274
- jmp xen_iret_crit_fixup
1275
-
1276
-ENTRY(xen_do_upcall)
1277
-1: mov %esp, %eax
1278
- call xen_evtchn_do_upcall
1279
-#ifndef CONFIG_PREEMPT
1280
- call xen_maybe_preempt_hcall
1281
-#endif
1282
- jmp ret_from_intr
1283
-ENDPROC(xen_hypervisor_callback)
1284
-
1285
-/*
1286
- * Hypervisor uses this for application faults while it executes.
1287
- * We get here for two reasons:
1288
- * 1. Fault while reloading DS, ES, FS or GS
1289
- * 2. Fault while executing IRET
1290
- * Category 1 we fix up by reattempting the load, and zeroing the segment
1291
- * register if the load fails.
1292
- * Category 2 we fix up by jumping to do_iret_error. We cannot use the
1293
- * normal Linux return path in this case because if we use the IRET hypercall
1294
- * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1295
- * We distinguish between categories by maintaining a status value in EAX.
1296
- */
1297
-ENTRY(xen_failsafe_callback)
1298
- pushl %eax
1299
- movl $1, %eax
1300
-1: mov 4(%esp), %ds
1301
-2: mov 8(%esp), %es
1302
-3: mov 12(%esp), %fs
1303
-4: mov 16(%esp), %gs
1304
- /* EAX == 0 => Category 1 (Bad segment)
1305
- EAX != 0 => Category 2 (Bad IRET) */
1306
- testl %eax, %eax
1307
- popl %eax
1308
- lea 16(%esp), %esp
1309
- jz 5f
1310
- jmp iret_exc
1311
-5: pushl $-1 /* orig_ax = -1 => not a system call */
1312
- SAVE_ALL
1313
- ENCODE_FRAME_POINTER
1314
- jmp ret_from_exception
1315
-
1316
-.section .fixup, "ax"
1317
-6: xorl %eax, %eax
1318
- movl %eax, 4(%esp)
1319
- jmp 1b
1320
-7: xorl %eax, %eax
1321
- movl %eax, 8(%esp)
1322
- jmp 2b
1323
-8: xorl %eax, %eax
1324
- movl %eax, 12(%esp)
1325
- jmp 3b
1326
-9: xorl %eax, %eax
1327
- movl %eax, 16(%esp)
1328
- jmp 4b
1329
-.previous
1330
- _ASM_EXTABLE(1b, 6b)
1331
- _ASM_EXTABLE(2b, 7b)
1332
- _ASM_EXTABLE(3b, 8b)
1333
- _ASM_EXTABLE(4b, 9b)
1334
-ENDPROC(xen_failsafe_callback)
1335
-
1336
-BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
1337
- xen_evtchn_do_upcall)
1338
-
1339
-#endif /* CONFIG_XEN */
1340
-
1341
-#if IS_ENABLED(CONFIG_HYPERV)
1342
-
1343
-BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
1344
- hyperv_vector_handler)
1345
-
1346
-BUILD_INTERRUPT3(hyperv_reenlightenment_vector, HYPERV_REENLIGHTENMENT_VECTOR,
1347
- hyperv_reenlightenment_intr)
1348
-
1349
-BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
1350
- hv_stimer0_vector_handler)
1351
-
1352
-#endif /* CONFIG_HYPERV */
1353
-
1354
-ENTRY(page_fault)
1355
- ASM_CLAC
1356
- pushl $do_page_fault
1357
- ALIGN
1358
- jmp common_exception
1359
-END(page_fault)
1360
-
1361
-common_exception:
1150
+SYM_CODE_START_LOCAL_NOALIGN(handle_exception)
13621151 /* the function address is in %gs's slot on the stack */
1363
- pushl %fs
1364
- pushl %es
1365
- pushl %ds
1366
- pushl %eax
1367
- movl $(__USER_DS), %eax
1368
- movl %eax, %ds
1369
- movl %eax, %es
1370
- movl $(__KERNEL_PERCPU), %eax
1371
- movl %eax, %fs
1372
- pushl %ebp
1373
- pushl %edi
1374
- pushl %esi
1375
- pushl %edx
1376
- pushl %ecx
1377
- pushl %ebx
1378
- SWITCH_TO_KERNEL_STACK
1152
+ SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
13791153 ENCODE_FRAME_POINTER
1380
- cld
1381
- UNWIND_ESPFIX_STACK
1154
+
1155
+ /* fixup %gs */
13821156 GS_TO_REG %ecx
13831157 movl PT_GS(%esp), %edi # get the function address
1384
- movl PT_ORIG_EAX(%esp), %edx # get the error code
1385
- movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
13861158 REG_TO_PTGS %ecx
13871159 SET_KERNEL_GS %ecx
1388
- TRACE_IRQS_OFF
1389
- movl %esp, %eax # pt_regs pointer
1390
- CALL_NOSPEC %edi
1391
- jmp ret_from_exception
1392
-END(common_exception)
13931160
1394
-ENTRY(debug)
1161
+ /* fixup orig %eax */
1162
+ movl PT_ORIG_EAX(%esp), %edx # get the error code
1163
+ movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
1164
+
1165
+ movl %esp, %eax # pt_regs pointer
1166
+ CALL_NOSPEC edi
1167
+
1168
+handle_exception_return:
1169
+#ifdef CONFIG_VM86
1170
+ movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
1171
+ movb PT_CS(%esp), %al
1172
+ andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
1173
+#else
13951174 /*
1396
- * Entry from sysenter is now handled in common_exception
1175
+ * We can be coming here from a child spawned by kernel_thread().
13971176 */
1398
- ASM_CLAC
1399
- pushl $-1 # mark this as an int
1400
- pushl $do_debug
1401
- jmp common_exception
1402
-END(debug)
1177
+ movl PT_CS(%esp), %eax
1178
+ andl $SEGMENT_RPL_MASK, %eax
1179
+#endif
1180
+ cmpl $USER_RPL, %eax # returning to v8086 or userspace ?
1181
+ jnb ret_to_user
1182
+
1183
+ PARANOID_EXIT_TO_KERNEL_MODE
1184
+ BUG_IF_WRONG_CR3
1185
+ RESTORE_REGS 4
1186
+ jmp .Lirq_return
1187
+
1188
+ret_to_user:
1189
+ movl %esp, %eax
1190
+ jmp restore_all_switch_stack
1191
+SYM_CODE_END(handle_exception)
1192
+
1193
+SYM_CODE_START(asm_exc_double_fault)
1194
+1:
1195
+ /*
1196
+ * This is a task gate handler, not an interrupt gate handler.
1197
+ * The error code is on the stack, but the stack is otherwise
1198
+ * empty. Interrupts are off. Our state is sane with the following
1199
+ * exceptions:
1200
+ *
1201
+ * - CR0.TS is set. "TS" literally means "task switched".
1202
+ * - EFLAGS.NT is set because we're a "nested task".
1203
+ * - The doublefault TSS has back_link set and has been marked busy.
1204
+ * - TR points to the doublefault TSS and the normal TSS is busy.
1205
+ * - CR3 is the normal kernel PGD. This would be delightful, except
1206
+ * that the CPU didn't bother to save the old CR3 anywhere. This
1207
+ * would make it very awkward to return to the context we came
1208
+ * from.
1209
+ *
1210
+ * The rest of EFLAGS is sanitized for us, so we don't need to
1211
+ * worry about AC or DF.
1212
+ *
1213
+ * Don't even bother popping the error code. It's always zero,
1214
+ * and ignoring it makes us a bit more robust against buggy
1215
+ * hypervisor task gate implementations.
1216
+ *
1217
+ * We will manually undo the task switch instead of doing a
1218
+ * task-switching IRET.
1219
+ */
1220
+
1221
+ clts /* clear CR0.TS */
1222
+ pushl $X86_EFLAGS_FIXED
1223
+ popfl /* clear EFLAGS.NT */
1224
+
1225
+ call doublefault_shim
1226
+
1227
+ /* We don't support returning, so we have no IRET here. */
1228
+1:
1229
+ hlt
1230
+ jmp 1b
1231
+SYM_CODE_END(asm_exc_double_fault)
14031232
14041233 /*
14051234 * NMI is doubly nasty. It can happen on the first instruction of
....@@ -1408,10 +1237,14 @@
14081237 * switched stacks. We handle both conditions by simply checking whether we
14091238 * interrupted kernel code running on the SYSENTER stack.
14101239 */
1411
-ENTRY(nmi)
1240
+SYM_CODE_START(asm_exc_nmi)
14121241 ASM_CLAC
14131242
14141243 #ifdef CONFIG_X86_ESPFIX32
1244
+ /*
1245
+ * ESPFIX_SS is only ever set on the return to user path
1246
+ * after we've switched to the entry stack.
1247
+ */
14151248 pushl %eax
14161249 movl %ss, %eax
14171250 cmpw $__ESPFIX_SS, %ax
....@@ -1433,7 +1266,7 @@
14331266 jb .Lnmi_from_sysenter_stack
14341267
14351268 /* Not on SYSENTER stack. */
1436
- call do_nmi
1269
+ call exc_nmi
14371270 jmp .Lnmi_return
14381271
14391272 .Lnmi_from_sysenter_stack:
....@@ -1443,10 +1276,15 @@
14431276 */
14441277 movl %esp, %ebx
14451278 movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
1446
- call do_nmi
1279
+ call exc_nmi
14471280 movl %ebx, %esp
14481281
14491282 .Lnmi_return:
1283
+#ifdef CONFIG_X86_ESPFIX32
1284
+ testl $CS_FROM_ESPFIX, PT_CS(%esp)
1285
+ jnz .Lnmi_from_espfix
1286
+#endif
1287
+
14501288 CHECK_AND_APPLY_ESPFIX
14511289 RESTORE_ALL_NMI cr3_reg=%edi pop=4
14521290 jmp .Lirq_return
....@@ -1454,55 +1292,48 @@
14541292 #ifdef CONFIG_X86_ESPFIX32
14551293 .Lnmi_espfix_stack:
14561294 /*
1457
- * create the pointer to lss back
1295
+ * Create the ss:esp pointer used to LSS back to the ESPFIX stack
14581296 */
14591297 pushl %ss
14601298 pushl %esp
14611299 addl $4, (%esp)
1462
- /* copy the iret frame of 12 bytes */
1463
- .rept 3
1464
- pushl 16(%esp)
1465
- .endr
1466
- pushl %eax
1467
- SAVE_ALL_NMI cr3_reg=%edi
1468
- ENCODE_FRAME_POINTER
1469
- FIXUP_ESPFIX_STACK # %eax == %esp
1470
- xorl %edx, %edx # zero error code
1471
- call do_nmi
1472
- RESTORE_ALL_NMI cr3_reg=%edi
1473
- lss 12+4(%esp), %esp # back to espfix stack
1474
- jmp .Lirq_return
1475
-#endif
1476
-END(nmi)
14771300
1478
-ENTRY(int3)
1479
- ASM_CLAC
1480
- pushl $-1 # mark this as an int
1301
+ /* Copy the (short) IRET frame */
1302
+ pushl 4*4(%esp) # flags
1303
+ pushl 4*4(%esp) # cs
1304
+ pushl 4*4(%esp) # ip
14811305
1482
- SAVE_ALL switch_stacks=1
1306
+ pushl %eax # orig_ax
1307
+
1308
+ SAVE_ALL_NMI cr3_reg=%edi unwind_espfix=1
14831309 ENCODE_FRAME_POINTER
1484
- TRACE_IRQS_OFF
1310
+
1311
+ /* clear CS_FROM_KERNEL, set CS_FROM_ESPFIX */
1312
+ xorl $(CS_FROM_ESPFIX | CS_FROM_KERNEL), PT_CS(%esp)
1313
+
14851314 xorl %edx, %edx # zero error code
14861315 movl %esp, %eax # pt_regs pointer
1487
- call do_int3
1488
- jmp ret_from_exception
1489
-END(int3)
1316
+ jmp .Lnmi_from_sysenter_stack
14901317
1491
-ENTRY(general_protection)
1492
- ASM_CLAC
1493
- pushl $do_general_protection
1494
- jmp common_exception
1495
-END(general_protection)
1496
-
1497
-#ifdef CONFIG_KVM_GUEST
1498
-ENTRY(async_page_fault)
1499
- ASM_CLAC
1500
- pushl $do_async_page_fault
1501
- jmp common_exception
1502
-END(async_page_fault)
1318
+.Lnmi_from_espfix:
1319
+ RESTORE_ALL_NMI cr3_reg=%edi
1320
+ /*
1321
+ * Because we cleared CS_FROM_KERNEL, IRET_FRAME 'forgot' to
1322
+ * fix up the gap and long frame:
1323
+ *
1324
+ * 3 - original frame (exception)
1325
+ * 2 - ESPFIX block (above)
1326
+ * 6 - gap (FIXUP_FRAME)
1327
+ * 5 - long frame (FIXUP_FRAME)
1328
+ * 1 - orig_ax
1329
+ */
1330
+ lss (1+5+6)*4(%esp), %esp # back to espfix stack
1331
+ jmp .Lirq_return
15031332 #endif
1333
+SYM_CODE_END(asm_exc_nmi)
15041334
1505
-ENTRY(rewind_stack_do_exit)
1335
+.pushsection .text, "ax"
1336
+SYM_CODE_START(rewind_stack_do_exit)
15061337 /* Prevent any naive code from trying to unwind to our caller. */
15071338 xorl %ebp, %ebp
15081339
....@@ -1511,4 +1342,5 @@
15111342
15121343 call do_exit
15131344 1: jmp 1b
1514
-END(rewind_stack_do_exit)
1345
+SYM_CODE_END(rewind_stack_do_exit)
1346
+.popsection