| .. | .. |
|---|
| 8 | 8 | * |
|---|
| 9 | 9 | * entry.S contains the system-call and fault low-level handling routines. |
|---|
| 10 | 10 | * |
|---|
| 11 | | - * Some of this is documented in Documentation/x86/entry_64.txt |
|---|
| 11 | + * Some of this is documented in Documentation/x86/entry_64.rst |
|---|
| 12 | 12 | * |
|---|
| 13 | 13 | * A note on terminology: |
|---|
| 14 | 14 | * - iret frame: Architecture defined interrupt frame from SS to RIP |
|---|
| 15 | 15 | * at the top of the kernel process stack. |
|---|
| 16 | 16 | * |
|---|
| 17 | 17 | * Some macro usage: |
|---|
| 18 | | - * - ENTRY/END: Define functions in the symbol table. |
|---|
| 19 | | - * - TRACE_IRQ_*: Trace hardirq state for lock debugging. |
|---|
| 18 | + * - SYM_FUNC_START/END: Define functions in the symbol table. |
|---|
| 20 | 19 | * - idtentry: Define exception entry points. |
|---|
| 21 | 20 | */ |
|---|
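For orientation, the "iret frame" named above is the five-word frame the CPU pushes (or that the SYSCALL entry simulates); a minimal C sketch of how it sits at the tail of struct pt_regs, with field names assumed from asm/ptrace.h:

```c
/* Sketch of the tail of struct pt_regs on x86-64; field names assumed
 * from asm/ptrace.h. The iret frame proper is ip..ss, with SS at the
 * top (highest address) of the kernel stack and RIP at the bottom.
 */
struct pt_regs_tail_sketch {
	unsigned long orig_ax;	/* software-pushed: syscall nr or error code */
	/* --- architecture-defined iret frame starts here --- */
	unsigned long ip;	/* RIP */
	unsigned long cs;
	unsigned long flags;	/* RFLAGS */
	unsigned long sp;	/* user RSP */
	unsigned long ss;
};
```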
| 22 | 21 | #include <linux/linkage.h> |
|---|
| .. | .. |
|---|
| 37 | 36 | #include <asm/pgtable_types.h> |
|---|
| 38 | 37 | #include <asm/export.h> |
|---|
| 39 | 38 | #include <asm/frame.h> |
|---|
| 39 | +#include <asm/trapnr.h> |
|---|
| 40 | 40 | #include <asm/nospec-branch.h> |
|---|
| 41 | +#include <asm/fsgsbase.h> |
|---|
| 41 | 42 | #include <linux/err.h> |
|---|
| 42 | 43 | |
|---|
| 43 | 44 | #include "calling.h" |
|---|
| .. | .. |
|---|
| 45 | 46 | .code64 |
|---|
| 46 | 47 | .section .entry.text, "ax" |
|---|
| 47 | 48 | |
|---|
| 48 | | -#ifdef CONFIG_PARAVIRT |
|---|
| 49 | | -ENTRY(native_usergs_sysret64) |
|---|
| 49 | +#ifdef CONFIG_PARAVIRT_XXL |
|---|
| 50 | +SYM_CODE_START(native_usergs_sysret64) |
|---|
| 50 | 51 | UNWIND_HINT_EMPTY |
|---|
| 51 | 52 | swapgs |
|---|
| 52 | 53 | sysretq |
|---|
| 53 | | -END(native_usergs_sysret64) |
|---|
| 54 | | -#endif /* CONFIG_PARAVIRT */ |
|---|
| 55 | | - |
|---|
| 56 | | -.macro TRACE_IRQS_FLAGS flags:req |
|---|
| 57 | | -#ifdef CONFIG_TRACE_IRQFLAGS |
|---|
| 58 | | - btl $9, \flags /* interrupts off? */ |
|---|
| 59 | | - jnc 1f |
|---|
| 60 | | - TRACE_IRQS_ON |
|---|
| 61 | | -1: |
|---|
| 62 | | -#endif |
|---|
| 63 | | -.endm |
|---|
| 64 | | - |
|---|
| 65 | | -.macro TRACE_IRQS_IRETQ |
|---|
| 66 | | - TRACE_IRQS_FLAGS EFLAGS(%rsp) |
|---|
| 67 | | -.endm |
|---|
| 68 | | - |
|---|
| 69 | | -/* |
|---|
| 70 | | - * When dynamic function tracer is enabled it will add a breakpoint |
|---|
| 71 | | - * to all locations that it is about to modify, sync CPUs, update |
|---|
| 72 | | - * all the code, sync CPUs, then remove the breakpoints. In this time |
|---|
| 73 | | - * if lockdep is enabled, it might jump back into the debug handler |
|---|
| 74 | | - * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF). |
|---|
| 75 | | - * |
|---|
| 76 | | - * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to |
|---|
| 77 | | - * make sure the stack pointer does not get reset back to the top |
|---|
| 78 | | - * of the debug stack, and instead just reuses the current stack. |
|---|
| 79 | | - */ |
|---|
| 80 | | -#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) |
|---|
| 81 | | - |
|---|
| 82 | | -.macro TRACE_IRQS_OFF_DEBUG |
|---|
| 83 | | - call debug_stack_set_zero |
|---|
| 84 | | - TRACE_IRQS_OFF |
|---|
| 85 | | - call debug_stack_reset |
|---|
| 86 | | -.endm |
|---|
| 87 | | - |
|---|
| 88 | | -.macro TRACE_IRQS_ON_DEBUG |
|---|
| 89 | | - call debug_stack_set_zero |
|---|
| 90 | | - TRACE_IRQS_ON |
|---|
| 91 | | - call debug_stack_reset |
|---|
| 92 | | -.endm |
|---|
| 93 | | - |
|---|
| 94 | | -.macro TRACE_IRQS_IRETQ_DEBUG |
|---|
| 95 | | - btl $9, EFLAGS(%rsp) /* interrupts off? */ |
|---|
| 96 | | - jnc 1f |
|---|
| 97 | | - TRACE_IRQS_ON_DEBUG |
|---|
| 98 | | -1: |
|---|
| 99 | | -.endm |
|---|
| 100 | | - |
|---|
| 101 | | -#else |
|---|
| 102 | | -# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF |
|---|
| 103 | | -# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON |
|---|
| 104 | | -# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ |
|---|
| 105 | | -#endif |
|---|
| 54 | +SYM_CODE_END(native_usergs_sysret64) |
|---|
| 55 | +#endif /* CONFIG_PARAVIRT_XXL */ |
|---|
| 106 | 56 | |
|---|
| 107 | 57 | /* |
|---|
| 108 | 58 | * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers. |
|---|
| .. | .. |
|---|
| 142 | 92 | * with them due to bugs in both AMD and Intel CPUs. |
|---|
| 143 | 93 | */ |
|---|
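For reference, a hedged userspace sketch of the convention this entry point services: the syscall number travels in %rax, arguments in %rdi/%rsi/%rdx/%r10/%r8/%r9, and SYSCALL itself destroys %rcx and %r11 (return RIP and RFLAGS), which is exactly why the code below saves them as pt_regs->ip and pt_regs->flags:

```c
/* Minimal illustration of the 64-bit SYSCALL ABI; write(2) is syscall
 * number 1 on x86-64. rcx/r11 are listed as clobbers because the CPU
 * overwrites them with the return RIP and RFLAGS.
 */
static long raw_write(int fd, const void *buf, unsigned long len)
{
	long ret;

	asm volatile("syscall"
		     : "=a" (ret)			/* return value in rax */
		     : "a" (1), "D" (fd), "S" (buf), "d" (len)
		     : "rcx", "r11", "memory");
	return ret;
}
```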
| 144 | 94 | |
|---|
| 145 | | - .pushsection .entry_trampoline, "ax" |
|---|
| 95 | +SYM_CODE_START(entry_SYSCALL_64) |
|---|
| 96 | + UNWIND_HINT_ENTRY |
|---|
| 146 | 97 | |
|---|
| 147 | | -/* |
|---|
| 148 | | - * The code in here gets remapped into cpu_entry_area's trampoline. This means |
|---|
| 149 | | - * that the assembler and linker have the wrong idea as to where this code |
|---|
| 150 | | - * lives (and, in fact, it's mapped more than once, so it's not even at a |
|---|
| 151 | | - * fixed address). So we can't reference any symbols outside the entry |
|---|
| 152 | | - * trampoline and expect it to work. |
|---|
| 153 | | - * |
|---|
| 154 | | - * Instead, we carefully abuse %rip-relative addressing. |
|---|
| 155 | | - * _entry_trampoline(%rip) refers to the start of the remapped) entry |
|---|
| 156 | | - * trampoline. We can thus find cpu_entry_area with this macro: |
|---|
| 157 | | - */ |
|---|
| 158 | | - |
|---|
| 159 | | -#define CPU_ENTRY_AREA \ |
|---|
| 160 | | - _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip) |
|---|
| 161 | | - |
|---|
| 162 | | -/* The top word of the SYSENTER stack is hot and is usable as scratch space. */ |
|---|
| 163 | | -#define RSP_SCRATCH CPU_ENTRY_AREA_entry_stack + \ |
|---|
| 164 | | - SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA |
|---|
| 165 | | - |
|---|
| 166 | | -ENTRY(entry_SYSCALL_64_trampoline) |
|---|
| 167 | | - UNWIND_HINT_EMPTY |
|---|
| 168 | 98 | swapgs |
|---|
| 169 | | - |
|---|
| 170 | | - /* Stash the user RSP. */ |
|---|
| 171 | | - movq %rsp, RSP_SCRATCH |
|---|
| 172 | | - |
|---|
| 173 | | - /* Note: using %rsp as a scratch reg. */ |
|---|
| 99 | + /* tss.sp2 is scratch space. */ |
|---|
| 100 | + movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2) |
|---|
| 174 | 101 | SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp |
|---|
| 175 | | - |
|---|
| 176 | | - /* Load the top of the task stack into RSP */ |
|---|
| 177 | | - movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp |
|---|
| 178 | | - |
|---|
| 179 | | - /* Start building the simulated IRET frame. */ |
|---|
| 180 | | - pushq $__USER_DS /* pt_regs->ss */ |
|---|
| 181 | | - pushq RSP_SCRATCH /* pt_regs->sp */ |
|---|
| 182 | | - pushq %r11 /* pt_regs->flags */ |
|---|
| 183 | | - pushq $__USER_CS /* pt_regs->cs */ |
|---|
| 184 | | - pushq %rcx /* pt_regs->ip */ |
|---|
| 185 | | - |
|---|
| 186 | | - /* |
|---|
| 187 | | - * x86 lacks a near absolute jump, and we can't jump to the real |
|---|
| 188 | | - * entry text with a relative jump. We could push the target |
|---|
| 189 | | - * address and then use retq, but this destroys the pipeline on |
|---|
| 190 | | - * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead, |
|---|
| 191 | | - * spill RDI and restore it in a second-stage trampoline. |
|---|
| 192 | | - */ |
|---|
| 193 | | - pushq %rdi |
|---|
| 194 | | - movq $entry_SYSCALL_64_stage2, %rdi |
|---|
| 195 | | - JMP_NOSPEC %rdi |
|---|
| 196 | | -END(entry_SYSCALL_64_trampoline) |
|---|
| 197 | | - |
|---|
| 198 | | - .popsection |
|---|
| 199 | | - |
|---|
| 200 | | -ENTRY(entry_SYSCALL_64_stage2) |
|---|
| 201 | | - UNWIND_HINT_EMPTY |
|---|
| 202 | | - popq %rdi |
|---|
| 203 | | - jmp entry_SYSCALL_64_after_hwframe |
|---|
| 204 | | -END(entry_SYSCALL_64_stage2) |
|---|
| 205 | | - |
|---|
| 206 | | -ENTRY(entry_SYSCALL_64) |
|---|
| 207 | | - UNWIND_HINT_EMPTY |
|---|
| 208 | | - /* |
|---|
| 209 | | - * Interrupts are off on entry. |
|---|
| 210 | | - * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, |
|---|
| 211 | | - * it is too small to ever cause noticeable irq latency. |
|---|
| 212 | | - */ |
|---|
| 213 | | - |
|---|
| 214 | | - swapgs |
|---|
| 215 | | - /* |
|---|
| 216 | | - * This path is only taken when PAGE_TABLE_ISOLATION is disabled so it |
|---|
| 217 | | - * is not required to switch CR3. |
|---|
| 218 | | - */ |
|---|
| 219 | | - movq %rsp, PER_CPU_VAR(rsp_scratch) |
|---|
| 220 | 102 | movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp |
|---|
| 221 | 103 | |
|---|
| 104 | +SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) |
|---|
| 105 | + |
|---|
| 222 | 106 | /* Construct struct pt_regs on stack */ |
|---|
| 223 | | - pushq $__USER_DS /* pt_regs->ss */ |
|---|
| 224 | | - pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ |
|---|
| 225 | | - pushq %r11 /* pt_regs->flags */ |
|---|
| 226 | | - pushq $__USER_CS /* pt_regs->cs */ |
|---|
| 227 | | - pushq %rcx /* pt_regs->ip */ |
|---|
| 228 | | -GLOBAL(entry_SYSCALL_64_after_hwframe) |
|---|
| 229 | | - pushq %rax /* pt_regs->orig_ax */ |
|---|
| 107 | + pushq $__USER_DS /* pt_regs->ss */ |
|---|
| 108 | + pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2) /* pt_regs->sp */ |
|---|
| 109 | + pushq %r11 /* pt_regs->flags */ |
|---|
| 110 | + pushq $__USER_CS /* pt_regs->cs */ |
|---|
| 111 | + pushq %rcx /* pt_regs->ip */ |
|---|
| 112 | +SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) |
|---|
| 113 | + pushq %rax /* pt_regs->orig_ax */ |
|---|
| 230 | 114 | |
|---|
| 231 | 115 | PUSH_AND_CLEAR_REGS rax=$-ENOSYS |
|---|
| 232 | | - |
|---|
| 233 | | - TRACE_IRQS_OFF |
|---|
| 234 | 116 | |
|---|
| 235 | 117 | /* IRQs are off. */ |
|---|
| 236 | 118 | movq %rax, %rdi |
|---|
| 237 | 119 | movq %rsp, %rsi |
|---|
| 238 | | - call do_syscall_64 /* returns with IRQs disabled */ |
|---|
| 239 | 120 | |
|---|
| 240 | | - TRACE_IRQS_IRETQ /* we're about to change IF */ |
|---|
| 121 | + /* clobbers %rax, make sure it is after saving the syscall nr */ |
|---|
| 122 | + IBRS_ENTER |
|---|
| 123 | + UNTRAIN_RET |
|---|
| 124 | + |
|---|
| 125 | + call do_syscall_64 /* returns with IRQs disabled */ |
|---|
| 241 | 126 | |
|---|
| 242 | 127 | /* |
|---|
| 243 | 128 | * Try to use SYSRET instead of IRET if we're returning to |
|---|
| .. | .. |
|---|
| 311 | 196 | * perf profiles. Nothing jumps here. |
|---|
| 312 | 197 | */ |
|---|
| 313 | 198 | syscall_return_via_sysret: |
|---|
| 314 | | - /* rcx and r11 are already restored (see code above) */ |
|---|
| 315 | | - POP_REGS pop_rdi=0 skip_r11rcx=1 |
|---|
| 199 | + IBRS_EXIT |
|---|
| 200 | + POP_REGS pop_rdi=0 |
|---|
| 316 | 201 | |
|---|
| 317 | 202 | /* |
|---|
| 318 | 203 | * Now all regs are restored except RSP and RDI. |
|---|
| .. | .. |
|---|
| 329 | 214 | * We are on the trampoline stack. All regs except RDI are live. |
|---|
| 330 | 215 | * We can do future final exit work right here. |
|---|
| 331 | 216 | */ |
|---|
| 217 | + STACKLEAK_ERASE_NOCLOBBER |
|---|
| 218 | + |
|---|
| 332 | 219 | SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi |
|---|
| 333 | 220 | |
|---|
| 334 | 221 | popq %rdi |
|---|
| 335 | 222 | popq %rsp |
|---|
| 336 | 223 | USERGS_SYSRET64 |
|---|
| 337 | | -END(entry_SYSCALL_64) |
|---|
| 224 | +SYM_CODE_END(entry_SYSCALL_64) |
|---|
| 338 | 225 | |
|---|
| 339 | 226 | /* |
|---|
| 340 | 227 | * %rdi: prev task |
|---|
| 341 | 228 | * %rsi: next task |
|---|
| 342 | 229 | */ |
|---|
| 343 | | -ENTRY(__switch_to_asm) |
|---|
| 344 | | - UNWIND_HINT_FUNC |
|---|
| 230 | +.pushsection .text, "ax" |
|---|
| 231 | +SYM_FUNC_START(__switch_to_asm) |
|---|
| 345 | 232 | /* |
|---|
| 346 | 233 | * Save callee-saved registers |
|---|
| 347 | 234 | * This must match the order in inactive_task_frame |
|---|
| .. | .. |
|---|
| 352 | 239 | pushq %r13 |
|---|
| 353 | 240 | pushq %r14 |
|---|
| 354 | 241 | pushq %r15 |
|---|
| 355 | | - pushfq |
|---|
| 356 | 242 | |
|---|
| 357 | 243 | /* switch stack */ |
|---|
| 358 | 244 | movq %rsp, TASK_threadsp(%rdi) |
|---|
| .. | .. |
|---|
| 360 | 246 | |
|---|
| 361 | 247 | #ifdef CONFIG_STACKPROTECTOR |
|---|
| 362 | 248 | movq TASK_stack_canary(%rsi), %rbx |
|---|
| 363 | | - movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset |
|---|
| 249 | + movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset |
|---|
| 364 | 250 | #endif |
|---|
| 365 | 251 | |
|---|
| 366 | | -#ifdef CONFIG_RETPOLINE |
|---|
| 367 | 252 | /* |
|---|
| 368 | 253 | * When switching from a shallower to a deeper call stack |
|---|
| 369 | 254 | * the RSB may either underflow or use entries populated |
|---|
| .. | .. |
|---|
| 372 | 257 | * speculative execution to prevent attack. |
|---|
| 373 | 258 | */ |
|---|
| 374 | 259 | FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW |
|---|
| 375 | | -#endif |
|---|
| 376 | 260 | |
|---|
| 377 | 261 | /* restore callee-saved registers */ |
|---|
| 378 | | - popfq |
|---|
| 379 | 262 | popq %r15 |
|---|
| 380 | 263 | popq %r14 |
|---|
| 381 | 264 | popq %r13 |
|---|
| .. | .. |
|---|
| 384 | 267 | popq %rbp |
|---|
| 385 | 268 | |
|---|
| 386 | 269 | jmp __switch_to |
|---|
| 387 | | -END(__switch_to_asm) |
|---|
| 270 | +SYM_FUNC_END(__switch_to_asm) |
|---|
| 271 | +.popsection |
|---|
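The push sequence above has to mirror the frame layout the scheduler expects; a hedged sketch of that layout, with the field set assumed from the pushes in this function (cf. asm/switch_to.h):

```c
/* Assumed mirror of __switch_to_asm's pushes, lowest address first
 * (i.e. the last register pushed sits at the top of the struct).
 */
struct inactive_task_frame_sketch {
	unsigned long r15;	/* pushed last */
	unsigned long r14;
	unsigned long r13;
	unsigned long r12;
	unsigned long bx;
	unsigned long bp;	/* pushed first */
	unsigned long ret_addr;	/* return address back into the caller */
};
```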
| 388 | 272 | |
|---|
| 389 | 273 | /* |
|---|
| 390 | 274 | * A newly forked process directly context switches into this address. |
|---|
| .. | .. |
|---|
| 393 | 277 | * rbx: kernel thread func (NULL for user thread) |
|---|
| 394 | 278 | * r12: kernel thread arg |
|---|
| 395 | 279 | */ |
|---|
| 396 | | -ENTRY(ret_from_fork) |
|---|
| 280 | +.pushsection .text, "ax" |
|---|
| 281 | +SYM_CODE_START(ret_from_fork) |
|---|
| 397 | 282 | UNWIND_HINT_EMPTY |
|---|
| 398 | 283 | movq %rax, %rdi |
|---|
| 399 | 284 | call schedule_tail /* rdi: 'prev' task parameter */ |
|---|
| .. | .. |
|---|
| 404 | 289 | 2: |
|---|
| 405 | 290 | UNWIND_HINT_REGS |
|---|
| 406 | 291 | movq %rsp, %rdi |
|---|
| 407 | | - call syscall_return_slowpath /* returns with IRQs disabled */ |
|---|
| 408 | | - TRACE_IRQS_ON /* user mode is traced as IRQS on */ |
|---|
| 292 | + call syscall_exit_to_user_mode /* returns with IRQs disabled */ |
|---|
| 409 | 293 | jmp swapgs_restore_regs_and_return_to_usermode |
|---|
| 410 | 294 | |
|---|
| 411 | 295 | 1: |
|---|
| 412 | 296 | /* kernel thread */ |
|---|
| 413 | 297 | UNWIND_HINT_EMPTY |
|---|
| 414 | 298 | movq %r12, %rdi |
|---|
| 415 | | - CALL_NOSPEC %rbx |
|---|
| 299 | + CALL_NOSPEC rbx |
|---|
| 416 | 300 | /* |
|---|
| 417 | 301 | * A kernel thread is allowed to return here after successfully |
|---|
| 418 | | - * calling do_execve(). Exit to userspace to complete the execve() |
|---|
| 302 | + * calling kernel_execve(). Exit to userspace to complete the execve() |
|---|
| 419 | 303 | * syscall. |
|---|
| 420 | 304 | */ |
|---|
| 421 | 305 | movq $0, RAX(%rsp) |
|---|
| 422 | 306 | jmp 2b |
|---|
| 423 | | -END(ret_from_fork) |
|---|
| 424 | | - |
|---|
| 425 | | -/* |
|---|
| 426 | | - * Build the entry stubs with some assembler magic. |
|---|
| 427 | | - * We pack 1 stub into every 8-byte block. |
|---|
| 428 | | - */ |
|---|
| 429 | | - .align 8 |
|---|
| 430 | | -ENTRY(irq_entries_start) |
|---|
| 431 | | - vector=FIRST_EXTERNAL_VECTOR |
|---|
| 432 | | - .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) |
|---|
| 433 | | - UNWIND_HINT_IRET_REGS |
|---|
| 434 | | - pushq $(~vector+0x80) /* Note: always in signed byte range */ |
|---|
| 435 | | - jmp common_interrupt |
|---|
| 436 | | - .align 8 |
|---|
| 437 | | - vector=vector+1 |
|---|
| 438 | | - .endr |
|---|
| 439 | | -END(irq_entries_start) |
|---|
| 440 | | - |
|---|
| 441 | | - .align 8 |
|---|
| 442 | | -ENTRY(spurious_entries_start) |
|---|
| 443 | | - vector=FIRST_SYSTEM_VECTOR |
|---|
| 444 | | - .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) |
|---|
| 445 | | - UNWIND_HINT_IRET_REGS |
|---|
| 446 | | - pushq $(~vector+0x80) /* Note: always in signed byte range */ |
|---|
| 447 | | - jmp common_spurious |
|---|
| 448 | | - .align 8 |
|---|
| 449 | | - vector=vector+1 |
|---|
| 450 | | - .endr |
|---|
| 451 | | -END(spurious_entries_start) |
|---|
| 307 | +SYM_CODE_END(ret_from_fork) |
|---|
| 308 | +.popsection |
|---|
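A hedged C paraphrase of the flow above; illustrative only, since the real path must stay in assembly because of the stack switch, and ret_from_fork_sketch is not a real kernel symbol:

```c
/* Illustrative paraphrase of ret_from_fork: rbx carries the kernel
 * thread function (NULL for user threads) and r12 its argument.
 */
static void ret_from_fork_sketch(struct task_struct *prev,
				 struct pt_regs *regs,
				 int (*fn)(void *), void *fn_arg)
{
	schedule_tail(prev);		/* finish the context switch */

	if (fn) {
		fn(fn_arg);		/* kernel thread body */
		regs->ax = 0;		/* it returned: to userspace via execve() */
	}
	syscall_exit_to_user_mode(regs);
	/* ...then swapgs_restore_regs_and_return_to_usermode */
}
```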
| 452 | 309 | |
|---|
| 453 | 310 | .macro DEBUG_ENTRY_ASSERT_IRQS_OFF |
|---|
| 454 | 311 | #ifdef CONFIG_DEBUG_ENTRY |
|---|
| .. | .. |
|---|
| 462 | 319 | #endif |
|---|
| 463 | 320 | .endm |
|---|
| 464 | 321 | |
|---|
| 465 | | -/* |
|---|
| 466 | | - * Enters the IRQ stack if we're not already using it. NMI-safe. Clobbers |
|---|
| 467 | | - * flags and puts old RSP into old_rsp, and leaves all other GPRs alone. |
|---|
| 468 | | - * Requires kernel GSBASE. |
|---|
| 469 | | - * |
|---|
| 470 | | - * The invariant is that, if irq_count != -1, then the IRQ stack is in use. |
|---|
| 322 | +/** |
|---|
| 323 | + * idtentry_body - Macro to emit code calling the C function |
|---|
| 324 | + * @cfunc: C function to be called |
|---|
| 325 | + * @has_error_code: Hardware pushed error code on stack |
|---|
| 471 | 326 | */ |
|---|
| 472 | | -.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0 |
|---|
| 473 | | - DEBUG_ENTRY_ASSERT_IRQS_OFF |
|---|
| 327 | +.macro idtentry_body cfunc has_error_code:req |
|---|
| 474 | 328 | |
|---|
| 475 | | - .if \save_ret |
|---|
| 476 | | - /* |
|---|
| 477 | | - * If save_ret is set, the original stack contains one additional |
|---|
| 478 | | - * entry -- the return address. Therefore, move the address one |
|---|
| 479 | | - * entry below %rsp to \old_rsp. |
|---|
| 480 | | - */ |
|---|
| 481 | | - leaq 8(%rsp), \old_rsp |
|---|
| 482 | | - .else |
|---|
| 483 | | - movq %rsp, \old_rsp |
|---|
| 329 | + call error_entry |
|---|
| 330 | + UNWIND_HINT_REGS |
|---|
| 331 | + |
|---|
| 332 | + movq %rsp, %rdi /* pt_regs pointer into 1st argument */ |
|---|
| 333 | + |
|---|
| 334 | + .if \has_error_code == 1 |
|---|
| 335 | + movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument */ |
|---|
| 336 | + movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ |
|---|
| 484 | 337 | .endif |
|---|
| 485 | 338 | |
|---|
| 486 | | - .if \regs |
|---|
| 487 | | - UNWIND_HINT_REGS base=\old_rsp |
|---|
| 339 | + call \cfunc |
|---|
| 340 | + |
|---|
| 341 | + jmp error_return |
|---|
| 342 | +.endm |
|---|
| 343 | + |
|---|
| 344 | +/** |
|---|
| 345 | + * idtentry - Macro to generate entry stubs for simple IDT entries |
|---|
| 346 | + * @vector: Vector number |
|---|
| 347 | + * @asmsym: ASM symbol for the entry point |
|---|
| 348 | + * @cfunc: C function to be called |
|---|
| 349 | + * @has_error_code: Hardware pushed error code on stack |
|---|
| 350 | + * |
|---|
| 351 | + * The macro emits code to set up the kernel context for straightforward |
|---|
| 352 | + * and simple IDT entries. No IST stack, no paranoid entry checks. |
|---|
| 353 | + */ |
|---|
| 354 | +.macro idtentry vector asmsym cfunc has_error_code:req |
|---|
| 355 | +SYM_CODE_START(\asmsym) |
|---|
| 356 | + UNWIND_HINT_IRET_REGS offset=\has_error_code*8 |
|---|
| 357 | + ASM_CLAC |
|---|
| 358 | + |
|---|
| 359 | + .if \has_error_code == 0 |
|---|
| 360 | + pushq $-1 /* ORIG_RAX: no syscall to restart */ |
|---|
| 488 | 361 | .endif |
|---|
| 489 | 362 | |
|---|
| 490 | | - incl PER_CPU_VAR(irq_count) |
|---|
| 491 | | - jnz .Lirq_stack_push_old_rsp_\@ |
|---|
| 363 | + .if \vector == X86_TRAP_BP |
|---|
| 364 | + /* |
|---|
| 365 | + * If coming from kernel space, create a 6-word gap to allow the |
|---|
| 366 | + * int3 handler to emulate a call instruction. |
|---|
| 367 | + */ |
|---|
| 368 | + testb $3, CS-ORIG_RAX(%rsp) |
|---|
| 369 | + jnz .Lfrom_usermode_no_gap_\@ |
|---|
| 370 | + .rept 6 |
|---|
| 371 | + pushq 5*8(%rsp) |
|---|
| 372 | + .endr |
|---|
| 373 | + UNWIND_HINT_IRET_REGS offset=8 |
|---|
| 374 | +.Lfrom_usermode_no_gap_\@: |
|---|
| 375 | + .endif |
|---|
| 376 | + |
|---|
| 377 | + idtentry_body \cfunc \has_error_code |
|---|
| 378 | + |
|---|
| 379 | +_ASM_NOKPROBE(\asmsym) |
|---|
| 380 | +SYM_CODE_END(\asmsym) |
|---|
| 381 | +.endm |
|---|
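These stubs are not written out by hand; they are emitted when asm/idtentry.h is included further down. A hedged sketch of the pairing, with macro names as in the mainline headers:

```c
/* Conceptually, a header declaration such as
 *
 *	DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error);
 *
 * expands (when included from this .S file) to
 *
 *	idtentry X86_TRAP_DE asm_exc_divide_error exc_divide_error has_error_code=0
 *
 * while the C handler that idtentry_body ends up calling is defined as:
 */
DEFINE_IDTENTRY(exc_divide_error)
{
	/* body sketch: report/deliver the #DE trap for regs */
}
```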
| 382 | + |
|---|
| 383 | +/* |
|---|
| 384 | + * Interrupt entry/exit. |
|---|
| 385 | + * |
|---|
| 386 | + * The interrupt stubs push (vector) onto the stack, which is the error_code |
|---|
| 387 | + * position of idtentry exceptions, and jump to one of the two idtentry points |
|---|
| 388 | + * (common/spurious). |
|---|
| 389 | + * |
|---|
| 390 | + * common_interrupt is a hotpath, align it to a cache line |
|---|
| 391 | + */ |
|---|
| 392 | +.macro idtentry_irq vector cfunc |
|---|
| 393 | + .p2align CONFIG_X86_L1_CACHE_SHIFT |
|---|
| 394 | + idtentry \vector asm_\cfunc \cfunc has_error_code=1 |
|---|
| 395 | +.endm |
|---|
| 396 | + |
|---|
| 397 | +/* |
|---|
| 398 | + * System vectors which invoke their handlers directly and are not |
|---|
| 399 | + * going through the regular common device interrupt handling code. |
|---|
| 400 | + */ |
|---|
| 401 | +.macro idtentry_sysvec vector cfunc |
|---|
| 402 | + idtentry \vector asm_\cfunc \cfunc has_error_code=0 |
|---|
| 403 | +.endm |
|---|
| 404 | + |
|---|
| 405 | +/** |
|---|
| 406 | + * idtentry_mce_db - Macro to generate entry stubs for #MC and #DB |
|---|
| 407 | + * @vector: Vector number |
|---|
| 408 | + * @asmsym: ASM symbol for the entry point |
|---|
| 409 | + * @cfunc: C function to be called |
|---|
| 410 | + * |
|---|
| 411 | + * The macro emits code to set up the kernel context for #MC and #DB |
|---|
| 412 | + * |
|---|
| 413 | + * If the entry comes from user space it uses the normal entry path |
|---|
| 414 | + * including the return to user space work and preemption checks on |
|---|
| 415 | + * exit. |
|---|
| 416 | + * |
|---|
| 417 | + * If it hits in kernel mode then it needs to go through the paranoid |
|---|
| 418 | + * entry as the exception can hit any random state. No preemption |
|---|
| 419 | + * check on exit to keep the paranoid path simple. |
|---|
| 420 | + */ |
|---|
| 421 | +.macro idtentry_mce_db vector asmsym cfunc |
|---|
| 422 | +SYM_CODE_START(\asmsym) |
|---|
| 423 | + UNWIND_HINT_IRET_REGS |
|---|
| 424 | + ASM_CLAC |
|---|
| 425 | + |
|---|
| 426 | + pushq $-1 /* ORIG_RAX: no syscall to restart */ |
|---|
| 492 | 427 | |
|---|
| 493 | 428 | /* |
|---|
| 494 | | - * Right now, if we just incremented irq_count to zero, we've |
|---|
| 495 | | - * claimed the IRQ stack but we haven't switched to it yet. |
|---|
| 496 | | - * |
|---|
| 497 | | - * If anything is added that can interrupt us here without using IST, |
|---|
| 498 | | - * it must be *extremely* careful to limit its stack usage. This |
|---|
| 499 | | - * could include kprobes and a hypothetical future IST-less #DB |
|---|
| 500 | | - * handler. |
|---|
| 501 | | - * |
|---|
| 502 | | - * The OOPS unwinder relies on the word at the top of the IRQ |
|---|
| 503 | | - * stack linking back to the previous RSP for the entire time we're |
|---|
| 504 | | - * on the IRQ stack. For this to work reliably, we need to write |
|---|
| 505 | | - * it before we actually move ourselves to the IRQ stack. |
|---|
| 429 | + * If the entry is from userspace, switch stacks and treat it as |
|---|
| 430 | + * a normal entry. |
|---|
| 506 | 431 | */ |
|---|
| 432 | + testb $3, CS-ORIG_RAX(%rsp) |
|---|
| 433 | + jnz .Lfrom_usermode_switch_stack_\@ |
|---|
| 507 | 434 | |
|---|
| 508 | | - movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8) |
|---|
| 509 | | - movq PER_CPU_VAR(irq_stack_ptr), %rsp |
|---|
| 435 | + /* paranoid_entry returns GS information for paranoid_exit in EBX. */ |
|---|
| 436 | + call paranoid_entry |
|---|
| 510 | 437 | |
|---|
| 511 | | -#ifdef CONFIG_DEBUG_ENTRY |
|---|
| 438 | + UNWIND_HINT_REGS |
|---|
| 439 | + |
|---|
| 440 | + movq %rsp, %rdi /* pt_regs pointer */ |
|---|
| 441 | + |
|---|
| 442 | + call \cfunc |
|---|
| 443 | + |
|---|
| 444 | + jmp paranoid_exit |
|---|
| 445 | + |
|---|
| 446 | + /* Switch to the regular task stack and use the noist entry point */ |
|---|
| 447 | +.Lfrom_usermode_switch_stack_\@: |
|---|
| 448 | + idtentry_body noist_\cfunc, has_error_code=0 |
|---|
| 449 | + |
|---|
| 450 | +_ASM_NOKPROBE(\asmsym) |
|---|
| 451 | +SYM_CODE_END(\asmsym) |
|---|
| 452 | +.endm |
|---|
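The noist_\cfunc hand-off above pairs the single asm stub with two C bodies; a hedged sketch using the #DB names assumed from the mainline headers:

```c
/* One asm stub (asm_exc_debug), two C bodies selected by the stub:
 * the IST variant when the exception hit kernel mode, the noist
 * variant after switching to the task stack for user-mode hits.
 */
DEFINE_IDTENTRY_DEBUG(exc_debug)	/* kernel mode, runs on IST stack */
{
	/* handle kernel-mode #DB */
}

DEFINE_IDTENTRY_DEBUG_USER(exc_debug)	/* user mode, runs on task stack */
{
	/* handle user-mode #DB, full exit-to-user work on return */
}
```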
| 453 | + |
|---|
| 454 | +#ifdef CONFIG_AMD_MEM_ENCRYPT |
|---|
| 455 | +/** |
|---|
| 456 | + * idtentry_vc - Macro to generate entry stub for #VC |
|---|
| 457 | + * @vector: Vector number |
|---|
| 458 | + * @asmsym: ASM symbol for the entry point |
|---|
| 459 | + * @cfunc: C function to be called |
|---|
| 460 | + * |
|---|
| 461 | + * The macro emits code to set up the kernel context for #VC. The #VC handler |
|---|
| 462 | + * runs on an IST stack and needs to be able to cause nested #VC exceptions. |
|---|
| 463 | + * |
|---|
| 464 | + * To make this work the #VC entry code tries its best to pretend it doesn't use |
|---|
| 465 | + * an IST stack by switching to the task stack if coming from user-space (which |
|---|
| 466 | + * includes early SYSCALL entry path) or back to the stack in the IRET frame if |
|---|
| 467 | + * entered from kernel-mode. |
|---|
| 468 | + * |
|---|
| 469 | + * If entered from kernel-mode the return stack is validated first, and if it is |
|---|
| 470 | + * not safe to use (e.g. because it points to the entry stack) the #VC handler |
|---|
| 471 | + * will switch to a fall-back stack (VC2) and call a special handler function. |
|---|
| 472 | + * |
|---|
| 473 | + * The macro is only used for one vector, but it is planned to be extended in |
|---|
| 474 | + * the future for the #HV exception. |
|---|
| 475 | + */ |
|---|
| 476 | +.macro idtentry_vc vector asmsym cfunc |
|---|
| 477 | +SYM_CODE_START(\asmsym) |
|---|
| 478 | + UNWIND_HINT_IRET_REGS |
|---|
| 479 | + ASM_CLAC |
|---|
| 480 | + |
|---|
| 512 | 481 | /* |
|---|
| 513 | | - * If the first movq above becomes wrong due to IRQ stack layout |
|---|
| 514 | | - * changes, the only way we'll notice is if we try to unwind right |
|---|
| 515 | | - * here. Assert that we set up the stack right to catch this type |
|---|
| 516 | | - * of bug quickly. |
|---|
| 482 | + * If the entry is from userspace, switch stacks and treat it as |
|---|
| 483 | + * a normal entry. |
|---|
| 517 | 484 | */ |
|---|
| 518 | | - cmpq -8(%rsp), \old_rsp |
|---|
| 519 | | - je .Lirq_stack_okay\@ |
|---|
| 520 | | - ud2 |
|---|
| 521 | | - .Lirq_stack_okay\@: |
|---|
| 485 | + testb $3, CS-ORIG_RAX(%rsp) |
|---|
| 486 | + jnz .Lfrom_usermode_switch_stack_\@ |
|---|
| 487 | + |
|---|
| 488 | + /* |
|---|
| 489 | + * paranoid_entry returns SWAPGS flag for paranoid_exit in EBX. |
|---|
| 490 | + * EBX == 0 -> SWAPGS, EBX == 1 -> no SWAPGS |
|---|
| 491 | + */ |
|---|
| 492 | + call paranoid_entry |
|---|
| 493 | + |
|---|
| 494 | + UNWIND_HINT_REGS |
|---|
| 495 | + |
|---|
| 496 | + /* |
|---|
| 497 | + * Switch off the IST stack to make it free for nested exceptions. The |
|---|
| 498 | + * vc_switch_off_ist() function will switch back to the interrupted |
|---|
| 499 | + * stack if it is safe to do so. If not it switches to the VC fall-back |
|---|
| 500 | + * stack. |
|---|
| 501 | + */ |
|---|
| 502 | + movq %rsp, %rdi /* pt_regs pointer */ |
|---|
| 503 | + call vc_switch_off_ist |
|---|
| 504 | + movq %rax, %rsp /* Switch to new stack */ |
|---|
| 505 | + |
|---|
| 506 | + ENCODE_FRAME_POINTER |
|---|
| 507 | + UNWIND_HINT_REGS |
|---|
| 508 | + |
|---|
| 509 | + /* Update pt_regs */ |
|---|
| 510 | + movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument */ |
|---|
| 511 | + movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ |
|---|
| 512 | + |
|---|
| 513 | + movq %rsp, %rdi /* pt_regs pointer */ |
|---|
| 514 | + |
|---|
| 515 | + call kernel_\cfunc |
|---|
| 516 | + |
|---|
| 517 | + /* |
|---|
| 518 | + * No need to switch back to the IST stack. The current stack is either |
|---|
| 519 | + * identical to the stack in the IRET frame or the VC fall-back stack, |
|---|
| 520 | + * so it is definitely mapped even with PTI enabled. |
|---|
| 521 | + */ |
|---|
| 522 | + jmp paranoid_exit |
|---|
| 523 | + |
|---|
| 524 | + /* Switch to the regular task stack */ |
|---|
| 525 | +.Lfrom_usermode_switch_stack_\@: |
|---|
| 526 | + idtentry_body user_\cfunc, has_error_code=1 |
|---|
| 527 | + |
|---|
| 528 | +_ASM_NOKPROBE(\asmsym) |
|---|
| 529 | +SYM_CODE_END(\asmsym) |
|---|
| 530 | +.endm |
|---|
| 522 | 531 | #endif |
|---|
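The stack hand-off in idtentry_vc leans on a C helper returning the replacement stack; a hedged prototype sketch, with the signature inferred from the movq %rax, %rsp sequence above:

```c
struct pt_regs;

/* Assumed shape of the helper: it validates the interrupted stack and
 * returns either a pt_regs copy on that stack or one on the VC2
 * fall-back stack; the asm then loads the result into %rsp.
 */
struct pt_regs *vc_switch_off_ist(struct pt_regs *regs);
```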
| 523 | 532 | |
|---|
| 524 | | -.Lirq_stack_push_old_rsp_\@: |
|---|
| 525 | | - pushq \old_rsp |
|---|
| 526 | | - |
|---|
| 527 | | - .if \regs |
|---|
| 528 | | - UNWIND_HINT_REGS indirect=1 |
|---|
| 529 | | - .endif |
|---|
| 530 | | - |
|---|
| 531 | | - .if \save_ret |
|---|
| 532 | | - /* |
|---|
| 533 | | - * Push the return address to the stack. This return address can |
|---|
| 534 | | - * be found at the "real" original RSP, which was offset by 8 at |
|---|
| 535 | | - * the beginning of this macro. |
|---|
| 536 | | - */ |
|---|
| 537 | | - pushq -8(\old_rsp) |
|---|
| 538 | | - .endif |
|---|
| 539 | | -.endm |
|---|
| 540 | | - |
|---|
| 541 | 533 | /* |
|---|
| 542 | | - * Undoes ENTER_IRQ_STACK. |
|---|
| 534 | + * Double fault entry. Straight paranoid. No checks from which context |
|---|
| 535 | + * this comes because for the espfix induced #DF this would do the wrong |
|---|
| 536 | + * thing. |
|---|
| 543 | 537 | */ |
|---|
| 544 | | -.macro LEAVE_IRQ_STACK regs=1 |
|---|
| 545 | | - DEBUG_ENTRY_ASSERT_IRQS_OFF |
|---|
| 546 | | - /* We need to be off the IRQ stack before decrementing irq_count. */ |
|---|
| 547 | | - popq %rsp |
|---|
| 548 | | - |
|---|
| 549 | | - .if \regs |
|---|
| 550 | | - UNWIND_HINT_REGS |
|---|
| 551 | | - .endif |
|---|
| 552 | | - |
|---|
| 553 | | - /* |
|---|
| 554 | | - * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming |
|---|
| 555 | | - * the irq stack but we're not on it. |
|---|
| 556 | | - */ |
|---|
| 557 | | - |
|---|
| 558 | | - decl PER_CPU_VAR(irq_count) |
|---|
| 559 | | -.endm |
|---|
| 560 | | - |
|---|
| 561 | | -/* |
|---|
| 562 | | - * Interrupt entry helper function. |
|---|
| 563 | | - * |
|---|
| 564 | | - * Entry runs with interrupts off. Stack layout at entry: |
|---|
| 565 | | - * +----------------------------------------------------+ |
|---|
| 566 | | - * | regs->ss | |
|---|
| 567 | | - * | regs->rsp | |
|---|
| 568 | | - * | regs->eflags | |
|---|
| 569 | | - * | regs->cs | |
|---|
| 570 | | - * | regs->ip | |
|---|
| 571 | | - * +----------------------------------------------------+ |
|---|
| 572 | | - * | regs->orig_ax = ~(interrupt number) | |
|---|
| 573 | | - * +----------------------------------------------------+ |
|---|
| 574 | | - * | return address | |
|---|
| 575 | | - * +----------------------------------------------------+ |
|---|
| 576 | | - */ |
|---|
| 577 | | -ENTRY(interrupt_entry) |
|---|
| 578 | | - UNWIND_HINT_IRET_REGS offset=16 |
|---|
| 538 | +.macro idtentry_df vector asmsym cfunc |
|---|
| 539 | +SYM_CODE_START(\asmsym) |
|---|
| 540 | + UNWIND_HINT_IRET_REGS offset=8 |
|---|
| 579 | 541 | ASM_CLAC |
|---|
| 580 | | - cld |
|---|
| 581 | 542 | |
|---|
| 582 | | - testb $3, CS-ORIG_RAX+8(%rsp) |
|---|
| 583 | | - jz 1f |
|---|
| 584 | | - SWAPGS |
|---|
| 585 | | - FENCE_SWAPGS_USER_ENTRY |
|---|
| 586 | | - /* |
|---|
| 587 | | - * Switch to the thread stack. The IRET frame and orig_ax are |
|---|
| 588 | | - * on the stack, as well as the return address. RDI..R12 are |
|---|
| 589 | | - * not (yet) on the stack and space has not (yet) been |
|---|
| 590 | | - * allocated for them. |
|---|
| 591 | | - */ |
|---|
| 592 | | - pushq %rdi |
|---|
| 543 | + /* paranoid_entry returns GS information for paranoid_exit in EBX. */ |
|---|
| 544 | + call paranoid_entry |
|---|
| 545 | + UNWIND_HINT_REGS |
|---|
| 593 | 546 | |
|---|
| 594 | | - /* Need to switch before accessing the thread stack. */ |
|---|
| 595 | | - SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi |
|---|
| 596 | | - movq %rsp, %rdi |
|---|
| 597 | | - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp |
|---|
| 547 | + movq %rsp, %rdi /* pt_regs pointer into first argument */ |
|---|
| 548 | + movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument */ |
|---|
| 549 | + movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ |
|---|
| 550 | + call \cfunc |
|---|
| 598 | 551 | |
|---|
| 599 | | - /* |
|---|
| 600 | | - * We have RDI, return address, and orig_ax on the stack on |
|---|
| 601 | | - * top of the IRET frame. That means offset=24 |
|---|
| 602 | | - */ |
|---|
| 603 | | - UNWIND_HINT_IRET_REGS base=%rdi offset=24 |
|---|
| 552 | + jmp paranoid_exit |
|---|
| 604 | 553 | |
|---|
| 605 | | - pushq 7*8(%rdi) /* regs->ss */ |
|---|
| 606 | | - pushq 6*8(%rdi) /* regs->rsp */ |
|---|
| 607 | | - pushq 5*8(%rdi) /* regs->eflags */ |
|---|
| 608 | | - pushq 4*8(%rdi) /* regs->cs */ |
|---|
| 609 | | - pushq 3*8(%rdi) /* regs->ip */ |
|---|
| 610 | | - UNWIND_HINT_IRET_REGS |
|---|
| 611 | | - pushq 2*8(%rdi) /* regs->orig_ax */ |
|---|
| 612 | | - pushq 8(%rdi) /* return address */ |
|---|
| 613 | | - |
|---|
| 614 | | - movq (%rdi), %rdi |
|---|
| 615 | | - jmp 2f |
|---|
| 616 | | -1: |
|---|
| 617 | | - FENCE_SWAPGS_KERNEL_ENTRY |
|---|
| 618 | | -2: |
|---|
| 619 | | - PUSH_AND_CLEAR_REGS save_ret=1 |
|---|
| 620 | | - ENCODE_FRAME_POINTER 8 |
|---|
| 621 | | - |
|---|
| 622 | | - testb $3, CS+8(%rsp) |
|---|
| 623 | | - jz 1f |
|---|
| 624 | | - |
|---|
| 625 | | - /* |
|---|
| 626 | | - * IRQ from user mode. |
|---|
| 627 | | - * |
|---|
| 628 | | - * We need to tell lockdep that IRQs are off. We can't do this until |
|---|
| 629 | | - * we fix gsbase, and we should do it before enter_from_user_mode |
|---|
| 630 | | - * (which can take locks). Since TRACE_IRQS_OFF is idempotent, |
|---|
| 631 | | - * the simplest way to handle it is to just call it twice if |
|---|
| 632 | | - * we enter from user mode. There's no reason to optimize this since |
|---|
| 633 | | - * TRACE_IRQS_OFF is a no-op if lockdep is off. |
|---|
| 634 | | - */ |
|---|
| 635 | | - TRACE_IRQS_OFF |
|---|
| 636 | | - |
|---|
| 637 | | - CALL_enter_from_user_mode |
|---|
| 638 | | - |
|---|
| 639 | | -1: |
|---|
| 640 | | - ENTER_IRQ_STACK old_rsp=%rdi save_ret=1 |
|---|
| 641 | | - /* We entered an interrupt context - irqs are off: */ |
|---|
| 642 | | - TRACE_IRQS_OFF |
|---|
| 643 | | - |
|---|
| 644 | | - ret |
|---|
| 645 | | -END(interrupt_entry) |
|---|
| 646 | | -_ASM_NOKPROBE(interrupt_entry) |
|---|
| 647 | | - |
|---|
| 648 | | - |
|---|
| 649 | | -/* Interrupt entry/exit. */ |
|---|
| 554 | +_ASM_NOKPROBE(\asmsym) |
|---|
| 555 | +SYM_CODE_END(\asmsym) |
|---|
| 556 | +.endm |
|---|
| 650 | 557 | |
|---|
| 651 | 558 | /* |
|---|
| 652 | | - * The interrupt stubs push (~vector+0x80) onto the stack and |
|---|
| 653 | | - * then jump to common_spurious/interrupt. |
|---|
| 559 | + * Include the defines which emit the idt entries which are shared |
|---|
| 560 | + * between 32 and 64 bit and emit the __irqentry_text_* markers |
|---|
| 561 | + * so the stacktrace boundary checks work. |
|---|
| 654 | 562 | */ |
|---|
| 655 | | -common_spurious: |
|---|
| 656 | | - addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ |
|---|
| 657 | | - call interrupt_entry |
|---|
| 658 | | - UNWIND_HINT_REGS indirect=1 |
|---|
| 659 | | - call smp_spurious_interrupt /* rdi points to pt_regs */ |
|---|
| 660 | | - jmp ret_from_intr |
|---|
| 661 | | -END(common_spurious) |
|---|
| 662 | | -_ASM_NOKPROBE(common_spurious) |
|---|
| 563 | + .align 16 |
|---|
| 564 | + .globl __irqentry_text_start |
|---|
| 565 | +__irqentry_text_start: |
|---|
| 663 | 566 | |
|---|
| 664 | | -/* common_interrupt is a hotpath. Align it */ |
|---|
| 665 | | - .p2align CONFIG_X86_L1_CACHE_SHIFT |
|---|
| 666 | | -common_interrupt: |
|---|
| 667 | | - addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ |
|---|
| 668 | | - call interrupt_entry |
|---|
| 669 | | - UNWIND_HINT_REGS indirect=1 |
|---|
| 670 | | - call do_IRQ /* rdi points to pt_regs */ |
|---|
| 671 | | - /* 0(%rsp): old RSP */ |
|---|
| 672 | | -ret_from_intr: |
|---|
| 673 | | - DISABLE_INTERRUPTS(CLBR_ANY) |
|---|
| 674 | | - TRACE_IRQS_OFF |
|---|
| 567 | +#include <asm/idtentry.h> |
|---|
| 675 | 568 | |
|---|
| 676 | | - LEAVE_IRQ_STACK |
|---|
| 569 | + .align 16 |
|---|
| 570 | + .globl __irqentry_text_end |
|---|
| 571 | +__irqentry_text_end: |
|---|
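A hedged sketch of the boundary check these markers enable for stack traces; the helper name here is hypothetical:

```c
extern char __irqentry_text_start[], __irqentry_text_end[];

/* Hypothetical helper: true if addr points into the idtentry stubs
 * emitted between the two markers above.
 */
static inline bool in_irqentry_text_sketch(unsigned long addr)
{
	return addr >= (unsigned long)__irqentry_text_start &&
	       addr <  (unsigned long)__irqentry_text_end;
}
```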
| 677 | 572 | |
|---|
| 678 | | - testb $3, CS(%rsp) |
|---|
| 679 | | - jz retint_kernel |
|---|
| 680 | | - |
|---|
| 681 | | - /* Interrupt came from user space */ |
|---|
| 682 | | -GLOBAL(retint_user) |
|---|
| 683 | | - mov %rsp,%rdi |
|---|
| 684 | | - call prepare_exit_to_usermode |
|---|
| 685 | | - TRACE_IRQS_IRETQ |
|---|
| 686 | | - |
|---|
| 687 | | -GLOBAL(swapgs_restore_regs_and_return_to_usermode) |
|---|
| 573 | +SYM_CODE_START_LOCAL(common_interrupt_return) |
|---|
| 574 | +SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) |
|---|
| 575 | + IBRS_EXIT |
|---|
| 688 | 576 | #ifdef CONFIG_DEBUG_ENTRY |
|---|
| 689 | 577 | /* Assert that pt_regs indicates user mode. */ |
|---|
| 690 | 578 | testb $3, CS(%rsp) |
|---|
| .. | .. |
|---|
| 692 | 580 | ud2 |
|---|
| 693 | 581 | 1: |
|---|
| 694 | 582 | #endif |
|---|
| 583 | +#ifdef CONFIG_XEN_PV |
|---|
| 584 | + ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV |
|---|
| 585 | +#endif |
|---|
| 586 | + |
|---|
| 695 | 587 | POP_REGS pop_rdi=0 |
|---|
| 696 | 588 | |
|---|
| 697 | 589 | /* |
|---|
| .. | .. |
|---|
| 716 | 608 | * We are on the trampoline stack. All regs except RDI are live. |
|---|
| 717 | 609 | * We can do future final exit work right here. |
|---|
| 718 | 610 | */ |
|---|
| 611 | + STACKLEAK_ERASE_NOCLOBBER |
|---|
| 719 | 612 | |
|---|
| 720 | 613 | SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi |
|---|
| 721 | 614 | |
|---|
| .. | .. |
|---|
| 725 | 618 | INTERRUPT_RETURN |
|---|
| 726 | 619 | |
|---|
| 727 | 620 | |
|---|
| 728 | | -/* Returning to kernel space */ |
|---|
| 729 | | -retint_kernel: |
|---|
| 730 | | -#ifdef CONFIG_PREEMPT |
|---|
| 731 | | - /* Interrupts are off */ |
|---|
| 732 | | - /* Check if we need preemption */ |
|---|
| 733 | | - btl $9, EFLAGS(%rsp) /* were interrupts off? */ |
|---|
| 734 | | - jnc 1f |
|---|
| 735 | | -0: cmpl $0, PER_CPU_VAR(__preempt_count) |
|---|
| 736 | | - jnz 1f |
|---|
| 737 | | - call preempt_schedule_irq |
|---|
| 738 | | - jmp 0b |
|---|
| 739 | | -1: |
|---|
| 740 | | -#endif |
|---|
| 741 | | - /* |
|---|
| 742 | | - * The iretq could re-enable interrupts: |
|---|
| 743 | | - */ |
|---|
| 744 | | - TRACE_IRQS_IRETQ |
|---|
| 745 | | - |
|---|
| 746 | | -GLOBAL(restore_regs_and_return_to_kernel) |
|---|
| 621 | +SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL) |
|---|
| 747 | 622 | #ifdef CONFIG_DEBUG_ENTRY |
|---|
| 748 | 623 | /* Assert that pt_regs indicates kernel mode. */ |
|---|
| 749 | 624 | testb $3, CS(%rsp) |
|---|
| .. | .. |
|---|
| 759 | 634 | */ |
|---|
| 760 | 635 | INTERRUPT_RETURN |
|---|
| 761 | 636 | |
|---|
| 762 | | -ENTRY(native_iret) |
|---|
| 637 | +SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL) |
|---|
| 763 | 638 | UNWIND_HINT_IRET_REGS |
|---|
| 764 | 639 | /* |
|---|
| 765 | 640 | * Are we returning to a stack segment from the LDT? Note: in |
|---|
| .. | .. |
|---|
| 770 | 645 | jnz native_irq_return_ldt |
|---|
| 771 | 646 | #endif |
|---|
| 772 | 647 | |
|---|
| 773 | | -.global native_irq_return_iret |
|---|
| 774 | | -native_irq_return_iret: |
|---|
| 648 | +SYM_INNER_LABEL(native_irq_return_iret, SYM_L_GLOBAL) |
|---|
| 775 | 649 | /* |
|---|
| 776 | 650 | * This may fault. Non-paranoid faults on return to userspace are |
|---|
| 777 | 651 | * handled by fixup_bad_iret. These include #SS, #GP, and #NP. |
|---|
| 778 | | - * Double-faults due to espfix64 are handled in do_double_fault. |
|---|
| 652 | + * Double-faults due to espfix64 are handled in exc_double_fault. |
|---|
| 779 | 653 | * Other faults here are fatal. |
|---|
| 780 | 654 | */ |
|---|
| 781 | 655 | iretq |
|---|
| .. | .. |
|---|
| 804 | 678 | */ |
|---|
| 805 | 679 | |
|---|
| 806 | 680 | pushq %rdi /* Stash user RDI */ |
|---|
| 807 | | - SWAPGS /* to kernel GS */ |
|---|
| 681 | + swapgs /* to kernel GS */ |
|---|
| 808 | 682 | SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */ |
|---|
| 683 | + UNTRAIN_RET |
|---|
| 809 | 684 | |
|---|
| 810 | 685 | movq PER_CPU_VAR(espfix_waddr), %rdi |
|---|
| 811 | 686 | movq %rax, (0*8)(%rdi) /* user RAX */ |
|---|
| .. | .. |
|---|
| 834 | 709 | orq PER_CPU_VAR(espfix_stack), %rax |
|---|
| 835 | 710 | |
|---|
| 836 | 711 | SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi |
|---|
| 837 | | - SWAPGS /* to user GS */ |
|---|
| 712 | + swapgs /* to user GS */ |
|---|
| 838 | 713 | popq %rdi /* Restore user RDI */ |
|---|
| 839 | 714 | |
|---|
| 840 | 715 | movq %rax, %rsp |
|---|
| .. | .. |
|---|
| 853 | 728 | */ |
|---|
| 854 | 729 | jmp native_irq_return_iret |
|---|
| 855 | 730 | #endif |
|---|
| 856 | | -END(common_interrupt) |
|---|
| 857 | | -_ASM_NOKPROBE(common_interrupt) |
|---|
| 731 | +SYM_CODE_END(common_interrupt_return) |
|---|
| 732 | +_ASM_NOKPROBE(common_interrupt_return) |
|---|
| 858 | 733 | |
|---|
| 859 | 734 | /* |
|---|
| 860 | | - * APIC interrupts. |
|---|
| 735 | + * Reload gs selector with exception handling |
|---|
| 736 | + * edi: new selector |
|---|
| 737 | + * |
|---|
| 738 | + * It lives in entry.text as it shouldn't be instrumented. |
|---|
| 861 | 739 | */ |
|---|
| 862 | | -.macro apicinterrupt3 num sym do_sym |
|---|
| 863 | | -ENTRY(\sym) |
|---|
| 864 | | - UNWIND_HINT_IRET_REGS |
|---|
| 865 | | - pushq $~(\num) |
|---|
| 866 | | -.Lcommon_\sym: |
|---|
| 867 | | - call interrupt_entry |
|---|
| 868 | | - UNWIND_HINT_REGS indirect=1 |
|---|
| 869 | | - call \do_sym /* rdi points to pt_regs */ |
|---|
| 870 | | - jmp ret_from_intr |
|---|
| 871 | | -END(\sym) |
|---|
| 872 | | -_ASM_NOKPROBE(\sym) |
|---|
| 873 | | -.endm |
|---|
| 874 | | - |
|---|
| 875 | | -/* Make sure APIC interrupt handlers end up in the irqentry section: */ |
|---|
| 876 | | -#define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax" |
|---|
| 877 | | -#define POP_SECTION_IRQENTRY .popsection |
|---|
| 878 | | - |
|---|
| 879 | | -.macro apicinterrupt num sym do_sym |
|---|
| 880 | | -PUSH_SECTION_IRQENTRY |
|---|
| 881 | | -apicinterrupt3 \num \sym \do_sym |
|---|
| 882 | | -POP_SECTION_IRQENTRY |
|---|
| 883 | | -.endm |
|---|
| 884 | | - |
|---|
| 885 | | -#ifdef CONFIG_SMP |
|---|
| 886 | | -apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt |
|---|
| 887 | | -apicinterrupt3 REBOOT_VECTOR reboot_interrupt smp_reboot_interrupt |
|---|
| 888 | | -#endif |
|---|
| 889 | | - |
|---|
| 890 | | -#ifdef CONFIG_X86_UV |
|---|
| 891 | | -apicinterrupt3 UV_BAU_MESSAGE uv_bau_message_intr1 uv_bau_message_interrupt |
|---|
| 892 | | -#endif |
|---|
| 893 | | - |
|---|
| 894 | | -apicinterrupt LOCAL_TIMER_VECTOR apic_timer_interrupt smp_apic_timer_interrupt |
|---|
| 895 | | -apicinterrupt X86_PLATFORM_IPI_VECTOR x86_platform_ipi smp_x86_platform_ipi |
|---|
| 896 | | - |
|---|
| 897 | | -#ifdef CONFIG_HAVE_KVM |
|---|
| 898 | | -apicinterrupt3 POSTED_INTR_VECTOR kvm_posted_intr_ipi smp_kvm_posted_intr_ipi |
|---|
| 899 | | -apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi |
|---|
| 900 | | -apicinterrupt3 POSTED_INTR_NESTED_VECTOR kvm_posted_intr_nested_ipi smp_kvm_posted_intr_nested_ipi |
|---|
| 901 | | -#endif |
|---|
| 902 | | - |
|---|
| 903 | | -#ifdef CONFIG_X86_MCE_THRESHOLD |
|---|
| 904 | | -apicinterrupt THRESHOLD_APIC_VECTOR threshold_interrupt smp_threshold_interrupt |
|---|
| 905 | | -#endif |
|---|
| 906 | | - |
|---|
| 907 | | -#ifdef CONFIG_X86_MCE_AMD |
|---|
| 908 | | -apicinterrupt DEFERRED_ERROR_VECTOR deferred_error_interrupt smp_deferred_error_interrupt |
|---|
| 909 | | -#endif |
|---|
| 910 | | - |
|---|
| 911 | | -#ifdef CONFIG_X86_THERMAL_VECTOR |
|---|
| 912 | | -apicinterrupt THERMAL_APIC_VECTOR thermal_interrupt smp_thermal_interrupt |
|---|
| 913 | | -#endif |
|---|
| 914 | | - |
|---|
| 915 | | -#ifdef CONFIG_SMP |
|---|
| 916 | | -apicinterrupt CALL_FUNCTION_SINGLE_VECTOR call_function_single_interrupt smp_call_function_single_interrupt |
|---|
| 917 | | -apicinterrupt CALL_FUNCTION_VECTOR call_function_interrupt smp_call_function_interrupt |
|---|
| 918 | | -apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt |
|---|
| 919 | | -#endif |
|---|
| 920 | | - |
|---|
| 921 | | -apicinterrupt ERROR_APIC_VECTOR error_interrupt smp_error_interrupt |
|---|
| 922 | | -apicinterrupt SPURIOUS_APIC_VECTOR spurious_interrupt smp_spurious_interrupt |
|---|
| 923 | | - |
|---|
| 924 | | -#ifdef CONFIG_IRQ_WORK |
|---|
| 925 | | -apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt |
|---|
| 926 | | -#endif |
|---|
| 927 | | - |
|---|
| 928 | | -/* |
|---|
| 929 | | - * Exception entry points. |
|---|
| 930 | | - */ |
|---|
| 931 | | -#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) |
|---|
| 932 | | - |
|---|
| 933 | | -.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 create_gap=0 |
|---|
| 934 | | -ENTRY(\sym) |
|---|
| 935 | | - UNWIND_HINT_IRET_REGS offset=\has_error_code*8 |
|---|
| 936 | | - |
|---|
| 937 | | - /* Sanity check */ |
|---|
| 938 | | - .if \shift_ist != -1 && \paranoid == 0 |
|---|
| 939 | | - .error "using shift_ist requires paranoid=1" |
|---|
| 940 | | - .endif |
|---|
| 941 | | - |
|---|
| 942 | | - ASM_CLAC |
|---|
| 943 | | - |
|---|
| 944 | | - .if \has_error_code == 0 |
|---|
| 945 | | - pushq $-1 /* ORIG_RAX: no syscall to restart */ |
|---|
| 946 | | - .endif |
|---|
| 947 | | - |
|---|
| 948 | | - .if \paranoid == 1 |
|---|
| 949 | | - testb $3, CS-ORIG_RAX(%rsp) /* If coming from userspace, switch stacks */ |
|---|
| 950 | | - jnz .Lfrom_usermode_switch_stack_\@ |
|---|
| 951 | | - .endif |
|---|
| 952 | | - |
|---|
| 953 | | - .if \create_gap == 1 |
|---|
| 954 | | - /* |
|---|
| 955 | | - * If coming from kernel space, create a 6-word gap to allow the |
|---|
| 956 | | - * int3 handler to emulate a call instruction. |
|---|
| 957 | | - */ |
|---|
| 958 | | - testb $3, CS-ORIG_RAX(%rsp) |
|---|
| 959 | | - jnz .Lfrom_usermode_no_gap_\@ |
|---|
| 960 | | - .rept 6 |
|---|
| 961 | | - pushq 5*8(%rsp) |
|---|
| 962 | | - .endr |
|---|
| 963 | | - UNWIND_HINT_IRET_REGS offset=8 |
|---|
| 964 | | -.Lfrom_usermode_no_gap_\@: |
|---|
| 965 | | - .endif |
|---|
| 966 | | - |
|---|
| 967 | | - .if \paranoid |
|---|
| 968 | | - call paranoid_entry |
|---|
| 969 | | - .else |
|---|
| 970 | | - call error_entry |
|---|
| 971 | | - .endif |
|---|
| 972 | | - UNWIND_HINT_REGS |
|---|
| 973 | | - /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ |
|---|
| 974 | | - |
|---|
| 975 | | - .if \paranoid |
|---|
| 976 | | - .if \shift_ist != -1 |
|---|
| 977 | | - TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */ |
|---|
| 978 | | - .else |
|---|
| 979 | | - TRACE_IRQS_OFF |
|---|
| 980 | | - .endif |
|---|
| 981 | | - .endif |
|---|
| 982 | | - |
|---|
| 983 | | - movq %rsp, %rdi /* pt_regs pointer */ |
|---|
| 984 | | - |
|---|
| 985 | | - .if \has_error_code |
|---|
| 986 | | - movq ORIG_RAX(%rsp), %rsi /* get error code */ |
|---|
| 987 | | - movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ |
|---|
| 988 | | - .else |
|---|
| 989 | | - xorl %esi, %esi /* no error code */ |
|---|
| 990 | | - .endif |
|---|
| 991 | | - |
|---|
| 992 | | - .if \shift_ist != -1 |
|---|
| 993 | | - subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) |
|---|
| 994 | | - .endif |
|---|
| 995 | | - |
|---|
| 996 | | - call \do_sym |
|---|
| 997 | | - |
|---|
| 998 | | - .if \shift_ist != -1 |
|---|
| 999 | | - addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) |
|---|
| 1000 | | - .endif |
|---|
| 1001 | | - |
|---|
| 1002 | | - /* these procedures expect "no swapgs" flag in ebx */ |
|---|
| 1003 | | - .if \paranoid |
|---|
| 1004 | | - jmp paranoid_exit |
|---|
| 1005 | | - .else |
|---|
| 1006 | | - jmp error_exit |
|---|
| 1007 | | - .endif |
|---|
| 1008 | | - |
|---|
| 1009 | | - .if \paranoid == 1 |
|---|
| 1010 | | - /* |
|---|
| 1011 | | - * Entry from userspace. Switch stacks and treat it |
|---|
| 1012 | | - * as a normal entry. This means that paranoid handlers |
|---|
| 1013 | | - * run in real process context if user_mode(regs). |
|---|
| 1014 | | - */ |
|---|
| 1015 | | -.Lfrom_usermode_switch_stack_\@: |
|---|
| 1016 | | - call error_entry |
|---|
| 1017 | | - |
|---|
| 1018 | | - movq %rsp, %rdi /* pt_regs pointer */ |
|---|
| 1019 | | - |
|---|
| 1020 | | - .if \has_error_code |
|---|
| 1021 | | - movq ORIG_RAX(%rsp), %rsi /* get error code */ |
|---|
| 1022 | | - movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ |
|---|
| 1023 | | - .else |
|---|
| 1024 | | - xorl %esi, %esi /* no error code */ |
|---|
| 1025 | | - .endif |
|---|
| 1026 | | - |
|---|
| 1027 | | - call \do_sym |
|---|
| 1028 | | - |
|---|
| 1029 | | - jmp error_exit |
|---|
| 1030 | | - .endif |
|---|
| 1031 | | -_ASM_NOKPROBE(\sym) |
|---|
| 1032 | | -END(\sym) |
|---|
| 1033 | | -.endm |
|---|
| 1034 | | - |
|---|
| 1035 | | -idtentry divide_error do_divide_error has_error_code=0 |
|---|
| 1036 | | -idtentry overflow do_overflow has_error_code=0 |
|---|
| 1037 | | -idtentry bounds do_bounds has_error_code=0 |
|---|
| 1038 | | -idtentry invalid_op do_invalid_op has_error_code=0 |
|---|
| 1039 | | -idtentry device_not_available do_device_not_available has_error_code=0 |
|---|
| 1040 | | -idtentry double_fault do_double_fault has_error_code=1 paranoid=2 |
|---|
| 1041 | | -idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 |
|---|
| 1042 | | -idtentry invalid_TSS do_invalid_TSS has_error_code=1 |
|---|
| 1043 | | -idtentry segment_not_present do_segment_not_present has_error_code=1 |
|---|
| 1044 | | -idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0 |
|---|
| 1045 | | -idtentry coprocessor_error do_coprocessor_error has_error_code=0 |
|---|
| 1046 | | -idtentry alignment_check do_alignment_check has_error_code=1 |
|---|
| 1047 | | -idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 |
|---|
| 1048 | | - |
|---|
| 1049 | | - |
|---|
| 1050 | | - /* |
|---|
| 1051 | | - * Reload gs selector with exception handling |
|---|
| 1052 | | - * edi: new selector |
|---|
| 1053 | | - */ |
|---|
| 1054 | | -ENTRY(native_load_gs_index) |
|---|
| 740 | +SYM_FUNC_START(asm_load_gs_index) |
|---|
| 1055 | 741 | FRAME_BEGIN |
|---|
| 1056 | | - pushfq |
|---|
| 1057 | | - DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) |
|---|
| 1058 | | - TRACE_IRQS_OFF |
|---|
| 1059 | | - SWAPGS |
|---|
| 742 | + swapgs |
|---|
| 1060 | 743 | .Lgs_change: |
|---|
| 1061 | 744 | movl %edi, %gs |
|---|
| 1062 | 745 | 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE |
|---|
| 1063 | | - SWAPGS |
|---|
| 1064 | | - TRACE_IRQS_FLAGS (%rsp) |
|---|
| 1065 | | - popfq |
|---|
| 746 | + swapgs |
|---|
| 1066 | 747 | FRAME_END |
|---|
| 1067 | | - ret |
|---|
| 1068 | | -ENDPROC(native_load_gs_index) |
|---|
| 1069 | | -EXPORT_SYMBOL(native_load_gs_index) |
|---|
| 748 | + RET |
|---|
| 749 | +SYM_FUNC_END(asm_load_gs_index) |
|---|
| 750 | +EXPORT_SYMBOL(asm_load_gs_index) |
|---|
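Because the stub above no longer saves and restores RFLAGS, interrupts must be off around the swapgs pair; a hedged sketch of the C wrapper assumed to provide that:

```c
extern void asm_load_gs_index(unsigned int selector);

/* Assumed wrapper: the pushfq/popfq and DISABLE_INTERRUPTS were
 * dropped from the asm stub, so the caller disables IRQs around it.
 */
static inline void load_gs_index_sketch(unsigned int selector)
{
	unsigned long flags;

	local_irq_save(flags);
	asm_load_gs_index(selector);
	local_irq_restore(flags);
}
```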
| 1070 | 751 | |
|---|
| 1071 | | - _ASM_EXTABLE(.Lgs_change, bad_gs) |
|---|
| 752 | + _ASM_EXTABLE(.Lgs_change, .Lbad_gs) |
|---|
| 1072 | 753 | .section .fixup, "ax" |
|---|
| 1073 | 754 | /* running with kernelgs */ |
|---|
| 1074 | | -bad_gs: |
|---|
| 1075 | | - SWAPGS /* switch back to user gs */ |
|---|
| 755 | +SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs) |
|---|
| 756 | + swapgs /* switch back to user gs */ |
|---|
| 1076 | 757 | .macro ZAP_GS |
|---|
| 1077 | 758 | /* This can't be a string because the preprocessor needs to see it. */ |
|---|
| 1078 | 759 | movl $__USER_DS, %eax |
|---|
| .. | .. |
|---|
| 1082 | 763 | xorl %eax, %eax |
|---|
| 1083 | 764 | movl %eax, %gs |
|---|
| 1084 | 765 | jmp 2b |
|---|
| 766 | +SYM_CODE_END(.Lbad_gs) |
|---|
| 1085 | 767 | .previous |
|---|
| 1086 | 768 | |
|---|
| 1087 | | -/* Call softirq on interrupt stack. Interrupts are off. */ |
|---|
| 1088 | | -ENTRY(do_softirq_own_stack) |
|---|
| 1089 | | - pushq %rbp |
|---|
| 1090 | | - mov %rsp, %rbp |
|---|
| 1091 | | - ENTER_IRQ_STACK regs=0 old_rsp=%r11 |
|---|
| 1092 | | - call __do_softirq |
|---|
| 1093 | | - LEAVE_IRQ_STACK regs=0 |
|---|
| 769 | +/* |
|---|
| 770 | + * rdi: New stack pointer points to the top word of the stack |
|---|
| 771 | + * rsi: Function pointer |
|---|
| 772 | + * rdx: Function argument (can be NULL if none) |
|---|
| 773 | + */ |
|---|
| 774 | +SYM_FUNC_START(asm_call_on_stack) |
|---|
| 775 | +SYM_INNER_LABEL(asm_call_sysvec_on_stack, SYM_L_GLOBAL) |
|---|
| 776 | +SYM_INNER_LABEL(asm_call_irq_on_stack, SYM_L_GLOBAL) |
|---|
| 777 | + /* |
|---|
| 778 | + * Save the frame pointer unconditionally. This allows the ORC |
|---|
| 779 | + * unwinder to handle the stack switch. |
|---|
| 780 | + */ |
|---|
| 781 | + pushq %rbp |
|---|
| 782 | + mov %rsp, %rbp |
|---|
| 783 | + |
|---|
| 784 | + /* |
|---|
| 785 | + * The unwinder relies on the word at the top of the new stack |
|---|
| 786 | + * page linking back to the previous RSP. |
|---|
| 787 | + */ |
|---|
| 788 | + mov %rsp, (%rdi) |
|---|
| 789 | + mov %rdi, %rsp |
|---|
| 790 | + /* Move the argument to the right place */ |
|---|
| 791 | + mov %rdx, %rdi |
|---|
| 792 | + |
|---|
| 793 | +1: |
|---|
| 794 | + .pushsection .discard.instr_begin |
|---|
| 795 | + .long 1b - . |
|---|
| 796 | + .popsection |
|---|
| 797 | + |
|---|
| 798 | + CALL_NOSPEC rsi |
|---|
| 799 | + |
|---|
| 800 | +2: |
|---|
| 801 | + .pushsection .discard.instr_end |
|---|
| 802 | + .long 2b - . |
|---|
| 803 | + .popsection |
|---|
| 804 | + |
|---|
| 805 | + /* Restore the previous stack pointer from RBP. */ |
|---|
| 1094 | 806 | leaveq |
|---|
| 1095 | | - ret |
|---|
| 1096 | | -ENDPROC(do_softirq_own_stack) |
|---|
| 807 | + RET |
|---|
| 808 | +SYM_FUNC_END(asm_call_on_stack) |
|---|
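On the C side these labels are called through a prototype matching the register contract in the comment above; a hedged sketch of prototype and use (percpu variable name assumed):

```c
/* Assumed prototype: rdi = top of the new stack, rsi = function,
 * rdx = its argument (forwarded via the mov %rdx, %rdi above).
 */
void asm_call_on_stack(void *stack, void (*func)(void), void *arg);

/* Sketched use: run a handler on the hardirq stack; the old RSP is
 * stored at the new stack's top word so the ORC unwinder can link back.
 */
static void run_on_irqstack_sketch(void (*handler)(void))
{
	asm_call_on_stack(__this_cpu_read(hardirq_stack_ptr), handler, NULL);
}
```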
| 1097 | 809 | |
|---|
| 1098 | | -#ifdef CONFIG_XEN |
|---|
| 1099 | | -idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0 |
|---|
| 1100 | | - |
|---|
| 810 | +#ifdef CONFIG_XEN_PV |
|---|
| 1101 | 811 | /* |
|---|
| 1102 | 812 | * A note on the "critical region" in our callback handler. |
|---|
| 1103 | 813 | * We want to avoid stacking callback handlers due to events occurring |
|---|
| .. | .. |
|---|
| 1110 | 820 | * So, on entry to the handler we detect whether we interrupted an |
|---|
| 1111 | 821 | * existing activation in its critical region -- if so, we pop the current |
|---|
| 1112 | 822 | * activation and restart the handler using the previous one. |
|---|
| 823 | + * |
|---|
| 824 | + * C calling convention: exc_xen_hypervisor_callback(struct pt_regs *) |
|---|
| 1113 | 825 | */ |
|---|
| 1114 | | -ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */ |
|---|
| 826 | +SYM_CODE_START_LOCAL(exc_xen_hypervisor_callback) |
|---|
| 1115 | 827 | |
|---|
| 1116 | 828 | /* |
|---|
| 1117 | 829 | * Since we don't modify %rdi, evtchn_do_upcall(struct pt_regs *) will |
|---|
| .. | .. |
|---|
| 1121 | 833 | movq %rdi, %rsp /* we don't return, adjust the stack frame */ |
|---|
| 1122 | 834 | UNWIND_HINT_REGS |
|---|
| 1123 | 835 | |
|---|
| 1124 | | - ENTER_IRQ_STACK old_rsp=%r10 |
|---|
| 1125 | | - call xen_evtchn_do_upcall |
|---|
| 1126 | | - LEAVE_IRQ_STACK |
|---|
| 836 | + call xen_pv_evtchn_do_upcall |
|---|
| 1127 | 837 | |
|---|
| 1128 | | -#ifndef CONFIG_PREEMPT |
|---|
| 1129 | | - call xen_maybe_preempt_hcall |
|---|
| 1130 | | -#endif |
|---|
| 1131 | | - jmp error_exit |
|---|
| 1132 | | -END(xen_do_hypervisor_callback) |
|---|
| 838 | + jmp error_return |
|---|
| 839 | +SYM_CODE_END(exc_xen_hypervisor_callback) |
|---|
| 1133 | 840 | |
|---|
| 1134 | 841 | /* |
|---|
| 1135 | 842 | * Hypervisor uses this for application faults while it executes. |
|---|
| .. | .. |
|---|
| 1144 | 851 | * We distinguish between categories by comparing each saved segment register |
|---|
| 1145 | 852 | * with its current contents: any discrepancy means we are in category 1. |
|---|
| 1146 | 853 | */ |
|---|
| 1147 | | -ENTRY(xen_failsafe_callback) |
|---|
| 854 | +SYM_CODE_START(xen_failsafe_callback) |
|---|
| 1148 | 855 | UNWIND_HINT_EMPTY |
|---|
| 1149 | 856 | movl %ds, %ecx |
|---|
| 1150 | 857 | cmpw %cx, 0x10(%rsp) |
|---|
| .. | .. |
|---|
| 1164 | 871 | addq $0x30, %rsp |
|---|
| 1165 | 872 | pushq $0 /* RIP */ |
|---|
| 1166 | 873 | UNWIND_HINT_IRET_REGS offset=8 |
|---|
| 1167 | | - jmp general_protection |
|---|
| 874 | + jmp asm_exc_general_protection |
|---|
| 1168 | 875 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ |
|---|
| 1169 | 876 | movq (%rsp), %rcx |
|---|
| 1170 | 877 | movq 8(%rsp), %r11 |
|---|
| .. | .. |
|---|
| 1173 | 880 | pushq $-1 /* orig_ax = -1 => not a system call */ |
|---|
| 1174 | 881 | PUSH_AND_CLEAR_REGS |
|---|
| 1175 | 882 | ENCODE_FRAME_POINTER |
|---|
| 1176 | | - jmp error_exit |
|---|
| 1177 | | -END(xen_failsafe_callback) |
|---|
| 1178 | | - |
|---|
| 1179 | | -apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ |
|---|
| 1180 | | - xen_hvm_callback_vector xen_evtchn_do_upcall |
|---|
| 1181 | | - |
|---|
| 1182 | | -#endif /* CONFIG_XEN */ |
|---|
| 1183 | | - |
|---|
| 1184 | | -#if IS_ENABLED(CONFIG_HYPERV) |
|---|
| 1185 | | -apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ |
|---|
| 1186 | | - hyperv_callback_vector hyperv_vector_handler |
|---|
| 1187 | | - |
|---|
| 1188 | | -apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \ |
|---|
| 1189 | | - hyperv_reenlightenment_vector hyperv_reenlightenment_intr |
|---|
| 1190 | | - |
|---|
| 1191 | | -apicinterrupt3 HYPERV_STIMER0_VECTOR \ |
|---|
| 1192 | | - hv_stimer0_callback_vector hv_stimer0_vector_handler |
|---|
| 1193 | | -#endif /* CONFIG_HYPERV */ |
|---|
| 1194 | | - |
|---|
| 1195 | | -idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK |
|---|
| 1196 | | -idtentry int3 do_int3 has_error_code=0 create_gap=1 |
|---|
| 1197 | | -idtentry stack_segment do_stack_segment has_error_code=1 |
|---|
| 1198 | | - |
|---|
| 1199 | | -#ifdef CONFIG_XEN |
|---|
| 1200 | | -idtentry xennmi do_nmi has_error_code=0 |
|---|
| 1201 | | -idtentry xendebug do_debug has_error_code=0 |
|---|
| 1202 | | -#endif |
|---|
| 1203 | | - |
|---|
| 1204 | | -idtentry general_protection do_general_protection has_error_code=1 |
|---|
| 1205 | | -idtentry page_fault do_page_fault has_error_code=1 |
|---|
| 1206 | | - |
|---|
| 1207 | | -#ifdef CONFIG_KVM_GUEST |
|---|
| 1208 | | -idtentry async_page_fault do_async_page_fault has_error_code=1 |
|---|
| 1209 | | -#endif |
|---|
| 1210 | | - |
|---|
| 1211 | | -#ifdef CONFIG_X86_MCE |
|---|
| 1212 | | -idtentry machine_check do_mce has_error_code=0 paranoid=1 |
|---|
| 1213 | | -#endif |
|---|
| 883 | + jmp error_return |
|---|
| 884 | +SYM_CODE_END(xen_failsafe_callback) |
|---|
| 885 | +#endif /* CONFIG_XEN_PV */ |
|---|
| 1214 | 886 | |
|---|
| 1215 | 887 | /* |
|---|
| 1216 | | - * Save all registers in pt_regs, and switch gs if needed. |
|---|
| 1217 | | - * Use slow, but surefire "are we in kernel?" check. |
|---|
| 1218 | | - * Return: ebx=0: need swapgs on exit, ebx=1: otherwise |
|---|
| 888 | + * Save all registers in pt_regs. Return GSBASE-related information |
|---|
| 889 | + * in EBX depending on the availability of the FSGSBASE instructions: |
|---|
| 890 | + * |
|---|
| 891 | + * FSGSBASE R/EBX |
|---|
| 892 | + * N 0 -> SWAPGS on exit |
|---|
| 893 | + * 1 -> no SWAPGS on exit |
|---|
| 894 | + * |
|---|
| 895 | + * Y GSBASE value at entry, must be restored in paranoid_exit |
|---|
| 896 | + * |
|---|
| 897 | + * R14 - old CR3 |
|---|
| 898 | + * R15 - old SPEC_CTRL |
|---|
| 1219 | 899 | */ |
|---|
| 1220 | | -ENTRY(paranoid_entry) |
|---|
| 900 | +SYM_CODE_START_LOCAL(paranoid_entry) |
|---|
| 1221 | 901 | UNWIND_HINT_FUNC |
|---|
| 1222 | 902 | cld |
|---|
| 1223 | 903 | PUSH_AND_CLEAR_REGS save_ret=1 |
|---|
| 1224 | 904 | ENCODE_FRAME_POINTER 8 |
|---|
| 1225 | | - movl $1, %ebx |
|---|
| 1226 | | - movl $MSR_GS_BASE, %ecx |
|---|
| 1227 | | - rdmsr |
|---|
| 1228 | | - testl %edx, %edx |
|---|
| 1229 | | - js 1f /* negative -> in kernel */ |
|---|
| 1230 | | - SWAPGS |
|---|
| 1231 | | - xorl %ebx, %ebx |
|---|
| 1232 | 905 | |
|---|
| 1233 | | -1: |
|---|
| 1234 | 906 | /* |
|---|
| 1235 | 907 | * Always stash CR3 in %r14. This value will be restored, |
|---|
| 1236 | 908 | * verbatim, at exit. Needed if paranoid_entry interrupted |
|---|
| .. | .. |
|---|
| 1240 | 912 | * This is also why CS (stashed in the "iret frame" by the |
|---|
| 1241 | 913 | * hardware at entry) can not be used: this may be a return |
|---|
| 1242 | 914 | * to kernel code, but with a user CR3 value. |
|---|
| 915 | + * |
|---|
| 916 | + * Switching CR3 does not depend on kernel GSBASE so it can |
|---|
| 917 | + * be done before switching to the kernel GSBASE. This is |
|---|
| 918 | + * required for FSGSBASE because the kernel GSBASE has to |
|---|
| 919 | + * be retrieved from a kernel internal table. |
|---|
| 1243 | 920 | */ |
|---|
| 1244 | 921 | SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 |
|---|
| 1245 | 922 | |
|---|
| 1246 | 923 | /* |
|---|
| 1247 | | - * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an |
|---|
| 1248 | | - * unconditional CR3 write, even in the PTI case. So do an lfence |
|---|
| 1249 | | - * to prevent GS speculation, regardless of whether PTI is enabled. |
|---|
| 924 | + * Handling GSBASE depends on the availability of FSGSBASE. |
|---|
| 925 | + * |
|---|
| 926 | + * Without FSGSBASE the kernel enforces that negative GSBASE |
|---|
| 927 | + * values indicate kernel GSBASE. With FSGSBASE no assumptions |
|---|
| 928 | + * can be made about the GSBASE value when entering from user |
|---|
| 929 | + * space. |
|---|
| 1250 | 930 | */ |
|---|
| 1251 | | - FENCE_SWAPGS_KERNEL_ENTRY |
|---|
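| .. | + /* ALTERNATIVE replaces the jmp with NOPs at patch time on CPUs |
|---|
| .. | + * with X86_FEATURE_FSGSBASE, which therefore fall through to the |
|---|
| .. | + * RDGSBASE/WRGSBASE path; all others take the MSR-based check. */ |
|---|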
| 931 | + ALTERNATIVE "jmp .Lparanoid_entry_checkgs", "", X86_FEATURE_FSGSBASE |
|---|
| 1252 | 932 | |
|---|
| 1253 | | - ret |
|---|
| 1254 | | -END(paranoid_entry) |
|---|
| 933 | + /* |
|---|
| 934 | + * Read the current GSBASE and store it in %rbx unconditionally, |
|---|
| 935 | + * then retrieve and set the current CPU's kernel GSBASE. The stored |
|---|
| 936 | + * value has to be restored in paranoid_exit unconditionally. |
|---|
| 937 | + * |
|---|
| 938 | + * The unconditional write to GS base below ensures that no subsequent |
|---|
| 939 | + * loads based on a mispredicted GS base can happen, so no LFENCE |
|---|
| 940 | + * is needed here. |
|---|
| 941 | + */ |
|---|
| 942 | + SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx |
|---|
| 943 | + jmp .Lparanoid_gsbase_done |
|---|
| 944 | + |
|---|
| 945 | +.Lparanoid_entry_checkgs: |
|---|
| 946 | + /* EBX = 1 -> kernel GSBASE active, no restore required */ |
|---|
| 947 | + movl $1, %ebx |
|---|
| 948 | + |
|---|
| 949 | + /* |
|---|
| 950 | + * The kernel-enforced convention is a negative GSBASE indicates |
|---|
| 951 | + * a kernel value. No SWAPGS needed on entry and exit. |
|---|
| 952 | + */ |
|---|
| 953 | + movl $MSR_GS_BASE, %ecx |
|---|
| 954 | + rdmsr |
|---|
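| .. | + /* RDMSR returns the MSR in EDX:EAX, so the sign of EDX is bit 63 |
|---|
| .. | + * of GSBASE, which is set only for kernel (upper-half) addresses. */ |
|---|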
| 955 | + testl %edx, %edx |
|---|
| 956 | + js .Lparanoid_kernel_gsbase |
|---|
| 957 | + |
|---|
| 958 | + /* EBX = 0 -> SWAPGS required on exit */ |
|---|
| 959 | + xorl %ebx, %ebx |
|---|
| 960 | + swapgs |
|---|
| 961 | +.Lparanoid_kernel_gsbase: |
|---|
| 962 | + FENCE_SWAPGS_KERNEL_ENTRY |
|---|
| 963 | +.Lparanoid_gsbase_done: |
|---|
| 964 | + |
|---|
| 965 | + /* |
|---|
| 966 | + * Once CR3 and %GS are set up, save and set SPEC_CTRL. Just like |
|---|
| 967 | + * CR3 above, keep the old value in a callee saved register. |
|---|
| 968 | + */ |
|---|
| 969 | + IBRS_ENTER save_reg=%r15 |
|---|
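| .. | + /* UNTRAIN_RET issues the return-thunk untraining sequence |
|---|
| .. | + * (retbleed mitigation) before the first RET can execute. */ |
|---|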
| 970 | + UNTRAIN_RET |
|---|
| 971 | + |
|---|
| 972 | + RET |
|---|
| 973 | +SYM_CODE_END(paranoid_entry) |
|---|
| 1255 | 974 | |
|---|
| 1256 | 975 | /* |
|---|
| 1257 | 976 | * "Paranoid" exit path from exception stack. This is invoked |
|---|
| .. | .. |
|---|
| 1260 | 979 | * |
|---|
| 1261 | 980 | * We may be returning to very strange contexts (e.g. very early |
|---|
| 1262 | 981 | * in syscall entry), so checking for preemption here would |
|---|
| 1263 | | - * be complicated. Fortunately, we there's no good reason |
|---|
| 1264 | | - * to try to handle preemption here. |
|---|
| 982 | + * be complicated. Fortunately, there's no good reason to try |
|---|
| 983 | + * to handle preemption here. |
|---|
| 1265 | 984 | * |
|---|
| 1266 | | - * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) |
|---|
| 985 | + * R/EBX contains the GSBASE-related information depending on the |
|---|
| 986 | + * availability of the FSGSBASE instructions: |
|---|
| 987 | + * |
|---|
| 988 | + * FSGSBASE R/EBX |
|---|
| 989 | + * N 0 -> SWAPGS on exit |
|---|
| 990 | + * 1 -> no SWAPGS on exit |
|---|
| 991 | + * |
|---|
| 992 | + * Y User space GSBASE, must be restored unconditionally |
|---|
| 993 | + * |
|---|
| 994 | + * R14 - old CR3 |
|---|
| 995 | + * R15 - old SPEC_CTRL |
|---|
| 1267 | 996 | */ |
|---|
| 1268 | | -ENTRY(paranoid_exit) |
|---|
| 997 | +SYM_CODE_START_LOCAL(paranoid_exit) |
|---|
| 1269 | 998 | UNWIND_HINT_REGS |
|---|
| 1270 | | - DISABLE_INTERRUPTS(CLBR_ANY) |
|---|
| 1271 | | - TRACE_IRQS_OFF_DEBUG |
|---|
| 1272 | | - testl %ebx, %ebx /* swapgs needed? */ |
|---|
| 1273 | | - jnz .Lparanoid_exit_no_swapgs |
|---|
| 1274 | | - TRACE_IRQS_IRETQ |
|---|
| 1275 | | - /* Always restore stashed CR3 value (see paranoid_entry) */ |
|---|
| 1276 | | - RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 |
|---|
| 1277 | | - SWAPGS_UNSAFE_STACK |
|---|
| 1278 | | - jmp .Lparanoid_exit_restore |
|---|
| 1279 | | -.Lparanoid_exit_no_swapgs: |
|---|
| 1280 | | - TRACE_IRQS_IRETQ_DEBUG |
|---|
| 1281 | | - /* Always restore stashed CR3 value (see paranoid_entry) */ |
|---|
| 1282 | | - RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 |
|---|
| 1283 | | -.Lparanoid_exit_restore: |
|---|
| 1284 | | - jmp restore_regs_and_return_to_kernel |
|---|
| 1285 | | -END(paranoid_exit) |
|---|
| 999 | + |
|---|
| 1000 | + /* |
|---|
| 1001 | + * Must restore IBRS state before both CR3 and %GS since we need access |
|---|
| 1002 | + * to the per-CPU x86_spec_ctrl_shadow variable. |
|---|
| 1003 | + */ |
|---|
| 1004 | + IBRS_EXIT save_reg=%r15 |
|---|
| 1005 | + |
|---|
| 1006 | + /* |
|---|
| 1007 | + * The order of operations is important. RESTORE_CR3 requires |
|---|
| 1008 | + * kernel GSBASE. |
|---|
| 1009 | + * |
|---|
| 1010 | + * NB to anyone tempted to optimize this code: it does |
|---|
| 1011 | + * not execute at all for exceptions from user mode. Those |
|---|
| 1012 | + * exceptions go through error_return instead. |
|---|
| 1013 | + */ |
|---|
| 1014 | + RESTORE_CR3 scratch_reg=%rax save_reg=%r14 |
|---|
| 1015 | + |
|---|
| 1016 | + /* Handle the three GSBASE cases */ |
|---|
| 1017 | + ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE |
|---|
| 1018 | + |
|---|
| 1019 | + /* With FSGSBASE enabled, unconditionally restore GSBASE */ |
|---|
| 1020 | + wrgsbase %rbx |
|---|
| 1021 | + jmp restore_regs_and_return_to_kernel |
|---|
| 1022 | + |
|---|
| 1023 | +.Lparanoid_exit_checkgs: |
|---|
| 1024 | + /* On non-FSGSBASE systems, conditionally do SWAPGS */ |
|---|
| 1025 | + testl %ebx, %ebx |
|---|
| 1026 | + jnz restore_regs_and_return_to_kernel |
|---|
| 1027 | + |
|---|
| 1028 | + /* We are returning to a context with user GSBASE */ |
|---|
| 1029 | + swapgs |
|---|
| 1030 | + jmp restore_regs_and_return_to_kernel |
|---|
| 1031 | +SYM_CODE_END(paranoid_exit) |
|---|
| 1286 | 1032 | |
|---|
| 1287 | 1033 | /* |
|---|
| 1288 | 1034 | * Save all registers in pt_regs, and switch GS if needed. |
|---|
| 1289 | 1035 | */ |
|---|
| 1290 | | -ENTRY(error_entry) |
|---|
| 1036 | +SYM_CODE_START_LOCAL(error_entry) |
|---|
| 1291 | 1037 | UNWIND_HINT_FUNC |
|---|
| 1292 | 1038 | cld |
|---|
| 1293 | 1039 | PUSH_AND_CLEAR_REGS save_ret=1 |
|---|
| .. | .. |
|---|
| 1303 | 1049 | FENCE_SWAPGS_USER_ENTRY |
|---|
| 1304 | 1050 | /* We have user CR3. Change to kernel CR3. */ |
|---|
| 1305 | 1051 | SWITCH_TO_KERNEL_CR3 scratch_reg=%rax |
|---|
| 1052 | + IBRS_ENTER |
|---|
| 1053 | + UNTRAIN_RET |
|---|
| 1306 | 1054 | |
|---|
| 1307 | 1055 | .Lerror_entry_from_usermode_after_swapgs: |
|---|
| 1056 | + |
|---|
| 1308 | 1057 | /* Put us onto the real thread stack. */ |
|---|
| 1309 | 1058 | popq %r12 /* save return addr in %r12 */ |
|---|
| 1310 | 1059 | movq %rsp, %rdi /* arg0 = pt_regs pointer */ |
|---|
| .. | .. |
|---|
| 1312 | 1061 | movq %rax, %rsp /* switch stack */ |
|---|
| 1313 | 1062 | ENCODE_FRAME_POINTER |
|---|
| 1314 | 1063 | pushq %r12 |
|---|
| 1315 | | - |
|---|
| 1316 | | - /* |
|---|
| 1317 | | - * We need to tell lockdep that IRQs are off. We can't do this until |
|---|
| 1318 | | - * we fix gsbase, and we should do it before enter_from_user_mode |
|---|
| 1319 | | - * (which can take locks). |
|---|
| 1320 | | - */ |
|---|
| 1321 | | - TRACE_IRQS_OFF |
|---|
| 1322 | | - CALL_enter_from_user_mode |
|---|
| 1323 | | - ret |
|---|
| 1324 | | - |
|---|
| 1325 | | -.Lerror_entry_done_lfence: |
|---|
| 1326 | | - FENCE_SWAPGS_KERNEL_ENTRY |
|---|
| 1327 | | -.Lerror_entry_done: |
|---|
| 1328 | | - TRACE_IRQS_OFF |
|---|
| 1329 | | - ret |
|---|
| 1064 | + RET |
|---|
| 1330 | 1065 | |
|---|
| 1331 | 1066 | /* |
|---|
| 1332 | 1067 | * There are two places in the kernel that can potentially fault with |
|---|
| .. | .. |
|---|
| 1350 | 1085 | * .Lgs_change's error handler with kernel gsbase. |
|---|
| 1351 | 1086 | */ |
|---|
| 1352 | 1087 | SWAPGS |
|---|
| 1353 | | - FENCE_SWAPGS_USER_ENTRY |
|---|
| 1354 | | - SWITCH_TO_KERNEL_CR3 scratch_reg=%rax |
|---|
| 1355 | | - jmp .Lerror_entry_done |
|---|
| 1088 | + |
|---|
| 1089 | + /* |
|---|
| 1090 | + * Issue an LFENCE to prevent GS speculation, regardless of whether the |
|---|
| 1091 | + * active GSBASE is a kernel or a user value. |
|---|
| 1092 | + */ |
|---|
| 1093 | +.Lerror_entry_done_lfence: |
|---|
| 1094 | + FENCE_SWAPGS_KERNEL_ENTRY |
|---|
| 1095 | + ANNOTATE_UNRET_END |
|---|
| 1096 | + RET |
|---|
| 1356 | 1097 | |
|---|
| 1357 | 1098 | .Lbstep_iret: |
|---|
| 1358 | 1099 | /* Fix truncated RIP */ |
|---|
| .. | .. |
|---|
| 1367 | 1108 | SWAPGS |
|---|
| 1368 | 1109 | FENCE_SWAPGS_USER_ENTRY |
|---|
| 1369 | 1110 | SWITCH_TO_KERNEL_CR3 scratch_reg=%rax |
|---|
| 1111 | + IBRS_ENTER |
|---|
| 1112 | + UNTRAIN_RET |
|---|
| 1370 | 1113 | |
|---|
| 1371 | 1114 | /* |
|---|
| 1372 | 1115 | * Pretend that the exception came from user mode: set up pt_regs |
|---|
| .. | .. |
|---|
| 1376 | 1119 | call fixup_bad_iret |
|---|
| 1377 | 1120 | mov %rax, %rsp |
|---|
| 1378 | 1121 | jmp .Lerror_entry_from_usermode_after_swapgs |
|---|
| 1379 | | -END(error_entry) |
|---|
| 1122 | +SYM_CODE_END(error_entry) |
|---|
| 1380 | 1123 | |
|---|
| 1381 | | -ENTRY(error_exit) |
|---|
| 1124 | +SYM_CODE_START_LOCAL(error_return) |
|---|
| 1382 | 1125 | UNWIND_HINT_REGS |
|---|
| 1383 | | - DISABLE_INTERRUPTS(CLBR_ANY) |
|---|
| 1384 | | - TRACE_IRQS_OFF |
|---|
| 1126 | + DEBUG_ENTRY_ASSERT_IRQS_OFF |
|---|
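| .. | + /* The low two bits of the saved CS selector are the CPL of the |
|---|
| .. | + * interrupted context: zero for kernel, nonzero for user mode. */ |
|---|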
| 1385 | 1127 | testb $3, CS(%rsp) |
|---|
| 1386 | | - jz retint_kernel |
|---|
| 1387 | | - jmp retint_user |
|---|
| 1388 | | -END(error_exit) |
|---|
| 1128 | + jz restore_regs_and_return_to_kernel |
|---|
| 1129 | + jmp swapgs_restore_regs_and_return_to_usermode |
|---|
| 1130 | +SYM_CODE_END(error_return) |
|---|
| 1389 | 1131 | |
|---|
| 1390 | 1132 | /* |
|---|
| 1391 | 1133 | * Runs on exception stack. Xen PV does not go through this path at all, |
|---|
| .. | .. |
|---|
| 1395 | 1137 | * %r14: Used to save/restore the CR3 of the interrupted context |
|---|
| 1396 | 1138 | * when PAGE_TABLE_ISOLATION is in use. Do not clobber. |
|---|
| 1397 | 1139 | */ |
|---|
| 1398 | | -ENTRY(nmi) |
|---|
| 1140 | +SYM_CODE_START(asm_exc_nmi) |
|---|
| 1399 | 1141 | UNWIND_HINT_IRET_REGS |
|---|
| 1400 | 1142 | |
|---|
| 1401 | 1143 | /* |
|---|
| .. | .. |
|---|
| 1472 | 1214 | PUSH_AND_CLEAR_REGS rdx=(%rdx) |
|---|
| 1473 | 1215 | ENCODE_FRAME_POINTER |
|---|
| 1474 | 1216 | |
|---|
| 1217 | + IBRS_ENTER |
|---|
| 1218 | + UNTRAIN_RET |
|---|
| 1219 | + |
|---|
| 1475 | 1220 | /* |
|---|
| 1476 | 1221 | * At this point we no longer need to worry about stack damage |
|---|
| 1477 | 1222 | * due to nesting -- we're on the normal thread stack and we're |
|---|
| .. | .. |
|---|
| 1480 | 1225 | |
|---|
| 1481 | 1226 | movq %rsp, %rdi |
|---|
| 1482 | 1227 | movq $-1, %rsi |
|---|
| 1483 | | - call do_nmi |
|---|
| 1228 | + call exc_nmi |
|---|
| 1484 | 1229 | |
|---|
| 1485 | 1230 | /* |
|---|
| 1486 | 1231 | * Return back to user mode. We must *not* do the normal exit |
|---|
| .. | .. |
|---|
| 1537 | 1282 | * end_repeat_nmi, then we are a nested NMI. We must not |
|---|
| 1538 | 1283 | * modify the "iret" frame because it's being written by |
|---|
| 1539 | 1284 | * the outer NMI. That's okay; the outer NMI handler is |
|---|
| 1540 | | - * about to about to call do_nmi anyway, so we can just |
|---|
| 1285 | + * about to call exc_nmi() anyway, so we can just |
|---|
| 1541 | 1286 | * resume the outer NMI. |
|---|
| 1542 | 1287 | */ |
|---|
| 1543 | 1288 | |
|---|
| .. | .. |
|---|
| 1656 | 1401 | * RSP is pointing to "outermost RIP". gsbase is unknown, but, if |
|---|
| 1657 | 1402 | * we're repeating an NMI, gsbase has the same value that it had on |
|---|
| 1658 | 1403 | * the first iteration. paranoid_entry will load the kernel |
|---|
| 1659 | | - * gsbase if needed before we call do_nmi. "NMI executing" |
|---|
| 1404 | + * gsbase if needed before we call exc_nmi(). "NMI executing" |
|---|
| 1660 | 1405 | * is zero. |
|---|
| 1661 | 1406 | */ |
|---|
| 1662 | 1407 | movq $1, 10*8(%rsp) /* Set "NMI executing". */ |
|---|
| .. | .. |
|---|
| 1690 | 1435 | call paranoid_entry |
|---|
| 1691 | 1436 | UNWIND_HINT_REGS |
|---|
| 1692 | 1437 | |
|---|
| 1693 | | - /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ |
|---|
| 1694 | 1438 | movq %rsp, %rdi |
|---|
| 1695 | 1439 | movq $-1, %rsi |
|---|
| 1696 | | - call do_nmi |
|---|
| 1440 | + call exc_nmi |
|---|
| 1441 | + |
|---|
| 1442 | + /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */ |
|---|
| 1443 | + IBRS_EXIT save_reg=%r15 |
|---|
| 1697 | 1444 | |
|---|
| 1698 | 1445 | /* Always restore stashed CR3 value (see paranoid_entry) */ |
|---|
| 1699 | 1446 | RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 |
|---|
| 1700 | 1447 | |
|---|
| 1701 | | - testl %ebx, %ebx /* swapgs needed? */ |
|---|
| 1448 | + /* |
|---|
| 1449 | + * The above invocation of paranoid_entry stored the GSBASE |
|---|
| 1450 | + * related information in R/EBX depending on the availability |
|---|
| 1451 | + * of FSGSBASE. |
|---|
| 1452 | + * |
|---|
| 1453 | + * If FSGSBASE is enabled, restore the saved GSBASE value |
|---|
| 1454 | + * unconditionally, otherwise take the conditional SWAPGS path. |
|---|
| 1455 | + */ |
|---|
| 1456 | + ALTERNATIVE "jmp nmi_no_fsgsbase", "", X86_FEATURE_FSGSBASE |
|---|
| 1457 | + |
|---|
| 1458 | + wrgsbase %rbx |
|---|
| 1459 | + jmp nmi_restore |
|---|
| 1460 | + |
|---|
| 1461 | +nmi_no_fsgsbase: |
|---|
| 1462 | + /* EBX == 0 -> invoke SWAPGS */ |
|---|
| 1463 | + testl %ebx, %ebx |
|---|
| 1702 | 1464 | jnz nmi_restore |
|---|
| 1465 | + |
|---|
| 1703 | 1466 | nmi_swapgs: |
|---|
| 1704 | | - SWAPGS_UNSAFE_STACK |
|---|
| 1467 | + swapgs |
|---|
| 1468 | + |
|---|
| 1705 | 1469 | nmi_restore: |
|---|
| 1706 | 1470 | POP_REGS |
|---|
| 1707 | 1471 | |
|---|
| .. | .. |
|---|
| 1730 | 1494 | * about espfix64 on the way back to kernel mode. |
|---|
| 1731 | 1495 | */ |
|---|
| 1732 | 1496 | iretq |
|---|
| 1733 | | -END(nmi) |
|---|
| 1497 | +SYM_CODE_END(asm_exc_nmi) |
|---|
| 1734 | 1498 | |
|---|
| 1735 | | -ENTRY(ignore_sysret) |
|---|
| 1499 | +#ifndef CONFIG_IA32_EMULATION |
|---|
| 1500 | +/* |
|---|
| 1501 | + * This handles SYSCALL from 32-bit code. There is no way to program |
|---|
| 1502 | + * MSRs to fully disable 32-bit SYSCALL. |
|---|
| 1503 | + */ |
|---|
| 1504 | +SYM_CODE_START(ignore_sysret) |
|---|
| 1736 | 1505 | UNWIND_HINT_EMPTY |
|---|
| 1737 | 1506 | mov $-ENOSYS, %eax |
|---|
| 1738 | | - sysret |
|---|
| 1739 | | -END(ignore_sysret) |
|---|
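| .. | + /* SYSCALL stashed the return RIP in RCX and RFLAGS in R11; |
|---|
| .. | + * sysretl (the 32-bit form) restores both on the way back to |
|---|
| .. | + * 32-bit user mode. */ |
|---|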
| 1507 | + sysretl |
|---|
| 1508 | +SYM_CODE_END(ignore_sysret) |
|---|
| 1509 | +#endif |
|---|
| 1740 | 1510 | |
|---|
| 1741 | | -ENTRY(rewind_stack_do_exit) |
|---|
| 1511 | +.pushsection .text, "ax" |
|---|
| 1512 | +SYM_CODE_START(rewind_stack_and_make_dead) |
|---|
| 1742 | 1513 | UNWIND_HINT_FUNC |
|---|
| 1743 | 1514 | /* Prevent any naive code from trying to unwind to our caller. */ |
|---|
| 1744 | 1515 | xorl %ebp, %ebp |
|---|
| .. | .. |
|---|
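| .. | + /* %rax holds the top of the task stack here; leave room for a |
|---|
| .. | + * pt_regs so the unwinder sees a sane frame when make_task_dead() |
|---|
| .. | + * runs. */ |
|---|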
| 1747 | 1518 | leaq -PTREGS_SIZE(%rax), %rsp |
|---|
| 1748 | 1519 | UNWIND_HINT_REGS |
|---|
| 1749 | 1520 | |
|---|
| 1750 | | - call do_exit |
|---|
| 1751 | | -END(rewind_stack_do_exit) |
|---|
| 1521 | + call make_task_dead |
|---|
| 1522 | +SYM_CODE_END(rewind_stack_and_make_dead) |
|---|
| 1523 | +.popsection |
|---|