@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 1991, 1992 Linus Torvalds
  * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
@@ -21,13 +22,10 @@
 #include <linux/ratelimit.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/atomic.h>
 #include <linux/sched/clock.h>
 
-#if defined(CONFIG_EDAC)
-#include <linux/edac.h>
-#endif
-
-#include <linux/atomic.h>
+#include <asm/cpu_entry_area.h>
 #include <asm/traps.h>
 #include <asm/mach_traps.h>
 #include <asm/nmi.h>
@@ -35,6 +33,7 @@
 #include <asm/reboot.h>
 #include <asm/cache.h>
 #include <asm/nospec-branch.h>
+#include <asm/sev-es.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/nmi.h>
@@ -304,7 +303,7 @@
 static DEFINE_PER_CPU(bool, swallow_nmi);
 static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
 
-static void default_do_nmi(struct pt_regs *regs)
+static noinstr void default_do_nmi(struct pt_regs *regs)
 {
         unsigned char reason = 0;
         int handled;
@@ -330,6 +329,8 @@
 
         __this_cpu_write(last_nmi_rip, regs->ip);
 
+        instrumentation_begin();
+
         handled = nmi_handle(NMI_LOCAL, regs);
         __this_cpu_add(nmi_stats.normal, handled);
         if (handled) {
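A note on the pattern in the two hunks above: default_do_nmi() is now `noinstr`, so nothing instrumentable (tracing, kprobes, sanitizers) may run on its entry and exit paths, and instrumentation_begin()/instrumentation_end() re-permit instrumentation only around the handler body. That is also why the early `return` statements in the following hunks become `goto out`: the function needs a single exit that passes through instrumentation_end(). A standalone sketch of the bracketing, with the kernel annotations stubbed out (the real definitions live in <linux/compiler_types.h> and <linux/instrumentation.h>):

```c
/* Stubs so the sketch compiles outside the kernel; illustrative only. */
#define noinstr                 /* really: section + no-instrument attributes */
#define instrumentation_begin() do { } while (0)
#define instrumentation_end()   do { } while (0)

static noinstr void sketch_handler(void)
{
        /* entry path: must stay free of instrumentable calls */

        instrumentation_begin();
        /* handler body: tracing/probing is permitted again */
        instrumentation_end();

        /* exit path: instrumentation-free, reached via a single "out:" label */
}
```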
@@ -343,7 +344,7 @@
                  */
                 if (handled > 1)
                         __this_cpu_write(swallow_nmi, true);
-                return;
+                goto out;
         }
 
         /*
@@ -375,7 +376,7 @@
 #endif
                 __this_cpu_add(nmi_stats.external, 1);
                 raw_spin_unlock(&nmi_reason_lock);
-                return;
+                goto out;
         }
         raw_spin_unlock(&nmi_reason_lock);
 
@@ -400,9 +401,9 @@
          * a 'real' unknown NMI. For example, while processing
          * a perf NMI another perf NMI comes in along with a
          * 'real' unknown NMI. These two NMIs get combined into
-         * one (as descibed above). When the next NMI gets
+         * one (as described above). When the next NMI gets
          * processed, it will be flagged by perf as handled, but
-         * noone will know that there was a 'real' unknown NMI sent
+         * no one will know that there was a 'real' unknown NMI sent
          * also. As a result it gets swallowed. Or if the first
          * perf NMI returns two events handled then the second
          * NMI will get eaten by the logic below, again losing a
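The comment above carries the reasoning behind the swallow heuristic, so a compact model may help. The sketch below mirrors the per-CPU `swallow_nmi`/`last_nmi_rip` variables declared earlier in the file; the reset-on-new-RIP step is reconstructed from context this diff elides, so treat it as an assumption about the surrounding code:

```c
#include <stdbool.h>
#include <stdio.h>

/* Userspace model of the back-to-back NMI swallow heuristic. */
static bool swallow_nmi;            /* armed when one NMI handled > 1 event */
static unsigned long last_nmi_rip;

static void model_nmi(unsigned long rip, int handled)
{
        /* A different RIP means not back-to-back: disarm (assumed step). */
        if (rip != last_nmi_rip)
                swallow_nmi = false;
        last_nmi_rip = rip;

        if (handled) {
                /* Two events in one NMI: the next NMI may be one of ours. */
                if (handled > 1)
                        swallow_nmi = true;
                return;
        }

        if (swallow_nmi)
                printf("unknown NMI at %#lx swallowed\n", rip);
        else
                printf("real unknown NMI at %#lx\n", rip);
}
```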
@@ -413,8 +414,10 @@
                 __this_cpu_add(nmi_stats.swallow, 1);
         else
                 unknown_nmi_error(reason, regs);
+
+out:
+        instrumentation_end();
 }
-NOKPROBE_SYMBOL(default_do_nmi);
 
 /*
  * NMIs can page fault or hit breakpoints which will cause it to lose
@@ -468,28 +471,21 @@
 };
 static DEFINE_PER_CPU(enum nmi_states, nmi_state);
 static DEFINE_PER_CPU(unsigned long, nmi_cr2);
+static DEFINE_PER_CPU(unsigned long, nmi_dr7);
 
-#ifdef CONFIG_X86_64
-/*
- * In x86_64, we need to handle breakpoint -> NMI -> breakpoint. Without
- * some care, the inner breakpoint will clobber the outer breakpoint's
- * stack.
- *
- * If a breakpoint is being processed, and the debug stack is being
- * used, if an NMI comes in and also hits a breakpoint, the stack
- * pointer will be set to the same fixed address as the breakpoint that
- * was interrupted, causing that stack to be corrupted. To handle this
- * case, check if the stack that was interrupted is the debug stack, and
- * if so, change the IDT so that new breakpoints will use the current
- * stack and not switch to the fixed address. On return of the NMI,
- * switch back to the original IDT.
- */
-static DEFINE_PER_CPU(int, update_debug_stack);
-#endif
-
-dotraplinkage notrace void
-do_nmi(struct pt_regs *regs, long error_code)
+DEFINE_IDTENTRY_RAW(exc_nmi)
 {
+        irqentry_state_t irq_state;
+
+        /*
+         * Re-enable NMIs right here when running as an SEV-ES guest. This might
+         * cause nested NMIs, but those can be handled safely.
+         */
+        sev_es_nmi_complete();
+
+        if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
+                return;
+
         if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
                 this_cpu_write(nmi_state, NMI_LATCHED);
                 return;
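The new `DEFINE_IDTENTRY_RAW(exc_nmi)` body keeps the long-standing latch: an NMI handler can take a fault whose IRET re-enables NMIs, so a nested NMI only records itself as NMI_LATCHED and the outer invocation replays it through the `nmi_restart` label. The replay step falls in lines this diff elides, so the sketch below reconstructs the state machine from the visible enum, label, and latch check; it is a single-threaded model, not the kernel's per-CPU code:

```c
/* Simplified model of the NMI latch/replay state machine. */
enum nmi_states { NMI_NOT_RUNNING, NMI_EXECUTING, NMI_LATCHED };
static enum nmi_states nmi_state;

static void handle_one_nmi(void)
{
        /* stand-in for default_do_nmi() and the surrounding bookkeeping */
}

static void model_exc_nmi(void)
{
        if (nmi_state != NMI_NOT_RUNNING) {
                /* Nested NMI: latch it; the outer invocation replays it. */
                nmi_state = NMI_LATCHED;
                return;
        }
        nmi_state = NMI_EXECUTING;

nmi_restart:
        handle_one_nmi();

        /* Did a nested NMI latch while we ran? Then run it now. */
        if (nmi_state == NMI_LATCHED) {
                nmi_state = NMI_EXECUTING;
                goto nmi_restart;
        }
        nmi_state = NMI_NOT_RUNNING;
}
```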
@@ -498,34 +494,26 @@
         this_cpu_write(nmi_cr2, read_cr2());
 nmi_restart:
 
-#ifdef CONFIG_X86_64
         /*
-         * If we interrupted a breakpoint, it is possible that
-         * the nmi handler will have breakpoints too. We need to
-         * change the IDT such that breakpoints that happen here
-         * continue to use the NMI stack.
+         * Needs to happen before DR7 is accessed, because the hypervisor can
+         * intercept DR7 reads/writes, turning those into #VC exceptions.
          */
-        if (unlikely(is_debug_stack(regs->sp))) {
-                debug_stack_set_zero();
-                this_cpu_write(update_debug_stack, 1);
-        }
-#endif
+        sev_es_ist_enter(regs);
 
-        nmi_enter();
+        this_cpu_write(nmi_dr7, local_db_save());
+
+        irq_state = irqentry_nmi_enter(regs);
 
         inc_irq_stat(__nmi_count);
 
         if (!ignore_nmis)
                 default_do_nmi(regs);
 
-        nmi_exit();
+        irqentry_nmi_exit(regs, irq_state);
 
-#ifdef CONFIG_X86_64
-        if (unlikely(this_cpu_read(update_debug_stack))) {
-                debug_stack_reset();
-                this_cpu_write(update_debug_stack, 0);
-        }
-#endif
+        local_db_restore(this_cpu_read(nmi_dr7));
+
+        sev_es_ist_exit();
 
         if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
                 write_cr2(this_cpu_read(nmi_cr2));
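Two replacements land in this hunk. `nmi_enter()`/`nmi_exit()` become `irqentry_nmi_enter()`/`irqentry_nmi_exit()` with an explicit `irq_state`, and the old update_debug_stack IDT switching gives way to disarming hardware breakpoints outright: DR7 is saved and cleared for the whole NMI, then restored on exit. The comment also pins the ordering: under SEV-ES, `sev_es_ist_enter()` must run before DR7 is touched, since the hypervisor may intercept debug-register access and turn it into a #VC exception. A sketch of what the save/restore pair amounts to, with `get_dr7()`/`set_dr7()` as hypothetical stand-ins for the real debug-register accessors:

```c
/* Hypothetical accessors modelling the DR7 register for the sketch. */
static unsigned long dr7_shadow;
static unsigned long get_dr7(void)   { return dr7_shadow; }
static void set_dr7(unsigned long v) { dr7_shadow = v; }

/* Disarm all hardware breakpoints; return the old DR7 for the restore. */
static unsigned long sketch_local_db_save(void)
{
        unsigned long dr7 = get_dr7();

        if (dr7)
                set_dr7(0);     /* no hardware #DB can fire from here on */
        return dr7;
}

static void sketch_local_db_restore(unsigned long dr7)
{
        if (dr7)
                set_dr7(dr7);   /* re-arm breakpoints on the way out */
}
```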
@@ -535,7 +523,16 @@
         if (user_mode(regs))
                 mds_user_clear_cpu_buffers();
 }
-NOKPROBE_SYMBOL(do_nmi);
+
+#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
+DEFINE_IDTENTRY_RAW(exc_nmi_noist)
+{
+        exc_nmi(regs);
+}
+#endif
+#if IS_MODULE(CONFIG_KVM_INTEL)
+EXPORT_SYMBOL_GPL(asm_exc_nmi_noist);
+#endif
 
 void stop_nmi(void)
 {
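On the final hunk: `DEFINE_IDTENTRY_RAW(exc_nmi_noist)` gives KVM's VMX code an alternate NMI entry that bypasses the IST stack, and the exported symbol is `asm_exc_nmi_noist` because the idtentry macro family pairs each C body `exc_*()` with an assembly stub `asm_exc_*` that is the actual entry point. A much-simplified model of that naming scheme (the real macro also emits prototypes, and the stub is assembly, not C):

```c
struct pt_regs;

/* Toy version of the idtentry macro: the C body an asm stub jumps to. */
#define DEFINE_IDTENTRY_RAW_SKETCH(func) \
        void func(struct pt_regs *regs)

DEFINE_IDTENTRY_RAW_SKETCH(exc_example)
{
        /* handler body; entered through a generated asm_exc_example stub */
        (void)regs;
}
```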