@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 1991, 1992 Linus Torvalds
  * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
@@ -21,13 +22,10 @@
 #include <linux/ratelimit.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/atomic.h>
 #include <linux/sched/clock.h>
 
-#if defined(CONFIG_EDAC)
-#include <linux/edac.h>
-#endif
-
-#include <linux/atomic.h>
+#include <asm/cpu_entry_area.h>
 #include <asm/traps.h>
 #include <asm/mach_traps.h>
 #include <asm/nmi.h>
@@ -35,6 +33,7 @@
 #include <asm/reboot.h>
 #include <asm/cache.h>
 #include <asm/nospec-branch.h>
+#include <asm/sev-es.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/nmi.h>
@@ -304,7 +303,7 @@
 static DEFINE_PER_CPU(bool, swallow_nmi);
 static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
 
-static void default_do_nmi(struct pt_regs *regs)
+static noinstr void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
 	int handled;
@@ -330,6 +329,8 @@
 
 	__this_cpu_write(last_nmi_rip, regs->ip);
 
+	instrumentation_begin();
+
 	handled = nmi_handle(NMI_LOCAL, regs);
 	__this_cpu_add(nmi_stats.normal, handled);
 	if (handled) {
@@ -343,7 +344,7 @@
 		 */
 		if (handled > 1)
 			__this_cpu_write(swallow_nmi, true);
-		return;
+		goto out;
 	}
 
 	/*
@@ -375,7 +376,7 @@
 #endif
 		__this_cpu_add(nmi_stats.external, 1);
 		raw_spin_unlock(&nmi_reason_lock);
-		return;
+		goto out;
 	}
 	raw_spin_unlock(&nmi_reason_lock);
 
@@ -400,9 +401,9 @@
 	 * a 'real' unknown NMI. For example, while processing
 	 * a perf NMI another perf NMI comes in along with a
 	 * 'real' unknown NMI. These two NMIs get combined into
-	 * one (as descibed above). When the next NMI gets
+	 * one (as described above). When the next NMI gets
 	 * processed, it will be flagged by perf as handled, but
-	 * noone will know that there was a 'real' unknown NMI sent
+	 * no one will know that there was a 'real' unknown NMI sent
 	 * also. As a result it gets swallowed. Or if the first
 	 * perf NMI returns two events handled then the second
 	 * NMI will get eaten by the logic below, again losing a
@@ -413,8 +414,10 @@
 		__this_cpu_add(nmi_stats.swallow, 1);
 	else
 		unknown_nmi_error(reason, regs);
+
+out:
+	instrumentation_end();
 }
-NOKPROBE_SYMBOL(default_do_nmi);
 
 /*
  * NMIs can page fault or hit breakpoints which will cause it to lose
@@ -468,28 +471,21 @@
 };
 static DEFINE_PER_CPU(enum nmi_states, nmi_state);
 static DEFINE_PER_CPU(unsigned long, nmi_cr2);
+static DEFINE_PER_CPU(unsigned long, nmi_dr7);
 
-#ifdef CONFIG_X86_64
-/*
- * In x86_64, we need to handle breakpoint -> NMI -> breakpoint. Without
- * some care, the inner breakpoint will clobber the outer breakpoint's
- * stack.
- *
- * If a breakpoint is being processed, and the debug stack is being
- * used, if an NMI comes in and also hits a breakpoint, the stack
- * pointer will be set to the same fixed address as the breakpoint that
- * was interrupted, causing that stack to be corrupted. To handle this
- * case, check if the stack that was interrupted is the debug stack, and
- * if so, change the IDT so that new breakpoints will use the current
- * stack and not switch to the fixed address. On return of the NMI,
- * switch back to the original IDT.
- */
-static DEFINE_PER_CPU(int, update_debug_stack);
-#endif
-
-dotraplinkage notrace void
-do_nmi(struct pt_regs *regs, long error_code)
+DEFINE_IDTENTRY_RAW(exc_nmi)
 {
+	irqentry_state_t irq_state;
+
+	/*
+	 * Re-enable NMIs right here when running as an SEV-ES guest. This might
+	 * cause nested NMIs, but those can be handled safely.
+	 */
+	sev_es_nmi_complete();
+
+	if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
+		return;
+
 	if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
 		this_cpu_write(nmi_state, NMI_LATCHED);
 		return;
@@ -498,34 +494,26 @@
 	this_cpu_write(nmi_cr2, read_cr2());
 nmi_restart:
 
-#ifdef CONFIG_X86_64
 	/*
-	 * If we interrupted a breakpoint, it is possible that
-	 * the nmi handler will have breakpoints too. We need to
-	 * change the IDT such that breakpoints that happen here
-	 * continue to use the NMI stack.
+	 * Needs to happen before DR7 is accessed, because the hypervisor can
+	 * intercept DR7 reads/writes, turning those into #VC exceptions.
 	 */
-	if (unlikely(is_debug_stack(regs->sp))) {
-		debug_stack_set_zero();
-		this_cpu_write(update_debug_stack, 1);
-	}
-#endif
+	sev_es_ist_enter(regs);
 
-	nmi_enter();
+	this_cpu_write(nmi_dr7, local_db_save());
+
+	irq_state = irqentry_nmi_enter(regs);
 
 	inc_irq_stat(__nmi_count);
 
 	if (!ignore_nmis)
 		default_do_nmi(regs);
 
-	nmi_exit();
+	irqentry_nmi_exit(regs, irq_state);
 
-#ifdef CONFIG_X86_64
-	if (unlikely(this_cpu_read(update_debug_stack))) {
-		debug_stack_reset();
-		this_cpu_write(update_debug_stack, 0);
-	}
-#endif
+	local_db_restore(this_cpu_read(nmi_dr7));
+
+	sev_es_ist_exit();
 
 	if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
 		write_cr2(this_cpu_read(nmi_cr2));
@@ -535,7 +523,16 @@
 	if (user_mode(regs))
 		mds_user_clear_cpu_buffers();
 }
-NOKPROBE_SYMBOL(do_nmi);
+
+#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
+DEFINE_IDTENTRY_RAW(exc_nmi_noist)
+{
+	exc_nmi(regs);
+}
+#endif
+#if IS_MODULE(CONFIG_KVM_INTEL)
+EXPORT_SYMBOL_GPL(asm_exc_nmi_noist);
+#endif
 
 void stop_nmi(void)
 {