| .. | .. |
|---|
| 23 | 23 | #include <linux/start_kernel.h> |
|---|
| 24 | 24 | #include <linux/sched.h> |
|---|
| 25 | 25 | #include <linux/kprobes.h> |
|---|
| 26 | | -#include <linux/bootmem.h> |
|---|
| 26 | +#include <linux/memblock.h> |
|---|
| 27 | 27 | #include <linux/export.h> |
|---|
| 28 | 28 | #include <linux/mm.h> |
|---|
| 29 | 29 | #include <linux/page-flags.h> |
|---|
| .. | .. |
|---|
| 31 | 31 | #include <linux/console.h> |
|---|
| 32 | 32 | #include <linux/pci.h> |
|---|
| 33 | 33 | #include <linux/gfp.h> |
|---|
| 34 | | -#include <linux/memblock.h> |
|---|
| 35 | 34 | #include <linux/edd.h> |
|---|
| 36 | | -#include <linux/frame.h> |
|---|
| 35 | +#include <linux/objtool.h> |
|---|
| 37 | 36 | |
|---|
| 38 | 37 | #include <xen/xen.h> |
|---|
| 39 | 38 | #include <xen/events.h> |
|---|
| .. | .. |
|---|
| 64 | 63 | #include <asm/setup.h> |
|---|
| 65 | 64 | #include <asm/desc.h> |
|---|
| 66 | 65 | #include <asm/pgalloc.h> |
|---|
| 67 | | -#include <asm/pgtable.h> |
|---|
| 68 | 66 | #include <asm/tlbflush.h> |
|---|
| 69 | 67 | #include <asm/reboot.h> |
|---|
| 70 | 68 | #include <asm/stackprotector.h> |
|---|
| .. | .. |
|---|
| 73 | 71 | #include <asm/mwait.h> |
|---|
| 74 | 72 | #include <asm/pci_x86.h> |
|---|
| 75 | 73 | #include <asm/cpu.h> |
|---|
| 74 | +#ifdef CONFIG_X86_IOPL_IOPERM |
|---|
| 75 | +#include <asm/io_bitmap.h> |
|---|
| 76 | +#endif |
|---|
| 76 | 77 | |
|---|
| 77 | 78 | #ifdef CONFIG_ACPI |
|---|
| 78 | 79 | #include <linux/acpi.h> |
|---|
| .. | .. |
|---|
| 344 | 345 | pte_t *ptep; |
|---|
| 345 | 346 | pte_t pte; |
|---|
| 346 | 347 | unsigned long pfn; |
|---|
| 347 | | - struct page *page; |
|---|
| 348 | 348 | unsigned char dummy; |
|---|
| 349 | + void *va; |
|---|
| 349 | 350 | |
|---|
| 350 | 351 | ptep = lookup_address((unsigned long)v, &level); |
|---|
| 351 | 352 | BUG_ON(ptep == NULL); |
|---|
| 352 | 353 | |
|---|
| 353 | 354 | pfn = pte_pfn(*ptep); |
|---|
| 354 | | - page = pfn_to_page(pfn); |
|---|
| 355 | | - |
|---|
| 356 | 355 | pte = pfn_pte(pfn, prot); |
|---|
| 357 | 356 | |
|---|
| 358 | 357 | /* |
|---|
| .. | .. |
|---|
| 377 | 376 | |
|---|
| 378 | 377 | preempt_disable(); |
|---|
| 379 | 378 | |
|---|
| 380 | | - probe_kernel_read(&dummy, v, 1); |
|---|
| 379 | + copy_from_kernel_nofault(&dummy, v, 1); |
|---|
| 381 | 380 | |
|---|
| 382 | 381 | if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) |
|---|
| 383 | 382 | BUG(); |
|---|
| 384 | 383 | |
|---|
| 385 | | - if (!PageHighMem(page)) { |
|---|
| 386 | | - void *av = __va(PFN_PHYS(pfn)); |
|---|
| 384 | + va = __va(PFN_PHYS(pfn)); |
|---|
| 387 | 385 | |
|---|
| 388 | | - if (av != v) |
|---|
| 389 | | - if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0)) |
|---|
| 390 | | - BUG(); |
|---|
| 391 | | - } else |
|---|
| 392 | | - kmap_flush_unused(); |
|---|
| 386 | + if (va != v && HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0)) |
|---|
| 387 | + BUG(); |
|---|
| 393 | 388 | |
|---|
| 394 | 389 | preempt_enable(); |
|---|
| 395 | 390 | } |
|---|
| .. | .. |
|---|
| 529 | 524 | static void xen_load_tls(struct thread_struct *t, unsigned int cpu) |
|---|
| 530 | 525 | { |
|---|
| 531 | 526 | /* |
|---|
| 532 | | - * XXX sleazy hack: If we're being called in a lazy-cpu zone |
|---|
| 533 | | - * and lazy gs handling is enabled, it means we're in a |
|---|
| 534 | | - * context switch, and %gs has just been saved. This means we |
|---|
| 535 | | - * can zero it out to prevent faults on exit from the |
|---|
| 536 | | - * hypervisor if the next process has no %gs. Either way, it |
|---|
| 537 | | - * has been saved, and the new value will get loaded properly. |
|---|
| 538 | | - * This will go away as soon as Xen has been modified to not |
|---|
| 539 | | - * save/restore %gs for normal hypercalls. |
|---|
| 540 | | - * |
|---|
| 541 | | - * On x86_64, this hack is not used for %gs, because gs points |
|---|
| 542 | | - * to KERNEL_GS_BASE (and uses it for PDA references), so we |
|---|
| 543 | | - * must not zero %gs on x86_64 |
|---|
| 544 | | - * |
|---|
| 545 | | - * For x86_64, we need to zero %fs, otherwise we may get an |
|---|
| 527 | + * In lazy mode we need to zero %fs, otherwise we may get an |
|---|
| 546 | 528 | * exception between the new %fs descriptor being loaded and |
|---|
| 547 | 529 | * %fs being effectively cleared at __switch_to(). |
|---|
| 548 | 530 | */ |
|---|
| 549 | | - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { |
|---|
| 550 | | -#ifdef CONFIG_X86_32 |
|---|
| 551 | | - lazy_load_gs(0); |
|---|
| 552 | | -#else |
|---|
| 531 | + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) |
|---|
| 553 | 532 | loadsegment(fs, 0); |
|---|
| 554 | | -#endif |
|---|
| 555 | | - } |
|---|
| 556 | 533 | |
|---|
| 557 | 534 | xen_mc_batch(); |
|---|
| 558 | 535 | |
|---|
| .. | .. |
|---|
| 563 | 540 | xen_mc_issue(PARAVIRT_LAZY_CPU); |
|---|
| 564 | 541 | } |
|---|
| 565 | 542 | |
|---|
| 566 | | -#ifdef CONFIG_X86_64 |
|---|
| 567 | 543 | static void xen_load_gs_index(unsigned int idx) |
|---|
| 568 | 544 | { |
|---|
| 569 | 545 | if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx)) |
|---|
| 570 | 546 | BUG(); |
|---|
| 571 | 547 | } |
|---|
| 572 | | -#endif |
|---|
| 573 | 548 | |
|---|
| 574 | 549 | static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, |
|---|
| 575 | 550 | const void *ptr) |
|---|
| .. | .. |
|---|
| 588 | 563 | preempt_enable(); |
|---|
| 589 | 564 | } |
|---|
| 590 | 565 | |
|---|
| 591 | | -#ifdef CONFIG_X86_64 |
|---|
| 566 | +void noist_exc_debug(struct pt_regs *regs); |
|---|
| 567 | + |
|---|
| 568 | +DEFINE_IDTENTRY_RAW(xenpv_exc_nmi) |
|---|
| 569 | +{ |
|---|
| 570 | + /* On Xen PV, NMI doesn't use IST. The C part is the same as native. */ |
|---|
| 571 | + exc_nmi(regs); |
|---|
| 572 | +} |
|---|
| 573 | + |
|---|
| 574 | +DEFINE_IDTENTRY_RAW(xenpv_exc_debug) |
|---|
| 575 | +{ |
|---|
| 576 | + /* |
|---|
| 577 | + * There's no IST on Xen PV, but we still need to dispatch |
|---|
| 578 | + * to the correct handler. |
|---|
| 579 | + */ |
|---|
| 580 | + if (user_mode(regs)) |
|---|
| 581 | + noist_exc_debug(regs); |
|---|
| 582 | + else |
|---|
| 583 | + exc_debug(regs); |
|---|
| 584 | +} |
|---|
| 585 | + |
|---|
| 586 | +DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap) |
|---|
| 587 | +{ |
|---|
| 588 | + /* This should never happen and there is no way to handle it. */ |
|---|
| 589 | + instrumentation_begin(); |
|---|
| 590 | + pr_err("Unknown trap in Xen PV mode."); |
|---|
| 591 | + BUG(); |
|---|
| 592 | + instrumentation_end(); |
|---|
| 593 | +} |
|---|
| 594 | + |
|---|
| 592 | 595 | struct trap_array_entry { |
|---|
| 593 | 596 | void (*orig)(void); |
|---|
| 594 | 597 | void (*xen)(void); |
|---|
| 595 | 598 | bool ist_okay; |
|---|
| 596 | 599 | }; |
|---|
| 597 | 600 | |
|---|
| 601 | +#define TRAP_ENTRY(func, ist_ok) { \ |
|---|
| 602 | + .orig = asm_##func, \ |
|---|
| 603 | + .xen = xen_asm_##func, \ |
|---|
| 604 | + .ist_okay = ist_ok } |
|---|
| 605 | + |
|---|
| 606 | +#define TRAP_ENTRY_REDIR(func, ist_ok) { \ |
|---|
| 607 | + .orig = asm_##func, \ |
|---|
| 608 | + .xen = xen_asm_xenpv_##func, \ |
|---|
| 609 | + .ist_okay = ist_ok } |
|---|
| 610 | + |
|---|
| 598 | 611 | static struct trap_array_entry trap_array[] = { |
|---|
| 599 | | - { debug, xen_xendebug, true }, |
|---|
| 600 | | - { double_fault, xen_double_fault, true }, |
|---|
| 612 | + TRAP_ENTRY_REDIR(exc_debug, true ), |
|---|
| 613 | + TRAP_ENTRY(exc_double_fault, true ), |
|---|
| 601 | 614 | #ifdef CONFIG_X86_MCE |
|---|
| 602 | | - { machine_check, xen_machine_check, true }, |
|---|
| 615 | + TRAP_ENTRY(exc_machine_check, true ), |
|---|
| 603 | 616 | #endif |
|---|
| 604 | | - { nmi, xen_xennmi, true }, |
|---|
| 605 | | - { int3, xen_int3, false }, |
|---|
| 606 | | - { overflow, xen_overflow, false }, |
|---|
| 617 | + TRAP_ENTRY_REDIR(exc_nmi, true ), |
|---|
| 618 | + TRAP_ENTRY(exc_int3, false ), |
|---|
| 619 | + TRAP_ENTRY(exc_overflow, false ), |
|---|
| 607 | 620 | #ifdef CONFIG_IA32_EMULATION |
|---|
| 608 | 621 | { entry_INT80_compat, xen_entry_INT80_compat, false }, |
|---|
| 609 | 622 | #endif |
|---|
| 610 | | - { page_fault, xen_page_fault, false }, |
|---|
| 611 | | - { divide_error, xen_divide_error, false }, |
|---|
| 612 | | - { bounds, xen_bounds, false }, |
|---|
| 613 | | - { invalid_op, xen_invalid_op, false }, |
|---|
| 614 | | - { device_not_available, xen_device_not_available, false }, |
|---|
| 615 | | - { coprocessor_segment_overrun, xen_coprocessor_segment_overrun, false }, |
|---|
| 616 | | - { invalid_TSS, xen_invalid_TSS, false }, |
|---|
| 617 | | - { segment_not_present, xen_segment_not_present, false }, |
|---|
| 618 | | - { stack_segment, xen_stack_segment, false }, |
|---|
| 619 | | - { general_protection, xen_general_protection, false }, |
|---|
| 620 | | - { spurious_interrupt_bug, xen_spurious_interrupt_bug, false }, |
|---|
| 621 | | - { coprocessor_error, xen_coprocessor_error, false }, |
|---|
| 622 | | - { alignment_check, xen_alignment_check, false }, |
|---|
| 623 | | - { simd_coprocessor_error, xen_simd_coprocessor_error, false }, |
|---|
| 623 | + TRAP_ENTRY(exc_page_fault, false ), |
|---|
| 624 | + TRAP_ENTRY(exc_divide_error, false ), |
|---|
| 625 | + TRAP_ENTRY(exc_bounds, false ), |
|---|
| 626 | + TRAP_ENTRY(exc_invalid_op, false ), |
|---|
| 627 | + TRAP_ENTRY(exc_device_not_available, false ), |
|---|
| 628 | + TRAP_ENTRY(exc_coproc_segment_overrun, false ), |
|---|
| 629 | + TRAP_ENTRY(exc_invalid_tss, false ), |
|---|
| 630 | + TRAP_ENTRY(exc_segment_not_present, false ), |
|---|
| 631 | + TRAP_ENTRY(exc_stack_segment, false ), |
|---|
| 632 | + TRAP_ENTRY(exc_general_protection, false ), |
|---|
| 633 | + TRAP_ENTRY(exc_spurious_interrupt_bug, false ), |
|---|
| 634 | + TRAP_ENTRY(exc_coprocessor_error, false ), |
|---|
| 635 | + TRAP_ENTRY(exc_alignment_check, false ), |
|---|
| 636 | + TRAP_ENTRY(exc_simd_coprocessor_error, false ), |
|---|
| 624 | 637 | }; |
|---|
| 625 | 638 | |
|---|
| 626 | 639 | static bool __ref get_trap_addr(void **addr, unsigned int ist) |
|---|
| 627 | 640 | { |
|---|
| 628 | 641 | unsigned int nr; |
|---|
| 629 | 642 | bool ist_okay = false; |
|---|
| 643 | + bool found = false; |
|---|
| 630 | 644 | |
|---|
| 631 | 645 | /* |
|---|
| 632 | 646 | * Replace trap handler addresses by Xen specific ones. |
|---|
| 633 | 647 | * Check for known traps using IST and whitelist them. |
|---|
| 634 | 648 | * The debugger ones are the only ones we care about. |
|---|
| 635 | | - * Xen will handle faults like double_fault, * so we should never see |
|---|
| 649 | + * Xen will handle faults like double_fault, so we should never see |
|---|
| 636 | 650 | * them. Warn if there's an unexpected IST-using fault handler. |
|---|
| 637 | 651 | */ |
|---|
| 638 | 652 | for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++) { |
|---|
| .. | .. |
|---|
| 641 | 655 | if (*addr == entry->orig) { |
|---|
| 642 | 656 | *addr = entry->xen; |
|---|
| 643 | 657 | ist_okay = entry->ist_okay; |
|---|
| 658 | + found = true; |
|---|
| 644 | 659 | break; |
|---|
| 645 | 660 | } |
|---|
| 646 | 661 | } |
|---|
| .. | .. |
|---|
| 651 | 666 | nr = (*addr - (void *)early_idt_handler_array[0]) / |
|---|
| 652 | 667 | EARLY_IDT_HANDLER_SIZE; |
|---|
| 653 | 668 | *addr = (void *)xen_early_idt_handler_array[nr]; |
|---|
| 669 | + found = true; |
|---|
| 654 | 670 | } |
|---|
| 655 | 671 | |
|---|
| 656 | | - if (WARN_ON(ist != 0 && !ist_okay)) |
|---|
| 672 | + if (!found) |
|---|
| 673 | + *addr = (void *)xen_asm_exc_xen_unknown_trap; |
|---|
| 674 | + |
|---|
| 675 | + if (WARN_ON(found && ist != 0 && !ist_okay)) |
|---|
| 657 | 676 | return false; |
|---|
| 658 | 677 | |
|---|
| 659 | 678 | return true; |
|---|
| 660 | 679 | } |
|---|
| 661 | | -#endif |
|---|
| 662 | 680 | |
|---|
| 663 | 681 | static int cvt_gate_to_trap(int vector, const gate_desc *val, |
|---|
| 664 | 682 | struct trap_info *info) |
|---|
| .. | .. |
|---|
| 671 | 689 | info->vector = vector; |
|---|
| 672 | 690 | |
|---|
| 673 | 691 | addr = gate_offset(val); |
|---|
| 674 | | -#ifdef CONFIG_X86_64 |
|---|
| 675 | 692 | if (!get_trap_addr((void **)&addr, val->bits.ist)) |
|---|
| 676 | 693 | return 0; |
|---|
| 677 | | -#endif /* CONFIG_X86_64 */ |
|---|
| 678 | 694 | info->address = addr; |
|---|
| 679 | 695 | |
|---|
| 680 | 696 | info->cs = gate_segment(val); |
|---|
| .. | .. |
|---|
| 752 | 768 | { |
|---|
| 753 | 769 | static DEFINE_SPINLOCK(lock); |
|---|
| 754 | 770 | static struct trap_info traps[257]; |
|---|
| 771 | + static const struct trap_info zero = { }; |
|---|
| 755 | 772 | unsigned out; |
|---|
| 756 | 773 | |
|---|
| 757 | 774 | trace_xen_cpu_load_idt(desc); |
|---|
| .. | .. |
|---|
| 761 | 778 | memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc)); |
|---|
| 762 | 779 | |
|---|
| 763 | 780 | out = xen_convert_trap_info(desc, traps, false); |
|---|
| 764 | | - memset(&traps[out], 0, sizeof(traps[0])); |
|---|
| 781 | + traps[out] = zero; |
|---|
| 765 | 782 | |
|---|
| 766 | 783 | xen_mc_flush(); |
|---|
| 767 | 784 | if (HYPERVISOR_set_trap_table(traps)) |
|---|
| .. | .. |
|---|
| 833 | 850 | this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); |
|---|
| 834 | 851 | } |
|---|
| 835 | 852 | |
|---|
| 836 | | -void xen_set_iopl_mask(unsigned mask) |
|---|
| 853 | +#ifdef CONFIG_X86_IOPL_IOPERM |
|---|
| 854 | +static void xen_invalidate_io_bitmap(void) |
|---|
| 837 | 855 | { |
|---|
| 838 | | - struct physdev_set_iopl set_iopl; |
|---|
| 856 | + struct physdev_set_iobitmap iobitmap = { |
|---|
| 857 | + .bitmap = NULL, |
|---|
| 858 | + .nr_ports = 0, |
|---|
| 859 | + }; |
|---|
| 839 | 860 | |
|---|
| 840 | | - /* Force the change at ring 0. */ |
|---|
| 841 | | - set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; |
|---|
| 842 | | - HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); |
|---|
| 861 | + native_tss_invalidate_io_bitmap(); |
|---|
| 862 | + HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap); |
|---|
| 843 | 863 | } |
|---|
| 864 | + |
|---|
| 865 | +static void xen_update_io_bitmap(void) |
|---|
| 866 | +{ |
|---|
| 867 | + struct physdev_set_iobitmap iobitmap; |
|---|
| 868 | + struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); |
|---|
| 869 | + |
|---|
| 870 | + native_tss_update_io_bitmap(); |
|---|
| 871 | + |
|---|
| 872 | + iobitmap.bitmap = (uint8_t *)(&tss->x86_tss) + |
|---|
| 873 | + tss->x86_tss.io_bitmap_base; |
|---|
| 874 | + if (tss->x86_tss.io_bitmap_base == IO_BITMAP_OFFSET_INVALID) |
|---|
| 875 | + iobitmap.nr_ports = 0; |
|---|
| 876 | + else |
|---|
| 877 | + iobitmap.nr_ports = IO_BITMAP_BITS; |
|---|
| 878 | + |
|---|
| 879 | + HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap); |
|---|
| 880 | +} |
|---|
| 881 | +#endif |
|---|
| 844 | 882 | |
|---|
| 845 | 883 | static void xen_io_delay(void) |
|---|
| 846 | 884 | { |
|---|
| .. | .. |
|---|
| 881 | 919 | |
|---|
| 882 | 920 | native_write_cr4(cr4); |
|---|
| 883 | 921 | } |
|---|
| 884 | | -#ifdef CONFIG_X86_64 |
|---|
| 885 | | -static inline unsigned long xen_read_cr8(void) |
|---|
| 886 | | -{ |
|---|
| 887 | | - return 0; |
|---|
| 888 | | -} |
|---|
| 889 | | -static inline void xen_write_cr8(unsigned long val) |
|---|
| 890 | | -{ |
|---|
| 891 | | - BUG_ON(val); |
|---|
| 892 | | -} |
|---|
| 893 | | -#endif |
|---|
| 894 | 922 | |
|---|
| 895 | 923 | static u64 xen_read_msr_safe(unsigned int msr, int *err) |
|---|
| 896 | 924 | { |
|---|
| .. | .. |
|---|
| 911 | 939 | static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) |
|---|
| 912 | 940 | { |
|---|
| 913 | 941 | int ret; |
|---|
| 914 | | -#ifdef CONFIG_X86_64 |
|---|
| 915 | 942 | unsigned int which; |
|---|
| 916 | 943 | u64 base; |
|---|
| 917 | | -#endif |
|---|
| 918 | 944 | |
|---|
| 919 | 945 | ret = 0; |
|---|
| 920 | 946 | |
|---|
| 921 | 947 | switch (msr) { |
|---|
| 922 | | -#ifdef CONFIG_X86_64 |
|---|
| 923 | 948 | case MSR_FS_BASE: which = SEGBASE_FS; goto set; |
|---|
| 924 | 949 | case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; |
|---|
| 925 | 950 | case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; |
|---|
| .. | .. |
|---|
| 929 | 954 | if (HYPERVISOR_set_segment_base(which, base) != 0) |
|---|
| 930 | 955 | ret = -EIO; |
|---|
| 931 | 956 | break; |
|---|
| 932 | | -#endif |
|---|
| 933 | 957 | |
|---|
| 934 | 958 | case MSR_STAR: |
|---|
| 935 | 959 | case MSR_CSTAR: |
|---|
| .. | .. |
|---|
| 996 | 1020 | * percpu area for all cpus, so make use of it. |
|---|
| 997 | 1021 | */ |
|---|
| 998 | 1022 | if (xen_have_vcpu_info_placement) { |
|---|
| 999 | | - pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); |
|---|
| 1000 | | - pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); |
|---|
| 1001 | | - pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); |
|---|
| 1002 | | - pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct); |
|---|
| 1003 | | - pv_mmu_ops.read_cr2 = xen_read_cr2_direct; |
|---|
| 1023 | + pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); |
|---|
| 1024 | + pv_ops.irq.restore_fl = |
|---|
| 1025 | + __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); |
|---|
| 1026 | + pv_ops.irq.irq_disable = |
|---|
| 1027 | + __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); |
|---|
| 1028 | + pv_ops.irq.irq_enable = |
|---|
| 1029 | + __PV_IS_CALLEE_SAVE(xen_irq_enable_direct); |
|---|
| 1030 | + pv_ops.mmu.read_cr2 = |
|---|
| 1031 | + __PV_IS_CALLEE_SAVE(xen_read_cr2_direct); |
|---|
| 1004 | 1032 | } |
|---|
| 1005 | 1033 | } |
|---|
| 1006 | 1034 | |
|---|
| 1007 | 1035 | static const struct pv_info xen_info __initconst = { |
|---|
| 1008 | | - .shared_kernel_pmd = 0, |
|---|
| 1009 | | - |
|---|
| 1010 | | -#ifdef CONFIG_X86_64 |
|---|
| 1011 | 1036 | .extra_user_64bit_cs = FLAT_USER_CS64, |
|---|
| 1012 | | -#endif |
|---|
| 1013 | 1037 | .name = "Xen", |
|---|
| 1014 | 1038 | }; |
|---|
| 1015 | 1039 | |
|---|
| .. | .. |
|---|
| 1024 | 1048 | |
|---|
| 1025 | 1049 | .write_cr4 = xen_write_cr4, |
|---|
| 1026 | 1050 | |
|---|
| 1027 | | -#ifdef CONFIG_X86_64 |
|---|
| 1028 | | - .read_cr8 = xen_read_cr8, |
|---|
| 1029 | | - .write_cr8 = xen_write_cr8, |
|---|
| 1030 | | -#endif |
|---|
| 1031 | | - |
|---|
| 1032 | 1051 | .wbinvd = native_wbinvd, |
|---|
| 1033 | 1052 | |
|---|
| 1034 | 1053 | .read_msr = xen_read_msr, |
|---|
| .. | .. |
|---|
| 1040 | 1059 | .read_pmc = xen_read_pmc, |
|---|
| 1041 | 1060 | |
|---|
| 1042 | 1061 | .iret = xen_iret, |
|---|
| 1043 | | -#ifdef CONFIG_X86_64 |
|---|
| 1044 | 1062 | .usergs_sysret64 = xen_sysret64, |
|---|
| 1045 | | -#endif |
|---|
| 1046 | 1063 | |
|---|
| 1047 | 1064 | .load_tr_desc = paravirt_nop, |
|---|
| 1048 | 1065 | .set_ldt = xen_set_ldt, |
|---|
| 1049 | 1066 | .load_gdt = xen_load_gdt, |
|---|
| 1050 | 1067 | .load_idt = xen_load_idt, |
|---|
| 1051 | 1068 | .load_tls = xen_load_tls, |
|---|
| 1052 | | -#ifdef CONFIG_X86_64 |
|---|
| 1053 | 1069 | .load_gs_index = xen_load_gs_index, |
|---|
| 1054 | | -#endif |
|---|
| 1055 | 1070 | |
|---|
| 1056 | 1071 | .alloc_ldt = xen_alloc_ldt, |
|---|
| 1057 | 1072 | .free_ldt = xen_free_ldt, |
|---|
| .. | .. |
|---|
| 1063 | 1078 | .write_idt_entry = xen_write_idt_entry, |
|---|
| 1064 | 1079 | .load_sp0 = xen_load_sp0, |
|---|
| 1065 | 1080 | |
|---|
| 1066 | | - .set_iopl_mask = xen_set_iopl_mask, |
|---|
| 1081 | +#ifdef CONFIG_X86_IOPL_IOPERM |
|---|
| 1082 | + .invalidate_io_bitmap = xen_invalidate_io_bitmap, |
|---|
| 1083 | + .update_io_bitmap = xen_update_io_bitmap, |
|---|
| 1084 | +#endif |
|---|
| 1067 | 1085 | .io_delay = xen_io_delay, |
|---|
| 1068 | | - |
|---|
| 1069 | | - /* Xen takes care of %gs when switching to usermode for us */ |
|---|
| 1070 | | - .swapgs = paravirt_nop, |
|---|
| 1071 | 1086 | |
|---|
| 1072 | 1087 | .start_context_switch = paravirt_start_context_switch, |
|---|
| 1073 | 1088 | .end_context_switch = xen_end_context_switch, |
|---|
| .. | .. |
|---|
| 1175 | 1190 | */ |
|---|
| 1176 | 1191 | static void __init xen_setup_gdt(int cpu) |
|---|
| 1177 | 1192 | { |
|---|
| 1178 | | - pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; |
|---|
| 1179 | | - pv_cpu_ops.load_gdt = xen_load_gdt_boot; |
|---|
| 1193 | + pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot; |
|---|
| 1194 | + pv_ops.cpu.load_gdt = xen_load_gdt_boot; |
|---|
| 1180 | 1195 | |
|---|
| 1181 | 1196 | setup_stack_canary_segment(cpu); |
|---|
| 1182 | 1197 | switch_to_new_gdt(cpu); |
|---|
| 1183 | 1198 | |
|---|
| 1184 | | - pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry; |
|---|
| 1185 | | - pv_cpu_ops.load_gdt = xen_load_gdt; |
|---|
| 1199 | + pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry; |
|---|
| 1200 | + pv_ops.cpu.load_gdt = xen_load_gdt; |
|---|
| 1186 | 1201 | } |
|---|
| 1187 | 1202 | |
|---|
| 1188 | 1203 | static void __init xen_dom0_set_legacy_features(void) |
|---|
| .. | .. |
|---|
| 1212 | 1227 | |
|---|
| 1213 | 1228 | /* Install Xen paravirt ops */ |
|---|
| 1214 | 1229 | pv_info = xen_info; |
|---|
| 1215 | | - pv_init_ops.patch = paravirt_patch_default; |
|---|
| 1216 | | - pv_cpu_ops = xen_cpu_ops; |
|---|
| 1230 | + pv_ops.init.patch = paravirt_patch_default; |
|---|
| 1231 | + pv_ops.cpu = xen_cpu_ops; |
|---|
| 1217 | 1232 | xen_init_irq_ops(); |
|---|
| 1218 | 1233 | |
|---|
| 1219 | 1234 | /* |
|---|
| .. | .. |
|---|
| 1228 | 1243 | x86_platform.get_nmi_reason = xen_get_nmi_reason; |
|---|
| 1229 | 1244 | |
|---|
| 1230 | 1245 | x86_init.resources.memory_setup = xen_memory_setup; |
|---|
| 1246 | + x86_init.irqs.intr_mode_select = x86_init_noop; |
|---|
| 1231 | 1247 | x86_init.irqs.intr_mode_init = x86_init_noop; |
|---|
| 1232 | 1248 | x86_init.oem.arch_setup = xen_arch_setup; |
|---|
| 1233 | 1249 | x86_init.oem.banner = xen_banner; |
|---|
| .. | .. |
|---|
| 1254 | 1270 | /* Get mfn list */ |
|---|
| 1255 | 1271 | xen_build_dynamic_phys_to_machine(); |
|---|
| 1256 | 1272 | |
|---|
| 1273 | + /* Work out if we support NX */ |
|---|
| 1274 | + get_cpu_cap(&boot_cpu_data); |
|---|
| 1275 | + x86_configure_nx(); |
|---|
| 1276 | + |
|---|
| 1257 | 1277 | /* |
|---|
| 1258 | 1278 | * Set up kernel GDT and segment registers, mainly so that |
|---|
| 1259 | 1279 | * -fstack-protector code can be executed. |
|---|
| 1260 | 1280 | */ |
|---|
| 1261 | 1281 | xen_setup_gdt(0); |
|---|
| 1262 | | - |
|---|
| 1263 | | - /* Work out if we support NX */ |
|---|
| 1264 | | - get_cpu_cap(&boot_cpu_data); |
|---|
| 1265 | | - x86_configure_nx(); |
|---|
| 1266 | 1282 | |
|---|
| 1267 | 1283 | /* Determine virtual and physical address sizes */ |
|---|
| 1268 | 1284 | get_cpu_address_sizes(&boot_cpu_data); |
|---|
| .. | .. |
|---|
| 1282 | 1298 | #endif |
|---|
| 1283 | 1299 | |
|---|
| 1284 | 1300 | if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { |
|---|
| 1285 | | - pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; |
|---|
| 1286 | | - pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; |
|---|
| 1301 | + pv_ops.mmu.ptep_modify_prot_start = |
|---|
| 1302 | + xen_ptep_modify_prot_start; |
|---|
| 1303 | + pv_ops.mmu.ptep_modify_prot_commit = |
|---|
| 1304 | + xen_ptep_modify_prot_commit; |
|---|
| 1287 | 1305 | } |
|---|
| 1288 | 1306 | |
|---|
| 1289 | 1307 | machine_ops = xen_machine_ops; |
|---|
| .. | .. |
|---|
| 1303 | 1321 | * any NUMA information the kernel tries to get from ACPI will |
|---|
| 1304 | 1322 | * be meaningless. Prevent it from trying. |
|---|
| 1305 | 1323 | */ |
|---|
| 1306 | | - acpi_numa = -1; |
|---|
| 1324 | + disable_srat(); |
|---|
| 1307 | 1325 | #endif |
|---|
| 1308 | 1326 | WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv)); |
|---|
| 1309 | 1327 | |
|---|
| .. | .. |
|---|
| 1315 | 1333 | xen_start_info->nr_pages); |
|---|
| 1316 | 1334 | xen_reserve_special_pages(); |
|---|
| 1317 | 1335 | |
|---|
| 1318 | | - /* keep using Xen gdt for now; no urgent need to change it */ |
|---|
| 1319 | | - |
|---|
| 1320 | | -#ifdef CONFIG_X86_32 |
|---|
| 1321 | | - pv_info.kernel_rpl = 1; |
|---|
| 1322 | | - if (xen_feature(XENFEAT_supervisor_mode_kernel)) |
|---|
| 1323 | | - pv_info.kernel_rpl = 0; |
|---|
| 1324 | | -#else |
|---|
| 1325 | | - pv_info.kernel_rpl = 0; |
|---|
| 1326 | | -#endif |
|---|
| 1327 | | - /* set the limit of our address space */ |
|---|
| 1328 | | - xen_reserve_top(); |
|---|
| 1329 | | - |
|---|
| 1330 | 1336 | /* |
|---|
| 1331 | 1337 | * We used to do this in xen_arch_setup, but that is too late |
|---|
| 1332 | 1338 | * on AMD were early_cpu_init (run before ->arch_setup()) calls |
|---|
| .. | .. |
|---|
| 1337 | 1343 | if (rc != 0) |
|---|
| 1338 | 1344 | xen_raw_printk("physdev_op failed %d\n", rc); |
|---|
| 1339 | 1345 | |
|---|
| 1340 | | -#ifdef CONFIG_X86_32 |
|---|
| 1341 | | - /* set up basic CPUID stuff */ |
|---|
| 1342 | | - cpu_detect(&new_cpu_data); |
|---|
| 1343 | | - set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU); |
|---|
| 1344 | | - new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1); |
|---|
| 1345 | | -#endif |
|---|
| 1346 | 1346 | |
|---|
| 1347 | 1347 | if (xen_start_info->mod_start) { |
|---|
| 1348 | 1348 | if (xen_start_info->flags & SIF_MOD_START_PFN) |
|---|
| .. | .. |
|---|
| 1388 | 1388 | |
|---|
| 1389 | 1389 | xen_acpi_sleep_register(); |
|---|
| 1390 | 1390 | |
|---|
| 1391 | | - /* Avoid searching for BIOS MP tables */ |
|---|
| 1392 | | - x86_init.mpparse.find_smp_config = x86_init_noop; |
|---|
| 1393 | | - x86_init.mpparse.get_smp_config = x86_init_uint_noop; |
|---|
| 1394 | | - |
|---|
| 1395 | 1391 | xen_boot_params_init_edd(); |
|---|
| 1396 | 1392 | |
|---|
| 1397 | 1393 | #ifdef CONFIG_ACPI |
|---|
| .. | .. |
|---|
| 1422 | 1418 | xen_efi_init(&boot_params); |
|---|
| 1423 | 1419 | |
|---|
| 1424 | 1420 | /* Start the world */ |
|---|
| 1425 | | -#ifdef CONFIG_X86_32 |
|---|
| 1426 | | - i386_start_kernel(); |
|---|
| 1427 | | -#else |
|---|
| 1428 | 1421 | cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */ |
|---|
| 1429 | 1422 | x86_64_start_reservations((char *)__pa_symbol(&boot_params)); |
|---|
| 1430 | | -#endif |
|---|
| 1431 | 1423 | } |
|---|
| 1432 | 1424 | |
|---|
| 1433 | 1425 | static int xen_cpu_up_prepare_pv(unsigned int cpu) |
|---|
| .. | .. |
|---|
| 1479 | 1471 | .detect = xen_platform_pv, |
|---|
| 1480 | 1472 | .type = X86_HYPER_XEN_PV, |
|---|
| 1481 | 1473 | .runtime.pin_vcpu = xen_pin_vcpu, |
|---|
| 1474 | + .ignore_nopv = true, |
|---|
| 1482 | 1475 | }; |
|---|
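
For reference, here is a minimal, self-contained sketch (not part of the patch) of what the `TRAP_ENTRY()` and `TRAP_ENTRY_REDIR()` macros introduced above expand to. The stub declarations are placeholders added only so the snippet compiles; in the kernel the `asm_exc_*` symbols are the native IDT entry stubs and the `xen_asm_*` / `xen_asm_xenpv_*` symbols are the Xen PV assembly entry points referenced in the hunk.

```c
#include <stdbool.h>

/*
 * Placeholder declarations so this sketch is self-contained.  In the
 * kernel these come from <asm/idtentry.h> and the Xen PV assembly code.
 */
void asm_exc_debug(void);
void xen_asm_xenpv_exc_debug(void);
void asm_exc_int3(void);
void xen_asm_exc_int3(void);

struct trap_array_entry {
	void (*orig)(void);
	void (*xen)(void);
	bool ist_okay;
};

/* The two helper macros added by the patch. */
#define TRAP_ENTRY(func, ist_ok) {		\
	.orig = asm_##func,			\
	.xen = xen_asm_##func,			\
	.ist_okay = ist_ok }

#define TRAP_ENTRY_REDIR(func, ist_ok) {	\
	.orig = asm_##func,			\
	.xen = xen_asm_xenpv_##func,		\
	.ist_okay = ist_ok }

static struct trap_array_entry example[] = {
	/* { .orig = asm_exc_debug, .xen = xen_asm_xenpv_exc_debug, .ist_okay = true  } */
	TRAP_ENTRY_REDIR(exc_debug, true),
	/* { .orig = asm_exc_int3,  .xen = xen_asm_exc_int3,        .ist_okay = false } */
	TRAP_ENTRY(exc_int3, false),
};
```

As the later hunk shows, `get_trap_addr()` walks this table and swaps the native stub for the Xen one, falling back to `xen_asm_exc_xen_unknown_trap` when no entry matches.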
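
The other sizable addition is the `CONFIG_X86_IOPL_IOPERM` pair `xen_invalidate_io_bitmap()` / `xen_update_io_bitmap()`, which replaces `xen_set_iopl_mask()`. A hypothetical userspace illustration of the path that eventually exercises these hooks, under the assumption of a Xen PV guest with `CONFIG_X86_IOPL_IOPERM` enabled: a task that obtains port access with `ioperm(2)` gets a per-task I/O bitmap, and when that bitmap is installed for the running task, the new `update_io_bitmap` pv op forwards it to the hypervisor via `PHYSDEVOP_set_iobitmap` instead of relying on the hardware TSS alone. The port range below is purely illustrative.

```c
/* Illustrative only; requires root (CAP_SYS_RAWIO) on x86 Linux. */
#include <stdio.h>
#include <sys/io.h>

int main(void)
{
	/* Request access to the legacy parallel-port range 0x378-0x37a. */
	if (ioperm(0x378, 3, 1) != 0) {
		perror("ioperm");
		return 1;
	}

	outb(0x00, 0x378);	/* direct port write now permitted */

	ioperm(0x378, 3, 0);	/* drop access again */
	return 0;
}
```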