.. | .. |
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
| 2 | + |
1 | 3 | /* |
2 | 4 | * Xen mmu operations |
3 | 5 | * |
.. | .. |
49 | 51 | #include <linux/memblock.h> |
50 | 52 | #include <linux/seq_file.h> |
51 | 53 | #include <linux/crash_dump.h> |
| 54 | +#include <linux/pgtable.h> |
52 | 55 | #ifdef CONFIG_KEXEC_CORE |
53 | 56 | #include <linux/kexec.h> |
54 | 57 | #endif |
55 | 58 | |
56 | 59 | #include <trace/events/xen.h> |
57 | 60 | |
58 | | -#include <asm/pgtable.h> |
59 | 61 | #include <asm/tlbflush.h> |
60 | 62 | #include <asm/fixmap.h> |
61 | 63 | #include <asm/mmu_context.h> |
.. | .. |
65 | 67 | #include <asm/linkage.h> |
66 | 68 | #include <asm/page.h> |
67 | 69 | #include <asm/init.h> |
68 | | -#include <asm/pat.h> |
| 70 | +#include <asm/memtype.h> |
69 | 71 | #include <asm/smp.h> |
70 | 72 | #include <asm/tlb.h> |
71 | 73 | |
.. | .. |
84 | 86 | #include "mmu.h" |
85 | 87 | #include "debugfs.h" |
86 | 88 | |
87 | | -#ifdef CONFIG_X86_32 |
88 | | -/* |
89 | | - * Identity map, in addition to plain kernel map. This needs to be |
90 | | - * large enough to allocate page table pages to allocate the rest. |
91 | | - * Each page can map 2MB. |
92 | | - */ |
93 | | -#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) |
94 | | -static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); |
95 | | -#endif |
96 | | -#ifdef CONFIG_X86_64 |
97 | 89 | /* l3 pud for userspace vsyscall mapping */ |
98 | 90 | static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; |
99 | | -#endif /* CONFIG_X86_64 */ |
| 91 | + |
| 92 | +/* |
| 93 | + * Protects atomic reservation decrease/increase against concurrent increases. |
| 94 | + * Also protects non-atomic updates of current_pages and balloon lists. |
| 95 | + */ |
| 96 | +static DEFINE_SPINLOCK(xen_reservation_lock); |
100 | 97 | |
101 | 98 | /* |
102 | 99 | * Note about cr3 (pagetable base) values: |
.. | .. |
272 | 269 | if (!xen_batched_set_pte(ptep, pteval)) { |
273 | 270 | /* |
274 | 271 | * Could call native_set_pte() here and trap and |
275 | | - * emulate the PTE write but with 32-bit guests this |
276 | | - * needs two traps (one for each of the two 32-bit |
277 | | - * words in the PTE) so do one hypercall directly |
278 | | - * instead. |
| 272 | + * emulate the PTE write, but a hypercall is much cheaper. |
279 | 273 | */ |
280 | 274 | struct mmu_update u; |
281 | 275 | |
.. | .. |
291 | 285 | __xen_set_pte(ptep, pteval); |
292 | 286 | } |
293 | 287 | |
294 | | -static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, |
295 | | - pte_t *ptep, pte_t pteval) |
296 | | -{ |
297 | | - trace_xen_mmu_set_pte_at(mm, addr, ptep, pteval); |
298 | | - __xen_set_pte(ptep, pteval); |
299 | | -} |
300 | | - |
301 | | -pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, |
| 288 | +pte_t xen_ptep_modify_prot_start(struct vm_area_struct *vma, |
302 | 289 | unsigned long addr, pte_t *ptep) |
303 | 290 | { |
304 | 291 | /* Just return the pte as-is. We preserve the bits on commit */ |
305 | | - trace_xen_mmu_ptep_modify_prot_start(mm, addr, ptep, *ptep); |
| 292 | + trace_xen_mmu_ptep_modify_prot_start(vma->vm_mm, addr, ptep, *ptep); |
306 | 293 | return *ptep; |
307 | 294 | } |
308 | 295 | |
309 | | -void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, |
| 296 | +void xen_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, |
310 | 297 | pte_t *ptep, pte_t pte) |
311 | 298 | { |
312 | 299 | struct mmu_update u; |
313 | 300 | |
314 | | - trace_xen_mmu_ptep_modify_prot_commit(mm, addr, ptep, pte); |
| 301 | + trace_xen_mmu_ptep_modify_prot_commit(vma->vm_mm, addr, ptep, pte); |
315 | 302 | xen_mc_batch(); |
316 | 303 | |
317 | 304 | u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; |
.. | .. |
431 | 418 | xen_set_pud_hyper(ptr, val); |
432 | 419 | } |
433 | 420 | |
434 | | -#ifdef CONFIG_X86_PAE |
435 | | -static void xen_set_pte_atomic(pte_t *ptep, pte_t pte) |
436 | | -{ |
437 | | - trace_xen_mmu_set_pte_atomic(ptep, pte); |
438 | | - __xen_set_pte(ptep, pte); |
439 | | -} |
440 | | - |
441 | | -static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
442 | | -{ |
443 | | - trace_xen_mmu_pte_clear(mm, addr, ptep); |
444 | | - __xen_set_pte(ptep, native_make_pte(0)); |
445 | | -} |
446 | | - |
447 | | -static void xen_pmd_clear(pmd_t *pmdp) |
448 | | -{ |
449 | | - trace_xen_mmu_pmd_clear(pmdp); |
450 | | - set_pmd(pmdp, __pmd(0)); |
451 | | -} |
452 | | -#endif /* CONFIG_X86_PAE */ |
453 | | - |
454 | 421 | __visible pmd_t xen_make_pmd(pmdval_t pmd) |
455 | 422 | { |
456 | 423 | pmd = pte_pfn_to_mfn(pmd); |
.. | .. |
458 | 425 | } |
459 | 426 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); |
460 | 427 | |
461 | | -#ifdef CONFIG_X86_64 |
462 | 428 | __visible pudval_t xen_pud_val(pud_t pud) |
463 | 429 | { |
464 | 430 | return pte_mfn_to_pfn(pud.pud); |
.. | .. |
563 | 529 | } |
564 | 530 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d); |
565 | 531 | #endif /* CONFIG_PGTABLE_LEVELS >= 5 */ |
566 | | -#endif /* CONFIG_X86_64 */ |
567 | 532 | |
568 | | -static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, |
569 | | - int (*func)(struct mm_struct *mm, struct page *, enum pt_level), |
570 | | - bool last, unsigned long limit) |
| 533 | +static void xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, |
| 534 | + void (*func)(struct mm_struct *mm, struct page *, |
| 535 | + enum pt_level), |
| 536 | + bool last, unsigned long limit) |
571 | 537 | { |
572 | | - int i, nr, flush = 0; |
| 538 | + int i, nr; |
573 | 539 | |
574 | 540 | nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD; |
575 | 541 | for (i = 0; i < nr; i++) { |
576 | 542 | if (!pmd_none(pmd[i])) |
577 | | - flush |= (*func)(mm, pmd_page(pmd[i]), PT_PTE); |
| 543 | + (*func)(mm, pmd_page(pmd[i]), PT_PTE); |
578 | 544 | } |
579 | | - return flush; |
580 | 545 | } |
581 | 546 | |
582 | | -static int xen_pud_walk(struct mm_struct *mm, pud_t *pud, |
583 | | - int (*func)(struct mm_struct *mm, struct page *, enum pt_level), |
584 | | - bool last, unsigned long limit) |
| 547 | +static void xen_pud_walk(struct mm_struct *mm, pud_t *pud, |
| 548 | + void (*func)(struct mm_struct *mm, struct page *, |
| 549 | + enum pt_level), |
| 550 | + bool last, unsigned long limit) |
585 | 551 | { |
586 | | - int i, nr, flush = 0; |
| 552 | + int i, nr; |
587 | 553 | |
588 | 554 | nr = last ? pud_index(limit) + 1 : PTRS_PER_PUD; |
589 | 555 | for (i = 0; i < nr; i++) { |
.. | .. |
594 | 560 | |
595 | 561 | pmd = pmd_offset(&pud[i], 0); |
596 | 562 | if (PTRS_PER_PMD > 1) |
597 | | - flush |= (*func)(mm, virt_to_page(pmd), PT_PMD); |
598 | | - flush |= xen_pmd_walk(mm, pmd, func, |
599 | | - last && i == nr - 1, limit); |
| 563 | + (*func)(mm, virt_to_page(pmd), PT_PMD); |
| 564 | + xen_pmd_walk(mm, pmd, func, last && i == nr - 1, limit); |
600 | 565 | } |
601 | | - return flush; |
602 | 566 | } |
603 | 567 | |
604 | | -static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d, |
605 | | - int (*func)(struct mm_struct *mm, struct page *, enum pt_level), |
606 | | - bool last, unsigned long limit) |
| 568 | +static void xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d, |
| 569 | + void (*func)(struct mm_struct *mm, struct page *, |
| 570 | + enum pt_level), |
| 571 | + bool last, unsigned long limit) |
607 | 572 | { |
608 | | - int flush = 0; |
609 | 573 | pud_t *pud; |
610 | 574 | |
611 | 575 | |
612 | 576 | if (p4d_none(*p4d)) |
613 | | - return flush; |
| 577 | + return; |
614 | 578 | |
615 | 579 | pud = pud_offset(p4d, 0); |
616 | 580 | if (PTRS_PER_PUD > 1) |
617 | | - flush |= (*func)(mm, virt_to_page(pud), PT_PUD); |
618 | | - flush |= xen_pud_walk(mm, pud, func, last, limit); |
619 | | - return flush; |
| 581 | + (*func)(mm, virt_to_page(pud), PT_PUD); |
| 582 | + xen_pud_walk(mm, pud, func, last, limit); |
620 | 583 | } |
621 | 584 | |
622 | 585 | /* |
.. | .. |
628 | 591 | * will be STACK_TOP_MAX, but at boot we need to pin up to |
629 | 592 | * FIXADDR_TOP. |
630 | 593 | * |
631 | | - * For 32-bit the important bit is that we don't pin beyond there, |
632 | | - * because then we start getting into Xen's ptes. |
633 | | - * |
634 | | - * For 64-bit, we must skip the Xen hole in the middle of the address |
635 | | - * space, just after the big x86-64 virtual hole. |
| 594 | + * We must skip the Xen hole in the middle of the address space, just after |
| 595 | + * the big x86-64 virtual hole. |
636 | 596 | */ |
637 | | -static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, |
638 | | - int (*func)(struct mm_struct *mm, struct page *, |
639 | | - enum pt_level), |
640 | | - unsigned long limit) |
| 597 | +static void __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, |
| 598 | + void (*func)(struct mm_struct *mm, struct page *, |
| 599 | + enum pt_level), |
| 600 | + unsigned long limit) |
641 | 601 | { |
642 | | - int i, nr, flush = 0; |
| 602 | + int i, nr; |
643 | 603 | unsigned hole_low = 0, hole_high = 0; |
644 | 604 | |
645 | 605 | /* The limit is the last byte to be touched */ |
646 | 606 | limit--; |
647 | 607 | BUG_ON(limit >= FIXADDR_TOP); |
648 | 608 | |
649 | | -#ifdef CONFIG_X86_64 |
650 | 609 | /* |
651 | 610 | * 64-bit has a great big hole in the middle of the address |
652 | 611 | * space, which contains the Xen mappings. |
653 | 612 | */ |
654 | 613 | hole_low = pgd_index(GUARD_HOLE_BASE_ADDR); |
655 | 614 | hole_high = pgd_index(GUARD_HOLE_END_ADDR); |
656 | | -#endif |
657 | 615 | |
658 | 616 | nr = pgd_index(limit) + 1; |
659 | 617 | for (i = 0; i < nr; i++) { |
.. | .. |
666 | 624 | continue; |
667 | 625 | |
668 | 626 | p4d = p4d_offset(&pgd[i], 0); |
669 | | - flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit); |
| 627 | + xen_p4d_walk(mm, p4d, func, i == nr - 1, limit); |
670 | 628 | } |
671 | 629 | |
672 | 630 | /* Do the top level last, so that the callbacks can use it as |
673 | 631 | a cue to do final things like tlb flushes. */ |
674 | | - flush |= (*func)(mm, virt_to_page(pgd), PT_PGD); |
675 | | - |
676 | | - return flush; |
| 632 | + (*func)(mm, virt_to_page(pgd), PT_PGD); |
677 | 633 | } |
678 | 634 | |
679 | | -static int xen_pgd_walk(struct mm_struct *mm, |
680 | | - int (*func)(struct mm_struct *mm, struct page *, |
681 | | - enum pt_level), |
682 | | - unsigned long limit) |
| 635 | +static void xen_pgd_walk(struct mm_struct *mm, |
| 636 | + void (*func)(struct mm_struct *mm, struct page *, |
| 637 | + enum pt_level), |
| 638 | + unsigned long limit) |
683 | 639 | { |
684 | | - return __xen_pgd_walk(mm, mm->pgd, func, limit); |
| 640 | + __xen_pgd_walk(mm, mm->pgd, func, limit); |
685 | 641 | } |
686 | 642 | |
687 | 643 | /* If we're using split pte locks, then take the page's lock and |
.. | .. |
714 | 670 | xen_extend_mmuext_op(&op); |
715 | 671 | } |
716 | 672 | |
717 | | -static int xen_pin_page(struct mm_struct *mm, struct page *page, |
718 | | - enum pt_level level) |
| 673 | +static void xen_pin_page(struct mm_struct *mm, struct page *page, |
| 674 | + enum pt_level level) |
719 | 675 | { |
720 | 676 | unsigned pgfl = TestSetPagePinned(page); |
721 | | - int flush; |
722 | 677 | |
723 | | - if (pgfl) |
724 | | - flush = 0; /* already pinned */ |
725 | | - else if (PageHighMem(page)) |
726 | | - /* kmaps need flushing if we found an unpinned |
727 | | - highpage */ |
728 | | - flush = 1; |
729 | | - else { |
| 678 | + if (!pgfl) { |
730 | 679 | void *pt = lowmem_page_address(page); |
731 | 680 | unsigned long pfn = page_to_pfn(page); |
732 | 681 | struct multicall_space mcs = __xen_mc_entry(0); |
733 | 682 | spinlock_t *ptl; |
734 | | - |
735 | | - flush = 0; |
736 | 683 | |
737 | 684 | /* |
738 | 685 | * We need to hold the pagetable lock between the time |
.. | .. |
770 | 717 | xen_mc_callback(xen_pte_unlock, ptl); |
771 | 718 | } |
772 | 719 | } |
773 | | - |
774 | | - return flush; |
775 | 720 | } |
776 | 721 | |
777 | 722 | /* This is called just after a mm has been created, but it has not |
.. | .. |
779 | 724 | read-only, and can be pinned. */ |
780 | 725 | static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) |
781 | 726 | { |
| 727 | + pgd_t *user_pgd = xen_get_user_pgd(pgd); |
| 728 | + |
782 | 729 | trace_xen_mmu_pgd_pin(mm, pgd); |
783 | 730 | |
784 | 731 | xen_mc_batch(); |
785 | 732 | |
786 | | - if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) { |
787 | | - /* re-enable interrupts for flushing */ |
788 | | - xen_mc_issue(0); |
| 733 | + __xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT); |
789 | 734 | |
790 | | - kmap_flush_unused(); |
| 735 | + xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); |
791 | 736 | |
792 | | - xen_mc_batch(); |
| 737 | + if (user_pgd) { |
| 738 | + xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD); |
| 739 | + xen_do_pin(MMUEXT_PIN_L4_TABLE, |
| 740 | + PFN_DOWN(__pa(user_pgd))); |
793 | 741 | } |
794 | 742 | |
795 | | -#ifdef CONFIG_X86_64 |
796 | | - { |
797 | | - pgd_t *user_pgd = xen_get_user_pgd(pgd); |
798 | | - |
799 | | - xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); |
800 | | - |
801 | | - if (user_pgd) { |
802 | | - xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD); |
803 | | - xen_do_pin(MMUEXT_PIN_L4_TABLE, |
804 | | - PFN_DOWN(__pa(user_pgd))); |
805 | | - } |
806 | | - } |
807 | | -#else /* CONFIG_X86_32 */ |
808 | | -#ifdef CONFIG_X86_PAE |
809 | | - /* Need to make sure unshared kernel PMD is pinnable */ |
810 | | - xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]), |
811 | | - PT_PMD); |
812 | | -#endif |
813 | | - xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); |
814 | | -#endif /* CONFIG_X86_64 */ |
815 | 743 | xen_mc_issue(0); |
816 | 744 | } |
817 | 745 | |
.. | .. |
846 | 774 | spin_unlock(&pgd_lock); |
847 | 775 | } |
848 | 776 | |
849 | | -static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, |
850 | | - enum pt_level level) |
| 777 | +static void __init xen_mark_pinned(struct mm_struct *mm, struct page *page, |
| 778 | + enum pt_level level) |
851 | 779 | { |
852 | 780 | SetPagePinned(page); |
853 | | - return 0; |
854 | 781 | } |
855 | 782 | |
856 | 783 | /* |
857 | 784 | * The init_mm pagetable is really pinned as soon as its created, but |
858 | 785 | * that's before we have page structures to store the bits. So do all |
859 | 786 | * the book-keeping now once struct pages for allocated pages are |
860 | | - * initialized. This happens only after free_all_bootmem() is called. |
| 787 | + * initialized. This happens only after memblock_free_all() is called. |
861 | 788 | */ |
862 | 789 | static void __init xen_after_bootmem(void) |
863 | 790 | { |
864 | 791 | static_branch_enable(&xen_struct_pages_ready); |
865 | | -#ifdef CONFIG_X86_64 |
866 | 792 | SetPagePinned(virt_to_page(level3_user_vsyscall)); |
867 | | -#endif |
868 | 793 | xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); |
869 | 794 | } |
870 | 795 | |
871 | | -static int xen_unpin_page(struct mm_struct *mm, struct page *page, |
872 | | - enum pt_level level) |
| 796 | +static void xen_unpin_page(struct mm_struct *mm, struct page *page, |
| 797 | + enum pt_level level) |
873 | 798 | { |
874 | 799 | unsigned pgfl = TestClearPagePinned(page); |
875 | 800 | |
876 | | - if (pgfl && !PageHighMem(page)) { |
| 801 | + if (pgfl) { |
877 | 802 | void *pt = lowmem_page_address(page); |
878 | 803 | unsigned long pfn = page_to_pfn(page); |
879 | 804 | spinlock_t *ptl = NULL; |
.. | .. |
904 | 829 | xen_mc_callback(xen_pte_unlock, ptl); |
905 | 830 | } |
906 | 831 | } |
907 | | - |
908 | | - return 0; /* never need to flush on unpin */ |
909 | 832 | } |
910 | 833 | |
911 | 834 | /* Release a pagetables pages back as normal RW */ |
912 | 835 | static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) |
913 | 836 | { |
| 837 | + pgd_t *user_pgd = xen_get_user_pgd(pgd); |
| 838 | + |
914 | 839 | trace_xen_mmu_pgd_unpin(mm, pgd); |
915 | 840 | |
916 | 841 | xen_mc_batch(); |
917 | 842 | |
918 | 843 | xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); |
919 | 844 | |
920 | | -#ifdef CONFIG_X86_64 |
921 | | - { |
922 | | - pgd_t *user_pgd = xen_get_user_pgd(pgd); |
923 | | - |
924 | | - if (user_pgd) { |
925 | | - xen_do_pin(MMUEXT_UNPIN_TABLE, |
926 | | - PFN_DOWN(__pa(user_pgd))); |
927 | | - xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD); |
928 | | - } |
| 845 | + if (user_pgd) { |
| 846 | + xen_do_pin(MMUEXT_UNPIN_TABLE, |
| 847 | + PFN_DOWN(__pa(user_pgd))); |
| 848 | + xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD); |
929 | 849 | } |
930 | | -#endif |
931 | | - |
932 | | -#ifdef CONFIG_X86_PAE |
933 | | - /* Need to make sure unshared kernel PMD is unpinned */ |
934 | | - xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]), |
935 | | - PT_PMD); |
936 | | -#endif |
937 | 850 | |
938 | 851 | __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT); |
939 | 852 | |
.. | .. |
1081 | 994 | BUG(); |
1082 | 995 | } |
1083 | 996 | |
1084 | | -#ifdef CONFIG_X86_64 |
1085 | 997 | static void __init xen_cleanhighmap(unsigned long vaddr, |
1086 | 998 | unsigned long vaddr_end) |
1087 | 999 | { |
.. | .. |
1230 | 1142 | * We could be in __ka space. |
1231 | 1143 | * We roundup to the PMD, which means that if anybody at this stage is |
1232 | 1144 | * using the __ka address of xen_start_info or |
1233 | | - * xen_start_info->shared_info they are in going to crash. Fortunatly |
| 1145 | + * xen_start_info->shared_info they are in going to crash. Fortunately |
1234 | 1146 | * we have already revectored in xen_setup_kernel_pagetable. |
1235 | 1147 | */ |
1236 | 1148 | size = roundup(size, PMD_SIZE); |
.. | .. |
1265 | 1177 | xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2)); |
1266 | 1178 | xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base)); |
1267 | 1179 | } |
1268 | | -#endif |
1269 | 1180 | |
1270 | 1181 | static void __init xen_pagetable_p2m_setup(void) |
1271 | 1182 | { |
1272 | 1183 | xen_vmalloc_p2m_tree(); |
1273 | 1184 | |
1274 | | -#ifdef CONFIG_X86_64 |
1275 | 1185 | xen_pagetable_p2m_free(); |
1276 | 1186 | |
1277 | 1187 | xen_pagetable_cleanhighmap(); |
1278 | | -#endif |
| 1188 | + |
1279 | 1189 | /* And revector! Bye bye old array */ |
1280 | 1190 | xen_start_info->mfn_list = (unsigned long)xen_p2m_addr; |
1281 | 1191 | } |
.. | .. |
1297 | 1207 | static void xen_write_cr2(unsigned long cr2) |
1298 | 1208 | { |
1299 | 1209 | this_cpu_read(xen_vcpu)->arch.cr2 = cr2; |
1300 | | -} |
1301 | | - |
1302 | | -static unsigned long xen_read_cr2(void) |
1303 | | -{ |
1304 | | - return this_cpu_read(xen_vcpu)->arch.cr2; |
1305 | | -} |
1306 | | - |
1307 | | -unsigned long xen_read_cr2_direct(void) |
1308 | | -{ |
1309 | | - return this_cpu_read(xen_vcpu_info.arch.cr2); |
1310 | 1210 | } |
1311 | 1211 | |
1312 | 1212 | static noinline void xen_flush_tlb(void) |
.. | .. |
1422 | 1322 | } |
1423 | 1323 | static void xen_write_cr3(unsigned long cr3) |
1424 | 1324 | { |
| 1325 | + pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); |
| 1326 | + |
1425 | 1327 | BUG_ON(preemptible()); |
1426 | 1328 | |
1427 | 1329 | xen_mc_batch(); /* disables interrupts */ |
.. | .. |
1432 | 1334 | |
1433 | 1335 | __xen_write_cr3(true, cr3); |
1434 | 1336 | |
1435 | | -#ifdef CONFIG_X86_64 |
1436 | | - { |
1437 | | - pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); |
1438 | | - if (user_pgd) |
1439 | | - __xen_write_cr3(false, __pa(user_pgd)); |
1440 | | - else |
1441 | | - __xen_write_cr3(false, 0); |
1442 | | - } |
1443 | | -#endif |
| 1337 | + if (user_pgd) |
| 1338 | + __xen_write_cr3(false, __pa(user_pgd)); |
| 1339 | + else |
| 1340 | + __xen_write_cr3(false, 0); |
1444 | 1341 | |
1445 | 1342 | xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ |
1446 | 1343 | } |
1447 | 1344 | |
1448 | | -#ifdef CONFIG_X86_64 |
1449 | 1345 | /* |
1450 | 1346 | * At the start of the day - when Xen launches a guest, it has already |
1451 | 1347 | * built pagetables for the guest. We diligently look over them |
.. | .. |
1480 | 1376 | |
1481 | 1377 | xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ |
1482 | 1378 | } |
1483 | | -#endif |
1484 | 1379 | |
1485 | 1380 | static int xen_pgd_alloc(struct mm_struct *mm) |
1486 | 1381 | { |
1487 | 1382 | pgd_t *pgd = mm->pgd; |
1488 | | - int ret = 0; |
| 1383 | + struct page *page = virt_to_page(pgd); |
| 1384 | + pgd_t *user_pgd; |
| 1385 | + int ret = -ENOMEM; |
1489 | 1386 | |
1490 | 1387 | BUG_ON(PagePinned(virt_to_page(pgd))); |
| 1388 | + BUG_ON(page->private != 0); |
1491 | 1389 | |
1492 | | -#ifdef CONFIG_X86_64 |
1493 | | - { |
1494 | | - struct page *page = virt_to_page(pgd); |
1495 | | - pgd_t *user_pgd; |
| 1390 | + user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); |
| 1391 | + page->private = (unsigned long)user_pgd; |
1496 | 1392 | |
1497 | | - BUG_ON(page->private != 0); |
1498 | | - |
1499 | | - ret = -ENOMEM; |
1500 | | - |
1501 | | - user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); |
1502 | | - page->private = (unsigned long)user_pgd; |
1503 | | - |
1504 | | - if (user_pgd != NULL) { |
| 1393 | + if (user_pgd != NULL) { |
1505 | 1394 | #ifdef CONFIG_X86_VSYSCALL_EMULATION |
1506 | | - user_pgd[pgd_index(VSYSCALL_ADDR)] = |
1507 | | - __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); |
| 1395 | + user_pgd[pgd_index(VSYSCALL_ADDR)] = |
| 1396 | + __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); |
1508 | 1397 | #endif |
1509 | | - ret = 0; |
1510 | | - } |
1511 | | - |
1512 | | - BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); |
| 1398 | + ret = 0; |
1513 | 1399 | } |
1514 | | -#endif |
| 1400 | + |
| 1401 | + BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); |
| 1402 | + |
1515 | 1403 | return ret; |
1516 | 1404 | } |
1517 | 1405 | |
1518 | 1406 | static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) |
1519 | 1407 | { |
1520 | | -#ifdef CONFIG_X86_64 |
1521 | 1408 | pgd_t *user_pgd = xen_get_user_pgd(pgd); |
1522 | 1409 | |
1523 | 1410 | if (user_pgd) |
1524 | 1411 | free_page((unsigned long)user_pgd); |
1525 | | -#endif |
1526 | 1412 | } |
1527 | 1413 | |
1528 | 1414 | /* |
.. | .. |
1541 | 1427 | */ |
1542 | 1428 | __visible pte_t xen_make_pte_init(pteval_t pte) |
1543 | 1429 | { |
1544 | | -#ifdef CONFIG_X86_64 |
1545 | 1430 | unsigned long pfn; |
1546 | 1431 | |
1547 | 1432 | /* |
.. | .. |
1555 | 1440 | pfn >= xen_start_info->first_p2m_pfn && |
1556 | 1441 | pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames) |
1557 | 1442 | pte &= ~_PAGE_RW; |
1558 | | -#endif |
| 1443 | + |
1559 | 1444 | pte = pte_pfn_to_mfn(pte); |
1560 | 1445 | return native_make_pte(pte); |
1561 | 1446 | } |
.. | .. |
1563 | 1448 | |
1564 | 1449 | static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) |
1565 | 1450 | { |
1566 | | -#ifdef CONFIG_X86_32 |
1567 | | - /* If there's an existing pte, then don't allow _PAGE_RW to be set */ |
1568 | | - if (pte_mfn(pte) != INVALID_P2M_ENTRY |
1569 | | - && pte_val_ma(*ptep) & _PAGE_PRESENT) |
1570 | | - pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & |
1571 | | - pte_val_ma(pte)); |
1572 | | -#endif |
1573 | 1451 | __xen_set_pte(ptep, pte); |
1574 | 1452 | } |
1575 | 1453 | |
.. | .. |
1644 | 1522 | if (static_branch_likely(&xen_struct_pages_ready)) |
1645 | 1523 | SetPagePinned(page); |
1646 | 1524 | |
1647 | | - if (!PageHighMem(page)) { |
1648 | | - xen_mc_batch(); |
| 1525 | + xen_mc_batch(); |
1649 | 1526 | |
1650 | | - __set_pfn_prot(pfn, PAGE_KERNEL_RO); |
| 1527 | + __set_pfn_prot(pfn, PAGE_KERNEL_RO); |
1651 | 1528 | |
1652 | | - if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) |
1653 | | - __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); |
| 1529 | + if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) |
| 1530 | + __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); |
1654 | 1531 | |
1655 | | - xen_mc_issue(PARAVIRT_LAZY_MMU); |
1656 | | - } else { |
1657 | | - /* make sure there are no stray mappings of |
1658 | | - this page */ |
1659 | | - kmap_flush_unused(); |
1660 | | - } |
| 1532 | + xen_mc_issue(PARAVIRT_LAZY_MMU); |
1661 | 1533 | } |
1662 | 1534 | } |
1663 | 1535 | |
.. | .. |
1680 | 1552 | trace_xen_mmu_release_ptpage(pfn, level, pinned); |
1681 | 1553 | |
1682 | 1554 | if (pinned) { |
1683 | | - if (!PageHighMem(page)) { |
1684 | | - xen_mc_batch(); |
| 1555 | + xen_mc_batch(); |
1685 | 1556 | |
1686 | | - if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) |
1687 | | - __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); |
| 1557 | + if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) |
| 1558 | + __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); |
1688 | 1559 | |
1689 | | - __set_pfn_prot(pfn, PAGE_KERNEL); |
| 1560 | + __set_pfn_prot(pfn, PAGE_KERNEL); |
1690 | 1561 | |
1691 | | - xen_mc_issue(PARAVIRT_LAZY_MMU); |
1692 | | - } |
| 1562 | + xen_mc_issue(PARAVIRT_LAZY_MMU); |
| 1563 | + |
1693 | 1564 | ClearPagePinned(page); |
1694 | 1565 | } |
1695 | 1566 | } |
.. | .. |
1704 | 1575 | xen_release_ptpage(pfn, PT_PMD); |
1705 | 1576 | } |
1706 | 1577 | |
1707 | | -#ifdef CONFIG_X86_64 |
1708 | 1578 | static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) |
1709 | 1579 | { |
1710 | 1580 | xen_alloc_ptpage(mm, pfn, PT_PUD); |
.. | .. |
1714 | 1584 | { |
1715 | 1585 | xen_release_ptpage(pfn, PT_PUD); |
1716 | 1586 | } |
1717 | | -#endif |
1718 | | - |
1719 | | -void __init xen_reserve_top(void) |
1720 | | -{ |
1721 | | -#ifdef CONFIG_X86_32 |
1722 | | - unsigned long top = HYPERVISOR_VIRT_START; |
1723 | | - struct xen_platform_parameters pp; |
1724 | | - |
1725 | | - if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) |
1726 | | - top = pp.virt_start; |
1727 | | - |
1728 | | - reserve_top_address(-top); |
1729 | | -#endif /* CONFIG_X86_32 */ |
1730 | | -} |
1731 | 1587 | |
1732 | 1588 | /* |
1733 | 1589 | * Like __va(), but returns address in the kernel mapping (which is |
.. | .. |
1735 | 1591 | */ |
1736 | 1592 | static void * __init __ka(phys_addr_t paddr) |
1737 | 1593 | { |
1738 | | -#ifdef CONFIG_X86_64 |
1739 | 1594 | return (void *)(paddr + __START_KERNEL_map); |
1740 | | -#else |
1741 | | - return __va(paddr); |
1742 | | -#endif |
1743 | 1595 | } |
1744 | 1596 | |
1745 | 1597 | /* Convert a machine address to physical address */ |
.. | .. |
1773 | 1625 | { |
1774 | 1626 | return set_page_prot_flags(addr, prot, UVMF_NONE); |
1775 | 1627 | } |
1776 | | -#ifdef CONFIG_X86_32 |
1777 | | -static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) |
1778 | | -{ |
1779 | | - unsigned pmdidx, pteidx; |
1780 | | - unsigned ident_pte; |
1781 | | - unsigned long pfn; |
1782 | 1628 | |
1783 | | - level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES, |
1784 | | - PAGE_SIZE); |
1785 | | - |
1786 | | - ident_pte = 0; |
1787 | | - pfn = 0; |
1788 | | - for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { |
1789 | | - pte_t *pte_page; |
1790 | | - |
1791 | | - /* Reuse or allocate a page of ptes */ |
1792 | | - if (pmd_present(pmd[pmdidx])) |
1793 | | - pte_page = m2v(pmd[pmdidx].pmd); |
1794 | | - else { |
1795 | | - /* Check for free pte pages */ |
1796 | | - if (ident_pte == LEVEL1_IDENT_ENTRIES) |
1797 | | - break; |
1798 | | - |
1799 | | - pte_page = &level1_ident_pgt[ident_pte]; |
1800 | | - ident_pte += PTRS_PER_PTE; |
1801 | | - |
1802 | | - pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); |
1803 | | - } |
1804 | | - |
1805 | | - /* Install mappings */ |
1806 | | - for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { |
1807 | | - pte_t pte; |
1808 | | - |
1809 | | - if (pfn > max_pfn_mapped) |
1810 | | - max_pfn_mapped = pfn; |
1811 | | - |
1812 | | - if (!pte_none(pte_page[pteidx])) |
1813 | | - continue; |
1814 | | - |
1815 | | - pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); |
1816 | | - pte_page[pteidx] = pte; |
1817 | | - } |
1818 | | - } |
1819 | | - |
1820 | | - for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) |
1821 | | - set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); |
1822 | | - |
1823 | | - set_page_prot(pmd, PAGE_KERNEL_RO); |
1824 | | -} |
1825 | | -#endif |
1826 | 1629 | void __init xen_setup_machphys_mapping(void) |
1827 | 1630 | { |
1828 | 1631 | struct xen_machphys_mapping mapping; |
.. | .. |
1833 | 1636 | } else { |
1834 | 1637 | machine_to_phys_nr = MACH2PHYS_NR_ENTRIES; |
1835 | 1638 | } |
1836 | | -#ifdef CONFIG_X86_32 |
1837 | | - WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1)) |
1838 | | - < machine_to_phys_mapping); |
1839 | | -#endif |
1840 | 1639 | } |
1841 | 1640 | |
1842 | | -#ifdef CONFIG_X86_64 |
1843 | 1641 | static void __init convert_pfn_mfn(void *v) |
1844 | 1642 | { |
1845 | 1643 | pte_t *pte = v; |
.. | .. |
2170 | 1968 | xen_start_info->nr_p2m_frames = n_frames; |
2171 | 1969 | } |
2172 | 1970 | |
2173 | | -#else /* !CONFIG_X86_64 */ |
2174 | | -static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); |
2175 | | -static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); |
2176 | | -RESERVE_BRK(fixup_kernel_pmd, PAGE_SIZE); |
2177 | | -RESERVE_BRK(fixup_kernel_pte, PAGE_SIZE); |
2178 | | - |
2179 | | -static void __init xen_write_cr3_init(unsigned long cr3) |
2180 | | -{ |
2181 | | - unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); |
2182 | | - |
2183 | | - BUG_ON(read_cr3_pa() != __pa(initial_page_table)); |
2184 | | - BUG_ON(cr3 != __pa(swapper_pg_dir)); |
2185 | | - |
2186 | | - /* |
2187 | | - * We are switching to swapper_pg_dir for the first time (from |
2188 | | - * initial_page_table) and therefore need to mark that page |
2189 | | - * read-only and then pin it. |
2190 | | - * |
2191 | | - * Xen disallows sharing of kernel PMDs for PAE |
2192 | | - * guests. Therefore we must copy the kernel PMD from |
2193 | | - * initial_page_table into a new kernel PMD to be used in |
2194 | | - * swapper_pg_dir. |
2195 | | - */ |
2196 | | - swapper_kernel_pmd = |
2197 | | - extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); |
2198 | | - copy_page(swapper_kernel_pmd, initial_kernel_pmd); |
2199 | | - swapper_pg_dir[KERNEL_PGD_BOUNDARY] = |
2200 | | - __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); |
2201 | | - set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); |
2202 | | - |
2203 | | - set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); |
2204 | | - xen_write_cr3(cr3); |
2205 | | - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn); |
2206 | | - |
2207 | | - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, |
2208 | | - PFN_DOWN(__pa(initial_page_table))); |
2209 | | - set_page_prot(initial_page_table, PAGE_KERNEL); |
2210 | | - set_page_prot(initial_kernel_pmd, PAGE_KERNEL); |
2211 | | - |
2212 | | - pv_mmu_ops.write_cr3 = &xen_write_cr3; |
2213 | | -} |
2214 | | - |
2215 | | -/* |
2216 | | - * For 32 bit domains xen_start_info->pt_base is the pgd address which might be |
2217 | | - * not the first page table in the page table pool. |
2218 | | - * Iterate through the initial page tables to find the real page table base. |
2219 | | - */ |
2220 | | -static phys_addr_t __init xen_find_pt_base(pmd_t *pmd) |
2221 | | -{ |
2222 | | - phys_addr_t pt_base, paddr; |
2223 | | - unsigned pmdidx; |
2224 | | - |
2225 | | - pt_base = min(__pa(xen_start_info->pt_base), __pa(pmd)); |
2226 | | - |
2227 | | - for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) |
2228 | | - if (pmd_present(pmd[pmdidx]) && !pmd_large(pmd[pmdidx])) { |
2229 | | - paddr = m2p(pmd[pmdidx].pmd); |
2230 | | - pt_base = min(pt_base, paddr); |
2231 | | - } |
2232 | | - |
2233 | | - return pt_base; |
2234 | | -} |
2235 | | - |
2236 | | -void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) |
2237 | | -{ |
2238 | | - pmd_t *kernel_pmd; |
2239 | | - |
2240 | | - kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); |
2241 | | - |
2242 | | - xen_pt_base = xen_find_pt_base(kernel_pmd); |
2243 | | - xen_pt_size = xen_start_info->nr_pt_frames * PAGE_SIZE; |
2244 | | - |
2245 | | - initial_kernel_pmd = |
2246 | | - extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); |
2247 | | - |
2248 | | - max_pfn_mapped = PFN_DOWN(xen_pt_base + xen_pt_size + 512 * 1024); |
2249 | | - |
2250 | | - copy_page(initial_kernel_pmd, kernel_pmd); |
2251 | | - |
2252 | | - xen_map_identity_early(initial_kernel_pmd, max_pfn); |
2253 | | - |
2254 | | - copy_page(initial_page_table, pgd); |
2255 | | - initial_page_table[KERNEL_PGD_BOUNDARY] = |
2256 | | - __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); |
2257 | | - |
2258 | | - set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO); |
2259 | | - set_page_prot(initial_page_table, PAGE_KERNEL_RO); |
2260 | | - set_page_prot(empty_zero_page, PAGE_KERNEL_RO); |
2261 | | - |
2262 | | - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); |
2263 | | - |
2264 | | - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, |
2265 | | - PFN_DOWN(__pa(initial_page_table))); |
2266 | | - xen_write_cr3(__pa(initial_page_table)); |
2267 | | - |
2268 | | - memblock_reserve(xen_pt_base, xen_pt_size); |
2269 | | -} |
2270 | | -#endif /* CONFIG_X86_64 */ |
2271 | | - |
2272 | 1971 | void __init xen_reserve_special_pages(void) |
2273 | 1972 | { |
2274 | 1973 | phys_addr_t paddr; |
.. | .. |
2302 | 2001 | |
2303 | 2002 | switch (idx) { |
2304 | 2003 | case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: |
2305 | | -#ifdef CONFIG_X86_32 |
2306 | | - case FIX_WP_TEST: |
2307 | | -# ifdef CONFIG_HIGHMEM |
2308 | | - case FIX_KMAP_BEGIN ... FIX_KMAP_END: |
2309 | | -# endif |
2310 | | -#elif defined(CONFIG_X86_VSYSCALL_EMULATION) |
| 2004 | +#ifdef CONFIG_X86_VSYSCALL_EMULATION |
2311 | 2005 | case VSYSCALL_PAGE: |
2312 | 2006 | #endif |
2313 | | - case FIX_TEXT_POKE0: |
2314 | | - case FIX_TEXT_POKE1: |
2315 | 2007 | /* All local page mappings */ |
2316 | 2008 | pte = pfn_pte(phys, prot); |
2317 | 2009 | break; |
.. | .. |
2358 | 2050 | |
2359 | 2051 | static void __init xen_post_allocator_init(void) |
2360 | 2052 | { |
2361 | | - pv_mmu_ops.set_pte = xen_set_pte; |
2362 | | - pv_mmu_ops.set_pmd = xen_set_pmd; |
2363 | | - pv_mmu_ops.set_pud = xen_set_pud; |
2364 | | -#ifdef CONFIG_X86_64 |
2365 | | - pv_mmu_ops.set_p4d = xen_set_p4d; |
2366 | | -#endif |
| 2053 | + pv_ops.mmu.set_pte = xen_set_pte; |
| 2054 | + pv_ops.mmu.set_pmd = xen_set_pmd; |
| 2055 | + pv_ops.mmu.set_pud = xen_set_pud; |
| 2056 | + pv_ops.mmu.set_p4d = xen_set_p4d; |
2367 | 2057 | |
2368 | 2058 | /* This will work as long as patching hasn't happened yet |
2369 | 2059 | (which it hasn't) */ |
2370 | | - pv_mmu_ops.alloc_pte = xen_alloc_pte; |
2371 | | - pv_mmu_ops.alloc_pmd = xen_alloc_pmd; |
2372 | | - pv_mmu_ops.release_pte = xen_release_pte; |
2373 | | - pv_mmu_ops.release_pmd = xen_release_pmd; |
2374 | | -#ifdef CONFIG_X86_64 |
2375 | | - pv_mmu_ops.alloc_pud = xen_alloc_pud; |
2376 | | - pv_mmu_ops.release_pud = xen_release_pud; |
2377 | | -#endif |
2378 | | - pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte); |
| 2060 | + pv_ops.mmu.alloc_pte = xen_alloc_pte; |
| 2061 | + pv_ops.mmu.alloc_pmd = xen_alloc_pmd; |
| 2062 | + pv_ops.mmu.release_pte = xen_release_pte; |
| 2063 | + pv_ops.mmu.release_pmd = xen_release_pmd; |
| 2064 | + pv_ops.mmu.alloc_pud = xen_alloc_pud; |
| 2065 | + pv_ops.mmu.release_pud = xen_release_pud; |
| 2066 | + pv_ops.mmu.make_pte = PV_CALLEE_SAVE(xen_make_pte); |
2379 | 2067 | |
2380 | | -#ifdef CONFIG_X86_64 |
2381 | | - pv_mmu_ops.write_cr3 = &xen_write_cr3; |
2382 | | -#endif |
| 2068 | + pv_ops.mmu.write_cr3 = &xen_write_cr3; |
2383 | 2069 | } |
2384 | 2070 | |
2385 | 2071 | static void xen_leave_lazy_mmu(void) |
.. | .. |
2391 | 2077 | } |
2392 | 2078 | |
2393 | 2079 | static const struct pv_mmu_ops xen_mmu_ops __initconst = { |
2394 | | - .read_cr2 = xen_read_cr2, |
| 2080 | + .read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2), |
2395 | 2081 | .write_cr2 = xen_write_cr2, |
2396 | 2082 | |
2397 | 2083 | .read_cr3 = xen_read_cr3, |
.. | .. |
2412 | 2098 | .release_pmd = xen_release_pmd_init, |
2413 | 2099 | |
2414 | 2100 | .set_pte = xen_set_pte_init, |
2415 | | - .set_pte_at = xen_set_pte_at, |
2416 | 2101 | .set_pmd = xen_set_pmd_hyper, |
2417 | 2102 | |
2418 | 2103 | .ptep_modify_prot_start = __ptep_modify_prot_start, |
.. | .. |
2424 | 2109 | .make_pte = PV_CALLEE_SAVE(xen_make_pte_init), |
2425 | 2110 | .make_pgd = PV_CALLEE_SAVE(xen_make_pgd), |
2426 | 2111 | |
2427 | | -#ifdef CONFIG_X86_PAE |
2428 | | - .set_pte_atomic = xen_set_pte_atomic, |
2429 | | - .pte_clear = xen_pte_clear, |
2430 | | - .pmd_clear = xen_pmd_clear, |
2431 | | -#endif /* CONFIG_X86_PAE */ |
2432 | 2112 | .set_pud = xen_set_pud_hyper, |
2433 | 2113 | |
2434 | 2114 | .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), |
2435 | 2115 | .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), |
2436 | 2116 | |
2437 | | -#ifdef CONFIG_X86_64 |
2438 | 2117 | .pud_val = PV_CALLEE_SAVE(xen_pud_val), |
2439 | 2118 | .make_pud = PV_CALLEE_SAVE(xen_make_pud), |
2440 | 2119 | .set_p4d = xen_set_p4d_hyper, |
.. | .. |
2446 | 2125 | .p4d_val = PV_CALLEE_SAVE(xen_p4d_val), |
2447 | 2126 | .make_p4d = PV_CALLEE_SAVE(xen_make_p4d), |
2448 | 2127 | #endif |
2449 | | -#endif /* CONFIG_X86_64 */ |
2450 | 2128 | |
2451 | 2129 | .activate_mm = xen_activate_mm, |
2452 | 2130 | .dup_mmap = xen_dup_mmap, |
.. | .. |
2466 | 2144 | x86_init.paging.pagetable_init = xen_pagetable_init; |
2467 | 2145 | x86_init.hyper.init_after_bootmem = xen_after_bootmem; |
2468 | 2146 | |
2469 | | - pv_mmu_ops = xen_mmu_ops; |
| 2147 | + pv_ops.mmu = xen_mmu_ops; |
2470 | 2148 | |
2471 | 2149 | memset(dummy_mapping, 0xff, PAGE_SIZE); |
2472 | 2150 | } |
.. | .. |
2629 | 2307 | *dma_handle = virt_to_machine(vstart).maddr; |
2630 | 2308 | return success ? 0 : -ENOMEM; |
2631 | 2309 | } |
2632 | | -EXPORT_SYMBOL_GPL(xen_create_contiguous_region); |
2633 | 2310 | |
2634 | 2311 | void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) |
2635 | 2312 | { |
.. | .. |
2664 | 2341 | |
2665 | 2342 | spin_unlock_irqrestore(&xen_reservation_lock, flags); |
2666 | 2343 | } |
2667 | | -EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); |
| 2344 | + |
| 2345 | +static noinline void xen_flush_tlb_all(void) |
| 2346 | +{ |
| 2347 | + struct mmuext_op *op; |
| 2348 | + struct multicall_space mcs; |
| 2349 | + |
| 2350 | + preempt_disable(); |
| 2351 | + |
| 2352 | + mcs = xen_mc_entry(sizeof(*op)); |
| 2353 | + |
| 2354 | + op = mcs.args; |
| 2355 | + op->cmd = MMUEXT_TLB_FLUSH_ALL; |
| 2356 | + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); |
| 2357 | + |
| 2358 | + xen_mc_issue(PARAVIRT_LAZY_MMU); |
| 2359 | + |
| 2360 | + preempt_enable(); |
| 2361 | +} |
| 2362 | + |
| 2363 | +#define REMAP_BATCH_SIZE 16 |
| 2364 | + |
| 2365 | +struct remap_data { |
| 2366 | + xen_pfn_t *pfn; |
| 2367 | + bool contiguous; |
| 2368 | + bool no_translate; |
| 2369 | + pgprot_t prot; |
| 2370 | + struct mmu_update *mmu_update; |
| 2371 | +}; |
| 2372 | + |
| 2373 | +static int remap_area_pfn_pte_fn(pte_t *ptep, unsigned long addr, void *data) |
| 2374 | +{ |
| 2375 | + struct remap_data *rmd = data; |
| 2376 | + pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot)); |
| 2377 | + |
| 2378 | + /* |
| 2379 | + * If we have a contiguous range, just update the pfn itself, |
| 2380 | + * else update pointer to be "next pfn". |
| 2381 | + */ |
| 2382 | + if (rmd->contiguous) |
| 2383 | + (*rmd->pfn)++; |
| 2384 | + else |
| 2385 | + rmd->pfn++; |
| 2386 | + |
| 2387 | + rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; |
| 2388 | + rmd->mmu_update->ptr |= rmd->no_translate ? |
| 2389 | + MMU_PT_UPDATE_NO_TRANSLATE : |
| 2390 | + MMU_NORMAL_PT_UPDATE; |
| 2391 | + rmd->mmu_update->val = pte_val_ma(pte); |
| 2392 | + rmd->mmu_update++; |
| 2393 | + |
| 2394 | + return 0; |
| 2395 | +} |
| 2396 | + |
| 2397 | +int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, |
| 2398 | + xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, |
| 2399 | + unsigned int domid, bool no_translate, struct page **pages) |
| 2400 | +{ |
| 2401 | + int err = 0; |
| 2402 | + struct remap_data rmd; |
| 2403 | + struct mmu_update mmu_update[REMAP_BATCH_SIZE]; |
| 2404 | + unsigned long range; |
| 2405 | + int mapped = 0; |
| 2406 | + |
| 2407 | + BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); |
| 2408 | + |
| 2409 | + rmd.pfn = pfn; |
| 2410 | + rmd.prot = prot; |
| 2411 | + /* |
| 2412 | + * We use the err_ptr to indicate if there we are doing a contiguous |
| 2413 | + * mapping or a discontigious mapping. |
| 2414 | + */ |
| 2415 | + rmd.contiguous = !err_ptr; |
| 2416 | + rmd.no_translate = no_translate; |
| 2417 | + |
| 2418 | + while (nr) { |
| 2419 | + int index = 0; |
| 2420 | + int done = 0; |
| 2421 | + int batch = min(REMAP_BATCH_SIZE, nr); |
| 2422 | + int batch_left = batch; |
| 2423 | + |
| 2424 | + range = (unsigned long)batch << PAGE_SHIFT; |
| 2425 | + |
| 2426 | + rmd.mmu_update = mmu_update; |
| 2427 | + err = apply_to_page_range(vma->vm_mm, addr, range, |
| 2428 | + remap_area_pfn_pte_fn, &rmd); |
| 2429 | + if (err) |
| 2430 | + goto out; |
| 2431 | + |
| 2432 | + /* |
| 2433 | + * We record the error for each page that gives an error, but |
| 2434 | + * continue mapping until the whole set is done |
| 2435 | + */ |
| 2436 | + do { |
| 2437 | + int i; |
| 2438 | + |
| 2439 | + err = HYPERVISOR_mmu_update(&mmu_update[index], |
| 2440 | + batch_left, &done, domid); |
| 2441 | + |
| 2442 | + /* |
| 2443 | + * @err_ptr may be the same buffer as @gfn, so |
| 2444 | + * only clear it after each chunk of @gfn is |
| 2445 | + * used. |
| 2446 | + */ |
| 2447 | + if (err_ptr) { |
| 2448 | + for (i = index; i < index + done; i++) |
| 2449 | + err_ptr[i] = 0; |
| 2450 | + } |
| 2451 | + if (err < 0) { |
| 2452 | + if (!err_ptr) |
| 2453 | + goto out; |
| 2454 | + err_ptr[i] = err; |
| 2455 | + done++; /* Skip failed frame. */ |
| 2456 | + } else |
| 2457 | + mapped += done; |
| 2458 | + batch_left -= done; |
| 2459 | + index += done; |
| 2460 | + } while (batch_left); |
| 2461 | + |
| 2462 | + nr -= batch; |
| 2463 | + addr += range; |
| 2464 | + if (err_ptr) |
| 2465 | + err_ptr += batch; |
| 2466 | + cond_resched(); |
| 2467 | + } |
| 2468 | +out: |
| 2469 | + |
| 2470 | + xen_flush_tlb_all(); |
| 2471 | + |
| 2472 | + return err < 0 ? err : mapped; |
| 2473 | +} |
| 2474 | +EXPORT_SYMBOL_GPL(xen_remap_pfn); |
2668 | 2475 | |
2669 | 2476 | #ifdef CONFIG_KEXEC_CORE |
2670 | 2477 | phys_addr_t paddr_vmcoreinfo_note(void) |
---|