From 08f87f769b595151be1afeff53e144f543faa614 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Wed, 06 Dec 2023 09:51:13 +0000
Subject: [PATCH] add dts config
---
kernel/arch/x86/xen/enlighten_pv.c | 281 +++++++++++++++++++++++++++----------------------------
1 files changed, 137 insertions(+), 144 deletions(-)
diff --git a/kernel/arch/x86/xen/enlighten_pv.c b/kernel/arch/x86/xen/enlighten_pv.c
index 8f1ff8d..815030b 100644
--- a/kernel/arch/x86/xen/enlighten_pv.c
+++ b/kernel/arch/x86/xen/enlighten_pv.c
@@ -23,7 +23,7 @@
#include <linux/start_kernel.h>
#include <linux/sched.h>
#include <linux/kprobes.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/page-flags.h>
@@ -31,9 +31,8 @@
#include <linux/console.h>
#include <linux/pci.h>
#include <linux/gfp.h>
-#include <linux/memblock.h>
#include <linux/edd.h>
-#include <linux/frame.h>
+#include <linux/objtool.h>
#include <xen/xen.h>
#include <xen/events.h>
@@ -64,7 +63,6 @@
#include <asm/setup.h>
#include <asm/desc.h>
#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/reboot.h>
#include <asm/stackprotector.h>
@@ -73,6 +71,9 @@
#include <asm/mwait.h>
#include <asm/pci_x86.h>
#include <asm/cpu.h>
+#ifdef CONFIG_X86_IOPL_IOPERM
+#include <asm/io_bitmap.h>
+#endif
#ifdef CONFIG_ACPI
#include <linux/acpi.h>
@@ -344,15 +345,13 @@
pte_t *ptep;
pte_t pte;
unsigned long pfn;
- struct page *page;
unsigned char dummy;
+ void *va;
ptep = lookup_address((unsigned long)v, &level);
BUG_ON(ptep == NULL);
pfn = pte_pfn(*ptep);
- page = pfn_to_page(pfn);
-
pte = pfn_pte(pfn, prot);
/*
@@ -377,19 +376,15 @@
preempt_disable();
- probe_kernel_read(&dummy, v, 1);
+ copy_from_kernel_nofault(&dummy, v, 1);
if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
BUG();
- if (!PageHighMem(page)) {
- void *av = __va(PFN_PHYS(pfn));
+ va = __va(PFN_PHYS(pfn));
- if (av != v)
- if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
- BUG();
- } else
- kmap_flush_unused();
+ if (va != v && HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
+ BUG();
preempt_enable();
}
@@ -529,30 +524,12 @@
static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
{
/*
- * XXX sleazy hack: If we're being called in a lazy-cpu zone
- * and lazy gs handling is enabled, it means we're in a
- * context switch, and %gs has just been saved. This means we
- * can zero it out to prevent faults on exit from the
- * hypervisor if the next process has no %gs. Either way, it
- * has been saved, and the new value will get loaded properly.
- * This will go away as soon as Xen has been modified to not
- * save/restore %gs for normal hypercalls.
- *
- * On x86_64, this hack is not used for %gs, because gs points
- * to KERNEL_GS_BASE (and uses it for PDA references), so we
- * must not zero %gs on x86_64
- *
- * For x86_64, we need to zero %fs, otherwise we may get an
+ * In lazy mode we need to zero %fs, otherwise we may get an
* exception between the new %fs descriptor being loaded and
* %fs being effectively cleared at __switch_to().
*/
- if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
-#ifdef CONFIG_X86_32
- lazy_load_gs(0);
-#else
+ if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
loadsegment(fs, 0);
-#endif
- }
xen_mc_batch();
@@ -563,13 +540,11 @@
xen_mc_issue(PARAVIRT_LAZY_CPU);
}
-#ifdef CONFIG_X86_64
static void xen_load_gs_index(unsigned int idx)
{
if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
BUG();
}
-#endif
static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
const void *ptr)
@@ -588,51 +563,90 @@
preempt_enable();
}
-#ifdef CONFIG_X86_64
+void noist_exc_debug(struct pt_regs *regs);
+
+DEFINE_IDTENTRY_RAW(xenpv_exc_nmi)
+{
+ /* On Xen PV, NMI doesn't use IST. The C part is the sane as native. */
+ exc_nmi(regs);
+}
+
+DEFINE_IDTENTRY_RAW(xenpv_exc_debug)
+{
+ /*
+ * There's no IST on Xen PV, but we still need to dispatch
+ * to the correct handler.
+ */
+ if (user_mode(regs))
+ noist_exc_debug(regs);
+ else
+ exc_debug(regs);
+}
+
+DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap)
+{
+ /* This should never happen and there is no way to handle it. */
+ instrumentation_begin();
+ pr_err("Unknown trap in Xen PV mode.");
+ BUG();
+ instrumentation_end();
+}
+
struct trap_array_entry {
void (*orig)(void);
void (*xen)(void);
bool ist_okay;
};
+#define TRAP_ENTRY(func, ist_ok) { \
+ .orig = asm_##func, \
+ .xen = xen_asm_##func, \
+ .ist_okay = ist_ok }
+
+#define TRAP_ENTRY_REDIR(func, ist_ok) { \
+ .orig = asm_##func, \
+ .xen = xen_asm_xenpv_##func, \
+ .ist_okay = ist_ok }
+
static struct trap_array_entry trap_array[] = {
- { debug, xen_xendebug, true },
- { double_fault, xen_double_fault, true },
+ TRAP_ENTRY_REDIR(exc_debug, true ),
+ TRAP_ENTRY(exc_double_fault, true ),
#ifdef CONFIG_X86_MCE
- { machine_check, xen_machine_check, true },
+ TRAP_ENTRY(exc_machine_check, true ),
#endif
- { nmi, xen_xennmi, true },
- { int3, xen_int3, false },
- { overflow, xen_overflow, false },
+ TRAP_ENTRY_REDIR(exc_nmi, true ),
+ TRAP_ENTRY(exc_int3, false ),
+ TRAP_ENTRY(exc_overflow, false ),
#ifdef CONFIG_IA32_EMULATION
{ entry_INT80_compat, xen_entry_INT80_compat, false },
#endif
- { page_fault, xen_page_fault, false },
- { divide_error, xen_divide_error, false },
- { bounds, xen_bounds, false },
- { invalid_op, xen_invalid_op, false },
- { device_not_available, xen_device_not_available, false },
- { coprocessor_segment_overrun, xen_coprocessor_segment_overrun, false },
- { invalid_TSS, xen_invalid_TSS, false },
- { segment_not_present, xen_segment_not_present, false },
- { stack_segment, xen_stack_segment, false },
- { general_protection, xen_general_protection, false },
- { spurious_interrupt_bug, xen_spurious_interrupt_bug, false },
- { coprocessor_error, xen_coprocessor_error, false },
- { alignment_check, xen_alignment_check, false },
- { simd_coprocessor_error, xen_simd_coprocessor_error, false },
+ TRAP_ENTRY(exc_page_fault, false ),
+ TRAP_ENTRY(exc_divide_error, false ),
+ TRAP_ENTRY(exc_bounds, false ),
+ TRAP_ENTRY(exc_invalid_op, false ),
+ TRAP_ENTRY(exc_device_not_available, false ),
+ TRAP_ENTRY(exc_coproc_segment_overrun, false ),
+ TRAP_ENTRY(exc_invalid_tss, false ),
+ TRAP_ENTRY(exc_segment_not_present, false ),
+ TRAP_ENTRY(exc_stack_segment, false ),
+ TRAP_ENTRY(exc_general_protection, false ),
+ TRAP_ENTRY(exc_spurious_interrupt_bug, false ),
+ TRAP_ENTRY(exc_coprocessor_error, false ),
+ TRAP_ENTRY(exc_alignment_check, false ),
+ TRAP_ENTRY(exc_simd_coprocessor_error, false ),
};
static bool __ref get_trap_addr(void **addr, unsigned int ist)
{
unsigned int nr;
bool ist_okay = false;
+ bool found = false;
/*
* Replace trap handler addresses by Xen specific ones.
* Check for known traps using IST and whitelist them.
* The debugger ones are the only ones we care about.
- * Xen will handle faults like double_fault, * so we should never see
+ * Xen will handle faults like double_fault, so we should never see
* them. Warn if there's an unexpected IST-using fault handler.
*/
for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++) {
@@ -641,6 +655,7 @@
if (*addr == entry->orig) {
*addr = entry->xen;
ist_okay = entry->ist_okay;
+ found = true;
break;
}
}
@@ -651,14 +666,17 @@
nr = (*addr - (void *)early_idt_handler_array[0]) /
EARLY_IDT_HANDLER_SIZE;
*addr = (void *)xen_early_idt_handler_array[nr];
+ found = true;
}
- if (WARN_ON(ist != 0 && !ist_okay))
+ if (!found)
+ *addr = (void *)xen_asm_exc_xen_unknown_trap;
+
+ if (WARN_ON(found && ist != 0 && !ist_okay))
return false;
return true;
}
-#endif
static int cvt_gate_to_trap(int vector, const gate_desc *val,
struct trap_info *info)
@@ -671,10 +689,8 @@
info->vector = vector;
addr = gate_offset(val);
-#ifdef CONFIG_X86_64
if (!get_trap_addr((void **)&addr, val->bits.ist))
return 0;
-#endif /* CONFIG_X86_64 */
info->address = addr;
info->cs = gate_segment(val);
@@ -752,6 +768,7 @@
{
static DEFINE_SPINLOCK(lock);
static struct trap_info traps[257];
+ static const struct trap_info zero = { };
unsigned out;
trace_xen_cpu_load_idt(desc);
@@ -761,7 +778,7 @@
memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc));
out = xen_convert_trap_info(desc, traps, false);
- memset(&traps[out], 0, sizeof(traps[0]));
+ traps[out] = zero;
xen_mc_flush();
if (HYPERVISOR_set_trap_table(traps))
@@ -833,14 +850,35 @@
this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
}
-void xen_set_iopl_mask(unsigned mask)
+#ifdef CONFIG_X86_IOPL_IOPERM
+static void xen_invalidate_io_bitmap(void)
{
- struct physdev_set_iopl set_iopl;
+ struct physdev_set_iobitmap iobitmap = {
+ .bitmap = NULL,
+ .nr_ports = 0,
+ };
- /* Force the change at ring 0. */
- set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
- HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
+ native_tss_invalidate_io_bitmap();
+ HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap);
}
+
+static void xen_update_io_bitmap(void)
+{
+ struct physdev_set_iobitmap iobitmap;
+ struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+
+ native_tss_update_io_bitmap();
+
+ iobitmap.bitmap = (uint8_t *)(&tss->x86_tss) +
+ tss->x86_tss.io_bitmap_base;
+ if (tss->x86_tss.io_bitmap_base == IO_BITMAP_OFFSET_INVALID)
+ iobitmap.nr_ports = 0;
+ else
+ iobitmap.nr_ports = IO_BITMAP_BITS;
+
+ HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap);
+}
+#endif
static void xen_io_delay(void)
{
@@ -881,16 +919,6 @@
native_write_cr4(cr4);
}
-#ifdef CONFIG_X86_64
-static inline unsigned long xen_read_cr8(void)
-{
- return 0;
-}
-static inline void xen_write_cr8(unsigned long val)
-{
- BUG_ON(val);
-}
-#endif
static u64 xen_read_msr_safe(unsigned int msr, int *err)
{
@@ -911,15 +939,12 @@
static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
{
int ret;
-#ifdef CONFIG_X86_64
unsigned int which;
u64 base;
-#endif
ret = 0;
switch (msr) {
-#ifdef CONFIG_X86_64
case MSR_FS_BASE: which = SEGBASE_FS; goto set;
case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set;
case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set;
@@ -929,7 +954,6 @@
if (HYPERVISOR_set_segment_base(which, base) != 0)
ret = -EIO;
break;
-#endif
case MSR_STAR:
case MSR_CSTAR:
@@ -996,20 +1020,20 @@
* percpu area for all cpus, so make use of it.
*/
if (xen_have_vcpu_info_placement) {
- pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
- pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
- pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
- pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
- pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
+ pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
+ pv_ops.irq.restore_fl =
+ __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
+ pv_ops.irq.irq_disable =
+ __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
+ pv_ops.irq.irq_enable =
+ __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
+ pv_ops.mmu.read_cr2 =
+ __PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
}
}
static const struct pv_info xen_info __initconst = {
- .shared_kernel_pmd = 0,
-
-#ifdef CONFIG_X86_64
.extra_user_64bit_cs = FLAT_USER_CS64,
-#endif
.name = "Xen",
};
@@ -1024,11 +1048,6 @@
.write_cr4 = xen_write_cr4,
-#ifdef CONFIG_X86_64
- .read_cr8 = xen_read_cr8,
- .write_cr8 = xen_write_cr8,
-#endif
-
.wbinvd = native_wbinvd,
.read_msr = xen_read_msr,
@@ -1040,18 +1059,14 @@
.read_pmc = xen_read_pmc,
.iret = xen_iret,
-#ifdef CONFIG_X86_64
.usergs_sysret64 = xen_sysret64,
-#endif
.load_tr_desc = paravirt_nop,
.set_ldt = xen_set_ldt,
.load_gdt = xen_load_gdt,
.load_idt = xen_load_idt,
.load_tls = xen_load_tls,
-#ifdef CONFIG_X86_64
.load_gs_index = xen_load_gs_index,
-#endif
.alloc_ldt = xen_alloc_ldt,
.free_ldt = xen_free_ldt,
@@ -1063,11 +1078,11 @@
.write_idt_entry = xen_write_idt_entry,
.load_sp0 = xen_load_sp0,
- .set_iopl_mask = xen_set_iopl_mask,
+#ifdef CONFIG_X86_IOPL_IOPERM
+ .invalidate_io_bitmap = xen_invalidate_io_bitmap,
+ .update_io_bitmap = xen_update_io_bitmap,
+#endif
.io_delay = xen_io_delay,
-
- /* Xen takes care of %gs when switching to usermode for us */
- .swapgs = paravirt_nop,
.start_context_switch = paravirt_start_context_switch,
.end_context_switch = xen_end_context_switch,
@@ -1175,14 +1190,14 @@
*/
static void __init xen_setup_gdt(int cpu)
{
- pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
- pv_cpu_ops.load_gdt = xen_load_gdt_boot;
+ pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot;
+ pv_ops.cpu.load_gdt = xen_load_gdt_boot;
setup_stack_canary_segment(cpu);
switch_to_new_gdt(cpu);
- pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry;
- pv_cpu_ops.load_gdt = xen_load_gdt;
+ pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry;
+ pv_ops.cpu.load_gdt = xen_load_gdt;
}
static void __init xen_dom0_set_legacy_features(void)
@@ -1212,8 +1227,8 @@
/* Install Xen paravirt ops */
pv_info = xen_info;
- pv_init_ops.patch = paravirt_patch_default;
- pv_cpu_ops = xen_cpu_ops;
+ pv_ops.init.patch = paravirt_patch_default;
+ pv_ops.cpu = xen_cpu_ops;
xen_init_irq_ops();
/*
@@ -1228,6 +1243,7 @@
x86_platform.get_nmi_reason = xen_get_nmi_reason;
x86_init.resources.memory_setup = xen_memory_setup;
+ x86_init.irqs.intr_mode_select = x86_init_noop;
x86_init.irqs.intr_mode_init = x86_init_noop;
x86_init.oem.arch_setup = xen_arch_setup;
x86_init.oem.banner = xen_banner;
@@ -1254,15 +1270,15 @@
/* Get mfn list */
xen_build_dynamic_phys_to_machine();
+ /* Work out if we support NX */
+ get_cpu_cap(&boot_cpu_data);
+ x86_configure_nx();
+
/*
* Set up kernel GDT and segment registers, mainly so that
* -fstack-protector code can be executed.
*/
xen_setup_gdt(0);
-
- /* Work out if we support NX */
- get_cpu_cap(&boot_cpu_data);
- x86_configure_nx();
/* Determine virtual and physical address sizes */
get_cpu_address_sizes(&boot_cpu_data);
@@ -1282,8 +1298,10 @@
#endif
if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
- pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
- pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
+ pv_ops.mmu.ptep_modify_prot_start =
+ xen_ptep_modify_prot_start;
+ pv_ops.mmu.ptep_modify_prot_commit =
+ xen_ptep_modify_prot_commit;
}
machine_ops = xen_machine_ops;
@@ -1303,7 +1321,7 @@
* any NUMA information the kernel tries to get from ACPI will
* be meaningless. Prevent it from trying.
*/
- acpi_numa = -1;
+ disable_srat();
#endif
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
@@ -1315,18 +1333,6 @@
xen_start_info->nr_pages);
xen_reserve_special_pages();
- /* keep using Xen gdt for now; no urgent need to change it */
-
-#ifdef CONFIG_X86_32
- pv_info.kernel_rpl = 1;
- if (xen_feature(XENFEAT_supervisor_mode_kernel))
- pv_info.kernel_rpl = 0;
-#else
- pv_info.kernel_rpl = 0;
-#endif
- /* set the limit of our address space */
- xen_reserve_top();
-
/*
* We used to do this in xen_arch_setup, but that is too late
* on AMD were early_cpu_init (run before ->arch_setup()) calls
@@ -1337,12 +1343,6 @@
if (rc != 0)
xen_raw_printk("physdev_op failed %d\n", rc);
-#ifdef CONFIG_X86_32
- /* set up basic CPUID stuff */
- cpu_detect(&new_cpu_data);
- set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
- new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
-#endif
if (xen_start_info->mod_start) {
if (xen_start_info->flags & SIF_MOD_START_PFN)
@@ -1388,10 +1388,6 @@
xen_acpi_sleep_register();
- /* Avoid searching for BIOS MP tables */
- x86_init.mpparse.find_smp_config = x86_init_noop;
- x86_init.mpparse.get_smp_config = x86_init_uint_noop;
-
xen_boot_params_init_edd();
#ifdef CONFIG_ACPI
@@ -1422,12 +1418,8 @@
xen_efi_init(&boot_params);
/* Start the world */
-#ifdef CONFIG_X86_32
- i386_start_kernel();
-#else
cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */
x86_64_start_reservations((char *)__pa_symbol(&boot_params));
-#endif
}
static int xen_cpu_up_prepare_pv(unsigned int cpu)
@@ -1479,4 +1471,5 @@
.detect = xen_platform_pv,
.type = X86_HYPER_XEN_PV,
.runtime.pin_vcpu = xen_pin_vcpu,
+ .ignore_nopv = true,
};
--
Gitblit v1.6.2