From 04dd17822334871b23ea2862f7798fb0e0007777 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Sat, 11 May 2024 08:53:19 +0000
Subject: [PATCH] change otg to host mode
---
kernel/arch/x86/include/asm/kvm_host.h | 702 +++++++++++++++++++++++++++++++++++++--------------------
1 files changed, 455 insertions(+), 247 deletions(-)
diff --git a/kernel/arch/x86/include/asm/kvm_host.h b/kernel/arch/x86/include/asm/kvm_host.h
index 98b7471..3e9f1c8 100644
--- a/kernel/arch/x86/include/asm/kvm_host.h
+++ b/kernel/arch/x86/include/asm/kvm_host.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Kernel-based Virtual Machine driver for Linux
*
* This header defines architecture specific interfaces, x86 version
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
*/
#ifndef _ASM_X86_KVM_HOST_H
@@ -35,7 +32,10 @@
#include <asm/msr-index.h>
#include <asm/asm.h>
#include <asm/kvm_page_track.h>
+#include <asm/kvm_vcpu_regs.h>
#include <asm/hyperv-tlfs.h>
+
+#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
#define KVM_MAX_VCPUS 288
#define KVM_SOFT_MAX_VCPUS 240
@@ -49,13 +49,16 @@
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
+#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
+ KVM_DIRTY_LOG_INITIALLY_SET)
+
/* x86-specific vcpu->requests bit members */
#define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0)
#define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1)
#define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2)
#define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3)
#define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4)
-#define KVM_REQ_LOAD_CR3 KVM_ARCH_REQ(5)
+#define KVM_REQ_LOAD_MMU_PGD KVM_ARCH_REQ(5)
#define KVM_REQ_EVENT KVM_ARCH_REQ(6)
#define KVM_REQ_APF_HALT KVM_ARCH_REQ(7)
#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8)
@@ -77,7 +80,14 @@
#define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21)
#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
#define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23)
-#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24)
+#define KVM_REQ_GET_NESTED_STATE_PAGES KVM_ARCH_REQ(24)
+#define KVM_REQ_APICV_UPDATE \
+ KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26)
+#define KVM_REQ_TLB_FLUSH_GUEST \
+ KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_APF_READY KVM_ARCH_REQ(28)
+#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -102,7 +112,8 @@
#define UNMAPPED_GVA (~(gpa_t)0)
/* KVM Hugepage definitions for x86 */
-#define KVM_NR_PAGE_SIZES 3
+#define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G
+#define KVM_NR_PAGE_SIZES (KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1)
#define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9)
#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
@@ -111,7 +122,7 @@
static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
{
- /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
+ /* KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K) must be 0. */
return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
}
@@ -122,40 +133,42 @@
#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
-#define KVM_MAX_CPUID_ENTRIES 80
+#define KVM_MAX_CPUID_ENTRIES 256
#define KVM_NR_FIXED_MTRR_REGION 88
#define KVM_NR_VAR_MTRR 8
#define ASYNC_PF_PER_VCPU 64
enum kvm_reg {
- VCPU_REGS_RAX = 0,
- VCPU_REGS_RCX = 1,
- VCPU_REGS_RDX = 2,
- VCPU_REGS_RBX = 3,
- VCPU_REGS_RSP = 4,
- VCPU_REGS_RBP = 5,
- VCPU_REGS_RSI = 6,
- VCPU_REGS_RDI = 7,
+ VCPU_REGS_RAX = __VCPU_REGS_RAX,
+ VCPU_REGS_RCX = __VCPU_REGS_RCX,
+ VCPU_REGS_RDX = __VCPU_REGS_RDX,
+ VCPU_REGS_RBX = __VCPU_REGS_RBX,
+ VCPU_REGS_RSP = __VCPU_REGS_RSP,
+ VCPU_REGS_RBP = __VCPU_REGS_RBP,
+ VCPU_REGS_RSI = __VCPU_REGS_RSI,
+ VCPU_REGS_RDI = __VCPU_REGS_RDI,
#ifdef CONFIG_X86_64
- VCPU_REGS_R8 = 8,
- VCPU_REGS_R9 = 9,
- VCPU_REGS_R10 = 10,
- VCPU_REGS_R11 = 11,
- VCPU_REGS_R12 = 12,
- VCPU_REGS_R13 = 13,
- VCPU_REGS_R14 = 14,
- VCPU_REGS_R15 = 15,
+ VCPU_REGS_R8 = __VCPU_REGS_R8,
+ VCPU_REGS_R9 = __VCPU_REGS_R9,
+ VCPU_REGS_R10 = __VCPU_REGS_R10,
+ VCPU_REGS_R11 = __VCPU_REGS_R11,
+ VCPU_REGS_R12 = __VCPU_REGS_R12,
+ VCPU_REGS_R13 = __VCPU_REGS_R13,
+ VCPU_REGS_R14 = __VCPU_REGS_R14,
+ VCPU_REGS_R15 = __VCPU_REGS_R15,
#endif
VCPU_REGS_RIP,
- NR_VCPU_REGS
-};
+ NR_VCPU_REGS,
-enum kvm_reg_ex {
VCPU_EXREG_PDPTR = NR_VCPU_REGS,
+ VCPU_EXREG_CR0,
VCPU_EXREG_CR3,
+ VCPU_EXREG_CR4,
VCPU_EXREG_RFLAGS,
VCPU_EXREG_SEGMENTS,
+ VCPU_EXREG_EXIT_INFO_1,
+ VCPU_EXREG_EXIT_INFO_2,
};
enum {
@@ -169,9 +182,17 @@
VCPU_SREG_LDTR,
};
-#include <asm/kvm_emulate.h>
+enum exit_fastpath_completion {
+ EXIT_FASTPATH_NONE,
+ EXIT_FASTPATH_REENTER_GUEST,
+ EXIT_FASTPATH_EXIT_HANDLED,
+};
+typedef enum exit_fastpath_completion fastpath_t;
-#define KVM_NR_MEM_OBJS 40
+struct x86_emulate_ctxt;
+struct x86_exception;
+enum x86_intercept;
+enum x86_intercept_stage;
#define KVM_NR_DB_REGS 4
@@ -211,13 +232,6 @@
PFERR_WRITE_MASK | \
PFERR_PRESENT_MASK)
-/*
- * The mask used to denote special SPTEs, which can be either MMIO SPTEs or
- * Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting
- * with the SVE bit in EPT PTEs.
- */
-#define SPTE_SPECIAL_MASK (1ULL << 62)
-
/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC 0
/*
@@ -231,27 +245,18 @@
struct kvm_kernel_irq_routing_entry;
/*
- * We don't want allocation failures within the mmu code, so we preallocate
- * enough memory for a single page fault in a cache.
- */
-struct kvm_mmu_memory_cache {
- int nobjs;
- void *objects[KVM_NR_MEM_OBJS];
-};
-
-/*
* the pages used as guest page table on soft mmu are tracked by
* kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
* by indirect shadow page can not be more than 15 bits.
*
- * Currently, we used 14 bits that are @level, @cr4_pae, @quadrant, @access,
+ * Currently, we used 14 bits that are @level, @gpte_is_8_bytes, @quadrant, @access,
* @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
*/
union kvm_mmu_page_role {
- unsigned word;
+ u32 word;
struct {
unsigned level:4;
- unsigned cr4_pae:1;
+ unsigned gpte_is_8_bytes:1;
unsigned quadrant:2;
unsigned direct:1;
unsigned access:3;
@@ -274,46 +279,38 @@
};
};
-struct kvm_rmap_head {
- unsigned long val;
+union kvm_mmu_extended_role {
+/*
+ * This structure complements kvm_mmu_page_role caching everything needed for
+ * MMU configuration. If nothing in both these structures changed, MMU
+ * re-configuration can be skipped. @valid bit is set on first usage so we don't
+ * treat all-zero structure as valid data.
+ */
+ u32 word;
+ struct {
+ unsigned int valid:1;
+ unsigned int execonly:1;
+ unsigned int cr0_pg:1;
+ unsigned int cr4_pae:1;
+ unsigned int cr4_pse:1;
+ unsigned int cr4_pke:1;
+ unsigned int cr4_smap:1;
+ unsigned int cr4_smep:1;
+ unsigned int cr4_la57:1;
+ unsigned int maxphyaddr:6;
+ };
};
-struct kvm_mmu_page {
- struct list_head link;
- struct hlist_node hash_link;
- struct list_head lpage_disallowed_link;
+union kvm_mmu_role {
+ u64 as_u64;
+ struct {
+ union kvm_mmu_page_role base;
+ union kvm_mmu_extended_role ext;
+ };
+};
- /*
- * The following two entries are used to key the shadow page in the
- * hash table.
- */
- gfn_t gfn;
- union kvm_mmu_page_role role;
-
- u64 *spt;
- /* hold the gfn of each spte inside spt */
- gfn_t *gfns;
- bool unsync;
- bool lpage_disallowed; /* Can't be replaced by an equiv large page */
- int root_count; /* Currently serving as active root */
- unsigned int unsync_children;
- struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
-
- /* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */
- unsigned long mmu_valid_gen;
-
- DECLARE_BITMAP(unsync_child_bitmap, 512);
-
-#ifdef CONFIG_X86_32
- /*
- * Used out of the mmu-lock to avoid reading spte values while an
- * update is in progress; see the comments in __get_spte_lockless().
- */
- int clear_spte_count;
-#endif
-
- /* Number of writes since the last time traversal visited this page. */
- atomic_t write_flooding_count;
+struct kvm_rmap_head {
+ unsigned long val;
};
struct kvm_pio_request {
@@ -332,14 +329,16 @@
};
struct kvm_mmu_root_info {
- gpa_t cr3;
+ gpa_t pgd;
hpa_t hpa;
};
#define KVM_MMU_ROOT_INFO_INVALID \
- ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
+ ((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE })
#define KVM_MMU_NUM_PREV_ROOTS 3
+
+struct kvm_mmu_page;
/*
* x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
@@ -347,8 +346,7 @@
* current mmu mode.
*/
struct kvm_mmu {
- void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
- unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
+ unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu);
u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err,
bool prefault);
@@ -361,10 +359,9 @@
int (*sync_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp);
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
- void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
- u64 *spte, const void *pte);
hpa_t root_hpa;
- union kvm_mmu_page_role base_role;
+ gpa_t root_pgd;
+ union kvm_mmu_role mmu_role;
u8 root_level;
u8 shadow_root_level;
u8 ept_ad;
@@ -406,6 +403,11 @@
u64 pdptrs[4]; /* pae */
};
+struct kvm_tlb_range {
+ u64 start_gfn;
+ u64 pages;
+};
+
enum pmc_type {
KVM_PMC_GP = 0,
KVM_PMC_FIXED,
@@ -418,6 +420,11 @@
u64 eventsel;
struct perf_event *perf_event;
struct kvm_vcpu *vcpu;
+ /*
+ * eventsel value for general purpose counters,
+ * ctrl value for fixed counters.
+ */
+ u64 current_config;
};
struct kvm_pmu {
@@ -425,17 +432,34 @@
unsigned nr_arch_fixed_counters;
unsigned available_event_types;
u64 fixed_ctr_ctrl;
+ u64 fixed_ctr_ctrl_mask;
u64 global_ctrl;
u64 global_status;
u64 global_ovf_ctrl;
u64 counter_bitmask[2];
u64 global_ctrl_mask;
+ u64 global_ovf_ctrl_mask;
u64 reserved_bits;
+ u64 raw_event_mask;
u8 version;
struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
struct irq_work irq_work;
- u64 reprogram_pmi;
+ DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
+ DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
+ DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
+
+ /*
+ * The gate to release perf_events not marked in
+ * pmc_in_use only once in a vcpu time slice.
+ */
+ bool need_cleanup;
+
+ /*
+ * The total number of programmed perf_events and it helps to avoid
+ * redundant check before cleanup if guest don't use vPMU at all.
+ */
+ u8 event_count;
};
struct kvm_pmu_ops;
@@ -464,7 +488,7 @@
struct kvm_vcpu_hv_stimer {
struct hrtimer timer;
int index;
- u64 config;
+ union hv_stimer_config config;
u64 count;
u64 exp_time;
struct hv_message msg;
@@ -494,7 +518,7 @@
struct kvm_hyperv_exit exit;
struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
- cpumask_t tlb_lush;
+ cpumask_t tlb_flush;
};
struct kvm_vcpu_arch {
@@ -512,7 +536,9 @@
unsigned long cr3;
unsigned long cr4;
unsigned long cr4_guest_owned_bits;
+ unsigned long cr4_guest_rsvd_bits;
unsigned long cr8;
+ u32 host_pkru;
u32 pkru;
u32 hflags;
u64 efer;
@@ -527,10 +553,13 @@
u64 ia32_misc_enable_msr;
u64 smbase;
u64 smi_count;
+ bool at_instruction_boundary;
bool tpr_access_reporting;
+ bool xsaves_enabled;
u64 ia32_xss;
u64 microcode_version;
u64 arch_capabilities;
+ u64 perf_capabilities;
/*
* Paging state of the vcpu
@@ -539,13 +568,19 @@
* the paging mode of the l1 guest. This context is always used to
* handle faults.
*/
- struct kvm_mmu mmu;
+ struct kvm_mmu *mmu;
+
+ /* Non-nested MMU for L1 */
+ struct kvm_mmu root_mmu;
+
+ /* L1 MMU when running nested */
+ struct kvm_mmu guest_mmu;
/*
* Paging state of an L2 guest (used for nested npt)
*
* This context will save all necessary information to walk page tables
- * of the an L2 guest. This context is only initialized for page table
+ * of an L2 guest. This context is only initialized for page table
* walking and not for faulting since we never handle l2 page faults on
* the host.
*/
@@ -558,7 +593,8 @@
struct kvm_mmu *walk_mmu;
struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
- struct kvm_mmu_memory_cache mmu_page_cache;
+ struct kvm_mmu_memory_cache mmu_shadow_page_cache;
+ struct kvm_mmu_memory_cache mmu_gfn_array_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
/*
@@ -572,12 +608,11 @@
* "guest_fpu" state here contains the guest FPU context, with the
* host PRKU bits.
*/
- struct fpu user_fpu;
- struct fpu guest_fpu;
+ struct fpu *user_fpu;
+ struct fpu *guest_fpu;
u64 xcr0;
u64 guest_supported_xcr0;
- u32 guest_xstate_size;
struct kvm_pio_request pio;
void *pio_data;
@@ -590,6 +625,8 @@
bool has_error_code;
u8 nr;
u32 error_code;
+ unsigned long payload;
+ bool has_payload;
u8 nested_apf;
} exception;
@@ -602,13 +639,15 @@
int halt_request; /* real mode on Intel only */
int cpuid_nent;
- struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+ struct kvm_cpuid_entry2 *cpuid_entries;
+ unsigned long cr3_lm_rsvd_bits;
int maxphyaddr;
+ int max_tdp_level;
/* emulate context */
- struct x86_emulate_ctxt emulate_ctxt;
+ struct x86_emulate_ctxt *emulate_ctxt;
bool emulate_regs_need_sync_to_vcpu;
bool emulate_regs_need_sync_from_vcpu;
int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
@@ -625,9 +664,10 @@
u8 preempted;
u64 msr_val;
u64 last_steal;
- struct gfn_to_pfn_cache cache;
+ struct gfn_to_hva_cache cache;
} st;
+ u64 l1_tsc_offset;
u64 tsc_offset;
u64 last_guest_tsc;
u64 last_host_tsc;
@@ -641,6 +681,7 @@
u32 virtual_tsc_mult;
u32 virtual_tsc_khz;
s64 ia32_tsc_adjust_msr;
+ u64 msr_ia32_power_ctl;
u64 tsc_scaling_ratio;
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
@@ -668,7 +709,7 @@
/* Cache MMIO info */
u64 mmio_gva;
- unsigned access;
+ unsigned mmio_access;
gfn_t mmio_gfn;
u64 mmio_gen;
@@ -686,14 +727,17 @@
struct {
bool halted;
- gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
+ gfn_t gfns[ASYNC_PF_PER_VCPU];
struct gfn_to_hva_cache data;
- u64 msr_val;
+ u64 msr_en_val; /* MSR_KVM_ASYNC_PF_EN */
+ u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */
+ u16 vec;
u32 id;
bool send_user_only;
- u32 host_apf_reason;
+ u32 host_apf_flags;
unsigned long nested_apf_token;
bool delivery_as_pf_vmexit;
+ bool pageready_pending;
} apf;
/* OSVW MSRs (AMD only) */
@@ -707,10 +751,22 @@
struct gfn_to_hva_cache data;
} pv_eoi;
+ u64 msr_kvm_poll_control;
+
/*
- * Indicate whether the access faults on its page table in guest
- * which is set when fix page fault and used to detect unhandeable
- * instruction.
+ * Indicates the guest is trying to write a gfn that contains one or
+ * more of the PTEs used to translate the write itself, i.e. the access
+ * is changing its own translation in the guest page tables. KVM exits
+ * to userspace if emulation of the faulting instruction fails and this
+ * flag is set, as KVM cannot make forward progress.
+ *
+ * If emulation fails for a write to guest page tables, KVM unprotects
+ * (zaps) the shadow page for the target gfn and resumes the guest to
+ * retry the non-emulatable instruction (on hardware). Unprotecting the
+ * gfn doesn't allow forward progress for a self-changing access because
+ * doing so also zaps the translation for the gfn, i.e. retrying the
+ * instruction will hit a !PRESENT fault, which results in a new shadow
+ * page and sends KVM back to square one.
*/
bool write_fault_to_shadow_pgtable;
@@ -725,15 +781,32 @@
int pending_ioapic_eoi;
int pending_external_vector;
- /* GPA available */
- bool gpa_available;
- gpa_t gpa_val;
-
/* be preempted when it's in kernel-mode(cpl=0) */
bool preempted_in_kernel;
/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
bool l1tf_flush_l1d;
+
+ /* Host CPU on which VM-entry was most recently attempted */
+ unsigned int last_vmentry_cpu;
+
+ /* AMD MSRC001_0015 Hardware Configuration */
+ u64 msr_hwcr;
+
+ /* pv related cpuid info */
+ struct {
+ /*
+ * value of the eax register in the KVM_CPUID_FEATURES CPUID
+ * leaf.
+ */
+ u32 features;
+
+ /*
+ * indicates whether pv emulation should be disabled if features
+ * are not present in the guest's cpuid
+ */
+ bool enforce;
+ } pv_cpuid;
};
struct kvm_lpage_info {
@@ -768,6 +841,18 @@
struct kvm_lapic *phys_map[];
};
+/* Hyper-V synthetic debugger (SynDbg)*/
+struct kvm_hv_syndbg {
+ struct {
+ u64 control;
+ u64 status;
+ u64 send_page;
+ u64 recv_page;
+ u64 pending_page;
+ } control;
+ u64 options;
+};
+
/* Hyper-V emulation context */
struct kvm_hv {
struct mutex hv_lock;
@@ -779,7 +864,7 @@
u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
u64 hv_crash_ctl;
- HV_REFERENCE_TSC_PAGE tsc_ref;
+ struct ms_hyperv_tsc_page tsc_ref;
struct idr conn_to_evt;
@@ -789,6 +874,16 @@
/* How many vCPUs have VP index != vCPU index */
atomic_t num_mismatched_vp_indexes;
+
+ struct hv_partition_assist_pg *hv_pa_pg;
+ struct kvm_hv_syndbg hv_syndbg;
+};
+
+struct msr_bitmap_range {
+ u32 flags;
+ u32 nmsrs;
+ u32 base;
+ unsigned long *bitmap;
};
enum kvm_irqchip_mode {
@@ -797,12 +892,25 @@
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
};
+struct kvm_x86_msr_filter {
+ u8 count;
+ bool default_allow:1;
+ struct msr_bitmap_range ranges[16];
+};
+
+#define APICV_INHIBIT_REASON_DISABLE 0
+#define APICV_INHIBIT_REASON_HYPERV 1
+#define APICV_INHIBIT_REASON_NESTED 2
+#define APICV_INHIBIT_REASON_IRQWIN 3
+#define APICV_INHIBIT_REASON_PIT_REINJ 4
+#define APICV_INHIBIT_REASON_X2APIC 5
+
struct kvm_arch {
unsigned long n_used_mmu_pages;
unsigned long n_requested_mmu_pages;
unsigned long n_max_mmu_pages;
unsigned int indirect_shadow_pages;
- unsigned long mmu_valid_gen;
+ u8 mmu_valid_gen;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/*
* Hash table of struct kvm_mmu_page.
@@ -826,14 +934,17 @@
atomic_t vapics_in_nmi_mode;
struct mutex apic_map_lock;
struct kvm_apic_map *apic_map;
+ atomic_t apic_map_dirty;
bool apic_access_page_done;
+ unsigned long apicv_inhibit_reasons;
gpa_t wall_clock;
bool mwait_in_guest;
bool hlt_in_guest;
bool pause_in_guest;
+ bool cstate_in_guest;
unsigned long irq_sources_bitmap;
s64 kvmclock_offset;
@@ -880,14 +991,36 @@
bool x2apic_broadcast_quirk_disabled;
bool guest_can_read_msr_platform_info;
+ bool exception_payload_enabled;
+ bool bus_lock_detection_enabled;
+
+ /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
+ u32 user_space_msr_mask;
+
+ struct kvm_x86_msr_filter __rcu *msr_filter;
+
+ struct kvm_pmu_event_filter *pmu_event_filter;
struct task_struct *nx_lpage_recovery_thread;
+
+ /*
+ * Whether the TDP MMU is enabled for this VM. This contains a
+ * snapshot of the TDP MMU module parameter from when the VM was
+ * created and remains unchanged for the life of the VM. If this is
+ * true, TDP MMU handler functions will run for various MMU
+ * operations.
+ */
+ bool tdp_mmu_enabled;
+
+ /* List of struct tdp_mmu_pages being used as roots */
+ struct list_head tdp_mmu_roots;
+ /* List of struct tdp_mmu_pages not being used as roots */
+ struct list_head tdp_mmu_pages;
};
struct kvm_vm_stat {
ulong mmu_shadow_zapped;
ulong mmu_pte_write;
- ulong mmu_pte_updated;
ulong mmu_pde_zapped;
ulong mmu_flooded;
ulong mmu_recycled;
@@ -927,6 +1060,10 @@
u64 irq_injections;
u64 nmi_injections;
u64 req_event;
+ u64 halt_poll_success_ns;
+ u64 halt_poll_fail_ns;
+ u64 preemption_reported;
+ u64 preemption_other;
};
struct x86_instruction_info;
@@ -948,25 +1085,25 @@
bool msi_redir_hint;
};
+static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
+{
+ return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
+}
+
struct kvm_x86_ops {
- int (*cpu_has_kvm_support)(void); /* __init */
- int (*disabled_by_bios)(void); /* __init */
int (*hardware_enable)(void);
void (*hardware_disable)(void);
- void (*check_processor_compatibility)(void *rtn);
- int (*hardware_setup)(void); /* __init */
- void (*hardware_unsetup)(void); /* __exit */
+ void (*hardware_unsetup)(void);
bool (*cpu_has_accelerated_tpr)(void);
- bool (*has_emulated_msr)(int index);
- void (*cpuid_update)(struct kvm_vcpu *vcpu);
+ bool (*has_emulated_msr)(u32 index);
+ void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
- struct kvm *(*vm_alloc)(void);
- void (*vm_free)(struct kvm *);
+ unsigned int vm_size;
int (*vm_init)(struct kvm *kvm);
void (*vm_destroy)(struct kvm *kvm);
/* Create, but do not attach this VCPU */
- struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
+ int (*vcpu_create)(struct kvm_vcpu *vcpu);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);
@@ -974,7 +1111,7 @@
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
- void (*update_bp_intercept)(struct kvm_vcpu *vcpu);
+ void (*update_exception_bitmap)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -984,27 +1121,25 @@
void (*set_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
- void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
- void (*decache_cr3)(struct kvm_vcpu *vcpu);
- void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
- void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
- int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
- void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
+ bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
+ void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
+ int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
- u64 (*get_dr6)(struct kvm_vcpu *vcpu);
- void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
- void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
+ void (*tlb_flush_all)(struct kvm_vcpu *vcpu);
+ void (*tlb_flush_current)(struct kvm_vcpu *vcpu);
int (*tlb_remote_flush)(struct kvm *kvm);
+ int (*tlb_remote_flush_with_range)(struct kvm *kvm,
+ struct kvm_tlb_range *range);
/*
* Flush any TLB entries associated with the given GVA.
@@ -1014,9 +1149,17 @@
*/
void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
- void (*run)(struct kvm_vcpu *vcpu);
- int (*handle_exit)(struct kvm_vcpu *vcpu);
- void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+ /*
+ * Flush any TLB entries created by the guest. Like tlb_flush_gva(),
+ * does not need to flush GPA->HPA mappings.
+ */
+ void (*tlb_flush_guest)(struct kvm_vcpu *vcpu);
+
+ enum exit_fastpath_completion (*run)(struct kvm_vcpu *vcpu);
+ int (*handle_exit)(struct kvm_vcpu *vcpu,
+ enum exit_fastpath_completion exit_fastpath);
+ int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+ void (*update_emulated_instruction)(struct kvm_vcpu *vcpu);
void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
void (*patch_hypercall)(struct kvm_vcpu *vcpu,
@@ -1025,52 +1168,49 @@
void (*set_nmi)(struct kvm_vcpu *vcpu);
void (*queue_exception)(struct kvm_vcpu *vcpu);
void (*cancel_injection)(struct kvm_vcpu *vcpu);
- int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
- int (*nmi_allowed)(struct kvm_vcpu *vcpu);
+ int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
+ int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
- bool (*get_enable_apicv)(struct kvm_vcpu *vcpu);
+ bool (*check_apicv_inhibit_reasons)(ulong bit);
+ void (*pre_update_apicv_exec_ctrl)(struct kvm *kvm, bool activate);
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
- void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
+ void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
- int (*get_tdp_level)(struct kvm_vcpu *vcpu);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
- int (*get_lpage_level)(void);
- bool (*rdtscp_supported)(void);
- bool (*invpcid_supported)(void);
- void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
-
- void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry);
+ void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, unsigned long pgd,
+ int pgd_level);
bool (*has_wbinvd_exit)(void);
- u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu);
/* Returns actual tsc_offset set in active VMCS */
u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
- void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
+ /*
+ * Retrieve somewhat arbitrary exit information. Intended to be used
+ * only from within tracepoints to avoid VMREADs when tracing is off.
+ */
+ void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
+ u32 *exit_int_info, u32 *exit_int_info_err_code);
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
- enum x86_intercept_stage stage);
- void (*handle_external_intr)(struct kvm_vcpu *vcpu);
- bool (*mpx_supported)(void);
- bool (*xsaves_supported)(void);
- bool (*umip_emulated)(void);
+ enum x86_intercept_stage stage,
+ struct x86_exception *exception);
+ void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
- int (*check_nested_events)(struct kvm_vcpu *vcpu);
void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
@@ -1099,10 +1239,10 @@
void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t offset, unsigned long mask);
- int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
+ const struct kvm_x86_nested_ops *nested_ops;
/*
* Architecture specific hooks for vCPU blocking due to
@@ -1124,29 +1264,59 @@
void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
- int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc);
+ int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
+ bool *expired);
void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
void (*setup_mce)(struct kvm_vcpu *vcpu);
- int (*get_nested_state)(struct kvm_vcpu *vcpu,
- struct kvm_nested_state __user *user_kvm_nested_state,
- unsigned user_data_size);
- int (*set_nested_state)(struct kvm_vcpu *vcpu,
- struct kvm_nested_state __user *user_kvm_nested_state,
- struct kvm_nested_state *kvm_state);
- void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
-
- int (*smi_allowed)(struct kvm_vcpu *vcpu);
+ int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
- int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
- int (*enable_smi_window)(struct kvm_vcpu *vcpu);
+ int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
+ void (*enable_smi_window)(struct kvm_vcpu *vcpu);
int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+ void (*guest_memory_reclaimed)(struct kvm *kvm);
int (*get_msr_feature)(struct kvm_msr_entry *entry);
+
+ bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, void *insn, int insn_len);
+
+ bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
+ int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
+
+ void (*migrate_timers)(struct kvm_vcpu *vcpu);
+ void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
+};
+
+struct kvm_x86_nested_ops {
+ void (*leave_nested)(struct kvm_vcpu *vcpu);
+ int (*check_events)(struct kvm_vcpu *vcpu);
+ bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
+ int (*get_state)(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state __user *user_kvm_nested_state,
+ unsigned user_data_size);
+ int (*set_state)(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state __user *user_kvm_nested_state,
+ struct kvm_nested_state *kvm_state);
+ bool (*get_nested_state_pages)(struct kvm_vcpu *vcpu);
+ int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
+
+ int (*enable_evmcs)(struct kvm_vcpu *vcpu,
+ uint16_t *vmcs_version);
+ uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu);
+};
+
+struct kvm_x86_init_ops {
+ int (*cpu_has_kvm_support)(void);
+ int (*disabled_by_bios)(void);
+ int (*check_processor_compatibility)(void);
+ int (*hardware_setup)(void);
+ bool (*intel_pt_intr_in_guest)(void);
+
+ struct kvm_x86_ops *runtime_ops;
};
struct kvm_arch_async_pf {
@@ -1156,35 +1326,33 @@
bool direct_map;
};
-extern struct kvm_x86_ops *kvm_x86_ops;
+extern u64 __read_mostly host_efer;
+extern bool __read_mostly allow_smaller_maxphyaddr;
+extern struct kvm_x86_ops kvm_x86_ops;
#define __KVM_HAVE_ARCH_VM_ALLOC
static inline struct kvm *kvm_arch_alloc_vm(void)
{
- return kvm_x86_ops->vm_alloc();
+ return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
}
-
-static inline void kvm_arch_free_vm(struct kvm *kvm)
-{
- return kvm_x86_ops->vm_free(kvm);
-}
+void kvm_arch_free_vm(struct kvm *kvm);
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
{
- if (kvm_x86_ops->tlb_remote_flush &&
- !kvm_x86_ops->tlb_remote_flush(kvm))
+ if (kvm_x86_ops.tlb_remote_flush &&
+ !kvm_x86_ops.tlb_remote_flush(kvm))
return 0;
else
return -ENOTSUPP;
}
-int kvm_mmu_module_init(void);
-void kvm_mmu_module_exit(void);
+void __init kvm_mmu_x86_module_init(void);
+int kvm_mmu_vendor_module_init(void);
+void kvm_mmu_vendor_module_exit(void);
void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
-void kvm_mmu_setup(struct kvm_vcpu *vcpu);
void kvm_mmu_init_vm(struct kvm *kvm);
void kvm_mmu_uninit_vm(struct kvm *kvm);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
@@ -1193,7 +1361,8 @@
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
- struct kvm_memory_slot *memslot);
+ struct kvm_memory_slot *memslot,
+ int start_level);
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *memslot);
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
@@ -1207,7 +1376,7 @@
gfn_t gfn_offset, unsigned long mask);
void kvm_mmu_zap_all(struct kvm *kvm);
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
-unsigned long kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
+unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
@@ -1246,28 +1415,59 @@
extern u64 kvm_mce_cap_supported;
-enum emulation_result {
- EMULATE_DONE, /* no further processing */
- EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */
- EMULATE_FAIL, /* can't emulate this instruction */
-};
-
+/*
+ * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
+ * userspace I/O) to indicate that the emulation context
+ * should be resued as is, i.e. skip initialization of
+ * emulation context, instruction fetch and decode.
+ *
+ * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware.
+ * Indicates that only select instructions (tagged with
+ * EmulateOnUD) should be emulated (to minimize the emulator
+ * attack surface). See also EMULTYPE_TRAP_UD_FORCED.
+ *
+ * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
+ * decode the instruction length. For use *only* by
+ * kvm_x86_ops.skip_emulated_instruction() implementations.
+ *
+ * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to
+ * retry native execution under certain conditions,
+ * Can only be set in conjunction with EMULTYPE_PF.
+ *
+ * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was
+ * triggered by KVM's magic "force emulation" prefix,
+ * which is opt in via module param (off by default).
+ * Bypasses EmulateOnUD restriction despite emulating
+ * due to an intercepted #UD (see EMULTYPE_TRAP_UD).
+ * Used to test the full emulator from userspace.
+ *
+ * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware
+ * backdoor emulation, which is opt in via module param.
+ * VMware backoor emulation handles select instructions
+ * and reinjects the #GP for all other cases.
+ *
+ * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which
+ * case the CR2/GPA value pass on the stack is valid.
+ */
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
#define EMULTYPE_SKIP (1 << 2)
-#define EMULTYPE_ALLOW_RETRY (1 << 3)
-#define EMULTYPE_NO_UD_ON_FAIL (1 << 4)
-#define EMULTYPE_VMWARE (1 << 5)
+#define EMULTYPE_ALLOW_RETRY_PF (1 << 3)
+#define EMULTYPE_TRAP_UD_FORCED (1 << 4)
+#define EMULTYPE_VMWARE_GP (1 << 5)
+#define EMULTYPE_PF (1 << 6)
+
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
void *insn, int insn_len);
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
-int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
-int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
-
-struct x86_emulate_ctxt;
+int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated);
+int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
+int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
+int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu);
int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
@@ -1302,9 +1502,12 @@
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
+void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload);
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
+bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
+ struct x86_exception *fault);
int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
gfn_t gfn, void *data, int offset, int len,
u32 access);
@@ -1332,13 +1535,16 @@
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
+void kvm_update_dr7(struct kvm_vcpu *vcpu);
+
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free);
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ ulong roots_to_free);
gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
@@ -1350,31 +1556,25 @@
gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception);
-void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);
+bool kvm_apicv_activated(struct kvm *kvm);
+void kvm_apicv_init(struct kvm *kvm, bool enable);
+void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
+void kvm_request_apicv_update(struct kvm *kvm, bool activate,
+ unsigned long bit);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
+void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ gva_t gva, hpa_t root_hpa);
void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
-void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush);
+void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush,
+ bool skip_mmu_sync);
-void kvm_enable_tdp(void);
-void kvm_disable_tdp(void);
-
-static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
- struct x86_exception *exception)
-{
- return gpa;
-}
-
-static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
-{
- struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
-
- return (struct kvm_mmu_page *)page_private(page);
-}
+void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
+ int tdp_huge_page_level);
static inline u16 kvm_read_ldt(void)
{
@@ -1423,8 +1623,6 @@
};
#define HF_GIF_MASK (1 << 0)
-#define HF_HIF_MASK (1 << 1)
-#define HF_VINTR_MASK (1 << 2)
#define HF_NMI_MASK (1 << 3)
#define HF_IRET_MASK (1 << 4)
#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */
@@ -1437,7 +1635,7 @@
#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
-asmlinkage void __noreturn kvm_spurious_fault(void);
+asmlinkage void kvm_spurious_fault(void);
/*
* Hardware virtualization extension instructions may fault if a
@@ -1445,31 +1643,29 @@
* Usually after catching the fault we just panic; during reboot
* instead the instruction is ignored.
*/
-#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
+#define __kvm_handle_fault_on_reboot(insn) \
"666: \n\t" \
insn "\n\t" \
"jmp 668f \n\t" \
"667: \n\t" \
- "call kvm_spurious_fault \n\t" \
- "668: \n\t" \
- ".pushsection .fixup, \"ax\" \n\t" \
- "700: \n\t" \
- cleanup_insn "\n\t" \
- "cmpb $0, kvm_rebooting\n\t" \
- "je 667b \n\t" \
- "jmp 668b \n\t" \
+ "1: \n\t" \
+ ".pushsection .discard.instr_begin \n\t" \
+ ".long 1b - . \n\t" \
".popsection \n\t" \
- _ASM_EXTABLE(666b, 700b)
-
-#define __kvm_handle_fault_on_reboot(insn) \
- ____kvm_handle_fault_on_reboot(insn, "")
+ "call kvm_spurious_fault \n\t" \
+ "1: \n\t" \
+ ".pushsection .discard.instr_end \n\t" \
+ ".long 1b - . \n\t" \
+ ".popsection \n\t" \
+ "668: \n\t" \
+ _ASM_EXTABLE(666b, 667b)
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
- bool blockable);
+ unsigned flags);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_cpu_has_extint(struct kvm_vcpu *v);
@@ -1482,9 +1678,9 @@
unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit);
-u64 kvm_get_arch_capabilities(void);
-void kvm_define_shared_msr(unsigned index, u32 msr);
-int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+void kvm_define_user_return_msr(unsigned index, u32 msr);
+int kvm_probe_user_return_msr(u32 msr);
+int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
@@ -1494,14 +1690,17 @@
void kvm_make_mclock_inprogress_request(struct kvm *kvm);
void kvm_make_scan_ioapic_request(struct kvm *kvm);
+void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
+ unsigned long *vcpu_bitmap);
-void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
-bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu);
+bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu);
extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
@@ -1511,7 +1710,6 @@
int kvm_is_in_guest(void);
int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
-int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
@@ -1521,16 +1719,23 @@
void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
struct kvm_lapic_irq *irq);
+static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
+{
+ /* We can only post Fixed and LowPrio IRQs */
+ return (irq->delivery_mode == APIC_DM_FIXED ||
+ irq->delivery_mode == APIC_DM_LOWEST);
+}
+
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
- if (kvm_x86_ops->vcpu_blocking)
- kvm_x86_ops->vcpu_blocking(vcpu);
+ if (kvm_x86_ops.vcpu_blocking)
+ kvm_x86_ops.vcpu_blocking(vcpu);
}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
- if (kvm_x86_ops->vcpu_unblocking)
- kvm_x86_ops->vcpu_unblocking(vcpu);
+ if (kvm_x86_ops.vcpu_unblocking)
+ kvm_x86_ops.vcpu_unblocking(vcpu);
}
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
@@ -1548,4 +1753,7 @@
#define put_smstate(type, buf, offset, val) \
*(type *)((buf) + (offset) - 0x7e00) = val
+#define GET_SMSTATE(type, buf, offset) \
+ (*(type *)((buf) + (offset) - 0x7e00))
+
#endif /* _ASM_X86_KVM_HOST_H */
--
Gitblit v1.6.2