2024-10-12 a5969cabbb4660eab42b6ef0412cbbd1200cf14d
kernel/arch/x86/include/asm/kvm_host.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This header defines architecture specific interfaces, x86 version
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
 */
 
 #ifndef _ASM_X86_KVM_HOST_H
@@ -35,7 +32,10 @@
 #include <asm/msr-index.h>
 #include <asm/asm.h>
 #include <asm/kvm_page_track.h>
+#include <asm/kvm_vcpu_regs.h>
 #include <asm/hyperv-tlfs.h>
+
+#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
 
 #define KVM_MAX_VCPUS 288
 #define KVM_SOFT_MAX_VCPUS 240
@@ -49,13 +49,16 @@
 
 #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
 
+#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
+ KVM_DIRTY_LOG_INITIALLY_SET)
+
 /* x86-specific vcpu->requests bit members */
 #define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0)
 #define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1)
 #define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2)
 #define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3)
 #define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4)
-#define KVM_REQ_LOAD_CR3 KVM_ARCH_REQ(5)
+#define KVM_REQ_LOAD_MMU_PGD KVM_ARCH_REQ(5)
 #define KVM_REQ_EVENT KVM_ARCH_REQ(6)
 #define KVM_REQ_APF_HALT KVM_ARCH_REQ(7)
 #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8)
@@ -77,7 +80,14 @@
 #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21)
 #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
 #define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23)
-#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24)
+#define KVM_REQ_GET_NESTED_STATE_PAGES KVM_ARCH_REQ(24)
+#define KVM_REQ_APICV_UPDATE \
+ KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26)
+#define KVM_REQ_TLB_FLUSH_GUEST \
+ KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_APF_READY KVM_ARCH_REQ(28)
+#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
 
 #define CR0_RESERVED_BITS \
 (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -102,7 +112,8 @@
 #define UNMAPPED_GVA (~(gpa_t)0)
 
 /* KVM Hugepage definitions for x86 */
-#define KVM_NR_PAGE_SIZES 3
+#define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G
+#define KVM_NR_PAGE_SIZES (KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1)
 #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9)
 #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
 #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
@@ -111,7 +122,7 @@
 
 static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
 {
- /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
+ /* KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K) must be 0. */
 return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
 (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
 }
@@ -122,40 +133,42 @@
 #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
 #define KVM_MIN_FREE_MMU_PAGES 5
 #define KVM_REFILL_PAGES 25
-#define KVM_MAX_CPUID_ENTRIES 80
+#define KVM_MAX_CPUID_ENTRIES 256
 #define KVM_NR_FIXED_MTRR_REGION 88
 #define KVM_NR_VAR_MTRR 8
 
 #define ASYNC_PF_PER_VCPU 64
 
 enum kvm_reg {
- VCPU_REGS_RAX = 0,
- VCPU_REGS_RCX = 1,
- VCPU_REGS_RDX = 2,
- VCPU_REGS_RBX = 3,
- VCPU_REGS_RSP = 4,
- VCPU_REGS_RBP = 5,
- VCPU_REGS_RSI = 6,
- VCPU_REGS_RDI = 7,
+ VCPU_REGS_RAX = __VCPU_REGS_RAX,
+ VCPU_REGS_RCX = __VCPU_REGS_RCX,
+ VCPU_REGS_RDX = __VCPU_REGS_RDX,
+ VCPU_REGS_RBX = __VCPU_REGS_RBX,
+ VCPU_REGS_RSP = __VCPU_REGS_RSP,
+ VCPU_REGS_RBP = __VCPU_REGS_RBP,
+ VCPU_REGS_RSI = __VCPU_REGS_RSI,
+ VCPU_REGS_RDI = __VCPU_REGS_RDI,
 #ifdef CONFIG_X86_64
- VCPU_REGS_R8 = 8,
- VCPU_REGS_R9 = 9,
- VCPU_REGS_R10 = 10,
- VCPU_REGS_R11 = 11,
- VCPU_REGS_R12 = 12,
- VCPU_REGS_R13 = 13,
- VCPU_REGS_R14 = 14,
- VCPU_REGS_R15 = 15,
+ VCPU_REGS_R8 = __VCPU_REGS_R8,
+ VCPU_REGS_R9 = __VCPU_REGS_R9,
+ VCPU_REGS_R10 = __VCPU_REGS_R10,
+ VCPU_REGS_R11 = __VCPU_REGS_R11,
+ VCPU_REGS_R12 = __VCPU_REGS_R12,
+ VCPU_REGS_R13 = __VCPU_REGS_R13,
+ VCPU_REGS_R14 = __VCPU_REGS_R14,
+ VCPU_REGS_R15 = __VCPU_REGS_R15,
 #endif
 VCPU_REGS_RIP,
- NR_VCPU_REGS
-};
+ NR_VCPU_REGS,
 
-enum kvm_reg_ex {
 VCPU_EXREG_PDPTR = NR_VCPU_REGS,
+ VCPU_EXREG_CR0,
 VCPU_EXREG_CR3,
+ VCPU_EXREG_CR4,
 VCPU_EXREG_RFLAGS,
 VCPU_EXREG_SEGMENTS,
+ VCPU_EXREG_EXIT_INFO_1,
+ VCPU_EXREG_EXIT_INFO_2,
 };
 
 enum {
@@ -169,9 +182,17 @@
 VCPU_SREG_LDTR,
 };
 
-#include <asm/kvm_emulate.h>
+enum exit_fastpath_completion {
+ EXIT_FASTPATH_NONE,
+ EXIT_FASTPATH_REENTER_GUEST,
+ EXIT_FASTPATH_EXIT_HANDLED,
+};
+typedef enum exit_fastpath_completion fastpath_t;
 
-#define KVM_NR_MEM_OBJS 40
+struct x86_emulate_ctxt;
+struct x86_exception;
+enum x86_intercept;
+enum x86_intercept_stage;
 
 #define KVM_NR_DB_REGS 4
 
@@ -211,13 +232,6 @@
 PFERR_WRITE_MASK | \
 PFERR_PRESENT_MASK)
 
-/*
- * The mask used to denote special SPTEs, which can be either MMIO SPTEs or
- * Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting
- * with the SVE bit in EPT PTEs.
- */
-#define SPTE_SPECIAL_MASK (1ULL << 62)
-
 /* apic attention bits */
 #define KVM_APIC_CHECK_VAPIC 0
 /*
@@ -231,27 +245,18 @@
 struct kvm_kernel_irq_routing_entry;
 
 /*
- * We don't want allocation failures within the mmu code, so we preallocate
- * enough memory for a single page fault in a cache.
- */
-struct kvm_mmu_memory_cache {
- int nobjs;
- void *objects[KVM_NR_MEM_OBJS];
-};
-
-/*
 * the pages used as guest page table on soft mmu are tracked by
 * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
 * by indirect shadow page can not be more than 15 bits.
 *
- * Currently, we used 14 bits that are @level, @cr4_pae, @quadrant, @access,
+ * Currently, we used 14 bits that are @level, @gpte_is_8_bytes, @quadrant, @access,
 * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
 */
 union kvm_mmu_page_role {
- unsigned word;
+ u32 word;
 struct {
 unsigned level:4;
- unsigned cr4_pae:1;
+ unsigned gpte_is_8_bytes:1;
 unsigned quadrant:2;
 unsigned direct:1;
 unsigned access:3;
@@ -274,46 +279,38 @@
 };
 };
 
-struct kvm_rmap_head {
- unsigned long val;
+union kvm_mmu_extended_role {
+/*
+ * This structure complements kvm_mmu_page_role caching everything needed for
+ * MMU configuration. If nothing in both these structures changed, MMU
+ * re-configuration can be skipped. @valid bit is set on first usage so we don't
+ * treat all-zero structure as valid data.
+ */
+ u32 word;
+ struct {
+ unsigned int valid:1;
+ unsigned int execonly:1;
+ unsigned int cr0_pg:1;
+ unsigned int cr4_pae:1;
+ unsigned int cr4_pse:1;
+ unsigned int cr4_pke:1;
+ unsigned int cr4_smap:1;
+ unsigned int cr4_smep:1;
+ unsigned int cr4_la57:1;
+ unsigned int maxphyaddr:6;
+ };
 };
 
-struct kvm_mmu_page {
- struct list_head link;
- struct hlist_node hash_link;
- struct list_head lpage_disallowed_link;
+union kvm_mmu_role {
+ u64 as_u64;
+ struct {
+ union kvm_mmu_page_role base;
+ union kvm_mmu_extended_role ext;
+ };
+};
 
- /*
- * The following two entries are used to key the shadow page in the
- * hash table.
- */
- gfn_t gfn;
- union kvm_mmu_page_role role;
-
- u64 *spt;
- /* hold the gfn of each spte inside spt */
- gfn_t *gfns;
- bool unsync;
- bool lpage_disallowed; /* Can't be replaced by an equiv large page */
- int root_count; /* Currently serving as active root */
- unsigned int unsync_children;
- struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
-
- /* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */
- unsigned long mmu_valid_gen;
-
- DECLARE_BITMAP(unsync_child_bitmap, 512);
-
-#ifdef CONFIG_X86_32
- /*
- * Used out of the mmu-lock to avoid reading spte values while an
- * update is in progress; see the comments in __get_spte_lockless().
- */
- int clear_spte_count;
-#endif
-
- /* Number of writes since the last time traversal visited this page. */
- atomic_t write_flooding_count;
+struct kvm_rmap_head {
+ unsigned long val;
 };
 
 struct kvm_pio_request {
@@ -332,14 +329,16 @@
 };
 
 struct kvm_mmu_root_info {
- gpa_t cr3;
+ gpa_t pgd;
 hpa_t hpa;
 };
 
 #define KVM_MMU_ROOT_INFO_INVALID \
- ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
+ ((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE })
 
 #define KVM_MMU_NUM_PREV_ROOTS 3
+
+struct kvm_mmu_page;
 
 /*
 * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
@@ -347,8 +346,7 @@
 * current mmu mode.
 */
 struct kvm_mmu {
- void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
- unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
+ unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu);
 u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
 int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err,
 bool prefault);
@@ -361,10 +359,9 @@
 int (*sync_page)(struct kvm_vcpu *vcpu,
 struct kvm_mmu_page *sp);
 void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
- void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
- u64 *spte, const void *pte);
 hpa_t root_hpa;
- union kvm_mmu_page_role base_role;
+ gpa_t root_pgd;
+ union kvm_mmu_role mmu_role;
 u8 root_level;
 u8 shadow_root_level;
 u8 ept_ad;
@@ -406,6 +403,11 @@
 u64 pdptrs[4]; /* pae */
 };
 
+struct kvm_tlb_range {
+ u64 start_gfn;
+ u64 pages;
+};
+
 enum pmc_type {
 KVM_PMC_GP = 0,
 KVM_PMC_FIXED,
@@ -418,6 +420,11 @@
 u64 eventsel;
 struct perf_event *perf_event;
 struct kvm_vcpu *vcpu;
+ /*
+ * eventsel value for general purpose counters,
+ * ctrl value for fixed counters.
+ */
+ u64 current_config;
 };
 
 struct kvm_pmu {
@@ -425,17 +432,34 @@
 unsigned nr_arch_fixed_counters;
 unsigned available_event_types;
 u64 fixed_ctr_ctrl;
+ u64 fixed_ctr_ctrl_mask;
 u64 global_ctrl;
 u64 global_status;
 u64 global_ovf_ctrl;
 u64 counter_bitmask[2];
 u64 global_ctrl_mask;
+ u64 global_ovf_ctrl_mask;
 u64 reserved_bits;
+ u64 raw_event_mask;
 u8 version;
 struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
 struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
 struct irq_work irq_work;
- u64 reprogram_pmi;
+ DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
+ DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
+ DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
+
+ /*
+ * The gate to release perf_events not marked in
+ * pmc_in_use only once in a vcpu time slice.
+ */
+ bool need_cleanup;
+
+ /*
+ * The total number of programmed perf_events and it helps to avoid
+ * redundant check before cleanup if guest don't use vPMU at all.
+ */
+ u8 event_count;
 };
 
 struct kvm_pmu_ops;
@@ -464,7 +488,7 @@
 struct kvm_vcpu_hv_stimer {
 struct hrtimer timer;
 int index;
- u64 config;
+ union hv_stimer_config config;
 u64 count;
 u64 exp_time;
 struct hv_message msg;
@@ -494,7 +518,7 @@
 struct kvm_hyperv_exit exit;
 struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
 DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
- cpumask_t tlb_lush;
+ cpumask_t tlb_flush;
 };
 
 struct kvm_vcpu_arch {
@@ -512,7 +536,9 @@
 unsigned long cr3;
 unsigned long cr4;
 unsigned long cr4_guest_owned_bits;
+ unsigned long cr4_guest_rsvd_bits;
 unsigned long cr8;
+ u32 host_pkru;
 u32 pkru;
 u32 hflags;
 u64 efer;
@@ -527,10 +553,13 @@
 u64 ia32_misc_enable_msr;
 u64 smbase;
 u64 smi_count;
+ bool at_instruction_boundary;
 bool tpr_access_reporting;
+ bool xsaves_enabled;
 u64 ia32_xss;
 u64 microcode_version;
 u64 arch_capabilities;
+ u64 perf_capabilities;
 
 /*
 * Paging state of the vcpu
@@ -539,13 +568,19 @@
 * the paging mode of the l1 guest. This context is always used to
 * handle faults.
 */
- struct kvm_mmu mmu;
+ struct kvm_mmu *mmu;
+
+ /* Non-nested MMU for L1 */
+ struct kvm_mmu root_mmu;
+
+ /* L1 MMU when running nested */
+ struct kvm_mmu guest_mmu;
 
 /*
 * Paging state of an L2 guest (used for nested npt)
 *
 * This context will save all necessary information to walk page tables
- * of the an L2 guest. This context is only initialized for page table
+ * of an L2 guest. This context is only initialized for page table
 * walking and not for faulting since we never handle l2 page faults on
 * the host.
 */
@@ -558,7 +593,8 @@
 struct kvm_mmu *walk_mmu;
 
 struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
- struct kvm_mmu_memory_cache mmu_page_cache;
+ struct kvm_mmu_memory_cache mmu_shadow_page_cache;
+ struct kvm_mmu_memory_cache mmu_gfn_array_cache;
 struct kvm_mmu_memory_cache mmu_page_header_cache;
 
 /*
@@ -572,12 +608,11 @@
 * "guest_fpu" state here contains the guest FPU context, with the
 * host PRKU bits.
 */
- struct fpu user_fpu;
- struct fpu guest_fpu;
+ struct fpu *user_fpu;
+ struct fpu *guest_fpu;
 
 u64 xcr0;
 u64 guest_supported_xcr0;
- u32 guest_xstate_size;
 
 struct kvm_pio_request pio;
 void *pio_data;
@@ -590,6 +625,8 @@
 bool has_error_code;
 u8 nr;
 u32 error_code;
+ unsigned long payload;
+ bool has_payload;
 u8 nested_apf;
 } exception;
 
@@ -602,13 +639,15 @@
 int halt_request; /* real mode on Intel only */
 
 int cpuid_nent;
- struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+ struct kvm_cpuid_entry2 *cpuid_entries;
 
+ unsigned long cr3_lm_rsvd_bits;
 int maxphyaddr;
+ int max_tdp_level;
 
 /* emulate context */
 
- struct x86_emulate_ctxt emulate_ctxt;
+ struct x86_emulate_ctxt *emulate_ctxt;
 bool emulate_regs_need_sync_to_vcpu;
 bool emulate_regs_need_sync_from_vcpu;
 int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
@@ -625,9 +664,10 @@
 u8 preempted;
 u64 msr_val;
 u64 last_steal;
- struct gfn_to_pfn_cache cache;
+ struct gfn_to_hva_cache cache;
 } st;
 
+ u64 l1_tsc_offset;
 u64 tsc_offset;
 u64 last_guest_tsc;
 u64 last_host_tsc;
@@ -641,6 +681,7 @@
 u32 virtual_tsc_mult;
 u32 virtual_tsc_khz;
 s64 ia32_tsc_adjust_msr;
+ u64 msr_ia32_power_ctl;
 u64 tsc_scaling_ratio;
 
 atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
@@ -668,7 +709,7 @@
 
 /* Cache MMIO info */
 u64 mmio_gva;
- unsigned access;
+ unsigned mmio_access;
 gfn_t mmio_gfn;
 u64 mmio_gen;
 
@@ -686,14 +727,17 @@
 
 struct {
 bool halted;
- gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
+ gfn_t gfns[ASYNC_PF_PER_VCPU];
 struct gfn_to_hva_cache data;
- u64 msr_val;
+ u64 msr_en_val; /* MSR_KVM_ASYNC_PF_EN */
+ u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */
+ u16 vec;
 u32 id;
 bool send_user_only;
- u32 host_apf_reason;
+ u32 host_apf_flags;
 unsigned long nested_apf_token;
 bool delivery_as_pf_vmexit;
+ bool pageready_pending;
 } apf;
 
 /* OSVW MSRs (AMD only) */
@@ -707,10 +751,22 @@
 struct gfn_to_hva_cache data;
 } pv_eoi;
 
+ u64 msr_kvm_poll_control;
+
 /*
- * Indicate whether the access faults on its page table in guest
- * which is set when fix page fault and used to detect unhandeable
- * instruction.
+ * Indicates the guest is trying to write a gfn that contains one or
+ * more of the PTEs used to translate the write itself, i.e. the access
+ * is changing its own translation in the guest page tables. KVM exits
+ * to userspace if emulation of the faulting instruction fails and this
+ * flag is set, as KVM cannot make forward progress.
+ *
+ * If emulation fails for a write to guest page tables, KVM unprotects
+ * (zaps) the shadow page for the target gfn and resumes the guest to
+ * retry the non-emulatable instruction (on hardware). Unprotecting the
+ * gfn doesn't allow forward progress for a self-changing access because
+ * doing so also zaps the translation for the gfn, i.e. retrying the
+ * instruction will hit a !PRESENT fault, which results in a new shadow
+ * page and sends KVM back to square one.
 */
 bool write_fault_to_shadow_pgtable;
 
@@ -725,15 +781,32 @@
 int pending_ioapic_eoi;
 int pending_external_vector;
 
- /* GPA available */
- bool gpa_available;
- gpa_t gpa_val;
-
 /* be preempted when it's in kernel-mode(cpl=0) */
 bool preempted_in_kernel;
 
 /* Flush the L1 Data cache for L1TF mitigation on VMENTER */
 bool l1tf_flush_l1d;
+
+ /* Host CPU on which VM-entry was most recently attempted */
+ unsigned int last_vmentry_cpu;
+
+ /* AMD MSRC001_0015 Hardware Configuration */
+ u64 msr_hwcr;
+
+ /* pv related cpuid info */
+ struct {
+ /*
+ * value of the eax register in the KVM_CPUID_FEATURES CPUID
+ * leaf.
+ */
+ u32 features;
+
+ /*
+ * indicates whether pv emulation should be disabled if features
+ * are not present in the guest's cpuid
+ */
+ bool enforce;
+ } pv_cpuid;
 };
 
 struct kvm_lpage_info {
@@ -768,6 +841,18 @@
 struct kvm_lapic *phys_map[];
 };
 
+/* Hyper-V synthetic debugger (SynDbg)*/
+struct kvm_hv_syndbg {
+ struct {
+ u64 control;
+ u64 status;
+ u64 send_page;
+ u64 recv_page;
+ u64 pending_page;
+ } control;
+ u64 options;
+};
+
 /* Hyper-V emulation context */
 struct kvm_hv {
 struct mutex hv_lock;
@@ -779,7 +864,7 @@
 u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
 u64 hv_crash_ctl;
 
- HV_REFERENCE_TSC_PAGE tsc_ref;
+ struct ms_hyperv_tsc_page tsc_ref;
 
 struct idr conn_to_evt;
 
@@ -789,6 +874,16 @@
 
 /* How many vCPUs have VP index != vCPU index */
 atomic_t num_mismatched_vp_indexes;
+
+ struct hv_partition_assist_pg *hv_pa_pg;
+ struct kvm_hv_syndbg hv_syndbg;
+};
+
+struct msr_bitmap_range {
+ u32 flags;
+ u32 nmsrs;
+ u32 base;
+ unsigned long *bitmap;
 };
 
 enum kvm_irqchip_mode {
@@ -797,12 +892,25 @@
 KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
 };
 
+struct kvm_x86_msr_filter {
+ u8 count;
+ bool default_allow:1;
+ struct msr_bitmap_range ranges[16];
+};
+
+#define APICV_INHIBIT_REASON_DISABLE 0
+#define APICV_INHIBIT_REASON_HYPERV 1
+#define APICV_INHIBIT_REASON_NESTED 2
+#define APICV_INHIBIT_REASON_IRQWIN 3
+#define APICV_INHIBIT_REASON_PIT_REINJ 4
+#define APICV_INHIBIT_REASON_X2APIC 5
+
 struct kvm_arch {
 unsigned long n_used_mmu_pages;
 unsigned long n_requested_mmu_pages;
 unsigned long n_max_mmu_pages;
 unsigned int indirect_shadow_pages;
- unsigned long mmu_valid_gen;
+ u8 mmu_valid_gen;
 struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 /*
 * Hash table of struct kvm_mmu_page.
@@ -826,14 +934,17 @@
 atomic_t vapics_in_nmi_mode;
 struct mutex apic_map_lock;
 struct kvm_apic_map *apic_map;
+ atomic_t apic_map_dirty;
 
 bool apic_access_page_done;
+ unsigned long apicv_inhibit_reasons;
 
 gpa_t wall_clock;
 
 bool mwait_in_guest;
 bool hlt_in_guest;
 bool pause_in_guest;
+ bool cstate_in_guest;
 
 unsigned long irq_sources_bitmap;
 s64 kvmclock_offset;
@@ -880,14 +991,36 @@
 bool x2apic_broadcast_quirk_disabled;
 
 bool guest_can_read_msr_platform_info;
+ bool exception_payload_enabled;
 
+ bool bus_lock_detection_enabled;
+
+ /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
+ u32 user_space_msr_mask;
+
+ struct kvm_x86_msr_filter __rcu *msr_filter;
+
+ struct kvm_pmu_event_filter *pmu_event_filter;
 struct task_struct *nx_lpage_recovery_thread;
+
+ /*
+ * Whether the TDP MMU is enabled for this VM. This contains a
+ * snapshot of the TDP MMU module parameter from when the VM was
+ * created and remains unchanged for the life of the VM. If this is
+ * true, TDP MMU handler functions will run for various MMU
+ * operations.
+ */
+ bool tdp_mmu_enabled;
+
+ /* List of struct tdp_mmu_pages being used as roots */
+ struct list_head tdp_mmu_roots;
+ /* List of struct tdp_mmu_pages not being used as roots */
+ struct list_head tdp_mmu_pages;
 };
 
 struct kvm_vm_stat {
 ulong mmu_shadow_zapped;
 ulong mmu_pte_write;
- ulong mmu_pte_updated;
 ulong mmu_pde_zapped;
 ulong mmu_flooded;
 ulong mmu_recycled;
@@ -927,6 +1060,10 @@
 u64 irq_injections;
 u64 nmi_injections;
 u64 req_event;
+ u64 halt_poll_success_ns;
+ u64 halt_poll_fail_ns;
+ u64 preemption_reported;
+ u64 preemption_other;
 };
 
 struct x86_instruction_info;
@@ -948,25 +1085,25 @@
 bool msi_redir_hint;
 };
 
+static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
+{
+ return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
+}
+
 struct kvm_x86_ops {
- int (*cpu_has_kvm_support)(void); /* __init */
- int (*disabled_by_bios)(void); /* __init */
 int (*hardware_enable)(void);
 void (*hardware_disable)(void);
- void (*check_processor_compatibility)(void *rtn);
- int (*hardware_setup)(void); /* __init */
- void (*hardware_unsetup)(void); /* __exit */
+ void (*hardware_unsetup)(void);
 bool (*cpu_has_accelerated_tpr)(void);
- bool (*has_emulated_msr)(int index);
- void (*cpuid_update)(struct kvm_vcpu *vcpu);
+ bool (*has_emulated_msr)(u32 index);
+ void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
 
- struct kvm *(*vm_alloc)(void);
- void (*vm_free)(struct kvm *);
+ unsigned int vm_size;
 int (*vm_init)(struct kvm *kvm);
 void (*vm_destroy)(struct kvm *kvm);
 
 /* Create, but do not attach this VCPU */
- struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
+ int (*vcpu_create)(struct kvm_vcpu *vcpu);
 void (*vcpu_free)(struct kvm_vcpu *vcpu);
 void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);
 
@@ -974,7 +1111,7 @@
 void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
 void (*vcpu_put)(struct kvm_vcpu *vcpu);
 
- void (*update_bp_intercept)(struct kvm_vcpu *vcpu);
+ void (*update_exception_bitmap)(struct kvm_vcpu *vcpu);
 int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
 int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
 u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -984,27 +1121,25 @@
 void (*set_segment)(struct kvm_vcpu *vcpu,
 struct kvm_segment *var, int seg);
 void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
- void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
- void (*decache_cr3)(struct kvm_vcpu *vcpu);
- void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
 void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
- void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
- int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
- void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
+ bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
+ void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
+ int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
 void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
 void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
 void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
 void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
- u64 (*get_dr6)(struct kvm_vcpu *vcpu);
- void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
 void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
 void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
 void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
 
- void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
+ void (*tlb_flush_all)(struct kvm_vcpu *vcpu);
+ void (*tlb_flush_current)(struct kvm_vcpu *vcpu);
 int (*tlb_remote_flush)(struct kvm *kvm);
+ int (*tlb_remote_flush_with_range)(struct kvm *kvm,
+ struct kvm_tlb_range *range);
 
 /*
 * Flush any TLB entries associated with the given GVA.
@@ -1014,9 +1149,17 @@
 */
 void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
 
- void (*run)(struct kvm_vcpu *vcpu);
- int (*handle_exit)(struct kvm_vcpu *vcpu);
- void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+ /*
+ * Flush any TLB entries created by the guest. Like tlb_flush_gva(),
+ * does not need to flush GPA->HPA mappings.
+ */
+ void (*tlb_flush_guest)(struct kvm_vcpu *vcpu);
+
+ enum exit_fastpath_completion (*run)(struct kvm_vcpu *vcpu);
+ int (*handle_exit)(struct kvm_vcpu *vcpu,
+ enum exit_fastpath_completion exit_fastpath);
+ int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+ void (*update_emulated_instruction)(struct kvm_vcpu *vcpu);
 void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
 u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
 void (*patch_hypercall)(struct kvm_vcpu *vcpu,
@@ -1025,52 +1168,49 @@
 void (*set_nmi)(struct kvm_vcpu *vcpu);
 void (*queue_exception)(struct kvm_vcpu *vcpu);
 void (*cancel_injection)(struct kvm_vcpu *vcpu);
- int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
- int (*nmi_allowed)(struct kvm_vcpu *vcpu);
+ int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
+ int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
 bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
 void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
 void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
 void (*enable_irq_window)(struct kvm_vcpu *vcpu);
 void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
- bool (*get_enable_apicv)(struct kvm_vcpu *vcpu);
+ bool (*check_apicv_inhibit_reasons)(ulong bit);
+ void (*pre_update_apicv_exec_ctrl)(struct kvm *kvm, bool activate);
 void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
 void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
 void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
 bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu);
 void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
- void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
+ void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
 int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
 int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
- int (*get_tdp_level)(struct kvm_vcpu *vcpu);
 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
- int (*get_lpage_level)(void);
- bool (*rdtscp_supported)(void);
- bool (*invpcid_supported)(void);
 
- void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
-
- void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry);
+ void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, unsigned long pgd,
+ int pgd_level);
 
 bool (*has_wbinvd_exit)(void);
 
- u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu);
 /* Returns actual tsc_offset set in active VMCS */
 u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
 
- void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
+ /*
+ * Retrieve somewhat arbitrary exit information. Intended to be used
+ * only from within tracepoints to avoid VMREADs when tracing is off.
+ */
+ void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
+ u32 *exit_int_info, u32 *exit_int_info_err_code);
 
 int (*check_intercept)(struct kvm_vcpu *vcpu,
 struct x86_instruction_info *info,
- enum x86_intercept_stage stage);
- void (*handle_external_intr)(struct kvm_vcpu *vcpu);
- bool (*mpx_supported)(void);
- bool (*xsaves_supported)(void);
- bool (*umip_emulated)(void);
+ enum x86_intercept_stage stage,
+ struct x86_exception *exception);
+ void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
 
- int (*check_nested_events)(struct kvm_vcpu *vcpu);
 void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
 
 void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
@@ -1099,10 +1239,10 @@
 void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
 struct kvm_memory_slot *slot,
 gfn_t offset, unsigned long mask);
- int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
 
 /* pmu operations of sub-arch */
 const struct kvm_pmu_ops *pmu_ops;
+ const struct kvm_x86_nested_ops *nested_ops;
 
 /*
 * Architecture specific hooks for vCPU blocking due to
@@ -1124,29 +1264,59 @@
 void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
 bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
 
- int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc);
+ int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
+ bool *expired);
 void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
 
 void (*setup_mce)(struct kvm_vcpu *vcpu);
 
- int (*get_nested_state)(struct kvm_vcpu *vcpu,
- struct kvm_nested_state __user *user_kvm_nested_state,
- unsigned user_data_size);
- int (*set_nested_state)(struct kvm_vcpu *vcpu,
- struct kvm_nested_state __user *user_kvm_nested_state,
- struct kvm_nested_state *kvm_state);
- void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
-
- int (*smi_allowed)(struct kvm_vcpu *vcpu);
+ int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
 int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
- int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
- int (*enable_smi_window)(struct kvm_vcpu *vcpu);
+ int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
+ void (*enable_smi_window)(struct kvm_vcpu *vcpu);
 
 int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
 int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
 int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+ void (*guest_memory_reclaimed)(struct kvm *kvm);
 
 int (*get_msr_feature)(struct kvm_msr_entry *entry);
+
+ bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, void *insn, int insn_len);
+
+ bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
+ int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
+
+ void (*migrate_timers)(struct kvm_vcpu *vcpu);
+ void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
+};
+
+struct kvm_x86_nested_ops {
+ void (*leave_nested)(struct kvm_vcpu *vcpu);
+ int (*check_events)(struct kvm_vcpu *vcpu);
+ bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
+ int (*get_state)(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state __user *user_kvm_nested_state,
+ unsigned user_data_size);
+ int (*set_state)(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state __user *user_kvm_nested_state,
+ struct kvm_nested_state *kvm_state);
+ bool (*get_nested_state_pages)(struct kvm_vcpu *vcpu);
+ int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
+
+ int (*enable_evmcs)(struct kvm_vcpu *vcpu,
+ uint16_t *vmcs_version);
+ uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu);
+};
+
+struct kvm_x86_init_ops {
+ int (*cpu_has_kvm_support)(void);
+ int (*disabled_by_bios)(void);
+ int (*check_processor_compatibility)(void);
+ int (*hardware_setup)(void);
+ bool (*intel_pt_intr_in_guest)(void);
+
+ struct kvm_x86_ops *runtime_ops;
 };
 
 struct kvm_arch_async_pf {
@@ -1156,35 +1326,33 @@
 bool direct_map;
 };
 
-extern struct kvm_x86_ops *kvm_x86_ops;
+extern u64 __read_mostly host_efer;
+extern bool __read_mostly allow_smaller_maxphyaddr;
+extern struct kvm_x86_ops kvm_x86_ops;
 
 #define __KVM_HAVE_ARCH_VM_ALLOC
 static inline struct kvm *kvm_arch_alloc_vm(void)
 {
- return kvm_x86_ops->vm_alloc();
+ return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
 }
-
-static inline void kvm_arch_free_vm(struct kvm *kvm)
-{
- return kvm_x86_ops->vm_free(kvm);
-}
+void kvm_arch_free_vm(struct kvm *kvm);
 
 #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
 static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
 {
- if (kvm_x86_ops->tlb_remote_flush &&
- !kvm_x86_ops->tlb_remote_flush(kvm))
+ if (kvm_x86_ops.tlb_remote_flush &&
+ !kvm_x86_ops.tlb_remote_flush(kvm))
 return 0;
 else
 return -ENOTSUPP;
 }
 
-int kvm_mmu_module_init(void);
-void kvm_mmu_module_exit(void);
+void __init kvm_mmu_x86_module_init(void);
+int kvm_mmu_vendor_module_init(void);
+void kvm_mmu_vendor_module_exit(void);
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
-void kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_init_vm(struct kvm *kvm);
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
@@ -1193,7 +1361,8 @@
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
- struct kvm_memory_slot *memslot);
+ struct kvm_memory_slot *memslot,
+ int start_level);
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 const struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
@@ -1207,7 +1376,7 @@
 gfn_t gfn_offset, unsigned long mask);
 void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
-unsigned long kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
+unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
 
 int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
@@ -1246,28 +1415,59 @@
 
 extern u64 kvm_mce_cap_supported;
 
-enum emulation_result {
- EMULATE_DONE, /* no further processing */
- EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */
- EMULATE_FAIL, /* can't emulate this instruction */
-};
-
+/*
+ * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
+ * userspace I/O) to indicate that the emulation context
+ * should be resued as is, i.e. skip initialization of
+ * emulation context, instruction fetch and decode.
+ *
+ * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware.
+ * Indicates that only select instructions (tagged with
+ * EmulateOnUD) should be emulated (to minimize the emulator
+ * attack surface). See also EMULTYPE_TRAP_UD_FORCED.
+ *
+ * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
+ * decode the instruction length. For use *only* by
+ * kvm_x86_ops.skip_emulated_instruction() implementations.
+ *
+ * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to
+ * retry native execution under certain conditions,
+ * Can only be set in conjunction with EMULTYPE_PF.
+ *
+ * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was
+ * triggered by KVM's magic "force emulation" prefix,
+ * which is opt in via module param (off by default).
+ * Bypasses EmulateOnUD restriction despite emulating
+ * due to an intercepted #UD (see EMULTYPE_TRAP_UD).
+ * Used to test the full emulator from userspace.
+ *
+ * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware
+ * backdoor emulation, which is opt in via module param.
+ * VMware backoor emulation handles select instructions
+ * and reinjects the #GP for all other cases.
+ *
+ * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which
+ * case the CR2/GPA value pass on the stack is valid.
+ */
 #define EMULTYPE_NO_DECODE (1 << 0)
 #define EMULTYPE_TRAP_UD (1 << 1)
 #define EMULTYPE_SKIP (1 << 2)
-#define EMULTYPE_ALLOW_RETRY (1 << 3)
-#define EMULTYPE_NO_UD_ON_FAIL (1 << 4)
-#define EMULTYPE_VMWARE (1 << 5)
+#define EMULTYPE_ALLOW_RETRY_PF (1 << 3)
+#define EMULTYPE_TRAP_UD_FORCED (1 << 4)
+#define EMULTYPE_VMWARE_GP (1 << 5)
+#define EMULTYPE_PF (1 << 6)
+
 int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
 int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
 void *insn, int insn_len);
 
 void kvm_enable_efer_bits(u64);
 bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
-int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
-int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
-
-struct x86_emulate_ctxt;
+int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated);
+int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
+int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
+int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu);
 
 int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
 int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
@@ -1302,9 +1502,12 @@
 
 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
+void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload);
 void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
 void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
+bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
+ struct x86_exception *fault);
 int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 gfn_t gfn, void *data, int offset, int len,
 u32 access);
@@ -1332,13 +1535,16 @@
 
 void kvm_inject_nmi(struct kvm_vcpu *vcpu);
 
+void kvm_update_dr7(struct kvm_vcpu *vcpu);
+
 int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free);
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ ulong roots_to_free);
 gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
 struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
@@ -1350,31 +1556,25 @@
 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
 struct x86_exception *exception);
 
-void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);
+bool kvm_apicv_activated(struct kvm *kvm);
+void kvm_apicv_init(struct kvm *kvm, bool enable);
+void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
+void kvm_request_apicv_update(struct kvm *kvm, bool activate,
+ unsigned long bit);
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
 void *insn, int insn_len);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
+void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ gva_t gva, hpa_t root_hpa);
 void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
-void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush);
+void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush,
+ bool skip_mmu_sync);
 
-void kvm_enable_tdp(void);
-void kvm_disable_tdp(void);
-
-static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
- struct x86_exception *exception)
-{
- return gpa;
-}
-
-static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
-{
- struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
-
- return (struct kvm_mmu_page *)page_private(page);
-}
+void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
+ int tdp_huge_page_level);
 
 static inline u16 kvm_read_ldt(void)
 {
@@ -1423,8 +1623,6 @@
 };
 
 #define HF_GIF_MASK (1 << 0)
-#define HF_HIF_MASK (1 << 1)
-#define HF_VINTR_MASK (1 << 2)
 #define HF_NMI_MASK (1 << 3)
 #define HF_IRET_MASK (1 << 4)
 #define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */
@@ -1437,7 +1635,7 @@
 #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
 #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
 
-asmlinkage void __noreturn kvm_spurious_fault(void);
+asmlinkage void kvm_spurious_fault(void);
 
 /*
 * Hardware virtualization extension instructions may fault if a
@@ -1445,31 +1643,29 @@
 * Usually after catching the fault we just panic; during reboot
 * instead the instruction is ignored.
 */
-#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
+#define __kvm_handle_fault_on_reboot(insn) \
 "666: \n\t" \
 insn "\n\t" \
 "jmp 668f \n\t" \
 "667: \n\t" \
- "call kvm_spurious_fault \n\t" \
- "668: \n\t" \
- ".pushsection .fixup, \"ax\" \n\t" \
- "700: \n\t" \
- cleanup_insn "\n\t" \
- "cmpb $0, kvm_rebooting\n\t" \
- "je 667b \n\t" \
- "jmp 668b \n\t" \
+ "1: \n\t" \
+ ".pushsection .discard.instr_begin \n\t" \
+ ".long 1b - . \n\t" \
 ".popsection \n\t" \
- _ASM_EXTABLE(666b, 700b)
-
-#define __kvm_handle_fault_on_reboot(insn) \
- ____kvm_handle_fault_on_reboot(insn, "")
+ "call kvm_spurious_fault \n\t" \
+ "1: \n\t" \
+ ".pushsection .discard.instr_end \n\t" \
+ ".long 1b - . \n\t" \
+ ".popsection \n\t" \
+ "668: \n\t" \
+ _ASM_EXTABLE(666b, 667b)
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
- bool blockable);
+ unsigned flags);
 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_cpu_has_extint(struct kvm_vcpu *v);
@@ -1482,9 +1678,9 @@
 unsigned long ipi_bitmap_high, u32 min,
 unsigned long icr, int op_64_bit);
 
-u64 kvm_get_arch_capabilities(void);
-void kvm_define_shared_msr(unsigned index, u32 msr);
-int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+void kvm_define_user_return_msr(unsigned index, u32 msr);
+int kvm_probe_user_return_msr(u32 msr);
+int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
 
 u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
@@ -1494,14 +1690,17 @@
 
 void kvm_make_mclock_inprogress_request(struct kvm *kvm);
 void kvm_make_scan_ioapic_request(struct kvm *kvm);
+void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
+ unsigned long *vcpu_bitmap);
 
-void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
 struct kvm_async_pf *work);
 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 struct kvm_async_pf *work);
 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
 struct kvm_async_pf *work);
-bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu);
+bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu);
 extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
@@ -1511,7 +1710,6 @@
 int kvm_is_in_guest(void);
 
 int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
-int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
 bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
 
@@ -1521,16 +1719,23 @@
 void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
 struct kvm_lapic_irq *irq);
 
+static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
+{
+ /* We can only post Fixed and LowPrio IRQs */
+ return (irq->delivery_mode == APIC_DM_FIXED ||
+ irq->delivery_mode == APIC_DM_LOWEST);
+}
+
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
 {
- if (kvm_x86_ops->vcpu_blocking)
- kvm_x86_ops->vcpu_blocking(vcpu);
+ if (kvm_x86_ops.vcpu_blocking)
+ kvm_x86_ops.vcpu_blocking(vcpu);
 }
 
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 {
- if (kvm_x86_ops->vcpu_unblocking)
- kvm_x86_ops->vcpu_unblocking(vcpu);
+ if (kvm_x86_ops.vcpu_unblocking)
+ kvm_x86_ops.vcpu_unblocking(vcpu);
 }
 
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
@@ -1548,4 +1753,7 @@
 #define put_smstate(type, buf, offset, val) \
 *(type *)((buf) + (offset) - 0x7e00) = val
 
+#define GET_SMSTATE(type, buf, offset) \
+ (*(type *)((buf) + (offset) - 0x7e00))
+
 #endif /* _ASM_X86_KVM_HOST_H */