forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
--- a/kernel/arch/x86/include/asm/kvm_host.h
+++ b/kernel/arch/x86/include/asm/kvm_host.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Kernel-based Virtual Machine driver for Linux
  *
  * This header defines architecture specific interfaces, x86 version
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
  */

 #ifndef _ASM_X86_KVM_HOST_H
@@ -35,7 +32,10 @@
 #include <asm/msr-index.h>
 #include <asm/asm.h>
 #include <asm/kvm_page_track.h>
+#include <asm/kvm_vcpu_regs.h>
 #include <asm/hyperv-tlfs.h>
+
+#define __KVM_HAVE_ARCH_VCPU_DEBUGFS

 #define KVM_MAX_VCPUS 288
 #define KVM_SOFT_MAX_VCPUS 240
@@ -49,13 +49,16 @@

 #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS

+#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
+                KVM_DIRTY_LOG_INITIALLY_SET)
+
 /* x86-specific vcpu->requests bit members */
 #define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0)
 #define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1)
 #define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2)
 #define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3)
 #define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4)
-#define KVM_REQ_LOAD_CR3 KVM_ARCH_REQ(5)
+#define KVM_REQ_LOAD_MMU_PGD KVM_ARCH_REQ(5)
 #define KVM_REQ_EVENT KVM_ARCH_REQ(6)
 #define KVM_REQ_APF_HALT KVM_ARCH_REQ(7)
 #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8)
@@ -77,7 +80,14 @@
 #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21)
 #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
 #define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23)
-#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24)
+#define KVM_REQ_GET_NESTED_STATE_PAGES KVM_ARCH_REQ(24)
+#define KVM_REQ_APICV_UPDATE \
+        KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26)
+#define KVM_REQ_TLB_FLUSH_GUEST \
+        KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_APF_READY KVM_ARCH_REQ(28)
+#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)

 #define CR0_RESERVED_BITS \
         (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -102,7 +112,8 @@
 #define UNMAPPED_GVA (~(gpa_t)0)

 /* KVM Hugepage definitions for x86 */
-#define KVM_NR_PAGE_SIZES 3
+#define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G
+#define KVM_NR_PAGE_SIZES (KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1)
 #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9)
 #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
 #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
@@ -111,7 +122,7 @@

 static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
 {
-        /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
+        /* KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K) must be 0. */
         return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
                 (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
 }
@@ -122,40 +133,42 @@
 #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
 #define KVM_MIN_FREE_MMU_PAGES 5
 #define KVM_REFILL_PAGES 25
-#define KVM_MAX_CPUID_ENTRIES 80
+#define KVM_MAX_CPUID_ENTRIES 256
 #define KVM_NR_FIXED_MTRR_REGION 88
 #define KVM_NR_VAR_MTRR 8

 #define ASYNC_PF_PER_VCPU 64

 enum kvm_reg {
-        VCPU_REGS_RAX = 0,
-        VCPU_REGS_RCX = 1,
-        VCPU_REGS_RDX = 2,
-        VCPU_REGS_RBX = 3,
-        VCPU_REGS_RSP = 4,
-        VCPU_REGS_RBP = 5,
-        VCPU_REGS_RSI = 6,
-        VCPU_REGS_RDI = 7,
+        VCPU_REGS_RAX = __VCPU_REGS_RAX,
+        VCPU_REGS_RCX = __VCPU_REGS_RCX,
+        VCPU_REGS_RDX = __VCPU_REGS_RDX,
+        VCPU_REGS_RBX = __VCPU_REGS_RBX,
+        VCPU_REGS_RSP = __VCPU_REGS_RSP,
+        VCPU_REGS_RBP = __VCPU_REGS_RBP,
+        VCPU_REGS_RSI = __VCPU_REGS_RSI,
+        VCPU_REGS_RDI = __VCPU_REGS_RDI,
 #ifdef CONFIG_X86_64
-        VCPU_REGS_R8 = 8,
-        VCPU_REGS_R9 = 9,
-        VCPU_REGS_R10 = 10,
-        VCPU_REGS_R11 = 11,
-        VCPU_REGS_R12 = 12,
-        VCPU_REGS_R13 = 13,
-        VCPU_REGS_R14 = 14,
-        VCPU_REGS_R15 = 15,
+        VCPU_REGS_R8 = __VCPU_REGS_R8,
+        VCPU_REGS_R9 = __VCPU_REGS_R9,
+        VCPU_REGS_R10 = __VCPU_REGS_R10,
+        VCPU_REGS_R11 = __VCPU_REGS_R11,
+        VCPU_REGS_R12 = __VCPU_REGS_R12,
+        VCPU_REGS_R13 = __VCPU_REGS_R13,
+        VCPU_REGS_R14 = __VCPU_REGS_R14,
+        VCPU_REGS_R15 = __VCPU_REGS_R15,
 #endif
         VCPU_REGS_RIP,
-        NR_VCPU_REGS
-};
+        NR_VCPU_REGS,

-enum kvm_reg_ex {
         VCPU_EXREG_PDPTR = NR_VCPU_REGS,
+        VCPU_EXREG_CR0,
         VCPU_EXREG_CR3,
+        VCPU_EXREG_CR4,
         VCPU_EXREG_RFLAGS,
         VCPU_EXREG_SEGMENTS,
+        VCPU_EXREG_EXIT_INFO_1,
+        VCPU_EXREG_EXIT_INFO_2,
 };

 enum {
@@ -169,9 +182,17 @@
         VCPU_SREG_LDTR,
 };

-#include <asm/kvm_emulate.h>
+enum exit_fastpath_completion {
+        EXIT_FASTPATH_NONE,
+        EXIT_FASTPATH_REENTER_GUEST,
+        EXIT_FASTPATH_EXIT_HANDLED,
+};
+typedef enum exit_fastpath_completion fastpath_t;

-#define KVM_NR_MEM_OBJS 40
+struct x86_emulate_ctxt;
+struct x86_exception;
+enum x86_intercept;
+enum x86_intercept_stage;

 #define KVM_NR_DB_REGS 4

@@ -211,13 +232,6 @@
                 PFERR_WRITE_MASK | \
                 PFERR_PRESENT_MASK)

-/*
- * The mask used to denote special SPTEs, which can be either MMIO SPTEs or
- * Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting
- * with the SVE bit in EPT PTEs.
- */
-#define SPTE_SPECIAL_MASK (1ULL << 62)
-
 /* apic attention bits */
 #define KVM_APIC_CHECK_VAPIC 0
 /*
@@ -231,27 +245,18 @@
 struct kvm_kernel_irq_routing_entry;

 /*
- * We don't want allocation failures within the mmu code, so we preallocate
- * enough memory for a single page fault in a cache.
- */
-struct kvm_mmu_memory_cache {
-        int nobjs;
-        void *objects[KVM_NR_MEM_OBJS];
-};
-
-/*
  * the pages used as guest page table on soft mmu are tracked by
  * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
  * by indirect shadow page can not be more than 15 bits.
  *
- * Currently, we used 14 bits that are @level, @cr4_pae, @quadrant, @access,
+ * Currently, we used 14 bits that are @level, @gpte_is_8_bytes, @quadrant, @access,
  * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
  */
 union kvm_mmu_page_role {
-        unsigned word;
+        u32 word;
         struct {
                 unsigned level:4;
-                unsigned cr4_pae:1;
+                unsigned gpte_is_8_bytes:1;
                 unsigned quadrant:2;
                 unsigned direct:1;
                 unsigned access:3;
@@ -274,46 +279,38 @@
         };
 };

-struct kvm_rmap_head {
-        unsigned long val;
+union kvm_mmu_extended_role {
+/*
+ * This structure complements kvm_mmu_page_role caching everything needed for
+ * MMU configuration. If nothing in both these structures changed, MMU
+ * re-configuration can be skipped. @valid bit is set on first usage so we don't
+ * treat all-zero structure as valid data.
+ */
+        u32 word;
+        struct {
+                unsigned int valid:1;
+                unsigned int execonly:1;
+                unsigned int cr0_pg:1;
+                unsigned int cr4_pae:1;
+                unsigned int cr4_pse:1;
+                unsigned int cr4_pke:1;
+                unsigned int cr4_smap:1;
+                unsigned int cr4_smep:1;
+                unsigned int cr4_la57:1;
+                unsigned int maxphyaddr:6;
+        };
 };

-struct kvm_mmu_page {
-        struct list_head link;
-        struct hlist_node hash_link;
-        struct list_head lpage_disallowed_link;
+union kvm_mmu_role {
+        u64 as_u64;
+        struct {
+                union kvm_mmu_page_role base;
+                union kvm_mmu_extended_role ext;
+        };
+};

-        /*
-         * The following two entries are used to key the shadow page in the
-         * hash table.
-         */
-        gfn_t gfn;
-        union kvm_mmu_page_role role;
-
-        u64 *spt;
-        /* hold the gfn of each spte inside spt */
-        gfn_t *gfns;
-        bool unsync;
-        bool lpage_disallowed; /* Can't be replaced by an equiv large page */
-        int root_count; /* Currently serving as active root */
-        unsigned int unsync_children;
-        struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
-
-        /* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */
-        unsigned long mmu_valid_gen;
-
-        DECLARE_BITMAP(unsync_child_bitmap, 512);
-
-#ifdef CONFIG_X86_32
-        /*
-         * Used out of the mmu-lock to avoid reading spte values while an
-         * update is in progress; see the comments in __get_spte_lockless().
-         */
-        int clear_spte_count;
-#endif
-
-        /* Number of writes since the last time traversal visited this page. */
-        atomic_t write_flooding_count;
+struct kvm_rmap_head {
+        unsigned long val;
 };

 struct kvm_pio_request {
@@ -332,14 +329,16 @@
 };

 struct kvm_mmu_root_info {
-        gpa_t cr3;
+        gpa_t pgd;
         hpa_t hpa;
 };

 #define KVM_MMU_ROOT_INFO_INVALID \
-        ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
+        ((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE })

 #define KVM_MMU_NUM_PREV_ROOTS 3
+
+struct kvm_mmu_page;

 /*
  * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
@@ -347,8 +346,7 @@
  * current mmu mode.
  */
 struct kvm_mmu {
-        void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
-        unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
+        unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu);
         u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
         int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err,
                 bool prefault);
@@ -361,10 +359,9 @@
         int (*sync_page)(struct kvm_vcpu *vcpu,
                 struct kvm_mmu_page *sp);
         void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
-        void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
-                u64 *spte, const void *pte);
         hpa_t root_hpa;
-        union kvm_mmu_page_role base_role;
+        gpa_t root_pgd;
+        union kvm_mmu_role mmu_role;
         u8 root_level;
         u8 shadow_root_level;
         u8 ept_ad;
@@ -406,6 +403,11 @@
         u64 pdptrs[4]; /* pae */
 };

+struct kvm_tlb_range {
+        u64 start_gfn;
+        u64 pages;
+};
+
 enum pmc_type {
         KVM_PMC_GP = 0,
         KVM_PMC_FIXED,
@@ -418,6 +420,11 @@
         u64 eventsel;
         struct perf_event *perf_event;
         struct kvm_vcpu *vcpu;
+        /*
+         * eventsel value for general purpose counters,
+         * ctrl value for fixed counters.
+         */
+        u64 current_config;
 };

 struct kvm_pmu {
@@ -425,17 +432,34 @@
         unsigned nr_arch_fixed_counters;
         unsigned available_event_types;
         u64 fixed_ctr_ctrl;
+        u64 fixed_ctr_ctrl_mask;
         u64 global_ctrl;
         u64 global_status;
         u64 global_ovf_ctrl;
         u64 counter_bitmask[2];
         u64 global_ctrl_mask;
+        u64 global_ovf_ctrl_mask;
         u64 reserved_bits;
+        u64 raw_event_mask;
         u8 version;
         struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
         struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
         struct irq_work irq_work;
-        u64 reprogram_pmi;
+        DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
+        DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
+        DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
+
+        /*
+         * The gate to release perf_events not marked in
+         * pmc_in_use only once in a vcpu time slice.
+         */
+        bool need_cleanup;
+
+        /*
+         * The total number of programmed perf_events and it helps to avoid
+         * redundant check before cleanup if guest don't use vPMU at all.
+         */
+        u8 event_count;
 };

 struct kvm_pmu_ops;
@@ -464,7 +488,7 @@
 struct kvm_vcpu_hv_stimer {
         struct hrtimer timer;
         int index;
-        u64 config;
+        union hv_stimer_config config;
         u64 count;
         u64 exp_time;
         struct hv_message msg;
@@ -494,7 +518,7 @@
         struct kvm_hyperv_exit exit;
         struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
         DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
-        cpumask_t tlb_lush;
+        cpumask_t tlb_flush;
 };

 struct kvm_vcpu_arch {
@@ -512,7 +536,9 @@
         unsigned long cr3;
         unsigned long cr4;
         unsigned long cr4_guest_owned_bits;
+        unsigned long cr4_guest_rsvd_bits;
         unsigned long cr8;
+        u32 host_pkru;
         u32 pkru;
         u32 hflags;
         u64 efer;
@@ -528,9 +554,11 @@
         u64 smbase;
         u64 smi_count;
         bool tpr_access_reporting;
+        bool xsaves_enabled;
         u64 ia32_xss;
         u64 microcode_version;
         u64 arch_capabilities;
+        u64 perf_capabilities;

         /*
          * Paging state of the vcpu
@@ -539,13 +567,19 @@
          * the paging mode of the l1 guest. This context is always used to
          * handle faults.
          */
-        struct kvm_mmu mmu;
+        struct kvm_mmu *mmu;
+
+        /* Non-nested MMU for L1 */
+        struct kvm_mmu root_mmu;
+
+        /* L1 MMU when running nested */
+        struct kvm_mmu guest_mmu;

         /*
          * Paging state of an L2 guest (used for nested npt)
          *
          * This context will save all necessary information to walk page tables
-         * of the an L2 guest. This context is only initialized for page table
+         * of an L2 guest. This context is only initialized for page table
          * walking and not for faulting since we never handle l2 page faults on
          * the host.
          */
@@ -558,7 +592,8 @@
         struct kvm_mmu *walk_mmu;

         struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
-        struct kvm_mmu_memory_cache mmu_page_cache;
+        struct kvm_mmu_memory_cache mmu_shadow_page_cache;
+        struct kvm_mmu_memory_cache mmu_gfn_array_cache;
         struct kvm_mmu_memory_cache mmu_page_header_cache;

         /*
@@ -572,12 +607,11 @@
          * "guest_fpu" state here contains the guest FPU context, with the
          * host PRKU bits.
          */
-        struct fpu user_fpu;
-        struct fpu guest_fpu;
+        struct fpu *user_fpu;
+        struct fpu *guest_fpu;

         u64 xcr0;
         u64 guest_supported_xcr0;
-        u32 guest_xstate_size;

         struct kvm_pio_request pio;
         void *pio_data;
@@ -590,6 +624,8 @@
                 bool has_error_code;
                 u8 nr;
                 u32 error_code;
+                unsigned long payload;
+                bool has_payload;
                 u8 nested_apf;
         } exception;

@@ -602,13 +638,15 @@
         int halt_request; /* real mode on Intel only */

         int cpuid_nent;
-        struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+        struct kvm_cpuid_entry2 *cpuid_entries;

+        unsigned long cr3_lm_rsvd_bits;
         int maxphyaddr;
+        int max_tdp_level;

         /* emulate context */

-        struct x86_emulate_ctxt emulate_ctxt;
+        struct x86_emulate_ctxt *emulate_ctxt;
         bool emulate_regs_need_sync_to_vcpu;
         bool emulate_regs_need_sync_from_vcpu;
         int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
@@ -628,6 +666,7 @@
                 struct gfn_to_pfn_cache cache;
         } st;

+        u64 l1_tsc_offset;
         u64 tsc_offset;
         u64 last_guest_tsc;
         u64 last_host_tsc;
@@ -641,6 +680,7 @@
         u32 virtual_tsc_mult;
         u32 virtual_tsc_khz;
         s64 ia32_tsc_adjust_msr;
+        u64 msr_ia32_power_ctl;
         u64 tsc_scaling_ratio;

         atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
@@ -668,7 +708,7 @@

         /* Cache MMIO info */
         u64 mmio_gva;
-        unsigned access;
+        unsigned mmio_access;
         gfn_t mmio_gfn;
         u64 mmio_gen;

@@ -686,14 +726,17 @@

         struct {
                 bool halted;
-                gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
+                gfn_t gfns[ASYNC_PF_PER_VCPU];
                 struct gfn_to_hva_cache data;
-                u64 msr_val;
+                u64 msr_en_val; /* MSR_KVM_ASYNC_PF_EN */
+                u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */
+                u16 vec;
                 u32 id;
                 bool send_user_only;
-                u32 host_apf_reason;
+                u32 host_apf_flags;
                 unsigned long nested_apf_token;
                 bool delivery_as_pf_vmexit;
+                bool pageready_pending;
         } apf;

         /* OSVW MSRs (AMD only) */
@@ -707,10 +750,22 @@
                 struct gfn_to_hva_cache data;
         } pv_eoi;

+        u64 msr_kvm_poll_control;
+
         /*
-         * Indicate whether the access faults on its page table in guest
-         * which is set when fix page fault and used to detect unhandeable
-         * instruction.
+         * Indicates the guest is trying to write a gfn that contains one or
+         * more of the PTEs used to translate the write itself, i.e. the access
+         * is changing its own translation in the guest page tables. KVM exits
+         * to userspace if emulation of the faulting instruction fails and this
+         * flag is set, as KVM cannot make forward progress.
+         *
+         * If emulation fails for a write to guest page tables, KVM unprotects
+         * (zaps) the shadow page for the target gfn and resumes the guest to
+         * retry the non-emulatable instruction (on hardware). Unprotecting the
+         * gfn doesn't allow forward progress for a self-changing access because
+         * doing so also zaps the translation for the gfn, i.e. retrying the
+         * instruction will hit a !PRESENT fault, which results in a new shadow
+         * page and sends KVM back to square one.
          */
         bool write_fault_to_shadow_pgtable;

@@ -725,15 +780,32 @@
         int pending_ioapic_eoi;
         int pending_external_vector;

-        /* GPA available */
-        bool gpa_available;
-        gpa_t gpa_val;
-
         /* be preempted when it's in kernel-mode(cpl=0) */
         bool preempted_in_kernel;

         /* Flush the L1 Data cache for L1TF mitigation on VMENTER */
         bool l1tf_flush_l1d;
+
+        /* Host CPU on which VM-entry was most recently attempted */
+        unsigned int last_vmentry_cpu;
+
+        /* AMD MSRC001_0015 Hardware Configuration */
+        u64 msr_hwcr;
+
+        /* pv related cpuid info */
+        struct {
+                /*
+                 * value of the eax register in the KVM_CPUID_FEATURES CPUID
+                 * leaf.
+                 */
+                u32 features;
+
+                /*
+                 * indicates whether pv emulation should be disabled if features
+                 * are not present in the guest's cpuid
+                 */
+                bool enforce;
+        } pv_cpuid;
 };

 struct kvm_lpage_info {
@@ -768,6 +840,18 @@
         struct kvm_lapic *phys_map[];
 };

+/* Hyper-V synthetic debugger (SynDbg)*/
+struct kvm_hv_syndbg {
+        struct {
+                u64 control;
+                u64 status;
+                u64 send_page;
+                u64 recv_page;
+                u64 pending_page;
+        } control;
+        u64 options;
+};
+
 /* Hyper-V emulation context */
 struct kvm_hv {
         struct mutex hv_lock;
@@ -779,7 +863,7 @@
         u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
         u64 hv_crash_ctl;

-        HV_REFERENCE_TSC_PAGE tsc_ref;
+        struct ms_hyperv_tsc_page tsc_ref;

         struct idr conn_to_evt;

@@ -789,6 +873,16 @@

         /* How many vCPUs have VP index != vCPU index */
         atomic_t num_mismatched_vp_indexes;
+
+        struct hv_partition_assist_pg *hv_pa_pg;
+        struct kvm_hv_syndbg hv_syndbg;
+};
+
+struct msr_bitmap_range {
+        u32 flags;
+        u32 nmsrs;
+        u32 base;
+        unsigned long *bitmap;
 };

 enum kvm_irqchip_mode {
@@ -797,12 +891,25 @@
         KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
 };

+struct kvm_x86_msr_filter {
+        u8 count;
+        bool default_allow:1;
+        struct msr_bitmap_range ranges[16];
+};
+
+#define APICV_INHIBIT_REASON_DISABLE 0
+#define APICV_INHIBIT_REASON_HYPERV 1
+#define APICV_INHIBIT_REASON_NESTED 2
+#define APICV_INHIBIT_REASON_IRQWIN 3
+#define APICV_INHIBIT_REASON_PIT_REINJ 4
+#define APICV_INHIBIT_REASON_X2APIC 5
+
 struct kvm_arch {
         unsigned long n_used_mmu_pages;
         unsigned long n_requested_mmu_pages;
         unsigned long n_max_mmu_pages;
         unsigned int indirect_shadow_pages;
-        unsigned long mmu_valid_gen;
+        u8 mmu_valid_gen;
         struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
         /*
          * Hash table of struct kvm_mmu_page.
@@ -826,14 +933,17 @@
         atomic_t vapics_in_nmi_mode;
         struct mutex apic_map_lock;
         struct kvm_apic_map *apic_map;
+        atomic_t apic_map_dirty;

         bool apic_access_page_done;
+        unsigned long apicv_inhibit_reasons;

         gpa_t wall_clock;

         bool mwait_in_guest;
         bool hlt_in_guest;
         bool pause_in_guest;
+        bool cstate_in_guest;

         unsigned long irq_sources_bitmap;
         s64 kvmclock_offset;
@@ -880,14 +990,36 @@
         bool x2apic_broadcast_quirk_disabled;

         bool guest_can_read_msr_platform_info;
+        bool exception_payload_enabled;

+        bool bus_lock_detection_enabled;
+
+        /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
+        u32 user_space_msr_mask;
+
+        struct kvm_x86_msr_filter __rcu *msr_filter;
+
+        struct kvm_pmu_event_filter *pmu_event_filter;
         struct task_struct *nx_lpage_recovery_thread;
+
+        /*
+         * Whether the TDP MMU is enabled for this VM. This contains a
+         * snapshot of the TDP MMU module parameter from when the VM was
+         * created and remains unchanged for the life of the VM. If this is
+         * true, TDP MMU handler functions will run for various MMU
+         * operations.
+         */
+        bool tdp_mmu_enabled;
+
+        /* List of struct tdp_mmu_pages being used as roots */
+        struct list_head tdp_mmu_roots;
+        /* List of struct tdp_mmu_pages not being used as roots */
+        struct list_head tdp_mmu_pages;
 };

 struct kvm_vm_stat {
         ulong mmu_shadow_zapped;
         ulong mmu_pte_write;
-        ulong mmu_pte_updated;
         ulong mmu_pde_zapped;
         ulong mmu_flooded;
         ulong mmu_recycled;
@@ -927,6 +1059,8 @@
         u64 irq_injections;
         u64 nmi_injections;
         u64 req_event;
+        u64 halt_poll_success_ns;
+        u64 halt_poll_fail_ns;
 };

 struct x86_instruction_info;
@@ -948,25 +1082,25 @@
         bool msi_redir_hint;
 };

+static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
+{
+        return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
+}
+
 struct kvm_x86_ops {
-        int (*cpu_has_kvm_support)(void); /* __init */
-        int (*disabled_by_bios)(void); /* __init */
         int (*hardware_enable)(void);
         void (*hardware_disable)(void);
-        void (*check_processor_compatibility)(void *rtn);
-        int (*hardware_setup)(void); /* __init */
-        void (*hardware_unsetup)(void); /* __exit */
+        void (*hardware_unsetup)(void);
         bool (*cpu_has_accelerated_tpr)(void);
-        bool (*has_emulated_msr)(int index);
-        void (*cpuid_update)(struct kvm_vcpu *vcpu);
+        bool (*has_emulated_msr)(u32 index);
+        void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);

-        struct kvm *(*vm_alloc)(void);
-        void (*vm_free)(struct kvm *);
+        unsigned int vm_size;
        int (*vm_init)(struct kvm *kvm);
         void (*vm_destroy)(struct kvm *kvm);

         /* Create, but do not attach this VCPU */
-        struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
+        int (*vcpu_create)(struct kvm_vcpu *vcpu);
         void (*vcpu_free)(struct kvm_vcpu *vcpu);
         void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);

@@ -974,7 +1108,7 @@
         void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
         void (*vcpu_put)(struct kvm_vcpu *vcpu);

-        void (*update_bp_intercept)(struct kvm_vcpu *vcpu);
+        void (*update_exception_bitmap)(struct kvm_vcpu *vcpu);
         int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
         int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
         u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -984,27 +1118,25 @@
         void (*set_segment)(struct kvm_vcpu *vcpu,
                 struct kvm_segment *var, int seg);
         void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
-        void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
-        void (*decache_cr3)(struct kvm_vcpu *vcpu);
-        void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
         void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
-        void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
-        int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
-        void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
+        bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
+        void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
+        int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
         void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
         void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
         void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
         void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
-        u64 (*get_dr6)(struct kvm_vcpu *vcpu);
-        void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
         void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
         void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
         void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
         unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
         void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);

-        void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
+        void (*tlb_flush_all)(struct kvm_vcpu *vcpu);
+        void (*tlb_flush_current)(struct kvm_vcpu *vcpu);
         int (*tlb_remote_flush)(struct kvm *kvm);
+        int (*tlb_remote_flush_with_range)(struct kvm *kvm,
                struct kvm_tlb_range *range);

         /*
         * Flush any TLB entries associated with the given GVA.
@@ -1014,9 +1146,17 @@
          */
         void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);

-        void (*run)(struct kvm_vcpu *vcpu);
-        int (*handle_exit)(struct kvm_vcpu *vcpu);
-        void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+        /*
+         * Flush any TLB entries created by the guest. Like tlb_flush_gva(),
+         * does not need to flush GPA->HPA mappings.
+         */
+        void (*tlb_flush_guest)(struct kvm_vcpu *vcpu);
+
+        enum exit_fastpath_completion (*run)(struct kvm_vcpu *vcpu);
+        int (*handle_exit)(struct kvm_vcpu *vcpu,
                enum exit_fastpath_completion exit_fastpath);
+        int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+        void (*update_emulated_instruction)(struct kvm_vcpu *vcpu);
         void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
         u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
         void (*patch_hypercall)(struct kvm_vcpu *vcpu,
@@ -1025,52 +1165,49 @@
         void (*set_nmi)(struct kvm_vcpu *vcpu);
         void (*queue_exception)(struct kvm_vcpu *vcpu);
         void (*cancel_injection)(struct kvm_vcpu *vcpu);
-        int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
-        int (*nmi_allowed)(struct kvm_vcpu *vcpu);
+        int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
+        int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
         bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
         void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
         void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
         void (*enable_irq_window)(struct kvm_vcpu *vcpu);
         void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
-        bool (*get_enable_apicv)(struct kvm_vcpu *vcpu);
+        bool (*check_apicv_inhibit_reasons)(ulong bit);
+        void (*pre_update_apicv_exec_ctrl)(struct kvm *kvm, bool activate);
         void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
         void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
         void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
         bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu);
         void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
         void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
-        void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
+        void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
         int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
         int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
         int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
         int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
-        int (*get_tdp_level)(struct kvm_vcpu *vcpu);
         u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
-        int (*get_lpage_level)(void);
-        bool (*rdtscp_supported)(void);
-        bool (*invpcid_supported)(void);

-        void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
-
-        void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry);
+        void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, unsigned long pgd,
                int pgd_level);

         bool (*has_wbinvd_exit)(void);

-        u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu);
         /* Returns actual tsc_offset set in active VMCS */
         u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);

-        void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
+        /*
+         * Retrieve somewhat arbitrary exit information. Intended to be used
+         * only from within tracepoints to avoid VMREADs when tracing is off.
+         */
+        void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
                u32 *exit_int_info, u32 *exit_int_info_err_code);

         int (*check_intercept)(struct kvm_vcpu *vcpu,
                struct x86_instruction_info *info,
-                enum x86_intercept_stage stage);
-        void (*handle_external_intr)(struct kvm_vcpu *vcpu);
-        bool (*mpx_supported)(void);
-        bool (*xsaves_supported)(void);
-        bool (*umip_emulated)(void);
+                enum x86_intercept_stage stage,
+                struct x86_exception *exception);
+        void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);

-        int (*check_nested_events)(struct kvm_vcpu *vcpu);
         void (*request_immediate_exit)(struct kvm_vcpu *vcpu);

         void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
@@ -1099,10 +1236,10 @@
         void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
                struct kvm_memory_slot *slot,
                gfn_t offset, unsigned long mask);
-        int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);

         /* pmu operations of sub-arch */
         const struct kvm_pmu_ops *pmu_ops;
+        const struct kvm_x86_nested_ops *nested_ops;

         /*
          * Architecture specific hooks for vCPU blocking due to
@@ -1124,29 +1261,59 @@
         void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
         bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);

-        int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc);
+        int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
                bool *expired);
         void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);

         void (*setup_mce)(struct kvm_vcpu *vcpu);

-        int (*get_nested_state)(struct kvm_vcpu *vcpu,
-                struct kvm_nested_state __user *user_kvm_nested_state,
-                unsigned user_data_size);
-        int (*set_nested_state)(struct kvm_vcpu *vcpu,
-                struct kvm_nested_state __user *user_kvm_nested_state,
-                struct kvm_nested_state *kvm_state);
-        void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
-
-        int (*smi_allowed)(struct kvm_vcpu *vcpu);
+        int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
         int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
-        int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
-        int (*enable_smi_window)(struct kvm_vcpu *vcpu);
+        int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
+        void (*enable_smi_window)(struct kvm_vcpu *vcpu);

         int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
         int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
         int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+        void (*guest_memory_reclaimed)(struct kvm *kvm);

         int (*get_msr_feature)(struct kvm_msr_entry *entry);
+
+        bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, void *insn, int insn_len);
+
+        bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
+        int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
+
+        void (*migrate_timers)(struct kvm_vcpu *vcpu);
+        void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
+};
+
+struct kvm_x86_nested_ops {
+        void (*leave_nested)(struct kvm_vcpu *vcpu);
+        int (*check_events)(struct kvm_vcpu *vcpu);
+        bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
+        int (*get_state)(struct kvm_vcpu *vcpu,
+                struct kvm_nested_state __user *user_kvm_nested_state,
+                unsigned user_data_size);
+        int (*set_state)(struct kvm_vcpu *vcpu,
+                struct kvm_nested_state __user *user_kvm_nested_state,
+                struct kvm_nested_state *kvm_state);
+        bool (*get_nested_state_pages)(struct kvm_vcpu *vcpu);
+        int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
+
+        int (*enable_evmcs)(struct kvm_vcpu *vcpu,
+                uint16_t *vmcs_version);
+        uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu);
+};
+
+struct kvm_x86_init_ops {
+        int (*cpu_has_kvm_support)(void);
+        int (*disabled_by_bios)(void);
+        int (*check_processor_compatibility)(void);
+        int (*hardware_setup)(void);
+        bool (*intel_pt_intr_in_guest)(void);
+
+        struct kvm_x86_ops *runtime_ops;
 };

 struct kvm_arch_async_pf {
@@ -1156,35 +1323,33 @@
         bool direct_map;
 };

-extern struct kvm_x86_ops *kvm_x86_ops;
+extern u64 __read_mostly host_efer;
+extern bool __read_mostly allow_smaller_maxphyaddr;
+extern struct kvm_x86_ops kvm_x86_ops;

 #define __KVM_HAVE_ARCH_VM_ALLOC
 static inline struct kvm *kvm_arch_alloc_vm(void)
 {
-        return kvm_x86_ops->vm_alloc();
+        return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
 }
-
-static inline void kvm_arch_free_vm(struct kvm *kvm)
-{
-        return kvm_x86_ops->vm_free(kvm);
-}
+void kvm_arch_free_vm(struct kvm *kvm);

 #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
 static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
 {
-        if (kvm_x86_ops->tlb_remote_flush &&
-                !kvm_x86_ops->tlb_remote_flush(kvm))
+        if (kvm_x86_ops.tlb_remote_flush &&
+                !kvm_x86_ops.tlb_remote_flush(kvm))
                 return 0;
         else
                 return -ENOTSUPP;
 }

-int kvm_mmu_module_init(void);
-void kvm_mmu_module_exit(void);
+void __init kvm_mmu_x86_module_init(void);
+int kvm_mmu_vendor_module_init(void);
+void kvm_mmu_vendor_module_exit(void);

 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
-void kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_init_vm(struct kvm *kvm);
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
@@ -1193,7 +1358,8 @@

 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
-                struct kvm_memory_slot *memslot);
+                struct kvm_memory_slot *memslot,
+                int start_level);
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
                const struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
@@ -1207,7 +1373,7 @@
                gfn_t gfn_offset, unsigned long mask);
 void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
-unsigned long kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
+unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);

 int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
@@ -1246,28 +1412,59 @@

 extern u64 kvm_mce_cap_supported;

-enum emulation_result {
-        EMULATE_DONE, /* no further processing */
-        EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */
-        EMULATE_FAIL, /* can't emulate this instruction */
-};
-
+/*
+ * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
+ *                      userspace I/O) to indicate that the emulation context
+ *                      should be resued as is, i.e. skip initialization of
+ *                      emulation context, instruction fetch and decode.
+ *
+ * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware.
+ *                    Indicates that only select instructions (tagged with
+ *                    EmulateOnUD) should be emulated (to minimize the emulator
+ *                    attack surface). See also EMULTYPE_TRAP_UD_FORCED.
+ *
+ * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
+ *                 decode the instruction length. For use *only* by
+ *                 kvm_x86_ops.skip_emulated_instruction() implementations.
+ *
+ * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to
+ *                           retry native execution under certain conditions,
+ *                           Can only be set in conjunction with EMULTYPE_PF.
+ *
+ * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was
+ *                           triggered by KVM's magic "force emulation" prefix,
+ *                           which is opt in via module param (off by default).
+ *                           Bypasses EmulateOnUD restriction despite emulating
+ *                           due to an intercepted #UD (see EMULTYPE_TRAP_UD).
+ *                           Used to test the full emulator from userspace.
+ *
+ * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware
+ *                      backdoor emulation, which is opt in via module param.
+ *                      VMware backoor emulation handles select instructions
+ *                      and reinjects the #GP for all other cases.
+ *
+ * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which
+ *               case the CR2/GPA value pass on the stack is valid.
+ */
 #define EMULTYPE_NO_DECODE (1 << 0)
 #define EMULTYPE_TRAP_UD (1 << 1)
 #define EMULTYPE_SKIP (1 << 2)
-#define EMULTYPE_ALLOW_RETRY (1 << 3)
-#define EMULTYPE_NO_UD_ON_FAIL (1 << 4)
-#define EMULTYPE_VMWARE (1 << 5)
+#define EMULTYPE_ALLOW_RETRY_PF (1 << 3)
+#define EMULTYPE_TRAP_UD_FORCED (1 << 4)
+#define EMULTYPE_VMWARE_GP (1 << 5)
+#define EMULTYPE_PF (1 << 6)
+
 int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
 int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
                void *insn, int insn_len);

 void kvm_enable_efer_bits(u64);
 bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
-int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
-int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
-
-struct x86_emulate_ctxt;
+int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated);
+int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
+int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
+int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu);

 int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
 int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
@@ -1302,9 +1499,12 @@

 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
+void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload);
 void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
 void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
+bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
                struct x86_exception *fault);
 int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
                gfn_t gfn, void *data, int offset, int len,
                u32 access);
@@ -1332,13 +1532,16 @@

 void kvm_inject_nmi(struct kvm_vcpu *vcpu);

+void kvm_update_dr7(struct kvm_vcpu *vcpu);
+
 int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free);
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
                ulong roots_to_free);
 gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
                struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
@@ -1350,31 +1553,25 @@
 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
                struct x86_exception *exception);

-void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);
+bool kvm_apicv_activated(struct kvm *kvm);
+void kvm_apicv_init(struct kvm *kvm, bool enable);
+void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
+void kvm_request_apicv_update(struct kvm *kvm, bool activate,
                unsigned long bit);

 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);

 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
                void *insn, int insn_len);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
+void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
                gva_t gva, hpa_t root_hpa);
 void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
-void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush);
+void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush,
                bool skip_mmu_sync);

-void kvm_enable_tdp(void);
-void kvm_disable_tdp(void);
-
-static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
-                struct x86_exception *exception)
-{
-        return gpa;
-}
-
-static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
-{
-        struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
-
-        return (struct kvm_mmu_page *)page_private(page);
-}
+void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
                int tdp_huge_page_level);

 static inline u16 kvm_read_ldt(void)
 {
@@ -1423,8 +1620,6 @@
 };

 #define HF_GIF_MASK (1 << 0)
-#define HF_HIF_MASK (1 << 1)
-#define HF_VINTR_MASK (1 << 2)
 #define HF_NMI_MASK (1 << 3)
 #define HF_IRET_MASK (1 << 4)
 #define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */
@@ -1437,7 +1632,7 @@
 #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
 #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)

-asmlinkage void __noreturn kvm_spurious_fault(void);
+asmlinkage void kvm_spurious_fault(void);

 /*
  * Hardware virtualization extension instructions may fault if a
@@ -1445,31 +1640,29 @@
  * Usually after catching the fault we just panic; during reboot
  * instead the instruction is ignored.
  */
-#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
+#define __kvm_handle_fault_on_reboot(insn) \
         "666: \n\t" \
         insn "\n\t" \
         "jmp 668f \n\t" \
         "667: \n\t" \
-        "call kvm_spurious_fault \n\t" \
-        "668: \n\t" \
-        ".pushsection .fixup, \"ax\" \n\t" \
-        "700: \n\t" \
-        cleanup_insn "\n\t" \
-        "cmpb $0, kvm_rebooting\n\t" \
-        "je 667b \n\t" \
-        "jmp 668b \n\t" \
+        "1: \n\t" \
+        ".pushsection .discard.instr_begin \n\t" \
+        ".long 1b - . \n\t" \
         ".popsection \n\t" \
-        _ASM_EXTABLE(666b, 700b)
-
-#define __kvm_handle_fault_on_reboot(insn) \
-        ____kvm_handle_fault_on_reboot(insn, "")
+        "call kvm_spurious_fault \n\t" \
+        "1: \n\t" \
+        ".pushsection .discard.instr_end \n\t" \
+        ".long 1b - . \n\t" \
+        ".popsection \n\t" \
+        "668: \n\t" \
+        _ASM_EXTABLE(666b, 667b)

 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
-                bool blockable);
+                unsigned flags);
 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_cpu_has_extint(struct kvm_vcpu *v);
@@ -1482,9 +1675,9 @@
                unsigned long ipi_bitmap_high, u32 min,
                unsigned long icr, int op_64_bit);

-u64 kvm_get_arch_capabilities(void);
-void kvm_define_shared_msr(unsigned index, u32 msr);
-int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+void kvm_define_user_return_msr(unsigned index, u32 msr);
+int kvm_probe_user_return_msr(u32 msr);
+int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);

 u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
@@ -1494,14 +1687,17 @@

 void kvm_make_mclock_inprogress_request(struct kvm *kvm);
 void kvm_make_scan_ioapic_request(struct kvm *kvm);
+void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
                unsigned long *vcpu_bitmap);

-void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
                struct kvm_async_pf *work);
 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
                struct kvm_async_pf *work);
 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
                struct kvm_async_pf *work);
-bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu);
+bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu);
 extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);

 int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
@@ -1511,7 +1707,6 @@
 int kvm_is_in_guest(void);

 int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
-int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
 bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);

@@ -1521,16 +1716,23 @@
 void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
                struct kvm_lapic_irq *irq);

+static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
+{
+        /* We can only post Fixed and LowPrio IRQs */
+        return (irq->delivery_mode == APIC_DM_FIXED ||
                irq->delivery_mode == APIC_DM_LOWEST);
+}
+
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
 {
-        if (kvm_x86_ops->vcpu_blocking)
-                kvm_x86_ops->vcpu_blocking(vcpu);
+        if (kvm_x86_ops.vcpu_blocking)
+                kvm_x86_ops.vcpu_blocking(vcpu);
 }

 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 {
-        if (kvm_x86_ops->vcpu_unblocking)
-                kvm_x86_ops->vcpu_unblocking(vcpu);
+        if (kvm_x86_ops.vcpu_unblocking)
+                kvm_x86_ops.vcpu_unblocking(vcpu);
 }

 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
@@ -1548,4 +1750,7 @@
 #define put_smstate(type, buf, offset, val) \
         *(type *)((buf) + (offset) - 0x7e00) = val

+#define GET_SMSTATE(type, buf, offset) \
+        (*(type *)((buf) + (offset) - 0x7e00))
+
 #endif /* _ASM_X86_KVM_HOST_H */