hc
2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/arch/x86/kvm/vmx/nested.c
....@@ -2998,7 +2998,7 @@
29982998 struct vmcs12 *vmcs12,
29992999 enum vm_entry_failure_code *entry_failure_code)
30003000 {
3001
- bool ia32e;
3001
+ bool ia32e = !!(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE);
30023002
30033003 *entry_failure_code = ENTRY_FAIL_DEFAULT;
30043004
....@@ -3024,6 +3024,13 @@
30243024 vmcs12->guest_ia32_perf_global_ctrl)))
30253025 return -EINVAL;
30263026
3027
+ if (CC((vmcs12->guest_cr0 & (X86_CR0_PG | X86_CR0_PE)) == X86_CR0_PG))
3028
+ return -EINVAL;
3029
+
3030
+ if (CC(ia32e && !(vmcs12->guest_cr4 & X86_CR4_PAE)) ||
3031
+ CC(ia32e && !(vmcs12->guest_cr0 & X86_CR0_PG)))
3032
+ return -EINVAL;
3033
+
30273034 /*
30283035 * If the load IA32_EFER VM-entry control is 1, the following checks
30293036 * are performed on the field for the IA32_EFER MSR:
....@@ -3035,7 +3042,6 @@
30353042 */
30363043 if (to_vmx(vcpu)->nested.nested_run_pending &&
30373044 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
3038
- ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
30393045 if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
30403046 CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
30413047 CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&
....@@ -4556,6 +4562,17 @@
45564562
45574563 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
45584564
4565
+ /*
4566
+ * If IBRS is advertised to the vCPU, KVM must flush the indirect
4567
+ * branch predictors when transitioning from L2 to L1, as L1 expects
4568
+ * hardware (KVM in this case) to provide separate predictor modes.
4569
+ * Bare metal isolates VMX root (host) from VMX non-root (guest), but
4570
+ * doesn't isolate different VMCSs, i.e. in this case, doesn't provide
4571
+ * separate modes for L2 vs L1.
4572
+ */
4573
+ if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
4574
+ indirect_branch_prediction_barrier();
4575
+
45594576 /* Update any VMCS fields that might have changed while L2 ran */
45604577 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
45614578 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
....@@ -4901,24 +4918,35 @@
49014918 | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
49024919
49034920 /*
4904
- * Note, KVM cannot rely on hardware to perform the CR0/CR4 #UD checks
4905
- * that have higher priority than VM-Exit (see Intel SDM's pseudocode
4906
- * for VMXON), as KVM must load valid CR0/CR4 values into hardware while
4907
- * running the guest, i.e. KVM needs to check the _guest_ values.
4921
+ * Manually perform the CR4.VMXE check, as KVM must force CR4.VMXE=1 to enter
4922
+ * the guest and so cannot rely on hardware to perform the check,
4923
+ * which has higher priority than VM-Exit (see Intel SDM's pseudocode
4924
+ * for VMXON).
49084925 *
4909
- * Rely on hardware for the other two pre-VM-Exit checks, !VM86 and
4910
- * !COMPATIBILITY modes. KVM may run the guest in VM86 to emulate Real
4911
- * Mode, but KVM will never take the guest out of those modes.
4926
+ * Rely on hardware for the other pre-VM-Exit checks, CR0.PE=1, !VM86
4927
+ * and !COMPATIBILITY modes. For an unrestricted guest, KVM doesn't
4928
+ * force any of the relevant guest state. For a restricted guest, KVM
4929
+ * does force CR0.PE=1, but only to also force VM86 in order to emulate
4930
+ * Real Mode, and so there's no need to check CR0.PE manually.
49124931 */
4913
- if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) ||
4914
- !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) {
4932
+ if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
49154933 kvm_queue_exception(vcpu, UD_VECTOR);
49164934 return 1;
49174935 }
49184936
49194937 /*
4920
- * CPL=0 and all other checks that are lower priority than VM-Exit must
4921
- * be checked manually.
4938
+ * The CPL is checked for "not in VMX operation" and for "in VMX root",
4939
+ * and has higher priority than the VM-Fail due to being post-VMXON,
4940
+ * i.e. VMXON #GPs outside of VMX non-root if CPL!=0. In VMX non-root,
4941
+ * VMXON causes VM-Exit and KVM unconditionally forwards VMXON VM-Exits
4942
+ * from L2 to L1, i.e. there's no need to check for the vCPU being in
4943
+ * VMX non-root.
4944
+ *
4945
+ * Forwarding the VM-Exit unconditionally, i.e. without performing the
4946
+ * #UD checks (see above), is functionally ok because KVM doesn't allow
4947
+ * L1 to run L2 without CR4.VMXE=1, and because KVM never modifies L2's
4948
+ * CR0 or CR4, i.e. it's L1's responsibility to emulate #UDs that are
4949
+ * missed by hardware due to shadowing CR0 and/or CR4.
49224950 */
49234951 if (vmx_get_cpl(vcpu)) {
49244952 kvm_inject_gp(vcpu, 0);
....@@ -4928,6 +4956,17 @@
49284956 if (vmx->nested.vmxon)
49294957 return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
49304958
4959
+ /*
4960
+ * Invalid CR0/CR4 generates #GP. These checks are performed if and
4961
+ * only if the vCPU isn't already in VMX operation, i.e. effectively
4962
+ * have lower priority than the VM-Fail above.
4963
+ */
4964
+ if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) ||
4965
+ !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) {
4966
+ kvm_inject_gp(vcpu, 0);
4967
+ return 1;
4968
+ }
4969
+
49314970 if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
49324971 != VMXON_NEEDED_FEATURES) {
49334972 kvm_inject_gp(vcpu, 0);