| Old | New | Code |
|---|---|---|
| .. | .. |  |
| 2998 | 2998 | struct vmcs12 *vmcs12, |
| 2999 | 2999 | enum vm_entry_failure_code *entry_failure_code) |
| 3000 | 3000 | { |
| 3001 |  | -	bool ia32e; |
|  | 3001 | +	bool ia32e = !!(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE); |
| 3002 | 3002 |  |
| 3003 | 3003 | *entry_failure_code = ENTRY_FAIL_DEFAULT; |
| 3004 | 3004 |  |
| .. | .. |  |
| 3024 | 3024 | vmcs12->guest_ia32_perf_global_ctrl))) |
| 3025 | 3025 | return -EINVAL; |
| 3026 | 3026 |  |
|  | 3027 | +	if (CC((vmcs12->guest_cr0 & (X86_CR0_PG \| X86_CR0_PE)) == X86_CR0_PG)) |
|  | 3028 | +		return -EINVAL; |
|  | 3029 | + |
|  | 3030 | +	if (CC(ia32e && !(vmcs12->guest_cr4 & X86_CR4_PAE)) \|\| |
|  | 3031 | +	    CC(ia32e && !(vmcs12->guest_cr0 & X86_CR0_PG))) |
|  | 3032 | +		return -EINVAL; |
|  | 3033 | + |
| 3027 | 3034 | /* |
| 3028 | 3035 | * If the load IA32_EFER VM-entry control is 1, the following checks |
| 3029 | 3036 | * are performed on the field for the IA32_EFER MSR: |
| .. | .. |  |
| 3035 | 3042 | */ |
| 3036 | 3043 | if (to_vmx(vcpu)->nested.nested_run_pending && |
| 3037 | 3044 | (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) { |
| 3038 |  | -		ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; |
| 3039 | 3045 | if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) \|\| |
| 3040 | 3046 | CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) \|\| |
| 3041 | 3047 | CC(((vmcs12->guest_cr0 & X86_CR0_PG) && |
| .. | .. |  |
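This first hunk is in KVM's nested VMX guest-state validation (`nested_vmx_check_guest_state()`): `ia32e` is now derived once from the vmcs12 VM-entry controls, and two consistency checks are added, rejecting CR0.PG=1 with CR0.PE=0, and requiring an IA-32e mode guest to have both CR4.PAE=1 and CR0.PG=1. To see the bit logic in isolation, here is a minimal userspace sketch; the constants mirror the architectural bit positions (CR0.PE is bit 0, CR0.PG is bit 31, CR4.PAE is bit 5, and the "IA-32e mode guest" VM-entry control is bit 9), and `guest_state_ok()` is an illustrative helper rather than KVM code.

```c
/*
 * Illustrative only: a userspace model of the two added consistency checks.
 * Constants reflect the architectural bit positions; guest_state_ok() is a
 * made-up helper, not part of KVM.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define X86_CR0_PE		(1u << 0)
#define X86_CR0_PG		(1u << 31)
#define X86_CR4_PAE		(1u << 5)
#define VM_ENTRY_IA32E_MODE	(1u << 9)

static bool guest_state_ok(uint32_t cr0, uint32_t cr4, uint32_t entry_ctls)
{
	bool ia32e = !!(entry_ctls & VM_ENTRY_IA32E_MODE);

	/* CR0.PG=1 with CR0.PE=0 is architecturally illegal. */
	if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) == X86_CR0_PG)
		return false;

	/* An IA-32e mode guest must have CR4.PAE=1 and CR0.PG=1. */
	if (ia32e && (!(cr4 & X86_CR4_PAE) || !(cr0 & X86_CR0_PG)))
		return false;

	return true;
}

int main(void)
{
	/* Paging on, protected mode off: rejected. */
	printf("%d\n", guest_state_ok(X86_CR0_PG, 0, 0));
	/* 64-bit guest with PAE paging: accepted. */
	printf("%d\n", guest_state_ok(X86_CR0_PG | X86_CR0_PE, X86_CR4_PAE,
				      VM_ENTRY_IA32E_MODE));
	return 0;
}
```

Run as-is, the first call is rejected (paging without protected mode) and the second is accepted (a 64-bit guest with PAE paging enabled), mirroring the `CC(...)` checks above.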
| Old | New | Code |
|---|---|---|
| 4556 | 4562 |  |
| 4557 | 4563 | vmx_switch_vmcs(vcpu, &vmx->vmcs01); |
| 4558 | 4564 |  |
|  | 4565 | +	/* |
|  | 4566 | +	 * If IBRS is advertised to the vCPU, KVM must flush the indirect |
|  | 4567 | +	 * branch predictors when transitioning from L2 to L1, as L1 expects |
|  | 4568 | +	 * hardware (KVM in this case) to provide separate predictor modes. |
|  | 4569 | +	 * Bare metal isolates VMX root (host) from VMX non-root (guest), but |
|  | 4570 | +	 * doesn't isolate different VMCSs, i.e. in this case, doesn't provide |
|  | 4571 | +	 * separate modes for L2 vs L1. |
|  | 4572 | +	 */ |
|  | 4573 | +	if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) |
|  | 4574 | +		indirect_branch_prediction_barrier(); |
|  | 4575 | + |
| 4559 | 4576 | /* Update any VMCS fields that might have changed while L2 ran */ |
| 4560 | 4577 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); |
| 4561 | 4578 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); |
| .. | .. |  |
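This hunk is in the nested VM-exit path (the L2 to L1 transition): if the vCPU has been told it has IBRS (`X86_FEATURE_SPEC_CTRL`), L1 expects indirect branch predictions to be isolated from L2, so KVM issues an IBPB when it emulates the VM-exit. Conceptually the barrier is a write of the IBPB command bit to `MSR_IA32_PRED_CMD` (0x49, bit 0); the sketch below shows only that shape and is not the kernel's `indirect_branch_prediction_barrier()`, which is ring-0 code gated by feature checks and alternatives.

```c
/*
 * Illustrative, ring-0-only sketch of what an IBPB amounts to; not the
 * kernel's helper.  MSR_IA32_PRED_CMD is architectural MSR 0x49, and
 * writing bit 0 (IBPB) flushes indirect branch prediction state.
 */
#include <stdint.h>

#define MSR_IA32_PRED_CMD	0x00000049
#define PRED_CMD_IBPB		(1ULL << 0)

static inline void wrmsr64(uint32_t msr, uint64_t val)
{
	asm volatile("wrmsr"
		     : /* no outputs */
		     : "c"(msr), "a"((uint32_t)val), "d"((uint32_t)(val >> 32))
		     : "memory");
}

/* Discard predictor state accumulated while L2 ran before resuming L1. */
static inline void ibpb_sketch(void)
{
	wrmsr64(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
}
```

The `guest_cpuid_has()` guard matters: the barrier is only owed to guests that were promised IBRS semantics, so guests without `SPEC_CTRL` in CPUID don't pay for it.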
| Old | New | Code |
|---|---|---|
| 4901 | 4918 | \| FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; |
| 4902 | 4919 |  |
| 4903 | 4920 | /* |
| 4904 |  | -	 * Note, KVM cannot rely on hardware to perform the CR0/CR4 #UD checks |
| 4905 |  | -	 * that have higher priority than VM-Exit (see Intel SDM's pseudocode |
| 4906 |  | -	 * for VMXON), as KVM must load valid CR0/CR4 values into hardware while |
| 4907 |  | -	 * running the guest, i.e. KVM needs to check the _guest_ values. |
|  | 4921 | +	 * Manually check CR4.VMXE; KVM must force CR4.VMXE=1 to enter the |
|  | 4922 | +	 * guest and so cannot rely on hardware to perform the check, which |
|  | 4923 | +	 * has higher priority than VM-Exit (see Intel SDM's pseudocode for |
|  | 4924 | +	 * VMXON). |
| 4908 | 4925 | * |
| 4909 |  | -	 * Rely on hardware for the other two pre-VM-Exit checks, !VM86 and |
| 4910 |  | -	 * !COMPATIBILITY modes.  KVM may run the guest in VM86 to emulate Real |
| 4911 |  | -	 * Mode, but KVM will never take the guest out of those modes. |
|  | 4926 | +	 * Rely on hardware for the other pre-VM-Exit checks, CR0.PE=1, !VM86 |
|  | 4927 | +	 * and !COMPATIBILITY modes.  For an unrestricted guest, KVM doesn't |
|  | 4928 | +	 * force any of the relevant guest state.  For a restricted guest, KVM |
|  | 4929 | +	 * does force CR0.PE=1, but only to also force VM86 in order to emulate |
|  | 4930 | +	 * Real Mode, and so there's no need to check CR0.PE manually. |
| 4912 | 4931 | */ |
| 4913 |  | -	if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) \|\| |
| 4914 |  | -	    !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) { |
|  | 4932 | +	if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) { |
| 4915 | 4933 | kvm_queue_exception(vcpu, UD_VECTOR); |
| 4916 | 4934 | return 1; |
| 4917 | 4935 | } |
| 4918 | 4936 |  |
| 4919 | 4937 | /* |
| 4920 |  | -	 * CPL=0 and all other checks that are lower priority than VM-Exit must |
| 4921 |  | -	 * be checked manually. |
|  | 4938 | +	 * The CPL is checked for "not in VMX operation" and for "in VMX root", |
|  | 4939 | +	 * and has higher priority than the VM-Fail due to being post-VMXON, |
|  | 4940 | +	 * i.e. VMXON #GPs outside of VMX non-root if CPL!=0.  In VMX non-root, |
|  | 4941 | +	 * VMXON causes VM-Exit and KVM unconditionally forwards VMXON VM-Exits |
|  | 4942 | +	 * from L2 to L1, i.e. there's no need to check for the vCPU being in |
|  | 4943 | +	 * VMX non-root. |
|  | 4944 | +	 * |
|  | 4945 | +	 * Forwarding the VM-Exit unconditionally, i.e. without performing the |
|  | 4946 | +	 * #UD checks (see above), is functionally ok because KVM doesn't allow |
|  | 4947 | +	 * L1 to run L2 without CR4.VMXE=1, and because KVM never modifies L2's |
|  | 4948 | +	 * CR0 or CR4, i.e. it's L1's responsibility to emulate #UDs that are |
|  | 4949 | +	 * missed by hardware due to shadowing CR0 and/or CR4. |
| 4922 | 4950 | */ |
| 4923 | 4951 | if (vmx_get_cpl(vcpu)) { |
| 4924 | 4952 | kvm_inject_gp(vcpu, 0); |
| .. | .. |  |
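The rewritten comment rests on the split between the CR4 that hardware sees while the guest runs and the CR4 the guest sees: KVM forces VMXE=1 into the hardware value and supplies the guest's view of that bit from a read shadow, which is why the VMXON #UD check has to be redone in software against the guest-visible value (`kvm_read_cr4_bits()`). The toy model below illustrates that split; the struct and helper are invented for the example and are not KVM's data structures.

```c
/*
 * Toy model (not KVM code) of a guest-owned vs. host-owned CR4 split.
 * Hardware runs the guest with VMXE forced to 1; the guest-visible value
 * of host-owned bits comes from a read shadow instead.
 */
#include <stdint.h>
#include <stdio.h>

#define X86_CR4_VMXE	(1u << 13)

struct toy_vcpu {
	uint32_t hw_cr4;	/* CR4 actually loaded while the guest runs */
	uint32_t cr4_shadow;	/* guest-visible values of host-owned bits */
	uint32_t host_owned;	/* bits whose guest view comes from the shadow */
};

static uint32_t read_guest_cr4(const struct toy_vcpu *v)
{
	return (v->hw_cr4 & ~v->host_owned) | (v->cr4_shadow & v->host_owned);
}

int main(void)
{
	/* The guest believes CR4.VMXE=0, but hardware entered with VMXE=1. */
	struct toy_vcpu v = {
		.hw_cr4 = X86_CR4_VMXE,
		.cr4_shadow = 0,
		.host_owned = X86_CR4_VMXE,
	};

	printf("hardware VMXE=%u, guest-visible VMXE=%u\n",
	       !!(v.hw_cr4 & X86_CR4_VMXE),
	       !!(read_guest_cr4(&v) & X86_CR4_VMXE));
	return 0;
}
```

Hardware's own #UD check sees VMXE=1 and never fires, so a VMXON from such a guest reaches KVM, which must inject the #UD based on the shadowed value. The remainder of the handle_vmxon() change moves the CR0/CR4 checks after the VM-Fail for a vCPU that is already post-VMXON, turning them into #GP rather than #UD: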
| Old | New | Code |
|---|---|---|
| 4928 | 4956 | if (vmx->nested.vmxon) |
| 4929 | 4957 | return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); |
| 4930 | 4958 |  |
|  | 4959 | +	/* |
|  | 4960 | +	 * Invalid CR0/CR4 generates #GP.  These checks are performed if and |
|  | 4961 | +	 * only if the vCPU isn't already in VMX operation, i.e. effectively |
|  | 4962 | +	 * have lower priority than the VM-Fail above. |
|  | 4963 | +	 */ |
|  | 4964 | +	if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) \|\| |
|  | 4965 | +	    !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) { |
|  | 4966 | +		kvm_inject_gp(vcpu, 0); |
|  | 4967 | +		return 1; |
|  | 4968 | +	} |
|  | 4969 | + |
| 4931 | 4970 | if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES) |
| 4932 | 4971 | != VMXON_NEEDED_FEATURES) { |
| 4933 | 4972 | kvm_inject_gp(vcpu, 0); |
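Taken together, the reworked VMXON emulation resolves faults in a fixed priority order: #UD for CR4.VMXE=0, #GP for CPL!=0, the VM-Fail for a vCPU that is already post-VMXON, then #GP for CR0/CR4 values that violate the VMX fixed bits or for an IA32_FEATURE_CONTROL that isn't locked with VMX enabled. The stand-alone model below restates that ordering; the enum, struct, and `emulate_vmxon()` are hypothetical names used for illustration, not KVM's.

```c
/*
 * Hypothetical restatement (not KVM code) of the fault priority that the
 * reworked VMXON emulation implements.
 */
#include <stdio.h>

enum vmxon_outcome { VMXON_OK, VMXON_UD, VMXON_GP, VMXON_VMFAIL };

struct vmxon_state {
	int cr4_vmxe;		/* guest-visible CR4.VMXE */
	int cpl;		/* current privilege level */
	int already_vmxon;	/* vCPU is already post-VMXON */
	int cr0_cr4_valid;	/* CR0/CR4 satisfy the VMX fixed bits */
	int feat_ctl_ok;	/* IA32_FEATURE_CONTROL locked + VMX enabled */
};

static enum vmxon_outcome emulate_vmxon(const struct vmxon_state *s)
{
	if (!s->cr4_vmxe)
		return VMXON_UD;	/* higher priority than VM-Exit/VM-Fail */
	if (s->cpl)
		return VMXON_GP;	/* CPL #GP precedes the VM-Fail */
	if (s->already_vmxon)
		return VMXON_VMFAIL;	/* VMXERR_VMXON_IN_VMX_ROOT_OPERATION */
	if (!s->cr0_cr4_valid || !s->feat_ctl_ok)
		return VMXON_GP;	/* only checked when not yet in VMX operation */
	return VMXON_OK;
}

int main(void)
{
	/* Already post-VMXON with bad CR0/CR4: the VM-Fail wins. */
	struct vmxon_state s = {
		.cr4_vmxe = 1,
		.already_vmxon = 1,
		.cr0_cr4_valid = 0,
		.feat_ctl_ok = 1,
	};

	printf("%d\n", emulate_vmxon(&s) == VMXON_VMFAIL);
	return 0;
}
```

The practical effect of this last part of the change is that host CR0/CR4 values which violate the fixed bits now raise #GP after the post-VMXON VM-Fail check, instead of #UD ahead of it.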