2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/arch/x86/kvm/x86.c
@@ -231,6 +231,8 @@
         VCPU_STAT("l1d_flush", l1d_flush),
         VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
         VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+        VCPU_STAT("preemption_reported", preemption_reported),
+        VCPU_STAT("preemption_other", preemption_other),
         VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
         VM_STAT("mmu_pte_write", mmu_pte_write),
         VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
@@ -1387,7 +1389,7 @@
          ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
          ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
          ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
-         ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO)
+         ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO)
 
 static u64 kvm_get_arch_capabilities(void)
 {
@@ -1443,6 +1445,9 @@
          * using VERW to clear CPU buffers.
          */
         }
+
+        if (!boot_cpu_has_bug(X86_BUG_GDS) || gds_ucode_mitigated())
+                data |= ARCH_CAP_GDS_NO;
 
         return data;
 }
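
The two hunks above extend the set of IA32_ARCH_CAPABILITIES bits KVM is willing to expose and then report GDS_NO whenever the host is either not affected by Gather Data Sampling or has the microcode mitigation loaded. As a rough illustration of the consumer side (not part of this patch; gds_guest_affected() is a hypothetical helper), a guest kernel could decide whether it needs its own GDS mitigation like this:

/*
 * Hedged sketch of a guest-side consumer: if the hypervisor reports
 * GDS_NO in IA32_ARCH_CAPABILITIES, the guest can treat itself as not
 * affected and skip the Gather Data Sampling mitigation.
 */
static bool gds_guest_affected(void)
{
        u64 caps = 0;

        if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
                rdmsrl(MSR_IA32_ARCH_CAPABILITIES, caps);

        /* GDS_NO set means the (virtual) CPU is not vulnerable. */
        return !(caps & ARCH_CAP_GDS_NO);
}
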
@@ -1586,6 +1591,9 @@
                         allowed = !!test_bit(index - start, bitmap);
                         break;
                 }
+
+                /* Note, VM-Exits that go down the "slow" path are accounted below. */
+                ++vcpu->stat.exits;
         }
 
 out:
@@ -3020,51 +3028,95 @@
 
 static void record_steal_time(struct kvm_vcpu *vcpu)
 {
-        struct kvm_host_map map;
-        struct kvm_steal_time *st;
+        struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
+        struct kvm_steal_time __user *st;
+        struct kvm_memslots *slots;
+        gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
+        u64 steal;
+        u32 version;
 
         if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                 return;
 
-        /* -EAGAIN is returned in atomic context so we can just return. */
-        if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
-                        &map, &vcpu->arch.st.cache, false))
+        if (WARN_ON_ONCE(current->mm != vcpu->kvm->mm))
                 return;
 
-        st = map.hva +
-                offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+        slots = kvm_memslots(vcpu->kvm);
 
+        if (unlikely(slots->generation != ghc->generation ||
+                     gpa != ghc->gpa ||
+                     kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
+                /* We rely on the fact that it fits in a single page. */
+                BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
+
+                if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st)) ||
+                    kvm_is_error_hva(ghc->hva) || !ghc->memslot)
+                        return;
+        }
+
+        st = (struct kvm_steal_time __user *)ghc->hva;
         /*
          * Doing a TLB flush here, on the guest's behalf, can avoid
          * expensive IPIs.
          */
         if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
+                u8 st_preempted = 0;
+                int err = -EFAULT;
+
+                if (!user_access_begin(st, sizeof(*st)))
+                        return;
+
+                asm volatile("1: xchgb %0, %2\n"
+                             "xor %1, %1\n"
+                             "2:\n"
+                             _ASM_EXTABLE_UA(1b, 2b)
+                             : "+q" (st_preempted),
+                               "+&r" (err),
+                               "+m" (st->preempted));
+                if (err)
+                        goto out;
+
+                user_access_end();
+
+                vcpu->arch.st.preempted = 0;
+
                 trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
-                                       st->preempted & KVM_VCPU_FLUSH_TLB);
-                if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
+                                       st_preempted & KVM_VCPU_FLUSH_TLB);
+                if (st_preempted & KVM_VCPU_FLUSH_TLB)
                         kvm_vcpu_flush_tlb_guest(vcpu);
+
+                if (!user_access_begin(st, sizeof(*st)))
+                        goto dirty;
         } else {
-                st->preempted = 0;
+                if (!user_access_begin(st, sizeof(*st)))
+                        return;
+
+                unsafe_put_user(0, &st->preempted, out);
+                vcpu->arch.st.preempted = 0;
         }
 
-        vcpu->arch.st.preempted = 0;
+        unsafe_get_user(version, &st->version, out);
+        if (version & 1)
+                version += 1;  /* first time write, random junk */
 
-        if (st->version & 1)
-                st->version += 1;  /* first time write, random junk */
-
-        st->version += 1;
+        version += 1;
+        unsafe_put_user(version, &st->version, out);
 
         smp_wmb();
 
-        st->steal += current->sched_info.run_delay -
+        unsafe_get_user(steal, &st->steal, out);
+        steal += current->sched_info.run_delay -
                 vcpu->arch.st.last_steal;
         vcpu->arch.st.last_steal = current->sched_info.run_delay;
+        unsafe_put_user(steal, &st->steal, out);
 
-        smp_wmb();
+        version += 1;
+        unsafe_put_user(version, &st->version, out);
 
-        st->version += 1;
-
-        kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
+out:
+        user_access_end();
+dirty:
+        mark_page_dirty_in_slot(ghc->memslot, gpa_to_gfn(ghc->gpa));
 }
 
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
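
The rewritten record_steal_time() publishes steal time through the guest-visible kvm_steal_time area with a seqcount-style handshake: the version is bumped to an odd value before st->steal is updated and to an even value afterwards, with smp_wmb() ordering the stores. For reference, a guest-side reader of this ABI would loop roughly as in the sketch below (a simplified illustration of the documented protocol, not the actual code in arch/x86/kernel/kvm.c; read_steal_ns() is a made-up name):

/*
 * Hedged sketch of the guest-side counterpart to the version handshake
 * implemented above: retry while the version is odd (writer in progress)
 * or changes across the read of st->steal.
 */
static u64 read_steal_ns(struct kvm_steal_time *st)
{
        u32 version;
        u64 steal;

        do {
                version = READ_ONCE(st->version);
                virt_rmb();     /* read version before steal */
                steal = READ_ONCE(st->steal);
                virt_rmb();     /* read steal before re-checking version */
        } while ((version & 1) || version != READ_ONCE(st->version));

        return steal;
}
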
@@ -4049,51 +4101,67 @@
 
 static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 {
-        struct kvm_host_map map;
-        struct kvm_steal_time *st;
+        struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
+        struct kvm_steal_time __user *st;
+        struct kvm_memslots *slots;
+        static const u8 preempted = KVM_VCPU_PREEMPTED;
+        gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
 
+        /*
+         * The vCPU can be marked preempted if and only if the VM-Exit was on
+         * an instruction boundary and will not trigger guest emulation of any
+         * kind (see vcpu_run). Vendor specific code controls (conservatively)
+         * when this is true, for example allowing the vCPU to be marked
+         * preempted if and only if the VM-Exit was due to a host interrupt.
+         */
+        if (!vcpu->arch.at_instruction_boundary) {
+                vcpu->stat.preemption_other++;
+                return;
+        }
+
+        vcpu->stat.preemption_reported++;
         if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                 return;
 
         if (vcpu->arch.st.preempted)
                 return;
 
-        if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
-                        &vcpu->arch.st.cache, true))
+        /* This happens on process exit */
+        if (unlikely(current->mm != vcpu->kvm->mm))
                 return;
 
-        st = map.hva +
-                offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+        slots = kvm_memslots(vcpu->kvm);
 
-        st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
+        if (unlikely(slots->generation != ghc->generation ||
+                     gpa != ghc->gpa ||
+                     kvm_is_error_hva(ghc->hva) || !ghc->memslot))
+                return;
 
-        kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
+        st = (struct kvm_steal_time __user *)ghc->hva;
+        BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted));
+
+        if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
+                vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
+
+        mark_page_dirty_in_slot(ghc->memslot, gpa_to_gfn(ghc->gpa));
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
         int idx;
 
-        if (vcpu->preempted)
+        if (vcpu->preempted) {
                 vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu);
 
-        /*
-         * Disable page faults because we're in atomic context here.
-         * kvm_write_guest_offset_cached() would call might_fault()
-         * that relies on pagefault_disable() to tell if there's a
-         * bug. NOTE: the write to guest memory may not go through if
-         * during postcopy live migration or if there's heavy guest
-         * paging.
-         */
-        pagefault_disable();
-        /*
-         * kvm_memslots() will be called by
-         * kvm_write_guest_offset_cached() so take the srcu lock.
-         */
-        idx = srcu_read_lock(&vcpu->kvm->srcu);
-        kvm_steal_time_set_preempted(vcpu);
-        srcu_read_unlock(&vcpu->kvm->srcu, idx);
-        pagefault_enable();
+                /*
+                 * Take the srcu lock as memslots will be accessed to check the gfn
+                 * cache generation against the memslots generation.
+                 */
+                idx = srcu_read_lock(&vcpu->kvm->srcu);
+                kvm_steal_time_set_preempted(vcpu);
+                srcu_read_unlock(&vcpu->kvm->srcu, idx);
+        }
+
         kvm_x86_ops.vcpu_put(vcpu);
         vcpu->arch.last_host_tsc = rdtsc();
         /*
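
kvm_steal_time_set_preempted() now publishes KVM_VCPU_PREEMPTED with a single non-faulting byte copy, and only when the exit happened on an instruction boundary. On the guest side this byte feeds vcpu_is_preempted(), which lets spinning vCPUs give up on lock holders the host has descheduled. A hedged sketch of that consumer (per-CPU plumbing omitted; steal_time_for() is a hypothetical accessor, the real code lives in arch/x86/kernel/kvm.c):

/*
 * Hedged sketch of the guest-side check that this flag feeds: a vCPU
 * spinning on a lock can stop spinning when the lock holder's
 * steal-time area says it was preempted by the host.
 */
static bool guest_vcpu_is_preempted(int cpu)
{
        struct kvm_steal_time *st = steal_time_for(cpu);   /* hypothetical accessor */

        return !!(READ_ONCE(st->preempted) & KVM_VCPU_PREEMPTED);
}
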
@@ -4455,12 +4523,11 @@
 {
         unsigned long val;
 
+        memset(dbgregs, 0, sizeof(*dbgregs));
         memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
         kvm_get_dr(vcpu, 6, &val);
         dbgregs->dr6 = val;
         dbgregs->dr7 = vcpu->arch.dr7;
-        dbgregs->flags = 0;
-        memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
 }
 
 static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
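
The debugregs hunk above switches from zeroing individual fields after the fact to clearing the whole structure up front, so reserved fields and any implicit padding can never carry stale kernel stack bytes back to userspace through KVM_GET_DEBUGREGS. A minimal, stand-alone illustration of the same pattern (struct example_regs and fill_example_regs() are invented for this sketch):

/*
 * Hedged illustration of the "zero first, then fill" pattern: both the
 * reserved field and the 7 bytes of implicit padding after 'flags' are
 * guaranteed to reach userspace as zeroes.
 */
struct example_regs {
        u64 value;
        u8  flags;              /* 7 bytes of implicit padding follow */
        u64 reserved;
};

static void fill_example_regs(struct example_regs *regs, u64 value, u8 flags)
{
        memset(regs, 0, sizeof(*regs));
        regs->value = value;
        regs->flags = flags;
}
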
@@ -7535,7 +7602,9 @@
                                                   write_fault_to_spt,
                                                   emulation_type))
                                 return 1;
-                        if (ctxt->have_exception) {
+
+                        if (ctxt->have_exception &&
+                            !(emulation_type & EMULTYPE_SKIP)) {
                                 /*
                                  * #UD should result in just EMULATION_FAILED, and trap-like
                                  * exception should not be encountered during decode.
@@ -9356,6 +9425,13 @@
         vcpu->arch.l1tf_flush_l1d = true;
 
         for (;;) {
+                /*
+                 * If another guest vCPU requests a PV TLB flush in the middle
+                 * of instruction emulation, the rest of the emulation could
+                 * use a stale page translation. Assume that any code after
+                 * this point can start executing an instruction.
+                 */
+                vcpu->arch.at_instruction_boundary = false;
                 if (kvm_vcpu_running(vcpu)) {
                         r = vcpu_enter_guest(vcpu);
                 } else {
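
vcpu_run() clears at_instruction_boundary at the top of every loop iteration; it is up to vendor exit handling to set it back to true only for exits known to land on an instruction boundary, such as host interrupts. A hedged sketch of what such a vendor hook looks like (the function name is illustrative, not necessarily the exact VMX/SVM callback in this tree):

/*
 * Hedged sketch: a vendor IRQ-exit handler marking the exit as being on
 * an instruction boundary, so kvm_steal_time_set_preempted() is allowed
 * to report the vCPU as preempted.
 */
static void handle_external_interrupt_exit(struct kvm_vcpu *vcpu)
{
        /* ... dispatch the host interrupt here ... */

        /*
         * The exit was caused by a host interrupt and no emulation is in
         * flight, so it is safe to mark the instruction boundary.
         */
        vcpu->arch.at_instruction_boundary = true;
}
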
@@ -10241,10 +10317,7 @@
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
-        struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
         int idx;
-
-        kvm_release_pfn(cache->pfn, cache->dirty, cache);
 
         kvmclock_reset(vcpu);
 