.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * KVM Microsoft Hyper-V emulation |
---|
3 | 4 | * |
---|
.. | .. |
---|
15 | 16 | * Amit Shah <amit.shah@qumranet.com> |
---|
16 | 17 | * Ben-Ami Yassour <benami@il.ibm.com> |
---|
17 | 18 | * Andrey Smetanin <asmetanin@virtuozzo.com> |
---|
18 | | - * |
---|
19 | | - * This work is licensed under the terms of the GNU GPL, version 2. See |
---|
20 | | - * the COPYING file in the top-level directory. |
---|
21 | | - * |
---|
22 | 19 | */ |
---|
23 | 20 | |
---|
24 | 21 | #include "x86.h" |
---|
25 | 22 | #include "lapic.h" |
---|
26 | 23 | #include "ioapic.h" |
---|
| 24 | +#include "cpuid.h" |
---|
27 | 25 | #include "hyperv.h" |
---|
28 | 26 | |
---|
| 27 | +#include <linux/cpu.h> |
---|
29 | 28 | #include <linux/kvm_host.h> |
---|
30 | 29 | #include <linux/highmem.h> |
---|
31 | 30 | #include <linux/sched/cputime.h> |
---|
.. | .. |
---|
35 | 34 | #include <trace/events/kvm.h> |
---|
36 | 35 | |
---|
37 | 36 | #include "trace.h" |
---|
| 37 | +#include "irq.h" |
---|
| 38 | + |
---|
| 39 | +#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64) |
---|
| 40 | + |
---|
| 41 | +static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, |
---|
| 42 | + bool vcpu_kick); |
---|
38 | 43 | |
---|
39 | 44 | static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint) |
---|
40 | 45 | { |
---|
.. | .. |
---|
156 | 161 | return (synic->active) ? synic : NULL; |
---|
157 | 162 | } |
---|
158 | 163 | |
---|
159 | | -static void synic_clear_sint_msg_pending(struct kvm_vcpu_hv_synic *synic, |
---|
160 | | - u32 sint) |
---|
161 | | -{ |
---|
162 | | - struct kvm_vcpu *vcpu = synic_to_vcpu(synic); |
---|
163 | | - struct page *page; |
---|
164 | | - gpa_t gpa; |
---|
165 | | - struct hv_message *msg; |
---|
166 | | - struct hv_message_page *msg_page; |
---|
167 | | - |
---|
168 | | - gpa = synic->msg_page & PAGE_MASK; |
---|
169 | | - page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); |
---|
170 | | - if (is_error_page(page)) { |
---|
171 | | - vcpu_err(vcpu, "Hyper-V SynIC can't get msg page, gpa 0x%llx\n", |
---|
172 | | - gpa); |
---|
173 | | - return; |
---|
174 | | - } |
---|
175 | | - msg_page = kmap_atomic(page); |
---|
176 | | - |
---|
177 | | - msg = &msg_page->sint_message[sint]; |
---|
178 | | - msg->header.message_flags.msg_pending = 0; |
---|
179 | | - |
---|
180 | | - kunmap_atomic(msg_page); |
---|
181 | | - kvm_release_page_dirty(page); |
---|
182 | | - kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); |
---|
183 | | -} |
---|
184 | | - |
---|
185 | 164 | static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) |
---|
186 | 165 | { |
---|
187 | 166 | struct kvm *kvm = vcpu->kvm; |
---|
188 | 167 | struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); |
---|
189 | 168 | struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); |
---|
190 | 169 | struct kvm_vcpu_hv_stimer *stimer; |
---|
191 | | - int gsi, idx, stimers_pending; |
---|
| 170 | + int gsi, idx; |
---|
192 | 171 | |
---|
193 | 172 | trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint); |
---|
194 | 173 | |
---|
195 | | - if (synic->msg_page & HV_SYNIC_SIMP_ENABLE) |
---|
196 | | - synic_clear_sint_msg_pending(synic, sint); |
---|
197 | | - |
---|
198 | 174 | /* Try to deliver pending Hyper-V SynIC timers messages */ |
---|
199 | | - stimers_pending = 0; |
---|
200 | 175 | for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) { |
---|
201 | 176 | stimer = &hv_vcpu->stimer[idx]; |
---|
202 | | - if (stimer->msg_pending && |
---|
203 | | - (stimer->config & HV_STIMER_ENABLE) && |
---|
204 | | - HV_STIMER_SINT(stimer->config) == sint) { |
---|
205 | | - set_bit(stimer->index, |
---|
206 | | - hv_vcpu->stimer_pending_bitmap); |
---|
207 | | - stimers_pending++; |
---|
208 | | - } |
---|
| 177 | + if (stimer->msg_pending && stimer->config.enable && |
---|
| 178 | + !stimer->config.direct_mode && |
---|
| 179 | + stimer->config.sintx == sint) |
---|
| 180 | + stimer_mark_pending(stimer, false); |
---|
209 | 181 | } |
---|
210 | | - if (stimers_pending) |
---|
211 | | - kvm_make_request(KVM_REQ_HV_STIMER, vcpu); |
---|
212 | 182 | |
---|
213 | 183 | idx = srcu_read_lock(&kvm->irq_srcu); |
---|
214 | 184 | gsi = atomic_read(&synic->sint_to_gsi[sint]); |
---|
.. | .. |
---|
237 | 207 | struct kvm_vcpu *vcpu = synic_to_vcpu(synic); |
---|
238 | 208 | int ret; |
---|
239 | 209 | |
---|
240 | | - if (!synic->active && !host) |
---|
| 210 | + if (!synic->active && (!host || data)) |
---|
241 | 211 | return 1; |
---|
242 | 212 | |
---|
243 | 213 | trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host); |
---|
.. | .. |
---|
283 | 253 | case HV_X64_MSR_EOM: { |
---|
284 | 254 | int i; |
---|
285 | 255 | |
---|
| 256 | + if (!synic->active) |
---|
| 257 | + break; |
---|
| 258 | + |
---|
286 | 259 | for (i = 0; i < ARRAY_SIZE(synic->sint); i++) |
---|
287 | 260 | kvm_hv_notify_acked_sint(vcpu, i); |
---|
288 | 261 | break; |
---|
.. | .. |
---|
295 | 268 | break; |
---|
296 | 269 | } |
---|
297 | 270 | return ret; |
---|
| 271 | +} |
---|
| 272 | + |
---|
| 273 | +static bool kvm_hv_is_syndbg_enabled(struct kvm_vcpu *vcpu) |
---|
| 274 | +{ |
---|
| 275 | + struct kvm_cpuid_entry2 *entry; |
---|
| 276 | + |
---|
| 277 | + entry = kvm_find_cpuid_entry(vcpu, |
---|
| 278 | + HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, |
---|
| 279 | + 0); |
---|
| 280 | + if (!entry) |
---|
| 281 | + return false; |
---|
| 282 | + |
---|
| 283 | + return entry->eax & HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING; |
---|
| 284 | +} |
---|
| 285 | + |
---|
| 286 | +static int kvm_hv_syndbg_complete_userspace(struct kvm_vcpu *vcpu) |
---|
| 287 | +{ |
---|
| 288 | + struct kvm *kvm = vcpu->kvm; |
---|
| 289 | + struct kvm_hv *hv = &kvm->arch.hyperv; |
---|
| 290 | + |
---|
| 291 | + if (vcpu->run->hyperv.u.syndbg.msr == HV_X64_MSR_SYNDBG_CONTROL) |
---|
| 292 | + hv->hv_syndbg.control.status = |
---|
| 293 | + vcpu->run->hyperv.u.syndbg.status; |
---|
| 294 | + return 1; |
---|
| 295 | +} |
---|
| 296 | + |
---|
| 297 | +static void syndbg_exit(struct kvm_vcpu *vcpu, u32 msr) |
---|
| 298 | +{ |
---|
| 299 | + struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu); |
---|
| 300 | + struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; |
---|
| 301 | + |
---|
| 302 | + hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNDBG; |
---|
| 303 | + hv_vcpu->exit.u.syndbg.msr = msr; |
---|
| 304 | + hv_vcpu->exit.u.syndbg.control = syndbg->control.control; |
---|
| 305 | + hv_vcpu->exit.u.syndbg.send_page = syndbg->control.send_page; |
---|
| 306 | + hv_vcpu->exit.u.syndbg.recv_page = syndbg->control.recv_page; |
---|
| 307 | + hv_vcpu->exit.u.syndbg.pending_page = syndbg->control.pending_page; |
---|
| 308 | + vcpu->arch.complete_userspace_io = |
---|
| 309 | + kvm_hv_syndbg_complete_userspace; |
---|
| 310 | + |
---|
| 311 | + kvm_make_request(KVM_REQ_HV_EXIT, vcpu); |
---|
| 312 | +} |
---|
| 313 | + |
---|
| 314 | +static int syndbg_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) |
---|
| 315 | +{ |
---|
| 316 | + struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu); |
---|
| 317 | + |
---|
| 318 | + if (!kvm_hv_is_syndbg_enabled(vcpu) && !host) |
---|
| 319 | + return 1; |
---|
| 320 | + |
---|
| 321 | + trace_kvm_hv_syndbg_set_msr(vcpu->vcpu_id, |
---|
| 322 | + vcpu_to_hv_vcpu(vcpu)->vp_index, msr, data); |
---|
| 323 | + switch (msr) { |
---|
| 324 | + case HV_X64_MSR_SYNDBG_CONTROL: |
---|
| 325 | + syndbg->control.control = data; |
---|
| 326 | + if (!host) |
---|
| 327 | + syndbg_exit(vcpu, msr); |
---|
| 328 | + break; |
---|
| 329 | + case HV_X64_MSR_SYNDBG_STATUS: |
---|
| 330 | + syndbg->control.status = data; |
---|
| 331 | + break; |
---|
| 332 | + case HV_X64_MSR_SYNDBG_SEND_BUFFER: |
---|
| 333 | + syndbg->control.send_page = data; |
---|
| 334 | + break; |
---|
| 335 | + case HV_X64_MSR_SYNDBG_RECV_BUFFER: |
---|
| 336 | + syndbg->control.recv_page = data; |
---|
| 337 | + break; |
---|
| 338 | + case HV_X64_MSR_SYNDBG_PENDING_BUFFER: |
---|
| 339 | + syndbg->control.pending_page = data; |
---|
| 340 | + if (!host) |
---|
| 341 | + syndbg_exit(vcpu, msr); |
---|
| 342 | + break; |
---|
| 343 | + case HV_X64_MSR_SYNDBG_OPTIONS: |
---|
| 344 | + syndbg->options = data; |
---|
| 345 | + break; |
---|
| 346 | + default: |
---|
| 347 | + break; |
---|
| 348 | + } |
---|
| 349 | + |
---|
| 350 | + return 0; |
---|
| 351 | +} |
---|
| 352 | + |
---|
| 353 | +static int syndbg_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) |
---|
| 354 | +{ |
---|
| 355 | + struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu); |
---|
| 356 | + |
---|
| 357 | + if (!kvm_hv_is_syndbg_enabled(vcpu) && !host) |
---|
| 358 | + return 1; |
---|
| 359 | + |
---|
| 360 | + switch (msr) { |
---|
| 361 | + case HV_X64_MSR_SYNDBG_CONTROL: |
---|
| 362 | + *pdata = syndbg->control.control; |
---|
| 363 | + break; |
---|
| 364 | + case HV_X64_MSR_SYNDBG_STATUS: |
---|
| 365 | + *pdata = syndbg->control.status; |
---|
| 366 | + break; |
---|
| 367 | + case HV_X64_MSR_SYNDBG_SEND_BUFFER: |
---|
| 368 | + *pdata = syndbg->control.send_page; |
---|
| 369 | + break; |
---|
| 370 | + case HV_X64_MSR_SYNDBG_RECV_BUFFER: |
---|
| 371 | + *pdata = syndbg->control.recv_page; |
---|
| 372 | + break; |
---|
| 373 | + case HV_X64_MSR_SYNDBG_PENDING_BUFFER: |
---|
| 374 | + *pdata = syndbg->control.pending_page; |
---|
| 375 | + break; |
---|
| 376 | + case HV_X64_MSR_SYNDBG_OPTIONS: |
---|
| 377 | + *pdata = syndbg->options; |
---|
| 378 | + break; |
---|
| 379 | + default: |
---|
| 380 | + break; |
---|
| 381 | + } |
---|
| 382 | + |
---|
| 383 | + trace_kvm_hv_syndbg_get_msr(vcpu->vcpu_id, |
---|
| 384 | + vcpu_to_hv_vcpu(vcpu)->vp_index, msr, |
---|
| 385 | + *pdata); |
---|
| 386 | + |
---|
| 387 | + return 0; |
---|
298 | 388 | } |
---|
299 | 389 | |
---|
300 | 390 | static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata, |
---|
.. | .. |
---|
337 | 427 | struct kvm_vcpu *vcpu = synic_to_vcpu(synic); |
---|
338 | 428 | struct kvm_lapic_irq irq; |
---|
339 | 429 | int ret, vector; |
---|
| 430 | + |
---|
| 431 | + if (KVM_BUG_ON(!lapic_in_kernel(vcpu), vcpu->kvm)) |
---|
| 432 | + return -EINVAL; |
---|
340 | 433 | |
---|
341 | 434 | if (sint >= ARRAY_SIZE(synic->sint)) |
---|
342 | 435 | return -EINVAL; |
---|
.. | .. |
---|
495 | 588 | time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm); |
---|
496 | 589 | ktime_now = ktime_get(); |
---|
497 | 590 | |
---|
498 | | - if (stimer->config & HV_STIMER_PERIODIC) { |
---|
| 591 | + if (stimer->config.periodic) { |
---|
499 | 592 | if (stimer->exp_time) { |
---|
500 | 593 | if (time_now >= stimer->exp_time) { |
---|
501 | 594 | u64 remainder; |
---|
.. | .. |
---|
/*
 * Handle a write to a synthetic timer's config MSR.
 *
 * Returns 0 on success, 1 (-> #GP for the guest) when the SynIC is not
 * active and the write is either guest-initiated or sets a non-zero
 * value (host writes of 0 are always allowed, e.g. for state reset).
 */
static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
			     bool host)
{
	union hv_stimer_config new_config = {.as_uint64 = config},
		old_config = {.as_uint64 = stimer->config.as_uint64};
	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);

	if (!synic->active && (!host || config))
		return 1;

	trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id,
				       stimer->index, config, host);

	/* Stop the timer before changing its configuration. */
	stimer_cleanup(stimer);
	/*
	 * A message-mode timer (not direct) with SINTx == 0 has nowhere to
	 * deliver to, so an enabled timer is implicitly disabled by such a
	 * write rather than left armed.
	 */
	if (old_config.enable &&
	    !new_config.direct_mode && new_config.sintx == 0)
		new_config.enable = 0;
	stimer->config.as_uint64 = new_config.as_uint64;

	/* Re-arm lazily via the pending bitmap instead of starting here. */
	if (stimer->config.enable)
		stimer_mark_pending(stimer, false);

	return 0;
}
---|
557 | 662 | |
---|
558 | 663 | static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, |
---|
559 | 664 | bool host) |
---|
560 | 665 | { |
---|
| 666 | + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); |
---|
| 667 | + struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); |
---|
| 668 | + |
---|
| 669 | + if (!synic->active && (!host || count)) |
---|
| 670 | + return 1; |
---|
| 671 | + |
---|
561 | 672 | trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id, |
---|
562 | 673 | stimer->index, count, host); |
---|
563 | 674 | |
---|
564 | 675 | stimer_cleanup(stimer); |
---|
565 | 676 | stimer->count = count; |
---|
566 | 677 | if (stimer->count == 0) |
---|
567 | | - stimer->config &= ~HV_STIMER_ENABLE; |
---|
568 | | - else if (stimer->config & HV_STIMER_AUTOENABLE) |
---|
569 | | - stimer->config |= HV_STIMER_ENABLE; |
---|
570 | | - stimer_mark_pending(stimer, false); |
---|
| 678 | + stimer->config.enable = 0; |
---|
| 679 | + else if (stimer->config.auto_enable) |
---|
| 680 | + stimer->config.enable = 1; |
---|
| 681 | + |
---|
| 682 | + if (stimer->config.enable) |
---|
| 683 | + stimer_mark_pending(stimer, false); |
---|
| 684 | + |
---|
571 | 685 | return 0; |
---|
572 | 686 | } |
---|
573 | 687 | |
---|
574 | 688 | static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig) |
---|
575 | 689 | { |
---|
576 | | - *pconfig = stimer->config; |
---|
| 690 | + *pconfig = stimer->config.as_uint64; |
---|
577 | 691 | return 0; |
---|
578 | 692 | } |
---|
579 | 693 | |
---|
.. | .. |
---|
584 | 698 | } |
---|
585 | 699 | |
---|
/*
 * Deliver @src_msg into slot @sint of the guest's SynIC message page and
 * raise the corresponding SINT interrupt.
 *
 * Returns 0 on success, -ENOENT when the message page is not enabled,
 * -EAGAIN when the slot is still occupied (msg_pending was set so the
 * guest will EOM and we can retry), -EFAULT when the interrupt could not
 * be injected, or a negative error from the guest-page accessors.
 *
 * @no_retry: when true, an occupied slot is silently dropped (returns 0
 * without setting msg_pending) — used for periodic timer messages.
 */
static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
			     struct hv_message *src_msg, bool no_retry)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	int msg_off = offsetof(struct hv_message_page, sint_message[sint]);
	gfn_t msg_page_gfn;
	struct hv_message_header hv_hdr;
	int r;

	if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
		return -ENOENT;

	msg_page_gfn = synic->msg_page >> PAGE_SHIFT;

	/*
	 * Strictly following the spec-mandated ordering would assume setting
	 * .msg_pending before checking .message_type. However, this function
	 * is only called in vcpu context so the entire update is atomic from
	 * guest POV and thus the exact order here doesn't matter.
	 */
	r = kvm_vcpu_read_guest_page(vcpu, msg_page_gfn, &hv_hdr.message_type,
				     msg_off + offsetof(struct hv_message,
							header.message_type),
				     sizeof(hv_hdr.message_type));
	if (r < 0)
		return r;

	/* Slot busy: either drop (no_retry) or flag msg_pending and retry. */
	if (hv_hdr.message_type != HVMSG_NONE) {
		if (no_retry)
			return 0;

		hv_hdr.message_flags.msg_pending = 1;
		r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn,
					      &hv_hdr.message_flags,
					      msg_off +
					      offsetof(struct hv_message,
						       header.message_flags),
					      sizeof(hv_hdr.message_flags));
		if (r < 0)
			return r;
		return -EAGAIN;
	}

	/* Copy only header + actual payload, not the whole message slot. */
	r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn, src_msg, msg_off,
				      sizeof(src_msg->header) +
				      src_msg->header.payload_size);
	if (r < 0)
		return r;

	/* synic_set_irq() returns >0 on delivery, 0 when nothing was sent. */
	r = synic_set_irq(synic, sint);
	if (r < 0)
		return r;
	if (r == 0)
		return -EFAULT;
	return 0;
}
---|
626 | 756 | |
---|
627 | 757 | static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer) |
---|
.. | .. |
---|
631 | 761 | struct hv_timer_message_payload *payload = |
---|
632 | 762 | (struct hv_timer_message_payload *)&msg->u.payload; |
---|
633 | 763 | |
---|
| 764 | + /* |
---|
| 765 | + * To avoid piling up periodic ticks, don't retry message |
---|
| 766 | + * delivery for them (within "lazy" lost ticks policy). |
---|
| 767 | + */ |
---|
| 768 | + bool no_retry = stimer->config.periodic; |
---|
| 769 | + |
---|
634 | 770 | payload->expiration_time = stimer->exp_time; |
---|
635 | 771 | payload->delivery_time = get_time_ref_counter(vcpu->kvm); |
---|
636 | 772 | return synic_deliver_msg(vcpu_to_synic(vcpu), |
---|
637 | | - HV_STIMER_SINT(stimer->config), msg); |
---|
| 773 | + stimer->config.sintx, msg, |
---|
| 774 | + no_retry); |
---|
| 775 | +} |
---|
| 776 | + |
---|
| 777 | +static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer) |
---|
| 778 | +{ |
---|
| 779 | + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); |
---|
| 780 | + struct kvm_lapic_irq irq = { |
---|
| 781 | + .delivery_mode = APIC_DM_FIXED, |
---|
| 782 | + .vector = stimer->config.apic_vector |
---|
| 783 | + }; |
---|
| 784 | + |
---|
| 785 | + if (lapic_in_kernel(vcpu)) |
---|
| 786 | + return !kvm_apic_set_irq(vcpu, &irq, NULL); |
---|
| 787 | + return 0; |
---|
638 | 788 | } |
---|
639 | 789 | |
---|
640 | 790 | static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) |
---|
641 | 791 | { |
---|
642 | | - int r; |
---|
| 792 | + int r, direct = stimer->config.direct_mode; |
---|
643 | 793 | |
---|
644 | 794 | stimer->msg_pending = true; |
---|
645 | | - r = stimer_send_msg(stimer); |
---|
| 795 | + if (!direct) |
---|
| 796 | + r = stimer_send_msg(stimer); |
---|
| 797 | + else |
---|
| 798 | + r = stimer_notify_direct(stimer); |
---|
646 | 799 | trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id, |
---|
647 | | - stimer->index, r); |
---|
| 800 | + stimer->index, direct, r); |
---|
648 | 801 | if (!r) { |
---|
649 | 802 | stimer->msg_pending = false; |
---|
650 | | - if (!(stimer->config & HV_STIMER_PERIODIC)) |
---|
651 | | - stimer->config &= ~HV_STIMER_ENABLE; |
---|
| 803 | + if (!(stimer->config.periodic)) |
---|
| 804 | + stimer->config.enable = 0; |
---|
652 | 805 | } |
---|
653 | 806 | } |
---|
654 | 807 | |
---|
.. | .. |
---|
662 | 815 | for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) |
---|
663 | 816 | if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) { |
---|
664 | 817 | stimer = &hv_vcpu->stimer[i]; |
---|
665 | | - if (stimer->config & HV_STIMER_ENABLE) { |
---|
| 818 | + if (stimer->config.enable) { |
---|
666 | 819 | exp_time = stimer->exp_time; |
---|
667 | 820 | |
---|
668 | 821 | if (exp_time) { |
---|
.. | .. |
---|
672 | 825 | stimer_expiration(stimer); |
---|
673 | 826 | } |
---|
674 | 827 | |
---|
675 | | - if ((stimer->config & HV_STIMER_ENABLE) && |
---|
| 828 | + if ((stimer->config.enable) && |
---|
676 | 829 | stimer->count) { |
---|
677 | 830 | if (!stimer->msg_pending) |
---|
678 | 831 | stimer_start(stimer); |
---|
.. | .. |
---|
758 | 911 | |
---|
759 | 912 | /* |
---|
760 | 913 | * Hyper-V SynIC auto EOI SINT's are |
---|
761 | | - * not compatible with APICV, so deactivate APICV |
---|
| 914 | + * not compatible with APICV, so request |
---|
| 915 | + * to deactivate APICV permanently. |
---|
762 | 916 | */ |
---|
763 | | - kvm_vcpu_deactivate_apicv(vcpu); |
---|
| 917 | + kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_HYPERV); |
---|
764 | 918 | synic->active = true; |
---|
765 | 919 | synic->dont_zero_synic_pages = dont_zero_synic_pages; |
---|
| 920 | + synic->control = HV_SYNIC_CONTROL_ENABLE; |
---|
766 | 921 | return 0; |
---|
767 | 922 | } |
---|
768 | 923 | |
---|
.. | .. |
---|
781 | 936 | case HV_X64_MSR_REENLIGHTENMENT_CONTROL: |
---|
782 | 937 | case HV_X64_MSR_TSC_EMULATION_CONTROL: |
---|
783 | 938 | case HV_X64_MSR_TSC_EMULATION_STATUS: |
---|
| 939 | + case HV_X64_MSR_SYNDBG_OPTIONS: |
---|
| 940 | + case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER: |
---|
784 | 941 | r = true; |
---|
785 | 942 | break; |
---|
786 | 943 | } |
---|
.. | .. |
---|
814 | 971 | struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; |
---|
815 | 972 | |
---|
816 | 973 | if (host) |
---|
817 | | - hv->hv_crash_ctl = data & HV_X64_MSR_CRASH_CTL_NOTIFY; |
---|
| 974 | + hv->hv_crash_ctl = data & HV_CRASH_CTL_CRASH_NOTIFY; |
---|
818 | 975 | |
---|
819 | | - if (!host && (data & HV_X64_MSR_CRASH_CTL_NOTIFY)) { |
---|
| 976 | + if (!host && (data & HV_CRASH_CTL_CRASH_NOTIFY)) { |
---|
820 | 977 | |
---|
821 | 978 | vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n", |
---|
822 | 979 | hv->hv_crash_param[0], |
---|
.. | .. |
---|
881 | 1038 | * These two equivalencies are implemented in this function. |
---|
882 | 1039 | */ |
---|
883 | 1040 | static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock, |
---|
884 | | - HV_REFERENCE_TSC_PAGE *tsc_ref) |
---|
| 1041 | + struct ms_hyperv_tsc_page *tsc_ref) |
---|
885 | 1042 | { |
---|
886 | 1043 | u64 max_mul; |
---|
887 | 1044 | |
---|
.. | .. |
---|
922 | 1079 | u64 gfn; |
---|
923 | 1080 | |
---|
924 | 1081 | BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence)); |
---|
925 | | - BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0); |
---|
| 1082 | + BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0); |
---|
926 | 1083 | |
---|
927 | 1084 | if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) |
---|
928 | 1085 | return; |
---|
.. | .. |
---|
1003 | 1160 | addr = gfn_to_hva(kvm, gfn); |
---|
1004 | 1161 | if (kvm_is_error_hva(addr)) |
---|
1005 | 1162 | return 1; |
---|
1006 | | - kvm_x86_ops->patch_hypercall(vcpu, instructions); |
---|
| 1163 | + kvm_x86_ops.patch_hypercall(vcpu, instructions); |
---|
1007 | 1164 | ((unsigned char *)instructions)[3] = 0xc3; /* ret */ |
---|
1008 | 1165 | if (__copy_to_user((void __user *)addr, instructions, 4)) |
---|
1009 | 1166 | return 1; |
---|
.. | .. |
---|
1042 | 1199 | if (!host) |
---|
1043 | 1200 | return 1; |
---|
1044 | 1201 | break; |
---|
| 1202 | + case HV_X64_MSR_SYNDBG_OPTIONS: |
---|
| 1203 | + case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER: |
---|
| 1204 | + return syndbg_set_msr(vcpu, msr, data, host); |
---|
1045 | 1205 | default: |
---|
1046 | | - vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", |
---|
| 1206 | + vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", |
---|
1047 | 1207 | msr, data); |
---|
1048 | 1208 | return 1; |
---|
1049 | 1209 | } |
---|
.. | .. |
---|
1104 | 1264 | addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); |
---|
1105 | 1265 | if (kvm_is_error_hva(addr)) |
---|
1106 | 1266 | return 1; |
---|
1107 | | - if (__clear_user((void __user *)addr, PAGE_SIZE)) |
---|
| 1267 | + |
---|
| 1268 | + /* |
---|
| 1269 | + * Clear apic_assist portion of struct hv_vp_assist_page |
---|
| 1270 | + * only, there can be valuable data in the rest which needs |
---|
| 1271 | + * to be preserved e.g. on migration. |
---|
| 1272 | + */ |
---|
| 1273 | + if (__put_user(0, (u32 __user *)addr)) |
---|
1108 | 1274 | return 1; |
---|
1109 | 1275 | hv_vcpu->hv_vapic = data; |
---|
1110 | 1276 | kvm_vcpu_mark_page_dirty(vcpu, gfn); |
---|
.. | .. |
---|
1157 | 1323 | return 1; |
---|
1158 | 1324 | break; |
---|
1159 | 1325 | default: |
---|
1160 | | - vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", |
---|
| 1326 | + vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", |
---|
1161 | 1327 | msr, data); |
---|
1162 | 1328 | return 1; |
---|
1163 | 1329 | } |
---|
.. | .. |
---|
1165 | 1331 | return 0; |
---|
1166 | 1332 | } |
---|
1167 | 1333 | |
---|
1168 | | -static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) |
---|
| 1334 | +static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, |
---|
| 1335 | + bool host) |
---|
1169 | 1336 | { |
---|
1170 | 1337 | u64 data = 0; |
---|
1171 | 1338 | struct kvm *kvm = vcpu->kvm; |
---|
.. | .. |
---|
1202 | 1369 | case HV_X64_MSR_TSC_EMULATION_STATUS: |
---|
1203 | 1370 | data = hv->hv_tsc_emulation_status; |
---|
1204 | 1371 | break; |
---|
| 1372 | + case HV_X64_MSR_SYNDBG_OPTIONS: |
---|
| 1373 | + case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER: |
---|
| 1374 | + return syndbg_get_msr(vcpu, msr, pdata, host); |
---|
1205 | 1375 | default: |
---|
1206 | 1376 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); |
---|
1207 | 1377 | return 1; |
---|
.. | .. |
---|
1291 | 1461 | int r; |
---|
1292 | 1462 | |
---|
1293 | 1463 | mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock); |
---|
1294 | | - r = kvm_hv_get_msr_pw(vcpu, msr, pdata); |
---|
| 1464 | + r = kvm_hv_get_msr_pw(vcpu, msr, pdata, host); |
---|
1295 | 1465 | mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock); |
---|
1296 | 1466 | return r; |
---|
1297 | 1467 | } else |
---|
1298 | 1468 | return kvm_hv_get_msr(vcpu, msr, pdata, host); |
---|
1299 | 1469 | } |
---|
1300 | 1470 | |
---|
/*
 * Expand a Hyper-V sparse VP set (@sparse_banks gated by
 * @valid_bank_mask) into a flat bitmap of vCPU indices.
 *
 * Fast path: when no vCPU has a remapped VP index
 * (num_mismatched_vp_indexes == 0), vp_index == vcpu index for every
 * vCPU, so the expanded VP bitmap itself can be returned.  Otherwise
 * each vCPU's vp_index is looked up in the VP bitmap and translated
 * into @vcpu_bitmap.
 *
 * Both @vp_bitmap (KVM_HV_MAX_SPARSE_VCPU_SET_BITS u64s) and
 * @vcpu_bitmap (KVM_MAX_VCPUS bits) are caller-provided scratch
 * buffers; the return value points into one of them.
 */
static __always_inline unsigned long *sparse_set_to_vcpu_mask(
	struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask,
	u64 *vp_bitmap, unsigned long *vcpu_bitmap)
{
	struct kvm_hv *hv = &kvm->arch.hyperv;
	struct kvm_vcpu *vcpu;
	int i, bank, sbank = 0;

	/* Scatter the packed sparse banks into their bank positions. */
	memset(vp_bitmap, 0,
	       KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
			 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
		vp_bitmap[bank] = sparse_banks[sbank++];

	if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) {
		/* for all vcpus vp_index == vcpu_idx */
		return (unsigned long *)vp_bitmap;
	}

	/* Slow path: translate each set VP index to its vCPU index. */
	bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (test_bit(vcpu_to_hv_vcpu(vcpu)->vp_index,
			     (unsigned long *)vp_bitmap))
			__set_bit(i, vcpu_bitmap);
	}
	return vcpu_bitmap;
}
---|
1314 | 1498 | |
---|
1315 | 1499 | static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, |
---|
1316 | 1500 | u16 rep_cnt, bool ex) |
---|
1317 | 1501 | { |
---|
1318 | 1502 | struct kvm *kvm = current_vcpu->kvm; |
---|
1319 | | - struct kvm_vcpu_hv *hv_current = ¤t_vcpu->arch.hyperv; |
---|
| 1503 | + struct kvm_vcpu_hv *hv_vcpu = ¤t_vcpu->arch.hyperv; |
---|
1320 | 1504 | struct hv_tlb_flush_ex flush_ex; |
---|
1321 | 1505 | struct hv_tlb_flush flush; |
---|
1322 | | - struct kvm_vcpu *vcpu; |
---|
1323 | | - unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0}; |
---|
1324 | | - unsigned long valid_bank_mask = 0; |
---|
| 1506 | + u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; |
---|
| 1507 | + DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS); |
---|
| 1508 | + unsigned long *vcpu_mask; |
---|
| 1509 | + u64 valid_bank_mask; |
---|
1325 | 1510 | u64 sparse_banks[64]; |
---|
1326 | | - int sparse_banks_len, i; |
---|
| 1511 | + int sparse_banks_len; |
---|
1327 | 1512 | bool all_cpus; |
---|
1328 | 1513 | |
---|
1329 | 1514 | if (!ex) { |
---|
.. | .. |
---|
1333 | 1518 | trace_kvm_hv_flush_tlb(flush.processor_mask, |
---|
1334 | 1519 | flush.address_space, flush.flags); |
---|
1335 | 1520 | |
---|
| 1521 | + valid_bank_mask = BIT_ULL(0); |
---|
1336 | 1522 | sparse_banks[0] = flush.processor_mask; |
---|
1337 | 1523 | |
---|
1338 | 1524 | /* |
---|
.. | .. |
---|
1358 | 1544 | all_cpus = flush_ex.hv_vp_set.format != |
---|
1359 | 1545 | HV_GENERIC_SET_SPARSE_4K; |
---|
1360 | 1546 | |
---|
1361 | | - sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) * |
---|
| 1547 | + sparse_banks_len = |
---|
| 1548 | + bitmap_weight((unsigned long *)&valid_bank_mask, 64) * |
---|
1362 | 1549 | sizeof(sparse_banks[0]); |
---|
1363 | 1550 | |
---|
1364 | 1551 | if (!sparse_banks_len && !all_cpus) |
---|
.. | .. |
---|
1373 | 1560 | return HV_STATUS_INVALID_HYPERCALL_INPUT; |
---|
1374 | 1561 | } |
---|
1375 | 1562 | |
---|
1376 | | - cpumask_clear(&hv_current->tlb_lush); |
---|
| 1563 | + cpumask_clear(&hv_vcpu->tlb_flush); |
---|
1377 | 1564 | |
---|
1378 | | - kvm_for_each_vcpu(i, vcpu, kvm) { |
---|
1379 | | - struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; |
---|
1380 | | - int bank = hv->vp_index / 64, sbank = 0; |
---|
| 1565 | + vcpu_mask = all_cpus ? NULL : |
---|
| 1566 | + sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, |
---|
| 1567 | + vp_bitmap, vcpu_bitmap); |
---|
1381 | 1568 | |
---|
1382 | | - if (!all_cpus) { |
---|
1383 | | - /* Banks >64 can't be represented */ |
---|
1384 | | - if (bank >= 64) |
---|
1385 | | - continue; |
---|
1386 | | - |
---|
1387 | | - /* Non-ex hypercalls can only address first 64 vCPUs */ |
---|
1388 | | - if (!ex && bank) |
---|
1389 | | - continue; |
---|
1390 | | - |
---|
1391 | | - if (ex) { |
---|
1392 | | - /* |
---|
1393 | | - * Check is the bank of this vCPU is in sparse |
---|
1394 | | - * set and get the sparse bank number. |
---|
1395 | | - */ |
---|
1396 | | - sbank = get_sparse_bank_no(valid_bank_mask, |
---|
1397 | | - bank); |
---|
1398 | | - |
---|
1399 | | - if (sbank < 0) |
---|
1400 | | - continue; |
---|
1401 | | - } |
---|
1402 | | - |
---|
1403 | | - if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64))) |
---|
1404 | | - continue; |
---|
1405 | | - } |
---|
1406 | | - |
---|
1407 | | - /* |
---|
1408 | | - * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we |
---|
1409 | | - * can't analyze it here, flush TLB regardless of the specified |
---|
1410 | | - * address space. |
---|
1411 | | - */ |
---|
1412 | | - __set_bit(i, vcpu_bitmap); |
---|
1413 | | - } |
---|
1414 | | - |
---|
1415 | | - kvm_make_vcpus_request_mask(kvm, |
---|
1416 | | - KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP, |
---|
1417 | | - vcpu_bitmap, &hv_current->tlb_lush); |
---|
| 1569 | + /* |
---|
| 1570 | + * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't |
---|
| 1571 | + * analyze it here, flush TLB regardless of the specified address space. |
---|
| 1572 | + */ |
---|
| 1573 | + kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST, |
---|
| 1574 | + NULL, vcpu_mask, &hv_vcpu->tlb_flush); |
---|
1418 | 1575 | |
---|
1419 | 1576 | ret_success: |
---|
1420 | 1577 | /* We always do full TLB flush, set rep_done = rep_cnt. */ |
---|
.. | .. |
---|
1422 | 1579 | ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET); |
---|
1423 | 1580 | } |
---|
1424 | 1581 | |
---|
| 1582 | +static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector, |
---|
| 1583 | + unsigned long *vcpu_bitmap) |
---|
| 1584 | +{ |
---|
| 1585 | + struct kvm_lapic_irq irq = { |
---|
| 1586 | + .delivery_mode = APIC_DM_FIXED, |
---|
| 1587 | + .vector = vector |
---|
| 1588 | + }; |
---|
| 1589 | + struct kvm_vcpu *vcpu; |
---|
| 1590 | + int i; |
---|
| 1591 | + |
---|
| 1592 | + kvm_for_each_vcpu(i, vcpu, kvm) { |
---|
| 1593 | + if (vcpu_bitmap && !test_bit(i, vcpu_bitmap)) |
---|
| 1594 | + continue; |
---|
| 1595 | + |
---|
| 1596 | + /* We fail only when APIC is disabled */ |
---|
| 1597 | + kvm_apic_set_irq(vcpu, &irq, NULL); |
---|
| 1598 | + } |
---|
| 1599 | +} |
---|
| 1600 | + |
---|
| 1601 | +static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa, |
---|
| 1602 | + bool ex, bool fast) |
---|
| 1603 | +{ |
---|
| 1604 | + struct kvm *kvm = current_vcpu->kvm; |
---|
| 1605 | + struct hv_send_ipi_ex send_ipi_ex; |
---|
| 1606 | + struct hv_send_ipi send_ipi; |
---|
| 1607 | + u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; |
---|
| 1608 | + DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS); |
---|
| 1609 | + unsigned long *vcpu_mask; |
---|
| 1610 | + unsigned long valid_bank_mask; |
---|
| 1611 | + u64 sparse_banks[64]; |
---|
| 1612 | + int sparse_banks_len; |
---|
| 1613 | + u32 vector; |
---|
| 1614 | + bool all_cpus; |
---|
| 1615 | + |
---|
| 1616 | + if (!ex) { |
---|
| 1617 | + if (!fast) { |
---|
| 1618 | + if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi, |
---|
| 1619 | + sizeof(send_ipi)))) |
---|
| 1620 | + return HV_STATUS_INVALID_HYPERCALL_INPUT; |
---|
| 1621 | + sparse_banks[0] = send_ipi.cpu_mask; |
---|
| 1622 | + vector = send_ipi.vector; |
---|
| 1623 | + } else { |
---|
| 1624 | + /* 'reserved' part of hv_send_ipi should be 0 */ |
---|
| 1625 | + if (unlikely(ingpa >> 32 != 0)) |
---|
| 1626 | + return HV_STATUS_INVALID_HYPERCALL_INPUT; |
---|
| 1627 | + sparse_banks[0] = outgpa; |
---|
| 1628 | + vector = (u32)ingpa; |
---|
| 1629 | + } |
---|
| 1630 | + all_cpus = false; |
---|
| 1631 | + valid_bank_mask = BIT_ULL(0); |
---|
| 1632 | + |
---|
| 1633 | + trace_kvm_hv_send_ipi(vector, sparse_banks[0]); |
---|
| 1634 | + } else { |
---|
| 1635 | + if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex, |
---|
| 1636 | + sizeof(send_ipi_ex)))) |
---|
| 1637 | + return HV_STATUS_INVALID_HYPERCALL_INPUT; |
---|
| 1638 | + |
---|
| 1639 | + trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector, |
---|
| 1640 | + send_ipi_ex.vp_set.format, |
---|
| 1641 | + send_ipi_ex.vp_set.valid_bank_mask); |
---|
| 1642 | + |
---|
| 1643 | + vector = send_ipi_ex.vector; |
---|
| 1644 | + valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask; |
---|
| 1645 | + sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) * |
---|
| 1646 | + sizeof(sparse_banks[0]); |
---|
| 1647 | + |
---|
| 1648 | + all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL; |
---|
| 1649 | + |
---|
| 1650 | + if (all_cpus) |
---|
| 1651 | + goto check_and_send_ipi; |
---|
| 1652 | + |
---|
| 1653 | + if (!sparse_banks_len) |
---|
| 1654 | + goto ret_success; |
---|
| 1655 | + |
---|
| 1656 | + if (kvm_read_guest(kvm, |
---|
| 1657 | + ingpa + offsetof(struct hv_send_ipi_ex, |
---|
| 1658 | + vp_set.bank_contents), |
---|
| 1659 | + sparse_banks, |
---|
| 1660 | + sparse_banks_len)) |
---|
| 1661 | + return HV_STATUS_INVALID_HYPERCALL_INPUT; |
---|
| 1662 | + } |
---|
| 1663 | + |
---|
| 1664 | +check_and_send_ipi: |
---|
| 1665 | + if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) |
---|
| 1666 | + return HV_STATUS_INVALID_HYPERCALL_INPUT; |
---|
| 1667 | + |
---|
| 1668 | + vcpu_mask = all_cpus ? NULL : |
---|
| 1669 | + sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, |
---|
| 1670 | + vp_bitmap, vcpu_bitmap); |
---|
| 1671 | + |
---|
| 1672 | + kvm_send_ipi_to_many(kvm, vector, vcpu_mask); |
---|
| 1673 | + |
---|
| 1674 | +ret_success: |
---|
| 1675 | + return HV_STATUS_SUCCESS; |
---|
| 1676 | +} |
---|
| 1677 | + |
---|
1425 | 1678 | bool kvm_hv_hypercall_enabled(struct kvm *kvm) |
---|
1426 | 1679 | { |
---|
1427 | | - return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE; |
---|
| 1680 | + return READ_ONCE(kvm->arch.hyperv.hv_guest_os_id) != 0; |
---|
1428 | 1681 | } |
---|
1429 | 1682 | |
---|
1430 | 1683 | static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) |
---|
.. | .. |
---|
1433 | 1686 | |
---|
1434 | 1687 | longmode = is_64_bit_mode(vcpu); |
---|
1435 | 1688 | if (longmode) |
---|
1436 | | - kvm_register_write(vcpu, VCPU_REGS_RAX, result); |
---|
| 1689 | + kvm_rax_write(vcpu, result); |
---|
1437 | 1690 | else { |
---|
1438 | | - kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32); |
---|
1439 | | - kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff); |
---|
| 1691 | + kvm_rdx_write(vcpu, result >> 32); |
---|
| 1692 | + kvm_rax_write(vcpu, result & 0xffffffff); |
---|
1440 | 1693 | } |
---|
1441 | 1694 | } |
---|
1442 | 1695 | |
---|
.. | .. |
---|
1495 | 1748 | { |
---|
1496 | 1749 | u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS; |
---|
1497 | 1750 | uint16_t code, rep_idx, rep_cnt; |
---|
1498 | | - bool fast, longmode, rep; |
---|
| 1751 | + bool fast, rep; |
---|
1499 | 1752 | |
---|
1500 | 1753 | /* |
---|
1501 | 1754 | * hypercall generates UD from non zero cpl and real mode |
---|
1502 | 1755 | * per HYPER-V spec |
---|
1503 | 1756 | */ |
---|
1504 | | - if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { |
---|
| 1757 | + if (kvm_x86_ops.get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { |
---|
1505 | 1758 | kvm_queue_exception(vcpu, UD_VECTOR); |
---|
1506 | 1759 | return 1; |
---|
1507 | 1760 | } |
---|
1508 | 1761 | |
---|
1509 | | - longmode = is_64_bit_mode(vcpu); |
---|
1510 | | - |
---|
1511 | | - if (!longmode) { |
---|
1512 | | - param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | |
---|
1513 | | - (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff); |
---|
1514 | | - ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) | |
---|
1515 | | - (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff); |
---|
1516 | | - outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) | |
---|
1517 | | - (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff); |
---|
1518 | | - } |
---|
1519 | 1762 | #ifdef CONFIG_X86_64 |
---|
1520 | | - else { |
---|
1521 | | - param = kvm_register_read(vcpu, VCPU_REGS_RCX); |
---|
1522 | | - ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX); |
---|
1523 | | - outgpa = kvm_register_read(vcpu, VCPU_REGS_R8); |
---|
1524 | | - } |
---|
| 1763 | + if (is_64_bit_mode(vcpu)) { |
---|
| 1764 | + param = kvm_rcx_read(vcpu); |
---|
| 1765 | + ingpa = kvm_rdx_read(vcpu); |
---|
| 1766 | + outgpa = kvm_r8_read(vcpu); |
---|
| 1767 | + } else |
---|
1525 | 1768 | #endif |
---|
| 1769 | + { |
---|
| 1770 | + param = ((u64)kvm_rdx_read(vcpu) << 32) | |
---|
| 1771 | + (kvm_rax_read(vcpu) & 0xffffffff); |
---|
| 1772 | + ingpa = ((u64)kvm_rbx_read(vcpu) << 32) | |
---|
| 1773 | + (kvm_rcx_read(vcpu) & 0xffffffff); |
---|
| 1774 | + outgpa = ((u64)kvm_rdi_read(vcpu) << 32) | |
---|
| 1775 | + (kvm_rsi_read(vcpu) & 0xffffffff); |
---|
| 1776 | + } |
---|
1526 | 1777 | |
---|
1527 | 1778 | code = param & 0xffff; |
---|
1528 | 1779 | fast = !!(param & HV_HYPERCALL_FAST_BIT); |
---|
.. | .. |
---|
1548 | 1799 | ret = kvm_hvcall_signal_event(vcpu, fast, ingpa); |
---|
1549 | 1800 | if (ret != HV_STATUS_INVALID_PORT_ID) |
---|
1550 | 1801 | break; |
---|
1551 | | - /* maybe userspace knows this conn_id: fall through */ |
---|
| 1802 | + fallthrough; /* maybe userspace knows this conn_id */ |
---|
1552 | 1803 | case HVCALL_POST_MESSAGE: |
---|
1553 | 1804 | /* don't bother userspace if it has no way to handle it */ |
---|
1554 | 1805 | if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) { |
---|
.. | .. |
---|
1591 | 1842 | } |
---|
1592 | 1843 | ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true); |
---|
1593 | 1844 | break; |
---|
| 1845 | + case HVCALL_SEND_IPI: |
---|
| 1846 | + if (unlikely(rep)) { |
---|
| 1847 | + ret = HV_STATUS_INVALID_HYPERCALL_INPUT; |
---|
| 1848 | + break; |
---|
| 1849 | + } |
---|
| 1850 | + ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, false, fast); |
---|
| 1851 | + break; |
---|
| 1852 | + case HVCALL_SEND_IPI_EX: |
---|
| 1853 | + if (unlikely(fast || rep)) { |
---|
| 1854 | + ret = HV_STATUS_INVALID_HYPERCALL_INPUT; |
---|
| 1855 | + break; |
---|
| 1856 | + } |
---|
| 1857 | + ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, true, false); |
---|
| 1858 | + break; |
---|
| 1859 | + case HVCALL_POST_DEBUG_DATA: |
---|
| 1860 | + case HVCALL_RETRIEVE_DEBUG_DATA: |
---|
| 1861 | + if (unlikely(fast)) { |
---|
| 1862 | + ret = HV_STATUS_INVALID_PARAMETER; |
---|
| 1863 | + break; |
---|
| 1864 | + } |
---|
| 1865 | + fallthrough; |
---|
| 1866 | + case HVCALL_RESET_DEBUG_SESSION: { |
---|
| 1867 | + struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu); |
---|
| 1868 | + |
---|
| 1869 | + if (!kvm_hv_is_syndbg_enabled(vcpu)) { |
---|
| 1870 | + ret = HV_STATUS_INVALID_HYPERCALL_CODE; |
---|
| 1871 | + break; |
---|
| 1872 | + } |
---|
| 1873 | + |
---|
| 1874 | + if (!(syndbg->options & HV_X64_SYNDBG_OPTION_USE_HCALLS)) { |
---|
| 1875 | + ret = HV_STATUS_OPERATION_DENIED; |
---|
| 1876 | + break; |
---|
| 1877 | + } |
---|
| 1878 | + vcpu->run->exit_reason = KVM_EXIT_HYPERV; |
---|
| 1879 | + vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL; |
---|
| 1880 | + vcpu->run->hyperv.u.hcall.input = param; |
---|
| 1881 | + vcpu->run->hyperv.u.hcall.params[0] = ingpa; |
---|
| 1882 | + vcpu->run->hyperv.u.hcall.params[1] = outgpa; |
---|
| 1883 | + vcpu->arch.complete_userspace_io = |
---|
| 1884 | + kvm_hv_hypercall_complete_userspace; |
---|
| 1885 | + return 0; |
---|
| 1886 | + } |
---|
1594 | 1887 | default: |
---|
1595 | 1888 | ret = HV_STATUS_INVALID_HYPERCALL_CODE; |
---|
1596 | 1889 | break; |
---|
.. | .. |
---|
1627 | 1920 | |
---|
1628 | 1921 | mutex_lock(&hv->hv_lock); |
---|
1629 | 1922 | ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1, |
---|
1630 | | - GFP_KERNEL); |
---|
| 1923 | + GFP_KERNEL_ACCOUNT); |
---|
1631 | 1924 | mutex_unlock(&hv->hv_lock); |
---|
1632 | 1925 | |
---|
1633 | 1926 | if (ret >= 0) |
---|
.. | .. |
---|
1666 | 1959 | return kvm_hv_eventfd_deassign(kvm, args->conn_id); |
---|
1667 | 1960 | return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd); |
---|
1668 | 1961 | } |
---|
| 1962 | + |
---|
| 1963 | +int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, |
---|
| 1964 | + struct kvm_cpuid_entry2 __user *entries) |
---|
| 1965 | +{ |
---|
| 1966 | + uint16_t evmcs_ver = 0; |
---|
| 1967 | + struct kvm_cpuid_entry2 cpuid_entries[] = { |
---|
| 1968 | + { .function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS }, |
---|
| 1969 | + { .function = HYPERV_CPUID_INTERFACE }, |
---|
| 1970 | + { .function = HYPERV_CPUID_VERSION }, |
---|
| 1971 | + { .function = HYPERV_CPUID_FEATURES }, |
---|
| 1972 | + { .function = HYPERV_CPUID_ENLIGHTMENT_INFO }, |
---|
| 1973 | + { .function = HYPERV_CPUID_IMPLEMENT_LIMITS }, |
---|
| 1974 | + { .function = HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS }, |
---|
| 1975 | + { .function = HYPERV_CPUID_SYNDBG_INTERFACE }, |
---|
| 1976 | + { .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES }, |
---|
| 1977 | + { .function = HYPERV_CPUID_NESTED_FEATURES }, |
---|
| 1978 | + }; |
---|
| 1979 | + int i, nent = ARRAY_SIZE(cpuid_entries); |
---|
| 1980 | + |
---|
| 1981 | + if (kvm_x86_ops.nested_ops->get_evmcs_version) |
---|
| 1982 | + evmcs_ver = kvm_x86_ops.nested_ops->get_evmcs_version(vcpu); |
---|
| 1983 | + |
---|
| 1984 | + /* Skip NESTED_FEATURES if eVMCS is not supported */ |
---|
| 1985 | + if (!evmcs_ver) |
---|
| 1986 | + --nent; |
---|
| 1987 | + |
---|
| 1988 | + if (cpuid->nent < nent) |
---|
| 1989 | + return -E2BIG; |
---|
| 1990 | + |
---|
| 1991 | + if (cpuid->nent > nent) |
---|
| 1992 | + cpuid->nent = nent; |
---|
| 1993 | + |
---|
| 1994 | + for (i = 0; i < nent; i++) { |
---|
| 1995 | + struct kvm_cpuid_entry2 *ent = &cpuid_entries[i]; |
---|
| 1996 | + u32 signature[3]; |
---|
| 1997 | + |
---|
| 1998 | + switch (ent->function) { |
---|
| 1999 | + case HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS: |
---|
| 2000 | + memcpy(signature, "Linux KVM Hv", 12); |
---|
| 2001 | + |
---|
| 2002 | + ent->eax = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES; |
---|
| 2003 | + ent->ebx = signature[0]; |
---|
| 2004 | + ent->ecx = signature[1]; |
---|
| 2005 | + ent->edx = signature[2]; |
---|
| 2006 | + break; |
---|
| 2007 | + |
---|
| 2008 | + case HYPERV_CPUID_INTERFACE: |
---|
| 2009 | + memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12); |
---|
| 2010 | + ent->eax = signature[0]; |
---|
| 2011 | + break; |
---|
| 2012 | + |
---|
| 2013 | + case HYPERV_CPUID_VERSION: |
---|
| 2014 | + /* |
---|
| 2015 | + * We implement some Hyper-V 2016 functions so let's use |
---|
| 2016 | + * this version. |
---|
| 2017 | + */ |
---|
| 2018 | + ent->eax = 0x00003839; |
---|
| 2019 | + ent->ebx = 0x000A0000; |
---|
| 2020 | + break; |
---|
| 2021 | + |
---|
| 2022 | + case HYPERV_CPUID_FEATURES: |
---|
| 2023 | + ent->eax |= HV_MSR_VP_RUNTIME_AVAILABLE; |
---|
| 2024 | + ent->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE; |
---|
| 2025 | + ent->eax |= HV_MSR_SYNIC_AVAILABLE; |
---|
| 2026 | + ent->eax |= HV_MSR_SYNTIMER_AVAILABLE; |
---|
| 2027 | + ent->eax |= HV_MSR_APIC_ACCESS_AVAILABLE; |
---|
| 2028 | + ent->eax |= HV_MSR_HYPERCALL_AVAILABLE; |
---|
| 2029 | + ent->eax |= HV_MSR_VP_INDEX_AVAILABLE; |
---|
| 2030 | + ent->eax |= HV_MSR_RESET_AVAILABLE; |
---|
| 2031 | + ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE; |
---|
| 2032 | + ent->eax |= HV_ACCESS_FREQUENCY_MSRS; |
---|
| 2033 | + ent->eax |= HV_ACCESS_REENLIGHTENMENT; |
---|
| 2034 | + |
---|
| 2035 | + ent->ebx |= HV_POST_MESSAGES; |
---|
| 2036 | + ent->ebx |= HV_SIGNAL_EVENTS; |
---|
| 2037 | + |
---|
| 2038 | + ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE; |
---|
| 2039 | + ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; |
---|
| 2040 | + |
---|
| 2041 | + ent->ebx |= HV_DEBUGGING; |
---|
| 2042 | + ent->edx |= HV_X64_GUEST_DEBUGGING_AVAILABLE; |
---|
| 2043 | + ent->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE; |
---|
| 2044 | + |
---|
| 2045 | + /* |
---|
| 2046 | + * Direct Synthetic timers only make sense with in-kernel |
---|
| 2047 | + * LAPIC |
---|
| 2048 | + */ |
---|
| 2049 | + if (lapic_in_kernel(vcpu)) |
---|
| 2050 | + ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE; |
---|
| 2051 | + |
---|
| 2052 | + break; |
---|
| 2053 | + |
---|
| 2054 | + case HYPERV_CPUID_ENLIGHTMENT_INFO: |
---|
| 2055 | + ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; |
---|
| 2056 | + ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED; |
---|
| 2057 | + ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED; |
---|
| 2058 | + ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; |
---|
| 2059 | + ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED; |
---|
| 2060 | + if (evmcs_ver) |
---|
| 2061 | + ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; |
---|
| 2062 | + if (!cpu_smt_possible()) |
---|
| 2063 | + ent->eax |= HV_X64_NO_NONARCH_CORESHARING; |
---|
| 2064 | + /* |
---|
| 2065 | + * Default number of spinlock retry attempts, matches |
---|
| 2066 | + * HyperV 2016. |
---|
| 2067 | + */ |
---|
| 2068 | + ent->ebx = 0x00000FFF; |
---|
| 2069 | + |
---|
| 2070 | + break; |
---|
| 2071 | + |
---|
| 2072 | + case HYPERV_CPUID_IMPLEMENT_LIMITS: |
---|
| 2073 | + /* Maximum number of virtual processors */ |
---|
| 2074 | + ent->eax = KVM_MAX_VCPUS; |
---|
| 2075 | + /* |
---|
| 2076 | + * Maximum number of logical processors, matches |
---|
| 2077 | + * HyperV 2016. |
---|
| 2078 | + */ |
---|
| 2079 | + ent->ebx = 64; |
---|
| 2080 | + |
---|
| 2081 | + break; |
---|
| 2082 | + |
---|
| 2083 | + case HYPERV_CPUID_NESTED_FEATURES: |
---|
| 2084 | + ent->eax = evmcs_ver; |
---|
| 2085 | + |
---|
| 2086 | + break; |
---|
| 2087 | + |
---|
| 2088 | + case HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS: |
---|
| 2089 | + memcpy(signature, "Linux KVM Hv", 12); |
---|
| 2090 | + |
---|
| 2091 | + ent->eax = 0; |
---|
| 2092 | + ent->ebx = signature[0]; |
---|
| 2093 | + ent->ecx = signature[1]; |
---|
| 2094 | + ent->edx = signature[2]; |
---|
| 2095 | + break; |
---|
| 2096 | + |
---|
| 2097 | + case HYPERV_CPUID_SYNDBG_INTERFACE: |
---|
| 2098 | + memcpy(signature, "VS#1\0\0\0\0\0\0\0\0", 12); |
---|
| 2099 | + ent->eax = signature[0]; |
---|
| 2100 | + break; |
---|
| 2101 | + |
---|
| 2102 | + case HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES: |
---|
| 2103 | + ent->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING; |
---|
| 2104 | + break; |
---|
| 2105 | + |
---|
| 2106 | + default: |
---|
| 2107 | + break; |
---|
| 2108 | + } |
---|
| 2109 | + } |
---|
| 2110 | + |
---|
| 2111 | + if (copy_to_user(entries, cpuid_entries, |
---|
| 2112 | + nent * sizeof(struct kvm_cpuid_entry2))) |
---|
| 2113 | + return -EFAULT; |
---|
| 2114 | + |
---|
| 2115 | + return 0; |
---|
| 2116 | +} |
---|