2024-11-01 2f529f9b558ca1c1bd74be7437a84e4711743404
kernel/arch/x86/kvm/x86.c
@@ -178,6 +178,7 @@
 struct kvm_user_return_msrs {
         struct user_return_notifier urn;
         bool registered;
+        bool dirty;
         struct kvm_user_return_msr_values {
                 u64 host;
                 u64 curr;
@@ -295,12 +296,29 @@
                 vcpu->arch.apf.gfns[i] = ~0;
 }
 
+static void __kvm_on_user_return(struct kvm_user_return_msrs *msrs)
+{
+        struct kvm_user_return_msr_values *values;
+        unsigned slot;
+
+        if (!msrs->dirty)
+                return;
+
+        for (slot = 0; slot < user_return_msrs_global.nr; ++slot) {
+                values = &msrs->values[slot];
+                if (values->host != values->curr) {
+                        wrmsrl(user_return_msrs_global.msrs[slot], values->host);
+                        values->curr = values->host;
+                }
+        }
+
+        msrs->dirty = false;
+}
+
 static void kvm_on_user_return(struct user_return_notifier *urn)
 {
-        unsigned slot;
         struct kvm_user_return_msrs *msrs
                 = container_of(urn, struct kvm_user_return_msrs, urn);
-        struct kvm_user_return_msr_values *values;
         unsigned long flags;
 
         /*
@@ -313,13 +331,10 @@
                 user_return_notifier_unregister(urn);
         }
         local_irq_restore(flags);
-        for (slot = 0; slot < user_return_msrs_global.nr; ++slot) {
-                values = &msrs->values[slot];
-                if (values->host != values->curr) {
-                        wrmsrl(user_return_msrs_global.msrs[slot], values->host);
-                        values->curr = values->host;
-                }
-        }
+        flags = hard_cond_local_irq_save();
+        __kvm_on_user_return(msrs);
+        hard_cond_local_irq_restore(flags);
+        inband_exit_guest();
 }
 
 int kvm_probe_user_return_msr(u32 msr)
@@ -374,6 +389,7 @@
         if (err)
                 return 1;
 
+        msrs->dirty = true;
         msrs->values[slot].curr = value;
         if (!msrs->registered) {
                 msrs->urn.on_user_return = kvm_on_user_return;
@@ -4072,10 +4088,22 @@
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
+        struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs);
+        unsigned long flags;
         int idx;
 
         if (vcpu->preempted)
                 vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu);
+
+        flags = hard_cond_local_irq_save();
+        /*
+         * Skip steal time accounting from the out-of-band stage since
+         * this is oob-unsafe. We leave it to the next call from the
+         * inband stage.
+         */
+        if (running_oob())
+                goto skip_steal_time_update;
+
 
         /*
          * Disable page faults because we're in atomic context here.
@@ -4094,6 +4122,7 @@
         kvm_steal_time_set_preempted(vcpu);
         srcu_read_unlock(&vcpu->kvm->srcu, idx);
         pagefault_enable();
+skip_steal_time_update:
         kvm_x86_ops.vcpu_put(vcpu);
         vcpu->arch.last_host_tsc = rdtsc();
         /*
@@ -4102,7 +4131,40 @@
          * guest. do_debug expects dr6 to be cleared after it runs, do the same.
          */
         set_debugreg(0, 6);
+
+        inband_set_vcpu_release_state(vcpu, false);
+        if (!msrs->dirty)
+                inband_exit_guest();
+
+        hard_cond_local_irq_restore(flags);
 }
+
+#ifdef CONFIG_DOVETAIL
+/* hard irqs off. */
+void kvm_handle_oob_switch(struct kvm_oob_notifier *nfy)
+{
+        struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs);
+        struct kvm_vcpu *vcpu;
+
+        vcpu = container_of(nfy, struct kvm_vcpu, oob_notifier);
+        /*
+         * If user_return MSRs were still active when leaving
+         * kvm_arch_vcpu_put(), inband_exit_guest() was not invoked,
+         * so we might get called later on before kvm_on_user_return()
+         * had a chance to run, if a switch to out-of-band scheduling
+         * sneaks in in the meantime. Prevent kvm_arch_vcpu_put()
+         * from running twice in such a case by checking ->put_vcpu
+         * from the notifier block.
+         */
+        if (nfy->put_vcpu)
+                kvm_arch_vcpu_put(vcpu);
+
+        __kvm_on_user_return(msrs);
+        inband_exit_guest();
+}
+#else
+#define kvm_handle_oob_switch NULL
+#endif
 
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
                                     struct kvm_lapic_state *s)
@@ -9142,6 +9204,10 @@
         }
 
         preempt_disable();
+        local_irq_disable_full();
+
+        inband_enter_guest(vcpu);
+        inband_set_vcpu_release_state(vcpu, true);
 
         kvm_x86_ops.prepare_guest_switch(vcpu);
 
@@ -9150,7 +9216,6 @@
          * IPI are then delayed after guest entry, which ensures that they
          * result in virtual interrupt delivery.
          */
-        local_irq_disable();
         vcpu->mode = IN_GUEST_MODE;
 
         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
@@ -9179,7 +9244,7 @@
         if (kvm_vcpu_exit_request(vcpu)) {
                 vcpu->mode = OUTSIDE_GUEST_MODE;
                 smp_wmb();
-                local_irq_enable();
+                local_irq_enable_full();
                 preempt_enable();
                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
                 r = 1;
@@ -9251,9 +9316,9 @@
          * stat.exits increment will do nicely.
          */
         kvm_before_interrupt(vcpu);
-        local_irq_enable();
+        local_irq_enable_full();
         ++vcpu->stat.exits;
-        local_irq_disable();
+        local_irq_disable_full();
         kvm_after_interrupt(vcpu);
 
         /*
@@ -9273,7 +9338,7 @@
                 }
         }
 
-        local_irq_enable();
+        local_irq_enable_full();
         preempt_enable();
 
         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -9487,7 +9552,9 @@
 /* Swap (qemu) user FPU context for the guest FPU context. */
 static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 {
-        fpregs_lock();
+        unsigned long flags;
+
+        flags = fpregs_lock();
 
         kvm_save_current_fpu(vcpu->arch.user_fpu);
 
@@ -9496,7 +9563,7 @@
                                 ~XFEATURE_MASK_PKRU);
 
         fpregs_mark_activate();
-        fpregs_unlock();
+        fpregs_unlock(flags);
 
         trace_kvm_fpu(1);
 }
@@ -9504,14 +9571,16 @@
 /* When vcpu_run ends, restore user space FPU context. */
 static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
-        fpregs_lock();
+        unsigned long flags;
+
+        flags = fpregs_lock();
 
         kvm_save_current_fpu(vcpu->arch.guest_fpu);
 
         copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);
 
         fpregs_mark_activate();
-        fpregs_unlock();
+        fpregs_unlock(flags);
 
         ++vcpu->stat.fpu_reload;
         trace_kvm_fpu(0);
@@ -10189,6 +10258,7 @@
         if (r)
                 goto free_guest_fpu;
 
+        inband_init_vcpu(vcpu, kvm_handle_oob_switch);
         vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
         vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
         kvm_vcpu_mtrr_init(vcpu);
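
The core of the patch is the msrs->dirty flag: kvm_set_user_return_msr() marks the per-CPU MSR cache dirty, and __kvm_on_user_return() restores only slots that diverged and then clears the flag, so it can be called safely from both the in-band user-return notifier and the Dovetail out-of-band switch handler without doing the work twice. Below is a minimal sketch of that idempotent deferred-restore pattern. It is a standalone userspace mock under assumed names (mock_wrmsrl, cache_set, cache_restore, NR_MSRS), not kernel code, and only illustrates the dirty-tracking logic.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_MSRS 4

/* Per-CPU-style cache: host value to restore, current (guest) value, dirty flag. */
struct msr_cache {
        bool dirty;
        struct {
                uint64_t host;
                uint64_t curr;
        } values[NR_MSRS];
};

/* Stand-in for wrmsrl(); here we only log the restore. */
static void mock_wrmsrl(unsigned int slot, uint64_t val)
{
        printf("restore slot %u -> %#llx\n", slot, (unsigned long long)val);
}

/* Mirrors __kvm_on_user_return(): restore only what diverged, exactly once. */
static void cache_restore(struct msr_cache *c)
{
        unsigned int slot;

        if (!c->dirty)
                return;         /* fast path: nothing was touched since the last restore */

        for (slot = 0; slot < NR_MSRS; ++slot) {
                if (c->values[slot].host != c->values[slot].curr) {
                        mock_wrmsrl(slot, c->values[slot].host);
                        c->values[slot].curr = c->values[slot].host;
                }
        }
        c->dirty = false;       /* later callers see a clean cache and bail out */
}

/* Mirrors kvm_set_user_return_msr(): any write marks the cache dirty. */
static void cache_set(struct msr_cache *c, unsigned int slot, uint64_t val)
{
        c->values[slot].curr = val;
        c->dirty = true;
}

int main(void)
{
        struct msr_cache c = {
                .values = { [0] = { .host = 1, .curr = 1 } },
        };

        cache_set(&c, 0, 42);   /* guest value loaded into slot 0 */
        cache_restore(&c);      /* restores slot 0 to the host value, clears dirty */
        cache_restore(&c);      /* no-op: a second caller finds the cache clean */
        return 0;
}

The same property is what lets kvm_handle_oob_switch() call __kvm_on_user_return() unconditionally: if kvm_on_user_return() already ran on the in-band stage, the dirty flag is clear and the out-of-band call does nothing.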