2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/arch/x86/kvm/lapic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 
 /*
  * Local APIC virtualization
@@ -13,9 +14,6 @@
  * Yaozu (Eddie) Dong <eddie.dong@intel.com>
  *
  * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
  */
 
 #include <linux/kvm_host.h>
@@ -38,6 +36,7 @@
 #include <linux/jump_label.h>
 #include "kvm_cache_regs.h"
 #include "irq.h"
+#include "ioapic.h"
 #include "trace.h"
 #include "x86.h"
 #include "cpuid.h"
@@ -54,21 +53,20 @@
 #define PRIu64 "u"
 #define PRIo64 "o"
 
-/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
-#define apic_debug(fmt, arg...) do {} while (0)
-
 /* 14 is the version for Xeon and Pentium 8.4.8*/
 #define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
 #define LAPIC_MMIO_LENGTH (1 << 12)
 /* followed define is not in apicdef.h */
-#define APIC_SHORT_MASK 0xc0000
-#define APIC_DEST_NOSHORT 0x0
-#define APIC_DEST_MASK 0x800
 #define MAX_APIC_VECTOR 256
 #define APIC_VECTORS_PER_REG 32
 
-#define APIC_BROADCAST 0xFF
-#define X2APIC_BROADCAST 0xFFFFFFFFul
+static bool lapic_timer_advance_dynamic __read_mostly;
+#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 /* clock cycles */
+#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000 /* clock cycles */
+#define LAPIC_TIMER_ADVANCE_NS_INIT 1000
+#define LAPIC_TIMER_ADVANCE_NS_MAX 5000
+/* step-by-step approximation to mitigate fluctuation */
+#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
 
 static inline int apic_test_vector(int vec, void *bitmap)
 {
@@ -81,11 +79,6 @@
 
	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
		apic_test_vector(vector, apic->regs + APIC_IRR);
-}
-
-static inline void apic_clear_vector(int vec, void *bitmap)
-{
-	clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
 }
 
 static inline int __apic_test_and_set_vector(int vec, void *bitmap)
@@ -113,14 +106,28 @@
	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 
-static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
-{
-	return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
-}
-
 static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
 {
	return apic->vcpu->vcpu_id;
+}
+
+static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
+{
+	return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
+		(kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
+}
+
+bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
+{
+	return kvm_x86_ops.set_hv_timer
+		&& !(kvm_mwait_in_guest(vcpu->kvm) ||
+		     kvm_can_post_timer_interrupt(vcpu));
+}
+EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer);
+
+static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
+{
+	return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
 }
 
 static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
@@ -163,21 +170,48 @@
	kvfree(map);
 }
 
-static void recalculate_apic_map(struct kvm *kvm)
+/*
+ * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
+ *
+ * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with
+ * apic_map_lock_held.
+ */
+enum {
+	CLEAN,
+	UPDATE_IN_PROGRESS,
+	DIRTY
+};
+
+void kvm_recalculate_apic_map(struct kvm *kvm)
 {
	struct kvm_apic_map *new, *old = NULL;
	struct kvm_vcpu *vcpu;
	int i;
	u32 max_id = 255; /* enough space for any xAPIC ID */
 
+	/* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */
+	if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
+		return;
+
	mutex_lock(&kvm->arch.apic_map_lock);
+	/*
+	 * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
+	 * (if clean) or the APIC registers (if dirty).
+	 */
+	if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
+				   DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
+		/* Someone else has updated the map. */
+		mutex_unlock(&kvm->arch.apic_map_lock);
+		return;
+	}
 
	kvm_for_each_vcpu(i, vcpu, kvm)
		if (kvm_apic_present(vcpu))
			max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
 
	new = kvzalloc(sizeof(struct kvm_apic_map) +
-	               sizeof(struct kvm_lapic *) * ((u64)max_id + 1), GFP_KERNEL);
+	               sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
+		       GFP_KERNEL_ACCOUNT);
 
	if (!new)
		goto out;
@@ -234,6 +268,12 @@
	old = rcu_dereference_protected(kvm->arch.apic_map,
			lockdep_is_held(&kvm->arch.apic_map_lock));
	rcu_assign_pointer(kvm->arch.apic_map, new);
+	/*
+	 * Write kvm->arch.apic_map before clearing apic->apic_map_dirty.
+	 * If another update has come in, leave it DIRTY.
+	 */
+	atomic_cmpxchg_release(&kvm->arch.apic_map_dirty,
+			       UPDATE_IN_PROGRESS, CLEAN);
	mutex_unlock(&kvm->arch.apic_map_lock);
 
	if (old)
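
The three-state handoff above lets any writer flag the map stale without taking apic_map_lock, while updaters serialize on the mutex. A minimal user-space sketch of the same protocol with C11 atomics; the mutex and the map itself are elided and the names here are illustrative, not KVM API:

#include <stdatomic.h>
#include <stdbool.h>

enum { CLEAN, UPDATE_IN_PROGRESS, DIRTY };

static _Atomic int map_dirty = CLEAN;

/* Any writer: mark the map stale, lock-free (x -> DIRTY). */
static void mark_dirty(void)
{
	atomic_store_explicit(&map_dirty, DIRTY, memory_order_release);
}

/* Updater, with the map mutex held: DIRTY -> UPDATE_IN_PROGRESS. */
static bool begin_update(void)
{
	int expected = DIRTY;

	return atomic_compare_exchange_strong_explicit(&map_dirty,
			&expected, UPDATE_IN_PROGRESS,
			memory_order_acquire, memory_order_acquire);
}

/* Updater, after publishing the new map: UPDATE_IN_PROGRESS -> CLEAN.
 * If a new DIRTY arrived meanwhile, the CAS fails and it stays DIRTY. */
static void end_update(void)
{
	int expected = UPDATE_IN_PROGRESS;

	atomic_compare_exchange_strong_explicit(&map_dirty,
			&expected, CLEAN,
			memory_order_release, memory_order_relaxed);
}

int main(void)
{
	mark_dirty();
	if (begin_update()) {
		/* ...rebuild and publish the map here... */
		end_update();
	}
	return 0;
}
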
@@ -250,26 +290,35 @@
 
	if (enabled != apic->sw_enabled) {
		apic->sw_enabled = enabled;
-		if (enabled) {
+		if (enabled)
			static_key_slow_dec_deferred(&apic_sw_disabled);
-			recalculate_apic_map(apic->vcpu->kvm);
-		} else
+		else
			static_key_slow_inc(&apic_sw_disabled.key);
 
-		recalculate_apic_map(apic->vcpu->kvm);
+		atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
	}
+
+	/* Check if there are APF page ready requests pending */
+	if (enabled)
+		kvm_make_request(KVM_REQ_APF_READY, apic->vcpu);
 }
 
 static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
 {
	kvm_lapic_set_reg(apic, APIC_ID, id << 24);
-	recalculate_apic_map(apic->vcpu->kvm);
+	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
 }
 
 static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
 {
	kvm_lapic_set_reg(apic, APIC_LDR, id);
-	recalculate_apic_map(apic->vcpu->kvm);
+	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
+}
+
+static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val)
+{
+	kvm_lapic_set_reg(apic, APIC_DFR, val);
+	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
 }
 
 static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
@@ -285,17 +334,12 @@
 
	kvm_lapic_set_reg(apic, APIC_ID, id);
	kvm_lapic_set_reg(apic, APIC_LDR, ldr);
-	recalculate_apic_map(apic->vcpu->kvm);
+	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
 }
 
 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
 {
	return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
-}
-
-static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
-{
-	return kvm_lapic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
 }
 
 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
@@ -321,7 +365,6 @@
 void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 {
	struct kvm_lapic *apic = vcpu->arch.apic;
-	struct kvm_cpuid_entry2 *feat;
	u32 v = APIC_VERSION;
 
	if (!lapic_in_kernel(vcpu))
@@ -334,8 +377,7 @@
	 * version first and level-triggered interrupts never get EOIed in
	 * IOAPIC.
	 */
-	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
-	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) &&
+	if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) &&
	    !ioapic_in_kernel(vcpu->kvm))
		v |= APIC_LVR_DIRECTED_EOI;
	kvm_lapic_set_reg(apic, APIC_LVR, v);
@@ -446,16 +488,22 @@
 
	if (unlikely(vcpu->arch.apicv_active)) {
		/* need to update RVI */
-		apic_clear_vector(vec, apic->regs + APIC_IRR);
-		kvm_x86_ops->hwapic_irr_update(vcpu,
+		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
+		kvm_x86_ops.hwapic_irr_update(vcpu,
				apic_find_highest_irr(apic));
	} else {
		apic->irr_pending = false;
-		apic_clear_vector(vec, apic->regs + APIC_IRR);
+		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
		if (apic_search_irr(apic) != -1)
			apic->irr_pending = true;
	}
 }
+
+void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec)
+{
+	apic_clear_irr(vec, vcpu->arch.apic);
+}
+EXPORT_SYMBOL_GPL(kvm_apic_clear_irr);
 
 static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
 {
@@ -472,7 +520,7 @@
	 * just set SVI.
	 */
	if (unlikely(vcpu->arch.apicv_active))
-		kvm_x86_ops->hwapic_isr_update(vcpu, vec);
+		kvm_x86_ops.hwapic_isr_update(vcpu, vec);
	else {
		++apic->isr_count;
		BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
@@ -520,7 +568,7 @@
	 * and must be left alone.
	 */
	if (unlikely(vcpu->arch.apicv_active))
-		kvm_x86_ops->hwapic_isr_update(vcpu,
+		kvm_x86_ops.hwapic_isr_update(vcpu,
				apic_find_highest_isr(apic));
	else {
		--apic->isr_count;
@@ -553,60 +601,53 @@
		irq->level, irq->trig_mode, dest_map);
 }
 
+static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
+			 struct kvm_lapic_irq *irq, u32 min)
+{
+	int i, count = 0;
+	struct kvm_vcpu *vcpu;
+
+	if (min > map->max_apic_id)
+		return 0;
+
+	for_each_set_bit(i, ipi_bitmap,
+		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
+		if (map->phys_map[min + i]) {
+			vcpu = map->phys_map[min + i]->vcpu;
+			count += kvm_apic_set_irq(vcpu, irq, NULL);
+		}
+	}
+
+	return count;
+}
+
 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
		    unsigned long ipi_bitmap_high, u32 min,
		    unsigned long icr, int op_64_bit)
 {
-	int i;
	struct kvm_apic_map *map;
-	struct kvm_vcpu *vcpu;
	struct kvm_lapic_irq irq = {0};
	int cluster_size = op_64_bit ? 64 : 32;
-	int count = 0;
+	int count;
+
+	if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
+		return -KVM_EINVAL;
 
	irq.vector = icr & APIC_VECTOR_MASK;
	irq.delivery_mode = icr & APIC_MODE_MASK;
	irq.level = (icr & APIC_INT_ASSERT) != 0;
	irq.trig_mode = icr & APIC_INT_LEVELTRIG;
 
-	if (icr & APIC_DEST_MASK)
-		return -KVM_EINVAL;
-	if (icr & APIC_SHORT_MASK)
-		return -KVM_EINVAL;
-
	rcu_read_lock();
	map = rcu_dereference(kvm->arch.apic_map);
 
-	if (unlikely(!map)) {
-		count = -EOPNOTSUPP;
-		goto out;
+	count = -EOPNOTSUPP;
+	if (likely(map)) {
+		count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
+		min += cluster_size;
+		count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
	}
 
-	if (min > map->max_apic_id)
-		goto out;
-	/* Bits above cluster_size are masked in the caller. */
-	for_each_set_bit(i, &ipi_bitmap_low,
-		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
-		if (map->phys_map[min + i]) {
-			vcpu = map->phys_map[min + i]->vcpu;
-			count += kvm_apic_set_irq(vcpu, &irq, NULL);
-		}
-	}
-
-	min += cluster_size;
-
-	if (min > map->max_apic_id)
-		goto out;
-
-	for_each_set_bit(i, &ipi_bitmap_high,
-		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
-		if (map->phys_map[min + i]) {
-			vcpu = map->phys_map[min + i]->vcpu;
-			count += kvm_apic_set_irq(vcpu, &irq, NULL);
-		}
-	}
-
-out:
	rcu_read_unlock();
	return count;
 }
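
Each call of the new helper covers one cluster-sized window of APIC IDs, so the low and high bitmaps become two calls offset by cluster_size. A stand-alone sketch of that windowing arithmetic in plain C, with a simple bit scan instead of for_each_set_bit and delivery replaced by a count; nothing here is KVM API:

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

/* "Deliver" to APIC IDs [min, min + BITS_PER_LONG) named by the bitmap. */
static int fanout(unsigned long ipi_bitmap, uint32_t min, uint32_t max_apic_id)
{
	uint32_t span, i;
	int count = 0;

	if (min > max_apic_id)
		return 0;

	span = max_apic_id - min + 1;
	if (span > BITS_PER_LONG)
		span = BITS_PER_LONG;

	for (i = 0; i < span; i++)
		if (ipi_bitmap & (1UL << i))
			count++;	/* kvm_apic_set_irq() in the real code */

	return count;
}

int main(void)
{
	int cluster_size = 64;	/* op_64_bit */
	uint32_t min = 0;

	/* low bitmap covers IDs 0..63, high bitmap IDs 64..127 */
	int n = fanout(0x5UL, min, 255);		/* IDs 0 and 2 */
	n += fanout(0x1UL, min + cluster_size, 255);	/* ID 64 */
	printf("%d IPIs delivered\n", n);		/* 3 */
	return 0;
}
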
@@ -634,7 +675,7 @@
 {
	u8 val;
	if (pv_eoi_get_user(vcpu, &val) < 0) {
-		apic_debug("Can't read EOI MSR value: 0x%llx\n",
+		printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return false;
	}
@@ -644,7 +685,7 @@
 static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
 {
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
-		apic_debug("Can't set EOI MSR value: 0x%llx\n",
+		printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
@@ -654,7 +695,7 @@
 static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
 {
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
-		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
+		printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
@@ -665,7 +706,7 @@
 {
	int highest_irr;
	if (apic->vcpu->arch.apicv_active)
-		highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
+		highest_irr = kvm_x86_ops.sync_pir_to_irr(apic->vcpu);
	else
		highest_irr = apic_find_highest_irr(apic);
	if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
@@ -687,9 +728,6 @@
		ppr = tpr & 0xff;
	else
		ppr = isrv & 0xf0;
-
-	apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
-		   apic, ppr, isr, isrv);
 
	*new_ppr = ppr;
	if (old_ppr != ppr)
@@ -767,8 +805,6 @@
		return ((logical_id >> 4) == (mda >> 4))
		       && (logical_id & mda & 0xf) != 0;
	default:
-		apic_debug("Bad DFR vcpu %d: %08x\n",
-			   apic->vcpu->vcpu_id, kvm_lapic_get_reg(apic, APIC_DFR));
		return false;
	}
 }
@@ -802,17 +838,13 @@
 }
 
 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-			 int short_hand, unsigned int dest, int dest_mode)
+			 int shorthand, unsigned int dest, int dest_mode)
 {
	struct kvm_lapic *target = vcpu->arch.apic;
	u32 mda = kvm_apic_mda(vcpu, dest, source, target);
 
-	apic_debug("target %p, source %p, dest 0x%x, "
-		   "dest_mode 0x%x, short_hand 0x%x\n",
-		   target, source, dest, dest_mode, short_hand);
-
	ASSERT(target);
-	switch (short_hand) {
+	switch (shorthand) {
	case APIC_DEST_NOSHORT:
		if (dest_mode == APIC_DEST_PHYSICAL)
			return kvm_apic_match_physical_addr(target, mda);
@@ -825,8 +857,6 @@
	case APIC_DEST_ALLBUT:
		return target != source;
	default:
-		apic_debug("kvm: apic: Bad dest shorthand value %x\n",
-			   short_hand);
		return false;
	}
 }
@@ -961,6 +991,10 @@
	*r = -1;
 
	if (irq->shorthand == APIC_DEST_SELF) {
+		if (KVM_BUG_ON(!src, kvm)) {
+			*r = 0;
+			return true;
+		}
		*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
		return true;
	}
@@ -969,26 +1003,26 @@
	map = rcu_dereference(kvm->arch.apic_map);
 
	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
-	if (ret)
+	if (ret) {
+		*r = 0;
		for_each_set_bit(i, &bitmap, 16) {
			if (!dst[i])
				continue;
-			if (*r < 0)
-				*r = 0;
			*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
		}
+	}
 
	rcu_read_unlock();
	return ret;
 }
 
 /*
- * This routine tries to handler interrupts in posted mode, here is how
+ * This routine tries to handle interrupts in posted mode, here is how
  * it deals with different cases:
  * - For single-destination interrupts, handle it in posted mode
  * - Else if vector hashing is enabled and it is a lowest-priority
  *   interrupt, handle it in posted mode and use the following mechanism
- *   to find the destinaiton vCPU.
+ *   to find the destination vCPU.
  *   1. For lowest-priority interrupts, store all the possible
  *      destination vCPUs in an array.
  *   2. Use "guest vector % max number of destination vCPUs" to find
@@ -1040,6 +1074,7 @@
	switch (delivery_mode) {
	case APIC_DM_LOWEST:
		vcpu->arch.apic_arb_prio++;
+		fallthrough;
	case APIC_DM_FIXED:
		if (unlikely(trig_mode && !level))
			break;
@@ -1057,12 +1092,14 @@
 
		if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
			if (trig_mode)
-				kvm_lapic_set_vector(vector, apic->regs + APIC_TMR);
+				kvm_lapic_set_vector(vector,
+						     apic->regs + APIC_TMR);
			else
-				apic_clear_vector(vector, apic->regs + APIC_TMR);
+				kvm_lapic_clear_vector(vector,
+						       apic->regs + APIC_TMR);
		}
 
-		if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) {
+		if (kvm_x86_ops.deliver_posted_interrupt(vcpu, vector)) {
			kvm_lapic_set_irr(vector, apic);
			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
@@ -1093,20 +1130,12 @@
			result = 1;
			/* assumes that there are only KVM_APIC_INIT/SIPI */
			apic->pending_events = (1UL << KVM_APIC_INIT);
-			/* make sure pending_events is visible before sending
-			 * the request */
-			smp_wmb();
			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
-		} else {
-			apic_debug("Ignoring de-assert INIT to vcpu %d\n",
-				   vcpu->vcpu_id);
		}
		break;
 
	case APIC_DM_STARTUP:
-		apic_debug("SIPI to vcpu %d vector 0x%02x\n",
-			   vcpu->vcpu_id, vector);
		result = 1;
		apic->sipi_vector = vector;
		/* make sure sipi_vector is visible for the receiver */
@@ -1130,6 +1159,50 @@
		break;
	}
	return result;
+}
+
+/*
+ * This routine identifies the destination vcpus mask meant to receive the
+ * IOAPIC interrupts. It either uses kvm_apic_map_get_dest_lapic() to find
+ * out the destination vcpus array and set the bitmap or it traverses to
+ * each available vcpu to identify the same.
+ */
+void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
+			      unsigned long *vcpu_bitmap)
+{
+	struct kvm_lapic **dest_vcpu = NULL;
+	struct kvm_lapic *src = NULL;
+	struct kvm_apic_map *map;
+	struct kvm_vcpu *vcpu;
+	unsigned long bitmap;
+	int i, vcpu_idx;
+	bool ret;
+
+	rcu_read_lock();
+	map = rcu_dereference(kvm->arch.apic_map);
+
+	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
+					  &bitmap);
+	if (ret) {
+		for_each_set_bit(i, &bitmap, 16) {
+			if (!dest_vcpu[i])
+				continue;
+			vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
+			__set_bit(vcpu_idx, vcpu_bitmap);
+		}
+	} else {
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			if (!kvm_apic_present(vcpu))
+				continue;
+			if (!kvm_apic_match_dest(vcpu, NULL,
+						 irq->shorthand,
+						 irq->dest_id,
+						 irq->dest_mode))
+				continue;
+			__set_bit(i, vcpu_bitmap);
		}
+	}
+	rcu_read_unlock();
 }
 
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
@@ -1204,10 +1277,8 @@
 }
 EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
 
-static void apic_send_ipi(struct kvm_lapic *apic)
+void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
 {
-	u32 icr_low = kvm_lapic_get_reg(apic, APIC_ICR);
-	u32 icr_high = kvm_lapic_get_reg(apic, APIC_ICR2);
	struct kvm_lapic_irq irq;
 
	irq.vector = icr_low & APIC_VECTOR_MASK;
@@ -1223,14 +1294,6 @@
	irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
 
	trace_kvm_apic_ipi(icr_low, irq.dest_id);
-
-	apic_debug("icr_high 0x%x, icr_low 0x%x, "
-		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
-		   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, "
-		   "msi_redir_hint 0x%x\n",
-		   icr_high, icr_low, irq.shorthand, irq.dest_id,
-		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
-		   irq.vector, irq.msi_redir_hint);
 
	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
 }
@@ -1285,7 +1348,6 @@
 
	switch (offset) {
	case APIC_ARBPRI:
-		apic_debug("Access APIC ARBPRI register which is for P6\n");
		break;
 
	case APIC_TMCCT:	/* Timer CCR */
@@ -1300,7 +1362,7 @@
		break;
	case APIC_TASKPRI:
		report_tpr_access(apic, false);
-		/* fall thru */
+		fallthrough;
	default:
		val = kvm_lapic_get_reg(apic, offset);
		break;
@@ -1314,25 +1376,49 @@
	return container_of(dev, struct kvm_lapic, dev);
 }
 
+#define APIC_REG_MASK(reg)	(1ull << ((reg) >> 4))
+#define APIC_REGS_MASK(first, count) \
+	(APIC_REG_MASK(first) * ((1ull << (count)) - 1))
+
 int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
		       void *data)
 {
	unsigned char alignment = offset & 0xf;
	u32 result;
	/* this bitmask has a bit cleared for each reserved register */
-	static const u64 rmask = 0x43ff01ffffffe70cULL;
+	u64 valid_reg_mask =
+		APIC_REG_MASK(APIC_ID) |
+		APIC_REG_MASK(APIC_LVR) |
+		APIC_REG_MASK(APIC_TASKPRI) |
+		APIC_REG_MASK(APIC_PROCPRI) |
+		APIC_REG_MASK(APIC_LDR) |
+		APIC_REG_MASK(APIC_DFR) |
+		APIC_REG_MASK(APIC_SPIV) |
+		APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
+		APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
+		APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
+		APIC_REG_MASK(APIC_ESR) |
+		APIC_REG_MASK(APIC_ICR) |
+		APIC_REG_MASK(APIC_ICR2) |
+		APIC_REG_MASK(APIC_LVTT) |
+		APIC_REG_MASK(APIC_LVTTHMR) |
+		APIC_REG_MASK(APIC_LVTPC) |
+		APIC_REG_MASK(APIC_LVT0) |
+		APIC_REG_MASK(APIC_LVT1) |
+		APIC_REG_MASK(APIC_LVTERR) |
+		APIC_REG_MASK(APIC_TMICT) |
+		APIC_REG_MASK(APIC_TMCCT) |
+		APIC_REG_MASK(APIC_TDCR);
 
-	if ((alignment + len) > 4) {
-		apic_debug("KVM_APIC_READ: alignment error %x %d\n",
-			   offset, len);
-		return 1;
-	}
+	/* ARBPRI is not valid on x2APIC */
+	if (!apic_x2apic_mode(apic))
+		valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
 
-	if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
-		apic_debug("KVM_APIC_READ: read reserved register %x\n",
-			   offset);
+	if (alignment + len > 4)
		return 1;
-	}
+
+	if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
+		return 1;
 
	result = __apic_read(apic, offset & ~0xf);
@@ -1390,9 +1476,6 @@
	tmp1 = tdcr & 0xf;
	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
	apic->divide_count = 0x1 << (tmp2 & 0x7);
-
-	apic_debug("timer divide count is 0x%x\n",
-		   apic->divide_count);
 }
 
 static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
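The decode above folds TDCR bit 3 down beside bits 1:0 and adds one, so the divider is 2 raised to that sum modulo 8, with 0xb giving divide-by-1. A table of all eight encodings, runnable as plain C (pure arithmetic, no kernel code):

#include <stdio.h>

int main(void)
{
	/* SDM order: 0x0->2, 0x1->4, 0x2->8, 0x3->16, 0x8->32, ... 0xb->1 */
	static const int tdcr_vals[] = { 0x0, 0x1, 0x2, 0x3,
					 0x8, 0x9, 0xa, 0xb };
	unsigned int i;

	for (i = 0; i < sizeof(tdcr_vals) / sizeof(tdcr_vals[0]); i++) {
		int tmp1 = tdcr_vals[i] & 0xf;
		int tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
		int divide_count = 0x1 << (tmp2 & 0x7);

		printf("TDCR %#x -> divide by %d\n", tdcr_vals[i],
		       divide_count);
	}
	return 0;
}

Note that only bits 0, 1 and 3 matter, which is why a later hunk masks TDCR writes with val & 0xb.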
@@ -1416,6 +1499,8 @@
	}
 }
 
+static void cancel_hv_timer(struct kvm_lapic *apic);
+
 static void apic_update_lvtt(struct kvm_lapic *apic)
 {
	u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
@@ -1425,6 +1510,10 @@
	if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
			APIC_LVT_TIMER_TSCDEADLINE)) {
		hrtimer_cancel(&apic->lapic_timer.timer);
+		preempt_disable();
+		if (apic->lapic_timer.hv_timer_in_use)
+			cancel_hv_timer(apic);
+		preempt_enable();
		kvm_lapic_set_reg(apic, APIC_TMICT, 0);
		apic->lapic_timer.period = 0;
		apic->lapic_timer.tscdeadline = 0;
@@ -1432,29 +1521,6 @@
		apic->lapic_timer.timer_mode = timer_mode;
		limit_periodic_timer_frequency(apic);
	}
-}
-
-static void apic_timer_expired(struct kvm_lapic *apic)
-{
-	struct kvm_vcpu *vcpu = apic->vcpu;
-	struct swait_queue_head *q = &vcpu->wq;
-	struct kvm_timer *ktimer = &apic->lapic_timer;
-
-	if (atomic_read(&apic->lapic_timer.pending))
-		return;
-
-	atomic_inc(&apic->lapic_timer.pending);
-	kvm_set_pending_timer(vcpu);
-
-	/*
-	 * For x86, the atomic_inc() is serialized, thus
-	 * using swait_active() is safe.
-	 */
-	if (swait_active(q))
-		swake_up_one(q);
-
-	if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
-		ktimer->expired_tscdeadline = ktimer->tscdeadline;
 }
 
 /*
@@ -1480,34 +1546,137 @@
	return false;
 }
 
-void wait_lapic_expire(struct kvm_vcpu *vcpu)
+static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
+{
+	u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
+
+	/*
+	 * If the guest TSC is running at a different ratio than the host, then
+	 * convert the delay to nanoseconds to achieve an accurate delay. Note
+	 * that __delay() uses delay_tsc whenever the hardware has TSC, thus
+	 * always for VMX enabled hardware.
+	 */
+	if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
+		__delay(min(guest_cycles,
+			nsec_to_cycles(vcpu, timer_advance_ns)));
+	} else {
+		u64 delay_ns = guest_cycles * 1000000ULL;
+		do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
+		ndelay(min_t(u32, delay_ns, timer_advance_ns));
+	}
+}
+
+static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
+					      s64 advance_expire_delta)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
+	u64 ns;
+
+	/* Do not adjust for tiny fluctuations or large random spikes. */
+	if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
+	    abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
+		return;
+
+	/* too early */
+	if (advance_expire_delta < 0) {
+		ns = -advance_expire_delta * 1000000ULL;
+		do_div(ns, vcpu->arch.virtual_tsc_khz);
+		timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
+	} else {
+		/* too late */
+		ns = advance_expire_delta * 1000000ULL;
+		do_div(ns, vcpu->arch.virtual_tsc_khz);
+		timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
+	}
+
+	if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
+		timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
+	apic->lapic_timer.timer_advance_ns = timer_advance_ns;
+}
+
+static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
 {
	struct kvm_lapic *apic = vcpu->arch.apic;
	u64 guest_tsc, tsc_deadline;
 
-	if (!lapic_in_kernel(vcpu))
-		return;
-
-	if (apic->lapic_timer.expired_tscdeadline == 0)
-		return;
-
-	if (!lapic_timer_int_injected(vcpu))
-		return;
-
	tsc_deadline = apic->lapic_timer.expired_tscdeadline;
	apic->lapic_timer.expired_tscdeadline = 0;
	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
-	trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
+	apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
 
-	/* __delay is delay_tsc whenever the hardware has TSC, thus always. */
	if (guest_tsc < tsc_deadline)
-		__delay(min(tsc_deadline - guest_tsc,
-			nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
+		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
+
+	if (lapic_timer_advance_dynamic)
+		adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
+}
+
+void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
+{
+	if (lapic_in_kernel(vcpu) &&
+	    vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
+	    vcpu->arch.apic->lapic_timer.timer_advance_ns &&
+	    lapic_timer_int_injected(vcpu))
+		__kvm_wait_lapic_expire(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
+
+static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
+{
+	struct kvm_timer *ktimer = &apic->lapic_timer;
+
+	kvm_apic_local_deliver(apic, APIC_LVTT);
+	if (apic_lvtt_tscdeadline(apic)) {
+		ktimer->tscdeadline = 0;
+	} else if (apic_lvtt_oneshot(apic)) {
+		ktimer->tscdeadline = 0;
+		ktimer->target_expiration = 0;
+	}
+}
+
+static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
+{
+	struct kvm_vcpu *vcpu = apic->vcpu;
+	struct kvm_timer *ktimer = &apic->lapic_timer;
+
+	if (atomic_read(&apic->lapic_timer.pending))
+		return;
+
+	if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
+		ktimer->expired_tscdeadline = ktimer->tscdeadline;
+
+	if (!from_timer_fn && vcpu->arch.apicv_active) {
+		WARN_ON(kvm_get_running_vcpu() != vcpu);
+		kvm_apic_inject_pending_timer_irqs(apic);
+		return;
+	}
+
+	if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
+		/*
+		 * Ensure the guest's timer has truly expired before posting an
+		 * interrupt. Open code the relevant checks to avoid querying
+		 * lapic_timer_int_injected(), which will be false since the
+		 * interrupt isn't yet injected. Waiting until after injecting
+		 * is not an option since that won't help a posted interrupt.
+		 */
+		if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
+		    vcpu->arch.apic->lapic_timer.timer_advance_ns)
+			__kvm_wait_lapic_expire(vcpu);
+		kvm_apic_inject_pending_timer_irqs(apic);
+		return;
+	}
+
+	atomic_inc(&apic->lapic_timer.pending);
+	kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+	if (from_timer_fn)
+		kvm_vcpu_kick(vcpu);
 }
 
 static void start_sw_tscdeadline(struct kvm_lapic *apic)
 {
-	u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
+	struct kvm_timer *ktimer = &apic->lapic_timer;
+	u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
	u64 ns = 0;
	ktime_t expire;
	struct kvm_vcpu *vcpu = apic->vcpu;
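
When the module parameter leaves timer_advance_ns at -1, adjust_lapic_timer_advance() above converges by moving one eighth of each measured error, ignoring deltas outside the ADJUST_MIN/ADJUST_MAX window and resetting to NS_INIT on overshoot. A user-space sketch of that feedback step; the constants are copied from the hunk, the helper name and everything else is illustrative:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define ADJUST_MIN	100	/* clock cycles */
#define ADJUST_MAX	10000	/* clock cycles */
#define NS_INIT		1000
#define NS_MAX		5000
#define ADJUST_STEP	8

/* delta > 0: timer fired late; delta < 0: fired early (guest cycles). */
static uint32_t adjust_advance(uint32_t advance_ns, int64_t delta,
			       uint64_t virtual_tsc_khz)
{
	uint64_t ns;

	/* Ignore tiny jitter and absurd outliers alike. */
	if (llabs(delta) < ADJUST_MIN || llabs(delta) > ADJUST_MAX)
		return advance_ns;

	/* cycles -> ns: ns = cycles * 1e6 / tsc_khz */
	ns = (uint64_t)llabs(delta) * 1000000ULL / virtual_tsc_khz;

	if (delta < 0)
		advance_ns -= ns / ADJUST_STEP;	/* we waited too long */
	else
		advance_ns += ns / ADJUST_STEP;	/* we still fired late */

	if (advance_ns > NS_MAX)	/* also catches unsigned underflow */
		advance_ns = NS_INIT;
	return advance_ns;
}

int main(void)
{
	/* timer observed 800 cycles late on a 2 GHz guest TSC */
	uint32_t adv = adjust_advance(1000, 800, 2000000);

	printf("%u ns\n", adv);	/* 1000 + 400/8 = 1050 */
	return 0;
}
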
@@ -1522,17 +1691,24 @@
 
	now = ktime_get();
	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
-	if (likely(tscdeadline > guest_tsc)) {
-		ns = (tscdeadline - guest_tsc) * 1000000ULL;
-		do_div(ns, this_tsc_khz);
+
+	ns = (tscdeadline - guest_tsc) * 1000000ULL;
+	do_div(ns, this_tsc_khz);
+
+	if (likely(tscdeadline > guest_tsc) &&
+	    likely(ns > apic->lapic_timer.timer_advance_ns)) {
		expire = ktime_add_ns(now, ns);
-		expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
-		hrtimer_start(&apic->lapic_timer.timer,
-				expire, HRTIMER_MODE_ABS_PINNED);
+		expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
+		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
	} else
-		apic_timer_expired(apic);
+		apic_timer_expired(apic, false);
 
	local_irq_restore(flags);
+}
+
+static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
+{
+	return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count;
 }
 
 static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
@@ -1540,8 +1716,8 @@
	ktime_t now, remaining;
	u64 ns_remaining_old, ns_remaining_new;
 
-	apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
-		* APIC_BUS_CYCLE_NS * apic->divide_count;
+	apic->lapic_timer.period =
+		tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
	limit_periodic_timer_frequency(apic);
 
	now = ktime_get();
@@ -1559,14 +1735,15 @@
	apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
 }
 
-static bool set_target_expiration(struct kvm_lapic *apic)
+static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
 {
	ktime_t now;
	u64 tscl = rdtsc();
+	s64 deadline;
 
	now = ktime_get();
-	apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
-		* APIC_BUS_CYCLE_NS * apic->divide_count;
+	apic->lapic_timer.period =
+		tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
 
	if (!apic->lapic_timer.period) {
		apic->lapic_timer.tscdeadline = 0;
@@ -1574,20 +1751,32 @@
	}
 
	limit_periodic_timer_frequency(apic);
+	deadline = apic->lapic_timer.period;
 
-	apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
-		   PRIx64 ", "
-		   "timer initial count 0x%x, period %lldns, "
-		   "expire @ 0x%016" PRIx64 ".\n", __func__,
-		   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
-		   kvm_lapic_get_reg(apic, APIC_TMICT),
-		   apic->lapic_timer.period,
-		   ktime_to_ns(ktime_add_ns(now,
-				apic->lapic_timer.period)));
+	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
+		if (unlikely(count_reg != APIC_TMICT)) {
+			deadline = tmict_to_ns(apic,
+				     kvm_lapic_get_reg(apic, count_reg));
+			if (unlikely(deadline <= 0))
+				deadline = apic->lapic_timer.period;
+			else if (unlikely(deadline > apic->lapic_timer.period)) {
+				pr_info_ratelimited(
+				    "kvm: vcpu %i: requested lapic timer restore with "
+				    "starting count register %#x=%u (%lld ns) > initial count (%lld ns). "
+				    "Using initial count to start timer.\n",
+				    apic->vcpu->vcpu_id,
+				    count_reg,
+				    kvm_lapic_get_reg(apic, count_reg),
+				    deadline, apic->lapic_timer.period);
+				kvm_lapic_set_reg(apic, count_reg, 0);
+				deadline = apic->lapic_timer.period;
+			}
+		}
+	}
 
	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
-		nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
-	apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
+		nsec_to_cycles(apic->vcpu, deadline);
+	apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline);
 
	return true;
 }
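
tmict_to_ns() is straight bus-cycle arithmetic: initial count times APIC_BUS_CYCLE_NS times the current divider. KVM models a 1 ns bus cycle, so an initial count of 1000000 under divide-by-16 gives a 16 ms period. A minimal sketch of the conversion (the constant matches KVM's model; everything else is illustrative):

#include <stdint.h>
#include <stdio.h>

#define APIC_BUS_CYCLE_NS 1	/* 1 GHz APIC bus clock */

static uint64_t tmict_to_ns(uint32_t tmict, uint32_t divide_count)
{
	return (uint64_t)tmict * APIC_BUS_CYCLE_NS * divide_count;
}

int main(void)
{
	/* TMICT 1000000 with divide-by-16 -> 16000000 ns = 16 ms */
	printf("%llu ns\n",
	       (unsigned long long)tmict_to_ns(1000000, 16));
	return 0;
}
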
@@ -1620,7 +1809,7 @@
 
	if (ktime_after(ktime_get(),
			apic->lapic_timer.target_expiration)) {
-		apic_timer_expired(apic);
+		apic_timer_expired(apic, false);
 
		if (apic_lvtt_oneshot(apic))
			return;
@@ -1630,7 +1819,7 @@
 
	hrtimer_start(&apic->lapic_timer.timer,
		apic->lapic_timer.target_expiration,
-		HRTIMER_MODE_ABS_PINNED);
+		HRTIMER_MODE_ABS_HARD);
 }
 
 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
@@ -1646,44 +1835,49 @@
 {
	WARN_ON(preemptible());
	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
-	kvm_x86_ops->cancel_hv_timer(apic->vcpu);
+	kvm_x86_ops.cancel_hv_timer(apic->vcpu);
	apic->lapic_timer.hv_timer_in_use = false;
 }
 
 static bool start_hv_timer(struct kvm_lapic *apic)
 {
	struct kvm_timer *ktimer = &apic->lapic_timer;
-	int r;
+	struct kvm_vcpu *vcpu = apic->vcpu;
+	bool expired;
 
	WARN_ON(preemptible());
-	if (!kvm_x86_ops->set_hv_timer)
-		return false;
-
-	if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
+	if (!kvm_can_use_hv_timer(vcpu))
		return false;
 
	if (!ktimer->tscdeadline)
		return false;
 
-	r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline);
-	if (r < 0)
+	if (kvm_x86_ops.set_hv_timer(vcpu, ktimer->tscdeadline, &expired))
		return false;
 
	ktimer->hv_timer_in_use = true;
	hrtimer_cancel(&ktimer->timer);
 
	/*
-	 * Also recheck ktimer->pending, in case the sw timer triggered in
-	 * the window. For periodic timer, leave the hv timer running for
-	 * simplicity, and the deadline will be recomputed on the next vmexit.
+	 * To simplify handling the periodic timer, leave the hv timer running
+	 * even if the deadline timer has expired, i.e. rely on the resulting
+	 * VM-Exit to recompute the periodic timer's target expiration.
	 */
-	if (!apic_lvtt_period(apic) && (r || atomic_read(&ktimer->pending))) {
-		if (r)
-			apic_timer_expired(apic);
-		return false;
+	if (!apic_lvtt_period(apic)) {
+		/*
+		 * Cancel the hv timer if the sw timer fired while the hv timer
+		 * was being programmed, or if the hv timer itself expired.
+		 */
+		if (atomic_read(&ktimer->pending)) {
+			cancel_hv_timer(apic);
+		} else if (expired) {
+			apic_timer_expired(apic, false);
+			cancel_hv_timer(apic);
+		}
	}
 
-	trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, true);
+	trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
+
	return true;
 }
 
@@ -1707,8 +1901,13 @@
 static void restart_apic_timer(struct kvm_lapic *apic)
 {
	preempt_disable();
+
+	if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
+		goto out;
+
	if (!start_hv_timer(apic))
		start_sw_timer(apic);
+out:
	preempt_enable();
 }
 
@@ -1720,9 +1919,9 @@
	/* If the preempt notifier has already run, it also called apic_timer_expired */
	if (!apic->lapic_timer.hv_timer_in_use)
		goto out;
-	WARN_ON(swait_active(&vcpu->wq));
+	WARN_ON(rcuwait_active(&vcpu->wait));
+	apic_timer_expired(apic, false);
	cancel_hv_timer(apic);
-	apic_timer_expired(apic);
 
	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
		advance_periodic_target_expiration(apic);
@@ -1759,15 +1958,20 @@
	restart_apic_timer(apic);
 }
 
-static void start_apic_timer(struct kvm_lapic *apic)
+static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg)
 {
	atomic_set(&apic->lapic_timer.pending, 0);
 
	if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
-	    && !set_target_expiration(apic))
+	    && !set_target_expiration(apic, count_reg))
		return;
 
	restart_apic_timer(apic);
+}
+
+static void start_apic_timer(struct kvm_lapic *apic)
+{
+	__start_apic_timer(apic, APIC_TMICT);
 }
 
 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
@@ -1777,8 +1981,6 @@
	if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
		apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
		if (lvt0_in_nmi_mode) {
-			apic_debug("Receive NMI setting on APIC_LVT0 "
-				   "for cpu %d\n", apic->vcpu->vcpu_id);
			atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
		} else
			atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
@@ -1816,10 +2018,9 @@
		break;
 
	case APIC_DFR:
-		if (!apic_x2apic_mode(apic)) {
-			kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
-			recalculate_apic_map(apic->vcpu->kvm);
-		} else
+		if (!apic_x2apic_mode(apic))
+			kvm_apic_set_dfr(apic, val | 0x0FFFFFFF);
+		else
			ret = 1;
		break;
 
@@ -1846,8 +2047,9 @@
	}
	case APIC_ICR:
		/* No delay here, so we always clear the pending bit */
-		kvm_lapic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
-		apic_send_ipi(apic);
+		val &= ~(1 << 12);
+		kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
+		kvm_lapic_set_reg(apic, APIC_ICR, val);
		break;
 
	case APIC_ICR2:
@@ -1858,6 +2060,7 @@
 
	case APIC_LVT0:
		apic_manage_nmi_watchdog(apic, val);
+		fallthrough;
	case APIC_LVTTHMR:
	case APIC_LVTPC:
	case APIC_LVT1:
@@ -1896,9 +2099,7 @@
	case APIC_TDCR: {
		uint32_t old_divisor = apic->divide_count;
 
-		if (val & 4)
-			apic_debug("KVM_WRITE:TDCR %x\n", val);
-		kvm_lapic_set_reg(apic, APIC_TDCR, val);
+		kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb);
		update_divide_count(apic);
		if (apic->divide_count != old_divisor &&
				apic->lapic_timer.period) {
@@ -1909,24 +2110,27 @@
		break;
	}
	case APIC_ESR:
-		if (apic_x2apic_mode(apic) && val != 0) {
-			apic_debug("KVM_WRITE:ESR not zero %x\n", val);
+		if (apic_x2apic_mode(apic) && val != 0)
			ret = 1;
-		}
		break;
 
	case APIC_SELF_IPI:
-		if (apic_x2apic_mode(apic)) {
-			kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
-		} else
+		/*
+		 * Self-IPI exists only when x2APIC is enabled. Bits 7:0 hold
+		 * the vector, everything else is reserved.
+		 */
+		if (!apic_x2apic_mode(apic) || (val & ~APIC_VECTOR_MASK))
			ret = 1;
+		else
+			kvm_apic_send_ipi(apic, APIC_DEST_SELF | val, 0);
		break;
	default:
		ret = 1;
		break;
	}
-	if (ret)
-		apic_debug("Local APIC Write to read-only register %x\n", reg);
+
+	kvm_recalculate_apic_map(apic->vcpu->kvm);
+
	return ret;
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
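
After this rewrite an x2APIC self-IPI must carry nothing but the vector, and is then sent as an ordinary fixed IPI with the self shorthand. A sketch of just that validation in plain C; the mask values mirror apicdef.h and the delivery is stubbed to a printf:

#include <stdint.h>
#include <stdio.h>

#define APIC_VECTOR_MASK	0xFF
#define APIC_DEST_SELF		0x40000

/* Returns 1 (-> error for the guest) on a malformed write. */
static int self_ipi_write(uint32_t val, int x2apic_mode)
{
	/* Bits 7:0 hold the vector; everything else is reserved. */
	if (!x2apic_mode || (val & ~(uint32_t)APIC_VECTOR_MASK))
		return 1;

	printf("send ICR %#x\n", APIC_DEST_SELF | val);	/* vector to self */
	return 0;
}

int main(void)
{
	self_ipi_write(0x31, 1);	/* ok: vector 0x31 */
	self_ipi_write(0x131, 1);	/* rejected: reserved bit set */
	return 0;
}
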
@@ -1954,18 +2158,10 @@
	 * 32/64/128 bits registers must be accessed thru 32 bits.
	 * Refer SDM 8.4.1
	 */
-	if (len != 4 || (offset & 0xf)) {
-		/* Don't shout loud, $infamous_os would cause only noise. */
-		apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
+	if (len != 4 || (offset & 0xf))
		return 0;
-	}
 
	val = *(u32*)data;
-
-	/* too common printing */
-	if (offset != APIC_EOI)
-		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
-			   "0x%x\n", __func__, offset, len, val);
 
	kvm_lapic_reg_write(apic, offset & 0xff0, val);
 
@@ -2023,8 +2219,7 @@
 {
	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!lapic_in_kernel(vcpu) ||
-		!apic_lvtt_tscdeadline(apic))
+	if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
		return 0;
 
	return apic->lapic_timer.tscdeadline;
@@ -2034,8 +2229,7 @@
 {
	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!kvm_apic_present(vcpu) || apic_lvtt_oneshot(apic) ||
-			apic_lvtt_period(apic))
+	if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
		return;
 
	hrtimer_cancel(&apic->lapic_timer.timer);
@@ -2045,10 +2239,7 @@
 
 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
-	struct kvm_lapic *apic = vcpu->arch.apic;
-
-	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
-		     | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
+	apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4);
 }
 
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
@@ -2071,7 +2262,7 @@
	vcpu->arch.apic_base = value;
 
	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
-		kvm_update_cpuid(vcpu);
+		kvm_update_cpuid_runtime(vcpu);
 
	if (!apic)
		return;
@@ -2081,9 +2272,11 @@
		if (value & MSR_IA32_APICBASE_ENABLE) {
			kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
			static_key_slow_dec_deferred(&apic_hw_disabled);
+			/* Check if there are APF page ready requests pending */
+			kvm_make_request(KVM_REQ_APF_READY, vcpu);
		} else {
			static_key_slow_inc(&apic_hw_disabled.key);
-			recalculate_apic_map(vcpu->kvm);
+			atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
		}
	}
 
@@ -2091,7 +2284,7 @@
		kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
 
	if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
-		kvm_x86_ops->set_virtual_apic_mode(vcpu);
+		kvm_x86_ops.set_virtual_apic_mode(vcpu);
 
	apic->base_address = apic->vcpu->arch.apic_base &
			     MSR_IA32_APICBASE_BASE;
@@ -2099,12 +2292,22 @@
	if ((value & MSR_IA32_APICBASE_ENABLE) &&
	     apic->base_address != APIC_DEFAULT_PHYS_BASE)
		pr_warn_once("APIC base relocation is unsupported by KVM");
-
-	/* with FSB delivery interrupt, we can restart APIC functionality */
-	apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
-		   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
-
 }
+
+void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	if (vcpu->arch.apicv_active) {
+		/* irr_pending is always true when apicv is activated. */
+		apic->irr_pending = true;
+		apic->isr_count = 1;
+	} else {
+		apic->irr_pending = (apic_search_irr(apic) != -1);
+		apic->isr_count = count_vectors(apic->regs + APIC_ISR);
+	}
+}
+EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);
 
 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 {
@@ -2113,8 +2316,6 @@
 
	if (!apic)
		return;
-
-	apic_debug("%s\n", __func__);
 
	/* Stop the timer in case it's a reset to an active apic */
	hrtimer_cancel(&apic->lapic_timer.timer);
@@ -2135,7 +2336,7 @@
			     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
 
-	kvm_lapic_set_reg(apic, APIC_DFR, 0xffffffffU);
+	kvm_apic_set_dfr(apic, 0xffffffffU);
	apic_set_spiv(apic, 0xff);
	kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
	if (!apic_x2apic_mode(apic))
@@ -2150,8 +2351,7 @@
		kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
		kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
	}
-	apic->irr_pending = vcpu->arch.apicv_active;
-	apic->isr_count = vcpu->arch.apicv_active ? 1 : 0;
+	kvm_apic_update_apicv(vcpu);
	apic->highest_isr_cache = -1;
	update_divide_count(apic);
	atomic_set(&apic->lapic_timer.pending, 0);
@@ -2161,18 +2361,15 @@
	vcpu->arch.pv_eoi.msr_val = 0;
	apic_update_ppr(apic);
	if (vcpu->arch.apicv_active) {
-		kvm_x86_ops->apicv_post_state_restore(vcpu);
-		kvm_x86_ops->hwapic_irr_update(vcpu, -1);
-		kvm_x86_ops->hwapic_isr_update(vcpu, -1);
+		kvm_x86_ops.apicv_post_state_restore(vcpu);
+		kvm_x86_ops.hwapic_irr_update(vcpu, -1);
+		kvm_x86_ops.hwapic_isr_update(vcpu, -1);
	}
 
	vcpu->arch.apic_arb_prio = 0;
	vcpu->arch.apic_attention = 0;
 
-	apic_debug("%s: vcpu=%p, id=0x%x, base_msr="
-		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
-		   vcpu, kvm_lapic_get_reg(apic, APIC_ID),
-		   vcpu->arch.apic_base, apic->base_address);
+	kvm_recalculate_apic_map(vcpu->kvm);
 }
 
 /*
@@ -2229,7 +2426,7 @@
	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
 
-	apic_timer_expired(apic);
+	apic_timer_expired(apic, true);
 
	if (lapic_is_periodic(apic)) {
		advance_periodic_target_expiration(apic);
@@ -2239,20 +2436,19 @@
	return HRTIMER_NORESTART;
 }
 
-int kvm_create_lapic(struct kvm_vcpu *vcpu)
+int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 {
	struct kvm_lapic *apic;
 
	ASSERT(vcpu != NULL);
-	apic_debug("apic_init %d\n", vcpu->vcpu_id);
 
-	apic = kzalloc(sizeof(*apic), GFP_KERNEL);
+	apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
	if (!apic)
		goto nomem;
 
	vcpu->arch.apic = apic;
 
-	apic->regs = (void *)get_zeroed_page(GFP_KERNEL);
+	apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
	if (!apic->regs) {
		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
		       vcpu->vcpu_id);
@@ -2261,12 +2457,19 @@
	apic->vcpu = vcpu;
 
	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
-		     HRTIMER_MODE_ABS_PINNED);
+		     HRTIMER_MODE_ABS_HARD);
	apic->lapic_timer.timer.function = apic_timer_fn;
+	if (timer_advance_ns == -1) {
+		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
+		lapic_timer_advance_dynamic = true;
+	} else {
+		apic->lapic_timer.timer_advance_ns = timer_advance_ns;
+		lapic_timer_advance_dynamic = false;
+	}
 
	/*
	 * APIC is created enabled. This will prevent kvm_lapic_set_base from
-	 * thinking that APIC satet has changed.
+	 * thinking that APIC state has changed.
	 */
	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
	static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
@@ -2275,6 +2478,7 @@
	return 0;
 nomem_free_apic:
	kfree(apic);
+	vcpu->arch.apic = NULL;
 nomem:
	return -ENOMEM;
 }
@@ -2290,18 +2494,18 @@
	__apic_update_ppr(apic, &ppr);
	return apic_has_interrupt_for_ppr(apic, ppr);
 }
+EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt);
 
 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
 {
	u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
-	int r = 0;
 
	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
-		r = 1;
+		return 1;
	if ((lvt0 & APIC_LVT_MASKED) == 0 &&
	    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
-		r = 1;
-	return r;
+		return 1;
+	return 0;
 }
 
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
@@ -2309,13 +2513,7 @@
	struct kvm_lapic *apic = vcpu->arch.apic;
 
	if (atomic_read(&apic->lapic_timer.pending) > 0) {
-		kvm_apic_local_deliver(apic, APIC_LVTT);
-		if (apic_lvtt_tscdeadline(apic))
-			apic->lapic_timer.tscdeadline = 0;
-		if (apic_lvtt_oneshot(apic)) {
-			apic->lapic_timer.tscdeadline = 0;
-			apic->lapic_timer.target_expiration = 0;
-		}
+		kvm_apic_inject_pending_timer_irqs(apic);
		atomic_set(&apic->lapic_timer.pending, 0);
	}
 }
@@ -2386,6 +2584,14 @@
 int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
 {
	memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
+
+	/*
+	 * Get calculated timer current count for remaining timer period (if
+	 * any) and store it in the returned register set.
+	 */
+	__kvm_lapic_set_reg(s->regs, APIC_TMCCT,
+			    __apic_read(vcpu->arch.apic, APIC_TMCCT));
+
	return kvm_apic_state_fixup(vcpu, s, false);
 }
 
@@ -2394,17 +2600,19 @@
	struct kvm_lapic *apic = vcpu->arch.apic;
	int r;
 
-
	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
	/* set SPIV separately to get count of SW disabled APICs right */
	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
 
	r = kvm_apic_state_fixup(vcpu, s, true);
-	if (r)
+	if (r) {
+		kvm_recalculate_apic_map(vcpu->kvm);
		return r;
-	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
+	}
+	memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
 
-	recalculate_apic_map(vcpu->kvm);
+	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
+	kvm_recalculate_apic_map(vcpu->kvm);
	kvm_apic_set_version(vcpu);
 
	apic_update_ppr(apic);
@@ -2412,16 +2620,14 @@
	apic_update_lvtt(apic);
	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
	update_divide_count(apic);
-	start_apic_timer(apic);
-	apic->irr_pending = true;
-	apic->isr_count = vcpu->arch.apicv_active ?
-				1 : count_vectors(apic->regs + APIC_ISR);
+	__start_apic_timer(apic, APIC_TMCCT);
+	kvm_apic_update_apicv(vcpu);
	apic->highest_isr_cache = -1;
	if (vcpu->arch.apicv_active) {
-		kvm_x86_ops->apicv_post_state_restore(vcpu);
-		kvm_x86_ops->hwapic_irr_update(vcpu,
+		kvm_x86_ops.apicv_post_state_restore(vcpu);
+		kvm_x86_ops.hwapic_irr_update(vcpu,
				apic_find_highest_irr(apic));
-		kvm_x86_ops->hwapic_isr_update(vcpu,
+		kvm_x86_ops.hwapic_isr_update(vcpu,
				apic_find_highest_isr(apic));
	}
	kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -2437,12 +2643,13 @@
 {
	struct hrtimer *timer;
 
-	if (!lapic_in_kernel(vcpu))
+	if (!lapic_in_kernel(vcpu) ||
+		kvm_can_post_timer_interrupt(vcpu))
		return;
 
	timer = &vcpu->arch.apic->lapic_timer.timer;
	if (hrtimer_cancel(timer))
-		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
 }
 
 /*
@@ -2590,11 +2797,8 @@
	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
		return 1;
 
-	if (reg == APIC_DFR || reg == APIC_ICR2) {
-		apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n",
-			   reg);
+	if (reg == APIC_DFR || reg == APIC_ICR2)
		return 1;
-	}
 
	if (kvm_lapic_reg_read(apic, reg, 4, &low))
		return 1;
@@ -2668,11 +2872,14 @@
		return;
 
	/*
-	 * INITs are latched while in SMM.  Because an SMM CPU cannot
-	 * be in KVM_MP_STATE_INIT_RECEIVED state, just eat SIPIs
-	 * and delay processing of INIT until the next RSM.
+	 * INITs are latched while CPU is in specific states
+	 * (SMM, VMX non-root mode, SVM with GIF=0).
+	 * Because a CPU cannot be in these states immediately
+	 * after it has processed an INIT signal (and thus in
+	 * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
+	 * and leave the INIT pending.
	 */
-	if (is_smm(vcpu)) {
+	if (kvm_vcpu_latch_init(vcpu)) {
		WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
		if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
			clear_bit(KVM_APIC_SIPI, &apic->pending_events);
@@ -2692,8 +2899,6 @@
		/* evaluate pending_events before reading the vector */
		smp_rmb();
		sipi_vector = apic->sipi_vector;
-		apic_debug("vcpu %d received sipi with vector # %x\n",
-			   vcpu->vcpu_id, sipi_vector);
		kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
	}