forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-11 04dd17822334871b23ea2862f7798fb0e0007777
kernel/arch/powerpc/kvm/book3s_xive.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) "xive-kvm: " fmt
@@ -62,6 +59,75 @@
 #define XIVE_Q_GAP	2
 
 /*
+ * Push a vcpu's context to the XIVE on guest entry.
+ * This assumes we are in virtual mode (MMU on)
+ */
+void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
+{
+	void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
+	u64 pq;
+
+	/*
+	 * Nothing to do if the platform doesn't have a XIVE
+	 * or this vCPU doesn't have its own XIVE context
+	 * (e.g. because it's not using an in-kernel interrupt controller).
+	 */
+	if (!tima || !vcpu->arch.xive_cam_word)
+		return;
+
+	eieio();
+	__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
+	__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
+	vcpu->arch.xive_pushed = 1;
+	eieio();
+
+	/*
+	 * We clear the irq_pending flag. There is a small chance of a
+	 * race vs. the escalation interrupt happening on another
+	 * processor setting it again, but the only consequence is to
+	 * cause a spurious wakeup on the next H_CEDE, which is not an
+	 * issue.
+	 */
+	vcpu->arch.irq_pending = 0;
+
+	/*
+	 * In single escalation mode, if the escalation interrupt is
+	 * on, we mask it.
+	 */
+	if (vcpu->arch.xive_esc_on) {
+		pq = __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
+						  XIVE_ESB_SET_PQ_01));
+		mb();
+
+		/*
+		 * We have a possible subtle race here: The escalation
+		 * interrupt might have fired and be on its way to the
+		 * host queue while we mask it, and if we unmask it
+		 * early enough (re-cede right away), there is a
+		 * theorical possibility that it fires again, thus
+		 * landing in the target queue more than once which is
+		 * a big no-no.
+		 *
+		 * Fortunately, solving this is rather easy. If the
+		 * above load setting PQ to 01 returns a previous
+		 * value where P is set, then we know the escalation
+		 * interrupt is somewhere on its way to the host. In
+		 * that case we simply don't clear the xive_esc_on
+		 * flag below. It will be eventually cleared by the
+		 * handler for the escalation interrupt.
+		 *
+		 * Then, when doing a cede, we check that flag again
+		 * before re-enabling the escalation interrupt, and if
+		 * set, we abort the cede.
+		 */
+		if (!(pq & XIVE_ESB_VAL_P))
+			/* Now P is 0, we can clear the flag */
+			vcpu->arch.xive_esc_on = 0;
+	}
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
+
+/*
  * This is a simple trigger for a generic XIVE IRQ. This must
  * only be called for interrupts that support a trigger page
  */
@@ -100,10 +166,14 @@
 	 */
 	vcpu->arch.xive_esc_on = false;
 
+	/* This orders xive_esc_on = false vs. subsequent stale_p = true */
+	smp_wmb();	/* goes with smp_mb() in cleanup_single_escalation */
+
 	return IRQ_HANDLED;
 }
 
-static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
+int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
+				  bool single_escalation)
 {
 	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
 	struct xive_q *q = &xc->queues[prio];
@@ -122,7 +192,7 @@
 		return -EIO;
 	}
 
-	if (xc->xive->single_escalation)
+	if (single_escalation)
 		name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
 				 vcpu->kvm->arch.lpid, xc->server_num);
 	else
@@ -154,7 +224,7 @@
 	 * interrupt, thus leaving it effectively masked after
 	 * it fires once.
 	 */
-	if (xc->xive->single_escalation) {
+	if (single_escalation) {
 		struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
 		struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
 
@@ -207,14 +277,14 @@
 	return rc;
 }
 
-/* Called with kvm_lock held */
+/* Called with xive->lock held */
 static int xive_check_provisioning(struct kvm *kvm, u8 prio)
 {
 	struct kvmppc_xive *xive = kvm->arch.xive;
 	struct kvm_vcpu *vcpu;
 	int i, rc;
 
-	lockdep_assert_held(&kvm->lock);
+	lockdep_assert_held(&xive->lock);
 
 	/* Already provisioned ? */
 	if (xive->qmap & (1 << prio))
@@ -228,7 +298,8 @@
 			continue;
 		rc = xive_provision_queue(vcpu, prio);
 		if (rc == 0 && !xive->single_escalation)
-			xive_attach_escalation(vcpu, prio);
+			kvmppc_xive_attach_escalation(vcpu, prio,
+						      xive->single_escalation);
 		if (rc)
 			return rc;
 	}
@@ -279,7 +350,7 @@
 	return atomic_add_unless(&q->count, 1, max) ? 0 : -EBUSY;
 }
 
-static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
+int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
 {
 	struct kvm_vcpu *vcpu;
 	int i, rc;
@@ -315,11 +386,6 @@
 
 	/* No available target ! */
 	return -EBUSY;
-}
-
-static u32 xive_vp(struct kvmppc_xive *xive, u32 server)
-{
-	return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
 }
 
 static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
@@ -367,8 +433,8 @@
 	 */
 	if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
 		xive_native_configure_irq(hw_num,
-					  xive_vp(xive, state->act_server),
-					  MASKED, state->number);
+					  kvmppc_xive_vp(xive, state->act_server),
+					  MASKED, state->number);
 		/* set old_p so we can track if an H_EOI was done */
 		state->old_p = true;
 		state->old_q = false;
@@ -418,13 +484,13 @@
 	kvmppc_xive_select_irq(state, &hw_num, &xd);
 
 	/*
-	 * See command in xive_lock_and_mask() concerning masking
+	 * See comment in xive_lock_and_mask() concerning masking
 	 * via firmware.
 	 */
 	if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
 		xive_native_configure_irq(hw_num,
-					  xive_vp(xive, state->act_server),
-					  state->act_priority, state->number);
+					  kvmppc_xive_vp(xive, state->act_server),
+					  state->act_priority, state->number);
 		/* If an EOI is needed, do it here */
 		if (!state->old_p)
 			xive_vm_source_eoi(hw_num, xd);
@@ -472,7 +538,7 @@
 	 * priority. The count for that new target will have
 	 * already been incremented.
 	 */
-	rc = xive_select_target(kvm, &server, prio);
+	rc = kvmppc_xive_select_target(kvm, &server, prio);
 
 	/*
 	 * We failed to find a target ? Not much we can do
@@ -500,7 +566,7 @@
 	kvmppc_xive_select_irq(state, &hw_num, NULL);
 
 	return xive_native_configure_irq(hw_num,
-					 xive_vp(xive, server),
+					 kvmppc_xive_vp(xive, server),
 					 prio, state->number);
 }
 
@@ -561,9 +627,12 @@
 		 irq, server, priority);
 
 	/* First, check provisioning of queues */
-	if (priority != MASKED)
+	if (priority != MASKED) {
+		mutex_lock(&xive->lock);
 		rc = xive_check_provisioning(xive->kvm,
 			  xive_prio_from_guest(priority));
+		mutex_unlock(&xive->lock);
+	}
 	if (rc) {
 		pr_devel(" provisioning failure %d !\n", rc);
 		return rc;
@@ -786,7 +855,8 @@
 
 	/*
 	 * We can't update the state of a "pushed" VCPU, but that
-	 * shouldn't happen.
+	 * shouldn't happen because the vcpu->mutex makes running a
+	 * vcpu mutually exclusive with doing one_reg get/set on it.
 	 */
 	if (WARN_ON(vcpu->arch.xive_pushed))
 		return -EIO;
@@ -877,6 +947,13 @@
 	/* Turn the IPI hard off */
 	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
 
+	/*
+	 * Reset ESB guest mapping. Needed when ESB pages are exposed
+	 * to the guest in XIVE native mode
+	 */
+	if (xive->ops && xive->ops->reset_mapped)
+		xive->ops->reset_mapped(kvm, guest_irq);
+
 	/* Grab info about irq */
 	state->pt_number = hw_irq;
 	state->pt_data = irq_data_get_irq_handler_data(host_data);
@@ -888,7 +965,7 @@
 	 * which is fine for a never started interrupt.
 	 */
 	xive_native_configure_irq(hw_irq,
-				  xive_vp(xive, state->act_server),
+				  kvmppc_xive_vp(xive, state->act_server),
 				  state->act_priority, state->number);
 
 	/*
@@ -962,9 +1039,17 @@
 	state->pt_number = 0;
 	state->pt_data = NULL;
 
+	/*
+	 * Reset ESB guest mapping. Needed when ESB pages are exposed
+	 * to the guest in XIVE native mode
+	 */
+	if (xive->ops && xive->ops->reset_mapped) {
+		xive->ops->reset_mapped(kvm, guest_irq);
+	}
+
 	/* Reconfigure the IPI */
 	xive_native_configure_irq(state->ipi_number,
-				  xive_vp(xive, state->act_server),
+				  kvmppc_xive_vp(xive, state->act_server),
 				  state->act_priority, state->number);
 
 	/*
@@ -986,7 +1071,7 @@
 }
 EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped);
 
-static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
+void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
 	struct kvm *kvm = vcpu->kvm;
@@ -1020,13 +1105,59 @@
 		arch_spin_unlock(&sb->lock);
 	}
 	}
+
+	/* Disable vcpu's escalation interrupt */
+	if (vcpu->arch.xive_esc_on) {
+		__raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
+					     XIVE_ESB_SET_PQ_01));
+		vcpu->arch.xive_esc_on = false;
+	}
+
+	/*
+	 * Clear pointers to escalation interrupt ESB.
+	 * This is safe because the vcpu->mutex is held, preventing
+	 * any other CPU from concurrently executing a KVM_RUN ioctl.
+	 */
+	vcpu->arch.xive_esc_vaddr = 0;
+	vcpu->arch.xive_esc_raddr = 0;
+}
+
+/*
+ * In single escalation mode, the escalation interrupt is marked so
+ * that EOI doesn't re-enable it, but just sets the stale_p flag to
+ * indicate that the P bit has already been dealt with. However, the
+ * assembly code that enters the guest sets PQ to 00 without clearing
+ * stale_p (because it has no easy way to address it). Hence we have
+ * to adjust stale_p before shutting down the interrupt.
+ */
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
+				    struct kvmppc_xive_vcpu *xc, int irq)
+{
+	struct irq_data *d = irq_get_irq_data(irq);
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+	/*
+	 * This slightly odd sequence gives the right result
+	 * (i.e. stale_p set if xive_esc_on is false) even if
+	 * we race with xive_esc_irq() and xive_irq_eoi().
+	 */
+	xd->stale_p = false;
+	smp_mb();	/* paired with smb_wmb in xive_esc_irq */
+	if (!vcpu->arch.xive_esc_on)
+		xd->stale_p = true;
 }
 
 void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
-	struct kvmppc_xive *xive = xc->xive;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
 	int i;
+
+	if (!kvmppc_xics_enabled(vcpu))
+		return;
+
+	if (!xc)
+		return;
 
 	pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num);
 
@@ -1040,6 +1171,9 @@
 	/* Free escalations */
 	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
 		if (xc->esc_virq[i]) {
+			if (xc->xive->single_escalation)
+				xive_cleanup_single_escalation(vcpu, xc,
+							xc->esc_virq[i]);
 			free_irq(xc->esc_virq[i], vcpu);
 			irq_dispose_mapping(xc->esc_virq[i]);
 			kfree(xc->esc_virq_names[i]);
@@ -1048,6 +1182,9 @@
 
 	/* Disable the VP */
 	xive_native_disable_vp(xc->vp_id);
+
+	/* Clear the cam word so guest entry won't try to push context */
+	vcpu->arch.xive_cam_word = 0;
 
 	/* Free the queues */
 	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
@@ -1068,6 +1205,46 @@
 	}
 	/* Free the VP */
 	kfree(xc);
+
+	/* Cleanup the vcpu */
+	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+	vcpu->arch.xive_vcpu = NULL;
+}
+
+static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu)
+{
+	/* We have a block of xive->nr_servers VPs. We just need to check
+	 * packed vCPU ids are below that.
+	 */
+	return kvmppc_pack_vcpu_id(xive->kvm, cpu) < xive->nr_servers;
+}
+
+int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp)
+{
+	u32 vp_id;
+
+	if (!kvmppc_xive_vcpu_id_valid(xive, cpu)) {
+		pr_devel("Out of bounds !\n");
+		return -EINVAL;
+	}
+
+	if (xive->vp_base == XIVE_INVALID_VP) {
+		xive->vp_base = xive_native_alloc_vp_block(xive->nr_servers);
+		pr_devel("VP_Base=%x nr_servers=%d\n", xive->vp_base, xive->nr_servers);
+
+		if (xive->vp_base == XIVE_INVALID_VP)
+			return -ENOSPC;
+	}
+
+	vp_id = kvmppc_xive_vp(xive, cpu);
+	if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
+		pr_devel("Duplicate !\n");
+		return -EEXIST;
+	}
+
+	*vp = vp_id;
+
+	return 0;
 }
 
 int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
@@ -1076,6 +1253,7 @@
 	struct kvmppc_xive *xive = dev->private;
 	struct kvmppc_xive_vcpu *xc;
 	int i, r = -EBUSY;
+	u32 vp_id;
 
 	pr_devel("connect_vcpu(cpu=%d)\n", cpu);
 
10811259
....@@ -1085,27 +1263,27 @@
10851263 }
10861264 if (xive->kvm != vcpu->kvm)
10871265 return -EPERM;
1088
- if (vcpu->arch.irq_type)
1266
+ if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
10891267 return -EBUSY;
1090
- if (kvmppc_xive_find_server(vcpu->kvm, cpu)) {
1091
- pr_devel("Duplicate !\n");
1092
- return -EEXIST;
1093
- }
1094
- if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
1095
- pr_devel("Out of bounds !\n");
1096
- return -EINVAL;
1097
- }
1098
- xc = kzalloc(sizeof(*xc), GFP_KERNEL);
1099
- if (!xc)
1100
- return -ENOMEM;
11011268
11021269 /* We need to synchronize with queue provisioning */
1103
- mutex_lock(&vcpu->kvm->lock);
1270
+ mutex_lock(&xive->lock);
1271
+
1272
+ r = kvmppc_xive_compute_vp_id(xive, cpu, &vp_id);
1273
+ if (r)
1274
+ goto bail;
1275
+
1276
+ xc = kzalloc(sizeof(*xc), GFP_KERNEL);
1277
+ if (!xc) {
1278
+ r = -ENOMEM;
1279
+ goto bail;
1280
+ }
1281
+
11041282 vcpu->arch.xive_vcpu = xc;
11051283 xc->xive = xive;
11061284 xc->vcpu = vcpu;
11071285 xc->server_num = cpu;
1108
- xc->vp_id = xive_vp(xive, cpu);
1286
+ xc->vp_id = vp_id;
11091287 xc->mfrr = 0xff;
11101288 xc->valid = true;
11111289
@@ -1158,7 +1336,8 @@
 		if (xive->qmap & (1 << i)) {
 			r = xive_provision_queue(vcpu, i);
 			if (r == 0 && !xive->single_escalation)
-				xive_attach_escalation(vcpu, i);
+				kvmppc_xive_attach_escalation(
+					vcpu, i, xive->single_escalation);
 			if (r)
 				goto bail;
 		} else {
@@ -1173,7 +1352,7 @@
 	}
 
 	/* If not done above, attach priority 0 escalation */
-	r = xive_attach_escalation(vcpu, 0);
+	r = kvmppc_xive_attach_escalation(vcpu, 0, xive->single_escalation);
 	if (r)
 		goto bail;
 
@@ -1183,7 +1362,7 @@
 	xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_00);
 
 bail:
-	mutex_unlock(&vcpu->kvm->lock);
+	mutex_unlock(&xive->lock);
 	if (r) {
 		kvmppc_xive_cleanup_vcpu(vcpu);
 		return r;
@@ -1424,16 +1603,15 @@
 	return 0;
 }
 
-static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *xive,
-							    int irq)
+struct kvmppc_xive_src_block *kvmppc_xive_create_src_block(
+	struct kvmppc_xive *xive, int irq)
 {
-	struct kvm *kvm = xive->kvm;
 	struct kvmppc_xive_src_block *sb;
 	int i, bid;
 
 	bid = irq >> KVMPPC_XICS_ICS_SHIFT;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&xive->lock);
 
 	/* block already exists - somebody else got here first */
 	if (xive->src_blocks[bid])
@@ -1448,6 +1626,7 @@
 
 	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
 		sb->irq_state[i].number = (bid << KVMPPC_XICS_ICS_SHIFT) | i;
+		sb->irq_state[i].eisn = 0;
 		sb->irq_state[i].guest_priority = MASKED;
 		sb->irq_state[i].saved_priority = MASKED;
 		sb->irq_state[i].act_priority = MASKED;
@@ -1459,7 +1638,7 @@
 		xive->max_sbid = bid;
 
 out:
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&xive->lock);
 	return xive->src_blocks[bid];
 }
 
@@ -1504,7 +1683,7 @@
 	sb = kvmppc_xive_find_source(xive, irq, &idx);
 	if (!sb) {
 		pr_devel("No source, creating source block...\n");
-		sb = xive_create_src_block(xive, irq);
+		sb = kvmppc_xive_create_src_block(xive, irq);
 		if (!sb) {
 			pr_devel("Failed to create block...\n");
 			return -ENOMEM;
@@ -1569,9 +1748,9 @@
 	/* If we have a priority target the interrupt */
 	if (act_prio != MASKED) {
 		/* First, check provisioning of queues */
-		mutex_lock(&xive->kvm->lock);
+		mutex_lock(&xive->lock);
 		rc = xive_check_provisioning(xive->kvm, act_prio);
-		mutex_unlock(&xive->kvm->lock);
+		mutex_unlock(&xive->lock);
 
 		/* Target interrupt */
 		if (rc == 0)
@@ -1684,6 +1863,43 @@
 	return 0;
 }
 
+int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr)
+{
+	u32 __user *ubufp = (u32 __user *) addr;
+	u32 nr_servers;
+	int rc = 0;
+
+	if (get_user(nr_servers, ubufp))
+		return -EFAULT;
+
+	pr_devel("%s nr_servers=%u\n", __func__, nr_servers);
+
+	if (!nr_servers || nr_servers > KVM_MAX_VCPU_ID)
+		return -EINVAL;
+
+	mutex_lock(&xive->lock);
+	if (xive->vp_base != XIVE_INVALID_VP)
+		/* The VP block is allocated once and freed when the device
+		 * is released. Better not allow to change its size since its
+		 * used by connect_vcpu to validate vCPU ids are valid (eg,
+		 * setting it back to a higher value could allow connect_vcpu
+		 * to come up with a VP id that goes beyond the VP block, which
+		 * is likely to cause a crash in OPAL).
+		 */
+		rc = -EBUSY;
+	else if (nr_servers > KVM_MAX_VCPUS)
+		/* We don't need more servers. Higher vCPU ids get packed
+		 * down below KVM_MAX_VCPUS by kvmppc_pack_vcpu_id().
+		 */
+		xive->nr_servers = KVM_MAX_VCPUS;
+	else
+		xive->nr_servers = nr_servers;
+
+	mutex_unlock(&xive->lock);
+
+	return rc;
+}
+
 static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
 	struct kvmppc_xive *xive = dev->private;
@@ -1692,6 +1908,11 @@
 	switch (attr->group) {
 	case KVM_DEV_XICS_GRP_SOURCES:
 		return xive_set_source(xive, attr->attr, attr->addr);
+	case KVM_DEV_XICS_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_XICS_NR_SERVERS:
+			return kvmppc_xive_set_nr_servers(xive, attr->addr);
+		}
 	}
 	return -ENXIO;
 }
@@ -1717,6 +1938,11 @@
 		    attr->attr < KVMPPC_XICS_NR_IRQS)
 			return 0;
 		break;
+	case KVM_DEV_XICS_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_XICS_NR_SERVERS:
+			return 0;
+		}
 	}
 	return -ENXIO;
 }
@@ -1727,7 +1953,7 @@
 	xive_native_configure_irq(hw_num, 0, MASKED, 0);
 }
 
-static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
+void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
 {
 	int i;
 
@@ -1749,16 +1975,53 @@
 	}
 }
 
-static void kvmppc_xive_free(struct kvm_device *dev)
+/*
+ * Called when device fd is closed. kvm->lock is held.
+ */
+static void kvmppc_xive_release(struct kvm_device *dev)
 {
 	struct kvmppc_xive *xive = dev->private;
 	struct kvm *kvm = xive->kvm;
+	struct kvm_vcpu *vcpu;
 	int i;
+
+	pr_devel("Releasing xive device\n");
+
+	/*
+	 * Since this is the device release function, we know that
+	 * userspace does not have any open fd referring to the
+	 * device. Therefore there can not be any of the device
+	 * attribute set/get functions being executed concurrently,
+	 * and similarly, the connect_vcpu and set/clr_mapped
+	 * functions also cannot be being executed.
+	 */
 
 	debugfs_remove(xive->dentry);
 
-	if (kvm)
-		kvm->arch.xive = NULL;
+	/*
+	 * We should clean up the vCPU interrupt presenters first.
+	 */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		/*
+		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
+		 * (i.e. kvmppc_xive_[gs]et_icp) can be done concurrently.
+		 * Holding the vcpu->mutex also means that the vcpu cannot
+		 * be executing the KVM_RUN ioctl, and therefore it cannot
+		 * be executing the XIVE push or pull code or accessing
+		 * the XIVE MMIO regions.
+		 */
+		mutex_lock(&vcpu->mutex);
+		kvmppc_xive_cleanup_vcpu(vcpu);
+		mutex_unlock(&vcpu->mutex);
+	}
+
+	/*
+	 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
+	 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
+	 * against xive code getting called during vcpu execution or
+	 * set/get one_reg operations.
+	 */
+	kvm->arch.xive = NULL;
 
 	/* Mask and free interrupts */
 	for (i = 0; i <= xive->max_sbid; i++) {
@@ -1771,32 +2034,64 @@
 	if (xive->vp_base != XIVE_INVALID_VP)
 		xive_native_free_vp_block(xive->vp_base);
 
+	/*
+	 * A reference of the kvmppc_xive pointer is now kept under
+	 * the xive_devices struct of the machine for reuse. It is
+	 * freed when the VM is destroyed for now until we fix all the
+	 * execution paths.
+	 */
 
-	kfree(xive);
 	kfree(dev);
 }
 
+/*
+ * When the guest chooses the interrupt mode (XICS legacy or XIVE
+ * native), the VM will switch of KVM device. The previous device will
+ * be "released" before the new one is created.
+ *
+ * Until we are sure all execution paths are well protected, provide a
+ * fail safe (transitional) method for device destruction, in which
+ * the XIVE device pointer is recycled and not directly freed.
+ */
+struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type)
+{
+	struct kvmppc_xive **kvm_xive_device = type == KVM_DEV_TYPE_XIVE ?
+		&kvm->arch.xive_devices.native :
+		&kvm->arch.xive_devices.xics_on_xive;
+	struct kvmppc_xive *xive = *kvm_xive_device;
+
+	if (!xive) {
+		xive = kzalloc(sizeof(*xive), GFP_KERNEL);
+		*kvm_xive_device = xive;
+	} else {
+		memset(xive, 0, sizeof(*xive));
+	}
+
+	return xive;
+}
+
+/*
+ * Create a XICS device with XIVE backend. kvm->lock is held.
+ */
 static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
 {
 	struct kvmppc_xive *xive;
 	struct kvm *kvm = dev->kvm;
-	int ret = 0;
 
 	pr_devel("Creating xive for partition\n");
 
-	xive = kzalloc(sizeof(*xive), GFP_KERNEL);
+	/* Already there ? */
+	if (kvm->arch.xive)
+		return -EEXIST;
+
+	xive = kvmppc_xive_get_device(kvm, type);
 	if (!xive)
 		return -ENOMEM;
 
 	dev->private = xive;
 	xive->dev = dev;
 	xive->kvm = kvm;
-
-	/* Already there ? */
-	if (kvm->arch.xive)
-		ret = -EEXIST;
-	else
-		kvm->arch.xive = xive;
+	mutex_init(&xive->lock);
 
 	/* We use the default queue size set by the host */
 	xive->q_order = xive_native_default_eq_shift();
@@ -1805,23 +2100,56 @@
 	else
 		xive->q_page_order = xive->q_order - PAGE_SHIFT;
 
-	/* Allocate a bunch of VPs */
-	xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
-	pr_devel("VP_Base=%x\n", xive->vp_base);
-
-	if (xive->vp_base == XIVE_INVALID_VP)
-		ret = -ENOMEM;
+	/* VP allocation is delayed to the first call to connect_vcpu */
+	xive->vp_base = XIVE_INVALID_VP;
+	/* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets
+	 * on a POWER9 system.
+	 */
+	xive->nr_servers = KVM_MAX_VCPUS;
 
 	xive->single_escalation = xive_native_has_single_escalation();
 
-	if (ret) {
-		kfree(xive);
-		return ret;
-	}
-
+	kvm->arch.xive = xive;
 	return 0;
 }
 
+int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	unsigned int i;
+
+	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+		struct xive_q *q = &xc->queues[i];
+		u32 i0, i1, idx;
+
+		if (!q->qpage && !xc->esc_virq[i])
+			continue;
+
+		seq_printf(m, " [q%d]: ", i);
+
+		if (q->qpage) {
+			idx = q->idx;
+			i0 = be32_to_cpup(q->qpage + idx);
+			idx = (idx + 1) & q->msk;
+			i1 = be32_to_cpup(q->qpage + idx);
+			seq_printf(m, "T=%d %08x %08x...\n", q->toggle,
+				   i0, i1);
+		}
+		if (xc->esc_virq[i]) {
+			struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
+			struct xive_irq_data *xd =
+				irq_data_get_irq_handler_data(d);
+			u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+
+			seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
+				   (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
+				   (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
+				   xc->esc_virq[i], pq, xd->eoi_page);
+			seq_puts(m, "\n");
+		}
+	}
+	return 0;
+}
 
 static int xive_debug_show(struct seq_file *m, void *private)
 {
@@ -1847,43 +2175,17 @@
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
-		unsigned int i;
 
 		if (!xc)
 			continue;
 
-		seq_printf(m, "cpu server %#x CPPR:%#x HWCPPR:%#x"
+		seq_printf(m, "cpu server %#x VP:%#x CPPR:%#x HWCPPR:%#x"
 			   " MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n",
-			   xc->server_num, xc->cppr, xc->hw_cppr,
+			   xc->server_num, xc->vp_id, xc->cppr, xc->hw_cppr,
 			   xc->mfrr, xc->pending,
 			   xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
-		for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
-			struct xive_q *q = &xc->queues[i];
-			u32 i0, i1, idx;
 
-			if (!q->qpage && !xc->esc_virq[i])
-				continue;
-
-			seq_printf(m, " [q%d]: ", i);
-
-			if (q->qpage) {
-				idx = q->idx;
-				i0 = be32_to_cpup(q->qpage + idx);
-				idx = (idx + 1) & q->msk;
-				i1 = be32_to_cpup(q->qpage + idx);
-				seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1);
-			}
-			if (xc->esc_virq[i]) {
-				struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
-				struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
-				u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
-				seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
-					   (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
-					   (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
-					   xc->esc_virq[i], pq, xd->eoi_page);
-				seq_printf(m, "\n");
-			}
-		}
+		kvmppc_xive_debug_show_queues(m, vcpu);
 
 		t_rm_h_xirr += xc->stat_rm_h_xirr;
 		t_rm_h_ipoll += xc->stat_rm_h_ipoll;
@@ -1907,17 +2209,7 @@
 	return 0;
 }
 
-static int xive_debug_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, xive_debug_show, inode->i_private);
-}
-
-static const struct file_operations xive_debug_fops = {
-	.open = xive_debug_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(xive_debug);
 
 static void xive_debugfs_init(struct kvmppc_xive *xive)
 {
@@ -1948,7 +2240,7 @@
 	.name = "kvm-xive",
 	.create = kvmppc_xive_create,
 	.init = kvmppc_xive_init,
-	.destroy = kvmppc_xive_free,
+	.release = kvmppc_xive_release,
 	.set_attr = xive_set_attr,
 	.get_attr = xive_get_attr,
 	.has_attr = xive_has_attr,