hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/arch/s390/kvm/vsie.c
....@@ -135,13 +135,149 @@
135135 atomic_set(&scb_s->cpuflags, newflags);
136136 return 0;
137137 }
138
+/* Copy to APCB FORMAT1 from APCB FORMAT0 */
139
+static int setup_apcb10(struct kvm_vcpu *vcpu, struct kvm_s390_apcb1 *apcb_s,
140
+ unsigned long apcb_o, struct kvm_s390_apcb1 *apcb_h)
141
+{
142
+ struct kvm_s390_apcb0 tmp;
138143
139
-/*
144
+ if (read_guest_real(vcpu, apcb_o, &tmp, sizeof(struct kvm_s390_apcb0)))
145
+ return -EFAULT;
146
+
147
+ apcb_s->apm[0] = apcb_h->apm[0] & tmp.apm[0];
148
+ apcb_s->aqm[0] = apcb_h->aqm[0] & tmp.aqm[0] & 0xffff000000000000UL;
149
+ apcb_s->adm[0] = apcb_h->adm[0] & tmp.adm[0] & 0xffff000000000000UL;
150
+
151
+ return 0;
152
+
153
+}
154
+
155
+/**
156
+ * setup_apcb00 - Copy to APCB FORMAT0 from APCB FORMAT0
157
+ * @vcpu: pointer to the virtual CPU
158
+ * @apcb_s: pointer to start of apcb in the shadow crycb
159
+ * @apcb_o: pointer to start of original apcb in the guest2
160
+ * @apcb_h: pointer to start of apcb in the guest1
161
+ *
162
+ * Returns 0 and -EFAULT on error reading guest apcb
163
+ */
164
+static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
165
+ unsigned long apcb_o, unsigned long *apcb_h)
166
+{
167
+ if (read_guest_real(vcpu, apcb_o, apcb_s,
168
+ sizeof(struct kvm_s390_apcb0)))
169
+ return -EFAULT;
170
+
171
+ bitmap_and(apcb_s, apcb_s, apcb_h,
172
+ BITS_PER_BYTE * sizeof(struct kvm_s390_apcb0));
173
+
174
+ return 0;
175
+}
176
+
177
+/**
178
+ * setup_apcb11 - Copy the FORMAT1 APCB from the guest to the shadow CRYCB
179
+ * @vcpu: pointer to the virtual CPU
180
+ * @apcb_s: pointer to start of apcb in the shadow crycb
181
+ * @apcb_o: pointer to start of original guest apcb
182
+ * @apcb_h: pointer to start of apcb in the host
183
+ *
184
+ * Returns 0 and -EFAULT on error reading guest apcb
185
+ */
186
+static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
187
+ unsigned long apcb_o,
188
+ unsigned long *apcb_h)
189
+{
190
+ if (read_guest_real(vcpu, apcb_o, apcb_s,
191
+ sizeof(struct kvm_s390_apcb1)))
192
+ return -EFAULT;
193
+
194
+ bitmap_and(apcb_s, apcb_s, apcb_h,
195
+ BITS_PER_BYTE * sizeof(struct kvm_s390_apcb1));
196
+
197
+ return 0;
198
+}
199
+
200
+/**
201
+ * setup_apcb - Create a shadow copy of the apcb.
202
+ * @vcpu: pointer to the virtual CPU
203
+ * @crycb_s: pointer to shadow crycb
204
+ * @crycb_o: pointer to original guest crycb
205
+ * @crycb_h: pointer to the host crycb
206
+ * @fmt_o: format of the original guest crycb.
207
+ * @fmt_h: format of the host crycb.
208
+ *
209
+ * Checks the compatibility between the guest and host crycb and calls the
210
+ * appropriate copy function.
211
+ *
212
+ * Return 0 or an error number if the guest and host crycb are incompatible.
213
+ */
214
+static int setup_apcb(struct kvm_vcpu *vcpu, struct kvm_s390_crypto_cb *crycb_s,
215
+ const u32 crycb_o,
216
+ struct kvm_s390_crypto_cb *crycb_h,
217
+ int fmt_o, int fmt_h)
218
+{
219
+ struct kvm_s390_crypto_cb *crycb;
220
+
221
+ crycb = (struct kvm_s390_crypto_cb *) (unsigned long)crycb_o;
222
+
223
+ switch (fmt_o) {
224
+ case CRYCB_FORMAT2:
225
+ if ((crycb_o & PAGE_MASK) != ((crycb_o + 256) & PAGE_MASK))
226
+ return -EACCES;
227
+ if (fmt_h != CRYCB_FORMAT2)
228
+ return -EINVAL;
229
+ return setup_apcb11(vcpu, (unsigned long *)&crycb_s->apcb1,
230
+ (unsigned long) &crycb->apcb1,
231
+ (unsigned long *)&crycb_h->apcb1);
232
+ case CRYCB_FORMAT1:
233
+ switch (fmt_h) {
234
+ case CRYCB_FORMAT2:
235
+ return setup_apcb10(vcpu, &crycb_s->apcb1,
236
+ (unsigned long) &crycb->apcb0,
237
+ &crycb_h->apcb1);
238
+ case CRYCB_FORMAT1:
239
+ return setup_apcb00(vcpu,
240
+ (unsigned long *) &crycb_s->apcb0,
241
+ (unsigned long) &crycb->apcb0,
242
+ (unsigned long *) &crycb_h->apcb0);
243
+ }
244
+ break;
245
+ case CRYCB_FORMAT0:
246
+ if ((crycb_o & PAGE_MASK) != ((crycb_o + 32) & PAGE_MASK))
247
+ return -EACCES;
248
+
249
+ switch (fmt_h) {
250
+ case CRYCB_FORMAT2:
251
+ return setup_apcb10(vcpu, &crycb_s->apcb1,
252
+ (unsigned long) &crycb->apcb0,
253
+ &crycb_h->apcb1);
254
+ case CRYCB_FORMAT1:
255
+ case CRYCB_FORMAT0:
256
+ return setup_apcb00(vcpu,
257
+ (unsigned long *) &crycb_s->apcb0,
258
+ (unsigned long) &crycb->apcb0,
259
+ (unsigned long *) &crycb_h->apcb0);
260
+ }
261
+ }
262
+ return -EINVAL;
263
+}
264
+
265
+/**
266
+ * shadow_crycb - Create a shadow copy of the crycb block
267
+ * @vcpu: a pointer to the virtual CPU
268
+ * @vsie_page: a pointer to internal data used for the vSIE
269
+ *
140270 * Create a shadow copy of the crycb block and setup key wrapping, if
141271 * requested for guest 3 and enabled for guest 2.
142272 *
143
- * We only accept format-1 (no AP in g2), but convert it into format-2
273
+ * We accept format-1 or format-2, but we convert format-1 into format-2
274
+ * in the shadow CRYCB.
275
+ * Using format-2 enables the firmware to choose the right format when
276
+ * scheduling the SIE.
144277 * There is nothing to do for format-0.
278
+ *
279
+ * This function centralizes the issuing of set_validity_icpt() for all
280
+ * the subfunctions working on the crycb.
145281 *
146282 * Returns: - 0 if shadowed or nothing to do
147283 * - > 0 if control has to be given to guest 2
....@@ -154,23 +290,44 @@
154290 const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
155291 unsigned long *b1, *b2;
156292 u8 ecb3_flags;
293
+ u32 ecd_flags;
294
+ int apie_h;
295
+ int apie_s;
296
+ int key_msk = test_kvm_facility(vcpu->kvm, 76);
297
+ int fmt_o = crycbd_o & CRYCB_FORMAT_MASK;
298
+ int fmt_h = vcpu->arch.sie_block->crycbd & CRYCB_FORMAT_MASK;
299
+ int ret = 0;
157300
158301 scb_s->crycbd = 0;
159
- if (!(crycbd_o & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1))
302
+
303
+ apie_h = vcpu->arch.sie_block->eca & ECA_APIE;
304
+ apie_s = apie_h & scb_o->eca;
305
+ if (!apie_s && (!key_msk || (fmt_o == CRYCB_FORMAT0)))
160306 return 0;
161
- /* format-1 is supported with message-security-assist extension 3 */
162
- if (!test_kvm_facility(vcpu->kvm, 76))
163
- return 0;
307
+
308
+ if (!crycb_addr)
309
+ return set_validity_icpt(scb_s, 0x0039U);
310
+
311
+ if (fmt_o == CRYCB_FORMAT1)
312
+ if ((crycb_addr & PAGE_MASK) !=
313
+ ((crycb_addr + 128) & PAGE_MASK))
314
+ return set_validity_icpt(scb_s, 0x003CU);
315
+
316
+ if (apie_s) {
317
+ ret = setup_apcb(vcpu, &vsie_page->crycb, crycb_addr,
318
+ vcpu->kvm->arch.crypto.crycb,
319
+ fmt_o, fmt_h);
320
+ if (ret)
321
+ goto end;
322
+ scb_s->eca |= scb_o->eca & ECA_APIE;
323
+ }
324
+
164325 /* we may only allow it if enabled for guest 2 */
165326 ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
166327 (ECB3_AES | ECB3_DEA);
167
- if (!ecb3_flags)
168
- return 0;
169
-
170
- if ((crycb_addr & PAGE_MASK) != ((crycb_addr + 128) & PAGE_MASK))
171
- return set_validity_icpt(scb_s, 0x003CU);
172
- else if (!crycb_addr)
173
- return set_validity_icpt(scb_s, 0x0039U);
328
+ ecd_flags = scb_o->ecd & vcpu->arch.sie_block->ecd & ECD_ECC;
329
+ if (!ecb3_flags && !ecd_flags)
330
+ goto end;
174331
175332 /* copy only the wrapping keys */
176333 if (read_guest_real(vcpu, crycb_addr + 72,
....@@ -178,8 +335,7 @@
178335 return set_validity_icpt(scb_s, 0x0035U);
179336
180337 scb_s->ecb3 |= ecb3_flags;
181
- scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT1 |
182
- CRYCB_FORMAT2;
338
+ scb_s->ecd |= ecd_flags;
183339
184340 /* xor both blocks in one run */
185341 b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask;
....@@ -187,6 +343,16 @@
187343 vcpu->kvm->arch.crypto.crycb->dea_wrapping_key_mask;
188344 /* as 56%8 == 0, bitmap_xor won't overwrite any data */
189345 bitmap_xor(b1, b1, b2, BITS_PER_BYTE * 56);
346
+end:
347
+ switch (ret) {
348
+ case -EINVAL:
349
+ return set_validity_icpt(scb_s, 0x0022U);
350
+ case -EFAULT:
351
+ return set_validity_icpt(scb_s, 0x0035U);
352
+ case -EACCES:
353
+ return set_validity_icpt(scb_s, 0x003CU);
354
+ }
355
+ scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT2;
190356 return 0;
191357 }
192358
....@@ -251,11 +417,6 @@
251417 case ICPT_EXTINT:
252418 memcpy((void *)((u64)scb_o + 0xc0),
253419 (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
254
- break;
255
- case ICPT_PARTEXEC:
256
- /* MVPG only */
257
- memcpy((void *)((u64)scb_o + 0xc0),
258
- (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
259420 break;
260421 }
261422
....@@ -376,12 +537,17 @@
376537 if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
377538 scb_s->eca |= scb_o->eca & ECA_CEI;
378539 /* Epoch Extension */
379
- if (test_kvm_facility(vcpu->kvm, 139))
540
+ if (test_kvm_facility(vcpu->kvm, 139)) {
380541 scb_s->ecd |= scb_o->ecd & ECD_MEF;
542
+ scb_s->epdx = scb_o->epdx;
543
+ }
381544
382545 /* etoken */
383546 if (test_kvm_facility(vcpu->kvm, 156))
384547 scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
548
+
549
+ scb_s->hpid = HPID_VSIE;
550
+ scb_s->cpnc = scb_o->cpnc;
385551
386552 prepare_ibc(vcpu, vsie_page);
387553 rc = shadow_crycb(vcpu, vsie_page);
....@@ -452,10 +618,10 @@
452618 /* with mso/msl, the prefix lies at offset *mso* */
453619 prefix += scb_s->mso;
454620
455
- rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
621
+ rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
456622 if (!rc && (scb_s->ecb & ECB_TE))
457623 rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
458
- prefix + PAGE_SIZE);
624
+ prefix + PAGE_SIZE, NULL);
459625 /*
460626 * We don't have to mprotect, we will be called for all unshadows.
461627 * SIE will detect if protection applies and trigger a validity.
....@@ -746,7 +912,7 @@
746912 current->thread.gmap_addr, 1);
747913
748914 rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
749
- current->thread.gmap_addr);
915
+ current->thread.gmap_addr, NULL);
750916 if (rc > 0) {
751917 rc = inject_fault(vcpu, rc,
752918 current->thread.gmap_addr,
....@@ -768,7 +934,7 @@
768934 {
769935 if (vsie_page->fault_addr)
770936 kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
771
- vsie_page->fault_addr);
937
+ vsie_page->fault_addr, NULL);
772938 vsie_page->fault_addr = 0;
773939 }
774940
....@@ -816,6 +982,98 @@
816982 }
817983
818984 /*
985
+ * Get a register for a nested guest.
986
+ * @vcpu the vcpu of the guest
987
+ * @vsie_page the vsie_page for the nested guest
988
+ * @reg the register number, the upper 4 bits are ignored.
989
+ * returns: the value of the register.
990
+ */
991
+static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, u8 reg)
992
+{
993
+ /* no need to validate the parameter and/or perform error handling */
994
+ reg &= 0xf;
995
+ switch (reg) {
996
+ case 15:
997
+ return vsie_page->scb_s.gg15;
998
+ case 14:
999
+ return vsie_page->scb_s.gg14;
1000
+ default:
1001
+ return vcpu->run->s.regs.gprs[reg];
1002
+ }
1003
+}
1004
+
1005
+static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
1006
+{
1007
+ struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
1008
+ unsigned long pei_dest, pei_src, src, dest, mask, prefix;
1009
+ u64 *pei_block = &vsie_page->scb_o->mcic;
1010
+ int edat, rc_dest, rc_src;
1011
+ union ctlreg0 cr0;
1012
+
1013
+ cr0.val = vcpu->arch.sie_block->gcr[0];
1014
+ edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
1015
+ mask = _kvm_s390_logical_to_effective(&scb_s->gpsw, PAGE_MASK);
1016
+ prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
1017
+
1018
+ dest = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 20) & mask;
1019
+ dest = _kvm_s390_real_to_abs(prefix, dest) + scb_s->mso;
1020
+ src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 16) & mask;
1021
+ src = _kvm_s390_real_to_abs(prefix, src) + scb_s->mso;
1022
+
1023
+ rc_dest = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest, &pei_dest);
1024
+ rc_src = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src, &pei_src);
1025
+ /*
1026
+ * Either everything went well, or something non-critical went wrong
1027
+ * e.g. because of a race. In either case, simply retry.
1028
+ */
1029
+ if (rc_dest == -EAGAIN || rc_src == -EAGAIN || (!rc_dest && !rc_src)) {
1030
+ retry_vsie_icpt(vsie_page);
1031
+ return -EAGAIN;
1032
+ }
1033
+ /* Something more serious went wrong, propagate the error */
1034
+ if (rc_dest < 0)
1035
+ return rc_dest;
1036
+ if (rc_src < 0)
1037
+ return rc_src;
1038
+
1039
+ /* The only possible suppressing exception: just deliver it */
1040
+ if (rc_dest == PGM_TRANSLATION_SPEC || rc_src == PGM_TRANSLATION_SPEC) {
1041
+ clear_vsie_icpt(vsie_page);
1042
+ rc_dest = kvm_s390_inject_program_int(vcpu, PGM_TRANSLATION_SPEC);
1043
+ WARN_ON_ONCE(rc_dest);
1044
+ return 1;
1045
+ }
1046
+
1047
+ /*
1048
+ * Forward the PEI intercept to the guest if it was a page fault, or
1049
+ * also for segment and region table faults if EDAT applies.
1050
+ */
1051
+ if (edat) {
1052
+ rc_dest = rc_dest == PGM_ASCE_TYPE ? rc_dest : 0;
1053
+ rc_src = rc_src == PGM_ASCE_TYPE ? rc_src : 0;
1054
+ } else {
1055
+ rc_dest = rc_dest != PGM_PAGE_TRANSLATION ? rc_dest : 0;
1056
+ rc_src = rc_src != PGM_PAGE_TRANSLATION ? rc_src : 0;
1057
+ }
1058
+ if (!rc_dest && !rc_src) {
1059
+ pei_block[0] = pei_dest;
1060
+ pei_block[1] = pei_src;
1061
+ return 1;
1062
+ }
1063
+
1064
+ retry_vsie_icpt(vsie_page);
1065
+
1066
+ /*
1067
+ * The host has edat, and the guest does not, or it was an ASCE type
1068
+ * exception. The host needs to inject the appropriate DAT interrupts
1069
+ * into the guest.
1070
+ */
1071
+ if (rc_dest)
1072
+ return inject_fault(vcpu, rc_dest, dest, 1);
1073
+ return inject_fault(vcpu, rc_src, src, 0);
1074
+}
1075
+
1076
+/*
8191077 * Run the vsie on a shadow scb and a shadow gmap, without any further
8201078 * sanity checks, handling SIE faults.
8211079 *
....@@ -830,14 +1088,9 @@
8301088 struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
8311089 struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
8321090 int guest_bp_isolation;
833
- int rc;
1091
+ int rc = 0;
8341092
8351093 handle_last_fault(vcpu, vsie_page);
836
-
837
- if (need_resched())
838
- schedule();
839
- if (test_cpu_flag(CIF_MCCK_PENDING))
840
- s390_handle_mcck();
8411094
8421095 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
8431096
....@@ -858,7 +1111,18 @@
8581111 guest_enter_irqoff();
8591112 local_irq_enable();
8601113
861
- rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
1114
+ /*
1115
+ * Simulate a SIE entry of the VCPU (see sie64a), so VCPU blocking
1116
+ * and VCPU requests also hinder the vSIE from running and lead
1117
+ * to an immediate exit. kvm_s390_vsie_kick() has to be used to
1118
+ * also kick the vSIE.
1119
+ */
1120
+ vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
1121
+ barrier();
1122
+ if (!kvm_s390_vcpu_sie_inhibited(vcpu))
1123
+ rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
1124
+ barrier();
1125
+ vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
8621126
8631127 local_irq_disable();
8641128 guest_exit_irqoff();
....@@ -894,6 +1158,10 @@
8941158 case ICPT_VALIDITY:
8951159 if ((scb_s->ipa & 0xf000) != 0xf000)
8961160 scb_s->ipa += 0x1000;
1161
+ break;
1162
+ case ICPT_PARTEXEC:
1163
+ if (scb_s->ipa == 0xb254)
1164
+ rc = vsie_handle_mvpg(vcpu, vsie_page);
8971165 break;
8981166 }
8991167 return rc;
....@@ -1005,8 +1273,10 @@
10051273 if (rc == -EAGAIN)
10061274 rc = 0;
10071275 if (rc || scb_s->icptcode || signal_pending(current) ||
1008
- kvm_s390_vcpu_has_irq(vcpu, 0))
1276
+ kvm_s390_vcpu_has_irq(vcpu, 0) ||
1277
+ kvm_s390_vcpu_sie_inhibited(vcpu))
10091278 break;
1279
+ cond_resched();
10101280 }
10111281
10121282 if (rc == -EFAULT) {
....@@ -1123,7 +1393,8 @@
11231393 if (unlikely(scb_addr & 0x1ffUL))
11241394 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
11251395
1126
- if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0))
1396
+ if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0) ||
1397
+ kvm_s390_vcpu_sie_inhibited(vcpu))
11271398 return 0;
11281399
11291400 vsie_page = get_vsie_page(vcpu->kvm, scb_addr);