forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-11 04dd17822334871b23ea2862f7798fb0e0007777
kernel/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1,12 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
  *
  * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
  *
@@ -29,12 +22,15 @@
 #include <asm/exception-64s.h>
 #include <asm/kvm_book3s_asm.h>
 #include <asm/book3s/64/mmu-hash.h>
+#include <asm/export.h>
 #include <asm/tm.h>
 #include <asm/opal.h>
 #include <asm/xive-regs.h>
 #include <asm/thread_info.h>
 #include <asm/asm-compat.h>
 #include <asm/feature-fixups.h>
+#include <asm/cpuidle.h>
+#include <asm/ultravisor-api.h>

 /* Sign-extend HDEC if not on POWER9 */
 #define EXTEND_HDEC(reg) \
@@ -45,10 +41,12 @@
 /* Values in HSTATE_NAPPING(r13) */
 #define NAPPING_CEDE 1
 #define NAPPING_NOVCPU 2
+#define NAPPING_UNSPLIT 3

 /* Stack frame offsets for kvmppc_hv_entry */
-#define SFS 160
+#define SFS 208
 #define STACK_SLOT_TRAP (SFS-4)
+#define STACK_SLOT_SHORT_PATH (SFS-8)
 #define STACK_SLOT_TID (SFS-16)
 #define STACK_SLOT_PSSCR (SFS-24)
 #define STACK_SLOT_PID (SFS-32)
@@ -59,6 +57,8 @@
 #define STACK_SLOT_HFSCR (SFS-72)
 #define STACK_SLOT_AMR (SFS-80)
 #define STACK_SLOT_UAMOR (SFS-88)
+/* the following is used by the P9 short path */
+#define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */

 /*
  * Call kvmppc_hv_entry in real mode.
@@ -116,45 +116,7 @@
 mtspr SPRN_SPRG_VDSO_WRITE,r3

 /* Reload the host's PMU registers */
- lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
- cmpwi r4, 0
- beq 23f /* skip if not */
-BEGIN_FTR_SECTION
- ld r3, HSTATE_MMCR0(r13)
- andi. r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
- cmpwi r4, MMCR0_PMAO
- beql kvmppc_fix_pmao
-END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
- lwz r3, HSTATE_PMC1(r13)
- lwz r4, HSTATE_PMC2(r13)
- lwz r5, HSTATE_PMC3(r13)
- lwz r6, HSTATE_PMC4(r13)
- lwz r8, HSTATE_PMC5(r13)
- lwz r9, HSTATE_PMC6(r13)
- mtspr SPRN_PMC1, r3
- mtspr SPRN_PMC2, r4
- mtspr SPRN_PMC3, r5
- mtspr SPRN_PMC4, r6
- mtspr SPRN_PMC5, r8
- mtspr SPRN_PMC6, r9
- ld r3, HSTATE_MMCR0(r13)
- ld r4, HSTATE_MMCR1(r13)
- ld r5, HSTATE_MMCRA(r13)
- ld r6, HSTATE_SIAR(r13)
- ld r7, HSTATE_SDAR(r13)
- mtspr SPRN_MMCR1, r4
- mtspr SPRN_MMCRA, r5
- mtspr SPRN_SIAR, r6
- mtspr SPRN_SDAR, r7
-BEGIN_FTR_SECTION
- ld r8, HSTATE_MMCR2(r13)
- ld r9, HSTATE_SIER(r13)
- mtspr SPRN_MMCR2, r8
- mtspr SPRN_SIER, r9
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
- mtspr SPRN_MMCR0, r3
- isync
-23:
+ bl kvmhv_load_host_pmu

 /*
  * Reload DEC. HDEC interrupts were disabled when
@@ -325,17 +287,22 @@
 b kvmhv_switch_to_host

 /*
- * We come in here when wakened from nap mode.
- * Relocation is off and most register values are lost.
- * r13 points to the PACA.
+ * We come in here when wakened from Linux offline idle code.
+ * Relocation is off
  * r3 contains the SRR1 wakeup value, SRR1 is trashed.
  */
- .globl kvm_start_guest
-kvm_start_guest:
- /* Set runlatch bit the minute you wake up from nap */
- mfspr r0, SPRN_CTRLF
- ori r0, r0, 1
- mtspr SPRN_CTRLT, r0
+_GLOBAL(idle_kvm_start_guest)
+ mfcr r5
+ mflr r0
+ std r5, 8(r1) // Save CR in caller's frame
+ std r0, 16(r1) // Save LR in caller's frame
+ // Create frame on emergency stack
+ ld r4, PACAEMERGSP(r13)
+ stdu r1, -SWITCH_FRAME_SIZE(r4)
+ // Switch to new frame on emergency stack
+ mr r1, r4
+ std r3, 32(r1) // Save SRR1 wakeup value
+ SAVE_NVGPRS(r1)

 /*
  * Could avoid this and pass it through in r3. For now,
@@ -343,27 +310,23 @@
  */
 mtspr SPRN_SRR1,r3

- ld r2,PACATOC(r13)
-
 li r0,0
 stb r0,PACA_FTRACE_ENABLED(r13)

 li r0,KVM_HWTHREAD_IN_KVM
 stb r0,HSTATE_HWTHREAD_STATE(r13)

- /* NV GPR values from power7_idle() will no longer be valid */
- li r0,1
- stb r0,PACA_NAPSTATELOST(r13)
-
- /* were we napping due to cede? */
+ /* kvm cede / napping does not come through here */
 lbz r0,HSTATE_NAPPING(r13)
- cmpwi r0,NAPPING_CEDE
- beq kvm_end_cede
- cmpwi r0,NAPPING_NOVCPU
- beq kvm_novcpu_wakeup
+ twnei r0,0

- ld r1,PACAEMERGSP(r13)
- subi r1,r1,STACK_FRAME_OVERHEAD
+ b 1f
+
+kvm_unsplit_wakeup:
+ li r0, 0
+ stb r0, HSTATE_NAPPING(r13)
+
+1:

 /*
  * We weren't napping due to cede, so this must be a secondary
@@ -389,6 +352,10 @@
 beq kvm_no_guest

 kvm_secondary_got_guest:
+
+ // About to go to guest, clear saved SRR1
+ li r0, 0
+ std r0, 32(r1)

 /* Set HSTATE_DSCR(r13) to something sensible */
 ld r6, PACA_DSCR_DEFAULT(r13)
@@ -472,19 +439,24 @@
 lbz r3, HSTATE_HWTHREAD_REQ(r13)
 cmpwi r3, 0
 bne 54f
-/*
- * We jump to pnv_wakeup_loss, which will return to the caller
- * of power7_nap in the powernv cpu offline loop. The value we
- * put in r3 becomes the return value for power7_nap. pnv_wakeup_loss
- * requires SRR1 in r12.
- */
+
+ /*
+ * Jump to idle_return_gpr_loss, which returns to the
+ * idle_kvm_start_guest caller.
+ */
 li r3, LPCR_PECE0
 mfspr r4, SPRN_LPCR
 rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
 mtspr SPRN_LPCR, r4
- li r3, 0
- mfspr r12,SPRN_SRR1
- b pnv_wakeup_loss
+ // Return SRR1 wakeup value, or 0 if we went into the guest
+ ld r3, 32(r1)
+ REST_NVGPRS(r1)
+ ld r1, 0(r1) // Switch back to caller stack
+ ld r0, 16(r1) // Reload LR
+ ld r5, 8(r1) // Reload CR
+ mtlr r0
+ mtcr r5
+ blr

 53: HMT_LOW
 ld r5, HSTATE_KVM_VCORE(r13)
....@@ -569,6 +541,8 @@
569541 lbz r0, KVM_SPLIT_DO_NAP(r3)
570542 cmpwi r0, 0
571543 beq 57f
544
+ li r3, NAPPING_UNSPLIT
545
+ stb r3, HSTATE_NAPPING(r13)
572546 li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
573547 mfspr r5, SPRN_LPCR
574548 rlwimi r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
....@@ -616,11 +590,8 @@
616590 1:
617591 #endif
618592
619
- /* Use cr7 as an indication of radix mode */
620593 ld r5, HSTATE_KVM_VCORE(r13)
621594 ld r9, VCORE_KVM(r5) /* pointer to struct kvm */
622
- lbz r0, KVM_RADIX(r9)
623
- cmpwi cr7, r0, 0
624595
625596 /*
626597 * POWER7/POWER8 host -> guest partition switch code.
....@@ -643,9 +614,6 @@
643614 cmpwi r6,0
644615 bne 10f
645616
646
- /* Radix has already switched LPID and flushed core TLB */
647
- bne cr7, 22f
648
-
649617 lwz r7,KVM_LPID(r9)
650618 BEGIN_FTR_SECTION
651619 ld r6,KVM_SDR1(r9)
....@@ -657,41 +625,13 @@
657625 mtspr SPRN_LPID,r7
658626 isync
659627
660
- /* See if we need to flush the TLB. Hash has to be done in RM */
661
- lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */
662
-BEGIN_FTR_SECTION
663
- /*
664
- * On POWER9, individual threads can come in here, but the
665
- * TLB is shared between the 4 threads in a core, hence
666
- * invalidating on one thread invalidates for all.
667
- * Thus we make all 4 threads use the same bit here.
668
- */
669
- clrrdi r6,r6,2
670
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
671
- clrldi r7,r6,64-6 /* extract bit number (6 bits) */
672
- srdi r6,r6,6 /* doubleword number */
673
- sldi r6,r6,3 /* address offset */
674
- add r6,r6,r9
675
- addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */
676
- li r8,1
677
- sld r8,r8,r7
678
- ld r7,0(r6)
679
- and. r7,r7,r8
680
- beq 22f
681
- /* Flush the TLB of any entries for this LPID */
682
- lwz r0,KVM_TLB_SETS(r9)
683
- mtctr r0
684
- li r7,0x800 /* IS field = 0b10 */
685
- ptesync
686
- li r0,0 /* RS for P9 version of tlbiel */
687
-28: tlbiel r7 /* On P9, rs=0, RIC=0, PRS=0, R=0 */
688
- addi r7,r7,0x1000
689
- bdnz 28b
690
- ptesync
691
-23: ldarx r7,0,r6 /* clear the bit after TLB flushed */
692
- andc r7,r7,r8
693
- stdcx. r7,0,r6
694
- bne 23b
628
+ /* See if we need to flush the TLB. */
629
+ mr r3, r9 /* kvm pointer */
630
+ lhz r4, PACAPACAINDEX(r13) /* physical cpu number */
631
+ li r5, 0 /* nested vcpu pointer */
632
+ bl kvmppc_check_need_tlb_flush
633
+ nop
634
+ ld r5, HSTATE_KVM_VCORE(r13)
695635
696636 /* Add timebase offset onto timebase */
697637 22: ld r8,VCORE_TB_OFFSET(r5)
....@@ -711,8 +651,10 @@
711651
712652 /* Load guest PCR value to select appropriate compat mode */
713653 37: ld r7, VCORE_PCR(r5)
714
- cmpdi r7, 0
654
+ LOAD_REG_IMMEDIATE(r6, PCR_MASK)
655
+ cmpld r7, r6
715656 beq 38f
657
+ or r7, r7, r6
716658 mtspr SPRN_PCR, r7
717659 38:
718660
....@@ -771,8 +713,8 @@
771713 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
772714 BEGIN_FTR_SECTION
773715 mfspr r5, SPRN_CIABR
774
- mfspr r6, SPRN_DAWR
775
- mfspr r7, SPRN_DAWRX
716
+ mfspr r6, SPRN_DAWR0
717
+ mfspr r7, SPRN_DAWRX0
776718 mfspr r8, SPRN_IAMR
777719 std r5, STACK_SLOT_CIABR(r1)
778720 std r6, STACK_SLOT_DAWR(r1)
....@@ -804,66 +746,23 @@
804746 b 91f
805747 END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
806748 /*
807
- * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
749
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
808750 */
809751 mr r3, r4
810752 ld r4, VCPU_MSR(r3)
753
+ li r5, 0 /* don't preserve non-vol regs */
811754 bl kvmppc_restore_tm_hv
755
+ nop
812756 ld r4, HSTATE_KVM_VCPU(r13)
813757 91:
814758 #endif
815759
816
- /* Load guest PMU registers */
817
- /* R4 is live here (vcpu pointer) */
818
- li r3, 1
819
- sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
820
- mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
821
- isync
822
-BEGIN_FTR_SECTION
823
- ld r3, VCPU_MMCR(r4)
824
- andi. r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
825
- cmpwi r5, MMCR0_PMAO
826
- beql kvmppc_fix_pmao
827
-END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
828
- lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */
829
- lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */
830
- lwz r6, VCPU_PMC + 8(r4)
831
- lwz r7, VCPU_PMC + 12(r4)
832
- lwz r8, VCPU_PMC + 16(r4)
833
- lwz r9, VCPU_PMC + 20(r4)
834
- mtspr SPRN_PMC1, r3
835
- mtspr SPRN_PMC2, r5
836
- mtspr SPRN_PMC3, r6
837
- mtspr SPRN_PMC4, r7
838
- mtspr SPRN_PMC5, r8
839
- mtspr SPRN_PMC6, r9
840
- ld r3, VCPU_MMCR(r4)
841
- ld r5, VCPU_MMCR + 8(r4)
842
- ld r6, VCPU_MMCR + 16(r4)
843
- ld r7, VCPU_SIAR(r4)
844
- ld r8, VCPU_SDAR(r4)
845
- mtspr SPRN_MMCR1, r5
846
- mtspr SPRN_MMCRA, r6
847
- mtspr SPRN_SIAR, r7
848
- mtspr SPRN_SDAR, r8
849
-BEGIN_FTR_SECTION
850
- ld r5, VCPU_MMCR + 24(r4)
851
- ld r6, VCPU_SIER(r4)
852
- mtspr SPRN_MMCR2, r5
853
- mtspr SPRN_SIER, r6
854
-BEGIN_FTR_SECTION_NESTED(96)
855
- lwz r7, VCPU_PMC + 24(r4)
856
- lwz r8, VCPU_PMC + 28(r4)
857
- ld r9, VCPU_MMCR + 32(r4)
858
- mtspr SPRN_SPMC1, r7
859
- mtspr SPRN_SPMC2, r8
860
- mtspr SPRN_MMCRS, r9
861
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
862
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
863
- mtspr SPRN_MMCR0, r3
864
- isync
760
+ /* Load guest PMU registers; r4 = vcpu pointer here */
761
+ mr r3, r4
762
+ bl kvmhv_load_guest_pmu
865763
866764 /* Load up FP, VMX and VSX registers */
765
+ ld r4, HSTATE_KVM_VCPU(r13)
867766 bl kvmppc_load_fp
868767
869768 ld r14, VCPU_GPR(R14)(r4)
....@@ -900,18 +799,21 @@
900799 mtspr SPRN_IAMR, r5
901800 mtspr SPRN_PSPB, r6
902801 mtspr SPRN_FSCR, r7
903
- ld r5, VCPU_DAWR(r4)
904
- ld r6, VCPU_DAWRX(r4)
905
- ld r7, VCPU_CIABR(r4)
906
- ld r8, VCPU_TAR(r4)
907802 /*
908803 * Handle broken DAWR case by not writing it. This means we
909804 * can still store the DAWR register for migration.
910805 */
911
-BEGIN_FTR_SECTION
912
- mtspr SPRN_DAWR, r5
913
- mtspr SPRN_DAWRX, r6
914
-END_FTR_SECTION_IFSET(CPU_FTR_DAWR)
806
+ LOAD_REG_ADDR(r5, dawr_force_enable)
807
+ lbz r5, 0(r5)
808
+ cmpdi r5, 0
809
+ beq 1f
810
+ ld r5, VCPU_DAWR(r4)
811
+ ld r6, VCPU_DAWRX(r4)
812
+ mtspr SPRN_DAWR0, r5
813
+ mtspr SPRN_DAWRX0, r6
814
+1:
815
+ ld r7, VCPU_CIABR(r4)
816
+ ld r8, VCPU_TAR(r4)
915817 mtspr SPRN_CIABR, r7
916818 mtspr SPRN_TAR, r8
917819 ld r5, VCPU_IC(r4)
....@@ -1047,17 +949,29 @@
1047949
1048950 #ifdef CONFIG_KVM_XICS
1049951 /* We are entering the guest on that thread, push VCPU to XIVE */
1050
- ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
1051
- cmpldi cr0, r10, 0
1052
- beq no_xive
1053952 ld r11, VCPU_XIVE_SAVED_STATE(r4)
1054953 li r9, TM_QW1_OS
954
+ lwz r8, VCPU_XIVE_CAM_WORD(r4)
955
+ cmpwi r8, 0
956
+ beq no_xive
957
+ li r7, TM_QW1_OS + TM_WORD2
958
+ mfmsr r0
959
+ andi. r0, r0, MSR_DR /* in real mode? */
960
+ beq 2f
961
+ ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
962
+ cmpldi cr1, r10, 0
963
+ beq cr1, no_xive
964
+ eieio
965
+ stdx r11,r9,r10
966
+ stwx r8,r7,r10
967
+ b 3f
968
+2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
969
+ cmpldi cr1, r10, 0
970
+ beq cr1, no_xive
1055971 eieio
1056972 stdcix r11,r9,r10
1057
- lwz r11, VCPU_XIVE_CAM_WORD(r4)
1058
- li r9, TM_QW1_OS + TM_WORD2
1059
- stwcix r11,r9,r10
1060
- li r9, 1
973
+ stwcix r8,r7,r10
974
+3: li r9, 1
1061975 stb r9, VCPU_XIVE_PUSHED(r4)
1062976 eieio
1063977
....@@ -1076,12 +990,16 @@
1076990 * on, we mask it.
1077991 */
1078992 lbz r0, VCPU_XIVE_ESC_ON(r4)
1079
- cmpwi r0,0
1080
- beq 1f
1081
- ld r10, VCPU_XIVE_ESC_RADDR(r4)
993
+ cmpwi cr1, r0,0
994
+ beq cr1, 1f
1082995 li r9, XIVE_ESB_SET_PQ_01
996
+ beq 4f /* in real mode? */
997
+ ld r10, VCPU_XIVE_ESC_VADDR(r4)
998
+ ldx r0, r10, r9
999
+ b 5f
1000
+4: ld r10, VCPU_XIVE_ESC_RADDR(r4)
10831001 ldcix r0, r10, r9
1084
- sync
1002
+5: sync
10851003
10861004 /* We have a possible subtle race here: The escalation interrupt might
10871005 * have fired and be on its way to the host queue while we mask it,
....@@ -1108,73 +1026,40 @@
11081026 no_xive:
11091027 #endif /* CONFIG_KVM_XICS */
11101028
1111
-deliver_guest_interrupt:
1112
- ld r6, VCPU_CTR(r4)
1113
- ld r7, VCPU_XER(r4)
1029
+ li r0, 0
1030
+ stw r0, STACK_SLOT_SHORT_PATH(r1)
11141031
1115
- mtctr r6
1116
- mtxer r7
1117
-
1118
-kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
1119
- ld r10, VCPU_PC(r4)
1120
- ld r11, VCPU_MSR(r4)
1032
+deliver_guest_interrupt: /* r4 = vcpu, r13 = paca */
1033
+ /* Check if we can deliver an external or decrementer interrupt now */
1034
+ ld r0, VCPU_PENDING_EXC(r4)
1035
+BEGIN_FTR_SECTION
1036
+ /* On POWER9, also check for emulated doorbell interrupt */
1037
+ lbz r3, VCPU_DBELL_REQ(r4)
1038
+ or r0, r0, r3
1039
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1040
+ cmpdi r0, 0
1041
+ beq 71f
1042
+ mr r3, r4
1043
+ bl kvmppc_guest_entry_inject_int
1044
+ ld r4, HSTATE_KVM_VCPU(r13)
1045
+71:
11211046 ld r6, VCPU_SRR0(r4)
11221047 ld r7, VCPU_SRR1(r4)
11231048 mtspr SPRN_SRR0, r6
11241049 mtspr SPRN_SRR1, r7
11251050
1051
+fast_guest_entry_c:
1052
+ ld r10, VCPU_PC(r4)
1053
+ ld r11, VCPU_MSR(r4)
11261054 /* r11 = vcpu->arch.msr & ~MSR_HV */
11271055 rldicl r11, r11, 63 - MSR_HV_LG, 1
11281056 rotldi r11, r11, 1 + MSR_HV_LG
11291057 ori r11, r11, MSR_ME
11301058
1131
- /* Check if we can deliver an external or decrementer interrupt now */
1132
- ld r0, VCPU_PENDING_EXC(r4)
1133
- rldicl r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
1134
- cmpdi cr1, r0, 0
1135
- andi. r8, r11, MSR_EE
1136
- mfspr r8, SPRN_LPCR
1137
- /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
1138
- rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
1139
- mtspr SPRN_LPCR, r8
1140
- isync
1141
- beq 5f
1142
- li r0, BOOK3S_INTERRUPT_EXTERNAL
1143
- bne cr1, 12f
1144
- mfspr r0, SPRN_DEC
1145
-BEGIN_FTR_SECTION
1146
- /* On POWER9 check whether the guest has large decrementer enabled */
1147
- andis. r8, r8, LPCR_LD@h
1148
- bne 15f
1149
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1150
- extsw r0, r0
1151
-15: cmpdi r0, 0
1152
- li r0, BOOK3S_INTERRUPT_DECREMENTER
1153
- bge 5f
1154
-
1155
-12: mtspr SPRN_SRR0, r10
1156
- mr r10,r0
1157
- mtspr SPRN_SRR1, r11
1158
- mr r9, r4
1159
- bl kvmppc_msr_interrupt
1160
-5:
1161
-BEGIN_FTR_SECTION
1162
- b fast_guest_return
1163
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
1164
- /* On POWER9, check for pending doorbell requests */
1165
- lbz r0, VCPU_DBELL_REQ(r4)
1166
- cmpwi r0, 0
1167
- beq fast_guest_return
1168
- ld r5, HSTATE_KVM_VCORE(r13)
1169
- /* Set DPDES register so the CPU will take a doorbell interrupt */
1170
- li r0, 1
1171
- mtspr SPRN_DPDES, r0
1172
- std r0, VCORE_DPDES(r5)
1173
- /* Make sure other cpus see vcore->dpdes set before dbell req clear */
1174
- lwsync
1175
- /* Clear the pending doorbell request */
1176
- li r0, 0
1177
- stb r0, VCPU_DBELL_REQ(r4)
1059
+ ld r6, VCPU_CTR(r4)
1060
+ ld r7, VCPU_XER(r4)
1061
+ mtctr r6
1062
+ mtxer r7
11781063
11791064 /*
11801065 * Required state:
....@@ -1210,16 +1095,10 @@
12101095 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
12111096
12121097 ld r5, VCPU_LR(r4)
1213
- ld r6, VCPU_CR(r4)
12141098 mtlr r5
1215
- mtcr r6
12161099
12171100 ld r1, VCPU_GPR(R1)(r4)
1218
- ld r2, VCPU_GPR(R2)(r4)
1219
- ld r3, VCPU_GPR(R3)(r4)
12201101 ld r5, VCPU_GPR(R5)(r4)
1221
- ld r6, VCPU_GPR(R6)(r4)
1222
- ld r7, VCPU_GPR(R7)(r4)
12231102 ld r8, VCPU_GPR(R8)(r4)
12241103 ld r9, VCPU_GPR(R9)(r4)
12251104 ld r10, VCPU_GPR(R10)(r4)
....@@ -1237,9 +1116,118 @@
12371116 mtspr SPRN_HDSISR, r0
12381117 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
12391118
1119
+ ld r6, VCPU_KVM(r4)
1120
+ lbz r7, KVM_SECURE_GUEST(r6)
1121
+ cmpdi r7, 0
1122
+ ld r6, VCPU_GPR(R6)(r4)
1123
+ ld r7, VCPU_GPR(R7)(r4)
1124
+ bne ret_to_ultra
1125
+
1126
+ ld r0, VCPU_CR(r4)
1127
+ mtcr r0
1128
+
12401129 ld r0, VCPU_GPR(R0)(r4)
1130
+ ld r2, VCPU_GPR(R2)(r4)
1131
+ ld r3, VCPU_GPR(R3)(r4)
12411132 ld r4, VCPU_GPR(R4)(r4)
12421133 HRFI_TO_GUEST
1134
+ b .
1135
+/*
1136
+ * Use UV_RETURN ultracall to return control back to the Ultravisor after
1137
+ * processing an hypercall or interrupt that was forwarded (a.k.a. reflected)
1138
+ * to the Hypervisor.
1139
+ *
1140
+ * All registers have already been loaded, except:
1141
+ * R0 = hcall result
1142
+ * R2 = SRR1, so UV can detect a synthesized interrupt (if any)
1143
+ * R3 = UV_RETURN
1144
+ */
1145
+ret_to_ultra:
1146
+ ld r0, VCPU_CR(r4)
1147
+ mtcr r0
1148
+
1149
+ ld r0, VCPU_GPR(R3)(r4)
1150
+ mfspr r2, SPRN_SRR1
1151
+ li r3, 0
1152
+ ori r3, r3, UV_RETURN
1153
+ ld r4, VCPU_GPR(R4)(r4)
1154
+ sc 2
1155
+
1156
+/*
1157
+ * Enter the guest on a P9 or later system where we have exactly
1158
+ * one vcpu per vcore and we don't need to go to real mode
1159
+ * (which implies that host and guest are both using radix MMU mode).
1160
+ * r3 = vcpu pointer
1161
+ * Most SPRs and all the VSRs have been loaded already.
1162
+ */
1163
+_GLOBAL(__kvmhv_vcpu_entry_p9)
1164
+EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9)
1165
+ mflr r0
1166
+ std r0, PPC_LR_STKOFF(r1)
1167
+ stdu r1, -SFS(r1)
1168
+
1169
+ li r0, 1
1170
+ stw r0, STACK_SLOT_SHORT_PATH(r1)
1171
+
1172
+ std r3, HSTATE_KVM_VCPU(r13)
1173
+ mfcr r4
1174
+ stw r4, SFS+8(r1)
1175
+
1176
+ std r1, HSTATE_HOST_R1(r13)
1177
+
1178
+ reg = 14
1179
+ .rept 18
1180
+ std reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
1181
+ reg = reg + 1
1182
+ .endr
1183
+
1184
+ reg = 14
1185
+ .rept 18
1186
+ ld reg, __VCPU_GPR(reg)(r3)
1187
+ reg = reg + 1
1188
+ .endr
1189
+
1190
+ mfmsr r10
1191
+ std r10, HSTATE_HOST_MSR(r13)
1192
+
1193
+ mr r4, r3
1194
+ b fast_guest_entry_c
1195
+guest_exit_short_path:
1196
+
1197
+ li r0, KVM_GUEST_MODE_NONE
1198
+ stb r0, HSTATE_IN_GUEST(r13)
1199
+
1200
+ reg = 14
1201
+ .rept 18
1202
+ std reg, __VCPU_GPR(reg)(r9)
1203
+ reg = reg + 1
1204
+ .endr
1205
+
1206
+ reg = 14
1207
+ .rept 18
1208
+ ld reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
1209
+ reg = reg + 1
1210
+ .endr
1211
+
1212
+ lwz r4, SFS+8(r1)
1213
+ mtcr r4
1214
+
1215
+ mr r3, r12 /* trap number */
1216
+
1217
+ addi r1, r1, SFS
1218
+ ld r0, PPC_LR_STKOFF(r1)
1219
+ mtlr r0
1220
+
1221
+ /* If we are in real mode, do a rfid to get back to the caller */
1222
+ mfmsr r4
1223
+ andi. r5, r4, MSR_IR
1224
+ bnelr
1225
+ rldicl r5, r4, 64 - MSR_TS_S_LG, 62 /* extract TS field */
1226
+ mtspr SPRN_SRR0, r0
1227
+ ld r10, HSTATE_HOST_MSR(r13)
1228
+ rldimi r10, r5, MSR_TS_S_LG, 63 - MSR_TS_T_LG
1229
+ mtspr SPRN_SRR1, r10
1230
+ RFI_TO_KERNEL
12431231 b .
12441232
12451233 secondary_too_late:
....@@ -1284,7 +1272,6 @@
12841272 * R12 = (guest CR << 32) | interrupt vector
12851273 * R13 = PACA
12861274 * guest R12 saved in shadow VCPU SCRATCH0
1287
- * guest CTR saved in shadow VCPU SCRATCH1 if RELOCATABLE
12881275 * guest R13 saved in SPRN_SCRATCH0
12891276 */
12901277 std r9, HSTATE_SCRATCH2(r13)
....@@ -1385,27 +1372,30 @@
13851372 11: stw r3,VCPU_HEIR(r9)
13861373
13871374 /* these are volatile across C function calls */
1388
-#ifdef CONFIG_RELOCATABLE
1389
- ld r3, HSTATE_SCRATCH1(r13)
1390
- mtctr r3
1391
-#else
13921375 mfctr r3
1393
-#endif
13941376 mfxer r4
13951377 std r3, VCPU_CTR(r9)
13961378 std r4, VCPU_XER(r9)
1379
+
1380
+ /* Save more register state */
1381
+ mfdar r3
1382
+ mfdsisr r4
1383
+ std r3, VCPU_DAR(r9)
1384
+ stw r4, VCPU_DSISR(r9)
1385
+
1386
+ /* If this is a page table miss then see if it's theirs or ours */
1387
+ cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
1388
+ beq kvmppc_hdsi
1389
+ std r3, VCPU_FAULT_DAR(r9)
1390
+ stw r4, VCPU_FAULT_DSISR(r9)
1391
+ cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE
1392
+ beq kvmppc_hisi
13971393
13981394 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
13991395 /* For softpatch interrupt, go off and do TM instruction emulation */
14001396 cmpwi r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
14011397 beq kvmppc_tm_emul
14021398 #endif
1403
-
1404
- /* If this is a page table miss then see if it's theirs or ours */
1405
- cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
1406
- beq kvmppc_hdsi
1407
- cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE
1408
- beq kvmppc_hisi
14091399
14101400 /* See if this is a leftover HDEC interrupt */
14111401 cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
....@@ -1426,10 +1416,14 @@
14261416 BEGIN_FTR_SECTION
14271417 PPC_MSGSYNC
14281418 lwsync
1419
+ /* always exit if we're running a nested guest */
1420
+ ld r0, VCPU_NESTED(r9)
1421
+ cmpdi r0, 0
1422
+ bne guest_exit_cont
14291423 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
14301424 lbz r0, HSTATE_HOST_IPI(r13)
14311425 cmpwi r0, 0
1432
- beq 4f
1426
+ beq maybe_reenter_guest
14331427 b guest_exit_cont
14341428 3:
14351429 /* If it's a hypervisor facility unavailable interrupt, save HFSCR */
....@@ -1441,82 +1435,16 @@
14411435 14:
14421436 /* External interrupt ? */
14431437 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
1444
- bne+ guest_exit_cont
1445
-
1446
- /* External interrupt, first check for host_ipi. If this is
1447
- * set, we know the host wants us out so let's do it now
1448
- */
1449
- bl kvmppc_read_intr
1450
-
1451
- /*
1452
- * Restore the active volatile registers after returning from
1453
- * a C function.
1454
- */
1455
- ld r9, HSTATE_KVM_VCPU(r13)
1456
- li r12, BOOK3S_INTERRUPT_EXTERNAL
1457
-
1458
- /*
1459
- * kvmppc_read_intr return codes:
1460
- *
1461
- * Exit to host (r3 > 0)
1462
- * 1 An interrupt is pending that needs to be handled by the host
1463
- * Exit guest and return to host by branching to guest_exit_cont
1464
- *
1465
- * 2 Passthrough that needs completion in the host
1466
- * Exit guest and return to host by branching to guest_exit_cont
1467
- * However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
1468
- * to indicate to the host to complete handling the interrupt
1469
- *
1470
- * Before returning to guest, we check if any CPU is heading out
1471
- * to the host and if so, we head out also. If no CPUs are heading
1472
- * check return values <= 0.
1473
- *
1474
- * Return to guest (r3 <= 0)
1475
- * 0 No external interrupt is pending
1476
- * -1 A guest wakeup IPI (which has now been cleared)
1477
- * In either case, we return to guest to deliver any pending
1478
- * guest interrupts.
1479
- *
1480
- * -2 A PCI passthrough external interrupt was handled
1481
- * (interrupt was delivered directly to guest)
1482
- * Return to guest to deliver any pending guest interrupts.
1483
- */
1484
-
1485
- cmpdi r3, 1
1486
- ble 1f
1487
-
1488
- /* Return code = 2 */
1489
- li r12, BOOK3S_INTERRUPT_HV_RM_HARD
1490
- stw r12, VCPU_TRAP(r9)
1491
- b guest_exit_cont
1492
-
1493
-1: /* Return code <= 1 */
1494
- cmpdi r3, 0
1495
- bgt guest_exit_cont
1496
-
1497
- /* Return code <= 0 */
1498
-4: ld r5, HSTATE_KVM_VCORE(r13)
1499
- lwz r0, VCORE_ENTRY_EXIT(r5)
1500
- cmpwi r0, 0x100
1501
- mr r4, r9
1502
- blt deliver_guest_interrupt
1503
-
1504
-guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
1505
- /* Save more register state */
1506
- mfdar r6
1507
- mfdsisr r7
1508
- std r6, VCPU_DAR(r9)
1509
- stw r7, VCPU_DSISR(r9)
1510
- /* don't overwrite fault_dar/fault_dsisr if HDSI */
1511
- cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
1512
- beq mc_cont
1513
- std r6, VCPU_FAULT_DAR(r9)
1514
- stw r7, VCPU_FAULT_DSISR(r9)
1515
-
1438
+ beq kvmppc_guest_external
15161439 /* See if it is a machine check */
15171440 cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
15181441 beq machine_check_realmode
1519
-mc_cont:
1442
+ /* Or a hypervisor maintenance interrupt */
1443
+ cmpwi r12, BOOK3S_INTERRUPT_HMI
1444
+ beq hmi_realmode
1445
+
1446
+guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
1447
+
15201448 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
15211449 addi r3, r9, VCPU_TB_RMEXIT
15221450 mr r4, r9
....@@ -1560,9 +1488,17 @@
15601488 1:
15611489 #endif /* CONFIG_KVM_XICS */
15621490
1563
- /* Possibly flush the link stack here. */
1491
+ /*
1492
+ * Possibly flush the link stack here, before we do a blr in
1493
+ * guest_exit_short_path.
1494
+ */
15641495 1: nop
15651496 patch_site 1b patch__call_kvm_flush_link_stack
1497
+
1498
+ /* If we came in through the P9 short path, go back out to C now */
1499
+ lwz r0, STACK_SLOT_SHORT_PATH(r1)
1500
+ cmpwi r0, 0
1501
+ bne guest_exit_short_path
15661502
15671503 /* For hash guest, read the guest SLB and save it away */
15681504 ld r5, VCPU_KVM(r9)
....@@ -1795,11 +1731,13 @@
17951731 b 91f
17961732 END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
17971733 /*
1798
- * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
1734
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
17991735 */
18001736 mr r3, r9
18011737 ld r4, VCPU_MSR(r3)
1738
+ li r5, 0 /* don't preserve non-vol regs */
18021739 bl kvmppc_save_tm_hv
1740
+ nop
18031741 ld r9, HSTATE_KVM_VCPU(r13)
18041742 91:
18051743 #endif
....@@ -1817,83 +1755,12 @@
18171755 25:
18181756 /* Save PMU registers if requested */
18191757 /* r8 and cr0.eq are live here */
1820
-BEGIN_FTR_SECTION
1821
- /*
1822
- * POWER8 seems to have a hardware bug where setting
1823
- * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
1824
- * when some counters are already negative doesn't seem
1825
- * to cause a performance monitor alert (and hence interrupt).
1826
- * The effect of this is that when saving the PMU state,
1827
- * if there is no PMU alert pending when we read MMCR0
1828
- * before freezing the counters, but one becomes pending
1829
- * before we read the counters, we lose it.
1830
- * To work around this, we need a way to freeze the counters
1831
- * before reading MMCR0. Normally, freezing the counters
1832
- * is done by writing MMCR0 (to set MMCR0[FC]) which
1833
- * unavoidably writes MMCR0[PMA0] as well. On POWER8,
1834
- * we can also freeze the counters using MMCR2, by writing
1835
- * 1s to all the counter freeze condition bits (there are
1836
- * 9 bits each for 6 counters).
1837
- */
1838
- li r3, -1 /* set all freeze bits */
1839
- clrrdi r3, r3, 10
1840
- mfspr r10, SPRN_MMCR2
1841
- mtspr SPRN_MMCR2, r3
1842
- isync
1843
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1844
- li r3, 1
1845
- sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
1846
- mfspr r4, SPRN_MMCR0 /* save MMCR0 */
1847
- mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
1848
- mfspr r6, SPRN_MMCRA
1849
- /* Clear MMCRA in order to disable SDAR updates */
1850
- li r7, 0
1851
- mtspr SPRN_MMCRA, r7
1852
- isync
1758
+ mr r3, r9
1759
+ li r4, 1
18531760 beq 21f /* if no VPA, save PMU stuff anyway */
1854
- lbz r7, LPPACA_PMCINUSE(r8)
1855
- cmpwi r7, 0 /* did they ask for PMU stuff to be saved? */
1856
- bne 21f
1857
- std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
1858
- b 22f
1859
-21: mfspr r5, SPRN_MMCR1
1860
- mfspr r7, SPRN_SIAR
1861
- mfspr r8, SPRN_SDAR
1862
- std r4, VCPU_MMCR(r9)
1863
- std r5, VCPU_MMCR + 8(r9)
1864
- std r6, VCPU_MMCR + 16(r9)
1865
-BEGIN_FTR_SECTION
1866
- std r10, VCPU_MMCR + 24(r9)
1867
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1868
- std r7, VCPU_SIAR(r9)
1869
- std r8, VCPU_SDAR(r9)
1870
- mfspr r3, SPRN_PMC1
1871
- mfspr r4, SPRN_PMC2
1872
- mfspr r5, SPRN_PMC3
1873
- mfspr r6, SPRN_PMC4
1874
- mfspr r7, SPRN_PMC5
1875
- mfspr r8, SPRN_PMC6
1876
- stw r3, VCPU_PMC(r9)
1877
- stw r4, VCPU_PMC + 4(r9)
1878
- stw r5, VCPU_PMC + 8(r9)
1879
- stw r6, VCPU_PMC + 12(r9)
1880
- stw r7, VCPU_PMC + 16(r9)
1881
- stw r8, VCPU_PMC + 20(r9)
1882
-BEGIN_FTR_SECTION
1883
- mfspr r5, SPRN_SIER
1884
- std r5, VCPU_SIER(r9)
1885
-BEGIN_FTR_SECTION_NESTED(96)
1886
- mfspr r6, SPRN_SPMC1
1887
- mfspr r7, SPRN_SPMC2
1888
- mfspr r8, SPRN_MMCRS
1889
- stw r6, VCPU_PMC + 24(r9)
1890
- stw r7, VCPU_PMC + 28(r9)
1891
- std r8, VCPU_MMCR + 32(r9)
1892
- lis r4, 0x8000
1893
- mtspr SPRN_MMCRS, r4
1894
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
1895
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1896
-22:
1761
+ lbz r4, LPPACA_PMCINUSE(r8)
1762
+21: bl kvmhv_save_guest_pmu
1763
+ ld r9, HSTATE_KVM_VCPU(r13)
18971764
18981765 /* Restore host values of some registers */
18991766 BEGIN_FTR_SECTION
....@@ -1905,8 +1772,8 @@
19051772 * If the DAWR doesn't work, it's ok to write these here as
19061773 * this value should always be zero
19071774 */
1908
- mtspr SPRN_DAWR, r6
1909
- mtspr SPRN_DAWRX, r7
1775
+ mtspr SPRN_DAWR0, r6
1776
+ mtspr SPRN_DAWRX0, r7
19101777 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
19111778 BEGIN_FTR_SECTION
19121779 ld r5, STACK_SLOT_TID(r1)
....@@ -1934,6 +1801,7 @@
19341801 tlbsync
19351802 ptesync
19361803
1804
+BEGIN_FTR_SECTION
19371805 /* Radix: Handle the case where the guest used an illegal PID */
19381806 LOAD_REG_ADDR(r4, mmu_base_pid)
19391807 lwz r3, VCPU_GUEST_PID(r9)
....@@ -1963,9 +1831,18 @@
19631831 addi r7,r7,0x1000
19641832 bdnz 1b
19651833 ptesync
1834
+END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG)
19661835
19671836 2:
19681837 #endif /* CONFIG_PPC_RADIX_MMU */
1838
+
1839
+ /*
1840
+ * cp_abort is required if the processor supports local copy-paste
1841
+ * to clear the copy buffer that was under control of the guest.
1842
+ */
1843
+BEGIN_FTR_SECTION
1844
+ PPC_CP_ABORT
1845
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
19691846
19701847 /*
19711848 * POWER7/POWER8 guest -> host partition switch code.
....@@ -2023,24 +1900,6 @@
20231900 mtspr SPRN_DPDES, r8
20241901 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
20251902
2026
- /* If HMI, call kvmppc_realmode_hmi_handler() */
2027
- lwz r12, STACK_SLOT_TRAP(r1)
2028
- cmpwi r12, BOOK3S_INTERRUPT_HMI
2029
- bne 27f
2030
- bl kvmppc_realmode_hmi_handler
2031
- nop
2032
- cmpdi r3, 0
2033
- /*
2034
- * At this point kvmppc_realmode_hmi_handler may have resync-ed
2035
- * the TB, and if it has, we must not subtract the guest timebase
2036
- * offset from the timebase. So, skip it.
2037
- *
2038
- * Also, do not call kvmppc_subcore_exit_guest() because it has
2039
- * been invoked as part of kvmppc_realmode_hmi_handler().
2040
- */
2041
- beq 30f
2042
-
2043
-27:
20441903 /* Subtract timebase offset from timebase */
20451904 ld r8, VCORE_TB_OFFSET_APPL(r5)
20461905 cmpdi r8,0
....@@ -2058,19 +1917,29 @@
20581917 addis r8,r8,0x100 /* if so, increment upper 40 bits */
20591918 mtspr SPRN_TBU40,r8
20601919
2061
-17: bl kvmppc_subcore_exit_guest
1920
+17:
1921
+ /*
1922
+ * If this is an HMI, we called kvmppc_realmode_hmi_handler
1923
+ * above, which may or may not have already called
1924
+ * kvmppc_subcore_exit_guest. Fortunately, all that
1925
+ * kvmppc_subcore_exit_guest does is clear a flag, so calling
1926
+ * it again here is benign even if kvmppc_realmode_hmi_handler
1927
+ * has already called it.
1928
+ */
1929
+ bl kvmppc_subcore_exit_guest
20621930 nop
20631931 30: ld r5,HSTATE_KVM_VCORE(r13)
20641932 ld r4,VCORE_KVM(r5) /* pointer to struct kvm */
20651933
20661934 /* Reset PCR */
20671935 ld r0, VCORE_PCR(r5)
2068
- cmpdi r0, 0
1936
+ LOAD_REG_IMMEDIATE(r6, PCR_MASK)
1937
+ cmpld r0, r6
20691938 beq 18f
2070
- li r0, 0
2071
- mtspr SPRN_PCR, r0
1939
+ mtspr SPRN_PCR, r6
20721940 18:
20731941 /* Signal secondary CPUs to continue */
1942
+ li r0, 0
20741943 stb r0,VCORE_IN_GUEST(r5)
20751944 19: lis r8,0x7fff /* MAX_INT@h */
20761945 mtspr SPRN_HDEC,r8
....@@ -2134,6 +2003,66 @@
21342003 mtlr r0
21352004 blr
21362005
2006
+kvmppc_guest_external:
2007
+ /* External interrupt, first check for host_ipi. If this is
2008
+ * set, we know the host wants us out so let's do it now
2009
+ */
2010
+ bl kvmppc_read_intr
2011
+
2012
+ /*
2013
+ * Restore the active volatile registers after returning from
2014
+ * a C function.
2015
+ */
2016
+ ld r9, HSTATE_KVM_VCPU(r13)
2017
+ li r12, BOOK3S_INTERRUPT_EXTERNAL
2018
+
2019
+ /*
2020
+ * kvmppc_read_intr return codes:
2021
+ *
2022
+ * Exit to host (r3 > 0)
2023
+ * 1 An interrupt is pending that needs to be handled by the host
2024
+ * Exit guest and return to host by branching to guest_exit_cont
2025
+ *
2026
+ * 2 Passthrough that needs completion in the host
2027
+ * Exit guest and return to host by branching to guest_exit_cont
2028
+ * However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
2029
+ * to indicate to the host to complete handling the interrupt
2030
+ *
2031
+ * Before returning to guest, we check if any CPU is heading out
2032
+ * to the host and if so, we head out also. If no CPUs are heading
2033
+ * check return values <= 0.
2034
+ *
2035
+ * Return to guest (r3 <= 0)
2036
+ * 0 No external interrupt is pending
2037
+ * -1 A guest wakeup IPI (which has now been cleared)
2038
+ * In either case, we return to guest to deliver any pending
2039
+ * guest interrupts.
2040
+ *
2041
+ * -2 A PCI passthrough external interrupt was handled
2042
+ * (interrupt was delivered directly to guest)
2043
+ * Return to guest to deliver any pending guest interrupts.
2044
+ */
2045
+
2046
+ cmpdi r3, 1
2047
+ ble 1f
2048
+
2049
+ /* Return code = 2 */
2050
+ li r12, BOOK3S_INTERRUPT_HV_RM_HARD
2051
+ stw r12, VCPU_TRAP(r9)
2052
+ b guest_exit_cont
2053
+
2054
+1: /* Return code <= 1 */
2055
+ cmpdi r3, 0
2056
+ bgt guest_exit_cont
2057
+
2058
+ /* Return code <= 0 */
2059
+maybe_reenter_guest:
2060
+ ld r5, HSTATE_KVM_VCORE(r13)
2061
+ lwz r0, VCORE_ENTRY_EXIT(r5)
2062
+ cmpwi r0, 0x100
2063
+ mr r4, r9
2064
+ blt deliver_guest_interrupt
2065
+ b guest_exit_cont
21372066
21382067 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
21392068 /*
....@@ -2338,6 +2267,10 @@
23382267 andi. r0,r11,MSR_PR
23392268 /* sc 1 from userspace - reflect to guest syscall */
23402269 bne sc_1_fast_return
2270
+ /* sc 1 from nested guest - give it to L1 to handle */
2271
+ ld r0, VCPU_NESTED(r9)
2272
+ cmpdi r0, 0
2273
+ bne guest_exit_cont
23412274 clrrdi r3,r3,2
23422275 cmpldi r3,hcall_real_table_end - hcall_real_table
23432276 bge guest_exit_cont
....@@ -2395,11 +2328,16 @@
23952328 .long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table
23962329 .long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table
23972330 .long DOTSYM(kvmppc_h_protect) - hcall_real_table
2331
+#ifdef CONFIG_SPAPR_TCE_IOMMU
23982332 .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table
23992333 .long DOTSYM(kvmppc_rm_h_put_tce) - hcall_real_table
2334
+#else
2335
+ .long 0 /* 0x1c */
2336
+ .long 0 /* 0x20 */
2337
+#endif
24002338 .long 0 /* 0x24 - H_SET_SPRG0 */
24012339 .long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table
2402
- .long 0 /* 0x2c */
2340
+ .long DOTSYM(kvmppc_rm_h_page_init) - hcall_real_table
24032341 .long 0 /* 0x30 */
24042342 .long 0 /* 0x34 */
24052343 .long 0 /* 0x38 */
....@@ -2474,8 +2412,13 @@
24742412 .long 0 /* 0x12c */
24752413 .long 0 /* 0x130 */
24762414 .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
2415
+#ifdef CONFIG_SPAPR_TCE_IOMMU
24772416 .long DOTSYM(kvmppc_rm_h_stuff_tce) - hcall_real_table
24782417 .long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table
2418
+#else
2419
+ .long 0 /* 0x138 */
2420
+ .long 0 /* 0x13c */
2421
+#endif
24792422 .long 0 /* 0x140 */
24802423 .long 0 /* 0x144 */
24812424 .long 0 /* 0x148 */
....@@ -2596,7 +2539,8 @@
25962539 .globl hcall_real_table_end
25972540 hcall_real_table_end:
25982541
2599
-_GLOBAL(kvmppc_h_set_xdabr)
2542
+_GLOBAL_TOC(kvmppc_h_set_xdabr)
2543
+EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr)
26002544 andi. r0, r5, DABRX_USER | DABRX_KERNEL
26012545 beq 6f
26022546 li r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI
....@@ -2605,7 +2549,8 @@
26052549 6: li r3, H_PARAMETER
26062550 blr
26072551
2608
-_GLOBAL(kvmppc_h_set_dabr)
2552
+_GLOBAL_TOC(kvmppc_h_set_dabr)
2553
+EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr)
26092554 li r5, DABRX_USER | DABRX_KERNEL
26102555 3:
26112556 BEGIN_FTR_SECTION
....@@ -2624,20 +2569,31 @@
26242569 blr
26252570
26262571 2:
2627
-BEGIN_FTR_SECTION
2628
- /* POWER9 with disabled DAWR */
2572
+ LOAD_REG_ADDR(r11, dawr_force_enable)
2573
+ lbz r11, 0(r11)
2574
+ cmpdi r11, 0
2575
+ bne 3f
26292576 li r3, H_HARDWARE
26302577 blr
2631
-END_FTR_SECTION_IFCLR(CPU_FTR_DAWR)
2578
+3:
26322579 /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
26332580 rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
26342581 rlwimi r5, r4, 2, DAWRX_WT
26352582 clrrdi r4, r4, 3
26362583 std r4, VCPU_DAWR(r3)
26372584 std r5, VCPU_DAWRX(r3)
2638
- mtspr SPRN_DAWR, r4
2639
- mtspr SPRN_DAWRX, r5
2640
- li r3, 0
2585
+ /*
2586
+ * If came in through the real mode hcall handler then it is necessary
2587
+ * to write the registers since the return path won't. Otherwise it is
2588
+ * sufficient to store then in the vcpu struct as they will be loaded
2589
+ * next time the vcpu is run.
2590
+ */
2591
+ mfmsr r6
2592
+ andi. r6, r6, MSR_DR /* in real mode? */
2593
+ bne 4f
2594
+ mtspr SPRN_DAWR0, r4
2595
+ mtspr SPRN_DAWRX0, r5
2596
+4: li r3, 0
26412597 blr
26422598
26432599 _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */
....@@ -2718,11 +2674,13 @@
27182674 b 91f
27192675 END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
27202676 /*
2721
- * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
2677
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
27222678 */
27232679 ld r3, HSTATE_KVM_VCPU(r13)
27242680 ld r4, VCPU_MSR(r3)
2681
+ li r5, 0 /* don't preserve non-vol regs */
27252682 bl kvmppc_save_tm_hv
2683
+ nop
27262684 91:
27272685 #endif
27282686
....@@ -2763,6 +2721,9 @@
27632721
27642722 lis r3, LPCR_PECEDP@h /* Do wake on privileged doorbell */
27652723
2724
+ /* Go back to host stack */
2725
+ ld r1, HSTATE_HOST_R1(r13)
2726
+
27662727 /*
27672728 * Take a nap until a decrementer or external or doobell interrupt
27682729 * occurs, with PECE1 and PECE0 set in LPCR.
....@@ -2791,26 +2752,42 @@
27912752 * requested level = 0 (just stop dispatching)
27922753 */
27932754 lis r3, (PSSCR_EC | PSSCR_ESL)@h
2794
- mtspr SPRN_PSSCR, r3
27952755 /* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
27962756 li r4, LPCR_PECE_HVEE@higher
27972757 sldi r4, r4, 32
27982758 or r5, r5, r4
2799
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
2759
+FTR_SECTION_ELSE
2760
+ li r3, PNV_THREAD_NAP
2761
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
28002762 mtspr SPRN_LPCR,r5
28012763 isync
2802
- li r0, 0
2803
- std r0, HSTATE_SCRATCH0(r13)
2804
- ptesync
2805
- ld r0, HSTATE_SCRATCH0(r13)
2806
-1: cmpd r0, r0
2807
- bne 1b
2764
+
28082765 BEGIN_FTR_SECTION
2809
- nap
2766
+ bl isa300_idle_stop_mayloss
28102767 FTR_SECTION_ELSE
2811
- PPC_STOP
2812
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
2813
- b .
2768
+ bl isa206_idle_insn_mayloss
2769
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
2770
+
2771
+ mfspr r0, SPRN_CTRLF
2772
+ ori r0, r0, 1
2773
+ mtspr SPRN_CTRLT, r0
2774
+
2775
+ mtspr SPRN_SRR1, r3
2776
+
2777
+ li r0, 0
2778
+ stb r0, PACA_FTRACE_ENABLED(r13)
2779
+
2780
+ li r0, KVM_HWTHREAD_IN_KVM
2781
+ stb r0, HSTATE_HWTHREAD_STATE(r13)
2782
+
2783
+ lbz r0, HSTATE_NAPPING(r13)
2784
+ cmpwi r0, NAPPING_CEDE
2785
+ beq kvm_end_cede
2786
+ cmpwi r0, NAPPING_NOVCPU
2787
+ beq kvm_novcpu_wakeup
2788
+ cmpwi r0, NAPPING_UNSPLIT
2789
+ beq kvm_unsplit_wakeup
2790
+ twi 31,0,0 /* Nap state must not be zero */
28142791
28152792 33: mr r4, r3
28162793 li r3, 0
....@@ -2818,11 +2795,10 @@
28182795 b 34f
28192796
28202797 kvm_end_cede:
2798
+ /* Woken by external or decrementer interrupt */
2799
+
28212800 /* get vcpu pointer */
28222801 ld r4, HSTATE_KVM_VCPU(r13)
2823
-
2824
- /* Woken by external or decrementer interrupt */
2825
- ld r1, HSTATE_HOST_R1(r13)
28262802
28272803 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
28282804 addi r3, r4, VCPU_TB_RMINTR
....@@ -2838,11 +2814,13 @@
28382814 b 91f
28392815 END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
28402816 /*
2841
- * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
2817
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
28422818 */
28432819 mr r3, r4
28442820 ld r4, VCPU_MSR(r3)
2821
+ li r5, 0 /* don't preserve non-vol regs */
28452822 bl kvmppc_restore_tm_hv
2823
+ nop
28462824 ld r4, HSTATE_KVM_VCPU(r13)
28472825 91:
28482826 #endif
....@@ -2910,13 +2888,7 @@
29102888 mr r9, r4
29112889 cmpdi r3, 0
29122890 bgt guest_exit_cont
2913
-
2914
- /* see if any other thread is already exiting */
2915
- lwz r0,VCORE_ENTRY_EXIT(r5)
2916
- cmpwi r0,0x100
2917
- bge guest_exit_cont
2918
-
2919
- b kvmppc_cede_reentry /* if not go back to guest */
2891
+ b maybe_reenter_guest
29202892
29212893 /* cede when already previously prodded case */
29222894 kvm_cede_prodded:
....@@ -2949,6 +2921,11 @@
29492921 beq 4f
29502922 li r0, 0
29512923 stb r0, VCPU_CEDED(r9)
2924
+ /*
2925
+ * The escalation interrupts are special as we don't EOI them.
2926
+ * There is no need to use the load-after-store ordering offset
2927
+ * to set PQ to 10 as we won't use StoreEOI.
2928
+ */
29522929 li r6, XIVE_ESB_SET_PQ_10
29532930 b 5f
29542931 4: li r0, 1
....@@ -2967,49 +2944,30 @@
29672944 #endif /* CONFIG_KVM_XICS */
29682945 3: b guest_exit_cont
29692946
2970
- /* Try to handle a machine check in real mode */
2947
+ /* Try to do machine check recovery in real mode */
29712948 machine_check_realmode:
29722949 mr r3, r9 /* get vcpu pointer */
29732950 bl kvmppc_realmode_machine_check
29742951 nop
2952
+ /* all machine checks go to virtual mode for further handling */
29752953 ld r9, HSTATE_KVM_VCPU(r13)
29762954 li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
2977
- /*
2978
- * For the guest that is FWNMI capable, deliver all the MCE errors
2979
- * (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit
2980
- * reason. This new approach injects machine check errors in guest
2981
- * address space to guest with additional information in the form
2982
- * of RTAS event, thus enabling guest kernel to suitably handle
2983
- * such errors.
2984
- *
2985
- * For the guest that is not FWNMI capable (old QEMU) fallback
2986
- * to old behaviour for backward compatibility:
2987
- * Deliver unhandled/fatal (e.g. UE) MCE errors to guest either
2988
- * through machine check interrupt (set HSRR0 to 0x200).
2989
- * For handled errors (no-fatal), just go back to guest execution
2990
- * with current HSRR0.
2991
- * if we receive machine check with MSR(RI=0) then deliver it to
2992
- * guest as machine check causing guest to crash.
2993
- */
2994
- ld r11, VCPU_MSR(r9)
2995
- rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
2996
- bne mc_cont /* if so, exit to host */
2997
- /* Check if guest is capable of handling NMI exit */
2998
- ld r10, VCPU_KVM(r9)
2999
- lbz r10, KVM_FWNMI(r10)
3000
- cmpdi r10, 1 /* FWNMI capable? */
3001
- beq mc_cont /* if so, exit with KVM_EXIT_NMI. */
2955
+ b guest_exit_cont
30022956
3003
- /* if not, fall through for backward compatibility. */
3004
- andi. r10, r11, MSR_RI /* check for unrecoverable exception */
3005
- beq 1f /* Deliver a machine check to guest */
3006
- ld r10, VCPU_PC(r9)
3007
- cmpdi r3, 0 /* Did we handle MCE ? */
3008
- bne 2f /* Continue guest execution. */
3009
- /* If not, deliver a machine check. SRR0/1 are already set */
3010
-1: li r10, BOOK3S_INTERRUPT_MACHINE_CHECK
3011
- bl kvmppc_msr_interrupt
3012
-2: b fast_interrupt_c_return
2957
+/*
2958
+ * Call C code to handle a HMI in real mode.
2959
+ * Only the primary thread does the call, secondary threads are handled
2960
+ * by calling hmi_exception_realmode() after kvmppc_hv_entry returns.
2961
+ * r9 points to the vcpu on entry
2962
+ */
2963
+hmi_realmode:
2964
+ lbz r0, HSTATE_PTID(r13)
2965
+ cmpwi r0, 0
2966
+ bne guest_exit_cont
2967
+ bl kvmppc_realmode_hmi_handler
2968
+ ld r9, HSTATE_KVM_VCPU(r13)
2969
+ li r12, BOOK3S_INTERRUPT_HMI
2970
+ b guest_exit_cont
30132971
30142972 /*
30152973 * Check the reason we woke from nap, and take appropriate action.
....@@ -3176,10 +3134,12 @@
31763134 * Save transactional state and TM-related registers.
31773135 * Called with r3 pointing to the vcpu struct and r4 containing
31783136 * the guest MSR value.
3179
- * This can modify all checkpointed registers, but
3137
+ * r5 is non-zero iff non-volatile register state needs to be maintained.
3138
+ * If r5 == 0, this can modify all checkpointed registers, but
31803139 * restores r1 and r2 before exit.
31813140 */
3182
-kvmppc_save_tm_hv:
3141
+_GLOBAL_TOC(kvmppc_save_tm_hv)
3142
+EXPORT_SYMBOL_GPL(kvmppc_save_tm_hv)
31833143 /* See if we need to handle fake suspend mode */
31843144 BEGIN_FTR_SECTION
31853145 b __kvmppc_save_tm
....@@ -3192,7 +3152,7 @@
31923152 /* The following code handles the fake_suspend = 1 case */
31933153 mflr r0
31943154 std r0, PPC_LR_STKOFF(r1)
3195
- stdu r1, -PPC_MIN_STKFRM(r1)
3155
+ stdu r1, -TM_FRAME_SIZE(r1)
31963156
31973157 /* Turn on TM. */
31983158 mfmsr r8
....@@ -3207,36 +3167,53 @@
32073167 END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
32083168 nop
32093169
3170
+ /*
3171
+ * It's possible that treclaim. may modify registers, if we have lost
3172
+ * track of fake-suspend state in the guest due to it using rfscv.
3173
+ * Save and restore registers in case this occurs.
3174
+ */
3175
+ mfspr r3, SPRN_DSCR
3176
+ mfspr r4, SPRN_XER
3177
+ mfspr r5, SPRN_AMR
3178
+ /* SPRN_TAR would need to be saved here if the kernel ever used it */
3179
+ mfcr r12
3180
+ SAVE_NVGPRS(r1)
3181
+ SAVE_GPR(2, r1)
3182
+ SAVE_GPR(3, r1)
3183
+ SAVE_GPR(4, r1)
3184
+ SAVE_GPR(5, r1)
3185
+ stw r12, 8(r1)
32103186 std r1, HSTATE_HOST_R1(r13)
3211
-
3212
- /* Clear the MSR RI since r1, r13 may be foobar. */
3213
- li r5, 0
3214
- mtmsrd r5, 1
32153187
32163188 /* We have to treclaim here because that's the only way to do S->N */
32173189 li r3, TM_CAUSE_KVM_RESCHED
32183190 TRECLAIM(R3)
32193191
3192
+ GET_PACA(r13)
3193
+ ld r1, HSTATE_HOST_R1(r13)
3194
+ REST_GPR(2, r1)
3195
+ REST_GPR(3, r1)
3196
+ REST_GPR(4, r1)
3197
+ REST_GPR(5, r1)
3198
+ lwz r12, 8(r1)
3199
+ REST_NVGPRS(r1)
3200
+ mtspr SPRN_DSCR, r3
3201
+ mtspr SPRN_XER, r4
3202
+ mtspr SPRN_AMR, r5
3203
+ mtcr r12
3204
+ HMT_MEDIUM
3205
+
32203206 /*
32213207 * We were in fake suspend, so we are not going to save the
32223208 * register state as the guest checkpointed state (since
32233209 * we already have it), therefore we can now use any volatile GPR.
3210
+ * In fact treclaim in fake suspend state doesn't modify
3211
+ * any registers.
32243212 */
3225
- /* Reload PACA pointer, stack pointer and TOC. */
3226
- GET_PACA(r13)
3227
- ld r1, HSTATE_HOST_R1(r13)
3228
- ld r2, PACATOC(r13)
32293213
3230
- /* Set MSR RI now we have r1 and r13 back. */
3231
- li r5, MSR_RI
3232
- mtmsrd r5, 1
3233
-
3234
- HMT_MEDIUM
3235
- ld r6, HSTATE_DSCR(r13)
3236
- mtspr SPRN_DSCR, r6
3237
-BEGIN_FTR_SECTION_NESTED(96)
3214
+BEGIN_FTR_SECTION
32383215 bl pnv_power9_force_smt4_release
3239
-END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
3216
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
32403217 nop
32413218
32423219 4:
....@@ -3253,7 +3230,7 @@
32533230 std r5, VCPU_TFHAR(r9)
32543231 std r6, VCPU_TFIAR(r9)
32553232
3256
- addi r1, r1, PPC_MIN_STKFRM
3233
+ addi r1, r1, TM_FRAME_SIZE
32573234 ld r0, PPC_LR_STKOFF(r1)
32583235 mtlr r0
32593236 blr
....@@ -3262,10 +3239,12 @@
32623239 * Restore transactional state and TM-related registers.
32633240 * Called with r3 pointing to the vcpu struct
32643241 * and r4 containing the guest MSR value.
3242
+ * r5 is non-zero iff non-volatile register state needs to be maintained.
32653243 * This potentially modifies all checkpointed registers.
32663244 * It restores r1 and r2 from the PACA.
32673245 */
3268
-kvmppc_restore_tm_hv:
3246
+_GLOBAL_TOC(kvmppc_restore_tm_hv)
3247
+EXPORT_SYMBOL_GPL(kvmppc_restore_tm_hv)
32693248 /*
32703249 * If we are doing TM emulation for the guest on a POWER9 DD2,
32713250 * then we don't actually do a trechkpt -- we either set up
....@@ -3324,7 +3303,6 @@
33243303 * r12 is (CR << 32) | vector
33253304 * r13 points to our PACA
33263305 * r12 is saved in HSTATE_SCRATCH0(r13)
3327
- * ctr is saved in HSTATE_SCRATCH1(r13) if RELOCATABLE
33283306 * r9 is saved in HSTATE_SCRATCH2(r13)
33293307 * r13 is saved in HSPRG1
33303308 * cfar is saved in HSTATE_CFAR(r13)
....@@ -3373,11 +3351,7 @@
33733351 ld r5, HSTATE_CFAR(r13)
33743352 std r5, ORIG_GPR3(r1)
33753353 mflr r3
3376
-#ifdef CONFIG_RELOCATABLE
3377
- ld r4, HSTATE_SCRATCH1(r13)
3378
-#else
33793354 mfctr r4
3380
-#endif
33813355 mfxer r5
33823356 lbz r6, PACAIRQSOFTMASK(r13)
33833357 std r3, _LINK(r1)
....@@ -3406,7 +3380,7 @@
34063380 mtspr SPRN_AMR, r0
34073381 mtspr SPRN_IAMR, r0
34083382 mtspr SPRN_CIABR, r0
3409
- mtspr SPRN_DAWRX, r0
3383
+ mtspr SPRN_DAWRX0, r0
34103384
34113385 BEGIN_MMU_FTR_SECTION
34123386 b 4f
....@@ -3470,6 +3444,218 @@
34703444 blr
34713445
34723446 /*
3447
+ * Load up guest PMU state. R3 points to the vcpu struct.
3448
+ */
3449
+_GLOBAL(kvmhv_load_guest_pmu)
3450
+EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu)
3451
+ mr r4, r3
3452
+ mflr r0
3453
+ li r3, 1
3454
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
3455
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
3456
+ isync
3457
+BEGIN_FTR_SECTION
3458
+ ld r3, VCPU_MMCR(r4)
3459
+ andi. r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
3460
+ cmpwi r5, MMCR0_PMAO
3461
+ beql kvmppc_fix_pmao
3462
+END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
3463
+ lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */
3464
+ lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */
3465
+ lwz r6, VCPU_PMC + 8(r4)
3466
+ lwz r7, VCPU_PMC + 12(r4)
3467
+ lwz r8, VCPU_PMC + 16(r4)
3468
+ lwz r9, VCPU_PMC + 20(r4)
3469
+ mtspr SPRN_PMC1, r3
3470
+ mtspr SPRN_PMC2, r5
3471
+ mtspr SPRN_PMC3, r6
3472
+ mtspr SPRN_PMC4, r7
3473
+ mtspr SPRN_PMC5, r8
3474
+ mtspr SPRN_PMC6, r9
3475
+ ld r3, VCPU_MMCR(r4)
3476
+ ld r5, VCPU_MMCR + 8(r4)
3477
+ ld r6, VCPU_MMCRA(r4)
3478
+ ld r7, VCPU_SIAR(r4)
3479
+ ld r8, VCPU_SDAR(r4)
3480
+ mtspr SPRN_MMCR1, r5
3481
+ mtspr SPRN_MMCRA, r6
3482
+ mtspr SPRN_SIAR, r7
3483
+ mtspr SPRN_SDAR, r8
3484
+BEGIN_FTR_SECTION
3485
+ ld r5, VCPU_MMCR + 24(r4)
3486
+ ld r6, VCPU_SIER + 8(r4)
3487
+ ld r7, VCPU_SIER + 16(r4)
3488
+ mtspr SPRN_MMCR3, r5
3489
+ mtspr SPRN_SIER2, r6
3490
+ mtspr SPRN_SIER3, r7
3491
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
3492
+BEGIN_FTR_SECTION
3493
+ ld r5, VCPU_MMCR + 16(r4)
3494
+ ld r6, VCPU_SIER(r4)
3495
+ mtspr SPRN_MMCR2, r5
3496
+ mtspr SPRN_SIER, r6
3497
+BEGIN_FTR_SECTION_NESTED(96)
3498
+ lwz r7, VCPU_PMC + 24(r4)
3499
+ lwz r8, VCPU_PMC + 28(r4)
3500
+ ld r9, VCPU_MMCRS(r4)
3501
+ mtspr SPRN_SPMC1, r7
3502
+ mtspr SPRN_SPMC2, r8
3503
+ mtspr SPRN_MMCRS, r9
3504
+END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
3505
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
3506
+ mtspr SPRN_MMCR0, r3
3507
+ isync
3508
+ mtlr r0
3509
+ blr
3510
+
3511
+/*
3512
+ * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
3513
+ */
3514
+_GLOBAL(kvmhv_load_host_pmu)
3515
+EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu)
3516
+ mflr r0
3517
+ lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
3518
+ cmpwi r4, 0
3519
+ beq 23f /* skip if not */
3520
+BEGIN_FTR_SECTION
3521
+ ld r3, HSTATE_MMCR0(r13)
3522
+ andi. r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
3523
+ cmpwi r4, MMCR0_PMAO
3524
+ beql kvmppc_fix_pmao
3525
+END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
3526
+ lwz r3, HSTATE_PMC1(r13)
3527
+ lwz r4, HSTATE_PMC2(r13)
3528
+ lwz r5, HSTATE_PMC3(r13)
3529
+ lwz r6, HSTATE_PMC4(r13)
3530
+ lwz r8, HSTATE_PMC5(r13)
3531
+ lwz r9, HSTATE_PMC6(r13)
3532
+ mtspr SPRN_PMC1, r3
3533
+ mtspr SPRN_PMC2, r4
3534
+ mtspr SPRN_PMC3, r5
3535
+ mtspr SPRN_PMC4, r6
3536
+ mtspr SPRN_PMC5, r8
3537
+ mtspr SPRN_PMC6, r9
3538
+ ld r3, HSTATE_MMCR0(r13)
3539
+ ld r4, HSTATE_MMCR1(r13)
3540
+ ld r5, HSTATE_MMCRA(r13)
3541
+ ld r6, HSTATE_SIAR(r13)
3542
+ ld r7, HSTATE_SDAR(r13)
3543
+ mtspr SPRN_MMCR1, r4
3544
+ mtspr SPRN_MMCRA, r5
3545
+ mtspr SPRN_SIAR, r6
3546
+ mtspr SPRN_SDAR, r7
3547
+BEGIN_FTR_SECTION
3548
+ ld r8, HSTATE_MMCR2(r13)
3549
+ ld r9, HSTATE_SIER(r13)
3550
+ mtspr SPRN_MMCR2, r8
3551
+ mtspr SPRN_SIER, r9
3552
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
3553
+BEGIN_FTR_SECTION
3554
+ ld r5, HSTATE_MMCR3(r13)
3555
+ ld r6, HSTATE_SIER2(r13)
3556
+ ld r7, HSTATE_SIER3(r13)
3557
+ mtspr SPRN_MMCR3, r5
3558
+ mtspr SPRN_SIER2, r6
3559
+ mtspr SPRN_SIER3, r7
3560
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
3561
+ mtspr SPRN_MMCR0, r3
3562
+ isync
3563
+ mtlr r0
3564
+23: blr
3565
+
3566
+/*
3567
+ * Save guest PMU state into the vcpu struct.
3568
+ * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
3569
+ */
3570
+_GLOBAL(kvmhv_save_guest_pmu)
3571
+EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu)
3572
+ mr r9, r3
3573
+ mr r8, r4
3574
+BEGIN_FTR_SECTION
3575
+ /*
3576
+ * POWER8 seems to have a hardware bug where setting
3577
+ * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
3578
+ * when some counters are already negative doesn't seem
3579
+ * to cause a performance monitor alert (and hence interrupt).
3580
+ * The effect of this is that when saving the PMU state,
3581
+ * if there is no PMU alert pending when we read MMCR0
3582
+ * before freezing the counters, but one becomes pending
3583
+ * before we read the counters, we lose it.
3584
+ * To work around this, we need a way to freeze the counters
3585
+ * before reading MMCR0. Normally, freezing the counters
3586
+ * is done by writing MMCR0 (to set MMCR0[FC]) which
3587
+ * unavoidably writes MMCR0[PMA0] as well. On POWER8,
3588
+ * we can also freeze the counters using MMCR2, by writing
3589
+ * 1s to all the counter freeze condition bits (there are
3590
+ * 9 bits each for 6 counters).
3591
+ */
3592
+ li r3, -1 /* set all freeze bits */
3593
+ clrrdi r3, r3, 10
3594
+ mfspr r10, SPRN_MMCR2
3595
+ mtspr SPRN_MMCR2, r3
3596
+ isync
3597
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
3598
+ li r3, 1
3599
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
3600
+ mfspr r4, SPRN_MMCR0 /* save MMCR0 */
3601
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
3602
+ mfspr r6, SPRN_MMCRA
3603
+ /* Clear MMCRA in order to disable SDAR updates */
3604
+ li r7, 0
3605
+ mtspr SPRN_MMCRA, r7
3606
+ isync
3607
+ cmpwi r8, 0 /* did they ask for PMU stuff to be saved? */
3608
+ bne 21f
3609
+ std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
3610
+ b 22f
3611
+21: mfspr r5, SPRN_MMCR1
3612
+ mfspr r7, SPRN_SIAR
3613
+ mfspr r8, SPRN_SDAR
3614
+ std r4, VCPU_MMCR(r9)
3615
+ std r5, VCPU_MMCR + 8(r9)
3616
+ std r6, VCPU_MMCRA(r9)
3617
+BEGIN_FTR_SECTION
3618
+ std r10, VCPU_MMCR + 16(r9)
3619
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
3620
+BEGIN_FTR_SECTION
3621
+ mfspr r5, SPRN_MMCR3
3622
+ mfspr r6, SPRN_SIER2
3623
+ mfspr r7, SPRN_SIER3
3624
+ std r5, VCPU_MMCR + 24(r9)
3625
+ std r6, VCPU_SIER + 8(r9)
3626
+ std r7, VCPU_SIER + 16(r9)
3627
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
3628
+ std r7, VCPU_SIAR(r9)
3629
+ std r8, VCPU_SDAR(r9)
3630
+ mfspr r3, SPRN_PMC1
3631
+ mfspr r4, SPRN_PMC2
3632
+ mfspr r5, SPRN_PMC3
3633
+ mfspr r6, SPRN_PMC4
3634
+ mfspr r7, SPRN_PMC5
3635
+ mfspr r8, SPRN_PMC6
3636
+ stw r3, VCPU_PMC(r9)
3637
+ stw r4, VCPU_PMC + 4(r9)
3638
+ stw r5, VCPU_PMC + 8(r9)
3639
+ stw r6, VCPU_PMC + 12(r9)
3640
+ stw r7, VCPU_PMC + 16(r9)
3641
+ stw r8, VCPU_PMC + 20(r9)
3642
+BEGIN_FTR_SECTION
3643
+ mfspr r5, SPRN_SIER
3644
+ std r5, VCPU_SIER(r9)
3645
+BEGIN_FTR_SECTION_NESTED(96)
3646
+ mfspr r6, SPRN_SPMC1
3647
+ mfspr r7, SPRN_SPMC2
3648
+ mfspr r8, SPRN_MMCRS
3649
+ stw r6, VCPU_PMC + 24(r9)
3650
+ stw r7, VCPU_PMC + 28(r9)
3651
+ std r8, VCPU_MMCRS(r9)
3652
+ lis r4, 0x8000
3653
+ mtspr SPRN_MMCRS, r4
3654
+END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
3655
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
3656
+22: blr
3657
+
3658
+/*
34733659 * This works around a hardware bug on POWER8E processors, where
34743660 * writing a 1 to the MMCR0[PMAO] bit doesn't generate a
34753661 * performance monitor interrupt. Instead, when we need to have