forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-11 04dd17822334871b23ea2862f7798fb0e0007777
kernel/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
  */
 
 #include <linux/cpu.h>
@@ -98,25 +95,17 @@
 void __init kvm_cma_reserve(void)
 {
 	unsigned long align_size;
-	struct memblock_region *reg;
-	phys_addr_t selected_size = 0;
+	phys_addr_t selected_size;
 
 	/*
 	 * We need CMA reservation only when we are in HV mode
 	 */
 	if (!cpu_has_feature(CPU_FTR_HVMODE))
 		return;
-	/*
-	 * We cannot use memblock_phys_mem_size() here, because
-	 * memblock_analyze() has not been called yet.
-	 */
-	for_each_memblock(memory, reg)
-		selected_size += memblock_region_memory_end_pfn(reg) -
-				 memblock_region_memory_base_pfn(reg);
 
-	selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
+	selected_size = PAGE_ALIGN(memblock_phys_mem_size() * kvm_cma_resv_ratio / 100);
 	if (selected_size) {
-		pr_debug("%s: reserving %ld MiB for global area\n", __func__,
+		pr_info("%s: reserving %ld MiB for global area\n", __func__,
 			 (unsigned long)selected_size / SZ_1M);
 		align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
 		cma_declare_contiguous(0, selected_size, 0, align_size,
@@ -231,6 +220,15 @@
 	void __iomem *xics_phys;
 	unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
 
+	/* For a nested hypervisor, use the XICS via hcall */
+	if (kvmhv_on_pseries()) {
+		unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+		plpar_hcall_raw(H_IPI, retbuf, get_hard_smp_processor_id(cpu),
+				IPI_PRIORITY);
+		return;
+	}
+
 	/* On POWER9 we can use msgsnd for any destination cpu. */
 	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
 		msg |= get_hard_smp_processor_id(cpu);
@@ -248,7 +246,7 @@
 	}
 
 	/* We should never reach this */
-	if (WARN_ON_ONCE(xive_enabled()))
+	if (WARN_ON_ONCE(xics_on_xive()))
 		return;
 
 	/* Else poke the target with an IPI */
@@ -460,12 +458,19 @@
 		return 1;
 
 	/* Now read the interrupt from the ICP */
-	xics_phys = local_paca->kvm_hstate.xics_phys;
-	rc = 0;
-	if (!xics_phys)
-		rc = opal_int_get_xirr(&xirr, false);
-	else
-		xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
+	if (kvmhv_on_pseries()) {
+		unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+		rc = plpar_hcall_raw(H_XIRR, retbuf, 0xFF);
+		xirr = cpu_to_be32(retbuf[0]);
+	} else {
+		xics_phys = local_paca->kvm_hstate.xics_phys;
+		rc = 0;
+		if (!xics_phys)
+			rc = opal_int_get_xirr(&xirr, false);
+		else
+			xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
+	}
 	if (rc < 0)
 		return 1;
 
@@ -494,7 +499,13 @@
 	 */
 	if (xisr == XICS_IPI) {
 		rc = 0;
-		if (xics_phys) {
+		if (kvmhv_on_pseries()) {
+			unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+			plpar_hcall_raw(H_IPI, retbuf,
+					hard_smp_processor_id(), 0xff);
+			plpar_hcall_raw(H_EOI, retbuf, h_xirr);
+		} else if (xics_phys) {
 			__raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
 			__raw_rm_writel(xirr, xics_phys + XICS_XIRR);
 		} else {
@@ -520,7 +531,13 @@
 		/* We raced with the host,
 		 * we need to resend that IPI, bummer
 		 */
-		if (xics_phys)
+		if (kvmhv_on_pseries()) {
+			unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+			plpar_hcall_raw(H_IPI, retbuf,
+					hard_smp_processor_id(),
+					IPI_PRIORITY);
+		} else if (xics_phys)
 			__raw_rm_writeb(IPI_PRIORITY,
 					xics_phys + XICS_MFRR);
 		else
@@ -549,7 +566,7 @@
 {
 	if (!kvmppc_xics_enabled(vcpu))
 		return H_TOO_HARD;
-	if (xive_enabled()) {
+	if (xics_on_xive()) {
 		if (is_rm())
 			return xive_rm_h_xirr(vcpu);
 		if (unlikely(!__xive_vm_h_xirr))
@@ -564,7 +581,7 @@
 	if (!kvmppc_xics_enabled(vcpu))
 		return H_TOO_HARD;
 	vcpu->arch.regs.gpr[5] = get_tb();
-	if (xive_enabled()) {
+	if (xics_on_xive()) {
 		if (is_rm())
 			return xive_rm_h_xirr(vcpu);
 		if (unlikely(!__xive_vm_h_xirr))
@@ -578,7 +595,7 @@
 {
 	if (!kvmppc_xics_enabled(vcpu))
 		return H_TOO_HARD;
-	if (xive_enabled()) {
+	if (xics_on_xive()) {
 		if (is_rm())
 			return xive_rm_h_ipoll(vcpu, server);
 		if (unlikely(!__xive_vm_h_ipoll))
@@ -593,7 +610,7 @@
 {
 	if (!kvmppc_xics_enabled(vcpu))
 		return H_TOO_HARD;
-	if (xive_enabled()) {
+	if (xics_on_xive()) {
 		if (is_rm())
 			return xive_rm_h_ipi(vcpu, server, mfrr);
 		if (unlikely(!__xive_vm_h_ipi))
@@ -607,7 +624,7 @@
 {
 	if (!kvmppc_xics_enabled(vcpu))
 		return H_TOO_HARD;
-	if (xive_enabled()) {
+	if (xics_on_xive()) {
 		if (is_rm())
 			return xive_rm_h_cppr(vcpu, cppr);
 		if (unlikely(!__xive_vm_h_cppr))
@@ -621,7 +638,7 @@
 {
 	if (!kvmppc_xics_enabled(vcpu))
 		return H_TOO_HARD;
-	if (xive_enabled()) {
+	if (xics_on_xive()) {
 		if (is_rm())
 			return xive_rm_h_eoi(vcpu, xirr);
 		if (unlikely(!__xive_vm_h_eoi))
@@ -729,3 +746,168 @@
 	smp_mb();
 	local_paca->kvm_hstate.kvm_split_mode = NULL;
 }
+
+static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.ceded = 0;
+	if (vcpu->arch.timer_running) {
+		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+		vcpu->arch.timer_running = 0;
+	}
+}
+
+void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
+{
+	/*
+	 * Check for illegal transactional state bit combination
+	 * and if we find it, force the TS field to a safe state.
+	 */
+	if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
+		msr &= ~MSR_TS_MASK;
+	vcpu->arch.shregs.msr = msr;
+	kvmppc_end_cede(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvmppc_set_msr_hv);
+
+static void inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+	unsigned long msr, pc, new_msr, new_pc;
+
+	msr = kvmppc_get_msr(vcpu);
+	pc = kvmppc_get_pc(vcpu);
+	new_msr = vcpu->arch.intr_msr;
+	new_pc = vec;
+
+	/* If transactional, change to suspend mode on IRQ delivery */
+	if (MSR_TM_TRANSACTIONAL(msr))
+		new_msr |= MSR_TS_S;
+	else
+		new_msr |= msr & MSR_TS_MASK;
+
+	/*
+	 * Perform MSR and PC adjustment for LPCR[AIL]=3 if it is set and
+	 * applicable. AIL=2 is not supported.
+	 *
+	 * AIL does not apply to SRESET, MCE, or HMI (which is never
+	 * delivered to the guest), and does not apply if IR=0 or DR=0.
+	 */
+	if (vec != BOOK3S_INTERRUPT_SYSTEM_RESET &&
+	    vec != BOOK3S_INTERRUPT_MACHINE_CHECK &&
+	    (vcpu->arch.vcore->lpcr & LPCR_AIL) == LPCR_AIL_3 &&
+	    (msr & (MSR_IR|MSR_DR)) == (MSR_IR|MSR_DR)) {
+		new_msr |= MSR_IR | MSR_DR;
+		new_pc += 0xC000000000004000ULL;
+	}
+
+	kvmppc_set_srr0(vcpu, pc);
+	kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags);
+	kvmppc_set_pc(vcpu, new_pc);
+	vcpu->arch.shregs.msr = new_msr;
+}
+
+void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+	inject_interrupt(vcpu, vec, srr1_flags);
+	kvmppc_end_cede(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvmppc_inject_interrupt_hv);
+
+/*
+ * Is there a PRIV_DOORBELL pending for the guest (on POWER9)?
+ * Can we inject a Decrementer or a External interrupt?
+ */
+void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
+{
+	int ext;
+	unsigned long lpcr;
+
+	/* Insert EXTERNAL bit into LPCR at the MER bit position */
+	ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1;
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr |= ext << LPCR_MER_SH;
+	mtspr(SPRN_LPCR, lpcr);
+	isync();
+
+	if (vcpu->arch.shregs.msr & MSR_EE) {
+		if (ext) {
+			inject_interrupt(vcpu, BOOK3S_INTERRUPT_EXTERNAL, 0);
+		} else {
+			long int dec = mfspr(SPRN_DEC);
+			if (!(lpcr & LPCR_LD))
+				dec = (int) dec;
+			if (dec < 0)
+				inject_interrupt(vcpu,
+					BOOK3S_INTERRUPT_DECREMENTER, 0);
+		}
+	}
+
+	if (vcpu->arch.doorbell_request) {
+		mtspr(SPRN_DPDES, 1);
+		vcpu->arch.vcore->dpdes = 1;
+		smp_wmb();
+		vcpu->arch.doorbell_request = 0;
+	}
+}
+
+static void flush_guest_tlb(struct kvm *kvm)
+{
+	unsigned long rb, set;
+
+	rb = PPC_BIT(52);	/* IS = 2 */
+	if (kvm_is_radix(kvm)) {
+		/* R=1 PRS=1 RIC=2 */
+		asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+			     : : "r" (rb), "i" (1), "i" (1), "i" (2),
+			       "r" (0) : "memory");
+		for (set = 1; set < kvm->arch.tlb_sets; ++set) {
+			rb += PPC_BIT(51);	/* increment set number */
+			/* R=1 PRS=1 RIC=0 */
+			asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+				     : : "r" (rb), "i" (1), "i" (1), "i" (0),
+				       "r" (0) : "memory");
+		}
+		asm volatile("ptesync": : :"memory");
+		// POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
+		asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
+	} else {
+		for (set = 0; set < kvm->arch.tlb_sets; ++set) {
+			/* R=0 PRS=0 RIC=0 */
+			asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+				     : : "r" (rb), "i" (0), "i" (0), "i" (0),
+				       "r" (0) : "memory");
+			rb += PPC_BIT(51);	/* increment set number */
+		}
+		asm volatile("ptesync": : :"memory");
+		// POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
+		if (cpu_has_feature(CPU_FTR_ARCH_300))
+			asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
+	}
+}
+
+void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
+				 struct kvm_nested_guest *nested)
+{
+	cpumask_t *need_tlb_flush;
+
+	/*
+	 * On POWER9, individual threads can come in here, but the
+	 * TLB is shared between the 4 threads in a core, hence
+	 * invalidating on one thread invalidates for all.
+	 * Thus we make all 4 threads use the same bit.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		pcpu = cpu_first_tlb_thread_sibling(pcpu);
+
+	if (nested)
+		need_tlb_flush = &nested->need_tlb_flush;
+	else
+		need_tlb_flush = &kvm->arch.need_tlb_flush;
+
+	if (cpumask_test_cpu(pcpu, need_tlb_flush)) {
+		flush_guest_tlb(kvm);
+
+		/* Clear the bit after the TLB flush */
+		cpumask_clear_cpu(pcpu, need_tlb_flush);
+	}
+}
+EXPORT_SYMBOL_GPL(kvmppc_check_need_tlb_flush);