From 50a212ec906f7524620675f0c57357691c26c81f Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Wed, 16 Oct 2024 01:20:19 +0000
Subject: [PATCH] 修改GPIO导出默认初始值
---
kernel/arch/powerpc/kvm/book3s_64_vio_hv.c | 271 +++++++++++++++++++++++++++++++----------------------
1 files changed, 159 insertions(+), 112 deletions(-)
diff --git a/kernel/arch/powerpc/kvm/book3s_64_vio_hv.c b/kernel/arch/powerpc/kvm/book3s_64_vio_hv.c
index d258ed4..57af53a 100644
--- a/kernel/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/kernel/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -1,16 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
* Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
@@ -35,7 +24,6 @@
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
-#include <asm/kvm_host.h>
#include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
@@ -44,7 +32,7 @@
#ifdef CONFIG_BUG
#define WARN_ON_ONCE_RM(condition) ({ \
- static bool __section(.data.unlikely) __warned; \
+ static bool __section(".data.unlikely") __warned; \
int __ret_warn_once = !!(condition); \
\
if (unlikely(__ret_warn_once && !__warned)) { \
@@ -66,8 +54,6 @@
#endif
-#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
-
/*
* Finds a TCE table descriptor by LIOBN.
*
@@ -87,6 +73,23 @@
}
EXPORT_SYMBOL_GPL(kvmppc_find_table);
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+static long kvmppc_rm_tce_to_ua(struct kvm *kvm,
+ unsigned long tce, unsigned long *ua)
+{
+ unsigned long gfn = tce >> PAGE_SHIFT;
+ struct kvm_memory_slot *memslot;
+
+ memslot = search_memslots(kvm_memslots_raw(kvm), gfn);
+ if (!memslot)
+ return -EINVAL;
+
+ *ua = __gfn_to_hva_memslot(memslot, gfn) |
+ (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
+
+ return 0;
+}
+
/*
* Validates TCE address.
* At the moment flags and page mask are validated.
@@ -94,14 +97,14 @@
* to the table and user space is supposed to process them), we can skip
* checking other things (such as TCE is a guest RAM address or the page
* was actually allocated).
- *
- * WARNING: This will be called in real-mode on HV KVM and virtual
- * mode on PR KVM
*/
-long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
+static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
+ unsigned long tce)
{
unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
enum dma_data_direction dir = iommu_tce_direction(tce);
+ struct kvmppc_spapr_tce_iommu_table *stit;
+ unsigned long ua = 0;
/* Allow userspace to poison TCE table */
if (dir == DMA_NONE)
@@ -110,9 +113,24 @@
if (iommu_tce_check_gpa(stt->page_shift, gpa))
return H_PARAMETER;
+ if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua))
+ return H_TOO_HARD;
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ unsigned long hpa = 0;
+ struct mm_iommu_table_group_mem_t *mem;
+ long shift = stit->tbl->it_page_shift;
+
+ mem = mm_iommu_lookup_rm(stt->kvm->mm, ua, 1ULL << shift);
+ if (!mem)
+ return H_TOO_HARD;
+
+ if (mm_iommu_ua_to_hpa_rm(mem, ua, shift, &hpa))
+ return H_TOO_HARD;
+ }
+
return H_SUCCESS;
}
-EXPORT_SYMBOL_GPL(kvmppc_tce_validate);
/* Note on the use of page_address() in real mode,
*
@@ -144,13 +162,9 @@
/*
* Handles TCE requests for emulated devices.
* Puts guest TCE values to the table and expects user space to convert them.
- * Called in both real and virtual modes.
- * Cannot fail so kvmppc_tce_validate must be called before it.
- *
- * WARNING: This will be called in real-mode on HV KVM and virtual
- * mode on PR KVM
+ * Cannot fail so kvmppc_rm_tce_validate must be called before it.
*/
-void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
+static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt,
unsigned long idx, unsigned long tce)
{
struct page *page;
@@ -158,46 +172,63 @@
idx -= stt->offset;
page = stt->pages[idx / TCES_PER_PAGE];
+ /*
+ * kvmppc_rm_ioba_validate() allows pages not be allocated if TCE is
+ * being cleared, otherwise it returns H_TOO_HARD and we skip this.
+ */
+ if (!page) {
+ WARN_ON_ONCE_RM(tce != 0);
+ return;
+ }
tbl = kvmppc_page_address(page);
tbl[idx % TCES_PER_PAGE] = tce;
}
-EXPORT_SYMBOL_GPL(kvmppc_tce_put);
-long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
- unsigned long *ua, unsigned long **prmap)
+/*
+ * TCEs pages are allocated in kvmppc_rm_tce_put() which won't be able to do so
+ * in real mode.
+ * Check if kvmppc_rm_tce_put() can succeed in real mode, i.e. a TCEs page is
+ * allocated or not required (when clearing a tce entry).
+ */
+static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt,
+ unsigned long ioba, unsigned long npages, bool clearing)
{
- unsigned long gfn = gpa >> PAGE_SHIFT;
- struct kvm_memory_slot *memslot;
+ unsigned long i, idx, sttpage, sttpages;
+ unsigned long ret = kvmppc_ioba_validate(stt, ioba, npages);
- memslot = search_memslots(kvm_memslots(kvm), gfn);
- if (!memslot)
- return -EINVAL;
+ if (ret)
+ return ret;
+ /*
+ * clearing==true says kvmppc_rm_tce_put won't be allocating pages
+ * for empty tces.
+ */
+ if (clearing)
+ return H_SUCCESS;
- *ua = __gfn_to_hva_memslot(memslot, gfn) |
- (gpa & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
+ idx = (ioba >> stt->page_shift) - stt->offset;
+ sttpage = idx / TCES_PER_PAGE;
+ sttpages = ALIGN(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) /
+ TCES_PER_PAGE;
+ for (i = sttpage; i < sttpage + sttpages; ++i)
+ if (!stt->pages[i])
+ return H_TOO_HARD;
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- if (prmap)
- *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
-#endif
-
- return 0;
+ return H_SUCCESS;
}
-EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
+static long iommu_tce_xchg_no_kill_rm(struct mm_struct *mm,
+ struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction)
{
long ret;
- ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+ ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, true);
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
(*direction == DMA_BIDIRECTIONAL))) {
- __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
+ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
/*
* kvmppc_rm_tce_iommu_do_map() updates the UA cache after
* calling this so we still get here a valid UA.
@@ -209,13 +240,26 @@
return ret;
}
-static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl,
- unsigned long entry)
+static void iommu_tce_kill_rm(struct iommu_table *tbl,
+ unsigned long entry, unsigned long pages)
{
- unsigned long hpa = 0;
- enum dma_data_direction dir = DMA_NONE;
+ if (tbl->it_ops->tce_kill)
+ tbl->it_ops->tce_kill(tbl, entry, pages, true);
+}
- iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
+static void kvmppc_rm_clear_tce(struct kvm *kvm, struct kvmppc_spapr_tce_table *stt,
+ struct iommu_table *tbl, unsigned long entry)
+{
+ unsigned long i;
+ unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
+ unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift);
+
+ for (i = 0; i < subpages; ++i) {
+ unsigned long hpa = 0;
+ enum dma_data_direction dir = DMA_NONE;
+
+ iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, io_entry + i, &hpa, &dir);
+ }
}
static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -223,7 +267,7 @@
{
struct mm_iommu_table_group_mem_t *mem = NULL;
const unsigned long pgsize = 1ULL << tbl->it_page_shift;
- __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
+ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
if (!pua)
/* it_userspace allocation might be delayed */
@@ -247,7 +291,7 @@
unsigned long hpa = 0;
long ret;
- if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir))
+ if (iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir))
/*
* real mode xchg can fail if struct page crosses
* a page boundary
@@ -259,7 +303,7 @@
ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
if (ret)
- iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
+ iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir);
return ret;
}
@@ -278,6 +322,8 @@
break;
}
+ iommu_tce_kill_rm(tbl, io_entry, subpages);
+
return ret;
}
@@ -287,7 +333,7 @@
{
long ret;
unsigned long hpa = 0;
- __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
+ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
struct mm_iommu_table_group_mem_t *mem;
if (!pua)
@@ -305,7 +351,7 @@
if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
return H_TOO_HARD;
- ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
+ ret = iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir);
if (ret) {
mm_iommu_mapped_dec(mem);
/*
@@ -341,6 +387,8 @@
break;
}
+ iommu_tce_kill_rm(tbl, io_entry, subpages);
+
return ret;
}
@@ -364,17 +412,16 @@
if (!stt)
return H_TOO_HARD;
- ret = kvmppc_ioba_validate(stt, ioba, 1);
+ ret = kvmppc_rm_ioba_validate(stt, ioba, 1, tce == 0);
if (ret != H_SUCCESS)
return ret;
- ret = kvmppc_tce_validate(stt, tce);
+ ret = kvmppc_rm_tce_validate(stt, tce);
if (ret != H_SUCCESS)
return ret;
dir = iommu_tce_direction(tce);
- if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm,
- tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL))
+ if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua))
return H_PARAMETER;
entry = ioba >> stt->page_shift;
@@ -387,23 +434,19 @@
ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
stit->tbl, entry, ua, dir);
- if (ret == H_SUCCESS)
- continue;
-
- if (ret == H_TOO_HARD)
+ if (ret != H_SUCCESS) {
+ kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry);
return ret;
-
- WARN_ON_ONCE_RM(1);
- kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+ }
}
- kvmppc_tce_put(stt, entry, tce);
+ kvmppc_rm_tce_put(stt, entry, tce);
return H_SUCCESS;
}
-static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
- unsigned long ua, unsigned long *phpa)
+static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
+ unsigned long ua, unsigned long *phpa)
{
pte_t *ptep, pte;
unsigned shift = 0;
@@ -417,10 +460,17 @@
* to exit which will agains result in the below page table walk
* to finish.
*/
- ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift);
- if (!ptep || !pte_present(*ptep))
+ /* an rmap lock won't make it safe. because that just ensure hash
+ * page table entries are removed with rmap lock held. After that
+ * mmu notifier returns and we go ahead and removing ptes from Qemu page table.
+ */
+ ptep = find_kvm_host_pte(vcpu->kvm, mmu_seq, ua, &shift);
+ if (!ptep)
return -ENXIO;
- pte = *ptep;
+
+ pte = READ_ONCE(*ptep);
+ if (!pte_present(pte))
+ return -ENXIO;
if (!shift)
shift = PAGE_SHIFT;
@@ -442,16 +492,23 @@
unsigned long liobn, unsigned long ioba,
unsigned long tce_list, unsigned long npages)
{
+ struct kvm *kvm = vcpu->kvm;
struct kvmppc_spapr_tce_table *stt;
long i, ret = H_SUCCESS;
unsigned long tces, entry, ua = 0;
- unsigned long *rmap = NULL;
+ unsigned long mmu_seq;
bool prereg = false;
struct kvmppc_spapr_tce_iommu_table *stit;
/* For radix, we might be in virtual mode, so punt */
if (kvm_is_radix(vcpu->kvm))
return H_TOO_HARD;
+
+ /*
+ * used to check for invalidations in progress
+ */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
@@ -468,7 +525,7 @@
if (tce_list & (SZ_4K - 1))
return H_PARAMETER;
- ret = kvmppc_ioba_validate(stt, ioba, npages);
+ ret = kvmppc_rm_ioba_validate(stt, ioba, npages, false);
if (ret != H_SUCCESS)
return ret;
@@ -480,7 +537,7 @@
*/
struct mm_iommu_table_group_mem_t *mem;
- if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL))
+ if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua))
return H_TOO_HARD;
mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
@@ -496,23 +553,11 @@
* We do not require memory to be preregistered in this case
* so lock rmap and do __find_linux_pte_or_hugepte().
*/
- if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
+ if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua))
return H_TOO_HARD;
- rmap = (void *) vmalloc_to_phys(rmap);
- if (WARN_ON_ONCE_RM(!rmap))
- return H_TOO_HARD;
-
- /*
- * Synchronize with the MMU notifier callbacks in
- * book3s_64_mmu_hv.c (kvm_unmap_hva_range_hv etc.).
- * While we have the rmap lock, code running on other CPUs
- * cannot finish unmapping the host real page that backs
- * this guest real page, so we are OK to access the host
- * real page.
- */
- lock_rmap(rmap);
- if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
+ arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
+ if (kvmppc_rm_ua_to_hpa(vcpu, mmu_seq, ua, &tces)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
@@ -521,14 +566,16 @@
for (i = 0; i < npages; ++i) {
unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
- ret = kvmppc_tce_validate(stt, tce);
+ ret = kvmppc_rm_tce_validate(stt, tce);
if (ret != H_SUCCESS)
goto unlock_exit;
+ }
+
+ for (i = 0; i < npages; ++i) {
+ unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
ua = 0;
- if (kvmppc_gpa_to_ua(vcpu->kvm,
- tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
- &ua, NULL)) {
+ if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua)) {
ret = H_PARAMETER;
goto unlock_exit;
}
@@ -538,23 +585,19 @@
stit->tbl, entry + i, ua,
iommu_tce_direction(tce));
- if (ret == H_SUCCESS)
- continue;
-
- if (ret == H_TOO_HARD)
+ if (ret != H_SUCCESS) {
+ kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl,
+ entry + i);
goto unlock_exit;
-
- WARN_ON_ONCE_RM(1);
- kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+ }
}
- kvmppc_tce_put(stt, entry + i, tce);
+ kvmppc_rm_tce_put(stt, entry + i, tce);
}
unlock_exit:
- if (rmap)
- unlock_rmap(rmap);
-
+ if (!prereg)
+ arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
return ret;
}
@@ -574,7 +617,7 @@
if (!stt)
return H_TOO_HARD;
- ret = kvmppc_ioba_validate(stt, ioba, npages);
+ ret = kvmppc_rm_ioba_validate(stt, ioba, npages, tce_value == 0);
if (ret != H_SUCCESS)
return ret;
@@ -596,14 +639,14 @@
return ret;
WARN_ON_ONCE_RM(1);
- kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+ kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry + i);
}
}
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
- kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
+ kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value);
- return H_SUCCESS;
+ return ret;
}
/* This can be called in either virtual mode or real mode */
@@ -626,6 +669,10 @@
idx = (ioba >> stt->page_shift) - stt->offset;
page = stt->pages[idx / TCES_PER_PAGE];
+ if (!page) {
+ vcpu->arch.regs.gpr[4] = 0;
+ return H_SUCCESS;
+ }
tbl = (u64 *)page_address(page);
vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE];
--
Gitblit v1.6.2