From 04dd17822334871b23ea2862f7798fb0e0007777 Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Sat, 11 May 2024 08:53:19 +0000 Subject: [PATCH] change otg to host mode --- kernel/arch/powerpc/kvm/book3s_64_vio_hv.c | 271 +++++++++++++++++++++++++++++++---------------------- 1 files changed, 159 insertions(+), 112 deletions(-) diff --git a/kernel/arch/powerpc/kvm/book3s_64_vio_hv.c b/kernel/arch/powerpc/kvm/book3s_64_vio_hv.c index d258ed4..57af53a 100644 --- a/kernel/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/kernel/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -1,16 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> @@ -35,7 +24,6 @@ #include <asm/hvcall.h> #include <asm/synch.h> #include <asm/ppc-opcode.h> -#include <asm/kvm_host.h> #include <asm/udbg.h> #include <asm/iommu.h> #include <asm/tce.h> @@ -44,7 +32,7 @@ #ifdef CONFIG_BUG #define WARN_ON_ONCE_RM(condition) ({ \ - static bool __section(.data.unlikely) __warned; \ + static bool __section(".data.unlikely") __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ @@ -66,8 +54,6 @@ #endif -#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) - /* * Finds a TCE table descriptor by LIOBN. * @@ -87,6 +73,23 @@ } EXPORT_SYMBOL_GPL(kvmppc_find_table); +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +static long kvmppc_rm_tce_to_ua(struct kvm *kvm, + unsigned long tce, unsigned long *ua) +{ + unsigned long gfn = tce >> PAGE_SHIFT; + struct kvm_memory_slot *memslot; + + memslot = search_memslots(kvm_memslots_raw(kvm), gfn); + if (!memslot) + return -EINVAL; + + *ua = __gfn_to_hva_memslot(memslot, gfn) | + (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); + + return 0; +} + /* * Validates TCE address. * At the moment flags and page mask are validated. @@ -94,14 +97,14 @@ * to the table and user space is supposed to process them), we can skip * checking other things (such as TCE is a guest RAM address or the page * was actually allocated). - * - * WARNING: This will be called in real-mode on HV KVM and virtual - * mode on PR KVM */ -long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce) +static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt, + unsigned long tce) { unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE); enum dma_data_direction dir = iommu_tce_direction(tce); + struct kvmppc_spapr_tce_iommu_table *stit; + unsigned long ua = 0; /* Allow userspace to poison TCE table */ if (dir == DMA_NONE) @@ -110,9 +113,24 @@ if (iommu_tce_check_gpa(stt->page_shift, gpa)) return H_PARAMETER; + if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua)) + return H_TOO_HARD; + + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { + unsigned long hpa = 0; + struct mm_iommu_table_group_mem_t *mem; + long shift = stit->tbl->it_page_shift; + + mem = mm_iommu_lookup_rm(stt->kvm->mm, ua, 1ULL << shift); + if (!mem) + return H_TOO_HARD; + + if (mm_iommu_ua_to_hpa_rm(mem, ua, shift, &hpa)) + return H_TOO_HARD; + } + return H_SUCCESS; } -EXPORT_SYMBOL_GPL(kvmppc_tce_validate); /* Note on the use of page_address() in real mode, * @@ -144,13 +162,9 @@ /* * Handles TCE requests for emulated devices. * Puts guest TCE values to the table and expects user space to convert them. - * Called in both real and virtual modes. - * Cannot fail so kvmppc_tce_validate must be called before it. - * - * WARNING: This will be called in real-mode on HV KVM and virtual - * mode on PR KVM + * Cannot fail so kvmppc_rm_tce_validate must be called before it. */ -void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, +static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt, unsigned long idx, unsigned long tce) { struct page *page; @@ -158,46 +172,63 @@ idx -= stt->offset; page = stt->pages[idx / TCES_PER_PAGE]; + /* + * kvmppc_rm_ioba_validate() allows pages not be allocated if TCE is + * being cleared, otherwise it returns H_TOO_HARD and we skip this. + */ + if (!page) { + WARN_ON_ONCE_RM(tce != 0); + return; + } tbl = kvmppc_page_address(page); tbl[idx % TCES_PER_PAGE] = tce; } -EXPORT_SYMBOL_GPL(kvmppc_tce_put); -long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, - unsigned long *ua, unsigned long **prmap) +/* + * TCEs pages are allocated in kvmppc_rm_tce_put() which won't be able to do so + * in real mode. + * Check if kvmppc_rm_tce_put() can succeed in real mode, i.e. a TCEs page is + * allocated or not required (when clearing a tce entry). + */ +static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt, + unsigned long ioba, unsigned long npages, bool clearing) { - unsigned long gfn = gpa >> PAGE_SHIFT; - struct kvm_memory_slot *memslot; + unsigned long i, idx, sttpage, sttpages; + unsigned long ret = kvmppc_ioba_validate(stt, ioba, npages); - memslot = search_memslots(kvm_memslots(kvm), gfn); - if (!memslot) - return -EINVAL; + if (ret) + return ret; + /* + * clearing==true says kvmppc_rm_tce_put won't be allocating pages + * for empty tces. + */ + if (clearing) + return H_SUCCESS; - *ua = __gfn_to_hva_memslot(memslot, gfn) | - (gpa & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); + idx = (ioba >> stt->page_shift) - stt->offset; + sttpage = idx / TCES_PER_PAGE; + sttpages = ALIGN(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) / + TCES_PER_PAGE; + for (i = sttpage; i < sttpage + sttpages; ++i) + if (!stt->pages[i]) + return H_TOO_HARD; -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - if (prmap) - *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; -#endif - - return 0; + return H_SUCCESS; } -EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua); -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl, +static long iommu_tce_xchg_no_kill_rm(struct mm_struct *mm, + struct iommu_table *tbl, unsigned long entry, unsigned long *hpa, enum dma_data_direction *direction) { long ret; - ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); + ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, true); if (!ret && ((*direction == DMA_FROM_DEVICE) || (*direction == DMA_BIDIRECTIONAL))) { - __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry); + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry); /* * kvmppc_rm_tce_iommu_do_map() updates the UA cache after * calling this so we still get here a valid UA. @@ -209,13 +240,26 @@ return ret; } -static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl, - unsigned long entry) +static void iommu_tce_kill_rm(struct iommu_table *tbl, + unsigned long entry, unsigned long pages) { - unsigned long hpa = 0; - enum dma_data_direction dir = DMA_NONE; + if (tbl->it_ops->tce_kill) + tbl->it_ops->tce_kill(tbl, entry, pages, true); +} - iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); +static void kvmppc_rm_clear_tce(struct kvm *kvm, struct kvmppc_spapr_tce_table *stt, + struct iommu_table *tbl, unsigned long entry) +{ + unsigned long i; + unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); + unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift); + + for (i = 0; i < subpages; ++i) { + unsigned long hpa = 0; + enum dma_data_direction dir = DMA_NONE; + + iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, io_entry + i, &hpa, &dir); + } } static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, @@ -223,7 +267,7 @@ { struct mm_iommu_table_group_mem_t *mem = NULL; const unsigned long pgsize = 1ULL << tbl->it_page_shift; - __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry); + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry); if (!pua) /* it_userspace allocation might be delayed */ @@ -247,7 +291,7 @@ unsigned long hpa = 0; long ret; - if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir)) + if (iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir)) /* * real mode xchg can fail if struct page crosses * a page boundary @@ -259,7 +303,7 @@ ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry); if (ret) - iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); + iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir); return ret; } @@ -278,6 +322,8 @@ break; } + iommu_tce_kill_rm(tbl, io_entry, subpages); + return ret; } @@ -287,7 +333,7 @@ { long ret; unsigned long hpa = 0; - __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry); + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry); struct mm_iommu_table_group_mem_t *mem; if (!pua) @@ -305,7 +351,7 @@ if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem))) return H_TOO_HARD; - ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); + ret = iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir); if (ret) { mm_iommu_mapped_dec(mem); /* @@ -341,6 +387,8 @@ break; } + iommu_tce_kill_rm(tbl, io_entry, subpages); + return ret; } @@ -364,17 +412,16 @@ if (!stt) return H_TOO_HARD; - ret = kvmppc_ioba_validate(stt, ioba, 1); + ret = kvmppc_rm_ioba_validate(stt, ioba, 1, tce == 0); if (ret != H_SUCCESS) return ret; - ret = kvmppc_tce_validate(stt, tce); + ret = kvmppc_rm_tce_validate(stt, tce); if (ret != H_SUCCESS) return ret; dir = iommu_tce_direction(tce); - if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, - tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) + if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua)) return H_PARAMETER; entry = ioba >> stt->page_shift; @@ -387,23 +434,19 @@ ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry, ua, dir); - if (ret == H_SUCCESS) - continue; - - if (ret == H_TOO_HARD) + if (ret != H_SUCCESS) { + kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry); return ret; - - WARN_ON_ONCE_RM(1); - kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); + } } - kvmppc_tce_put(stt, entry, tce); + kvmppc_rm_tce_put(stt, entry, tce); return H_SUCCESS; } -static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, - unsigned long ua, unsigned long *phpa) +static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq, + unsigned long ua, unsigned long *phpa) { pte_t *ptep, pte; unsigned shift = 0; @@ -417,10 +460,17 @@ * to exit which will agains result in the below page table walk * to finish. */ - ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift); - if (!ptep || !pte_present(*ptep)) + /* an rmap lock won't make it safe. because that just ensure hash + * page table entries are removed with rmap lock held. After that + * mmu notifier returns and we go ahead and removing ptes from Qemu page table. + */ + ptep = find_kvm_host_pte(vcpu->kvm, mmu_seq, ua, &shift); + if (!ptep) return -ENXIO; - pte = *ptep; + + pte = READ_ONCE(*ptep); + if (!pte_present(pte)) + return -ENXIO; if (!shift) shift = PAGE_SHIFT; @@ -442,16 +492,23 @@ unsigned long liobn, unsigned long ioba, unsigned long tce_list, unsigned long npages) { + struct kvm *kvm = vcpu->kvm; struct kvmppc_spapr_tce_table *stt; long i, ret = H_SUCCESS; unsigned long tces, entry, ua = 0; - unsigned long *rmap = NULL; + unsigned long mmu_seq; bool prereg = false; struct kvmppc_spapr_tce_iommu_table *stit; /* For radix, we might be in virtual mode, so punt */ if (kvm_is_radix(vcpu->kvm)) return H_TOO_HARD; + + /* + * used to check for invalidations in progress + */ + mmu_seq = kvm->mmu_notifier_seq; + smp_rmb(); stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) @@ -468,7 +525,7 @@ if (tce_list & (SZ_4K - 1)) return H_PARAMETER; - ret = kvmppc_ioba_validate(stt, ioba, npages); + ret = kvmppc_rm_ioba_validate(stt, ioba, npages, false); if (ret != H_SUCCESS) return ret; @@ -480,7 +537,7 @@ */ struct mm_iommu_table_group_mem_t *mem; - if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua)) return H_TOO_HARD; mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); @@ -496,23 +553,11 @@ * We do not require memory to be preregistered in this case * so lock rmap and do __find_linux_pte_or_hugepte(). */ - if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua)) return H_TOO_HARD; - rmap = (void *) vmalloc_to_phys(rmap); - if (WARN_ON_ONCE_RM(!rmap)) - return H_TOO_HARD; - - /* - * Synchronize with the MMU notifier callbacks in - * book3s_64_mmu_hv.c (kvm_unmap_hva_range_hv etc.). - * While we have the rmap lock, code running on other CPUs - * cannot finish unmapping the host real page that backs - * this guest real page, so we are OK to access the host - * real page. - */ - lock_rmap(rmap); - if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) { + arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); + if (kvmppc_rm_ua_to_hpa(vcpu, mmu_seq, ua, &tces)) { ret = H_TOO_HARD; goto unlock_exit; } @@ -521,14 +566,16 @@ for (i = 0; i < npages; ++i) { unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); - ret = kvmppc_tce_validate(stt, tce); + ret = kvmppc_rm_tce_validate(stt, tce); if (ret != H_SUCCESS) goto unlock_exit; + } + + for (i = 0; i < npages; ++i) { + unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); ua = 0; - if (kvmppc_gpa_to_ua(vcpu->kvm, - tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), - &ua, NULL)) { + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua)) { ret = H_PARAMETER; goto unlock_exit; } @@ -538,23 +585,19 @@ stit->tbl, entry + i, ua, iommu_tce_direction(tce)); - if (ret == H_SUCCESS) - continue; - - if (ret == H_TOO_HARD) + if (ret != H_SUCCESS) { + kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, + entry + i); goto unlock_exit; - - WARN_ON_ONCE_RM(1); - kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); + } } - kvmppc_tce_put(stt, entry + i, tce); + kvmppc_rm_tce_put(stt, entry + i, tce); } unlock_exit: - if (rmap) - unlock_rmap(rmap); - + if (!prereg) + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); return ret; } @@ -574,7 +617,7 @@ if (!stt) return H_TOO_HARD; - ret = kvmppc_ioba_validate(stt, ioba, npages); + ret = kvmppc_rm_ioba_validate(stt, ioba, npages, tce_value == 0); if (ret != H_SUCCESS) return ret; @@ -596,14 +639,14 @@ return ret; WARN_ON_ONCE_RM(1); - kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); + kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry + i); } } for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) - kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); + kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value); - return H_SUCCESS; + return ret; } /* This can be called in either virtual mode or real mode */ @@ -626,6 +669,10 @@ idx = (ioba >> stt->page_shift) - stt->offset; page = stt->pages[idx / TCES_PER_PAGE]; + if (!page) { + vcpu->arch.regs.gpr[4] = 0; + return H_SUCCESS; + } tbl = (u64 *)page_address(page); vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE]; -- Gitblit v1.6.2