From 04dd17822334871b23ea2862f7798fb0e0007777 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Sat, 11 May 2024 08:53:19 +0000
Subject: [PATCH] change otg to host mode

---
 kernel/arch/powerpc/kvm/book3s_64_mmu_hv.c | 255 ++++++++++++++++++++++++--------------------------
 1 files changed, 124 insertions(+), 131 deletions(-)

diff --git a/kernel/arch/powerpc/kvm/book3s_64_mmu_hv.c b/kernel/arch/powerpc/kvm/book3s_64_mmu_hv.c
index a488c10..38ea396 100644
--- a/kernel/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/kernel/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1,16 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  *
  * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
  */
@@ -63,7 +52,7 @@
 	struct work_struct work;
 	u32 order;
 
-	/* These fields protected by kvm->lock */
+	/* These fields protected by kvm->arch.mmu_setup_lock */
 
 	/* Possible values and their usage:
 	 *  <0 an error occurred during allocation,
@@ -73,7 +62,7 @@
 	int error;
 
 	/* Private to the work thread, until error != -EBUSY,
-	 * then protected by kvm->lock.
+	 * then protected by kvm->arch.mmu_setup_lock.
 	 */
 	struct kvm_hpt_info hpt;
 };
@@ -139,7 +128,7 @@
 	long err = -EBUSY;
 	struct kvm_hpt_info info;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->arch.mmu_setup_lock);
 	if (kvm->arch.mmu_ready) {
 		kvm->arch.mmu_ready = 0;
 		/* order mmu_ready vs. vcpus_running */
@@ -183,7 +172,7 @@
 		/* Ensure that each vcpu will flush its TLB on next entry. */
 		cpumask_setall(&kvm->arch.need_tlb_flush);
 
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->arch.mmu_setup_lock);
 	return err;
 }
 
@@ -268,15 +257,18 @@
 {
 	unsigned long host_lpid, rsvd_lpid;
 
-	if (!cpu_has_feature(CPU_FTR_HVMODE))
-		return -EINVAL;
-
 	if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
 		return -EINVAL;
 
-	/* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
-	host_lpid = mfspr(SPRN_LPID);
-	rsvd_lpid = LPID_RSVD;
+	host_lpid = 0;
+	if (cpu_has_feature(CPU_FTR_HVMODE))
+		host_lpid = mfspr(SPRN_LPID);
+
+	/* POWER8 and above have 12-bit LPIDs (10-bit in POWER7) */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		rsvd_lpid = LPID_RSVD;
+	else
+		rsvd_lpid = LPID_RSVD_POWER7;
 
 	kvmppc_init_lpid(rsvd_lpid + 1);
 
@@ -287,29 +279,16 @@
 	return 0;
 }
 
-static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
-{
-	unsigned long msr = vcpu->arch.intr_msr;
-
-	/* If transactional, change to suspend mode on IRQ delivery */
-	if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
-		msr |= MSR_TS_S;
-	else
-		msr |= vcpu->arch.shregs.msr & MSR_TS_MASK;
-	kvmppc_set_msr(vcpu, msr);
-}
-
 static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
 				long pte_index, unsigned long pteh,
 				unsigned long ptel, unsigned long *pte_idx_ret)
 {
 	long ret;
 
-	/* Protect linux PTE lookup from page table destruction */
-	rcu_read_lock_sched();	/* this disables preemption too */
+	preempt_disable();
 	ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
-				current->mm->pgd, false, pte_idx_ret);
-	rcu_read_unlock_sched();
+				kvm->mm->pgd, false, pte_idx_ret);
+	preempt_enable();
 	if (ret == H_TOO_HARD) {
 		/* this can't happen */
 		pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
@@ -437,10 +416,28 @@
 	return (instr & mask) != 0;
 }
 
-int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_hv_emulate_mmio(struct kvm_vcpu *vcpu,
 				unsigned long gpa, gva_t ea, int is_store)
 {
 	u32 last_inst;
+
+	/*
+	 * Fast path - check if the guest physical address corresponds to a
+	 * device on the FAST_MMIO_BUS, if so we can avoid loading the
+	 * instruction all together, then we can just handle it and return.
+	 */
+	if (is_store) {
+		int idx, ret;
+
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
+		ret = kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, (gpa_t) gpa, 0,
+				       NULL);
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
+		if (!ret) {
+			kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
+			return RESUME_GUEST;
+		}
+	}
 
 	/*
 	 * If we fail, we just return to the guest and try executing it again.
@@ -479,10 +476,10 @@
 
 	vcpu->arch.paddr_accessed = gpa;
 	vcpu->arch.vaddr_accessed = ea;
-	return kvmppc_emulate_mmio(run, vcpu);
+	return kvmppc_emulate_mmio(vcpu);
 }
 
-int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
 				unsigned long ea, unsigned long dsisr)
 {
 	struct kvm *kvm = vcpu->kvm;
@@ -491,20 +488,21 @@
 	__be64 *hptep;
 	unsigned long mmu_seq, psize, pte_size;
 	unsigned long gpa_base, gfn_base;
-	unsigned long gpa, gfn, hva, pfn;
+	unsigned long gpa, gfn, hva, pfn, hpa;
 	struct kvm_memory_slot *memslot;
 	unsigned long *rmap;
 	struct revmap_entry *rev;
-	struct page *page, *pages[1];
-	long index, ret, npages;
+	struct page *page;
+	long index, ret;
 	bool is_ci;
-	unsigned int writing, write_ok;
-	struct vm_area_struct *vma;
+	bool writing, write_ok;
+	unsigned int shift;
 	unsigned long rcbits;
 	long mmio_update;
+	pte_t pte, *ptep;
 
 	if (kvm_is_radix(kvm))
-		return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
+		return kvmppc_book3s_radix_page_fault(vcpu, ea, dsisr);
 
 	/*
 	 * Real-mode code has already searched the HPT and found the
@@ -524,7 +522,7 @@
 			gpa_base = r & HPTE_R_RPN & ~(psize - 1);
 			gfn_base = gpa_base >> PAGE_SHIFT;
 			gpa = gpa_base | (ea & (psize - 1));
-			return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
+			return kvmppc_hv_emulate_mmio(vcpu, gpa, ea,
 						dsisr & DSISR_ISSTORE);
 		}
 	}
@@ -560,7 +558,7 @@
 
 	/* No memslot means it's an emulated MMIO region */
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
-		return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
+		return kvmppc_hv_emulate_mmio(vcpu, gpa, ea,
 					      dsisr & DSISR_ISSTORE);
 
 	/*
@@ -575,59 +573,63 @@
 	smp_rmb();
 
 	ret = -EFAULT;
-	is_ci = false;
-	pfn = 0;
 	page = NULL;
-	pte_size = PAGE_SIZE;
 	writing = (dsisr & DSISR_ISSTORE) != 0;
 	/* If writing != 0, then the HPTE must allow writing, if we get here */
 	write_ok = writing;
 	hva = gfn_to_hva_memslot(memslot, gfn);
-	npages = get_user_pages_fast(hva, 1, writing, pages);
-	if (npages < 1) {
-		/* Check if it's an I/O mapping */
-		down_read(&current->mm->mmap_sem);
-		vma = find_vma(current->mm, hva);
-		if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
-		    (vma->vm_flags & VM_PFNMAP)) {
-			pfn = vma->vm_pgoff +
-				((hva - vma->vm_start) >> PAGE_SHIFT);
-			pte_size = psize;
-			is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot))));
-			write_ok = vma->vm_flags & VM_WRITE;
-		}
-		up_read(&current->mm->mmap_sem);
-		if (!pfn)
-			goto out_put;
+
+	/*
+	 * Do a fast check first, since __gfn_to_pfn_memslot doesn't
+	 * do it with !atomic && !async, which is how we call it.
+	 * We always ask for write permission since the common case
+	 * is that the page is writable.
+	 */
+	if (get_user_page_fast_only(hva, FOLL_WRITE, &page)) {
+		write_ok = true;
 	} else {
-		page = pages[0];
-		pfn = page_to_pfn(page);
-		if (PageHuge(page)) {
-			page = compound_head(page);
-			pte_size <<= compound_order(page);
-		}
-		/* if the guest wants write access, see if that is OK */
-		if (!writing && hpte_is_writable(r)) {
-			pte_t *ptep, pte;
-			unsigned long flags;
-			/*
-			 * We need to protect against page table destruction
-			 * hugepage split and collapse.
-			 */
-			local_irq_save(flags);
-			ptep = find_current_mm_pte(current->mm->pgd,
						   hva, NULL, NULL);
-			if (ptep) {
-				pte = kvmppc_read_update_linux_pte(ptep, 1);
-				if (__pte_write(pte))
-					write_ok = 1;
-			}
-			local_irq_restore(flags);
+		/* Call KVM generic code to do the slow-path check */
+		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
+					   writing, &write_ok);
+		if (is_error_noslot_pfn(pfn))
+			return -EFAULT;
+		page = NULL;
+		if (pfn_valid(pfn)) {
+			page = pfn_to_page(pfn);
+			if (PageReserved(page))
+				page = NULL;
 		}
 	}
 
+	/*
+	 * Read the PTE from the process' radix tree and use that
+	 * so we get the shift and attribute bits.
+	 */
+	spin_lock(&kvm->mmu_lock);
+	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
+	pte = __pte(0);
+	if (ptep)
+		pte = READ_ONCE(*ptep);
+	spin_unlock(&kvm->mmu_lock);
+	/*
+	 * If the PTE disappeared temporarily due to a THP
+	 * collapse, just return and let the guest try again.
+	 */
+	if (!pte_present(pte)) {
+		if (page)
+			put_page(page);
+		return RESUME_GUEST;
+	}
+	hpa = pte_pfn(pte) << PAGE_SHIFT;
+	pte_size = PAGE_SIZE;
+	if (shift)
+		pte_size = 1ul << shift;
+	is_ci = pte_ci(pte);
+
 	if (psize > pte_size)
 		goto out_put;
+	if (pte_size > psize)
+		hpa |= hva & (pte_size - psize);
 
 	/* Check WIMG vs. the actual page we're accessing */
 	if (!hpte_cache_flags_ok(r, is_ci)) {
@@ -641,14 +643,13 @@
 	}
 
 	/*
-	 * Set the HPTE to point to pfn.
-	 * Since the pfn is at PAGE_SIZE granularity, make sure we
+	 * Set the HPTE to point to hpa.
+	 * Since the hpa is at PAGE_SIZE granularity, make sure we
 	 * don't mask out lower-order bits if psize < PAGE_SIZE.
 	 */
 	if (psize < PAGE_SIZE)
 		psize = PAGE_SIZE;
-	r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) |
-			((pfn << PAGE_SHIFT) & ~(psize - 1));
+	r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) | hpa;
 	if (hpte_is_writable(r) && !write_ok)
 		r = hpte_make_readonly(r);
 	ret = RESUME_GUEST;
@@ -713,20 +714,13 @@
 	asm volatile("ptesync" : : : "memory");
 	preempt_enable();
 	if (page && hpte_is_writable(r))
-		SetPageDirty(page);
+		set_page_dirty_lock(page);
 
  out_put:
 	trace_kvm_page_fault_exit(vcpu, hpte, ret);
 
-	if (page) {
-		/*
-		 * We drop pages[0] here, not page because page might
-		 * have been set to the head page of a compound, but
-		 * we have to drop the reference on the correct tail
-		 * page to match the get inside gup()
-		 */
-		put_page(pages[0]);
-	}
+	if (page)
+		put_page(page);
 	return ret;
 
  out_unlock:
@@ -900,11 +894,12 @@
 
 	gfn = memslot->base_gfn;
 	rmapp = memslot->arch.rmap;
+	if (kvm_is_radix(kvm)) {
+		kvmppc_radix_flush_memslot(kvm, memslot);
+		return;
+	}
+
 	for (n = memslot->npages; n; --n, ++gfn) {
-		if (kvm_is_radix(kvm)) {
-			kvm_unmap_radix(kvm, memslot, gfn);
-			continue;
-		}
 		/*
 		 * Testing the present bit without locking is OK because
 		 * the memslot has been marked invalid already, and hence
@@ -1175,7 +1170,7 @@
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
 		goto err;
 	hva = gfn_to_hva_memslot(memslot, gfn);
-	npages = get_user_pages_fast(hva, 1, 1, pages);
+	npages = get_user_pages_fast(hva, 1, FOLL_WRITE, pages);
 	if (npages < 1)
 		goto err;
 	page = pages[0];
@@ -1429,7 +1424,7 @@
 static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
 {
-	if (WARN_ON(!mutex_is_locked(&kvm->lock)))
+	if (WARN_ON(!mutex_is_locked(&kvm->arch.mmu_setup_lock)))
 		return;
 
 	if (!resize)
 		return;
@@ -1456,14 +1451,14 @@
 	if (WARN_ON(resize->error != -EBUSY))
 		return;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->arch.mmu_setup_lock);
 
 	/* Request is still current? */
 	if (kvm->arch.resize_hpt == resize) {
 		/* We may request large allocations here:
-		 * do not sleep with kvm->lock held for a while.
+		 * do not sleep with kvm->arch.mmu_setup_lock held for a while.
 		 */
-		mutex_unlock(&kvm->lock);
+		mutex_unlock(&kvm->arch.mmu_setup_lock);
 
 		resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
 				 resize->order);
@@ -1476,9 +1471,9 @@
 		if (WARN_ON(err == -EBUSY))
 			err = -EINPROGRESS;
 
-		mutex_lock(&kvm->lock);
+		mutex_lock(&kvm->arch.mmu_setup_lock);
 		/* It is possible that kvm->arch.resize_hpt != resize
-		 * after we grab kvm->lock again.
+		 * after we grab kvm->arch.mmu_setup_lock again.
 		 */
 	}
 
@@ -1487,7 +1482,7 @@
 	if (kvm->arch.resize_hpt != resize)
 		resize_hpt_release(kvm, resize);
 
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->arch.mmu_setup_lock);
 }
 
 long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
@@ -1504,7 +1499,7 @@
 	if (shift && ((shift < 18) || (shift > 46)))
 		return -EINVAL;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->arch.mmu_setup_lock);
 
 	resize = kvm->arch.resize_hpt;
 
@@ -1547,7 +1542,7 @@
 	ret = 100; /* estimated time in ms */
 
 out:
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->arch.mmu_setup_lock);
 	return ret;
 }
 
@@ -1570,7 +1565,7 @@
 	if (shift && ((shift < 18) || (shift > 46)))
 		return -EINVAL;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->arch.mmu_setup_lock);
 
 	resize = kvm->arch.resize_hpt;
 
@@ -1607,7 +1602,7 @@
 	smp_mb();
 out_no_hpt:
 	resize_hpt_release(kvm, resize);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->arch.mmu_setup_lock);
 	return ret;
 }
 
@@ -1744,7 +1739,7 @@
 	int first_pass;
 	unsigned long hpte[2];
 
-	if (!access_ok(VERIFY_WRITE, buf, count))
+	if (!access_ok(buf, count))
 		return -EFAULT;
 	if (kvm_is_radix(kvm))
 		return 0;
@@ -1844,13 +1839,13 @@
 	int mmu_ready;
 	int pshift;
 
-	if (!access_ok(VERIFY_READ, buf, count))
+	if (!access_ok(buf, count))
 		return -EFAULT;
 	if (kvm_is_radix(kvm))
 		return -EINVAL;
 
 	/* lock out vcpus from running while we're doing this */
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->arch.mmu_setup_lock);
 	mmu_ready = kvm->arch.mmu_ready;
 	if (mmu_ready) {
 		kvm->arch.mmu_ready = 0;	/* temporarily */
@@ -1858,7 +1853,7 @@
 		smp_mb();
 		if (atomic_read(&kvm->arch.vcpus_running)) {
 			kvm->arch.mmu_ready = 1;
-			mutex_unlock(&kvm->lock);
+			mutex_unlock(&kvm->arch.mmu_setup_lock);
 			return -EBUSY;
 		}
 	}
@@ -1945,7 +1940,7 @@
 	/* Order HPTE updates vs. mmu_ready */
 	smp_wmb();
 	kvm->arch.mmu_ready = mmu_ready;
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->arch.mmu_setup_lock);
 
 	if (err)
 		return err;
@@ -1993,7 +1988,7 @@
 	ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
 	if (ret < 0) {
 		kfree(ctx);
-		kvm_put_kvm(kvm);
+		kvm_put_kvm_no_destroy(kvm);
 		return ret;
 	}
 
@@ -2142,9 +2137,8 @@
 
 void kvmppc_mmu_debugfs_init(struct kvm *kvm)
 {
-	kvm->arch.htab_dentry = debugfs_create_file("htab", 0400,
-						    kvm->arch.debugfs_dir, kvm,
-						    &debugfs_htab_fops);
+	debugfs_create_file("htab", 0400, kvm->arch.debugfs_dir, kvm,
+			    &debugfs_htab_fops);
 }
 
 void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
@@ -2154,7 +2148,6 @@
 	vcpu->arch.slb_nr = 32;		/* POWER7/POWER8 */
 
 	mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
-	mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
 
 	vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
 }
--
Gitblit v1.6.2