From 04dd17822334871b23ea2862f7798fb0e0007777 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Sat, 11 May 2024 08:53:19 +0000
Subject: [PATCH] change otg to host mode

---
 kernel/arch/powerpc/kvm/book3s_xive.c | 530 +++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 411 insertions(+), 119 deletions(-)

diff --git a/kernel/arch/powerpc/kvm/book3s_xive.c b/kernel/arch/powerpc/kvm/book3s_xive.c
index 031f07f..a0ebc29 100644
--- a/kernel/arch/powerpc/kvm/book3s_xive.c
+++ b/kernel/arch/powerpc/kvm/book3s_xive.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) "xive-kvm: " fmt
@@ -62,6 +59,75 @@
 #define XIVE_Q_GAP	2
 
 /*
+ * Push a vcpu's context to the XIVE on guest entry.
+ * This assumes we are in virtual mode (MMU on)
+ */
+void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
+{
+	void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
+	u64 pq;
+
+	/*
+	 * Nothing to do if the platform doesn't have a XIVE
+	 * or this vCPU doesn't have its own XIVE context
+	 * (e.g. because it's not using an in-kernel interrupt controller).
+	 */
+	if (!tima || !vcpu->arch.xive_cam_word)
+		return;
+
+	eieio();
+	__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
+	__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
+	vcpu->arch.xive_pushed = 1;
+	eieio();
+
+	/*
+	 * We clear the irq_pending flag. There is a small chance of a
+	 * race vs. the escalation interrupt happening on another
+	 * processor setting it again, but the only consequence is to
+	 * cause a spurious wakeup on the next H_CEDE, which is not an
+	 * issue.
+	 */
+	vcpu->arch.irq_pending = 0;
+
+	/*
+	 * In single escalation mode, if the escalation interrupt is
+	 * on, we mask it.
+	 */
+	if (vcpu->arch.xive_esc_on) {
+		pq = __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
+						  XIVE_ESB_SET_PQ_01));
+		mb();
+
+		/*
+		 * We have a possible subtle race here: The escalation
+		 * interrupt might have fired and be on its way to the
+		 * host queue while we mask it, and if we unmask it
+		 * early enough (re-cede right away), there is a
+		 * theoretical possibility that it fires again, thus
+		 * landing in the target queue more than once which is
+		 * a big no-no.
+		 *
+		 * Fortunately, solving this is rather easy. If the
+		 * above load setting PQ to 01 returns a previous
+		 * value where P is set, then we know the escalation
+		 * interrupt is somewhere on its way to the host. In
+		 * that case we simply don't clear the xive_esc_on
+		 * flag below. It will be eventually cleared by the
+		 * handler for the escalation interrupt.
+		 *
+		 * Then, when doing a cede, we check that flag again
+		 * before re-enabling the escalation interrupt, and if
+		 * set, we abort the cede.
+		 */
+		if (!(pq & XIVE_ESB_VAL_P))
+			/* Now P is 0, we can clear the flag */
+			vcpu->arch.xive_esc_on = 0;
+	}
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
+
+/*
  * This is a simple trigger for a generic XIVE IRQ. This must
  * only be called for interrupts that support a trigger page
  */
@@ -100,10 +166,14 @@
 	 */
 	vcpu->arch.xive_esc_on = false;
 
+	/* This orders xive_esc_on = false vs. subsequent stale_p = true */
+	smp_wmb();	/* goes with smp_mb() in cleanup_single_escalation */
+
 	return IRQ_HANDLED;
 }
 
-static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
+int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
+				  bool single_escalation)
 {
 	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
 	struct xive_q *q = &xc->queues[prio];
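
The PQ manipulation above (and in xive_lock_and_mask() further down) leans on the ESB load-with-side-effect semantics. As an editorial aside, not part of the patch, here is a compact summary paraphrased from the XIVE comments in the kernel tree (arch/powerpc/sysdev/xive/):

/*
 * ESB PQ state summary (paraphrased, editorial):
 *
 *   PQ = 00  enabled: the next trigger is delivered and moves the state to 10
 *   PQ = 10  P set: delivered but not yet EOI'd; a further trigger sets Q (11)
 *   PQ = 11  delivered, with another trigger queued for replay on EOI
 *   PQ = 01  masked ("off"): new triggers are not forwarded to a queue
 *
 * Every XIVE_ESB_SET_PQ_* load returns the *previous* PQ value atomically,
 * which is why kvmppc_xive_push_vcpu() can both mask the escalation
 * (moving it to 01) and detect an in-flight interrupt (old P set) with a
 * single MMIO read.
 */
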
@@ -122,7 +192,7 @@
 		return -EIO;
 	}
 
-	if (xc->xive->single_escalation)
+	if (single_escalation)
 		name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
 				 vcpu->kvm->arch.lpid, xc->server_num);
 	else
@@ -154,7 +224,7 @@
 	 * interrupt, thus leaving it effectively masked after
 	 * it fires once.
 	 */
-	if (xc->xive->single_escalation) {
+	if (single_escalation) {
 		struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
 		struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
 
@@ -207,14 +277,14 @@
 	return rc;
 }
 
-/* Called with kvm_lock held */
+/* Called with xive->lock held */
 static int xive_check_provisioning(struct kvm *kvm, u8 prio)
 {
 	struct kvmppc_xive *xive = kvm->arch.xive;
 	struct kvm_vcpu *vcpu;
 	int i, rc;
 
-	lockdep_assert_held(&kvm->lock);
+	lockdep_assert_held(&xive->lock);
 
 	/* Already provisioned ? */
 	if (xive->qmap & (1 << prio))
@@ -228,7 +298,8 @@
 			continue;
 		rc = xive_provision_queue(vcpu, prio);
 		if (rc == 0 && !xive->single_escalation)
-			xive_attach_escalation(vcpu, prio);
+			kvmppc_xive_attach_escalation(vcpu, prio,
+						      xive->single_escalation);
 		if (rc)
 			return rc;
 	}
@@ -279,7 +350,7 @@
 	return atomic_add_unless(&q->count, 1, max) ? 0 : -EBUSY;
 }
 
-static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
+int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
 {
 	struct kvm_vcpu *vcpu;
 	int i, rc;
@@ -315,11 +386,6 @@
 
 	/* No available target ! */
 	return -EBUSY;
-}
-
-static u32 xive_vp(struct kvmppc_xive *xive, u32 server)
-{
-	return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
 }
 
 static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
@@ -367,8 +433,8 @@
 	 */
 	if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
 		xive_native_configure_irq(hw_num,
-					  xive_vp(xive, state->act_server),
-					  MASKED, state->number);
+				kvmppc_xive_vp(xive, state->act_server),
+				MASKED, state->number);
 		/* set old_p so we can track if an H_EOI was done */
 		state->old_p = true;
 		state->old_q = false;
@@ -418,13 +484,13 @@
 	kvmppc_xive_select_irq(state, &hw_num, &xd);
 
 	/*
-	 * See command in xive_lock_and_mask() concerning masking
+	 * See comment in xive_lock_and_mask() concerning masking
 	 * via firmware.
 	 */
 	if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
 		xive_native_configure_irq(hw_num,
-					  xive_vp(xive, state->act_server),
-					  state->act_priority, state->number);
+				kvmppc_xive_vp(xive, state->act_server),
+				state->act_priority, state->number);
 		/* If an EOI is needed, do it here */
 		if (!state->old_p)
 			xive_vm_source_eoi(hw_num, xd);
@@ -472,7 +538,7 @@
 	 * priority. The count for that new target will have
 	 * already been incremented.
	 */
-	rc = xive_select_target(kvm, &server, prio);
+	rc = kvmppc_xive_select_target(kvm, &server, prio);
 
 	/*
	 * We failed to find a target ? Not much we can do
@@ -500,7 +566,7 @@
 	kvmppc_xive_select_irq(state, &hw_num, NULL);
 
 	return xive_native_configure_irq(hw_num,
-					 xive_vp(xive, server),
+					 kvmppc_xive_vp(xive, server),
 					 prio, state->number);
 }
@@ -561,9 +627,12 @@
 		 irq, server, priority);
 
 	/* First, check provisioning of queues */
-	if (priority != MASKED)
+	if (priority != MASKED) {
+		mutex_lock(&xive->lock);
 		rc = xive_check_provisioning(xive->kvm,
 			  xive_prio_from_guest(priority));
+		mutex_unlock(&xive->lock);
+	}
 	if (rc) {
 		pr_devel(" provisioning failure %d !\n", rc);
 		return rc;
@@ -786,7 +855,8 @@
 
 	/*
 	 * We can't update the state of a "pushed" VCPU, but that
-	 * shouldn't happen.
+	 * shouldn't happen because the vcpu->mutex makes running a
+	 * vcpu mutually exclusive with doing one_reg get/set on it.
 	 */
 	if (WARN_ON(vcpu->arch.xive_pushed))
 		return -EIO;
@@ -877,6 +947,13 @@
 	/* Turn the IPI hard off */
 	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
 
+	/*
+	 * Reset ESB guest mapping. Needed when ESB pages are exposed
+	 * to the guest in XIVE native mode
+	 */
+	if (xive->ops && xive->ops->reset_mapped)
+		xive->ops->reset_mapped(kvm, guest_irq);
+
 	/* Grab info about irq */
 	state->pt_number = hw_irq;
 	state->pt_data = irq_data_get_irq_handler_data(host_data);
@@ -888,7 +965,7 @@
 	 * which is fine for a never started interrupt.
 	 */
 	xive_native_configure_irq(hw_irq,
-				  xive_vp(xive, state->act_server),
+				  kvmppc_xive_vp(xive, state->act_server),
 				  state->act_priority, state->number);
 
 	/*
@@ -962,9 +1039,17 @@
 	state->pt_number = 0;
 	state->pt_data = NULL;
 
+	/*
+	 * Reset ESB guest mapping. Needed when ESB pages are exposed
+	 * to the guest in XIVE native mode
+	 */
+	if (xive->ops && xive->ops->reset_mapped) {
+		xive->ops->reset_mapped(kvm, guest_irq);
+	}
+
 	/* Reconfigure the IPI */
 	xive_native_configure_irq(state->ipi_number,
-				  xive_vp(xive, state->act_server),
+				  kvmppc_xive_vp(xive, state->act_server),
 				  state->act_priority, state->number);
 
 	/*
@@ -986,7 +1071,7 @@
 }
 EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped);
 
-static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
+void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
 	struct kvm *kvm = vcpu->kvm;
@@ -1020,13 +1105,59 @@
 			arch_spin_unlock(&sb->lock);
 		}
 	}
+
+	/* Disable vcpu's escalation interrupt */
+	if (vcpu->arch.xive_esc_on) {
+		__raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
+					     XIVE_ESB_SET_PQ_01));
+		vcpu->arch.xive_esc_on = false;
+	}
+
+	/*
+	 * Clear pointers to escalation interrupt ESB.
+	 * This is safe because the vcpu->mutex is held, preventing
+	 * any other CPU from concurrently executing a KVM_RUN ioctl.
+	 */
+	vcpu->arch.xive_esc_vaddr = 0;
+	vcpu->arch.xive_esc_raddr = 0;
+}
+
+/*
+ * In single escalation mode, the escalation interrupt is marked so
+ * that EOI doesn't re-enable it, but just sets the stale_p flag to
+ * indicate that the P bit has already been dealt with. However, the
+ * assembly code that enters the guest sets PQ to 00 without clearing
+ * stale_p (because it has no easy way to address it). Hence we have
+ * to adjust stale_p before shutting down the interrupt.
+ */
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
+				    struct kvmppc_xive_vcpu *xc, int irq)
+{
+	struct irq_data *d = irq_get_irq_data(irq);
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+	/*
+	 * This slightly odd sequence gives the right result
+	 * (i.e. stale_p set if xive_esc_on is false) even if
+	 * we race with xive_esc_irq() and xive_irq_eoi().
+	 */
+	xd->stale_p = false;
+	smp_mb();	/* paired with smp_wmb() in xive_esc_irq */
+	if (!vcpu->arch.xive_esc_on)
+		xd->stale_p = true;
 }
 
 void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
-	struct kvmppc_xive *xive = xc->xive;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
 	int i;
+
+	if (!kvmppc_xics_enabled(vcpu))
+		return;
+
+	if (!xc)
+		return;
 
 	pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num);
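
To make the pairing that the two barrier comments refer to easier to follow, here is an editorial side-by-side sketch (not part of the patch; the field names are the real ones used above):

/*
 * Writer (escalation handler)         Reader (vcpu teardown)
 * xive_esc_irq():                     xive_cleanup_single_escalation():
 *
 *   vcpu->arch.xive_esc_on = false;     xd->stale_p = false;
 *   smp_wmb();                          smp_mb();
 *                                       if (!vcpu->arch.xive_esc_on)
 *                                               xd->stale_p = true;
 *
 * The smp_wmb()/smp_mb() pair orders the teardown side's clearing of
 * stale_p before its read of xive_esc_on, which yields the result the
 * comment above asks for: stale_p ends up set exactly when xive_esc_on
 * is false, even if the two sides race.
 */
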
@@ -1040,6 +1171,9 @@
 	/* Free escalations */
 	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
 		if (xc->esc_virq[i]) {
+			if (xc->xive->single_escalation)
+				xive_cleanup_single_escalation(vcpu, xc,
+							xc->esc_virq[i]);
 			free_irq(xc->esc_virq[i], vcpu);
 			irq_dispose_mapping(xc->esc_virq[i]);
 			kfree(xc->esc_virq_names[i]);
@@ -1048,6 +1182,9 @@
 
 	/* Disable the VP */
 	xive_native_disable_vp(xc->vp_id);
+
+	/* Clear the cam word so guest entry won't try to push context */
+	vcpu->arch.xive_cam_word = 0;
 
 	/* Free the queues */
 	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
@@ -1068,6 +1205,46 @@
 	}
 	/* Free the VP */
 	kfree(xc);
+
+	/* Cleanup the vcpu */
+	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+	vcpu->arch.xive_vcpu = NULL;
+}
+
+static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu)
+{
+	/* We have a block of xive->nr_servers VPs. We just need to check
+	 * packed vCPU ids are below that.
+	 */
+	return kvmppc_pack_vcpu_id(xive->kvm, cpu) < xive->nr_servers;
+}
+
+int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp)
+{
+	u32 vp_id;
+
+	if (!kvmppc_xive_vcpu_id_valid(xive, cpu)) {
+		pr_devel("Out of bounds !\n");
+		return -EINVAL;
+	}
+
+	if (xive->vp_base == XIVE_INVALID_VP) {
+		xive->vp_base = xive_native_alloc_vp_block(xive->nr_servers);
+		pr_devel("VP_Base=%x nr_servers=%d\n", xive->vp_base, xive->nr_servers);
+
+		if (xive->vp_base == XIVE_INVALID_VP)
+			return -ENOSPC;
+	}
+
+	vp_id = kvmppc_xive_vp(xive, cpu);
+	if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
+		pr_devel("Duplicate !\n");
+		return -EEXIST;
+	}
+
+	*vp = vp_id;
+
+	return 0;
 }
 
 int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
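
For context, the static xive_vp() removed earlier in this patch shows the mapping that the renamed kvmppc_xive_vp() helper (moved out of this file by the series) presumably still performs. An editorial restatement, mirroring the removed body:

/* Editorial sketch, mirroring the removed static xive_vp(): a server
 * (vCPU) id is first packed, then used as an offset into the VP block
 * allocated at xive->vp_base.
 */
static u32 sketch_kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server)
{
	return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
}

Hence kvmppc_xive_vcpu_id_valid() above: the packed id must fall below xive->nr_servers for the resulting VP id to land inside the allocated block.
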
@@ -1076,6 +1253,7 @@
 	struct kvmppc_xive *xive = dev->private;
 	struct kvmppc_xive_vcpu *xc;
 	int i, r = -EBUSY;
+	u32 vp_id;
 
 	pr_devel("connect_vcpu(cpu=%d)\n", cpu);
@@ -1085,27 +1263,27 @@
 	}
 	if (xive->kvm != vcpu->kvm)
 		return -EPERM;
-	if (vcpu->arch.irq_type)
+	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
 		return -EBUSY;
-	if (kvmppc_xive_find_server(vcpu->kvm, cpu)) {
-		pr_devel("Duplicate !\n");
-		return -EEXIST;
-	}
-	if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
-		pr_devel("Out of bounds !\n");
-		return -EINVAL;
-	}
-	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
-	if (!xc)
-		return -ENOMEM;
 
 	/* We need to synchronize with queue provisioning */
-	mutex_lock(&vcpu->kvm->lock);
+	mutex_lock(&xive->lock);
+
+	r = kvmppc_xive_compute_vp_id(xive, cpu, &vp_id);
+	if (r)
+		goto bail;
+
+	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
+	if (!xc) {
+		r = -ENOMEM;
+		goto bail;
+	}
+
 	vcpu->arch.xive_vcpu = xc;
 	xc->xive = xive;
 	xc->vcpu = vcpu;
 	xc->server_num = cpu;
-	xc->vp_id = xive_vp(xive, cpu);
+	xc->vp_id = vp_id;
 	xc->mfrr = 0xff;
 	xc->valid = true;
@@ -1158,7 +1336,8 @@
 		if (xive->qmap & (1 << i)) {
 			r = xive_provision_queue(vcpu, i);
 			if (r == 0 && !xive->single_escalation)
-				xive_attach_escalation(vcpu, i);
+				kvmppc_xive_attach_escalation(
+					vcpu, i, xive->single_escalation);
 			if (r)
 				goto bail;
 		} else {
@@ -1173,7 +1352,7 @@
 	}
 
 	/* If not done above, attach priority 0 escalation */
-	r = xive_attach_escalation(vcpu, 0);
+	r = kvmppc_xive_attach_escalation(vcpu, 0, xive->single_escalation);
 	if (r)
 		goto bail;
@@ -1183,7 +1362,7 @@
 		xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_00);
 
 bail:
-	mutex_unlock(&vcpu->kvm->lock);
+	mutex_unlock(&xive->lock);
 	if (r) {
 		kvmppc_xive_cleanup_vcpu(vcpu);
 		return r;
@@ -1424,16 +1603,15 @@
 	return 0;
 }
 
-static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *xive,
-							   int irq)
+struct kvmppc_xive_src_block *kvmppc_xive_create_src_block(
+	struct kvmppc_xive *xive, int irq)
 {
-	struct kvm *kvm = xive->kvm;
 	struct kvmppc_xive_src_block *sb;
 	int i, bid;
 
 	bid = irq >> KVMPPC_XICS_ICS_SHIFT;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&xive->lock);
 
 	/* block already exists - somebody else got here first */
 	if (xive->src_blocks[bid])
@@ -1448,6 +1626,7 @@
 
 	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
 		sb->irq_state[i].number = (bid << KVMPPC_XICS_ICS_SHIFT) | i;
+		sb->irq_state[i].eisn = 0;
 		sb->irq_state[i].guest_priority = MASKED;
 		sb->irq_state[i].saved_priority = MASKED;
 		sb->irq_state[i].act_priority = MASKED;
@@ -1459,7 +1638,7 @@
 	xive->max_sbid = bid;
 
 out:
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&xive->lock);
 	return xive->src_blocks[bid];
 }
@@ -1504,7 +1683,7 @@
 	sb = kvmppc_xive_find_source(xive, irq, &idx);
 	if (!sb) {
 		pr_devel("No source, creating source block...\n");
-		sb = xive_create_src_block(xive, irq);
+		sb = kvmppc_xive_create_src_block(xive, irq);
 		if (!sb) {
 			pr_devel("Failed to create block...\n");
 			return -ENOMEM;
@@ -1569,9 +1748,9 @@
 	/* If we have a priority target the interrupt */
 	if (act_prio != MASKED) {
 		/* First, check provisioning of queues */
-		mutex_lock(&xive->kvm->lock);
+		mutex_lock(&xive->lock);
 		rc = xive_check_provisioning(xive->kvm, act_prio);
-		mutex_unlock(&xive->kvm->lock);
+		mutex_unlock(&xive->lock);
 
 		/* Target interrupt */
 		if (rc == 0)
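
Stepping back, the kvm->lock to xive->lock conversions above suggest the following ordering. This is an editorial summary inferred from the lockdep_assert_held() and mutex_lock() changes in this patch, not an authoritative statement:

/*
 *   vcpu->mutex   taken around KVM_RUN, one_reg get/set, and (in the
 *                 release path below) per-vcpu XIVE teardown
 *   xive->lock    device-local; serializes queue provisioning, source
 *                 block creation and VP block sizing (nr_servers)
 *   sb->lock      per-source-block arch spinlock guarding irq state
 *
 * Using the device-local xive->lock rather than the VM-global kvm->lock
 * keeps these paths from contending with unrelated VM-wide operations.
 */
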
@@ -1684,6 +1863,43 @@
 	return 0;
 }
 
+int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr)
+{
+	u32 __user *ubufp = (u32 __user *) addr;
+	u32 nr_servers;
+	int rc = 0;
+
+	if (get_user(nr_servers, ubufp))
+		return -EFAULT;
+
+	pr_devel("%s nr_servers=%u\n", __func__, nr_servers);
+
+	if (!nr_servers || nr_servers > KVM_MAX_VCPU_ID)
+		return -EINVAL;
+
+	mutex_lock(&xive->lock);
+	if (xive->vp_base != XIVE_INVALID_VP)
+		/* The VP block is allocated once and freed when the device
+		 * is released. Better not allow to change its size since it's
+		 * used by connect_vcpu to validate vCPU ids are valid (e.g.
+		 * setting it back to a higher value could allow connect_vcpu
+		 * to come up with a VP id that goes beyond the VP block, which
+		 * is likely to cause a crash in OPAL).
+		 */
+		rc = -EBUSY;
+	else if (nr_servers > KVM_MAX_VCPUS)
+		/* We don't need more servers. Higher vCPU ids get packed
+		 * down below KVM_MAX_VCPUS by kvmppc_pack_vcpu_id().
+		 */
+		xive->nr_servers = KVM_MAX_VCPUS;
+	else
+		xive->nr_servers = nr_servers;
+
+	mutex_unlock(&xive->lock);
+
+	return rc;
+}
+
 static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
 	struct kvmppc_xive *xive = dev->private;
@@ -1692,6 +1908,11 @@
 	switch (attr->group) {
 	case KVM_DEV_XICS_GRP_SOURCES:
 		return xive_set_source(xive, attr->attr, attr->addr);
+	case KVM_DEV_XICS_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_XICS_NR_SERVERS:
+			return kvmppc_xive_set_nr_servers(xive, attr->addr);
+		}
 	}
 	return -ENXIO;
 }
@@ -1717,6 +1938,11 @@
 		    attr->attr < KVMPPC_XICS_NR_IRQS)
 			return 0;
 		break;
+	case KVM_DEV_XICS_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_XICS_NR_SERVERS:
+			return 0;
+		}
 	}
 	return -ENXIO;
 }
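
The new control plumbed above is reached from userspace through the generic KVM device-attribute ioctl. A hypothetical VMM-side sketch (editorial, not part of the patch; the attribute names are the real ones added by this series, the helper function is made up):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Hypothetical userspace helper: size the XICS-on-XIVE VP block before
 * any vCPU is connected.  Error handling elided.
 */
static int set_xics_nr_servers(int xics_dev_fd, __u32 nr_servers)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_XICS_GRP_CTRL,
		.attr  = KVM_DEV_XICS_NR_SERVERS,
		.addr  = (__u64)(unsigned long)&nr_servers,
	};

	/* Fails with errno == EBUSY once the VP block has been allocated */
	return ioctl(xics_dev_fd, KVM_SET_DEVICE_ATTR, &attr);
}
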
@@ -1727,7 +1953,7 @@
 	xive_native_configure_irq(hw_num, 0, MASKED, 0);
 }
 
-static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
+void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
 {
 	int i;
@@ -1749,16 +1975,53 @@
 	}
 }
 
-static void kvmppc_xive_free(struct kvm_device *dev)
+/*
+ * Called when device fd is closed. kvm->lock is held.
+ */
+static void kvmppc_xive_release(struct kvm_device *dev)
 {
 	struct kvmppc_xive *xive = dev->private;
 	struct kvm *kvm = xive->kvm;
+	struct kvm_vcpu *vcpu;
 	int i;
+
+	pr_devel("Releasing xive device\n");
+
+	/*
+	 * Since this is the device release function, we know that
+	 * userspace does not have any open fd referring to the
+	 * device. Therefore there can not be any of the device
+	 * attribute set/get functions being executed concurrently,
+	 * and similarly, the connect_vcpu and set/clr_mapped
+	 * functions also cannot be being executed.
+	 */
 
 	debugfs_remove(xive->dentry);
 
-	if (kvm)
-		kvm->arch.xive = NULL;
+	/*
+	 * We should clean up the vCPU interrupt presenters first.
+	 */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		/*
+		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
+		 * (i.e. kvmppc_xive_[gs]et_icp) can be done concurrently.
+		 * Holding the vcpu->mutex also means that the vcpu cannot
+		 * be executing the KVM_RUN ioctl, and therefore it cannot
+		 * be executing the XIVE push or pull code or accessing
+		 * the XIVE MMIO regions.
+		 */
+		mutex_lock(&vcpu->mutex);
+		kvmppc_xive_cleanup_vcpu(vcpu);
+		mutex_unlock(&vcpu->mutex);
+	}
+
+	/*
+	 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
+	 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
+	 * against xive code getting called during vcpu execution or
+	 * set/get one_reg operations.
+	 */
+	kvm->arch.xive = NULL;
 
 	/* Mask and free interrupts */
 	for (i = 0; i <= xive->max_sbid; i++) {
@@ -1771,32 +2034,64 @@
 	if (xive->vp_base != XIVE_INVALID_VP)
 		xive_native_free_vp_block(xive->vp_base);
 
+	/*
+	 * A reference of the kvmppc_xive pointer is now kept under
+	 * the xive_devices struct of the machine for reuse. It is
+	 * freed when the VM is destroyed for now until we fix all the
+	 * execution paths.
+	 */
-	kfree(xive);
 	kfree(dev);
 }
 
+/*
+ * When the guest chooses the interrupt mode (XICS legacy or XIVE
+ * native), the VM will switch KVM device. The previous device will
+ * be "released" before the new one is created.
+ *
+ * Until we are sure all execution paths are well protected, provide a
+ * fail safe (transitional) method for device destruction, in which
+ * the XIVE device pointer is recycled and not directly freed.
+ */
+struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type)
+{
+	struct kvmppc_xive **kvm_xive_device = type == KVM_DEV_TYPE_XIVE ?
+		&kvm->arch.xive_devices.native :
+		&kvm->arch.xive_devices.xics_on_xive;
+	struct kvmppc_xive *xive = *kvm_xive_device;
+
+	if (!xive) {
+		xive = kzalloc(sizeof(*xive), GFP_KERNEL);
+		*kvm_xive_device = xive;
+	} else {
+		memset(xive, 0, sizeof(*xive));
+	}
+
+	return xive;
+}
+
+/*
+ * Create a XICS device with XIVE backend. kvm->lock is held.
+ */
 static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
 {
 	struct kvmppc_xive *xive;
 	struct kvm *kvm = dev->kvm;
-	int ret = 0;
 
 	pr_devel("Creating xive for partition\n");
 
-	xive = kzalloc(sizeof(*xive), GFP_KERNEL);
+	/* Already there ? */
+	if (kvm->arch.xive)
+		return -EEXIST;
+
+	xive = kvmppc_xive_get_device(kvm, type);
 	if (!xive)
 		return -ENOMEM;
 
 	dev->private = xive;
 	xive->dev = dev;
 	xive->kvm = kvm;
-
-	/* Already there ? */
-	if (kvm->arch.xive)
-		ret = -EEXIST;
-	else
-		kvm->arch.xive = xive;
+	mutex_init(&xive->lock);
 
 	/* We use the default queue size set by the host */
 	xive->q_order = xive_native_default_eq_shift();
@@ -1805,23 +2100,56 @@
 	else
 		xive->q_page_order = xive->q_order - PAGE_SHIFT;
 
-	/* Allocate a bunch of VPs */
-	xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
-	pr_devel("VP_Base=%x\n", xive->vp_base);
-
-	if (xive->vp_base == XIVE_INVALID_VP)
-		ret = -ENOMEM;
+	/* VP allocation is delayed to the first call to connect_vcpu */
+	xive->vp_base = XIVE_INVALID_VP;
+	/* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
+	 * on a POWER9 system.
+	 */
+	xive->nr_servers = KVM_MAX_VCPUS;
 
 	xive->single_escalation = xive_native_has_single_escalation();
 
-	if (ret) {
-		kfree(xive);
-		return ret;
-	}
-
+	kvm->arch.xive = xive;
 	return 0;
 }
 
+int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	unsigned int i;
+
+	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+		struct xive_q *q = &xc->queues[i];
+		u32 i0, i1, idx;
+
+		if (!q->qpage && !xc->esc_virq[i])
+			continue;
+
+		seq_printf(m, " [q%d]: ", i);
+
+		if (q->qpage) {
+			idx = q->idx;
+			i0 = be32_to_cpup(q->qpage + idx);
+			idx = (idx + 1) & q->msk;
+			i1 = be32_to_cpup(q->qpage + idx);
+			seq_printf(m, "T=%d %08x %08x...\n", q->toggle,
+				   i0, i1);
+		}
+		if (xc->esc_virq[i]) {
+			struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
+			struct xive_irq_data *xd =
+				irq_data_get_irq_handler_data(d);
+			u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+
+			seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
+				   (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
+				   (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
+				   xc->esc_virq[i], pq, xd->eoi_page);
+			seq_puts(m, "\n");
+		}
+	}
+	return 0;
+}
 
 static int xive_debug_show(struct seq_file *m, void *private)
 {
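
As a reading aid for the escalation line that kvmppc_xive_debug_show_queues() prints, an editorial note (the sample values below are made up for illustration):

/*
 * The "E:%c%c I(%d:%llx:%llx)" entry encodes the ESB state in the letter
 * case: an uppercase P or Q means the bit is set.  For example:
 *
 *   E:Pq I(37:2:3fe0123000)
 *
 * reads as escalation virq 37 with P=1/Q=0 (the raw pq load is 2, i.e.
 * XIVE_ESB_VAL_P) and its EOI page at 0x3fe0123000.
 */
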
\n", q->toggle, i0, i1); - } - if (xc->esc_virq[i]) { - struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); - u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); - seq_printf(m, "E:%c%c I(%d:%llx:%llx)", - (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', - (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q', - xc->esc_virq[i], pq, xd->eoi_page); - seq_printf(m, "\n"); - } - } + kvmppc_xive_debug_show_queues(m, vcpu); t_rm_h_xirr += xc->stat_rm_h_xirr; t_rm_h_ipoll += xc->stat_rm_h_ipoll; @@ -1907,17 +2209,7 @@ return 0; } -static int xive_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, xive_debug_show, inode->i_private); -} - -static const struct file_operations xive_debug_fops = { - .open = xive_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(xive_debug); static void xive_debugfs_init(struct kvmppc_xive *xive) { @@ -1948,7 +2240,7 @@ .name = "kvm-xive", .create = kvmppc_xive_create, .init = kvmppc_xive_init, - .destroy = kvmppc_xive_free, + .release = kvmppc_xive_release, .set_attr = xive_set_attr, .get_attr = xive_get_attr, .has_attr = xive_has_attr, -- Gitblit v1.6.2