From 2f7c68cb55ecb7331f2381deb497c27155f32faf Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Wed, 03 Jan 2024 09:43:39 +0000 Subject: [PATCH] update kernel to 5.10.198 --- kernel/virt/kvm/kvm_main.c | 149 ++++++++++++++++++++++++++++++++++++++----------- 1 files changed, 114 insertions(+), 35 deletions(-) diff --git a/kernel/virt/kvm/kvm_main.c b/kernel/virt/kvm/kvm_main.c index 564d5c1..356fd5d 100644 --- a/kernel/virt/kvm/kvm_main.c +++ b/kernel/virt/kvm/kvm_main.c @@ -154,6 +154,8 @@ static unsigned long long kvm_createvm_count; static unsigned long long kvm_active_vms; +static DEFINE_PER_CPU(cpumask_var_t, cpu_kick_mask); + __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, unsigned long start, unsigned long end) { @@ -248,9 +250,13 @@ { } -static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait) +static inline bool kvm_kick_many_cpus(cpumask_var_t tmp, bool wait) { - if (unlikely(!cpus)) + const struct cpumask *cpus; + + if (likely(cpumask_available(tmp))) + cpus = tmp; + else cpus = cpu_online_mask; if (cpumask_empty(cpus)) @@ -260,30 +266,57 @@ return true; } +static void kvm_make_vcpu_request(struct kvm *kvm, struct kvm_vcpu *vcpu, + unsigned int req, cpumask_var_t tmp, + int current_cpu) +{ + int cpu; + + kvm_make_request(req, vcpu); + + if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu)) + return; + + /* + * tmp can be "unavailable" if cpumasks are allocated off stack as + * allocation of the mask is deliberately not fatal and is handled by + * falling back to kicking all online CPUs. + */ + if (!cpumask_available(tmp)) + return; + + /* + * Note, the vCPU could get migrated to a different pCPU at any point + * after kvm_request_needs_ipi(), which could result in sending an IPI + * to the previous pCPU. But, that's OK because the purpose of the IPI + * is to ensure the vCPU returns to OUTSIDE_GUEST_MODE, which is + * satisfied if the vCPU migrates. Entering READING_SHADOW_PAGE_TABLES + * after this point is also OK, as the requirement is only that KVM wait + * for vCPUs that were reading SPTEs _before_ any changes were + * finalized. See kvm_vcpu_kick() for more details on handling requests. + */ + if (kvm_request_needs_ipi(vcpu, req)) { + cpu = READ_ONCE(vcpu->cpu); + if (cpu != -1 && cpu != current_cpu) + __cpumask_set_cpu(cpu, tmp); + } +} + bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, struct kvm_vcpu *except, unsigned long *vcpu_bitmap, cpumask_var_t tmp) { - int i, cpu, me; struct kvm_vcpu *vcpu; + int i, me; bool called; me = get_cpu(); - kvm_for_each_vcpu(i, vcpu, kvm) { - if ((vcpu_bitmap && !test_bit(i, vcpu_bitmap)) || - vcpu == except) + for_each_set_bit(i, vcpu_bitmap, KVM_MAX_VCPUS) { + vcpu = kvm_get_vcpu(kvm, i); + if (!vcpu || vcpu == except) continue; - - kvm_make_request(req, vcpu); - cpu = vcpu->cpu; - - if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu)) - continue; - - if (tmp != NULL && cpu != -1 && cpu != me && - kvm_request_needs_ipi(vcpu, req)) - __cpumask_set_cpu(cpu, tmp); + kvm_make_vcpu_request(kvm, vcpu, req, tmp, me); } called = kvm_kick_many_cpus(tmp, !!(req & KVM_REQUEST_WAIT)); @@ -295,14 +328,25 @@ bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req, struct kvm_vcpu *except) { - cpumask_var_t cpus; + struct kvm_vcpu *vcpu; + struct cpumask *cpus; bool called; + int i, me; - zalloc_cpumask_var(&cpus, GFP_ATOMIC); + me = get_cpu(); - called = kvm_make_vcpus_request_mask(kvm, req, except, NULL, cpus); + cpus = this_cpu_cpumask_var_ptr(cpu_kick_mask); + cpumask_clear(cpus); - free_cpumask_var(cpus); + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu == except) + continue; + kvm_make_vcpu_request(kvm, vcpu, req, cpus, me); + } + + called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT)); + put_cpu(); + return called; } @@ -2937,16 +2981,24 @@ */ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) { - int me; - int cpu = vcpu->cpu; + int me, cpu; if (kvm_vcpu_wake_up(vcpu)) return; + /* + * Note, the vCPU could get migrated to a different pCPU at any point + * after kvm_arch_vcpu_should_kick(), which could result in sending an + * IPI to the previous pCPU. But, that's ok because the purpose of the + * IPI is to force the vCPU to leave IN_GUEST_MODE, and migrating the + * vCPU also requires it to leave IN_GUEST_MODE. + */ me = get_cpu(); - if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) - if (kvm_arch_vcpu_should_kick(vcpu)) + if (kvm_arch_vcpu_should_kick(vcpu)) { + cpu = READ_ONCE(vcpu->cpu); + if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) smp_send_reschedule(cpu); + } put_cpu(); } EXPORT_SYMBOL_GPL(kvm_vcpu_kick); @@ -4952,19 +5004,21 @@ goto out_free_3; } + for_each_possible_cpu(cpu) { + if (!alloc_cpumask_var_node(&per_cpu(cpu_kick_mask, cpu), + GFP_KERNEL, cpu_to_node(cpu))) { + r = -ENOMEM; + goto out_free_4; + } + } + r = kvm_async_pf_init(); if (r) - goto out_free; + goto out_free_4; kvm_chardev_ops.owner = module; kvm_vm_fops.owner = module; kvm_vcpu_fops.owner = module; - - r = misc_register(&kvm_dev); - if (r) { - pr_err("kvm: misc device register failed\n"); - goto out_unreg; - } register_syscore_ops(&kvm_syscore_ops); @@ -4974,13 +5028,28 @@ kvm_init_debug(); r = kvm_vfio_ops_init(); - WARN_ON(r); + if (WARN_ON_ONCE(r)) + goto err_vfio; + + /* + * Registration _must_ be the very last thing done, as this exposes + * /dev/kvm to userspace, i.e. all infrastructure must be setup! + */ + r = misc_register(&kvm_dev); + if (r) { + pr_err("kvm: misc device register failed\n"); + goto err_register; + } return 0; -out_unreg: +err_register: + kvm_vfio_ops_exit(); +err_vfio: kvm_async_pf_deinit(); -out_free: +out_free_4: + for_each_possible_cpu(cpu) + free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); kmem_cache_destroy(kvm_vcpu_cache); out_free_3: unregister_reboot_notifier(&kvm_reboot_notifier); @@ -5000,8 +5069,18 @@ void kvm_exit(void) { - debugfs_remove_recursive(kvm_debugfs_dir); + int cpu; + + /* + * Note, unregistering /dev/kvm doesn't strictly need to come first, + * fops_get(), a.k.a. try_module_get(), prevents acquiring references + * to KVM while the module is being stopped. + */ misc_deregister(&kvm_dev); + + debugfs_remove_recursive(kvm_debugfs_dir); + for_each_possible_cpu(cpu) + free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); kmem_cache_destroy(kvm_vcpu_cache); kvm_async_pf_deinit(); unregister_syscore_ops(&kvm_syscore_ops); -- Gitblit v1.6.2