From ea08eeccae9297f7aabd2ef7f0c2517ac4549acc Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 20 Feb 2024 01:18:26 +0000
Subject: [PATCH] write in 30M
---
kernel/virt/kvm/kvm_main.c | 149 ++++++++++++++++++++++++++++++++++++++-----------
1 files changed, 114 insertions(+), 35 deletions(-)
diff --git a/kernel/virt/kvm/kvm_main.c b/kernel/virt/kvm/kvm_main.c
index 564d5c1..356fd5d 100644
--- a/kernel/virt/kvm/kvm_main.c
+++ b/kernel/virt/kvm/kvm_main.c
@@ -154,6 +154,8 @@
static unsigned long long kvm_createvm_count;
static unsigned long long kvm_active_vms;
+static DEFINE_PER_CPU(cpumask_var_t, cpu_kick_mask);
+
__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
unsigned long start, unsigned long end)
{
@@ -248,9 +250,13 @@
{
}
-static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait)
+static inline bool kvm_kick_many_cpus(cpumask_var_t tmp, bool wait)
{
- if (unlikely(!cpus))
+ const struct cpumask *cpus;
+
+ if (likely(cpumask_available(tmp)))
+ cpus = tmp;
+ else
cpus = cpu_online_mask;
if (cpumask_empty(cpus))
@@ -260,30 +266,57 @@
return true;
}
+static void kvm_make_vcpu_request(struct kvm *kvm, struct kvm_vcpu *vcpu,
+ unsigned int req, cpumask_var_t tmp,
+ int current_cpu)
+{
+ int cpu;
+
+ kvm_make_request(req, vcpu);
+
+ if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu))
+ return;
+
+ /*
+ * tmp can be "unavailable" if cpumasks are allocated off stack as
+ * allocation of the mask is deliberately not fatal and is handled by
+ * falling back to kicking all online CPUs.
+ */
+ if (!cpumask_available(tmp))
+ return;
+
+ /*
+ * Note, the vCPU could get migrated to a different pCPU at any point
+ * after kvm_request_needs_ipi(), which could result in sending an IPI
+ * to the previous pCPU. But, that's OK because the purpose of the IPI
+ * is to ensure the vCPU returns to OUTSIDE_GUEST_MODE, which is
+ * satisfied if the vCPU migrates. Entering READING_SHADOW_PAGE_TABLES
+ * after this point is also OK, as the requirement is only that KVM wait
+ * for vCPUs that were reading SPTEs _before_ any changes were
+ * finalized. See kvm_vcpu_kick() for more details on handling requests.
+ */
+ if (kvm_request_needs_ipi(vcpu, req)) {
+ cpu = READ_ONCE(vcpu->cpu);
+ if (cpu != -1 && cpu != current_cpu)
+ __cpumask_set_cpu(cpu, tmp);
+ }
+}
+
bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
struct kvm_vcpu *except,
unsigned long *vcpu_bitmap, cpumask_var_t tmp)
{
- int i, cpu, me;
struct kvm_vcpu *vcpu;
+ int i, me;
bool called;
me = get_cpu();
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if ((vcpu_bitmap && !test_bit(i, vcpu_bitmap)) ||
- vcpu == except)
+ for_each_set_bit(i, vcpu_bitmap, KVM_MAX_VCPUS) {
+ vcpu = kvm_get_vcpu(kvm, i);
+ if (!vcpu || vcpu == except)
continue;
-
- kvm_make_request(req, vcpu);
- cpu = vcpu->cpu;
-
- if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu))
- continue;
-
- if (tmp != NULL && cpu != -1 && cpu != me &&
- kvm_request_needs_ipi(vcpu, req))
- __cpumask_set_cpu(cpu, tmp);
+ kvm_make_vcpu_request(kvm, vcpu, req, tmp, me);
}
called = kvm_kick_many_cpus(tmp, !!(req & KVM_REQUEST_WAIT));
@@ -295,14 +328,25 @@
bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
struct kvm_vcpu *except)
{
- cpumask_var_t cpus;
+ struct kvm_vcpu *vcpu;
+ struct cpumask *cpus;
bool called;
+ int i, me;
- zalloc_cpumask_var(&cpus, GFP_ATOMIC);
+ me = get_cpu();
- called = kvm_make_vcpus_request_mask(kvm, req, except, NULL, cpus);
+ cpus = this_cpu_cpumask_var_ptr(cpu_kick_mask);
+ cpumask_clear(cpus);
- free_cpumask_var(cpus);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu == except)
+ continue;
+ kvm_make_vcpu_request(kvm, vcpu, req, cpus, me);
+ }
+
+ called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT));
+ put_cpu();
+
return called;
}
@@ -2937,16 +2981,24 @@
*/
void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
{
- int me;
- int cpu = vcpu->cpu;
+ int me, cpu;
if (kvm_vcpu_wake_up(vcpu))
return;
+ /*
+ * Note, the vCPU could get migrated to a different pCPU at any point
+ * after kvm_arch_vcpu_should_kick(), which could result in sending an
+ * IPI to the previous pCPU. But, that's ok because the purpose of the
+ * IPI is to force the vCPU to leave IN_GUEST_MODE, and migrating the
+ * vCPU also requires it to leave IN_GUEST_MODE.
+ */
me = get_cpu();
- if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
- if (kvm_arch_vcpu_should_kick(vcpu))
+ if (kvm_arch_vcpu_should_kick(vcpu)) {
+ cpu = READ_ONCE(vcpu->cpu);
+ if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
smp_send_reschedule(cpu);
+ }
put_cpu();
}
EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
@@ -4952,19 +5004,21 @@
goto out_free_3;
}
+ for_each_possible_cpu(cpu) {
+ if (!alloc_cpumask_var_node(&per_cpu(cpu_kick_mask, cpu),
+ GFP_KERNEL, cpu_to_node(cpu))) {
+ r = -ENOMEM;
+ goto out_free_4;
+ }
+ }
+
r = kvm_async_pf_init();
if (r)
- goto out_free;
+ goto out_free_4;
kvm_chardev_ops.owner = module;
kvm_vm_fops.owner = module;
kvm_vcpu_fops.owner = module;
-
- r = misc_register(&kvm_dev);
- if (r) {
- pr_err("kvm: misc device register failed\n");
- goto out_unreg;
- }
register_syscore_ops(&kvm_syscore_ops);
@@ -4974,13 +5028,28 @@
kvm_init_debug();
r = kvm_vfio_ops_init();
- WARN_ON(r);
+ if (WARN_ON_ONCE(r))
+ goto err_vfio;
+
+ /*
+ * Registration _must_ be the very last thing done, as this exposes
+ * /dev/kvm to userspace, i.e. all infrastructure must be setup!
+ */
+ r = misc_register(&kvm_dev);
+ if (r) {
+ pr_err("kvm: misc device register failed\n");
+ goto err_register;
+ }
return 0;
-out_unreg:
+err_register:
+ kvm_vfio_ops_exit();
+err_vfio:
kvm_async_pf_deinit();
-out_free:
+out_free_4:
+ for_each_possible_cpu(cpu)
+ free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
kmem_cache_destroy(kvm_vcpu_cache);
out_free_3:
unregister_reboot_notifier(&kvm_reboot_notifier);
@@ -5000,8 +5069,18 @@
void kvm_exit(void)
{
- debugfs_remove_recursive(kvm_debugfs_dir);
+ int cpu;
+
+ /*
+ * Note, unregistering /dev/kvm doesn't strictly need to come first,
+ * fops_get(), a.k.a. try_module_get(), prevents acquiring references
+ * to KVM while the module is being stopped.
+ */
misc_deregister(&kvm_dev);
+
+ debugfs_remove_recursive(kvm_debugfs_dir);
+ for_each_possible_cpu(cpu)
+ free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
kmem_cache_destroy(kvm_vcpu_cache);
kvm_async_pf_deinit();
unregister_syscore_ops(&kvm_syscore_ops);
--
Gitblit v1.6.2