@@ -154,6 +154,8 @@
 static unsigned long long kvm_createvm_count;
 static unsigned long long kvm_active_vms;
 
+static DEFINE_PER_CPU(cpumask_var_t, cpu_kick_mask);
+
 __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
						   unsigned long start, unsigned long end)
 {
@@ -248,9 +250,13 @@
 {
 }
 
-static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait)
+static inline bool kvm_kick_many_cpus(cpumask_var_t tmp, bool wait)
 {
-	if (unlikely(!cpus))
+	const struct cpumask *cpus;
+
+	if (likely(cpumask_available(tmp)))
+		cpus = tmp;
+	else
 		cpus = cpu_online_mask;
 
 	if (cpumask_empty(cpus))
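
Why the scratch mask can be "unavailable": cpumask_var_t changes representation with CONFIG_CPUMASK_OFFSTACK, and only the off-stack variant can fail to allocate. A simplified paraphrase of the relevant definitions from include/linux/cpumask.h, for illustration only (the real header is the authority):

#ifdef CONFIG_CPUMASK_OFFSTACK
/* The mask is a separately allocated object and may legitimately be NULL. */
typedef struct cpumask *cpumask_var_t;

static inline bool cpumask_available(cpumask_var_t mask)
{
	return mask != NULL;
}
#else
/* The mask is embedded in its holder and is always usable. */
typedef struct cpumask cpumask_var_t[1];

static inline bool cpumask_available(cpumask_var_t mask)
{
	return true;
}
#endif

This is why kvm_kick_many_cpus() treats an unavailable tmp as "kick every online CPU" rather than as an error.
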
@@ -260,30 +266,57 @@
 	return true;
 }
 
+static void kvm_make_vcpu_request(struct kvm *kvm, struct kvm_vcpu *vcpu,
+				  unsigned int req, cpumask_var_t tmp,
+				  int current_cpu)
+{
+	int cpu;
+
+	kvm_make_request(req, vcpu);
+
+	if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu))
+		return;
+
+	/*
+	 * tmp can be "unavailable" if cpumasks are allocated off stack as
+	 * allocation of the mask is deliberately not fatal and is handled by
+	 * falling back to kicking all online CPUs.
+	 */
+	if (!cpumask_available(tmp))
+		return;
+
+	/*
+	 * Note, the vCPU could get migrated to a different pCPU at any point
+	 * after kvm_request_needs_ipi(), which could result in sending an IPI
+	 * to the previous pCPU.  But, that's OK because the purpose of the IPI
+	 * is to ensure the vCPU returns to OUTSIDE_GUEST_MODE, which is
+	 * satisfied if the vCPU migrates.  Entering READING_SHADOW_PAGE_TABLES
+	 * after this point is also OK, as the requirement is only that KVM wait
+	 * for vCPUs that were reading SPTEs _before_ any changes were
+	 * finalized.  See kvm_vcpu_kick() for more details on handling requests.
+	 */
+	if (kvm_request_needs_ipi(vcpu, req)) {
+		cpu = READ_ONCE(vcpu->cpu);
+		if (cpu != -1 && cpu != current_cpu)
+			__cpumask_set_cpu(cpu, tmp);
+	}
+}
+
 bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
 				 struct kvm_vcpu *except,
 				 unsigned long *vcpu_bitmap, cpumask_var_t tmp)
 {
-	int i, cpu, me;
 	struct kvm_vcpu *vcpu;
+	int i, me;
 	bool called;
 
 	me = get_cpu();
 
-	kvm_for_each_vcpu(i, vcpu, kvm) {
-		if ((vcpu_bitmap && !test_bit(i, vcpu_bitmap)) ||
-		    vcpu == except)
+	for_each_set_bit(i, vcpu_bitmap, KVM_MAX_VCPUS) {
+		vcpu = kvm_get_vcpu(kvm, i);
+		if (!vcpu || vcpu == except)
 			continue;
-
-		kvm_make_request(req, vcpu);
-		cpu = vcpu->cpu;
-
-		if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu))
-			continue;
-
-		if (tmp != NULL && cpu != -1 && cpu != me &&
-		    kvm_request_needs_ipi(vcpu, req))
-			__cpumask_set_cpu(cpu, tmp);
+		kvm_make_vcpu_request(kvm, vcpu, req, tmp, me);
 	}
 
 	called = kvm_kick_many_cpus(tmp, !!(req & KVM_REQUEST_WAIT));
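
With the switch to for_each_set_bit(), kvm_make_vcpus_request_mask() assumes vcpu_bitmap is non-NULL; requests aimed at every vCPU go through kvm_make_all_cpus_request_except() below instead. A hypothetical caller sketch (the function name and the targeted vCPU are illustrative, not taken from this patch):

#include <linux/bitmap.h>
#include <linux/cpumask.h>
#include <linux/kvm_host.h>

static void example_request_some_vcpus(struct kvm *kvm, unsigned int req)
{
	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
	cpumask_var_t tmp;

	bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
	__set_bit(0, vcpu_bitmap);	/* e.g. target only vCPU 0 */

	/*
	 * Allocation failure is tolerated: an "unavailable" tmp makes the
	 * helpers fall back to kicking all online CPUs.
	 */
	zalloc_cpumask_var(&tmp, GFP_ATOMIC);

	kvm_make_vcpus_request_mask(kvm, req, NULL, vcpu_bitmap, tmp);

	free_cpumask_var(tmp);
}
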
@@ -295,14 +328,25 @@
 bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
 				      struct kvm_vcpu *except)
 {
-	cpumask_var_t cpus;
+	struct kvm_vcpu *vcpu;
+	struct cpumask *cpus;
 	bool called;
+	int i, me;
 
-	zalloc_cpumask_var(&cpus, GFP_ATOMIC);
+	me = get_cpu();
 
-	called = kvm_make_vcpus_request_mask(kvm, req, except, NULL, cpus);
+	cpus = this_cpu_cpumask_var_ptr(cpu_kick_mask);
+	cpumask_clear(cpus);
 
-	free_cpumask_var(cpus);
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (vcpu == except)
+			continue;
+		kvm_make_vcpu_request(kvm, vcpu, req, cpus, me);
+	}
+
+	called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT));
+	put_cpu();
+
 	return called;
 }
 
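
kvm_make_all_cpus_request_except() now borrows a per-CPU scratch mask instead of allocating one per call, which is only safe while preemption is disabled; the get_cpu()/put_cpu() pair provides that window. A minimal sketch of the access rule, assuming the same file-local cpu_kick_mask (illustration only):

#include <linux/cpumask.h>
#include <linux/preempt.h>

static void example_use_kick_mask(void)
{
	struct cpumask *mask;

	preempt_disable();	/* get_cpu() also disables preemption */
	mask = this_cpu_cpumask_var_ptr(cpu_kick_mask);
	cpumask_clear(mask);
	/* ... set bits and send IPIs without sleeping ... */
	preempt_enable();	/* matches put_cpu() */
}
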
@@ -2937,16 +2981,24 @@
  */
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
-	int me;
-	int cpu = vcpu->cpu;
+	int me, cpu;
 
 	if (kvm_vcpu_wake_up(vcpu))
 		return;
 
+	/*
+	 * Note, the vCPU could get migrated to a different pCPU at any point
+	 * after kvm_arch_vcpu_should_kick(), which could result in sending an
+	 * IPI to the previous pCPU.  But, that's ok because the purpose of the
+	 * IPI is to force the vCPU to leave IN_GUEST_MODE, and migrating the
+	 * vCPU also requires it to leave IN_GUEST_MODE.
+	 */
 	me = get_cpu();
-	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
-		if (kvm_arch_vcpu_should_kick(vcpu))
+	if (kvm_arch_vcpu_should_kick(vcpu)) {
+		cpu = READ_ONCE(vcpu->cpu);
+		if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
			smp_send_reschedule(cpu);
+	}
 	put_cpu();
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
@@ -4952,19 +5004,21 @@
 		goto out_free_3;
 	}
 
+	for_each_possible_cpu(cpu) {
+		if (!alloc_cpumask_var_node(&per_cpu(cpu_kick_mask, cpu),
+					    GFP_KERNEL, cpu_to_node(cpu))) {
+			r = -ENOMEM;
+			goto out_free_4;
+		}
+	}
+
 	r = kvm_async_pf_init();
 	if (r)
-		goto out_free;
+		goto out_free_4;
 
 	kvm_chardev_ops.owner = module;
 	kvm_vm_fops.owner = module;
 	kvm_vcpu_fops.owner = module;
-
-	r = misc_register(&kvm_dev);
-	if (r) {
-		pr_err("kvm: misc device register failed\n");
-		goto out_unreg;
-	}
 
 	register_syscore_ops(&kvm_syscore_ops);
 
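
The allocation loop only does real work on CONFIG_CPUMASK_OFFSTACK=y kernels; otherwise there is nothing to allocate and alloc_cpumask_var_node() simply reports success, so the -ENOMEM path cannot trigger there. A simplified paraphrase of the !OFFSTACK stubs from include/linux/cpumask.h (illustration only):

#ifndef CONFIG_CPUMASK_OFFSTACK
static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags,
					  int node)
{
	return true;	/* the mask is a fixed-size array, nothing to allocate */
}

static inline void free_cpumask_var(cpumask_var_t mask)
{
}
#endif
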
@@ -4974,13 +5028,28 @@
 	kvm_init_debug();
 
 	r = kvm_vfio_ops_init();
-	WARN_ON(r);
+	if (WARN_ON_ONCE(r))
+		goto err_vfio;
+
+	/*
+	 * Registration _must_ be the very last thing done, as this exposes
+	 * /dev/kvm to userspace, i.e. all infrastructure must be setup!
+	 */
+	r = misc_register(&kvm_dev);
+	if (r) {
+		pr_err("kvm: misc device register failed\n");
+		goto err_register;
+	}
 
 	return 0;
 
-out_unreg:
+err_register:
+	kvm_vfio_ops_exit();
+err_vfio:
 	kvm_async_pf_deinit();
-out_free:
+out_free_4:
+	for_each_possible_cpu(cpu)
+		free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
 	kmem_cache_destroy(kvm_vcpu_cache);
 out_free_3:
 	unregister_reboot_notifier(&kvm_reboot_notifier);
@@ -5000,8 +5069,18 @@
 
 void kvm_exit(void)
 {
-	debugfs_remove_recursive(kvm_debugfs_dir);
+	int cpu;
+
+	/*
+	 * Note, unregistering /dev/kvm doesn't strictly need to come first,
+	 * fops_get(), a.k.a. try_module_get(), prevents acquiring references
+	 * to KVM while the module is being stopped.
+	 */
 	misc_deregister(&kvm_dev);
+
+	debugfs_remove_recursive(kvm_debugfs_dir);
+	for_each_possible_cpu(cpu)
+		free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
 	kmem_cache_destroy(kvm_vcpu_cache);
 	kvm_async_pf_deinit();
 	unregister_syscore_ops(&kvm_syscore_ops);