..
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * KVM paravirt_ops implementation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  *
  * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  * Copyright IBM Corporation, 2007
  *   Authors: Anthony Liguori <aliguori@us.ibm.com>
  */

+#define pr_fmt(fmt) "kvm-guest: " fmt
+
 #include <linux/context_tracking.h>
 #include <linux/init.h>
+#include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/kvm_para.h>
 #include <linux/cpu.h>
..
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/kprobes.h>
-#include <linux/debugfs.h>
 #include <linux/nmi.h>
 #include <linux/swait.h>
+#include <linux/syscore_ops.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
 #include <asm/traps.h>
..
 #include <asm/apicdef.h>
 #include <asm/hypervisor.h>
 #include <asm/tlb.h>
+#include <asm/cpuidle_haltpoll.h>
+#include <asm/ptrace.h>
+#include <asm/reboot.h>
+#include <asm/svm.h>
+
+DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);

 static int kvmapf = 1;

..
 early_param("no-steal-acc", parse_no_stealacc);

 static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
-static DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64);
+DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
 static int has_steal_clock = 0;

+static int has_guest_poll = 0;
 /*
  * No need for any "IO delay" on KVM
  */
..
         struct swait_queue_head wq;
         u32 token;
         int cpu;
-        bool halted;
 };

 static struct kvm_task_sleep_head {
..
         return NULL;
 }

-/*
- * @interrupt_kernel: Is this called from a routine which interrupts the kernel
- * (other than user space)?
- */
-void kvm_async_pf_task_wait(u32 token, int interrupt_kernel)
+static bool kvm_async_pf_queue_task(u32 token, struct kvm_task_sleep_node *n)
 {
         u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
         struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
-        struct kvm_task_sleep_node n, *e;
-        DECLARE_SWAITQUEUE(wait);
-
-        rcu_irq_enter();
+        struct kvm_task_sleep_node *e;

         raw_spin_lock(&b->lock);
         e = _find_apf_task(b, token);
         if (e) {
                 /* dummy entry exist -> wake up was delivered ahead of PF */
                 hlist_del(&e->link);
-                kfree(e);
                 raw_spin_unlock(&b->lock);
-
-                rcu_irq_exit();
-                return;
+                kfree(e);
+                return false;
         }

-        n.token = token;
-        n.cpu = smp_processor_id();
-        n.halted = is_idle_task(current) ||
-                   (IS_ENABLED(CONFIG_PREEMPT_COUNT)
-                    ? preempt_count() > 1 || rcu_preempt_depth()
-                    : interrupt_kernel);
-        init_swait_queue_head(&n.wq);
-        hlist_add_head(&n.link, &b->list);
+        n->token = token;
+        n->cpu = smp_processor_id();
+        init_swait_queue_head(&n->wq);
+        hlist_add_head(&n->link, &b->list);
         raw_spin_unlock(&b->lock);
+        return true;
+}
+
+/*
+ * kvm_async_pf_task_wait_schedule - Wait for pagefault to be handled
+ * @token:	Token to identify the sleep node entry
+ *
+ * Invoked from the async pagefault handling code or from the VM exit page
+ * fault handler. In both cases RCU is watching.
+ */
+void kvm_async_pf_task_wait_schedule(u32 token)
+{
+        struct kvm_task_sleep_node n;
+        DECLARE_SWAITQUEUE(wait);
+
+        lockdep_assert_irqs_disabled();
+
+        if (!kvm_async_pf_queue_task(token, &n))
+                return;

         for (;;) {
-                if (!n.halted)
-                        prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
+                prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
                 if (hlist_unhashed(&n.link))
                         break;

-                rcu_irq_exit();
-
-                if (!n.halted) {
-                        local_irq_enable();
-                        schedule();
-                        local_irq_disable();
-                } else {
-                        /*
-                         * We cannot reschedule. So halt.
-                         */
-                        native_safe_halt();
-                        local_irq_disable();
-                }
-
-                rcu_irq_enter();
+                local_irq_enable();
+                schedule();
+                local_irq_disable();
         }
-        if (!n.halted)
-                finish_swait(&n.wq, &wait);
-
-        rcu_irq_exit();
-        return;
+        finish_swait(&n.wq, &wait);
 }
-EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
+EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait_schedule);

 static void apf_task_wake_one(struct kvm_task_sleep_node *n)
 {
         hlist_del_init(&n->link);
-        if (n->halted)
-                smp_send_reschedule(n->cpu);
-        else if (swq_has_sleeper(&n->wq))
+        if (swq_has_sleeper(&n->wq))
                 swake_up_one(&n->wq);
 }

..
         int i;

         for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
-                struct hlist_node *p, *next;
                 struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
+                struct kvm_task_sleep_node *n;
+                struct hlist_node *p, *next;
+
                 raw_spin_lock(&b->lock);
                 hlist_for_each_safe(p, next, &b->list) {
-                        struct kvm_task_sleep_node *n =
-                                hlist_entry(p, typeof(*n), link);
+                        n = hlist_entry(p, typeof(*n), link);
                         if (n->cpu == smp_processor_id())
                                 apf_task_wake_one(n);
                 }
..
 {
         u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
         struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
-        struct kvm_task_sleep_node *n;
+        struct kvm_task_sleep_node *n, *dummy = NULL;

         if (token == ~0) {
                 apf_task_wake_all();
..
         n = _find_apf_task(b, token);
         if (!n) {
                 /*
-                 * async PF was not yet handled.
-                 * Add dummy entry for the token.
+                 * Async #PF not yet handled, add a dummy entry for the token.
+                 * Allocating the token must be down outside of the raw lock
+                 * as the allocator is preemptible on PREEMPT_RT kernels.
                  */
-                n = kzalloc(sizeof(*n), GFP_ATOMIC);
-                if (!n) {
-                        /*
-                         * Allocation failed! Busy wait while other cpu
-                         * handles async PF.
-                         */
+                if (!dummy) {
                         raw_spin_unlock(&b->lock);
-                        cpu_relax();
+                        dummy = kzalloc(sizeof(*dummy), GFP_ATOMIC);
+
+                        /*
+                         * Continue looping on allocation failure, eventually
+                         * the async #PF will be handled and allocating a new
+                         * node will be unnecessary.
+                         */
+                        if (!dummy)
+                                cpu_relax();
+
+                        /*
+                         * Recheck for async #PF completion before enqueueing
+                         * the dummy token to avoid duplicate list entries.
+                         */
                         goto again;
                 }
-                n->token = token;
-                n->cpu = smp_processor_id();
-                init_swait_queue_head(&n->wq);
-                hlist_add_head(&n->link, &b->list);
-        } else
+                dummy->token = token;
+                dummy->cpu = smp_processor_id();
+                init_swait_queue_head(&dummy->wq);
+                hlist_add_head(&dummy->link, &b->list);
+                dummy = NULL;
+        } else {
                 apf_task_wake_one(n);
+        }
         raw_spin_unlock(&b->lock);
-        return;
+
+        /* A dummy token might be allocated and ultimately not used. */
+        if (dummy)
+                kfree(dummy);
 }
 EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);

-u32 kvm_read_and_reset_pf_reason(void)
+noinstr u32 kvm_read_and_reset_apf_flags(void)
 {
-        u32 reason = 0;
+        u32 flags = 0;

         if (__this_cpu_read(apf_reason.enabled)) {
-                reason = __this_cpu_read(apf_reason.reason);
-                __this_cpu_write(apf_reason.reason, 0);
+                flags = __this_cpu_read(apf_reason.flags);
+                __this_cpu_write(apf_reason.flags, 0);
         }

-        return reason;
+        return flags;
 }
-EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
-NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
+EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);

-dotraplinkage void
-do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
+noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
 {
-        enum ctx_state prev_state;
+        u32 flags = kvm_read_and_reset_apf_flags();
+        irqentry_state_t state;

-        switch (kvm_read_and_reset_pf_reason()) {
-        default:
-                do_page_fault(regs, error_code);
-                break;
-        case KVM_PV_REASON_PAGE_NOT_PRESENT:
-                /* page is swapped out by the host. */
-                prev_state = exception_enter();
-                kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
-                exception_exit(prev_state);
-                break;
-        case KVM_PV_REASON_PAGE_READY:
-                rcu_irq_enter();
-                kvm_async_pf_task_wake((u32)read_cr2());
-                rcu_irq_exit();
-                break;
+        if (!flags)
+                return false;
+
+        state = irqentry_enter(regs);
+        instrumentation_begin();
+
+        /*
+         * If the host managed to inject an async #PF into an interrupt
+         * disabled region, then die hard as this is not going to end well
+         * and the host side is seriously broken.
+         */
+        if (unlikely(!(regs->flags & X86_EFLAGS_IF)))
+                panic("Host injected async #PF in interrupt disabled region\n");
+
+        if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) {
+                if (unlikely(!(user_mode(regs))))
+                        panic("Host injected async #PF in kernel mode\n");
+                /* Page is swapped out by the host. */
+                kvm_async_pf_task_wait_schedule(token);
+        } else {
+                WARN_ONCE(1, "Unexpected async PF flags: %x\n", flags);
         }
+
+        instrumentation_end();
+        irqentry_exit(regs, state);
+        return true;
 }
-NOKPROBE_SYMBOL(do_async_page_fault);
+
+DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
+{
+        struct pt_regs *old_regs = set_irq_regs(regs);
+        u32 token;
+
+        ack_APIC_irq();
+
+        inc_irq_stat(irq_hv_callback_count);
+
+        if (__this_cpu_read(apf_reason.enabled)) {
+                token = __this_cpu_read(apf_reason.token);
+                kvm_async_pf_task_wake(token);
+                __this_cpu_write(apf_reason.token, 0);
+                wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
+        }
+
+        set_irq_regs(old_regs);
+}

 static void __init paravirt_ops_setup(void)
 {
         pv_info.name = "KVM";

         if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
-                pv_cpu_ops.io_delay = kvm_io_delay;
+                pv_ops.cpu.io_delay = kvm_io_delay;

 #ifdef CONFIG_X86_IO_APIC
         no_timer_check = 1;
..
                 return;

         wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
-        pr_info("kvm-stealtime: cpu %d, msr %llx\n",
-                cpu, (unsigned long long) slow_virt_to_phys(st));
+        pr_info("stealtime: cpu %d, msr %llx\n", cpu,
+                (unsigned long long) slow_virt_to_phys(st));
 }

 static DEFINE_PER_CPU_DECRYPTED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
..

 static void kvm_guest_cpu_init(void)
 {
-        if (!kvm_para_available())
-                return;
-
-        if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
+        if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
                 u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));

-#ifdef CONFIG_PREEMPT
-                pa |= KVM_ASYNC_PF_SEND_ALWAYS;
-#endif
-                pa |= KVM_ASYNC_PF_ENABLED;
+                WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled));
+
+                pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
+                pa |= KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;

                 if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
                         pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;

+                wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
+
                 wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
                 __this_cpu_write(apf_reason.enabled, 1);
-                printk(KERN_INFO"KVM setup async PF for cpu %d\n",
-                       smp_processor_id());
+                pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
         }

         if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
                 unsigned long pa;
+
                 /* Size alignment is implied but just to make it explicit. */
                 BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
                 __this_cpu_write(kvm_apic_eoi, 0);
..
         wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
         __this_cpu_write(apf_reason.enabled, 0);

-        printk(KERN_INFO"Unregister pv shared memory for cpu %d\n",
-               smp_processor_id());
+        pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
+}
+
+static void kvm_disable_steal_time(void)
+{
+        if (!has_steal_clock)
+                return;
+
+        wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
 }

 static void kvm_pv_guest_cpu_reboot(void *unused)
..
         return steal;
 }

-void kvm_disable_steal_time(void)
-{
-        if (!has_steal_clock)
-                return;
-
-        wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
-}
-
 static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
 {
         early_set_memory_decrypted((unsigned long) ptr, size);
..
         }
 }

+static bool pv_tlb_flush_supported(void)
+{
+        return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+                !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
+                kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
+}
+
+static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
+
+static void kvm_guest_cpu_offline(bool shutdown)
+{
+        kvm_disable_steal_time();
+        if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+                wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+        kvm_pv_disable_apf();
+        if (!shutdown)
+                apf_task_wake_all();
+        kvmclock_disable();
+}
+
+static int kvm_cpu_online(unsigned int cpu)
+{
+        unsigned long flags;
+
+        local_irq_save(flags);
+        kvm_guest_cpu_init();
+        local_irq_restore(flags);
+        return 0;
+}
+
 #ifdef CONFIG_SMP
+
+static bool pv_ipi_supported(void)
+{
+        return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI);
+}
+
+static bool pv_sched_yield_supported(void)
+{
+        return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
+                !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
+                kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
+}
+
 #define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)

 static void __send_ipi_mask(const struct cpumask *mask, int vector)
..
                 } else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) {
                         ipi_bitmap <<= min - apic_id;
                         min = apic_id;
-                } else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) {
+                } else if (apic_id > min && apic_id < min + KVM_IPI_CLUSTER_SIZE) {
                         max = apic_id < max ? max : apic_id;
                 } else {
                         ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
                                 (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
-                        WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
+                        WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
+                                  ret);
                         min = max = apic_id;
                         ipi_bitmap = 0;
                 }
..
         if (ipi_bitmap) {
                 ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
                         (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
-                WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
+                WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
+                          ret);
         }

         local_irq_restore(flags);
..
 static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
 {
         unsigned int this_cpu = smp_processor_id();
-        struct cpumask new_mask;
+        struct cpumask *new_mask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
         const struct cpumask *local_mask;

-        cpumask_copy(&new_mask, mask);
-        cpumask_clear_cpu(this_cpu, &new_mask);
-        local_mask = &new_mask;
+        cpumask_copy(new_mask, mask);
+        cpumask_clear_cpu(this_cpu, new_mask);
+        local_mask = new_mask;
         __send_ipi_mask(local_mask, vector);
-}
-
-static void kvm_send_ipi_allbutself(int vector)
-{
-        kvm_send_ipi_mask_allbutself(cpu_online_mask, vector);
-}
-
-static void kvm_send_ipi_all(int vector)
-{
-        __send_ipi_mask(cpu_online_mask, vector);
 }

 /*
..
 {
         apic->send_IPI_mask = kvm_send_ipi_mask;
         apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
-        apic->send_IPI_allbutself = kvm_send_ipi_allbutself;
-        apic->send_IPI_all = kvm_send_ipi_all;
-        pr_info("KVM setup pv IPIs\n");
+        pr_info("setup PV IPIs\n");
 }

-static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
+static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
 {
-        native_smp_prepare_cpus(max_cpus);
-        if (kvm_para_has_hint(KVM_HINTS_REALTIME))
-                static_branch_disable(&virt_spin_lock_key);
+        int cpu;
+
+        native_send_call_func_ipi(mask);
+
+        /* Make sure other vCPUs get a chance to run if they need to. */
+        for_each_cpu(cpu, mask) {
+                if (vcpu_is_preempted(cpu)) {
+                        kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
+                        break;
+                }
+        }
 }

 static void __init kvm_smp_prepare_boot_cpu(void)
..
         kvm_spinlock_init();
 }

-static void kvm_guest_cpu_offline(void)
-{
-        kvm_disable_steal_time();
-        if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
-                wrmsrl(MSR_KVM_PV_EOI_EN, 0);
-        kvm_pv_disable_apf();
-        apf_task_wake_all();
-}
-
-static int kvm_cpu_online(unsigned int cpu)
-{
-        local_irq_disable();
-        kvm_guest_cpu_init();
-        local_irq_enable();
-        return 0;
-}
-
 static int kvm_cpu_down_prepare(unsigned int cpu)
 {
-        local_irq_disable();
-        kvm_guest_cpu_offline();
-        local_irq_enable();
+        unsigned long flags;
+
+        local_irq_save(flags);
+        kvm_guest_cpu_offline(false);
+        local_irq_restore(flags);
         return 0;
 }
+
 #endif

-static void __init kvm_apf_trap_init(void)
+static int kvm_suspend(void)
 {
-        update_intr_gate(X86_TRAP_PF, async_page_fault);
+        u64 val = 0;
+
+        kvm_guest_cpu_offline(false);
+
+#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
+        if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
+                rdmsrl(MSR_KVM_POLL_CONTROL, val);
+        has_guest_poll = !(val & 1);
+#endif
+        return 0;
 }

-static DEFINE_PER_CPU(cpumask_var_t, __pv_tlb_mask);
+static void kvm_resume(void)
+{
+        kvm_cpu_online(raw_smp_processor_id());
+
+#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
+        if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL) && has_guest_poll)
+                wrmsrl(MSR_KVM_POLL_CONTROL, 0);
+#endif
+}
+
+static struct syscore_ops kvm_syscore_ops = {
+        .suspend        = kvm_suspend,
+        .resume         = kvm_resume,
+};
+
+/*
+ * After a PV feature is registered, the host will keep writing to the
+ * registered memory location. If the guest happens to shutdown, this memory
+ * won't be valid. In cases like kexec, in which you install a new kernel, this
+ * means a random memory location will be kept being written.
+ */
+#ifdef CONFIG_KEXEC_CORE
+static void kvm_crash_shutdown(struct pt_regs *regs)
+{
+        kvm_guest_cpu_offline(true);
+        native_machine_crash_shutdown(regs);
+}
+#endif

 static void kvm_flush_tlb_others(const struct cpumask *cpumask,
                         const struct flush_tlb_info *info)
..
         u8 state;
         int cpu;
         struct kvm_steal_time *src;
-        struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_tlb_mask);
+        struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);

         cpumask_copy(flushmask, cpumask);
         /*
..
 {
         int i;

-        if (!kvm_para_available())
-                return;
-
         paravirt_ops_setup();
         register_reboot_notifier(&kvm_pv_reboot_nb);
         for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
                 raw_spin_lock_init(&async_pf_sleepers[i].lock);
-        if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
-                x86_init.irqs.trap_init = kvm_apf_trap_init;

         if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
                 has_steal_clock = 1;
-                pv_time_ops.steal_clock = kvm_steal_clock;
+                pv_ops.time.steal_clock = kvm_steal_clock;
         }

-        if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
-            !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
-            kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
-                pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
-                pv_mmu_ops.tlb_remove_table = tlb_remove_table;
+        if (pv_tlb_flush_supported()) {
+                pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
+                pv_ops.mmu.tlb_remove_table = tlb_remove_table;
+                pr_info("KVM setup pv remote TLB flush\n");
         }

         if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
                 apic_set_eoi_write(kvm_guest_apic_eoi_write);

+        if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
+                static_branch_enable(&kvm_async_pf_enabled);
+                alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_kvm_asyncpf_interrupt);
+        }
+
 #ifdef CONFIG_SMP
-        smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
         smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
+        if (pv_sched_yield_supported()) {
+                smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
+                pr_info("setup PV sched yield\n");
+        }
         if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
                                       kvm_cpu_online, kvm_cpu_down_prepare) < 0)
-                pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n");
+                pr_err("failed to install cpu hotplug callbacks\n");
 #else
         sev_map_percpu_data();
         kvm_guest_cpu_init();
 #endif
+
+#ifdef CONFIG_KEXEC_CORE
+        machine_ops.crash_shutdown = kvm_crash_shutdown;
+#endif
+
+        register_syscore_ops(&kvm_syscore_ops);

         /*
          * Hard lockup detection is enabled by default. Disable it, as guests
..
 {
         return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES);
 }
+EXPORT_SYMBOL_GPL(kvm_arch_para_hints);

 static uint32_t __init kvm_detect(void)
 {
..
 static void __init kvm_apic_init(void)
 {
 #if defined(CONFIG_SMP)
-        if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI))
+        if (pv_ipi_supported())
                 kvm_setup_pv_ipi();
 #endif
 }
..
         x86_platform.apic_post_init = kvm_apic_init;
 }

+#if defined(CONFIG_AMD_MEM_ENCRYPT)
+static void kvm_sev_es_hcall_prepare(struct ghcb *ghcb, struct pt_regs *regs)
+{
+        /* RAX and CPL are already in the GHCB */
+        ghcb_set_rbx(ghcb, regs->bx);
+        ghcb_set_rcx(ghcb, regs->cx);
+        ghcb_set_rdx(ghcb, regs->dx);
+        ghcb_set_rsi(ghcb, regs->si);
+}
+
+static bool kvm_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
+{
+        /* No checking of the return state needed */
+        return true;
+}
+#endif
+
 const __initconst struct hypervisor_x86 x86_hyper_kvm = {
-        .name                   = "KVM",
-        .detect                 = kvm_detect,
-        .type                   = X86_HYPER_KVM,
-        .init.guest_late_init   = kvm_guest_init,
-        .init.x2apic_available  = kvm_para_available,
-        .init.init_platform     = kvm_init_platform,
+        .name                           = "KVM",
+        .detect                         = kvm_detect,
+        .type                           = X86_HYPER_KVM,
+        .init.guest_late_init           = kvm_guest_init,
+        .init.x2apic_available          = kvm_para_available,
+        .init.init_platform             = kvm_init_platform,
+#if defined(CONFIG_AMD_MEM_ENCRYPT)
+        .runtime.sev_es_hcall_prepare   = kvm_sev_es_hcall_prepare,
+        .runtime.sev_es_hcall_finish    = kvm_sev_es_hcall_finish,
+#endif
 };

 static __init int activate_jump_labels(void)
..
 }
 arch_initcall(activate_jump_labels);

-static __init int kvm_setup_pv_tlb_flush(void)
+static __init int kvm_alloc_cpumask(void)
 {
         int cpu;
+        bool alloc = false;

-        if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
-            !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
-            kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+        if (!kvm_para_available() || nopv)
+                return 0;
+
+        if (pv_tlb_flush_supported())
+                alloc = true;
+
+#if defined(CONFIG_SMP)
+        if (pv_ipi_supported())
+                alloc = true;
+#endif
+
+        if (alloc)
                 for_each_possible_cpu(cpu) {
-                        zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
+                        zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
                                 GFP_KERNEL, cpu_to_node(cpu));
                 }
-                pr_info("KVM setup pv remote TLB flush\n");
-        }

         return 0;
 }
-arch_initcall(kvm_setup_pv_tlb_flush);
+arch_initcall(kvm_alloc_cpumask);

 #ifdef CONFIG_PARAVIRT_SPINLOCKS

..
 "movq __per_cpu_offset(,%rdi,8), %rax;"
 "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
 "setne %al;"
-"ret;"
+ASM_RET
 ".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;"
 ".popsection");

..
  */
 void __init kvm_spinlock_init(void)
 {
-        if (!kvm_para_available())
+        /*
+         * In case host doesn't support KVM_FEATURE_PV_UNHALT there is still an
+         * advantage of keeping virt_spin_lock_key enabled: virt_spin_lock() is
+         * preferred over native qspinlock when vCPU is preempted.
+         */
+        if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) {
+                pr_info("PV spinlocks disabled, no host support\n");
                 return;
-        /* Does host kernel support KVM_FEATURE_PV_UNHALT? */
-        if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
-                return;
+        }

-        if (kvm_para_has_hint(KVM_HINTS_REALTIME))
-                return;
+        /*
+         * Disable PV spinlocks and use native qspinlock when dedicated pCPUs
+         * are available.
+         */
+        if (kvm_para_has_hint(KVM_HINTS_REALTIME)) {
+                pr_info("PV spinlocks disabled with KVM_HINTS_REALTIME hints\n");
+                goto out;
+        }

-        /* Don't use the pvqspinlock code if there is only 1 vCPU. */
-        if (num_possible_cpus() == 1)
-                return;
+        if (num_possible_cpus() == 1) {
+                pr_info("PV spinlocks disabled, single CPU\n");
+                goto out;
+        }
+
+        if (nopvspin) {
+                pr_info("PV spinlocks disabled, forced by \"nopvspin\" parameter\n");
+                goto out;
+        }
+
+        pr_info("PV spinlocks enabled\n");

         __pv_init_lock_hash();
-        pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
-        pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
-        pv_lock_ops.wait = kvm_wait;
-        pv_lock_ops.kick = kvm_kick_cpu;
+        pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
+        pv_ops.lock.queued_spin_unlock =
+                PV_CALLEE_SAVE(__pv_queued_spin_unlock);
+        pv_ops.lock.wait = kvm_wait;
+        pv_ops.lock.kick = kvm_kick_cpu;

         if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
-                pv_lock_ops.vcpu_is_preempted =
+                pv_ops.lock.vcpu_is_preempted =
                         PV_CALLEE_SAVE(__kvm_vcpu_is_preempted);
         }
+        /*
+         * When PV spinlock is enabled which is preferred over
+         * virt_spin_lock(), virt_spin_lock_key's value is meaningless.
+         * Just disable it anyway.
+         */
+out:
+        static_branch_disable(&virt_spin_lock_key);
 }

 #endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
+
+static void kvm_disable_host_haltpoll(void *i)
+{
+        wrmsrl(MSR_KVM_POLL_CONTROL, 0);
+}
+
+static void kvm_enable_host_haltpoll(void *i)
+{
+        wrmsrl(MSR_KVM_POLL_CONTROL, 1);
+}
+
+void arch_haltpoll_enable(unsigned int cpu)
+{
+        if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) {
+                pr_err_once("host does not support poll control\n");
+                pr_err_once("host upgrade recommended\n");
+                return;
+        }
+
+        /* Enable guest halt poll disables host halt poll */
+        smp_call_function_single(cpu, kvm_disable_host_haltpoll, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(arch_haltpoll_enable);
+
+void arch_haltpoll_disable(unsigned int cpu)
+{
+        if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
+                return;
+
+        /* Disable guest halt poll enables host halt poll */
+        smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(arch_haltpoll_disable);
+#endif