+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * KVM paravirt_ops implementation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  *
  * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  * Copyright IBM Corporation, 2007
  *   Authors: Anthony Liguori <aliguori@us.ibm.com>
  */

+#define pr_fmt(fmt) "kvm-guest: " fmt
+
 #include <linux/context_tracking.h>
 #include <linux/init.h>
+#include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/kvm_para.h>
 #include <linux/cpu.h>
@@ ... @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/kprobes.h>
-#include <linux/debugfs.h>
 #include <linux/nmi.h>
 #include <linux/swait.h>
+#include <linux/syscore_ops.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
 #include <asm/traps.h>
@@ ... @@
 #include <asm/apicdef.h>
 #include <asm/hypervisor.h>
 #include <asm/tlb.h>
+#include <asm/cpuidle_haltpoll.h>
+#include <asm/ptrace.h>
+#include <asm/reboot.h>
+#include <asm/svm.h>
+
+DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);

 static int kvmapf = 1;

@@ ... @@
 early_param("no-steal-acc", parse_no_stealacc);

 static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
-static DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64);
+DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
 static int has_steal_clock = 0;

+static int has_guest_poll = 0;
 /*
  * No need for any "IO delay" on KVM
  */
@@ ... @@
 	struct swait_queue_head wq;
 	u32 token;
 	int cpu;
-	bool halted;
 };

 static struct kvm_task_sleep_head {
@@ ... @@
 	return NULL;
 }

-/*
- * @interrupt_kernel: Is this called from a routine which interrupts the kernel
- * (other than user space)?
- */
-void kvm_async_pf_task_wait(u32 token, int interrupt_kernel)
+static bool kvm_async_pf_queue_task(u32 token, struct kvm_task_sleep_node *n)
 {
 	u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
 	struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
-	struct kvm_task_sleep_node n, *e;
-	DECLARE_SWAITQUEUE(wait);
-
-	rcu_irq_enter();
+	struct kvm_task_sleep_node *e;

 	raw_spin_lock(&b->lock);
 	e = _find_apf_task(b, token);
 	if (e) {
 		/* dummy entry exist -> wake up was delivered ahead of PF */
 		hlist_del(&e->link);
-		kfree(e);
 		raw_spin_unlock(&b->lock);
-
-		rcu_irq_exit();
-		return;
+		kfree(e);
+		return false;
 	}

-	n.token = token;
-	n.cpu = smp_processor_id();
-	n.halted = is_idle_task(current) ||
-		   (IS_ENABLED(CONFIG_PREEMPT_COUNT)
-		    ? preempt_count() > 1 || rcu_preempt_depth()
-		    : interrupt_kernel);
-	init_swait_queue_head(&n.wq);
-	hlist_add_head(&n.link, &b->list);
+	n->token = token;
+	n->cpu = smp_processor_id();
+	init_swait_queue_head(&n->wq);
+	hlist_add_head(&n->link, &b->list);
 	raw_spin_unlock(&b->lock);
+	return true;
+}
+
+/*
+ * kvm_async_pf_task_wait_schedule - Wait for pagefault to be handled
+ * @token: Token to identify the sleep node entry
+ *
+ * Invoked from the async pagefault handling code or from the VM exit page
+ * fault handler. In both cases RCU is watching.
+ */
+void kvm_async_pf_task_wait_schedule(u32 token)
+{
+	struct kvm_task_sleep_node n;
+	DECLARE_SWAITQUEUE(wait);
+
+	lockdep_assert_irqs_disabled();
+
+	if (!kvm_async_pf_queue_task(token, &n))
+		return;

 	for (;;) {
-		if (!n.halted)
-			prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
 		if (hlist_unhashed(&n.link))
 			break;

-		rcu_irq_exit();
-
-		if (!n.halted) {
-			local_irq_enable();
-			schedule();
-			local_irq_disable();
-		} else {
-			/*
-			 * We cannot reschedule. So halt.
-			 */
-			native_safe_halt();
-			local_irq_disable();
-		}
-
-		rcu_irq_enter();
+		local_irq_enable();
+		schedule();
+		local_irq_disable();
 	}
-	if (!n.halted)
-		finish_swait(&n.wq, &wait);
-
-	rcu_irq_exit();
-	return;
+	finish_swait(&n.wq, &wait);
 }
-EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
+EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait_schedule);

 static void apf_task_wake_one(struct kvm_task_sleep_node *n)
 {
 	hlist_del_init(&n->link);
-	if (n->halted)
-		smp_send_reschedule(n->cpu);
-	else if (swq_has_sleeper(&n->wq))
+	if (swq_has_sleeper(&n->wq))
 		swake_up_one(&n->wq);
 }

@@ ... @@
 	int i;

 	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
-		struct hlist_node *p, *next;
 		struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
+		struct kvm_task_sleep_node *n;
+		struct hlist_node *p, *next;
+
 		raw_spin_lock(&b->lock);
 		hlist_for_each_safe(p, next, &b->list) {
-			struct kvm_task_sleep_node *n =
-				hlist_entry(p, typeof(*n), link);
+			n = hlist_entry(p, typeof(*n), link);
 			if (n->cpu == smp_processor_id())
 				apf_task_wake_one(n);
 		}
@@ ... @@
 {
 	u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
 	struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
-	struct kvm_task_sleep_node *n;
+	struct kvm_task_sleep_node *n, *dummy = NULL;

 	if (token == ~0) {
 		apf_task_wake_all();
@@ ... @@
 	n = _find_apf_task(b, token);
 	if (!n) {
 		/*
-		 * async PF was not yet handled.
-		 * Add dummy entry for the token.
+		 * Async #PF not yet handled, add a dummy entry for the token.
+		 * Allocating the token must be done outside of the raw lock
+		 * as the allocator is preemptible on PREEMPT_RT kernels.
 		 */
-		n = kzalloc(sizeof(*n), GFP_ATOMIC);
-		if (!n) {
-			/*
-			 * Allocation failed! Busy wait while other cpu
-			 * handles async PF.
-			 */
+		if (!dummy) {
 			raw_spin_unlock(&b->lock);
-			cpu_relax();
+			dummy = kzalloc(sizeof(*dummy), GFP_ATOMIC);
+
+			/*
+			 * Continue looping on allocation failure, eventually
+			 * the async #PF will be handled and allocating a new
+			 * node will be unnecessary.
+			 */
+			if (!dummy)
+				cpu_relax();
+
+			/*
+			 * Recheck for async #PF completion before enqueueing
+			 * the dummy token to avoid duplicate list entries.
+			 */
 			goto again;
 		}
-		n->token = token;
-		n->cpu = smp_processor_id();
-		init_swait_queue_head(&n->wq);
-		hlist_add_head(&n->link, &b->list);
-	} else
+		dummy->token = token;
+		dummy->cpu = smp_processor_id();
+		init_swait_queue_head(&dummy->wq);
+		hlist_add_head(&dummy->link, &b->list);
+		dummy = NULL;
+	} else {
 		apf_task_wake_one(n);
+	}
 	raw_spin_unlock(&b->lock);
-	return;
+
+	/* A dummy token might be allocated and ultimately not used. */
+	if (dummy)
+		kfree(dummy);
 }
 EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);

-u32 kvm_read_and_reset_pf_reason(void)
+noinstr u32 kvm_read_and_reset_apf_flags(void)
 {
-	u32 reason = 0;
+	u32 flags = 0;

 	if (__this_cpu_read(apf_reason.enabled)) {
-		reason = __this_cpu_read(apf_reason.reason);
-		__this_cpu_write(apf_reason.reason, 0);
+		flags = __this_cpu_read(apf_reason.flags);
+		__this_cpu_write(apf_reason.flags, 0);
 	}

-	return reason;
+	return flags;
 }
-EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
-NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
+EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);

-dotraplinkage void
-do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
+noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
 {
-	enum ctx_state prev_state;
+	u32 flags = kvm_read_and_reset_apf_flags();
+	irqentry_state_t state;

-	switch (kvm_read_and_reset_pf_reason()) {
-	default:
-		do_page_fault(regs, error_code);
-		break;
-	case KVM_PV_REASON_PAGE_NOT_PRESENT:
-		/* page is swapped out by the host. */
-		prev_state = exception_enter();
-		kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
-		exception_exit(prev_state);
-		break;
-	case KVM_PV_REASON_PAGE_READY:
-		rcu_irq_enter();
-		kvm_async_pf_task_wake((u32)read_cr2());
-		rcu_irq_exit();
-		break;
+	if (!flags)
+		return false;
+
+	state = irqentry_enter(regs);
+	instrumentation_begin();
+
+	/*
+	 * If the host managed to inject an async #PF into an interrupt
+	 * disabled region, then die hard as this is not going to end well
+	 * and the host side is seriously broken.
+	 */
+	if (unlikely(!(regs->flags & X86_EFLAGS_IF)))
+		panic("Host injected async #PF in interrupt disabled region\n");
+
+	if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) {
+		if (unlikely(!(user_mode(regs))))
+			panic("Host injected async #PF in kernel mode\n");
+		/* Page is swapped out by the host. */
+		kvm_async_pf_task_wait_schedule(token);
+	} else {
+		WARN_ONCE(1, "Unexpected async PF flags: %x\n", flags);
 	}
+
+	instrumentation_end();
+	irqentry_exit(regs, state);
+	return true;
 }
-NOKPROBE_SYMBOL(do_async_page_fault);
+
+DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	u32 token;
+
+	ack_APIC_irq();
+
+	inc_irq_stat(irq_hv_callback_count);
+
+	if (__this_cpu_read(apf_reason.enabled)) {
+		token = __this_cpu_read(apf_reason.token);
+		kvm_async_pf_task_wake(token);
+		__this_cpu_write(apf_reason.token, 0);
+		wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
+	}
+
+	set_irq_regs(old_regs);
+}

 static void __init paravirt_ops_setup(void)
 {
 	pv_info.name = "KVM";

 	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
-		pv_cpu_ops.io_delay = kvm_io_delay;
+		pv_ops.cpu.io_delay = kvm_io_delay;

 #ifdef CONFIG_X86_IO_APIC
 	no_timer_check = 1;
@@ ... @@
 		return;

 	wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
-	pr_info("kvm-stealtime: cpu %d, msr %llx\n",
-		cpu, (unsigned long long) slow_virt_to_phys(st));
+	pr_info("stealtime: cpu %d, msr %llx\n", cpu,
+		(unsigned long long) slow_virt_to_phys(st));
 }

 static DEFINE_PER_CPU_DECRYPTED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
@@ ... @@

 static void kvm_guest_cpu_init(void)
 {
-	if (!kvm_para_available())
-		return;
-
-	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
+	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
 		u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));

-#ifdef CONFIG_PREEMPT
-		pa |= KVM_ASYNC_PF_SEND_ALWAYS;
-#endif
-		pa |= KVM_ASYNC_PF_ENABLED;
+		WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled));
+
+		pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
+		pa |= KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;

 		if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
 			pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;

+		wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
+
 		wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
 		__this_cpu_write(apf_reason.enabled, 1);
-		printk(KERN_INFO"KVM setup async PF for cpu %d\n",
-		       smp_processor_id());
+		pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
 	}

 	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
 		unsigned long pa;
+
 		/* Size alignment is implied but just to make it explicit. */
 		BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
 		__this_cpu_write(kvm_apic_eoi, 0);
@@ ... @@
 	wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
 	__this_cpu_write(apf_reason.enabled, 0);

-	printk(KERN_INFO"Unregister pv shared memory for cpu %d\n",
-	       smp_processor_id());
+	pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
+}
+
+static void kvm_disable_steal_time(void)
+{
+	if (!has_steal_clock)
+		return;
+
+	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
 }

 static void kvm_pv_guest_cpu_reboot(void *unused)
@@ ... @@
 	return steal;
 }

-void kvm_disable_steal_time(void)
-{
-	if (!has_steal_clock)
-		return;
-
-	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
-}
-
 static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
 {
 	early_set_memory_decrypted((unsigned long) ptr, size);
@@ ... @@
 	}
 }

+static bool pv_tlb_flush_supported(void)
+{
+	return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+		!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
+		kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
+}
+
+static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
+
+static void kvm_guest_cpu_offline(bool shutdown)
+{
+	kvm_disable_steal_time();
+	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+	kvm_pv_disable_apf();
+	if (!shutdown)
+		apf_task_wake_all();
+	kvmclock_disable();
+}
+
+static int kvm_cpu_online(unsigned int cpu)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	kvm_guest_cpu_init();
+	local_irq_restore(flags);
+	return 0;
+}
+
 #ifdef CONFIG_SMP
+
+static bool pv_ipi_supported(void)
+{
+	return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI);
+}
+
+static bool pv_sched_yield_supported(void)
+{
+	return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
+		!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
+		kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
+}
+
 #define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)

 static void __send_ipi_mask(const struct cpumask *mask, int vector)
@@ ... @@
 	} else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) {
 		ipi_bitmap <<= min - apic_id;
 		min = apic_id;
-	} else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) {
+	} else if (apic_id > min && apic_id < min + KVM_IPI_CLUSTER_SIZE) {
 		max = apic_id < max ? max : apic_id;
 	} else {
 		ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
 			(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
-		WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
+		WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
+			  ret);
 		min = max = apic_id;
 		ipi_bitmap = 0;
 	}
@@ ... @@
 	if (ipi_bitmap) {
 		ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
 			(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
-		WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
+		WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
+			  ret);
 	}

 	local_irq_restore(flags);
@@ ... @@
 static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
 {
 	unsigned int this_cpu = smp_processor_id();
-	struct cpumask new_mask;
+	struct cpumask *new_mask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
 	const struct cpumask *local_mask;

-	cpumask_copy(&new_mask, mask);
-	cpumask_clear_cpu(this_cpu, &new_mask);
-	local_mask = &new_mask;
+	cpumask_copy(new_mask, mask);
+	cpumask_clear_cpu(this_cpu, new_mask);
+	local_mask = new_mask;
 	__send_ipi_mask(local_mask, vector);
-}
-
-static void kvm_send_ipi_allbutself(int vector)
-{
-	kvm_send_ipi_mask_allbutself(cpu_online_mask, vector);
-}
-
-static void kvm_send_ipi_all(int vector)
-{
-	__send_ipi_mask(cpu_online_mask, vector);
 }

 /*
@@ ... @@
 {
 	apic->send_IPI_mask = kvm_send_ipi_mask;
 	apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
-	apic->send_IPI_allbutself = kvm_send_ipi_allbutself;
-	apic->send_IPI_all = kvm_send_ipi_all;
-	pr_info("KVM setup pv IPIs\n");
+	pr_info("setup PV IPIs\n");
 }

-static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
+static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
 {
-	native_smp_prepare_cpus(max_cpus);
-	if (kvm_para_has_hint(KVM_HINTS_REALTIME))
-		static_branch_disable(&virt_spin_lock_key);
+	int cpu;
+
+	native_send_call_func_ipi(mask);
+
+	/* Make sure other vCPUs get a chance to run if they need to. */
+	for_each_cpu(cpu, mask) {
+		if (vcpu_is_preempted(cpu)) {
+			kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
+			break;
+		}
+	}
 }

 static void __init kvm_smp_prepare_boot_cpu(void)
@@ ... @@
 	kvm_spinlock_init();
 }

-static void kvm_guest_cpu_offline(void)
-{
-	kvm_disable_steal_time();
-	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
-		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
-	kvm_pv_disable_apf();
-	apf_task_wake_all();
-}
-
-static int kvm_cpu_online(unsigned int cpu)
-{
-	local_irq_disable();
-	kvm_guest_cpu_init();
-	local_irq_enable();
-	return 0;
-}
-
 static int kvm_cpu_down_prepare(unsigned int cpu)
 {
-	local_irq_disable();
-	kvm_guest_cpu_offline();
-	local_irq_enable();
+	unsigned long flags;
+
+	local_irq_save(flags);
+	kvm_guest_cpu_offline(false);
+	local_irq_restore(flags);
 	return 0;
 }
+
 #endif

-static void __init kvm_apf_trap_init(void)
+static int kvm_suspend(void)
 {
-	update_intr_gate(X86_TRAP_PF, async_page_fault);
+	u64 val = 0;
+
+	kvm_guest_cpu_offline(false);
+
+#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
+	if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
+		rdmsrl(MSR_KVM_POLL_CONTROL, val);
+	has_guest_poll = !(val & 1);
+#endif
+	return 0;
 }

-static DEFINE_PER_CPU(cpumask_var_t, __pv_tlb_mask);
+static void kvm_resume(void)
+{
+	kvm_cpu_online(raw_smp_processor_id());
+
+#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
+	if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL) && has_guest_poll)
+		wrmsrl(MSR_KVM_POLL_CONTROL, 0);
+#endif
+}
+
+static struct syscore_ops kvm_syscore_ops = {
+	.suspend = kvm_suspend,
+	.resume = kvm_resume,
+};
+
+/*
+ * After a PV feature is registered, the host will keep writing to the
+ * registered memory location. If the guest happens to shutdown, this memory
+ * won't be valid. In cases like kexec, in which you install a new kernel, this
+ * means a random memory location will be kept being written.
+ */
+#ifdef CONFIG_KEXEC_CORE
+static void kvm_crash_shutdown(struct pt_regs *regs)
+{
+	kvm_guest_cpu_offline(true);
+	native_machine_crash_shutdown(regs);
+}
+#endif

 static void kvm_flush_tlb_others(const struct cpumask *cpumask,
 			const struct flush_tlb_info *info)
@@ ... @@
 	u8 state;
 	int cpu;
 	struct kvm_steal_time *src;
-	struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_tlb_mask);
+	struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);

 	cpumask_copy(flushmask, cpumask);
 	/*
@@ ... @@
 {
 	int i;

-	if (!kvm_para_available())
-		return;
-
 	paravirt_ops_setup();
 	register_reboot_notifier(&kvm_pv_reboot_nb);
 	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
 		raw_spin_lock_init(&async_pf_sleepers[i].lock);
-	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
-		x86_init.irqs.trap_init = kvm_apf_trap_init;

 	if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
 		has_steal_clock = 1;
-		pv_time_ops.steal_clock = kvm_steal_clock;
+		pv_ops.time.steal_clock = kvm_steal_clock;
 	}

-	if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
-	    !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
-	    kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
-		pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
-		pv_mmu_ops.tlb_remove_table = tlb_remove_table;
+	if (pv_tlb_flush_supported()) {
+		pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
+		pv_ops.mmu.tlb_remove_table = tlb_remove_table;
+		pr_info("KVM setup pv remote TLB flush\n");
 	}

 	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
 		apic_set_eoi_write(kvm_guest_apic_eoi_write);

+	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
+		static_branch_enable(&kvm_async_pf_enabled);
+		alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_kvm_asyncpf_interrupt);
+	}
+
 #ifdef CONFIG_SMP
-	smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
 	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
+	if (pv_sched_yield_supported()) {
+		smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
+		pr_info("setup PV sched yield\n");
+	}
 	if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
 				      kvm_cpu_online, kvm_cpu_down_prepare) < 0)
-		pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n");
+		pr_err("failed to install cpu hotplug callbacks\n");
 #else
 	sev_map_percpu_data();
 	kvm_guest_cpu_init();
 #endif
+
+#ifdef CONFIG_KEXEC_CORE
+	machine_ops.crash_shutdown = kvm_crash_shutdown;
+#endif
+
+	register_syscore_ops(&kvm_syscore_ops);

 	/*
 	 * Hard lockup detection is enabled by default. Disable it, as guests
@@ ... @@
 {
 	return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES);
 }
+EXPORT_SYMBOL_GPL(kvm_arch_para_hints);

 static uint32_t __init kvm_detect(void)
 {
@@ ... @@
 static void __init kvm_apic_init(void)
 {
 #if defined(CONFIG_SMP)
-	if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI))
+	if (pv_ipi_supported())
 		kvm_setup_pv_ipi();
 #endif
 }
@@ ... @@
 	x86_platform.apic_post_init = kvm_apic_init;
 }

+#if defined(CONFIG_AMD_MEM_ENCRYPT)
+static void kvm_sev_es_hcall_prepare(struct ghcb *ghcb, struct pt_regs *regs)
+{
+	/* RAX and CPL are already in the GHCB */
+	ghcb_set_rbx(ghcb, regs->bx);
+	ghcb_set_rcx(ghcb, regs->cx);
+	ghcb_set_rdx(ghcb, regs->dx);
+	ghcb_set_rsi(ghcb, regs->si);
+}
+
+static bool kvm_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
+{
+	/* No checking of the return state needed */
+	return true;
+}
+#endif
+
 const __initconst struct hypervisor_x86 x86_hyper_kvm = {
-	.name = "KVM",
-	.detect = kvm_detect,
-	.type = X86_HYPER_KVM,
-	.init.guest_late_init = kvm_guest_init,
-	.init.x2apic_available = kvm_para_available,
-	.init.init_platform = kvm_init_platform,
+	.name = "KVM",
+	.detect = kvm_detect,
+	.type = X86_HYPER_KVM,
+	.init.guest_late_init = kvm_guest_init,
+	.init.x2apic_available = kvm_para_available,
+	.init.init_platform = kvm_init_platform,
+#if defined(CONFIG_AMD_MEM_ENCRYPT)
+	.runtime.sev_es_hcall_prepare = kvm_sev_es_hcall_prepare,
+	.runtime.sev_es_hcall_finish = kvm_sev_es_hcall_finish,
+#endif
 };

 static __init int activate_jump_labels(void)
@@ ... @@
 }
 arch_initcall(activate_jump_labels);

-static __init int kvm_setup_pv_tlb_flush(void)
+static __init int kvm_alloc_cpumask(void)
 {
 	int cpu;
+	bool alloc = false;

-	if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
-	    !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
-	    kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+	if (!kvm_para_available() || nopv)
+		return 0;
+
+	if (pv_tlb_flush_supported())
+		alloc = true;
+
+#if defined(CONFIG_SMP)
+	if (pv_ipi_supported())
+		alloc = true;
+#endif
+
+	if (alloc)
 		for_each_possible_cpu(cpu) {
-			zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
+			zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
 				GFP_KERNEL, cpu_to_node(cpu));
 		}
-		pr_info("KVM setup pv remote TLB flush\n");
-	}

 	return 0;
 }
-arch_initcall(kvm_setup_pv_tlb_flush);
+arch_initcall(kvm_alloc_cpumask);

 #ifdef CONFIG_PARAVIRT_SPINLOCKS

@@ ... @@
 "movq __per_cpu_offset(,%rdi,8), %rax;"
 "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
 "setne %al;"
-"ret;"
+ASM_RET
 ".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;"
 ".popsection");

@@ ... @@
  */
 void __init kvm_spinlock_init(void)
 {
-	if (!kvm_para_available())
+	/*
+	 * In case host doesn't support KVM_FEATURE_PV_UNHALT there is still an
+	 * advantage of keeping virt_spin_lock_key enabled: virt_spin_lock() is
+	 * preferred over native qspinlock when vCPU is preempted.
+	 */
+	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) {
+		pr_info("PV spinlocks disabled, no host support\n");
 		return;
-	/* Does host kernel support KVM_FEATURE_PV_UNHALT? */
-	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
-		return;
+	}

-	if (kvm_para_has_hint(KVM_HINTS_REALTIME))
-		return;
+	/*
+	 * Disable PV spinlocks and use native qspinlock when dedicated pCPUs
+	 * are available.
+	 */
+	if (kvm_para_has_hint(KVM_HINTS_REALTIME)) {
+		pr_info("PV spinlocks disabled with KVM_HINTS_REALTIME hints\n");
+		goto out;
+	}

-	/* Don't use the pvqspinlock code if there is only 1 vCPU. */
-	if (num_possible_cpus() == 1)
-		return;
+	if (num_possible_cpus() == 1) {
+		pr_info("PV spinlocks disabled, single CPU\n");
+		goto out;
+	}
+
+	if (nopvspin) {
+		pr_info("PV spinlocks disabled, forced by \"nopvspin\" parameter\n");
+		goto out;
+	}
+
+	pr_info("PV spinlocks enabled\n");

 	__pv_init_lock_hash();
-	pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
-	pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
-	pv_lock_ops.wait = kvm_wait;
-	pv_lock_ops.kick = kvm_kick_cpu;
+	pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
+	pv_ops.lock.queued_spin_unlock =
+		PV_CALLEE_SAVE(__pv_queued_spin_unlock);
+	pv_ops.lock.wait = kvm_wait;
+	pv_ops.lock.kick = kvm_kick_cpu;

 	if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
-		pv_lock_ops.vcpu_is_preempted =
+		pv_ops.lock.vcpu_is_preempted =
 			PV_CALLEE_SAVE(__kvm_vcpu_is_preempted);
 	}
+	/*
+	 * When PV spinlock is enabled which is preferred over
+	 * virt_spin_lock(), virt_spin_lock_key's value is meaningless.
+	 * Just disable it anyway.
+	 */
+out:
+	static_branch_disable(&virt_spin_lock_key);
 }

 #endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
+
+static void kvm_disable_host_haltpoll(void *i)
+{
+	wrmsrl(MSR_KVM_POLL_CONTROL, 0);
+}
+
+static void kvm_enable_host_haltpoll(void *i)
+{
+	wrmsrl(MSR_KVM_POLL_CONTROL, 1);
+}
+
+void arch_haltpoll_enable(unsigned int cpu)
+{
+	if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) {
+		pr_err_once("host does not support poll control\n");
+		pr_err_once("host upgrade recommended\n");
+		return;
+	}
+
+	/* Enable guest halt poll disables host halt poll */
+	smp_call_function_single(cpu, kvm_disable_host_haltpoll, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(arch_haltpoll_enable);
+
+void arch_haltpoll_disable(unsigned int cpu)
+{
+	if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
+		return;
+
+	/* Disable guest halt poll enables host halt poll */
+	smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(arch_haltpoll_disable);
+#endif
---|