.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * X86 specific Hyper-V initialization code. |
---|
3 | 4 | * |
---|
4 | 5 | * Copyright (C) 2016, Microsoft, Inc. |
---|
5 | 6 | * |
---|
6 | 7 | * Author : K. Y. Srinivasan <kys@microsoft.com> |
---|
7 | | - * |
---|
8 | | - * This program is free software; you can redistribute it and/or modify it |
---|
9 | | - * under the terms of the GNU General Public License version 2 as published |
---|
10 | | - * by the Free Software Foundation. |
---|
11 | | - * |
---|
12 | | - * This program is distributed in the hope that it will be useful, but |
---|
13 | | - * WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
14 | | - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or |
---|
15 | | - * NON INFRINGEMENT. See the GNU General Public License for more |
---|
16 | | - * details. |
---|
17 | | - * |
---|
18 | 8 | */ |
---|
19 | 9 | |
---|
| 10 | +#include <linux/acpi.h> |
---|
20 | 11 | #include <linux/efi.h> |
---|
21 | 12 | #include <linux/types.h> |
---|
22 | 13 | #include <asm/apic.h> |
---|
.. | .. |
---|
24 | 15 | #include <asm/hypervisor.h> |
---|
25 | 16 | #include <asm/hyperv-tlfs.h> |
---|
26 | 17 | #include <asm/mshyperv.h> |
---|
| 18 | +#include <asm/idtentry.h> |
---|
| 19 | +#include <linux/kexec.h> |
---|
27 | 20 | #include <linux/version.h> |
---|
28 | 21 | #include <linux/vmalloc.h> |
---|
29 | 22 | #include <linux/mm.h> |
---|
30 | | -#include <linux/clockchips.h> |
---|
31 | 23 | #include <linux/hyperv.h> |
---|
32 | 24 | #include <linux/slab.h> |
---|
33 | 25 | #include <linux/kernel.h> |
---|
34 | 26 | #include <linux/cpuhotplug.h> |
---|
| 27 | +#include <linux/syscore_ops.h> |
---|
| 28 | +#include <clocksource/hyperv_timer.h> |
---|
35 | 29 | |
---|
36 | | -#ifdef CONFIG_HYPERV_TSCPAGE |
---|
37 | | - |
---|
38 | | -static struct ms_hyperv_tsc_page *tsc_pg; |
---|
39 | | - |
---|
40 | | -struct ms_hyperv_tsc_page *hv_get_tsc_page(void) |
---|
41 | | -{ |
---|
42 | | - return tsc_pg; |
---|
43 | | -} |
---|
44 | | -EXPORT_SYMBOL_GPL(hv_get_tsc_page); |
---|
45 | | - |
---|
46 | | -static u64 read_hv_clock_tsc(struct clocksource *arg) |
---|
47 | | -{ |
---|
48 | | - u64 current_tick = hv_read_tsc_page(tsc_pg); |
---|
49 | | - |
---|
50 | | - if (current_tick == U64_MAX) |
---|
51 | | - rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); |
---|
52 | | - |
---|
53 | | - return current_tick; |
---|
54 | | -} |
---|
55 | | - |
---|
56 | | -static struct clocksource hyperv_cs_tsc = { |
---|
57 | | - .name = "hyperv_clocksource_tsc_page", |
---|
58 | | - .rating = 400, |
---|
59 | | - .read = read_hv_clock_tsc, |
---|
60 | | - .mask = CLOCKSOURCE_MASK(64), |
---|
61 | | - .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
---|
62 | | -}; |
---|
63 | | -#endif |
---|
64 | | - |
---|
65 | | -static u64 read_hv_clock_msr(struct clocksource *arg) |
---|
66 | | -{ |
---|
67 | | - u64 current_tick; |
---|
68 | | - /* |
---|
69 | | - * Read the partition counter to get the current tick count. This count |
---|
70 | | - * is set to 0 when the partition is created and is incremented in |
---|
71 | | - * 100 nanosecond units. |
---|
72 | | - */ |
---|
73 | | - rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); |
---|
74 | | - return current_tick; |
---|
75 | | -} |
---|
76 | | - |
---|
77 | | -static struct clocksource hyperv_cs_msr = { |
---|
78 | | - .name = "hyperv_clocksource_msr", |
---|
79 | | - .rating = 400, |
---|
80 | | - .read = read_hv_clock_msr, |
---|
81 | | - .mask = CLOCKSOURCE_MASK(64), |
---|
82 | | - .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
---|
83 | | -}; |
---|
| 30 | +int hyperv_init_cpuhp; |
---|
84 | 31 | |
---|
85 | 32 | void *hv_hypercall_pg; |
---|
86 | 33 | EXPORT_SYMBOL_GPL(hv_hypercall_pg); |
---|
87 | | -struct clocksource *hyperv_cs; |
---|
88 | | -EXPORT_SYMBOL_GPL(hyperv_cs); |
---|
| 34 | + |
---|
| 35 | +/* Storage to save the hypercall page temporarily for hibernation */ |
---|
| 36 | +static void *hv_hypercall_pg_saved; |
---|
89 | 37 | |
---|
90 | 38 | u32 *hv_vp_index; |
---|
91 | 39 | EXPORT_SYMBOL_GPL(hv_vp_index); |
---|
.. | .. |
---|
97 | 45 | EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); |
---|
98 | 46 | |
---|
99 | 47 | u32 hv_max_vp_index; |
---|
| 48 | +EXPORT_SYMBOL_GPL(hv_max_vp_index); |
---|
| 49 | + |
---|
| 50 | +void *hv_alloc_hyperv_page(void) |
---|
| 51 | +{ |
---|
| 52 | + BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE); |
---|
| 53 | + |
---|
| 54 | + return (void *)__get_free_page(GFP_KERNEL); |
---|
| 55 | +} |
---|
| 56 | +EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page); |
---|
| 57 | + |
---|
| 58 | +void *hv_alloc_hyperv_zeroed_page(void) |
---|
| 59 | +{ |
---|
| 60 | + BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE); |
---|
| 61 | + |
---|
| 62 | + return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); |
---|
| 63 | +} |
---|
| 64 | +EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page); |
---|
| 65 | + |
---|
| 66 | +void hv_free_hyperv_page(unsigned long addr) |
---|
| 67 | +{ |
---|
| 68 | + free_page(addr); |
---|
| 69 | +} |
---|
| 70 | +EXPORT_SYMBOL_GPL(hv_free_hyperv_page); |
---|
100 | 71 | |
---|
101 | 72 | static int hv_cpu_init(unsigned int cpu) |
---|
102 | 73 | { |
---|
.. | .. |
---|
106 | 77 | struct page *pg; |
---|
107 | 78 | |
---|
108 | 79 | input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); |
---|
109 | | - pg = alloc_page(GFP_KERNEL); |
---|
| 80 | + /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */ |
---|
| 81 | + pg = alloc_page(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL); |
---|
110 | 82 | if (unlikely(!pg)) |
---|
111 | 83 | return -ENOMEM; |
---|
112 | 84 | *input_arg = page_address(pg); |
---|
.. | .. |
---|
121 | 93 | if (!hv_vp_assist_page) |
---|
122 | 94 | return 0; |
---|
123 | 95 | |
---|
124 | | - if (!*hvp) |
---|
125 | | - *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); |
---|
| 96 | + /* |
---|
| 97 | + * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's Section |
---|
| 98 | + * 5.2.1 "GPA Overlay Pages"). Here it must be zeroed out to make sure |
---|
| 99 | + * we always write the EOI MSR in hv_apic_eoi_write() *after* the |
---|
| 100 | + * EOI optimization is disabled in hv_cpu_die(), otherwise a CPU may |
---|
| 101 | + * not be stopped in the case of CPU offlining and the VM will hang. |
---|
| 102 | + */ |
---|
| 103 | + if (!*hvp) { |
---|
| 104 | + *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO); |
---|
| 105 | + } |
---|
126 | 106 | |
---|
127 | 107 | if (*hvp) { |
---|
128 | 108 | u64 val; |
---|
.. | .. |
---|
171 | 151 | * Check for required features and privileges to make TSC frequency |
---|
172 | 152 | * change notifications work. |
---|
173 | 153 | */ |
---|
174 | | - return ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS && |
---|
| 154 | + return ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS && |
---|
175 | 155 | ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE && |
---|
176 | | - ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT; |
---|
| 156 | + ms_hyperv.features & HV_ACCESS_REENLIGHTENMENT; |
---|
177 | 157 | } |
---|
178 | 158 | |
---|
179 | | -__visible void __irq_entry hyperv_reenlightenment_intr(struct pt_regs *regs) |
---|
| 159 | +DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_reenlightenment) |
---|
180 | 160 | { |
---|
181 | | - entering_ack_irq(); |
---|
182 | | - |
---|
| 161 | + ack_APIC_irq(); |
---|
183 | 162 | inc_irq_stat(irq_hv_reenlightenment_count); |
---|
184 | | - |
---|
185 | 163 | schedule_delayed_work(&hv_reenlightenment_work, HZ/10); |
---|
186 | | - |
---|
187 | | - exiting_irq(); |
---|
188 | 164 | } |
---|
189 | 165 | |
---|
190 | 166 | void set_hv_tscchange_cb(void (*cb)(void)) |
---|
.. | .. |
---|
255 | 231 | |
---|
256 | 232 | rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); |
---|
257 | 233 | if (re_ctrl.target_vp == hv_vp_index[cpu]) { |
---|
258 | | - /* Reassign to some other online CPU */ |
---|
| 234 | + /* |
---|
| 235 | + * Reassign reenlightenment notifications to some other online |
---|
| 236 | + * CPU or just disable the feature if there are no online CPUs |
---|
| 237 | + * left (happens on hibernation). |
---|
| 238 | + */ |
---|
259 | 239 | new_cpu = cpumask_any_but(cpu_online_mask, cpu); |
---|
260 | 240 | |
---|
261 | | - re_ctrl.target_vp = hv_vp_index[new_cpu]; |
---|
| 241 | + if (new_cpu < nr_cpu_ids) |
---|
| 242 | + re_ctrl.target_vp = hv_vp_index[new_cpu]; |
---|
| 243 | + else |
---|
| 244 | + re_ctrl.enabled = 0; |
---|
| 245 | + |
---|
262 | 246 | wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); |
---|
263 | 247 | } |
---|
264 | 248 | |
---|
.. | .. |
---|
281 | 265 | return 1; |
---|
282 | 266 | } |
---|
283 | 267 | |
---|
| 268 | +static int hv_suspend(void) |
---|
| 269 | +{ |
---|
| 270 | + union hv_x64_msr_hypercall_contents hypercall_msr; |
---|
| 271 | + int ret; |
---|
| 272 | + |
---|
| 273 | + /* |
---|
| 274 | + * Reset the hypercall page as it is going to be invalidated |
---|
| 275 | + * accross hibernation. Setting hv_hypercall_pg to NULL ensures |
---|
| 276 | + * that any subsequent hypercall operation fails safely instead of |
---|
| 277 | + * crashing due to an access of an invalid page. The hypercall page |
---|
| 278 | + * pointer is restored on resume. |
---|
| 279 | + */ |
---|
| 280 | + hv_hypercall_pg_saved = hv_hypercall_pg; |
---|
| 281 | + hv_hypercall_pg = NULL; |
---|
| 282 | + |
---|
| 283 | + /* Disable the hypercall page in the hypervisor */ |
---|
| 284 | + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); |
---|
| 285 | + hypercall_msr.enable = 0; |
---|
| 286 | + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); |
---|
| 287 | + |
---|
| 288 | + ret = hv_cpu_die(0); |
---|
| 289 | + return ret; |
---|
| 290 | +} |
---|
| 291 | + |
---|
| 292 | +static void hv_resume(void) |
---|
| 293 | +{ |
---|
| 294 | + union hv_x64_msr_hypercall_contents hypercall_msr; |
---|
| 295 | + int ret; |
---|
| 296 | + |
---|
| 297 | + ret = hv_cpu_init(0); |
---|
| 298 | + WARN_ON(ret); |
---|
| 299 | + |
---|
| 300 | + /* Re-enable the hypercall page */ |
---|
| 301 | + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); |
---|
| 302 | + hypercall_msr.enable = 1; |
---|
| 303 | + hypercall_msr.guest_physical_address = |
---|
| 304 | + vmalloc_to_pfn(hv_hypercall_pg_saved); |
---|
| 305 | + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); |
---|
| 306 | + |
---|
| 307 | + hv_hypercall_pg = hv_hypercall_pg_saved; |
---|
| 308 | + hv_hypercall_pg_saved = NULL; |
---|
| 309 | + |
---|
| 310 | + /* |
---|
| 311 | + * Reenlightenment notifications are disabled by hv_cpu_die(0), |
---|
| 312 | + * reenable them here if hv_reenlightenment_cb was previously set. |
---|
| 313 | + */ |
---|
| 314 | + if (hv_reenlightenment_cb) |
---|
| 315 | + set_hv_tscchange_cb(hv_reenlightenment_cb); |
---|
| 316 | +} |
---|
| 317 | + |
---|
| 318 | +/* Note: when the ops are called, only CPU0 is online and IRQs are disabled. */ |
---|
| 319 | +static struct syscore_ops hv_syscore_ops = { |
---|
| 320 | + .suspend = hv_suspend, |
---|
| 321 | + .resume = hv_resume, |
---|
| 322 | +}; |
---|
| 323 | + |
---|
| 324 | +static void (* __initdata old_setup_percpu_clockev)(void); |
---|
| 325 | + |
---|
| 326 | +static void __init hv_stimer_setup_percpu_clockev(void) |
---|
| 327 | +{ |
---|
| 328 | + /* |
---|
| 329 | + * Ignore any errors in setting up stimer clockevents |
---|
| 330 | + * as we can run with the LAPIC timer as a fallback. |
---|
| 331 | + */ |
---|
| 332 | + (void)hv_stimer_alloc(); |
---|
| 333 | + |
---|
| 334 | + /* |
---|
| 335 | + * Still register the LAPIC timer, because the direct-mode STIMER is |
---|
| 336 | + * not supported by old versions of Hyper-V. This also allows users |
---|
| 337 | + * to switch to LAPIC timer via /sys, if they want to. |
---|
| 338 | + */ |
---|
| 339 | + if (old_setup_percpu_clockev) |
---|
| 340 | + old_setup_percpu_clockev(); |
---|
| 341 | +} |
---|
| 342 | + |
---|
284 | 343 | /* |
---|
285 | 344 | * This function is to be invoked early in the boot sequence after the |
---|
286 | 345 | * hypervisor has been detected. |
---|
.. | .. |
---|
299 | 358 | return; |
---|
300 | 359 | |
---|
301 | 360 | /* Absolutely required MSRs */ |
---|
302 | | - required_msrs = HV_X64_MSR_HYPERCALL_AVAILABLE | |
---|
303 | | - HV_X64_MSR_VP_INDEX_AVAILABLE; |
---|
| 361 | + required_msrs = HV_MSR_HYPERCALL_AVAILABLE | |
---|
| 362 | + HV_MSR_VP_INDEX_AVAILABLE; |
---|
304 | 363 | |
---|
305 | 364 | if ((ms_hyperv.features & required_msrs) != required_msrs) |
---|
306 | 365 | return; |
---|
.. | .. |
---|
344 | 403 | guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); |
---|
345 | 404 | wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); |
---|
346 | 405 | |
---|
347 | | - hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX); |
---|
| 406 | + hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, |
---|
| 407 | + VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX, |
---|
| 408 | + VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, |
---|
| 409 | + __builtin_return_address(0)); |
---|
348 | 410 | if (hv_hypercall_pg == NULL) { |
---|
349 | 411 | wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); |
---|
350 | 412 | goto remove_cpuhp_state; |
---|
.. | .. |
---|
355 | 417 | hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); |
---|
356 | 418 | wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); |
---|
357 | 419 | |
---|
| 420 | + /* |
---|
| 421 | + * hyperv_init() is called before LAPIC is initialized: see |
---|
| 422 | + * apic_intr_mode_init() -> x86_platform.apic_post_init() and |
---|
| 423 | + * apic_bsp_setup() -> setup_local_APIC(). The direct-mode STIMER |
---|
| 424 | + * depends on LAPIC, so hv_stimer_alloc() should be called from |
---|
| 425 | + * x86_init.timers.setup_percpu_clockev. |
---|
| 426 | + */ |
---|
| 427 | + old_setup_percpu_clockev = x86_init.timers.setup_percpu_clockev; |
---|
| 428 | + x86_init.timers.setup_percpu_clockev = hv_stimer_setup_percpu_clockev; |
---|
| 429 | + |
---|
358 | 430 | hv_apic_init(); |
---|
359 | 431 | |
---|
360 | 432 | x86_init.pci.arch_init = hv_pci_init; |
---|
361 | 433 | |
---|
362 | | - /* |
---|
363 | | - * Register Hyper-V specific clocksource. |
---|
364 | | - */ |
---|
365 | | -#ifdef CONFIG_HYPERV_TSCPAGE |
---|
366 | | - if (ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) { |
---|
367 | | - union hv_x64_msr_hypercall_contents tsc_msr; |
---|
| 434 | + register_syscore_ops(&hv_syscore_ops); |
---|
368 | 435 | |
---|
369 | | - tsc_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); |
---|
370 | | - if (!tsc_pg) |
---|
371 | | - goto register_msr_cs; |
---|
372 | | - |
---|
373 | | - hyperv_cs = &hyperv_cs_tsc; |
---|
374 | | - |
---|
375 | | - rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); |
---|
376 | | - |
---|
377 | | - tsc_msr.enable = 1; |
---|
378 | | - tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg); |
---|
379 | | - |
---|
380 | | - wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); |
---|
381 | | - |
---|
382 | | - hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK; |
---|
383 | | - |
---|
384 | | - clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); |
---|
385 | | - return; |
---|
386 | | - } |
---|
387 | | -register_msr_cs: |
---|
388 | | -#endif |
---|
389 | | - /* |
---|
390 | | - * For 32 bit guests just use the MSR based mechanism for reading |
---|
391 | | - * the partition counter. |
---|
392 | | - */ |
---|
393 | | - |
---|
394 | | - hyperv_cs = &hyperv_cs_msr; |
---|
395 | | - if (ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE) |
---|
396 | | - clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); |
---|
397 | | - |
---|
| 436 | + hyperv_init_cpuhp = cpuhp; |
---|
398 | 437 | return; |
---|
399 | 438 | |
---|
400 | 439 | remove_cpuhp_state: |
---|
.. | .. |
---|
514 | 553 | return hypercall_msr.enable; |
---|
515 | 554 | } |
---|
516 | 555 | EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); |
---|
| 556 | + |
---|
| 557 | +bool hv_is_hibernation_supported(void) |
---|
| 558 | +{ |
---|
| 559 | + return acpi_sleep_state_supported(ACPI_STATE_S4); |
---|
| 560 | +} |
---|
| 561 | +EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); |
---|