hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/arch/x86/hyperv/hv_init.c
....@@ -1,22 +1,13 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * X86 specific Hyper-V initialization code.
34 *
45 * Copyright (C) 2016, Microsoft, Inc.
56 *
67 * Author : K. Y. Srinivasan <kys@microsoft.com>
7
- *
8
- * This program is free software; you can redistribute it and/or modify it
9
- * under the terms of the GNU General Public License version 2 as published
10
- * by the Free Software Foundation.
11
- *
12
- * This program is distributed in the hope that it will be useful, but
13
- * WITHOUT ANY WARRANTY; without even the implied warranty of
14
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
15
- * NON INFRINGEMENT. See the GNU General Public License for more
16
- * details.
17
- *
188 */
199
10
+#include <linux/acpi.h>
2011 #include <linux/efi.h>
2112 #include <linux/types.h>
2213 #include <asm/apic.h>
....@@ -24,68 +15,25 @@
2415 #include <asm/hypervisor.h>
2516 #include <asm/hyperv-tlfs.h>
2617 #include <asm/mshyperv.h>
18
+#include <asm/idtentry.h>
19
+#include <linux/kexec.h>
2720 #include <linux/version.h>
2821 #include <linux/vmalloc.h>
2922 #include <linux/mm.h>
30
-#include <linux/clockchips.h>
3123 #include <linux/hyperv.h>
3224 #include <linux/slab.h>
3325 #include <linux/kernel.h>
3426 #include <linux/cpuhotplug.h>
27
+#include <linux/syscore_ops.h>
28
+#include <clocksource/hyperv_timer.h>
3529
36
-#ifdef CONFIG_HYPERV_TSCPAGE
37
-
38
-static struct ms_hyperv_tsc_page *tsc_pg;
39
-
40
-struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
41
-{
42
- return tsc_pg;
43
-}
44
-EXPORT_SYMBOL_GPL(hv_get_tsc_page);
45
-
46
-static u64 read_hv_clock_tsc(struct clocksource *arg)
47
-{
48
- u64 current_tick = hv_read_tsc_page(tsc_pg);
49
-
50
- if (current_tick == U64_MAX)
51
- rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
52
-
53
- return current_tick;
54
-}
55
-
56
-static struct clocksource hyperv_cs_tsc = {
57
- .name = "hyperv_clocksource_tsc_page",
58
- .rating = 400,
59
- .read = read_hv_clock_tsc,
60
- .mask = CLOCKSOURCE_MASK(64),
61
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
62
-};
63
-#endif
64
-
65
-static u64 read_hv_clock_msr(struct clocksource *arg)
66
-{
67
- u64 current_tick;
68
- /*
69
- * Read the partition counter to get the current tick count. This count
70
- * is set to 0 when the partition is created and is incremented in
71
- * 100 nanosecond units.
72
- */
73
- rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
74
- return current_tick;
75
-}
76
-
77
-static struct clocksource hyperv_cs_msr = {
78
- .name = "hyperv_clocksource_msr",
79
- .rating = 400,
80
- .read = read_hv_clock_msr,
81
- .mask = CLOCKSOURCE_MASK(64),
82
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
83
-};
30
+int hyperv_init_cpuhp;
8431
8532 void *hv_hypercall_pg;
8633 EXPORT_SYMBOL_GPL(hv_hypercall_pg);
87
-struct clocksource *hyperv_cs;
88
-EXPORT_SYMBOL_GPL(hyperv_cs);
34
+
35
+/* Storage to save the hypercall page temporarily for hibernation */
36
+static void *hv_hypercall_pg_saved;
8937
9038 u32 *hv_vp_index;
9139 EXPORT_SYMBOL_GPL(hv_vp_index);
....@@ -97,6 +45,29 @@
9745 EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
9846
9947 u32 hv_max_vp_index;
48
+EXPORT_SYMBOL_GPL(hv_max_vp_index);
49
+
50
+void *hv_alloc_hyperv_page(void)
51
+{
52
+ BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE);
53
+
54
+ return (void *)__get_free_page(GFP_KERNEL);
55
+}
56
+EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page);
57
+
58
+void *hv_alloc_hyperv_zeroed_page(void)
59
+{
60
+ BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE);
61
+
62
+ return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
63
+}
64
+EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);
65
+
66
+void hv_free_hyperv_page(unsigned long addr)
67
+{
68
+ free_page(addr);
69
+}
70
+EXPORT_SYMBOL_GPL(hv_free_hyperv_page);
10071
10172 static int hv_cpu_init(unsigned int cpu)
10273 {
....@@ -106,7 +77,8 @@
10677 struct page *pg;
10778
10879 input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
109
- pg = alloc_page(GFP_KERNEL);
80
+ /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
81
+ pg = alloc_page(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL);
11082 if (unlikely(!pg))
11183 return -ENOMEM;
11284 *input_arg = page_address(pg);
....@@ -121,8 +93,16 @@
12193 if (!hv_vp_assist_page)
12294 return 0;
12395
124
- if (!*hvp)
125
- *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
96
+ /*
97
+ * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's Section
98
+ * 5.2.1 "GPA Overlay Pages"). Here it must be zeroed out to make sure
99
+ * we always write the EOI MSR in hv_apic_eoi_write() *after* the
100
+ * EOI optimization is disabled in hv_cpu_die(), otherwise a CPU may
101
+ * not be stopped in the case of CPU offlining and the VM will hang.
102
+ */
103
+ if (!*hvp) {
104
+ *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
105
+ }
126106
127107 if (*hvp) {
128108 u64 val;
....@@ -171,20 +151,16 @@
171151 * Check for required features and privileges to make TSC frequency
172152 * change notifications work.
173153 */
174
- return ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS &&
154
+ return ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS &&
175155 ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE &&
176
- ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT;
156
+ ms_hyperv.features & HV_ACCESS_REENLIGHTENMENT;
177157 }
178158
179
-__visible void __irq_entry hyperv_reenlightenment_intr(struct pt_regs *regs)
159
+DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_reenlightenment)
180160 {
181
- entering_ack_irq();
182
-
161
+ ack_APIC_irq();
183162 inc_irq_stat(irq_hv_reenlightenment_count);
184
-
185163 schedule_delayed_work(&hv_reenlightenment_work, HZ/10);
186
-
187
- exiting_irq();
188164 }
189165
190166 void set_hv_tscchange_cb(void (*cb)(void))
....@@ -255,10 +231,18 @@
255231
256232 rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
257233 if (re_ctrl.target_vp == hv_vp_index[cpu]) {
258
- /* Reassign to some other online CPU */
234
+ /*
235
+ * Reassign reenlightenment notifications to some other online
236
+ * CPU or just disable the feature if there are no online CPUs
237
+ * left (happens on hibernation).
238
+ */
259239 new_cpu = cpumask_any_but(cpu_online_mask, cpu);
260240
261
- re_ctrl.target_vp = hv_vp_index[new_cpu];
241
+ if (new_cpu < nr_cpu_ids)
242
+ re_ctrl.target_vp = hv_vp_index[new_cpu];
243
+ else
244
+ re_ctrl.enabled = 0;
245
+
262246 wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
263247 }
264248
....@@ -281,6 +265,81 @@
281265 return 1;
282266 }
283267
268
+static int hv_suspend(void)
269
+{
270
+ union hv_x64_msr_hypercall_contents hypercall_msr;
271
+ int ret;
272
+
273
+ /*
274
+ * Reset the hypercall page as it is going to be invalidated
275
+ * across hibernation. Setting hv_hypercall_pg to NULL ensures
276
+ * that any subsequent hypercall operation fails safely instead of
277
+ * crashing due to an access of an invalid page. The hypercall page
278
+ * pointer is restored on resume.
279
+ */
280
+ hv_hypercall_pg_saved = hv_hypercall_pg;
281
+ hv_hypercall_pg = NULL;
282
+
283
+ /* Disable the hypercall page in the hypervisor */
284
+ rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
285
+ hypercall_msr.enable = 0;
286
+ wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
287
+
288
+ ret = hv_cpu_die(0);
289
+ return ret;
290
+}
291
+
292
+static void hv_resume(void)
293
+{
294
+ union hv_x64_msr_hypercall_contents hypercall_msr;
295
+ int ret;
296
+
297
+ ret = hv_cpu_init(0);
298
+ WARN_ON(ret);
299
+
300
+ /* Re-enable the hypercall page */
301
+ rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
302
+ hypercall_msr.enable = 1;
303
+ hypercall_msr.guest_physical_address =
304
+ vmalloc_to_pfn(hv_hypercall_pg_saved);
305
+ wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
306
+
307
+ hv_hypercall_pg = hv_hypercall_pg_saved;
308
+ hv_hypercall_pg_saved = NULL;
309
+
310
+ /*
311
+ * Reenlightenment notifications are disabled by hv_cpu_die(0),
312
+ * reenable them here if hv_reenlightenment_cb was previously set.
313
+ */
314
+ if (hv_reenlightenment_cb)
315
+ set_hv_tscchange_cb(hv_reenlightenment_cb);
316
+}
317
+
318
+/* Note: when the ops are called, only CPU0 is online and IRQs are disabled. */
319
+static struct syscore_ops hv_syscore_ops = {
320
+ .suspend = hv_suspend,
321
+ .resume = hv_resume,
322
+};
323
+
324
+static void (* __initdata old_setup_percpu_clockev)(void);
325
+
326
+static void __init hv_stimer_setup_percpu_clockev(void)
327
+{
328
+ /*
329
+ * Ignore any errors in setting up stimer clockevents
330
+ * as we can run with the LAPIC timer as a fallback.
331
+ */
332
+ (void)hv_stimer_alloc();
333
+
334
+ /*
335
+ * Still register the LAPIC timer, because the direct-mode STIMER is
336
+ * not supported by old versions of Hyper-V. This also allows users
337
+ * to switch to LAPIC timer via /sys, if they want to.
338
+ */
339
+ if (old_setup_percpu_clockev)
340
+ old_setup_percpu_clockev();
341
+}
342
+
284343 /*
285344 * This function is to be invoked early in the boot sequence after the
286345 * hypervisor has been detected.
....@@ -299,8 +358,8 @@
299358 return;
300359
301360 /* Absolutely required MSRs */
302
- required_msrs = HV_X64_MSR_HYPERCALL_AVAILABLE |
303
- HV_X64_MSR_VP_INDEX_AVAILABLE;
361
+ required_msrs = HV_MSR_HYPERCALL_AVAILABLE |
362
+ HV_MSR_VP_INDEX_AVAILABLE;
304363
305364 if ((ms_hyperv.features & required_msrs) != required_msrs)
306365 return;
....@@ -344,7 +403,10 @@
344403 guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0);
345404 wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
346405
347
- hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);
406
+ hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START,
407
+ VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
408
+ VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
409
+ __builtin_return_address(0));
348410 if (hv_hypercall_pg == NULL) {
349411 wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
350412 goto remove_cpuhp_state;
....@@ -355,46 +417,23 @@
355417 hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
356418 wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
357419
420
+ /*
421
+ * hyperv_init() is called before LAPIC is initialized: see
422
+ * apic_intr_mode_init() -> x86_platform.apic_post_init() and
423
+ * apic_bsp_setup() -> setup_local_APIC(). The direct-mode STIMER
424
+ * depends on LAPIC, so hv_stimer_alloc() should be called from
425
+ * x86_init.timers.setup_percpu_clockev.
426
+ */
427
+ old_setup_percpu_clockev = x86_init.timers.setup_percpu_clockev;
428
+ x86_init.timers.setup_percpu_clockev = hv_stimer_setup_percpu_clockev;
429
+
358430 hv_apic_init();
359431
360432 x86_init.pci.arch_init = hv_pci_init;
361433
362
- /*
363
- * Register Hyper-V specific clocksource.
364
- */
365
-#ifdef CONFIG_HYPERV_TSCPAGE
366
- if (ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) {
367
- union hv_x64_msr_hypercall_contents tsc_msr;
434
+ register_syscore_ops(&hv_syscore_ops);
368435
369
- tsc_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
370
- if (!tsc_pg)
371
- goto register_msr_cs;
372
-
373
- hyperv_cs = &hyperv_cs_tsc;
374
-
375
- rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
376
-
377
- tsc_msr.enable = 1;
378
- tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg);
379
-
380
- wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
381
-
382
- hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK;
383
-
384
- clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
385
- return;
386
- }
387
-register_msr_cs:
388
-#endif
389
- /*
390
- * For 32 bit guests just use the MSR based mechanism for reading
391
- * the partition counter.
392
- */
393
-
394
- hyperv_cs = &hyperv_cs_msr;
395
- if (ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE)
396
- clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
397
-
436
+ hyperv_init_cpuhp = cpuhp;
398437 return;
399438
400439 remove_cpuhp_state:
....@@ -514,3 +553,9 @@
514553 return hypercall_msr.enable;
515554 }
516555 EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized);
556
+
557
+bool hv_is_hibernation_supported(void)
558
+{
559
+ return acpi_sleep_state_supported(ACPI_STATE_S4);
560
+}
561
+EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);