forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/hv/vmbus_drv.c
@@ -1,24 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
 * Copyright (c) 2009, Microsoft Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 * Haiyang Zhang <haiyangz@microsoft.com>
 * Hank Janssen <hjanssen@microsoft.com>
 * K. Y. Srinivasan <kys@microsoft.com>
- *
 */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
@@ -36,7 +23,7 @@
 #include <linux/cpu.h>
 #include <linux/sched/task_stack.h>
 
-#include <asm/mshyperv.h>
+#include <linux/delay.h>
 #include <linux/notifier.h>
 #include <linux/ptrace.h>
 #include <linux/screen_info.h>
@@ -44,6 +31,8 @@
 #include <linux/efi.h>
 #include <linux/random.h>
 #include <linux/kernel.h>
+#include <linux/syscore_ops.h>
+#include <clocksource/hyperv_timer.h>
 #include "hyperv_vmbus.h"
 
 struct vmbus_dynid {
@@ -58,6 +47,10 @@
 static int hyperv_cpuhp_online;
 
 static void *hv_panic_page;
+
+/* Values parsed from ACPI DSDT */
+static int vmbus_irq;
+int vmbus_interrupt;
 
 /*
 * Boolean to control whether to report panic messages over Hyper-V.
@@ -94,8 +87,12 @@
 static int hyperv_die_event(struct notifier_block *nb, unsigned long val,
 void *args)
 {
- struct die_args *die = (struct die_args *)args;
+ struct die_args *die = args;
 struct pt_regs *regs = die->regs;
+
+ /* Don't notify Hyper-V if the die event is other than oops */
+ if (val != DIE_OOPS)
+ return NOTIFY_DONE;
 
 /*
 * Hyper-V should be notified only once about a panic. If we will be
@@ -117,7 +114,7 @@
 static const char *fb_mmio_name = "fb_range";
 static struct resource *fb_mmio;
 static struct resource *hyperv_mmio;
-static DEFINE_SEMAPHORE(hyperv_mmio_lock);
+static DEFINE_MUTEX(hyperv_mmio_lock);
 
 static int vmbus_exists(void)
 {
@@ -125,14 +122,6 @@
 return -ENODEV;
 
 return 0;
-}
-
-#define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2)
-static void print_alias_name(struct hv_device *hv_dev, char *alias_name)
-{
- int i;
- for (i = 0; i < VMBUS_ALIAS_LEN; i += 2)
- sprintf(&alias_name[i], "%02x", hv_dev->dev_type.b[i/2]);
 }
 
 static u8 channel_monitor_group(const struct vmbus_channel *channel)
@@ -211,7 +200,7 @@
 if (!hv_dev->channel)
 return -ENODEV;
 return sprintf(buf, "{%pUl}\n",
- hv_dev->channel->offermsg.offer.if_type.b);
+ &hv_dev->channel->offermsg.offer.if_type);
 }
 static DEVICE_ATTR_RO(class_id);
 
@@ -223,7 +212,7 @@
 if (!hv_dev->channel)
 return -ENODEV;
 return sprintf(buf, "{%pUl}\n",
- hv_dev->channel->offermsg.offer.if_instance.b);
+ &hv_dev->channel->offermsg.offer.if_instance);
 }
 static DEVICE_ATTR_RO(device_id);
 
@@ -231,10 +220,8 @@
 struct device_attribute *dev_attr, char *buf)
 {
 struct hv_device *hv_dev = device_to_hv_device(dev);
- char alias_name[VMBUS_ALIAS_LEN + 1];
 
- print_alias_name(hv_dev, alias_name);
- return sprintf(buf, "vmbus:%s\n", alias_name);
+ return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
 }
 static DEVICE_ATTR_RO(modalias);
 
@@ -247,7 +234,7 @@
 if (!hv_dev->channel)
 return -ENODEV;
 
- return sprintf(buf, "%d\n", hv_dev->channel->numa_node);
+ return sprintf(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
 }
 static DEVICE_ATTR_RO(numa_node);
 #endif
@@ -262,7 +249,7 @@
 return -ENODEV;
 return sprintf(buf, "%d\n",
 channel_pending(hv_dev->channel,
- vmbus_connection.monitor_pages[1]));
+ vmbus_connection.monitor_pages[0]));
 }
 static DEVICE_ATTR_RO(server_monitor_pending);
 
@@ -528,17 +515,16 @@
 {
 struct hv_device *hv_dev = device_to_hv_device(dev);
 struct vmbus_channel *channel = hv_dev->channel, *cur_sc;
- unsigned long flags;
 int buf_size = PAGE_SIZE, n_written, tot_written;
 struct list_head *cur;
 
 if (!channel)
 return -ENODEV;
 
+ mutex_lock(&vmbus_connection.channel_mutex);
+
 tot_written = snprintf(buf, buf_size, "%u:%u\n",
 channel->offermsg.child_relid, channel->target_cpu);
-
- spin_lock_irqsave(&channel->lock, flags);
 
 list_for_each(cur, &channel->sc_list) {
 if (tot_written >= buf_size - 1)
@@ -553,7 +539,7 @@
 tot_written += n_written;
 }
 
- spin_unlock_irqrestore(&channel->lock, flags);
+ mutex_unlock(&vmbus_connection.channel_mutex);
 
 return tot_written;
 }
@@ -576,6 +562,54 @@
 return sprintf(buf, "0x%x\n", hv_dev->device_id);
 }
 static DEVICE_ATTR_RO(device);
+
+static ssize_t driver_override_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct hv_device *hv_dev = device_to_hv_device(dev);
+ char *driver_override, *old, *cp;
+
+ /* We need to keep extra room for a newline */
+ if (count >= (PAGE_SIZE - 1))
+ return -EINVAL;
+
+ driver_override = kstrndup(buf, count, GFP_KERNEL);
+ if (!driver_override)
+ return -ENOMEM;
+
+ cp = strchr(driver_override, '\n');
+ if (cp)
+ *cp = '\0';
+
+ device_lock(dev);
+ old = hv_dev->driver_override;
+ if (strlen(driver_override)) {
+ hv_dev->driver_override = driver_override;
+ } else {
+ kfree(driver_override);
+ hv_dev->driver_override = NULL;
+ }
+ device_unlock(dev);
+
+ kfree(old);
+
+ return count;
+}
+
+static ssize_t driver_override_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hv_device *hv_dev = device_to_hv_device(dev);
+ ssize_t len;
+
+ device_lock(dev);
+ len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override);
+ device_unlock(dev);
+
+ return len;
+}
+static DEVICE_ATTR_RW(driver_override);
 
 /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
 static struct attribute *vmbus_dev_attrs[] = {
@@ -607,6 +641,7 @@
 &dev_attr_channel_vp_mapping.attr,
 &dev_attr_vendor.attr,
 &dev_attr_device.attr,
+ &dev_attr_driver_override.attr,
 NULL,
 };
 
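The driver_override attribute added by this hunk follows the same convention as PCI's driver_override: writing a driver name restricts the device to that driver, and writing an empty string (or a bare newline) clears the override. A minimal userspace sketch of the intended use; the instance GUID in the path and the uio_hv_generic target are illustrative placeholders, not values taken from this patch:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* Placeholder path: substitute a real vmbus device instance GUID. */
	const char *path =
		"/sys/bus/vmbus/devices/<instance-guid>/driver_override";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return EXIT_FAILURE;
	}
	/* Writing "" (or just "\n") would clear the override again. */
	fprintf(f, "uio_hv_generic\n");
	return fclose(f) ? EXIT_FAILURE : EXIT_SUCCESS;
}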
@@ -654,59 +689,72 @@
 static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
 {
 struct hv_device *dev = device_to_hv_device(device);
- int ret;
- char alias_name[VMBUS_ALIAS_LEN + 1];
+ const char *format = "MODALIAS=vmbus:%*phN";
 
- print_alias_name(dev, alias_name);
- ret = add_uevent_var(env, "MODALIAS=vmbus:%s", alias_name);
- return ret;
+ return add_uevent_var(env, format, UUID_SIZE, &dev->dev_type);
 }
 
-static const uuid_le null_guid;
-
-static inline bool is_null_guid(const uuid_le *guid)
+static const struct hv_vmbus_device_id *
+hv_vmbus_dev_match(const struct hv_vmbus_device_id *id, const guid_t *guid)
 {
- if (uuid_le_cmp(*guid, null_guid))
- return false;
- return true;
+ if (id == NULL)
+ return NULL; /* empty device table */
+
+ for (; !guid_is_null(&id->guid); id++)
+ if (guid_equal(&id->guid, guid))
+ return id;
+
+ return NULL;
 }
 
-/*
- * Return a matching hv_vmbus_device_id pointer.
- * If there is no match, return NULL.
- */
-static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
- const uuid_le *guid)
+static const struct hv_vmbus_device_id *
+hv_vmbus_dynid_match(struct hv_driver *drv, const guid_t *guid)
 {
 const struct hv_vmbus_device_id *id = NULL;
 struct vmbus_dynid *dynid;
 
- /* Look at the dynamic ids first, before the static ones */
 spin_lock(&drv->dynids.lock);
 list_for_each_entry(dynid, &drv->dynids.list, node) {
- if (!uuid_le_cmp(dynid->id.guid, *guid)) {
+ if (guid_equal(&dynid->id.guid, guid)) {
 id = &dynid->id;
 break;
 }
 }
 spin_unlock(&drv->dynids.lock);
 
- if (id)
- return id;
+ return id;
+}
 
- id = drv->id_table;
- if (id == NULL)
- return NULL; /* empty device table */
+static const struct hv_vmbus_device_id vmbus_device_null;
 
- for (; !is_null_guid(&id->guid); id++)
- if (!uuid_le_cmp(id->guid, *guid))
- return id;
+/*
+ * Return a matching hv_vmbus_device_id pointer.
+ * If there is no match, return NULL.
+ */
+static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
+ struct hv_device *dev)
+{
+ const guid_t *guid = &dev->dev_type;
+ const struct hv_vmbus_device_id *id;
 
- return NULL;
+ /* When driver_override is set, only bind to the matching driver */
+ if (dev->driver_override && strcmp(dev->driver_override, drv->name))
+ return NULL;
+
+ /* Look at the dynamic ids first, before the static ones */
+ id = hv_vmbus_dynid_match(drv, guid);
+ if (!id)
+ id = hv_vmbus_dev_match(drv->id_table, guid);
+
+ /* driver_override will always match, send a dummy id */
+ if (!id && dev->driver_override)
+ id = &vmbus_device_null;
+
+ return id;
 }
 
 /* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */
-static int vmbus_add_dynid(struct hv_driver *drv, uuid_le *guid)
+static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid)
 {
 struct vmbus_dynid *dynid;
 
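Condensed, the lookup policy that hv_vmbus_get_id() now implements is: a driver_override naming a different driver vetoes the match; otherwise dynamic IDs (added through sysfs) are consulted before the driver's static id_table; and if driver_override names this driver, the dummy vmbus_device_null ID matches unconditionally. A standalone sketch of the table walk with mocked-up types (guid_is_null()/guid_equal() reduce to memcmp() here):

#include <stdbool.h>
#include <stddef.h>
#include <string.h>

typedef struct { unsigned char b[16]; } guid_t;

static const guid_t guid_null;

static bool guid_equal(const guid_t *a, const guid_t *b)
{
	return memcmp(a, b, sizeof(*a)) == 0;
}

/* Walk a null-GUID-terminated table, as hv_vmbus_dev_match() does. */
static const guid_t *table_match(const guid_t *table, const guid_t *guid)
{
	if (table == NULL)
		return NULL;		/* empty device table */

	for (; !guid_equal(table, &guid_null); table++)
		if (guid_equal(table, guid))
			return table;

	return NULL;
}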
@@ -744,14 +792,14 @@
 size_t count)
 {
 struct hv_driver *drv = drv_to_hv_drv(driver);
- uuid_le guid;
+ guid_t guid;
 ssize_t retval;
 
- retval = uuid_le_to_bin(buf, &guid);
+ retval = guid_parse(buf, &guid);
 if (retval)
 return retval;
 
- if (hv_vmbus_get_id(drv, &guid))
+ if (hv_vmbus_dynid_match(drv, &guid))
 return -EEXIST;
 
 retval = vmbus_add_dynid(drv, &guid);
@@ -771,10 +819,10 @@
 {
 struct hv_driver *drv = drv_to_hv_drv(driver);
 struct vmbus_dynid *dynid, *n;
- uuid_le guid;
+ guid_t guid;
 ssize_t retval;
 
- retval = uuid_le_to_bin(buf, &guid);
+ retval = guid_parse(buf, &guid);
 if (retval)
 return retval;
 
@@ -783,7 +831,7 @@
 list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
 struct hv_vmbus_device_id *id = &dynid->id;
 
- if (!uuid_le_cmp(id->guid, guid)) {
+ if (guid_equal(&id->guid, &guid)) {
 list_del(&dynid->node);
 kfree(dynid);
 retval = count;
@@ -816,7 +864,7 @@
 if (is_hvsock_channel(hv_dev->channel))
 return drv->hvsock;
 
- if (hv_vmbus_get_id(drv, &hv_dev->dev_type))
+ if (hv_vmbus_get_id(drv, hv_dev))
 return 1;
 
 return 0;
@@ -833,7 +881,7 @@
 struct hv_device *dev = device_to_hv_device(child_device);
 const struct hv_vmbus_device_id *dev_id;
 
- dev_id = hv_vmbus_get_id(drv, &dev->dev_type);
+ dev_id = hv_vmbus_get_id(drv, dev);
 if (drv->probe) {
 ret = drv->probe(dev, dev_id);
 if (ret != 0)
@@ -885,6 +933,48 @@
 drv->shutdown(dev);
 }
 
+#ifdef CONFIG_PM_SLEEP
+/*
+ * vmbus_suspend - Suspend a vmbus device
+ */
+static int vmbus_suspend(struct device *child_device)
+{
+ struct hv_driver *drv;
+ struct hv_device *dev = device_to_hv_device(child_device);
+
+ /* The device may not be attached yet */
+ if (!child_device->driver)
+ return 0;
+
+ drv = drv_to_hv_drv(child_device->driver);
+ if (!drv->suspend)
+ return -EOPNOTSUPP;
+
+ return drv->suspend(dev);
+}
+
+/*
+ * vmbus_resume - Resume a vmbus device
+ */
+static int vmbus_resume(struct device *child_device)
+{
+ struct hv_driver *drv;
+ struct hv_device *dev = device_to_hv_device(child_device);
+
+ /* The device may not be attached yet */
+ if (!child_device->driver)
+ return 0;
+
+ drv = drv_to_hv_drv(child_device->driver);
+ if (!drv->resume)
+ return -EOPNOTSUPP;
+
+ return drv->resume(dev);
+}
+#else
+#define vmbus_suspend NULL
+#define vmbus_resume NULL
+#endif /* CONFIG_PM_SLEEP */
 
 /*
 * vmbus_device_release - Final callback release of the vmbus child device
@@ -894,12 +984,32 @@
 struct hv_device *hv_dev = device_to_hv_device(device);
 struct vmbus_channel *channel = hv_dev->channel;
 
+ hv_debug_rm_dev_dir(hv_dev);
+
 mutex_lock(&vmbus_connection.channel_mutex);
- hv_process_channel_removal(channel->offermsg.child_relid);
+ hv_process_channel_removal(channel);
 mutex_unlock(&vmbus_connection.channel_mutex);
 kfree(hv_dev);
-
 }
+
+/*
+ * Note: we must use the "noirq" ops: see the comment before vmbus_bus_pm.
+ *
+ * suspend_noirq/resume_noirq are set to NULL to support Suspend-to-Idle: we
+ * shouldn't suspend the vmbus devices upon Suspend-to-Idle, otherwise there
+ * is no way to wake up a Generation-2 VM.
+ *
+ * The other 4 ops are for hibernation.
+ */
+
+static const struct dev_pm_ops vmbus_pm = {
+ .suspend_noirq = NULL,
+ .resume_noirq = NULL,
+ .freeze_noirq = vmbus_suspend,
+ .thaw_noirq = vmbus_resume,
+ .poweroff_noirq = vmbus_suspend,
+ .restore_noirq = vmbus_resume,
+};
 
 /* The one and only one */
 static struct bus_type hv_bus = {
@@ -911,11 +1021,15 @@
 .uevent = vmbus_uevent,
 .dev_groups = vmbus_dev_groups,
 .drv_groups = vmbus_drv_groups,
+ .pm = &vmbus_pm,
 };
 
 struct onmessage_work_context {
 struct work_struct work;
- struct hv_message msg;
+ struct {
+ struct hv_message_header header;
+ u8 payload[];
+ } msg;
 };
 
 static void vmbus_onmessage_work(struct work_struct *work)
@@ -928,19 +1042,9 @@
 
 ctx = container_of(work, struct onmessage_work_context,
 work);
- vmbus_onmessage(&ctx->msg);
+ vmbus_onmessage((struct vmbus_channel_message_header *)
+ &ctx->msg.payload);
 kfree(ctx);
-}
-
-static void hv_process_timer_expiration(struct hv_message *msg,
- struct hv_per_cpu_context *hv_cpu)
-{
- struct clock_event_device *dev = hv_cpu->clk_evt;
-
- if (dev->event_handler)
- dev->event_handler(dev);
-
- vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
 }
 
 void vmbus_on_msg_dpc(unsigned long data)
@@ -953,6 +1057,13 @@
 const struct vmbus_channel_message_table_entry *entry;
 struct onmessage_work_context *ctx;
 u32 message_type = msg->header.message_type;
+
+ /*
+ * 'enum vmbus_channel_message_type' is supposed to always be 'u32' as
+ * it is being used in 'struct vmbus_channel_message_header' definition
+ * which is supposed to match hypervisor ABI.
+ */
+ BUILD_BUG_ON(sizeof(enum vmbus_channel_message_type) != sizeof(u32));
 
 if (message_type == HVMSG_NONE)
 /* no msg */
@@ -967,41 +1078,88 @@
 goto msg_handled;
 }
 
+ if (msg->header.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) {
+ WARN_ONCE(1, "payload size is too large (%d)\n",
+ msg->header.payload_size);
+ goto msg_handled;
+ }
+
 entry = &channel_message_table[hdr->msgtype];
 
 if (!entry->message_handler)
 goto msg_handled;
 
+ if (msg->header.payload_size < entry->min_payload_len) {
+ WARN_ONCE(1, "message too short: msgtype=%d len=%d\n",
+ hdr->msgtype, msg->header.payload_size);
+ goto msg_handled;
+ }
+
 if (entry->handler_type == VMHT_BLOCKING) {
- ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
+ ctx = kmalloc(sizeof(*ctx) + msg->header.payload_size,
+ GFP_ATOMIC);
 if (ctx == NULL)
 return;
 
 INIT_WORK(&ctx->work, vmbus_onmessage_work);
- memcpy(&ctx->msg, msg, sizeof(*msg));
+ memcpy(&ctx->msg, msg, sizeof(msg->header) +
+ msg->header.payload_size);
 
 /*
 * The host can generate a rescind message while we
 * may still be handling the original offer. We deal with
- * this condition by ensuring the processing is done on the
- * same CPU.
+ * this condition by relying on the synchronization provided
+ * by offer_in_progress and by channel_mutex. See also the
+ * inline comments in vmbus_onoffer_rescind().
 */
 switch (hdr->msgtype) {
 case CHANNELMSG_RESCIND_CHANNELOFFER:
 /*
 * If we are handling the rescind message;
 * schedule the work on the global work queue.
+ *
+ * The OFFER message and the RESCIND message should
+ * not be handled by the same serialized work queue,
+ * because the OFFER handler may call vmbus_open(),
+ * which tries to open the channel by sending an
+ * OPEN_CHANNEL message to the host and waits for
+ * the host's response; however, if the host has
+ * rescinded the channel before it receives the
+ * OPEN_CHANNEL message, the host just silently
+ * ignores the OPEN_CHANNEL message; as a result,
+ * the guest's OFFER handler hangs for ever, if we
+ * handle the RESCIND message in the same serialized
+ * work queue: the RESCIND handler can not start to
+ * run before the OFFER handler finishes.
 */
- schedule_work_on(vmbus_connection.connect_cpu,
- &ctx->work);
+ schedule_work(&ctx->work);
 break;
 
 case CHANNELMSG_OFFERCHANNEL:
+ /*
+ * The host sends the offer message of a given channel
+ * before sending the rescind message of the same
+ * channel. These messages are sent to the guest's
+ * connect CPU; the guest then starts processing them
+ * in the tasklet handler on this CPU:
+ *
+ * VMBUS_CONNECT_CPU
+ *
+ * [vmbus_on_msg_dpc()]
+ * atomic_inc() // CHANNELMSG_OFFERCHANNEL
+ * queue_work()
+ * ...
+ * [vmbus_on_msg_dpc()]
+ * schedule_work() // CHANNELMSG_RESCIND_CHANNELOFFER
+ *
+ * We rely on the memory-ordering properties of the
+ * queue_work() and schedule_work() primitives, which
+ * guarantee that the atomic increment will be visible
+ * to the CPUs which will execute the offer & rescind
+ * works by the time these works will start execution.
+ */
 atomic_inc(&vmbus_connection.offer_in_progress);
- queue_work_on(vmbus_connection.connect_cpu,
- vmbus_connection.work_queue,
- &ctx->work);
- break;
+ fallthrough;
 
 default:
 queue_work(vmbus_connection.work_queue, &ctx->work);
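The allocation change above is the classic header-plus-flexible-array pattern: only sizeof(header) + payload_size bytes are allocated and copied, instead of a full fixed-size struct hv_message. A standalone sketch of the same pattern with mocked-up field types:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct msg_header {
	uint32_t message_type;
	uint8_t payload_size;	/* mocked-up layout, for illustration only */
};

struct msg {
	struct msg_header header;
	uint8_t payload[];	/* flexible array member */
};

/* Copy only the header plus the actual payload, as the hunk above does. */
static struct msg *copy_msg(const struct msg *src)
{
	size_t n = sizeof(src->header) + src->header.payload_size;
	struct msg *dst = malloc(n);

	if (dst)
		memcpy(dst, src, n);
	return dst;
}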
@@ -1013,18 +1171,42 @@
 vmbus_signal_eom(msg, message_type);
 }
 
-
+#ifdef CONFIG_PM_SLEEP
 /*
- * Direct callback for channels using other deferred processing
+ * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
+ * hibernation, because hv_sock connections can not persist across hibernation.
 */
-static void vmbus_channel_isr(struct vmbus_channel *channel)
+static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
 {
- void (*callback_fn)(void *);
+ struct onmessage_work_context *ctx;
+ struct vmbus_channel_rescind_offer *rescind;
 
- callback_fn = READ_ONCE(channel->onchannel_callback);
- if (likely(callback_fn != NULL))
- (*callback_fn)(channel->channel_callback_context);
+ WARN_ON(!is_hvsock_channel(channel));
+
+ /*
+ * Allocation size is small and the allocation should really not fail,
+ * otherwise the state of the hv_sock connections ends up in limbo.
+ */
+ ctx = kzalloc(sizeof(*ctx) + sizeof(*rescind),
+ GFP_KERNEL | __GFP_NOFAIL);
+
+ /*
+ * So far, these are not really used by Linux. Just set them to the
+ * reasonable values conforming to the definitions of the fields.
+ */
+ ctx->msg.header.message_type = 1;
+ ctx->msg.header.payload_size = sizeof(*rescind);
+
+ /* These values are actually used by Linux. */
+ rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.payload;
+ rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER;
+ rescind->child_relid = channel->offermsg.child_relid;
+
+ INIT_WORK(&ctx->work, vmbus_onmessage_work);
+
+ queue_work(vmbus_connection.work_queue, &ctx->work);
 }
+#endif /* CONFIG_PM_SLEEP */
 
 /*
 * Schedule all channels with events pending
@@ -1056,6 +1238,7 @@
 return;
 
 for_each_set_bit(relid, recv_int_page, maxbits) {
+ void (*callback_fn)(void *context);
 struct vmbus_channel *channel;
 
 if (!sync_test_and_clear_bit(relid, recv_int_page))
@@ -1065,33 +1248,54 @@
 if (relid == 0)
 continue;
 
+ /*
+ * Pairs with the kfree_rcu() in vmbus_chan_release().
+ * Guarantees that the channel data structure doesn't
+ * get freed while the channel pointer below is being
+ * dereferenced.
+ */
 rcu_read_lock();
 
 /* Find channel based on relid */
- list_for_each_entry_rcu(channel, &hv_cpu->chan_list, percpu_list) {
- if (channel->offermsg.child_relid != relid)
- continue;
+ channel = relid2channel(relid);
+ if (channel == NULL)
+ goto sched_unlock_rcu;
 
- if (channel->rescind)
- continue;
+ if (channel->rescind)
+ goto sched_unlock_rcu;
 
- trace_vmbus_chan_sched(channel);
+ /*
+ * Make sure that the ring buffer data structure doesn't get
+ * freed while we dereference the ring buffer pointer. Test
+ * for the channel's onchannel_callback being NULL within a
+ * sched_lock critical section. See also the inline comments
+ * in vmbus_reset_channel_cb().
+ */
+ spin_lock(&channel->sched_lock);
 
- ++channel->interrupts;
+ callback_fn = channel->onchannel_callback;
+ if (unlikely(callback_fn == NULL))
+ goto sched_unlock;
 
- switch (channel->callback_mode) {
- case HV_CALL_ISR:
- vmbus_channel_isr(channel);
- break;
+ trace_vmbus_chan_sched(channel);
 
- case HV_CALL_BATCHED:
- hv_begin_read(&channel->inbound);
- /* fallthrough */
- case HV_CALL_DIRECT:
- tasklet_schedule(&channel->callback_event);
- }
+ ++channel->interrupts;
+
+ switch (channel->callback_mode) {
+ case HV_CALL_ISR:
+ (*callback_fn)(channel->channel_callback_context);
+ break;
+
+ case HV_CALL_BATCHED:
+ hv_begin_read(&channel->inbound);
+ fallthrough;
+ case HV_CALL_DIRECT:
+ tasklet_schedule(&channel->callback_event);
 }
 
+sched_unlock:
+ spin_unlock(&channel->sched_lock);
+sched_unlock_rcu:
 rcu_read_unlock();
 }
 }
@@ -1103,8 +1307,6 @@
 void *page_addr = hv_cpu->synic_event_page;
 struct hv_message *msg;
 union hv_synic_event_flags *event;
- struct pt_regs *regs = get_irq_regs();
- u64 ip = regs ? instruction_pointer(regs) : 0;
 bool handled = false;
 
 if (unlikely(page_addr == NULL))
@@ -1142,13 +1344,14 @@
 
 /* Check if there are actual msgs to be processed */
 if (msg->header.message_type != HVMSG_NONE) {
- if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
- hv_process_timer_expiration(msg, hv_cpu);
- else
+ if (msg->header.message_type == HVMSG_TIMER_EXPIRED) {
+ hv_stimer0_isr();
+ vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
+ } else
 tasklet_schedule(&hv_cpu->msg_dpc);
 }
 
- add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0, ip);
+ add_interrupt_randomness(hv_get_vector());
 }
 
 /*
@@ -1156,7 +1359,8 @@
 * buffer and call into Hyper-V to transfer the data.
 */
 static void hv_kmsg_dump(struct kmsg_dumper *dumper,
- enum kmsg_dump_reason reason)
+ enum kmsg_dump_reason reason,
+ struct kmsg_dumper_iter *iter)
 {
 size_t bytes_written;
 phys_addr_t panic_pa;
@@ -1171,7 +1375,7 @@
 * Write dump contents to the page. No need to synchronize; panic should
 * be single-threaded.
 */
- kmsg_dump_get_buffer(dumper, true, hv_panic_page, PAGE_SIZE,
+ kmsg_dump_get_buffer(iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
 &bytes_written);
 if (bytes_written)
 hyperv_report_panic_msg(panic_pa, bytes_written);
@@ -1182,8 +1386,6 @@
 };
 
 static struct ctl_table_header *hv_ctl_table_hdr;
-static int zero;
-static int one = 1;
 
 /*
 * sysctl option to allow the user to control whether kmsg data should be
@@ -1196,8 +1398,8 @@
 .maxlen = sizeof(int),
 .mode = 0644,
 .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
 },
 {}
 };
@@ -1223,7 +1425,6 @@
 {
 int ret;
 
- /* Hypervisor initialization...setup hypercall page..etc */
 ret = hv_init();
 if (ret != 0) {
 pr_err("Unable to initialize the hypervisor - 0x%x\n", ret);
@@ -1234,19 +1435,22 @@
 if (ret)
 return ret;
 
- hv_setup_vmbus_irq(vmbus_isr);
+ ret = hv_setup_vmbus_irq(vmbus_irq, vmbus_isr);
+ if (ret)
+ goto err_setup;
 
 ret = hv_synic_alloc();
 if (ret)
 goto err_alloc;
+
 /*
- * Initialize the per-cpu interrupt state and
- * connect to the host.
+ * Initialize the per-cpu interrupt state and stimer state.
+ * Then connect to the host.
 */
 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
 hv_synic_init, hv_synic_cleanup);
 if (ret < 0)
- goto err_alloc;
+ goto err_cpuhp;
 hyperv_cpuhp_online = ret;
 
 ret = vmbus_connect();
@@ -1272,13 +1476,13 @@
 */
 hv_get_crash_ctl(hyperv_crash_ctl);
 if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) {
- hv_panic_page = (void *)get_zeroed_page(GFP_KERNEL);
+ hv_panic_page = (void *)hv_alloc_hyperv_zeroed_page();
 if (hv_panic_page) {
 ret = kmsg_dump_register(&hv_kmsg_dumper);
 if (ret) {
 pr_err("Hyper-V: kmsg dump register "
 "error 0x%x\n", ret);
- free_page(
+ hv_free_hyperv_page(
 (unsigned long)hv_panic_page);
 hv_panic_page = NULL;
 }
@@ -1304,10 +1508,11 @@
 
 err_connect:
 cpuhp_remove_state(hyperv_cpuhp_online);
-err_alloc:
+err_cpuhp:
 hv_synic_free();
+err_alloc:
 hv_remove_vmbus_irq();
-
+err_setup:
 bus_unregister(&hv_bus);
 unregister_sysctl_table(hv_ctl_table_hdr);
 hv_ctl_table_hdr = NULL;
@@ -1382,7 +1587,7 @@
 
 struct vmbus_chan_attribute {
 struct attribute attr;
- ssize_t (*show)(const struct vmbus_channel *chan, char *buf);
+ ssize_t (*show)(struct vmbus_channel *chan, char *buf);
 ssize_t (*store)(struct vmbus_channel *chan,
 const char *buf, size_t count);
 };
@@ -1401,61 +1606,207 @@
 {
 const struct vmbus_chan_attribute *attribute
 = container_of(attr, struct vmbus_chan_attribute, attr);
- const struct vmbus_channel *chan
+ struct vmbus_channel *chan
 = container_of(kobj, struct vmbus_channel, kobj);
 
 if (!attribute->show)
 return -EIO;
 
- if (chan->state != CHANNEL_OPENED_STATE)
- return -EINVAL;
-
 return attribute->show(chan, buf);
+}
+
+static ssize_t vmbus_chan_attr_store(struct kobject *kobj,
+ struct attribute *attr, const char *buf,
+ size_t count)
+{
+ const struct vmbus_chan_attribute *attribute
+ = container_of(attr, struct vmbus_chan_attribute, attr);
+ struct vmbus_channel *chan
+ = container_of(kobj, struct vmbus_channel, kobj);
+
+ if (!attribute->store)
+ return -EIO;
+
+ return attribute->store(chan, buf, count);
 }
 
 static const struct sysfs_ops vmbus_chan_sysfs_ops = {
 .show = vmbus_chan_attr_show,
+ .store = vmbus_chan_attr_store,
 };
 
-static ssize_t out_mask_show(const struct vmbus_channel *channel, char *buf)
+static ssize_t out_mask_show(struct vmbus_channel *channel, char *buf)
 {
- const struct hv_ring_buffer_info *rbi = &channel->outbound;
+ struct hv_ring_buffer_info *rbi = &channel->outbound;
+ ssize_t ret;
 
- return sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
+ mutex_lock(&rbi->ring_buffer_mutex);
+ if (!rbi->ring_buffer) {
+ mutex_unlock(&rbi->ring_buffer_mutex);
+ return -EINVAL;
+ }
+
+ ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
+ mutex_unlock(&rbi->ring_buffer_mutex);
+ return ret;
 }
 static VMBUS_CHAN_ATTR_RO(out_mask);
 
-static ssize_t in_mask_show(const struct vmbus_channel *channel, char *buf)
+static ssize_t in_mask_show(struct vmbus_channel *channel, char *buf)
 {
- const struct hv_ring_buffer_info *rbi = &channel->inbound;
+ struct hv_ring_buffer_info *rbi = &channel->inbound;
+ ssize_t ret;
 
- return sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
+ mutex_lock(&rbi->ring_buffer_mutex);
+ if (!rbi->ring_buffer) {
+ mutex_unlock(&rbi->ring_buffer_mutex);
+ return -EINVAL;
+ }
+
+ ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
+ mutex_unlock(&rbi->ring_buffer_mutex);
+ return ret;
 }
 static VMBUS_CHAN_ATTR_RO(in_mask);
 
-static ssize_t read_avail_show(const struct vmbus_channel *channel, char *buf)
+static ssize_t read_avail_show(struct vmbus_channel *channel, char *buf)
 {
- const struct hv_ring_buffer_info *rbi = &channel->inbound;
+ struct hv_ring_buffer_info *rbi = &channel->inbound;
+ ssize_t ret;
 
- return sprintf(buf, "%u\n", hv_get_bytes_to_read(rbi));
+ mutex_lock(&rbi->ring_buffer_mutex);
+ if (!rbi->ring_buffer) {
+ mutex_unlock(&rbi->ring_buffer_mutex);
+ return -EINVAL;
+ }
+
+ ret = sprintf(buf, "%u\n", hv_get_bytes_to_read(rbi));
+ mutex_unlock(&rbi->ring_buffer_mutex);
+ return ret;
 }
 static VMBUS_CHAN_ATTR_RO(read_avail);
 
-static ssize_t write_avail_show(const struct vmbus_channel *channel, char *buf)
+static ssize_t write_avail_show(struct vmbus_channel *channel, char *buf)
 {
- const struct hv_ring_buffer_info *rbi = &channel->outbound;
+ struct hv_ring_buffer_info *rbi = &channel->outbound;
+ ssize_t ret;
 
- return sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi));
+ mutex_lock(&rbi->ring_buffer_mutex);
+ if (!rbi->ring_buffer) {
+ mutex_unlock(&rbi->ring_buffer_mutex);
+ return -EINVAL;
+ }
+
+ ret = sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi));
+ mutex_unlock(&rbi->ring_buffer_mutex);
+ return ret;
 }
 static VMBUS_CHAN_ATTR_RO(write_avail);
 
-static ssize_t show_target_cpu(const struct vmbus_channel *channel, char *buf)
+static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf)
 {
 return sprintf(buf, "%u\n", channel->target_cpu);
 }
-static VMBUS_CHAN_ATTR(cpu, S_IRUGO, show_target_cpu, NULL);
+static ssize_t target_cpu_store(struct vmbus_channel *channel,
+ const char *buf, size_t count)
+{
+ u32 target_cpu, origin_cpu;
+ ssize_t ret = count;
 
-static ssize_t channel_pending_show(const struct vmbus_channel *channel,
+ if (vmbus_proto_version < VERSION_WIN10_V4_1)
+ return -EIO;
+
+ if (sscanf(buf, "%uu", &target_cpu) != 1)
+ return -EIO;
+
+ /* Validate target_cpu for the cpumask_test_cpu() operation below. */
+ if (target_cpu >= nr_cpumask_bits)
+ return -EINVAL;
+
+ /* No CPUs should come up or down during this. */
+ cpus_read_lock();
+
+ if (!cpu_online(target_cpu)) {
+ cpus_read_unlock();
+ return -EINVAL;
+ }
+
+ /*
+ * Synchronizes target_cpu_store() and channel closure:
+ *
+ * { Initially: state = CHANNEL_OPENED }
+ *
+ * CPU1 CPU2
+ *
+ * [target_cpu_store()] [vmbus_disconnect_ring()]
+ *
+ * LOCK channel_mutex LOCK channel_mutex
+ * LOAD r1 = state LOAD r2 = state
+ * IF (r1 == CHANNEL_OPENED) IF (r2 == CHANNEL_OPENED)
+ * SEND MODIFYCHANNEL STORE state = CHANNEL_OPEN
+ * [...] SEND CLOSECHANNEL
+ * UNLOCK channel_mutex UNLOCK channel_mutex
+ *
+ * Forbids: r1 == r2 == CHANNEL_OPENED (i.e., CPU1's LOCK precedes
+ * CPU2's LOCK) && CPU2's SEND precedes CPU1's SEND
+ *
+ * Note. The host processes the channel messages "sequentially", in
+ * the order in which they are received on a per-partition basis.
+ */
+ mutex_lock(&vmbus_connection.channel_mutex);
+
+ /*
+ * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
+ * avoid sending the message and fail here for such channels.
+ */
+ if (channel->state != CHANNEL_OPENED_STATE) {
+ ret = -EIO;
+ goto cpu_store_unlock;
+ }
+
+ origin_cpu = channel->target_cpu;
+ if (target_cpu == origin_cpu)
+ goto cpu_store_unlock;
+
+ if (vmbus_send_modifychannel(channel->offermsg.child_relid,
+ hv_cpu_number_to_vp_number(target_cpu))) {
+ ret = -EIO;
+ goto cpu_store_unlock;
+ }
+
+ /*
+ * Warning. At this point, there is *no* guarantee that the host will
+ * have successfully processed the vmbus_send_modifychannel() request.
+ * See the header comment of vmbus_send_modifychannel() for more info.
+ *
+ * Lags in the processing of the above vmbus_send_modifychannel() can
+ * result in missed interrupts if the "old" target CPU is taken offline
+ * before Hyper-V starts sending interrupts to the "new" target CPU.
+ * But apart from this offlining scenario, the code tolerates such
+ * lags. It will function correctly even if a channel interrupt comes
+ * in on a CPU that is different from the channel target_cpu value.
+ */
+
+ channel->target_cpu = target_cpu;
+
+ /* See init_vp_index(). */
+ if (hv_is_perf_channel(channel))
+ hv_update_alloced_cpus(origin_cpu, target_cpu);
+
+ /* Currently set only for storvsc channels. */
+ if (channel->change_target_cpu_callback) {
+ (*channel->change_target_cpu_callback)(channel,
+ origin_cpu, target_cpu);
+ }
+
+cpu_store_unlock:
+ mutex_unlock(&vmbus_connection.channel_mutex);
+ cpus_read_unlock();
+ return ret;
+}
+static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store);
+
+static ssize_t channel_pending_show(struct vmbus_channel *channel,
 char *buf)
 {
 return sprintf(buf, "%d\n",
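With the channel "cpu" attribute now writable, an administrator can retarget a channel's interrupts from userspace. A hedged sketch of such a write; the device GUID and relid path components are placeholders, and per the code above the write fails with EIO on hosts older than VERSION_WIN10_V4_1 and with EINVAL for offline CPUs:

#include <stdio.h>

int main(void)
{
	/* Placeholder path: substitute a real device GUID and channel relid. */
	FILE *f = fopen(
		"/sys/bus/vmbus/devices/<instance-guid>/channels/<relid>/cpu",
		"w");

	if (!f)
		return 1;
	fprintf(f, "2\n");	/* move the channel's interrupts to CPU 2 */
	return fclose(f) ? 1 : 0;
}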
@@ -1464,7 +1815,7 @@
 }
 static VMBUS_CHAN_ATTR(pending, S_IRUGO, channel_pending_show, NULL);
 
-static ssize_t channel_latency_show(const struct vmbus_channel *channel,
+static ssize_t channel_latency_show(struct vmbus_channel *channel,
 char *buf)
 {
 return sprintf(buf, "%d\n",
@@ -1473,26 +1824,58 @@
 }
 static VMBUS_CHAN_ATTR(latency, S_IRUGO, channel_latency_show, NULL);
 
-static ssize_t channel_interrupts_show(const struct vmbus_channel *channel, char *buf)
+static ssize_t channel_interrupts_show(struct vmbus_channel *channel, char *buf)
 {
 return sprintf(buf, "%llu\n", channel->interrupts);
 }
 static VMBUS_CHAN_ATTR(interrupts, S_IRUGO, channel_interrupts_show, NULL);
 
-static ssize_t channel_events_show(const struct vmbus_channel *channel, char *buf)
+static ssize_t channel_events_show(struct vmbus_channel *channel, char *buf)
 {
 return sprintf(buf, "%llu\n", channel->sig_events);
 }
 static VMBUS_CHAN_ATTR(events, S_IRUGO, channel_events_show, NULL);
 
-static ssize_t subchannel_monitor_id_show(const struct vmbus_channel *channel,
+static ssize_t channel_intr_in_full_show(struct vmbus_channel *channel,
+ char *buf)
+{
+ return sprintf(buf, "%llu\n",
+ (unsigned long long)channel->intr_in_full);
+}
+static VMBUS_CHAN_ATTR(intr_in_full, 0444, channel_intr_in_full_show, NULL);
+
+static ssize_t channel_intr_out_empty_show(struct vmbus_channel *channel,
+ char *buf)
+{
+ return sprintf(buf, "%llu\n",
+ (unsigned long long)channel->intr_out_empty);
+}
+static VMBUS_CHAN_ATTR(intr_out_empty, 0444, channel_intr_out_empty_show, NULL);
+
+static ssize_t channel_out_full_first_show(struct vmbus_channel *channel,
+ char *buf)
+{
+ return sprintf(buf, "%llu\n",
+ (unsigned long long)channel->out_full_first);
+}
+static VMBUS_CHAN_ATTR(out_full_first, 0444, channel_out_full_first_show, NULL);
+
+static ssize_t channel_out_full_total_show(struct vmbus_channel *channel,
+ char *buf)
+{
+ return sprintf(buf, "%llu\n",
+ (unsigned long long)channel->out_full_total);
+}
+static VMBUS_CHAN_ATTR(out_full_total, 0444, channel_out_full_total_show, NULL);
+
+static ssize_t subchannel_monitor_id_show(struct vmbus_channel *channel,
 char *buf)
 {
 return sprintf(buf, "%u\n", channel->offermsg.monitorid);
 }
 static VMBUS_CHAN_ATTR(monitor_id, S_IRUGO, subchannel_monitor_id_show, NULL);
 
-static ssize_t subchannel_id_show(const struct vmbus_channel *channel,
+static ssize_t subchannel_id_show(struct vmbus_channel *channel,
 char *buf)
 {
 return sprintf(buf, "%u\n",
@@ -1510,6 +1893,10 @@
 &chan_attr_latency.attr,
 &chan_attr_interrupts.attr,
 &chan_attr_events.attr,
+ &chan_attr_intr_in_full.attr,
+ &chan_attr_intr_out_empty.attr,
+ &chan_attr_out_full_first.attr,
+ &chan_attr_out_full_total.attr,
 &chan_attr_monitor_id.attr,
 &chan_attr_subchannel_id.attr,
 NULL
@@ -1592,8 +1979,8 @@
 * vmbus_device_create - Creates and registers a new child device
 * on the vmbus.
 */
-struct hv_device *vmbus_device_create(const uuid_le *type,
- const uuid_le *instance,
+struct hv_device *vmbus_device_create(const guid_t *type,
+ const guid_t *instance,
 struct vmbus_channel *channel)
 {
 struct hv_device *child_device_obj;
@@ -1605,11 +1992,9 @@
 }
 
 child_device_obj->channel = channel;
- memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le));
- memcpy(&child_device_obj->dev_instance, instance,
- sizeof(uuid_le));
+ guid_copy(&child_device_obj->dev_type, type);
+ guid_copy(&child_device_obj->dev_instance, instance);
 child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */
-
 
 return child_device_obj;
 }
@@ -1623,7 +2008,7 @@
 int ret;
 
 dev_set_name(&child_device_obj->device, "%pUl",
- child_device_obj->channel->offermsg.offer.if_instance.b);
+ &child_device_obj->channel->offermsg.offer.if_instance);
 
 child_device_obj->device.bus = &hv_bus;
 child_device_obj->device.parent = &hv_acpi_dev->dev;
@@ -1636,6 +2021,7 @@
 ret = device_register(&child_device_obj->device);
 if (ret) {
 pr_err("Unable to register child device\n");
+ put_device(&child_device_obj->device);
 return ret;
 }
 
@@ -1652,6 +2038,7 @@
 pr_err("Unable to register primary channel\n");
 goto err_kset_unregister;
 }
+ hv_debug_add_dev_dir(child_device_obj);
 
 return 0;
 
@@ -1694,6 +2081,7 @@
 struct resource *new_res;
 struct resource **old_res = &hyperv_mmio;
 struct resource **prev_res = NULL;
+ struct resource r;
 
 switch (res->type) {
 
@@ -1711,6 +2099,23 @@
 start = res->data.address64.address.minimum;
 end = res->data.address64.address.maximum;
 break;
+
+ /*
+ * The IRQ information is needed only on ARM64, which Hyper-V
+ * sets up in the extended format. IRQ information is present
+ * on x86/x64 in the non-extended format but it is not used by
+ * Linux. So don't bother checking for the non-extended format.
+ */
+ case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
+ if (!acpi_dev_resource_interrupt(res, 0, &r)) {
+ pr_err("Unable to parse Hyper-V ACPI interrupt\n");
+ return AE_ERROR;
+ }
+ /* ARM64 INTID for VMbus */
+ vmbus_interrupt = res->data.extended_irq.interrupts[0];
+ /* Linux IRQ number */
+ vmbus_irq = r.start;
+ return AE_OK;
 
 default:
 /* Unused resource type */
@@ -1848,12 +2253,12 @@
 bool fb_overlap_ok)
 {
 struct resource *iter, *shadow;
- resource_size_t range_min, range_max, start;
+ resource_size_t range_min, range_max, start, end;
 const char *dev_n = dev_name(&device_obj->device);
 int retval;
 
 retval = -ENXIO;
- down(&hyperv_mmio_lock);
+ mutex_lock(&hyperv_mmio_lock);
 
 /*
 * If overlaps with frame buffers are allowed, then first attempt to
@@ -1883,6 +2288,14 @@
 range_max = iter->end;
 start = (range_min + align - 1) & ~(align - 1);
 for (; start + size - 1 <= range_max; start += align) {
+ end = start + size - 1;
+
+ /* Skip the whole fb_mmio region if not fb_overlap_ok */
+ if (!fb_overlap_ok && fb_mmio &&
+ (((start >= fb_mmio->start) && (start <= fb_mmio->end)) ||
+ ((end >= fb_mmio->start) && (end <= fb_mmio->end))))
+ continue;
+
 shadow = __request_region(iter, start, size, NULL,
 IORESOURCE_BUSY);
 if (!shadow)
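The candidate scan above rounds range_min up to the requested alignment with the usual power-of-two mask trick, then skips any window that touches the frame-buffer region unless fb_overlap_ok. A standalone check of the rounding arithmetic:

#include <assert.h>
#include <stdint.h>

/* Round addr up to the next multiple of align (align a power of two). */
static uint64_t align_up(uint64_t addr, uint64_t align)
{
	return (addr + align - 1) & ~(align - 1);
}

int main(void)
{
	/* 0xF8000123 rounded up to a 4 KiB boundary is 0xF8001000 ... */
	assert(align_up(0xF8000123ULL, 0x1000ULL) == 0xF8001000ULL);
	/* ... and already-aligned addresses are left unchanged. */
	assert(align_up(0xF8000000ULL, 0x1000ULL) == 0xF8000000ULL);
	return 0;
}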
@@ -1900,7 +2313,7 @@
 }
 
 exit:
- up(&hyperv_mmio_lock);
+ mutex_unlock(&hyperv_mmio_lock);
 return retval;
 }
 EXPORT_SYMBOL_GPL(vmbus_allocate_mmio);
@@ -1917,7 +2330,7 @@
 {
 struct resource *iter;
 
- down(&hyperv_mmio_lock);
+ mutex_lock(&hyperv_mmio_lock);
 for (iter = hyperv_mmio; iter; iter = iter->sibling) {
 if ((iter->start >= start + size) || (iter->end <= start))
 continue;
@@ -1925,7 +2338,7 @@
 __release_region(iter, start, size);
 }
 release_mem_region(start, size);
- up(&hyperv_mmio_lock);
+ mutex_unlock(&hyperv_mmio_lock);
 
 }
 EXPORT_SYMBOL_GPL(vmbus_free_mmio);
@@ -1967,12 +2380,163 @@
 return ret_val;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int vmbus_bus_suspend(struct device *dev)
+{
+ struct vmbus_channel *channel, *sc;
+
+ while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
+ /*
+ * We wait here until the completion of any channel
+ * offers that are currently in progress.
+ */
+ msleep(1);
+ }
+
+ mutex_lock(&vmbus_connection.channel_mutex);
+ list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+ if (!is_hvsock_channel(channel))
+ continue;
+
+ vmbus_force_channel_rescinded(channel);
+ }
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
+ /*
+ * Wait until all the sub-channels and hv_sock channels have been
+ * cleaned up. Sub-channels should be destroyed upon suspend, otherwise
+ * they would conflict with the new sub-channels that will be created
+ * in the resume path. hv_sock channels should also be destroyed, but
+ * a hv_sock channel of an established hv_sock connection can not be
+ * really destroyed since it may still be referenced by the userspace
+ * application, so we just force the hv_sock channel to be rescinded
+ * by vmbus_force_channel_rescinded(), and the userspace application
+ * will thoroughly destroy the channel after hibernation.
+ *
+ * Note: the counter nr_chan_close_on_suspend may never go above 0 if
+ * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM.
+ */
+ if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
+ wait_for_completion(&vmbus_connection.ready_for_suspend_event);
+
+ if (atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0) {
+ pr_err("Can not suspend due to a previous failed resuming\n");
+ return -EBUSY;
+ }
+
+ mutex_lock(&vmbus_connection.channel_mutex);
+
+ list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+ /*
+ * Remove the channel from the array of channels and invalidate
+ * the channel's relid. Upon resume, vmbus_onoffer() will fix
+ * up the relid (and other fields, if necessary) and add the
+ * channel back to the array.
+ */
+ vmbus_channel_unmap_relid(channel);
+ channel->offermsg.child_relid = INVALID_RELID;
+
+ if (is_hvsock_channel(channel)) {
+ if (!channel->rescind) {
+ pr_err("hv_sock channel not rescinded!\n");
+ WARN_ON_ONCE(1);
+ }
+ continue;
+ }
+
+ list_for_each_entry(sc, &channel->sc_list, sc_list) {
+ pr_err("Sub-channel not deleted!\n");
+ WARN_ON_ONCE(1);
+ }
+
+ atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
+ }
+
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
+ vmbus_initiate_unload(false);
+
+ /* Reset the event for the next resume. */
+ reinit_completion(&vmbus_connection.ready_for_resume_event);
+
+ return 0;
+}
+
+static int vmbus_bus_resume(struct device *dev)
+{
+ struct vmbus_channel_msginfo *msginfo;
+ size_t msgsize;
+ int ret;
+
+ /*
+ * We only use the 'vmbus_proto_version', which was in use before
+ * hibernation, to re-negotiate with the host.
+ */
+ if (!vmbus_proto_version) {
+ pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version);
+ return -EINVAL;
+ }
+
+ msgsize = sizeof(*msginfo) +
+ sizeof(struct vmbus_channel_initiate_contact);
+
+ msginfo = kzalloc(msgsize, GFP_KERNEL);
+
+ if (msginfo == NULL)
+ return -ENOMEM;
+
+ ret = vmbus_negotiate_version(msginfo, vmbus_proto_version);
+
+ kfree(msginfo);
+
+ if (ret != 0)
+ return ret;
+
+ WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0);
+
+ vmbus_request_offers();
+
+ if (wait_for_completion_timeout(
+ &vmbus_connection.ready_for_resume_event, 10 * HZ) == 0)
+ pr_err("Some vmbus device is missing after suspending?\n");
+
+ /* Reset the event for the next suspend. */
+ reinit_completion(&vmbus_connection.ready_for_suspend_event);
+
+ return 0;
+}
+#else
+#define vmbus_bus_suspend NULL
+#define vmbus_bus_resume NULL
+#endif /* CONFIG_PM_SLEEP */
+
 static const struct acpi_device_id vmbus_acpi_device_ids[] = {
 {"VMBUS", 0},
 {"VMBus", 0},
 {"", 0},
 };
 MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);
+
+/*
+ * Note: we must use the "no_irq" ops, otherwise hibernation can not work with
+ * PCI device assignment, because "pci_dev_pm_ops" uses the "noirq" ops: in
+ * the resume path, the pci "noirq" restore op runs before "non-noirq" op (see
+ * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() ->
+ * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's
+ * resume callback must also run via the "noirq" ops.
+ *
+ * Set suspend_noirq/resume_noirq to NULL for Suspend-to-Idle: see the comment
+ * earlier in this file before vmbus_pm.
+ */
+
+static const struct dev_pm_ops vmbus_bus_pm = {
+ .suspend_noirq = NULL,
+ .resume_noirq = NULL,
+ .freeze_noirq = vmbus_bus_suspend,
+ .thaw_noirq = vmbus_bus_resume,
+ .poweroff_noirq = vmbus_bus_suspend,
+ .restore_noirq = vmbus_bus_resume
+};
 
 static struct acpi_driver vmbus_acpi_driver = {
 .name = "vmbus",
@@ -1981,28 +2545,75 @@
 .add = vmbus_acpi_add,
 .remove = vmbus_acpi_remove,
 },
+ .drv.pm = &vmbus_bus_pm,
 };
 
 static void hv_kexec_handler(void)
 {
- hv_synic_clockevents_cleanup();
+ hv_stimer_global_cleanup();
 vmbus_initiate_unload(false);
 /* Make sure conn_state is set as hv_synic_cleanup checks for it */
 mb();
 cpuhp_remove_state(hyperv_cpuhp_online);
- hyperv_cleanup();
 };
 
 static void hv_crash_handler(struct pt_regs *regs)
 {
+ int cpu;
+
 vmbus_initiate_unload(true);
 /*
 * In crash handler we can't schedule synic cleanup for all CPUs,
 * doing the cleanup for current CPU only. This should be sufficient
 * for kdump.
 */
- hv_synic_cleanup(smp_processor_id());
- hyperv_cleanup();
+ cpu = smp_processor_id();
+ hv_stimer_cleanup(cpu);
+ hv_synic_disable_regs(cpu);
+};
+
+static int hv_synic_suspend(void)
+{
+ /*
+ * When we reach here, all the non-boot CPUs have been offlined.
+ * If we're in a legacy configuration where stimer Direct Mode is
+ * not enabled, the stimers on the non-boot CPUs have been unbound
+ * in hv_synic_cleanup() -> hv_stimer_legacy_cleanup() ->
+ * hv_stimer_cleanup() -> clockevents_unbind_device().
+ *
+ * hv_synic_suspend() only runs on CPU0 with interrupts disabled.
+ * Here we do not call hv_stimer_legacy_cleanup() on CPU0 because:
+ * 1) it's unnecessary as interrupts remain disabled between
+ * syscore_suspend() and syscore_resume(): see create_image() and
+ * resume_target_kernel()
+ * 2) the stimer on CPU0 is automatically disabled later by
+ * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ...
+ * -> clockevents_shutdown() -> ... -> hv_ce_shutdown()
+ * 3) a warning would be triggered if we call
+ * clockevents_unbind_device(), which may sleep, in an
+ * interrupts-disabled context.
+ */
+
+ hv_synic_disable_regs(0);
+
+ return 0;
+}
+
+static void hv_synic_resume(void)
+{
+ hv_synic_enable_regs(0);
+
+ /*
+ * Note: we don't need to call hv_stimer_init(0), because the timer
+ * on CPU0 is not unbound in hv_synic_suspend(), and the timer is
+ * automatically re-enabled in timekeeping_resume().
+ */
+}
+
+/* The callbacks run only on CPU0, with irqs_disabled. */
+static struct syscore_ops hv_synic_syscore_ops = {
+ .suspend = hv_synic_suspend,
+ .resume = hv_synic_resume,
 };
 
 static int __init hv_acpi_init(void)
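hv_synic_syscore_ops uses the kernel's syscore mechanism: such ops run once on the boot CPU, late in suspend and early in resume, with interrupts disabled, so the callbacks must not sleep. A minimal kernel-style sketch of the pattern (illustrative only; it mirrors the register/unregister calls this patch adds to hv_acpi_init() and vmbus_exit()):

#include <linux/syscore_ops.h>

static int example_syscore_suspend(void)
{
	/* Runs late in suspend: single CPU, IRQs off, must not sleep. */
	return 0;
}

static void example_syscore_resume(void)
{
	/* Runs early in resume, before interrupts are re-enabled. */
}

static struct syscore_ops example_syscore_ops = {
	.suspend = example_syscore_suspend,
	.resume = example_syscore_resume,
};

/*
 * register_syscore_ops(&example_syscore_ops) at init time;
 * unregister_syscore_ops(&example_syscore_ops) on module exit.
 */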
20082619 static int __init hv_acpi_init(void)
....@@ -2027,6 +2638,7 @@
20272638 ret = -ETIMEDOUT;
20282639 goto cleanup;
20292640 }
2641
+ hv_debug_init();
20302642
20312643 ret = vmbus_bus_init();
20322644 if (ret)
....@@ -2034,6 +2646,8 @@
20342646
20352647 hv_setup_kexec_handler(hv_kexec_handler);
20362648 hv_setup_crash_handler(hv_crash_handler);
2649
+
2650
+ register_syscore_ops(&hv_synic_syscore_ops);
20372651
20382652 return 0;
20392653
....@@ -2047,10 +2661,12 @@
20472661 {
20482662 int cpu;
20492663
2664
+ unregister_syscore_ops(&hv_synic_syscore_ops);
2665
+
20502666 hv_remove_kexec_handler();
20512667 hv_remove_crash_handler();
20522668 vmbus_connection.conn_state = DISCONNECTED;
2053
- hv_synic_clockevents_cleanup();
2669
+ hv_stimer_global_cleanup();
20542670 vmbus_disconnect();
20552671 hv_remove_vmbus_irq();
20562672 for_each_online_cpu(cpu) {
....@@ -2059,14 +2675,22 @@
20592675
20602676 tasklet_kill(&hv_cpu->msg_dpc);
20612677 }
2678
+ hv_debug_rm_all_dir();
2679
+
20622680 vmbus_free_channels();
2681
+ kfree(vmbus_connection.channels);
20632682
20642683 if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
20652684 kmsg_dump_unregister(&hv_kmsg_dumper);
20662685 unregister_die_notifier(&hyperv_die_block);
2067
- atomic_notifier_chain_unregister(&panic_notifier_list,
2068
- &hyperv_panic_block);
20692686 }
2687
+
2688
+ /*
2689
+ * The panic notifier is always registered, hence we should
2690
+ * also unconditionally unregister it here as well.
2691
+ */
2692
+ atomic_notifier_chain_unregister(&panic_notifier_list,
2693
+ &hyperv_panic_block);
20702694
20712695 free_page((unsigned long)hv_panic_page);
20722696 unregister_sysctl_table(hv_ctl_table_hdr);
....@@ -2080,6 +2704,7 @@
20802704
20812705
20822706 MODULE_LICENSE("GPL");
2707
+MODULE_DESCRIPTION("Microsoft Hyper-V VMBus Driver");
20832708
20842709 subsys_initcall(hv_acpi_init);
20852710 module_exit(vmbus_exit);