forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-02-20 ea08eeccae9297f7aabd2ef7f0c2517ac4549acc
kernel/drivers/hv/vmbus_drv.c
@@ -1,24 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2009, Microsoft Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
  *
  * Authors:
  *   Haiyang Zhang <haiyangz@microsoft.com>
  *   Hank Janssen  <hjanssen@microsoft.com>
  *   K. Y. Srinivasan <kys@microsoft.com>
- *
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
@@ -36,7 +23,7 @@
 #include <linux/cpu.h>
 #include <linux/sched/task_stack.h>
 
-#include <asm/mshyperv.h>
+#include <linux/delay.h>
 #include <linux/notifier.h>
 #include <linux/ptrace.h>
 #include <linux/screen_info.h>
@@ -44,6 +31,8 @@
 #include <linux/efi.h>
 #include <linux/random.h>
 #include <linux/kernel.h>
+#include <linux/syscore_ops.h>
+#include <clocksource/hyperv_timer.h>
 #include "hyperv_vmbus.h"
 
 struct vmbus_dynid {
@@ -58,6 +47,10 @@
 static int hyperv_cpuhp_online;
 
 static void *hv_panic_page;
+
+/* Values parsed from ACPI DSDT */
+static int vmbus_irq;
+int vmbus_interrupt;
 
 /*
  * Boolean to control whether to report panic messages over Hyper-V.
@@ -94,8 +87,12 @@
 static int hyperv_die_event(struct notifier_block *nb, unsigned long val,
                             void *args)
 {
-        struct die_args *die = (struct die_args *)args;
+        struct die_args *die = args;
         struct pt_regs *regs = die->regs;
+
+        /* Don't notify Hyper-V if the die event is other than oops */
+        if (val != DIE_OOPS)
+                return NOTIFY_DONE;
 
         /*
          * Hyper-V should be notified only once about a panic. If we will be
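
Note: the new DIE_OOPS filter makes the notifier ignore die events other than oopses (e.g. breakpoint or trap notifications). For reference, a minimal self-contained sketch of how such a die notifier is wired up from a module — the "demo" names are illustrative only, not part of this patch:

    #include <linux/module.h>
    #include <linux/kdebug.h>    /* register_die_notifier(), DIE_OOPS */
    #include <linux/ptrace.h>    /* instruction_pointer() */

    /* React to oopses only, mirroring the check added to hyperv_die_event(). */
    static int demo_die_event(struct notifier_block *nb, unsigned long val,
                              void *args)
    {
            struct die_args *die = args;

            if (val != DIE_OOPS)
                    return NOTIFY_DONE;

            pr_err("demo: oops at ip %lx\n", instruction_pointer(die->regs));
            return NOTIFY_DONE;
    }

    static struct notifier_block demo_die_block = {
            .notifier_call = demo_die_event,
    };

    static int __init demo_init(void)
    {
            return register_die_notifier(&demo_die_block);
    }

    static void __exit demo_exit(void)
    {
            unregister_die_notifier(&demo_die_block);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
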
@@ -117,7 +114,7 @@
 static const char *fb_mmio_name = "fb_range";
 static struct resource *fb_mmio;
 static struct resource *hyperv_mmio;
-static DEFINE_SEMAPHORE(hyperv_mmio_lock);
+static DEFINE_MUTEX(hyperv_mmio_lock);
 
 static int vmbus_exists(void)
 {
@@ -125,14 +122,6 @@
                 return -ENODEV;
 
         return 0;
-}
-
-#define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2)
-static void print_alias_name(struct hv_device *hv_dev, char *alias_name)
-{
-        int i;
-        for (i = 0; i < VMBUS_ALIAS_LEN; i += 2)
-                sprintf(&alias_name[i], "%02x", hv_dev->dev_type.b[i/2]);
 }
 
 static u8 channel_monitor_group(const struct vmbus_channel *channel)
@@ -211,7 +200,7 @@
         if (!hv_dev->channel)
                 return -ENODEV;
         return sprintf(buf, "{%pUl}\n",
-                       hv_dev->channel->offermsg.offer.if_type.b);
+                       &hv_dev->channel->offermsg.offer.if_type);
 }
 static DEVICE_ATTR_RO(class_id);
 
@@ -223,7 +212,7 @@
         if (!hv_dev->channel)
                 return -ENODEV;
         return sprintf(buf, "{%pUl}\n",
-                       hv_dev->channel->offermsg.offer.if_instance.b);
+                       &hv_dev->channel->offermsg.offer.if_instance);
 }
 static DEVICE_ATTR_RO(device_id);
 
@@ -231,10 +220,8 @@
                              struct device_attribute *dev_attr, char *buf)
 {
         struct hv_device *hv_dev = device_to_hv_device(dev);
-        char alias_name[VMBUS_ALIAS_LEN + 1];
 
-        print_alias_name(hv_dev, alias_name);
-        return sprintf(buf, "vmbus:%s\n", alias_name);
+        return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
 }
 static DEVICE_ATTR_RO(modalias);
 
@@ -247,7 +234,7 @@
         if (!hv_dev->channel)
                 return -ENODEV;
 
-        return sprintf(buf, "%d\n", hv_dev->channel->numa_node);
+        return sprintf(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
 }
 static DEVICE_ATTR_RO(numa_node);
 #endif
@@ -262,7 +249,7 @@
                 return -ENODEV;
         return sprintf(buf, "%d\n",
                        channel_pending(hv_dev->channel,
-                                       vmbus_connection.monitor_pages[1]));
+                                       vmbus_connection.monitor_pages[0]));
 }
 static DEVICE_ATTR_RO(server_monitor_pending);
 
@@ -528,17 +515,16 @@
 {
         struct hv_device *hv_dev = device_to_hv_device(dev);
         struct vmbus_channel *channel = hv_dev->channel, *cur_sc;
-        unsigned long flags;
         int buf_size = PAGE_SIZE, n_written, tot_written;
         struct list_head *cur;
 
         if (!channel)
                 return -ENODEV;
 
+        mutex_lock(&vmbus_connection.channel_mutex);
+
         tot_written = snprintf(buf, buf_size, "%u:%u\n",
                                channel->offermsg.child_relid, channel->target_cpu);
-
-        spin_lock_irqsave(&channel->lock, flags);
 
         list_for_each(cur, &channel->sc_list) {
                 if (tot_written >= buf_size - 1)
@@ -553,7 +539,7 @@
                 tot_written += n_written;
         }
 
-        spin_unlock_irqrestore(&channel->lock, flags);
+        mutex_unlock(&vmbus_connection.channel_mutex);
 
         return tot_written;
 }
@@ -576,6 +562,54 @@
         return sprintf(buf, "0x%x\n", hv_dev->device_id);
 }
 static DEVICE_ATTR_RO(device);
+
+static ssize_t driver_override_store(struct device *dev,
+                                     struct device_attribute *attr,
+                                     const char *buf, size_t count)
+{
+        struct hv_device *hv_dev = device_to_hv_device(dev);
+        char *driver_override, *old, *cp;
+
+        /* We need to keep extra room for a newline */
+        if (count >= (PAGE_SIZE - 1))
+                return -EINVAL;
+
+        driver_override = kstrndup(buf, count, GFP_KERNEL);
+        if (!driver_override)
+                return -ENOMEM;
+
+        cp = strchr(driver_override, '\n');
+        if (cp)
+                *cp = '\0';
+
+        device_lock(dev);
+        old = hv_dev->driver_override;
+        if (strlen(driver_override)) {
+                hv_dev->driver_override = driver_override;
+        } else {
+                kfree(driver_override);
+                hv_dev->driver_override = NULL;
+        }
+        device_unlock(dev);
+
+        kfree(old);
+
+        return count;
+}
+
+static ssize_t driver_override_show(struct device *dev,
+                                    struct device_attribute *attr, char *buf)
+{
+        struct hv_device *hv_dev = device_to_hv_device(dev);
+        ssize_t len;
+
+        device_lock(dev);
+        len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override);
+        device_unlock(dev);
+
+        return len;
+}
+static DEVICE_ATTR_RW(driver_override);
 
 /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
 static struct attribute *vmbus_dev_attrs[] = {
@@ -607,6 +641,7 @@
         &dev_attr_channel_vp_mapping.attr,
         &dev_attr_vendor.attr,
         &dev_attr_device.attr,
+        &dev_attr_driver_override.attr,
         NULL,
 };
 
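
Note: driver_override lets userspace pin a vmbus device to one driver (commonly uio_hv_generic for userspace network/storage stacks) before rebinding it. A sketch of the write side, assuming a placeholder instance GUID for the device directory:

    #include <stdio.h>

    int main(void)
    {
            /* The GUID below is a placeholder for a real directory under
             * /sys/bus/vmbus/devices/; writing an empty string clears the
             * override again.
             */
            const char *path = "/sys/bus/vmbus/devices/"
                    "deadbeef-0000-0000-0000-000000000000/driver_override";
            FILE *f = fopen(path, "w");

            if (!f) {
                    perror("fopen");
                    return 1;
            }
            fputs("uio_hv_generic\n", f);
            fclose(f);
            return 0;
    }
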
@@ -654,59 +689,72 @@
 static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
 {
         struct hv_device *dev = device_to_hv_device(device);
-        int ret;
-        char alias_name[VMBUS_ALIAS_LEN + 1];
+        const char *format = "MODALIAS=vmbus:%*phN";
 
-        print_alias_name(dev, alias_name);
-        ret = add_uevent_var(env, "MODALIAS=vmbus:%s", alias_name);
-        return ret;
+        return add_uevent_var(env, format, UUID_SIZE, &dev->dev_type);
 }
 
-static const uuid_le null_guid;
-
-static inline bool is_null_guid(const uuid_le *guid)
+static const struct hv_vmbus_device_id *
+hv_vmbus_dev_match(const struct hv_vmbus_device_id *id, const guid_t *guid)
 {
-        if (uuid_le_cmp(*guid, null_guid))
-                return false;
-        return true;
+        if (id == NULL)
+                return NULL; /* empty device table */
+
+        for (; !guid_is_null(&id->guid); id++)
+                if (guid_equal(&id->guid, guid))
+                        return id;
+
+        return NULL;
 }
 
-/*
- * Return a matching hv_vmbus_device_id pointer.
- * If there is no match, return NULL.
- */
-static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
-                                        const uuid_le *guid)
+static const struct hv_vmbus_device_id *
+hv_vmbus_dynid_match(struct hv_driver *drv, const guid_t *guid)
 {
         const struct hv_vmbus_device_id *id = NULL;
         struct vmbus_dynid *dynid;
 
-        /* Look at the dynamic ids first, before the static ones */
         spin_lock(&drv->dynids.lock);
         list_for_each_entry(dynid, &drv->dynids.list, node) {
-                if (!uuid_le_cmp(dynid->id.guid, *guid)) {
+                if (guid_equal(&dynid->id.guid, guid)) {
                         id = &dynid->id;
                         break;
                 }
         }
         spin_unlock(&drv->dynids.lock);
 
-        if (id)
-                return id;
+        return id;
+}
 
-        id = drv->id_table;
-        if (id == NULL)
-                return NULL; /* empty device table */
+static const struct hv_vmbus_device_id vmbus_device_null;
 
-        for (; !is_null_guid(&id->guid); id++)
-                if (!uuid_le_cmp(id->guid, *guid))
-                        return id;
+/*
+ * Return a matching hv_vmbus_device_id pointer.
+ * If there is no match, return NULL.
+ */
+static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
+                                                        struct hv_device *dev)
+{
+        const guid_t *guid = &dev->dev_type;
+        const struct hv_vmbus_device_id *id;
 
-        return NULL;
+        /* When driver_override is set, only bind to the matching driver */
+        if (dev->driver_override && strcmp(dev->driver_override, drv->name))
+                return NULL;
+
+        /* Look at the dynamic ids first, before the static ones */
+        id = hv_vmbus_dynid_match(drv, guid);
+        if (!id)
+                id = hv_vmbus_dev_match(drv->id_table, guid);
+
+        /* driver_override will always match, send a dummy id */
+        if (!id && dev->driver_override)
+                id = &vmbus_device_null;
+
+        return id;
 }
 
 /* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */
-static int vmbus_add_dynid(struct hv_driver *drv, uuid_le *guid)
+static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid)
 {
         struct vmbus_dynid *dynid;
 
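
Note: after this refactor the match order in hv_vmbus_get_id() is: driver_override gate first, then dynamic IDs, then the static id_table, and finally the vmbus_device_null dummy so an overridden device always binds. A standalone mock of that precedence (userspace C; guid_t is reduced to a bare 16-byte struct and all names are local to the sketch):

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <string.h>

    typedef struct { unsigned char b[16]; } guid_t;   /* stand-in */
    struct dev_id { guid_t guid; };

    static bool guid_equal(const guid_t *a, const guid_t *b)
    {
            return memcmp(a, b, sizeof(*a)) == 0;
    }

    static const struct dev_id id_null;               /* the "dummy id" */

    /* Same precedence as hv_vmbus_get_id(). */
    static const struct dev_id *get_id(const char *override, const char *drv,
                                       const struct dev_id *dynid,
                                       const struct dev_id *table, size_t n,
                                       const guid_t *guid)
    {
            size_t i;

            if (override && strcmp(override, drv))
                    return NULL;                      /* pinned to another driver */
            if (dynid && guid_equal(&dynid->guid, guid))
                    return dynid;                     /* dynamic ids first */
            for (i = 0; i < n; i++)
                    if (guid_equal(&table[i].guid, guid))
                            return &table[i];         /* then the static table */
            return override ? &id_null : NULL;        /* override always matches */
    }

    int main(void)
    {
            guid_t g = { { 1 } };
            struct dev_id tbl[] = { { { { 1 } } } };

            assert(get_id(NULL, "drv", NULL, tbl, 1, &g) == &tbl[0]);
            assert(get_id("other", "drv", NULL, tbl, 1, &g) == NULL);
            assert(get_id("drv", "drv", NULL, tbl, 0, &g) == &id_null);
            return 0;
    }
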
@@ -744,14 +792,14 @@
                              size_t count)
 {
         struct hv_driver *drv = drv_to_hv_drv(driver);
-        uuid_le guid;
+        guid_t guid;
         ssize_t retval;
 
-        retval = uuid_le_to_bin(buf, &guid);
+        retval = guid_parse(buf, &guid);
         if (retval)
                 return retval;
 
-        if (hv_vmbus_get_id(drv, &guid))
+        if (hv_vmbus_dynid_match(drv, &guid))
                 return -EEXIST;
 
         retval = vmbus_add_dynid(drv, &guid);
@@ -771,10 +819,10 @@
 {
         struct hv_driver *drv = drv_to_hv_drv(driver);
         struct vmbus_dynid *dynid, *n;
-        uuid_le guid;
+        guid_t guid;
         ssize_t retval;
 
-        retval = uuid_le_to_bin(buf, &guid);
+        retval = guid_parse(buf, &guid);
         if (retval)
                 return retval;
 
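
Note: guid_parse() replaces uuid_le_to_bin() as the parser for the canonical 8-4-4-4-12 text form written to new_id/remove_id. A simplified standalone parse in plain C — it reads the hex pairs in text order, whereas the kernel's guid_t additionally stores the first three fields little-endian, which this sketch deliberately does not reproduce:

    #include <assert.h>
    #include <stdio.h>

    /* Parse "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" into 16 raw bytes.
     * Returns 0 on success, -1 on malformed input.
     */
    static int parse_guid(const char *s, unsigned char out[16])
    {
            static const int dash[] = { 8, 13, 18, 23 };
            int i, pos = 0;

            for (i = 0; i < 4; i++)
                    if (s[dash[i]] != '-')
                            return -1;
            for (i = 0; i < 16; i++) {
                    if (pos == 8 || pos == 13 || pos == 18 || pos == 23)
                            pos++;                  /* skip the dash */
                    if (sscanf(s + pos, "%2hhx", &out[i]) != 1)
                            return -1;
                    pos += 2;
            }
            return 0;
    }

    int main(void)
    {
            unsigned char b[16];

            assert(parse_guid("f8615163-df3e-46c5-913f-f2d2f965ed0e", b) == 0);
            assert(b[0] == 0xf8 && b[15] == 0x0e);
            return 0;
    }
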
@@ -783,7 +831,7 @@
         list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
                 struct hv_vmbus_device_id *id = &dynid->id;
 
-                if (!uuid_le_cmp(id->guid, guid)) {
+                if (guid_equal(&id->guid, &guid)) {
                         list_del(&dynid->node);
                         kfree(dynid);
                         retval = count;
@@ -816,7 +864,7 @@
         if (is_hvsock_channel(hv_dev->channel))
                 return drv->hvsock;
 
-        if (hv_vmbus_get_id(drv, &hv_dev->dev_type))
+        if (hv_vmbus_get_id(drv, hv_dev))
                 return 1;
 
         return 0;
@@ -833,7 +881,7 @@
         struct hv_device *dev = device_to_hv_device(child_device);
         const struct hv_vmbus_device_id *dev_id;
 
-        dev_id = hv_vmbus_get_id(drv, &dev->dev_type);
+        dev_id = hv_vmbus_get_id(drv, dev);
         if (drv->probe) {
                 ret = drv->probe(dev, dev_id);
                 if (ret != 0)
@@ -885,6 +933,48 @@
                 drv->shutdown(dev);
 }
 
+#ifdef CONFIG_PM_SLEEP
+/*
+ * vmbus_suspend - Suspend a vmbus device
+ */
+static int vmbus_suspend(struct device *child_device)
+{
+        struct hv_driver *drv;
+        struct hv_device *dev = device_to_hv_device(child_device);
+
+        /* The device may not be attached yet */
+        if (!child_device->driver)
+                return 0;
+
+        drv = drv_to_hv_drv(child_device->driver);
+        if (!drv->suspend)
+                return -EOPNOTSUPP;
+
+        return drv->suspend(dev);
+}
+
+/*
+ * vmbus_resume - Resume a vmbus device
+ */
+static int vmbus_resume(struct device *child_device)
+{
+        struct hv_driver *drv;
+        struct hv_device *dev = device_to_hv_device(child_device);
+
+        /* The device may not be attached yet */
+        if (!child_device->driver)
+                return 0;
+
+        drv = drv_to_hv_drv(child_device->driver);
+        if (!drv->resume)
+                return -EOPNOTSUPP;
+
+        return drv->resume(dev);
+}
+#else
+#define vmbus_suspend NULL
+#define vmbus_resume NULL
+#endif /* CONFIG_PM_SLEEP */
 
 /*
  * vmbus_device_release - Final callback release of the vmbus child device
894984 struct hv_device *hv_dev = device_to_hv_device(device);
895985 struct vmbus_channel *channel = hv_dev->channel;
896986
987
+ hv_debug_rm_dev_dir(hv_dev);
988
+
897989 mutex_lock(&vmbus_connection.channel_mutex);
898
- hv_process_channel_removal(channel->offermsg.child_relid);
990
+ hv_process_channel_removal(channel);
899991 mutex_unlock(&vmbus_connection.channel_mutex);
900992 kfree(hv_dev);
901
-
902993 }
994
+
995
+/*
996
+ * Note: we must use the "noirq" ops: see the comment before vmbus_bus_pm.
997
+ *
998
+ * suspend_noirq/resume_noirq are set to NULL to support Suspend-to-Idle: we
999
+ * shouldn't suspend the vmbus devices upon Suspend-to-Idle, otherwise there
1000
+ * is no way to wake up a Generation-2 VM.
1001
+ *
1002
+ * The other 4 ops are for hibernation.
1003
+ */
1004
+
1005
+static const struct dev_pm_ops vmbus_pm = {
1006
+ .suspend_noirq = NULL,
1007
+ .resume_noirq = NULL,
1008
+ .freeze_noirq = vmbus_suspend,
1009
+ .thaw_noirq = vmbus_resume,
1010
+ .poweroff_noirq = vmbus_suspend,
1011
+ .restore_noirq = vmbus_resume,
1012
+};
9031013
9041014 /* The one and only one */
9051015 static struct bus_type hv_bus = {
@@ -911,11 +1021,15 @@
         .uevent =               vmbus_uevent,
         .dev_groups =           vmbus_dev_groups,
         .drv_groups =           vmbus_drv_groups,
+        .pm =                   &vmbus_pm,
 };
 
 struct onmessage_work_context {
         struct work_struct work;
-        struct hv_message msg;
+        struct {
+                struct hv_message_header header;
+                u8 payload[];
+        } msg;
 };
 
 static void vmbus_onmessage_work(struct work_struct *work)
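
Note: turning msg into a header plus flexible payload is what lets vmbus_on_msg_dpc() below allocate sizeof(*ctx) + payload_size per message instead of a worst-case struct hv_message. The same C pattern in isolation:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Header plus flexible array member, like the reworked ctx->msg. */
    struct msg {
            uint32_t type;
            uint32_t payload_size;
            uint8_t payload[];      /* occupies no storage of its own */
    };

    int main(void)
    {
            const char body[] = "hello";
            /* One allocation, sized to the actual payload. */
            struct msg *m = malloc(sizeof(*m) + sizeof(body));

            if (!m)
                    return 1;
            m->type = 1;
            m->payload_size = sizeof(body);
            memcpy(m->payload, body, sizeof(body));
            printf("%u-byte payload: %s\n", m->payload_size, (char *)m->payload);
            free(m);
            return 0;
    }
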
@@ -928,19 +1042,9 @@
 
         ctx = container_of(work, struct onmessage_work_context,
                            work);
-        vmbus_onmessage(&ctx->msg);
+        vmbus_onmessage((struct vmbus_channel_message_header *)
+                        &ctx->msg.payload);
         kfree(ctx);
-}
-
-static void hv_process_timer_expiration(struct hv_message *msg,
-                                        struct hv_per_cpu_context *hv_cpu)
-{
-        struct clock_event_device *dev = hv_cpu->clk_evt;
-
-        if (dev->event_handler)
-                dev->event_handler(dev);
-
-        vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
 }
 
 void vmbus_on_msg_dpc(unsigned long data)
@@ -953,6 +1057,13 @@
         const struct vmbus_channel_message_table_entry *entry;
         struct onmessage_work_context *ctx;
         u32 message_type = msg->header.message_type;
+
+        /*
+         * 'enum vmbus_channel_message_type' is supposed to always be 'u32' as
+         * it is being used in 'struct vmbus_channel_message_header' definition
+         * which is supposed to match hypervisor ABI.
+         */
+        BUILD_BUG_ON(sizeof(enum vmbus_channel_message_type) != sizeof(u32));
 
         if (message_type == HVMSG_NONE)
                 /* no msg */
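
Note: BUILD_BUG_ON() turns the "this enum must stay 32 bits wide" assumption into a compile failure instead of a silent ABI break. The portable C11 equivalent, with a local stand-in enum:

    #include <stdint.h>

    enum demo_channel_message_type {        /* stand-in for the vmbus enum */
            DEMO_CHANNELMSG_INVALID = 0,
            DEMO_CHANNELMSG_OFFERCHANNEL = 1,
    };

    /* Same intent as the BUILD_BUG_ON() in vmbus_on_msg_dpc(). */
    _Static_assert(sizeof(enum demo_channel_message_type) == sizeof(uint32_t),
                   "channel message type must stay 32 bits wide");

    int main(void)
    {
            return 0;
    }
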
@@ -967,41 +1078,88 @@
                 goto msg_handled;
         }
 
+        if (msg->header.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) {
+                WARN_ONCE(1, "payload size is too large (%d)\n",
+                          msg->header.payload_size);
+                goto msg_handled;
+        }
+
         entry = &channel_message_table[hdr->msgtype];
 
         if (!entry->message_handler)
                 goto msg_handled;
 
+        if (msg->header.payload_size < entry->min_payload_len) {
+                WARN_ONCE(1, "message too short: msgtype=%d len=%d\n",
+                          hdr->msgtype, msg->header.payload_size);
+                goto msg_handled;
+        }
+
         if (entry->handler_type == VMHT_BLOCKING) {
-                ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
+                ctx = kmalloc(sizeof(*ctx) + msg->header.payload_size,
+                              GFP_ATOMIC);
                 if (ctx == NULL)
                         return;
 
                 INIT_WORK(&ctx->work, vmbus_onmessage_work);
-                memcpy(&ctx->msg, msg, sizeof(*msg));
+                memcpy(&ctx->msg, msg, sizeof(msg->header) +
+                       msg->header.payload_size);
 
                 /*
                  * The host can generate a rescind message while we
                  * may still be handling the original offer. We deal with
-                 * this condition by ensuring the processing is done on the
-                 * same CPU.
+                 * this condition by relying on the synchronization provided
+                 * by offer_in_progress and by channel_mutex. See also the
+                 * inline comments in vmbus_onoffer_rescind().
                  */
                 switch (hdr->msgtype) {
                 case CHANNELMSG_RESCIND_CHANNELOFFER:
                         /*
                          * If we are handling the rescind message;
                          * schedule the work on the global work queue.
+                         *
+                         * The OFFER message and the RESCIND message should
+                         * not be handled by the same serialized work queue,
+                         * because the OFFER handler may call vmbus_open(),
+                         * which tries to open the channel by sending an
+                         * OPEN_CHANNEL message to the host and waits for
+                         * the host's response; however, if the host has
+                         * rescinded the channel before it receives the
+                         * OPEN_CHANNEL message, the host just silently
+                         * ignores the OPEN_CHANNEL message; as a result,
+                         * the guest's OFFER handler hangs for ever, if we
+                         * handle the RESCIND message in the same serialized
+                         * work queue: the RESCIND handler can not start to
+                         * run before the OFFER handler finishes.
                          */
-                        schedule_work_on(vmbus_connection.connect_cpu,
-                                         &ctx->work);
+                        schedule_work(&ctx->work);
                         break;
 
                 case CHANNELMSG_OFFERCHANNEL:
+                        /*
+                         * The host sends the offer message of a given channel
+                         * before sending the rescind message of the same
+                         * channel. These messages are sent to the guest's
+                         * connect CPU; the guest then starts processing them
+                         * in the tasklet handler on this CPU:
+                         *
+                         * VMBUS_CONNECT_CPU
+                         *
+                         * [vmbus_on_msg_dpc()]
+                         * atomic_inc()  // CHANNELMSG_OFFERCHANNEL
+                         * queue_work()
+                         * ...
+                         * [vmbus_on_msg_dpc()]
+                         * schedule_work()  // CHANNELMSG_RESCIND_CHANNELOFFER
+                         *
+                         * We rely on the memory-ordering properties of the
+                         * queue_work() and schedule_work() primitives, which
+                         * guarantee that the atomic increment will be visible
+                         * to the CPUs which will execute the offer & rescind
+                         * works by the time these works will start execution.
+                         */
                         atomic_inc(&vmbus_connection.offer_in_progress);
-                        queue_work_on(vmbus_connection.connect_cpu,
-                                      vmbus_connection.work_queue,
-                                      &ctx->work);
-                        break;
+                        fallthrough;
 
                 default:
                         queue_work(vmbus_connection.work_queue, &ctx->work);
@@ -1013,18 +1171,42 @@
                 vmbus_signal_eom(msg, message_type);
 }
 
-
+#ifdef CONFIG_PM_SLEEP
 /*
- * Direct callback for channels using other deferred processing
+ * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
+ * hibernation, because hv_sock connections can not persist across hibernation.
  */
-static void vmbus_channel_isr(struct vmbus_channel *channel)
+static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
 {
-        void (*callback_fn)(void *);
+        struct onmessage_work_context *ctx;
+        struct vmbus_channel_rescind_offer *rescind;
 
-        callback_fn = READ_ONCE(channel->onchannel_callback);
-        if (likely(callback_fn != NULL))
-                (*callback_fn)(channel->channel_callback_context);
+        WARN_ON(!is_hvsock_channel(channel));
+
+        /*
+         * Allocation size is small and the allocation should really not fail,
+         * otherwise the state of the hv_sock connections ends up in limbo.
+         */
+        ctx = kzalloc(sizeof(*ctx) + sizeof(*rescind),
+                      GFP_KERNEL | __GFP_NOFAIL);
+
+        /*
+         * So far, these are not really used by Linux. Just set them to the
+         * reasonable values conforming to the definitions of the fields.
+         */
+        ctx->msg.header.message_type = 1;
+        ctx->msg.header.payload_size = sizeof(*rescind);
+
+        /* These values are actually used by Linux. */
+        rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.payload;
+        rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER;
+        rescind->child_relid = channel->offermsg.child_relid;
+
+        INIT_WORK(&ctx->work, vmbus_onmessage_work);
+
+        queue_work(vmbus_connection.work_queue, &ctx->work);
 }
+#endif /* CONFIG_PM_SLEEP */
 
 /*
  * Schedule all channels with events pending
@@ -1056,6 +1238,7 @@
                 return;
 
         for_each_set_bit(relid, recv_int_page, maxbits) {
+                void (*callback_fn)(void *context);
                 struct vmbus_channel *channel;
 
                 if (!sync_test_and_clear_bit(relid, recv_int_page))
@@ -1065,33 +1248,54 @@
                 if (relid == 0)
                         continue;
 
+                /*
+                 * Pairs with the kfree_rcu() in vmbus_chan_release().
+                 * Guarantees that the channel data structure doesn't
+                 * get freed while the channel pointer below is being
+                 * dereferenced.
+                 */
                 rcu_read_lock();
 
                 /* Find channel based on relid */
-                list_for_each_entry_rcu(channel, &hv_cpu->chan_list, percpu_list) {
-                        if (channel->offermsg.child_relid != relid)
-                                continue;
+                channel = relid2channel(relid);
+                if (channel == NULL)
+                        goto sched_unlock_rcu;
 
-                        if (channel->rescind)
-                                continue;
+                if (channel->rescind)
+                        goto sched_unlock_rcu;
 
-                        trace_vmbus_chan_sched(channel);
+                /*
+                 * Make sure that the ring buffer data structure doesn't get
+                 * freed while we dereference the ring buffer pointer. Test
+                 * for the channel's onchannel_callback being NULL within a
+                 * sched_lock critical section. See also the inline comments
+                 * in vmbus_reset_channel_cb().
+                 */
+                spin_lock(&channel->sched_lock);
 
-                        ++channel->interrupts;
+                callback_fn = channel->onchannel_callback;
+                if (unlikely(callback_fn == NULL))
+                        goto sched_unlock;
 
-                        switch (channel->callback_mode) {
-                        case HV_CALL_ISR:
-                                vmbus_channel_isr(channel);
-                                break;
+                trace_vmbus_chan_sched(channel);
 
-                        case HV_CALL_BATCHED:
-                                hv_begin_read(&channel->inbound);
-                                /* fallthrough */
-                        case HV_CALL_DIRECT:
-                                tasklet_schedule(&channel->callback_event);
-                        }
+                ++channel->interrupts;
+
+                switch (channel->callback_mode) {
+                case HV_CALL_ISR:
+                        (*callback_fn)(channel->channel_callback_context);
+                        break;
+
+                case HV_CALL_BATCHED:
+                        hv_begin_read(&channel->inbound);
+                        fallthrough;
+                case HV_CALL_DIRECT:
+                        tasklet_schedule(&channel->callback_event);
                 }
 
+sched_unlock:
+                spin_unlock(&channel->sched_lock);
+sched_unlock_rcu:
                 rcu_read_unlock();
         }
 }
@@ -1103,8 +1307,6 @@
         void *page_addr = hv_cpu->synic_event_page;
         struct hv_message *msg;
         union hv_synic_event_flags *event;
-        struct pt_regs *regs = get_irq_regs();
-        u64 ip = regs ? instruction_pointer(regs) : 0;
         bool handled = false;
 
         if (unlikely(page_addr == NULL))
@@ -1142,13 +1344,14 @@
 
         /* Check if there are actual msgs to be processed */
         if (msg->header.message_type != HVMSG_NONE) {
-                if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
-                        hv_process_timer_expiration(msg, hv_cpu);
-                else
+                if (msg->header.message_type == HVMSG_TIMER_EXPIRED) {
+                        hv_stimer0_isr();
+                        vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
+                } else
                         tasklet_schedule(&hv_cpu->msg_dpc);
         }
 
-        add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0, ip);
+        add_interrupt_randomness(hv_get_vector());
 }
 
 /*
@@ -1171,7 +1374,7 @@
          * Write dump contents to the page. No need to synchronize; panic should
          * be single-threaded.
          */
-        kmsg_dump_get_buffer(dumper, true, hv_panic_page, PAGE_SIZE,
+        kmsg_dump_get_buffer(dumper, false, hv_panic_page, HV_HYP_PAGE_SIZE,
                              &bytes_written);
         if (bytes_written)
                 hyperv_report_panic_msg(panic_pa, bytes_written);
@@ -1182,8 +1385,6 @@
 };
 
 static struct ctl_table_header *hv_ctl_table_hdr;
-static int zero;
-static int one = 1;
 
 /*
  * sysctl option to allow the user to control whether kmsg data should be
@@ -1196,8 +1397,8 @@
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
                 .proc_handler   = proc_dointvec_minmax,
-                .extra1         = &zero,
-                .extra2         = &one
+                .extra1         = SYSCTL_ZERO,
+                .extra2         = SYSCTL_ONE
         },
         {}
 };
@@ -1223,7 +1424,6 @@
 {
         int ret;
 
-        /* Hypervisor initialization...setup hypercall page..etc */
         ret = hv_init();
         if (ret != 0) {
                 pr_err("Unable to initialize the hypervisor - 0x%x\n", ret);
@@ -1234,19 +1434,22 @@
         if (ret)
                 return ret;
 
-        hv_setup_vmbus_irq(vmbus_isr);
+        ret = hv_setup_vmbus_irq(vmbus_irq, vmbus_isr);
+        if (ret)
+                goto err_setup;
 
         ret = hv_synic_alloc();
         if (ret)
                 goto err_alloc;
+
         /*
-         * Initialize the per-cpu interrupt state and
-         * connect to the host.
+         * Initialize the per-cpu interrupt state and stimer state.
+         * Then connect to the host.
          */
         ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
                                 hv_synic_init, hv_synic_cleanup);
         if (ret < 0)
-                goto err_alloc;
+                goto err_cpuhp;
         hyperv_cpuhp_online = ret;
 
         ret = vmbus_connect();
@@ -1272,13 +1475,13 @@
          */
         hv_get_crash_ctl(hyperv_crash_ctl);
         if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) {
-                hv_panic_page = (void *)get_zeroed_page(GFP_KERNEL);
+                hv_panic_page = (void *)hv_alloc_hyperv_zeroed_page();
                 if (hv_panic_page) {
                         ret = kmsg_dump_register(&hv_kmsg_dumper);
                         if (ret) {
                                 pr_err("Hyper-V: kmsg dump register "
                                         "error 0x%x\n", ret);
-                                free_page(
+                                hv_free_hyperv_page(
                                         (unsigned long)hv_panic_page);
                                 hv_panic_page = NULL;
                         }
@@ -1304,10 +1507,11 @@
 
 err_connect:
         cpuhp_remove_state(hyperv_cpuhp_online);
-err_alloc:
+err_cpuhp:
         hv_synic_free();
+err_alloc:
         hv_remove_vmbus_irq();
-
+err_setup:
         bus_unregister(&hv_bus);
         unregister_sysctl_table(hv_ctl_table_hdr);
         hv_ctl_table_hdr = NULL;
@@ -1382,7 +1586,7 @@
 
 struct vmbus_chan_attribute {
         struct attribute attr;
-        ssize_t (*show)(const struct vmbus_channel *chan, char *buf);
+        ssize_t (*show)(struct vmbus_channel *chan, char *buf);
         ssize_t (*store)(struct vmbus_channel *chan,
                          const char *buf, size_t count);
 };
@@ -1401,61 +1605,207 @@
 {
         const struct vmbus_chan_attribute *attribute
                 = container_of(attr, struct vmbus_chan_attribute, attr);
-        const struct vmbus_channel *chan
+        struct vmbus_channel *chan
                 = container_of(kobj, struct vmbus_channel, kobj);
 
         if (!attribute->show)
                 return -EIO;
 
-        if (chan->state != CHANNEL_OPENED_STATE)
-                return -EINVAL;
-
         return attribute->show(chan, buf);
+}
+
+static ssize_t vmbus_chan_attr_store(struct kobject *kobj,
+                                     struct attribute *attr, const char *buf,
+                                     size_t count)
+{
+        const struct vmbus_chan_attribute *attribute
+                = container_of(attr, struct vmbus_chan_attribute, attr);
+        struct vmbus_channel *chan
+                = container_of(kobj, struct vmbus_channel, kobj);
+
+        if (!attribute->store)
+                return -EIO;
+
+        return attribute->store(chan, buf, count);
 }
 
 static const struct sysfs_ops vmbus_chan_sysfs_ops = {
         .show = vmbus_chan_attr_show,
+        .store = vmbus_chan_attr_store,
 };
 
-static ssize_t out_mask_show(const struct vmbus_channel *channel, char *buf)
+static ssize_t out_mask_show(struct vmbus_channel *channel, char *buf)
 {
-        const struct hv_ring_buffer_info *rbi = &channel->outbound;
+        struct hv_ring_buffer_info *rbi = &channel->outbound;
+        ssize_t ret;
 
-        return sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
+        mutex_lock(&rbi->ring_buffer_mutex);
+        if (!rbi->ring_buffer) {
+                mutex_unlock(&rbi->ring_buffer_mutex);
+                return -EINVAL;
+        }
+
+        ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
+        mutex_unlock(&rbi->ring_buffer_mutex);
+        return ret;
 }
 static VMBUS_CHAN_ATTR_RO(out_mask);
 
-static ssize_t in_mask_show(const struct vmbus_channel *channel, char *buf)
+static ssize_t in_mask_show(struct vmbus_channel *channel, char *buf)
 {
-        const struct hv_ring_buffer_info *rbi = &channel->inbound;
+        struct hv_ring_buffer_info *rbi = &channel->inbound;
+        ssize_t ret;
 
-        return sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
+        mutex_lock(&rbi->ring_buffer_mutex);
+        if (!rbi->ring_buffer) {
+                mutex_unlock(&rbi->ring_buffer_mutex);
+                return -EINVAL;
+        }
+
+        ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
+        mutex_unlock(&rbi->ring_buffer_mutex);
+        return ret;
 }
 static VMBUS_CHAN_ATTR_RO(in_mask);
 
-static ssize_t read_avail_show(const struct vmbus_channel *channel, char *buf)
+static ssize_t read_avail_show(struct vmbus_channel *channel, char *buf)
 {
-        const struct hv_ring_buffer_info *rbi = &channel->inbound;
+        struct hv_ring_buffer_info *rbi = &channel->inbound;
+        ssize_t ret;
 
-        return sprintf(buf, "%u\n", hv_get_bytes_to_read(rbi));
+        mutex_lock(&rbi->ring_buffer_mutex);
+        if (!rbi->ring_buffer) {
+                mutex_unlock(&rbi->ring_buffer_mutex);
+                return -EINVAL;
+        }
+
+        ret = sprintf(buf, "%u\n", hv_get_bytes_to_read(rbi));
+        mutex_unlock(&rbi->ring_buffer_mutex);
+        return ret;
 }
 static VMBUS_CHAN_ATTR_RO(read_avail);
 
-static ssize_t write_avail_show(const struct vmbus_channel *channel, char *buf)
+static ssize_t write_avail_show(struct vmbus_channel *channel, char *buf)
 {
-        const struct hv_ring_buffer_info *rbi = &channel->outbound;
+        struct hv_ring_buffer_info *rbi = &channel->outbound;
+        ssize_t ret;
 
-        return sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi));
+        mutex_lock(&rbi->ring_buffer_mutex);
+        if (!rbi->ring_buffer) {
+                mutex_unlock(&rbi->ring_buffer_mutex);
+                return -EINVAL;
+        }
+
+        ret = sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi));
+        mutex_unlock(&rbi->ring_buffer_mutex);
+        return ret;
 }
 static VMBUS_CHAN_ATTR_RO(write_avail);
 
-static ssize_t show_target_cpu(const struct vmbus_channel *channel, char *buf)
+static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf)
 {
         return sprintf(buf, "%u\n", channel->target_cpu);
 }
-static VMBUS_CHAN_ATTR(cpu, S_IRUGO, show_target_cpu, NULL);
+static ssize_t target_cpu_store(struct vmbus_channel *channel,
+                                const char *buf, size_t count)
+{
+        u32 target_cpu, origin_cpu;
+        ssize_t ret = count;
 
-static ssize_t channel_pending_show(const struct vmbus_channel *channel,
+        if (vmbus_proto_version < VERSION_WIN10_V4_1)
+                return -EIO;
+
+        if (sscanf(buf, "%uu", &target_cpu) != 1)
+                return -EIO;
+
+        /* Validate target_cpu for the cpumask_test_cpu() operation below. */
+        if (target_cpu >= nr_cpumask_bits)
+                return -EINVAL;
+
+        /* No CPUs should come up or down during this. */
+        cpus_read_lock();
+
+        if (!cpu_online(target_cpu)) {
+                cpus_read_unlock();
+                return -EINVAL;
+        }
+
+        /*
+         * Synchronizes target_cpu_store() and channel closure:
+         *
+         * { Initially: state = CHANNEL_OPENED }
+         *
+         * CPU1                          CPU2
+         *
+         * [target_cpu_store()]          [vmbus_disconnect_ring()]
+         *
+         * LOCK channel_mutex            LOCK channel_mutex
+         * LOAD r1 = state               LOAD r2 = state
+         * IF (r1 == CHANNEL_OPENED)     IF (r2 == CHANNEL_OPENED)
+         *   SEND MODIFYCHANNEL            STORE state = CHANNEL_OPEN
+         *   [...]                         SEND CLOSECHANNEL
+         * UNLOCK channel_mutex          UNLOCK channel_mutex
+         *
+         * Forbids: r1 == r2 == CHANNEL_OPENED (i.e., CPU1's LOCK precedes
+         *              CPU2's LOCK) && CPU2's SEND precedes CPU1's SEND
+         *
+         * Note. The host processes the channel messages "sequentially", in
+         * the order in which they are received on a per-partition basis.
+         */
+        mutex_lock(&vmbus_connection.channel_mutex);
+
+        /*
+         * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
+         * avoid sending the message and fail here for such channels.
+         */
+        if (channel->state != CHANNEL_OPENED_STATE) {
+                ret = -EIO;
+                goto cpu_store_unlock;
+        }
+
+        origin_cpu = channel->target_cpu;
+        if (target_cpu == origin_cpu)
+                goto cpu_store_unlock;
+
+        if (vmbus_send_modifychannel(channel->offermsg.child_relid,
+                                     hv_cpu_number_to_vp_number(target_cpu))) {
+                ret = -EIO;
+                goto cpu_store_unlock;
+        }
+
+        /*
+         * Warning. At this point, there is *no* guarantee that the host will
+         * have successfully processed the vmbus_send_modifychannel() request.
+         * See the header comment of vmbus_send_modifychannel() for more info.
+         *
+         * Lags in the processing of the above vmbus_send_modifychannel() can
+         * result in missed interrupts if the "old" target CPU is taken offline
+         * before Hyper-V starts sending interrupts to the "new" target CPU.
+         * But apart from this offlining scenario, the code tolerates such
+         * lags. It will function correctly even if a channel interrupt comes
+         * in on a CPU that is different from the channel target_cpu value.
+         */
+
+        channel->target_cpu = target_cpu;
+
+        /* See init_vp_index(). */
+        if (hv_is_perf_channel(channel))
+                hv_update_alloced_cpus(origin_cpu, target_cpu);
+
+        /* Currently set only for storvsc channels. */
+        if (channel->change_target_cpu_callback) {
+                (*channel->change_target_cpu_callback)(channel,
+                                origin_cpu, target_cpu);
+        }
+
+cpu_store_unlock:
+        mutex_unlock(&vmbus_connection.channel_mutex);
+        cpus_read_unlock();
+        return ret;
+}
+static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store);
+
+static ssize_t channel_pending_show(struct vmbus_channel *channel,
                                     char *buf)
@@ -1464,7 +1814,7 @@
 }
 static VMBUS_CHAN_ATTR(pending, S_IRUGO, channel_pending_show, NULL);
 
-static ssize_t channel_latency_show(const struct vmbus_channel *channel,
+static ssize_t channel_latency_show(struct vmbus_channel *channel,
                                     char *buf)
 {
         return sprintf(buf, "%d\n",
@@ -1473,26 +1823,58 @@
 }
 static VMBUS_CHAN_ATTR(latency, S_IRUGO, channel_latency_show, NULL);
 
-static ssize_t channel_interrupts_show(const struct vmbus_channel *channel, char *buf)
+static ssize_t channel_interrupts_show(struct vmbus_channel *channel, char *buf)
 {
         return sprintf(buf, "%llu\n", channel->interrupts);
 }
 static VMBUS_CHAN_ATTR(interrupts, S_IRUGO, channel_interrupts_show, NULL);
 
-static ssize_t channel_events_show(const struct vmbus_channel *channel, char *buf)
+static ssize_t channel_events_show(struct vmbus_channel *channel, char *buf)
 {
         return sprintf(buf, "%llu\n", channel->sig_events);
 }
 static VMBUS_CHAN_ATTR(events, S_IRUGO, channel_events_show, NULL);
 
-static ssize_t subchannel_monitor_id_show(const struct vmbus_channel *channel,
+static ssize_t channel_intr_in_full_show(struct vmbus_channel *channel,
+                                         char *buf)
+{
+        return sprintf(buf, "%llu\n",
+                       (unsigned long long)channel->intr_in_full);
+}
+static VMBUS_CHAN_ATTR(intr_in_full, 0444, channel_intr_in_full_show, NULL);
+
+static ssize_t channel_intr_out_empty_show(struct vmbus_channel *channel,
+                                           char *buf)
+{
+        return sprintf(buf, "%llu\n",
+                       (unsigned long long)channel->intr_out_empty);
+}
+static VMBUS_CHAN_ATTR(intr_out_empty, 0444, channel_intr_out_empty_show, NULL);
+
+static ssize_t channel_out_full_first_show(struct vmbus_channel *channel,
+                                           char *buf)
+{
+        return sprintf(buf, "%llu\n",
+                       (unsigned long long)channel->out_full_first);
+}
+static VMBUS_CHAN_ATTR(out_full_first, 0444, channel_out_full_first_show, NULL);
+
+static ssize_t channel_out_full_total_show(struct vmbus_channel *channel,
+                                           char *buf)
+{
+        return sprintf(buf, "%llu\n",
+                       (unsigned long long)channel->out_full_total);
+}
+static VMBUS_CHAN_ATTR(out_full_total, 0444, channel_out_full_total_show, NULL);
+
+static ssize_t subchannel_monitor_id_show(struct vmbus_channel *channel,
                                           char *buf)
 {
         return sprintf(buf, "%u\n", channel->offermsg.monitorid);
 }
 static VMBUS_CHAN_ATTR(monitor_id, S_IRUGO, subchannel_monitor_id_show, NULL);
 
-static ssize_t subchannel_id_show(const struct vmbus_channel *channel,
+static ssize_t subchannel_id_show(struct vmbus_channel *channel,
                                   char *buf)
 {
         return sprintf(buf, "%u\n",
@@ -1510,6 +1892,10 @@
         &chan_attr_latency.attr,
         &chan_attr_interrupts.attr,
         &chan_attr_events.attr,
+        &chan_attr_intr_in_full.attr,
+        &chan_attr_intr_out_empty.attr,
+        &chan_attr_out_full_first.attr,
+        &chan_attr_out_full_total.attr,
         &chan_attr_monitor_id.attr,
         &chan_attr_subchannel_id.attr,
         NULL
@@ -1592,8 +1978,8 @@
  * vmbus_device_create - Creates and registers a new child device
  * on the vmbus.
  */
-struct hv_device *vmbus_device_create(const uuid_le *type,
-                                      const uuid_le *instance,
+struct hv_device *vmbus_device_create(const guid_t *type,
+                                      const guid_t *instance,
                                       struct vmbus_channel *channel)
 {
         struct hv_device *child_device_obj;
@@ -1605,11 +1991,9 @@
         }
 
         child_device_obj->channel = channel;
-        memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le));
-        memcpy(&child_device_obj->dev_instance, instance,
-               sizeof(uuid_le));
+        guid_copy(&child_device_obj->dev_type, type);
+        guid_copy(&child_device_obj->dev_instance, instance);
         child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */
-
 
         return child_device_obj;
 }
@@ -1623,7 +2007,7 @@
         int ret;
 
         dev_set_name(&child_device_obj->device, "%pUl",
-                     child_device_obj->channel->offermsg.offer.if_instance.b);
+                     &child_device_obj->channel->offermsg.offer.if_instance);
 
         child_device_obj->device.bus = &hv_bus;
         child_device_obj->device.parent = &hv_acpi_dev->dev;
@@ -1636,6 +2020,7 @@
         ret = device_register(&child_device_obj->device);
         if (ret) {
                 pr_err("Unable to register child device\n");
+                put_device(&child_device_obj->device);
                 return ret;
         }
 
@@ -1652,6 +2037,7 @@
                 pr_err("Unable to register primary channeln");
                 goto err_kset_unregister;
         }
+        hv_debug_add_dev_dir(child_device_obj);
 
         return 0;
 
@@ -1694,6 +2080,7 @@
         struct resource *new_res;
         struct resource **old_res = &hyperv_mmio;
         struct resource **prev_res = NULL;
+        struct resource r;
 
         switch (res->type) {
 
@@ -1711,6 +2098,23 @@
                 start = res->data.address64.address.minimum;
                 end = res->data.address64.address.maximum;
                 break;
+
+        /*
+         * The IRQ information is needed only on ARM64, which Hyper-V
+         * sets up in the extended format. IRQ information is present
+         * on x86/x64 in the non-extended format but it is not used by
+         * Linux. So don't bother checking for the non-extended format.
+         */
+        case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
+                if (!acpi_dev_resource_interrupt(res, 0, &r)) {
+                        pr_err("Unable to parse Hyper-V ACPI interrupt\n");
+                        return AE_ERROR;
+                }
+                /* ARM64 INTID for VMbus */
+                vmbus_interrupt = res->data.extended_irq.interrupts[0];
+                /* Linux IRQ number */
+                vmbus_irq = r.start;
+                return AE_OK;
 
         default:
                 /* Unused resource type */
@@ -1848,12 +2252,12 @@
                         bool fb_overlap_ok)
 {
         struct resource *iter, *shadow;
-        resource_size_t range_min, range_max, start;
+        resource_size_t range_min, range_max, start, end;
         const char *dev_n = dev_name(&device_obj->device);
         int retval;
 
         retval = -ENXIO;
-        down(&hyperv_mmio_lock);
+        mutex_lock(&hyperv_mmio_lock);
 
         /*
          * If overlaps with frame buffers are allowed, then first attempt to
@@ -1883,6 +2287,14 @@
                 range_max = iter->end;
                 start = (range_min + align - 1) & ~(align - 1);
                 for (; start + size - 1 <= range_max; start += align) {
+                        end = start + size - 1;
+
+                        /* Skip the whole fb_mmio region if not fb_overlap_ok */
+                        if (!fb_overlap_ok && fb_mmio &&
+                            (((start >= fb_mmio->start) && (start <= fb_mmio->end)) ||
+                             ((end >= fb_mmio->start) && (end <= fb_mmio->end))))
+                                continue;
+
                         shadow = __request_region(iter, start, size, NULL,
                                                   IORESOURCE_BUSY);
                         if (!shadow)
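
Note: the new skip tests whether either endpoint of the candidate range [start, end] lands inside the frame-buffer window. The same predicate in isolation, with a few sanity asserts:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* True if start or end falls within [fb_start, fb_end] — the condition
     * vmbus_allocate_mmio() now uses to skip the fb_mmio region.
     */
    static bool touches_fb(uint64_t start, uint64_t end,
                           uint64_t fb_start, uint64_t fb_end)
    {
            return (start >= fb_start && start <= fb_end) ||
                   (end >= fb_start && end <= fb_end);
    }

    int main(void)
    {
            /* Frame buffer at [100, 199]. */
            assert(!touches_fb(0, 99, 100, 199));     /* entirely below */
            assert(touches_fb(50, 150, 100, 199));    /* overlaps the front */
            assert(touches_fb(150, 250, 100, 199));   /* overlaps the back */
            assert(!touches_fb(200, 300, 100, 199));  /* entirely above */
            return 0;
    }
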
@@ -1900,7 +2312,7 @@
         }
 
 exit:
-        up(&hyperv_mmio_lock);
+        mutex_unlock(&hyperv_mmio_lock);
         return retval;
 }
 EXPORT_SYMBOL_GPL(vmbus_allocate_mmio);
@@ -1917,7 +2329,7 @@
 {
         struct resource *iter;
 
-        down(&hyperv_mmio_lock);
+        mutex_lock(&hyperv_mmio_lock);
         for (iter = hyperv_mmio; iter; iter = iter->sibling) {
                 if ((iter->start >= start + size) || (iter->end <= start))
                         continue;
@@ -1925,7 +2337,7 @@
                 __release_region(iter, start, size);
         }
         release_mem_region(start, size);
-        up(&hyperv_mmio_lock);
+        mutex_unlock(&hyperv_mmio_lock);
 
 }
 EXPORT_SYMBOL_GPL(vmbus_free_mmio);
@@ -1967,12 +2379,163 @@
         return ret_val;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int vmbus_bus_suspend(struct device *dev)
+{
+        struct vmbus_channel *channel, *sc;
+
+        while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
+                /*
+                 * We wait here until the completion of any channel
+                 * offers that are currently in progress.
+                 */
+                msleep(1);
+        }
+
+        mutex_lock(&vmbus_connection.channel_mutex);
+        list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+                if (!is_hvsock_channel(channel))
+                        continue;
+
+                vmbus_force_channel_rescinded(channel);
+        }
+        mutex_unlock(&vmbus_connection.channel_mutex);
+
+        /*
+         * Wait until all the sub-channels and hv_sock channels have been
+         * cleaned up. Sub-channels should be destroyed upon suspend, otherwise
+         * they would conflict with the new sub-channels that will be created
+         * in the resume path. hv_sock channels should also be destroyed, but
+         * a hv_sock channel of an established hv_sock connection can not be
+         * really destroyed since it may still be referenced by the userspace
+         * application, so we just force the hv_sock channel to be rescinded
+         * by vmbus_force_channel_rescinded(), and the userspace application
+         * will thoroughly destroy the channel after hibernation.
+         *
+         * Note: the counter nr_chan_close_on_suspend may never go above 0 if
+         * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM.
+         */
+        if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
+                wait_for_completion(&vmbus_connection.ready_for_suspend_event);
+
+        if (atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0) {
+                pr_err("Can not suspend due to a previous failed resuming\n");
+                return -EBUSY;
+        }
+
+        mutex_lock(&vmbus_connection.channel_mutex);
+
+        list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+                /*
+                 * Remove the channel from the array of channels and invalidate
+                 * the channel's relid. Upon resume, vmbus_onoffer() will fix
+                 * up the relid (and other fields, if necessary) and add the
+                 * channel back to the array.
+                 */
+                vmbus_channel_unmap_relid(channel);
+                channel->offermsg.child_relid = INVALID_RELID;
+
+                if (is_hvsock_channel(channel)) {
+                        if (!channel->rescind) {
+                                pr_err("hv_sock channel not rescinded!\n");
+                                WARN_ON_ONCE(1);
+                        }
+                        continue;
+                }
+
+                list_for_each_entry(sc, &channel->sc_list, sc_list) {
+                        pr_err("Sub-channel not deleted!\n");
+                        WARN_ON_ONCE(1);
+                }
+
+                atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
+        }
+
+        mutex_unlock(&vmbus_connection.channel_mutex);
+
+        vmbus_initiate_unload(false);
+
+        /* Reset the event for the next resume. */
+        reinit_completion(&vmbus_connection.ready_for_resume_event);
+
+        return 0;
+}
+
+static int vmbus_bus_resume(struct device *dev)
+{
+        struct vmbus_channel_msginfo *msginfo;
+        size_t msgsize;
+        int ret;
+
+        /*
+         * We only use the 'vmbus_proto_version', which was in use before
+         * hibernation, to re-negotiate with the host.
+         */
+        if (!vmbus_proto_version) {
+                pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version);
+                return -EINVAL;
+        }
+
+        msgsize = sizeof(*msginfo) +
+                  sizeof(struct vmbus_channel_initiate_contact);
+
+        msginfo = kzalloc(msgsize, GFP_KERNEL);
+
+        if (msginfo == NULL)
+                return -ENOMEM;
+
+        ret = vmbus_negotiate_version(msginfo, vmbus_proto_version);
+
+        kfree(msginfo);
+
+        if (ret != 0)
+                return ret;
+
+        WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0);
+
+        vmbus_request_offers();
+
+        if (wait_for_completion_timeout(
+                &vmbus_connection.ready_for_resume_event, 10 * HZ) == 0)
+                pr_err("Some vmbus device is missing after suspending?\n");
+
+        /* Reset the event for the next suspend. */
+        reinit_completion(&vmbus_connection.ready_for_suspend_event);
+
+        return 0;
+}
+#else
+#define vmbus_bus_suspend       NULL
+#define vmbus_bus_resume        NULL
+#endif /* CONFIG_PM_SLEEP */
+
 static const struct acpi_device_id vmbus_acpi_device_ids[] = {
         {"VMBUS", 0},
         {"VMBus", 0},
         {"", 0},
 };
 MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);
+
+/*
+ * Note: we must use the "no_irq" ops, otherwise hibernation can not work with
+ * PCI device assignment, because "pci_dev_pm_ops" uses the "noirq" ops: in
+ * the resume path, the pci "noirq" restore op runs before "non-noirq" op (see
+ * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() ->
+ * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's
+ * resume callback must also run via the "noirq" ops.
+ *
+ * Set suspend_noirq/resume_noirq to NULL for Suspend-to-Idle: see the comment
+ * earlier in this file before vmbus_pm.
+ */
+
+static const struct dev_pm_ops vmbus_bus_pm = {
+        .suspend_noirq  = NULL,
+        .resume_noirq   = NULL,
+        .freeze_noirq   = vmbus_bus_suspend,
+        .thaw_noirq     = vmbus_bus_resume,
+        .poweroff_noirq = vmbus_bus_suspend,
+        .restore_noirq  = vmbus_bus_resume
+};
 
 static struct acpi_driver vmbus_acpi_driver = {
         .name = "vmbus",
@@ -1981,28 +2544,75 @@
                 .add = vmbus_acpi_add,
                 .remove = vmbus_acpi_remove,
         },
+        .drv.pm = &vmbus_bus_pm,
 };
 
 static void hv_kexec_handler(void)
 {
-        hv_synic_clockevents_cleanup();
+        hv_stimer_global_cleanup();
         vmbus_initiate_unload(false);
         /* Make sure conn_state is set as hv_synic_cleanup checks for it */
         mb();
         cpuhp_remove_state(hyperv_cpuhp_online);
-        hyperv_cleanup();
 };
 
 static void hv_crash_handler(struct pt_regs *regs)
 {
+        int cpu;
+
         vmbus_initiate_unload(true);
         /*
         * In crash handler we can't schedule synic cleanup for all CPUs,
         * doing the cleanup for current CPU only. This should be sufficient
         * for kdump.
         */
-        hv_synic_cleanup(smp_processor_id());
-        hyperv_cleanup();
+        cpu = smp_processor_id();
+        hv_stimer_cleanup(cpu);
+        hv_synic_disable_regs(cpu);
+};
+
+static int hv_synic_suspend(void)
+{
+        /*
+         * When we reach here, all the non-boot CPUs have been offlined.
+         * If we're in a legacy configuration where stimer Direct Mode is
+         * not enabled, the stimers on the non-boot CPUs have been unbound
+         * in hv_synic_cleanup() -> hv_stimer_legacy_cleanup() ->
+         * hv_stimer_cleanup() -> clockevents_unbind_device().
+         *
+         * hv_synic_suspend() only runs on CPU0 with interrupts disabled.
+         * Here we do not call hv_stimer_legacy_cleanup() on CPU0 because:
+         * 1) it's unnecessary as interrupts remain disabled between
+         * syscore_suspend() and syscore_resume(): see create_image() and
+         * resume_target_kernel()
+         * 2) the stimer on CPU0 is automatically disabled later by
+         * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ...
+         * -> clockevents_shutdown() -> ... -> hv_ce_shutdown()
+         * 3) a warning would be triggered if we call
+         * clockevents_unbind_device(), which may sleep, in an
+         * interrupts-disabled context.
+         */
+
+        hv_synic_disable_regs(0);
+
+        return 0;
+}
+
+static void hv_synic_resume(void)
+{
+        hv_synic_enable_regs(0);
+
+        /*
+         * Note: we don't need to call hv_stimer_init(0), because the timer
+         * on CPU0 is not unbound in hv_synic_suspend(), and the timer is
+         * automatically re-enabled in timekeeping_resume().
+         */
+}
+
+/* The callbacks run only on CPU0, with irqs_disabled. */
+static struct syscore_ops hv_synic_syscore_ops = {
+        .suspend = hv_synic_suspend,
+        .resume = hv_synic_resume,
 };
 
 static int __init hv_acpi_init(void)
@@ -2027,6 +2637,7 @@
                 ret = -ETIMEDOUT;
                 goto cleanup;
         }
+        hv_debug_init();
 
         ret = vmbus_bus_init();
         if (ret)
@@ -2034,6 +2645,8 @@
 
         hv_setup_kexec_handler(hv_kexec_handler);
         hv_setup_crash_handler(hv_crash_handler);
+
+        register_syscore_ops(&hv_synic_syscore_ops);
 
         return 0;
 
@@ -2047,10 +2660,12 @@
 {
         int cpu;
 
+        unregister_syscore_ops(&hv_synic_syscore_ops);
+
         hv_remove_kexec_handler();
         hv_remove_crash_handler();
         vmbus_connection.conn_state = DISCONNECTED;
-        hv_synic_clockevents_cleanup();
+        hv_stimer_global_cleanup();
         vmbus_disconnect();
         hv_remove_vmbus_irq();
         for_each_online_cpu(cpu) {
@@ -2059,14 +2674,22 @@
 
                 tasklet_kill(&hv_cpu->msg_dpc);
         }
+        hv_debug_rm_all_dir();
+
         vmbus_free_channels();
+        kfree(vmbus_connection.channels);
 
         if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
                 kmsg_dump_unregister(&hv_kmsg_dumper);
                 unregister_die_notifier(&hyperv_die_block);
-                atomic_notifier_chain_unregister(&panic_notifier_list,
-                                                 &hyperv_panic_block);
         }
+
+        /*
+         * The panic notifier is always registered, hence we should
+         * also unconditionally unregister it here as well.
+         */
+        atomic_notifier_chain_unregister(&panic_notifier_list,
+                                         &hyperv_panic_block);
 
         free_page((unsigned long)hv_panic_page);
         unregister_sysctl_table(hv_ctl_table_hdr);
@@ -2080,6 +2703,7 @@
 
 
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Microsoft Hyper-V VMBus Driver");
 
 subsys_initcall(hv_acpi_init);
 module_exit(vmbus_exit);