hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/pci/controller/pci-hyperv.c
....@@ -520,19 +520,10 @@
520520 struct hv_pcidev_description func[];
521521 };
522522
523
-enum hv_pcichild_state {
524
- hv_pcichild_init = 0,
525
- hv_pcichild_requirements,
526
- hv_pcichild_resourced,
527
- hv_pcichild_ejecting,
528
- hv_pcichild_maximum
529
-};
530
-
531523 struct hv_pci_dev {
532524 /* List protected by pci_rescan_remove_lock */
533525 struct list_head list_entry;
534526 refcount_t refs;
535
- enum hv_pcichild_state state;
536527 struct pci_slot *pci_slot;
537528 struct hv_pcidev_description desc;
538529 bool reported_missing;
....@@ -1237,6 +1228,11 @@
12371228 pbus = pdev->bus;
12381229 hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
12391230 int_desc = data->chip_data;
1231
+ if (!int_desc) {
1232
+ dev_warn(&hbus->hdev->device, "%s() can not unmask irq %u\n",
1233
+ __func__, data->irq);
1234
+ return;
1235
+ }
12401236
12411237 spin_lock_irqsave(&hbus->retarget_msi_interrupt_lock, flags);
12421238
....@@ -1522,7 +1518,7 @@
15221518 * Prevents hv_pci_onchannelcallback() from running concurrently
15231519 * in the tasklet.
15241520 */
1525
- tasklet_disable_in_atomic(&channel->callback_event);
1521
+ tasklet_disable(&channel->callback_event);
15261522
15271523 /*
15281524 * Since this function is called with IRQ locks held, can't
....@@ -1552,12 +1548,6 @@
15521548 }
15531549 hv_pci_onchannelcallback(hbus);
15541550 spin_unlock_irqrestore(&channel->sched_lock, flags);
1555
-
1556
- if (hpdev->state == hv_pcichild_ejecting) {
1557
- dev_err_once(&hbus->hdev->device,
1558
- "the device is being ejected\n");
1559
- goto enable_tasklet;
1560
- }
15611551
15621552 udelay(100);
15631553 }
....@@ -2378,8 +2368,6 @@
23782368 hpdev = container_of(work, struct hv_pci_dev, wrk);
23792369 hbus = hpdev->hbus;
23802370
2381
- WARN_ON(hpdev->state != hv_pcichild_ejecting);
2382
-
23832371 /*
23842372 * Ejection can come before or after the PCI bus has been set up, so
23852373 * attempt to find it and tear down the bus state, if it exists. This
....@@ -2438,7 +2426,6 @@
24382426 return;
24392427 }
24402428
2441
- hpdev->state = hv_pcichild_ejecting;
24422429 get_pcichild(hpdev);
24432430 INIT_WORK(&hpdev->wrk, hv_eject_device_work);
24442431 get_hvpcibus(hbus);
....@@ -2842,8 +2829,10 @@
28422829 struct pci_bus_d0_entry *d0_entry;
28432830 struct hv_pci_compl comp_pkt;
28442831 struct pci_packet *pkt;
2832
+ bool retry = true;
28452833 int ret;
28462834
2835
+enter_d0_retry:
28472836 /*
28482837 * Tell the host that the bus is ready to use, and moved into the
28492838 * powered-on state. This includes telling the host which region
....@@ -2869,6 +2858,38 @@
28692858
28702859 if (ret)
28712860 goto exit;
2861
+
2862
+ /*
2863
+ * In certain cases (kdump) the pci device of interest was
2864
+ * not cleanly shut down and resource is still held on host
2865
+ * side, the host could return invalid device status.
2866
+ * We need to explicitly request host to release the resource
2867
+ * and try to enter D0 again.
2868
+ */
2869
+ if (comp_pkt.completion_status < 0 && retry) {
2870
+ retry = false;
2871
+
2872
+ dev_err(&hdev->device, "Retrying D0 Entry\n");
2873
+
2874
+ /*
2875
+ * hv_pci_bus_exit() calls hv_send_resources_released()
2876
+ * to free up resources of its child devices.
2877
+ * In the kdump kernel we need to set the
2878
+ * wslot_res_allocated to 255 so it scans all child
2879
+ * devices to release resources allocated in the
2880
+ * normal kernel before panic happened.
2881
+ */
2882
+ hbus->wslot_res_allocated = 255;
2883
+
2884
+ ret = hv_pci_bus_exit(hdev, true);
2885
+
2886
+ if (ret == 0) {
2887
+ kfree(pkt);
2888
+ goto enter_d0_retry;
2889
+ }
2890
+ dev_err(&hdev->device,
2891
+ "Retrying D0 failed with ret %d\n", ret);
2892
+ }
28722893
28732894 if (comp_pkt.completion_status < 0) {
28742895 dev_err(&hdev->device,
....@@ -2911,6 +2932,24 @@
29112932 0, VM_PKT_DATA_INBAND, 0);
29122933 if (!ret)
29132934 ret = wait_for_response(hdev, &comp);
2935
+
2936
+ /*
2937
+ * In the case of fast device addition/removal, it's possible that
2938
+ * vmbus_sendpacket() or wait_for_response() returns -ENODEV but we
2939
+ * already got a PCI_BUS_RELATIONS* message from the host and the
2940
+ * channel callback already scheduled a work to hbus->wq, which can be
2941
+ * running pci_devices_present_work() -> survey_child_resources() ->
2942
+ * complete(&hbus->survey_event), even after hv_pci_query_relations()
2943
+ * exits and the stack variable 'comp' is no longer valid; as a result,
2944
+ * a hang or a page fault may happen when the complete() calls
2945
+ * raw_spin_lock_irqsave(). Flush hbus->wq before we exit from
2946
+ * hv_pci_query_relations() to avoid the issues. Note: if 'ret' is
2947
+ * -ENODEV, there can't be any more work item scheduled to hbus->wq
2948
+ * after the flush_workqueue(): see vmbus_onoffer_rescind() ->
2949
+ * vmbus_reset_channel_cb(), vmbus_rescind_cleanup() ->
2950
+ * channel->rescind = true.
2951
+ */
2952
+ flush_workqueue(hbus->wq);
29142953
29152954 return ret;
29162955 }
....@@ -3107,7 +3146,6 @@
31073146 struct hv_pcibus_device *hbus;
31083147 u16 dom_req, dom;
31093148 char *name;
3110
- bool enter_d0_retry = true;
31113149 int ret;
31123150
31133151 /*
....@@ -3228,47 +3266,11 @@
32283266 if (ret)
32293267 goto free_fwnode;
32303268
3231
-retry:
32323269 ret = hv_pci_query_relations(hdev);
32333270 if (ret)
32343271 goto free_irq_domain;
32353272
32363273 ret = hv_pci_enter_d0(hdev);
3237
- /*
3238
- * In certain case (Kdump) the pci device of interest was
3239
- * not cleanly shut down and resource is still held on host
3240
- * side, the host could return invalid device status.
3241
- * We need to explicitly request host to release the resource
3242
- * and try to enter D0 again.
3243
- * Since the hv_pci_bus_exit() call releases structures
3244
- * of all its child devices, we need to start the retry from
3245
- * hv_pci_query_relations() call, requesting host to send
3246
- * the synchronous child device relations message before this
3247
- * information is needed in hv_send_resources_allocated()
3248
- * call later.
3249
- */
3250
- if (ret == -EPROTO && enter_d0_retry) {
3251
- enter_d0_retry = false;
3252
-
3253
- dev_err(&hdev->device, "Retrying D0 Entry\n");
3254
-
3255
- /*
3256
- * Hv_pci_bus_exit() calls hv_send_resources_released()
3257
- * to free up resources of its child devices.
3258
- * In the kdump kernel we need to set the
3259
- * wslot_res_allocated to 255 so it scans all child
3260
- * devices to release resources allocated in the
3261
- * normal kernel before panic happened.
3262
- */
3263
- hbus->wslot_res_allocated = 255;
3264
- ret = hv_pci_bus_exit(hdev, true);
3265
-
3266
- if (ret == 0)
3267
- goto retry;
3268
-
3269
- dev_err(&hdev->device,
3270
- "Retrying D0 failed with ret %d\n", ret);
3271
- }
32723274 if (ret)
32733275 goto free_irq_domain;
32743276