.. | .. |
---|
520 | 520 | struct hv_pcidev_description func[]; |
---|
521 | 521 | }; |
---|
522 | 522 | |
---|
523 | | -enum hv_pcichild_state { |
---|
524 | | - hv_pcichild_init = 0, |
---|
525 | | - hv_pcichild_requirements, |
---|
526 | | - hv_pcichild_resourced, |
---|
527 | | - hv_pcichild_ejecting, |
---|
528 | | - hv_pcichild_maximum |
---|
529 | | -}; |
---|
530 | | - |
---|
531 | 523 | struct hv_pci_dev { |
---|
532 | 524 | /* List protected by pci_rescan_remove_lock */ |
---|
533 | 525 | struct list_head list_entry; |
---|
534 | 526 | refcount_t refs; |
---|
535 | | - enum hv_pcichild_state state; |
---|
536 | 527 | struct pci_slot *pci_slot; |
---|
537 | 528 | struct hv_pcidev_description desc; |
---|
538 | 529 | bool reported_missing; |
---|
.. | .. |
---|
1237 | 1228 | pbus = pdev->bus; |
---|
1238 | 1229 | hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata); |
---|
1239 | 1230 | int_desc = data->chip_data; |
---|
| 1231 | + if (!int_desc) { |
---|
| 1232 | + dev_warn(&hbus->hdev->device, "%s() can not unmask irq %u\n", |
---|
| 1233 | + __func__, data->irq); |
---|
| 1234 | + return; |
---|
| 1235 | + } |
---|
1240 | 1236 | |
---|
1241 | 1237 | spin_lock_irqsave(&hbus->retarget_msi_interrupt_lock, flags); |
---|
1242 | 1238 | |
---|
.. | .. |
---|
1552 | 1548 | } |
---|
1553 | 1549 | hv_pci_onchannelcallback(hbus); |
---|
1554 | 1550 | spin_unlock_irqrestore(&channel->sched_lock, flags); |
---|
1555 | | - |
---|
1556 | | - if (hpdev->state == hv_pcichild_ejecting) { |
---|
1557 | | - dev_err_once(&hbus->hdev->device, |
---|
1558 | | - "the device is being ejected\n"); |
---|
1559 | | - goto enable_tasklet; |
---|
1560 | | - } |
---|
1561 | 1551 | |
---|
1562 | 1552 | udelay(100); |
---|
1563 | 1553 | } |
---|
.. | .. |
---|
2378 | 2368 | hpdev = container_of(work, struct hv_pci_dev, wrk); |
---|
2379 | 2369 | hbus = hpdev->hbus; |
---|
2380 | 2370 | |
---|
2381 | | - WARN_ON(hpdev->state != hv_pcichild_ejecting); |
---|
2382 | | - |
---|
2383 | 2371 | /* |
---|
2384 | 2372 | * Ejection can come before or after the PCI bus has been set up, so |
---|
2385 | 2373 | * attempt to find it and tear down the bus state, if it exists. This |
---|
.. | .. |
---|
2438 | 2426 | return; |
---|
2439 | 2427 | } |
---|
2440 | 2428 | |
---|
2441 | | - hpdev->state = hv_pcichild_ejecting; |
---|
2442 | 2429 | get_pcichild(hpdev); |
---|
2443 | 2430 | INIT_WORK(&hpdev->wrk, hv_eject_device_work); |
---|
2444 | 2431 | get_hvpcibus(hbus); |
---|
.. | .. |
---|
2842 | 2829 | struct pci_bus_d0_entry *d0_entry; |
---|
2843 | 2830 | struct hv_pci_compl comp_pkt; |
---|
2844 | 2831 | struct pci_packet *pkt; |
---|
| 2832 | + bool retry = true; |
---|
2845 | 2833 | int ret; |
---|
2846 | 2834 | |
---|
| 2835 | +enter_d0_retry: |
---|
2847 | 2836 | /* |
---|
2848 | 2837 | * Tell the host that the bus is ready to use, and moved into the |
---|
2849 | 2838 | * powered-on state. This includes telling the host which region |
---|
.. | .. |
---|
2869 | 2858 | |
---|
2870 | 2859 | if (ret) |
---|
2871 | 2860 | goto exit; |
---|
| 2861 | + |
---|
| 2862 | + /* |
---|
| 2863 | + * In certain cases (kdump) the PCI device of interest was |
---|
| 2864 | + * not cleanly shut down and its resources are still held on the |
---|
| 2865 | + * host side, so the host could return an invalid device status. |
---|
| 2866 | + * We need to explicitly request the host to release the resources |
---|
| 2867 | + * and try to enter D0 again. |
---|
| 2868 | + */ |
---|
| 2869 | + if (comp_pkt.completion_status < 0 && retry) { |
---|
| 2870 | + retry = false; |
---|
| 2871 | + |
---|
| 2872 | + dev_err(&hdev->device, "Retrying D0 Entry\n"); |
---|
| 2873 | + |
---|
| 2874 | + /* |
---|
| 2875 | + * hv_pci_bus_exit() calls hv_send_resources_released() |
---|
| 2876 | + * to free up the resources of its child devices. |
---|
| 2877 | + * In the kdump kernel we need to set the |
---|
| 2878 | + * wslot_res_allocated to 255 so it scans all child |
---|
| 2879 | + * devices to release resources allocated in the |
---|
| 2880 | + * normal kernel before panic happened. |
---|
| 2881 | + */ |
---|
| 2882 | + hbus->wslot_res_allocated = 255; |
---|
| 2883 | + |
---|
| 2884 | + ret = hv_pci_bus_exit(hdev, true); |
---|
| 2885 | + |
---|
| 2886 | + if (ret == 0) { |
---|
| 2887 | + kfree(pkt); |
---|
| 2888 | + goto enter_d0_retry; |
---|
| 2889 | + } |
---|
| 2890 | + dev_err(&hdev->device, |
---|
| 2891 | + "Retrying D0 failed with ret %d\n", ret); |
---|
| 2892 | + } |
---|
2872 | 2893 | |
---|
2873 | 2894 | if (comp_pkt.completion_status < 0) { |
---|
2874 | 2895 | dev_err(&hdev->device, |
---|
.. | .. |
---|
2911 | 2932 | 0, VM_PKT_DATA_INBAND, 0); |
---|
2912 | 2933 | if (!ret) |
---|
2913 | 2934 | ret = wait_for_response(hdev, &comp); |
---|
| 2935 | + |
---|
| 2936 | + /* |
---|
| 2937 | + * In the case of fast device addition/removal, it's possible that |
---|
| 2938 | + * vmbus_sendpacket() or wait_for_response() returns -ENODEV but we |
---|
| 2939 | + * already got a PCI_BUS_RELATIONS* message from the host and the |
---|
| 2940 | + * channel callback already scheduled a work to hbus->wq, which can be |
---|
| 2941 | + * running pci_devices_present_work() -> survey_child_resources() -> |
---|
| 2942 | + * complete(&hbus->survey_event), even after hv_pci_query_relations() |
---|
| 2943 | + * exits and the stack variable 'comp' is no longer valid; as a result, |
---|
| 2944 | + * a hang or a page fault may happen when the complete() calls |
---|
| 2945 | + * raw_spin_lock_irqsave(). Flush hbus->wq before we exit from |
---|
| 2946 | + * hv_pci_query_relations() to avoid the issues. Note: if 'ret' is |
---|
| 2947 | + * -ENODEV, there can't be any more work item scheduled to hbus->wq |
---|
| 2948 | + * after the flush_workqueue(): see vmbus_onoffer_rescind() -> |
---|
| 2949 | + * vmbus_reset_channel_cb(), vmbus_rescind_cleanup() -> |
---|
| 2950 | + * channel->rescind = true. |
---|
| 2951 | + */ |
---|
| 2952 | + flush_workqueue(hbus->wq); |
---|
2914 | 2953 | |
---|
2915 | 2954 | return ret; |
---|
2916 | 2955 | } |
---|
.. | .. |
---|
3107 | 3146 | struct hv_pcibus_device *hbus; |
---|
3108 | 3147 | u16 dom_req, dom; |
---|
3109 | 3148 | char *name; |
---|
3110 | | - bool enter_d0_retry = true; |
---|
3111 | 3149 | int ret; |
---|
3112 | 3150 | |
---|
3113 | 3151 | /* |
---|
.. | .. |
---|
3228 | 3266 | if (ret) |
---|
3229 | 3267 | goto free_fwnode; |
---|
3230 | 3268 | |
---|
3231 | | -retry: |
---|
3232 | 3269 | ret = hv_pci_query_relations(hdev); |
---|
3233 | 3270 | if (ret) |
---|
3234 | 3271 | goto free_irq_domain; |
---|
3235 | 3272 | |
---|
3236 | 3273 | ret = hv_pci_enter_d0(hdev); |
---|
3237 | | - /* |
---|
3238 | | - * In certain case (Kdump) the pci device of interest was |
---|
3239 | | - * not cleanly shut down and resource is still held on host |
---|
3240 | | - * side, the host could return invalid device status. |
---|
3241 | | - * We need to explicitly request host to release the resource |
---|
3242 | | - * and try to enter D0 again. |
---|
3243 | | - * Since the hv_pci_bus_exit() call releases structures |
---|
3244 | | - * of all its child devices, we need to start the retry from |
---|
3245 | | - * hv_pci_query_relations() call, requesting host to send |
---|
3246 | | - * the synchronous child device relations message before this |
---|
3247 | | - * information is needed in hv_send_resources_allocated() |
---|
3248 | | - * call later. |
---|
3249 | | - */ |
---|
3250 | | - if (ret == -EPROTO && enter_d0_retry) { |
---|
3251 | | - enter_d0_retry = false; |
---|
3252 | | - |
---|
3253 | | - dev_err(&hdev->device, "Retrying D0 Entry\n"); |
---|
3254 | | - |
---|
3255 | | - /* |
---|
3256 | | - * Hv_pci_bus_exit() calls hv_send_resources_released() |
---|
3257 | | - * to free up resources of its child devices. |
---|
3258 | | - * In the kdump kernel we need to set the |
---|
3259 | | - * wslot_res_allocated to 255 so it scans all child |
---|
3260 | | - * devices to release resources allocated in the |
---|
3261 | | - * normal kernel before panic happened. |
---|
3262 | | - */ |
---|
3263 | | - hbus->wslot_res_allocated = 255; |
---|
3264 | | - ret = hv_pci_bus_exit(hdev, true); |
---|
3265 | | - |
---|
3266 | | - if (ret == 0) |
---|
3267 | | - goto retry; |
---|
3268 | | - |
---|
3269 | | - dev_err(&hdev->device, |
---|
3270 | | - "Retrying D0 failed with ret %d\n", ret); |
---|
3271 | | - } |
---|
3272 | 3274 | if (ret) |
---|
3273 | 3275 | goto free_irq_domain; |
---|
3274 | 3276 | |
---|