.. | .. |
---|
10 | 10 | * Zhang Yanmin (yanmin.zhang@intel.com) |
---|
11 | 11 | */ |
---|
12 | 12 | |
---|
| 13 | +#define dev_fmt(fmt) "AER: " fmt |
---|
| 14 | + |
---|
13 | 15 | #include <linux/pci.h> |
---|
14 | 16 | #include <linux/module.h> |
---|
15 | | -#include <linux/pci.h> |
---|
16 | 17 | #include <linux/kernel.h> |
---|
17 | 18 | #include <linux/errno.h> |
---|
18 | 19 | #include <linux/aer.h> |
---|
19 | 20 | #include "portdrv.h" |
---|
20 | 21 | #include "../pci.h" |
---|
21 | | - |
---|
22 | | -struct aer_broadcast_data { |
---|
23 | | - enum pci_channel_state state; |
---|
24 | | - enum pci_ers_result result; |
---|
25 | | -}; |
---|
26 | 22 | |
---|
27 | 23 | static pci_ers_result_t merge_result(enum pci_ers_result orig, |
---|
28 | 24 | enum pci_ers_result new) |
---|
.. | .. |
---|
49 | 45 | return orig; |
---|
50 | 46 | } |
---|
51 | 47 | |
---|
52 | | -static int report_error_detected(struct pci_dev *dev, void *data) |
---|
| 48 | +static int report_error_detected(struct pci_dev *dev, |
---|
| 49 | + pci_channel_state_t state, |
---|
| 50 | + enum pci_ers_result *result) |
---|
53 | 51 | { |
---|
54 | 52 | pci_ers_result_t vote; |
---|
55 | 53 | const struct pci_error_handlers *err_handler; |
---|
56 | | - struct aer_broadcast_data *result_data; |
---|
57 | | - |
---|
58 | | - result_data = (struct aer_broadcast_data *) data; |
---|
59 | 54 | |
---|
60 | 55 | device_lock(&dev->dev); |
---|
61 | | - dev->error_state = result_data->state; |
---|
62 | | - |
---|
63 | | - if (!dev->driver || |
---|
| 56 | + if (!pci_dev_set_io_state(dev, state) || |
---|
| 57 | + !dev->driver || |
---|
64 | 58 | !dev->driver->err_handler || |
---|
65 | 59 | !dev->driver->err_handler->error_detected) { |
---|
66 | 60 | /* |
---|
.. | .. |
---|
69 | 63 | * error callbacks of "any" device in the subtree, and will |
---|
70 | 64 | * exit in the disconnected error state. |
---|
71 | 65 | */ |
---|
72 | | - if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) |
---|
| 66 | + if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { |
---|
73 | 67 | vote = PCI_ERS_RESULT_NO_AER_DRIVER; |
---|
74 | | - else |
---|
| 68 | + pci_info(dev, "can't recover (no error_detected callback)\n"); |
---|
| 69 | + } else { |
---|
75 | 70 | vote = PCI_ERS_RESULT_NONE; |
---|
| 71 | + } |
---|
76 | 72 | } else { |
---|
77 | 73 | err_handler = dev->driver->err_handler; |
---|
78 | | - vote = err_handler->error_detected(dev, result_data->state); |
---|
79 | | - pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); |
---|
| 74 | + vote = err_handler->error_detected(dev, state); |
---|
80 | 75 | } |
---|
81 | | - |
---|
82 | | - result_data->result = merge_result(result_data->result, vote); |
---|
| 76 | + pci_uevent_ers(dev, vote); |
---|
| 77 | + *result = merge_result(*result, vote); |
---|
83 | 78 | device_unlock(&dev->dev); |
---|
84 | 79 | return 0; |
---|
85 | 80 | } |
---|
86 | 81 | |
---|
| 82 | +static int report_frozen_detected(struct pci_dev *dev, void *data) |
---|
| 83 | +{ |
---|
| 84 | + return report_error_detected(dev, pci_channel_io_frozen, data); |
---|
| 85 | +} |
---|
| 86 | + |
---|
| 87 | +static int report_normal_detected(struct pci_dev *dev, void *data) |
---|
| 88 | +{ |
---|
| 89 | + return report_error_detected(dev, pci_channel_io_normal, data); |
---|
| 90 | +} |
---|
| 91 | + |
---|
87 | 92 | static int report_mmio_enabled(struct pci_dev *dev, void *data) |
---|
88 | 93 | { |
---|
89 | | - pci_ers_result_t vote; |
---|
| 94 | + pci_ers_result_t vote, *result = data; |
---|
90 | 95 | const struct pci_error_handlers *err_handler; |
---|
91 | | - struct aer_broadcast_data *result_data; |
---|
92 | | - |
---|
93 | | - result_data = (struct aer_broadcast_data *) data; |
---|
94 | 96 | |
---|
95 | 97 | device_lock(&dev->dev); |
---|
96 | 98 | if (!dev->driver || |
---|
.. | .. |
---|
100 | 102 | |
---|
101 | 103 | err_handler = dev->driver->err_handler; |
---|
102 | 104 | vote = err_handler->mmio_enabled(dev); |
---|
103 | | - result_data->result = merge_result(result_data->result, vote); |
---|
| 105 | + *result = merge_result(*result, vote); |
---|
104 | 106 | out: |
---|
105 | 107 | device_unlock(&dev->dev); |
---|
106 | 108 | return 0; |
---|
.. | .. |
---|
108 | 110 | |
---|
109 | 111 | static int report_slot_reset(struct pci_dev *dev, void *data) |
---|
110 | 112 | { |
---|
111 | | - pci_ers_result_t vote; |
---|
| 113 | + pci_ers_result_t vote, *result = data; |
---|
112 | 114 | const struct pci_error_handlers *err_handler; |
---|
113 | | - struct aer_broadcast_data *result_data; |
---|
114 | | - |
---|
115 | | - result_data = (struct aer_broadcast_data *) data; |
---|
116 | 115 | |
---|
117 | 116 | device_lock(&dev->dev); |
---|
118 | 117 | if (!dev->driver || |
---|
.. | .. |
---|
122 | 121 | |
---|
123 | 122 | err_handler = dev->driver->err_handler; |
---|
124 | 123 | vote = err_handler->slot_reset(dev); |
---|
125 | | - result_data->result = merge_result(result_data->result, vote); |
---|
| 124 | + *result = merge_result(*result, vote); |
---|
126 | 125 | out: |
---|
127 | 126 | device_unlock(&dev->dev); |
---|
128 | 127 | return 0; |
---|
.. | .. |
---|
133 | 132 | const struct pci_error_handlers *err_handler; |
---|
134 | 133 | |
---|
135 | 134 | device_lock(&dev->dev); |
---|
136 | | - dev->error_state = pci_channel_io_normal; |
---|
137 | | - |
---|
138 | | - if (!dev->driver || |
---|
| 135 | + if (!pci_dev_set_io_state(dev, pci_channel_io_normal) || |
---|
| 136 | + !dev->driver || |
---|
139 | 137 | !dev->driver->err_handler || |
---|
140 | 138 | !dev->driver->err_handler->resume) |
---|
141 | 139 | goto out; |
---|
142 | 140 | |
---|
143 | 141 | err_handler = dev->driver->err_handler; |
---|
144 | 142 | err_handler->resume(dev); |
---|
145 | | - pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); |
---|
146 | 143 | out: |
---|
| 144 | + pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); |
---|
147 | 145 | device_unlock(&dev->dev); |
---|
148 | 146 | return 0; |
---|
149 | 147 | } |
---|
150 | 148 | |
---|
151 | 149 | /** |
---|
152 | | - * default_reset_link - default reset function |
---|
153 | | - * @dev: pointer to pci_dev data structure |
---|
| 150 | + * pci_walk_bridge - walk bridges potentially AER affected |
---|
| 151 | + * @bridge: bridge which may be a Port or an RCEC |
---|
| 152 | + * @cb: callback to be called for each device found |
---|
| 153 | + * @userdata: arbitrary pointer to be passed to callback |
---|
154 | 154 | * |
---|
155 | | - * Invoked when performing link reset on a Downstream Port or a |
---|
156 | | - * Root Port with no aer driver. |
---|
157 | | - */ |
---|
158 | | -static pci_ers_result_t default_reset_link(struct pci_dev *dev) |
---|
159 | | -{ |
---|
160 | | - int rc; |
---|
161 | | - |
---|
162 | | - rc = pci_bus_error_reset(dev); |
---|
163 | | - pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n"); |
---|
164 | | - return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; |
---|
165 | | -} |
---|
166 | | - |
---|
167 | | -static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) |
---|
168 | | -{ |
---|
169 | | - pci_ers_result_t status; |
---|
170 | | - struct pcie_port_service_driver *driver = NULL; |
---|
171 | | - |
---|
172 | | - driver = pcie_port_find_service(dev, service); |
---|
173 | | - if (driver && driver->reset_link) { |
---|
174 | | - status = driver->reset_link(dev); |
---|
175 | | - } else if (dev->has_secondary_link) { |
---|
176 | | - status = default_reset_link(dev); |
---|
177 | | - } else { |
---|
178 | | - pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n", |
---|
179 | | - pci_name(dev)); |
---|
180 | | - return PCI_ERS_RESULT_DISCONNECT; |
---|
181 | | - } |
---|
182 | | - |
---|
183 | | - if (status != PCI_ERS_RESULT_RECOVERED) { |
---|
184 | | - pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n", |
---|
185 | | - pci_name(dev)); |
---|
186 | | - return PCI_ERS_RESULT_DISCONNECT; |
---|
187 | | - } |
---|
188 | | - |
---|
189 | | - return status; |
---|
190 | | -} |
---|
191 | | - |
---|
192 | | -/** |
---|
193 | | - * broadcast_error_message - handle message broadcast to downstream drivers |
---|
194 | | - * @dev: pointer to from where in a hierarchy message is broadcasted down |
---|
195 | | - * @state: error state |
---|
196 | | - * @error_mesg: message to print |
---|
197 | | - * @cb: callback to be broadcasted |
---|
| 155 | + * If the device provided is a bridge, walk the subordinate bus, including |
---|
| 156 | + * any bridged devices on buses under this bus. Call the provided callback |
---|
| 157 | + * on each device found. |
---|
198 | 158 | * |
---|
199 | | - * Invoked during error recovery process. Once being invoked, the content |
---|
200 | | - * of error severity will be broadcasted to all downstream drivers in a |
---|
201 | | - * hierarchy in question. |
---|
| 159 | + * If the device provided has no subordinate bus, e.g., an RCEC, call the |
---|
| 160 | + * callback on the device itself. |
---|
202 | 161 | */ |
---|
203 | | -static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, |
---|
204 | | - enum pci_channel_state state, |
---|
205 | | - char *error_mesg, |
---|
206 | | - int (*cb)(struct pci_dev *, void *)) |
---|
| 162 | +static void pci_walk_bridge(struct pci_dev *bridge, |
---|
| 163 | + int (*cb)(struct pci_dev *, void *), |
---|
| 164 | + void *userdata) |
---|
207 | 165 | { |
---|
208 | | - struct aer_broadcast_data result_data; |
---|
209 | | - |
---|
210 | | - pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg); |
---|
211 | | - result_data.state = state; |
---|
212 | | - if (cb == report_error_detected) |
---|
213 | | - result_data.result = PCI_ERS_RESULT_CAN_RECOVER; |
---|
| 166 | + if (bridge->subordinate) |
---|
| 167 | + pci_walk_bus(bridge->subordinate, cb, userdata); |
---|
214 | 168 | else |
---|
215 | | - result_data.result = PCI_ERS_RESULT_RECOVERED; |
---|
216 | | - |
---|
217 | | - pci_walk_bus(dev->subordinate, cb, &result_data); |
---|
218 | | - return result_data.result; |
---|
| 169 | + cb(bridge, userdata); |
---|
219 | 170 | } |
---|
220 | 171 | |
---|
221 | | -/** |
---|
222 | | - * pcie_do_fatal_recovery - handle fatal error recovery process |
---|
223 | | - * @dev: pointer to a pci_dev data structure of agent detecting an error |
---|
224 | | - * |
---|
225 | | - * Invoked when an error is fatal. Once being invoked, removes the devices |
---|
226 | | - * beneath this AER agent, followed by reset link e.g. secondary bus reset |
---|
227 | | - * followed by re-enumeration of devices. |
---|
228 | | - */ |
---|
229 | | -void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service) |
---|
| 172 | +pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, |
---|
| 173 | + pci_channel_state_t state, |
---|
| 174 | + pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev)) |
---|
230 | 175 | { |
---|
231 | | - struct pci_dev *udev; |
---|
232 | | - struct pci_bus *parent; |
---|
233 | | - struct pci_dev *pdev, *temp; |
---|
234 | | - pci_ers_result_t result; |
---|
235 | | - |
---|
236 | | - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) |
---|
237 | | - udev = dev; |
---|
238 | | - else |
---|
239 | | - udev = dev->bus->self; |
---|
240 | | - |
---|
241 | | - parent = udev->subordinate; |
---|
242 | | - pci_lock_rescan_remove(); |
---|
243 | | - pci_dev_get(dev); |
---|
244 | | - list_for_each_entry_safe_reverse(pdev, temp, &parent->devices, |
---|
245 | | - bus_list) { |
---|
246 | | - pci_dev_get(pdev); |
---|
247 | | - pci_dev_set_disconnected(pdev, NULL); |
---|
248 | | - if (pci_has_subordinate(pdev)) |
---|
249 | | - pci_walk_bus(pdev->subordinate, |
---|
250 | | - pci_dev_set_disconnected, NULL); |
---|
251 | | - pci_stop_and_remove_bus_device(pdev); |
---|
252 | | - pci_dev_put(pdev); |
---|
253 | | - } |
---|
254 | | - |
---|
255 | | - result = reset_link(udev, service); |
---|
256 | | - |
---|
257 | | - if ((service == PCIE_PORT_SERVICE_AER) && |
---|
258 | | - (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) { |
---|
259 | | - /* |
---|
260 | | - * If the error is reported by a bridge, we think this error |
---|
261 | | - * is related to the downstream link of the bridge, so we |
---|
262 | | - * do error recovery on all subordinates of the bridge instead |
---|
263 | | - * of the bridge and clear the error status of the bridge. |
---|
264 | | - */ |
---|
265 | | - pci_aer_clear_fatal_status(dev); |
---|
266 | | - pci_aer_clear_device_status(dev); |
---|
267 | | - } |
---|
268 | | - |
---|
269 | | - if (result == PCI_ERS_RESULT_RECOVERED) { |
---|
270 | | - if (pcie_wait_for_link(udev, true)) |
---|
271 | | - pci_rescan_bus(udev->bus); |
---|
272 | | - pci_info(dev, "Device recovery from fatal error successful\n"); |
---|
273 | | - } else { |
---|
274 | | - pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); |
---|
275 | | - pci_info(dev, "Device recovery from fatal error failed\n"); |
---|
276 | | - } |
---|
277 | | - |
---|
278 | | - pci_dev_put(dev); |
---|
279 | | - pci_unlock_rescan_remove(); |
---|
280 | | -} |
---|
281 | | - |
---|
282 | | -/** |
---|
283 | | - * pcie_do_nonfatal_recovery - handle nonfatal error recovery process |
---|
284 | | - * @dev: pointer to a pci_dev data structure of agent detecting an error |
---|
285 | | - * |
---|
286 | | - * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast |
---|
287 | | - * error detected message to all downstream drivers within a hierarchy in |
---|
288 | | - * question and return the returned code. |
---|
289 | | - */ |
---|
290 | | -void pcie_do_nonfatal_recovery(struct pci_dev *dev) |
---|
291 | | -{ |
---|
292 | | - pci_ers_result_t status; |
---|
293 | | - enum pci_channel_state state; |
---|
294 | | - |
---|
295 | | - state = pci_channel_io_normal; |
---|
| 176 | + int type = pci_pcie_type(dev); |
---|
| 177 | + struct pci_dev *bridge; |
---|
| 178 | + pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; |
---|
296 | 179 | |
---|
297 | 180 | /* |
---|
298 | | - * Error recovery runs on all subordinates of the first downstream port. |
---|
299 | | - * If the downstream port detected the error, it is cleared at the end. |
---|
| 181 | + * If the error was detected by a Root Port, Downstream Port, or |
---|
| 182 | + * RCEC, recovery runs on the device itself. For Ports, that also |
---|
| 183 | + * includes any subordinate devices. |
---|
| 184 | + * |
---|
| 185 | + * If it was detected by another device (Endpoint, etc), recovery |
---|
| 186 | + * runs on the device and anything else under the same Port, i.e., |
---|
| 187 | + * everything under "bridge". |
---|
300 | 188 | */ |
---|
301 | | - if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || |
---|
302 | | - pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)) |
---|
303 | | - dev = dev->bus->self; |
---|
| 189 | + if (type == PCI_EXP_TYPE_ROOT_PORT || |
---|
| 190 | + type == PCI_EXP_TYPE_DOWNSTREAM || |
---|
| 191 | + type == PCI_EXP_TYPE_RC_EC) |
---|
| 192 | + bridge = dev; |
---|
| 193 | + else |
---|
| 194 | + bridge = pci_upstream_bridge(dev); |
---|
304 | 195 | |
---|
305 | | - status = broadcast_error_message(dev, |
---|
306 | | - state, |
---|
307 | | - "error_detected", |
---|
308 | | - report_error_detected); |
---|
| 196 | + pci_dbg(bridge, "broadcast error_detected message\n"); |
---|
| 197 | + if (state == pci_channel_io_frozen) { |
---|
| 198 | + pci_walk_bridge(bridge, report_frozen_detected, &status); |
---|
| 199 | + if (reset_subordinates(bridge) != PCI_ERS_RESULT_RECOVERED) { |
---|
| 200 | + pci_warn(bridge, "subordinate device reset failed\n"); |
---|
| 201 | + goto failed; |
---|
| 202 | + } |
---|
| 203 | + } else { |
---|
| 204 | + pci_walk_bridge(bridge, report_normal_detected, &status); |
---|
| 205 | + } |
---|
309 | 206 | |
---|
310 | | - if (status == PCI_ERS_RESULT_CAN_RECOVER) |
---|
311 | | - status = broadcast_error_message(dev, |
---|
312 | | - state, |
---|
313 | | - "mmio_enabled", |
---|
314 | | - report_mmio_enabled); |
---|
| 207 | + if (status == PCI_ERS_RESULT_CAN_RECOVER) { |
---|
| 208 | + status = PCI_ERS_RESULT_RECOVERED; |
---|
| 209 | + pci_dbg(bridge, "broadcast mmio_enabled message\n"); |
---|
| 210 | + pci_walk_bridge(bridge, report_mmio_enabled, &status); |
---|
| 211 | + } |
---|
315 | 212 | |
---|
316 | 213 | if (status == PCI_ERS_RESULT_NEED_RESET) { |
---|
317 | 214 | /* |
---|
.. | .. |
---|
319 | 216 | * functions to reset slot before calling |
---|
320 | 217 | * drivers' slot_reset callbacks? |
---|
321 | 218 | */ |
---|
322 | | - status = broadcast_error_message(dev, |
---|
323 | | - state, |
---|
324 | | - "slot_reset", |
---|
325 | | - report_slot_reset); |
---|
| 219 | + status = PCI_ERS_RESULT_RECOVERED; |
---|
| 220 | + pci_dbg(bridge, "broadcast slot_reset message\n"); |
---|
| 221 | + pci_walk_bridge(bridge, report_slot_reset, &status); |
---|
326 | 222 | } |
---|
327 | 223 | |
---|
328 | 224 | if (status != PCI_ERS_RESULT_RECOVERED) |
---|
329 | 225 | goto failed; |
---|
330 | 226 | |
---|
331 | | - broadcast_error_message(dev, |
---|
332 | | - state, |
---|
333 | | - "resume", |
---|
334 | | - report_resume); |
---|
| 227 | + pci_dbg(bridge, "broadcast resume message\n"); |
---|
| 228 | + pci_walk_bridge(bridge, report_resume, &status); |
---|
335 | 229 | |
---|
336 | | - pci_aer_clear_device_status(dev); |
---|
337 | | - pci_cleanup_aer_uncorrect_error_status(dev); |
---|
338 | | - pci_info(dev, "AER: Device recovery successful\n"); |
---|
339 | | - return; |
---|
| 230 | + if (pcie_aer_is_native(bridge)) |
---|
| 231 | + pcie_clear_device_status(bridge); |
---|
| 232 | + pci_aer_clear_nonfatal_status(bridge); |
---|
| 233 | + pci_info(bridge, "device recovery successful\n"); |
---|
| 234 | + return status; |
---|
340 | 235 | |
---|
341 | 236 | failed: |
---|
342 | | - pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); |
---|
| 237 | + pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT); |
---|
343 | 238 | |
---|
344 | 239 | /* TODO: Should kernel panic here? */ |
---|
345 | | - pci_info(dev, "AER: Device recovery failed\n"); |
---|
| 240 | + pci_info(bridge, "device recovery failed\n"); |
---|
| 241 | + |
---|
| 242 | + return status; |
---|
346 | 243 | } |
---|