From 6778948f9de86c3cfaf36725a7c87dcff9ba247f Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Mon, 11 Dec 2023 08:20:59 +0000 Subject: [PATCH] kernel_5.10 no rt --- kernel/arch/powerpc/kernel/eeh.c | 885 +++++++++++++++++++++++++++++++--------------------------- 1 files changed, 468 insertions(+), 417 deletions(-) diff --git a/kernel/arch/powerpc/kernel/eeh.c b/kernel/arch/powerpc/kernel/eeh.c index 44bb522..20c417a 100644 --- a/kernel/arch/powerpc/kernel/eeh.c +++ b/kernel/arch/powerpc/kernel/eeh.c @@ -1,22 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright IBM Corporation 2001, 2005, 2006 * Copyright Dave Engebretsen & Todd Inglett 2001 * Copyright Linas Vepstas 2005, 2006 * Copyright 2001-2012 IBM Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com> */ @@ -109,7 +96,14 @@ * frozen count in last hour exceeds this limit, the PE will * be forced to be offline permanently. */ -int eeh_max_freezes = 5; +u32 eeh_max_freezes = 5; + +/* + * Controls whether a recovery event should be scheduled when an + * isolated device is discovered. This is only really useful for + * debugging problems with the EEH core. + */ +bool eeh_debugfs_no_recover; /* Platform dependent EEH operations */ struct eeh_ops *eeh_ops = NULL; @@ -156,6 +150,16 @@ } __setup("eeh=", eeh_setup); +void eeh_show_enabled(void) +{ + if (eeh_has_flag(EEH_FORCE_DISABLED)) + pr_info("EEH: Recovery disabled by kernel parameter.\n"); + else if (eeh_has_flag(EEH_ENABLED)) + pr_info("EEH: Capable adapter found: recovery enabled.\n"); + else + pr_info("EEH: No capable adapters found: recovery disabled.\n"); +} + /* * This routine captures assorted PCI configuration space data * for the indicated PCI device, and puts them into a buffer @@ -163,39 +167,33 @@ */ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); u32 cfg; int cap, i; int n = 0, l = 0; char buffer[128]; - if (!pdn) { - pr_warn("EEH: Note: No error log for absent device.\n"); - return 0; - } - n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n", - pdn->phb->global_number, pdn->busno, - PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); + edev->pe->phb->global_number, edev->bdfn >> 8, + PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn)); pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n", - pdn->phb->global_number, pdn->busno, - PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); + edev->pe->phb->global_number, edev->bdfn >> 8, + PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn)); - eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg); + eeh_ops->read_config(edev, PCI_VENDOR_ID, 4, &cfg); n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); pr_warn("EEH: PCI device/vendor: %08x\n", cfg); - eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg); + eeh_ops->read_config(edev, PCI_COMMAND, 4, &cfg); n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); /* Gather bridge-specific registers */ if (edev->mode & EEH_DEV_BRIDGE) { - eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg); + eeh_ops->read_config(edev, PCI_SEC_STATUS, 2, &cfg); n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); pr_warn("EEH: Bridge secondary status: %04x\n", cfg); - eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg); + eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &cfg); n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); pr_warn("EEH: Bridge control: %04x\n", cfg); } @@ -203,11 +201,11 @@ /* Dump out the PCI-X command and status regs */ cap = edev->pcix_cap; if (cap) { - eeh_ops->read_config(pdn, cap, 4, &cfg); + eeh_ops->read_config(edev, cap, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); pr_warn("EEH: PCI-X cmd: %08x\n", cfg); - eeh_ops->read_config(pdn, cap+4, 4, &cfg); + eeh_ops->read_config(edev, cap+4, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); pr_warn("EEH: PCI-X status: %08x\n", cfg); } @@ -219,7 +217,7 @@ pr_warn("EEH: PCI-E capabilities and status follow:\n"); for (i=0; i<=8; i++) { - eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); + eeh_ops->read_config(edev, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); if ((i % 4) == 0) { @@ -246,7 +244,7 @@ pr_warn("EEH: PCI-E AER capability register set follows:\n"); for (i=0; i<=13; i++) { - eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); + eeh_ops->read_config(edev, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); if ((i % 4) == 0) { @@ -410,14 +408,12 @@ } /* Isolate the PHB and send event */ - eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); + eeh_pe_mark_isolated(phb_pe); eeh_serialize_unlock(flags); - pr_err("EEH: PHB#%x failure detected, location: %s\n", + pr_debug("EEH: PHB#%x failure detected, location: %s\n", phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe)); - dump_stack(); eeh_send_failure_event(phb_pe); - return 1; out: eeh_serialize_unlock(flags); @@ -444,7 +440,7 @@ unsigned long flags; struct device_node *dn; struct pci_dev *dev; - struct eeh_pe *pe, *parent_pe, *phb_pe; + struct eeh_pe *pe, *parent_pe; int rc = 0; const char *location = NULL; @@ -463,13 +459,7 @@ /* Access to IO BARs might get this far and still not want checking. */ if (!pe) { eeh_stats.ignored_check++; - pr_debug("EEH: Ignored check for %s\n", - eeh_pci_name(dev)); - return 0; - } - - if (!pe->addr && !pe->config_addr) { - eeh_stats.no_cfg_addr++; + eeh_edev_dbg(edev, "Ignored check\n"); return 0; } @@ -504,12 +494,11 @@ if (dn) location = of_get_property(dn, "ibm,loc-code", NULL); - printk(KERN_ERR "EEH: %d reads ignored for recovering device at " - "location=%s driver=%s pci addr=%s\n", + eeh_edev_err(edev, "%d reads ignored for recovering device at location=%s driver=%s\n", pe->check_count, location ? location : "unknown", - eeh_driver_name(dev), eeh_pci_name(dev)); - printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", + eeh_driver_name(dev)); + eeh_edev_err(edev, "Might be infinite loop in %s driver\n", eeh_driver_name(dev)); dump_stack(); } @@ -569,20 +558,15 @@ * with other functions on this device, and functions under * bridges. */ - eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + eeh_pe_mark_isolated(pe); eeh_serialize_unlock(flags); /* Most EEH events are due to device driver bugs. Having * a stack trace will help the device-driver authors figure * out what happened. So print that out. */ - phb_pe = eeh_phb_pe_get(pe->phb); - pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", - pe->phb->global_number, pe->addr); - pr_err("EEH: PE location: %s, PHB location: %s\n", - eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); - dump_stack(); - + pr_debug("EEH: %s: Frozen PHB#%x-PE#%x detected\n", + __func__, pe->phb->global_number, pe->addr); eeh_send_failure_event(pe); return 1; @@ -687,7 +671,7 @@ /* Check if the request is finished successfully */ if (active_flag) { - rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); + rc = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); if (rc < 0) return rc; @@ -700,7 +684,7 @@ return rc; } -static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev, +static void eeh_disable_and_save_dev_state(struct eeh_dev *edev, void *userdata) { struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); @@ -711,7 +695,7 @@ * state for the specified device */ if (!pdev || pdev == dev) - return NULL; + return; /* Ensure we have D0 power state */ pci_set_power_state(pdev, PCI_D0); @@ -724,87 +708,23 @@ * interrupt from the device */ pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); - - return NULL; } -static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata) +static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); struct pci_dev *dev = userdata; if (!pdev) - return NULL; + return; /* Apply customization from firmware */ - if (pdn && eeh_ops->restore_config) - eeh_ops->restore_config(pdn); + if (eeh_ops->restore_config) + eeh_ops->restore_config(edev); /* The caller should restore state for the specified device */ if (pdev != dev) pci_restore_state(pdev); - - return NULL; -} - -int eeh_restore_vf_config(struct pci_dn *pdn) -{ - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - u32 devctl, cmd, cap2, aer_capctl; - int old_mps; - - if (edev->pcie_cap) { - /* Restore MPS */ - old_mps = (ffs(pdn->mps) - 8) << 5; - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, &devctl); - devctl &= ~PCI_EXP_DEVCTL_PAYLOAD; - devctl |= old_mps; - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, devctl); - - /* Disable Completion Timeout if possible */ - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2, - 4, &cap2); - if (cap2 & PCI_EXP_DEVCAP2_COMP_TMOUT_DIS) { - eeh_ops->read_config(pdn, - edev->pcie_cap + PCI_EXP_DEVCTL2, - 4, &cap2); - cap2 |= PCI_EXP_DEVCTL2_COMP_TMOUT_DIS; - eeh_ops->write_config(pdn, - edev->pcie_cap + PCI_EXP_DEVCTL2, - 4, cap2); - } - } - - /* Enable SERR and parity checking */ - eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd); - cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); - eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd); - - /* Enable report various errors */ - if (edev->pcie_cap) { - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, &devctl); - devctl &= ~PCI_EXP_DEVCTL_CERE; - devctl |= (PCI_EXP_DEVCTL_NFERE | - PCI_EXP_DEVCTL_FERE | - PCI_EXP_DEVCTL_URRE); - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, devctl); - } - - /* Enable ECRC generation and check */ - if (edev->pcie_cap && edev->aer_cap) { - eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP, - 4, &aer_capctl); - aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); - eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP, - 4, aer_capctl); - } - - return 0; } /** @@ -829,14 +749,15 @@ switch (state) { case pcie_deassert_reset: eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); - eeh_unfreeze_pe(pe, false); + eeh_unfreeze_pe(pe); if (!(pe->type & EEH_PE_VF)) - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true); eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev); - eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true); break; case pcie_hot_reset: - eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); + eeh_pe_mark_isolated(pe); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true); eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); if (!(pe->type & EEH_PE_VF)) @@ -844,7 +765,8 @@ eeh_ops->reset(pe, EEH_RESET_HOT); break; case pcie_warm_reset: - eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); + eeh_pe_mark_isolated(pe); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true); eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); if (!(pe->type & EEH_PE_VF)) @@ -852,7 +774,7 @@ eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); break; default: - eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED); + eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true); return -EINVAL; }; @@ -869,7 +791,7 @@ * the indicated device and its children so that the bunch of the * devices could be reset properly. */ -static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag) +static void eeh_set_dev_freset(struct eeh_dev *edev, void *flag) { struct pci_dev *dev; unsigned int *freset = (unsigned int *)flag; @@ -877,8 +799,24 @@ dev = eeh_dev_to_pci_dev(edev); if (dev) *freset |= dev->needs_freset; +} - return NULL; +static void eeh_pe_refreeze_passed(struct eeh_pe *root) +{ + struct eeh_pe *pe; + int state; + + eeh_for_each_pe(root, pe) { + if (eeh_pe_passed(pe)) { + state = eeh_ops->get_state(pe, NULL); + if (state & + (EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED)) { + pr_info("EEH: Passed-through PE PHB#%x-PE#%x was thawed by reset, re-freezing for safety.\n", + pe->phb->global_number, pe->addr); + eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE); + } + } + } } /** @@ -893,12 +831,12 @@ * * This function will attempt to reset a PE three times before failing. */ -int eeh_pe_reset_full(struct eeh_pe *pe) +int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed) { int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED); int type = EEH_RESET_HOT; unsigned int freset = 0; - int i, state, ret; + int i, state = 0, ret; /* * Determine the type of reset to perform - hot or fundamental. @@ -915,33 +853,42 @@ /* Make three attempts at resetting the bus */ for (i = 0; i < 3; i++) { - ret = eeh_pe_reset(pe, type); - if (ret) - break; - - ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE); - if (ret) - break; + ret = eeh_pe_reset(pe, type, include_passed); + if (!ret) + ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, + include_passed); + if (ret) { + ret = -EIO; + pr_warn("EEH: Failure %d resetting PHB#%x-PE#%x (attempt %d)\n\n", + state, pe->phb->global_number, pe->addr, i + 1); + continue; + } + if (i) + pr_warn("EEH: PHB#%x-PE#%x: Successful reset (attempt %d)\n", + pe->phb->global_number, pe->addr, i + 1); /* Wait until the PE is in a functioning state */ - state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if (eeh_state_active(state)) - break; - + state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); if (state < 0) { - pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x", - __func__, pe->phb->global_number, pe->addr); + pr_warn("EEH: Unrecoverable slot failure on PHB#%x-PE#%x", + pe->phb->global_number, pe->addr); ret = -ENOTRECOVERABLE; break; } - - /* Set error in case this is our last attempt */ - ret = -EIO; - pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n", - __func__, state, pe->phb->global_number, pe->addr, (i + 1)); + if (eeh_state_active(state)) + break; + else + pr_warn("EEH: PHB#%x-PE#%x: Slot inactive after reset: 0x%x (attempt %d)\n", + pe->phb->global_number, pe->addr, state, i + 1); } - eeh_pe_state_clear(pe, reset_state); + /* Resetting the PE may have unfrozen child PEs. If those PEs have been + * (potentially) passed through to a guest, re-freeze them: + */ + if (!include_passed) + eeh_pe_refreeze_passed(pe); + + eeh_pe_state_clear(pe, reset_state, true); return ret; } @@ -956,15 +903,13 @@ */ void eeh_save_bars(struct eeh_dev *edev) { - struct pci_dn *pdn; int i; - pdn = eeh_dev_to_pdn(edev); - if (!pdn) + if (!edev) return; for (i = 0; i < 16; i++) - eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]); + eeh_ops->read_config(edev, i * 4, 4, &edev->config_space[i]); /* * For PCI bridges including root port, we need enable bus @@ -974,56 +919,6 @@ */ if (edev->mode & EEH_DEV_BRIDGE) edev->config_space[1] |= PCI_COMMAND_MASTER; -} - -/** - * eeh_ops_register - Register platform dependent EEH operations - * @ops: platform dependent EEH operations - * - * Register the platform dependent EEH operation callback - * functions. The platform should call this function before - * any other EEH operations. - */ -int __init eeh_ops_register(struct eeh_ops *ops) -{ - if (!ops->name) { - pr_warn("%s: Invalid EEH ops name for %p\n", - __func__, ops); - return -EINVAL; - } - - if (eeh_ops && eeh_ops != ops) { - pr_warn("%s: EEH ops of platform %s already existing (%s)\n", - __func__, eeh_ops->name, ops->name); - return -EEXIST; - } - - eeh_ops = ops; - - return 0; -} - -/** - * eeh_ops_unregister - Unreigster platform dependent EEH operations - * @name: name of EEH platform operations - * - * Unregister the platform dependent EEH operation callback - * functions. - */ -int __exit eeh_ops_unregister(const char *name) -{ - if (!name || !strlen(name)) { - pr_warn("%s: Invalid EEH ops name\n", - __func__); - return -EINVAL; - } - - if (eeh_ops && !strcmp(eeh_ops->name, name)) { - eeh_ops = NULL; - return 0; - } - - return -EEXIST; } static int eeh_reboot_notifier(struct notifier_block *nb, @@ -1037,162 +932,113 @@ .notifier_call = eeh_reboot_notifier, }; -void eeh_probe_devices(void) +static int eeh_device_notifier(struct notifier_block *nb, + unsigned long action, void *data) { - struct pci_controller *hose, *tmp; - struct pci_dn *pdn; + struct device *dev = data; - /* Enable EEH for all adapters */ - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { - pdn = hose->pci_data; - traverse_pci_dn(pdn, eeh_ops->probe, NULL); + switch (action) { + /* + * Note: It's not possible to perform EEH device addition (i.e. + * {pseries,pnv}_pcibios_bus_add_device()) here because it depends on + * the device's resources, which have not yet been set up. + */ + case BUS_NOTIFY_DEL_DEVICE: + eeh_remove_device(to_pci_dev(dev)); + break; + default: + break; } + return NOTIFY_DONE; } +static struct notifier_block eeh_device_nb = { + .notifier_call = eeh_device_notifier, +}; + /** - * eeh_init - EEH initialization + * eeh_init - System wide EEH initialization * - * Initialize EEH by trying to enable it for all of the adapters in the system. - * As a side effect we can determine here if eeh is supported at all. - * Note that we leave EEH on so failed config cycles won't cause a machine - * check. If a user turns off EEH for a particular adapter they are really - * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't - * grant access to a slot if EEH isn't enabled, and so we always enable - * EEH for all slots/all devices. - * - * The eeh-force-off option disables EEH checking globally, for all slots. - * Even if force-off is set, the EEH hardware is still enabled, so that - * newer systems can boot. + * It's the platform's job to call this from an arch_initcall(). */ -static int eeh_init(void) +int eeh_init(struct eeh_ops *ops) { struct pci_controller *hose, *tmp; int ret = 0; + /* the platform should only initialise EEH once */ + if (WARN_ON(eeh_ops)) + return -EEXIST; + if (WARN_ON(!ops)) + return -ENOENT; + eeh_ops = ops; + /* Register reboot notifier */ ret = register_reboot_notifier(&eeh_reboot_nb); if (ret) { - pr_warn("%s: Failed to register notifier (%d)\n", + pr_warn("%s: Failed to register reboot notifier (%d)\n", __func__, ret); return ret; } - /* call platform initialization function */ - if (!eeh_ops) { - pr_warn("%s: Platform EEH operation not found\n", - __func__); - return -EEXIST; - } else if ((ret = eeh_ops->init())) + ret = bus_register_notifier(&pci_bus_type, &eeh_device_nb); + if (ret) { + pr_warn("%s: Failed to register bus notifier (%d)\n", + __func__, ret); return ret; + } /* Initialize PHB PEs */ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) - eeh_dev_phb_init_dynamic(hose); + eeh_phb_pe_create(hose); + + eeh_addr_cache_init(); /* Initialize EEH event */ - ret = eeh_event_init(); - if (ret) - return ret; - - eeh_probe_devices(); - - if (eeh_enabled()) - pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); - else if (!eeh_has_flag(EEH_POSTPONED_PROBE)) - pr_info("EEH: No capable adapters found\n"); - - return ret; -} - -core_initcall_sync(eeh_init); - -/** - * eeh_add_device_early - Enable EEH for the indicated device node - * @pdn: PCI device node for which to set up EEH - * - * This routine must be used to perform EEH initialization for PCI - * devices that were added after system boot (e.g. hotplug, dlpar). - * This routine must be called before any i/o is performed to the - * adapter (inluding any config-space i/o). - * Whether this actually enables EEH or not for this device depends - * on the CEC architecture, type of the device, on earlier boot - * command-line arguments & etc. - */ -void eeh_add_device_early(struct pci_dn *pdn) -{ - struct pci_controller *phb = pdn ? pdn->phb : NULL; - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - - if (!edev) - return; - - if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) - return; - - /* USB Bus children of PCI devices will not have BUID's */ - if (NULL == phb || - (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid)) - return; - - eeh_ops->probe(pdn, NULL); + return eeh_event_init(); } /** - * eeh_add_device_tree_early - Enable EEH for the indicated device - * @pdn: PCI device node - * - * This routine must be used to perform EEH initialization for the - * indicated PCI device that was added after system boot (e.g. - * hotplug, dlpar). - */ -void eeh_add_device_tree_early(struct pci_dn *pdn) -{ - struct pci_dn *n; - - if (!pdn) - return; - - list_for_each_entry(n, &pdn->child_list, list) - eeh_add_device_tree_early(n); - eeh_add_device_early(pdn); -} -EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); - -/** - * eeh_add_device_late - Perform EEH initialization for the indicated pci device + * eeh_probe_device() - Perform EEH initialization for the indicated pci device * @dev: pci device for which to set up EEH * * This routine must be used to complete EEH initialization for PCI * devices that were added after system boot (e.g. hotplug, dlpar). */ -void eeh_add_device_late(struct pci_dev *dev) +void eeh_probe_device(struct pci_dev *dev) { - struct pci_dn *pdn; struct eeh_dev *edev; - - if (!dev || !eeh_enabled()) - return; pr_debug("EEH: Adding device %s\n", pci_name(dev)); - pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); - edev = pdn_to_eeh_dev(pdn); - if (edev->pdev == dev) { - pr_debug("EEH: Already referenced !\n"); + /* + * pci_dev_to_eeh_dev() can only work if eeh_probe_dev() was + * already called for this device. + */ + if (WARN_ON_ONCE(pci_dev_to_eeh_dev(dev))) { + pci_dbg(dev, "Already bound to an eeh_dev!\n"); + return; + } + + edev = eeh_ops->probe(dev); + if (!edev) { + pr_debug("EEH: Adding device failed\n"); return; } /* - * The EEH cache might not be removed correctly because of - * unbalanced kref to the device during unplug time, which - * relies on pcibios_release_device(). So we have to remove - * that here explicitly. + * FIXME: We rely on pcibios_release_device() to remove the + * existing EEH state. The release function is only called if + * the pci_dev's refcount drops to zero so if something is + * keeping a ref to a device (e.g. a filesystem) we need to + * remove the old EEH state. + * + * FIXME: HEY MA, LOOK AT ME, NO LOCKING! */ - if (edev->pdev) { - eeh_rmv_from_parent_pe(edev); + if (edev->pdev && edev->pdev != dev) { + eeh_pe_tree_remove(edev); eeh_addr_cache_rmv_dev(edev->pdev); eeh_sysfs_remove_device(edev->pdev); - edev->mode &= ~EEH_DEV_SYSFS; /* * We definitely should have the PCI device removed @@ -1200,65 +1046,14 @@ * into error handler afterwards. */ edev->mode |= EEH_DEV_NO_HANDLER; - - edev->pdev = NULL; - dev->dev.archdata.edev = NULL; } - if (eeh_has_flag(EEH_PROBE_MODE_DEV)) - eeh_ops->probe(pdn, NULL); - + /* bind the pdev and the edev together */ edev->pdev = dev; dev->dev.archdata.edev = edev; - eeh_addr_cache_insert_dev(dev); + eeh_sysfs_add_device(dev); } - -/** - * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus - * @bus: PCI bus - * - * This routine must be used to perform EEH initialization for PCI - * devices which are attached to the indicated PCI bus. The PCI bus - * is added after system boot through hotplug or dlpar. - */ -void eeh_add_device_tree_late(struct pci_bus *bus) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - eeh_add_device_late(dev); - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - struct pci_bus *subbus = dev->subordinate; - if (subbus) - eeh_add_device_tree_late(subbus); - } - } -} -EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); - -/** - * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus - * @bus: PCI bus - * - * This routine must be used to add EEH sysfs files for PCI - * devices which are attached to the indicated PCI bus. The PCI bus - * is added after system boot through hotplug or dlpar. - */ -void eeh_add_sysfs_files(struct pci_bus *bus) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - eeh_sysfs_add_device(dev); - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - struct pci_bus *subbus = dev->subordinate; - if (subbus) - eeh_add_sysfs_files(subbus); - } - } -} -EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); /** * eeh_remove_device - Undo EEH setup for the indicated pci device @@ -1279,10 +1074,10 @@ edev = pci_dev_to_eeh_dev(dev); /* Unregister the device with the EEH/PCI address search system */ - pr_debug("EEH: Removing device %s\n", pci_name(dev)); + dev_dbg(&dev->dev, "EEH: Removing device\n"); if (!edev || !edev->pdev || !edev->pe) { - pr_debug("EEH: Not referenced !\n"); + dev_dbg(&dev->dev, "EEH: Device not referenced!\n"); return; } @@ -1295,17 +1090,11 @@ edev->pdev = NULL; /* - * The flag "in_error" is used to trace EEH devices for VFs - * in error state or not. It's set in eeh_report_error(). If - * it's not set, eeh_report_{reset,resume}() won't be called - * for the VF EEH device. + * eeh_sysfs_remove_device() uses pci_dev_to_eeh_dev() so we need to + * remove the sysfs files before clearing dev.archdata.edev */ - edev->in_error = false; - dev->dev.archdata.edev = NULL; - if (!(edev->pe->state & EEH_PE_KEEP)) - eeh_rmv_from_parent_pe(edev); - else - edev->mode |= EEH_DEV_DISCONNECTED; + if (edev->mode & EEH_DEV_SYSFS) + eeh_sysfs_remove_device(dev); /* * We're removing from the PCI subsystem, that means @@ -1316,11 +1105,22 @@ edev->mode |= EEH_DEV_NO_HANDLER; eeh_addr_cache_rmv_dev(dev); - eeh_sysfs_remove_device(dev); - edev->mode &= ~EEH_DEV_SYSFS; + + /* + * The flag "in_error" is used to trace EEH devices for VFs + * in error state or not. It's set in eeh_report_error(). If + * it's not set, eeh_report_{reset,resume}() won't be called + * for the VF EEH device. + */ + edev->in_error = false; + dev->dev.archdata.edev = NULL; + if (!(edev->pe->state & EEH_PE_KEEP)) + eeh_pe_tree_remove(edev); + else + edev->mode |= EEH_DEV_DISCONNECTED; } -int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state) +int eeh_unfreeze_pe(struct eeh_pe *pe) { int ret; @@ -1337,10 +1137,6 @@ __func__, ret, pe->phb->global_number, pe->addr); return ret; } - - /* Clear software isolated state */ - if (sw_state && (pe->state & EEH_PE_ISOLATED)) - eeh_pe_state_clear(pe, EEH_PE_ISOLATED); return ret; } @@ -1393,7 +1189,10 @@ } } - return eeh_unfreeze_pe(pe, true); + ret = eeh_unfreeze_pe(pe); + if (!ret) + eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true); + return ret; } /** @@ -1483,7 +1282,7 @@ if (!dev) return 0; - if (dev->iommu_group) { + if (device_iommu_mapped(dev)) { *ppdev = pdev; return 1; } @@ -1623,13 +1422,12 @@ } EXPORT_SYMBOL_GPL(eeh_pe_get_state); -static int eeh_pe_reenable_devices(struct eeh_pe *pe) +static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed) { struct eeh_dev *edev, *tmp; struct pci_dev *pdev; int ret = 0; - /* Restore config space */ eeh_pe_restore_bars(pe); /* @@ -1650,7 +1448,14 @@ } /* The PE is still in frozen state */ - return eeh_unfreeze_pe(pe, true); + if (include_passed || !eeh_pe_passed(pe)) { + ret = eeh_unfreeze_pe(pe); + } else + pr_info("EEH: Note: Leaving passthrough PHB#%x-PE#%x frozen.\n", + pe->phb->global_number, pe->addr); + if (!ret) + eeh_pe_state_clear(pe, EEH_PE_ISOLATED, include_passed); + return ret; } @@ -1663,7 +1468,7 @@ * indicated type, either fundamental reset or hot reset. * PE reset is the most important part for error recovery. */ -int eeh_pe_reset(struct eeh_pe *pe, int option) +int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed) { int ret = 0; @@ -1677,11 +1482,11 @@ switch (option) { case EEH_RESET_DEACTIVATE: ret = eeh_ops->reset(pe, option); - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, include_passed); if (ret) break; - ret = eeh_pe_reenable_devices(pe); + ret = eeh_pe_reenable_devices(pe, include_passed); break; case EEH_RESET_HOT: case EEH_RESET_FUNDAMENTAL: @@ -1807,22 +1612,256 @@ return 0; } -static int eeh_freeze_dbgfs_set(void *data, u64 val) +DEFINE_DEBUGFS_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get, + eeh_enable_dbgfs_set, "0x%llx\n"); + +static ssize_t eeh_force_recover_write(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) { - eeh_max_freezes = val; + struct pci_controller *hose; + uint32_t phbid, pe_no; + struct eeh_pe *pe; + char buf[20]; + int ret; + + ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); + if (!ret) + return -EFAULT; + + /* + * When PE is NULL the event is a "special" event. Rather than + * recovering a specific PE it forces the EEH core to scan for failed + * PHBs and recovers each. This needs to be done before any device + * recoveries can occur. + */ + if (!strncmp(buf, "hwcheck", 7)) { + __eeh_send_failure_event(NULL); + return count; + } + + ret = sscanf(buf, "%x:%x", &phbid, &pe_no); + if (ret != 2) + return -EINVAL; + + hose = pci_find_controller_for_domain(phbid); + if (!hose) + return -ENODEV; + + /* Retrieve PE */ + pe = eeh_pe_get(hose, pe_no); + if (!pe) + return -ENODEV; + + /* + * We don't do any state checking here since the detection + * process is async to the recovery process. The recovery + * thread *should* not break even if we schedule a recovery + * from an odd state (e.g. PE removed, or recovery of a + * non-isolated PE) + */ + __eeh_send_failure_event(pe); + + return ret < 0 ? ret : count; +} + +static const struct file_operations eeh_force_recover_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = eeh_force_recover_write, +}; + +static ssize_t eeh_debugfs_dev_usage(struct file *filp, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + static const char usage[] = "input format: <domain>:<bus>:<dev>.<fn>\n"; + + return simple_read_from_buffer(user_buf, count, ppos, + usage, sizeof(usage) - 1); +} + +static ssize_t eeh_dev_check_write(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + uint32_t domain, bus, dev, fn; + struct pci_dev *pdev; + struct eeh_dev *edev; + char buf[20]; + int ret; + + memset(buf, 0, sizeof(buf)); + ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count); + if (!ret) + return -EFAULT; + + ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn); + if (ret != 4) { + pr_err("%s: expected 4 args, got %d\n", __func__, ret); + return -EINVAL; + } + + pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn); + if (!pdev) + return -ENODEV; + + edev = pci_dev_to_eeh_dev(pdev); + if (!edev) { + pci_err(pdev, "No eeh_dev for this device!\n"); + pci_dev_put(pdev); + return -ENODEV; + } + + ret = eeh_dev_check_failure(edev); + pci_info(pdev, "eeh_dev_check_failure(%04x:%02x:%02x.%01x) = %d\n", + domain, bus, dev, fn, ret); + + pci_dev_put(pdev); + + return count; +} + +static const struct file_operations eeh_dev_check_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = eeh_dev_check_write, + .read = eeh_debugfs_dev_usage, +}; + +static int eeh_debugfs_break_device(struct pci_dev *pdev) +{ + struct resource *bar = NULL; + void __iomem *mapped; + u16 old, bit; + int i, pos; + + /* Do we have an MMIO BAR to disable? */ + for (i = 0; i <= PCI_STD_RESOURCE_END; i++) { + struct resource *r = &pdev->resource[i]; + + if (!r->flags || !r->start) + continue; + if (r->flags & IORESOURCE_IO) + continue; + if (r->flags & IORESOURCE_UNSET) + continue; + + bar = r; + break; + } + + if (!bar) { + pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n"); + return -ENXIO; + } + + pci_err(pdev, "Going to break: %pR\n", bar); + + if (pdev->is_virtfn) { +#ifndef CONFIG_PCI_IOV + return -ENXIO; +#else + /* + * VFs don't have a per-function COMMAND register, so the best + * we can do is clear the Memory Space Enable bit in the PF's + * SRIOV control reg. + * + * Unfortunately, this requires that we have a PF (i.e doesn't + * work for a passed-through VF) and it has the potential side + * effect of also causing an EEH on every other VF under the + * PF. Oh well. + */ + pdev = pdev->physfn; + if (!pdev) + return -ENXIO; /* passed through VFs have no PF */ + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + pos += PCI_SRIOV_CTRL; + bit = PCI_SRIOV_CTRL_MSE; +#endif /* !CONFIG_PCI_IOV */ + } else { + bit = PCI_COMMAND_MEMORY; + pos = PCI_COMMAND; + } + + /* + * Process here is: + * + * 1. Disable Memory space. + * + * 2. Perform an MMIO to the device. This should result in an error + * (CA / UR) being raised by the device which results in an EEH + * PE freeze. Using the in_8() accessor skips the eeh detection hook + * so the freeze hook so the EEH Detection machinery won't be + * triggered here. This is to match the usual behaviour of EEH + * where the HW will asyncronously freeze a PE and it's up to + * the kernel to notice and deal with it. + * + * 3. Turn Memory space back on. This is more important for VFs + * since recovery will probably fail if we don't. For normal + * the COMMAND register is reset as a part of re-initialising + * the device. + * + * Breaking stuff is the point so who cares if it's racy ;) + */ + pci_read_config_word(pdev, pos, &old); + + mapped = ioremap(bar->start, PAGE_SIZE); + if (!mapped) { + pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar); + return -ENXIO; + } + + pci_write_config_word(pdev, pos, old & ~bit); + in_8(mapped); + pci_write_config_word(pdev, pos, old); + + iounmap(mapped); + return 0; } -static int eeh_freeze_dbgfs_get(void *data, u64 *val) +static ssize_t eeh_dev_break_write(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) { - *val = eeh_max_freezes; - return 0; + uint32_t domain, bus, dev, fn; + struct pci_dev *pdev; + char buf[20]; + int ret; + + memset(buf, 0, sizeof(buf)); + ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count); + if (!ret) + return -EFAULT; + + ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn); + if (ret != 4) { + pr_err("%s: expected 4 args, got %d\n", __func__, ret); + return -EINVAL; + } + + pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn); + if (!pdev) + return -ENODEV; + + ret = eeh_debugfs_break_device(pdev); + pci_dev_put(pdev); + + if (ret < 0) + return ret; + + return count; } -DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get, - eeh_enable_dbgfs_set, "0x%llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get, - eeh_freeze_dbgfs_set, "0x%llx\n"); +static const struct file_operations eeh_dev_break_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = eeh_dev_break_write, + .read = eeh_debugfs_dev_usage, +}; + #endif static int __init eeh_init_proc(void) @@ -1830,12 +1869,24 @@ if (machine_is(pseries) || machine_is(powernv)) { proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show); #ifdef CONFIG_DEBUG_FS - debugfs_create_file("eeh_enable", 0600, - powerpc_debugfs_root, NULL, - &eeh_enable_dbgfs_ops); - debugfs_create_file("eeh_max_freezes", 0600, - powerpc_debugfs_root, NULL, - &eeh_freeze_dbgfs_ops); + debugfs_create_file_unsafe("eeh_enable", 0600, + powerpc_debugfs_root, NULL, + &eeh_enable_dbgfs_ops); + debugfs_create_u32("eeh_max_freezes", 0600, + powerpc_debugfs_root, &eeh_max_freezes); + debugfs_create_bool("eeh_disable_recovery", 0600, + powerpc_debugfs_root, + &eeh_debugfs_no_recover); + debugfs_create_file_unsafe("eeh_dev_check", 0600, + powerpc_debugfs_root, NULL, + &eeh_dev_check_fops); + debugfs_create_file_unsafe("eeh_dev_break", 0600, + powerpc_debugfs_root, NULL, + &eeh_dev_break_fops); + debugfs_create_file_unsafe("eeh_force_recover", 0600, + powerpc_debugfs_root, NULL, + &eeh_force_recover_fops); + eeh_cache_debugfs_init(); #endif } -- Gitblit v1.6.2