Release 4.14 arch/powerpc/kernel/eeh.c
/*
* Copyright IBM Corporation 2001, 2005, 2006
* Copyright Dave Engebretsen & Todd Inglett 2001
* Copyright Linas Vepstas 2005, 2006
* Copyright 2001-2012 IBM Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
*/
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/iommu.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/reboot.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/of.h>
#include <linux/atomic.h>
#include <asm/debugfs.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
#include <asm/iommu.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>
#include <asm/pte-walk.h>
/** Overview:
* EEH, or "Enhanced Error Handling" is a PCI bridge technology for
* dealing with PCI bus errors that can't be dealt with within the
* usual PCI framework, except by check-stopping the CPU. Systems
* that are designed for high-availability/reliability cannot afford
* to crash due to a "mere" PCI error, thus the need for EEH.
* An EEH-capable bridge operates by converting a detected error
* into a "slot freeze", taking the PCI adapter off-line, making
* the slot behave, from the OS'es point of view, as if the slot
* were "empty": all reads return 0xff's and all writes are silently
* ignored. EEH slot isolation events can be triggered by parity
* errors on the address or data busses (e.g. during posted writes),
* which in turn might be caused by low voltage on the bus, dust,
* vibration, humidity, radioactivity or plain-old failed hardware.
*
* Note, however, that one of the leading causes of EEH slot
* freeze events are buggy device drivers, buggy device microcode,
* or buggy device hardware. This is because any attempt by the
* device to bus-master data to a memory address that is not
* assigned to the device will trigger a slot freeze. (The idea
* is to prevent devices-gone-wild from corrupting system memory).
* Buggy hardware/drivers will have a miserable time co-existing
* with EEH.
*
* Ideally, a PCI device driver, when suspecting that an isolation
* event has occurred (e.g. by reading 0xff's), will then ask EEH
* whether this is the case, and then take appropriate steps to
* reset the PCI slot, the PCI device, and then resume operations.
* However, until that day, the checking is done here, with the
* eeh_check_failure() routine embedded in the MMIO macros. If
* the slot is found to be isolated, an "EEH Event" is synthesized
* and sent out for processing.
*/
/* If a device driver keeps reading an MMIO register in an interrupt
* handler after a slot isolation event, it might be broken.
* This sets the threshold for how many read attempts we allow
* before printing an error message.
*/
#define EEH_MAX_FAILS 2100000
/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC (5*60*1000)
/*
* EEH probe mode support, which is part of the flags,
* is to support multiple platforms for EEH. Some platforms
 * like pSeries do PCI enumeration based on device tree.
* However, other platforms like powernv probe PCI devices
* from hardware. The flag is used to distinguish that.
* In addition, struct eeh_ops::probe would be invoked for
* particular OF node or PCI device so that the corresponding
* PE would be created there.
*/
int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags);
/*
* EEH allowed maximal frozen times. If one particular PE's
* frozen count in last hour exceeds this limit, the PE will
* be forced to be offline permanently.
*/
int eeh_max_freezes = 5;
/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;
/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);
EXPORT_SYMBOL_GPL(confirm_error_lock);
/* Lock to protect passed flags */
static DEFINE_MUTEX(eeh_dev_mutex);
/* Buffer for reporting pci register dumps. It's here in BSS, and
* not dynamically alloced, so that it ends up in RMO where RTAS
* can access it.
*/
#define EEH_PCI_REGS_LOG_LEN 8192
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
/*
 * Global EEH statistics. One instance (eeh_stats) exists for the
 * whole system; the counters are bumped from eeh_check_failure()
 * and eeh_dev_check_failure(). Besides, the EEH global statistics
 * will be exported to user space through procfs.
 */
struct eeh_stats {
u64 no_device; /* MMIO address had no EEH device in the address cache */
u64 no_dn; /* Check was requested with a NULL EEH device */
u64 no_cfg_addr; /* PE had neither a PE address nor a config address */
u64 ignored_check; /* EEH device had no PE, so the check was skipped */
u64 total_mmio_ffs; /* Total calls to eeh_dev_check_failure() */
u64 false_positives; /* Checks where the platform reported no failure */
u64 slot_resets; /* PEs that were marked isolated after a failed check */
};
static struct eeh_stats eeh_stats;
/*
 * Handler for the "eeh=" kernel command line option.
 *
 * "off" sets EEH_FORCE_DISABLED and "early_log" sets
 * EEH_EARLY_DUMP_LOG. Any other value is accepted without
 * changing a flag. Always returns 1.
 */
static int __init eeh_setup(char *str)
{
	if (strcmp(str, "off") == 0) {
		eeh_add_flag(EEH_FORCE_DISABLED);
		return 1;
	}

	if (strcmp(str, "early_log") == 0)
		eeh_add_flag(EEH_EARLY_DUMP_LOG);

	return 1;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Gavin Shan | 45 | 100.00% | 3 | 100.00% |
Total | 45 | 100.00% | 3 | 100.00% |
__setup("eeh=", eeh_setup);
/*
 * This routine captures assorted PCI configuration space data
 * for the indicated PCI device, and puts them into a buffer
 * for RTAS error logging. Every value written to the buffer is
 * also echoed to the kernel log with pr_warn().
 *
 * @edev: EEH device whose config space is dumped
 * @buf:  destination buffer
 * @len:  space available in @buf
 *
 * Returns the number of characters appended to @buf, or 0 when
 * the EEH device has no pci_dn to read config space through.
 */
static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
{
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	u32 cfg;
	int cap, i;
	int n = 0, l = 0;
	char buffer[128];

	/*
	 * Everything below dereferences pdn, and the lookup can come
	 * back empty. Bail out instead of oopsing in the error path.
	 */
	if (!pdn) {
		pr_warn("EEH: Note: No error log for absent device.\n");
		return 0;
	}

	n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
		       pdn->phb->global_number, pdn->busno,
		       PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
	pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
		pdn->phb->global_number, pdn->busno,
		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));

	eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
	pr_warn("EEH: PCI device/vendor: %08x\n", cfg);

	eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg);
	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
	pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);

	/* Gather bridge-specific registers */
	if (edev->mode & EEH_DEV_BRIDGE) {
		eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
		pr_warn("EEH: Bridge secondary status: %04x\n", cfg);

		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
		pr_warn("EEH: Bridge control: %04x\n", cfg);
	}

	/* Dump out the PCI-X command and status regs */
	cap = edev->pcix_cap;
	if (cap) {
		eeh_ops->read_config(pdn, cap, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
		pr_warn("EEH: PCI-X cmd: %08x\n", cfg);

		eeh_ops->read_config(pdn, cap+4, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
		pr_warn("EEH: PCI-X status: %08x\n", cfg);
	}

	/* If PCI-E capable, dump PCI-E cap 10 */
	cap = edev->pcie_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
		pr_warn("EEH: PCI-E capabilities and status follow:\n");

		/*
		 * Nine dwords go to @buf one per line; for the kernel log
		 * they are grouped four per line in the scratch buffer.
		 */
		for (i=0; i<=8; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				/* Flush the previous group before starting a new one */
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}
		}

		/* Flush the last (possibly partial) group */
		pr_warn("%s\n", buffer);
	}

	/* If AER capable, dump it */
	cap = edev->aer_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e AER:\n");
		pr_warn("EEH: PCI-E AER capability register set follows:\n");

		/* Fourteen dwords, same grouping scheme as above */
		for (i=0; i<=13; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E AER %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}
		}

		pr_warn("%s\n", buffer);
	}

	return n;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Linas Vepstas | 421 | 60.14% | 3 | 23.08% |
Gavin Shan | 273 | 39.00% | 7 | 53.85% |
Alexey Kardashevskiy | 2 | 0.29% | 1 | 7.69% |
Guilherme G. Piccoli | 2 | 0.29% | 1 | 7.69% |
Yijing Wang | 2 | 0.29% | 1 | 7.69% |
Total | 700 | 100.00% | 13 | 100.00% |
static void *eeh_dump_pe_log(void *data, void *flag)
{
struct eeh_pe *pe = data;
struct eeh_dev *edev, *tmp;
size_t *plen = flag;
eeh_pe_for_each_dev(pe, edev, tmp)
*plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
EEH_PCI_REGS_LOG_LEN - *plen);
return NULL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Gavin Shan | 65 | 100.00% | 1 | 100.00% |
Total | 65 | 100.00% | 1 | 100.00% |
/**
 * eeh_slot_error_detail - Generate combined log including driver log and error log
 * @pe: EEH PE
 * @severity: temporary or permanent error log
 *
 * Builds the combined log for @pe. The driver part is read from the
 * config space of the PE's devices into pci_regs_buf; the error part
 * is then fetched by the platform through eeh_ops->get_log(), which
 * is handed that buffer and the number of bytes used in it.
 */
void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
{
	size_t loglen = 0;

	/*
	 * When the PHB is fenced or dead, it's pointless to collect
	 * the data from PCI config space because it should return
	 * 0xFF's. For ER, we still retrieve the data from the PCI
	 * config space.
	 */
	if (pe->type & EEH_PE_PHB)
		goto fetch_log;

	/*
	 * For pHyp, we have to enable IO for log retrieval. Otherwise,
	 * 0xFF's is always returned from PCI config space.
	 *
	 * When the @severity is EEH_LOG_PERM, the PE is going to be
	 * removed. Prior to that, the drivers for devices included in
	 * the PE will be closed. The drivers rely on working IO path
	 * to bring the devices to quiet state. Otherwise, PCI traffic
	 * from those devices after they are removed is likely to cause
	 * another unexpected EEH error.
	 */
	if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG) ||
	    severity == EEH_LOG_PERM)
		eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);

	/*
	 * The config space of some PCI devices can't be accessed
	 * when their PEs are in frozen state. Otherwise, fenced
	 * PHB might be seen. Those PEs are identified with flag
	 * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED
	 * is set automatically when the PE is put to EEH_PE_ISOLATED.
	 *
	 * Restoring BARs possibly triggers PCI config access in
	 * (OPAL) firmware and then causes fenced PHB. If the
	 * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's
	 * pointless to restore BARs and dump config space.
	 */
	eeh_ops->configure_bridge(pe);

	if (pe->state & EEH_PE_CFG_BLOCKED)
		goto fetch_log;

	eeh_pe_restore_bars(pe);

	/* Start from an empty string, then append every device's dump */
	pci_regs_buf[0] = 0;
	eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen);

fetch_log:
	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Gavin Shan | 65 | 61.32% | 13 | 86.67% |
Linas Vepstas | 35 | 33.02% | 1 | 6.67% |
Richard A. Lary | 6 | 5.66% | 1 | 6.67% |
Total | 106 | 100.00% | 15 | 100.00% |
/**
 * eeh_token_to_phys - Convert EEH address token to phys address
 * @token: I/O token, should be address in the form 0xA....
 *
 * Looks the token up in the init_mm page tables and rebuilds the
 * physical address from the page frame number plus the offset of
 * the token inside its page. If no PTE is found the token itself
 * is returned unchanged.
 */
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
	int hugepage_shift;
	pte_t *pte;

	/*
	 * We won't find hugepages here(this is iomem). Hence we are not
	 * worried about _PAGE_SPLITTING/collapse. Also we will not hit
	 * page table free, because of init_mm.
	 */
	pte = find_init_mm_pte(token, &hugepage_shift);
	if (pte == NULL)
		return token;

	/* A huge mapping is not expected for I/O memory; complain if seen */
	WARN_ON(hugepage_shift);

	return (pte_pfn(*pte) << PAGE_SHIFT) | (token & (PAGE_SIZE-1));
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Anton Blanchard | 33 | 47.14% | 2 | 25.00% |
Aneesh Kumar K.V | 13 | 18.57% | 3 | 37.50% |
Andrew Morton | 13 | 18.57% | 1 | 12.50% |
Roland Dreier | 10 | 14.29% | 1 | 12.50% |
Paul Mackerras | 1 | 1.43% | 1 | 12.50% |
Total | 70 | 100.00% | 8 | 100.00% |
/*
 * On PowerNV platform, we might already have fenced PHB there.
 * For that case, it's meaningless to recover frozen PE. Instead,
 * we have to handle fenced PHB firstly.
 *
 * Returns 1 when the PHB PE was newly marked isolated and a failure
 * event was sent, 0 when there is nothing to do (the PHB is already
 * isolated, its state could not be read, or it is fully active),
 * -EPERM when EEH_PROBE_MODE_DEV is not set, and -EEXIST when the
 * PHB has no PE.
 */
static int eeh_phb_check_failure(struct eeh_pe *pe)
{
	const int active = EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE;
	struct eeh_pe *phb_pe;
	unsigned long flags;
	int state;

	if (!eeh_has_flag(EEH_PROBE_MODE_DEV))
		return -EPERM;

	/* Find the PHB PE */
	phb_pe = eeh_phb_pe_get(pe->phb);
	if (!phb_pe) {
		pr_warn("%s Can't find PE for PHB#%x\n",
			__func__, pe->phb->global_number);
		return -EEXIST;
	}

	eeh_serialize_lock(&flags);

	/* If the PHB has been in problematic state */
	if (phb_pe->state & EEH_PE_ISOLATED) {
		eeh_serialize_unlock(flags);
		return 0;
	}

	/* Check PHB state */
	state = eeh_ops->get_state(phb_pe, NULL);
	if (state < 0 ||
	    state == EEH_STATE_NOT_SUPPORT ||
	    (state & active) == active) {
		eeh_serialize_unlock(flags);
		return 0;
	}

	/* Isolate the PHB and send event */
	eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
	eeh_serialize_unlock(flags);

	pr_err("EEH: PHB#%x failure detected, location: %s\n",
	       phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
	dump_stack();
	eeh_send_failure_event(phb_pe);

	return 1;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Gavin Shan | 193 | 99.48% | 6 | 85.71% |
Russell Currey | 1 | 0.52% | 1 | 14.29% |
Total | 194 | 100.00% | 7 | 100.00% |
/**
 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
 * @edev: eeh device
 *
 * Check for an EEH failure for the given device node. Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze. This routine
 * will query firmware for the EEH status.
 *
 * Returns 0 if there has not been an EEH error; otherwise returns
 * a non-zero value and queues up a slot isolation event notification.
 * Note that 1 is also returned, without a new event, when the PE is
 * already marked isolated.
 *
 * It is safe to call this routine in an interrupt context.
 */
int eeh_dev_check_failure(struct eeh_dev *edev)
{
int ret;
int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
unsigned long flags;
struct device_node *dn;
struct pci_dev *dev;
struct eeh_pe *pe, *parent_pe, *phb_pe;
int rc = 0;
const char *location = NULL;
/* Counted on every call, even when EEH is disabled */
eeh_stats.total_mmio_ffs++;
if (!eeh_enabled())
return 0;
if (!edev) {
eeh_stats.no_dn++;
return 0;
}
dev = eeh_dev_to_pci_dev(edev);
pe = eeh_dev_to_pe(edev);
/* Access to IO BARs might get this far and still not want checking. */
if (!pe) {
eeh_stats.ignored_check++;
pr_debug("EEH: Ignored check for %s\n",
eeh_pci_name(dev));
return 0;
}
/* A PE with neither address set cannot be queried */
if (!pe->addr && !pe->config_addr) {
eeh_stats.no_cfg_addr++;
return 0;
}
/*
 * On PowerNV platform, we might already have fenced PHB
 * there and we need take care of that firstly. Only a positive
 * result (PHB failure reported) ends the check here; zero and
 * negative results fall through to the per-PE check.
 */
ret = eeh_phb_check_failure(pe);
if (ret > 0)
return ret;
/*
 * If the PE isn't owned by us, we shouldn't check the
 * state. Instead, let the owner handle it if the PE has
 * been frozen.
 */
if (eeh_pe_passed(pe))
return 0;
/* If we already have a pending isolation event for this
 * slot, we know it's bad already, we don't need to check.
 * Do this checking under a lock; as multiple PCI devices
 * in one slot might report errors simultaneously, and we
 * only want one error recovery routine running.
 */
eeh_serialize_lock(&flags);
/* From here on the default answer is "failure" unless cleared below */
rc = 1;
if (pe->state & EEH_PE_ISOLATED) {
pe->check_count++;
/* Complain once every EEH_MAX_FAILS reads of an isolated PE */
if (pe->check_count % EEH_MAX_FAILS == 0) {
dn = pci_device_to_OF_node(dev);
if (dn)
location = of_get_property(dn, "ibm,loc-code",
NULL);
printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
"location=%s driver=%s pci addr=%s\n",
pe->check_count,
location ? location : "unknown",
eeh_driver_name(dev), eeh_pci_name(dev));
printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
eeh_driver_name(dev));
dump_stack();
}
goto dn_unlock;
}
/*
 * Now test for an EEH failure. This is VERY expensive.
 * Note that the eeh_config_addr may be a parent device
 * in the case of a device behind a bridge, or it may be
 * function zero of a multi-function device.
 * In any case they must share a common PHB.
 */
ret = eeh_ops->get_state(pe, NULL);
/* Note that config-io to empty slots may fail;
 * they are empty when they don't have children.
 * We will punt with the following conditions: Failure to get
 * PE's state, EEH not support and Permanently unavailable
 * state, PE is in good state.
 */
if ((ret < 0) ||
(ret == EEH_STATE_NOT_SUPPORT) ||
((ret & active_flags) == active_flags)) {
eeh_stats.false_positives++;
pe->false_positives++;
rc = 0;
goto dn_unlock;
}
/*
 * It should be corner case that the parent PE has been
 * put into frozen state as well. We should take care
 * that at first. The walk goes all the way up to (but not
 * including) the PHB PE, so pe ends up pointing at the highest
 * ancestor whose state is not fully active.
 */
parent_pe = pe->parent;
while (parent_pe) {
/* Hit the ceiling ? */
if (parent_pe->type & EEH_PE_PHB)
break;
/* Frozen parent PE ? */
ret = eeh_ops->get_state(parent_pe, NULL);
if (ret > 0 &&
(ret & active_flags) != active_flags)
pe = parent_pe;
/* Next parent level */
parent_pe = parent_pe->parent;
}
eeh_stats.slot_resets++;
/* Avoid repeated reports of this failure, including problems
 * with other functions on this device, and functions under
 * bridges.
 */
eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
/* The lock is dropped before logging and before the event is sent */
eeh_serialize_unlock(flags);
/* Most EEH events are due to device driver bugs. Having
 * a stack trace will help the device-driver authors figure
 * out what happened. So print that out.
 */
phb_pe = eeh_phb_pe_get(pe->phb);
pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
pe->phb->global_number, pe->addr);
pr_err("EEH: PE location: %s, PHB location: %s\n",
eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
dump_stack();
eeh_send_failure_event(pe);
return 1;
/* Common exit for the paths that still hold the serialize lock */
dn_unlock:
eeh_serialize_unlock(flags);
return rc;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Gavin Shan | 243 | 50.94% | 15 | 36.59% |
Linas Vepstas | 101 | 21.17% | 10 | 24.39% |
Paul Mackerras | 36 | 7.55% | 6 | 14.63% |
Mike Mason | 33 | 6.92% | 1 | 2.44% |
Anton Blanchard | 31 | 6.50% | 3 | 7.32% |
Andrew Morton | 14 | 2.94% | 1 | 2.44% |
Alexey Kardashevskiy | 7 | 1.47% | 1 | 2.44% |
Thadeu Lima de Souza Cascardo | 6 | 1.26% | 1 | 2.44% |
Wei Yang | 3 | 0.63% | 1 | 2.44% |
Breno Leitão | 2 | 0.42% | 1 | 2.44% |
Benjamin Herrenschmidt | 1 | 0.21% | 1 | 2.44% |
Total | 477 | 100.00% | 41 | 100.00% |
EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
/**
 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
 * @token: I/O address
 *
 * Check for an EEH failure at the given I/O address. Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze event. This routine
 * will query firmware for the EEH status.
 *
 * Returns 0 when no EEH device is registered for the address,
 * otherwise the result of eeh_dev_check_failure() for that device.
 *
 * Note this routine is safe to call in an interrupt context.
 */
int eeh_check_failure(const volatile void __iomem *token)
{
	struct eeh_dev *edev;
	unsigned long phys;

	/* Finding the phys addr + pci device; this is pretty quick. */
	phys = eeh_token_to_phys((unsigned long __force) token);
	edev = eeh_addr_cache_get_dev(phys);
	if (edev)
		return eeh_dev_check_failure(edev);

	eeh_stats.no_device++;
	return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Paul Mackerras | 39 | 62.90% | 1 | 11.11% |
Gavin Shan | 12 | 19.35% | 4 | 44.44% |
Linas Vepstas | 5 | 8.06% | 1 | 11.11% |
Linus Torvalds | 3 | 4.84% | 1 | 11.11% |
Anton Blanchard | 2 | 3.23% | 1 | 11.11% |
Roland Dreier | 1 | 1.61% | 1 | 11.11% |
Total | 62 | 100.00% | 9 | 100.00% |
EXPORT_SYMBOL(eeh_check_failure);
/**
 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
 * @pe: EEH PE
 * @function: one of the EEH_OPT_* requests (THAW_MMIO, THAW_DMA,
 *            DISABLE, ENABLE or FREEZE_PE)
 *
 * This routine should be called to reenable frozen MMIO or DMA
 * so that it would work correctly again. It's useful while doing
 * recovery or log collection on the indicated device.
 *
 * Return: for the two thaw requests, 0 if the wanted state is (or
 * already was) reached or EEH state is not supported, a negative
 * value passed through from get_state/wait_state, or -EIO if the
 * state was not reached after waiting. For the other valid requests
 * the result of eeh_ops->set_option(). -EINVAL for an unknown
 * @function.
 */
int eeh_pci_enable(struct eeh_pe *pe, int function)
{
	int wanted, rc;

	/*
	 * pHyp doesn't allow to enable IO or DMA on unfrozen PE.
	 * Also, it's pointless to enable them on unfrozen PE. So
	 * we have to check before enabling IO or DMA.
	 */
	if (function == EEH_OPT_THAW_MMIO) {
		wanted = EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED;
	} else if (function == EEH_OPT_THAW_DMA) {
		wanted = EEH_STATE_DMA_ACTIVE;
	} else if (function == EEH_OPT_DISABLE ||
		   function == EEH_OPT_ENABLE ||
		   function == EEH_OPT_FREEZE_PE) {
		/* Nothing to verify before or after for these requests */
		wanted = 0;
	} else {
		pr_warn("%s: Invalid function %d\n",
			__func__, function);
		return -EINVAL;
	}

	/*
	 * Check if IO or DMA has been enabled before
	 * enabling them.
	 */
	if (wanted) {
		rc = eeh_ops->get_state(pe, NULL);
		if (rc < 0)
			return rc;

		/* Needn't enable it at all, or it's already enabled */
		if (rc == EEH_STATE_NOT_SUPPORT || (rc & wanted))
			return 0;
	}

	/* Issue the request */
	rc = eeh_ops->set_option(pe, function);
	if (rc)
		pr_warn("%s: Unexpected state change %d on "
			"PHB#%x-PE#%x, err=%d\n",
			__func__, function, pe->phb->global_number,
			pe->addr, rc);

	if (!wanted)
		return rc;

	/* Check if the request is finished successfully */
	rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
	if (rc < 0)
		return rc;

	return (rc & wanted) ? 0 : -EIO;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Gavin Shan | 150 | 75.38% | 8 | 66.67% |
Linas Vepstas | 47 | 23.62% | 2 | 16.67% |
Russell Currey | 1 | 0.50% | 1 | 8.33% |
Andrew Donnellan | 1 | 0.50% | 1 | 8.33% |
Total | 199 | 100.00% | 12 | 100.00% |
/*
 * PE device traversal callback used before a PE reset.
 *
 * @data:     the EEH device being visited
 * @userdata: the PCI device to leave alone; the caller should have
 *            disabled and saved the state for that one itself
 *
 * Every other device that has a pci_dev is put into D0, has its
 * state saved and then gets its command register overwritten.
 * Always returns NULL.
 */
static void *eeh_disable_and_save_dev_state(void *data, void *userdata)
{
	struct eeh_dev *edev = data;
	struct pci_dev *skip = userdata;
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);

	if (pdev && pdev != skip) {
		/* Ensure we have D0 power state */
		pci_set_power_state(pdev, PCI_D0);

		/* Save device state */
		pci_save_state(pdev);

		/*
		 * Disable device to avoid any DMA traffic and
		 * interrupt from the device: the command register is
		 * written with only the INTx-disable bit set.
		 */
		pci_write_config_word(pdev, PCI_COMMAND,
				      PCI_COMMAND_INTX_DISABLE);
	}

	return NULL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Gavin Shan | 79 | 100.00% | 1 | 100.00% |
Total | 79 | 100.00% | 1 | 100.00% |
/*
 * PE device traversal callback used after a PE reset has been
 * deasserted.
 *
 * @data:     the EEH device being visited
 * @userdata: the PCI device whose state the caller restores itself
 *
 * Devices without a pci_dev are skipped. For the rest, the optional
 * platform restore_config hook runs first (when the device has a
 * pci_dn), then the saved PCI state is restored for every device
 * except @userdata. Always returns NULL.
 */
static void *eeh_restore_dev_state(void *data, void *userdata)
{
	struct eeh_dev *edev = data;
	struct pci_dev *skip = userdata;
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);

	if (pdev) {
		/* Apply customization from firmware */
		if (pdn && eeh_ops->restore_config)
			eeh_ops->restore_config(pdn);

		/* The caller should restore state for the specified device */
		if (pdev != skip)
			pci_restore_state(pdev);
	}

	return NULL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Gavin Shan | 87 | 98.86% | 2 | 66.67% |
David Gibson | 1 | 1.14% | 1 | 33.33% |
Total | 88 | 100.00% | 3 | 100.00% |
/**
 * pcibios_set_pcie_reset_state - Set PCI-E reset state
 * @dev: pci device struct
 * @state: reset state to enter
 *
 * Asserting a hot or warm reset isolates and freezes the PE, saves
 * and disables the other devices in it, and then issues a hot or
 * fundamental reset respectively. Deasserting undoes this: the reset
 * is released, the PE unfrozen, and the other devices restored.
 *
 * Return value:
 * 0 if success, -EINVAL if @dev has no PE or @state is unknown
 */
int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
{
	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
	struct eeh_pe *pe = eeh_dev_to_pe(edev);

	if (!pe) {
		pr_err("%s: No PE found on PCI device %s\n",
		       __func__, pci_name(dev));
		return -EINVAL;
	}

	switch (state) {
	case pcie_deassert_reset:
		eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
		eeh_unfreeze_pe(pe, false);
		/* The config-blocked flag is left untouched for VF PEs */
		if (!(pe->type & EEH_PE_VF))
			eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
		eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
		break;
	case pcie_hot_reset:
	case pcie_warm_reset:
		/* Both assert paths are identical up to the reset type */
		eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED);
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
		if (!(pe->type & EEH_PE_VF))
			eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		if (state == pcie_hot_reset)
			eeh_ops->reset(pe, EEH_RESET_HOT);
		else
			eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
		break;
	default:
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED);
		return -EINVAL;
	}

	return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Gavin Shan | 151 | 61.13% | 7 | 70.00% |
Brian King | 60 | 24.29% | 1 | 10.00% |
Wei Yang | 36 | 14.57% | 2 | 20.00% |
Total | 247 | 100.00% | 10 | 100.00% |
/**
 * eeh_set_dev_freset - Check the required reset for the indicated device
 * @data: EEH device
 * @flag: return value
 *
 * Each device might have its preferred reset type: fundamental or
 * hot reset. This traversal callback ORs the device's needs_freset
 * setting into the unsigned int that @flag points to, so that after
 * visiting all devices the caller knows whether any of them asked
 * for a fundamental reset. Devices without a pci_dev contribute
 * nothing. Always returns NULL.
 */
static void *eeh_set_dev_freset(void *data, void *flag)
{
	struct eeh_dev *edev = data;
	unsigned int *freset = flag;
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);

	if (pdev)
		*freset |= pdev->needs_freset;

	return NULL;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Gavin Shan | 65 | 100.00% | 2 | 100.00% |
Total | 65 | 100.00% | 2 | 100.00% |
/**
 * eeh_pe_reset_full - Complete a full reset process on the indicated PE
 * @pe: EEH PE
 *
 * This function executes a full reset procedure on a PE, including setting
 * the appropriate flags, performing a fundamental or hot reset, and then
 * deactivating the reset status. It is designed to be used within the EEH
 * subsystem, as opposed to eeh_pe_reset which is exported to drivers and
 * only performs a single operation at a time.
 *
 * This function will attempt to reset a PE three times before failing.
 *
 * Return: 0 once the PE reports both MMIO and DMA active, the error
 * from eeh_pe_reset() if asserting or deasserting the reset fails,
 * -ENOTRECOVERABLE if waiting for the PE state returns an error, or
 * -EIO if the PE is still not active after the last attempt.
 */
int eeh_pe_reset_full(struct eeh_pe *pe)
{
	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
	int type = EEH_RESET_HOT;
	unsigned int freset = 0;
	int i, state, ret;

	/*
	 * Determine the type of reset to perform - hot or fundamental.
	 * Hot reset is the default operation, unless any device under the
	 * PE requires a fundamental reset.
	 */
	eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
	if (freset)
		type = EEH_RESET_FUNDAMENTAL;

	/* Mark the PE as in reset state and block config space accesses */
	eeh_pe_state_mark(pe, reset_state);

	/* Make three attempts at resetting the bus */
	for (i = 0; i < 3; i++) {
		ret = eeh_pe_reset(pe, type);
		if (ret)
			break;

		ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
		if (ret)
			break;

		/* Wait until the PE is in a functioning state */
		state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);

		/*
		 * Errors must be ruled out before looking at the state
		 * bits: a negative errno can have both active bits set
		 * and would otherwise be mistaken for a healthy PE.
		 */
		if (state < 0) {
			pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x\n",
				__func__, pe->phb->global_number, pe->addr);
			ret = -ENOTRECOVERABLE;
			break;
		}

		/* ret is 0 here from the successful deactivate above */
		if ((state & active_flags) == active_flags)
			break;

		/* Set error in case this is our last attempt */
		ret = -EIO;
		pr_warn("%s: Failure %d resetting PHB#%x-PE#%x (%d)\n",
			__func__, state, pe->phb->global_number, pe->addr, (i + 1));
	}

	eeh_pe_state_clear(pe, reset_state);
	return ret;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Russell Currey | 70 | 33.02% | 2 | 12.50% |
Gavin Shan | 70 | 33.02% | 7 | 43.75% |
Linas Vepstas | 56 | 26.42% | 5 | 31.25% |
Richard A. Lary | 12 | 5.66% | 1 | 6.25% |
Mike Mason | 4 | 1.89% | 1 | 6.25% |
Total | 212 | 100.00% | 16 | 100.00% |
/**
 * eeh_save_bars - Save device bars
 * @edev: PCI device associated EEH device
 *
 * Reads the first 16 dwords of the device's config space into
 * edev->config_space. Unlike the restore routine, this routine is
 * *not* recursive. This is because PCI devices are added
 * individually; but, for the restore, an entire slot is reset at
 * a time. Does nothing if the device has no pci_dn.
 */
void eeh_save_bars(struct eeh_dev *edev)
{
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	int reg;

	if (!pdn)
		return;

	for (reg = 0; reg < 16; reg++)
		eeh_ops->read_config(pdn, reg * 4, 4,
				     &edev->config_space[reg]);

	/*
	 * For PCI bridges including root port, we need enable bus
	 * master explicitly. Otherwise, it can't fetch IODA table
	 * entries correctly. So we cache the bit in advance so that
	 * we can restore it after reset, either PHB range or PE range.
	 */
	if (edev->mode & EEH_DEV_BRIDGE)
		edev->config_space[1] |= PCI_COMMAND_MASTER;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Gavin Shan | 47 | 56.63% | 4 | 66.67% |
Linas Vepstas | 36 | 43.37% | 2 | 33.33% |
Total | 83 | 100.00% | 6 | 100.00% |
/**
 * eeh_ops_register - Register platform dependent EEH operations
 * @ops: platform dependent EEH operations
 *
 * Register the platform dependent EEH operation callback
 * functions. The platform should call this function before
 * any other EEH operations.
 *
 * Return: 0 on success (registering the same @ops again is allowed),
 * -EINVAL if @ops has no name, -EEXIST if a different set of
 * operations is already registered.
 */
int __init eeh_ops_register(struct eeh_ops *ops)
{
	if (ops->name == NULL) {
		pr_warn("%s: Invalid EEH ops name for %p\n",
			__func__, ops);
		return -EINVAL;
	}

	if (eeh_ops != NULL && eeh_ops != ops) {
		pr_warn("%s: EEH ops of platform %s already existing (%s)\n",
			__func__, eeh_ops->name, ops->name);
		return -EEXIST;
	}

	eeh_ops = ops;

	return 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Gavin Shan | 69 | 100.00% | 2 | 100.00% |
Total | 69 | 100.00% | 2 | 100.00% |
/**
 * eeh_ops_unregister - Unregister platform dependent EEH operations
* @name: name of EEH platform operations
*
* Unregister the platform dependent EEH operation callback
* functions.
*/
int __exit eeh_ops_unregister(const char