summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/kernel/eeh.c27
-rw-r--r--arch/powerpc/platforms/powernv/eeh-ioda.c30
2 files changed, 52 insertions, 5 deletions
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 7051ea3101b9..c25064b7d667 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -358,10 +358,11 @@ out:
int eeh_dev_check_failure(struct eeh_dev *edev)
{
int ret;
+ int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
unsigned long flags;
struct device_node *dn;
struct pci_dev *dev;
- struct eeh_pe *pe;
+ struct eeh_pe *pe, *parent_pe;
int rc = 0;
const char *location;
@@ -439,14 +440,34 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
*/
if ((ret < 0) ||
(ret == EEH_STATE_NOT_SUPPORT) ||
- (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
- (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
+ ((ret & active_flags) == active_flags)) {
eeh_stats.false_positives++;
pe->false_positives++;
rc = 0;
goto dn_unlock;
}
+ /*
+ * It should be corner case that the parent PE has been
+ * put into frozen state as well. We should take care
+ * that at first.
+ */
+ parent_pe = pe->parent;
+ while (parent_pe) {
+ /* Hit the ceiling ? */
+ if (parent_pe->type & EEH_PE_PHB)
+ break;
+
+ /* Frozen parent PE ? */
+ ret = eeh_ops->get_state(parent_pe, NULL);
+ if (ret > 0 &&
+ (ret & active_flags) != active_flags)
+ pe = parent_pe;
+
+ /* Next parent level */
+ parent_pe = parent_pe->parent;
+ }
+
eeh_stats.slot_resets++;
/* Avoid repeated reports of this failure, including problems
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index e0d6a3a213e2..68167cd9ea97 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -705,11 +705,12 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
{
struct pci_controller *hose;
struct pnv_phb *phb;
- struct eeh_pe *phb_pe;
+ struct eeh_pe *phb_pe, *parent_pe;
__be64 frozen_pe_no;
__be16 err_type, severity;
+ int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
long rc;
- int ret = EEH_NEXT_ERR_NONE;
+ int state, ret = EEH_NEXT_ERR_NONE;
/*
* While running here, it's safe to purge the event queue.
@@ -839,6 +840,31 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
}
/*
+ * We probably have the frozen parent PE out there and
+ * we need have to handle frozen parent PE firstly.
+ */
+ if (ret == EEH_NEXT_ERR_FROZEN_PE) {
+ parent_pe = (*pe)->parent;
+ while (parent_pe) {
+ /* Hit the ceiling ? */
+ if (parent_pe->type & EEH_PE_PHB)
+ break;
+
+ /* Frozen parent PE ? */
+ state = ioda_eeh_get_state(parent_pe);
+ if (state > 0 &&
+ (state & active_flags) != active_flags)
+ *pe = parent_pe;
+
+ /* Next parent level */
+ parent_pe = parent_pe->parent;
+ }
+
+ /* We possibly migrate to another PE */
+ eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
+ }
+
+ /*
* If we have no errors on the specific PHB or only
* informative error there, we continue poking it.
* Otherwise, we need actions to be taken by upper