From 7d7cbeaba5b7aea8e1e4eb988d6b5e7cb3c34490 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 4 Jan 2021 15:02:56 -0800 Subject: PCI/ERR: Clear status of the reporting device Error handling operates on the first Downstream Port above the detected error, but the error may have been reported by a downstream device. Clear the AER status of the device that reported the error rather than the first Downstream Port. Link: https://lore.kernel.org/r/20210104230300.1277180-2-kbusch@kernel.org Tested-by: Hedi Berriche Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas Acked-by: Sean V Kelley Acked-by: Hedi Berriche --- drivers/pci/pcie/err.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/pci/pcie/err.c') diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 510f31f0ef6d..a84f0bf4c1e2 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -231,15 +231,14 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pci_walk_bridge(bridge, report_resume, &status); /* - * If we have native control of AER, clear error status in the Root - * Port or Downstream Port that signaled the error. If the - * platform retained control of AER, it is responsible for clearing - * this status. In that case, the signaling device may not even be - * visible to the OS. + * If we have native control of AER, clear error status in the device + * that detected the error. If the platform retained control of AER, + * it is responsible for clearing this status. In that case, the + * signaling device may not even be visible to the OS. */ if (host->native_aer || pcie_ports_native) { - pcie_clear_device_status(bridge); - pci_aer_clear_nonfatal_status(bridge); + pcie_clear_device_status(dev); + pci_aer_clear_nonfatal_status(dev); } pci_info(bridge, "device recovery successful\n"); return status; -- cgit v1.2.3 From 387c72cdd7fb6bef650fb078d0f6ae9682abf631 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 4 Jan 2021 15:02:58 -0800 Subject: PCI/ERR: Retain status from error notification Overwriting the frozen detected status with the result of the link reset loses the NEED_RESET result that drivers are depending on for error handling to report the .slot_reset() callback. Retain this status so that subsequent error handling has the correct flow. Link: https://lore.kernel.org/r/20210104230300.1277180-4-kbusch@kernel.org Reported-by: Hinko Kocevar Tested-by: Hedi Berriche Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas Acked-by: Sean V Kelley Acked-by: Hedi Berriche --- drivers/pci/pcie/err.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/pci/pcie/err.c') diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index a84f0bf4c1e2..b576aa890c76 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -198,8 +198,7 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pci_dbg(bridge, "broadcast error_detected message\n"); if (state == pci_channel_io_frozen) { pci_walk_bridge(bridge, report_frozen_detected, &status); - status = reset_subordinates(bridge); - if (status != PCI_ERS_RESULT_RECOVERED) { + if (reset_subordinates(bridge) != PCI_ERS_RESULT_RECOVERED) { pci_warn(bridge, "subordinate device reset failed\n"); goto failed; } -- cgit v1.2.3