From 202853595e53f981c86656c49fc1cc1e3620f558 Mon Sep 17 00:00:00 2001 From: Alexandru Gagniuc Date: Fri, 25 Oct 2019 15:00:45 -0400 Subject: PCI: pciehp: Disable in-band presence detect when possible The presence detect state (PDS) is normally a logical OR of in-band and out-of-band (OOB) presence detect. As of PCIe 4.0, there is the option to disable in-band presence so that the PDS bit always reflects the state of the out-of-band presence. The recommendation of the PCIe spec is to disable in-band presence whenever supported (PCIe r5.0, appendix I implementation note): Due to architectural issues, the in-band (Physical-Layer-based) portion of the PD mechanism is deprecated for use with async hot-plug. One issue is that in-band PD as architected does not detect adapter removal during certain LTSSM states, notably the L1 and Disabled States. Another issue is that when both in-band and OOB PD are being used together, the Presence Detect State bit and its associated interrupt mechanism always reflect the logical OR of the inband and OOB PD states, and with some hot-plug hardware configurations, it is important for software to detect and respond to in-band and OOB PD events independently. If OOB PD is being used and the associated DSP supports In-Band PD Disable, it is recommended that the In-Band PD Disable bit be Set, and the Presence Detect State bit and its associated interrupt mechanism be used exclusively for OOB PD. As a substitute for in-band PD with async hot-plug, the reference model uses either the DPC or the DLL Link Active mechanism. Link: https://lore.kernel.org/r/20191025190047.38130-2-stuart.w.hayes@gmail.com [bhelgaas: move PCI_EXP_SLTCAP2 read earlier & print PCI_EXP_SLTCAP2_IBPD value (suggested by Lukas)] Signed-off-by: Alexandru Gagniuc Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Reviewed-by: Lukas Wunner --- drivers/pci/hotplug/pciehp.h | 1 + drivers/pci/hotplug/pciehp_hpc.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index aa61d4c219d7..ae44f46d1bf3 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -84,6 +84,7 @@ struct controller { struct pcie_device *pcie; u32 slot_cap; /* capabilities and quirks */ + unsigned int inband_presence_disabled:1; u16 slot_ctrl; /* control register access */ struct mutex ctrl_lock; diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 8a2cb1764386..a573490289c3 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -848,7 +848,7 @@ static inline void dbg_ctrl(struct controller *ctrl) struct controller *pcie_init(struct pcie_device *dev) { struct controller *ctrl; - u32 slot_cap, link_cap; + u32 slot_cap, slot_cap2, link_cap; u8 poweron; struct pci_dev *pdev = dev->port; struct pci_bus *subordinate = pdev->subordinate; @@ -883,6 +883,13 @@ struct controller *pcie_init(struct pcie_device *dev) ctrl->state = list_empty(&subordinate->devices) ? OFF_STATE : ON_STATE; up_read(&pci_bus_sem); + pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP2, &slot_cap2); + if (slot_cap2 & PCI_EXP_SLTCAP2_IBPD) { + pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_IBPD_DISABLE, + PCI_EXP_SLTCTL_IBPD_DISABLE); + ctrl->inband_presence_disabled = 1; + } + /* Check if Data Link Layer Link Active Reporting is implemented */ pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &link_cap); @@ -892,7 +899,7 @@ struct controller *pcie_init(struct pcie_device *dev) PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_CC | PCI_EXP_SLTSTA_DLLSC | PCI_EXP_SLTSTA_PDC); - ctrl_info(ctrl, "Slot #%d AttnBtn%c PwrCtrl%c MRL%c AttnInd%c PwrInd%c HotPlug%c Surprise%c Interlock%c NoCompl%c LLActRep%c%s\n", + ctrl_info(ctrl, "Slot #%d AttnBtn%c PwrCtrl%c MRL%c AttnInd%c PwrInd%c HotPlug%c Surprise%c Interlock%c NoCompl%c IbPresDis%c LLActRep%c%s\n", (slot_cap & PCI_EXP_SLTCAP_PSN) >> 19, FLAG(slot_cap, PCI_EXP_SLTCAP_ABP), FLAG(slot_cap, PCI_EXP_SLTCAP_PCP), @@ -903,6 +910,7 @@ struct controller *pcie_init(struct pcie_device *dev) FLAG(slot_cap, PCI_EXP_SLTCAP_HPS), FLAG(slot_cap, PCI_EXP_SLTCAP_EIP), FLAG(slot_cap, PCI_EXP_SLTCAP_NCCS), + FLAG(slot_cap2, PCI_EXP_SLTCAP2_IBPD), FLAG(link_cap, PCI_EXP_LNKCAP_DLLLARC), pdev->broken_cmd_compl ? " (with Cmd Compl erratum)" : ""); -- cgit v1.2.3 From f496648b99f8f7f6711f7c30a6327381f37dd1e8 Mon Sep 17 00:00:00 2001 From: Alexandru Gagniuc Date: Fri, 25 Oct 2019 15:00:46 -0400 Subject: PCI: pciehp: Wait for PDS if in-band presence is disabled When in-band presence detect is disabled, PDS may come up at any time or not at all. PDS being low may indicate that the card is still mating, and we could expect contact bounce to bring down the link as well. It is reasonable to assume that most cards will mate in a hotplug slot in about a second. Thus, when we know PDS only reflects out-of-band presence detect, it's worthwhile to wait the extra second or so to make sure the card is properly mated before loading the driver and to prevent the hotplug code from disabling a device if the presence detect change goes active after the device is enabled. Link: https://lore.kernel.org/r/20191025190047.38130-3-stuart.w.hayes@gmail.com [bhelgaas: use ctrl_info() instead of pci_info()] Signed-off-by: Alexandru Gagniuc Signed-off-by: Stuart Hayes Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Reviewed-by: Lukas Wunner --- drivers/pci/hotplug/pciehp_hpc.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index a573490289c3..bb9ad0032e6b 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -252,6 +252,22 @@ static bool pci_bus_check_dev(struct pci_bus *bus, int devfn) return found; } +static void pcie_wait_for_presence(struct pci_dev *pdev) +{ + int timeout = 1250; + u16 slot_status; + + do { + pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status); + if (slot_status & PCI_EXP_SLTSTA_PDS) + return; + msleep(10); + timeout -= 10; + } while (timeout > 0); + + pci_info(pdev, "Timeout waiting for Presence Detect\n"); +} + int pciehp_check_link_status(struct controller *ctrl) { struct pci_dev *pdev = ctrl_dev(ctrl); @@ -261,6 +277,9 @@ int pciehp_check_link_status(struct controller *ctrl) if (!pcie_wait_for_link(pdev, true)) return -1; + if (ctrl->inband_presence_disabled) + pcie_wait_for_presence(pdev); + found = pci_bus_check_dev(ctrl->pcie->port->subordinate, PCI_DEVFN(0, 0)); -- cgit v1.2.3 From 0b382546d863f2f09eecaccda95a0b4bfd148f92 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Fri, 25 Oct 2019 15:00:47 -0400 Subject: PCI: pciehp: Add DMI table for in-band presence detection disabled Some systems have in-band presence detection disabled for hot-plug PCI slots but do not report this in the slot capabilities 2 (SLTCAP2) register. On these systems, presence detect can become active well after the link is reported to be active, which can cause the slots to be disabled after a device is connected. Add a DMI table to flag these systems as having in-band presence detect disabled. Link: https://lore.kernel.org/r/20191025190047.38130-4-stuart.w.hayes@gmail.com Signed-off-by: Stuart Hayes Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Reviewed-by: Lukas Wunner --- drivers/pci/hotplug/pciehp_hpc.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index bb9ad0032e6b..e4627c68b30f 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -14,6 +14,7 @@ #define dev_fmt(fmt) "pciehp: " fmt +#include #include #include #include @@ -26,6 +27,24 @@ #include "../pci.h" #include "pciehp.h" +static const struct dmi_system_id inband_presence_disabled_dmi_table[] = { + /* + * Match all Dell systems, as some Dell systems have inband + * presence disabled on NVMe slots (but don't support the bit to + * report it). Setting inband presence disabled should have no + * negative effect, except on broken hotplug slots that never + * assert presence detect--and those will still work, they will + * just have a bit of extra delay before being probed. + */ + { + .ident = "Dell System", + .matches = { + DMI_MATCH(DMI_OEM_STRING, "Dell System"), + }, + }, + {} +}; + static inline struct pci_dev *ctrl_dev(struct controller *ctrl) { return ctrl->pcie->port; @@ -909,6 +928,9 @@ struct controller *pcie_init(struct pcie_device *dev) ctrl->inband_presence_disabled = 1; } + if (dmi_first_match(inband_presence_disabled_dmi_table)) + ctrl->inband_presence_disabled = 1; + /* Check if Data Link Layer Link Active Reporting is implemented */ pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &link_cap); -- cgit v1.2.3 From 3e487d2e4aa466decd226353755c9d423e8fbacc Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 18 Mar 2020 12:33:12 +0100 Subject: PCI: pciehp: Fix indefinite wait on sysfs requests David Hoyer reports that powering pciehp slots up or down via sysfs may hang: The call to wait_event() in pciehp_sysfs_enable_slot() and _disable_slot() does not return because ctrl->ist_running remains true. This flag, which was introduced by commit 157c1062fcd8 ("PCI: pciehp: Avoid returning prematurely from sysfs requests"), signifies that the IRQ thread pciehp_ist() is running. It is set to true at the top of pciehp_ist() and reset to false at the end. However there are two additional return statements in pciehp_ist() before which the commit neglected to reset the flag to false and wake up waiters for the flag. That omission opens up the following race when powering up the slot: * pciehp_ist() runs because a PCI_EXP_SLTSTA_PDC event was requested by pciehp_sysfs_enable_slot() * pciehp_ist() turns on slot power via the following call stack: pciehp_handle_presence_or_link_change() -> pciehp_enable_slot() -> __pciehp_enable_slot() -> board_added() -> pciehp_power_on_slot() * after slot power is turned on, the link comes up, resulting in a PCI_EXP_SLTSTA_DLLSC event * the IRQ handler pciehp_isr() stores the event in ctrl->pending_events and returns IRQ_WAKE_THREAD * the IRQ thread is already woken (it's bringing up the slot), but the genirq code remembers to re-run the IRQ thread after it has finished (such that it can deal with the new event) by setting IRQTF_RUNTHREAD via __handle_irq_event_percpu() -> __irq_wake_thread() * the IRQ thread removes PCI_EXP_SLTSTA_DLLSC from ctrl->pending_events via board_added() -> pciehp_check_link_status() in order to deal with presence and link flaps per commit 6c35a1ac3da6 ("PCI: pciehp: Tolerate initially unstable link") * after pciehp_ist() has successfully brought up the slot, it resets ctrl->ist_running to false and wakes up the sysfs requester * the genirq code re-runs pciehp_ist(), which sets ctrl->ist_running to true but then returns with IRQ_NONE because ctrl->pending_events is empty * pciehp_sysfs_enable_slot() is finally woken but notices that ctrl->ist_running is true, hence continues waiting The only way to get the hung task going again is to trigger a hotplug event which brings down the slot, e.g. by yanking out the card. The same race exists when powering down the slot because remove_board() likewise clears link or presence changes in ctrl->pending_events per commit 3943af9d01e9 ("PCI: pciehp: Ignore Link State Changes after powering off a slot") and thereby may cause a re-run of pciehp_ist() which returns with IRQ_NONE without resetting ctrl->ist_running to false. Fix by adding a goto label before the teardown steps at the end of pciehp_ist() and jumping to that label from the two return statements which currently neglect to reset the ctrl->ist_running flag. Fixes: 157c1062fcd8 ("PCI: pciehp: Avoid returning prematurely from sysfs requests") Link: https://lore.kernel.org/r/cca1effa488065cb055120aa01b65719094bdcb5.1584530321.git.lukas@wunner.de Reported-by: David Hoyer Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Reviewed-by: Keith Busch Cc: stable@vger.kernel.org # v4.19+ --- drivers/pci/hotplug/pciehp_hpc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index e4627c68b30f..5f1a27bfcb19 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -663,17 +663,15 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) if (atomic_fetch_and(~RERUN_ISR, &ctrl->pending_events) & RERUN_ISR) { ret = pciehp_isr(irq, dev_id); enable_irq(irq); - if (ret != IRQ_WAKE_THREAD) { - pci_config_pm_runtime_put(pdev); - return ret; - } + if (ret != IRQ_WAKE_THREAD) + goto out; } synchronize_hardirq(irq); events = atomic_xchg(&ctrl->pending_events, 0); if (!events) { - pci_config_pm_runtime_put(pdev); - return IRQ_NONE; + ret = IRQ_NONE; + goto out; } /* Check Attention Button Pressed */ @@ -702,10 +700,12 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) pciehp_handle_presence_or_link_change(ctrl, events); up_read(&ctrl->reset_lock); + ret = IRQ_HANDLED; +out: pci_config_pm_runtime_put(pdev); ctrl->ist_running = false; wake_up(&ctrl->requester); - return IRQ_HANDLED; + return ret; } static int pciehp_poll(void *data) -- cgit v1.2.3 From 8edf5332c39340b9583cf9cba659eb7ec71f75b5 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Wed, 19 Feb 2020 15:31:13 +0100 Subject: PCI: pciehp: Fix MSI interrupt race Without this commit, a PCIe hotplug port can stop generating interrupts on hotplug events, so device adds and removals will not be seen: The pciehp interrupt handler pciehp_isr() reads the Slot Status register and then writes back to it to clear the bits that caused the interrupt. If a different interrupt event bit gets set between the read and the write, pciehp_isr() returns without having cleared all of the interrupt event bits. If this happens when the MSI isn't masked (which by default it isn't in handle_edge_irq(), and which it will never be when MSI per-vector masking is not supported), we won't get any more hotplug interrupts from that device. That is expected behavior, according to the PCIe Base Spec r5.0, section 6.7.3.4, "Software Notification of Hot-Plug Events". Because the Presence Detect Changed and Data Link Layer State Changed event bits can both get set at nearly the same time when a device is added or removed, this is more likely to happen than it might seem. The issue was found (and can be reproduced rather easily) by connecting and disconnecting an NVMe storage device on at least one system model where the NVMe devices were being connected to an AMD PCIe port (PCI device 0x1022/0x1483). Fix the issue by modifying pciehp_isr() to loop back and re-read the Slot Status register immediately after writing to it, until it sees that all of the event status bits have been cleared. [lukas: drop loop count limitation, write "events" instead of "status", don't loop back in INTx and poll modes, tweak code comment & commit msg] Link: https://lore.kernel.org/r/78b4ced5072bfe6e369d20e8b47c279b8c7af12e.1582121613.git.lukas@wunner.de Tested-by: Stuart Hayes Signed-off-by: Stuart Hayes Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Reviewed-by: Joerg Roedel --- drivers/pci/hotplug/pciehp_hpc.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 5f1a27bfcb19..53433b37e181 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -565,7 +565,7 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id) struct controller *ctrl = (struct controller *)dev_id; struct pci_dev *pdev = ctrl_dev(ctrl); struct device *parent = pdev->dev.parent; - u16 status, events; + u16 status, events = 0; /* * Interrupts only occur in D3hot or shallower and only if enabled @@ -590,6 +590,7 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id) } } +read_status: pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &status); if (status == (u16) ~0) { ctrl_info(ctrl, "%s: no response from device\n", __func__); @@ -602,24 +603,37 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id) * Slot Status contains plain status bits as well as event * notification bits; right now we only want the event bits. */ - events = status & (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD | - PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_CC | - PCI_EXP_SLTSTA_DLLSC); + status &= PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD | + PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_CC | + PCI_EXP_SLTSTA_DLLSC; /* * If we've already reported a power fault, don't report it again * until we've done something to handle it. */ if (ctrl->power_fault_detected) - events &= ~PCI_EXP_SLTSTA_PFD; + status &= ~PCI_EXP_SLTSTA_PFD; + events |= status; if (!events) { if (parent) pm_runtime_put(parent); return IRQ_NONE; } - pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, events); + if (status) { + pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, events); + + /* + * In MSI mode, all event bits must be zero before the port + * will send a new interrupt (PCIe Base Spec r5.0 sec 6.7.3.4). + * So re-read the Slot Status register in case a bit was set + * between read and write. + */ + if (pci_dev_msi_enabled(pdev) && !pciehp_poll_mode) + goto read_status; + } + ctrl_dbg(ctrl, "pending interrupts %#06x from Slot Status\n", events); if (parent) pm_runtime_put(parent); -- cgit v1.2.3