From 07eab0901ede8b7540c52160663bd300cc238164 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 8 Dec 2022 13:03:38 -0600 Subject: efi/x86: Remove EfiMemoryMappedIO from E820 map Firmware can use EfiMemoryMappedIO to request that MMIO regions be mapped by the OS so they can be accessed by EFI runtime services, but should have no other significance to the OS (UEFI r2.10, sec 7.2). However, most bootloaders and EFI stubs convert EfiMemoryMappedIO regions to E820_TYPE_RESERVED entries, which prevent Linux from allocating space from them (see remove_e820_regions()). Some platforms use EfiMemoryMappedIO entries for PCI MMCONFIG space and PCI host bridge windows, which means Linux can't allocate BAR space for hot-added devices. Remove large EfiMemoryMappedIO regions from the E820 map to avoid this problem. Leave small (< 256KB) EfiMemoryMappedIO regions alone because on some platforms, these describe non-window space that's included in host bridge _CRS. If we assign that space to PCI devices, they don't work. On the Lenovo X1 Carbon, this leads to suspend/resume failures. The previous solution to the problem of allocating BARs in these regions was to add pci_crs_quirks[] entries to disable E820 checking for these machines (see d341838d776a ("x86/PCI: Disable E820 reserved region clipping via quirks")): Acer DMI_PRODUCT_NAME Spin SP513-54N Clevo DMI_BOARD_NAME X170KM-G Lenovo DMI_PRODUCT_VERSION *IIL* Florent reported the BAR allocation issue on the Clevo NL4XLU. We could add another quirk for the NL4XLU, but I hope this generic change can solve it for many machines without having to add quirks. This change has been tested on Clevo X170KM-G (Konrad) and Lenovo Ideapad Slim 3 (Matt) and solves the problem even when overriding the existing quirks by booting with "pci=use_e820". Link: https://bugzilla.kernel.org/show_bug.cgi?id=216565 Clevo NL4XLU Link: https://bugzilla.kernel.org/show_bug.cgi?id=206459#c78 Clevo X170KM-G Link: https://bugzilla.redhat.com/show_bug.cgi?id=1868899 Ideapad Slim 3 Link: https://bugzilla.redhat.com/show_bug.cgi?id=2029207 X1 Carbon Link: https://lore.kernel.org/r/20221208190341.1560157-2-helgaas@kernel.org Reported-by: Florent DELAHAYE Tested-by: Konrad J Hambrick Tested-by: Matt Hansen <2lprbe78@duck.com> Signed-off-by: Bjorn Helgaas Acked-by: Hans de Goede --- arch/x86/platform/efi/efi.c | 46 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index ebc98a68c400..75bf0e56bb53 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -303,6 +303,50 @@ static void __init efi_clean_memmap(void) } } +/* + * Firmware can use EfiMemoryMappedIO to request that MMIO regions be + * mapped by the OS so they can be accessed by EFI runtime services, but + * should have no other significance to the OS (UEFI r2.10, sec 7.2). + * However, most bootloaders and EFI stubs convert EfiMemoryMappedIO + * regions to E820_TYPE_RESERVED entries, which prevent Linux from + * allocating space from them (see remove_e820_regions()). + * + * Some platforms use EfiMemoryMappedIO entries for PCI MMCONFIG space and + * PCI host bridge windows, which means Linux can't allocate BAR space for + * hot-added devices. + * + * Remove large EfiMemoryMappedIO regions from the E820 map to avoid this + * problem. + * + * Retain small EfiMemoryMappedIO regions because on some platforms, these + * describe non-window space that's included in host bridge _CRS. If we + * assign that space to PCI devices, they don't work. + */ +static void __init efi_remove_e820_mmio(void) +{ + efi_memory_desc_t *md; + u64 size, start, end; + int i = 0; + + for_each_efi_memory_desc(md) { + if (md->type == EFI_MEMORY_MAPPED_IO) { + size = md->num_pages << EFI_PAGE_SHIFT; + start = md->phys_addr; + end = start + size - 1; + if (size >= 256*1024) { + pr_info("Remove mem%02u: MMIO range=[0x%08llx-0x%08llx] (%lluMB) from e820 map\n", + i, start, end, size >> 20); + e820__range_remove(start, size, + E820_TYPE_RESERVED, 1); + } else { + pr_info("Not removing mem%02u: MMIO range=[0x%08llx-0x%08llx] (%lluKB) from e820 map\n", + i, start, end, size >> 10); + } + } + i++; + } +} + void __init efi_print_memmap(void) { efi_memory_desc_t *md; @@ -474,6 +518,8 @@ void __init efi_init(void) set_bit(EFI_RUNTIME_SERVICES, &efi.flags); efi_clean_memmap(); + efi_remove_e820_mmio(); + if (efi_enabled(EFI_DBG)) efi_print_memmap(); } -- cgit v1.2.3 From 5c5fb3c3a793b34554e1d21f07cda34308b082cd Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 8 Dec 2022 13:03:39 -0600 Subject: PCI: Skip allocate_resource() if too little space available pci_bus_alloc_from_region() allocates MMIO space by iterating through all the resources available on the bus. The available resource might be reduced if the caller requires 32-bit space or we're avoiding BIOS or E820 areas. Don't bother calling allocate_resource() if we need more space than is available in this resource. This prevents some pointless and annoying messages about avoided areas. Link: https://lore.kernel.org/r/20221208190341.1560157-3-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Acked-by: Hans de Goede --- drivers/pci/bus.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 3cef835b375f..83ae838ceb5f 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -197,6 +197,10 @@ static int pci_bus_alloc_from_region(struct pci_bus *bus, struct resource *res, max = avail.end; + /* Don't bother if available space isn't large enough */ + if (size > max - min_used + 1) + continue; + /* Ok, try it out.. */ ret = allocate_resource(r, res, size, min_used, max, align, alignf, alignf_data); -- cgit v1.2.3 From 00904bf64c2819d90a76407d79bdbc8918541320 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 8 Dec 2022 13:03:40 -0600 Subject: x86/PCI: Tidy E820 removal messages These messages: clipped [mem size 0x00000000 64bit] to [mem size 0xfffffffffffa0000 64bit] for e820 entry [mem 0x0009f000-0x000fffff] aren't as useful as they could be because (a) the resource is often IORESOURCE_UNSET, so we print the size instead of the start/end and (b) we print the available resource even if it is empty after removing the E820 entry. Print the available space by hand to avoid the IORESOURCE_UNSET problem and only if it's non-empty. No functional change intended. Link: https://lore.kernel.org/r/20221208190341.1560157-4-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Acked-by: Hans de Goede --- arch/x86/kernel/resource.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c index bba1abd05bfe..79bc8a97a083 100644 --- a/arch/x86/kernel/resource.c +++ b/arch/x86/kernel/resource.c @@ -42,8 +42,16 @@ static void remove_e820_regions(struct resource *avail) resource_clip(avail, e820_start, e820_end); if (orig.start != avail->start || orig.end != avail->end) { - pr_info("clipped %pR to %pR for e820 entry [mem %#010Lx-%#010Lx]\n", - &orig, avail, e820_start, e820_end); + pr_info("resource: avoiding allocation from e820 entry [mem %#010Lx-%#010Lx]\n", + e820_start, e820_end); + if (avail->end > avail->start) + /* + * Use %pa instead of %pR because "avail" + * is typically IORESOURCE_UNSET, so %pR + * shows the size instead of addresses. + */ + pr_info("resource: remaining [mem %pa-%pa] available\n", + &avail->start, &avail->end); orig = *avail; } } -- cgit v1.2.3 From 2bfa89fab5ff06703c034c4702e6ea4418194ffe Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 8 Dec 2022 13:03:41 -0600 Subject: x86/PCI: Fix log message typo Add missing word in the log message: - ... so future kernels can this automatically + ... so future kernels can do this automatically Suggested-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221208190341.1560157-5-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Acked-by: Hans de Goede --- arch/x86/pci/acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 2f82480fd430..83dfea9e9894 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -245,7 +245,7 @@ void __init pci_acpi_crs_quirks(void) printk(KERN_INFO "PCI: %s E820 reservations for host bridge windows\n", pci_use_e820 ? "Using" : "Ignoring"); if (pci_probe & (PCI_NO_E820 | PCI_USE_E820)) - printk(KERN_INFO "PCI: Please notify linux-pci@vger.kernel.org so future kernels can this automatically\n"); + printk(KERN_INFO "PCI: Please notify linux-pci@vger.kernel.org so future kernels can do this automatically\n"); } #ifdef CONFIG_PCI_MMCONFIG -- cgit v1.2.3 From d91482bb212b36354b0e46d7a5c0adae807e7a12 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 9 Dec 2022 14:41:27 -0600 Subject: x86/PCI: Use pr_info() when possible Use pr_info() and similar when possible. No functional change intended. Link: https://lore.kernel.org/r/20221209205131.GA1726524@bhelgaas Suggested-by: Andy Shevchenko Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko --- arch/x86/pci/acpi.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 83dfea9e9894..ea2eb2ec90e2 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -1,4 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 + +#define pr_fmt(fmt) "PCI: " fmt + #include #include #include @@ -37,15 +40,15 @@ static int __init set_nouse_crs(const struct dmi_system_id *id) static int __init set_ignore_seg(const struct dmi_system_id *id) { - printk(KERN_INFO "PCI: %s detected: ignoring ACPI _SEG\n", id->ident); + pr_info("%s detected: ignoring ACPI _SEG\n", id->ident); pci_ignore_seg = true; return 0; } static int __init set_no_e820(const struct dmi_system_id *id) { - printk(KERN_INFO "PCI: %s detected: not clipping E820 regions from _CRS\n", - id->ident); + pr_info("%s detected: not clipping E820 regions from _CRS\n", + id->ident); pci_use_e820 = false; return 0; } @@ -231,10 +234,9 @@ void __init pci_acpi_crs_quirks(void) else if (pci_probe & PCI_USE__CRS) pci_use_crs = true; - printk(KERN_INFO "PCI: %s host bridge windows from ACPI; " - "if necessary, use \"pci=%s\" and report a bug\n", - pci_use_crs ? "Using" : "Ignoring", - pci_use_crs ? "nocrs" : "use_crs"); + pr_info("%s host bridge windows from ACPI; if necessary, use \"pci=%s\" and report a bug\n", + pci_use_crs ? "Using" : "Ignoring", + pci_use_crs ? "nocrs" : "use_crs"); /* "pci=use_e820"/"pci=no_e820" on the kernel cmdline takes precedence */ if (pci_probe & PCI_NO_E820) @@ -242,19 +244,17 @@ void __init pci_acpi_crs_quirks(void) else if (pci_probe & PCI_USE_E820) pci_use_e820 = true; - printk(KERN_INFO "PCI: %s E820 reservations for host bridge windows\n", - pci_use_e820 ? "Using" : "Ignoring"); + pr_info("%s E820 reservations for host bridge windows\n", + pci_use_e820 ? "Using" : "Ignoring"); if (pci_probe & (PCI_NO_E820 | PCI_USE_E820)) - printk(KERN_INFO "PCI: Please notify linux-pci@vger.kernel.org so future kernels can do this automatically\n"); + pr_info("Please notify linux-pci@vger.kernel.org so future kernels can do this automatically\n"); } #ifdef CONFIG_PCI_MMCONFIG static int check_segment(u16 seg, struct device *dev, char *estr) { if (seg) { - dev_err(dev, - "%s can't access PCI configuration " - "space under this host bridge.\n", + dev_err(dev, "%s can't access configuration space under this host bridge\n", estr); return -EIO; } @@ -264,9 +264,7 @@ static int check_segment(u16 seg, struct device *dev, char *estr) * just can't access extended configuration space of * devices under this host bridge. */ - dev_warn(dev, - "%s can't access extended PCI configuration " - "space under this bridge.\n", + dev_warn(dev, "%s can't access extended configuration space under this bridge\n", estr); return 0; @@ -421,9 +419,8 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) root->segment = domain = 0; if (domain && !pci_domains_supported) { - printk(KERN_WARNING "pci_bus %04x:%02x: " - "ignored (multiple domains not supported)\n", - domain, busnum); + pr_warn("pci_bus %04x:%02x: ignored (multiple domains not supported)\n", + domain, busnum); return NULL; } @@ -491,7 +488,7 @@ int __init pci_acpi_init(void) if (acpi_noirq) return -ENODEV; - printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); + pr_info("Using ACPI for IRQ routing\n"); acpi_irq_penalty_init(); pcibios_enable_irq = acpi_pci_irq_enable; pcibios_disable_irq = acpi_pci_irq_disable; @@ -503,7 +500,7 @@ int __init pci_acpi_init(void) * also do it here in case there are still broken drivers that * don't use pci_enable_device(). */ - printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n"); + pr_info("Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n"); for_each_pci_dev(dev) acpi_pci_irq_enable(dev); } -- cgit v1.2.3