From 07cf2a64c2ad3408a0e12aa4cd6040b30c09381d Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Sat, 6 Nov 2010 10:06:49 +0100 Subject: xen: fix memory leak in Xen PCI MSI/MSI-X allocator. Stanse found that xen_setup_msi_irqs leaks memory when xen_allocate_pirq fails. Free the memory in that fail path. Signed-off-by: Jiri Slaby Signed-off-by: Konrad Rzeszutek Wilk Cc: xen-devel@lists.xensource.com Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x86@kernel.org --- arch/x86/pci/xen.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 117f5b8daf75..d7b5109f7a9c 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -147,8 +147,10 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) irq = xen_allocate_pirq(v[i], 0, /* not sharable */ (type == PCI_CAP_ID_MSIX) ? "pcifront-msi-x" : "pcifront-msi"); - if (irq < 0) - return -1; + if (irq < 0) { + ret = -1; + goto free; + } ret = set_irq_msi(irq, msidesc); if (ret) @@ -164,7 +166,7 @@ error: if (ret == -ENODEV) dev_err(&dev->dev, "Xen PCI frontend has not registered" \ " MSI/MSI-X support!\n"); - +free: kfree(v); return ret; } -- cgit v1.2.3 From 4723d0f2f96e6c910f951d595067eb31e0dd2d01 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 22 Sep 2010 11:09:19 -0600 Subject: x86/PCI: coalesce overlapping host bridge windows Some BIOSes provide PCI host bridge windows that overlap, e.g., pci_root PNP0A03:00: host bridge window [mem 0xb0000000-0xffffffff] pci_root PNP0A03:00: host bridge window [mem 0xafffffff-0xdfffffff] pci_root PNP0A03:00: host bridge window [mem 0xf0000000-0xffffffff] If we simply insert these as children of iomem_resource, the second window fails because it conflicts with the first, and the third is inserted as a child of the first, i.e., b0000000-ffffffff PCI Bus 0000:00 f0000000-ffffffff PCI Bus 0000:00 When we claim PCI device resources, this can cause collisions like this if we put them in the first window: pci 0000:00:01.0: address space collision: [mem 0xff300000-0xff4fffff] conflicts with PCI Bus 0000:00 [mem 0xf0000000-0xffffffff] Host bridge windows are top-level resources by definition, so it doesn't make sense to make the third window a child of the first. This patch coalesces any host bridge windows that overlap. For the example above, the result is this single window: pci_root PNP0A03:00: host bridge window [mem 0xafffffff-0xffffffff] This fixes a 2.6.34 regression. Reference: https://bugzilla.kernel.org/show_bug.cgi?id=17011 Reported-and-tested-by: Anisse Astier Reported-and-tested-by: Pramod Dematagoda Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 103 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 83 insertions(+), 20 deletions(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 15466c096ba5..0972315c3860 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -138,7 +138,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data) struct acpi_resource_address64 addr; acpi_status status; unsigned long flags; - struct resource *root, *conflict; u64 start, end; status = resource_to_addr(acpi_res, &addr); @@ -146,12 +145,10 @@ setup_resource(struct acpi_resource *acpi_res, void *data) return AE_OK; if (addr.resource_type == ACPI_MEMORY_RANGE) { - root = &iomem_resource; flags = IORESOURCE_MEM; if (addr.info.mem.caching == ACPI_PREFETCHABLE_MEMORY) flags |= IORESOURCE_PREFETCH; } else if (addr.resource_type == ACPI_IO_RANGE) { - root = &ioport_resource; flags = IORESOURCE_IO; } else return AE_OK; @@ -172,25 +169,90 @@ setup_resource(struct acpi_resource *acpi_res, void *data) return AE_OK; } - conflict = insert_resource_conflict(root, res); - if (conflict) { - dev_err(&info->bridge->dev, - "address space collision: host bridge window %pR " - "conflicts with %s %pR\n", - res, conflict->name, conflict); - } else { - pci_bus_add_resource(info->bus, res, 0); - info->res_num++; - if (addr.translation_offset) - dev_info(&info->bridge->dev, "host bridge window %pR " - "(PCI address [%#llx-%#llx])\n", - res, res->start - addr.translation_offset, - res->end - addr.translation_offset); + info->res_num++; + if (addr.translation_offset) + dev_info(&info->bridge->dev, "host bridge window %pR " + "(PCI address [%#llx-%#llx])\n", + res, res->start - addr.translation_offset, + res->end - addr.translation_offset); + else + dev_info(&info->bridge->dev, "host bridge window %pR\n", res); + + return AE_OK; +} + +static bool resource_contains(struct resource *res, resource_size_t point) +{ + if (res->start <= point && point <= res->end) + return true; + return false; +} + +static void coalesce_windows(struct pci_root_info *info, int type) +{ + int i, j; + struct resource *res1, *res2; + + for (i = 0; i < info->res_num; i++) { + res1 = &info->res[i]; + if (!(res1->flags & type)) + continue; + + for (j = i + 1; j < info->res_num; j++) { + res2 = &info->res[j]; + if (!(res2->flags & type)) + continue; + + /* + * I don't like throwing away windows because then + * our resources no longer match the ACPI _CRS, but + * the kernel resource tree doesn't allow overlaps. + */ + if (resource_contains(res1, res2->start) || + resource_contains(res1, res2->end) || + resource_contains(res2, res1->start) || + resource_contains(res2, res1->end)) { + res1->start = min(res1->start, res2->start); + res1->end = max(res1->end, res2->end); + dev_info(&info->bridge->dev, + "host bridge window expanded to %pR; %pR ignored\n", + res1, res2); + res2->flags = 0; + } + } + } +} + +static void add_resources(struct pci_root_info *info) +{ + int i; + struct resource *res, *root, *conflict; + + if (!pci_use_crs) + return; + + coalesce_windows(info, IORESOURCE_MEM); + coalesce_windows(info, IORESOURCE_IO); + + for (i = 0; i < info->res_num; i++) { + res = &info->res[i]; + + if (res->flags & IORESOURCE_MEM) + root = &iomem_resource; + else if (res->flags & IORESOURCE_IO) + root = &ioport_resource; else - dev_info(&info->bridge->dev, - "host bridge window %pR\n", res); + continue; + + conflict = insert_resource_conflict(root, res); + if (conflict) + dev_err(&info->bridge->dev, + "address space collision: host bridge window %pR " + "conflicts with %s %pR\n", + res, conflict->name, conflict); + else + pci_bus_add_resource(info->bus, res, 0); } - return AE_OK; } static void @@ -224,6 +286,7 @@ get_current_resources(struct acpi_device *device, int busnum, acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, &info); + add_resources(&info); return; name_alloc_fail: -- cgit v1.2.3 From 5bd5a452662bc37c54fb6828db1a3faf87e6511c Mon Sep 17 00:00:00 2001 From: Matthieu Castet Date: Tue, 16 Nov 2010 22:31:26 +0100 Subject: x86: Add NX protection for kernel data This patch expands functionality of CONFIG_DEBUG_RODATA to set main (static) kernel data area as NX. The following steps are taken to achieve this: 1. Linker script is adjusted so .text always starts and ends on a page bound 2. Linker script is adjusted so .rodata always start and end on a page boundary 3. NX is set for all pages from _etext through _end in mark_rodata_ro. 4. free_init_pages() sets released memory NX in arch/x86/mm/init.c 5. bios rom is set to x when pcibios is used. The results of patch application may be observed in the diff of kernel page table dumps: pcibios: -- data_nx_pt_before.txt 2009-10-13 07:48:59.000000000 -0400 ++ data_nx_pt_after.txt 2009-10-13 07:26:46.000000000 -0400 0x00000000-0xc0000000 3G pmd ---[ Kernel Mapping ]--- -0xc0000000-0xc0100000 1M RW GLB x pte +0xc0000000-0xc00a0000 640K RW GLB NX pte +0xc00a0000-0xc0100000 384K RW GLB x pte -0xc0100000-0xc03d7000 2908K ro GLB x pte +0xc0100000-0xc0318000 2144K ro GLB x pte +0xc0318000-0xc03d7000 764K ro GLB NX pte -0xc03d7000-0xc0600000 2212K RW GLB x pte +0xc03d7000-0xc0600000 2212K RW GLB NX pte 0xc0600000-0xf7a00000 884M RW PSE GLB NX pmd 0xf7a00000-0xf7bfe000 2040K RW GLB NX pte 0xf7bfe000-0xf7c00000 8K pte No pcibios: -- data_nx_pt_before.txt 2009-10-13 07:48:59.000000000 -0400 ++ data_nx_pt_after.txt 2009-10-13 07:26:46.000000000 -0400 0x00000000-0xc0000000 3G pmd ---[ Kernel Mapping ]--- -0xc0000000-0xc0100000 1M RW GLB x pte +0xc0000000-0xc0100000 1M RW GLB NX pte -0xc0100000-0xc03d7000 2908K ro GLB x pte +0xc0100000-0xc0318000 2144K ro GLB x pte +0xc0318000-0xc03d7000 764K ro GLB NX pte -0xc03d7000-0xc0600000 2212K RW GLB x pte +0xc03d7000-0xc0600000 2212K RW GLB NX pte 0xc0600000-0xf7a00000 884M RW PSE GLB NX pmd 0xf7a00000-0xf7bfe000 2040K RW GLB NX pte 0xf7bfe000-0xf7c00000 8K pte The patch has been originally developed for Linux 2.6.34-rc2 x86 by Siarhei Liakh and Xuxian Jiang . -v1: initial patch for 2.6.30 -v2: patch for 2.6.31-rc7 -v3: moved all code into arch/x86, adjusted credits -v4: fixed ifdef, removed credits from CREDITS -v5: fixed an address calculation bug in mark_nxdata_nx() -v6: added acked-by and PT dump diff to commit log -v7: minor adjustments for -tip -v8: rework with the merge of "Set first MB as RW+NX" Signed-off-by: Siarhei Liakh Signed-off-by: Xuxian Jiang Signed-off-by: Matthieu CASTET Cc: Arjan van de Ven Cc: James Morris Cc: Andi Kleen Cc: Rusty Russell Cc: Stephen Rothwell Cc: Dave Jones Cc: Kees Cook Cc: Linus Torvalds LKML-Reference: <4CE2F82E.60601@free.fr> [ minor cleanliness edits ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pci.h | 1 + arch/x86/kernel/vmlinux.lds.S | 8 ++++++-- arch/x86/mm/init.c | 3 ++- arch/x86/mm/init_32.c | 20 +++++++++++++++++++- arch/x86/mm/init_64.c | 3 ++- arch/x86/mm/pageattr.c | 5 ++++- arch/x86/pci/pcbios.c | 23 +++++++++++++++++++++++ 7 files changed, 57 insertions(+), 6 deletions(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index ca0437c714b2..676129229630 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -65,6 +65,7 @@ extern unsigned long pci_mem_start; #define PCIBIOS_MIN_CARDBUS_IO 0x4000 +extern int pcibios_enabled; void pcibios_config_init(void); struct pci_bus *pcibios_scan_root(int bus); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index e03530aebfd0..bf4700755184 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -69,7 +69,7 @@ jiffies_64 = jiffies; PHDRS { text PT_LOAD FLAGS(5); /* R_E */ - data PT_LOAD FLAGS(7); /* RWE */ + data PT_LOAD FLAGS(6); /* RW_ */ #ifdef CONFIG_X86_64 user PT_LOAD FLAGS(5); /* R_E */ #ifdef CONFIG_SMP @@ -116,6 +116,10 @@ SECTIONS EXCEPTION_TABLE(16) :text = 0x9090 +#if defined(CONFIG_DEBUG_RODATA) + /* .text should occupy whole number of pages */ + . = ALIGN(PAGE_SIZE); +#endif X64_ALIGN_DEBUG_RODATA_BEGIN RO_DATA(PAGE_SIZE) X64_ALIGN_DEBUG_RODATA_END @@ -335,7 +339,7 @@ SECTIONS __bss_start = .; *(.bss..page_aligned) *(.bss) - . = ALIGN(4); + . = ALIGN(PAGE_SIZE); __bss_stop = .; } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index c0e28a13de7d..947f42abe820 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -364,8 +364,9 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) /* * We just marked the kernel text read only above, now that * we are going to free part of that, we need to make that - * writeable first. + * writeable and non-executable first. */ + set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 0e969f9f401b..f89b5bb4e93f 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -226,7 +226,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) static inline int is_kernel_text(unsigned long addr) { - if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) + if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end) return 1; return 0; } @@ -912,6 +912,23 @@ void set_kernel_text_ro(void) set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); } +static void mark_nxdata_nx(void) +{ + /* + * When this called, init has already been executed and released, + * so everything past _etext sould be NX. + */ + unsigned long start = PFN_ALIGN(_etext); + /* + * This comes from is_kernel_text upper limit. Also HPAGE where used: + */ + unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start; + + if (__supported_pte_mask & _PAGE_NX) + printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10); + set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT); +} + void mark_rodata_ro(void) { unsigned long start = PFN_ALIGN(_text); @@ -946,6 +963,7 @@ void mark_rodata_ro(void) printk(KERN_INFO "Testing CPA: write protecting again\n"); set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); #endif + mark_nxdata_nx(); } #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 71a59296af80..ce59c05cae12 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -788,6 +788,7 @@ void mark_rodata_ro(void) unsigned long rodata_start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; unsigned long end = (unsigned long) &__end_rodata_hpage_align; + unsigned long kernel_end = (((unsigned long)&__init_end + HPAGE_SIZE) & HPAGE_MASK); unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table); unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata); unsigned long data_start = (unsigned long) &_sdata; @@ -802,7 +803,7 @@ void mark_rodata_ro(void) * The rodata section (but not the kernel text!) should also be * not-executable. */ - set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT); + set_memory_nx(rodata_start, (kernel_end - rodata_start) >> PAGE_SHIFT); rodata_test(); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 6f2a6b6deb6b..8b830ca14ac4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -261,8 +262,10 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * The BIOS area between 640k and 1Mb needs to be executable for * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. */ - if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) +#ifdef CONFIG_PCI_BIOS + if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) pgprot_val(forbidden) |= _PAGE_NX; +#endif /* * The kernel text needs to be executable for obvious reasons diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 2492d165096a..a5f7d0d63de0 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -9,6 +9,7 @@ #include #include #include +#include /* BIOS32 signature: "_32_" */ #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) @@ -25,6 +26,27 @@ #define PCIBIOS_HW_TYPE1_SPEC 0x10 #define PCIBIOS_HW_TYPE2_SPEC 0x20 +int pcibios_enabled; + +/* According to the BIOS specification at: + * http://members.datafast.net.au/dft0802/specs/bios21.pdf, we could + * restrict the x zone to some pages and make it ro. But this may be + * broken on some bios, complex to handle with static_protections. + * We could make the 0xe0000-0x100000 range rox, but this can break + * some ISA mapping. + * + * So we let's an rw and x hole when pcibios is used. This shouldn't + * happen for modern system with mmconfig, and if you don't want it + * you could disable pcibios... + */ +static inline void set_bios_x(void) +{ + pcibios_enabled = 1; + set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT); + if (__supported_pte_mask & _PAGE_NX) + printk(KERN_INFO "PCI : PCI BIOS aera is rw and x. Use pci=nobios if you want it NX.\n"); +} + /* * This is the standard structure used to identify the entry point * to the BIOS32 Service Directory, as documented in @@ -332,6 +354,7 @@ static struct pci_raw_ops * __devinit pci_find_bios(void) DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", bios32_entry); bios32_indirect.address = bios32_entry + PAGE_OFFSET; + set_bios_x(); if (check_pcibios()) return &pci_bios_access; } -- cgit v1.2.3 From af42b8d12f8adec6711cb824549a0edac6a4ae8f Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 1 Dec 2010 14:51:44 +0000 Subject: xen: fix MSI setup and teardown for PV on HVM guests When remapping MSIs into pirqs for PV on HVM guests, qemu is responsible for doing the actual mapping and unmapping. We only give qemu the desired pirq number when we ask to do the mapping the first time, after that we should be reading back the pirq number from qemu every time we want to re-enable the MSI. This fixes a bug in xen_hvm_setup_msi_irqs that manifests itself when trying to enable the same MSI for the second time: the old MSI to pirq mapping is still valid at this point but xen_hvm_setup_msi_irqs would try to assign a new pirq anyway. A simple way to reproduce this bug is to assign an MSI capable network card to a PV on HVM guest, if the user brings down the corresponding ethernet interface and up again, Linux would fail to enable MSIs on the device. Signed-off-by: Stefano Stabellini --- arch/x86/pci/xen.c | 27 ++++++++++++++++++++------- drivers/xen/events.c | 24 +++++++++++++++++------- include/xen/events.h | 7 ++++++- 3 files changed, 43 insertions(+), 15 deletions(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index d7b5109f7a9c..25cd4a07d09f 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -70,6 +70,9 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, struct xen_pci_frontend_ops *xen_pci_frontend; EXPORT_SYMBOL_GPL(xen_pci_frontend); +#define XEN_PIRQ_MSI_DATA (MSI_DATA_TRIGGER_EDGE | \ + MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0)) + static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, struct msi_msg *msg) { @@ -83,12 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, MSI_ADDR_REDIRECTION_CPU | MSI_ADDR_DEST_ID(pirq); - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - /* delivery mode reserved */ - (3 << 8) | - MSI_DATA_VECTOR(0); + msg->data = XEN_PIRQ_MSI_DATA; } static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) @@ -98,8 +96,23 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) struct msi_msg msg; list_for_each_entry(msidesc, &dev->msi_list, list) { + __read_msi_msg(msidesc, &msg); + pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | + ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); + if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) { + xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? + "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ); + if (irq < 0) + goto error; + ret = set_irq_msi(irq, msidesc); + if (ret < 0) + goto error_while; + printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d" + " pirq=%d\n", irq, pirq); + return 0; + } xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? - "msi-x" : "msi", &irq, &pirq); + "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ)); if (irq < 0 || pirq < 0) goto error; printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq); diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 7ab43c33f746..f78945ce8aeb 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -668,17 +668,21 @@ out: #include #include "../pci/msi.h" -void xen_allocate_pirq_msi(char *name, int *irq, int *pirq) +void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc) { spin_lock(&irq_mapping_update_lock); - *irq = find_unbound_irq(); - if (*irq == -1) - goto out; + if (alloc & XEN_ALLOC_IRQ) { + *irq = find_unbound_irq(); + if (*irq == -1) + goto out; + } - *pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI); - if (*pirq == -1) - goto out; + if (alloc & XEN_ALLOC_PIRQ) { + *pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI); + if (*pirq == -1) + goto out; + } set_irq_chip_and_handler_name(*irq, &xen_pirq_chip, handle_level_irq, name); @@ -766,6 +770,7 @@ int xen_destroy_irq(int irq) printk(KERN_WARNING "unmap irq failed %d\n", rc); goto out; } + pirq_to_irq[info->u.pirq.pirq] = -1; } irq_info[irq] = mk_unbound_info(); @@ -786,6 +791,11 @@ int xen_gsi_from_irq(unsigned irq) return gsi_from_irq(irq); } +int xen_irq_from_pirq(unsigned pirq) +{ + return pirq_to_irq[pirq]; +} + int bind_evtchn_to_irq(unsigned int evtchn) { int irq; diff --git a/include/xen/events.h b/include/xen/events.h index 646dd17d3aa4..00f53ddcc062 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -76,7 +76,9 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name); #ifdef CONFIG_PCI_MSI /* Allocate an irq and a pirq to be used with MSIs. */ -void xen_allocate_pirq_msi(char *name, int *irq, int *pirq); +#define XEN_ALLOC_PIRQ (1 << 0) +#define XEN_ALLOC_IRQ (1 << 1) +void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc_mask); int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type); #endif @@ -89,4 +91,7 @@ int xen_vector_from_irq(unsigned pirq); /* Return gsi allocated to pirq */ int xen_gsi_from_irq(unsigned pirq); +/* Return irq from pirq */ +int xen_irq_from_pirq(unsigned pirq); + #endif /* _XEN_EVENTS_H */ -- cgit v1.2.3 From d14125ecfee05473de46f06d992db109308c57a3 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 16 Dec 2010 10:38:31 -0700 Subject: Revert "x86/PCI: allocate space from the end of a region, not the beginning" This reverts commit dc9887dc02e37bcf83f4e792aa14b07782ef54cf. Acked-by: H. Peter Anvin Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/i386.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index c4bb261c106e..8379c2c3d076 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -65,21 +65,16 @@ pcibios_align_resource(void *data, const struct resource *res, resource_size_t size, resource_size_t align) { struct pci_dev *dev = data; - resource_size_t start = round_down(res->end - size + 1, align); + resource_size_t start = res->start; if (res->flags & IORESOURCE_IO) { - - /* - * If we're avoiding ISA aliases, the largest contiguous I/O - * port space is 256 bytes. Clearing bits 9 and 10 preserves - * all 256-byte and smaller alignments, so the result will - * still be correctly aligned. - */ - if (!skip_isa_ioresource_align(dev)) - start &= ~0x300; + if (skip_isa_ioresource_align(dev)) + return start; + if (start & 0x300) + start = (start + 0x3ff) & ~0x3ff; } else if (res->flags & IORESOURCE_MEM) { if (start < BIOS_END) - start = res->end; /* fail; no space */ + start = BIOS_END; } return start; } -- cgit v1.2.3 From 30919b0bf356a8ee0ef4f7d38ca8ad99b96820b2 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 16 Dec 2010 10:38:51 -0700 Subject: x86: avoid low BIOS area when allocating address space This implements arch_remove_reservations() so allocate_resource() can avoid any arch-specific reserved areas. This currently just avoids the BIOS area (the first 1MB), but could be used for E820 reserved areas if that turns out to be necessary. We previously avoided this area in pcibios_align_resource(). This patch moves the test from that PCI-specific path to a generic path, so *all* resource allocations will avoid this area. Acked-by: H. Peter Anvin Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/resource.c | 11 +++++++++++ arch/x86/pci/i386.c | 3 --- 3 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 arch/x86/kernel/resource.c (limited to 'arch/x86/pci') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9e13763b6092..1e994754d323 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -45,6 +45,7 @@ obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o obj-y += tsc.o io_delay.o rtc.o obj-y += pci-iommu_table.o +obj-y += resource.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c new file mode 100644 index 000000000000..407a900da9df --- /dev/null +++ b/arch/x86/kernel/resource.c @@ -0,0 +1,11 @@ +#include +#include + +void arch_remove_reservations(struct resource *avail) +{ + /* Trim out BIOS area (low 1MB) */ + if (avail->flags & IORESOURCE_MEM) { + if (avail->start < BIOS_END) + avail->start = BIOS_END; + } +} diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 8379c2c3d076..b1805b78842f 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -72,9 +72,6 @@ pcibios_align_resource(void *data, const struct resource *res, return start; if (start & 0x300) start = (start + 0x3ff) & ~0x3ff; - } else if (res->flags & IORESOURCE_MEM) { - if (start < BIOS_END) - start = BIOS_END; } return start; } -- cgit v1.2.3 From 9b444b36fee16d2aaae9cc91ce594ecb15d922a9 Mon Sep 17 00:00:00 2001 From: Seth Heasley Date: Wed, 17 Nov 2010 12:12:08 -0700 Subject: x86/PCI: irq and pci_ids patch for Intel Patsburg This patch adds an additional LPC Controller DeviceID for the Intel Patsburg PCH. Signed-off-by: Seth Heasley Signed-off-by: Jesse Barnes --- arch/x86/pci/irq.c | 3 ++- include/linux/pci_ids.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 9f9bfb705cf9..87e6c8323117 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -589,7 +589,8 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route case PCI_DEVICE_ID_INTEL_ICH10_1: case PCI_DEVICE_ID_INTEL_ICH10_2: case PCI_DEVICE_ID_INTEL_ICH10_3: - case PCI_DEVICE_ID_INTEL_PATSBURG_LPC: + case PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0: + case PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1: r->name = "PIIX/ICH"; r->get = pirq_piix_get; r->set = pirq_piix_set; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index cb845c16ad7d..d830106827fd 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2468,7 +2468,8 @@ #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN 0x1c41 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX 0x1c5f #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 -#define PCI_DEVICE_ID_INTEL_PATSBURG_LPC 0x1d40 +#define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0 0x1d40 +#define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1 0x1d41 #define PCI_DEVICE_ID_INTEL_82801AA_0 0x2410 #define PCI_DEVICE_ID_INTEL_82801AA_1 0x2411 #define PCI_DEVICE_ID_INTEL_82801AA_3 0x2413 -- cgit v1.2.3 From 24d9b70b8c679264756a6980e668b96b3f964826 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 10 Jan 2011 16:20:23 +0000 Subject: x86: Use PCI method for enabling AMD extended config space before MSR method While both methods should work equivalently well for the native case, the Xen Dom0 case can't reliably work with the MSR one, since there's no guarantee that the virtual CPUs it has available fully cover all necessary physical ones. As per the suggestion of Robert Richter the patch only adds the PCI method, but leaves the MSR one as a fallback to cover new systems the PCI IDs of which may not have got added to the code base yet. The only change in v2 is the breaking out of the new CPI initialization method into a separate function, as requested by Ingo. Signed-off-by: Jan Beulich Acked-by: Robert Richter Cc: Andreas Herrmann3 Cc: Joerg Roedel Cc: Jeremy Fitzhardinge LKML-Reference: <4D2B3FD7020000780002B67D@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/amd_nb.h | 7 +++++++ arch/x86/kernel/amd_nb.c | 7 +++++++ arch/x86/kernel/aperture_64.c | 44 ++++++++++++++++--------------------------- arch/x86/pci/amd_bus.c | 33 ++++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 28 deletions(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 980f22567631..64dc82ee19f0 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -3,7 +3,14 @@ #include +struct amd_nb_bus_dev_range { + u8 bus; + u8 dev_base; + u8 dev_limit; +}; + extern struct pci_device_id amd_nb_misc_ids[]; +extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; struct bootnode; extern int early_is_amd_nb(u32 value); diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index affacb5e0065..0a99f7198bc3 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -20,6 +20,13 @@ struct pci_device_id amd_nb_misc_ids[] = { }; EXPORT_SYMBOL(amd_nb_misc_ids); +const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = { + { 0x00, 0x18, 0x20 }, + { 0xff, 0x00, 0x20 }, + { 0xfe, 0x00, 0x20 }, + { } +}; + struct amd_northbridge_info amd_northbridges; EXPORT_SYMBOL(amd_northbridges); diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index dcd7c83e1659..5955a7800a96 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -39,18 +39,6 @@ int fallback_aper_force __initdata; int fix_aperture __initdata = 1; -struct bus_dev_range { - int bus; - int dev_base; - int dev_limit; -}; - -static struct bus_dev_range bus_dev_ranges[] __initdata = { - { 0x00, 0x18, 0x20}, - { 0xff, 0x00, 0x20}, - { 0xfe, 0x00, 0x20} -}; - static struct resource gart_resource = { .name = "GART", .flags = IORESOURCE_MEM, @@ -294,13 +282,13 @@ void __init early_gart_iommu_check(void) search_agp_bridge(&agp_aper_order, &valid_agp); fix = 0; - for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { + for (i = 0; amd_nb_bus_dev_ranges[i].dev_limit; i++) { int bus; int dev_base, dev_limit; - bus = bus_dev_ranges[i].bus; - dev_base = bus_dev_ranges[i].dev_base; - dev_limit = bus_dev_ranges[i].dev_limit; + bus = amd_nb_bus_dev_ranges[i].bus; + dev_base = amd_nb_bus_dev_ranges[i].dev_base; + dev_limit = amd_nb_bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) @@ -349,13 +337,13 @@ void __init early_gart_iommu_check(void) return; /* disable them all at first */ - for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { + for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) { int bus; int dev_base, dev_limit; - bus = bus_dev_ranges[i].bus; - dev_base = bus_dev_ranges[i].dev_base; - dev_limit = bus_dev_ranges[i].dev_limit; + bus = amd_nb_bus_dev_ranges[i].bus; + dev_base = amd_nb_bus_dev_ranges[i].dev_base; + dev_limit = amd_nb_bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) @@ -390,14 +378,14 @@ int __init gart_iommu_hole_init(void) fix = 0; node = 0; - for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { + for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) { int bus; int dev_base, dev_limit; u32 ctl; - bus = bus_dev_ranges[i].bus; - dev_base = bus_dev_ranges[i].dev_base; - dev_limit = bus_dev_ranges[i].dev_limit; + bus = amd_nb_bus_dev_ranges[i].bus; + dev_base = amd_nb_bus_dev_ranges[i].dev_base; + dev_limit = amd_nb_bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) @@ -505,7 +493,7 @@ out: } /* Fix up the north bridges */ - for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { + for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) { int bus, dev_base, dev_limit; /* @@ -514,9 +502,9 @@ out: */ u32 ctl = DISTLBWALKPRB | aper_order << 1; - bus = bus_dev_ranges[i].bus; - dev_base = bus_dev_ranges[i].dev_base; - dev_limit = bus_dev_ranges[i].dev_limit; + bus = amd_nb_bus_dev_ranges[i].bus; + dev_base = amd_nb_bus_dev_ranges[i].dev_base; + dev_limit = amd_nb_bus_dev_ranges[i].dev_limit; for (slot = dev_base; slot < dev_limit; slot++) { if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00))) continue; diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index fc1e8fe07e5c..e27dffbbb1a7 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -378,6 +379,34 @@ static struct notifier_block __cpuinitdata amd_cpu_notifier = { .notifier_call = amd_cpu_notify, }; +static void __init pci_enable_pci_io_ecs(void) +{ +#ifdef CONFIG_AMD_NB + unsigned int i, n; + + for (n = i = 0; !n && amd_nb_bus_dev_ranges[i].dev_limit; ++i) { + u8 bus = amd_nb_bus_dev_ranges[i].bus; + u8 slot = amd_nb_bus_dev_ranges[i].dev_base; + u8 limit = amd_nb_bus_dev_ranges[i].dev_limit; + + for (; slot < limit; ++slot) { + u32 val = read_pci_config(bus, slot, 3, 0); + + if (!early_is_amd_nb(val)) + continue; + + val = read_pci_config(bus, slot, 3, 0x8c); + if (!(val & (ENABLE_CF8_EXT_CFG >> 32))) { + val |= ENABLE_CF8_EXT_CFG >> 32; + write_pci_config(bus, slot, 3, 0x8c, val); + } + ++n; + } + } + pr_info("Extended Config Space enabled on %u nodes\n", n); +#endif +} + static int __init pci_io_ecs_init(void) { int cpu; @@ -386,6 +415,10 @@ static int __init pci_io_ecs_init(void) if (boot_cpu_data.x86 < 0x10) return 0; + /* Try the PCI method first. */ + if (early_pci_allowed()) + pci_enable_pci_io_ecs(); + register_cpu_notifier(&amd_cpu_notifier); for_each_online_cpu(cpu) amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE, -- cgit v1.2.3 From 6e8af08dfa40b747002207d3ce8e8b43a050d99f Mon Sep 17 00:00:00 2001 From: "Narendra_K@Dell.com" Date: Tue, 14 Dec 2010 09:57:12 -0800 Subject: PCI: enable pci=bfsort by default on future Dell systems This patch enables pci=bfsort by default on future Dell systems. It reads SMBIOS type 0xB1 vendor specific record and sets pci=bfsort accordingly. Offset Name Length Value Description 04 Flags0 Word Varies Bits 9-10 - 10:9 = 00 Unknown - 10:9 = 01 Breadth First - 10:9 = 10 Depth First - 10:9 = 11 Reserved 1. Any time pci=bfsort has to be enabled on a system, we need to add the model number of the system to the white list. With this patch, that is not required. 2. Typically, model number has to be added to the white list when the system is under development. With this change, that is not required. Signed-off-by: Jordan Hargrave Signed-off-by: Narendra K Signed-off-by: Jesse Barnes --- arch/x86/pci/common.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index f7c8a399978c..5fe75026ecc2 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -22,6 +22,7 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | unsigned int pci_early_dump_regs; static int pci_bf_sort; +static int smbios_type_b1_flag; int pci_routeirq; int noioapicquirk; #ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS @@ -185,6 +186,39 @@ static int __devinit set_bf_sort(const struct dmi_system_id *d) return 0; } +static void __devinit read_dmi_type_b1(const struct dmi_header *dm, + void *private_data) +{ + u8 *d = (u8 *)dm + 4; + + if (dm->type != 0xB1) + return; + switch (((*(u32 *)d) >> 9) & 0x03) { + case 0x00: + printk(KERN_INFO "dmi type 0xB1 record - unknown flag\n"); + break; + case 0x01: /* set pci=bfsort */ + smbios_type_b1_flag = 1; + break; + case 0x02: /* do not set pci=bfsort */ + smbios_type_b1_flag = 2; + break; + default: + break; + } +} + +static int __devinit find_sort_method(const struct dmi_system_id *d) +{ + dmi_walk(read_dmi_type_b1, NULL); + + if (smbios_type_b1_flag == 1) { + set_bf_sort(d); + return 0; + } + return -1; +} + /* * Enable renumbering of PCI bus# ranges to reach all PCI busses (Cardbus) */ @@ -212,6 +246,13 @@ static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = { }, }, #endif /* __i386__ */ + { + .callback = find_sort_method, + .ident = "Dell System", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), + }, + }, { .callback = set_bf_sort, .ident = "Dell PowerEdge 1950", -- cgit v1.2.3 From 30e664afb5cb597dd6f7651e6d116e10b9741084 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 6 Jan 2011 10:12:24 -0700 Subject: x86/PCI: don't use native Broadcom CNB20LE driver when ACPI is available The broadcom_bus.c quirk was written (without benefit of documentation) to support PCI hotplug on an old system that doesn't have ACPI. As such, we should only use it when the system doesn't have ACPI. If the system does have ACPI and we need the host bridge description, we should get it from the ACPI _CRS method. On machines older than 2008, we currently ignore _CRS, but that doesn't mean we should use broadcom_bus.c. It means we should either (a) do what we've done in the past and assume everything in the PCI gap is routed to bus 0 (so hotplug may not work), or (b) arrange to use _CRS. This patch does (a). Reference: https://bugzilla.redhat.com/show_bug.cgi?id=665109 Acked-by: Ira W. Snyder Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/broadcom_bus.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86/pci') diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c index 0846a5bbbfbd..ab8269b0da29 100644 --- a/arch/x86/pci/broadcom_bus.c +++ b/arch/x86/pci/broadcom_bus.c @@ -9,6 +9,7 @@ * option) any later version. */ +#include #include #include #include @@ -25,12 +26,14 @@ static void __devinit cnb20le_res(struct pci_dev *dev) u8 fbus, lbus; int i; +#ifdef CONFIG_ACPI /* - * The x86_pci_root_bus_res_quirks() function already refuses to use - * this information if ACPI _CRS was used. Therefore, we don't bother - * checking if ACPI is enabled, and just generate the information - * for both the ACPI _CRS and no ACPI cases. + * We should get host bridge information from ACPI unless the BIOS + * doesn't support it. */ + if (acpi_os_get_root_pointer()) + return; +#endif info = &pci_root_info[pci_root_num]; pci_root_num++; -- cgit v1.2.3