From 4bb9c5c02153dfc89a6c73a6f32091413805ad7d Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Thu, 12 Mar 2009 17:45:27 -0700 Subject: VM, x86, PAT: Change is_linear_pfn_mapping to not use vm_pgoff Impact: fix false positive PAT warnings - also fix VirtualBox hang Use of vma->vm_pgoff to identify the pfnmaps that are fully mapped at mmap time is broken. vm_pgoff is set by generic mmap code even for cases where drivers are setting up the mappings at the fault time. The problem was originally reported here: http://marc.info/?l=linux-kernel&m=123383810628583&w=2 Change is_linear_pfn_mapping logic to overload VM_INSERTPAGE flag along with VM_PFNMAP to mean full PFNMAP setup at mmap time. Problem also tracked at: http://bugzilla.kernel.org/show_bug.cgi?id=12800 Reported-by: Thomas Hellstrom Tested-by: Frans Pop Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha @intel.com> Cc: Nick Piggin Cc: "ebiederm@xmission.com" Cc: # only for 2.6.29.1, not .28 LKML-Reference: <20090313004527.GA7176@linux-os.sc.intel.com> Signed-off-by: Ingo Molnar --- include/linux/mm.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 065cdf8c09fb..3daa05feed9f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -98,7 +98,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ -#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ +#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it. 
Refer note in VM_PFNMAP_AT_MMAP below */ #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ @@ -126,6 +126,17 @@ extern unsigned int kobjsize(const void *objp); */ #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) +/* + * pfnmap vmas that are fully mapped at mmap time (not mapped on fault). + * Used by x86 PAT to identify such PFNMAP mappings and optimize their handling. + * Note VM_INSERTPAGE flag is overloaded here. i.e, + * VM_INSERTPAGE && !VM_PFNMAP implies + * The vma has had "vm_insert_page()" done on it + * VM_INSERTPAGE && VM_PFNMAP implies + * The vma is PFNMAP with full mapping at mmap time + */ +#define VM_PFNMAP_AT_MMAP (VM_INSERTPAGE | VM_PFNMAP) + /* * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. @@ -145,7 +156,7 @@ extern pgprot_t protection_map[16]; */ static inline int is_linear_pfn_mapping(struct vm_area_struct *vma) { - return ((vma->vm_flags & VM_PFNMAP) && vma->vm_pgoff); + return ((vma->vm_flags & VM_PFNMAP_AT_MMAP) == VM_PFNMAP_AT_MMAP); } static inline int is_pfn_mapping(struct vm_area_struct *vma) -- cgit v1.2.3 From 895791dac6946d535991edd11341046f8e85ea77 Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Fri, 13 Mar 2009 16:35:44 -0700 Subject: VM, x86, PAT: add a new vm flag to track full pfnmap at mmap Impact: cleanup Add a new vm flag VM_PFN_AT_MMAP to identify a PFNMAP that is fully mapped with remap_pfn_range. Patch removes the overloading of VM_INSERTPAGE from the earlier patch. 
Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Acked-by: Nick Piggin LKML-Reference: <20090313233543.GA19909@linux-os.sc.intel.com> Signed-off-by: Ingo Molnar --- include/linux/mm.h | 16 +++------------- mm/memory.c | 4 ++-- 2 files changed, 5 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3daa05feed9f..b1ea37fc7a24 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -98,12 +98,13 @@ extern unsigned int kobjsize(const void *objp); #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ -#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it. Refer note in VM_PFNMAP_AT_MMAP below */ +#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ +#define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */ #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS @@ -126,17 +127,6 @@ extern unsigned int kobjsize(const void *objp); */ #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) -/* - * pfnmap vmas that are fully mapped at mmap time (not mapped on fault). - * Used by x86 PAT to identify such PFNMAP mappings and optimize their handling. - * Note VM_INSERTPAGE flag is overloaded here. 
i.e, - * VM_INSERTPAGE && !VM_PFNMAP implies - * The vma has had "vm_insert_page()" done on it - * VM_INSERTPAGE && VM_PFNMAP implies - * The vma is PFNMAP with full mapping at mmap time - */ -#define VM_PFNMAP_AT_MMAP (VM_INSERTPAGE | VM_PFNMAP) - /* * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. @@ -156,7 +146,7 @@ extern pgprot_t protection_map[16]; */ static inline int is_linear_pfn_mapping(struct vm_area_struct *vma) { - return ((vma->vm_flags & VM_PFNMAP_AT_MMAP) == VM_PFNMAP_AT_MMAP); + return (vma->vm_flags & VM_PFN_AT_MMAP); } static inline int is_pfn_mapping(struct vm_area_struct *vma) diff --git a/mm/memory.c b/mm/memory.c index d7df5babcba9..2032ad2fc34b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1667,7 +1667,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, */ if (addr == vma->vm_start && end == vma->vm_end) { vma->vm_pgoff = pfn; - vma->vm_flags |= VM_PFNMAP_AT_MMAP; + vma->vm_flags |= VM_PFN_AT_MMAP; } else if (is_cow_mapping(vma->vm_flags)) return -EINVAL; @@ -1680,7 +1680,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, * needed from higher level routine calling unmap_vmas */ vma->vm_flags &= ~(VM_IO | VM_RESERVED | VM_PFNMAP); - vma->vm_flags &= ~VM_PFNMAP_AT_MMAP; + vma->vm_flags &= ~VM_PFN_AT_MMAP; return -EINVAL; } -- cgit v1.2.3 From 9d783ba042771284fb4ee5013c3d94220755ae7f Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:04:55 -0700 Subject: x86, x2apic: enable fault handling for intr-remapping Impact: interface augmentation (not yet used) Enable fault handling flow for intr-remapping as well. Fault handling code now shared by both dma-remapping and intr-remapping. Signed-off-by: Suresh Siddha Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/msidef.h | 1 + arch/x86/kernel/apic/io_apic.c | 9 +++- arch/x86/kernel/apic/probe_64.c | 9 ++++ drivers/pci/dmar.c | 102 +++++++++++++++++++++++++++++++++------- drivers/pci/intel-iommu.c | 3 +- drivers/pci/intr_remapping.c | 2 +- include/linux/dmar.h | 5 +- include/linux/intel-iommu.h | 4 +- 8 files changed, 107 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/msidef.h b/arch/x86/include/asm/msidef.h index 6706b3006f13..4cc48af23fef 100644 --- a/arch/x86/include/asm/msidef.h +++ b/arch/x86/include/asm/msidef.h @@ -47,6 +47,7 @@ #define MSI_ADDR_DEST_ID_MASK 0x00ffff0 #define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ MSI_ADDR_DEST_ID_MASK) +#define MSI_ADDR_EXT_DEST_ID(dest) ((dest) & 0xffffff00) #define MSI_ADDR_IR_EXT_INT (1 << 4) #define MSI_ADDR_IR_SHV (1 << 3) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 00e6071cefc4..b18a7734d689 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3294,7 +3294,12 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms } else #endif { - msg->address_hi = MSI_ADDR_BASE_HI; + if (x2apic_enabled()) + msg->address_hi = MSI_ADDR_BASE_HI | + MSI_ADDR_EXT_DEST_ID(dest); + else + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = MSI_ADDR_BASE_LO | ((apic->irq_dest_mode == 0) ? 
@@ -3528,7 +3533,7 @@ void arch_teardown_msi_irq(unsigned int irq) destroy_irq(irq); } -#ifdef CONFIG_DMAR +#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) #ifdef CONFIG_SMP static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 8d7748efe6a8..8297c2b8ed20 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -68,6 +68,15 @@ void __init default_setup_apic_routing(void) apic = &apic_physflat; printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } + +#ifdef CONFIG_X86_X2APIC + /* + * Now that apic routing model is selected, configure the + * fault handling for intr remapping. + */ + if (intr_remapping_enabled) + enable_drhd_fault_handling(); +#endif } /* Same for both flat and physical. */ diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 75d34bf2db50..bb4ed985f9c7 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -511,6 +511,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) return -ENOMEM; iommu->seq_id = iommu_allocated++; + sprintf (iommu->name, "dmar%d", iommu->seq_id); iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE); if (!iommu->reg) { @@ -817,7 +818,13 @@ int dmar_enable_qi(struct intel_iommu *iommu) /* iommu interrupt handling. Most stuff are MSI-like. 
*/ -static const char *fault_reason_strings[] = +enum faulttype { + DMA_REMAP, + INTR_REMAP, + UNKNOWN, +}; + +static const char *dma_remap_fault_reasons[] = { "Software", "Present bit in root entry is clear", @@ -833,14 +840,33 @@ static const char *fault_reason_strings[] = "non-zero reserved fields in CTP", "non-zero reserved fields in PTE", }; + +static const char *intr_remap_fault_reasons[] = +{ + "Detected reserved fields in the decoded interrupt-remapped request", + "Interrupt index exceeded the interrupt-remapping table size", + "Present field in the IRTE entry is clear", + "Error accessing interrupt-remapping table pointed by IRTA_REG", + "Detected reserved fields in the IRTE entry", + "Blocked a compatibility format interrupt request", + "Blocked an interrupt request due to source-id verification failure", +}; + #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) -const char *dmar_get_fault_reason(u8 fault_reason) +const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) { - if (fault_reason > MAX_FAULT_REASON_IDX) + if (fault_reason >= 0x20 && (fault_reason <= 0x20 + + ARRAY_SIZE(intr_remap_fault_reasons))) { + *fault_type = INTR_REMAP; + return intr_remap_fault_reasons[fault_reason - 0x20]; + } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) { + *fault_type = DMA_REMAP; + return dma_remap_fault_reasons[fault_reason]; + } else { + *fault_type = UNKNOWN; return "Unknown"; - else - return fault_reason_strings[fault_reason]; + } } void dmar_msi_unmask(unsigned int irq) @@ -897,16 +923,25 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type, u8 fault_reason, u16 source_id, unsigned long long addr) { const char *reason; + int fault_type; - reason = dmar_get_fault_reason(fault_reason); + reason = dmar_get_fault_reason(fault_reason, &fault_type); - printk(KERN_ERR - "DMAR:[%s] Request device [%02x:%02x.%d] " - "fault addr %llx \n" - "DMAR:[fault reason %02d] %s\n", - (type ? 
"DMA Read" : "DMA Write"), - (source_id >> 8), PCI_SLOT(source_id & 0xFF), - PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); + if (fault_type == INTR_REMAP) + printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] " + "fault index %llx\n" + "INTR-REMAP:[fault reason %02d] %s\n", + (source_id >> 8), PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr >> 48, + fault_reason, reason); + else + printk(KERN_ERR + "DMAR:[%s] Request device [%02x:%02x.%d] " + "fault addr %llx \n" + "DMAR:[fault reason %02d] %s\n", + (type ? "DMA Read" : "DMA Write"), + (source_id >> 8), PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); return 0; } @@ -920,10 +955,13 @@ static irqreturn_t dmar_fault(int irq, void *dev_id) spin_lock_irqsave(&iommu->register_lock, flag); fault_status = readl(iommu->reg + DMAR_FSTS_REG); + if (fault_status) + printk(KERN_ERR "DRHD: handling fault status reg %x\n", + fault_status); /* TBD: ignore advanced fault log currently */ if (!(fault_status & DMA_FSTS_PPF)) - goto clear_overflow; + goto clear_rest; fault_index = dma_fsts_fault_record_index(fault_status); reg = cap_fault_reg_offset(iommu->cap); @@ -964,11 +1002,10 @@ static irqreturn_t dmar_fault(int irq, void *dev_id) fault_index = 0; spin_lock_irqsave(&iommu->register_lock, flag); } -clear_overflow: - /* clear primary fault overflow */ +clear_rest: + /* clear all the other faults */ fault_status = readl(iommu->reg + DMAR_FSTS_REG); - if (fault_status & DMA_FSTS_PFO) - writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG); + writel(fault_status, iommu->reg + DMAR_FSTS_REG); spin_unlock_irqrestore(&iommu->register_lock, flag); return IRQ_HANDLED; @@ -978,6 +1015,12 @@ int dmar_set_interrupt(struct intel_iommu *iommu) { int irq, ret; + /* + * Check if the fault interrupt is already initialized. 
+ */ + if (iommu->irq) + return 0; + irq = create_irq(); if (!irq) { printk(KERN_ERR "IOMMU: no free vectors\n"); @@ -1003,3 +1046,26 @@ int dmar_set_interrupt(struct intel_iommu *iommu) printk(KERN_ERR "IOMMU: can't request irq\n"); return ret; } + +int __init enable_drhd_fault_handling(void) +{ + struct dmar_drhd_unit *drhd; + + /* + * Enable fault control interrupt. + */ + for_each_drhd_unit(drhd) { + int ret; + struct intel_iommu *iommu = drhd->iommu; + ret = dmar_set_interrupt(iommu); + + if (ret) { + printk(KERN_ERR "DRHD %Lx: failed to enable fault, " + " interrupt, ret %d\n", + (unsigned long long)drhd->reg_base_addr, ret); + return -1; + } + } + + return 0; +} diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 4a4ab651b709..25fc1df486bb 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1799,7 +1799,7 @@ static int __init init_dmars(void) struct dmar_rmrr_unit *rmrr; struct pci_dev *pdev; struct intel_iommu *iommu; - int i, ret, unit = 0; + int i, ret; /* * for each drhd @@ -1921,7 +1921,6 @@ static int __init init_dmars(void) if (drhd->ignored) continue; iommu = drhd->iommu; - sprintf (iommu->name, "dmar%d", unit++); iommu_flush_write_buffer(iommu); diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 5ffa65fffb6a..c38e3f437a81 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -308,7 +308,7 @@ int modify_irte(int irq, struct irte *irte_modified) index = irq_iommu->irte_index + irq_iommu->sub_handle; irte = &iommu->ir_table->base[index]; - set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1)); + set_64bit((unsigned long *)irte, irte_modified->low); __iommu_flush_cache(iommu, irte, sizeof(*irte)); rc = qi_flush_iec(iommu, index, 0); diff --git a/include/linux/dmar.h b/include/linux/dmar.h index f28440784cf0..c7768330c11d 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -49,6 +49,7 @@ extern int dmar_dev_scope_init(void); /* Intel 
IOMMU detection */ extern void detect_intel_iommu(void); +extern int enable_drhd_fault_handling(void); extern int parse_ioapics_under_ir(void); @@ -116,9 +117,6 @@ extern struct intel_iommu *map_ioapic_to_ir(int apic); #define intr_remapping_enabled (0) #endif -#ifdef CONFIG_DMAR -extern const char *dmar_get_fault_reason(u8 fault_reason); - /* Can't use the common MSI interrupt functions * since DMAR is not a pci device */ @@ -129,6 +127,7 @@ extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); extern int arch_setup_dmar_msi(unsigned int irq); +#ifdef CONFIG_DMAR extern int iommu_detected, no_iommu; extern struct list_head dmar_rmrr_units; struct dmar_rmrr_unit { diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index d2e3cbfba14f..a9563840644b 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -292,6 +292,8 @@ struct intel_iommu { spinlock_t register_lock; /* protect register handling */ int seq_id; /* sequence id of the iommu */ int agaw; /* agaw of this iommu */ + unsigned int irq; + unsigned char name[13]; /* Device Name */ #ifdef CONFIG_DMAR unsigned long *domain_ids; /* bitmap of domains */ @@ -299,8 +301,6 @@ struct intel_iommu { spinlock_t lock; /* protect context, domain ids */ struct root_entry *root_entry; /* virtual address */ - unsigned int irq; - unsigned char name[7]; /* Device Name */ struct iommu_flush flush; #endif struct q_inval *qi; /* Queued invalidation info */ -- cgit v1.2.3 From eba67e5da6e971993b2899d2cdf459ce77d3dbc5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:04:56 -0700 Subject: x86, dmar: routines for disabling queued invalidation and intr remapping Impact: new interfaces (not yet used) Routines for disabling queued invalidation and interrupt remapping. Signed-off-by: Suresh Siddha Signed-off-by: H. 
Peter Anvin --- drivers/pci/dmar.c | 36 ++++++++++++++++++++++++++++++++++++ drivers/pci/intr_remapping.c | 27 +++++++++++++++++++++++++++ include/linux/intel-iommu.h | 1 + 3 files changed, 64 insertions(+) (limited to 'include') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index bb4ed985f9c7..932e5e3930fc 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -753,6 +753,42 @@ int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, return qi_submit_sync(&desc, iommu); } +/* + * Disable Queued Invalidation interface. + */ +void dmar_disable_qi(struct intel_iommu *iommu) +{ + unsigned long flags; + u32 sts; + cycles_t start_time = get_cycles(); + + if (!ecap_qis(iommu->ecap)) + return; + + spin_lock_irqsave(&iommu->register_lock, flags); + + sts = dmar_readq(iommu->reg + DMAR_GSTS_REG); + if (!(sts & DMA_GSTS_QIES)) + goto end; + + /* + * Give a chance to HW to complete the pending invalidation requests. + */ + while ((readl(iommu->reg + DMAR_IQT_REG) != + readl(iommu->reg + DMAR_IQH_REG)) && + (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time))) + cpu_relax(); + + iommu->gcmd &= ~DMA_GCMD_QIE; + + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); + + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, + !(sts & DMA_GSTS_QIES), sts); +end: + spin_unlock_irqrestore(&iommu->register_lock, flags); +} + /* * Enable Queued Invalidation interface. This is a must to support * interrupt-remapping. Also used by DMA-remapping, which replaces diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index c38e3f437a81..0d202d73a1ac 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -467,6 +467,33 @@ static int setup_intr_remapping(struct intel_iommu *iommu, int mode) return 0; } +/* + * Disable Interrupt Remapping. 
+ */ +static void disable_intr_remapping(struct intel_iommu *iommu) +{ + unsigned long flags; + u32 sts; + + if (!ecap_ir_support(iommu->ecap)) + return; + + spin_lock_irqsave(&iommu->register_lock, flags); + + sts = dmar_readq(iommu->reg + DMAR_GSTS_REG); + if (!(sts & DMA_GSTS_IRES)) + goto end; + + iommu->gcmd &= ~DMA_GCMD_IRE; + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); + + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, !(sts & DMA_GSTS_IRES), sts); + +end: + spin_unlock_irqrestore(&iommu->register_lock, flags); +} + int __init enable_intr_remapping(int eim) { struct dmar_drhd_unit *drhd; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index a9563840644b..78c1262e8704 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -321,6 +321,7 @@ extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); extern int alloc_iommu(struct dmar_drhd_unit *drhd); extern void free_iommu(struct intel_iommu *iommu); extern int dmar_enable_qi(struct intel_iommu *iommu); +extern void dmar_disable_qi(struct intel_iommu *iommu); extern void qi_global_iec(struct intel_iommu *iommu); extern int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, -- cgit v1.2.3 From 1531a6a6b81a4e6f9eec9a5608758a6ea14b96e0 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:04:57 -0700 Subject: x86, dmar: start with sane state while enabling dma and interrupt-remapping Impact: cleanup/sanitization Start from a sane state while enabling dma and interrupt-remapping, by clearing the previous recorded faults and disabling previously enabled queued invalidation and interrupt-remapping. Signed-off-by: Suresh Siddha Signed-off-by: H. 
Peter Anvin --- drivers/pci/dmar.c | 5 +---- drivers/pci/intel-iommu.c | 29 +++++++++++++++++++++++++++++ drivers/pci/intr_remapping.c | 17 +++++++++++++++++ include/linux/dmar.h | 2 ++ 4 files changed, 49 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 932e5e3930fc..f1805002e436 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -982,7 +982,7 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type, } #define PRIMARY_FAULT_REG_LEN (16) -static irqreturn_t dmar_fault(int irq, void *dev_id) +irqreturn_t dmar_fault(int irq, void *dev_id) { struct intel_iommu *iommu = dev_id; int reg, fault_index; @@ -1074,9 +1074,6 @@ int dmar_set_interrupt(struct intel_iommu *iommu) return 0; } - /* Force fault register is cleared */ - dmar_fault(irq, iommu); - ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu); if (ret) printk(KERN_ERR "IOMMU: can't request irq\n"); diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 25fc1df486bb..ef167b8b047d 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1855,11 +1855,40 @@ static int __init init_dmars(void) } } + /* + * Start from the sane iommu hardware state. + */ for_each_drhd_unit(drhd) { if (drhd->ignored) continue; iommu = drhd->iommu; + + /* + * If the queued invalidation is already initialized by us + * (for example, while enabling interrupt-remapping) then + * we got the things already rolling from a sane state. + */ + if (iommu->qi) + continue; + + /* + * Clear any previous faults. + */ + dmar_fault(-1, iommu); + /* + * Disable queued invalidation if supported and already enabled + * before OS handover. 
+ */ + dmar_disable_qi(iommu); + } + + for_each_drhd_unit(drhd) { + if (drhd->ignored) + continue; + + iommu = drhd->iommu; + if (dmar_enable_qi(iommu)) { /* * Queued Invalidate not enabled, use Register Based diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 0d202d73a1ac..a84686b2478b 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -499,6 +499,23 @@ int __init enable_intr_remapping(int eim) struct dmar_drhd_unit *drhd; int setup = 0; + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; + + /* + * Clear previous faults. + */ + dmar_fault(-1, iommu); + + /* + * Disable intr remapping and queued invalidation, if already + * enabled prior to OS handover. + */ + disable_intr_remapping(iommu); + + dmar_disable_qi(iommu); + } + /* * check for the Interrupt-remapping support */ diff --git a/include/linux/dmar.h b/include/linux/dmar.h index c7768330c11d..8a035aec14a9 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -24,6 +24,7 @@ #include #include #include +#include #if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) struct intel_iommu; @@ -125,6 +126,7 @@ extern void dmar_msi_mask(unsigned int irq); extern void dmar_msi_read(int irq, struct msi_msg *msg); extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); +extern irqreturn_t dmar_fault(int irq, void *dev_id); extern int arch_setup_dmar_msi(unsigned int irq); #ifdef CONFIG_DMAR -- cgit v1.2.3 From 29b61be65a33c95564fa82e7e8d60d97adb68ea8 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 16 Mar 2009 17:05:02 -0700 Subject: x86, x2apic: cleanup ifdef CONFIG_INTR_REMAP in io_apic code Impact: cleanup Clean up #ifdefs and replace them with helper functions. Signed-off-by: Suresh Siddha Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 44 +++++++++------------------------------- arch/x86/kernel/apic/probe_64.c | 2 -- include/linux/dmar.h | 45 ++++++++++++++++++++++++++++++++++------- 3 files changed, 48 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e074eac5bd35..cf27795c641c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -554,16 +554,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq apic = entry->apic; pin = entry->pin; -#ifdef CONFIG_INTR_REMAP /* * With interrupt-remapping, destination information comes * from interrupt-remapping table entry. */ if (!irq_remapped(irq)) io_apic_write(apic, 0x11 + pin*2, dest); -#else - io_apic_write(apic, 0x11 + pin*2, dest); -#endif reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; @@ -1419,9 +1415,8 @@ void __setup_vector_irq(int cpu) } static struct irq_chip ioapic_chip; -#ifdef CONFIG_INTR_REMAP static struct irq_chip ir_ioapic_chip; -#endif +static struct irq_chip msi_ir_chip; #define IOAPIC_AUTO -1 #define IOAPIC_EDGE 0 @@ -1460,7 +1455,6 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t else desc->status &= ~IRQ_LEVEL; -#ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { desc->status |= IRQ_MOVE_PCNTXT; if (trigger) @@ -1472,7 +1466,7 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t handle_edge_irq, "edge"); return; } -#endif + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) set_irq_chip_and_handler_name(irq, &ioapic_chip, @@ -1493,7 +1487,6 @@ int setup_ioapic_entry(int apic_id, int irq, */ memset(entry,0,sizeof(*entry)); -#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) { struct intel_iommu *iommu = map_ioapic_to_ir(apic_id); struct irte irte; @@ -1535,9 +1528,7 @@ int setup_ioapic_entry(int 
apic_id, int irq, * irq handler will do the explicit EOI to the io-apic. */ ir_entry->vector = pin; - } else -#endif - { + } else { entry->delivery_mode = apic->irq_delivery_mode; entry->dest_mode = apic->irq_dest_mode; entry->dest = destination; @@ -1662,10 +1653,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, { struct IO_APIC_route_entry entry; -#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) return; -#endif memset(&entry, 0, sizeof(entry)); @@ -2395,6 +2384,11 @@ static void set_ir_ioapic_affinity_irq(unsigned int irq, set_ir_ioapic_affinity_irq_desc(desc, mask); } +#else +static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, + const struct cpumask *mask) +{ +} #endif asmlinkage void smp_irq_move_cleanup_interrupt(void) @@ -2883,10 +2877,8 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { -#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) panic("BIOS bug: timer not connected to IO-APIC"); -#endif pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -2922,10 +2914,8 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } -#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) panic("timer doesn't work through Interrupt-remapped IO-APIC"); -#endif local_irq_disable(); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) @@ -3219,9 +3209,7 @@ void destroy_irq(unsigned int irq) if (desc) desc->chip_data = cfg; -#ifdef CONFIG_INTR_REMAP free_irte(irq); -#endif spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq, cfg); spin_unlock_irqrestore(&vector_lock, flags); @@ -3247,7 +3235,6 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); -#ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { struct irte irte; int ir_index; @@ -3273,9 +3260,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms MSI_ADDR_IR_SHV | 
MSI_ADDR_IR_INDEX1(ir_index) | MSI_ADDR_IR_INDEX2(ir_index); - } else -#endif - { + } else { if (x2apic_enabled()) msg->address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest); @@ -3392,6 +3377,7 @@ static struct irq_chip msi_ir_chip = { #endif .retrigger = ioapic_retrigger_irq, }; +#endif /* * Map the PCI dev to the corresponding remapping hardware unit @@ -3419,7 +3405,6 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) } return index; } -#endif static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) { @@ -3433,7 +3418,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) set_irq_msi(irq, msidesc); write_msi_msg(irq, &msg); -#ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { struct irq_desc *desc = irq_to_desc(irq); /* @@ -3442,7 +3426,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) desc->status |= IRQ_MOVE_PCNTXT; set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); } else -#endif set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); @@ -3456,11 +3439,8 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) int ret, sub_handle; struct msi_desc *msidesc; unsigned int irq_want; - -#ifdef CONFIG_INTR_REMAP struct intel_iommu *iommu = 0; int index = 0; -#endif irq_want = nr_irqs_gsi; sub_handle = 0; @@ -3469,7 +3449,6 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) if (irq == 0) return -1; irq_want = irq + 1; -#ifdef CONFIG_INTR_REMAP if (!intr_remapping_enabled) goto no_ir; @@ -3497,7 +3476,6 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) set_irte_irq(irq, iommu, index, sub_handle); } no_ir: -#endif ret = setup_msi_irq(dev, msidesc, irq); if (ret < 0) goto error; @@ -4032,11 +4010,9 @@ void __init setup_ioapic_dest(void) else mask = apic->target_cpus(); -#ifdef CONFIG_INTR_REMAP if 
(intr_remapping_enabled) set_ir_ioapic_affinity_irq_desc(desc, mask); else -#endif set_ioapic_affinity_irq_desc(desc, mask); } diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 8297c2b8ed20..1783652bb0e5 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -69,14 +69,12 @@ void __init default_setup_apic_routing(void) printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } -#ifdef CONFIG_X86_X2APIC /* * Now that apic routing model is selected, configure the * fault handling for intr remapping. */ if (intr_remapping_enabled) enable_drhd_fault_handling(); -#endif } /* Same for both flat and physical. */ diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 8a035aec14a9..2f3427468956 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -26,9 +26,8 @@ #include #include -#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) struct intel_iommu; - +#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) struct dmar_drhd_unit { struct list_head list; /* list of drhd units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -52,7 +51,6 @@ extern int dmar_dev_scope_init(void); extern void detect_intel_iommu(void); extern int enable_drhd_fault_handling(void); - extern int parse_ioapics_under_ir(void); extern int alloc_iommu(struct dmar_drhd_unit *); #else @@ -65,12 +63,12 @@ static inline int dmar_table_init(void) { return -ENODEV; } +static inline int enable_drhd_fault_handling(void) +{ + return -1; +} #endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */ -#ifdef CONFIG_INTR_REMAP -extern int intr_remapping_enabled; -extern int enable_intr_remapping(int); - struct irte { union { struct { @@ -99,6 +97,10 @@ struct irte { __u64 high; }; }; +#ifdef CONFIG_INTR_REMAP +extern int intr_remapping_enabled; +extern int enable_intr_remapping(int); + extern int get_irte(int irq, struct irte *entry); extern int modify_irte(int irq, struct irte *irte_modified); extern int alloc_irte(struct 
intel_iommu *iommu, int irq, u16 count); @@ -113,6 +115,35 @@ extern int irq_remapped(int irq); extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); extern struct intel_iommu *map_ioapic_to_ir(int apic); #else +static inline int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) +{ + return -1; +} +static inline int modify_irte(int irq, struct irte *irte_modified) +{ + return -1; +} +static inline int free_irte(int irq) +{ + return -1; +} +static inline int map_irq_to_irte_handle(int irq, u16 *sub_handle) +{ + return -1; +} +static inline int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, + u16 sub_handle) +{ + return -1; +} +static inline struct intel_iommu *map_dev_to_ir(struct pci_dev *dev) +{ + return NULL; +} +static inline struct intel_iommu *map_ioapic_to_ir(int apic) +{ + return NULL; +} #define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) #define intr_remapping_enabled (0) -- cgit v1.2.3