diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-08 06:33:44 +0900 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-08 06:33:44 +0900 |
commit | c0703c12ef6744b6d2565ec67a15aaf25d534abd (patch) | |
tree | aff6bcf3874dba969805b057f0ceb3ea0b0b2112 | |
parent | 0e51793e162ca432fc5f04178cf82b80a92c2659 (diff) | |
parent | 009487258399cb4f431992919fa0f386d1b74ceb (diff) | |
download | linux-c0703c12ef6744b6d2565ec67a15aaf25d534abd.tar.bz2 |
Merge tag 'iommu-updates-v3.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull IOMMU updates from Joerg Roedel:
"This time the IOMMU updates contain a bunch of fixes and cleanups to
various IOMMU drivers and the DMA debug code. New features are the
code for IRQ remapping support with the AMD IOMMU (preperation for
that was already merged in the last release) and a debugfs interface
to export some statistics in the NVidia Tegra IOMMU driver."
* tag 'iommu-updates-v3.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (27 commits)
iommu/amd: Remove obsolete comment line
dma-debug: Remove local BUS_NOTIFY_UNBOUND_DRIVER define
iommu/amd: Fix possible use after free in get_irq_table()
iommu/amd: Report irq remapping through IOMMU-API
iommu/amd: Print message to system log when irq remapping is enabled
iommu/irq: Use amd_iommu_irq_ops if supported
iommu/amd: Make sure irq remapping still works on dma init failure
iommu/amd: Add initialization routines for AMD interrupt remapping
iommu/amd: Add call-back routine for HPET MSI
iommu/amd: Implement MSI routines for interrupt remapping
iommu/amd: Add IOAPIC remapping routines
iommu/amd: Add routines to manage irq remapping tables
iommu/amd: Add IRTE invalidation routine
iommu/amd: Make sure IOMMU is not considered to translate itself
iommu/amd: Split device table initialization into irq and dma part
iommu/amd: Check if IOAPIC information is correct
iommu/amd: Allocate data structures to keep track of irq remapping tables
iommu/amd: Add slab-cache for irq remapping tables
iommu/amd: Keep track of HPET and IOAPIC device ids
iommu/amd: Fix features reporting
...
-rw-r--r-- | arch/arm/mach-tegra/include/mach/smmu.h | 63 | ||||
-rw-r--r-- | drivers/iommu/Kconfig | 2 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu.c | 514 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_init.c | 253 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_proto.h | 8 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_types.h | 59 | ||||
-rw-r--r-- | drivers/iommu/exynos-iommu.c | 3 | ||||
-rw-r--r-- | drivers/iommu/intel-iommu.c | 4 | ||||
-rw-r--r-- | drivers/iommu/irq_remapping.c | 5 | ||||
-rw-r--r-- | drivers/iommu/irq_remapping.h | 6 | ||||
-rw-r--r-- | drivers/iommu/tegra-smmu.c | 261 | ||||
-rw-r--r-- | lib/dma-debug.c | 5 |
12 files changed, 1071 insertions, 112 deletions
diff --git a/arch/arm/mach-tegra/include/mach/smmu.h b/arch/arm/mach-tegra/include/mach/smmu.h deleted file mode 100644 index dad403a9cf00..000000000000 --- a/arch/arm/mach-tegra/include/mach/smmu.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * IOMMU API for SMMU in Tegra30 - * - * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#ifndef MACH_SMMU_H -#define MACH_SMMU_H - -enum smmu_hwgrp { - HWGRP_AFI, - HWGRP_AVPC, - HWGRP_DC, - HWGRP_DCB, - HWGRP_EPP, - HWGRP_G2, - HWGRP_HC, - HWGRP_HDA, - HWGRP_ISP, - HWGRP_MPE, - HWGRP_NV, - HWGRP_NV2, - HWGRP_PPCS, - HWGRP_SATA, - HWGRP_VDE, - HWGRP_VI, - - HWGRP_COUNT, - - HWGRP_END = ~0, -}; - -#define HWG_AFI (1 << HWGRP_AFI) -#define HWG_AVPC (1 << HWGRP_AVPC) -#define HWG_DC (1 << HWGRP_DC) -#define HWG_DCB (1 << HWGRP_DCB) -#define HWG_EPP (1 << HWGRP_EPP) -#define HWG_G2 (1 << HWGRP_G2) -#define HWG_HC (1 << HWGRP_HC) -#define HWG_HDA (1 << HWGRP_HDA) -#define HWG_ISP (1 << HWGRP_ISP) -#define HWG_MPE (1 << HWGRP_MPE) -#define HWG_NV (1 << HWGRP_NV) -#define HWG_NV2 (1 << HWGRP_NV2) -#define HWG_PPCS (1 << HWGRP_PPCS) -#define HWG_SATA (1 << HWGRP_SATA) -#define HWG_VDE (1 << HWGRP_VDE) -#define HWG_VI (1 << HWGRP_VI) - -#endif /* MACH_SMMU_H */ diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 9f69b561f5db..e39f9dbf297b 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -42,7 +42,7 @@ config AMD_IOMMU select PCI_PRI select PCI_PASID select IOMMU_API - depends on X86_64 && PCI && ACPI + depends on X86_64 && PCI && ACPI && X86_IO_APIC ---help--- With this option you can enable support for AMD IOMMU hardware in your system. An IOMMU is a hardware component which provides diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index e89daf1b21b4..55074cba20eb 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -31,6 +31,12 @@ #include <linux/amd-iommu.h> #include <linux/notifier.h> #include <linux/export.h> +#include <linux/irq.h> +#include <linux/msi.h> +#include <asm/irq_remapping.h> +#include <asm/io_apic.h> +#include <asm/apic.h> +#include <asm/hw_irq.h> #include <asm/msidef.h> #include <asm/proto.h> #include <asm/iommu.h> @@ -39,6 +45,7 @@ #include "amd_iommu_proto.h" #include "amd_iommu_types.h" +#include "irq_remapping.h" #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) @@ -72,6 +79,9 @@ static DEFINE_SPINLOCK(iommu_pd_list_lock); static LIST_HEAD(dev_data_list); static DEFINE_SPINLOCK(dev_data_list_lock); +LIST_HEAD(ioapic_map); +LIST_HEAD(hpet_map); + /* * Domain for untranslated devices - only allocated * if iommu=pt passed on kernel cmd line. @@ -92,6 +102,8 @@ struct iommu_cmd { u32 data[4]; }; +struct kmem_cache *amd_iommu_irq_cache; + static void update_domain(struct protection_domain *domain); static int __init alloc_passthrough_domain(void); @@ -686,7 +698,7 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu) /* * Release iommu->lock because ppr-handling might need to - * re-aquire it + * re-acquire it */ spin_unlock_irqrestore(&iommu->lock, flags); @@ -804,7 +816,7 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); if (s) /* size bit - we flush more than one 4kb page */ cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; - if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ + if (pde) /* PDE bit - we want to flush everything, not only the PTEs */ cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; } @@ -899,6 +911,13 @@ static void build_inv_all(struct iommu_cmd *cmd) CMD_SET_TYPE(cmd, CMD_INV_ALL); } +static void build_inv_irt(struct iommu_cmd *cmd, u16 devid) +{ + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = devid; + CMD_SET_TYPE(cmd, CMD_INV_IRT); +} + /* * Writes the command to the IOMMUs command buffer and informs the * hardware about the new command. @@ -1020,12 +1039,32 @@ static void iommu_flush_all(struct amd_iommu *iommu) iommu_completion_wait(iommu); } +static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid) +{ + struct iommu_cmd cmd; + + build_inv_irt(&cmd, devid); + + iommu_queue_command(iommu, &cmd); +} + +static void iommu_flush_irt_all(struct amd_iommu *iommu) +{ + u32 devid; + + for (devid = 0; devid <= MAX_DEV_TABLE_ENTRIES; devid++) + iommu_flush_irt(iommu, devid); + + iommu_completion_wait(iommu); +} + void iommu_flush_all_caches(struct amd_iommu *iommu) { if (iommu_feature(iommu, FEATURE_IA)) { iommu_flush_all(iommu); } else { iommu_flush_dte_all(iommu); + iommu_flush_irt_all(iommu); iommu_flush_tlb_all(iommu); } } @@ -2155,7 +2194,7 @@ static bool pci_pri_tlp_required(struct pci_dev *pdev) } /* - * If a device is not yet associated with a domain, this function does + * If a device is not yet associated with a domain, this function * assigns it visible for the hardware */ static int attach_device(struct device *dev, @@ -2405,7 +2444,7 @@ static struct protection_domain *get_domain(struct device *dev) if (domain != NULL) return domain; - /* Device not bount yet - bind it */ + /* Device not bound yet - bind it */ dma_dom = find_protection_domain(devid); if (!dma_dom) dma_dom = amd_iommu_rlookup_table[devid]->default_dom; @@ -2944,7 +2983,7 @@ static void __init prealloc_protection_domains(void) alloc_passthrough_domain(); dev_data->passthrough = true; attach_device(&dev->dev, pt_domain); - pr_info("AMD-Vi: Using passthough domain for device %s\n", + pr_info("AMD-Vi: Using passthrough domain for device %s\n", dev_name(&dev->dev)); } @@ -3316,6 +3355,8 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain, switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: return 1; + case IOMMU_CAP_INTR_REMAP: + return irq_remapping_enabled; } return 0; @@ -3743,3 +3784,466 @@ int amd_iommu_device_info(struct pci_dev *pdev, return 0; } EXPORT_SYMBOL(amd_iommu_device_info); + +#ifdef CONFIG_IRQ_REMAP + +/***************************************************************************** + * + * Interrupt Remapping Implementation + * + *****************************************************************************/ + +union irte { + u32 val; + struct { + u32 valid : 1, + no_fault : 1, + int_type : 3, + rq_eoi : 1, + dm : 1, + rsvd_1 : 1, + destination : 8, + vector : 8, + rsvd_2 : 8; + } fields; +}; + +#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) +#define DTE_IRQ_REMAP_INTCTL (2ULL << 60) +#define DTE_IRQ_TABLE_LEN (8ULL << 1) +#define DTE_IRQ_REMAP_ENABLE 1ULL + +static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) +{ + u64 dte; + + dte = amd_iommu_dev_table[devid].data[2]; + dte &= ~DTE_IRQ_PHYS_ADDR_MASK; + dte |= virt_to_phys(table->table); + dte |= DTE_IRQ_REMAP_INTCTL; + dte |= DTE_IRQ_TABLE_LEN; + dte |= DTE_IRQ_REMAP_ENABLE; + + amd_iommu_dev_table[devid].data[2] = dte; +} + +#define IRTE_ALLOCATED (~1U) + +static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic) +{ + struct irq_remap_table *table = NULL; + struct amd_iommu *iommu; + unsigned long flags; + u16 alias; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + goto out_unlock; + + table = irq_lookup_table[devid]; + if (table) + goto out; + + alias = amd_iommu_alias_table[devid]; + table = irq_lookup_table[alias]; + if (table) { + irq_lookup_table[devid] = table; + set_dte_irq_entry(devid, table); + iommu_flush_dte(iommu, devid); + goto out; + } + + /* Nothing there yet, allocate new irq remapping table */ + table = kzalloc(sizeof(*table), GFP_ATOMIC); + if (!table) + goto out; + + if (ioapic) + /* Keep the first 32 indexes free for IOAPIC interrupts */ + table->min_index = 32; + + table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_ATOMIC); + if (!table->table) { + kfree(table); + table = NULL; + goto out; + } + + memset(table->table, 0, MAX_IRQS_PER_TABLE * sizeof(u32)); + + if (ioapic) { + int i; + + for (i = 0; i < 32; ++i) + table->table[i] = IRTE_ALLOCATED; + } + + irq_lookup_table[devid] = table; + set_dte_irq_entry(devid, table); + iommu_flush_dte(iommu, devid); + if (devid != alias) { + irq_lookup_table[alias] = table; + set_dte_irq_entry(devid, table); + iommu_flush_dte(iommu, alias); + } + +out: + iommu_completion_wait(iommu); + +out_unlock: + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return table; +} + +static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count) +{ + struct irq_remap_table *table; + unsigned long flags; + int index, c; + + table = get_irq_table(devid, false); + if (!table) + return -ENODEV; + + spin_lock_irqsave(&table->lock, flags); + + /* Scan table for free entries */ + for (c = 0, index = table->min_index; + index < MAX_IRQS_PER_TABLE; + ++index) { + if (table->table[index] == 0) + c += 1; + else + c = 0; + + if (c == count) { + struct irq_2_iommu *irte_info; + + for (; c != 0; --c) + table->table[index - c + 1] = IRTE_ALLOCATED; + + index -= count - 1; + + irte_info = &cfg->irq_2_iommu; + irte_info->sub_handle = devid; + irte_info->irte_index = index; + irte_info->iommu = (void *)cfg; + + goto out; + } + } + + index = -ENOSPC; + +out: + spin_unlock_irqrestore(&table->lock, flags); + + return index; +} + +static int get_irte(u16 devid, int index, union irte *irte) +{ + struct irq_remap_table *table; + unsigned long flags; + + table = get_irq_table(devid, false); + if (!table) + return -ENOMEM; + + spin_lock_irqsave(&table->lock, flags); + irte->val = table->table[index]; + spin_unlock_irqrestore(&table->lock, flags); + + return 0; +} + +static int modify_irte(u16 devid, int index, union irte irte) +{ + struct irq_remap_table *table; + struct amd_iommu *iommu; + unsigned long flags; + + iommu = amd_iommu_rlookup_table[devid]; + if (iommu == NULL) + return -EINVAL; + + table = get_irq_table(devid, false); + if (!table) + return -ENOMEM; + + spin_lock_irqsave(&table->lock, flags); + table->table[index] = irte.val; + spin_unlock_irqrestore(&table->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); + + return 0; +} + +static void free_irte(u16 devid, int index) +{ + struct irq_remap_table *table; + struct amd_iommu *iommu; + unsigned long flags; + + iommu = amd_iommu_rlookup_table[devid]; + if (iommu == NULL) + return; + + table = get_irq_table(devid, false); + if (!table) + return; + + spin_lock_irqsave(&table->lock, flags); + table->table[index] = 0; + spin_unlock_irqrestore(&table->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); +} + +static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr) +{ + struct irq_remap_table *table; + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + union irte irte; + int ioapic_id; + int index; + int devid; + int ret; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + irte_info = &cfg->irq_2_iommu; + ioapic_id = mpc_ioapic_id(attr->ioapic); + devid = get_ioapic_devid(ioapic_id); + + if (devid < 0) + return devid; + + table = get_irq_table(devid, true); + if (table == NULL) + return -ENOMEM; + + index = attr->ioapic_pin; + + /* Setup IRQ remapping info */ + irte_info->sub_handle = devid; + irte_info->irte_index = index; + irte_info->iommu = (void *)cfg; + + /* Setup IRTE for IOMMU */ + irte.val = 0; + irte.fields.vector = vector; + irte.fields.int_type = apic->irq_delivery_mode; + irte.fields.destination = destination; + irte.fields.dm = apic->irq_dest_mode; + irte.fields.valid = 1; + + ret = modify_irte(devid, index, irte); + if (ret) + return ret; + + /* Setup IOAPIC entry */ + memset(entry, 0, sizeof(*entry)); + + entry->vector = index; + entry->mask = 0; + entry->trigger = attr->trigger; + entry->polarity = attr->polarity; + + /* + * Mask level triggered irqs. + */ + if (attr->trigger) + entry->mask = 1; + + return 0; +} + +static int set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_2_iommu *irte_info; + unsigned int dest, irq; + struct irq_cfg *cfg; + union irte irte; + int err; + + if (!config_enabled(CONFIG_SMP)) + return -1; + + cfg = data->chip_data; + irq = data->irq; + irte_info = &cfg->irq_2_iommu; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return -EINVAL; + + if (get_irte(irte_info->sub_handle, irte_info->irte_index, &irte)) + return -EBUSY; + + if (assign_irq_vector(irq, cfg, mask)) + return -EBUSY; + + err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest); + if (err) { + if (assign_irq_vector(irq, cfg, data->affinity)) + pr_err("AMD-Vi: Failed to recover vector for irq %d\n", irq); + return err; + } + + irte.fields.vector = cfg->vector; + irte.fields.destination = dest; + + modify_irte(irte_info->sub_handle, irte_info->irte_index, irte); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + cpumask_copy(data->affinity, mask); + + return 0; +} + +static int free_irq(int irq) +{ + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + irte_info = &cfg->irq_2_iommu; + + free_irte(irte_info->sub_handle, irte_info->irte_index); + + return 0; +} + +static void compose_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id) +{ + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + union irte irte; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return; + + irte_info = &cfg->irq_2_iommu; + + irte.val = 0; + irte.fields.vector = cfg->vector; + irte.fields.int_type = apic->irq_delivery_mode; + irte.fields.destination = dest; + irte.fields.dm = apic->irq_dest_mode; + irte.fields.valid = 1; + + modify_irte(irte_info->sub_handle, irte_info->irte_index, irte); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = MSI_ADDR_BASE_LO; + msg->data = irte_info->irte_index; +} + +static int msi_alloc_irq(struct pci_dev *pdev, int irq, int nvec) +{ + struct irq_cfg *cfg; + int index; + u16 devid; + + if (!pdev) + return -EINVAL; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + devid = get_device_id(&pdev->dev); + index = alloc_irq_index(cfg, devid, nvec); + + return index < 0 ? MAX_IRQS_PER_TABLE : index; +} + +static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq, + int index, int offset) +{ + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + u16 devid; + + if (!pdev) + return -EINVAL; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + if (index >= MAX_IRQS_PER_TABLE) + return 0; + + devid = get_device_id(&pdev->dev); + irte_info = &cfg->irq_2_iommu; + + irte_info->sub_handle = devid; + irte_info->irte_index = index + offset; + irte_info->iommu = (void *)cfg; + + return 0; +} + +static int setup_hpet_msi(unsigned int irq, unsigned int id) +{ + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + int index, devid; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + irte_info = &cfg->irq_2_iommu; + devid = get_hpet_devid(id); + if (devid < 0) + return devid; + + index = alloc_irq_index(cfg, devid, 1); + if (index < 0) + return index; + + irte_info->sub_handle = devid; + irte_info->irte_index = index; + irte_info->iommu = (void *)cfg; + + return 0; +} + +struct irq_remap_ops amd_iommu_irq_ops = { + .supported = amd_iommu_supported, + .prepare = amd_iommu_prepare, + .enable = amd_iommu_enable, + .disable = amd_iommu_disable, + .reenable = amd_iommu_reenable, + .enable_faulting = amd_iommu_enable_faulting, + .setup_ioapic_entry = setup_ioapic_entry, + .set_affinity = set_affinity, + .free_irq = free_irq, + .compose_msi_msg = compose_msi_msg, + .msi_alloc_irq = msi_alloc_irq, + .msi_setup_irq = msi_setup_irq, + .setup_hpet_msi = setup_hpet_msi, +}; +#endif diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 18a89b760aaa..18b0d99bd4d6 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -26,16 +26,18 @@ #include <linux/msi.h> #include <linux/amd-iommu.h> #include <linux/export.h> -#include <linux/acpi.h> #include <acpi/acpi.h> #include <asm/pci-direct.h> #include <asm/iommu.h> #include <asm/gart.h> #include <asm/x86_init.h> #include <asm/iommu_table.h> +#include <asm/io_apic.h> +#include <asm/irq_remapping.h> #include "amd_iommu_proto.h" #include "amd_iommu_types.h" +#include "irq_remapping.h" /* * definitions for the ACPI scanning code @@ -55,6 +57,10 @@ #define IVHD_DEV_ALIAS_RANGE 0x43 #define IVHD_DEV_EXT_SELECT 0x46 #define IVHD_DEV_EXT_SELECT_RANGE 0x47 +#define IVHD_DEV_SPECIAL 0x48 + +#define IVHD_SPECIAL_IOAPIC 1 +#define IVHD_SPECIAL_HPET 2 #define IVHD_FLAG_HT_TUN_EN_MASK 0x01 #define IVHD_FLAG_PASSPW_EN_MASK 0x02 @@ -123,6 +129,7 @@ struct ivmd_header { } __attribute__((packed)); bool amd_iommu_dump; +bool amd_iommu_irq_remap __read_mostly; static bool amd_iommu_detected; static bool __initdata amd_iommu_disabled; @@ -178,7 +185,13 @@ u16 *amd_iommu_alias_table; struct amd_iommu **amd_iommu_rlookup_table; /* - * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap + * This table is used to find the irq remapping table for a given device id + * quickly. + */ +struct irq_remap_table **irq_lookup_table; + +/* + * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap * to know which ones are already in use. */ unsigned long *amd_iommu_pd_alloc_bitmap; @@ -478,7 +491,7 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) /**************************************************************************** * - * The following functions belong the the code path which parses the ACPI table + * The following functions belong to the code path which parses the ACPI table * the second time. In this ACPI parsing iteration we allocate IOMMU specific * data structures, initialize the device/alias/rlookup table and also * basically initialize the hardware. @@ -690,8 +703,33 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, set_iommu_for_device(iommu, devid); } +static int add_special_device(u8 type, u8 id, u16 devid) +{ + struct devid_map *entry; + struct list_head *list; + + if (type != IVHD_SPECIAL_IOAPIC && type != IVHD_SPECIAL_HPET) + return -EINVAL; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->id = id; + entry->devid = devid; + + if (type == IVHD_SPECIAL_IOAPIC) + list = &ioapic_map; + else + list = &hpet_map; + + list_add_tail(&entry->list, list); + + return 0; +} + /* - * Reads the device exclusion range from ACPI and initialize IOMMU with + * Reads the device exclusion range from ACPI and initializes the IOMMU with * it */ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) @@ -717,7 +755,7 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) * Takes a pointer to an AMD IOMMU entry in the ACPI table and * initializes the hardware and our data structures with it. */ -static void __init init_iommu_from_acpi(struct amd_iommu *iommu, +static int __init init_iommu_from_acpi(struct amd_iommu *iommu, struct ivhd_header *h) { u8 *p = (u8 *)h; @@ -867,12 +905,43 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, flags, ext_flags); } break; + case IVHD_DEV_SPECIAL: { + u8 handle, type; + const char *var; + u16 devid; + int ret; + + handle = e->ext & 0xff; + devid = (e->ext >> 8) & 0xffff; + type = (e->ext >> 24) & 0xff; + + if (type == IVHD_SPECIAL_IOAPIC) + var = "IOAPIC"; + else if (type == IVHD_SPECIAL_HPET) + var = "HPET"; + else + var = "UNKNOWN"; + + DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %02x:%02x.%x\n", + var, (int)handle, + PCI_BUS(devid), + PCI_SLOT(devid), + PCI_FUNC(devid)); + + set_dev_entry_from_acpi(iommu, devid, e->flags, 0); + ret = add_special_device(type, handle, devid); + if (ret) + return ret; + break; + } default: break; } p += ivhd_entry_length(p); } + + return 0; } /* Initializes the device->iommu mapping for the driver */ @@ -912,6 +981,8 @@ static void __init free_iommu_all(void) */ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) { + int ret; + spin_lock_init(&iommu->lock); /* Add IOMMU to internal data structures */ @@ -947,7 +1018,16 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->int_enabled = false; - init_iommu_from_acpi(iommu, h); + ret = init_iommu_from_acpi(iommu, h); + if (ret) + return ret; + + /* + * Make sure IOMMU is not considered to translate itself. The IVRS + * table tells us so, but this is a lie! + */ + amd_iommu_rlookup_table[iommu->devid] = NULL; + init_iommu_devices(iommu); return 0; @@ -1115,9 +1195,11 @@ static void print_iommu_info(void) if (iommu_feature(iommu, (1ULL << i))) pr_cont(" %s", feat_str[i]); } - } pr_cont("\n"); + } } + if (irq_remapping_enabled) + pr_info("AMD-Vi: Interrupt remapping enabled\n"); } static int __init amd_iommu_init_pci(void) @@ -1141,7 +1223,7 @@ static int __init amd_iommu_init_pci(void) /**************************************************************************** * * The following functions initialize the MSI interrupts for all IOMMUs - * in the system. Its a bit challenging because there could be multiple + * in the system. It's a bit challenging because there could be multiple * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per * pci_dev. * @@ -1199,7 +1281,7 @@ enable_faults: * * The next functions belong to the third pass of parsing the ACPI * table. In this last pass the memory mapping requirements are - * gathered (like exclusion and unity mapping reanges). + * gathered (like exclusion and unity mapping ranges). * ****************************************************************************/ @@ -1308,7 +1390,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table) * Init the device table to not allow DMA access for devices and * suppress all page faults */ -static void init_device_table(void) +static void init_device_table_dma(void) { u32 devid; @@ -1318,6 +1400,27 @@ static void init_device_table(void) } } +static void __init uninit_device_table_dma(void) +{ + u32 devid; + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { + amd_iommu_dev_table[devid].data[0] = 0ULL; + amd_iommu_dev_table[devid].data[1] = 0ULL; + } +} + +static void init_device_table(void) +{ + u32 devid; + + if (!amd_iommu_irq_remap) + return; + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) + set_dev_entry_bit(devid, DEV_ENTRY_IRQ_TBL_EN); +} + static void iommu_init_flags(struct amd_iommu *iommu) { iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? @@ -1466,10 +1569,14 @@ static struct syscore_ops amd_iommu_syscore_ops = { static void __init free_on_init_error(void) { - amd_iommu_uninit_devices(); + free_pages((unsigned long)irq_lookup_table, + get_order(rlookup_table_size)); - free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, - get_order(MAX_DOMAIN_ID/8)); + if (amd_iommu_irq_cache) { + kmem_cache_destroy(amd_iommu_irq_cache); + amd_iommu_irq_cache = NULL; + + } free_pages((unsigned long)amd_iommu_rlookup_table, get_order(rlookup_table_size)); @@ -1482,8 +1589,6 @@ static void __init free_on_init_error(void) free_iommu_all(); - free_unity_maps(); - #ifdef CONFIG_GART_IOMMU /* * We failed to initialize the AMD IOMMU - try fallback to GART @@ -1494,6 +1599,33 @@ static void __init free_on_init_error(void) #endif } +static bool __init check_ioapic_information(void) +{ + int idx; + + for (idx = 0; idx < nr_ioapics; idx++) { + int id = mpc_ioapic_id(idx); + + if (get_ioapic_devid(id) < 0) { + pr_err(FW_BUG "AMD-Vi: IO-APIC[%d] not in IVRS table\n", id); + pr_err("AMD-Vi: Disabling interrupt remapping due to BIOS Bug\n"); + return false; + } + } + + return true; +} + +static void __init free_dma_resources(void) +{ + amd_iommu_uninit_devices(); + + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, + get_order(MAX_DOMAIN_ID/8)); + + free_unity_maps(); +} + /* * This is the hardware init function for AMD IOMMU in the system. * This function is called either from amd_iommu_init or from the interrupt @@ -1580,9 +1712,6 @@ static int __init early_amd_iommu_init(void) if (amd_iommu_pd_alloc_bitmap == NULL) goto out; - /* init the device table */ - init_device_table(); - /* * let all alias entries point to itself */ @@ -1605,10 +1734,35 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; + if (amd_iommu_irq_remap) + amd_iommu_irq_remap = check_ioapic_information(); + + if (amd_iommu_irq_remap) { + /* + * Interrupt remapping enabled, create kmem_cache for the + * remapping tables. + */ + amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache", + MAX_IRQS_PER_TABLE * sizeof(u32), + IRQ_TABLE_ALIGNMENT, + 0, NULL); + if (!amd_iommu_irq_cache) + goto out; + + irq_lookup_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, + get_order(rlookup_table_size)); + if (!irq_lookup_table) + goto out; + } + ret = init_memory_definitions(ivrs_base); if (ret) goto out; + /* init the device table */ + init_device_table(); + out: /* Don't leak any ACPI memory */ early_acpi_os_unmap_memory((char __iomem *)ivrs_base, ivrs_size); @@ -1652,13 +1806,22 @@ static bool detect_ivrs(void) /* Make sure ACS will be enabled during PCI probe */ pci_request_acs(); + if (!disable_irq_remap) + amd_iommu_irq_remap = true; + return true; } static int amd_iommu_init_dma(void) { + struct amd_iommu *iommu; int ret; + init_device_table_dma(); + + for_each_iommu(iommu) + iommu_flush_all_caches(iommu); + if (iommu_pass_through) ret = amd_iommu_init_passthrough(); else @@ -1749,7 +1912,48 @@ static int __init iommu_go_to_state(enum iommu_init_state state) return ret; } +#ifdef CONFIG_IRQ_REMAP +int __init amd_iommu_prepare(void) +{ + return iommu_go_to_state(IOMMU_ACPI_FINISHED); +} + +int __init amd_iommu_supported(void) +{ + return amd_iommu_irq_remap ? 1 : 0; +} + +int __init amd_iommu_enable(void) +{ + int ret; + + ret = iommu_go_to_state(IOMMU_ENABLED); + if (ret) + return ret; + + irq_remapping_enabled = 1; + + return 0; +} + +void amd_iommu_disable(void) +{ + amd_iommu_suspend(); +} + +int amd_iommu_reenable(int mode) +{ + amd_iommu_resume(); + + return 0; +} +int __init amd_iommu_enable_faulting(void) +{ + /* We enable MSI later when PCI is initialized */ + return 0; +} +#endif /* * This is the core init function for AMD IOMMU hardware in the system. @@ -1762,8 +1966,17 @@ static int __init amd_iommu_init(void) ret = iommu_go_to_state(IOMMU_INITIALIZED); if (ret) { - disable_iommus(); - free_on_init_error(); + free_dma_resources(); + if (!irq_remapping_enabled) { + disable_iommus(); + free_on_init_error(); + } else { + struct amd_iommu *iommu; + + uninit_device_table_dma(); + for_each_iommu(iommu) + iommu_flush_all_caches(iommu); + } } return ret; diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index 1a7f41c6cc66..c294961bdd36 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ -32,6 +32,14 @@ extern void amd_iommu_uninit_devices(void); extern void amd_iommu_init_notifier(void); extern void amd_iommu_init_api(void); +/* Needed for interrupt remapping */ +extern int amd_iommu_supported(void); +extern int amd_iommu_prepare(void); +extern int amd_iommu_enable(void); +extern void amd_iommu_disable(void); +extern int amd_iommu_reenable(int); +extern int amd_iommu_enable_faulting(void); + /* IOMMUv2 specific functions */ struct iommu_domain; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index d0dab865a8b8..c9aa3d079ff0 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -152,6 +152,7 @@ #define CMD_INV_DEV_ENTRY 0x02 #define CMD_INV_IOMMU_PAGES 0x03 #define CMD_INV_IOTLB_PAGES 0x04 +#define CMD_INV_IRT 0x05 #define CMD_COMPLETE_PPR 0x07 #define CMD_INV_ALL 0x08 @@ -175,6 +176,7 @@ #define DEV_ENTRY_EX 0x67 #define DEV_ENTRY_SYSMGT1 0x68 #define DEV_ENTRY_SYSMGT2 0x69 +#define DEV_ENTRY_IRQ_TBL_EN 0x80 #define DEV_ENTRY_INIT_PASS 0xb8 #define DEV_ENTRY_EINT_PASS 0xb9 #define DEV_ENTRY_NMI_PASS 0xba @@ -183,6 +185,8 @@ #define DEV_ENTRY_MODE_MASK 0x07 #define DEV_ENTRY_MODE_SHIFT 0x09 +#define MAX_DEV_TABLE_ENTRIES 0xffff + /* constants to configure the command buffer */ #define CMD_BUFFER_SIZE 8192 #define CMD_BUFFER_UNINITIALIZED 1 @@ -255,7 +259,7 @@ #define PAGE_SIZE_ALIGN(address, pagesize) \ ((address) & ~((pagesize) - 1)) /* - * Creates an IOMMU PTE for an address an a given pagesize + * Creates an IOMMU PTE for an address and a given pagesize * The PTE has no permission bits set * Pagesize is expected to be a power-of-two larger than 4096 */ @@ -334,6 +338,23 @@ extern bool amd_iommu_np_cache; /* Only true if all IOMMUs support device IOTLBs */ extern bool amd_iommu_iotlb_sup; +#define MAX_IRQS_PER_TABLE 256 +#define IRQ_TABLE_ALIGNMENT 128 + +struct irq_remap_table { + spinlock_t lock; + unsigned min_index; + u32 *table; +}; + +extern struct irq_remap_table **irq_lookup_table; + +/* Interrupt remapping feature used? */ +extern bool amd_iommu_irq_remap; + +/* kmem_cache to get tables with 128 byte alignement */ +extern struct kmem_cache *amd_iommu_irq_cache; + /* * Make iterating over all IOMMUs easier */ @@ -404,7 +425,7 @@ struct iommu_dev_data { struct list_head dev_data_list; /* For global dev_data_list */ struct iommu_dev_data *alias_data;/* The alias dev_data */ struct protection_domain *domain; /* Domain the device is bound to */ - atomic_t bind; /* Domain attach reverent count */ + atomic_t bind; /* Domain attach reference count */ u16 devid; /* PCI Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ bool passthrough; /* Default for device is pt_domain */ @@ -565,6 +586,16 @@ struct amd_iommu { u32 stored_l2[0x83]; }; +struct devid_map { + struct list_head list; + u8 id; + u16 devid; +}; + +/* Map HPET and IOAPIC ids to the devid used by the IOMMU */ +extern struct list_head ioapic_map; +extern struct list_head hpet_map; + /* * List with all IOMMUs in the system. This list is not locked because it is * only written and read at driver initialization or suspend time @@ -678,6 +709,30 @@ static inline u16 calc_devid(u8 bus, u8 devfn) return (((u16)bus) << 8) | devfn; } +static inline int get_ioapic_devid(int id) +{ + struct devid_map *entry; + + list_for_each_entry(entry, &ioapic_map, list) { + if (entry->id == id) + return entry->devid; + } + + return -EINVAL; +} + +static inline int get_hpet_devid(int id) +{ + struct devid_map *entry; + + list_for_each_entry(entry, &hpet_map, list) { + if (entry->id == id) + return entry->devid; + } + + return -EINVAL; +} + #ifdef CONFIG_AMD_IOMMU_STATS struct __iommu_counter { diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 80bad32aa463..7fe44f83cc37 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -840,8 +840,7 @@ static void exynos_iommu_detach_device(struct iommu_domain *domain, if (__exynos_sysmmu_disable(data)) { dev_dbg(dev, "%s: Detached IOMMU with pgtable %#lx\n", __func__, __pa(priv->pgtable)); - list_del(&data->node); - INIT_LIST_HEAD(&data->node); + list_del_init(&data->node); } else { dev_dbg(dev, "%s: Detaching IOMMU with pgtable %#lx delayed", diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index db820d7dd0bc..d4a4cd445cab 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -589,7 +589,9 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain) { int i; - domain->iommu_coherency = 1; + i = find_first_bit(domain->iommu_bmp, g_num_of_iommus); + + domain->iommu_coherency = i < g_num_of_iommus ? 1 : 0; for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) { if (!ecap_coherent(g_iommus[i]->ecap)) { diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 151690db692c..faf85d6e33fe 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -51,6 +51,11 @@ early_param("intremap", setup_irqremap); void __init setup_irq_remapping_ops(void) { remap_ops = &intel_irq_remap_ops; + +#ifdef CONFIG_AMD_IOMMU + if (amd_iommu_irq_ops.prepare() == 0) + remap_ops = &amd_iommu_irq_ops; +#endif } int irq_remapping_supported(void) diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index b12974cc1dfe..95363acb583f 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -82,6 +82,12 @@ struct irq_remap_ops { }; extern struct irq_remap_ops intel_irq_remap_ops; +extern struct irq_remap_ops amd_iommu_irq_ops; + +#else /* CONFIG_IRQ_REMAP */ + +#define irq_remapping_enabled 0 +#define disable_irq_remap 1 #endif /* CONFIG_IRQ_REMAP */ diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 2a4bb36bc688..0b4d62e0c645 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -32,14 +32,55 @@ #include <linux/io.h> #include <linux/of.h> #include <linux/of_iommu.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> #include <asm/page.h> #include <asm/cacheflush.h> #include <mach/iomap.h> -#include <mach/smmu.h> #include <mach/tegra-ahb.h> +enum smmu_hwgrp { + HWGRP_AFI, + HWGRP_AVPC, + HWGRP_DC, + HWGRP_DCB, + HWGRP_EPP, + HWGRP_G2, + HWGRP_HC, + HWGRP_HDA, + HWGRP_ISP, + HWGRP_MPE, + HWGRP_NV, + HWGRP_NV2, + HWGRP_PPCS, + HWGRP_SATA, + HWGRP_VDE, + HWGRP_VI, + + HWGRP_COUNT, + + HWGRP_END = ~0, +}; + +#define HWG_AFI (1 << HWGRP_AFI) +#define HWG_AVPC (1 << HWGRP_AVPC) +#define HWG_DC (1 << HWGRP_DC) +#define HWG_DCB (1 << HWGRP_DCB) +#define HWG_EPP (1 << HWGRP_EPP) +#define HWG_G2 (1 << HWGRP_G2) +#define HWG_HC (1 << HWGRP_HC) +#define HWG_HDA (1 << HWGRP_HDA) +#define HWG_ISP (1 << HWGRP_ISP) +#define HWG_MPE (1 << HWGRP_MPE) +#define HWG_NV (1 << HWGRP_NV) +#define HWG_NV2 (1 << HWGRP_NV2) +#define HWG_PPCS (1 << HWGRP_PPCS) +#define HWG_SATA (1 << HWGRP_SATA) +#define HWG_VDE (1 << HWGRP_VDE) +#define HWG_VI (1 << HWGRP_VI) + /* bitmap of the page sizes currently supported */ #define SMMU_IOMMU_PGSIZES (SZ_4K) @@ -47,16 +88,29 @@ #define SMMU_CONFIG_DISABLE 0 #define SMMU_CONFIG_ENABLE 1 -#define SMMU_TLB_CONFIG 0x14 -#define SMMU_TLB_CONFIG_STATS__MASK (1 << 31) -#define SMMU_TLB_CONFIG_STATS__ENABLE (1 << 31) +/* REVISIT: To support multiple MCs */ +enum { + _MC = 0, +}; + +enum { + _TLB = 0, + _PTC, +}; + +#define SMMU_CACHE_CONFIG_BASE 0x14 +#define __SMMU_CACHE_CONFIG(mc, cache) (SMMU_CACHE_CONFIG_BASE + 4 * cache) +#define SMMU_CACHE_CONFIG(cache) __SMMU_CACHE_CONFIG(_MC, cache) + +#define SMMU_CACHE_CONFIG_STATS_SHIFT 31 +#define SMMU_CACHE_CONFIG_STATS_ENABLE (1 << SMMU_CACHE_CONFIG_STATS_SHIFT) +#define SMMU_CACHE_CONFIG_STATS_TEST_SHIFT 30 +#define SMMU_CACHE_CONFIG_STATS_TEST (1 << SMMU_CACHE_CONFIG_STATS_TEST_SHIFT) + #define SMMU_TLB_CONFIG_HIT_UNDER_MISS__ENABLE (1 << 29) #define SMMU_TLB_CONFIG_ACTIVE_LINES__VALUE 0x10 #define SMMU_TLB_CONFIG_RESET_VAL 0x20000010 -#define SMMU_PTC_CONFIG 0x18 -#define SMMU_PTC_CONFIG_STATS__MASK (1 << 31) -#define SMMU_PTC_CONFIG_STATS__ENABLE (1 << 31) #define SMMU_PTC_CONFIG_CACHE__ENABLE (1 << 29) #define SMMU_PTC_CONFIG_INDEX_MAP__PATTERN 0x3f #define SMMU_PTC_CONFIG_RESET_VAL 0x2000003f @@ -86,10 +140,10 @@ #define SMMU_ASID_SECURITY 0x38 -#define SMMU_STATS_TLB_HIT_COUNT 0x1f0 -#define SMMU_STATS_TLB_MISS_COUNT 0x1f4 -#define SMMU_STATS_PTC_HIT_COUNT 0x1f8 -#define SMMU_STATS_PTC_MISS_COUNT 0x1fc +#define SMMU_STATS_CACHE_COUNT_BASE 0x1f0 + +#define SMMU_STATS_CACHE_COUNT(mc, cache, hitmiss) \ + (SMMU_STATS_CACHE_COUNT_BASE + 8 * cache + 4 * hitmiss) #define SMMU_TRANSLATION_ENABLE_0 0x228 #define SMMU_TRANSLATION_ENABLE_1 0x22c @@ -231,6 +285,12 @@ struct smmu_as { spinlock_t client_lock; /* for client list */ }; +struct smmu_debugfs_info { + struct smmu_device *smmu; + int mc; + int cache; +}; + /* * Per SMMU device - IOMMU device */ @@ -251,6 +311,9 @@ struct smmu_device { unsigned long translation_enable_2; unsigned long asid_security; + struct dentry *debugfs_root; + struct smmu_debugfs_info *debugfs_info; + struct device_node *ahb; int num_as; @@ -412,8 +475,8 @@ static int smmu_setup_regs(struct smmu_device *smmu) smmu_write(smmu, smmu->translation_enable_1, SMMU_TRANSLATION_ENABLE_1); smmu_write(smmu, smmu->translation_enable_2, SMMU_TRANSLATION_ENABLE_2); smmu_write(smmu, smmu->asid_security, SMMU_ASID_SECURITY); - smmu_write(smmu, SMMU_TLB_CONFIG_RESET_VAL, SMMU_TLB_CONFIG); - smmu_write(smmu, SMMU_PTC_CONFIG_RESET_VAL, SMMU_PTC_CONFIG); + smmu_write(smmu, SMMU_TLB_CONFIG_RESET_VAL, SMMU_CACHE_CONFIG(_TLB)); + smmu_write(smmu, SMMU_PTC_CONFIG_RESET_VAL, SMMU_CACHE_CONFIG(_PTC)); smmu_flush_regs(smmu, 1); @@ -895,6 +958,175 @@ static struct iommu_ops smmu_iommu_ops = { .pgsize_bitmap = SMMU_IOMMU_PGSIZES, }; +/* Should be in the order of enum */ +static const char * const smmu_debugfs_mc[] = { "mc", }; +static const char * const smmu_debugfs_cache[] = { "tlb", "ptc", }; + +static ssize_t smmu_debugfs_stats_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *pos) +{ + struct smmu_debugfs_info *info; + struct smmu_device *smmu; + struct dentry *dent; + int i; + enum { + _OFF = 0, + _ON, + _RESET, + }; + const char * const command[] = { + [_OFF] = "off", + [_ON] = "on", + [_RESET] = "reset", + }; + char str[] = "reset"; + u32 val; + size_t offs; + + count = min_t(size_t, count, sizeof(str)); + if (copy_from_user(str, buffer, count)) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(command); i++) + if (strncmp(str, command[i], + strlen(command[i])) == 0) + break; + + if (i == ARRAY_SIZE(command)) + return -EINVAL; + + dent = file->f_dentry; + info = dent->d_inode->i_private; + smmu = info->smmu; + + offs = SMMU_CACHE_CONFIG(info->cache); + val = smmu_read(smmu, offs); + switch (i) { + case _OFF: + val &= ~SMMU_CACHE_CONFIG_STATS_ENABLE; + val &= ~SMMU_CACHE_CONFIG_STATS_TEST; + smmu_write(smmu, val, offs); + break; + case _ON: + val |= SMMU_CACHE_CONFIG_STATS_ENABLE; + val &= ~SMMU_CACHE_CONFIG_STATS_TEST; + smmu_write(smmu, val, offs); + break; + case _RESET: + val |= SMMU_CACHE_CONFIG_STATS_TEST; + smmu_write(smmu, val, offs); + val &= ~SMMU_CACHE_CONFIG_STATS_TEST; + smmu_write(smmu, val, offs); + break; + default: + BUG(); + break; + } + + dev_dbg(smmu->dev, "%s() %08x, %08x @%08x\n", __func__, + val, smmu_read(smmu, offs), offs); + + return count; +} + +static int smmu_debugfs_stats_show(struct seq_file *s, void *v) +{ + struct smmu_debugfs_info *info; + struct smmu_device *smmu; + struct dentry *dent; + int i; + const char * const stats[] = { "hit", "miss", }; + + dent = d_find_alias(s->private); + info = dent->d_inode->i_private; + smmu = info->smmu; + + for (i = 0; i < ARRAY_SIZE(stats); i++) { + u32 val; + size_t offs; + + offs = SMMU_STATS_CACHE_COUNT(info->mc, info->cache, i); + val = smmu_read(smmu, offs); + seq_printf(s, "%s:%08x ", stats[i], val); + + dev_dbg(smmu->dev, "%s() %s %08x @%08x\n", __func__, + stats[i], val, offs); + } + seq_printf(s, "\n"); + + return 0; +} + +static int smmu_debugfs_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, smmu_debugfs_stats_show, inode); +} + +static const struct file_operations smmu_debugfs_stats_fops = { + .open = smmu_debugfs_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = smmu_debugfs_stats_write, +}; + +static void smmu_debugfs_delete(struct smmu_device *smmu) +{ + debugfs_remove_recursive(smmu->debugfs_root); + kfree(smmu->debugfs_info); +} + +static void smmu_debugfs_create(struct smmu_device *smmu) +{ + int i; + size_t bytes; + struct dentry *root; + + bytes = ARRAY_SIZE(smmu_debugfs_mc) * ARRAY_SIZE(smmu_debugfs_cache) * + sizeof(*smmu->debugfs_info); + smmu->debugfs_info = kmalloc(bytes, GFP_KERNEL); + if (!smmu->debugfs_info) + return; + + root = debugfs_create_dir(dev_name(smmu->dev), NULL); + if (!root) + goto err_out; + smmu->debugfs_root = root; + + for (i = 0; i < ARRAY_SIZE(smmu_debugfs_mc); i++) { + int j; + struct dentry *mc; + + mc = debugfs_create_dir(smmu_debugfs_mc[i], root); + if (!mc) + goto err_out; + + for (j = 0; j < ARRAY_SIZE(smmu_debugfs_cache); j++) { + struct dentry *cache; + struct smmu_debugfs_info *info; + + info = smmu->debugfs_info; + info += i * ARRAY_SIZE(smmu_debugfs_mc) + j; + info->smmu = smmu; + info->mc = i; + info->cache = j; + + cache = debugfs_create_file(smmu_debugfs_cache[j], + S_IWUGO | S_IRUGO, mc, + (void *)info, + &smmu_debugfs_stats_fops); + if (!cache) + goto err_out; + } + } + + return; + +err_out: + smmu_debugfs_delete(smmu); +} + static int tegra_smmu_suspend(struct device *dev) { struct smmu_device *smmu = dev_get_drvdata(dev); @@ -999,6 +1231,7 @@ static int tegra_smmu_probe(struct platform_device *pdev) if (!smmu->avp_vector_page) return -ENOMEM; + smmu_debugfs_create(smmu); smmu_handle = smmu; return 0; } @@ -1008,6 +1241,8 @@ static int tegra_smmu_remove(struct platform_device *pdev) struct smmu_device *smmu = platform_get_drvdata(pdev); int i; + smmu_debugfs_delete(smmu); + smmu_write(smmu, SMMU_CONFIG_DISABLE, SMMU_CONFIG); for (i = 0; i < smmu->num_as; i++) free_pdir(&smmu->as[i]); diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 66ce41489133..b9087bff008b 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -120,11 +120,6 @@ static const char *type2name[4] = { "single", "page", static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", "DMA_FROM_DEVICE", "DMA_NONE" }; -/* little merge helper - remove it after the merge window */ -#ifndef BUS_NOTIFY_UNBOUND_DRIVER -#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005 -#endif - /* * The access to some variables in this macro is racy. We can't use atomic_t * here because all these variables are exported to debugfs. Some of them even |