summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Gerald Schaefer <gerald.schaefer@de.ibm.com> 2014-07-18 17:37:08 +0200
committer Martin Schwidefsky <schwidefsky@de.ibm.com> 2014-07-22 09:26:24 +0200
commit c60d1ae4efcb5790f7d085369baf66c167a6484f (patch)
tree 7555d2b2e44a6ad6f07e16afaf15492b862d8f99
parent 29b8dd9d4274bca6526e4bb8d4f46dec1f4c15c9 (diff)
download linux-c60d1ae4efcb5790f7d085369baf66c167a6484f.tar.bz2
s390/pci: introduce lazy IOTLB flushing for DMA unmap
This changes the default IOTLB flushing method to lazy flushing, which means that there will be no direct flush after each DMA unmap operation. Instead, the iommu bitmap pointer will be adjusted after unmap, so that no DMA address will be re-used until after an iommu bitmap wrap-around. The only IOTLB flush will then happen after each wrap-around.

A new kernel parameter "s390_iommu=" is also introduced, to allow changing the flushing behaviour to the old strict method.

Reviewed-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
-rw-r--r-- Documentation/kernel-parameters.txt 7
-rw-r--r-- arch/s390/pci/pci_dma.c 50
2 files changed, 43 insertions, 14 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b7fa2f599459..c848095f2cb0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3023,6 +3023,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
S [KNL] Run init in single mode
+ s390_iommu= [HW,S390]
+ Set s390 IOTLB flushing mode
+ strict
+ With strict flushing every unmap operation will result in
+ an IOTLB flush. Default is lazy flushing before reuse,
+ which is faster.
+
sa1100ir [NET]
See drivers/net/irda/sa1100_ir.c.
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index f91c03119804..4cbb29a4d615 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -16,6 +16,13 @@
static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;
+static int s390_iommu_strict;
+/*
+ * Issue a single zpci_refresh_trans() call for the device's whole DMA
+ * range instead of one mapping: the range starts at zdev->start_dma and
+ * its size argument is zdev->iommu_pages * PAGE_SIZE. The function handle
+ * zdev->fh is passed in the upper 32 bits of the first argument.
+ *
+ * Returns whatever zpci_refresh_trans() returns.
+ */
+static int zpci_refresh_global(struct zpci_dev *zdev)
+{
+ return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
+ zdev->iommu_pages * PAGE_SIZE);
+}
static unsigned long *dma_alloc_cpu_table(void)
{
@@ -155,18 +162,15 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
}
/*
- * rpcit is not required to establish new translations when previously
- * invalid translation-table entries are validated, however it is
- * required when altering previously valid entries.
+ * With zdev->tlb_refresh == 0, rpcit is not required to establish new
+ * translations when previously invalid translation-table entries are
+ * validated. With lazy unmap, it also is skipped for previously valid
+ * entries, but a global rpcit is then required before any address can
+ * be re-used, i.e. after each iommu bitmap wrap-around.
*/
if (!zdev->tlb_refresh &&
- ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
- /*
- * TODO: also need to check that the old entry is indeed INVALID
- * and not only for one page but for the whole range...
- * -> now we WARN_ON in that case but with lazy unmap that
- * needs to be redone!
- */
+ (!s390_iommu_strict ||
+ ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
goto no_refresh;
rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
@@ -220,16 +224,21 @@ static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size)
{
unsigned long offset, flags;
+ int wrap = 0;
spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
offset = __dma_alloc_iommu(zdev, zdev->next_bit, size);
- if (offset == -1)
+ if (offset == -1) {
+ /* wrap-around */
offset = __dma_alloc_iommu(zdev, 0, size);
+ wrap = 1;
+ }
if (offset != -1) {
zdev->next_bit = offset + size;
- if (zdev->next_bit >= zdev->iommu_pages)
- zdev->next_bit = 0;
+ if (!zdev->tlb_refresh && !s390_iommu_strict && wrap)
+ /* global flush after wrap-around with lazy unmap */
+ zpci_refresh_global(zdev);
}
spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
return offset;
@@ -243,7 +252,11 @@ static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size
if (!zdev->iommu_bitmap)
goto out;
bitmap_clear(zdev->iommu_bitmap, offset, size);
- if (offset >= zdev->next_bit)
+ /*
+ * Lazy flush for unmap: need to move next_bit to avoid address re-use
+ * until wrap-around.
+ */
+ if (!s390_iommu_strict && offset >= zdev->next_bit)
zdev->next_bit = offset + size;
out:
spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
@@ -504,3 +517,12 @@ struct dma_map_ops s390_dma_ops = {
/* dma_supported is unconditionally true without a callback */
};
EXPORT_SYMBOL_GPL(s390_dma_ops);
+
+/*
+ * Parse the "s390_iommu=" kernel parameter. A value beginning with "strict"
+ * sets s390_iommu_strict; any other value leaves it at its default of 0
+ * (lazy flushing).
+ *
+ * Returns 1 so that the boot code treats the option as handled. A __setup()
+ * handler that returns 0 reports the option as not consumed, and it is then
+ * handed on to init as an unknown parameter.
+ */
+static int __init s390_iommu_setup(char *str)
+{
+ if (!strncmp(str, "strict", 6))
+ s390_iommu_strict = 1;
+ return 1;
+}
+
+__setup("s390_iommu=", s390_iommu_setup);