From 374c15594c4ee0dfcceb38852bd43be09070f402 Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Wed, 16 Jun 2021 06:38:42 -0700 Subject: iommu/io-pgtable: Introduce unmap_pages() as a page table op The io-pgtable code expects to operate on a single block or granule of memory that is supported by the IOMMU hardware when unmapping memory. This means that when a large buffer that consists of multiple such blocks is unmapped, the io-pgtable code will walk the page tables to the correct level to unmap each block, even for blocks that are virtually contiguous and at the same level, which can incur an overhead in performance. Introduce the unmap_pages() page table op to express to the io-pgtable code that it should unmap a number of blocks of the same size, instead of a single block. Doing so allows multiple blocks to be unmapped in one call to the io-pgtable code, reducing the number of page table walks, and indirect calls. Signed-off-by: Isaac J. Manjarres Suggested-by: Will Deacon Signed-off-by: Will Deacon Signed-off-by: Georgi Djakov Link: https://lore.kernel.org/r/1623850736-389584-2-git-send-email-quic_c_gdjako@quicinc.com Signed-off-by: Joerg Roedel --- include/linux/io-pgtable.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 4d40dfa75b55..9391c5fa71e6 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -144,6 +144,7 @@ struct io_pgtable_cfg { * * @map: Map a physically contiguous memory region. * @unmap: Unmap a physically contiguous memory region. + * @unmap_pages: Unmap a range of virtually contiguous pages of the same size. * @iova_to_phys: Translate iova to physical address. 
* * These functions map directly onto the iommu_ops member functions with @@ -154,6 +155,9 @@ struct io_pgtable_ops { phys_addr_t paddr, size_t size, int prot, gfp_t gfp); size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova, size_t size, struct iommu_iotlb_gather *gather); + size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather); phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops, unsigned long iova); }; -- cgit v1.2.3 From cacffb7f7b45ba7649eedea4c196c6e9f1863bf3 Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Wed, 16 Jun 2021 06:38:43 -0700 Subject: iommu: Add an unmap_pages() op for IOMMU drivers Add a callback for IOMMU drivers to provide a path for the IOMMU framework to call into an IOMMU driver, which can call into the io-pgtable code, to unmap a virtually contiguous range of pages of the same size. For IOMMU drivers that do not specify an unmap_pages() callback, the existing logic of unmapping memory one page block at a time will be used. Signed-off-by: Isaac J. 
Manjarres Suggested-by: Will Deacon Signed-off-by: Will Deacon Acked-by: Lu Baolu Signed-off-by: Georgi Djakov Link: https://lore.kernel.org/r/1623850736-389584-3-git-send-email-quic_c_gdjako@quicinc.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 32d448050bf7..25a844121be5 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -181,6 +181,7 @@ struct iommu_iotlb_gather { * @detach_dev: detach device from an iommu domain * @map: map a physically contiguous memory region to an iommu domain * @unmap: unmap a physically contiguous memory region from an iommu domain + * @unmap_pages: unmap a number of pages of the same size from an iommu domain * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain * @iotlb_sync_map: Sync mappings created recently using @map to the hardware * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush @@ -231,6 +232,9 @@ struct iommu_ops { phys_addr_t paddr, size_t size, int prot, gfp_t gfp); size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather); + size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *iotlb_gather); void (*flush_iotlb_all)(struct iommu_domain *domain); void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, size_t size); -- cgit v1.2.3 From ca073b55d16a83ba7e73cd313312abc68f07f293 Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Wed, 16 Jun 2021 06:38:44 -0700 Subject: iommu/io-pgtable: Introduce map_pages() as a page table op Mapping memory into io-pgtables follows the same semantics that unmapping memory used to follow (i.e. a buffer will be mapped one page block per call to the io-pgtable code). 
This means that it can be optimized in the same way that unmapping memory was, so add a map_pages() callback to the io-pgtable ops structure, so that a range of pages of the same size can be mapped within the same call. Signed-off-by: Isaac J. Manjarres Suggested-by: Will Deacon Signed-off-by: Georgi Djakov Link: https://lore.kernel.org/r/1623850736-389584-4-git-send-email-quic_c_gdjako@quicinc.com Signed-off-by: Joerg Roedel --- include/linux/io-pgtable.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 9391c5fa71e6..c43f3b899d2a 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -143,6 +143,7 @@ struct io_pgtable_cfg { * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers. * * @map: Map a physically contiguous memory region. + * @map_pages: Map a physically contiguous range of pages of the same size. * @unmap: Unmap a physically contiguous memory region. * @unmap_pages: Unmap a range of virtually contiguous pages of the same size. * @iova_to_phys: Translate iova to physical address. @@ -153,6 +154,9 @@ struct io_pgtable_cfg { struct io_pgtable_ops { int (*map)(struct io_pgtable_ops *ops, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp); + int (*map_pages)(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped); size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova, size_t size, struct iommu_iotlb_gather *gather); size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova, -- cgit v1.2.3 From 910c4406ccc9613de0a54abf910edc4bf8a575c0 Mon Sep 17 00:00:00 2001 From: "Isaac J. 
Manjarres" Date: Wed, 16 Jun 2021 06:38:45 -0700 Subject: iommu: Add a map_pages() op for IOMMU drivers Add a callback for IOMMU drivers to provide a path for the IOMMU framework to call into an IOMMU driver, which can call into the io-pgtable code, to map a physically contiguous range of pages of the same size. For IOMMU drivers that do not specify a map_pages() callback, the existing logic of mapping memory one page block at a time will be used. Signed-off-by: Isaac J. Manjarres Suggested-by: Will Deacon Acked-by: Lu Baolu Signed-off-by: Georgi Djakov Link: https://lore.kernel.org/r/1623850736-389584-5-git-send-email-quic_c_gdjako@quicinc.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 25a844121be5..d7989d4a7404 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -180,6 +180,8 @@ struct iommu_iotlb_gather { * @attach_dev: attach device to an iommu domain * @detach_dev: detach device from an iommu domain * @map: map a physically contiguous memory region to an iommu domain + * @map_pages: map a physically contiguous set of pages of the same size to + * an iommu domain. 
* @unmap: unmap a physically contiguous memory region from an iommu domain * @unmap_pages: unmap a number of pages of the same size from an iommu domain * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain @@ -230,6 +232,9 @@ struct iommu_ops { void (*detach_dev)(struct iommu_domain *domain, struct device *dev); int (*map)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp); + int (*map_pages)(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped); size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather); size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova, -- cgit v1.2.3 From 308723e3580027f0cd7c86a5edfe6b5acb6863d2 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 12 Jul 2021 19:12:20 +0800 Subject: iommu: Remove mode argument from iommu_set_dma_strict() We only ever now set strict mode enabled in iommu_set_dma_strict(), so just remove the argument. 
Signed-off-by: John Garry Reviewed-by: Robin Murphy Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/1626088340-5838-7-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 2 +- drivers/iommu/intel/iommu.c | 6 +++--- drivers/iommu/iommu.c | 5 ++--- include/linux/iommu.h | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 1e641cb6dddc..6e12a615117b 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3099,7 +3099,7 @@ static int __init parse_amd_iommu_options(char *str) for (; *str; ++str) { if (strncmp(str, "fullflush", 9) == 0) { pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n"); - iommu_set_dma_strict(true); + iommu_set_dma_strict(); } if (strncmp(str, "force_enable", 12) == 0) amd_iommu_force_enable = true; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6fd004a1a66d..da9afa730df1 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -454,7 +454,7 @@ static int __init intel_iommu_setup(char *str) iommu_dma_forcedac = true; } else if (!strncmp(str, "strict", 6)) { pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n"); - iommu_set_dma_strict(true); + iommu_set_dma_strict(); } else if (!strncmp(str, "sp_off", 6)) { pr_info("Disable supported super page\n"); intel_iommu_superpage = 0; @@ -4394,7 +4394,7 @@ int __init intel_iommu_init(void) */ if (cap_caching_mode(iommu->cap)) { pr_info_once("IOMMU batching disallowed due to virtualization\n"); - iommu_set_dma_strict(true); + iommu_set_dma_strict(); } iommu_device_sysfs_add(&iommu->iommu, NULL, intel_iommu_groups, @@ -5712,7 +5712,7 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) } else if (dmar_map_gfx) { /* we have to ensure the gfx device is idle before we flush */ pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n"); - 
iommu_set_dma_strict(true); + iommu_set_dma_strict(); } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index bd9ccce387c5..eeea5e5c4d10 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -350,10 +350,9 @@ static int __init iommu_dma_setup(char *str) } early_param("iommu.strict", iommu_dma_setup); -void iommu_set_dma_strict(bool strict) +void iommu_set_dma_strict(void) { - if (strict || !(iommu_cmd_line & IOMMU_CMD_LINE_STRICT)) - iommu_dma_strict = strict; + iommu_dma_strict = true; } bool iommu_get_dma_strict(struct iommu_domain *domain) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d7989d4a7404..4997c78e2670 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -485,7 +485,7 @@ int iommu_enable_nesting(struct iommu_domain *domain); int iommu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirks); -void iommu_set_dma_strict(bool val); +void iommu_set_dma_strict(void); bool iommu_get_dma_strict(struct iommu_domain *domain); extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, -- cgit v1.2.3 From 3136895cc5b665c1ab406d78f90c0700a3551e74 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 23 Jul 2021 02:32:05 -0700 Subject: iommu: Improve iommu_iotlb_gather helpers The Mediatek driver is not the only one which might want a basic address-based gathering behaviour, so although it's arguably simple enough to open-code, let's factor it out for the sake of cleanliness. Let's also take this opportunity to document the intent of these helpers for clarity. 
Cc: Joerg Roedel Cc: Will Deacon Cc: Jiajun Cao Cc: Robin Murphy Cc: Lu Baolu Cc: iommu@lists.linux-foundation.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Robin Murphy Signed-off-by: Nadav Amit Link: https://lore.kernel.org/r/20210723093209.714328-4-namit@vmware.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 6 +----- include/linux/iommu.h | 38 +++++++++++++++++++++++++++++++++----- 2 files changed, 34 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 6f7c69688ce2..d9939e4af35c 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -520,12 +520,8 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - unsigned long end = iova + size - 1; - if (gather->start > iova) - gather->start = iova; - if (gather->end < end) - gather->end = end; + iommu_iotlb_gather_add_range(gather, iova, size); return dom->iop->unmap(dom->iop, iova, size, gather); } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 32d448050bf7..e554871db46f 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -497,6 +497,38 @@ static inline void iommu_iotlb_sync(struct iommu_domain *domain, iommu_iotlb_gather_init(iotlb_gather); } +/** + * iommu_iotlb_gather_add_range - Gather for address-based TLB invalidation + * @gather: TLB gather data + * @iova: start of page to invalidate + * @size: size of page to invalidate + * + * Helper for IOMMU drivers to build arbitrarily-sized invalidation commands + * where only the address range matters, and simply minimising intermediate + * syncs is preferred. 
+ */ +static inline void iommu_iotlb_gather_add_range(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t size) +{ + unsigned long end = iova + size - 1; + + if (gather->start > iova) + gather->start = iova; + if (gather->end < end) + gather->end = end; +} + +/** + * iommu_iotlb_gather_add_page - Gather for page-based TLB invalidation + * @domain: IOMMU domain to be invalidated + * @gather: TLB gather data + * @iova: start of page to invalidate + * @size: size of page to invalidate + * + * Helper for IOMMU drivers to build invalidation commands based on individual + * pages, or with page size/table level hints which cannot be gathered if they + * differ. + */ static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain, struct iommu_iotlb_gather *gather, unsigned long iova, size_t size) @@ -515,11 +547,7 @@ static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain, gather->pgsize = size; } - if (gather->end < end) - gather->end = end; - - if (gather->start > start) - gather->start = start; + iommu_iotlb_gather_add_range(gather, iova, size); } /* PCI device grouping function */ -- cgit v1.2.3 From febb82c208e481eee057c70fa3176bb48712a111 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Fri, 23 Jul 2021 02:32:06 -0700 Subject: iommu: Factor iommu_iotlb_gather_is_disjoint() out MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor iommu_iotlb_gather_add_page() and factor out the logic that detects whether IOTLB gather range and a new range are disjoint. To be used by the next patch that implements different gathering logic for AMD. Note that updating gather->pgsize unconditionally does not affect correctness as the function had (and has) an invariant, in which gather->pgsize always represents the flushing granularity of its range. Arguably, "size" should never be zero, but let's assume for the matter of discussion that it might. 
If "size" is equal to "gather->pgsize", then the assignment in question has no impact. Otherwise, if "size" is non-zero, then iommu_iotlb_sync() would initialize the size and range (see iommu_iotlb_gather_init()), and the invariant is kept. Otherwise, "size" is zero, and "gather" already holds a range, so gather->pgsize is non-zero and (gather->pgsize && gather->pgsize != size) is true. Therefore, again, iommu_iotlb_sync() would be called and initialize the size. Cc: Joerg Roedel Cc: Jiajun Cao Cc: Lu Baolu Cc: iommu@lists.linux-foundation.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Robin Murphy Acked-by: Will Deacon Signed-off-by: Nadav Amit Link: https://lore.kernel.org/r/20210723093209.714328-5-namit@vmware.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e554871db46f..979a5ceeea55 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -497,6 +497,28 @@ static inline void iommu_iotlb_sync(struct iommu_domain *domain, iommu_iotlb_gather_init(iotlb_gather); } +/** + * iommu_iotlb_gather_is_disjoint - Checks whether a new range is disjoint + * + * @gather: TLB gather data + * @iova: start of page to invalidate + * @size: size of page to invalidate + * + * Helper for IOMMU drivers to check whether a new range and the gathered range + * are disjoint. For many IOMMUs, flushing the IOMMU in this case is better + * than merging the two, which might lead to unnecessary invalidations. 
+ */ +static inline +bool iommu_iotlb_gather_is_disjoint(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t size) +{ + unsigned long start = iova, end = start + size - 1; + + return gather->end != 0 && + (end + 1 < gather->start || start > gather->end + 1); +} + + /** * iommu_iotlb_gather_add_range - Gather for address-based TLB invalidation * @gather: TLB gather data @@ -533,20 +555,16 @@ static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain, struct iommu_iotlb_gather *gather, unsigned long iova, size_t size) { - unsigned long start = iova, end = start + size - 1; - /* * If the new page is disjoint from the current range or is mapped at * a different granularity, then sync the TLB so that the gather * structure can be rewritten. */ - if (gather->pgsize != size || - end + 1 < gather->start || start > gather->end + 1) { - if (gather->pgsize) - iommu_iotlb_sync(domain, gather); - gather->pgsize = size; - } + if ((gather->pgsize && gather->pgsize != size) || + iommu_iotlb_gather_is_disjoint(gather, iova, size)) + iommu_iotlb_sync(domain, gather); + gather->pgsize = size; iommu_iotlb_gather_add_range(gather, iova, size); } -- cgit v1.2.3 From 892384cd998a17960dff6ebefc27375f63364111 Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Tue, 3 Aug 2021 14:16:49 +0200 Subject: iommu/io-pgtable: Add DART pagetable format Apple's DART iommu uses a pagetable format that shares some similarities with the ones already implemented by io-pgtable.c. Add a new format variant to support the required differences so that we don't have to duplicate the pagetable handling code. 
Reviewed-by: Alexander Graf Reviewed-by: Alyssa Rosenzweig Reviewed-by: Robin Murphy Signed-off-by: Sven Peter Link: https://lore.kernel.org/r/20210803121651.61594-2-sven@svenpeter.dev Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-arm.c | 63 ++++++++++++++++++++++++++++++++++++++++++ drivers/iommu/io-pgtable.c | 1 + include/linux/io-pgtable.h | 7 +++++ 3 files changed, 71 insertions(+) (limited to 'include') diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 053df4048a29..0779eb96bd29 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -130,6 +130,9 @@ #define ARM_MALI_LPAE_MEMATTR_IMP_DEF 0x88ULL #define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL +#define APPLE_DART_PTE_PROT_NO_WRITE (1<<7) +#define APPLE_DART_PTE_PROT_NO_READ (1<<8) + /* IOPTE accessors */ #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d)) @@ -402,6 +405,15 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, { arm_lpae_iopte pte; + if (data->iop.fmt == APPLE_DART) { + pte = 0; + if (!(prot & IOMMU_WRITE)) + pte |= APPLE_DART_PTE_PROT_NO_WRITE; + if (!(prot & IOMMU_READ)) + pte |= APPLE_DART_PTE_PROT_NO_READ; + return pte; + } + if (data->iop.fmt == ARM_64_LPAE_S1 || data->iop.fmt == ARM_32_LPAE_S1) { pte = ARM_LPAE_PTE_nG; @@ -1102,6 +1114,52 @@ out_free_data: return NULL; } +static struct io_pgtable * +apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct arm_lpae_io_pgtable *data; + int i; + + if (cfg->oas > 36) + return NULL; + + data = arm_lpae_alloc_pgtable(cfg); + if (!data) + return NULL; + + /* + * The table format itself always uses two levels, but the total VA + * space is mapped by four separate tables, making the MMIO registers + * an effective "level 1". For simplicity, though, we treat this + * equivalently to LPAE stage 2 concatenation at level 2, with the + * additional TTBRs each just pointing at consecutive pages. 
+ */ + if (data->start_level < 1) + goto out_free_data; + if (data->start_level == 1 && data->pgd_bits > 2) + goto out_free_data; + if (data->start_level > 1) + data->pgd_bits = 0; + data->start_level = 2; + cfg->apple_dart_cfg.n_ttbrs = 1 << data->pgd_bits; + data->pgd_bits += data->bits_per_level; + + data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL, + cfg); + if (!data->pgd) + goto out_free_data; + + for (i = 0; i < cfg->apple_dart_cfg.n_ttbrs; ++i) + cfg->apple_dart_cfg.ttbr[i] = + virt_to_phys(data->pgd + i * ARM_LPAE_GRANULE(data)); + + return &data->iop; + +out_free_data: + kfree(data); + return NULL; +} + struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = { .alloc = arm_64_lpae_alloc_pgtable_s1, .free = arm_lpae_free_pgtable, @@ -1127,6 +1185,11 @@ struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = { .free = arm_lpae_free_pgtable, }; +struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns = { + .alloc = apple_dart_alloc_pgtable, + .free = arm_lpae_free_pgtable, +}; + #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST static struct io_pgtable_cfg *cfg_cookie __initdata; diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index 6e9917ce980f..f4bfcef98297 100644 --- a/drivers/iommu/io-pgtable.c +++ b/drivers/iommu/io-pgtable.c @@ -20,6 +20,7 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = { [ARM_64_LPAE_S1] = &io_pgtable_arm_64_lpae_s1_init_fns, [ARM_64_LPAE_S2] = &io_pgtable_arm_64_lpae_s2_init_fns, [ARM_MALI_LPAE] = &io_pgtable_arm_mali_lpae_init_fns, + [APPLE_DART] = &io_pgtable_apple_dart_init_fns, #endif #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S [ARM_V7S] = &io_pgtable_arm_v7s_init_fns, diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index c43f3b899d2a..a738483fb4da 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -16,6 +16,7 @@ enum io_pgtable_fmt { ARM_V7S, ARM_MALI_LPAE, AMD_IOMMU_V1, + APPLE_DART, IO_PGTABLE_NUM_FMTS, }; @@ -136,6 +137,11 @@ struct 
io_pgtable_cfg { u64 transtab; u64 memattr; } arm_mali_lpae_cfg; + + struct { + u64 ttbr[4]; + u32 n_ttbrs; + } apple_dart_cfg; }; }; @@ -254,5 +260,6 @@ extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns; extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns; +extern struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns; #endif /* __IO_PGTABLE_H */ -- cgit v1.2.3 From 46983fcd67ac5a830d41ebe3755314db67a6dd16 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 11 Aug 2021 13:21:15 +0100 Subject: iommu: Pull IOVA cookie management into the core Now that everyone has converged on iommu-dma for IOMMU_DOMAIN_DMA support, we can abandon the notion of drivers being responsible for the cookie type, and consolidate all the management into the core code. CC: Yong Wu CC: Chunyan Zhang CC: Maxime Ripard Tested-by: Heiko Stuebner Tested-by: Marek Szyprowski Tested-by: Yoshihiro Shimoda Reviewed-by: Jean-Philippe Brucker Reviewed-by: Lu Baolu Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/46a2c0e7419c7d1d931762dc7b6a69fa082d199a.1628682048.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 7 +++++++ include/linux/iommu.h | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index f2cda9950bd5..b65fcc66ffa4 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) "iommu: " fmt #include <linux/device.h> +#include <linux/dma-iommu.h> #include <linux/kernel.h> #include <linux/bits.h> #include <linux/bug.h> @@ -1946,6 +1947,11 @@ static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, /* Assume all sizes by default; the driver may override this later */ domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; + /* Temporarily avoid -EEXIST while drivers still get their own cookies */ + if (type == IOMMU_DOMAIN_DMA && 
!domain->iova_cookie && iommu_get_dma_cookie(domain)) { + iommu_domain_free(domain); + domain = NULL; + } return domain; } @@ -1957,6 +1963,7 @@ EXPORT_SYMBOL_GPL(iommu_domain_alloc); void iommu_domain_free(struct iommu_domain *domain) { + iommu_put_dma_cookie(domain); domain->ops->domain_free(domain); } EXPORT_SYMBOL_GPL(iommu_domain_free); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 4997c78e2670..141779d76035 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -40,6 +40,7 @@ struct iommu_domain; struct notifier_block; struct iommu_sva; struct iommu_fault_event; +struct iommu_dma_cookie; /* iommu fault flags */ #define IOMMU_FAULT_READ 0x0 @@ -86,7 +87,7 @@ struct iommu_domain { iommu_fault_handler_t handler; void *handler_token; struct iommu_domain_geometry geometry; - void *iova_cookie; + struct iommu_dma_cookie *iova_cookie; }; enum iommu_cap { -- cgit v1.2.3 From 7a7c5badf85806eab75e31ab8d45021f1545b0e3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 11 Aug 2021 13:21:28 +0100 Subject: iommu: Indicate queued flushes via gather data Since iommu_iotlb_gather exists to help drivers optimise flushing for a given unmap request, it is also the logical place to indicate whether the unmap is strict or not, and thus help them further optimise for whether to expect a sync or a flush_all subsequently. As part of that, it also seems fair to make the flush queue code take responsibility for enforcing the really subtle ordering requirement it brings, so that we don't need to worry about forgetting that if new drivers want to add flush queue support, and can consolidate the existing versions. While we're adding to the kerneldoc, also fill in some info for @freelist which was overlooked previously. 
Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/bf5f8e2ad84e48c712ccbf80fa8c610594c7595f.1628682049.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 1 + drivers/iommu/iova.c | 7 +++++++ include/linux/iommu.h | 8 +++++++- 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index afaa1f9b5935..1eacbbdf601c 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -481,6 +481,7 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr, dma_addr -= iova_off; size = iova_align(iovad, size + iova_off); iommu_iotlb_gather_init(&iotlb_gather); + iotlb_gather.queued = cookie->fq_domain; unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather); WARN_ON(unmapped != size); diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index b6cf5f16123b..2ad73fb2e94e 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -637,6 +637,13 @@ void queue_iova(struct iova_domain *iovad, unsigned long flags; unsigned idx; + /* + * Order against the IOMMU driver's pagetable update from unmapping + * @pte, to guarantee that iova_domain_flush() observes that if called + * from a different CPU before we release the lock below. 
+ */ + smp_wmb(); + spin_lock_irqsave(&fq->lock, flags); /* diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 141779d76035..f7679f6684b1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -161,16 +161,22 @@ enum iommu_dev_features { * @start: IOVA representing the start of the range to be flushed * @end: IOVA representing the end of the range to be flushed (inclusive) * @pgsize: The interval at which to perform the flush + * @freelist: Removed pages to free after sync + * @queued: Indicates that the flush will be queued * * This structure is intended to be updated by multiple calls to the * ->unmap() function in struct iommu_ops before eventually being passed - * into ->iotlb_sync(). + * into ->iotlb_sync(). Drivers can add pages to @freelist to be freed after + * ->iotlb_sync() or ->flush_iotlb_all() have cleared all cached references to + * them. @queued is set to indicate when ->flush_iotlb_all() will be called + * later instead of ->iotlb_sync(), so drivers may optimise accordingly. */ struct iommu_iotlb_gather { unsigned long start; unsigned long end; size_t pgsize; struct page *freelist; + bool queued; }; /** -- cgit v1.2.3 From a8e5f04458c4e496eada2b029ce96713bb6c388d Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 11 Aug 2021 13:21:29 +0100 Subject: iommu/io-pgtable: Remove non-strict quirk IO_PGTABLE_QUIRK_NON_STRICT was never a very comfortable fit, since it's not a quirk of the pagetable format itself. Now that we have a more appropriate way to convey non-strict unmaps, though, this last of the non-quirk quirks can also go, and with the flush queue code also now enforcing its own ordering we can have a lovely cleanup all round. 
Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/155b5c621cd8936472e273a8b07a182f62c6c20d.1628682049.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 3 --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 3 --- drivers/iommu/io-pgtable-arm-v7s.c | 12 ++---------- drivers/iommu/io-pgtable-arm.c | 12 ++---------- include/linux/io-pgtable.h | 5 ----- 5 files changed, 4 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index ee53a841815e..69801866090c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2174,9 +2174,6 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain, .iommu_dev = smmu->dev, }; - if (!iommu_get_dma_strict(domain)) - pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; - pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) return -ENOMEM; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 970d9e4dcd69..a325d4769c17 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -765,9 +765,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .iommu_dev = smmu->dev, }; - if (!iommu_get_dma_strict(domain)) - pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; - if (smmu->impl && smmu->impl->init_context) { ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev); if (ret) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 5db90d7ce2ec..e84478d39705 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -700,14 +700,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, ARM_V7S_BLOCK_SIZE(lvl + 1)); ptep = iopte_deref(pte[i], lvl, data); __arm_v7s_free_table(ptep, lvl + 1, data); - } else if (iop->cfg.quirks & 
IO_PGTABLE_QUIRK_NON_STRICT) { - /* - * Order the PTE update against queueing the IOVA, to - * guarantee that a flush callback from a different CPU - * has observed it before the TLBIALL can be issued. - */ - smp_wmb(); - } else { + } else if (!gather->queued) { io_pgtable_tlb_add_page(iop, gather, iova, blk_size); } iova += blk_size; @@ -791,8 +784,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_PERMS | - IO_PGTABLE_QUIRK_ARM_MTK_EXT | - IO_PGTABLE_QUIRK_NON_STRICT)) + IO_PGTABLE_QUIRK_ARM_MTK_EXT)) return NULL; /* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */ diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 053df4048a29..48a5bd8f571d 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -638,14 +638,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, io_pgtable_tlb_flush_walk(iop, iova + i * size, size, ARM_LPAE_GRANULE(data)); __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data)); - } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) { - /* - * Order the PTE update against queueing the IOVA, to - * guarantee that a flush callback from a different CPU - * has observed it before the TLBIALL can be issued. 
- */ - smp_wmb(); - } else { + } else if (!gather->queued) { io_pgtable_tlb_add_page(iop, gather, iova + i * size, size); } @@ -825,7 +818,6 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) bool tg1; if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | - IO_PGTABLE_QUIRK_NON_STRICT | IO_PGTABLE_QUIRK_ARM_TTBR1 | IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)) return NULL; @@ -929,7 +921,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr; /* The NS quirk doesn't apply at stage 2 */ - if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NON_STRICT)) + if (cfg->quirks) return NULL; data = arm_lpae_alloc_pgtable(cfg); diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index c43f3b899d2a..9ba6d9ea316e 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -73,10 +73,6 @@ struct io_pgtable_cfg { * to support up to 35 bits PA where the bit32, bit33 and bit34 are * encoded in the bit9, bit4 and bit5 of the PTE respectively. * - * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs - * on unmap, for DMA domains using the flush queue mechanism for - * delayed invalidation. - * * IO_PGTABLE_QUIRK_ARM_TTBR1: (ARM LPAE format) Configure the table * for use in the upper half of a split address space. 
* @@ -86,7 +82,6 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3) - #define IO_PGTABLE_QUIRK_NON_STRICT BIT(4) #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) unsigned long quirks; -- cgit v1.2.3 From bf3aed4660c6e3c44c69f07d8927ee5a22a952ac Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 11 Aug 2021 13:21:30 +0100 Subject: iommu: Introduce explicit type for non-strict DMA domains Promote the difference between strict and non-strict DMA domains from an internal detail to a distinct domain feature and type, to pave the road for exposing it through the sysfs default domain interface. Reviewed-by: Lu Baolu Reviewed-by: Jean-Philippe Brucker Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/08cd2afaf6b63c58ad49acec3517c9b32c2bb946.1628682049.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 2 +- drivers/iommu/iommu.c | 8 ++++++-- include/linux/iommu.h | 11 +++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 1eacbbdf601c..17ac3dd4f23e 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1319,7 +1319,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) * The IOMMU core code allocates the default DMA domain, which the * underlying IOMMU driver needs to support via the dma-iommu layer. 
*/ - if (domain->type == IOMMU_DOMAIN_DMA) { + if (iommu_is_dma_domain(domain)) { if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev)) goto out_err; dev->dma_ops = &iommu_dma_ops; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index b65fcc66ffa4..17d6728f5a09 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -115,6 +115,7 @@ static const char *iommu_domain_type_str(unsigned int t) case IOMMU_DOMAIN_UNMANAGED: return "Unmanaged"; case IOMMU_DOMAIN_DMA: + case IOMMU_DOMAIN_DMA_FQ: return "Translated"; default: return "Unknown"; @@ -552,6 +553,9 @@ static ssize_t iommu_group_show_type(struct iommu_group *group, case IOMMU_DOMAIN_DMA: type = "DMA\n"; break; + case IOMMU_DOMAIN_DMA_FQ: + type = "DMA-FQ\n"; + break; } } mutex_unlock(&group->mutex); @@ -765,7 +769,7 @@ static int iommu_create_device_direct_mappings(struct iommu_group *group, unsigned long pg_size; int ret = 0; - if (!domain || domain->type != IOMMU_DOMAIN_DMA) + if (!domain || !iommu_is_dma_domain(domain)) return 0; BUG_ON(!domain->pgsize_bitmap); @@ -1948,7 +1952,7 @@ static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; /* Temporarily avoid -EEXIST while drivers still get their own cookies */ - if (type == IOMMU_DOMAIN_DMA && !domain->iova_cookie && iommu_get_dma_cookie(domain)) { + if (iommu_is_dma_domain(domain) && !domain->iova_cookie && iommu_get_dma_cookie(domain)) { iommu_domain_free(domain); domain = NULL; } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f7679f6684b1..5629ae42951f 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -61,6 +61,7 @@ struct iommu_domain_geometry { #define __IOMMU_DOMAIN_DMA_API (1U << 1) /* Domain for use in DMA-API implementation */ #define __IOMMU_DOMAIN_PT (1U << 2) /* Domain is identity mapped */ +#define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */ /* * This are the possible domain-types @@ -73,12 +74,17 @@ 
struct iommu_domain_geometry { * IOMMU_DOMAIN_DMA - Internally used for DMA-API implementations. * This flag allows IOMMU drivers to implement * certain optimizations for these domains + * IOMMU_DOMAIN_DMA_FQ - As above, but definitely using batched TLB + * invalidation. */ #define IOMMU_DOMAIN_BLOCKED (0U) #define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT) #define IOMMU_DOMAIN_UNMANAGED (__IOMMU_DOMAIN_PAGING) #define IOMMU_DOMAIN_DMA (__IOMMU_DOMAIN_PAGING | \ __IOMMU_DOMAIN_DMA_API) +#define IOMMU_DOMAIN_DMA_FQ (__IOMMU_DOMAIN_PAGING | \ + __IOMMU_DOMAIN_DMA_API | \ + __IOMMU_DOMAIN_DMA_FQ) struct iommu_domain { unsigned type; @@ -90,6 +96,11 @@ struct iommu_domain { struct iommu_dma_cookie *iova_cookie; }; +static inline bool iommu_is_dma_domain(struct iommu_domain *domain) +{ + return domain->type & __IOMMU_DOMAIN_DMA_API; +} + enum iommu_cap { IOMMU_CAP_CACHE_COHERENCY, /* IOMMU can enforce cache coherent DMA transactions */ -- cgit v1.2.3 From c208916fe6c7b84e3ec95cd91853039596eeb2cf Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 11 Aug 2021 13:21:34 +0100 Subject: iommu: Express DMA strictness via the domain type Eliminate the iommu_get_dma_strict() indirection and pipe the information through the domain type from the beginning. Besides the flow simplification this also has several nice side-effects: - Automatically implies strict mode for untrusted devices by virtue of their IOMMU_DOMAIN_DMA override. - Ensures that we only end up using flush queues for drivers which are aware of them and can actually benefit. - Allows us to handle flush queue init failure by falling back to strict mode instead of leaving it to possibly blow up later. 
Reviewed-by: Lu Baolu Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/47083d69155577f1367877b1594921948c366eb3.1628682049.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 10 ++++++---- drivers/iommu/iommu.c | 14 +++++--------- include/linux/iommu.h | 1 - 3 files changed, 11 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 17ac3dd4f23e..b7ae855c1e89 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -370,13 +370,15 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, init_iova_domain(iovad, 1UL << order, base_pfn); - if (!cookie->fq_domain && !dev_is_untrusted(dev) && - domain->ops->flush_iotlb_all && !iommu_get_dma_strict(domain)) { + /* If the FQ fails we can simply fall back to strict mode */ + if (domain->type == IOMMU_DOMAIN_DMA_FQ && !cookie->fq_domain) { if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, - iommu_dma_entry_dtor)) + iommu_dma_entry_dtor)) { pr_warn("iova flush queue initialization failed\n"); - else + domain->type = IOMMU_DOMAIN_DMA; + } else { cookie->fq_domain = domain; + } } return iova_reserve_iommu_regions(dev, domain); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 17d6728f5a09..e09f0d433683 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -136,6 +136,9 @@ static int __init iommu_subsys_init(void) } } + if (!iommu_default_passthrough() && !iommu_dma_strict) + iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ; + pr_info("Default domain type: %s %s\n", iommu_domain_type_str(iommu_def_domain_type), (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ? 
@@ -355,17 +358,10 @@ early_param("iommu.strict", iommu_dma_setup); void iommu_set_dma_strict(void) { iommu_dma_strict = true; + if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ) + iommu_def_domain_type = IOMMU_DOMAIN_DMA; } -bool iommu_get_dma_strict(struct iommu_domain *domain) -{ - /* only allow lazy flushing for DMA domains */ - if (domain->type == IOMMU_DOMAIN_DMA) - return iommu_dma_strict; - return true; -} -EXPORT_SYMBOL_GPL(iommu_get_dma_strict); - static ssize_t iommu_group_attr_show(struct kobject *kobj, struct attribute *__attr, char *buf) { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 5629ae42951f..923a8d1c5e39 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -504,7 +504,6 @@ int iommu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirks); void iommu_set_dma_strict(void); -bool iommu_get_dma_strict(struct iommu_domain *domain); extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags); -- cgit v1.2.3 From 452e69b58c2889e5546edb92d9e66285410f7463 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 11 Aug 2021 13:21:38 +0100 Subject: iommu: Allow enabling non-strict mode dynamically Allocating and enabling a flush queue is in fact something we can reasonably do while a DMA domain is active, without having to rebuild it from scratch. Thus we can allow a strict -> non-strict transition from sysfs without requiring to unbind the device's driver, which is of particular interest to users who want to make selective relaxations to critical devices like the one serving their root filesystem. Disabling and draining a queue also seems technically possible to achieve without rebuilding the whole domain, but would certainly be more involved. Furthermore there's not such a clear use-case for tightening up security *after* the device may already have done whatever it is that you don't trust it not to do, so we only consider the relaxation case. 
Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/d652966348c78457c38bf18daf369272a4ebc2c9.1628682049.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 47 ++++++++++++++++++++++++++++++++--------------- drivers/iommu/iommu.c | 17 +++++++++++++---- drivers/iommu/iova.c | 11 ++++++----- include/linux/dma-iommu.h | 6 ++++++ 4 files changed, 57 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index b7ae855c1e89..bac7370ead3e 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -317,6 +317,30 @@ static bool dev_is_untrusted(struct device *dev) return dev_is_pci(dev) && to_pci_dev(dev)->untrusted; } +/* sysfs updates are serialised by the mutex of the group owning @domain */ +int iommu_dma_init_fq(struct iommu_domain *domain) +{ + struct iommu_dma_cookie *cookie = domain->iova_cookie; + int ret; + + if (cookie->fq_domain) + return 0; + + ret = init_iova_flush_queue(&cookie->iovad, iommu_dma_flush_iotlb_all, + iommu_dma_entry_dtor); + if (ret) { + pr_warn("iova flush queue initialization failed\n"); + return ret; + } + /* + * Prevent incomplete iovad->fq being observable. 
Pairs with path from + * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova() + */ + smp_wmb(); + WRITE_ONCE(cookie->fq_domain, domain); + return 0; +} + /** * iommu_dma_init_domain - Initialise a DMA mapping domain * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() @@ -371,15 +395,8 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, init_iova_domain(iovad, 1UL << order, base_pfn); /* If the FQ fails we can simply fall back to strict mode */ - if (domain->type == IOMMU_DOMAIN_DMA_FQ && !cookie->fq_domain) { - if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, - iommu_dma_entry_dtor)) { - pr_warn("iova flush queue initialization failed\n"); - domain->type = IOMMU_DOMAIN_DMA; - } else { - cookie->fq_domain = domain; - } - } + if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain)) + domain->type = IOMMU_DOMAIN_DMA; return iova_reserve_iommu_regions(dev, domain); } @@ -454,17 +471,17 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, } static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, - dma_addr_t iova, size_t size, struct page *freelist) + dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather) { struct iova_domain *iovad = &cookie->iovad; /* The MSI case is only ever cleaning up its most recent allocation */ if (cookie->type == IOMMU_DMA_MSI_COOKIE) cookie->msi_iova -= size; - else if (cookie->fq_domain) /* non-strict mode */ + else if (gather && gather->queued) queue_iova(iovad, iova_pfn(iovad, iova), size >> iova_shift(iovad), - (unsigned long)freelist); + (unsigned long)gather->freelist); else free_iova_fast(iovad, iova_pfn(iovad, iova), size >> iova_shift(iovad)); @@ -483,14 +500,14 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr, dma_addr -= iova_off; size = iova_align(iovad, size + iova_off); iommu_iotlb_gather_init(&iotlb_gather); - iotlb_gather.queued = cookie->fq_domain; + iotlb_gather.queued = 
READ_ONCE(cookie->fq_domain); unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather); WARN_ON(unmapped != size); - if (!cookie->fq_domain) + if (!iotlb_gather.queued) iommu_iotlb_sync(domain, &iotlb_gather); - iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist); + iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather); } static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 0e1f791873fa..feb66d937c9c 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3204,6 +3204,14 @@ static int iommu_change_dev_def_domain(struct iommu_group *group, goto out; } + /* We can bring up a flush queue without tearing down the domain */ + if (type == IOMMU_DOMAIN_DMA_FQ && prev_dom->type == IOMMU_DOMAIN_DMA) { + ret = iommu_dma_init_fq(prev_dom); + if (!ret) + prev_dom->type = IOMMU_DOMAIN_DMA_FQ; + goto out; + } + /* Sets group->default_domain to the newly allocated domain */ ret = iommu_group_alloc_default_domain(dev->bus, group, type); if (ret) @@ -3244,9 +3252,9 @@ out: } /* - * Changing the default domain through sysfs requires the users to ubind the - * drivers from the devices in the iommu group. Return failure if this doesn't - * meet. + * Changing the default domain through sysfs requires the users to unbind the + * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ + * transition. Return failure if this isn't met. * * We need to consider the race between this and the device release path. 
* device_lock(dev) is used here to guarantee that the device release path @@ -3322,7 +3330,8 @@ static ssize_t iommu_group_store_type(struct iommu_group *group, /* Check if the device in the group still has a driver bound to it */ device_lock(dev); - if (device_is_bound(dev)) { + if (device_is_bound(dev) && !(req_type == IOMMU_DOMAIN_DMA_FQ && + group->default_domain->type == IOMMU_DOMAIN_DMA)) { pr_err_ratelimited("Device is still bound to driver\n"); ret = -EBUSY; goto out; diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 2ad73fb2e94e..0af42fb93a49 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -121,8 +121,6 @@ int init_iova_flush_queue(struct iova_domain *iovad, spin_lock_init(&fq->lock); } - smp_wmb(); - iovad->fq = queue; timer_setup(&iovad->fq_timer, fq_flush_timeout, 0); @@ -633,17 +631,20 @@ void queue_iova(struct iova_domain *iovad, unsigned long pfn, unsigned long pages, unsigned long data) { - struct iova_fq *fq = raw_cpu_ptr(iovad->fq); + struct iova_fq *fq; unsigned long flags; unsigned idx; /* * Order against the IOMMU driver's pagetable update from unmapping * @pte, to guarantee that iova_domain_flush() observes that if called - * from a different CPU before we release the lock below. + * from a different CPU before we release the lock below. Full barrier + * so it also pairs with iommu_dma_init_fq() to avoid seeing partially + * written fq state here. */ - smp_wmb(); + smp_mb(); + fq = raw_cpu_ptr(iovad->fq); spin_lock_irqsave(&fq->lock, flags); /* diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 758ca4694257..24607dc3c2ac 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -20,6 +20,7 @@ void iommu_put_dma_cookie(struct iommu_domain *domain); /* Setup call for arch DMA mapping code */ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit); +int iommu_dma_init_fq(struct iommu_domain *domain); /* The DMA API isn't _quite_ the whole story, though... 
*/ /* @@ -54,6 +55,11 @@ static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, { } +static inline int iommu_dma_init_fq(struct iommu_domain *domain) +{ + return -EINVAL; +} + static inline int iommu_get_dma_cookie(struct iommu_domain *domain) { return -ENODEV; -- cgit v1.2.3 From 4d99efb229e63928c6b03a756a2e38cd4777fbe8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 18 Aug 2021 21:48:44 +0800 Subject: iommu/vt-d: Update the virtual command related registers The VT-d spec Revision 3.3 updated the virtual command registers, virtual command opcode B register, virtual command response register and virtual command capability register (Section 10.4.43, 10.4.44, 10.4.45, 10.4.46). This updates the virtual command interface implementation in the Intel IOMMU driver accordingly. Fixes: 24f27d32ab6b7 ("iommu/vt-d: Enlightened PASID allocation") Signed-off-by: Lu Baolu Cc: Ashok Raj Cc: Sanjay Kumar Cc: Kevin Tian Link: https://lore.kernel.org/r/20210713042649.3547403-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210818134852.1847070-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/pasid.h | 10 +++++----- include/linux/intel-iommu.h | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 5ff61c3d401f..8c2efb85fb3b 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -28,12 +28,12 @@ #define VCMD_CMD_ALLOC 0x1 #define VCMD_CMD_FREE 0x2 #define VCMD_VRSP_IP 0x1 -#define VCMD_VRSP_SC(e) (((e) >> 1) & 0x3) +#define VCMD_VRSP_SC(e) (((e) & 0xff) >> 1) #define VCMD_VRSP_SC_SUCCESS 0 -#define VCMD_VRSP_SC_NO_PASID_AVAIL 2 -#define VCMD_VRSP_SC_INVALID_PASID 2 -#define VCMD_VRSP_RESULT_PASID(e) (((e) >> 8) & 0xfffff) -#define VCMD_CMD_OPERAND(e) ((e) << 8) +#define VCMD_VRSP_SC_NO_PASID_AVAIL 16 +#define VCMD_VRSP_SC_INVALID_PASID 16 +#define VCMD_VRSP_RESULT_PASID(e) (((e) >> 16) & 0xfffff) 
+#define VCMD_CMD_OPERAND(e) ((e) << 16) /* * Domain ID reserved for pasid entries programmed for first-level * only and pass-through transfer modes. diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index d0fa0b31994d..05a65eb155f7 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -124,9 +124,9 @@ #define DMAR_MTRR_PHYSMASK8_REG 0x208 #define DMAR_MTRR_PHYSBASE9_REG 0x210 #define DMAR_MTRR_PHYSMASK9_REG 0x218 -#define DMAR_VCCAP_REG 0xe00 /* Virtual command capability register */ -#define DMAR_VCMD_REG 0xe10 /* Virtual command register */ -#define DMAR_VCRSP_REG 0xe20 /* Virtual command response register */ +#define DMAR_VCCAP_REG 0xe30 /* Virtual command capability register */ +#define DMAR_VCMD_REG 0xe00 /* Virtual command register */ +#define DMAR_VCRSP_REG 0xe10 /* Virtual command response register */ #define DMAR_IQER_REG_IQEI(reg) FIELD_GET(GENMASK_ULL(3, 0), reg) #define DMAR_IQER_REG_ITESID(reg) FIELD_GET(GENMASK_ULL(47, 32), reg) -- cgit v1.2.3 From 48811c44349ffbb778d3e36b53beb03ad43a979c Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 18 Aug 2021 21:48:49 +0800 Subject: iommu/vt-d: Allow devices to have more than 32 outstanding PRs The minimum per-IOMMU PRQ queue size is one 4K page, which is more entries than the hardcoded limit of 32 in the current VT-d code. Some devices can support up to 512 outstanding PRQs but are underutilized by this limit of 32. Although 32 gives some rough fairness when multiple devices share the same IOMMU PRQ queue, it is far from optimal for customized use cases. This extends the per-IOMMU PRQ queue size to four 4K pages and lets the devices have as many outstanding page requests as they can.
Signed-off-by: Jacob Pan Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210720013856.4143880-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210818134852.1847070-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 3 ++- drivers/iommu/intel/svm.c | 4 ---- include/linux/intel-svm.h | 5 +++++ 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 8d4d49e12c51..d75f59ae28e6 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1541,7 +1542,7 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info) if (info->pri_supported && (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) && - !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) + !pci_reset_pri(pdev) && !pci_enable_pri(pdev, PRQ_DEPTH)) info->pri_enabled = 1; #endif if (info->ats_supported && pci_ats_page_aligned(pdev) && diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 9b0f22bc0514..813438a07b62 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -31,8 +31,6 @@ static irqreturn_t prq_event_thread(int irq, void *d); static void intel_svm_drain_prq(struct device *dev, u32 pasid); #define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) -#define PRQ_ORDER 0 - static DEFINE_XARRAY_ALLOC(pasid_private_array); static int pasid_private_add(ioasid_t pasid, void *priv) { @@ -724,8 +722,6 @@ struct page_req_dsc { u64 priv_data[2]; }; -#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) - static bool is_canonical_address(u64 addr) { int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1); diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 10fa80eef13a..57cceecbe37f 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -14,6 +14,11 @@ #define 
SVM_REQ_EXEC (1<<1) #define SVM_REQ_PRIV (1<<0) +/* Page Request Queue depth */ +#define PRQ_ORDER 2 +#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) +#define PRQ_DEPTH ((0x1000 << PRQ_ORDER) >> 5) + /* * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only * for access to kernel addresses. No IOTLB flushes are automatically done -- cgit v1.2.3 From f7403abf5f06f407c50252e003f5fb332325147b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 20 Aug 2021 14:14:42 +0100 Subject: iommu/io-pgtable: Abstract iommu_iotlb_gather access Previously io-pgtable merely passed the iommu_iotlb_gather pointer through to helpers, but now it has grown its own direct dereference. This turns out to break the build for !IOMMU_API configs where the structure only has a dummy definition. It will probably also crash drivers who don't use the gather mechanism and simply pass in NULL. Wrap this dereference in a suitable helper which can both be stubbed out for !IOMMU_API and encapsulate a NULL check otherwise. 
Fixes: 7a7c5badf858 ("iommu: Indicate queued flushes via gather data") Reported-by: kernel test robot Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/83672ee76f6405c82845a55c148fa836f56fbbc1.1629465282.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-arm-v7s.c | 2 +- drivers/iommu/io-pgtable-arm.c | 2 +- include/linux/iommu.h | 10 ++++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index e84478d39705..bfb6acb651e5 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -700,7 +700,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, ARM_V7S_BLOCK_SIZE(lvl + 1)); ptep = iopte_deref(pte[i], lvl, data); __arm_v7s_free_table(ptep, lvl + 1, data); - } else if (!gather->queued) { + } else if (!iommu_iotlb_gather_queued(gather)) { io_pgtable_tlb_add_page(iop, gather, iova, blk_size); } iova += blk_size; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 48a5bd8f571d..9697721f7e3a 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -638,7 +638,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, io_pgtable_tlb_flush_walk(iop, iova + i * size, size, ARM_LPAE_GRANULE(data)); __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data)); - } else if (!gather->queued) { + } else if (!iommu_iotlb_gather_queued(gather)) { io_pgtable_tlb_add_page(iop, gather, iova + i * size, size); } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 923a8d1c5e39..a23779c093c7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -548,6 +548,11 @@ static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain, gather->start = start; } +static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather) +{ + return gather && gather->queued; +} + /* PCI 
device grouping function */ extern struct iommu_group *pci_device_group(struct device *dev); /* Generic device grouping function */ @@ -896,6 +901,11 @@ static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain, { } +static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather) +{ + return false; +} + static inline void iommu_device_unregister(struct iommu_device *iommu) { } -- cgit v1.2.3