From 4e50ce03976fbc8ae995a000c4b10c737467beaa Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka
Date: Wed, 13 Mar 2019 10:03:17 +0100
Subject: iommu/amd: fix sg->dma_address for sg->offset bigger than PAGE_SIZE

Take into account that sg->offset can be bigger than PAGE_SIZE when
setting the segment's sg->dma_address. Otherwise sg->dma_address will
point at a different page, which makes DMA impossible, with errors
like this:

xhci_hcd 0000:38:00.3: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0000 address=0x00000000fdaa70c0 flags=0x0020]
xhci_hcd 0000:38:00.3: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0000 address=0x00000000fdaa7040 flags=0x0020]
xhci_hcd 0000:38:00.3: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0000 address=0x00000000fdaa7080 flags=0x0020]
xhci_hcd 0000:38:00.3: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0000 address=0x00000000fdaa7100 flags=0x0020]
xhci_hcd 0000:38:00.3: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0000 address=0x00000000fdaa7000 flags=0x0020]

Additionally, with a wrong sg->dma_address, unmap_sg will free the
wrong pages, which can cause crashes like this:

Feb 28 19:27:45 kernel: BUG: Bad page state in process cinnamon pfn:39e8b1
Feb 28 19:27:45 kernel: Disabling lock debugging due to kernel taint
Feb 28 19:27:45 kernel: flags: 0x2ffff0000000000()
Feb 28 19:27:45 kernel: raw: 02ffff0000000000 0000000000000000 ffffffff00000301 0000000000000000
Feb 28 19:27:45 kernel: raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000
Feb 28 19:27:45 kernel: page dumped because: nonzero _refcount
Feb 28 19:27:45 kernel: Modules linked in: ccm fuse arc4 nct6775 hwmon_vid amdgpu nls_iso8859_1 nls_cp437 edac_mce_amd vfat fat kvm_amd ccp rng_core kvm mt76x0u mt76x0_common mt76x02_usb irqbypass mt76_usb mt76x02_lib mt76 crct10dif_pclmul crc32_pclmul chash mac80211 amd_iommu_v2 ghash_clmulni_intel gpu_sched i2c_algo_bit ttm wmi_bmof snd_hda_codec_realtek snd_hda_codec_generic drm_kms_helper snd_hda_codec_hdmi snd_hda_intel drm snd_hda_codec aesni_intel snd_hda_core snd_hwdep aes_x86_64 crypto_simd snd_pcm cfg80211 cryptd mousedev snd_timer glue_helper pcspkr r8169 input_leds realtek agpgart libphy rfkill snd syscopyarea sysfillrect sysimgblt fb_sys_fops soundcore sp5100_tco k10temp i2c_piix4 wmi evdev gpio_amdpt pinctrl_amd mac_hid pcc_cpufreq acpi_cpufreq sg ip_tables x_tables ext4(E) crc32c_generic(E) crc16(E) mbcache(E) jbd2(E) fscrypto(E) sd_mod(E) hid_generic(E) usbhid(E) hid(E) dm_mod(E) serio_raw(E) atkbd(E) libps2(E) crc32c_intel(E) ahci(E) libahci(E) libata(E) xhci_pci(E) xhci_hcd(E)
Feb 28 19:27:45 kernel:  scsi_mod(E) i8042(E) serio(E) bcache(E) crc64(E)
Feb 28 19:27:45 kernel: CPU: 2 PID: 896 Comm: cinnamon Tainted: G B W E 4.20.12-arch1-1-custom #1
Feb 28 19:27:45 kernel: Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./B450M Pro4, BIOS P1.20 06/26/2018
Feb 28 19:27:45 kernel: Call Trace:
Feb 28 19:27:45 kernel:  dump_stack+0x5c/0x80
Feb 28 19:27:45 kernel:  bad_page.cold.29+0x7f/0xb2
Feb 28 19:27:45 kernel:  __free_pages_ok+0x2c0/0x2d0
Feb 28 19:27:45 kernel:  skb_release_data+0x96/0x180
Feb 28 19:27:45 kernel:  __kfree_skb+0xe/0x20
Feb 28 19:27:45 kernel:  tcp_recvmsg+0x894/0xc60
Feb 28 19:27:45 kernel:  ? reuse_swap_page+0x120/0x340
Feb 28 19:27:45 kernel:  ? ptep_set_access_flags+0x23/0x30
Feb 28 19:27:45 kernel:  inet_recvmsg+0x5b/0x100
Feb 28 19:27:45 kernel:  __sys_recvfrom+0xc3/0x180
Feb 28 19:27:45 kernel:  ? handle_mm_fault+0x10a/0x250
Feb 28 19:27:45 kernel:  ? syscall_trace_enter+0x1d3/0x2d0
Feb 28 19:27:45 kernel:  ? __audit_syscall_exit+0x22a/0x290
Feb 28 19:27:45 kernel:  __x64_sys_recvfrom+0x24/0x30
Feb 28 19:27:45 kernel:  do_syscall_64+0x5b/0x170
Feb 28 19:27:45 kernel:  entry_SYSCALL_64_after_hwframe+0x44/0xa9

Cc: stable@vger.kernel.org
Reported-and-tested-by: Jan Viktorin
Reviewed-by: Alexander Duyck
Signed-off-by: Stanislaw Gruszka
Fixes: 80187fd39dcb ('iommu/amd: Optimize map_sg and unmap_sg')
Signed-off-by: Joerg Roedel
---
 drivers/iommu/amd_iommu.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index b319e51c379b..21cb088d6687 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2608,7 +2608,12 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,

 	/* Everything is mapped - write the right values into s->dma_address */
 	for_each_sg(sglist, s, nelems, i) {
-		s->dma_address += address + s->offset;
+		/*
+		 * Add in the remaining piece of the scatter-gather offset that
+		 * was masked out when we were determining the physical address
+		 * via (sg_phys(s) & PAGE_MASK) earlier.
+		 */
+		s->dma_address += address + (s->offset & ~PAGE_MASK);
 		s->dma_length = s->length;
 	}
--
cgit v1.2.3
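The arithmetic being fixed is easy to trip over, so here is a minimal
user-space sketch of it (4 KiB pages assumed; the values are made up to
mirror the fault log above, not taken from a real trace):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	uint64_t offset = 0x10c0;     /* sg->offset, bigger than PAGE_SIZE */
	uint64_t iova   = 0xfdaa7000; /* IOVA of the page holding the data; the
	                               * whole-page part of the offset was already
	                               * folded in via sg_phys(s) & PAGE_MASK */

	uint64_t bad  = iova + offset;                /* counts the page twice */
	uint64_t good = iova + (offset & ~PAGE_MASK); /* keeps only the sub-page part */

	/* prints bad=0xfdaa80c0 good=0xfdaa70c0 */
	printf("bad=%#llx good=%#llx\n",
	       (unsigned long long)bad, (unsigned long long)good);
	return 0;
}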
From 80ef4464d5e27408685e609d389663aad46644b9 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Wed, 20 Mar 2019 18:57:23 +0000
Subject: iommu/iova: Fix tracking of recently failed iova address

If a 32 bit allocation request is too big to possibly succeed, it
early exits with a failure and then should never update
max32_alloc_size. This patch fixes the current code: the size is now
only updated if the slow path failed while walking the tree. Without
the fix, the allocation may enter the slow path again even if a
request with the same or a smaller size failed before.

Cc: stable@vger.kernel.org # 4.20+
Fixes: bee60e94a1e2 ("iommu/iova: Optimise attempts to allocate iova from 32bit address range")
Reviewed-by: Robin Murphy
Signed-off-by: Robert Richter
Signed-off-by: Joerg Roedel
---
 drivers/iommu/iova.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index f8d3ba247523..2de8122e218f 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -207,8 +207,10 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 		curr_iova = rb_entry(curr, struct iova, node);
 	} while (curr && new_pfn <= curr_iova->pfn_hi);

-	if (limit_pfn < size || new_pfn < iovad->start_pfn)
+	if (limit_pfn < size || new_pfn < iovad->start_pfn) {
+		iovad->max32_alloc_size = size;
 		goto iova32_full;
+	}

 	/* pfn_lo will point to size aligned address if size_aligned is set */
 	new->pfn_lo = new_pfn;
@@ -222,7 +224,6 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 	return 0;

 iova32_full:
-	iovad->max32_alloc_size = size;
 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 	return -ENOMEM;
 }
--
cgit v1.2.3
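max32_alloc_size is a one-value failure cache: it remembers a size the
tree walk could not satisfy, so later requests at least that big are
rejected up front. A toy model of the fixed control flow (illustrative
stand-ins only, not the kernel code):

#include <stdbool.h>

/* Smallest 32-bit request known to fail; ~0UL means "no failure recorded". */
static unsigned long max32_alloc_size = ~0UL;

/* Stand-in for the rbtree walk; stubbed to always fail here. */
static bool tree_walk_alloc(unsigned long size) { (void)size; return false; }

static bool alloc32(unsigned long size)
{
	/* Early exit: a request this big already failed. Updating the cache
	 * here would *raise* the bound (since size >= max32_alloc_size),
	 * letting smaller, already-doomed requests walk the tree again --
	 * which was the bug. */
	if (size >= max32_alloc_size)
		return false;

	if (!tree_walk_alloc(size)) {
		/* Only a real tree-walk failure tightens the cached bound. */
		max32_alloc_size = size;
		return false;
	}

	return true;
}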
Cc: Jacob Pan
Cc: mark gross
Suggested-by: Ashok Raj
Fixes: f8bab73515ca5 ("intel-iommu: PMEN support")
Signed-off-by: Lu Baolu
Signed-off-by: Joerg Roedel
---
 drivers/iommu/intel-iommu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 87274b54febd..f002d47d2f27 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1538,6 +1538,9 @@ static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
 	u32 pmen;
 	unsigned long flags;

+	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
+		return;
+
 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
 	pmen = readl(iommu->reg + DMAR_PMEN_REG);
 	pmen &= ~DMA_PMEN_EPM;
--
cgit v1.2.3

From 84c11e4df5aa4955acaa441f0cf1cb2e50daf64b Mon Sep 17 00:00:00 2001
From: Lu Baolu
Date: Wed, 20 Mar 2019 09:58:34 +0800
Subject: iommu/vt-d: Save the right domain ID used by hardware

The driver sets a default domain id (FLPT_DEFAULT_DID) in the
first-level-only pasid entry, but saves a different domain id in
@sdev->did. The value saved in @sdev->did will be used to invalidate
the translation caches. Hence, the driver might end up invalidating
the caches with the wrong domain id.

Cc: Ashok Raj
Cc: Jacob Pan
Fixes: 1c4f88b7f1f92 ("iommu/vt-d: Shared virtual address in scalable mode")
Signed-off-by: Liu Yi L
Signed-off-by: Lu Baolu
Signed-off-by: Joerg Roedel
---
 drivers/iommu/intel-iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index f002d47d2f27..28cb713d728c 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -5335,7 +5335,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd

 	ctx_lo = context[0].lo;

-	sdev->did = domain->iommu_did[iommu->seq_id];
+	sdev->did = FLPT_DEFAULT_DID;
 	sdev->sid = PCI_DEVID(info->bus, info->devfn);

 	if (!(ctx_lo & CONTEXT_PASIDE)) {
--
cgit v1.2.3
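Why the mismatch matters: translation-cache entries are tagged with the
domain ID the hardware saw when it walked the tables, and a
domain-selective invalidation only hits entries carrying that tag. A toy
model of the stale-cache hazard (stand-in structures, not the driver's):

#include <stdbool.h>
#include <stdio.h>

struct tlb_entry { unsigned int did; bool valid; };

static struct tlb_entry tlb[1];	/* a one-entry "IOTLB" is enough to show it */

static void tlb_fill(unsigned int did)
{
	tlb[0] = (struct tlb_entry){ .did = did, .valid = true };
}

static void tlb_inv_domain(unsigned int did)
{
	if (tlb[0].valid && tlb[0].did == did)
		tlb[0].valid = false;	/* only matching tags are flushed */
}

int main(void)
{
	enum { FLPT_DEFAULT_DID = 1, OTHER_DID = 5 };

	tlb_fill(FLPT_DEFAULT_DID);	/* hardware caches with the pasid-entry DID */
	tlb_inv_domain(OTHER_DID);	/* driver flushes with the saved sdev->did */
	printf("stale entry survived: %s\n", tlb[0].valid ? "yes" : "no");
	return 0;
}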
Fixes: fccb4e3b8ab09 ('iommu: Allow default domain type to be set on the kernel command line')
Signed-off-by: Joerg Roedel
---
 drivers/iommu/iommu.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 33a982e33716..109de67d5d72 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1105,10 +1105,12 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev)

 		dom = __iommu_domain_alloc(dev->bus, iommu_def_domain_type);
 		if (!dom && iommu_def_domain_type != IOMMU_DOMAIN_DMA) {
-			dev_warn(dev,
-				 "failed to allocate default IOMMU domain of type %u; falling back to IOMMU_DOMAIN_DMA",
-				 iommu_def_domain_type);
 			dom = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_DMA);
+			if (dom) {
+				dev_warn(dev,
+					 "failed to allocate default IOMMU domain of type %u; falling back to IOMMU_DOMAIN_DMA",
+					 iommu_def_domain_type);
+			}
 		}

 		group->default_domain = dom;
--
cgit v1.2.3

From 8aafaaf2212192012f5bae305bb31cdf7681d777 Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Thu, 28 Mar 2019 11:44:59 +0100
Subject: iommu/amd: Reserve exclusion range in iova-domain

If a device has an exclusion range specified in the IVRS table, this
region needs to be reserved in the iova-domain of that device. This
hasn't happened until now and can cause data corruption on data
transferred with these devices.

Treat exclusion ranges as reserved regions in the iommu-core to fix
the problem.

Fixes: be2a022c0dd0 ('x86, AMD IOMMU: add functions to parse IOMMU memory mapping requirements for devices')
Signed-off-by: Joerg Roedel
Reviewed-by: Gary R Hook
---
 drivers/iommu/amd_iommu.c       | 9 ++++++---
 drivers/iommu/amd_iommu_init.c  | 7 ++++---
 drivers/iommu/amd_iommu_types.h | 2 ++
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 21cb088d6687..f7cdd2ab7f11 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3169,21 +3169,24 @@ static void amd_iommu_get_resv_regions(struct device *dev,
 		return;

 	list_for_each_entry(entry, &amd_iommu_unity_map, list) {
+		int type, prot = 0;
 		size_t length;
-		int prot = 0;

 		if (devid < entry->devid_start || devid > entry->devid_end)
 			continue;

+		type = IOMMU_RESV_DIRECT;
 		length = entry->address_end - entry->address_start;
 		if (entry->prot & IOMMU_PROT_IR)
 			prot |= IOMMU_READ;
 		if (entry->prot & IOMMU_PROT_IW)
 			prot |= IOMMU_WRITE;
+		if (entry->prot & IOMMU_UNITY_MAP_FLAG_EXCL_RANGE)
+			/* Exclusion range */
+			type = IOMMU_RESV_RESERVED;

 		region = iommu_alloc_resv_region(entry->address_start,
-						 length, prot,
-						 IOMMU_RESV_DIRECT);
+						 length, prot, type);
 		if (!region) {
 			dev_err(dev, "Out of memory allocating dm-regions\n");
 			return;

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index f773792d77fd..1b1378619fc9 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -2013,6 +2013,9 @@ static int __init init_unity_map_range(struct ivmd_header *m)
 	if (e == NULL)
 		return -ENOMEM;

+	if (m->flags & IVMD_FLAG_EXCL_RANGE)
+		init_exclusion_range(m);
+
 	switch (m->type) {
 	default:
 		kfree(e);
@@ -2059,9 +2062,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
 	while (p < end) {
 		m = (struct ivmd_header *)p;

-		if (m->flags & IVMD_FLAG_EXCL_RANGE)
-			init_exclusion_range(m);
-		else if (m->flags & IVMD_FLAG_UNITY_MAP)
+		if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
 			init_unity_map_range(m);

 		p += m->length;

diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index eae0741f72dc..87965e4d9647 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -374,6 +374,8 @@
 #define IOMMU_PROT_IR 0x01
 #define IOMMU_PROT_IW 0x02

+#define IOMMU_UNITY_MAP_FLAG_EXCL_RANGE	(1 << 2)
+
 /* IOMMU capabilities */
 #define IOMMU_CAP_IOTLB   24
 #define IOMMU_CAP_NPCACHE 26
--
cgit v1.2.3
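One detail worth spelling out: init_unity_map_range() copies the IVMD
flags into the unity-map entry with a one-bit right shift (e->prot =
m->flags >> 1, per the surrounding code this hunk relies on), and the
exclusion-range bit sits at bit 3 of the IVRS flags, so after the shift
it lands on bit 2 -- which is presumably why IOMMU_UNITY_MAP_FLAG_EXCL_RANGE
is defined as (1 << 2). A quick sanity check, with the IVRS-side value
assumed from that layout:

#include <assert.h>

#define IVMD_FLAG_EXCL_RANGE		(1 << 3)	/* assumed IVRS flag bit */
#define IOMMU_UNITY_MAP_FLAG_EXCL_RANGE	(1 << 2)	/* entry->prot flag */

int main(void)
{
	/* e->prot = m->flags >> 1 carries the exclusion bit across unchanged. */
	assert((IVMD_FLAG_EXCL_RANGE >> 1) == IOMMU_UNITY_MAP_FLAG_EXCL_RANGE);
	return 0;
}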
From 0a352554da69b02f75ca3389c885c741f1f63235 Mon Sep 17 00:00:00 2001
From: Nicolas Boichat
Date: Thu, 28 Mar 2019 20:43:46 -0700
Subject: iommu/io-pgtable-arm-v7s: request DMA32 memory, and improve debugging

IOMMUs using the ARMv7 short-descriptor format require page tables
(level 1 and 2) to be allocated within the first 4GB of RAM, even on
64-bit systems.

For level 1/2 pages, ensure GFP_DMA32 is used if CONFIG_ZONE_DMA32
is defined (e.g. on arm64 platforms). For level 2 pages, allocate a
slab cache in SLAB_CACHE_DMA32. Note that we do not explicitly pass
GFP_DMA[32] to kmem_cache_zalloc, as this is not strictly necessary,
and would cause a warning in mm/sl*b.c, as we did not update
GFP_SLAB_BUG_MASK.

Also, print an error when the physical address does not fit in
32-bit, to make debugging easier in the future.

Link: http://lkml.kernel.org/r/20181210011504.122604-3-drinkcat@chromium.org
Fixes: ad67f5a6545f ("arm64: replace ZONE_DMA with ZONE_DMA32")
Signed-off-by: Nicolas Boichat
Acked-by: Will Deacon
Cc: Christoph Hellwig
Cc: Christoph Lameter
Cc: David Rientjes
Cc: Hsin-Yi Wang
Cc: Huaisheng Ye
Cc: Joerg Roedel
Cc: Joonsoo Kim
Cc: Matthew Wilcox
Cc: Matthias Brugger
Cc: Mel Gorman
Cc: Michal Hocko
Cc: Mike Rapoport
Cc: Pekka Enberg
Cc: Robin Murphy
Cc: Sasha Levin
Cc: Tomasz Figa
Cc: Vlastimil Babka
Cc: Yingjoe Chen
Cc: Yong Wu
Cc: stable@vger.kernel.org
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 drivers/iommu/io-pgtable-arm-v7s.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index f101afc315ab..9a8a8870e267 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -160,6 +160,14 @@

 #define ARM_V7S_TCR_PD1			BIT(5)

+#ifdef CONFIG_ZONE_DMA32
+#define ARM_V7S_TABLE_GFP_DMA GFP_DMA32
+#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA32
+#else
+#define ARM_V7S_TABLE_GFP_DMA GFP_DMA
+#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA
+#endif
+
 typedef u32 arm_v7s_iopte;

 static bool selftest_running;

@@ -197,13 +205,16 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
 	void *table = NULL;

 	if (lvl == 1)
-		table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size));
+		table = (void *)__get_free_pages(
+			__GFP_ZERO | ARM_V7S_TABLE_GFP_DMA, get_order(size));
 	else if (lvl == 2)
-		table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA);
+		table = kmem_cache_zalloc(data->l2_tables, gfp);
 	phys = virt_to_phys(table);
-	if (phys != (arm_v7s_iopte)phys)
+	if (phys != (arm_v7s_iopte)phys) {
 		/* Doesn't fit in PTE */
+		dev_err(dev, "Page table does not fit in PTE: %pa", &phys);
 		goto out_free;
+	}
 	if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) {
 		dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
 		if (dma_mapping_error(dev, dma))
@@ -733,7 +744,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 	data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2",
 					    ARM_V7S_TABLE_SIZE(2),
 					    ARM_V7S_TABLE_SIZE(2),
-					    SLAB_CACHE_DMA, NULL);
+					    ARM_V7S_TABLE_SLAB_FLAGS, NULL);
 	if (!data->l2_tables)
 		goto out_free_data;
--
cgit v1.2.3
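The new dev_err() fires on exactly the failure mode that motivates
GFP_DMA32: a table allocated above 4GB cannot be encoded in a 32-bit
short-descriptor entry. A tiny demonstration of the overflow check
itself (plain user-space C, illustrative values):

#include <stdint.h>
#include <stdio.h>

typedef uint32_t arm_v7s_iopte;	/* short-descriptor PTEs are 32 bit */

int main(void)
{
	uint64_t phys = 0x123456789ULL;	/* a table that landed above 4GB */

	/* The driver's check: the address must survive a round trip
	 * through the 32-bit PTE type. */
	if (phys != (arm_v7s_iopte)phys)
		printf("%#llx does not fit in a PTE (truncates to %#x)\n",
		       (unsigned long long)phys, (arm_v7s_iopte)phys);
	return 0;
}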
From a3a195929d40b38833ffd0f82b2db2cc898641eb Mon Sep 17 00:00:00 2001
From: Lu Baolu
Date: Mon, 25 Mar 2019 09:30:28 +0800
Subject: iommu: Add APIs for multiple domains per device

Sharing a physical PCI device in a finer-granularity way is becoming
a consensus in the industry. IOMMU vendors are also making efforts to
support such sharing as well as possible. Among those efforts, the
capability to support finer-granularity DMA isolation is a common
requirement due to security considerations. With finer-granularity
DMA isolation, subsets of a PCI function can be isolated from each
other by the IOMMU. As a result, there is a request in software to
attach multiple domains to a physical PCI device. One example of such
a use model is Intel Scalable IOV [1] [2]. The Intel vt-d 3.0 spec [3]
introduces the scalable mode which enables PASID-granularity DMA
isolation.

This adds the APIs to support multiple domains per device. In order
to ease the discussions, we call it 'a domain in auxiliary mode' or
simply 'auxiliary domain' when multiple domains are attached to a
physical device. The APIs include:

* iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX)
  - Detect both IOMMU and PCI endpoint devices supporting the feature
    (aux-domain here) without the host driver dependency.

* iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)
  - Check the enabling status of the feature (aux-domain here). The
    aux-domain interfaces are available only if this returns true.

* iommu_dev_enable/disable_feature(dev, IOMMU_DEV_FEAT_AUX)
  - Enable/disable the device-specific aux-domain feature.

* iommu_aux_attach_device(domain, dev)
  - Attaches @domain to @dev in the auxiliary mode. Multiple domains
    could be attached to a single device in the auxiliary mode with
    each domain representing an isolated address space for an
    assignable subset of the device.

* iommu_aux_detach_device(domain, dev)
  - Detach @domain which has been attached to @dev in the auxiliary
    mode.

* iommu_aux_get_pasid(domain, dev)
  - Return the ID used for finer-granularity DMA translation. For the
    Intel Scalable IOV usage model, this will be a PASID. The device
    which supports Scalable IOV needs to write this ID to the device
    register so that DMA requests could be tagged with the right PASID
    prefix.

This has been updated with the latest proposal from Joerg posted
here [5].

Many people were involved in discussions of this design: Kevin Tian,
Liu Yi L, Ashok Raj, Sanjay Kumar, Jacob Pan, Alex Williamson,
Jean-Philippe Brucker and Joerg Roedel; some of the discussions can
be found here [4] [5].

[1] https://software.intel.com/en-us/download/intel-scalable-io-virtualization-technical-specification
[2] https://schd.ws/hosted_files/lc32018/00/LC3-SIOV-final.pdf
[3] https://software.intel.com/en-us/download/intel-virtualization-technology-for-directed-io-architecture-specification
[4] https://lkml.org/lkml/2018/7/26/4
[5] https://www.spinics.net/lists/iommu/msg31874.html

Cc: Ashok Raj
Cc: Jacob Pan
Cc: Kevin Tian
Cc: Liu Yi L
Suggested-by: Kevin Tian
Suggested-by: Jean-Philippe Brucker
Suggested-by: Joerg Roedel
Signed-off-by: Lu Baolu
Reviewed-by: Jean-Philippe Brucker
Signed-off-by: Joerg Roedel
---
 drivers/iommu/iommu.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/iommu.h | 70 +++++++++++++++++++++++++++++++++++++
 2 files changed, 166 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 109de67d5d72..344e27e8f188 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2039,3 +2039,99 @@ int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
+
+/*
+ * Per device IOMMU features.
+ */
+bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat)
+{
+	const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+	if (ops && ops->dev_has_feat)
+		return ops->dev_has_feat(dev, feat);
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(iommu_dev_has_feature);
+
+int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat)
+{
+	const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+	if (ops && ops->dev_enable_feat)
+		return ops->dev_enable_feat(dev, feat);
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(iommu_dev_enable_feature);
+
+/*
+ * The device drivers should do the necessary cleanups before calling this.
+ * For example, before disabling the aux-domain feature, the device driver
+ * should detach all aux-domains. Otherwise, this will return -EBUSY.
+ */
+int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
+{
+	const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+	if (ops && ops->dev_disable_feat)
+		return ops->dev_disable_feat(dev, feat);
+
+	return -EBUSY;
+}
+EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
+
+bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat)
+{
+	const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+	if (ops && ops->dev_feat_enabled)
+		return ops->dev_feat_enabled(dev, feat);
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled);
+
+/*
+ * Aux-domain specific attach/detach.
+ *
+ * Only works if iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX) returns
+ * true. Also, as long as domains are attached to a device through this
+ * interface, any tries to call iommu_attach_device() should fail
+ * (iommu_detach_device() can't fail, so we fail when trying to re-attach).
+ * This should make us safe against a device being attached to a guest as a
+ * whole while there are still pasid users on it (aux and sva).
+ */
+int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev)
+{
+	int ret = -ENODEV;
+
+	if (domain->ops->aux_attach_dev)
+		ret = domain->ops->aux_attach_dev(domain, dev);
+
+	if (!ret)
+		trace_attach_device_to_domain(dev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_aux_attach_device);
+
+void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev)
+{
+	if (domain->ops->aux_detach_dev) {
+		domain->ops->aux_detach_dev(domain, dev);
+		trace_detach_device_from_domain(dev);
+	}
+}
+EXPORT_SYMBOL_GPL(iommu_aux_detach_device);
+
+int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
+{
+	int ret = -ENODEV;
+
+	if (domain->ops->aux_get_pasid)
+		ret = domain->ops->aux_get_pasid(domain, dev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index ffbbc7e39cee..8239ece9fdfc 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -156,6 +156,11 @@ struct iommu_resv_region {
 	enum iommu_resv_type	type;
 };

+/* Per device IOMMU features */
+enum iommu_dev_features {
+	IOMMU_DEV_FEAT_AUX,	/* Aux-domain feature */
+};
+
 #ifdef CONFIG_IOMMU_API

 /**
@@ -186,6 +191,11 @@ struct iommu_resv_region {
  * @of_xlate: add OF master IDs to iommu grouping
  * @is_attach_deferred: Check if domain attach should be deferred from iommu
  *                      driver init to device driver init (default no)
+ * @dev_has/enable/disable_feat: per device entries to check/enable/disable
+ *                               iommu specific features.
+ * @dev_feat_enabled: check enabled feature
+ * @aux_attach/detach_dev: aux-domain specific attach/detach entries.
+ * @aux_get_pasid: get the pasid given an aux-domain
  * @pgsize_bitmap: bitmap of all possible supported page sizes
  */
 struct iommu_ops {
@@ -230,6 +240,17 @@ struct iommu_ops {
 	int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
 	bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);

+	/* Per device IOMMU features */
+	bool (*dev_has_feat)(struct device *dev, enum iommu_dev_features f);
+	bool (*dev_feat_enabled)(struct device *dev, enum iommu_dev_features f);
+	int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f);
+	int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f);
+
+	/* Aux-domain specific attach/detach entries */
+	int (*aux_attach_dev)(struct iommu_domain *domain, struct device *dev);
+	void (*aux_detach_dev)(struct iommu_domain *domain, struct device *dev);
+	int (*aux_get_pasid)(struct iommu_domain *domain, struct device *dev);
+
 	unsigned long pgsize_bitmap;
 };

@@ -416,6 +437,14 @@ static inline void dev_iommu_fwspec_set(struct device *dev,
 int iommu_probe_device(struct device *dev);
 void iommu_release_device(struct device *dev);

+bool iommu_dev_has_feature(struct device *dev, enum iommu_dev_features f);
+int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);
+int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f);
+bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features f);
+int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev);
+void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev);
+int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev);
+
 #else /* CONFIG_IOMMU_API */

 struct iommu_ops {};
@@ -700,6 +729,47 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
 	return NULL;
 }

+static inline bool
+iommu_dev_has_feature(struct device *dev, enum iommu_dev_features feat)
+{
+	return false;
+}
+
+static inline bool
+iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat)
+{
+	return false;
+}
+
+static inline int
+iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat)
+{
+	return -ENODEV;
+}
+
+static inline int
+iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
+{
+	return -ENODEV;
+}
+
+static inline int
+iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev)
+{
+	return -ENODEV;
+}
+
+static inline void
+iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev)
+{
+}
+
+static inline int
+iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
+{
+	return -ENODEV;
+}
+
 #endif /* CONFIG_IOMMU_API */

 #ifdef CONFIG_IOMMU_DEBUGFS
--
cgit v1.2.3
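A hypothetical driver-side use of the new API, to show how the calls
compose (the subdevice register write is a made-up helper; only the
iommu_* calls come from this patch):

static void my_hw_set_pasid(struct device *dev, int pasid); /* stand-in */

static int bind_subdevice(struct iommu_domain *domain, struct device *dev)
{
	int pasid, ret;

	if (!iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX))
		return -ENODEV;

	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_AUX);
	if (ret)
		return ret;

	ret = iommu_aux_attach_device(domain, dev);
	if (ret)
		goto out_disable;

	pasid = iommu_aux_get_pasid(domain, dev);
	if (pasid < 0) {
		ret = pasid;
		goto out_detach;
	}

	/* Tag the subset's DMA with the PASID in a device-specific way. */
	my_hw_set_pasid(dev, pasid);

	return 0;

out_detach:
	iommu_aux_detach_device(domain, dev);
out_disable:
	iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_AUX);
	return ret;
}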
From 26b25a2b98e45aeb40eedcedc586ad5034cbd984 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker
Date: Wed, 10 Apr 2019 16:15:16 +0100
Subject: iommu: Bind process address spaces to devices

Add bind() and unbind() operations to the IOMMU API.
iommu_sva_bind_device() binds a device to an mm, and returns a handle
to the bond, which is released by calling iommu_sva_unbind_device().

Each mm bound to devices gets a PASID (by convention, a 20-bit
system-wide ID representing the address space), which can be retrieved
with iommu_sva_get_pasid(). When programming DMA addresses, device
drivers include this PASID in a device-specific manner, to let the
device access the given address space. Since the process memory may be
paged out, the device and the IOMMU must support I/O page faults
(e.g. PCI PRI).

Using iommu_sva_set_ops(), device drivers provide an mm_exit() callback
that is called by the IOMMU driver if the process exits before the
device driver called unbind(). In mm_exit(), the device driver should
disable DMA from the given context, so that the core IOMMU can
reallocate the PASID. Whether the process exited or not, the device
driver should always release the handle with unbind().

To use these functions, the device driver must first enable the
IOMMU_DEV_FEAT_SVA device feature with iommu_dev_enable_feature().

Signed-off-by: Jean-Philippe Brucker
Signed-off-by: Joerg Roedel
---
 drivers/iommu/iommu.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/iommu.h |  70 +++++++++++++++++++++++++++++++++
 2 files changed, 174 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 344e27e8f188..f8fe112e507a 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2135,3 +2135,107 @@ int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
 	return ret;
 }
 EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
+
+/**
+ * iommu_sva_bind_device() - Bind a process address space to a device
+ * @dev: the device
+ * @mm: the mm to bind, caller must hold a reference to it
+ *
+ * Create a bond between device and address space, allowing the device to
+ * access the mm using the returned PASID. If a bond already exists between
+ * @device and @mm, it is returned and an additional reference is taken.
+ * Caller must call iommu_sva_unbind_device() to release each reference.
+ *
+ * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to
+ * initialize the required SVA features.
+ *
+ * On error, returns an ERR_PTR value.
+ */
+struct iommu_sva *
+iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
+{
+	struct iommu_group *group;
+	struct iommu_sva *handle = ERR_PTR(-EINVAL);
+	const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+	if (!ops || !ops->sva_bind)
+		return ERR_PTR(-ENODEV);
+
+	group = iommu_group_get(dev);
+	if (!group)
+		return ERR_PTR(-ENODEV);
+
+	/* Ensure device count and domain don't change while we're binding */
+	mutex_lock(&group->mutex);
+
+	/*
+	 * To keep things simple, SVA currently doesn't support IOMMU groups
+	 * with more than one device. Existing SVA-capable systems are not
+	 * affected by the problems that required IOMMU groups (lack of ACS
+	 * isolation, device ID aliasing and other hardware issues).
+	 */
+	if (iommu_group_device_count(group) != 1)
+		goto out_unlock;
+
+	handle = ops->sva_bind(dev, mm, drvdata);
+
+out_unlock:
+	mutex_unlock(&group->mutex);
+	iommu_group_put(group);
+
+	return handle;
+}
+EXPORT_SYMBOL_GPL(iommu_sva_bind_device);
+
+/**
+ * iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device
+ * @handle: the handle returned by iommu_sva_bind_device()
+ *
+ * Put reference to a bond between device and address space. The device should
+ * not be issuing any more transactions for this PASID. All outstanding page
+ * requests for this PASID must have been flushed to the IOMMU.
+ */
+void iommu_sva_unbind_device(struct iommu_sva *handle)
+{
+	struct iommu_group *group;
+	struct device *dev = handle->dev;
+	const struct iommu_ops *ops = dev->bus->iommu_ops;
+
+	if (!ops || !ops->sva_unbind)
+		return;
+
+	group = iommu_group_get(dev);
+	if (!group)
+		return;
+
+	mutex_lock(&group->mutex);
+	ops->sva_unbind(handle);
+	mutex_unlock(&group->mutex);
+
+	iommu_group_put(group);
+}
+EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
+
+int iommu_sva_set_ops(struct iommu_sva *handle,
+		      const struct iommu_sva_ops *sva_ops)
+{
+	if (handle->ops && handle->ops != sva_ops)
+		return -EEXIST;
+
+	handle->ops = sva_ops;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_sva_set_ops);
+
+int iommu_sva_get_pasid(struct iommu_sva *handle)
+{
+	const struct iommu_ops *ops = handle->dev->bus->iommu_ops;
+
+	if (!ops || !ops->sva_get_pasid)
+		return IOMMU_PASID_INVALID;
+
+	return ops->sva_get_pasid(handle);
+}
+EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 8239ece9fdfc..480921dfbadf 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -48,6 +48,7 @@ struct bus_type;
 struct device;
 struct iommu_domain;
 struct notifier_block;
+struct iommu_sva;

 /* iommu fault flags */
 #define IOMMU_FAULT_READ	0x0
@@ -55,6 +56,8 @@ struct notifier_block;

 typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
 			struct device *, unsigned long, int, void *);
+typedef int (*iommu_mm_exit_handler_t)(struct device *dev, struct iommu_sva *,
+				       void *);

 struct iommu_domain_geometry {
 	dma_addr_t aperture_start; /* First address that can be mapped    */
@@ -159,6 +162,28 @@ struct iommu_resv_region {
 /* Per device IOMMU features */
 enum iommu_dev_features {
 	IOMMU_DEV_FEAT_AUX,	/* Aux-domain feature */
+	IOMMU_DEV_FEAT_SVA,	/* Shared Virtual Addresses */
+};
+
+#define IOMMU_PASID_INVALID	(-1U)
+
+/**
+ * struct iommu_sva_ops - device driver callbacks for an SVA context
+ *
+ * @mm_exit: called when the mm is about to be torn down by exit_mmap. After
+ *           @mm_exit returns, the device must not issue any more transactions
+ *           with the PASID given as argument.
+ *
+ *           The @mm_exit handler is allowed to sleep. Be careful about the
+ *           locks taken in @mm_exit, because they might lead to deadlocks if
+ *           they are also held when dropping references to the mm. Consider
+ *           the following call chain:
+ *           mutex_lock(A); mmput(mm) -> exit_mm() -> @mm_exit() -> mutex_lock(A)
+ *           Using mmput_async() prevents this scenario.
+ *
+ */
+struct iommu_sva_ops {
+	iommu_mm_exit_handler_t mm_exit;
 };

 #ifdef CONFIG_IOMMU_API
@@ -196,6 +221,9 @@ enum iommu_dev_features {
  * @dev_feat_enabled: check enabled feature
  * @aux_attach/detach_dev: aux-domain specific attach/detach entries.
  * @aux_get_pasid: get the pasid given an aux-domain
+ * @sva_bind: Bind process address space to device
+ * @sva_unbind: Unbind process address space from device
+ * @sva_get_pasid: Get PASID associated to a SVA handle
  * @pgsize_bitmap: bitmap of all possible supported page sizes
  */
 struct iommu_ops {
@@ -251,6 +279,11 @@ struct iommu_ops {
 	void (*aux_detach_dev)(struct iommu_domain *domain, struct device *dev);
 	int (*aux_get_pasid)(struct iommu_domain *domain, struct device *dev);

+	struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm,
+				      void *drvdata);
+	void (*sva_unbind)(struct iommu_sva *handle);
+	int (*sva_get_pasid)(struct iommu_sva *handle);
+
 	unsigned long pgsize_bitmap;
 };

@@ -417,6 +450,14 @@ struct iommu_fwspec {
 	u32			ids[1];
 };

+/**
+ * struct iommu_sva - handle to a device-mm bond
+ */
+struct iommu_sva {
+	struct device			*dev;
+	const struct iommu_sva_ops	*ops;
+};
+
 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
 		      const struct iommu_ops *ops);
 void iommu_fwspec_free(struct device *dev);
@@ -445,6 +486,14 @@ int iommu_aux_attach_device(struct iommu_domain *domain, struct device *dev);
 void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev);
 int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev);

+struct iommu_sva *iommu_sva_bind_device(struct device *dev,
+					struct mm_struct *mm,
+					void *drvdata);
+void iommu_sva_unbind_device(struct iommu_sva *handle);
+int iommu_sva_set_ops(struct iommu_sva *handle,
+		      const struct iommu_sva_ops *ops);
+int iommu_sva_get_pasid(struct iommu_sva *handle);
+
 #else /* CONFIG_IOMMU_API */

 struct iommu_ops {};
@@ -770,6 +819,27 @@ iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
 	return -ENODEV;
 }

+static inline struct iommu_sva *
+iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
+{
+	return NULL;
+}
+
+static inline void iommu_sva_unbind_device(struct iommu_sva *handle)
+{
+}
+
+static inline int iommu_sva_set_ops(struct iommu_sva *handle,
+				    const struct iommu_sva_ops *ops)
+{
+	return -EINVAL;
+}
+
+static inline int iommu_sva_get_pasid(struct iommu_sva *handle)
+{
+	return IOMMU_PASID_INVALID;
+}
+
 #endif /* CONFIG_IOMMU_API */

 #ifdef CONFIG_IOMMU_DEBUGFS
--
cgit v1.2.3
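To close, a hypothetical driver-side sequence built on these calls (the
quiesce helper is a made-up stand-in; everything else is the API added
above):

static void my_quiesce_pasid(struct device *dev, int pasid); /* stand-in */

static int my_mm_exit(struct device *dev, struct iommu_sva *handle,
		      void *drvdata)
{
	/* Stop issuing transactions with this PASID before returning. */
	my_quiesce_pasid(dev, iommu_sva_get_pasid(handle));
	return 0;
}

static const struct iommu_sva_ops my_sva_ops = {
	.mm_exit = my_mm_exit,
};

static int my_bind_mm(struct device *dev, struct mm_struct *mm)
{
	struct iommu_sva *handle;
	int ret;

	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
	if (ret)
		return ret;

	handle = iommu_sva_bind_device(dev, mm, NULL);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	ret = iommu_sva_set_ops(handle, &my_sva_ops);
	if (ret) {
		iommu_sva_unbind_device(handle);
		return ret;
	}

	/* Tag this context's DMA with iommu_sva_get_pasid(handle). */
	return 0;
}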