From 512881eacfa72c2136b27b9934b7b27504a9efc2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 18 Apr 2022 08:49:53 +0800 Subject: bus: platform,amba,fsl-mc,PCI: Add device DMA ownership management The devices on platform/amba/fsl-mc/PCI buses could be bound to drivers with the device DMA managed by kernel drivers or user-space applications. Unfortunately, multiple devices may be placed in the same IOMMU group because they cannot be isolated from each other. The DMA on these devices must either be entirely under kernel control or userspace control, never a mixture. Otherwise the driver integrity is not guaranteed because they could access each other through the peer-to-peer accesses which by-pass the IOMMU protection. This checks and sets the default DMA mode during driver binding, and cleanups during driver unbinding. In the default mode, the device DMA is managed by the device driver which handles DMA operations through the kernel DMA APIs (see Documentation/core-api/dma-api.rst). For cases where the devices are assigned for userspace control through the userspace driver framework(i.e. VFIO), the drivers(for example, vfio_pci/ vfio_platfrom etc.) may set a new flag (driver_managed_dma) to skip this default setting in the assumption that the drivers know what they are doing with the device DMA. Calling iommu_device_use_default_domain() before {of,acpi}_dma_configure is currently a problem. As things stand, the IOMMU driver ignored the initial iommu_probe_device() call when the device was added, since at that point it had no fwspec yet. In this situation, {of,acpi}_iommu_configure() are retriggering iommu_probe_device() after the IOMMU driver has seen the firmware data via .of_xlate to learn that it actually responsible for the given device. As the result, before that gets fixed, iommu_use_default_domain() goes at the end, and calls arch_teardown_dma_ops() if it fails. Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Stuart Yoder Cc: Laurentiu Tudor Signed-off-by: Lu Baolu Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jason Gunthorpe Reviewed-by: Robin Murphy Tested-by: Eric Auger Link: https://lore.kernel.org/r/20220418005000.897664-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/pci/pci-driver.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 4ceeb75fc899..f83f7fbac68f 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "pci.h" #include "pcie/portdrv.h" @@ -1601,6 +1602,7 @@ static int pci_bus_num_vf(struct device *dev) */ static int pci_dma_configure(struct device *dev) { + struct pci_driver *driver = to_pci_driver(dev->driver); struct device *bridge; int ret = 0; @@ -1616,9 +1618,24 @@ static int pci_dma_configure(struct device *dev) } pci_put_host_bridge_device(bridge); + + if (!ret && !driver->driver_managed_dma) { + ret = iommu_device_use_default_domain(dev); + if (ret) + arch_teardown_dma_ops(dev); + } + return ret; } +static void pci_dma_cleanup(struct device *dev) +{ + struct pci_driver *driver = to_pci_driver(dev->driver); + + if (!driver->driver_managed_dma) + iommu_device_unuse_default_domain(dev); +} + struct bus_type pci_bus_type = { .name = "pci", .match = pci_bus_match, @@ -1632,6 +1649,7 @@ struct bus_type pci_bus_type = { .pm = PCI_PM_OPS_PTR, .num_vf = pci_bus_num_vf, .dma_configure = pci_dma_configure, + .dma_cleanup = pci_dma_cleanup, }; EXPORT_SYMBOL(pci_bus_type); -- cgit v1.2.3 From 18c7a349d072a222ff80598373035820c194747b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 18 Apr 2022 08:49:54 +0800 Subject: PCI: pci_stub: Set driver_managed_dma The current VFIO implementation allows pci-stub driver to be bound to a PCI device with other devices in the same IOMMU group being assigned to userspace. The pci-stub driver has no dependencies on DMA or the IOVA mapping of the device, but it does prevent the user from having direct access to the device, which is useful in some circumstances. The pci_dma_configure() marks the iommu_group as containing only devices with kernel drivers that manage DMA. For compatibility with the VFIO usage, avoid this default behavior for the pci_stub. This allows the pci_stub still able to be used by the admin to block driver binding after applying the DMA ownership to VFIO. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20220418005000.897664-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/pci/pci-stub.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-stub.c b/drivers/pci/pci-stub.c index e408099fea52..d1f4c1ce7bd1 100644 --- a/drivers/pci/pci-stub.c +++ b/drivers/pci/pci-stub.c @@ -36,6 +36,7 @@ static struct pci_driver stub_driver = { .name = "pci-stub", .id_table = NULL, /* only dynamic id's */ .probe = pci_stub_probe, + .driver_managed_dma = true, }; static int __init pci_stub_init(void) -- cgit v1.2.3 From c7d469849747ce380785536173d08b30a820a83c Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 18 Apr 2022 08:49:55 +0800 Subject: PCI: portdrv: Set driver_managed_dma If a switch lacks ACS P2P Request Redirect, a device below the switch can bypass the IOMMU and DMA directly to other devices below the switch, so all the downstream devices must be in the same IOMMU group as the switch itself. The existing VFIO framework allows the portdrv driver to be bound to the bridge while its downstream devices are assigned to user space. The pci_dma_configure() marks the IOMMU group as containing only devices with kernel drivers that manage DMA. Avoid this default behavior for the portdrv driver in order for compatibility with the current VFIO usage. We achieve this by setting ".driver_managed_dma = true" in pci_driver structure. It is safe because the portdrv driver meets below criteria: - This driver doesn't use DMA, as you can't find any related calls like pci_set_master() or any kernel DMA API (dma_map_*() and etc.). - It doesn't use MMIO as you can't find ioremap() or similar calls. It's tolerant to userspace possibly also touching the same MMIO registers via P2P DMA access. Suggested-by: Jason Gunthorpe Suggested-by: Kevin Tian Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20220418005000.897664-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/pci/pcie/portdrv_pci.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 4b8801656ffb..7f8788a970ae 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -202,6 +202,8 @@ static struct pci_driver pcie_portdriver = { .err_handler = &pcie_portdrv_err_handler, + .driver_managed_dma = true, + .driver.pm = PCIE_PORTDRV_PM_OPS, }; -- cgit v1.2.3 From b8397a8f4ebc0b84eefd990dc08995ba2ae9015c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 9 May 2022 11:16:08 +0100 Subject: iommu/dma: Explicitly sort PCI DMA windows Originally, creating the dma_ranges resource list in pre-sorted fashion was the simplest and most efficient way to enforce the order required by iova_reserve_pci_windows(). However since then at least one PCI host driver is now re-sorting the list for its own probe-time processing, which doesn't seem entirely unreasonable, so that basic assumption no longer holds. Make iommu-dma robust and get the sort order it needs by explicitly sorting, which means we can also save the effort at creation time and just build the list in whatever natural order the DT had. Signed-off-by: Robin Murphy Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/35661036a7e4160850895f9b37f35408b6a29f2f.1652091160.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 13 ++++++++++++- drivers/pci/of.c | 8 +------- 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 8e0ed400a439..f90251572a5d 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -414,6 +415,15 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie, return 0; } +static int iommu_dma_ranges_sort(void *priv, const struct list_head *a, + const struct list_head *b) +{ + struct resource_entry *res_a = list_entry(a, typeof(*res_a), node); + struct resource_entry *res_b = list_entry(b, typeof(*res_b), node); + + return res_a->res->start > res_b->res->start; +} + static int iova_reserve_pci_windows(struct pci_dev *dev, struct iova_domain *iovad) { @@ -432,6 +442,7 @@ static int iova_reserve_pci_windows(struct pci_dev *dev, } /* Get reserved DMA windows from host bridge */ + list_sort(NULL, &bridge->dma_ranges, iommu_dma_ranges_sort); resource_list_for_each_entry(window, &bridge->dma_ranges) { end = window->res->start - window->offset; resv_iova: @@ -440,7 +451,7 @@ resv_iova: hi = iova_pfn(iovad, end); reserve_iova(iovad, lo, hi); } else if (end < start) { - /* dma_ranges list should be sorted */ + /* DMA ranges should be non-overlapping */ dev_err(&dev->dev, "Failed to reserve IOVA [%pa-%pa]\n", &start, &end); diff --git a/drivers/pci/of.c b/drivers/pci/of.c index cb2e8351c2cc..8f0ebaf9ae85 100644 --- a/drivers/pci/of.c +++ b/drivers/pci/of.c @@ -369,7 +369,6 @@ static int devm_of_pci_get_host_bridge_resources(struct device *dev, dev_dbg(dev, "Parsing dma-ranges property...\n"); for_each_of_pci_range(&parser, &range) { - struct resource_entry *entry; /* * If we failed translation or got a zero-sized region * then skip this range @@ -393,12 +392,7 @@ static int devm_of_pci_get_host_bridge_resources(struct device *dev, goto failed; } - /* Keep the resource list sorted */ - resource_list_for_each_entry(entry, ib_resources) - if (entry->res->start > res->start) - break; - - pci_add_resource_offset(&entry->node, res, + pci_add_resource_offset(ib_resources, res, res->start - range.pci_addr); } -- cgit v1.2.3