56 files changed, 2172 insertions, 781 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index cb89dbdedc46..2dda5ada4e1a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -301,6 +301,9 @@ allowed anymore to lift isolation requirements as needed. This option does not override iommu=pt + force_enable - Force enable the IOMMU on platforms known + to be buggy with IOMMU enabled. Use this + option with care. amd_iommu_dump= [HW,X86-64] Enable AMD IOMMU driver option to dump the ACPI table @@ -1987,7 +1990,7 @@ forcing Dual Address Cycle for PCI cards supporting greater than 32-bit addressing. - iommu.strict= [ARM64] Configure TLB invalidation behaviour + iommu.strict= [ARM64, X86] Configure TLB invalidation behaviour Format: { "0" | "1" } 0 - Lazy mode. Request that DMA unmap operations use deferred @@ -1998,6 +2001,10 @@ 1 - Strict mode (default). DMA unmap operations invalidate IOMMU hardware TLBs synchronously. + Note: on x86, the default behaviour depends on the + equivalent driver-specific parameters, but a strict + mode explicitly specified by either method takes + precedence. iommu.passthrough= [ARM64, X86] Configure DMA to bypass the IOMMU by default. diff --git a/Documentation/devicetree/bindings/iommu/iommu.txt b/Documentation/devicetree/bindings/iommu/iommu.txt index 3c36334e4f94..26ba9e530f13 100644 --- a/Documentation/devicetree/bindings/iommu/iommu.txt +++ b/Documentation/devicetree/bindings/iommu/iommu.txt @@ -92,6 +92,24 @@ Optional properties: tagging DMA transactions with an address space identifier. By default, this is 0, which means that the device only has one address space. +- dma-can-stall: When present, the master can wait for a transaction to + complete for an indefinite amount of time. Upon translation fault some + IOMMUs, instead of aborting the translation immediately, may first + notify the driver and keep the transaction in flight. This allows the OS + to inspect the fault and, for example, make physical pages resident + before updating the mappings and completing the transaction. Such IOMMU + accepts a limited number of simultaneous stalled transactions before + having to either put back-pressure on the master, or abort new faulting + transactions. + + Firmware has to opt-in stalling, because most buses and masters don't + support it. In particular it isn't compatible with PCI, where + transactions have to complete before a time limit. More generally it + won't work in systems and masters that haven't been designed for + stalling. For example the OS, in order to handle a stalled transaction, + may attempt to retrieve pages from secondary storage in a stalled + domain, leading to a deadlock. + Notes: ====== diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt deleted file mode 100644 index 6ecefea1c6f9..000000000000 --- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt +++ /dev/null @@ -1,38 +0,0 @@ -Rockchip IOMMU -============== - -A Rockchip DRM iommu translates io virtual addresses to physical addresses for -its master device. Each slave device is bound to a single master device, and -shares its clocks, power domain and irq. 
- -Required properties: -- compatible : Should be "rockchip,iommu" -- reg : Address space for the configuration registers -- interrupts : Interrupt specifier for the IOMMU instance -- interrupt-names : Interrupt name for the IOMMU instance -- #iommu-cells : Should be <0>. This indicates the iommu is a - "single-master" device, and needs no additional information - to associate with its master device. See: - Documentation/devicetree/bindings/iommu/iommu.txt -- clocks : A list of clocks required for the IOMMU to be accessible by - the host CPU. -- clock-names : Should contain the following: - "iface" - Main peripheral bus clock (PCLK/HCL) (required) - "aclk" - AXI bus clock (required) - -Optional properties: -- rockchip,disable-mmu-reset : Don't use the mmu reset operation. - Some mmu instances may produce unexpected results - when the reset operation is used. - -Example: - - vopl_mmu: iommu@ff940300 { - compatible = "rockchip,iommu"; - reg = <0xff940300 0x100>; - interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "vopl_mmu"; - clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>; - clock-names = "aclk", "iface"; - #iommu-cells = <0>; - }; diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml new file mode 100644 index 000000000000..d2e28a9e3545 --- /dev/null +++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml @@ -0,0 +1,85 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iommu/rockchip,iommu.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Rockchip IOMMU + +maintainers: + - Heiko Stuebner <heiko@sntech.de> + +description: |+ + A Rockchip DRM iommu translates io virtual addresses to physical addresses for + its master device. Each slave device is bound to a single master device and + shares its clocks, power domain and irq. + + For information on assigning IOMMU controller to its peripheral devices, + see generic IOMMU bindings. + +properties: + compatible: + enum: + - rockchip,iommu + - rockchip,rk3568-iommu + + reg: + items: + - description: configuration registers for MMU instance 0 + - description: configuration registers for MMU instance 1 + minItems: 1 + maxItems: 2 + + interrupts: + items: + - description: interruption for MMU instance 0 + - description: interruption for MMU instance 1 + minItems: 1 + maxItems: 2 + + clocks: + items: + - description: Core clock + - description: Interface clock + + clock-names: + items: + - const: aclk + - const: iface + + "#iommu-cells": + const: 0 + + power-domains: + maxItems: 1 + + rockchip,disable-mmu-reset: + $ref: /schemas/types.yaml#/definitions/flag + description: | + Do not use the mmu reset operation. + Some mmu instances may produce unexpected results + when the reset operation is used. 
+ +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - "#iommu-cells" + +additionalProperties: false + +examples: + - | + #include <dt-bindings/clock/rk3399-cru.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + + vopl_mmu: iommu@ff940300 { + compatible = "rockchip,iommu"; + reg = <0xff940300 0x100>; + interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>; + clock-names = "aclk", "iface"; + #iommu-cells = <0>; + }; diff --git a/MAINTAINERS b/MAINTAINERS index 8c5ee008301a..714dbe03e150 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -431,6 +431,14 @@ W: https://01.org/linux-acpi B: https://bugzilla.kernel.org F: drivers/acpi/acpi_video.c +ACPI VIOT DRIVER +M: Jean-Philippe Brucker <jean-philippe@linaro.org> +L: linux-acpi@vger.kernel.org +L: iommu@lists.linux-foundation.org +S: Maintained +F: drivers/acpi/viot.c +F: include/linux/acpi_viot.h + ACPI WMI DRIVER L: platform-driver-x86@vger.kernel.org S: Orphan diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index ce430ba9c118..5d06216c5c1c 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -1136,7 +1136,7 @@ }; adreno_smmu: iommu@b40000 { - compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2"; + compatible = "qcom,msm8996-smmu-v2", "qcom,adreno-smmu", "qcom,smmu-v2"; reg = <0x00b40000 0x10000>; #global-interrupts = <1>; diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 4bf1dd3eb041..6719f9efea09 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -50,7 +50,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, dev->dma_coherent = coherent; if (iommu) - iommu_setup_dma_ops(dev, dma_base, size); + iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1); #ifdef CONFIG_XEN if (xen_swiotlb_detect()) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index eedec61e3476..3758c6940ed7 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -526,6 +526,9 @@ endif source "drivers/acpi/pmic/Kconfig" +config ACPI_VIOT + bool + endif # ACPI config X86_PM_TIMER diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 700b41adf2db..a6e644c48987 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -118,3 +118,5 @@ video-objs += acpi_video.o video_detect.o obj-y += dptf/ obj-$(CONFIG_ARM64) += arm64/ + +obj-$(CONFIG_ACPI_VIOT) += viot.o diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile index 6ff50f4ed947..66acbe77f46e 100644 --- a/drivers/acpi/arm64/Makefile +++ b/drivers/acpi/arm64/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_ACPI_IORT) += iort.o obj-$(CONFIG_ACPI_GTDT) += gtdt.o +obj-y += dma.o diff --git a/drivers/acpi/arm64/dma.c b/drivers/acpi/arm64/dma.c new file mode 100644 index 000000000000..f16739ad3cc0 --- /dev/null +++ b/drivers/acpi/arm64/dma.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/acpi.h> +#include <linux/acpi_iort.h> +#include <linux/device.h> +#include <linux/dma-direct.h> + +void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) +{ + int ret; + u64 end, mask; + u64 dmaaddr = 0, size = 0, offset = 0; + + /* + * If @dev is expected to be DMA-capable then the bus code that created + * it should have initialised its dma_mask pointer by this point. 
For + * now, we'll continue the legacy behaviour of coercing it to the + * coherent mask if not, but we'll no longer do so quietly. + */ + if (!dev->dma_mask) { + dev_warn(dev, "DMA mask not set\n"); + dev->dma_mask = &dev->coherent_dma_mask; + } + + if (dev->coherent_dma_mask) + size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1); + else + size = 1ULL << 32; + + ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size); + if (ret == -ENODEV) + ret = iort_dma_get_ranges(dev, &size); + if (!ret) { + /* + * Limit coherent and dma mask based on size retrieved from + * firmware. + */ + end = dmaaddr + size - 1; + mask = DMA_BIT_MASK(ilog2(end) + 1); + dev->bus_dma_limit = end; + dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask); + *dev->dma_mask = min(*dev->dma_mask, mask); + } + + *dma_addr = dmaaddr; + *dma_size = size; + + ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size); + + dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : ""); +} diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 3912a1f6058e..9efe7bcde866 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -806,23 +806,6 @@ static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev) return NULL; } -static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev) -{ - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - - return (fwspec && fwspec->ops) ? fwspec->ops : NULL; -} - -static inline int iort_add_device_replay(struct device *dev) -{ - int err = 0; - - if (dev->bus && !device_iommu_mapped(dev)) - err = iommu_probe_device(dev); - - return err; -} - /** * iort_iommu_msi_get_resv_regions - Reserved region driver helper * @dev: Device from iommu_get_resv_regions() @@ -900,18 +883,6 @@ static inline bool iort_iommu_driver_enabled(u8 type) } } -static int arm_smmu_iort_xlate(struct device *dev, u32 streamid, - struct fwnode_handle *fwnode, - const struct iommu_ops *ops) -{ - int ret = iommu_fwspec_init(dev, fwnode, ops); - - if (!ret) - ret = iommu_fwspec_add_ids(dev, &streamid, 1); - - return ret; -} - static bool iort_pci_rc_supports_ats(struct acpi_iort_node *node) { struct acpi_iort_root_complex *pci_rc; @@ -946,7 +917,7 @@ static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node, return iort_iommu_driver_enabled(node->type) ? 
-EPROBE_DEFER : -ENODEV; - return arm_smmu_iort_xlate(dev, streamid, iort_fwnode, ops); + return acpi_iommu_fwspec_init(dev, streamid, iort_fwnode, ops); } struct iort_pci_alias_info { @@ -968,13 +939,15 @@ static int iort_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) static void iort_named_component_init(struct device *dev, struct acpi_iort_node *node) { - struct property_entry props[2] = {}; + struct property_entry props[3] = {}; struct acpi_iort_named_component *nc; nc = (struct acpi_iort_named_component *)node->node_data; props[0] = PROPERTY_ENTRY_U32("pasid-num-bits", FIELD_GET(ACPI_IORT_NC_PASID_BITS, nc->node_flags)); + if (nc->node_flags & ACPI_IORT_NC_STALL_SUPPORTED) + props[1] = PROPERTY_ENTRY_BOOL("dma-can-stall"); if (device_add_properties(dev, props)) dev_warn(dev, "Could not add device properties\n"); @@ -1020,24 +993,13 @@ static int iort_nc_iommu_map_id(struct device *dev, * @dev: device to configure * @id_in: optional input id const value pointer * - * Returns: iommu_ops pointer on configuration success - * NULL on configuration failure + * Returns: 0 on success, <0 on failure */ -const struct iommu_ops *iort_iommu_configure_id(struct device *dev, - const u32 *id_in) +int iort_iommu_configure_id(struct device *dev, const u32 *id_in) { struct acpi_iort_node *node; - const struct iommu_ops *ops; int err = -ENODEV; - /* - * If we already translated the fwspec there - * is nothing left to do, return the iommu_ops. - */ - ops = iort_fwspec_iommu_ops(dev); - if (ops) - return ops; - if (dev_is_pci(dev)) { struct iommu_fwspec *fwspec; struct pci_bus *bus = to_pci_dev(dev)->bus; @@ -1046,7 +1008,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev, node = iort_scan_node(ACPI_IORT_NODE_PCI_ROOT_COMPLEX, iort_match_node_callback, &bus->dev); if (!node) - return NULL; + return -ENODEV; info.node = node; err = pci_for_each_dma_alias(to_pci_dev(dev), @@ -1059,7 +1021,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev, node = iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT, iort_match_node_callback, dev); if (!node) - return NULL; + return -ENODEV; err = id_in ? iort_nc_iommu_map_id(dev, node, id_in) : iort_nc_iommu_map(dev, node); @@ -1068,32 +1030,14 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev, iort_named_component_init(dev, node); } - /* - * If we have reason to believe the IOMMU driver missed the initial - * add_device callback for dev, replay it to get things in order. - */ - if (!err) { - ops = iort_fwspec_iommu_ops(dev); - err = iort_add_device_replay(dev); - } - - /* Ignore all other errors apart from EPROBE_DEFER */ - if (err == -EPROBE_DEFER) { - ops = ERR_PTR(err); - } else if (err) { - dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); - ops = NULL; - } - - return ops; + return err; } #else int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) { return 0; } -const struct iommu_ops *iort_iommu_configure_id(struct device *dev, - const u32 *input_id) -{ return NULL; } +int iort_iommu_configure_id(struct device *dev, const u32 *input_id) +{ return -ENODEV; } #endif static int nc_dma_get_range(struct device *dev, u64 *size) @@ -1144,56 +1088,18 @@ static int rc_dma_get_range(struct device *dev, u64 *size) } /** - * iort_dma_setup() - Set-up device DMA parameters. 
+ * iort_dma_get_ranges() - Look up DMA addressing limit for the device + * @dev: device to lookup + * @size: DMA range size result pointer * - * @dev: device to configure - * @dma_addr: device DMA address result pointer - * @dma_size: DMA range size result pointer + * Return: 0 on success, an error otherwise. */ -void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) +int iort_dma_get_ranges(struct device *dev, u64 *size) { - u64 end, mask, dmaaddr = 0, size = 0, offset = 0; - int ret; - - /* - * If @dev is expected to be DMA-capable then the bus code that created - * it should have initialised its dma_mask pointer by this point. For - * now, we'll continue the legacy behaviour of coercing it to the - * coherent mask if not, but we'll no longer do so quietly. - */ - if (!dev->dma_mask) { - dev_warn(dev, "DMA mask not set\n"); - dev->dma_mask = &dev->coherent_dma_mask; - } - - if (dev->coherent_dma_mask) - size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1); + if (dev_is_pci(dev)) + return rc_dma_get_range(dev, size); else - size = 1ULL << 32; - - ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size); - if (ret == -ENODEV) - ret = dev_is_pci(dev) ? rc_dma_get_range(dev, &size) - : nc_dma_get_range(dev, &size); - - if (!ret) { - /* - * Limit coherent and dma mask based on size retrieved from - * firmware. - */ - end = dmaaddr + size - 1; - mask = DMA_BIT_MASK(ilog2(end) + 1); - dev->bus_dma_limit = end; - dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask); - *dev->dma_mask = min(*dev->dma_mask, mask); - } - - *dma_addr = dmaaddr; - *dma_size = size; - - ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size); - - dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : ""); + return nc_dma_get_range(dev, size); } static void __init acpi_iort_register_irq(int hwirq, const char *name, diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index a4bd673934c0..d6f4e2f06fdb 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -27,6 +27,7 @@ #include <linux/dmi.h> #endif #include <linux/acpi_iort.h> +#include <linux/acpi_viot.h> #include <linux/pci.h> #include <acpi/apei.h> #include <linux/suspend.h> @@ -1334,6 +1335,7 @@ static int __init acpi_init(void) acpi_wakeup_device_init(); acpi_debugger_init(); acpi_setup_sb_notify_handler(); + acpi_viot_init(); return 0; } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index e10d38ac7cf2..3e2bb04ab528 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -9,6 +9,8 @@ #include <linux/kernel.h> #include <linux/acpi.h> #include <linux/acpi_iort.h> +#include <linux/acpi_viot.h> +#include <linux/iommu.h> #include <linux/signal.h> #include <linux/kthread.h> #include <linux/dmi.h> @@ -1520,6 +1522,78 @@ int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, return ret >= 0 ? 0 : ret; } +#ifdef CONFIG_IOMMU_API +int acpi_iommu_fwspec_init(struct device *dev, u32 id, + struct fwnode_handle *fwnode, + const struct iommu_ops *ops) +{ + int ret = iommu_fwspec_init(dev, fwnode, ops); + + if (!ret) + ret = iommu_fwspec_add_ids(dev, &id, 1); + + return ret; +} + +static inline const struct iommu_ops *acpi_iommu_fwspec_ops(struct device *dev) +{ + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + + return fwspec ? 
fwspec->ops : NULL; +} + +static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev, + const u32 *id_in) +{ + int err; + const struct iommu_ops *ops; + + /* + * If we already translated the fwspec there is nothing left to do, + * return the iommu_ops. + */ + ops = acpi_iommu_fwspec_ops(dev); + if (ops) + return ops; + + err = iort_iommu_configure_id(dev, id_in); + if (err && err != -EPROBE_DEFER) + err = viot_iommu_configure(dev); + + /* + * If we have reason to believe the IOMMU driver missed the initial + * iommu_probe_device() call for dev, replay it to get things in order. + */ + if (!err && dev->bus && !device_iommu_mapped(dev)) + err = iommu_probe_device(dev); + + /* Ignore all other errors apart from EPROBE_DEFER */ + if (err == -EPROBE_DEFER) { + return ERR_PTR(err); + } else if (err) { + dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); + return NULL; + } + return acpi_iommu_fwspec_ops(dev); +} + +#else /* !CONFIG_IOMMU_API */ + +int acpi_iommu_fwspec_init(struct device *dev, u32 id, + struct fwnode_handle *fwnode, + const struct iommu_ops *ops) +{ + return -ENODEV; +} + +static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev, + const u32 *id_in) +{ + return NULL; +} + +#endif /* !CONFIG_IOMMU_API */ + /** * acpi_dma_configure_id - Set-up DMA configuration for the device. * @dev: The pointer to the device @@ -1537,9 +1611,9 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, return 0; } - iort_dma_setup(dev, &dma_addr, &size); + acpi_arch_dma_setup(dev, &dma_addr, &size); - iommu = iort_iommu_configure_id(dev, input_id); + iommu = acpi_iommu_configure_id(dev, input_id); if (PTR_ERR(iommu) == -EPROBE_DEFER) return -EPROBE_DEFER; diff --git a/drivers/acpi/viot.c b/drivers/acpi/viot.c new file mode 100644 index 000000000000..d2256326c73a --- /dev/null +++ b/drivers/acpi/viot.c @@ -0,0 +1,366 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Virtual I/O topology + * + * The Virtual I/O Translation Table (VIOT) describes the topology of + * para-virtual IOMMUs and the endpoints they manage. The OS uses it to + * initialize devices in the right order, preventing endpoints from issuing DMA + * before their IOMMU is ready. + * + * When binding a driver to a device, before calling the device driver's probe() + * method, the driver infrastructure calls dma_configure(). At that point the + * VIOT driver looks for an IOMMU associated to the device in the VIOT table. + * If an IOMMU exists and has been initialized, the VIOT driver initializes the + * device's IOMMU fwspec, allowing the DMA infrastructure to invoke the IOMMU + * ops when the device driver configures DMA mappings. If an IOMMU exists and + * hasn't yet been initialized, VIOT returns -EPROBE_DEFER to postpone probing + * the device until the IOMMU is available. 
+ */ +#define pr_fmt(fmt) "ACPI: VIOT: " fmt + +#include <linux/acpi_viot.h> +#include <linux/dma-iommu.h> +#include <linux/fwnode.h> +#include <linux/iommu.h> +#include <linux/list.h> +#include <linux/pci.h> +#include <linux/platform_device.h> + +struct viot_iommu { + /* Node offset within the table */ + unsigned int offset; + struct fwnode_handle *fwnode; + struct list_head list; +}; + +struct viot_endpoint { + union { + /* PCI range */ + struct { + u16 segment_start; + u16 segment_end; + u16 bdf_start; + u16 bdf_end; + }; + /* MMIO */ + u64 address; + }; + u32 endpoint_id; + struct viot_iommu *viommu; + struct list_head list; +}; + +static struct acpi_table_viot *viot; +static LIST_HEAD(viot_iommus); +static LIST_HEAD(viot_pci_ranges); +static LIST_HEAD(viot_mmio_endpoints); + +static int __init viot_check_bounds(const struct acpi_viot_header *hdr) +{ + struct acpi_viot_header *start, *end, *hdr_end; + + start = ACPI_ADD_PTR(struct acpi_viot_header, viot, + max_t(size_t, sizeof(*viot), viot->node_offset)); + end = ACPI_ADD_PTR(struct acpi_viot_header, viot, viot->header.length); + hdr_end = ACPI_ADD_PTR(struct acpi_viot_header, hdr, sizeof(*hdr)); + + if (hdr < start || hdr_end > end) { + pr_err(FW_BUG "Node pointer overflows\n"); + return -EOVERFLOW; + } + if (hdr->length < sizeof(*hdr)) { + pr_err(FW_BUG "Empty node\n"); + return -EINVAL; + } + return 0; +} + +static int __init viot_get_pci_iommu_fwnode(struct viot_iommu *viommu, + u16 segment, u16 bdf) +{ + struct pci_dev *pdev; + struct fwnode_handle *fwnode; + + pdev = pci_get_domain_bus_and_slot(segment, PCI_BUS_NUM(bdf), + bdf & 0xff); + if (!pdev) { + pr_err("Could not find PCI IOMMU\n"); + return -ENODEV; + } + + fwnode = pdev->dev.fwnode; + if (!fwnode) { + /* + * PCI devices aren't necessarily described by ACPI. Create a + * fwnode so the IOMMU subsystem can identify this device. 
+ */ + fwnode = acpi_alloc_fwnode_static(); + if (!fwnode) { + pci_dev_put(pdev); + return -ENOMEM; + } + set_primary_fwnode(&pdev->dev, fwnode); + } + viommu->fwnode = pdev->dev.fwnode; + pci_dev_put(pdev); + return 0; +} + +static int __init viot_get_mmio_iommu_fwnode(struct viot_iommu *viommu, + u64 address) +{ + struct acpi_device *adev; + struct resource res = { + .start = address, + .end = address, + .flags = IORESOURCE_MEM, + }; + + adev = acpi_resource_consumer(&res); + if (!adev) { + pr_err("Could not find MMIO IOMMU\n"); + return -EINVAL; + } + viommu->fwnode = &adev->fwnode; + return 0; +} + +static struct viot_iommu * __init viot_get_iommu(unsigned int offset) +{ + int ret; + struct viot_iommu *viommu; + struct acpi_viot_header *hdr = ACPI_ADD_PTR(struct acpi_viot_header, + viot, offset); + union { + struct acpi_viot_virtio_iommu_pci pci; + struct acpi_viot_virtio_iommu_mmio mmio; + } *node = (void *)hdr; + + list_for_each_entry(viommu, &viot_iommus, list) + if (viommu->offset == offset) + return viommu; + + if (viot_check_bounds(hdr)) + return NULL; + + viommu = kzalloc(sizeof(*viommu), GFP_KERNEL); + if (!viommu) + return NULL; + + viommu->offset = offset; + switch (hdr->type) { + case ACPI_VIOT_NODE_VIRTIO_IOMMU_PCI: + if (hdr->length < sizeof(node->pci)) + goto err_free; + + ret = viot_get_pci_iommu_fwnode(viommu, node->pci.segment, + node->pci.bdf); + break; + case ACPI_VIOT_NODE_VIRTIO_IOMMU_MMIO: + if (hdr->length < sizeof(node->mmio)) + goto err_free; + + ret = viot_get_mmio_iommu_fwnode(viommu, + node->mmio.base_address); + break; + default: + ret = -EINVAL; + } + if (ret) + goto err_free; + + list_add(&viommu->list, &viot_iommus); + return viommu; + +err_free: + kfree(viommu); + return NULL; +} + +static int __init viot_parse_node(const struct acpi_viot_header *hdr) +{ + int ret = -EINVAL; + struct list_head *list; + struct viot_endpoint *ep; + union { + struct acpi_viot_mmio mmio; + struct acpi_viot_pci_range pci; + } *node = (void *)hdr; + + if (viot_check_bounds(hdr)) + return -EINVAL; + + if (hdr->type == ACPI_VIOT_NODE_VIRTIO_IOMMU_PCI || + hdr->type == ACPI_VIOT_NODE_VIRTIO_IOMMU_MMIO) + return 0; + + ep = kzalloc(sizeof(*ep), GFP_KERNEL); + if (!ep) + return -ENOMEM; + + switch (hdr->type) { + case ACPI_VIOT_NODE_PCI_RANGE: + if (hdr->length < sizeof(node->pci)) { + pr_err(FW_BUG "Invalid PCI node size\n"); + goto err_free; + } + + ep->segment_start = node->pci.segment_start; + ep->segment_end = node->pci.segment_end; + ep->bdf_start = node->pci.bdf_start; + ep->bdf_end = node->pci.bdf_end; + ep->endpoint_id = node->pci.endpoint_start; + ep->viommu = viot_get_iommu(node->pci.output_node); + list = &viot_pci_ranges; + break; + case ACPI_VIOT_NODE_MMIO: + if (hdr->length < sizeof(node->mmio)) { + pr_err(FW_BUG "Invalid MMIO node size\n"); + goto err_free; + } + + ep->address = node->mmio.base_address; + ep->endpoint_id = node->mmio.endpoint; + ep->viommu = viot_get_iommu(node->mmio.output_node); + list = &viot_mmio_endpoints; + break; + default: + pr_warn("Unsupported node %x\n", hdr->type); + ret = 0; + goto err_free; + } + + if (!ep->viommu) { + pr_warn("No IOMMU node found\n"); + /* + * A future version of the table may use the node for other + * purposes. Keep parsing. + */ + ret = 0; + goto err_free; + } + + list_add(&ep->list, list); + return 0; + +err_free: + kfree(ep); + return ret; +} + +/** + * acpi_viot_init - Parse the VIOT table + * + * Parse the VIOT table, prepare the list of endpoints to be used during DMA + * setup of devices. 
+ */ +void __init acpi_viot_init(void) +{ + int i; + acpi_status status; + struct acpi_table_header *hdr; + struct acpi_viot_header *node; + + status = acpi_get_table(ACPI_SIG_VIOT, 0, &hdr); + if (ACPI_FAILURE(status)) { + if (status != AE_NOT_FOUND) { + const char *msg = acpi_format_exception(status); + + pr_err("Failed to get table, %s\n", msg); + } + return; + } + + viot = (void *)hdr; + + node = ACPI_ADD_PTR(struct acpi_viot_header, viot, viot->node_offset); + for (i = 0; i < viot->node_count; i++) { + if (viot_parse_node(node)) + return; + + node = ACPI_ADD_PTR(struct acpi_viot_header, node, + node->length); + } + + acpi_put_table(hdr); +} + +static int viot_dev_iommu_init(struct device *dev, struct viot_iommu *viommu, + u32 epid) +{ + const struct iommu_ops *ops; + + if (!viommu) + return -ENODEV; + + /* We're not translating ourself */ + if (viommu->fwnode == dev->fwnode) + return -EINVAL; + + ops = iommu_ops_from_fwnode(viommu->fwnode); + if (!ops) + return IS_ENABLED(CONFIG_VIRTIO_IOMMU) ? + -EPROBE_DEFER : -ENODEV; + + return acpi_iommu_fwspec_init(dev, epid, viommu->fwnode, ops); +} + +static int viot_pci_dev_iommu_init(struct pci_dev *pdev, u16 dev_id, void *data) +{ + u32 epid; + struct viot_endpoint *ep; + u32 domain_nr = pci_domain_nr(pdev->bus); + + list_for_each_entry(ep, &viot_pci_ranges, list) { + if (domain_nr >= ep->segment_start && + domain_nr <= ep->segment_end && + dev_id >= ep->bdf_start && + dev_id <= ep->bdf_end) { + epid = ((domain_nr - ep->segment_start) << 16) + + dev_id - ep->bdf_start + ep->endpoint_id; + + /* + * If we found a PCI range managed by the viommu, we're + * the one that has to request ACS. + */ + pci_request_acs(); + + return viot_dev_iommu_init(&pdev->dev, ep->viommu, + epid); + } + } + return -ENODEV; +} + +static int viot_mmio_dev_iommu_init(struct platform_device *pdev) +{ + struct resource *mem; + struct viot_endpoint *ep; + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) + return -ENODEV; + + list_for_each_entry(ep, &viot_mmio_endpoints, list) { + if (ep->address == mem->start) + return viot_dev_iommu_init(&pdev->dev, ep->viommu, + ep->endpoint_id); + } + return -ENODEV; +} + +/** + * viot_iommu_configure - Setup IOMMU ops for an endpoint described by VIOT + * @dev: the endpoint + * + * Return: 0 on success, <0 on failure + */ +int viot_iommu_configure(struct device *dev) +{ + if (dev_is_pci(dev)) + return pci_for_each_dma_alias(to_pci_dev(dev), + viot_pci_dev_iommu_init, NULL); + else if (dev_is_platform(dev)) + return viot_mmio_dev_iommu_init(to_platform_device(dev)); + return -ENODEV; +} diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 1f111b399bca..07b7c25cbed8 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -400,9 +400,11 @@ config HYPERV_IOMMU config VIRTIO_IOMMU tristate "Virtio IOMMU driver" depends on VIRTIO - depends on ARM64 + depends on (ARM64 || X86) select IOMMU_API + select IOMMU_DMA select INTERVAL_TREE + select ACPI_VIOT if ACPI help Para-virtualised IOMMU driver with virtio. 
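Aside on the VIOT support added above: the endpoint ID that viot_pci_dev_iommu_init() derives for a PCI device follows directly from the fields of a VIOT PCI range node. The user-space C sketch below only restates that arithmetic outside the kernel for illustration; the range values in main() are made up, not taken from any real table.

#include <stdint.h>
#include <stdio.h>

/* Mirror of the fields used from the VIOT PCI range node. */
struct viot_pci_range {
	uint16_t segment_start, segment_end;
	uint16_t bdf_start, bdf_end;
	uint32_t endpoint_start;	/* first endpoint ID of the range */
};

/* Same arithmetic as viot_pci_dev_iommu_init() above. */
static int viot_pci_endpoint_id(const struct viot_pci_range *r,
				uint16_t segment, uint16_t bdf,
				uint32_t *epid)
{
	if (segment < r->segment_start || segment > r->segment_end ||
	    bdf < r->bdf_start || bdf > r->bdf_end)
		return -1;	/* device not covered by this range node */

	*epid = ((uint32_t)(segment - r->segment_start) << 16) +
		(bdf - r->bdf_start) + r->endpoint_start;
	return 0;
}

int main(void)
{
	/* Hypothetical node: segment 0, BDF 00:01.0-00:1f.7, IDs start at 8. */
	struct viot_pci_range r = { 0, 0, 0x0008, 0x00ff, 8 };
	uint32_t epid;

	if (!viot_pci_endpoint_id(&r, 0, 0x0010, &epid))
		printf("endpoint ID %u\n", (unsigned int)epid);	/* 00:02.0 -> 16 */
	return 0;
}

Only devices whose segment and BDF fall inside a range node get an endpoint ID; for everything else viot_iommu_configure() keeps returning -ENODEV, as in the kernel code above.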
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 55dd38d814d9..416815a525d6 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -11,8 +11,6 @@ #include "amd_iommu_types.h" -extern int amd_iommu_init_dma_ops(void); -extern int amd_iommu_init_passthrough(void); extern irqreturn_t amd_iommu_int_thread(int irq, void *data); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_apply_erratum_63(u16 devid); diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index d006724f4dc2..46280e6e1535 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -153,7 +153,8 @@ int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; static bool amd_iommu_detected; -static bool __initdata amd_iommu_disabled; +static bool amd_iommu_disabled __initdata; +static bool amd_iommu_force_enable __initdata; static int amd_iommu_target_ivhd_type; u16 amd_iommu_last_bdf; /* largest PCI device id we have @@ -231,7 +232,6 @@ enum iommu_init_state { IOMMU_ENABLED, IOMMU_PCI_INIT, IOMMU_INTERRUPTS_EN, - IOMMU_DMA_OPS, IOMMU_INITIALIZED, IOMMU_NOT_FOUND, IOMMU_INIT_ERROR, @@ -1908,8 +1908,8 @@ static void print_iommu_info(void) pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr); if (iommu->cap & (1 << IOMMU_CAP_EFR)) { - pci_info(pdev, "Extended features (%#llx):", - iommu->features); + pr_info("Extended features (%#llx):", iommu->features); + for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { if (iommu_feature(iommu, (1ULL << i))) pr_cont(" %s", feat_str[i]); @@ -2817,7 +2817,7 @@ out: return ret; } -static bool detect_ivrs(void) +static bool __init detect_ivrs(void) { struct acpi_table_header *ivrs_base; acpi_status status; @@ -2834,6 +2834,9 @@ static bool detect_ivrs(void) acpi_put_table(ivrs_base); + if (amd_iommu_force_enable) + goto out; + /* Don't use IOMMU if there is Stoney Ridge graphics */ for (i = 0; i < 32; i++) { u32 pci_id; @@ -2845,6 +2848,7 @@ static bool detect_ivrs(void) } } +out: /* Make sure ACS will be enabled during PCI probe */ pci_request_acs(); @@ -2895,10 +2899,6 @@ static int __init state_next(void) init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN; break; case IOMMU_INTERRUPTS_EN: - ret = amd_iommu_init_dma_ops(); - init_state = ret ? 
IOMMU_INIT_ERROR : IOMMU_DMA_OPS; - break; - case IOMMU_DMA_OPS: init_state = IOMMU_INITIALIZED; break; case IOMMU_INITIALIZED: @@ -3100,6 +3100,8 @@ static int __init parse_amd_iommu_options(char *str) for (; *str; ++str) { if (strncmp(str, "fullflush", 9) == 0) amd_iommu_unmap_flush = true; + if (strncmp(str, "force_enable", 12) == 0) + amd_iommu_force_enable = true; if (strncmp(str, "off", 3) == 0) amd_iommu_disabled = true; if (strncmp(str, "force_isolation", 15) == 0) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 3ac42bbdefc6..811a49a95d04 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -30,7 +30,6 @@ #include <linux/msi.h> #include <linux/irqdomain.h> #include <linux/percpu.h> -#include <linux/iova.h> #include <linux/io-pgtable.h> #include <asm/irq_remapping.h> #include <asm/io_apic.h> @@ -1713,7 +1712,7 @@ static void amd_iommu_probe_finalize(struct device *dev) /* Domains are initialized for this device - have a look what we ended up with */ domain = iommu_get_domain_for_dev(dev); if (domain->type == IOMMU_DOMAIN_DMA) - iommu_setup_dma_ops(dev, IOVA_START_PFN << PAGE_SHIFT, 0); + iommu_setup_dma_ops(dev, 0, U64_MAX); else set_dma_ops(dev, NULL); } @@ -1773,13 +1772,22 @@ void amd_iommu_domain_update(struct protection_domain *domain) amd_iommu_domain_flush_complete(domain); } +static void __init amd_iommu_init_dma_ops(void) +{ + swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0; + + if (amd_iommu_unmap_flush) + pr_info("IO/TLB flush on unmap enabled\n"); + else + pr_info("Lazy IO/TLB flushing enabled\n"); + iommu_set_dma_strict(amd_iommu_unmap_flush); +} + int __init amd_iommu_init_api(void) { - int ret, err = 0; + int err; - ret = iova_cache_get(); - if (ret) - return ret; + amd_iommu_init_dma_ops(); err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops); if (err) @@ -1796,19 +1804,6 @@ int __init amd_iommu_init_api(void) return 0; } -int __init amd_iommu_init_dma_ops(void) -{ - swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 
1 : 0; - - if (amd_iommu_unmap_flush) - pr_info("IO/TLB flush on unmap enabled\n"); - else - pr_info("Lazy IO/TLB flushing enabled\n"); - iommu_set_dma_strict(amd_iommu_unmap_flush); - return 0; - -} - /***************************************************************************** * * The following functions belong to the exported interface of AMD IOMMU diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index bb251cab61f3..ee66d1f4cb81 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -435,9 +435,13 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) return true; } -static bool arm_smmu_iopf_supported(struct arm_smmu_master *master) +bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master) { - return false; + /* We're not keeping track of SIDs in fault events */ + if (master->num_streams != 1) + return false; + + return master->stall_enabled; } bool arm_smmu_master_sva_supported(struct arm_smmu_master *master) @@ -445,8 +449,8 @@ bool arm_smmu_master_sva_supported(struct arm_smmu_master *master) if (!(master->smmu->features & ARM_SMMU_FEAT_SVA)) return false; - /* SSID and IOPF support are mandatory for the moment */ - return master->ssid_bits && arm_smmu_iopf_supported(master); + /* SSID support is mandatory for the moment */ + return master->ssid_bits; } bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master) @@ -459,13 +463,55 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master) return enabled; } +static int arm_smmu_master_sva_enable_iopf(struct arm_smmu_master *master) +{ + int ret; + struct device *dev = master->dev; + + /* + * Drivers for devices supporting PRI or stall should enable IOPF first. + * Others have device-specific fault handlers and don't need IOPF. 
+ */ + if (!arm_smmu_master_iopf_supported(master)) + return 0; + + if (!master->iopf_enabled) + return -EINVAL; + + ret = iopf_queue_add_device(master->smmu->evtq.iopf, dev); + if (ret) + return ret; + + ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); + if (ret) { + iopf_queue_remove_device(master->smmu->evtq.iopf, dev); + return ret; + } + return 0; +} + +static void arm_smmu_master_sva_disable_iopf(struct arm_smmu_master *master) +{ + struct device *dev = master->dev; + + if (!master->iopf_enabled) + return; + + iommu_unregister_device_fault_handler(dev); + iopf_queue_remove_device(master->smmu->evtq.iopf, dev); +} + int arm_smmu_master_enable_sva(struct arm_smmu_master *master) { + int ret; + mutex_lock(&sva_lock); - master->sva_enabled = true; + ret = arm_smmu_master_sva_enable_iopf(master); + if (!ret) + master->sva_enabled = true; mutex_unlock(&sva_lock); - return 0; + return ret; } int arm_smmu_master_disable_sva(struct arm_smmu_master *master) @@ -476,6 +522,7 @@ int arm_smmu_master_disable_sva(struct arm_smmu_master *master) mutex_unlock(&sva_lock); return -EBUSY; } + arm_smmu_master_sva_disable_iopf(master); master->sva_enabled = false; mutex_unlock(&sva_lock); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 54b2f27b81d4..dd20b01771c4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -23,7 +23,6 @@ #include <linux/msi.h> #include <linux/of.h> #include <linux/of_address.h> -#include <linux/of_iommu.h> #include <linux/of_platform.h> #include <linux/pci.h> #include <linux/pci-ats.h> @@ -32,6 +31,7 @@ #include <linux/amba/bus.h> #include "arm-smmu-v3.h" +#include "../../iommu-sva-lib.h" static bool disable_bypass = true; module_param(disable_bypass, bool, 0444); @@ -313,6 +313,11 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) } cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp); break; + case CMDQ_OP_RESUME: + cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid); + cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp); + cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag); + break; case CMDQ_OP_CMD_SYNC: if (ent->sync.msiaddr) { cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ); @@ -352,7 +357,7 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) { - static const char *cerror_str[] = { + static const char * const cerror_str[] = { [CMDQ_ERR_CERROR_NONE_IDX] = "No error", [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command", [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch", @@ -876,6 +881,44 @@ static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu, return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true); } +static int arm_smmu_page_response(struct device *dev, + struct iommu_fault_event *unused, + struct iommu_page_response *resp) +{ + struct arm_smmu_cmdq_ent cmd = {0}; + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + int sid = master->streams[0].id; + + if (master->stall_enabled) { + cmd.opcode = CMDQ_OP_RESUME; + cmd.resume.sid = sid; + cmd.resume.stag = resp->grpid; + switch (resp->code) { + case IOMMU_PAGE_RESP_INVALID: + case IOMMU_PAGE_RESP_FAILURE: + cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT; + break; + case IOMMU_PAGE_RESP_SUCCESS: + cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY; + break; + default: + return -EINVAL; + } + } else { + return -ENODEV; 
+ } + + arm_smmu_cmdq_issue_cmd(master->smmu, &cmd); + /* + * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP. + * RESUME consumption guarantees that the stalled transaction will be + * terminated... at some point in the future. PRI_RESP is fire and + * forget. + */ + + return 0; +} + /* Context descriptor manipulation functions */ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid) { @@ -986,7 +1029,6 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, u64 val; bool cd_live; __le64 *cdptr; - struct arm_smmu_device *smmu = smmu_domain->smmu; if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax))) return -E2BIG; @@ -1031,8 +1073,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) | CTXDESC_CD_0_V; - /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */ - if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE) + if (smmu_domain->stall_enabled) val |= CTXDESC_CD_0_S; } @@ -1276,7 +1317,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, FIELD_PREP(STRTAB_STE_1_STRW, strw)); if (smmu->features & ARM_SMMU_FEAT_STALLS && - !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) + !master->stall_enabled) dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | @@ -1353,7 +1394,6 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) return 0; } -__maybe_unused static struct arm_smmu_master * arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) { @@ -1377,18 +1417,118 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) } /* IRQ and event handlers */ +static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) +{ + int ret; + u32 reason; + u32 perm = 0; + struct arm_smmu_master *master; + bool ssid_valid = evt[0] & EVTQ_0_SSV; + u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]); + struct iommu_fault_event fault_evt = { }; + struct iommu_fault *flt = &fault_evt.fault; + + switch (FIELD_GET(EVTQ_0_ID, evt[0])) { + case EVT_ID_TRANSLATION_FAULT: + reason = IOMMU_FAULT_REASON_PTE_FETCH; + break; + case EVT_ID_ADDR_SIZE_FAULT: + reason = IOMMU_FAULT_REASON_OOR_ADDRESS; + break; + case EVT_ID_ACCESS_FAULT: + reason = IOMMU_FAULT_REASON_ACCESS; + break; + case EVT_ID_PERMISSION_FAULT: + reason = IOMMU_FAULT_REASON_PERMISSION; + break; + default: + return -EOPNOTSUPP; + } + + /* Stage-2 is always pinned at the moment */ + if (evt[1] & EVTQ_1_S2) + return -EFAULT; + + if (evt[1] & EVTQ_1_RnW) + perm |= IOMMU_FAULT_PERM_READ; + else + perm |= IOMMU_FAULT_PERM_WRITE; + + if (evt[1] & EVTQ_1_InD) + perm |= IOMMU_FAULT_PERM_EXEC; + + if (evt[1] & EVTQ_1_PnU) + perm |= IOMMU_FAULT_PERM_PRIV; + + if (evt[1] & EVTQ_1_STALL) { + flt->type = IOMMU_FAULT_PAGE_REQ; + flt->prm = (struct iommu_fault_page_request) { + .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE, + .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]), + .perm = perm, + .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]), + }; + + if (ssid_valid) { + flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; + flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]); + } + } else { + flt->type = IOMMU_FAULT_DMA_UNRECOV; + flt->event = (struct iommu_fault_unrecoverable) { + .reason = reason, + .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID, + .perm = perm, + .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]), + }; + + if (ssid_valid) { + flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID; + flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]); + } + } + + mutex_lock(&smmu->streams_mutex); + 
master = arm_smmu_find_master(smmu, sid); + if (!master) { + ret = -EINVAL; + goto out_unlock; + } + + ret = iommu_report_device_fault(master->dev, &fault_evt); + if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) { + /* Nobody cared, abort the access */ + struct iommu_page_response resp = { + .pasid = flt->prm.pasid, + .grpid = flt->prm.grpid, + .code = IOMMU_PAGE_RESP_FAILURE, + }; + arm_smmu_page_response(master->dev, &fault_evt, &resp); + } + +out_unlock: + mutex_unlock(&smmu->streams_mutex); + return ret; +} + static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) { - int i; + int i, ret; struct arm_smmu_device *smmu = dev; struct arm_smmu_queue *q = &smmu->evtq.q; struct arm_smmu_ll_queue *llq = &q->llq; + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); u64 evt[EVTQ_ENT_DWORDS]; do { while (!queue_remove_raw(q, evt)) { u8 id = FIELD_GET(EVTQ_0_ID, evt[0]); + ret = arm_smmu_handle_evt(smmu, evt); + if (!ret || !__ratelimit(&rs)) + continue; + dev_info(smmu->dev, "event 0x%02x received:\n", id); for (i = 0; i < ARRAY_SIZE(evt); ++i) dev_info(smmu->dev, "\t0x%016llx\n", @@ -1923,6 +2063,8 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, cfg->s1cdmax = master->ssid_bits; + smmu_domain->stall_enabled = master->stall_enabled; + ret = arm_smmu_alloc_cd_tables(smmu_domain); if (ret) goto out_free_asid; @@ -2270,6 +2412,12 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) smmu_domain->s1_cfg.s1cdmax, master->ssid_bits); ret = -EINVAL; goto out_unlock; + } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && + smmu_domain->stall_enabled != master->stall_enabled) { + dev_err(dev, "cannot attach to stall-%s domain\n", + smmu_domain->stall_enabled ? "enabled" : "disabled"); + ret = -EINVAL; + goto out_unlock; } master->domain = smmu_domain; @@ -2508,6 +2656,11 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) master->ssid_bits = min_t(u8, master->ssid_bits, CTXDESC_LINEAR_CDMAX); + if ((smmu->features & ARM_SMMU_FEAT_STALLS && + device_property_read_bool(dev, "dma-can-stall")) || + smmu->features & ARM_SMMU_FEAT_STALL_FORCE) + master->stall_enabled = true; + return &smmu->iommu; err_free_master: @@ -2525,7 +2678,8 @@ static void arm_smmu_release_device(struct device *dev) return; master = dev_iommu_priv_get(dev); - WARN_ON(arm_smmu_master_sva_enabled(master)); + if (WARN_ON(arm_smmu_master_sva_enabled(master))) + iopf_queue_remove_device(master->smmu->evtq.iopf, dev); arm_smmu_detach_dev(master); arm_smmu_disable_pasid(master); arm_smmu_remove_master(master); @@ -2595,6 +2749,8 @@ static bool arm_smmu_dev_has_feature(struct device *dev, return false; switch (feat) { + case IOMMU_DEV_FEAT_IOPF: + return arm_smmu_master_iopf_supported(master); case IOMMU_DEV_FEAT_SVA: return arm_smmu_master_sva_supported(master); default: @@ -2611,6 +2767,8 @@ static bool arm_smmu_dev_feature_enabled(struct device *dev, return false; switch (feat) { + case IOMMU_DEV_FEAT_IOPF: + return master->iopf_enabled; case IOMMU_DEV_FEAT_SVA: return arm_smmu_master_sva_enabled(master); default: @@ -2621,6 +2779,8 @@ static bool arm_smmu_dev_feature_enabled(struct device *dev, static int arm_smmu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) { + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + if (!arm_smmu_dev_has_feature(dev, feat)) return -ENODEV; @@ -2628,8 +2788,11 @@ static int arm_smmu_dev_enable_feature(struct device *dev, return -EBUSY; switch (feat) { + case 
IOMMU_DEV_FEAT_IOPF: + master->iopf_enabled = true; + return 0; case IOMMU_DEV_FEAT_SVA: - return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev)); + return arm_smmu_master_enable_sva(master); default: return -EINVAL; } @@ -2638,12 +2801,19 @@ static int arm_smmu_dev_enable_feature(struct device *dev, static int arm_smmu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) { + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + if (!arm_smmu_dev_feature_enabled(dev, feat)) return -EINVAL; switch (feat) { + case IOMMU_DEV_FEAT_IOPF: + if (master->sva_enabled) + return -EBUSY; + master->iopf_enabled = false; + return 0; case IOMMU_DEV_FEAT_SVA: - return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev)); + return arm_smmu_master_disable_sva(master); default: return -EINVAL; } @@ -2673,6 +2843,7 @@ static struct iommu_ops arm_smmu_ops = { .sva_bind = arm_smmu_sva_bind, .sva_unbind = arm_smmu_sva_unbind, .sva_get_pasid = arm_smmu_sva_get_pasid, + .page_response = arm_smmu_page_response, .pgsize_bitmap = -1UL, /* Restricted during device attach */ .owner = THIS_MODULE, }; @@ -2771,6 +2942,13 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) if (ret) return ret; + if ((smmu->features & ARM_SMMU_FEAT_SVA) && + (smmu->features & ARM_SMMU_FEAT_STALLS)) { + smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev)); + if (!smmu->evtq.iopf) + return -ENOMEM; + } + /* priq */ if (!(smmu->features & ARM_SMMU_FEAT_PRI)) return 0; @@ -2788,10 +2966,8 @@ static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) void *strtab = smmu->strtab_cfg.strtab; cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL); - if (!cfg->l1_desc) { - dev_err(smmu->dev, "failed to allocate l1 stream table desc\n"); + if (!cfg->l1_desc) return -ENOMEM; - } for (i = 0; i < cfg->num_l1_ents; ++i) { arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]); @@ -3582,10 +3758,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev) bool bypass; smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); - if (!smmu) { - dev_err(dev, "failed to allocate arm_smmu_device\n"); + if (!smmu) return -ENOMEM; - } smmu->dev = dev; if (dev->of_node) { @@ -3669,10 +3843,20 @@ static int arm_smmu_device_probe(struct platform_device *pdev) ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev); if (ret) { dev_err(dev, "Failed to register iommu\n"); - return ret; + goto err_sysfs_remove; } - return arm_smmu_set_bus_ops(&arm_smmu_ops); + ret = arm_smmu_set_bus_ops(&arm_smmu_ops); + if (ret) + goto err_unregister_device; + + return 0; + +err_unregister_device: + iommu_device_unregister(&smmu->iommu); +err_sysfs_remove: + iommu_device_sysfs_remove(&smmu->iommu); + return ret; } static int arm_smmu_device_remove(struct platform_device *pdev) @@ -3683,6 +3867,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev) iommu_device_unregister(&smmu->iommu); iommu_device_sysfs_remove(&smmu->iommu); arm_smmu_device_disable(smmu); + iopf_queue_free(smmu->evtq.iopf); return 0; } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 46e8c49214a8..4cb136f07914 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -184,6 +184,7 @@ #else #define Q_MAX_SZ_SHIFT (PAGE_SHIFT + MAX_ORDER - 1) #endif +#define Q_MIN_SZ_SHIFT (PAGE_SHIFT) /* * Stream table. 
@@ -354,6 +355,13 @@ #define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0) #define CMDQ_PRI_1_RESP GENMASK_ULL(13, 12) +#define CMDQ_RESUME_0_RESP_TERM 0UL +#define CMDQ_RESUME_0_RESP_RETRY 1UL +#define CMDQ_RESUME_0_RESP_ABORT 2UL +#define CMDQ_RESUME_0_RESP GENMASK_ULL(13, 12) +#define CMDQ_RESUME_0_SID GENMASK_ULL(63, 32) +#define CMDQ_RESUME_1_STAG GENMASK_ULL(15, 0) + #define CMDQ_SYNC_0_CS GENMASK_ULL(13, 12) #define CMDQ_SYNC_0_CS_NONE 0 #define CMDQ_SYNC_0_CS_IRQ 1 @@ -366,14 +374,33 @@ /* Event queue */ #define EVTQ_ENT_SZ_SHIFT 5 #define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3) -#define EVTQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT) +#define EVTQ_MAX_SZ_SHIFT (Q_MIN_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT) #define EVTQ_0_ID GENMASK_ULL(7, 0) +#define EVT_ID_TRANSLATION_FAULT 0x10 +#define EVT_ID_ADDR_SIZE_FAULT 0x11 +#define EVT_ID_ACCESS_FAULT 0x12 +#define EVT_ID_PERMISSION_FAULT 0x13 + +#define EVTQ_0_SSV (1UL << 11) +#define EVTQ_0_SSID GENMASK_ULL(31, 12) +#define EVTQ_0_SID GENMASK_ULL(63, 32) +#define EVTQ_1_STAG GENMASK_ULL(15, 0) +#define EVTQ_1_STALL (1UL << 31) +#define EVTQ_1_PnU (1UL << 33) +#define EVTQ_1_InD (1UL << 34) +#define EVTQ_1_RnW (1UL << 35) +#define EVTQ_1_S2 (1UL << 39) +#define EVTQ_1_CLASS GENMASK_ULL(41, 40) +#define EVTQ_1_TT_READ (1UL << 44) +#define EVTQ_2_ADDR GENMASK_ULL(63, 0) +#define EVTQ_3_IPA GENMASK_ULL(51, 12) + /* PRI queue */ #define PRIQ_ENT_SZ_SHIFT 4 #define PRIQ_ENT_DWORDS ((1 << PRIQ_ENT_SZ_SHIFT) >> 3) -#define PRIQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT) +#define PRIQ_MAX_SZ_SHIFT (Q_MIN_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT) #define PRIQ_0_SID GENMASK_ULL(31, 0) #define PRIQ_0_SSID GENMASK_ULL(51, 32) @@ -462,6 +489,13 @@ struct arm_smmu_cmdq_ent { enum pri_resp resp; } pri; + #define CMDQ_OP_RESUME 0x44 + struct { + u32 sid; + u16 stag; + u8 resp; + } resume; + #define CMDQ_OP_CMD_SYNC 0x46 struct { u64 msiaddr; @@ -520,6 +554,7 @@ struct arm_smmu_cmdq_batch { struct arm_smmu_evtq { struct arm_smmu_queue q; + struct iopf_queue *iopf; u32 max_stalls; }; @@ -657,7 +692,9 @@ struct arm_smmu_master { struct arm_smmu_stream *streams; unsigned int num_streams; bool ats_enabled; + bool stall_enabled; bool sva_enabled; + bool iopf_enabled; struct list_head bonds; unsigned int ssid_bits; }; @@ -675,6 +712,7 @@ struct arm_smmu_domain { struct mutex init_mutex; /* Protects smmu pointer */ struct io_pgtable_ops *pgtbl_ops; + bool stall_enabled; atomic_t nr_ats_masters; enum arm_smmu_domain_stage stage; @@ -716,6 +754,7 @@ bool arm_smmu_master_sva_supported(struct arm_smmu_master *master); bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master); int arm_smmu_master_enable_sva(struct arm_smmu_master *master); int arm_smmu_master_disable_sva(struct arm_smmu_master *master); +bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master); struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata); void arm_smmu_sva_unbind(struct iommu_sva *handle); @@ -747,6 +786,11 @@ static inline int arm_smmu_master_disable_sva(struct arm_smmu_master *master) return -ENODEV; } +static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master) +{ + return false; +} + static inline struct iommu_sva * arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata) { diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 98b3a1c2a181..7771d40176de 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ 
b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -3,6 +3,7 @@ * Copyright (c) 2019, The Linux Foundation. All rights reserved. */ +#include <linux/acpi.h> #include <linux/adreno-smmu-priv.h> #include <linux/of_device.h> #include <linux/qcom_scm.h> @@ -130,6 +131,16 @@ static int qcom_adreno_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_doma return __arm_smmu_alloc_bitmap(smmu->context_map, start, count); } +static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu) +{ + const struct device_node *np = smmu->dev->of_node; + + if (of_device_is_compatible(np, "qcom,msm8996-smmu-v2")) + return false; + + return true; +} + static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg, struct device *dev) { @@ -144,7 +155,8 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain, * be AARCH64 stage 1 but double check because the arm-smmu code assumes * that is the case when the TTBR1 quirk is enabled */ - if ((smmu_domain->stage == ARM_SMMU_DOMAIN_S1) && + if (qcom_adreno_can_do_ttbr1(smmu_domain->smmu) && + (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) && (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)) pgtbl_cfg->quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1; @@ -166,6 +178,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,mdss" }, { .compatible = "qcom,sc7180-mdss" }, { .compatible = "qcom,sc7180-mss-pil" }, + { .compatible = "qcom,sc7280-mdss" }, { .compatible = "qcom,sc8180x-mdss" }, { .compatible = "qcom,sdm845-mdss" }, { .compatible = "qcom,sdm845-mss-pil" }, @@ -330,24 +343,48 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,msm8998-smmu-v2" }, { .compatible = "qcom,sc7180-smmu-500" }, + { .compatible = "qcom,sc7280-smmu-500" }, { .compatible = "qcom,sc8180x-smmu-500" }, { .compatible = "qcom,sdm630-smmu-v2" }, { .compatible = "qcom,sdm845-smmu-500" }, + { .compatible = "qcom,sm6125-smmu-500" }, { .compatible = "qcom,sm8150-smmu-500" }, { .compatible = "qcom,sm8250-smmu-500" }, { .compatible = "qcom,sm8350-smmu-500" }, { } }; +#ifdef CONFIG_ACPI +static struct acpi_platform_list qcom_acpi_platlist[] = { + { "LENOVO", "CB-01 ", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" }, + { "QCOM ", "QCOMEDK2", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" }, + { } +}; +#endif + struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) { const struct device_node *np = smmu->dev->of_node; - if (of_match_node(qcom_smmu_impl_of_match, np)) - return qcom_smmu_create(smmu, &qcom_smmu_impl); +#ifdef CONFIG_ACPI + if (np == NULL) { + /* Match platform for ACPI boot */ + if (acpi_match_platform_list(qcom_acpi_platlist) >= 0) + return qcom_smmu_create(smmu, &qcom_smmu_impl); + } +#endif + /* + * Do not change this order of implementation, i.e., first adreno + * smmu impl and then apss smmu since we can have both implementing + * arm,mmu-500 in which case we will miss setting adreno smmu specific + * features if the order is changed. 
+ */ if (of_device_is_compatible(np, "qcom,adreno-smmu")) return qcom_smmu_create(smmu, &qcom_adreno_smmu_impl); + if (of_match_node(qcom_smmu_impl_of_match, np)) + return qcom_smmu_create(smmu, &qcom_smmu_impl); + return smmu; } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 6f72c4d208ca..58c703f253ef 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -31,7 +31,6 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_device.h> -#include <linux/of_iommu.h> #include <linux/pci.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> @@ -74,7 +73,7 @@ static bool using_legacy_binding, using_generic_binding; static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu) { if (pm_runtime_enabled(smmu->dev)) - return pm_runtime_get_sync(smmu->dev); + return pm_runtime_resume_and_get(smmu->dev); return 0; } @@ -1271,6 +1270,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, u64 phys; unsigned long va, flags; int ret, idx = cfg->cbndx; + phys_addr_t addr = 0; ret = arm_smmu_rpm_get(smmu); if (ret < 0) @@ -1290,6 +1290,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, dev_err(dev, "iova to phys timed out on %pad. Falling back to software table walk.\n", &iova); + arm_smmu_rpm_put(smmu); return ops->iova_to_phys(ops, iova); } @@ -1298,12 +1299,14 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, if (phys & ARM_SMMU_CB_PAR_F) { dev_err(dev, "translation fault!\n"); dev_err(dev, "PAR = 0x%llx\n", phys); - return 0; + goto out; } + addr = (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff); +out: arm_smmu_rpm_put(smmu); - return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff); + return addr; } static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, @@ -1450,6 +1453,18 @@ static void arm_smmu_release_device(struct device *dev) iommu_fwspec_free(dev); } +static void arm_smmu_probe_finalize(struct device *dev) +{ + struct arm_smmu_master_cfg *cfg; + struct arm_smmu_device *smmu; + + cfg = dev_iommu_priv_get(dev); + smmu = cfg->smmu; + + if (smmu->impl && smmu->impl->probe_finalize) + smmu->impl->probe_finalize(smmu, dev); +} + static struct iommu_group *arm_smmu_device_group(struct device *dev) { struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev); @@ -1569,6 +1584,7 @@ static struct iommu_ops arm_smmu_ops = { .iova_to_phys = arm_smmu_iova_to_phys, .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, + .probe_finalize = arm_smmu_probe_finalize, .device_group = arm_smmu_device_group, .enable_nesting = arm_smmu_enable_nesting, .set_pgtable_quirks = arm_smmu_set_pgtable_quirks, @@ -2164,7 +2180,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev); if (err) { dev_err(dev, "Failed to register iommu\n"); - return err; + goto err_sysfs_remove; } platform_set_drvdata(pdev, smmu); @@ -2187,10 +2203,19 @@ static int arm_smmu_device_probe(struct platform_device *pdev) * any device which might need it, so we want the bus ops in place * ready to handle default domain setup as soon as any SMMU exists. 
*/ - if (!using_legacy_binding) - return arm_smmu_bus_init(&arm_smmu_ops); + if (!using_legacy_binding) { + err = arm_smmu_bus_init(&arm_smmu_ops); + if (err) + goto err_unregister_device; + } return 0; + +err_unregister_device: + iommu_device_unregister(&smmu->iommu); +err_sysfs_remove: + iommu_device_sysfs_remove(&smmu->iommu); + return err; } static int arm_smmu_device_remove(struct platform_device *pdev) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index c31a59d35c64..147c95e7c59c 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -439,6 +439,7 @@ struct arm_smmu_impl { struct device *dev, int start); void (*write_s2cr)(struct arm_smmu_device *smmu, int idx); void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 reg); + void (*probe_finalize)(struct arm_smmu_device *smmu, struct device *dev); }; #define INVALID_SMENDX -1 diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 4294abe389b2..25ed444ff94d 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -25,7 +25,6 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_device.h> -#include <linux/of_iommu.h> #include <linux/platform_device.h> #include <linux/pm.h> #include <linux/pm_runtime.h> @@ -850,10 +849,12 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) ret = iommu_device_register(&qcom_iommu->iommu, &qcom_iommu_ops, dev); if (ret) { dev_err(dev, "Failed to register iommu\n"); - return ret; + goto err_sysfs_remove; } - bus_set_iommu(&platform_bus_type, &qcom_iommu_ops); + ret = bus_set_iommu(&platform_bus_type, &qcom_iommu_ops); + if (ret) + goto err_unregister_device; if (qcom_iommu->local_base) { pm_runtime_get_sync(dev); @@ -862,6 +863,13 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) } return 0; + +err_unregister_device: + iommu_device_unregister(&qcom_iommu->iommu); + +err_sysfs_remove: + iommu_device_sysfs_remove(&qcom_iommu->iommu); + return ret; } static int qcom_iommu_device_remove(struct platform_device *pdev) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 7bcdd1205535..98ba927aee1a 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -243,9 +243,11 @@ resv_iova: lo = iova_pfn(iovad, start); hi = iova_pfn(iovad, end); reserve_iova(iovad, lo, hi); - } else { + } else if (end < start) { /* dma_ranges list should be sorted */ - dev_err(&dev->dev, "Failed to reserve IOVA\n"); + dev_err(&dev->dev, + "Failed to reserve IOVA [%pa-%pa]\n", + &start, &end); return -EINVAL; } @@ -319,16 +321,16 @@ static bool dev_is_untrusted(struct device *dev) * iommu_dma_init_domain - Initialise a DMA mapping domain * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() * @base: IOVA at which the mappable address space starts - * @size: Size of IOVA space + * @limit: Last address of the IOVA space * @dev: Device the domain is being initialised for * - * @base and @size should be exact multiples of IOMMU page granularity to + * @base and @limit + 1 should be exact multiples of IOMMU page granularity to * avoid rounding surprises. If necessary, we reserve the page at address 0 * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but * any change which could make prior IOVAs invalid will fail. 
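A note on the interface change above: iommu_dma_init_domain() and iommu_setup_dma_ops() now take the last usable IOVA (an inclusive limit) instead of the size of the window. A minimal, hypothetical caller-side sketch of the conversion follows; dev, base and size are placeholders for illustration and are not names taken from this series:

	u64 base  = SZ_4K;		/* keep IOVA 0 reserved */
	u64 size  = SZ_4G;		/* old-style window size */
	u64 limit = base + size - 1;	/* new-style inclusive last address */

	iommu_setup_dma_ops(dev, base, limit);

Callers that simply want the whole address space can pass 0 and U64_MAX, as the Intel driver does further down in this series.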
*/ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, - u64 size, struct device *dev) + dma_addr_t limit, struct device *dev) { struct iommu_dma_cookie *cookie = domain->iova_cookie; unsigned long order, base_pfn; @@ -346,7 +348,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, /* Check the domain allows at least some access to the device... */ if (domain->geometry.force_aperture) { if (base > domain->geometry.aperture_end || - base + size <= domain->geometry.aperture_start) { + limit < domain->geometry.aperture_start) { pr_warn("specified DMA range outside IOMMU capability\n"); return -EFAULT; } @@ -1308,7 +1310,7 @@ static const struct dma_map_ops iommu_dma_ops = { * The IOMMU core code allocates the default DMA domain, which the underlying * IOMMU driver needs to support via the dma-iommu layer. */ -void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size) +void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); @@ -1320,7 +1322,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size) * underlying IOMMU driver needs to support via the dma-iommu layer. */ if (domain->type == IOMMU_DOMAIN_DMA) { - if (iommu_dma_init_domain(domain, dma_base, size, dev)) + if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev)) goto out_err; dev->dma_ops = &iommu_dma_ops; } @@ -1330,6 +1332,7 @@ out_err: pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", dev_name(dev)); } +EXPORT_SYMBOL_GPL(iommu_setup_dma_ops); static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, phys_addr_t msi_addr, struct iommu_domain *domain) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 7623d8c371f5..d0fbf1d10e18 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -17,7 +17,6 @@ #include <linux/kmemleak.h> #include <linux/list.h> #include <linux/of.h> -#include <linux/of_iommu.h> #include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 28a3d1596c76..43ebd8af11c5 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -3,6 +3,9 @@ config DMAR_TABLE bool +config DMAR_PERF + bool + config INTEL_IOMMU bool "Support for Intel IOMMU using DMA Remapping Devices" depends on PCI_MSI && ACPI && (X86 || IA64) @@ -14,6 +17,7 @@ config INTEL_IOMMU select SWIOTLB select IOASID select IOMMU_DMA + select PCI_ATS help DMA remapping (DMAR) devices support enables independent address translations for Direct Memory Access (DMA) from devices. @@ -24,6 +28,7 @@ config INTEL_IOMMU config INTEL_IOMMU_DEBUGFS bool "Export Intel IOMMU internals in Debugfs" depends on INTEL_IOMMU && IOMMU_DEBUGFS + select DMAR_PERF help !!!WARNING!!! 
@@ -41,6 +46,7 @@ config INTEL_IOMMU_SVM select PCI_PRI select MMU_NOTIFIER select IOASID + select IOMMU_SVA_LIB help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through process address space by diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index ae236ec7d219..fa0dae16441c 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o +obj-$(CONFIG_DMAR_PERF) += perf.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c index efea7f02abd9..62e23ff3c987 100644 --- a/drivers/iommu/intel/debugfs.c +++ b/drivers/iommu/intel/debugfs.c @@ -16,6 +16,7 @@ #include <asm/irq_remapping.h> #include "pasid.h" +#include "perf.h" struct tbl_walk { u16 bus; @@ -31,6 +32,9 @@ struct iommu_regset { const char *regs; }; +#define DEBUG_BUFFER_SIZE 1024 +static char debug_buf[DEBUG_BUFFER_SIZE]; + #define IOMMU_REGSET_ENTRY(_reg_) \ { DMAR_##_reg_##_REG, __stringify(_reg_) } @@ -538,6 +542,111 @@ static int ir_translation_struct_show(struct seq_file *m, void *unused) DEFINE_SHOW_ATTRIBUTE(ir_translation_struct); #endif +static void latency_show_one(struct seq_file *m, struct intel_iommu *iommu, + struct dmar_drhd_unit *drhd) +{ + int ret; + + seq_printf(m, "IOMMU: %s Register Base Address: %llx\n", + iommu->name, drhd->reg_base_addr); + + ret = dmar_latency_snapshot(iommu, debug_buf, DEBUG_BUFFER_SIZE); + if (ret < 0) + seq_puts(m, "Failed to get latency snapshot"); + else + seq_puts(m, debug_buf); + seq_puts(m, "\n"); +} + +static int latency_show(struct seq_file *m, void *v) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) + latency_show_one(m, iommu, drhd); + rcu_read_unlock(); + + return 0; +} + +static int dmar_perf_latency_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, latency_show, NULL); +} + +static ssize_t dmar_perf_latency_write(struct file *filp, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + int counting; + char buf[64]; + + if (cnt > 63) + cnt = 63; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + if (kstrtoint(buf, 0, &counting)) + return -EINVAL; + + switch (counting) { + case 0: + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + dmar_latency_disable(iommu, DMAR_LATENCY_INV_IOTLB); + dmar_latency_disable(iommu, DMAR_LATENCY_INV_DEVTLB); + dmar_latency_disable(iommu, DMAR_LATENCY_INV_IEC); + dmar_latency_disable(iommu, DMAR_LATENCY_PRQ); + } + rcu_read_unlock(); + break; + case 1: + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) + dmar_latency_enable(iommu, DMAR_LATENCY_INV_IOTLB); + rcu_read_unlock(); + break; + case 2: + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) + dmar_latency_enable(iommu, DMAR_LATENCY_INV_DEVTLB); + rcu_read_unlock(); + break; + case 3: + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) + dmar_latency_enable(iommu, DMAR_LATENCY_INV_IEC); + rcu_read_unlock(); + break; + case 4: + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) + dmar_latency_enable(iommu, DMAR_LATENCY_PRQ); + rcu_read_unlock(); + break; + default: + return -EINVAL; + } + + *ppos += cnt; + return cnt; 
+} + +static const struct file_operations dmar_perf_latency_fops = { + .open = dmar_perf_latency_open, + .write = dmar_perf_latency_write, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + void __init intel_iommu_debugfs_init(void) { struct dentry *intel_iommu_debug = debugfs_create_dir("intel", @@ -556,4 +665,6 @@ void __init intel_iommu_debugfs_init(void) debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug, NULL, &ir_translation_struct_fops); #endif + debugfs_create_file("dmar_perf_latency", 0644, intel_iommu_debug, + NULL, &dmar_perf_latency_fops); } diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 84057cb9596c..d66f79acd14d 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -34,6 +34,7 @@ #include <trace/events/intel_iommu.h> #include "../irq_remapping.h" +#include "perf.h" typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *); struct dmar_res_callback { @@ -1342,15 +1343,33 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, unsigned int count, unsigned long options) { struct q_inval *qi = iommu->qi; + s64 devtlb_start_ktime = 0; + s64 iotlb_start_ktime = 0; + s64 iec_start_ktime = 0; struct qi_desc wait_desc; int wait_index, index; unsigned long flags; int offset, shift; int rc, i; + u64 type; if (!qi) return 0; + type = desc->qw0 & GENMASK_ULL(3, 0); + + if ((type == QI_IOTLB_TYPE || type == QI_EIOTLB_TYPE) && + dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB)) + iotlb_start_ktime = ktime_to_ns(ktime_get()); + + if ((type == QI_DIOTLB_TYPE || type == QI_DEIOTLB_TYPE) && + dmar_latency_enabled(iommu, DMAR_LATENCY_INV_DEVTLB)) + devtlb_start_ktime = ktime_to_ns(ktime_get()); + + if (type == QI_IEC_TYPE && + dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IEC)) + iec_start_ktime = ktime_to_ns(ktime_get()); + restart: rc = 0; @@ -1425,6 +1444,18 @@ restart: if (rc == -EAGAIN) goto restart; + if (iotlb_start_ktime) + dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB, + ktime_to_ns(ktime_get()) - iotlb_start_ktime); + + if (devtlb_start_ktime) + dmar_latency_update(iommu, DMAR_LATENCY_INV_DEVTLB, + ktime_to_ns(ktime_get()) - devtlb_start_ktime); + + if (iec_start_ktime) + dmar_latency_update(iommu, DMAR_LATENCY_INV_IEC, + ktime_to_ns(ktime_get()) - iec_start_ktime); + return rc; } @@ -1913,16 +1944,23 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type, reason = dmar_get_fault_reason(fault_reason, &fault_type); if (fault_type == INTR_REMAP) - pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n", - source_id >> 8, PCI_SLOT(source_id & 0xFF), - PCI_FUNC(source_id & 0xFF), addr >> 48, - fault_reason, reason); - else - pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n", + pr_err("[INTR-REMAP] Request device [0x%02x:0x%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n", + source_id >> 8, PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr >> 48, + fault_reason, reason); + else if (pasid == INVALID_IOASID) + pr_err("[%s NO_PASID] Request device [0x%02x:0x%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n", type ? "DMA Read" : "DMA Write", source_id >> 8, PCI_SLOT(source_id & 0xFF), - PCI_FUNC(source_id & 0xFF), pasid, addr, + PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); + else + pr_err("[%s PASID 0x%x] Request device [0x%02x:0x%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n", + type ? 
"DMA Read" : "DMA Write", pasid, + source_id >> 8, PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr, + fault_reason, reason); + return 0; } @@ -1989,7 +2027,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id) if (!ratelimited) /* Using pasid -1 if pasid is not present */ dmar_fault_do_one(iommu, type, fault_reason, - pasid_present ? pasid : -1, + pasid_present ? pasid : INVALID_IOASID, source_id, guest_addr); fault_index++; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index be35284a2016..a6a07d985709 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -46,6 +46,7 @@ #include <asm/iommu.h> #include "../irq_remapping.h" +#include "../iommu-sva-lib.h" #include "pasid.h" #include "cap_audit.h" @@ -564,7 +565,7 @@ static inline int domain_pfn_supported(struct dmar_domain *domain, static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw) { unsigned long sagaw; - int agaw = -1; + int agaw; sagaw = cap_sagaw(iommu->cap); for (agaw = width_to_agaw(max_gaw); @@ -625,12 +626,12 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain) bool found = false; int i; - domain->iommu_coherency = 1; + domain->iommu_coherency = true; for_each_domain_iommu(i, domain) { found = true; if (!iommu_paging_structure_coherency(g_iommus[i])) { - domain->iommu_coherency = 0; + domain->iommu_coherency = false; break; } } @@ -641,18 +642,18 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain) rcu_read_lock(); for_each_active_iommu(iommu, drhd) { if (!iommu_paging_structure_coherency(iommu)) { - domain->iommu_coherency = 0; + domain->iommu_coherency = false; break; } } rcu_read_unlock(); } -static int domain_update_iommu_snooping(struct intel_iommu *skip) +static bool domain_update_iommu_snooping(struct intel_iommu *skip) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; - int ret = 1; + bool ret = true; rcu_read_lock(); for_each_active_iommu(iommu, drhd) { @@ -665,7 +666,7 @@ static int domain_update_iommu_snooping(struct intel_iommu *skip) */ if (!sm_supported(iommu) && !ecap_sc_support(iommu->ecap)) { - ret = 0; + ret = false; break; } } @@ -682,9 +683,8 @@ static int domain_update_iommu_superpage(struct dmar_domain *domain, struct intel_iommu *iommu; int mask = 0x3; - if (!intel_iommu_superpage) { + if (!intel_iommu_superpage) return 0; - } /* set iommu_superpage to the smallest common denominator */ rcu_read_lock(); @@ -1919,7 +1919,6 @@ static int domain_attach_iommu(struct dmar_domain *domain, assert_spin_locked(&iommu->lock); domain->iommu_refcnt[iommu->seq_id] += 1; - domain->iommu_count += 1; if (domain->iommu_refcnt[iommu->seq_id] == 1) { ndomains = cap_ndoms(iommu->cap); num = find_first_zero_bit(iommu->domain_ids, ndomains); @@ -1927,7 +1926,6 @@ static int domain_attach_iommu(struct dmar_domain *domain, if (num >= ndomains) { pr_err("%s: No free domain ids\n", iommu->name); domain->iommu_refcnt[iommu->seq_id] -= 1; - domain->iommu_count -= 1; return -ENOSPC; } @@ -1943,16 +1941,15 @@ static int domain_attach_iommu(struct dmar_domain *domain, return 0; } -static int domain_detach_iommu(struct dmar_domain *domain, - struct intel_iommu *iommu) +static void domain_detach_iommu(struct dmar_domain *domain, + struct intel_iommu *iommu) { - int num, count; + int num; assert_spin_locked(&device_domain_lock); assert_spin_locked(&iommu->lock); domain->iommu_refcnt[iommu->seq_id] -= 1; - count = --domain->iommu_count; if (domain->iommu_refcnt[iommu->seq_id] == 0) { num = 
domain->iommu_did[iommu->seq_id]; clear_bit(num, iommu->domain_ids); @@ -1961,8 +1958,6 @@ static int domain_detach_iommu(struct dmar_domain *domain, domain_update_iommu_cap(domain); domain->iommu_did[iommu->seq_id] = 0; } - - return count; } static inline int guestwidth_to_adjustwidth(int gaw) @@ -4138,62 +4133,56 @@ static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev) return container_of(iommu_dev, struct intel_iommu, iommu); } -static ssize_t intel_iommu_show_version(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t version_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); u32 ver = readl(iommu->reg + DMAR_VER_REG); return sprintf(buf, "%d:%d\n", DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver)); } -static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL); +static DEVICE_ATTR_RO(version); -static ssize_t intel_iommu_show_address(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t address_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); return sprintf(buf, "%llx\n", iommu->reg_phys); } -static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL); +static DEVICE_ATTR_RO(address); -static ssize_t intel_iommu_show_cap(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t cap_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); return sprintf(buf, "%llx\n", iommu->cap); } -static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL); +static DEVICE_ATTR_RO(cap); -static ssize_t intel_iommu_show_ecap(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t ecap_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); return sprintf(buf, "%llx\n", iommu->ecap); } -static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL); +static DEVICE_ATTR_RO(ecap); -static ssize_t intel_iommu_show_ndoms(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t domains_supported_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap)); } -static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL); +static DEVICE_ATTR_RO(domains_supported); -static ssize_t intel_iommu_show_ndoms_used(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t domains_used_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap))); } -static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL); +static DEVICE_ATTR_RO(domains_used); static struct attribute *intel_iommu_attrs[] = { &dev_attr_version.attr, @@ -4511,13 +4500,13 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) adjust_width = guestwidth_to_adjustwidth(guest_width); domain->agaw = width_to_agaw(adjust_width); - domain->iommu_coherency = 0; - domain->iommu_snooping = 0; + domain->iommu_coherency = false; + domain->iommu_snooping = false; domain->iommu_superpage = 0; domain->max_addr = 0; /* always allocate the top pgd */ - domain->pgd = (struct dma_pte 
*)alloc_pgtable_page(domain->nid); + domain->pgd = alloc_pgtable_page(domain->nid); if (!domain->pgd) return -ENOMEM; domain_flush_cache(domain, domain->pgd, PAGE_SIZE); @@ -4757,6 +4746,13 @@ static int prepare_domain_attach_device(struct iommu_domain *domain, if (!iommu) return -ENODEV; + if ((dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE) && + !ecap_nest(iommu->ecap)) { + dev_err(dev, "%s: iommu not support nested translation\n", + iommu->name); + return -EINVAL; + } + /* check if this iommu agaw is sufficient for max mapped address */ addr_width = agaw_to_width(iommu->agaw); if (addr_width > cap_mgaw(iommu->cap)) @@ -4778,8 +4774,7 @@ static int prepare_domain_attach_device(struct iommu_domain *domain, pte = dmar_domain->pgd; if (dma_pte_present(pte)) { - dmar_domain->pgd = (struct dma_pte *) - phys_to_virt(dma_pte_addr(pte)); + dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte)); free_pgtable_page(pte); } dmar_domain->agaw--; @@ -5129,7 +5124,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, static bool intel_iommu_capable(enum iommu_cap cap) { if (cap == IOMMU_CAP_CACHE_COHERENCY) - return domain_update_iommu_snooping(NULL) == 1; + return domain_update_iommu_snooping(NULL); if (cap == IOMMU_CAP_INTR_REMAP) return irq_remapping_enabled == 1; @@ -5165,13 +5160,10 @@ static void intel_iommu_release_device(struct device *dev) static void intel_iommu_probe_finalize(struct device *dev) { - dma_addr_t base = IOVA_START_PFN << VTD_PAGE_SHIFT; struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct dmar_domain *dmar_domain = to_dmar_domain(domain); if (domain && domain->type == IOMMU_DOMAIN_DMA) - iommu_setup_dma_ops(dev, base, - __DOMAIN_MAX_ADDR(dmar_domain->gaw) - base); + iommu_setup_dma_ops(dev, 0, U64_MAX); else set_dma_ops(dev, NULL); } @@ -5331,6 +5323,48 @@ static int intel_iommu_disable_auxd(struct device *dev) return 0; } +static int intel_iommu_enable_sva(struct device *dev) +{ + struct device_domain_info *info = get_domain_info(dev); + struct intel_iommu *iommu; + int ret; + + if (!info || dmar_disabled) + return -EINVAL; + + iommu = info->iommu; + if (!iommu) + return -EINVAL; + + if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE)) + return -ENODEV; + + if (intel_iommu_enable_pasid(iommu, dev)) + return -ENODEV; + + if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) + return -EINVAL; + + ret = iopf_queue_add_device(iommu->iopf_queue, dev); + if (!ret) + ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); + + return ret; +} + +static int intel_iommu_disable_sva(struct device *dev) +{ + struct device_domain_info *info = get_domain_info(dev); + struct intel_iommu *iommu = info->iommu; + int ret; + + ret = iommu_unregister_device_fault_handler(dev); + if (!ret) + ret = iopf_queue_remove_device(iommu->iopf_queue, dev); + + return ret; +} + /* * A PCI express designated vendor specific extended capability is defined * in the section 3.7 of Intel scalable I/O virtualization technical spec @@ -5392,35 +5426,37 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat) static int intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) { - if (feat == IOMMU_DEV_FEAT_AUX) + switch (feat) { + case IOMMU_DEV_FEAT_AUX: return intel_iommu_enable_auxd(dev); - if (feat == IOMMU_DEV_FEAT_IOPF) + case IOMMU_DEV_FEAT_IOPF: return intel_iommu_dev_has_feat(dev, feat) ? 
0 : -ENODEV; - if (feat == IOMMU_DEV_FEAT_SVA) { - struct device_domain_info *info = get_domain_info(dev); - - if (!info) - return -EINVAL; + case IOMMU_DEV_FEAT_SVA: + return intel_iommu_enable_sva(dev); - if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) - return -EINVAL; - - if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) - return 0; + default: + return -ENODEV; } - - return -ENODEV; } static int intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) { - if (feat == IOMMU_DEV_FEAT_AUX) + switch (feat) { + case IOMMU_DEV_FEAT_AUX: return intel_iommu_disable_auxd(dev); - return -ENODEV; + case IOMMU_DEV_FEAT_IOPF: + return 0; + + case IOMMU_DEV_FEAT_SVA: + return intel_iommu_disable_sva(dev); + + default: + return -ENODEV; + } } static bool @@ -5457,7 +5493,7 @@ intel_iommu_enable_nesting(struct iommu_domain *domain) int ret = -ENODEV; spin_lock_irqsave(&device_domain_lock, flags); - if (nested_mode_support() && list_empty(&dmar_domain->devices)) { + if (list_empty(&dmar_domain->devices)) { dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE; dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL; ret = 0; diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 72dc84821dad..c6cf44a6c923 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/** +/* * intel-pasid.c - PASID idr, table and entry manipulation * * Copyright (C) 2018 Intel Corporation diff --git a/drivers/iommu/intel/perf.c b/drivers/iommu/intel/perf.c new file mode 100644 index 000000000000..73b7ec705552 --- /dev/null +++ b/drivers/iommu/intel/perf.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 +/** + * perf.c - performance monitor + * + * Copyright (C) 2021 Intel Corporation + * + * Author: Lu Baolu <baolu.lu@linux.intel.com> + * Fenghua Yu <fenghua.yu@intel.com> + */ + +#include <linux/spinlock.h> +#include <linux/intel-iommu.h> + +#include "perf.h" + +static DEFINE_SPINLOCK(latency_lock); + +bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type) +{ + struct latency_statistic *lstat = iommu->perf_statistic; + + return lstat && lstat[type].enabled; +} + +int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type) +{ + struct latency_statistic *lstat; + unsigned long flags; + int ret = -EBUSY; + + if (dmar_latency_enabled(iommu, type)) + return 0; + + spin_lock_irqsave(&latency_lock, flags); + if (!iommu->perf_statistic) { + iommu->perf_statistic = kzalloc(sizeof(*lstat) * DMAR_LATENCY_NUM, + GFP_ATOMIC); + if (!iommu->perf_statistic) { + ret = -ENOMEM; + goto unlock_out; + } + } + + lstat = iommu->perf_statistic; + + if (!lstat[type].enabled) { + lstat[type].enabled = true; + lstat[type].counter[COUNTS_MIN] = UINT_MAX; + ret = 0; + } +unlock_out: + spin_unlock_irqrestore(&latency_lock, flags); + + return ret; +} + +void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type) +{ + struct latency_statistic *lstat = iommu->perf_statistic; + unsigned long flags; + + if (!dmar_latency_enabled(iommu, type)) + return; + + spin_lock_irqsave(&latency_lock, flags); + memset(&lstat[type], 0, sizeof(*lstat) * DMAR_LATENCY_NUM); + spin_unlock_irqrestore(&latency_lock, flags); +} + +void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency) +{ + struct latency_statistic *lstat = iommu->perf_statistic; + unsigned long flags; + u64 min, max; + + if (!dmar_latency_enabled(iommu, type)) + return; + + 
spin_lock_irqsave(&latency_lock, flags); + if (latency < 100) + lstat[type].counter[COUNTS_10e2]++; + else if (latency < 1000) + lstat[type].counter[COUNTS_10e3]++; + else if (latency < 10000) + lstat[type].counter[COUNTS_10e4]++; + else if (latency < 100000) + lstat[type].counter[COUNTS_10e5]++; + else if (latency < 1000000) + lstat[type].counter[COUNTS_10e6]++; + else if (latency < 10000000) + lstat[type].counter[COUNTS_10e7]++; + else + lstat[type].counter[COUNTS_10e8_plus]++; + + min = lstat[type].counter[COUNTS_MIN]; + max = lstat[type].counter[COUNTS_MAX]; + lstat[type].counter[COUNTS_MIN] = min_t(u64, min, latency); + lstat[type].counter[COUNTS_MAX] = max_t(u64, max, latency); + lstat[type].counter[COUNTS_SUM] += latency; + lstat[type].samples++; + spin_unlock_irqrestore(&latency_lock, flags); +} + +static char *latency_counter_names[] = { + " <0.1us", + " 0.1us-1us", " 1us-10us", " 10us-100us", + " 100us-1ms", " 1ms-10ms", " >=10ms", + " min(us)", " max(us)", " average(us)" +}; + +static char *latency_type_names[] = { + " inv_iotlb", " inv_devtlb", " inv_iec", + " svm_prq" +}; + +int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size) +{ + struct latency_statistic *lstat = iommu->perf_statistic; + unsigned long flags; + int bytes = 0, i, j; + + memset(str, 0, size); + + for (i = 0; i < COUNTS_NUM; i++) + bytes += snprintf(str + bytes, size - bytes, + "%s", latency_counter_names[i]); + + spin_lock_irqsave(&latency_lock, flags); + for (i = 0; i < DMAR_LATENCY_NUM; i++) { + if (!dmar_latency_enabled(iommu, i)) + continue; + + bytes += snprintf(str + bytes, size - bytes, + "\n%s", latency_type_names[i]); + + for (j = 0; j < COUNTS_NUM; j++) { + u64 val = lstat[i].counter[j]; + + switch (j) { + case COUNTS_MIN: + if (val == UINT_MAX) + val = 0; + else + val = div_u64(val, 1000); + break; + case COUNTS_MAX: + val = div_u64(val, 1000); + break; + case COUNTS_SUM: + if (lstat[i].samples) + val = div_u64(val, (lstat[i].samples * 1000)); + else + val = 0; + break; + default: + break; + } + + bytes += snprintf(str + bytes, size - bytes, + "%12lld", val); + } + } + spin_unlock_irqrestore(&latency_lock, flags); + + return bytes; +} diff --git a/drivers/iommu/intel/perf.h b/drivers/iommu/intel/perf.h new file mode 100644 index 000000000000..fd6db8049d1a --- /dev/null +++ b/drivers/iommu/intel/perf.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * perf.h - performance monitor header + * + * Copyright (C) 2021 Intel Corporation + * + * Author: Lu Baolu <baolu.lu@linux.intel.com> + */ + +enum latency_type { + DMAR_LATENCY_INV_IOTLB = 0, + DMAR_LATENCY_INV_DEVTLB, + DMAR_LATENCY_INV_IEC, + DMAR_LATENCY_PRQ, + DMAR_LATENCY_NUM +}; + +enum latency_count { + COUNTS_10e2 = 0, /* < 0.1us */ + COUNTS_10e3, /* 0.1us ~ 1us */ + COUNTS_10e4, /* 1us ~ 10us */ + COUNTS_10e5, /* 10us ~ 100us */ + COUNTS_10e6, /* 100us ~ 1ms */ + COUNTS_10e7, /* 1ms ~ 10ms */ + COUNTS_10e8_plus, /* 10ms and plus*/ + COUNTS_MIN, + COUNTS_MAX, + COUNTS_SUM, + COUNTS_NUM +}; + +struct latency_statistic { + bool enabled; + u64 counter[COUNTS_NUM]; + u64 samples; +}; + +#ifdef CONFIG_DMAR_PERF +int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type); +void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type); +bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type); +void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, + u64 latency); +int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size); 
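For context on the units: dmar_latency_update() takes a delta in nanoseconds, so the buckets above correspond to <0.1us, 0.1us-1us and so on, and the snapshot divides by 1000 to report microseconds. A minimal sketch of how the helpers are meant to be paired, mirroring the qi_submit_sync() instrumentation earlier in this series (iommu and the timed invalidation are placeholders):

	s64 start = 0;

	if (dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB))
		start = ktime_to_ns(ktime_get());

	/* ... submit the IOTLB invalidation and wait for completion ... */

	if (start)
		dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB,
				    ktime_to_ns(ktime_get()) - start);

Sampling is switched on and off at run time through the new dmar_perf_latency debugfs file (writing 1-4 enables the IOTLB, devTLB, IEC and PRQ counters respectively, 0 disables them all), and reading the same file prints the per-IOMMU snapshot.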
+#else +static inline int +dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type) +{ + return -EINVAL; +} + +static inline void +dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type) +{ +} + +static inline bool +dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type) +{ + return false; +} + +static inline void +dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency) +{ +} + +static inline int +dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size) +{ + return 0; +} +#endif /* CONFIG_DMAR_PERF */ diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 5165cea90421..9b0f22bc0514 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -17,19 +17,76 @@ #include <linux/dmar.h> #include <linux/interrupt.h> #include <linux/mm_types.h> +#include <linux/xarray.h> #include <linux/ioasid.h> #include <asm/page.h> #include <asm/fpu/api.h> +#include <trace/events/intel_iommu.h> #include "pasid.h" +#include "perf.h" +#include "../iommu-sva-lib.h" static irqreturn_t prq_event_thread(int irq, void *d); static void intel_svm_drain_prq(struct device *dev, u32 pasid); +#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) #define PRQ_ORDER 0 +static DEFINE_XARRAY_ALLOC(pasid_private_array); +static int pasid_private_add(ioasid_t pasid, void *priv) +{ + return xa_alloc(&pasid_private_array, &pasid, priv, + XA_LIMIT(pasid, pasid), GFP_ATOMIC); +} + +static void pasid_private_remove(ioasid_t pasid) +{ + xa_erase(&pasid_private_array, pasid); +} + +static void *pasid_private_find(ioasid_t pasid) +{ + return xa_load(&pasid_private_array, pasid); +} + +static struct intel_svm_dev * +svm_lookup_device_by_sid(struct intel_svm *svm, u16 sid) +{ + struct intel_svm_dev *sdev = NULL, *t; + + rcu_read_lock(); + list_for_each_entry_rcu(t, &svm->devs, list) { + if (t->sid == sid) { + sdev = t; + break; + } + } + rcu_read_unlock(); + + return sdev; +} + +static struct intel_svm_dev * +svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev) +{ + struct intel_svm_dev *sdev = NULL, *t; + + rcu_read_lock(); + list_for_each_entry_rcu(t, &svm->devs, list) { + if (t->dev == dev) { + sdev = t; + break; + } + } + rcu_read_unlock(); + + return sdev; +} + int intel_svm_enable_prq(struct intel_iommu *iommu) { + struct iopf_queue *iopfq; struct page *pages; int irq, ret; @@ -46,13 +103,20 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n", iommu->name); ret = -EINVAL; - err: - free_pages((unsigned long)iommu->prq, PRQ_ORDER); - iommu->prq = NULL; - return ret; + goto free_prq; } iommu->pr_irq = irq; + snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name), + "dmar%d-iopfq", iommu->seq_id); + iopfq = iopf_queue_alloc(iommu->iopfq_name); + if (!iopfq) { + pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name); + ret = -ENOMEM; + goto free_hwirq; + } + iommu->iopf_queue = iopfq; + snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id); ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT, @@ -60,9 +124,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) if (ret) { pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n", iommu->name); - dmar_free_hwirq(irq); - iommu->pr_irq = 0; - goto err; + goto free_iopfq; } dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); @@ -71,6 +133,18 @@ int 
intel_svm_enable_prq(struct intel_iommu *iommu) init_completion(&iommu->prq_complete); return 0; + +free_iopfq: + iopf_queue_free(iommu->iopf_queue); + iommu->iopf_queue = NULL; +free_hwirq: + dmar_free_hwirq(irq); + iommu->pr_irq = 0; +free_prq: + free_pages((unsigned long)iommu->prq, PRQ_ORDER); + iommu->prq = NULL; + + return ret; } int intel_svm_finish_prq(struct intel_iommu *iommu) @@ -85,6 +159,11 @@ int intel_svm_finish_prq(struct intel_iommu *iommu) iommu->pr_irq = 0; } + if (iommu->iopf_queue) { + iopf_queue_free(iommu->iopf_queue); + iommu->iopf_queue = NULL; + } + free_pages((unsigned long)iommu->prq, PRQ_ORDER); iommu->prq = NULL; @@ -204,17 +283,12 @@ static const struct mmu_notifier_ops intel_mmuops = { }; static DEFINE_MUTEX(pasid_mutex); -static LIST_HEAD(global_svm_list); - -#define for_each_svm_dev(sdev, svm, d) \ - list_for_each_entry((sdev), &(svm)->devs, list) \ - if ((d) != (sdev)->dev) {} else static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, struct intel_svm **rsvm, struct intel_svm_dev **rsdev) { - struct intel_svm_dev *d, *sdev = NULL; + struct intel_svm_dev *sdev = NULL; struct intel_svm *svm; /* The caller should hold the pasid_mutex lock */ @@ -224,7 +298,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, if (pasid == INVALID_IOASID || pasid >= PASID_MAX) return -EINVAL; - svm = ioasid_find(NULL, pasid, NULL); + svm = pasid_private_find(pasid); if (IS_ERR(svm)) return PTR_ERR(svm); @@ -237,15 +311,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, */ if (WARN_ON(list_empty(&svm->devs))) return -EINVAL; - - rcu_read_lock(); - list_for_each_entry_rcu(d, &svm->devs, list) { - if (d->dev == dev) { - sdev = d; - break; - } - } - rcu_read_unlock(); + sdev = svm_lookup_device_by_dev(svm, dev); out: *rsvm = svm; @@ -334,7 +400,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, svm->gpasid = data->gpasid; svm->flags |= SVM_FLAG_GUEST_PASID; } - ioasid_set_data(data->hpasid, svm); + pasid_private_add(data->hpasid, svm); INIT_LIST_HEAD_RCU(&svm->devs); mmput(svm->mm); } @@ -388,7 +454,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, list_add_rcu(&sdev->list, &svm->devs); out: if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) { - ioasid_set_data(data->hpasid, NULL); + pasid_private_remove(data->hpasid); kfree(svm); } @@ -431,7 +497,7 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid) * the unbind, IOMMU driver will get notified * and perform cleanup. */ - ioasid_set_data(pasid, NULL); + pasid_private_remove(pasid); kfree(svm); } } @@ -459,79 +525,81 @@ static void load_pasid(struct mm_struct *mm, u32 pasid) mutex_unlock(&mm->context.lock); } -/* Caller must hold pasid_mutex, mm reference */ -static int -intel_svm_bind_mm(struct device *dev, unsigned int flags, - struct mm_struct *mm, struct intel_svm_dev **sd) +static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, + unsigned int flags) { - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); - struct intel_svm *svm = NULL, *t; - struct device_domain_info *info; - struct intel_svm_dev *sdev; - unsigned long iflags; - int pasid_max; - int ret; + ioasid_t max_pasid = dev_is_pci(dev) ? 
+ pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id; - if (!iommu || dmar_disabled) - return -EINVAL; + return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1); +} - if (!intel_svm_capable(iommu)) - return -ENOTSUPP; +static void intel_svm_free_pasid(struct mm_struct *mm) +{ + iommu_sva_free_pasid(mm); +} - if (dev_is_pci(dev)) { - pasid_max = pci_max_pasids(to_pci_dev(dev)); - if (pasid_max < 0) - return -EINVAL; - } else - pasid_max = 1 << 20; +static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, + struct device *dev, + struct mm_struct *mm, + unsigned int flags) +{ + struct device_domain_info *info = get_domain_info(dev); + unsigned long iflags, sflags; + struct intel_svm_dev *sdev; + struct intel_svm *svm; + int ret = 0; - /* Bind supervisor PASID shuld have mm = NULL */ - if (flags & SVM_FLAG_SUPERVISOR_MODE) { - if (!ecap_srs(iommu->ecap) || mm) { - pr_err("Supervisor PASID with user provided mm.\n"); - return -EINVAL; - } - } + svm = pasid_private_find(mm->pasid); + if (!svm) { + svm = kzalloc(sizeof(*svm), GFP_KERNEL); + if (!svm) + return ERR_PTR(-ENOMEM); - list_for_each_entry(t, &global_svm_list, list) { - if (t->mm != mm) - continue; + svm->pasid = mm->pasid; + svm->mm = mm; + svm->flags = flags; + INIT_LIST_HEAD_RCU(&svm->devs); - svm = t; - if (svm->pasid >= pasid_max) { - dev_warn(dev, - "Limited PASID width. Cannot use existing PASID %d\n", - svm->pasid); - ret = -ENOSPC; - goto out; + if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) { + svm->notifier.ops = &intel_mmuops; + ret = mmu_notifier_register(&svm->notifier, mm); + if (ret) { + kfree(svm); + return ERR_PTR(ret); + } } - /* Find the matching device in svm list */ - for_each_svm_dev(sdev, svm, dev) { - sdev->users++; - goto success; + ret = pasid_private_add(svm->pasid, svm); + if (ret) { + if (svm->notifier.ops) + mmu_notifier_unregister(&svm->notifier, mm); + kfree(svm); + return ERR_PTR(ret); } + } - break; + /* Find the matching device in svm list */ + sdev = svm_lookup_device_by_dev(svm, dev); + if (sdev) { + sdev->users++; + goto success; } sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); if (!sdev) { ret = -ENOMEM; - goto out; + goto free_svm; } + sdev->dev = dev; sdev->iommu = iommu; - - ret = intel_iommu_enable_pasid(iommu, dev); - if (ret) { - kfree(sdev); - goto out; - } - - info = get_domain_info(dev); sdev->did = FLPT_DEFAULT_DID; sdev->sid = PCI_DEVID(info->bus, info->devfn); + sdev->users = 1; + sdev->pasid = svm->pasid; + sdev->sva.dev = dev; + init_rcu_head(&sdev->rcu); if (info->ats_enabled) { sdev->dev_iotlb = 1; sdev->qdep = info->ats_qdep; @@ -539,95 +607,37 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, sdev->qdep = 0; } - /* Finish the setup now we know we're keeping it */ - sdev->users = 1; - init_rcu_head(&sdev->rcu); - - if (!svm) { - svm = kzalloc(sizeof(*svm), GFP_KERNEL); - if (!svm) { - ret = -ENOMEM; - kfree(sdev); - goto out; - } - - if (pasid_max > intel_pasid_max_id) - pasid_max = intel_pasid_max_id; + /* Setup the pasid table: */ + sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ? + PASID_FLAG_SUPERVISOR_MODE : 0; + sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? 
PASID_FLAG_FL5LP : 0; + spin_lock_irqsave(&iommu->lock, iflags); + ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid, + FLPT_DEFAULT_DID, sflags); + spin_unlock_irqrestore(&iommu->lock, iflags); - /* Do not use PASID 0, reserved for RID to PASID */ - svm->pasid = ioasid_alloc(NULL, PASID_MIN, - pasid_max - 1, svm); - if (svm->pasid == INVALID_IOASID) { - kfree(svm); - kfree(sdev); - ret = -ENOSPC; - goto out; - } - svm->notifier.ops = &intel_mmuops; - svm->mm = mm; - svm->flags = flags; - INIT_LIST_HEAD_RCU(&svm->devs); - INIT_LIST_HEAD(&svm->list); - ret = -ENOMEM; - if (mm) { - ret = mmu_notifier_register(&svm->notifier, mm); - if (ret) { - ioasid_put(svm->pasid); - kfree(svm); - kfree(sdev); - goto out; - } - } + if (ret) + goto free_sdev; - spin_lock_irqsave(&iommu->lock, iflags); - ret = intel_pasid_setup_first_level(iommu, dev, - mm ? mm->pgd : init_mm.pgd, - svm->pasid, FLPT_DEFAULT_DID, - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | - (cpu_feature_enabled(X86_FEATURE_LA57) ? - PASID_FLAG_FL5LP : 0)); - spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) { - if (mm) - mmu_notifier_unregister(&svm->notifier, mm); - ioasid_put(svm->pasid); - kfree(svm); - kfree(sdev); - goto out; - } + /* The newly allocated pasid is loaded to the mm. */ + if (!(flags & SVM_FLAG_SUPERVISOR_MODE) && list_empty(&svm->devs)) + load_pasid(mm, svm->pasid); - list_add_tail(&svm->list, &global_svm_list); - if (mm) { - /* The newly allocated pasid is loaded to the mm. */ - load_pasid(mm, svm->pasid); - } - } else { - /* - * Binding a new device with existing PASID, need to setup - * the PASID entry. - */ - spin_lock_irqsave(&iommu->lock, iflags); - ret = intel_pasid_setup_first_level(iommu, dev, - mm ? mm->pgd : init_mm.pgd, - svm->pasid, FLPT_DEFAULT_DID, - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | - (cpu_feature_enabled(X86_FEATURE_LA57) ? - PASID_FLAG_FL5LP : 0)); - spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) { - kfree(sdev); - goto out; - } - } list_add_rcu(&sdev->list, &svm->devs); success: - sdev->pasid = svm->pasid; - sdev->sva.dev = dev; - if (sd) - *sd = sdev; - ret = 0; -out: - return ret; + return &sdev->sva; + +free_sdev: + kfree(sdev); +free_svm: + if (list_empty(&svm->devs)) { + if (svm->notifier.ops) + mmu_notifier_unregister(&svm->notifier, mm); + pasid_private_remove(mm->pasid); + kfree(svm); + } + + return ERR_PTR(ret); } /* Caller must hold pasid_mutex */ @@ -636,6 +646,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) struct intel_svm_dev *sdev; struct intel_iommu *iommu; struct intel_svm *svm; + struct mm_struct *mm; int ret = -EINVAL; iommu = device_to_iommu(dev, NULL, NULL); @@ -645,6 +656,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev); if (ret) goto out; + mm = svm->mm; if (sdev) { sdev->users--; @@ -663,13 +675,13 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { - ioasid_put(svm->pasid); - if (svm->mm) { - mmu_notifier_unregister(&svm->notifier, svm->mm); + intel_svm_free_pasid(mm); + if (svm->notifier.ops) { + mmu_notifier_unregister(&svm->notifier, mm); /* Clear mm's pasid. */ - load_pasid(svm->mm, PASID_DISABLED); + load_pasid(mm, PASID_DISABLED); } - list_del(&svm->list); + pasid_private_remove(svm->pasid); /* We mandate that no page faults may be outstanding * for the PASID when intel_svm_unbind_mm() is called. * If that is not obeyed, subtle errors will happen. 
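From a device driver's point of view, the reworked bind path above is reached through the generic SVA API rather than by calling the Intel code directly. A rough, hypothetical consumer-side sketch (not part of this series) of how a driver would put the IOPF queue and PASID allocation added here to use, assuming the usual convention of enabling IOPF before SVA:

	/* Hypothetical driver helper; dev is the PCI function doing SVA DMA. */
	static int example_bind_process(struct device *dev, struct mm_struct *mm,
					u32 *pasid)
	{
		struct iommu_sva *handle;
		int ret;

		ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
		if (ret)
			return ret;

		ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
		if (ret) {
			iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_IOPF);
			return ret;
		}

		handle = iommu_sva_bind_device(dev, mm, NULL);
		if (IS_ERR(handle))
			return PTR_ERR(handle);	/* feature cleanup omitted in this sketch */

		*pasid = iommu_sva_get_pasid(handle);	/* program into the device */
		return 0;
	}

Unbinding goes through iommu_sva_unbind_device(), and, as the comment just above notes, the driver must ensure no page faults are still outstanding for the PASID before it does so.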
@@ -714,22 +726,6 @@ struct page_req_dsc { #define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) -static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req) -{ - unsigned long requested = 0; - - if (req->exe_req) - requested |= VM_EXEC; - - if (req->rd_req) - requested |= VM_READ; - - if (req->wr_req) - requested |= VM_WRITE; - - return (requested & ~vma->vm_flags) != 0; -} - static bool is_canonical_address(u64 addr) { int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1); @@ -799,6 +795,8 @@ prq_retry: goto prq_retry; } + iopf_queue_flush_dev(dev); + /* * Perform steps described in VT-d spec CH7.10 to drain page * requests and responses in hardware. @@ -841,8 +839,8 @@ static int prq_to_iommu_prot(struct page_req_dsc *req) return prot; } -static int -intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc) +static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev, + struct page_req_dsc *desc) { struct iommu_fault_event event; @@ -872,159 +870,136 @@ intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc) */ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA; - memcpy(event.fault.prm.private_data, desc->priv_data, - sizeof(desc->priv_data)); + event.fault.prm.private_data[0] = desc->priv_data[0]; + event.fault.prm.private_data[1] = desc->priv_data[1]; + } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) { + /* + * If the private data fields are not used by hardware, use it + * to monitor the prq handle latency. + */ + event.fault.prm.private_data[0] = ktime_to_ns(ktime_get()); } return iommu_report_device_fault(dev, &event); } +static void handle_bad_prq_event(struct intel_iommu *iommu, + struct page_req_dsc *req, int result) +{ + struct qi_desc desc; + + pr_err("%s: Invalid page request: %08llx %08llx\n", + iommu->name, ((unsigned long long *)req)[0], + ((unsigned long long *)req)[1]); + + /* + * Per VT-d spec. v3.0 ch7.7, system software must + * respond with page group response if private data + * is present (PDP) or last page in group (LPIG) bit + * is set. This is an additional VT-d feature beyond + * PCI ATS spec. + */ + if (!req->lpig && !req->priv_data_present) + return; + + desc.qw0 = QI_PGRP_PASID(req->pasid) | + QI_PGRP_DID(req->rid) | + QI_PGRP_PASID_P(req->pasid_present) | + QI_PGRP_PDP(req->priv_data_present) | + QI_PGRP_RESP_CODE(result) | + QI_PGRP_RESP_TYPE; + desc.qw1 = QI_PGRP_IDX(req->prg_index) | + QI_PGRP_LPIG(req->lpig); + + if (req->priv_data_present) { + desc.qw2 = req->priv_data[0]; + desc.qw3 = req->priv_data[1]; + } else { + desc.qw2 = 0; + desc.qw3 = 0; + } + + qi_submit_sync(iommu, &desc, 1, 0); +} + static irqreturn_t prq_event_thread(int irq, void *d) { struct intel_svm_dev *sdev = NULL; struct intel_iommu *iommu = d; struct intel_svm *svm = NULL; - int head, tail, handled = 0; - unsigned int flags = 0; + struct page_req_dsc *req; + int head, tail, handled; + u64 address; - /* Clear PPR bit before reading head/tail registers, to - * ensure that we get a new interrupt if needed. */ + /* + * Clear PPR bit before reading head/tail registers, to ensure that + * we get a new interrupt if needed. 
+ */ writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG); tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; + handled = (head != tail); while (head != tail) { - struct vm_area_struct *vma; - struct page_req_dsc *req; - struct qi_desc resp; - int result; - vm_fault_t ret; - u64 address; - - handled = 1; req = &iommu->prq[head / sizeof(*req)]; - result = QI_RESP_INVALID; address = (u64)req->addr << VTD_PAGE_SHIFT; - if (!req->pasid_present) { - pr_err("%s: Page request without PASID: %08llx %08llx\n", - iommu->name, ((unsigned long long *)req)[0], - ((unsigned long long *)req)[1]); - goto no_pasid; + + if (unlikely(!req->pasid_present)) { + pr_err("IOMMU: %s: Page request without PASID\n", + iommu->name); +bad_req: + svm = NULL; + sdev = NULL; + handle_bad_prq_event(iommu, req, QI_RESP_INVALID); + goto prq_advance; } - /* We shall not receive page request for supervisor SVM */ - if (req->pm_req && (req->rd_req | req->wr_req)) { - pr_err("Unexpected page request in Privilege Mode"); - /* No need to find the matching sdev as for bad_req */ - goto no_pasid; + + if (unlikely(!is_canonical_address(address))) { + pr_err("IOMMU: %s: Address is not canonical\n", + iommu->name); + goto bad_req; } - /* DMA read with exec requeset is not supported. */ - if (req->exe_req && req->rd_req) { - pr_err("Execution request not supported\n"); - goto no_pasid; + + if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) { + pr_err("IOMMU: %s: Page request in Privilege Mode\n", + iommu->name); + goto bad_req; } + + if (unlikely(req->exe_req && req->rd_req)) { + pr_err("IOMMU: %s: Execution request not supported\n", + iommu->name); + goto bad_req; + } + if (!svm || svm->pasid != req->pasid) { - rcu_read_lock(); - svm = ioasid_find(NULL, req->pasid, NULL); - /* It *can't* go away, because the driver is not permitted + /* + * It can't go away, because the driver is not permitted * to unbind the mm while any page faults are outstanding. - * So we only need RCU to protect the internal idr code. */ - rcu_read_unlock(); - if (IS_ERR_OR_NULL(svm)) { - pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n", - iommu->name, req->pasid, ((unsigned long long *)req)[0], - ((unsigned long long *)req)[1]); - goto no_pasid; - } + */ + svm = pasid_private_find(req->pasid); + if (IS_ERR_OR_NULL(svm) || (svm->flags & SVM_FLAG_SUPERVISOR_MODE)) + goto bad_req; } if (!sdev || sdev->sid != req->rid) { - struct intel_svm_dev *t; - - sdev = NULL; - rcu_read_lock(); - list_for_each_entry_rcu(t, &svm->devs, list) { - if (t->sid == req->rid) { - sdev = t; - break; - } - } - rcu_read_unlock(); + sdev = svm_lookup_device_by_sid(svm, req->rid); + if (!sdev) + goto bad_req; } - /* Since we're using init_mm.pgd directly, we should never take - * any faults on kernel addresses. */ - if (!svm->mm) - goto bad_req; - - /* If address is not canonical, return invalid response */ - if (!is_canonical_address(address)) - goto bad_req; + sdev->prq_seq_number++; /* * If prq is to be handled outside iommu driver via receiver of * the fault notifiers, we skip the page response here. */ - if (svm->flags & SVM_FLAG_GUEST_MODE) { - if (sdev && !intel_svm_prq_report(sdev->dev, req)) - goto prq_advance; - else - goto bad_req; - } - - /* If the mm is already defunct, don't handle faults. 
*/ - if (!mmget_not_zero(svm->mm)) - goto bad_req; - - mmap_read_lock(svm->mm); - vma = find_extend_vma(svm->mm, address); - if (!vma || address < vma->vm_start) - goto invalid; - - if (access_error(vma, req)) - goto invalid; + if (intel_svm_prq_report(iommu, sdev->dev, req)) + handle_bad_prq_event(iommu, req, QI_RESP_INVALID); - flags = FAULT_FLAG_USER | FAULT_FLAG_REMOTE; - if (req->wr_req) - flags |= FAULT_FLAG_WRITE; - - ret = handle_mm_fault(vma, address, flags, NULL); - if (ret & VM_FAULT_ERROR) - goto invalid; - - result = QI_RESP_SUCCESS; -invalid: - mmap_read_unlock(svm->mm); - mmput(svm->mm); -bad_req: - /* We get here in the error case where the PASID lookup failed, - and these can be NULL. Do not use them below this point! */ - sdev = NULL; - svm = NULL; -no_pasid: - if (req->lpig || req->priv_data_present) { - /* - * Per VT-d spec. v3.0 ch7.7, system software must - * respond with page group response if private data - * is present (PDP) or last page in group (LPIG) bit - * is set. This is an additional VT-d feature beyond - * PCI ATS spec. - */ - resp.qw0 = QI_PGRP_PASID(req->pasid) | - QI_PGRP_DID(req->rid) | - QI_PGRP_PASID_P(req->pasid_present) | - QI_PGRP_PDP(req->priv_data_present) | - QI_PGRP_RESP_CODE(result) | - QI_PGRP_RESP_TYPE; - resp.qw1 = QI_PGRP_IDX(req->prg_index) | - QI_PGRP_LPIG(req->lpig); - resp.qw2 = 0; - resp.qw3 = 0; - - if (req->priv_data_present) - memcpy(&resp.qw2, req->priv_data, - sizeof(req->priv_data)); - qi_submit_sync(iommu, &resp, 1, 0); - } + trace_prq_report(iommu, sdev->dev, req->qw_0, req->qw_1, + req->priv_data[0], req->priv_data[1], + sdev->prq_seq_number); prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; } @@ -1041,6 +1016,7 @@ prq_advance: head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; if (head == tail) { + iopf_queue_discard_partial(iommu->iopf_queue); writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG); pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared", iommu->name); @@ -1053,31 +1029,42 @@ prq_advance: return IRQ_RETVAL(handled); } -#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) -struct iommu_sva * -intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) +struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) { - struct iommu_sva *sva = ERR_PTR(-EINVAL); - struct intel_svm_dev *sdev = NULL; + struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); unsigned int flags = 0; + struct iommu_sva *sva; int ret; - /* - * TODO: Consolidate with generic iommu-sva bind after it is merged. - * It will require shared SVM data structures, i.e. combine io_mm - * and intel_svm etc. 
- */ if (drvdata) flags = *(unsigned int *)drvdata; + + if (flags & SVM_FLAG_SUPERVISOR_MODE) { + if (!ecap_srs(iommu->ecap)) { + dev_err(dev, "%s: Supervisor PASID not supported\n", + iommu->name); + return ERR_PTR(-EOPNOTSUPP); + } + + if (mm) { + dev_err(dev, "%s: Supervisor PASID with user provided mm\n", + iommu->name); + return ERR_PTR(-EINVAL); + } + + mm = &init_mm; + } + mutex_lock(&pasid_mutex); - ret = intel_svm_bind_mm(dev, flags, mm, &sdev); - if (ret) - sva = ERR_PTR(ret); - else if (sdev) - sva = &sdev->sva; - else - WARN(!sdev, "SVM bind succeeded with no sdev!\n"); + ret = intel_svm_alloc_pasid(dev, mm, flags); + if (ret) { + mutex_unlock(&pasid_mutex); + return ERR_PTR(ret); + } + sva = intel_svm_bind_mm(iommu, dev, mm, flags); + if (IS_ERR_OR_NULL(sva)) + intel_svm_free_pasid(mm); mutex_unlock(&pasid_mutex); return sva; @@ -1085,10 +1072,9 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) void intel_svm_unbind(struct iommu_sva *sva) { - struct intel_svm_dev *sdev; + struct intel_svm_dev *sdev = to_intel_svm_dev(sva); mutex_lock(&pasid_mutex); - sdev = to_intel_svm_dev(sva); intel_svm_unbind_mm(sdev->dev, sdev->pasid); mutex_unlock(&pasid_mutex); } @@ -1194,9 +1180,14 @@ int intel_svm_page_response(struct device *dev, desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page); desc.qw2 = 0; desc.qw3 = 0; - if (private_present) - memcpy(&desc.qw2, prm->private_data, - sizeof(prm->private_data)); + + if (private_present) { + desc.qw2 = prm->private_data[0]; + desc.qw3 = prm->private_data[1]; + } else if (prm->private_data[0]) { + dmar_latency_update(iommu, DMAR_LATENCY_PRQ, + ktime_to_ns(ktime_get()) - prm->private_data[0]); + } qi_submit_sync(iommu, &desc, 1, 0); } diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 808ab70d5df5..5419c4b9f27a 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3059,9 +3059,6 @@ static int iommu_change_dev_def_domain(struct iommu_group *group, int ret, dev_def_dom; struct device *dev; - if (!group) - return -EINVAL; - mutex_lock(&group->mutex); if (group->default_domain != group->domain) { diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index b7ecd5b08039..b6cf5f16123b 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -412,12 +412,11 @@ private_find_iova(struct iova_domain *iovad, unsigned long pfn) return NULL; } -static void private_free_iova(struct iova_domain *iovad, struct iova *iova) +static void remove_iova(struct iova_domain *iovad, struct iova *iova) { assert_spin_locked(&iovad->iova_rbtree_lock); __cached_rbnode_delete_update(iovad, iova); rb_erase(&iova->node, &iovad->rbroot); - free_iova_mem(iova); } /** @@ -452,8 +451,9 @@ __free_iova(struct iova_domain *iovad, struct iova *iova) unsigned long flags; spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - private_free_iova(iovad, iova); + remove_iova(iovad, iova); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + free_iova_mem(iova); } EXPORT_SYMBOL_GPL(__free_iova); @@ -472,10 +472,13 @@ free_iova(struct iova_domain *iovad, unsigned long pfn) spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); iova = private_find_iova(iovad, pfn); - if (iova) - private_free_iova(iovad, iova); + if (!iova) { + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return; + } + remove_iova(iovad, iova); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - + free_iova_mem(iova); } EXPORT_SYMBOL_GPL(free_iova); @@ -825,7 +828,8 @@ iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain 
*iovad) if (WARN_ON(!iova)) continue; - private_free_iova(iovad, iova); + remove_iova(iovad, iova); + free_iova_mem(iova); } spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index aaa6a4d59057..51ea6f00db2f 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -19,7 +19,6 @@ #include <linux/iommu.h> #include <linux/of.h> #include <linux/of_device.h> -#include <linux/of_iommu.h> #include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/sizes.h> diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 7880f307cb2d..3a38352b603f 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -18,7 +18,6 @@ #include <linux/iommu.h> #include <linux/clk.h> #include <linux/err.h> -#include <linux/of_iommu.h> #include <asm/cacheflush.h> #include <linux/sizes.h> diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index e06b8a0e2b56..6f7c69688ce2 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -19,7 +19,6 @@ #include <linux/mfd/syscon.h> #include <linux/module.h> #include <linux/of_address.h> -#include <linux/of_iommu.h> #include <linux/of_irq.h> #include <linux/of_platform.h> #include <linux/platform_device.h> diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 5915d7b38211..778e66f5f1aa 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -22,7 +22,6 @@ #include <linux/list.h> #include <linux/module.h> #include <linux/of_address.h> -#include <linux/of_iommu.h> #include <linux/of_irq.h> #include <linux/of_platform.h> #include <linux/platform_device.h> diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index a9d2df001149..5696314ae69e 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -19,74 +19,6 @@ #define NO_IOMMU 1 -/** - * of_get_dma_window - Parse *dma-window property and returns 0 if found. - * - * @dn: device node - * @prefix: prefix for property name if any - * @index: index to start to parse - * @busno: Returns busno if supported. Otherwise pass NULL - * @addr: Returns address that DMA starts - * @size: Returns the range that DMA can handle - * - * This supports different formats flexibly. "prefix" can be - * configured if any. "busno" and "index" are optionally - * specified. Set 0(or NULL) if not used. - */ -int of_get_dma_window(struct device_node *dn, const char *prefix, int index, - unsigned long *busno, dma_addr_t *addr, size_t *size) -{ - const __be32 *dma_window, *end; - int bytes, cur_index = 0; - char propname[NAME_MAX], addrname[NAME_MAX], sizename[NAME_MAX]; - - if (!dn || !addr || !size) - return -EINVAL; - - if (!prefix) - prefix = ""; - - snprintf(propname, sizeof(propname), "%sdma-window", prefix); - snprintf(addrname, sizeof(addrname), "%s#dma-address-cells", prefix); - snprintf(sizename, sizeof(sizename), "%s#dma-size-cells", prefix); - - dma_window = of_get_property(dn, propname, &bytes); - if (!dma_window) - return -ENODEV; - end = dma_window + bytes / sizeof(*dma_window); - - while (dma_window < end) { - u32 cells; - const void *prop; - - /* busno is one cell if supported */ - if (busno) - *busno = be32_to_cpup(dma_window++); - - prop = of_get_property(dn, addrname, NULL); - if (!prop) - prop = of_get_property(dn, "#address-cells", NULL); - - cells = prop ? 
be32_to_cpup(prop) : of_n_addr_cells(dn); - if (!cells) - return -EINVAL; - *addr = of_read_number(dma_window, cells); - dma_window += cells; - - prop = of_get_property(dn, sizename, NULL); - cells = prop ? be32_to_cpup(prop) : of_n_size_cells(dn); - if (!cells) - return -EINVAL; - *size = of_read_number(dma_window, cells); - dma_window += cells; - - if (cur_index++ == index) - break; - } - return 0; -} -EXPORT_SYMBOL_GPL(of_get_dma_window); - static int of_iommu_xlate(struct device *dev, struct of_phandle_args *iommu_spec) { diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 26e517eb0dd3..91749654fd49 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -22,7 +22,6 @@ #include <linux/io.h> #include <linux/pm_runtime.h> #include <linux/of.h> -#include <linux/of_iommu.h> #include <linux/of_irq.h> #include <linux/of_platform.h> #include <linux/regmap.h> diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 7a2932772fdf..94b9d8e5b9a4 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -21,7 +21,6 @@ #include <linux/mm.h> #include <linux/init.h> #include <linux/of.h> -#include <linux/of_iommu.h> #include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> @@ -96,6 +95,15 @@ static const char * const rk_iommu_clocks[] = { "aclk", "iface", }; +struct rk_iommu_ops { + phys_addr_t (*pt_address)(u32 dte); + u32 (*mk_dtentries)(dma_addr_t pt_dma); + u32 (*mk_ptentries)(phys_addr_t page, int prot); + phys_addr_t (*dte_addr_phys)(u32 addr); + u32 (*dma_addr_dte)(dma_addr_t dt_dma); + u64 dma_bit_mask; +}; + struct rk_iommu { struct device *dev; void __iomem **bases; @@ -116,6 +124,7 @@ struct rk_iommudata { }; static struct device *dma_dev; +static const struct rk_iommu_ops *rk_ops; static inline void rk_table_flush(struct rk_iommu_domain *dom, dma_addr_t dma, unsigned int count) @@ -179,6 +188,33 @@ static inline phys_addr_t rk_dte_pt_address(u32 dte) return (phys_addr_t)dte & RK_DTE_PT_ADDRESS_MASK; } +/* + * In v2: + * 31:12 - PT address bit 31:0 + * 11: 8 - PT address bit 35:32 + * 7: 4 - PT address bit 39:36 + * 3: 1 - Reserved + * 0 - 1 if PT @ PT address is valid + */ +#define RK_DTE_PT_ADDRESS_MASK_V2 GENMASK_ULL(31, 4) +#define DTE_HI_MASK1 GENMASK(11, 8) +#define DTE_HI_MASK2 GENMASK(7, 4) +#define DTE_HI_SHIFT1 24 /* shift bit 8 to bit 32 */ +#define DTE_HI_SHIFT2 32 /* shift bit 4 to bit 36 */ +#define PAGE_DESC_HI_MASK1 GENMASK_ULL(39, 36) +#define PAGE_DESC_HI_MASK2 GENMASK_ULL(35, 32) + +static inline phys_addr_t rk_dte_pt_address_v2(u32 dte) +{ + u64 dte_v2 = dte; + + dte_v2 = ((dte_v2 & DTE_HI_MASK2) << DTE_HI_SHIFT2) | + ((dte_v2 & DTE_HI_MASK1) << DTE_HI_SHIFT1) | + (dte_v2 & RK_DTE_PT_ADDRESS_MASK); + + return (phys_addr_t)dte_v2; +} + static inline bool rk_dte_is_pt_valid(u32 dte) { return dte & RK_DTE_PT_VALID; @@ -189,6 +225,15 @@ static inline u32 rk_mk_dte(dma_addr_t pt_dma) return (pt_dma & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID; } +static inline u32 rk_mk_dte_v2(dma_addr_t pt_dma) +{ + pt_dma = (pt_dma & RK_DTE_PT_ADDRESS_MASK) | + ((pt_dma & PAGE_DESC_HI_MASK1) >> DTE_HI_SHIFT1) | + (pt_dma & PAGE_DESC_HI_MASK2) >> DTE_HI_SHIFT2; + + return (pt_dma & RK_DTE_PT_ADDRESS_MASK_V2) | RK_DTE_PT_VALID; +} + /* * Each PTE has a Page address, some flags and a valid bit: * +---------------------+---+-------+-+ @@ -215,11 +260,6 @@ static inline u32 rk_mk_dte(dma_addr_t pt_dma) #define RK_PTE_PAGE_READABLE BIT(1) #define RK_PTE_PAGE_VALID 
BIT(0) -static inline phys_addr_t rk_pte_page_address(u32 pte) -{ - return (phys_addr_t)pte & RK_PTE_PAGE_ADDRESS_MASK; -} - static inline bool rk_pte_is_page_valid(u32 pte) { return pte & RK_PTE_PAGE_VALID; @@ -235,6 +275,29 @@ static u32 rk_mk_pte(phys_addr_t page, int prot) return page | flags | RK_PTE_PAGE_VALID; } +/* + * In v2: + * 31:12 - Page address bit 31:0 + * 11:9 - Page address bit 34:32 + * 8:4 - Page address bit 39:35 + * 3 - Security + * 2 - Readable + * 1 - Writable + * 0 - 1 if Page @ Page address is valid + */ +#define RK_PTE_PAGE_READABLE_V2 BIT(2) +#define RK_PTE_PAGE_WRITABLE_V2 BIT(1) + +static u32 rk_mk_pte_v2(phys_addr_t page, int prot) +{ + u32 flags = 0; + + flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE_V2 : 0; + flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE_V2 : 0; + + return rk_mk_dte_v2(page) | flags; +} + static u32 rk_mk_pte_invalid(u32 pte) { return pte & ~RK_PTE_PAGE_VALID; @@ -448,10 +511,10 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu) * and verifying that upper 5 nybbles are read back. */ for (i = 0; i < iommu->num_mmu; i++) { - rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, DTE_ADDR_DUMMY); + dte_addr = rk_ops->pt_address(DTE_ADDR_DUMMY); + rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, dte_addr); - dte_addr = rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR); - if (dte_addr != (DTE_ADDR_DUMMY & RK_DTE_PT_ADDRESS_MASK)) { + if (dte_addr != rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR)) { dev_err(iommu->dev, "Error during raw reset. MMU_DTE_ADDR is not functioning\n"); return -EFAULT; } @@ -470,6 +533,31 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu) return 0; } +static inline phys_addr_t rk_dte_addr_phys(u32 addr) +{ + return (phys_addr_t)addr; +} + +static inline u32 rk_dma_addr_dte(dma_addr_t dt_dma) +{ + return dt_dma; +} + +#define DT_HI_MASK GENMASK_ULL(39, 32) +#define DT_SHIFT 28 + +static inline phys_addr_t rk_dte_addr_phys_v2(u32 addr) +{ + return (phys_addr_t)(addr & RK_DTE_PT_ADDRESS_MASK) | + ((addr & DT_HI_MASK) << DT_SHIFT); +} + +static inline u32 rk_dma_addr_dte_v2(dma_addr_t dt_dma) +{ + return (dt_dma & RK_DTE_PT_ADDRESS_MASK) | + ((dt_dma & DT_HI_MASK) >> DT_SHIFT); +} + static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova) { void __iomem *base = iommu->bases[index]; @@ -489,7 +577,7 @@ static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova) page_offset = rk_iova_page_offset(iova); mmu_dte_addr = rk_iommu_read(base, RK_MMU_DTE_ADDR); - mmu_dte_addr_phys = (phys_addr_t)mmu_dte_addr; + mmu_dte_addr_phys = rk_ops->dte_addr_phys(mmu_dte_addr); dte_addr_phys = mmu_dte_addr_phys + (4 * dte_index); dte_addr = phys_to_virt(dte_addr_phys); @@ -498,14 +586,14 @@ static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova) if (!rk_dte_is_pt_valid(dte)) goto print_it; - pte_addr_phys = rk_dte_pt_address(dte) + (pte_index * 4); + pte_addr_phys = rk_ops->pt_address(dte) + (pte_index * 4); pte_addr = phys_to_virt(pte_addr_phys); pte = *pte_addr; if (!rk_pte_is_page_valid(pte)) goto print_it; - page_addr_phys = rk_pte_page_address(pte) + page_offset; + page_addr_phys = rk_ops->pt_address(pte) + page_offset; page_flags = pte & RK_PTE_PAGE_FLAGS_MASK; print_it: @@ -601,13 +689,13 @@ static phys_addr_t rk_iommu_iova_to_phys(struct iommu_domain *domain, if (!rk_dte_is_pt_valid(dte)) goto out; - pt_phys = rk_dte_pt_address(dte); + pt_phys = rk_ops->pt_address(dte); page_table = (u32 *)phys_to_virt(pt_phys); pte = page_table[rk_iova_pte_index(iova)]; if 
(!rk_pte_is_page_valid(pte)) goto out; - phys = rk_pte_page_address(pte) + rk_iova_page_offset(iova); + phys = rk_ops->pt_address(pte) + rk_iova_page_offset(iova); out: spin_unlock_irqrestore(&rk_domain->dt_lock, flags); @@ -679,14 +767,13 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain, return ERR_PTR(-ENOMEM); } - dte = rk_mk_dte(pt_dma); + dte = rk_ops->mk_dtentries(pt_dma); *dte_addr = dte; - rk_table_flush(rk_domain, pt_dma, NUM_PT_ENTRIES); rk_table_flush(rk_domain, rk_domain->dt_dma + dte_index * sizeof(u32), 1); done: - pt_phys = rk_dte_pt_address(dte); + pt_phys = rk_ops->pt_address(dte); return (u32 *)phys_to_virt(pt_phys); } @@ -728,7 +815,7 @@ static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr, if (rk_pte_is_page_valid(pte)) goto unwind; - pte_addr[pte_count] = rk_mk_pte(paddr, prot); + pte_addr[pte_count] = rk_ops->mk_ptentries(paddr, prot); paddr += SPAGE_SIZE; } @@ -750,7 +837,7 @@ unwind: pte_count * SPAGE_SIZE); iova += pte_count * SPAGE_SIZE; - page_phys = rk_pte_page_address(pte_addr[pte_count]); + page_phys = rk_ops->pt_address(pte_addr[pte_count]); pr_err("iova: %pad already mapped to %pa cannot remap to phys: %pa prot: %#x\n", &iova, &page_phys, &paddr, prot); @@ -785,7 +872,8 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova, dte_index = rk_domain->dt[rk_iova_dte_index(iova)]; pte_index = rk_iova_pte_index(iova); pte_addr = &page_table[pte_index]; - pte_dma = rk_dte_pt_address(dte_index) + pte_index * sizeof(u32); + + pte_dma = rk_ops->pt_address(dte_index) + pte_index * sizeof(u32); ret = rk_iommu_map_iova(rk_domain, pte_addr, pte_dma, iova, paddr, size, prot); @@ -821,7 +909,7 @@ static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova, return 0; } - pt_phys = rk_dte_pt_address(dte); + pt_phys = rk_ops->pt_address(dte); pte_addr = (u32 *)phys_to_virt(pt_phys) + rk_iova_pte_index(iova); pte_dma = pt_phys + rk_iova_pte_index(iova) * sizeof(u32); unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, pte_dma, size); @@ -879,7 +967,7 @@ static int rk_iommu_enable(struct rk_iommu *iommu) for (i = 0; i < iommu->num_mmu; i++) { rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, - rk_domain->dt_dma); + rk_ops->dma_addr_dte(rk_domain->dt_dma)); rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE); rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK); } @@ -1004,8 +1092,6 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) goto err_free_dt; } - rk_table_flush(rk_domain, rk_domain->dt_dma, NUM_DT_ENTRIES); - spin_lock_init(&rk_domain->iommus_lock); spin_lock_init(&rk_domain->dt_lock); INIT_LIST_HEAD(&rk_domain->iommus); @@ -1037,7 +1123,7 @@ static void rk_iommu_domain_free(struct iommu_domain *domain) for (i = 0; i < NUM_DT_ENTRIES; i++) { u32 dte = rk_domain->dt[i]; if (rk_dte_is_pt_valid(dte)) { - phys_addr_t pt_phys = rk_dte_pt_address(dte); + phys_addr_t pt_phys = rk_ops->pt_address(dte); u32 *page_table = phys_to_virt(pt_phys); dma_unmap_single(dma_dev, pt_phys, SPAGE_SIZE, DMA_TO_DEVICE); @@ -1127,6 +1213,7 @@ static int rk_iommu_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct rk_iommu *iommu; struct resource *res; + const struct rk_iommu_ops *ops; int num_res = pdev->num_resources; int err, i; @@ -1138,6 +1225,17 @@ static int rk_iommu_probe(struct platform_device *pdev) iommu->dev = dev; iommu->num_mmu = 0; + ops = of_device_get_match_data(dev); + if (!rk_ops) + rk_ops = ops; + + /* + * That should not happen 
unless different versions of the + * hardware block are embedded in the same SoC + */ + if (WARN_ON(rk_ops != ops)) + return -EINVAL; + iommu->bases = devm_kcalloc(dev, num_res, sizeof(*iommu->bases), GFP_KERNEL); if (!iommu->bases) @@ -1226,6 +1324,8 @@ static int rk_iommu_probe(struct platform_device *pdev) } } + dma_set_mask_and_coherent(dev, rk_ops->dma_bit_mask); + return 0; err_remove_sysfs: iommu_device_sysfs_remove(&iommu->iommu); @@ -1277,8 +1377,31 @@ static const struct dev_pm_ops rk_iommu_pm_ops = { pm_runtime_force_resume) }; +static struct rk_iommu_ops iommu_data_ops_v1 = { + .pt_address = &rk_dte_pt_address, + .mk_dtentries = &rk_mk_dte, + .mk_ptentries = &rk_mk_pte, + .dte_addr_phys = &rk_dte_addr_phys, + .dma_addr_dte = &rk_dma_addr_dte, + .dma_bit_mask = DMA_BIT_MASK(32), +}; + +static struct rk_iommu_ops iommu_data_ops_v2 = { + .pt_address = &rk_dte_pt_address_v2, + .mk_dtentries = &rk_mk_dte_v2, + .mk_ptentries = &rk_mk_pte_v2, + .dte_addr_phys = &rk_dte_addr_phys_v2, + .dma_addr_dte = &rk_dma_addr_dte_v2, + .dma_bit_mask = DMA_BIT_MASK(40), +}; + static const struct of_device_id rk_iommu_dt_ids[] = { - { .compatible = "rockchip,iommu" }, + { .compatible = "rockchip,iommu", + .data = &iommu_data_ops_v1, + }, + { .compatible = "rockchip,rk3568-iommu", + .data = &iommu_data_ops_v2, + }, { /* sentinel */ } }; diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index c6e5ee4d9cef..6abdcab7273b 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -10,11 +10,11 @@ #include <linux/amba/bus.h> #include <linux/delay.h> #include <linux/dma-iommu.h> +#include <linux/dma-map-ops.h> #include <linux/freezer.h> #include <linux/interval_tree.h> #include <linux/iommu.h> #include <linux/module.h> -#include <linux/of_iommu.h> #include <linux/of_platform.h> #include <linux/pci.h> #include <linux/platform_device.h> @@ -904,6 +904,15 @@ err_free_dev: return ERR_PTR(ret); } +static void viommu_probe_finalize(struct device *dev) +{ +#ifndef CONFIG_ARCH_HAS_SETUP_DMA_OPS + /* First clear the DMA ops in case we're switching from a DMA domain */ + set_dma_ops(dev, NULL); + iommu_setup_dma_ops(dev, 0, U64_MAX); +#endif +} + static void viommu_release_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); @@ -940,6 +949,7 @@ static struct iommu_ops viommu_ops = { .iova_to_phys = viommu_iova_to_phys, .iotlb_sync = viommu_iotlb_sync, .probe_device = viommu_probe_device, + .probe_finalize = viommu_probe_finalize, .release_device = viommu_release_device, .device_group = viommu_device_group, .get_resv_regions = viommu_get_resv_regions, diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 25d448f5af91..74afbb7a4f5e 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -17,7 +17,6 @@ #include <linux/slab.h> #include <linux/of_address.h> #include <linux/of_device.h> -#include <linux/of_iommu.h> #include <linux/of_irq.h> #include <linux/of_platform.h> #include <linux/platform_device.h> diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 3a82faac5767..41f092a269f6 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -588,6 +588,9 @@ struct acpi_pci_root { bool acpi_dma_supported(struct acpi_device *adev); enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); +int acpi_iommu_fwspec_init(struct device *dev, u32 id, + struct fwnode_handle *fwnode, + const struct iommu_ops *ops); int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, u64 *size); int
acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c60745f657e9..7aaa9559cc19 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -259,9 +259,12 @@ void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa); #ifdef CONFIG_ARM64 void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa); +void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size); #else static inline void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { } +static inline void +acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) { } #endif int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 1a12baa58e40..f1f0842a2cb2 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -34,9 +34,8 @@ struct irq_domain *iort_get_device_domain(struct device *dev, u32 id, void acpi_configure_pmsi_domain(struct device *dev); int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); /* IOMMU interface */ -void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size); -const struct iommu_ops *iort_iommu_configure_id(struct device *dev, - const u32 *id_in); +int iort_dma_get_ranges(struct device *dev, u64 *size); +int iort_iommu_configure_id(struct device *dev, const u32 *id_in); int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); phys_addr_t acpi_iort_dma_get_max_cpu_address(void); #else @@ -48,11 +47,10 @@ static inline struct irq_domain *iort_get_device_domain( { return NULL; } static inline void acpi_configure_pmsi_domain(struct device *dev) { } /* IOMMU interface */ -static inline void iort_dma_setup(struct device *dev, u64 *dma_addr, - u64 *size) { } -static inline const struct iommu_ops *iort_iommu_configure_id( - struct device *dev, const u32 *id_in) -{ return NULL; } +static inline int iort_dma_get_ranges(struct device *dev, u64 *size) +{ return -ENODEV; } +static inline int iort_iommu_configure_id(struct device *dev, const u32 *id_in) +{ return -ENODEV; } static inline int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) { return 0; } diff --git a/include/linux/acpi_viot.h b/include/linux/acpi_viot.h new file mode 100644 index 000000000000..1eb8ee5b0e5f --- /dev/null +++ b/include/linux/acpi_viot.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ACPI_VIOT_H__ +#define __ACPI_VIOT_H__ + +#include <linux/acpi.h> + +#ifdef CONFIG_ACPI_VIOT +void __init acpi_viot_init(void); +int viot_iommu_configure(struct device *dev); +#else +static inline void acpi_viot_init(void) {} +static inline int viot_iommu_configure(struct device *dev) +{ + return -ENODEV; +} +#endif + +#endif /* __ACPI_VIOT_H__ */ diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 6e75a2d689b4..758ca4694257 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -19,7 +19,7 @@ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); void iommu_put_dma_cookie(struct iommu_domain *domain); /* Setup call for arch DMA mapping code */ -void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size); +void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit); /* The DMA API isn't _quite_ the whole story, though... 
*/ /* @@ -50,7 +50,7 @@ struct msi_msg; struct device; static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, - u64 size) + u64 dma_limit) { } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 03faf20a6817..d0fa0b31994d 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -537,7 +537,7 @@ struct context_entry { struct dmar_domain { int nid; /* node id */ - unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED]; + unsigned int iommu_refcnt[DMAR_UNITS_SUPPORTED]; /* Refcount of devices per iommu */ @@ -546,7 +546,10 @@ struct dmar_domain { * domain ids are 16 bit wide according * to VT-d spec, section 9.3 */ - bool has_iotlb_device; + u8 has_iotlb_device: 1; + u8 iommu_coherency: 1; /* indicate coherency of iommu access */ + u8 iommu_snooping: 1; /* indicate snooping control feature */ + struct list_head devices; /* all devices' list */ struct list_head subdevices; /* all subdevices' list */ struct iova_domain iovad; /* iova's that belong to this domain */ @@ -558,10 +561,6 @@ struct dmar_domain { int agaw; int flags; /* flags to find out type of domain */ - - int iommu_coherency;/* indicate coherency of iommu access */ - int iommu_snooping; /* indicate snooping control feature*/ - int iommu_count; /* reference count of iommu */ int iommu_superpage;/* Level of superpages supported: 0 == 4KiB (no superpages), 1 == 2MiB, 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ @@ -606,6 +605,8 @@ struct intel_iommu { struct completion prq_complete; struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */ #endif + struct iopf_queue *iopf_queue; + unsigned char iopfq_name[16]; struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ @@ -619,6 +620,7 @@ struct intel_iommu { u32 flags; /* Software defined flags */ struct dmar_drhd_unit *drhd; + void *perf_statistic; }; /* Per subdevice private data */ @@ -776,6 +778,7 @@ struct intel_svm_dev { struct device *dev; struct intel_iommu *iommu; struct iommu_sva sva; + unsigned long prq_seq_number; u32 pasid; int users; u16 did; @@ -791,7 +794,6 @@ struct intel_svm { u32 pasid; int gpasid; /* In case that guest PASID is different from host PASID */ struct list_head devs; - struct list_head list; }; #else static inline void intel_svm_check(struct intel_iommu *iommu) {} @@ -827,4 +829,32 @@ static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) #define intel_iommu_enabled (0) #endif +static inline const char *decode_prq_descriptor(char *str, size_t size, + u64 dw0, u64 dw1, u64 dw2, u64 dw3) +{ + char *buf = str; + int bytes; + + bytes = snprintf(buf, size, + "rid=0x%llx addr=0x%llx %c%c%c%c%c pasid=0x%llx index=0x%llx", + FIELD_GET(GENMASK_ULL(31, 16), dw0), + FIELD_GET(GENMASK_ULL(63, 12), dw1), + dw1 & BIT_ULL(0) ? 'r' : '-', + dw1 & BIT_ULL(1) ? 'w' : '-', + dw0 & BIT_ULL(52) ? 'x' : '-', + dw0 & BIT_ULL(53) ? 'p' : '-', + dw1 & BIT_ULL(2) ? 
'l' : '-', + FIELD_GET(GENMASK_ULL(51, 32), dw0), + FIELD_GET(GENMASK_ULL(11, 3), dw1)); + + /* Private Data */ + if (dw0 & BIT_ULL(9)) { + size -= bytes; + buf += bytes; + snprintf(buf, size, " private=0x%llx/0x%llx\n", dw2, dw3); + } + + return str; +} + #endif diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index 16f4b3e87f20..55c1eb300a86 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -2,29 +2,18 @@ #ifndef __OF_IOMMU_H #define __OF_IOMMU_H -#include <linux/device.h> -#include <linux/iommu.h> -#include <linux/of.h> +struct device; +struct device_node; +struct iommu_ops; #ifdef CONFIG_OF_IOMMU -extern int of_get_dma_window(struct device_node *dn, const char *prefix, - int index, unsigned long *busno, dma_addr_t *addr, - size_t *size); - extern const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np, const u32 *id); #else -static inline int of_get_dma_window(struct device_node *dn, const char *prefix, - int index, unsigned long *busno, dma_addr_t *addr, - size_t *size) -{ - return -EINVAL; -} - static inline const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np, const u32 *id) diff --git a/include/trace/events/intel_iommu.h b/include/trace/events/intel_iommu.h index d233f2916584..e5c1ca6d16ee 100644 --- a/include/trace/events/intel_iommu.h +++ b/include/trace/events/intel_iommu.h @@ -15,6 +15,8 @@ #include <linux/tracepoint.h> #include <linux/intel-iommu.h> +#define MSG_MAX 256 + TRACE_EVENT(qi_submit, TP_PROTO(struct intel_iommu *iommu, u64 qw0, u64 qw1, u64 qw2, u64 qw3), @@ -51,6 +53,41 @@ TRACE_EVENT(qi_submit, __entry->qw0, __entry->qw1, __entry->qw2, __entry->qw3 ) ); + +TRACE_EVENT(prq_report, + TP_PROTO(struct intel_iommu *iommu, struct device *dev, + u64 dw0, u64 dw1, u64 dw2, u64 dw3, + unsigned long seq), + + TP_ARGS(iommu, dev, dw0, dw1, dw2, dw3, seq), + + TP_STRUCT__entry( + __field(u64, dw0) + __field(u64, dw1) + __field(u64, dw2) + __field(u64, dw3) + __field(unsigned long, seq) + __string(iommu, iommu->name) + __string(dev, dev_name(dev)) + __dynamic_array(char, buff, MSG_MAX) + ), + + TP_fast_assign( + __entry->dw0 = dw0; + __entry->dw1 = dw1; + __entry->dw2 = dw2; + __entry->dw3 = dw3; + __entry->seq = seq; + __assign_str(iommu, iommu->name); + __assign_str(dev, dev_name(dev)); + ), + + TP_printk("%s/%s seq# %ld: %s", + __get_str(iommu), __get_str(dev), __entry->seq, + decode_prq_descriptor(__get_str(buff), MSG_MAX, __entry->dw0, + __entry->dw1, __entry->dw2, __entry->dw3) + ) +); #endif /* _TRACE_INTEL_IOMMU_H */ /* This part must be outside protection */ |
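
For reference, the descriptor layout consumed by the new decode_prq_descriptor() helper and the prq_report tracepoint can be exercised outside the kernel. Below is a minimal user-space sketch, not kernel code: the qword values are invented for illustration and FIELD_GET() is open-coded with plain shifts, but the bit positions match the helper above (RID in dw0[31:16], PASID in dw0[51:32], the faulting page address in dw1[63:12], plus the read/write/execute/privilege/last-page flag bits and the private-data-present bit dw0[9]).

    #include <stdint.h>
    #include <stdio.h>

    /* Extract bits [hi:lo] of a 64-bit qword (open-coded FIELD_GET()). */
    static uint64_t bits(uint64_t val, unsigned int hi, unsigned int lo)
    {
            return (val >> lo) & ((1ULL << (hi - lo + 1)) - 1);
    }

    int main(void)
    {
            /* Invented sample descriptor: RID 0x10, PASID 5, read fault, private data present */
            uint64_t dw0 = (5ULL << 32) | (1ULL << 9) | (0x10ULL << 16);
            uint64_t dw1 = 0x7f0000001000ULL | (1ULL << 0);
            uint64_t dw2 = 0xdeadbeefULL, dw3 = 0;

            printf("rid=0x%llx addr=0x%llx %c%c%c%c%c pasid=0x%llx index=0x%llx\n",
                   (unsigned long long)bits(dw0, 31, 16),  /* requester ID */
                   (unsigned long long)bits(dw1, 63, 12),  /* page address >> 12 */
                   dw1 & (1ULL << 0) ? 'r' : '-',          /* read requested */
                   dw1 & (1ULL << 1) ? 'w' : '-',          /* write requested */
                   dw0 & (1ULL << 52) ? 'x' : '-',         /* execute requested */
                   dw0 & (1ULL << 53) ? 'p' : '-',         /* privilege mode */
                   dw1 & (1ULL << 2) ? 'l' : '-',          /* last page in group */
                   (unsigned long long)bits(dw0, 51, 32),  /* PASID */
                   (unsigned long long)bits(dw1, 11, 3));  /* page request group index */

            if (dw0 & (1ULL << 9))                         /* private data present */
                    printf("private=0x%llx/0x%llx\n",
                           (unsigned long long)dw2, (unsigned long long)dw3);

            return 0;
    }

Built with a stock C compiler, this prints "rid=0x10 addr=0x7f0000001 r---- pasid=0x5 index=0x0" followed by the private-data line, essentially the same string the prq_report tracepoint would emit for that descriptor.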