From daedaa33d9c578220b311fbad3748d3ecd5a8f66 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 12 Nov 2018 14:40:08 +0800 Subject: iommu/vtd: Cleanup dma_remapping.h header Commit e61d98d8dad00 ("x64, x2apic/intr-remap: Intel vt-d, IOMMU code reorganization") moved dma_remapping.h from drivers/pci/ to current place. It is entirely VT-d specific, but uses a generic name. This merges dma_remapping.h with include/linux/intel-iommu.h and removes dma_remapping.h as the result. Cc: Ashok Raj Cc: Jacob Pan Cc: Sohil Mehta Suggested-by: Christoph Hellwig Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Liu, Yi L Signed-off-by: Joerg Roedel --- include/linux/dma_remapping.h | 58 ------------------------------------------- include/linux/intel-iommu.h | 49 ++++++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 60 deletions(-) delete mode 100644 include/linux/dma_remapping.h (limited to 'include') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h deleted file mode 100644 index 21b3e7d33d68..000000000000 --- a/include/linux/dma_remapping.h +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _DMA_REMAPPING_H -#define _DMA_REMAPPING_H - -/* - * VT-d hardware uses 4KiB page size regardless of host page size. 
- */ -#define VTD_PAGE_SHIFT (12) -#define VTD_PAGE_SIZE (1UL << VTD_PAGE_SHIFT) -#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) -#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) - -#define VTD_STRIDE_SHIFT (9) -#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT) - -#define DMA_PTE_READ (1) -#define DMA_PTE_WRITE (2) -#define DMA_PTE_LARGE_PAGE (1 << 7) -#define DMA_PTE_SNP (1 << 11) - -#define CONTEXT_TT_MULTI_LEVEL 0 -#define CONTEXT_TT_DEV_IOTLB 1 -#define CONTEXT_TT_PASS_THROUGH 2 -/* Extended context entry types */ -#define CONTEXT_TT_PT_PASID 4 -#define CONTEXT_TT_PT_PASID_DEV_IOTLB 5 -#define CONTEXT_TT_MASK (7ULL << 2) - -#define CONTEXT_DINVE (1ULL << 8) -#define CONTEXT_PRS (1ULL << 9) -#define CONTEXT_PASIDE (1ULL << 11) - -struct intel_iommu; -struct dmar_domain; -struct root_entry; - - -#ifdef CONFIG_INTEL_IOMMU -extern int iommu_calculate_agaw(struct intel_iommu *iommu); -extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); -extern int dmar_disabled; -extern int intel_iommu_enabled; -extern int intel_iommu_tboot_noforce; -#else -static inline int iommu_calculate_agaw(struct intel_iommu *iommu) -{ - return 0; -} -static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) -{ - return 0; -} -#define dmar_disabled (1) -#define intel_iommu_enabled (0) -#endif - - -#endif diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index b0ae25837361..a58bc05d6798 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -37,9 +36,36 @@ #include /* - * Intel IOMMU register specification per version 1.0 public spec. + * VT-d hardware uses 4KiB page size regardless of host page size. 
*/ +#define VTD_PAGE_SHIFT (12) +#define VTD_PAGE_SIZE (1UL << VTD_PAGE_SHIFT) +#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) +#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) + +#define VTD_STRIDE_SHIFT (9) +#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT) + +#define DMA_PTE_READ (1) +#define DMA_PTE_WRITE (2) +#define DMA_PTE_LARGE_PAGE (1 << 7) +#define DMA_PTE_SNP (1 << 11) + +#define CONTEXT_TT_MULTI_LEVEL 0 +#define CONTEXT_TT_DEV_IOTLB 1 +#define CONTEXT_TT_PASS_THROUGH 2 +/* Extended context entry types */ +#define CONTEXT_TT_PT_PASID 4 +#define CONTEXT_TT_PT_PASID_DEV_IOTLB 5 +#define CONTEXT_TT_MASK (7ULL << 2) + +#define CONTEXT_DINVE (1ULL << 8) +#define CONTEXT_PRS (1ULL << 9) +#define CONTEXT_PASIDE (1ULL << 11) +/* + * Intel IOMMU register specification per version 1.0 public spec. + */ #define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ #define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ #define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ @@ -632,4 +658,23 @@ bool context_present(struct context_entry *context); struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, u8 devfn, int alloc); +#ifdef CONFIG_INTEL_IOMMU +extern int iommu_calculate_agaw(struct intel_iommu *iommu); +extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); +extern int dmar_disabled; +extern int intel_iommu_enabled; +extern int intel_iommu_tboot_noforce; +#else +static inline int iommu_calculate_agaw(struct intel_iommu *iommu) +{ + return 0; +} +static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) +{ + return 0; +} +#define dmar_disabled (1) +#define intel_iommu_enabled (0) +#endif + #endif -- cgit v1.2.3 From 51eb78098ab79bba8b1df24da2304e61deb74629 Mon Sep 17 00:00:00 2001 From: tom Date: Tue, 4 Dec 2018 18:27:34 +0000 Subject: iommu: Change tlb_range_add to iotlb_range_add and tlb_sync to iotlb_sync Someone forgot to update this comment. 
Signed-off-by: Tom Murphy Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a1d28f42cb77..11db18b9ffe8 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -168,8 +168,8 @@ struct iommu_resv_region { * @map: map a physically contiguous memory region to an iommu domain * @unmap: unmap a physically contiguous memory region from an iommu domain * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain - * @tlb_range_add: Add a given iova range to the flush queue for this domain - * @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush + * @iotlb_range_add: Add a given iova range to the flush queue for this domain + * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush * queue * @iova_to_phys: translate iova to physical address * @add_device: add device to iommu grouping -- cgit v1.2.3 From 765b6a98c1de3d84dfdae344cc4ee4c24d9447f7 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Dec 2018 09:58:55 +0800 Subject: iommu/vt-d: Enumerate the scalable mode capability The Intel vt-d spec rev3.0 introduces a new translation mode called scalable mode, which enables PASID-granular translations for first level, second level, nested and pass-through modes. At the same time, the previous Extended Context (ECS) mode is deprecated (no production ever implements ECS). This patch adds enumeration for Scalable Mode and removes the deprecated ECS enumeration. It provides a boot time option to disable scalable mode even if the hardware claims to support it. 
Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Reviewed-by: Kevin Tian Signed-off-by: Joerg Roedel --- Documentation/admin-guide/kernel-parameters.txt | 12 ++--- drivers/iommu/intel-iommu.c | 64 +++++++------------------ include/linux/intel-iommu.h | 1 + 3 files changed, 24 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 81d1d5a74728..abe9769a9276 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1682,12 +1682,12 @@ By default, super page will be supported if Intel IOMMU has the capability. With this option, super page will not be supported. - ecs_off [Default Off] - By default, extended context tables will be supported if - the hardware advertises that it has support both for the - extended tables themselves, and also PASID support. With - this option set, extended tables will not be used even - on hardware which claims to support them. + sm_off [Default Off] + By default, scalable mode will be supported if the + hardware advertises that it has support for the scalable + mode translation. With this option set, scalable mode + will not be used even on hardware which claims to support + it. tboot_noforce [Default Off] Do not force the Intel IOMMU enabled under tboot. 
By default, tboot will force Intel IOMMU on, which diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index fdf79baf1d79..2b9784a1887b 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -405,38 +405,16 @@ static int dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; -static int intel_iommu_ecs = 1; -static int intel_iommu_pasid28; +static int intel_iommu_sm = 1; static int iommu_identity_mapping; #define IDENTMAP_ALL 1 #define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA 4 -/* Broadwell and Skylake have broken ECS support — normal so-called "second - * level" translation of DMA requests-without-PASID doesn't actually happen - * unless you also set the NESTE bit in an extended context-entry. Which of - * course means that SVM doesn't work because it's trying to do nested - * translation of the physical addresses it finds in the process page tables, - * through the IOVA->phys mapping found in the "second level" page tables. - * - * The VT-d specification was retroactively changed to change the definition - * of the capability bits and pretend that Broadwell/Skylake never happened... - * but unfortunately the wrong bit was changed. It's ECS which is broken, but - * for some reason it was the PASID capability bit which was redefined (from - * bit 28 on BDW/SKL to bit 40 in future). - * - * So our test for ECS needs to eschew those implementations which set the old - * PASID capabiity bit 28, since those are the ones on which ECS is broken. - * Unless we are working around the 'pasid28' limitations, that is, by putting - * the device into passthrough mode for normal DMA and thus masking the bug. - */ -#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \ - (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap))) -/* PASID support is thus enabled if ECS is enabled and *either* of the old - * or new capability bits are set. 
*/ -#define pasid_enabled(iommu) (ecs_enabled(iommu) && \ - (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap))) +#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) +#define pasid_supported(iommu) (sm_supported(iommu) && \ + ecap_pasid((iommu)->ecap)) int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); @@ -516,15 +494,9 @@ static int __init intel_iommu_setup(char *str) } else if (!strncmp(str, "sp_off", 6)) { pr_info("Disable supported super page\n"); intel_iommu_superpage = 0; - } else if (!strncmp(str, "ecs_off", 7)) { - printk(KERN_INFO - "Intel-IOMMU: disable extended context table support\n"); - intel_iommu_ecs = 0; - } else if (!strncmp(str, "pasid28", 7)) { - printk(KERN_INFO - "Intel-IOMMU: enable pre-production PASID support\n"); - intel_iommu_pasid28 = 1; - iommu_identity_mapping |= IDENTMAP_GFX; + } else if (!strncmp(str, "sm_off", 6)) { + pr_info("Intel-IOMMU: disable scalable mode support\n"); + intel_iommu_sm = 0; } else if (!strncmp(str, "tboot_noforce", 13)) { printk(KERN_INFO "Intel-IOMMU: not forcing on after tboot. 
This could expose security risk for tboot\n"); @@ -771,7 +743,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, u64 *entry; entry = &root->lo; - if (ecs_enabled(iommu)) { + if (sm_supported(iommu)) { if (devfn >= 0x80) { devfn -= 0x80; entry = &root->hi; @@ -913,7 +885,7 @@ static void free_context_table(struct intel_iommu *iommu) if (context) free_pgtable_page(context); - if (!ecs_enabled(iommu)) + if (!sm_supported(iommu)) continue; context = iommu_context_addr(iommu, i, 0x80, 0); @@ -1265,8 +1237,6 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) unsigned long flag; addr = virt_to_phys(iommu->root_entry); - if (ecs_enabled(iommu)) - addr |= DMA_RTADDR_RTT; raw_spin_lock_irqsave(&iommu->register_lock, flag); dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr); @@ -1755,7 +1725,7 @@ static void free_dmar_iommu(struct intel_iommu *iommu) free_context_table(iommu); #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu)) { + if (pasid_supported(iommu)) { if (ecap_prs(iommu->ecap)) intel_svm_finish_prq(iommu); intel_svm_exit(iommu); @@ -2464,8 +2434,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, dmar_find_matched_atsr_unit(pdev)) info->ats_supported = 1; - if (ecs_enabled(iommu)) { - if (pasid_enabled(iommu)) { + if (sm_supported(iommu)) { + if (pasid_supported(iommu)) { int features = pci_pasid_features(pdev); if (features >= 0) info->pasid_supported = features | 1; @@ -3277,7 +3247,7 @@ static int __init init_dmars(void) * We need to ensure the system pasid table is no bigger * than the smallest supported. 
*/ - if (pasid_enabled(iommu)) { + if (pasid_supported(iommu)) { u32 temp = 2 << ecap_pss(iommu->ecap); intel_pasid_max_id = min_t(u32, temp, @@ -3338,7 +3308,7 @@ static int __init init_dmars(void) if (!ecap_pass_through(iommu->ecap)) hw_pass_through = 0; #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu)) + if (pasid_supported(iommu)) intel_svm_init(iommu); #endif } @@ -3442,7 +3412,7 @@ domains_done: iommu_flush_write_buffer(iommu); #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { + if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { ret = intel_svm_enable_prq(iommu); if (ret) goto free_iommu; @@ -4331,7 +4301,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) goto out; #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu)) + if (pasid_supported(iommu)) intel_svm_init(iommu); #endif @@ -4348,7 +4318,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) iommu_flush_write_buffer(iommu); #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { + if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { ret = intel_svm_enable_prq(iommu); if (ret) goto disable_iommu; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index a58bc05d6798..8c9b6063d275 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -177,6 +177,7 @@ * Extended Capability Register */ +#define ecap_smts(e) (((e) >> 43) & 0x1) #define ecap_dit(e) ((e >> 41) & 0x1) #define ecap_pasid(e) ((e >> 40) & 0x1) #define ecap_pss(e) ((e >> 35) & 0x1f) -- cgit v1.2.3 From 4f2ed183cfebf42b29ed8fe442169de97bc0fe61 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Dec 2018 09:58:57 +0800 Subject: iommu/vt-d: Move page table helpers into header So that they could also be used in other source files. 
Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Liu Yi L Cc: Sanjay Kumar Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Reviewed-by: Kevin Tian Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 43 ------------------------------------------- include/linux/intel-iommu.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 5552a1aaf5ea..d55254abd5ff 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -290,49 +290,6 @@ static inline void context_clear_entry(struct context_entry *context) context->hi = 0; } -/* - * 0: readable - * 1: writable - * 2-6: reserved - * 7: super page - * 8-10: available - * 11: snoop behavior - * 12-63: Host physcial address - */ -struct dma_pte { - u64 val; -}; - -static inline void dma_clear_pte(struct dma_pte *pte) -{ - pte->val = 0; -} - -static inline u64 dma_pte_addr(struct dma_pte *pte) -{ -#ifdef CONFIG_64BIT - return pte->val & VTD_PAGE_MASK; -#else - /* Must have a full atomic 64-bit read */ - return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK; -#endif -} - -static inline bool dma_pte_present(struct dma_pte *pte) -{ - return (pte->val & 3) != 0; -} - -static inline bool dma_pte_superpage(struct dma_pte *pte) -{ - return (pte->val & DMA_PTE_LARGE_PAGE); -} - -static inline int first_pte_in_page(struct dma_pte *pte) -{ - return !((unsigned long)pte & ~VTD_PAGE_MASK); -} - /* * This domain is a statically identity mapping domain. * 1. This domain creats a static 1:1 mapping to all usable memory. 
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 8c9b6063d275..b4da61385ebf 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -590,6 +590,49 @@ static inline void __iommu_flush_cache( clflush_cache_range(addr, size); } +/* + * 0: readable + * 1: writable + * 2-6: reserved + * 7: super page + * 8-10: available + * 11: snoop behavior + * 12-63: Host physcial address + */ +struct dma_pte { + u64 val; +}; + +static inline void dma_clear_pte(struct dma_pte *pte) +{ + pte->val = 0; +} + +static inline u64 dma_pte_addr(struct dma_pte *pte) +{ +#ifdef CONFIG_64BIT + return pte->val & VTD_PAGE_MASK; +#else + /* Must have a full atomic 64-bit read */ + return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK; +#endif +} + +static inline bool dma_pte_present(struct dma_pte *pte) +{ + return (pte->val & 3) != 0; +} + +static inline bool dma_pte_superpage(struct dma_pte *pte) +{ + return (pte->val & DMA_PTE_LARGE_PAGE); +} + +static inline int first_pte_in_page(struct dma_pte *pte) +{ + return !((unsigned long)pte & ~VTD_PAGE_MASK); +} + extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); extern int dmar_find_matched_atsr_unit(struct pci_dev *dev); -- cgit v1.2.3 From 5d308fc1ecf5351418a4f003ccb74dc91b424bd1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Dec 2018 09:58:58 +0800 Subject: iommu/vt-d: Add 256-bit invalidation descriptor support Intel vt-d spec rev3.0 requires software to use 256-bit descriptors in invalidation queue. As the spec reads in section 6.5.2: Remapping hardware supporting Scalable Mode Translations (ECAP_REG.SMTS=1) allow software to additionally program the width of the descriptors (128-bits or 256-bits) that will be written into the Queue. 
Software should setup the Invalidation Queue for 256-bit descriptors before programming remapping hardware for scalable-mode translation as 128-bit descriptors are treated as invalid descriptors (see Table 21 in Section 6.5.2.10) in scalable-mode. This patch adds 256-bit invalidation descriptor support if the hardware presents scalable mode capability. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 91 +++++++++++++++++++++++++------------ drivers/iommu/intel-svm.c | 76 ++++++++++++++++++++----------- drivers/iommu/intel_irq_remapping.c | 6 ++- include/linux/intel-iommu.h | 9 +++- 4 files changed, 121 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index d9c748b6f9e4..9511f9aeb77c 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1160,6 +1160,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) int head, tail; struct q_inval *qi = iommu->qi; int wait_index = (index + 1) % QI_LENGTH; + int shift = qi_shift(iommu); if (qi->desc_status[wait_index] == QI_ABORT) return -EAGAIN; @@ -1173,13 +1174,19 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) */ if (fault & DMA_FSTS_IQE) { head = readl(iommu->reg + DMAR_IQH_REG); - if ((head >> DMAR_IQ_SHIFT) == index) { - pr_err("VT-d detected invalid descriptor: " - "low=%llx, high=%llx\n", - (unsigned long long)qi->desc[index].low, - (unsigned long long)qi->desc[index].high); - memcpy(&qi->desc[index], &qi->desc[wait_index], - sizeof(struct qi_desc)); + if ((head >> shift) == index) { + struct qi_desc *desc = qi->desc + head; + + /* + * desc->qw2 and desc->qw3 are either reserved or + * used by software as private data. We won't print + * out these two qw's for security consideration. 
+ */ + pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n", + (unsigned long long)desc->qw0, + (unsigned long long)desc->qw1); + memcpy(desc, qi->desc + (wait_index << shift), + 1 << shift); writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG); return -EINVAL; } @@ -1191,10 +1198,10 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) */ if (fault & DMA_FSTS_ITE) { head = readl(iommu->reg + DMAR_IQH_REG); - head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH; + head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH; head |= 1; tail = readl(iommu->reg + DMAR_IQT_REG); - tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH; + tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH; writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG); @@ -1222,15 +1229,14 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) { int rc; struct q_inval *qi = iommu->qi; - struct qi_desc *hw, wait_desc; + int offset, shift, length; + struct qi_desc wait_desc; int wait_index, index; unsigned long flags; if (!qi) return 0; - hw = qi->desc; - restart: rc = 0; @@ -1243,16 +1249,21 @@ restart: index = qi->free_head; wait_index = (index + 1) % QI_LENGTH; + shift = qi_shift(iommu); + length = 1 << shift; qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE; - hw[index] = *desc; - - wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) | + offset = index << shift; + memcpy(qi->desc + offset, desc, length); + wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE; - wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]); + wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]); + wait_desc.qw2 = 0; + wait_desc.qw3 = 0; - hw[wait_index] = wait_desc; + offset = wait_index << shift; + memcpy(qi->desc + offset, &wait_desc, length); qi->free_head = (qi->free_head + 2) % QI_LENGTH; qi->free_cnt -= 2; @@ -1261,7 +1272,7 @@ restart: * update the HW tail register indicating the presence of * new descriptors. 
*/ - writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG); + writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG); while (qi->desc_status[wait_index] != QI_DONE) { /* @@ -1298,8 +1309,10 @@ void qi_global_iec(struct intel_iommu *iommu) { struct qi_desc desc; - desc.low = QI_IEC_TYPE; - desc.high = 0; + desc.qw0 = QI_IEC_TYPE; + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; /* should never fail */ qi_submit_sync(&desc, iommu); @@ -1310,9 +1323,11 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, { struct qi_desc desc; - desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did) + desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did) | QI_CC_GRAN(type) | QI_CC_TYPE; - desc.high = 0; + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; qi_submit_sync(&desc, iommu); } @@ -1331,10 +1346,12 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, if (cap_read_drain(iommu->cap)) dr = 1; - desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw) + desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw) | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE; - desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) + desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) | QI_IOTLB_AM(size_order); + desc.qw2 = 0; + desc.qw3 = 0; qi_submit_sync(&desc, iommu); } @@ -1347,15 +1364,17 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, if (mask) { WARN_ON_ONCE(addr & ((1ULL << (VTD_PAGE_SHIFT + mask)) - 1)); addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; - desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; + desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; } else - desc.high = QI_DEV_IOTLB_ADDR(addr); + desc.qw1 = QI_DEV_IOTLB_ADDR(addr); if (qdep >= QI_DEV_IOTLB_MAX_INVS) qdep = 0; - desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | + desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); + desc.qw2 = 0; + desc.qw3 = 0; 
qi_submit_sync(&desc, iommu); } @@ -1403,16 +1422,24 @@ static void __dmar_enable_qi(struct intel_iommu *iommu) u32 sts; unsigned long flags; struct q_inval *qi = iommu->qi; + u64 val = virt_to_phys(qi->desc); qi->free_head = qi->free_tail = 0; qi->free_cnt = QI_LENGTH; + /* + * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability + * is present. + */ + if (ecap_smts(iommu->ecap)) + val |= (1 << 11) | 1; + raw_spin_lock_irqsave(&iommu->register_lock, flags); /* write zero to the tail reg */ writel(0, iommu->reg + DMAR_IQT_REG); - dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc)); + dmar_writeq(iommu->reg + DMAR_IQA_REG, val); iommu->gcmd |= DMA_GCMD_QIE; writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); @@ -1448,8 +1475,12 @@ int dmar_enable_qi(struct intel_iommu *iommu) qi = iommu->qi; - - desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0); + /* + * Need two pages to accommodate 256 descriptors of 256 bits each + * if the remapping hardware supports scalable mode translation. + */ + desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, + !!ecap_smts(iommu->ecap)); if (!desc_page) { kfree(qi); iommu->qi = NULL; diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index d6c99935d5d9..b7f1d12e24b0 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -161,27 +161,40 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d * because that's the only option the hardware gives us. Despite * the fact that they are actually only accessible through one. 
*/ if (gl) - desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | - QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) | QI_EIOTLB_TYPE; + desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | + QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) | + QI_EIOTLB_TYPE; else - desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | - QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE; - desc.high = 0; + desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | + QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | + QI_EIOTLB_TYPE; + desc.qw1 = 0; } else { int mask = ilog2(__roundup_pow_of_two(pages)); - desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | - QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE; - desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(gl) | - QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask); + desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | + QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | + QI_EIOTLB_TYPE; + desc.qw1 = QI_EIOTLB_ADDR(address) | + QI_EIOTLB_GL(gl) | + QI_EIOTLB_IH(ih) | + QI_EIOTLB_AM(mask); } + desc.qw2 = 0; + desc.qw3 = 0; qi_submit_sync(&desc, svm->iommu); if (sdev->dev_iotlb) { - desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) | - QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE; + desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) | + QI_DEV_EIOTLB_SID(sdev->sid) | + QI_DEV_EIOTLB_QDEP(sdev->qdep) | + QI_DEIOTLB_TYPE; if (pages == -1) { - desc.high = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | QI_DEV_EIOTLB_SIZE; + desc.qw1 = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | + QI_DEV_EIOTLB_SIZE; } else if (pages > 1) { /* The least significant zero bit indicates the size. 
So, * for example, an "address" value of 0x12345f000 will @@ -189,10 +202,13 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT); unsigned long mask = __rounddown_pow_of_two(address ^ last); - desc.high = QI_DEV_EIOTLB_ADDR((address & ~mask) | (mask - 1)) | QI_DEV_EIOTLB_SIZE; + desc.qw1 = QI_DEV_EIOTLB_ADDR((address & ~mask) | + (mask - 1)) | QI_DEV_EIOTLB_SIZE; } else { - desc.high = QI_DEV_EIOTLB_ADDR(address); + desc.qw1 = QI_DEV_EIOTLB_ADDR(address); } + desc.qw2 = 0; + desc.qw3 = 0; qi_submit_sync(&desc, svm->iommu); } } @@ -237,8 +253,11 @@ static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *s { struct qi_desc desc; - desc.high = 0; - desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid); + desc.qw0 = QI_PC_TYPE | QI_PC_DID(sdev->did) | + QI_PC_PASID_SEL | QI_PC_PASID(pasid); + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; qi_submit_sync(&desc, svm->iommu); } @@ -667,24 +686,27 @@ static irqreturn_t prq_event_thread(int irq, void *d) no_pasid: if (req->lpig) { /* Page Group Response */ - resp.low = QI_PGRP_PASID(req->pasid) | + resp.qw0 = QI_PGRP_PASID(req->pasid) | QI_PGRP_DID((req->bus << 8) | req->devfn) | QI_PGRP_PASID_P(req->pasid_present) | QI_PGRP_RESP_TYPE; - resp.high = QI_PGRP_IDX(req->prg_index) | - QI_PGRP_PRIV(req->private) | QI_PGRP_RESP_CODE(result); - - qi_submit_sync(&resp, iommu); + resp.qw1 = QI_PGRP_IDX(req->prg_index) | + QI_PGRP_PRIV(req->private) | + QI_PGRP_RESP_CODE(result); } else if (req->srr) { /* Page Stream Response */ - resp.low = QI_PSTRM_IDX(req->prg_index) | - QI_PSTRM_PRIV(req->private) | QI_PSTRM_BUS(req->bus) | - QI_PSTRM_PASID(req->pasid) | QI_PSTRM_RESP_TYPE; - resp.high = QI_PSTRM_ADDR(address) | QI_PSTRM_DEVFN(req->devfn) | + resp.qw0 = QI_PSTRM_IDX(req->prg_index) | + QI_PSTRM_PRIV(req->private) | + QI_PSTRM_BUS(req->bus) | + QI_PSTRM_PASID(req->pasid) 
| + QI_PSTRM_RESP_TYPE; + resp.qw1 = QI_PSTRM_ADDR(address) | + QI_PSTRM_DEVFN(req->devfn) | QI_PSTRM_RESP_CODE(result); - - qi_submit_sync(&resp, iommu); } + resp.qw2 = 0; + resp.qw3 = 0; + qi_submit_sync(&resp, iommu); head = (head + sizeof(*req)) & PRQ_RING_MASK; } diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index c2d6c11431de..24d45b07f425 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -145,9 +145,11 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask) { struct qi_desc desc; - desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask) + desc.qw0 = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask) | QI_IEC_SELECTIVE; - desc.high = 0; + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; return qi_submit_sync(&desc, iommu); } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index b4da61385ebf..08ff588a4df7 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -401,13 +401,18 @@ enum { #define QI_GRAN_NONG_PASID 2 #define QI_GRAN_PSI_PASID 3 +#define qi_shift(iommu) (DMAR_IQ_SHIFT + !!ecap_smts((iommu)->ecap)) + struct qi_desc { - u64 low, high; + u64 qw0; + u64 qw1; + u64 qw2; + u64 qw3; }; struct q_inval { raw_spinlock_t q_lock; - struct qi_desc *desc; /* invalidation queue */ + void *desc; /* invalidation queue */ int *desc_status; /* desc status */ int free_head; /* first free entry */ int free_tail; /* last free entry */ -- cgit v1.2.3 From 6f7db75e1c469057fe7588ed959328ead771ccc7 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Dec 2018 09:59:00 +0800 Subject: iommu/vt-d: Add second level page table interface This adds the interfaces to setup or tear down the structures for second level page table translations. This includes types of second level only translation and pass through. 
Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 2 +- drivers/iommu/intel-pasid.c | 280 ++++++++++++++++++++++++++++++++++++++++++++ drivers/iommu/intel-pasid.h | 8 ++ include/linux/intel-iommu.h | 3 + 4 files changed, 292 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 9818aaf2d0f7..f2976a3f1d67 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1210,7 +1210,7 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } -static void iommu_flush_write_buffer(struct intel_iommu *iommu) +void iommu_flush_write_buffer(struct intel_iommu *iommu) { u32 val; unsigned long flag; diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index fd3ccc0753b0..6d2b2e87e6fc 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) "DMAR: " fmt +#include #include #include #include @@ -294,3 +295,282 @@ void intel_pasid_clear_entry(struct device *dev, int pasid) pasid_clear_entry(pe); } + +static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits) +{ + u64 old; + + old = READ_ONCE(*ptr); + WRITE_ONCE(*ptr, (old & ~mask) | bits); +} + +/* + * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode + * PASID entry. + */ +static inline void +pasid_set_domain_id(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value); +} + +/* + * Get domain ID value of a scalable mode PASID entry. + */ +static inline u16 +pasid_get_domain_id(struct pasid_entry *pe) +{ + return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0)); +} + +/* + * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63) + * of a scalable mode PASID entry. 
+ */ +static inline void +pasid_set_slptr(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value); +} + +/* + * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID + * entry. + */ +static inline void +pasid_set_address_width(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2); +} + +/* + * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_translation_type(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6); +} + +/* + * Enable fault processing by clearing the FPD(Fault Processing + * Disable) field (Bit 1) of a scalable mode PASID entry. + */ +static inline void pasid_set_fault_enable(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 1, 0); +} + +/* + * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a + * scalable mode PASID entry. + */ +static inline void pasid_set_sre(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[2], 1 << 0, 1); +} + +/* + * Setup the P(Present) field (Bit 0) of a scalable mode PASID + * entry. + */ +static inline void pasid_set_present(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 0, 1); +} + +/* + * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID + * entry. 
+ */ +static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value) +{ + pasid_set_bits(&pe->val[1], 1 << 23, value); +} + +static void +pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu, + u16 did, int pasid) +{ + struct qi_desc desc; + + desc.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid); + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; + + qi_submit_sync(&desc, iommu); +} + +static void +iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid) +{ + struct qi_desc desc; + + desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE; + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; + + qi_submit_sync(&desc, iommu); +} + +static void +devtlb_invalidation_with_pasid(struct intel_iommu *iommu, + struct device *dev, int pasid) +{ + struct device_domain_info *info; + u16 sid, qdep, pfsid; + + info = dev->archdata.iommu; + if (!info || !info->ats_enabled) + return; + + sid = info->bus << 8 | info->devfn; + qdep = info->ats_qdep; + pfsid = info->pfsid; + + qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT); +} + +void intel_pasid_tear_down_entry(struct intel_iommu *iommu, + struct device *dev, int pasid) +{ + struct pasid_entry *pte; + u16 did; + + pte = intel_pasid_get_entry(dev, pasid); + if (WARN_ON(!pte)) + return; + + intel_pasid_clear_entry(dev, pasid); + did = pasid_get_domain_id(pte); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + iotlb_invalidation_with_pasid(iommu, did, pasid); + + /* Device IOTLB doesn't need to be flushed in caching mode. */ + if (!cap_caching_mode(iommu->cap)) + devtlb_invalidation_with_pasid(iommu, dev, pasid); +} + +/* + * Set up the scalable mode pasid entry for second only translation type. 
+ */ +int intel_pasid_setup_second_level(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, int pasid) +{ + struct pasid_entry *pte; + struct dma_pte *pgd; + u64 pgd_val; + int agaw; + u16 did; + + /* + * If hardware advertises no support for second level + * translation, return directly. + */ + if (!ecap_slts(iommu->ecap)) { + pr_err("No second level translation support on %s\n", + iommu->name); + return -EINVAL; + } + + /* + * Skip top levels of page tables for iommu which has less agaw + * than default. Unnecessary for PT mode. + */ + pgd = domain->pgd; + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) { + dev_err(dev, "Invalid domain page table\n"); + return -EINVAL; + } + } + + pgd_val = virt_to_phys(pgd); + did = domain->iommu_did[iommu->seq_id]; + + pte = intel_pasid_get_entry(dev, pasid); + if (!pte) { + dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid); + return -ENODEV; + } + + pasid_clear_entry(pte); + pasid_set_domain_id(pte, did); + pasid_set_slptr(pte, pgd_val); + pasid_set_address_width(pte, agaw); + pasid_set_translation_type(pte, 2); + pasid_set_fault_enable(pte); + pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + + /* + * Since it is a second level only translation setup, we should + * set SRE bit as well (addresses are expected to be GPAs). + */ + pasid_set_sre(pte); + pasid_set_present(pte); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + if (cap_caching_mode(iommu->cap)) { + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + iotlb_invalidation_with_pasid(iommu, did, pasid); + } else { + iommu_flush_write_buffer(iommu); + } + + return 0; +} + +/* + * Set up the scalable mode pasid entry for passthrough translation type. 
+ */ +int intel_pasid_setup_pass_through(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, int pasid) +{ + u16 did = FLPT_DEFAULT_DID; + struct pasid_entry *pte; + + pte = intel_pasid_get_entry(dev, pasid); + if (!pte) { + dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid); + return -ENODEV; + } + + pasid_clear_entry(pte); + pasid_set_domain_id(pte, did); + pasid_set_address_width(pte, iommu->agaw); + pasid_set_translation_type(pte, 4); + pasid_set_fault_enable(pte); + pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + + /* + * We should set SRE bit as well since the addresses are expected + * to be GPAs. + */ + pasid_set_sre(pte); + pasid_set_present(pte); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + if (cap_caching_mode(iommu->cap)) { + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + iotlb_invalidation_with_pasid(iommu, did, pasid); + } else { + iommu_flush_write_buffer(iommu); + } + + return 0; +} diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 03c1612d173c..3c70522091d3 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -49,5 +49,13 @@ struct pasid_table *intel_pasid_get_table(struct device *dev); int intel_pasid_get_dev_max_id(struct device *dev); struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid); void intel_pasid_clear_entry(struct device *dev, int pasid); +int intel_pasid_setup_second_level(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, int pasid); +int intel_pasid_setup_pass_through(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, int pasid); +void intel_pasid_tear_down_entry(struct intel_iommu *iommu, + struct device *dev, int pasid); #endif /* __INTEL_PASID_H */ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 08ff588a4df7..cb3ebda47fa7 100644 --- a/include/linux/intel-iommu.h +++ 
b/include/linux/intel-iommu.h @@ -177,6 +177,8 @@ * Extended Capability Register */ +#define ecap_smpwc(e) (((e) >> 48) & 0x1) +#define ecap_slts(e) (((e) >> 46) & 0x1) #define ecap_smts(e) (((e) >> 43) & 0x1) #define ecap_dit(e) ((e >> 41) & 0x1) #define ecap_pasid(e) ((e >> 40) & 0x1) @@ -662,6 +664,7 @@ void free_pgtable_page(void *vaddr); struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); int for_each_device_domain(int (*fn)(struct device_domain_info *info, void *data), void *data); +void iommu_flush_write_buffer(struct intel_iommu *iommu); #ifdef CONFIG_INTEL_IOMMU_SVM int intel_svm_init(struct intel_iommu *iommu); -- cgit v1.2.3 From 7373a8cc381978cfafa4b0285cdd935682f1b2d2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Dec 2018 09:59:03 +0800 Subject: iommu/vt-d: Setup context and enable RID2PASID support This patch enables the translation for requests without PASID in the scalable mode by setting up the root and context entries. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 136 ++++++++++++++++++++++++++++++++++---------- drivers/iommu/intel-pasid.h | 1 + include/linux/intel-iommu.h | 1 + 3 files changed, 108 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 13c3c2dd0459..21a6853290cc 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1197,6 +1197,8 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) unsigned long flag; addr = virt_to_phys(iommu->root_entry); + if (sm_supported(iommu)) + addr |= DMA_RTADDR_SMT; raw_spin_lock_irqsave(&iommu->register_lock, flag); dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr); @@ -1918,6 +1920,56 @@ static void domain_exit(struct dmar_domain *domain) free_domain_mem(domain); } +/* + * Get the PASID directory size for scalable 
mode context entry. + * Value of X in the PDTS field of a scalable mode context entry + * indicates PASID directory with 2^(X + 7) entries. + */ +static inline unsigned long context_get_sm_pds(struct pasid_table *table) +{ + int pds, max_pde; + + max_pde = table->max_pasid >> PASID_PDE_SHIFT; + pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS); + if (pds < 7) + return 0; + + return pds - 7; +} + +/* + * Set the RID_PASID field of a scalable mode context entry. The + * IOMMU hardware will use the PASID value set in this field for + * DMA translations of DMA requests without PASID. + */ +static inline void +context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid) +{ + context->hi |= pasid & ((1 << 20) - 1); + context->hi |= (1 << 20); +} + +/* + * Set the DTE(Device-TLB Enable) field of a scalable mode context + * entry. + */ +static inline void context_set_sm_dte(struct context_entry *context) +{ + context->lo |= (1 << 2); +} + +/* + * Set the PRE(Page Request Enable) field of a scalable mode context + * entry. + */ +static inline void context_set_sm_pre(struct context_entry *context) +{ + context->lo |= (1 << 4); +} + +/* Convert value to context PASID directory size field coding. */ +#define context_pdts(pds) (((pds) & 0x7) << 9) + static int domain_context_mapping_one(struct dmar_domain *domain, struct intel_iommu *iommu, struct pasid_table *table, @@ -1928,8 +1980,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, struct device_domain_info *info = NULL; struct context_entry *context; unsigned long flags; - struct dma_pte *pgd; - int ret, agaw; + int ret; WARN_ON(did == 0); @@ -1975,41 +2026,67 @@ static int domain_context_mapping_one(struct dmar_domain *domain, } } - pgd = domain->pgd; - context_clear_entry(context); - context_set_domain_id(context, did); - /* - * Skip top levels of page tables for iommu which has less agaw - * than default. Unnecessary for PT mode. 
- */ - if (translation != CONTEXT_TT_PASS_THROUGH) { - for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { - ret = -ENOMEM; - pgd = phys_to_virt(dma_pte_addr(pgd)); - if (!dma_pte_present(pgd)) - goto out_unlock; - } + if (sm_supported(iommu)) { + unsigned long pds; - info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); - if (info && info->ats_supported) - translation = CONTEXT_TT_DEV_IOTLB; - else - translation = CONTEXT_TT_MULTI_LEVEL; + WARN_ON(!table); + + /* Setup the PASID DIR pointer: */ + pds = context_get_sm_pds(table); + context->lo = (u64)virt_to_phys(table->table) | + context_pdts(pds); + + /* Setup the RID_PASID field: */ + context_set_sm_rid2pasid(context, PASID_RID2PASID); - context_set_address_root(context, virt_to_phys(pgd)); - context_set_address_width(context, agaw); - } else { /* - * In pass through mode, AW must be programmed to - * indicate the largest AGAW value supported by - * hardware. And ASR is ignored by hardware. + * Setup the Device-TLB enable bit and Page request + * Enable bit: */ - context_set_address_width(context, iommu->msagaw); + info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); + if (info && info->ats_supported) + context_set_sm_dte(context); + if (info && info->pri_supported) + context_set_sm_pre(context); + } else { + struct dma_pte *pgd = domain->pgd; + int agaw; + + context_set_domain_id(context, did); + context_set_translation_type(context, translation); + + if (translation != CONTEXT_TT_PASS_THROUGH) { + /* + * Skip top levels of page tables for iommu which has + * less agaw than default. Unnecessary for PT mode. 
+ */ + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { + ret = -ENOMEM; + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) + goto out_unlock; + } + + info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); + if (info && info->ats_supported) + translation = CONTEXT_TT_DEV_IOTLB; + else + translation = CONTEXT_TT_MULTI_LEVEL; + + context_set_address_root(context, virt_to_phys(pgd)); + context_set_address_width(context, agaw); + } else { + /* + * In pass through mode, AW must be programmed to + * indicate the largest AGAW value supported by + * hardware. And ASR is ignored by hardware. + */ + context_set_address_width(context, iommu->msagaw); + } } - context_set_translation_type(context, translation); context_set_fault_enable(context); context_set_present(context); domain_flush_cache(domain, context, sizeof(*context)); @@ -5180,7 +5257,6 @@ static void intel_iommu_put_resv_regions(struct device *dev, } #ifdef CONFIG_INTEL_IOMMU_SVM -#define MAX_NR_PASID_BITS (20) static inline unsigned long intel_iommu_get_pts(struct device *dev) { int pts, max_pasid; diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index d6f4fead4491..55bb8715329d 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -17,6 +17,7 @@ #define PASID_PTE_PRESENT 1 #define PDE_PFN_MASK PAGE_MASK #define PASID_PDE_SHIFT 6 +#define MAX_NR_PASID_BITS 20 /* * Domain ID reserved for pasid entries programmed for first-level diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index cb3ebda47fa7..5fdd33ed2cce 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -258,6 +258,7 @@ /* DMA_RTADDR_REG */ #define DMA_RTADDR_RTT (((u64)1) << 11) +#define DMA_RTADDR_SMT (((u64)1) << 10) /* CCMD_REG */ #define DMA_CCMD_ICC (((u64)1) << 63) -- cgit v1.2.3 From 437f35e1cd4c8d043633bb72f4260369af68fbf7 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Dec 2018 09:59:04 +0800 Subject: iommu/vt-d: Add first 
level page table interface This adds an interface to setup the PASID entries for first level page table translation. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Signed-off-by: Joerg Roedel --- drivers/iommu/intel-pasid.c | 80 +++++++++++++++++++++++++++++++++++++++++++++ drivers/iommu/intel-pasid.h | 11 +++++++ include/linux/intel-iommu.h | 1 + 3 files changed, 92 insertions(+) (limited to 'include') diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index 6d2b2e87e6fc..c3dcf4dc2496 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -10,6 +10,7 @@ #define pr_fmt(fmt) "DMAR: " fmt #include +#include #include #include #include @@ -389,6 +390,26 @@ static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value) pasid_set_bits(&pe->val[1], 1 << 23, value); } +/* + * Setup the First Level Page table Pointer field (Bit 140~191) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_flptr(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value); +} + +/* + * Setup the First Level Paging Mode field (Bit 130~131) of a + * scalable mode PASID entry. + */ +static inline void +pasid_set_flpm(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2); +} + static void pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, int pasid) @@ -459,6 +480,65 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, devtlb_invalidation_with_pasid(iommu, dev, pasid); } +/* + * Set up the scalable mode pasid table entry for first only + * translation type. 
+ */ +int intel_pasid_setup_first_level(struct intel_iommu *iommu, + struct device *dev, pgd_t *pgd, + int pasid, u16 did, int flags) +{ + struct pasid_entry *pte; + + if (!ecap_flts(iommu->ecap)) { + pr_err("No first level translation support on %s\n", + iommu->name); + return -EINVAL; + } + + pte = intel_pasid_get_entry(dev, pasid); + if (WARN_ON(!pte)) + return -EINVAL; + + pasid_clear_entry(pte); + + /* Setup the first level page table pointer: */ + pasid_set_flptr(pte, (u64)__pa(pgd)); + if (flags & PASID_FLAG_SUPERVISOR_MODE) { + if (!ecap_srs(iommu->ecap)) { + pr_err("No supervisor request support on %s\n", + iommu->name); + return -EINVAL; + } + pasid_set_sre(pte); + } + +#ifdef CONFIG_X86 + if (cpu_feature_enabled(X86_FEATURE_LA57)) + pasid_set_flpm(pte, 1); +#endif /* CONFIG_X86 */ + + pasid_set_domain_id(pte, did); + pasid_set_address_width(pte, iommu->agaw); + pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + + /* Setup Present and PASID Granular Transfer Type: */ + pasid_set_translation_type(pte, 1); + pasid_set_present(pte); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + if (cap_caching_mode(iommu->cap)) { + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + iotlb_invalidation_with_pasid(iommu, did, pasid); + } else { + iommu_flush_write_buffer(iommu); + } + + return 0; +} + /* * Set up the scalable mode pasid entry for second only translation type. */ diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 55bb8715329d..512c63ec8a22 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -25,6 +25,14 @@ */ #define FLPT_DEFAULT_DID 1 +/* + * The SUPERVISOR_MODE flag indicates a first level translation which + * can be used for access to kernel addresses. It is valid only for + * access to the kernel's static 1:1 mapping of physical memory — not + * to vmalloc or even module mappings. 
+ */ +#define PASID_FLAG_SUPERVISOR_MODE BIT(0) + struct pasid_dir_entry { u64 val; }; @@ -51,6 +59,9 @@ struct pasid_table *intel_pasid_get_table(struct device *dev); int intel_pasid_get_dev_max_id(struct device *dev); struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid); void intel_pasid_clear_entry(struct device *dev, int pasid); +int intel_pasid_setup_first_level(struct intel_iommu *iommu, + struct device *dev, pgd_t *pgd, + int pasid, u16 did, int flags); int intel_pasid_setup_second_level(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, int pasid); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 5fdd33ed2cce..4ad62396e81e 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -178,6 +178,7 @@ */ #define ecap_smpwc(e) (((e) >> 48) & 0x1) +#define ecap_flts(e) (((e) >> 47) & 0x1) #define ecap_slts(e) (((e) >> 46) & 0x1) #define ecap_smts(e) (((e) >> 43) & 0x1) #define ecap_dit(e) ((e >> 41) & 0x1) -- cgit v1.2.3 From 1c4f88b7f1f9298b56c7dac18c0bcd8d2f75059a Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Dec 2018 09:59:05 +0800 Subject: iommu/vt-d: Shared virtual address in scalable mode This patch enables the current SVA (Shared Virtual Address) implementation to work in the scalable mode. 
Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 38 ----------------------------- drivers/iommu/intel-pasid.c | 2 +- drivers/iommu/intel-pasid.h | 1 - drivers/iommu/intel-svm.c | 58 ++++++++++++++------------------------------- include/linux/intel-iommu.h | 9 +------ 5 files changed, 20 insertions(+), 88 deletions(-) (limited to 'include') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 21a6853290cc..cec88df671a6 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5257,18 +5257,6 @@ static void intel_iommu_put_resv_regions(struct device *dev, } #ifdef CONFIG_INTEL_IOMMU_SVM -static inline unsigned long intel_iommu_get_pts(struct device *dev) -{ - int pts, max_pasid; - - max_pasid = intel_pasid_get_dev_max_id(dev); - pts = find_first_bit((unsigned long *)&max_pasid, MAX_NR_PASID_BITS); - if (pts < 5) - return 0; - - return pts - 5; -} - int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev) { struct device_domain_info *info; @@ -5300,33 +5288,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd sdev->sid = PCI_DEVID(info->bus, info->devfn); if (!(ctx_lo & CONTEXT_PASIDE)) { - if (iommu->pasid_state_table) - context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table); - context[1].lo = (u64)virt_to_phys(info->pasid_table->table) | - intel_iommu_get_pts(sdev->dev); - - wmb(); - /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both - * extended to permit requests-with-PASID if the PASIDE bit - * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH, - * however, the PASIDE bit is ignored and requests-with-PASID - * are unconditionally blocked. Which makes less sense. 
- * So convert from CONTEXT_TT_PASS_THROUGH to one of the new - * "guest mode" translation types depending on whether ATS - * is available or not. Annoyingly, we can't use the new - * modes *unless* PASIDE is set. */ - if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) { - ctx_lo &= ~CONTEXT_TT_MASK; - if (info->ats_supported) - ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2; - else - ctx_lo |= CONTEXT_TT_PT_PASID << 2; - } ctx_lo |= CONTEXT_PASIDE; - if (iommu->pasid_state_table) - ctx_lo |= CONTEXT_DINVE; - if (info->pri_supported) - ctx_lo |= CONTEXT_PRS; context[0].lo = ctx_lo; wmb(); iommu->flush.flush_context(iommu, sdev->did, sdev->sid, diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index c3dcf4dc2496..53fe5248d8f1 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -286,7 +286,7 @@ static inline void pasid_clear_entry(struct pasid_entry *pe) WRITE_ONCE(pe->val[7], 0); } -void intel_pasid_clear_entry(struct device *dev, int pasid) +static void intel_pasid_clear_entry(struct device *dev, int pasid) { struct pasid_entry *pe; diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 512c63ec8a22..23537b3f34e3 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -58,7 +58,6 @@ void intel_pasid_free_table(struct device *dev); struct pasid_table *intel_pasid_get_table(struct device *dev); int intel_pasid_get_dev_max_id(struct device *dev); struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid); -void intel_pasid_clear_entry(struct device *dev, int pasid); int intel_pasid_setup_first_level(struct intel_iommu *iommu, struct device *dev, pgd_t *pgd, int pasid, u16 did, int flags); diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index b7f1d12e24b0..04d6bdb51404 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -29,10 +29,6 @@ #include "intel-pasid.h" -#define PASID_ENTRY_P BIT_ULL(0) -#define 
PASID_ENTRY_FLPM_5LP BIT_ULL(9) -#define PASID_ENTRY_SRE BIT_ULL(11) - static irqreturn_t prq_event_thread(int irq, void *d); struct pasid_state_entry { @@ -248,20 +244,6 @@ static void intel_invalidate_range(struct mmu_notifier *mn, (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0); } - -static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev, int pasid) -{ - struct qi_desc desc; - - desc.qw0 = QI_PC_TYPE | QI_PC_DID(sdev->did) | - QI_PC_PASID_SEL | QI_PC_PASID(pasid); - desc.qw1 = 0; - desc.qw2 = 0; - desc.qw3 = 0; - - qi_submit_sync(&desc, svm->iommu); -} - static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); @@ -281,8 +263,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) */ rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) { - intel_pasid_clear_entry(sdev->dev, svm->pasid); - intel_flush_pasid_dev(svm, sdev, svm->pasid); + intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid); intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm); } rcu_read_unlock(); @@ -301,11 +282,9 @@ static LIST_HEAD(global_svm_list); int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) { struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); - struct pasid_entry *entry; struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; struct mm_struct *mm = NULL; - u64 pasid_entry_val; int pasid_max; int ret; @@ -414,22 +393,22 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ kfree(sdev); goto out; } - pasid_entry_val = (u64)__pa(mm->pgd) | PASID_ENTRY_P; - } else - pasid_entry_val = (u64)__pa(init_mm.pgd) | - PASID_ENTRY_P | PASID_ENTRY_SRE; - if (cpu_feature_enabled(X86_FEATURE_LA57)) - pasid_entry_val |= PASID_ENTRY_FLPM_5LP; - - entry = intel_pasid_get_entry(dev, svm->pasid); - WRITE_ONCE(entry->val[0], pasid_entry_val); - - /* - * 
Flush PASID cache when a PASID table entry becomes - * present. - */ - if (cap_caching_mode(iommu->cap)) - intel_flush_pasid_dev(svm, sdev, svm->pasid); + } + + spin_lock(&iommu->lock); + ret = intel_pasid_setup_first_level(iommu, dev, + mm ? mm->pgd : init_mm.pgd, + svm->pasid, FLPT_DEFAULT_DID, + mm ? 0 : PASID_FLAG_SUPERVISOR_MODE); + spin_unlock(&iommu->lock); + if (ret) { + if (mm) + mmu_notifier_unregister(&svm->notifier, mm); + intel_pasid_free_id(svm->pasid); + kfree(svm); + kfree(sdev); + goto out; + } list_add_tail(&svm->list, &global_svm_list); } @@ -475,10 +454,9 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) * to use. We have a *shared* PASID table, because it's * large and has to be physically contiguous. So it's * hard to be as defensive as we might like. */ - intel_flush_pasid_dev(svm, sdev, svm->pasid); + intel_pasid_tear_down_entry(iommu, dev, svm->pasid); intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm); kfree_rcu(sdev, rcu); - intel_pasid_clear_entry(dev, svm->pasid); if (list_empty(&svm->devs)) { intel_pasid_free_id(svm->pasid); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4ad62396e81e..cfcf9c1e1872 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -54,14 +54,7 @@ #define CONTEXT_TT_MULTI_LEVEL 0 #define CONTEXT_TT_DEV_IOTLB 1 #define CONTEXT_TT_PASS_THROUGH 2 -/* Extended context entry types */ -#define CONTEXT_TT_PT_PASID 4 -#define CONTEXT_TT_PT_PASID_DEV_IOTLB 5 -#define CONTEXT_TT_MASK (7ULL << 2) - -#define CONTEXT_DINVE (1ULL << 8) -#define CONTEXT_PRS (1ULL << 9) -#define CONTEXT_PASIDE (1ULL << 11) +#define CONTEXT_PASIDE BIT_ULL(3) /* * Intel IOMMU register specification per version 1.0 public spec. -- cgit v1.2.3 From 6d68b88e0993d67e9ebb1240f84240b712fbc8a4 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Dec 2018 09:59:06 +0800 Subject: iommu/vt-d: Remove deferred invalidation Deferred invalidation is an ECS specific feature. 
It will not be supported when IOMMU works in scalable mode. As we deprecated the ECS support, remove deferred invalidation and cleanup the code. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Liu Yi L Cc: Sanjay Kumar Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 1 - drivers/iommu/intel-svm.c | 45 --------------------------------------------- include/linux/intel-iommu.h | 8 -------- 3 files changed, 54 deletions(-) (limited to 'include') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index cec88df671a6..9043e1e9b2be 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1700,7 +1700,6 @@ static void free_dmar_iommu(struct intel_iommu *iommu) if (pasid_supported(iommu)) { if (ecap_prs(iommu->ecap)) intel_svm_finish_prq(iommu); - intel_svm_exit(iommu); } #endif } diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 04d6bdb51404..5b2e3b2d593b 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -31,15 +31,8 @@ static irqreturn_t prq_event_thread(int irq, void *d); -struct pasid_state_entry { - u64 val; -}; - int intel_svm_init(struct intel_iommu *iommu) { - struct page *pages; - int order; - if (cpu_feature_enabled(X86_FEATURE_GBPAGES) && !cap_fl1gp_support(iommu->cap)) return -EINVAL; @@ -48,39 +41,6 @@ int intel_svm_init(struct intel_iommu *iommu) !cap_5lp_support(iommu->cap)) return -EINVAL; - /* Start at 2 because it's defined as 2^(1+PSS) */ - iommu->pasid_max = 2 << ecap_pss(iommu->ecap); - - /* Eventually I'm promised we will get a multi-level PASID table - * and it won't have to be physically contiguous. Until then, - * limit the size because 8MiB contiguous allocations can be hard - * to come by. The limit of 0x20000, which is 1MiB for each of - * the PASID and PASID-state tables, is somewhat arbitrary. 
*/ - if (iommu->pasid_max > 0x20000) - iommu->pasid_max = 0x20000; - - order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max); - if (ecap_dis(iommu->ecap)) { - pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); - if (pages) - iommu->pasid_state_table = page_address(pages); - else - pr_warn("IOMMU: %s: Failed to allocate PASID state table\n", - iommu->name); - } - - return 0; -} - -int intel_svm_exit(struct intel_iommu *iommu) -{ - int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max); - - if (iommu->pasid_state_table) { - free_pages((unsigned long)iommu->pasid_state_table, order); - iommu->pasid_state_table = NULL; - } - return 0; } @@ -214,11 +174,6 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, { struct intel_svm_dev *sdev; - /* Try deferred invalidate if available */ - if (svm->iommu->pasid_state_table && - !cmpxchg64(&svm->iommu->pasid_state_table[svm->pasid].val, 0, 1ULL << 63)) - return; - rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) intel_flush_svm_range_dev(svm, sdev, address, pages, ih, gl); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index cfcf9c1e1872..0605f3bf6e79 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -541,15 +541,8 @@ struct intel_iommu { struct iommu_flush flush; #endif #ifdef CONFIG_INTEL_IOMMU_SVM - /* These are large and need to be contiguous, so we allocate just - * one for now. We'll maybe want to rethink that if we truly give - * devices away to userspace processes (e.g. for DPDK) and don't - * want to trust that userspace will use *only* the PASID it was - * told to. But while it's all driver-arbitrated, we're fine. 
*/ - struct pasid_state_entry *pasid_state_table; struct page_req_dsc *prq; unsigned char prq_name[16]; /* Name for PRQ interrupt */ - u32 pasid_max; #endif struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ @@ -663,7 +656,6 @@ void iommu_flush_write_buffer(struct intel_iommu *iommu); #ifdef CONFIG_INTEL_IOMMU_SVM int intel_svm_init(struct intel_iommu *iommu); -int intel_svm_exit(struct intel_iommu *iommu); extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); -- cgit v1.2.3 From b4ef725eeba158f365da9de1f05149094643ddea Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 28 Nov 2018 13:35:24 +0100 Subject: iommu: Introduce wrappers around dev->iommu_fwspec These wrappers will be used to easily change the location of the field later when all users are converted. Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 14 +++++++------- include/linux/iommu.h | 11 +++++++++++ 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index cc25ec6d4c06..304c067a0f85 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1976,7 +1976,7 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, const struct iommu_ops *ops) { - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); if (fwspec) return ops == fwspec->ops ? 
0 : -EINVAL; @@ -1988,26 +1988,26 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, of_node_get(to_of_node(iommu_fwnode)); fwspec->iommu_fwnode = iommu_fwnode; fwspec->ops = ops; - dev->iommu_fwspec = fwspec; + dev_iommu_fwspec_set(dev, fwspec); return 0; } EXPORT_SYMBOL_GPL(iommu_fwspec_init); void iommu_fwspec_free(struct device *dev) { - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); if (fwspec) { fwnode_handle_put(fwspec->iommu_fwnode); kfree(fwspec); - dev->iommu_fwspec = NULL; + dev_iommu_fwspec_set(dev, NULL); } } EXPORT_SYMBOL_GPL(iommu_fwspec_free); int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids) { - struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); size_t size; int i; @@ -2016,11 +2016,11 @@ int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids) size = offsetof(struct iommu_fwspec, ids[fwspec->num_ids + num_ids]); if (size > sizeof(*fwspec)) { - fwspec = krealloc(dev->iommu_fwspec, size, GFP_KERNEL); + fwspec = krealloc(fwspec, size, GFP_KERNEL); if (!fwspec) return -ENOMEM; - dev->iommu_fwspec = fwspec; + dev_iommu_fwspec_set(dev, fwspec); } for (i = 0; i < num_ids; i++) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 11db18b9ffe8..26225f762cd7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -398,6 +398,17 @@ void iommu_fwspec_free(struct device *dev); int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids); const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode); +static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) +{ + return dev->iommu_fwspec; +} + +static inline void dev_iommu_fwspec_set(struct device *dev, + struct iommu_fwspec *fwspec) +{ + dev->iommu_fwspec = fwspec; +} + #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; -- cgit v1.2.3 From 
dbba197edf32209d110727a02d3a91de4c88520f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 30 Nov 2018 12:51:52 +0100 Subject: driver core: Introduce device_iommu_mapped() function Some places in the kernel check the iommu_group pointer in 'struct device' in order to find out whether a device is mapped by an IOMMU. This is not a good way to make this check, as the pointer will be moved to 'struct dev_iommu_data'. This way to make the check is also not very readable. Introduce an explicit function to perform this check. Acked-by: Greg Kroah-Hartman Acked-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/device.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 1b25c7a43f4c..6cb4640b6160 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1058,6 +1058,16 @@ static inline struct device *kobj_to_dev(struct kobject *kobj) return container_of(kobj, struct device, kobj); } +/** + * device_iommu_mapped - Returns true when the device DMA is translated + * by an IOMMU + * @dev: Device to perform the check on + */ +static inline bool device_iommu_mapped(struct device *dev) +{ + return (dev->iommu_group != NULL); +} + /* Get the wakeup routines, which depend on struct device */ #include <linux/pm_wakeup.h> -- cgit v1.2.3 From cc5aed44a3a8e4fca721636cf881a52f8d68a098 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 30 Nov 2018 10:31:59 +0100 Subject: iommu: Consolitate ->add/remove_device() calls Put them into separate functions and call those where the plain ops have been called before.
Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 51 +++++++++++++++++++++++++-------------------------- include/linux/iommu.h | 3 +++ 2 files changed, 28 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 304c067a0f85..a2131751dcff 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -111,6 +111,23 @@ void iommu_device_unregister(struct iommu_device *iommu) spin_unlock(&iommu_device_lock); } +int iommu_probe_device(struct device *dev) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + + WARN_ON(dev->iommu_group); + + return ops->add_device(dev); +} + +void iommu_release_device(struct device *dev) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (dev->iommu_group) + ops->remove_device(dev); +} + static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, unsigned type); static int __iommu_attach_device(struct iommu_domain *domain, @@ -1118,16 +1135,7 @@ struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) static int add_iommu_group(struct device *dev, void *data) { - struct iommu_callback_data *cb = data; - const struct iommu_ops *ops = cb->ops; - int ret; - - if (!ops->add_device) - return 0; - - WARN_ON(dev->iommu_group); - - ret = ops->add_device(dev); + int ret = iommu_probe_device(dev); /* * We ignore -ENODEV errors for now, as they just mean that the @@ -1142,11 +1150,7 @@ static int add_iommu_group(struct device *dev, void *data) static int remove_iommu_group(struct device *dev, void *data) { - struct iommu_callback_data *cb = data; - const struct iommu_ops *ops = cb->ops; - - if (ops->remove_device && dev->iommu_group) - ops->remove_device(dev); + iommu_release_device(dev); return 0; } @@ -1154,27 +1158,22 @@ static int remove_iommu_group(struct device *dev, void *data) static int iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { + unsigned long group_action = 0; struct device *dev = 
data; - const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_group *group; - unsigned long group_action = 0; /* * ADD/DEL call into iommu driver ops if provided, which may * result in ADD/DEL notifiers to group->notifier */ if (action == BUS_NOTIFY_ADD_DEVICE) { - if (ops->add_device) { - int ret; + int ret; - ret = ops->add_device(dev); - return (ret) ? NOTIFY_DONE : NOTIFY_OK; - } + ret = iommu_probe_device(dev); + return (ret) ? NOTIFY_DONE : NOTIFY_OK; } else if (action == BUS_NOTIFY_REMOVED_DEVICE) { - if (ops->remove_device && dev->iommu_group) { - ops->remove_device(dev); - return 0; - } + iommu_release_device(dev); + return NOTIFY_OK; } /* diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 26225f762cd7..e90da6b6f3d1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -409,6 +409,9 @@ static inline void dev_iommu_fwspec_set(struct device *dev, dev->iommu_fwspec = fwspec; } +int iommu_probe_device(struct device *dev); +void iommu_release_device(struct device *dev); + #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; -- cgit v1.2.3