diff options
Diffstat (limited to 'drivers/iommu/intel/iommu.c')
-rw-r--r-- | drivers/iommu/intel/iommu.c | 233 |
1 files changed, 95 insertions, 138 deletions
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index ee0932307d64..708f430af1c4 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -360,7 +360,6 @@ int intel_iommu_enabled = 0; EXPORT_SYMBOL_GPL(intel_iommu_enabled); static int dmar_map_gfx = 1; -static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; static int iommu_identity_mapping; @@ -451,8 +450,8 @@ static int __init intel_iommu_setup(char *str) dmar_map_gfx = 0; pr_info("Disable GFX device mapping\n"); } else if (!strncmp(str, "forcedac", 8)) { - pr_info("Forcing DAC for PCI devices\n"); - dmar_forcedac = 1; + pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n"); + iommu_dma_forcedac = true; } else if (!strncmp(str, "strict", 6)) { pr_info("Disable batched IOTLB flush\n"); intel_iommu_strict = 1; @@ -658,7 +657,14 @@ static int domain_update_iommu_snooping(struct intel_iommu *skip) rcu_read_lock(); for_each_active_iommu(iommu, drhd) { if (iommu != skip) { - if (!ecap_sc_support(iommu->ecap)) { + /* + * If the hardware is operating in the scalable mode, + * the snooping control is always supported since we + * always set PASID-table-entry.PGSNP bit if the domain + * is managed outside (UNMANAGED). + */ + if (!sm_supported(iommu) && + !ecap_sc_support(iommu->ecap)) { ret = 0; break; } @@ -1340,6 +1346,11 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) readl, (sts & DMA_GSTS_RTPS), sts); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); + + iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); + if (sm_supported(iommu)) + qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); } void iommu_flush_write_buffer(struct intel_iommu *iommu) @@ -2289,6 +2300,41 @@ static inline int hardware_largepage_caps(struct dmar_domain *domain, return level; } +/* + * Ensure that old small page tables are removed to make room for superpage(s). + * We're going to add new large pages, so make sure we don't remove their parent + * tables. The IOTLB/devTLBs should be flushed if any PDE/PTEs are cleared. + */ +static void switch_to_super_page(struct dmar_domain *domain, + unsigned long start_pfn, + unsigned long end_pfn, int level) +{ + unsigned long lvl_pages = lvl_to_nr_pages(level); + struct dma_pte *pte = NULL; + int i; + + while (start_pfn <= end_pfn) { + if (!pte) + pte = pfn_to_dma_pte(domain, start_pfn, &level); + + if (dma_pte_present(pte)) { + dma_pte_free_pagetable(domain, start_pfn, + start_pfn + lvl_pages - 1, + level + 1); + + for_each_domain_iommu(i, domain) + iommu_flush_iotlb_psi(g_iommus[i], domain, + start_pfn, lvl_pages, + 0, 0); + } + + pte++; + start_pfn += lvl_pages; + if (first_pte_in_page(pte)) + pte = NULL; + } +} + static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, unsigned long phys_pfn, unsigned long nr_pages, int prot) @@ -2305,8 +2351,9 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, return -EINVAL; attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP); + attr |= DMA_FL_PTE_PRESENT; if (domain_use_first_level(domain)) { - attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US; + attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US; if (domain->domain.type == IOMMU_DOMAIN_DMA) { attr |= DMA_FL_PTE_ACCESS; @@ -2329,22 +2376,11 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, return -ENOMEM; /* It is large page*/ if (largepage_lvl > 1) { - unsigned long nr_superpages, end_pfn; + unsigned long end_pfn; pteval |= DMA_PTE_LARGE_PAGE; - lvl_pages = lvl_to_nr_pages(largepage_lvl); - - nr_superpages = nr_pages / lvl_pages; - end_pfn = iov_pfn + nr_superpages * lvl_pages - 1; - - /* - * Ensure that old small page tables are - * removed to make room for superpage(s). - * We're adding new large pages, so make sure - * we don't remove their parent tables. - */ - dma_pte_free_pagetable(domain, iov_pfn, end_pfn, - largepage_lvl + 1); + end_pfn = ((iov_pfn + nr_pages) & level_mask(largepage_lvl)) - 1; + switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl); } else { pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; } @@ -2422,6 +2458,10 @@ static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); + + if (sm_supported(iommu)) + qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0); + iommu->flush.flush_iotlb(iommu, did_old, 0, @@ -2505,6 +2545,9 @@ static int domain_setup_first_level(struct intel_iommu *iommu, flags |= (level == 5) ? PASID_FLAG_FL5LP : 0; + if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED) + flags |= PASID_FLAG_PAGE_SNOOP; + return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid, domain->iommu_did[iommu->seq_id], flags); @@ -3267,8 +3310,6 @@ static int __init init_dmars(void) register_pasid_allocator(iommu); #endif iommu_set_root_entry(iommu); - iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); } #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA @@ -3458,12 +3499,7 @@ static int init_iommu_hw(void) } iommu_flush_write_buffer(iommu); - iommu_set_root_entry(iommu); - - iommu->flush.flush_context(iommu, 0, 0, 0, - DMA_CCMD_GLOBAL_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); iommu_enable_translation(iommu); iommu_disable_protect_mem_regions(iommu); } @@ -3846,8 +3882,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) goto disable_iommu; iommu_set_root_entry(iommu); - iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); iommu_enable_translation(iommu); iommu_disable_protect_mem_regions(iommu); @@ -4065,35 +4099,6 @@ static struct notifier_block intel_iommu_memory_nb = { .priority = 0 }; -static void free_all_cpu_cached_iovas(unsigned int cpu) -{ - int i; - - for (i = 0; i < g_num_of_iommus; i++) { - struct intel_iommu *iommu = g_iommus[i]; - struct dmar_domain *domain; - int did; - - if (!iommu) - continue; - - for (did = 0; did < cap_ndoms(iommu->cap); did++) { - domain = get_iommu_domain(iommu, (u16)did); - - if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA) - continue; - - iommu_dma_free_cpu_cached_iovas(cpu, &domain->domain); - } - } -} - -static int intel_iommu_cpu_dead(unsigned int cpu) -{ - free_all_cpu_cached_iovas(cpu); - return 0; -} - static void intel_disable_iommus(void) { struct intel_iommu *iommu = NULL; @@ -4377,19 +4382,28 @@ int __init intel_iommu_init(void) down_read(&dmar_global_lock); for_each_active_iommu(iommu, drhd) { + /* + * The flush queue implementation does not perform + * page-selective invalidations that are required for efficient + * TLB flushes in virtual environments. The benefit of batching + * is likely to be much lower than the overhead of synchronizing + * the virtual and physical IOMMU page-tables. + */ + if (!intel_iommu_strict && cap_caching_mode(iommu->cap)) { + pr_warn("IOMMU batching is disabled due to virtualization"); + intel_iommu_strict = 1; + } iommu_device_sysfs_add(&iommu->iommu, NULL, intel_iommu_groups, "%s", iommu->name); - iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops); - iommu_device_register(&iommu->iommu); + iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); } up_read(&dmar_global_lock); + iommu_set_dma_strict(intel_iommu_strict); bus_set_iommu(&pci_bus_type, &intel_iommu_ops); if (si_domain && !hw_pass_through) register_memory_notifier(&intel_iommu_memory_nb); - cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL, - intel_iommu_cpu_dead); down_read(&dmar_global_lock); if (probe_acpi_namespace_devices()) @@ -5343,6 +5357,8 @@ static int siov_find_pci_dvsec(struct pci_dev *pdev) static bool intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat) { + struct device_domain_info *info = get_domain_info(dev); + if (feat == IOMMU_DEV_FEAT_AUX) { int ret; @@ -5357,13 +5373,13 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat) return !!siov_find_pci_dvsec(to_pci_dev(dev)); } - if (feat == IOMMU_DEV_FEAT_SVA) { - struct device_domain_info *info = get_domain_info(dev); + if (feat == IOMMU_DEV_FEAT_IOPF) + return info && info->pri_supported; + if (feat == IOMMU_DEV_FEAT_SVA) return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) && info->pasid_supported && info->pri_supported && info->ats_supported; - } return false; } @@ -5374,12 +5390,18 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) if (feat == IOMMU_DEV_FEAT_AUX) return intel_iommu_enable_auxd(dev); + if (feat == IOMMU_DEV_FEAT_IOPF) + return intel_iommu_dev_has_feat(dev, feat) ? 0 : -ENODEV; + if (feat == IOMMU_DEV_FEAT_SVA) { struct device_domain_info *info = get_domain_info(dev); if (!info) return -EINVAL; + if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) + return -EINVAL; + if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) return 0; } @@ -5423,87 +5445,23 @@ static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain, } static int -intel_iommu_domain_set_attr(struct iommu_domain *domain, - enum iommu_attr attr, void *data) +intel_iommu_enable_nesting(struct iommu_domain *domain) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); unsigned long flags; - int ret = 0; - - if (domain->type != IOMMU_DOMAIN_UNMANAGED) - return -EINVAL; + int ret = -ENODEV; - switch (attr) { - case DOMAIN_ATTR_NESTING: - spin_lock_irqsave(&device_domain_lock, flags); - if (nested_mode_support() && - list_empty(&dmar_domain->devices)) { - dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE; - dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL; - } else { - ret = -ENODEV; - } - spin_unlock_irqrestore(&device_domain_lock, flags); - break; - default: - ret = -EINVAL; - break; + spin_lock_irqsave(&device_domain_lock, flags); + if (nested_mode_support() && list_empty(&dmar_domain->devices)) { + dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE; + dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL; + ret = 0; } + spin_unlock_irqrestore(&device_domain_lock, flags); return ret; } -static bool domain_use_flush_queue(void) -{ - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; - bool r = true; - - if (intel_iommu_strict) - return false; - - /* - * The flush queue implementation does not perform page-selective - * invalidations that are required for efficient TLB flushes in virtual - * environments. The benefit of batching is likely to be much lower than - * the overhead of synchronizing the virtual and physical IOMMU - * page-tables. - */ - rcu_read_lock(); - for_each_active_iommu(iommu, drhd) { - if (!cap_caching_mode(iommu->cap)) - continue; - - pr_warn_once("IOMMU batching is disabled due to virtualization"); - r = false; - break; - } - rcu_read_unlock(); - - return r; -} - -static int -intel_iommu_domain_get_attr(struct iommu_domain *domain, - enum iommu_attr attr, void *data) -{ - switch (domain->type) { - case IOMMU_DOMAIN_UNMANAGED: - return -ENODEV; - case IOMMU_DOMAIN_DMA: - switch (attr) { - case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: - *(int *)data = domain_use_flush_queue(); - return 0; - default: - return -ENODEV; - } - break; - default: - return -EINVAL; - } -} - /* * Check that the device does not live on an external facing PCI port that is * marked as untrusted. Such devices should not be able to apply quirks and @@ -5576,8 +5534,7 @@ const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, .domain_free = intel_iommu_domain_free, - .domain_get_attr = intel_iommu_domain_get_attr, - .domain_set_attr = intel_iommu_domain_set_attr, + .enable_nesting = intel_iommu_enable_nesting, .attach_dev = intel_iommu_attach_device, .detach_dev = intel_iommu_detach_device, .aux_attach_dev = intel_iommu_aux_attach_device, |