diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/vfio/pci/vfio_pci.c | 17 | ||||
-rw-r--r-- | drivers/vfio/vfio_iommu_spapr_tce.c | 9 | ||||
-rw-r--r-- | drivers/vfio/vfio_iommu_type1.c | 518 |
3 files changed, 524 insertions, 20 deletions
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 703948c9fbe1..02206162eaa9 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -438,11 +438,20 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); /* - * Try to reset the device. The success of this is dependent on - * being able to lock the device, which is not always possible. + * Try to get the locks ourselves to prevent a deadlock. The + * success of this is dependent on being able to lock the device, + * which is not always possible. + * We can not use the "try" reset interface here, which will + * overwrite the previously restored configuration information. */ - if (vdev->reset_works && !pci_try_reset_function(pdev)) - vdev->needs_reset = false; + if (vdev->reset_works && pci_cfg_access_trylock(pdev)) { + if (device_trylock(&pdev->dev)) { + if (!__pci_reset_function_locked(pdev)) + vdev->needs_reset = false; + device_unlock(&pdev->dev); + } + pci_cfg_access_unlock(pdev); + } pci_restore_state(pdev); out: diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 9809369e0ed3..3b18fa4d090a 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -1240,7 +1240,7 @@ release_exit: static int tce_iommu_attach_group(void *iommu_data, struct iommu_group *iommu_group) { - int ret; + int ret = 0; struct tce_container *container = iommu_data; struct iommu_table_group *table_group; struct tce_iommu_group *tcegrp = NULL; @@ -1293,13 +1293,13 @@ static int tce_iommu_attach_group(void *iommu_data, !table_group->ops->release_ownership) { if (container->v2) { ret = -EPERM; - goto unlock_exit; + goto free_exit; } ret = tce_iommu_take_ownership(container, table_group); } else { if (!container->v2) { ret = -EPERM; - goto unlock_exit; + goto free_exit; } ret = tce_iommu_take_ownership_ddw(container, table_group); if (!tce_groups_attached(container) && !container->tables[0]) @@ -1311,10 +1311,11 @@ static int tce_iommu_attach_group(void *iommu_data, list_add(&tcegrp->next, &container->group_list); } -unlock_exit: +free_exit: if (ret && tcegrp) kfree(tcegrp); +unlock_exit: mutex_unlock(&container->lock); return ret; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index ad830abe1021..9a50b0558fa9 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -62,6 +62,7 @@ MODULE_PARM_DESC(dma_entry_limit, struct vfio_iommu { struct list_head domain_list; + struct list_head iova_list; struct vfio_domain *external_domain; /* domain for external user */ struct mutex lock; struct rb_root dma_list; @@ -97,6 +98,12 @@ struct vfio_group { bool mdev_group; /* An mdev group */ }; +struct vfio_iova { + struct list_head list; + dma_addr_t start; + dma_addr_t end; +}; + /* * Guest RAM pinning working set or DMA target */ @@ -1038,6 +1045,27 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma, return ret; } +/* + * Check dma map request is within a valid iova range + */ +static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu, + dma_addr_t start, dma_addr_t end) +{ + struct list_head *iova = &iommu->iova_list; + struct vfio_iova *node; + + list_for_each_entry(node, iova, list) { + if (start >= node->start && end <= node->end) + return true; + } + + /* + * Check for list_empty() as well since a container with + * a single mdev device will have an empty list. + */ + return list_empty(iova); +} + static int vfio_dma_do_map(struct vfio_iommu *iommu, struct vfio_iommu_type1_dma_map *map) { @@ -1081,6 +1109,11 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, goto out_unlock; } + if (!vfio_iommu_iova_dma_valid(iommu, iova, iova + size - 1)) { + ret = -EINVAL; + goto out_unlock; + } + dma = kzalloc(sizeof(*dma), GFP_KERNEL); if (!dma) { ret = -ENOMEM; @@ -1270,15 +1303,13 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain, return NULL; } -static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base) +static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions, + phys_addr_t *base) { - struct list_head group_resv_regions; - struct iommu_resv_region *region, *next; + struct iommu_resv_region *region; bool ret = false; - INIT_LIST_HEAD(&group_resv_regions); - iommu_get_group_resv_regions(group, &group_resv_regions); - list_for_each_entry(region, &group_resv_regions, list) { + list_for_each_entry(region, group_resv_regions, list) { /* * The presence of any 'real' MSI regions should take * precedence over the software-managed one if the @@ -1294,8 +1325,7 @@ static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base) ret = true; } } - list_for_each_entry_safe(region, next, &group_resv_regions, list) - kfree(region); + return ret; } @@ -1395,6 +1425,228 @@ static int vfio_mdev_iommu_device(struct device *dev, void *data) return 0; } +/* + * This is a helper function to insert an address range to iova list. + * The list is initially created with a single entry corresponding to + * the IOMMU domain geometry to which the device group is attached. + * The list aperture gets modified when a new domain is added to the + * container if the new aperture doesn't conflict with the current one + * or with any existing dma mappings. The list is also modified to + * exclude any reserved regions associated with the device group. + */ +static int vfio_iommu_iova_insert(struct list_head *head, + dma_addr_t start, dma_addr_t end) +{ + struct vfio_iova *region; + + region = kmalloc(sizeof(*region), GFP_KERNEL); + if (!region) + return -ENOMEM; + + INIT_LIST_HEAD(®ion->list); + region->start = start; + region->end = end; + + list_add_tail(®ion->list, head); + return 0; +} + +/* + * Check the new iommu aperture conflicts with existing aper or with any + * existing dma mappings. + */ +static bool vfio_iommu_aper_conflict(struct vfio_iommu *iommu, + dma_addr_t start, dma_addr_t end) +{ + struct vfio_iova *first, *last; + struct list_head *iova = &iommu->iova_list; + + if (list_empty(iova)) + return false; + + /* Disjoint sets, return conflict */ + first = list_first_entry(iova, struct vfio_iova, list); + last = list_last_entry(iova, struct vfio_iova, list); + if (start > last->end || end < first->start) + return true; + + /* Check for any existing dma mappings below the new start */ + if (start > first->start) { + if (vfio_find_dma(iommu, first->start, start - first->start)) + return true; + } + + /* Check for any existing dma mappings beyond the new end */ + if (end < last->end) { + if (vfio_find_dma(iommu, end + 1, last->end - end)) + return true; + } + + return false; +} + +/* + * Resize iommu iova aperture window. This is called only if the new + * aperture has no conflict with existing aperture and dma mappings. + */ +static int vfio_iommu_aper_resize(struct list_head *iova, + dma_addr_t start, dma_addr_t end) +{ + struct vfio_iova *node, *next; + + if (list_empty(iova)) + return vfio_iommu_iova_insert(iova, start, end); + + /* Adjust iova list start */ + list_for_each_entry_safe(node, next, iova, list) { + if (start < node->start) + break; + if (start >= node->start && start < node->end) { + node->start = start; + break; + } + /* Delete nodes before new start */ + list_del(&node->list); + kfree(node); + } + + /* Adjust iova list end */ + list_for_each_entry_safe(node, next, iova, list) { + if (end > node->end) + continue; + if (end > node->start && end <= node->end) { + node->end = end; + continue; + } + /* Delete nodes after new end */ + list_del(&node->list); + kfree(node); + } + + return 0; +} + +/* + * Check reserved region conflicts with existing dma mappings + */ +static bool vfio_iommu_resv_conflict(struct vfio_iommu *iommu, + struct list_head *resv_regions) +{ + struct iommu_resv_region *region; + + /* Check for conflict with existing dma mappings */ + list_for_each_entry(region, resv_regions, list) { + if (region->type == IOMMU_RESV_DIRECT_RELAXABLE) + continue; + + if (vfio_find_dma(iommu, region->start, region->length)) + return true; + } + + return false; +} + +/* + * Check iova region overlap with reserved regions and + * exclude them from the iommu iova range + */ +static int vfio_iommu_resv_exclude(struct list_head *iova, + struct list_head *resv_regions) +{ + struct iommu_resv_region *resv; + struct vfio_iova *n, *next; + + list_for_each_entry(resv, resv_regions, list) { + phys_addr_t start, end; + + if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE) + continue; + + start = resv->start; + end = resv->start + resv->length - 1; + + list_for_each_entry_safe(n, next, iova, list) { + int ret = 0; + + /* No overlap */ + if (start > n->end || end < n->start) + continue; + /* + * Insert a new node if current node overlaps with the + * reserve region to exlude that from valid iova range. + * Note that, new node is inserted before the current + * node and finally the current node is deleted keeping + * the list updated and sorted. + */ + if (start > n->start) + ret = vfio_iommu_iova_insert(&n->list, n->start, + start - 1); + if (!ret && end < n->end) + ret = vfio_iommu_iova_insert(&n->list, end + 1, + n->end); + if (ret) + return ret; + + list_del(&n->list); + kfree(n); + } + } + + if (list_empty(iova)) + return -EINVAL; + + return 0; +} + +static void vfio_iommu_resv_free(struct list_head *resv_regions) +{ + struct iommu_resv_region *n, *next; + + list_for_each_entry_safe(n, next, resv_regions, list) { + list_del(&n->list); + kfree(n); + } +} + +static void vfio_iommu_iova_free(struct list_head *iova) +{ + struct vfio_iova *n, *next; + + list_for_each_entry_safe(n, next, iova, list) { + list_del(&n->list); + kfree(n); + } +} + +static int vfio_iommu_iova_get_copy(struct vfio_iommu *iommu, + struct list_head *iova_copy) +{ + struct list_head *iova = &iommu->iova_list; + struct vfio_iova *n; + int ret; + + list_for_each_entry(n, iova, list) { + ret = vfio_iommu_iova_insert(iova_copy, n->start, n->end); + if (ret) + goto out_free; + } + + return 0; + +out_free: + vfio_iommu_iova_free(iova_copy); + return ret; +} + +static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu, + struct list_head *iova_copy) +{ + struct list_head *iova = &iommu->iova_list; + + vfio_iommu_iova_free(iova); + + list_splice_tail(iova_copy, iova); +} static int vfio_iommu_type1_attach_group(void *iommu_data, struct iommu_group *iommu_group) { @@ -1405,6 +1657,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, int ret; bool resv_msi, msi_remap; phys_addr_t resv_msi_base; + struct iommu_domain_geometry geo; + LIST_HEAD(iova_copy); + LIST_HEAD(group_resv_regions); mutex_lock(&iommu->lock); @@ -1481,7 +1736,43 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, if (ret) goto out_domain; - resv_msi = vfio_iommu_has_sw_msi(iommu_group, &resv_msi_base); + /* Get aperture info */ + iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo); + + if (vfio_iommu_aper_conflict(iommu, geo.aperture_start, + geo.aperture_end)) { + ret = -EINVAL; + goto out_detach; + } + + ret = iommu_get_group_resv_regions(iommu_group, &group_resv_regions); + if (ret) + goto out_detach; + + if (vfio_iommu_resv_conflict(iommu, &group_resv_regions)) { + ret = -EINVAL; + goto out_detach; + } + + /* + * We don't want to work on the original iova list as the list + * gets modified and in case of failure we have to retain the + * original list. Get a copy here. + */ + ret = vfio_iommu_iova_get_copy(iommu, &iova_copy); + if (ret) + goto out_detach; + + ret = vfio_iommu_aper_resize(&iova_copy, geo.aperture_start, + geo.aperture_end); + if (ret) + goto out_detach; + + ret = vfio_iommu_resv_exclude(&iova_copy, &group_resv_regions); + if (ret) + goto out_detach; + + resv_msi = vfio_iommu_has_sw_msi(&group_resv_regions, &resv_msi_base); INIT_LIST_HEAD(&domain->group_list); list_add(&group->next, &domain->group_list); @@ -1514,8 +1805,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, list_add(&group->next, &d->group_list); iommu_domain_free(domain->domain); kfree(domain); - mutex_unlock(&iommu->lock); - return 0; + goto done; } ret = vfio_iommu_attach_group(domain, group); @@ -1538,8 +1828,11 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, } list_add(&domain->next, &iommu->domain_list); - +done: + /* Delete the old one and insert new iova list */ + vfio_iommu_iova_insert_copy(iommu, &iova_copy); mutex_unlock(&iommu->lock); + vfio_iommu_resv_free(&group_resv_regions); return 0; @@ -1547,6 +1840,8 @@ out_detach: vfio_iommu_detach_group(domain, group); out_domain: iommu_domain_free(domain->domain); + vfio_iommu_iova_free(&iova_copy); + vfio_iommu_resv_free(&group_resv_regions); out_free: kfree(domain); kfree(group); @@ -1602,12 +1897,93 @@ static void vfio_sanity_check_pfn_list(struct vfio_iommu *iommu) WARN_ON(iommu->notifier.head); } +/* + * Called when a domain is removed in detach. It is possible that + * the removed domain decided the iova aperture window. Modify the + * iova aperture with the smallest window among existing domains. + */ +static void vfio_iommu_aper_expand(struct vfio_iommu *iommu, + struct list_head *iova_copy) +{ + struct vfio_domain *domain; + struct iommu_domain_geometry geo; + struct vfio_iova *node; + dma_addr_t start = 0; + dma_addr_t end = (dma_addr_t)~0; + + if (list_empty(iova_copy)) + return; + + list_for_each_entry(domain, &iommu->domain_list, next) { + iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, + &geo); + if (geo.aperture_start > start) + start = geo.aperture_start; + if (geo.aperture_end < end) + end = geo.aperture_end; + } + + /* Modify aperture limits. The new aper is either same or bigger */ + node = list_first_entry(iova_copy, struct vfio_iova, list); + node->start = start; + node = list_last_entry(iova_copy, struct vfio_iova, list); + node->end = end; +} + +/* + * Called when a group is detached. The reserved regions for that + * group can be part of valid iova now. But since reserved regions + * may be duplicated among groups, populate the iova valid regions + * list again. + */ +static int vfio_iommu_resv_refresh(struct vfio_iommu *iommu, + struct list_head *iova_copy) +{ + struct vfio_domain *d; + struct vfio_group *g; + struct vfio_iova *node; + dma_addr_t start, end; + LIST_HEAD(resv_regions); + int ret; + + if (list_empty(iova_copy)) + return -EINVAL; + + list_for_each_entry(d, &iommu->domain_list, next) { + list_for_each_entry(g, &d->group_list, next) { + ret = iommu_get_group_resv_regions(g->iommu_group, + &resv_regions); + if (ret) + goto done; + } + } + + node = list_first_entry(iova_copy, struct vfio_iova, list); + start = node->start; + node = list_last_entry(iova_copy, struct vfio_iova, list); + end = node->end; + + /* purge the iova list and create new one */ + vfio_iommu_iova_free(iova_copy); + + ret = vfio_iommu_aper_resize(iova_copy, start, end); + if (ret) + goto done; + + /* Exclude current reserved regions from iova ranges */ + ret = vfio_iommu_resv_exclude(iova_copy, &resv_regions); +done: + vfio_iommu_resv_free(&resv_regions); + return ret; +} + static void vfio_iommu_type1_detach_group(void *iommu_data, struct iommu_group *iommu_group) { struct vfio_iommu *iommu = iommu_data; struct vfio_domain *domain; struct vfio_group *group; + LIST_HEAD(iova_copy); mutex_lock(&iommu->lock); @@ -1630,6 +2006,13 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, } } + /* + * Get a copy of iova list. This will be used to update + * and to replace the current one later. Please note that + * we will leave the original list as it is if update fails. + */ + vfio_iommu_iova_get_copy(iommu, &iova_copy); + list_for_each_entry(domain, &iommu->domain_list, next) { group = find_iommu_group(domain, iommu_group); if (!group) @@ -1655,10 +2038,16 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, iommu_domain_free(domain->domain); list_del(&domain->next); kfree(domain); + vfio_iommu_aper_expand(iommu, &iova_copy); } break; } + if (!vfio_iommu_resv_refresh(iommu, &iova_copy)) + vfio_iommu_iova_insert_copy(iommu, &iova_copy); + else + vfio_iommu_iova_free(&iova_copy); + detach_group_done: mutex_unlock(&iommu->lock); } @@ -1686,6 +2075,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) } INIT_LIST_HEAD(&iommu->domain_list); + INIT_LIST_HEAD(&iommu->iova_list); iommu->dma_list = RB_ROOT; iommu->dma_avail = dma_entry_limit; mutex_init(&iommu->lock); @@ -1729,6 +2119,9 @@ static void vfio_iommu_type1_release(void *iommu_data) list_del(&domain->next); kfree(domain); } + + vfio_iommu_iova_free(&iommu->iova_list); + kfree(iommu); } @@ -1749,6 +2142,73 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) return ret; } +static int vfio_iommu_iova_add_cap(struct vfio_info_cap *caps, + struct vfio_iommu_type1_info_cap_iova_range *cap_iovas, + size_t size) +{ + struct vfio_info_cap_header *header; + struct vfio_iommu_type1_info_cap_iova_range *iova_cap; + + header = vfio_info_cap_add(caps, size, + VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, 1); + if (IS_ERR(header)) + return PTR_ERR(header); + + iova_cap = container_of(header, + struct vfio_iommu_type1_info_cap_iova_range, + header); + iova_cap->nr_iovas = cap_iovas->nr_iovas; + memcpy(iova_cap->iova_ranges, cap_iovas->iova_ranges, + cap_iovas->nr_iovas * sizeof(*cap_iovas->iova_ranges)); + return 0; +} + +static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu, + struct vfio_info_cap *caps) +{ + struct vfio_iommu_type1_info_cap_iova_range *cap_iovas; + struct vfio_iova *iova; + size_t size; + int iovas = 0, i = 0, ret; + + mutex_lock(&iommu->lock); + + list_for_each_entry(iova, &iommu->iova_list, list) + iovas++; + + if (!iovas) { + /* + * Return 0 as a container with a single mdev device + * will have an empty list + */ + ret = 0; + goto out_unlock; + } + + size = sizeof(*cap_iovas) + (iovas * sizeof(*cap_iovas->iova_ranges)); + + cap_iovas = kzalloc(size, GFP_KERNEL); + if (!cap_iovas) { + ret = -ENOMEM; + goto out_unlock; + } + + cap_iovas->nr_iovas = iovas; + + list_for_each_entry(iova, &iommu->iova_list, list) { + cap_iovas->iova_ranges[i].start = iova->start; + cap_iovas->iova_ranges[i].end = iova->end; + i++; + } + + ret = vfio_iommu_iova_add_cap(caps, cap_iovas, size); + + kfree(cap_iovas); +out_unlock: + mutex_unlock(&iommu->lock); + return ret; +} + static long vfio_iommu_type1_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { @@ -1770,19 +2230,53 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, } } else if (cmd == VFIO_IOMMU_GET_INFO) { struct vfio_iommu_type1_info info; + struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; + unsigned long capsz; + int ret; minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); + /* For backward compatibility, cannot require this */ + capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset); + if (copy_from_user(&info, (void __user *)arg, minsz)) return -EFAULT; if (info.argsz < minsz) return -EINVAL; + if (info.argsz >= capsz) { + minsz = capsz; + info.cap_offset = 0; /* output, no-recopy necessary */ + } + info.flags = VFIO_IOMMU_INFO_PGSIZES; info.iova_pgsizes = vfio_pgsize_bitmap(iommu); + ret = vfio_iommu_iova_build_caps(iommu, &caps); + if (ret) + return ret; + + if (caps.size) { + info.flags |= VFIO_IOMMU_INFO_CAPS; + + if (info.argsz < sizeof(info) + caps.size) { + info.argsz = sizeof(info) + caps.size; + } else { + vfio_info_cap_shift(&caps, sizeof(info)); + if (copy_to_user((void __user *)arg + + sizeof(info), caps.buf, + caps.size)) { + kfree(caps.buf); + return -EFAULT; + } + info.cap_offset = sizeof(info); + } + + kfree(caps.buf); + } + return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; |