Diffstat (limited to 'drivers/vfio')
-rw-r--r--   drivers/vfio/vfio_iommu_spapr_tce.c     9
-rw-r--r--   drivers/vfio/vfio_iommu_type1.c       518
2 files changed, 511 insertions, 16 deletions
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 8ce9ad21129f..babef8b00daf 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -1234,7 +1234,7 @@ release_exit:
static int tce_iommu_attach_group(void *iommu_data,
struct iommu_group *iommu_group)
{
- int ret;
+ int ret = 0;
struct tce_container *container = iommu_data;
struct iommu_table_group *table_group;
struct tce_iommu_group *tcegrp = NULL;
@@ -1287,13 +1287,13 @@ static int tce_iommu_attach_group(void *iommu_data,
!table_group->ops->release_ownership) {
if (container->v2) {
ret = -EPERM;
- goto unlock_exit;
+ goto free_exit;
}
ret = tce_iommu_take_ownership(container, table_group);
} else {
if (!container->v2) {
ret = -EPERM;
- goto unlock_exit;
+ goto free_exit;
}
ret = tce_iommu_take_ownership_ddw(container, table_group);
if (!tce_groups_attached(container) && !container->tables[0])
@@ -1305,10 +1305,11 @@ static int tce_iommu_attach_group(void *iommu_data,
list_add(&tcegrp->next, &container->group_list);
}
-unlock_exit:
+free_exit:
if (ret && tcegrp)
kfree(tcegrp);
+unlock_exit:
mutex_unlock(&container->lock);
return ret;
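
The fix above splits the old unlock_exit label in two so that tcegrp is freed only on error paths reached after it was allocated, while the container lock is always released. A minimal standalone sketch of the same two-label unwind pattern, in plain C with hypothetical names (not the kernel code itself):

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Attach a hypothetical group; free it only on error, always unlock. */
static int attach_group_sketch(int want_v2, int have_v2)
{
	void *grp;
	int ret = 0;

	pthread_mutex_lock(&lock);

	grp = calloc(1, 64);
	if (!grp) {
		ret = -ENOMEM;
		goto unlock_exit;	/* nothing to free yet */
	}

	if (want_v2 != have_v2) {
		ret = -EPERM;
		goto free_exit;		/* allocated but not published */
	}

	/* Success: ownership of grp passes to the container. */
	goto unlock_exit;

free_exit:
	free(grp);
unlock_exit:
	pthread_mutex_unlock(&lock);
	return ret;
}

int main(void)
{
	return attach_group_sketch(1, 0) == -EPERM ? 0 : 1;
}
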
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 054391f30fa8..a68405f24fbf 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -62,6 +62,7 @@ MODULE_PARM_DESC(dma_entry_limit,
struct vfio_iommu {
struct list_head domain_list;
+ struct list_head iova_list;
struct vfio_domain *external_domain; /* domain for external user */
struct mutex lock;
struct rb_root dma_list;
@@ -97,6 +98,12 @@ struct vfio_group {
bool mdev_group; /* An mdev group */
};
+struct vfio_iova {
+ struct list_head list;
+ dma_addr_t start;
+ dma_addr_t end;
+};
+
/*
* Guest RAM pinning working set or DMA target
*/
@@ -1031,6 +1038,27 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
return ret;
}
+/*
+ * Check that a dma map request is within a valid iova range
+ */
+static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu,
+ dma_addr_t start, dma_addr_t end)
+{
+ struct list_head *iova = &iommu->iova_list;
+ struct vfio_iova *node;
+
+ list_for_each_entry(node, iova, list) {
+ if (start >= node->start && end <= node->end)
+ return true;
+ }
+
+ /*
+ * Check for list_empty() as well since a container with
+ * a single mdev device will have an empty list.
+ */
+ return list_empty(iova);
+}
+
static int vfio_dma_do_map(struct vfio_iommu *iommu,
struct vfio_iommu_type1_dma_map *map)
{
@@ -1074,6 +1102,11 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
goto out_unlock;
}
+ if (!vfio_iommu_iova_dma_valid(iommu, iova, iova + size - 1)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
dma = kzalloc(sizeof(*dma), GFP_KERNEL);
if (!dma) {
ret = -ENOMEM;
@@ -1263,15 +1296,13 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain,
return NULL;
}
-static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base)
+static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
+ phys_addr_t *base)
{
- struct list_head group_resv_regions;
- struct iommu_resv_region *region, *next;
+ struct iommu_resv_region *region;
bool ret = false;
- INIT_LIST_HEAD(&group_resv_regions);
- iommu_get_group_resv_regions(group, &group_resv_regions);
- list_for_each_entry(region, &group_resv_regions, list) {
+ list_for_each_entry(region, group_resv_regions, list) {
/*
* The presence of any 'real' MSI regions should take
* precedence over the software-managed one if the
@@ -1287,8 +1318,7 @@ static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base)
ret = true;
}
}
- list_for_each_entry_safe(region, next, &group_resv_regions, list)
- kfree(region);
+
return ret;
}
@@ -1388,6 +1418,228 @@ static int vfio_mdev_iommu_device(struct device *dev, void *data)
return 0;
}
+/*
+ * This is a helper function to insert an address range into the iova list.
+ * The list is initially created with a single entry corresponding to
+ * the IOMMU domain geometry to which the device group is attached.
+ * The list aperture gets modified when a new domain is added to the
+ * container if the new aperture doesn't conflict with the current one
+ * or with any existing dma mappings. The list is also modified to
+ * exclude any reserved regions associated with the device group.
+ */
+static int vfio_iommu_iova_insert(struct list_head *head,
+ dma_addr_t start, dma_addr_t end)
+{
+ struct vfio_iova *region;
+
+ region = kmalloc(sizeof(*region), GFP_KERNEL);
+ if (!region)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&region->list);
+ region->start = start;
+ region->end = end;
+
+ list_add_tail(&region->list, head);
+ return 0;
+}
+
+/*
+ * Check whether the new iommu aperture conflicts with the existing
+ * aperture or with any existing dma mappings.
+ */
+static bool vfio_iommu_aper_conflict(struct vfio_iommu *iommu,
+ dma_addr_t start, dma_addr_t end)
+{
+ struct vfio_iova *first, *last;
+ struct list_head *iova = &iommu->iova_list;
+
+ if (list_empty(iova))
+ return false;
+
+ /* Disjoint sets, return conflict */
+ first = list_first_entry(iova, struct vfio_iova, list);
+ last = list_last_entry(iova, struct vfio_iova, list);
+ if (start > last->end || end < first->start)
+ return true;
+
+ /* Check for any existing dma mappings below the new start */
+ if (start > first->start) {
+ if (vfio_find_dma(iommu, first->start, start - first->start))
+ return true;
+ }
+
+ /* Check for any existing dma mappings beyond the new end */
+ if (end < last->end) {
+ if (vfio_find_dma(iommu, end + 1, last->end - end))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Resize iommu iova aperture window. This is called only if the new
+ * aperture has no conflict with existing aperture and dma mappings.
+ */
+static int vfio_iommu_aper_resize(struct list_head *iova,
+ dma_addr_t start, dma_addr_t end)
+{
+ struct vfio_iova *node, *next;
+
+ if (list_empty(iova))
+ return vfio_iommu_iova_insert(iova, start, end);
+
+ /* Adjust iova list start */
+ list_for_each_entry_safe(node, next, iova, list) {
+ if (start < node->start)
+ break;
+ if (start >= node->start && start < node->end) {
+ node->start = start;
+ break;
+ }
+ /* Delete nodes before new start */
+ list_del(&node->list);
+ kfree(node);
+ }
+
+ /* Adjust iova list end */
+ list_for_each_entry_safe(node, next, iova, list) {
+ if (end > node->end)
+ continue;
+ if (end > node->start && end <= node->end) {
+ node->end = end;
+ continue;
+ }
+ /* Delete nodes after new end */
+ list_del(&node->list);
+ kfree(node);
+ }
+
+ return 0;
+}
+
+/*
+ * Check whether the reserved regions conflict with existing dma mappings
+ */
+static bool vfio_iommu_resv_conflict(struct vfio_iommu *iommu,
+ struct list_head *resv_regions)
+{
+ struct iommu_resv_region *region;
+
+ /* Check for conflict with existing dma mappings */
+ list_for_each_entry(region, resv_regions, list) {
+ if (region->type == IOMMU_RESV_DIRECT_RELAXABLE)
+ continue;
+
+ if (vfio_find_dma(iommu, region->start, region->length))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Check the iova regions for overlap with the reserved regions and
+ * exclude them from the iommu iova range
+ */
+static int vfio_iommu_resv_exclude(struct list_head *iova,
+ struct list_head *resv_regions)
+{
+ struct iommu_resv_region *resv;
+ struct vfio_iova *n, *next;
+
+ list_for_each_entry(resv, resv_regions, list) {
+ phys_addr_t start, end;
+
+ if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
+ continue;
+
+ start = resv->start;
+ end = resv->start + resv->length - 1;
+
+ list_for_each_entry_safe(n, next, iova, list) {
+ int ret = 0;
+
+ /* No overlap */
+ if (start > n->end || end < n->start)
+ continue;
+ /*
+ * Insert a new node if the current node overlaps with the
+ * reserved region, to exclude that range from the valid iova
+ * ranges. Note that the new node is inserted before the
+ * current node, and finally the current node is deleted,
+ * keeping the list updated and sorted.
+ */
+ if (start > n->start)
+ ret = vfio_iommu_iova_insert(&n->list, n->start,
+ start - 1);
+ if (!ret && end < n->end)
+ ret = vfio_iommu_iova_insert(&n->list, end + 1,
+ n->end);
+ if (ret)
+ return ret;
+
+ list_del(&n->list);
+ kfree(n);
+ }
+ }
+
+ if (list_empty(iova))
+ return -EINVAL;
+
+ return 0;
+}
+
+static void vfio_iommu_resv_free(struct list_head *resv_regions)
+{
+ struct iommu_resv_region *n, *next;
+
+ list_for_each_entry_safe(n, next, resv_regions, list) {
+ list_del(&n->list);
+ kfree(n);
+ }
+}
+
+static void vfio_iommu_iova_free(struct list_head *iova)
+{
+ struct vfio_iova *n, *next;
+
+ list_for_each_entry_safe(n, next, iova, list) {
+ list_del(&n->list);
+ kfree(n);
+ }
+}
+
+static int vfio_iommu_iova_get_copy(struct vfio_iommu *iommu,
+ struct list_head *iova_copy)
+{
+ struct list_head *iova = &iommu->iova_list;
+ struct vfio_iova *n;
+ int ret;
+
+ list_for_each_entry(n, iova, list) {
+ ret = vfio_iommu_iova_insert(iova_copy, n->start, n->end);
+ if (ret)
+ goto out_free;
+ }
+
+ return 0;
+
+out_free:
+ vfio_iommu_iova_free(iova_copy);
+ return ret;
+}
+
+static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
+ struct list_head *iova_copy)
+{
+ struct list_head *iova = &iommu->iova_list;
+
+ vfio_iommu_iova_free(iova);
+
+ list_splice_tail(iova_copy, iova);
+}
static int vfio_iommu_type1_attach_group(void *iommu_data,
struct iommu_group *iommu_group)
{
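
The helpers added in the hunk above keep the container's valid iova ranges as a sorted list of inclusive [start, end] nodes, and vfio_iommu_resv_exclude() carves reserved regions out of them, possibly splitting a node in two. A small standalone illustration of that splitting step, in plain C with hypothetical names (no list_head plumbing, sketch only):

#include <stdint.h>
#include <stdio.h>

struct range { uint64_t start, end; };	/* inclusive, like struct vfio_iova */

/*
 * Remove the reserved range [rstart, rend] from r, writing the surviving
 * pieces to out[]. Returns how many pieces remain: 0, 1 or 2.
 */
static int range_exclude(struct range r, uint64_t rstart, uint64_t rend,
			 struct range out[2])
{
	int n = 0;

	if (rstart > r.end || rend < r.start) {		/* no overlap */
		out[n++] = r;
		return n;
	}
	if (rstart > r.start)				/* piece below the hole */
		out[n++] = (struct range){ r.start, rstart - 1 };
	if (rend < r.end)				/* piece above the hole */
		out[n++] = (struct range){ rend + 1, r.end };
	return n;
}

int main(void)
{
	struct range out[2];
	/* e.g. a 1MB reserved MSI window inside a 0-4G aperture */
	int n = range_exclude((struct range){ 0, 0xffffffff },
			      0x8000000, 0x80fffff, out);

	for (int i = 0; i < n; i++)
		printf("0x%llx - 0x%llx\n",
		       (unsigned long long)out[i].start,
		       (unsigned long long)out[i].end);
	return 0;
}
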
@@ -1398,6 +1650,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
int ret;
bool resv_msi, msi_remap;
phys_addr_t resv_msi_base;
+ struct iommu_domain_geometry geo;
+ LIST_HEAD(iova_copy);
+ LIST_HEAD(group_resv_regions);
mutex_lock(&iommu->lock);
@@ -1474,7 +1729,43 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
if (ret)
goto out_domain;
- resv_msi = vfio_iommu_has_sw_msi(iommu_group, &resv_msi_base);
+ /* Get aperture info */
+ iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo);
+
+ if (vfio_iommu_aper_conflict(iommu, geo.aperture_start,
+ geo.aperture_end)) {
+ ret = -EINVAL;
+ goto out_detach;
+ }
+
+ ret = iommu_get_group_resv_regions(iommu_group, &group_resv_regions);
+ if (ret)
+ goto out_detach;
+
+ if (vfio_iommu_resv_conflict(iommu, &group_resv_regions)) {
+ ret = -EINVAL;
+ goto out_detach;
+ }
+
+ /*
+ * We don't want to work on the original iova list as the list
+ * gets modified and in case of failure we have to retain the
+ * original list. Get a copy here.
+ */
+ ret = vfio_iommu_iova_get_copy(iommu, &iova_copy);
+ if (ret)
+ goto out_detach;
+
+ ret = vfio_iommu_aper_resize(&iova_copy, geo.aperture_start,
+ geo.aperture_end);
+ if (ret)
+ goto out_detach;
+
+ ret = vfio_iommu_resv_exclude(&iova_copy, &group_resv_regions);
+ if (ret)
+ goto out_detach;
+
+ resv_msi = vfio_iommu_has_sw_msi(&group_resv_regions, &resv_msi_base);
INIT_LIST_HEAD(&domain->group_list);
list_add(&group->next, &domain->group_list);
@@ -1507,8 +1798,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
list_add(&group->next, &d->group_list);
iommu_domain_free(domain->domain);
kfree(domain);
- mutex_unlock(&iommu->lock);
- return 0;
+ goto done;
}
ret = vfio_iommu_attach_group(domain, group);
@@ -1531,8 +1821,11 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
}
list_add(&domain->next, &iommu->domain_list);
-
+done:
+ /* Delete the old iova list and insert the new one */
+ vfio_iommu_iova_insert_copy(iommu, &iova_copy);
mutex_unlock(&iommu->lock);
+ vfio_iommu_resv_free(&group_resv_regions);
return 0;
@@ -1540,6 +1833,8 @@ out_detach:
vfio_iommu_detach_group(domain, group);
out_domain:
iommu_domain_free(domain->domain);
+ vfio_iommu_iova_free(&iova_copy);
+ vfio_iommu_resv_free(&group_resv_regions);
out_free:
kfree(domain);
kfree(group);
@@ -1595,12 +1890,93 @@ static void vfio_sanity_check_pfn_list(struct vfio_iommu *iommu)
WARN_ON(iommu->notifier.head);
}
+/*
+ * Called when a domain is removed in detach. It is possible that
+ * the removed domain determined the iova aperture window. Modify the
+ * iova aperture to the smallest window among the remaining domains.
+ */
+static void vfio_iommu_aper_expand(struct vfio_iommu *iommu,
+ struct list_head *iova_copy)
+{
+ struct vfio_domain *domain;
+ struct iommu_domain_geometry geo;
+ struct vfio_iova *node;
+ dma_addr_t start = 0;
+ dma_addr_t end = (dma_addr_t)~0;
+
+ if (list_empty(iova_copy))
+ return;
+
+ list_for_each_entry(domain, &iommu->domain_list, next) {
+ iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY,
+ &geo);
+ if (geo.aperture_start > start)
+ start = geo.aperture_start;
+ if (geo.aperture_end < end)
+ end = geo.aperture_end;
+ }
+
+ /* Modify aperture limits. The new aperture is either the same or bigger */
+ node = list_first_entry(iova_copy, struct vfio_iova, list);
+ node->start = start;
+ node = list_last_entry(iova_copy, struct vfio_iova, list);
+ node->end = end;
+}
+
+/*
+ * Called when a group is detached. The reserved regions for that
+ * group can become part of the valid iova ranges now. But since
+ * reserved regions may be duplicated among groups, rebuild the
+ * valid iova regions list from scratch.
+ */
+static int vfio_iommu_resv_refresh(struct vfio_iommu *iommu,
+ struct list_head *iova_copy)
+{
+ struct vfio_domain *d;
+ struct vfio_group *g;
+ struct vfio_iova *node;
+ dma_addr_t start, end;
+ LIST_HEAD(resv_regions);
+ int ret;
+
+ if (list_empty(iova_copy))
+ return -EINVAL;
+
+ list_for_each_entry(d, &iommu->domain_list, next) {
+ list_for_each_entry(g, &d->group_list, next) {
+ ret = iommu_get_group_resv_regions(g->iommu_group,
+ &resv_regions);
+ if (ret)
+ goto done;
+ }
+ }
+
+ node = list_first_entry(iova_copy, struct vfio_iova, list);
+ start = node->start;
+ node = list_last_entry(iova_copy, struct vfio_iova, list);
+ end = node->end;
+
+ /* Purge the iova list and create a new one */
+ vfio_iommu_iova_free(iova_copy);
+
+ ret = vfio_iommu_aper_resize(iova_copy, start, end);
+ if (ret)
+ goto done;
+
+ /* Exclude current reserved regions from iova ranges */
+ ret = vfio_iommu_resv_exclude(iova_copy, &resv_regions);
+done:
+ vfio_iommu_resv_free(&resv_regions);
+ return ret;
+}
+
static void vfio_iommu_type1_detach_group(void *iommu_data,
struct iommu_group *iommu_group)
{
struct vfio_iommu *iommu = iommu_data;
struct vfio_domain *domain;
struct vfio_group *group;
+ LIST_HEAD(iova_copy);
mutex_lock(&iommu->lock);
@@ -1623,6 +1999,13 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
}
}
+ /*
+ * Get a copy of the iova list. This will be used to update
+ * and then replace the current one later. Note that we leave
+ * the original list untouched if the update fails.
+ */
+ vfio_iommu_iova_get_copy(iommu, &iova_copy);
+
list_for_each_entry(domain, &iommu->domain_list, next) {
group = find_iommu_group(domain, iommu_group);
if (!group)
@@ -1648,10 +2031,16 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
iommu_domain_free(domain->domain);
list_del(&domain->next);
kfree(domain);
+ vfio_iommu_aper_expand(iommu, &iova_copy);
}
break;
}
+ if (!vfio_iommu_resv_refresh(iommu, &iova_copy))
+ vfio_iommu_iova_insert_copy(iommu, &iova_copy);
+ else
+ vfio_iommu_iova_free(&iova_copy);
+
detach_group_done:
mutex_unlock(&iommu->lock);
}
@@ -1679,6 +2068,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
}
INIT_LIST_HEAD(&iommu->domain_list);
+ INIT_LIST_HEAD(&iommu->iova_list);
iommu->dma_list = RB_ROOT;
iommu->dma_avail = dma_entry_limit;
mutex_init(&iommu->lock);
@@ -1722,6 +2112,9 @@ static void vfio_iommu_type1_release(void *iommu_data)
list_del(&domain->next);
kfree(domain);
}
+
+ vfio_iommu_iova_free(&iommu->iova_list);
+
kfree(iommu);
}
@@ -1742,6 +2135,73 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
return ret;
}
+static int vfio_iommu_iova_add_cap(struct vfio_info_cap *caps,
+ struct vfio_iommu_type1_info_cap_iova_range *cap_iovas,
+ size_t size)
+{
+ struct vfio_info_cap_header *header;
+ struct vfio_iommu_type1_info_cap_iova_range *iova_cap;
+
+ header = vfio_info_cap_add(caps, size,
+ VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, 1);
+ if (IS_ERR(header))
+ return PTR_ERR(header);
+
+ iova_cap = container_of(header,
+ struct vfio_iommu_type1_info_cap_iova_range,
+ header);
+ iova_cap->nr_iovas = cap_iovas->nr_iovas;
+ memcpy(iova_cap->iova_ranges, cap_iovas->iova_ranges,
+ cap_iovas->nr_iovas * sizeof(*cap_iovas->iova_ranges));
+ return 0;
+}
+
+static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu,
+ struct vfio_info_cap *caps)
+{
+ struct vfio_iommu_type1_info_cap_iova_range *cap_iovas;
+ struct vfio_iova *iova;
+ size_t size;
+ int iovas = 0, i = 0, ret;
+
+ mutex_lock(&iommu->lock);
+
+ list_for_each_entry(iova, &iommu->iova_list, list)
+ iovas++;
+
+ if (!iovas) {
+ /*
+ * Return 0 as a container with a single mdev device
+ * will have an empty list
+ */
+ ret = 0;
+ goto out_unlock;
+ }
+
+ size = sizeof(*cap_iovas) + (iovas * sizeof(*cap_iovas->iova_ranges));
+
+ cap_iovas = kzalloc(size, GFP_KERNEL);
+ if (!cap_iovas) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+
+ cap_iovas->nr_iovas = iovas;
+
+ list_for_each_entry(iova, &iommu->iova_list, list) {
+ cap_iovas->iova_ranges[i].start = iova->start;
+ cap_iovas->iova_ranges[i].end = iova->end;
+ i++;
+ }
+
+ ret = vfio_iommu_iova_add_cap(caps, cap_iovas, size);
+
+ kfree(cap_iovas);
+out_unlock:
+ mutex_unlock(&iommu->lock);
+ return ret;
+}
+
static long vfio_iommu_type1_ioctl(void *iommu_data,
unsigned int cmd, unsigned long arg)
{
@@ -1763,19 +2223,53 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
}
} else if (cmd == VFIO_IOMMU_GET_INFO) {
struct vfio_iommu_type1_info info;
+ struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+ unsigned long capsz;
+ int ret;
minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
+ /* For backward compatibility, cannot require this */
+ capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
+
if (copy_from_user(&info, (void __user *)arg, minsz))
return -EFAULT;
if (info.argsz < minsz)
return -EINVAL;
+ if (info.argsz >= capsz) {
+ minsz = capsz;
+ info.cap_offset = 0; /* output, no-recopy necessary */
+ }
+
info.flags = VFIO_IOMMU_INFO_PGSIZES;
info.iova_pgsizes = vfio_pgsize_bitmap(iommu);
+ ret = vfio_iommu_iova_build_caps(iommu, &caps);
+ if (ret)
+ return ret;
+
+ if (caps.size) {
+ info.flags |= VFIO_IOMMU_INFO_CAPS;
+
+ if (info.argsz < sizeof(info) + caps.size) {
+ info.argsz = sizeof(info) + caps.size;
+ } else {
+ vfio_info_cap_shift(&caps, sizeof(info));
+ if (copy_to_user((void __user *)arg +
+ sizeof(info), caps.buf,
+ caps.size)) {
+ kfree(caps.buf);
+ return -EFAULT;
+ }
+ info.cap_offset = sizeof(info);
+ }
+
+ kfree(caps.buf);
+ }
+
return copy_to_user((void __user *)arg, &info, minsz) ?
-EFAULT : 0;
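
With the capability chain populated above, userspace can discover the valid iova ranges through VFIO_IOMMU_GET_INFO. A hedged sketch of the usual two-call pattern (assumes a container fd already set to VFIO_TYPE1_IOMMU and the matching uapi header; error handling trimmed):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static void dump_iova_ranges(int container_fd)
{
	struct vfio_iommu_type1_info *info;
	struct vfio_info_cap_header *hdr;
	__u32 argsz = sizeof(*info);

	/* First call: learn how much space the capability chain needs. */
	info = calloc(1, argsz);
	info->argsz = argsz;
	ioctl(container_fd, VFIO_IOMMU_GET_INFO, info);

	/* Second call: fetch the info plus the capability chain. */
	argsz = info->argsz;
	info = realloc(info, argsz);
	memset(info, 0, argsz);
	info->argsz = argsz;
	ioctl(container_fd, VFIO_IOMMU_GET_INFO, info);

	if (!(info->flags & VFIO_IOMMU_INFO_CAPS) || !info->cap_offset)
		goto out;

	/* Walk the chain looking for the iova range capability. */
	hdr = (struct vfio_info_cap_header *)((char *)info + info->cap_offset);
	for (;;) {
		if (hdr->id == VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE) {
			struct vfio_iommu_type1_info_cap_iova_range *cap =
				(struct vfio_iommu_type1_info_cap_iova_range *)hdr;

			for (__u32 i = 0; i < cap->nr_iovas; i++)
				printf("iova range %u: 0x%llx - 0x%llx\n", i,
				       (unsigned long long)cap->iova_ranges[i].start,
				       (unsigned long long)cap->iova_ranges[i].end);
			break;
		}
		if (!hdr->next)
			break;
		hdr = (struct vfio_info_cap_header *)((char *)info + hdr->next);
	}
out:
	free(info);
}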