From 7b96953bc640b6b25665fe17ffca4b668b371f14 Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Thu, 17 Nov 2016 02:16:13 +0530 Subject: vfio: Mediated device Core driver Design for Mediated Device Driver: Main purpose of this driver is to provide a common interface for mediated device management that can be used by different drivers of different devices. This module provides a generic interface to create the device, add it to mediated bus, add device to IOMMU group and then add it to vfio group. Below is the high Level block diagram, with Nvidia, Intel and IBM devices as example, since these are the devices which are going to actively use this module as of now. +---------------+ | | | +-----------+ | mdev_register_driver() +--------------+ | | | +<------------------------+ __init() | | | mdev | | | | | | bus | +------------------------>+ |<-> VFIO user | | driver | | probe()/remove() | vfio_mdev.ko | APIs | | | | | | | +-----------+ | +--------------+ | | | MDEV CORE | | MODULE | | mdev.ko | | +-----------+ | mdev_register_device() +--------------+ | | | +<------------------------+ | | | | | | nvidia.ko |<-> physical | | | +------------------------>+ | device | | | | callback +--------------+ | | Physical | | | | device | | mdev_register_device() +--------------+ | | interface | |<------------------------+ | | | | | | i915.ko |<-> physical | | | +------------------------>+ | device | | | | callback +--------------+ | | | | | | | | mdev_register_device() +--------------+ | | | +<------------------------+ | | | | | | ccw_device.ko|<-> physical | | | +------------------------>+ | device | | | | callback +--------------+ | +-----------+ | +---------------+ Core driver provides two types of registration interfaces: 1. Registration interface for mediated bus driver: /** * struct mdev_driver - Mediated device's driver * @name: driver name * @probe: called when new device created * @remove:called when device removed * @driver:device driver structure * **/ struct mdev_driver { const char *name; int (*probe) (struct device *dev); void (*remove) (struct device *dev); struct device_driver driver; }; Mediated bus driver for mdev device should use this interface to register and unregister with core driver respectively: int mdev_register_driver(struct mdev_driver *drv, struct module *owner); void mdev_unregister_driver(struct mdev_driver *drv); Mediated bus driver is responsible to add/delete mediated devices to/from VFIO group when devices are bound and unbound to the driver. 2. Physical device driver interface This interface provides vendor driver the set APIs to manage physical device related work in its driver. APIs are : * dev_attr_groups: attributes of the parent device. * mdev_attr_groups: attributes of the mediated device. * supported_type_groups: attributes to define supported type. This is mandatory field. * create: to allocate basic resources in vendor driver for a mediated device. This is mandatory to be provided by vendor driver. * remove: to free resources in vendor driver when mediated device is destroyed. This is mandatory to be provided by vendor driver. * open: open callback of mediated device * release: release callback of mediated device * read : read emulation callback. * write: write emulation callback. * ioctl: ioctl callback. * mmap: mmap emulation callback. Drivers should use these interfaces to register and unregister device to mdev core driver respectively: extern int mdev_register_device(struct device *dev, const struct parent_ops *ops); extern void mdev_unregister_device(struct device *dev); There are no locks to serialize above callbacks in mdev driver and vfio_mdev driver. If required, vendor driver can have locks to serialize above APIs in their driver. Signed-off-by: Kirti Wankhede Signed-off-by: Neo Jia Reviewed-by: Jike Song Reviewed-by: Dong Jia Shi Signed-off-by: Alex Williamson --- include/linux/mdev.h | 168 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 include/linux/mdev.h (limited to 'include') diff --git a/include/linux/mdev.h b/include/linux/mdev.h new file mode 100644 index 000000000000..ec819e9a115a --- /dev/null +++ b/include/linux/mdev.h @@ -0,0 +1,168 @@ +/* + * Mediated device definition + * + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Author: Neo Jia + * Kirti Wankhede + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef MDEV_H +#define MDEV_H + +/* Parent device */ +struct parent_device { + struct device *dev; + const struct parent_ops *ops; + + /* internal */ + struct kref ref; + struct mutex lock; + struct list_head next; + struct kset *mdev_types_kset; + struct list_head type_list; +}; + +/* Mediated device */ +struct mdev_device { + struct device dev; + struct parent_device *parent; + uuid_le uuid; + void *driver_data; + + /* internal */ + struct kref ref; + struct list_head next; + struct kobject *type_kobj; +}; + +/** + * struct parent_ops - Structure to be registered for each parent device to + * register the device to mdev module. + * + * @owner: The module owner. + * @dev_attr_groups: Attributes of the parent device. + * @mdev_attr_groups: Attributes of the mediated device. + * @supported_type_groups: Attributes to define supported types. It is mandatory + * to provide supported types. + * @create: Called to allocate basic resources in parent device's + * driver for a particular mediated device. It is + * mandatory to provide create ops. + * @kobj: kobject of type for which 'create' is called. + * @mdev: mdev_device structure on of mediated device + * that is being created + * Returns integer: success (0) or error (< 0) + * @remove: Called to free resources in parent device's driver for a + * a mediated device. It is mandatory to provide 'remove' + * ops. + * @mdev: mdev_device device structure which is being + * destroyed + * Returns integer: success (0) or error (< 0) + * @open: Open mediated device. + * @mdev: mediated device. + * Returns integer: success (0) or error (< 0) + * @release: release mediated device + * @mdev: mediated device. + * @read: Read emulation callback + * @mdev: mediated device structure + * @buf: read buffer + * @count: number of bytes to read + * @ppos: address. + * Retuns number on bytes read on success or error. + * @write: Write emulation callback + * @mdev: mediated device structure + * @buf: write buffer + * @count: number of bytes to be written + * @ppos: address. + * Retuns number on bytes written on success or error. + * @ioctl: IOCTL callback + * @mdev: mediated device structure + * @cmd: ioctl command + * @arg: arguments to ioctl + * @mmap: mmap callback + * @mdev: mediated device structure + * @vma: vma structure + * Parent device that support mediated device should be registered with mdev + * module with parent_ops structure. + **/ + +struct parent_ops { + struct module *owner; + const struct attribute_group **dev_attr_groups; + const struct attribute_group **mdev_attr_groups; + struct attribute_group **supported_type_groups; + + int (*create)(struct kobject *kobj, struct mdev_device *mdev); + int (*remove)(struct mdev_device *mdev); + int (*open)(struct mdev_device *mdev); + void (*release)(struct mdev_device *mdev); + ssize_t (*read)(struct mdev_device *mdev, char __user *buf, + size_t count, loff_t *ppos); + ssize_t (*write)(struct mdev_device *mdev, const char __user *buf, + size_t count, loff_t *ppos); + ssize_t (*ioctl)(struct mdev_device *mdev, unsigned int cmd, + unsigned long arg); + int (*mmap)(struct mdev_device *mdev, struct vm_area_struct *vma); +}; + +/* interface for exporting mdev supported type attributes */ +struct mdev_type_attribute { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, struct device *dev, char *buf); + ssize_t (*store)(struct kobject *kobj, struct device *dev, + const char *buf, size_t count); +}; + +#define MDEV_TYPE_ATTR(_name, _mode, _show, _store) \ +struct mdev_type_attribute mdev_type_attr_##_name = \ + __ATTR(_name, _mode, _show, _store) +#define MDEV_TYPE_ATTR_RW(_name) \ + struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RW(_name) +#define MDEV_TYPE_ATTR_RO(_name) \ + struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RO(_name) +#define MDEV_TYPE_ATTR_WO(_name) \ + struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_WO(_name) + +/** + * struct mdev_driver - Mediated device driver + * @name: driver name + * @probe: called when new device created + * @remove: called when device removed + * @driver: device driver structure + * + **/ +struct mdev_driver { + const char *name; + int (*probe)(struct device *dev); + void (*remove)(struct device *dev); + struct device_driver driver; +}; + +#define to_mdev_driver(drv) container_of(drv, struct mdev_driver, driver) +#define to_mdev_device(dev) container_of(dev, struct mdev_device, dev) + +static inline void *mdev_get_drvdata(struct mdev_device *mdev) +{ + return mdev->driver_data; +} + +static inline void mdev_set_drvdata(struct mdev_device *mdev, void *data) +{ + mdev->driver_data = data; +} + +extern struct bus_type mdev_bus_type; + +#define dev_is_mdev(d) ((d)->bus == &mdev_bus_type) + +extern int mdev_register_device(struct device *dev, + const struct parent_ops *ops); +extern void mdev_unregister_device(struct device *dev); + +extern int mdev_register_driver(struct mdev_driver *drv, struct module *owner); +extern void mdev_unregister_driver(struct mdev_driver *drv); + +#endif /* MDEV_H */ -- cgit v1.2.3 From 2169037dc322d8baa84d9bd4468995f818f25d82 Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Thu, 17 Nov 2016 02:16:17 +0530 Subject: vfio iommu: Added pin and unpin callback functions to vfio_iommu_driver_ops Added APIs for pining and unpining set of pages. These call back into backend iommu module to actually pin and unpin pages. Added two new callback functions to struct vfio_iommu_driver_ops. Backend IOMMU module that supports pining and unpinning pages for mdev devices should provide these functions. Renamed static functions in vfio_type1_iommu.c to resolve conflicts Signed-off-by: Kirti Wankhede Signed-off-by: Neo Jia Reviewed-by: Dong Jia Shi Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 102 ++++++++++++++++++++++++++++++++++++++++ drivers/vfio/vfio_iommu_type1.c | 20 ++++---- include/linux/vfio.h | 13 ++++- 3 files changed, 124 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 2e83bdf007fe..bd36c16b0ef2 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1799,6 +1799,108 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) } EXPORT_SYMBOL_GPL(vfio_info_cap_shift); + +/* + * Pin a set of guest PFNs and return their associated host PFNs for local + * domain only. + * @dev [in] : device + * @user_pfn [in]: array of user/guest PFNs to be unpinned. + * @npage [in] : count of elements in user_pfn array. This count should not + * be greater VFIO_PIN_PAGES_MAX_ENTRIES. + * @prot [in] : protection flags + * @phys_pfn[out]: array of host PFNs + * Return error or number of pages pinned. + */ +int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage, + int prot, unsigned long *phys_pfn) +{ + struct vfio_container *container; + struct vfio_group *group; + struct vfio_iommu_driver *driver; + int ret; + + if (!dev || !user_pfn || !phys_pfn || !npage) + return -EINVAL; + + if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) + return -E2BIG; + + group = vfio_group_get_from_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + ret = vfio_group_add_container_user(group); + if (ret) + goto err_pin_pages; + + container = group->container; + down_read(&container->group_lock); + + driver = container->iommu_driver; + if (likely(driver && driver->ops->pin_pages)) + ret = driver->ops->pin_pages(container->iommu_data, user_pfn, + npage, prot, phys_pfn); + else + ret = -ENOTTY; + + up_read(&container->group_lock); + vfio_group_try_dissolve_container(group); + +err_pin_pages: + vfio_group_put(group); + return ret; +} +EXPORT_SYMBOL(vfio_pin_pages); + +/* + * Unpin set of host PFNs for local domain only. + * @dev [in] : device + * @user_pfn [in]: array of user/guest PFNs to be unpinned. Number of user/guest + * PFNs should not be greater than VFIO_PIN_PAGES_MAX_ENTRIES. + * @npage [in] : count of elements in user_pfn array. This count should not + * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. + * Return error or number of pages unpinned. + */ +int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage) +{ + struct vfio_container *container; + struct vfio_group *group; + struct vfio_iommu_driver *driver; + int ret; + + if (!dev || !user_pfn || !npage) + return -EINVAL; + + if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) + return -E2BIG; + + group = vfio_group_get_from_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + ret = vfio_group_add_container_user(group); + if (ret) + goto err_unpin_pages; + + container = group->container; + down_read(&container->group_lock); + + driver = container->iommu_driver; + if (likely(driver && driver->ops->unpin_pages)) + ret = driver->ops->unpin_pages(container->iommu_data, user_pfn, + npage); + else + ret = -ENOTTY; + + up_read(&container->group_lock); + vfio_group_try_dissolve_container(group); + +err_unpin_pages: + vfio_group_put(group); + return ret; +} +EXPORT_SYMBOL(vfio_unpin_pages); + /** * Module/class support */ diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 2ba19424e4a1..9f3d58d3dfaf 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -259,8 +259,8 @@ static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn) * the iommu can only map chunks of consecutive pfns anyway, so get the * first page and all consecutive pages with the same locking. */ -static long vfio_pin_pages(unsigned long vaddr, long npage, - int prot, unsigned long *pfn_base) +static long vfio_pin_pages_remote(unsigned long vaddr, long npage, + int prot, unsigned long *pfn_base) { unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; bool lock_cap = capable(CAP_IPC_LOCK); @@ -318,8 +318,8 @@ static long vfio_pin_pages(unsigned long vaddr, long npage, return i; } -static long vfio_unpin_pages(unsigned long pfn, long npage, - int prot, bool do_accounting) +static long vfio_unpin_pages_remote(unsigned long pfn, long npage, + int prot, bool do_accounting) { unsigned long unlocked = 0; long i; @@ -382,9 +382,9 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma) if (WARN_ON(!unmapped)) break; - unlocked += vfio_unpin_pages(phys >> PAGE_SHIFT, - unmapped >> PAGE_SHIFT, - dma->prot, false); + unlocked += vfio_unpin_pages_remote(phys >> PAGE_SHIFT, + unmapped >> PAGE_SHIFT, + dma->prot, false); iova += unmapped; cond_resched(); @@ -613,8 +613,8 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, while (size) { /* Pin a contiguous chunk of memory */ - npage = vfio_pin_pages(vaddr + dma->size, - size >> PAGE_SHIFT, prot, &pfn); + npage = vfio_pin_pages_remote(vaddr + dma->size, + size >> PAGE_SHIFT, prot, &pfn); if (npage <= 0) { WARN_ON(!npage); ret = (int)npage; @@ -624,7 +624,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, /* Map it! */ ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage, prot); if (ret) { - vfio_unpin_pages(pfn, npage, prot, true); + vfio_unpin_pages_remote(pfn, npage, prot, true); break; } diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 0ecae0b1cd34..3c862a030029 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -75,7 +75,11 @@ struct vfio_iommu_driver_ops { struct iommu_group *group); void (*detach_group)(void *iommu_data, struct iommu_group *group); - + int (*pin_pages)(void *iommu_data, unsigned long *user_pfn, + int npage, int prot, + unsigned long *phys_pfn); + int (*unpin_pages)(void *iommu_data, + unsigned long *user_pfn, int npage); }; extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); @@ -92,6 +96,13 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group); extern long vfio_external_check_extension(struct vfio_group *group, unsigned long arg); +#define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long)) + +extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, + int npage, int prot, unsigned long *phys_pfn); +extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, + int npage); + /* * Sub-module helpers */ -- cgit v1.2.3 From c086de818dd81c3c2f7cecff23de6585b74340c0 Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Thu, 17 Nov 2016 10:28:26 +0530 Subject: vfio iommu: Add blocking notifier to notify DMA_UNMAP Added blocking notifier to IOMMU TYPE1 driver to notify vendor drivers about DMA_UNMAP. Exported two APIs vfio_register_notifier() and vfio_unregister_notifier(). Notifier should be registered, if external user wants to use vfio_pin_pages()/vfio_unpin_pages() APIs to pin/unpin pages. Vendor driver should use VFIO_IOMMU_NOTIFY_DMA_UNMAP action to invalidate mappings. Signed-off-by: Kirti Wankhede Signed-off-by: Neo Jia Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 73 ++++++++++++++++++++++++++++++++++++++++ drivers/vfio/vfio_iommu_type1.c | 74 +++++++++++++++++++++++++++++++++-------- include/linux/vfio.h | 12 +++++++ 3 files changed, 146 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index bd36c16b0ef2..301eed07a0ab 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1901,6 +1901,79 @@ err_unpin_pages: } EXPORT_SYMBOL(vfio_unpin_pages); +int vfio_register_notifier(struct device *dev, struct notifier_block *nb) +{ + struct vfio_container *container; + struct vfio_group *group; + struct vfio_iommu_driver *driver; + int ret; + + if (!dev || !nb) + return -EINVAL; + + group = vfio_group_get_from_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + ret = vfio_group_add_container_user(group); + if (ret) + goto err_register_nb; + + container = group->container; + down_read(&container->group_lock); + + driver = container->iommu_driver; + if (likely(driver && driver->ops->register_notifier)) + ret = driver->ops->register_notifier(container->iommu_data, nb); + else + ret = -ENOTTY; + + up_read(&container->group_lock); + vfio_group_try_dissolve_container(group); + +err_register_nb: + vfio_group_put(group); + return ret; +} +EXPORT_SYMBOL(vfio_register_notifier); + +int vfio_unregister_notifier(struct device *dev, struct notifier_block *nb) +{ + struct vfio_container *container; + struct vfio_group *group; + struct vfio_iommu_driver *driver; + int ret; + + if (!dev || !nb) + return -EINVAL; + + group = vfio_group_get_from_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + ret = vfio_group_add_container_user(group); + if (ret) + goto err_unregister_nb; + + container = group->container; + down_read(&container->group_lock); + + driver = container->iommu_driver; + if (likely(driver && driver->ops->unregister_notifier)) + ret = driver->ops->unregister_notifier(container->iommu_data, + nb); + else + ret = -ENOTTY; + + up_read(&container->group_lock); + vfio_group_try_dissolve_container(group); + +err_unregister_nb: + vfio_group_put(group); + return ret; +} +EXPORT_SYMBOL(vfio_unregister_notifier); + /** * Module/class support */ diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index db0901f8a149..51810a95416e 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -38,6 +38,7 @@ #include #include #include +#include #define DRIVER_VERSION "0.2" #define DRIVER_AUTHOR "Alex Williamson " @@ -60,6 +61,7 @@ struct vfio_iommu { struct vfio_domain *external_domain; /* domain for external user */ struct mutex lock; struct rb_root dma_list; + struct blocking_notifier_head notifier; bool v2; bool nesting; }; @@ -561,7 +563,8 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, mutex_lock(&iommu->lock); - if (!iommu->external_domain) { + /* Fail if notifier list is empty */ + if ((!iommu->external_domain) || (!iommu->notifier.head)) { ret = -EINVAL; goto pin_done; } @@ -776,9 +779,9 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, struct vfio_iommu_type1_dma_unmap *unmap) { uint64_t mask; - struct vfio_dma *dma; + struct vfio_dma *dma, *dma_last = NULL; size_t unmapped = 0; - int ret = 0; + int ret = 0, retries = 0; mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1; @@ -788,7 +791,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, return -EINVAL; WARN_ON(mask & PAGE_MASK); - +again: mutex_lock(&iommu->lock); /* @@ -844,6 +847,32 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, */ if (dma->task->mm != current->mm) break; + + if (!RB_EMPTY_ROOT(&dma->pfn_list)) { + struct vfio_iommu_type1_dma_unmap nb_unmap; + + if (dma_last == dma) { + BUG_ON(++retries > 10); + } else { + dma_last = dma; + retries = 0; + } + + nb_unmap.iova = dma->iova; + nb_unmap.size = dma->size; + + /* + * Notify anyone (mdev vendor drivers) to invalidate and + * unmap iovas within the range we're about to unmap. + * Vendor drivers MUST unpin pages in response to an + * invalidation. + */ + mutex_unlock(&iommu->lock); + blocking_notifier_call_chain(&iommu->notifier, + VFIO_IOMMU_NOTIFY_DMA_UNMAP, + &nb_unmap); + goto again; + } unmapped += dma->size; vfio_remove_dma(iommu, dma); } @@ -1419,6 +1448,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) INIT_LIST_HEAD(&iommu->domain_list); iommu->dma_list = RB_ROOT; mutex_init(&iommu->lock); + BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier); return iommu; } @@ -1553,16 +1583,34 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, return -ENOTTY; } +static int vfio_iommu_type1_register_notifier(void *iommu_data, + struct notifier_block *nb) +{ + struct vfio_iommu *iommu = iommu_data; + + return blocking_notifier_chain_register(&iommu->notifier, nb); +} + +static int vfio_iommu_type1_unregister_notifier(void *iommu_data, + struct notifier_block *nb) +{ + struct vfio_iommu *iommu = iommu_data; + + return blocking_notifier_chain_unregister(&iommu->notifier, nb); +} + static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = { - .name = "vfio-iommu-type1", - .owner = THIS_MODULE, - .open = vfio_iommu_type1_open, - .release = vfio_iommu_type1_release, - .ioctl = vfio_iommu_type1_ioctl, - .attach_group = vfio_iommu_type1_attach_group, - .detach_group = vfio_iommu_type1_detach_group, - .pin_pages = vfio_iommu_type1_pin_pages, - .unpin_pages = vfio_iommu_type1_unpin_pages, + .name = "vfio-iommu-type1", + .owner = THIS_MODULE, + .open = vfio_iommu_type1_open, + .release = vfio_iommu_type1_release, + .ioctl = vfio_iommu_type1_ioctl, + .attach_group = vfio_iommu_type1_attach_group, + .detach_group = vfio_iommu_type1_detach_group, + .pin_pages = vfio_iommu_type1_pin_pages, + .unpin_pages = vfio_iommu_type1_unpin_pages, + .register_notifier = vfio_iommu_type1_register_notifier, + .unregister_notifier = vfio_iommu_type1_unregister_notifier, }; static int __init vfio_iommu_type1_init(void) diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 3c862a030029..6ab13f7e2920 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -80,6 +80,10 @@ struct vfio_iommu_driver_ops { unsigned long *phys_pfn); int (*unpin_pages)(void *iommu_data, unsigned long *user_pfn, int npage); + int (*register_notifier)(void *iommu_data, + struct notifier_block *nb); + int (*unregister_notifier)(void *iommu_data, + struct notifier_block *nb); }; extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); @@ -103,6 +107,14 @@ extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage); +#define VFIO_IOMMU_NOTIFY_DMA_UNMAP (1) + +extern int vfio_register_notifier(struct device *dev, + struct notifier_block *nb); + +extern int vfio_unregister_notifier(struct device *dev, + struct notifier_block *nb); + /* * Sub-module helpers */ -- cgit v1.2.3 From b3c0a866f1692da2d1059dadd9c429ff5b364fc9 Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Thu, 17 Nov 2016 02:16:25 +0530 Subject: vfio: Introduce common function to add capabilities Vendor driver using mediated device framework should use vfio_info_add_capability() to add capabilities. Introduced this function to reduce code duplication in vendor drivers. vfio_info_cap_shift() manipulated a data buffer to add an offset to each element in a chain. This data buffer is documented in a uapi header. Changing vfio_info_cap_shift symbol to be available to all drivers. Signed-off-by: Kirti Wankhede Signed-off-by: Neo Jia Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++- include/linux/vfio.h | 3 +++ 2 files changed, 62 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 301eed07a0ab..bb3ddb76911f 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1797,8 +1797,66 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next - offset) tmp->next += offset; } -EXPORT_SYMBOL_GPL(vfio_info_cap_shift); +EXPORT_SYMBOL(vfio_info_cap_shift); +static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type) +{ + struct vfio_info_cap_header *header; + struct vfio_region_info_cap_sparse_mmap *sparse_cap, *sparse = cap_type; + size_t size; + + size = sizeof(*sparse) + sparse->nr_areas * sizeof(*sparse->areas); + header = vfio_info_cap_add(caps, size, + VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1); + if (IS_ERR(header)) + return PTR_ERR(header); + + sparse_cap = container_of(header, + struct vfio_region_info_cap_sparse_mmap, header); + sparse_cap->nr_areas = sparse->nr_areas; + memcpy(sparse_cap->areas, sparse->areas, + sparse->nr_areas * sizeof(*sparse->areas)); + return 0; +} + +static int region_type_cap(struct vfio_info_cap *caps, void *cap_type) +{ + struct vfio_info_cap_header *header; + struct vfio_region_info_cap_type *type_cap, *cap = cap_type; + + header = vfio_info_cap_add(caps, sizeof(*cap), + VFIO_REGION_INFO_CAP_TYPE, 1); + if (IS_ERR(header)) + return PTR_ERR(header); + + type_cap = container_of(header, struct vfio_region_info_cap_type, + header); + type_cap->type = cap->type; + type_cap->subtype = cap->subtype; + return 0; +} + +int vfio_info_add_capability(struct vfio_info_cap *caps, int cap_type_id, + void *cap_type) +{ + int ret = -EINVAL; + + if (!cap_type) + return 0; + + switch (cap_type_id) { + case VFIO_REGION_INFO_CAP_SPARSE_MMAP: + ret = sparse_mmap_cap(caps, cap_type); + break; + + case VFIO_REGION_INFO_CAP_TYPE: + ret = region_type_cap(caps, cap_type); + break; + } + + return ret; +} +EXPORT_SYMBOL(vfio_info_add_capability); /* * Pin a set of guest PFNs and return their associated host PFNs for local diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 6ab13f7e2920..e26f7ccab564 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -126,6 +126,9 @@ extern struct vfio_info_cap_header *vfio_info_cap_add( struct vfio_info_cap *caps, size_t size, u16 id, u16 version); extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset); +extern int vfio_info_add_capability(struct vfio_info_cap *caps, + int cap_type_id, void *cap_type); + struct pci_dev; #ifdef CONFIG_EEH extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev); -- cgit v1.2.3 From c747f08aea847c8c0704acf9375ca83c4800f6c1 Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Thu, 17 Nov 2016 02:16:27 +0530 Subject: vfio: Introduce vfio_set_irqs_validate_and_prepare() Vendor driver using mediated device framework would use same mechnism to validate and prepare IRQs. Introducing this function to reduce code replication in multiple drivers. Signed-off-by: Kirti Wankhede Signed-off-by: Neo Jia Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/vfio.h | 4 ++++ 2 files changed, 52 insertions(+) (limited to 'include') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index bb3ddb76911f..c8150674787c 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1858,6 +1858,54 @@ int vfio_info_add_capability(struct vfio_info_cap *caps, int cap_type_id, } EXPORT_SYMBOL(vfio_info_add_capability); +int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, + int max_irq_type, size_t *data_size) +{ + unsigned long minsz; + size_t size; + + minsz = offsetofend(struct vfio_irq_set, count); + + if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) || + (hdr->count >= (U32_MAX - hdr->start)) || + (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK | + VFIO_IRQ_SET_ACTION_TYPE_MASK))) + return -EINVAL; + + if (data_size) + *data_size = 0; + + if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs) + return -EINVAL; + + switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) { + case VFIO_IRQ_SET_DATA_NONE: + size = 0; + break; + case VFIO_IRQ_SET_DATA_BOOL: + size = sizeof(uint8_t); + break; + case VFIO_IRQ_SET_DATA_EVENTFD: + size = sizeof(int32_t); + break; + default: + return -EINVAL; + } + + if (size) { + if (hdr->argsz - minsz < hdr->count * size) + return -EINVAL; + + if (!data_size) + return -EINVAL; + + *data_size = hdr->count * size; + } + + return 0; +} +EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); + /* * Pin a set of guest PFNs and return their associated host PFNs for local * domain only. diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e26f7ccab564..15ff0421b423 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -129,6 +129,10 @@ extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset); extern int vfio_info_add_capability(struct vfio_info_cap *caps, int cap_type_id, void *cap_type); +extern int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, + int num_irqs, int max_irq_type, + size_t *data_size); + struct pci_dev; #ifdef CONFIG_EEH extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev); -- cgit v1.2.3 From 2818c6e91980d966d015a9f763ab24b41e6a7c3d Mon Sep 17 00:00:00 2001 From: Kirti Wankhede Date: Thu, 17 Nov 2016 02:16:30 +0530 Subject: vfio: Define device_api strings Defined device API strings. Vendor driver using mediated device framework should use corresponding string for device_api attribute. Signed-off-by: Kirti Wankhede Signed-off-by: Neo Jia Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 255a2113f53c..519eff362c1c 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -203,6 +203,16 @@ struct vfio_device_info { }; #define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) +/* + * Vendor driver using Mediated device framework should provide device_api + * attribute in supported type attribute groups. Device API string should be one + * of the following corresponding to device flags in vfio_device_info structure. + */ + +#define VFIO_DEVICE_API_PCI_STRING "vfio-pci" +#define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform" +#define VFIO_DEVICE_API_AMBA_STRING "vfio-amba" + /** * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, * struct vfio_region_info) -- cgit v1.2.3 From 22195cbd3451a75abaf30651a61cf85c89061327 Mon Sep 17 00:00:00 2001 From: Jike Song Date: Thu, 1 Dec 2016 13:20:05 +0800 Subject: vfio: vfio_register_notifier: classify iommu notifier Currently vfio_register_notifier assumes that there is only one notifier chain, which is in vfio_iommu. However, the user might also be interested in events other than vfio_iommu, for example, vfio_group. Refactor vfio_{un}register_notifier implementation to make it feasible. Cc: Paolo Bonzini Cc: Xiao Guangrong Reviewed-by: Kirti Wankhede Signed-off-by: Jike Song [aw: merge with commit 816ca69ea9c7 ("vfio: Fix handling of error returned by 'vfio_group_get_from_dev()'"), remove typedef] Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 84 +++++++++++++++++++++++++++++------------ drivers/vfio/vfio_iommu_type1.c | 8 ++++ include/linux/vfio.h | 13 ++++++- 3 files changed, 78 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 7b39313abf0d..6cc2a9ffac10 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -2008,59 +2008,44 @@ err_unpin_pages: } EXPORT_SYMBOL(vfio_unpin_pages); -int vfio_register_notifier(struct device *dev, struct notifier_block *nb) +static int vfio_register_iommu_notifier(struct vfio_group *group, + unsigned long *events, + struct notifier_block *nb) { struct vfio_container *container; - struct vfio_group *group; struct vfio_iommu_driver *driver; int ret; - if (!dev || !nb) - return -EINVAL; - - group = vfio_group_get_from_dev(dev); - if (!group) - return -ENODEV; - ret = vfio_group_add_container_user(group); if (ret) - goto err_register_nb; + return -EINVAL; container = group->container; down_read(&container->group_lock); driver = container->iommu_driver; if (likely(driver && driver->ops->register_notifier)) - ret = driver->ops->register_notifier(container->iommu_data, nb); + ret = driver->ops->register_notifier(container->iommu_data, + events, nb); else ret = -ENOTTY; up_read(&container->group_lock); vfio_group_try_dissolve_container(group); -err_register_nb: - vfio_group_put(group); return ret; } -EXPORT_SYMBOL(vfio_register_notifier); -int vfio_unregister_notifier(struct device *dev, struct notifier_block *nb) +static int vfio_unregister_iommu_notifier(struct vfio_group *group, + struct notifier_block *nb) { struct vfio_container *container; - struct vfio_group *group; struct vfio_iommu_driver *driver; int ret; - if (!dev || !nb) - return -EINVAL; - - group = vfio_group_get_from_dev(dev); - if (!group) - return -ENODEV; - ret = vfio_group_add_container_user(group); if (ret) - goto err_unregister_nb; + return -EINVAL; container = group->container; down_read(&container->group_lock); @@ -2075,7 +2060,56 @@ int vfio_unregister_notifier(struct device *dev, struct notifier_block *nb) up_read(&container->group_lock); vfio_group_try_dissolve_container(group); -err_unregister_nb: + return ret; +} + +int vfio_register_notifier(struct device *dev, enum vfio_notify_type type, + unsigned long *events, struct notifier_block *nb) +{ + struct vfio_group *group; + int ret; + + if (!dev || !nb || !events || (*events == 0)) + return -EINVAL; + + group = vfio_group_get_from_dev(dev); + if (!group) + return -ENODEV; + + switch (type) { + case VFIO_IOMMU_NOTIFY: + ret = vfio_register_iommu_notifier(group, events, nb); + break; + default: + ret = -EINVAL; + } + + vfio_group_put(group); + return ret; +} +EXPORT_SYMBOL(vfio_register_notifier); + +int vfio_unregister_notifier(struct device *dev, enum vfio_notify_type type, + struct notifier_block *nb) +{ + struct vfio_group *group; + int ret; + + if (!dev || !nb) + return -EINVAL; + + group = vfio_group_get_from_dev(dev); + if (!group) + return -ENODEV; + + switch (type) { + case VFIO_IOMMU_NOTIFY: + ret = vfio_unregister_iommu_notifier(group, nb); + break; + default: + ret = -EINVAL; + } + vfio_group_put(group); return ret; } diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 51810a95416e..b88ad1e5e7ff 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1584,10 +1584,18 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, } static int vfio_iommu_type1_register_notifier(void *iommu_data, + unsigned long *events, struct notifier_block *nb) { struct vfio_iommu *iommu = iommu_data; + /* clear known events */ + *events &= ~VFIO_IOMMU_NOTIFY_DMA_UNMAP; + + /* refuse to register if still events remaining */ + if (*events) + return -EINVAL; + return blocking_notifier_chain_register(&iommu->notifier, nb); } diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 15ff0421b423..0e5201f207d3 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -81,6 +81,7 @@ struct vfio_iommu_driver_ops { int (*unpin_pages)(void *iommu_data, unsigned long *user_pfn, int npage); int (*register_notifier)(void *iommu_data, + unsigned long *events, struct notifier_block *nb); int (*unregister_notifier)(void *iommu_data, struct notifier_block *nb); @@ -107,12 +108,20 @@ extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage); -#define VFIO_IOMMU_NOTIFY_DMA_UNMAP (1) +/* each type has independent events */ +enum vfio_notify_type { + VFIO_IOMMU_NOTIFY = 0, +}; + +/* events for VFIO_IOMMU_NOTIFY */ +#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0) extern int vfio_register_notifier(struct device *dev, + enum vfio_notify_type type, + unsigned long *required_events, struct notifier_block *nb); - extern int vfio_unregister_notifier(struct device *dev, + enum vfio_notify_type type, struct notifier_block *nb); /* -- cgit v1.2.3 From ccd46dbae77dbf0d33f42e04b59536f108c395e8 Mon Sep 17 00:00:00 2001 From: Jike Song Date: Thu, 1 Dec 2016 13:20:06 +0800 Subject: vfio: support notifier chain in vfio_group Beyond vfio_iommu events, users might also be interested in vfio_group events. For example, if a vfio_group is used along with Qemu/KVM, whenever kvm pointer is set to/cleared from the vfio_group, users could be notified. Currently only VFIO_GROUP_NOTIFY_SET_KVM supported. Cc: Kirti Wankhede Cc: Paolo Bonzini Cc: Xiao Guangrong Signed-off-by: Jike Song [aw: remove use of new typedef] Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/vfio.h | 7 +++++ 2 files changed, 89 insertions(+) (limited to 'include') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 6cc2a9ffac10..9901c4671e2f 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -86,6 +86,8 @@ struct vfio_group { struct mutex unbound_lock; atomic_t opened; bool noiommu; + struct kvm *kvm; + struct blocking_notifier_head notifier; }; struct vfio_device { @@ -339,6 +341,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) #ifdef CONFIG_VFIO_NOIOMMU group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu); #endif + BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); group->nb.notifier_call = vfio_iommu_group_notifier; @@ -1581,6 +1584,9 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep) filep->private_data = NULL; + /* Any user didn't unregister? */ + WARN_ON(group->notifier.head); + vfio_group_try_dissolve_container(group); atomic_dec(&group->opened); @@ -2063,6 +2069,76 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group, return ret; } +void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm) +{ + group->kvm = kvm; + blocking_notifier_call_chain(&group->notifier, + VFIO_GROUP_NOTIFY_SET_KVM, kvm); +} +EXPORT_SYMBOL_GPL(vfio_group_set_kvm); + +static int vfio_register_group_notifier(struct vfio_group *group, + unsigned long *events, + struct notifier_block *nb) +{ + struct vfio_container *container; + int ret; + bool set_kvm = false; + + if (*events & VFIO_GROUP_NOTIFY_SET_KVM) + set_kvm = true; + + /* clear known events */ + *events &= ~VFIO_GROUP_NOTIFY_SET_KVM; + + /* refuse to continue if still events remaining */ + if (*events) + return -EINVAL; + + ret = vfio_group_add_container_user(group); + if (ret) + return -EINVAL; + + container = group->container; + down_read(&container->group_lock); + + ret = blocking_notifier_chain_register(&group->notifier, nb); + + /* + * The attaching of kvm and vfio_group might already happen, so + * here we replay once upon registration. + */ + if (!ret && set_kvm && group->kvm) + blocking_notifier_call_chain(&group->notifier, + VFIO_GROUP_NOTIFY_SET_KVM, group->kvm); + + up_read(&container->group_lock); + vfio_group_try_dissolve_container(group); + + return ret; +} + +static int vfio_unregister_group_notifier(struct vfio_group *group, + struct notifier_block *nb) +{ + struct vfio_container *container; + int ret; + + ret = vfio_group_add_container_user(group); + if (ret) + return -EINVAL; + + container = group->container; + down_read(&container->group_lock); + + ret = blocking_notifier_chain_unregister(&group->notifier, nb); + + up_read(&container->group_lock); + vfio_group_try_dissolve_container(group); + + return ret; +} + int vfio_register_notifier(struct device *dev, enum vfio_notify_type type, unsigned long *events, struct notifier_block *nb) { @@ -2080,6 +2156,9 @@ int vfio_register_notifier(struct device *dev, enum vfio_notify_type type, case VFIO_IOMMU_NOTIFY: ret = vfio_register_iommu_notifier(group, events, nb); break; + case VFIO_GROUP_NOTIFY: + ret = vfio_register_group_notifier(group, events, nb); + break; default: ret = -EINVAL; } @@ -2106,6 +2185,9 @@ int vfio_unregister_notifier(struct device *dev, enum vfio_notify_type type, case VFIO_IOMMU_NOTIFY: ret = vfio_unregister_iommu_notifier(group, nb); break; + case VFIO_GROUP_NOTIFY: + ret = vfio_unregister_group_notifier(group, nb); + break; default: ret = -EINVAL; } diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 0e5201f207d3..edf9b2cad277 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -111,11 +111,15 @@ extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, /* each type has independent events */ enum vfio_notify_type { VFIO_IOMMU_NOTIFY = 0, + VFIO_GROUP_NOTIFY = 1, }; /* events for VFIO_IOMMU_NOTIFY */ #define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0) +/* events for VFIO_GROUP_NOTIFY */ +#define VFIO_GROUP_NOTIFY_SET_KVM BIT(0) + extern int vfio_register_notifier(struct device *dev, enum vfio_notify_type type, unsigned long *required_events, @@ -124,6 +128,9 @@ extern int vfio_unregister_notifier(struct device *dev, enum vfio_notify_type type, struct notifier_block *nb); +struct kvm; +extern void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm); + /* * Sub-module helpers */ -- cgit v1.2.3