author    | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-03 13:29:36 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-03 13:29:36 -0700
commit    | d4ec3d5535c784c3adbc41c2bbc5d17a00a4a898 (patch)
tree      | 4b752170355f1d02992d2b266cc22a1ffd98ab34 /drivers
parent    | a602285ac11b019e9ce7c3907328e9f95f4967f0 (diff)
parent    | 3bf1311f351ef289f2aee79b86bcece2039fa611 (diff)
download  | linux-d4ec3d5535c784c3adbc41c2bbc5d17a00a4a898.tar.bz2
Merge tag 'vfio-v5.16-rc1' of git://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson:
- Clean up vfio iommu_group creation (Christoph Hellwig)
- Add individual device reset for vfio/fsl-mc (Diana Craciun)
- IGD OpRegion 2.0+ support (Colin Xu)
- Use modern cdev lifecycle for vfio_group (Jason Gunthorpe)
- Use new mdev API in vfio_ccw (Jason Gunthorpe)
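To make the last two items concrete: under the new mdev API, the parent driver supplies an mdev_driver whose probe() embeds a vfio_device in its private state and registers it as an emulated-IOMMU device. Below is a minimal sketch, assuming the v5.16 APIs as they appear in the diff further down; the `my_*` names are hypothetical placeholders, not part of this series.

```c
#include <linux/mdev.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vfio.h>

struct my_private {
	struct vfio_device vdev;	/* must be embedded in driver state */
	struct mdev_device *mdev;
};

static const struct vfio_device_ops my_dev_ops;	/* open/close/read/write/ioctl */

static int my_mdev_probe(struct mdev_device *mdev)
{
	struct my_private *priv;
	int ret;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	vfio_init_group_dev(&priv->vdev, &mdev->dev, &my_dev_ops);
	priv->mdev = mdev;

	/* Replaces vfio_register_group_dev() for devices without a real IOMMU. */
	ret = vfio_register_emulated_iommu_dev(&priv->vdev);
	if (ret) {
		vfio_uninit_group_dev(&priv->vdev);
		kfree(priv);
		return ret;
	}
	dev_set_drvdata(&mdev->dev, priv);
	return 0;
}

static void my_mdev_remove(struct mdev_device *mdev)
{
	struct my_private *priv = dev_get_drvdata(&mdev->dev);

	vfio_unregister_group_dev(&priv->vdev);
	vfio_uninit_group_dev(&priv->vdev);
	kfree(priv);
}

static struct mdev_driver my_mdev_driver = {
	.driver = {
		.name = "my_mdev",
		.owner = THIS_MODULE,
		.mod_name = KBUILD_MODNAME,
	},
	.probe = my_mdev_probe,
	.remove = my_mdev_remove,
};
```

The parent driver then registers `my_mdev_driver` once with mdev_register_driver() at module init and points its mdev_parent_ops at it, as the vfio_ccw_sch_init() and vfio_ccw_mdev_ops hunks do below.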
* tag 'vfio-v5.16-rc1' of git://github.com/awilliam/linux-vfio: (27 commits)
vfio/ccw: Convert to use vfio_register_emulated_iommu_dev()
vfio/ccw: Pass vfio_ccw_private not mdev_device to various functions
vfio/ccw: Use functions for alloc/free of the vfio_ccw_private
vfio/ccw: Remove unneeded GFP_DMA
vfio: Use cdev_device_add() instead of device_create()
vfio: Use a refcount_t instead of a kref in the vfio_group
vfio: Don't leak a group reference if the group already exists
vfio: Do not open code the group list search in vfio_create_group()
vfio: Delete vfio_get/put_group from vfio_iommu_group_notifier()
vfio/pci: Add OpRegion 2.0+ Extended VBT support.
vfio/iommu_type1: remove IS_IOMMU_CAP_DOMAIN_IN_CONTAINER
vfio/iommu_type1: remove the "external" domain
vfio/iommu_type1: initialize pgsize_bitmap in ->open
vfio/spapr_tce: reject mediated devices
vfio: clean up the check for mediated device in vfio_iommu_type1
vfio: remove the unused mdev iommu hook
vfio: move the vfio_iommu_driver_ops interface out of <linux/vfio.h>
vfio: remove unused method from vfio_iommu_driver_ops
vfio: simplify iommu group allocation for mediated devices
vfio: remove the iommudata hack for noiommu groups
...
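The per-device reset for vfio/fsl-mc rests on a small generic object API added in drivers/bus/fsl-mc/obj-api.c (full source in the diff below). A minimal sketch of the open/reset/close sequence it enables, mirroring the non-DPRC path of vfio_fsl_mc_reset_device():

```c
#include <linux/fsl/mc.h>

static int reset_one_fsl_mc_object(struct fsl_mc_device *mc_dev)
{
	u16 token;
	int ret, err;

	ret = fsl_mc_obj_open(mc_dev->mc_io, 0, mc_dev->obj_desc.id,
			      mc_dev->obj_desc.type, &token);
	if (ret)
		return ret;	/* -ENODEV for unknown object types */

	ret = fsl_mc_obj_reset(mc_dev->mc_io, 0, token);

	/* Always close the handle; keep the reset error if there was one. */
	err = fsl_mc_obj_close(mc_dev->mc_io, 0, token);
	return ret ? ret : err;
}
```

Because fsl_mc_obj_open() maps the object-type string to the right DP*_CMDID_OPEN command, one helper now covers every object type instead of reset being limited to DPRC containers.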
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/bus/fsl-mc/Makefile                  |   3
-rw-r--r-- | drivers/bus/fsl-mc/fsl-mc-private.h          |  39
-rw-r--r-- | drivers/bus/fsl-mc/obj-api.c                 | 103
-rw-r--r-- | drivers/s390/cio/vfio_ccw_drv.c              | 136
-rw-r--r-- | drivers/s390/cio/vfio_ccw_ops.c              | 142
-rw-r--r-- | drivers/s390/cio/vfio_ccw_private.h          |   5
-rw-r--r-- | drivers/s390/crypto/vfio_ap_ops.c            |   2
-rw-r--r-- | drivers/vfio/fsl-mc/vfio_fsl_mc.c            |  62
-rw-r--r-- | drivers/vfio/mdev/mdev_driver.c              |  45
-rw-r--r-- | drivers/vfio/mdev/vfio_mdev.c                |   2
-rw-r--r-- | drivers/vfio/pci/vfio_pci_core.c             |  13
-rw-r--r-- | drivers/vfio/pci/vfio_pci_igd.c              | 234
-rw-r--r-- | drivers/vfio/platform/vfio_platform_common.c |  13
-rw-r--r-- | drivers/vfio/vfio.c                          | 622
-rw-r--r-- | drivers/vfio/vfio.h                          |  72
-rw-r--r-- | drivers/vfio/vfio_iommu_spapr_tce.c          |   6
-rw-r--r-- | drivers/vfio/vfio_iommu_type1.c              | 256
17 files changed, 927 insertions, 828 deletions
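One detail of the IGD OpRegion 2.0+ support (the 234-line vfio_pci_igd.c change above) is worth isolating before the full diff: RVDA changed meaning between OpRegion versions, which is why the driver patches a version 2.0 header up to 2.1 when an extended VBT exists. A hedged sketch of the version-dependent mapping rule, reduced from the diff (the helper itself is hypothetical):

```c
#include <linux/io.h>

/* OpRegion 2.0 stores an absolute physical address in RVDA;
 * 2.1+ stores an offset relative to the OpRegion base. */
static void *map_extended_vbt(u16 version, u64 opregion_phys,
			      u64 rvda, u32 rvds)
{
	u64 vbt_phys;

	if (!rvda || !rvds)
		return NULL;	/* no extended VBT present */

	if (version == 0x0200)
		vbt_phys = rvda;		 /* 2.0: absolute address */
	else
		vbt_phys = opregion_phys + rvda; /* 2.1+: relative offset */

	return memremap(vbt_phys, rvds, MEMREMAP_WB);
}
```

Since only the RVDA interpretation differs between 2.0 and 2.1, the read handler can expose OpRegion plus VBT as one contiguous vfio region and rewrite a 2.0 version field to 2.1 on the fly, which is exactly what the vfio_pci_igd_rw() rework below does.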
diff --git a/drivers/bus/fsl-mc/Makefile b/drivers/bus/fsl-mc/Makefile index 4ae292a30e53..892946245527 100644 --- a/drivers/bus/fsl-mc/Makefile +++ b/drivers/bus/fsl-mc/Makefile @@ -15,7 +15,8 @@ mc-bus-driver-objs := fsl-mc-bus.o \ dprc-driver.o \ fsl-mc-allocator.o \ fsl-mc-msi.o \ - dpmcp.o + dpmcp.o \ + obj-api.o # MC userspace support obj-$(CONFIG_FSL_MC_UAPI_SUPPORT) += fsl-mc-uapi.o diff --git a/drivers/bus/fsl-mc/fsl-mc-private.h b/drivers/bus/fsl-mc/fsl-mc-private.h index 1958fa065360..b3520ea1b9f4 100644 --- a/drivers/bus/fsl-mc/fsl-mc-private.h +++ b/drivers/bus/fsl-mc/fsl-mc-private.h @@ -48,7 +48,6 @@ struct dpmng_rsp_get_version { /* DPMCP command IDs */ #define DPMCP_CMDID_CLOSE DPMCP_CMD(0x800) -#define DPMCP_CMDID_OPEN DPMCP_CMD(0x80b) #define DPMCP_CMDID_RESET DPMCP_CMD(0x005) struct dpmcp_cmd_open { @@ -91,7 +90,6 @@ int dpmcp_reset(struct fsl_mc_io *mc_io, /* DPRC command IDs */ #define DPRC_CMDID_CLOSE DPRC_CMD(0x800) -#define DPRC_CMDID_OPEN DPRC_CMD(0x805) #define DPRC_CMDID_GET_API_VERSION DPRC_CMD(0xa05) #define DPRC_CMDID_GET_ATTR DPRC_CMD(0x004) @@ -453,7 +451,6 @@ int dprc_get_connection(struct fsl_mc_io *mc_io, /* Command IDs */ #define DPBP_CMDID_CLOSE DPBP_CMD(0x800) -#define DPBP_CMDID_OPEN DPBP_CMD(0x804) #define DPBP_CMDID_ENABLE DPBP_CMD(0x002) #define DPBP_CMDID_DISABLE DPBP_CMD(0x003) @@ -492,7 +489,6 @@ struct dpbp_rsp_get_attributes { /* Command IDs */ #define DPCON_CMDID_CLOSE DPCON_CMD(0x800) -#define DPCON_CMDID_OPEN DPCON_CMD(0x808) #define DPCON_CMDID_ENABLE DPCON_CMD(0x002) #define DPCON_CMDID_DISABLE DPCON_CMD(0x003) @@ -524,6 +520,41 @@ struct dpcon_cmd_set_notification { __le64 user_ctx; }; +/* + * Generic FSL MC API + */ + +/* generic command versioning */ +#define OBJ_CMD_BASE_VERSION 1 +#define OBJ_CMD_ID_OFFSET 4 + +#define OBJ_CMD(id) (((id) << OBJ_CMD_ID_OFFSET) | OBJ_CMD_BASE_VERSION) + +/* open command codes */ +#define DPRTC_CMDID_OPEN OBJ_CMD(0x810) +#define DPNI_CMDID_OPEN OBJ_CMD(0x801) +#define DPSW_CMDID_OPEN OBJ_CMD(0x802) +#define DPIO_CMDID_OPEN OBJ_CMD(0x803) +#define DPBP_CMDID_OPEN OBJ_CMD(0x804) +#define DPRC_CMDID_OPEN OBJ_CMD(0x805) +#define DPDMUX_CMDID_OPEN OBJ_CMD(0x806) +#define DPCI_CMDID_OPEN OBJ_CMD(0x807) +#define DPCON_CMDID_OPEN OBJ_CMD(0x808) +#define DPSECI_CMDID_OPEN OBJ_CMD(0x809) +#define DPAIOP_CMDID_OPEN OBJ_CMD(0x80a) +#define DPMCP_CMDID_OPEN OBJ_CMD(0x80b) +#define DPMAC_CMDID_OPEN OBJ_CMD(0x80c) +#define DPDCEI_CMDID_OPEN OBJ_CMD(0x80d) +#define DPDMAI_CMDID_OPEN OBJ_CMD(0x80e) +#define DPDBG_CMDID_OPEN OBJ_CMD(0x80f) + +/* Generic object command IDs */ +#define OBJ_CMDID_CLOSE OBJ_CMD(0x800) +#define OBJ_CMDID_RESET OBJ_CMD(0x005) + +struct fsl_mc_obj_cmd_open { + __le32 obj_id; +}; /** * struct fsl_mc_resource_pool - Pool of MC resources of a given diff --git a/drivers/bus/fsl-mc/obj-api.c b/drivers/bus/fsl-mc/obj-api.c new file mode 100644 index 000000000000..06c1dd84e38d --- /dev/null +++ b/drivers/bus/fsl-mc/obj-api.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* + * Copyright 2021 NXP + * + */ +#include <linux/kernel.h> +#include <linux/fsl/mc.h> + +#include "fsl-mc-private.h" + +static int fsl_mc_get_open_cmd_id(const char *type) +{ + static const struct { + int cmd_id; + const char *type; + } dev_ids[] = { + { DPRTC_CMDID_OPEN, "dprtc" }, + { DPRC_CMDID_OPEN, "dprc" }, + { DPNI_CMDID_OPEN, "dpni" }, + { DPIO_CMDID_OPEN, "dpio" }, + { DPSW_CMDID_OPEN, "dpsw" }, + { DPBP_CMDID_OPEN, "dpbp" }, + { DPCON_CMDID_OPEN, "dpcon" }, + { DPMCP_CMDID_OPEN, "dpmcp" }, + { 
DPMAC_CMDID_OPEN, "dpmac" }, + { DPSECI_CMDID_OPEN, "dpseci" }, + { DPDMUX_CMDID_OPEN, "dpdmux" }, + { DPDCEI_CMDID_OPEN, "dpdcei" }, + { DPAIOP_CMDID_OPEN, "dpaiop" }, + { DPCI_CMDID_OPEN, "dpci" }, + { DPDMAI_CMDID_OPEN, "dpdmai" }, + { DPDBG_CMDID_OPEN, "dpdbg" }, + { 0, NULL } + }; + int i; + + for (i = 0; dev_ids[i].type; i++) + if (!strcmp(dev_ids[i].type, type)) + return dev_ids[i].cmd_id; + + return -1; +} + +int fsl_mc_obj_open(struct fsl_mc_io *mc_io, + u32 cmd_flags, + int obj_id, + char *obj_type, + u16 *token) +{ + struct fsl_mc_command cmd = { 0 }; + struct fsl_mc_obj_cmd_open *cmd_params; + int err = 0; + int cmd_id = fsl_mc_get_open_cmd_id(obj_type); + + if (cmd_id == -1) + return -ENODEV; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(cmd_id, cmd_flags, 0); + cmd_params = (struct fsl_mc_obj_cmd_open *)cmd.params; + cmd_params->obj_id = cpu_to_le32(obj_id); + + /* send command to mc*/ + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + /* retrieve response parameters */ + *token = mc_cmd_hdr_read_token(&cmd); + + return err; +} +EXPORT_SYMBOL_GPL(fsl_mc_obj_open); + +int fsl_mc_obj_close(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token) +{ + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(OBJ_CMDID_CLOSE, cmd_flags, + token); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} +EXPORT_SYMBOL_GPL(fsl_mc_obj_close); + +int fsl_mc_obj_reset(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token) +{ + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(OBJ_CMDID_RESET, cmd_flags, + token); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} +EXPORT_SYMBOL_GPL(fsl_mc_obj_reset); diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 76099bcb765b..040742777095 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -137,77 +137,107 @@ static void vfio_ccw_sch_irq(struct subchannel *sch) vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT); } -static void vfio_ccw_free_regions(struct vfio_ccw_private *private) +static struct vfio_ccw_private *vfio_ccw_alloc_private(struct subchannel *sch) { - if (private->crw_region) - kmem_cache_free(vfio_ccw_crw_region, private->crw_region); - if (private->schib_region) - kmem_cache_free(vfio_ccw_schib_region, private->schib_region); - if (private->cmd_region) - kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region); - if (private->io_region) - kmem_cache_free(vfio_ccw_io_region, private->io_region); -} - -static int vfio_ccw_sch_probe(struct subchannel *sch) -{ - struct pmcw *pmcw = &sch->schib.pmcw; struct vfio_ccw_private *private; - int ret = -ENOMEM; - if (pmcw->qf) { - dev_warn(&sch->dev, "vfio: ccw: does not support QDIO: %s\n", - dev_name(&sch->dev)); - return -ENODEV; - } - - private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA); + private = kzalloc(sizeof(*private), GFP_KERNEL); if (!private) - return -ENOMEM; + return ERR_PTR(-ENOMEM); + + private->sch = sch; + mutex_init(&private->io_mutex); + private->state = VFIO_CCW_STATE_NOT_OPER; + INIT_LIST_HEAD(&private->crw); + INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo); + INIT_WORK(&private->crw_work, vfio_ccw_crw_todo); + atomic_set(&private->avail, 1); private->cp.guest_cp = kcalloc(CCWCHAIN_LEN_MAX, sizeof(struct ccw1), GFP_KERNEL); if (!private->cp.guest_cp) - goto out_free; + goto out_free_private; private->io_region = 
kmem_cache_zalloc(vfio_ccw_io_region, GFP_KERNEL | GFP_DMA); if (!private->io_region) - goto out_free; + goto out_free_cp; private->cmd_region = kmem_cache_zalloc(vfio_ccw_cmd_region, GFP_KERNEL | GFP_DMA); if (!private->cmd_region) - goto out_free; + goto out_free_io; private->schib_region = kmem_cache_zalloc(vfio_ccw_schib_region, GFP_KERNEL | GFP_DMA); if (!private->schib_region) - goto out_free; + goto out_free_cmd; private->crw_region = kmem_cache_zalloc(vfio_ccw_crw_region, GFP_KERNEL | GFP_DMA); if (!private->crw_region) - goto out_free; + goto out_free_schib; + return private; + +out_free_schib: + kmem_cache_free(vfio_ccw_schib_region, private->schib_region); +out_free_cmd: + kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region); +out_free_io: + kmem_cache_free(vfio_ccw_io_region, private->io_region); +out_free_cp: + kfree(private->cp.guest_cp); +out_free_private: + mutex_destroy(&private->io_mutex); + kfree(private); + return ERR_PTR(-ENOMEM); +} + +static void vfio_ccw_free_private(struct vfio_ccw_private *private) +{ + struct vfio_ccw_crw *crw, *temp; + + list_for_each_entry_safe(crw, temp, &private->crw, next) { + list_del(&crw->next); + kfree(crw); + } + + kmem_cache_free(vfio_ccw_crw_region, private->crw_region); + kmem_cache_free(vfio_ccw_schib_region, private->schib_region); + kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region); + kmem_cache_free(vfio_ccw_io_region, private->io_region); + kfree(private->cp.guest_cp); + mutex_destroy(&private->io_mutex); + kfree(private); +} + +static int vfio_ccw_sch_probe(struct subchannel *sch) +{ + struct pmcw *pmcw = &sch->schib.pmcw; + struct vfio_ccw_private *private; + int ret = -ENOMEM; + + if (pmcw->qf) { + dev_warn(&sch->dev, "vfio: ccw: does not support QDIO: %s\n", + dev_name(&sch->dev)); + return -ENODEV; + } + + private = vfio_ccw_alloc_private(sch); + if (IS_ERR(private)) + return PTR_ERR(private); - private->sch = sch; dev_set_drvdata(&sch->dev, private); - mutex_init(&private->io_mutex); spin_lock_irq(sch->lock); - private->state = VFIO_CCW_STATE_NOT_OPER; sch->isc = VFIO_CCW_ISC; ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch); spin_unlock_irq(sch->lock); if (ret) goto out_free; - INIT_LIST_HEAD(&private->crw); - INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo); - INIT_WORK(&private->crw_work, vfio_ccw_crw_todo); - atomic_set(&private->avail, 1); private->state = VFIO_CCW_STATE_STANDBY; ret = vfio_ccw_mdev_reg(sch); @@ -228,31 +258,20 @@ out_disable: cio_disable_subchannel(sch); out_free: dev_set_drvdata(&sch->dev, NULL); - vfio_ccw_free_regions(private); - kfree(private->cp.guest_cp); - kfree(private); + vfio_ccw_free_private(private); return ret; } static void vfio_ccw_sch_remove(struct subchannel *sch) { struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); - struct vfio_ccw_crw *crw, *temp; vfio_ccw_sch_quiesce(sch); - - list_for_each_entry_safe(crw, temp, &private->crw, next) { - list_del(&crw->next); - kfree(crw); - } - vfio_ccw_mdev_unreg(sch); dev_set_drvdata(&sch->dev, NULL); - vfio_ccw_free_regions(private); - kfree(private->cp.guest_cp); - kfree(private); + vfio_ccw_free_private(private); VFIO_CCW_MSG_EVENT(4, "unbound from subchannel %x.%x.%04x\n", sch->schid.cssid, sch->schid.ssid, @@ -449,7 +468,7 @@ static int __init vfio_ccw_sch_init(void) vfio_ccw_work_q = create_singlethread_workqueue("vfio-ccw"); if (!vfio_ccw_work_q) { ret = -ENOMEM; - goto out_err; + goto out_regions; } vfio_ccw_io_region = kmem_cache_create_usercopy("vfio_ccw_io_region", @@ -458,7 +477,7 @@ static int 
__init vfio_ccw_sch_init(void) sizeof(struct ccw_io_region), NULL); if (!vfio_ccw_io_region) { ret = -ENOMEM; - goto out_err; + goto out_regions; } vfio_ccw_cmd_region = kmem_cache_create_usercopy("vfio_ccw_cmd_region", @@ -467,7 +486,7 @@ static int __init vfio_ccw_sch_init(void) sizeof(struct ccw_cmd_region), NULL); if (!vfio_ccw_cmd_region) { ret = -ENOMEM; - goto out_err; + goto out_regions; } vfio_ccw_schib_region = kmem_cache_create_usercopy("vfio_ccw_schib_region", @@ -477,7 +496,7 @@ static int __init vfio_ccw_sch_init(void) if (!vfio_ccw_schib_region) { ret = -ENOMEM; - goto out_err; + goto out_regions; } vfio_ccw_crw_region = kmem_cache_create_usercopy("vfio_ccw_crw_region", @@ -487,19 +506,25 @@ static int __init vfio_ccw_sch_init(void) if (!vfio_ccw_crw_region) { ret = -ENOMEM; - goto out_err; + goto out_regions; } + ret = mdev_register_driver(&vfio_ccw_mdev_driver); + if (ret) + goto out_regions; + isc_register(VFIO_CCW_ISC); ret = css_driver_register(&vfio_ccw_sch_driver); if (ret) { isc_unregister(VFIO_CCW_ISC); - goto out_err; + goto out_driver; } return ret; -out_err: +out_driver: + mdev_unregister_driver(&vfio_ccw_mdev_driver); +out_regions: vfio_ccw_destroy_regions(); destroy_workqueue(vfio_ccw_work_q); vfio_ccw_debug_exit(); @@ -509,6 +534,7 @@ out_err: static void __exit vfio_ccw_sch_exit(void) { css_driver_unregister(&vfio_ccw_sch_driver); + mdev_unregister_driver(&vfio_ccw_mdev_driver); isc_unregister(VFIO_CCW_ISC); vfio_ccw_destroy_regions(); destroy_workqueue(vfio_ccw_work_q); diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 7f540ad0b568..d8589afac272 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -17,13 +17,13 @@ #include "vfio_ccw_private.h" -static int vfio_ccw_mdev_reset(struct mdev_device *mdev) +static const struct vfio_device_ops vfio_ccw_dev_ops; + +static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private) { - struct vfio_ccw_private *private; struct subchannel *sch; int ret; - private = dev_get_drvdata(mdev_parent_dev(mdev)); sch = private->sch; /* * TODO: @@ -61,7 +61,7 @@ static int vfio_ccw_mdev_notifier(struct notifier_block *nb, if (!cp_iova_pinned(&private->cp, unmap->iova)) return NOTIFY_OK; - if (vfio_ccw_mdev_reset(private->mdev)) + if (vfio_ccw_mdev_reset(private)) return NOTIFY_BAD; cp_free(&private->cp); @@ -113,10 +113,10 @@ static struct attribute_group *mdev_type_groups[] = { NULL, }; -static int vfio_ccw_mdev_create(struct mdev_device *mdev) +static int vfio_ccw_mdev_probe(struct mdev_device *mdev) { - struct vfio_ccw_private *private = - dev_get_drvdata(mdev_parent_dev(mdev)); + struct vfio_ccw_private *private = dev_get_drvdata(mdev->dev.parent); + int ret; if (private->state == VFIO_CCW_STATE_NOT_OPER) return -ENODEV; @@ -124,6 +124,10 @@ static int vfio_ccw_mdev_create(struct mdev_device *mdev) if (atomic_dec_if_positive(&private->avail) < 0) return -EPERM; + memset(&private->vdev, 0, sizeof(private->vdev)); + vfio_init_group_dev(&private->vdev, &mdev->dev, + &vfio_ccw_dev_ops); + private->mdev = mdev; private->state = VFIO_CCW_STATE_IDLE; @@ -132,19 +136,31 @@ static int vfio_ccw_mdev_create(struct mdev_device *mdev) private->sch->schid.ssid, private->sch->schid.sch_no); + ret = vfio_register_emulated_iommu_dev(&private->vdev); + if (ret) + goto err_atomic; + dev_set_drvdata(&mdev->dev, private); return 0; + +err_atomic: + vfio_uninit_group_dev(&private->vdev); + atomic_inc(&private->avail); + private->mdev = NULL; + private->state = VFIO_CCW_STATE_IDLE; 
+ return ret; } -static int vfio_ccw_mdev_remove(struct mdev_device *mdev) +static void vfio_ccw_mdev_remove(struct mdev_device *mdev) { - struct vfio_ccw_private *private = - dev_get_drvdata(mdev_parent_dev(mdev)); + struct vfio_ccw_private *private = dev_get_drvdata(mdev->dev.parent); VFIO_CCW_MSG_EVENT(2, "mdev %pUl, sch %x.%x.%04x: remove\n", mdev_uuid(mdev), private->sch->schid.cssid, private->sch->schid.ssid, private->sch->schid.sch_no); + vfio_unregister_group_dev(&private->vdev); + if ((private->state != VFIO_CCW_STATE_NOT_OPER) && (private->state != VFIO_CCW_STATE_STANDBY)) { if (!vfio_ccw_sch_quiesce(private->sch)) @@ -152,23 +168,22 @@ static int vfio_ccw_mdev_remove(struct mdev_device *mdev) /* The state will be NOT_OPER on error. */ } + vfio_uninit_group_dev(&private->vdev); cp_free(&private->cp); private->mdev = NULL; atomic_inc(&private->avail); - - return 0; } -static int vfio_ccw_mdev_open_device(struct mdev_device *mdev) +static int vfio_ccw_mdev_open_device(struct vfio_device *vdev) { struct vfio_ccw_private *private = - dev_get_drvdata(mdev_parent_dev(mdev)); + container_of(vdev, struct vfio_ccw_private, vdev); unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; int ret; private->nb.notifier_call = vfio_ccw_mdev_notifier; - ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, + ret = vfio_register_notifier(vdev->dev, VFIO_IOMMU_NOTIFY, &events, &private->nb); if (ret) return ret; @@ -189,27 +204,26 @@ static int vfio_ccw_mdev_open_device(struct mdev_device *mdev) out_unregister: vfio_ccw_unregister_dev_regions(private); - vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, + vfio_unregister_notifier(vdev->dev, VFIO_IOMMU_NOTIFY, &private->nb); return ret; } -static void vfio_ccw_mdev_close_device(struct mdev_device *mdev) +static void vfio_ccw_mdev_close_device(struct vfio_device *vdev) { struct vfio_ccw_private *private = - dev_get_drvdata(mdev_parent_dev(mdev)); + container_of(vdev, struct vfio_ccw_private, vdev); if ((private->state != VFIO_CCW_STATE_NOT_OPER) && (private->state != VFIO_CCW_STATE_STANDBY)) { - if (!vfio_ccw_mdev_reset(mdev)) + if (!vfio_ccw_mdev_reset(private)) private->state = VFIO_CCW_STATE_STANDBY; /* The state will be NOT_OPER on error. 
*/ } cp_free(&private->cp); vfio_ccw_unregister_dev_regions(private); - vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, - &private->nb); + vfio_unregister_notifier(vdev->dev, VFIO_IOMMU_NOTIFY, &private->nb); } static ssize_t vfio_ccw_mdev_read_io_region(struct vfio_ccw_private *private, @@ -233,15 +247,14 @@ static ssize_t vfio_ccw_mdev_read_io_region(struct vfio_ccw_private *private, return ret; } -static ssize_t vfio_ccw_mdev_read(struct mdev_device *mdev, +static ssize_t vfio_ccw_mdev_read(struct vfio_device *vdev, char __user *buf, size_t count, loff_t *ppos) { + struct vfio_ccw_private *private = + container_of(vdev, struct vfio_ccw_private, vdev); unsigned int index = VFIO_CCW_OFFSET_TO_INDEX(*ppos); - struct vfio_ccw_private *private; - - private = dev_get_drvdata(mdev_parent_dev(mdev)); if (index >= VFIO_CCW_NUM_REGIONS + private->num_regions) return -EINVAL; @@ -286,15 +299,14 @@ out_unlock: return ret; } -static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev, +static ssize_t vfio_ccw_mdev_write(struct vfio_device *vdev, const char __user *buf, size_t count, loff_t *ppos) { + struct vfio_ccw_private *private = + container_of(vdev, struct vfio_ccw_private, vdev); unsigned int index = VFIO_CCW_OFFSET_TO_INDEX(*ppos); - struct vfio_ccw_private *private; - - private = dev_get_drvdata(mdev_parent_dev(mdev)); if (index >= VFIO_CCW_NUM_REGIONS + private->num_regions) return -EINVAL; @@ -311,12 +323,9 @@ static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev, return -EINVAL; } -static int vfio_ccw_mdev_get_device_info(struct vfio_device_info *info, - struct mdev_device *mdev) +static int vfio_ccw_mdev_get_device_info(struct vfio_ccw_private *private, + struct vfio_device_info *info) { - struct vfio_ccw_private *private; - - private = dev_get_drvdata(mdev_parent_dev(mdev)); info->flags = VFIO_DEVICE_FLAGS_CCW | VFIO_DEVICE_FLAGS_RESET; info->num_regions = VFIO_CCW_NUM_REGIONS + private->num_regions; info->num_irqs = VFIO_CCW_NUM_IRQS; @@ -324,14 +333,12 @@ static int vfio_ccw_mdev_get_device_info(struct vfio_device_info *info, return 0; } -static int vfio_ccw_mdev_get_region_info(struct vfio_region_info *info, - struct mdev_device *mdev, +static int vfio_ccw_mdev_get_region_info(struct vfio_ccw_private *private, + struct vfio_region_info *info, unsigned long arg) { - struct vfio_ccw_private *private; int i; - private = dev_get_drvdata(mdev_parent_dev(mdev)); switch (info->index) { case VFIO_CCW_CONFIG_REGION_INDEX: info->offset = 0; @@ -406,19 +413,16 @@ static int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info) return 0; } -static int vfio_ccw_mdev_set_irqs(struct mdev_device *mdev, +static int vfio_ccw_mdev_set_irqs(struct vfio_ccw_private *private, uint32_t flags, uint32_t index, void __user *data) { - struct vfio_ccw_private *private; struct eventfd_ctx **ctx; if (!(flags & VFIO_IRQ_SET_ACTION_TRIGGER)) return -EINVAL; - private = dev_get_drvdata(mdev_parent_dev(mdev)); - switch (index) { case VFIO_CCW_IO_IRQ_INDEX: ctx = &private->io_trigger; @@ -520,10 +524,12 @@ void vfio_ccw_unregister_dev_regions(struct vfio_ccw_private *private) private->region = NULL; } -static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev, +static ssize_t vfio_ccw_mdev_ioctl(struct vfio_device *vdev, unsigned int cmd, unsigned long arg) { + struct vfio_ccw_private *private = + container_of(vdev, struct vfio_ccw_private, vdev); int ret = 0; unsigned long minsz; @@ -540,7 +546,7 @@ static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev, if (info.argsz < minsz) return 
-EINVAL; - ret = vfio_ccw_mdev_get_device_info(&info, mdev); + ret = vfio_ccw_mdev_get_device_info(private, &info); if (ret) return ret; @@ -558,7 +564,7 @@ static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev, if (info.argsz < minsz) return -EINVAL; - ret = vfio_ccw_mdev_get_region_info(&info, mdev, arg); + ret = vfio_ccw_mdev_get_region_info(private, &info, arg); if (ret) return ret; @@ -603,47 +609,59 @@ static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev, return ret; data = (void __user *)(arg + minsz); - return vfio_ccw_mdev_set_irqs(mdev, hdr.flags, hdr.index, data); + return vfio_ccw_mdev_set_irqs(private, hdr.flags, hdr.index, + data); } case VFIO_DEVICE_RESET: - return vfio_ccw_mdev_reset(mdev); + return vfio_ccw_mdev_reset(private); default: return -ENOTTY; } } /* Request removal of the device*/ -static void vfio_ccw_mdev_request(struct mdev_device *mdev, unsigned int count) +static void vfio_ccw_mdev_request(struct vfio_device *vdev, unsigned int count) { - struct vfio_ccw_private *private = dev_get_drvdata(mdev_parent_dev(mdev)); - - if (!private) - return; + struct vfio_ccw_private *private = + container_of(vdev, struct vfio_ccw_private, vdev); + struct device *dev = vdev->dev; if (private->req_trigger) { if (!(count % 10)) - dev_notice_ratelimited(mdev_dev(private->mdev), + dev_notice_ratelimited(dev, "Relaying device request to user (#%u)\n", count); eventfd_signal(private->req_trigger, 1); } else if (count == 0) { - dev_notice(mdev_dev(private->mdev), + dev_notice(dev, "No device request channel registered, blocked until released by user\n"); } } +static const struct vfio_device_ops vfio_ccw_dev_ops = { + .open_device = vfio_ccw_mdev_open_device, + .close_device = vfio_ccw_mdev_close_device, + .read = vfio_ccw_mdev_read, + .write = vfio_ccw_mdev_write, + .ioctl = vfio_ccw_mdev_ioctl, + .request = vfio_ccw_mdev_request, +}; + +struct mdev_driver vfio_ccw_mdev_driver = { + .driver = { + .name = "vfio_ccw_mdev", + .owner = THIS_MODULE, + .mod_name = KBUILD_MODNAME, + }, + .probe = vfio_ccw_mdev_probe, + .remove = vfio_ccw_mdev_remove, +}; + static const struct mdev_parent_ops vfio_ccw_mdev_ops = { .owner = THIS_MODULE, + .device_driver = &vfio_ccw_mdev_driver, .supported_type_groups = mdev_type_groups, - .create = vfio_ccw_mdev_create, - .remove = vfio_ccw_mdev_remove, - .open_device = vfio_ccw_mdev_open_device, - .close_device = vfio_ccw_mdev_close_device, - .read = vfio_ccw_mdev_read, - .write = vfio_ccw_mdev_write, - .ioctl = vfio_ccw_mdev_ioctl, - .request = vfio_ccw_mdev_request, }; int vfio_ccw_mdev_reg(struct subchannel *sch) diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index b2c762eb42b9..7272eb788612 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -17,6 +17,7 @@ #include <linux/eventfd.h> #include <linux/workqueue.h> #include <linux/vfio_ccw.h> +#include <linux/vfio.h> #include <asm/crw.h> #include <asm/debug.h> @@ -67,6 +68,7 @@ struct vfio_ccw_crw { /** * struct vfio_ccw_private + * @vdev: Embedded VFIO device * @sch: pointer to the subchannel * @state: internal state of the device * @completion: synchronization helper of the I/O completion @@ -90,6 +92,7 @@ struct vfio_ccw_crw { * @crw_work: work for deferral process of CRW handling */ struct vfio_ccw_private { + struct vfio_device vdev; struct subchannel *sch; int state; struct completion *completion; @@ -121,6 +124,8 @@ extern void vfio_ccw_mdev_unreg(struct subchannel *sch); extern int 
vfio_ccw_sch_quiesce(struct subchannel *sch); +extern struct mdev_driver vfio_ccw_mdev_driver; + /* * States of the device statemachine. */ diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 623d5269a52c..2341425f6967 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -351,7 +351,7 @@ static int vfio_ap_mdev_probe(struct mdev_device *mdev) list_add(&matrix_mdev->node, &matrix_dev->mdev_list); mutex_unlock(&matrix_dev->lock); - ret = vfio_register_group_dev(&matrix_mdev->vdev); + ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev); if (ret) goto err_list; dev_set_drvdata(&mdev->dev, matrix_mdev); diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c index 0ead91bfa838..6e2e62c6f47a 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c @@ -65,6 +65,34 @@ static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev) kfree(vdev->regions); } +static int vfio_fsl_mc_reset_device(struct vfio_fsl_mc_device *vdev) +{ + struct fsl_mc_device *mc_dev = vdev->mc_dev; + int ret = 0; + + if (is_fsl_mc_bus_dprc(vdev->mc_dev)) { + return dprc_reset_container(mc_dev->mc_io, 0, + mc_dev->mc_handle, + mc_dev->obj_desc.id, + DPRC_RESET_OPTION_NON_RECURSIVE); + } else { + u16 token; + + ret = fsl_mc_obj_open(mc_dev->mc_io, 0, mc_dev->obj_desc.id, + mc_dev->obj_desc.type, + &token); + if (ret) + goto out; + ret = fsl_mc_obj_reset(mc_dev->mc_io, 0, token); + if (ret) { + fsl_mc_obj_close(mc_dev->mc_io, 0, token); + goto out; + } + ret = fsl_mc_obj_close(mc_dev->mc_io, 0, token); + } +out: + return ret; +} static void vfio_fsl_mc_close_device(struct vfio_device *core_vdev) { @@ -78,9 +106,7 @@ static void vfio_fsl_mc_close_device(struct vfio_device *core_vdev) vfio_fsl_mc_regions_cleanup(vdev); /* reset the device before cleaning up the interrupts */ - ret = dprc_reset_container(mc_cont->mc_io, 0, mc_cont->mc_handle, - mc_cont->obj_desc.id, - DPRC_RESET_OPTION_NON_RECURSIVE); + ret = vfio_fsl_mc_reset_device(vdev); if (WARN_ON(ret)) dev_warn(&mc_cont->dev, @@ -203,18 +229,7 @@ static long vfio_fsl_mc_ioctl(struct vfio_device *core_vdev, } case VFIO_DEVICE_RESET: { - int ret; - struct fsl_mc_device *mc_dev = vdev->mc_dev; - - /* reset is supported only for the DPRC */ - if (!is_fsl_mc_bus_dprc(mc_dev)) - return -ENOTTY; - - ret = dprc_reset_container(mc_dev->mc_io, 0, - mc_dev->mc_handle, - mc_dev->obj_desc.id, - DPRC_RESET_OPTION_NON_RECURSIVE); - return ret; + return vfio_fsl_mc_reset_device(vdev); } default: @@ -505,22 +520,13 @@ static void vfio_fsl_uninit_device(struct vfio_fsl_mc_device *vdev) static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev) { - struct iommu_group *group; struct vfio_fsl_mc_device *vdev; struct device *dev = &mc_dev->dev; int ret; - group = vfio_iommu_group_get(dev); - if (!group) { - dev_err(dev, "VFIO_FSL_MC: No IOMMU group\n"); - return -EINVAL; - } - vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); - if (!vdev) { - ret = -ENOMEM; - goto out_group_put; - } + if (!vdev) + return -ENOMEM; vfio_init_group_dev(&vdev->vdev, dev, &vfio_fsl_mc_ops); vdev->mc_dev = mc_dev; @@ -556,8 +562,6 @@ out_device: out_uninit: vfio_uninit_group_dev(&vdev->vdev); kfree(vdev); -out_group_put: - vfio_iommu_group_put(group, dev); return ret; } @@ -574,8 +578,6 @@ static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev) vfio_uninit_group_dev(&vdev->vdev); kfree(vdev); - vfio_iommu_group_put(mc_dev->dev.iommu_group, dev); - return 0; } diff --git 
a/drivers/vfio/mdev/mdev_driver.c b/drivers/vfio/mdev/mdev_driver.c index e2cb1ff56f6c..7927ed4f1711 100644 --- a/drivers/vfio/mdev/mdev_driver.c +++ b/drivers/vfio/mdev/mdev_driver.c @@ -13,60 +13,23 @@ #include "mdev_private.h" -static int mdev_attach_iommu(struct mdev_device *mdev) -{ - int ret; - struct iommu_group *group; - - group = iommu_group_alloc(); - if (IS_ERR(group)) - return PTR_ERR(group); - - ret = iommu_group_add_device(group, &mdev->dev); - if (!ret) - dev_info(&mdev->dev, "MDEV: group_id = %d\n", - iommu_group_id(group)); - - iommu_group_put(group); - return ret; -} - -static void mdev_detach_iommu(struct mdev_device *mdev) -{ - iommu_group_remove_device(&mdev->dev); - dev_info(&mdev->dev, "MDEV: detaching iommu\n"); -} - static int mdev_probe(struct device *dev) { struct mdev_driver *drv = container_of(dev->driver, struct mdev_driver, driver); - struct mdev_device *mdev = to_mdev_device(dev); - int ret; - ret = mdev_attach_iommu(mdev); - if (ret) - return ret; - - if (drv->probe) { - ret = drv->probe(mdev); - if (ret) - mdev_detach_iommu(mdev); - } - - return ret; + if (!drv->probe) + return 0; + return drv->probe(to_mdev_device(dev)); } static void mdev_remove(struct device *dev) { struct mdev_driver *drv = container_of(dev->driver, struct mdev_driver, driver); - struct mdev_device *mdev = to_mdev_device(dev); if (drv->remove) - drv->remove(mdev); - - mdev_detach_iommu(mdev); + drv->remove(to_mdev_device(dev)); } static int mdev_match(struct device *dev, struct device_driver *drv) diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c index 7a9883048216..a90e24b0c851 100644 --- a/drivers/vfio/mdev/vfio_mdev.c +++ b/drivers/vfio/mdev/vfio_mdev.c @@ -119,7 +119,7 @@ static int vfio_mdev_probe(struct mdev_device *mdev) return -ENOMEM; vfio_init_group_dev(vdev, &mdev->dev, &vfio_mdev_dev_ops); - ret = vfio_register_group_dev(vdev); + ret = vfio_register_emulated_iommu_dev(vdev); if (ret) goto out_uninit; diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index a03b5a99c2da..f948e6cd2993 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1806,7 +1806,6 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_uninit_device); int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; - struct iommu_group *group; int ret; if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) @@ -1825,10 +1824,6 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) return -EBUSY; } - group = vfio_iommu_group_get(&pdev->dev); - if (!group) - return -EINVAL; - if (pci_is_root_bus(pdev->bus)) { ret = vfio_assign_device_set(&vdev->vdev, vdev); } else if (!pci_probe_reset_slot(pdev->slot)) { @@ -1842,10 +1837,10 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) } if (ret) - goto out_group_put; + return ret; ret = vfio_pci_vf_init(vdev); if (ret) - goto out_group_put; + return ret; ret = vfio_pci_vga_init(vdev); if (ret) goto out_vf; @@ -1876,8 +1871,6 @@ out_power: vfio_pci_set_power_state(vdev, PCI_D0); out_vf: vfio_pci_vf_uninit(vdev); -out_group_put: - vfio_iommu_group_put(group, &pdev->dev); return ret; } EXPORT_SYMBOL_GPL(vfio_pci_core_register_device); @@ -1893,8 +1886,6 @@ void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev) vfio_pci_vf_uninit(vdev); vfio_pci_vga_uninit(vdev); - vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev); - if (!disable_idle_d3) vfio_pci_set_power_state(vdev, PCI_D0); } diff --git 
a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c index 7ca4109bba48..56cd551e0e04 100644 --- a/drivers/vfio/pci/vfio_pci_igd.c +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -25,20 +25,121 @@ #define OPREGION_RVDS 0x3c2 #define OPREGION_VERSION 0x16 +struct igd_opregion_vbt { + void *opregion; + void *vbt_ex; +}; + +/** + * igd_opregion_shift_copy() - Copy OpRegion to user buffer and shift position. + * @dst: User buffer ptr to copy to. + * @off: Offset to user buffer ptr. Increased by bytes on return. + * @src: Source buffer to copy from. + * @pos: Increased by bytes on return. + * @remaining: Decreased by bytes on return. + * @bytes: Bytes to copy and adjust off, pos and remaining. + * + * Copy OpRegion to offset from specific source ptr and shift the offset. + * + * Return: 0 on success, -EFAULT otherwise. + * + */ +static inline unsigned long igd_opregion_shift_copy(char __user *dst, + loff_t *off, + void *src, + loff_t *pos, + size_t *remaining, + size_t bytes) +{ + if (copy_to_user(dst + (*off), src, bytes)) + return -EFAULT; + + *off += bytes; + *pos += bytes; + *remaining -= bytes; + + return 0; +} + static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) { unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS; - void *base = vdev->region[i].data; - loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + struct igd_opregion_vbt *opregionvbt = vdev->region[i].data; + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK, off = 0; + size_t remaining; if (pos >= vdev->region[i].size || iswrite) return -EINVAL; - count = min(count, (size_t)(vdev->region[i].size - pos)); + count = min_t(size_t, count, vdev->region[i].size - pos); + remaining = count; + + /* Copy until OpRegion version */ + if (remaining && pos < OPREGION_VERSION) { + size_t bytes = min_t(size_t, remaining, OPREGION_VERSION - pos); + + if (igd_opregion_shift_copy(buf, &off, + opregionvbt->opregion + pos, &pos, + &remaining, bytes)) + return -EFAULT; + } + + /* Copy patched (if necessary) OpRegion version */ + if (remaining && pos < OPREGION_VERSION + sizeof(__le16)) { + size_t bytes = min_t(size_t, remaining, + OPREGION_VERSION + sizeof(__le16) - pos); + __le16 version = *(__le16 *)(opregionvbt->opregion + + OPREGION_VERSION); + + /* Patch to 2.1 if OpRegion 2.0 has extended VBT */ + if (le16_to_cpu(version) == 0x0200 && opregionvbt->vbt_ex) + version = cpu_to_le16(0x0201); + + if (igd_opregion_shift_copy(buf, &off, + &version + (pos - OPREGION_VERSION), + &pos, &remaining, bytes)) + return -EFAULT; + } + + /* Copy until RVDA */ + if (remaining && pos < OPREGION_RVDA) { + size_t bytes = min_t(size_t, remaining, OPREGION_RVDA - pos); - if (copy_to_user(buf, base + pos, count)) + if (igd_opregion_shift_copy(buf, &off, + opregionvbt->opregion + pos, &pos, + &remaining, bytes)) + return -EFAULT; + } + + /* Copy modified (if necessary) RVDA */ + if (remaining && pos < OPREGION_RVDA + sizeof(__le64)) { + size_t bytes = min_t(size_t, remaining, + OPREGION_RVDA + sizeof(__le64) - pos); + __le64 rvda = cpu_to_le64(opregionvbt->vbt_ex ? 
+ OPREGION_SIZE : 0); + + if (igd_opregion_shift_copy(buf, &off, + &rvda + (pos - OPREGION_RVDA), + &pos, &remaining, bytes)) + return -EFAULT; + } + + /* Copy the rest of OpRegion */ + if (remaining && pos < OPREGION_SIZE) { + size_t bytes = min_t(size_t, remaining, OPREGION_SIZE - pos); + + if (igd_opregion_shift_copy(buf, &off, + opregionvbt->opregion + pos, &pos, + &remaining, bytes)) + return -EFAULT; + } + + /* Copy extended VBT if exists */ + if (remaining && + copy_to_user(buf + off, opregionvbt->vbt_ex + (pos - OPREGION_SIZE), + remaining)) return -EFAULT; *ppos += count; @@ -49,7 +150,13 @@ static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev, static void vfio_pci_igd_release(struct vfio_pci_core_device *vdev, struct vfio_pci_region *region) { - memunmap(region->data); + struct igd_opregion_vbt *opregionvbt = region->data; + + if (opregionvbt->vbt_ex) + memunmap(opregionvbt->vbt_ex); + + memunmap(opregionvbt->opregion); + kfree(opregionvbt); } static const struct vfio_pci_regops vfio_pci_igd_regops = { @@ -61,7 +168,7 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_core_device *vdev) { __le32 *dwordp = (__le32 *)(vdev->vconfig + OPREGION_PCI_ADDR); u32 addr, size; - void *base; + struct igd_opregion_vbt *opregionvbt; int ret; u16 version; @@ -72,84 +179,93 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_core_device *vdev) if (!addr || !(~addr)) return -ENODEV; - base = memremap(addr, OPREGION_SIZE, MEMREMAP_WB); - if (!base) + opregionvbt = kzalloc(sizeof(*opregionvbt), GFP_KERNEL); + if (!opregionvbt) + return -ENOMEM; + + opregionvbt->opregion = memremap(addr, OPREGION_SIZE, MEMREMAP_WB); + if (!opregionvbt->opregion) { + kfree(opregionvbt); return -ENOMEM; + } - if (memcmp(base, OPREGION_SIGNATURE, 16)) { - memunmap(base); + if (memcmp(opregionvbt->opregion, OPREGION_SIGNATURE, 16)) { + memunmap(opregionvbt->opregion); + kfree(opregionvbt); return -EINVAL; } - size = le32_to_cpu(*(__le32 *)(base + 16)); + size = le32_to_cpu(*(__le32 *)(opregionvbt->opregion + 16)); if (!size) { - memunmap(base); + memunmap(opregionvbt->opregion); + kfree(opregionvbt); return -EINVAL; } size *= 1024; /* In KB */ /* - * Support opregion v2.1+ - * When VBT data exceeds 6KB size and cannot be within mailbox #4, then - * the Extended VBT region next to opregion is used to hold the VBT data. - * RVDA (Relative Address of VBT Data from Opregion Base) and RVDS - * (Raw VBT Data Size) from opregion structure member are used to hold the - * address from region base and size of VBT data. RVDA/RVDS are not - * defined before opregion 2.0. - * - * opregion 2.1+: RVDA is unsigned, relative offset from - * opregion base, and should point to the end of opregion. - * otherwise, exposing to userspace to allow read access to everything between - * the OpRegion and VBT is not safe. - * RVDS is defined as size in bytes. + * OpRegion and VBT: + * When VBT data doesn't exceed 6KB, it's stored in Mailbox #4. + * When VBT data exceeds 6KB size, Mailbox #4 is no longer large enough + * to hold the VBT data, the Extended VBT region is introduced since + * OpRegion 2.0 to hold the VBT data. Since OpRegion 2.0, RVDA/RVDS are + * introduced to define the extended VBT data location and size. + * OpRegion 2.0: RVDA defines the absolute physical address of the + * extended VBT data, RVDS defines the VBT data size. + * OpRegion 2.1 and above: RVDA defines the relative address of the + * extended VBT data to OpRegion base, RVDS defines the VBT data size. 
* - * opregion 2.0: rvda is the physical VBT address. - * Since rvda is HPA it cannot be directly used in guest. - * And it should not be practically available for end user,so it is not supported. + * Due to the RVDA definition diff in OpRegion VBT (also the only diff + * between 2.0 and 2.1), exposing OpRegion and VBT as a contiguous range + * for OpRegion 2.0 and above makes it possible to support the + * non-contiguous VBT through a single vfio region. From r/w ops view, + * only contiguous VBT after OpRegion with version 2.1+ is exposed, + * regardless the host OpRegion is 2.0 or non-contiguous 2.1+. The r/w + * ops will on-the-fly shift the actural offset into VBT so that data at + * correct position can be returned to the requester. */ - version = le16_to_cpu(*(__le16 *)(base + OPREGION_VERSION)); + version = le16_to_cpu(*(__le16 *)(opregionvbt->opregion + + OPREGION_VERSION)); if (version >= 0x0200) { - u64 rvda; - u32 rvds; + u64 rvda = le64_to_cpu(*(__le64 *)(opregionvbt->opregion + + OPREGION_RVDA)); + u32 rvds = le32_to_cpu(*(__le32 *)(opregionvbt->opregion + + OPREGION_RVDS)); - rvda = le64_to_cpu(*(__le64 *)(base + OPREGION_RVDA)); - rvds = le32_to_cpu(*(__le32 *)(base + OPREGION_RVDS)); + /* The extended VBT is valid only when RVDA/RVDS are non-zero */ if (rvda && rvds) { - /* no support for opregion v2.0 with physical VBT address */ - if (version == 0x0200) { - memunmap(base); - pci_err(vdev->pdev, - "IGD assignment does not support opregion v2.0 with an extended VBT region\n"); - return -EINVAL; - } + size += rvds; - if (rvda != size) { - memunmap(base); - pci_err(vdev->pdev, - "Extended VBT does not follow opregion on version 0x%04x\n", - version); - return -EINVAL; + /* + * Extended VBT location by RVDA: + * Absolute physical addr for 2.0. + * Relative addr to OpRegion header for 2.1+. + */ + if (version == 0x0200) + addr = rvda; + else + addr += rvda; + + opregionvbt->vbt_ex = memremap(addr, rvds, MEMREMAP_WB); + if (!opregionvbt->vbt_ex) { + memunmap(opregionvbt->opregion); + kfree(opregionvbt); + return -ENOMEM; } - - /* region size for opregion v2.0+: opregion and VBT size. 
*/ - size += rvds; } } - if (size != OPREGION_SIZE) { - memunmap(base); - base = memremap(addr, size, MEMREMAP_WB); - if (!base) - return -ENOMEM; - } - ret = vfio_pci_register_dev_region(vdev, PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE, - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, - &vfio_pci_igd_regops, size, VFIO_REGION_INFO_FLAG_READ, base); + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &vfio_pci_igd_regops, + size, VFIO_REGION_INFO_FLAG_READ, opregionvbt); if (ret) { - memunmap(base); + if (opregionvbt->vbt_ex) + memunmap(opregionvbt->vbt_ex); + + memunmap(opregionvbt->opregion); + kfree(opregionvbt); return ret; } diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 6af7ce7d619c..256f55b84e70 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -642,7 +642,6 @@ static int vfio_platform_of_probe(struct vfio_platform_device *vdev, int vfio_platform_probe_common(struct vfio_platform_device *vdev, struct device *dev) { - struct iommu_group *group; int ret; vfio_init_group_dev(&vdev->vdev, dev, &vfio_platform_ops); @@ -663,24 +662,15 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, goto out_uninit; } - group = vfio_iommu_group_get(dev); - if (!group) { - dev_err(dev, "No IOMMU group for device %s\n", vdev->name); - ret = -EINVAL; - goto put_reset; - } - ret = vfio_register_group_dev(&vdev->vdev); if (ret) - goto put_iommu; + goto put_reset; mutex_init(&vdev->igate); pm_runtime_enable(dev); return 0; -put_iommu: - vfio_iommu_group_put(group, dev); put_reset: vfio_platform_put_reset(vdev); out_uninit: @@ -696,7 +686,6 @@ void vfio_platform_remove_common(struct vfio_platform_device *vdev) pm_runtime_disable(vdev->device); vfio_platform_put_reset(vdev); vfio_uninit_group_dev(&vdev->vdev); - vfio_iommu_group_put(vdev->vdev.dev->iommu_group, vdev->vdev.dev); } EXPORT_SYMBOL_GPL(vfio_platform_remove_common); diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 3c034fe14ccb..82fb75464f92 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -32,6 +32,7 @@ #include <linux/vfio.h> #include <linux/wait.h> #include <linux/sched/signal.h> +#include "vfio.h" #define DRIVER_VERSION "0.3" #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" @@ -42,9 +43,8 @@ static struct vfio { struct list_head iommu_drivers_list; struct mutex iommu_drivers_lock; struct list_head group_list; - struct idr group_idr; - struct mutex group_lock; - struct cdev group_cdev; + struct mutex group_lock; /* locks group_list */ + struct ida group_ida; dev_t group_devt; } vfio; @@ -68,14 +68,14 @@ struct vfio_unbound_dev { }; struct vfio_group { - struct kref kref; - int minor; + struct device dev; + struct cdev cdev; + refcount_t users; atomic_t container_users; struct iommu_group *iommu_group; struct vfio_container *container; struct list_head device_list; struct mutex device_lock; - struct device *dev; struct notifier_block nb; struct list_head vfio_next; struct list_head container_next; @@ -83,7 +83,7 @@ struct vfio_group { struct mutex unbound_lock; atomic_t opened; wait_queue_head_t container_q; - bool noiommu; + enum vfio_group_type type; unsigned int dev_counter; struct kvm *kvm; struct blocking_notifier_head notifier; @@ -97,6 +97,7 @@ MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. 
Thi #endif static DEFINE_XARRAY(vfio_device_set_xa); +static const struct file_operations vfio_group_fops; int vfio_assign_device_set(struct vfio_device *device, void *set_id) { @@ -169,70 +170,6 @@ static void vfio_release_device_set(struct vfio_device *device) xa_unlock(&vfio_device_set_xa); } -/* - * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe - * and remove functions, any use cases other than acquiring the first - * reference for the purpose of calling vfio_register_group_dev() or removing - * that symmetric reference after vfio_unregister_group_dev() should use the raw - * iommu_group_{get,put} functions. In particular, vfio_iommu_group_put() - * removes the device from the dummy group and cannot be nested. - */ -struct iommu_group *vfio_iommu_group_get(struct device *dev) -{ - struct iommu_group *group; - int __maybe_unused ret; - - group = iommu_group_get(dev); - -#ifdef CONFIG_VFIO_NOIOMMU - /* - * With noiommu enabled, an IOMMU group will be created for a device - * that doesn't already have one and doesn't have an iommu_ops on their - * bus. We set iommudata simply to be able to identify these groups - * as special use and for reclamation later. - */ - if (group || !noiommu || iommu_present(dev->bus)) - return group; - - group = iommu_group_alloc(); - if (IS_ERR(group)) - return NULL; - - iommu_group_set_name(group, "vfio-noiommu"); - iommu_group_set_iommudata(group, &noiommu, NULL); - ret = iommu_group_add_device(group, dev); - if (ret) { - iommu_group_put(group); - return NULL; - } - - /* - * Where to taint? At this point we've added an IOMMU group for a - * device that is not backed by iommu_ops, therefore any iommu_ - * callback using iommu_ops can legitimately Oops. So, while we may - * be about to give a DMA capable device to a user without IOMMU - * protection, which is clearly taint-worthy, let's go ahead and do - * it here. - */ - add_taint(TAINT_USER, LOCKDEP_STILL_OK); - dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n"); -#endif - - return group; -} -EXPORT_SYMBOL_GPL(vfio_iommu_group_get); - -void vfio_iommu_group_put(struct iommu_group *group, struct device *dev) -{ -#ifdef CONFIG_VFIO_NOIOMMU - if (iommu_group_get_iommudata(group) == &noiommu) - iommu_group_remove_device(dev); -#endif - - iommu_group_put(group); -} -EXPORT_SYMBOL_GPL(vfio_iommu_group_put); - #ifdef CONFIG_VFIO_NOIOMMU static void *vfio_noiommu_open(unsigned long arg) { @@ -258,9 +195,9 @@ static long vfio_noiommu_ioctl(void *iommu_data, } static int vfio_noiommu_attach_group(void *iommu_data, - struct iommu_group *iommu_group) + struct iommu_group *iommu_group, enum vfio_group_type type) { - return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL; + return 0; } static void vfio_noiommu_detach_group(void *iommu_data, @@ -277,8 +214,23 @@ static const struct vfio_iommu_driver_ops vfio_noiommu_ops = { .attach_group = vfio_noiommu_attach_group, .detach_group = vfio_noiommu_detach_group, }; -#endif +/* + * Only noiommu containers can use vfio-noiommu and noiommu containers can only + * use vfio-noiommu. 
+ */ +static inline bool vfio_iommu_driver_allowed(struct vfio_container *container, + const struct vfio_iommu_driver *driver) +{ + return container->noiommu == (driver->ops == &vfio_noiommu_ops); +} +#else +static inline bool vfio_iommu_driver_allowed(struct vfio_container *container, + const struct vfio_iommu_driver *driver) +{ + return true; +} +#endif /* CONFIG_VFIO_NOIOMMU */ /** * IOMMU driver registration @@ -329,19 +281,6 @@ void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops) } EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver); -/** - * Group minor allocation/free - both called with vfio.group_lock held - */ -static int vfio_alloc_group_minor(struct vfio_group *group) -{ - return idr_alloc(&vfio.group_idr, group, 0, MINORMASK + 1, GFP_KERNEL); -} - -static void vfio_free_group_minor(int minor) -{ - idr_remove(&vfio.group_idr, minor); -} - static int vfio_iommu_group_notifier(struct notifier_block *nb, unsigned long action, void *data); static void vfio_group_get(struct vfio_group *group); @@ -370,105 +309,38 @@ static void vfio_container_put(struct vfio_container *container) kref_put(&container->kref, vfio_container_release); } -static void vfio_group_unlock_and_free(struct vfio_group *group) -{ - mutex_unlock(&vfio.group_lock); - /* - * Unregister outside of lock. A spurious callback is harmless now - * that the group is no longer in vfio.group_list. - */ - iommu_group_unregister_notifier(group->iommu_group, &group->nb); - kfree(group); -} - /** * Group objects - create, release, get, put, search */ -static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) +static struct vfio_group * +__vfio_group_get_from_iommu(struct iommu_group *iommu_group) { - struct vfio_group *group, *tmp; - struct device *dev; - int ret, minor; - - group = kzalloc(sizeof(*group), GFP_KERNEL); - if (!group) - return ERR_PTR(-ENOMEM); - - kref_init(&group->kref); - INIT_LIST_HEAD(&group->device_list); - mutex_init(&group->device_lock); - INIT_LIST_HEAD(&group->unbound_list); - mutex_init(&group->unbound_lock); - atomic_set(&group->container_users, 0); - atomic_set(&group->opened, 0); - init_waitqueue_head(&group->container_q); - group->iommu_group = iommu_group; -#ifdef CONFIG_VFIO_NOIOMMU - group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu); -#endif - BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); - - group->nb.notifier_call = vfio_iommu_group_notifier; - - /* - * blocking notifiers acquire a rwsem around registering and hold - * it around callback. Therefore, need to register outside of - * vfio.group_lock to avoid A-B/B-A contention. Our callback won't - * do anything unless it can find the group in vfio.group_list, so - * no harm in registering early. - */ - ret = iommu_group_register_notifier(iommu_group, &group->nb); - if (ret) { - kfree(group); - return ERR_PTR(ret); - } - - mutex_lock(&vfio.group_lock); + struct vfio_group *group; - /* Did we race creating this group? */ - list_for_each_entry(tmp, &vfio.group_list, vfio_next) { - if (tmp->iommu_group == iommu_group) { - vfio_group_get(tmp); - vfio_group_unlock_and_free(group); - return tmp; + list_for_each_entry(group, &vfio.group_list, vfio_next) { + if (group->iommu_group == iommu_group) { + vfio_group_get(group); + return group; } } + return NULL; +} - minor = vfio_alloc_group_minor(group); - if (minor < 0) { - vfio_group_unlock_and_free(group); - return ERR_PTR(minor); - } - - dev = device_create(vfio.class, NULL, - MKDEV(MAJOR(vfio.group_devt), minor), - group, "%s%d", group->noiommu ? 
"noiommu-" : "", - iommu_group_id(iommu_group)); - if (IS_ERR(dev)) { - vfio_free_group_minor(minor); - vfio_group_unlock_and_free(group); - return ERR_CAST(dev); - } - - group->minor = minor; - group->dev = dev; - - list_add(&group->vfio_next, &vfio.group_list); +static struct vfio_group * +vfio_group_get_from_iommu(struct iommu_group *iommu_group) +{ + struct vfio_group *group; + mutex_lock(&vfio.group_lock); + group = __vfio_group_get_from_iommu(iommu_group); mutex_unlock(&vfio.group_lock); - return group; } -/* called with vfio.group_lock held */ -static void vfio_group_release(struct kref *kref) +static void vfio_group_release(struct device *dev) { - struct vfio_group *group = container_of(kref, struct vfio_group, kref); + struct vfio_group *group = container_of(dev, struct vfio_group, dev); struct vfio_unbound_dev *unbound, *tmp; - struct iommu_group *iommu_group = group->iommu_group; - - WARN_ON(!list_empty(&group->device_list)); - WARN_ON(group->notifier.head); list_for_each_entry_safe(unbound, tmp, &group->unbound_list, unbound_next) { @@ -476,105 +348,129 @@ static void vfio_group_release(struct kref *kref) kfree(unbound); } - device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor)); - list_del(&group->vfio_next); - vfio_free_group_minor(group->minor); - vfio_group_unlock_and_free(group); - iommu_group_put(iommu_group); + mutex_destroy(&group->device_lock); + mutex_destroy(&group->unbound_lock); + iommu_group_put(group->iommu_group); + ida_free(&vfio.group_ida, MINOR(group->dev.devt)); + kfree(group); } -static void vfio_group_put(struct vfio_group *group) +static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group, + enum vfio_group_type type) { - kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock); -} - -struct vfio_group_put_work { - struct work_struct work; struct vfio_group *group; -}; + int minor; -static void vfio_group_put_bg(struct work_struct *work) -{ - struct vfio_group_put_work *do_work; - - do_work = container_of(work, struct vfio_group_put_work, work); + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); - vfio_group_put(do_work->group); - kfree(do_work); -} + minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL); + if (minor < 0) { + kfree(group); + return ERR_PTR(minor); + } -static void vfio_group_schedule_put(struct vfio_group *group) -{ - struct vfio_group_put_work *do_work; + device_initialize(&group->dev); + group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor); + group->dev.class = vfio.class; + group->dev.release = vfio_group_release; + cdev_init(&group->cdev, &vfio_group_fops); + group->cdev.owner = THIS_MODULE; - do_work = kmalloc(sizeof(*do_work), GFP_KERNEL); - if (WARN_ON(!do_work)) - return; + refcount_set(&group->users, 1); + INIT_LIST_HEAD(&group->device_list); + mutex_init(&group->device_lock); + INIT_LIST_HEAD(&group->unbound_list); + mutex_init(&group->unbound_lock); + init_waitqueue_head(&group->container_q); + group->iommu_group = iommu_group; + /* put in vfio_group_release() */ + iommu_group_ref_get(iommu_group); + group->type = type; + BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); - INIT_WORK(&do_work->work, vfio_group_put_bg); - do_work->group = group; - schedule_work(&do_work->work); + return group; } -/* Assume group_lock or group reference is held */ -static void vfio_group_get(struct vfio_group *group) +static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, + enum vfio_group_type type) { - kref_get(&group->kref); -} + struct 
vfio_group *group; + struct vfio_group *ret; + int err; -/* - * Not really a try as we will sleep for mutex, but we need to make - * sure the group pointer is valid under lock and get a reference. - */ -static struct vfio_group *vfio_group_try_get(struct vfio_group *group) -{ - struct vfio_group *target = group; + group = vfio_group_alloc(iommu_group, type); + if (IS_ERR(group)) + return group; - mutex_lock(&vfio.group_lock); - list_for_each_entry(group, &vfio.group_list, vfio_next) { - if (group == target) { - vfio_group_get(group); - mutex_unlock(&vfio.group_lock); - return group; - } + err = dev_set_name(&group->dev, "%s%d", + group->type == VFIO_NO_IOMMU ? "noiommu-" : "", + iommu_group_id(iommu_group)); + if (err) { + ret = ERR_PTR(err); + goto err_put; } - mutex_unlock(&vfio.group_lock); - return NULL; -} - -static -struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group) -{ - struct vfio_group *group; + group->nb.notifier_call = vfio_iommu_group_notifier; + err = iommu_group_register_notifier(iommu_group, &group->nb); + if (err) { + ret = ERR_PTR(err); + goto err_put; + } mutex_lock(&vfio.group_lock); - list_for_each_entry(group, &vfio.group_list, vfio_next) { - if (group->iommu_group == iommu_group) { - vfio_group_get(group); - mutex_unlock(&vfio.group_lock); - return group; - } + + /* Did we race creating this group? */ + ret = __vfio_group_get_from_iommu(iommu_group); + if (ret) + goto err_unlock; + + err = cdev_device_add(&group->cdev, &group->dev); + if (err) { + ret = ERR_PTR(err); + goto err_unlock; } + + list_add(&group->vfio_next, &vfio.group_list); + mutex_unlock(&vfio.group_lock); + return group; - return NULL; +err_unlock: + mutex_unlock(&vfio.group_lock); + iommu_group_unregister_notifier(group->iommu_group, &group->nb); +err_put: + put_device(&group->dev); + return ret; } -static struct vfio_group *vfio_group_get_from_minor(int minor) +static void vfio_group_put(struct vfio_group *group) { - struct vfio_group *group; + if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock)) + return; - mutex_lock(&vfio.group_lock); - group = idr_find(&vfio.group_idr, minor); - if (!group) { - mutex_unlock(&vfio.group_lock); - return NULL; - } - vfio_group_get(group); + /* + * These data structures all have paired operations that can only be + * undone when the caller holds a live reference on the group. Since all + * pairs must be undone these WARN_ON's indicate some caller did not + * properly hold the group reference. + */ + WARN_ON(!list_empty(&group->device_list)); + WARN_ON(atomic_read(&group->container_users)); + WARN_ON(group->notifier.head); + + list_del(&group->vfio_next); + cdev_device_del(&group->cdev, &group->dev); mutex_unlock(&vfio.group_lock); - return group; + iommu_group_unregister_notifier(group->iommu_group, &group->nb); + put_device(&group->dev); +} + +static void vfio_group_get(struct vfio_group *group) +{ + refcount_inc(&group->users); } static struct vfio_group *vfio_group_get_from_dev(struct device *dev) @@ -740,14 +636,6 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb, struct device *dev = data; struct vfio_unbound_dev *unbound; - /* - * Need to go through a group_lock lookup to get a reference or we - * risk racing a group being removed. Ignore spurious notifies. 
- */ - group = vfio_group_try_get(group); - if (!group) - return NOTIFY_OK; - switch (action) { case IOMMU_GROUP_NOTIFY_ADD_DEVICE: vfio_group_nb_add_dev(group, dev); @@ -798,15 +686,6 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb, mutex_unlock(&group->unbound_lock); break; } - - /* - * If we're the last reference to the group, the group will be - * released, which includes unregistering the iommu group notifier. - * We hold a read-lock on that notifier list, unregistering needs - * a write-lock... deadlock. Release our reference asynchronously - * to avoid that situation. - */ - vfio_group_schedule_put(group); return NOTIFY_OK; } @@ -828,11 +707,78 @@ void vfio_uninit_group_dev(struct vfio_device *device) } EXPORT_SYMBOL_GPL(vfio_uninit_group_dev); -int vfio_register_group_dev(struct vfio_device *device) +static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, + enum vfio_group_type type) { - struct vfio_device *existing_device; struct iommu_group *iommu_group; struct vfio_group *group; + int ret; + + iommu_group = iommu_group_alloc(); + if (IS_ERR(iommu_group)) + return ERR_CAST(iommu_group); + + iommu_group_set_name(iommu_group, "vfio-noiommu"); + ret = iommu_group_add_device(iommu_group, dev); + if (ret) + goto out_put_group; + + group = vfio_create_group(iommu_group, type); + if (IS_ERR(group)) { + ret = PTR_ERR(group); + goto out_remove_device; + } + iommu_group_put(iommu_group); + return group; + +out_remove_device: + iommu_group_remove_device(dev); +out_put_group: + iommu_group_put(iommu_group); + return ERR_PTR(ret); +} + +static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) +{ + struct iommu_group *iommu_group; + struct vfio_group *group; + + iommu_group = iommu_group_get(dev); +#ifdef CONFIG_VFIO_NOIOMMU + if (!iommu_group && noiommu && !iommu_present(dev->bus)) { + /* + * With noiommu enabled, create an IOMMU group for devices that + * don't already have one and don't have an iommu_ops on their + * bus. Taint the kernel because we're about to give a DMA + * capable device to a user without IOMMU protection. + */ + group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU); + if (!IS_ERR(group)) { + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n"); + } + return group; + } +#endif + if (!iommu_group) + return ERR_PTR(-EINVAL); + + group = vfio_group_get_from_iommu(iommu_group); + if (!group) + group = vfio_create_group(iommu_group, VFIO_IOMMU); + + /* The vfio_group holds a reference to the iommu_group */ + iommu_group_put(iommu_group); + return group; +} + +static int __vfio_register_dev(struct vfio_device *device, + struct vfio_group *group) +{ + struct vfio_device *existing_device; + + if (IS_ERR(group)) + return PTR_ERR(group); /* * If the driver doesn't specify a set then the device is added to a @@ -841,30 +787,14 @@ int vfio_register_group_dev(struct vfio_device *device) if (!device->dev_set) vfio_assign_device_set(device, device); - iommu_group = iommu_group_get(device->dev); - if (!iommu_group) - return -EINVAL; - - group = vfio_group_get_from_iommu(iommu_group); - if (!group) { - group = vfio_create_group(iommu_group); - if (IS_ERR(group)) { - iommu_group_put(iommu_group); - return PTR_ERR(group); - } - } else { - /* - * A found vfio_group already holds a reference to the - * iommu_group. A created vfio_group keeps the reference. 
- */ - iommu_group_put(iommu_group); - } - existing_device = vfio_group_get_device(group, device->dev); if (existing_device) { dev_WARN(device->dev, "Device already exists on group %d\n", - iommu_group_id(iommu_group)); + iommu_group_id(group->iommu_group)); vfio_device_put(existing_device); + if (group->type == VFIO_NO_IOMMU || + group->type == VFIO_EMULATED_IOMMU) + iommu_group_remove_device(device->dev); vfio_group_put(group); return -EBUSY; } @@ -882,8 +812,25 @@ int vfio_register_group_dev(struct vfio_device *device) return 0; } + +int vfio_register_group_dev(struct vfio_device *device) +{ + return __vfio_register_dev(device, + vfio_group_find_or_alloc(device->dev)); +} EXPORT_SYMBOL_GPL(vfio_register_group_dev); +/* + * Register a virtual device without IOMMU backing. The user of this + * device must not be able to directly trigger unmediated DMA. + */ +int vfio_register_emulated_iommu_dev(struct vfio_device *device) +{ + return __vfio_register_dev(device, + vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU)); +} +EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev); + /** * Get a reference to the vfio_device for a device. Even if the * caller thinks they own the device, they could be racing with a @@ -1010,6 +957,9 @@ void vfio_unregister_group_dev(struct vfio_device *device) if (list_empty(&group->device_list)) wait_event(group->container_q, !group->container); + if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU) + iommu_group_remove_device(device->dev); + /* Matches the get in vfio_register_group_dev() */ vfio_group_put(group); } @@ -1042,13 +992,10 @@ static long vfio_ioctl_check_extension(struct vfio_container *container, list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { -#ifdef CONFIG_VFIO_NOIOMMU if (!list_empty(&container->group_list) && - (container->noiommu != - (driver->ops == &vfio_noiommu_ops))) + !vfio_iommu_driver_allowed(container, + driver)) continue; -#endif - if (!try_module_get(driver->ops->owner)) continue; @@ -1079,7 +1026,8 @@ static int __vfio_container_attach_groups(struct vfio_container *container, int ret = -ENODEV; list_for_each_entry(group, &container->group_list, container_next) { - ret = driver->ops->attach_group(data, group->iommu_group); + ret = driver->ops->attach_group(data, group->iommu_group, + group->type); if (ret) goto unwind; } @@ -1120,15 +1068,8 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container, list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { void *data; -#ifdef CONFIG_VFIO_NOIOMMU - /* - * Only noiommu containers can use vfio-noiommu and noiommu - * containers can only use vfio-noiommu. - */ - if (container->noiommu != (driver->ops == &vfio_noiommu_ops)) + if (!vfio_iommu_driver_allowed(container, driver)) continue; -#endif - if (!try_module_get(driver->ops->owner)) continue; @@ -1234,62 +1175,12 @@ static int vfio_fops_release(struct inode *inode, struct file *filep) return 0; } -/* - * Once an iommu driver is set, we optionally pass read/write/mmap - * on to the driver, allowing management interfaces beyond ioctl. 
- */ -static ssize_t vfio_fops_read(struct file *filep, char __user *buf, - size_t count, loff_t *ppos) -{ - struct vfio_container *container = filep->private_data; - struct vfio_iommu_driver *driver; - ssize_t ret = -EINVAL; - - driver = container->iommu_driver; - if (likely(driver && driver->ops->read)) - ret = driver->ops->read(container->iommu_data, - buf, count, ppos); - - return ret; -} - -static ssize_t vfio_fops_write(struct file *filep, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct vfio_container *container = filep->private_data; - struct vfio_iommu_driver *driver; - ssize_t ret = -EINVAL; - - driver = container->iommu_driver; - if (likely(driver && driver->ops->write)) - ret = driver->ops->write(container->iommu_data, - buf, count, ppos); - - return ret; -} - -static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma) -{ - struct vfio_container *container = filep->private_data; - struct vfio_iommu_driver *driver; - int ret = -EINVAL; - - driver = container->iommu_driver; - if (likely(driver && driver->ops->mmap)) - ret = driver->ops->mmap(container->iommu_data, vma); - - return ret; -} - static const struct file_operations vfio_fops = { .owner = THIS_MODULE, .open = vfio_fops_open, .release = vfio_fops_release, - .read = vfio_fops_read, - .write = vfio_fops_write, .unlocked_ioctl = vfio_fops_unl_ioctl, .compat_ioctl = compat_ptr_ioctl, - .mmap = vfio_fops_mmap, }; /** @@ -1366,7 +1257,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) if (atomic_read(&group->container_users)) return -EINVAL; - if (group->noiommu && !capable(CAP_SYS_RAWIO)) + if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) return -EPERM; f = fdget(container_fd); @@ -1386,7 +1277,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) /* Real groups and fake groups cannot mix */ if (!list_empty(&container->group_list) && - container->noiommu != group->noiommu) { + container->noiommu != (group->type == VFIO_NO_IOMMU)) { ret = -EPERM; goto unlock_out; } @@ -1394,13 +1285,14 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) driver = container->iommu_driver; if (driver) { ret = driver->ops->attach_group(container->iommu_data, - group->iommu_group); + group->iommu_group, + group->type); if (ret) goto unlock_out; } group->container = container; - container->noiommu = group->noiommu; + container->noiommu = (group->type == VFIO_NO_IOMMU); list_add(&group->container_next, &container->group_list); /* Get a reference on the container and mark a user within the group */ @@ -1424,7 +1316,7 @@ static int vfio_group_add_container_user(struct vfio_group *group) if (!atomic_inc_not_zero(&group->container_users)) return -EINVAL; - if (group->noiommu) { + if (group->type == VFIO_NO_IOMMU) { atomic_dec(&group->container_users); return -EPERM; } @@ -1449,7 +1341,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) !group->container->iommu_driver || !vfio_group_viable(group)) return -EINVAL; - if (group->noiommu && !capable(CAP_SYS_RAWIO)) + if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) return -EPERM; device = vfio_device_get_from_name(group, buf); @@ -1496,7 +1388,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) fd_install(fdno, filep); - if (group->noiommu) + if (group->type == VFIO_NO_IOMMU) dev_warn(device->dev, "vfio-noiommu device opened by user " "(%s:%d)\n", current->comm, task_pid_nr(current)); return fdno; @@ -1585,14 
+1477,15 @@ static long vfio_group_fops_unl_ioctl(struct file *filep,
 
 static int vfio_group_fops_open(struct inode *inode, struct file *filep)
 {
-	struct vfio_group *group;
+	struct vfio_group *group =
+		container_of(inode->i_cdev, struct vfio_group, cdev);
 	int opened;
 
-	group = vfio_group_get_from_minor(iminor(inode));
-	if (!group)
+	/* users can be zero if this races with vfio_group_put() */
+	if (!refcount_inc_not_zero(&group->users))
 		return -ENODEV;
 
-	if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
+	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
 		vfio_group_put(group);
 		return -EPERM;
 	}
@@ -1757,6 +1650,9 @@ struct vfio_group *vfio_group_get_external_user(struct file *filep)
 	if (ret)
 		return ERR_PTR(ret);
 
+	/*
+	 * Since the caller holds the fget on the file, group->users must be >= 1
+	 */
 	vfio_group_get(group);
 
 	return group;
@@ -2396,7 +2292,7 @@ static int __init vfio_init(void)
 {
 	int ret;
 
-	idr_init(&vfio.group_idr);
+	ida_init(&vfio.group_ida);
 	mutex_init(&vfio.group_lock);
 	mutex_init(&vfio.iommu_drivers_lock);
 	INIT_LIST_HEAD(&vfio.group_list);
@@ -2421,11 +2317,6 @@ static int __init vfio_init(void)
 	if (ret)
 		goto err_alloc_chrdev;
 
-	cdev_init(&vfio.group_cdev, &vfio_group_fops);
-	ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK + 1);
-	if (ret)
-		goto err_cdev_add;
-
 	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
 
 #ifdef CONFIG_VFIO_NOIOMMU
@@ -2433,8 +2324,6 @@ static int __init vfio_init(void)
 #endif
 	return 0;
 
-err_cdev_add:
-	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
 err_alloc_chrdev:
 	class_destroy(vfio.class);
 	vfio.class = NULL;
@@ -2450,8 +2339,7 @@ static void __exit vfio_cleanup(void)
 #ifdef CONFIG_VFIO_NOIOMMU
 	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
 #endif
-	idr_destroy(&vfio.group_idr);
-	cdev_del(&vfio.group_cdev);
+	ida_destroy(&vfio.group_ida);
 	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
 	class_destroy(vfio.class);
 	vfio.class = NULL;
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
new file mode 100644
index 000000000000..a67130221151
--- /dev/null
+++ b/drivers/vfio/vfio.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
+ *	Author: Alex Williamson <alex.williamson@redhat.com>
+ */
+
+enum vfio_group_type {
+	/*
+	 * Physical device with IOMMU backing.
+	 */
+	VFIO_IOMMU,
+
+	/*
+	 * Virtual device without IOMMU backing. The VFIO core fakes up an
+	 * iommu_group as the iommu_group sysfs interface is part of the
+	 * userspace ABI. The user of these devices must not be able to
+	 * directly trigger unmediated DMA.
+	 */
+	VFIO_EMULATED_IOMMU,
+
+	/*
+	 * Physical device without IOMMU backing. The VFIO core fakes up an
+	 * iommu_group as the iommu_group sysfs interface is part of the
+	 * userspace ABI. Users can trigger unmediated DMA by the device;
+	 * usage is highly dangerous, requires an explicit opt-in, and will
+	 * taint the kernel.
+ */ + VFIO_NO_IOMMU, +}; + +/* events for the backend driver notify callback */ +enum vfio_iommu_notify_type { + VFIO_IOMMU_CONTAINER_CLOSE = 0, +}; + +/** + * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks + */ +struct vfio_iommu_driver_ops { + char *name; + struct module *owner; + void *(*open)(unsigned long arg); + void (*release)(void *iommu_data); + long (*ioctl)(void *iommu_data, unsigned int cmd, + unsigned long arg); + int (*attach_group)(void *iommu_data, + struct iommu_group *group, + enum vfio_group_type); + void (*detach_group)(void *iommu_data, + struct iommu_group *group); + int (*pin_pages)(void *iommu_data, + struct iommu_group *group, + unsigned long *user_pfn, + int npage, int prot, + unsigned long *phys_pfn); + int (*unpin_pages)(void *iommu_data, + unsigned long *user_pfn, int npage); + int (*register_notifier)(void *iommu_data, + unsigned long *events, + struct notifier_block *nb); + int (*unregister_notifier)(void *iommu_data, + struct notifier_block *nb); + int (*dma_rw)(void *iommu_data, dma_addr_t user_iova, + void *data, size_t count, bool write); + struct iommu_domain *(*group_iommu_domain)(void *iommu_data, + struct iommu_group *group); + void (*notify)(void *iommu_data, + enum vfio_iommu_notify_type event); +}; + +int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); +void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops); diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index fe888b5dcc00..708a95e61831 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -20,6 +20,7 @@ #include <linux/sched/mm.h> #include <linux/sched/signal.h> #include <linux/mm.h> +#include "vfio.h" #include <asm/iommu.h> #include <asm/tce.h> @@ -1238,13 +1239,16 @@ release_exit: } static int tce_iommu_attach_group(void *iommu_data, - struct iommu_group *iommu_group) + struct iommu_group *iommu_group, enum vfio_group_type type) { int ret = 0; struct tce_container *container = iommu_data; struct iommu_table_group *table_group; struct tce_iommu_group *tcegrp = NULL; + if (type == VFIO_EMULATED_IOMMU) + return -EINVAL; + mutex_lock(&container->lock); /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n", diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 0e9217687f5c..f17490ab238f 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -36,10 +36,10 @@ #include <linux/uaccess.h> #include <linux/vfio.h> #include <linux/workqueue.h> -#include <linux/mdev.h> #include <linux/notifier.h> #include <linux/dma-iommu.h> #include <linux/irqdomain.h> +#include "vfio.h" #define DRIVER_VERSION "0.2" #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" @@ -65,7 +65,6 @@ MODULE_PARM_DESC(dma_entry_limit, struct vfio_iommu { struct list_head domain_list; struct list_head iova_list; - struct vfio_domain *external_domain; /* domain for external user */ struct mutex lock; struct rb_root dma_list; struct blocking_notifier_head notifier; @@ -78,6 +77,7 @@ struct vfio_iommu { bool nesting; bool dirty_page_tracking; bool container_open; + struct list_head emulated_iommu_groups; }; struct vfio_domain { @@ -113,7 +113,6 @@ struct vfio_batch { struct vfio_iommu_group { struct iommu_group *iommu_group; struct list_head next; - bool mdev_group; /* An mdev group */ bool pinned_page_dirty_scope; }; @@ -140,9 +139,6 @@ struct vfio_regions { size_t len; }; -#define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu) \ - 
(!list_empty(&iommu->domain_list)) - #define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / BITS_PER_BYTE) /* @@ -880,7 +876,7 @@ again: * already pinned and accounted. Accounting should be done if there is no * iommu capable domain in the container. */ - do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu); + do_accounting = list_empty(&iommu->domain_list); for (i = 0; i < npage; i++) { struct vfio_pfn *vpfn; @@ -969,7 +965,7 @@ static int vfio_iommu_type1_unpin_pages(void *iommu_data, mutex_lock(&iommu->lock); - do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu); + do_accounting = list_empty(&iommu->domain_list); for (i = 0; i < npage; i++) { struct vfio_dma *dma; dma_addr_t iova; @@ -1090,7 +1086,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, if (!dma->size) return 0; - if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)) + if (list_empty(&iommu->domain_list)) return 0; /* @@ -1667,7 +1663,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, vfio_link_dma(iommu, dma); /* Don't pin and map if container doesn't contain IOMMU capable domain*/ - if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)) + if (list_empty(&iommu->domain_list)) dma->size = size; else ret = vfio_pin_map_dma(iommu, dma, size); @@ -1893,8 +1889,8 @@ static struct vfio_iommu_group* vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, struct iommu_group *iommu_group) { + struct vfio_iommu_group *group; struct vfio_domain *domain; - struct vfio_iommu_group *group = NULL; list_for_each_entry(domain, &iommu->domain_list, next) { group = find_iommu_group(domain, iommu_group); @@ -1902,10 +1898,10 @@ vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, return group; } - if (iommu->external_domain) - group = find_iommu_group(iommu->external_domain, iommu_group); - - return group; + list_for_each_entry(group, &iommu->emulated_iommu_groups, next) + if (group->iommu_group == iommu_group) + return group; + return NULL; } static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions, @@ -1934,89 +1930,6 @@ static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions, return ret; } -static int vfio_mdev_attach_domain(struct device *dev, void *data) -{ - struct mdev_device *mdev = to_mdev_device(dev); - struct iommu_domain *domain = data; - struct device *iommu_device; - - iommu_device = mdev_get_iommu_device(mdev); - if (iommu_device) { - if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX)) - return iommu_aux_attach_device(domain, iommu_device); - else - return iommu_attach_device(domain, iommu_device); - } - - return -EINVAL; -} - -static int vfio_mdev_detach_domain(struct device *dev, void *data) -{ - struct mdev_device *mdev = to_mdev_device(dev); - struct iommu_domain *domain = data; - struct device *iommu_device; - - iommu_device = mdev_get_iommu_device(mdev); - if (iommu_device) { - if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX)) - iommu_aux_detach_device(domain, iommu_device); - else - iommu_detach_device(domain, iommu_device); - } - - return 0; -} - -static int vfio_iommu_attach_group(struct vfio_domain *domain, - struct vfio_iommu_group *group) -{ - if (group->mdev_group) - return iommu_group_for_each_dev(group->iommu_group, - domain->domain, - vfio_mdev_attach_domain); - else - return iommu_attach_group(domain->domain, group->iommu_group); -} - -static void vfio_iommu_detach_group(struct vfio_domain *domain, - struct vfio_iommu_group *group) -{ - if (group->mdev_group) - iommu_group_for_each_dev(group->iommu_group, domain->domain, 
- vfio_mdev_detach_domain); - else - iommu_detach_group(domain->domain, group->iommu_group); -} - -static bool vfio_bus_is_mdev(struct bus_type *bus) -{ - struct bus_type *mdev_bus; - bool ret = false; - - mdev_bus = symbol_get(mdev_bus_type); - if (mdev_bus) { - ret = (bus == mdev_bus); - symbol_put(mdev_bus_type); - } - - return ret; -} - -static int vfio_mdev_iommu_device(struct device *dev, void *data) -{ - struct mdev_device *mdev = to_mdev_device(dev); - struct device **old = data, *new; - - new = mdev_get_iommu_device(mdev); - if (!new || (*old && *old != new)) - return -EINVAL; - - *old = new; - - return 0; -} - /* * This is a helper function to insert an address range to iova list. * The list is initially created with a single entry corresponding to @@ -2241,81 +2154,58 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu, } static int vfio_iommu_type1_attach_group(void *iommu_data, - struct iommu_group *iommu_group) + struct iommu_group *iommu_group, enum vfio_group_type type) { struct vfio_iommu *iommu = iommu_data; struct vfio_iommu_group *group; struct vfio_domain *domain, *d; struct bus_type *bus = NULL; - int ret; bool resv_msi, msi_remap; phys_addr_t resv_msi_base = 0; struct iommu_domain_geometry *geo; LIST_HEAD(iova_copy); LIST_HEAD(group_resv_regions); + int ret = -EINVAL; mutex_lock(&iommu->lock); /* Check for duplicates */ - if (vfio_iommu_find_iommu_group(iommu, iommu_group)) { - mutex_unlock(&iommu->lock); - return -EINVAL; - } + if (vfio_iommu_find_iommu_group(iommu, iommu_group)) + goto out_unlock; + ret = -ENOMEM; group = kzalloc(sizeof(*group), GFP_KERNEL); - domain = kzalloc(sizeof(*domain), GFP_KERNEL); - if (!group || !domain) { - ret = -ENOMEM; - goto out_free; - } - + if (!group) + goto out_unlock; group->iommu_group = iommu_group; + if (type == VFIO_EMULATED_IOMMU) { + list_add(&group->next, &iommu->emulated_iommu_groups); + /* + * An emulated IOMMU group cannot dirty memory directly, it can + * only use interfaces that provide dirty tracking. + * The iommu scope can only be promoted with the addition of a + * dirty tracking group. + */ + group->pinned_page_dirty_scope = true; + ret = 0; + goto out_unlock; + } + /* Determine bus_type in order to allocate a domain */ ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type); if (ret) - goto out_free; - - if (vfio_bus_is_mdev(bus)) { - struct device *iommu_device = NULL; - - group->mdev_group = true; - - /* Determine the isolation type */ - ret = iommu_group_for_each_dev(iommu_group, &iommu_device, - vfio_mdev_iommu_device); - if (ret || !iommu_device) { - if (!iommu->external_domain) { - INIT_LIST_HEAD(&domain->group_list); - iommu->external_domain = domain; - vfio_update_pgsize_bitmap(iommu); - } else { - kfree(domain); - } - - list_add(&group->next, - &iommu->external_domain->group_list); - /* - * Non-iommu backed group cannot dirty memory directly, - * it can only use interfaces that provide dirty - * tracking. - * The iommu scope can only be promoted with the - * addition of a dirty tracking group. 
- */ - group->pinned_page_dirty_scope = true; - mutex_unlock(&iommu->lock); + goto out_free_group; - return 0; - } - - bus = iommu_device->bus; - } + ret = -ENOMEM; + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + goto out_free_group; + ret = -EIO; domain->domain = iommu_domain_alloc(bus); - if (!domain->domain) { - ret = -EIO; - goto out_free; - } + if (!domain->domain) + goto out_free_domain; if (iommu->nesting) { ret = iommu_enable_nesting(domain->domain); @@ -2323,7 +2213,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, goto out_domain; } - ret = vfio_iommu_attach_group(domain, group); + ret = iommu_attach_group(domain->domain, group->iommu_group); if (ret) goto out_domain; @@ -2390,15 +2280,17 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, list_for_each_entry(d, &iommu->domain_list, next) { if (d->domain->ops == domain->domain->ops && d->prot == domain->prot) { - vfio_iommu_detach_group(domain, group); - if (!vfio_iommu_attach_group(d, group)) { + iommu_detach_group(domain->domain, group->iommu_group); + if (!iommu_attach_group(d->domain, + group->iommu_group)) { list_add(&group->next, &d->group_list); iommu_domain_free(domain->domain); kfree(domain); goto done; } - ret = vfio_iommu_attach_group(domain, group); + ret = iommu_attach_group(domain->domain, + group->iommu_group); if (ret) goto out_domain; } @@ -2435,14 +2327,16 @@ done: return 0; out_detach: - vfio_iommu_detach_group(domain, group); + iommu_detach_group(domain->domain, group->iommu_group); out_domain: iommu_domain_free(domain->domain); vfio_iommu_iova_free(&iova_copy); vfio_iommu_resv_free(&group_resv_regions); -out_free: +out_free_domain: kfree(domain); +out_free_group: kfree(group); +out_unlock: mutex_unlock(&iommu->lock); return ret; } @@ -2567,25 +2461,19 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, LIST_HEAD(iova_copy); mutex_lock(&iommu->lock); + list_for_each_entry(group, &iommu->emulated_iommu_groups, next) { + if (group->iommu_group != iommu_group) + continue; + update_dirty_scope = !group->pinned_page_dirty_scope; + list_del(&group->next); + kfree(group); - if (iommu->external_domain) { - group = find_iommu_group(iommu->external_domain, iommu_group); - if (group) { - update_dirty_scope = !group->pinned_page_dirty_scope; - list_del(&group->next); - kfree(group); - - if (list_empty(&iommu->external_domain->group_list)) { - if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)) { - WARN_ON(iommu->notifier.head); - vfio_iommu_unmap_unpin_all(iommu); - } - - kfree(iommu->external_domain); - iommu->external_domain = NULL; - } - goto detach_group_done; + if (list_empty(&iommu->emulated_iommu_groups) && + list_empty(&iommu->domain_list)) { + WARN_ON(iommu->notifier.head); + vfio_iommu_unmap_unpin_all(iommu); } + goto detach_group_done; } /* @@ -2600,7 +2488,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, if (!group) continue; - vfio_iommu_detach_group(domain, group); + iommu_detach_group(domain->domain, group->iommu_group); update_dirty_scope = !group->pinned_page_dirty_scope; list_del(&group->next); kfree(group); @@ -2613,7 +2501,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, */ if (list_empty(&domain->group_list)) { if (list_is_singular(&iommu->domain_list)) { - if (!iommu->external_domain) { + if (list_empty(&iommu->emulated_iommu_groups)) { WARN_ON(iommu->notifier.head); vfio_iommu_unmap_unpin_all(iommu); } else { @@ -2677,41 +2565,43 @@ static void *vfio_iommu_type1_open(unsigned long arg) mutex_init(&iommu->lock); 
 	BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
 	init_waitqueue_head(&iommu->vaddr_wait);
+	iommu->pgsize_bitmap = PAGE_MASK;
+	INIT_LIST_HEAD(&iommu->emulated_iommu_groups);
 
 	return iommu;
 }
 
-static void vfio_release_domain(struct vfio_domain *domain, bool external)
+static void vfio_release_domain(struct vfio_domain *domain)
 {
 	struct vfio_iommu_group *group, *group_tmp;
 
 	list_for_each_entry_safe(group, group_tmp, &domain->group_list, next) {
-		if (!external)
-			vfio_iommu_detach_group(domain, group);
+		iommu_detach_group(domain->domain, group->iommu_group);
 		list_del(&group->next);
 		kfree(group);
 	}
 
-	if (!external)
-		iommu_domain_free(domain->domain);
+	iommu_domain_free(domain->domain);
 }
 
 static void vfio_iommu_type1_release(void *iommu_data)
 {
 	struct vfio_iommu *iommu = iommu_data;
 	struct vfio_domain *domain, *domain_tmp;
+	struct vfio_iommu_group *group, *next_group;
 
-	if (iommu->external_domain) {
-		vfio_release_domain(iommu->external_domain, true);
-		kfree(iommu->external_domain);
+	list_for_each_entry_safe(group, next_group,
+			&iommu->emulated_iommu_groups, next) {
+		list_del(&group->next);
+		kfree(group);
 	}
 
 	vfio_iommu_unmap_unpin_all(iommu);
 
 	list_for_each_entry_safe(domain, domain_tmp, &iommu->domain_list, next) {
-		vfio_release_domain(domain, false);
+		vfio_release_domain(domain);
 		list_del(&domain->next);
 		kfree(domain);
 	}
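
The vfio.c changes above replace the old idr + device_create() char-device scheme for groups with the modern driver-core lifecycle: an embedded, refcounted struct device plus cdev_device_add(). A minimal sketch of that pattern follows; the foo_* names are hypothetical, and only the driver-core/cdev calls mirror what the vfio_group code now does.

/*
 * Sketch of the device_initialize()/cdev_device_add() lifecycle,
 * assuming a class and a dev_t region have already been set up.
 */
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/slab.h>

struct foo {
	struct device dev;	/* embedded: refcounted by the core */
	struct cdev cdev;
};

static void foo_release(struct device *dev)
{
	struct foo *foo = container_of(dev, struct foo, dev);

	kfree(foo);		/* runs only after the last put_device() */
}

static const struct file_operations foo_fops = {
	.owner = THIS_MODULE,
	/* .open, .release, etc. elided */
};

static struct foo *foo_create(struct class *class, dev_t devt)
{
	struct foo *foo;
	int ret;

	foo = kzalloc(sizeof(*foo), GFP_KERNEL);
	if (!foo)
		return ERR_PTR(-ENOMEM);

	device_initialize(&foo->dev);	/* refcount = 1 */
	foo->dev.devt = devt;
	foo->dev.class = class;
	foo->dev.release = foo_release;
	cdev_init(&foo->cdev, &foo_fops);
	foo->cdev.owner = THIS_MODULE;

	ret = dev_set_name(&foo->dev, "foo%d", MINOR(devt));
	if (ret)
		goto err_put;

	/* Atomically adds the cdev and makes the device visible */
	ret = cdev_device_add(&foo->cdev, &foo->dev);
	if (ret)
		goto err_put;
	return foo;

err_put:
	put_device(&foo->dev);	/* frees foo via foo_release() */
	return ERR_PTR(ret);
}

static void foo_destroy(struct foo *foo)
{
	cdev_device_del(&foo->cdev, &foo->dev);
	put_device(&foo->dev);
}

Because the struct device is refcounted, an open() path can resolve the object with container_of(inode->i_cdev, ...) and take a reference with refcount_inc_not_zero(), exactly the race-free lookup vfio_group_fops_open() uses above in place of the old minor-number idr search.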
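vfio_register_emulated_iommu_dev() is the new registration entry point for devices with no physical IOMMU backing (vfio_ccw converts to it in this pull). A hedged sketch of driver-side usage follows; my_device, my_probe, my_remove and my_dev_ops are illustrative names, and only the vfio_* calls are the API added above.

/*
 * Hypothetical probe/remove pair for an emulated-IOMMU vfio driver.
 */
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/vfio.h>

struct my_device {
	struct vfio_device vdev;
};

static const struct vfio_device_ops my_dev_ops = {
	.name = "my-emulated-dev",
	/* .open, .release, .read, .write, .ioctl, .mmap as needed */
};

static int my_probe(struct device *dev)
{
	struct my_device *mydev;
	int ret;

	mydev = kzalloc(sizeof(*mydev), GFP_KERNEL);
	if (!mydev)
		return -ENOMEM;

	vfio_init_group_dev(&mydev->vdev, dev, &my_dev_ops);

	/*
	 * No physical IOMMU backs this device: the core allocates a
	 * VFIO_EMULATED_IOMMU group and fakes the iommu_group sysfs
	 * ABI. All DMA must be mediated by the driver, e.g. through
	 * the pin_pages interface.
	 */
	ret = vfio_register_emulated_iommu_dev(&mydev->vdev);
	if (ret) {
		vfio_uninit_group_dev(&mydev->vdev);
		kfree(mydev);
		return ret;
	}
	dev_set_drvdata(dev, mydev);
	return 0;
}

static void my_remove(struct device *dev)
{
	struct my_device *mydev = dev_get_drvdata(dev);

	vfio_unregister_group_dev(&mydev->vdev);
	vfio_uninit_group_dev(&mydev->vdev);
	kfree(mydev);
}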
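With the vfio_iommu_driver_ops change, attach_group now receives the group's enum vfio_group_type, so each backend can decide up front whether it supports the group, as the vfio_iommu_spapr_tce hunk above does. A minimal sketch with hypothetical demo_* names; only the callback signature and the VFIO_EMULATED_IOMMU check mirror the code in this series.

#include <linux/iommu.h>
#include "vfio.h"	/* enum vfio_group_type */

/* demo_do_attach() stands in for the backend's real attach logic */
static int demo_do_attach(void *iommu_data, struct iommu_group *group);

static int demo_attach_group(void *iommu_data, struct iommu_group *group,
			     enum vfio_group_type type)
{
	/*
	 * A backend that cannot usefully mediate DMA for emulated
	 * groups refuses them here instead of faking an attachment.
	 */
	if (type == VFIO_EMULATED_IOMMU)
		return -EINVAL;

	return demo_do_attach(iommu_data, group);
}

vfio_iommu_type1 takes the other branch of the same decision: rather than rejecting VFIO_EMULATED_IOMMU, its attach_group places the group on the new emulated_iommu_groups list and returns early, replacing the old external_domain and mdev symbol_get() machinery removed above.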