summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-05-05 13:31:39 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-05-05 13:31:39 -0700
commit16bb86b5569cb7489367101f6ed69b25682b47db (patch)
tree4b0ec63032cb93491d65fc5766243bfc7ff60346
parent57151b502cbc0fa6ff9074a76883fa9d9eda322e (diff)
parentd7bce85aa7b92b5de8f69b3bcedfe51d7b1aabe1 (diff)
downloadlinux-16bb86b5569cb7489367101f6ed69b25682b47db.tar.bz2
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin: "A bunch of new drivers including vdpa support for block and virtio-vdpa. Beginning of vq kick (aka doorbell) mapping support. Misc fixes" * tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (40 commits) virtio_pci_modern: correct sparse tags for notify virtio_pci_modern: __force cast the notify mapping vDPA/ifcvf: get_config_size should return dev specific config size vDPA/ifcvf: enable Intel C5000X-PL virtio-block for vDPA vDPA/ifcvf: deduce VIRTIO device ID when probe vdpa_sim_blk: add support for vdpa management tool vdpa_sim_blk: handle VIRTIO_BLK_T_GET_ID vdpa_sim_blk: implement ramdisk behaviour vdpa: add vdpa simulator for block device vhost/vdpa: Remove the restriction that only supports virtio-net devices vhost/vdpa: use get_config_size callback in vhost_vdpa_config_validate() vdpa: add get_config_size callback in vdpa_config_ops vdpa_sim: cleanup kiovs in vdpasim_free() vringh: add vringh_kiov_length() helper vringh: implement vringh_kiov_advance() vringh: explain more about cleaning riov and wiov vringh: reset kiov 'consumed' field in __vringh_iov() vringh: add 'iotlb_lock' to synchronize iotlb accesses vdpa_sim: use iova module to allocate IOVA addresses vDPA/ifcvf: deduce VIRTIO device ID from pdev ids ...
-rw-r--r--drivers/Makefile1
-rw-r--r--drivers/net/virtio_net.c10
-rw-r--r--drivers/vdpa/Kconfig15
-rw-r--r--drivers/vdpa/Makefile1
-rw-r--r--drivers/vdpa/ifcvf/ifcvf_base.c24
-rw-r--r--drivers/vdpa/ifcvf/ifcvf_base.h26
-rw-r--r--drivers/vdpa/ifcvf/ifcvf_main.c86
-rw-r--r--drivers/vdpa/mlx5/net/mlx5_vnet.c85
-rw-r--r--drivers/vdpa/vdpa.c12
-rw-r--r--drivers/vdpa/vdpa_sim/Makefile1
-rw-r--r--drivers/vdpa/vdpa_sim/vdpa_sim.c127
-rw-r--r--drivers/vdpa/vdpa_sim/vdpa_sim.h2
-rw-r--r--drivers/vdpa/vdpa_sim/vdpa_sim_blk.c338
-rw-r--r--drivers/vdpa/virtio_pci/Makefile2
-rw-r--r--drivers/vdpa/virtio_pci/vp_vdpa.c484
-rw-r--r--drivers/vhost/vdpa.c16
-rw-r--r--drivers/vhost/vringh.c69
-rw-r--r--drivers/virtio/virtio_balloon.c2
-rw-r--r--drivers/virtio/virtio_pci_modern.c27
-rw-r--r--drivers/virtio/virtio_pci_modern_dev.c67
-rw-r--r--include/linux/vdpa.h42
-rw-r--r--include/linux/virtio_pci_modern.h11
-rw-r--r--include/linux/vringh.h19
23 files changed, 1295 insertions, 172 deletions
diff --git a/drivers/Makefile b/drivers/Makefile
index 8f3fee8281ad..5a6d613e868d 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_DMADEVICES) += dma/
obj-y += soc/
obj-$(CONFIG_VIRTIO) += virtio/
+obj-$(CONFIG_VIRTIO_PCI_LIB) += virtio/
obj-$(CONFIG_VDPA) += vdpa/
obj-$(CONFIG_XEN) += xen/
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 7fda2ae4c40f..9b6a4a875c55 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2870,9 +2870,13 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
{
int i;
- vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
- if (!vi->ctrl)
- goto err_ctrl;
+ if (vi->has_cvq) {
+ vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
+ if (!vi->ctrl)
+ goto err_ctrl;
+ } else {
+ vi->ctrl = NULL;
+ }
vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL);
if (!vi->sq)
goto err_sq;
diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig
index ffd1e098bfd2..a503c1b2bfd9 100644
--- a/drivers/vdpa/Kconfig
+++ b/drivers/vdpa/Kconfig
@@ -14,6 +14,7 @@ config VDPA_SIM
depends on RUNTIME_TESTING_MENU && HAS_DMA
select DMA_OPS
select VHOST_RING
+ select IOMMU_IOVA
help
Enable this module to support vDPA device simulators. These devices
are used for testing, prototyping and development of vDPA.
@@ -25,6 +26,13 @@ config VDPA_SIM_NET
help
vDPA networking device simulator which loops TX traffic back to RX.
+config VDPA_SIM_BLOCK
+ tristate "vDPA simulator for block device"
+ depends on VDPA_SIM
+ help
+ vDPA block device simulator which terminates IO request in a
+ memory buffer.
+
config IFCVF
tristate "Intel IFC VF vDPA driver"
depends on PCI_MSI
@@ -52,4 +60,11 @@ config MLX5_VDPA_NET
be executed by the hardware. It also supports a variety of stateless
offloads depending on the actual device used and firmware version.
+config VP_VDPA
+ tristate "Virtio PCI bridge vDPA driver"
+ select VIRTIO_PCI_LIB
+ depends on PCI_MSI
+ help
+ This kernel module bridges virtio PCI device to vDPA bus.
+
endif # VDPA
diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile
index d160e9b63a66..67fe7f3d6943 100644
--- a/drivers/vdpa/Makefile
+++ b/drivers/vdpa/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_VDPA) += vdpa.o
obj-$(CONFIG_VDPA_SIM) += vdpa_sim/
obj-$(CONFIG_IFCVF) += ifcvf/
obj-$(CONFIG_MLX5_VDPA) += mlx5/
+obj-$(CONFIG_VP_VDPA) += virtio_pci/
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c
index f2a128e56de5..1a661ab45af5 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.c
+++ b/drivers/vdpa/ifcvf/ifcvf_base.c
@@ -202,10 +202,11 @@ static void ifcvf_add_status(struct ifcvf_hw *hw, u8 status)
ifcvf_get_status(hw);
}
-u64 ifcvf_get_features(struct ifcvf_hw *hw)
+u64 ifcvf_get_hw_features(struct ifcvf_hw *hw)
{
struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
u32 features_lo, features_hi;
+ u64 features;
ifc_iowrite32(0, &cfg->device_feature_select);
features_lo = ifc_ioread32(&cfg->device_feature);
@@ -213,7 +214,26 @@ u64 ifcvf_get_features(struct ifcvf_hw *hw)
ifc_iowrite32(1, &cfg->device_feature_select);
features_hi = ifc_ioread32(&cfg->device_feature);
- return ((u64)features_hi << 32) | features_lo;
+ features = ((u64)features_hi << 32) | features_lo;
+
+ return features;
+}
+
+u64 ifcvf_get_features(struct ifcvf_hw *hw)
+{
+ return hw->hw_features;
+}
+
+int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features)
+{
+ struct ifcvf_adapter *ifcvf = vf_to_adapter(hw);
+
+ if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)) && features) {
+ IFCVF_ERR(ifcvf->pdev, "VIRTIO_F_ACCESS_PLATFORM is not negotiated\n");
+ return -EINVAL;
+ }
+
+ return 0;
}
void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset,
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
index 64696d63fe07..0111bfdeb342 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.h
+++ b/drivers/vdpa/ifcvf/ifcvf_base.h
@@ -15,15 +15,26 @@
#include <linux/pci_regs.h>
#include <linux/vdpa.h>
#include <uapi/linux/virtio_net.h>
+#include <uapi/linux/virtio_blk.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_pci.h>
-#define IFCVF_VENDOR_ID 0x1AF4
-#define IFCVF_DEVICE_ID 0x1041
-#define IFCVF_SUBSYS_VENDOR_ID 0x8086
-#define IFCVF_SUBSYS_DEVICE_ID 0x001A
+#define N3000_VENDOR_ID 0x1AF4
+#define N3000_DEVICE_ID 0x1041
+#define N3000_SUBSYS_VENDOR_ID 0x8086
+#define N3000_SUBSYS_DEVICE_ID 0x001A
-#define IFCVF_SUPPORTED_FEATURES \
+#define C5000X_PL_VENDOR_ID 0x1AF4
+#define C5000X_PL_DEVICE_ID 0x1000
+#define C5000X_PL_SUBSYS_VENDOR_ID 0x8086
+#define C5000X_PL_SUBSYS_DEVICE_ID 0x0001
+
+#define C5000X_PL_BLK_VENDOR_ID 0x1AF4
+#define C5000X_PL_BLK_DEVICE_ID 0x1001
+#define C5000X_PL_BLK_SUBSYS_VENDOR_ID 0x8086
+#define C5000X_PL_BLK_SUBSYS_DEVICE_ID 0x0002
+
+#define IFCVF_NET_SUPPORTED_FEATURES \
((1ULL << VIRTIO_NET_F_MAC) | \
(1ULL << VIRTIO_F_ANY_LAYOUT) | \
(1ULL << VIRTIO_F_VERSION_1) | \
@@ -78,6 +89,8 @@ struct ifcvf_hw {
void __iomem *notify_base;
u32 notify_off_multiplier;
u64 req_features;
+ u64 hw_features;
+ u32 dev_type;
struct virtio_pci_common_cfg __iomem *common_cfg;
void __iomem *net_cfg;
struct vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2];
@@ -116,7 +129,10 @@ void ifcvf_set_status(struct ifcvf_hw *hw, u8 status);
void io_write64_twopart(u64 val, u32 *lo, u32 *hi);
void ifcvf_reset(struct ifcvf_hw *hw);
u64 ifcvf_get_features(struct ifcvf_hw *hw);
+u64 ifcvf_get_hw_features(struct ifcvf_hw *hw);
+int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features);
u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid);
int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num);
struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw);
+int ifcvf_probed_virtio_net(struct ifcvf_hw *hw);
#endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index d555a6a5d1ba..ab0ab5cf0f6e 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -14,7 +14,6 @@
#include <linux/sysfs.h>
#include "ifcvf_base.h"
-#define VERSION_STRING "0.1"
#define DRIVER_AUTHOR "Intel Corporation"
#define IFCVF_DRIVER_NAME "ifcvf"
@@ -169,10 +168,23 @@ static struct ifcvf_hw *vdpa_to_vf(struct vdpa_device *vdpa_dev)
static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev)
{
+ struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+ struct pci_dev *pdev = adapter->pdev;
+
u64 features;
- features = ifcvf_get_features(vf) & IFCVF_SUPPORTED_FEATURES;
+ switch (vf->dev_type) {
+ case VIRTIO_ID_NET:
+ features = ifcvf_get_features(vf) & IFCVF_NET_SUPPORTED_FEATURES;
+ break;
+ case VIRTIO_ID_BLOCK:
+ features = ifcvf_get_features(vf);
+ break;
+ default:
+ features = 0;
+ IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", vf->dev_type);
+ }
return features;
}
@@ -180,6 +192,11 @@ static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev)
static int ifcvf_vdpa_set_features(struct vdpa_device *vdpa_dev, u64 features)
{
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+ int ret;
+
+ ret = ifcvf_verify_min_features(vf, features);
+ if (ret)
+ return ret;
vf->req_features = features;
@@ -319,12 +336,17 @@ static u32 ifcvf_vdpa_get_generation(struct vdpa_device *vdpa_dev)
static u32 ifcvf_vdpa_get_device_id(struct vdpa_device *vdpa_dev)
{
- return VIRTIO_ID_NET;
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+ return vf->dev_type;
}
static u32 ifcvf_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev)
{
- return IFCVF_SUBSYS_VENDOR_ID;
+ struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
+ struct pci_dev *pdev = adapter->pdev;
+
+ return pdev->subsystem_vendor;
}
static u32 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev)
@@ -332,6 +354,28 @@ static u32 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev)
return IFCVF_QUEUE_ALIGNMENT;
}
+static size_t ifcvf_vdpa_get_config_size(struct vdpa_device *vdpa_dev)
+{
+ struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+ struct pci_dev *pdev = adapter->pdev;
+ size_t size;
+
+ switch (vf->dev_type) {
+ case VIRTIO_ID_NET:
+ size = sizeof(struct virtio_net_config);
+ break;
+ case VIRTIO_ID_BLOCK:
+ size = sizeof(struct virtio_blk_config);
+ break;
+ default:
+ size = 0;
+ IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", vf->dev_type);
+ }
+
+ return size;
+}
+
static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev,
unsigned int offset,
void *buf, unsigned int len)
@@ -392,6 +436,7 @@ static const struct vdpa_config_ops ifc_vdpa_ops = {
.get_device_id = ifcvf_vdpa_get_device_id,
.get_vendor_id = ifcvf_vdpa_get_vendor_id,
.get_vq_align = ifcvf_vdpa_get_vq_align,
+ .get_config_size = ifcvf_vdpa_get_config_size,
.get_config = ifcvf_vdpa_get_config,
.set_config = ifcvf_vdpa_set_config,
.set_config_cb = ifcvf_vdpa_set_config_cb,
@@ -441,6 +486,19 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pci_set_drvdata(pdev, adapter);
vf = &adapter->vf;
+
+ /* This drirver drives both modern virtio devices and transitional
+ * devices in modern mode.
+ * vDPA requires feature bit VIRTIO_F_ACCESS_PLATFORM,
+ * so legacy devices and transitional devices in legacy
+ * mode will not work for vDPA, this driver will not
+ * drive devices with legacy interface.
+ */
+ if (pdev->device < 0x1040)
+ vf->dev_type = pdev->subsystem_device;
+ else
+ vf->dev_type = pdev->device - 0x1040;
+
vf->base = pcim_iomap_table(pdev);
adapter->pdev = pdev;
@@ -455,6 +513,8 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++)
vf->vring[i].irq = -EINVAL;
+ vf->hw_features = ifcvf_get_hw_features(vf);
+
ret = vdpa_register_device(&adapter->vdpa, IFCVF_MAX_QUEUE_PAIRS * 2);
if (ret) {
IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus");
@@ -476,10 +536,19 @@ static void ifcvf_remove(struct pci_dev *pdev)
}
static struct pci_device_id ifcvf_pci_ids[] = {
- { PCI_DEVICE_SUB(IFCVF_VENDOR_ID,
- IFCVF_DEVICE_ID,
- IFCVF_SUBSYS_VENDOR_ID,
- IFCVF_SUBSYS_DEVICE_ID) },
+ { PCI_DEVICE_SUB(N3000_VENDOR_ID,
+ N3000_DEVICE_ID,
+ N3000_SUBSYS_VENDOR_ID,
+ N3000_SUBSYS_DEVICE_ID) },
+ { PCI_DEVICE_SUB(C5000X_PL_VENDOR_ID,
+ C5000X_PL_DEVICE_ID,
+ C5000X_PL_SUBSYS_VENDOR_ID,
+ C5000X_PL_SUBSYS_DEVICE_ID) },
+ { PCI_DEVICE_SUB(C5000X_PL_BLK_VENDOR_ID,
+ C5000X_PL_BLK_DEVICE_ID,
+ C5000X_PL_BLK_SUBSYS_VENDOR_ID,
+ C5000X_PL_BLK_SUBSYS_DEVICE_ID) },
+
{ 0 },
};
MODULE_DEVICE_TABLE(pci, ifcvf_pci_ids);
@@ -494,4 +563,3 @@ static struct pci_driver ifcvf_driver = {
module_pci_driver(ifcvf_driver);
MODULE_LICENSE("GPL v2");
-MODULE_VERSION(VERSION_STRING);
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 4d2809c7d4e3..189e4385df40 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -1809,6 +1809,11 @@ err_setup:
ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
}
+static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
+{
+ return sizeof(struct virtio_net_config);
+}
+
static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
unsigned int len)
{
@@ -1895,6 +1900,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
.get_vendor_id = mlx5_vdpa_get_vendor_id,
.get_status = mlx5_vdpa_get_status,
.set_status = mlx5_vdpa_set_status,
+ .get_config_size = mlx5_vdpa_get_config_size,
.get_config = mlx5_vdpa_get_config,
.set_config = mlx5_vdpa_set_config,
.get_generation = mlx5_vdpa_get_generation,
@@ -1974,23 +1980,32 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
}
}
-static int mlx5v_probe(struct auxiliary_device *adev,
- const struct auxiliary_device_id *id)
+struct mlx5_vdpa_mgmtdev {
+ struct vdpa_mgmt_dev mgtdev;
+ struct mlx5_adev *madev;
+ struct mlx5_vdpa_net *ndev;
+};
+
+static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
{
- struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
- struct mlx5_core_dev *mdev = madev->mdev;
+ struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
struct virtio_net_config *config;
struct mlx5_vdpa_dev *mvdev;
struct mlx5_vdpa_net *ndev;
+ struct mlx5_core_dev *mdev;
u32 max_vqs;
int err;
+ if (mgtdev->ndev)
+ return -ENOSPC;
+
+ mdev = mgtdev->madev->mdev;
/* we save one virtqueue for control virtqueue should we require it */
max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
- NULL);
+ name);
if (IS_ERR(ndev))
return PTR_ERR(ndev);
@@ -2017,11 +2032,12 @@ static int mlx5v_probe(struct auxiliary_device *adev,
if (err)
goto err_res;
- err = vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
+ mvdev->vdev.mdev = &mgtdev->mgtdev;
+ err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
if (err)
goto err_reg;
- dev_set_drvdata(&adev->dev, ndev);
+ mgtdev->ndev = ndev;
return 0;
err_reg:
@@ -2034,11 +2050,62 @@ err_mtu:
return err;
}
+static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
+{
+ struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
+
+ _vdpa_unregister_device(dev);
+ mgtdev->ndev = NULL;
+}
+
+static const struct vdpa_mgmtdev_ops mdev_ops = {
+ .dev_add = mlx5_vdpa_dev_add,
+ .dev_del = mlx5_vdpa_dev_del,
+};
+
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+static int mlx5v_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+
+{
+ struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = madev->mdev;
+ struct mlx5_vdpa_mgmtdev *mgtdev;
+ int err;
+
+ mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
+ if (!mgtdev)
+ return -ENOMEM;
+
+ mgtdev->mgtdev.ops = &mdev_ops;
+ mgtdev->mgtdev.device = mdev->device;
+ mgtdev->mgtdev.id_table = id_table;
+ mgtdev->madev = madev;
+
+ err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
+ if (err)
+ goto reg_err;
+
+ dev_set_drvdata(&adev->dev, mgtdev);
+
+ return 0;
+
+reg_err:
+ kfree(mgtdev);
+ return err;
+}
+
static void mlx5v_remove(struct auxiliary_device *adev)
{
- struct mlx5_vdpa_dev *mvdev = dev_get_drvdata(&adev->dev);
+ struct mlx5_vdpa_mgmtdev *mgtdev;
- vdpa_unregister_device(&mvdev->vdev);
+ mgtdev = dev_get_drvdata(&adev->dev);
+ vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
+ kfree(mgtdev);
}
static const struct auxiliary_device_id mlx5v_id_table[] = {
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index 5cffce67cab0..bb3f1d1f0422 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -75,8 +75,8 @@ static void vdpa_release_dev(struct device *d)
* Driver should use vdpa_alloc_device() wrapper macro instead of
* using this directly.
*
- * Returns an error when parent/config/dma_dev is not set or fail to get
- * ida.
+ * Return: Returns an error when parent/config/dma_dev is not set or fail to get
+ * ida.
*/
struct vdpa_device *__vdpa_alloc_device(struct device *parent,
const struct vdpa_config_ops *config,
@@ -157,7 +157,7 @@ static int __vdpa_register_device(struct vdpa_device *vdev, int nvqs)
* @vdev: the vdpa device to be registered to vDPA bus
* @nvqs: number of virtqueues supported by this device
*
- * Returns an error when fail to add device to vDPA bus
+ * Return: Returns an error when fail to add device to vDPA bus
*/
int _vdpa_register_device(struct vdpa_device *vdev, int nvqs)
{
@@ -174,7 +174,7 @@ EXPORT_SYMBOL_GPL(_vdpa_register_device);
* @vdev: the vdpa device to be registered to vDPA bus
* @nvqs: number of virtqueues supported by this device
*
- * Returns an error when fail to add to vDPA bus
+ * Return: Returns an error when fail to add to vDPA bus
*/
int vdpa_register_device(struct vdpa_device *vdev, int nvqs)
{
@@ -218,7 +218,7 @@ EXPORT_SYMBOL_GPL(vdpa_unregister_device);
* @drv: the vdpa device driver to be registered
* @owner: module owner of the driver
*
- * Returns an err when fail to do the registration
+ * Return: Returns an err when fail to do the registration
*/
int __vdpa_register_driver(struct vdpa_driver *drv, struct module *owner)
{
@@ -245,6 +245,8 @@ EXPORT_SYMBOL_GPL(vdpa_unregister_driver);
* @mdev: Pointer to vdpa management device
* vdpa_mgmtdev_register() register a vdpa management device which supports
* vdpa device management.
+ * Return: Returns 0 on success or failure when required callback ops are not
+ * initialized.
*/
int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev)
{
diff --git a/drivers/vdpa/vdpa_sim/Makefile b/drivers/vdpa/vdpa_sim/Makefile
index 79d4536d347e..d458103302f2 100644
--- a/drivers/vdpa/vdpa_sim/Makefile
+++ b/drivers/vdpa/vdpa_sim/Makefile
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o
obj-$(CONFIG_VDPA_SIM_NET) += vdpa_sim_net.o
+obj-$(CONFIG_VDPA_SIM_BLOCK) += vdpa_sim_blk.o
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 5b6b2f87d40c..98f793bc9376 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -17,6 +17,7 @@
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <linux/vhost_iotlb.h>
+#include <linux/iova.h>
#include "vdpa_sim.h"
@@ -128,30 +129,57 @@ static int dir_to_perm(enum dma_data_direction dir)
return perm;
}
+static dma_addr_t vdpasim_map_range(struct vdpasim *vdpasim, phys_addr_t paddr,
+ size_t size, unsigned int perm)
+{
+ struct iova *iova;
+ dma_addr_t dma_addr;
+ int ret;
+
+ /* We set the limit_pfn to the maximum (ULONG_MAX - 1) */
+ iova = alloc_iova(&vdpasim->iova, size, ULONG_MAX - 1, true);
+ if (!iova)
+ return DMA_MAPPING_ERROR;
+
+ dma_addr = iova_dma_addr(&vdpasim->iova, iova);
+
+ spin_lock(&vdpasim->iommu_lock);
+ ret = vhost_iotlb_add_range(vdpasim->iommu, (u64)dma_addr,
+ (u64)dma_addr + size - 1, (u64)paddr, perm);
+ spin_unlock(&vdpasim->iommu_lock);
+
+ if (ret) {
+ __free_iova(&vdpasim->iova, iova);
+ return DMA_MAPPING_ERROR;
+ }
+
+ return dma_addr;
+}
+
+static void vdpasim_unmap_range(struct vdpasim *vdpasim, dma_addr_t dma_addr,
+ size_t size)
+{
+ spin_lock(&vdpasim->iommu_lock);
+ vhost_iotlb_del_range(vdpasim->iommu, (u64)dma_addr,
+ (u64)dma_addr + size - 1);
+ spin_unlock(&vdpasim->iommu_lock);
+
+ free_iova(&vdpasim->iova, iova_pfn(&vdpasim->iova, dma_addr));
+}
+
static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
unsigned long attrs)
{
struct vdpasim *vdpasim = dev_to_sim(dev);
- struct vhost_iotlb *iommu = vdpasim->iommu;
- u64 pa = (page_to_pfn(page) << PAGE_SHIFT) + offset;
- int ret, perm = dir_to_perm(dir);
+ phys_addr_t paddr = page_to_phys(page) + offset;
+ int perm = dir_to_perm(dir);
if (perm < 0)
return DMA_MAPPING_ERROR;
- /* For simplicity, use identical mapping to avoid e.g iova
- * allocator.
- */
- spin_lock(&vdpasim->iommu_lock);
- ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1,
- pa, dir_to_perm(dir));
- spin_unlock(&vdpasim->iommu_lock);
- if (ret)
- return DMA_MAPPING_ERROR;
-
- return (dma_addr_t)(pa);
+ return vdpasim_map_range(vdpasim, paddr, size, perm);
}
static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr,
@@ -159,12 +187,8 @@ static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr,
unsigned long attrs)
{
struct vdpasim *vdpasim = dev_to_sim(dev);
- struct vhost_iotlb *iommu = vdpasim->iommu;
- spin_lock(&vdpasim->iommu_lock);
- vhost_iotlb_del_range(iommu, (u64)dma_addr,
- (u64)dma_addr + size - 1);
- spin_unlock(&vdpasim->iommu_lock);
+ vdpasim_unmap_range(vdpasim, dma_addr, size);
}
static void *vdpasim_alloc_coherent(struct device *dev, size_t size,
@@ -172,27 +196,22 @@ static void *vdpasim_alloc_coherent(struct device *dev, size_t size,
unsigned long attrs)
{
struct vdpasim *vdpasim = dev_to_sim(dev);
- struct vhost_iotlb *iommu = vdpasim->iommu;
- void *addr = kmalloc(size, flag);
- int ret;
+ phys_addr_t paddr;
+ void *addr;
- spin_lock(&vdpasim->iommu_lock);
+ addr = kmalloc(size, flag);
if (!addr) {
*dma_addr = DMA_MAPPING_ERROR;
- } else {
- u64 pa = virt_to_phys(addr);
-
- ret = vhost_iotlb_add_range(iommu, (u64)pa,
- (u64)pa + size - 1,
- pa, VHOST_MAP_RW);
- if (ret) {
- *dma_addr = DMA_MAPPING_ERROR;
- kfree(addr);
- addr = NULL;
- } else
- *dma_addr = (dma_addr_t)pa;
+ return NULL;
+ }
+
+ paddr = virt_to_phys(addr);
+
+ *dma_addr = vdpasim_map_range(vdpasim, paddr, size, VHOST_MAP_RW);
+ if (*dma_addr == DMA_MAPPING_ERROR) {
+ kfree(addr);
+ return NULL;
}
- spin_unlock(&vdpasim->iommu_lock);
return addr;
}
@@ -202,14 +221,10 @@ static void vdpasim_free_coherent(struct device *dev, size_t size,
unsigned long attrs)
{
struct vdpasim *vdpasim = dev_to_sim(dev);
- struct vhost_iotlb *iommu = vdpasim->iommu;
- spin_lock(&vdpasim->iommu_lock);
- vhost_iotlb_del_range(iommu, (u64)dma_addr,
- (u64)dma_addr + size - 1);
- spin_unlock(&vdpasim->iommu_lock);
+ vdpasim_unmap_range(vdpasim, dma_addr, size);
- kfree(phys_to_virt((uintptr_t)dma_addr));
+ kfree(vaddr);
}
static const struct dma_map_ops vdpasim_dma_ops = {
@@ -269,7 +284,15 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
goto err_iommu;
for (i = 0; i < dev_attr->nvqs; i++)
- vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu);
+ vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu,
+ &vdpasim->iommu_lock);
+
+ ret = iova_cache_get();
+ if (ret)
+ goto err_iommu;
+
+ /* For simplicity we use an IOVA allocator with byte granularity */
+ init_iova_domain(&vdpasim->iova, 1, 0);
vdpasim->vdpa.dma_dev = dev;
@@ -439,6 +462,13 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
spin_unlock(&vdpasim->lock);
}
+static size_t vdpasim_get_config_size(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ return vdpasim->dev_attr.config_size;
+}
+
static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset,
void *buf, unsigned int len)
{
@@ -539,8 +569,17 @@ static int vdpasim_dma_unmap(struct vdpa_device *vdpa, u64 iova, u64 size)
static void vdpasim_free(struct vdpa_device *vdpa)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ int i;
cancel_work_sync(&vdpasim->work);
+
+ for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
+ vringh_kiov_cleanup(&vdpasim->vqs[i].out_iov);
+ vringh_kiov_cleanup(&vdpasim->vqs[i].in_iov);
+ }
+
+ put_iova_domain(&vdpasim->iova);
+ iova_cache_put();
kvfree(vdpasim->buffer);
if (vdpasim->iommu)
vhost_iotlb_free(vdpasim->iommu);
@@ -566,6 +605,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
.get_vendor_id = vdpasim_get_vendor_id,
.get_status = vdpasim_get_status,
.set_status = vdpasim_set_status,
+ .get_config_size = vdpasim_get_config_size,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
.get_generation = vdpasim_get_generation,
@@ -593,6 +633,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
.get_vendor_id = vdpasim_get_vendor_id,
.get_status = vdpasim_get_status,
.set_status = vdpasim_set_status,
+ .get_config_size = vdpasim_get_config_size,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
.get_generation = vdpasim_get_generation,
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
index 6d75444f9948..cd58e888bcf3 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
@@ -6,6 +6,7 @@
#ifndef _VDPA_SIM_H
#define _VDPA_SIM_H
+#include <linux/iova.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <linux/virtio_byteorder.h>
@@ -57,6 +58,7 @@ struct vdpasim {
/* virtio config according to device type */
void *config;
struct vhost_iotlb *iommu;
+ struct iova_domain iova;
void *buffer;
u32 status;
u32 generation;
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
new file mode 100644
index 000000000000..5bfe1c281645
--- /dev/null
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VDPA simulator for block device.
+ *
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2021, Red Hat Inc. All rights reserved.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/blkdev.h>
+#include <linux/vringh.h>
+#include <linux/vdpa.h>
+#include <linux/blkdev.h>
+#include <uapi/linux/virtio_blk.h>
+
+#include "vdpa_sim.h"
+
+#define DRV_VERSION "0.1"
+#define DRV_AUTHOR "Max Gurtovoy <mgurtovoy@nvidia.com>"
+#define DRV_DESC "vDPA Device Simulator for block device"
+#define DRV_LICENSE "GPL v2"
+
+#define VDPASIM_BLK_FEATURES (VDPASIM_FEATURES | \
+ (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
+ (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+ (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+ (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
+ (1ULL << VIRTIO_BLK_F_MQ))
+
+#define VDPASIM_BLK_CAPACITY 0x40000
+#define VDPASIM_BLK_SIZE_MAX 0x1000
+#define VDPASIM_BLK_SEG_MAX 32
+#define VDPASIM_BLK_VQ_NUM 1
+
+static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";
+
+static bool vdpasim_blk_check_range(u64 start_sector, size_t range_size)
+{
+ u64 range_sectors = range_size >> SECTOR_SHIFT;
+
+ if (range_size > VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)
+ return false;
+
+ if (start_sector > VDPASIM_BLK_CAPACITY)
+ return false;
+
+ if (range_sectors > VDPASIM_BLK_CAPACITY - start_sector)
+ return false;
+
+ return true;
+}
+
+/* Returns 'true' if the request is handled (with or without an I/O error)
+ * and the status is correctly written in the last byte of the 'in iov',
+ * 'false' otherwise.
+ */
+static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
+ struct vdpasim_virtqueue *vq)
+{
+ size_t pushed = 0, to_pull, to_push;
+ struct virtio_blk_outhdr hdr;
+ ssize_t bytes;
+ loff_t offset;
+ u64 sector;
+ u8 status;
+ u32 type;
+ int ret;
+
+ ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov,
+ &vq->head, GFP_ATOMIC);
+ if (ret != 1)
+ return false;
+
+ if (vq->out_iov.used < 1 || vq->in_iov.used < 1) {
+ dev_err(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n",
+ vq->out_iov.used, vq->in_iov.used);
+ return false;
+ }
+
+ if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) {
+ dev_err(&vdpasim->vdpa.dev, "request in header too short\n");
+ return false;
+ }
+
+ /* The last byte is the status and we checked if the last iov has
+ * enough room for it.
+ */
+ to_push = vringh_kiov_length(&vq->in_iov) - 1;
+
+ to_pull = vringh_kiov_length(&vq->out_iov);
+
+ bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr,
+ sizeof(hdr));
+ if (bytes != sizeof(hdr)) {
+ dev_err(&vdpasim->vdpa.dev, "request out header too short\n");
+ return false;
+ }
+
+ to_pull -= bytes;
+
+ type = vdpasim32_to_cpu(vdpasim, hdr.type);
+ sector = vdpasim64_to_cpu(vdpasim, hdr.sector);
+ offset = sector << SECTOR_SHIFT;
+ status = VIRTIO_BLK_S_OK;
+
+ switch (type) {
+ case VIRTIO_BLK_T_IN:
+ if (!vdpasim_blk_check_range(sector, to_push)) {
+ dev_err(&vdpasim->vdpa.dev,
+ "reading over the capacity - offset: 0x%llx len: 0x%zx\n",
+ offset, to_push);
+ status = VIRTIO_BLK_S_IOERR;
+ break;
+ }
+
+ bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
+ vdpasim->buffer + offset,
+ to_push);
+ if (bytes < 0) {
+ dev_err(&vdpasim->vdpa.dev,
+ "vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
+ bytes, offset, to_push);
+ status = VIRTIO_BLK_S_IOERR;
+ break;
+ }
+
+ pushed += bytes;
+ break;
+
+ case VIRTIO_BLK_T_OUT:
+ if (!vdpasim_blk_check_range(sector, to_pull)) {
+ dev_err(&vdpasim->vdpa.dev,
+ "writing over the capacity - offset: 0x%llx len: 0x%zx\n",
+ offset, to_pull);
+ status = VIRTIO_BLK_S_IOERR;
+ break;
+ }
+
+ bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov,
+ vdpasim->buffer + offset,
+ to_pull);
+ if (bytes < 0) {
+ dev_err(&vdpasim->vdpa.dev,
+ "vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
+ bytes, offset, to_pull);
+ status = VIRTIO_BLK_S_IOERR;
+ break;
+ }
+ break;
+
+ case VIRTIO_BLK_T_GET_ID:
+ bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
+ vdpasim_blk_id,
+ VIRTIO_BLK_ID_BYTES);
+ if (bytes < 0) {
+ dev_err(&vdpasim->vdpa.dev,
+ "vringh_iov_push_iotlb() error: %zd\n", bytes);
+ status = VIRTIO_BLK_S_IOERR;
+ break;
+ }
+
+ pushed += bytes;
+ break;
+
+ default:
+ dev_warn(&vdpasim->vdpa.dev,
+ "Unsupported request type %d\n", type);
+ status = VIRTIO_BLK_S_IOERR;
+ break;
+ }
+
+ /* If some operations fail, we need to skip the remaining bytes
+ * to put the status in the last byte
+ */
+ if (to_push - pushed > 0)
+ vringh_kiov_advance(&vq->in_iov, to_push - pushed);
+
+ /* Last byte is the status */
+ bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1);
+ if (bytes != 1)
+ return false;
+
+ pushed += bytes;
+
+ /* Make sure data is wrote before advancing index */
+ smp_wmb();
+
+ vringh_complete_iotlb(&vq->vring, vq->head, pushed);
+
+ return true;
+}
+
+static void vdpasim_blk_work(struct work_struct *work)
+{
+ struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
+ int i;
+
+ spin_lock(&vdpasim->lock);
+
+ if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
+ goto out;
+
+ for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[i];
+
+ if (!vq->ready)
+ continue;
+
+ while (vdpasim_blk_handle_req(vdpasim, vq)) {
+ /* Make sure used is visible before rasing the interrupt. */
+ smp_wmb();
+
+ local_bh_disable();
+ if (vringh_need_notify_iotlb(&vq->vring) > 0)
+ vringh_notify(&vq->vring);
+ local_bh_enable();
+ }
+ }
+out:
+ spin_unlock(&vdpasim->lock);
+}
+
+static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
+{
+ struct virtio_blk_config *blk_config = config;
+
+ memset(config, 0, sizeof(struct virtio_blk_config));
+
+ blk_config->capacity = cpu_to_vdpasim64(vdpasim, VDPASIM_BLK_CAPACITY);
+ blk_config->size_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SIZE_MAX);
+ blk_config->seg_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SEG_MAX);
+ blk_config->num_queues = cpu_to_vdpasim16(vdpasim, VDPASIM_BLK_VQ_NUM);
+ blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1);
+ blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1);
+ blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
+}
+
+static void vdpasim_blk_mgmtdev_release(struct device *dev)
+{
+}
+
+static struct device vdpasim_blk_mgmtdev = {
+ .init_name = "vdpasim_blk",
+ .release = vdpasim_blk_mgmtdev_release,
+};
+
+static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
+{
+ struct vdpasim_dev_attr dev_attr = {};
+ struct vdpasim *simdev;
+ int ret;
+
+ dev_attr.mgmt_dev = mdev;
+ dev_attr.name = name;
+ dev_attr.id = VIRTIO_ID_BLOCK;
+ dev_attr.supported_features = VDPASIM_BLK_FEATURES;
+ dev_attr.nvqs = VDPASIM_BLK_VQ_NUM;
+ dev_attr.config_size = sizeof(struct virtio_blk_config);
+ dev_attr.get_config = vdpasim_blk_get_config;
+ dev_attr.work_fn = vdpasim_blk_work;
+ dev_attr.buffer_size = VDPASIM_BLK_CAPACITY << SECTOR_SHIFT;
+
+ simdev = vdpasim_create(&dev_attr);
+ if (IS_ERR(simdev))
+ return PTR_ERR(simdev);
+
+ ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM);
+ if (ret)
+ goto put_dev;
+
+ return 0;
+
+put_dev:
+ put_device(&simdev->vdpa.dev);
+ return ret;
+}
+
+static void vdpasim_blk_dev_del(struct vdpa_mgmt_dev *mdev,
+ struct vdpa_device *dev)
+{
+ struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa);
+
+ _vdpa_unregister_device(&simdev->vdpa);
+}
+
+static const struct vdpa_mgmtdev_ops vdpasim_blk_mgmtdev_ops = {
+ .dev_add = vdpasim_blk_dev_add,
+ .dev_del = vdpasim_blk_dev_del
+};
+
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+static struct vdpa_mgmt_dev mgmt_dev = {
+ .device = &vdpasim_blk_mgmtdev,
+ .id_table = id_table,
+ .ops = &vdpasim_blk_mgmtdev_ops,
+};
+
+static int __init vdpasim_blk_init(void)
+{
+ int ret;
+
+ ret = device_register(&vdpasim_blk_mgmtdev);
+ if (ret)
+ return ret;
+
+ ret = vdpa_mgmtdev_register(&mgmt_dev);
+ if (ret)
+ goto parent_err;
+
+ return 0;
+
+parent_err:
+ device_unregister(&vdpasim_blk_mgmtdev);
+ return ret;
+}
+
+static void __exit vdpasim_blk_exit(void)
+{
+ vdpa_mgmtdev_unregister(&mgmt_dev);
+ device_unregister(&vdpasim_blk_mgmtdev);
+}
+
+module_init(vdpasim_blk_init)
+module_exit(vdpasim_blk_exit)
+
+MODULE_VERSION(DRV_VERSION);
+MODULE_LICENSE(DRV_LICENSE);
+MODULE_AUTHOR(DRV_AUTHOR);
+MODULE_DESCRIPTION(DRV_DESC);
diff --git a/drivers/vdpa/virtio_pci/Makefile b/drivers/vdpa/virtio_pci/Makefile
new file mode 100644
index 000000000000..231088d3af7d
--- /dev/null
+++ b/drivers/vdpa/virtio_pci/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_VP_VDPA) += vp_vdpa.o
diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c
new file mode 100644
index 000000000000..c76ebb531212
--- /dev/null
+++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
@@ -0,0 +1,484 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vDPA bridge driver for modern virtio-pci device
+ *
+ * Copyright (c) 2020, Red Hat Inc. All rights reserved.
+ * Author: Jason Wang <jasowang@redhat.com>
+ *
+ * Based on virtio_pci_modern.c.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/vdpa.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_pci.h>
+#include <linux/virtio_pci_modern.h>
+
+#define VP_VDPA_QUEUE_MAX 256
+#define VP_VDPA_DRIVER_NAME "vp_vdpa"
+#define VP_VDPA_NAME_SIZE 256
+
+struct vp_vring {
+ void __iomem *notify;
+ char msix_name[VP_VDPA_NAME_SIZE];
+ struct vdpa_callback cb;
+ resource_size_t notify_pa;
+ int irq;
+};
+
+struct vp_vdpa {
+ struct vdpa_device vdpa;
+ struct virtio_pci_modern_device mdev;
+ struct vp_vring *vring;
+ struct vdpa_callback config_cb;
+ char msix_name[VP_VDPA_NAME_SIZE];
+ int config_irq;
+ int queues;
+ int vectors;
+};
+
+static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa)
+{
+ return container_of(vdpa, struct vp_vdpa, vdpa);
+}
+
+static struct virtio_pci_modern_device *vdpa_to_mdev(struct vdpa_device *vdpa)
+{
+ struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+
+ return &vp_vdpa->mdev;
+}
+
+static u64 vp_vdpa_get_features(struct vdpa_device *vdpa)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ return vp_modern_get_features(mdev);
+}
+
+static int vp_vdpa_set_features(struct vdpa_device *vdpa, u64 features)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ vp_modern_set_features(mdev, features);
+
+ return 0;
+}
+
+static u8 vp_vdpa_get_status(struct vdpa_device *vdpa)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ return vp_modern_get_status(mdev);
+}
+
+static void vp_vdpa_free_irq(struct vp_vdpa *vp_vdpa)
+{
+ struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev;
+ struct pci_dev *pdev = mdev->pci_dev;
+ int i;
+
+ for (i = 0; i < vp_vdpa->queues; i++) {
+ if (vp_vdpa->vring[i].irq != VIRTIO_MSI_NO_VECTOR) {
+ vp_modern_queue_vector(mdev, i, VIRTIO_MSI_NO_VECTOR);
+ devm_free_irq(&pdev->dev, vp_vdpa->vring[i].irq,
+ &vp_vdpa->vring[i]);
+ vp_vdpa->vring[i].irq = VIRTIO_MSI_NO_VECTOR;
+ }
+ }
+
+ if (vp_vdpa->config_irq != VIRTIO_MSI_NO_VECTOR) {
+ vp_modern_config_vector(mdev, VIRTIO_MSI_NO_VECTOR);
+ devm_free_irq(&pdev->dev, vp_vdpa->config_irq, vp_vdpa);
+ vp_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR;
+ }
+
+ if (vp_vdpa->vectors) {
+ pci_free_irq_vectors(pdev);
+ vp_vdpa->vectors = 0;
+ }
+}
+
+static irqreturn_t vp_vdpa_vq_handler(int irq, void *arg)
+{
+ struct vp_vring *vring = arg;
+
+ if (vring->cb.callback)
+ return vring->cb.callback(vring->cb.private);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t vp_vdpa_config_handler(int irq, void *arg)
+{
+ struct vp_vdpa *vp_vdpa = arg;
+
+ if (vp_vdpa->config_cb.callback)
+ return vp_vdpa->config_cb.callback(vp_vdpa->config_cb.private);
+
+ return IRQ_HANDLED;
+}
+
+static int vp_vdpa_request_irq(struct vp_vdpa *vp_vdpa)
+{
+ struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev;
+ struct pci_dev *pdev = mdev->pci_dev;
+ int i, ret, irq;
+ int queues = vp_vdpa->queues;
+ int vectors = queues + 1;
+
+ ret = pci_alloc_irq_vectors(pdev, vectors, vectors, PCI_IRQ_MSIX);
+ if (ret != vectors) {
+ dev_err(&pdev->dev,
+ "vp_vdpa: fail to allocate irq vectors want %d but %d\n",
+ vectors, ret);
+ return ret;
+ }
+
+ vp_vdpa->vectors = vectors;
+
+ for (i = 0; i < queues; i++) {
+ snprintf(vp_vdpa->vring[i].msix_name, VP_VDPA_NAME_SIZE,
+ "vp-vdpa[%s]-%d\n", pci_name(pdev), i);
+ irq = pci_irq_vector(pdev, i);
+ ret = devm_request_irq(&pdev->dev, irq,
+ vp_vdpa_vq_handler,
+ 0, vp_vdpa->vring[i].msix_name,
+ &vp_vdpa->vring[i]);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "vp_vdpa: fail to request irq for vq %d\n", i);
+ goto err;
+ }
+ vp_modern_queue_vector(mdev, i, i);
+ vp_vdpa->vring[i].irq = irq;
+ }
+
+ snprintf(vp_vdpa->msix_name, VP_VDPA_NAME_SIZE, "vp-vdpa[%s]-config\n",
+ pci_name(pdev));
+ irq = pci_irq_vector(pdev, queues);
+ ret = devm_request_irq(&pdev->dev, irq, vp_vdpa_config_handler, 0,
+ vp_vdpa->msix_name, vp_vdpa);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "vp_vdpa: fail to request irq for vq %d\n", i);
+ goto err;
+ }
+ vp_modern_config_vector(mdev, queues);
+ vp_vdpa->config_irq = irq;
+
+ return 0;
+err:
+ vp_vdpa_free_irq(vp_vdpa);
+ return ret;
+}
+
+static void vp_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
+{
+ struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+ struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev;
+ u8 s = vp_vdpa_get_status(vdpa);
+
+ if (status & VIRTIO_CONFIG_S_DRIVER_OK &&
+ !(s & VIRTIO_CONFIG_S_DRIVER_OK)) {
+ vp_vdpa_request_irq(vp_vdpa);
+ }
+
+ vp_modern_set_status(mdev, status);
+
+ if (!(status & VIRTIO_CONFIG_S_DRIVER_OK) &&
+ (s & VIRTIO_CONFIG_S_DRIVER_OK))
+ vp_vdpa_free_irq(vp_vdpa);
+}
+
+static u16 vp_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
+{
+ return VP_VDPA_QUEUE_MAX;
+}
+
+static int vp_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 qid,
+ struct vdpa_vq_state *state)
+{
+ /* Note that this is not supported by virtio specification, so
+ * we return -EOPNOTSUPP here. This means we can't support live
+ * migration, vhost device start/stop.
+ */
+ return -EOPNOTSUPP;
+}
+
+static int vp_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 qid,
+ const struct vdpa_vq_state *state)
+{
+ /* Note that this is not supported by virtio specification, so
+ * we return -ENOPOTSUPP here. This means we can't support live
+ * migration, vhost device start/stop.
+ */
+ return -EOPNOTSUPP;
+}
+
+static void vp_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 qid,
+ struct vdpa_callback *cb)
+{
+ struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+
+ vp_vdpa->vring[qid].cb = *cb;
+}
+
+static void vp_vdpa_set_vq_ready(struct vdpa_device *vdpa,
+ u16 qid, bool ready)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ vp_modern_set_queue_enable(mdev, qid, ready);
+}
+
+static bool vp_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 qid)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ return vp_modern_get_queue_enable(mdev, qid);
+}
+
+static void vp_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 qid,
+ u32 num)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ vp_modern_set_queue_size(mdev, qid, num);
+}
+
+static int vp_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 qid,
+ u64 desc_area, u64 driver_area,
+ u64 device_area)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ vp_modern_queue_address(mdev, qid, desc_area,
+ driver_area, device_area);
+
+ return 0;
+}
+
+static void vp_vdpa_kick_vq(struct vdpa_device *vdpa, u16 qid)
+{
+ struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+
+ vp_iowrite16(qid, vp_vdpa->vring[qid].notify);
+}
+
+static u32 vp_vdpa_get_generation(struct vdpa_device *vdpa)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ return vp_modern_generation(mdev);
+}
+
+static u32 vp_vdpa_get_device_id(struct vdpa_device *vdpa)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ return mdev->id.device;
+}
+
+static u32 vp_vdpa_get_vendor_id(struct vdpa_device *vdpa)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ return mdev->id.vendor;
+}
+
+static u32 vp_vdpa_get_vq_align(struct vdpa_device *vdpa)
+{
+ return PAGE_SIZE;
+}
+
+static size_t vp_vdpa_get_config_size(struct vdpa_device *vdpa)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ return mdev->device_len;
+}
+
+static void vp_vdpa_get_config(struct vdpa_device *vdpa,
+ unsigned int offset,
+ void *buf, unsigned int len)
+{
+ struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+ struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev;
+ u8 old, new;
+ u8 *p;
+ int i;
+
+ do {
+ old = vp_ioread8(&mdev->common->config_generation);
+ p = buf;
+ for (i = 0; i < len; i++)
+ *p++ = vp_ioread8(mdev->device + offset + i);
+
+ new = vp_ioread8(&mdev->common->config_generation);
+ } while (old != new);
+}
+
+static void vp_vdpa_set_config(struct vdpa_device *vdpa,
+ unsigned int offset, const void *buf,
+ unsigned int len)
+{
+ struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+ struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev;
+ const u8 *p = buf;
+ int i;
+
+ for (i = 0; i < len; i++)
+ vp_iowrite8(*p++, mdev->device + offset + i);
+}
+
+static void vp_vdpa_set_config_cb(struct vdpa_device *vdpa,
+ struct vdpa_callback *cb)
+{
+ struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+
+ vp_vdpa->config_cb = *cb;
+}
+
+static struct vdpa_notification_area
+vp_vdpa_get_vq_notification(struct vdpa_device *vdpa, u16 qid)
+{
+ struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+ struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev;
+ struct vdpa_notification_area notify;
+
+ notify.addr = vp_vdpa->vring[qid].notify_pa;
+ notify.size = mdev->notify_offset_multiplier;
+
+ return notify;
+}
+
+static const struct vdpa_config_ops vp_vdpa_ops = {
+ .get_features = vp_vdpa_get_features,
+ .set_features = vp_vdpa_set_features,
+ .get_status = vp_vdpa_get_status,
+ .set_status = vp_vdpa_set_status,
+ .get_vq_num_max = vp_vdpa_get_vq_num_max,
+ .get_vq_state = vp_vdpa_get_vq_state,
+ .get_vq_notification = vp_vdpa_get_vq_notification,
+ .set_vq_state = vp_vdpa_set_vq_state,
+ .set_vq_cb = vp_vdpa_set_vq_cb,
+ .set_vq_ready = vp_vdpa_set_vq_ready,
+ .get_vq_ready = vp_vdpa_get_vq_ready,
+ .set_vq_num = vp_vdpa_set_vq_num,
+ .set_vq_address = vp_vdpa_set_vq_address,
+ .kick_vq = vp_vdpa_kick_vq,
+ .get_generation = vp_vdpa_get_generation,
+ .get_device_id = vp_vdpa_get_device_id,
+ .get_vendor_id = vp_vdpa_get_vendor_id,
+ .get_vq_align = vp_vdpa_get_vq_align,
+ .get_config_size = vp_vdpa_get_config_size,
+ .get_config = vp_vdpa_get_config,
+ .set_config = vp_vdpa_set_config,
+ .set_config_cb = vp_vdpa_set_config_cb,
+};
+
+static void vp_vdpa_free_irq_vectors(void *data)
+{
+ pci_free_irq_vectors(data);
+}
+
+static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct virtio_pci_modern_device *mdev;
+ struct device *dev = &pdev->dev;
+ struct vp_vdpa *vp_vdpa;
+ int ret, i;
+
+ ret = pcim_enable_device(pdev);
+ if (ret)
+ return ret;
+
+ vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa,
+ dev, &vp_vdpa_ops, NULL);
+ if (vp_vdpa == NULL) {
+ dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n");
+ return -ENOMEM;
+ }
+
+ mdev = &vp_vdpa->mdev;
+ mdev->pci_dev = pdev;
+
+ ret = vp_modern_probe(mdev);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to probe modern PCI device\n");
+ goto err;
+ }
+
+ pci_set_master(pdev);
+ pci_set_drvdata(pdev, vp_vdpa);
+
+ vp_vdpa->vdpa.dma_dev = &pdev->dev;
+ vp_vdpa->queues = vp_modern_get_num_queues(mdev);
+
+ ret = devm_add_action_or_reset(dev, vp_vdpa_free_irq_vectors, pdev);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "Failed for adding devres for freeing irq vectors\n");
+ goto err;
+ }
+
+ vp_vdpa->vring = devm_kcalloc(&pdev->dev, vp_vdpa->queues,
+ sizeof(*vp_vdpa->vring),
+ GFP_KERNEL);
+ if (!vp_vdpa->vring) {
+ ret = -ENOMEM;
+ dev_err(&pdev->dev, "Fail to allocate virtqueues\n");
+ goto err;
+ }
+
+ for (i = 0; i < vp_vdpa->queues; i++) {
+ vp_vdpa->vring[i].irq = VIRTIO_MSI_NO_VECTOR;
+ vp_vdpa->vring[i].notify =
+ vp_modern_map_vq_notify(mdev, i,
+ &vp_vdpa->vring[i].notify_pa);
+ if (!vp_vdpa->vring[i].notify) {
+ dev_warn(&pdev->dev, "Fail to map vq notify %d\n", i);
+ goto err;
+ }
+ }
+ vp_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR;
+
+ ret = vdpa_register_device(&vp_vdpa->vdpa, vp_vdpa->queues);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to register to vdpa bus\n");
+ goto err;
+ }
+
+ return 0;
+
+err:
+ put_device(&vp_vdpa->vdpa.dev);
+ return ret;
+}
+
+static void vp_vdpa_remove(struct pci_dev *pdev)
+{
+ struct vp_vdpa *vp_vdpa = pci_get_drvdata(pdev);
+
+ vdpa_unregister_device(&vp_vdpa->vdpa);
+ vp_modern_remove(&vp_vdpa->mdev);
+}
+
+static struct pci_driver vp_vdpa_driver = {
+ .name = "vp-vdpa",
+ .id_table = NULL, /* only dynamic ids */
+ .probe = vp_vdpa_probe,
+ .remove = vp_vdpa_remove,
+};
+
+module_pci_driver(vp_vdpa_driver);
+
+MODULE_AUTHOR("Jason Wang <jasowang@redhat.com>");
+MODULE_DESCRIPTION("vp-vdpa");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1");
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index f5ebe008a28b..fb41db3da611 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -16,12 +16,12 @@
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mm.h>
+#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>
-#include <linux/virtio_net.h>
#include "vhost.h"
@@ -188,13 +188,8 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
struct vhost_vdpa_config *c)
{
- long size = 0;
-
- switch (v->virtio_id) {
- case VIRTIO_ID_NET:
- size = sizeof(struct virtio_net_config);
- break;
- }
+ struct vdpa_device *vdpa = v->vdpa;
+ long size = vdpa->config->get_config_size(vdpa);
if (c->len == 0)
return -EINVAL;
@@ -989,6 +984,7 @@ static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
if (vma->vm_end - vma->vm_start != notify.size)
return -ENOTSUPP;
+ vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
vma->vm_ops = &vhost_vdpa_vm_ops;
return 0;
}
@@ -1023,10 +1019,6 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
int minor;
int r;
- /* Currently, we only accept the network devices. */
- if (ops->get_device_id(vdpa) != VIRTIO_ID_NET)
- return -ENOTSUPP;
-
v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!v)
return -ENOMEM;
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 85d85faba058..4af8fa259d65 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -75,6 +75,34 @@ static inline int __vringh_get_head(const struct vringh *vrh,
return head;
}
+/**
+ * vringh_kiov_advance - skip bytes from vring_kiov
+ * @iov: an iov passed to vringh_getdesc_*() (updated as we consume)
+ * @len: the maximum length to advance
+ */
+void vringh_kiov_advance(struct vringh_kiov *iov, size_t len)
+{
+ while (len && iov->i < iov->used) {
+ size_t partlen = min(iov->iov[iov->i].iov_len, len);
+
+ iov->consumed += partlen;
+ iov->iov[iov->i].iov_len -= partlen;
+ iov->iov[iov->i].iov_base += partlen;
+
+ if (!iov->iov[iov->i].iov_len) {
+ /* Fix up old iov element then increment. */
+ iov->iov[iov->i].iov_len = iov->consumed;
+ iov->iov[iov->i].iov_base -= iov->consumed;
+
+ iov->consumed = 0;
+ iov->i++;
+ }
+
+ len -= partlen;
+ }
+}
+EXPORT_SYMBOL(vringh_kiov_advance);
+
/* Copy some bytes to/from the iovec. Returns num copied. */
static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
struct vringh_kiov *iov,
@@ -95,19 +123,8 @@ static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
done += partlen;
len -= partlen;
ptr += partlen;
- iov->consumed += partlen;
- iov->iov[iov->i].iov_len -= partlen;
- iov->iov[iov->i].iov_base += partlen;
- if (!iov->iov[iov->i].iov_len) {
- /* Fix up old iov element then increment. */
- iov->iov[iov->i].iov_len = iov->consumed;
- iov->iov[iov->i].iov_base -= iov->consumed;
-
-
- iov->consumed = 0;
- iov->i++;
- }
+ vringh_kiov_advance(iov, partlen);
}
return done;
}
@@ -290,9 +307,9 @@ __vringh_iov(struct vringh *vrh, u16 i,
return -EINVAL;
if (riov)
- riov->i = riov->used = 0;
+ riov->i = riov->used = riov->consumed = 0;
if (wiov)
- wiov->i = wiov->used = 0;
+ wiov->i = wiov->used = wiov->consumed = 0;
for (;;) {
void *addr;
@@ -662,7 +679,10 @@ EXPORT_SYMBOL(vringh_init_user);
* *head will be vrh->vring.num. You may be able to ignore an invalid
* descriptor, but there's not much you can do with an invalid ring.
*
- * Note that you may need to clean up riov and wiov, even on error!
+ * Note that you can reuse riov and wiov with subsequent calls. Content is
+ * overwritten and memory reallocated if more space is needed.
+ * When you don't have to use riov and wiov anymore, you should clean up them
+ * calling vringh_iov_cleanup() to release the memory, even on error!
*/
int vringh_getdesc_user(struct vringh *vrh,
struct vringh_iov *riov,
@@ -932,7 +952,10 @@ EXPORT_SYMBOL(vringh_init_kern);
* *head will be vrh->vring.num. You may be able to ignore an invalid
* descriptor, but there's not much you can do with an invalid ring.
*
- * Note that you may need to clean up riov and wiov, even on error!
+ * Note that you can reuse riov and wiov with subsequent calls. Content is
+ * overwritten and memory reallocated if more space is needed.
+ * When you don't have to use riov and wiov anymore, you should clean up them
+ * calling vringh_kiov_cleanup() to release the memory, even on error!
*/
int vringh_getdesc_kern(struct vringh *vrh,
struct vringh_kiov *riov,
@@ -1074,6 +1097,8 @@ static int iotlb_translate(const struct vringh *vrh,
int ret = 0;
u64 s = 0;
+ spin_lock(vrh->iotlb_lock);
+
while (len > s) {
u64 size, pa, pfn;
@@ -1103,6 +1128,8 @@ static int iotlb_translate(const struct vringh *vrh,
++ret;
}
+ spin_unlock(vrh->iotlb_lock);
+
return ret;
}
@@ -1262,10 +1289,13 @@ EXPORT_SYMBOL(vringh_init_iotlb);
* vringh_set_iotlb - initialize a vringh for a ring with IOTLB.
* @vrh: the vring
* @iotlb: iotlb associated with this vring
+ * @iotlb_lock: spinlock to synchronize the iotlb accesses
*/
-void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb)
+void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
+ spinlock_t *iotlb_lock)
{
vrh->iotlb = iotlb;
+ vrh->iotlb_lock = iotlb_lock;
}
EXPORT_SYMBOL(vringh_set_iotlb);
@@ -1285,7 +1315,10 @@ EXPORT_SYMBOL(vringh_set_iotlb);
* *head will be vrh->vring.num. You may be able to ignore an invalid
* descriptor, but there's not much you can do with an invalid ring.
*
- * Note that you may need to clean up riov and wiov, even on error!
+ * Note that you can reuse riov and wiov with subsequent calls. Content is
+ * overwritten and memory reallocated if more space is needed.
+ * When you don't have to use riov and wiov anymore, you should clean up them
+ * calling vringh_kiov_cleanup() to release the memory, even on error!
*/
int vringh_getdesc_iotlb(struct vringh *vrh,
struct vringh_kiov *riov,
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 8985fc2cea86..510e9318854d 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -734,7 +734,7 @@ static void report_free_page_func(struct work_struct *work)
#ifdef CONFIG_BALLOON_COMPACTION
/*
* virtballoon_migratepage - perform the balloon page migration on behalf of
- * a compation thread. (called under page lock)
+ * a compaction thread. (called under page lock)
* @vb_dev_info: the balloon device
* @newpage: page that will replace the isolated page after migration finishes.
* @page : the isolated (old) page that is about to be migrated to newpage.
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index fbd4ebc00eb6..30654d3a0b41 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -192,7 +192,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
struct virtqueue *vq;
- u16 num, off;
+ u16 num;
int err;
if (index >= vp_modern_get_num_queues(mdev))
@@ -208,9 +208,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
return ERR_PTR(-EINVAL);
}
- /* get offset of notification word for this vq */
- off = vp_modern_get_queue_notify_off(mdev, index);
-
info->msix_vector = msix_vec;
/* create the vring */
@@ -227,27 +224,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
virtqueue_get_avail_addr(vq),
virtqueue_get_used_addr(vq));
- if (mdev->notify_base) {
- /* offset should not wrap */
- if ((u64)off * mdev->notify_offset_multiplier + 2
- > mdev->notify_len) {
- dev_warn(&mdev->pci_dev->dev,
- "bad notification offset %u (x %u) "
- "for queue %u > %zd",
- off, mdev->notify_offset_multiplier,
- index, mdev->notify_len);
- err = -EINVAL;
- goto err_map_notify;
- }
- vq->priv = (void __force *)mdev->notify_base +
- off * mdev->notify_offset_multiplier;
- } else {
- vq->priv = (void __force *)vp_modern_map_capability(mdev,
- mdev->notify_map_cap, 2, 2,
- off * mdev->notify_offset_multiplier, 2,
- NULL);
- }
-
+ vq->priv = (void __force *)vp_modern_map_vq_notify(mdev, index, NULL);
if (!vq->priv) {
err = -ENOMEM;
goto err_map_notify;
diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c
index cbd667496bb1..54f297028586 100644
--- a/drivers/virtio/virtio_pci_modern_dev.c
+++ b/drivers/virtio/virtio_pci_modern_dev.c
@@ -13,14 +13,14 @@
* @start: start from the capability
* @size: map size
* @len: the length that is actually mapped
+ * @pa: physical address of the capability
*
* Returns the io address of for the part of the capability
*/
-void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off,
- size_t minlen,
- u32 align,
- u32 start, u32 size,
- size_t *len)
+static void __iomem *
+vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off,
+ size_t minlen, u32 align, u32 start, u32 size,
+ size_t *len, resource_size_t *pa)
{
struct pci_dev *dev = mdev->pci_dev;
u8 bar;
@@ -88,9 +88,11 @@ void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, in
dev_err(&dev->dev,
"virtio_pci: unable to map virtio %u@%u on bar %i\n",
length, offset, bar);
+ else if (pa)
+ *pa = pci_resource_start(dev, bar) + offset;
+
return p;
}
-EXPORT_SYMBOL_GPL(vp_modern_map_capability);
/**
* virtio_pci_find_capability - walk capabilities to find device info.
@@ -275,12 +277,12 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev)
mdev->common = vp_modern_map_capability(mdev, common,
sizeof(struct virtio_pci_common_cfg), 4,
0, sizeof(struct virtio_pci_common_cfg),
- NULL);
+ NULL, NULL);
if (!mdev->common)
goto err_map_common;
mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1,
0, 1,
- NULL);
+ NULL, NULL);
if (!mdev->isr)
goto err_map_isr;
@@ -308,7 +310,8 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev)
mdev->notify_base = vp_modern_map_capability(mdev, notify,
2, 2,
0, notify_length,
- &mdev->notify_len);
+ &mdev->notify_len,
+ &mdev->notify_pa);
if (!mdev->notify_base)
goto err_map_notify;
} else {
@@ -321,7 +324,8 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev)
if (device) {
mdev->device = vp_modern_map_capability(mdev, device, 0, 4,
0, PAGE_SIZE,
- &mdev->device_len);
+ &mdev->device_len,
+ NULL);
if (!mdev->device)
goto err_map_device;
}
@@ -584,14 +588,51 @@ EXPORT_SYMBOL_GPL(vp_modern_get_num_queues);
*
* Returns the notification offset for a virtqueue
*/
-u16 vp_modern_get_queue_notify_off(struct virtio_pci_modern_device *mdev,
- u16 index)
+static u16 vp_modern_get_queue_notify_off(struct virtio_pci_modern_device *mdev,
+ u16 index)
{
vp_iowrite16(index, &mdev->common->queue_select);
return vp_ioread16(&mdev->common->queue_notify_off);
}
-EXPORT_SYMBOL_GPL(vp_modern_get_queue_notify_off);
+
+/*
+ * vp_modern_map_vq_notify - map notification area for a
+ * specific virtqueue
+ * @mdev: the modern virtio-pci device
+ * @index: the queue index
+ * @pa: the pointer to the physical address of the nofity area
+ *
+ * Returns the address of the notification area
+ */
+void __iomem *vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev,
+ u16 index, resource_size_t *pa)
+{
+ u16 off = vp_modern_get_queue_notify_off(mdev, index);
+
+ if (mdev->notify_base) {
+ /* offset should not wrap */
+ if ((u64)off * mdev->notify_offset_multiplier + 2
+ > mdev->notify_len) {
+ dev_warn(&mdev->pci_dev->dev,
+ "bad notification offset %u (x %u) "
+ "for queue %u > %zd",
+ off, mdev->notify_offset_multiplier,
+ index, mdev->notify_len);
+ return NULL;
+ }
+ if (pa)
+ *pa = mdev->notify_pa +
+ off * mdev->notify_offset_multiplier;
+ return mdev->notify_base + off * mdev->notify_offset_multiplier;
+ } else {
+ return vp_modern_map_capability(mdev,
+ mdev->notify_map_cap, 2, 2,
+ off * mdev->notify_offset_multiplier, 2,
+ NULL, pa);
+ }
+}
+EXPORT_SYMBOL_GPL(vp_modern_map_vq_notify);
MODULE_VERSION("0.1");
MODULE_DESCRIPTION("Modern Virtio PCI Device");
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 15fa085fab05..f311d227aa1b 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -8,7 +8,7 @@
#include <linux/vhost_iotlb.h>
/**
- * vDPA callback definition.
+ * struct vdpa_calllback - vDPA callback definition.
* @callback: interrupt callback function
* @private: the data passed to the callback function
*/
@@ -18,7 +18,7 @@ struct vdpa_callback {
};
/**
- * vDPA notification area
+ * struct vdpa_notification_area - vDPA notification area
* @addr: base address of the notification area
* @size: size of the notification area
*/
@@ -28,7 +28,7 @@ struct vdpa_notification_area {
};
/**
- * vDPA vq_state definition
+ * struct vdpa_vq_state - vDPA vq_state definition
* @avail_index: available index
*/
struct vdpa_vq_state {
@@ -38,7 +38,7 @@ struct vdpa_vq_state {
struct vdpa_mgmt_dev;
/**
- * vDPA device - representation of a vDPA device
+ * struct vdpa_device - representation of a vDPA device
* @dev: underlying device
* @dma_dev: the actual device that is performing DMA
* @config: the configuration ops for this device.
@@ -59,7 +59,7 @@ struct vdpa_device {
};
/**
- * vDPA IOVA range - the IOVA range support by the device
+ * struct vdpa_iova_range - the IOVA range support by the device
* @first: start of the IOVA range
* @last: end of the IOVA range
*/
@@ -69,7 +69,7 @@ struct vdpa_iova_range {
};
/**
- * vDPA_config_ops - operations for configuring a vDPA device.
+ * struct vdpa_config_ops - operations for configuring a vDPA device.
* Note: vDPA device drivers are required to implement all of the
* operations unless it is mentioned to be optional in the following
* list.
@@ -150,6 +150,9 @@ struct vdpa_iova_range {
* @set_status: Set the device status
* @vdev: vdpa device
* @status: virtio device status
+ * @get_config_size: Get the size of the configuration space
+ * @vdev: vdpa device
+ * Returns size_t: configuration size
* @get_config: Read from device specific configuration space
* @vdev: vdpa device
* @offset: offset from the beginning of
@@ -231,6 +234,7 @@ struct vdpa_config_ops {
u32 (*get_vendor_id)(struct vdpa_device *vdev);
u8 (*get_status)(struct vdpa_device *vdev);
void (*set_status)(struct vdpa_device *vdev, u8 status);
+ size_t (*get_config_size)(struct vdpa_device *vdev);
void (*get_config)(struct vdpa_device *vdev, unsigned int offset,
void *buf, unsigned int len);
void (*set_config)(struct vdpa_device *vdev, unsigned int offset,
@@ -267,7 +271,7 @@ int _vdpa_register_device(struct vdpa_device *vdev, int nvqs);
void _vdpa_unregister_device(struct vdpa_device *vdev);
/**
- * vdpa_driver - operations for a vDPA driver
+ * struct vdpa_driver - operations for a vDPA driver
* @driver: underlying device driver
* @probe: the function to call when a device is found. Returns 0 or -errno.
* @remove: the function to call when a device is removed.
@@ -344,18 +348,18 @@ static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset,
}
/**
- * vdpa_mgmtdev_ops - vdpa device ops
- * @dev_add: Add a vdpa device using alloc and register
- * @mdev: parent device to use for device addition
- * @name: name of the new vdpa device
- * Driver need to add a new device using _vdpa_register_device()
- * after fully initializing the vdpa device. Driver must return 0
- * on success or appropriate error code.
- * @dev_del: Remove a vdpa device using unregister
- * @mdev: parent device to use for device removal
- * @dev: vdpa device to remove
- * Driver need to remove the specified device by calling
- * _vdpa_unregister_device().
+ * struct vdpa_mgmtdev_ops - vdpa device ops
+ * @dev_add: Add a vdpa device using alloc and register
+ * @mdev: parent device to use for device addition
+ * @name: name of the new vdpa device
+ * Driver need to add a new device using _vdpa_register_device()
+ * after fully initializing the vdpa device. Driver must return 0
+ * on success or appropriate error code.
+ * @dev_del: Remove a vdpa device using unregister
+ * @mdev: parent device to use for device removal
+ * @dev: vdpa device to remove
+ * Driver need to remove the specified device by calling
+ * _vdpa_unregister_device().
*/
struct vdpa_mgmtdev_ops {
int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name);
diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h
index f26acbeec965..6a95b58fd0f4 100644
--- a/include/linux/virtio_pci_modern.h
+++ b/include/linux/virtio_pci_modern.h
@@ -13,6 +13,8 @@ struct virtio_pci_modern_device {
void __iomem *device;
/* Base of vq notifications (non-legacy mode). */
void __iomem *notify_base;
+ /* Physical base of vq notifications */
+ resource_size_t notify_pa;
/* Where to read and clear interrupt */
u8 __iomem *isr;
@@ -99,13 +101,8 @@ void vp_modern_set_queue_size(struct virtio_pci_modern_device *mdev,
u16 vp_modern_get_queue_size(struct virtio_pci_modern_device *mdev,
u16 idx);
u16 vp_modern_get_num_queues(struct virtio_pci_modern_device *mdev);
-u16 vp_modern_get_queue_notify_off(struct virtio_pci_modern_device *mdev,
- u16 idx);
-void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off,
- size_t minlen,
- u32 align,
- u32 start, u32 size,
- size_t *len);
+void __iomem * vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev,
+ u16 index, resource_size_t *pa);
int vp_modern_probe(struct virtio_pci_modern_device *mdev);
void vp_modern_remove(struct virtio_pci_modern_device *mdev);
#endif
diff --git a/include/linux/vringh.h b/include/linux/vringh.h
index 59bd50f99291..84db7b8f912f 100644
--- a/include/linux/vringh.h
+++ b/include/linux/vringh.h
@@ -46,6 +46,9 @@ struct vringh {
/* IOTLB for this vring */
struct vhost_iotlb *iotlb;
+ /* spinlock to synchronize IOTLB accesses */
+ spinlock_t *iotlb_lock;
+
/* The function to call to notify the guest about added buffers */
void (*notify)(struct vringh *);
};
@@ -196,6 +199,19 @@ static inline void vringh_kiov_cleanup(struct vringh_kiov *kiov)
kiov->iov = NULL;
}
+static inline size_t vringh_kiov_length(struct vringh_kiov *kiov)
+{
+ size_t len = 0;
+ int i;
+
+ for (i = kiov->i; i < kiov->used; i++)
+ len += kiov->iov[i].iov_len;
+
+ return len;
+}
+
+void vringh_kiov_advance(struct vringh_kiov *kiov, size_t len);
+
int vringh_getdesc_kern(struct vringh *vrh,
struct vringh_kiov *riov,
struct vringh_kiov *wiov,
@@ -258,7 +274,8 @@ static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val)
#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
-void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb);
+void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
+ spinlock_t *iotlb_lock);
int vringh_init_iotlb(struct vringh *vrh, u64 features,
unsigned int num, bool weak_barriers,