From a865e420b9561235851c3f5d483c82ef389d29bd Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 6 Apr 2020 08:42:55 -0400 Subject: virtio: force spec specified alignment on types The ring element addresses are passed between components with different alignments assumptions. Thus, if guest/userspace selects a pointer and host then gets and dereferences it, we might need to decrease the compiler-selected alignment to prevent compiler on the host from assuming pointer is aligned. This actually triggers on ARM with -mabi=apcs-gnu - which is a deprecated configuration, but it seems safer to handle this generally. Note that userspace that allocates the memory is actually OK and does not need to be fixed, but userspace that gets it from guest or another process does need to be fixed. The later doesn't generally talk to the kernel so while it might be buggy it's not talking to the kernel in the buggy way - it's just using the header in the buggy way - so fixing header and asking userspace to recompile is the best we can do. I verified that the produced kernel binary on x86 is exactly identical before and after the change. Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/vhost.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/vhost/vhost.c') diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 21a59b598ed8..96d9871fa0cb 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1244,9 +1244,9 @@ static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access) } static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, - struct vring_desc __user *desc, - struct vring_avail __user *avail, - struct vring_used __user *used) + vring_desc_t __user *desc, + vring_avail_t __user *avail, + vring_used_t __user *used) { return access_ok(desc, vhost_get_desc_size(vq, num)) && @@ -2301,7 +2301,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, unsigned count) { - struct vring_used_elem __user *used; + vring_used_elem_t __user *used; u16 old, new; int start; -- cgit v1.2.3 From 01fcb1cbc88effb3493c6197efc96b69b9f4823a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 29 May 2020 16:02:58 +0800 Subject: vhost: allow device that does not depend on vhost worker vDPA device currently relays the eventfd via vhost worker. This is inefficient due the latency of wakeup and scheduling, so this patch tries to introduce a use_worker attribute for the vhost device. When use_worker is not set with vhost_dev_init(), vhost won't try to allocate a worker thread and the vhost_poll will be processed directly in the wakeup function. This help for vDPA since it reduces the latency caused by vhost worker. In my testing, it saves 0.2 ms in pings between VMs on a mutual host. Signed-off-by: Zhu Lingshan Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200529080303.15449-2-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 2 +- drivers/vhost/scsi.c | 2 +- drivers/vhost/vdpa.c | 2 +- drivers/vhost/vhost.c | 38 +++++++++++++++++++++++++------------- drivers/vhost/vhost.h | 2 ++ drivers/vhost/vsock.c | 2 +- 6 files changed, 31 insertions(+), 17 deletions(-) (limited to 'drivers/vhost/vhost.c') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 2927f02cc7e1..bf5e1d81ae25 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1326,7 +1326,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) } vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX, UIO_MAXIOV + VHOST_NET_BATCH, - VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, + VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true, NULL); vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index c39952243fd3..0cbaa0b3893d 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1628,7 +1628,7 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; } vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV, - VHOST_SCSI_WEIGHT, 0, NULL); + VHOST_SCSI_WEIGHT, 0, true, NULL); vhost_scsi_init_inflight(vs, NULL); diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 0968361e3b77..f22bb316b8c4 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -694,7 +694,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) vqs[i] = &v->vqs[i]; vqs[i]->handle_kick = handle_vq_kick; } - vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, + vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false, vhost_vdpa_process_iotlb_msg); dev->iotlb = vhost_iotlb_alloc(0, 0); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 96d9871fa0cb..80da9d9662f2 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -166,11 +166,16 @@ static int vhost_poll_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait); + struct vhost_work *work = &poll->work; if (!(key_to_poll(key) & poll->mask)) return 0; - vhost_poll_queue(poll); + if (!poll->dev->use_worker) + work->fn(work); + else + vhost_poll_queue(poll); + return 0; } @@ -454,6 +459,7 @@ static size_t vhost_get_desc_size(struct vhost_virtqueue *vq, void vhost_dev_init(struct vhost_dev *dev, struct vhost_virtqueue **vqs, int nvqs, int iov_limit, int weight, int byte_weight, + bool use_worker, int (*msg_handler)(struct vhost_dev *dev, struct vhost_iotlb_msg *msg)) { @@ -471,6 +477,7 @@ void vhost_dev_init(struct vhost_dev *dev, dev->iov_limit = iov_limit; dev->weight = weight; dev->byte_weight = byte_weight; + dev->use_worker = use_worker; dev->msg_handler = msg_handler; init_llist_head(&dev->work_list); init_waitqueue_head(&dev->wait); @@ -549,18 +556,21 @@ long vhost_dev_set_owner(struct vhost_dev *dev) /* No owner, become one */ dev->mm = get_task_mm(current); dev->kcov_handle = kcov_common_handle(); - worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid); - if (IS_ERR(worker)) { - err = PTR_ERR(worker); - goto err_worker; - } + if (dev->use_worker) { + worker = kthread_create(vhost_worker, dev, + "vhost-%d", current->pid); + if (IS_ERR(worker)) { + err = PTR_ERR(worker); + goto err_worker; + } - dev->worker = worker; - wake_up_process(worker); /* avoid contributing to loadavg */ + dev->worker = worker; + wake_up_process(worker); /* avoid contributing to loadavg */ - err = vhost_attach_cgroups(dev); - if (err) - goto err_cgroup; + err = vhost_attach_cgroups(dev); + if (err) + goto err_cgroup; + } err = vhost_dev_alloc_iovecs(dev); if (err) @@ -568,8 +578,10 @@ long vhost_dev_set_owner(struct vhost_dev *dev) return 0; err_cgroup: - kthread_stop(worker); - dev->worker = NULL; + if (dev->worker) { + kthread_stop(dev->worker); + dev->worker = NULL; + } err_worker: if (dev->mm) mmput(dev->mm); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 60cab4c78229..c8e96a095d3b 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -154,6 +154,7 @@ struct vhost_dev { int weight; int byte_weight; u64 kcov_handle; + bool use_worker; int (*msg_handler)(struct vhost_dev *dev, struct vhost_iotlb_msg *msg); }; @@ -161,6 +162,7 @@ struct vhost_dev { bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len); void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs, int iov_limit, int weight, int byte_weight, + bool use_worker, int (*msg_handler)(struct vhost_dev *dev, struct vhost_iotlb_msg *msg)); long vhost_dev_set_owner(struct vhost_dev *dev); diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index fb4e944c4d0d..a483cec31d5c 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -632,7 +632,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT, - VHOST_VSOCK_WEIGHT, NULL); + VHOST_VSOCK_WEIGHT, true, NULL); file->private_data = vsock; spin_lock_init(&vsock->send_pkt_list_lock); -- cgit v1.2.3 From 5ce995f313ce56c0c62425c3ddc37c5c50fc33db Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 29 May 2020 16:02:59 +0800 Subject: vhost: use mmgrab() instead of mmget() for non worker device For the device that doesn't use vhost worker and use_mm(), mmget() is too heavy weight and it may brings troubles for implementing mmap() support for vDPA device. This is because, an reference to the address space was held via mm_get() in vhost_dev_set_owner() and an reference to the file was held in mmap(). This means when process exits, the mm can not be released thus we can not release the file. This patch tries to use mmgrab() instead of mmget(), which allows the address space to be destroy in process exit without releasing the mm structure itself. This is sufficient for vDPA device which pin user pages and does not depend on the address space to work. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200529080303.15449-3-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) (limited to 'drivers/vhost/vhost.c') diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 80da9d9662f2..a40d16bdebb5 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -541,6 +541,36 @@ bool vhost_dev_has_owner(struct vhost_dev *dev) } EXPORT_SYMBOL_GPL(vhost_dev_has_owner); +static void vhost_attach_mm(struct vhost_dev *dev) +{ + /* No owner, become one */ + if (dev->use_worker) { + dev->mm = get_task_mm(current); + } else { + /* vDPA device does not use worker thead, so there's + * no need to hold the address space for mm. This help + * to avoid deadlock in the case of mmap() which may + * held the refcnt of the file and depends on release + * method to remove vma. + */ + dev->mm = current->mm; + mmgrab(dev->mm); + } +} + +static void vhost_detach_mm(struct vhost_dev *dev) +{ + if (!dev->mm) + return; + + if (dev->use_worker) + mmput(dev->mm); + else + mmdrop(dev->mm); + + dev->mm = NULL; +} + /* Caller should have device mutex */ long vhost_dev_set_owner(struct vhost_dev *dev) { @@ -553,8 +583,8 @@ long vhost_dev_set_owner(struct vhost_dev *dev) goto err_mm; } - /* No owner, become one */ - dev->mm = get_task_mm(current); + vhost_attach_mm(dev); + dev->kcov_handle = kcov_common_handle(); if (dev->use_worker) { worker = kthread_create(vhost_worker, dev, @@ -583,9 +613,7 @@ err_cgroup: dev->worker = NULL; } err_worker: - if (dev->mm) - mmput(dev->mm); - dev->mm = NULL; + vhost_detach_mm(dev); dev->kcov_handle = 0; err_mm: return err; @@ -682,9 +710,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) dev->worker = NULL; dev->kcov_handle = 0; } - if (dev->mm) - mmput(dev->mm); - dev->mm = NULL; + vhost_detach_mm(dev); } EXPORT_SYMBOL_GPL(vhost_dev_cleanup); -- cgit v1.2.3 From 002ef18eff43708be1d914cb11dbdd84fb149474 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 27 May 2020 20:05:38 +0200 Subject: vhost: (cosmetic) remove a superfluous variable initialisation Even the compiler is able to figure out that in this case the initialisation is superfluous. Signed-off-by: Guennadi Liakhovetski Link: https://lore.kernel.org/r/20200527180541.5570-3-guennadi.liakhovetski@linux.intel.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/vhost/vhost.c') diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index a40d16bdebb5..534d1267b761 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -920,7 +920,7 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, #define vhost_put_user(vq, x, ptr) \ ({ \ - int ret = -EFAULT; \ + int ret; \ if (!vq->iotlb) { \ ret = __put_user(x, ptr); \ } else { \ -- cgit v1.2.3 From e0136c16fae95dc3762f3912f6f9336f5f57fe81 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Fri, 5 Jun 2020 18:27:14 +0800 Subject: vhost: replace -1 with VHOST_FILE_UNBIND in ioctls This commit replaces -1 with VHOST_FILE_UNBIND in ioctls since we have added such a macro in the uapi header for vdpa_host. Signed-off-by: Zhu Lingshan Acked-by: Jason Wang Link: https://lore.kernel.org/r/1591352835-22441-5-git-send-email-lingshan.zhu@intel.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/vhost/vhost.c') diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 534d1267b761..172da092107e 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1612,7 +1612,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg r = -EFAULT; break; } - eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); + eventfp = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_fget(f.fd); if (IS_ERR(eventfp)) { r = PTR_ERR(eventfp); break; @@ -1628,7 +1628,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg r = -EFAULT; break; } - ctx = f.fd == -1 ? NULL : eventfd_ctx_fdget(f.fd); + ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); if (IS_ERR(ctx)) { r = PTR_ERR(ctx); break; @@ -1640,7 +1640,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg r = -EFAULT; break; } - ctx = f.fd == -1 ? NULL : eventfd_ctx_fdget(f.fd); + ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); if (IS_ERR(ctx)) { r = PTR_ERR(ctx); break; @@ -1765,7 +1765,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) r = get_user(fd, (int __user *)argp); if (r < 0) break; - ctx = fd == -1 ? NULL : eventfd_ctx_fdget(fd); + ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd); if (IS_ERR(ctx)) { r = PTR_ERR(ctx); break; -- cgit v1.2.3