diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-20 13:28:18 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-20 13:28:18 -0700 |
commit | f0691533b756931089902464ca15afc218a49d70 (patch) | |
tree | 7d72b43866be5ae5507efb9c2976059d5d5cc0f1 /drivers/vhost | |
parent | 2b2f72d8ce59acce95ceb156f0f31b848e32e6d4 (diff) | |
parent | c67f5db82027ba6d2ea4ac9176bc45996a03ae6a (diff) | |
download | linux-f0691533b756931089902464ca15afc218a49d70.tar.bz2 |
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio/vhost updates from Michael Tsirkin:
"New features, performance improvements, cleanups:
- basic polling support for vhost
- rework virtio to optionally use DMA API, fixing it on Xen
- balloon stats gained a new entry
- using the new napi_alloc_skb speeds up virtio net
- virtio blk stats can now be read while another VCPU is busy
inflating or deflating the balloon
plus misc cleanups in various places"
* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
virtio_net: replace netdev_alloc_skb_ip_align() with napi_alloc_skb()
vhost_net: basic polling support
vhost: introduce vhost_vq_avail_empty()
vhost: introduce vhost_has_work()
virtio_balloon: Allow to resize and update the balloon stats in parallel
virtio_balloon: Use a workqueue instead of "vballoon" kthread
virtio/s390: size of SET_IND payload
virtio/s390: use dev_to_virtio
vhost: rename vhost_init_used()
vhost: rename cross-endian helpers
virtio_blk: VIRTIO_BLK_F_WCE->VIRTIO_BLK_F_FLUSH
vring: Use the DMA API on Xen
virtio_pci: Use the DMA API if enabled
virtio_mmio: Use the DMA API if enabled
virtio: Add improved queue allocation API
virtio_ring: Support DMA APIs
vring: Introduce vring_use_dma_api()
s390/dma: Allow per device dma ops
alpha/dma: use common noop dma ops
dma: Provide simple noop dma ops
Diffstat (limited to 'drivers/vhost')
-rw-r--r-- | drivers/vhost/net.c | 80 | ||||
-rw-r--r-- | drivers/vhost/scsi.c | 2 | ||||
-rw-r--r-- | drivers/vhost/test.c | 2 | ||||
-rw-r--r-- | drivers/vhost/vhost.c | 69 | ||||
-rw-r--r-- | drivers/vhost/vhost.h | 5 |
5 files changed, 141 insertions, 17 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 9eda69e40678..f744eeb3e2b4 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -287,6 +287,43 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) rcu_read_unlock_bh(); } +static inline unsigned long busy_clock(void) +{ + return local_clock() >> 10; +} + +static bool vhost_can_busy_poll(struct vhost_dev *dev, + unsigned long endtime) +{ + return likely(!need_resched()) && + likely(!time_after(busy_clock(), endtime)) && + likely(!signal_pending(current)) && + !vhost_has_work(dev); +} + +static int vhost_net_tx_get_vq_desc(struct vhost_net *net, + struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num) +{ + unsigned long uninitialized_var(endtime); + int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + out_num, in_num, NULL, NULL); + + if (r == vq->num && vq->busyloop_timeout) { + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + while (vhost_can_busy_poll(vq->dev, endtime) && + vhost_vq_avail_empty(vq->dev, vq)) + cpu_relax_lowlatency(); + preempt_enable(); + r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + out_num, in_num, NULL, NULL); + } + + return r; +} + /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) @@ -331,10 +368,9 @@ static void handle_tx(struct vhost_net *net) % UIO_MAXIOV == nvq->done_idx)) break; - head = vhost_get_vq_desc(vq, vq->iov, - ARRAY_SIZE(vq->iov), - &out, &in, - NULL, NULL); + head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in); /* On error, stop handling until the next kick. */ if (unlikely(head < 0)) break; @@ -435,6 +471,38 @@ static int peek_head_len(struct sock *sk) return len; } +static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) +{ + struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; + struct vhost_virtqueue *vq = &nvq->vq; + unsigned long uninitialized_var(endtime); + int len = peek_head_len(sk); + + if (!len && vq->busyloop_timeout) { + /* Both tx vq and rx socket were polled here */ + mutex_lock(&vq->mutex); + vhost_disable_notify(&net->dev, vq); + + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + + while (vhost_can_busy_poll(&net->dev, endtime) && + skb_queue_empty(&sk->sk_receive_queue) && + vhost_vq_avail_empty(&net->dev, vq)) + cpu_relax_lowlatency(); + + preempt_enable(); + + if (vhost_enable_notify(&net->dev, vq)) + vhost_poll_queue(&vq->poll); + mutex_unlock(&vq->mutex); + + len = peek_head_len(sk); + } + + return len; +} + /* This is a multi-buffer version of vhost_get_desc, that works if * vq has read descriptors only. * @vq - the relevant virtqueue @@ -553,7 +621,7 @@ static void handle_rx(struct vhost_net *net) vq->log : NULL; mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF); - while ((sock_len = peek_head_len(sock->sk))) { + while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) { sock_len += sock_hlen; vhost_len = sock_len + vhost_hlen; headcount = get_rx_bufs(vq, vq->heads, vhost_len, @@ -917,7 +985,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) vhost_net_disable_vq(n, vq); vq->private_data = sock; - r = vhost_init_used(vq); + r = vhost_vq_init_access(vq); if (r) goto err_used; r = vhost_net_enable_vq(n, vq); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 29cfc57d496e..f898686cdd93 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1274,7 +1274,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, vq = &vs->vqs[i].vq; mutex_lock(&vq->mutex); vq->private_data = vs_tpg; - vhost_init_used(vq); + vhost_vq_init_access(vq); mutex_unlock(&vq->mutex); } ret = 0; diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index f2882ac98726..388eec4e1a90 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -196,7 +196,7 @@ static long vhost_test_run(struct vhost_test *n, int test) oldpriv = vq->private_data; vq->private_data = priv; - r = vhost_init_used(&n->vqs[index]); + r = vhost_vq_init_access(&n->vqs[index]); mutex_unlock(&vq->mutex); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 236553e81027..669fef1e2bb6 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -43,11 +43,21 @@ enum { #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY -static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq) +static void vhost_disable_cross_endian(struct vhost_virtqueue *vq) { vq->user_be = !virtio_legacy_is_little_endian(); } +static void vhost_enable_cross_endian_big(struct vhost_virtqueue *vq) +{ + vq->user_be = true; +} + +static void vhost_enable_cross_endian_little(struct vhost_virtqueue *vq) +{ + vq->user_be = false; +} + static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) { struct vhost_vring_state s; @@ -62,7 +72,10 @@ static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) s.num != VHOST_VRING_BIG_ENDIAN) return -EINVAL; - vq->user_be = s.num; + if (s.num == VHOST_VRING_BIG_ENDIAN) + vhost_enable_cross_endian_big(vq); + else + vhost_enable_cross_endian_little(vq); return 0; } @@ -91,7 +104,7 @@ static void vhost_init_is_le(struct vhost_virtqueue *vq) vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be; } #else -static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq) +static void vhost_disable_cross_endian(struct vhost_virtqueue *vq) { } @@ -113,6 +126,11 @@ static void vhost_init_is_le(struct vhost_virtqueue *vq) } #endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */ +static void vhost_reset_is_le(struct vhost_virtqueue *vq) +{ + vq->is_le = virtio_legacy_is_little_endian(); +} + static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { @@ -245,6 +263,13 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) } EXPORT_SYMBOL_GPL(vhost_work_queue); +/* A lockless hint for busy polling code to exit the loop */ +bool vhost_has_work(struct vhost_dev *dev) +{ + return !list_empty(&dev->work_list); +} +EXPORT_SYMBOL_GPL(vhost_has_work); + void vhost_poll_queue(struct vhost_poll *poll) { vhost_work_queue(poll->dev, &poll->work); @@ -276,8 +301,9 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->call = NULL; vq->log_ctx = NULL; vq->memory = NULL; - vq->is_le = virtio_legacy_is_little_endian(); - vhost_vq_reset_user_be(vq); + vhost_reset_is_le(vq); + vhost_disable_cross_endian(vq); + vq->busyloop_timeout = 0; } static int vhost_worker(void *data) @@ -912,6 +938,19 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) case VHOST_GET_VRING_ENDIAN: r = vhost_get_vring_endian(vq, idx, argp); break; + case VHOST_SET_VRING_BUSYLOOP_TIMEOUT: + if (copy_from_user(&s, argp, sizeof(s))) { + r = -EFAULT; + break; + } + vq->busyloop_timeout = s.num; + break; + case VHOST_GET_VRING_BUSYLOOP_TIMEOUT: + s.index = idx; + s.num = vq->busyloop_timeout; + if (copy_to_user(argp, &s, sizeof(s))) + r = -EFAULT; + break; default: r = -ENOIOCTLCMD; } @@ -1152,14 +1191,14 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) return 0; } -int vhost_init_used(struct vhost_virtqueue *vq) +int vhost_vq_init_access(struct vhost_virtqueue *vq) { __virtio16 last_used_idx; int r; bool is_le = vq->is_le; if (!vq->private_data) { - vq->is_le = virtio_legacy_is_little_endian(); + vhost_reset_is_le(vq); return 0; } @@ -1182,7 +1221,7 @@ err: vq->is_le = is_le; return r; } -EXPORT_SYMBOL_GPL(vhost_init_used); +EXPORT_SYMBOL_GPL(vhost_vq_init_access); static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, struct iovec iov[], int iov_size) @@ -1633,6 +1672,20 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev, } EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n); +/* return true if we're sure that avaiable ring is empty */ +bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) +{ + __virtio16 avail_idx; + int r; + + r = __get_user(avail_idx, &vq->avail->idx); + if (r) + return false; + + return vhost16_to_cpu(vq, avail_idx) == vq->avail_idx; +} +EXPORT_SYMBOL_GPL(vhost_vq_avail_empty); + /* OK, now we need to know about added descriptors. */ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index d3f767448a72..d36d8beb3351 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -37,6 +37,7 @@ struct vhost_poll { void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn); void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work); +bool vhost_has_work(struct vhost_dev *dev); void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, unsigned long mask, struct vhost_dev *dev); @@ -114,6 +115,7 @@ struct vhost_virtqueue { /* Ring endianness requested by userspace for cross-endian support. */ bool user_be; #endif + u32 busyloop_timeout; }; struct vhost_dev { @@ -148,7 +150,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *, struct vhost_log *log, unsigned int *log_num); void vhost_discard_vq_desc(struct vhost_virtqueue *, int n); -int vhost_init_used(struct vhost_virtqueue *); +int vhost_vq_init_access(struct vhost_virtqueue *); int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len); int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads, unsigned count); @@ -158,6 +160,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *, struct vring_used_elem *heads, unsigned count); void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *); void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *); +bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *); bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, |