diff options
author | David S. Miller <davem@davemloft.net> | 2019-01-30 14:02:43 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-01-30 14:02:43 -0800 |
commit | a10cc84791b1eaa8471ff4e76a1bc8f399800a0b (patch) | |
tree | c10f040dd73559e01d2511127d7bac7b55aa7249 | |
parent | 41ef81be794f8d3f0edb7f15f952113ec999cbd6 (diff) | |
parent | 5050471d35d1316ba32dfcbb409978337eb9e75e (diff) | |
download | linux-a10cc84791b1eaa8471ff4e76a1bc8f399800a0b.tar.bz2 |
Merge branch 'virtio_net-Fix-problems-around-XDP-tx-and-napi_tx'
Toshiaki Makita says:
====================
virtio_net: Fix problems around XDP tx and napi_tx
While I'm looking into how to account standard tx counters on XDP tx
processing, I found several bugs around XDP tx and napi_tx.
Patch1: Fix oops on error path. Patch2 depends on this.
Patch2: Fix memory corruption on freeing xdp_frames with napi_tx enabled.
Patch3: Minor fix patch5 depends on.
Patch4: Fix memory corruption on processing xdp_frames when XDP is disabled.
Also patch5 depends on this.
Patch5: Fix memory corruption on processing xdp_frames while XDP is being
disabled.
Patch6: Minor fix patch7 depends on.
Patch7: Fix memory corruption on freeing sk_buff or xdp_frames when a normal
queue is reused for XDP and vise versa.
v2:
- patch5: Make rcu_assign_pointer/synchronize_net conditional instead of
_virtnet_set_queues.
- patch7: Use napi_consume_skb() instead of dev_consume_skb_any()
====================
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/virtio_net.c | 159 |
1 files changed, 112 insertions, 47 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8fadd8eaf601..259448182272 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -57,6 +57,8 @@ module_param(napi_tx, bool, 0644); #define VIRTIO_XDP_TX BIT(0) #define VIRTIO_XDP_REDIR BIT(1) +#define VIRTIO_XDP_FLAG BIT(0) + /* RX packet size EWMA. The average packet size is used to determine the packet * buffer size when refilling RX rings. As the entire RX ring may be refilled * at once, the weight is chosen so that the EWMA will be insensitive to short- @@ -252,6 +254,21 @@ struct padded_vnet_hdr { char padding[4]; }; +static bool is_xdp_frame(void *ptr) +{ + return (unsigned long)ptr & VIRTIO_XDP_FLAG; +} + +static void *xdp_to_ptr(struct xdp_frame *ptr) +{ + return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG); +} + +static struct xdp_frame *ptr_to_xdp(void *ptr) +{ + return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); +} + /* Converting between virtqueue no. and kernel tx/rx queue no. * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq */ @@ -462,7 +479,8 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, sg_init_one(sq->sg, xdpf->data, xdpf->len); - err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC); + err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf), + GFP_ATOMIC); if (unlikely(err)) return -ENOSPC; /* Caller handle free/refcnt */ @@ -482,36 +500,37 @@ static int virtnet_xdp_xmit(struct net_device *dev, { struct virtnet_info *vi = netdev_priv(dev); struct receive_queue *rq = vi->rq; - struct xdp_frame *xdpf_sent; struct bpf_prog *xdp_prog; struct send_queue *sq; unsigned int len; int drops = 0; int kicks = 0; int ret, err; + void *ptr; int i; - sq = virtnet_xdp_sq(vi); - - if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { - ret = -EINVAL; - drops = n; - goto out; - } - /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this * indicate XDP resources have been successfully allocated. */ xdp_prog = rcu_dereference(rq->xdp_prog); - if (!xdp_prog) { - ret = -ENXIO; + if (!xdp_prog) + return -ENXIO; + + sq = virtnet_xdp_sq(vi); + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { + ret = -EINVAL; drops = n; goto out; } /* Free up any pending old buffers before queueing new ones. */ - while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) - xdp_return_frame(xdpf_sent); + while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { + if (likely(is_xdp_frame(ptr))) + xdp_return_frame(ptr_to_xdp(ptr)); + else + napi_consume_skb(ptr, false); + } for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; @@ -1332,18 +1351,26 @@ static int virtnet_receive(struct receive_queue *rq, int budget, static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) { - struct sk_buff *skb; unsigned int len; unsigned int packets = 0; unsigned int bytes = 0; + void *ptr; - while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) { - pr_debug("Sent skb %p\n", skb); + while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { + if (likely(!is_xdp_frame(ptr))) { + struct sk_buff *skb = ptr; - bytes += skb->len; - packets++; + pr_debug("Sent skb %p\n", skb); + + bytes += skb->len; + napi_consume_skb(skb, in_napi); + } else { + struct xdp_frame *frame = ptr_to_xdp(ptr); - napi_consume_skb(skb, in_napi); + bytes += frame->len; + xdp_return_frame(frame); + } + packets++; } /* Avoid overhead when no packets have been processed @@ -1358,6 +1385,16 @@ static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) u64_stats_update_end(&sq->stats.syncp); } +static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) +{ + if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) + return false; + else if (q < vi->curr_queue_pairs) + return true; + else + return false; +} + static void virtnet_poll_cleantx(struct receive_queue *rq) { struct virtnet_info *vi = rq->vq->vdev->priv; @@ -1365,7 +1402,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq) struct send_queue *sq = &vi->sq[index]; struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); - if (!sq->napi.weight) + if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) return; if (__netif_tx_trylock(txq)) { @@ -1442,8 +1479,16 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) { struct send_queue *sq = container_of(napi, struct send_queue, napi); struct virtnet_info *vi = sq->vq->vdev->priv; - struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq)); + unsigned int index = vq2txq(sq->vq); + struct netdev_queue *txq; + if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { + /* We don't need to enable cb for XDP */ + napi_complete_done(napi, 0); + return 0; + } + + txq = netdev_get_tx_queue(vi->dev, index); __netif_tx_lock(txq, raw_smp_processor_id()); free_old_xmit_skbs(sq, true); __netif_tx_unlock(txq); @@ -2395,6 +2440,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, return -ENOMEM; } + old_prog = rtnl_dereference(vi->rq[0].xdp_prog); + if (!prog && !old_prog) + return 0; + if (prog) { prog = bpf_prog_add(prog, vi->max_queue_pairs - 1); if (IS_ERR(prog)) @@ -2402,36 +2451,62 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, } /* Make sure NAPI is not using any XDP TX queues for RX. */ - if (netif_running(dev)) - for (i = 0; i < vi->max_queue_pairs; i++) + if (netif_running(dev)) { + for (i = 0; i < vi->max_queue_pairs; i++) { napi_disable(&vi->rq[i].napi); + virtnet_napi_tx_disable(&vi->sq[i].napi); + } + } + + if (!prog) { + for (i = 0; i < vi->max_queue_pairs; i++) { + rcu_assign_pointer(vi->rq[i].xdp_prog, prog); + if (i == 0) + virtnet_restore_guest_offloads(vi); + } + synchronize_net(); + } - netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); err = _virtnet_set_queues(vi, curr_qp + xdp_qp); if (err) goto err; + netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); vi->xdp_queue_pairs = xdp_qp; - for (i = 0; i < vi->max_queue_pairs; i++) { - old_prog = rtnl_dereference(vi->rq[i].xdp_prog); - rcu_assign_pointer(vi->rq[i].xdp_prog, prog); - if (i == 0) { - if (!old_prog) + if (prog) { + for (i = 0; i < vi->max_queue_pairs; i++) { + rcu_assign_pointer(vi->rq[i].xdp_prog, prog); + if (i == 0 && !old_prog) virtnet_clear_guest_offloads(vi); - if (!prog) - virtnet_restore_guest_offloads(vi); } + } + + for (i = 0; i < vi->max_queue_pairs; i++) { if (old_prog) bpf_prog_put(old_prog); - if (netif_running(dev)) + if (netif_running(dev)) { virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + virtnet_napi_tx_enable(vi, vi->sq[i].vq, + &vi->sq[i].napi); + } } return 0; err: - for (i = 0; i < vi->max_queue_pairs; i++) - virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + if (!prog) { + virtnet_clear_guest_offloads(vi); + for (i = 0; i < vi->max_queue_pairs; i++) + rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); + } + + if (netif_running(dev)) { + for (i = 0; i < vi->max_queue_pairs; i++) { + virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + virtnet_napi_tx_enable(vi, vi->sq[i].vq, + &vi->sq[i].napi); + } + } if (prog) bpf_prog_sub(prog, vi->max_queue_pairs - 1); return err; @@ -2613,16 +2688,6 @@ static void free_receive_page_frags(struct virtnet_info *vi) put_page(vi->rq[i].alloc_frag.page); } -static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) -{ - if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) - return false; - else if (q < vi->curr_queue_pairs) - return true; - else - return false; -} - static void free_unused_bufs(struct virtnet_info *vi) { void *buf; @@ -2631,10 +2696,10 @@ static void free_unused_bufs(struct virtnet_info *vi) for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->sq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { - if (!is_xdp_raw_buffer_queue(vi, i)) + if (!is_xdp_frame(buf)) dev_kfree_skb(buf); else - put_page(virt_to_head_page(buf)); + xdp_return_frame(ptr_to_xdp(buf)); } } |