summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2019-01-30 14:02:43 -0800
committerDavid S. Miller <davem@davemloft.net>2019-01-30 14:02:43 -0800
commita10cc84791b1eaa8471ff4e76a1bc8f399800a0b (patch)
treec10f040dd73559e01d2511127d7bac7b55aa7249
parent41ef81be794f8d3f0edb7f15f952113ec999cbd6 (diff)
parent5050471d35d1316ba32dfcbb409978337eb9e75e (diff)
downloadlinux-a10cc84791b1eaa8471ff4e76a1bc8f399800a0b.tar.bz2
Merge branch 'virtio_net-Fix-problems-around-XDP-tx-and-napi_tx'
Toshiaki Makita says: ==================== virtio_net: Fix problems around XDP tx and napi_tx While I'm looking into how to account standard tx counters on XDP tx processing, I found several bugs around XDP tx and napi_tx. Patch1: Fix oops on error path. Patch2 depends on this. Patch2: Fix memory corruption on freeing xdp_frames with napi_tx enabled. Patch3: Minor fix patch5 depends on. Patch4: Fix memory corruption on processing xdp_frames when XDP is disabled. Also patch5 depends on this. Patch5: Fix memory corruption on processing xdp_frames while XDP is being disabled. Patch6: Minor fix patch7 depends on. Patch7: Fix memory corruption on freeing sk_buff or xdp_frames when a normal queue is reused for XDP and vise versa. v2: - patch5: Make rcu_assign_pointer/synchronize_net conditional instead of _virtnet_set_queues. - patch7: Use napi_consume_skb() instead of dev_consume_skb_any() ==================== Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/virtio_net.c159
1 files changed, 112 insertions, 47 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 8fadd8eaf601..259448182272 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -57,6 +57,8 @@ module_param(napi_tx, bool, 0644);
#define VIRTIO_XDP_TX BIT(0)
#define VIRTIO_XDP_REDIR BIT(1)
+#define VIRTIO_XDP_FLAG BIT(0)
+
/* RX packet size EWMA. The average packet size is used to determine the packet
* buffer size when refilling RX rings. As the entire RX ring may be refilled
* at once, the weight is chosen so that the EWMA will be insensitive to short-
@@ -252,6 +254,21 @@ struct padded_vnet_hdr {
char padding[4];
};
+static bool is_xdp_frame(void *ptr)
+{
+ return (unsigned long)ptr & VIRTIO_XDP_FLAG;
+}
+
+static void *xdp_to_ptr(struct xdp_frame *ptr)
+{
+ return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
+}
+
+static struct xdp_frame *ptr_to_xdp(void *ptr)
+{
+ return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
+}
+
/* Converting between virtqueue no. and kernel tx/rx queue no.
* 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
*/
@@ -462,7 +479,8 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
sg_init_one(sq->sg, xdpf->data, xdpf->len);
- err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC);
+ err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf),
+ GFP_ATOMIC);
if (unlikely(err))
return -ENOSPC; /* Caller handle free/refcnt */
@@ -482,36 +500,37 @@ static int virtnet_xdp_xmit(struct net_device *dev,
{
struct virtnet_info *vi = netdev_priv(dev);
struct receive_queue *rq = vi->rq;
- struct xdp_frame *xdpf_sent;
struct bpf_prog *xdp_prog;
struct send_queue *sq;
unsigned int len;
int drops = 0;
int kicks = 0;
int ret, err;
+ void *ptr;
int i;
- sq = virtnet_xdp_sq(vi);
-
- if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
- ret = -EINVAL;
- drops = n;
- goto out;
- }
-
/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
* indicate XDP resources have been successfully allocated.
*/
xdp_prog = rcu_dereference(rq->xdp_prog);
- if (!xdp_prog) {
- ret = -ENXIO;
+ if (!xdp_prog)
+ return -ENXIO;
+
+ sq = virtnet_xdp_sq(vi);
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
+ ret = -EINVAL;
drops = n;
goto out;
}
/* Free up any pending old buffers before queueing new ones. */
- while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
- xdp_return_frame(xdpf_sent);
+ while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+ if (likely(is_xdp_frame(ptr)))
+ xdp_return_frame(ptr_to_xdp(ptr));
+ else
+ napi_consume_skb(ptr, false);
+ }
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
@@ -1332,18 +1351,26 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
{
- struct sk_buff *skb;
unsigned int len;
unsigned int packets = 0;
unsigned int bytes = 0;
+ void *ptr;
- while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
- pr_debug("Sent skb %p\n", skb);
+ while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+ if (likely(!is_xdp_frame(ptr))) {
+ struct sk_buff *skb = ptr;
- bytes += skb->len;
- packets++;
+ pr_debug("Sent skb %p\n", skb);
+
+ bytes += skb->len;
+ napi_consume_skb(skb, in_napi);
+ } else {
+ struct xdp_frame *frame = ptr_to_xdp(ptr);
- napi_consume_skb(skb, in_napi);
+ bytes += frame->len;
+ xdp_return_frame(frame);
+ }
+ packets++;
}
/* Avoid overhead when no packets have been processed
@@ -1358,6 +1385,16 @@ static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
u64_stats_update_end(&sq->stats.syncp);
}
+static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
+{
+ if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
+ return false;
+ else if (q < vi->curr_queue_pairs)
+ return true;
+ else
+ return false;
+}
+
static void virtnet_poll_cleantx(struct receive_queue *rq)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
@@ -1365,7 +1402,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
struct send_queue *sq = &vi->sq[index];
struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
- if (!sq->napi.weight)
+ if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
return;
if (__netif_tx_trylock(txq)) {
@@ -1442,8 +1479,16 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
{
struct send_queue *sq = container_of(napi, struct send_queue, napi);
struct virtnet_info *vi = sq->vq->vdev->priv;
- struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq));
+ unsigned int index = vq2txq(sq->vq);
+ struct netdev_queue *txq;
+ if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
+ /* We don't need to enable cb for XDP */
+ napi_complete_done(napi, 0);
+ return 0;
+ }
+
+ txq = netdev_get_tx_queue(vi->dev, index);
__netif_tx_lock(txq, raw_smp_processor_id());
free_old_xmit_skbs(sq, true);
__netif_tx_unlock(txq);
@@ -2395,6 +2440,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
return -ENOMEM;
}
+ old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
+ if (!prog && !old_prog)
+ return 0;
+
if (prog) {
prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
if (IS_ERR(prog))
@@ -2402,36 +2451,62 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
}
/* Make sure NAPI is not using any XDP TX queues for RX. */
- if (netif_running(dev))
- for (i = 0; i < vi->max_queue_pairs; i++)
+ if (netif_running(dev)) {
+ for (i = 0; i < vi->max_queue_pairs; i++) {
napi_disable(&vi->rq[i].napi);
+ virtnet_napi_tx_disable(&vi->sq[i].napi);
+ }
+ }
+
+ if (!prog) {
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+ if (i == 0)
+ virtnet_restore_guest_offloads(vi);
+ }
+ synchronize_net();
+ }
- netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
if (err)
goto err;
+ netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
vi->xdp_queue_pairs = xdp_qp;
- for (i = 0; i < vi->max_queue_pairs; i++) {
- old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
- rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
- if (i == 0) {
- if (!old_prog)
+ if (prog) {
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+ if (i == 0 && !old_prog)
virtnet_clear_guest_offloads(vi);
- if (!prog)
- virtnet_restore_guest_offloads(vi);
}
+ }
+
+ for (i = 0; i < vi->max_queue_pairs; i++) {
if (old_prog)
bpf_prog_put(old_prog);
- if (netif_running(dev))
+ if (netif_running(dev)) {
virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+ virtnet_napi_tx_enable(vi, vi->sq[i].vq,
+ &vi->sq[i].napi);
+ }
}
return 0;
err:
- for (i = 0; i < vi->max_queue_pairs; i++)
- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+ if (!prog) {
+ virtnet_clear_guest_offloads(vi);
+ for (i = 0; i < vi->max_queue_pairs; i++)
+ rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
+ }
+
+ if (netif_running(dev)) {
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+ virtnet_napi_tx_enable(vi, vi->sq[i].vq,
+ &vi->sq[i].napi);
+ }
+ }
if (prog)
bpf_prog_sub(prog, vi->max_queue_pairs - 1);
return err;
@@ -2613,16 +2688,6 @@ static void free_receive_page_frags(struct virtnet_info *vi)
put_page(vi->rq[i].alloc_frag.page);
}
-static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
-{
- if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
- return false;
- else if (q < vi->curr_queue_pairs)
- return true;
- else
- return false;
-}
-
static void free_unused_bufs(struct virtnet_info *vi)
{
void *buf;
@@ -2631,10 +2696,10 @@ static void free_unused_bufs(struct virtnet_info *vi)
for (i = 0; i < vi->max_queue_pairs; i++) {
struct virtqueue *vq = vi->sq[i].vq;
while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
- if (!is_xdp_raw_buffer_queue(vi, i))
+ if (!is_xdp_frame(buf))
dev_kfree_skb(buf);
else
- put_page(virt_to_head_page(buf));
+ xdp_return_frame(ptr_to_xdp(buf));
}
}