Diffstat (limited to 'net/xdp')
-rw-r--r--  net/xdp/xdp_umem.c        | 106
-rw-r--r--  net/xdp/xdp_umem.h        |  12
-rw-r--r--  net/xdp/xdp_umem_props.h  |  14
-rw-r--r--  net/xdp/xsk.c             |  56
-rw-r--r--  net/xdp/xsk_queue.c       |  60
-rw-r--r--  net/xdp/xsk_queue.h       |  16
6 files changed, 170 insertions(+), 94 deletions(-)
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index bfe2dbea480b..a264cf2accd0 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -32,37 +32,49 @@ void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
 {
 	unsigned long flags;
 
-	if (xs->dev) {
-		spin_lock_irqsave(&umem->xsk_list_lock, flags);
-		list_del_rcu(&xs->list);
-		spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
-
-		if (umem->zc)
-			synchronize_net();
-	}
+	spin_lock_irqsave(&umem->xsk_list_lock, flags);
+	list_del_rcu(&xs->list);
+	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
 }
 
-int xdp_umem_query(struct net_device *dev, u16 queue_id)
+/* The umem is stored both in the _rx struct and the _tx struct as we do
+ * not know if the device has more tx queues than rx, or the opposite.
+ * This might also change during run time.
+ */
+static void xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem,
+				u16 queue_id)
 {
-	struct netdev_bpf bpf;
+	if (queue_id < dev->real_num_rx_queues)
+		dev->_rx[queue_id].umem = umem;
+	if (queue_id < dev->real_num_tx_queues)
+		dev->_tx[queue_id].umem = umem;
+}
 
-	ASSERT_RTNL();
+struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
+				       u16 queue_id)
+{
+	if (queue_id < dev->real_num_rx_queues)
+		return dev->_rx[queue_id].umem;
+	if (queue_id < dev->real_num_tx_queues)
+		return dev->_tx[queue_id].umem;
 
-	memset(&bpf, 0, sizeof(bpf));
-	bpf.command = XDP_QUERY_XSK_UMEM;
-	bpf.xsk.queue_id = queue_id;
+	return NULL;
+}
 
-	if (!dev->netdev_ops->ndo_bpf)
-		return 0;
-	return dev->netdev_ops->ndo_bpf(dev, &bpf) ?: !!bpf.xsk.umem;
+static void xdp_clear_umem_at_qid(struct net_device *dev, u16 queue_id)
+{
+	if (queue_id < dev->real_num_rx_queues)
+		dev->_rx[queue_id].umem = NULL;
+	if (queue_id < dev->real_num_tx_queues)
+		dev->_tx[queue_id].umem = NULL;
 }
 
 int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
-			u32 queue_id, u16 flags)
+			u16 queue_id, u16 flags)
 {
 	bool force_zc, force_copy;
 	struct netdev_bpf bpf;
-	int err;
+	int err = 0;
 
 	force_zc = flags & XDP_ZEROCOPY;
 	force_copy = flags & XDP_COPY;
@@ -70,19 +82,23 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
 	if (force_zc && force_copy)
 		return -EINVAL;
 
-	if (force_copy)
-		return 0;
-
-	if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit)
-		return force_zc ? -EOPNOTSUPP : 0; /* fail or fallback */
+	rtnl_lock();
+	if (xdp_get_umem_from_qid(dev, queue_id)) {
+		err = -EBUSY;
+		goto out_rtnl_unlock;
+	}
 
-	bpf.command = XDP_QUERY_XSK_UMEM;
+	xdp_reg_umem_at_qid(dev, umem, queue_id);
+	umem->dev = dev;
+	umem->queue_id = queue_id;
+	if (force_copy)
+		/* For copy-mode, we are done. */
+		goto out_rtnl_unlock;
 
-	rtnl_lock();
-	err = xdp_umem_query(dev, queue_id);
-	if (err) {
-		err = err < 0 ? -EOPNOTSUPP : -EBUSY;
-		goto err_rtnl_unlock;
+	if (!dev->netdev_ops->ndo_bpf ||
+	    !dev->netdev_ops->ndo_xsk_async_xmit) {
+		err = -EOPNOTSUPP;
+		goto err_unreg_umem;
 	}
 
 	bpf.command = XDP_SETUP_XSK_UMEM;
@@ -91,18 +107,20 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
 
 	err = dev->netdev_ops->ndo_bpf(dev, &bpf);
 	if (err)
-		goto err_rtnl_unlock;
+		goto err_unreg_umem;
 	rtnl_unlock();
 
 	dev_hold(dev);
-	umem->dev = dev;
-	umem->queue_id = queue_id;
 	umem->zc = true;
 	return 0;
 
-err_rtnl_unlock:
+err_unreg_umem:
+	xdp_clear_umem_at_qid(dev, queue_id);
+	if (!force_zc)
+		err = 0; /* fallback to copy mode */
+out_rtnl_unlock:
 	rtnl_unlock();
-	return force_zc ? err : 0; /* fail or fallback */
+	return err;
 }
 
 static void xdp_umem_clear_dev(struct xdp_umem *umem)
@@ -110,7 +128,7 @@ static void xdp_umem_clear_dev(struct xdp_umem *umem)
 	struct netdev_bpf bpf;
 	int err;
 
-	if (umem->dev) {
+	if (umem->zc) {
 		bpf.command = XDP_SETUP_XSK_UMEM;
 		bpf.xsk.umem = NULL;
 		bpf.xsk.queue_id = umem->queue_id;
@@ -121,9 +139,17 @@ static void xdp_umem_clear_dev(struct xdp_umem *umem)
 
 		if (err)
 			WARN(1, "failed to disable umem!\n");
+	}
+
+	if (umem->dev) {
+		rtnl_lock();
+		xdp_clear_umem_at_qid(umem->dev, umem->queue_id);
+		rtnl_unlock();
+	}
 
+	if (umem->zc) {
 		dev_put(umem->dev);
-		umem->dev = NULL;
+		umem->zc = false;
 	}
 }
 
@@ -167,6 +193,8 @@ static void xdp_umem_release(struct xdp_umem *umem)
 		umem->cq = NULL;
 	}
 
+	xsk_reuseq_destroy(umem);
+
 	xdp_umem_unpin_pages(umem);
 
 	task = get_pid_task(umem->pid, PIDTYPE_PID);
@@ -314,8 +342,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 
 	umem->pid = get_task_pid(current, PIDTYPE_PID);
 	umem->address = (unsigned long)addr;
-	umem->props.chunk_mask = ~((u64)chunk_size - 1);
-	umem->props.size = size;
+	umem->chunk_mask = ~((u64)chunk_size - 1);
+	umem->size = size;
 	umem->headroom = headroom;
 	umem->chunk_size_nohr = chunk_size - headroom;
 	umem->npgs = size / PAGE_SIZE;
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index f11560334f88..27603227601b 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -8,18 +8,8 @@
 
 #include <net/xdp_sock.h>
 
-static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
-{
-	return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
-}
-
-static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
-{
-	return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
-}
-
 int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
-			u32 queue_id, u16 flags);
+			u16 queue_id, u16 flags);
 bool xdp_umem_validate_queues(struct xdp_umem *umem);
 void xdp_get_umem(struct xdp_umem *umem);
 void xdp_put_umem(struct xdp_umem *umem);
diff --git a/net/xdp/xdp_umem_props.h b/net/xdp/xdp_umem_props.h
deleted file mode 100644
index 40eab10dfc49..000000000000
--- a/net/xdp/xdp_umem_props.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* XDP user-space packet buffer
- * Copyright(c) 2018 Intel Corporation.
- */
-
-#ifndef XDP_UMEM_PROPS_H_
-#define XDP_UMEM_PROPS_H_
-
-struct xdp_umem_props {
-	u64 chunk_mask;
-	u64 size;
-};
-
-#endif /* XDP_UMEM_PROPS_H_ */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 4e937cd7c17d..07156f43d295 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -55,20 +55,30 @@ EXPORT_SYMBOL(xsk_umem_discard_addr);
 
 static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 {
-	void *buffer;
+	void *to_buf, *from_buf;
+	u32 metalen;
 	u64 addr;
 	int err;
 
 	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
-	    len > xs->umem->chunk_size_nohr) {
+	    len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
 		xs->rx_dropped++;
 		return -ENOSPC;
 	}
 
 	addr += xs->umem->headroom;
 
-	buffer = xdp_umem_get_data(xs->umem, addr);
-	memcpy(buffer, xdp->data, len);
+	if (unlikely(xdp_data_meta_unsupported(xdp))) {
+		from_buf = xdp->data;
+		metalen = 0;
+	} else {
+		from_buf = xdp->data_meta;
+		metalen = xdp->data - xdp->data_meta;
+	}
+
+	to_buf = xdp_umem_get_data(xs->umem, addr);
+	memcpy(to_buf, from_buf, len + metalen);
+	addr += metalen;
 	err = xskq_produce_batch_desc(xs->rx, addr, len);
 	if (!err) {
 		xskq_discard_addr(xs->umem->fq);
@@ -111,6 +121,7 @@ void xsk_flush(struct xdp_sock *xs)
 
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
+	u32 metalen = xdp->data - xdp->data_meta;
 	u32 len = xdp->data_end - xdp->data;
 	void *buffer;
 	u64 addr;
@@ -120,7 +131,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 		return -EINVAL;
 
 	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
-	    len > xs->umem->chunk_size_nohr) {
+	    len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
 		xs->rx_dropped++;
 		return -ENOSPC;
 	}
@@ -128,7 +139,8 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 	addr += xs->umem->headroom;
 
 	buffer = xdp_umem_get_data(xs->umem, addr);
-	memcpy(buffer, xdp->data, len);
+	memcpy(buffer, xdp->data_meta, len + metalen);
+	addr += metalen;
 	err = xskq_produce_batch_desc(xs->rx, addr, len);
 	if (!err) {
 		xskq_discard_addr(xs->umem->fq);
@@ -343,12 +355,18 @@ static int xsk_release(struct socket *sock)
 	local_bh_enable();
 
 	if (xs->dev) {
+		struct net_device *dev = xs->dev;
+
 		/* Wait for driver to stop using the xdp socket. */
-		synchronize_net();
-		dev_put(xs->dev);
+		xdp_del_sk_umem(xs->umem, xs);
 		xs->dev = NULL;
+		synchronize_net();
+		dev_put(dev);
 	}
 
+	xskq_destroy(xs->rx);
+	xskq_destroy(xs->tx);
+
 	sock_orphan(sk);
 	sock->sk = NULL;
 
@@ -407,13 +425,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 	}
 
 	qid = sxdp->sxdp_queue_id;
-
-	if ((xs->rx && qid >= dev->real_num_rx_queues) ||
-	    (xs->tx && qid >= dev->real_num_tx_queues)) {
-		err = -EINVAL;
-		goto out_unlock;
-	}
-
 	flags = sxdp->sxdp_flags;
 
 	if (flags & XDP_SHARED_UMEM) {
@@ -458,8 +469,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 			goto out_unlock;
 	} else {
 		/* This xsk has its own umem. */
-		xskq_set_umem(xs->umem->fq, &xs->umem->props);
-		xskq_set_umem(xs->umem->cq, &xs->umem->props);
+		xskq_set_umem(xs->umem->fq, xs->umem->size,
+			      xs->umem->chunk_mask);
+		xskq_set_umem(xs->umem->cq, xs->umem->size,
+			      xs->umem->chunk_mask);
 
 		err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
 		if (err)
@@ -469,8 +482,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 	xs->dev = dev;
 	xs->zc = xs->umem->zc;
 	xs->queue_id = qid;
-	xskq_set_umem(xs->rx, &xs->umem->props);
-	xskq_set_umem(xs->tx, &xs->umem->props);
+	xskq_set_umem(xs->rx, xs->umem->size, xs->umem->chunk_mask);
+	xskq_set_umem(xs->tx, xs->umem->size, xs->umem->chunk_mask);
 	xdp_add_sk_umem(xs->umem, xs);
 
 out_unlock:
@@ -707,9 +720,6 @@ static void xsk_destruct(struct sock *sk)
 	if (!sock_flag(sk, SOCK_DEAD))
 		return;
 
-	xskq_destroy(xs->rx);
-	xskq_destroy(xs->tx);
-	xdp_del_sk_umem(xs->umem, xs);
 	xdp_put_umem(xs->umem);
 
 	sk_refcnt_debug_dec(sk);
@@ -744,6 +754,8 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
 	sk->sk_destruct = xsk_destruct;
 	sk_refcnt_debug_inc(sk);
 
+	sock_set_flag(sk, SOCK_RCU_FREE);
+
 	xs = xdp_sk(sk);
 	mutex_init(&xs->mutex);
 	spin_lock_init(&xs->tx_completion_lock);
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
index 6c32e92e98fc..b66504592d9b 100644
--- a/net/xdp/xsk_queue.c
+++ b/net/xdp/xsk_queue.c
@@ -3,16 +3,19 @@
  * Copyright(c) 2018 Intel Corporation.
  */
 
+#include <linux/log2.h>
 #include <linux/slab.h>
+#include <linux/overflow.h>
 
 #include "xsk_queue.h"
 
-void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props)
+void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask)
 {
 	if (!q)
 		return;
 
-	q->umem_props = *umem_props;
+	q->size = size;
+	q->chunk_mask = chunk_mask;
 }
 
 static u32 xskq_umem_get_ring_size(struct xsk_queue *q)
@@ -61,3 +64,56 @@ void xskq_destroy(struct xsk_queue *q)
 	page_frag_free(q->ring);
 	kfree(q);
 }
+
+struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries)
+{
+	struct xdp_umem_fq_reuse *newq;
+
+	/* Check for overflow */
+	if (nentries > (u32)roundup_pow_of_two(nentries))
+		return NULL;
+	nentries = roundup_pow_of_two(nentries);
+
+	newq = kvmalloc(struct_size(newq, handles, nentries), GFP_KERNEL);
+	if (!newq)
+		return NULL;
+	memset(newq, 0, offsetof(typeof(*newq), handles));
+
+	newq->nentries = nentries;
+	return newq;
+}
+EXPORT_SYMBOL_GPL(xsk_reuseq_prepare);
+
+struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
+					  struct xdp_umem_fq_reuse *newq)
+{
+	struct xdp_umem_fq_reuse *oldq = umem->fq_reuse;
+
+	if (!oldq) {
+		umem->fq_reuse = newq;
+		return NULL;
+	}
+
+	if (newq->nentries < oldq->length)
+		return newq;
+
+	memcpy(newq->handles, oldq->handles,
+	       array_size(oldq->length, sizeof(u64)));
+	newq->length = oldq->length;
+
+	umem->fq_reuse = newq;
+	return oldq;
+}
+EXPORT_SYMBOL_GPL(xsk_reuseq_swap);
+
+void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq)
+{
+	kvfree(rq);
+}
+EXPORT_SYMBOL_GPL(xsk_reuseq_free);
+
+void xsk_reuseq_destroy(struct xdp_umem *umem)
+{
+	xsk_reuseq_free(umem->fq_reuse);
+	umem->fq_reuse = NULL;
+}
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 8a64b150be54..bcb5cbb40419 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -31,7 +31,8 @@ struct xdp_umem_ring {
 };
 
 struct xsk_queue {
-	struct xdp_umem_props umem_props;
+	u64 chunk_mask;
+	u64 size;
 	u32 ring_mask;
 	u32 nentries;
 	u32 prod_head;
@@ -78,7 +79,7 @@ static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
 
 static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
 {
-	if (addr >= q->umem_props.size) {
+	if (addr >= q->size) {
 		q->invalid_descs++;
 		return false;
 	}
@@ -92,7 +93,7 @@ static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr)
 		struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
 		unsigned int idx = q->cons_tail & q->ring_mask;
 
-		*addr = READ_ONCE(ring->desc[idx]) & q->umem_props.chunk_mask;
+		*addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask;
 		if (xskq_is_valid_addr(q, *addr))
 			return addr;
 
@@ -173,8 +174,8 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
 	if (!xskq_is_valid_addr(q, d->addr))
 		return false;
 
-	if (((d->addr + d->len) & q->umem_props.chunk_mask) !=
-	    (d->addr & q->umem_props.chunk_mask)) {
+	if (((d->addr + d->len) & q->chunk_mask) !=
+	    (d->addr & q->chunk_mask)) {
 		q->invalid_descs++;
 		return false;
 	}
@@ -253,8 +254,11 @@ static inline bool xskq_empty_desc(struct xsk_queue *q)
 	return xskq_nb_free(q, q->prod_tail, q->nentries) == q->nentries;
 }
 
-void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props);
+void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask);
struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
 void xskq_destroy(struct xsk_queue *q_ops);
 
+/* Executed by the core when the entire UMEM gets freed */
+void xsk_reuseq_destroy(struct xdp_umem *umem);
+
 #endif /* _LINUX_XSK_QUEUE_H */
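
Note on the copy-mode receive change in __xsk_rcv()/xsk_generic_rcv(): the XDP metadata area (xdp->data_meta .. xdp->data) is now copied into the umem chunk directly in front of the packet, and the descriptor address is advanced past it (addr += metalen), so the metadata ends exactly at desc->addr and is not counted in desc->len. A minimal user-space sketch of how a consumer could locate it, assuming a hypothetical, program-defined struct my_meta written by the XDP program with bpf_xdp_adjust_meta(); only struct xdp_desc comes from the AF_XDP uapi, the rest is illustrative:

#include <linux/if_xdp.h>	/* struct xdp_desc (AF_XDP uapi) */

/* Hypothetical metadata layout; whatever the XDP program prepended
 * with bpf_xdp_adjust_meta() lands right in front of the packet data.
 */
struct my_meta {
	__u32 rx_hash;
};

static struct my_meta *frame_meta(void *umem_area, const struct xdp_desc *desc)
{
	/* desc->addr points at the packet start; the copied metadata sits
	 * immediately before it, i.e. at desc->addr - sizeof(struct my_meta).
	 */
	return (struct my_meta *)((char *)umem_area + desc->addr) - 1;
}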
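The new fill-queue reuse API (xsk_reuseq_prepare(), xsk_reuseq_swap(), xsk_reuseq_free()) is exported for zero-copy drivers so that fill-queue entries can be parked across a ring teardown instead of being dropped. A minimal sketch of the intended setup pattern, assuming the declarations live in net/xdp_sock.h and a driver-chosen ring_size; the xsk_reuseq_* calls are the ones added by this diff, the helper name and error handling are illustrative:

#include <net/xdp_sock.h>	/* xsk_reuseq_* declarations (assumed location) */

/* Called by a driver while attaching a umem to an RX ring. */
static int my_drv_alloc_reuseq(struct xdp_umem *umem, u32 ring_size)
{
	struct xdp_umem_fq_reuse *reuseq;

	/* Size the stash so that every buffer of the ring fits in it. */
	reuseq = xsk_reuseq_prepare(ring_size);
	if (!reuseq)
		return -ENOMEM;

	/* Swap it in. xsk_reuseq_swap() hands back the queue we must free:
	 * the previous one, or the new one if it was too small to take over
	 * the stashed handles (NULL on first use, which kvfree tolerates).
	 */
	xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
	return 0;
}

The core only frees the stash via xsk_reuseq_destroy() when the whole umem is released, so the prepare/swap pair is the only part a driver needs at ring setup time.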