Diffstat (limited to 'net/xdp')
-rw-r--r--  net/xdp/xdp_umem.c        106
-rw-r--r--  net/xdp/xdp_umem.h         12
-rw-r--r--  net/xdp/xdp_umem_props.h   14
-rw-r--r--  net/xdp/xsk.c              56
-rw-r--r--  net/xdp/xsk_queue.c        60
-rw-r--r--  net/xdp/xsk_queue.h        16
6 files changed, 170 insertions(+), 94 deletions(-)
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index bfe2dbea480b..a264cf2accd0 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -32,37 +32,49 @@ void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
unsigned long flags;
- if (xs->dev) {
- spin_lock_irqsave(&umem->xsk_list_lock, flags);
- list_del_rcu(&xs->list);
- spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
-
- if (umem->zc)
- synchronize_net();
- }
+ spin_lock_irqsave(&umem->xsk_list_lock, flags);
+ list_del_rcu(&xs->list);
+ spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}
-int xdp_umem_query(struct net_device *dev, u16 queue_id)
+/* The umem is stored both in the _rx struct and the _tx struct as we do
+ * not know if the device has more tx queues than rx, or the opposite.
+ * This might also change during run time.
+ */
+static void xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem,
+ u16 queue_id)
{
- struct netdev_bpf bpf;
+ if (queue_id < dev->real_num_rx_queues)
+ dev->_rx[queue_id].umem = umem;
+ if (queue_id < dev->real_num_tx_queues)
+ dev->_tx[queue_id].umem = umem;
+}
- ASSERT_RTNL();
+struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
+ u16 queue_id)
+{
+ if (queue_id < dev->real_num_rx_queues)
+ return dev->_rx[queue_id].umem;
+ if (queue_id < dev->real_num_tx_queues)
+ return dev->_tx[queue_id].umem;
- memset(&bpf, 0, sizeof(bpf));
- bpf.command = XDP_QUERY_XSK_UMEM;
- bpf.xsk.queue_id = queue_id;
+ return NULL;
+}
- if (!dev->netdev_ops->ndo_bpf)
- return 0;
- return dev->netdev_ops->ndo_bpf(dev, &bpf) ?: !!bpf.xsk.umem;
+static void xdp_clear_umem_at_qid(struct net_device *dev, u16 queue_id)
+{
+ if (queue_id < dev->real_num_rx_queues)
+ dev->_rx[queue_id].umem = NULL;
+ if (queue_id < dev->real_num_tx_queues)
+ dev->_tx[queue_id].umem = NULL;
}
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
- u32 queue_id, u16 flags)
+ u16 queue_id, u16 flags)
{
bool force_zc, force_copy;
struct netdev_bpf bpf;
- int err;
+ int err = 0;
force_zc = flags & XDP_ZEROCOPY;
force_copy = flags & XDP_COPY;
@@ -70,19 +82,23 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
if (force_zc && force_copy)
return -EINVAL;
- if (force_copy)
- return 0;
-
- if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit)
- return force_zc ? -EOPNOTSUPP : 0; /* fail or fallback */
+ rtnl_lock();
+ if (xdp_get_umem_from_qid(dev, queue_id)) {
+ err = -EBUSY;
+ goto out_rtnl_unlock;
+ }
- bpf.command = XDP_QUERY_XSK_UMEM;
+ xdp_reg_umem_at_qid(dev, umem, queue_id);
+ umem->dev = dev;
+ umem->queue_id = queue_id;
+ if (force_copy)
+ /* For copy-mode, we are done. */
+ goto out_rtnl_unlock;
- rtnl_lock();
- err = xdp_umem_query(dev, queue_id);
- if (err) {
- err = err < 0 ? -EOPNOTSUPP : -EBUSY;
- goto err_rtnl_unlock;
+ if (!dev->netdev_ops->ndo_bpf ||
+ !dev->netdev_ops->ndo_xsk_async_xmit) {
+ err = -EOPNOTSUPP;
+ goto err_unreg_umem;
}
bpf.command = XDP_SETUP_XSK_UMEM;
@@ -91,18 +107,20 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
err = dev->netdev_ops->ndo_bpf(dev, &bpf);
if (err)
- goto err_rtnl_unlock;
+ goto err_unreg_umem;
rtnl_unlock();
dev_hold(dev);
- umem->dev = dev;
- umem->queue_id = queue_id;
umem->zc = true;
return 0;
-err_rtnl_unlock:
+err_unreg_umem:
+ xdp_clear_umem_at_qid(dev, queue_id);
+ if (!force_zc)
+ err = 0; /* fallback to copy mode */
+out_rtnl_unlock:
rtnl_unlock();
- return force_zc ? err : 0; /* fail or fallback */
+ return err;
}
static void xdp_umem_clear_dev(struct xdp_umem *umem)
@@ -110,7 +128,7 @@ static void xdp_umem_clear_dev(struct xdp_umem *umem)
struct netdev_bpf bpf;
int err;
- if (umem->dev) {
+ if (umem->zc) {
bpf.command = XDP_SETUP_XSK_UMEM;
bpf.xsk.umem = NULL;
bpf.xsk.queue_id = umem->queue_id;
@@ -121,9 +139,17 @@ static void xdp_umem_clear_dev(struct xdp_umem *umem)
if (err)
WARN(1, "failed to disable umem!\n");
+ }
+
+ if (umem->dev) {
+ rtnl_lock();
+ xdp_clear_umem_at_qid(umem->dev, umem->queue_id);
+ rtnl_unlock();
+ }
+ if (umem->zc) {
dev_put(umem->dev);
- umem->dev = NULL;
+ umem->zc = false;
}
}
@@ -167,6 +193,8 @@ static void xdp_umem_release(struct xdp_umem *umem)
umem->cq = NULL;
}
+ xsk_reuseq_destroy(umem);
+
xdp_umem_unpin_pages(umem);
task = get_pid_task(umem->pid, PIDTYPE_PID);
@@ -314,8 +342,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->pid = get_task_pid(current, PIDTYPE_PID);
umem->address = (unsigned long)addr;
- umem->props.chunk_mask = ~((u64)chunk_size - 1);
- umem->props.size = size;
+ umem->chunk_mask = ~((u64)chunk_size - 1);
+ umem->size = size;
umem->headroom = headroom;
umem->chunk_size_nohr = chunk_size - headroom;
umem->npgs = size / PAGE_SIZE;
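
[ The per-queue registration above also relies on new umem pointers in
  struct netdev_rx_queue and struct netdev_queue, which come from the
  include/linux/netdevice.h part of the same patch and are outside this
  'net/xdp'-limited diffstat. A minimal sketch of what the lookup buys a
  driver, assuming the xdp_get_umem_from_qid() declaration is visible to
  it (it lands in include/net/xdp_sock.h, also outside this diffstat);
  the "mydrv" helper is illustrative, not part of the patch: ]

#include <linux/netdevice.h>
#include <net/xdp_sock.h>

/* Non-NULL iff an AF_XDP socket is bound to this queue id; valid for
 * both rx and tx rings, per the comment on xdp_reg_umem_at_qid(). */
static bool mydrv_qid_is_xsk(struct net_device *dev, u16 qid)
{
	return xdp_get_umem_from_qid(dev, qid) != NULL;
}
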
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index f11560334f88..27603227601b 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -8,18 +8,8 @@
#include <net/xdp_sock.h>
-static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
-{
- return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
-}
-
-static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
-{
- return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
-}
-
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
- u32 queue_id, u16 flags);
+ u16 queue_id, u16 flags);
bool xdp_umem_validate_queues(struct xdp_umem *umem);
void xdp_get_umem(struct xdp_umem *umem);
void xdp_put_umem(struct xdp_umem *umem);
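
[ The xdp_umem_get_data()/xdp_umem_get_dma() helpers deleted above are
  not gone: xsk.c still calls xdp_umem_get_data() in the hunks below,
  and in this series the pair moves to include/net/xdp_sock.h (outside
  this diffstat) so zero-copy drivers can translate umem addresses too.
  For reference, the moved helpers exactly as removed here: ]

static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
{
	return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
}

static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
{
	return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
}
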
diff --git a/net/xdp/xdp_umem_props.h b/net/xdp/xdp_umem_props.h
deleted file mode 100644
index 40eab10dfc49..000000000000
--- a/net/xdp/xdp_umem_props.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* XDP user-space packet buffer
- * Copyright(c) 2018 Intel Corporation.
- */
-
-#ifndef XDP_UMEM_PROPS_H_
-#define XDP_UMEM_PROPS_H_
-
-struct xdp_umem_props {
- u64 chunk_mask;
- u64 size;
-};
-
-#endif /* XDP_UMEM_PROPS_H_ */
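
[ With struct xdp_umem_props gone, its two fields live directly in
  their users: struct xsk_queue (see the xsk_queue.h hunks below) and
  struct xdp_umem in include/net/xdp_sock.h, which is outside this
  diffstat. A sketch of the latter, from the corresponding xdp_sock.h
  change; treat the surrounding fields as illustrative: ]

struct xdp_umem {
	struct xsk_queue *fq;
	struct xsk_queue *cq;
	u64 chunk_mask;			/* was props.chunk_mask */
	u64 size;			/* was props.size */
	/* ... */
};
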
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 4e937cd7c17d..07156f43d295 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -55,20 +55,30 @@ EXPORT_SYMBOL(xsk_umem_discard_addr);
static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
- void *buffer;
+ void *to_buf, *from_buf;
+ u32 metalen;
u64 addr;
int err;
if (!xskq_peek_addr(xs->umem->fq, &addr) ||
- len > xs->umem->chunk_size_nohr) {
+ len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
xs->rx_dropped++;
return -ENOSPC;
}
addr += xs->umem->headroom;
- buffer = xdp_umem_get_data(xs->umem, addr);
- memcpy(buffer, xdp->data, len);
+ if (unlikely(xdp_data_meta_unsupported(xdp))) {
+ from_buf = xdp->data;
+ metalen = 0;
+ } else {
+ from_buf = xdp->data_meta;
+ metalen = xdp->data - xdp->data_meta;
+ }
+
+ to_buf = xdp_umem_get_data(xs->umem, addr);
+ memcpy(to_buf, from_buf, len + metalen);
+ addr += metalen;
err = xskq_produce_batch_desc(xs->rx, addr, len);
if (!err) {
xskq_discard_addr(xs->umem->fq);
@@ -111,6 +121,7 @@ void xsk_flush(struct xdp_sock *xs)
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
+ u32 metalen = xdp->data - xdp->data_meta;
u32 len = xdp->data_end - xdp->data;
void *buffer;
u64 addr;
@@ -120,7 +131,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
return -EINVAL;
if (!xskq_peek_addr(xs->umem->fq, &addr) ||
- len > xs->umem->chunk_size_nohr) {
+ len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
xs->rx_dropped++;
return -ENOSPC;
}
@@ -128,7 +139,8 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
addr += xs->umem->headroom;
buffer = xdp_umem_get_data(xs->umem, addr);
- memcpy(buffer, xdp->data, len);
+ memcpy(buffer, xdp->data_meta, len + metalen);
+ addr += metalen;
err = xskq_produce_batch_desc(xs->rx, addr, len);
if (!err) {
xskq_discard_addr(xs->umem->fq);
@@ -343,12 +355,18 @@ static int xsk_release(struct socket *sock)
local_bh_enable();
if (xs->dev) {
+ struct net_device *dev = xs->dev;
+
/* Wait for driver to stop using the xdp socket. */
- synchronize_net();
- dev_put(xs->dev);
+ xdp_del_sk_umem(xs->umem, xs);
xs->dev = NULL;
+ synchronize_net();
+ dev_put(dev);
}
+ xskq_destroy(xs->rx);
+ xskq_destroy(xs->tx);
+
sock_orphan(sk);
sock->sk = NULL;
@@ -407,13 +425,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
}
qid = sxdp->sxdp_queue_id;
-
- if ((xs->rx && qid >= dev->real_num_rx_queues) ||
- (xs->tx && qid >= dev->real_num_tx_queues)) {
- err = -EINVAL;
- goto out_unlock;
- }
-
flags = sxdp->sxdp_flags;
if (flags & XDP_SHARED_UMEM) {
@@ -458,8 +469,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
goto out_unlock;
} else {
/* This xsk has its own umem. */
- xskq_set_umem(xs->umem->fq, &xs->umem->props);
- xskq_set_umem(xs->umem->cq, &xs->umem->props);
+ xskq_set_umem(xs->umem->fq, xs->umem->size,
+ xs->umem->chunk_mask);
+ xskq_set_umem(xs->umem->cq, xs->umem->size,
+ xs->umem->chunk_mask);
err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
if (err)
@@ -469,8 +482,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xs->dev = dev;
xs->zc = xs->umem->zc;
xs->queue_id = qid;
- xskq_set_umem(xs->rx, &xs->umem->props);
- xskq_set_umem(xs->tx, &xs->umem->props);
+ xskq_set_umem(xs->rx, xs->umem->size, xs->umem->chunk_mask);
+ xskq_set_umem(xs->tx, xs->umem->size, xs->umem->chunk_mask);
xdp_add_sk_umem(xs->umem, xs);
out_unlock:
@@ -707,9 +720,6 @@ static void xsk_destruct(struct sock *sk)
if (!sock_flag(sk, SOCK_DEAD))
return;
- xskq_destroy(xs->rx);
- xskq_destroy(xs->tx);
- xdp_del_sk_umem(xs->umem, xs);
xdp_put_umem(xs->umem);
sk_refcnt_debug_dec(sk);
@@ -744,6 +754,8 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
sk->sk_destruct = xsk_destruct;
sk_refcnt_debug_inc(sk);
+ sock_set_flag(sk, SOCK_RCU_FREE);
+
xs = xdp_sk(sk);
mutex_init(&xs->mutex);
spin_lock_init(&xs->tx_completion_lock);
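
[ A minimal user-space sketch of what the data_meta change above means
  for a copy-mode consumer: after "addr += metalen", the descriptor's
  addr points at the packet data, and any XDP metadata sits immediately
  before it in the same chunk. umem_area, METALEN, and struct meta are
  assumptions here; METALEN must match what the BPF program reserved
  via bpf_xdp_adjust_meta(): ]

#include <stdint.h>

#define METALEN 8			/* assumed: reserved by the XDP program */

struct meta {				/* assumed layout, written by the XDP program */
	uint64_t rx_timestamp;
};

static uint64_t handle_desc(const void *umem_area, uint64_t addr, uint32_t len)
{
	const char *pkt = (const char *)umem_area + addr;	/* len bytes of packet */
	const struct meta *m = (const void *)(pkt - METALEN);	/* metadata precedes it */

	(void)len;		/* packet payload is pkt[0..len) */
	return m->rx_timestamp;
}
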
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
index 6c32e92e98fc..b66504592d9b 100644
--- a/net/xdp/xsk_queue.c
+++ b/net/xdp/xsk_queue.c
@@ -3,16 +3,19 @@
* Copyright(c) 2018 Intel Corporation.
*/
+#include <linux/log2.h>
#include <linux/slab.h>
+#include <linux/overflow.h>
#include "xsk_queue.h"
-void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props)
+void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask)
{
if (!q)
return;
- q->umem_props = *umem_props;
+ q->size = size;
+ q->chunk_mask = chunk_mask;
}
static u32 xskq_umem_get_ring_size(struct xsk_queue *q)
@@ -61,3 +64,56 @@ void xskq_destroy(struct xsk_queue *q)
page_frag_free(q->ring);
kfree(q);
}
+
+struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries)
+{
+ struct xdp_umem_fq_reuse *newq;
+
+ /* Check for overflow */
+ if (nentries > (u32)roundup_pow_of_two(nentries))
+ return NULL;
+ nentries = roundup_pow_of_two(nentries);
+
+ newq = kvmalloc(struct_size(newq, handles, nentries), GFP_KERNEL);
+ if (!newq)
+ return NULL;
+ memset(newq, 0, offsetof(typeof(*newq), handles));
+
+ newq->nentries = nentries;
+ return newq;
+}
+EXPORT_SYMBOL_GPL(xsk_reuseq_prepare);
+
+struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
+ struct xdp_umem_fq_reuse *newq)
+{
+ struct xdp_umem_fq_reuse *oldq = umem->fq_reuse;
+
+ if (!oldq) {
+ umem->fq_reuse = newq;
+ return NULL;
+ }
+
+ if (newq->nentries < oldq->length)
+ return newq;
+
+ memcpy(newq->handles, oldq->handles,
+ array_size(oldq->length, sizeof(u64)));
+ newq->length = oldq->length;
+
+ umem->fq_reuse = newq;
+ return oldq;
+}
+EXPORT_SYMBOL_GPL(xsk_reuseq_swap);
+
+void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq)
+{
+ kvfree(rq);
+}
+EXPORT_SYMBOL_GPL(xsk_reuseq_free);
+
+void xsk_reuseq_destroy(struct xdp_umem *umem)
+{
+ xsk_reuseq_free(umem->fq_reuse);
+ umem->fq_reuse = NULL;
+}
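
[ Driver-side usage of the new reuse-queue API, following the pattern
  the zero-copy drivers in this series adopt (i40e sizes the queue to
  its rx ring); mydrv_setup_reuseq() and rx_ring_count are
  illustrative: ]

#include <linux/errno.h>
#include <net/xdp_sock.h>

static int mydrv_setup_reuseq(struct xdp_umem *umem, u32 rx_ring_count)
{
	struct xdp_umem_fq_reuse *reuseq;

	/* Size the reuse queue to the rx ring; prepare rounds up to a
	 * power of two and returns NULL on overflow or alloc failure. */
	reuseq = xsk_reuseq_prepare(rx_ring_count);
	if (!reuseq)
		return -ENOMEM;

	/* Swap it in; xsk_reuseq_swap() hands back whichever queue is
	 * no longer needed (the old one, or newq if it was too small). */
	xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
	return 0;
}
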
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 8a64b150be54..bcb5cbb40419 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -31,7 +31,8 @@ struct xdp_umem_ring {
};
struct xsk_queue {
- struct xdp_umem_props umem_props;
+ u64 chunk_mask;
+ u64 size;
u32 ring_mask;
u32 nentries;
u32 prod_head;
@@ -78,7 +79,7 @@ static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
{
- if (addr >= q->umem_props.size) {
+ if (addr >= q->size) {
q->invalid_descs++;
return false;
}
@@ -92,7 +93,7 @@ static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr)
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
unsigned int idx = q->cons_tail & q->ring_mask;
- *addr = READ_ONCE(ring->desc[idx]) & q->umem_props.chunk_mask;
+ *addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask;
if (xskq_is_valid_addr(q, *addr))
return addr;
@@ -173,8 +174,8 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
if (!xskq_is_valid_addr(q, d->addr))
return false;
- if (((d->addr + d->len) & q->umem_props.chunk_mask) !=
- (d->addr & q->umem_props.chunk_mask)) {
+ if (((d->addr + d->len) & q->chunk_mask) !=
+ (d->addr & q->chunk_mask)) {
q->invalid_descs++;
return false;
}
@@ -253,8 +254,11 @@ static inline bool xskq_empty_desc(struct xsk_queue *q)
return xskq_nb_free(q, q->prod_tail, q->nentries) == q->nentries;
}
-void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props);
+void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask);
struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
void xskq_destroy(struct xsk_queue *q_ops);
+/* Executed by the core when the entire UMEM gets freed */
+void xsk_reuseq_destroy(struct xdp_umem *umem);
+
#endif /* _LINUX_XSK_QUEUE_H */
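
[ To make the chunk_mask check in xskq_is_valid_desc() concrete, a tiny
  user-space rendition of its boundary test, assuming 2 KiB chunks; the
  values are illustrative: ]

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t chunk_mask = ~((uint64_t)2048 - 1);	/* assumed chunk size */
	uint64_t addr = 0x1f00, len = 0x200;

	/* Same test as xskq_is_valid_desc(): do the start and end of the
	 * descriptor fall in the same chunk? Here 0x1f00 + 0x200 = 0x2100
	 * reaches into the next 2 KiB chunk, so the descriptor is invalid. */
	if (((addr + len) & chunk_mask) != (addr & chunk_mask))
		printf("invalid: descriptor crosses a chunk boundary\n");
	return 0;
}
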