diff options
author | David S. Miller <davem@davemloft.net> | 2018-01-07 21:26:31 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-01-07 21:26:31 -0500 |
commit | 7f0b800048b562d716372466ea8d9de648c422dd (patch) | |
tree | 8fbad920adc333fd00cbc3acaba09cdfa9b63fb3 /net/core/dev.c | |
parent | d0adb51edb73c94a595bfa9d9bd8b35977e74fbf (diff) | |
parent | 9be99badee761f0b2c065ecbd8bd54a96cbd0fa0 (diff) | |
download | linux-7f0b800048b562d716372466ea8d9de648c422dd.tar.bz2 |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:
====================
pull-request: bpf-next 2018-01-07
The following pull-request contains BPF updates for your *net-next* tree.
The main changes are:
1) Add a start of a framework for extending struct xdp_buff without
having the overhead of populating every data at runtime. Idea
is to have a new per-queue struct xdp_rxq_info that holds read
mostly data (currently that is, queue number and a pointer to
the corresponding netdev) which is set up during rxqueue config
time. When a XDP program is invoked, struct xdp_buff holds a
pointer to struct xdp_rxq_info that the BPF program can then
walk. The user facing BPF program that uses struct xdp_md for
context can use these members directly, and the verifier rewrites
context access transparently by walking the xdp_rxq_info and
net_device pointers to load the data, from Jesper.
2) Redo the reporting of offload device information to user space
such that it works in combination with network namespaces. The
latter is reported through a device/inode tuple as similarly
done in other subsystems as well (e.g. perf) in order to identify
the namespace. For this to work, ns_get_path() has been generalized
such that the namespace can be retrieved not only from a specific
task (perf case), but also from a callback where we deduce the
netns (ns_common) from a netdevice. bpftool support using the new
uapi info and extensive test cases for test_offload.py in BPF
selftests have been added as well, from Jakub.
3) Add two bpftool improvements: i) properly report the bpftool
version such that it corresponds to the version from the kernel
source tree. So pick the right linux/version.h from the source
tree instead of the installed one. ii) fix bpftool and also
bpf_jit_disasm build with bintutils >= 2.9. The reason for the
build breakage is that binutils library changed the function
signature to select the disassembler. Given this is needed in
multiple tools, add a proper feature detection to the
tools/build/features infrastructure, from Roman.
4) Implement the BPF syscall command BPF_MAP_GET_NEXT_KEY for the
stacktrace map. It is currently unimplemented, but there are
use cases where user space needs to walk all stacktrace map
entries e.g. for dumping or deleting map entries w/o having to
close and recreate the map. Add BPF selftests along with it,
from Yonghong.
5) Few follow-up cleanups for the bpftool cgroup code: i) rename
the cgroup 'list' command into 'show' as we have it for other
subcommands as well, ii) then alias the 'show' command such that
'list' is accepted which is also common practice in iproute2,
and iii) remove couple of newlines from error messages using
p_err(), from Jakub.
6) Two follow-up cleanups to sockmap code: i) remove the unused
bpf_compute_data_end_sk_skb() function and ii) only build the
sockmap infrastructure when CONFIG_INET is enabled since it's
only aware of TCP sockets at this time, from John.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 69 |
1 files changed, 59 insertions, 10 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 2eb66c0d9cdb..d7925ef8743d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3906,9 +3906,33 @@ drop: return NET_RX_DROP; } +static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct netdev_rx_queue *rxqueue; + + rxqueue = dev->_rx; + + if (skb_rx_queue_recorded(skb)) { + u16 index = skb_get_rx_queue(skb); + + if (unlikely(index >= dev->real_num_rx_queues)) { + WARN_ONCE(dev->real_num_rx_queues > 1, + "%s received packet on queue %u, but number " + "of RX queues is %u\n", + dev->name, index, dev->real_num_rx_queues); + + return rxqueue; /* Return first rxqueue */ + } + rxqueue += index; + } + return rxqueue; +} + static u32 netif_receive_generic_xdp(struct sk_buff *skb, struct bpf_prog *xdp_prog) { + struct netdev_rx_queue *rxqueue; u32 metalen, act = XDP_DROP; struct xdp_buff xdp; void *orig_data; @@ -3952,6 +3976,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, xdp.data_hard_start = skb->data - skb_headroom(skb); orig_data = xdp.data; + rxqueue = netif_get_rxqueue(skb); + xdp.rxq = &rxqueue->xdp_rxq; + act = bpf_prog_run_xdp(xdp_prog, &xdp); off = xdp.data - orig_data; @@ -7589,12 +7616,12 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, } EXPORT_SYMBOL(netif_stacked_transfer_operstate); -#ifdef CONFIG_SYSFS static int netif_alloc_rx_queues(struct net_device *dev) { unsigned int i, count = dev->num_rx_queues; struct netdev_rx_queue *rx; size_t sz = count * sizeof(*rx); + int err = 0; BUG_ON(count < 1); @@ -7604,11 +7631,39 @@ static int netif_alloc_rx_queues(struct net_device *dev) dev->_rx = rx; - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { rx[i].dev = dev; + + /* XDP RX-queue setup */ + err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i); + if (err < 0) + goto err_rxq_info; + } return 0; + +err_rxq_info: + /* Rollback successful reg's and free other resources */ + while (i--) + xdp_rxq_info_unreg(&rx[i].xdp_rxq); + kfree(dev->_rx); + dev->_rx = NULL; + return err; +} + +static void netif_free_rx_queues(struct net_device *dev) +{ + unsigned int i, count = dev->num_rx_queues; + struct netdev_rx_queue *rx; + + /* netif_alloc_rx_queues alloc failed, resources have been unreg'ed */ + if (!dev->_rx) + return; + + rx = dev->_rx; + + for (i = 0; i < count; i++) + xdp_rxq_info_unreg(&rx[i].xdp_rxq); } -#endif static void netdev_init_one_queue(struct net_device *dev, struct netdev_queue *queue, void *_unused) @@ -8169,12 +8224,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, return NULL; } -#ifdef CONFIG_SYSFS if (rxqs < 1) { pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); return NULL; } -#endif alloc_size = sizeof(struct net_device); if (sizeof_priv) { @@ -8231,12 +8284,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (netif_alloc_netdev_queues(dev)) goto free_all; -#ifdef CONFIG_SYSFS dev->num_rx_queues = rxqs; dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) goto free_all; -#endif strcpy(dev->name, name); dev->name_assign_type = name_assign_type; @@ -8275,9 +8326,7 @@ void free_netdev(struct net_device *dev) might_sleep(); netif_free_tx_queues(dev); -#ifdef CONFIG_SYSFS - kvfree(dev->_rx); -#endif + netif_free_rx_queues(dev); kfree(rcu_dereference_protected(dev->ingress_queue, 1)); |