summaryrefslogtreecommitdiffstats
path: root/samples/bpf/xdp_sample.bpf.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-08-31 16:43:06 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-08-31 16:43:06 -0700
commit9e9fb7655ed585da8f468e29221f0ba194a5f613 (patch)
treed2c51887389b8297635a5b90d5766897f00fe928 /samples/bpf/xdp_sample.bpf.c
parent86ac54e79fe09b34c52691a780a6e31d12fa57f4 (diff)
parent29ce8f9701072fc221d9c38ad952de1a9578f95c (diff)
downloadlinux-9e9fb7655ed585da8f468e29221f0ba194a5f613.tar.bz2
Merge tag 'net-next-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core: - Enable memcg accounting for various networking objects. BPF: - Introduce bpf timers. - Add perf link and opaque bpf_cookie which the program can read out again, to be used in libbpf-based USDT library. - Add bpf_task_pt_regs() helper to access user space pt_regs in kprobes, to help user space stack unwinding. - Add support for UNIX sockets for BPF sockmap. - Extend BPF iterator support for UNIX domain sockets. - Allow BPF TCP congestion control progs and bpf iterators to call bpf_setsockopt(), e.g. to switch to another congestion control algorithm. Protocols: - Support IOAM Pre-allocated Trace with IPv6. - Support Management Component Transport Protocol. - bridge: multicast: add vlan support. - netfilter: add hooks for the SRv6 lightweight tunnel driver. - tcp: - enable mid-stream window clamping (by user space or BPF) - allow data-less, empty-cookie SYN with TFO_SERVER_COOKIE_NOT_REQD - more accurate DSACK processing for RACK-TLP - mptcp: - add full mesh path manager option - add partial support for MP_FAIL - improve use of backup subflows - optimize option processing - af_unix: add OOB notification support. - ipv6: add IFLA_INET6_RA_MTU to expose MTU value advertised by the router. - mac80211: Target Wake Time support in AP mode. - can: j1939: extend UAPI to notify about RX status. Driver APIs: - Add page frag support in page pool API. - Many improvements to the DSA (distributed switch) APIs. - ethtool: extend IRQ coalesce uAPI with timer reset modes. - devlink: control which auxiliary devices are created. - Support CAN PHYs via the generic PHY subsystem. - Proper cross-chip support for tag_8021q. - Allow TX forwarding for the software bridge data path to be offloaded to capable devices. Drivers: - veth: more flexible channels number configuration. - openvswitch: introduce per-cpu upcall dispatch. - Add internet mix (IMIX) mode to pktgen. - Transparently handle XDP operations in the bonding driver. - Add LiteETH network driver. - Renesas (ravb): - support Gigabit Ethernet IP - NXP Ethernet switch (sja1105): - fast aging support - support for "H" switch topologies - traffic termination for ports under VLAN-aware bridge - Intel 1G Ethernet - support getcrosststamp() with PCIe PTM (Precision Time Measurement) for better time sync - support Credit-Based Shaper (CBS) offload, enabling HW traffic prioritization and bandwidth reservation - Broadcom Ethernet (bnxt) - support pulse-per-second output - support larger Rx rings - Mellanox Ethernet (mlx5) - support ethtool RSS contexts and MQPRIO channel mode - support LAG offload with bridging - support devlink rate limit API - support packet sampling on tunnels - Huawei Ethernet (hns3): - basic devlink support - add extended IRQ coalescing support - report extended link state - Netronome Ethernet (nfp): - add conntrack offload support - Broadcom WiFi (brcmfmac): - add WPA3 Personal with FT to supported cipher suites - support 43752 SDIO device - Intel WiFi (iwlwifi): - support scanning hidden 6GHz networks - support for a new hardware family (Bz) - Xen pv driver: - harden netfront against malicious backends - Qualcomm mobile - ipa: refactor power management and enable automatic suspend - mhi: move MBIM to WWAN subsystem interfaces Refactor: - Ambient BPF run context and cgroup storage cleanup. - Compat rework for ndo_ioctl. Old code removal: - prism54 remove the obsoleted driver, deprecated by the p54 driver. - wan: remove sbni/granch driver" * tag 'net-next-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1715 commits) net: Add depends on OF_NET for LiteX's LiteETH ipv6: seg6: remove duplicated include net: hns3: remove unnecessary spaces net: hns3: add some required spaces net: hns3: clean up a type mismatch warning net: hns3: refine function hns3_set_default_feature() ipv6: remove duplicated 'net/lwtunnel.h' include net: w5100: check return value after calling platform_get_resource() net/mlxbf_gige: Make use of devm_platform_ioremap_resourcexxx() net: mdio: mscc-miim: Make use of the helper function devm_platform_ioremap_resource() net: mdio-ipq4019: Make use of devm_platform_ioremap_resource() fou: remove sparse errors ipv4: fix endianness issue in inet_rtm_getroute_build_skb() octeontx2-af: Set proper errorcode for IPv4 checksum errors octeontx2-af: Fix static code analyzer reported issues octeontx2-af: Fix mailbox errors in nix_rss_flowkey_cfg octeontx2-af: Fix loop in free and unmap counter af_unix: fix potential NULL deref in unix_dgram_connect() dpaa2-eth: Replace strlcpy with strscpy octeontx2-af: Use NDC TX for transmit packet data ...
Diffstat (limited to 'samples/bpf/xdp_sample.bpf.c')
-rw-r--r--samples/bpf/xdp_sample.bpf.c266
1 files changed, 266 insertions, 0 deletions
diff --git a/samples/bpf/xdp_sample.bpf.c b/samples/bpf/xdp_sample.bpf.c
new file mode 100644
index 000000000000..0eb7e1dcae22
--- /dev/null
+++ b/samples/bpf/xdp_sample.bpf.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0
+/* GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */
+#include "xdp_sample.bpf.h"
+
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+array_map rx_cnt SEC(".maps");
+array_map redir_err_cnt SEC(".maps");
+array_map cpumap_enqueue_cnt SEC(".maps");
+array_map cpumap_kthread_cnt SEC(".maps");
+array_map exception_cnt SEC(".maps");
+array_map devmap_xmit_cnt SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 32 * 32);
+ __type(key, u64);
+ __type(value, struct datarec);
+} devmap_xmit_cnt_multi SEC(".maps");
+
+const volatile int nr_cpus = 0;
+
+/* These can be set before loading so that redundant comparisons can be DCE'd by
+ * the verifier, and only actual matches are tried after loading tp_btf program.
+ * This allows sample to filter tracepoint stats based on net_device.
+ */
+const volatile int from_match[32] = {};
+const volatile int to_match[32] = {};
+
+int cpumap_map_id = 0;
+
+/* Find if b is part of set a, but if a is empty set then evaluate to true */
+#define IN_SET(a, b) \
+ ({ \
+ bool __res = !(a)[0]; \
+ for (int i = 0; i < ARRAY_SIZE(a) && (a)[i]; i++) { \
+ __res = (a)[i] == (b); \
+ if (__res) \
+ break; \
+ } \
+ __res; \
+ })
+
+static __always_inline __u32 xdp_get_err_key(int err)
+{
+ switch (err) {
+ case 0:
+ return 0;
+ case -EINVAL:
+ return 2;
+ case -ENETDOWN:
+ return 3;
+ case -EMSGSIZE:
+ return 4;
+ case -EOPNOTSUPP:
+ return 5;
+ case -ENOSPC:
+ return 6;
+ default:
+ return 1;
+ }
+}
+
+static __always_inline int xdp_redirect_collect_stat(int from, int err)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ u32 key = XDP_REDIRECT_ERROR;
+ struct datarec *rec;
+ u32 idx;
+
+ if (!IN_SET(from_match, from))
+ return 0;
+
+ key = xdp_get_err_key(err);
+
+ idx = key * nr_cpus + cpu;
+ rec = bpf_map_lookup_elem(&redir_err_cnt, &idx);
+ if (!rec)
+ return 0;
+ if (key)
+ NO_TEAR_INC(rec->dropped);
+ else
+ NO_TEAR_INC(rec->processed);
+ return 0; /* Indicate event was filtered (no further processing)*/
+ /*
+ * Returning 1 here would allow e.g. a perf-record tracepoint
+ * to see and record these events, but it doesn't work well
+ * in-practice as stopping perf-record also unload this
+ * bpf_prog. Plus, there is additional overhead of doing so.
+ */
+}
+
+SEC("tp_btf/xdp_redirect_err")
+int BPF_PROG(tp_xdp_redirect_err, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_redirect_map_err")
+int BPF_PROG(tp_xdp_redirect_map_err, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_redirect")
+int BPF_PROG(tp_xdp_redirect, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_redirect_map")
+int BPF_PROG(tp_xdp_redirect_map, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_cpumap_enqueue")
+int BPF_PROG(tp_xdp_cpumap_enqueue, int map_id, unsigned int processed,
+ unsigned int drops, int to_cpu)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ struct datarec *rec;
+ u32 idx;
+
+ if (cpumap_map_id && cpumap_map_id != map_id)
+ return 0;
+
+ idx = to_cpu * nr_cpus + cpu;
+ rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &idx);
+ if (!rec)
+ return 0;
+ NO_TEAR_ADD(rec->processed, processed);
+ NO_TEAR_ADD(rec->dropped, drops);
+ /* Record bulk events, then userspace can calc average bulk size */
+ if (processed > 0)
+ NO_TEAR_INC(rec->issue);
+ /* Inception: It's possible to detect overload situations, via
+ * this tracepoint. This can be used for creating a feedback
+ * loop to XDP, which can take appropriate actions to mitigate
+ * this overload situation.
+ */
+ return 0;
+}
+
+SEC("tp_btf/xdp_cpumap_kthread")
+int BPF_PROG(tp_xdp_cpumap_kthread, int map_id, unsigned int processed,
+ unsigned int drops, int sched, struct xdp_cpumap_stats *xdp_stats)
+{
+ struct datarec *rec;
+ u32 cpu;
+
+ if (cpumap_map_id && cpumap_map_id != map_id)
+ return 0;
+
+ cpu = bpf_get_smp_processor_id();
+ rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &cpu);
+ if (!rec)
+ return 0;
+ NO_TEAR_ADD(rec->processed, processed);
+ NO_TEAR_ADD(rec->dropped, drops);
+ NO_TEAR_ADD(rec->xdp_pass, xdp_stats->pass);
+ NO_TEAR_ADD(rec->xdp_drop, xdp_stats->drop);
+ NO_TEAR_ADD(rec->xdp_redirect, xdp_stats->redirect);
+ /* Count times kthread yielded CPU via schedule call */
+ if (sched)
+ NO_TEAR_INC(rec->issue);
+ return 0;
+}
+
+SEC("tp_btf/xdp_exception")
+int BPF_PROG(tp_xdp_exception, const struct net_device *dev,
+ const struct bpf_prog *xdp, u32 act)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ struct datarec *rec;
+ u32 key = act, idx;
+
+ if (!IN_SET(from_match, dev->ifindex))
+ return 0;
+ if (!IN_SET(to_match, dev->ifindex))
+ return 0;
+
+ if (key > XDP_REDIRECT)
+ key = XDP_REDIRECT + 1;
+
+ idx = key * nr_cpus + cpu;
+ rec = bpf_map_lookup_elem(&exception_cnt, &idx);
+ if (!rec)
+ return 0;
+ NO_TEAR_INC(rec->dropped);
+
+ return 0;
+}
+
+SEC("tp_btf/xdp_devmap_xmit")
+int BPF_PROG(tp_xdp_devmap_xmit, const struct net_device *from_dev,
+ const struct net_device *to_dev, int sent, int drops, int err)
+{
+ struct datarec *rec;
+ int idx_in, idx_out;
+ u32 cpu;
+
+ idx_in = from_dev->ifindex;
+ idx_out = to_dev->ifindex;
+
+ if (!IN_SET(from_match, idx_in))
+ return 0;
+ if (!IN_SET(to_match, idx_out))
+ return 0;
+
+ cpu = bpf_get_smp_processor_id();
+ rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &cpu);
+ if (!rec)
+ return 0;
+ NO_TEAR_ADD(rec->processed, sent);
+ NO_TEAR_ADD(rec->dropped, drops);
+ /* Record bulk events, then userspace can calc average bulk size */
+ NO_TEAR_INC(rec->info);
+ /* Record error cases, where no frame were sent */
+ /* Catch API error of drv ndo_xdp_xmit sent more than count */
+ if (err || drops < 0)
+ NO_TEAR_INC(rec->issue);
+ return 0;
+}
+
+SEC("tp_btf/xdp_devmap_xmit")
+int BPF_PROG(tp_xdp_devmap_xmit_multi, const struct net_device *from_dev,
+ const struct net_device *to_dev, int sent, int drops, int err)
+{
+ struct datarec empty = {};
+ struct datarec *rec;
+ int idx_in, idx_out;
+ u64 idx;
+
+ idx_in = from_dev->ifindex;
+ idx_out = to_dev->ifindex;
+ idx = idx_in;
+ idx = idx << 32 | idx_out;
+
+ if (!IN_SET(from_match, idx_in))
+ return 0;
+ if (!IN_SET(to_match, idx_out))
+ return 0;
+
+ bpf_map_update_elem(&devmap_xmit_cnt_multi, &idx, &empty, BPF_NOEXIST);
+ rec = bpf_map_lookup_elem(&devmap_xmit_cnt_multi, &idx);
+ if (!rec)
+ return 0;
+
+ NO_TEAR_ADD(rec->processed, sent);
+ NO_TEAR_ADD(rec->dropped, drops);
+ NO_TEAR_INC(rec->info);
+ if (err || drops < 0)
+ NO_TEAR_INC(rec->issue);
+ return 0;
+}