summaryrefslogtreecommitdiffstats
path: root/net/bpf/test_run.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-04-29 11:57:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-04-29 11:57:23 -0700
commit9d31d2338950293ec19d9b095fbaa9030899dcb4 (patch)
treee688040d0557c24a2eeb9f6c9c223d949f6f7ef9 /net/bpf/test_run.c
parent635de956a7f5a6ffcb04f29d70630c64c717b56b (diff)
parent4a52dd8fefb45626dace70a63c0738dbd83b7edb (diff)
downloadlinux-9d31d2338950293ec19d9b095fbaa9030899dcb4.tar.bz2
Merge tag 'net-next-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core: - bpf: - allow bpf programs calling kernel functions (initially to reuse TCP congestion control implementations) - enable task local storage for tracing programs - remove the need to store per-task state in hash maps, and allow tracing programs access to task local storage previously added for BPF_LSM - add bpf_for_each_map_elem() helper, allowing programs to walk all map elements in a more robust and easier to verify fashion - sockmap: support UDP and cross-protocol BPF_SK_SKB_VERDICT redirection - lpm: add support for batched ops in LPM trie - add BTF_KIND_FLOAT support - mostly to allow use of BTF on s390 which has floats in its headers files - improve BPF syscall documentation and extend the use of kdoc parsing scripts we already employ for bpf-helpers - libbpf, bpftool: support static linking of BPF ELF files - improve support for encapsulation of L2 packets - xdp: restructure redirect actions to avoid a runtime lookup, improving performance by 4-8% in microbenchmarks - xsk: build skb by page (aka generic zerocopy xmit) - improve performance of software AF_XDP path by 33% for devices which don't need headers in the linear skb part (e.g. virtio) - nexthop: resilient next-hop groups - improve path stability on next-hops group changes (incl. offload for mlxsw) - ipv6: segment routing: add support for IPv4 decapsulation - icmp: add support for RFC 8335 extended PROBE messages - inet: use bigger hash table for IP ID generation - tcp: deal better with delayed TX completions - make sure we don't give up on fast TCP retransmissions only because driver is slow in reporting that it completed transmitting the original - tcp: reorder tcp_congestion_ops for better cache locality - mptcp: - add sockopt support for common TCP options - add support for common TCP msg flags - include multiple address ids in RM_ADDR - add reset option support for resetting one subflow - udp: GRO L4 improvements - improve 'forward' / 'frag_list' co-existence with UDP tunnel GRO, allowing the first to take place correctly even for encapsulated UDP traffic - micro-optimize dev_gro_receive() and flow dissection, avoid retpoline overhead on VLAN and TEB GRO - use less memory for sysctls, add a new sysctl type, to allow using u8 instead of "int" and "long" and shrink networking sysctls - veth: allow GRO without XDP - this allows aggregating UDP packets before handing them off to routing, bridge, OvS, etc. - allow specifing ifindex when device is moved to another namespace - netfilter: - nft_socket: add support for cgroupsv2 - nftables: add catch-all set element - special element used to define a default action in case normal lookup missed - use net_generic infra in many modules to avoid allocating per-ns memory unnecessarily - xps: improve the xps handling to avoid potential out-of-bound accesses and use-after-free when XPS change race with other re-configuration under traffic - add a config knob to turn off per-cpu netdev refcnt to catch underflows in testing Device APIs: - add WWAN subsystem to organize the WWAN interfaces better and hopefully start driving towards more unified and vendor- independent APIs - ethtool: - add interface for reading IEEE MIB stats (incl. mlx5 and bnxt support) - allow network drivers to dump arbitrary SFP EEPROM data, current offset+length API was a poor fit for modern SFP which define EEPROM in terms of pages (incl. mlx5 support) - act_police, flow_offload: add support for packet-per-second policing (incl. offload for nfp) - psample: add additional metadata attributes like transit delay for packets sampled from switch HW (and corresponding egress and policy-based sampling in the mlxsw driver) - dsa: improve support for sandwiched LAGs with bridge and DSA - netfilter: - flowtable: use direct xmit in topologies with IP forwarding, bridging, vlans etc. - nftables: counter hardware offload support - Bluetooth: - improvements for firmware download w/ Intel devices - add support for reading AOSP vendor capabilities - add support for virtio transport driver - mac80211: - allow concurrent monitor iface and ethernet rx decap - set priority and queue mapping for injected frames - phy: add support for Clause-45 PHY Loopback - pci/iov: add sysfs MSI-X vector assignment interface to distribute MSI-X resources to VFs (incl. mlx5 support) New hardware/drivers: - dsa: mv88e6xxx: add support for Marvell mv88e6393x - 11-port Ethernet switch with 8x 1-Gigabit Ethernet and 3x 10-Gigabit interfaces. - dsa: support for legacy Broadcom tags used on BCM5325, BCM5365 and BCM63xx switches - Microchip KSZ8863 and KSZ8873; 3x 10/100Mbps Ethernet switches - ath11k: support for QCN9074 a 802.11ax device - Bluetooth: Broadcom BCM4330 and BMC4334 - phy: Marvell 88X2222 transceiver support - mdio: add BCM6368 MDIO mux bus controller - r8152: support RTL8153 and RTL8156 (USB Ethernet) chips - mana: driver for Microsoft Azure Network Adapter (MANA) - Actions Semi Owl Ethernet MAC - can: driver for ETAS ES58X CAN/USB interfaces Pure driver changes: - add XDP support to: enetc, igc, stmmac - add AF_XDP support to: stmmac - virtio: - page_to_skb() use build_skb when there's sufficient tailroom (21% improvement for 1000B UDP frames) - support XDP even without dedicated Tx queues - share the Tx queues with the stack when necessary - mlx5: - flow rules: add support for mirroring with conntrack, matching on ICMP, GTP, flex filters and more - support packet sampling with flow offloads - persist uplink representor netdev across eswitch mode changes - allow coexistence of CQE compression and HW time-stamping - add ethtool extended link error state reporting - ice, iavf: support flow filters, UDP Segmentation Offload - dpaa2-switch: - move the driver out of staging - add spanning tree (STP) support - add rx copybreak support - add tc flower hardware offload on ingress traffic - ionic: - implement Rx page reuse - support HW PTP time-stamping - octeon: support TC hardware offloads - flower matching on ingress and egress ratelimitting. - stmmac: - add RX frame steering based on VLAN priority in tc flower - support frame preemption (FPE) - intel: add cross time-stamping freq difference adjustment - ocelot: - support forwarding of MRP frames in HW - support multiple bridges - support PTP Sync one-step timestamping - dsa: mv88e6xxx, dpaa2-switch: offload bridge port flags like learning, flooding etc. - ipa: add IPA v4.5, v4.9 and v4.11 support (Qualcomm SDX55, SM8350, SC7280 SoCs) - mt7601u: enable TDLS support - mt76: - add support for 802.3 rx frames (mt7915/mt7615) - mt7915 flash pre-calibration support - mt7921/mt7663 runtime power management fixes" * tag 'net-next-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2451 commits) net: selftest: fix build issue if INET is disabled net: netrom: nr_in: Remove redundant assignment to ns net: tun: Remove redundant assignment to ret net: phy: marvell: add downshift support for M88E1240 net: dsa: ksz: Make reg_mib_cnt a u8 as it never exceeds 255 net/sched: act_ct: Remove redundant ct get and check icmp: standardize naming of RFC 8335 PROBE constants bpf, selftests: Update array map tests for per-cpu batched ops bpf: Add batched ops support for percpu array bpf: Implement formatted output helpers with bstr_printf seq_file: Add a seq_bprintf function sfc: adjust efx->xdp_tx_queue_count with the real number of initialized queues net:nfc:digital: Fix a double free in digital_tg_recv_dep_req net: fix a concurrency bug in l2tp_tunnel_register() net/smc: Remove redundant assignment to rc mpls: Remove redundant assignment to err llc2: Remove redundant assignment to rc net/tls: Remove redundant initialization of record rds: Remove redundant assignment to nr_sig dt-bindings: net: mdio-gpio: add compatible for microchip,mdio-smi0 ...
Diffstat (limited to 'net/bpf/test_run.c')
-rw-r--r--net/bpf/test_run.c276
1 files changed, 214 insertions, 62 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 58bcb8c849d5..a5d72c48fb66 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2017 Facebook
*/
#include <linux/bpf.h>
+#include <linux/btf_ids.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/etherdevice.h>
@@ -10,20 +11,86 @@
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <net/tcp.h>
+#include <net/net_namespace.h>
#include <linux/error-injection.h>
#include <linux/smp.h>
+#include <linux/sock_diag.h>
#define CREATE_TRACE_POINTS
#include <trace/events/bpf_test_run.h>
+struct bpf_test_timer {
+ enum { NO_PREEMPT, NO_MIGRATE } mode;
+ u32 i;
+ u64 time_start, time_spent;
+};
+
+static void bpf_test_timer_enter(struct bpf_test_timer *t)
+ __acquires(rcu)
+{
+ rcu_read_lock();
+ if (t->mode == NO_PREEMPT)
+ preempt_disable();
+ else
+ migrate_disable();
+
+ t->time_start = ktime_get_ns();
+}
+
+static void bpf_test_timer_leave(struct bpf_test_timer *t)
+ __releases(rcu)
+{
+ t->time_start = 0;
+
+ if (t->mode == NO_PREEMPT)
+ preempt_enable();
+ else
+ migrate_enable();
+ rcu_read_unlock();
+}
+
+static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration)
+ __must_hold(rcu)
+{
+ t->i++;
+ if (t->i >= repeat) {
+ /* We're done. */
+ t->time_spent += ktime_get_ns() - t->time_start;
+ do_div(t->time_spent, t->i);
+ *duration = t->time_spent > U32_MAX ? U32_MAX : (u32)t->time_spent;
+ *err = 0;
+ goto reset;
+ }
+
+ if (signal_pending(current)) {
+ /* During iteration: we've been cancelled, abort. */
+ *err = -EINTR;
+ goto reset;
+ }
+
+ if (need_resched()) {
+ /* During iteration: we need to reschedule between runs. */
+ t->time_spent += ktime_get_ns() - t->time_start;
+ bpf_test_timer_leave(t);
+ cond_resched();
+ bpf_test_timer_enter(t);
+ }
+
+ /* Do another round. */
+ return true;
+
+reset:
+ t->i = 0;
+ return false;
+}
+
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
u32 *retval, u32 *time, bool xdp)
{
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
+ struct bpf_test_timer t = { NO_MIGRATE };
enum bpf_cgroup_storage_type stype;
- u64 time_start, time_spent = 0;
- int ret = 0;
- u32 i;
+ int ret;
for_each_cgroup_storage_type(stype) {
storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
@@ -38,40 +105,20 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
if (!repeat)
repeat = 1;
- rcu_read_lock();
- migrate_disable();
- time_start = ktime_get_ns();
- for (i = 0; i < repeat; i++) {
- bpf_cgroup_storage_set(storage);
+ bpf_test_timer_enter(&t);
+ do {
+ ret = bpf_cgroup_storage_set(storage);
+ if (ret)
+ break;
if (xdp)
*retval = bpf_prog_run_xdp(prog, ctx);
else
*retval = BPF_PROG_RUN(prog, ctx);
- if (signal_pending(current)) {
- ret = -EINTR;
- break;
- }
-
- if (need_resched()) {
- time_spent += ktime_get_ns() - time_start;
- migrate_enable();
- rcu_read_unlock();
-
- cond_resched();
-
- rcu_read_lock();
- migrate_disable();
- time_start = ktime_get_ns();
- }
- }
- time_spent += ktime_get_ns() - time_start;
- migrate_enable();
- rcu_read_unlock();
-
- do_div(time_spent, repeat);
- *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
+ bpf_cgroup_storage_unset();
+ } while (bpf_test_timer_continue(&t, repeat, &ret, time));
+ bpf_test_timer_leave(&t);
for_each_cgroup_storage_type(stype)
bpf_cgroup_storage_free(storage[stype]);
@@ -167,10 +214,37 @@ int noinline bpf_modify_return_test(int a, int *b)
*b += 1;
return a + *b;
}
+
+u64 noinline bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
+{
+ return a + b + c + d;
+}
+
+int noinline bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
+{
+ return a + b;
+}
+
+struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
+{
+ return sk;
+}
+
__diag_pop();
ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
+BTF_SET_START(test_sk_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test1)
+BTF_ID(func, bpf_kfunc_call_test2)
+BTF_ID(func, bpf_kfunc_call_test3)
+BTF_SET_END(test_sk_kfunc_ids)
+
+bool bpf_prog_test_check_kfunc_call(u32 kfunc_id)
+{
+ return btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id);
+}
+
static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
u32 headroom, u32 tailroom)
{
@@ -674,18 +748,17 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
+ struct bpf_test_timer t = { NO_PREEMPT };
u32 size = kattr->test.data_size_in;
struct bpf_flow_dissector ctx = {};
u32 repeat = kattr->test.repeat;
struct bpf_flow_keys *user_ctx;
struct bpf_flow_keys flow_keys;
- u64 time_start, time_spent = 0;
const struct ethhdr *eth;
unsigned int flags = 0;
u32 retval, duration;
void *data;
int ret;
- u32 i;
if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
return -EINVAL;
@@ -721,48 +794,127 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
ctx.data = data;
ctx.data_end = (__u8 *)data + size;
- rcu_read_lock();
- preempt_disable();
- time_start = ktime_get_ns();
- for (i = 0; i < repeat; i++) {
+ bpf_test_timer_enter(&t);
+ do {
retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
size, flags);
+ } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
+ bpf_test_timer_leave(&t);
- if (signal_pending(current)) {
- preempt_enable();
- rcu_read_unlock();
+ if (ret < 0)
+ goto out;
- ret = -EINTR;
- goto out;
- }
+ ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
+ retval, duration);
+ if (!ret)
+ ret = bpf_ctx_finish(kattr, uattr, user_ctx,
+ sizeof(struct bpf_flow_keys));
- if (need_resched()) {
- time_spent += ktime_get_ns() - time_start;
- preempt_enable();
- rcu_read_unlock();
+out:
+ kfree(user_ctx);
+ kfree(data);
+ return ret;
+}
- cond_resched();
+int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ struct bpf_test_timer t = { NO_PREEMPT };
+ struct bpf_prog_array *progs = NULL;
+ struct bpf_sk_lookup_kern ctx = {};
+ u32 repeat = kattr->test.repeat;
+ struct bpf_sk_lookup *user_ctx;
+ u32 retval, duration;
+ int ret = -EINVAL;
- rcu_read_lock();
- preempt_disable();
- time_start = ktime_get_ns();
- }
+ if (prog->type != BPF_PROG_TYPE_SK_LOOKUP)
+ return -EINVAL;
+
+ if (kattr->test.flags || kattr->test.cpu)
+ return -EINVAL;
+
+ if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out ||
+ kattr->test.data_size_out)
+ return -EINVAL;
+
+ if (!repeat)
+ repeat = 1;
+
+ user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx));
+ if (IS_ERR(user_ctx))
+ return PTR_ERR(user_ctx);
+
+ if (!user_ctx)
+ return -EINVAL;
+
+ if (user_ctx->sk)
+ goto out;
+
+ if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
+ goto out;
+
+ if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
+ ret = -ERANGE;
+ goto out;
}
- time_spent += ktime_get_ns() - time_start;
- preempt_enable();
- rcu_read_unlock();
- do_div(time_spent, repeat);
- duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
+ ctx.family = (u16)user_ctx->family;
+ ctx.protocol = (u16)user_ctx->protocol;
+ ctx.dport = (u16)user_ctx->local_port;
+ ctx.sport = (__force __be16)user_ctx->remote_port;
- ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
- retval, duration);
+ switch (ctx.family) {
+ case AF_INET:
+ ctx.v4.daddr = (__force __be32)user_ctx->local_ip4;
+ ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4;
+ break;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6;
+ ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6;
+ break;
+#endif
+
+ default:
+ ret = -EAFNOSUPPORT;
+ goto out;
+ }
+
+ progs = bpf_prog_array_alloc(1, GFP_KERNEL);
+ if (!progs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ progs->items[0].prog = prog;
+
+ bpf_test_timer_enter(&t);
+ do {
+ ctx.selected_sk = NULL;
+ retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN);
+ } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
+ bpf_test_timer_leave(&t);
+
+ if (ret < 0)
+ goto out;
+
+ user_ctx->cookie = 0;
+ if (ctx.selected_sk) {
+ if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
+ }
+
+ ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
if (!ret)
- ret = bpf_ctx_finish(kattr, uattr, user_ctx,
- sizeof(struct bpf_flow_keys));
+ ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
out:
+ bpf_prog_array_free(progs);
kfree(user_ctx);
- kfree(data);
return ret;
}