diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-08-31 16:43:06 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-08-31 16:43:06 -0700 |
commit | 9e9fb7655ed585da8f468e29221f0ba194a5f613 (patch) | |
tree | d2c51887389b8297635a5b90d5766897f00fe928 /net/bpf/test_run.c | |
parent | 86ac54e79fe09b34c52691a780a6e31d12fa57f4 (diff) | |
parent | 29ce8f9701072fc221d9c38ad952de1a9578f95c (diff) | |
download | linux-9e9fb7655ed585da8f468e29221f0ba194a5f613.tar.bz2 |
Merge tag 'net-next-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski:
"Core:
- Enable memcg accounting for various networking objects.
BPF:
- Introduce bpf timers.
- Add perf link and an opaque bpf_cookie which the program can read
back, for use in a libbpf-based USDT library.
- Add bpf_task_pt_regs() helper to access user space pt_regs in
kprobes, to help user space stack unwinding.
- Add support for UNIX sockets for BPF sockmap.
- Extend BPF iterator support for UNIX domain sockets.
- Allow BPF TCP congestion control progs and bpf iterators to call
bpf_setsockopt(), e.g. to switch to another congestion control
algorithm.
Protocols:
- Support IOAM Pre-allocated Trace with IPv6.
- Support Management Component Transport Protocol.
- bridge: multicast: add vlan support.
- netfilter: add hooks for the SRv6 lightweight tunnel driver.
- tcp:
- enable mid-stream window clamping (by user space or BPF)
- allow data-less, empty-cookie SYN with TFO_SERVER_COOKIE_NOT_REQD
- more accurate DSACK processing for RACK-TLP
- mptcp:
- add full mesh path manager option
- add partial support for MP_FAIL
- improve use of backup subflows
- optimize option processing
- af_unix: add OOB notification support.
- ipv6: add IFLA_INET6_RA_MTU to expose MTU value advertised by the
router.
- mac80211: Target Wake Time support in AP mode.
- can: j1939: extend UAPI to notify about RX status.
Driver APIs:
- Add page frag support in page pool API.
- Many improvements to the DSA (distributed switch) APIs.
- ethtool: extend IRQ coalesce uAPI with timer reset modes.
- devlink: control which auxiliary devices are created.
- Support CAN PHYs via the generic PHY subsystem.
- Proper cross-chip support for tag_8021q.
- Allow TX forwarding for the software bridge data path to be
offloaded to capable devices.
Drivers:
- veth: more flexible configuration of the number of channels.
- openvswitch: introduce per-cpu upcall dispatch.
- Add internet mix (IMIX) mode to pktgen.
- Transparently handle XDP operations in the bonding driver.
- Add LiteETH network driver.
- Renesas (ravb):
- support Gigabit Ethernet IP
- NXP Ethernet switch (sja1105):
- fast aging support
- support for "H" switch topologies
- traffic termination for ports under VLAN-aware bridge
- Intel 1G Ethernet:
- support getcrosststamp() with PCIe PTM (Precision Time
Measurement) for better time sync
- support Credit-Based Shaper (CBS) offload, enabling HW traffic
prioritization and bandwidth reservation
- Broadcom Ethernet (bnxt):
- support pulse-per-second output
- support larger Rx rings
- Mellanox Ethernet (mlx5)
- support ethtool RSS contexts and MQPRIO channel mode
- support LAG offload with bridging
- support devlink rate limit API
- support packet sampling on tunnels
- Huawei Ethernet (hns3):
- basic devlink support
- add extended IRQ coalescing support
- report extended link state
- Netronome Ethernet (nfp):
- add conntrack offload support
- Broadcom WiFi (brcmfmac):
- add WPA3 Personal with FT to supported cipher suites
- support 43752 SDIO device
- Intel WiFi (iwlwifi):
- support scanning hidden 6GHz networks
- support for a new hardware family (Bz)
- Xen pv driver:
- harden netfront against malicious backends
- Qualcomm mobile:
- ipa: refactor power management and enable automatic suspend
- mhi: move MBIM to WWAN subsystem interfaces
Refactor:
- Ambient BPF run context and cgroup storage cleanup.
- Compat rework for ndo_ioctl.
Old code removal:
- prism54: remove the obsoleted driver, deprecated by the p54 driver.
- wan: remove sbni/granch driver"
* tag 'net-next-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1715 commits)
net: Add depends on OF_NET for LiteX's LiteETH
ipv6: seg6: remove duplicated include
net: hns3: remove unnecessary spaces
net: hns3: add some required spaces
net: hns3: clean up a type mismatch warning
net: hns3: refine function hns3_set_default_feature()
ipv6: remove duplicated 'net/lwtunnel.h' include
net: w5100: check return value after calling platform_get_resource()
net/mlxbf_gige: Make use of devm_platform_ioremap_resourcexxx()
net: mdio: mscc-miim: Make use of the helper function devm_platform_ioremap_resource()
net: mdio-ipq4019: Make use of devm_platform_ioremap_resource()
fou: remove sparse errors
ipv4: fix endianness issue in inet_rtm_getroute_build_skb()
octeontx2-af: Set proper errorcode for IPv4 checksum errors
octeontx2-af: Fix static code analyzer reported issues
octeontx2-af: Fix mailbox errors in nix_rss_flowkey_cfg
octeontx2-af: Fix loop in free and unmap counter
af_unix: fix potential NULL deref in unix_dgram_connect()
dpaa2-eth: Replace strlcpy with strscpy
octeontx2-af: Use NDC TX for transmit packet data
...
Diffstat (limited to 'net/bpf/test_run.c')
-rw-r--r-- | net/bpf/test_run.c | 139 |
1 files changed, 116 insertions, 23 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index caa16bf30fb5..2eb0e55ef54d 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -16,6 +16,7 @@ #include <linux/error-injection.h> #include <linux/smp.h> #include <linux/sock_diag.h> +#include <net/xdp.h> #define CREATE_TRACE_POINTS #include <trace/events/bpf_test_run.h> @@ -88,17 +89,19 @@ reset: static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *retval, u32 *time, bool xdp) { - struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL }; + struct bpf_prog_array_item item = {.prog = prog}; + struct bpf_run_ctx *old_ctx; + struct bpf_cg_run_ctx run_ctx; struct bpf_test_timer t = { NO_MIGRATE }; enum bpf_cgroup_storage_type stype; int ret; for_each_cgroup_storage_type(stype) { - storage[stype] = bpf_cgroup_storage_alloc(prog, stype); - if (IS_ERR(storage[stype])) { - storage[stype] = NULL; + item.cgroup_storage[stype] = bpf_cgroup_storage_alloc(prog, stype); + if (IS_ERR(item.cgroup_storage[stype])) { + item.cgroup_storage[stype] = NULL; for_each_cgroup_storage_type(stype) - bpf_cgroup_storage_free(storage[stype]); + bpf_cgroup_storage_free(item.cgroup_storage[stype]); return -ENOMEM; } } @@ -107,22 +110,19 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, repeat = 1; bpf_test_timer_enter(&t); + old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); do { - ret = bpf_cgroup_storage_set(storage); - if (ret) - break; - + run_ctx.prog_item = &item; if (xdp) *retval = bpf_prog_run_xdp(prog, ctx); else - *retval = BPF_PROG_RUN(prog, ctx); - - bpf_cgroup_storage_unset(); + *retval = bpf_prog_run(prog, ctx); } while (bpf_test_timer_continue(&t, repeat, &ret, time)); + bpf_reset_run_ctx(old_ctx); bpf_test_timer_leave(&t); for_each_cgroup_storage_type(stype) - bpf_cgroup_storage_free(storage[stype]); + bpf_cgroup_storage_free(item.cgroup_storage[stype]); return ret; } @@ -327,7 +327,7 @@ __bpf_prog_test_run_raw_tp(void *data) struct bpf_raw_tp_test_run_info 
*info = data; rcu_read_lock(); - info->retval = BPF_PROG_RUN(info->prog, info->ctx); + info->retval = bpf_prog_run(info->prog, info->ctx); rcu_read_unlock(); } @@ -688,6 +688,64 @@ out: return ret; } +static int xdp_convert_md_to_buff(struct xdp_md *xdp_md, struct xdp_buff *xdp) +{ + unsigned int ingress_ifindex, rx_queue_index; + struct netdev_rx_queue *rxqueue; + struct net_device *device; + + if (!xdp_md) + return 0; + + if (xdp_md->egress_ifindex != 0) + return -EINVAL; + + ingress_ifindex = xdp_md->ingress_ifindex; + rx_queue_index = xdp_md->rx_queue_index; + + if (!ingress_ifindex && rx_queue_index) + return -EINVAL; + + if (ingress_ifindex) { + device = dev_get_by_index(current->nsproxy->net_ns, + ingress_ifindex); + if (!device) + return -ENODEV; + + if (rx_queue_index >= device->real_num_rx_queues) + goto free_dev; + + rxqueue = __netif_get_rx_queue(device, rx_queue_index); + + if (!xdp_rxq_info_is_reg(&rxqueue->xdp_rxq)) + goto free_dev; + + xdp->rxq = &rxqueue->xdp_rxq; + /* The device is now tracked in the xdp->rxq for later + * dev_put() + */ + } + + xdp->data = xdp->data_meta + xdp_md->data; + return 0; + +free_dev: + dev_put(device); + return -EINVAL; +} + +static void xdp_convert_buff_to_md(struct xdp_buff *xdp, struct xdp_md *xdp_md) +{ + if (!xdp_md) + return; + + xdp_md->data = xdp->data - xdp->data_meta; + xdp_md->data_end = xdp->data_end - xdp->data_meta; + + if (xdp_md->ingress_ifindex) + dev_put(xdp->rxq->dev); +} + int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { @@ -698,38 +756,73 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, struct netdev_rx_queue *rxqueue; struct xdp_buff xdp = {}; u32 retval, duration; + struct xdp_md *ctx; u32 max_data_sz; void *data; - int ret; + int ret = -EINVAL; if (prog->expected_attach_type == BPF_XDP_DEVMAP || prog->expected_attach_type == BPF_XDP_CPUMAP) return -EINVAL; - if (kattr->test.ctx_in || kattr->test.ctx_out) 
- return -EINVAL; + + ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md)); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + if (ctx) { + /* There can't be user provided data before the meta data */ + if (ctx->data_meta || ctx->data_end != size || + ctx->data > ctx->data_end || + unlikely(xdp_metalen_invalid(ctx->data))) + goto free_ctx; + /* Meta data is allocated from the headroom */ + headroom -= ctx->data; + } /* XDP have extra tailroom as (most) drivers use full page */ max_data_sz = 4096 - headroom - tailroom; data = bpf_test_init(kattr, max_data_sz, headroom, tailroom); - if (IS_ERR(data)) - return PTR_ERR(data); + if (IS_ERR(data)) { + ret = PTR_ERR(data); + goto free_ctx; + } rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0); xdp_init_buff(&xdp, headroom + max_data_sz + tailroom, &rxqueue->xdp_rxq); xdp_prepare_buff(&xdp, data, headroom, size, true); + ret = xdp_convert_md_to_buff(ctx, &xdp); + if (ret) + goto free_data; + bpf_prog_change_xdp(NULL, prog); ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true); + /* We convert the xdp_buff back to an xdp_md before checking the return + * code so the reference count of any held netdevice will be decremented + * even if the test run failed. 
+ */ + xdp_convert_buff_to_md(&xdp, ctx); if (ret) goto out; - if (xdp.data != data + headroom || xdp.data_end != xdp.data + size) - size = xdp.data_end - xdp.data; - ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration); + + if (xdp.data_meta != data + headroom || + xdp.data_end != xdp.data_meta + size) + size = xdp.data_end - xdp.data_meta; + + ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval, + duration); + if (!ret) + ret = bpf_ctx_finish(kattr, uattr, ctx, + sizeof(struct xdp_md)); + out: bpf_prog_change_xdp(prog, NULL); +free_data: kfree(data); +free_ctx: + kfree(ctx); return ret; } @@ -896,7 +989,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat bpf_test_timer_enter(&t); do { ctx.selected_sk = NULL; - retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN); + retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, bpf_prog_run); } while (bpf_test_timer_continue(&t, repeat, &ret, &duration)); bpf_test_timer_leave(&t); |