diff options
Diffstat (limited to 'samples')
-rw-r--r-- | samples/bpf/Makefile | 3 | ||||
-rwxr-xr-x | samples/bpf/do_hbm_test.sh | 22 | ||||
-rw-r--r-- | samples/bpf/hbm.c | 18 | ||||
-rw-r--r-- | samples/bpf/hbm_edt_kern.c | 168 | ||||
-rw-r--r-- | samples/bpf/hbm_kern.h | 40 | ||||
-rw-r--r-- | samples/bpf/ibumad_kern.c | 18 | ||||
-rw-r--r-- | samples/bpf/tcp_bpf.readme | 2 | ||||
-rw-r--r-- | samples/bpf/tcp_dumpstats_kern.c | 68 | ||||
-rw-r--r-- | samples/bpf/xdp_adjust_tail_user.c | 12 | ||||
-rw-r--r-- | samples/bpf/xdp_redirect_map_user.c | 15 | ||||
-rw-r--r-- | samples/bpf/xdp_redirect_user.c | 15 | ||||
-rw-r--r-- | samples/bpf/xdp_tx_iptunnel_user.c | 12 | ||||
-rw-r--r-- | samples/bpf/xdpsock_user.c | 44 |
13 files changed, 377 insertions, 60 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 0917f8cf4fab..f90daadfbc89 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -154,6 +154,7 @@ always += tcp_iw_kern.o always += tcp_clamp_kern.o always += tcp_basertt_kern.o always += tcp_tos_reflect_kern.o +always += tcp_dumpstats_kern.o always += xdp_redirect_kern.o always += xdp_redirect_map_kern.o always += xdp_redirect_cpu_kern.o @@ -168,6 +169,7 @@ always += task_fd_query_kern.o always += xdp_sample_pkts_kern.o always += ibumad_kern.o always += hbm_out_kern.o +always += hbm_edt_kern.o KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/bpf/ @@ -272,6 +274,7 @@ $(src)/*.c: verify_target_bpf $(LIBBPF) $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h $(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h $(obj)/hbm.o: $(src)/hbm.h +$(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h # asm/sysreg.h - inline assembly used by it is incompatible with llvm. # But, there is no easy way to fix it, so just exclude it since it is diff --git a/samples/bpf/do_hbm_test.sh b/samples/bpf/do_hbm_test.sh index e48b047d4646..ffe4c0607341 100755 --- a/samples/bpf/do_hbm_test.sh +++ b/samples/bpf/do_hbm_test.sh @@ -14,7 +14,7 @@ Usage() { echo "loads. The output is the goodput in Mbps (unless -D was used)." echo "" echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]" - echo " [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E]" + echo " [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E] [--edt]" echo " [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id >]" echo " [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]" echo " [-q=<qdisc>] [-R] [-s=<server>|--server=<server]" @@ -30,6 +30,7 @@ Usage() { echo " other detailed information. This information is" echo " test dependent (i.e. iperf3 or netperf)." echo " -E enable ECN (not required for dctcp)" + echo " --edt use fq's Earliest Departure Time (requires fq)" echo " -f or --flows number of concurrent flows (default=1)" echo " -i or --id cgroup id (an integer, default is 1)" echo " -N use netperf instead of iperf3" @@ -130,13 +131,12 @@ processArgs () { details=1 ;; -E) - ecn=1 + ecn=1 + ;; + --edt) + flags="$flags --edt" + qdisc="fq" ;; - # Support for upcomming fq Early Departure Time egress rate limiting - #--edt) - # prog="hbm_out_edt_kern.o" - # qdisc="fq" - # ;; -f=*|--flows=*) flows="${i#*=}" ;; @@ -228,8 +228,8 @@ if [ "$netem" -ne "0" ] ; then tc qdisc del dev lo root > /dev/null 2>&1 tc qdisc add dev lo root netem delay $netem\ms > /dev/null 2>&1 elif [ "$qdisc" != "" ] ; then - tc qdisc del dev lo root > /dev/null 2>&1 - tc qdisc add dev lo root $qdisc > /dev/null 2>&1 + tc qdisc del dev eth0 root > /dev/null 2>&1 + tc qdisc add dev eth0 root $qdisc > /dev/null 2>&1 fi n=0 @@ -399,7 +399,9 @@ fi if [ "$netem" -ne "0" ] ; then tc qdisc del dev lo root > /dev/null 2>&1 fi - +if [ "$qdisc" != "" ] ; then + tc qdisc del dev eth0 root > /dev/null 2>&1 +fi sleep 2 hbmPid=`ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }'` diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c index b905b32ff185..e0fbab9bec83 100644 --- a/samples/bpf/hbm.c +++ b/samples/bpf/hbm.c @@ -62,6 +62,7 @@ bool loopback_flag; bool debugFlag; bool work_conserving_flag; bool no_cn_flag; +bool edt_flag; static void Usage(void); static void read_trace_pipe2(void); @@ -372,9 +373,14 @@ static int run_bpf_prog(char *prog, int cg_id) fprintf(fout, "avg rtt:%d\n", (int)(qstats.sum_rtt / (qstats.pkts_total + 1))); // Average credit - fprintf(fout, "avg credit:%d\n", - (int)(qstats.sum_credit / - (1500 * ((int)qstats.pkts_total) + 1))); + if (edt_flag) + fprintf(fout, "avg credit_ms:%.03f\n", + (qstats.sum_credit / + (qstats.pkts_total + 1.0)) / 1000000.0); + else + fprintf(fout, "avg credit:%d\n", + (int)(qstats.sum_credit / + (1500 * ((int)qstats.pkts_total ) + 1))); // Return values stats for (k = 0; k < RET_VAL_COUNT; k++) { @@ -408,6 +414,7 @@ static void Usage(void) " Where:\n" " -o indicates egress direction (default)\n" " -d print BPF trace debug buffer\n" + " --edt use fq's Earliest Departure Time\n" " -l also limit flows using loopback\n" " -n <#> to create cgroup \"/hbm#\" and attach prog\n" " Default is /hbm1\n" @@ -433,6 +440,7 @@ int main(int argc, char **argv) char *optstring = "iodln:r:st:wh"; struct option loptions[] = { {"no_cn", 0, NULL, 1}, + {"edt", 0, NULL, 2}, {NULL, 0, NULL, 0} }; @@ -441,6 +449,10 @@ int main(int argc, char **argv) case 1: no_cn_flag = true; break; + case 2: + prog = "hbm_edt_kern.o"; + edt_flag = true; + break; case'o': break; case 'd': diff --git a/samples/bpf/hbm_edt_kern.c b/samples/bpf/hbm_edt_kern.c new file mode 100644 index 000000000000..a65b677acdb0 --- /dev/null +++ b/samples/bpf/hbm_edt_kern.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Sample Host Bandwidth Manager (HBM) BPF program. + * + * A cgroup skb BPF egress program to limit cgroup output bandwidth. + * It uses a modified virtual token bucket queue to limit average + * egress bandwidth. The implementation uses credits instead of tokens. + * Negative credits imply that queueing would have happened (this is + * a virtual queue, so no queueing is done by it. However, queueing may + * occur at the actual qdisc (which is not used for rate limiting). + * + * This implementation uses 3 thresholds, one to start marking packets and + * the other two to drop packets: + * CREDIT + * - <--------------------------|------------------------> + + * | | | 0 + * | Large pkt | + * | drop thresh | + * Small pkt drop Mark threshold + * thresh + * + * The effect of marking depends on the type of packet: + * a) If the packet is ECN enabled and it is a TCP packet, then the packet + * is ECN marked. + * b) If the packet is a TCP packet, then we probabilistically call tcp_cwr + * to reduce the congestion window. The current implementation uses a linear + * distribution (0% probability at marking threshold, 100% probability + * at drop threshold). + * c) If the packet is not a TCP packet, then it is dropped. + * + * If the credit is below the drop threshold, the packet is dropped. If it + * is a TCP packet, then it also calls tcp_cwr since packets dropped by + * by a cgroup skb BPF program do not automatically trigger a call to + * tcp_cwr in the current kernel code. + * + * This BPF program actually uses 2 drop thresholds, one threshold + * for larger packets (>= 120 bytes) and another for smaller packets. This + * protects smaller packets such as SYNs, ACKs, etc. + * + * The default bandwidth limit is set at 1Gbps but this can be changed by + * a user program through a shared BPF map. In addition, by default this BPF + * program does not limit connections using loopback. This behavior can be + * overwritten by the user program. There is also an option to calculate + * some statistics, such as percent of packets marked or dropped, which + * a user program, such as hbm, can access. + */ + +#include "hbm_kern.h" + +SEC("cgroup_skb/egress") +int _hbm_out_cg(struct __sk_buff *skb) +{ + long long delta = 0, delta_send; + unsigned long long curtime, sendtime; + struct hbm_queue_stats *qsp = NULL; + unsigned int queue_index = 0; + bool congestion_flag = false; + bool ecn_ce_flag = false; + struct hbm_pkt_info pkti = {}; + struct hbm_vqueue *qdp; + bool drop_flag = false; + bool cwr_flag = false; + int len = skb->len; + int rv = ALLOW_PKT; + + qsp = bpf_map_lookup_elem(&queue_stats, &queue_index); + + // Check if we should ignore loopback traffic + if (qsp != NULL && !qsp->loopback && (skb->ifindex == 1)) + return ALLOW_PKT; + + hbm_get_pkt_info(skb, &pkti); + + // We may want to account for the length of headers in len + // calculation, like ETH header + overhead, specially if it + // is a gso packet. But I am not doing it right now. + + qdp = bpf_get_local_storage(&queue_state, 0); + if (!qdp) + return ALLOW_PKT; + if (qdp->lasttime == 0) + hbm_init_edt_vqueue(qdp, 1024); + + curtime = bpf_ktime_get_ns(); + + // Begin critical section + bpf_spin_lock(&qdp->lock); + delta = qdp->lasttime - curtime; + // bound bursts to 100us + if (delta < -BURST_SIZE_NS) { + // negative delta is a credit that allows bursts + qdp->lasttime = curtime - BURST_SIZE_NS; + delta = -BURST_SIZE_NS; + } + sendtime = qdp->lasttime; + delta_send = BYTES_TO_NS(len, qdp->rate); + __sync_add_and_fetch(&(qdp->lasttime), delta_send); + bpf_spin_unlock(&qdp->lock); + // End critical section + + // Set EDT of packet + skb->tstamp = sendtime; + + // Check if we should update rate + if (qsp != NULL && (qsp->rate * 128) != qdp->rate) + qdp->rate = qsp->rate * 128; + + // Set flags (drop, congestion, cwr) + // last packet will be sent in the future, bound latency + if (delta > DROP_THRESH_NS || (delta > LARGE_PKT_DROP_THRESH_NS && + len > LARGE_PKT_THRESH)) { + drop_flag = true; + if (pkti.is_tcp && pkti.ecn == 0) + cwr_flag = true; + } else if (delta > MARK_THRESH_NS) { + if (pkti.is_tcp) + congestion_flag = true; + else + drop_flag = true; + } + + if (congestion_flag) { + if (bpf_skb_ecn_set_ce(skb)) { + ecn_ce_flag = true; + } else { + if (pkti.is_tcp) { + unsigned int rand = bpf_get_prandom_u32(); + + if (delta >= MARK_THRESH_NS + + (rand % MARK_REGION_SIZE_NS)) { + // Do congestion control + cwr_flag = true; + } + } else if (len > LARGE_PKT_THRESH) { + // Problem if too many small packets? + drop_flag = true; + congestion_flag = false; + } + } + } + + if (pkti.is_tcp && drop_flag && pkti.packets_out <= 1) { + drop_flag = false; + cwr_flag = true; + congestion_flag = false; + } + + if (qsp != NULL && qsp->no_cn) + cwr_flag = false; + + hbm_update_stats(qsp, len, curtime, congestion_flag, drop_flag, + cwr_flag, ecn_ce_flag, &pkti, (int) delta); + + if (drop_flag) { + __sync_add_and_fetch(&(qdp->lasttime), -delta_send); + rv = DROP_PKT; + } + + if (cwr_flag) + rv |= CWR; + return rv; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/hbm_kern.h b/samples/bpf/hbm_kern.h index be19cf1d5cd5..aa207a2eebbd 100644 --- a/samples/bpf/hbm_kern.h +++ b/samples/bpf/hbm_kern.h @@ -29,6 +29,7 @@ #define DROP_PKT 0 #define ALLOW_PKT 1 #define TCP_ECN_OK 1 +#define CWR 2 #ifndef HBM_DEBUG // Define HBM_DEBUG to enable debugging #undef bpf_printk @@ -45,8 +46,18 @@ #define MAX_CREDIT (100 * MAX_BYTES_PER_PACKET) #define INIT_CREDIT (INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET) +// Time base accounting for fq's EDT +#define BURST_SIZE_NS 100000 // 100us +#define MARK_THRESH_NS 50000 // 50us +#define DROP_THRESH_NS 500000 // 500us +// Reserve 20us of queuing for small packets (less than 120 bytes) +#define LARGE_PKT_DROP_THRESH_NS (DROP_THRESH_NS - 20000) +#define MARK_REGION_SIZE_NS (LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS) + // rate in bytes per ns << 20 #define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20) +#define BYTES_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20) +#define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate)) struct bpf_map_def SEC("maps") queue_state = { .type = BPF_MAP_TYPE_CGROUP_STORAGE, @@ -67,6 +78,7 @@ BPF_ANNOTATE_KV_PAIR(queue_stats, int, struct hbm_queue_stats); struct hbm_pkt_info { int cwnd; int rtt; + int packets_out; bool is_ip; bool is_tcp; short ecn; @@ -86,16 +98,20 @@ static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti) if (tp) { pkti->cwnd = tp->snd_cwnd; pkti->rtt = tp->srtt_us >> 3; + pkti->packets_out = tp->packets_out; return 0; } } } } + pkti->cwnd = 0; + pkti->rtt = 0; + pkti->packets_out = 0; return 1; } -static __always_inline void hbm_get_pkt_info(struct __sk_buff *skb, - struct hbm_pkt_info *pkti) +static void hbm_get_pkt_info(struct __sk_buff *skb, + struct hbm_pkt_info *pkti) { struct iphdr iph; struct ipv6hdr *ip6h; @@ -123,10 +139,22 @@ static __always_inline void hbm_get_pkt_info(struct __sk_buff *skb, static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate) { - bpf_printk("Initializing queue_state, rate:%d\n", rate * 128); - qdp->lasttime = bpf_ktime_get_ns(); - qdp->credit = INIT_CREDIT; - qdp->rate = rate * 128; + bpf_printk("Initializing queue_state, rate:%d\n", rate * 128); + qdp->lasttime = bpf_ktime_get_ns(); + qdp->credit = INIT_CREDIT; + qdp->rate = rate * 128; +} + +static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp, + int rate) +{ + unsigned long long curtime; + + curtime = bpf_ktime_get_ns(); + bpf_printk("Initializing queue_state, rate:%d\n", rate * 128); + qdp->lasttime = curtime - BURST_SIZE_NS; // support initial burst + qdp->credit = 0; // not used + qdp->rate = rate * 128; } static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp, diff --git a/samples/bpf/ibumad_kern.c b/samples/bpf/ibumad_kern.c index 38b2b3f22049..f281df7e0089 100644 --- a/samples/bpf/ibumad_kern.c +++ b/samples/bpf/ibumad_kern.c @@ -31,15 +31,9 @@ struct bpf_map_def SEC("maps") write_count = { }; #undef DEBUG -#ifdef DEBUG -#define bpf_debug(fmt, ...) \ -({ \ - char ____fmt[] = fmt; \ - bpf_trace_printk(____fmt, sizeof(____fmt), \ - ##__VA_ARGS__); \ -}) -#else -#define bpf_debug(fmt, ...) +#ifndef DEBUG +#undef bpf_printk +#define bpf_printk(fmt, ...) #endif /* Taken from the current format defined in @@ -86,7 +80,7 @@ int on_ib_umad_read_recv(struct ib_umad_rw_args *ctx) u64 zero = 0, *val; u8 class = ctx->mgmt_class; - bpf_debug("ib_umad read recv : class 0x%x\n", class); + bpf_printk("ib_umad read recv : class 0x%x\n", class); val = bpf_map_lookup_elem(&read_count, &class); if (!val) { @@ -106,7 +100,7 @@ int on_ib_umad_read_send(struct ib_umad_rw_args *ctx) u64 zero = 0, *val; u8 class = ctx->mgmt_class; - bpf_debug("ib_umad read send : class 0x%x\n", class); + bpf_printk("ib_umad read send : class 0x%x\n", class); val = bpf_map_lookup_elem(&read_count, &class); if (!val) { @@ -126,7 +120,7 @@ int on_ib_umad_write(struct ib_umad_rw_args *ctx) u64 zero = 0, *val; u8 class = ctx->mgmt_class; - bpf_debug("ib_umad write : class 0x%x\n", class); + bpf_printk("ib_umad write : class 0x%x\n", class); val = bpf_map_lookup_elem(&write_count, &class); if (!val) { diff --git a/samples/bpf/tcp_bpf.readme b/samples/bpf/tcp_bpf.readme index fee746621aec..78e247f62108 100644 --- a/samples/bpf/tcp_bpf.readme +++ b/samples/bpf/tcp_bpf.readme @@ -25,4 +25,4 @@ attached to the cgroupv2). To remove (unattach) a socket_ops BPF program from a cgroupv2: - bpftool cgroup attach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog + bpftool cgroup detach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog diff --git a/samples/bpf/tcp_dumpstats_kern.c b/samples/bpf/tcp_dumpstats_kern.c new file mode 100644 index 000000000000..8557913106a0 --- /dev/null +++ b/samples/bpf/tcp_dumpstats_kern.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Refer to samples/bpf/tcp_bpf.readme for the instructions on + * how to run this sample program. + */ +#include <linux/bpf.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define INTERVAL 1000000000ULL + +int _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; + +struct { + __u32 type; + __u32 map_flags; + int *key; + __u64 *value; +} bpf_next_dump SEC(".maps") = { + .type = BPF_MAP_TYPE_SK_STORAGE, + .map_flags = BPF_F_NO_PREALLOC, +}; + +SEC("sockops") +int _sockops(struct bpf_sock_ops *ctx) +{ + struct bpf_tcp_sock *tcp_sk; + struct bpf_sock *sk; + __u64 *next_dump; + __u64 now; + + switch (ctx->op) { + case BPF_SOCK_OPS_TCP_CONNECT_CB: + bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_RTT_CB_FLAG); + return 1; + case BPF_SOCK_OPS_RTT_CB: + break; + default: + return 1; + } + + sk = ctx->sk; + if (!sk) + return 1; + + next_dump = bpf_sk_storage_get(&bpf_next_dump, sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!next_dump) + return 1; + + now = bpf_ktime_get_ns(); + if (now < *next_dump) + return 1; + + tcp_sk = bpf_tcp_sock(sk); + if (!tcp_sk) + return 1; + + *next_dump = now + INTERVAL; + + bpf_printk("dsack_dups=%u delivered=%u\n", + tcp_sk->dsack_dups, tcp_sk->delivered); + bpf_printk("delivered_ce=%u icsk_retransmits=%u\n", + tcp_sk->delivered_ce, tcp_sk->icsk_retransmits); + + return 1; +} diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index 586ff751aba9..a3596b617c4c 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -13,6 +13,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <net/if.h> #include <sys/resource.h> #include <arpa/inet.h> #include <netinet/ether.h> @@ -69,7 +70,7 @@ static void usage(const char *cmd) printf("Start a XDP prog which send ICMP \"packet too big\" \n" "messages if ingress packet is bigger then MAX_SIZE bytes\n"); printf("Usage: %s [...]\n", cmd); - printf(" -i <ifindex> Interface Index\n"); + printf(" -i <ifname|ifindex> Interface\n"); printf(" -T <stop-after-X-seconds> Default: 0 (forever)\n"); printf(" -S use skb-mode\n"); printf(" -N enforce native mode\n"); @@ -102,7 +103,9 @@ int main(int argc, char **argv) switch (opt) { case 'i': - ifindex = atoi(optarg); + ifindex = if_nametoindex(optarg); + if (!ifindex) + ifindex = atoi(optarg); break; case 'T': kill_after_s = atoi(optarg); @@ -136,6 +139,11 @@ int main(int argc, char **argv) return 1; } + if (!ifindex) { + fprintf(stderr, "Invalid ifname\n"); + return 1; + } + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); prog_load_attr.file = filename; diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c index 15bb6f67f9c3..f70ee33907fd 100644 --- a/samples/bpf/xdp_redirect_map_user.c +++ b/samples/bpf/xdp_redirect_map_user.c @@ -10,6 +10,7 @@ #include <stdlib.h> #include <stdbool.h> #include <string.h> +#include <net/if.h> #include <unistd.h> #include <libgen.h> #include <sys/resource.h> @@ -85,7 +86,7 @@ static void poll_stats(int interval, int ifindex) static void usage(const char *prog) { fprintf(stderr, - "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n" + "usage: %s [OPTS] <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n\n" "OPTS:\n" " -S use skb-mode\n" " -N enforce native mode\n" @@ -127,7 +128,7 @@ int main(int argc, char **argv) } if (optind == argc) { - printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]); + printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]); return 1; } @@ -136,8 +137,14 @@ int main(int argc, char **argv) return 1; } - ifindex_in = strtoul(argv[optind], NULL, 0); - ifindex_out = strtoul(argv[optind + 1], NULL, 0); + ifindex_in = if_nametoindex(argv[optind]); + if (!ifindex_in) + ifindex_in = strtoul(argv[optind], NULL, 0); + + ifindex_out = if_nametoindex(argv[optind + 1]); + if (!ifindex_out) + ifindex_out = strtoul(argv[optind + 1], NULL, 0); + printf("input: %d output: %d\n", ifindex_in, ifindex_out); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c index ce71be187205..39de06f3ec25 100644 --- a/samples/bpf/xdp_redirect_user.c +++ b/samples/bpf/xdp_redirect_user.c @@ -10,6 +10,7 @@ #include <stdlib.h> #include <stdbool.h> #include <string.h> +#include <net/if.h> #include <unistd.h> #include <libgen.h> #include <sys/resource.h> @@ -85,7 +86,7 @@ static void poll_stats(int interval, int ifindex) static void usage(const char *prog) { fprintf(stderr, - "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n" + "usage: %s [OPTS] <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n\n" "OPTS:\n" " -S use skb-mode\n" " -N enforce native mode\n" @@ -128,7 +129,7 @@ int main(int argc, char **argv) } if (optind == argc) { - printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]); + printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]); return 1; } @@ -137,8 +138,14 @@ int main(int argc, char **argv) return 1; } - ifindex_in = strtoul(argv[optind], NULL, 0); - ifindex_out = strtoul(argv[optind + 1], NULL, 0); + ifindex_in = if_nametoindex(argv[optind]); + if (!ifindex_in) + ifindex_in = strtoul(argv[optind], NULL, 0); + + ifindex_out = if_nametoindex(argv[optind + 1]); + if (!ifindex_out) + ifindex_out = strtoul(argv[optind + 1], NULL, 0); + printf("input: %d output: %d\n", ifindex_in, ifindex_out); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index 394896430712..dfb68582e243 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -9,6 +9,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <net/if.h> #include <sys/resource.h> #include <arpa/inet.h> #include <netinet/ether.h> @@ -83,7 +84,7 @@ static void usage(const char *cmd) "in an IPv4/v6 header and XDP_TX it out. The dst <VIP:PORT>\n" "is used to select packets to encapsulate\n\n"); printf("Usage: %s [...]\n", cmd); - printf(" -i <ifindex> Interface Index\n"); + printf(" -i <ifname|ifindex> Interface\n"); printf(" -a <vip-service-address> IPv4 or IPv6\n"); printf(" -p <vip-service-port> A port range (e.g. 433-444) is also allowed\n"); printf(" -s <source-ip> Used in the IPTunnel header\n"); @@ -181,7 +182,9 @@ int main(int argc, char **argv) switch (opt) { case 'i': - ifindex = atoi(optarg); + ifindex = if_nametoindex(optarg); + if (!ifindex) + ifindex = atoi(optarg); break; case 'a': vip.family = parse_ipstr(optarg, vip.daddr.v6); @@ -253,6 +256,11 @@ int main(int argc, char **argv) return 1; } + if (!ifindex) { + fprintf(stderr, "Invalid ifname\n"); + return 1; + } + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); prog_load_attr.file = filename; diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 0f5eb0d7f2df..93eaaf7239b2 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -68,6 +68,7 @@ static int opt_queue; static int opt_poll; static int opt_interval = 1; static u32 opt_xdp_bind_flags; +static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; static __u32 prog_id; struct xsk_umem_info { @@ -276,6 +277,12 @@ static size_t gen_eth_frame(struct xsk_umem_info *umem, u64 addr) static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) { struct xsk_umem_info *umem; + struct xsk_umem_config cfg = { + .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, + .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, + .frame_size = opt_xsk_frame_size, + .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM, + }; int ret; umem = calloc(1, sizeof(*umem)); @@ -283,7 +290,7 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) exit_with_error(errno); ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq, - NULL); + &cfg); if (ret) exit_with_error(-ret); @@ -323,11 +330,9 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem) &idx); if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS) exit_with_error(-ret); - for (i = 0; - i < XSK_RING_PROD__DEFAULT_NUM_DESCS * - XSK_UMEM__DEFAULT_FRAME_SIZE; - i += XSK_UMEM__DEFAULT_FRAME_SIZE) - *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) = i; + for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++) + *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) = + i * opt_xsk_frame_size; xsk_ring_prod__submit(&xsk->umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS); @@ -346,6 +351,7 @@ static struct option long_options[] = { {"interval", required_argument, 0, 'n'}, {"zero-copy", no_argument, 0, 'z'}, {"copy", no_argument, 0, 'c'}, + {"frame-size", required_argument, 0, 'f'}, {0, 0, 0, 0} }; @@ -365,8 +371,9 @@ static void usage(const char *prog) " -n, --interval=n Specify statistics update interval (default 1 sec).\n" " -z, --zero-copy Force zero-copy mode.\n" " -c, --copy Force copy mode.\n" + " -f, --frame-size=n Set the frame size (must be a power of two, default is %d).\n" "\n"; - fprintf(stderr, str, prog); + fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE); exit(EXIT_FAILURE); } @@ -377,7 +384,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "Frtli:q:psSNn:cz", long_options, + c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:", long_options, &option_index); if (c == -1) break; @@ -420,6 +427,9 @@ static void parse_command_line(int argc, char **argv) case 'F': opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; break; + case 'f': + opt_xsk_frame_size = atoi(optarg); + break; default: usage(basename(argv[0])); } @@ -432,6 +442,11 @@ static void parse_command_line(int argc, char **argv) usage(basename(argv[0])); } + if (opt_xsk_frame_size & (opt_xsk_frame_size - 1)) { + fprintf(stderr, "--frame-size=%d is not a power of two\n", + opt_xsk_frame_size); + usage(basename(argv[0])); + } } static void kick_tx(struct xsk_socket_info *xsk) @@ -583,8 +598,7 @@ static void tx_only(struct xsk_socket_info *xsk) for (i = 0; i < BATCH_SIZE; i++) { xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr - = (frame_nb + i) << - XSK_UMEM__DEFAULT_FRAME_SHIFT; + = (frame_nb + i) * opt_xsk_frame_size; xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len = sizeof(pkt_data) - 1; } @@ -661,21 +675,19 @@ int main(int argc, char **argv) } ret = posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */ - NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE); + NUM_FRAMES * opt_xsk_frame_size); if (ret) exit_with_error(ret); /* Create sockets... */ - umem = xsk_configure_umem(bufs, - NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE); + umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size); xsks[num_socks++] = xsk_configure_socket(umem); if (opt_bench == BENCH_TXONLY) { int i; - for (i = 0; i < NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE; - i += XSK_UMEM__DEFAULT_FRAME_SIZE) - (void)gen_eth_frame(umem, i); + for (i = 0; i < NUM_FRAMES; i++) + (void)gen_eth_frame(umem, i * opt_xsk_frame_size); } signal(SIGINT, int_exit); |