From eb88d58559b756065667f97ef5891b5c23c57c76 Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 21 Jun 2016 21:05:58 -0700 Subject: samples/bpf: set max locked memory to ulimited Signed-off-by: William Tu Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/sockex2_user.c | 3 +++ samples/bpf/sockex3_user.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'samples/bpf') diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c index 29a276d766fc..8a4085c2d117 100644 --- a/samples/bpf/sockex2_user.c +++ b/samples/bpf/sockex2_user.c @@ -5,6 +5,7 @@ #include "bpf_load.h" #include #include +#include struct pair { __u64 packets; @@ -13,11 +14,13 @@ struct pair { int main(int ac, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; char filename[256]; FILE *f; int i, sock; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + setrlimit(RLIMIT_MEMLOCK, &r); if (load_bpf_file(filename)) { printf("%s", bpf_log_buf); diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index 2617772d060d..d4184ab5f3ac 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -5,6 +5,7 @@ #include "bpf_load.h" #include #include +#include struct flow_keys { __be32 src; @@ -23,11 +24,13 @@ struct pair { int main(int argc, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; char filename[256]; FILE *f; int i, sock; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + setrlimit(RLIMIT_MEMLOCK, &r); if (load_bpf_file(filename)) { printf("%s", bpf_log_buf); -- cgit v1.2.3 From a3f74617340b598dbc7eb5b68d4ed53b4a70f5eb Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 30 Jun 2016 10:28:45 -0700 Subject: cgroup: bpf: Add an example to do cgroup checking in BPF test_cgrp2_array_pin.c: A userland program that creates a bpf_map (BPF_MAP_TYPE_GROUP_ARRAY), pouplates/updates it with a cgroup2's backed fd and pins it to a bpf-fs's file. The pinned file can be loaded by tc and then used by the bpf prog later. This program can also update an existing pinned array and it could be useful for debugging/testing purpose. test_cgrp2_tc_kern.c: A bpf prog which should be loaded by tc. It is to demonstrate the usage of bpf_skb_in_cgroup. test_cgrp2_tc.sh: A script that glues the test_cgrp2_array_pin.c and test_cgrp2_tc_kern.c together. The idea is like: 1. Load the test_cgrp2_tc_kern.o by tc 2. Use test_cgrp2_array_pin.c to populate a BPF_MAP_TYPE_CGROUP_ARRAY with a cgroup fd 3. Do a 'ping -6 ff02::1%ve' to ensure the packet has been dropped because of a match on the cgroup Most of the lines in test_cgrp2_tc.sh is the boilerplate to setup the cgroup/bpf-fs/net-devices/netns...etc. It is not bulletproof on errors but should work well enough and give enough debug info if things did not go well. Signed-off-by: Martin KaFai Lau Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Tejun Heo Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/Makefile | 3 + samples/bpf/bpf_helpers.h | 2 + samples/bpf/test_cgrp2_array_pin.c | 109 ++++++++++++++++++++++ samples/bpf/test_cgrp2_tc.sh | 184 +++++++++++++++++++++++++++++++++++++ samples/bpf/test_cgrp2_tc_kern.c | 69 ++++++++++++++ 5 files changed, 367 insertions(+) create mode 100644 samples/bpf/test_cgrp2_array_pin.c create mode 100755 samples/bpf/test_cgrp2_tc.sh create mode 100644 samples/bpf/test_cgrp2_tc_kern.c (limited to 'samples/bpf') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 0bf2478cb7df..a98b780e974c 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -20,6 +20,7 @@ hostprogs-y += offwaketime hostprogs-y += spintest hostprogs-y += map_perf_test hostprogs-y += test_overhead +hostprogs-y += test_cgrp2_array_pin test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o @@ -40,6 +41,7 @@ offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o spintest-objs := bpf_load.o libbpf.o spintest_user.o map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o +test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -61,6 +63,7 @@ always += map_perf_test_kern.o always += test_overhead_tp_kern.o always += test_overhead_kprobe_kern.o always += parse_varlen.o parse_simple.o parse_ldabs.o +always += test_cgrp2_tc_kern.o HOSTCFLAGS += -I$(objtree)/usr/include diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 7904a2a493de..84e3fd919a06 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -70,6 +70,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag (void *) BPF_FUNC_l3_csum_replace; static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = (void *) BPF_FUNC_l4_csum_replace; +static int (*bpf_skb_in_cgroup)(void *ctx, void *map, int index) = + (void *) BPF_FUNC_skb_in_cgroup; #if defined(__x86_64__) diff --git a/samples/bpf/test_cgrp2_array_pin.c b/samples/bpf/test_cgrp2_array_pin.c new file mode 100644 index 000000000000..70e86f7be69d --- /dev/null +++ b/samples/bpf/test_cgrp2_array_pin.c @@ -0,0 +1,109 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "libbpf.h" + +static void usage(void) +{ + printf("Usage: test_cgrp2_array_pin [...]\n"); + printf(" -F File to pin an BPF cgroup array\n"); + printf(" -U Update an already pinned BPF cgroup array\n"); + printf(" -v Full path of the cgroup2\n"); + printf(" -h Display this help\n"); +} + +int main(int argc, char **argv) +{ + const char *pinned_file = NULL, *cg2 = NULL; + int create_array = 1; + int array_key = 0; + int array_fd = -1; + int cg2_fd = -1; + int ret = -1; + int opt; + + while ((opt = getopt(argc, argv, "F:U:v:")) != -1) { + switch (opt) { + /* General args */ + case 'F': + pinned_file = optarg; + break; + case 'U': + pinned_file = optarg; + create_array = 0; + break; + case 'v': + cg2 = optarg; + break; + default: + usage(); + goto out; + } + } + + if (!cg2 || !pinned_file) { + usage(); + goto out; + } + + cg2_fd = open(cg2, O_RDONLY); + if (cg2_fd < 0) { + fprintf(stderr, "open(%s,...): %s(%d)\n", + cg2, strerror(errno), errno); + goto out; + } + + if (create_array) { + array_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY, + sizeof(uint32_t), sizeof(uint32_t), + 1, 0); + if (array_fd < 0) { + fprintf(stderr, + "bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,...): %s(%d)\n", + strerror(errno), errno); + goto out; + } + } else { + array_fd = bpf_obj_get(pinned_file); + if (array_fd < 0) { + fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n", + pinned_file, strerror(errno), errno); + goto out; + } + } + + ret = bpf_update_elem(array_fd, &array_key, &cg2_fd, 0); + if (ret) { + perror("bpf_update_elem"); + goto out; + } + + if (create_array) { + ret = bpf_obj_pin(array_fd, pinned_file); + if (ret) { + fprintf(stderr, "bpf_obj_pin(..., %s): %s(%d)\n", + pinned_file, strerror(errno), errno); + goto out; + } + } + +out: + if (array_fd != -1) + close(array_fd); + if (cg2_fd != -1) + close(cg2_fd); + return ret; +} diff --git a/samples/bpf/test_cgrp2_tc.sh b/samples/bpf/test_cgrp2_tc.sh new file mode 100755 index 000000000000..0b119eeaf85c --- /dev/null +++ b/samples/bpf/test_cgrp2_tc.sh @@ -0,0 +1,184 @@ +#!/bin/bash + +MY_DIR=$(dirname $0) +# Details on the bpf prog +BPF_CGRP2_ARRAY_NAME='test_cgrp2_array_pin' +BPF_PROG="$MY_DIR/test_cgrp2_tc_kern.o" +BPF_SECTION='filter' + +[ -z "$TC" ] && TC='tc' +[ -z "$IP" ] && IP='ip' + +# Names of the veth interface, net namespace...etc. +HOST_IFC='ve' +NS_IFC='vens' +NS='ns' + +find_mnt() { + cat /proc/mounts | \ + awk '{ if ($3 == "'$1'" && mnt == "") { mnt = $2 }} END { print mnt }' +} + +# Init cgroup2 vars +init_cgrp2_vars() { + CGRP2_ROOT=$(find_mnt cgroup2) + if [ -z "$CGRP2_ROOT" ] + then + CGRP2_ROOT='/mnt/cgroup2' + MOUNT_CGRP2="yes" + fi + CGRP2_TC="$CGRP2_ROOT/tc" + CGRP2_TC_LEAF="$CGRP2_TC/leaf" +} + +# Init bpf fs vars +init_bpf_fs_vars() { + local bpf_fs_root=$(find_mnt bpf) + [ -n "$bpf_fs_root" ] || return -1 + BPF_FS_TC_SHARE="$bpf_fs_root/tc/globals" +} + +setup_cgrp2() { + case $1 in + start) + if [ "$MOUNT_CGRP2" == 'yes' ] + then + [ -d $CGRP2_ROOT ] || mkdir -p $CGRP2_ROOT + mount -t cgroup2 none $CGRP2_ROOT || return $? + fi + mkdir -p $CGRP2_TC_LEAF + ;; + *) + rmdir $CGRP2_TC_LEAF && rmdir $CGRP2_TC + [ "$MOUNT_CGRP2" == 'yes' ] && umount $CGRP2_ROOT + ;; + esac +} + +setup_bpf_cgrp2_array() { + local bpf_cgrp2_array="$BPF_FS_TC_SHARE/$BPF_CGRP2_ARRAY_NAME" + case $1 in + start) + $MY_DIR/test_cgrp2_array_pin -U $bpf_cgrp2_array -v $CGRP2_TC + ;; + *) + [ -d "$BPF_FS_TC_SHARE" ] && rm -f $bpf_cgrp2_array + ;; + esac +} + +setup_net() { + case $1 in + start) + $IP link add $HOST_IFC type veth peer name $NS_IFC || return $? + $IP link set dev $HOST_IFC up || return $? + sysctl -q net.ipv6.conf.$HOST_IFC.accept_dad=0 + + $IP netns add ns || return $? + $IP link set dev $NS_IFC netns ns || return $? + $IP -n $NS link set dev $NS_IFC up || return $? + $IP netns exec $NS sysctl -q net.ipv6.conf.$NS_IFC.accept_dad=0 + $TC qdisc add dev $HOST_IFC clsact || return $? + $TC filter add dev $HOST_IFC egress bpf da obj $BPF_PROG sec $BPF_SECTION || return $? + ;; + *) + $IP netns del $NS + $IP link del $HOST_IFC + ;; + esac +} + +run_in_cgrp() { + # Fork another bash and move it under the specified cgroup. + # It makes the cgroup cleanup easier at the end of the test. + cmd='echo $$ > ' + cmd="$cmd $1/cgroup.procs; exec $2" + bash -c "$cmd" +} + +do_test() { + run_in_cgrp $CGRP2_TC_LEAF "ping -6 -c3 ff02::1%$HOST_IFC >& /dev/null" + local dropped=$($TC -s qdisc show dev $HOST_IFC | tail -3 | \ + awk '/drop/{print substr($7, 0, index($7, ",")-1)}') + if [[ $dropped -eq 0 ]] + then + echo "FAIL" + return 1 + else + echo "Successfully filtered $dropped packets" + return 0 + fi +} + +do_exit() { + if [ "$DEBUG" == "yes" ] && [ "$MODE" != 'cleanuponly' ] + then + echo "------ DEBUG ------" + echo "mount: "; mount | egrep '(cgroup2|bpf)'; echo + echo "$CGRP2_TC_LEAF: "; ls -l $CGRP2_TC_LEAF; echo + if [ -d "$BPF_FS_TC_SHARE" ] + then + echo "$BPF_FS_TC_SHARE: "; ls -l $BPF_FS_TC_SHARE; echo + fi + echo "Host net:" + $IP netns + $IP link show dev $HOST_IFC + $IP -6 a show dev $HOST_IFC + $TC -s qdisc show dev $HOST_IFC + echo + echo "$NS net:" + $IP -n $NS link show dev $NS_IFC + $IP -n $NS -6 link show dev $NS_IFC + echo "------ DEBUG ------" + echo + fi + + if [ "$MODE" != 'nocleanup' ] + then + setup_net stop + setup_bpf_cgrp2_array stop + setup_cgrp2 stop + fi +} + +init_cgrp2_vars +init_bpf_fs_vars + +while [[ $# -ge 1 ]] +do + a="$1" + case $a in + debug) + DEBUG='yes' + shift 1 + ;; + cleanup-only) + MODE='cleanuponly' + shift 1 + ;; + no-cleanup) + MODE='nocleanup' + shift 1 + ;; + *) + echo "test_cgrp2_tc [debug] [cleanup-only | no-cleanup]" + echo " debug: Print cgrp and network setup details at the end of the test" + echo " cleanup-only: Try to cleanup things from last test. No test will be run" + echo " no-cleanup: Run the test but don't do cleanup at the end" + echo "[Note: If no arg is given, it will run the test and do cleanup at the end]" + echo + exit -1 + ;; + esac +done + +trap do_exit 0 + +[ "$MODE" == 'cleanuponly' ] && exit + +setup_cgrp2 start || exit $? +setup_net start || exit $? +init_bpf_fs_vars || exit $? +setup_bpf_cgrp2_array start || exit $? +do_test +echo diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c new file mode 100644 index 000000000000..2732c37c8d5b --- /dev/null +++ b/samples/bpf/test_cgrp2_tc_kern.c @@ -0,0 +1,69 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +/* copy of 'struct ethhdr' without __packed */ +struct eth_hdr { + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + unsigned short h_proto; +}; + +#define PIN_GLOBAL_NS 2 +struct bpf_elf_map { + __u32 type; + __u32 size_key; + __u32 size_value; + __u32 max_elem; + __u32 flags; + __u32 id; + __u32 pinning; +}; + +struct bpf_elf_map SEC("maps") test_cgrp2_array_pin = { + .type = BPF_MAP_TYPE_CGROUP_ARRAY, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .pinning = PIN_GLOBAL_NS, + .max_elem = 1, +}; + +SEC("filter") +int handle_egress(struct __sk_buff *skb) +{ + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + struct ipv6hdr *ip6h = data + sizeof(*eth); + void *data_end = (void *)(long)skb->data_end; + char dont_care_msg[] = "dont care %04x %d\n"; + char pass_msg[] = "pass\n"; + char reject_msg[] = "reject\n"; + + /* single length check */ + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) + return TC_ACT_OK; + + if (eth->h_proto != htons(ETH_P_IPV6) || + ip6h->nexthdr != IPPROTO_ICMPV6) { + bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg), + eth->h_proto, ip6h->nexthdr); + return TC_ACT_OK; + } else if (bpf_skb_in_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) { + bpf_trace_printk(pass_msg, sizeof(pass_msg)); + return TC_ACT_OK; + } else { + bpf_trace_printk(reject_msg, sizeof(reject_msg)); + return TC_ACT_SHOT; + } +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 86af8b4191d20bb17e868d3167f4cf52ca9331d0 Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 19 Jul 2016 12:16:51 -0700 Subject: Add sample for adding simple drop program to link Add a sample program that only drops packets at the BPF_PROG_TYPE_XDP_RX hook of a link. With the drop-only program, observed single core rate is ~20Mpps. Other tests were run, for instance without the dropcnt increment or without reading from the packet header, the packet rate was mostly unchanged. $ perf record -a samples/bpf/xdp1 $( Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/Makefile | 4 ++ samples/bpf/bpf_load.c | 8 +++ samples/bpf/xdp1_kern.c | 93 +++++++++++++++++++++++++ samples/bpf/xdp1_user.c | 181 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 286 insertions(+) create mode 100644 samples/bpf/xdp1_kern.c create mode 100644 samples/bpf/xdp1_user.c (limited to 'samples/bpf') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index a98b780e974c..0e4ab3a9dfa9 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -21,6 +21,7 @@ hostprogs-y += spintest hostprogs-y += map_perf_test hostprogs-y += test_overhead hostprogs-y += test_cgrp2_array_pin +hostprogs-y += xdp1 test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o @@ -42,6 +43,7 @@ spintest-objs := bpf_load.o libbpf.o spintest_user.o map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o +xdp1-objs := bpf_load.o libbpf.o xdp1_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -64,6 +66,7 @@ always += test_overhead_tp_kern.o always += test_overhead_kprobe_kern.o always += parse_varlen.o parse_simple.o parse_ldabs.o always += test_cgrp2_tc_kern.o +always += xdp1_kern.o HOSTCFLAGS += -I$(objtree)/usr/include @@ -84,6 +87,7 @@ HOSTLOADLIBES_offwaketime += -lelf HOSTLOADLIBES_spintest += -lelf HOSTLOADLIBES_map_perf_test += -lelf -lrt HOSTLOADLIBES_test_overhead += -lelf -lrt +HOSTLOADLIBES_xdp1 += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 022af71c2bb5..0cfda2320320 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -50,6 +50,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) bool is_kprobe = strncmp(event, "kprobe/", 7) == 0; bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0; bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0; + bool is_xdp = strncmp(event, "xdp", 3) == 0; enum bpf_prog_type prog_type; char buf[256]; int fd, efd, err, id; @@ -66,6 +67,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_type = BPF_PROG_TYPE_KPROBE; } else if (is_tracepoint) { prog_type = BPF_PROG_TYPE_TRACEPOINT; + } else if (is_xdp) { + prog_type = BPF_PROG_TYPE_XDP; } else { printf("Unknown event '%s'\n", event); return -1; @@ -79,6 +82,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_fd[prog_cnt++] = fd; + if (is_xdp) + return 0; + if (is_socket) { event += 6; if (*event != '/') @@ -319,6 +325,7 @@ int load_bpf_file(char *path) if (memcmp(shname_prog, "kprobe/", 7) == 0 || memcmp(shname_prog, "kretprobe/", 10) == 0 || memcmp(shname_prog, "tracepoint/", 11) == 0 || + memcmp(shname_prog, "xdp", 3) == 0 || memcmp(shname_prog, "socket", 6) == 0) load_and_attach(shname_prog, insns, data_prog->d_size); } @@ -336,6 +343,7 @@ int load_bpf_file(char *path) if (memcmp(shname, "kprobe/", 7) == 0 || memcmp(shname, "kretprobe/", 10) == 0 || memcmp(shname, "tracepoint/", 11) == 0 || + memcmp(shname, "xdp", 3) == 0 || memcmp(shname, "socket", 6) == 0) load_and_attach(shname, data->d_buf, data->d_size); } diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c new file mode 100644 index 000000000000..e7dd8ac40d12 --- /dev/null +++ b/samples/bpf/xdp1_kern.c @@ -0,0 +1,93 @@ +/* Copyright (c) 2016 PLUMgrid + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#define KBUILD_MODNAME "foo" +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") dropcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 256, +}; + +static int parse_ipv4(void *data, u64 nh_off, void *data_end) +{ + struct iphdr *iph = data + nh_off; + + if (iph + 1 > data_end) + return 0; + return iph->protocol; +} + +static int parse_ipv6(void *data, u64 nh_off, void *data_end) +{ + struct ipv6hdr *ip6h = data + nh_off; + + if (ip6h + 1 > data_end) + return 0; + return ip6h->nexthdr; +} + +SEC("xdp1") +int xdp_prog1(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + int rc = XDP_DROP; + long *value; + u16 h_proto; + u64 nh_off; + u32 index; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return rc; + + h_proto = eth->h_proto; + + if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { + struct vlan_hdr *vhdr; + + vhdr = data + nh_off; + nh_off += sizeof(struct vlan_hdr); + if (data + nh_off > data_end) + return rc; + h_proto = vhdr->h_vlan_encapsulated_proto; + } + if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { + struct vlan_hdr *vhdr; + + vhdr = data + nh_off; + nh_off += sizeof(struct vlan_hdr); + if (data + nh_off > data_end) + return rc; + h_proto = vhdr->h_vlan_encapsulated_proto; + } + + if (h_proto == htons(ETH_P_IP)) + index = parse_ipv4(data, nh_off, data_end); + else if (h_proto == htons(ETH_P_IPV6)) + index = parse_ipv6(data, nh_off, data_end); + else + index = 0; + + value = bpf_map_lookup_elem(&dropcnt, &index); + if (value) + *value += 1; + + return rc; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c new file mode 100644 index 000000000000..a5e109e398a1 --- /dev/null +++ b/samples/bpf/xdp1_user.c @@ -0,0 +1,181 @@ +/* Copyright (c) 2016 PLUMgrid + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_load.h" +#include "libbpf.h" + +static int set_link_xdp_fd(int ifindex, int fd) +{ + struct sockaddr_nl sa; + int sock, seq = 0, len, ret = -1; + char buf[4096]; + struct nlattr *nla, *nla_xdp; + struct { + struct nlmsghdr nh; + struct ifinfomsg ifinfo; + char attrbuf[64]; + } req; + struct nlmsghdr *nh; + struct nlmsgerr *err; + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) { + printf("open netlink socket: %s\n", strerror(errno)); + return -1; + } + + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + printf("bind to netlink: %s\n", strerror(errno)); + goto cleanup; + } + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_type = RTM_SETLINK; + req.nh.nlmsg_pid = 0; + req.nh.nlmsg_seq = ++seq; + req.ifinfo.ifi_family = AF_UNSPEC; + req.ifinfo.ifi_index = ifindex; + nla = (struct nlattr *)(((char *)&req) + + NLMSG_ALIGN(req.nh.nlmsg_len)); + nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/; + + nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN); + nla_xdp->nla_type = 1/*IFLA_XDP_FD*/; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); + memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); + nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len; + + req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + printf("send to netlink: %s\n", strerror(errno)); + goto cleanup; + } + + len = recv(sock, buf, sizeof(buf), 0); + if (len < 0) { + printf("recv from netlink: %s\n", strerror(errno)); + goto cleanup; + } + + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + nh = NLMSG_NEXT(nh, len)) { + if (nh->nlmsg_pid != getpid()) { + printf("Wrong pid %d, expected %d\n", + nh->nlmsg_pid, getpid()); + goto cleanup; + } + if (nh->nlmsg_seq != seq) { + printf("Wrong seq %d, expected %d\n", + nh->nlmsg_seq, seq); + goto cleanup; + } + switch (nh->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(nh); + if (!err->error) + continue; + printf("nlmsg error %s\n", strerror(-err->error)); + goto cleanup; + case NLMSG_DONE: + break; + } + } + + ret = 0; + +cleanup: + close(sock); + return ret; +} + +static int ifindex; + +static void int_exit(int sig) +{ + set_link_xdp_fd(ifindex, -1); + exit(0); +} + +/* simple per-protocol drop counter + */ +static void poll_stats(int interval) +{ + unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + const unsigned int nr_keys = 256; + __u64 values[nr_cpus], prev[nr_keys][nr_cpus]; + __u32 key; + int i; + + memset(prev, 0, sizeof(prev)); + + while (1) { + sleep(interval); + + for (key = 0; key < nr_keys; key++) { + __u64 sum = 0; + + assert(bpf_lookup_elem(map_fd[0], &key, values) == 0); + for (i = 0; i < nr_cpus; i++) + sum += (values[i] - prev[key][i]); + if (sum) + printf("proto %u: %10llu pkt/s\n", + key, sum / interval); + memcpy(prev[key], values, sizeof(values)); + } + } +} + +int main(int ac, char **argv) +{ + char filename[256]; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (ac != 2) { + printf("usage: %s IFINDEX\n", argv[0]); + return 1; + } + + ifindex = strtoul(argv[1], NULL, 0); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + if (!prog_fd[0]) { + printf("load_bpf_file: %s\n", strerror(errno)); + return 1; + } + + signal(SIGINT, int_exit); + + if (set_link_xdp_fd(ifindex, prog_fd[0]) < 0) { + printf("link set xdp fd failed\n"); + return 1; + } + + poll_stats(2); + + return 0; +} -- cgit v1.2.3 From 764cbccef8c9cb95e869ba2bb8371c42685c934a Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 19 Jul 2016 12:16:57 -0700 Subject: bpf: add sample for xdp forwarding and rewrite Add a sample that rewrites and forwards packets out on the same interface. Observed single core forwarding performance of ~10Mpps. Since the mlx4 driver under test recycles every single packet page, the perf output shows almost exclusively just the ring management and bpf program work. Slowdowns are likely occurring due to cache misses. Signed-off-by: Brenden Blanco Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/Makefile | 5 +++ samples/bpf/xdp2_kern.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 samples/bpf/xdp2_kern.c (limited to 'samples/bpf') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 0e4ab3a9dfa9..d2d2b35c67eb 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -22,6 +22,7 @@ hostprogs-y += map_perf_test hostprogs-y += test_overhead hostprogs-y += test_cgrp2_array_pin hostprogs-y += xdp1 +hostprogs-y += xdp2 test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o @@ -44,6 +45,8 @@ map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o xdp1-objs := bpf_load.o libbpf.o xdp1_user.o +# reuse xdp1 source intentionally +xdp2-objs := bpf_load.o libbpf.o xdp1_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -67,6 +70,7 @@ always += test_overhead_kprobe_kern.o always += parse_varlen.o parse_simple.o parse_ldabs.o always += test_cgrp2_tc_kern.o always += xdp1_kern.o +always += xdp2_kern.o HOSTCFLAGS += -I$(objtree)/usr/include @@ -88,6 +92,7 @@ HOSTLOADLIBES_spintest += -lelf HOSTLOADLIBES_map_perf_test += -lelf -lrt HOSTLOADLIBES_test_overhead += -lelf -lrt HOSTLOADLIBES_xdp1 += -lelf +HOSTLOADLIBES_xdp2 += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c new file mode 100644 index 000000000000..38fe7e1d0db4 --- /dev/null +++ b/samples/bpf/xdp2_kern.c @@ -0,0 +1,114 @@ +/* Copyright (c) 2016 PLUMgrid + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#define KBUILD_MODNAME "foo" +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") dropcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 256, +}; + +static void swap_src_dst_mac(void *data) +{ + unsigned short *p = data; + unsigned short dst[3]; + + dst[0] = p[0]; + dst[1] = p[1]; + dst[2] = p[2]; + p[0] = p[3]; + p[1] = p[4]; + p[2] = p[5]; + p[3] = dst[0]; + p[4] = dst[1]; + p[5] = dst[2]; +} + +static int parse_ipv4(void *data, u64 nh_off, void *data_end) +{ + struct iphdr *iph = data + nh_off; + + if (iph + 1 > data_end) + return 0; + return iph->protocol; +} + +static int parse_ipv6(void *data, u64 nh_off, void *data_end) +{ + struct ipv6hdr *ip6h = data + nh_off; + + if (ip6h + 1 > data_end) + return 0; + return ip6h->nexthdr; +} + +SEC("xdp1") +int xdp_prog1(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + int rc = XDP_DROP; + long *value; + u16 h_proto; + u64 nh_off; + u32 index; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return rc; + + h_proto = eth->h_proto; + + if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { + struct vlan_hdr *vhdr; + + vhdr = data + nh_off; + nh_off += sizeof(struct vlan_hdr); + if (data + nh_off > data_end) + return rc; + h_proto = vhdr->h_vlan_encapsulated_proto; + } + if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { + struct vlan_hdr *vhdr; + + vhdr = data + nh_off; + nh_off += sizeof(struct vlan_hdr); + if (data + nh_off > data_end) + return rc; + h_proto = vhdr->h_vlan_encapsulated_proto; + } + + if (h_proto == htons(ETH_P_IP)) + index = parse_ipv4(data, nh_off, data_end); + else if (h_proto == htons(ETH_P_IPV6)) + index = parse_ipv6(data, nh_off, data_end); + else + index = 0; + + value = bpf_map_lookup_elem(&dropcnt, &index); + if (value) + *value += 1; + + if (index == 17) { + swap_src_dst_mac(data); + rc = XDP_TX; + } + + return rc; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From d9094bda5c985d1f9da66e9e3fd6323b49dee44d Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Wed, 20 Jul 2016 17:22:35 -0700 Subject: bpf: make xdp sample variable names more meaningful The naming choice of index is not terribly descriptive, and dropcnt is in fact incorrect for xdp2. Pick better names for these: ipproto and rxcnt. Signed-off-by: Brenden Blanco Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/xdp1_kern.c | 12 ++++++------ samples/bpf/xdp2_kern.c | 14 +++++++------- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c index e7dd8ac40d12..219742106bfd 100644 --- a/samples/bpf/xdp1_kern.c +++ b/samples/bpf/xdp1_kern.c @@ -14,7 +14,7 @@ #include #include "bpf_helpers.h" -struct bpf_map_def SEC("maps") dropcnt = { +struct bpf_map_def SEC("maps") rxcnt = { .type = BPF_MAP_TYPE_PERCPU_ARRAY, .key_size = sizeof(u32), .value_size = sizeof(long), @@ -49,7 +49,7 @@ int xdp_prog1(struct xdp_md *ctx) long *value; u16 h_proto; u64 nh_off; - u32 index; + u32 ipproto; nh_off = sizeof(*eth); if (data + nh_off > data_end) @@ -77,13 +77,13 @@ int xdp_prog1(struct xdp_md *ctx) } if (h_proto == htons(ETH_P_IP)) - index = parse_ipv4(data, nh_off, data_end); + ipproto = parse_ipv4(data, nh_off, data_end); else if (h_proto == htons(ETH_P_IPV6)) - index = parse_ipv6(data, nh_off, data_end); + ipproto = parse_ipv6(data, nh_off, data_end); else - index = 0; + ipproto = 0; - value = bpf_map_lookup_elem(&dropcnt, &index); + value = bpf_map_lookup_elem(&rxcnt, &ipproto); if (value) *value += 1; diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c index 38fe7e1d0db4..e01288867d15 100644 --- a/samples/bpf/xdp2_kern.c +++ b/samples/bpf/xdp2_kern.c @@ -14,7 +14,7 @@ #include #include "bpf_helpers.h" -struct bpf_map_def SEC("maps") dropcnt = { +struct bpf_map_def SEC("maps") rxcnt = { .type = BPF_MAP_TYPE_PERCPU_ARRAY, .key_size = sizeof(u32), .value_size = sizeof(long), @@ -65,7 +65,7 @@ int xdp_prog1(struct xdp_md *ctx) long *value; u16 h_proto; u64 nh_off; - u32 index; + u32 ipproto; nh_off = sizeof(*eth); if (data + nh_off > data_end) @@ -93,17 +93,17 @@ int xdp_prog1(struct xdp_md *ctx) } if (h_proto == htons(ETH_P_IP)) - index = parse_ipv4(data, nh_off, data_end); + ipproto = parse_ipv4(data, nh_off, data_end); else if (h_proto == htons(ETH_P_IPV6)) - index = parse_ipv6(data, nh_off, data_end); + ipproto = parse_ipv6(data, nh_off, data_end); else - index = 0; + ipproto = 0; - value = bpf_map_lookup_elem(&dropcnt, &index); + value = bpf_map_lookup_elem(&rxcnt, &ipproto); if (value) *value += 1; - if (index == 17) { + if (ipproto == IPPROTO_UDP) { swap_src_dst_mac(data); rc = XDP_TX; } -- cgit v1.2.3 From 96ae52279594470622ff0585621a13e96b700600 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Mon, 25 Jul 2016 05:54:46 -0700 Subject: bpf: Add bpf_probe_write_user BPF helper to be called in tracers This allows user memory to be written to during the course of a kprobe. It shouldn't be used to implement any kind of security mechanism because of TOC-TOU attacks, but rather to debug, divert, and manipulate execution of semi-cooperative processes. Although it uses probe_kernel_write, we limit the address space the probe can write into by checking the space with access_ok. We do this as opposed to calling copy_to_user directly, in order to avoid sleeping. In addition we ensure the threads's current fs / segment is USER_DS and the thread isn't exiting nor a kernel thread. Given this feature is meant for experiments, and it has a risk of crashing the system, and running programs, we print a warning on when a proglet that attempts to use this helper is installed, along with the pid and process name. Signed-off-by: Sargun Dhillon Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 10 ++++++++++ kernel/trace/bpf_trace.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ samples/bpf/bpf_helpers.h | 2 ++ 3 files changed, 57 insertions(+) (limited to 'samples/bpf') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2b7076f5b5ad..da218fec6056 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -365,6 +365,16 @@ enum bpf_func_id { */ BPF_FUNC_get_current_task, + /** + * bpf_probe_write_user(void *dst, void *src, int len) + * safely attempt to write to a location + * @dst: destination address in userspace + * @src: source address on stack + * @len: number of bytes to copy + * Return: 0 on success or negative error + */ + BPF_FUNC_probe_write_user, + __BPF_FUNC_MAX_ID, }; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index a12bbd32c0a6..b20438fdb029 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -81,6 +81,49 @@ static const struct bpf_func_proto bpf_probe_read_proto = { .arg3_type = ARG_ANYTHING, }; +static u64 bpf_probe_write_user(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + void *unsafe_ptr = (void *) (long) r1; + void *src = (void *) (long) r2; + int size = (int) r3; + + /* + * Ensure we're in user context which is safe for the helper to + * run. This helper has no business in a kthread. + * + * access_ok() should prevent writing to non-user memory, but in + * some situations (nommu, temporary switch, etc) access_ok() does + * not provide enough validation, hence the check on KERNEL_DS. + */ + + if (unlikely(in_interrupt() || + current->flags & (PF_KTHREAD | PF_EXITING))) + return -EPERM; + if (unlikely(segment_eq(get_fs(), KERNEL_DS))) + return -EPERM; + if (!access_ok(VERIFY_WRITE, unsafe_ptr, size)) + return -EPERM; + + return probe_kernel_write(unsafe_ptr, src, size); +} + +static const struct bpf_func_proto bpf_probe_write_user_proto = { + .func = bpf_probe_write_user, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_PTR_TO_STACK, + .arg3_type = ARG_CONST_STACK_SIZE, +}; + +static const struct bpf_func_proto *bpf_get_probe_write_proto(void) +{ + pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!", + current->comm, task_pid_nr(current)); + + return &bpf_probe_write_user_proto; +} + /* * limited trace_printk() * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed @@ -362,6 +405,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) return &bpf_get_smp_processor_id_proto; case BPF_FUNC_perf_event_read: return &bpf_perf_event_read_proto; + case BPF_FUNC_probe_write_user: + return bpf_get_probe_write_proto(); default: return NULL; } diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 84e3fd919a06..217c8d507f2e 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -41,6 +41,8 @@ static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, (void *) BPF_FUNC_perf_event_output; static int (*bpf_get_stackid)(void *ctx, void *map, int flags) = (void *) BPF_FUNC_get_stackid; +static int (*bpf_probe_write_user)(void *dst, void *src, int size) = + (void *) BPF_FUNC_probe_write_user; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions -- cgit v1.2.3 From cf9b1199de27ece1eafacf165933df10f0314232 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Mon, 25 Jul 2016 05:55:02 -0700 Subject: samples/bpf: Add test/example of using bpf_probe_write_user bpf helper This example shows using a kprobe to act as a dnat mechanism to divert traffic for arbitrary endpoints. It rewrite the arguments to a syscall while they're still in userspace, and before the syscall has a chance to copy the argument into kernel space. Although this is an example, it also acts as a test because the mapped address is 255.255.255.255:555 -> real address, and that's not a legal address to connect to. If the helper is broken, the example will fail on the intermediate steps, as well as the final step to verify the rewrite of userspace memory succeeded. Signed-off-by: Sargun Dhillon Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/Makefile | 4 ++ samples/bpf/test_probe_write_user_kern.c | 52 +++++++++++++++++++++ samples/bpf/test_probe_write_user_user.c | 78 ++++++++++++++++++++++++++++++++ 3 files changed, 134 insertions(+) create mode 100644 samples/bpf/test_probe_write_user_kern.c create mode 100644 samples/bpf/test_probe_write_user_user.c (limited to 'samples/bpf') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index d2d2b35c67eb..90ebf7d35c07 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -14,6 +14,7 @@ hostprogs-y += tracex3 hostprogs-y += tracex4 hostprogs-y += tracex5 hostprogs-y += tracex6 +hostprogs-y += test_probe_write_user hostprogs-y += trace_output hostprogs-y += lathist hostprogs-y += offwaketime @@ -37,6 +38,7 @@ tracex3-objs := bpf_load.o libbpf.o tracex3_user.o tracex4-objs := bpf_load.o libbpf.o tracex4_user.o tracex5-objs := bpf_load.o libbpf.o tracex5_user.o tracex6-objs := bpf_load.o libbpf.o tracex6_user.o +test_probe_write_user-objs := bpf_load.o libbpf.o test_probe_write_user_user.o trace_output-objs := bpf_load.o libbpf.o trace_output_user.o lathist-objs := bpf_load.o libbpf.o lathist_user.o offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o @@ -59,6 +61,7 @@ always += tracex3_kern.o always += tracex4_kern.o always += tracex5_kern.o always += tracex6_kern.o +always += test_probe_write_user_kern.o always += trace_output_kern.o always += tcbpf1_kern.o always += lathist_kern.o @@ -85,6 +88,7 @@ HOSTLOADLIBES_tracex3 += -lelf HOSTLOADLIBES_tracex4 += -lelf -lrt HOSTLOADLIBES_tracex5 += -lelf HOSTLOADLIBES_tracex6 += -lelf +HOSTLOADLIBES_test_probe_write_user += -lelf HOSTLOADLIBES_trace_output += -lelf -lrt HOSTLOADLIBES_lathist += -lelf HOSTLOADLIBES_offwaketime += -lelf diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c new file mode 100644 index 000000000000..3a677c807044 --- /dev/null +++ b/samples/bpf/test_probe_write_user_kern.c @@ -0,0 +1,52 @@ +/* Copyright (c) 2016 Sargun Dhillon + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") dnat_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct sockaddr_in), + .value_size = sizeof(struct sockaddr_in), + .max_entries = 256, +}; + +/* kprobe is NOT a stable ABI + * kernel functions can be removed, renamed or completely change semantics. + * Number of arguments and their positions can change, etc. + * In such case this bpf+kprobe example will no longer be meaningful + * + * This example sits on a syscall, and the syscall ABI is relatively stable + * of course, across platforms, and over time, the ABI may change. + */ +SEC("kprobe/sys_connect") +int bpf_prog1(struct pt_regs *ctx) +{ + struct sockaddr_in new_addr, orig_addr = {}; + struct sockaddr_in *mapped_addr; + void *sockaddr_arg = (void *)PT_REGS_PARM2(ctx); + int sockaddr_len = (int)PT_REGS_PARM3(ctx); + + if (sockaddr_len > sizeof(orig_addr)) + return 0; + + if (bpf_probe_read(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0) + return 0; + + mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr); + if (mapped_addr != NULL) { + memcpy(&new_addr, mapped_addr, sizeof(new_addr)); + bpf_probe_write_user(sockaddr_arg, &new_addr, + sizeof(new_addr)); + } + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c new file mode 100644 index 000000000000..a44bf347bedd --- /dev/null +++ b/samples/bpf/test_probe_write_user_user.c @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include "libbpf.h" +#include "bpf_load.h" +#include +#include +#include +#include + +int main(int ac, char **argv) +{ + int serverfd, serverconnfd, clientfd; + socklen_t sockaddr_len; + struct sockaddr serv_addr, mapped_addr, tmp_addr; + struct sockaddr_in *serv_addr_in, *mapped_addr_in, *tmp_addr_in; + char filename[256]; + char *ip; + + serv_addr_in = (struct sockaddr_in *)&serv_addr; + mapped_addr_in = (struct sockaddr_in *)&mapped_addr; + tmp_addr_in = (struct sockaddr_in *)&tmp_addr; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + assert((serverfd = socket(AF_INET, SOCK_STREAM, 0)) > 0); + assert((clientfd = socket(AF_INET, SOCK_STREAM, 0)) > 0); + + /* Bind server to ephemeral port on lo */ + memset(&serv_addr, 0, sizeof(serv_addr)); + serv_addr_in->sin_family = AF_INET; + serv_addr_in->sin_port = 0; + serv_addr_in->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + + assert(bind(serverfd, &serv_addr, sizeof(serv_addr)) == 0); + + sockaddr_len = sizeof(serv_addr); + assert(getsockname(serverfd, &serv_addr, &sockaddr_len) == 0); + ip = inet_ntoa(serv_addr_in->sin_addr); + printf("Server bound to: %s:%d\n", ip, ntohs(serv_addr_in->sin_port)); + + memset(&mapped_addr, 0, sizeof(mapped_addr)); + mapped_addr_in->sin_family = AF_INET; + mapped_addr_in->sin_port = htons(5555); + mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255"); + + assert(!bpf_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY)); + + assert(listen(serverfd, 5) == 0); + + ip = inet_ntoa(mapped_addr_in->sin_addr); + printf("Client connecting to: %s:%d\n", + ip, ntohs(mapped_addr_in->sin_port)); + assert(connect(clientfd, &mapped_addr, sizeof(mapped_addr)) == 0); + + sockaddr_len = sizeof(tmp_addr); + ip = inet_ntoa(tmp_addr_in->sin_addr); + assert((serverconnfd = accept(serverfd, &tmp_addr, &sockaddr_len)) > 0); + printf("Server received connection from: %s:%d\n", + ip, ntohs(tmp_addr_in->sin_port)); + + sockaddr_len = sizeof(tmp_addr); + assert(getpeername(clientfd, &tmp_addr, &sockaddr_len) == 0); + ip = inet_ntoa(tmp_addr_in->sin_addr); + printf("Client's peer address: %s:%d\n", + ip, ntohs(tmp_addr_in->sin_port)); + + /* Is the server's getsockname = the socket getpeername */ + assert(memcmp(&serv_addr, &tmp_addr, sizeof(struct sockaddr_in)) == 0); + + return 0; +} -- cgit v1.2.3