diff options
Diffstat (limited to 'samples/bpf')
-rw-r--r-- | samples/bpf/Makefile | 23 | ||||
-rw-r--r-- | samples/bpf/bpf_helpers.h | 40 | ||||
-rw-r--r-- | samples/bpf/bpf_load.c | 203 | ||||
-rw-r--r-- | samples/bpf/bpf_load.h | 24 | ||||
-rw-r--r-- | samples/bpf/libbpf.c | 31 | ||||
-rw-r--r-- | samples/bpf/libbpf.h | 17 | ||||
-rw-r--r-- | samples/bpf/sock_example.c | 101 | ||||
-rw-r--r-- | samples/bpf/sockex1_kern.c | 25 | ||||
-rw-r--r-- | samples/bpf/sockex1_user.c | 49 | ||||
-rw-r--r-- | samples/bpf/sockex2_kern.c | 215 | ||||
-rw-r--r-- | samples/bpf/sockex2_user.c | 44 | ||||
-rw-r--r-- | samples/bpf/test_maps.c | 291 | ||||
-rw-r--r-- | samples/bpf/test_verifier.c | 60 |
13 files changed, 1111 insertions, 12 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 634391797856..b5b3600dcdf5 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -2,11 +2,32 @@ obj- := dummy.o # List of programs to build -hostprogs-y := test_verifier +hostprogs-y := test_verifier test_maps +hostprogs-y += sock_example +hostprogs-y += sockex1 +hostprogs-y += sockex2 test_verifier-objs := test_verifier.o libbpf.o +test_maps-objs := test_maps.o libbpf.o +sock_example-objs := sock_example.o libbpf.o +sockex1-objs := bpf_load.o libbpf.o sockex1_user.o +sockex2-objs := bpf_load.o libbpf.o sockex2_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) +always += sockex1_kern.o +always += sockex2_kern.o HOSTCFLAGS += -I$(objtree)/usr/include + +HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable +HOSTLOADLIBES_sockex1 += -lelf +HOSTLOADLIBES_sockex2 += -lelf + +# point this to your LLVM backend with bpf support +LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc + +%.o: %.c + clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ + -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ + -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h new file mode 100644 index 000000000000..ca0333146006 --- /dev/null +++ b/samples/bpf/bpf_helpers.h @@ -0,0 +1,40 @@ +#ifndef __BPF_HELPERS_H +#define __BPF_HELPERS_H + +/* helper macro to place programs, maps, license in + * different sections in elf_bpf file. 
Section names + * are interpreted by elf_bpf loader + */ +#define SEC(NAME) __attribute__((section(NAME), used)) + +/* helper functions called from eBPF programs written in C */ +static void *(*bpf_map_lookup_elem)(void *map, void *key) = + (void *) BPF_FUNC_map_lookup_elem; +static int (*bpf_map_update_elem)(void *map, void *key, void *value, + unsigned long long flags) = + (void *) BPF_FUNC_map_update_elem; +static int (*bpf_map_delete_elem)(void *map, void *key) = + (void *) BPF_FUNC_map_delete_elem; + +/* llvm builtin functions that eBPF C program may use to + * emit BPF_LD_ABS and BPF_LD_IND instructions + */ +struct sk_buff; +unsigned long long load_byte(void *skb, + unsigned long long off) asm("llvm.bpf.load.byte"); +unsigned long long load_half(void *skb, + unsigned long long off) asm("llvm.bpf.load.half"); +unsigned long long load_word(void *skb, + unsigned long long off) asm("llvm.bpf.load.word"); + +/* a helper structure used by eBPF C program + * to describe map attributes to elf_bpf loader + */ +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; +}; + +#endif diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c new file mode 100644 index 000000000000..1831d236382b --- /dev/null +++ b/samples/bpf/bpf_load.c @@ -0,0 +1,203 @@ +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <libelf.h> +#include <gelf.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> +#include <stdbool.h> +#include <linux/bpf.h> +#include <linux/filter.h> +#include "libbpf.h" +#include "bpf_helpers.h" +#include "bpf_load.h" + +static char license[128]; +static bool processed_sec[128]; +int map_fd[MAX_MAPS]; +int prog_fd[MAX_PROGS]; +int prog_cnt; + +static int load_and_attach(const char *event, struct bpf_insn *prog, int size) +{ + int fd; + bool is_socket = strncmp(event, "socket", 6) == 0; + + if (!is_socket) + /* tracing events tbd */ + return 
-1; + + fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, + prog, size, license); + + if (fd < 0) { + printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf); + return -1; + } + + prog_fd[prog_cnt++] = fd; + + return 0; +} + +static int load_maps(struct bpf_map_def *maps, int len) +{ + int i; + + for (i = 0; i < len / sizeof(struct bpf_map_def); i++) { + + map_fd[i] = bpf_create_map(maps[i].type, + maps[i].key_size, + maps[i].value_size, + maps[i].max_entries); + if (map_fd[i] < 0) + return 1; + } + return 0; +} + +static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname, + GElf_Shdr *shdr, Elf_Data **data) +{ + Elf_Scn *scn; + + scn = elf_getscn(elf, i); + if (!scn) + return 1; + + if (gelf_getshdr(scn, shdr) != shdr) + return 2; + + *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name); + if (!*shname || !shdr->sh_size) + return 3; + + *data = elf_getdata(scn, 0); + if (!*data || elf_getdata(scn, *data) != NULL) + return 4; + + return 0; +} + +static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols, + GElf_Shdr *shdr, struct bpf_insn *insn) +{ + int i, nrels; + + nrels = shdr->sh_size / shdr->sh_entsize; + + for (i = 0; i < nrels; i++) { + GElf_Sym sym; + GElf_Rel rel; + unsigned int insn_idx; + + gelf_getrel(data, i, &rel); + + insn_idx = rel.r_offset / sizeof(struct bpf_insn); + + gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym); + + if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { + printf("invalid relo for insn[%d].code 0x%x\n", + insn_idx, insn[insn_idx].code); + return 1; + } + insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; + insn[insn_idx].imm = map_fd[sym.st_value / sizeof(struct bpf_map_def)]; + } + + return 0; +} + +int load_bpf_file(char *path) +{ + int fd, i; + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr, shdr_prog; + Elf_Data *data, *data_prog, *symbols = NULL; + char *shname, *shname_prog; + + if (elf_version(EV_CURRENT) == EV_NONE) + return 1; + + fd = open(path, O_RDONLY, 0); + if (fd < 0) + return 1; + + elf = 
elf_begin(fd, ELF_C_READ, NULL); + + if (!elf) + return 1; + + if (gelf_getehdr(elf, &ehdr) != &ehdr) + return 1; + + /* scan over all elf sections to get license and map info */ + for (i = 1; i < ehdr.e_shnum; i++) { + + if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) + continue; + + if (0) /* helpful for llvm debugging */ + printf("section %d:%s data %p size %zd link %d flags %d\n", + i, shname, data->d_buf, data->d_size, + shdr.sh_link, (int) shdr.sh_flags); + + if (strcmp(shname, "license") == 0) { + processed_sec[i] = true; + memcpy(license, data->d_buf, data->d_size); + } else if (strcmp(shname, "maps") == 0) { + processed_sec[i] = true; + if (load_maps(data->d_buf, data->d_size)) + return 1; + } else if (shdr.sh_type == SHT_SYMTAB) { + symbols = data; + } + } + + /* load programs that need map fixup (relocations) */ + for (i = 1; i < ehdr.e_shnum; i++) { + + if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) + continue; + if (shdr.sh_type == SHT_REL) { + struct bpf_insn *insns; + + if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog, + &shdr_prog, &data_prog)) + continue; + + insns = (struct bpf_insn *) data_prog->d_buf; + + processed_sec[shdr.sh_info] = true; + processed_sec[i] = true; + + if (parse_relo_and_apply(data, symbols, &shdr, insns)) + continue; + + if (memcmp(shname_prog, "events/", 7) == 0 || + memcmp(shname_prog, "socket", 6) == 0) + load_and_attach(shname_prog, insns, data_prog->d_size); + } + } + + /* load programs that don't use maps */ + for (i = 1; i < ehdr.e_shnum; i++) { + + if (processed_sec[i]) + continue; + + if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) + continue; + + if (memcmp(shname, "events/", 7) == 0 || + memcmp(shname, "socket", 6) == 0) + load_and_attach(shname, data->d_buf, data->d_size); + } + + close(fd); + return 0; +} diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h new file mode 100644 index 000000000000..27789a34f5e6 --- /dev/null +++ b/samples/bpf/bpf_load.h @@ -0,0 +1,24 @@ +#ifndef __BPF_LOAD_H 
+#define __BPF_LOAD_H + +#define MAX_MAPS 32 +#define MAX_PROGS 32 + +extern int map_fd[MAX_MAPS]; +extern int prog_fd[MAX_PROGS]; + +/* parses elf file compiled by llvm .c->.o + * . parses 'maps' section and creates maps via BPF syscall + * . parses 'license' section and passes it to syscall + * . parses elf relocations for BPF maps and adjusts BPF_LD_IMM64 insns by + * storing map_fd into insn->imm and marking such insns as BPF_PSEUDO_MAP_FD + * . loads eBPF programs via BPF syscall + * + * One ELF file can contain multiple BPF programs which will be loaded + * and their FDs stored in prog_fd array + * + * returns zero on success + */ +int load_bpf_file(char *path); + +#endif diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c index ff6504420738..46d50b7ddf79 100644 --- a/samples/bpf/libbpf.c +++ b/samples/bpf/libbpf.c @@ -7,6 +7,10 @@ #include <linux/netlink.h> #include <linux/bpf.h> #include <errno.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <linux/if_packet.h> +#include <arpa/inet.h> #include "libbpf.h" static __u64 ptr_to_u64(void *ptr) @@ -27,12 +31,13 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr)); } -int bpf_update_elem(int fd, void *key, void *value) +int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags) { union bpf_attr attr = { .map_fd = fd, .key = ptr_to_u64(key), .value = ptr_to_u64(value), + .flags = flags, }; return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); @@ -92,3 +97,27 @@ int bpf_prog_load(enum bpf_prog_type prog_type, return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); } + +int open_raw_sock(const char *name) +{ + struct sockaddr_ll sll; + int sock; + + sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL)); + if (sock < 0) { + printf("cannot create raw socket\n"); + return -1; + } + + memset(&sll, 0, sizeof(sll)); + sll.sll_family = 
AF_PACKET; + sll.sll_ifindex = if_nametoindex(name); + sll.sll_protocol = htons(ETH_P_ALL); + if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) { + printf("bind to %s: %s\n", name, strerror(errno)); + close(sock); + return -1; + } + + return sock; +} diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index 8a31babeca5d..58c5fe1bdba1 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -6,7 +6,7 @@ struct bpf_insn; int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries); -int bpf_update_elem(int fd, void *key, void *value); +int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags); int bpf_lookup_elem(int fd, void *key, void *value); int bpf_delete_elem(int fd, void *key); int bpf_get_next_key(int fd, void *key, void *next_key); @@ -15,7 +15,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, int insn_len, const char *license); -#define LOG_BUF_SIZE 8192 +#define LOG_BUF_SIZE 65536 extern char bpf_log_buf[LOG_BUF_SIZE]; /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ @@ -99,6 +99,16 @@ extern char bpf_log_buf[LOG_BUF_SIZE]; BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) +/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ + +#define BPF_LD_ABS(SIZE, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + /* Memory load, dst_reg = *(uint *) (src_reg + off16) */ #define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ @@ -169,4 +179,7 @@ extern char bpf_log_buf[LOG_BUF_SIZE]; .off = 0, \ .imm = 0 }) +/* create RAW socket and bind to interface 'name' */ +int open_raw_sock(const char *name); + #endif diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c new file mode 100644 index 000000000000..c8ad0404416f --- /dev/null +++ b/samples/bpf/sock_example.c @@ -0,0 +1,101 @@ +/* eBPF example program: + * - creates arraymap in kernel with key 4 
bytes and value 8 bytes + * + * - loads eBPF program: + * r0 = skb->data[ETH_HLEN + offsetof(struct iphdr, protocol)]; + * *(u32*)(fp - 4) = r0; + * // assuming packet is IPv4, lookup ip->proto in a map + * value = bpf_map_lookup_elem(map_fd, fp - 4); + * if (value) + * (*(u64*)value) += 1; + * + * - attaches this program to a raw socket bound to the loopback interface (lo) + * + * - every second user space reads map[tcp], map[udp], map[icmp] to see + * how many packets of given protocol were seen on lo + */ +#include <stdio.h> +#include <unistd.h> +#include <assert.h> +#include <linux/bpf.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/socket.h> +#include <arpa/inet.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <stddef.h> +#include "libbpf.h" + +static int test_sock(void) +{ + int sock = -1, map_fd, prog_fd, i, key; + long long value = 0, tcp_cnt, udp_cnt, icmp_cnt; + + map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), + 256); + if (map_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + goto cleanup; + } + + struct bpf_insn prog[] = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, ETH_HLEN + offsetof(struct iphdr, protocol) /* R0 = ip->proto */), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */ + BPF_LD_MAP_FD(BPF_REG_1, map_fd), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */ + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ + BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 */ + BPF_EXIT_INSN(), + }; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog), + "GPL"); + if (prog_fd < 0) { + printf("failed to load prog '%s'\n", strerror(errno)); + goto cleanup; + } + + sock = open_raw_sock("lo"); + + if 
(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, + sizeof(prog_fd)) < 0) { + printf("setsockopt %s\n", strerror(errno)); + goto cleanup; + } + + for (i = 0; i < 10; i++) { + key = IPPROTO_TCP; + assert(bpf_lookup_elem(map_fd, &key, &tcp_cnt) == 0); + + key = IPPROTO_UDP; + assert(bpf_lookup_elem(map_fd, &key, &udp_cnt) == 0); + + key = IPPROTO_ICMP; + assert(bpf_lookup_elem(map_fd, &key, &icmp_cnt) == 0); + + printf("TCP %lld UDP %lld ICMP %lld packets\n", + tcp_cnt, udp_cnt, icmp_cnt); + sleep(1); + } + +cleanup: + /* maps, programs, raw sockets will auto cleanup on process exit */ + return 0; +} + +int main(void) +{ + FILE *f; + + f = popen("ping -c5 localhost", "r"); + (void)f; + + return test_sock(); +} diff --git a/samples/bpf/sockex1_kern.c b/samples/bpf/sockex1_kern.c new file mode 100644 index 000000000000..066892662915 --- /dev/null +++ b/samples/bpf/sockex1_kern.c @@ -0,0 +1,25 @@ +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/ip.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") my_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 256, +}; + +SEC("socket1") +int bpf_prog1(struct sk_buff *skb) +{ + int index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol)); + long *value; + + value = bpf_map_lookup_elem(&my_map, &index); + if (value) + __sync_fetch_and_add(value, 1); + + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c new file mode 100644 index 000000000000..34a443ff3831 --- /dev/null +++ b/samples/bpf/sockex1_user.c @@ -0,0 +1,49 @@ +#include <stdio.h> +#include <assert.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "bpf_load.h" +#include <unistd.h> +#include <arpa/inet.h> + +int main(int ac, char **argv) +{ + char filename[256]; + FILE *f; + int i, sock; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if 
(load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + sock = open_raw_sock("lo"); + + assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd, + sizeof(prog_fd[0])) == 0); + + f = popen("ping -c5 localhost", "r"); + (void) f; + + for (i = 0; i < 5; i++) { + long long tcp_cnt, udp_cnt, icmp_cnt; + int key; + + key = IPPROTO_TCP; + assert(bpf_lookup_elem(map_fd[0], &key, &tcp_cnt) == 0); + + key = IPPROTO_UDP; + assert(bpf_lookup_elem(map_fd[0], &key, &udp_cnt) == 0); + + key = IPPROTO_ICMP; + assert(bpf_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0); + + printf("TCP %lld UDP %lld ICMP %lld packets\n", + tcp_cnt, udp_cnt, icmp_cnt); + sleep(1); + } + + return 0; +} diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c new file mode 100644 index 000000000000..6f0135f0f217 --- /dev/null +++ b/samples/bpf/sockex2_kern.c @@ -0,0 +1,215 @@ +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" +#include <uapi/linux/in.h> +#include <uapi/linux/if.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/ip.h> +#include <uapi/linux/ipv6.h> +#include <uapi/linux/if_tunnel.h> +#define IP_MF 0x2000 +#define IP_OFFSET 0x1FFF + +struct vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; + +struct flow_keys { + __be32 src; + __be32 dst; + union { + __be32 ports; + __be16 port16[2]; + }; + __u16 thoff; + __u8 ip_proto; +}; + +static inline int proto_ports_offset(__u64 proto) +{ + switch (proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + return 0; + case IPPROTO_AH: + return 4; + default: + return 0; + } +} + +static inline int ip_is_fragment(struct sk_buff *ctx, __u64 nhoff) +{ + return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off)) + & (IP_MF | IP_OFFSET); +} + +static inline __u32 ipv6_addr_hash(struct sk_buff *ctx, __u64 off) +{ + __u64 w0 = load_word(ctx, off); + __u64 w1 = load_word(ctx, off + 4); + __u64 w2 = 
load_word(ctx, off + 8); + __u64 w3 = load_word(ctx, off + 12); + + return (__u32)(w0 ^ w1 ^ w2 ^ w3); +} + +static inline __u64 parse_ip(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto, + struct flow_keys *flow) +{ + __u64 verlen; + + if (unlikely(ip_is_fragment(skb, nhoff))) + *ip_proto = 0; + else + *ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol)); + + if (*ip_proto != IPPROTO_GRE) { + flow->src = load_word(skb, nhoff + offsetof(struct iphdr, saddr)); + flow->dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr)); + } + + verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/); + if (likely(verlen == 0x45)) + nhoff += 20; + else + nhoff += (verlen & 0xF) << 2; + + return nhoff; +} + +static inline __u64 parse_ipv6(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto, + struct flow_keys *flow) +{ + *ip_proto = load_byte(skb, + nhoff + offsetof(struct ipv6hdr, nexthdr)); + flow->src = ipv6_addr_hash(skb, + nhoff + offsetof(struct ipv6hdr, saddr)); + flow->dst = ipv6_addr_hash(skb, + nhoff + offsetof(struct ipv6hdr, daddr)); + nhoff += sizeof(struct ipv6hdr); + + return nhoff; +} + +static inline bool flow_dissector(struct sk_buff *skb, struct flow_keys *flow) +{ + __u64 nhoff = ETH_HLEN; + __u64 ip_proto; + __u64 proto = load_half(skb, 12); + int poff; + + if (proto == ETH_P_8021AD) { + proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, + h_vlan_encapsulated_proto)); + nhoff += sizeof(struct vlan_hdr); + } + + if (proto == ETH_P_8021Q) { + proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, + h_vlan_encapsulated_proto)); + nhoff += sizeof(struct vlan_hdr); + } + + if (likely(proto == ETH_P_IP)) + nhoff = parse_ip(skb, nhoff, &ip_proto, flow); + else if (proto == ETH_P_IPV6) + nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow); + else + return false; + + switch (ip_proto) { + case IPPROTO_GRE: { + struct gre_hdr { + __be16 flags; + __be16 proto; + }; + + __u64 gre_flags = load_half(skb, + nhoff + offsetof(struct gre_hdr, 
flags)); + __u64 gre_proto = load_half(skb, + nhoff + offsetof(struct gre_hdr, proto)); + + if (gre_flags & (GRE_VERSION|GRE_ROUTING)) + break; + + proto = gre_proto; + nhoff += 4; + if (gre_flags & GRE_CSUM) + nhoff += 4; + if (gre_flags & GRE_KEY) + nhoff += 4; + if (gre_flags & GRE_SEQ) + nhoff += 4; + + if (proto == ETH_P_8021Q) { + proto = load_half(skb, + nhoff + offsetof(struct vlan_hdr, + h_vlan_encapsulated_proto)); + nhoff += sizeof(struct vlan_hdr); + } + + if (proto == ETH_P_IP) + nhoff = parse_ip(skb, nhoff, &ip_proto, flow); + else if (proto == ETH_P_IPV6) + nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow); + else + return false; + break; + } + case IPPROTO_IPIP: + nhoff = parse_ip(skb, nhoff, &ip_proto, flow); + break; + case IPPROTO_IPV6: + nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow); + break; + default: + break; + } + + flow->ip_proto = ip_proto; + poff = proto_ports_offset(ip_proto); + if (poff >= 0) { + nhoff += poff; + flow->ports = load_word(skb, nhoff); + } + + flow->thoff = (__u16) nhoff; + + return true; +} + +struct bpf_map_def SEC("maps") hash_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__be32), + .value_size = sizeof(long), + .max_entries = 1024, +}; + +SEC("socket2") +int bpf_prog2(struct sk_buff *skb) +{ + struct flow_keys flow; + long *value; + u32 key; + + if (!flow_dissector(skb, &flow)) + return 0; + + key = flow.dst; + value = bpf_map_lookup_elem(&hash_map, &key); + if (value) { + __sync_fetch_and_add(value, 1); + } else { + long val = 1; + + bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY); + } + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c new file mode 100644 index 000000000000..d2d5f5a790d3 --- /dev/null +++ b/samples/bpf/sockex2_user.c @@ -0,0 +1,44 @@ +#include <stdio.h> +#include <assert.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "bpf_load.h" +#include <unistd.h> +#include <arpa/inet.h> + +int main(int ac, char 
**argv) +{ + char filename[256]; + FILE *f; + int i, sock; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + sock = open_raw_sock("lo"); + + assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd, + sizeof(prog_fd[0])) == 0); + + f = popen("ping -c5 localhost", "r"); + (void) f; + + for (i = 0; i < 5; i++) { + int key = 0, next_key; + long long value; + + while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { + bpf_lookup_elem(map_fd[0], &next_key, &value); + printf("ip %s count %lld\n", + inet_ntoa((struct in_addr){htonl(next_key)}), + value); + key = next_key; + } + sleep(1); + } + return 0; +} diff --git a/samples/bpf/test_maps.c b/samples/bpf/test_maps.c new file mode 100644 index 000000000000..e286b42307f3 --- /dev/null +++ b/samples/bpf/test_maps.c @@ -0,0 +1,291 @@ +/* + * Testsuite for eBPF maps + * + * Copyright (c) 2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ */ +#include <stdio.h> +#include <unistd.h> +#include <linux/bpf.h> +#include <errno.h> +#include <string.h> +#include <assert.h> +#include <sys/wait.h> +#include <stdlib.h> +#include "libbpf.h" + +/* sanity tests for map API */ +static void test_hashmap_sanity(int i, void *data) +{ + long long key, next_key, value; + int map_fd; + + map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), 2); + if (map_fd < 0) { + printf("failed to create hashmap '%s'\n", strerror(errno)); + exit(1); + } + + key = 1; + value = 1234; + /* insert key=1 element */ + assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0); + + value = 0; + /* BPF_NOEXIST means: add new element if it doesn't exist */ + assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 && + /* key=1 already exists */ + errno == EEXIST); + + assert(bpf_update_elem(map_fd, &key, &value, -1) == -1 && errno == EINVAL); + + /* check that key=1 can be found */ + assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 1234); + + key = 2; + /* check that key=2 is not found */ + assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT); + + /* BPF_EXIST means: update existing element */ + assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == -1 && + /* key=2 is not there */ + errno == ENOENT); + + /* insert key=2 element */ + assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0); + + /* key=1 and key=2 were inserted, check that key=0 cannot be inserted + * due to max_entries limit + */ + key = 0; + assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 && + errno == E2BIG); + + /* check that key = 0 doesn't exist */ + assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT); + + /* iterate over two elements */ + assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 && + next_key == 2); + assert(bpf_get_next_key(map_fd, &next_key, &next_key) == 0 && + next_key == 1); + assert(bpf_get_next_key(map_fd, &next_key, &next_key) == -1 && + errno == 
ENOENT); + + /* delete both elements */ + key = 1; + assert(bpf_delete_elem(map_fd, &key) == 0); + key = 2; + assert(bpf_delete_elem(map_fd, &key) == 0); + assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT); + + key = 0; + /* check that map is empty */ + assert(bpf_get_next_key(map_fd, &key, &next_key) == -1 && + errno == ENOENT); + close(map_fd); +} + +static void test_arraymap_sanity(int i, void *data) +{ + int key, next_key, map_fd; + long long value; + + map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), 2); + if (map_fd < 0) { + printf("failed to create arraymap '%s'\n", strerror(errno)); + exit(1); + } + + key = 1; + value = 1234; + /* insert key=1 element */ + assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0); + + value = 0; + assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 && + errno == EEXIST); + + /* check that key=1 can be found */ + assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 1234); + + key = 0; + /* check that key=0 is also found and zero initialized */ + assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 0); + + + /* key=0 and key=1 were inserted, check that key=2 cannot be inserted + * due to max_entries limit + */ + key = 2; + assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == -1 && + errno == E2BIG); + + /* check that key = 2 doesn't exist */ + assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT); + + /* iterate over two elements */ + assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 && + next_key == 0); + assert(bpf_get_next_key(map_fd, &next_key, &next_key) == 0 && + next_key == 1); + assert(bpf_get_next_key(map_fd, &next_key, &next_key) == -1 && + errno == ENOENT); + + /* delete shouldn't succeed */ + key = 1; + assert(bpf_delete_elem(map_fd, &key) == -1 && errno == EINVAL); + + close(map_fd); +} + +#define MAP_SIZE (32 * 1024) +static void test_map_large(void) +{ + struct bigkey { + int a; + char b[116]; + long long c; + } key; 
+ int map_fd, i, value; + + /* allocate 4Mbyte of memory */ + map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), + MAP_SIZE); + if (map_fd < 0) { + printf("failed to create large map '%s'\n", strerror(errno)); + exit(1); + } + + for (i = 0; i < MAP_SIZE; i++) { + key = (struct bigkey) {.c = i}; + value = i; + assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0); + } + key.c = -1; + assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 && + errno == E2BIG); + + /* iterate through all elements */ + for (i = 0; i < MAP_SIZE; i++) + assert(bpf_get_next_key(map_fd, &key, &key) == 0); + assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT); + + key.c = 0; + assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 0); + key.a = 1; + assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT); + + close(map_fd); +} + +/* fork N children and wait for them to complete */ +static void run_parallel(int tasks, void (*fn)(int i, void *data), void *data) +{ + pid_t pid[tasks]; + int i; + + for (i = 0; i < tasks; i++) { + pid[i] = fork(); + if (pid[i] == 0) { + fn(i, data); + exit(0); + } else if (pid[i] == -1) { + printf("couldn't spawn #%d process\n", i); + exit(1); + } + } + for (i = 0; i < tasks; i++) { + int status; + + assert(waitpid(pid[i], &status, 0) == pid[i]); + assert(status == 0); + } +} + +static void test_map_stress(void) +{ + run_parallel(100, test_hashmap_sanity, NULL); + run_parallel(100, test_arraymap_sanity, NULL); +} + +#define TASKS 1024 +#define DO_UPDATE 1 +#define DO_DELETE 0 +static void do_work(int fn, void *data) +{ + int map_fd = ((int *)data)[0]; + int do_update = ((int *)data)[1]; + int i; + int key, value; + + for (i = fn; i < MAP_SIZE; i += TASKS) { + key = value = i; + if (do_update) + assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0); + else + assert(bpf_delete_elem(map_fd, &key) == 0); + } +} + +static void test_map_parallel(void) +{ + int i, map_fd, key = 0, 
value = 0; + int data[2]; + + map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), + MAP_SIZE); + if (map_fd < 0) { + printf("failed to create map for parallel test '%s'\n", + strerror(errno)); + exit(1); + } + + data[0] = map_fd; + data[1] = DO_UPDATE; + /* use the same map_fd in children to add elements to this map + * child_0 adds key=0, key=1024, key=2048, ... + * child_1 adds key=1, key=1025, key=2049, ... + * child_1023 adds key=1023, ... + */ + run_parallel(TASKS, do_work, data); + + /* check that key=0 is already there */ + assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 && + errno == EEXIST); + + /* check that all elements were inserted */ + key = -1; + for (i = 0; i < MAP_SIZE; i++) + assert(bpf_get_next_key(map_fd, &key, &key) == 0); + assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT); + + /* another check for all elements */ + for (i = 0; i < MAP_SIZE; i++) { + key = MAP_SIZE - i - 1; + assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && + value == key); + } + + /* now let's delete all elements in parallel */ + data[1] = DO_DELETE; + run_parallel(TASKS, do_work, data); + + /* nothing should be left */ + key = -1; + assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT); +} + +int main(void) +{ + test_hashmap_sanity(0, NULL); + test_arraymap_sanity(0, NULL); + test_map_large(); + test_map_parallel(); + test_map_stress(); + printf("test_maps: OK\n"); + return 0; +} diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index eb4bec0ad8af..b96175e90363 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -261,7 +261,7 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_unspec), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, .fixup = {2}, @@ -417,7 +417,7 @@ 
static struct bpf_test tests[] = { BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_unspec), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem), BPF_EXIT_INSN(), }, .errstr = "fd 0 is not pointing to valid bpf_map", @@ -430,7 +430,7 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_unspec), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), BPF_EXIT_INSN(), }, @@ -445,7 +445,7 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_unspec), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0), BPF_EXIT_INSN(), @@ -461,7 +461,7 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_unspec), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), BPF_EXIT_INSN(), @@ -548,7 +548,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -56), BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_unspec), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem), BPF_EXIT_INSN(), }, .fixup = {24}, @@ -602,6 +602,45 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, }, + { + "jump test 5", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_MOV64_REG(BPF_REG_3, 
BPF_REG_2), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, }; static int probe_filter_length(struct bpf_insn *fp) @@ -620,7 +659,7 @@ static int create_map(void) long long key, value = 0; int map_fd; - map_fd = bpf_create_map(BPF_MAP_TYPE_UNSPEC, sizeof(key), sizeof(value), 1024); + map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), 1024); if (map_fd < 0) { printf("failed to create map '%s'\n", strerror(errno)); } @@ -630,7 +669,7 @@ static int create_map(void) static int test(void) { - int prog_fd, i; + int prog_fd, i, pass_cnt = 0, err_cnt = 0; for (i = 0; i < ARRAY_SIZE(tests); i++) { struct bpf_insn *prog = tests[i].insns; @@ -657,21 +696,25 @@ static int test(void) printf("FAIL\nfailed to load prog '%s'\n", strerror(errno)); printf("%s", bpf_log_buf); + err_cnt++; goto fail; } } else { if 
(prog_fd >= 0) { printf("FAIL\nunexpected success to load\n"); printf("%s", bpf_log_buf); + err_cnt++; goto fail; } if (strstr(bpf_log_buf, tests[i].errstr) == 0) { printf("FAIL\nunexpected error message: %s", bpf_log_buf); + err_cnt++; goto fail; } } + pass_cnt++; printf("OK\n"); fail: if (map_fd >= 0) @@ -679,6 +722,7 @@ fail: close(prog_fd); } + printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, err_cnt); return 0; } |