From 1a4691b204e75a853ec74947bed0dd6966558d29 Mon Sep 17 00:00:00 2001
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Fri, 24 Feb 2017 17:30:32 +0000
Subject: tools: hv: Add clean up function for Ubuntu config

This patch adds a function to clean up duplicate config info
on Ubuntu.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/hv/bondvf.sh | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'tools')

diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh
index 4aa5369ffa4e..d85968cb1bf2 100755
--- a/tools/hv/bondvf.sh
+++ b/tools/hv/bondvf.sh
@@ -101,9 +101,25 @@ function create_bond_cfg_redhat {
 	echo BONDING_OPTS=\"mode=active-backup miimon=100 primary=$2\" >>$fn
 }
 
+function del_eth_cfg_ubuntu {
+	local fn=$cfgdir/interfaces
+	local tmpfl=$(mktemp)
+
+	local nic_start='^[ \t]*(auto|iface|mapping|allow-.*)[ \t]+'$1
+	local nic_end='^[ \t]*(auto|iface|mapping|allow-.*|source)'
+
+	awk "/$nic_end/{x=0} x{next} /$nic_start/{x=1;next} 1"  $fn >$tmpfl
+
+	cp $tmpfl $fn
+
+	rm $tmpfl
+}
+
 function create_eth_cfg_ubuntu {
 	local fn=$cfgdir/interfaces
 
+	del_eth_cfg_ubuntu $1
+
 	echo $'\n'auto $1 >>$fn
 	echo iface $1 inet manual >>$fn
 	echo bond-master $2 >>$fn
@@ -119,6 +135,8 @@ function create_eth_cfg_pri_ubuntu {
 function create_bond_cfg_ubuntu {
 	local fn=$cfgdir/interfaces
 
+	del_eth_cfg_ubuntu $1
+
 	echo $'\n'auto $1 >>$fn
 	echo iface $1 inet dhcp >>$fn
 	echo bond-mode active-backup >>$fn
-- 
cgit v1.2.3


From fb30d4b71214aa1811e997f8f753b14b46d5b912 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 22 Mar 2017 10:00:35 -0700
Subject: bpf: Add tests for map-in-map

Test cases for array of maps and hash of maps.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/Makefile                        |   4 +
 samples/bpf/bpf_helpers.h                   |   1 +
 samples/bpf/bpf_load.c                      |  22 +++-
 samples/bpf/test_map_in_map_kern.c          | 173 ++++++++++++++++++++++++++++
 samples/bpf/test_map_in_map_user.c          | 116 +++++++++++++++++++
 tools/include/uapi/linux/bpf.h              |   3 +
 tools/lib/bpf/bpf.c                         |  17 +++
 tools/lib/bpf/bpf.h                         |   2 +
 tools/testing/selftests/bpf/test_verifier.c | 131 ++++++++++++++++++---
 9 files changed, 451 insertions(+), 18 deletions(-)
 create mode 100644 samples/bpf/test_map_in_map_kern.c
 create mode 100644 samples/bpf/test_map_in_map_user.c

(limited to 'tools')

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 09e9d535bd74..91c1d616d975 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -34,6 +34,7 @@ hostprogs-y += sampleip
 hostprogs-y += tc_l2_redirect
 hostprogs-y += lwt_len_hist
 hostprogs-y += xdp_tx_iptunnel
+hostprogs-y += test_map_in_map
 
 # Libbpf dependencies
 LIBBPF := ../../tools/lib/bpf/bpf.o
@@ -72,6 +73,7 @@ sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o
 tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o
 lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o
 xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o
+test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -105,6 +107,7 @@ always += trace_event_kern.o
 always += sampleip_kern.o
 always += lwt_len_hist_kern.o
 always += xdp_tx_iptunnel_kern.o
+always += test_map_in_map_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -139,6 +142,7 @@ HOSTLOADLIBES_sampleip += -lelf
 HOSTLOADLIBES_tc_l2_redirect += -l elf
 HOSTLOADLIBES_lwt_len_hist += -l elf
 HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
+HOSTLOADLIBES_test_map_in_map += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index faaffe2e139a..52de9d88c021 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -80,6 +80,7 @@ struct bpf_map_def {
 	unsigned int value_size;
 	unsigned int max_entries;
 	unsigned int map_flags;
+	unsigned int inner_map_idx;
 };
 
 static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index b86ee54da2d1..dcdce1270d38 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -43,6 +43,7 @@ struct bpf_map_def {
 	unsigned int value_size;
 	unsigned int max_entries;
 	unsigned int map_flags;
+	unsigned int inner_map_idx;
 };
 
 static int populate_prog_array(const char *event, int prog_fd)
@@ -198,11 +199,22 @@ static int load_maps(struct bpf_map_def *maps, int len)
 
 	for (i = 0; i < len / sizeof(struct bpf_map_def); i++) {
 
-		map_fd[i] = bpf_create_map(maps[i].type,
-					   maps[i].key_size,
-					   maps[i].value_size,
-					   maps[i].max_entries,
-					   maps[i].map_flags);
+		if (maps[i].type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+		    maps[i].type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+			int inner_map_fd = map_fd[maps[i].inner_map_idx];
+
+			map_fd[i] = bpf_create_map_in_map(maps[i].type,
+							  maps[i].key_size,
+							  inner_map_fd,
+							  maps[i].max_entries,
+							  maps[i].map_flags);
+		} else {
+			map_fd[i] = bpf_create_map(maps[i].type,
+						   maps[i].key_size,
+						   maps[i].value_size,
+						   maps[i].max_entries,
+						   maps[i].map_flags);
+		}
 		if (map_fd[i] < 0) {
 			printf("failed to create a map: %d %s\n",
 			       errno, strerror(errno));
diff --git a/samples/bpf/test_map_in_map_kern.c b/samples/bpf/test_map_in_map_kern.c
new file mode 100644
index 000000000000..42c44d091dd1
--- /dev/null
+++ b/samples/bpf/test_map_in_map_kern.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#define KBUILD_MODNAME "foo"
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/in6.h>
+#include "bpf_helpers.h"
+
+#define MAX_NR_PORTS 65536
+
+/* map #0 */
+struct bpf_map_def SEC("maps") port_a = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(int),
+	.max_entries = MAX_NR_PORTS,
+};
+
+/* map #1 */
+struct bpf_map_def SEC("maps") port_h = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(int),
+	.max_entries = 1,
+};
+
+/* map #2 */
+struct bpf_map_def SEC("maps") reg_result_h = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(int),
+	.max_entries = 1,
+};
+
+/* map #3 */
+struct bpf_map_def SEC("maps") inline_result_h = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(int),
+	.max_entries = 1,
+};
+
+/* map #4 */ /* Test case #0 */
+struct bpf_map_def SEC("maps") a_of_port_a = {
+	.type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
+	.key_size = sizeof(u32),
+	.inner_map_idx = 0, /* map_fd[0] is port_a */
+	.max_entries = MAX_NR_PORTS,
+};
+
+/* map #5 */ /* Test case #1 */
+struct bpf_map_def SEC("maps") h_of_port_a = {
+	.type = BPF_MAP_TYPE_HASH_OF_MAPS,
+	.key_size = sizeof(u32),
+	.inner_map_idx = 0, /* map_fd[0] is port_a */
+	.max_entries = 1,
+};
+
+/* map #6 */ /* Test case #2 */
+struct bpf_map_def SEC("maps") h_of_port_h = {
+	.type = BPF_MAP_TYPE_HASH_OF_MAPS,
+	.key_size = sizeof(u32),
+	.inner_map_idx = 1, /* map_fd[1] is port_h */
+	.max_entries = 1,
+};
+
+static __always_inline int do_reg_lookup(void *inner_map, u32 port)
+{
+	int *result;
+
+	result = bpf_map_lookup_elem(inner_map, &port);
+	return result ? *result : -ENOENT;
+}
+
+static __always_inline int do_inline_array_lookup(void *inner_map, u32 port)
+{
+	int *result;
+
+	if (inner_map != &port_a)
+		return -EINVAL;
+
+	result = bpf_map_lookup_elem(&port_a, &port);
+	return result ? *result : -ENOENT;
+}
+
+static __always_inline int do_inline_hash_lookup(void *inner_map, u32 port)
+{
+	int *result;
+
+	if (inner_map != &port_h)
+		return -EINVAL;
+
+	result = bpf_map_lookup_elem(&port_h, &port);
+	return result ? *result : -ENOENT;
+}
+
+SEC("kprobe/sys_connect")
+int trace_sys_connect(struct pt_regs *ctx)
+{
+	struct sockaddr_in6 *in6;
+	u16 test_case, port, dst6[8];
+	int addrlen, ret, inline_ret, ret_key = 0;
+	u32 port_key;
+	void *outer_map, *inner_map;
+	bool inline_hash = false;
+
+	in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx);
+	addrlen = (int)PT_REGS_PARM3(ctx);
+
+	if (addrlen != sizeof(*in6))
+		return 0;
+
+	ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr);
+	if (ret) {
+		inline_ret = ret;
+		goto done;
+	}
+
+	if (dst6[0] != 0xdead || dst6[1] != 0xbeef)
+		return 0;
+
+	test_case = dst6[7];
+
+	ret = bpf_probe_read(&port, sizeof(port), &in6->sin6_port);
+	if (ret) {
+		inline_ret = ret;
+		goto done;
+	}
+
+	port_key = port;
+
+	ret = -ENOENT;
+	if (test_case == 0) {
+		outer_map = &a_of_port_a;
+	} else if (test_case == 1) {
+		outer_map = &h_of_port_a;
+	} else if (test_case == 2) {
+		outer_map = &h_of_port_h;
+	} else {
+		ret = __LINE__;
+		inline_ret = ret;
+		goto done;
+	}
+
+	inner_map = bpf_map_lookup_elem(outer_map, &port_key);
+	if (!inner_map) {
+		ret = __LINE__;
+		inline_ret = ret;
+		goto done;
+	}
+
+	ret = do_reg_lookup(inner_map, port_key);
+
+	if (test_case == 0 || test_case == 1)
+		inline_ret = do_inline_array_lookup(inner_map, port_key);
+	else
+		inline_ret = do_inline_hash_lookup(inner_map, port_key);
+
+done:
+	bpf_map_update_elem(&reg_result_h, &ret_key, &ret, BPF_ANY);
+	bpf_map_update_elem(&inline_result_h, &ret_key, &inline_ret, BPF_ANY);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c
new file mode 100644
index 000000000000..f62fdc2bd428
--- /dev/null
+++ b/samples/bpf/test_map_in_map_user.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <stdint.h>
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define PORT_A		(map_fd[0])
+#define PORT_H		(map_fd[1])
+#define REG_RESULT_H	(map_fd[2])
+#define INLINE_RESULT_H	(map_fd[3])
+#define A_OF_PORT_A	(map_fd[4]) /* Test case #0 */
+#define H_OF_PORT_A	(map_fd[5]) /* Test case #1 */
+#define H_OF_PORT_H	(map_fd[6]) /* Test case #2 */
+
+static const char * const test_names[] = {
+	"Array of Array",
+	"Hash of Array",
+	"Hash of Hash",
+};
+
+#define NR_TESTS (sizeof(test_names) / sizeof(*test_names))
+
+static void populate_map(uint32_t port_key, int magic_result)
+{
+	int ret;
+
+	ret = bpf_map_update_elem(PORT_A, &port_key, &magic_result, BPF_ANY);
+	assert(!ret);
+
+	ret = bpf_map_update_elem(PORT_H, &port_key, &magic_result,
+				  BPF_NOEXIST);
+	assert(!ret);
+
+	ret = bpf_map_update_elem(A_OF_PORT_A, &port_key, &PORT_A, BPF_ANY);
+	assert(!ret);
+
+	ret = bpf_map_update_elem(H_OF_PORT_A, &port_key, &PORT_A, BPF_NOEXIST);
+	assert(!ret);
+
+	ret = bpf_map_update_elem(H_OF_PORT_H, &port_key, &PORT_H, BPF_NOEXIST);
+	assert(!ret);
+}
+
+static void test_map_in_map(void)
+{
+	struct sockaddr_in6 in6 = { .sin6_family = AF_INET6 };
+	uint32_t result_key = 0, port_key;
+	int result, inline_result;
+	int magic_result = 0xfaceb00c;
+	int ret;
+	int i;
+
+	port_key = rand() & 0x00FF;
+	populate_map(port_key, magic_result);
+
+	in6.sin6_addr.s6_addr16[0] = 0xdead;
+	in6.sin6_addr.s6_addr16[1] = 0xbeef;
+	in6.sin6_port = port_key;
+
+	for (i = 0; i < NR_TESTS; i++) {
+		printf("%s: ", test_names[i]);
+
+		in6.sin6_addr.s6_addr16[7] = i;
+		ret = connect(-1, (struct sockaddr *)&in6, sizeof(in6));
+		assert(ret == -1 && errno == EBADF);
+
+		ret = bpf_map_lookup_elem(REG_RESULT_H, &result_key, &result);
+		assert(!ret);
+
+		ret = bpf_map_lookup_elem(INLINE_RESULT_H, &result_key,
+					  &inline_result);
+		assert(!ret);
+
+		if (result != magic_result || inline_result != magic_result) {
+			printf("Error. result:%d inline_result:%d\n",
+			       result, inline_result);
+			exit(1);
+		}
+
+		bpf_map_delete_elem(REG_RESULT_H, &result_key);
+		bpf_map_delete_elem(INLINE_RESULT_H, &result_key);
+
+		printf("Pass\n");
+	}
+}
+
+int main(int argc, char **argv)
+{
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	char filename[256];
+
+	assert(!setrlimit(RLIMIT_MEMLOCK, &r));
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	test_map_in_map();
+
+	return 0;
+}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0539a0ceef38..ce6f029ac368 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -96,6 +96,8 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_LRU_HASH,
 	BPF_MAP_TYPE_LRU_PERCPU_HASH,
 	BPF_MAP_TYPE_LPM_TRIE,
+	BPF_MAP_TYPE_ARRAY_OF_MAPS,
+	BPF_MAP_TYPE_HASH_OF_MAPS,
 };
 
 enum bpf_prog_type {
@@ -152,6 +154,7 @@ union bpf_attr {
 		__u32	value_size;	/* size of value in bytes */
 		__u32	max_entries;	/* max number of entries in a map */
 		__u32	map_flags;	/* prealloc or not */
+		__u32	inner_map_fd;	/* fd pointing to the inner map */
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 207c2eeddab0..9b58d20e8c93 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -69,6 +69,23 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size,
 	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 }
 
+int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size,
+			  int inner_map_fd, int max_entries, __u32 map_flags)
+{
+	union bpf_attr attr;
+
+	memset(&attr, '\0', sizeof(attr));
+
+	attr.map_type = map_type;
+	attr.key_size = key_size;
+	attr.value_size = 4;
+	attr.inner_map_fd = inner_map_fd;
+	attr.max_entries = max_entries;
+	attr.map_flags = map_flags;
+
+	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
 int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 		     size_t insns_cnt, const char *license,
 		     __u32 kern_version, char *log_buf, size_t log_buf_sz)
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 09c3dcac0496..93f021932623 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -26,6 +26,8 @@
 
 int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
 		   int max_entries, __u32 map_flags);
+int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size,
+			  int inner_map_fd, int max_entries, __u32 map_flags);
 
 /* Recommend log buffer size */
 #define BPF_LOG_BUF_SIZE 65536
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index d1555e4240c0..f4f43c98cf7f 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -38,6 +38,7 @@
 
 #define MAX_INSNS	512
 #define MAX_FIXUPS	8
+#define MAX_NR_MAPS	4
 
 struct bpf_test {
 	const char *descr;
@@ -45,6 +46,7 @@ struct bpf_test {
 	int fixup_map1[MAX_FIXUPS];
 	int fixup_map2[MAX_FIXUPS];
 	int fixup_prog[MAX_FIXUPS];
+	int fixup_map_in_map[MAX_FIXUPS];
 	const char *errstr;
 	const char *errstr_unpriv;
 	enum {
@@ -4452,7 +4454,76 @@ static struct bpf_test tests[] = {
 		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
 		.result = REJECT,
 		.result_unpriv = REJECT,
-	}
+	},
+	{
+		"map in map access",
+		.insns = {
+			BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_in_map = { 3 },
+		.result = ACCEPT,
+	},
+	{
+		"invalid inner map pointer",
+		.insns = {
+			BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_in_map = { 3 },
+		.errstr = "R1 type=inv expected=map_ptr",
+		.errstr_unpriv = "R1 pointer arithmetic prohibited",
+		.result = REJECT,
+	},
+	{
+		"forgot null checking on the inner map pointer",
+		.insns = {
+			BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_in_map = { 3 },
+		.errstr = "R1 type=map_value_or_null expected=map_ptr",
+		.result = REJECT,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
@@ -4489,42 +4560,73 @@ static int create_prog_array(void)
 	return fd;
 }
 
+static int create_map_in_map(void)
+{
+	int inner_map_fd, outer_map_fd;
+
+	inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+				      sizeof(int), 1, 0);
+	if (inner_map_fd < 0) {
+		printf("Failed to create array '%s'!\n", strerror(errno));
+		return inner_map_fd;
+	}
+
+	outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS,
+					     sizeof(int), inner_map_fd, 1, 0);
+	if (outer_map_fd < 0)
+		printf("Failed to create array of maps '%s'!\n",
+		       strerror(errno));
+
+	close(inner_map_fd);
+
+	return outer_map_fd;
+}
+
 static char bpf_vlog[32768];
 
 static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
-			  int *fd_f1, int *fd_f2, int *fd_f3)
+			  int *map_fds)
 {
 	int *fixup_map1 = test->fixup_map1;
 	int *fixup_map2 = test->fixup_map2;
 	int *fixup_prog = test->fixup_prog;
+	int *fixup_map_in_map = test->fixup_map_in_map;
 
 	/* Allocating HTs with 1 elem is fine here, since we only test
 	 * for verifier and not do a runtime lookup, so the only thing
 	 * that really matters is value size in this case.
 	 */
 	if (*fixup_map1) {
-		*fd_f1 = create_map(sizeof(long long), 1);
+		map_fds[0] = create_map(sizeof(long long), 1);
 		do {
-			prog[*fixup_map1].imm = *fd_f1;
+			prog[*fixup_map1].imm = map_fds[0];
 			fixup_map1++;
 		} while (*fixup_map1);
 	}
 
 	if (*fixup_map2) {
-		*fd_f2 = create_map(sizeof(struct test_val), 1);
+		map_fds[1] = create_map(sizeof(struct test_val), 1);
 		do {
-			prog[*fixup_map2].imm = *fd_f2;
+			prog[*fixup_map2].imm = map_fds[1];
 			fixup_map2++;
 		} while (*fixup_map2);
 	}
 
 	if (*fixup_prog) {
-		*fd_f3 = create_prog_array();
+		map_fds[2] = create_prog_array();
 		do {
-			prog[*fixup_prog].imm = *fd_f3;
+			prog[*fixup_prog].imm = map_fds[2];
 			fixup_prog++;
 		} while (*fixup_prog);
 	}
+
+	if (*fixup_map_in_map) {
+		map_fds[3] = create_map_in_map();
+		do {
+			prog[*fixup_map_in_map].imm = map_fds[3];
+			fixup_map_in_map++;
+		} while (*fixup_map_in_map);
+	}
 }
 
 static void do_test_single(struct bpf_test *test, bool unpriv,
@@ -4533,11 +4635,15 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	struct bpf_insn *prog = test->insns;
 	int prog_len = probe_filter_length(prog);
 	int prog_type = test->prog_type;
-	int fd_f1 = -1, fd_f2 = -1, fd_f3 = -1;
+	int map_fds[MAX_NR_MAPS];
 	int fd_prog, expected_ret;
 	const char *expected_err;
+	int i;
+
+	for (i = 0; i < MAX_NR_MAPS; i++)
+		map_fds[i] = -1;
 
-	do_test_fixup(test, prog, &fd_f1, &fd_f2, &fd_f3);
+	do_test_fixup(test, prog, map_fds);
 
 	fd_prog = bpf_load_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
 				   prog, prog_len, "GPL", 0, bpf_vlog,
@@ -4568,9 +4674,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	printf("OK\n");
 close_fds:
 	close(fd_prog);
-	close(fd_f1);
-	close(fd_f2);
-	close(fd_f3);
+	for (i = 0; i < MAX_NR_MAPS; i++)
+		close(map_fds[i]);
 	sched_yield();
 	return;
 fail_log:
-- 
cgit v1.2.3


From 91b8270f2a4d1d9b268de90451cdca63a70052d6 Mon Sep 17 00:00:00 2001
From: Chenbo Feng <fengc@google.com>
Date: Wed, 22 Mar 2017 17:27:34 -0700
Subject: Add a helper function to get socket cookie in eBPF

Retrieve the socket cookie generated by sock_gen_cookie() from a sk_buff
with a known socket. Generates a new cookie if one was not yet set.If
the socket pointer inside sk_buff is NULL, 0 is returned. The helper
function coud be useful in monitoring per socket networking traffic
statistics and provide a unique socket identifier per namespace.

Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Chenbo Feng <fengc@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sock_diag.h      |  1 +
 include/uapi/linux/bpf.h       |  9 ++++++++-
 net/core/filter.c              | 17 +++++++++++++++++
 net/core/sock_diag.c           |  2 +-
 tools/include/uapi/linux/bpf.h |  3 ++-
 5 files changed, 29 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index a0596ca0e80a..a2f8109bb215 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -24,6 +24,7 @@ void sock_diag_unregister(const struct sock_diag_handler *h);
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 
+u64 sock_gen_cookie(struct sock *sk);
 int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie);
 void sock_diag_save_cookie(struct sock *sk, __u32 *cookie);
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ce6f029ac368..cdfc5595fbc1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -459,6 +459,12 @@ union bpf_attr {
  *     Return:
  *       > 0 length of the string including the trailing NUL on success
  *       < 0 error
+ *
+ * u64 bpf_bpf_get_socket_cookie(skb)
+ *     Get the cookie for the socket stored inside sk_buff.
+ *     @skb: pointer to skb
+ *     Return: 8 Bytes non-decreasing number on success or 0 if the socket
+ *     field is missing inside sk_buff
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -506,7 +512,8 @@ union bpf_attr {
 	FN(get_numa_node_id),		\
 	FN(skb_change_head),		\
 	FN(xdp_adjust_head),		\
-	FN(probe_read_str),
+	FN(probe_read_str),		\
+	FN(get_socket_cookie),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index c7f0ccd1c0d3..35b0f97c3fdf 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -26,6 +26,7 @@
 #include <linux/mm.h>
 #include <linux/fcntl.h>
 #include <linux/socket.h>
+#include <linux/sock_diag.h>
 #include <linux/in.h>
 #include <linux/inet.h>
 #include <linux/netdevice.h>
@@ -2606,6 +2607,18 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
 	.arg5_type	= ARG_CONST_SIZE,
 };
 
+BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
+{
+	return skb->sk ? sock_gen_cookie(skb->sk) : 0;
+}
+
+static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
+	.func           = bpf_get_socket_cookie,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+};
+
 static const struct bpf_func_proto *
 bpf_base_func_proto(enum bpf_func_id func_id)
 {
@@ -2640,6 +2653,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
 	switch (func_id) {
 	case BPF_FUNC_skb_load_bytes:
 		return &bpf_skb_load_bytes_proto;
+	case BPF_FUNC_get_socket_cookie:
+		return &bpf_get_socket_cookie_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -2699,6 +2714,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_get_smp_processor_id_proto;
 	case BPF_FUNC_skb_under_cgroup:
 		return &bpf_skb_under_cgroup_proto;
+	case BPF_FUNC_get_socket_cookie:
+		return &bpf_get_socket_cookie_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 8d11ee75a100..fb9d0e2fd148 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -19,7 +19,7 @@ static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
 static DEFINE_MUTEX(sock_diag_table_mutex);
 static struct workqueue_struct *broadcast_wq;
 
-static u64 sock_gen_cookie(struct sock *sk)
+u64 sock_gen_cookie(struct sock *sk)
 {
 	while (1) {
 		u64 res = atomic64_read(&sk->sk_cookie);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ce6f029ac368..a3851859e5f3 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -506,7 +506,8 @@ union bpf_attr {
 	FN(get_numa_node_id),		\
 	FN(skb_change_head),		\
 	FN(xdp_adjust_head),		\
-	FN(probe_read_str),
+	FN(probe_read_str),		\
+	FN(get_socket_cookie),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit v1.2.3


From 6acc5c2910689fc6ee181bf63085c5efff6a42bd Mon Sep 17 00:00:00 2001
From: Chenbo Feng <fengc@google.com>
Date: Wed, 22 Mar 2017 17:27:35 -0700
Subject: Add a eBPF helper function to retrieve socket uid

Returns the owner uid of the socket inside a sk_buff. This is useful to
perform per-UID accounting of network traffic or per-UID packet
filtering. The socket need to be a fullsock otherwise overflowuid is
returned.

Signed-off-by: Chenbo Feng <fengc@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h       |  9 ++++++++-
 net/core/filter.c              | 22 ++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  3 ++-
 3 files changed, 32 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index cdfc5595fbc1..28317a04c34d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -465,6 +465,12 @@ union bpf_attr {
  *     @skb: pointer to skb
  *     Return: 8 Bytes non-decreasing number on success or 0 if the socket
  *     field is missing inside sk_buff
+ *
+ * u32 bpf_get_socket_uid(skb)
+ *     Get the owner uid of the socket stored inside sk_buff.
+ *     @skb: pointer to skb
+ *     Return: uid of the socket owner on success or 0 if the socket pointer
+ *     inside sk_buff is NULL
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -513,7 +519,8 @@ union bpf_attr {
 	FN(skb_change_head),		\
 	FN(xdp_adjust_head),		\
 	FN(probe_read_str),		\
-	FN(get_socket_cookie),
+	FN(get_socket_cookie),		\
+	FN(get_socket_uid),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 35b0f97c3fdf..dfb9f61a2fd5 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2619,6 +2619,24 @@ static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
 
+BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
+{
+	struct sock *sk = sk_to_full_sk(skb->sk);
+	kuid_t kuid;
+
+	if (!sk || !sk_fullsock(sk))
+		return overflowuid;
+	kuid = sock_net_uid(sock_net(sk), sk);
+	return from_kuid_munged(sock_net(sk)->user_ns, kuid);
+}
+
+static const struct bpf_func_proto bpf_get_socket_uid_proto = {
+	.func           = bpf_get_socket_uid,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+};
+
 static const struct bpf_func_proto *
 bpf_base_func_proto(enum bpf_func_id func_id)
 {
@@ -2655,6 +2673,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
 		return &bpf_skb_load_bytes_proto;
 	case BPF_FUNC_get_socket_cookie:
 		return &bpf_get_socket_cookie_proto;
+	case BPF_FUNC_get_socket_uid:
+		return &bpf_get_socket_uid_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -2716,6 +2736,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_skb_under_cgroup_proto;
 	case BPF_FUNC_get_socket_cookie:
 		return &bpf_get_socket_cookie_proto;
+	case BPF_FUNC_get_socket_uid:
+		return &bpf_get_socket_uid_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index a3851859e5f3..1ea08ce35567 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -507,7 +507,8 @@ union bpf_attr {
 	FN(skb_change_head),		\
 	FN(xdp_adjust_head),		\
 	FN(probe_read_str),		\
-	FN(get_socket_cookie),
+	FN(get_socket_cookie),		\
+	FN(get_socket_uid),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit v1.2.3


From 3084887378f5271daedd52cc3372cb8011ad39b6 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Thu, 30 Mar 2017 21:45:39 -0700
Subject: tools/lib/bpf: add support for BPF_PROG_TEST_RUN command

add support for BPF_PROG_TEST_RUN command to libbpf.a

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Wang Nan <wangnan0@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/include/uapi/linux/bpf.h | 24 ++++++++++++++++++++++++
 tools/lib/bpf/bpf.c            | 24 ++++++++++++++++++++++++
 tools/lib/bpf/bpf.h            |  4 +++-
 3 files changed, 51 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 1ea08ce35567..a1d95386f562 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -81,6 +81,7 @@ enum bpf_cmd {
 	BPF_OBJ_GET,
 	BPF_PROG_ATTACH,
 	BPF_PROG_DETACH,
+	BPF_PROG_TEST_RUN,
 };
 
 enum bpf_map_type {
@@ -189,6 +190,17 @@ union bpf_attr {
 		__u32		attach_type;
 		__u32		attach_flags;
 	};
+
+	struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
+		__u32		prog_fd;
+		__u32		retval;
+		__u32		data_size_in;
+		__u32		data_size_out;
+		__aligned_u64	data_in;
+		__aligned_u64	data_out;
+		__u32		repeat;
+		__u32		duration;
+	} test;
 } __attribute__((aligned(8)));
 
 /* BPF helper function descriptions:
@@ -459,6 +471,18 @@ union bpf_attr {
  *     Return:
  *       > 0 length of the string including the trailing NUL on success
  *       < 0 error
+ *
+ * u64 bpf_bpf_get_socket_cookie(skb)
+ *     Get the cookie for the socket stored inside sk_buff.
+ *     @skb: pointer to skb
+ *     Return: 8 Bytes non-decreasing number on success or 0 if the socket
+ *     field is missing inside sk_buff
+ *
+ * u32 bpf_get_socket_uid(skb)
+ *     Get the owner uid of the socket stored inside sk_buff.
+ *     @skb: pointer to skb
+ *     Return: uid of the socket owner on success or 0 if the socket pointer
+ *     inside sk_buff is NULL
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 9b58d20e8c93..f84c398c11f4 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -209,3 +209,27 @@ int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
 
 	return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
 }
+
+int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
+		      void *data_out, __u32 *size_out, __u32 *retval,
+		      __u32 *duration)
+{
+	union bpf_attr attr;
+	int ret;
+
+	bzero(&attr, sizeof(attr));
+	attr.test.prog_fd = prog_fd;
+	attr.test.data_in = ptr_to_u64(data);
+	attr.test.data_out = ptr_to_u64(data_out);
+	attr.test.data_size_in = size;
+	attr.test.repeat = repeat;
+
+	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+	if (size_out)
+		*size_out = attr.test.data_size_out;
+	if (retval)
+		*retval = attr.test.retval;
+	if (duration)
+		*duration = attr.test.duration;
+	return ret;
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 93f021932623..edb4daeff7a5 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -47,6 +47,8 @@ int bpf_obj_get(const char *pathname);
 int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type,
 		    unsigned int flags);
 int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
-
+int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
+		      void *data_out, __u32 *size_out, __u32 *retval,
+		      __u32 *duration);
 
 #endif
-- 
cgit v1.2.3


From dd26b7f54a08fd1a9fd804b51b9fce8e16628349 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Thu, 30 Mar 2017 21:45:40 -0700
Subject: tools/lib/bpf: expose bpf_program__set_type()

expose bpf_program__set_type() to set program type

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/lib/bpf/libbpf.c | 3 +--
 tools/lib/bpf/libbpf.h | 2 ++
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ac6eb863b2a4..1a2c07eb7795 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1618,8 +1618,7 @@ int bpf_program__nth_fd(struct bpf_program *prog, int n)
 	return fd;
 }
 
-static void bpf_program__set_type(struct bpf_program *prog,
-				  enum bpf_prog_type type)
+void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
 {
 	prog->type = type;
 }
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index b30394f9947a..32c7252f734e 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -25,6 +25,7 @@
 #include <stdint.h>
 #include <stdbool.h>
 #include <sys/types.h>  // for size_t
+#include <linux/bpf.h>
 
 enum libbpf_errno {
 	__LIBBPF_ERRNO__START = 4000,
@@ -185,6 +186,7 @@ int bpf_program__set_sched_cls(struct bpf_program *prog);
 int bpf_program__set_sched_act(struct bpf_program *prog);
 int bpf_program__set_xdp(struct bpf_program *prog);
 int bpf_program__set_perf_event(struct bpf_program *prog);
+void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type);
 
 bool bpf_program__is_socket_filter(struct bpf_program *prog);
 bool bpf_program__is_tracepoint(struct bpf_program *prog);
-- 
cgit v1.2.3


From 6882804c916beaa945bae90dfd3295e635f6b78f Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Thu, 30 Mar 2017 21:45:41 -0700
Subject: selftests/bpf: add a test for overlapping packet range checks

add simple C test case for llvm and verifier range check fix from
commit b1977682a385 ("bpf: improve verifier packet range checks")

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/Makefile          |  17 +++-
 tools/testing/selftests/bpf/test_pkt_access.c |  64 ++++++++++++
 tools/testing/selftests/bpf/test_progs.c      | 138 ++++++++++++++++++++++++++
 3 files changed, 215 insertions(+), 4 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/test_pkt_access.c
 create mode 100644 tools/testing/selftests/bpf/test_progs.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 6a1ad58cb66f..ff68c9419a67 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,16 +1,18 @@
 LIBDIR := ../../../lib
 BPFDIR := $(LIBDIR)/bpf
 
-CFLAGS += -Wall -O2 -I../../../include/uapi -I$(LIBDIR)
-LDLIBS += -lcap
+CFLAGS += -Wall -O2 -I../../../include/uapi -I$(LIBDIR) -I../../../include
+LDLIBS += -lcap -lelf
 
-TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map
+TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs
+
+TEST_GEN_FILES = test_pkt_access.o
 
 TEST_PROGS := test_kmod.sh
 
 include ../lib.mk
 
-BPFOBJ := $(OUTPUT)/bpf.o
+BPFOBJ := $(OUTPUT)/libbpf.a
 
 $(TEST_GEN_PROGS): $(BPFOBJ)
 
@@ -21,3 +23,10 @@ force:
 
 $(BPFOBJ): force
 	$(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/
+
+CLANG ?= clang
+
+%.o: %.c
+	$(CLANG) -I../../../include/uapi -I../../../../samples/bpf/ \
+		-D__x86_64__ -Wno-compare-distinct-pointer-types \
+		-O2 -target bpf -c $< -o $@
diff --git a/tools/testing/selftests/bpf/test_pkt_access.c b/tools/testing/selftests/bpf/test_pkt_access.c
new file mode 100644
index 000000000000..fd1e0832d409
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_pkt_access.c
@@ -0,0 +1,64 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+#define _htons __builtin_bswap16
+#define barrier() __asm__ __volatile__("": : :"memory")
+int _version SEC("version") = 1;
+
+SEC("test1")
+int process(struct __sk_buff *skb)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	void *data = (void *)(long)skb->data;
+	struct ethhdr *eth = (struct ethhdr *)(data);
+	struct tcphdr *tcp = NULL;
+	__u8 proto = 255;
+	__u64 ihl_len;
+
+	if (eth + 1 > data_end)
+		return TC_ACT_SHOT;
+
+	if (eth->h_proto == _htons(ETH_P_IP)) {
+		struct iphdr *iph = (struct iphdr *)(eth + 1);
+
+		if (iph + 1 > data_end)
+			return TC_ACT_SHOT;
+		ihl_len = iph->ihl * 4;
+		proto = iph->protocol;
+		tcp = (struct tcphdr *)((void *)(iph) + ihl_len);
+	} else if (eth->h_proto == _htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ip6h = (struct ipv6hdr *)(eth + 1);
+
+		if (ip6h + 1 > data_end)
+			return TC_ACT_SHOT;
+		ihl_len = sizeof(*ip6h);
+		proto = ip6h->nexthdr;
+		tcp = (struct tcphdr *)((void *)(ip6h) + ihl_len);
+	}
+
+	if (tcp) {
+		if (((void *)(tcp) + 20) > data_end || proto != 6)
+			return TC_ACT_SHOT;
+		barrier(); /* to force ordering of checks */
+		if (((void *)(tcp) + 18) > data_end)
+			return TC_ACT_SHOT;
+		if (tcp->urg_ptr == 123)
+			return TC_ACT_OK;
+	}
+
+	return TC_ACT_UNSPEC;
+}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
new file mode 100644
index 000000000000..1d9a310e71e5
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -0,0 +1,138 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include <linux/types.h>
+typedef __u16 __sum16;
+#include <arpa/inet.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+
+#include <sys/wait.h>
+#include <sys/resource.h>
+
+#include <linux/bpf.h>
+#include <linux/err.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#define _htons __builtin_bswap16
+
+static int error_cnt, pass_cnt;
+
+/* ipv4 test vector */
+static struct {
+	struct ethhdr eth;
+	struct iphdr iph;
+	struct tcphdr tcp;
+} __packed pkt_v4 = {
+	.eth.h_proto = _htons(ETH_P_IP),
+	.iph.ihl = 5,
+	.iph.protocol = 6,
+	.tcp.urg_ptr = 123,
+};
+
+/* ipv6 test vector */
+static struct {
+	struct ethhdr eth;
+	struct ipv6hdr iph;
+	struct tcphdr tcp;
+} __packed pkt_v6 = {
+	.eth.h_proto = _htons(ETH_P_IPV6),
+	.iph.nexthdr = 6,
+	.tcp.urg_ptr = 123,
+};
+
+#define CHECK(condition, tag, format...) ({				\
+	int __ret = !!(condition);					\
+	if (__ret) {							\
+		error_cnt++;						\
+		printf("%s:FAIL:%s ", __func__, tag);			\
+		printf(format);						\
+	} else {							\
+		pass_cnt++;						\
+		printf("%s:PASS:%s %d nsec\n", __func__, tag, duration);\
+	}								\
+})
+
+static int bpf_prog_load(const char *file, enum bpf_prog_type type,
+			 struct bpf_object **pobj, int *prog_fd)
+{
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	int err;
+
+	obj = bpf_object__open(file);
+	if (IS_ERR(obj)) {
+		error_cnt++;
+		return -ENOENT;
+	}
+
+	prog = bpf_program__next(NULL, obj);
+	if (!prog) {
+		bpf_object__close(obj);
+		error_cnt++;
+		return -ENOENT;
+	}
+
+	bpf_program__set_type(prog, type);
+	err = bpf_object__load(obj);
+	if (err) {
+		bpf_object__close(obj);
+		error_cnt++;
+		return -EINVAL;
+	}
+
+	*pobj = obj;
+	*prog_fd = bpf_program__fd(prog);
+	return 0;
+}
+
+static void test_pkt_access(void)
+{
+	const char *file = "./test_pkt_access.o";
+	struct bpf_object *obj;
+	__u32 duration, retval;
+	int err, prog_fd;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+	if (err)
+		return;
+
+	err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, &retval, &duration);
+	CHECK(err || errno || retval, "ipv4",
+	      "err %d errno %d retval %d duration %d\n",
+	      err, errno, retval, duration);
+
+	err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6),
+				NULL, NULL, &retval, &duration);
+	CHECK(err || errno || retval, "ipv6",
+	      "err %d errno %d retval %d duration %d\n",
+	      err, errno, retval, duration);
+	bpf_object__close(obj);
+}
+
+int main(void)
+{
+	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
+
+	setrlimit(RLIMIT_MEMLOCK, &rinf);
+
+	test_pkt_access();
+
+	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
+	return 0;
+}
-- 
cgit v1.2.3


From 8d48f5e427923fa6f6482c716082d310b7f3bcd5 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Thu, 30 Mar 2017 21:45:42 -0700
Subject: selftests/bpf: add a test for basic XDP functionality

add C test for xdp_adjust_head(), packet rewrite and map lookups

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/Makefile               |   2 +-
 tools/testing/selftests/bpf/test_iptunnel_common.h |  37 ++++
 tools/testing/selftests/bpf/test_progs.c           |  58 +++++
 tools/testing/selftests/bpf/test_xdp.c             | 236 +++++++++++++++++++++
 4 files changed, 332 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_iptunnel_common.h
 create mode 100644 tools/testing/selftests/bpf/test_xdp.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index ff68c9419a67..76cbe1d42dda 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -6,7 +6,7 @@ LDLIBS += -lcap -lelf
 
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs
 
-TEST_GEN_FILES = test_pkt_access.o
+TEST_GEN_FILES = test_pkt_access.o test_xdp.o
 
 TEST_PROGS := test_kmod.sh
 
diff --git a/tools/testing/selftests/bpf/test_iptunnel_common.h b/tools/testing/selftests/bpf/test_iptunnel_common.h
new file mode 100644
index 000000000000..e4cd252a1b20
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_iptunnel_common.h
@@ -0,0 +1,37 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _TEST_IPTNL_COMMON_H
+#define _TEST_IPTNL_COMMON_H
+
+#include <linux/types.h>
+
+#define MAX_IPTNL_ENTRIES 256U
+
+struct vip {
+	union {
+		__u32 v6[4];
+		__u32 v4;
+	} daddr;
+	__u16 dport;
+	__u16 family;
+	__u8 protocol;
+};
+
+struct iptnl_info {
+	union {
+		__u32 v6[4];
+		__u32 v4;
+	} saddr;
+	union {
+		__u32 v6[4];
+		__u32 v4;
+	} daddr;
+	__u16 family;
+	__u8 dmac[6];
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 1d9a310e71e5..defcb273242e 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -27,6 +27,7 @@ typedef __u16 __sum16;
 #include <linux/err.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
+#include "test_iptunnel_common.h"
 
 #define _htons __builtin_bswap16
 
@@ -100,6 +101,20 @@ static int bpf_prog_load(const char *file, enum bpf_prog_type type,
 	return 0;
 }
 
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+			const char *name)
+{
+	struct bpf_map *map;
+
+	map = bpf_object__find_map_by_name(obj, name);
+	if (!map) {
+		printf("%s:FAIL:map '%s' not found\n", test, name);
+		error_cnt++;
+		return -1;
+	}
+	return bpf_map__fd(map);
+}
+
 static void test_pkt_access(void)
 {
 	const char *file = "./test_pkt_access.o";
@@ -125,6 +140,48 @@ static void test_pkt_access(void)
 	bpf_object__close(obj);
 }
 
+static void test_xdp(void)
+{
+	struct vip key4 = {.protocol = 6, .family = AF_INET};
+	struct vip key6 = {.protocol = 6, .family = AF_INET6};
+	struct iptnl_info value4 = {.family = AF_INET};
+	struct iptnl_info value6 = {.family = AF_INET6};
+	const char *file = "./test_xdp.o";
+	struct bpf_object *obj;
+	char buf[128];
+	struct ipv6hdr *iph6 = (void *)buf + sizeof(struct ethhdr);
+	struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+	__u32 duration, retval, size;
+	int err, prog_fd, map_fd;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+	if (err)
+		return;
+
+	map_fd = bpf_find_map(__func__, obj, "vip2tnl");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_update_elem(map_fd, &key4, &value4, 0);
+	bpf_map_update_elem(map_fd, &key6, &value6, 0);
+
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				buf, &size, &retval, &duration);
+
+	CHECK(err || errno || retval != XDP_TX || size != 74 ||
+	      iph->protocol != IPPROTO_IPIP, "ipv4",
+	      "err %d errno %d retval %d size %d\n",
+	      err, errno, retval, size);
+
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
+				buf, &size, &retval, &duration);
+	CHECK(err || errno || retval != XDP_TX || size != 114 ||
+	      iph6->nexthdr != IPPROTO_IPV6, "ipv6",
+	      "err %d errno %d retval %d size %d\n",
+	      err, errno, retval, size);
+out:
+	bpf_object__close(obj);
+}
+
 int main(void)
 {
 	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
@@ -132,6 +189,7 @@ int main(void)
 	setrlimit(RLIMIT_MEMLOCK, &rinf);
 
 	test_pkt_access();
+	test_xdp();
 
 	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
 	return 0;
diff --git a/tools/testing/selftests/bpf/test_xdp.c b/tools/testing/selftests/bpf/test_xdp.c
new file mode 100644
index 000000000000..9a33b038cb31
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp.c
@@ -0,0 +1,236 @@
+/* Copyright (c) 2016,2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "test_iptunnel_common.h"
+
+#define htons __builtin_bswap16
+#define ntohs __builtin_bswap16
+int _version SEC("version") = 1;
+
+struct bpf_map_def SEC("maps") rxcnt = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64),
+	.max_entries = 256,
+};
+
+struct bpf_map_def SEC("maps") vip2tnl = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(struct vip),
+	.value_size = sizeof(struct iptnl_info),
+	.max_entries = MAX_IPTNL_ENTRIES,
+};
+
+static __always_inline void count_tx(__u32 protocol)
+{
+	__u64 *rxcnt_count;
+
+	rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
+	if (rxcnt_count)
+		*rxcnt_count += 1;
+}
+
+static __always_inline int get_dport(void *trans_data, void *data_end,
+				     __u8 protocol)
+{
+	struct tcphdr *th;
+	struct udphdr *uh;
+
+	switch (protocol) {
+	case IPPROTO_TCP:
+		th = (struct tcphdr *)trans_data;
+		if (th + 1 > data_end)
+			return -1;
+		return th->dest;
+	case IPPROTO_UDP:
+		uh = (struct udphdr *)trans_data;
+		if (uh + 1 > data_end)
+			return -1;
+		return uh->dest;
+	default:
+		return 0;
+	}
+}
+
+static __always_inline void set_ethhdr(struct ethhdr *new_eth,
+				       const struct ethhdr *old_eth,
+				       const struct iptnl_info *tnl,
+				       __be16 h_proto)
+{
+	memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
+	memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
+	new_eth->h_proto = h_proto;
+}
+
+static __always_inline int handle_ipv4(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct iptnl_info *tnl;
+	struct ethhdr *new_eth;
+	struct ethhdr *old_eth;
+	struct iphdr *iph = data + sizeof(struct ethhdr);
+	__u16 *next_iph;
+	__u16 payload_len;
+	struct vip vip = {};
+	int dport;
+	__u32 csum = 0;
+	int i;
+
+	if (iph + 1 > data_end)
+		return XDP_DROP;
+
+	dport = get_dport(iph + 1, data_end, iph->protocol);
+	if (dport == -1)
+		return XDP_DROP;
+
+	vip.protocol = iph->protocol;
+	vip.family = AF_INET;
+	vip.daddr.v4 = iph->daddr;
+	vip.dport = dport;
+	payload_len = ntohs(iph->tot_len);
+
+	tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+	/* It only does v4-in-v4 */
+	if (!tnl || tnl->family != AF_INET)
+		return XDP_PASS;
+
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
+		return XDP_DROP;
+
+	data = (void *)(long)xdp->data;
+	data_end = (void *)(long)xdp->data_end;
+
+	new_eth = data;
+	iph = data + sizeof(*new_eth);
+	old_eth = data + sizeof(*iph);
+
+	if (new_eth + 1 > data_end ||
+	    old_eth + 1 > data_end ||
+	    iph + 1 > data_end)
+		return XDP_DROP;
+
+	set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IP));
+
+	iph->version = 4;
+	iph->ihl = sizeof(*iph) >> 2;
+	iph->frag_off =	0;
+	iph->protocol = IPPROTO_IPIP;
+	iph->check = 0;
+	iph->tos = 0;
+	iph->tot_len = htons(payload_len + sizeof(*iph));
+	iph->daddr = tnl->daddr.v4;
+	iph->saddr = tnl->saddr.v4;
+	iph->ttl = 8;
+
+	next_iph = (__u16 *)iph;
+#pragma clang loop unroll(full)
+	for (i = 0; i < sizeof(*iph) >> 1; i++)
+		csum += *next_iph++;
+
+	iph->check = ~((csum & 0xffff) + (csum >> 16));
+
+	count_tx(vip.protocol);
+
+	return XDP_TX;
+}
+
+static __always_inline int handle_ipv6(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct iptnl_info *tnl;
+	struct ethhdr *new_eth;
+	struct ethhdr *old_eth;
+	struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
+	__u16 payload_len;
+	struct vip vip = {};
+	int dport;
+
+	if (ip6h + 1 > data_end)
+		return XDP_DROP;
+
+	dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
+	if (dport == -1)
+		return XDP_DROP;
+
+	vip.protocol = ip6h->nexthdr;
+	vip.family = AF_INET6;
+	memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
+	vip.dport = dport;
+	payload_len = ip6h->payload_len;
+
+	tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+	/* It only does v6-in-v6 */
+	if (!tnl || tnl->family != AF_INET6)
+		return XDP_PASS;
+
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
+		return XDP_DROP;
+
+	data = (void *)(long)xdp->data;
+	data_end = (void *)(long)xdp->data_end;
+
+	new_eth = data;
+	ip6h = data + sizeof(*new_eth);
+	old_eth = data + sizeof(*ip6h);
+
+	if (new_eth + 1 > data_end || old_eth + 1 > data_end ||
+	    ip6h + 1 > data_end)
+		return XDP_DROP;
+
+	set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IPV6));
+
+	ip6h->version = 6;
+	ip6h->priority = 0;
+	memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+	ip6h->payload_len = htons(ntohs(payload_len) + sizeof(*ip6h));
+	ip6h->nexthdr = IPPROTO_IPV6;
+	ip6h->hop_limit = 8;
+	memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
+	memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
+
+	count_tx(vip.protocol);
+
+	return XDP_TX;
+}
+
+SEC("xdp_tx_iptunnel")
+int _xdp_tx_iptunnel(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct ethhdr *eth = data;
+	__u16 h_proto;
+
+	if (eth + 1 > data_end)
+		return XDP_DROP;
+
+	h_proto = eth->h_proto;
+
+	if (h_proto == htons(ETH_P_IP))
+		return handle_ipv4(xdp);
+	else if (h_proto == htons(ETH_P_IPV6))
+
+		return handle_ipv6(xdp);
+	else
+		return XDP_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 37821613626e99828491302c3a59a094ec427395 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Thu, 30 Mar 2017 21:45:43 -0700
Subject: selftests/bpf: add l4 load balancer test based on sched_cls

this l4lb demo is a comprehensive test case for LLVM codegen and
kernel verifier. It's using fully inlined jhash(), complex packet
parsing and multiple map lookups of different types to stress
llvm and verifier.
The map sizes, map population and test vectors are artificial to
exercise different paths through the bpf program.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/Makefile     |   2 +-
 tools/testing/selftests/bpf/test_l4lb.c  | 474 +++++++++++++++++++++++++++++++
 tools/testing/selftests/bpf/test_progs.c |  88 ++++++
 3 files changed, 563 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_l4lb.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 76cbe1d42dda..32fb7a294f0f 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -6,7 +6,7 @@ LDLIBS += -lcap -lelf
 
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs
 
-TEST_GEN_FILES = test_pkt_access.o test_xdp.o
+TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o
 
 TEST_PROGS := test_kmod.sh
 
diff --git a/tools/testing/selftests/bpf/test_l4lb.c b/tools/testing/selftests/bpf/test_l4lb.c
new file mode 100644
index 000000000000..368bfe8b9842
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_l4lb.c
@@ -0,0 +1,474 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+#include "test_iptunnel_common.h"
+
+#define htons __builtin_bswap16
+#define ntohs __builtin_bswap16
+int _version SEC("version") = 1;
+
+static inline __u32 rol32(__u32 word, unsigned int shift)
+{
+	return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/* copy paste of jhash from kernel sources to make sure llvm
+ * can compile it into valid sequence of bpf instructions
+ */
+#define __jhash_mix(a, b, c)			\
+{						\
+	a -= c;  a ^= rol32(c, 4);  c += b;	\
+	b -= a;  b ^= rol32(a, 6);  a += c;	\
+	c -= b;  c ^= rol32(b, 8);  b += a;	\
+	a -= c;  a ^= rol32(c, 16); c += b;	\
+	b -= a;  b ^= rol32(a, 19); a += c;	\
+	c -= b;  c ^= rol32(b, 4);  b += a;	\
+}
+
+#define __jhash_final(a, b, c)			\
+{						\
+	c ^= b; c -= rol32(b, 14);		\
+	a ^= c; a -= rol32(c, 11);		\
+	b ^= a; b -= rol32(a, 25);		\
+	c ^= b; c -= rol32(b, 16);		\
+	a ^= c; a -= rol32(c, 4);		\
+	b ^= a; b -= rol32(a, 14);		\
+	c ^= b; c -= rol32(b, 24);		\
+}
+
+#define JHASH_INITVAL		0xdeadbeef
+
+typedef unsigned int u32;
+
+static inline u32 jhash(const void *key, u32 length, u32 initval)
+{
+	u32 a, b, c;
+	const unsigned char *k = key;
+
+	a = b = c = JHASH_INITVAL + length + initval;
+
+	while (length > 12) {
+		a += *(u32 *)(k);
+		b += *(u32 *)(k + 4);
+		c += *(u32 *)(k + 8);
+		__jhash_mix(a, b, c);
+		length -= 12;
+		k += 12;
+	}
+	switch (length) {
+	case 12: c += (u32)k[11]<<24;
+	case 11: c += (u32)k[10]<<16;
+	case 10: c += (u32)k[9]<<8;
+	case 9:  c += k[8];
+	case 8:  b += (u32)k[7]<<24;
+	case 7:  b += (u32)k[6]<<16;
+	case 6:  b += (u32)k[5]<<8;
+	case 5:  b += k[4];
+	case 4:  a += (u32)k[3]<<24;
+	case 3:  a += (u32)k[2]<<16;
+	case 2:  a += (u32)k[1]<<8;
+	case 1:  a += k[0];
+		 __jhash_final(a, b, c);
+	case 0: /* Nothing left to add */
+		break;
+	}
+
+	return c;
+}
+
+static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+	a += initval;
+	b += initval;
+	c += initval;
+	__jhash_final(a, b, c);
+	return c;
+}
+
+static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+	return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+#define PCKT_FRAGMENTED 65343
+#define IPV4_HDR_LEN_NO_OPT 20
+#define IPV4_PLUS_ICMP_HDR 28
+#define IPV6_PLUS_ICMP_HDR 48
+#define RING_SIZE 2
+#define MAX_VIPS 12
+#define MAX_REALS 5
+#define CTL_MAP_SIZE 16
+#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
+#define F_IPV6 (1 << 0)
+#define F_HASH_NO_SRC_PORT (1 << 0)
+#define F_ICMP (1 << 0)
+#define F_SYN_SET (1 << 1)
+
+struct packet_description {
+	union {
+		__be32 src;
+		__be32 srcv6[4];
+	};
+	union {
+		__be32 dst;
+		__be32 dstv6[4];
+	};
+	union {
+		__u32 ports;
+		__u16 port16[2];
+	};
+	__u8 proto;
+	__u8 flags;
+};
+
+struct ctl_value {
+	union {
+		__u64 value;
+		__u32 ifindex;
+		__u8 mac[6];
+	};
+};
+
+struct vip_meta {
+	__u32 flags;
+	__u32 vip_num;
+};
+
+struct real_definition {
+	union {
+		__be32 dst;
+		__be32 dstv6[4];
+	};
+	__u8 flags;
+};
+
+struct vip_stats {
+	__u64 bytes;
+	__u64 pkts;
+};
+
+struct eth_hdr {
+	unsigned char eth_dest[ETH_ALEN];
+	unsigned char eth_source[ETH_ALEN];
+	unsigned short eth_proto;
+};
+
+struct bpf_map_def SEC("maps") vip_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(struct vip),
+	.value_size = sizeof(struct vip_meta),
+	.max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ch_rings = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = CH_RINGS_SIZE,
+};
+
+struct bpf_map_def SEC("maps") reals = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct real_definition),
+	.max_entries = MAX_REALS,
+};
+
+struct bpf_map_def SEC("maps") stats = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct vip_stats),
+	.max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ctl_array = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct ctl_value),
+	.max_entries = CTL_MAP_SIZE,
+};
+
+static __always_inline __u32 get_packet_hash(struct packet_description *pckt,
+					     bool ipv6)
+{
+	if (ipv6)
+		return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
+				    pckt->ports, CH_RINGS_SIZE);
+	else
+		return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
+}
+
+static __always_inline bool get_packet_dst(struct real_definition **real,
+					   struct packet_description *pckt,
+					   struct vip_meta *vip_info,
+					   bool is_ipv6)
+{
+	__u32 hash = get_packet_hash(pckt, is_ipv6) % RING_SIZE;
+	__u32 key = RING_SIZE * vip_info->vip_num + hash;
+	__u32 *real_pos;
+
+	real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+	if (!real_pos)
+		return false;
+	key = *real_pos;
+	*real = bpf_map_lookup_elem(&reals, &key);
+	if (!(*real))
+		return false;
+	return true;
+}
+
+static __always_inline int parse_icmpv6(void *data, void *data_end, __u64 off,
+					struct packet_description *pckt)
+{
+	struct icmp6hdr *icmp_hdr;
+	struct ipv6hdr *ip6h;
+
+	icmp_hdr = data + off;
+	if (icmp_hdr + 1 > data_end)
+		return TC_ACT_SHOT;
+	if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
+		return TC_ACT_OK;
+	off += sizeof(struct icmp6hdr);
+	ip6h = data + off;
+	if (ip6h + 1 > data_end)
+		return TC_ACT_SHOT;
+	pckt->proto = ip6h->nexthdr;
+	pckt->flags |= F_ICMP;
+	memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
+	memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
+	return TC_ACT_UNSPEC;
+}
+
+static __always_inline int parse_icmp(void *data, void *data_end, __u64 off,
+				      struct packet_description *pckt)
+{
+	struct icmphdr *icmp_hdr;
+	struct iphdr *iph;
+
+	icmp_hdr = data + off;
+	if (icmp_hdr + 1 > data_end)
+		return TC_ACT_SHOT;
+	if (icmp_hdr->type != ICMP_DEST_UNREACH ||
+	    icmp_hdr->code != ICMP_FRAG_NEEDED)
+		return TC_ACT_OK;
+	off += sizeof(struct icmphdr);
+	iph = data + off;
+	if (iph + 1 > data_end)
+		return TC_ACT_SHOT;
+	if (iph->ihl != 5)
+		return TC_ACT_SHOT;
+	pckt->proto = iph->protocol;
+	pckt->flags |= F_ICMP;
+	pckt->src = iph->daddr;
+	pckt->dst = iph->saddr;
+	return TC_ACT_UNSPEC;
+}
+
+static __always_inline bool parse_udp(void *data, __u64 off, void *data_end,
+				      struct packet_description *pckt)
+{
+	struct udphdr *udp;
+	udp = data + off;
+
+	if (udp + 1 > data_end)
+		return false;
+
+	if (!(pckt->flags & F_ICMP)) {
+		pckt->port16[0] = udp->source;
+		pckt->port16[1] = udp->dest;
+	} else {
+		pckt->port16[0] = udp->dest;
+		pckt->port16[1] = udp->source;
+	}
+	return true;
+}
+
+static __always_inline bool parse_tcp(void *data, __u64 off, void *data_end,
+				      struct packet_description *pckt)
+{
+	struct tcphdr *tcp;
+
+	tcp = data + off;
+	if (tcp + 1 > data_end)
+		return false;
+
+	if (tcp->syn)
+		pckt->flags |= F_SYN_SET;
+
+	if (!(pckt->flags & F_ICMP)) {
+		pckt->port16[0] = tcp->source;
+		pckt->port16[1] = tcp->dest;
+	} else {
+		pckt->port16[0] = tcp->dest;
+		pckt->port16[1] = tcp->source;
+	}
+	return true;
+}
+
+static __always_inline int process_packet(void *data, __u64 off, void *data_end,
+					  bool is_ipv6, struct __sk_buff *skb)
+{
+	void *pkt_start = (void *)(long)skb->data;
+	struct packet_description pckt = {};
+	struct eth_hdr *eth = pkt_start;
+	struct bpf_tunnel_key tkey = {};
+	struct vip_stats *data_stats;
+	struct real_definition *dst;
+	struct vip_meta *vip_info;
+	struct ctl_value *cval;
+	__u32 v4_intf_pos = 1;
+	__u32 v6_intf_pos = 2;
+	struct ipv6hdr *ip6h;
+	struct vip vip = {};
+	struct iphdr *iph;
+	int tun_flag = 0;
+	__u16 pkt_bytes;
+	__u64 iph_len;
+	__u32 ifindex;
+	__u8 protocol;
+	__u32 vip_num;
+	int action;
+
+	tkey.tunnel_ttl = 64;
+	if (is_ipv6) {
+		ip6h = data + off;
+		if (ip6h + 1 > data_end)
+			return TC_ACT_SHOT;
+
+		iph_len = sizeof(struct ipv6hdr);
+		protocol = ip6h->nexthdr;
+		pckt.proto = protocol;
+		pkt_bytes = ntohs(ip6h->payload_len);
+		off += iph_len;
+		if (protocol == IPPROTO_FRAGMENT) {
+			return TC_ACT_SHOT;
+		} else if (protocol == IPPROTO_ICMPV6) {
+			action = parse_icmpv6(data, data_end, off, &pckt);
+			if (action >= 0)
+				return action;
+			off += IPV6_PLUS_ICMP_HDR;
+		} else {
+			memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
+			memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
+		}
+	} else {
+		iph = data + off;
+		if (iph + 1 > data_end)
+			return TC_ACT_SHOT;
+		if (iph->ihl != 5)
+			return TC_ACT_SHOT;
+
+		protocol = iph->protocol;
+		pckt.proto = protocol;
+		pkt_bytes = ntohs(iph->tot_len);
+		off += IPV4_HDR_LEN_NO_OPT;
+
+		if (iph->frag_off & PCKT_FRAGMENTED)
+			return TC_ACT_SHOT;
+		if (protocol == IPPROTO_ICMP) {
+			action = parse_icmp(data, data_end, off, &pckt);
+			if (action >= 0)
+				return action;
+			off += IPV4_PLUS_ICMP_HDR;
+		} else {
+			pckt.src = iph->saddr;
+			pckt.dst = iph->daddr;
+		}
+	}
+	protocol = pckt.proto;
+
+	if (protocol == IPPROTO_TCP) {
+		if (!parse_tcp(data, off, data_end, &pckt))
+			return TC_ACT_SHOT;
+	} else if (protocol == IPPROTO_UDP) {
+		if (!parse_udp(data, off, data_end, &pckt))
+			return TC_ACT_SHOT;
+	} else {
+		return TC_ACT_SHOT;
+	}
+
+	if (is_ipv6)
+		memcpy(vip.daddr.v6, pckt.dstv6, 16);
+	else
+		vip.daddr.v4 = pckt.dst;
+
+	vip.dport = pckt.port16[1];
+	vip.protocol = pckt.proto;
+	vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+	if (!vip_info) {
+		vip.dport = 0;
+		vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+		if (!vip_info)
+			return TC_ACT_SHOT;
+		pckt.port16[1] = 0;
+	}
+
+	if (vip_info->flags & F_HASH_NO_SRC_PORT)
+		pckt.port16[0] = 0;
+
+	if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
+		return TC_ACT_SHOT;
+
+	if (dst->flags & F_IPV6) {
+		cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
+		if (!cval)
+			return TC_ACT_SHOT;
+		ifindex = cval->ifindex;
+		memcpy(tkey.remote_ipv6, dst->dstv6, 16);
+		tun_flag = BPF_F_TUNINFO_IPV6;
+	} else {
+		cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
+		if (!cval)
+			return TC_ACT_SHOT;
+		ifindex = cval->ifindex;
+		tkey.remote_ipv4 = dst->dst;
+	}
+	vip_num = vip_info->vip_num;
+	data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+	if (!data_stats)
+		return TC_ACT_SHOT;
+	data_stats->pkts++;
+	data_stats->bytes += pkt_bytes;
+	bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
+	*(u32 *)eth->eth_dest = tkey.remote_ipv4;
+	return bpf_redirect(ifindex, 0);
+}
+
+SEC("l4lb-demo")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct eth_hdr *eth = data;
+	__u32 eth_proto;
+	__u32 nh_off;
+
+	nh_off = sizeof(struct eth_hdr);
+	if (data + nh_off > data_end)
+		return TC_ACT_SHOT;
+	eth_proto = eth->eth_proto;
+	if (eth_proto == htons(ETH_P_IP))
+		return process_packet(data, nh_off, data_end, false, ctx);
+	else if (eth_proto == htons(ETH_P_IPV6))
+		return process_packet(data, nh_off, data_end, true, ctx);
+	else
+		return TC_ACT_SHOT;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index defcb273242e..5275d4a1df24 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -28,11 +28,14 @@ typedef __u16 __sum16;
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 #include "test_iptunnel_common.h"
+#include "bpf_util.h"
 
 #define _htons __builtin_bswap16
 
 static int error_cnt, pass_cnt;
 
+#define MAGIC_BYTES 123
+
 /* ipv4 test vector */
 static struct {
 	struct ethhdr eth;
@@ -42,6 +45,7 @@ static struct {
 	.eth.h_proto = _htons(ETH_P_IP),
 	.iph.ihl = 5,
 	.iph.protocol = 6,
+	.iph.tot_len = _htons(MAGIC_BYTES),
 	.tcp.urg_ptr = 123,
 };
 
@@ -53,6 +57,7 @@ static struct {
 } __packed pkt_v6 = {
 	.eth.h_proto = _htons(ETH_P_IPV6),
 	.iph.nexthdr = 6,
+	.iph.payload_len = _htons(MAGIC_BYTES),
 	.tcp.urg_ptr = 123,
 };
 
@@ -182,6 +187,88 @@ out:
 	bpf_object__close(obj);
 }
 
+#define MAGIC_VAL 0x1234
+#define NUM_ITER 100000
+#define VIP_NUM 5
+
+static void test_l4lb(void)
+{
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	const char *file = "./test_l4lb.o";
+	struct vip key = {.protocol = 6};
+	struct vip_meta {
+		__u32 flags;
+		__u32 vip_num;
+	} value = {.vip_num = VIP_NUM};
+	__u32 stats_key = VIP_NUM;
+	struct vip_stats {
+		__u64 bytes;
+		__u64 pkts;
+	} stats[nr_cpus];
+	struct real_definition {
+		union {
+			__be32 dst;
+			__be32 dstv6[4];
+		};
+		__u8 flags;
+	} real_def = {.dst = MAGIC_VAL};
+	__u32 ch_key = 11, real_num = 3;
+	__u32 duration, retval, size;
+	int err, i, prog_fd, map_fd;
+	__u64 bytes = 0, pkts = 0;
+	struct bpf_object *obj;
+	char buf[128];
+	u32 *magic = (u32 *)buf;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+	if (err)
+		return;
+
+	map_fd = bpf_find_map(__func__, obj, "vip_map");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_update_elem(map_fd, &key, &value, 0);
+
+	map_fd = bpf_find_map(__func__, obj, "ch_rings");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
+
+	map_fd = bpf_find_map(__func__, obj, "reals");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
+
+	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
+				buf, &size, &retval, &duration);
+	CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 ||
+	      *magic != MAGIC_VAL, "ipv4",
+	      "err %d errno %d retval %d size %d magic %x\n",
+	      err, errno, retval, size, *magic);
+
+	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
+				buf, &size, &retval, &duration);
+	CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 ||
+	      *magic != MAGIC_VAL, "ipv6",
+	      "err %d errno %d retval %d size %d magic %x\n",
+	      err, errno, retval, size, *magic);
+
+	map_fd = bpf_find_map(__func__, obj, "stats");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_lookup_elem(map_fd, &stats_key, stats);
+	for (i = 0; i < nr_cpus; i++) {
+		bytes += stats[i].bytes;
+		pkts += stats[i].pkts;
+	}
+	if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
+		error_cnt++;
+		printf("test_l4lb:FAIL:stats %lld %lld\n", bytes, pkts);
+	}
+out:
+	bpf_object__close(obj);
+}
+
 int main(void)
 {
 	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
@@ -190,6 +277,7 @@ int main(void)
 
 	test_pkt_access();
 	test_xdp();
+	test_l4lb();
 
 	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
 	return 0;
-- 
cgit v1.2.3


From af0e54619d9b6c23f084d310d35fca2e68faa95c Mon Sep 17 00:00:00 2001
From: LABBE Corentin <clabbe.montjoie@gmail.com>
Date: Tue, 4 Apr 2017 15:32:47 +0200
Subject: selftests: add a generic testsuite for ethernet device

This patch add a generic testsuite for testing ethernet network device driver.

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Tested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/Makefile     |   2 +-
 tools/testing/selftests/net/netdevice.sh | 200 +++++++++++++++++++++++++++++++
 2 files changed, 201 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/net/netdevice.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index fbfe5d0d5c2e..35cbb4cba410 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,7 +5,7 @@ CFLAGS += -I../../../../usr/include/
 
 reuseport_bpf_numa: LDFLAGS += -lnuma
 
-TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh
+TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket
 TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh
new file mode 100755
index 000000000000..4e00568d70c2
--- /dev/null
+++ b/tools/testing/selftests/net/netdevice.sh
@@ -0,0 +1,200 @@
+#!/bin/sh
+#
+# This test is for checking network interface
+# For the moment it tests only ethernet interface (but wifi could be easily added)
+#
+# We assume that all network driver are loaded
+# if not they probably have failed earlier in the boot process and their logged error will be catched by another test
+#
+
+# this function will try to up the interface
+# if already up, nothing done
+# arg1: network interface name
+kci_net_start()
+{
+	netdev=$1
+
+	ip link show "$netdev" |grep -q UP
+	if [ $? -eq 0 ];then
+		echo "SKIP: $netdev: interface already up"
+		return 0
+	fi
+
+	ip link set "$netdev" up
+	if [ $? -ne 0 ];then
+		echo "FAIL: $netdev: Fail to up interface"
+		return 1
+	else
+		echo "PASS: $netdev: set interface up"
+		NETDEV_STARTED=1
+	fi
+	return 0
+}
+
+# this function will try to setup an IP and MAC address on a network interface
+# Doing nothing if the interface was already up
+# arg1: network interface name
+kci_net_setup()
+{
+	netdev=$1
+
+	# do nothing if the interface was already up
+	if [ $NETDEV_STARTED -eq 0 ];then
+		return 0
+	fi
+
+	MACADDR='02:03:04:05:06:07'
+	ip link set dev $netdev address "$MACADDR"
+	if [ $? -ne 0 ];then
+		echo "FAIL: $netdev: Cannot set MAC address"
+	else
+		ip link show $netdev |grep -q "$MACADDR"
+		if [ $? -eq 0 ];then
+			echo "PASS: $netdev: set MAC address"
+		else
+			echo "FAIL: $netdev: Cannot set MAC address"
+		fi
+	fi
+
+	#check that the interface did not already have an IP
+	ip address show "$netdev" |grep '^[[:space:]]*inet'
+	if [ $? -eq 0 ];then
+		echo "SKIP: $netdev: already have an IP"
+		return 0
+	fi
+
+	# TODO what ipaddr to set ? DHCP ?
+	echo "SKIP: $netdev: set IP address"
+	return 0
+}
+
+# test an ethtool command
+# arg1: return code for not supported (see ethtool code source)
+# arg2: summary of the command
+# arg3: command to execute
+kci_netdev_ethtool_test()
+{
+	if [ $# -le 2 ];then
+		echo "SKIP: $netdev: ethtool: invalid number of arguments"
+		return 1
+	fi
+	$3 >/dev/null
+	ret=$?
+	if [ $ret -ne 0 ];then
+		if [ $ret -eq "$1" ];then
+			echo "SKIP: $netdev: ethtool $2 not supported"
+		else
+			echo "FAIL: $netdev: ethtool $2"
+			return 1
+		fi
+	else
+		echo "PASS: $netdev: ethtool $2"
+	fi
+	return 0
+}
+
+# test ethtool commands
+# arg1: network interface name
+kci_netdev_ethtool()
+{
+	netdev=$1
+
+	#check presence of ethtool
+	ethtool --version 2>/dev/null >/dev/null
+	if [ $? -ne 0 ];then
+		echo "SKIP: ethtool not present"
+		return 1
+	fi
+
+	TMP_ETHTOOL_FEATURES="$(mktemp)"
+	if [ ! -e "$TMP_ETHTOOL_FEATURES" ];then
+		echo "SKIP: Cannot create a tmp file"
+		return 1
+	fi
+
+	ethtool -k "$netdev" > "$TMP_ETHTOOL_FEATURES"
+	if [ $? -ne 0 ];then
+		echo "FAIL: $netdev: ethtool list features"
+		rm "$TMP_ETHTOOL_FEATURES"
+		return 1
+	fi
+	echo "PASS: $netdev: ethtool list features"
+	#TODO for each non fixed features, try to turn them on/off
+	rm "$TMP_ETHTOOL_FEATURES"
+
+	kci_netdev_ethtool_test 74 'dump' "ethtool -d $netdev"
+	kci_netdev_ethtool_test 94 'stats' "ethtool -S $netdev"
+	return 0
+}
+
+# stop a netdev
+# arg1: network interface name
+kci_netdev_stop()
+{
+	netdev=$1
+
+	if [ $NETDEV_STARTED -eq 0 ];then
+		echo "SKIP: $netdev: interface kept up"
+		return 0
+	fi
+
+	ip link set "$netdev" down
+	if [ $? -ne 0 ];then
+		echo "FAIL: $netdev: stop interface"
+		return 1
+	fi
+	echo "PASS: $netdev: stop interface"
+	return 0
+}
+
+# run all test on a netdev
+# arg1: network interface name
+kci_test_netdev()
+{
+	NETDEV_STARTED=0
+	IFACE_TO_UPDOWN="$1"
+	IFACE_TO_TEST="$1"
+	#check for VLAN interface
+	MASTER_IFACE="$(echo $1 | cut -d@ -f2)"
+	if [ ! -z "$MASTER_IFACE" ];then
+		IFACE_TO_UPDOWN="$MASTER_IFACE"
+		IFACE_TO_TEST="$(echo $1 | cut -d@ -f1)"
+	fi
+
+	NETDEV_STARTED=0
+	kci_net_start "$IFACE_TO_UPDOWN"
+
+	kci_net_setup "$IFACE_TO_TEST"
+
+	kci_netdev_ethtool "$IFACE_TO_TEST"
+
+	kci_netdev_stop "$IFACE_TO_UPDOWN"
+	return 0
+}
+
+#check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit 0
+fi
+
+ip -Version 2>/dev/null >/dev/null
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without the ip tool"
+	exit 0
+fi
+
+TMP_LIST_NETDEV="$(mktemp)"
+if [ ! -e "$TMP_LIST_NETDEV" ];then
+	echo "FAIL: Cannot create a tmp file"
+	exit 1
+fi
+
+ip link show |grep '^[0-9]' | grep -oE '[[:space:]].*eth[0-9]*:|[[:space:]].*enp[0-9]s[0-9]:' | cut -d\  -f2 | cut -d: -f1> "$TMP_LIST_NETDEV"
+while read netdev
+do
+	kci_test_netdev "$netdev"
+done < "$TMP_LIST_NETDEV"
+
+rm "$TMP_LIST_NETDEV"
+exit 0
-- 
cgit v1.2.3


From 89c0a361301f81d76be5ab77c3f5470b88792670 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Thu, 6 Apr 2017 12:20:26 -0700
Subject: selftests/bpf: fix merge conflict

fix artifact of merge resolution

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 0963f8ffd25c..6178b65fee59 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -4904,12 +4904,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	struct bpf_insn *prog = test->insns;
 	int prog_len = probe_filter_length(prog);
 	int prog_type = test->prog_type;
-<<<<<<< HEAD
 	int map_fds[MAX_NR_MAPS];
-	int fd_prog, expected_ret;
-=======
-	int fd_f1 = -1, fd_f2 = -1, fd_f3 = -1;
->>>>>>> ea6b1720ce25f92f7a17b2e0c2b653d20773d10a
 	const char *expected_err;
 	int i;
 
-- 
cgit v1.2.3


From 3c60a531b9e175693a2d61f6bfd7ffacce4146cd Mon Sep 17 00:00:00 2001
From: Alexander Alemayhu <alexander@alemayhu.com>
Date: Sat, 8 Apr 2017 22:08:10 +0200
Subject: bpf: fix comment typo

o s/bpf_bpf_get_socket_cookie/bpf_get_socket_cookie

Signed-off-by: Alexander Alemayhu <alexander@alemayhu.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h       | 2 +-
 tools/include/uapi/linux/bpf.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a1d95386f562..1e062bb54eec 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -472,7 +472,7 @@ union bpf_attr {
  *       > 0 length of the string including the trailing NUL on success
  *       < 0 error
  *
- * u64 bpf_bpf_get_socket_cookie(skb)
+ * u64 bpf_get_socket_cookie(skb)
  *     Get the cookie for the socket stored inside sk_buff.
  *     @skb: pointer to skb
  *     Return: 8 Bytes non-decreasing number on success or 0 if the socket
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index a1d95386f562..1e062bb54eec 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -472,7 +472,7 @@ union bpf_attr {
  *       > 0 length of the string including the trailing NUL on success
  *       < 0 error
  *
- * u64 bpf_bpf_get_socket_cookie(skb)
+ * u64 bpf_get_socket_cookie(skb)
  *     Get the cookie for the socket stored inside sk_buff.
  *     @skb: pointer to skb
  *     Return: 8 Bytes non-decreasing number on success or 0 if the socket
-- 
cgit v1.2.3


From b6518e6a0086fc152f9a35ac5062930788f8b4bc Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 11 Apr 2017 14:30:52 -0700
Subject: tools: bpf_jit_disasm: Add option to dump JIT image to a file.

When debugging the JIT on an embedded platform or cross build
environment, libbfd may not be available, making it impossible to run
bpf_jit_disasm natively.

Add an option to emit a binary image of the JIT code to a file.  This
file can then be disassembled off line.  Typical usage in this case
might be (pasting mips64 dmesg output to cat command):

   $ cat > jit.raw
   $ bpf_jit_disasm -f jit.raw -O jit.bin
   $ mips64-linux-gnu-objdump -D -b binary -m mips:isa64r2 -EB jit.bin

Signed-off-by: David Daney <david.daney@cavium.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/net/bpf_jit_disasm.c | 40 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 36 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/net/bpf_jit_disasm.c b/tools/net/bpf_jit_disasm.c
index 544b05a53b70..ad572e6cdbd0 100644
--- a/tools/net/bpf_jit_disasm.c
+++ b/tools/net/bpf_jit_disasm.c
@@ -229,6 +229,7 @@ static void usage(void)
 {
 	printf("Usage: bpf_jit_disasm [...]\n");
 	printf("       -o          Also display related opcodes (default: off).\n");
+	printf("       -O <file>   Write binary image of code to file, don't disassemble to stdout.\n");
 	printf("       -f <file>   Read last image dump from file or stdin (default: klog).\n");
 	printf("       -h          Display this help.\n");
 }
@@ -238,12 +239,19 @@ int main(int argc, char **argv)
 	unsigned int len, klen, opt, opcodes = 0;
 	static uint8_t image[32768];
 	char *kbuff, *file = NULL;
+	char *ofile = NULL;
+	int ofd;
+	ssize_t nr;
+	uint8_t *pos;
 
-	while ((opt = getopt(argc, argv, "of:")) != -1) {
+	while ((opt = getopt(argc, argv, "of:O:")) != -1) {
 		switch (opt) {
 		case 'o':
 			opcodes = 1;
 			break;
+		case 'O':
+			ofile = optarg;
+			break;
 		case 'f':
 			file = optarg;
 			break;
@@ -263,11 +271,35 @@ int main(int argc, char **argv)
 	}
 
 	len = get_last_jit_image(kbuff, klen, image, sizeof(image));
-	if (len > 0)
-		get_asm_insns(image, len, opcodes);
-	else
+	if (len <= 0) {
 		fprintf(stderr, "No JIT image found!\n");
+		goto done;
+	}
+	if (!ofile) {
+		get_asm_insns(image, len, opcodes);
+		goto done;
+	}
+
+	ofd = open(ofile, O_WRONLY | O_CREAT | O_TRUNC, DEFFILEMODE);
+	if (ofd < 0) {
+		fprintf(stderr, "Could not open file %s for writing: ", ofile);
+		perror(NULL);
+		goto done;
+	}
+	pos = image;
+	do {
+		nr = write(ofd, pos, len);
+		if (nr < 0) {
+			fprintf(stderr, "Could not write data to %s: ", ofile);
+			perror(NULL);
+			goto done;
+		}
+		len -= nr;
+		pos += nr;
+	} while (len);
+	close(ofd);
 
+done:
 	put_log_buff(kbuff);
 	return 0;
 }
-- 
cgit v1.2.3


From 9746f85686849df107c899f34188f977e78154b0 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 14 Apr 2017 10:30:25 -0700
Subject: bpf: lru: Add test_lru_sanity6 for BPF_F_NO_COMMON_LRU

test_lru_sanity3 is not applicable to BPF_F_NO_COMMON_LRU.
It just happens to work when PERCPU_FREE_TARGET == 16.

This patch:
1) Disable test_lru_sanity3 for BPF_F_NO_COMMON_LRU
2) Add test_lru_sanity6 to test list rotation for
   the BPF_F_NO_COMMON_LRU map.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_lru_map.c | 70 +++++++++++++++++++++++++++---
 1 file changed, 65 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 00b0aff56e2e..04b2242937cc 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -387,6 +387,10 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
 	unsigned int map_size;
 	int next_cpu = 0;
 
+	if (map_flags & BPF_F_NO_COMMON_LRU)
+		/* This test is only applicable to common LRU list */
+		return;
+
 	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
 	       map_flags);
 
@@ -396,11 +400,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
 	assert(batch_size * 2 == tgt_free);
 
 	map_size = tgt_free * 2;
-	if (map_flags & BPF_F_NO_COMMON_LRU)
-		lru_map_fd = create_map(map_type, map_flags,
-					map_size * nr_cpus);
-	else
-		lru_map_fd = create_map(map_type, map_flags, map_size);
+	lru_map_fd = create_map(map_type, map_flags, map_size);
 	assert(lru_map_fd != -1);
 
 	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
@@ -566,6 +566,65 @@ static void test_lru_sanity5(int map_type, int map_flags)
 	printf("Pass\n");
 }
 
+/* Test list rotation for BPF_F_NO_COMMON_LRU map */
+static void test_lru_sanity6(int map_type, int map_flags, int tgt_free)
+{
+	int lru_map_fd, expected_map_fd;
+	unsigned long long key, value[nr_cpus];
+	unsigned int map_size = tgt_free * 2;
+	int next_cpu = 0;
+
+	if (!(map_flags & BPF_F_NO_COMMON_LRU))
+		return;
+
+	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+	       map_flags);
+
+	assert(sched_next_online(0, &next_cpu) != -1);
+
+	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
+	assert(expected_map_fd != -1);
+
+	lru_map_fd = create_map(map_type, map_flags, map_size * nr_cpus);
+	assert(lru_map_fd != -1);
+
+	value[0] = 1234;
+
+	for (key = 1; key <= tgt_free; key++) {
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	for (; key <= tgt_free * 2; key++) {
+		unsigned long long stable_key;
+
+		/* Make ref bit sticky for key: [1, tgt_free] */
+		for (stable_key = 1; stable_key <= tgt_free; stable_key++) {
+			/* Mark the ref bit */
+			assert(!bpf_map_lookup_elem(lru_map_fd, &stable_key,
+						    value));
+		}
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	for (; key <= tgt_free * 3; key++) {
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	assert(map_equal(lru_map_fd, expected_map_fd));
+
+	close(expected_map_fd);
+	close(lru_map_fd);
+
+	printf("Pass\n");
+}
+
 int main(int argc, char **argv)
 {
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
@@ -593,6 +652,7 @@ int main(int argc, char **argv)
 			test_lru_sanity3(map_types[t], map_flags[f], tgt_free);
 			test_lru_sanity4(map_types[t], map_flags[f], tgt_free);
 			test_lru_sanity5(map_types[t], map_flags[f]);
+			test_lru_sanity6(map_types[t], map_flags[f], tgt_free);
 
 			printf("\n");
 		}
-- 
cgit v1.2.3


From 6467acbc70f011d5852f60f6e04825a268c8e8b0 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 14 Apr 2017 10:30:26 -0700
Subject: bpf: lru: Cleanup test_lru_map.c

This patch does the following cleanup on test_lru_map.c
1) Fix indentation (Replace spaces by tabs)
2) Remove redundant BPF_F_NO_COMMON_LRU test
3) Simplify some comments

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_lru_map.c | 32 +++++++++---------------------
 1 file changed, 9 insertions(+), 23 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 04b2242937cc..87c05e5bdfdd 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -191,12 +191,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
 	int next_cpu = 0;
 
 	if (map_flags & BPF_F_NO_COMMON_LRU)
-		/* Ther percpu lru list (i.e each cpu has its own LRU
-		 * list) does not have a local free list.  Hence,
-		 * it will only free old nodes till there is no free
-		 * from the LRU list.  Hence, this test does not apply
-		 * to BPF_F_NO_COMMON_LRU
-		 */
+		/* This test is only applicable to common LRU list */
 		return;
 
 	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
@@ -227,7 +222,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
 	for (key = 1; key < end_key; key++) {
 		assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
-				            BPF_NOEXIST));
+					    BPF_NOEXIST));
 	}
 
 	/* Insert 1+tgt_free to 2*tgt_free
@@ -273,12 +268,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 	int next_cpu = 0;
 
 	if (map_flags & BPF_F_NO_COMMON_LRU)
-		/* Ther percpu lru list (i.e each cpu has its own LRU
-		 * list) does not have a local free list.  Hence,
-		 * it will only free old nodes till there is no free
-		 * from the LRU list.  Hence, this test does not apply
-		 * to BPF_F_NO_COMMON_LRU
-		 */
+		/* This test is only applicable to common LRU list */
 		return;
 
 	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
@@ -290,11 +280,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 	assert(batch_size * 2 == tgt_free);
 
 	map_size = tgt_free + batch_size;
-	if (map_flags & BPF_F_NO_COMMON_LRU)
-		lru_map_fd = create_map(map_type, map_flags,
-					map_size * nr_cpus);
-	else
-		lru_map_fd = create_map(map_type, map_flags, map_size);
+	lru_map_fd = create_map(map_type, map_flags, map_size);
 	assert(lru_map_fd != -1);
 
 	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
@@ -341,7 +327,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 		assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
 		assert(value[0] == 4321);
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
-				            BPF_NOEXIST));
+					    BPF_NOEXIST));
 	}
 
 	value[0] = 1234;
@@ -361,7 +347,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
 					    BPF_NOEXIST));
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
-				            BPF_NOEXIST));
+					    BPF_NOEXIST));
 	}
 
 	assert(map_equal(lru_map_fd, expected_map_fd));
@@ -419,7 +405,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
 	for (key = 1; key < end_key; key++) {
 		assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
-				            BPF_NOEXIST));
+					    BPF_NOEXIST));
 	}
 
 	/* Add 1+2*tgt_free to tgt_free*5/2
@@ -431,7 +417,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
 		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
 					    BPF_NOEXIST));
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
-				            BPF_NOEXIST));
+					    BPF_NOEXIST));
 	}
 
 	assert(map_equal(lru_map_fd, expected_map_fd));
@@ -491,7 +477,7 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
 		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
 					    BPF_NOEXIST));
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
-				            BPF_NOEXIST));
+					    BPF_NOEXIST));
 	}
 
 	assert(map_equal(lru_map_fd, expected_map_fd));
-- 
cgit v1.2.3


From 695ba2651a2ecbb336145f9cc033c85b9c6a5cee Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 14 Apr 2017 10:30:29 -0700
Subject: bpf: lru: Lower the PERCPU_NR_SCANS from 16 to 4

After doing map_perf_test with a much bigger
BPF_F_NO_COMMON_LRU map, the perf report shows a
lot of time spent in rotating the inactive list (i.e.
__bpf_lru_list_rotate_inactive):
> map_perf_test 32 8 10000 1000000 | awk '{sum += $3}END{print sum}'
19644783 (19M/s)
> map_perf_test 32 8 10000000 10000000 |  awk '{sum += $3}END{print sum}'
6283930 (6.28M/s)

By inactive, it usually means the element is not in cache.  Hence,
there is a need to tune the PERCPU_NR_SCANS value.

This patch finds a better number of elements to
scan during each list rotation.  The PERCPU_NR_SCANS (which
is defined the same as PERCPU_FREE_TARGET) decreases
from 16 elements to 4 elements.  This change only
affects the BPF_F_NO_COMMON_LRU map.

The test_lru_dist does not show meaningful difference
between 16 and 4.  Our production L4 load balancer which uses
the LRU map for conntrack-ing also shows little change in cache
hit rate.  Since both benchmark and production data show no
cache-hit difference, PERCPU_NR_SCANS is lowered from 16 to 4.
We can consider making it configurable if we find a usecase
later that shows another value works better and/or use
a different rotation strategy.

After this change:
> map_perf_test 32 8 10000000 10000000 |  awk '{sum += $3}END{print sum}'
9240324 (9.2M/s)

i.e. 6.28M/s -> 9.2M/s

The test_lru_dist has not shown meaningful difference:
> test_lru_dist zipf.100k.a1_01.out 4000 1:
nr_misses: 31575 (Before) vs 31566 (After)

> test_lru_dist zipf.100k.a0_01.out 40000 1
nr_misses: 67036 (Before) vs 67031 (After)

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/bpf_lru_list.c                  | 2 +-
 tools/testing/selftests/bpf/test_lru_map.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
index f62d1d56f41d..e6ef4401a138 100644
--- a/kernel/bpf/bpf_lru_list.c
+++ b/kernel/bpf/bpf_lru_list.c
@@ -13,7 +13,7 @@
 #define LOCAL_FREE_TARGET		(128)
 #define LOCAL_NR_SCANS			LOCAL_FREE_TARGET
 
-#define PERCPU_FREE_TARGET		(16)
+#define PERCPU_FREE_TARGET		(4)
 #define PERCPU_NR_SCANS			PERCPU_FREE_TARGET
 
 /* Helpers to get the local list index */
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 87c05e5bdfdd..8c10c9180c1a 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -22,7 +22,7 @@
 #include "bpf_util.h"
 
 #define LOCAL_FREE_TARGET	(128)
-#define PERCPU_FREE_TARGET	(16)
+#define PERCPU_FREE_TARGET	(4)
 
 static int nr_cpus;
 
-- 
cgit v1.2.3


From b1d9fc41aab11f9520b2e0d57ae872e2ec5d6f32 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 19 Apr 2017 23:01:17 +0200
Subject: bpf: add napi_id read access to __sk_buff

Add napi_id access to __sk_buff for socket filter program types, tc
program types and other bpf_convert_ctx_access() users. Having access
to skb->napi_id is useful for per RX queue listener siloing, f.e.
in combination with SO_ATTACH_REUSEPORT_EBPF and when busy polling is
used, meaning SO_REUSEPORT enabled listeners can then select the
corresponding socket at SYN time already [1]. The skb is marked via
skb_mark_napi_id() early in the receive path (e.g., napi_gro_receive()).

Currently, sockets can only use SO_INCOMING_NAPI_ID from 6d4339028b35
("net: Introduce SO_INCOMING_NAPI_ID") as a socket option to look up
the NAPI ID associated with the queue for steering, which requires a
prior sk_mark_napi_id() after the socket was looked up.

Semantics for the __sk_buff napi_id access are similar, meaning if
skb->napi_id is < MIN_NAPI_ID (e.g. outgoing packets using sender_cpu),
then an invalid napi_id of 0 is returned to the program, otherwise a
valid non-zero napi_id.

  [1] http://netdevconf.org/2.1/slides/apr6/dumazet-BUSY-POLLING-Netdev-2.1.pdf

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h                    |  1 +
 net/core/filter.c                           | 14 ++++++++++++++
 tools/include/uapi/linux/bpf.h              |  1 +
 tools/testing/selftests/bpf/test_verifier.c |  3 +++
 4 files changed, 19 insertions(+)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1e062bb54eec..e553529929f6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -603,6 +603,7 @@ struct __sk_buff {
 	__u32 tc_classid;
 	__u32 data;
 	__u32 data_end;
+	__u32 napi_id;
 };
 
 struct bpf_tunnel_key {
diff --git a/net/core/filter.c b/net/core/filter.c
index 085925834727..9a37860a80fc 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -53,6 +53,7 @@
 #include <net/dst_metadata.h>
 #include <net/dst.h>
 #include <net/sock_reuseport.h>
+#include <net/busy_poll.h>
 
 /**
  *	sk_filter_trim_cap - run a packet through a socket filter
@@ -3201,6 +3202,19 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 			*insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
 		else
 			*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
+#endif
+		break;
+
+	case offsetof(struct __sk_buff, napi_id):
+#if defined(CONFIG_NET_RX_BUSY_POLL)
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, napi_id) != 4);
+
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, napi_id));
+		*insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
+		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
+#else
+		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
 #endif
 		break;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 1e062bb54eec..e553529929f6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -603,6 +603,7 @@ struct __sk_buff {
 	__u32 tc_classid;
 	__u32 data;
 	__u32 data_end;
+	__u32 napi_id;
 };
 
 struct bpf_tunnel_key {
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 6178b65fee59..95a8d5f3ab80 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -772,6 +772,9 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
 				    offsetof(struct __sk_buff, vlan_tci)),
 			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, napi_id)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
-- 
cgit v1.2.3


From b0c47807d31deccdfb78a22a0d04dd9785bdb9d3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 22 Apr 2017 12:31:05 -0700
Subject: bpf: Add sparc support to tools and samples.

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/bpf_helpers.h      | 19 +++++++++++++++++++
 tools/build/feature/test-bpf.c |  3 +++
 tools/lib/bpf/bpf.c            |  2 ++
 3 files changed, 24 insertions(+)

(limited to 'tools')

diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 52de9d88c021..9a9c95f2c9fb 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -146,11 +146,30 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) =
 #define PT_REGS_SP(x) ((x)->sp)
 #define PT_REGS_IP(x) ((x)->nip)
 
+#elif defined(__sparc__)
+
+#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1])
+#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2])
+#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3])
+#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4])
+#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7])
+#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
+#if defined(__arch64__)
+#define PT_REGS_IP(x) ((x)->tpc)
+#else
+#define PT_REGS_IP(x) ((x)->pc)
+#endif
+
 #endif
 
 #ifdef __powerpc__
 #define BPF_KPROBE_READ_RET_IP(ip, ctx)		({ (ip) = (ctx)->link; })
 #define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP
+#elif defined(__sparc__)
+#define BPF_KPROBE_READ_RET_IP(ip, ctx)		({ (ip) = PT_REGS_RET(ctx); })
+#define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP
 #else
 #define BPF_KPROBE_READ_RET_IP(ip, ctx)		({				\
 		bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c
index e04ab89a1013..ebc6dceddb58 100644
--- a/tools/build/feature/test-bpf.c
+++ b/tools/build/feature/test-bpf.c
@@ -9,6 +9,9 @@
 #  define __NR_bpf 321
 # elif defined(__aarch64__)
 #  define __NR_bpf 280
+# elif defined(__sparc__)
+#  define __NR_bpf 349
+# else
 #  error __NR_bpf not defined. libbpf does not support your arch.
 # endif
 #endif
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index f84c398c11f4..4fe444b8092e 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -37,6 +37,8 @@
 #  define __NR_bpf 321
 # elif defined(__aarch64__)
 #  define __NR_bpf 280
+# elif defined(__sparc__)
+#  define __NR_bpf 349
 # else
 #  error __NR_bpf not defined. libbpf does not support your arch.
 # endif
-- 
cgit v1.2.3


From 2e7a721714b9cdca539da78a0eb1f59dbe4020ac Mon Sep 17 00:00:00 2001
From: Mike Maloney <maloney@google.com>
Date: Fri, 21 Apr 2017 10:56:10 -0400
Subject: selftests/net: cleanup unused parameter in psock_fanout

sock_fanout_open no longer sets the size of packet_socket ring, so stop
passing the parameter.

Signed-off-by: Mike Maloney <maloney@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/psock_fanout.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index e62bb354820c..27c4027fab03 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -71,7 +71,7 @@
 
 /* Open a socket in a given fanout mode.
  * @return -1 if mode is bad, a valid socket otherwise */
-static int sock_fanout_open(uint16_t typeflags, int num_packets)
+static int sock_fanout_open(uint16_t typeflags)
 {
 	int fd, val;
 
@@ -228,7 +228,7 @@ static void test_control_single(void)
 	fprintf(stderr, "test: control single socket\n");
 
 	if (sock_fanout_open(PACKET_FANOUT_ROLLOVER |
-			       PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) {
+			       PACKET_FANOUT_FLAG_ROLLOVER) != -1) {
 		fprintf(stderr, "ERROR: opened socket with dual rollover\n");
 		exit(1);
 	}
@@ -241,26 +241,26 @@ static void test_control_group(void)
 
 	fprintf(stderr, "test: control multiple sockets\n");
 
-	fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 20);
+	fds[0] = sock_fanout_open(PACKET_FANOUT_HASH);
 	if (fds[0] == -1) {
 		fprintf(stderr, "ERROR: failed to open HASH socket\n");
 		exit(1);
 	}
 	if (sock_fanout_open(PACKET_FANOUT_HASH |
-			       PACKET_FANOUT_FLAG_DEFRAG, 10) != -1) {
+			       PACKET_FANOUT_FLAG_DEFRAG) != -1) {
 		fprintf(stderr, "ERROR: joined group with wrong flag defrag\n");
 		exit(1);
 	}
 	if (sock_fanout_open(PACKET_FANOUT_HASH |
-			       PACKET_FANOUT_FLAG_ROLLOVER, 10) != -1) {
+			       PACKET_FANOUT_FLAG_ROLLOVER) != -1) {
 		fprintf(stderr, "ERROR: joined group with wrong flag ro\n");
 		exit(1);
 	}
-	if (sock_fanout_open(PACKET_FANOUT_CPU, 10) != -1) {
+	if (sock_fanout_open(PACKET_FANOUT_CPU) != -1) {
 		fprintf(stderr, "ERROR: joined group with wrong mode\n");
 		exit(1);
 	}
-	fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 20);
+	fds[1] = sock_fanout_open(PACKET_FANOUT_HASH);
 	if (fds[1] == -1) {
 		fprintf(stderr, "ERROR: failed to join group\n");
 		exit(1);
@@ -281,8 +281,8 @@ static int test_datapath(uint16_t typeflags, int port_off,
 
 	fprintf(stderr, "test: datapath 0x%hx\n", typeflags);
 
-	fds[0] = sock_fanout_open(typeflags, 20);
-	fds[1] = sock_fanout_open(typeflags, 20);
+	fds[0] = sock_fanout_open(typeflags);
+	fds[1] = sock_fanout_open(typeflags);
 	if (fds[0] == -1 || fds[1] == -1) {
 		fprintf(stderr, "ERROR: failed open\n");
 		exit(1);
-- 
cgit v1.2.3


From 28be04f5c1c95bdf5614af19f7faf7d4fb781fa6 Mon Sep 17 00:00:00 2001
From: Mike Maloney <maloney@google.com>
Date: Fri, 21 Apr 2017 10:56:12 -0400
Subject: selftests/net: add tests for PACKET_FANOUT_FLAG_UNIQUEID

Create two groups with PACKET_FANOUT_FLAG_UNIQUEID, add a socket to one.
Ensure that the groups can only be joined if all options are consistent
with the original except for this flag.

Signed-off-by: Mike Maloney <maloney@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/psock_fanout.c | 95 ++++++++++++++++++++++++++----
 1 file changed, 84 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 27c4027fab03..b4b1d91fcea5 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -71,7 +71,7 @@
 
 /* Open a socket in a given fanout mode.
  * @return -1 if mode is bad, a valid socket otherwise */
-static int sock_fanout_open(uint16_t typeflags)
+static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
 {
 	int fd, val;
 
@@ -81,8 +81,7 @@ static int sock_fanout_open(uint16_t typeflags)
 		exit(1);
 	}
 
-	/* fanout group ID is always 0: tests whether old groups are deleted */
-	val = ((int) typeflags) << 16;
+	val = (((int) typeflags) << 16) | group_id;
 	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
 		if (close(fd)) {
 			perror("close packet");
@@ -113,6 +112,20 @@ static void sock_fanout_set_cbpf(int fd)
 	}
 }
 
+static void sock_fanout_getopts(int fd, uint16_t *typeflags, uint16_t *group_id)
+{
+	int sockopt;
+	socklen_t sockopt_len = sizeof(sockopt);
+
+	if (getsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+		       &sockopt, &sockopt_len)) {
+		perror("failed to getsockopt");
+		exit(1);
+	}
+	*typeflags = sockopt >> 16;
+	*group_id = sockopt & 0xfffff;
+}
+
 static void sock_fanout_set_ebpf(int fd)
 {
 	const int len_off = __builtin_offsetof(struct __sk_buff, len);
@@ -228,7 +241,7 @@ static void test_control_single(void)
 	fprintf(stderr, "test: control single socket\n");
 
 	if (sock_fanout_open(PACKET_FANOUT_ROLLOVER |
-			       PACKET_FANOUT_FLAG_ROLLOVER) != -1) {
+			       PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) {
 		fprintf(stderr, "ERROR: opened socket with dual rollover\n");
 		exit(1);
 	}
@@ -241,26 +254,26 @@ static void test_control_group(void)
 
 	fprintf(stderr, "test: control multiple sockets\n");
 
-	fds[0] = sock_fanout_open(PACKET_FANOUT_HASH);
+	fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
 	if (fds[0] == -1) {
 		fprintf(stderr, "ERROR: failed to open HASH socket\n");
 		exit(1);
 	}
 	if (sock_fanout_open(PACKET_FANOUT_HASH |
-			       PACKET_FANOUT_FLAG_DEFRAG) != -1) {
+			       PACKET_FANOUT_FLAG_DEFRAG, 0) != -1) {
 		fprintf(stderr, "ERROR: joined group with wrong flag defrag\n");
 		exit(1);
 	}
 	if (sock_fanout_open(PACKET_FANOUT_HASH |
-			       PACKET_FANOUT_FLAG_ROLLOVER) != -1) {
+			       PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) {
 		fprintf(stderr, "ERROR: joined group with wrong flag ro\n");
 		exit(1);
 	}
-	if (sock_fanout_open(PACKET_FANOUT_CPU) != -1) {
+	if (sock_fanout_open(PACKET_FANOUT_CPU, 0) != -1) {
 		fprintf(stderr, "ERROR: joined group with wrong mode\n");
 		exit(1);
 	}
-	fds[1] = sock_fanout_open(PACKET_FANOUT_HASH);
+	fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
 	if (fds[1] == -1) {
 		fprintf(stderr, "ERROR: failed to join group\n");
 		exit(1);
@@ -271,6 +284,61 @@ static void test_control_group(void)
 	}
 }
 
+/* Test creating a unique fanout group ids */
+static void test_unique_fanout_group_ids(void)
+{
+	int fds[3];
+	uint16_t typeflags, first_group_id, second_group_id;
+
+	fprintf(stderr, "test: unique ids\n");
+
+	fds[0] = sock_fanout_open(PACKET_FANOUT_HASH |
+				  PACKET_FANOUT_FLAG_UNIQUEID, 0);
+	if (fds[0] == -1) {
+		fprintf(stderr, "ERROR: failed to create a unique id group.\n");
+		exit(1);
+	}
+
+	sock_fanout_getopts(fds[0], &typeflags, &first_group_id);
+	if (typeflags != PACKET_FANOUT_HASH) {
+		fprintf(stderr, "ERROR: unexpected typeflags %x\n", typeflags);
+		exit(1);
+	}
+
+	if (sock_fanout_open(PACKET_FANOUT_CPU, first_group_id)) {
+		fprintf(stderr, "ERROR: joined group with wrong type.\n");
+		exit(1);
+	}
+
+	fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, first_group_id);
+	if (fds[1] == -1) {
+		fprintf(stderr,
+			"ERROR: failed to join previously created group.\n");
+		exit(1);
+	}
+
+	fds[2] = sock_fanout_open(PACKET_FANOUT_HASH |
+				  PACKET_FANOUT_FLAG_UNIQUEID, 0);
+	if (fds[2] == -1) {
+		fprintf(stderr,
+			"ERROR: failed to create a second unique id group.\n");
+		exit(1);
+	}
+
+	sock_fanout_getopts(fds[2], &typeflags, &second_group_id);
+	if (sock_fanout_open(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_UNIQUEID,
+			     second_group_id) != -1) {
+		fprintf(stderr,
+			"ERROR: specified a group id when requesting unique id\n");
+		exit(1);
+	}
+
+	if (close(fds[0]) || close(fds[1]) || close(fds[2])) {
+		fprintf(stderr, "ERROR: closing sockets\n");
+		exit(1);
+	}
+}
+
 static int test_datapath(uint16_t typeflags, int port_off,
 			 const int expect1[], const int expect2[])
 {
@@ -281,8 +349,8 @@ static int test_datapath(uint16_t typeflags, int port_off,
 
 	fprintf(stderr, "test: datapath 0x%hx\n", typeflags);
 
-	fds[0] = sock_fanout_open(typeflags);
-	fds[1] = sock_fanout_open(typeflags);
+	fds[0] = sock_fanout_open(typeflags, 0);
+	fds[1] = sock_fanout_open(typeflags, 0);
 	if (fds[0] == -1 || fds[1] == -1) {
 		fprintf(stderr, "ERROR: failed open\n");
 		exit(1);
@@ -349,10 +417,12 @@ int main(int argc, char **argv)
 	const int expect_cpu0[2][2]	= { { 20, 0 },  { 20, 0 } };
 	const int expect_cpu1[2][2]	= { { 0, 20 },  { 0, 20 } };
 	const int expect_bpf[2][2]	= { { 15, 5 },  { 15, 20 } };
+	const int expect_uniqueid[2][2] = { { 20, 20},  { 20, 20 } };
 	int port_off = 2, tries = 5, ret;
 
 	test_control_single();
 	test_control_group();
+	test_unique_fanout_group_ids();
 
 	/* find a set of ports that do not collide onto the same socket */
 	ret = test_datapath(PACKET_FANOUT_HASH, port_off,
@@ -383,6 +453,9 @@ int main(int argc, char **argv)
 		ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
 				     expect_cpu1[0], expect_cpu1[1]);
 
+	ret |= test_datapath(PACKET_FANOUT_FLAG_UNIQUEID, port_off,
+			     expect_uniqueid[0], expect_uniqueid[1]);
+
 	if (ret)
 		return 1;
 
-- 
cgit v1.2.3


From 472ecf084a7687347f79720c83881c07407bfd8b Mon Sep 17 00:00:00 2001
From: Mike Maloney <maloney@google.com>
Date: Mon, 24 Apr 2017 21:29:11 -0400
Subject: selftests/net: Fix broken test case in psock_fanout

The error return falue form sock_fanout_open is -1, not zero.  One test
case was checking for 0 instead of -1.

Tested: Built and tested in clean client.
Signed-off-by: Mike Maloney <maloney@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/psock_fanout.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index b4b1d91fcea5..989f917068d1 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -305,7 +305,7 @@ static void test_unique_fanout_group_ids(void)
 		exit(1);
 	}
 
-	if (sock_fanout_open(PACKET_FANOUT_CPU, first_group_id)) {
+	if (sock_fanout_open(PACKET_FANOUT_CPU, first_group_id) != -1) {
 		fprintf(stderr, "ERROR: joined group with wrong type.\n");
 		exit(1);
 	}
-- 
cgit v1.2.3


From 8fe45924387be6b5c1be59a7eb330790c61d5d10 Mon Sep 17 00:00:00 2001
From: Teng Qin <qinteng@fb.com>
Date: Mon, 24 Apr 2017 19:00:37 -0700
Subject: bpf: map_get_next_key to return first key on NULL

When iterating through a map, we need to find a key that does not exist
in the map so map_get_next_key will give us the first key of the map.
This often requires a lot of guessing in production systems.

This patch makes map_get_next_key return the first key when the key
pointer in the parameter is NULL.

Signed-off-by: Teng Qin <qinteng@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/bpf.h              | 10 +++++++---
 kernel/bpf/arraymap.c                   |  2 +-
 kernel/bpf/hashtab.c                    |  9 +++++----
 kernel/bpf/syscall.c                    | 20 ++++++++++++--------
 tools/testing/selftests/bpf/test_maps.c | 29 +++++++++++++++++++++++++----
 5 files changed, 50 insertions(+), 20 deletions(-)

(limited to 'tools')

diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h
index c3a53fd47ff1..52c8425d144b 100644
--- a/include/trace/events/bpf.h
+++ b/include/trace/events/bpf.h
@@ -321,11 +321,14 @@ TRACE_EVENT(bpf_map_next_key,
 		__dynamic_array(u8, key, map->key_size)
 		__dynamic_array(u8, nxt, map->key_size)
 		__field(bool, key_trunc)
+		__field(bool, key_null)
 		__field(int, ufd)
 	),
 
 	TP_fast_assign(
-		memcpy(__get_dynamic_array(key), key, map->key_size);
+		if (key)
+			memcpy(__get_dynamic_array(key), key, map->key_size);
+		__entry->key_null = !key;
 		memcpy(__get_dynamic_array(nxt), key_next, map->key_size);
 		__entry->type      = map->map_type;
 		__entry->key_len   = min(map->key_size, 16U);
@@ -336,8 +339,9 @@ TRACE_EVENT(bpf_map_next_key,
 	TP_printk("map type=%s ufd=%d key=[%s%s] next=[%s%s]",
 		  __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
 		  __entry->ufd,
-		  __print_hex(__get_dynamic_array(key), __entry->key_len),
-		  __entry->key_trunc ? " ..." : "",
+		  __entry->key_null ? "NULL" : __print_hex(__get_dynamic_array(key),
+							   __entry->key_len),
+		  __entry->key_trunc && !__entry->key_null ? " ..." : "",
 		  __print_hex(__get_dynamic_array(nxt), __entry->key_len),
 		  __entry->key_trunc ? " ..." : "")
 );
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index ec621df5a97a..5e00b2333c26 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -182,7 +182,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
 static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
-	u32 index = *(u32 *)key;
+	u32 index = key ? *(u32 *)key : U32_MAX;
 	u32 *next = (u32 *)next_key;
 
 	if (index >= array->map.max_entries) {
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index bc80c038e430..004334ea13ba 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -540,12 +540,15 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	struct hlist_nulls_head *head;
 	struct htab_elem *l, *next_l;
 	u32 hash, key_size;
-	int i;
+	int i = 0;
 
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
 	key_size = map->key_size;
 
+	if (!key)
+		goto find_first_elem;
+
 	hash = htab_map_hash(key, key_size);
 
 	head = select_bucket(htab, hash);
@@ -553,10 +556,8 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	/* lookup the key */
 	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
 
-	if (!l) {
-		i = 0;
+	if (!l)
 		goto find_first_elem;
-	}
 
 	/* key was found, get next key in the same bucket */
 	next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b89288e2b589..13642c73dca0 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -536,14 +536,18 @@ static int map_get_next_key(union bpf_attr *attr)
 	if (IS_ERR(map))
 		return PTR_ERR(map);
 
-	err = -ENOMEM;
-	key = kmalloc(map->key_size, GFP_USER);
-	if (!key)
-		goto err_put;
-
-	err = -EFAULT;
-	if (copy_from_user(key, ukey, map->key_size) != 0)
-		goto free_key;
+	if (ukey) {
+		err = -ENOMEM;
+		key = kmalloc(map->key_size, GFP_USER);
+		if (!key)
+			goto err_put;
+
+		err = -EFAULT;
+		if (copy_from_user(key, ukey, map->key_size) != 0)
+			goto free_key;
+	} else {
+		key = NULL;
+	}
 
 	err = -ENOMEM;
 	next_key = kmalloc(map->key_size, GFP_USER);
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 20f1871874df..a977c4f7b0ce 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -28,7 +28,7 @@ static int map_flags;
 
 static void test_hashmap(int task, void *data)
 {
-	long long key, next_key, value;
+	long long key, next_key, first_key, value;
 	int fd;
 
 	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
@@ -89,10 +89,13 @@ static void test_hashmap(int task, void *data)
 	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
 
 	/* Iterate over two elements. */
+	assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
+	       (first_key == 1 || first_key == 2));
 	assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
-	       (next_key == 1 || next_key == 2));
+	       (next_key == first_key));
 	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
-	       (next_key == 1 || next_key == 2));
+	       (next_key == 1 || next_key == 2) &&
+	       (next_key != first_key));
 	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
 	       errno == ENOENT);
 
@@ -105,6 +108,8 @@ static void test_hashmap(int task, void *data)
 
 	key = 0;
 	/* Check that map is empty. */
+	assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+	       errno == ENOENT);
 	assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
 	       errno == ENOENT);
 
@@ -133,7 +138,7 @@ static void test_hashmap_percpu(int task, void *data)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
 	long long value[nr_cpus];
-	long long key, next_key;
+	long long key, next_key, first_key;
 	int expected_key_mask = 0;
 	int fd, i;
 
@@ -193,7 +198,13 @@ static void test_hashmap_percpu(int task, void *data)
 	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
 
 	/* Iterate over two elements. */
+	assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
+	       ((expected_key_mask & first_key) == first_key));
 	while (!bpf_map_get_next_key(fd, &key, &next_key)) {
+		if (first_key) {
+			assert(next_key == first_key);
+			first_key = 0;
+		}
 		assert((expected_key_mask & next_key) == next_key);
 		expected_key_mask &= ~next_key;
 
@@ -219,6 +230,8 @@ static void test_hashmap_percpu(int task, void *data)
 
 	key = 0;
 	/* Check that map is empty. */
+	assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+	       errno == ENOENT);
 	assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
 	       errno == ENOENT);
 
@@ -264,6 +277,8 @@ static void test_arraymap(int task, void *data)
 	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
 
 	/* Iterate over two elements. */
+	assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
+	       next_key == 0);
 	assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
 	       next_key == 0);
 	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
@@ -319,6 +334,8 @@ static void test_arraymap_percpu(int task, void *data)
 	assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT);
 
 	/* Iterate over two elements. */
+	assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
+	       next_key == 0);
 	assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
 	       next_key == 0);
 	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
@@ -400,6 +417,8 @@ static void test_map_large(void)
 	       errno == E2BIG);
 
 	/* Iterate through all elements. */
+	assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
+	key.c = -1;
 	for (i = 0; i < MAP_SIZE; i++)
 		assert(bpf_map_get_next_key(fd, &key, &key) == 0);
 	assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
@@ -499,6 +518,7 @@ static void test_map_parallel(void)
 	       errno == EEXIST);
 
 	/* Check that all elements were inserted. */
+	assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
 	key = -1;
 	for (i = 0; i < MAP_SIZE; i++)
 		assert(bpf_map_get_next_key(fd, &key, &key) == 0);
@@ -518,6 +538,7 @@ static void test_map_parallel(void)
 
 	/* Nothing should be left. */
 	key = -1;
+	assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT);
 	assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
 }
 
-- 
cgit v1.2.3


From 728a853a44a0f01111883f95216e980722474c07 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 27 Apr 2017 01:39:32 +0200
Subject: bpf: add various test cases to verifier selftests

Add several test cases around ldimm64, fp arithmetic and direct
packet access.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 137 ++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 95a8d5f3ab80..d3395c192a24 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -190,6 +190,86 @@ static struct bpf_test tests[] = {
 		.errstr = "invalid bpf_ld_imm64 insn",
 		.result = REJECT,
 	},
+	{
+		"test6 ld_imm64",
+		.insns = {
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+			BPF_RAW_INSN(0, 0, 0, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"test7 ld_imm64",
+		.insns = {
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1),
+			BPF_RAW_INSN(0, 0, 0, 0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"test8 ld_imm64",
+		.insns = {
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 1, 1),
+			BPF_RAW_INSN(0, 0, 0, 0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "uses reserved fields",
+		.result = REJECT,
+	},
+	{
+		"test9 ld_imm64",
+		.insns = {
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1),
+			BPF_RAW_INSN(0, 0, 0, 1, 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_ld_imm64 insn",
+		.result = REJECT,
+	},
+	{
+		"test10 ld_imm64",
+		.insns = {
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1),
+			BPF_RAW_INSN(0, BPF_REG_1, 0, 0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_ld_imm64 insn",
+		.result = REJECT,
+	},
+	{
+		"test11 ld_imm64",
+		.insns = {
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1),
+			BPF_RAW_INSN(0, 0, BPF_REG_1, 0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_ld_imm64 insn",
+		.result = REJECT,
+	},
+	{
+		"test12 ld_imm64",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, BPF_REG_1, 0, 1),
+			BPF_RAW_INSN(0, 0, 0, 0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "not pointing to valid bpf_map",
+		.result = REJECT,
+	},
+	{
+		"test13 ld_imm64",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, BPF_REG_1, 0, 1),
+			BPF_RAW_INSN(0, 0, BPF_REG_1, 0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_ld_imm64 insn",
+		.result = REJECT,
+	},
 	{
 		"no bpf_exit",
 		.insns = {
@@ -330,6 +410,30 @@ static struct bpf_test tests[] = {
 		.errstr = "invalid read from stack",
 		.result = REJECT,
 	},
+	{
+		"invalid fp arithmetic",
+		/* If this gets ever changed, make sure JITs can deal with it. */
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R1 pointer arithmetic",
+		.result_unpriv = REJECT,
+		.errstr = "R1 invalid mem access",
+		.result = REJECT,
+	},
+	{
+		"non-invalid fp arithmetic",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
 	{
 		"invalid argument register",
 		.insns = {
@@ -1800,6 +1904,20 @@ static struct bpf_test tests[] = {
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
 	},
+	{
+		"unpriv: adding of fp",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "pointer arithmetic prohibited",
+		.result_unpriv = REJECT,
+		.errstr = "R1 invalid mem access",
+		.result = REJECT,
+	},
 	{
 		"unpriv: cmp of stack pointer",
 		.insns = {
@@ -2471,6 +2589,25 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
+	{
+		"direct packet access: test16 (arith on data_end)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 16),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid access to packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
 	{
 		"helper access to packet: test1, valid packet_ptr range",
 		.insns = {
-- 
cgit v1.2.3


From 43bcf707ccdc79ee63edb953fcf72e6dc244cfa1 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 27 Apr 2017 01:39:34 +0200
Subject: bpf: fix _htons occurences in test_progs

Dave reported that on sparc test_progs generates buggy swapped
eth->h_proto protocol comparisons:

  10: (15) if r3 == 0xdd86 goto pc+9
   R0=imm2,min_value=2,max_value=2 R1=pkt(id=0,off=0,r=14) R2=pkt_end R3=inv
   R4=pkt(id=0,off=14,r=14) R5=inv56 R10=fp

This is due to the unconditional ...

  #define htons __builtin_bswap16
  #define ntohs __builtin_bswap16

... in test_progs that causes this. Make use of asm/byteorder.h
and use __constant_htons() where possible and only perform the
bswap16 when on little endian in non-constant case.

Fixes: 6882804c916b ("selftests/bpf: add a test for overlapping packet range checks")
Fixes: 37821613626e ("selftests/bpf: add l4 load balancer test based on sched_cls")
Reported-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/bpf_util.h        | 19 +++++++++++++++++++
 tools/testing/selftests/bpf/test_l4lb.c       | 11 +++++------
 tools/testing/selftests/bpf/test_pkt_access.c |  6 +++---
 tools/testing/selftests/bpf/test_progs.c      | 10 ++++------
 4 files changed, 31 insertions(+), 15 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index 84a5d1823f02..7de27966103d 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -6,6 +6,25 @@
 #include <string.h>
 #include <errno.h>
 
+#include <asm/byteorder.h>
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+# define __bpf_ntohs(x)		__builtin_bswap16(x)
+# define __bpf_htons(x)		__builtin_bswap16(x)
+#elif __BYTE_ORDER == __BIG_ENDIAN
+# define __bpf_ntohs(x)		(x)
+# define __bpf_htons(x)		(x)
+#else
+# error "Fix your __BYTE_ORDER?!"
+#endif
+
+#define bpf_htons(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __constant_htons(x) : __bpf_htons(x))
+#define bpf_ntohs(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __constant_ntohs(x) : __bpf_ntohs(x))
+
 static inline unsigned int bpf_num_possible_cpus(void)
 {
 	static const char *fcpu = "/sys/devices/system/cpu/possible";
diff --git a/tools/testing/selftests/bpf/test_l4lb.c b/tools/testing/selftests/bpf/test_l4lb.c
index 368bfe8b9842..b68b21274bac 100644
--- a/tools/testing/selftests/bpf/test_l4lb.c
+++ b/tools/testing/selftests/bpf/test_l4lb.c
@@ -19,9 +19,8 @@
 #include <linux/udp.h>
 #include "bpf_helpers.h"
 #include "test_iptunnel_common.h"
+#include "bpf_util.h"
 
-#define htons __builtin_bswap16
-#define ntohs __builtin_bswap16
 int _version SEC("version") = 1;
 
 static inline __u32 rol32(__u32 word, unsigned int shift)
@@ -355,7 +354,7 @@ static __always_inline int process_packet(void *data, __u64 off, void *data_end,
 		iph_len = sizeof(struct ipv6hdr);
 		protocol = ip6h->nexthdr;
 		pckt.proto = protocol;
-		pkt_bytes = ntohs(ip6h->payload_len);
+		pkt_bytes = bpf_ntohs(ip6h->payload_len);
 		off += iph_len;
 		if (protocol == IPPROTO_FRAGMENT) {
 			return TC_ACT_SHOT;
@@ -377,7 +376,7 @@ static __always_inline int process_packet(void *data, __u64 off, void *data_end,
 
 		protocol = iph->protocol;
 		pckt.proto = protocol;
-		pkt_bytes = ntohs(iph->tot_len);
+		pkt_bytes = bpf_ntohs(iph->tot_len);
 		off += IPV4_HDR_LEN_NO_OPT;
 
 		if (iph->frag_off & PCKT_FRAGMENTED)
@@ -464,9 +463,9 @@ int balancer_ingress(struct __sk_buff *ctx)
 	if (data + nh_off > data_end)
 		return TC_ACT_SHOT;
 	eth_proto = eth->eth_proto;
-	if (eth_proto == htons(ETH_P_IP))
+	if (eth_proto == bpf_htons(ETH_P_IP))
 		return process_packet(data, nh_off, data_end, false, ctx);
-	else if (eth_proto == htons(ETH_P_IPV6))
+	else if (eth_proto == bpf_htons(ETH_P_IPV6))
 		return process_packet(data, nh_off, data_end, true, ctx);
 	else
 		return TC_ACT_SHOT;
diff --git a/tools/testing/selftests/bpf/test_pkt_access.c b/tools/testing/selftests/bpf/test_pkt_access.c
index fd1e0832d409..711300508ee0 100644
--- a/tools/testing/selftests/bpf/test_pkt_access.c
+++ b/tools/testing/selftests/bpf/test_pkt_access.c
@@ -14,8 +14,8 @@
 #include <linux/tcp.h>
 #include <linux/pkt_cls.h>
 #include "bpf_helpers.h"
+#include "bpf_util.h"
 
-#define _htons __builtin_bswap16
 #define barrier() __asm__ __volatile__("": : :"memory")
 int _version SEC("version") = 1;
 
@@ -32,7 +32,7 @@ int process(struct __sk_buff *skb)
 	if (eth + 1 > data_end)
 		return TC_ACT_SHOT;
 
-	if (eth->h_proto == _htons(ETH_P_IP)) {
+	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
 		struct iphdr *iph = (struct iphdr *)(eth + 1);
 
 		if (iph + 1 > data_end)
@@ -40,7 +40,7 @@ int process(struct __sk_buff *skb)
 		ihl_len = iph->ihl * 4;
 		proto = iph->protocol;
 		tcp = (struct tcphdr *)((void *)(iph) + ihl_len);
-	} else if (eth->h_proto == _htons(ETH_P_IPV6)) {
+	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
 		struct ipv6hdr *ip6h = (struct ipv6hdr *)(eth + 1);
 
 		if (ip6h + 1 > data_end)
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 5275d4a1df24..7c2d899c8f43 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -30,8 +30,6 @@ typedef __u16 __sum16;
 #include "test_iptunnel_common.h"
 #include "bpf_util.h"
 
-#define _htons __builtin_bswap16
-
 static int error_cnt, pass_cnt;
 
 #define MAGIC_BYTES 123
@@ -42,10 +40,10 @@ static struct {
 	struct iphdr iph;
 	struct tcphdr tcp;
 } __packed pkt_v4 = {
-	.eth.h_proto = _htons(ETH_P_IP),
+	.eth.h_proto = bpf_htons(ETH_P_IP),
 	.iph.ihl = 5,
 	.iph.protocol = 6,
-	.iph.tot_len = _htons(MAGIC_BYTES),
+	.iph.tot_len = bpf_htons(MAGIC_BYTES),
 	.tcp.urg_ptr = 123,
 };
 
@@ -55,9 +53,9 @@ static struct {
 	struct ipv6hdr iph;
 	struct tcphdr tcp;
 } __packed pkt_v6 = {
-	.eth.h_proto = _htons(ETH_P_IPV6),
+	.eth.h_proto = bpf_htons(ETH_P_IPV6),
 	.iph.nexthdr = 6,
-	.iph.payload_len = _htons(MAGIC_BYTES),
+	.iph.payload_len = bpf_htons(MAGIC_BYTES),
 	.tcp.urg_ptr = 123,
 };
 
-- 
cgit v1.2.3


From f3515b5d0b718cceae9e67802cfa20d5e6f9b567 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 27 Apr 2017 01:39:35 +0200
Subject: bpf: provide a generic macro for percpu values for selftests

To overcome bugs as described and fixed in 89087c456fb5 ("bpf: Fix
values type used in test_maps"), provide a generic BPF_DECLARE_PERCPU()
and bpf_percpu() accessor macro for all percpu map values used in
tests.

Declaring variables works as follows (also works for structs):

  BPF_DECLARE_PERCPU(uint32_t, my_value);

They can then be accessed normally as uint32_t type through:

  bpf_percpu(my_value, <cpu_nr>)

For example:

  bpf_percpu(my_value, 0)++;

Implicitly, we make sure that the passed type is allocated and aligned
by gcc at least on a 8-byte boundary, so that it works together with
the map lookup/update syscall for percpu maps. We use it as a usage
example in test_maps, so that others are free to adapt this into their
code when necessary.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/bpf_util.h  |  7 +++++++
 tools/testing/selftests/bpf/test_maps.c | 37 ++++++++++++++++++---------------
 2 files changed, 27 insertions(+), 17 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index 7de27966103d..369e7d7bba80 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -54,4 +54,11 @@ static inline unsigned int bpf_num_possible_cpus(void)
 	return possible_cpus;
 }
 
+#define __bpf_percpu_val_align	__attribute__((__aligned__(8)))
+
+#define BPF_DECLARE_PERCPU(type, name)				\
+	struct { type v; /* padding */ } __bpf_percpu_val_align	\
+		name[bpf_num_possible_cpus()]
+#define bpf_percpu(name, cpu) name[(cpu)].v
+
 #endif /* __BPF_UTIL__ */
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index a977c4f7b0ce..93314524de0d 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -137,20 +137,20 @@ static void test_hashmap_sizes(int task, void *data)
 static void test_hashmap_percpu(int task, void *data)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
-	long long value[nr_cpus];
+	BPF_DECLARE_PERCPU(long, value);
 	long long key, next_key, first_key;
 	int expected_key_mask = 0;
 	int fd, i;
 
 	fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key),
-			    sizeof(value[0]), 2, map_flags);
+			    sizeof(bpf_percpu(value, 0)), 2, map_flags);
 	if (fd < 0) {
 		printf("Failed to create hashmap '%s'!\n", strerror(errno));
 		exit(1);
 	}
 
 	for (i = 0; i < nr_cpus; i++)
-		value[i] = i + 100;
+		bpf_percpu(value, i) = i + 100;
 
 	key = 1;
 	/* Insert key=1 element. */
@@ -170,8 +170,9 @@ static void test_hashmap_percpu(int task, void *data)
 	/* Check that key=1 can be found. Value could be 0 if the lookup
 	 * was run from a different CPU.
 	 */
-	value[0] = 1;
-	assert(bpf_map_lookup_elem(fd, &key, value) == 0 && value[0] == 100);
+	bpf_percpu(value, 0) = 1;
+	assert(bpf_map_lookup_elem(fd, &key, value) == 0 &&
+	       bpf_percpu(value, 0) == 100);
 
 	key = 2;
 	/* Check that key=2 is not found. */
@@ -211,7 +212,7 @@ static void test_hashmap_percpu(int task, void *data)
 		assert(bpf_map_lookup_elem(fd, &next_key, value) == 0);
 
 		for (i = 0; i < nr_cpus; i++)
-			assert(value[i] == i + 100);
+			assert(bpf_percpu(value, i) == i + 100);
 
 		key = next_key;
 	}
@@ -296,34 +297,36 @@ static void test_arraymap(int task, void *data)
 static void test_arraymap_percpu(int task, void *data)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
+	BPF_DECLARE_PERCPU(long, values);
 	int key, next_key, fd, i;
-	long long values[nr_cpus];
 
 	fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
-			    sizeof(values[0]), 2, 0);
+			    sizeof(bpf_percpu(values, 0)), 2, 0);
 	if (fd < 0) {
 		printf("Failed to create arraymap '%s'!\n", strerror(errno));
 		exit(1);
 	}
 
 	for (i = 0; i < nr_cpus; i++)
-		values[i] = i + 100;
+		bpf_percpu(values, i) = i + 100;
 
 	key = 1;
 	/* Insert key=1 element. */
 	assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
 
-	values[0] = 0;
+	bpf_percpu(values, 0) = 0;
 	assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 &&
 	       errno == EEXIST);
 
 	/* Check that key=1 can be found. */
-	assert(bpf_map_lookup_elem(fd, &key, values) == 0 && values[0] == 100);
+	assert(bpf_map_lookup_elem(fd, &key, values) == 0 &&
+	       bpf_percpu(values, 0) == 100);
 
 	key = 0;
 	/* Check that key=0 is also found and zero initialized. */
 	assert(bpf_map_lookup_elem(fd, &key, values) == 0 &&
-	       values[0] == 0 && values[nr_cpus - 1] == 0);
+	       bpf_percpu(values, 0) == 0 &&
+	       bpf_percpu(values, nr_cpus - 1) == 0);
 
 	/* Check that key=2 cannot be inserted due to max_entries limit. */
 	key = 2;
@@ -353,15 +356,15 @@ static void test_arraymap_percpu(int task, void *data)
 static void test_arraymap_percpu_many_keys(void)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
+	BPF_DECLARE_PERCPU(long, values);
 	/* nr_keys is not too large otherwise the test stresses percpu
 	 * allocator more than anything else
 	 */
 	unsigned int nr_keys = 2000;
-	long long values[nr_cpus];
 	int key, fd, i;
 
 	fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
-			    sizeof(values[0]), nr_keys, 0);
+			    sizeof(bpf_percpu(values, 0)), nr_keys, 0);
 	if (fd < 0) {
 		printf("Failed to create per-cpu arraymap '%s'!\n",
 		       strerror(errno));
@@ -369,19 +372,19 @@ static void test_arraymap_percpu_many_keys(void)
 	}
 
 	for (i = 0; i < nr_cpus; i++)
-		values[i] = i + 10;
+		bpf_percpu(values, i) = i + 10;
 
 	for (key = 0; key < nr_keys; key++)
 		assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
 
 	for (key = 0; key < nr_keys; key++) {
 		for (i = 0; i < nr_cpus; i++)
-			values[i] = 0;
+			bpf_percpu(values, i) = 0;
 
 		assert(bpf_map_lookup_elem(fd, &key, values) == 0);
 
 		for (i = 0; i < nr_cpus; i++)
-			assert(values[i] == i + 10);
+			assert(bpf_percpu(values, i) == i + 10);
 	}
 
 	close(fd);
-- 
cgit v1.2.3


From 332270fdc8b6fba07d059a9ad44df9e1a2ad4529 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 29 Apr 2017 22:52:42 -0700
Subject: bpf: enhance verifier to understand stack pointer arithmetic

llvm 4.0 and above generates the code like below:
....
440: (b7) r1 = 15
441: (05) goto pc+73
515: (79) r6 = *(u64 *)(r10 -152)
516: (bf) r7 = r10
517: (07) r7 += -112
518: (bf) r2 = r7
519: (0f) r2 += r1
520: (71) r1 = *(u8 *)(r8 +0)
521: (73) *(u8 *)(r2 +45) = r1
....
and the verifier complains "R2 invalid mem access 'inv'" for insn #521.
This is because verifier marks register r2 as unknown value after #519
where r2 is a stack pointer and r1 holds a constant value.

Teach verifier to recognize "stack_ptr + imm" and
"stack_ptr + reg with const val" as valid stack_ptr with new offset.

Signed-off-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c                       | 11 +++++++++++
 tools/testing/selftests/bpf/test_verifier.c | 18 ++++++++++++------
 2 files changed, 23 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6f8b6ed690be..c2ff608c1984 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1922,6 +1922,17 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 			dst_reg->type = PTR_TO_STACK;
 			dst_reg->imm = insn->imm;
 			return 0;
+		} else if (opcode == BPF_ADD &&
+			   BPF_CLASS(insn->code) == BPF_ALU64 &&
+			   dst_reg->type == PTR_TO_STACK &&
+			   ((BPF_SRC(insn->code) == BPF_X &&
+			     regs[insn->src_reg].type == CONST_IMM) ||
+			    BPF_SRC(insn->code) == BPF_K)) {
+			if (BPF_SRC(insn->code) == BPF_X)
+				dst_reg->imm += regs[insn->src_reg].imm;
+			else
+				dst_reg->imm += insn->imm;
+			return 0;
 		} else if (opcode == BPF_ADD &&
 			   BPF_CLASS(insn->code) == BPF_ALU64 &&
 			   (dst_reg->type == PTR_TO_PACKET ||
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index d3395c192a24..3773562056da 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1932,16 +1932,22 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 	},
 	{
-		"unpriv: obfuscate stack pointer",
+		"stack pointer arithmetic",
 		.insns = {
-			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_MOV64_IMM(BPF_REG_1, 4),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1),
+			BPF_ST_MEM(0, BPF_REG_2, 4, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+			BPF_ST_MEM(0, BPF_REG_2, 4, 0),
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr_unpriv = "R2 pointer arithmetic",
-		.result_unpriv = REJECT,
 		.result = ACCEPT,
 	},
 	{
-- 
cgit v1.2.3


From bc1bafbbe9b3558d7789ff151ef4f185b6ad21f3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 1 May 2017 12:43:49 -0700
Subject: bpf: Move endianness BPF helpers out of bpf_util.h

We do not want to include things like stdio.h and friends into
eBPF program builds.  bpf_util.h is for host compiled programs,
so eBPF C-code helpers don't really belong there.

Add a new bpf_endian.h as a quick fix for this for now.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/bpf_endian.h      | 23 +++++++++++++++++++++++
 tools/testing/selftests/bpf/bpf_util.h        | 19 -------------------
 tools/testing/selftests/bpf/test_l4lb.c       |  2 +-
 tools/testing/selftests/bpf/test_pkt_access.c |  2 +-
 4 files changed, 25 insertions(+), 21 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/bpf_endian.h

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h
new file mode 100644
index 000000000000..19d0604f8694
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_endian.h
@@ -0,0 +1,23 @@
+#ifndef __BPF_ENDIAN__
+#define __BPF_ENDIAN__
+
+#include <asm/byteorder.h>
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+# define __bpf_ntohs(x)		__builtin_bswap16(x)
+# define __bpf_htons(x)		__builtin_bswap16(x)
+#elif __BYTE_ORDER == __BIG_ENDIAN
+# define __bpf_ntohs(x)		(x)
+# define __bpf_htons(x)		(x)
+#else
+# error "Fix your __BYTE_ORDER?!"
+#endif
+
+#define bpf_htons(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __constant_htons(x) : __bpf_htons(x))
+#define bpf_ntohs(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __constant_ntohs(x) : __bpf_ntohs(x))
+
+#endif
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index 369e7d7bba80..20ecbaa0d85d 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -6,25 +6,6 @@
 #include <string.h>
 #include <errno.h>
 
-#include <asm/byteorder.h>
-
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-# define __bpf_ntohs(x)		__builtin_bswap16(x)
-# define __bpf_htons(x)		__builtin_bswap16(x)
-#elif __BYTE_ORDER == __BIG_ENDIAN
-# define __bpf_ntohs(x)		(x)
-# define __bpf_htons(x)		(x)
-#else
-# error "Fix your __BYTE_ORDER?!"
-#endif
-
-#define bpf_htons(x)				\
-	(__builtin_constant_p(x) ?		\
-	 __constant_htons(x) : __bpf_htons(x))
-#define bpf_ntohs(x)				\
-	(__builtin_constant_p(x) ?		\
-	 __constant_ntohs(x) : __bpf_ntohs(x))
-
 static inline unsigned int bpf_num_possible_cpus(void)
 {
 	static const char *fcpu = "/sys/devices/system/cpu/possible";
diff --git a/tools/testing/selftests/bpf/test_l4lb.c b/tools/testing/selftests/bpf/test_l4lb.c
index b68b21274bac..1e10c9590991 100644
--- a/tools/testing/selftests/bpf/test_l4lb.c
+++ b/tools/testing/selftests/bpf/test_l4lb.c
@@ -19,7 +19,7 @@
 #include <linux/udp.h>
 #include "bpf_helpers.h"
 #include "test_iptunnel_common.h"
-#include "bpf_util.h"
+#include "bpf_endian.h"
 
 int _version SEC("version") = 1;
 
diff --git a/tools/testing/selftests/bpf/test_pkt_access.c b/tools/testing/selftests/bpf/test_pkt_access.c
index 711300508ee0..39387bb7e08c 100644
--- a/tools/testing/selftests/bpf/test_pkt_access.c
+++ b/tools/testing/selftests/bpf/test_pkt_access.c
@@ -14,7 +14,7 @@
 #include <linux/tcp.h>
 #include <linux/pkt_cls.h>
 #include "bpf_helpers.h"
-#include "bpf_util.h"
+#include "bpf_endian.h"
 
 #define barrier() __asm__ __volatile__("": : :"memory")
 int _version SEC("version") = 1;
-- 
cgit v1.2.3


From e06422c43968916dc945018fd9220f60866470b1 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 1 May 2017 12:58:21 -0700
Subject: bpf: Include bpf_endian.h in test_progs.c too.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_progs.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 7c2d899c8f43..4ed049a0b14b 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -29,6 +29,7 @@ typedef __u16 __sum16;
 #include <bpf/libbpf.h>
 #include "test_iptunnel_common.h"
 #include "bpf_util.h"
+#include "bpf_endian.h"
 
 static int error_cnt, pass_cnt;
 
-- 
cgit v1.2.3


From 4e9c3a667135799d50f2778a8a8dae2ca13aafd0 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 2 May 2017 07:52:01 -0700
Subject: selftests: bpf: Use bpf_endian.h in test_xdp.c

This fixes the testcase on big-endian.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_xdp.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_xdp.c b/tools/testing/selftests/bpf/test_xdp.c
index 9a33b038cb31..5e7df8bb5b5d 100644
--- a/tools/testing/selftests/bpf/test_xdp.c
+++ b/tools/testing/selftests/bpf/test_xdp.c
@@ -17,10 +17,9 @@
 #include <linux/pkt_cls.h>
 #include <sys/socket.h>
 #include "bpf_helpers.h"
+#include "bpf_endian.h"
 #include "test_iptunnel_common.h"
 
-#define htons __builtin_bswap16
-#define ntohs __builtin_bswap16
 int _version SEC("version") = 1;
 
 struct bpf_map_def SEC("maps") rxcnt = {
@@ -104,7 +103,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp)
 	vip.family = AF_INET;
 	vip.daddr.v4 = iph->daddr;
 	vip.dport = dport;
-	payload_len = ntohs(iph->tot_len);
+	payload_len = bpf_ntohs(iph->tot_len);
 
 	tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
 	/* It only does v4-in-v4 */
@@ -126,7 +125,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp)
 	    iph + 1 > data_end)
 		return XDP_DROP;
 
-	set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IP));
+	set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP));
 
 	iph->version = 4;
 	iph->ihl = sizeof(*iph) >> 2;
@@ -134,7 +133,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp)
 	iph->protocol = IPPROTO_IPIP;
 	iph->check = 0;
 	iph->tos = 0;
-	iph->tot_len = htons(payload_len + sizeof(*iph));
+	iph->tot_len = bpf_htons(payload_len + sizeof(*iph));
 	iph->daddr = tnl->daddr.v4;
 	iph->saddr = tnl->saddr.v4;
 	iph->ttl = 8;
@@ -195,12 +194,12 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
 	    ip6h + 1 > data_end)
 		return XDP_DROP;
 
-	set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IPV6));
+	set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6));
 
 	ip6h->version = 6;
 	ip6h->priority = 0;
 	memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
-	ip6h->payload_len = htons(ntohs(payload_len) + sizeof(*ip6h));
+	ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + sizeof(*ip6h));
 	ip6h->nexthdr = IPPROTO_IPV6;
 	ip6h->hop_limit = 8;
 	memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
@@ -224,9 +223,9 @@ int _xdp_tx_iptunnel(struct xdp_md *xdp)
 
 	h_proto = eth->h_proto;
 
-	if (h_proto == htons(ETH_P_IP))
+	if (h_proto == bpf_htons(ETH_P_IP))
 		return handle_ipv4(xdp);
-	else if (h_proto == htons(ETH_P_IPV6))
+	else if (h_proto == bpf_htons(ETH_P_IPV6))
 
 		return handle_ipv6(xdp);
 	else
-- 
cgit v1.2.3